diff --git a/arctic_training/checkpoint/hf_engine.py b/arctic_training/checkpoint/hf_engine.py index 3d5bfb64..3604bbc3 100644 --- a/arctic_training/checkpoint/hf_engine.py +++ b/arctic_training/checkpoint/hf_engine.py @@ -87,7 +87,7 @@ def _save_z3_checkpoint(self, model) -> None: if model.global_rank == 0: output_state_dict[k] = v_p if model.global_rank == 0: - model.save_pretrained( + model_to_save.save_pretrained( self.checkpoint_dir, state_dict=output_state_dict, safe_serialization=True, diff --git a/arctic_training/trainer/trainer.py b/arctic_training/trainer/trainer.py index 6b497b47..b8dba82e 100644 --- a/arctic_training/trainer/trainer.py +++ b/arctic_training/trainer/trainer.py @@ -222,6 +222,13 @@ def __init__(self, config: TrainerConfig, mode: str = "train") -> None: scheduler_factory = self.config.scheduler.factory(self) self.scheduler = scheduler_factory() + # Synchronize all processes before DeepSpeed initialization + # This helps ensure all processes are ready, especially in multi-node setups + if self.config.world_size > 1: + logger.info(f"Rank {self.global_rank} synchronizing before DeepSpeed initialization...") + torch.distributed.barrier() + logger.info(f"Rank {self.global_rank} proceeding with DeepSpeed initialization") + self.model, *_ = deepspeed.initialize( model=self.model, optimizer=self.optimizer, @@ -400,10 +407,17 @@ def epoch(self) -> None: # deal correctly with packed samples under FA2, by calculating each seqlen tflos separately sample_seqlens = batch.pop("packed_sample_seqlens") else: - sample_seqlens = [ - [len(batch["input_ids"][idx]) * self.config.sequence_parallel_size] - for idx in range(len(batch["input_ids"])) - ] + if "input_ids" not in batch: + # batch is a ContrastiveLearningBatch + sample_seqlens = [ + [len(batch.query_tokens[idx]) * self.config.sequence_parallel_size] + for idx in range(batch.query_tokens.shape[0]) + ] + else: + sample_seqlens = [ + [len(batch["input_ids"][idx]) * self.config.sequence_parallel_size] + 
for idx in range(len(batch["input_ids"])) + ] self.metrics.seqlens = sample_seqlens self.metrics.start_timer("step") diff --git a/projects/.DS_Store b/projects/.DS_Store new file mode 100644 index 00000000..5c1d911c Binary files /dev/null and b/projects/.DS_Store differ diff --git a/projects/arctic_embed/.DS_Store b/projects/arctic_embed/.DS_Store new file mode 100644 index 00000000..a2802bad Binary files /dev/null and b/projects/arctic_embed/.DS_Store differ diff --git a/projects/arctic_embed/examples/finetune_models/README.md b/projects/arctic_embed/examples/finetune_models/README.md index a936b82d..425ee068 100644 --- a/projects/arctic_embed/examples/finetune_models/README.md +++ b/projects/arctic_embed/examples/finetune_models/README.md @@ -17,7 +17,7 @@ GIT_LFS_SKIP_SMUDGE=1 git clone https://hf.co/datasets/Snowflake/arctic-embed-ft mv ./data.gitignore ./data/.gitignore # Ensure we have all the files you need for training downloaded from LFS. -cd arctic-embed-ft-v1/ +cd data/ git lfs pull --include="combined/pretokenized/example_dot95/,eval/" # Optional: Download more large files (e.g. everything but the very large precomputed embeddings). diff --git a/projects/arctic_embed/examples/finetune_models/data/.gitignore b/projects/arctic_embed/examples/finetune_models/data/.gitignore deleted file mode 100644 index 4bed5da9..00000000 --- a/projects/arctic_embed/examples/finetune_models/data/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.parquet diff --git a/projects/arctic_embed/examples/finetune_models/data_processing_scripts/rebatch_pretokenized.py b/projects/arctic_embed/examples/finetune_models/data_processing_scripts/rebatch_pretokenized.py new file mode 100644 index 00000000..2f49f7ef --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/data_processing_scripts/rebatch_pretokenized.py @@ -0,0 +1,334 @@ +"""Re-batch pretokenized Arctic Embed datasets to a uniform query batch size. 
+ +This utility reads one or more existing pretokenized datasets (either on local +storage or S3), shuffles their batches, and rewrites them so that every output +batch contains the same number of queries. Documents and relevance labels are +preserved for each query. The datasets are assumed to share a tokenizer and the +same tokenization configuration (prefixes, max sequence lengths, etc.). + +Example usage: + +``` +python rebatch_pretokenized.py \ + --input-roots s3://bucket/ds_a s3://bucket/ds_b \ + --output-root s3://bucket/ds_mixed \ + --queries-per-batch 512 \ + --shuffle-seed 123 +``` + +NOTE: This script keeps the per-query set of positives/negatives exactly as +they appeared in the source batches. If different datasets use different counts +of hard negatives (e.g. 10 vs. 30), the output batches will contain the mixed +set of counts. +""" + +from __future__ import annotations + +import argparse +import random +from collections import defaultdict +from collections import deque +from concurrent.futures import ThreadPoolExecutor +from pathlib import PurePosixPath +from typing import Dict +from typing import Iterable +from typing import List +from typing import Sequence +from typing import Tuple + +import fsspec +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq +from fsspec.core import url_to_fs +from tqdm.auto import tqdm + + +QUERY_TOKEN_COLUMN = "QUERY_TOKEN_ID_LIST" +DOC_TOKEN_COLUMN = "DOCUMENT_TOKEN_ID_LIST" +QUERY_BATCH_ID_COLUMN = "BATCH_QUERY_ID" +DOC_BATCH_ID_COLUMN = "BATCH_DOCUMENT_ID" +RELATION_VALUE_COLUMN = "RELEVANCE" + + +class QueryExample: + """In-memory representation of a query and its associated documents.""" + + __slots__ = ("query_tokens", "doc_examples") + + def __init__(self, query_tokens: Sequence[int], doc_examples: List[Tuple[Sequence[int], int]]): + self.query_tokens = query_tokens + self.doc_examples = doc_examples + + +def parse_arguments() -> argparse.Namespace: + parser = 
argparse.ArgumentParser(description="Rebatch pretokenized datasets to a uniform batch size.") + parser.add_argument( + "--input-roots", + nargs="+", + required=True, + help="List of dataset roots (local paths or s3:// URIs) each containing data/batch_*/", + ) + parser.add_argument( + "--output-root", + required=True, + help="Destination root (local path or s3:// URI) for the rebatched dataset.", + ) + parser.add_argument( + "--queries-per-batch", + type=int, + required=True, + help="Number of queries per output batch.", + ) + parser.add_argument( + "--shuffle-seed", + type=int, + default=0, + help="Seed used to shuffle source batches before reprocessing (default: 0).", + ) + parser.add_argument( + "--progress", + action="store_true", + help="Display a progress bar while converting batches.", + ) + parser.add_argument( + "--prefetch-workers", + type=int, + default=4, + help="Number of concurrent workers used to prefetch batches (default: 4).", + ) + return parser.parse_args() + + +def list_batch_directories(fs: fsspec.AbstractFileSystem, root_path: str) -> List[str]: + entries = fs.ls(root_path, detail=True) + batch_dirs = [entry["name"] for entry in entries if entry.get("type") == "directory"] + batch_dirs.sort() + if len(batch_dirs) == 0: + raise ValueError(f"No batch directories found under {root_path}") + return batch_dirs + + +def read_batch_tables( + fs: fsspec.AbstractFileSystem, + batch_dir: str, +) -> Tuple[pa.Table, pa.Table, pa.Table]: + queries = pq.read_table(str(PurePosixPath(batch_dir) / "queries.parquet"), filesystem=fs) + documents = pq.read_table(str(PurePosixPath(batch_dir) / "documents.parquet"), filesystem=fs) + relations = pq.read_table(str(PurePosixPath(batch_dir) / "relations.parquet"), filesystem=fs) + return queries, documents, relations + + +def extract_query_examples( + queries_table: pa.Table, + documents_table: pa.Table, + relations_table: pa.Table, +) -> Iterable[QueryExample]: + query_ids = 
queries_table.column(QUERY_BATCH_ID_COLUMN).to_numpy(zero_copy_only=False) + query_token_lists = queries_table.column(QUERY_TOKEN_COLUMN).to_pylist() + + document_ids = documents_table.column(DOC_BATCH_ID_COLUMN).to_numpy(zero_copy_only=False) + document_token_lists = documents_table.column(DOC_TOKEN_COLUMN).to_pylist() + doc_id_to_tokens = {int(doc_id): doc_tokens for doc_id, doc_tokens in zip(document_ids, document_token_lists)} + + relations_by_query: Dict[int, List[Tuple[int, int]]] = defaultdict(list) + rel_q_ids = relations_table.column(QUERY_BATCH_ID_COLUMN).to_numpy(zero_copy_only=False) + rel_d_ids = relations_table.column(DOC_BATCH_ID_COLUMN).to_numpy(zero_copy_only=False) + rel_values = relations_table.column(RELATION_VALUE_COLUMN).to_numpy(zero_copy_only=False) + for qid, did, rel in zip(rel_q_ids, rel_d_ids, rel_values): + rel_int = int(rel) + if rel_int == 0: + rel_int = -1 + relations_by_query[int(qid)].append((int(did), rel_int)) + + for qid, q_tokens in zip(query_ids, query_token_lists): + doc_examples = [] + for did, rel in relations_by_query[int(qid)]: + doc_tokens = doc_id_to_tokens.get(did) + if doc_tokens is None: + raise KeyError(f"Missing document tokens for document id {did}") + doc_examples.append((doc_tokens, rel)) + if not doc_examples: + raise ValueError("Encountered query with zero associated documents; this should not happen.") + yield QueryExample(q_tokens, doc_examples) + + +def build_large_list_array(token_lists: Sequence[Sequence[int]], value_type: pa.DataType) -> pa.LargeListArray: + offsets = [0] + flat_values: List[int] = [] + for tokens in token_lists: + offsets.append(offsets[-1] + len(tokens)) + flat_values.extend(tokens) + offsets_array = pa.array(offsets, type=pa.int64()) + values_array = pa.array(flat_values, type=value_type) + return pa.LargeListArray.from_arrays(offsets_array, values_array) + + +def write_batch( + fs: fsspec.AbstractFileSystem, + output_root: str, + batch_index: int, + examples: Sequence[QueryExample], 
+ query_id_type: pa.DataType, + doc_id_type: pa.DataType, + relation_value_type: pa.DataType, + query_token_value_type: pa.DataType, + doc_token_value_type: pa.DataType, +) -> None: + batch_dir = PurePosixPath(output_root) / f"batch_{batch_index:08d}" + fs.makedirs(str(batch_dir), exist_ok=True) + + query_token_lists: List[Sequence[int]] = [] + doc_token_lists: List[Sequence[int]] = [] + relations_q: List[int] = [] + relations_d: List[int] = [] + relations_v: List[int] = [] + + doc_tokens_to_index: Dict[Tuple[int, ...], int] = {} + + for q_idx, example in enumerate(examples): + query_token_lists.append(example.query_tokens) + for doc_tokens, rel in example.doc_examples: + doc_key = tuple(doc_tokens) + doc_idx = doc_tokens_to_index.get(doc_key) + if doc_idx is None: + doc_idx = len(doc_token_lists) + doc_tokens_to_index[doc_key] = doc_idx + doc_token_lists.append(doc_tokens) + relations_q.append(q_idx) + relations_d.append(doc_idx) + relations_v.append(rel) + + query_ids_array = pa.array(np.arange(len(query_token_lists)), type=query_id_type) + doc_ids_array = pa.array(np.arange(len(doc_token_lists)), type=doc_id_type) + + queries_table = pa.table( + { + QUERY_BATCH_ID_COLUMN: query_ids_array, + QUERY_TOKEN_COLUMN: build_large_list_array(query_token_lists, query_token_value_type), + } + ) + + documents_table = pa.table( + { + DOC_BATCH_ID_COLUMN: doc_ids_array, + DOC_TOKEN_COLUMN: build_large_list_array(doc_token_lists, doc_token_value_type), + } + ) + + relations_table = pa.table( + { + QUERY_BATCH_ID_COLUMN: pa.array(relations_q, type=query_id_type), + DOC_BATCH_ID_COLUMN: pa.array(relations_d, type=doc_id_type), + RELATION_VALUE_COLUMN: pa.array(relations_v, type=relation_value_type), + } + ) + + pq.write_table(queries_table, str(batch_dir / "queries.parquet"), filesystem=fs) + pq.write_table(documents_table, str(batch_dir / "documents.parquet"), filesystem=fs) + pq.write_table(relations_table, str(batch_dir / "relations.parquet"), filesystem=fs) + + +def 
rebatch_datasets(args: argparse.Namespace) -> None: + dataset_specs = [] + for root in args.input_roots: + fs, path = url_to_fs(root) + batch_dirs = list_batch_directories(fs, path) + dataset_specs.append((fs, batch_dirs)) + + # Determine types from the first batch of the first dataset. + sample_fs, sample_batch_dirs = dataset_specs[0] + sample_queries, sample_docs, sample_relations = read_batch_tables(sample_fs, sample_batch_dirs[0]) + query_id_type = sample_queries.schema.field(QUERY_BATCH_ID_COLUMN).type + doc_id_type = sample_docs.schema.field(DOC_BATCH_ID_COLUMN).type + relation_value_type = sample_relations.schema.field(RELATION_VALUE_COLUMN).type + query_token_value_type = sample_queries.schema.field(QUERY_TOKEN_COLUMN).type.value_type + doc_token_value_type = sample_docs.schema.field(DOC_TOKEN_COLUMN).type.value_type + + # Prepare output filesystem and write metadata. + out_fs, out_path = url_to_fs(args.output_root) + out_fs.makedirs(out_path, exist_ok=True) + + # Enumerate all batch directories across datasets and optionally shuffle them. 
+ all_batches: List[Tuple[fsspec.AbstractFileSystem, str]] = [] + for fs, batch_dirs in dataset_specs: + all_batches.extend((fs, batch_dir) for batch_dir in batch_dirs) + + rng = random.Random(args.shuffle_seed) + rng.shuffle(all_batches) + + pending_examples: List[QueryExample] = [] + batch_index = 0 + total_batches = len(all_batches) + if total_batches == 0: + raise ValueError("No batch directories found across provided inputs.") + + progress = tqdm(total=total_batches, desc="Rebatching", disable=not args.progress) + + max_workers = max(1, args.prefetch_workers) + batch_iter = iter(all_batches) + pending_futures = deque() + + def submit_next(executor: ThreadPoolExecutor) -> bool: + try: + fs, batch_dir = next(batch_iter) + except StopIteration: + return False + future = executor.submit(read_batch_tables, fs, batch_dir) + pending_futures.append((future, fs, batch_dir)) + return True + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + for _ in range(min(max_workers, total_batches)): + submit_next(executor) + + while pending_futures: + future, fs, batch_dir = pending_futures.popleft() + queries_table, documents_table, relations_table = future.result() + submit_next(executor) + + for example in extract_query_examples(queries_table, documents_table, relations_table): + pending_examples.append(example) + if len(pending_examples) == args.queries_per_batch: + write_batch( + out_fs, + out_path, + batch_index, + pending_examples, + query_id_type, + doc_id_type, + relation_value_type, + query_token_value_type, + doc_token_value_type, + ) + pending_examples = [] + batch_index += 1 + + progress.update(1) + + if pending_examples: + write_batch( + out_fs, + out_path, + batch_index, + pending_examples, + query_id_type, + doc_id_type, + relation_value_type, + query_token_value_type, + doc_token_value_type, + ) + batch_index += 1 + + progress.close() + print(f"Wrote {batch_index} batches to {args.output_root}") + + +def main() -> None: + args = parse_arguments() + 
rebatch_datasets(args) + + +if __name__ == "__main__": + main() + diff --git a/projects/arctic_embed/examples/finetune_models/finetune_e5_base_unsupervised.py b/projects/arctic_embed/examples/finetune_models/finetune_arctic2.py similarity index 73% rename from projects/arctic_embed/examples/finetune_models/finetune_e5_base_unsupervised.py rename to projects/arctic_embed/examples/finetune_models/finetune_arctic2.py index ff246a43..cb21bbc4 100644 --- a/projects/arctic_embed/examples/finetune_models/finetune_e5_base_unsupervised.py +++ b/projects/arctic_embed/examples/finetune_models/finetune_arctic2.py @@ -35,18 +35,22 @@ from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA from arctic_embed.trainer import BiencoderTrainer from arctic_embed.trainer import BiencoderTrainerConfig +from arctic_embed.biencoder_s3_checkpoint import BiencoderS3CheckpointConfig from arctic_training.config.checkpoint import CheckpointConfig from arctic_training.config.logger import LoggerConfig from arctic_training.config.optimizer import OptimizerConfig from arctic_training.config.wandb import WandBConfig from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig +from arctic_training.config.enums import DType LEARNING_RATE = 3e-5 GRADIENT_CLIPPING = 10.0 -DATASET_NAME = "example_dot95" -DATA_PATH = str(Path(__file__).parent / "data" / "combined" / "pretokenized" / DATASET_NAME / "data") -EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()] +# DATASET_NAME = "example_dot95" +# DATA_PATH = str(Path(__file__).parent / "data" / "combined" / "pretokenized" / DATASET_NAME / "data") +DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/unified_IR/batched_tokenized_Alibaba_NLP_gte_multilingual_base/batched_512/" +# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()] +# 
EVAL_DATA_PATH = ["s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/InstructIR/eval/"] def now_timestamp_str() -> str: @@ -55,30 +59,40 @@ def now_timestamp_str() -> str: ts = now_timestamp_str() -checkpoint_dir = Path(__file__).parent / "checkpoints" / "finetune_e5_base_unsupervised" / ts -mconf = BiencoderModelConfig(name_or_path="intfloat/e5-base-unsupervised", pooling="first_token") +# checkpoint_dir = Path(__file__).parent / "checkpoints" / "finetune_e5_base_unsupervised" / ts +checkpoint_dir = f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/training/checkpoints/arctic2m/unified-IR-{ts}" +local_cache_dir = "/scratch/checkpoints-temp" + +mconf = BiencoderModelConfig( + name_or_path="/scratch/local", + pooling="first_token", + dtype=DType.FP32, + disable_activation_checkpoint=True, + kwargs={"trust_remote_code": True} +) dconf = ContrastivePretokenizedDataConfig( - # filesystem="s3", - # root_directory="my-bucket/path/to/combined/pretokenized/example_dot95/data", - filesystem="local", + filesystem="s3", root_directory=DATA_PATH, + # filesystem="local", + # root_directory=DATA_PATH, # Depending on how much GPU memory you have, you may need to split each # batch into a number of smaller sub-batches by setting the split_factor. # If you do so, you will probably want to decrease the learning rate accordingly. 
- # split_factor=4, - max_seq_length_query=512, - max_seq_length_doc=512, - eval_root_directories=EVAL_DATA_PATHS, - eval_max_seq_length_doc=512, - eval_max_seq_length_query=512, + split_factor=2, + max_seq_length_query=1024, + max_seq_length_doc=1024, + preserve_relations_on_split=True, + # eval_root_directories=EVAL_DATA_PATH, + # eval_max_seq_length_doc=1024, + # eval_max_seq_length_query=1024, ) -sconf = WSDSchedulerConfig(num_warmup_steps=500, num_decay_steps=1_000, learning_rate=LEARNING_RATE) +sconf = WSDSchedulerConfig(num_warmup_steps=500, num_decay_steps=1_000) oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE) lconf = LoggerConfig(level="INFO") wconf = WandBConfig( enable=True, - project="arctic-training-arctic-embed-testbed", - name=f"e5-base-unsupervised-finetune-{ts}", + project="arctic-embed", + name=f"arctic2m-unified-IR-{ts}", ) # Reference: https://www.deepspeed.ai/training/#gradient-clipping dsconf = { @@ -92,11 +106,12 @@ def now_timestamp_str() -> str: # this risk. 
"communication_data_type": "fp32", } -cconf = CheckpointConfig( - output_dir=checkpoint_dir, - type="biencoder", +cconf = BiencoderS3CheckpointConfig( + output_dir = local_cache_dir, + s3_path=checkpoint_dir, save_every_n_steps=300, save_end_of_training=True, + max_local_checkpoints = 2, ) @@ -135,10 +150,10 @@ def configure_non_distributed_distributed_training_if_needed() -> None: wandb=wconf, deepspeed=dsconf, loss_log_interval=0, - eval_frequency=100, + # eval_interval=100, use_in_batch_negatives=False, loss_temperature=0.02, overfit_first_batch=False, ) trainer = BiencoderTrainer(config=tconf) - trainer.train() + trainer.train() \ No newline at end of file diff --git a/projects/arctic_embed/examples/finetune_models/finetune_arctic2_h2h.py b/projects/arctic_embed/examples/finetune_models/finetune_arctic2_h2h.py new file mode 100644 index 00000000..96be4dfa --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/finetune_arctic2_h2h.py @@ -0,0 +1,159 @@ +# Copyright 2025 Snowflake Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to use the Arctic Embed codebase to finetune +the venerable E5-base-v2 model (released in May 2023) on a version of MSMARCO +training data which has been hard-negative-mined using a more modern technique. + +The code needed to recreate the training data can be found in the sibling directory +`data_prep` within the `hard_negative_mining` subdirectory. 
+ +Original model paper: https://arxiv.org/abs/2212.03533 +Model page: https://huggingface.co/intfloat/e5-base-v2 +Better negative mining paper: https://arxiv.org/abs/2407.15831 +""" +import sys +from datetime import datetime +from datetime import timezone +from pathlib import Path + +from arctic_embed.biencoder_model_factory import BiencoderModelConfig +from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig +from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA +from arctic_embed.trainer import BiencoderTrainer +from arctic_embed.trainer import BiencoderTrainerConfig +from arctic_embed.biencoder_s3_checkpoint import BiencoderS3CheckpointConfig + +from arctic_training.config.checkpoint import CheckpointConfig +from arctic_training.config.logger import LoggerConfig +from arctic_training.config.optimizer import OptimizerConfig +from arctic_training.config.wandb import WandBConfig +from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig +from arctic_training.config.enums import DType + +LEARNING_RATE = 3e-5 +GRADIENT_CLIPPING = 10.0 +# DATASET_NAME = "example_dot95" +# DATA_PATH = str(Path(__file__).parent / "data" / "combined" / "pretokenized" / DATASET_NAME / "data") +DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/synthetic_experiment_1/unified_h2h/batched_512/data/" +# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()] +# EVAL_DATA_PATH = ["s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/InstructIR/eval/"] + + +def now_timestamp_str() -> str: + """Get the current ISO 8601 UTC timestamp.""" + return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ") + + +ts = now_timestamp_str() +# checkpoint_dir = Path(__file__).parent / "checkpoints" / "finetune_e5_base_unsupervised" / ts +checkpoint_dir = 
f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/training/checkpoints/arctic2m/synthetic-experiment-1/unified-h2h-{ts}" +local_cache_dir = "/scratch/checkpoints-temp-h2h" + +mconf = BiencoderModelConfig( + name_or_path="/scratch/local", + pooling="first_token", + dtype=DType.FP32, + disable_activation_checkpoint=True, + kwargs={"trust_remote_code": True} +) +dconf = ContrastivePretokenizedDataConfig( + filesystem="s3", + root_directory=DATA_PATH, + # filesystem="local", + # root_directory=DATA_PATH, + # Depending on how much GPU memory you have, you may need to split each + # batch into a number of smaller sub-batches by setting the split_factor. + # If you do so, you will probably want to decrease the learning rate accordingly. + split_factor=4, + max_seq_length_query=1024, + max_seq_length_doc=1024, + preserve_relations_on_split=True, + # eval_root_directories=EVAL_DATA_PATH, + # eval_max_seq_length_doc=1024, + # eval_max_seq_length_query=1024, +) +sconf = WSDSchedulerConfig(num_warmup_steps=500, num_decay_steps=1_000) +oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE) +lconf = LoggerConfig(level="INFO") +wconf = WandBConfig( + enable=True, + project="arctic-embed", + name=f"arctic2m-synthetic-experiment-1-unified-h2h-{ts}", +) +# Reference: https://www.deepspeed.ai/training/#gradient-clipping +dsconf = { + "gradient_clipping": GRADIENT_CLIPPING, + "zero_optimization": {"stage": 1}, + # NOTE: The underlying DeepSpeed engine scales gradients down by a factor of + # `1/world_size`` in the backwards pass, so we pre-scale the loss up by a factor + # of `world_size`. Given these scalings, there is a potential for increased + # numerical imprecision when using low-precision floating point representation, + # so we set communication to fp32 in the backwards all-reduce to somewhat mitigate + # this risk. 
+ "communication_data_type": "fp32", +} +cconf = BiencoderS3CheckpointConfig( + output_dir = local_cache_dir, + s3_path=checkpoint_dir, + save_every_n_steps=3000, + save_end_of_training=True, + max_local_checkpoints = 2, +) + + +def configure_non_distributed_distributed_training_if_needed() -> None: + """Detect if we need to manually initialize distributed training environment + and do so if needed. + + NOTE: We have to do this step because Arctic Training doesn't have a default + 1-GPU launching mode and will instead fall back to trying to auto-discover + distributed training configuration (e.g. via MPI). + """ + num_cli_args = len(sys.argv) - 1 + if num_cli_args == 0: + print("***No CLI args detected, configuring for single-GPU training.***") + from os import environ + + from torch import distributed as dist + + environ["MASTER_ADDR"] = "localhost" + environ["MASTER_PORT"] = "12335" + environ["LOCAL_RANK"] = "0" + dist.init_process_group(backend="nccl", world_size=1, rank=0) + + +if __name__ == "__main__": + CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env() + configure_non_distributed_distributed_training_if_needed() + tconf = BiencoderTrainerConfig( + type="biencoder", + model=mconf, + data=dconf, + scheduler=sconf, + optimizer=oconf, + logger=lconf, + checkpoint=cconf, + wandb=wconf, + deepspeed=dsconf, + loss_log_interval=0, + # eval_interval=100, + use_in_batch_negatives=False, + loss_temperature=0.02, + overfit_first_batch=False, + ) + trainer = BiencoderTrainer(config=tconf) + trainer.train() diff --git a/projects/arctic_embed/examples/finetune_models/finetune_arctic2_synthetic.py b/projects/arctic_embed/examples/finetune_models/finetune_arctic2_synthetic.py new file mode 100644 index 00000000..3745199e --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/finetune_arctic2_synthetic.py @@ -0,0 +1,159 @@ +# Copyright 2025 Snowflake Inc. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to use the Arctic Embed codebase to finetune +the venerable E5-base-v2 model (released in May 2023) on a version of MSMARCO +training data which has been hard-negative-mined using a more modern technique. + +The code needed to recreate the training data can be found in the sibling directory +`data_prep` within the `hard_negative_mining` subdirectory. + +Original model paper: https://arxiv.org/abs/2212.03533 +Model page: https://huggingface.co/intfloat/e5-base-v2 +Better negative mining paper: https://arxiv.org/abs/2407.15831 +""" +import sys +from datetime import datetime +from datetime import timezone +from pathlib import Path + +from arctic_embed.biencoder_model_factory import BiencoderModelConfig +from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig +from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA +from arctic_embed.trainer import BiencoderTrainer +from arctic_embed.trainer import BiencoderTrainerConfig +from arctic_embed.biencoder_s3_checkpoint import BiencoderS3CheckpointConfig + +from arctic_training.config.checkpoint import CheckpointConfig +from arctic_training.config.logger import LoggerConfig +from arctic_training.config.optimizer import OptimizerConfig +from arctic_training.config.wandb import WandBConfig +from arctic_training.scheduler.wsd_factory import 
WSDSchedulerConfig +from arctic_training.config.enums import DType + +LEARNING_RATE = 3e-5 +GRADIENT_CLIPPING = 10.0 +# DATASET_NAME = "example_dot95" +# DATA_PATH = str(Path(__file__).parent / "data" / "combined" / "pretokenized" / DATASET_NAME / "data") +DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/synthetic_experiment_1/unified_synthetic/batched_512/data/" +# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()] +# EVAL_DATA_PATH = ["s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/InstructIR/eval/"] + + +def now_timestamp_str() -> str: + """Get the current ISO 8601 UTC timestamp.""" + return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ") + + +ts = now_timestamp_str() +# checkpoint_dir = Path(__file__).parent / "checkpoints" / "finetune_e5_base_unsupervised" / ts +checkpoint_dir = f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/training/checkpoints/arctic2m/synthetic-experiment-1/unified-synthetic-{ts}" +local_cache_dir = "/scratch/checkpoints-temp-synthetic" + +mconf = BiencoderModelConfig( + name_or_path="/scratch/local", + pooling="first_token", + dtype=DType.FP32, + disable_activation_checkpoint=True, + kwargs={"trust_remote_code": True} +) +dconf = ContrastivePretokenizedDataConfig( + filesystem="s3", + root_directory=DATA_PATH, + # filesystem="local", + # root_directory=DATA_PATH, + # Depending on how much GPU memory you have, you may need to split each + # batch into a number of smaller sub-batches by setting the split_factor. + # If you do so, you will probably want to decrease the learning rate accordingly. 
+ split_factor=4, + max_seq_length_query=1024, + max_seq_length_doc=1024, + preserve_relations_on_split=True, + # eval_root_directories=EVAL_DATA_PATH, + # eval_max_seq_length_doc=1024, + # eval_max_seq_length_query=1024, +) +sconf = WSDSchedulerConfig(num_warmup_steps=500, num_decay_steps=1_000) +oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE) +lconf = LoggerConfig(level="INFO") +wconf = WandBConfig( + enable=True, + project="arctic-embed", + name=f"arctic2m-synthetic-experiment-1-unified-synthetic-{ts}", +) +# Reference: https://www.deepspeed.ai/training/#gradient-clipping +dsconf = { + "gradient_clipping": GRADIENT_CLIPPING, + "zero_optimization": {"stage": 1}, + # NOTE: The underlying DeepSpeed engine scales gradients down by a factor of + # `1/world_size`` in the backwards pass, so we pre-scale the loss up by a factor + # of `world_size`. Given these scalings, there is a potential for increased + # numerical imprecision when using low-precision floating point representation, + # so we set communication to fp32 in the backwards all-reduce to somewhat mitigate + # this risk. + "communication_data_type": "fp32", +} +cconf = BiencoderS3CheckpointConfig( + output_dir = local_cache_dir, + s3_path=checkpoint_dir, + save_every_n_steps=3000, + save_end_of_training=True, + max_local_checkpoints = 2, +) + + +def configure_non_distributed_distributed_training_if_needed() -> None: + """Detect if we need to manually initialize distributed training environment + and do so if needed. + + NOTE: We have to do this step because Arctic Training doesn't have a default + 1-GPU launching mode and will instead fall back to trying to auto-discover + distributed training configuration (e.g. via MPI). 
+ """ + num_cli_args = len(sys.argv) - 1 + if num_cli_args == 0: + print("***No CLI args detected, configuring for single-GPU training.***") + from os import environ + + from torch import distributed as dist + + environ["MASTER_ADDR"] = "localhost" + environ["MASTER_PORT"] = "12335" + environ["LOCAL_RANK"] = "0" + dist.init_process_group(backend="nccl", world_size=1, rank=0) + + +if __name__ == "__main__": + CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env() + configure_non_distributed_distributed_training_if_needed() + tconf = BiencoderTrainerConfig( + type="biencoder", + model=mconf, + data=dconf, + scheduler=sconf, + optimizer=oconf, + logger=lconf, + checkpoint=cconf, + wandb=wconf, + deepspeed=dsconf, + loss_log_interval=0, + # eval_interval=100, + use_in_batch_negatives=False, + loss_temperature=0.02, + overfit_first_batch=False, + ) + trainer = BiencoderTrainer(config=tconf) + trainer.train() diff --git a/projects/arctic_embed/examples/finetune_models/finetune_arctic2l_h2h.py b/projects/arctic_embed/examples/finetune_models/finetune_arctic2l_h2h.py new file mode 100644 index 00000000..edc49435 --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/finetune_arctic2l_h2h.py @@ -0,0 +1,159 @@ +# Copyright 2025 Snowflake Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""
+This example shows how to use the Arctic Embed codebase to finetune a pretrained
+`arctic2l` checkpoint on the pretokenized `unified_h2h` training data read from S3
+(see `DATA_PATH` below).
+
+NOTE: This script was adapted from the E5-base-v2 example, which finetunes on MSMARCO data
+hard-negative-mined by the `data_prep`/`hard_negative_mining` code; the links below are for it.
+
+E5 model paper: https://arxiv.org/abs/2212.03533
+E5 model page: https://huggingface.co/intfloat/e5-base-v2
+Better negative mining paper: https://arxiv.org/abs/2407.15831
+"""
+import sys
+from datetime import datetime
+from datetime import timezone
+from pathlib import Path
+
+from arctic_embed.biencoder_model_factory import BiencoderModelConfig
+from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig
+from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA
+from arctic_embed.trainer import BiencoderTrainer
+from arctic_embed.trainer import BiencoderTrainerConfig
+from arctic_embed.biencoder_s3_checkpoint import BiencoderS3CheckpointConfig
+
+from arctic_training.config.checkpoint import CheckpointConfig
+from arctic_training.config.logger import LoggerConfig
+from arctic_training.config.optimizer import OptimizerConfig
+from arctic_training.config.wandb import WandBConfig
+from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig
+from arctic_training.config.enums import DType
+
+LEARNING_RATE = 3e-5
+GRADIENT_CLIPPING = 10.0
+# DATASET_NAME = "example_dot95"
+# DATA_PATH = str(Path(__file__).parent / "data" / "combined" / "pretokenized" / DATASET_NAME / "data")
+DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/synthetic_experiment_1/unified_h2h/Arctic2L_batched_512/data/"
+# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()]
+# EVAL_DATA_PATH =
["s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/InstructIR/eval/"] + + +def now_timestamp_str() -> str: + """Get the current ISO 8601 UTC timestamp.""" + return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ") + + +ts = now_timestamp_str() +# checkpoint_dir = Path(__file__).parent / "checkpoints" / "finetune_e5_base_unsupervised" / ts +checkpoint_dir = f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/training/checkpoints/arctic2l/synthetic-experiment-1/unified-h2h-{ts}" +local_cache_dir = "/scratch/checkpoints-temp-arctic2l-h2h" + +mconf = BiencoderModelConfig( + name_or_path="/scratch/arctic2l-pretrained", + pooling="first_token", + dtype=DType.BF16, + disable_activation_checkpoint=True, + kwargs={"trust_remote_code": True} +) +dconf = ContrastivePretokenizedDataConfig( + filesystem="s3", + root_directory=DATA_PATH, + # filesystem="local", + # root_directory=DATA_PATH, + # Depending on how much GPU memory you have, you may need to split each + # batch into a number of smaller sub-batches by setting the split_factor. + # If you do so, you will probably want to decrease the learning rate accordingly. 
+ split_factor=16, + max_seq_length_query=1024, + max_seq_length_doc=1024, + preserve_relations_on_split=True, + # eval_root_directories=EVAL_DATA_PATH, + # eval_max_seq_length_doc=1024, + # eval_max_seq_length_query=1024, +) +sconf = WSDSchedulerConfig(num_warmup_steps=500, num_decay_steps=1_000) +oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE) +lconf = LoggerConfig(level="INFO") +wconf = WandBConfig( + enable=True, + project="arctic-training-arctic-embed-testbed", + name=f"arctic2l-synthetic-experiment-1-unified-h2h-{ts}", +) +# Reference: https://www.deepspeed.ai/training/#gradient-clipping +dsconf = { + "gradient_clipping": GRADIENT_CLIPPING, + "zero_optimization": {"stage": 1}, + # NOTE: The underlying DeepSpeed engine scales gradients down by a factor of + # `1/world_size`` in the backwards pass, so we pre-scale the loss up by a factor + # of `world_size`. Given these scalings, there is a potential for increased + # numerical imprecision when using low-precision floating point representation, + # so we set communication to fp32 in the backwards all-reduce to somewhat mitigate + # this risk. + "communication_data_type": "fp32", +} +cconf = BiencoderS3CheckpointConfig( + output_dir = local_cache_dir, + s3_path=checkpoint_dir, + save_every_n_steps=30000, + save_end_of_training=True, + max_local_checkpoints = 2, +) + + +def configure_non_distributed_distributed_training_if_needed() -> None: + """Detect if we need to manually initialize distributed training environment + and do so if needed. + + NOTE: We have to do this step because Arctic Training doesn't have a default + 1-GPU launching mode and will instead fall back to trying to auto-discover + distributed training configuration (e.g. via MPI). 
+ """ + num_cli_args = len(sys.argv) - 1 + if num_cli_args == 0: + print("***No CLI args detected, configuring for single-GPU training.***") + from os import environ + + from torch import distributed as dist + + environ["MASTER_ADDR"] = "localhost" + environ["MASTER_PORT"] = "12335" + environ["LOCAL_RANK"] = "0" + dist.init_process_group(backend="nccl", world_size=1, rank=0) + + +if __name__ == "__main__": + CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env() + configure_non_distributed_distributed_training_if_needed() + tconf = BiencoderTrainerConfig( + type="biencoder", + model=mconf, + data=dconf, + scheduler=sconf, + optimizer=oconf, + logger=lconf, + checkpoint=cconf, + wandb=wconf, + deepspeed=dsconf, + loss_log_interval=0, + # eval_interval=100, + use_in_batch_negatives=False, + loss_temperature=0.02, + overfit_first_batch=False, + ) + trainer = BiencoderTrainer(config=tconf) + trainer.train() diff --git a/projects/arctic_embed/examples/finetune_models/finetune_arctic2l_synthetic.py b/projects/arctic_embed/examples/finetune_models/finetune_arctic2l_synthetic.py new file mode 100644 index 00000000..14c48adb --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/finetune_arctic2l_synthetic.py @@ -0,0 +1,159 @@ +# Copyright 2025 Snowflake Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""
+This example shows how to use the Arctic Embed codebase to finetune a pretrained
+`arctic2l` checkpoint on the pretokenized `unified_synthetic` training data read from S3
+(see `DATA_PATH` below).
+
+NOTE: This script was adapted from the E5-base-v2 example, which finetunes on MSMARCO data
+hard-negative-mined by the `data_prep`/`hard_negative_mining` code; the links below are for it.
+
+E5 model paper: https://arxiv.org/abs/2212.03533
+E5 model page: https://huggingface.co/intfloat/e5-base-v2
+Better negative mining paper: https://arxiv.org/abs/2407.15831
+"""
+import sys
+from datetime import datetime
+from datetime import timezone
+from pathlib import Path
+
+from arctic_embed.biencoder_model_factory import BiencoderModelConfig
+from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig
+from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA
+from arctic_embed.trainer import BiencoderTrainer
+from arctic_embed.trainer import BiencoderTrainerConfig
+from arctic_embed.biencoder_s3_checkpoint import BiencoderS3CheckpointConfig
+
+from arctic_training.config.checkpoint import CheckpointConfig
+from arctic_training.config.logger import LoggerConfig
+from arctic_training.config.optimizer import OptimizerConfig
+from arctic_training.config.wandb import WandBConfig
+from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig
+from arctic_training.config.enums import DType
+
+LEARNING_RATE = 3e-5
+GRADIENT_CLIPPING = 10.0
+# DATASET_NAME = "example_dot95"
+# DATA_PATH = str(Path(__file__).parent / "data" / "combined" / "pretokenized" / DATASET_NAME / "data")
+DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/synthetic_experiment_1/unified_synthetic/Arctic2L_batched_512/data/"
+# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()]
+# EVAL_DATA_PATH =
["s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/InstructIR/eval/"] + + +def now_timestamp_str() -> str: + """Get the current ISO 8601 UTC timestamp.""" + return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ") + + +ts = now_timestamp_str() +# checkpoint_dir = Path(__file__).parent / "checkpoints" / "finetune_e5_base_unsupervised" / ts +checkpoint_dir = f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/training/checkpoints/arctic2l/synthetic-experiment-1/unified-synthetic-{ts}" +local_cache_dir = "/scratch/checkpoints-temp-arctic2l-synthetic" + +mconf = BiencoderModelConfig( + name_or_path="/scratch/arctic2l-pretrained", + pooling="first_token", + dtype=DType.BF16, + disable_activation_checkpoint=True, + kwargs={"trust_remote_code": True} +) +dconf = ContrastivePretokenizedDataConfig( + filesystem="s3", + root_directory=DATA_PATH, + # filesystem="local", + # root_directory=DATA_PATH, + # Depending on how much GPU memory you have, you may need to split each + # batch into a number of smaller sub-batches by setting the split_factor. + # If you do so, you will probably want to decrease the learning rate accordingly. 
+ split_factor=16, + max_seq_length_query=1024, + max_seq_length_doc=1024, + preserve_relations_on_split=True, + # eval_root_directories=EVAL_DATA_PATH, + # eval_max_seq_length_doc=1024, + # eval_max_seq_length_query=1024, +) +sconf = WSDSchedulerConfig(num_warmup_steps=500, num_decay_steps=1_000) +oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE) +lconf = LoggerConfig(level="INFO") +wconf = WandBConfig( + enable=True, + project="arctic-training-arctic-embed-testbed", + name=f"arctic2l-synthetic-experiment-1-unified-synthetic-{ts}", +) +# Reference: https://www.deepspeed.ai/training/#gradient-clipping +dsconf = { + "gradient_clipping": GRADIENT_CLIPPING, + "zero_optimization": {"stage": 1}, + # NOTE: The underlying DeepSpeed engine scales gradients down by a factor of + # `1/world_size`` in the backwards pass, so we pre-scale the loss up by a factor + # of `world_size`. Given these scalings, there is a potential for increased + # numerical imprecision when using low-precision floating point representation, + # so we set communication to fp32 in the backwards all-reduce to somewhat mitigate + # this risk. + "communication_data_type": "fp32", +} +cconf = BiencoderS3CheckpointConfig( + output_dir = local_cache_dir, + s3_path=checkpoint_dir, + save_every_n_steps=30000, + save_end_of_training=True, + max_local_checkpoints = 2, +) + + +def configure_non_distributed_distributed_training_if_needed() -> None: + """Detect if we need to manually initialize distributed training environment + and do so if needed. + + NOTE: We have to do this step because Arctic Training doesn't have a default + 1-GPU launching mode and will instead fall back to trying to auto-discover + distributed training configuration (e.g. via MPI). 
+ """ + num_cli_args = len(sys.argv) - 1 + if num_cli_args == 0: + print("***No CLI args detected, configuring for single-GPU training.***") + from os import environ + + from torch import distributed as dist + + environ["MASTER_ADDR"] = "localhost" + environ["MASTER_PORT"] = "12335" + environ["LOCAL_RANK"] = "0" + dist.init_process_group(backend="nccl", world_size=1, rank=0) + + +if __name__ == "__main__": + CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env() + configure_non_distributed_distributed_training_if_needed() + tconf = BiencoderTrainerConfig( + type="biencoder", + model=mconf, + data=dconf, + scheduler=sconf, + optimizer=oconf, + logger=lconf, + checkpoint=cconf, + wandb=wconf, + deepspeed=dsconf, + loss_log_interval=0, + # eval_interval=100, + use_in_batch_negatives=False, + loss_temperature=0.02, + overfit_first_batch=False, + ) + trainer = BiencoderTrainer(config=tconf) + trainer.train() diff --git a/projects/arctic_embed/examples/finetune_models/finetune_qwen.py b/projects/arctic_embed/examples/finetune_models/finetune_qwen.py new file mode 100644 index 00000000..8502aa9c --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/finetune_qwen.py @@ -0,0 +1,165 @@ +# Copyright 2025 Snowflake Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""
+This example shows how to use the Arctic Embed codebase to finetune a Qwen encoder
+checkpoint on the pretokenized `unified_IR` training data read from S3, which is
+tokenized for Qwen3-0.6B (see `DATA_PATH` below).
+
+NOTE: This script was adapted from the E5-base-v2 example, which finetunes on MSMARCO data
+hard-negative-mined by the `data_prep`/`hard_negative_mining` code; the links below are for it.
+
+E5 model paper: https://arxiv.org/abs/2212.03533
+E5 model page: https://huggingface.co/intfloat/e5-base-v2
+Better negative mining paper: https://arxiv.org/abs/2407.15831
+"""
+import sys
+from datetime import datetime
+from datetime import timezone
+from pathlib import Path
+
+from arctic_embed.biencoder_model_factory import BiencoderModelConfig
+from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig
+from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA
+from arctic_embed.trainer import BiencoderTrainer
+from arctic_embed.trainer import BiencoderTrainerConfig
+from arctic_embed.biencoder_s3_checkpoint import BiencoderS3CheckpointConfig
+
+from arctic_training.config.checkpoint import CheckpointConfig
+from arctic_training.config.logger import LoggerConfig
+from arctic_training.config.optimizer import OptimizerConfig
+from arctic_training.config.wandb import WandBConfig
+from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig
+from arctic_training.config.enums import DType
+
+LEARNING_RATE = 5e-6
+GRADIENT_CLIPPING = 10.0
+# DATASET_NAME = "example_dot95"
+# DATA_PATH = str(Path(__file__).parent / "data" / "combined" / "pretokenized" / DATASET_NAME / "data")
+# DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/InstructIR/data"
+# DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/unified_ft/batched_tokenized_Qwen_Qwen3_0.6B/batched_512/"
+# DATA_PATH =
"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/unified_IR/batched_tokenized_Alibaba_NLP_gte_multilingual_base/batched_512/" +# DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/unified_ft/batched_tokenized_Qwen_Qwen3_0.6B/batched_16/" +DATA_PATH = "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/unified_IR/batched_tokenized_Qwen_Qwen3_0.6B/batched_512/" +# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()] +# EVAL_DATA_PATH = ["s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/InstructIR/eval/"] + + +def now_timestamp_str() -> str: + """Get the current ISO 8601 UTC timestamp.""" + return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ") + + +ts = now_timestamp_str() +# checkpoint_dir = Path(__file__).parent / "checkpoints" / "finetune_e5_base_unsupervised" / ts +checkpoint_dir = f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/training/checkpoints/qwenpoint6b/IR-{ts}" +local_cache_dir = "/scratch/checkpoint-saving" + +mconf = BiencoderModelConfig( + name_or_path="/scratch/qwen/hf_encoder", + pooling="last_token", + dtype=DType.FP32, + disable_activation_checkpoint=True, + kwargs={"trust_remote_code": True} +) +dconf = ContrastivePretokenizedDataConfig( + filesystem="s3", + root_directory=DATA_PATH, + # filesystem="local", + # root_directory=DATA_PATH, + # Depending on how much GPU memory you have, you may need to split each + # batch into a number of smaller sub-batches by setting the split_factor. + # If you do so, you will probably want to decrease the learning rate accordingly. 
+ split_factor=32, + max_seq_length_query=1024, + max_seq_length_doc=1024, + # eval_root_directories=EVAL_DATA_PATH, + # eval_max_seq_length_doc=1024, + # eval_max_seq_length_query=1024, + pad_value=151643, + left_pad=True, + preserve_relations_on_split=True, +) +sconf = WSDSchedulerConfig(num_warmup_steps=500, num_decay_steps=1_000) +oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE) +lconf = LoggerConfig(level="INFO") +wconf = WandBConfig( + enable=True, + project="arctic-embed", + name=f"qwen_point6b_IR_{ts}", +) +# Reference: https://www.deepspeed.ai/training/#gradient-clipping +dsconf = { + "gradient_clipping": GRADIENT_CLIPPING, + "zero_optimization": {"stage": 1}, + # NOTE: The underlying DeepSpeed engine scales gradients down by a factor of + # `1/world_size`` in the backwards pass, so we pre-scale the loss up by a factor + # of `world_size`. Given these scalings, there is a potential for increased + # numerical imprecision when using low-precision floating point representation, + # so we set communication to fp32 in the backwards all-reduce to somewhat mitigate + # this risk. + "communication_data_type": "fp32", +} +cconf = BiencoderS3CheckpointConfig( + output_dir = local_cache_dir, + s3_path=checkpoint_dir, + save_every_n_steps=30000, + save_end_of_training=True, + max_local_checkpoints = 2, +) + + +def configure_non_distributed_distributed_training_if_needed() -> None: + """Detect if we need to manually initialize distributed training environment + and do so if needed. + + NOTE: We have to do this step because Arctic Training doesn't have a default + 1-GPU launching mode and will instead fall back to trying to auto-discover + distributed training configuration (e.g. via MPI). 
+ """ + num_cli_args = len(sys.argv) - 1 + if num_cli_args == 0: + print("***No CLI args detected, configuring for single-GPU training.***") + from os import environ + + from torch import distributed as dist + + environ["MASTER_ADDR"] = "localhost" + environ["MASTER_PORT"] = "12335" + environ["LOCAL_RANK"] = "0" + dist.init_process_group(backend="nccl", world_size=1, rank=0) + + +if __name__ == "__main__": + CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env() + configure_non_distributed_distributed_training_if_needed() + tconf = BiencoderTrainerConfig( + type="biencoder", + model=mconf, + data=dconf, + scheduler=sconf, + optimizer=oconf, + logger=lconf, + checkpoint=cconf, + wandb=wconf, + deepspeed=dsconf, + loss_log_interval=0, + # eval_interval=100, + use_in_batch_negatives=False, + loss_temperature=0.02, + overfit_first_batch=False, + ) + trainer = BiencoderTrainer(config=tconf) + trainer.train() diff --git a/projects/arctic_embed/examples/finetune_models/pretrain.py b/projects/arctic_embed/examples/finetune_models/pretrain.py new file mode 100644 index 00000000..b0e43e5a --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/pretrain.py @@ -0,0 +1,137 @@ +# Copyright 2025 Snowflake Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import argparse
+import json
+from datetime import datetime
+from datetime import timezone
+from pathlib import Path
+
+from arctic_embed.biencoder_model_factory import BiencoderModelConfig
+from arctic_embed.biencoder_s3_checkpoint import BiencoderS3CheckpointConfig
+from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig
+from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA
+from arctic_embed.trainer import BiencoderTrainer
+from arctic_embed.trainer import BiencoderTrainerConfig
+
+from arctic_training.config.logger import LoggerConfig
+from arctic_training.config.optimizer import OptimizerConfig
+from arctic_training.config.wandb import WandBConfig
+from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig
+
+
+def now_timestamp_str() -> str:
+    """Return the current UTC time as a compact ISO 8601 string, e.g. ``20250101T120000Z``."""
+    return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ")
+
+
+def build_trainer_config_from_json(cfg: dict) -> BiencoderTrainerConfig:
+    """Build the biencoder trainer config from a parsed JSON config dict.
+
+    Required keys (a missing one raises ``KeyError``): ``BASE_MODEL``,
+    ``TRAINING_DATA_PATH``, ``PAD_VALUE``, ``LEARNING_RATE`` and
+    ``S3_CHECKPOINT_PATH``. Every other key falls back to the default shown below.
+
+    NOTE: As a side effect this creates the local checkpoint staging directory.
+    """
+    # Model
+    mconf = BiencoderModelConfig(
+        name_or_path=cfg["BASE_MODEL"],
+        pooling=cfg.get("POOLING_METHOD", "last_token"),
+        disable_activation_checkpoint=not cfg.get("ACTIVATION_CHECKPOINTING", False),
+    )
+
+    # Data
+    dconf = ContrastivePretokenizedDataConfig(
+        filesystem=cfg.get("FILE_SYSTEM", "s3"),
+        root_directory=cfg["TRAINING_DATA_PATH"],
+        eval_root_directories=cfg.get("EVALUATION_DATA_PATHS", []),
+        max_seq_length_query=cfg.get("MAX_SEQ_LENGTH_QUERY", 32),
+        max_seq_length_doc=cfg.get("MAX_SEQ_LENGTH_DOC", 256),
+        eval_max_seq_length_query=cfg.get("MAX_SEQ_LENGTH_QUERY", 32),
+        eval_max_seq_length_doc=cfg.get("MAX_SEQ_LENGTH_DOC", 256),
+        pad_value=cfg["PAD_VALUE"],
+        left_pad=cfg.get("LEFT_PADDING", True),
+    )
+
+    # Sched/optim/logging
+    sconf = WSDSchedulerConfig(
+        num_warmup_steps=cfg.get("WARMUP_STEPS", 2000),
+        num_decay_steps=cfg.get("DECAY_STEPS", 2000),
+    )
+    oconf = OptimizerConfig(
+        weight_decay=cfg.get("WEIGHT_DECAY", 0.01),
+        learning_rate=cfg["LEARNING_RATE"],
+    )
+    lconf = LoggerConfig(level=cfg.get("LOG_LEVEL", "INFO"))
+
+    # W&B
+    wconf = WandBConfig(
+        enable=cfg.get("ENABLE_WANDB", True),
+        project=cfg.get("WANDB_PROJECT", "arctic-embed"),
+        name=cfg.get("WANDB_RUN_NAME", f"arctic-embed-pretraining-qwen3-0point6-{now_timestamp_str()}"),
+    )
+
+    # DeepSpeed
+    dsconf = {
+        "gradient_clipping": cfg.get("GRADIENT_CLIPPING", 10.0),
+        "zero_optimization": {"stage": int(cfg.get("ZERO_STAGE", 1))},
+        "communication_data_type": cfg.get("COMMUNICATION_DATA_TYPE", "fp32"),
+    }
+
+    # S3 Checkpoint configuration (always required)
+    # Local cache directory
+    local_cache_dir = Path("/tmp") / "arctic_embed_checkpoints_cache"
+    local_cache_dir.mkdir(parents=True, exist_ok=True)
+
+    cconf = BiencoderS3CheckpointConfig(
+        output_dir=local_cache_dir,  # Used as staging directory
+        # Required key: index directly so a missing path fails here instead of passing None along.
+        s3_path=cfg["S3_CHECKPOINT_PATH"],
+        local_cache_dir=str(local_cache_dir),
+        max_local_checkpoints=cfg.get("MAX_LOCAL_CHECKPOINTS", 3),
+        save_every_n_steps=cfg.get("SAVE_STEPS", 300),
+        save_end_of_training=cfg.get("SAVE_END_OF_TRAINING", True),
+        auto_resume=cfg.get("AUTO_RESUME", True),
+    )
+
+    # Trainer config
+    tconf = BiencoderTrainerConfig(
+        type="biencoder",
+        model=mconf,
+        data=dconf,
+        scheduler=sconf,
+        optimizer=oconf,
+        logger=lconf,
+        checkpoint=cconf,
+        wandb=wconf,
+        deepspeed=dsconf,
+        loss_log_interval=cfg.get("LOSS_LOG_INTERVAL", 0),
+        eval_frequency=cfg.get("EVAL_STEPS", 300),
+        use_in_batch_negatives=cfg.get("IN_BATCH_NEGATIVES", True),
+        loss_temperature=cfg.get("LOSS_TEMPERATURE", 0.02),
+        overfit_first_batch=cfg.get("OVERFIT_FIRST_BATCH", False),
+        mrl_dim=cfg.get("MRL_DIM", 256),
+    )
+    return tconf
+
+
+def main() -> None:
+    """Parse the CLI, load the JSON config file, and run biencoder training."""
+    CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env()
+
+    parser = argparse.ArgumentParser(description="Arctic-Embed biencoder pretraining via JSON config")
+    parser.add_argument("config_json", type=str, help="Path to JSON config file")
+    args = parser.parse_args()
+
+    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
+    with open(args.config_json, "r", encoding="utf-8") as f:
+        cfg = json.load(f)
+
+    tconf = build_trainer_config_from_json(cfg)
+    trainer = BiencoderTrainer(config=tconf)
+    trainer.train()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/arctic_embed/examples/finetune_models/pretrain_bge_retromae.py b/projects/arctic_embed/examples/finetune_models/pretrain_bge_retromae.py
new file mode 100644
index 00000000..3fb9418e
--- /dev/null
+++ b/projects/arctic_embed/examples/finetune_models/pretrain_bge_retromae.py
@@ -0,0 +1,195 @@
+# Copyright 2025 Snowflake Inc.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This example shows how to use the Arctic Embed codebase to finetune
+the venerable E5-base-v2 model (released in May 2023) on a version of MSMARCO
+training data which has been hard-negative-mined using a more modern technique.
+
+The code needed to recreate the training data can be found in the sibling directory
+`data_prep` within the `hard_negative_mining` subdirectory.
+ +Original model paper: https://arxiv.org/abs/2212.03533 +Model page: https://huggingface.co/intfloat/e5-base-v2 +Better negative mining paper: https://arxiv.org/abs/2407.15831 +""" +import sys +from datetime import datetime +from datetime import timezone +from pathlib import Path + +from arctic_embed.biencoder_model_factory import BiencoderModelConfig +from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig +from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA +from arctic_embed.trainer import BiencoderTrainer +from arctic_embed.trainer import BiencoderTrainerConfig + +from arctic_training.config.checkpoint import CheckpointConfig +from arctic_training.config.logger import LoggerConfig +from arctic_training.config.optimizer import OptimizerConfig +from arctic_training.config.wandb import WandBConfig +from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig + +LEARNING_RATE = 3e-5 +GRADIENT_CLIPPING = 10.0 +# DATA_PATH = str(Path(__file__).parent / "data" / "pretrain_amazonqa" / "batched_16384") +DATA_PATH = ( + "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/combined_all_16384" +) +# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()] # fix this +datasets = [ + "amazon_qa", + "ccnews_de_v1", + "ccnews_en_v1", + "ccnews_es_v1", + "ccnews_fr_v1", + "ccnews_it_v1", + "ccnews_pl_v1", + "ccnews_pt_v1", + "faq", + "mc4_de_v1", + "mc4_en_v1", + "mc4_es_v1", + "mc4_fr_v1", + "mc4_it_v1", + "mc4_pl_v1", + "mc4_pt_v1", + "mwiki_de_v1", + "mwiki_en_v1", + "mwiki_es_v1", + "mwiki_fr_v1", + "mwiki_it_v1", + "mwiki_pl_v1", + "mwiki_pt_v1", + "paq", + "pes2o", + "red_pajama", + "red_pajamas_1t_stackexchange", + "s2orc_title_abstracts", + "snippets4", + "techrepo", + "top_stories", + "trivia_qa", + "wikipedia", +] +EVAL_DATA_PATHS = [ + 
f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/combined_all_16384_eval/{dataset}"
+    for dataset in datasets
+]
+# from transformers import AutoTokenizer
+# tok = AutoTokenizer.from_pretrained("BAAI/bge-m3-retromae")
+# tok.pad_token_id --> 1
+PAD_VALUE = 1
+LEFT_PAD = False
+
+
+def now_timestamp_str() -> str:
+    """Get the current ISO 8601 UTC timestamp."""
+    return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ")
+
+
+ts = now_timestamp_str()
+checkpoint_dir = Path(__file__).parent / "checkpoints" / "pretrain_bge_retromae" / ts
+mconf = BiencoderModelConfig(
+    name_or_path="BAAI/bge-m3-retromae", pooling="first_token", kwargs={"trust_remote_code": True}
+)
+dconf = ContrastivePretokenizedDataConfig(
+    filesystem="s3",
+    root_directory=DATA_PATH,
+    # filesystem="local",
+    # root_directory=DATA_PATH,
+    # Depending on how much GPU memory you have, you may need to split each
+    # batch into a number of smaller sub-batches by setting the split_factor.
+    # If you do so, you will probably want to decrease the learning rate accordingly.
+    # split_factor=4,
+    max_seq_length_query=32,
+    max_seq_length_doc=256,
+    eval_root_directories=EVAL_DATA_PATHS,
+    eval_max_seq_length_doc=256,  # eval truncation mirrors the training doc limit
+    eval_max_seq_length_query=32,  # eval truncation mirrors the training query limit
+    pad_value=PAD_VALUE,
+    left_pad=LEFT_PAD,
+)
+sconf = WSDSchedulerConfig(num_warmup_steps=2000, num_decay_steps=2000)
+oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE)
+lconf = LoggerConfig(level="INFO")
+wconf = WandBConfig(
+    enable=True,
+    project="arctic-training-arctic-embed-testbed",
+    name=f"bge-m3-retromae-pretrain-{ts}",
+)
+# Reference: https://www.deepspeed.ai/training/#gradient-clipping
+dsconf = {
+    "gradient_clipping": GRADIENT_CLIPPING,
+    "zero_optimization": {"stage": 1},
+    # NOTE: The underlying DeepSpeed engine scales gradients down by a factor of
+    # `1/world_size` in the backwards pass, so we pre-scale the loss up by a factor
+    # of `world_size`.
Given these scalings, there is a potential for increased + # numerical imprecision when using low-precision floating point representation, + # so we set communication to fp32 in the backwards all-reduce to somewhat mitigate + # this risk. + "communication_data_type": "fp32", +} +cconf = CheckpointConfig( + output_dir=checkpoint_dir, + type="biencoder", + save_every_n_steps=300, + save_end_of_training=True, +) + + +def configure_non_distributed_distributed_training_if_needed() -> None: + """Detect if we need to manually initialize distributed training environment + and do so if needed. + + NOTE: We have to do this step because Arctic Training doesn't have a default + 1-GPU launching mode and will instead fall back to trying to auto-discover + distributed training configuration (e.g. via MPI). + """ + num_cli_args = len(sys.argv) - 1 + if num_cli_args == 0: + print("***No CLI args detected, configuring for single-GPU training.***") + from os import environ + + from torch import distributed as dist + + environ["MASTER_ADDR"] = "localhost" + environ["MASTER_PORT"] = "12335" + environ["LOCAL_RANK"] = "0" + dist.init_process_group(backend="nccl", world_size=1, rank=0) + + +if __name__ == "__main__": + CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env() + configure_non_distributed_distributed_training_if_needed() + tconf = BiencoderTrainerConfig( + type="biencoder", + model=mconf, + data=dconf, + scheduler=sconf, + optimizer=oconf, + logger=lconf, + checkpoint=cconf, + wandb=wconf, + deepspeed=dsconf, + loss_log_interval=0, + eval_frequency=10, + use_in_batch_negatives=True, + loss_temperature=0.02, + overfit_first_batch=False, + mrl_dim=256, + ) + trainer = BiencoderTrainer(config=tconf) + trainer.train() diff --git a/projects/arctic_embed/examples/finetune_models/pretrain_mgte.py b/projects/arctic_embed/examples/finetune_models/pretrain_mgte.py new file mode 100644 index 00000000..221d4758 --- /dev/null +++ 
b/projects/arctic_embed/examples/finetune_models/pretrain_mgte.py @@ -0,0 +1,201 @@ +# Copyright 2025 Snowflake Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to use the Arctic Embed codebase to pretrain a biencoder +initialized from `Alibaba-NLP/gte-multilingual-mlm-base` (first-token pooling) on +pretokenized contrastive batches read from S3, with periodic per-dataset evaluation. + +NOTE: This script appears to be adapted from the E5-base-v2 finetuning example; the +reference links that follow are carried over from it and describe E5, not this model. 
+ +Original model paper: https://arxiv.org/abs/2212.03533 +Model page: https://huggingface.co/intfloat/e5-base-v2 +Better negative mining paper: https://arxiv.org/abs/2407.15831 +""" +import sys +from datetime import datetime +from datetime import timezone +from pathlib import Path + +from arctic_embed.biencoder_model_factory import BiencoderModelConfig +from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig +from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA +from arctic_embed.trainer import BiencoderTrainer +from arctic_embed.trainer import BiencoderTrainerConfig + +from arctic_training.config.checkpoint import CheckpointConfig +from arctic_training.config.logger import LoggerConfig +from arctic_training.config.optimizer import OptimizerConfig +from arctic_training.config.wandb import WandBConfig +from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig + +LEARNING_RATE = 1e-4 +GRADIENT_CLIPPING = 10.0 +# DATA_PATH = str(Path(__file__).parent / "data" / "pretrain_amazonqa" / "batched_16384") +DATA_PATH = ( + "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/Alibaba_NLP_gte_multilingual_base/combined_all_32768" +) +# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()] # fix this +datasets = [ + "amazon_qa", + "ccnews_de_v1", + "ccnews_en_v1", + "ccnews_es_v1", + "ccnews_fr_v1", + "ccnews_it_v1", + "ccnews_pl_v1", + "ccnews_pt_v1", + "faq", + "mc4_de_v1", + "mc4_en_v1", + "mc4_es_v1", + "mc4_fr_v1", + "mc4_it_v1", + "mc4_pl_v1", + "mc4_pt_v1", + "mwiki_de_v1", + "mwiki_en_v1", + "mwiki_es_v1", + "mwiki_fr_v1", + "mwiki_it_v1", + "mwiki_pl_v1", + "mwiki_pt_v1", + "paq", + "pes2o", + "red_pajama", + "red_pajamas_1t_stackexchange", + "s2orc_title_abstracts", + "snippets4", + "techrepo", + "top_stories", + "trivia_qa", + "wikipedia", +] +EVAL_DATA_PATHS = [ + 
f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/Alibaba_NLP_gte_multilingual_base/combined_all_32768_eval/{dataset}" + for dataset in datasets +] +# from transformers import AutoTokenizer +# tok = AutoTokenizer.from_pretrained("BAAI/bge-m3-retromae") +# tok.pad_token_id --> 1 +PAD_VALUE = 1 +LEFT_PAD = False + + +def now_timestamp_str() -> str: + """Get the current ISO 8601 UTC timestamp.""" + return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ") + + +ts = now_timestamp_str() +checkpoint_dir = Path(__file__).parent / "checkpoints" / "pretrain_mgte" / ts +mconf = BiencoderModelConfig( + name_or_path="Alibaba-NLP/gte-multilingual-mlm-base", + pooling="first_token", + kwargs={ + "trust_remote_code": True, + "unpad_inputs": True, + "use_memory_efficient_attention": True, + }, +) +dconf = ContrastivePretokenizedDataConfig( + filesystem="s3", + root_directory=DATA_PATH, + # filesystem="local", + # root_directory=DATA_PATH, + # Depending on how much GPU memory you have, you may need to split each + # batch into a number of smaller sub-batches by setting the split_factor. + # If you do so, you will probably want to decrease the learning rate accordingly. 
+ # split_factor=4, + max_seq_length_query=32, + max_seq_length_doc=256, + eval_root_directories=EVAL_DATA_PATHS, + eval_max_seq_length_doc=32, + eval_max_seq_length_query=256, + pad_value=PAD_VALUE, + left_pad=LEFT_PAD, +) +sconf = WSDSchedulerConfig(num_warmup_steps=2000, num_decay_steps=2000) +oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE) +lconf = LoggerConfig(level="INFO") +wconf = WandBConfig( + enable=True, + project="arctic-training-arctic-embed-testbed", + name=f"mgte-pretrain-{ts}", +) +# Reference: https://www.deepspeed.ai/training/#gradient-clipping +dsconf = { + "gradient_clipping": GRADIENT_CLIPPING, + "zero_optimization": {"stage": 1}, + # NOTE: The underlying DeepSpeed engine scales gradients down by a factor of + # `1/world_size`` in the backwards pass, so we pre-scale the loss up by a factor + # of `world_size`. Given these scalings, there is a potential for increased + # numerical imprecision when using low-precision floating point representation, + # so we set communication to fp32 in the backwards all-reduce to somewhat mitigate + # this risk. + "communication_data_type": "fp32", +} +cconf = CheckpointConfig( + output_dir=checkpoint_dir, + type="biencoder", + save_every_n_steps=300, + save_end_of_training=True, +) + + +def configure_non_distributed_distributed_training_if_needed() -> None: + """Detect if we need to manually initialize distributed training environment + and do so if needed. + + NOTE: We have to do this step because Arctic Training doesn't have a default + 1-GPU launching mode and will instead fall back to trying to auto-discover + distributed training configuration (e.g. via MPI). 
+ """ + num_cli_args = len(sys.argv) - 1 + if num_cli_args == 0: + print("***No CLI args detected, configuring for single-GPU training.***") + from os import environ + + from torch import distributed as dist + + environ["MASTER_ADDR"] = "localhost" + environ["MASTER_PORT"] = "12335" + environ["LOCAL_RANK"] = "0" + dist.init_process_group(backend="nccl", world_size=1, rank=0) + + +if __name__ == "__main__": + CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env() + configure_non_distributed_distributed_training_if_needed() + tconf = BiencoderTrainerConfig( + type="biencoder", + model=mconf, + data=dconf, + scheduler=sconf, + optimizer=oconf, + logger=lconf, + checkpoint=cconf, + wandb=wconf, + deepspeed=dsconf, + loss_log_interval=0, + eval_frequency=300, + use_in_batch_negatives=True, + loss_temperature=0.02, + overfit_first_batch=False, + mrl_dim=256, + ) + trainer = BiencoderTrainer(config=tconf) + trainer.train() diff --git a/projects/arctic_embed/examples/finetune_models/pretrain_qwen3_0point6.py b/projects/arctic_embed/examples/finetune_models/pretrain_qwen3_0point6.py new file mode 100644 index 00000000..38ae5ffb --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/pretrain_qwen3_0point6.py @@ -0,0 +1,197 @@ +# Copyright 2025 Snowflake Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +This example shows how to use the Arctic Embed codebase to pretrain a biencoder +initialized from `Qwen/Qwen3-0.6B` (last-token pooling, left-padded inputs) on +pretokenized contrastive batches read from S3, with periodic per-dataset evaluation. + +NOTE: This script appears to be adapted from the E5-base-v2 finetuning example; the +reference links below are carried over from it and describe E5, not this model. + +Original model paper: https://arxiv.org/abs/2212.03533 +Model page: https://huggingface.co/intfloat/e5-base-v2 +Better negative mining paper: https://arxiv.org/abs/2407.15831 +""" +import sys +from datetime import datetime +from datetime import timezone +from pathlib import Path + +from arctic_embed.biencoder_model_factory import BiencoderModelConfig +from arctic_embed.contrastive_dataloader import ContrastivePretokenizedDataConfig +from arctic_embed.core.cuda_allocator_config import CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA +from arctic_embed.trainer import BiencoderTrainer +from arctic_embed.trainer import BiencoderTrainerConfig + +from arctic_training.config.checkpoint import CheckpointConfig +from arctic_training.config.logger import LoggerConfig +from arctic_training.config.optimizer import OptimizerConfig +from arctic_training.config.wandb import WandBConfig +from arctic_training.scheduler.wsd_factory import WSDSchedulerConfig + +LEARNING_RATE = 3e-5 +GRADIENT_CLIPPING = 10.0 +# DATA_PATH = str(Path(__file__).parent / "data" / "pretrain_amazonqa" / "batched_16384") +DATA_PATH = ( + "s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/Qwen_Qwen3_Embedding_0.6B/combined_all_16384" +) +# EVAL_DATA_PATHS = [str(path) for path in (Path(__file__).parent / "data" / "eval").iterdir() if path.is_dir()] # fix this +datasets = [ + "amazon_qa", + "ccnews_de_v1", + "ccnews_en_v1", + "ccnews_es_v1", + "ccnews_fr_v1", + "ccnews_it_v1", + "ccnews_pl_v1", + "ccnews_pt_v1", + "faq", + "mc4_de_v1", + "mc4_en_v1", + 
"mc4_es_v1", + "mc4_fr_v1", + 
"mc4_it_v1", + "mc4_pl_v1", + "mc4_pt_v1", + "mwiki_de_v1", + "mwiki_en_v1", + "mwiki_es_v1", + "mwiki_fr_v1", + "mwiki_it_v1", + "mwiki_pl_v1", + "mwiki_pt_v1", + "paq", + "pes2o", + "red_pajama", + "red_pajamas_1t_stackexchange", + "s2orc_title_abstracts", + "snippets4", + "techrepo", + "top_stories", + "trivia_qa", + "wikipedia", +] +EVAL_DATA_PATHS = [ + f"s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/pretrain_data_arctic_training_format/Qwen_Qwen3_Embedding_0.6B/combined_all_16384_eval/{dataset}" + for dataset in datasets +] +# from transformers import AutoTokenizer +# tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-Embedding-0.6B") +# tok.pad_token_id --> 151643 (presumably; TODO confirm against the tokenizer used to pretokenize the data) +PAD_VALUE = 151643 +LEFT_PAD = True + + +def now_timestamp_str() -> str: + """Get the current ISO 8601 UTC timestamp.""" + return datetime.now(timezone.utc).strftime(r"%Y%m%dT%H%M%SZ") + + +ts = now_timestamp_str() +checkpoint_dir = Path(__file__).parent / "checkpoints" / "pretrain_qwen3_0point6" / ts +mconf = BiencoderModelConfig( + name_or_path="Qwen/Qwen3-0.6B", + pooling="last_token", + disable_activation_checkpoint=False, +) +dconf = ContrastivePretokenizedDataConfig( + filesystem="s3", + root_directory=DATA_PATH, + # filesystem="local", + # root_directory=DATA_PATH, + # Depending on how much GPU memory you have, you may need to split each + # batch into a number of smaller sub-batches by setting the split_factor. + # If you do so, you will probably want to decrease the learning rate accordingly. 
+ # split_factor=4, + max_seq_length_query=32, + max_seq_length_doc=256, + eval_root_directories=EVAL_DATA_PATHS, + eval_max_seq_length_doc=32, + eval_max_seq_length_query=256, + pad_value=PAD_VALUE, + left_pad=LEFT_PAD, +) +sconf = WSDSchedulerConfig(num_warmup_steps=2000, num_decay_steps=2000) +oconf = OptimizerConfig(weight_decay=0.01, learning_rate=LEARNING_RATE) +lconf = LoggerConfig(level="INFO") +wconf = WandBConfig( + enable=True, + project="arctic-training-arctic-embed-testbed", + name=f"qwen3_0point6-pretrain-{ts}", +) +# Reference: https://www.deepspeed.ai/training/#gradient-clipping +dsconf = { + "gradient_clipping": GRADIENT_CLIPPING, + "zero_optimization": {"stage": 1}, + # NOTE: The underlying DeepSpeed engine scales gradients down by a factor of + # `1/world_size`` in the backwards pass, so we pre-scale the loss up by a factor + # of `world_size`. Given these scalings, there is a potential for increased + # numerical imprecision when using low-precision floating point representation, + # so we set communication to fp32 in the backwards all-reduce to somewhat mitigate + # this risk. + "communication_data_type": "fp32", +} +cconf = CheckpointConfig( + output_dir=checkpoint_dir, + type="biencoder", + save_every_n_steps=300, + save_end_of_training=True, +) + + +# def configure_non_distributed_distributed_training_if_needed() -> None: +# """Detect if we need to manually initialize distributed training environment +# and do so if needed. + +# NOTE: We have to do this step because Arctic Training doesn't have a default +# 1-GPU launching mode and will instead fall back to trying to auto-discover +# distributed training configuration (e.g. via MPI). 
+# """ +# num_cli_args = len(sys.argv) - 1 +# if num_cli_args == 0: +# print("***No CLI args detected, configuring for single-GPU training.***") +# from os import environ + +# from torch import distributed as dist + +# environ["MASTER_ADDR"] = "localhost" +# environ["MASTER_PORT"] = "12335" +# environ["LOCAL_RANK"] = "0" +# dist.init_process_group(backend="nccl", world_size=1, rank=0) + + +if __name__ == "__main__": + CUDA_ALLOCATOR_CONFIG_FOR_DYNAMICALLY_SIZED_DATA.set_env() + # configure_non_distributed_distributed_training_if_needed() + tconf = BiencoderTrainerConfig( + type="biencoder", + model=mconf, + data=dconf, + scheduler=sconf, + optimizer=oconf, + logger=lconf, + checkpoint=cconf, + wandb=wconf, + deepspeed=dsconf, + loss_log_interval=0, + eval_frequency=300, + use_in_batch_negatives=True, + loss_temperature=0.02, + overfit_first_batch=False, + mrl_dim=256, + ) + trainer = BiencoderTrainer(config=tconf) + trainer.train() diff --git a/projects/arctic_embed/examples/finetune_models/sync.sh b/projects/arctic_embed/examples/finetune_models/sync.sh new file mode 100644 index 00000000..14386127 --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/sync.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# This script continuously synchronizes a local directory to an AWS S3 bucket +# at a set interval. + +# Exit immediately if a command exits with a non-zero status. +set -e + +# --- Configuration --- + +# The local source directory. +SOURCE_DIR="/notebook/ArcticTraining/projects/arctic_embed/examples/finetune_models/checkpoints" + +# The destination S3 bucket and path. +S3_DESTINATION="s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/pyu/arctic_training_checkpoints" + +# The sync interval in seconds (30 minutes = 30 * 60 = 1800 seconds). +SYNC_INTERVAL_SECONDS=1800 + +# --- Main Logic --- + +echo "🚀 Starting continuous sync script. Will sync every 30 minutes." +echo " Press [CTRL+C] to stop the script." + +# Infinite loop to run the sync command periodically. 
+while true +do + echo "" + echo "▶️ Starting sync at $(date)..." + + # Check if the source directory exists before attempting to sync + if [ ! -d "$SOURCE_DIR" ]; then + echo "⚠️ Warning: Source directory '$SOURCE_DIR' not found. Skipping this cycle." + else + # Execute the AWS S3 sync command. Testing it in an `if` keeps a failed cycle + # from tripping `set -e` and ending the loop; the next cycle simply retries. + if aws s3 sync "$SOURCE_DIR" "$S3_DESTINATION"; then echo "✅ Sync completed successfully."; else echo "❌ Sync failed; will retry on the next cycle." >&2; fi + fi + + echo "💤 Sleeping for ${SYNC_INTERVAL_SECONDS} seconds (30 minutes) until the next run." + sleep "$SYNC_INTERVAL_SECONDS" +done \ No newline at end of file diff --git a/projects/arctic_embed/examples/finetune_models/train-log-metrics.jsonl b/projects/arctic_embed/examples/finetune_models/train-log-metrics.jsonl new file mode 100644 index 00000000..3d278bcd --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/train-log-metrics.jsonl @@ -0,0 +1,44664 @@ +{"epoch": 0, "iter": 1, "iter_tflops": 5.220628877738378, "iter_time": 3.951840667724609, "loss": 2.8207898139953613, "lr": 6.000000000000001e-08, "seqlen": 8192.0, "step_tflops": 10.501634040546572, "step_time": 1.9645603179931639} +{"epoch": 0, "iter": 2, "iter_tflops": 12.832949164218737, "iter_time": 1.607665802001953, "loss": 2.7725331783294678, "lr": 1.2000000000000002e-07, "seqlen": 8192.0, "step_tflops": 15.135805919138113, "step_time": 1.3630654106140137} +{"epoch": 0, "iter": 3, "iter_tflops": 19.506363372454004, "iter_time": 1.057659652709961, "loss": 2.766493797302246, "lr": 1.8e-07, "seqlen": 8192.0, "step_tflops": 23.374325042485587, "step_time": 0.8826391124725342} +{"epoch": 0, "iter": 4, "iter_tflops": 36.483371892199095, "iter_time": 0.5654930572509764, "loss": 2.817659854888916, "lr": 2.4000000000000003e-07, "seqlen": 8192.0, "step_tflops": 39.70572965774749, "step_time": 0.5195999088287353} +{"epoch": 0, 
"iter": 5, "iter_tflops": 16.07491106205264, "iter_time": 0.9910104980468749, "loss": 2.4057180881500244, "lr": 3.0000000000000004e-07, "seqlen": 6368.0, "step_tflops": 16.972731003306787, "step_time": 
0.9385882339477539} +{"epoch": 0, "iter": 6, "iter_tflops": 15.228023712538445, "iter_time": 1.0461242980957033, "loss": 1.9717350006103516, "lr": 3.6e-07, "seqlen": 6368.0, "step_tflops": 19.782174355299727, "step_time": 0.8052909317016602} +{"epoch": 0, "iter": 7, "iter_tflops": 22.86282130785579, "iter_time": 0.6967821426391603, "loss": 2.1223645210266113, "lr": 4.2e-07, "seqlen": 6368.0, "step_tflops": 24.608355077394183, "step_time": 0.6473575973510742} +{"epoch": 0, "iter": 8, "iter_tflops": 25.668184023551948, "iter_time": 0.6206284637451172, "loss": 2.2820732593536377, "lr": 4.800000000000001e-07, "seqlen": 6368.0, "step_tflops": 27.505787528318912, "step_time": 0.5791655883789063} +{"epoch": 0, "iter": 9, "iter_tflops": 25.492010661460526, "iter_time": 0.8093160552978516, "loss": 2.0137274265289307, "lr": 5.4e-07, "seqlen": 8192.0, "step_tflops": 27.452958208659858, "step_time": 0.7515071182250976} +{"epoch": 0, "iter": 10, "iter_tflops": 15.01193244561392, "iter_time": 1.3743129730224608, "loss": 1.9096287488937378, "lr": 6.000000000000001e-07, "seqlen": 8192.0, "step_tflops": 19.2293359910186, "step_time": 1.0728968238830567} +{"epoch": 0, "iter": 11, "iter_tflops": 51.17772186904939, "iter_time": 0.4031264533996582, "loss": 1.8901807069778442, "lr": 6.599999999999999e-07, "seqlen": 8192.0, "step_tflops": 55.734995741471614, "step_time": 0.3701640815734863} +{"epoch": 0, "iter": 12, "iter_tflops": 51.46642220589452, "iter_time": 0.400865119934082, "loss": 1.9388353824615479, "lr": 7.2e-07, "seqlen": 8192.0, "step_tflops": 55.759970312890346, "step_time": 0.36999828720092776} +{"epoch": 0, "iter": 13, "iter_tflops": 45.211731822146234, "iter_time": 0.4563216819763184, "loss": 1.7255266904830933, "lr": 7.799999999999999e-07, "seqlen": 8192.0, "step_tflops": 49.29043574925755, "step_time": 0.4185618000030517} +{"epoch": 0, "iter": 14, "iter_tflops": 36.9040076309515, "iter_time": 0.559047508239746, "loss": 1.5264889001846313, "lr": 8.4e-07, "seqlen": 
8192.0, "step_tflops": 40.84434209632606, "step_time": 0.5051150894165038} +{"epoch": 0, "iter": 15, "iter_tflops": 40.35325723989705, "iter_time": 0.5112621612548828, "loss": 2.124992609024048, "lr": 9e-07, "seqlen": 8192.0, "step_tflops": 43.88993924934041, "step_time": 0.4700642986297608} +{"epoch": 0, "iter": 16, "iter_tflops": 45.061388932829054, "iter_time": 0.45784415435791015, "loss": 1.9679317474365234, "lr": 9.600000000000001e-07, "seqlen": 8192.0, "step_tflops": 49.39475433854267, "step_time": 0.4176778240203857} +{"epoch": 0, "iter": 17, "iter_tflops": 26.896403863059696, "iter_time": 0.7670576934814454, "loss": 2.1431140899658203, "lr": 1.0200000000000002e-06, "seqlen": 8192.0, "step_tflops": 28.829681884532565, "step_time": 0.7156198806762697} +{"epoch": 0, "iter": 18, "iter_tflops": 9.1319683553774, "iter_time": 2.2592164916992186, "loss": 2.190006732940674, "lr": 1.08e-06, "seqlen": 8192.0, "step_tflops": 11.363951667350642, "step_time": 1.8154858551025392} +{"epoch": 0, "iter": 19, "iter_tflops": 14.608394394954004, "iter_time": 1.412276596069336, "loss": 2.515913724899292, "lr": 1.14e-06, "seqlen": 8192.0, "step_tflops": 16.678618339108073, "step_time": 1.2369785728454592} +{"epoch": 0, "iter": 20, "iter_tflops": 28.695727414074714, "iter_time": 0.7189604644775391, "loss": 2.0843515396118164, "lr": 1.2000000000000002e-06, "seqlen": 8192.0, "step_tflops": 35.81452783660839, "step_time": 0.5760537624359131} +{"epoch": 0, "iter": 21, "iter_tflops": 13.001469899555698, "iter_time": 1.1371968994140624, "loss": 1.883620023727417, "lr": 1.26e-06, "seqlen": 5920.0, "step_tflops": 13.729673468248016, "step_time": 1.076881492614746} +{"epoch": 0, "iter": 22, "iter_tflops": 10.069218414327215, "iter_time": 1.468359375, "loss": 2.205829620361328, "lr": 1.3199999999999999e-06, "seqlen": 5920.0, "step_tflops": 12.229983597793726, "step_time": 1.2089330406188965} +{"epoch": 0, "iter": 23, "iter_tflops": 24.311286451658056, "iter_time": 0.6081632614135741, 
"loss": 2.2484474182128906, "lr": 1.38e-06, "seqlen": 5920.0, "step_tflops": 26.001029337201818, "step_time": 0.5686402282714843} +{"epoch": 0, "iter": 24, "iter_tflops": 24.717073092040994, "iter_time": 0.5981788864135742, "loss": 2.1860392093658447, "lr": 1.44e-06, "seqlen": 5920.0, "step_tflops": 26.466074121472538, "step_time": 0.5586484489440918} +{"epoch": 0, "iter": 25, "iter_tflops": 17.766768194294777, "iter_time": 1.1612181396484373, "loss": 2.3770501613616943, "lr": 1.5e-06, "seqlen": 8192.0, "step_tflops": 19.177950694535433, "step_time": 1.0757715377807617} +{"epoch": 0, "iter": 26, "iter_tflops": 22.295154499829778, "iter_time": 0.9253622131347655, "loss": 2.6436374187469482, "lr": 1.5599999999999999e-06, "seqlen": 8192.0, "step_tflops": 24.726891902097126, "step_time": 0.834358543395996} +{"epoch": 0, "iter": 27, "iter_tflops": 41.485023428635934, "iter_time": 0.4973142547607422, "loss": 2.5539233684539795, "lr": 1.62e-06, "seqlen": 8192.0, "step_tflops": 44.41279998620386, "step_time": 0.4645303497314453} +{"epoch": 0, "iter": 28, "iter_tflops": 43.79046353698518, "iter_time": 0.47113211059570315, "loss": 2.4250786304473877, "lr": 1.68e-06, "seqlen": 8192.0, "step_tflops": 46.85676711389287, "step_time": 0.44030125808715814} +{"epoch": 0, "iter": 29, "iter_tflops": 46.3049184806271, "iter_time": 0.44554864120483395, "loss": 1.4379181861877441, "lr": 1.74e-06, "seqlen": 8192.0, "step_tflops": 50.732734281220516, "step_time": 0.4066623611450195} +{"epoch": 0, "iter": 30, "iter_tflops": 47.84476390245131, "iter_time": 0.43120901489257807, "loss": 1.355305790901184, "lr": 1.8e-06, "seqlen": 8192.0, "step_tflops": 51.97146979028509, "step_time": 0.3969695987701416} +{"epoch": 0, "iter": 31, "iter_tflops": 55.27426452619817, "iter_time": 0.3732495346069336, "loss": 0.9588444232940674, "lr": 1.86e-06, "seqlen": 8192.0, "step_tflops": 59.756909139059054, "step_time": 0.34525034523010256} +{"epoch": 0, "iter": 32, "iter_tflops": 53.54859652565837, 
"iter_time": 0.38527795028686523, "loss": 1.523586392402649, "lr": 1.9200000000000003e-06, "seqlen": 8192.0, "step_tflops": 57.903786942105675, "step_time": 0.35629955482482917} +{"epoch": 0, "iter": 33, "iter_tflops": 41.29895661516809, "iter_time": 0.49955483627319336, "loss": 2.4580118656158447, "lr": 1.98e-06, "seqlen": 8192.0, "step_tflops": 44.42086275483478, "step_time": 0.46444603347778324} +{"epoch": 0, "iter": 34, "iter_tflops": 13.981404523192518, "iter_time": 1.4756095123291018, "loss": 1.9120484590530396, "lr": 2.0400000000000004e-06, "seqlen": 8192.0, "step_tflops": 16.142157600668032, "step_time": 1.2780877265930177} +{"epoch": 0, "iter": 35, "iter_tflops": 15.822558272347285, "iter_time": 1.3039037780761717, "loss": 2.4670910835266113, "lr": 2.1000000000000002e-06, "seqlen": 8192.0, "step_tflops": 18.69933495341803, "step_time": 1.1033062705993655} +{"epoch": 0, "iter": 36, "iter_tflops": 17.04658799002689, "iter_time": 1.2102770080566407, "loss": 2.353423833847046, "lr": 2.16e-06, "seqlen": 8192.0, "step_tflops": 21.929457089132278, "step_time": 0.940793628692627} +{"epoch": 0, "iter": 37, "iter_tflops": 11.601392648903058, "iter_time": 1.355495788574219, "loss": 2.0279979705810547, "lr": 2.22e-06, "seqlen": 6288.0, "step_tflops": 21.70815698969879, "step_time": 0.7244115142822265} +{"epoch": 0, "iter": 38, "iter_tflops": 7.708725502231588, "iter_time": 2.039979095458984, "loss": 1.7830756902694702, "lr": 2.28e-06, "seqlen": 6288.0, "step_tflops": 9.121222499966384, "step_time": 1.724071403503418} +{"epoch": 0, "iter": 39, "iter_tflops": 9.87213062718834, "iter_time": 1.5929326171874998, "loss": 2.2087392807006836, "lr": 2.34e-06, "seqlen": 6288.0, "step_tflops": 12.080095434054096, "step_time": 1.3017810134887695} +{"epoch": 0, "iter": 40, "iter_tflops": 23.376337828696588, "iter_time": 0.6727161026000976, "loss": 2.147340774536133, "lr": 2.4000000000000003e-06, "seqlen": 6288.0, "step_tflops": 25.26651297947484, "step_time": 0.6223905487060547} 
+{"epoch": 0, "iter": 41, "iter_tflops": 18.147519446980283, "iter_time": 0.8259781951904296, "loss": 1.5860844850540161, "lr": 2.46e-06, "seqlen": 6000.0, "step_tflops": 19.249602267498386, "step_time": 0.778689094543457} +{"epoch": 0, "iter": 42, "iter_tflops": 11.822396502325889, "iter_time": 1.2678863677978516, "loss": 1.8405208587646484, "lr": 2.52e-06, "seqlen": 6000.0, "step_tflops": 13.316051611571636, "step_time": 1.1256681632995604} +{"epoch": 0, "iter": 43, "iter_tflops": 26.989564792298467, "iter_time": 0.5553796615600586, "loss": 2.0864109992980957, "lr": 2.58e-06, "seqlen": 6000.0, "step_tflops": 28.790236720797452, "step_time": 0.5206436996459961} +{"epoch": 0, "iter": 44, "iter_tflops": 28.1633053303921, "iter_time": 0.532233528137207, "loss": 2.2413275241851807, "lr": 2.6399999999999997e-06, "seqlen": 6000.0, "step_tflops": 29.6979854776275, "step_time": 0.5047297019958495} +{"epoch": 0, "iter": 45, "iter_tflops": 25.02712548728976, "iter_time": 0.6381625061035157, "loss": 0.29722458124160767, "lr": 2.7e-06, "seqlen": 6384.0, "step_tflops": 27.0214866439471, "step_time": 0.59106196975708} +{"epoch": 0, "iter": 46, "iter_tflops": 14.085932687093734, "iter_time": 1.1338527221679688, "loss": 0.3547014594078064, "lr": 2.76e-06, "seqlen": 6384.0, "step_tflops": 17.12473353449062, "step_time": 0.9326494388580322} +{"epoch": 0, "iter": 47, "iter_tflops": 32.60114323417193, "iter_time": 0.4899022407531739, "loss": 0.3115774691104889, "lr": 2.82e-06, "seqlen": 6384.0, "step_tflops": 35.86737086333504, "step_time": 0.4452897644042969} +{"epoch": 0, "iter": 48, "iter_tflops": 38.151203137787604, "iter_time": 0.41863353729248043, "loss": 0.18750666081905365, "lr": 2.88e-06, "seqlen": 6384.0, "step_tflops": 42.05719590866395, "step_time": 0.37975363731384276} +{"epoch": 0, "iter": 49, "iter_tflops": 30.896809518757486, "iter_time": 0.667741874694824, "loss": 0.2781710922718048, "lr": 2.9400000000000002e-06, "seqlen": 8192.0, "step_tflops": 33.252091484066646, 
"step_time": 0.6204449882507324} +{"epoch": 0, "iter": 50, "iter_tflops": 9.814993612721814, "iter_time": 2.1019976501464845, "loss": 0.326744943857193, "lr": 3e-06, "seqlen": 8192.0, "step_tflops": 12.378583594211378, "step_time": 1.666676429748535} +{"epoch": 0, "iter": 51, "iter_tflops": 15.576880803230459, "iter_time": 1.3244688568115233, "loss": 0.23117630183696747, "lr": 3.06e-06, "seqlen": 8192.0, "step_tflops": 20.24024334232072, "step_time": 1.019310546875} +{"epoch": 0, "iter": 52, "iter_tflops": 44.856240967214255, "iter_time": 0.4599380836486816, "loss": 0.2479611337184906, "lr": 3.1199999999999998e-06, "seqlen": 8192.0, "step_tflops": 49.35220456900947, "step_time": 0.41803793144226076} +{"epoch": 0, "iter": 53, "iter_tflops": 13.22952730959655, "iter_time": 1.3905869293212891, "loss": 1.4962600469589233, "lr": 3.18e-06, "seqlen": 7328.0, "step_tflops": 13.866488003771504, "step_time": 1.3267099609375} +{"epoch": 0, "iter": 54, "iter_tflops": 13.09021034024483, "iter_time": 1.40538671875, "loss": 1.6440449953079224, "lr": 3.24e-06, "seqlen": 7328.0, "step_tflops": 16.984942391035975, "step_time": 1.0831245307922361} +{"epoch": 0, "iter": 55, "iter_tflops": 32.649823526275696, "iter_time": 0.563458106994629, "loss": 1.842158555984497, "lr": 3.3e-06, "seqlen": 7328.0, "step_tflops": 34.73710456020866, "step_time": 0.5296010704040527} +{"epoch": 0, "iter": 56, "iter_tflops": 31.84250364681085, "iter_time": 0.5777437591552734, "loss": 1.8163195848464966, "lr": 3.36e-06, "seqlen": 7328.0, "step_tflops": 33.71064046480197, "step_time": 0.5457270317077636} +{"epoch": 0, "iter": 57, "iter_tflops": 45.48328823667999, "iter_time": 0.45359722900390625, "loss": 1.5982441902160645, "lr": 3.4200000000000003e-06, "seqlen": 8192.0, "step_tflops": 50.08492361845126, "step_time": 0.41192223167419434} +{"epoch": 0, "iter": 58, "iter_tflops": 49.87130194251146, "iter_time": 0.4136866836547851, "loss": 1.2452499866485596, "lr": 3.48e-06, "seqlen": 8192.0, "step_tflops": 
54.24387123977976, "step_time": 0.3803396224975586} +{"epoch": 0, "iter": 59, "iter_tflops": 50.57829706323982, "iter_time": 0.4079040756225586, "loss": 1.9573177099227905, "lr": 3.54e-06, "seqlen": 8192.0, "step_tflops": 54.38737486796465, "step_time": 0.37933607864379887} +{"epoch": 0, "iter": 60, "iter_tflops": 49.898111849475896, "iter_time": 0.41346441268920897, "loss": 1.4735437631607056, "lr": 3.6e-06, "seqlen": 8192.0, "step_tflops": 53.25405368753293, "step_time": 0.38740888404846185} +{"epoch": 0, "iter": 61, "iter_tflops": 39.937173069569624, "iter_time": 0.5165887298583984, "loss": 1.9838145971298218, "lr": 3.66e-06, "seqlen": 8192.0, "step_tflops": 43.237248721109886, "step_time": 0.4771601829528809} +{"epoch": 0, "iter": 62, "iter_tflops": 39.65166078525001, "iter_time": 0.5203084335327148, "loss": 1.7181293964385986, "lr": 3.72e-06, "seqlen": 8192.0, "step_tflops": 42.63724802808538, "step_time": 0.48387488555908204} +{"epoch": 0, "iter": 63, "iter_tflops": 44.1581420690082, "iter_time": 0.46720927429199216, "loss": 2.0783543586730957, "lr": 3.7800000000000002e-06, "seqlen": 8192.0, "step_tflops": 47.23570340366453, "step_time": 0.4367690544128418} +{"epoch": 0, "iter": 64, "iter_tflops": 43.17229493328126, "iter_time": 0.4778780822753906, "loss": 2.0055408477783203, "lr": 3.8400000000000005e-06, "seqlen": 8192.0, "step_tflops": 46.18028877025232, "step_time": 0.44675107192993163} +{"epoch": 0, "iter": 65, "iter_tflops": 44.39196882746416, "iter_time": 0.4647483329772949, "loss": 2.154197931289673, "lr": 3.9e-06, "seqlen": 8192.0, "step_tflops": 48.27950357784708, "step_time": 0.4273261318206787} +{"epoch": 0, "iter": 66, "iter_tflops": 40.70417505214552, "iter_time": 0.5068544807434082, "loss": 1.9318841695785522, "lr": 3.96e-06, "seqlen": 8192.0, "step_tflops": 43.54005134979756, "step_time": 0.4738417358398438} +{"epoch": 0, "iter": 67, "iter_tflops": 41.66225297477214, "iter_time": 0.49519869995117194, "loss": 2.0264110565185547, "lr": 
4.0200000000000005e-06, "seqlen": 8192.0, "step_tflops": 44.80598669223631, "step_time": 0.46045394897460945} +{"epoch": 0, "iter": 68, "iter_tflops": 46.0994352419507, "iter_time": 0.44753462600708, "loss": 2.0812129974365234, "lr": 4.080000000000001e-06, "seqlen": 8192.0, "step_tflops": 49.645938446662136, "step_time": 0.41556457901000976} +{"epoch": 0, "iter": 69, "iter_tflops": 56.42461472348962, "iter_time": 0.3656399536132812, "loss": 0.35367435216903687, "lr": 4.14e-06, "seqlen": 8192.0, "step_tflops": 62.473767462454575, "step_time": 0.3302361030578613} +{"epoch": 0, "iter": 70, "iter_tflops": 53.721768631031935, "iter_time": 0.38403600692749024, "loss": 0.2572857737541199, "lr": 4.2000000000000004e-06, "seqlen": 8192.0, "step_tflops": 59.475238766184134, "step_time": 0.34688542556762697} +{"epoch": 0, "iter": 71, "iter_tflops": 55.965412601623996, "iter_time": 0.36864006805419924, "loss": 0.25195181369781494, "lr": 4.26e-06, "seqlen": 8192.0, "step_tflops": 61.13903888359602, "step_time": 0.337445499420166} +{"epoch": 0, "iter": 72, "iter_tflops": 58.442561655615094, "iter_time": 0.35301487350463867, "loss": 0.21929271519184113, "lr": 4.32e-06, "seqlen": 8192.0, "step_tflops": 63.425992067269476, "step_time": 0.32527821540832524} +{"epoch": 0, "iter": 73, "iter_tflops": 29.228511463503885, "iter_time": 0.7058550872802735, "loss": 1.6718558073043823, "lr": 4.3799999999999996e-06, "seqlen": 8192.0, "step_tflops": 30.989015388943237, "step_time": 0.6657550506591798} +{"epoch": 0, "iter": 74, "iter_tflops": 17.6214948596906, "iter_time": 1.1707913360595703, "loss": 1.8788255453109741, "lr": 4.44e-06, "seqlen": 8192.0, "step_tflops": 21.668125592036382, "step_time": 0.9521402034759522} +{"epoch": 0, "iter": 75, "iter_tflops": 38.96140646114235, "iter_time": 0.5295264053344727, "loss": 1.948462724685669, "lr": 4.5e-06, "seqlen": 8192.0, "step_tflops": 42.01961043878346, "step_time": 0.4909872627258301} +{"epoch": 0, "iter": 76, "iter_tflops": 41.64327451514547, 
"iter_time": 0.4954243812561035, "loss": 1.7998366355895996, "lr": 4.56e-06, "seqlen": 8192.0, "step_tflops": 45.162631209071534, "step_time": 0.456817792892456} +{"epoch": 0, "iter": 77, "iter_tflops": 32.23913050160661, "iter_time": 0.6399395141601563, "loss": 1.9834595918655396, "lr": 4.62e-06, "seqlen": 8192.0, "step_tflops": 35.78974125051283, "step_time": 0.5764527149200439} +{"epoch": 0, "iter": 78, "iter_tflops": 37.14036785597497, "iter_time": 0.5554897460937499, "loss": 1.6661674976348877, "lr": 4.68e-06, "seqlen": 8192.0, "step_tflops": 40.33487434354895, "step_time": 0.5114951725006103} +{"epoch": 0, "iter": 79, "iter_tflops": 34.67818250617593, "iter_time": 0.5949300689697264, "loss": 1.689835786819458, "lr": 4.74e-06, "seqlen": 8192.0, "step_tflops": 37.61086113673639, "step_time": 0.548540843963623} +{"epoch": 0, "iter": 80, "iter_tflops": 37.41838750330187, "iter_time": 0.5513624420166016, "loss": 1.6854463815689087, "lr": 4.800000000000001e-06, "seqlen": 8192.0, "step_tflops": 40.22671793390319, "step_time": 0.5128704147338867} +{"epoch": 0, "iter": 81, "iter_tflops": 36.808127287905954, "iter_time": 0.5605037536621095, "loss": 1.7249133586883545, "lr": 4.86e-06, "seqlen": 8192.0, "step_tflops": 40.50357334217683, "step_time": 0.5093647747039795} +{"epoch": 0, "iter": 82, "iter_tflops": 43.219838425652426, "iter_time": 0.47735239791870115, "loss": 1.6479722261428833, "lr": 4.92e-06, "seqlen": 8192.0, "step_tflops": 47.37624875635944, "step_time": 0.4354733448028564} +{"epoch": 0, "iter": 83, "iter_tflops": 51.07534950349223, "iter_time": 0.403934455871582, "loss": 1.4253069162368774, "lr": 4.980000000000001e-06, "seqlen": 8192.0, "step_tflops": 55.31981334712447, "step_time": 0.372942211151123} +{"epoch": 0, "iter": 84, "iter_tflops": 49.805873725070846, "iter_time": 0.41423012924194336, "loss": 1.6995408535003662, "lr": 5.04e-06, "seqlen": 8192.0, "step_tflops": 53.47707608841716, "step_time": 0.38579322242736813} +{"epoch": 0, "iter": 85, 
"iter_tflops": 45.11109600462668, "iter_time": 0.45733966445922847, "loss": 0.8864627480506897, "lr": 5.1e-06, "seqlen": 8192.0, "step_tflops": 48.9215396621218, "step_time": 0.4217179927825928} +{"epoch": 0, "iter": 86, "iter_tflops": 34.88295672780117, "iter_time": 0.5914376373291015, "loss": 0.9850254058837891, "lr": 5.16e-06, "seqlen": 8192.0, "step_tflops": 41.526735249198964, "step_time": 0.4968147239685059} +{"epoch": 0, "iter": 87, "iter_tflops": 40.845617317960375, "iter_time": 0.5050993194580079, "loss": 0.9061932563781738, "lr": 5.22e-06, "seqlen": 8192.0, "step_tflops": 44.67284998381151, "step_time": 0.4618262214660645} +{"epoch": 0, "iter": 88, "iter_tflops": 40.29778206902506, "iter_time": 0.5119659805297851, "loss": 1.2238596677780151, "lr": 5.279999999999999e-06, "seqlen": 8192.0, "step_tflops": 43.91519722057142, "step_time": 0.4697939395904541} +{"epoch": 0, "iter": 89, "iter_tflops": 23.7905453436501, "iter_time": 0.8671971664428711, "loss": 0.707321047782898, "lr": 5.34e-06, "seqlen": 8192.0, "step_tflops": 25.64062055726671, "step_time": 0.8046253585815429} +{"epoch": 0, "iter": 90, "iter_tflops": 35.61660933894612, "iter_time": 0.5792548446655275, "loss": 0.7302997708320618, "lr": 5.4e-06, "seqlen": 8192.0, "step_tflops": 43.90661723497516, "step_time": 0.4698857440948486} +{"epoch": 0, "iter": 91, "iter_tflops": 49.00288760407692, "iter_time": 0.4210179138183594, "loss": 0.6139833331108093, "lr": 5.46e-06, "seqlen": 8192.0, "step_tflops": 53.04689524603528, "step_time": 0.38892179107666014} +{"epoch": 0, "iter": 92, "iter_tflops": 54.03248049802866, "iter_time": 0.3818276214599609, "loss": 1.0240143537521362, "lr": 5.52e-06, "seqlen": 8192.0, "step_tflops": 58.82536338082146, "step_time": 0.35071765518188475} +{"epoch": 0, "iter": 93, "iter_tflops": 47.28564977619315, "iter_time": 0.4363077087402344, "loss": 0.8239365220069885, "lr": 5.58e-06, "seqlen": 8192.0, "step_tflops": 51.78324427903835, "step_time": 0.3984125328063965} +{"epoch": 0, 
"iter": 94, "iter_tflops": 36.52104774035895, "iter_time": 0.5649096832275391, "loss": 0.731926441192627, "lr": 5.64e-06, "seqlen": 8192.0, "step_tflops": 41.780479071707, "step_time": 0.4937974376678467} +{"epoch": 0, "iter": 95, "iter_tflops": 41.53434612151484, "iter_time": 0.4967236862182617, "loss": 0.7950512766838074, "lr": 5.7000000000000005e-06, "seqlen": 8192.0, "step_tflops": 45.66083349519991, "step_time": 0.4518334846496582} +{"epoch": 0, "iter": 96, "iter_tflops": 44.17784093345168, "iter_time": 0.46700094604492187, "loss": 0.6576852798461914, "lr": 5.76e-06, "seqlen": 8192.0, "step_tflops": 48.427883679209955, "step_time": 0.4260168304443359} +{"epoch": 0, "iter": 97, "iter_tflops": 34.63234960045009, "iter_time": 0.5957174072265625, "loss": 1.3902592658996582, "lr": 5.82e-06, "seqlen": 8192.0, "step_tflops": 38.18862342316255, "step_time": 0.5402418746948242} +{"epoch": 0, "iter": 98, "iter_tflops": 36.78232580967982, "iter_time": 0.5608969268798828, "loss": 1.0605261325836182, "lr": 5.8800000000000005e-06, "seqlen": 8192.0, "step_tflops": 40.10465363444384, "step_time": 0.5144314098358155} +{"epoch": 0, "iter": 99, "iter_tflops": 50.70740676249902, "iter_time": 0.40686548233032227, "loss": 1.274208903312683, "lr": 5.940000000000001e-06, "seqlen": 8192.0, "step_tflops": 54.83721119870335, "step_time": 0.3762243385314941} +{"epoch": 0, "iter": 100, "iter_tflops": 50.91978948730172, "iter_time": 0.4051684761047363, "loss": 1.3940032720565796, "lr": 6e-06, "seqlen": 8192.0, "step_tflops": 54.54838888763426, "step_time": 0.37821636772155764} +{"epoch": 0, "iter": 101, "iter_tflops": 36.226326832419026, "iter_time": 0.5695055313110352, "loss": 1.6927621364593506, "lr": 6.0600000000000004e-06, "seqlen": 8192.0, "step_tflops": 38.881326913080095, "step_time": 0.5306170120239257} +{"epoch": 0, "iter": 102, "iter_tflops": 38.257528008152605, "iter_time": 0.5392688598632813, "loss": 1.6380274295806885, "lr": 6.12e-06, "seqlen": 8192.0, "step_tflops": 
41.961462994304384, "step_time": 0.4916676406860352} +{"epoch": 0, "iter": 103, "iter_tflops": 39.84581823613085, "iter_time": 0.5177731170654296, "loss": 1.6107228994369507, "lr": 6.18e-06, "seqlen": 8192.0, "step_tflops": 43.543096900918144, "step_time": 0.47380859375} +{"epoch": 0, "iter": 104, "iter_tflops": 40.020628484446256, "iter_time": 0.5155114822387695, "loss": 1.3035483360290527, "lr": 6.2399999999999995e-06, "seqlen": 8192.0, "step_tflops": 43.520801290699, "step_time": 0.47405132484436036} +{"epoch": 0, "iter": 105, "iter_tflops": 27.69224639931647, "iter_time": 0.7450133590698242, "loss": 0.8742462992668152, "lr": 6.3e-06, "seqlen": 8192.0, "step_tflops": 29.611538560092416, "step_time": 0.6967248077392578} +{"epoch": 0, "iter": 106, "iter_tflops": 13.99813763514202, "iter_time": 1.4738455963134764, "loss": 1.0239039659500122, "lr": 6.36e-06, "seqlen": 8192.0, "step_tflops": 17.436190325724805, "step_time": 1.1832340164184572} +{"epoch": 0, "iter": 107, "iter_tflops": 23.46894607141047, "iter_time": 0.8790805282592773, "loss": 1.0525261163711548, "lr": 6.42e-06, "seqlen": 8192.0, "step_tflops": 28.75864974908549, "step_time": 0.7173874187469482} +{"epoch": 0, "iter": 108, "iter_tflops": 53.29819160942531, "iter_time": 0.38708805847167965, "loss": 0.894136369228363, "lr": 6.48e-06, "seqlen": 8192.0, "step_tflops": 57.50673486971215, "step_time": 0.3587596054077149} +{"epoch": 0, "iter": 109, "iter_tflops": 21.926650767228157, "iter_time": 0.6538323440551759, "loss": 1.2570337057113647, "lr": 6.54e-06, "seqlen": 5744.0, "step_tflops": 23.23685256863123, "step_time": 0.6169662361145019} +{"epoch": 0, "iter": 110, "iter_tflops": 8.734781915655901, "iter_time": 1.6412949523925782, "loss": 1.4803884029388428, "lr": 6.6e-06, "seqlen": 5744.0, "step_tflops": 10.046363731208249, "step_time": 1.4270191535949706} +{"epoch": 0, "iter": 111, "iter_tflops": 25.993157680883222, "iter_time": 0.5515433578491211, "loss": 1.4242593050003052, "lr": 
6.660000000000001e-06, "seqlen": 5744.0, "step_tflops": 27.734619693652405, "step_time": 0.5169118461608887} +{"epoch": 0, "iter": 112, "iter_tflops": 28.41511173929696, "iter_time": 0.50453271484375, "loss": 1.5361590385437012, "lr": 6.72e-06, "seqlen": 5744.0, "step_tflops": 30.040490542964665, "step_time": 0.4772343330383301} +{"epoch": 0, "iter": 113, "iter_tflops": 22.194608989449662, "iter_time": 0.9295542678833009, "loss": 1.5135551691055298, "lr": 6.78e-06, "seqlen": 8192.0, "step_tflops": 23.2708912756851, "step_time": 0.886562240600586} +{"epoch": 0, "iter": 114, "iter_tflops": 15.378817409915278, "iter_time": 1.3415266571044921, "loss": 1.5542088747024536, "lr": 6.840000000000001e-06, "seqlen": 8192.0, "step_tflops": 18.930699853411518, "step_time": 1.0898220176696776} +{"epoch": 0, "iter": 115, "iter_tflops": 40.15388799964261, "iter_time": 0.5138006439208984, "loss": 1.4388686418533325, "lr": 6.900000000000001e-06, "seqlen": 8192.0, "step_tflops": 42.88863354248, "step_time": 0.4810387229919434} +{"epoch": 0, "iter": 116, "iter_tflops": 49.050082956011096, "iter_time": 0.42061281585693355, "loss": 1.5666215419769287, "lr": 6.96e-06, "seqlen": 8192.0, "step_tflops": 52.77376586914123, "step_time": 0.3909346466064453} +{"epoch": 0, "iter": 117, "iter_tflops": 26.337496068225647, "iter_time": 0.783335418701172, "loss": 0.025817232206463814, "lr": 7.0200000000000006e-06, "seqlen": 8192.0, "step_tflops": 28.07163132461139, "step_time": 0.7349445877075196} +{"epoch": 0, "iter": 118, "iter_tflops": 15.635305606156185, "iter_time": 1.3195196838378906, "loss": 0.024415943771600723, "lr": 7.08e-06, "seqlen": 8192.0, "step_tflops": 22.283813350571474, "step_time": 0.9258331680297851} +{"epoch": 0, "iter": 119, "iter_tflops": 48.86648273891614, "iter_time": 0.4221931343078613, "loss": 0.057956330478191376, "lr": 7.14e-06, "seqlen": 8192.0, "step_tflops": 54.10342711865665, "step_time": 0.38132692527770995} +{"epoch": 0, "iter": 120, "iter_tflops": 
48.776516116339295, "iter_time": 0.4229718551635742, "loss": 0.04187269136309624, "lr": 7.2e-06, "seqlen": 8192.0, "step_tflops": 53.95591631458772, "step_time": 0.38236944007873536} +{"epoch": 0, "iter": 121, "iter_tflops": 25.591871837536537, "iter_time": 0.8061580505371094, "loss": 1.4178147315979004, "lr": 7.26e-06, "seqlen": 8192.0, "step_tflops": 27.09882149352779, "step_time": 0.7613280715942383} +{"epoch": 0, "iter": 122, "iter_tflops": 13.697935140038961, "iter_time": 1.506146240234375, "loss": 1.4405244588851929, "lr": 7.32e-06, "seqlen": 8192.0, "step_tflops": 19.043850193502635, "step_time": 1.0833467655181885} +{"epoch": 0, "iter": 123, "iter_tflops": 40.785182848846105, "iter_time": 0.5058477630615233, "loss": 1.609618067741394, "lr": 7.3800000000000005e-06, "seqlen": 8192.0, "step_tflops": 44.592010854641494, "step_time": 0.46266344833374023} +{"epoch": 0, "iter": 124, "iter_tflops": 40.142879609359, "iter_time": 0.5139415435791016, "loss": 1.3385896682739258, "lr": 7.44e-06, "seqlen": 8192.0, "step_tflops": 43.74248153692703, "step_time": 0.47164890480041505} +{"epoch": 0, "iter": 125, "iter_tflops": 28.611354274017224, "iter_time": 0.7210806350708008, "loss": 0.022349435836076736, "lr": 7.5e-06, "seqlen": 8192.0, "step_tflops": 31.246660605835757, "step_time": 0.6602655487060547} +{"epoch": 0, "iter": 126, "iter_tflops": 23.826644412162054, "iter_time": 0.8658833007812501, "loss": 0.041080400347709656, "lr": 7.5600000000000005e-06, "seqlen": 8192.0, "step_tflops": 29.497951995910448, "step_time": 0.699407657623291} +{"epoch": 0, "iter": 127, "iter_tflops": 58.2855641092178, "iter_time": 0.3539657516479492, "loss": 0.018141819164156914, "lr": 7.62e-06, "seqlen": 8192.0, "step_tflops": 63.96496819116067, "step_time": 0.3225373840332031} +{"epoch": 0, "iter": 128, "iter_tflops": 53.30199308606087, "iter_time": 0.3870604515075684, "loss": 0.02601616643369198, "lr": 7.680000000000001e-06, "seqlen": 8192.0, "step_tflops": 58.578934477411174, "step_time": 
0.3521930484771729} +{"epoch": 0, "iter": 129, "iter_tflops": 44.377090984594304, "iter_time": 0.46490414428710936, "loss": 0.5867365002632141, "lr": 7.74e-06, "seqlen": 8192.0, "step_tflops": 48.33647923967882, "step_time": 0.42682242965698247} +{"epoch": 0, "iter": 130, "iter_tflops": 14.511855820817237, "iter_time": 1.421671615600586, "loss": 0.39221858978271484, "lr": 7.8e-06, "seqlen": 8192.0, "step_tflops": 18.980457612730834, "step_time": 1.086965021133423} +{"epoch": 0, "iter": 131, "iter_tflops": 39.52261540995723, "iter_time": 0.5220072937011719, "loss": 0.422559916973114, "lr": 7.860000000000001e-06, "seqlen": 8192.0, "step_tflops": 43.50406634678354, "step_time": 0.47423368072509764} +{"epoch": 0, "iter": 132, "iter_tflops": 44.878376795934756, "iter_time": 0.45971122360229494, "loss": 0.36117902398109436, "lr": 7.92e-06, "seqlen": 8192.0, "step_tflops": 48.906344967917576, "step_time": 0.42184901618957515} +{"epoch": 0, "iter": 133, "iter_tflops": 22.78702661518176, "iter_time": 0.9053876953125, "loss": 1.5130248069763184, "lr": 7.98e-06, "seqlen": 8192.0, "step_tflops": 24.377596991937747, "step_time": 0.8463136672973633} +{"epoch": 0, "iter": 134, "iter_tflops": 40.80980877065712, "iter_time": 0.5055425186157226, "loss": 1.2983276844024658, "lr": 8.040000000000001e-06, "seqlen": 8192.0, "step_tflops": 44.01732957847604, "step_time": 0.4687038879394531} +{"epoch": 0, "iter": 135, "iter_tflops": 46.07981415250248, "iter_time": 0.4477251892089844, "loss": 1.2557049989700317, "lr": 8.1e-06, "seqlen": 8192.0, "step_tflops": 49.92251689097443, "step_time": 0.41326228713989255} +{"epoch": 0, "iter": 136, "iter_tflops": 49.801792368172094, "iter_time": 0.41426407623291017, "loss": 0.961929202079773, "lr": 8.160000000000001e-06, "seqlen": 8192.0, "step_tflops": 53.59646295125995, "step_time": 0.3849338626861572} +{"epoch": 0, "iter": 137, "iter_tflops": 47.79818007905549, "iter_time": 0.4316292686462402, "loss": 0.9519545435905457, "lr": 
8.220000000000001e-06, "seqlen": 8192.0, "step_tflops": 52.16751229366687, "step_time": 0.39547780990600584} +{"epoch": 0, "iter": 138, "iter_tflops": 15.425625583997913, "iter_time": 1.3374558715820313, "loss": 1.05486261844635, "lr": 8.28e-06, "seqlen": 8192.0, "step_tflops": 18.257199205510034, "step_time": 1.1300251083374022} +{"epoch": 0, "iter": 139, "iter_tflops": 37.166049022077566, "iter_time": 0.5551059112548827, "loss": 1.1053674221038818, "lr": 8.340000000000001e-06, "seqlen": 8192.0, "step_tflops": 40.66154494182408, "step_time": 0.5073858737945557} +{"epoch": 0, "iter": 140, "iter_tflops": 42.72519844779482, "iter_time": 0.48287882232666013, "loss": 1.0256553888320923, "lr": 8.400000000000001e-06, "seqlen": 8192.0, "step_tflops": 46.57696193312254, "step_time": 0.44294631195068357} +{"epoch": 0, "iter": 141, "iter_tflops": 19.502313777403003, "iter_time": 1.0578792724609376, "loss": 0.5490008592605591, "lr": 8.459999999999999e-06, "seqlen": 8192.0, "step_tflops": 20.721676699690967, "step_time": 0.9956285781860351} +{"epoch": 0, "iter": 142, "iter_tflops": 20.340482773198254, "iter_time": 1.0142873077392578, "loss": 0.6053564548492432, "lr": 8.52e-06, "seqlen": 8192.0, "step_tflops": 24.332819792147518, "step_time": 0.84787105178833} +{"epoch": 0, "iter": 143, "iter_tflops": 51.400861939619645, "iter_time": 0.4013764114379883, "loss": 0.8134863972663879, "lr": 8.58e-06, "seqlen": 8192.0, "step_tflops": 55.813236763969925, "step_time": 0.3696451721191406} +{"epoch": 0, "iter": 144, "iter_tflops": 50.40073527378375, "iter_time": 0.40934112167358394, "loss": 0.662963330745697, "lr": 8.64e-06, "seqlen": 8192.0, "step_tflops": 54.65720063604384, "step_time": 0.3774634132385254} +{"epoch": 0, "iter": 145, "iter_tflops": 24.595830168715455, "iter_time": 0.8388045196533201, "loss": 1.3554767370224, "lr": 8.7e-06, "seqlen": 8192.0, "step_tflops": 26.444010556091566, "step_time": 0.7801802024841308} +{"epoch": 0, "iter": 146, "iter_tflops": 10.379718362876673, 
"iter_time": 1.9876351928710938, "loss": 1.630285620689392, "lr": 8.759999999999999e-06, "seqlen": 8192.0, "step_tflops": 11.851488944215946, "step_time": 1.7408018188476564} +{"epoch": 0, "iter": 147, "iter_tflops": 15.566179999353187, "iter_time": 1.3253793487548828, "loss": 1.5885130167007446, "lr": 8.82e-06, "seqlen": 8192.0, "step_tflops": 20.795401647968056, "step_time": 0.9920988235473633} +{"epoch": 0, "iter": 148, "iter_tflops": 35.23844214101469, "iter_time": 0.5854712142944336, "loss": 1.4798117876052856, "lr": 8.88e-06, "seqlen": 8192.0, "step_tflops": 43.37135630863085, "step_time": 0.4756847667694092} +{"epoch": 0, "iter": 149, "iter_tflops": 22.286908140823016, "iter_time": 0.7387005462646484, "loss": 1.345321536064148, "lr": 8.939999999999999e-06, "seqlen": 6576.0, "step_tflops": 23.635802352621834, "step_time": 0.6965429382324219} +{"epoch": 0, "iter": 150, "iter_tflops": 10.420831117832357, "iter_time": 1.579850112915039, "loss": 1.0980658531188965, "lr": 9e-06, "seqlen": 6576.0, "step_tflops": 12.813333799749216, "step_time": 1.284860870361328} +{"epoch": 0, "iter": 151, "iter_tflops": 24.53644606088793, "iter_time": 0.6709753799438476, "loss": 1.225976586341858, "lr": 9.06e-06, "seqlen": 6576.0, "step_tflops": 26.489077195127113, "step_time": 0.6215147132873535} +{"epoch": 0, "iter": 152, "iter_tflops": 27.930567604251642, "iter_time": 0.5894384765624999, "loss": 0.9379245042800903, "lr": 9.12e-06, "seqlen": 6576.0, "step_tflops": 29.825325090571845, "step_time": 0.5519923477172851} +{"epoch": 0, "iter": 153, "iter_tflops": 20.580341689133455, "iter_time": 1.002466033935547, "loss": 0.2994917929172516, "lr": 9.18e-06, "seqlen": 8192.0, "step_tflops": 21.910974634937673, "step_time": 0.9415872116088867} +{"epoch": 0, "iter": 154, "iter_tflops": 19.74541280198154, "iter_time": 1.0448550109863282, "loss": 0.3188997507095337, "lr": 9.24e-06, "seqlen": 8192.0, "step_tflops": 24.35222713071449, "step_time": 0.8471953468322754} +{"epoch": 0, "iter": 
155, "iter_tflops": 46.540051569712006, "iter_time": 0.44329760742187496, "loss": 0.23639392852783203, "lr": 9.3e-06, "seqlen": 8192.0, "step_tflops": 50.834452208001714, "step_time": 0.40584864425659184} +{"epoch": 0, "iter": 156, "iter_tflops": 44.64318409921582, "iter_time": 0.46213311004638674, "loss": 0.31603893637657166, "lr": 9.36e-06, "seqlen": 8192.0, "step_tflops": 48.88566391608769, "step_time": 0.42202747917175293} +{"epoch": 0, "iter": 157, "iter_tflops": 13.924391995379915, "iter_time": 0.7528831253051758, "loss": 0.017670797184109688, "lr": 9.42e-06, "seqlen": 4224.0, "step_tflops": 15.088938234533384, "step_time": 0.6947765045166014} +{"epoch": 0, "iter": 158, "iter_tflops": 4.519472329595355, "iter_time": 2.3196158752441405, "loss": 0.044896770268678665, "lr": 9.48e-06, "seqlen": 4224.0, "step_tflops": 4.970241827693987, "step_time": 2.1092413864135744} +{"epoch": 0, "iter": 159, "iter_tflops": 5.814147479722919, "iter_time": 1.80309147644043, "loss": 0.015846971422433853, "lr": 9.54e-06, "seqlen": 4224.0, "step_tflops": 7.639264894417191, "step_time": 1.3723100204467773} +{"epoch": 0, "iter": 160, "iter_tflops": 22.710011709815284, "iter_time": 0.4616219444274902, "loss": 0.025845075026154518, "lr": 9.600000000000001e-06, "seqlen": 4224.0, "step_tflops": 25.34071958957272, "step_time": 0.4136993713378906} +{"epoch": 0, "iter": 161, "iter_tflops": 13.887830464879931, "iter_time": 1.2179676361083984, "loss": 1.0667738914489746, "lr": 9.66e-06, "seqlen": 6752.0, "step_tflops": 14.664779752715873, "step_time": 1.1534389419555664} +{"epoch": 0, "iter": 162, "iter_tflops": 10.951837943138344, "iter_time": 1.5444830474853513, "loss": 1.1410009860992432, "lr": 9.72e-06, "seqlen": 6752.0, "step_tflops": 14.829417638289724, "step_time": 1.1406333312988282} +{"epoch": 0, "iter": 163, "iter_tflops": 27.413487819095078, "iter_time": 0.6170294036865234, "loss": 1.095210313796997, "lr": 9.780000000000001e-06, "seqlen": 6752.0, "step_tflops": 29.441647407037085, 
"step_time": 0.5745238304138184} +{"epoch": 0, "iter": 164, "iter_tflops": 28.071347789133352, "iter_time": 0.6025691452026367, "loss": 0.9219949245452881, "lr": 9.84e-06, "seqlen": 6752.0, "step_tflops": 30.01738159105845, "step_time": 0.5635044479370117} +{"epoch": 0, "iter": 165, "iter_tflops": 21.283278759263926, "iter_time": 0.969356918334961, "loss": 0.7924243807792664, "lr": 9.9e-06, "seqlen": 8192.0, "step_tflops": 22.663413388497364, "step_time": 0.9103259582519532} +{"epoch": 0, "iter": 166, "iter_tflops": 23.177245646127023, "iter_time": 0.8901443176269531, "loss": 0.7439011335372925, "lr": 9.960000000000001e-06, "seqlen": 8192.0, "step_tflops": 31.099281947029436, "step_time": 0.6633945293426514} +{"epoch": 0, "iter": 167, "iter_tflops": 56.17811757139869, "iter_time": 0.3672443008422852, "loss": 0.8680049180984497, "lr": 1.002e-05, "seqlen": 8192.0, "step_tflops": 60.98416609532442, "step_time": 0.33830246162414557} +{"epoch": 0, "iter": 168, "iter_tflops": 51.009821805486965, "iter_time": 0.4044533538818359, "loss": 0.8230850696563721, "lr": 1.008e-05, "seqlen": 8192.0, "step_tflops": 55.23929041393122, "step_time": 0.37348585319519045} +{"epoch": 0, "iter": 169, "iter_tflops": 34.6857175927051, "iter_time": 0.5948008270263672, "loss": 0.9072800278663635, "lr": 1.0140000000000001e-05, "seqlen": 8192.0, "step_tflops": 37.162897838085215, "step_time": 0.5551529808044433} +{"epoch": 0, "iter": 170, "iter_tflops": 20.147268400170287, "iter_time": 1.0240144271850586, "loss": 0.6317659616470337, "lr": 1.02e-05, "seqlen": 8192.0, "step_tflops": 22.68767398411483, "step_time": 0.9093525199890138} +{"epoch": 0, "iter": 171, "iter_tflops": 40.467828360325306, "iter_time": 0.5098146934509278, "loss": 0.6234942674636841, "lr": 1.0260000000000002e-05, "seqlen": 8192.0, "step_tflops": 44.454300125755545, "step_time": 0.46409668922424324} +{"epoch": 0, "iter": 172, "iter_tflops": 38.44160903836559, "iter_time": 0.5366865234375, "loss": 0.5395563244819641, "lr": 
1.032e-05, "seqlen": 8192.0, "step_tflops": 42.181867556803596, "step_time": 0.48909862709045415} +{"epoch": 0, "iter": 173, "iter_tflops": 18.020121276728506, "iter_time": 0.6737497787475586, "loss": 0.10582209378480911, "lr": 1.0379999999999999e-05, "seqlen": 4880.0, "step_tflops": 19.62055004609465, "step_time": 0.6187926788330078} +{"epoch": 0, "iter": 174, "iter_tflops": 10.865882042697079, "iter_time": 1.11735546875, "loss": 0.15950746834278107, "lr": 1.044e-05, "seqlen": 4880.0, "step_tflops": 12.940595168619957, "step_time": 0.9382143993377685} +{"epoch": 0, "iter": 175, "iter_tflops": 27.621302659895292, "iter_time": 0.4395539512634278, "loss": 0.10615313798189163, "lr": 1.05e-05, "seqlen": 4880.0, "step_tflops": 30.512125182540125, "step_time": 0.39790911483764646} +{"epoch": 0, "iter": 176, "iter_tflops": 26.56583890682588, "iter_time": 0.4570174789428711, "loss": 0.15345710515975952, "lr": 1.0559999999999999e-05, "seqlen": 4880.0, "step_tflops": 29.256147354811276, "step_time": 0.4149915084838867} +{"epoch": 0, "iter": 177, "iter_tflops": 29.697425576063527, "iter_time": 0.694709831237793, "loss": 0.20339880883693695, "lr": 1.062e-05, "seqlen": 8192.0, "step_tflops": 31.83063555565025, "step_time": 0.6481521072387695} +{"epoch": 0, "iter": 178, "iter_tflops": 20.72514552725294, "iter_time": 0.9954619369506836, "loss": 0.2871270775794983, "lr": 1.068e-05, "seqlen": 8192.0, "step_tflops": 27.384611804618377, "step_time": 0.7533827266693115} +{"epoch": 0, "iter": 179, "iter_tflops": 54.01877420918294, "iter_time": 0.38192450332641603, "loss": 0.27718281745910645, "lr": 1.074e-05, "seqlen": 8192.0, "step_tflops": 59.46864145278105, "step_time": 0.3469239082336426} +{"epoch": 0, "iter": 180, "iter_tflops": 54.19940819606696, "iter_time": 0.38065163803100577, "loss": 0.24402175843715668, "lr": 1.08e-05, "seqlen": 8192.0, "step_tflops": 58.98415609410112, "step_time": 0.3497734794616699} +{"epoch": 0, "iter": 181, "iter_tflops": 50.667110063332714, 
"iter_time": 0.4071890716552734, "loss": 0.32335492968559265, "lr": 1.086e-05, "seqlen": 8192.0, "step_tflops": 55.76485786026661, "step_time": 0.36996585845947266} +{"epoch": 0, "iter": 182, "iter_tflops": 49.99593739477483, "iter_time": 0.41265539932250983, "loss": 0.33137014508247375, "lr": 1.092e-05, "seqlen": 8192.0, "step_tflops": 54.59370682056442, "step_time": 0.37790241241455075} +{"epoch": 0, "iter": 183, "iter_tflops": 53.59367593210602, "iter_time": 0.3849538803100586, "loss": 0.3237469792366028, "lr": 1.098e-05, "seqlen": 8192.0, "step_tflops": 58.481576660784086, "step_time": 0.35277936553955075} +{"epoch": 0, "iter": 184, "iter_tflops": 55.49822306434821, "iter_time": 0.3717433166503906, "loss": 0.42459192872047424, "lr": 1.104e-05, "seqlen": 8192.0, "step_tflops": 60.20600731327802, "step_time": 0.3426749992370606} +{"epoch": 0, "iter": 185, "iter_tflops": 44.60776128939804, "iter_time": 0.4625000877380371, "loss": 0.4397657513618469, "lr": 1.11e-05, "seqlen": 8192.0, "step_tflops": 49.57843116018441, "step_time": 0.41613042259216315} +{"epoch": 0, "iter": 186, "iter_tflops": 48.397650574445336, "iter_time": 0.42628295516967774, "loss": 0.36135134100914, "lr": 1.116e-05, "seqlen": 8192.0, "step_tflops": 52.89945975267081, "step_time": 0.39000575065612797} +{"epoch": 0, "iter": 187, "iter_tflops": 51.932594652072765, "iter_time": 0.39726675796508787, "loss": 0.28694069385528564, "lr": 1.1220000000000001e-05, "seqlen": 8192.0, "step_tflops": 56.51826786359639, "step_time": 0.36503407287597656} +{"epoch": 0, "iter": 188, "iter_tflops": 46.74440057651609, "iter_time": 0.441359676361084, "loss": 0.3497793674468994, "lr": 1.128e-05, "seqlen": 8192.0, "step_tflops": 50.94594562033682, "step_time": 0.4049604587554932} +{"epoch": 0, "iter": 189, "iter_tflops": 40.588714184715194, "iter_time": 0.5082963066101074, "loss": 0.6381979584693909, "lr": 1.134e-05, "seqlen": 8192.0, "step_tflops": 43.75777075269865, "step_time": 0.4714841079711914} +{"epoch": 0, 
"iter": 190, "iter_tflops": 11.41195981430453, "iter_time": 1.8078484191894528, "loss": 0.7473300099372864, "lr": 1.1400000000000001e-05, "seqlen": 8192.0, "step_tflops": 13.839457398191678, "step_time": 1.4907443923950194} +{"epoch": 0, "iter": 191, "iter_tflops": 15.629390637621924, "iter_time": 1.320019058227539, "loss": 0.588635265827179, "lr": 1.146e-05, "seqlen": 8192.0, "step_tflops": 18.472373587635232, "step_time": 1.1168620758056642} +{"epoch": 0, "iter": 192, "iter_tflops": 18.334887991064246, "iter_time": 1.1252369537353515, "loss": 0.6152427196502686, "lr": 1.152e-05, "seqlen": 8192.0, "step_tflops": 23.254610734826617, "step_time": 0.8871829223632812} +{"epoch": 0, "iter": 193, "iter_tflops": 19.61260322967829, "iter_time": 0.826880401611328, "loss": 0.8423949480056763, "lr": 1.1580000000000001e-05, "seqlen": 6480.0, "step_tflops": 20.58830956102807, "step_time": 0.7876934814453125} +{"epoch": 0, "iter": 194, "iter_tflops": 9.966707115135584, "iter_time": 1.6271449584960938, "loss": 1.1090155839920044, "lr": 1.164e-05, "seqlen": 6480.0, "step_tflops": 11.837796261323733, "step_time": 1.3699574546813964} +{"epoch": 0, "iter": 195, "iter_tflops": 26.24231251963822, "iter_time": 0.6179820175170898, "loss": 0.9700158834457397, "lr": 1.1700000000000001e-05, "seqlen": 6480.0, "step_tflops": 28.150457604438976, "step_time": 0.576092845916748} +{"epoch": 0, "iter": 196, "iter_tflops": 26.131165893190047, "iter_time": 0.6206105499267578, "loss": 1.1193009614944458, "lr": 1.1760000000000001e-05, "seqlen": 6480.0, "step_tflops": 28.06348473208966, "step_time": 0.5778782424926757} +{"epoch": 0, "iter": 197, "iter_tflops": 24.52643884346338, "iter_time": 0.8411777038574219, "loss": 0.7258867025375366, "lr": 1.182e-05, "seqlen": 8192.0, "step_tflops": 25.956228738297757, "step_time": 0.7948417205810547} +{"epoch": 0, "iter": 198, "iter_tflops": 11.31104818076355, "iter_time": 1.8239771575927735, "loss": 0.6648061275482178, "lr": 1.1880000000000001e-05, "seqlen": 
8192.0, "step_tflops": 13.986913561418794, "step_time": 1.4750283126831054} +{"epoch": 0, "iter": 199, "iter_tflops": 11.491303330038653, "iter_time": 1.7953658447265626, "loss": 0.4846198558807373, "lr": 1.1940000000000001e-05, "seqlen": 8192.0, "step_tflops": 13.264486326926397, "step_time": 1.5553631706237794} +{"epoch": 0, "iter": 200, "iter_tflops": 33.748738222725954, "iter_time": 0.6113145141601561, "loss": 0.5929960012435913, "lr": 1.2e-05, "seqlen": 8192.0, "step_tflops": 50.20073432015914, "step_time": 0.4109719467163086} +{"epoch": 0, "iter": 201, "iter_tflops": 26.581118520037833, "iter_time": 0.7029674530029297, "loss": 0.9744642376899719, "lr": 1.2060000000000001e-05, "seqlen": 7440.0, "step_tflops": 28.121646553894433, "step_time": 0.6644582901000977} +{"epoch": 0, "iter": 202, "iter_tflops": 17.32272760554076, "iter_time": 1.0786789245605468, "loss": 0.6188753247261047, "lr": 1.2120000000000001e-05, "seqlen": 7440.0, "step_tflops": 20.142677600529392, "step_time": 0.9276652069091798} +{"epoch": 0, "iter": 203, "iter_tflops": 32.86254163988854, "iter_time": 0.5686006088256835, "loss": 0.9649367332458496, "lr": 1.2180000000000002e-05, "seqlen": 7440.0, "step_tflops": 35.0775520206128, "step_time": 0.532695701599121} +{"epoch": 0, "iter": 204, "iter_tflops": 34.90575657457617, "iter_time": 0.5353174667358398, "loss": 0.9125530123710632, "lr": 1.224e-05, "seqlen": 7440.0, "step_tflops": 37.038149698224665, "step_time": 0.5044976959228515} +{"epoch": 0, "iter": 205, "iter_tflops": 32.69259236054249, "iter_time": 0.631063247680664, "loss": 0.6601971387863159, "lr": 1.2299999999999999e-05, "seqlen": 8192.0, "step_tflops": 34.99966877244618, "step_time": 0.5894653930664062} +{"epoch": 0, "iter": 206, "iter_tflops": 9.535947539968634, "iter_time": 2.1635074462890627, "loss": 0.8669436573982239, "lr": 1.236e-05, "seqlen": 8192.0, "step_tflops": 11.78575391580746, "step_time": 1.7505111389160157} +{"epoch": 0, "iter": 207, "iter_tflops": 14.859545142174401, 
"iter_time": 1.388406799316406, "loss": 1.2430059909820557, "lr": 1.242e-05, "seqlen": 8192.0, "step_tflops": 18.67919024053003, "step_time": 1.104496139526367} +{"epoch": 0, "iter": 208, "iter_tflops": 19.31909562790939, "iter_time": 1.0679119720458983, "loss": 0.9575577974319458, "lr": 1.2479999999999999e-05, "seqlen": 8192.0, "step_tflops": 23.43911990681655, "step_time": 0.8801991539001465} +{"epoch": 0, "iter": 209, "iter_tflops": 15.359539630787918, "iter_time": 1.1280252075195312, "loss": 0.936623752117157, "lr": 1.254e-05, "seqlen": 6912.0, "step_tflops": 16.087632465935695, "step_time": 1.076973129272461} +{"epoch": 0, "iter": 210, "iter_tflops": 19.0243402210647, "iter_time": 0.9107252960205079, "loss": 0.8845379948616028, "lr": 1.26e-05, "seqlen": 6912.0, "step_tflops": 20.886566557620892, "step_time": 0.8295258979797364} +{"epoch": 0, "iter": 211, "iter_tflops": 26.566327277490092, "iter_time": 0.6521770095825195, "loss": 1.0182976722717285, "lr": 1.2659999999999999e-05, "seqlen": 6912.0, "step_tflops": 28.42773696567477, "step_time": 0.6094733428955078} +{"epoch": 0, "iter": 212, "iter_tflops": 27.78540064285822, "iter_time": 0.623563003540039, "loss": 0.7478991150856018, "lr": 1.272e-05, "seqlen": 6912.0, "step_tflops": 29.702680501690228, "step_time": 0.5833126029968262} +{"epoch": 0, "iter": 213, "iter_tflops": 21.97053550802651, "iter_time": 0.9390346221923829, "loss": 0.8235960602760315, "lr": 1.278e-05, "seqlen": 8192.0, "step_tflops": 23.60373704628119, "step_time": 0.8740604705810546} +{"epoch": 0, "iter": 214, "iter_tflops": 19.34058406682292, "iter_time": 1.0667254638671873, "loss": 1.1375408172607422, "lr": 1.284e-05, "seqlen": 8192.0, "step_tflops": 23.597341101018568, "step_time": 0.8742973804473877} +{"epoch": 0, "iter": 215, "iter_tflops": 48.528614734206585, "iter_time": 0.4251325454711914, "loss": 0.8664333820343018, "lr": 1.29e-05, "seqlen": 8192.0, "step_tflops": 52.66635866370493, "step_time": 0.3917319145202637} +{"epoch": 0, 
"iter": 216, "iter_tflops": 51.78187339733296, "iter_time": 0.39842308044433594, "loss": 0.79986172914505, "lr": 1.296e-05, "seqlen": 8192.0, "step_tflops": 56.2166922482634, "step_time": 0.36699230575561526} +{"epoch": 0, "iter": 217, "iter_tflops": 33.915561310570325, "iter_time": 0.6083075942993165, "loss": 1.3996338844299316, "lr": 1.302e-05, "seqlen": 8192.0, "step_tflops": 36.18773876742059, "step_time": 0.5701128120422364} +{"epoch": 0, "iter": 218, "iter_tflops": 9.475304308877226, "iter_time": 2.177354187011719, "loss": 1.4624851942062378, "lr": 1.308e-05, "seqlen": 8192.0, "step_tflops": 10.797249674503767, "step_time": 1.91077303314209} +{"epoch": 0, "iter": 219, "iter_tflops": 11.372842543233194, "iter_time": 1.8140665740966797, "loss": 1.2759158611297607, "lr": 1.314e-05, "seqlen": 8192.0, "step_tflops": 13.481437587612369, "step_time": 1.5303333473205567} +{"epoch": 0, "iter": 220, "iter_tflops": 30.118221657254526, "iter_time": 0.685003707885742, "loss": 1.4438679218292236, "lr": 1.32e-05, "seqlen": 8192.0, "step_tflops": 37.09231998052871, "step_time": 0.5562093048095703} +{"epoch": 0, "iter": 221, "iter_tflops": 17.40247081532559, "iter_time": 0.9083498306274415, "loss": 0.8137471079826355, "lr": 1.326e-05, "seqlen": 6320.0, "step_tflops": 18.27909512976817, "step_time": 0.8647874145507812} +{"epoch": 0, "iter": 222, "iter_tflops": 12.87139410030444, "iter_time": 1.2281133880615236, "loss": 0.6735737323760986, "lr": 1.3320000000000001e-05, "seqlen": 6320.0, "step_tflops": 15.544526186762479, "step_time": 1.01691947555542} +{"epoch": 0, "iter": 223, "iter_tflops": 24.384854214382102, "iter_time": 0.6482520370483399, "loss": 0.8697383999824524, "lr": 1.338e-05, "seqlen": 6320.0, "step_tflops": 26.30627452085264, "step_time": 0.6009034614562989} +{"epoch": 0, "iter": 224, "iter_tflops": 23.15328040501996, "iter_time": 0.6827339859008789, "loss": 0.7277957797050476, "lr": 1.344e-05, "seqlen": 6320.0, "step_tflops": 24.857171444225745, "step_time": 
0.635934440612793} +{"epoch": 0, "iter": 225, "iter_tflops": 27.278495160384868, "iter_time": 0.7563134765624999, "loss": 0.9287852048873901, "lr": 1.3500000000000001e-05, "seqlen": 8192.0, "step_tflops": 29.307960637790885, "step_time": 0.7039416275024415} +{"epoch": 0, "iter": 226, "iter_tflops": 17.02472945245246, "iter_time": 1.2118309173583985, "loss": 0.9547605514526367, "lr": 1.356e-05, "seqlen": 8192.0, "step_tflops": 22.50811295203081, "step_time": 0.9166069831848145} +{"epoch": 0, "iter": 227, "iter_tflops": 50.701268362200736, "iter_time": 0.40691474151611334, "loss": 1.097113847732544, "lr": 1.362e-05, "seqlen": 8192.0, "step_tflops": 55.07059336521169, "step_time": 0.3746299476623535} +{"epoch": 0, "iter": 228, "iter_tflops": 47.191986170864865, "iter_time": 0.4371736640930176, "loss": 1.0762759447097778, "lr": 1.3680000000000001e-05, "seqlen": 8192.0, "step_tflops": 51.09571077552285, "step_time": 0.40377349090576176} +{"epoch": 0, "iter": 229, "iter_tflops": 27.45230828194537, "iter_time": 0.7515249099731447, "loss": 0.6152461171150208, "lr": 1.374e-05, "seqlen": 8192.0, "step_tflops": 29.121148954045875, "step_time": 0.7084574012756347} +{"epoch": 0, "iter": 230, "iter_tflops": 14.169275263150526, "iter_time": 1.4560443725585936, "loss": 0.6656825542449951, "lr": 1.3800000000000002e-05, "seqlen": 8192.0, "step_tflops": 17.68566305676265, "step_time": 1.166543399810791} +{"epoch": 0, "iter": 231, "iter_tflops": 38.615972913421395, "iter_time": 0.5342632064819336, "loss": 0.5643512606620789, "lr": 1.3860000000000001e-05, "seqlen": 8192.0, "step_tflops": 42.38984263473553, "step_time": 0.48669898796081545} +{"epoch": 0, "iter": 232, "iter_tflops": 43.607674528734506, "iter_time": 0.47310694122314445, "loss": 0.7383673191070557, "lr": 1.392e-05, "seqlen": 8192.0, "step_tflops": 47.21327721402482, "step_time": 0.4369765186309814} +{"epoch": 0, "iter": 233, "iter_tflops": 22.647102186158758, "iter_time": 0.910981605529785, "loss": 1.258697748184204, "lr": 
1.3980000000000002e-05, "seqlen": 8192.0, "step_tflops": 24.3587422914293, "step_time": 0.8469687499999999} +{"epoch": 0, "iter": 234, "iter_tflops": 18.312518435449626, "iter_time": 1.1266114807128906, "loss": 1.2146064043045044, "lr": 1.4040000000000001e-05, "seqlen": 8192.0, "step_tflops": 22.202264069961526, "step_time": 0.9292337684631349} +{"epoch": 0, "iter": 235, "iter_tflops": 42.041635451779825, "iter_time": 0.4907300415039062, "loss": 1.2505264282226562, "lr": 1.4099999999999999e-05, "seqlen": 8192.0, "step_tflops": 45.260957686108995, "step_time": 0.45582538604736333} +{"epoch": 0, "iter": 236, "iter_tflops": 42.14788130929784, "iter_time": 0.48949301528930667, "loss": 0.9308245778083801, "lr": 1.416e-05, "seqlen": 8192.0, "step_tflops": 45.26576565339174, "step_time": 0.455776969909668} +{"epoch": 0, "iter": 237, "iter_tflops": 29.762910052745344, "iter_time": 0.6931813278198241, "loss": 1.245286226272583, "lr": 1.422e-05, "seqlen": 8192.0, "step_tflops": 31.515780622150096, "step_time": 0.6546273994445801} +{"epoch": 0, "iter": 238, "iter_tflops": 17.478944689468094, "iter_time": 1.1803397674560545, "loss": 1.0763545036315918, "lr": 1.428e-05, "seqlen": 8192.0, "step_tflops": 19.926730407533753, "step_time": 1.035347650527954} +{"epoch": 0, "iter": 239, "iter_tflops": 40.215843621207476, "iter_time": 0.5130090942382812, "loss": 1.2094000577926636, "lr": 1.434e-05, "seqlen": 8192.0, "step_tflops": 43.80758139688186, "step_time": 0.47094801521301266} +{"epoch": 0, "iter": 240, "iter_tflops": 39.869969727753556, "iter_time": 0.51745947265625, "loss": 1.1459811925888062, "lr": 1.44e-05, "seqlen": 8192.0, "step_tflops": 43.19024500403472, "step_time": 0.47767947387695314} +{"epoch": 0, "iter": 241, "iter_tflops": 19.04718155560019, "iter_time": 1.0831572875976563, "loss": 0.697243869304657, "lr": 1.446e-05, "seqlen": 8192.0, "step_tflops": 19.956990401801455, "step_time": 1.0337777938842774} +{"epoch": 0, "iter": 242, "iter_tflops": 11.57064598977326, 
"iter_time": 1.7830545959472657, "loss": 0.655898928642273, "lr": 1.452e-05, "seqlen": 8192.0, "step_tflops": 15.621418156624427, "step_time": 1.3206927375793456} +{"epoch": 0, "iter": 243, "iter_tflops": 43.67758563056864, "iter_time": 0.4723496780395508, "loss": 0.6039911508560181, "lr": 1.458e-05, "seqlen": 8192.0, "step_tflops": 47.659535335019086, "step_time": 0.4328849067687988} +{"epoch": 0, "iter": 244, "iter_tflops": 45.660885923395064, "iter_time": 0.45183296585083005, "loss": 0.9210366606712341, "lr": 1.464e-05, "seqlen": 8192.0, "step_tflops": 49.849920515087504, "step_time": 0.4138641204833984} +{"epoch": 0, "iter": 245, "iter_tflops": 34.44576373637966, "iter_time": 0.5989442901611329, "loss": 0.01356600970029831, "lr": 1.47e-05, "seqlen": 8192.0, "step_tflops": 37.4618452724796, "step_time": 0.5507228317260742} +{"epoch": 0, "iter": 246, "iter_tflops": 18.450645866814757, "iter_time": 1.1181773071289063, "loss": 0.005164118018001318, "lr": 1.4760000000000001e-05, "seqlen": 8192.0, "step_tflops": 21.02159319595522, "step_time": 0.9814238777160644} +{"epoch": 0, "iter": 247, "iter_tflops": 44.82265167680493, "iter_time": 0.46028275299072274, "loss": 0.020541585981845856, "lr": 1.482e-05, "seqlen": 8192.0, "step_tflops": 49.92146509280198, "step_time": 0.4132709941864014} +{"epoch": 0, "iter": 248, "iter_tflops": 50.86518862986714, "iter_time": 0.405603401184082, "loss": 0.032419826835393906, "lr": 1.488e-05, "seqlen": 8192.0, "step_tflops": 56.26426620914814, "step_time": 0.3666819972991943} +{"epoch": 0, "iter": 249, "iter_tflops": 5.210890605475031, "iter_time": 0.5802651901245117, "loss": 0.12370920926332474, "lr": 1.4940000000000001e-05, "seqlen": 1232.0, "step_tflops": 5.697914493100784, "step_time": 0.5306675682067871} +{"epoch": 0, "iter": 250, "iter_tflops": 5.665314948525402, "iter_time": 0.5337211532592774, "loss": 0.2571699321269989, "lr": 1.5e-05, "seqlen": 1232.0, "step_tflops": 6.156471167076161, "step_time": 0.49114149093627935} 
+{"epoch": 0, "iter": 251, "iter_tflops": 6.355247330907285, "iter_time": 0.47577981948852544, "loss": 1.6774293184280396, "lr": 1.506e-05, "seqlen": 1232.0, "step_tflops": 6.923803428093208, "step_time": 0.43671061134338374} +{"epoch": 0, "iter": 252, "iter_tflops": 6.49748351529667, "iter_time": 0.4653645401000977, "loss": 1.6781550645828247, "lr": 1.5120000000000001e-05, "seqlen": 1232.0, "step_tflops": 7.099836618267871, "step_time": 0.42588281822204593} +{"epoch": 0, "iter": 253, "iter_tflops": 18.95166846025269, "iter_time": 1.0886162109375, "loss": 1.3996020555496216, "lr": 1.518e-05, "seqlen": 8192.0, "step_tflops": 20.012089497882446, "step_time": 1.0309315032958983} +{"epoch": 0, "iter": 254, "iter_tflops": 16.634796385441316, "iter_time": 1.2402372131347656, "loss": 1.2861168384552002, "lr": 1.524e-05, "seqlen": 8192.0, "step_tflops": 21.732527699885452, "step_time": 0.9493186340332032} +{"epoch": 0, "iter": 255, "iter_tflops": 39.51409008995964, "iter_time": 0.5221199188232423, "loss": 1.3603589534759521, "lr": 1.53e-05, "seqlen": 8192.0, "step_tflops": 42.71023934539842, "step_time": 0.4830479488372802} +{"epoch": 0, "iter": 256, "iter_tflops": 41.352500803800865, "iter_time": 0.498908000946045, "loss": 1.308079719543457, "lr": 1.5360000000000002e-05, "seqlen": 8192.0, "step_tflops": 44.73122874878067, "step_time": 0.4612234916687012} +{"epoch": 0, "iter": 257, "iter_tflops": 22.57532688254171, "iter_time": 0.9138779525756836, "loss": 0.858637273311615, "lr": 1.542e-05, "seqlen": 8192.0, "step_tflops": 23.89773874432048, "step_time": 0.8633073501586914} +{"epoch": 0, "iter": 258, "iter_tflops": 36.20780389528111, "iter_time": 0.5697968750000001, "loss": 0.8272855877876282, "lr": 1.548e-05, "seqlen": 8192.0, "step_tflops": 40.19789759957744, "step_time": 0.5132381229400635} +{"epoch": 0, "iter": 259, "iter_tflops": 48.217458256038285, "iter_time": 0.4278760070800781, "loss": 0.9353231191635132, "lr": 1.554e-05, "seqlen": 8192.0, "step_tflops": 
52.14964737049958, "step_time": 0.39561328887939456} +{"epoch": 0, "iter": 260, "iter_tflops": 48.554020585725276, "iter_time": 0.42491009521484374, "loss": 0.871121883392334, "lr": 1.56e-05, "seqlen": 8192.0, "step_tflops": 52.48689049546232, "step_time": 0.39307136154174804} +{"epoch": 0, "iter": 261, "iter_tflops": 30.013658838129743, "iter_time": 0.6873901519775389, "loss": 1.0112546682357788, "lr": 1.5660000000000003e-05, "seqlen": 8192.0, "step_tflops": 31.941531157632376, "step_time": 0.6459018325805663} +{"epoch": 0, "iter": 262, "iter_tflops": 9.360193446404834, "iter_time": 2.2041311035156252, "loss": 1.0662671327590942, "lr": 1.5720000000000002e-05, "seqlen": 8192.0, "step_tflops": 11.194196602385635, "step_time": 1.8430168991088869} +{"epoch": 0, "iter": 263, "iter_tflops": 12.918198249083003, "iter_time": 1.5970565795898437, "loss": 1.2308082580566406, "lr": 1.578e-05, "seqlen": 8192.0, "step_tflops": 15.129671388871175, "step_time": 1.3636180839538574} +{"epoch": 0, "iter": 264, "iter_tflops": 39.27368238444877, "iter_time": 0.5253159942626954, "loss": 1.3625235557556152, "lr": 1.584e-05, "seqlen": 8192.0, "step_tflops": 42.718460991589595, "step_time": 0.48295498085021965} +{"epoch": 0, "iter": 265, "iter_tflops": 12.416085733984954, "iter_time": 1.263256393432617, "loss": 0.8658929467201233, "lr": 1.59e-05, "seqlen": 6272.0, "step_tflops": 13.166690812530046, "step_time": 1.1912408294677732} +{"epoch": 0, "iter": 266, "iter_tflops": 11.379403774385255, "iter_time": 1.378341079711914, "loss": 0.6593328714370728, "lr": 1.596e-05, "seqlen": 6272.0, "step_tflops": 15.857189202667925, "step_time": 0.9891223144531249} +{"epoch": 0, "iter": 267, "iter_tflops": 29.639603593273083, "iter_time": 0.5291804809570312, "loss": 0.9633762836456299, "lr": 1.6020000000000002e-05, "seqlen": 6272.0, "step_tflops": 31.5204968073886, "step_time": 0.49760318756103517} +{"epoch": 0, "iter": 268, "iter_tflops": 26.02075918376528, "iter_time": 0.6027764053344726, "loss": 
0.7386669516563416, "lr": 1.6080000000000002e-05, "seqlen": 6272.0, "step_tflops": 27.69416730365636, "step_time": 0.5663539009094238} +{"epoch": 0, "iter": 269, "iter_tflops": 30.23054376561412, "iter_time": 0.6824585647583008, "loss": 0.0841054692864418, "lr": 1.614e-05, "seqlen": 8192.0, "step_tflops": 32.13003651169022, "step_time": 0.6421123580932618} +{"epoch": 0, "iter": 270, "iter_tflops": 12.09917162925784, "iter_time": 1.7051657867431635, "loss": 0.06396710872650146, "lr": 1.62e-05, "seqlen": 8192.0, "step_tflops": 15.636897801996634, "step_time": 1.3193853263854982} +{"epoch": 0, "iter": 271, "iter_tflops": 43.94273866815889, "iter_time": 0.4694994926452637, "loss": 0.09504442662000656, "lr": 1.626e-05, "seqlen": 8192.0, "step_tflops": 48.64585572940294, "step_time": 0.42410793685913084} +{"epoch": 0, "iter": 272, "iter_tflops": 47.89887144573515, "iter_time": 0.43072191238403323, "loss": 0.0647376999258995, "lr": 1.6320000000000003e-05, "seqlen": 8192.0, "step_tflops": 52.82882077859077, "step_time": 0.3905272388458252} +{"epoch": 0, "iter": 273, "iter_tflops": 18.280160270776083, "iter_time": 0.9006130676269531, "loss": 0.04141662269830704, "lr": 1.6380000000000002e-05, "seqlen": 6576.0, "step_tflops": 19.809270665077417, "step_time": 0.8310932540893555} +{"epoch": 0, "iter": 274, "iter_tflops": 17.751669942253677, "iter_time": 0.9274254913330077, "loss": 0.0136225875467062, "lr": 1.6440000000000002e-05, "seqlen": 6576.0, "step_tflops": 19.990743742148656, "step_time": 0.8235487098693848} +{"epoch": 0, "iter": 275, "iter_tflops": 46.61163069308285, "iter_time": 0.3532026443481446, "loss": 0.0066367387771606445, "lr": 1.65e-05, "seqlen": 6576.0, "step_tflops": 51.36363740945764, "step_time": 0.32052541542053226} +{"epoch": 0, "iter": 276, "iter_tflops": 45.74712812195729, "iter_time": 0.3598772621154785, "loss": 0.008983737789094448, "lr": 1.656e-05, "seqlen": 6576.0, "step_tflops": 50.23346680547797, "step_time": 0.3277367115020752} +{"epoch": 0, 
"iter": 277, "iter_tflops": 27.099042546613713, "iter_time": 0.7613218612670898, "loss": 0.5783246159553528, "lr": 1.6620000000000004e-05, "seqlen": 8192.0, "step_tflops": 28.540256292520752, "step_time": 0.722876953125} +{"epoch": 0, "iter": 278, "iter_tflops": 14.745083152653573, "iter_time": 1.399184616088867, "loss": 0.4871577024459839, "lr": 1.6680000000000003e-05, "seqlen": 8192.0, "step_tflops": 19.065150059961688, "step_time": 1.0821364345550537} +{"epoch": 0, "iter": 279, "iter_tflops": 37.49122149729177, "iter_time": 0.5502913131713867, "loss": 0.5373903512954712, "lr": 1.6740000000000002e-05, "seqlen": 8192.0, "step_tflops": 41.01654945390152, "step_time": 0.5029943714141846} +{"epoch": 0, "iter": 280, "iter_tflops": 39.430196070477805, "iter_time": 0.5232308120727538, "loss": 0.7247188091278076, "lr": 1.6800000000000002e-05, "seqlen": 8192.0, "step_tflops": 43.265372755597525, "step_time": 0.4768500118255615} +{"epoch": 0, "iter": 281, "iter_tflops": 25.24151146524728, "iter_time": 0.8173477859497069, "loss": 0.29182472825050354, "lr": 1.686e-05, "seqlen": 8192.0, "step_tflops": 26.9203378186823, "step_time": 0.7663757286071777} +{"epoch": 0, "iter": 282, "iter_tflops": 12.593347430045595, "iter_time": 1.6382533416748046, "loss": 0.4145127832889557, "lr": 1.6919999999999997e-05, "seqlen": 8192.0, "step_tflops": 14.83888431144425, "step_time": 1.3903399391174316} +{"epoch": 0, "iter": 283, "iter_tflops": 48.48672624771888, "iter_time": 0.42549982452392576, "loss": 0.3361699879169464, "lr": 1.698e-05, "seqlen": 8192.0, "step_tflops": 52.30347759778974, "step_time": 0.39444974708557135} +{"epoch": 0, "iter": 284, "iter_tflops": 48.54951683473923, "iter_time": 0.42494951248168944, "loss": 0.47761768102645874, "lr": 1.704e-05, "seqlen": 8192.0, "step_tflops": 52.96347973148103, "step_time": 0.3895343284606934} +{"epoch": 0, "iter": 285, "iter_tflops": 20.56628470512898, "iter_time": 0.6970803756713868, "loss": 0.21860827505588531, "lr": 1.71e-05, "seqlen": 
5744.0, "step_tflops": 21.763641689384507, "step_time": 0.6587295303344727} +{"epoch": 0, "iter": 286, "iter_tflops": 12.453778999688973, "iter_time": 1.1511649169921876, "loss": 0.38327449560165405, "lr": 1.716e-05, "seqlen": 5744.0, "step_tflops": 15.754825673005739, "step_time": 0.9099658584594726} +{"epoch": 0, "iter": 287, "iter_tflops": 27.96762125292127, "iter_time": 0.5126053924560547, "loss": 0.3403107523918152, "lr": 1.7219999999999998e-05, "seqlen": 5744.0, "step_tflops": 30.712859951900057, "step_time": 0.4667866649627686} +{"epoch": 0, "iter": 288, "iter_tflops": 25.327148524908292, "iter_time": 0.5660468826293945, "loss": 0.3116096258163452, "lr": 1.728e-05, "seqlen": 5744.0, "step_tflops": 27.882174578432565, "step_time": 0.5141763038635254} +{"epoch": 0, "iter": 289, "iter_tflops": 18.96233218119332, "iter_time": 1.0880040130615236, "loss": 0.21104057133197784, "lr": 1.734e-05, "seqlen": 8192.0, "step_tflops": 20.356658659317887, "step_time": 1.0134813308715822} +{"epoch": 0, "iter": 290, "iter_tflops": 37.029285824794215, "iter_time": 0.5571561279296875, "loss": 0.15936076641082764, "lr": 1.74e-05, "seqlen": 8192.0, "step_tflops": 40.95116815948205, "step_time": 0.503797435760498} +{"epoch": 0, "iter": 291, "iter_tflops": 53.61805909201293, "iter_time": 0.3847788200378418, "loss": 0.1563793271780014, "lr": 1.746e-05, "seqlen": 8192.0, "step_tflops": 58.16161167684987, "step_time": 0.3547201137542725} +{"epoch": 0, "iter": 292, "iter_tflops": 53.962249765731876, "iter_time": 0.38232456207275395, "loss": 0.1234995573759079, "lr": 1.7519999999999998e-05, "seqlen": 8192.0, "step_tflops": 58.76351784739256, "step_time": 0.35108676719665527} +{"epoch": 0, "iter": 293, "iter_tflops": 39.12032702907963, "iter_time": 0.5273752822875977, "loss": 1.3540571928024292, "lr": 1.758e-05, "seqlen": 8192.0, "step_tflops": 42.0828520801193, "step_time": 0.490249412536621} +{"epoch": 0, "iter": 294, "iter_tflops": 43.35010796792264, "iter_time": 0.47591792678833006, 
"loss": 1.3289079666137695, "lr": 1.764e-05, "seqlen": 8192.0, "step_tflops": 46.55115980489522, "step_time": 0.4431918258666992} +{"epoch": 0, "iter": 295, "iter_tflops": 47.448974834368585, "iter_time": 0.4348058853149414, "loss": 1.018540859222412, "lr": 1.77e-05, "seqlen": 8192.0, "step_tflops": 51.08649200311837, "step_time": 0.4038463535308838} +{"epoch": 0, "iter": 296, "iter_tflops": 44.575224090874904, "iter_time": 0.4628376846313476, "loss": 1.180645227432251, "lr": 1.776e-05, "seqlen": 8192.0, "step_tflops": 47.77979810512263, "step_time": 0.43179532623291017} +{"epoch": 0, "iter": 297, "iter_tflops": 23.498473023976903, "iter_time": 0.8779759216308594, "loss": 1.2924576997756958, "lr": 1.782e-05, "seqlen": 8192.0, "step_tflops": 24.687322132065894, "step_time": 0.8356958847045899} +{"epoch": 0, "iter": 298, "iter_tflops": 17.10056890938046, "iter_time": 1.206456558227539, "loss": 1.2678152322769165, "lr": 1.7879999999999998e-05, "seqlen": 8192.0, "step_tflops": 19.23258542023571, "step_time": 1.0727155532836914} +{"epoch": 0, "iter": 299, "iter_tflops": 39.082933592317666, "iter_time": 0.5278798599243164, "loss": 1.170820713043213, "lr": 1.794e-05, "seqlen": 8192.0, "step_tflops": 42.481014589775256, "step_time": 0.48565444374084465} +{"epoch": 0, "iter": 300, "iter_tflops": 39.814203560308655, "iter_time": 0.5181842575073243, "loss": 1.1420869827270508, "lr": 1.8e-05, "seqlen": 8192.0, "step_tflops": 42.85039313897704, "step_time": 0.48146800994873046} +{"epoch": 0, "iter": 301, "iter_tflops": 22.02706622659428, "iter_time": 0.9366246643066406, "loss": 1.0151078701019287, "lr": 1.806e-05, "seqlen": 8192.0, "step_tflops": 23.205262093345493, "step_time": 0.8890696182250977} +{"epoch": 0, "iter": 302, "iter_tflops": 22.663043579620116, "iter_time": 0.9103408126831054, "loss": 1.0488226413726807, "lr": 1.812e-05, "seqlen": 8192.0, "step_tflops": 25.812357014821796, "step_time": 0.7992719726562499} +{"epoch": 0, "iter": 303, "iter_tflops": 
39.653352792078095, "iter_time": 0.5202862319946289, "loss": 1.0052815675735474, "lr": 1.818e-05, "seqlen": 8192.0, "step_tflops": 43.05905174224483, "step_time": 0.4791348781585693} +{"epoch": 0, "iter": 304, "iter_tflops": 47.91599388461227, "iter_time": 0.4305679969787598, "loss": 0.9954573512077332, "lr": 1.824e-05, "seqlen": 8192.0, "step_tflops": 51.99553909764366, "step_time": 0.3967858371734619} +{"epoch": 0, "iter": 305, "iter_tflops": 31.232290312992312, "iter_time": 0.6605693435668945, "loss": 0.5140836834907532, "lr": 1.83e-05, "seqlen": 8192.0, "step_tflops": 34.106678819757384, "step_time": 0.6048989295959473} +{"epoch": 0, "iter": 306, "iter_tflops": 10.334652442306902, "iter_time": 1.9963025970458985, "loss": 0.6273610591888428, "lr": 1.836e-05, "seqlen": 8192.0, "step_tflops": 12.46557520900427, "step_time": 1.6550454483032226} +{"epoch": 0, "iter": 307, "iter_tflops": 11.351223246525283, "iter_time": 1.8175216064453126, "loss": 0.5179721117019653, "lr": 1.842e-05, "seqlen": 8192.0, "step_tflops": 13.075093449092416, "step_time": 1.577892623901367} +{"epoch": 0, "iter": 308, "iter_tflops": 40.41111636744325, "iter_time": 0.5105301551818848, "loss": 0.38769400119781494, "lr": 1.848e-05, "seqlen": 8192.0, "step_tflops": 44.31482989360834, "step_time": 0.46555732154846186} +{"epoch": 0, "iter": 309, "iter_tflops": 17.83218929992125, "iter_time": 0.888757095336914, "loss": 0.7994700074195862, "lr": 1.854e-05, "seqlen": 6336.0, "step_tflops": 18.81311418121452, "step_time": 0.8424168701171875} +{"epoch": 0, "iter": 310, "iter_tflops": 12.819503933041947, "iter_time": 1.2362790985107421, "loss": 0.8353286981582642, "lr": 1.86e-05, "seqlen": 6336.0, "step_tflops": 16.287171552589022, "step_time": 0.973065502166748} +{"epoch": 0, "iter": 311, "iter_tflops": 24.50119803901147, "iter_time": 0.6468452987670898, "loss": 0.9059646129608154, "lr": 1.866e-05, "seqlen": 6336.0, "step_tflops": 26.408815905282005, "step_time": 0.6001209907531738} +{"epoch": 0, 
"iter": 312, "iter_tflops": 24.201561115297235, "iter_time": 0.6548538208007812, "loss": 0.6960185766220093, "lr": 1.872e-05, "seqlen": 6336.0, "step_tflops": 25.995436750414253, "step_time": 0.6096641082763672} +{"epoch": 0, "iter": 313, "iter_tflops": 36.426020804066596, "iter_time": 0.5663833999633788, "loss": 0.7833435535430908, "lr": 1.878e-05, "seqlen": 8192.0, "step_tflops": 40.455671213577304, "step_time": 0.5099678955078125} +{"epoch": 0, "iter": 314, "iter_tflops": 35.611743475924875, "iter_time": 0.5793339920043945, "loss": 0.7103496193885803, "lr": 1.884e-05, "seqlen": 8192.0, "step_tflops": 40.04950471895098, "step_time": 0.5151397914886474} +{"epoch": 0, "iter": 315, "iter_tflops": 44.8089906575356, "iter_time": 0.4604230804443359, "loss": 0.7311384677886963, "lr": 1.8900000000000002e-05, "seqlen": 8192.0, "step_tflops": 48.98590745214974, "step_time": 0.42116385269165035} +{"epoch": 0, "iter": 316, "iter_tflops": 41.44972241101485, "iter_time": 0.4977377967834473, "loss": 0.6153272390365601, "lr": 1.896e-05, "seqlen": 8192.0, "step_tflops": 45.584014640402565, "step_time": 0.4525949211120605} +{"epoch": 0, "iter": 317, "iter_tflops": 28.64250408498846, "iter_time": 0.7202964324951172, "loss": 0.759590208530426, "lr": 1.902e-05, "seqlen": 8192.0, "step_tflops": 30.520672013813087, "step_time": 0.6759711418151855} +{"epoch": 0, "iter": 318, "iter_tflops": 10.770688290481782, "iter_time": 1.9154851531982422, "loss": 0.7845444083213806, "lr": 1.908e-05, "seqlen": 8192.0, "step_tflops": 13.487961720785327, "step_time": 1.5295931243896486} +{"epoch": 0, "iter": 319, "iter_tflops": 13.466028183468069, "iter_time": 1.5320845336914062, "loss": 0.9709535837173462, "lr": 1.914e-05, "seqlen": 8192.0, "step_tflops": 15.942612865651345, "step_time": 1.2940848331451416} +{"epoch": 0, "iter": 320, "iter_tflops": 38.44452033546158, "iter_time": 0.536645881652832, "loss": 0.8265582919120789, "lr": 1.9200000000000003e-05, "seqlen": 8192.0, "step_tflops": 
42.442718882863254, "step_time": 0.48609264564514165} +{"epoch": 0, "iter": 321, "iter_tflops": 22.257868042442976, "iter_time": 0.6954877471923827, "loss": 0.8190824389457703, "lr": 1.9260000000000002e-05, "seqlen": 6192.0, "step_tflops": 24.211864907162937, "step_time": 0.6393590316772462} +{"epoch": 0, "iter": 322, "iter_tflops": 22.38789193383206, "iter_time": 0.6914485092163085, "loss": 0.7417949438095093, "lr": 1.932e-05, "seqlen": 6192.0, "step_tflops": 23.989085227664738, "step_time": 0.6452965736389161} +{"epoch": 0, "iter": 323, "iter_tflops": 24.54885703529926, "iter_time": 0.6305822906494142, "loss": 0.7175537943840027, "lr": 1.938e-05, "seqlen": 6192.0, "step_tflops": 26.42800451719955, "step_time": 0.5857451133728028} +{"epoch": 0, "iter": 324, "iter_tflops": 23.92713502608963, "iter_time": 0.6469673233032227, "loss": 0.569252610206604, "lr": 1.944e-05, "seqlen": 6192.0, "step_tflops": 25.72607230938164, "step_time": 0.6017270851135254} +{"epoch": 0, "iter": 325, "iter_tflops": 28.322537066951885, "iter_time": 0.7284338073730469, "loss": 0.7424002289772034, "lr": 1.95e-05, "seqlen": 8192.0, "step_tflops": 30.654100952241368, "step_time": 0.6730288238525391} +{"epoch": 0, "iter": 326, "iter_tflops": 39.899402284212904, "iter_time": 0.5170777587890625, "loss": 0.9986813068389893, "lr": 1.9560000000000002e-05, "seqlen": 8192.0, "step_tflops": 43.976348058086685, "step_time": 0.46914067268371584} +{"epoch": 0, "iter": 327, "iter_tflops": 40.088134292255134, "iter_time": 0.5146433944702149, "loss": 0.7986699342727661, "lr": 1.9620000000000002e-05, "seqlen": 8192.0, "step_tflops": 43.919746336471796, "step_time": 0.4697452793121338} +{"epoch": 0, "iter": 328, "iter_tflops": 40.49850234441486, "iter_time": 0.5094285545349121, "loss": 0.8896185159683228, "lr": 1.968e-05, "seqlen": 8192.0, "step_tflops": 44.298280341607104, "step_time": 0.4657312507629394} +{"epoch": 0, "iter": 329, "iter_tflops": 35.44616458515158, "iter_time": 0.5820402221679687, "loss": 
0.2194972038269043, "lr": 1.974e-05, "seqlen": 8192.0, "step_tflops": 39.68439998447051, "step_time": 0.5198791847229004} +{"epoch": 0, "iter": 330, "iter_tflops": 39.81994971026053, "iter_time": 0.5181094818115235, "loss": 0.2928140461444855, "lr": 1.98e-05, "seqlen": 8192.0, "step_tflops": 44.72152743700422, "step_time": 0.46132354354858396} +{"epoch": 0, "iter": 331, "iter_tflops": 42.255574399478014, "iter_time": 0.4882454872131348, "loss": 0.34648874402046204, "lr": 1.9860000000000003e-05, "seqlen": 8192.0, "step_tflops": 46.348609178314824, "step_time": 0.4451286430358886} +{"epoch": 0, "iter": 332, "iter_tflops": 40.965965136822014, "iter_time": 0.5036154632568359, "loss": 0.23854421079158783, "lr": 1.9920000000000002e-05, "seqlen": 8192.0, "step_tflops": 44.86356399371563, "step_time": 0.4598630084991455} +{"epoch": 0, "iter": 333, "iter_tflops": 23.58761851302967, "iter_time": 0.814949447631836, "loss": 0.3892771899700165, "lr": 1.9980000000000002e-05, "seqlen": 7648.0, "step_tflops": 25.15742105690358, "step_time": 0.7640972671508789} +{"epoch": 0, "iter": 334, "iter_tflops": 38.24599957516286, "iter_time": 0.502607250213623, "loss": 0.34712931513786316, "lr": 2.004e-05, "seqlen": 7648.0, "step_tflops": 41.82352993996272, "step_time": 0.4596148796081543} +{"epoch": 0, "iter": 335, "iter_tflops": 44.218987055775315, "iter_time": 0.43471635055541996, "loss": 0.22416138648986816, "lr": 2.01e-05, "seqlen": 7648.0, "step_tflops": 47.83421131522714, "step_time": 0.4018612651824951} +{"epoch": 0, "iter": 336, "iter_tflops": 50.0621976557121, "iter_time": 0.3839766845703125, "loss": 0.26956117153167725, "lr": 2.016e-05, "seqlen": 7648.0, "step_tflops": 54.488968250840834, "step_time": 0.3527818069458008} +{"epoch": 0, "iter": 337, "iter_tflops": 40.229347209088736, "iter_time": 0.5128368949890136, "loss": 0.9713760018348694, "lr": 2.0220000000000003e-05, "seqlen": 8192.0, "step_tflops": 43.56814384959876, "step_time": 0.47353620529174806} +{"epoch": 0, "iter": 
338, "iter_tflops": 14.942997074558306, "iter_time": 1.3806529846191404, "loss": 1.2279971837997437, "lr": 2.0280000000000002e-05, "seqlen": 8192.0, "step_tflops": 18.20568344690703, "step_time": 1.133222686767578} +{"epoch": 0, "iter": 339, "iter_tflops": 13.981942226000962, "iter_time": 1.4755527648925781, "loss": 1.029977798461914, "lr": 2.0340000000000002e-05, "seqlen": 8192.0, "step_tflops": 16.23111555664878, "step_time": 1.271082904815674} +{"epoch": 0, "iter": 340, "iter_tflops": 31.42277080796956, "iter_time": 0.6565650634765625, "loss": 0.961839497089386, "lr": 2.04e-05, "seqlen": 8192.0, "step_tflops": 39.07561195576936, "step_time": 0.5279787693023681} +{"epoch": 0, "iter": 341, "iter_tflops": 21.569575578012618, "iter_time": 0.7347610855102538, "loss": 0.7553679347038269, "lr": 2.046e-05, "seqlen": 6336.0, "step_tflops": 23.369371567079703, "step_time": 0.67817333984375} +{"epoch": 0, "iter": 342, "iter_tflops": 27.688523424141216, "iter_time": 0.5723846130371094, "loss": 0.6626463532447815, "lr": 2.0520000000000003e-05, "seqlen": 6336.0, "step_tflops": 29.49352998495988, "step_time": 0.5373546257019044} +{"epoch": 0, "iter": 343, "iter_tflops": 28.555261579793108, "iter_time": 0.5550110168457031, "loss": 0.9214179515838623, "lr": 2.0580000000000003e-05, "seqlen": 6336.0, "step_tflops": 30.40195730014008, "step_time": 0.5212981719970703} +{"epoch": 0, "iter": 344, "iter_tflops": 29.300050501394832, "iter_time": 0.5409029846191405, "loss": 0.7083775401115417, "lr": 2.064e-05, "seqlen": 6336.0, "step_tflops": 31.104182503416578, "step_time": 0.5095290565490723} +{"epoch": 0, "iter": 345, "iter_tflops": 35.02549553034193, "iter_time": 0.5890307388305664, "loss": 0.4054161012172699, "lr": 2.07e-05, "seqlen": 8192.0, "step_tflops": 37.66248953111915, "step_time": 0.5477888946533204} +{"epoch": 0, "iter": 346, "iter_tflops": 46.01493252740224, "iter_time": 0.44835648727416993, "loss": 0.31987741589546204, "lr": 2.0759999999999998e-05, "seqlen": 8192.0, 
"step_tflops": 51.2454862309402, "step_time": 0.40259337997436523} +{"epoch": 0, "iter": 347, "iter_tflops": 50.69408699447054, "iter_time": 0.40697238540649416, "loss": 0.40430423617362976, "lr": 2.082e-05, "seqlen": 8192.0, "step_tflops": 55.11250310261659, "step_time": 0.374345064163208} +{"epoch": 0, "iter": 348, "iter_tflops": 55.012908427529695, "iter_time": 0.37502277374267573, "loss": 0.3406684100627899, "lr": 2.088e-05, "seqlen": 8192.0, "step_tflops": 60.073176199211254, "step_time": 0.34343270683288574} +{"epoch": 0, "iter": 349, "iter_tflops": 48.541863290198904, "iter_time": 0.42501651382446287, "loss": 0.30454713106155396, "lr": 2.094e-05, "seqlen": 8192.0, "step_tflops": 53.28465000623842, "step_time": 0.3871864318847656} +{"epoch": 0, "iter": 350, "iter_tflops": 14.869399667061542, "iter_time": 1.3874866485595703, "loss": 0.25362977385520935, "lr": 2.1e-05, "seqlen": 8192.0, "step_tflops": 15.787633207484726, "step_time": 1.306788246154785} +{"epoch": 0, "iter": 351, "iter_tflops": 24.220167467269945, "iter_time": 0.8518146514892578, "loss": 0.18629084527492523, "lr": 2.1059999999999998e-05, "seqlen": 8192.0, "step_tflops": 30.368003720508955, "step_time": 0.6793694343566894} +{"epoch": 0, "iter": 352, "iter_tflops": 49.3451447539289, "iter_time": 0.41809774017333984, "loss": 0.2531978487968445, "lr": 2.1119999999999998e-05, "seqlen": 8192.0, "step_tflops": 53.416008624748926, "step_time": 0.38623427772521973} +{"epoch": 0, "iter": 353, "iter_tflops": 19.879586145635724, "iter_time": 0.7581233291625977, "loss": 0.5615546107292175, "lr": 2.118e-05, "seqlen": 6032.0, "step_tflops": 20.900410726898446, "step_time": 0.7210948257446289} +{"epoch": 0, "iter": 354, "iter_tflops": 13.230225892421247, "iter_time": 1.139147445678711, "loss": 0.5677434206008911, "lr": 2.124e-05, "seqlen": 6032.0, "step_tflops": 15.372329294425244, "step_time": 0.980409523010254} +{"epoch": 0, "iter": 355, "iter_tflops": 27.285138358607252, "iter_time": 0.5523584976196289, 
"loss": 0.6294676065444946, "lr": 2.13e-05, "seqlen": 6032.0, "step_tflops": 29.070054049422893, "step_time": 0.5184434127807618} +{"epoch": 0, "iter": 356, "iter_tflops": 28.350064923158897, "iter_time": 0.5316100006103516, "loss": 0.6497498154640198, "lr": 2.136e-05, "seqlen": 6032.0, "step_tflops": 30.10035308079945, "step_time": 0.5006977157592772} +{"epoch": 0, "iter": 357, "iter_tflops": 42.26357309353236, "iter_time": 0.4401688423156738, "loss": 0.00353352096863091, "lr": 2.1419999999999998e-05, "seqlen": 7408.0, "step_tflops": 46.41387590411269, "step_time": 0.4008091907501221} +{"epoch": 0, "iter": 358, "iter_tflops": 45.410621178281055, "iter_time": 0.409664249420166, "loss": 0.013000319711863995, "lr": 2.148e-05, "seqlen": 7408.0, "step_tflops": 50.92586562955758, "step_time": 0.36529782676696776} +{"epoch": 0, "iter": 359, "iter_tflops": 50.06340707430197, "iter_time": 0.37159093093872064, "loss": 0.015763217583298683, "lr": 2.154e-05, "seqlen": 7408.0, "step_tflops": 55.020134007246696, "step_time": 0.33811455345153807} +{"epoch": 0, "iter": 360, "iter_tflops": 51.96252553574673, "iter_time": 0.358010082244873, "loss": 0.012007098644971848, "lr": 2.16e-05, "seqlen": 7408.0, "step_tflops": 57.40728474736332, "step_time": 0.32405483245849614} +{"epoch": 0, "iter": 361, "iter_tflops": 41.728267818305376, "iter_time": 0.4944152870178223, "loss": 0.007764178793877363, "lr": 2.166e-05, "seqlen": 8192.0, "step_tflops": 45.06093239564762, "step_time": 0.4578487930297852} +{"epoch": 0, "iter": 362, "iter_tflops": 15.03875415550002, "iter_time": 1.3718618774414062, "loss": 0.004851930774748325, "lr": 2.172e-05, "seqlen": 8192.0, "step_tflops": 18.390892486892092, "step_time": 1.1218103485107422} +{"epoch": 0, "iter": 363, "iter_tflops": 40.16403742167946, "iter_time": 0.5136708068847656, "loss": 0.042058419436216354, "lr": 2.178e-05, "seqlen": 8192.0, "step_tflops": 45.686715659892286, "step_time": 0.45157751464843754} +{"epoch": 0, "iter": 364, "iter_tflops": 
44.170571748397656, "iter_time": 0.4670778007507324, "loss": 0.007167781237512827, "lr": 2.184e-05, "seqlen": 8192.0, "step_tflops": 48.55286983340153, "step_time": 0.424920166015625} +{"epoch": 0, "iter": 365, "iter_tflops": 17.575798057800696, "iter_time": 0.9320377731323242, "loss": 0.0661921575665474, "lr": 2.19e-05, "seqlen": 6544.0, "step_tflops": 18.813046849914365, "step_time": 0.8707418746948242} +{"epoch": 0, "iter": 366, "iter_tflops": 15.846254321232955, "iter_time": 1.0337652893066407, "loss": 0.07581028342247009, "lr": 2.196e-05, "seqlen": 6544.0, "step_tflops": 19.5087798107643, "step_time": 0.8396889934539795} +{"epoch": 0, "iter": 367, "iter_tflops": 43.81188954114029, "iter_time": 0.37390096282958984, "loss": 0.041176147758960724, "lr": 2.202e-05, "seqlen": 6544.0, "step_tflops": 47.91306881865416, "step_time": 0.3418964405059815} +{"epoch": 0, "iter": 368, "iter_tflops": 45.88226387001185, "iter_time": 0.35702919387817383, "loss": 0.05139070749282837, "lr": 2.208e-05, "seqlen": 6544.0, "step_tflops": 50.08592521051168, "step_time": 0.3270640926361084} +{"epoch": 0, "iter": 369, "iter_tflops": 31.792560269250203, "iter_time": 0.6489283447265625, "loss": 0.9925256967544556, "lr": 2.214e-05, "seqlen": 8192.0, "step_tflops": 33.90452019984734, "step_time": 0.6085056915283202} +{"epoch": 0, "iter": 370, "iter_tflops": 20.40742596055166, "iter_time": 1.0109601058959963, "loss": 0.9467501044273376, "lr": 2.22e-05, "seqlen": 8192.0, "step_tflops": 25.168547775867637, "step_time": 0.8197172794342041} +{"epoch": 0, "iter": 371, "iter_tflops": 51.47121057240366, "iter_time": 0.40082782745361323, "loss": 0.8671295642852783, "lr": 2.226e-05, "seqlen": 8192.0, "step_tflops": 55.61397741895696, "step_time": 0.3709695739746094} +{"epoch": 0, "iter": 372, "iter_tflops": 45.00167545786927, "iter_time": 0.458451675415039, "loss": 1.0917702913284302, "lr": 2.232e-05, "seqlen": 8192.0, "step_tflops": 48.387075544564915, "step_time": 0.4263761196136475} +{"epoch": 0, 
"iter": 373, "iter_tflops": 28.6909513525785, "iter_time": 0.7190801467895508, "loss": 0.19745895266532898, "lr": 2.238e-05, "seqlen": 8192.0, "step_tflops": 30.198513518073757, "step_time": 0.6831824188232423} +{"epoch": 0, "iter": 374, "iter_tflops": 18.633444196933663, "iter_time": 1.1072077331542969, "loss": 0.17786136269569397, "lr": 2.2440000000000002e-05, "seqlen": 8192.0, "step_tflops": 23.072509633036997, "step_time": 0.8941850643157959} +{"epoch": 0, "iter": 375, "iter_tflops": 38.103042144516216, "iter_time": 0.5414552841186523, "loss": 0.1983841508626938, "lr": 2.25e-05, "seqlen": 8192.0, "step_tflops": 42.30450132099897, "step_time": 0.4876808109283447} +{"epoch": 0, "iter": 376, "iter_tflops": 45.936442991880966, "iter_time": 0.44912257385253906, "loss": 0.17902401089668274, "lr": 2.256e-05, "seqlen": 8192.0, "step_tflops": 50.27515551823059, "step_time": 0.4103635940551758} +{"epoch": 0, "iter": 377, "iter_tflops": 35.61644515185295, "iter_time": 0.5792575149536133, "loss": 0.9345681667327881, "lr": 2.262e-05, "seqlen": 8192.0, "step_tflops": 39.4515694141265, "step_time": 0.5229473457336425} +{"epoch": 0, "iter": 378, "iter_tflops": 37.516297281084, "iter_time": 0.5499235000610351, "loss": 0.7916598916053772, "lr": 2.268e-05, "seqlen": 8192.0, "step_tflops": 41.50686510063674, "step_time": 0.49705255889892574} +{"epoch": 0, "iter": 379, "iter_tflops": 38.96933945141416, "iter_time": 0.5294186096191407, "loss": 0.6489274501800537, "lr": 2.274e-05, "seqlen": 8192.0, "step_tflops": 42.45458739130117, "step_time": 0.4859567546844482} +{"epoch": 0, "iter": 380, "iter_tflops": 43.95045557218068, "iter_time": 0.4694170570373535, "loss": 0.8573379516601562, "lr": 2.2800000000000002e-05, "seqlen": 8192.0, "step_tflops": 47.70046284526143, "step_time": 0.4325134868621826} +{"epoch": 0, "iter": 381, "iter_tflops": 32.955667643822004, "iter_time": 0.6260256576538086, "loss": 0.7103372812271118, "lr": 2.286e-05, "seqlen": 8192.0, "step_tflops": 
35.97654326753432, "step_time": 0.5734595832824707} +{"epoch": 0, "iter": 382, "iter_tflops": 39.9668030231338, "iter_time": 0.5162057495117187, "loss": 0.9438204765319824, "lr": 2.292e-05, "seqlen": 8192.0, "step_tflops": 43.79800095418802, "step_time": 0.4710510311126709} +{"epoch": 0, "iter": 383, "iter_tflops": 38.89276674617348, "iter_time": 0.5304609375, "loss": 1.0343990325927734, "lr": 2.298e-05, "seqlen": 8192.0, "step_tflops": 42.50649867695041, "step_time": 0.4853632774353027} +{"epoch": 0, "iter": 384, "iter_tflops": 43.430604902177876, "iter_time": 0.475035831451416, "loss": 0.8542938232421875, "lr": 2.304e-05, "seqlen": 8192.0, "step_tflops": 47.40048033135682, "step_time": 0.43525072669982906} +{"epoch": 0, "iter": 385, "iter_tflops": 22.33927550365518, "iter_time": 0.9235345840454102, "loss": 1.3424527645111084, "lr": 2.3100000000000002e-05, "seqlen": 8192.0, "step_tflops": 23.8807328471587, "step_time": 0.8639221267700196} +{"epoch": 0, "iter": 386, "iter_tflops": 19.691996855360966, "iter_time": 1.0476892547607422, "loss": 1.178105354309082, "lr": 2.3160000000000002e-05, "seqlen": 8192.0, "step_tflops": 23.73774045524322, "step_time": 0.8691262569427491} +{"epoch": 0, "iter": 387, "iter_tflops": 45.046635750367116, "iter_time": 0.4579941024780274, "loss": 1.1752568483352661, "lr": 2.322e-05, "seqlen": 8192.0, "step_tflops": 48.77748920670353, "step_time": 0.4229634170532226} +{"epoch": 0, "iter": 388, "iter_tflops": 44.42060043076905, "iter_time": 0.46444877624511716, "loss": 1.1426459550857544, "lr": 2.328e-05, "seqlen": 8192.0, "step_tflops": 47.645904306836485, "step_time": 0.4330087509155273} +{"epoch": 0, "iter": 389, "iter_tflops": 31.764053921081643, "iter_time": 0.6495107192993164, "loss": 0.640899121761322, "lr": 2.334e-05, "seqlen": 8192.0, "step_tflops": 33.69207626388856, "step_time": 0.6123425979614258} +{"epoch": 0, "iter": 390, "iter_tflops": 13.031329286933078, "iter_time": 1.5831917877197266, "loss": 0.5993897318840027, "lr": 
2.3400000000000003e-05, "seqlen": 8192.0, "step_tflops": 15.830191217819316, "step_time": 1.3032750663757326} +{"epoch": 0, "iter": 391, "iter_tflops": 32.88299234404579, "iter_time": 0.6274092483520508, "loss": 0.5497337579727173, "lr": 2.3460000000000002e-05, "seqlen": 8192.0, "step_tflops": 40.32916583645403, "step_time": 0.5115675735473633} +{"epoch": 0, "iter": 392, "iter_tflops": 38.07532700212715, "iter_time": 0.5418494110107422, "loss": 0.7474902868270874, "lr": 2.3520000000000002e-05, "seqlen": 8192.0, "step_tflops": 41.34527028288062, "step_time": 0.4989952507019043} +{"epoch": 0, "iter": 393, "iter_tflops": 17.951618182058816, "iter_time": 1.0546923217773438, "loss": 0.1021069809794426, "lr": 2.358e-05, "seqlen": 7536.0, "step_tflops": 19.049934487896518, "step_time": 0.9938844604492186} +{"epoch": 0, "iter": 394, "iter_tflops": 20.04844603979937, "iter_time": 0.9443841094970703, "loss": 0.13390175998210907, "lr": 2.364e-05, "seqlen": 7536.0, "step_tflops": 24.003286223772594, "step_time": 0.7887850723266602} +{"epoch": 0, "iter": 395, "iter_tflops": 46.55798746215861, "iter_time": 0.40666349411010744, "loss": 0.2262147068977356, "lr": 2.37e-05, "seqlen": 7536.0, "step_tflops": 51.078749050739276, "step_time": 0.37067144775390626} +{"epoch": 0, "iter": 396, "iter_tflops": 49.87606753625166, "iter_time": 0.3796095962524414, "loss": 0.16130472719669342, "lr": 2.3760000000000003e-05, "seqlen": 7536.0, "step_tflops": 54.15473374011785, "step_time": 0.349617338180542} +{"epoch": 0, "iter": 397, "iter_tflops": 46.958817685285226, "iter_time": 0.43934439849853507, "loss": 1.067458987236023, "lr": 2.3820000000000002e-05, "seqlen": 8192.0, "step_tflops": 51.16381557195888, "step_time": 0.4032360229492188} +{"epoch": 0, "iter": 398, "iter_tflops": 45.661439125291565, "iter_time": 0.4518274917602539, "loss": 1.290739893913269, "lr": 2.3880000000000002e-05, "seqlen": 8192.0, "step_tflops": 49.12743403723865, "step_time": 0.4199505615234375} +{"epoch": 0, "iter": 
399, "iter_tflops": 46.28707504765021, "iter_time": 0.4457203979492188, "loss": 1.2584599256515503, "lr": 2.394e-05, "seqlen": 8192.0, "step_tflops": 49.96860586072582, "step_time": 0.41288111114501946} +{"epoch": 0, "iter": 400, "iter_tflops": 52.29852529640095, "iter_time": 0.3944870986938476, "loss": 1.1451901197433472, "lr": 2.4e-05, "seqlen": 8192.0, "step_tflops": 56.399353957901255, "step_time": 0.36580372047424314} +{"epoch": 0, "iter": 401, "iter_tflops": 28.67936778306251, "iter_time": 0.7193705825805665, "loss": 0.7794200778007507, "lr": 2.4060000000000003e-05, "seqlen": 8192.0, "step_tflops": 30.166806498584002, "step_time": 0.6839004821777344} +{"epoch": 0, "iter": 402, "iter_tflops": 10.945101969221556, "iter_time": 1.8849612884521483, "loss": 0.9648943543434143, "lr": 2.4120000000000003e-05, "seqlen": 8192.0, "step_tflops": 14.063412055154567, "step_time": 1.4670048370361326} +{"epoch": 0, "iter": 403, "iter_tflops": 17.168637339328193, "iter_time": 1.201673324584961, "loss": 0.9302108883857727, "lr": 2.4180000000000002e-05, "seqlen": 8192.0, "step_tflops": 18.96929461487153, "step_time": 1.0876046752929687} +{"epoch": 0, "iter": 404, "iter_tflops": 25.86928727859173, "iter_time": 0.7975130233764647, "loss": 0.6252689957618713, "lr": 2.4240000000000002e-05, "seqlen": 8192.0, "step_tflops": 31.649027273336777, "step_time": 0.6518713302612305} +{"epoch": 0, "iter": 405, "iter_tflops": 24.21696096213909, "iter_time": 0.637535140991211, "loss": 0.5956109166145325, "lr": 2.43e-05, "seqlen": 6176.0, "step_tflops": 25.81243517918676, "step_time": 0.5981289062499999} +{"epoch": 0, "iter": 406, "iter_tflops": 23.089504224027188, "iter_time": 0.6686658782958985, "loss": 0.6982903480529785, "lr": 2.4360000000000004e-05, "seqlen": 6176.0, "step_tflops": 24.766602680438197, "step_time": 0.6233864135742186} +{"epoch": 0, "iter": 407, "iter_tflops": 23.56523421582496, "iter_time": 0.6551669921875, "loss": 0.8026307225227356, "lr": 2.442e-05, "seqlen": 6176.0, 
"step_tflops": 25.178712502679222, "step_time": 0.6131832046508789} +{"epoch": 0, "iter": 408, "iter_tflops": 24.432103979272632, "iter_time": 0.6319211654663086, "loss": 0.657933235168457, "lr": 2.448e-05, "seqlen": 6176.0, "step_tflops": 26.235605977637004, "step_time": 0.5884813041687013} +{"epoch": 0, "iter": 409, "iter_tflops": 22.47150799254473, "iter_time": 0.8150018539428711, "loss": 0.39563649892807007, "lr": 2.454e-05, "seqlen": 7296.0, "step_tflops": 24.503656200078964, "step_time": 0.7474117546081543} +{"epoch": 0, "iter": 410, "iter_tflops": 37.256681578832, "iter_time": 0.4915714416503907, "loss": 0.2091742902994156, "lr": 2.4599999999999998e-05, "seqlen": 7296.0, "step_tflops": 48.803890606652885, "step_time": 0.37526353836059567} +{"epoch": 0, "iter": 411, "iter_tflops": 35.89175759592483, "iter_time": 0.5102653617858887, "loss": 0.2640657126903534, "lr": 2.4659999999999998e-05, "seqlen": 7296.0, "step_tflops": 39.092553696107544, "step_time": 0.4684861679077149} +{"epoch": 0, "iter": 412, "iter_tflops": 44.7257436056131, "iter_time": 0.4094805183410644, "loss": 0.1595914661884308, "lr": 2.472e-05, "seqlen": 7296.0, "step_tflops": 48.4184373966709, "step_time": 0.3782509651184082} +{"epoch": 0, "iter": 413, "iter_tflops": 31.95612474227102, "iter_time": 0.6456068649291993, "loss": 0.0777617022395134, "lr": 2.478e-05, "seqlen": 8192.0, "step_tflops": 34.0495377922036, "step_time": 0.6059140548706055} +{"epoch": 0, "iter": 414, "iter_tflops": 15.259520939662245, "iter_time": 1.3520144958496094, "loss": 0.0441213957965374, "lr": 2.484e-05, "seqlen": 8192.0, "step_tflops": 18.837880940513756, "step_time": 1.0951918411254884} +{"epoch": 0, "iter": 415, "iter_tflops": 52.198175509840866, "iter_time": 0.395245491027832, "loss": 0.07452769577503204, "lr": 2.49e-05, "seqlen": 8192.0, "step_tflops": 56.911824329518794, "step_time": 0.3625097904205323} +{"epoch": 0, "iter": 416, "iter_tflops": 56.2783232774671, "iter_time": 0.3665904083251953, "loss": 
0.09279096126556396, "lr": 2.4959999999999998e-05, "seqlen": 8192.0, "step_tflops": 61.723365390012475, "step_time": 0.3342509498596191} +{"epoch": 0, "iter": 417, "iter_tflops": 31.83380677332323, "iter_time": 0.6480875396728516, "loss": 1.0432618856430054, "lr": 2.502e-05, "seqlen": 8192.0, "step_tflops": 33.80759334324288, "step_time": 0.6102502861022949} +{"epoch": 0, "iter": 418, "iter_tflops": 26.168913292435537, "iter_time": 0.7883817443847656, "loss": 0.9277839660644531, "lr": 2.508e-05, "seqlen": 8192.0, "step_tflops": 31.8577962805189, "step_time": 0.6475995178222657} +{"epoch": 0, "iter": 419, "iter_tflops": 40.50224401409035, "iter_time": 0.5093814926147461, "loss": 0.9849147796630859, "lr": 2.514e-05, "seqlen": 8192.0, "step_tflops": 44.12284051336189, "step_time": 0.4675830764770508} +{"epoch": 0, "iter": 420, "iter_tflops": 46.794225799217735, "iter_time": 0.44088972854614256, "loss": 0.9885811805725098, "lr": 2.52e-05, "seqlen": 8192.0, "step_tflops": 50.73478524887684, "step_time": 0.4066459217071533} +{"epoch": 0, "iter": 421, "iter_tflops": 16.66767847362016, "iter_time": 1.2377904663085937, "loss": 0.1422169804573059, "lr": 2.526e-05, "seqlen": 8192.0, "step_tflops": 17.950106296439454, "step_time": 1.1493577346801758} +{"epoch": 0, "iter": 422, "iter_tflops": 14.525193740756553, "iter_time": 1.4203661499023439, "loss": 0.15521381795406342, "lr": 2.5319999999999998e-05, "seqlen": 8192.0, "step_tflops": 17.29685174154833, "step_time": 1.1927658176422118} +{"epoch": 0, "iter": 423, "iter_tflops": 42.311328947868034, "iter_time": 0.48760211563110356, "loss": 0.13443075120449066, "lr": 2.538e-05, "seqlen": 8192.0, "step_tflops": 46.76295260905786, "step_time": 0.44118457794189453} +{"epoch": 0, "iter": 424, "iter_tflops": 42.53750237576801, "iter_time": 0.4850095176696777, "loss": 0.11502664536237717, "lr": 2.544e-05, "seqlen": 8192.0, "step_tflops": 46.6515096725939, "step_time": 0.4422384967803955} +{"epoch": 0, "iter": 425, "iter_tflops": 
18.493097470463663, "iter_time": 1.1156104888916016, "loss": 0.6984594464302063, "lr": 2.55e-05, "seqlen": 8192.0, "step_tflops": 19.57907012318723, "step_time": 1.0537320404052735} +{"epoch": 0, "iter": 426, "iter_tflops": 15.480186961757868, "iter_time": 1.3327418823242188, "loss": 0.8745102882385254, "lr": 2.556e-05, "seqlen": 8192.0, "step_tflops": 18.606594891259466, "step_time": 1.1088054332733155} +{"epoch": 0, "iter": 427, "iter_tflops": 37.679145547343595, "iter_time": 0.5475467453002929, "loss": 0.7688736319541931, "lr": 2.562e-05, "seqlen": 8192.0, "step_tflops": 41.05509187739684, "step_time": 0.5025221614837646} +{"epoch": 0, "iter": 428, "iter_tflops": 38.94196019215455, "iter_time": 0.5297908325195313, "loss": 0.8690835237503052, "lr": 2.568e-05, "seqlen": 8192.0, "step_tflops": 42.56273826168373, "step_time": 0.48472195053100586} +{"epoch": 0, "iter": 429, "iter_tflops": 39.242785926876685, "iter_time": 0.5257295837402344, "loss": 0.42906612157821655, "lr": 2.574e-05, "seqlen": 8192.0, "step_tflops": 43.49167768777062, "step_time": 0.474368766784668} +{"epoch": 0, "iter": 430, "iter_tflops": 39.13451527339754, "iter_time": 0.5271840820312499, "loss": 0.40006494522094727, "lr": 2.58e-05, "seqlen": 8192.0, "step_tflops": 43.31316026317696, "step_time": 0.476323902130127} +{"epoch": 0, "iter": 431, "iter_tflops": 42.61363976039705, "iter_time": 0.4841429557800293, "loss": 0.43751564621925354, "lr": 2.586e-05, "seqlen": 8192.0, "step_tflops": 46.288877590573804, "step_time": 0.44570304107666014} +{"epoch": 0, "iter": 432, "iter_tflops": 42.17994647737544, "iter_time": 0.4891209030151367, "loss": 0.47830137610435486, "lr": 2.592e-05, "seqlen": 8192.0, "step_tflops": 45.90405471533202, "step_time": 0.44943945884704584} +{"epoch": 0, "iter": 433, "iter_tflops": 24.1229092919401, "iter_time": 0.8552489776611327, "loss": 0.48981618881225586, "lr": 2.5980000000000002e-05, "seqlen": 8192.0, "step_tflops": 25.703947787866856, "step_time": 0.8026429901123046} 
+{"epoch": 0, "iter": 434, "iter_tflops": 17.667172001945755, "iter_time": 1.1677643432617189, "loss": 0.6803942918777466, "lr": 2.604e-05, "seqlen": 8192.0, "step_tflops": 21.90843733438579, "step_time": 0.9416962604522705} +{"epoch": 0, "iter": 435, "iter_tflops": 37.453585017572514, "iter_time": 0.5508442916870118, "loss": 0.5314188003540039, "lr": 2.61e-05, "seqlen": 8192.0, "step_tflops": 40.819468227606656, "step_time": 0.5054228878021241} +{"epoch": 0, "iter": 436, "iter_tflops": 45.467900521358686, "iter_time": 0.45375074005126953, "loss": 0.6539049744606018, "lr": 2.616e-05, "seqlen": 8192.0, "step_tflops": 49.27064436712197, "step_time": 0.41872993087768556} +{"epoch": 0, "iter": 437, "iter_tflops": 24.421408585741826, "iter_time": 0.8447953948974609, "loss": 1.0680216550827026, "lr": 2.622e-05, "seqlen": 8192.0, "step_tflops": 26.059239675631762, "step_time": 0.7916997489929201} +{"epoch": 0, "iter": 438, "iter_tflops": 17.60542605255614, "iter_time": 1.1718599395751954, "loss": 1.044469952583313, "lr": 2.628e-05, "seqlen": 8192.0, "step_tflops": 20.83711333023907, "step_time": 0.9901128425598145} +{"epoch": 0, "iter": 439, "iter_tflops": 37.104663183683556, "iter_time": 0.5560242767333985, "loss": 1.3130377531051636, "lr": 2.6340000000000002e-05, "seqlen": 8192.0, "step_tflops": 40.48495644226798, "step_time": 0.5095990047454834} +{"epoch": 0, "iter": 440, "iter_tflops": 34.300083415870624, "iter_time": 0.6014881439208983, "loss": 1.1751153469085693, "lr": 2.64e-05, "seqlen": 8192.0, "step_tflops": 37.23264847241536, "step_time": 0.5541129722595215} +{"epoch": 0, "iter": 441, "iter_tflops": 36.037887197632124, "iter_time": 0.5724834365844726, "loss": 0.3651660978794098, "lr": 2.646e-05, "seqlen": 8192.0, "step_tflops": 39.8978849447135, "step_time": 0.5170974235534669} +{"epoch": 0, "iter": 442, "iter_tflops": 42.9486410782741, "iter_time": 0.4803666191101074, "loss": 0.3851735293865204, "lr": 2.652e-05, "seqlen": 8192.0, "step_tflops": 
47.86986725209864, "step_time": 0.43098288536071777} +{"epoch": 0, "iter": 443, "iter_tflops": 44.35945884704287, "iter_time": 0.4650889358520508, "loss": 0.2882857918739319, "lr": 2.658e-05, "seqlen": 8192.0, "step_tflops": 48.49327601345316, "step_time": 0.4254423542022705} +{"epoch": 0, "iter": 444, "iter_tflops": 40.100869843619876, "iter_time": 0.5144799499511719, "loss": 0.3229615092277527, "lr": 2.6640000000000002e-05, "seqlen": 8192.0, "step_tflops": 43.95577810105099, "step_time": 0.4693602161407471} +{"epoch": 0, "iter": 445, "iter_tflops": 18.017390295800706, "iter_time": 1.1450655822753908, "loss": 0.06481076776981354, "lr": 2.6700000000000002e-05, "seqlen": 8192.0, "step_tflops": 19.26393139377916, "step_time": 1.0709700469970704} +{"epoch": 0, "iter": 446, "iter_tflops": 23.429516847091914, "iter_time": 0.8805599212646484, "loss": 0.046095747500658035, "lr": 2.676e-05, "seqlen": 8192.0, "step_tflops": 28.391695179046494, "step_time": 0.7266594467163086} +{"epoch": 0, "iter": 447, "iter_tflops": 54.091102986018626, "iter_time": 0.38141380691528326, "loss": 0.1270509511232376, "lr": 2.682e-05, "seqlen": 8192.0, "step_tflops": 59.16714211159143, "step_time": 0.3486917362213135} +{"epoch": 0, "iter": 448, "iter_tflops": 56.0623728732087, "iter_time": 0.3680025024414062, "loss": 0.06839845329523087, "lr": 2.688e-05, "seqlen": 8192.0, "step_tflops": 61.17296746176724, "step_time": 0.33725834083557127} +{"epoch": 0, "iter": 449, "iter_tflops": 37.34953796417952, "iter_time": 0.5523788146972657, "loss": 1.052770733833313, "lr": 2.6940000000000003e-05, "seqlen": 8192.0, "step_tflops": 40.21701560098256, "step_time": 0.5129941444396973} +{"epoch": 0, "iter": 450, "iter_tflops": 28.07158557345519, "iter_time": 0.7349457855224609, "loss": 0.9903242588043213, "lr": 2.7000000000000002e-05, "seqlen": 8192.0, "step_tflops": 34.355117641813244, "step_time": 0.600524606704712} +{"epoch": 0, "iter": 451, "iter_tflops": 49.0094179112349, "iter_time": 0.4209618148803711, 
"loss": 0.8248346447944641, "lr": 2.7060000000000002e-05, "seqlen": 8192.0, "step_tflops": 53.00815954922726, "step_time": 0.3892059955596924} +{"epoch": 0, "iter": 452, "iter_tflops": 49.44691528804321, "iter_time": 0.41723722076416014, "loss": 0.9206903576850891, "lr": 2.712e-05, "seqlen": 8192.0, "step_tflops": 53.20311471518623, "step_time": 0.38777980613708496} +{"epoch": 0, "iter": 453, "iter_tflops": 33.10026542751632, "iter_time": 0.6232908782958985, "loss": 0.15317736566066742, "lr": 2.718e-05, "seqlen": 8192.0, "step_tflops": 35.366936044213766, "step_time": 0.5833441009521484} +{"epoch": 0, "iter": 454, "iter_tflops": 32.21586719866264, "iter_time": 0.6404016189575196, "loss": 0.08494804799556732, "lr": 2.724e-05, "seqlen": 8192.0, "step_tflops": 40.54103648623984, "step_time": 0.5088940811157227} +{"epoch": 0, "iter": 455, "iter_tflops": 54.429306605018276, "iter_time": 0.3790438423156738, "loss": 0.16271160542964935, "lr": 2.7300000000000003e-05, "seqlen": 8192.0, "step_tflops": 59.45967091897496, "step_time": 0.34697624778747554} +{"epoch": 0, "iter": 456, "iter_tflops": 53.58995593231109, "iter_time": 0.3849806022644043, "loss": 0.12504367530345917, "lr": 2.7360000000000002e-05, "seqlen": 8192.0, "step_tflops": 58.47629046802648, "step_time": 0.35281125640869143} +{"epoch": 0, "iter": 457, "iter_tflops": 48.57045221568257, "iter_time": 0.4247663459777832, "loss": 0.5111910700798035, "lr": 2.7420000000000002e-05, "seqlen": 8192.0, "step_tflops": 53.26670360274368, "step_time": 0.38731688117980956} +{"epoch": 0, "iter": 458, "iter_tflops": 45.43797666484467, "iter_time": 0.4540495643615723, "loss": 0.4996762275695801, "lr": 2.748e-05, "seqlen": 8192.0, "step_tflops": 50.8076342945741, "step_time": 0.40606286430358884} +{"epoch": 0, "iter": 459, "iter_tflops": 44.00148669697589, "iter_time": 0.46887264633178705, "loss": 0.4080991744995117, "lr": 2.754e-05, "seqlen": 8192.0, "step_tflops": 47.10249786159711, "step_time": 0.43800423431396485} +{"epoch": 
0, "iter": 460, "iter_tflops": 47.2595531015398, "iter_time": 0.4365486373901367, "loss": 0.40753284096717834, "lr": 2.7600000000000003e-05, "seqlen": 8192.0, "step_tflops": 51.04946391555267, "step_time": 0.40413927841186525} +{"epoch": 0, "iter": 461, "iter_tflops": 28.64898821887546, "iter_time": 0.7201334075927736, "loss": 0.11193523555994034, "lr": 2.7660000000000003e-05, "seqlen": 8192.0, "step_tflops": 30.17862879127573, "step_time": 0.683632568359375} +{"epoch": 0, "iter": 462, "iter_tflops": 13.678248134119226, "iter_time": 1.5083140258789063, "loss": 0.19339719414710999, "lr": 2.7720000000000002e-05, "seqlen": 8192.0, "step_tflops": 17.574735233240578, "step_time": 1.1739063625335693} +{"epoch": 0, "iter": 463, "iter_tflops": 41.074150691571184, "iter_time": 0.5022889862060547, "loss": 0.14679105579853058, "lr": 2.778e-05, "seqlen": 8192.0, "step_tflops": 45.17135375236717, "step_time": 0.4567295818328857} +{"epoch": 0, "iter": 464, "iter_tflops": 45.79628358991133, "iter_time": 0.4504971122741699, "loss": 0.1508486270904541, "lr": 2.784e-05, "seqlen": 8192.0, "step_tflops": 50.23445567506867, "step_time": 0.41069606971740724} +{"epoch": 0, "iter": 465, "iter_tflops": 25.06434771806765, "iter_time": 0.8231250915527345, "loss": 0.7360296845436096, "lr": 2.79e-05, "seqlen": 8192.0, "step_tflops": 26.903732028184876, "step_time": 0.7668487586975098} +{"epoch": 0, "iter": 466, "iter_tflops": 9.294784260718012, "iter_time": 2.2196419982910154, "loss": 0.580294668674469, "lr": 2.7960000000000003e-05, "seqlen": 8192.0, "step_tflops": 10.321286253906145, "step_time": 1.9988878326416017} +{"epoch": 0, "iter": 467, "iter_tflops": 16.697826014822425, "iter_time": 1.2355556640624998, "loss": 0.5485919713973999, "lr": 2.8020000000000003e-05, "seqlen": 8192.0, "step_tflops": 21.36149797862865, "step_time": 0.9658074321746827} +{"epoch": 0, "iter": 468, "iter_tflops": 47.5768941024255, "iter_time": 0.4336368293762207, "loss": 0.6177873611450195, "lr": 
2.8080000000000002e-05, "seqlen": 8192.0, "step_tflops": 51.406274532046226, "step_time": 0.40133415031433106} +{"epoch": 0, "iter": 469, "iter_tflops": 24.57296659401349, "iter_time": 0.6299636001586914, "loss": 0.6576817035675049, "lr": 2.8139999999999998e-05, "seqlen": 6192.0, "step_tflops": 26.315861611767698, "step_time": 0.5882412185668946} +{"epoch": 0, "iter": 470, "iter_tflops": 23.200056498077483, "iter_time": 0.6672429656982422, "loss": 0.5699245929718018, "lr": 2.8199999999999998e-05, "seqlen": 6192.0, "step_tflops": 25.007549702091175, "step_time": 0.6190160446166992} +{"epoch": 0, "iter": 471, "iter_tflops": 22.53624849151035, "iter_time": 0.6868966903686524, "loss": 0.8236421942710876, "lr": 2.826e-05, "seqlen": 6192.0, "step_tflops": 24.133560777623746, "step_time": 0.6414335060119629} +{"epoch": 0, "iter": 472, "iter_tflops": 24.69747754384533, "iter_time": 0.6267876739501952, "loss": 0.5862663388252258, "lr": 2.832e-05, "seqlen": 6192.0, "step_tflops": 26.45218865607305, "step_time": 0.5852095909118653} +{"epoch": 0, "iter": 473, "iter_tflops": 22.716601899965866, "iter_time": 0.9081945266723633, "loss": 0.32422712445259094, "lr": 2.838e-05, "seqlen": 8192.0, "step_tflops": 24.02315611563809, "step_time": 0.8588002929687499} +{"epoch": 0, "iter": 474, "iter_tflops": 10.455473022505654, "iter_time": 1.9732338714599609, "loss": 0.4656655788421631, "lr": 2.844e-05, "seqlen": 8192.0, "step_tflops": 12.037256954708441, "step_time": 1.713936454772949} +{"epoch": 0, "iter": 475, "iter_tflops": 12.686809287693205, "iter_time": 1.6261845703125, "loss": 0.4723568260669708, "lr": 2.8499999999999998e-05, "seqlen": 8192.0, "step_tflops": 14.217907320161062, "step_time": 1.4510640029907227} +{"epoch": 0, "iter": 476, "iter_tflops": 19.740426065095516, "iter_time": 1.0451189575195312, "loss": 0.35951030254364014, "lr": 2.856e-05, "seqlen": 8192.0, "step_tflops": 21.80738411194892, "step_time": 0.946059986114502} +{"epoch": 0, "iter": 477, "iter_tflops": 
18.7897185313416, "iter_time": 0.9396151123046875, "loss": 0.665556788444519, "lr": 2.862e-05, "seqlen": 7040.0, "step_tflops": 19.650623294020193, "step_time": 0.8984500503540039} +{"epoch": 0, "iter": 478, "iter_tflops": 11.726496155120879, "iter_time": 1.5055736389160155, "loss": 0.6561362147331238, "lr": 2.868e-05, "seqlen": 7040.0, "step_tflops": 14.847961466472805, "step_time": 1.1890590858459473} +{"epoch": 0, "iter": 479, "iter_tflops": 32.645244040105894, "iter_time": 0.5408170166015625, "loss": 0.6250931620597839, "lr": 2.874e-05, "seqlen": 7040.0, "step_tflops": 34.75809524310331, "step_time": 0.5079422035217285} +{"epoch": 0, "iter": 480, "iter_tflops": 31.142747900713456, "iter_time": 0.5669089813232422, "loss": 0.6538581848144531, "lr": 2.88e-05, "seqlen": 7040.0, "step_tflops": 33.034223997445885, "step_time": 0.5344488639831543} +{"epoch": 0, "iter": 481, "iter_tflops": 34.65648117128131, "iter_time": 0.595302604675293, "loss": 1.127738356590271, "lr": 2.8859999999999998e-05, "seqlen": 8192.0, "step_tflops": 36.95709588395359, "step_time": 0.5582444458007813} +{"epoch": 0, "iter": 482, "iter_tflops": 13.03216643699269, "iter_time": 1.5830900878906249, "loss": 1.2176660299301147, "lr": 2.892e-05, "seqlen": 8192.0, "step_tflops": 17.279875869791454, "step_time": 1.1939375991821288} +{"epoch": 0, "iter": 483, "iter_tflops": 42.4657458018281, "iter_time": 0.48582906341552734, "loss": 1.0775829553604126, "lr": 2.898e-05, "seqlen": 8192.0, "step_tflops": 46.59289045176954, "step_time": 0.44279488372802733} +{"epoch": 0, "iter": 484, "iter_tflops": 44.44754586875505, "iter_time": 0.4641672134399414, "loss": 1.0781718492507935, "lr": 2.904e-05, "seqlen": 8192.0, "step_tflops": 47.904356355212094, "step_time": 0.4306725959777832} +{"epoch": 0, "iter": 485, "iter_tflops": 36.681483749504416, "iter_time": 0.5624389038085937, "loss": 1.2728184461593628, "lr": 2.91e-05, "seqlen": 8192.0, "step_tflops": 39.42535477236646, "step_time": 0.5232950630187988} 
+{"epoch": 0, "iter": 486, "iter_tflops": 14.711620407637012, "iter_time": 1.402367172241211, "loss": 1.0538908243179321, "lr": 2.916e-05, "seqlen": 8192.0, "step_tflops": 16.545984064959413, "step_time": 1.2468943176269531} +{"epoch": 0, "iter": 487, "iter_tflops": 44.55043735841056, "iter_time": 0.4630951957702637, "loss": 1.188881516456604, "lr": 2.922e-05, "seqlen": 8192.0, "step_tflops": 48.0626035650174, "step_time": 0.4292545967102051} +{"epoch": 0, "iter": 488, "iter_tflops": 43.18390402559831, "iter_time": 0.47774961471557614, "loss": 1.0239485502243042, "lr": 2.928e-05, "seqlen": 8192.0, "step_tflops": 46.25988399187081, "step_time": 0.4459823875427246} +{"epoch": 0, "iter": 489, "iter_tflops": 50.21631606667082, "iter_time": 0.41084442520141606, "loss": 0.23391909897327423, "lr": 2.934e-05, "seqlen": 8192.0, "step_tflops": 54.986985504176516, "step_time": 0.37519957351684574} +{"epoch": 0, "iter": 490, "iter_tflops": 39.609334970485534, "iter_time": 0.5208644256591797, "loss": 0.2675726115703583, "lr": 2.94e-05, "seqlen": 8192.0, "step_tflops": 44.107407934742405, "step_time": 0.4677466773986817} +{"epoch": 0, "iter": 491, "iter_tflops": 41.056976687938196, "iter_time": 0.5024990921020508, "loss": 0.3206675052642822, "lr": 2.946e-05, "seqlen": 8192.0, "step_tflops": 45.102061210222864, "step_time": 0.4574312782287598} +{"epoch": 0, "iter": 492, "iter_tflops": 41.335128043317525, "iter_time": 0.4991176872253418, "loss": 0.2653185725212097, "lr": 2.9520000000000002e-05, "seqlen": 8192.0, "step_tflops": 45.32596731076725, "step_time": 0.45517160987854005} +{"epoch": 0, "iter": 493, "iter_tflops": 19.19409660975768, "iter_time": 1.0748666076660156, "loss": 0.04231259226799011, "lr": 2.958e-05, "seqlen": 8192.0, "step_tflops": 20.37969475647732, "step_time": 1.0123357467651366} +{"epoch": 0, "iter": 494, "iter_tflops": 12.834841865097209, "iter_time": 1.607428726196289, "loss": 0.03661578521132469, "lr": 2.964e-05, "seqlen": 8192.0, "step_tflops": 
18.238861358060014, "step_time": 1.1311612663269044} +{"epoch": 0, "iter": 495, "iter_tflops": 54.1703495730964, "iter_time": 0.3808558311462402, "loss": 0.07716088742017746, "lr": 2.97e-05, "seqlen": 8192.0, "step_tflops": 59.44963429571243, "step_time": 0.34703482627868654} +{"epoch": 0, "iter": 496, "iter_tflops": 52.49427801418415, "iter_time": 0.3930160446166992, "loss": 0.05950884893536568, "lr": 2.976e-05, "seqlen": 8192.0, "step_tflops": 57.27543260824777, "step_time": 0.36020842742919923} +{"epoch": 0, "iter": 497, "iter_tflops": 21.026163102829162, "iter_time": 0.9812105712890625, "loss": 0.7990596294403076, "lr": 2.982e-05, "seqlen": 8192.0, "step_tflops": 21.919552720175577, "step_time": 0.9412187271118164} +{"epoch": 0, "iter": 498, "iter_tflops": 18.847275760108023, "iter_time": 1.0946459197998046, "loss": 0.6564105749130249, "lr": 2.9880000000000002e-05, "seqlen": 8192.0, "step_tflops": 22.93021075906382, "step_time": 0.8997341423034667} +{"epoch": 0, "iter": 499, "iter_tflops": 49.13759520132663, "iter_time": 0.4198637199401855, "loss": 0.8367950916290283, "lr": 2.994e-05, "seqlen": 8192.0, "step_tflops": 53.29648776577592, "step_time": 0.3871004333496093} +{"epoch": 0, "iter": 500, "iter_tflops": 49.40836522715776, "iter_time": 0.41756276321411134, "loss": 0.5730463862419128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.41576040387127, "step_time": 0.3862360725402832} +{"epoch": 0, "iter": 501, "iter_tflops": 44.009636447240815, "iter_time": 0.4687858200073242, "loss": 0.2778932452201843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.947669830773236, "step_time": 0.4302835483551026} +{"epoch": 0, "iter": 502, "iter_tflops": 36.71008893911185, "iter_time": 0.5620006408691406, "loss": 0.3293323218822479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.40399469950712, "step_time": 0.5106201419830322} +{"epoch": 0, "iter": 503, "iter_tflops": 37.13441638568722, "iter_time": 0.5555787734985351, "loss": 0.3042842149734497, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 40.687518810417856, "step_time": 0.5070619716644287} +{"epoch": 0, "iter": 504, "iter_tflops": 44.84390210728757, "iter_time": 0.4600646362304688, "loss": 0.2913662791252136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.05808117467825, "step_time": 0.42054424095153814} +{"epoch": 0, "iter": 505, "iter_tflops": 38.820281938457924, "iter_time": 0.5314514083862305, "loss": 0.0017934006173163652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.78691244715084, "step_time": 0.4821823387145997} +{"epoch": 0, "iter": 506, "iter_tflops": 11.592216049498102, "iter_time": 1.779736801147461, "loss": 0.01663093827664852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.048251510064876, "step_time": 1.2855664367675783} +{"epoch": 0, "iter": 507, "iter_tflops": 16.002387154264692, "iter_time": 1.289250991821289, "loss": 0.00672938534989953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.74296825450212, "step_time": 1.10073779296875} +{"epoch": 0, "iter": 508, "iter_tflops": 25.15453118483573, "iter_time": 0.820174041748047, "loss": 0.005714531987905502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.037113745418765, "step_time": 0.7105077209472657} +{"epoch": 0, "iter": 509, "iter_tflops": 19.712728976922303, "iter_time": 0.7852831802368165, "loss": 0.8227626085281372, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 20.73640155261067, "step_time": 0.7465169143676756} +{"epoch": 0, "iter": 510, "iter_tflops": 11.317944968715723, "iter_time": 1.367746047973633, "loss": 0.46888166666030884, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 14.575274672728622, "step_time": 1.0620777206420897} +{"epoch": 0, "iter": 511, "iter_tflops": 27.562215594030125, "iter_time": 0.5616411514282227, "loss": 0.6752705574035645, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 29.374594785748624, "step_time": 0.5269885292053222} +{"epoch": 0, "iter": 512, "iter_tflops": 28.496784282317073, "iter_time": 0.5432218017578125, "loss": 0.5857551693916321, "lr": 3e-05, "seqlen": 
6192.0, "step_tflops": 30.24433510233028, "step_time": 0.5118338508605956} +{"epoch": 0, "iter": 513, "iter_tflops": 38.37580160218645, "iter_time": 0.5376068420410157, "loss": 0.007916850037872791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.305644105069256, "step_time": 0.49947395706176745} +{"epoch": 0, "iter": 514, "iter_tflops": 12.727689730696644, "iter_time": 1.6209613800048828, "loss": 0.021286753937602043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.0047342857564, "step_time": 1.2890619201660156} +{"epoch": 0, "iter": 515, "iter_tflops": 29.795816520169378, "iter_time": 0.6924157791137696, "loss": 0.009524174965918064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.70884807745019, "step_time": 0.5620196380615234} +{"epoch": 0, "iter": 516, "iter_tflops": 43.96122448530642, "iter_time": 0.46930206680297853, "loss": 0.007105359807610512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71349781604607, "step_time": 0.42351903343200686} +{"epoch": 0, "iter": 517, "iter_tflops": 6.856355878555696, "iter_time": 3.0090464782714847, "loss": 1.0079336166381836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 6.9885878110674495, "step_time": 2.952111938476563} +{"epoch": 0, "iter": 518, "iter_tflops": 18.30135176085757, "iter_time": 1.1272988891601563, "loss": 1.2406284809112549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.928752327664906, "step_time": 0.9857775173187255} +{"epoch": 0, "iter": 519, "iter_tflops": 39.836607728911936, "iter_time": 0.5178928298950195, "loss": 1.0107682943344116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.40929099132199, "step_time": 0.4752690734863281} +{"epoch": 0, "iter": 520, "iter_tflops": 39.275767266588595, "iter_time": 0.5252881088256836, "loss": 0.809942364692688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.54558886739783, "step_time": 0.4849173336029053} +{"epoch": 0, "iter": 521, "iter_tflops": 18.2833492520033, "iter_time": 0.8354906463623047, "loss": 0.5931301116943359, "lr": 3e-05, "seqlen": 
6112.0, "step_tflops": 19.49543505634553, "step_time": 0.7835458526611327} +{"epoch": 0, "iter": 522, "iter_tflops": 14.773472987139707, "iter_time": 1.0339862060546876, "loss": 0.6003614664077759, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 17.721844313503755, "step_time": 0.8619626159667968} +{"epoch": 0, "iter": 523, "iter_tflops": 27.052859963271867, "iter_time": 0.5646562805175781, "loss": 0.487446129322052, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.79233876523004, "step_time": 0.5305427742004394} +{"epoch": 0, "iter": 524, "iter_tflops": 29.013258335451013, "iter_time": 0.5265029907226564, "loss": 0.5002220869064331, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 30.696297243827303, "step_time": 0.4976355018615723} +{"epoch": 0, "iter": 525, "iter_tflops": 23.282555245504533, "iter_time": 0.8861180953979493, "loss": 0.6456629037857056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.46857392979748, "step_time": 0.8431669769287109} +{"epoch": 0, "iter": 526, "iter_tflops": 14.053958611179896, "iter_time": 1.4679916229248047, "loss": 0.7306216955184937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.660281477405853, "step_time": 1.168219970703125} +{"epoch": 0, "iter": 527, "iter_tflops": 49.07754624965775, "iter_time": 0.42037744522094733, "loss": 0.520386278629303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.56337182014433, "step_time": 0.3781125106811523} +{"epoch": 0, "iter": 528, "iter_tflops": 51.12795351655721, "iter_time": 0.4035188598632813, "loss": 0.47843271493911743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.39715069905008, "step_time": 0.37242156410217286} +{"epoch": 0, "iter": 529, "iter_tflops": 19.62502823808206, "iter_time": 0.6869284591674804, "loss": 0.06035801023244858, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 20.842034196043908, "step_time": 0.6468174018859862} +{"epoch": 0, "iter": 530, "iter_tflops": 7.507108613414972, "iter_time": 1.7957633361816407, "loss": 0.10475528985261917, "lr": 3e-05, "seqlen": 5408.0, 
"step_tflops": 8.742222254088208, "step_time": 1.5420553283691405} +{"epoch": 0, "iter": 531, "iter_tflops": 8.822480439363261, "iter_time": 1.5280272369384766, "loss": 0.029257969930768013, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 9.89560513373932, "step_time": 1.3623209724426268} +{"epoch": 0, "iter": 532, "iter_tflops": 29.40984096080955, "iter_time": 0.4583836555480957, "loss": 0.07058659940958023, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 32.47576257663192, "step_time": 0.4151092796325684} +{"epoch": 0, "iter": 533, "iter_tflops": 13.620637304168376, "iter_time": 1.130509323120117, "loss": 0.5398657917976379, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 14.47889034116037, "step_time": 1.0634970703125} +{"epoch": 0, "iter": 534, "iter_tflops": 14.32011672531988, "iter_time": 1.0752885437011719, "loss": 0.7014752626419067, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 19.216483175309683, "step_time": 0.8013046569824219} +{"epoch": 0, "iter": 535, "iter_tflops": 17.99062415523348, "iter_time": 0.8559045715332032, "loss": 0.6637566089630127, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 18.808651990236324, "step_time": 0.8186794815063476} +{"epoch": 0, "iter": 536, "iter_tflops": 26.91156183541412, "iter_time": 0.5721800003051758, "loss": 0.47746890783309937, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.620790675664303, "step_time": 0.538009506225586} +{"epoch": 0, "iter": 537, "iter_tflops": 10.902456888823131, "iter_time": 1.8923343353271482, "loss": 0.17216451466083527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.152246719879539, "step_time": 1.8499495239257813} +{"epoch": 0, "iter": 538, "iter_tflops": 37.503099837095704, "iter_time": 0.5501170196533203, "loss": 0.11735828965902328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.20148652371502, "step_time": 0.40293934631347655} +{"epoch": 0, "iter": 539, "iter_tflops": 57.43382217914975, "iter_time": 0.35921505355834965, "loss": 0.09228850901126862, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 62.71354207723821, "step_time": 0.32897350120544433} +{"epoch": 0, "iter": 540, "iter_tflops": 57.33282646855456, "iter_time": 0.35984783554077143, "loss": 0.08620208501815796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.33859972672204, "step_time": 0.3309521484375} +{"epoch": 0, "iter": 541, "iter_tflops": 32.16335667258005, "iter_time": 0.641447151184082, "loss": 0.7294651865959167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.10709760172933, "step_time": 0.6048915023803711} +{"epoch": 0, "iter": 542, "iter_tflops": 12.268558597383871, "iter_time": 1.6816232604980468, "loss": 0.5204401016235352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.399928031111262, "step_time": 1.2579990272521973} +{"epoch": 0, "iter": 543, "iter_tflops": 11.992720245341735, "iter_time": 1.7203014068603517, "loss": 0.704420804977417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.141568897798008, "step_time": 1.4588970756530761} +{"epoch": 0, "iter": 544, "iter_tflops": 48.554179254339836, "iter_time": 0.42490870666503905, "loss": 0.6211035251617432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.86969814970866, "step_time": 0.39022529411315915} +{"epoch": 0, "iter": 545, "iter_tflops": 19.448426816871734, "iter_time": 0.8507327499389649, "loss": 0.6257787346839905, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 20.441505230189794, "step_time": 0.8094029006958008} +{"epoch": 0, "iter": 546, "iter_tflops": 8.577468106311098, "iter_time": 1.9289391021728517, "loss": 0.5531981587409973, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 10.629474683655673, "step_time": 1.5565598602294923} +{"epoch": 0, "iter": 547, "iter_tflops": 9.724965539871993, "iter_time": 1.7013339080810546, "loss": 0.5706459283828735, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 11.239197704567209, "step_time": 1.4721169662475584} +{"epoch": 0, "iter": 548, "iter_tflops": 25.95020951820556, "iter_time": 0.6375830459594727, "loss": 0.44618821144104004, "lr": 3e-05, "seqlen": 6608.0, 
"step_tflops": 27.81722514113099, "step_time": 0.5947902259826661} +{"epoch": 0, "iter": 549, "iter_tflops": 17.50130524026096, "iter_time": 0.8681522445678711, "loss": 0.5591825246810913, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 19.21803481198754, "step_time": 0.7906009941101073} +{"epoch": 0, "iter": 550, "iter_tflops": 15.618278774941116, "iter_time": 0.9728215026855469, "loss": 0.6138613224029541, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 16.239941734133048, "step_time": 0.9355820159912109} +{"epoch": 0, "iter": 551, "iter_tflops": 26.39981036471341, "iter_time": 0.5755267639160156, "loss": 0.5736266374588013, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.023584671626775, "step_time": 0.542178939819336} +{"epoch": 0, "iter": 552, "iter_tflops": 27.328577293759167, "iter_time": 0.5559673767089843, "loss": 0.5458124279975891, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.025746097120557, "step_time": 0.5234593238830567} +{"epoch": 0, "iter": 553, "iter_tflops": 26.7493314475264, "iter_time": 0.7712751083374023, "loss": 0.6784047484397888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.328341456929685, "step_time": 0.728284553527832} +{"epoch": 0, "iter": 554, "iter_tflops": 22.56729312150795, "iter_time": 0.9142032852172851, "loss": 0.6273162961006165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.485626261700098, "step_time": 0.8095187969207764} +{"epoch": 0, "iter": 555, "iter_tflops": 45.62819180547028, "iter_time": 0.4521567192077637, "loss": 0.5394850969314575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.502992777075804, "step_time": 0.41676457023620606} +{"epoch": 0, "iter": 556, "iter_tflops": 52.77605084825537, "iter_time": 0.39091772079467774, "loss": 0.6069902777671814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.01676922202022, "step_time": 0.36184255599975584} +{"epoch": 0, "iter": 557, "iter_tflops": 50.57584275654329, "iter_time": 0.4079238700866699, "loss": 0.0029681192245334387, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 55.83345285553974, "step_time": 0.3695113315582275} +{"epoch": 0, "iter": 558, "iter_tflops": 56.50140509351788, "iter_time": 0.3651430168151855, "loss": 0.005021311808377504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.335136548620184, "step_time": 0.33097053527832027} +{"epoch": 0, "iter": 559, "iter_tflops": 59.990887390115184, "iter_time": 0.3439037895202636, "loss": 0.010727413929998875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.68983840292391, "step_time": 0.3140682640075684} +{"epoch": 0, "iter": 560, "iter_tflops": 60.530755492910195, "iter_time": 0.34083654403686525, "loss": 0.014530373737215996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.69937569043773, "step_time": 0.3093146419525147} +{"epoch": 0, "iter": 561, "iter_tflops": 45.06438105720243, "iter_time": 0.45781375503540034, "loss": 1.0821655988693237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.021106291799676, "step_time": 0.42086144256591795} +{"epoch": 0, "iter": 562, "iter_tflops": 10.000073252752447, "iter_time": 2.06309423828125, "loss": 1.0578391551971436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.605191302397214, "step_time": 1.777746955871582} +{"epoch": 0, "iter": 563, "iter_tflops": 11.06406278003384, "iter_time": 1.8646941833496093, "loss": 1.0313796997070312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.182501068415064, "step_time": 1.565036361694336} +{"epoch": 0, "iter": 564, "iter_tflops": 22.854382647198754, "iter_time": 0.902719352722168, "loss": 1.2018377780914307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.43874140239098, "step_time": 0.7518964958190917} +{"epoch": 0, "iter": 565, "iter_tflops": 20.155171535746156, "iter_time": 0.778197280883789, "loss": 0.42695435881614685, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 21.292090850631947, "step_time": 0.7366444091796875} +{"epoch": 0, "iter": 566, "iter_tflops": 10.152421649567914, "iter_time": 1.5449220123291016, "loss": 0.525524914264679, "lr": 3e-05, "seqlen": 6272.0, 
"step_tflops": 11.997345776129409, "step_time": 1.3073474731445314} +{"epoch": 0, "iter": 567, "iter_tflops": 24.24402792091404, "iter_time": 0.6469510650634765, "loss": 0.5348442196846008, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 26.0561988566818, "step_time": 0.6019565544128417} +{"epoch": 0, "iter": 568, "iter_tflops": 26.726349464637007, "iter_time": 0.5868627777099609, "loss": 0.6902253031730652, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.57945362267694, "step_time": 0.5488103408813477} +{"epoch": 0, "iter": 569, "iter_tflops": 16.47255004270852, "iter_time": 1.2524529266357425, "loss": 0.2325447052717209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.528501986173193, "step_time": 1.1770026626586914} +{"epoch": 0, "iter": 570, "iter_tflops": 20.02715689421669, "iter_time": 1.0301558837890623, "loss": 0.1526428908109665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.429422513141702, "step_time": 0.8445182647705078} +{"epoch": 0, "iter": 571, "iter_tflops": 51.93928723853029, "iter_time": 0.39721556854248047, "loss": 0.24610291421413422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.040206525775965, "step_time": 0.36169387817382814} +{"epoch": 0, "iter": 572, "iter_tflops": 49.5828939622589, "iter_time": 0.4160929679870605, "loss": 0.17533445358276367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.739182279367974, "step_time": 0.38391156387329106} +{"epoch": 0, "iter": 573, "iter_tflops": 27.984007380406723, "iter_time": 0.7372458572387696, "loss": 0.33294856548309326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.75170838181973, "step_time": 0.6934423141479492} +{"epoch": 0, "iter": 574, "iter_tflops": 19.211655133470014, "iter_time": 1.0738842315673827, "loss": 0.3339115381240845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.4953801050448, "step_time": 0.8092090969085693} +{"epoch": 0, "iter": 575, "iter_tflops": 45.69607580536796, "iter_time": 0.45148501586914064, "loss": 0.40026113390922546, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 49.48971690590393, "step_time": 0.4168763694763184} +{"epoch": 0, "iter": 576, "iter_tflops": 52.5954647663079, "iter_time": 0.3922599334716797, "loss": 0.39249345660209656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.10161058821334, "step_time": 0.361304931640625} +{"epoch": 0, "iter": 577, "iter_tflops": 30.817503330198754, "iter_time": 0.6694602508544922, "loss": 1.141068935394287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.55129725692323, "step_time": 0.6338024978637695} +{"epoch": 0, "iter": 578, "iter_tflops": 17.54390440867754, "iter_time": 1.1759693298339842, "loss": 1.0240683555603027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.80592071758291, "step_time": 1.041662935256958} +{"epoch": 0, "iter": 579, "iter_tflops": 44.23099045616331, "iter_time": 0.4664397811889649, "loss": 0.9027907252311707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.68985068898457, "step_time": 0.43260973167419436} +{"epoch": 0, "iter": 580, "iter_tflops": 47.9674104796716, "iter_time": 0.43010646820068354, "loss": 1.0991874933242798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.695381421333295, "step_time": 0.3990896854400635} +{"epoch": 0, "iter": 581, "iter_tflops": 32.29037855562263, "iter_time": 0.6389238662719727, "loss": 0.24693667888641357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.38332679639108, "step_time": 0.6000319175720215} +{"epoch": 0, "iter": 582, "iter_tflops": 12.094438499183267, "iter_time": 1.7058330993652346, "loss": 0.21749675273895264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.090718405173494, "step_time": 1.4641619338989258} +{"epoch": 0, "iter": 583, "iter_tflops": 11.449198385358061, "iter_time": 1.8019683837890625, "loss": 0.23567087948322296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.36719574189238, "step_time": 1.5434122390747071} +{"epoch": 0, "iter": 584, "iter_tflops": 32.179886439770314, "iter_time": 0.641117660522461, "loss": 0.28113028407096863, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 35.48037102734805, "step_time": 0.5814790802001955} +{"epoch": 0, "iter": 585, "iter_tflops": 13.357613157833594, "iter_time": 1.1374635009765626, "loss": 0.44726428389549255, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 14.052293254600324, "step_time": 1.0812325897216797} +{"epoch": 0, "iter": 586, "iter_tflops": 11.296291311433087, "iter_time": 1.3450252838134766, "loss": 0.4473358392715454, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 13.642099241008635, "step_time": 1.1137433586120604} +{"epoch": 0, "iter": 587, "iter_tflops": 27.345652247580357, "iter_time": 0.5556202239990234, "loss": 0.6804386377334595, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.09578990843628, "step_time": 0.5221991729736328} +{"epoch": 0, "iter": 588, "iter_tflops": 26.444650847584636, "iter_time": 0.574550880432129, "loss": 0.5772340893745422, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.024091999554482, "step_time": 0.5421691246032715} +{"epoch": 0, "iter": 589, "iter_tflops": 32.79074649673256, "iter_time": 0.6291742553710937, "loss": 0.6000269651412964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.18117081740131, "step_time": 0.5864243011474609} +{"epoch": 0, "iter": 590, "iter_tflops": 21.324124127804374, "iter_time": 0.9675001602172851, "loss": 0.5591043829917908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.33056089539214, "step_time": 0.7835417404174805} +{"epoch": 0, "iter": 591, "iter_tflops": 47.84687394385697, "iter_time": 0.43118999862670904, "loss": 0.7025584578514099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.64353399973733, "step_time": 0.3994903507232666} +{"epoch": 0, "iter": 592, "iter_tflops": 48.60154849144939, "iter_time": 0.42449457168579097, "loss": 0.6664637923240662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.60049162742966, "step_time": 0.3922224464416504} +{"epoch": 0, "iter": 593, "iter_tflops": 33.970183362473726, "iter_time": 0.5136665191650391, "loss": 0.031502615660429, "lr": 3e-05, "seqlen": 6960.0, 
"step_tflops": 36.743574767947294, "step_time": 0.4748951606750489} +{"epoch": 0, "iter": 594, "iter_tflops": 38.69838787434017, "iter_time": 0.4509062728881836, "loss": 0.09103412926197052, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 47.65327295806272, "step_time": 0.36617308235168455} +{"epoch": 0, "iter": 595, "iter_tflops": 48.113992767607435, "iter_time": 0.36266675949096683, "loss": 0.07142039388418198, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 52.60552393401285, "step_time": 0.3317017784118652} +{"epoch": 0, "iter": 596, "iter_tflops": 47.03942608839671, "iter_time": 0.3709515037536621, "loss": 0.053681641817092896, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 51.290957378580785, "step_time": 0.340203161239624} +{"epoch": 0, "iter": 597, "iter_tflops": 43.878707717780046, "iter_time": 0.4701846199035644, "loss": 1.4044568538665771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.64079483863413, "step_time": 0.43305519104003903} +{"epoch": 0, "iter": 598, "iter_tflops": 41.426765110354566, "iter_time": 0.49801362609863287, "loss": 1.2102868556976318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.923593191155724, "step_time": 0.4592485160827636} +{"epoch": 0, "iter": 599, "iter_tflops": 42.845450162389945, "iter_time": 0.48152355575561523, "loss": 0.9972885251045227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.853497221279966, "step_time": 0.4499350051879883} +{"epoch": 0, "iter": 600, "iter_tflops": 45.53895356567556, "iter_time": 0.453042766571045, "loss": 0.950727105140686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.53970283385576, "step_time": 0.42503543090820306} +{"epoch": 0, "iter": 601, "iter_tflops": 22.612068720579916, "iter_time": 0.9123930130004884, "loss": 0.9945647120475769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.747747684322956, "step_time": 0.868760009765625} +{"epoch": 0, "iter": 602, "iter_tflops": 15.582316637589617, "iter_time": 1.3240068206787108, "loss": 1.103270173072815, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 19.93689957138141, "step_time": 1.0348195533752442} +{"epoch": 0, "iter": 603, "iter_tflops": 46.66708473413616, "iter_time": 0.4420909004211425, "loss": 1.1150290966033936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.394113745681956, "step_time": 0.4093949069976807} +{"epoch": 0, "iter": 604, "iter_tflops": 47.21291513400527, "iter_time": 0.43697986984252934, "loss": 1.2307653427124023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.82949255898385, "step_time": 0.40588824462890627} +{"epoch": 0, "iter": 605, "iter_tflops": 37.11843657427306, "iter_time": 0.5558179550170899, "loss": 0.017031989991664886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.09160317470978, "step_time": 0.5145988655090332} +{"epoch": 0, "iter": 606, "iter_tflops": 7.8021610872647384, "iter_time": 2.6442793579101562, "loss": 0.021296778693795204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.70759359718382, "step_time": 2.125253112792969} +{"epoch": 0, "iter": 607, "iter_tflops": 12.284558294020641, "iter_time": 1.679433074951172, "loss": 0.018574824556708336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.271301192710979, "step_time": 1.4456350708007812} +{"epoch": 0, "iter": 608, "iter_tflops": 34.59535233550144, "iter_time": 0.5963544845581055, "loss": 0.014129700139164925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.49970084283552, "step_time": 0.4636231956481934} +{"epoch": 0, "iter": 609, "iter_tflops": 19.815156408104425, "iter_time": 0.7874192428588868, "loss": 0.5546090602874756, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 20.912392773752916, "step_time": 0.7461047439575197} +{"epoch": 0, "iter": 610, "iter_tflops": 10.282357376510332, "iter_time": 1.5174375762939452, "loss": 0.5367986559867859, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.160967125134986, "step_time": 1.1855386695861816} +{"epoch": 0, "iter": 611, "iter_tflops": 25.55119522871279, "iter_time": 0.6106499252319336, "loss": 0.5222402811050415, "lr": 3e-05, "seqlen": 6240.0, 
"step_tflops": 27.427480814261642, "step_time": 0.5688759956359863} +{"epoch": 0, "iter": 612, "iter_tflops": 24.338652764987675, "iter_time": 0.6410722732543945, "loss": 0.39116597175598145, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 26.17065105271562, "step_time": 0.5961959228515625} +{"epoch": 0, "iter": 613, "iter_tflops": 23.85681640788241, "iter_time": 0.8647882080078125, "loss": 1.1545063257217407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.983117437443543, "step_time": 0.7940191764831543} +{"epoch": 0, "iter": 614, "iter_tflops": 27.55567071403442, "iter_time": 0.7487059097290039, "loss": 1.0453925132751465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.54274601607509, "step_time": 0.6339690418243408} +{"epoch": 0, "iter": 615, "iter_tflops": 33.74432636377848, "iter_time": 0.6113944396972656, "loss": 0.9388598799705505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.4498742419905, "step_time": 0.5660127487182617} +{"epoch": 0, "iter": 616, "iter_tflops": 39.99743180322743, "iter_time": 0.5158104553222657, "loss": 1.1105331182479858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.2838193027334, "step_time": 0.4766467895507812} +{"epoch": 0, "iter": 617, "iter_tflops": 25.017443219576066, "iter_time": 0.6515167388916016, "loss": 0.04324819892644882, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 28.043203132805303, "step_time": 0.5812204456329346} +{"epoch": 0, "iter": 618, "iter_tflops": 31.13074405135433, "iter_time": 0.5235751190185547, "loss": 0.04562579095363617, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 34.47923673753034, "step_time": 0.4727274894714355} +{"epoch": 0, "iter": 619, "iter_tflops": 35.49815738125443, "iter_time": 0.4591585655212402, "loss": 0.08183258771896362, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 39.30559094084173, "step_time": 0.4146810321807861} +{"epoch": 0, "iter": 620, "iter_tflops": 36.110398136597055, "iter_time": 0.45137367248535154, "loss": 0.06051252782344818, "lr": 3e-05, "seqlen": 6512.0, 
"step_tflops": 39.7265335434799, "step_time": 0.4102870693206787} +{"epoch": 0, "iter": 621, "iter_tflops": 25.20597816191974, "iter_time": 0.8185000152587889, "loss": 0.7095669507980347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.266921130845077, "step_time": 0.7566345100402831} +{"epoch": 0, "iter": 622, "iter_tflops": 27.31888780263435, "iter_time": 0.7551952209472657, "loss": 0.8176822662353516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.45621415646621, "step_time": 0.6166595363616943} +{"epoch": 0, "iter": 623, "iter_tflops": 40.92895013575492, "iter_time": 0.5040709190368652, "loss": 0.7201006412506104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62788939805928, "step_time": 0.4622914905548096} +{"epoch": 0, "iter": 624, "iter_tflops": 45.53791406798656, "iter_time": 0.45305310821533207, "loss": 0.8025088310241699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.49153771248401, "step_time": 0.41686103248596196} +{"epoch": 0, "iter": 625, "iter_tflops": 36.51949904672139, "iter_time": 0.5649336395263671, "loss": 0.9059610366821289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.045707605470085, "step_time": 0.5151886367797852} +{"epoch": 0, "iter": 626, "iter_tflops": 39.03594113670083, "iter_time": 0.5285153350830079, "loss": 0.7960159778594971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.5201135605898, "step_time": 0.4852078647613526} +{"epoch": 0, "iter": 627, "iter_tflops": 35.07091461745805, "iter_time": 0.5882679061889649, "loss": 0.7702622413635254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.93768492833342, "step_time": 0.543815299987793} +{"epoch": 0, "iter": 628, "iter_tflops": 37.90894221236691, "iter_time": 0.5442276229858398, "loss": 0.8976344466209412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.35062227542211, "step_time": 0.4989306659698486} +{"epoch": 0, "iter": 629, "iter_tflops": 18.089986948690466, "iter_time": 1.1404703369140625, "loss": 0.7535009384155273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
19.242285598915192, "step_time": 1.0721747894287108} +{"epoch": 0, "iter": 630, "iter_tflops": 27.01827443770013, "iter_time": 0.7635977478027344, "loss": 0.741110622882843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.39449356081168, "step_time": 0.6177992630004883} +{"epoch": 0, "iter": 631, "iter_tflops": 42.19781257225797, "iter_time": 0.4889138145446778, "loss": 0.664652407169342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.086323297245364, "step_time": 0.457590950012207} +{"epoch": 0, "iter": 632, "iter_tflops": 44.43257417453297, "iter_time": 0.464323616027832, "loss": 0.9246011972427368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.97306407656764, "step_time": 0.4300557804107666} +{"epoch": 0, "iter": 633, "iter_tflops": 30.033948460343957, "iter_time": 0.68692578125, "loss": 0.395794540643692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.041537621327564, "step_time": 0.6438858757019043} +{"epoch": 0, "iter": 634, "iter_tflops": 17.728102636869018, "iter_time": 1.1637507934570313, "loss": 0.5239349007606506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.79842185718247, "step_time": 1.0420574760437011} +{"epoch": 0, "iter": 635, "iter_tflops": 51.204601610233325, "iter_time": 0.4029148330688477, "loss": 0.4741162955760956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.68309354644915, "step_time": 0.37050911140441894} +{"epoch": 0, "iter": 636, "iter_tflops": 54.2784693322434, "iter_time": 0.3800971870422364, "loss": 0.43483561277389526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.58024217840237, "step_time": 0.3521851863861084} +{"epoch": 0, "iter": 637, "iter_tflops": 36.0735802432326, "iter_time": 0.5719169921874999, "loss": 1.2178380489349365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.787980430196654, "step_time": 0.5318939857482909} +{"epoch": 0, "iter": 638, "iter_tflops": 15.534543278551023, "iter_time": 1.3280785369873045, "loss": 1.06952965259552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.531316563684108, 
"step_time": 1.0563083877563477} +{"epoch": 0, "iter": 639, "iter_tflops": 42.424510234894456, "iter_time": 0.48630127716064453, "loss": 1.0290359258651733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.600171573420006, "step_time": 0.4524345588684082} +{"epoch": 0, "iter": 640, "iter_tflops": 41.287128702247344, "iter_time": 0.4996979484558105, "loss": 1.329809546470642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.37855228313548, "step_time": 0.46488883590698243} +{"epoch": 0, "iter": 641, "iter_tflops": 31.01185905278454, "iter_time": 0.6652646484375, "loss": 0.1979806274175644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.00739695276842, "step_time": 0.6250445480346679} +{"epoch": 0, "iter": 642, "iter_tflops": 12.222350453477262, "iter_time": 1.6879808502197267, "loss": 0.09790772199630737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.679921832275031, "step_time": 1.4053953247070314} +{"epoch": 0, "iter": 643, "iter_tflops": 13.67833295874366, "iter_time": 1.508304672241211, "loss": 0.13507966697216034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.688997054312427, "step_time": 1.2362093086242678} +{"epoch": 0, "iter": 644, "iter_tflops": 17.336622332021193, "iter_time": 1.1900295867919923, "loss": 0.1538386046886444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.714544732049404, "step_time": 0.9959713706970215} +{"epoch": 0, "iter": 645, "iter_tflops": 17.438915836825668, "iter_time": 0.9064514999389648, "loss": 0.6482340097427368, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 18.650223826724794, "step_time": 0.8475786437988282} +{"epoch": 0, "iter": 646, "iter_tflops": 6.894009308819945, "iter_time": 2.292937347412109, "loss": 0.6154623031616211, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 8.50913780485105, "step_time": 1.8577124710083008} +{"epoch": 0, "iter": 647, "iter_tflops": 8.58395778494752, "iter_time": 1.8415201721191405, "loss": 0.6193014979362488, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 10.561227618308674, "step_time": 
1.4967513236999515} +{"epoch": 0, "iter": 648, "iter_tflops": 23.6742940307118, "iter_time": 0.6677086715698242, "loss": 0.36519500613212585, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.47443127456633, "step_time": 0.6205253906250001} +{"epoch": 0, "iter": 649, "iter_tflops": 15.915419061963314, "iter_time": 1.1196560058593752, "loss": 0.4654262661933899, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 17.045648949422777, "step_time": 1.0454160232543945} +{"epoch": 0, "iter": 650, "iter_tflops": 14.096441733407923, "iter_time": 1.2641342315673827, "loss": 0.6721711158752441, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 16.321399658992313, "step_time": 1.0918055381774903} +{"epoch": 0, "iter": 651, "iter_tflops": 30.739879494935717, "iter_time": 0.579696304321289, "loss": 0.4926963448524475, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 33.03752224719104, "step_time": 0.5393804779052734} +{"epoch": 0, "iter": 652, "iter_tflops": 34.40895948743325, "iter_time": 0.5178824005126952, "loss": 0.5101708769798279, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 36.41828100945747, "step_time": 0.48930905151367193} +{"epoch": 0, "iter": 653, "iter_tflops": 41.40766131625677, "iter_time": 0.49824338912963867, "loss": 0.6456246972084045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17738858979347, "step_time": 0.456668571472168} +{"epoch": 0, "iter": 654, "iter_tflops": 44.67520432129251, "iter_time": 0.46180188369750974, "loss": 0.7242870926856995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.111615182570624, "step_time": 0.4288173122406006} +{"epoch": 0, "iter": 655, "iter_tflops": 41.14771793162371, "iter_time": 0.5013909530639649, "loss": 0.8265038728713989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87950231591713, "step_time": 0.4701761054992676} +{"epoch": 0, "iter": 656, "iter_tflops": 49.958024763086875, "iter_time": 0.4129685592651367, "loss": 0.7925403714179993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.3050530856038, "step_time": 
0.3799111194610596} +{"epoch": 0, "iter": 657, "iter_tflops": 27.22750472842813, "iter_time": 0.757729866027832, "loss": 1.0926183462142944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.796527621161605, "step_time": 0.7164437942504883} +{"epoch": 0, "iter": 658, "iter_tflops": 17.334111668086596, "iter_time": 1.1902019500732421, "loss": 1.04261314868927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.88074370868874, "step_time": 0.9880439987182617} +{"epoch": 0, "iter": 659, "iter_tflops": 44.488793155952116, "iter_time": 0.4637368659973145, "loss": 0.9260590076446533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70589567375382, "step_time": 0.43246423149108887} +{"epoch": 0, "iter": 660, "iter_tflops": 46.30297633718745, "iter_time": 0.44556732940673827, "loss": 1.3625355958938599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.71165578620715, "step_time": 0.41501521492004395} +{"epoch": 0, "iter": 661, "iter_tflops": 35.55469449326517, "iter_time": 0.580263557434082, "loss": 0.04270336031913757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.14989333027826, "step_time": 0.5407903327941894} +{"epoch": 0, "iter": 662, "iter_tflops": 31.090181754811017, "iter_time": 0.6635887069702149, "loss": 0.05903850123286247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85286855081066, "step_time": 0.5176815185546875} +{"epoch": 0, "iter": 663, "iter_tflops": 48.22698626206496, "iter_time": 0.4277914733886719, "loss": 0.06795493513345718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.96597439075201, "step_time": 0.3895159816741944} +{"epoch": 0, "iter": 664, "iter_tflops": 44.82332257584113, "iter_time": 0.4602758636474609, "loss": 0.08587920665740967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.48082063409914, "step_time": 0.4169513206481933} +{"epoch": 0, "iter": 665, "iter_tflops": 15.405870728889505, "iter_time": 1.3391708831787108, "loss": 0.9506700038909912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.390504520623153, "step_time": 
1.2587222976684571} +{"epoch": 0, "iter": 666, "iter_tflops": 17.95406480780495, "iter_time": 1.1491043243408203, "loss": 0.9292234778404236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.67832782616614, "step_time": 0.9516921081542968} +{"epoch": 0, "iter": 667, "iter_tflops": 39.94932183079415, "iter_time": 0.5164316329956053, "loss": 1.2541223764419556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.08501083656829, "step_time": 0.4788461952209473} +{"epoch": 0, "iter": 668, "iter_tflops": 34.42071138345772, "iter_time": 0.5993802185058593, "loss": 0.982951283454895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.076386792112075, "step_time": 0.5564483299255372} +{"epoch": 0, "iter": 669, "iter_tflops": 19.697780417409028, "iter_time": 1.047381637573242, "loss": 1.2234526872634888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.064837891331827, "step_time": 0.9794090805053712} +{"epoch": 0, "iter": 670, "iter_tflops": 18.451174116061143, "iter_time": 1.1181452941894532, "loss": 1.1070187091827393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.449938043792088, "step_time": 0.9189822025299073} +{"epoch": 0, "iter": 671, "iter_tflops": 39.61892477004895, "iter_time": 0.5207383499145507, "loss": 1.0808019638061523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.311901650555434, "step_time": 0.4763377437591553} +{"epoch": 0, "iter": 672, "iter_tflops": 42.44435351961501, "iter_time": 0.48607392501831054, "loss": 1.1115989685058594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.12540253628082, "step_time": 0.44728267669677735} +{"epoch": 0, "iter": 673, "iter_tflops": 29.43412886547731, "iter_time": 0.7009242095947266, "loss": 0.9890761375427246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.373947168672597, "step_time": 0.6575867996215821} +{"epoch": 0, "iter": 674, "iter_tflops": 16.00043416713974, "iter_time": 1.2894083557128906, "loss": 1.1462377309799194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.173139876510195, "step_time": 
0.9304543075561525} +{"epoch": 0, "iter": 675, "iter_tflops": 43.4904165478271, "iter_time": 0.47438252258300784, "loss": 1.2151036262512207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.71640217883631, "step_time": 0.44162419509887696} +{"epoch": 0, "iter": 676, "iter_tflops": 45.40297605526606, "iter_time": 0.45439958572387706, "loss": 1.1160484552383423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.75473051341052, "step_time": 0.4231608562469482} +{"epoch": 0, "iter": 677, "iter_tflops": 26.219117806012097, "iter_time": 0.7868721466064452, "loss": 0.15368564426898956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.604642364528754, "step_time": 0.7473776779174806} +{"epoch": 0, "iter": 678, "iter_tflops": 14.150545943339322, "iter_time": 1.4579715576171877, "loss": 0.19315671920776367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.386357397817747, "step_time": 1.1866254119873048} +{"epoch": 0, "iter": 679, "iter_tflops": 32.567899586744296, "iter_time": 0.6334794006347657, "loss": 0.19581305980682373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.93951798576907, "step_time": 0.5585100898742676} +{"epoch": 0, "iter": 680, "iter_tflops": 49.86446224884905, "iter_time": 0.41374342727661134, "loss": 0.1657063066959381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.996512338736416, "step_time": 0.38208196449279785} +{"epoch": 0, "iter": 681, "iter_tflops": 35.34308156858233, "iter_time": 0.5837378234863282, "loss": 0.03658706322312355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.94270596931208, "step_time": 0.543743335723877} +{"epoch": 0, "iter": 682, "iter_tflops": 8.349864070589348, "iter_time": 2.4708298645019533, "loss": 0.04908427596092224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.885144572344139, "step_time": 2.0870806045532224} +{"epoch": 0, "iter": 683, "iter_tflops": 9.821891783812955, "iter_time": 2.1005213623046877, "loss": 0.05287777632474899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.044871365958013, "step_time": 
1.7128529548645017} +{"epoch": 0, "iter": 684, "iter_tflops": 40.03691661702216, "iter_time": 0.5153017578125, "loss": 0.05415186285972595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.40275307287169, "step_time": 0.4646354579925537} +{"epoch": 0, "iter": 685, "iter_tflops": 16.33657192144686, "iter_time": 0.9250439147949219, "loss": 0.46479901671409607, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 17.060043132805514, "step_time": 0.8858152542114258} +{"epoch": 0, "iter": 686, "iter_tflops": 11.12972550316834, "iter_time": 1.3578094482421874, "loss": 0.6479557752609253, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 14.946296010597473, "step_time": 1.0110897331237791} +{"epoch": 0, "iter": 687, "iter_tflops": 26.352431148830128, "iter_time": 0.5734592895507813, "loss": 0.5621142387390137, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.03095825979558, "step_time": 0.5391198654174805} +{"epoch": 0, "iter": 688, "iter_tflops": 27.749831048291625, "iter_time": 0.5445815658569335, "loss": 0.6785753965377808, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.50652526966963, "step_time": 0.5121594734191894} +{"epoch": 0, "iter": 689, "iter_tflops": 18.82397260806036, "iter_time": 1.096001037597656, "loss": 0.8429580926895142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.593498248459955, "step_time": 1.0529561004638672} +{"epoch": 0, "iter": 690, "iter_tflops": 19.03652862070982, "iter_time": 1.083763427734375, "loss": 1.2428882122039795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.64687458142549, "step_time": 0.8044291496276855} +{"epoch": 0, "iter": 691, "iter_tflops": 45.30877590816388, "iter_time": 0.45534431457519536, "loss": 0.8556256294250488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.893256090709485, "step_time": 0.42196194648742674} +{"epoch": 0, "iter": 692, "iter_tflops": 47.07178359198629, "iter_time": 0.43829003143310546, "loss": 1.015012264251709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96043333239962, "step_time": 
0.40484533119201666} +{"epoch": 0, "iter": 693, "iter_tflops": 27.95382877954828, "iter_time": 0.7380417785644531, "loss": 0.28263023495674133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.909628820133946, "step_time": 0.6897809944152833} +{"epoch": 0, "iter": 694, "iter_tflops": 12.003223389053085, "iter_time": 1.7187960968017577, "loss": 0.3441516160964966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.867606747682975, "step_time": 1.3876539688110352} +{"epoch": 0, "iter": 695, "iter_tflops": 53.3111772786191, "iter_time": 0.3869937705993652, "loss": 0.4180357754230499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.96885449261028, "step_time": 0.3558996238708496} +{"epoch": 0, "iter": 696, "iter_tflops": 53.55900040372075, "iter_time": 0.38520310974121097, "loss": 0.38561296463012695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.690852173948564, "step_time": 0.35761464309692387} +{"epoch": 0, "iter": 697, "iter_tflops": 25.593801096154856, "iter_time": 0.8060972824096679, "loss": 0.13972359895706177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.030006960334077, "step_time": 0.7632663040161133} +{"epoch": 0, "iter": 698, "iter_tflops": 18.321818357078385, "iter_time": 1.1260396270751951, "loss": 0.1132977083325386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.050312280173443, "step_time": 0.857830587387085} +{"epoch": 0, "iter": 699, "iter_tflops": 40.91685645617426, "iter_time": 0.5042199058532715, "loss": 0.16914382576942444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.922053615122195, "step_time": 0.45926425552368166} +{"epoch": 0, "iter": 700, "iter_tflops": 43.54385590118629, "iter_time": 0.47380033493041995, "loss": 0.09359589219093323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.974923300163205, "step_time": 0.4300391139984131} +{"epoch": 0, "iter": 701, "iter_tflops": 19.379666397199017, "iter_time": 1.064574234008789, "loss": 0.9064266681671143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.715304116767243, "step_time": 
0.9959348602294922} +{"epoch": 0, "iter": 702, "iter_tflops": 26.232915807832537, "iter_time": 0.7864582672119141, "loss": 0.7046473026275635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.79792173053897, "step_time": 0.6104249153137207} +{"epoch": 0, "iter": 703, "iter_tflops": 41.12981830397192, "iter_time": 0.5016091575622559, "loss": 0.8254305124282837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.00630658485961, "step_time": 0.45840450096130375} +{"epoch": 0, "iter": 704, "iter_tflops": 43.80106230231965, "iter_time": 0.4710181083679199, "loss": 0.7601540684700012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.478273410138755, "step_time": 0.43453756904602053} +{"epoch": 0, "iter": 705, "iter_tflops": 18.02498118827378, "iter_time": 1.1445833587646486, "loss": 0.21762017905712128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.136130609346075, "step_time": 1.078122528076172} +{"epoch": 0, "iter": 706, "iter_tflops": 34.45521134510902, "iter_time": 0.5987800598144533, "loss": 0.2910533547401428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.47893949616686, "step_time": 0.5361658554077149} +{"epoch": 0, "iter": 707, "iter_tflops": 52.399653596638444, "iter_time": 0.3937257614135743, "loss": 0.24543899297714233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.06332082299273, "step_time": 0.36154736900329587} +{"epoch": 0, "iter": 708, "iter_tflops": 55.19037548910662, "iter_time": 0.37381687164306643, "loss": 0.20343180000782013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.74422793245272, "step_time": 0.3453236274719238} +{"epoch": 0, "iter": 709, "iter_tflops": 40.9872296662103, "iter_time": 0.5033541831970214, "loss": 1.0101324319839478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55667468073915, "step_time": 0.46303036880493165} +{"epoch": 0, "iter": 710, "iter_tflops": 35.563997881088724, "iter_time": 0.5801117630004883, "loss": 1.1553648710250854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.694356107000814, "step_time": 
0.5197487888336182} +{"epoch": 0, "iter": 711, "iter_tflops": 42.30741171656904, "iter_time": 0.4876472625732422, "loss": 1.1952763795852661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.133877810148995, "step_time": 0.44720050621032714} +{"epoch": 0, "iter": 712, "iter_tflops": 42.90106042154411, "iter_time": 0.4808993835449219, "loss": 0.9993749856948853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79202155399886, "step_time": 0.4409104976654053} +{"epoch": 0, "iter": 713, "iter_tflops": 17.417088885380455, "iter_time": 1.1845316772460939, "loss": 0.8610668778419495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.516444161198397, "step_time": 1.114203857421875} +{"epoch": 0, "iter": 714, "iter_tflops": 21.24923146819271, "iter_time": 0.9709101028442383, "loss": 0.7810499668121338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.77412833896924, "step_time": 0.8677960014343262} +{"epoch": 0, "iter": 715, "iter_tflops": 41.919989799679094, "iter_time": 0.4921540679931641, "loss": 0.7857491374015808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.7400432801025, "step_time": 0.45105102729797364} +{"epoch": 0, "iter": 716, "iter_tflops": 36.113627753617315, "iter_time": 0.5712827758789062, "loss": 0.7996947765350342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.148382258530994, "step_time": 0.526997344970703} +{"epoch": 0, "iter": 717, "iter_tflops": 23.446793651074486, "iter_time": 0.8799110794067383, "loss": 1.1977540254592896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.42100302685458, "step_time": 0.8115766906738281} +{"epoch": 0, "iter": 718, "iter_tflops": 21.58773596498786, "iter_time": 0.9556858367919923, "loss": 1.1460816860198975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.38109842746633, "step_time": 0.7820407314300537} +{"epoch": 0, "iter": 719, "iter_tflops": 47.97175964212321, "iter_time": 0.43006747436523435, "loss": 0.9081206917762756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.75101530168192, "step_time": 
0.39866065216064456} +{"epoch": 0, "iter": 720, "iter_tflops": 46.830646836268386, "iter_time": 0.4405468406677246, "loss": 1.1463890075683594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.01911030485382, "step_time": 0.41246422386169435} +{"epoch": 0, "iter": 721, "iter_tflops": 26.61929597847676, "iter_time": 0.7750427932739258, "loss": 0.37550732493400574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.05570029233377, "step_time": 0.735361915588379} +{"epoch": 0, "iter": 722, "iter_tflops": 14.927088644765552, "iter_time": 1.3821244049072265, "loss": 0.39634761214256287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.885204904314218, "step_time": 1.153528495788574} +{"epoch": 0, "iter": 723, "iter_tflops": 42.0005262270376, "iter_time": 0.4912103576660156, "loss": 0.340650737285614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.235024356966434, "step_time": 0.4462221832275391} +{"epoch": 0, "iter": 724, "iter_tflops": 41.80791601371611, "iter_time": 0.4934733772277832, "loss": 0.4059266448020935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.63380448967667, "step_time": 0.45210110664367675} +{"epoch": 0, "iter": 725, "iter_tflops": 24.312246228841175, "iter_time": 0.8485885391235352, "loss": 0.4411941468715668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.250179031796304, "step_time": 0.7859410591125489} +{"epoch": 0, "iter": 726, "iter_tflops": 24.347615726426607, "iter_time": 0.8473558044433595, "loss": 0.5079391598701477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.780017311500146, "step_time": 0.6927831268310547} +{"epoch": 0, "iter": 727, "iter_tflops": 39.36741225191336, "iter_time": 0.5240652694702148, "loss": 0.5875071883201599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.08343201823583, "step_time": 0.47886374282836913} +{"epoch": 0, "iter": 728, "iter_tflops": 42.49377509895244, "iter_time": 0.48550860595703127, "loss": 0.5730838179588318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.4781358136488, "step_time": 
0.44388814544677735} +{"epoch": 0, "iter": 729, "iter_tflops": 19.76714766360613, "iter_time": 1.0437061462402344, "loss": 0.19763314723968506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.279996540642173, "step_time": 0.9695064315795898} +{"epoch": 0, "iter": 730, "iter_tflops": 21.103526259893762, "iter_time": 0.9776135635375975, "loss": 0.19371268153190613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.537521968916128, "step_time": 0.8078737449645996} +{"epoch": 0, "iter": 731, "iter_tflops": 47.653526476713935, "iter_time": 0.4329394912719726, "loss": 0.1572239249944687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.68811749421872, "step_time": 0.3991457710266113} +{"epoch": 0, "iter": 732, "iter_tflops": 47.318367273737806, "iter_time": 0.4360060310363769, "loss": 0.24279075860977173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.05886720318121, "step_time": 0.40406484985351565} +{"epoch": 0, "iter": 733, "iter_tflops": 8.630803331182527, "iter_time": 0.8518130798339842, "loss": 0.009619379416108131, "lr": 3e-05, "seqlen": 2976.0, "step_tflops": 9.079210544257085, "step_time": 0.8097434387207032} +{"epoch": 0, "iter": 734, "iter_tflops": 5.97366308551101, "iter_time": 1.2307073669433595, "loss": 0.00128291139844805, "lr": 3e-05, "seqlen": 2976.0, "step_tflops": 7.251968527813258, "step_time": 1.0137704181671143} +{"epoch": 0, "iter": 735, "iter_tflops": 17.056042331521542, "iter_time": 0.43103968811035154, "loss": 0.010765871033072472, "lr": 3e-05, "seqlen": 2976.0, "step_tflops": 18.916182922670387, "step_time": 0.38865299606323245} +{"epoch": 0, "iter": 736, "iter_tflops": 17.148875713365538, "iter_time": 0.4287063064575195, "loss": 0.008535118773579597, "lr": 3e-05, "seqlen": 2976.0, "step_tflops": 18.8913970505606, "step_time": 0.3891629161834717} +{"epoch": 0, "iter": 737, "iter_tflops": 25.732764202161555, "iter_time": 0.8017441635131836, "loss": 0.06180461123585701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.230405391574042, 
"step_time": 0.7308110961914062} +{"epoch": 0, "iter": 738, "iter_tflops": 42.88077600899205, "iter_time": 0.4811268692016602, "loss": 0.06979092955589294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.60691456269564, "step_time": 0.4426616458892822} +{"epoch": 0, "iter": 739, "iter_tflops": 54.37675605222813, "iter_time": 0.37941015625, "loss": 0.08740046620368958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.18187454331182, "step_time": 0.3486049346923828} +{"epoch": 0, "iter": 740, "iter_tflops": 50.62964090715794, "iter_time": 0.40749041748046877, "loss": 0.02897525765001774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.49403440147274, "step_time": 0.37177137565612794} +{"epoch": 0, "iter": 741, "iter_tflops": 48.2247711807203, "iter_time": 0.42781112289428713, "loss": 0.15662023425102234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.00287578239462, "step_time": 0.38924479484558105} +{"epoch": 0, "iter": 742, "iter_tflops": 42.05351676447093, "iter_time": 0.4905913963317871, "loss": 0.2096228003501892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.30958362055584, "step_time": 0.4455037574768067} +{"epoch": 0, "iter": 743, "iter_tflops": 42.7460241011873, "iter_time": 0.48264356613159176, "loss": 0.17625702917575836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.792016695941385, "step_time": 0.4409105434417725} +{"epoch": 0, "iter": 744, "iter_tflops": 42.82291723270502, "iter_time": 0.48177692794799803, "loss": 0.19865792989730835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.85632340460238, "step_time": 0.44030542755126956} +{"epoch": 0, "iter": 745, "iter_tflops": 1.553915813205491, "iter_time": 1.0338648376464845, "loss": 1.0099966526031494, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.6589655839362887, "step_time": 0.9683979797363282} +{"epoch": 0, "iter": 746, "iter_tflops": 1.1169746054571288, "iter_time": 1.4382949371337892, "loss": 1.0890374183654785, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.451798449130979, "step_time": 
1.1065853672027588} +{"epoch": 0, "iter": 747, "iter_tflops": 3.209285862169103, "iter_time": 0.5005907821655273, "loss": 0.8109326958656311, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.5002282211448916, "step_time": 0.45898119163513185} +{"epoch": 0, "iter": 748, "iter_tflops": 3.215031210733038, "iter_time": 0.4996962127685547, "loss": 0.943299412727356, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.4974900761238006, "step_time": 0.4593405227661133} +{"epoch": 0, "iter": 749, "iter_tflops": 19.21023793709834, "iter_time": 1.0739634552001953, "loss": 0.012534655630588531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.529897711016538, "step_time": 1.0049291915893555} +{"epoch": 0, "iter": 750, "iter_tflops": 17.692572769040876, "iter_time": 1.1660878143310547, "loss": 0.03189617395401001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.94384451424412, "step_time": 0.9401767997741699} +{"epoch": 0, "iter": 751, "iter_tflops": 46.151422731015145, "iter_time": 0.4470304985046386, "loss": 0.00792335532605648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.378878607524015, "step_time": 0.4015481472015381} +{"epoch": 0, "iter": 752, "iter_tflops": 46.0002965273472, "iter_time": 0.4484991416931152, "loss": 0.028159799054265022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.6352928760131, "step_time": 0.4074449329376221} +{"epoch": 0, "iter": 753, "iter_tflops": 20.065284988917735, "iter_time": 1.0281983795166016, "loss": 0.173158198595047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.403598452145204, "step_time": 0.9639077072143554} +{"epoch": 0, "iter": 754, "iter_tflops": 14.929920386132197, "iter_time": 1.3818622589111327, "loss": 0.1437375843524933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.061975062171655, "step_time": 1.1422390651702883} +{"epoch": 0, "iter": 755, "iter_tflops": 50.78024008483099, "iter_time": 0.4062819213867187, "loss": 0.08499796688556671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.039628492943905, "step_time": 
0.37484071159362786} +{"epoch": 0, "iter": 756, "iter_tflops": 56.25338316600207, "iter_time": 0.36675293731689457, "loss": 0.14785341918468475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.02390761977496, "step_time": 0.3380821437835693} +{"epoch": 0, "iter": 757, "iter_tflops": 30.74710534027567, "iter_time": 0.670993034362793, "loss": 0.7635672092437744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.550333760510476, "step_time": 0.6338212585449219} +{"epoch": 0, "iter": 758, "iter_tflops": 17.680130166775925, "iter_time": 1.1669084625244142, "loss": 0.759468674659729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.59039744959351, "step_time": 0.9132682838439943} +{"epoch": 0, "iter": 759, "iter_tflops": 49.54516240651239, "iter_time": 0.41640984725952146, "loss": 0.6572378873825073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84592968197174, "step_time": 0.38315047454833984} +{"epoch": 0, "iter": 760, "iter_tflops": 47.3924934811603, "iter_time": 0.4353240776062011, "loss": 0.8720169067382812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.18653080806283, "step_time": 0.4030570774078369} +{"epoch": 0, "iter": 761, "iter_tflops": 23.388883647509278, "iter_time": 0.8820897064208983, "loss": 0.9358811974525452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.529198674220147, "step_time": 0.8410830612182617} +{"epoch": 0, "iter": 762, "iter_tflops": 15.879890433316199, "iter_time": 1.2991962127685548, "loss": 0.7443644404411316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.353324789665514, "step_time": 1.0136473388671876} +{"epoch": 0, "iter": 763, "iter_tflops": 46.357436286636094, "iter_time": 0.4450438842773437, "loss": 0.8146141767501831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.051603915380774, "step_time": 0.4121964511871338} +{"epoch": 0, "iter": 764, "iter_tflops": 53.18795402013059, "iter_time": 0.3878903388977051, "loss": 1.0588018894195557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.499341006417374, "step_time": 
0.35880573844909663} +{"epoch": 0, "iter": 765, "iter_tflops": 44.82873768168501, "iter_time": 0.46022026443481445, "loss": 0.03038300760090351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.028580590149176, "step_time": 0.42079728317260745} +{"epoch": 0, "iter": 766, "iter_tflops": 31.543481012453142, "iter_time": 0.6540525283813476, "loss": 0.059829723089933395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.534873048429176, "step_time": 0.4340201663970948} +{"epoch": 0, "iter": 767, "iter_tflops": 52.18364474636615, "iter_time": 0.39535554885864255, "loss": 0.036383867263793945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.14557453783195, "step_time": 0.3610269680023193} +{"epoch": 0, "iter": 768, "iter_tflops": 56.0015099755657, "iter_time": 0.3684024505615235, "loss": 0.08827763050794601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.10297016810341, "step_time": 0.3376446914672852} +{"epoch": 0, "iter": 769, "iter_tflops": 30.642210562541038, "iter_time": 0.6732899856567383, "loss": 0.0045636314898729324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.586961045286365, "step_time": 0.6331088523864747} +{"epoch": 0, "iter": 770, "iter_tflops": 13.97703922525929, "iter_time": 1.4760703735351561, "loss": 0.011674805544316769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.204514867582457, "step_time": 1.2731694641113283} +{"epoch": 0, "iter": 771, "iter_tflops": 59.89448863970434, "iter_time": 0.34445729446411133, "loss": 0.008030731230974197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.13220733948845, "step_time": 0.3119674110412598} +{"epoch": 0, "iter": 772, "iter_tflops": 67.25514755057947, "iter_time": 0.3067585792541504, "loss": 0.0043703243136405945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 73.55390085478192, "step_time": 0.2804894542694092} +{"epoch": 0, "iter": 773, "iter_tflops": 46.751538587986474, "iter_time": 0.37851736450195317, "loss": 0.004645403474569321, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 51.607916404057306, 
"step_time": 0.3428983459472656} +{"epoch": 0, "iter": 774, "iter_tflops": 44.258038046064364, "iter_time": 0.39984305572509765, "loss": 0.029003333300352097, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 48.823742810623955, "step_time": 0.36245212173461905} +{"epoch": 0, "iter": 775, "iter_tflops": 51.51182879622981, "iter_time": 0.343537971496582, "loss": 0.01263091154396534, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 56.86059825966199, "step_time": 0.3112220010757446} +{"epoch": 0, "iter": 776, "iter_tflops": 51.28925101895553, "iter_time": 0.34502880859375, "loss": 0.0019553222227841616, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 56.28707825905685, "step_time": 0.31439310264587406} +{"epoch": 0, "iter": 777, "iter_tflops": 28.838809065915942, "iter_time": 0.7153933944702148, "loss": 1.0638107061386108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.381296130940868, "step_time": 0.6790721969604492} +{"epoch": 0, "iter": 778, "iter_tflops": 10.270217759901609, "iter_time": 2.0088272705078123, "loss": 0.9588099718093872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.257884891188532, "step_time": 1.6830875549316409} +{"epoch": 0, "iter": 779, "iter_tflops": 16.659824188556428, "iter_time": 1.2383740234375, "loss": 0.9386553764343262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.50471013637433, "step_time": 0.9593755683898926} +{"epoch": 0, "iter": 780, "iter_tflops": 43.722145458850896, "iter_time": 0.471868278503418, "loss": 0.9522782564163208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.10171782524039, "step_time": 0.43801148796081546} +{"epoch": 0, "iter": 781, "iter_tflops": 22.96940530405575, "iter_time": 0.7167513046264649, "loss": 0.4165007174015045, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 24.518774750816757, "step_time": 0.671458969116211} +{"epoch": 0, "iter": 782, "iter_tflops": 10.940812280852684, "iter_time": 1.5047649841308595, "loss": 0.31197914481163025, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 12.473345153716059, 
"step_time": 1.3198825988769531} +{"epoch": 0, "iter": 783, "iter_tflops": 30.07314438260024, "iter_time": 0.5474436264038086, "loss": 0.5726213455200195, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.999936928997165, "step_time": 0.5144807395935058} +{"epoch": 0, "iter": 784, "iter_tflops": 29.759996089740227, "iter_time": 0.5532040786743164, "loss": 0.4626636505126953, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.693227935739998, "step_time": 0.5194595909118653} +{"epoch": 0, "iter": 785, "iter_tflops": 28.326090686498414, "iter_time": 0.7283424224853515, "loss": 0.2543029487133026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.336208149298017, "step_time": 0.680081485748291} +{"epoch": 0, "iter": 786, "iter_tflops": 13.113840292156091, "iter_time": 1.573230499267578, "loss": 0.31242263317108154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.721974337803747, "step_time": 1.1641532211303711} +{"epoch": 0, "iter": 787, "iter_tflops": 44.93958699173347, "iter_time": 0.45908507156372064, "loss": 0.3587796986103058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.3865352802086, "step_time": 0.42638088035583493} +{"epoch": 0, "iter": 788, "iter_tflops": 49.387507809815226, "iter_time": 0.4177391090393066, "loss": 0.3057902455329895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.456862214865794, "step_time": 0.38593910408020016} +{"epoch": 0, "iter": 789, "iter_tflops": 24.284989770695137, "iter_time": 0.6728556671142578, "loss": 0.12380960583686829, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 25.773370324738938, "step_time": 0.6339990768432617} +{"epoch": 0, "iter": 790, "iter_tflops": 13.54386733492776, "iter_time": 1.2064717254638673, "loss": 0.11566890776157379, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 16.023798461899478, "step_time": 1.0197515296936035} +{"epoch": 0, "iter": 791, "iter_tflops": 41.48317870732787, "iter_time": 0.39390166091918943, "loss": 0.08094920217990875, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 45.239703491656826, 
"step_time": 0.36119363594055176} +{"epoch": 0, "iter": 792, "iter_tflops": 42.539628042159016, "iter_time": 0.38411931991577153, "loss": 0.18701577186584473, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 46.42094633126929, "step_time": 0.3520025825500488} +{"epoch": 0, "iter": 793, "iter_tflops": 2.6580071373463263, "iter_time": 0.604414825439453, "loss": 1.112372636795044, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.8513157605129016, "step_time": 0.5634377441406251} +{"epoch": 0, "iter": 794, "iter_tflops": 2.7419587728518535, "iter_time": 0.5859092178344727, "loss": 1.1664987802505493, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.459645964721256, "step_time": 0.4643651218414307} +{"epoch": 0, "iter": 795, "iter_tflops": 3.7389766000925726, "iter_time": 0.4296734352111816, "loss": 1.1623526811599731, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.058080989484489, "step_time": 0.39588636207580563} +{"epoch": 0, "iter": 796, "iter_tflops": 3.911084536471713, "iter_time": 0.41076558303833005, "loss": 1.3354873657226562, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.2359607975950695, "step_time": 0.3792619895935059} +{"epoch": 0, "iter": 797, "iter_tflops": 20.658401365971873, "iter_time": 0.9986781234741211, "loss": 0.4498780369758606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.55993055878622, "step_time": 0.9569183654785156} +{"epoch": 0, "iter": 798, "iter_tflops": 13.283713169046212, "iter_time": 1.5531119384765628, "loss": 0.4842224419116974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.935265075433414, "step_time": 1.2182326889038086} +{"epoch": 0, "iter": 799, "iter_tflops": 40.300553676272244, "iter_time": 0.5119307708740234, "loss": 0.58039391040802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.983787745876924, "step_time": 0.46906131935119627} +{"epoch": 0, "iter": 800, "iter_tflops": 41.385887161091844, "iter_time": 0.4985055274963379, "loss": 0.6182308793067932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.30740661732214, 
"step_time": 0.4553580760955811} +{"epoch": 0, "iter": 801, "iter_tflops": 18.471164793588233, "iter_time": 0.9959744262695311, "loss": 0.03194881230592728, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 19.62981510390894, "step_time": 0.93718701171875} +{"epoch": 0, "iter": 802, "iter_tflops": 9.53134963163389, "iter_time": 1.930136703491211, "loss": 0.009665610268712044, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 10.848785221744583, "step_time": 1.695748176574707} +{"epoch": 0, "iter": 803, "iter_tflops": 14.176698437721198, "iter_time": 1.2976792755126954, "loss": 0.01088220439851284, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 16.71607104056617, "step_time": 1.100546157836914} +{"epoch": 0, "iter": 804, "iter_tflops": 28.036155865144178, "iter_time": 0.6561815338134765, "loss": 0.002667528111487627, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 35.884218653135456, "step_time": 0.5126712646484375} +{"epoch": 0, "iter": 805, "iter_tflops": 17.717238480969645, "iter_time": 0.8206876068115234, "loss": 0.5601742267608643, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 18.64421116604431, "step_time": 0.7798837890625} +{"epoch": 0, "iter": 806, "iter_tflops": 12.051114062704611, "iter_time": 1.2065538482666016, "loss": 0.6730489730834961, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 15.646672521760674, "step_time": 0.9292913894653321} +{"epoch": 0, "iter": 807, "iter_tflops": 22.77200832351161, "iter_time": 0.6385171585083008, "loss": 0.674596905708313, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.443593437579818, "step_time": 0.5948519020080567} +{"epoch": 0, "iter": 808, "iter_tflops": 23.038698415748897, "iter_time": 0.6311258468627929, "loss": 0.3954992890357971, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.746013463691607, "step_time": 0.5875822410583497} +{"epoch": 0, "iter": 809, "iter_tflops": 20.15891650811843, "iter_time": 1.023422737121582, "loss": 0.11042895168066025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.16942405637347, "step_time": 
0.9745703735351561} +{"epoch": 0, "iter": 810, "iter_tflops": 26.176633180724277, "iter_time": 0.7881492385864258, "loss": 0.1122390627861023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.870800996866176, "step_time": 0.6906776123046875} +{"epoch": 0, "iter": 811, "iter_tflops": 55.335376827489284, "iter_time": 0.3728373184204102, "loss": 0.17669664323329926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.233535783093494, "step_time": 0.3425183868408203} +{"epoch": 0, "iter": 812, "iter_tflops": 53.141482782656006, "iter_time": 0.3882295417785644, "loss": 0.09455779194831848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.815346632379566, "step_time": 0.3568445873260498} +{"epoch": 0, "iter": 813, "iter_tflops": 30.56937291777835, "iter_time": 0.6748942337036133, "loss": 0.6577077507972717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.70362460746731, "step_time": 0.6308503646850586} +{"epoch": 0, "iter": 814, "iter_tflops": 37.9849701922797, "iter_time": 0.5431383361816406, "loss": 0.5677343010902405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.37158141421463, "step_time": 0.49867790412902824} +{"epoch": 0, "iter": 815, "iter_tflops": 45.38970873025932, "iter_time": 0.4545324058532715, "loss": 0.7501932382583618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.468779450382065, "step_time": 0.4170528106689454} +{"epoch": 0, "iter": 816, "iter_tflops": 43.72137457531855, "iter_time": 0.47187659835815426, "loss": 0.6729366183280945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.41400826933303, "step_time": 0.43512654304504395} +{"epoch": 0, "iter": 817, "iter_tflops": 18.814789375099238, "iter_time": 1.0965359802246093, "loss": 0.12116573750972748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.162920922216937, "step_time": 1.023219482421875} +{"epoch": 0, "iter": 818, "iter_tflops": 16.39242580863138, "iter_time": 1.2585747680664061, "loss": 0.12230954319238663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.940660487634535, "step_time": 
0.9403132381439209} +{"epoch": 0, "iter": 819, "iter_tflops": 55.45318064359945, "iter_time": 0.37204526901245116, "loss": 0.07181768864393234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.495513375210905, "step_time": 0.34103510093688966} +{"epoch": 0, "iter": 820, "iter_tflops": 58.33559478852949, "iter_time": 0.3536621780395508, "loss": 0.11749129742383957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.32372471079661, "step_time": 0.32580353736877443} +{"epoch": 0, "iter": 821, "iter_tflops": 27.866456284811406, "iter_time": 0.7403558349609374, "loss": 0.027510961517691612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.499837231998917, "step_time": 0.6993629608154296} +{"epoch": 0, "iter": 822, "iter_tflops": 14.153310108271366, "iter_time": 1.457686813354492, "loss": 0.014609990641474724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.643305225051353, "step_time": 1.2396031455993652} +{"epoch": 0, "iter": 823, "iter_tflops": 42.53476647100268, "iter_time": 0.48504071426391604, "loss": 0.01823023147881031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.23048107389446, "step_time": 0.43681734848022463} +{"epoch": 0, "iter": 824, "iter_tflops": 47.12089804654732, "iter_time": 0.4378331985473632, "loss": 0.030100399628281593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.15472239998005, "step_time": 0.3955747928619384} +{"epoch": 0, "iter": 825, "iter_tflops": 37.71896676016667, "iter_time": 0.5469686813354493, "loss": 0.5706030130386353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36083269462096, "step_time": 0.49880749893188475} +{"epoch": 0, "iter": 826, "iter_tflops": 37.10468354871305, "iter_time": 0.5560239715576172, "loss": 0.5867719054222107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.466754643043224, "step_time": 0.4858175220489502} +{"epoch": 0, "iter": 827, "iter_tflops": 41.70976540152946, "iter_time": 0.4946346092224121, "loss": 0.832788348197937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.56850092295605, "step_time": 
0.45274900627136233} +{"epoch": 0, "iter": 828, "iter_tflops": 38.78290284583249, "iter_time": 0.531963623046875, "loss": 0.8188380002975464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.43457522209615, "step_time": 0.48618593215942385} +{"epoch": 0, "iter": 829, "iter_tflops": 17.525605250933506, "iter_time": 1.1771972045898438, "loss": 0.13831989467144012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.806251889506004, "step_time": 1.09703377532959} +{"epoch": 0, "iter": 830, "iter_tflops": 20.439776719742408, "iter_time": 1.0093600234985352, "loss": 0.07940849661827087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.49988295930894, "step_time": 0.8090662040710449} +{"epoch": 0, "iter": 831, "iter_tflops": 51.27972018620454, "iter_time": 0.40232461166381833, "loss": 0.08790869265794754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.91802044333213, "step_time": 0.3689525012969971} +{"epoch": 0, "iter": 832, "iter_tflops": 45.108014904615, "iter_time": 0.4573709030151367, "loss": 0.08307979255914688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.922772347287506, "step_time": 0.4217073669433594} +{"epoch": 0, "iter": 833, "iter_tflops": 37.33343694081698, "iter_time": 0.5526170425415039, "loss": 0.7298507690429688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.194131144498854, "step_time": 0.5132862167358399} +{"epoch": 0, "iter": 834, "iter_tflops": 23.548831497974188, "iter_time": 0.8760983963012695, "loss": 0.7482532262802124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.212186536873553, "step_time": 0.7312830390930176} +{"epoch": 0, "iter": 835, "iter_tflops": 48.70306764604269, "iter_time": 0.42360973358154297, "loss": 0.5666559338569641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.391924719113135, "step_time": 0.3937838439941407} +{"epoch": 0, "iter": 836, "iter_tflops": 49.80588977847506, "iter_time": 0.4142299957275391, "loss": 0.6503618955612183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.008378108473266, "step_time": 
0.38199802017211915} +{"epoch": 0, "iter": 837, "iter_tflops": 40.49154977637549, "iter_time": 0.5095160255432128, "loss": 0.32397323846817017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.80744708917441, "step_time": 0.47094945907592767} +{"epoch": 0, "iter": 838, "iter_tflops": 10.535344018839687, "iter_time": 1.9582743072509767, "loss": 0.37459370493888855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.428778241817694, "step_time": 1.6599454193115233} +{"epoch": 0, "iter": 839, "iter_tflops": 13.817667691275147, "iter_time": 1.49309521484375, "loss": 0.3967149257659912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.112977312584093, "step_time": 1.205581771850586} +{"epoch": 0, "iter": 840, "iter_tflops": 18.44328701199137, "iter_time": 1.1186234588623047, "loss": 0.415595680475235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.422075292086895, "step_time": 0.920124174118042} +{"epoch": 0, "iter": 841, "iter_tflops": 25.53619411217489, "iter_time": 0.5997941284179689, "loss": 0.4112105667591095, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 27.293379932434856, "step_time": 0.5611785469055176} +{"epoch": 0, "iter": 842, "iter_tflops": 12.543617404677928, "iter_time": 1.2210559997558594, "loss": 0.7077013254165649, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 14.948768590603086, "step_time": 1.02459672164917} +{"epoch": 0, "iter": 843, "iter_tflops": 23.705340607882373, "iter_time": 0.6461185073852539, "loss": 0.630138635635376, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 25.474086130395776, "step_time": 0.601256477355957} +{"epoch": 0, "iter": 844, "iter_tflops": 22.181799520370873, "iter_time": 0.6904966964721679, "loss": 0.3936345875263214, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 23.934692911625092, "step_time": 0.6399271278381347} +{"epoch": 0, "iter": 845, "iter_tflops": 18.121641325500406, "iter_time": 1.1384781951904297, "loss": 1.2966915369033813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.220268031532637, "step_time": 
1.0734030075073242} +{"epoch": 0, "iter": 846, "iter_tflops": 19.201795868694834, "iter_time": 1.0744356231689451, "loss": 1.1240768432617188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.435125417612802, "step_time": 0.7255495872497558} +{"epoch": 0, "iter": 847, "iter_tflops": 37.43169325964939, "iter_time": 0.5511664505004883, "loss": 1.0731935501098633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.55841260031527, "step_time": 0.5086760597229004} +{"epoch": 0, "iter": 848, "iter_tflops": 39.47881861665104, "iter_time": 0.5225863952636719, "loss": 1.0488693714141846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.92539289101561, "step_time": 0.4806267833709717} +{"epoch": 0, "iter": 849, "iter_tflops": 24.716252895217963, "iter_time": 0.8347176895141601, "loss": 0.30957913398742676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.781605521282327, "step_time": 0.7703456573486328} +{"epoch": 0, "iter": 850, "iter_tflops": 14.808692225963982, "iter_time": 1.3931745758056642, "loss": 0.38486045598983765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.989768278150038, "step_time": 1.1468237495422364} +{"epoch": 0, "iter": 851, "iter_tflops": 47.38942921575772, "iter_time": 0.4353522262573243, "loss": 0.4658806324005127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.18328810324451, "step_time": 0.4030826129913331} +{"epoch": 0, "iter": 852, "iter_tflops": 48.05227113153153, "iter_time": 0.42934689712524415, "loss": 0.3457464575767517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.73113470346755, "step_time": 0.39881385993957524} +{"epoch": 0, "iter": 853, "iter_tflops": 36.27250822272706, "iter_time": 0.5687804489135742, "loss": 0.8837136030197144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.81988793484653, "step_time": 0.5314568023681641} +{"epoch": 0, "iter": 854, "iter_tflops": 18.793320360227874, "iter_time": 1.0977886352539064, "loss": 1.0117727518081665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.699804143300895, "step_time": 
0.9088665866851807} +{"epoch": 0, "iter": 855, "iter_tflops": 40.286267316645784, "iter_time": 0.5121123123168946, "loss": 0.9530600905418396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13075422481534, "step_time": 0.46749922752380374} +{"epoch": 0, "iter": 856, "iter_tflops": 42.69927370036908, "iter_time": 0.48317200088500983, "loss": 0.8680264949798584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.3401276911186, "step_time": 0.4452101135253906} +{"epoch": 0, "iter": 857, "iter_tflops": 20.86659620870174, "iter_time": 0.9887138900756837, "loss": 0.1479032337665558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.535208789595675, "step_time": 0.9155048751831054} +{"epoch": 0, "iter": 858, "iter_tflops": 24.235959288178908, "iter_time": 0.851259620666504, "loss": 0.2604547441005707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.113230615060885, "step_time": 0.760923469543457} +{"epoch": 0, "iter": 859, "iter_tflops": 42.51332299120549, "iter_time": 0.4852853660583496, "loss": 0.20688121020793915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.368336855142715, "step_time": 0.4449392604827881} +{"epoch": 0, "iter": 860, "iter_tflops": 38.927043737532294, "iter_time": 0.5299938430786133, "loss": 0.2139144092798233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1209248366742, "step_time": 0.48980628013610844} +{"epoch": 0, "iter": 861, "iter_tflops": 21.895497341542182, "iter_time": 0.9422527923583983, "loss": 0.6176660656929016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.49929065298259, "step_time": 0.8779453735351563} +{"epoch": 0, "iter": 862, "iter_tflops": 13.620368140580684, "iter_time": 1.5147236328125, "loss": 0.6593320369720459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.337111391556313, "step_time": 1.1899960174560547} +{"epoch": 0, "iter": 863, "iter_tflops": 41.30566240227678, "iter_time": 0.49947373580932614, "loss": 0.5022221803665161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.15651680051097, "step_time": 
0.45687964820861815} +{"epoch": 0, "iter": 864, "iter_tflops": 41.767397522260495, "iter_time": 0.4939520950317383, "loss": 0.5348890423774719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92816104686699, "step_time": 0.4592018241882324} +{"epoch": 0, "iter": 865, "iter_tflops": 19.341377266440688, "iter_time": 1.0666817169189453, "loss": 0.15741264820098877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.9304806480721, "step_time": 0.985696117401123} +{"epoch": 0, "iter": 866, "iter_tflops": 33.99491537518518, "iter_time": 0.6068876266479492, "loss": 0.08796004951000214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.67166364562634, "step_time": 0.4420475273132325} +{"epoch": 0, "iter": 867, "iter_tflops": 55.023670256993185, "iter_time": 0.37494942474365234, "loss": 0.10265950113534927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.6749245562628, "step_time": 0.3457246685028076} +{"epoch": 0, "iter": 868, "iter_tflops": 50.819748074785885, "iter_time": 0.4059660720825195, "loss": 0.13508115708827972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.21480140204074, "step_time": 0.37365150260925295} +{"epoch": 0, "iter": 869, "iter_tflops": 24.752299206848825, "iter_time": 0.8335021057128906, "loss": 0.017451591789722443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.945459211052135, "step_time": 0.795171646118164} +{"epoch": 0, "iter": 870, "iter_tflops": 21.08062112557936, "iter_time": 0.9786757888793945, "loss": 0.03381400927901268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.658291221759693, "step_time": 0.8366797733306885} +{"epoch": 0, "iter": 871, "iter_tflops": 53.29576298245446, "iter_time": 0.3871056976318359, "loss": 0.06091003492474556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.10113042902552, "step_time": 0.3550893650054932} +{"epoch": 0, "iter": 872, "iter_tflops": 49.2761929450563, "iter_time": 0.4186827812194824, "loss": 0.05155026167631149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.76030630605424, "step_time": 
0.3837607135772705} +{"epoch": 0, "iter": 873, "iter_tflops": 33.34686681917929, "iter_time": 0.6186816177368164, "loss": 1.134688138961792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.611151398550774, "step_time": 0.5793436241149902} +{"epoch": 0, "iter": 874, "iter_tflops": 17.65282126880325, "iter_time": 1.1687136688232422, "loss": 0.979210376739502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.61674490248185, "step_time": 1.0006959686279298} +{"epoch": 0, "iter": 875, "iter_tflops": 43.99059464313188, "iter_time": 0.4689887390136719, "loss": 1.1619068384170532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.682707283605936, "step_time": 0.4326745414733887} +{"epoch": 0, "iter": 876, "iter_tflops": 42.965488773968005, "iter_time": 0.48017825698852534, "loss": 1.1556462049484253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.09932639712603, "step_time": 0.4475356826782227} +{"epoch": 0, "iter": 877, "iter_tflops": 30.62439662833375, "iter_time": 0.58322713470459, "loss": 0.09531090408563614, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 32.84894143792876, "step_time": 0.5437307357788086} +{"epoch": 0, "iter": 878, "iter_tflops": 10.036888671603569, "iter_time": 1.779533447265625, "loss": 0.04776482284069061, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 12.148193001569771, "step_time": 1.4702580947875976} +{"epoch": 0, "iter": 879, "iter_tflops": 36.646373536137524, "iter_time": 0.48738735580444337, "loss": 0.07521439343690872, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 41.16341678211058, "step_time": 0.43390419197082525} +{"epoch": 0, "iter": 880, "iter_tflops": 51.71113938978344, "iter_time": 0.34539906311035157, "loss": 0.08138392865657806, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 56.54340859703731, "step_time": 0.31588083457946775} +{"epoch": 0, "iter": 881, "iter_tflops": 31.641301191086896, "iter_time": 0.4595360336303711, "loss": 0.049983423203229904, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 34.447198881666914, "step_time": 
0.42210451126098636} +{"epoch": 0, "iter": 882, "iter_tflops": 6.81504702072735, "iter_time": 2.1335609283447265, "loss": 0.06324494630098343, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 8.456695983532134, "step_time": 1.719385215759277} +{"epoch": 0, "iter": 883, "iter_tflops": 9.709635047816212, "iter_time": 1.4975143737792969, "loss": 0.06309834122657776, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 11.930439327373264, "step_time": 1.218757972717285} +{"epoch": 0, "iter": 884, "iter_tflops": 11.163455534657375, "iter_time": 1.3024925842285155, "loss": 0.053811002522706985, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 13.594477664847215, "step_time": 1.0695753383636475} +{"epoch": 0, "iter": 885, "iter_tflops": 23.851605288083185, "iter_time": 0.7091735687255859, "loss": 0.5426276922225952, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 25.938050164000416, "step_time": 0.6521279716491699} +{"epoch": 0, "iter": 886, "iter_tflops": 30.51356143617316, "iter_time": 0.554341323852539, "loss": 0.49217790365219116, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 32.53523248737308, "step_time": 0.5198957176208496} +{"epoch": 0, "iter": 887, "iter_tflops": 29.164880568560417, "iter_time": 0.5799759063720703, "loss": 0.5468316078186035, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 30.92328504900579, "step_time": 0.5469964790344238} +{"epoch": 0, "iter": 888, "iter_tflops": 31.130194732770427, "iter_time": 0.5433608169555664, "loss": 0.5746917128562927, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 33.14592624039235, "step_time": 0.510316951751709} +{"epoch": 0, "iter": 889, "iter_tflops": 42.22745017738815, "iter_time": 0.48857066726684567, "loss": 0.043001893907785416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06784232365637, "step_time": 0.44784154129028314} +{"epoch": 0, "iter": 890, "iter_tflops": 41.31174649155825, "iter_time": 0.49940017700195316, "loss": 0.06483916938304901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.93713789305654, "step_time": 
0.44911577987670903} +{"epoch": 0, "iter": 891, "iter_tflops": 44.30223743251875, "iter_time": 0.4656896514892578, "loss": 0.05441895127296448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.800482940537236, "step_time": 0.4227641258239746} +{"epoch": 0, "iter": 892, "iter_tflops": 44.231468675695545, "iter_time": 0.4664347381591797, "loss": 0.07412265986204147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62257577178261, "step_time": 0.4243109951019287} +{"epoch": 0, "iter": 893, "iter_tflops": 22.959266712521337, "iter_time": 0.8985954895019531, "loss": 0.09091497212648392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.87842133601294, "step_time": 0.8292766342163085} +{"epoch": 0, "iter": 894, "iter_tflops": 16.170631680745085, "iter_time": 1.2758372039794923, "loss": 0.14312759041786194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.087757726750183, "step_time": 0.9783445816040038} +{"epoch": 0, "iter": 895, "iter_tflops": 41.74863770543307, "iter_time": 0.4941740531921387, "loss": 0.12832343578338623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.62374895250009, "step_time": 0.4522007503509522} +{"epoch": 0, "iter": 896, "iter_tflops": 44.773797324500315, "iter_time": 0.46078498458862305, "loss": 0.12631072103977203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.018117477071996, "step_time": 0.4208871040344238} +{"epoch": 0, "iter": 897, "iter_tflops": 14.14700569860472, "iter_time": 1.458336410522461, "loss": 1.120560646057129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.244071554503336, "step_time": 1.3533847198486328} +{"epoch": 0, "iter": 898, "iter_tflops": 15.256069774198343, "iter_time": 1.352320343017578, "loss": 0.9134288430213928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.373731020748508, "step_time": 1.0649003791809082} +{"epoch": 0, "iter": 899, "iter_tflops": 35.74367473961383, "iter_time": 0.5771956481933593, "loss": 1.1329594850540161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.820961432358246, "step_time": 
0.5314421062469482} +{"epoch": 0, "iter": 900, "iter_tflops": 36.6862442032501, "iter_time": 0.5623659210205079, "loss": 1.1969612836837769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.78027408385581, "step_time": 0.5186262283325195} +{"epoch": 0, "iter": 901, "iter_tflops": 18.686934354755742, "iter_time": 1.1040384216308592, "loss": 1.055508017539978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.796124102156586, "step_time": 1.0421784286499023} +{"epoch": 0, "iter": 902, "iter_tflops": 15.08977228359663, "iter_time": 1.367223648071289, "loss": 1.1389315128326416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.50462711331509, "step_time": 1.0577538032531737} +{"epoch": 0, "iter": 903, "iter_tflops": 35.83157778262477, "iter_time": 0.575779655456543, "loss": 1.0801658630371094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.84757011160602, "step_time": 0.5310780944824218} +{"epoch": 0, "iter": 904, "iter_tflops": 36.49775279438209, "iter_time": 0.5652702407836914, "loss": 0.9505773186683655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.57276519428319, "step_time": 0.5213457641601563} +{"epoch": 0, "iter": 905, "iter_tflops": 22.97313574799497, "iter_time": 0.8980530014038085, "loss": 0.07593385130167007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.9716187386853, "step_time": 0.8261816635131836} +{"epoch": 0, "iter": 906, "iter_tflops": 15.48829191967553, "iter_time": 1.3320444641113283, "loss": 0.05149287357926369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.45509847314926, "step_time": 1.0604466247558595} +{"epoch": 0, "iter": 907, "iter_tflops": 54.039043326889434, "iter_time": 0.38178125, "loss": 0.06288031488656998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.989657080657594, "step_time": 0.34974086189270015} +{"epoch": 0, "iter": 908, "iter_tflops": 56.91157309871192, "iter_time": 0.3625113906860352, "loss": 0.03621210157871246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.031998754698016, "step_time": 0.3325879211425781} 
+{"epoch": 0, "iter": 909, "iter_tflops": 29.061179925805227, "iter_time": 0.709919334411621, "loss": 0.7154364585876465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.768920709760426, "step_time": 0.6705172958374024} +{"epoch": 0, "iter": 910, "iter_tflops": 9.761637291788608, "iter_time": 2.113486999511719, "loss": 0.4982556104660034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.23126945724157, "step_time": 1.8369333572387694} +{"epoch": 0, "iter": 911, "iter_tflops": 13.012253621549494, "iter_time": 1.585512710571289, "loss": 0.6098190546035767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.3897761875929, "step_time": 1.1863921241760256} +{"epoch": 0, "iter": 912, "iter_tflops": 33.893832928545926, "iter_time": 0.6086975631713867, "loss": 0.55910325050354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59622333270194, "step_time": 0.45247373580932615} +{"epoch": 0, "iter": 913, "iter_tflops": 26.037295420195836, "iter_time": 0.5756917190551758, "loss": 0.5370435118675232, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.906484229139537, "step_time": 0.5371316299438476} +{"epoch": 0, "iter": 914, "iter_tflops": 25.981690168031236, "iter_time": 0.5769237976074219, "loss": 0.47541916370391846, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.573712680762522, "step_time": 0.5436139678955079} +{"epoch": 0, "iter": 915, "iter_tflops": 27.689384848458904, "iter_time": 0.5413430252075194, "loss": 0.5203543305397034, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.506681132508984, "step_time": 0.5080020790100098} +{"epoch": 0, "iter": 916, "iter_tflops": 27.023227111629712, "iter_time": 0.5546878356933593, "loss": 0.5084055066108704, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.555417535074433, "step_time": 0.5249250984191894} +{"epoch": 0, "iter": 917, "iter_tflops": 37.2574772334777, "iter_time": 0.5537437057495117, "loss": 0.12560288608074188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14771008779548, "step_time": 0.5138797073364257} +{"epoch": 
0, "iter": 918, "iter_tflops": 32.19285436480672, "iter_time": 0.6408594055175783, "loss": 0.1317455917596817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.72825643398428, "step_time": 0.519305284500122} +{"epoch": 0, "iter": 919, "iter_tflops": 54.27406107142621, "iter_time": 0.3801280593872071, "loss": 0.12630212306976318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.27451816755395, "step_time": 0.34806007957458496} +{"epoch": 0, "iter": 920, "iter_tflops": 54.94697706517102, "iter_time": 0.3754727668762208, "loss": 0.11404042690992355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.92987715094138, "step_time": 0.3442538928985596} +{"epoch": 0, "iter": 921, "iter_tflops": 21.00563508004278, "iter_time": 0.9821694717407227, "loss": 0.8028964996337891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.93189542478275, "step_time": 0.9406890335083008} +{"epoch": 0, "iter": 922, "iter_tflops": 17.420412766898018, "iter_time": 1.1843056640625, "loss": 0.8334911465644836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.576574270513213, "step_time": 1.053866382598877} +{"epoch": 0, "iter": 923, "iter_tflops": 47.098158045169036, "iter_time": 0.43804459381103517, "loss": 0.8718349933624268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.8588890565578, "step_time": 0.4056536407470704} +{"epoch": 0, "iter": 924, "iter_tflops": 48.52224979411499, "iter_time": 0.4251883125305176, "loss": 0.7796390056610107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.04006144392055, "step_time": 0.3964463710784912} +{"epoch": 0, "iter": 925, "iter_tflops": 36.29445104450543, "iter_time": 0.5684365768432617, "loss": 1.1319745779037476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.97295581960708, "step_time": 0.5293694839477538} +{"epoch": 0, "iter": 926, "iter_tflops": 43.18295340145442, "iter_time": 0.47776013183593746, "loss": 1.3331193923950195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.33372865462674, "step_time": 0.43586453247070317} +{"epoch": 0, "iter": 927, 
"iter_tflops": 46.21536747613292, "iter_time": 0.4464119758605957, "loss": 0.9853516817092896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.81899647808462, "step_time": 0.4141210174560547} +{"epoch": 0, "iter": 928, "iter_tflops": 45.32149500371313, "iter_time": 0.4552165260314941, "loss": 1.2332333326339722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.68223976166106, "step_time": 0.4237909679412841} +{"epoch": 0, "iter": 929, "iter_tflops": 28.459402141164343, "iter_time": 0.7249306716918945, "loss": 0.2950584292411804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.060700582450764, "step_time": 0.6863144607543945} +{"epoch": 0, "iter": 930, "iter_tflops": 12.486012532143919, "iter_time": 1.652336441040039, "loss": 0.3178274631500244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.727595379387978, "step_time": 1.2333568000793458} +{"epoch": 0, "iter": 931, "iter_tflops": 44.956193077639576, "iter_time": 0.4589154930114746, "loss": 0.16234692931175232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.36497698081751, "step_time": 0.4265709362030029} +{"epoch": 0, "iter": 932, "iter_tflops": 54.093173976304904, "iter_time": 0.3813992042541504, "loss": 0.17790795862674713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.87157196490379, "step_time": 0.3504423751831055} +{"epoch": 0, "iter": 933, "iter_tflops": 48.985519226024856, "iter_time": 0.4211671905517579, "loss": 0.0571935661137104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.83418447709414, "step_time": 0.3832340679168701} +{"epoch": 0, "iter": 934, "iter_tflops": 52.291441011495266, "iter_time": 0.3945405426025391, "loss": 0.07864275574684143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.26476245635417, "step_time": 0.36027554512023924} +{"epoch": 0, "iter": 935, "iter_tflops": 52.67828910415278, "iter_time": 0.39164319610595705, "loss": 0.03268333896994591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.880383947064026, "step_time": 0.3564436187744141} +{"epoch": 0, "iter": 936, 
"iter_tflops": 53.10545455791165, "iter_time": 0.3884929275512696, "loss": 0.0729479193687439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.973028935657084, "step_time": 0.3558739967346191} +{"epoch": 0, "iter": 937, "iter_tflops": 28.636489586757833, "iter_time": 0.7204477157592772, "loss": 0.6452608704566956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.364937604294415, "step_time": 0.6794380340576172} +{"epoch": 0, "iter": 938, "iter_tflops": 14.939671569498826, "iter_time": 1.3809603118896483, "loss": 0.6027958393096924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.410153987551908, "step_time": 1.1850035057067871} +{"epoch": 0, "iter": 939, "iter_tflops": 49.2696434217985, "iter_time": 0.4187384376525879, "loss": 0.6322644352912903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.56935048936901, "step_time": 0.3851286849975586} +{"epoch": 0, "iter": 940, "iter_tflops": 51.718649358401635, "iter_time": 0.3989101371765137, "loss": 0.5541962385177612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.720562335667566, "step_time": 0.3702599658966065} +{"epoch": 0, "iter": 941, "iter_tflops": 25.35155655058572, "iter_time": 0.8137998733520506, "loss": 1.0563360452651978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.666542498610717, "step_time": 0.7736696090698243} +{"epoch": 0, "iter": 942, "iter_tflops": 12.528961624928733, "iter_time": 1.6466722564697265, "loss": 1.0113710165023804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.36726362311283, "step_time": 1.4359793243408203} +{"epoch": 0, "iter": 943, "iter_tflops": 35.796046211381565, "iter_time": 0.5763511810302734, "loss": 0.9356115460395813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.80994408225129, "step_time": 0.5315929718017578} +{"epoch": 0, "iter": 944, "iter_tflops": 35.64903121218571, "iter_time": 0.57872802734375, "loss": 1.0718162059783936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.56511078784675, "step_time": 0.5349678268432618} +{"epoch": 0, "iter": 945, 
"iter_tflops": 33.545480605481174, "iter_time": 0.6150185699462891, "loss": 0.05083077773451805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.352787697233076, "step_time": 0.5523307571411133} +{"epoch": 0, "iter": 946, "iter_tflops": 38.143019162450116, "iter_time": 0.5408877944946289, "loss": 0.0302981398999691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.22510757847113, "step_time": 0.47729420852661136} +{"epoch": 0, "iter": 947, "iter_tflops": 45.65591387614526, "iter_time": 0.4518821716308594, "loss": 0.0947599783539772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.16314682225992, "step_time": 0.4112798900604248} +{"epoch": 0, "iter": 948, "iter_tflops": 49.292005163975034, "iter_time": 0.4185484733581543, "loss": 0.08310215175151825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.27639039372085, "step_time": 0.3801117458343506} +{"epoch": 0, "iter": 949, "iter_tflops": 23.891169884685805, "iter_time": 0.8635447158813475, "loss": 0.31878915429115295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.84509604687994, "step_time": 0.7982595024108887} +{"epoch": 0, "iter": 950, "iter_tflops": 21.8229179983993, "iter_time": 0.9453865661621094, "loss": 0.15973491966724396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.44795396332411, "step_time": 0.7800638771057128} +{"epoch": 0, "iter": 951, "iter_tflops": 43.54839853618645, "iter_time": 0.4737509117126465, "loss": 0.3011664152145386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.97963966782004, "step_time": 0.42999684143066397} +{"epoch": 0, "iter": 952, "iter_tflops": 42.23108088811679, "iter_time": 0.4885286636352539, "loss": 0.2263231873512268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.208946749105706, "step_time": 0.4464740047454834} +{"epoch": 0, "iter": 953, "iter_tflops": 23.921886536300526, "iter_time": 0.8624358901977539, "loss": 0.39401674270629883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.111029357523304, "step_time": 0.7901294593811035} +{"epoch": 0, "iter": 954, 
"iter_tflops": 47.97505628974928, "iter_time": 0.43003792190551754, "loss": 0.37174850702285767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.7370993895102, "step_time": 0.3912064514160156} +{"epoch": 0, "iter": 955, "iter_tflops": 46.122920408375315, "iter_time": 0.44730674743652343, "loss": 0.23891323804855347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.38652480968631, "step_time": 0.40945656776428224} +{"epoch": 0, "iter": 956, "iter_tflops": 47.4239343172438, "iter_time": 0.43503546905517587, "loss": 0.20699362456798553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.11430009009824, "step_time": 0.40362664604187004} +{"epoch": 0, "iter": 957, "iter_tflops": 40.706002196558984, "iter_time": 0.5068317298889161, "loss": 0.6533001661300659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.0884676213721, "step_time": 0.4679476203918457} +{"epoch": 0, "iter": 958, "iter_tflops": 46.81695525363481, "iter_time": 0.44067567825317383, "loss": 1.0349174737930298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.881929781169795, "step_time": 0.40546994972229006} +{"epoch": 0, "iter": 959, "iter_tflops": 45.81879703032992, "iter_time": 0.45027575683593746, "loss": 0.8837394118309021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.15156054542935, "step_time": 0.41974442481994634} +{"epoch": 0, "iter": 960, "iter_tflops": 48.777649779143836, "iter_time": 0.42296202468872074, "loss": 0.7303272485733032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.53113253033076, "step_time": 0.39274031448364255} +{"epoch": 0, "iter": 961, "iter_tflops": 40.6878691423952, "iter_time": 0.5070576057434082, "loss": 0.4219682216644287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.47637651569001, "step_time": 0.4638663291931153} +{"epoch": 0, "iter": 962, "iter_tflops": 43.58812136657401, "iter_time": 0.47331917190551764, "loss": 0.3337664008140564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70124601011217, "step_time": 0.43250638580322265} +{"epoch": 0, "iter": 963, 
"iter_tflops": 42.16031376910116, "iter_time": 0.48934867095947265, "loss": 0.2467714548110962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.157216017967706, "step_time": 0.45687257385253904} +{"epoch": 0, "iter": 964, "iter_tflops": 50.90148962941308, "iter_time": 0.4053141403198242, "loss": 0.3764917254447937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.960897937267625, "step_time": 0.37537766456604005} +{"epoch": 0, "iter": 965, "iter_tflops": 44.098130268466925, "iter_time": 0.467845085144043, "loss": 0.1625235676765442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.07684035574944, "step_time": 0.4291274833679199} +{"epoch": 0, "iter": 966, "iter_tflops": 51.23224533048937, "iter_time": 0.4026974296569824, "loss": 0.14637009799480438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.79494252161578, "step_time": 0.36976637268066403} +{"epoch": 0, "iter": 967, "iter_tflops": 51.10609453478466, "iter_time": 0.4036914520263672, "loss": 0.22566525638103485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.63534166474629, "step_time": 0.3708271198272705} +{"epoch": 0, "iter": 968, "iter_tflops": 57.94654465438219, "iter_time": 0.3560366477966308, "loss": 0.1904604285955429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.94239959871909, "step_time": 0.32777735900878907} +{"epoch": 0, "iter": 969, "iter_tflops": 35.22777036751841, "iter_time": 0.5856485748291015, "loss": 0.6172595024108887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.669385724941534, "step_time": 0.5476886100769043} +{"epoch": 0, "iter": 970, "iter_tflops": 17.879614867143385, "iter_time": 1.1538891448974609, "loss": 0.46201789379119873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.563057949251974, "step_time": 0.9567795791625976} +{"epoch": 0, "iter": 971, "iter_tflops": 48.53823083633591, "iter_time": 0.4250483207702636, "loss": 0.3916972875595093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.417007205361834, "step_time": 0.3935954113006592} +{"epoch": 0, "iter": 972, 
"iter_tflops": 50.07231290821519, "iter_time": 0.41202597427368165, "loss": 0.534826397895813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.02584667519563, "step_time": 0.3818745059967041} +{"epoch": 0, "iter": 973, "iter_tflops": 45.92317523277528, "iter_time": 0.4492523307800293, "loss": 0.9210443496704102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.94571217949509, "step_time": 0.41307036399841307} +{"epoch": 0, "iter": 974, "iter_tflops": 38.6417740300733, "iter_time": 0.533906478881836, "loss": 0.8583554029464722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.7512895271067, "step_time": 0.4941426658630371} +{"epoch": 0, "iter": 975, "iter_tflops": 45.897553093009215, "iter_time": 0.4495031242370606, "loss": 0.8962559103965759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.48038921364458, "step_time": 0.4169549560546875} +{"epoch": 0, "iter": 976, "iter_tflops": 52.394463027777874, "iter_time": 0.3937647666931153, "loss": 1.0835274457931519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.50851470154086, "step_time": 0.3650970764160157} +{"epoch": 0, "iter": 977, "iter_tflops": 26.302252913142492, "iter_time": 0.7843850326538087, "loss": 1.103935956954956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.68115187208647, "step_time": 0.7453119583129882} +{"epoch": 0, "iter": 978, "iter_tflops": 13.863005191716805, "iter_time": 1.488212203979492, "loss": 0.9373868703842163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.49926487379571, "step_time": 1.1789691543579102} +{"epoch": 0, "iter": 979, "iter_tflops": 48.02455787298209, "iter_time": 0.42959465789794926, "loss": 0.7260744571685791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.92741617598377, "step_time": 0.39730637550354003} +{"epoch": 0, "iter": 980, "iter_tflops": 48.59182171732172, "iter_time": 0.42457954406738285, "loss": 0.7409687042236328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.20224618858677, "step_time": 0.3952146701812744} +{"epoch": 0, "iter": 981, "iter_tflops": 
36.54500608229976, "iter_time": 0.5645393371582031, "loss": 0.3715927004814148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.413875850233175, "step_time": 0.523447467803955} +{"epoch": 0, "iter": 982, "iter_tflops": 38.121188613215, "iter_time": 0.5411975402832031, "loss": 0.4276287257671356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.569282345368464, "step_time": 0.4963062229156494} +{"epoch": 0, "iter": 983, "iter_tflops": 47.700421826054544, "iter_time": 0.43251385879516596, "loss": 0.38497084379196167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.35474976879001, "step_time": 0.3940634536743164} +{"epoch": 0, "iter": 984, "iter_tflops": 46.174210306602426, "iter_time": 0.4468098831176758, "loss": 0.3685998320579529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.53115991327735, "step_time": 0.4082845821380615} +{"epoch": 0, "iter": 985, "iter_tflops": 17.431582440190983, "iter_time": 1.0743112335205078, "loss": 0.009005619212985039, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 18.766329040132707, "step_time": 0.9979013366699219} +{"epoch": 0, "iter": 986, "iter_tflops": 22.086930015249532, "iter_time": 0.8478745040893555, "loss": 0.02081124484539032, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 27.186660129462982, "step_time": 0.6888284454345702} +{"epoch": 0, "iter": 987, "iter_tflops": 50.97983548879368, "iter_time": 0.3673402366638184, "loss": 0.004669149871915579, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 55.80096576067769, "step_time": 0.3356025218963623} +{"epoch": 0, "iter": 988, "iter_tflops": 53.96734632514856, "iter_time": 0.3470051078796387, "loss": 0.006809841841459274, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 59.20613427962277, "step_time": 0.3163007526397705} +{"epoch": 0, "iter": 989, "iter_tflops": 45.730955321628386, "iter_time": 0.45114066314697265, "loss": 0.3685433864593506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.7591975313746, "step_time": 0.41461869430541987} +{"epoch": 0, "iter": 990, "iter_tflops": 
49.3562163949173, "iter_time": 0.4180039520263672, "loss": 0.5656233429908752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.47392970459434, "step_time": 0.3787333431243896} +{"epoch": 0, "iter": 991, "iter_tflops": 48.17292181467357, "iter_time": 0.42827158355712885, "loss": 0.6598917245864868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.265173862496866, "step_time": 0.39473882865905763} +{"epoch": 0, "iter": 992, "iter_tflops": 51.99328046592773, "iter_time": 0.3968030738830567, "loss": 0.6071162223815918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.203497942033394, "step_time": 0.36707846069335937} +{"epoch": 0, "iter": 993, "iter_tflops": 44.310811558498166, "iter_time": 0.4655995407104492, "loss": 0.4847956597805023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.30288361584447, "step_time": 0.42711929321289066} +{"epoch": 0, "iter": 994, "iter_tflops": 46.473689840002145, "iter_time": 0.44393061065673833, "loss": 0.40848419070243835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24613799493749, "step_time": 0.4106005821228027} +{"epoch": 0, "iter": 995, "iter_tflops": 47.075375649533875, "iter_time": 0.4382565879821777, "loss": 0.34549131989479065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.844806521651996, "step_time": 0.4057659950256348} +{"epoch": 0, "iter": 996, "iter_tflops": 53.25609227899422, "iter_time": 0.3873940544128418, "loss": 0.4152461290359497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.838107889890594, "step_time": 0.3567041568756103} +{"epoch": 0, "iter": 997, "iter_tflops": 42.2602894083527, "iter_time": 0.48819101333618165, "loss": 0.1277296096086502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.68332968955142, "step_time": 0.45161098480224615} +{"epoch": 0, "iter": 998, "iter_tflops": 11.319704992987075, "iter_time": 1.8225822601318358, "loss": 0.04460233822464943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.584806575323519, "step_time": 1.3237952880859374} +{"epoch": 0, "iter": 999, "iter_tflops": 
10.481253914489907, "iter_time": 1.9683802795410157, "loss": 0.05546637997031212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.746599530603827, "step_time": 1.6185566558837892} +{"epoch": 0, "iter": 1000, "iter_tflops": 18.297968011897066, "iter_time": 1.1275073547363281, "loss": 0.10246419161558151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.09641804492223, "step_time": 0.8220732326507569} +{"epoch": 0, "iter": 1001, "iter_tflops": 13.905852932928372, "iter_time": 1.1750658569335937, "loss": 0.44467398524284363, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 14.521113896778672, "step_time": 1.1252782058715822} +{"epoch": 0, "iter": 1002, "iter_tflops": 21.056414478380614, "iter_time": 0.7760244750976563, "loss": 0.5111154913902283, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 25.481305374425347, "step_time": 0.6412659301757813} +{"epoch": 0, "iter": 1003, "iter_tflops": 28.377106582892626, "iter_time": 0.5758266067504882, "loss": 0.4413643479347229, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 30.233373061619137, "step_time": 0.5404720458984376} +{"epoch": 0, "iter": 1004, "iter_tflops": 29.41838287379512, "iter_time": 0.5554449768066406, "loss": 0.4350246787071228, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.257891195635622, "step_time": 0.5227573699951171} +{"epoch": 0, "iter": 1005, "iter_tflops": 30.27227087885069, "iter_time": 0.6815178680419922, "loss": 0.03467915579676628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.275964892060394, "step_time": 0.6392091941833496} +{"epoch": 0, "iter": 1006, "iter_tflops": 22.635879619022965, "iter_time": 0.9114332580566405, "loss": 0.06542124599218369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.94819152701114, "step_time": 0.7950879154205324} +{"epoch": 0, "iter": 1007, "iter_tflops": 56.4593101348932, "iter_time": 0.3654152603149414, "loss": 0.049304306507110596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.34843739490994, "step_time": 0.3308999290466309} +{"epoch": 0, "iter": 1008, 
"iter_tflops": 58.26230439534194, "iter_time": 0.35410706329345704, "loss": 0.05241097882390022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.68902475542611, "step_time": 0.3239348316192627} +{"epoch": 0, "iter": 1009, "iter_tflops": 25.994176747910355, "iter_time": 0.7936813583374023, "loss": 0.16787435114383698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.41664355359839, "step_time": 0.7525025253295898} +{"epoch": 0, "iter": 1010, "iter_tflops": 17.234812894462824, "iter_time": 1.197059326171875, "loss": 0.13950788974761963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.99421798708469, "step_time": 0.897229621887207} +{"epoch": 0, "iter": 1011, "iter_tflops": 49.724476342929286, "iter_time": 0.41490821075439455, "loss": 0.30538395047187805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.003970737688455, "step_time": 0.38202919578552247} +{"epoch": 0, "iter": 1012, "iter_tflops": 53.32538958969034, "iter_time": 0.3868906288146973, "loss": 0.14260214567184448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.06932361849319, "step_time": 0.3552838611602784} +{"epoch": 0, "iter": 1013, "iter_tflops": 41.13784791263491, "iter_time": 0.5015112495422362, "loss": 0.2602105140686035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.33056259906632, "step_time": 0.46539209747314453} +{"epoch": 0, "iter": 1014, "iter_tflops": 35.55325939191054, "iter_time": 0.580286979675293, "loss": 0.2520747184753418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.44735152963111, "step_time": 0.5230032615661622} +{"epoch": 0, "iter": 1015, "iter_tflops": 42.77263214167077, "iter_time": 0.48234332275390623, "loss": 0.28733932971954346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.597411454804984, "step_time": 0.44275192260742186} +{"epoch": 0, "iter": 1016, "iter_tflops": 42.28046157706851, "iter_time": 0.4879580955505371, "loss": 0.24708329141139984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.4716292927104, "step_time": 0.4439502944946289} +{"epoch": 0, "iter": 
1017, "iter_tflops": 2.2395399527103144, "iter_time": 0.7524355926513672, "loss": 0.3501887023448944, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 2.4021170949311426, "step_time": 0.7015101699829102} +{"epoch": 0, "iter": 1018, "iter_tflops": 3.675023314773079, "iter_time": 0.4585303077697754, "loss": 0.34620359539985657, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 3.9829999042647186, "step_time": 0.42307547378540034} +{"epoch": 0, "iter": 1019, "iter_tflops": 3.92339743829704, "iter_time": 0.42950264358520507, "loss": 0.42250388860702515, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 4.243648942100954, "step_time": 0.3970897674560547} +{"epoch": 0, "iter": 1020, "iter_tflops": 3.766798039460888, "iter_time": 0.44735861968994134, "loss": 0.5986237525939941, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 4.064462667813695, "step_time": 0.41459590339660646} +{"epoch": 0, "iter": 1021, "iter_tflops": 37.3341116416833, "iter_time": 0.5526070556640625, "loss": 0.7905014753341675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.19542015913458, "step_time": 0.5132697563171387} +{"epoch": 0, "iter": 1022, "iter_tflops": 42.27906015571173, "iter_time": 0.48797426986694337, "loss": 0.8090558052062988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.52516596474672, "step_time": 0.45317997360229484} +{"epoch": 0, "iter": 1023, "iter_tflops": 49.99138491491251, "iter_time": 0.4126929779052735, "loss": 0.9061230421066284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.11252169737449, "step_time": 0.38126283645629877} +{"epoch": 0, "iter": 1024, "iter_tflops": 50.072590135980406, "iter_time": 0.4120236930847168, "loss": 0.8755170702934265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.212927805836316, "step_time": 0.38055671119689943} +{"epoch": 0, "iter": 1025, "iter_tflops": 29.605965301869738, "iter_time": 0.6968559646606446, "loss": 0.7069819569587708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.421028467054672, "step_time": 0.6566014709472657} +{"epoch": 0, "iter": 
1026, "iter_tflops": 11.28192037489956, "iter_time": 1.8286863250732421, "loss": 0.6234087944030762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.064067250504074, "step_time": 1.3695566520690918} +{"epoch": 0, "iter": 1027, "iter_tflops": 43.8405276131589, "iter_time": 0.47059409713745115, "loss": 0.567651629447937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43959506614526, "step_time": 0.4348918552398682} +{"epoch": 0, "iter": 1028, "iter_tflops": 43.81433847446106, "iter_time": 0.47087538528442385, "loss": 0.8098371028900146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17118498792559, "step_time": 0.4373664455413819} +{"epoch": 0, "iter": 1029, "iter_tflops": 15.858185300325022, "iter_time": 0.8270204315185546, "loss": 0.0025253850035369396, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 16.636794401234766, "step_time": 0.7883155212402345} +{"epoch": 0, "iter": 1030, "iter_tflops": 11.27454574100536, "iter_time": 1.1632436065673828, "loss": 0.03581199422478676, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 15.257763234974387, "step_time": 0.8595652618408203} +{"epoch": 0, "iter": 1031, "iter_tflops": 36.14576399851748, "iter_time": 0.3628376274108887, "loss": 0.001743523869663477, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 40.020562076038914, "step_time": 0.32770762252807617} +{"epoch": 0, "iter": 1032, "iter_tflops": 39.922066693007935, "iter_time": 0.3285161399841308, "loss": 0.010104315355420113, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 43.71397209826954, "step_time": 0.3000194816589356} +{"epoch": 0, "iter": 1033, "iter_tflops": 27.347193689398537, "iter_time": 0.7544135513305665, "loss": 0.24426940083503723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.734957163693725, "step_time": 0.7179789199829102} +{"epoch": 0, "iter": 1034, "iter_tflops": 15.43953854430799, "iter_time": 1.3362506561279295, "loss": 0.27147015929222107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.8162405359282, "step_time": 1.0411204624176027} +{"epoch": 0, 
"iter": 1035, "iter_tflops": 43.007764053174476, "iter_time": 0.47970625686645507, "loss": 0.14358530938625336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.23130580345451, "step_time": 0.436809720993042} +{"epoch": 0, "iter": 1036, "iter_tflops": 46.18486454293536, "iter_time": 0.44670680999755863, "loss": 0.2858370542526245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.12805587611084, "step_time": 0.4115677967071533} +{"epoch": 0, "iter": 1037, "iter_tflops": 22.054839292510817, "iter_time": 0.9354451980590821, "loss": 0.10602228343486786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.963645566007575, "step_time": 0.8609330101013182} +{"epoch": 0, "iter": 1038, "iter_tflops": 14.13698311544991, "iter_time": 1.459370315551758, "loss": 0.061577294021844864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.196361126128608, "step_time": 1.1338032569885255} +{"epoch": 0, "iter": 1039, "iter_tflops": 56.96378030417391, "iter_time": 0.36217914962768555, "loss": 0.0036874522920697927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.74767047568803, "step_time": 0.32879457283020014} +{"epoch": 0, "iter": 1040, "iter_tflops": 57.35204840071026, "iter_time": 0.35972723007202145, "loss": 0.038028374314308167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.745179713009975, "step_time": 0.3288076248168946} +{"epoch": 0, "iter": 1041, "iter_tflops": 29.053596372871308, "iter_time": 0.7101046371459961, "loss": 0.5373833775520325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.74947286552771, "step_time": 0.6709413719177246} +{"epoch": 0, "iter": 1042, "iter_tflops": 13.478571547862865, "iter_time": 1.5306587524414064, "loss": 0.6965960264205933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.054327938117854, "step_time": 1.2850798606872558} +{"epoch": 0, "iter": 1043, "iter_tflops": 36.37599444386058, "iter_time": 0.5671623229980468, "loss": 0.44693148136138916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.46723611006902, "step_time": 0.5227397594451904} 
+{"epoch": 0, "iter": 1044, "iter_tflops": 43.97094154036609, "iter_time": 0.469198356628418, "loss": 0.46369317173957825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90329325888349, "step_time": 0.43068215370178226} +{"epoch": 0, "iter": 1045, "iter_tflops": 19.53169364777319, "iter_time": 1.0562879943847656, "loss": 0.011533628217875957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.786111576524398, "step_time": 0.9925422286987304} +{"epoch": 0, "iter": 1046, "iter_tflops": 14.20397390715867, "iter_time": 1.4524874267578125, "loss": 0.002905289176851511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.318200788269415, "step_time": 1.1262620029449464} +{"epoch": 0, "iter": 1047, "iter_tflops": 46.48761969825814, "iter_time": 0.4437975883483886, "loss": 0.008069668896496296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.615020894584234, "step_time": 0.39971103668212893} +{"epoch": 0, "iter": 1048, "iter_tflops": 43.03504627319425, "iter_time": 0.47940214538574216, "loss": 0.01604849472641945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31948488539106, "step_time": 0.43599573326110835} +{"epoch": 0, "iter": 1049, "iter_tflops": 20.872258932853494, "iter_time": 0.9884456481933594, "loss": 1.0523076057434082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.500772481006678, "step_time": 0.9169060096740723} +{"epoch": 0, "iter": 1050, "iter_tflops": 17.489721178473083, "iter_time": 1.1796124877929688, "loss": 0.7472048997879028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.511209185592072, "step_time": 0.9590857181549072} +{"epoch": 0, "iter": 1051, "iter_tflops": 45.18679752615124, "iter_time": 0.45657348251342766, "loss": 0.8416144251823425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.678925611537366, "step_time": 0.42381982040405275} +{"epoch": 0, "iter": 1052, "iter_tflops": 45.408440630841305, "iter_time": 0.45434490203857425, "loss": 0.8979740738868713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.176057294775354, "step_time": 
0.4195353317260742} +{"epoch": 0, "iter": 1053, "iter_tflops": 38.00137120914961, "iter_time": 0.5429039230346681, "loss": 0.730415940284729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.72457000090955, "step_time": 0.5066006469726563} +{"epoch": 0, "iter": 1054, "iter_tflops": 6.517292081183562, "iter_time": 3.165592895507812, "loss": 0.8199922442436218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.877314531944141, "step_time": 2.6190516357421876} +{"epoch": 0, "iter": 1055, "iter_tflops": 12.536125768392273, "iter_time": 1.6457312164306641, "loss": 0.8457531929016113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.595943405508189, "step_time": 1.4134813308715821} +{"epoch": 0, "iter": 1056, "iter_tflops": 35.88041685292378, "iter_time": 0.5749959259033204, "loss": 1.0020707845687866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7280412662664, "step_time": 0.4718046569824219} +{"epoch": 0, "iter": 1057, "iter_tflops": 12.111096844851861, "iter_time": 1.2984487762451171, "loss": 0.5019017457962036, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 12.993318604168147, "step_time": 1.210286560058594} +{"epoch": 0, "iter": 1058, "iter_tflops": 14.698780492974256, "iter_time": 1.069860107421875, "loss": 0.6163986921310425, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 17.441444802010157, "step_time": 0.901624782562256} +{"epoch": 0, "iter": 1059, "iter_tflops": 28.587036896638285, "iter_time": 0.5500968475341796, "loss": 0.5989236831665039, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 30.459732722210877, "step_time": 0.51627632522583} +{"epoch": 0, "iter": 1060, "iter_tflops": 27.167196640350678, "iter_time": 0.5788465805053712, "loss": 0.3975457549095154, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 28.768399124439462, "step_time": 0.5466289176940917} +{"epoch": 0, "iter": 1061, "iter_tflops": 35.88775003145429, "iter_time": 0.5748784332275391, "loss": 0.40757161378860474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.443210005918694, "step_time": 
0.5366641731262207} +{"epoch": 0, "iter": 1062, "iter_tflops": 10.172431052933252, "iter_time": 2.028137954711914, "loss": 0.3756375312805176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.473796766648547, "step_time": 1.6539545974731444} +{"epoch": 0, "iter": 1063, "iter_tflops": 14.774360879814425, "iter_time": 1.396411911010742, "loss": 0.43147432804107666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.578617058119704, "step_time": 1.1736471328735352} +{"epoch": 0, "iter": 1064, "iter_tflops": 17.929851983181166, "iter_time": 1.1506560974121094, "loss": 0.2751193940639496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.478212536034174, "step_time": 0.9605591468811034} +{"epoch": 0, "iter": 1065, "iter_tflops": 17.50514383893382, "iter_time": 0.9545534286499024, "loss": 0.6187211871147156, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 18.32635493368086, "step_time": 0.9117795181274414} +{"epoch": 0, "iter": 1066, "iter_tflops": 11.013091703757658, "iter_time": 1.5172483367919922, "loss": 0.29944509267807007, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 13.806121186015332, "step_time": 1.2103033752441406} +{"epoch": 0, "iter": 1067, "iter_tflops": 24.477508976479992, "iter_time": 0.6826509628295898, "loss": 0.4994640648365021, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 26.271122899229713, "step_time": 0.636044189453125} +{"epoch": 0, "iter": 1068, "iter_tflops": 27.7889389310985, "iter_time": 0.6013038177490234, "loss": 0.7246094942092896, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 29.74810305117044, "step_time": 0.5617028770446778} +{"epoch": 0, "iter": 1069, "iter_tflops": 19.113534948571605, "iter_time": 1.0793970642089843, "loss": 0.9465811252593994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.721770305625725, "step_time": 0.995624080657959} +{"epoch": 0, "iter": 1070, "iter_tflops": 24.43131578912671, "iter_time": 0.8444528198242187, "loss": 1.233617901802063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.964028173403857, "step_time": 
0.737772590637207} +{"epoch": 0, "iter": 1071, "iter_tflops": 46.30817240181181, "iter_time": 0.445517333984375, "loss": 1.2596542835235596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.91070105251128, "step_time": 0.413360122680664} +{"epoch": 0, "iter": 1072, "iter_tflops": 47.54872592078767, "iter_time": 0.43389371871948246, "loss": 1.0131728649139404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.30562036616451, "step_time": 0.40212150955200193} +{"epoch": 0, "iter": 1073, "iter_tflops": 41.843052432497586, "iter_time": 0.4930589981079101, "loss": 0.6667143702507019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.43207371219761, "step_time": 0.45410855865478517} +{"epoch": 0, "iter": 1074, "iter_tflops": 48.19556070010273, "iter_time": 0.4280704116821289, "loss": 0.5205100178718567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.57735186745856, "step_time": 0.3850711688995361} +{"epoch": 0, "iter": 1075, "iter_tflops": 50.913888101260184, "iter_time": 0.4052154388427735, "loss": 0.9516507387161255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.967170092117556, "step_time": 0.37533483123779304} +{"epoch": 0, "iter": 1076, "iter_tflops": 47.89232369821232, "iter_time": 0.43078079986572265, "loss": 0.7880908250808716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.04148233621743, "step_time": 0.396435546875} +{"epoch": 0, "iter": 1077, "iter_tflops": 25.067519011439522, "iter_time": 0.8230209579467773, "loss": 0.4444087743759155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.386553106428817, "step_time": 0.7818790664672852} +{"epoch": 0, "iter": 1078, "iter_tflops": 12.970477733282609, "iter_time": 1.5906194000244143, "loss": 0.38345789909362793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.59182296626753, "step_time": 1.1727660942077638} +{"epoch": 0, "iter": 1079, "iter_tflops": 48.9102333215215, "iter_time": 0.4218154792785645, "loss": 0.3842311501502991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.29697254127463, "step_time": 
0.3870969123840332} +{"epoch": 0, "iter": 1080, "iter_tflops": 48.7022790889908, "iter_time": 0.42361659240722654, "loss": 0.44929057359695435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.626355986307495, "step_time": 0.3920296802520752} +{"epoch": 0, "iter": 1081, "iter_tflops": 44.697363852638986, "iter_time": 0.46157293701171875, "loss": 0.7496750950813293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.81147867346896, "step_time": 0.4226688899993897} +{"epoch": 0, "iter": 1082, "iter_tflops": 41.56603814601656, "iter_time": 0.49634495925903316, "loss": 0.7137653827667236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.050787530752025, "step_time": 0.4579518947601318} +{"epoch": 0, "iter": 1083, "iter_tflops": 49.66968637016306, "iter_time": 0.41536589050292966, "loss": 0.9975404739379883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.803177462447245, "step_time": 0.383454927444458} +{"epoch": 0, "iter": 1084, "iter_tflops": 47.78868476762937, "iter_time": 0.43171503067016603, "loss": 0.9296732544898987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.64076788221332, "step_time": 0.39951174926757815} +{"epoch": 0, "iter": 1085, "iter_tflops": 32.05873195074645, "iter_time": 0.6435405349731445, "loss": 0.6973595023155212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.20361679822929, "step_time": 0.6031845588684082} +{"epoch": 0, "iter": 1086, "iter_tflops": 37.952544567540414, "iter_time": 0.5436023788452149, "loss": 0.6140825748443604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03147342431747, "step_time": 0.49084868621826167} +{"epoch": 0, "iter": 1087, "iter_tflops": 42.99947615053077, "iter_time": 0.4797987174987793, "loss": 0.8456215858459473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.882454459013864, "step_time": 0.44006001281738283} +{"epoch": 0, "iter": 1088, "iter_tflops": 39.15349619133833, "iter_time": 0.5269285125732422, "loss": 0.858858048915863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.74551665024871, 
"step_time": 0.48264929580688476} +{"epoch": 0, "iter": 1089, "iter_tflops": 39.80276427461129, "iter_time": 0.5183331832885743, "loss": 0.07272344827651978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.562107568339854, "step_time": 0.473601821899414} +{"epoch": 0, "iter": 1090, "iter_tflops": 9.592386409793644, "iter_time": 2.1507779846191406, "loss": 0.1272878348827362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.946144722296921, "step_time": 1.8847817230224608} +{"epoch": 0, "iter": 1091, "iter_tflops": 14.546934805893065, "iter_time": 1.418243347167969, "loss": 0.09447718411684036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.264603592012502, "step_time": 1.1295670013427734} +{"epoch": 0, "iter": 1092, "iter_tflops": 23.03466773489004, "iter_time": 0.8956540527343749, "loss": 0.0723314955830574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.752707341453156, "step_time": 0.743390302658081} +{"epoch": 0, "iter": 1093, "iter_tflops": 12.074910643482434, "iter_time": 1.3057308044433593, "loss": 0.4605635106563568, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 12.787070727519263, "step_time": 1.2330097427368163} +{"epoch": 0, "iter": 1094, "iter_tflops": 10.618101389390205, "iter_time": 1.4848777770996093, "loss": 0.42426836490631104, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 12.35408515474936, "step_time": 1.2762242279052733} +{"epoch": 0, "iter": 1095, "iter_tflops": 24.53510726145486, "iter_time": 0.6426131591796875, "loss": 0.42670130729675293, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 26.34048615397419, "step_time": 0.5985684051513672} +{"epoch": 0, "iter": 1096, "iter_tflops": 25.930688342938872, "iter_time": 0.6080279312133788, "loss": 0.43654200434684753, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 27.807151533834475, "step_time": 0.5669974060058594} +{"epoch": 0, "iter": 1097, "iter_tflops": 18.472060145443876, "iter_time": 1.1168810272216798, "loss": 1.0372867584228516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
19.877674417122293, "step_time": 1.0379027786254882} +{"epoch": 0, "iter": 1098, "iter_tflops": 42.63777173577416, "iter_time": 0.4838689422607421, "loss": 1.4312803745269775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.98764635491605, "step_time": 0.4390748443603516} +{"epoch": 0, "iter": 1099, "iter_tflops": 46.23275252905321, "iter_time": 0.44624411010742193, "loss": 1.286378264427185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.86699122250178, "step_time": 0.4137224445343018} +{"epoch": 0, "iter": 1100, "iter_tflops": 45.31285675285902, "iter_time": 0.45530330657958984, "loss": 0.9700436592102051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.674485397181016, "step_time": 0.4238584823608399} +{"epoch": 0, "iter": 1101, "iter_tflops": 41.458000731215435, "iter_time": 0.49763840866088865, "loss": 0.09169530868530273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.127014637526685, "step_time": 0.457178337097168} +{"epoch": 0, "iter": 1102, "iter_tflops": 8.493055878721641, "iter_time": 2.4291719970703127, "loss": 0.13693204522132874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.153612432080424, "step_time": 2.03189688873291} +{"epoch": 0, "iter": 1103, "iter_tflops": 13.003702137566783, "iter_time": 1.5865553741455078, "loss": 0.12088967859745026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.440879571630443, "step_time": 1.4286590652465818} +{"epoch": 0, "iter": 1104, "iter_tflops": 24.882322342147404, "iter_time": 0.8291466217041016, "loss": 0.07251132279634476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.76844497173664, "step_time": 0.5611086769104003} +{"epoch": 0, "iter": 1105, "iter_tflops": 20.69152744034888, "iter_time": 0.8075573501586915, "loss": 0.45571276545524597, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 22.006967389854776, "step_time": 0.7592865829467774} +{"epoch": 0, "iter": 1106, "iter_tflops": 8.512119852413647, "iter_time": 1.963035690307617, "loss": 0.5297273993492126, "lr": 3e-05, "seqlen": 6672.0, 
"step_tflops": 10.64920416740036, "step_time": 1.5690933151245117} +{"epoch": 0, "iter": 1107, "iter_tflops": 29.647655178239194, "iter_time": 0.5636059570312499, "loss": 0.5032118558883667, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 31.64308739147919, "step_time": 0.528064624786377} +{"epoch": 0, "iter": 1108, "iter_tflops": 31.071006064047676, "iter_time": 0.5377873840332033, "loss": 0.48853799700737, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 32.99085645507709, "step_time": 0.5064917030334473} +{"epoch": 0, "iter": 1109, "iter_tflops": 32.14141431089031, "iter_time": 0.6418850555419922, "loss": 0.9359007477760315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.363969166699654, "step_time": 0.6003699226379394} +{"epoch": 0, "iter": 1110, "iter_tflops": 34.757892561401675, "iter_time": 0.5935657196044921, "loss": 1.028980016708374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.09206720245935, "step_time": 0.5416112861633301} +{"epoch": 0, "iter": 1111, "iter_tflops": 37.54204404658376, "iter_time": 0.549546356201172, "loss": 0.8247805833816528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.8356827828784, "step_time": 0.5052222003936768} +{"epoch": 0, "iter": 1112, "iter_tflops": 36.13014077998814, "iter_time": 0.5710216751098632, "loss": 1.2060139179229736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.301365175205795, "step_time": 0.5249459762573242} +{"epoch": 0, "iter": 1113, "iter_tflops": 12.010119691490878, "iter_time": 1.2650829315185546, "loss": 0.09551624953746796, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 12.814697077894005, "step_time": 1.1856540451049804} +{"epoch": 0, "iter": 1114, "iter_tflops": 18.878351153101757, "iter_time": 0.8048265075683594, "loss": 0.07598321884870529, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 23.490315210392417, "step_time": 0.646811134338379} +{"epoch": 0, "iter": 1115, "iter_tflops": 37.670685195166385, "iter_time": 0.4033321228027344, "loss": 0.09263944625854492, "lr": 3e-05, "seqlen": 6080.0, 
"step_tflops": 41.051312302843364, "step_time": 0.37011721611022946} +{"epoch": 0, "iter": 1116, "iter_tflops": 37.56624798684028, "iter_time": 0.40445341873168944, "loss": 0.10206548124551773, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 40.76307189790184, "step_time": 0.3727343578338623} +{"epoch": 0, "iter": 1117, "iter_tflops": 23.576950887669327, "iter_time": 0.8750535049438478, "loss": 1.0717707872390747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.76414396597602, "step_time": 0.8331034393310547} +{"epoch": 0, "iter": 1118, "iter_tflops": 12.811034583175688, "iter_time": 1.6104158782958986, "loss": 0.7599400281906128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.271878471518555, "step_time": 1.2678986968994141} +{"epoch": 0, "iter": 1119, "iter_tflops": 35.18336612144231, "iter_time": 0.586387710571289, "loss": 1.089532494544983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.20284430273044, "step_time": 0.540040771484375} +{"epoch": 0, "iter": 1120, "iter_tflops": 37.67556005092973, "iter_time": 0.5475988540649415, "loss": 1.0819637775421143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.665676535025035, "step_time": 0.5073343238830567} +{"epoch": 0, "iter": 1121, "iter_tflops": 25.885162339900074, "iter_time": 0.7970239181518555, "loss": 1.023316502571106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.640919413065568, "step_time": 0.7463967895507814} +{"epoch": 0, "iter": 1122, "iter_tflops": 11.643734986961269, "iter_time": 1.7718621673583985, "loss": 0.8749179244041443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.043995545061447, "step_time": 1.4690330429077147} +{"epoch": 0, "iter": 1123, "iter_tflops": 16.983878437748196, "iter_time": 1.2147457122802734, "loss": 0.9910374879837036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.45111447362404, "step_time": 1.0088004512786866} +{"epoch": 0, "iter": 1124, "iter_tflops": 43.26538296595268, "iter_time": 0.4768498992919922, "loss": 0.9094225764274597, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 47.301331631219625, "step_time": 0.4361630592346191} +{"epoch": 0, "iter": 1125, "iter_tflops": 21.13406785439121, "iter_time": 0.7285988464355468, "loss": 0.4946412146091461, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 22.781967860568436, "step_time": 0.6758967247009277} +{"epoch": 0, "iter": 1126, "iter_tflops": 21.92452655513909, "iter_time": 0.7023302154541016, "loss": 0.39414849877357483, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 23.69788415857632, "step_time": 0.6497735137939453} +{"epoch": 0, "iter": 1127, "iter_tflops": 24.215732847831713, "iter_time": 0.6358782348632812, "loss": 0.45758509635925293, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.908102897132625, "step_time": 0.5943413734436035} +{"epoch": 0, "iter": 1128, "iter_tflops": 25.328876074986596, "iter_time": 0.6079329147338867, "loss": 0.3193022906780243, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 27.003782455350066, "step_time": 0.5702259483337402} +{"epoch": 0, "iter": 1129, "iter_tflops": 32.851939996848095, "iter_time": 0.6280022888183594, "loss": 0.18712033331394196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.511909757922794, "step_time": 0.5650510654449463} +{"epoch": 0, "iter": 1130, "iter_tflops": 49.663743269118285, "iter_time": 0.4154155960083008, "loss": 0.23696325719356537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.87641676569335, "step_time": 0.37595555114746093} +{"epoch": 0, "iter": 1131, "iter_tflops": 56.01269691401812, "iter_time": 0.3683288726806641, "loss": 0.22815632820129395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.76478132007877, "step_time": 0.33952386665344236} +{"epoch": 0, "iter": 1132, "iter_tflops": 59.79981017906626, "iter_time": 0.34500265884399417, "loss": 0.30487537384033203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.95735294530549, "step_time": 0.3176098251342773} +{"epoch": 0, "iter": 1133, "iter_tflops": 43.895686765086666, "iter_time": 0.4700027503967285, "loss": 0.5574125647544861, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 48.25354329563157, "step_time": 0.42755603218078614} +{"epoch": 0, "iter": 1134, "iter_tflops": 45.75284719361366, "iter_time": 0.4509248008728027, "loss": 0.5882067084312439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.44421181599648, "step_time": 0.4089883213043213} +{"epoch": 0, "iter": 1135, "iter_tflops": 46.46479924637273, "iter_time": 0.44401555252075203, "loss": 0.5450435876846313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40532010281756, "step_time": 0.4093038883209229} +{"epoch": 0, "iter": 1136, "iter_tflops": 51.18343027178568, "iter_time": 0.4030814933776855, "loss": 0.5985514521598816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.09261341431395, "step_time": 0.37448021125793457} +{"epoch": 0, "iter": 1137, "iter_tflops": 41.19853033723628, "iter_time": 0.5007725601196289, "loss": 1.182557463645935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.29047709258395, "step_time": 0.465813304901123} +{"epoch": 0, "iter": 1138, "iter_tflops": 39.75909999621687, "iter_time": 0.5189024276733398, "loss": 1.330065369606018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.64714033481954, "step_time": 0.4726791572570801} +{"epoch": 0, "iter": 1139, "iter_tflops": 43.50785131156279, "iter_time": 0.47419242477416995, "loss": 1.0629570484161377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.568927975435564, "step_time": 0.4430227279663086} +{"epoch": 0, "iter": 1140, "iter_tflops": 45.360647796608276, "iter_time": 0.4548236083984375, "loss": 1.1821635961532593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.89470770686751, "step_time": 0.4219494190216065} +{"epoch": 0, "iter": 1141, "iter_tflops": 26.65250902790013, "iter_time": 0.7740769729614259, "loss": 0.4992349445819855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.918619610188458, "step_time": 0.7389725494384765} +{"epoch": 0, "iter": 1142, "iter_tflops": 15.616667502865472, "iter_time": 1.321094497680664, "loss": 0.5326108932495117, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 20.587958452775265, "step_time": 1.0020951595306395} +{"epoch": 0, "iter": 1143, "iter_tflops": 52.346921144786606, "iter_time": 0.39412238693237306, "loss": 0.33319276571273804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.96608341342471, "step_time": 0.36216450691223145} +{"epoch": 0, "iter": 1144, "iter_tflops": 44.848820472457504, "iter_time": 0.46001418304443364, "loss": 0.47013282775878906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24470692487944, "step_time": 0.4276343421936036} +{"epoch": 0, "iter": 1145, "iter_tflops": 29.161881688988508, "iter_time": 0.7074678421020508, "loss": 0.9088520407676697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.762216701747587, "step_time": 0.6706634216308595} +{"epoch": 0, "iter": 1146, "iter_tflops": 14.588733366780405, "iter_time": 1.414179901123047, "loss": 1.105109453201294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.920516931935513, "step_time": 1.0904085540771484} +{"epoch": 0, "iter": 1147, "iter_tflops": 38.09391662555933, "iter_time": 0.5415849914550781, "loss": 1.1629263162612915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.29931471728986, "step_time": 0.4995505046844482} +{"epoch": 0, "iter": 1148, "iter_tflops": 39.69536896524834, "iter_time": 0.5197355270385742, "loss": 1.1369585990905762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.86668543392752, "step_time": 0.4812850189208984} +{"epoch": 0, "iter": 1149, "iter_tflops": 19.51476700953194, "iter_time": 1.0572041931152343, "loss": 0.6545480489730835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.73617735422698, "step_time": 0.9949323425292969} +{"epoch": 0, "iter": 1150, "iter_tflops": 16.67540401973529, "iter_time": 1.2372170104980469, "loss": 0.7860298752784729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.81103759174042, "step_time": 1.0413938903808595} +{"epoch": 0, "iter": 1151, "iter_tflops": 40.76546521345601, "iter_time": 0.5060924339294434, "loss": 0.7780687212944031, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 44.59929198839656, "step_time": 0.46258791542053224} +{"epoch": 0, "iter": 1152, "iter_tflops": 39.00851100991862, "iter_time": 0.5288869781494141, "loss": 0.7314189076423645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.69324591375244, "step_time": 0.48324021911621096} +{"epoch": 0, "iter": 1153, "iter_tflops": 19.06760866239701, "iter_time": 1.0819969024658203, "loss": 0.9191572666168213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.470161483454557, "step_time": 1.0078617858886716} +{"epoch": 0, "iter": 1154, "iter_tflops": 25.128463249280326, "iter_time": 0.8210248794555663, "loss": 1.0322537422180176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.05095764319878, "step_time": 0.6644269638061524} +{"epoch": 0, "iter": 1155, "iter_tflops": 44.51995013138863, "iter_time": 0.46341232299804686, "loss": 1.0564104318618774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.77794511063118, "step_time": 0.4318120727539062} +{"epoch": 0, "iter": 1156, "iter_tflops": 42.5214549516512, "iter_time": 0.4851925582885742, "loss": 0.9588309526443481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.51916601395096, "step_time": 0.4532397079467774} +{"epoch": 0, "iter": 1157, "iter_tflops": 36.25179956110542, "iter_time": 0.5691053619384766, "loss": 1.1153517961502075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.73133903986829, "step_time": 0.5326718368530273} +{"epoch": 0, "iter": 1158, "iter_tflops": 11.902834799948529, "iter_time": 1.733292434692383, "loss": 0.9503077268600464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.816966883400184, "step_time": 1.3043647155761717} +{"epoch": 0, "iter": 1159, "iter_tflops": 37.520323139827134, "iter_time": 0.5498644943237304, "loss": 1.0650041103363037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.80684274842231, "step_time": 0.5055792636871338} +{"epoch": 0, "iter": 1160, "iter_tflops": 33.009208058910694, "iter_time": 0.6250102539062501, "loss": 1.0096392631530762, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 36.01406541824917, "step_time": 0.5728621101379394} +{"epoch": 0, "iter": 1161, "iter_tflops": 20.979120720148575, "iter_time": 0.9834107818603515, "loss": 0.848577082157135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.900765117270954, "step_time": 0.9008910140991211} +{"epoch": 0, "iter": 1162, "iter_tflops": 16.912343791471404, "iter_time": 1.2198837585449218, "loss": 0.870248556137085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.351675189358215, "step_time": 1.0137294998168946} +{"epoch": 0, "iter": 1163, "iter_tflops": 36.62639758194354, "iter_time": 0.5632848129272461, "loss": 0.7845796942710876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.186782950567384, "step_time": 0.5133800716400148} +{"epoch": 0, "iter": 1164, "iter_tflops": 41.85354071414969, "iter_time": 0.49293544006347656, "loss": 1.0021981000900269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71738126193051, "step_time": 0.45127461242675776} +{"epoch": 0, "iter": 1165, "iter_tflops": 29.11779516369739, "iter_time": 0.7085390014648438, "loss": 0.3169964849948883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.40232052621296, "step_time": 0.6569926414489746} +{"epoch": 0, "iter": 1166, "iter_tflops": 10.972752175391275, "iter_time": 1.8802113800048827, "loss": 0.32027170062065125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.76619041814898, "step_time": 1.7534216918945313} +{"epoch": 0, "iter": 1167, "iter_tflops": 15.23815774452343, "iter_time": 1.3539099578857419, "loss": 0.22641156613826752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.240789032929673, "step_time": 1.1966443920135499} +{"epoch": 0, "iter": 1168, "iter_tflops": 38.69936115542865, "iter_time": 0.5331119918823242, "loss": 0.3250159025192261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.314018637317496, "step_time": 0.48757112121582036} +{"epoch": 0, "iter": 1169, "iter_tflops": 16.617013793420973, "iter_time": 0.953750473022461, "loss": 0.3806801736354828, "lr": 3e-05, "seqlen": 
6336.0, "step_tflops": 17.669356341996803, "step_time": 0.8969474868774413} +{"epoch": 0, "iter": 1170, "iter_tflops": 6.952549349953254, "iter_time": 2.2795213623046875, "loss": 0.5402979850769043, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 8.999761499661634, "step_time": 1.7609894180297854} +{"epoch": 0, "iter": 1171, "iter_tflops": 10.166820908828187, "iter_time": 1.5588437042236327, "loss": 0.5540784597396851, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 13.727526379377107, "step_time": 1.1545040473937989} +{"epoch": 0, "iter": 1172, "iter_tflops": 28.442166806684664, "iter_time": 0.5572179107666015, "loss": 0.5929377675056458, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.238642523198703, "step_time": 0.5241136322021485} +{"epoch": 0, "iter": 1173, "iter_tflops": 16.470460576856674, "iter_time": 0.8828118667602539, "loss": 0.4378916621208191, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 17.250124203205296, "step_time": 0.8429109191894532} +{"epoch": 0, "iter": 1174, "iter_tflops": 9.478462916659119, "iter_time": 1.5340375518798828, "loss": 0.5141916871070862, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 11.91807600099683, "step_time": 1.220022262573242} +{"epoch": 0, "iter": 1175, "iter_tflops": 23.049527019148588, "iter_time": 0.6308293457031251, "loss": 0.6748005151748657, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.706391946384166, "step_time": 0.5885245437622071} +{"epoch": 0, "iter": 1176, "iter_tflops": 22.14627088465591, "iter_time": 0.6565583038330077, "loss": 0.6775071620941162, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.77617791456742, "step_time": 0.6115498504638672} +{"epoch": 0, "iter": 1177, "iter_tflops": 24.712903799788055, "iter_time": 0.834830810546875, "loss": 0.40681323409080505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.35892441788523, "step_time": 0.7540900802612304} +{"epoch": 0, "iter": 1178, "iter_tflops": 47.51814022104664, "iter_time": 0.43417300033569334, "loss": 0.3018050491809845, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 52.40772272134436, "step_time": 0.39366514015197757} +{"epoch": 0, "iter": 1179, "iter_tflops": 54.30371636844475, "iter_time": 0.3799204711914062, "loss": 0.2546827793121338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.01478982966183, "step_time": 0.34959191703796383} +{"epoch": 0, "iter": 1180, "iter_tflops": 53.795045891585204, "iter_time": 0.38351288986206056, "loss": 0.28345662355422974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.707228358929186, "step_time": 0.35142339515686033} +{"epoch": 0, "iter": 1181, "iter_tflops": 37.274081541911734, "iter_time": 0.5534970321655274, "loss": 0.7329489588737488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.16237253612171, "step_time": 0.5136921005249023} +{"epoch": 0, "iter": 1182, "iter_tflops": 17.86539602901183, "iter_time": 1.1548075103759765, "loss": 0.7517885565757751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.661060345661404, "step_time": 0.9524507656097411} +{"epoch": 0, "iter": 1183, "iter_tflops": 41.99069410327711, "iter_time": 0.49132537460327147, "loss": 0.7505102753639221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.00584674753532, "step_time": 0.4484450340270996} +{"epoch": 0, "iter": 1184, "iter_tflops": 35.94395534461691, "iter_time": 0.5739794998168946, "loss": 0.6315928101539612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14771137363231, "step_time": 0.5270063762664795} +{"epoch": 0, "iter": 1185, "iter_tflops": 19.472467716100958, "iter_time": 1.0595007171630861, "loss": 0.9046043753623962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.59958277446351, "step_time": 1.0015296783447265} +{"epoch": 0, "iter": 1186, "iter_tflops": 18.12934926201048, "iter_time": 1.1379941558837892, "loss": 0.959977388381958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.650754518264236, "step_time": 0.9990479278564453} +{"epoch": 0, "iter": 1187, "iter_tflops": 38.847713538393585, "iter_time": 0.5310761337280273, "loss": 1.1183973550796509, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 42.139084713263735, "step_time": 0.4895951976776123} +{"epoch": 0, "iter": 1188, "iter_tflops": 35.42130347386486, "iter_time": 0.5824487380981445, "loss": 1.2655695676803589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.1875229995058, "step_time": 0.5402574424743654} +{"epoch": 0, "iter": 1189, "iter_tflops": 23.134167074591424, "iter_time": 0.8918018722534179, "loss": 0.1851034313440323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.98583842599671, "step_time": 0.8257114753723144} +{"epoch": 0, "iter": 1190, "iter_tflops": 21.43584146441071, "iter_time": 0.9624578323364259, "loss": 0.13808783888816833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.534290322910365, "step_time": 0.8079759902954101} +{"epoch": 0, "iter": 1191, "iter_tflops": 55.442546845902385, "iter_time": 0.37211662673950197, "loss": 0.217432901263237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.720803767410366, "step_time": 0.3397697696685791} +{"epoch": 0, "iter": 1192, "iter_tflops": 52.34583487953891, "iter_time": 0.3941305656433106, "loss": 0.14391326904296875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.008630356884716, "step_time": 0.36189421463012694} +{"epoch": 0, "iter": 1193, "iter_tflops": 25.554746041789056, "iter_time": 0.551433837890625, "loss": 0.05880923569202423, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 28.46087004593943, "step_time": 0.4951272277832031} +{"epoch": 0, "iter": 1194, "iter_tflops": 29.44910090669854, "iter_time": 0.47851211929321286, "loss": 0.07407519221305847, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 32.38993686913153, "step_time": 0.4350657348632812} +{"epoch": 0, "iter": 1195, "iter_tflops": 31.07477646962522, "iter_time": 0.4534787788391113, "loss": 0.04927615448832512, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 34.12477490050067, "step_time": 0.4129478282928467} +{"epoch": 0, "iter": 1196, "iter_tflops": 32.56384197103599, "iter_time": 0.43274229431152345, "loss": 0.10058832168579102, "lr": 
3e-05, "seqlen": 5648.0, "step_tflops": 35.83488316113221, "step_time": 0.39324117851257323} +{"epoch": 0, "iter": 1197, "iter_tflops": 31.049877340795106, "iter_time": 0.664450080871582, "loss": 1.0277100801467896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.05518668702187, "step_time": 0.605813549041748} +{"epoch": 0, "iter": 1198, "iter_tflops": 41.74861901367088, "iter_time": 0.49417427444458006, "loss": 0.9266196489334106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.0039041144006, "step_time": 0.45842897224426266} +{"epoch": 0, "iter": 1199, "iter_tflops": 44.67430425608127, "iter_time": 0.46181118774414065, "loss": 0.9186232089996338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01776340899429, "step_time": 0.42965544509887693} +{"epoch": 0, "iter": 1200, "iter_tflops": 41.48148295363494, "iter_time": 0.4973567008972168, "loss": 0.9059624671936035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.110403670490136, "step_time": 0.45734668350219726} +{"epoch": 0, "iter": 1201, "iter_tflops": 28.78468768920812, "iter_time": 0.7167384872436523, "loss": 0.868966281414032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.508332619031172, "step_time": 0.6762445449829101} +{"epoch": 0, "iter": 1202, "iter_tflops": 17.449652727515534, "iter_time": 1.1823211517333987, "loss": 1.1549488306045532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.78732098339415, "step_time": 0.9053759994506835} +{"epoch": 0, "iter": 1203, "iter_tflops": 38.867936150075636, "iter_time": 0.5307998199462891, "loss": 1.2615045309066772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1946693504833, "step_time": 0.48895023536682125} +{"epoch": 0, "iter": 1204, "iter_tflops": 37.57316403876189, "iter_time": 0.549091194152832, "loss": 0.9266809225082397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.78432644075699, "step_time": 0.5058583850860595} +{"epoch": 0, "iter": 1205, "iter_tflops": 6.010875720551595, "iter_time": 1.023978988647461, "loss": 0.0058210138231515884, "lr": 
3e-05, "seqlen": 2496.0, "step_tflops": 6.41322280482361, "step_time": 0.9597375030517576} +{"epoch": 0, "iter": 1206, "iter_tflops": 7.864085753131856, "iter_time": 0.7826733627319337, "loss": 0.005763770081102848, "lr": 3e-05, "seqlen": 2496.0, "step_tflops": 8.902545974329547, "step_time": 0.691376428604126} +{"epoch": 0, "iter": 1207, "iter_tflops": 16.66139467305654, "iter_time": 0.36941748046875, "loss": 0.017602019011974335, "lr": 3e-05, "seqlen": 2496.0, "step_tflops": 18.36327418021109, "step_time": 0.3351804466247559} +{"epoch": 0, "iter": 1208, "iter_tflops": 18.34174267507985, "iter_time": 0.335573917388916, "loss": 0.0033197361044585705, "lr": 3e-05, "seqlen": 2496.0, "step_tflops": 20.114136379161717, "step_time": 0.30600421142578127} +{"epoch": 0, "iter": 1209, "iter_tflops": 40.673939487747894, "iter_time": 0.507231258392334, "loss": 0.3598833382129669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.88183572426911, "step_time": 0.4701511039733887} +{"epoch": 0, "iter": 1210, "iter_tflops": 9.845206639858484, "iter_time": 2.0955470275878905, "loss": 0.21496275067329407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.62801368026573, "step_time": 1.6337560310363768} +{"epoch": 0, "iter": 1211, "iter_tflops": 14.385991576249202, "iter_time": 1.434109939575195, "loss": 0.24063117802143097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.509771040768882, "step_time": 1.1146055488586426} +{"epoch": 0, "iter": 1212, "iter_tflops": 24.388473833723236, "iter_time": 0.8459362258911134, "loss": 0.3584442436695099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.521237229276736, "step_time": 0.6988559913635255} +{"epoch": 0, "iter": 1213, "iter_tflops": 18.528479040154124, "iter_time": 0.8244371948242186, "loss": 0.34966301918029785, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 19.567535729144065, "step_time": 0.7806587142944336} +{"epoch": 0, "iter": 1214, "iter_tflops": 8.549216754341257, "iter_time": 1.7867797393798825, "loss": 0.5279944539070129, 
"lr": 3e-05, "seqlen": 6112.0, "step_tflops": 10.648246899337277, "step_time": 1.434561710357666} +{"epoch": 0, "iter": 1215, "iter_tflops": 22.978299614726073, "iter_time": 0.6647823181152345, "loss": 0.4297955632209778, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.763468457278282, "step_time": 0.6168589553833007} +{"epoch": 0, "iter": 1216, "iter_tflops": 22.818570520892703, "iter_time": 0.6694357681274414, "loss": 0.30828914046287537, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.466619258176458, "step_time": 0.6243431968688965} +{"epoch": 0, "iter": 1217, "iter_tflops": 28.781850182386716, "iter_time": 0.6880021896362305, "loss": 0.2602194547653198, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 31.450618514041327, "step_time": 0.6296211929321289} +{"epoch": 0, "iter": 1218, "iter_tflops": 7.535974908976574, "iter_time": 2.6276594848632815, "loss": 0.1673872172832489, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 9.294199996457001, "step_time": 2.1305734710693356} +{"epoch": 0, "iter": 1219, "iter_tflops": 9.914365014134546, "iter_time": 1.997301483154297, "loss": 0.1954776644706726, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 12.090186748284244, "step_time": 1.6378552589416504} +{"epoch": 0, "iter": 1220, "iter_tflops": 41.20379308860708, "iter_time": 0.480586238861084, "loss": 0.20815229415893555, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 45.2207942799561, "step_time": 0.43789535903930665} +{"epoch": 0, "iter": 1221, "iter_tflops": 14.728272204581229, "iter_time": 1.0871842346191407, "loss": 0.6651838421821594, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 15.779787784011607, "step_time": 1.0147376861572266} +{"epoch": 0, "iter": 1222, "iter_tflops": 9.590281816044907, "iter_time": 1.669642837524414, "loss": 0.5391538739204407, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 11.777251546362955, "step_time": 1.3595995025634764} +{"epoch": 0, "iter": 1223, "iter_tflops": 23.81944667060919, "iter_time": 0.6722383422851563, "loss": 0.4575527012348175, 
"lr": 3e-05, "seqlen": 6400.0, "step_tflops": 25.73376266866067, "step_time": 0.6222310180664062} +{"epoch": 0, "iter": 1224, "iter_tflops": 25.961383376093522, "iter_time": 0.6167755050659179, "loss": 0.7012379169464111, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 27.806935360485774, "step_time": 0.5758399887084961} +{"epoch": 0, "iter": 1225, "iter_tflops": 15.138557529724054, "iter_time": 1.3628176574707032, "loss": 0.8687313795089722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.945561936016533, "step_time": 1.2938454971313478} +{"epoch": 0, "iter": 1226, "iter_tflops": 18.305867356114682, "iter_time": 1.1270208129882813, "loss": 1.1295394897460938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.659751066432, "step_time": 0.7458886184692384} +{"epoch": 0, "iter": 1227, "iter_tflops": 44.3483659604871, "iter_time": 0.46520526885986324, "loss": 0.885258674621582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.7695664316386, "step_time": 0.43188781166076656} +{"epoch": 0, "iter": 1228, "iter_tflops": 43.70070587950405, "iter_time": 0.4720997772216797, "loss": 1.2328981161117554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.884927154419685, "step_time": 0.44003680419921876} +{"epoch": 0, "iter": 1229, "iter_tflops": 46.4617974879095, "iter_time": 0.4440442390441895, "loss": 0.7573136687278748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.57539510420089, "step_time": 0.4079274806976318} +{"epoch": 0, "iter": 1230, "iter_tflops": 40.408181898137, "iter_time": 0.5105672302246094, "loss": 0.69745272397995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.56060095219747, "step_time": 0.4736182022094727} +{"epoch": 0, "iter": 1231, "iter_tflops": 48.52647983976498, "iter_time": 0.4251512489318848, "loss": 0.791322648525238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.67488542543782, "step_time": 0.3916685028076172} +{"epoch": 0, "iter": 1232, "iter_tflops": 49.04301836045618, "iter_time": 0.4206734046936035, "loss": 1.0016955137252808, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 52.8662022539386, "step_time": 0.3902510986328125} +{"epoch": 0, "iter": 1233, "iter_tflops": 28.04022317050402, "iter_time": 0.7357678070068359, "loss": 1.1375646591186523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.619404699487895, "step_time": 0.6965397758483887} +{"epoch": 0, "iter": 1234, "iter_tflops": 21.06544356766097, "iter_time": 0.9793809204101563, "loss": 0.9392949938774109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.993430492256888, "step_time": 0.8254606552124024} +{"epoch": 0, "iter": 1235, "iter_tflops": 43.42497730478084, "iter_time": 0.47509739303588866, "loss": 1.044600486755371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8023004258698, "step_time": 0.44081366348266604} +{"epoch": 0, "iter": 1236, "iter_tflops": 44.52799246403995, "iter_time": 0.4633286247253417, "loss": 1.1502408981323242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.71226521271122, "step_time": 0.43240649795532227} +{"epoch": 0, "iter": 1237, "iter_tflops": 36.23016456025895, "iter_time": 0.5694452056884765, "loss": 0.6089638471603394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.82623448369469, "step_time": 0.531369930267334} +{"epoch": 0, "iter": 1238, "iter_tflops": 14.854393401793176, "iter_time": 1.3888883209228515, "loss": 0.536857545375824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.19712030362835, "step_time": 1.199683036804199} +{"epoch": 0, "iter": 1239, "iter_tflops": 36.03563102518266, "iter_time": 0.5725192794799805, "loss": 0.5975744128227234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.287094326666754, "step_time": 0.4362943801879883} +{"epoch": 0, "iter": 1240, "iter_tflops": 46.5097002260515, "iter_time": 0.4435868949890136, "loss": 0.5683519840240479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99653545749679, "step_time": 0.4126504631042481} +{"epoch": 0, "iter": 1241, "iter_tflops": 40.04325140830019, "iter_time": 0.5152202377319336, "loss": 1.1195690631866455, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 43.45109598293358, "step_time": 0.47481180953979496} +{"epoch": 0, "iter": 1242, "iter_tflops": 19.02882096646971, "iter_time": 1.084202407836914, "loss": 1.0398890972137451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.209257450250956, "step_time": 0.9289411659240722} +{"epoch": 0, "iter": 1243, "iter_tflops": 43.884972723115524, "iter_time": 0.4701174964904785, "loss": 1.0774623155593872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.195612042347975, "step_time": 0.43714007759094237} +{"epoch": 0, "iter": 1244, "iter_tflops": 47.75917971855184, "iter_time": 0.43198173904418946, "loss": 1.1703927516937256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.405418242020296, "step_time": 0.401340835571289} +{"epoch": 0, "iter": 1245, "iter_tflops": 25.564601734929088, "iter_time": 0.8070179901123047, "loss": 1.0706640481948853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.829441394912518, "step_time": 0.7689721603393554} +{"epoch": 0, "iter": 1246, "iter_tflops": 19.27813144217594, "iter_time": 1.0701811828613281, "loss": 1.0312186479568481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.039249083848226, "step_time": 0.8954759521484376} +{"epoch": 0, "iter": 1247, "iter_tflops": 42.600566071610686, "iter_time": 0.4842915344238281, "loss": 0.907279372215271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79354613934895, "step_time": 0.45052404212951663} +{"epoch": 0, "iter": 1248, "iter_tflops": 42.92277122897699, "iter_time": 0.4806561393737793, "loss": 1.1501712799072266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.98523347690085, "step_time": 0.448646053314209} +{"epoch": 0, "iter": 1249, "iter_tflops": 21.61772169716462, "iter_time": 0.9543602142333983, "loss": 0.015798313543200493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.57901673218702, "step_time": 0.9137286071777344} +{"epoch": 0, "iter": 1250, "iter_tflops": 13.724837444380979, "iter_time": 1.5031940155029295, "loss": 0.003526735119521618, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 17.412874371163255, "step_time": 1.184818374633789} +{"epoch": 0, "iter": 1251, "iter_tflops": 54.450887146236894, "iter_time": 0.37889361572265634, "loss": 0.011293270625174046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.69484465124923, "step_time": 0.3456093006134034} +{"epoch": 0, "iter": 1252, "iter_tflops": 55.6400531204084, "iter_time": 0.3707957191467285, "loss": 0.008731572888791561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.60638403747514, "step_time": 0.34041122627258297} +{"epoch": 0, "iter": 1253, "iter_tflops": 38.723456063851856, "iter_time": 0.5327802734375, "loss": 0.748447835445404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.904252226380976, "step_time": 0.49233890151977533} +{"epoch": 0, "iter": 1254, "iter_tflops": 37.73333381373898, "iter_time": 0.5467604217529297, "loss": 0.8309340476989746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.06841829434469, "step_time": 0.5023590965270996} +{"epoch": 0, "iter": 1255, "iter_tflops": 41.04382168898734, "iter_time": 0.5026601486206055, "loss": 0.9608320593833923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.67380257495732, "step_time": 0.46181637382507323} +{"epoch": 0, "iter": 1256, "iter_tflops": 47.53711942561926, "iter_time": 0.43399965667724616, "loss": 1.1503288745880127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.81820967193162, "step_time": 0.3981436958312988} +{"epoch": 0, "iter": 1257, "iter_tflops": 25.15666560338481, "iter_time": 0.8201044540405272, "loss": 0.776974618434906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.88400868825056, "step_time": 0.7674113540649413} +{"epoch": 0, "iter": 1258, "iter_tflops": 12.910565124634383, "iter_time": 1.5980008087158202, "loss": 1.101041316986084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.0237585243828, "step_time": 1.287531478881836} +{"epoch": 0, "iter": 1259, "iter_tflops": 17.122786033454034, "iter_time": 1.204891159057617, "loss": 1.18230402469635, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.223569120514874, "step_time": 1.0201509628295897} +{"epoch": 0, "iter": 1260, "iter_tflops": 19.019265065307547, "iter_time": 1.0847471466064453, "loss": 1.1171926259994507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.163074664518884, "step_time": 0.9748627662658691} +{"epoch": 0, "iter": 1261, "iter_tflops": 13.254939065904294, "iter_time": 1.2265826110839844, "loss": 0.7505693435668945, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 14.145756295759094, "step_time": 1.1493395919799805} +{"epoch": 0, "iter": 1262, "iter_tflops": 11.18170620351675, "iter_time": 1.4540068817138674, "loss": 0.5965246558189392, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 14.023106529725696, "step_time": 1.159392017364502} +{"epoch": 0, "iter": 1263, "iter_tflops": 25.354877731851456, "iter_time": 0.6412287979125976, "loss": 0.7700743079185486, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 27.364741653836614, "step_time": 0.5941323318481445} +{"epoch": 0, "iter": 1264, "iter_tflops": 26.463314930713647, "iter_time": 0.6143704147338866, "loss": 0.3265666663646698, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 28.378782776748988, "step_time": 0.5729025764465332} +{"epoch": 0, "iter": 1265, "iter_tflops": 23.33499190081329, "iter_time": 0.8841268768310546, "loss": 1.0247465372085571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.382958647561516, "step_time": 0.8127930946350098} +{"epoch": 0, "iter": 1266, "iter_tflops": 21.67727924106267, "iter_time": 0.9517381439208985, "loss": 0.8494834303855896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.57422819319169, "step_time": 0.7763572044372559} +{"epoch": 0, "iter": 1267, "iter_tflops": 42.01229358524288, "iter_time": 0.4910727729797363, "loss": 0.8757615089416504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08059402169537, "step_time": 0.4576491050720215} +{"epoch": 0, "iter": 1268, "iter_tflops": 47.87168013711732, "iter_time": 0.43096656417846685, "loss": 0.9661794900894165, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 51.30865881541806, "step_time": 0.4020976963043213} +{"epoch": 0, "iter": 1269, "iter_tflops": 47.528046225722484, "iter_time": 0.4340825080871582, "loss": 0.7224346995353699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.757218848214436, "step_time": 0.3986128692626953} +{"epoch": 0, "iter": 1270, "iter_tflops": 35.904885633406145, "iter_time": 0.5746040725708008, "loss": 0.5659501552581787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.33805065724427, "step_time": 0.46531349945068357} +{"epoch": 0, "iter": 1271, "iter_tflops": 44.66699879567388, "iter_time": 0.46188671875000004, "loss": 0.796527624130249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.65667310342992, "step_time": 0.4240136489868165} +{"epoch": 0, "iter": 1272, "iter_tflops": 49.6843116323182, "iter_time": 0.41524362182617186, "loss": 0.6163279414176941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.60759293832768, "step_time": 0.38485394287109376} +{"epoch": 0, "iter": 1273, "iter_tflops": 45.92814130983933, "iter_time": 0.44920375442504884, "loss": 0.5574139356613159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.066782438595695, "step_time": 0.41207148742675787} +{"epoch": 0, "iter": 1274, "iter_tflops": 47.19001008001824, "iter_time": 0.43719197082519534, "loss": 0.3925318121910095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.085528110648305, "step_time": 0.40385397338867185} +{"epoch": 0, "iter": 1275, "iter_tflops": 45.83989228938239, "iter_time": 0.4500685424804687, "loss": 0.5098050236701965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.69082463403935, "step_time": 0.41518919563293455} +{"epoch": 0, "iter": 1276, "iter_tflops": 44.68407258500946, "iter_time": 0.4617102317810059, "loss": 0.4877661168575287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.630278403858846, "step_time": 0.42424378776550287} +{"epoch": 0, "iter": 1277, "iter_tflops": 38.86209675268744, "iter_time": 0.5308795776367188, "loss": 0.918901801109314, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 42.159017249308405, "step_time": 0.4893637199401855} +{"epoch": 0, "iter": 1278, "iter_tflops": 35.484130084136474, "iter_time": 0.5814174804687502, "loss": 1.031248927116394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.60920553227304, "step_time": 0.5343568515777588} +{"epoch": 0, "iter": 1279, "iter_tflops": 38.079473203679804, "iter_time": 0.541790412902832, "loss": 0.7720741033554077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.296809551846536, "step_time": 0.49958080863952636} +{"epoch": 0, "iter": 1280, "iter_tflops": 38.22035074896885, "iter_time": 0.5397934112548828, "loss": 0.9926677346229553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.38732532691077, "step_time": 0.49848820495605467} +{"epoch": 0, "iter": 1281, "iter_tflops": 22.09927118250482, "iter_time": 0.9335644302368165, "loss": 0.6263396739959717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.735980802805194, "step_time": 0.869190689086914} +{"epoch": 0, "iter": 1282, "iter_tflops": 16.756870567894914, "iter_time": 1.2312020568847657, "loss": 0.6814040541648865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.187934524169382, "step_time": 1.021951675415039} +{"epoch": 0, "iter": 1283, "iter_tflops": 47.51689401559832, "iter_time": 0.4341843872070313, "loss": 0.8217386603355408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.31124220622292, "step_time": 0.4020774517059326} +{"epoch": 0, "iter": 1284, "iter_tflops": 51.887349897671356, "iter_time": 0.3976131668090821, "loss": 0.6580778360366821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.97099367007271, "step_time": 0.36860330963134763} +{"epoch": 0, "iter": 1285, "iter_tflops": 3.195168139604563, "iter_time": 0.5273930816650391, "loss": 0.8927826285362244, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 3.447037706010402, "step_time": 0.48885730743408207} +{"epoch": 0, "iter": 1286, "iter_tflops": 2.675739420039518, "iter_time": 0.6297734222412109, "loss": 1.110416293144226, "lr": 
3e-05, "seqlen": 688.0, "step_tflops": 3.0301617117854067, "step_time": 0.5561120929718018} +{"epoch": 0, "iter": 1287, "iter_tflops": 4.073567508856977, "iter_time": 0.4136692390441895, "loss": 1.0892640352249146, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 4.401867559672665, "step_time": 0.38281696319580083} +{"epoch": 0, "iter": 1288, "iter_tflops": 4.094664293052665, "iter_time": 0.4115379066467285, "loss": 0.9891809821128845, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 4.418191723077514, "step_time": 0.381402545928955} +{"epoch": 0, "iter": 1289, "iter_tflops": 3.654786448130687, "iter_time": 0.7410133666992188, "loss": 0.27380475401878357, "lr": 3e-05, "seqlen": 1104.0, "step_tflops": 3.8553972479414953, "step_time": 0.7024556579589842} +{"epoch": 0, "iter": 1290, "iter_tflops": 1.7274155501814656, "iter_time": 1.5678020324707034, "loss": 0.2011982947587967, "lr": 3e-05, "seqlen": 1104.0, "step_tflops": 2.255291770059269, "step_time": 1.2008404617309572} +{"epoch": 0, "iter": 1291, "iter_tflops": 4.994237775198329, "iter_time": 0.5422740631103515, "loss": 0.40503087639808655, "lr": 3e-05, "seqlen": 1104.0, "step_tflops": 5.422430050296927, "step_time": 0.49945238304138184} +{"epoch": 0, "iter": 1292, "iter_tflops": 5.466511435508867, "iter_time": 0.4954248504638672, "loss": 0.27019038796424866, "lr": 3e-05, "seqlen": 1104.0, "step_tflops": 5.958702283285279, "step_time": 0.4545025882720947} +{"epoch": 0, "iter": 1293, "iter_tflops": 19.922740319988435, "iter_time": 1.0355550079345703, "loss": 0.2643909156322479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.221978232673134, "step_time": 0.9721569442749024} +{"epoch": 0, "iter": 1294, "iter_tflops": 16.268800453102795, "iter_time": 1.2681385803222656, "loss": 0.3127443790435791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.072107003527705, "step_time": 0.8228703517913818} +{"epoch": 0, "iter": 1295, "iter_tflops": 39.311652773825976, "iter_time": 0.5248086013793946, "loss": 0.48501497507095337, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 43.06222049366705, "step_time": 0.47909962081909185} +{"epoch": 0, "iter": 1296, "iter_tflops": 40.57919726080074, "iter_time": 0.5084155158996583, "loss": 0.3672957122325897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.01842405695544, "step_time": 0.4686922340393067} +{"epoch": 0, "iter": 1297, "iter_tflops": 24.892855282947593, "iter_time": 0.828795783996582, "loss": 0.8303813934326172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.77388524882817, "step_time": 0.7705677871704102} +{"epoch": 0, "iter": 1298, "iter_tflops": 13.43118113949253, "iter_time": 1.5360595092773437, "loss": 1.0277799367904663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.88450272052674, "step_time": 1.1535737857818602} +{"epoch": 0, "iter": 1299, "iter_tflops": 38.337637088693235, "iter_time": 0.5381420211791992, "loss": 1.0277491807937622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.754016310720154, "step_time": 0.49411039543151847} +{"epoch": 0, "iter": 1300, "iter_tflops": 44.42437615279084, "iter_time": 0.4644093017578124, "loss": 1.1325039863586426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.25035720608973, "step_time": 0.427584264755249} +{"epoch": 0, "iter": 1301, "iter_tflops": 15.461188528109963, "iter_time": 1.3343795318603515, "loss": 1.0238652229309082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.552820906567664, "step_time": 1.2463793106079102} +{"epoch": 0, "iter": 1302, "iter_tflops": 16.233917214747912, "iter_time": 1.2708635406494142, "loss": 0.9983116984367371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.63406877246369, "step_time": 0.9998558082580566} +{"epoch": 0, "iter": 1303, "iter_tflops": 33.6093140132746, "iter_time": 0.6138504791259765, "loss": 0.9105719327926636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.522172845080824, "step_time": 0.5648922805786133} +{"epoch": 0, "iter": 1304, "iter_tflops": 34.324843217234566, "iter_time": 0.6010542678833009, "loss": 0.9101387858390808, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 37.097104210866824, "step_time": 0.5561375732421875} +{"epoch": 0, "iter": 1305, "iter_tflops": 28.971710640781232, "iter_time": 0.7121116790771483, "loss": 0.7020695805549622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.257178287788626, "step_time": 0.6600433769226074} +{"epoch": 0, "iter": 1306, "iter_tflops": 10.954830288895366, "iter_time": 1.883287368774414, "loss": 0.7913380265235901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.436411430026793, "step_time": 1.6589265823364259} +{"epoch": 0, "iter": 1307, "iter_tflops": 11.53451983034261, "iter_time": 1.7886391296386719, "loss": 0.5879972577095032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.313916679582698, "step_time": 1.5495886001586916} +{"epoch": 0, "iter": 1308, "iter_tflops": 34.17641485449841, "iter_time": 0.603664649963379, "loss": 0.6337584257125854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.349601680610526, "step_time": 0.45493439292907717} +{"epoch": 0, "iter": 1309, "iter_tflops": 27.791670486218543, "iter_time": 0.6175045623779296, "loss": 0.6454038023948669, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 29.726972408044624, "step_time": 0.5773034362792968} +{"epoch": 0, "iter": 1310, "iter_tflops": 13.626036628331384, "iter_time": 1.259462585449219, "loss": 0.6545457243919373, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 15.34539453647073, "step_time": 1.1183474807739258} +{"epoch": 0, "iter": 1311, "iter_tflops": 17.89183142369911, "iter_time": 0.9591798019409179, "loss": 0.3992815613746643, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 20.467902137614153, "step_time": 0.8384583435058595} +{"epoch": 0, "iter": 1312, "iter_tflops": 29.36861723598246, "iter_time": 0.5843476791381835, "loss": 0.6090601086616516, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 31.108343941208055, "step_time": 0.5516681747436524} +{"epoch": 0, "iter": 1313, "iter_tflops": 25.049298701976543, "iter_time": 0.5967672500610351, "loss": 0.6627371907234192, "lr": 
3e-05, "seqlen": 5984.0, "step_tflops": 26.95233751967204, "step_time": 0.5546309700012207} +{"epoch": 0, "iter": 1314, "iter_tflops": 27.60553738155943, "iter_time": 0.5415073394775392, "loss": 0.5432456731796265, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 29.414191168277004, "step_time": 0.5082105102539063} +{"epoch": 0, "iter": 1315, "iter_tflops": 25.065930708729972, "iter_time": 0.5963712768554688, "loss": 0.6157838106155396, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 26.60231423624821, "step_time": 0.561928596496582} +{"epoch": 0, "iter": 1316, "iter_tflops": 26.936773300135698, "iter_time": 0.5549514389038086, "loss": 0.42533618211746216, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.588774572891282, "step_time": 0.5228835906982422} +{"epoch": 0, "iter": 1317, "iter_tflops": 30.081770988831188, "iter_time": 0.685833740234375, "loss": 0.8370818495750427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.172861117481894, "step_time": 0.6412576560974121} +{"epoch": 0, "iter": 1318, "iter_tflops": 32.434016135594966, "iter_time": 0.6360943222045898, "loss": 0.9144291877746582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.69730148675283, "step_time": 0.5779454650878906} +{"epoch": 0, "iter": 1319, "iter_tflops": 34.41892695391516, "iter_time": 0.5994112930297851, "loss": 1.032841682434082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.491118579370685, "step_time": 0.5502928237915039} +{"epoch": 0, "iter": 1320, "iter_tflops": 36.607433636764554, "iter_time": 0.5635766143798828, "loss": 0.9292358160018921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.87979761278403, "step_time": 0.5173319511413573} +{"epoch": 0, "iter": 1321, "iter_tflops": 17.795748343565343, "iter_time": 0.9159088287353516, "loss": 0.4755682349205017, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 18.921952015800677, "step_time": 0.8613954315185546} +{"epoch": 0, "iter": 1322, "iter_tflops": 9.534232434827858, "iter_time": 1.7095537719726561, "loss": 0.476983904838562, "lr": 
3e-05, "seqlen": 6512.0, "step_tflops": 10.958972732890992, "step_time": 1.4873002624511718} +{"epoch": 0, "iter": 1323, "iter_tflops": 8.733613992150449, "iter_time": 1.8662701416015626, "loss": 0.5973418951034546, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 11.023090893209181, "step_time": 1.4786490631103517} +{"epoch": 0, "iter": 1324, "iter_tflops": 14.995382820013473, "iter_time": 1.0869534454345702, "loss": 0.5630269050598145, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 18.071020712747565, "step_time": 0.9019569664001464} +{"epoch": 0, "iter": 1325, "iter_tflops": 20.53472604286756, "iter_time": 0.7518563232421875, "loss": 0.6597923636436462, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 21.7317216316343, "step_time": 0.7104436492919921} +{"epoch": 0, "iter": 1326, "iter_tflops": 9.672093008715182, "iter_time": 1.5962588043212893, "loss": 0.4859448969364166, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.062793751922165, "step_time": 1.2798994941711426} +{"epoch": 0, "iter": 1327, "iter_tflops": 22.25055061910458, "iter_time": 0.6938778228759765, "loss": 0.5338082909584045, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.97854386968336, "step_time": 0.6438741111755372} +{"epoch": 0, "iter": 1328, "iter_tflops": 23.6973225554171, "iter_time": 0.6515151062011719, "loss": 0.5351430177688599, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.34702814533201, "step_time": 0.6091113929748535} +{"epoch": 0, "iter": 1329, "iter_tflops": 36.272852699848976, "iter_time": 0.5687750473022462, "loss": 1.2670420408248901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.188491221549505, "step_time": 0.5133582496643067} +{"epoch": 0, "iter": 1330, "iter_tflops": 35.55719380950338, "iter_time": 0.580222770690918, "loss": 1.2159175872802734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.966754796924214, "step_time": 0.5294537258148193} +{"epoch": 0, "iter": 1331, "iter_tflops": 38.7303732541217, "iter_time": 0.5326851196289062, "loss": 0.9698737263679504, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 42.32401966695542, "step_time": 0.48745590972900393} +{"epoch": 0, "iter": 1332, "iter_tflops": 38.70493291989418, "iter_time": 0.5330352478027344, "loss": 0.973784863948822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.77167125556429, "step_time": 0.49390155792236334} +{"epoch": 0, "iter": 1333, "iter_tflops": 30.74638936811311, "iter_time": 0.671008659362793, "loss": 0.7426632642745972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.09567974346821, "step_time": 0.6050940666198731} +{"epoch": 0, "iter": 1334, "iter_tflops": 38.59572181484584, "iter_time": 0.5345435333251952, "loss": 0.7545399069786072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22610501760265, "step_time": 0.4885862312316894} +{"epoch": 0, "iter": 1335, "iter_tflops": 39.54894918380521, "iter_time": 0.5216597137451171, "loss": 0.7941955924034119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.12496407243214, "step_time": 0.47840256690979005} +{"epoch": 0, "iter": 1336, "iter_tflops": 37.59285458159934, "iter_time": 0.5488035888671875, "loss": 0.7470657229423523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.89025778712069, "step_time": 0.5045478954315186} +{"epoch": 0, "iter": 1337, "iter_tflops": 39.75842599161599, "iter_time": 0.5189112243652344, "loss": 0.014036186039447784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.56310138218579, "step_time": 0.46296359252929686} +{"epoch": 0, "iter": 1338, "iter_tflops": 43.29041212081532, "iter_time": 0.4765741996765137, "loss": 0.01087844930589199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.937091870855454, "step_time": 0.4303784961700439} +{"epoch": 0, "iter": 1339, "iter_tflops": 46.578373937277384, "iter_time": 0.44293288421630855, "loss": 0.0056582554243505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.47224859381311, "step_time": 0.4008197441101074} +{"epoch": 0, "iter": 1340, "iter_tflops": 45.9504387085258, "iter_time": 0.4489857788085937, "loss": 0.019788969308137894, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 50.89409314768416, "step_time": 0.40537304496765136} +{"epoch": 0, "iter": 1341, "iter_tflops": 17.80879162566293, "iter_time": 0.9405833129882812, "loss": 0.0672738254070282, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 19.005346636419663, "step_time": 0.8813652572631836} +{"epoch": 0, "iter": 1342, "iter_tflops": 14.983591238961793, "iter_time": 1.117933074951172, "loss": 0.11071476340293884, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 17.12706475905264, "step_time": 0.9780223560333252} +{"epoch": 0, "iter": 1343, "iter_tflops": 42.84648955467775, "iter_time": 0.3909457321166992, "loss": 0.07424551248550415, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 47.156443690253944, "step_time": 0.3552144927978515} +{"epoch": 0, "iter": 1344, "iter_tflops": 46.61833107222365, "iter_time": 0.35931471252441405, "loss": 0.053801197558641434, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 50.64015806797458, "step_time": 0.33077803993225097} +{"epoch": 0, "iter": 1345, "iter_tflops": 25.327953050440346, "iter_time": 0.8145582656860351, "loss": 0.9283478856086731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.63779861445273, "step_time": 0.7745044479370118} +{"epoch": 0, "iter": 1346, "iter_tflops": 15.45058229599729, "iter_time": 1.3352955322265627, "loss": 0.8091250061988831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.22387580024879, "step_time": 1.197819454193115} +{"epoch": 0, "iter": 1347, "iter_tflops": 33.074197872238074, "iter_time": 0.6237821273803711, "loss": 0.9100151062011719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.18469875368752, "step_time": 0.5701607093811035} +{"epoch": 0, "iter": 1348, "iter_tflops": 37.85910701293894, "iter_time": 0.5449440078735351, "loss": 1.003839373588562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.71214872060465, "step_time": 0.5067552108764648} +{"epoch": 0, "iter": 1349, "iter_tflops": 22.14188417589964, "iter_time": 0.9317677459716797, "loss": 1.2068432569503784, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 23.7452333445544, "step_time": 0.8688520011901854} +{"epoch": 0, "iter": 1350, "iter_tflops": 16.329028985946227, "iter_time": 1.2634611358642578, "loss": 1.1212022304534912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.1142236917643, "step_time": 1.1389443931579588} +{"epoch": 0, "iter": 1351, "iter_tflops": 43.774462510188044, "iter_time": 0.47130432510375975, "loss": 0.8261159062385559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.96207444668121, "step_time": 0.43931393051147466} +{"epoch": 0, "iter": 1352, "iter_tflops": 41.05514158617639, "iter_time": 0.5025215530395508, "loss": 0.832187831401825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87222633861526, "step_time": 0.47025408172607425} +{"epoch": 0, "iter": 1353, "iter_tflops": 24.084531893571317, "iter_time": 0.8566117706298828, "loss": 0.1962105631828308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.340920699713937, "step_time": 0.8141414337158202} +{"epoch": 0, "iter": 1354, "iter_tflops": 20.46326962596054, "iter_time": 1.0082012252807617, "loss": 0.20423577725887299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.12232865211334, "step_time": 0.8922584667205811} +{"epoch": 0, "iter": 1355, "iter_tflops": 49.45921585408448, "iter_time": 0.4171334533691406, "loss": 0.15034277737140656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.548786865633524, "step_time": 0.38527658081054694} +{"epoch": 0, "iter": 1356, "iter_tflops": 52.32381631857157, "iter_time": 0.39429642105102536, "loss": 0.21656017005443573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.691384799613935, "step_time": 0.36391937828063964} +{"epoch": 0, "iter": 1357, "iter_tflops": 46.76370953702849, "iter_time": 0.4411774368286133, "loss": 0.049130212515592575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.945887071749944, "step_time": 0.40496092414855955} +{"epoch": 0, "iter": 1358, "iter_tflops": 16.70542246678028, "iter_time": 1.2349938201904296, "loss": 0.07383783161640167, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 20.389804076313727, "step_time": 1.011833827972412} +{"epoch": 0, "iter": 1359, "iter_tflops": 43.766462308028224, "iter_time": 0.4713904762268067, "loss": 0.13576915860176086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.46727650142174, "step_time": 0.425670576095581} +{"epoch": 0, "iter": 1360, "iter_tflops": 48.460435878040514, "iter_time": 0.4257306632995605, "loss": 0.10741135478019714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.47584538662529, "step_time": 0.38580210113525387} +{"epoch": 0, "iter": 1361, "iter_tflops": 24.699749764946855, "iter_time": 0.8352754058837891, "loss": 0.615668535232544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.49388061378933, "step_time": 0.7787116508483887} +{"epoch": 0, "iter": 1362, "iter_tflops": 12.40353725812467, "iter_time": 1.6633233795166016, "loss": 0.8666067123413086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.1072378390198, "step_time": 1.20598624420166} +{"epoch": 0, "iter": 1363, "iter_tflops": 19.638263274756614, "iter_time": 1.0505559082031248, "loss": 0.9803312420845032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.529799723345178, "step_time": 0.8410624523162843} +{"epoch": 0, "iter": 1364, "iter_tflops": 39.93095375395086, "iter_time": 0.516669189453125, "loss": 0.8177111148834229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.63352365463751, "step_time": 0.47282666587829586} +{"epoch": 0, "iter": 1365, "iter_tflops": 14.96006736563964, "iter_time": 1.062123748779297, "loss": 0.43887901306152344, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 15.995401506227392, "step_time": 0.9933756790161132} +{"epoch": 0, "iter": 1366, "iter_tflops": 12.773022252608328, "iter_time": 1.243984588623047, "loss": 0.5486738681793213, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 15.16658632099463, "step_time": 1.0476611213684082} +{"epoch": 0, "iter": 1367, "iter_tflops": 27.245371217978978, "iter_time": 0.583197883605957, "loss": 0.3786585330963135, "lr": 3e-05, 
"seqlen": 6352.0, "step_tflops": 28.939921976308828, "step_time": 0.5490492630004883} +{"epoch": 0, "iter": 1368, "iter_tflops": 29.53729621658885, "iter_time": 0.537945068359375, "loss": 0.633010983467102, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 31.411540647635903, "step_time": 0.5058472938537597} +{"epoch": 0, "iter": 1369, "iter_tflops": 28.92463036177825, "iter_time": 0.7132707748413085, "loss": 1.0791181325912476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.548806803145347, "step_time": 0.6753485870361328} +{"epoch": 0, "iter": 1370, "iter_tflops": 14.746128763392278, "iter_time": 1.3990854034423827, "loss": 1.0911413431167603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.203958512821195, "step_time": 1.1992061882019043} +{"epoch": 0, "iter": 1371, "iter_tflops": 36.004273186970295, "iter_time": 0.5730179138183593, "loss": 1.051397442817688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.36578567627098, "step_time": 0.5240869235992431} +{"epoch": 0, "iter": 1372, "iter_tflops": 37.160058680336526, "iter_time": 0.5551953964233398, "loss": 0.8045376539230347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.41791553286032, "step_time": 0.5104442729949952} +{"epoch": 0, "iter": 1373, "iter_tflops": 15.658519258377284, "iter_time": 0.8739306259155274, "loss": 0.010017755441367626, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 16.70999721653301, "step_time": 0.8189384689331055} +{"epoch": 0, "iter": 1374, "iter_tflops": 13.35525512634633, "iter_time": 1.0246498031616211, "loss": 0.026766471564769745, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 15.122230951243242, "step_time": 0.9049233264923097} +{"epoch": 0, "iter": 1375, "iter_tflops": 30.083687241080007, "iter_time": 0.4548797302246094, "loss": 0.027094196528196335, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 33.34344686177055, "step_time": 0.41040926551818846} +{"epoch": 0, "iter": 1376, "iter_tflops": 37.36024880774653, "iter_time": 0.36628395080566406, "loss": 0.0028114046435803175, "lr": 
3e-05, "seqlen": 5488.0, "step_tflops": 41.21546727336761, "step_time": 0.33202242851257324} +{"epoch": 0, "iter": 1377, "iter_tflops": 19.161408018391572, "iter_time": 1.0767002868652344, "loss": 0.17780213057994843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.210974107461656, "step_time": 1.0207866973876953} +{"epoch": 0, "iter": 1378, "iter_tflops": 21.542599808035114, "iter_time": 0.9576881942749025, "loss": 0.1653410643339157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.32120050142639, "step_time": 0.783820384979248} +{"epoch": 0, "iter": 1379, "iter_tflops": 48.597685699102826, "iter_time": 0.4245283126831055, "loss": 0.19383999705314636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.59630591979223, "step_time": 0.39225366020202634} +{"epoch": 0, "iter": 1380, "iter_tflops": 55.45715131448079, "iter_time": 0.3720186309814454, "loss": 0.24619899690151215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.36493211668811, "step_time": 0.3417728271484375} +{"epoch": 0, "iter": 1381, "iter_tflops": 33.978252161027555, "iter_time": 0.6071852493286133, "loss": 0.2938988208770752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.46493410312884, "step_time": 0.5657789878845215} +{"epoch": 0, "iter": 1382, "iter_tflops": 48.311628459171025, "iter_time": 0.42704198074340827, "loss": 0.3118777275085449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.87345709581199, "step_time": 0.3901975517272949} +{"epoch": 0, "iter": 1383, "iter_tflops": 49.0251784552568, "iter_time": 0.42082648468017575, "loss": 0.21309219300746918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.91207478834703, "step_time": 0.38991276741027836} +{"epoch": 0, "iter": 1384, "iter_tflops": 50.26420409373447, "iter_time": 0.41045300292968756, "loss": 0.29386231303215027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.25385313797279, "step_time": 0.3802696456909179} +{"epoch": 0, "iter": 1385, "iter_tflops": 26.21758141066932, "iter_time": 0.7869182586669923, "loss": 0.887023389339447, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.637415571312296, "step_time": 0.7464914169311523} +{"epoch": 0, "iter": 1386, "iter_tflops": 12.386487815308861, "iter_time": 1.6656128692626955, "loss": 0.8518761396408081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.918327891621177, "step_time": 1.3829360542297362} +{"epoch": 0, "iter": 1387, "iter_tflops": 35.485021778134275, "iter_time": 0.5814028701782228, "loss": 0.9619290828704834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.87191494426547, "step_time": 0.5307454891204835} +{"epoch": 0, "iter": 1388, "iter_tflops": 35.56935365124289, "iter_time": 0.5800244140625002, "loss": 0.8038643002510071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.53073061593152, "step_time": 0.535445167541504} +{"epoch": 0, "iter": 1389, "iter_tflops": 21.566596117795445, "iter_time": 0.9566226119995117, "loss": 0.18907324969768524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.43638094353567, "step_time": 0.8803020210266113} +{"epoch": 0, "iter": 1390, "iter_tflops": 37.20937848850608, "iter_time": 0.5544595031738281, "loss": 0.15719673037528992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.46744007665707, "step_time": 0.4439903182983399} +{"epoch": 0, "iter": 1391, "iter_tflops": 48.52627389312791, "iter_time": 0.42515305328369135, "loss": 0.24604231119155884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.53077383665628, "step_time": 0.39274299621582026} +{"epoch": 0, "iter": 1392, "iter_tflops": 42.35970582349266, "iter_time": 0.48704524993896486, "loss": 0.22893694043159485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.27626313349414, "step_time": 0.445824535369873} +{"epoch": 0, "iter": 1393, "iter_tflops": 31.864533535513512, "iter_time": 0.6474625930786132, "loss": 0.4232967495918274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.93272171805036, "step_time": 0.6079999618530273} +{"epoch": 0, "iter": 1394, "iter_tflops": 15.661067321353505, "iter_time": 1.317349136352539, "loss": 0.7258856296539307, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.07107384376855, "step_time": 1.1416639480590818} +{"epoch": 0, "iter": 1395, "iter_tflops": 48.20190637444292, "iter_time": 0.42801405715942387, "loss": 0.5616106390953064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.26956592700172, "step_time": 0.394705659866333} +{"epoch": 0, "iter": 1396, "iter_tflops": 52.16572348950068, "iter_time": 0.39549137115478517, "loss": 0.49792933464050293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.306763621437376, "step_time": 0.3664052448272705} +{"epoch": 0, "iter": 1397, "iter_tflops": 1.75277489278657, "iter_time": 0.8717532119750977, "loss": 1.0673846006393433, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 1.8384088065673412, "step_time": 0.8311465530395508} +{"epoch": 0, "iter": 1398, "iter_tflops": 1.104222122364207, "iter_time": 1.3837679138183594, "loss": 1.1851695775985718, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 1.2858271490096993, "step_time": 1.1883301296234132} +{"epoch": 0, "iter": 1399, "iter_tflops": 3.4676440188733673, "iter_time": 0.4406412925720215, "loss": 1.1941734552383423, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.756843205745337, "step_time": 0.40672103118896485} +{"epoch": 0, "iter": 1400, "iter_tflops": 3.857385536679182, "iter_time": 0.3961198921203614, "loss": 1.21523118019104, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 4.146643765271774, "step_time": 0.3684876804351807} +{"epoch": 0, "iter": 1401, "iter_tflops": 32.77766677049202, "iter_time": 0.6294253234863282, "loss": 0.9843817949295044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.73917110279605, "step_time": 0.5938856010437013} +{"epoch": 0, "iter": 1402, "iter_tflops": 14.513490348642287, "iter_time": 1.4215115051269533, "loss": 0.9902501106262207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.70826231432287, "step_time": 1.1650546588897706} +{"epoch": 0, "iter": 1403, "iter_tflops": 37.06193013429701, "iter_time": 0.5566653823852539, "loss": 0.9966299533843994, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 40.65077895338776, "step_time": 0.5075202503204346} +{"epoch": 0, "iter": 1404, "iter_tflops": 40.110933576017864, "iter_time": 0.5143508682250977, "loss": 0.9344085454940796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.82398785552973, "step_time": 0.4707717056274414} +{"epoch": 0, "iter": 1405, "iter_tflops": 22.47998740597362, "iter_time": 0.9177537841796876, "loss": 0.09037449210882187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.18769491267138, "step_time": 0.8529582328796388} +{"epoch": 0, "iter": 1406, "iter_tflops": 27.875329057624196, "iter_time": 0.7401201782226563, "loss": 0.05198342353105545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.42243315042736, "step_time": 0.6565721187591552} +{"epoch": 0, "iter": 1407, "iter_tflops": 53.485863967075915, "iter_time": 0.3857298355102539, "loss": 0.18586474657058716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.07076205175768, "step_time": 0.3552750606536865} +{"epoch": 0, "iter": 1408, "iter_tflops": 51.34573788427465, "iter_time": 0.40180732345581055, "loss": 0.14544843137264252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.081826891068104, "step_time": 0.36787484741210935} +{"epoch": 0, "iter": 1409, "iter_tflops": 30.37697817828197, "iter_time": 0.6791687240600586, "loss": 0.02780204638838768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.17050758651624, "step_time": 0.6413045692443848} +{"epoch": 0, "iter": 1410, "iter_tflops": 17.957694371081917, "iter_time": 1.1488720703125, "loss": 0.05780176818370819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.032131236698568, "step_time": 0.9809321403503419} +{"epoch": 0, "iter": 1411, "iter_tflops": 49.1361510019688, "iter_time": 0.4198760604858398, "loss": 0.04917697235941887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.07165029148004, "step_time": 0.38155102348327635} +{"epoch": 0, "iter": 1412, "iter_tflops": 57.142668831654944, "iter_time": 0.3610453262329102, "loss": 0.03776688128709793, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.11100787460255, "step_time": 0.33216484832763676} +{"epoch": 0, "iter": 1413, "iter_tflops": 29.292633498153535, "iter_time": 0.7043099594116211, "loss": 0.9992769956588745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.8630327916184, "step_time": 0.6684726562500001} +{"epoch": 0, "iter": 1414, "iter_tflops": 16.51529202209672, "iter_time": 1.2492115478515626, "loss": 0.7973877191543579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.47626631861481, "step_time": 0.9606461944580078} +{"epoch": 0, "iter": 1415, "iter_tflops": 36.17444913100862, "iter_time": 0.5703222579956055, "loss": 1.0346285104751587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.23503496597838, "step_time": 0.5258334426879883} +{"epoch": 0, "iter": 1416, "iter_tflops": 34.41209061685583, "iter_time": 0.5995303726196288, "loss": 0.7755734324455261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.12391964183115, "step_time": 0.5557358627319335} +{"epoch": 0, "iter": 1417, "iter_tflops": 35.68613574111222, "iter_time": 0.5781262969970703, "loss": 0.493092805147171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.60800538706977, "step_time": 0.5208819103240966} +{"epoch": 0, "iter": 1418, "iter_tflops": 37.99975102857273, "iter_time": 0.5429270706176759, "loss": 0.527992844581604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.36269234111248, "step_time": 0.48701091384887696} +{"epoch": 0, "iter": 1419, "iter_tflops": 39.337193348628006, "iter_time": 0.5244678573608398, "loss": 0.3785557150840759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.59933108342713, "step_time": 0.4843055744171143} +{"epoch": 0, "iter": 1420, "iter_tflops": 41.57657337798968, "iter_time": 0.49621918869018555, "loss": 0.7804308533668518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.32802381609166, "step_time": 0.45515095901489255} +{"epoch": 0, "iter": 1421, "iter_tflops": 17.372817995857208, "iter_time": 1.1875502014160157, "loss": 0.010887935757637024, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.556929242865763, "step_time": 1.1117730331420896} +{"epoch": 0, "iter": 1422, "iter_tflops": 17.780215222934544, "iter_time": 1.1603399200439453, "loss": 0.009577919729053974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.911308944011722, "step_time": 0.9415728454589842} +{"epoch": 0, "iter": 1423, "iter_tflops": 43.94280829029925, "iter_time": 0.46949874877929687, "loss": 0.0022663078270852566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.663236245915265, "step_time": 0.42395646286010746} +{"epoch": 0, "iter": 1424, "iter_tflops": 50.30608757628165, "iter_time": 0.410111270904541, "loss": 0.016964882612228394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.44587508279362, "step_time": 0.3720942897796631} +{"epoch": 0, "iter": 1425, "iter_tflops": 17.8103647616161, "iter_time": 1.158375686645508, "loss": 1.011991262435913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.163362711734464, "step_time": 1.076590461730957} +{"epoch": 0, "iter": 1426, "iter_tflops": 25.3406226757244, "iter_time": 0.8141510086059571, "loss": 0.8889952898025513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.388673740955326, "step_time": 0.7267367858886717} +{"epoch": 0, "iter": 1427, "iter_tflops": 47.00586252580436, "iter_time": 0.4389046897888184, "loss": 0.9722286462783813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.54970396906098, "step_time": 0.40813480377197264} +{"epoch": 0, "iter": 1428, "iter_tflops": 43.990231464780784, "iter_time": 0.4689926109313965, "loss": 1.007193922996521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.21149118506503, "step_time": 0.4369930496215821} +{"epoch": 0, "iter": 1429, "iter_tflops": 29.320734808677496, "iter_time": 0.7036349411010743, "loss": 0.6942611336708069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.943355579105702, "step_time": 0.6667374343872071} +{"epoch": 0, "iter": 1430, "iter_tflops": 18.47255756952785, "iter_time": 1.1168509521484375, "loss": 
0.5814340710639954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.204594707314126, "step_time": 0.9291362342834472} +{"epoch": 0, "iter": 1431, "iter_tflops": 40.95921284286779, "iter_time": 0.5036984863281251, "loss": 0.6393336057662964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.727165248201636, "step_time": 0.46126539421081536} +{"epoch": 0, "iter": 1432, "iter_tflops": 41.344110005841735, "iter_time": 0.49900925445556643, "loss": 0.9685266017913818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17098175712826, "step_time": 0.4567333431243897} +{"epoch": 0, "iter": 1433, "iter_tflops": 14.537896052496077, "iter_time": 1.4191251220703125, "loss": 1.141061782836914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.441683866154632, "step_time": 1.3360650100708007} +{"epoch": 0, "iter": 1434, "iter_tflops": 16.45759678535778, "iter_time": 1.2535908966064453, "loss": 0.8911998867988586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.26151052856695, "step_time": 0.9267607192993162} +{"epoch": 0, "iter": 1435, "iter_tflops": 32.37739909226949, "iter_time": 0.6372066345214844, "loss": 1.0269889831542969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.80523022873855, "step_time": 0.5927584266662598} +{"epoch": 0, "iter": 1436, "iter_tflops": 40.39571960705183, "iter_time": 0.5107247428894042, "loss": 1.0026695728302002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.931878709391036, "step_time": 0.4696155529022216} +{"epoch": 0, "iter": 1437, "iter_tflops": 16.35101868181345, "iter_time": 1.0797555694580077, "loss": 0.6213066577911377, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 17.208842613367196, "step_time": 1.0259320678710937} +{"epoch": 0, "iter": 1438, "iter_tflops": 13.422207820822683, "iter_time": 1.3153650817871092, "loss": 0.6664226651191711, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 18.67442471811074, "step_time": 0.9454161911010742} +{"epoch": 0, "iter": 1439, "iter_tflops": 33.68707894518718, "iter_time": 0.5240912551879884, "loss": 
0.5608769059181213, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 36.81234980645132, "step_time": 0.4795972976684571} +{"epoch": 0, "iter": 1440, "iter_tflops": 38.36144439692133, "iter_time": 0.4602304153442383, "loss": 0.6404889225959778, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 41.63338724946238, "step_time": 0.4240611839294433} +{"epoch": 0, "iter": 1441, "iter_tflops": 21.11630222459524, "iter_time": 0.9770220794677733, "loss": 0.7116594314575195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.69262382005466, "step_time": 0.909154167175293} +{"epoch": 0, "iter": 1442, "iter_tflops": 16.36048098154025, "iter_time": 1.2610322113037111, "loss": 0.8179261088371277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.725873488776703, "step_time": 1.0458899841308595} +{"epoch": 0, "iter": 1443, "iter_tflops": 45.308665450864936, "iter_time": 0.4553454246520996, "loss": 0.9620616436004639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.269079678569014, "step_time": 0.4187432289123535} +{"epoch": 0, "iter": 1444, "iter_tflops": 46.955604996609644, "iter_time": 0.43937445831298827, "loss": 0.9992705583572388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.93986472476226, "step_time": 0.40500880050659177} +{"epoch": 0, "iter": 1445, "iter_tflops": 38.976295897845716, "iter_time": 0.5293241195678711, "loss": 0.8128041625022888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56599468698626, "step_time": 0.48468486785888676} +{"epoch": 0, "iter": 1446, "iter_tflops": 44.18636805474493, "iter_time": 0.4669108238220215, "loss": 1.0601483583450317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.84288157249236, "step_time": 0.42239714050292965} +{"epoch": 0, "iter": 1447, "iter_tflops": 45.64037410592409, "iter_time": 0.4520360298156738, "loss": 0.928782045841217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.41832459655765, "step_time": 0.41747861099243166} +{"epoch": 0, "iter": 1448, "iter_tflops": 44.546601285322936, "iter_time": 0.4631350746154785, "loss": 
0.8538289070129395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.16346298134129, "step_time": 0.42835569190979006} +{"epoch": 0, "iter": 1449, "iter_tflops": 26.964894680410666, "iter_time": 0.7651093673706054, "loss": 0.4407030940055847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.574722170317177, "step_time": 0.7220050430297852} +{"epoch": 0, "iter": 1450, "iter_tflops": 14.231912288154025, "iter_time": 1.4496360778808595, "loss": 0.39660611748695374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.608788617768717, "step_time": 1.1716361618041993} +{"epoch": 0, "iter": 1451, "iter_tflops": 46.506546694641344, "iter_time": 0.4436169738769531, "loss": 0.4085029065608978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52595575088654, "step_time": 0.4083266353607178} +{"epoch": 0, "iter": 1452, "iter_tflops": 47.98888484638274, "iter_time": 0.42991400146484376, "loss": 0.3400115370750427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.354220912013744, "step_time": 0.39406743431091307} +{"epoch": 0, "iter": 1453, "iter_tflops": 37.37468847643291, "iter_time": 0.5520071029663086, "loss": 0.98723965883255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.451261320345864, "step_time": 0.5100234909057617} +{"epoch": 0, "iter": 1454, "iter_tflops": 45.88166586996227, "iter_time": 0.4496587715148926, "loss": 0.969101071357727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.730169911326385, "step_time": 0.4148607082366943} +{"epoch": 0, "iter": 1455, "iter_tflops": 45.39951996621956, "iter_time": 0.45443417739868164, "loss": 0.8513332009315491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78609121896471, "step_time": 0.4228888397216797} +{"epoch": 0, "iter": 1456, "iter_tflops": 47.07088965761098, "iter_time": 0.43829835510253906, "loss": 1.0028573274612427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.476715426507425, "step_time": 0.40872496032714845} +{"epoch": 0, "iter": 1457, "iter_tflops": 27.13030978117785, "iter_time": 0.760444450378418, 
"loss": 0.9402428865432739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.62810523333925, "step_time": 0.7206587142944335} +{"epoch": 0, "iter": 1458, "iter_tflops": 11.055483070917823, "iter_time": 1.8661412963867188, "loss": 0.887391984462738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.156221932994482, "step_time": 1.3612293090820313} +{"epoch": 0, "iter": 1459, "iter_tflops": 15.796066803169984, "iter_time": 1.3060905456542968, "loss": 0.9938359260559082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.675664938328595, "step_time": 1.1672032470703124} +{"epoch": 0, "iter": 1460, "iter_tflops": 28.03197768107147, "iter_time": 0.7359842300415039, "loss": 0.8533960580825806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.75814911547159, "step_time": 0.5935613384246825} +{"epoch": 0, "iter": 1461, "iter_tflops": 16.511195536017258, "iter_time": 0.9921333465576171, "loss": 0.5687435269355774, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 17.227482641195433, "step_time": 0.9508822631835938} +{"epoch": 0, "iter": 1462, "iter_tflops": 8.184414811144588, "iter_time": 2.001524612426758, "loss": 0.5227237343788147, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 10.93713101357816, "step_time": 1.497770088195801} +{"epoch": 0, "iter": 1463, "iter_tflops": 9.241635405063834, "iter_time": 1.7725550689697265, "loss": 0.456840842962265, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 10.541419297658715, "step_time": 1.5539945068359375} +{"epoch": 0, "iter": 1464, "iter_tflops": 27.498440203417378, "iter_time": 0.5957177047729492, "loss": 0.3983420729637146, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 29.357453571725276, "step_time": 0.5579948425292969} +{"epoch": 0, "iter": 1465, "iter_tflops": 22.186007581761633, "iter_time": 0.7125000457763672, "loss": 0.42327970266342163, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 23.579359114983376, "step_time": 0.6703969917297363} +{"epoch": 0, "iter": 1466, "iter_tflops": 11.803669353813744, "iter_time": 1.3392048645019532, 
"loss": 0.38244229555130005, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 13.50081008758562, "step_time": 1.17085799407959} +{"epoch": 0, "iter": 1467, "iter_tflops": 28.942482326042956, "iter_time": 0.5461705474853515, "loss": 0.472738653421402, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.832491932649155, "step_time": 0.5126906852722168} +{"epoch": 0, "iter": 1468, "iter_tflops": 27.975252262937364, "iter_time": 0.5650541152954102, "loss": 0.6862632036209106, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 29.491348048978143, "step_time": 0.5360057258605957} +{"epoch": 0, "iter": 1469, "iter_tflops": 28.667714240811794, "iter_time": 0.7196630096435547, "loss": 0.9111517667770386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.542592653735724, "step_time": 0.6754859924316405} +{"epoch": 0, "iter": 1470, "iter_tflops": 15.21492223825782, "iter_time": 1.355977584838867, "loss": 0.992060124874115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.851764324242467, "step_time": 1.2242690505981446} +{"epoch": 0, "iter": 1471, "iter_tflops": 42.696952448777104, "iter_time": 0.4831982688903809, "loss": 0.9880784749984741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.868615290408016, "step_time": 0.4497867088317871} +{"epoch": 0, "iter": 1472, "iter_tflops": 41.86893649916579, "iter_time": 0.4927541809082031, "loss": 0.882858157157898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87311350971329, "step_time": 0.4597651443481446} +{"epoch": 0, "iter": 1473, "iter_tflops": 4.273595401878009, "iter_time": 0.5231268768310547, "loss": 0.4933153986930847, "lr": 3e-05, "seqlen": 912.0, "step_tflops": 4.638315762648108, "step_time": 0.4819923286437988} +{"epoch": 0, "iter": 1474, "iter_tflops": 4.040009615777396, "iter_time": 0.5533730926513671, "loss": 0.6173865795135498, "lr": 3e-05, "seqlen": 912.0, "step_tflops": 4.424882421739527, "step_time": 0.5052411346435548} +{"epoch": 0, "iter": 1475, "iter_tflops": 4.274807757164258, "iter_time": 0.522978515625, "loss": 
0.06906634569168091, "lr": 3e-05, "seqlen": 912.0, "step_tflops": 4.640657661998077, "step_time": 0.48174909210205075} +{"epoch": 0, "iter": 1476, "iter_tflops": 4.219729290092705, "iter_time": 0.5298047485351564, "loss": 0.04331202805042267, "lr": 3e-05, "seqlen": 912.0, "step_tflops": 4.587721380246952, "step_time": 0.4873078441619873} +{"epoch": 0, "iter": 1477, "iter_tflops": 24.180625905790027, "iter_time": 0.8532075881958008, "loss": 1.160090446472168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.752005017933165, "step_time": 0.801145133972168} +{"epoch": 0, "iter": 1478, "iter_tflops": 9.20937148053086, "iter_time": 2.240228179931641, "loss": 1.1733494997024536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.17377771016156, "step_time": 2.0278694992065427} +{"epoch": 0, "iter": 1479, "iter_tflops": 14.155698015462452, "iter_time": 1.45744091796875, "loss": 1.021860122680664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.431360565977545, "step_time": 1.2555925254821776} +{"epoch": 0, "iter": 1480, "iter_tflops": 43.75031459567375, "iter_time": 0.47156446075439457, "loss": 1.0169578790664673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.119934507859384, "step_time": 0.43784215164184576} +{"epoch": 0, "iter": 1481, "iter_tflops": 17.99696436087246, "iter_time": 0.830617919921875, "loss": 0.4052586257457733, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 18.94368844437558, "step_time": 0.7891072082519531} +{"epoch": 0, "iter": 1482, "iter_tflops": 6.455583626979828, "iter_time": 2.3156080017089846, "loss": 0.45642608404159546, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 8.090916747399799, "step_time": 1.8475781631469725} +{"epoch": 0, "iter": 1483, "iter_tflops": 9.99771018373168, "iter_time": 1.4952024841308593, "loss": 0.4492349624633789, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 11.625656528021318, "step_time": 1.2858285522460937} +{"epoch": 0, "iter": 1484, "iter_tflops": 13.038264043324494, "iter_time": 1.1465177459716798, "loss": 
0.7402504682540894, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 16.849261698224264, "step_time": 0.8871962089538573} +{"epoch": 0, "iter": 1485, "iter_tflops": 20.233078217004305, "iter_time": 0.7893694152832031, "loss": 0.47569164633750916, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 21.373742409134906, "step_time": 0.7472427062988282} +{"epoch": 0, "iter": 1486, "iter_tflops": 9.481732882386021, "iter_time": 1.6844360961914064, "loss": 0.7286961078643799, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 12.305444675001704, "step_time": 1.2979110908508302} +{"epoch": 0, "iter": 1487, "iter_tflops": 28.244882338763446, "iter_time": 0.5654607772827148, "loss": 0.4329792559146881, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.1332981521836, "step_time": 0.5300240631103517} +{"epoch": 0, "iter": 1488, "iter_tflops": 28.570452016498514, "iter_time": 0.5590171661376954, "loss": 0.4702763855457306, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.516017427104714, "step_time": 0.5233767204284667} +{"epoch": 0, "iter": 1489, "iter_tflops": 37.67299718513781, "iter_time": 0.5168385543823243, "loss": 0.3552543520927429, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 41.00966120399337, "step_time": 0.4747870826721192} +{"epoch": 0, "iter": 1490, "iter_tflops": 46.37951831955711, "iter_time": 0.419815860748291, "loss": 0.5716207027435303, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 50.596089480803336, "step_time": 0.38482929420471196} +{"epoch": 0, "iter": 1491, "iter_tflops": 45.21389104029086, "iter_time": 0.4306388359069824, "loss": 0.5575172305107117, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 49.115536641874954, "step_time": 0.3964296989440918} +{"epoch": 0, "iter": 1492, "iter_tflops": 49.86809857596522, "iter_time": 0.3904471588134766, "loss": 0.2025502473115921, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 54.245648898305284, "step_time": 0.3589386024475098} +{"epoch": 0, "iter": 1493, "iter_tflops": 41.76014275944351, "iter_time": 0.49403790664672853, "loss": 
0.010556836612522602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.23689721637682, "step_time": 0.45606782913208005} +{"epoch": 0, "iter": 1494, "iter_tflops": 30.102288548720534, "iter_time": 0.6853662796020508, "loss": 0.0024411834310740232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.10417783968022, "step_time": 0.5414391460418702} +{"epoch": 0, "iter": 1495, "iter_tflops": 51.82859366671427, "iter_time": 0.39806392669677737, "loss": 0.010879695415496826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.57457580460873, "step_time": 0.3646707592010498} +{"epoch": 0, "iter": 1496, "iter_tflops": 53.11501401070033, "iter_time": 0.3884230079650879, "loss": 0.007750772871077061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.59319702832475, "step_time": 0.3521073188781738} +{"epoch": 0, "iter": 1497, "iter_tflops": 45.12564294425498, "iter_time": 0.45719223403930664, "loss": 0.6231045126914978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11972814773387, "step_time": 0.4200164432525635} +{"epoch": 0, "iter": 1498, "iter_tflops": 44.889299772775296, "iter_time": 0.4595993614196777, "loss": 0.7185062766075134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.477606664557136, "step_time": 0.4169784049987793} +{"epoch": 0, "iter": 1499, "iter_tflops": 50.77376180922425, "iter_time": 0.40633375930786136, "loss": 0.7079229950904846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.03583527409281, "step_time": 0.3748665466308594} +{"epoch": 0, "iter": 1500, "iter_tflops": 51.78019521963371, "iter_time": 0.3984359931945801, "loss": 0.6989279389381409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.77594296340255, "step_time": 0.3698923301696777} +{"epoch": 0, "iter": 1501, "iter_tflops": 37.1444399665774, "iter_time": 0.5554288482666015, "loss": 0.36901792883872986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.21092587876621, "step_time": 0.513071834564209} +{"epoch": 0, "iter": 1502, "iter_tflops": 47.176642152359555, "iter_time": 0.4373158531188964, 
"loss": 0.28927648067474365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.175854539761865, "step_time": 0.4031411628723144} +{"epoch": 0, "iter": 1503, "iter_tflops": 53.67205708509209, "iter_time": 0.3843917045593262, "loss": 0.347005158662796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.13027283143226, "step_time": 0.3549113483428955} +{"epoch": 0, "iter": 1504, "iter_tflops": 49.3438418459469, "iter_time": 0.41810877990722656, "loss": 0.16932184994220734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36706619251565, "step_time": 0.3865884895324707} +{"epoch": 0, "iter": 1505, "iter_tflops": 41.6005363779241, "iter_time": 0.49593335342407224, "loss": 0.4044349789619446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2965116062961, "step_time": 0.45546760177612305} +{"epoch": 0, "iter": 1506, "iter_tflops": 48.632066692116595, "iter_time": 0.42422818756103514, "loss": 0.24351508915424347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.623343546231396, "step_time": 0.39205212211608886} +{"epoch": 0, "iter": 1507, "iter_tflops": 49.11181462049194, "iter_time": 0.42008412170410153, "loss": 0.3145686388015747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.2154584700567, "step_time": 0.3876898574829102} +{"epoch": 0, "iter": 1508, "iter_tflops": 51.116941488441675, "iter_time": 0.40360578918457035, "loss": 0.25749632716178894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.5887286130736, "step_time": 0.3711380710601807} +{"epoch": 0, "iter": 1509, "iter_tflops": 42.83269657060745, "iter_time": 0.4816669311523437, "loss": 0.26445165276527405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53258323152726, "step_time": 0.4433687553405762} +{"epoch": 0, "iter": 1510, "iter_tflops": 38.253069130876, "iter_time": 0.5393317184448242, "loss": 0.32402488589286804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.57682141855606, "step_time": 0.48456161880493165} +{"epoch": 0, "iter": 1511, "iter_tflops": 41.51356019144605, "iter_time": 0.49697239685058586, 
"loss": 0.2562936544418335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.471702507840384, "step_time": 0.45371280097961425} +{"epoch": 0, "iter": 1512, "iter_tflops": 45.87847005233971, "iter_time": 0.4496900939941406, "loss": 0.27925509214401245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.21211355686215, "step_time": 0.41087881088256833} +{"epoch": 0, "iter": 1513, "iter_tflops": 17.275262525533467, "iter_time": 1.1942564392089845, "loss": 0.9683539867401123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.434043349755544, "step_time": 1.1191843872070313} +{"epoch": 0, "iter": 1514, "iter_tflops": 22.878119222368916, "iter_time": 0.9017827606201172, "loss": 0.878682017326355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.881917199912394, "step_time": 0.7399452972412109} +{"epoch": 0, "iter": 1515, "iter_tflops": 42.79312246890679, "iter_time": 0.48211236572265626, "loss": 1.0407737493515015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.739152612571274, "step_time": 0.44140923309326174} +{"epoch": 0, "iter": 1516, "iter_tflops": 39.656335959931326, "iter_time": 0.5202470932006835, "loss": 0.7546336650848389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.02957771428761, "step_time": 0.4794630718231201} +{"epoch": 0, "iter": 1517, "iter_tflops": 12.067894060254403, "iter_time": 1.709585235595703, "loss": 0.2064601480960846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.001566835157032, "step_time": 1.5868159408569336} +{"epoch": 0, "iter": 1518, "iter_tflops": 29.50149918127884, "iter_time": 0.6993235626220703, "loss": 0.28994739055633545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.45934709504358, "step_time": 0.5507595596313477} +{"epoch": 0, "iter": 1519, "iter_tflops": 52.12884863965368, "iter_time": 0.39577113342285153, "loss": 0.3023829162120819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.49192505477287, "step_time": 0.36520429229736323} +{"epoch": 0, "iter": 1520, "iter_tflops": 57.03819849182041, "iter_time": 
0.3617066116333008, "loss": 0.2908276915550232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.71200504679631, "step_time": 0.3343124809265137} +{"epoch": 0, "iter": 1521, "iter_tflops": 31.702040076103355, "iter_time": 0.6507812576293945, "loss": 0.5980108380317688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.78530633518024, "step_time": 0.6106528472900391} +{"epoch": 0, "iter": 1522, "iter_tflops": 13.814866781981998, "iter_time": 1.4933979339599608, "loss": 0.5385366678237915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.091942666506256, "step_time": 1.2070654525756834} +{"epoch": 0, "iter": 1523, "iter_tflops": 46.766392947045574, "iter_time": 0.4411521224975586, "loss": 0.5564223527908325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.44413935895306, "step_time": 0.4089889087677002} +{"epoch": 0, "iter": 1524, "iter_tflops": 51.26288659998526, "iter_time": 0.4024567260742188, "loss": 0.6841110587120056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.19572896594171, "step_time": 0.3737806148529053} +{"epoch": 0, "iter": 1525, "iter_tflops": 46.09409930710684, "iter_time": 0.44758643341064447, "loss": 0.2230834662914276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.57465518275905, "step_time": 0.40793344879150395} +{"epoch": 0, "iter": 1526, "iter_tflops": 48.65660787929342, "iter_time": 0.42401421737670897, "loss": 0.24741174280643463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.897531807550564, "step_time": 0.3827836418151856} +{"epoch": 0, "iter": 1527, "iter_tflops": 53.07845800899462, "iter_time": 0.3886905212402343, "loss": 0.28018125891685486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.61497704972818, "step_time": 0.35808559799194334} +{"epoch": 0, "iter": 1528, "iter_tflops": 51.40413178401164, "iter_time": 0.40135087966918936, "loss": 0.29323089122772217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.72056290974281, "step_time": 0.3702599620819092} +{"epoch": 0, "iter": 1529, "iter_tflops": 25.97890247405027, 
"iter_time": 0.7941480026245118, "loss": 1.2104040384292603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.54120528072431, "step_time": 0.7490991516113281} +{"epoch": 0, "iter": 1530, "iter_tflops": 12.12087210725444, "iter_time": 1.7021129608154295, "loss": 1.0060360431671143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.126001983233609, "step_time": 1.3639488830566404} +{"epoch": 0, "iter": 1531, "iter_tflops": 35.44462440689733, "iter_time": 0.5820655136108399, "loss": 0.9142515659332275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.425686958979114, "step_time": 0.5369089050292969} +{"epoch": 0, "iter": 1532, "iter_tflops": 36.490614353229276, "iter_time": 0.5653808212280274, "loss": 0.6989091038703918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.54812640903918, "step_time": 0.5216705665588379} +{"epoch": 0, "iter": 1533, "iter_tflops": 18.200876321854263, "iter_time": 1.133521987915039, "loss": 1.1351048946380615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.75507214449388, "step_time": 1.044344123840332} +{"epoch": 0, "iter": 1534, "iter_tflops": 20.88356973728818, "iter_time": 0.9879102935791017, "loss": 1.0594278573989868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.84603881575387, "step_time": 0.7982303848266602} +{"epoch": 0, "iter": 1535, "iter_tflops": 49.999905489482465, "iter_time": 0.4126226501464844, "loss": 1.0495119094848633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.958234827444834, "step_time": 0.3823530101776123} +{"epoch": 0, "iter": 1536, "iter_tflops": 56.29890148762981, "iter_time": 0.366456413269043, "loss": 0.9101637005805969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.82407417199109, "step_time": 0.3391928901672363} +{"epoch": 0, "iter": 1537, "iter_tflops": 32.66777203733948, "iter_time": 0.6315427169799804, "loss": 0.16834980249404907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.58594583521264, "step_time": 0.5965166778564454} +{"epoch": 0, "iter": 1538, "iter_tflops": 14.560468557251198, 
"iter_time": 1.4169251098632814, "loss": 0.2082645744085312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.286054607053845, "step_time": 1.2667950592041017} +{"epoch": 0, "iter": 1539, "iter_tflops": 43.36181567976426, "iter_time": 0.4757894287109375, "loss": 0.17632773518562317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.909894506184706, "step_time": 0.4218184013366699} +{"epoch": 0, "iter": 1540, "iter_tflops": 51.75737289100428, "iter_time": 0.3986116828918457, "loss": 0.17201565206050873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.333107669944575, "step_time": 0.3662338962554932} +{"epoch": 0, "iter": 1541, "iter_tflops": 33.37222732663427, "iter_time": 0.6182114639282227, "loss": 0.80401611328125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.52293691138131, "step_time": 0.5807823143005371} +{"epoch": 0, "iter": 1542, "iter_tflops": 10.656378675655986, "iter_time": 1.9360323181152344, "loss": 0.7708939909934998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.45168859606174, "step_time": 1.6568912200927735} +{"epoch": 0, "iter": 1543, "iter_tflops": 16.353397669301014, "iter_time": 1.2615784149169922, "loss": 0.7489238977432251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.44732918770616, "step_time": 1.0608702774047851} +{"epoch": 0, "iter": 1544, "iter_tflops": 37.64619832618168, "iter_time": 0.5480259475708007, "loss": 0.7964860796928406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.094782030491736, "step_time": 0.5020368156433105} +{"epoch": 0, "iter": 1545, "iter_tflops": 12.49857928376583, "iter_time": 1.2778550872802734, "loss": 0.42876583337783813, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 13.353509874348378, "step_time": 1.1960430831909181} +{"epoch": 0, "iter": 1546, "iter_tflops": 15.798598728816943, "iter_time": 1.0109360580444338, "loss": 0.3737562298774719, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 23.16636006283366, "step_time": 0.689420913696289} +{"epoch": 0, "iter": 1547, "iter_tflops": 29.959192264756954, 
"iter_time": 0.533104263305664, "loss": 0.6311073303222656, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 31.850298923099018, "step_time": 0.5014512786865234} +{"epoch": 0, "iter": 1548, "iter_tflops": 28.380112003516338, "iter_time": 0.562766387939453, "loss": 0.5767496824264526, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.142397105021743, "step_time": 0.5298640670776367} +{"epoch": 0, "iter": 1549, "iter_tflops": 15.821155056051376, "iter_time": 1.3040194244384766, "loss": 0.25100868940353394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.344395864931574, "step_time": 1.2622732391357423} +{"epoch": 0, "iter": 1550, "iter_tflops": 16.96294918745401, "iter_time": 1.2162444915771484, "loss": 0.17219418287277222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.014180185631112, "step_time": 0.8591212921142578} +{"epoch": 0, "iter": 1551, "iter_tflops": 48.1137828559638, "iter_time": 0.4287979927062988, "loss": 0.30303746461868286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44505484323105, "step_time": 0.39338491630554207} +{"epoch": 0, "iter": 1552, "iter_tflops": 50.73886554707464, "iter_time": 0.40661322021484375, "loss": 0.3137419819831848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.77914056089242, "step_time": 0.3766231689453125} +{"epoch": 0, "iter": 1553, "iter_tflops": 38.173014197456936, "iter_time": 0.4420361442565917, "loss": 0.11121891438961029, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 41.59667263260349, "step_time": 0.4056538887023926} +{"epoch": 0, "iter": 1554, "iter_tflops": 35.83906720562986, "iter_time": 0.4708228569030762, "loss": 0.10096801817417145, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 40.02993237964072, "step_time": 0.42153086471557627} +{"epoch": 0, "iter": 1555, "iter_tflops": 44.9178722914228, "iter_time": 0.3756600914001465, "loss": 0.104767344892025, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 49.178001571387256, "step_time": 0.34311788749694827} +{"epoch": 0, "iter": 1556, "iter_tflops": 
37.86748385712123, "iter_time": 0.4456026725769043, "loss": 0.12091303616762161, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 41.123521622568724, "step_time": 0.4103211822509766} +{"epoch": 0, "iter": 1557, "iter_tflops": 30.88894772250266, "iter_time": 0.6679118270874024, "loss": 0.00463574705645442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.803199974981695, "step_time": 0.6289353942871094} +{"epoch": 0, "iter": 1558, "iter_tflops": 25.04847404472793, "iter_time": 0.8236467208862304, "loss": 0.011449570767581463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.143681272088802, "step_time": 0.7330630741119385} +{"epoch": 0, "iter": 1559, "iter_tflops": 53.72537993299971, "iter_time": 0.3840101928710938, "loss": 0.003643870120868087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.74797602937759, "step_time": 0.35117964744567876} +{"epoch": 0, "iter": 1560, "iter_tflops": 54.618957892488694, "iter_time": 0.37772770309448245, "loss": 0.029366355389356613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.958270662415764, "step_time": 0.34409086990356447} +{"epoch": 0, "iter": 1561, "iter_tflops": 28.87953415518549, "iter_time": 0.7143845672607422, "loss": 0.26859408617019653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.427070048046915, "step_time": 0.6780506134033204} +{"epoch": 0, "iter": 1562, "iter_tflops": 12.818280649917687, "iter_time": 1.6095055236816407, "loss": 0.43569278717041016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.031552643540001, "step_time": 1.37251912689209} +{"epoch": 0, "iter": 1563, "iter_tflops": 49.53277732258748, "iter_time": 0.41651396560668946, "loss": 0.3697861433029175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.812204032193925, "step_time": 0.38339060592651364} +{"epoch": 0, "iter": 1564, "iter_tflops": 54.443424276365896, "iter_time": 0.37894555282592773, "loss": 0.3880822956562042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.923259334941996, "step_time": 0.35013496780395503} +{"epoch": 0, "iter": 1565, 
"iter_tflops": 33.67921156267613, "iter_time": 0.5058961791992188, "loss": 0.03861257806420326, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 36.19534669992618, "step_time": 0.47072858810424806} +{"epoch": 0, "iter": 1566, "iter_tflops": 13.06467183883529, "iter_time": 1.3041417846679686, "loss": 0.05571332946419716, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 15.787826211038626, "step_time": 1.0791976184844971} +{"epoch": 0, "iter": 1567, "iter_tflops": 34.47861643988774, "iter_time": 0.49416670989990236, "loss": 0.05850915610790253, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 38.04842527066878, "step_time": 0.4478026180267335} +{"epoch": 0, "iter": 1568, "iter_tflops": 38.88603896560256, "iter_time": 0.438156852722168, "loss": 0.04138856381177902, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 42.94065748373835, "step_time": 0.3967844333648682} +{"epoch": 0, "iter": 1569, "iter_tflops": 36.13157793377336, "iter_time": 0.5709989624023438, "loss": 0.06663160026073456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.3356987434481, "step_time": 0.5114847183227539} +{"epoch": 0, "iter": 1570, "iter_tflops": 44.16454555465749, "iter_time": 0.46714153289794924, "loss": 0.12012284994125366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.39226105089888, "step_time": 0.426330430984497} +{"epoch": 0, "iter": 1571, "iter_tflops": 42.927642450892705, "iter_time": 0.48060159683227544, "loss": 0.08896558731794357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.920712671161716, "step_time": 0.4397011966705322} +{"epoch": 0, "iter": 1572, "iter_tflops": 44.047844530814594, "iter_time": 0.4683791847229004, "loss": 0.14362813532352448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.224842992435484, "step_time": 0.42781048583984377} +{"epoch": 0, "iter": 1573, "iter_tflops": 36.2391740622933, "iter_time": 0.5693036346435547, "loss": 0.1540381908416748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.622688402625464, "step_time": 0.520688886642456} +{"epoch": 0, "iter": 
1574, "iter_tflops": 47.93317190152532, "iter_time": 0.43041369247436534, "loss": 0.2398419827222824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.11064239187948, "step_time": 0.3959094066619873} +{"epoch": 0, "iter": 1575, "iter_tflops": 52.10773865524574, "iter_time": 0.395931468963623, "loss": 0.1866605579853058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.45287051296281, "step_time": 0.36545694351196295} +{"epoch": 0, "iter": 1576, "iter_tflops": 51.19597182080067, "iter_time": 0.4029827499389649, "loss": 0.16643890738487244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.46243268169969, "step_time": 0.3719832057952881} +{"epoch": 0, "iter": 1577, "iter_tflops": 38.94963953837413, "iter_time": 0.5296863784790038, "loss": 0.9590551257133484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29272037138454, "step_time": 0.4878166580200196} +{"epoch": 0, "iter": 1578, "iter_tflops": 43.829205737431415, "iter_time": 0.47071566009521487, "loss": 1.0666110515594482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.304366101845396, "step_time": 0.4361350803375244} +{"epoch": 0, "iter": 1579, "iter_tflops": 46.45956837438792, "iter_time": 0.4440655441284179, "loss": 1.087000846862793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.02424134520825, "step_time": 0.41242191696166985} +{"epoch": 0, "iter": 1580, "iter_tflops": 45.80493646541433, "iter_time": 0.450412010192871, "loss": 0.8707130551338196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.33755427882737, "step_time": 0.4181620635986328} +{"epoch": 0, "iter": 1581, "iter_tflops": 2.2509080529199745, "iter_time": 0.7835500411987304, "loss": 0.039931125938892365, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 2.36334218217319, "step_time": 0.7462732696533204} +{"epoch": 0, "iter": 1582, "iter_tflops": 1.345139196847239, "iter_time": 1.3111647491455078, "loss": 0.06929145753383636, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 1.588975755359216, "step_time": 1.1099597282409666} +{"epoch": 0, "iter": 1583, 
"iter_tflops": 3.3192150957491235, "iter_time": 0.5313602905273439, "loss": 0.15115736424922943, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 3.6471180046571976, "step_time": 0.48358706665039064} +{"epoch": 0, "iter": 1584, "iter_tflops": 3.3284566327680585, "iter_time": 0.5298849563598632, "loss": 0.040442220866680145, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 3.6349080639111184, "step_time": 0.48521147346496585} +{"epoch": 0, "iter": 1585, "iter_tflops": 31.411169188361946, "iter_time": 0.6568075637817382, "loss": 0.010299512185156345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.669141611294, "step_time": 0.5950852127075196} +{"epoch": 0, "iter": 1586, "iter_tflops": 54.78235605110006, "iter_time": 0.37660106277465816, "loss": 0.008687986060976982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.34612785620209, "step_time": 0.3418793258666992} +{"epoch": 0, "iter": 1587, "iter_tflops": 58.4149918152329, "iter_time": 0.3531814842224121, "loss": 0.026389803737401962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.21653152982849, "step_time": 0.321273868560791} +{"epoch": 0, "iter": 1588, "iter_tflops": 54.22880577141911, "iter_time": 0.38044528579711917, "loss": 0.008423889055848122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.47130296038087, "step_time": 0.3469083824157715} +{"epoch": 0, "iter": 1589, "iter_tflops": 29.647119864292524, "iter_time": 0.6958886260986328, "loss": 0.8578693270683289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.37694957890254, "step_time": 0.6575238761901856} +{"epoch": 0, "iter": 1590, "iter_tflops": 12.202407287911301, "iter_time": 1.6907396240234376, "loss": 0.9196938872337341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.602353417822364, "step_time": 1.322306510925293} +{"epoch": 0, "iter": 1591, "iter_tflops": 10.972123970785074, "iter_time": 1.8803190307617186, "loss": 1.1112622022628784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.732673785767346, "step_time": 1.6203268737792969} +{"epoch": 0, 
"iter": 1592, "iter_tflops": 19.189325597245453, "iter_time": 1.0751338500976562, "loss": 0.8909550309181213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.032659455450585, "step_time": 0.8957321472167967} +{"epoch": 0, "iter": 1593, "iter_tflops": 18.737382725191875, "iter_time": 0.8414506454467774, "loss": 0.39114245772361755, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 20.252269506202943, "step_time": 0.7785094299316405} +{"epoch": 0, "iter": 1594, "iter_tflops": 27.458204756174993, "iter_time": 0.5742029724121094, "loss": 0.4621277451515198, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 29.293526495899382, "step_time": 0.5382275428771973} +{"epoch": 0, "iter": 1595, "iter_tflops": 28.83465605631079, "iter_time": 0.5467928161621094, "loss": 0.4456496834754944, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.73220376410042, "step_time": 0.5130313110351562} +{"epoch": 0, "iter": 1596, "iter_tflops": 28.265072622067027, "iter_time": 0.557811508178711, "loss": 0.7114192843437195, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.00236425115841, "step_time": 0.525511344909668} +{"epoch": 0, "iter": 1597, "iter_tflops": 21.836209180436878, "iter_time": 0.6043325424194336, "loss": 0.023442892357707024, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 23.42403545190293, "step_time": 0.5633671379089356} +{"epoch": 0, "iter": 1598, "iter_tflops": 10.655229350417873, "iter_time": 1.2384840698242188, "loss": 0.001963128102943301, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 12.726040382622218, "step_time": 1.0369550476074219} +{"epoch": 0, "iter": 1599, "iter_tflops": 34.55202810170353, "iter_time": 0.3819264030456543, "loss": 0.057186104357242584, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 37.86171610882403, "step_time": 0.3485402450561523} +{"epoch": 0, "iter": 1600, "iter_tflops": 37.66606496752078, "iter_time": 0.3503506889343262, "loss": 0.0040793875232338905, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 41.495525627569506, "step_time": 0.3180181865692139} 
+{"epoch": 0, "iter": 1601, "iter_tflops": 46.387716480910036, "iter_time": 0.4447533760070801, "loss": 0.49031969904899597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.84500537186376, "step_time": 0.4057644081115722} +{"epoch": 0, "iter": 1602, "iter_tflops": 46.183951524411704, "iter_time": 0.4467156410217285, "loss": 0.6284011602401733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.90965126654124, "step_time": 0.3974423446655273} +{"epoch": 0, "iter": 1603, "iter_tflops": 52.94621314372905, "iter_time": 0.38966136169433596, "loss": 0.45784875750541687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.45410401291709, "step_time": 0.35908824729919436} +{"epoch": 0, "iter": 1604, "iter_tflops": 51.475872934535836, "iter_time": 0.40079152297973636, "loss": 0.559536874294281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.51450109979025, "step_time": 0.37163431358337395} +{"epoch": 0, "iter": 1605, "iter_tflops": 22.315888434546014, "iter_time": 0.9245024490356446, "loss": 0.4965219497680664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.310841891145166, "step_time": 0.8850428314208985} +{"epoch": 0, "iter": 1606, "iter_tflops": 18.557985370560946, "iter_time": 1.1117097625732422, "loss": 0.6354783773422241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.452365702080936, "step_time": 1.0087387351989745} +{"epoch": 0, "iter": 1607, "iter_tflops": 40.829977448749446, "iter_time": 0.5052927970886231, "loss": 0.4856330454349518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.84496007675114, "step_time": 0.3904079685211182} +{"epoch": 0, "iter": 1608, "iter_tflops": 49.03013937452664, "iter_time": 0.4207839050292969, "loss": 0.6541013717651367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.04767571460816, "step_time": 0.3889160690307617} +{"epoch": 0, "iter": 1609, "iter_tflops": 42.89558384172172, "iter_time": 0.48096078109741214, "loss": 0.6661098003387451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.545744028123465, "step_time": 
0.44324339294433596} +{"epoch": 0, "iter": 1610, "iter_tflops": 45.33585000537821, "iter_time": 0.4550723876953125, "loss": 0.6245191097259521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.783156301224466, "step_time": 0.40625859069824216} +{"epoch": 0, "iter": 1611, "iter_tflops": 50.03030482914701, "iter_time": 0.41237193298339847, "loss": 0.6673768162727356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.459791823079215, "step_time": 0.37883166313171385} +{"epoch": 0, "iter": 1612, "iter_tflops": 52.00368324783782, "iter_time": 0.3967236976623535, "loss": 0.6467686295509338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.35964200454575, "step_time": 0.36606147193908695} +{"epoch": 0, "iter": 1613, "iter_tflops": 41.23413269707994, "iter_time": 0.5003401832580567, "loss": 0.7574467062950134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.79353411080222, "step_time": 0.4605819549560547} +{"epoch": 0, "iter": 1614, "iter_tflops": 43.09842722303485, "iter_time": 0.4786971321105957, "loss": 0.6946642398834229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.394286717910134, "step_time": 0.42631258583068843} +{"epoch": 0, "iter": 1615, "iter_tflops": 48.28649109173386, "iter_time": 0.4272642936706543, "loss": 0.7633622884750366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44697347687009, "step_time": 0.3933705253601074} +{"epoch": 0, "iter": 1616, "iter_tflops": 46.12994186299364, "iter_time": 0.4472386627197266, "loss": 0.7216573357582092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.87838409424446, "step_time": 0.4136279449462891} +{"epoch": 0, "iter": 1617, "iter_tflops": 46.03465473318232, "iter_time": 0.44816440200805663, "loss": 0.4266818165779114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.199189910061065, "step_time": 0.4109845905303955} +{"epoch": 0, "iter": 1618, "iter_tflops": 46.12160196032587, "iter_time": 0.44731953430175786, "loss": 0.4568680226802826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.349989428963916, 
"step_time": 0.39409928703308106} +{"epoch": 0, "iter": 1619, "iter_tflops": 50.643188594234985, "iter_time": 0.4073814086914063, "loss": 0.47237858176231384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.029005158118984, "step_time": 0.3749130744934082} +{"epoch": 0, "iter": 1620, "iter_tflops": 53.337759382271294, "iter_time": 0.3868009033203125, "loss": 0.3203320801258087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.82735012334307, "step_time": 0.35677051544189453} +{"epoch": 0, "iter": 1621, "iter_tflops": 43.376126197255324, "iter_time": 0.47563245773315427, "loss": 0.206761434674263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.867954571439526, "step_time": 0.4401961574554444} +{"epoch": 0, "iter": 1622, "iter_tflops": 39.736803920440416, "iter_time": 0.5191935806274414, "loss": 0.37494027614593506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65820832967806, "step_time": 0.47255932617187507} +{"epoch": 0, "iter": 1623, "iter_tflops": 41.93881030007289, "iter_time": 0.4919332084655762, "loss": 0.2879849970340729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.77896749281532, "step_time": 0.4506675148010254} +{"epoch": 0, "iter": 1624, "iter_tflops": 41.49479811075321, "iter_time": 0.49719710540771483, "loss": 0.35632696747779846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.6699196337919, "step_time": 0.4517435913085937} +{"epoch": 0, "iter": 1625, "iter_tflops": 27.962779552693497, "iter_time": 0.7378055343627931, "loss": 0.7544582486152649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.122731940952722, "step_time": 0.6849011421203614} +{"epoch": 0, "iter": 1626, "iter_tflops": 45.205898461216414, "iter_time": 0.45638056564331053, "loss": 1.001895785331726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.011981707905896, "step_time": 0.42093979454040525} +{"epoch": 0, "iter": 1627, "iter_tflops": 45.99053211131657, "iter_time": 0.44859436416625975, "loss": 0.7891120910644531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
49.60127862100681, "step_time": 0.41593874359130856} +{"epoch": 0, "iter": 1628, "iter_tflops": 49.57259007388211, "iter_time": 0.41617945480346685, "loss": 0.8772763013839722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.30591673444242, "step_time": 0.38703196144104} +{"epoch": 0, "iter": 1629, "iter_tflops": 24.519018080033593, "iter_time": 0.8414322891235351, "loss": 1.1787558794021606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.805043520806777, "step_time": 0.7994984970092773} +{"epoch": 0, "iter": 1630, "iter_tflops": 9.770856257045951, "iter_time": 2.111492889404297, "loss": 1.032907485961914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.270823561896135, "step_time": 1.8304867782592773} +{"epoch": 0, "iter": 1631, "iter_tflops": 13.543008174546767, "iter_time": 1.5233759918212892, "loss": 1.1203255653381348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.0799836269263, "step_time": 1.207910614013672} +{"epoch": 0, "iter": 1632, "iter_tflops": 24.27036766642922, "iter_time": 0.8500527801513672, "loss": 0.8498689532279968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.614015931039784, "step_time": 0.6966665229797364} +{"epoch": 0, "iter": 1633, "iter_tflops": 13.87189881267203, "iter_time": 1.1159304656982423, "loss": 0.41762590408325195, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 14.904222432470117, "step_time": 1.0386368408203124} +{"epoch": 0, "iter": 1634, "iter_tflops": 12.629307117634617, "iter_time": 1.225726348876953, "loss": 0.5039750337600708, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 15.003306415149606, "step_time": 1.0317775344848634} +{"epoch": 0, "iter": 1635, "iter_tflops": 23.013580355652152, "iter_time": 0.6726495513916015, "loss": 0.46863287687301636, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.64572439037923, "step_time": 0.6281038551330566} +{"epoch": 0, "iter": 1636, "iter_tflops": 23.387131094814485, "iter_time": 0.6619056625366211, "loss": 0.5017627477645874, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 
25.233001369325994, "step_time": 0.613485263824463} +{"epoch": 0, "iter": 1637, "iter_tflops": 27.77845998230537, "iter_time": 0.7427011260986329, "loss": 0.3811509311199188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.50106254053612, "step_time": 0.6764057312011719} +{"epoch": 0, "iter": 1638, "iter_tflops": 35.79191237554911, "iter_time": 0.5764177474975586, "loss": 0.3316677510738373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.83145641232881, "step_time": 0.5312984733581543} +{"epoch": 0, "iter": 1639, "iter_tflops": 45.78843293334492, "iter_time": 0.45057435226440434, "loss": 0.23476427793502808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.98895188948967, "step_time": 0.41271306419372555} +{"epoch": 0, "iter": 1640, "iter_tflops": 43.72039660676546, "iter_time": 0.4718871536254883, "loss": 0.39753013849258423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.604462323049816, "step_time": 0.433385705947876} +{"epoch": 0, "iter": 1641, "iter_tflops": 22.691246136665246, "iter_time": 0.9092093658447266, "loss": 0.0837649255990982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.282727831618104, "step_time": 0.8496200942993164} +{"epoch": 0, "iter": 1642, "iter_tflops": 15.746812270332743, "iter_time": 1.3101758728027344, "loss": 0.10651912540197372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.26575606922973, "step_time": 1.1294957313537597} +{"epoch": 0, "iter": 1643, "iter_tflops": 39.32426908648737, "iter_time": 0.5246402282714844, "loss": 0.13260693848133087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.558427173683135, "step_time": 0.47364183807373045} +{"epoch": 0, "iter": 1644, "iter_tflops": 40.58154303449867, "iter_time": 0.5083861274719239, "loss": 0.1341283768415451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.660147117508544, "step_time": 0.46195758056640623} +{"epoch": 0, "iter": 1645, "iter_tflops": 15.865623566219284, "iter_time": 1.2011745910644531, "loss": 0.20298171043395996, "lr": 3e-05, "seqlen": 7584.0, 
"step_tflops": 16.888228596062635, "step_time": 1.1284418487548828} +{"epoch": 0, "iter": 1646, "iter_tflops": 15.830515977300964, "iter_time": 1.203838455200195, "loss": 0.19669575989246368, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 20.369547075466457, "step_time": 0.9355821132659912} +{"epoch": 0, "iter": 1647, "iter_tflops": 41.843942531008125, "iter_time": 0.45543949127197264, "loss": 0.19285590946674347, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 45.83935229958811, "step_time": 0.41574287033081053} +{"epoch": 0, "iter": 1648, "iter_tflops": 38.84110795553824, "iter_time": 0.4906498527526855, "loss": 0.18162328004837036, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 42.589793195406514, "step_time": 0.4474636402130127} +{"epoch": 0, "iter": 1649, "iter_tflops": 32.08702668731185, "iter_time": 0.6429730529785157, "loss": 0.6897439956665039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.70564306772995, "step_time": 0.5944593353271486} +{"epoch": 0, "iter": 1650, "iter_tflops": 13.371598627675807, "iter_time": 1.542904037475586, "loss": 1.166008710861206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.398817760270543, "step_time": 1.1857755966186525} +{"epoch": 0, "iter": 1651, "iter_tflops": 13.889144385442535, "iter_time": 1.485411407470703, "loss": 0.840472936630249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.731829838908451, "step_time": 1.3114236373901367} +{"epoch": 0, "iter": 1652, "iter_tflops": 38.96589051419818, "iter_time": 0.5294654693603515, "loss": 1.0345163345336914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45152825754957, "step_time": 0.4859917736053467} +{"epoch": 0, "iter": 1653, "iter_tflops": 13.703960776885028, "iter_time": 1.3183759765625, "loss": 0.6321316361427307, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 14.891455833913955, "step_time": 1.2132442169189452} +{"epoch": 0, "iter": 1654, "iter_tflops": 13.68169773628723, "iter_time": 1.3205212554931642, "loss": 0.531952440738678, "lr": 3e-05, "seqlen": 7200.0, 
"step_tflops": 17.326112049387934, "step_time": 1.0427597732543945} +{"epoch": 0, "iter": 1655, "iter_tflops": 28.28095510036807, "iter_time": 0.6388388442993164, "loss": 0.5843949913978577, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 30.368161986909914, "step_time": 0.5949313850402832} +{"epoch": 0, "iter": 1656, "iter_tflops": 28.754844405704674, "iter_time": 0.6283105697631836, "loss": 0.39114516973495483, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 30.79232743836586, "step_time": 0.5867361831665039} +{"epoch": 0, "iter": 1657, "iter_tflops": 8.015710176282544, "iter_time": 1.1271009674072265, "loss": 0.021892528980970383, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 8.552547467535772, "step_time": 1.056353645324707} +{"epoch": 0, "iter": 1658, "iter_tflops": 8.790586058125259, "iter_time": 1.0277488479614258, "loss": 0.01478524412959814, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 11.762268241963406, "step_time": 0.7680928974151611} +{"epoch": 0, "iter": 1659, "iter_tflops": 19.85456962394312, "iter_time": 0.45503452682495116, "loss": 0.013081287033855915, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 22.033154180534435, "step_time": 0.4100418224334717} +{"epoch": 0, "iter": 1660, "iter_tflops": 19.59993744804918, "iter_time": 0.4609460983276367, "loss": 0.012123833410441875, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 21.769913224713648, "step_time": 0.4150000324249268} +{"epoch": 0, "iter": 1661, "iter_tflops": 18.158481881534833, "iter_time": 1.1361684112548829, "loss": 0.6520427465438843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.24609600345598, "step_time": 1.0719625167846678} +{"epoch": 0, "iter": 1662, "iter_tflops": 14.249364143956692, "iter_time": 1.4478606414794921, "loss": 0.7914021015167236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.15319100730588, "step_time": 0.854176721572876} +{"epoch": 0, "iter": 1663, "iter_tflops": 50.895872676569326, "iter_time": 0.405358871459961, "loss": 0.6808803081512451, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 55.39251603068265, "step_time": 0.3724527244567871} +{"epoch": 0, "iter": 1664, "iter_tflops": 51.04252704878764, "iter_time": 0.40419420242309567, "loss": 0.6753699779510498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.2061155714282, "step_time": 0.37371029090881347} +{"epoch": 0, "iter": 1665, "iter_tflops": 38.623987064488794, "iter_time": 0.5341523513793945, "loss": 0.09029673784971237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57546112689341, "step_time": 0.4962324638366699} +{"epoch": 0, "iter": 1666, "iter_tflops": 11.761335111215754, "iter_time": 1.7541455383300781, "loss": 0.13936376571655273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.440124781210613, "step_time": 1.428733741760254} +{"epoch": 0, "iter": 1667, "iter_tflops": 44.05278717123875, "iter_time": 0.4683266334533691, "loss": 0.09938095510005951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.578325310871165, "step_time": 0.42469750404357914} +{"epoch": 0, "iter": 1668, "iter_tflops": 36.54647050348655, "iter_time": 0.5645167160034179, "loss": 0.06743352115154266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.09661499131789, "step_time": 0.5145345439910889} +{"epoch": 0, "iter": 1669, "iter_tflops": 18.846765044084666, "iter_time": 1.094675582885742, "loss": 0.004386417102068663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.937254040215134, "step_time": 1.034801155090332} +{"epoch": 0, "iter": 1670, "iter_tflops": 16.749283078367277, "iter_time": 1.231759796142578, "loss": 0.01041143387556076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.78416096198636, "step_time": 0.9926353797912599} +{"epoch": 0, "iter": 1671, "iter_tflops": 50.89875859889089, "iter_time": 0.40533588790893554, "loss": 0.005418035201728344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.657826959301396, "step_time": 0.3641349239349365} +{"epoch": 0, "iter": 1672, "iter_tflops": 49.615262339966264, "iter_time": 0.41582151412963864, "loss": 0.0014038033550605178, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 54.300695842694246, "step_time": 0.3799416046142578} +{"epoch": 0, "iter": 1673, "iter_tflops": 21.456742314660975, "iter_time": 0.9615203094482421, "loss": 1.0574194192886353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.089093663123087, "step_time": 0.8935428047180175} +{"epoch": 0, "iter": 1674, "iter_tflops": 19.929008022720772, "iter_time": 1.0352293243408202, "loss": 0.7526337504386902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.647407836553047, "step_time": 0.7742251567840577} +{"epoch": 0, "iter": 1675, "iter_tflops": 39.0664426376414, "iter_time": 0.5281026916503906, "loss": 0.8518498539924622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76104596017705, "step_time": 0.49402722167968754} +{"epoch": 0, "iter": 1676, "iter_tflops": 46.11878241794238, "iter_time": 0.447346881866455, "loss": 1.2872177362442017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.3341392818348, "step_time": 0.4181910095214843} +{"epoch": 0, "iter": 1677, "iter_tflops": 25.39508964571285, "iter_time": 0.8124048309326173, "loss": 0.036470141261816025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.744638489997662, "step_time": 0.7714104461669922} +{"epoch": 0, "iter": 1678, "iter_tflops": 13.54454285057847, "iter_time": 1.5232033843994137, "loss": 0.05068160593509674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.28291652503309, "step_time": 1.193727544784546} +{"epoch": 0, "iter": 1679, "iter_tflops": 49.44657758585594, "iter_time": 0.4172400703430176, "loss": 0.05380982905626297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.70724400899412, "step_time": 0.3841398658752441} +{"epoch": 0, "iter": 1680, "iter_tflops": 52.78855710725601, "iter_time": 0.3908251075744629, "loss": 0.06077204644680023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.662110777385664, "step_time": 0.35779289436340334} +{"epoch": 0, "iter": 1681, "iter_tflops": 36.751663381499, "iter_time": 0.5613648910522461, "loss": 0.13941772282123566, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 39.62868733993202, "step_time": 0.520610065460205} +{"epoch": 0, "iter": 1682, "iter_tflops": 50.39490521308326, "iter_time": 0.4093884773254395, "loss": 0.25239500403404236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.153903244561654, "step_time": 0.37406406974792483} +{"epoch": 0, "iter": 1683, "iter_tflops": 51.16868140788714, "iter_time": 0.4031976776123047, "loss": 0.12053724378347397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.78704513428211, "step_time": 0.369818717956543} +{"epoch": 0, "iter": 1684, "iter_tflops": 51.46838427043366, "iter_time": 0.4008498382568359, "loss": 0.1936989426612854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.593979068195466, "step_time": 0.3711030197143555} +{"epoch": 0, "iter": 1685, "iter_tflops": 35.06629356143338, "iter_time": 0.5883454284667969, "loss": 0.6321770548820496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.415576734140245, "step_time": 0.5514038619995117} +{"epoch": 0, "iter": 1686, "iter_tflops": 8.619762171517069, "iter_time": 2.3934643554687494, "loss": 0.6360107064247131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.749364372056899, "step_time": 1.9192849731445314} +{"epoch": 0, "iter": 1687, "iter_tflops": 9.40098992094188, "iter_time": 2.194566070556641, "loss": 0.782240629196167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.43144413501171, "step_time": 1.8047670326232912} +{"epoch": 0, "iter": 1688, "iter_tflops": 32.3459276314493, "iter_time": 0.6378266143798829, "loss": 0.6243612170219421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.91581944442817, "step_time": 0.5168650875091553} +{"epoch": 0, "iter": 1689, "iter_tflops": 13.14786897248349, "iter_time": 1.2521687927246092, "loss": 0.4634993076324463, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 13.892219843893587, "step_time": 1.1850770721435546} +{"epoch": 0, "iter": 1690, "iter_tflops": 12.675654427289205, "iter_time": 1.2988166656494142, "loss": 0.5197893381118774, "lr": 3e-05, 
"seqlen": 6576.0, "step_tflops": 15.171409756487721, "step_time": 1.0851563224792482} +{"epoch": 0, "iter": 1691, "iter_tflops": 24.948488621183095, "iter_time": 0.6598937301635743, "loss": 0.33454516530036926, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 26.84906408384196, "step_time": 0.613181568145752} +{"epoch": 0, "iter": 1692, "iter_tflops": 25.225426287761145, "iter_time": 0.6526490783691405, "loss": 0.4347876310348511, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 26.997955725718914, "step_time": 0.6097999191284179} +{"epoch": 0, "iter": 1693, "iter_tflops": 20.714679678156877, "iter_time": 0.981945556640625, "loss": 0.07866238802671432, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 22.05058182535151, "step_time": 0.9224558258056641} +{"epoch": 0, "iter": 1694, "iter_tflops": 11.065820208980508, "iter_time": 1.8381545410156253, "loss": 0.08084756880998611, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 13.456159452289938, "step_time": 1.511626533508301} +{"epoch": 0, "iter": 1695, "iter_tflops": 13.681517832200496, "iter_time": 1.4867274169921876, "loss": 0.15427058935165405, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 17.12759173178366, "step_time": 1.1875976486206055} +{"epoch": 0, "iter": 1696, "iter_tflops": 25.479633386845396, "iter_time": 0.7983116302490235, "loss": 0.08581550419330597, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 31.93040030456686, "step_time": 0.6370320281982422} +{"epoch": 0, "iter": 1697, "iter_tflops": 20.274374225079846, "iter_time": 0.8180993270874024, "loss": 0.5815357565879822, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 21.468170628932416, "step_time": 0.7726066741943359} +{"epoch": 0, "iter": 1698, "iter_tflops": 11.239400561044267, "iter_time": 1.475741683959961, "loss": 0.6237165331840515, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 12.783682160289281, "step_time": 1.2974706115722658} +{"epoch": 0, "iter": 1699, "iter_tflops": 14.59151273944132, "iter_time": 1.1367191467285154, "loss": 0.45197612047195435, "lr": 
3e-05, "seqlen": 6624.0, "step_tflops": 17.198926423440547, "step_time": 0.9643887939453126} +{"epoch": 0, "iter": 1700, "iter_tflops": 28.22679097951365, "iter_time": 0.5876138000488281, "loss": 0.41031625866889954, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 29.88844423236995, "step_time": 0.5549453086853028} +{"epoch": 0, "iter": 1701, "iter_tflops": 25.728156653301873, "iter_time": 0.6702270355224609, "loss": 0.4392172694206238, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 27.3351564924897, "step_time": 0.6308252220153808} +{"epoch": 0, "iter": 1702, "iter_tflops": 12.983781022123908, "iter_time": 1.3280958862304686, "loss": 0.4832289218902588, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 14.06136304923621, "step_time": 1.2263182525634768} +{"epoch": 0, "iter": 1703, "iter_tflops": 31.04358389628107, "iter_time": 0.5554676361083984, "loss": 0.2610551714897156, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 33.11346937284244, "step_time": 0.5207459831237793} +{"epoch": 0, "iter": 1704, "iter_tflops": 30.57025590340304, "iter_time": 0.5640680999755859, "loss": 0.3957461416721344, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 32.662606487732596, "step_time": 0.5279341735839843} +{"epoch": 0, "iter": 1705, "iter_tflops": 31.592122533810528, "iter_time": 0.6530455017089845, "loss": 1.0217183828353882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.753009466977524, "step_time": 0.6112371559143066} +{"epoch": 0, "iter": 1706, "iter_tflops": 13.529341416008027, "iter_time": 1.524914840698242, "loss": 0.7823452353477478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.539119082440497, "step_time": 1.2474118728637695} +{"epoch": 0, "iter": 1707, "iter_tflops": 18.640962292039802, "iter_time": 1.1067611846923828, "loss": 0.9764940738677979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.966839748368173, "step_time": 0.9391926078796387} +{"epoch": 0, "iter": 1708, "iter_tflops": 48.167227236491115, "iter_time": 0.4283222160339355, "loss": 1.1478086709976196, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 52.045107653127424, "step_time": 0.3964079322814941} +{"epoch": 0, "iter": 1709, "iter_tflops": 27.872184941509104, "iter_time": 0.5730219268798828, "loss": 0.4164031147956848, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 29.865022722413293, "step_time": 0.5347852325439453} +{"epoch": 0, "iter": 1710, "iter_tflops": 27.771421596642323, "iter_time": 0.5751010284423829, "loss": 0.44296637177467346, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 29.652827288201074, "step_time": 0.5386121520996093} +{"epoch": 0, "iter": 1711, "iter_tflops": 27.31844426817604, "iter_time": 0.5846369934082032, "loss": 0.5141296982765198, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 29.133981237457917, "step_time": 0.5482042770385742} +{"epoch": 0, "iter": 1712, "iter_tflops": 29.995333024180585, "iter_time": 0.5324619369506837, "loss": 0.3654788136482239, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 31.879375185907328, "step_time": 0.5009939193725587} +{"epoch": 0, "iter": 1713, "iter_tflops": 26.208338115653685, "iter_time": 0.7871957931518555, "loss": 0.855381429195404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.571071671582835, "step_time": 0.7482876892089844} +{"epoch": 0, "iter": 1714, "iter_tflops": 17.012714940501052, "iter_time": 1.2126867218017578, "loss": 0.7459787130355835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.451678211139864, "step_time": 0.8437495918273926} +{"epoch": 0, "iter": 1715, "iter_tflops": 44.835169145822775, "iter_time": 0.4601542472839355, "loss": 0.740043580532074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08312316981186, "step_time": 0.4290714111328125} +{"epoch": 0, "iter": 1716, "iter_tflops": 49.42184158317152, "iter_time": 0.417448902130127, "loss": 0.9293901324272156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.24729376023669, "step_time": 0.3874580669403076} +{"epoch": 0, "iter": 1717, "iter_tflops": 39.715278265078716, "iter_time": 0.519474983215332, "loss": 0.2975972592830658, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 43.059885669202814, "step_time": 0.47912559890747064} +{"epoch": 0, "iter": 1718, "iter_tflops": 9.08934787501163, "iter_time": 2.269810089111328, "loss": 0.3306722044944763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.909044184200399, "step_time": 1.7323886947631835} +{"epoch": 0, "iter": 1719, "iter_tflops": 10.78793866479102, "iter_time": 1.9124222106933595, "loss": 0.5474745035171509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.548436860471872, "step_time": 1.5227655944824219} +{"epoch": 0, "iter": 1720, "iter_tflops": 39.25016849891728, "iter_time": 0.5256306991577149, "loss": 0.44816166162490845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.042374331467244, "step_time": 0.4793205261230469} +{"epoch": 0, "iter": 1721, "iter_tflops": 13.722374348455697, "iter_time": 1.1877888336181641, "loss": 0.53020840883255, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 14.577971806043598, "step_time": 1.1180761795043948} +{"epoch": 0, "iter": 1722, "iter_tflops": 11.519691533734928, "iter_time": 1.414906204223633, "loss": 0.3870677351951599, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 15.459301197728207, "step_time": 1.05433504486084} +{"epoch": 0, "iter": 1723, "iter_tflops": 23.431169829601505, "iter_time": 0.6956239547729491, "loss": 0.4601925015449524, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 25.292249069189428, "step_time": 0.6444378662109376} +{"epoch": 0, "iter": 1724, "iter_tflops": 25.095271071938754, "iter_time": 0.6494961929321289, "loss": 0.4086277484893799, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 26.96132511250556, "step_time": 0.604543098449707} +{"epoch": 0, "iter": 1725, "iter_tflops": 17.11159619601162, "iter_time": 1.2056790771484376, "loss": 0.9377257823944092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.46973931336828, "step_time": 1.117021369934082} +{"epoch": 0, "iter": 1726, "iter_tflops": 16.010589107628856, "iter_time": 1.2885905303955079, "loss": 0.8591076135635376, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 20.961717619564837, "step_time": 0.9842272415161133} +{"epoch": 0, "iter": 1727, "iter_tflops": 38.19254997978831, "iter_time": 0.5401863327026367, "loss": 0.9146894812583923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.81218640621319, "step_time": 0.4934229774475098} +{"epoch": 0, "iter": 1728, "iter_tflops": 39.57524567536068, "iter_time": 0.5213130874633789, "loss": 0.9870098829269409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.09668841723656, "step_time": 0.47871644592285156} +{"epoch": 0, "iter": 1729, "iter_tflops": 31.567537421902745, "iter_time": 0.6535541000366212, "loss": 1.0687072277069092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.64672378571138, "step_time": 0.59547025680542} +{"epoch": 0, "iter": 1730, "iter_tflops": 36.79709912942676, "iter_time": 0.5606717376708985, "loss": 1.0752936601638794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.9686169961306, "step_time": 0.516182321548462} +{"epoch": 0, "iter": 1731, "iter_tflops": 38.298143529415434, "iter_time": 0.5386969604492188, "loss": 0.8551393151283264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60430475528945, "step_time": 0.4958884334564209} +{"epoch": 0, "iter": 1732, "iter_tflops": 36.69653072252471, "iter_time": 0.5622082824707031, "loss": 0.9973692297935486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.80957240681915, "step_time": 0.5182445392608643} +{"epoch": 0, "iter": 1733, "iter_tflops": 19.48113941265301, "iter_time": 1.0590290985107422, "loss": 0.37993377447128296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.82987088680964, "step_time": 0.9904570999145508} +{"epoch": 0, "iter": 1734, "iter_tflops": 17.56376189546449, "iter_time": 1.1746397857666016, "loss": 0.3432433009147644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.607850561503504, "step_time": 1.0011278686523437} +{"epoch": 0, "iter": 1735, "iter_tflops": 39.47370924329217, "iter_time": 0.5226540374755859, "loss": 0.3704732358455658, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 43.56956446645846, "step_time": 0.47352076530456544} +{"epoch": 0, "iter": 1736, "iter_tflops": 45.494039660005434, "iter_time": 0.45349003219604495, "loss": 0.3635123074054718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.66051050931854, "step_time": 0.41544263839721685} +{"epoch": 0, "iter": 1737, "iter_tflops": 19.533918359323398, "iter_time": 1.0561676940917968, "loss": 0.5402036309242249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.077894967542534, "step_time": 0.9788023681640626} +{"epoch": 0, "iter": 1738, "iter_tflops": 23.09657244141184, "iter_time": 0.8932534713745117, "loss": 0.7187244892120361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.386444736097413, "step_time": 0.7267938518524171} +{"epoch": 0, "iter": 1739, "iter_tflops": 49.28893605989042, "iter_time": 0.418574535369873, "loss": 0.4251556098461151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.43736847790016, "step_time": 0.3860798931121826} +{"epoch": 0, "iter": 1740, "iter_tflops": 46.45773455506218, "iter_time": 0.4440830726623535, "loss": 0.5173285007476807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.31524635794828, "step_time": 0.4100366191864014} +{"epoch": 0, "iter": 1741, "iter_tflops": 19.741263311936862, "iter_time": 0.7241469039916992, "loss": 0.1356087476015091, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 21.054888392029017, "step_time": 0.6789670143127442} +{"epoch": 0, "iter": 1742, "iter_tflops": 10.573431911333817, "iter_time": 1.3520278778076171, "loss": 0.09119356423616409, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 13.983270326870658, "step_time": 1.022334144592285} +{"epoch": 0, "iter": 1743, "iter_tflops": 29.390008882511072, "iter_time": 0.4864093360900879, "loss": 0.11880441009998322, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 32.29171888275583, "step_time": 0.4427009525299072} +{"epoch": 0, "iter": 1744, "iter_tflops": 28.097603158153866, "iter_time": 0.5087827110290527, "loss": 0.13448196649551392, "lr": 
3e-05, "seqlen": 5728.0, "step_tflops": 30.698849381569154, "step_time": 0.4656713523864746} +{"epoch": 0, "iter": 1745, "iter_tflops": 26.488224564819262, "iter_time": 0.7788779296875, "loss": 0.3019639253616333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.43156546401294, "step_time": 0.7256404342651366} +{"epoch": 0, "iter": 1746, "iter_tflops": 9.122177079786256, "iter_time": 2.2616414184570313, "loss": 0.22167915105819702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.176935566873679, "step_time": 1.8458631515502928} +{"epoch": 0, "iter": 1747, "iter_tflops": 18.50504951241574, "iter_time": 1.1148899383544921, "loss": 0.34917137026786804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.38795957891181, "step_time": 0.9646125164031982} +{"epoch": 0, "iter": 1748, "iter_tflops": 49.30598178674986, "iter_time": 0.4184298286437988, "loss": 0.2110017091035843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.53245065524844, "step_time": 0.38539415359497076} +{"epoch": 0, "iter": 1749, "iter_tflops": 26.39446093718736, "iter_time": 0.5973443756103516, "loss": 0.5372286438941956, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 28.322279866224694, "step_time": 0.5566848030090332} +{"epoch": 0, "iter": 1750, "iter_tflops": 28.503165090098445, "iter_time": 0.5531520004272461, "loss": 0.5416329503059387, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.434398616626385, "step_time": 0.518051399230957} +{"epoch": 0, "iter": 1751, "iter_tflops": 28.562389034632904, "iter_time": 0.5520050430297851, "loss": 0.5183621048927307, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.344558843862927, "step_time": 0.5195851707458496} +{"epoch": 0, "iter": 1752, "iter_tflops": 27.80491157633527, "iter_time": 0.567043083190918, "loss": 0.5076947212219238, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 29.498423629737978, "step_time": 0.5344889945983886} +{"epoch": 0, "iter": 1753, "iter_tflops": 25.257668333032548, "iter_time": 0.8168249435424805, "loss": 0.9073103666305542, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 26.593815119175744, "step_time": 0.7757854003906249} +{"epoch": 0, "iter": 1754, "iter_tflops": 22.511267843880795, "iter_time": 0.9164785232543945, "loss": 1.1386884450912476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.804324056581535, "step_time": 0.6697466716766358} +{"epoch": 0, "iter": 1755, "iter_tflops": 49.43162205113592, "iter_time": 0.41736630630493166, "loss": 1.0865238904953003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.262274557158534, "step_time": 0.3873490886688232} +{"epoch": 0, "iter": 1756, "iter_tflops": 44.699572628589756, "iter_time": 0.46155012893676756, "loss": 0.883573591709137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98714567529354, "step_time": 0.42992958259582514} +{"epoch": 0, "iter": 1757, "iter_tflops": 28.130295573332933, "iter_time": 0.6100712051391601, "loss": 0.1295561045408249, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 29.8774115039467, "step_time": 0.5743965911865234} +{"epoch": 0, "iter": 1758, "iter_tflops": 10.713924263348929, "iter_time": 1.6017924804687498, "loss": 0.2342861443758011, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 13.144432923523883, "step_time": 1.305608497619629} +{"epoch": 0, "iter": 1759, "iter_tflops": 10.820050253163568, "iter_time": 1.5860816650390626, "loss": 0.15845760703086853, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 12.780164946993876, "step_time": 1.3428217391967772} +{"epoch": 0, "iter": 1760, "iter_tflops": 15.278441722762253, "iter_time": 1.123248275756836, "loss": 0.19916211068630219, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 18.168597519391383, "step_time": 0.9445684127807618} +{"epoch": 0, "iter": 1761, "iter_tflops": 16.92285588056797, "iter_time": 0.8857521133422851, "loss": 0.5221337080001831, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 19.74108547355505, "step_time": 0.759302490234375} +{"epoch": 0, "iter": 1762, "iter_tflops": 8.666948384828023, "iter_time": 1.7294963226318358, "loss": 0.48135894536972046, 
"lr": 3e-05, "seqlen": 6000.0, "step_tflops": 10.19239142764604, "step_time": 1.4706514625549316} +{"epoch": 0, "iter": 1763, "iter_tflops": 23.065602990779894, "iter_time": 0.6498618469238281, "loss": 0.4139283299446106, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.77067596332158, "step_time": 0.6051290397644042} +{"epoch": 0, "iter": 1764, "iter_tflops": 23.071204574586876, "iter_time": 0.6497040634155273, "loss": 0.43934109807014465, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.772441553387697, "step_time": 0.6050859107971192} +{"epoch": 0, "iter": 1765, "iter_tflops": 23.64293010518323, "iter_time": 0.8726115341186524, "loss": 0.7707863450050354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.554944594542437, "step_time": 0.8073229598999023} +{"epoch": 0, "iter": 1766, "iter_tflops": 15.683235608237181, "iter_time": 1.315487060546875, "loss": 0.9954031109809875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.801949387599198, "step_time": 1.0418718433380127} +{"epoch": 0, "iter": 1767, "iter_tflops": 44.43282532365985, "iter_time": 0.4643209915161133, "loss": 0.7710902690887451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.804147727460645, "step_time": 0.43157538604736323} +{"epoch": 0, "iter": 1768, "iter_tflops": 45.302780867319015, "iter_time": 0.45540457153320313, "loss": 0.9422639012336731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.56839323763779, "step_time": 0.42478435325622554} +{"epoch": 0, "iter": 1769, "iter_tflops": 37.03032836745596, "iter_time": 0.5571404418945313, "loss": 0.6811142563819885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03768071716183, "step_time": 0.5152919235229492} +{"epoch": 0, "iter": 1770, "iter_tflops": 16.971778333023032, "iter_time": 1.2156117706298828, "loss": 0.7672686576843262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.466057584944174, "step_time": 1.117244079589844} +{"epoch": 0, "iter": 1771, "iter_tflops": 39.9117077394886, "iter_time": 0.5169183349609375, "loss": 0.9514396786689758, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.11638955740029, "step_time": 0.46765144920349117} +{"epoch": 0, "iter": 1772, "iter_tflops": 42.77183721041232, "iter_time": 0.4823522872924804, "loss": 0.8339961171150208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.554756596207476, "step_time": 0.44315758514404296} +{"epoch": 0, "iter": 1773, "iter_tflops": 15.971653022365203, "iter_time": 1.2917318878173827, "loss": 0.25505438446998596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.099101999273557, "step_time": 1.20656005859375} +{"epoch": 0, "iter": 1774, "iter_tflops": 15.269443035262436, "iter_time": 1.351135955810547, "loss": 0.2255537509918213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.446378967984746, "step_time": 1.0609221153259278} +{"epoch": 0, "iter": 1775, "iter_tflops": 48.770429903543935, "iter_time": 0.4230246391296386, "loss": 0.18306812644004822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.030120204748094, "step_time": 0.3890448188781738} +{"epoch": 0, "iter": 1776, "iter_tflops": 49.36175813346683, "iter_time": 0.41795702362060544, "loss": 0.27578970789909363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.74262769912734, "step_time": 0.38388695144653323} +{"epoch": 0, "iter": 1777, "iter_tflops": 30.009669877286466, "iter_time": 0.6874815216064454, "loss": 1.032464623451233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.835747924219376, "step_time": 0.6480480232238769} +{"epoch": 0, "iter": 1778, "iter_tflops": 25.43702183744207, "iter_time": 0.8110656051635743, "loss": 0.8945068717002869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.467772581027873, "step_time": 0.6556261157989501} +{"epoch": 0, "iter": 1779, "iter_tflops": 49.53700708485273, "iter_time": 0.416478401184082, "loss": 0.8121706247329712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.43236779860672, "step_time": 0.38611602592468264} +{"epoch": 0, "iter": 1780, "iter_tflops": 48.93651022485197, "iter_time": 0.421588981628418, "loss": 
0.8365505933761597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37360604490738, "step_time": 0.3939215774536132} +{"epoch": 0, "iter": 1781, "iter_tflops": 42.53340835113489, "iter_time": 0.48505620193481447, "loss": 0.6982762813568115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.285770569535075, "step_time": 0.4457329597473144} +{"epoch": 0, "iter": 1782, "iter_tflops": 43.99337932901369, "iter_time": 0.46895905303955077, "loss": 0.7118175625801086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.25494936291643, "step_time": 0.4365911674499512} +{"epoch": 0, "iter": 1783, "iter_tflops": 46.91897597955499, "iter_time": 0.4397174720764161, "loss": 0.8189202547073364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.4512738476438, "step_time": 0.4089310722351074} +{"epoch": 0, "iter": 1784, "iter_tflops": 52.631049433490276, "iter_time": 0.39199472045898437, "loss": 0.7336046099662781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.72949255833021, "step_time": 0.36367491722106937} +{"epoch": 0, "iter": 1785, "iter_tflops": 31.16277327983111, "iter_time": 0.6620429229736328, "loss": 1.083484411239624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.05487969526357, "step_time": 0.6241466827392578} +{"epoch": 0, "iter": 1786, "iter_tflops": 12.034390706257279, "iter_time": 1.7143446655273438, "loss": 0.8567160964012146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.812257998253163, "step_time": 1.392839195251465} +{"epoch": 0, "iter": 1787, "iter_tflops": 11.567398822198182, "iter_time": 1.7835551300048829, "loss": 0.9113948941230774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.856051651205407, "step_time": 1.3887332916259765} +{"epoch": 0, "iter": 1788, "iter_tflops": 23.603653398609623, "iter_time": 0.8740635681152344, "loss": 0.8609201908111572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.43080094263669, "step_time": 0.7805701217651366} +{"epoch": 0, "iter": 1789, "iter_tflops": 11.353709663609026, "iter_time": 1.3454252166748046, "loss": 
0.46979811787605286, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 12.078373918622116, "step_time": 1.264703956604004} +{"epoch": 0, "iter": 1790, "iter_tflops": 15.220725548755835, "iter_time": 1.0036030960083007, "loss": 0.33005255460739136, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 17.786484051797316, "step_time": 0.8588300666809081} +{"epoch": 0, "iter": 1791, "iter_tflops": 23.433185701132494, "iter_time": 0.6518775329589843, "loss": 0.4605956971645355, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 25.258218050859732, "step_time": 0.6047761268615722} +{"epoch": 0, "iter": 1792, "iter_tflops": 24.432693831661183, "iter_time": 0.6252101135253906, "loss": 0.4654397666454315, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 26.186990797598344, "step_time": 0.5833265609741211} +{"epoch": 0, "iter": 1793, "iter_tflops": 15.91966260821274, "iter_time": 1.295950424194336, "loss": 0.03254460170865059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.10403400498356, "step_time": 1.2062121429443358} +{"epoch": 0, "iter": 1794, "iter_tflops": 21.136469144938957, "iter_time": 0.976089874267578, "loss": 0.008479470387101173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.237808353314975, "step_time": 0.786311616897583} +{"epoch": 0, "iter": 1795, "iter_tflops": 62.134061871315176, "iter_time": 0.3320416030883789, "loss": 0.0073030926287174225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 68.48279103474331, "step_time": 0.3012595310211182} +{"epoch": 0, "iter": 1796, "iter_tflops": 64.08709343341825, "iter_time": 0.3219227523803711, "loss": 0.016346244141459465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 69.80901316904591, "step_time": 0.29553624343872065} +{"epoch": 0, "iter": 1797, "iter_tflops": 34.794861951786565, "iter_time": 0.59293505859375, "loss": 0.3601604104042053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.15477857689123, "step_time": 0.5552742958068848} +{"epoch": 0, "iter": 1798, "iter_tflops": 17.041071326937924, "iter_time": 1.2106688079833985, 
"loss": 0.536793053150177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.249790990176027, "step_time": 1.0188299484252932} +{"epoch": 0, "iter": 1799, "iter_tflops": 38.73861365849085, "iter_time": 0.5325718078613281, "loss": 0.5527243614196777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.300300163345135, "step_time": 0.4877292461395264} +{"epoch": 0, "iter": 1800, "iter_tflops": 41.326140509123086, "iter_time": 0.49922623443603514, "loss": 0.5421862006187439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.85415562301108, "step_time": 0.45995946693420403} +{"epoch": 0, "iter": 1801, "iter_tflops": 18.048913292659684, "iter_time": 1.1430656890869142, "loss": 0.9405348300933838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.398843236534283, "step_time": 1.063521842956543} +{"epoch": 0, "iter": 1802, "iter_tflops": 21.45557614331988, "iter_time": 0.9615725708007812, "loss": 0.7928588390350342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.726396346431326, "step_time": 0.7719369735717775} +{"epoch": 0, "iter": 1803, "iter_tflops": 41.78538000613495, "iter_time": 0.4937395210266113, "loss": 0.9715872406959534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.44863200795186, "step_time": 0.4539431133270264} +{"epoch": 0, "iter": 1804, "iter_tflops": 35.859308802279266, "iter_time": 0.5753343887329102, "loss": 0.859061062335968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.88375501510749, "step_time": 0.5305838775634766} +{"epoch": 0, "iter": 1805, "iter_tflops": 29.369873951025063, "iter_time": 0.7024576797485351, "loss": 0.87128084897995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.35362234152606, "step_time": 0.637674919128418} +{"epoch": 0, "iter": 1806, "iter_tflops": 43.42948261395863, "iter_time": 0.4750481071472168, "loss": 0.9609243273735046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.49669978232579, "step_time": 0.43436898994445794} +{"epoch": 0, "iter": 1807, "iter_tflops": 44.44473516829839, "iter_time": 0.4641965675354004, 
"loss": 0.868710994720459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.39173344400972, "step_time": 0.42633507919311525} +{"epoch": 0, "iter": 1808, "iter_tflops": 38.33114688370039, "iter_time": 0.538233139038086, "loss": 0.6022709608078003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.83291028886989, "step_time": 0.49317853736877443} +{"epoch": 0, "iter": 1809, "iter_tflops": 37.85432506705723, "iter_time": 0.5450128479003906, "loss": 0.21109047532081604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.27310432519415, "step_time": 0.4880430202484131} +{"epoch": 0, "iter": 1810, "iter_tflops": 42.65871343777488, "iter_time": 0.48363140487670897, "loss": 0.12681438028812408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08923153127921, "step_time": 0.4381276321411133} +{"epoch": 0, "iter": 1811, "iter_tflops": 43.56469193489506, "iter_time": 0.47357372665405273, "loss": 0.19704167544841766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.84886013480812, "step_time": 0.4311721000671387} +{"epoch": 0, "iter": 1812, "iter_tflops": 39.5819409935751, "iter_time": 0.5212249069213868, "loss": 0.21695710718631744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.658592655125446, "step_time": 0.47255516624450683} +{"epoch": 0, "iter": 1813, "iter_tflops": 21.012620248515255, "iter_time": 0.9818429718017578, "loss": 0.26738423109054565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.574763002578802, "step_time": 0.9139007797241211} +{"epoch": 0, "iter": 1814, "iter_tflops": 19.272826532700222, "iter_time": 1.0704757537841798, "loss": 0.223160058259964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.473138210968298, "step_time": 0.8789235305786133} +{"epoch": 0, "iter": 1815, "iter_tflops": 37.338142830251755, "iter_time": 0.5525473937988281, "loss": 0.20443911850452423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.72305303235546, "step_time": 0.5066195182800293} +{"epoch": 0, "iter": 1816, "iter_tflops": 39.30782757618066, "iter_time": 
0.5248596725463867, "loss": 0.24058598279953003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.15527040233852, "step_time": 0.4780666027069092} +{"epoch": 0, "iter": 1817, "iter_tflops": 22.02173341444518, "iter_time": 0.9368514785766602, "loss": 0.009972562082111835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.69088659566598, "step_time": 0.8708451423645018} +{"epoch": 0, "iter": 1818, "iter_tflops": 18.607880339314576, "iter_time": 1.1087288360595702, "loss": 0.02455485239624977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.509475372703402, "step_time": 0.9165515041351318} +{"epoch": 0, "iter": 1819, "iter_tflops": 55.00569071186786, "iter_time": 0.3750719833374024, "loss": 0.006203442811965942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.373163592928016, "step_time": 0.3417262287139893} +{"epoch": 0, "iter": 1820, "iter_tflops": 61.1119092960301, "iter_time": 0.3375953025817871, "loss": 0.00628205481916666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.00266768602523, "step_time": 0.30791450881958005} +{"epoch": 0, "iter": 1821, "iter_tflops": 34.04317310537658, "iter_time": 0.6060273361206054, "loss": 1.1060423851013184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.37157001435255, "step_time": 0.5672313156127929} +{"epoch": 0, "iter": 1822, "iter_tflops": 10.949621853521265, "iter_time": 1.8841831970214844, "loss": 1.1453216075897217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.67100673736837, "step_time": 1.6282126541137696} +{"epoch": 0, "iter": 1823, "iter_tflops": 36.046309278640585, "iter_time": 0.5723496780395507, "loss": 0.7884194254875183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.40530248269032, "step_time": 0.5235613536834717} +{"epoch": 0, "iter": 1824, "iter_tflops": 36.85154324240884, "iter_time": 0.5598434066772461, "loss": 1.023094654083252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.95184221847769, "step_time": 0.5163990535736084} +{"epoch": 0, "iter": 1825, "iter_tflops": 23.72571011131123, "iter_time": 
0.8695669555664063, "loss": 0.8058513402938843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.09549031744614, "step_time": 0.7905999565124512} +{"epoch": 0, "iter": 1826, "iter_tflops": 37.99387115805796, "iter_time": 0.5430110931396483, "loss": 0.5395988821983337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01361275461531, "step_time": 0.4910573539733887} +{"epoch": 0, "iter": 1827, "iter_tflops": 40.45971945466844, "iter_time": 0.5099168701171874, "loss": 0.6545150876045227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.34776920583546, "step_time": 0.46521152877807626} +{"epoch": 0, "iter": 1828, "iter_tflops": 43.46948412814491, "iter_time": 0.47461095809936527, "loss": 0.6267614960670471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.38701969165768, "step_time": 0.43537436294555665} +{"epoch": 0, "iter": 1829, "iter_tflops": 30.86509567711301, "iter_time": 0.6684279785156251, "loss": 0.13536207377910614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.62230349546741, "step_time": 0.6136133270263671} +{"epoch": 0, "iter": 1830, "iter_tflops": 9.251996471752895, "iter_time": 2.229907196044922, "loss": 0.1791878491640091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.00982243502068, "step_time": 1.8738806762695313} +{"epoch": 0, "iter": 1831, "iter_tflops": 14.404564464886999, "iter_time": 1.4322608337402345, "loss": 0.12153927981853485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.98783587670866, "step_time": 1.214462728500366} +{"epoch": 0, "iter": 1832, "iter_tflops": 50.739220655685166, "iter_time": 0.40661037445068365, "loss": 0.07738716155290604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.47058518795406, "step_time": 0.37192853546142574} +{"epoch": 0, "iter": 1833, "iter_tflops": 21.08593923937614, "iter_time": 0.7613283004760742, "loss": 0.3513251543045044, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 22.28377068934936, "step_time": 0.7204042129516602} +{"epoch": 0, "iter": 1834, "iter_tflops": 16.89681296195199, "iter_time": 
0.9500798950195313, "loss": 0.2873401641845703, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 22.039955451970222, "step_time": 0.7283736267089843} +{"epoch": 0, "iter": 1835, "iter_tflops": 25.439635604876745, "iter_time": 0.6310358581542969, "loss": 0.420431911945343, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 27.311751636661036, "step_time": 0.5877807655334472} +{"epoch": 0, "iter": 1836, "iter_tflops": 25.73181195976582, "iter_time": 0.6238706512451173, "loss": 0.418194442987442, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 27.60998669655082, "step_time": 0.581431728363037} +{"epoch": 0, "iter": 1837, "iter_tflops": 19.532639172914056, "iter_time": 1.056236862182617, "loss": 0.6488971710205078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.614223352656932, "step_time": 1.000818374633789} +{"epoch": 0, "iter": 1838, "iter_tflops": 14.837263636269736, "iter_time": 1.3904918060302736, "loss": 0.7044339179992676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.761813334091677, "step_time": 1.2308389968872069} +{"epoch": 0, "iter": 1839, "iter_tflops": 47.10952037747137, "iter_time": 0.43793894195556643, "loss": 0.7570720911026001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.03985889518168, "step_time": 0.40421533203124993} +{"epoch": 0, "iter": 1840, "iter_tflops": 47.843604199671695, "iter_time": 0.43121946716308596, "loss": 0.655020534992218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.73610212175346, "step_time": 0.3987755680084229} +{"epoch": 0, "iter": 1841, "iter_tflops": 28.098696079442522, "iter_time": 0.7342366867065431, "loss": 0.7559863328933716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.725373764873776, "step_time": 0.6940566558837891} +{"epoch": 0, "iter": 1842, "iter_tflops": 15.580596435642962, "iter_time": 1.3241529998779298, "loss": 0.7931536436080933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.524866300387217, "step_time": 1.1136972961425782} +{"epoch": 0, "iter": 1843, "iter_tflops": 49.145422140156356, "iter_time": 
0.41979685211181644, "loss": 0.7933429479598999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.520582004580184, "step_time": 0.38547961807250974} +{"epoch": 0, "iter": 1844, "iter_tflops": 43.9494991145987, "iter_time": 0.4694272727966308, "loss": 0.6026133894920349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.166644519702004, "step_time": 0.43740854835510257} +{"epoch": 0, "iter": 1845, "iter_tflops": 23.18875724577494, "iter_time": 0.8897024230957031, "loss": 0.19945403933525085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.210871194277935, "step_time": 0.8521417236328125} +{"epoch": 0, "iter": 1846, "iter_tflops": 14.717016535554826, "iter_time": 1.4018529815673828, "loss": 0.15349344909191132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.009540665153768, "step_time": 1.0310628242492674} +{"epoch": 0, "iter": 1847, "iter_tflops": 42.03958219680594, "iter_time": 0.49075400924682616, "loss": 0.18464376032352448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17469027237512, "step_time": 0.4468052387237549} +{"epoch": 0, "iter": 1848, "iter_tflops": 45.81647238086814, "iter_time": 0.4502986030578613, "loss": 0.18411047756671906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.317050707439115, "step_time": 0.41002191543579103} +{"epoch": 0, "iter": 1849, "iter_tflops": 19.52698713570276, "iter_time": 1.0565425872802734, "loss": 0.04683281108736992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.89243770711655, "step_time": 0.987490966796875} +{"epoch": 0, "iter": 1850, "iter_tflops": 20.639627638405507, "iter_time": 0.9995865173339843, "loss": 0.030377332121133804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.79228439063731, "step_time": 0.716549379348755} +{"epoch": 0, "iter": 1851, "iter_tflops": 54.37143043425073, "iter_time": 0.3794473190307618, "loss": 0.05740624666213989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.51739984623437, "step_time": 0.3466396980285644} +{"epoch": 0, "iter": 1852, "iter_tflops": 54.71791803582286, 
"iter_time": 0.37704456329345704, "loss": 0.0320143885910511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.3958458989124, "step_time": 0.34734909820556636} +{"epoch": 0, "iter": 1853, "iter_tflops": 20.71135510864051, "iter_time": 0.6096997604370118, "loss": 0.018661778420209885, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 22.03000051104304, "step_time": 0.5732050819396973} +{"epoch": 0, "iter": 1854, "iter_tflops": 8.175527658536618, "iter_time": 1.5445740966796877, "loss": 0.007379280403256416, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 9.002500698248454, "step_time": 1.4026889495849608} +{"epoch": 0, "iter": 1855, "iter_tflops": 6.645309520437178, "iter_time": 1.9002438049316408, "loss": 0.0009598447359167039, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 7.706944098061892, "step_time": 1.6384844741821287} +{"epoch": 0, "iter": 1856, "iter_tflops": 13.784784705199526, "iter_time": 0.9160613327026367, "loss": 0.007134724874049425, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 17.32484956433501, "step_time": 0.7288783779144287} +{"epoch": 0, "iter": 1857, "iter_tflops": 20.84438298366085, "iter_time": 0.7819508514404298, "loss": 0.3811025619506836, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 22.036595768127825, "step_time": 0.7396461410522462} +{"epoch": 0, "iter": 1858, "iter_tflops": 6.480813769596718, "iter_time": 2.5150056152343754, "loss": 0.4012760818004608, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 7.646313141937996, "step_time": 2.1316525650024416} +{"epoch": 0, "iter": 1859, "iter_tflops": 9.042305051294264, "iter_time": 1.8025584106445314, "loss": 0.5387294292449951, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 10.897916735696729, "step_time": 1.4956329193115234} +{"epoch": 0, "iter": 1860, "iter_tflops": 25.72652753781677, "iter_time": 0.6335593872070312, "loss": 0.347726047039032, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 30.62247839261411, "step_time": 0.5322653121948242} +{"epoch": 0, "iter": 1861, "iter_tflops": 
25.693300971612686, "iter_time": 0.5833993606567384, "loss": 0.48190930485725403, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.546644874026427, "step_time": 0.5441481323242188} +{"epoch": 0, "iter": 1862, "iter_tflops": 22.991124692527396, "iter_time": 0.6519670333862304, "loss": 0.3133490979671478, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.78598214951844, "step_time": 0.6047553520202636} +{"epoch": 0, "iter": 1863, "iter_tflops": 23.222390613415243, "iter_time": 0.6454742584228517, "loss": 0.5658307671546936, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.7783963800665, "step_time": 0.6049404945373534} +{"epoch": 0, "iter": 1864, "iter_tflops": 21.910913066610156, "iter_time": 0.6841091156005858, "loss": 0.3482959568500519, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 23.43687109938994, "step_time": 0.6395672569274902} +{"epoch": 0, "iter": 1865, "iter_tflops": 37.51909040860616, "iter_time": 0.5498825607299805, "loss": 0.12140733003616333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.888685601106964, "step_time": 0.49252186393737796} +{"epoch": 0, "iter": 1866, "iter_tflops": 43.493042081537766, "iter_time": 0.47435388565063474, "loss": 0.09372678399085999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.058006885568865, "step_time": 0.429295654296875} +{"epoch": 0, "iter": 1867, "iter_tflops": 42.5946657527244, "iter_time": 0.48435861968994137, "loss": 0.1293744593858719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.108306386161736, "step_time": 0.43795022773742676} +{"epoch": 0, "iter": 1868, "iter_tflops": 44.37631831591507, "iter_time": 0.4649122390747071, "loss": 0.10794279724359512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.363056480292435, "step_time": 0.4265878753662109} +{"epoch": 0, "iter": 1869, "iter_tflops": 18.286572981532007, "iter_time": 1.128209945678711, "loss": 0.7305559515953064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.581183984446696, "step_time": 1.0536182861328127} +{"epoch": 0, "iter": 1870, 
"iter_tflops": 20.98319794968937, "iter_time": 0.9832196960449218, "loss": 0.4347871243953705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.026005441494444, "step_time": 0.7361410655975341} +{"epoch": 0, "iter": 1871, "iter_tflops": 47.72288355804885, "iter_time": 0.43231028747558586, "loss": 0.5894566178321838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.84243362479675, "step_time": 0.3979576587677002} +{"epoch": 0, "iter": 1872, "iter_tflops": 52.458881044044205, "iter_time": 0.3932812347412109, "loss": 0.5359055399894714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.57651226493251, "step_time": 0.36465827751159663} +{"epoch": 0, "iter": 1873, "iter_tflops": 43.94985304905334, "iter_time": 0.46942349243164067, "loss": 0.6808715462684631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.593298728105815, "step_time": 0.433487361907959} +{"epoch": 0, "iter": 1874, "iter_tflops": 32.015522645901086, "iter_time": 0.6444090805053712, "loss": 0.6683835983276367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.476083648447, "step_time": 0.5097107143402099} +{"epoch": 0, "iter": 1875, "iter_tflops": 36.474516573580445, "iter_time": 0.5656303482055663, "loss": 0.6189478039741516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.62721215151657, "step_time": 0.5206294460296632} +{"epoch": 0, "iter": 1876, "iter_tflops": 37.09806914456977, "iter_time": 0.5561231079101562, "loss": 0.6462302207946777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.280012606451194, "step_time": 0.5121918334960938} +{"epoch": 0, "iter": 1877, "iter_tflops": 24.945692001840776, "iter_time": 0.8270403366088868, "loss": 0.9181788563728333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.64248906011766, "step_time": 0.7743680953979492} +{"epoch": 0, "iter": 1878, "iter_tflops": 40.83233226848733, "iter_time": 0.5052636566162109, "loss": 0.6828112006187439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.88566926179711, "step_time": 0.47011003494262693} +{"epoch": 0, "iter": 1879, 
"iter_tflops": 44.34222608265252, "iter_time": 0.46526968383789064, "loss": 0.9659107327461243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.71446398125305, "step_time": 0.4323865718841553} +{"epoch": 0, "iter": 1880, "iter_tflops": 44.70934055437628, "iter_time": 0.461449291229248, "loss": 1.1312220096588135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.978838609449674, "step_time": 0.4300040206909179} +{"epoch": 0, "iter": 1881, "iter_tflops": 26.415455531746744, "iter_time": 0.7810235748291016, "loss": 0.9049833416938782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.798650610573716, "step_time": 0.7421616897583008} +{"epoch": 0, "iter": 1882, "iter_tflops": 17.997765030799965, "iter_time": 1.146314193725586, "loss": 1.2175840139389038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.12073021390432, "step_time": 0.9768172454833985} +{"epoch": 0, "iter": 1883, "iter_tflops": 38.914034739927835, "iter_time": 0.5301710205078125, "loss": 1.0875184535980225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.42962884438532, "step_time": 0.4862426109313965} +{"epoch": 0, "iter": 1884, "iter_tflops": 36.97397035602115, "iter_time": 0.5579896697998047, "loss": 0.9506534337997437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.01316996096393, "step_time": 0.5156075744628906} +{"epoch": 0, "iter": 1885, "iter_tflops": 18.123082207961527, "iter_time": 1.138387680053711, "loss": 0.09611135721206665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.51927460412192, "step_time": 1.0569600524902345} +{"epoch": 0, "iter": 1886, "iter_tflops": 17.4451642640111, "iter_time": 1.1826253509521485, "loss": 0.08992478996515274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.248034525790985, "step_time": 0.9709647960662842} +{"epoch": 0, "iter": 1887, "iter_tflops": 35.285934002572304, "iter_time": 0.5846832199096679, "loss": 0.0772254690527916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.80321128041875, "step_time": 0.531685209274292} +{"epoch": 0, "iter": 1888, 
"iter_tflops": 39.559588473954456, "iter_time": 0.521519416809082, "loss": 0.09312673658132553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.46188611341423, "step_time": 0.47469392967224117} +{"epoch": 0, "iter": 1889, "iter_tflops": 12.706768022388893, "iter_time": 1.6236302947998047, "loss": 1.0507994890213013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.325456902618185, "step_time": 1.5482466125488283} +{"epoch": 0, "iter": 1890, "iter_tflops": 17.487391926542337, "iter_time": 1.1797696075439452, "loss": 1.0000197887420654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.747556740863413, "step_time": 0.8336618328094483} +{"epoch": 0, "iter": 1891, "iter_tflops": 46.20247254769002, "iter_time": 0.44653656768798833, "loss": 1.0396398305892944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.864454203255505, "step_time": 0.41374349403381344} +{"epoch": 0, "iter": 1892, "iter_tflops": 42.84215830629404, "iter_time": 0.48156055450439456, "loss": 1.0005757808685303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91079396728512, "step_time": 0.4493734855651855} +{"epoch": 0, "iter": 1893, "iter_tflops": 42.601594248441636, "iter_time": 0.4842798461914062, "loss": 0.06113705784082413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.222901434611174, "step_time": 0.44633921432495116} +{"epoch": 0, "iter": 1894, "iter_tflops": 48.82997785637358, "iter_time": 0.4225087623596191, "loss": 0.05947088822722435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.70454411465572, "step_time": 0.3841591777801514} +{"epoch": 0, "iter": 1895, "iter_tflops": 56.15994144925341, "iter_time": 0.3673631591796875, "loss": 0.032547689974308014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.461358130541925, "step_time": 0.3356758480072021} +{"epoch": 0, "iter": 1896, "iter_tflops": 58.90532572470174, "iter_time": 0.35024156570434567, "loss": 0.045746635645627975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.36195152958562, "step_time": 0.3205479793548584} +{"epoch": 0, 
"iter": 1897, "iter_tflops": 49.684352254827466, "iter_time": 0.41524328231811525, "loss": 0.2075280398130417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.51612694985529, "step_time": 0.37844019126892087} +{"epoch": 0, "iter": 1898, "iter_tflops": 46.21922657520666, "iter_time": 0.44637470245361327, "loss": 0.21271295845508575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.43163987091947, "step_time": 0.40909027671813963} +{"epoch": 0, "iter": 1899, "iter_tflops": 52.66962428781068, "iter_time": 0.39170762634277345, "loss": 0.1433195173740387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.32868476489496, "step_time": 0.35987383270263673} +{"epoch": 0, "iter": 1900, "iter_tflops": 55.33160642068414, "iter_time": 0.3728627243041992, "loss": 0.1834886372089386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.24444282221334, "step_time": 0.34245637512207033} +{"epoch": 0, "iter": 1901, "iter_tflops": 25.90617565479251, "iter_time": 0.7803660049438478, "loss": 0.2884100377559662, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 27.33716101212143, "step_time": 0.7395171279907227} +{"epoch": 0, "iter": 1902, "iter_tflops": 18.189307075831085, "iter_time": 1.1114386444091795, "loss": 0.21833905577659607, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 23.42236760142694, "step_time": 0.863119354248047} +{"epoch": 0, "iter": 1903, "iter_tflops": 40.4575288960173, "iter_time": 0.4996918830871582, "loss": 0.19222304224967957, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 44.38336800562854, "step_time": 0.45549267005920413} +{"epoch": 0, "iter": 1904, "iter_tflops": 41.759206854638485, "iter_time": 0.484115966796875, "loss": 0.24419668316841125, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 45.88997773820265, "step_time": 0.4405384311676025} +{"epoch": 0, "iter": 1905, "iter_tflops": 26.287969424390496, "iter_time": 0.7848112258911133, "loss": 0.23466725647449493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.105432157856367, "step_time": 0.7340607109069824} 
+{"epoch": 0, "iter": 1906, "iter_tflops": 22.566529644755615, "iter_time": 0.9142342147827149, "loss": 0.21413026750087738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.49064366333901, "step_time": 0.7504769172668457} +{"epoch": 0, "iter": 1907, "iter_tflops": 38.14951039530922, "iter_time": 0.5407957611083984, "loss": 0.281737357378006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62519081003887, "step_time": 0.4956396141052246} +{"epoch": 0, "iter": 1908, "iter_tflops": 36.17749226268648, "iter_time": 0.5702742843627929, "loss": 0.21955697238445282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.606304775919604, "step_time": 0.520904275894165} +{"epoch": 0, "iter": 1909, "iter_tflops": 17.348611773806137, "iter_time": 0.8687253036499024, "loss": 0.4870016276836395, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 18.578884265017876, "step_time": 0.8111993064880372} +{"epoch": 0, "iter": 1910, "iter_tflops": 11.622438169782832, "iter_time": 1.296731185913086, "loss": 0.4413301944732666, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 13.18898177470144, "step_time": 1.142709747314453} +{"epoch": 0, "iter": 1911, "iter_tflops": 23.403012073058665, "iter_time": 0.6439845428466796, "loss": 0.47256699204444885, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 25.218409633034888, "step_time": 0.5976260299682618} +{"epoch": 0, "iter": 1912, "iter_tflops": 22.066180966805362, "iter_time": 0.6829989318847657, "loss": 0.4577541649341583, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 23.752453232791115, "step_time": 0.6345103759765625} +{"epoch": 0, "iter": 1913, "iter_tflops": 30.64463818819943, "iter_time": 0.6732366485595703, "loss": 0.43038231134414673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.385811315438104, "step_time": 0.6179599266052246} +{"epoch": 0, "iter": 1914, "iter_tflops": 47.660527368594636, "iter_time": 0.4328758964538574, "loss": 0.3858686089515686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.77275107406781, "step_time": 
0.39849328231811526} +{"epoch": 0, "iter": 1915, "iter_tflops": 52.98679329874741, "iter_time": 0.38936293792724613, "loss": 0.5092308521270752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.05397790838921, "step_time": 0.3553777751922607} +{"epoch": 0, "iter": 1916, "iter_tflops": 48.4264782944514, "iter_time": 0.4260291938781738, "loss": 0.44231361150741577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.46670781966742, "step_time": 0.3932225666046143} +{"epoch": 0, "iter": 1917, "iter_tflops": 31.706131803749148, "iter_time": 0.6506972732543945, "loss": 0.07570940256118774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.64133927346293, "step_time": 0.6132661170959472} +{"epoch": 0, "iter": 1918, "iter_tflops": 12.979753696754816, "iter_time": 1.589482666015625, "loss": 0.14028367400169373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.39565919106625, "step_time": 1.3400591201782228} +{"epoch": 0, "iter": 1919, "iter_tflops": 47.013011140205776, "iter_time": 0.4388379516601562, "loss": 0.08750104159116745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.570336434376955, "step_time": 0.39244743156433104} +{"epoch": 0, "iter": 1920, "iter_tflops": 51.66602132947014, "iter_time": 0.3993164749145508, "loss": 0.12775897979736328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.17901536527117, "step_time": 0.367238431930542} +{"epoch": 0, "iter": 1921, "iter_tflops": 35.78644722853071, "iter_time": 0.57650577545166, "loss": 0.14756424725055695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.37819311794763, "step_time": 0.537573341369629} +{"epoch": 0, "iter": 1922, "iter_tflops": 17.329079861412378, "iter_time": 1.1905475463867188, "loss": 0.16723787784576416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.521475479303014, "step_time": 1.0053416252136231} +{"epoch": 0, "iter": 1923, "iter_tflops": 50.432115079661145, "iter_time": 0.40908642196655276, "loss": 0.15266558527946472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.057158861353365, 
"step_time": 0.37472136116027827} +{"epoch": 0, "iter": 1924, "iter_tflops": 48.349531014481705, "iter_time": 0.42670721054077154, "loss": 0.18883563578128815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.6344354115417, "step_time": 0.39196950340270986} +{"epoch": 0, "iter": 1925, "iter_tflops": 41.37753662675007, "iter_time": 0.4986061325073242, "loss": 0.23725366592407227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.03164475676979, "step_time": 0.45814656829833983} +{"epoch": 0, "iter": 1926, "iter_tflops": 40.82134902764121, "iter_time": 0.505399600982666, "loss": 0.268608421087265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.710525714731446, "step_time": 0.4614370594024658} +{"epoch": 0, "iter": 1927, "iter_tflops": 38.26235227780635, "iter_time": 0.5392008666992187, "loss": 0.2721881568431854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.7598817381615, "step_time": 0.49404099464416495} +{"epoch": 0, "iter": 1928, "iter_tflops": 43.73369853618357, "iter_time": 0.47174362564086914, "loss": 0.2393333613872528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.56304656558554, "step_time": 0.4337630786895752} +{"epoch": 0, "iter": 1929, "iter_tflops": 19.760592031775005, "iter_time": 1.0440523986816406, "loss": 0.5951581597328186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.910618765438187, "step_time": 0.9866323776245117} +{"epoch": 0, "iter": 1930, "iter_tflops": 9.200464396678736, "iter_time": 2.24239697265625, "loss": 0.7275391221046448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.927358751947724, "step_time": 1.8880219802856444} +{"epoch": 0, "iter": 1931, "iter_tflops": 14.16585966112137, "iter_time": 1.4563954467773437, "loss": 0.5895384550094604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.35513517771715, "step_time": 1.1887601737976075} +{"epoch": 0, "iter": 1932, "iter_tflops": 43.56721905060768, "iter_time": 0.4735462570190429, "loss": 0.646586000919342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.349431435672614, 
"step_time": 0.4357199840545654} +{"epoch": 0, "iter": 1933, "iter_tflops": 18.207431521465328, "iter_time": 0.845712776184082, "loss": 0.3997487425804138, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 19.132650702510237, "step_time": 0.8048156890869141} +{"epoch": 0, "iter": 1934, "iter_tflops": 9.130429589785548, "iter_time": 1.686476776123047, "loss": 0.6405526995658875, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 12.947818209008378, "step_time": 1.1892549934387207} +{"epoch": 0, "iter": 1935, "iter_tflops": 26.378517717480804, "iter_time": 0.583742332458496, "loss": 0.3694479763507843, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.18381119866905, "step_time": 0.5463511428833008} +{"epoch": 0, "iter": 1936, "iter_tflops": 22.902630655224986, "iter_time": 0.6723357543945312, "loss": 0.43116822838783264, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.676156252976316, "step_time": 0.6240136146545411} +{"epoch": 0, "iter": 1937, "iter_tflops": 26.890809043675706, "iter_time": 0.76721728515625, "loss": 0.7660619616508484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.66265842164284, "step_time": 0.719789951324463} +{"epoch": 0, "iter": 1938, "iter_tflops": 13.564464507579423, "iter_time": 1.5209663085937497, "loss": 0.7255485653877258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.833301748481476, "step_time": 1.3030190315246581} +{"epoch": 0, "iter": 1939, "iter_tflops": 40.11216876693507, "iter_time": 0.5143350296020508, "loss": 1.2718206644058228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24738630649629, "step_time": 0.4770483322143555} +{"epoch": 0, "iter": 1940, "iter_tflops": 46.02852052814688, "iter_time": 0.44822412872314454, "loss": 0.9659079909324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.577311779014984, "step_time": 0.4161398181915283} +{"epoch": 0, "iter": 1941, "iter_tflops": 25.47865845739447, "iter_time": 0.8097401809692382, "loss": 0.12830354273319244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.819110563086507, 
"step_time": 0.7692683715820312} +{"epoch": 0, "iter": 1942, "iter_tflops": 13.740639413831056, "iter_time": 1.501465316772461, "loss": 0.13970717787742615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.400515367847873, "step_time": 1.2579539756774902} +{"epoch": 0, "iter": 1943, "iter_tflops": 35.649324471255504, "iter_time": 0.5787232666015626, "loss": 0.10381457954645157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.89009450064271, "step_time": 0.530497386932373} +{"epoch": 0, "iter": 1944, "iter_tflops": 40.335151847141546, "iter_time": 0.5114916534423828, "loss": 0.07445133477449417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.28718121819124, "step_time": 0.4658479709625244} +{"epoch": 0, "iter": 1945, "iter_tflops": 22.653747811728174, "iter_time": 0.9107143630981445, "loss": 0.9186732172966003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.61823974294527, "step_time": 0.8380409698486327} +{"epoch": 0, "iter": 1946, "iter_tflops": 22.46604873118094, "iter_time": 0.9183231887817384, "loss": 0.9931445717811584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.34265923052935, "step_time": 0.7545386619567871} +{"epoch": 0, "iter": 1947, "iter_tflops": 47.01836942946254, "iter_time": 0.43878794097900387, "loss": 0.9278748631477356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.86241605135681, "step_time": 0.4056255111694336} +{"epoch": 0, "iter": 1948, "iter_tflops": 47.008906397045536, "iter_time": 0.43887627029418946, "loss": 1.0746917724609375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.465472595536376, "step_time": 0.4088160171508789} +{"epoch": 0, "iter": 1949, "iter_tflops": 42.48825246467243, "iter_time": 0.48557171249389647, "loss": 0.36191755533218384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.05509624850791, "step_time": 0.4479654846191406} +{"epoch": 0, "iter": 1950, "iter_tflops": 8.820434747811873, "iter_time": 2.3390109558105467, "loss": 0.39187178015708923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
11.185288947770887, "step_time": 1.8444846267700195} +{"epoch": 0, "iter": 1951, "iter_tflops": 10.348624819886883, "iter_time": 1.9936072540283203, "loss": 0.31947067379951477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.369465036394857, "step_time": 1.6679050750732423} +{"epoch": 0, "iter": 1952, "iter_tflops": 24.593538668492602, "iter_time": 0.8388826751708984, "loss": 0.3067009449005127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.1572700242109, "step_time": 0.6040029983520507} +{"epoch": 0, "iter": 1953, "iter_tflops": 21.557796296107547, "iter_time": 0.7408629760742186, "loss": 0.39540809392929077, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 22.81627914722658, "step_time": 0.6999990234375} +{"epoch": 0, "iter": 1954, "iter_tflops": 6.9577071007065365, "iter_time": 2.2954937438964844, "loss": 0.40503746271133423, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 8.269545956008647, "step_time": 1.9313482513427733} +{"epoch": 0, "iter": 1955, "iter_tflops": 10.670373527360473, "iter_time": 1.4967960662841797, "loss": 0.46350327134132385, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 13.366991142748713, "step_time": 1.194836814880371} +{"epoch": 0, "iter": 1956, "iter_tflops": 25.23057057620543, "iter_time": 0.6330167236328125, "loss": 0.3481278419494629, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 27.1268659550626, "step_time": 0.5887658805847167} +{"epoch": 0, "iter": 1957, "iter_tflops": 15.236725255436406, "iter_time": 1.0831973266601564, "loss": 0.3689897954463959, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 16.242492426870403, "step_time": 1.0161236114501955} +{"epoch": 0, "iter": 1958, "iter_tflops": 16.48022665904296, "iter_time": 1.0014655990600585, "loss": 0.6457217335700989, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 21.327250007012633, "step_time": 0.7738634872436523} +{"epoch": 0, "iter": 1959, "iter_tflops": 29.724564904126797, "iter_time": 0.5552437896728516, "loss": 0.5288420915603638, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 
31.602666195567707, "step_time": 0.5222464447021484} +{"epoch": 0, "iter": 1960, "iter_tflops": 30.402014678768825, "iter_time": 0.5428712615966796, "loss": 0.43367233872413635, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 32.25702155106734, "step_time": 0.5116523246765137} +{"epoch": 0, "iter": 1961, "iter_tflops": 25.60493786263654, "iter_time": 0.8057466735839844, "loss": 0.7871828675270081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.97396181464854, "step_time": 0.764852180480957} +{"epoch": 0, "iter": 1962, "iter_tflops": 14.125006412092413, "iter_time": 1.4606077270507811, "loss": 0.8276649713516235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.802297011067992, "step_time": 1.158900646209717} +{"epoch": 0, "iter": 1963, "iter_tflops": 39.2064143346709, "iter_time": 0.526217300415039, "loss": 0.9873127937316895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.36865098316934, "step_time": 0.4649925804138183} +{"epoch": 0, "iter": 1964, "iter_tflops": 43.263028481802394, "iter_time": 0.4768758506774903, "loss": 0.9187813997268677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.2239345803451, "step_time": 0.43687790298461915} +{"epoch": 0, "iter": 1965, "iter_tflops": 3.428067440170255, "iter_time": 0.6406755142211914, "loss": 0.23056744039058685, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 3.786822262525545, "step_time": 0.5799793910980224} +{"epoch": 0, "iter": 1966, "iter_tflops": 4.977025575117256, "iter_time": 0.4412834205627441, "loss": 0.17201383411884308, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 5.48107243513612, "step_time": 0.40070239830017096} +{"epoch": 0, "iter": 1967, "iter_tflops": 5.4471081437454085, "iter_time": 0.40320089340209964, "loss": 0.23410195112228394, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 5.9125069104098165, "step_time": 0.3714632225036621} +{"epoch": 0, "iter": 1968, "iter_tflops": 4.596861128130662, "iter_time": 0.477777946472168, "loss": 0.21044649183750153, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 
4.989130836915839, "step_time": 0.4402127227783203} +{"epoch": 0, "iter": 1969, "iter_tflops": 19.794618933135837, "iter_time": 1.0422576751708985, "loss": 0.10229827463626862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.919040579765, "step_time": 0.9862351684570313} +{"epoch": 0, "iter": 1970, "iter_tflops": 19.401078847338162, "iter_time": 1.0633992919921875, "loss": 0.10085506737232208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.126208537015565, "step_time": 0.855132022857666} +{"epoch": 0, "iter": 1971, "iter_tflops": 35.56574210670459, "iter_time": 0.5800833129882812, "loss": 0.07079023867845535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.056670214634586, "step_time": 0.5282348289489746} +{"epoch": 0, "iter": 1972, "iter_tflops": 42.94348139034727, "iter_time": 0.4804243354797363, "loss": 0.10189011693000793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.22387128526331, "step_time": 0.4368784885406494} +{"epoch": 0, "iter": 1973, "iter_tflops": 20.211479558525834, "iter_time": 1.0207611694335939, "loss": 0.01837116666138172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.441918398002052, "step_time": 0.9621850585937499} +{"epoch": 0, "iter": 1974, "iter_tflops": 15.757064382719403, "iter_time": 1.3093234252929689, "loss": 0.003597117494791746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.069600608122155, "step_time": 0.934819522857666} +{"epoch": 0, "iter": 1975, "iter_tflops": 59.42199391768071, "iter_time": 0.3471962509155273, "loss": 0.024110568687319756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.55476539993181, "step_time": 0.31471538925170905} +{"epoch": 0, "iter": 1976, "iter_tflops": 59.48901426375722, "iter_time": 0.34680509948730465, "loss": 0.01185540296137333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.44192273836681, "step_time": 0.3152580585479736} +{"epoch": 0, "iter": 1977, "iter_tflops": 23.64650672135624, "iter_time": 0.656375602722168, "loss": 0.03894065320491791, "lr": 3e-05, "seqlen": 6208.0, 
"step_tflops": 25.220301402636977, "step_time": 0.615416519165039} +{"epoch": 0, "iter": 1978, "iter_tflops": 13.051342557151369, "iter_time": 1.1892255554199218, "loss": 0.0904015600681305, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 16.036902296623914, "step_time": 0.9678296852111816} +{"epoch": 0, "iter": 1979, "iter_tflops": 39.11700578538441, "iter_time": 0.3967836952209473, "loss": 0.05729640647768974, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 42.86125470032317, "step_time": 0.3621216926574707} +{"epoch": 0, "iter": 1980, "iter_tflops": 41.29079983438217, "iter_time": 0.37589463424682623, "loss": 0.08059854060411453, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 45.13708546323318, "step_time": 0.3438633651733398} +{"epoch": 0, "iter": 1981, "iter_tflops": 24.644324961573837, "iter_time": 0.8371539306640625, "loss": 0.156707301735878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.98365334538013, "step_time": 0.794002799987793} +{"epoch": 0, "iter": 1982, "iter_tflops": 23.971742524992578, "iter_time": 0.8606422119140626, "loss": 0.2572675049304962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.33130209073735, "step_time": 0.6801914882659912} +{"epoch": 0, "iter": 1983, "iter_tflops": 54.79914209847395, "iter_time": 0.3764857025146484, "loss": 0.18340927362442017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.53130726423306, "step_time": 0.34655871772766117} +{"epoch": 0, "iter": 1984, "iter_tflops": 46.55581418102425, "iter_time": 0.443147518157959, "loss": 0.21601036190986633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.13192298908576, "step_time": 0.4115360488891602} +{"epoch": 0, "iter": 1985, "iter_tflops": 22.58042135968475, "iter_time": 0.9136717681884766, "loss": 0.23436814546585083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.57048403290253, "step_time": 0.875293586730957} +{"epoch": 0, "iter": 1986, "iter_tflops": 12.548963323229332, "iter_time": 1.6440476379394529, "loss": 0.22331036627292633, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 15.97804490538291, "step_time": 1.2912151412963868} +{"epoch": 0, "iter": 1987, "iter_tflops": 49.50503433471702, "iter_time": 0.41674738311767584, "loss": 0.14483891427516937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.819659800132115, "step_time": 0.38333749389648436} +{"epoch": 0, "iter": 1988, "iter_tflops": 45.63242626368661, "iter_time": 0.4521147613525391, "loss": 0.18078650534152985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.51455514204725, "step_time": 0.41666724967956537} +{"epoch": 0, "iter": 1989, "iter_tflops": 39.26319744347691, "iter_time": 0.5254562759399415, "loss": 0.8163626194000244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30340090563381, "step_time": 0.48769349670410156} +{"epoch": 0, "iter": 1990, "iter_tflops": 25.07286502258526, "iter_time": 0.8228454742431641, "loss": 0.7382106184959412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.57427802713743, "step_time": 0.6144910545349122} +{"epoch": 0, "iter": 1991, "iter_tflops": 42.95881574073047, "iter_time": 0.48025284576416016, "loss": 0.7709171175956726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.878323518058444, "step_time": 0.44009879112243655} +{"epoch": 0, "iter": 1992, "iter_tflops": 39.69997926681018, "iter_time": 0.5196751708984375, "loss": 0.6425160765647888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.458268553857586, "step_time": 0.4747334442138672} +{"epoch": 0, "iter": 1993, "iter_tflops": 36.05698616995174, "iter_time": 0.5721801986694336, "loss": 0.621588945388794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.18771714194195, "step_time": 0.5264683685302735} +{"epoch": 0, "iter": 1994, "iter_tflops": 9.891013747474048, "iter_time": 2.085842163085937, "loss": 0.469220370054245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.237413266196697, "step_time": 1.685903144836426} +{"epoch": 0, "iter": 1995, "iter_tflops": 12.284165315290604, "iter_time": 1.679486801147461, "loss": 0.46629005670547485, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 14.166827703039202, "step_time": 1.456295928955078} +{"epoch": 0, "iter": 1996, "iter_tflops": 29.88327407931262, "iter_time": 0.6903893280029297, "loss": 0.45325762033462524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.19036099942707, "step_time": 0.6034184169769288} +{"epoch": 0, "iter": 1997, "iter_tflops": 22.879915701776273, "iter_time": 0.7070079116821288, "loss": 0.48497894406318665, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 24.80478862681453, "step_time": 0.6521434898376466} +{"epoch": 0, "iter": 1998, "iter_tflops": 24.786861901213143, "iter_time": 0.6526151428222657, "loss": 0.5523166060447693, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 26.951314218146166, "step_time": 0.6002038078308105} +{"epoch": 0, "iter": 1999, "iter_tflops": 24.085772082224914, "iter_time": 0.6716114959716797, "loss": 0.40540486574172974, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 25.952017284059977, "step_time": 0.6233149909973145} +{"epoch": 0, "iter": 2000, "iter_tflops": 25.65136986964455, "iter_time": 0.6306205673217774, "loss": 0.47186049818992615, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 27.546932998593164, "step_time": 0.5872262229919434} +{"epoch": 0, "iter": 2001, "iter_tflops": 37.55794735309226, "iter_time": 0.5493136596679687, "loss": 0.07508571445941925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.8989049608688, "step_time": 0.4924017353057862} +{"epoch": 0, "iter": 2002, "iter_tflops": 40.93215930666195, "iter_time": 0.5040313987731934, "loss": 0.04437325522303581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.071895785851204, "step_time": 0.4577374248504639} +{"epoch": 0, "iter": 2003, "iter_tflops": 42.83329124140131, "iter_time": 0.4816602439880371, "loss": 0.05098368972539902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.235893384584216, "step_time": 0.436767297744751} +{"epoch": 0, "iter": 2004, "iter_tflops": 45.09951650137408, "iter_time": 0.45745708847045896, "loss": 0.0508866086602211, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 49.49421108316073, "step_time": 0.4168385162353515} +{"epoch": 0, "iter": 2005, "iter_tflops": 21.14274563161775, "iter_time": 0.9758001098632813, "loss": 0.13436390459537506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.723464244975993, "step_time": 0.9079202575683594} +{"epoch": 0, "iter": 2006, "iter_tflops": 17.637755003141514, "iter_time": 1.1697119903564452, "loss": 0.07780032604932785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.54545625052704, "step_time": 0.80762282371521} +{"epoch": 0, "iter": 2007, "iter_tflops": 48.421226493945696, "iter_time": 0.42607540130615235, "loss": 0.09817986935377121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.763251953783694, "step_time": 0.3910125465393066} +{"epoch": 0, "iter": 2008, "iter_tflops": 53.31901787048621, "iter_time": 0.3869368629455566, "loss": 0.14140965044498444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.856223719660676, "step_time": 0.3565924663543701} +{"epoch": 0, "iter": 2009, "iter_tflops": 33.007042410200704, "iter_time": 0.6250512619018554, "loss": 0.14636290073394775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.02580243820063, "step_time": 0.589025577545166} +{"epoch": 0, "iter": 2010, "iter_tflops": 15.540951288342217, "iter_time": 1.3275309295654298, "loss": 0.15524323284626007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.792248443668676, "step_time": 1.0978512535095215} +{"epoch": 0, "iter": 2011, "iter_tflops": 43.613841268668885, "iter_time": 0.4730400466918946, "loss": 0.08460311591625214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.92266849130017, "step_time": 0.43050802803039556} +{"epoch": 0, "iter": 2012, "iter_tflops": 46.16237575340583, "iter_time": 0.446924430847168, "loss": 0.08839255571365356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.28351808093049, "step_time": 0.41029534721374517} +{"epoch": 0, "iter": 2013, "iter_tflops": 19.74298715390742, "iter_time": 1.044983383178711, "loss": 0.027655532583594322, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.717788076878644, "step_time": 0.9958154525756836} +{"epoch": 0, "iter": 2014, "iter_tflops": 14.574991029746636, "iter_time": 1.4155132904052736, "loss": 0.047907184809446335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.6020494104767, "step_time": 1.109076374053955} +{"epoch": 0, "iter": 2015, "iter_tflops": 37.79817168450716, "iter_time": 0.5458225250244141, "loss": 0.0427878275513649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.663373565594526, "step_time": 0.495185380935669} +{"epoch": 0, "iter": 2016, "iter_tflops": 45.10216765345935, "iter_time": 0.4574301986694336, "loss": 0.06893862783908844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62870129822588, "step_time": 0.4157089138031006} +{"epoch": 0, "iter": 2017, "iter_tflops": 23.55927101195807, "iter_time": 0.8757101821899415, "loss": 0.8521972894668579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.165764637490827, "step_time": 0.819807933807373} +{"epoch": 0, "iter": 2018, "iter_tflops": 15.364883887679312, "iter_time": 1.342743209838867, "loss": 0.9148197770118713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.102432302719464, "step_time": 1.139686267852783} +{"epoch": 0, "iter": 2019, "iter_tflops": 49.518164955005815, "iter_time": 0.41663687515258785, "loss": 1.1533256769180298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.749428519713035, "step_time": 0.38383837890624994} +{"epoch": 0, "iter": 2020, "iter_tflops": 45.409913069999426, "iter_time": 0.4543301696777344, "loss": 0.9991825222969055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.84370997869184, "step_time": 0.4223899765014648} +{"epoch": 0, "iter": 2021, "iter_tflops": 41.1946005572994, "iter_time": 0.5008203315734863, "loss": 0.41036903858184814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.65633084478231, "step_time": 0.46199705886840825} +{"epoch": 0, "iter": 2022, "iter_tflops": 44.161887367022416, "iter_time": 0.4671696510314941, "loss": 0.35796165466308594, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 48.154080106041654, "step_time": 0.42843915748596195} +{"epoch": 0, "iter": 2023, "iter_tflops": 47.94981127761425, "iter_time": 0.43026433181762697, "loss": 0.30356714129447937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.74749322481252, "step_time": 0.3986877861022949} +{"epoch": 0, "iter": 2024, "iter_tflops": 49.63298959234151, "iter_time": 0.4156729965209961, "loss": 0.2790538966655731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.6413088536806, "step_time": 0.38461204528808596} +{"epoch": 0, "iter": 2025, "iter_tflops": 26.279446922034666, "iter_time": 0.7850657424926758, "loss": 1.3175586462020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.589587551418894, "step_time": 0.7477854995727539} +{"epoch": 0, "iter": 2026, "iter_tflops": 15.698965391848061, "iter_time": 1.3141689910888672, "loss": 1.0667543411254883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.424857280713447, "step_time": 1.1197423782348632} +{"epoch": 0, "iter": 2027, "iter_tflops": 45.175934964798266, "iter_time": 0.4566832656860351, "loss": 1.1835271120071411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.57077282218796, "step_time": 0.42476354217529294} +{"epoch": 0, "iter": 2028, "iter_tflops": 46.59997784152186, "iter_time": 0.44272753906250006, "loss": 0.859620988368988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.01247119398638, "step_time": 0.4125189781188965} +{"epoch": 0, "iter": 2029, "iter_tflops": 26.586488677702086, "iter_time": 0.7759991836547853, "loss": 0.7063320279121399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.929029277289185, "step_time": 0.7386971206665038} +{"epoch": 0, "iter": 2030, "iter_tflops": 17.077007939746526, "iter_time": 1.20812109375, "loss": 0.6478508114814758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.246608375017445, "step_time": 1.0189901008605957} +{"epoch": 0, "iter": 2031, "iter_tflops": 45.20039941438823, "iter_time": 0.45643608856201173, "loss": 0.575625479221344, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 48.93753310530449, "step_time": 0.4215801696777344} +{"epoch": 0, "iter": 2032, "iter_tflops": 47.17185870711051, "iter_time": 0.4373601989746093, "loss": 0.5812128782272339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.884546020531054, "step_time": 0.4054491024017334} +{"epoch": 0, "iter": 2033, "iter_tflops": 43.397538479459314, "iter_time": 0.47539778137207034, "loss": 0.1377287656068802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.45463595458957, "step_time": 0.43475401496887206} +{"epoch": 0, "iter": 2034, "iter_tflops": 48.699490398233294, "iter_time": 0.42364085006713864, "loss": 0.1878417730331421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.007628059883785, "step_time": 0.3892098979949951} +{"epoch": 0, "iter": 2035, "iter_tflops": 49.890678918542335, "iter_time": 0.41352601242065423, "loss": 0.11743547767400742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.084459088402156, "step_time": 0.3814606609344482} +{"epoch": 0, "iter": 2036, "iter_tflops": 56.52259042465258, "iter_time": 0.3650061569213867, "loss": 0.16264638304710388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.26118789565586, "step_time": 0.3367726650238037} +{"epoch": 0, "iter": 2037, "iter_tflops": 38.35413795888101, "iter_time": 0.5379104995727539, "loss": 0.1016143187880516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.93350724655541, "step_time": 0.5040148010253906} +{"epoch": 0, "iter": 2038, "iter_tflops": 13.880238885022294, "iter_time": 1.4863644409179686, "loss": 0.10759188234806061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.285562776576413, "step_time": 1.0697688083648682} +{"epoch": 0, "iter": 2039, "iter_tflops": 43.478315282020326, "iter_time": 0.4745145568847656, "loss": 0.08748966455459595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86259124679881, "step_time": 0.43104840278625484} +{"epoch": 0, "iter": 2040, "iter_tflops": 48.23703086106887, "iter_time": 0.42770239257812503, "loss": 0.09985391050577164, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.692564819993116, "step_time": 0.3915370903015137} +{"epoch": 0, "iter": 2041, "iter_tflops": 16.500397393720135, "iter_time": 1.200090850830078, "loss": 0.13540618121623993, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 17.76933848217803, "step_time": 1.1143901596069337} +{"epoch": 0, "iter": 2042, "iter_tflops": 17.892435919432334, "iter_time": 1.1067233123779296, "loss": 0.12344817072153091, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 21.869159445123064, "step_time": 0.9054749450683595} +{"epoch": 0, "iter": 2043, "iter_tflops": 48.06926073112172, "iter_time": 0.4119467544555664, "loss": 0.21239492297172546, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 52.18520701969468, "step_time": 0.37945573234558105} +{"epoch": 0, "iter": 2044, "iter_tflops": 46.25120061576541, "iter_time": 0.4281397171020508, "loss": 0.15665653347969055, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 50.01607401212307, "step_time": 0.3959122409820557} +{"epoch": 0, "iter": 2045, "iter_tflops": 23.56073364674657, "iter_time": 0.8246555099487305, "loss": 0.13155372440814972, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 24.73442423461223, "step_time": 0.7855242004394531} +{"epoch": 0, "iter": 2046, "iter_tflops": 12.65871464614236, "iter_time": 1.534870590209961, "loss": 0.168847918510437, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 14.441586462305558, "step_time": 1.3453846549987794} +{"epoch": 0, "iter": 2047, "iter_tflops": 39.794586091946165, "iter_time": 0.4882445259094238, "loss": 0.25122979283332825, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 43.75278743422548, "step_time": 0.44407430839538575} +{"epoch": 0, "iter": 2048, "iter_tflops": 42.092438465572755, "iter_time": 0.46159095382690435, "loss": 0.1715877652168274, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 45.89793113556423, "step_time": 0.42331949043273925} +{"epoch": 0, "iter": 2049, "iter_tflops": 30.22053345478349, "iter_time": 0.6826846237182618, "loss": 1.1363605260849, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.76441049436275, "step_time": 0.6296799850463867} +{"epoch": 0, "iter": 2050, "iter_tflops": 36.30730082420518, "iter_time": 0.5682353973388672, "loss": 1.0452311038970947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.477222299673414, "step_time": 0.5226075267791748} +{"epoch": 0, "iter": 2051, "iter_tflops": 36.76014358400284, "iter_time": 0.5612353897094727, "loss": 1.0115586519241333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.725949177400416, "step_time": 0.5193354454040527} +{"epoch": 0, "iter": 2052, "iter_tflops": 36.11356071515338, "iter_time": 0.5712838363647461, "loss": 1.1832247972488403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.973190887112395, "step_time": 0.5293662910461425} +{"epoch": 0, "iter": 2053, "iter_tflops": 30.97544874178038, "iter_time": 0.6660466384887695, "loss": 0.0051706330850720406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.885663405658626, "step_time": 0.6088443145751954} +{"epoch": 0, "iter": 2054, "iter_tflops": 43.66655223269805, "iter_time": 0.4724690284729004, "loss": 0.006645783316344023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.1904374426564, "step_time": 0.4281159210205078} +{"epoch": 0, "iter": 2055, "iter_tflops": 51.6125664101633, "iter_time": 0.39973004531860346, "loss": 0.009620453231036663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.981979854440354, "step_time": 0.3620634727478027} +{"epoch": 0, "iter": 2056, "iter_tflops": 52.23485593049671, "iter_time": 0.39496794128417967, "loss": 0.0076979356817901134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.42433704157463, "step_time": 0.35927438735961914} +{"epoch": 0, "iter": 2057, "iter_tflops": 17.492747395367587, "iter_time": 1.179408416748047, "loss": 0.2954115867614746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.68474590206795, "step_time": 1.1041677322387695} +{"epoch": 0, "iter": 2058, "iter_tflops": 16.545741496976685, "iter_time": 1.24691259765625, "loss": 
0.3793509304523468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.54864495689092, "step_time": 1.0040123596191406} +{"epoch": 0, "iter": 2059, "iter_tflops": 48.61990437924285, "iter_time": 0.4243343086242675, "loss": 0.3834628760814667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.489960182113464, "step_time": 0.39304837417602545} +{"epoch": 0, "iter": 2060, "iter_tflops": 45.72353061053167, "iter_time": 0.45121392059326176, "loss": 0.36270588636398315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30992203584335, "step_time": 0.41839639282226565} +{"epoch": 0, "iter": 2061, "iter_tflops": 51.100081811297784, "iter_time": 0.4037389526367187, "loss": 0.022764958441257477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.75579537845509, "step_time": 0.3635063762664794} +{"epoch": 0, "iter": 2062, "iter_tflops": 53.25980121141399, "iter_time": 0.3873670768737793, "loss": 0.03532256558537483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.44515516257231, "step_time": 0.3529992084503174} +{"epoch": 0, "iter": 2063, "iter_tflops": 55.41008801452993, "iter_time": 0.3723346099853516, "loss": 0.06298395246267319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.605149342704856, "step_time": 0.3404181613922119} +{"epoch": 0, "iter": 2064, "iter_tflops": 54.46679649094882, "iter_time": 0.3787829437255859, "loss": 0.09839709103107452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.30950447106274, "step_time": 0.3478547611236572} +{"epoch": 0, "iter": 2065, "iter_tflops": 46.79997817908762, "iter_time": 0.44083553695678707, "loss": 0.10324086993932724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.46226714897078, "step_time": 0.40089748573303224} +{"epoch": 0, "iter": 2066, "iter_tflops": 13.406495560517056, "iter_time": 1.5388878784179687, "loss": 0.1145571693778038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.305426108806596, "step_time": 1.4421865768432618} +{"epoch": 0, "iter": 2067, "iter_tflops": 16.0372486450334, "iter_time": 1.2864484405517578, 
"loss": 0.10284978151321411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.854522686419624, "step_time": 1.0942251815795898} +{"epoch": 0, "iter": 2068, "iter_tflops": 14.83417966886083, "iter_time": 1.3907808837890625, "loss": 0.07666981220245361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.136540173918544, "step_time": 1.1375429553985597} +{"epoch": 0, "iter": 2069, "iter_tflops": 19.22738305335039, "iter_time": 0.8029778976440431, "loss": 0.4158860146999359, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 20.3056304557115, "step_time": 0.7603390426635742} +{"epoch": 0, "iter": 2070, "iter_tflops": 7.202749270920078, "iter_time": 2.143509796142578, "loss": 0.4027915894985199, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 9.538746222919274, "step_time": 1.6185736846923828} +{"epoch": 0, "iter": 2071, "iter_tflops": 8.35343308885173, "iter_time": 1.848241729736328, "loss": 0.5274191498756409, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 9.642866240304453, "step_time": 1.601096939086914} +{"epoch": 0, "iter": 2072, "iter_tflops": 22.933704692903834, "iter_time": 0.6732084426879883, "loss": 0.48788124322891235, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.027533043343453, "step_time": 0.6168871536254883} +{"epoch": 0, "iter": 2073, "iter_tflops": 21.67432391413777, "iter_time": 0.7501169509887695, "loss": 0.5323835015296936, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 22.98811238801106, "step_time": 0.7072471847534181} +{"epoch": 0, "iter": 2074, "iter_tflops": 11.698140632196338, "iter_time": 1.3898172607421875, "loss": 0.4274429678916931, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 14.672993540418565, "step_time": 1.1080409545898438} +{"epoch": 0, "iter": 2075, "iter_tflops": 24.72215092194763, "iter_time": 0.6576400985717774, "loss": 0.6976183652877808, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.5273304622847, "step_time": 0.6128878211975097} +{"epoch": 0, "iter": 2076, "iter_tflops": 24.251905946875265, "iter_time": 0.6703917541503905, 
"loss": 0.7135074138641357, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.15322403933538, "step_time": 0.6216548194885253} +{"epoch": 0, "iter": 2077, "iter_tflops": 38.891645784654685, "iter_time": 0.5304762268066406, "loss": 0.002783143660053611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.966774611094394, "step_time": 0.48016388702392576} +{"epoch": 0, "iter": 2078, "iter_tflops": 12.847828185289263, "iter_time": 1.605803970336914, "loss": 0.018640799447894096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.273757259581114, "step_time": 1.2677523193359375} +{"epoch": 0, "iter": 2079, "iter_tflops": 11.128180543086863, "iter_time": 1.8539502868652342, "loss": 0.008075818419456482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.87243166730544, "step_time": 1.4872009468078613} +{"epoch": 0, "iter": 2080, "iter_tflops": 26.20523070056441, "iter_time": 0.7872891387939452, "loss": 0.007067873142659664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.14523515776723, "step_time": 0.5554169578552246} +{"epoch": 0, "iter": 2081, "iter_tflops": 16.154088725413356, "iter_time": 1.17972509765625, "loss": 0.42197635769844055, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 17.202565849609766, "step_time": 1.1078221740722656} +{"epoch": 0, "iter": 2082, "iter_tflops": 16.306292459209345, "iter_time": 1.1687134857177734, "loss": 0.6185308694839478, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 19.361454919373845, "step_time": 0.9842950325012207} +{"epoch": 0, "iter": 2083, "iter_tflops": 29.491707663028112, "iter_time": 0.6461946563720702, "loss": 0.38609156012535095, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 31.55759536229305, "step_time": 0.6038921432495117} +{"epoch": 0, "iter": 2084, "iter_tflops": 29.461546962305192, "iter_time": 0.6468561859130859, "loss": 0.5177797675132751, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 31.65797509172003, "step_time": 0.6019773483276368} +{"epoch": 0, "iter": 2085, "iter_tflops": 32.776018832712715, "iter_time": 
0.6294569702148438, "loss": 0.41873571276664734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.325553281075706, "step_time": 0.5679498767852783} +{"epoch": 0, "iter": 2086, "iter_tflops": 38.26714743876477, "iter_time": 0.53913330078125, "loss": 0.307548463344574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7260009286687, "step_time": 0.4828697528839111} +{"epoch": 0, "iter": 2087, "iter_tflops": 40.60559981221006, "iter_time": 0.5080849342346191, "loss": 0.4089865982532501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.337475625437264, "step_time": 0.4653195343017579} +{"epoch": 0, "iter": 2088, "iter_tflops": 40.29833545916578, "iter_time": 0.5119589500427246, "loss": 0.33296722173690796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.03862632761535, "step_time": 0.4684772262573242} +{"epoch": 0, "iter": 2089, "iter_tflops": 17.742237790570133, "iter_time": 1.1628236389160156, "loss": 0.20887911319732666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.048379696920556, "step_time": 1.0830891571044923} +{"epoch": 0, "iter": 2090, "iter_tflops": 23.5933258202795, "iter_time": 0.874446174621582, "loss": 0.3439483642578125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.01543366031452, "step_time": 0.6873495063781738} +{"epoch": 0, "iter": 2091, "iter_tflops": 48.81194366402638, "iter_time": 0.42266486358642574, "loss": 0.21865898370742798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.595914367042745, "step_time": 0.3922565803527832} +{"epoch": 0, "iter": 2092, "iter_tflops": 54.608240191913175, "iter_time": 0.37780183792114264, "loss": 0.19802772998809814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.9290443285231, "step_time": 0.35010059547424316} +{"epoch": 0, "iter": 2093, "iter_tflops": 43.040830491601064, "iter_time": 0.47933771896362304, "loss": 0.16076971590518951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.74482499766876, "step_time": 0.4413556690216065} +{"epoch": 0, "iter": 2094, "iter_tflops": 9.181435614668022, "iter_time": 
2.247044403076172, "loss": 0.164119154214859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.13734149690008, "step_time": 1.8524253311157226} +{"epoch": 0, "iter": 2095, "iter_tflops": 14.584352224237197, "iter_time": 1.414604721069336, "loss": 0.19707773625850677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.56420594692845, "step_time": 1.1113372459411621} +{"epoch": 0, "iter": 2096, "iter_tflops": 33.61655678085156, "iter_time": 0.6137182235717774, "loss": 0.18277260661125183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.611524715380526, "step_time": 0.48416698646545414} +{"epoch": 0, "iter": 2097, "iter_tflops": 16.20809769259321, "iter_time": 1.1579541015625, "loss": 0.5113424062728882, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 16.85304657072592, "step_time": 1.1136403808593751} +{"epoch": 0, "iter": 2098, "iter_tflops": 14.555059248260655, "iter_time": 1.289464569091797, "loss": 0.4236653447151184, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 19.125714967622724, "step_time": 0.9813088417053223} +{"epoch": 0, "iter": 2099, "iter_tflops": 34.33865025611859, "iter_time": 0.5465629272460937, "loss": 0.5957684516906738, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 36.57950239223503, "step_time": 0.5130806045532226} +{"epoch": 0, "iter": 2100, "iter_tflops": 34.94237718614302, "iter_time": 0.5371195297241211, "loss": 0.27847468852996826, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 37.03524843103895, "step_time": 0.5067667694091796} +{"epoch": 0, "iter": 2101, "iter_tflops": 27.471540672089958, "iter_time": 0.7509987792968749, "loss": 0.7987545132637024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.136262898070513, "step_time": 0.708089900970459} +{"epoch": 0, "iter": 2102, "iter_tflops": 13.96184938172461, "iter_time": 1.4776762695312498, "loss": 0.9449533224105835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.650790719049972, "step_time": 1.1688481178283692} +{"epoch": 0, "iter": 2103, "iter_tflops": 39.304152796183125, "iter_time": 
0.5249087448120118, "loss": 0.7249901294708252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.00930107540237, "step_time": 0.47968911361694333} +{"epoch": 0, "iter": 2104, "iter_tflops": 39.15193386841111, "iter_time": 0.5269495391845703, "loss": 0.6037291884422302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88803988479885, "step_time": 0.4810453815460205} +{"epoch": 0, "iter": 2105, "iter_tflops": 19.417233746550487, "iter_time": 1.0625145568847656, "loss": 0.8539814352989197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.048946460757072, "step_time": 0.980148509979248} +{"epoch": 0, "iter": 2106, "iter_tflops": 16.387507487491504, "iter_time": 1.2589524993896486, "loss": 0.7905144095420837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.532607287107187, "step_time": 1.0562385864257813} +{"epoch": 0, "iter": 2107, "iter_tflops": 35.31693717837673, "iter_time": 0.5841699523925782, "loss": 0.7445231676101685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.61688240463919, "step_time": 0.5342506237030029} +{"epoch": 0, "iter": 2108, "iter_tflops": 38.9078758237309, "iter_time": 0.5302549438476563, "loss": 1.0399467945098877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.61818733967776, "step_time": 0.4840912952423096} +{"epoch": 0, "iter": 2109, "iter_tflops": 21.779102553423307, "iter_time": 0.9472885055541992, "loss": 0.17402392625808716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.714490679413355, "step_time": 0.8699783515930175} +{"epoch": 0, "iter": 2110, "iter_tflops": 19.1410503874621, "iter_time": 1.0778454208374022, "loss": 0.22493167221546173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.526367553110465, "step_time": 0.8082267665863038} +{"epoch": 0, "iter": 2111, "iter_tflops": 39.9659642473641, "iter_time": 0.5162165832519531, "loss": 0.1632789820432663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.06961484885787, "step_time": 0.4681478061676025} +{"epoch": 0, "iter": 2112, "iter_tflops": 42.549651594215376, "iter_time": 
0.4848710327148438, "loss": 0.18540272116661072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.4188201093015, "step_time": 0.4444553623199463} +{"epoch": 0, "iter": 2113, "iter_tflops": 31.443008450517215, "iter_time": 0.656142478942871, "loss": 0.4807376265525818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.69299590654813, "step_time": 0.5946760425567628} +{"epoch": 0, "iter": 2114, "iter_tflops": 35.814567562310685, "iter_time": 0.5760531234741211, "loss": 0.4374013841152191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.101225593321516, "step_time": 0.5276329116821289} +{"epoch": 0, "iter": 2115, "iter_tflops": 37.915735716044125, "iter_time": 0.5441301116943359, "loss": 0.49705398082733154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.600028400838724, "step_time": 0.49593940925598146} +{"epoch": 0, "iter": 2116, "iter_tflops": 43.81388626952972, "iter_time": 0.4708802452087402, "loss": 0.5095658302307129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0869762074011, "step_time": 0.42903703117370606} +{"epoch": 0, "iter": 2117, "iter_tflops": 18.028511114255515, "iter_time": 1.1443592529296875, "loss": 1.4302045106887817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.933438701083215, "step_time": 1.0896643676757813} +{"epoch": 0, "iter": 2118, "iter_tflops": 16.6936180460459, "iter_time": 1.2358671112060546, "loss": 1.2899211645126343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.35849654409704, "step_time": 0.922740644454956} +{"epoch": 0, "iter": 2119, "iter_tflops": 48.161691833798685, "iter_time": 0.42837144470214844, "loss": 1.1272608041763306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.965012564415076, "step_time": 0.39701892662048344} +{"epoch": 0, "iter": 2120, "iter_tflops": 48.14272584267984, "iter_time": 0.4285402030944825, "loss": 1.4528017044067383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.85426884223203, "step_time": 0.39786682891845704} +{"epoch": 0, "iter": 2121, "iter_tflops": 24.985366895756858, 
"iter_time": 0.8257270584106445, "loss": 0.5813966989517212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.205020941676388, "step_time": 0.7872954406738282} +{"epoch": 0, "iter": 2122, "iter_tflops": 46.92531319483709, "iter_time": 0.439658088684082, "loss": 0.57129967212677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.91821370109884, "step_time": 0.4051810150146484} +{"epoch": 0, "iter": 2123, "iter_tflops": 47.61079097159753, "iter_time": 0.43332809829711916, "loss": 0.6060967445373535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.3875682222286, "step_time": 0.40148024559020995} +{"epoch": 0, "iter": 2124, "iter_tflops": 45.64722705294979, "iter_time": 0.4519681663513183, "loss": 0.5587829947471619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.970166822668645, "step_time": 0.42129922866821284} +{"epoch": 0, "iter": 2125, "iter_tflops": 39.51943967275134, "iter_time": 0.416588680267334, "loss": 0.006951755378395319, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 43.50403500364897, "step_time": 0.3784327411651611} +{"epoch": 0, "iter": 2126, "iter_tflops": 34.97063481559371, "iter_time": 0.4707764472961426, "loss": 0.003449696581810713, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 39.01703889030617, "step_time": 0.42195286178588864} +{"epoch": 0, "iter": 2127, "iter_tflops": 36.77394778386689, "iter_time": 0.44769061279296873, "loss": 0.004023646470159292, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 40.83808210757076, "step_time": 0.40313722801208496} +{"epoch": 0, "iter": 2128, "iter_tflops": 39.56137342574724, "iter_time": 0.41614710998535154, "loss": 0.007151658646762371, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 43.64058422864934, "step_time": 0.377248643875122} +{"epoch": 0, "iter": 2129, "iter_tflops": 25.289241241047517, "iter_time": 0.815805160522461, "loss": 0.6230607628822327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.721963714423403, "step_time": 0.7442147216796875} +{"epoch": 0, "iter": 2130, "iter_tflops": 
21.50273432143357, "iter_time": 0.9594637222290039, "loss": 0.7012397050857544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.437804147966304, "step_time": 0.780363353729248} +{"epoch": 0, "iter": 2131, "iter_tflops": 38.580632879305064, "iter_time": 0.5347525939941405, "loss": 0.7053064703941345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.56617838897987, "step_time": 0.49634328460693355} +{"epoch": 0, "iter": 2132, "iter_tflops": 45.36598307846795, "iter_time": 0.4547701187133789, "loss": 0.5884779691696167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.51786413604692, "step_time": 0.41663940620422363} +{"epoch": 0, "iter": 2133, "iter_tflops": 18.253798123554837, "iter_time": 1.1302356567382814, "loss": 0.9016060829162598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.4090119551033, "step_time": 1.0629646453857422} +{"epoch": 0, "iter": 2134, "iter_tflops": 18.620616182015727, "iter_time": 1.1079705047607422, "loss": 1.0894653797149658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.367788956824402, "step_time": 0.9223573036193848} +{"epoch": 0, "iter": 2135, "iter_tflops": 46.39122996034006, "iter_time": 0.4447196922302246, "loss": 0.9019816517829895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.09785322614824, "step_time": 0.41181591987609856} +{"epoch": 0, "iter": 2136, "iter_tflops": 45.35522020129082, "iter_time": 0.4548780364990234, "loss": 1.0504100322723389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.85793577617462, "step_time": 0.42226699066162104} +{"epoch": 0, "iter": 2137, "iter_tflops": 34.80494419702988, "iter_time": 0.5927632980346679, "loss": 1.0305500030517578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.331139326609176, "step_time": 0.5526510543823242} +{"epoch": 0, "iter": 2138, "iter_tflops": 42.79207283748052, "iter_time": 0.4821241912841797, "loss": 1.0735422372817993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.75057675505128, "step_time": 0.441301368713379} +{"epoch": 0, "iter": 2139, "iter_tflops": 
46.47212804020612, "iter_time": 0.44394552993774417, "loss": 0.9643415212631226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.000709122687034, "step_time": 0.41261601829528805} +{"epoch": 0, "iter": 2140, "iter_tflops": 41.47608637961151, "iter_time": 0.4974214134216309, "loss": 0.8912987112998962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.56830210752015, "step_time": 0.46290956878662115} +{"epoch": 0, "iter": 2141, "iter_tflops": 25.844772090171013, "iter_time": 0.7982695083618164, "loss": 0.5129699110984802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.299892186509503, "step_time": 0.7557206954956055} +{"epoch": 0, "iter": 2142, "iter_tflops": 20.78284624713985, "iter_time": 0.9926981735229492, "loss": 0.4188596308231354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.240558801593668, "step_time": 0.8173786354064941} +{"epoch": 0, "iter": 2143, "iter_tflops": 40.41744146350732, "iter_time": 0.5104502601623535, "loss": 0.5277597904205322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25926295468786, "step_time": 0.4661418228149414} +{"epoch": 0, "iter": 2144, "iter_tflops": 43.28949699566422, "iter_time": 0.4765842742919922, "loss": 0.5437898635864258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.44787961195905, "step_time": 0.43481592178344725} +{"epoch": 0, "iter": 2145, "iter_tflops": 20.522820880474615, "iter_time": 1.0052757186889647, "loss": 0.9778871536254883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.82048736992382, "step_time": 0.9454918746948242} +{"epoch": 0, "iter": 2146, "iter_tflops": 16.17261251142749, "iter_time": 1.275680938720703, "loss": 1.1213229894638062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.09878236769227, "step_time": 0.9778333721160888} +{"epoch": 0, "iter": 2147, "iter_tflops": 38.08595998918213, "iter_time": 0.5416981353759766, "loss": 0.7749113440513611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.424226215784486, "step_time": 0.4980441493988037} +{"epoch": 0, "iter": 2148, "iter_tflops": 
38.58231893725526, "iter_time": 0.5347292251586914, "loss": 1.0367995500564575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.79152906457857, "step_time": 0.4936668739318848} +{"epoch": 0, "iter": 2149, "iter_tflops": 20.447613820901744, "iter_time": 0.8634309921264648, "loss": 0.17553439736366272, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 22.117707330834744, "step_time": 0.79823388671875} +{"epoch": 0, "iter": 2150, "iter_tflops": 16.215486890771633, "iter_time": 1.0887803497314452, "loss": 0.22440701723098755, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 19.868143313991496, "step_time": 0.8886136569976808} +{"epoch": 0, "iter": 2151, "iter_tflops": 44.574404463907996, "iter_time": 0.39608164596557616, "loss": 0.13973158597946167, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 48.53476543747761, "step_time": 0.36376200294494626} +{"epoch": 0, "iter": 2152, "iter_tflops": 41.217509184796334, "iter_time": 0.4283398933410644, "loss": 0.17452551424503326, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 44.96188616346623, "step_time": 0.392668212890625} +{"epoch": 0, "iter": 2153, "iter_tflops": 35.20482595159961, "iter_time": 0.5860302658081055, "loss": 0.6150850057601929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.79253228532288, "step_time": 0.5459039726257324} +{"epoch": 0, "iter": 2154, "iter_tflops": 19.17376153640429, "iter_time": 1.076006576538086, "loss": 0.7882512211799622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.037705883811693, "step_time": 0.8955359363555908} +{"epoch": 0, "iter": 2155, "iter_tflops": 36.97075738986342, "iter_time": 0.5580381622314453, "loss": 0.5676596760749817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7936360239475, "step_time": 0.5184520835876465} +{"epoch": 0, "iter": 2156, "iter_tflops": 41.90720733712459, "iter_time": 0.49230418395996095, "loss": 0.7554191946983337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59737447988913, "step_time": 0.45246231269836423} +{"epoch": 0, "iter": 2157, "iter_tflops": 
26.46501503961015, "iter_time": 0.7795609970092774, "loss": 0.5639617443084717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.305023797293615, "step_time": 0.7288845138549804} +{"epoch": 0, "iter": 2158, "iter_tflops": 10.490181481444166, "iter_time": 1.966705108642578, "loss": 0.5119621157646179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.342420622864005, "step_time": 1.54627815246582} +{"epoch": 0, "iter": 2159, "iter_tflops": 13.104728279526503, "iter_time": 1.5743244018554687, "loss": 0.5006077885627747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.7852128311418, "step_time": 1.3953869819641112} +{"epoch": 0, "iter": 2160, "iter_tflops": 39.941793695779346, "iter_time": 0.5165289688110352, "loss": 0.5904937386512756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.71675532220314, "step_time": 0.47192645835876473} +{"epoch": 0, "iter": 2161, "iter_tflops": 13.495845977880627, "iter_time": 1.2077259216308593, "loss": 0.42688778042793274, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 14.548492115764446, "step_time": 1.1203417434692382} +{"epoch": 0, "iter": 2162, "iter_tflops": 16.635721294631182, "iter_time": 0.9797761535644532, "loss": 0.329669326543808, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 18.68258357021732, "step_time": 0.8724319610595703} +{"epoch": 0, "iter": 2163, "iter_tflops": 27.626976961068415, "iter_time": 0.5899770736694336, "loss": 0.5774446129798889, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.293538919379326, "step_time": 0.5564122200012206} +{"epoch": 0, "iter": 2164, "iter_tflops": 29.307032881634576, "iter_time": 0.5561560287475587, "loss": 0.4714067280292511, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 31.02732706971327, "step_time": 0.5253202438354492} +{"epoch": 0, "iter": 2165, "iter_tflops": 25.997927138485675, "iter_time": 0.7935668640136718, "loss": 0.032708294689655304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.41413066601921, "step_time": 0.7525715026855468} +{"epoch": 0, "iter": 2166, "iter_tflops": 
13.212904234276197, "iter_time": 1.5614351806640625, "loss": 0.027506578713655472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.687709989024093, "step_time": 1.2363046531677249} +{"epoch": 0, "iter": 2167, "iter_tflops": 40.95401704074182, "iter_time": 0.5037623901367188, "loss": 0.12624743580818176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.012074684508704, "step_time": 0.4583457584381103} +{"epoch": 0, "iter": 2168, "iter_tflops": 43.641619182412974, "iter_time": 0.472738956451416, "loss": 0.09359632432460785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.31535505142071, "step_time": 0.4270090427398681} +{"epoch": 0, "iter": 2169, "iter_tflops": 21.406513225184188, "iter_time": 0.9637764587402343, "loss": 0.10241147130727768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.71696620691364, "step_time": 0.9081799621582032} +{"epoch": 0, "iter": 2170, "iter_tflops": 16.978771342796684, "iter_time": 1.215111099243164, "loss": 0.11939799040555954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.053433617766483, "step_time": 1.0288060340881346} +{"epoch": 0, "iter": 2171, "iter_tflops": 47.06616941139413, "iter_time": 0.4383423118591308, "loss": 0.0915812999010086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.5213456982374, "step_time": 0.40043778419494624} +{"epoch": 0, "iter": 2172, "iter_tflops": 51.72823599901205, "iter_time": 0.39883620834350586, "loss": 0.11528453975915909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.634459318609856, "step_time": 0.36428516769409175} +{"epoch": 0, "iter": 2173, "iter_tflops": 42.99412548628043, "iter_time": 0.4798584289550781, "loss": 0.38737428188323975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.038900402227625, "step_time": 0.4385964241027832} +{"epoch": 0, "iter": 2174, "iter_tflops": 37.67098808938884, "iter_time": 0.547665313720703, "loss": 0.3986978232860565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.39359649880312, "step_time": 0.49841268348693857} +{"epoch": 0, "iter": 2175, 
"iter_tflops": 36.82208248331846, "iter_time": 0.5602913284301758, "loss": 0.24250097572803497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.35193219031298, "step_time": 0.5112789497375488} +{"epoch": 0, "iter": 2176, "iter_tflops": 41.60433275945999, "iter_time": 0.4958880996704102, "loss": 0.2795124351978302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.232449277682484, "step_time": 0.4561126766204834} +{"epoch": 0, "iter": 2177, "iter_tflops": 34.574798199827555, "iter_time": 0.5967090072631837, "loss": 0.14469078183174133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.38534452583627, "step_time": 0.5374731884002685} +{"epoch": 0, "iter": 2178, "iter_tflops": 49.07097863041818, "iter_time": 0.42043370819091797, "loss": 0.152792289853096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.941462805758334, "step_time": 0.3824718952178955} +{"epoch": 0, "iter": 2179, "iter_tflops": 52.38021645903274, "iter_time": 0.3938718643188476, "loss": 0.1998978704214096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.22561604234454, "step_time": 0.3605219993591309} +{"epoch": 0, "iter": 2180, "iter_tflops": 47.75130827307136, "iter_time": 0.43205294799804694, "loss": 0.16766934096813202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.630942042839834, "step_time": 0.3995877799987793} +{"epoch": 0, "iter": 2181, "iter_tflops": 33.144999177204014, "iter_time": 0.6224496612548829, "loss": 0.7155439257621765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.25613156755671, "step_time": 0.5851774597167969} +{"epoch": 0, "iter": 2182, "iter_tflops": 8.968937366280944, "iter_time": 2.300282928466797, "loss": 0.7270408868789673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.27208084997026, "step_time": 2.0084629211425784} +{"epoch": 0, "iter": 2183, "iter_tflops": 12.814718230415506, "iter_time": 1.6099529571533204, "loss": 0.6903733015060425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.993654039760003, "step_time": 1.3759883651733398} +{"epoch": 0, "iter": 2184, 
"iter_tflops": 30.73982691152786, "iter_time": 0.6711519088745117, "loss": 0.8031356930732727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.83207969755743, "step_time": 0.5312899456024169} +{"epoch": 0, "iter": 2185, "iter_tflops": 12.759751298172809, "iter_time": 1.2452784118652342, "loss": 0.4461441934108734, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 13.812706993779743, "step_time": 1.1503496627807617} +{"epoch": 0, "iter": 2186, "iter_tflops": 16.59494262335894, "iter_time": 0.9574870605468749, "loss": 0.3516320288181305, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 21.37467754752455, "step_time": 0.7433769607543945} +{"epoch": 0, "iter": 2187, "iter_tflops": 27.04042921542539, "iter_time": 0.5876179962158203, "loss": 0.6207762956619263, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 28.65663417479792, "step_time": 0.5544769401550294} +{"epoch": 0, "iter": 2188, "iter_tflops": 27.812563917059713, "iter_time": 0.5713044967651366, "loss": 0.479583740234375, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 29.442172653379437, "step_time": 0.5396830940246582} +{"epoch": 0, "iter": 2189, "iter_tflops": 24.553026877436544, "iter_time": 0.8402668075561524, "loss": 0.16516734659671783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.825551043389357, "step_time": 0.7988636322021485} +{"epoch": 0, "iter": 2190, "iter_tflops": 12.983619803621192, "iter_time": 1.5890093688964846, "loss": 0.18999379873275757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.677963804149417, "step_time": 1.2370271186828614} +{"epoch": 0, "iter": 2191, "iter_tflops": 45.93082095720406, "iter_time": 0.44917754745483396, "loss": 0.19182616472244263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.684484620767996, "step_time": 0.41524217605590824} +{"epoch": 0, "iter": 2192, "iter_tflops": 54.863746072566975, "iter_time": 0.3760423774719238, "loss": 0.1292298436164856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.4052219642377, "step_time": 0.34729427528381346} +{"epoch": 0, "iter": 
2193, "iter_tflops": 25.698841926097238, "iter_time": 0.8028024597167969, "loss": 0.19156990945339203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.280008705702052, "step_time": 0.756271514892578} +{"epoch": 0, "iter": 2194, "iter_tflops": 18.56753405168717, "iter_time": 1.1111380462646483, "loss": 0.1580333709716797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.427905728566643, "step_time": 0.9198849754333497} +{"epoch": 0, "iter": 2195, "iter_tflops": 40.05010187830465, "iter_time": 0.5151321105957031, "loss": 0.11955872178077698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.08770029847114, "step_time": 0.4679557647705078} +{"epoch": 0, "iter": 2196, "iter_tflops": 41.760712858036776, "iter_time": 0.4940311622619629, "loss": 0.13236869871616364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.818390228757856, "step_time": 0.45027975463867187} +{"epoch": 0, "iter": 2197, "iter_tflops": 19.045413733892694, "iter_time": 1.0832578277587892, "loss": 0.3923475742340088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.57993931634634, "step_time": 1.0024856338500978} +{"epoch": 0, "iter": 2198, "iter_tflops": 19.134796565188534, "iter_time": 1.078197692871094, "loss": 0.4662884771823883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.454460419120363, "step_time": 0.8796234550476074} +{"epoch": 0, "iter": 2199, "iter_tflops": 41.85106404708302, "iter_time": 0.4929646110534668, "loss": 0.3556196093559265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.894164230123664, "step_time": 0.44953631591796867} +{"epoch": 0, "iter": 2200, "iter_tflops": 44.05377647835155, "iter_time": 0.46831611633300785, "loss": 0.2730894386768341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.071314597777466, "step_time": 0.42917681121826173} +{"epoch": 0, "iter": 2201, "iter_tflops": 21.833425427423528, "iter_time": 0.9449315948486329, "loss": 1.1014056205749512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.17458025050813, "step_time": 0.8902466964721679} +{"epoch": 0, 
"iter": 2202, "iter_tflops": 25.244213053001552, "iter_time": 0.8172603149414063, "loss": 0.7066391706466675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.12988136171423, "step_time": 0.7334226989746093} +{"epoch": 0, "iter": 2203, "iter_tflops": 43.40121263257471, "iter_time": 0.47535753631591793, "loss": 1.0649290084838867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.62700509140974, "step_time": 0.4424709129333496} +{"epoch": 0, "iter": 2204, "iter_tflops": 44.28067305787151, "iter_time": 0.46591643905639646, "loss": 0.9614747166633606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.15550233639004, "step_time": 0.4375119018554688} +{"epoch": 0, "iter": 2205, "iter_tflops": 24.570408471168843, "iter_time": 0.8396723861694336, "loss": 0.6322875022888184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.86793809841956, "step_time": 0.7975546188354492} +{"epoch": 0, "iter": 2206, "iter_tflops": 12.785444726383501, "iter_time": 1.6136390991210936, "loss": 0.6401415467262268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.071226838741564, "step_time": 1.3689060440063476} +{"epoch": 0, "iter": 2207, "iter_tflops": 38.94593831111655, "iter_time": 0.529736717224121, "loss": 0.5922984480857849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56147816785439, "step_time": 0.48473630142211915} +{"epoch": 0, "iter": 2208, "iter_tflops": 39.51687332206129, "iter_time": 0.5220831451416016, "loss": 0.7954685688018799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.30108340889882, "step_time": 0.4764567508697509} +{"epoch": 0, "iter": 2209, "iter_tflops": 21.585255266677567, "iter_time": 0.9557956695556641, "loss": 0.10131945461034775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.220865400326126, "step_time": 0.8884722061157228} +{"epoch": 0, "iter": 2210, "iter_tflops": 15.95510553748797, "iter_time": 1.2930715789794922, "loss": 0.11363019049167633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.608587615500646, "step_time": 1.0010920639038086} +{"epoch": 0, 
"iter": 2211, "iter_tflops": 52.895346352380514, "iter_time": 0.39003607940673823, "loss": 0.08597790449857712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.7798200545543, "step_time": 0.3570639972686767} +{"epoch": 0, "iter": 2212, "iter_tflops": 56.43891017770713, "iter_time": 0.3655473403930664, "loss": 0.13583867251873016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.37867465016837, "step_time": 0.33612803840637206} +{"epoch": 0, "iter": 2213, "iter_tflops": 27.776648101795228, "iter_time": 0.7427495727539063, "loss": 0.30567672848701477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.36560717264985, "step_time": 0.7025597457885742} +{"epoch": 0, "iter": 2214, "iter_tflops": 13.007178841809651, "iter_time": 1.5861313018798828, "loss": 0.38135629892349243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.315465641411516, "step_time": 1.3470758247375487} +{"epoch": 0, "iter": 2215, "iter_tflops": 32.06165797586588, "iter_time": 0.6434818038940431, "loss": 0.4668036103248596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.686804254955554, "step_time": 0.5781154670715332} +{"epoch": 0, "iter": 2216, "iter_tflops": 37.772501480452625, "iter_time": 0.5461934661865234, "loss": 0.34662219882011414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10116944487788, "step_time": 0.5019587955474853} +{"epoch": 0, "iter": 2217, "iter_tflops": 17.594477473328304, "iter_time": 1.172589157104492, "loss": 0.052800267934799194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.587337654385504, "step_time": 1.109954200744629} +{"epoch": 0, "iter": 2218, "iter_tflops": 13.680079362071366, "iter_time": 1.5081121215820312, "loss": 0.05767105892300606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.344969620765667, "step_time": 0.9665552997589111} +{"epoch": 0, "iter": 2219, "iter_tflops": 52.691324530071405, "iter_time": 0.39154630661010736, "loss": 0.022842885926365852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.501728893793285, "step_time": 0.35879083824157715} 
+{"epoch": 0, "iter": 2220, "iter_tflops": 58.72134688444904, "iter_time": 0.35133890151977537, "loss": 0.03169066086411476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.3651073679292, "step_time": 0.320532262802124} +{"epoch": 0, "iter": 2221, "iter_tflops": 30.008763382596996, "iter_time": 0.6875022888183594, "loss": 0.8436997532844543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.913793258004862, "step_time": 0.6464632186889648} +{"epoch": 0, "iter": 2222, "iter_tflops": 18.47617965473569, "iter_time": 1.1166320037841797, "loss": 0.7661040425300598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.430222491279107, "step_time": 0.9197899627685547} +{"epoch": 0, "iter": 2223, "iter_tflops": 38.0435283949306, "iter_time": 0.5423023147583008, "loss": 1.0514665842056274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34671953540016, "step_time": 0.4989777603149414} +{"epoch": 0, "iter": 2224, "iter_tflops": 39.47169432627247, "iter_time": 0.5226807174682617, "loss": 0.8143804669380188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.08750886649492, "step_time": 0.4788184337615967} +{"epoch": 0, "iter": 2225, "iter_tflops": 8.42170319815845, "iter_time": 1.1491412353515624, "loss": 0.01685389317572117, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 9.133262858430033, "step_time": 1.0596132583618165} +{"epoch": 0, "iter": 2226, "iter_tflops": 8.642419640450257, "iter_time": 1.1197936248779297, "loss": 0.004642057232558727, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 11.335626200961153, "step_time": 0.8537443141937255} +{"epoch": 0, "iter": 2227, "iter_tflops": 27.742857714963296, "iter_time": 0.3488366813659668, "loss": 0.004621278494596481, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 30.560403115759907, "step_time": 0.3166753520965576} +{"epoch": 0, "iter": 2228, "iter_tflops": 24.517474555312717, "iter_time": 0.3947277030944824, "loss": 0.007337978575378656, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 26.974016571919737, "step_time": 
0.3587795829772949} +{"epoch": 0, "iter": 2229, "iter_tflops": 48.45343374721042, "iter_time": 0.42579218673706054, "loss": 0.028559762984514236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.94065513978167, "step_time": 0.3897022705078125} +{"epoch": 0, "iter": 2230, "iter_tflops": 34.761093001016505, "iter_time": 0.5935110702514648, "loss": 0.06390349566936493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.748048588927865, "step_time": 0.4715888862609864} +{"epoch": 0, "iter": 2231, "iter_tflops": 44.46606483080275, "iter_time": 0.4639738998413086, "loss": 0.0866851955652237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11693203933002, "step_time": 0.42004035377502436} +{"epoch": 0, "iter": 2232, "iter_tflops": 43.82788481254927, "iter_time": 0.47072984695434567, "loss": 0.07544240355491638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.19242842667626, "step_time": 0.42809823417663573} +{"epoch": 0, "iter": 2233, "iter_tflops": 20.524309035948367, "iter_time": 0.8622102279663085, "loss": 0.005928363651037216, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 22.270035263388525, "step_time": 0.7946224136352539} +{"epoch": 0, "iter": 2234, "iter_tflops": 19.84474656720601, "iter_time": 0.8917357101440428, "loss": 0.01768329180777073, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 24.613311242253275, "step_time": 0.7189714946746826} +{"epoch": 0, "iter": 2235, "iter_tflops": 44.49500978481744, "iter_time": 0.39771356964111326, "loss": 0.002595334779471159, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 49.1956258886926, "step_time": 0.3597122478485108} +{"epoch": 0, "iter": 2236, "iter_tflops": 41.049406284758604, "iter_time": 0.4310968360900879, "loss": 0.005237988196313381, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 45.28087080160479, "step_time": 0.390811149597168} +{"epoch": 0, "iter": 2237, "iter_tflops": 22.56114044879564, "iter_time": 0.9144525985717774, "loss": 0.8518004417419434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.095164617357693, 
"step_time": 0.8562337646484376} +{"epoch": 0, "iter": 2238, "iter_tflops": 31.353348974941262, "iter_time": 0.658018814086914, "loss": 0.9404967427253723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.49068274383162, "step_time": 0.5502992210388183} +{"epoch": 0, "iter": 2239, "iter_tflops": 44.2367127671545, "iter_time": 0.46637944412231447, "loss": 0.6670825481414795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.62116019746875, "step_time": 0.4332337436676025} +{"epoch": 0, "iter": 2240, "iter_tflops": 47.47911515257333, "iter_time": 0.43452986526489257, "loss": 0.7789754271507263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.4411687204533, "step_time": 0.4010619125366211} +{"epoch": 0, "iter": 2241, "iter_tflops": 40.419702114584865, "iter_time": 0.5104217109680177, "loss": 1.0513534545898438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.83130005848307, "step_time": 0.47069316864013677} +{"epoch": 0, "iter": 2242, "iter_tflops": 38.80364823809984, "iter_time": 0.5316792221069336, "loss": 0.6743921637535095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.45534758486174, "step_time": 0.4976702575683594} +{"epoch": 0, "iter": 2243, "iter_tflops": 44.94006459924025, "iter_time": 0.459080192565918, "loss": 1.0045788288116455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.63059477197107, "step_time": 0.4242410278320312} +{"epoch": 0, "iter": 2244, "iter_tflops": 45.30343699571998, "iter_time": 0.4553979759216309, "loss": 1.1148685216903687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.68214401370328, "step_time": 0.42379180145263673} +{"epoch": 0, "iter": 2245, "iter_tflops": 28.073664924712805, "iter_time": 0.7348913497924805, "loss": 0.37971219420433044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.770104649721084, "step_time": 0.6930138053894044} +{"epoch": 0, "iter": 2246, "iter_tflops": 19.68558010292505, "iter_time": 1.0480307617187499, "loss": 0.2680431008338928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.19516811073757, 
"step_time": 0.9295308513641357} +{"epoch": 0, "iter": 2247, "iter_tflops": 43.455059231347704, "iter_time": 0.47476850509643553, "loss": 0.2926664352416992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.548956469776556, "step_time": 0.43389161491394046} +{"epoch": 0, "iter": 2248, "iter_tflops": 43.58948198136219, "iter_time": 0.47330439758300785, "loss": 0.1783066838979721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.54194944813582, "step_time": 0.4339555644989013} +{"epoch": 0, "iter": 2249, "iter_tflops": 16.986737456844633, "iter_time": 1.2145412597656249, "loss": 0.45901304483413696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.727432171338815, "step_time": 1.163794807434082} +{"epoch": 0, "iter": 2250, "iter_tflops": 18.17790242194367, "iter_time": 1.134954574584961, "loss": 0.462467223405838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.14873166923268, "step_time": 0.9314796810150145} +{"epoch": 0, "iter": 2251, "iter_tflops": 52.2410730830703, "iter_time": 0.3949209365844727, "loss": 0.503945529460907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.69468844895105, "step_time": 0.36389817237854} +{"epoch": 0, "iter": 2252, "iter_tflops": 48.99096471501659, "iter_time": 0.42112037658691415, "loss": 0.47596120834350586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.56017106984952, "step_time": 0.39252333259582517} +{"epoch": 0, "iter": 2253, "iter_tflops": 32.740242903352474, "iter_time": 0.6301447906494141, "loss": 0.6494265198707581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.93621805878653, "step_time": 0.5905359725952147} +{"epoch": 0, "iter": 2254, "iter_tflops": 19.880571304693966, "iter_time": 1.0377515411376954, "loss": 0.6136089563369751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.403040488829483, "step_time": 0.9209059600830077} +{"epoch": 0, "iter": 2255, "iter_tflops": 48.01848518953892, "iter_time": 0.4296489868164063, "loss": 0.6664339303970337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.9511839407795, 
"step_time": 0.39712460708618164} +{"epoch": 0, "iter": 2256, "iter_tflops": 51.13335060107604, "iter_time": 0.40347626876831055, "loss": 0.732890248298645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.15222941943516, "step_time": 0.37407542228698726} +{"epoch": 0, "iter": 2257, "iter_tflops": 35.648825369758995, "iter_time": 0.5787313690185547, "loss": 0.27768370509147644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.289931117307056, "step_time": 0.5388125} +{"epoch": 0, "iter": 2258, "iter_tflops": 13.997364946536647, "iter_time": 1.4739269561767578, "loss": 0.2305680513381958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.275501446363265, "step_time": 1.2676164588928223} +{"epoch": 0, "iter": 2259, "iter_tflops": 45.7341507307897, "iter_time": 0.45110914230346677, "loss": 0.25220218300819397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.41165418030648, "step_time": 0.4092524604797363} +{"epoch": 0, "iter": 2260, "iter_tflops": 51.63764091625837, "iter_time": 0.3995359420776367, "loss": 0.21624693274497986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.03769081464494, "step_time": 0.36816459083557135} +{"epoch": 0, "iter": 2261, "iter_tflops": 27.450827388937796, "iter_time": 0.7515654525756837, "loss": 0.07541132718324661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.137791826903353, "step_time": 0.7080527458190917} +{"epoch": 0, "iter": 2262, "iter_tflops": 23.26707474894347, "iter_time": 0.886707664489746, "loss": 0.15235862135887146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.02820363405836, "step_time": 0.7926437721252442} +{"epoch": 0, "iter": 2263, "iter_tflops": 49.04894549117491, "iter_time": 0.42062257003784187, "loss": 0.11715979874134064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.34074711535823, "step_time": 0.38677923774719236} +{"epoch": 0, "iter": 2264, "iter_tflops": 52.89268893049436, "iter_time": 0.3900556755065918, "loss": 0.1566728800535202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.66766952859474, 
"step_time": 0.35775840568542483} +{"epoch": 0, "iter": 2265, "iter_tflops": 29.745171313900162, "iter_time": 0.693594711303711, "loss": 0.4747159481048584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.52824224690317, "step_time": 0.6543686561584472} +{"epoch": 0, "iter": 2266, "iter_tflops": 40.04985749606391, "iter_time": 0.5151352539062499, "loss": 0.42801129817962646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.79652506903202, "step_time": 0.40615167045593253} +{"epoch": 0, "iter": 2267, "iter_tflops": 48.953943560657166, "iter_time": 0.42143884658813485, "loss": 0.5524272322654724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.91741864903223, "step_time": 0.3898733921051026} +{"epoch": 0, "iter": 2268, "iter_tflops": 47.86190739484926, "iter_time": 0.43105456161499023, "loss": 0.5153247117996216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.71964990302935, "step_time": 0.3989024200439453} +{"epoch": 0, "iter": 2269, "iter_tflops": 36.72485864185502, "iter_time": 0.5617746200561524, "loss": 1.1614552736282349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.33264231645189, "step_time": 0.5245285415649414} +{"epoch": 0, "iter": 2270, "iter_tflops": 17.920571282249355, "iter_time": 1.1512519989013672, "loss": 0.8836381435394287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.854771475599133, "step_time": 0.9027039947509765} +{"epoch": 0, "iter": 2271, "iter_tflops": 39.483057645554865, "iter_time": 0.5225302886962891, "loss": 0.6965808272361755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.20102590793668, "step_time": 0.4888765869140625} +{"epoch": 0, "iter": 2272, "iter_tflops": 42.56654210746602, "iter_time": 0.48467863464355465, "loss": 0.9564002752304077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.474706356607385, "step_time": 0.4536828308105469} +{"epoch": 0, "iter": 2273, "iter_tflops": 34.3081732568867, "iter_time": 0.6013463134765625, "loss": 0.07862380146980286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
36.795442071008374, "step_time": 0.5606969871520996} +{"epoch": 0, "iter": 2274, "iter_tflops": 10.178498819305442, "iter_time": 2.0269289093017577, "loss": 0.07733962684869766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.431959107698367, "step_time": 1.6595207023620606} +{"epoch": 0, "iter": 2275, "iter_tflops": 10.422009050111111, "iter_time": 1.9795697174072264, "loss": 0.09129826724529266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.625802923181809, "step_time": 1.6340420989990234} +{"epoch": 0, "iter": 2276, "iter_tflops": 36.96214943392449, "iter_time": 0.5581681213378906, "loss": 0.14591841399669647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14465000343383, "step_time": 0.5014283390045167} +{"epoch": 0, "iter": 2277, "iter_tflops": 17.60639625556482, "iter_time": 0.8606485595703125, "loss": 0.5096384286880493, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 18.659846995581898, "step_time": 0.8120602264404297} +{"epoch": 0, "iter": 2278, "iter_tflops": 6.652380884939449, "iter_time": 2.2778190002441407, "loss": 0.4292275607585907, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 8.33643880543006, "step_time": 1.817672981262207} +{"epoch": 0, "iter": 2279, "iter_tflops": 8.448206520426487, "iter_time": 1.7936256103515624, "loss": 0.48794275522232056, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 9.73565041104302, "step_time": 1.5564362869262696} +{"epoch": 0, "iter": 2280, "iter_tflops": 23.29662686566517, "iter_time": 0.6504340591430663, "loss": 0.5194759964942932, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.242637870627494, "step_time": 0.6002906532287597} +{"epoch": 0, "iter": 2281, "iter_tflops": 14.136866131808047, "iter_time": 1.1616667327880859, "loss": 0.3685090243816376, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 15.060699216529086, "step_time": 1.0904093399047854} +{"epoch": 0, "iter": 2282, "iter_tflops": 17.11652648217321, "iter_time": 0.959442741394043, "loss": 0.511701226234436, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 
20.523245619862085, "step_time": 0.8001817741394043} +{"epoch": 0, "iter": 2283, "iter_tflops": 25.879399549190993, "iter_time": 0.6345714111328125, "loss": 0.5337311029434204, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 27.842899629831887, "step_time": 0.5898210067749023} +{"epoch": 0, "iter": 2284, "iter_tflops": 25.35412444426381, "iter_time": 0.6477181701660155, "loss": 0.41661226749420166, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 27.221149385828962, "step_time": 0.6032929344177247} +{"epoch": 0, "iter": 2285, "iter_tflops": 26.582033239310434, "iter_time": 0.776129249572754, "loss": 0.11570123583078384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.458500627924547, "step_time": 0.7249536361694335} +{"epoch": 0, "iter": 2286, "iter_tflops": 20.282652239198633, "iter_time": 1.0171792755126952, "loss": 0.12491759657859802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.48804854528322, "step_time": 0.842496431350708} +{"epoch": 0, "iter": 2287, "iter_tflops": 38.414553496252175, "iter_time": 0.5370645141601563, "loss": 0.08179380744695663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.168712185625836, "step_time": 0.4892512111663819} +{"epoch": 0, "iter": 2288, "iter_tflops": 43.046097883511365, "iter_time": 0.47927906417846683, "loss": 0.12978199124336243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.489236320980226, "step_time": 0.434437255859375} +{"epoch": 0, "iter": 2289, "iter_tflops": 24.42035946918777, "iter_time": 0.8448316879272461, "loss": 0.7711710929870605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.874711989602638, "step_time": 0.7676768226623535} +{"epoch": 0, "iter": 2290, "iter_tflops": 46.321975823327726, "iter_time": 0.4453845748901367, "loss": 0.9583508968353271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.18516363748921, "step_time": 0.41109945678710935} +{"epoch": 0, "iter": 2291, "iter_tflops": 46.3508054113103, "iter_time": 0.44510755157470705, "loss": 0.7133151292800903, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 49.740197922026425, "step_time": 0.41477706909179696} +{"epoch": 0, "iter": 2292, "iter_tflops": 52.225137089923386, "iter_time": 0.3950414428710938, "loss": 0.8636928796768188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.48471313795073, "step_time": 0.3652509212493897} +{"epoch": 0, "iter": 2293, "iter_tflops": 28.89466935599555, "iter_time": 0.7140103683471679, "loss": 0.504987895488739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.52618407084336, "step_time": 0.6758490829467774} +{"epoch": 0, "iter": 2294, "iter_tflops": 11.348461614770152, "iter_time": 1.817963897705078, "loss": 0.5176270008087158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.060437936825522, "step_time": 1.4673151435852052} +{"epoch": 0, "iter": 2295, "iter_tflops": 14.320723418892388, "iter_time": 1.440646041870117, "loss": 0.43155378103256226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.676609788866006, "step_time": 1.1671408576965334} +{"epoch": 0, "iter": 2296, "iter_tflops": 19.02604903443891, "iter_time": 1.0843603668212891, "loss": 0.4515368938446045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.129281803593624, "step_time": 0.8550231075286865} +{"epoch": 0, "iter": 2297, "iter_tflops": 18.358199971769565, "iter_time": 0.8365393142700195, "loss": 0.41647425293922424, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 19.328055257127357, "step_time": 0.7945629196166992} +{"epoch": 0, "iter": 2298, "iter_tflops": 9.158558338030353, "iter_time": 1.6768311614990234, "loss": 0.4652513265609741, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 10.424149657442381, "step_time": 1.4732478446960449} +{"epoch": 0, "iter": 2299, "iter_tflops": 25.01615573600579, "iter_time": 0.6138975219726562, "loss": 0.5895025730133057, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 26.832735305888814, "step_time": 0.572336582183838} +{"epoch": 0, "iter": 2300, "iter_tflops": 24.06972631743081, "iter_time": 0.6380361709594727, "loss": 0.5100792050361633, "lr": 3e-05, "seqlen": 6144.0, 
"step_tflops": 25.747156715768867, "step_time": 0.5964680366516113} +{"epoch": 0, "iter": 2301, "iter_tflops": 25.152402743292644, "iter_time": 0.8202434463500976, "loss": 0.9672709107398987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.966548688573184, "step_time": 0.7650624389648437} +{"epoch": 0, "iter": 2302, "iter_tflops": 12.394948041224954, "iter_time": 1.6644759979248047, "loss": 0.8984353542327881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.870420461439219, "step_time": 1.3873914031982422} +{"epoch": 0, "iter": 2303, "iter_tflops": 14.701395954598073, "iter_time": 1.403342483520508, "loss": 0.7978973388671875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.087958688907097, "step_time": 1.2073468742370608} +{"epoch": 0, "iter": 2304, "iter_tflops": 37.767080987012044, "iter_time": 0.5462718582153321, "loss": 1.0265973806381226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.45101097931438, "step_time": 0.4748127384185791} +{"epoch": 0, "iter": 2305, "iter_tflops": 21.18992478973155, "iter_time": 0.767264533996582, "loss": 0.4977502226829529, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 22.39927812652623, "step_time": 0.7258393630981445} +{"epoch": 0, "iter": 2306, "iter_tflops": 13.863323664202795, "iter_time": 1.1727546844482422, "loss": 0.524150013923645, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 17.812818780463797, "step_time": 0.9127290840148925} +{"epoch": 0, "iter": 2307, "iter_tflops": 28.80296331879806, "iter_time": 0.5644654541015626, "loss": 0.4382627606391907, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 30.664726746146396, "step_time": 0.5301947708129882} +{"epoch": 0, "iter": 2308, "iter_tflops": 29.102704717474296, "iter_time": 0.5586517791748047, "loss": 0.3808940649032593, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 30.887886658950407, "step_time": 0.5263642005920409} +{"epoch": 0, "iter": 2309, "iter_tflops": 29.987021599774664, "iter_time": 0.6880007553100586, "loss": 0.5114539265632629, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 31.640766212499667, "step_time": 0.6520415267944335} +{"epoch": 0, "iter": 2310, "iter_tflops": 14.598716224939945, "iter_time": 1.413212860107422, "loss": 0.43227726221084595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.493417597158388, "step_time": 1.1793632316589355} +{"epoch": 0, "iter": 2311, "iter_tflops": 48.75722993005804, "iter_time": 0.4231391639709473, "loss": 0.5343942046165466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.73802813283086, "step_time": 0.3911995620727539} +{"epoch": 0, "iter": 2312, "iter_tflops": 50.92170673747473, "iter_time": 0.40515322113037117, "loss": 0.46221908926963806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.767502477488534, "step_time": 0.37670320129394524} +{"epoch": 0, "iter": 2313, "iter_tflops": 34.628266419030986, "iter_time": 0.5957876510620117, "loss": 0.5239165425300598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.20090599748029, "step_time": 0.5545857810974121} +{"epoch": 0, "iter": 2314, "iter_tflops": 11.435283983693276, "iter_time": 1.8041610107421875, "loss": 0.3688257038593292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.070051808910026, "step_time": 1.3690127792358397} +{"epoch": 0, "iter": 2315, "iter_tflops": 40.362894994308036, "iter_time": 0.5111400833129883, "loss": 0.5668297410011292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.21438453166809, "step_time": 0.46661496543884273} +{"epoch": 0, "iter": 2316, "iter_tflops": 44.863235568186795, "iter_time": 0.4598663749694824, "loss": 0.4928445518016815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.942263044250936, "step_time": 0.42153942680358886} +{"epoch": 0, "iter": 2317, "iter_tflops": 17.32359594608078, "iter_time": 1.1909244232177734, "loss": 0.1459241807460785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.40312089741143, "step_time": 1.1210649337768555} +{"epoch": 0, "iter": 2318, "iter_tflops": 19.17763393147016, "iter_time": 1.075789306640625, "loss": 0.18225978314876556, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 23.513662660692752, "step_time": 0.8774087562561035} +{"epoch": 0, "iter": 2319, "iter_tflops": 48.3785095763897, "iter_time": 0.4264516143798828, "loss": 0.20084187388420105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.94596442601027, "step_time": 0.37547968673706056} +{"epoch": 0, "iter": 2320, "iter_tflops": 49.888944825254995, "iter_time": 0.4135403861999512, "loss": 0.11361004412174225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.77411217007507, "step_time": 0.3836621875762939} +{"epoch": 0, "iter": 2321, "iter_tflops": 23.683755848798597, "iter_time": 0.8711073379516602, "loss": 0.8566893935203552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.84188834970207, "step_time": 0.8304961853027344} +{"epoch": 0, "iter": 2322, "iter_tflops": 10.166282074529468, "iter_time": 2.0293646545410153, "loss": 0.8652392625808716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.425383313160324, "step_time": 1.8057244071960448} +{"epoch": 0, "iter": 2323, "iter_tflops": 10.575876780688326, "iter_time": 1.9507690887451172, "loss": 0.9764085412025452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.579858636931515, "step_time": 1.6400099639892578} +{"epoch": 0, "iter": 2324, "iter_tflops": 42.153230057925654, "iter_time": 0.48943090438842773, "loss": 0.954191267490387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.663601174573095, "step_time": 0.4518060989379883} +{"epoch": 0, "iter": 2325, "iter_tflops": 25.553055626485463, "iter_time": 0.7571214218139648, "loss": 0.3187045753002167, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 26.93875968124465, "step_time": 0.7181758193969726} +{"epoch": 0, "iter": 2326, "iter_tflops": 11.49613412193417, "iter_time": 1.6828931884765626, "loss": 0.4712168872356415, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 16.176732010654383, "step_time": 1.1959625587463378} +{"epoch": 0, "iter": 2327, "iter_tflops": 29.94115598001376, "iter_time": 0.6461596145629883, "loss": 0.5477136373519897, "lr": 3e-05, 
"seqlen": 7696.0, "step_tflops": 32.224180282024946, "step_time": 0.600380386352539} +{"epoch": 0, "iter": 2328, "iter_tflops": 26.602596362769848, "iter_time": 0.7272510375976562, "loss": 0.7689393162727356, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 28.578712726946776, "step_time": 0.6769642143249511} +{"epoch": 0, "iter": 2329, "iter_tflops": 18.074540783961503, "iter_time": 1.1414449615478515, "loss": 0.37885043025016785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.387447587771256, "step_time": 1.0641469650268554} +{"epoch": 0, "iter": 2330, "iter_tflops": 13.492956259678198, "iter_time": 1.5290269317626952, "loss": 0.3513740003108978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.99998294325078, "step_time": 1.2135949535369872} +{"epoch": 0, "iter": 2331, "iter_tflops": 37.55191971828486, "iter_time": 0.5494018325805664, "loss": 0.4240226447582245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.0924692783298, "step_time": 0.5020650711059571} +{"epoch": 0, "iter": 2332, "iter_tflops": 41.87524343394716, "iter_time": 0.4926799659729004, "loss": 0.49321913719177246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.637846277127764, "step_time": 0.45206106758117676} +{"epoch": 0, "iter": 2333, "iter_tflops": 33.4216700678998, "iter_time": 0.6172969055175781, "loss": 0.2497887909412384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.60207704410412, "step_time": 0.5636590919494628} +{"epoch": 0, "iter": 2334, "iter_tflops": 10.211790639737693, "iter_time": 2.0203208465576172, "loss": 0.31063559651374817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.008352799314041, "step_time": 1.5859881591796874} +{"epoch": 0, "iter": 2335, "iter_tflops": 10.905928835156557, "iter_time": 1.8917319030761717, "loss": 0.3036777675151825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.25574069883706, "step_time": 1.5563893394470212} +{"epoch": 0, "iter": 2336, "iter_tflops": 41.54344551550951, "iter_time": 0.4966148872375488, "loss": 0.32968080043792725, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 44.94756985937323, "step_time": 0.45900353622436524} +{"epoch": 0, "iter": 2337, "iter_tflops": 25.333189646891142, "iter_time": 0.6256016311645508, "loss": 0.4037192463874817, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 27.116815615411035, "step_time": 0.5844522819519042} +{"epoch": 0, "iter": 2338, "iter_tflops": 26.605189140916334, "iter_time": 0.5956914901733399, "loss": 0.3392438292503357, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 28.21060134641774, "step_time": 0.5617918090820313} +{"epoch": 0, "iter": 2339, "iter_tflops": 28.079657068335848, "iter_time": 0.56441162109375, "loss": 0.4722346365451813, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.024390480829506, "step_time": 0.5278536720275879} +{"epoch": 0, "iter": 2340, "iter_tflops": 29.643555350069438, "iter_time": 0.5346350860595703, "loss": 0.3311578929424286, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 31.397359405240138, "step_time": 0.5047712631225586} +{"epoch": 0, "iter": 2341, "iter_tflops": 26.726937334115743, "iter_time": 0.7719213485717774, "loss": 0.9937055706977844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.16711288777383, "step_time": 0.7324532546997071} +{"epoch": 0, "iter": 2342, "iter_tflops": 19.759602940062187, "iter_time": 1.0441046600341797, "loss": 0.8361979722976685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.455304284347786, "step_time": 0.8795918083190917} +{"epoch": 0, "iter": 2343, "iter_tflops": 39.10223597239132, "iter_time": 0.5276192779541017, "loss": 0.8186984062194824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.27455805726031, "step_time": 0.48802623748779295} +{"epoch": 0, "iter": 2344, "iter_tflops": 33.695973966921635, "iter_time": 0.6122717666625976, "loss": 0.9187391996383667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.64844670237845, "step_time": 0.5629459190368652} +{"epoch": 0, "iter": 2345, "iter_tflops": 25.086447738608427, "iter_time": 0.8223999557495117, "loss": 0.005332576110959053, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.920879852161228, "step_time": 0.7663602981567383} +{"epoch": 0, "iter": 2346, "iter_tflops": 8.999799448581618, "iter_time": 2.292394805908203, "loss": 0.0068267881870269775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.089194470174345, "step_time": 2.044870239257812} +{"epoch": 0, "iter": 2347, "iter_tflops": 10.695327917196423, "iter_time": 1.9289818572998048, "loss": 0.010748051106929779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.083592593044111, "step_time": 1.3677837944030762} +{"epoch": 0, "iter": 2348, "iter_tflops": 41.97125960917575, "iter_time": 0.4915528793334961, "loss": 0.010072629898786545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.65440781138039, "step_time": 0.4422110252380371} +{"epoch": 0, "iter": 2349, "iter_tflops": 19.553713723242886, "iter_time": 0.8797608795166015, "loss": 0.36662983894348145, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 20.7498810544118, "step_time": 0.8290453491210938} +{"epoch": 0, "iter": 2350, "iter_tflops": 13.593877877597189, "iter_time": 1.2654661560058593, "loss": 0.5225207209587097, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 15.864075533806053, "step_time": 1.0843740844726562} +{"epoch": 0, "iter": 2351, "iter_tflops": 25.448604718572604, "iter_time": 0.6759738922119141, "loss": 0.4920768439769745, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 27.35663550629151, "step_time": 0.628827049255371} +{"epoch": 0, "iter": 2352, "iter_tflops": 25.975876794508363, "iter_time": 0.6622526168823243, "loss": 0.46969154477119446, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 27.931597260098243, "step_time": 0.6158828735351562} +{"epoch": 0, "iter": 2353, "iter_tflops": 27.24842120396468, "iter_time": 0.7571482162475586, "loss": 0.8571106791496277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.180449956753698, "step_time": 0.6835913162231446} +{"epoch": 0, "iter": 2354, "iter_tflops": 37.52885661019033, "iter_time": 0.5497394638061524, "loss": 
0.8274162411689758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.77584765046258, "step_time": 0.5059635715484619} +{"epoch": 0, "iter": 2355, "iter_tflops": 37.04379745357462, "iter_time": 0.5569378662109375, "loss": 0.975197434425354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.51439684930048, "step_time": 0.5092286968231201} +{"epoch": 0, "iter": 2356, "iter_tflops": 41.58448486204201, "iter_time": 0.4961247825622558, "loss": 1.0793418884277344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.11251987162196, "step_time": 0.4573252296447754} +{"epoch": 0, "iter": 2357, "iter_tflops": 18.263614759987135, "iter_time": 1.1296281585693357, "loss": 0.953507661819458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.38298246599311, "step_time": 1.0643921051025391} +{"epoch": 0, "iter": 2358, "iter_tflops": 14.223798421309915, "iter_time": 1.4504630126953124, "loss": 0.9226616024971008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.871563841896926, "step_time": 1.222832317352295} +{"epoch": 0, "iter": 2359, "iter_tflops": 46.73360537691799, "iter_time": 0.44146162796020505, "loss": 1.0577425956726074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40874899429069, "step_time": 0.4092760467529296} +{"epoch": 0, "iter": 2360, "iter_tflops": 43.875604361514505, "iter_time": 0.4702178764343261, "loss": 1.016501545906067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.91461863618437, "step_time": 0.43975831222534184} +{"epoch": 0, "iter": 2361, "iter_tflops": 27.894665449650326, "iter_time": 0.7396071319580078, "loss": 0.7335848808288574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.513644430673136, "step_time": 0.6990357818603516} +{"epoch": 0, "iter": 2362, "iter_tflops": 17.08330332177087, "iter_time": 1.2076758880615235, "loss": 0.7602480053901672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.416293069572383, "step_time": 1.0105210304260255} +{"epoch": 0, "iter": 2363, "iter_tflops": 48.69972369051819, "iter_time": 0.42363882064819336, "loss": 
0.9441490769386292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.43734709420245, "step_time": 0.3860800476074219} +{"epoch": 0, "iter": 2364, "iter_tflops": 48.025896099471055, "iter_time": 0.4295826873779297, "loss": 0.7562784552574158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.718388718678014, "step_time": 0.3989121475219726} +{"epoch": 0, "iter": 2365, "iter_tflops": 35.56099301298523, "iter_time": 0.5801607818603516, "loss": 0.2783326506614685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.11187150380437, "step_time": 0.5413298454284667} +{"epoch": 0, "iter": 2366, "iter_tflops": 13.047413576652525, "iter_time": 1.5812400970458982, "loss": 0.42308521270751953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.121130022760614, "step_time": 1.27975479888916} +{"epoch": 0, "iter": 2367, "iter_tflops": 11.441241122088687, "iter_time": 1.803221633911133, "loss": 0.325165718793869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.099552635167452, "step_time": 1.5749464187622069} +{"epoch": 0, "iter": 2368, "iter_tflops": 15.658268617223447, "iter_time": 1.3175845947265625, "loss": 0.46294355392456055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.567833989361308, "step_time": 1.0543371086120608} +{"epoch": 0, "iter": 2369, "iter_tflops": 23.268658740743778, "iter_time": 0.7675981369018554, "loss": 0.5470765829086304, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 25.40460696882942, "step_time": 0.7030606346130371} +{"epoch": 0, "iter": 2370, "iter_tflops": 26.51724732836024, "iter_time": 0.6735608291625975, "loss": 0.41185101866722107, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 28.528156986219578, "step_time": 0.6260824737548829} +{"epoch": 0, "iter": 2371, "iter_tflops": 28.049882255105295, "iter_time": 0.6367577209472657, "loss": 0.4756700396537781, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 30.142641920449346, "step_time": 0.5925485610961914} +{"epoch": 0, "iter": 2372, "iter_tflops": 28.74216774411952, "iter_time": 0.6214207382202148, "loss": 
0.5747746229171753, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 30.866501312824674, "step_time": 0.578652530670166} +{"epoch": 0, "iter": 2373, "iter_tflops": 19.96834957579011, "iter_time": 1.0331897201538087, "loss": 0.10304784774780273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.404029950471855, "step_time": 0.9638882751464843} +{"epoch": 0, "iter": 2374, "iter_tflops": 32.7165316946345, "iter_time": 0.6306014862060547, "loss": 0.07781952619552612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.953260801326394, "step_time": 0.5583023815155029} +{"epoch": 0, "iter": 2375, "iter_tflops": 51.49366351024835, "iter_time": 0.4006530532836914, "loss": 0.10678059607744217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.75808790718088, "step_time": 0.37001077842712404} +{"epoch": 0, "iter": 2376, "iter_tflops": 58.658161339665874, "iter_time": 0.35171735763549805, "loss": 0.04380778968334198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.86535857654632, "step_time": 0.3230404396057129} +{"epoch": 0, "iter": 2377, "iter_tflops": 37.07022331335098, "iter_time": 0.5565408477783204, "loss": 0.8891351819038391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.12763978631825, "step_time": 0.5141367301940918} +{"epoch": 0, "iter": 2378, "iter_tflops": 45.113266835680164, "iter_time": 0.4573176574707031, "loss": 0.9890561699867249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.155657986361035, "step_time": 0.41970943641662595} +{"epoch": 0, "iter": 2379, "iter_tflops": 42.238736409905684, "iter_time": 0.4884401206970215, "loss": 0.9775028824806213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38224131736236, "step_time": 0.4546071968078614} +{"epoch": 0, "iter": 2380, "iter_tflops": 45.868960351487516, "iter_time": 0.44978332519531256, "loss": 1.0680114030838013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04612941396009, "step_time": 0.4206467208862305} +{"epoch": 0, "iter": 2381, "iter_tflops": 22.62285546461318, "iter_time": 0.9119579772949219, 
"loss": 0.986199676990509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.718014692040843, "step_time": 0.8698490905761718} +{"epoch": 0, "iter": 2382, "iter_tflops": 15.79228625353392, "iter_time": 1.3064032135009764, "loss": 0.9961348176002502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.385832912475614, "step_time": 1.012030933380127} +{"epoch": 0, "iter": 2383, "iter_tflops": 47.60981944835897, "iter_time": 0.4333369407653809, "loss": 1.1138455867767334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.492606240131, "step_time": 0.40066127967834475} +{"epoch": 0, "iter": 2384, "iter_tflops": 44.4073208128995, "iter_time": 0.46458766555786135, "loss": 0.8462386131286621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.729270902832056, "step_time": 0.4322524337768555} +{"epoch": 0, "iter": 2385, "iter_tflops": 45.59592387865936, "iter_time": 0.4524767074584961, "loss": 0.021292200312018394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.75073888342584, "step_time": 0.41468918800354004} +{"epoch": 0, "iter": 2386, "iter_tflops": 19.436050439352556, "iter_time": 1.0614859008789062, "loss": 0.035737697035074234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.182027446926266, "step_time": 0.9300815067291259} +{"epoch": 0, "iter": 2387, "iter_tflops": 52.693002220928335, "iter_time": 0.39153384017944337, "loss": 0.01678619161248207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.63025824798589, "step_time": 0.3579906482696534} +{"epoch": 0, "iter": 2388, "iter_tflops": 55.321665427319864, "iter_time": 0.37292972564697263, "loss": 0.005138718523085117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.79028917195818, "step_time": 0.33938140106201176} +{"epoch": 0, "iter": 2389, "iter_tflops": 38.7730518693715, "iter_time": 0.532098777770996, "loss": 0.29190170764923096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96677462510859, "step_time": 0.491605411529541} +{"epoch": 0, "iter": 2390, "iter_tflops": 13.89875521932956, "iter_time": 
1.484384262084961, "loss": 0.2271670699119568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.491496716110547, "step_time": 1.3317688980102538} +{"epoch": 0, "iter": 2391, "iter_tflops": 17.508480486982727, "iter_time": 1.1783486022949219, "loss": 0.3209508955478668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.59704086838515, "step_time": 1.0016532783508298} +{"epoch": 0, "iter": 2392, "iter_tflops": 14.528552542508649, "iter_time": 1.4200377807617188, "loss": 0.41715604066848755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.93828164227396, "step_time": 1.2180157318115234} +{"epoch": 0, "iter": 2393, "iter_tflops": 5.697495553340144, "iter_time": 2.709815826416016, "loss": 0.3787861168384552, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 7.266798598960451, "step_time": 2.124616970062256} +{"epoch": 0, "iter": 2394, "iter_tflops": 22.03013314901891, "iter_time": 0.7008202590942383, "loss": 0.24911798536777496, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.600128165077145, "step_time": 0.6541982955932617} +{"epoch": 0, "iter": 2395, "iter_tflops": 24.153546597950022, "iter_time": 0.639208969116211, "loss": 0.5145401954650879, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.80696471685699, "step_time": 0.5982556953430176} +{"epoch": 0, "iter": 2396, "iter_tflops": 23.179224256888492, "iter_time": 0.6660776672363281, "loss": 0.4448966383934021, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.854345663115755, "step_time": 0.6211856803894042} +{"epoch": 0, "iter": 2397, "iter_tflops": 19.318249603553312, "iter_time": 1.0679587402343749, "loss": 1.0599879026412964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.838093206292505, "step_time": 0.9900662841796875} +{"epoch": 0, "iter": 2398, "iter_tflops": 26.189956798710853, "iter_time": 0.7877482833862304, "loss": 1.1334295272827148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.70799497143006, "step_time": 0.5620326995849609} +{"epoch": 0, "iter": 2399, "iter_tflops": 45.397973129025715, "iter_time": 
0.4544496612548828, "loss": 0.7786117196083069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.83936468793246, "step_time": 0.42242755699157714} +{"epoch": 0, "iter": 2400, "iter_tflops": 45.27122188013235, "iter_time": 0.45572203826904295, "loss": 1.0283228158950806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.74059483272401, "step_time": 0.42328358078002926} +{"epoch": 0, "iter": 2401, "iter_tflops": 42.77006073042193, "iter_time": 0.4823723220825195, "loss": 0.40737849473953247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.715498489259005, "step_time": 0.4416327381134033} +{"epoch": 0, "iter": 2402, "iter_tflops": 48.388506134535675, "iter_time": 0.4263635139465332, "loss": 0.21884317696094513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.54195610668815, "step_time": 0.3926594104766846} +{"epoch": 0, "iter": 2403, "iter_tflops": 52.12152543392446, "iter_time": 0.39582674026489256, "loss": 0.2983846366405487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.836544580846734, "step_time": 0.362989933013916} +{"epoch": 0, "iter": 2404, "iter_tflops": 49.72058749703747, "iter_time": 0.4149406623840332, "loss": 0.3674735128879547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84446939263111, "step_time": 0.3831608657836914} +{"epoch": 0, "iter": 2405, "iter_tflops": 34.77603480608252, "iter_time": 0.5670332489013672, "loss": 0.1219387874007225, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 37.51576060257031, "step_time": 0.5256235694885254} +{"epoch": 0, "iter": 2406, "iter_tflops": 17.759482197880626, "iter_time": 1.110345886230469, "loss": 0.1252264678478241, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 19.44873375187645, "step_time": 1.0139049797058104} +{"epoch": 0, "iter": 2407, "iter_tflops": 39.628956930639994, "iter_time": 0.49759492874145506, "loss": 0.08465587347745895, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 43.97225352713485, "step_time": 0.44844569969177245} +{"epoch": 0, "iter": 2408, "iter_tflops": 40.245412939761366, 
"iter_time": 0.4899730567932129, "loss": 0.12518125772476196, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 44.148752057123964, "step_time": 0.4466528968811036} +{"epoch": 0, "iter": 2409, "iter_tflops": 22.006454613729108, "iter_time": 0.9375019226074218, "loss": 1.0157026052474976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.813863663271185, "step_time": 0.866348014831543} +{"epoch": 0, "iter": 2410, "iter_tflops": 16.339862936423945, "iter_time": 1.2626234130859375, "loss": 1.0407373905181885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.777270002897513, "step_time": 0.9929645957946777} +{"epoch": 0, "iter": 2411, "iter_tflops": 45.27609684282796, "iter_time": 0.4556729698181153, "loss": 0.7544649839401245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87751000440726, "step_time": 0.4220978832244873} +{"epoch": 0, "iter": 2412, "iter_tflops": 44.68689296255782, "iter_time": 0.4616810913085937, "loss": 0.7378599643707275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.043405888046266, "step_time": 0.42942612266540525} +{"epoch": 0, "iter": 2413, "iter_tflops": 25.992207894948542, "iter_time": 0.7937414779663086, "loss": 0.9153429865837097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.29449962980066, "step_time": 0.755870002746582} +{"epoch": 0, "iter": 2414, "iter_tflops": 15.077133739239011, "iter_time": 1.3683697357177733, "loss": 0.8231949806213379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.9735258595832, "step_time": 0.9836731147766112} +{"epoch": 0, "iter": 2415, "iter_tflops": 45.55180377356347, "iter_time": 0.4529149627685547, "loss": 0.9009193778038025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.99132861906903, "step_time": 0.4211172485351563} +{"epoch": 0, "iter": 2416, "iter_tflops": 43.28174579868142, "iter_time": 0.47666962432861326, "loss": 0.8195172548294067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.2389519681333, "step_time": 0.44618428039550784} +{"epoch": 0, "iter": 2417, "iter_tflops": 43.18997597381684, 
"iter_time": 0.47768244934082027, "loss": 0.7758870124816895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.03755893155159, "step_time": 0.43860893249511723} +{"epoch": 0, "iter": 2418, "iter_tflops": 45.962083974370536, "iter_time": 0.4488720207214355, "loss": 0.8827127814292908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6445090983686, "step_time": 0.41557654380798337} +{"epoch": 0, "iter": 2419, "iter_tflops": 46.51585414316035, "iter_time": 0.4435282096862794, "loss": 0.9599795341491699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.93001762346972, "step_time": 0.41320020484924314} +{"epoch": 0, "iter": 2420, "iter_tflops": 49.150468173660435, "iter_time": 0.4197537536621094, "loss": 0.9696213006973267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.23228654552785, "step_time": 0.38756729888916014} +{"epoch": 0, "iter": 2421, "iter_tflops": 25.98759525109559, "iter_time": 0.7938823623657228, "loss": 0.42764613032341003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.41437745948593, "step_time": 0.7525647277832032} +{"epoch": 0, "iter": 2422, "iter_tflops": 14.677994811338017, "iter_time": 1.405579833984375, "loss": 0.4164532423019409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.389445903270627, "step_time": 1.2588036003112792} +{"epoch": 0, "iter": 2423, "iter_tflops": 34.79888107266089, "iter_time": 0.5928665771484375, "loss": 0.5443693399429321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.008027200422596, "step_time": 0.5428088493347168} +{"epoch": 0, "iter": 2424, "iter_tflops": 42.04535946931788, "iter_time": 0.49068657684326167, "loss": 0.4900353252887726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06776462776889, "step_time": 0.4478422966003418} +{"epoch": 0, "iter": 2425, "iter_tflops": 32.60813654967189, "iter_time": 0.6326977157592774, "loss": 0.654100239276886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.795748638068844, "step_time": 0.576355972290039} +{"epoch": 0, "iter": 2426, "iter_tflops": 47.5712989704452, 
"iter_time": 0.4336878318786621, "loss": 0.6603614091873169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.84215409457035, "step_time": 0.3979598045349121} +{"epoch": 0, "iter": 2427, "iter_tflops": 47.826481850121056, "iter_time": 0.43137384796142575, "loss": 0.8652055263519287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.53714353679852, "step_time": 0.4003150367736817} +{"epoch": 0, "iter": 2428, "iter_tflops": 48.008542472534444, "iter_time": 0.4297379684448242, "loss": 0.7674250602722168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.41265839524568, "step_time": 0.40128431701660155} +{"epoch": 0, "iter": 2429, "iter_tflops": 43.36911982262551, "iter_time": 0.4757092971801758, "loss": 0.22595156729221344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.97040291478011, "step_time": 0.4392360343933105} +{"epoch": 0, "iter": 2430, "iter_tflops": 11.332107198768822, "iter_time": 1.82058757019043, "loss": 0.2532539665699005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.167072777260904, "step_time": 1.456270736694336} +{"epoch": 0, "iter": 2431, "iter_tflops": 13.05482118291558, "iter_time": 1.5803428649902345, "loss": 0.17276941239833832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.29203788680677, "step_time": 1.3491395759582518} +{"epoch": 0, "iter": 2432, "iter_tflops": 35.97396431442624, "iter_time": 0.5735006942749024, "loss": 0.29350531101226807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.17726326358511, "step_time": 0.42823299026489253} +{"epoch": 0, "iter": 2433, "iter_tflops": 21.227742681181866, "iter_time": 0.8588691101074218, "loss": 0.45805078744888306, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 22.2903897343869, "step_time": 0.817924346923828} +{"epoch": 0, "iter": 2434, "iter_tflops": 10.985139509260474, "iter_time": 1.6596832885742188, "loss": 0.4184982180595398, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 13.772522239286479, "step_time": 1.3237845726013184} +{"epoch": 0, "iter": 2435, "iter_tflops": 29.781656962321197, 
"iter_time": 0.612183952331543, "loss": 0.3580686151981354, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 31.979881209298185, "step_time": 0.5701038208007813} +{"epoch": 0, "iter": 2436, "iter_tflops": 28.277781966122195, "iter_time": 0.6447412490844728, "loss": 0.3931032419204712, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 30.4180426118135, "step_time": 0.5993762550354004} +{"epoch": 0, "iter": 2437, "iter_tflops": 21.201619628411482, "iter_time": 0.9730904464721679, "loss": 0.016681723296642303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.766698482284436, "step_time": 0.9061961059570313} +{"epoch": 0, "iter": 2438, "iter_tflops": 29.020367019365374, "iter_time": 0.7109177322387694, "loss": 0.003389199962839484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.662179656482024, "step_time": 0.5477934017181397} +{"epoch": 0, "iter": 2439, "iter_tflops": 54.28649842242326, "iter_time": 0.3800409698486328, "loss": 0.010654698126018047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.72409448805727, "step_time": 0.34544003868103024} +{"epoch": 0, "iter": 2440, "iter_tflops": 57.68140991631781, "iter_time": 0.3576731834411621, "loss": 0.02027878724038601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.087306058651045, "step_time": 0.32702448081970215} +{"epoch": 0, "iter": 2441, "iter_tflops": 38.73637843796119, "iter_time": 0.5326025390625, "loss": 1.127478003501892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60986072072844, "step_time": 0.49582221984863284} +{"epoch": 0, "iter": 2442, "iter_tflops": 18.490324907374625, "iter_time": 1.1157777709960937, "loss": 0.7733104228973389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.470344301851316, "step_time": 0.9181476364135741} +{"epoch": 0, "iter": 2443, "iter_tflops": 43.638729199903665, "iter_time": 0.472770263671875, "loss": 0.8131018280982971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.115089494976004, "step_time": 0.4378871765136719} +{"epoch": 0, "iter": 2444, "iter_tflops": 
42.87937190610101, "iter_time": 0.4811426239013672, "loss": 0.7347668409347534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.959646731470365, "step_time": 0.448895824432373} +{"epoch": 0, "iter": 2445, "iter_tflops": 37.350982449460695, "iter_time": 0.5523574523925782, "loss": 0.06309100985527039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.1159744780591, "step_time": 0.5142862358093261} +{"epoch": 0, "iter": 2446, "iter_tflops": 15.96121765376123, "iter_time": 1.2925764160156248, "loss": 0.1134304329752922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.886955366937713, "step_time": 0.9877501602172851} +{"epoch": 0, "iter": 2447, "iter_tflops": 44.08083834880907, "iter_time": 0.46802861022949216, "loss": 0.10784994065761566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42230143130217, "step_time": 0.4260659427642822} +{"epoch": 0, "iter": 2448, "iter_tflops": 46.69681413641664, "iter_time": 0.44180944442749026, "loss": 0.10169725865125656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.05862618592648, "step_time": 0.40406675720214835} +{"epoch": 0, "iter": 2449, "iter_tflops": 15.91047784552197, "iter_time": 1.2966985473632813, "loss": 0.009457675740122795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.193047304196163, "step_time": 1.199967239379883} +{"epoch": 0, "iter": 2450, "iter_tflops": 18.021278018837737, "iter_time": 1.1448185577392578, "loss": 0.004643844906240702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.31615604668541, "step_time": 0.9678618164062499} +{"epoch": 0, "iter": 2451, "iter_tflops": 45.16476570284737, "iter_time": 0.45679620361328127, "loss": 0.009034421294927597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.15363054854925, "step_time": 0.41135792732238774} +{"epoch": 0, "iter": 2452, "iter_tflops": 48.286655776705246, "iter_time": 0.4272628364562988, "loss": 0.019688840955495834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.00907786116092, "step_time": 0.3891992530822754} +{"epoch": 0, "iter": 2453, 
"iter_tflops": 40.30036208378731, "iter_time": 0.5119332046508789, "loss": 0.10109949111938477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.545232913096136, "step_time": 0.4631493015289307} +{"epoch": 0, "iter": 2454, "iter_tflops": 43.32880944877292, "iter_time": 0.47615186691284184, "loss": 0.11095880717039108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.09377467804643, "step_time": 0.42023848533630365} +{"epoch": 0, "iter": 2455, "iter_tflops": 52.67522454222205, "iter_time": 0.3916659812927246, "loss": 0.1156044602394104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.54126421285902, "step_time": 0.3585443210601807} +{"epoch": 0, "iter": 2456, "iter_tflops": 53.35132536499779, "iter_time": 0.3867025489807129, "loss": 0.050059664994478226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.946506161155256, "step_time": 0.35603688430786135} +{"epoch": 0, "iter": 2457, "iter_tflops": 44.031203208891675, "iter_time": 0.46855620574951173, "loss": 0.18632279336452484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.15042271861455, "step_time": 0.42847170066833495} +{"epoch": 0, "iter": 2458, "iter_tflops": 9.225899293487688, "iter_time": 2.236214904785156, "loss": 0.1891917586326599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.260357566918257, "step_time": 1.8321881332397458} +{"epoch": 0, "iter": 2459, "iter_tflops": 12.923264099168662, "iter_time": 1.5964305419921876, "loss": 0.17011575400829315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.020969226046095, "step_time": 1.2877556419372558} +{"epoch": 0, "iter": 2460, "iter_tflops": 21.798774524190225, "iter_time": 0.9464336395263672, "loss": 0.14077633619308472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.03228347250674, "step_time": 0.6062212524414063} +{"epoch": 0, "iter": 2461, "iter_tflops": 12.867553989417155, "iter_time": 1.263509582519531, "loss": 0.4327961802482605, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 13.848783348669178, "step_time": 1.1739860000610352} +{"epoch": 0, 
"iter": 2462, "iter_tflops": 13.599721301242027, "iter_time": 1.195486099243164, "loss": 0.45923522114753723, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 15.898625822289809, "step_time": 1.0226215744018554} +{"epoch": 0, "iter": 2463, "iter_tflops": 29.380218957902187, "iter_time": 0.5533749694824219, "loss": 0.40326404571533203, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 31.33679082420735, "step_time": 0.5188239555358887} +{"epoch": 0, "iter": 2464, "iter_tflops": 30.352055585902185, "iter_time": 0.5356565628051757, "loss": 0.3963058292865753, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 32.18938601007058, "step_time": 0.5050819473266601} +{"epoch": 0, "iter": 2465, "iter_tflops": 29.889443827801273, "iter_time": 0.6902468185424805, "loss": 0.7914388179779053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.635888738939023, "step_time": 0.6521420555114746} +{"epoch": 0, "iter": 2466, "iter_tflops": 20.56390342828059, "iter_time": 1.003267379760742, "loss": 0.8477632999420166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.026555905466253, "step_time": 0.8243680667877198} +{"epoch": 0, "iter": 2467, "iter_tflops": 49.13037951647925, "iter_time": 0.41992538452148437, "loss": 1.1763423681259155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36807544894398, "step_time": 0.38658117866516106} +{"epoch": 0, "iter": 2468, "iter_tflops": 48.5322788476973, "iter_time": 0.42510044860839846, "loss": 0.7296081781387329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.08032538512542, "step_time": 0.3961398735046387} +{"epoch": 0, "iter": 2469, "iter_tflops": 29.971335259029306, "iter_time": 0.68836083984375, "loss": 0.8738179206848145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.956592454159388, "step_time": 0.6455974159240723} +{"epoch": 0, "iter": 2470, "iter_tflops": 19.014501141658126, "iter_time": 1.0850189208984375, "loss": 0.7905593514442444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.59007030998721, "step_time": 0.913281509399414} +{"epoch": 0, 
"iter": 2471, "iter_tflops": 41.163234401031055, "iter_time": 0.5012019538879394, "loss": 0.8407155275344849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.775054667383465, "step_time": 0.4607720451354981} +{"epoch": 0, "iter": 2472, "iter_tflops": 43.82297865913113, "iter_time": 0.47078254699707023, "loss": 0.7473300695419312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.9149007688472, "step_time": 0.4305778198242188} +{"epoch": 0, "iter": 2473, "iter_tflops": 31.734046938570952, "iter_time": 0.5719221115112305, "loss": 0.05595937371253967, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 35.38182667104266, "step_time": 0.5129583415985107} +{"epoch": 0, "iter": 2474, "iter_tflops": 33.64628808193187, "iter_time": 0.539417694091797, "loss": 0.01463275495916605, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 37.57516945394602, "step_time": 0.483015869140625} +{"epoch": 0, "iter": 2475, "iter_tflops": 39.42137581186425, "iter_time": 0.46039496994018553, "loss": 0.0422661192715168, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 43.318696119625706, "step_time": 0.41897390174865723} +{"epoch": 0, "iter": 2476, "iter_tflops": 36.59881958096984, "iter_time": 0.49590132522583, "loss": 0.0426565520465374, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 40.35431895238981, "step_time": 0.4497511940002441} +{"epoch": 0, "iter": 2477, "iter_tflops": 20.965830187621968, "iter_time": 0.9840341796875001, "loss": 0.08730241656303406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.464576182179375, "step_time": 0.9183833847045899} +{"epoch": 0, "iter": 2478, "iter_tflops": 22.183868510683276, "iter_time": 0.9300043182373046, "loss": 0.08433902263641357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.12489911763086, "step_time": 0.7897099781036376} +{"epoch": 0, "iter": 2479, "iter_tflops": 39.19589653136376, "iter_time": 0.5263585052490234, "loss": 0.04898977279663086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.0259112729784, "step_time": 0.47950392913818357} +{"epoch": 0, 
"iter": 2480, "iter_tflops": 46.96522969940865, "iter_time": 0.4392844161987305, "loss": 0.10929834097623825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.36420666631896, "step_time": 0.40166284751892084} +{"epoch": 0, "iter": 2481, "iter_tflops": 21.541145782792245, "iter_time": 0.9577528381347655, "loss": 0.07523884624242783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.85901583866583, "step_time": 0.9025363845825196} +{"epoch": 0, "iter": 2482, "iter_tflops": 12.489401782884931, "iter_time": 1.6518880462646486, "loss": 0.14100892841815948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.559334322502604, "step_time": 1.1749359703063964} +{"epoch": 0, "iter": 2483, "iter_tflops": 51.655436936927444, "iter_time": 0.39939829635620117, "loss": 0.09236909449100494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.82981379695829, "step_time": 0.3630329246520996} +{"epoch": 0, "iter": 2484, "iter_tflops": 47.77235407500406, "iter_time": 0.4318626098632812, "loss": 0.11091622710227966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.782591551806824, "step_time": 0.3984175548553467} +{"epoch": 0, "iter": 2485, "iter_tflops": 30.0746070424339, "iter_time": 0.6309224853515625, "loss": 0.011525300331413746, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 32.27059349267182, "step_time": 0.5879887466430664} +{"epoch": 0, "iter": 2486, "iter_tflops": 17.175481046432672, "iter_time": 1.1047577514648437, "loss": 0.008221960626542568, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 23.598767291279763, "step_time": 0.8040566520690919} +{"epoch": 0, "iter": 2487, "iter_tflops": 38.31499421358809, "iter_time": 0.49523029327392576, "loss": 0.03316511958837509, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 42.5098500315638, "step_time": 0.44636115646362307} +{"epoch": 0, "iter": 2488, "iter_tflops": 40.1198036932917, "iter_time": 0.4729521102905273, "loss": 0.008348382078111172, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 44.32815453290957, "step_time": 0.4280517883300781} 
+{"epoch": 0, "iter": 2489, "iter_tflops": 28.645986726855813, "iter_time": 0.7202088623046876, "loss": 0.7620117664337158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.596808091149732, "step_time": 0.6742890777587891} +{"epoch": 0, "iter": 2490, "iter_tflops": 10.772462658922127, "iter_time": 1.915169647216797, "loss": 0.5634176135063171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.125110296647076, "step_time": 1.4605969848632814} +{"epoch": 0, "iter": 2491, "iter_tflops": 14.994901248464075, "iter_time": 1.3758739166259764, "loss": 0.8690817952156067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.4122511376118, "step_time": 1.184860782623291} +{"epoch": 0, "iter": 2492, "iter_tflops": 18.92837450616288, "iter_time": 1.0899559020996095, "loss": 1.2819831371307373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.097519873487986, "step_time": 0.8932168312072755} +{"epoch": 0, "iter": 2493, "iter_tflops": 20.896272630729115, "iter_time": 0.7290620803833007, "loss": 0.5817793607711792, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 22.70103504200036, "step_time": 0.6711006774902345} +{"epoch": 0, "iter": 2494, "iter_tflops": 23.046124409073872, "iter_time": 0.6610517120361329, "loss": 0.38414451479911804, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.8509914823804, "step_time": 0.6130411338806153} +{"epoch": 0, "iter": 2495, "iter_tflops": 24.77149505670742, "iter_time": 0.6150084991455078, "loss": 0.4602101147174835, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 26.52011818734341, "step_time": 0.5744574699401855} +{"epoch": 0, "iter": 2496, "iter_tflops": 26.29638533592146, "iter_time": 0.5793450241088868, "loss": 0.5138976573944092, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.0781715982513, "step_time": 0.5425809135437012} +{"epoch": 0, "iter": 2497, "iter_tflops": 34.39278895441699, "iter_time": 0.5998668365478514, "loss": 0.836184024810791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.72239226438644, "step_time": 0.5469190120697022} 
+{"epoch": 0, "iter": 2498, "iter_tflops": 35.49020939230541, "iter_time": 0.581317886352539, "loss": 0.9378417134284973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.710495344573395, "step_time": 0.5329586544036865} +{"epoch": 0, "iter": 2499, "iter_tflops": 35.91954924250788, "iter_time": 0.574369499206543, "loss": 0.7972950339317322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.969778614231195, "step_time": 0.5294126434326172} +{"epoch": 0, "iter": 2500, "iter_tflops": 41.796361045233425, "iter_time": 0.4936098022460938, "loss": 0.8423973321914673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.350633169155095, "step_time": 0.45492404556274413} +{"epoch": 0, "iter": 2501, "iter_tflops": 36.48255384370792, "iter_time": 0.5655057373046876, "loss": 1.0514702796936035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14401664883691, "step_time": 0.513926986694336} +{"epoch": 0, "iter": 2502, "iter_tflops": 46.40168521170776, "iter_time": 0.4446194877624512, "loss": 1.0925946235656738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34940114655086, "step_time": 0.4097584686279297} +{"epoch": 0, "iter": 2503, "iter_tflops": 46.10690039404586, "iter_time": 0.4474621658325196, "loss": 0.9613193869590759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.610942749480664, "step_time": 0.4158577194213867} +{"epoch": 0, "iter": 2504, "iter_tflops": 46.52848989672822, "iter_time": 0.4434077606201172, "loss": 1.0843919515609741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26931008410563, "step_time": 0.4104113121032715} +{"epoch": 0, "iter": 2505, "iter_tflops": 26.918690144136704, "iter_time": 0.7664226379394531, "loss": 0.5616428852081299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.362211431293204, "step_time": 0.7274148406982421} +{"epoch": 0, "iter": 2506, "iter_tflops": 16.503398124664844, "iter_time": 1.2501118469238282, "loss": 0.47462090849876404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.492709520721064, "step_time": 1.0067528400421144} 
+{"epoch": 0, "iter": 2507, "iter_tflops": 41.201791654135825, "iter_time": 0.5007329216003418, "loss": 0.3850042223930359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.77118519060072, "step_time": 0.46081186866760254} +{"epoch": 0, "iter": 2508, "iter_tflops": 42.37126602898513, "iter_time": 0.486912368774414, "loss": 0.5670167207717896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.11473364052564, "step_time": 0.447386157989502} +{"epoch": 0, "iter": 2509, "iter_tflops": 19.522839613484866, "iter_time": 1.0567670440673829, "loss": 0.3120456337928772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.909005155271963, "step_time": 0.9867085189819335} +{"epoch": 0, "iter": 2510, "iter_tflops": 15.987527625710968, "iter_time": 1.2904492797851563, "loss": 0.3146766722202301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.079023216152365, "step_time": 1.1411619567871092} +{"epoch": 0, "iter": 2511, "iter_tflops": 49.75613085619987, "iter_time": 0.4146442489624024, "loss": 0.18475142121315002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.538239477980184, "step_time": 0.3782867527008057} +{"epoch": 0, "iter": 2512, "iter_tflops": 52.84477909636364, "iter_time": 0.3904093055725098, "loss": 0.28230613470077515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.46388686470049, "step_time": 0.3590271148681641} +{"epoch": 0, "iter": 2513, "iter_tflops": 22.67044186090196, "iter_time": 0.9100437316894531, "loss": 1.0659998655319214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.702492973434865, "step_time": 0.870418716430664} +{"epoch": 0, "iter": 2514, "iter_tflops": 16.951408470690893, "iter_time": 1.217072525024414, "loss": 0.9824386835098267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.767993329699674, "step_time": 0.9934081344604494} +{"epoch": 0, "iter": 2515, "iter_tflops": 33.762841922343846, "iter_time": 0.6110591506958007, "loss": 0.9255911111831665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.73305503104577, "step_time": 0.5616492691040038} 
+{"epoch": 0, "iter": 2516, "iter_tflops": 36.17473367696684, "iter_time": 0.570317771911621, "loss": 0.8693919777870178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.410646740919056, "step_time": 0.5234903564453125} +{"epoch": 0, "iter": 2517, "iter_tflops": 22.485557212182986, "iter_time": 0.91752645111084, "loss": 0.05400790274143219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.10232745486836, "step_time": 0.855979305267334} +{"epoch": 0, "iter": 2518, "iter_tflops": 21.467813633965612, "iter_time": 0.9610244369506835, "loss": 0.0334298349916935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.450694893382106, "step_time": 0.7799830436706543} +{"epoch": 0, "iter": 2519, "iter_tflops": 54.31115189174726, "iter_time": 0.3798684577941895, "loss": 0.042096011340618134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.43118917339794, "step_time": 0.3471425323486328} +{"epoch": 0, "iter": 2520, "iter_tflops": 57.338568108673435, "iter_time": 0.3598118019104004, "loss": 0.0555310994386673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.15581108534051, "step_time": 0.3319254169464111} +{"epoch": 0, "iter": 2521, "iter_tflops": 18.377878305770484, "iter_time": 1.122604751586914, "loss": 0.9618980288505554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.113372021421128, "step_time": 1.079406265258789} +{"epoch": 0, "iter": 2522, "iter_tflops": 18.529513449983174, "iter_time": 1.1134179840087892, "loss": 0.7799666523933411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.8918032103878, "step_time": 0.9424117927551269} +{"epoch": 0, "iter": 2523, "iter_tflops": 37.16836722899026, "iter_time": 0.5550712890625, "loss": 0.9485429525375366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.704062009866334, "step_time": 0.5068558883666991} +{"epoch": 0, "iter": 2524, "iter_tflops": 41.93505635869693, "iter_time": 0.49197724533081055, "loss": 0.8665658235549927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.69492661862352, "step_time": 0.4514963703155518} 
+{"epoch": 0, "iter": 2525, "iter_tflops": 16.954082665673397, "iter_time": 1.2168805541992187, "loss": 0.15005742013454437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.015440342053136, "step_time": 1.1451895217895507} +{"epoch": 0, "iter": 2526, "iter_tflops": 21.91587080533429, "iter_time": 0.941376853942871, "loss": 0.00939002726227045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.08933183297157, "step_time": 0.7615947723388671} +{"epoch": 0, "iter": 2527, "iter_tflops": 47.55366973226684, "iter_time": 0.4338486099243164, "loss": 0.01112937182188034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.5024463910486, "step_time": 0.3929548988342285} +{"epoch": 0, "iter": 2528, "iter_tflops": 48.6761952328586, "iter_time": 0.4238435935974121, "loss": 0.010992348194122314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.906362250708646, "step_time": 0.3827209377288818} +{"epoch": 0, "iter": 2529, "iter_tflops": 25.28199894606799, "iter_time": 0.8160388565063477, "loss": 0.004664131905883551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.653738616285015, "step_time": 0.7460507888793945} +{"epoch": 0, "iter": 2530, "iter_tflops": 26.174379693311337, "iter_time": 0.7882170944213867, "loss": 0.011343001388013363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.09719747033218, "step_time": 0.5561361751556396} +{"epoch": 0, "iter": 2531, "iter_tflops": 58.479709943854246, "iter_time": 0.3527906265258789, "loss": 0.0036728945560753345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.84060606946898, "step_time": 0.31818168830871585} +{"epoch": 0, "iter": 2532, "iter_tflops": 61.31643023272245, "iter_time": 0.3364692535400391, "loss": 0.006629389710724354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.32905636480335, "step_time": 0.30642184257507327} +{"epoch": 0, "iter": 2533, "iter_tflops": 33.42966985220027, "iter_time": 0.6171491851806641, "loss": 0.9458212852478027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.637363511228706, "step_time": 
0.5789175033569336} +{"epoch": 0, "iter": 2534, "iter_tflops": 15.363170331868385, "iter_time": 1.3428929748535157, "loss": 1.1083619594573975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.287735375318213, "step_time": 1.1281382350921632} +{"epoch": 0, "iter": 2535, "iter_tflops": 36.37315511140818, "iter_time": 0.5672065963745118, "loss": 1.1396126747131348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.64103932825275, "step_time": 0.5204478454589844} +{"epoch": 0, "iter": 2536, "iter_tflops": 35.44833685890657, "iter_time": 0.5820045547485352, "loss": 0.7651132345199585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.510083302563025, "step_time": 0.5357322483062744} +{"epoch": 0, "iter": 2537, "iter_tflops": 20.72667162648066, "iter_time": 0.9953886413574219, "loss": 0.06647343933582306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.313587219672478, "step_time": 0.9245977935791014} +{"epoch": 0, "iter": 2538, "iter_tflops": 22.367099878580373, "iter_time": 0.9223857192993163, "loss": 0.08123023808002472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.219996542151975, "step_time": 0.7579388732910156} +{"epoch": 0, "iter": 2539, "iter_tflops": 56.18256743237611, "iter_time": 0.36721521377563476, "loss": 0.034042272716760635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.57319838531704, "step_time": 0.3350661334991455} +{"epoch": 0, "iter": 2540, "iter_tflops": 53.58302711697075, "iter_time": 0.3850303840637207, "loss": 0.04615036025643349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.56032526102972, "step_time": 0.352304967880249} +{"epoch": 0, "iter": 2541, "iter_tflops": 32.71986170670088, "iter_time": 0.6305373077392578, "loss": 0.09897926449775696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.88164035737156, "step_time": 0.5914599571228027} +{"epoch": 0, "iter": 2542, "iter_tflops": 9.32127380389738, "iter_time": 2.2133341369628905, "loss": 0.16085226833820343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.427853003164246, 
"step_time": 1.6600690002441405} +{"epoch": 0, "iter": 2543, "iter_tflops": 10.948236409669828, "iter_time": 1.884421630859375, "loss": 0.12987130880355835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.115695099846478, "step_time": 1.5730080146789551} +{"epoch": 0, "iter": 2544, "iter_tflops": 23.112158866405522, "iter_time": 0.8926510772705079, "loss": 0.07789933681488037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.524129435452867, "step_time": 0.6987875308990479} +{"epoch": 0, "iter": 2545, "iter_tflops": 15.932917016129915, "iter_time": 0.9151572875976564, "loss": 0.40481245517730713, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 16.94072520790476, "step_time": 0.860714340209961} +{"epoch": 0, "iter": 2546, "iter_tflops": 6.714159034917935, "iter_time": 2.1716979064941406, "loss": 0.37788742780685425, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 9.20949122298161, "step_time": 1.5832715148925782} +{"epoch": 0, "iter": 2547, "iter_tflops": 7.4566109059689625, "iter_time": 1.9554627838134766, "loss": 0.45842456817626953, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 9.203787712592066, "step_time": 1.5842526550292968} +{"epoch": 0, "iter": 2548, "iter_tflops": 15.217048614030512, "iter_time": 0.9582098007202148, "loss": 0.3873538672924042, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 19.209449100355357, "step_time": 0.7590600357055665} +{"epoch": 0, "iter": 2549, "iter_tflops": 14.496299610582998, "iter_time": 1.1328633880615235, "loss": 0.5519332885742188, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 15.522380511012543, "step_time": 1.0579773559570311} +{"epoch": 0, "iter": 2550, "iter_tflops": 15.187917593986867, "iter_time": 1.0812757568359375, "loss": 0.36577939987182617, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 17.737717954134357, "step_time": 0.9258421592712403} +{"epoch": 0, "iter": 2551, "iter_tflops": 27.79654737308296, "iter_time": 0.5908045654296875, "loss": 0.49419355392456055, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 
29.59307163565224, "step_time": 0.5549382400512696} +{"epoch": 0, "iter": 2552, "iter_tflops": 30.391131434610553, "iter_time": 0.5403657684326172, "loss": 0.41149577498435974, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 32.31188428144889, "step_time": 0.5082441787719726} +{"epoch": 0, "iter": 2553, "iter_tflops": 44.138511195517005, "iter_time": 0.46741706848144526, "loss": 0.19941100478172302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.9551933890542, "step_time": 0.4214280872344971} +{"epoch": 0, "iter": 2554, "iter_tflops": 38.06462279372355, "iter_time": 0.5420017852783203, "loss": 0.2682986259460449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96382478315637, "step_time": 0.4916399688720703} +{"epoch": 0, "iter": 2555, "iter_tflops": 39.71684445513414, "iter_time": 0.5194544982910156, "loss": 0.21816563606262207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36823423026971, "step_time": 0.47571901130676275} +{"epoch": 0, "iter": 2556, "iter_tflops": 43.229090520449475, "iter_time": 0.4772502326965332, "loss": 0.1716589480638504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.42279450953772, "step_time": 0.4350459251403808} +{"epoch": 0, "iter": 2557, "iter_tflops": 15.460328441358035, "iter_time": 1.3344537658691404, "loss": 0.8379057049751282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.464186341061648, "step_time": 1.2530891647338869} +{"epoch": 0, "iter": 2558, "iter_tflops": 24.208766808240505, "iter_time": 0.8522157974243165, "loss": 0.7641770243644714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.36113898561902, "step_time": 0.6004193725585938} +{"epoch": 0, "iter": 2559, "iter_tflops": 48.20427402716988, "iter_time": 0.427993034362793, "loss": 0.8186686038970947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14054682644033, "step_time": 0.3956823387145997} +{"epoch": 0, "iter": 2560, "iter_tflops": 54.762004676441165, "iter_time": 0.3767410202026367, "loss": 0.7508878111839294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
59.21514782099638, "step_time": 0.3484090518951416} +{"epoch": 0, "iter": 2561, "iter_tflops": 27.223096335974137, "iter_time": 0.7578525695800781, "loss": 0.7704368829727173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.7423676279765, "step_time": 0.7177938079833984} +{"epoch": 0, "iter": 2562, "iter_tflops": 17.04828407654969, "iter_time": 1.2101566009521483, "loss": 0.5602684617042542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.941179321564274, "step_time": 0.985192533493042} +{"epoch": 0, "iter": 2563, "iter_tflops": 47.35634512150385, "iter_time": 0.4356563720703125, "loss": 0.7406067252159119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.67405417962878, "step_time": 0.3992544002532959} +{"epoch": 0, "iter": 2564, "iter_tflops": 52.044801642640714, "iter_time": 0.3964102630615234, "loss": 0.6165752410888672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.34320332822098, "step_time": 0.3661682739257812} +{"epoch": 0, "iter": 2565, "iter_tflops": 37.83268548514432, "iter_time": 0.5453245849609375, "loss": 0.601386547088623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56615247352707, "step_time": 0.5085790061950684} +{"epoch": 0, "iter": 2566, "iter_tflops": 10.111040561101087, "iter_time": 2.040452056884766, "loss": 0.7132236957550049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.218645047785248, "step_time": 1.6884927444458009} +{"epoch": 0, "iter": 2567, "iter_tflops": 14.030279034156212, "iter_time": 1.470469223022461, "loss": 0.6408553719520569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.00614403386758, "step_time": 1.2131552848815919} +{"epoch": 0, "iter": 2568, "iter_tflops": 14.846390374947172, "iter_time": 1.389637008666992, "loss": 0.8062666654586792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.55709723523497, "step_time": 1.175085678100586} +{"epoch": 0, "iter": 2569, "iter_tflops": 22.085490293813017, "iter_time": 0.7417226181030274, "loss": 0.4510566294193268, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 
23.507501530325705, "step_time": 0.6968544769287109} +{"epoch": 0, "iter": 2570, "iter_tflops": 11.076049033742223, "iter_time": 1.4789847564697265, "loss": 0.38884952664375305, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 12.816875626334012, "step_time": 1.2781045989990234} +{"epoch": 0, "iter": 2571, "iter_tflops": 24.71641239054712, "iter_time": 0.6627704467773438, "loss": 0.4165349006652832, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 26.60964197862474, "step_time": 0.6156154861450196} +{"epoch": 0, "iter": 2572, "iter_tflops": 26.94798212321827, "iter_time": 0.607886245727539, "loss": 0.4869828522205353, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 28.831954126844916, "step_time": 0.5681650161743164} +{"epoch": 0, "iter": 2573, "iter_tflops": 16.68898968996698, "iter_time": 1.2362098541259765, "loss": 1.0095713138580322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.97873481534523, "step_time": 1.1475275497436523} +{"epoch": 0, "iter": 2574, "iter_tflops": 19.495721766131417, "iter_time": 1.0582369689941409, "loss": 0.8703785538673401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.82721635417843, "step_time": 0.7988121223449708} +{"epoch": 0, "iter": 2575, "iter_tflops": 46.028807279614156, "iter_time": 0.44822133636474604, "loss": 0.777418851852417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.02349594772591, "step_time": 0.41242806243896485} +{"epoch": 0, "iter": 2576, "iter_tflops": 50.09036600787892, "iter_time": 0.4118774757385254, "loss": 1.0921354293823242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.28262904917076, "step_time": 0.3800680599212646} +{"epoch": 0, "iter": 2577, "iter_tflops": 37.615549062794116, "iter_time": 0.5484724807739259, "loss": 0.6423323750495911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.741766223250494, "step_time": 0.5063868217468261} +{"epoch": 0, "iter": 2578, "iter_tflops": 43.117996828025035, "iter_time": 0.47847986984252927, "loss": 0.7097200751304626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
48.36006498988838, "step_time": 0.4266142635345459} +{"epoch": 0, "iter": 2579, "iter_tflops": 43.27653243385515, "iter_time": 0.47672704696655277, "loss": 0.8489394187927246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.670072402221116, "step_time": 0.4420625991821289} +{"epoch": 0, "iter": 2580, "iter_tflops": 47.14465411582362, "iter_time": 0.4376125755310058, "loss": 0.636384129524231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.141742635292545, "step_time": 0.40341006088256837} +{"epoch": 0, "iter": 2581, "iter_tflops": 28.691729443506333, "iter_time": 0.7190606460571288, "loss": 0.8469679355621338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.408346733604237, "step_time": 0.6784681091308593} +{"epoch": 0, "iter": 2582, "iter_tflops": 20.1027507606991, "iter_time": 1.026282112121582, "loss": 0.784053385257721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.802510943251054, "step_time": 0.8318147125244141} +{"epoch": 0, "iter": 2583, "iter_tflops": 44.46370945423441, "iter_time": 0.463998477935791, "loss": 0.9220026731491089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0968753056714, "step_time": 0.42894872856140137} +{"epoch": 0, "iter": 2584, "iter_tflops": 47.4103096971527, "iter_time": 0.43516048812866204, "loss": 0.7544506192207336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.00720637869922, "step_time": 0.40447409248352056} +{"epoch": 0, "iter": 2585, "iter_tflops": 31.288813321189526, "iter_time": 0.6593760299682616, "loss": 0.08544102311134338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.339511037342405, "step_time": 0.6188181190490722} +{"epoch": 0, "iter": 2586, "iter_tflops": 16.556610578245092, "iter_time": 1.2460940246582033, "loss": 0.11560836434364319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.605181612340957, "step_time": 1.1088896598815918} +{"epoch": 0, "iter": 2587, "iter_tflops": 42.670568068254376, "iter_time": 0.48349704360961915, "loss": 0.13879571855068207, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 46.93233999092329, "step_time": 0.43959226226806636} +{"epoch": 0, "iter": 2588, "iter_tflops": 38.993480000223734, "iter_time": 0.5290908508300781, "loss": 0.09909738600254059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.713016930138245, "step_time": 0.48301653671264644} +{"epoch": 0, "iter": 2589, "iter_tflops": 34.12092856022504, "iter_time": 0.6046463088989258, "loss": 0.19939810037612915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.81879301428581, "step_time": 0.5455249061584473} +{"epoch": 0, "iter": 2590, "iter_tflops": 40.38738200413692, "iter_time": 0.5108301773071289, "loss": 0.24213863909244537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.0355361647259, "step_time": 0.4685101013183594} +{"epoch": 0, "iter": 2591, "iter_tflops": 43.026179632344025, "iter_time": 0.4795009384155273, "loss": 0.2182304412126541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.80937266955229, "step_time": 0.4407470626831055} +{"epoch": 0, "iter": 2592, "iter_tflops": 41.40436891301399, "iter_time": 0.4982830085754395, "loss": 0.2603744566440582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.01379783473193, "step_time": 0.45832821273803703} +{"epoch": 0, "iter": 2593, "iter_tflops": 17.30907531367087, "iter_time": 1.0580762023925783, "loss": 0.013519782572984695, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 18.53882543786369, "step_time": 0.9878900222778321} +{"epoch": 0, "iter": 2594, "iter_tflops": 17.210341699063235, "iter_time": 1.064146255493164, "loss": 0.00408688560128212, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 21.150220685287426, "step_time": 0.8659162921905518} +{"epoch": 0, "iter": 2595, "iter_tflops": 40.06667422417935, "iter_time": 0.4570961036682129, "loss": 0.01895727589726448, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 44.324819697397125, "step_time": 0.4131843242645264} +{"epoch": 0, "iter": 2596, "iter_tflops": 45.25998610162713, "iter_time": 0.40464706802368167, "loss": 0.008794112130999565, "lr": 3e-05, "seqlen": 
7296.0, "step_tflops": 50.03086037681195, "step_time": 0.36606047821044924} +{"epoch": 0, "iter": 2597, "iter_tflops": 34.781411190197964, "iter_time": 0.5931643600463867, "loss": 0.3942977488040924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.39559926722942, "step_time": 0.5373296394348144} +{"epoch": 0, "iter": 2598, "iter_tflops": 33.59344557492449, "iter_time": 0.6141404418945312, "loss": 0.3350790739059448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.42501754618079, "step_time": 0.5369182586669922} +{"epoch": 0, "iter": 2599, "iter_tflops": 40.0889562142448, "iter_time": 0.5146328430175782, "loss": 0.339359849691391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.664913761775225, "step_time": 0.47248675727844236} +{"epoch": 0, "iter": 2600, "iter_tflops": 42.942811369820475, "iter_time": 0.4804318313598633, "loss": 0.32920950651168823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.70478600601878, "step_time": 0.44173403358459473} +{"epoch": 0, "iter": 2601, "iter_tflops": 21.32634282981498, "iter_time": 0.9673995056152342, "loss": 1.075319528579712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.669201225486763, "step_time": 0.9100935363769531} +{"epoch": 0, "iter": 2602, "iter_tflops": 17.41355119379306, "iter_time": 1.1847723236083987, "loss": 0.6568031907081604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.085329273722397, "step_time": 0.9784572601318359} +{"epoch": 0, "iter": 2603, "iter_tflops": 50.46147922861943, "iter_time": 0.40884836959838866, "loss": 0.9234777092933655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.38059019178819, "step_time": 0.3793834056854248} +{"epoch": 0, "iter": 2604, "iter_tflops": 48.01764190642922, "iter_time": 0.42965653228759765, "loss": 0.9551945924758911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.529977523826524, "step_time": 0.40037070655822754} +{"epoch": 0, "iter": 2605, "iter_tflops": 29.50617063479472, "iter_time": 0.6992128448486329, "loss": 0.6537678241729736, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 31.416184737813985, "step_time": 0.6567027053833008} +{"epoch": 0, "iter": 2606, "iter_tflops": 9.839409478835593, "iter_time": 2.096781677246094, "loss": 0.6754240989685059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.433276219306697, "step_time": 1.6593449020385744} +{"epoch": 0, "iter": 2607, "iter_tflops": 10.401132397459163, "iter_time": 1.9835430145263673, "loss": 0.5417210459709167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.76019921494884, "step_time": 1.616831615447998} +{"epoch": 0, "iter": 2608, "iter_tflops": 47.96758235490432, "iter_time": 0.4301049270629883, "loss": 0.527508556842804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.34939530242848, "step_time": 0.3941037597656249} +{"epoch": 0, "iter": 2609, "iter_tflops": 21.27035707761879, "iter_time": 0.775933380126953, "loss": 0.46789559721946716, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 22.42716619569851, "step_time": 0.7359101867675781} +{"epoch": 0, "iter": 2610, "iter_tflops": 10.103843453525393, "iter_time": 1.6334754333496093, "loss": 0.3276488184928894, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 11.619007252733907, "step_time": 1.4204638748168945} +{"epoch": 0, "iter": 2611, "iter_tflops": 9.319400057215192, "iter_time": 1.7709702301025392, "loss": 0.4553754925727844, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 10.911007782502558, "step_time": 1.512635715484619} +{"epoch": 0, "iter": 2612, "iter_tflops": 21.22912166908092, "iter_time": 0.7774405517578125, "loss": 0.3117671608924866, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 23.258492837938157, "step_time": 0.7096066017150878} +{"epoch": 0, "iter": 2613, "iter_tflops": 23.347432286699235, "iter_time": 0.6210325622558595, "loss": 0.4672984182834625, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 25.088873293273085, "step_time": 0.577926139831543} +{"epoch": 0, "iter": 2614, "iter_tflops": 20.965083959857164, "iter_time": 0.6916030349731445, "loss": 0.5400591492652893, "lr": 3e-05, "seqlen": 
5808.0, "step_tflops": 22.52766889500062, "step_time": 0.6436314277648925} +{"epoch": 0, "iter": 2615, "iter_tflops": 22.374232902819546, "iter_time": 0.648045265197754, "loss": 0.4028530716896057, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.194963796759634, "step_time": 0.5992782554626466} +{"epoch": 0, "iter": 2616, "iter_tflops": 21.505051047819, "iter_time": 0.6742376785278321, "loss": 0.31377312541007996, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.19893753256357, "step_time": 0.6250077476501466} +{"epoch": 0, "iter": 2617, "iter_tflops": 20.369185401607655, "iter_time": 1.0128580551147461, "loss": 1.133917212486267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.726372751495326, "step_time": 0.9495875701904297} +{"epoch": 0, "iter": 2618, "iter_tflops": 25.8754712541159, "iter_time": 0.7973224258422852, "loss": 0.9429506659507751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.191978255733034, "step_time": 0.661423053741455} +{"epoch": 0, "iter": 2619, "iter_tflops": 44.192069075236525, "iter_time": 0.46685058975219723, "loss": 0.9325621128082275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.55306429529598, "step_time": 0.43385413360595704} +{"epoch": 0, "iter": 2620, "iter_tflops": 38.652083249494574, "iter_time": 0.5337640762329101, "loss": 0.789776623249054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57907999647053, "step_time": 0.4961892738342285} +{"epoch": 0, "iter": 2621, "iter_tflops": 33.928052616731875, "iter_time": 0.6080836334228517, "loss": 0.15507185459136963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.77338565643553, "step_time": 0.5461806812286377} +{"epoch": 0, "iter": 2622, "iter_tflops": 49.30009618718577, "iter_time": 0.4184797821044922, "loss": 0.28985536098480225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.754724312278334, "step_time": 0.37679111289978023} +{"epoch": 0, "iter": 2623, "iter_tflops": 40.85738679978618, "iter_time": 0.5049538192749023, "loss": 0.19835925102233887, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 44.97056604399398, "step_time": 0.45876881980895995} +{"epoch": 0, "iter": 2624, "iter_tflops": 51.294088972733775, "iter_time": 0.4022119102478028, "loss": 0.24092347919940948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.63412894390993, "step_time": 0.3708352031707764} +{"epoch": 0, "iter": 2625, "iter_tflops": 38.62683390897026, "iter_time": 0.5341129837036133, "loss": 0.5250487327575684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.19938312400719, "step_time": 0.48889561843872065} +{"epoch": 0, "iter": 2626, "iter_tflops": 38.71196763161649, "iter_time": 0.5329383850097655, "loss": 0.5729211568832397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.355821425861784, "step_time": 0.4870899162292481} +{"epoch": 0, "iter": 2627, "iter_tflops": 40.858438119531776, "iter_time": 0.5049408264160156, "loss": 0.35761600732803345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64915548142051, "step_time": 0.46207130432128907} +{"epoch": 0, "iter": 2628, "iter_tflops": 40.42329176506048, "iter_time": 0.5103763847351074, "loss": 0.4931696951389313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.697029214486, "step_time": 0.47213949966430657} +{"epoch": 0, "iter": 2629, "iter_tflops": 21.525057533855346, "iter_time": 0.9584686813354492, "loss": 1.1170040369033813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.86110817149914, "step_time": 0.9024537811279296} +{"epoch": 0, "iter": 2630, "iter_tflops": 16.063732782379606, "iter_time": 1.2843274841308594, "loss": 0.9666848182678223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.496298410290898, "step_time": 1.0582056694030761} +{"epoch": 0, "iter": 2631, "iter_tflops": 32.62758469222863, "iter_time": 0.6323205871582032, "loss": 0.7050678133964539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.21970369166594, "step_time": 0.5857827110290527} +{"epoch": 0, "iter": 2632, "iter_tflops": 39.18443952834551, "iter_time": 0.5265124053955078, "loss": 1.0332075357437134, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 42.45567368979408, "step_time": 0.485944320678711} +{"epoch": 0, "iter": 2633, "iter_tflops": 23.887207344326892, "iter_time": 0.8636879653930665, "loss": 0.631029486656189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.864574916951234, "step_time": 0.7976583251953124} +{"epoch": 0, "iter": 2634, "iter_tflops": 38.61998122787161, "iter_time": 0.5342077560424804, "loss": 0.7130712270736694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.594318648690624, "step_time": 0.42455772781372075} +{"epoch": 0, "iter": 2635, "iter_tflops": 49.273543747229816, "iter_time": 0.4187052917480468, "loss": 0.5475887060165405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21227978326429, "step_time": 0.3877130165100098} +{"epoch": 0, "iter": 2636, "iter_tflops": 48.65214284930666, "iter_time": 0.42405313110351567, "loss": 0.5286072492599487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.72281264186675, "step_time": 0.39131245994567876} +{"epoch": 0, "iter": 2637, "iter_tflops": 45.3212496591422, "iter_time": 0.4552189903259278, "loss": 0.394795686006546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.474643166192365, "step_time": 0.417003381729126} +{"epoch": 0, "iter": 2638, "iter_tflops": 36.26811914501365, "iter_time": 0.5688492813110352, "loss": 0.3934239149093628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.460981517828145, "step_time": 0.5099009647369385} +{"epoch": 0, "iter": 2639, "iter_tflops": 40.707440069889785, "iter_time": 0.5068138275146484, "loss": 0.3676677644252777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6886064446864, "step_time": 0.4616633892059327} +{"epoch": 0, "iter": 2640, "iter_tflops": 41.79307565480753, "iter_time": 0.4936486053466797, "loss": 0.34115689992904663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.92954681805105, "step_time": 0.44919000816345217} +{"epoch": 0, "iter": 2641, "iter_tflops": 35.245512932905186, "iter_time": 0.585353759765625, "loss": 0.29576998949050903, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 38.537610423036554, "step_time": 0.5353495788574218} +{"epoch": 0, "iter": 2642, "iter_tflops": 40.27790823688725, "iter_time": 0.5122185935974122, "loss": 0.3398682177066803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.86469555140267, "step_time": 0.47033481597900395} +{"epoch": 0, "iter": 2643, "iter_tflops": 49.85379055748284, "iter_time": 0.4138319931030274, "loss": 0.35883641242980957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.88985979671982, "step_time": 0.3828381366729736} +{"epoch": 0, "iter": 2644, "iter_tflops": 48.76968709988696, "iter_time": 0.42303108215332036, "loss": 0.2700459361076355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76744162353501, "step_time": 0.3909815006256103} +{"epoch": 0, "iter": 2645, "iter_tflops": 26.630536040066, "iter_time": 0.7747156677246094, "loss": 1.028950810432434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.975585084611975, "step_time": 0.7374678115844727} +{"epoch": 0, "iter": 2646, "iter_tflops": 15.66242522824905, "iter_time": 1.317234924316406, "loss": 1.1509398221969604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.435491431380537, "step_time": 0.9624735488891601} +{"epoch": 0, "iter": 2647, "iter_tflops": 35.773901421851015, "iter_time": 0.5767079544067384, "loss": 0.9290393590927124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.950580804553816, "step_time": 0.5296735782623291} +{"epoch": 0, "iter": 2648, "iter_tflops": 40.00128411618401, "iter_time": 0.5157607803344726, "loss": 1.0284911394119263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66103727138624, "step_time": 0.47252870750427245} +{"epoch": 0, "iter": 2649, "iter_tflops": 21.27462226671445, "iter_time": 0.9697513427734374, "loss": 0.15619686245918274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.84277777964027, "step_time": 0.903177963256836} +{"epoch": 0, "iter": 2650, "iter_tflops": 14.601336750401405, "iter_time": 1.4129592285156247, "loss": 0.17593860626220703, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 18.884238027435543, "step_time": 1.092503360748291} +{"epoch": 0, "iter": 2651, "iter_tflops": 50.91236252478446, "iter_time": 0.40522758102416995, "loss": 0.16132143139839172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.31175600662198, "step_time": 0.37299653816223144} +{"epoch": 0, "iter": 2652, "iter_tflops": 49.05061589667636, "iter_time": 0.4206082458496094, "loss": 0.17063626646995544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21415129320329, "step_time": 0.3876993808746338} +{"epoch": 0, "iter": 2653, "iter_tflops": 26.61598766115847, "iter_time": 0.7751391296386719, "loss": 0.004015330690890551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.098034488059753, "step_time": 0.7342539749145508} +{"epoch": 0, "iter": 2654, "iter_tflops": 10.278459780483972, "iter_time": 2.007216445922851, "loss": 0.007033191155642271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.940660216047608, "step_time": 1.5942844619750975} +{"epoch": 0, "iter": 2655, "iter_tflops": 11.022622256275469, "iter_time": 1.8717046661376955, "loss": 0.020410306751728058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.658893500596228, "step_time": 1.6297706832885743} +{"epoch": 0, "iter": 2656, "iter_tflops": 24.68364154578291, "iter_time": 0.8358204956054687, "loss": 0.009002620354294777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.912620611501815, "step_time": 0.48076983451843264} +{"epoch": 0, "iter": 2657, "iter_tflops": 12.096248114665041, "iter_time": 1.1784481048583986, "loss": 0.416561484336853, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 12.698826717577875, "step_time": 1.122528953552246} +{"epoch": 0, "iter": 2658, "iter_tflops": 10.15144029309295, "iter_time": 1.4042145996093751, "loss": 0.4987671375274658, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 12.099170733007401, "step_time": 1.178163444519043} +{"epoch": 0, "iter": 2659, "iter_tflops": 22.326385569477658, "iter_time": 0.6384732818603516, "loss": 0.4848276674747467, "lr": 3e-05, 
"seqlen": 5712.0, "step_tflops": 24.104208857040422, "step_time": 0.5913822250366211} +{"epoch": 0, "iter": 2660, "iter_tflops": 23.014041333981204, "iter_time": 0.6193958053588867, "loss": 0.3860302269458771, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 24.693095680650448, "step_time": 0.5772788009643555} +{"epoch": 0, "iter": 2661, "iter_tflops": 19.84378113800485, "iter_time": 1.039675521850586, "loss": 0.2954200506210327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.308842886951165, "step_time": 0.9681939849853515} +{"epoch": 0, "iter": 2662, "iter_tflops": 8.65201152442247, "iter_time": 2.384542999267578, "loss": 0.28317731618881226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.75512143946019, "step_time": 1.9182576065063477} +{"epoch": 0, "iter": 2663, "iter_tflops": 13.516324405306605, "iter_time": 1.5263834228515627, "loss": 0.3038672208786011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.824405337325379, "step_time": 1.3037515830993653} +{"epoch": 0, "iter": 2664, "iter_tflops": 37.359225891552, "iter_time": 0.5522355728149414, "loss": 0.37575992941856384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.429989033135655, "step_time": 0.49797487258911133} +{"epoch": 0, "iter": 2665, "iter_tflops": 12.810486795325554, "iter_time": 1.2339524383544922, "loss": 0.5628899931907654, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 13.511855675095907, "step_time": 1.169900848388672} +{"epoch": 0, "iter": 2666, "iter_tflops": 10.4040883665357, "iter_time": 1.5193576660156247, "loss": 0.46151086688041687, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 13.412228744363146, "step_time": 1.1785909500122072} +{"epoch": 0, "iter": 2667, "iter_tflops": 28.68373331360009, "iter_time": 0.5510974197387695, "loss": 0.3201928734779358, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.544337596410873, "step_time": 0.5175273933410645} +{"epoch": 0, "iter": 2668, "iter_tflops": 27.311189656640483, "iter_time": 0.5787932205200196, "loss": 0.3712896406650543, "lr": 3e-05, 
"seqlen": 6320.0, "step_tflops": 28.943852746317358, "step_time": 0.546144687652588} +{"epoch": 0, "iter": 2669, "iter_tflops": 20.584119244345143, "iter_time": 0.8337244415283203, "loss": 0.05634918436408043, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 21.739120208780005, "step_time": 0.7894286041259765} +{"epoch": 0, "iter": 2670, "iter_tflops": 15.553358254970407, "iter_time": 1.1033940734863281, "loss": 0.052156757563352585, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 21.278337221291444, "step_time": 0.80652370262146} +{"epoch": 0, "iter": 2671, "iter_tflops": 36.240112617373704, "iter_time": 0.4735493927001953, "loss": 0.020590737462043762, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 40.05579329859612, "step_time": 0.42843948173522955} +{"epoch": 0, "iter": 2672, "iter_tflops": 38.05901347471239, "iter_time": 0.4509177131652831, "loss": 0.03371405228972435, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 42.087179665918775, "step_time": 0.40776035499572755} +{"epoch": 0, "iter": 2673, "iter_tflops": 21.09060223879512, "iter_time": 0.978212631225586, "loss": 1.0710846185684204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.41452880662897, "step_time": 0.9204339599609376} +{"epoch": 0, "iter": 2674, "iter_tflops": 22.245236498450822, "iter_time": 0.9274387130737305, "loss": 0.8982913494110107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.481859954309318, "step_time": 0.7507167835235595} +{"epoch": 0, "iter": 2675, "iter_tflops": 41.743853160559354, "iter_time": 0.4942306938171387, "loss": 0.9351644515991211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.91027203953591, "step_time": 0.45938473701477056} +{"epoch": 0, "iter": 2676, "iter_tflops": 44.29848643398077, "iter_time": 0.46572908401489255, "loss": 1.1194690465927124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.53563407292457, "step_time": 0.4340132179260254} +{"epoch": 0, "iter": 2677, "iter_tflops": 27.393173057100967, "iter_time": 0.7531472702026367, "loss": 0.3987867534160614, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 28.966155617854316, "step_time": 0.7122482452392579} +{"epoch": 0, "iter": 2678, "iter_tflops": 16.978309539551002, "iter_time": 1.2151441497802735, "loss": 0.351546972990036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.422264572594678, "step_time": 0.9630678138732911} +{"epoch": 0, "iter": 2679, "iter_tflops": 38.30948856691219, "iter_time": 0.5385374298095703, "loss": 0.5360808968544006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.69809920772818, "step_time": 0.4947729969024659} +{"epoch": 0, "iter": 2680, "iter_tflops": 42.963530981304295, "iter_time": 0.4802001380920411, "loss": 0.5170051455497742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.85913296104075, "step_time": 0.44027902793884277} +{"epoch": 0, "iter": 2681, "iter_tflops": 20.249481349495678, "iter_time": 1.0188455276489259, "loss": 0.9555179476737976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.33711007919971, "step_time": 0.9669113311767578} +{"epoch": 0, "iter": 2682, "iter_tflops": 11.775390229650682, "iter_time": 1.7520517883300781, "loss": 1.0723536014556885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.507102036364328, "step_time": 1.5274256057739257} +{"epoch": 0, "iter": 2683, "iter_tflops": 15.005889569024932, "iter_time": 1.3748664093017577, "loss": 1.097977638244629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.26542344405748, "step_time": 1.129516300201416} +{"epoch": 0, "iter": 2684, "iter_tflops": 36.97711157528574, "iter_time": 0.557942268371582, "loss": 1.0662881135940552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.193399180462215, "step_time": 0.4565067882537842} +{"epoch": 0, "iter": 2685, "iter_tflops": 20.61680371286761, "iter_time": 0.7667304611206054, "loss": 0.3821938633918762, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 21.774940232004713, "step_time": 0.7259506225585938} +{"epoch": 0, "iter": 2686, "iter_tflops": 8.867400972197172, "iter_time": 1.7826566619873048, "loss": 0.39377015829086304, "lr": 
3e-05, "seqlen": 6320.0, "step_tflops": 12.038126840853261, "step_time": 1.3131221847534178} +{"epoch": 0, "iter": 2687, "iter_tflops": 23.680698686987483, "iter_time": 0.6675280838012695, "loss": 0.4743364751338959, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.51053404114913, "step_time": 0.619647216796875} +{"epoch": 0, "iter": 2688, "iter_tflops": 25.050141794968365, "iter_time": 0.6310356063842774, "loss": 0.5483878254890442, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 26.89687975151192, "step_time": 0.5877087440490723} +{"epoch": 0, "iter": 2689, "iter_tflops": 16.041896129607093, "iter_time": 1.2860757446289062, "loss": 1.0874483585357666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.96274158977062, "step_time": 1.216259376525879} +{"epoch": 0, "iter": 2690, "iter_tflops": 16.64752839749372, "iter_time": 1.2392886810302732, "loss": 0.9484497308731079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.647653228929435, "step_time": 0.8724372482299805} +{"epoch": 0, "iter": 2691, "iter_tflops": 43.82561289434632, "iter_time": 0.47075424957275386, "loss": 0.8046667575836182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.87538774364608, "step_time": 0.4401263542175293} +{"epoch": 0, "iter": 2692, "iter_tflops": 45.44133208827797, "iter_time": 0.45401603698730475, "loss": 0.7834228277206421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87096335230066, "step_time": 0.42215442657470703} +{"epoch": 0, "iter": 2693, "iter_tflops": 43.41457612254784, "iter_time": 0.4752112159729004, "loss": 0.7331061363220215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.322824165614726, "step_time": 0.4359649677276612} +{"epoch": 0, "iter": 2694, "iter_tflops": 47.2048547791224, "iter_time": 0.43705448532104496, "loss": 0.9665573835372925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.08175420037357, "step_time": 0.403883810043335} +{"epoch": 0, "iter": 2695, "iter_tflops": 48.482012015977396, "iter_time": 0.4255411987304687, "loss": 0.61397784948349, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 52.81531880458751, "step_time": 0.3906270751953125} +{"epoch": 0, "iter": 2696, "iter_tflops": 46.58950649595765, "iter_time": 0.44282704544067386, "loss": 0.8066710233688354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.48256584615928, "step_time": 0.4086775932312011} +{"epoch": 0, "iter": 2697, "iter_tflops": 29.690616334673493, "iter_time": 0.6948691558837891, "loss": 1.0368785858154297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.1069329957965, "step_time": 0.6425744094848633} +{"epoch": 0, "iter": 2698, "iter_tflops": 9.193578740161138, "iter_time": 2.244076446533203, "loss": 0.6502652764320374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.466955463961575, "step_time": 1.6548622131347654} +{"epoch": 0, "iter": 2699, "iter_tflops": 17.19273773412172, "iter_time": 1.1999888458251955, "loss": 0.9671692848205566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.668129375031796, "step_time": 0.9982080688476563} +{"epoch": 0, "iter": 2700, "iter_tflops": 37.7365342981187, "iter_time": 0.5467140502929688, "loss": 0.9112911224365234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.26554658333421, "step_time": 0.4999592933654785} +{"epoch": 0, "iter": 2701, "iter_tflops": 18.16160502758459, "iter_time": 0.9653130722045898, "loss": 0.4094420373439789, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 19.270317004186392, "step_time": 0.9097740707397461} +{"epoch": 0, "iter": 2702, "iter_tflops": 9.103149440596331, "iter_time": 1.9258867340087893, "loss": 0.48433196544647217, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 10.884816840610151, "step_time": 1.6106504135131836} +{"epoch": 0, "iter": 2703, "iter_tflops": 9.037705352821092, "iter_time": 1.9398325195312498, "loss": 0.5440446138381958, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 10.458986765890092, "step_time": 1.6762268791198731} +{"epoch": 0, "iter": 2704, "iter_tflops": 27.844942352099764, "iter_time": 0.629616485595703, "loss": 0.5046179890632629, "lr": 
3e-05, "seqlen": 6992.0, "step_tflops": 29.843690802665943, "step_time": 0.5874486122131348} +{"epoch": 0, "iter": 2705, "iter_tflops": 15.30310760703145, "iter_time": 0.9928569946289062, "loss": 0.44449710845947266, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 16.48974661984847, "step_time": 0.9214087867736817} +{"epoch": 0, "iter": 2706, "iter_tflops": 21.63873237306112, "iter_time": 0.7021574630737304, "loss": 0.4155282974243164, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 23.64918115567636, "step_time": 0.6424661102294922} +{"epoch": 0, "iter": 2707, "iter_tflops": 26.964539984012553, "iter_time": 0.5634732666015625, "loss": 0.5193727612495422, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.756903645444936, "step_time": 0.5283530387878418} +{"epoch": 0, "iter": 2708, "iter_tflops": 27.696498951541898, "iter_time": 0.5485818786621094, "loss": 0.5346317887306213, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.410516430123938, "step_time": 0.5166110382080078} +{"epoch": 0, "iter": 2709, "iter_tflops": 24.785728208446553, "iter_time": 0.832377944946289, "loss": 1.1033005714416504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.123742515444498, "step_time": 0.7897449417114257} +{"epoch": 0, "iter": 2710, "iter_tflops": 13.621625619428208, "iter_time": 1.5145838012695312, "loss": 1.1022543907165527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.826665554211765, "step_time": 1.3035653934478761} +{"epoch": 0, "iter": 2711, "iter_tflops": 39.017705648274976, "iter_time": 0.5287623443603516, "loss": 0.9554411768913269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.66474258330972, "step_time": 0.4835630607604981} +{"epoch": 0, "iter": 2712, "iter_tflops": 39.58175906971924, "iter_time": 0.5212273025512696, "loss": 0.888298511505127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.17769589906049, "step_time": 0.4778183059692383} +{"epoch": 0, "iter": 2713, "iter_tflops": 27.70033236769873, "iter_time": 0.7447958831787109, "loss": 0.27660080790519714, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 29.609790267916253, "step_time": 0.6967659454345704} +{"epoch": 0, "iter": 2714, "iter_tflops": 10.092873654108418, "iter_time": 2.044124816894531, "loss": 0.24919863045215607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.895463978358052, "step_time": 1.734366439819336} +{"epoch": 0, "iter": 2715, "iter_tflops": 12.723946136831966, "iter_time": 1.6214382934570313, "loss": 0.20951984822750092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.136147321865769, "step_time": 1.459456600189209} +{"epoch": 0, "iter": 2716, "iter_tflops": 34.9423932543016, "iter_time": 0.5904316101074218, "loss": 0.22905227541923523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.52813780389274, "step_time": 0.5354812011718749} +{"epoch": 0, "iter": 2717, "iter_tflops": 14.715779044616466, "iter_time": 1.1159672241210938, "loss": 0.4415625333786011, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 15.735609725573841, "step_time": 1.0436409759521483} +{"epoch": 0, "iter": 2718, "iter_tflops": 13.819269224212826, "iter_time": 1.1883643646240236, "loss": 0.3575315773487091, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 16.052519823389517, "step_time": 1.0230373344421386} +{"epoch": 0, "iter": 2719, "iter_tflops": 25.235055768262548, "iter_time": 0.6507743530273438, "loss": 0.5437477827072144, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 27.16384992897124, "step_time": 0.6045655212402343} +{"epoch": 0, "iter": 2720, "iter_tflops": 25.327297073352927, "iter_time": 0.6484042510986329, "loss": 0.5072650909423828, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 27.231508920500087, "step_time": 0.6030634269714356} +{"epoch": 0, "iter": 2721, "iter_tflops": 20.42501083550912, "iter_time": 1.0100897216796876, "loss": 0.8784989714622498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.035638684302135, "step_time": 0.9362602920532227} +{"epoch": 0, "iter": 2722, "iter_tflops": 25.229798709384884, "iter_time": 0.8177272338867188, "loss": 0.8006551861763, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 30.6470733188665, "step_time": 0.6731831550598144} +{"epoch": 0, "iter": 2723, "iter_tflops": 44.310717530740014, "iter_time": 0.465600528717041, "loss": 0.8934669494628906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.52252228984092, "step_time": 0.43413296508789057} +{"epoch": 0, "iter": 2724, "iter_tflops": 45.34662878435732, "iter_time": 0.4549642181396485, "loss": 0.8469520211219788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.8234474561526, "step_time": 0.4225652751922608} +{"epoch": 0, "iter": 2725, "iter_tflops": 24.823874103357603, "iter_time": 0.831098861694336, "loss": 0.013648100197315216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.035887214395142, "step_time": 0.7924098510742187} +{"epoch": 0, "iter": 2726, "iter_tflops": 15.563551613694774, "iter_time": 1.3256031799316406, "loss": 0.0041136066429317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.33202881369289, "step_time": 1.1254124526977538} +{"epoch": 0, "iter": 2727, "iter_tflops": 40.78166456222496, "iter_time": 0.5058914031982421, "loss": 0.008333112113177776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.083223308586476, "step_time": 0.39611783218383784} +{"epoch": 0, "iter": 2728, "iter_tflops": 56.09035303188983, "iter_time": 0.36781892776489256, "loss": 0.011059391312301159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.57128470052176, "step_time": 0.3350765476226807} +{"epoch": 0, "iter": 2729, "iter_tflops": 25.749026686886676, "iter_time": 0.8012378005981445, "loss": 0.36197876930236816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.091355330840024, "step_time": 0.7615378875732423} +{"epoch": 0, "iter": 2730, "iter_tflops": 13.092121005007634, "iter_time": 1.5758404235839842, "loss": 0.3419490456581116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.75988065366036, "step_time": 1.309089450836182} +{"epoch": 0, "iter": 2731, "iter_tflops": 38.55880285253369, "iter_time": 0.5350553436279297, "loss": 0.4151841104030609, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1930823618967, "step_time": 0.4889686260223388} +{"epoch": 0, "iter": 2732, "iter_tflops": 40.768250207592224, "iter_time": 0.506057861328125, "loss": 0.310267835855484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.28120981761115, "step_time": 0.4659107913970947} +{"epoch": 0, "iter": 2733, "iter_tflops": 20.61579351232669, "iter_time": 1.0007421493530273, "loss": 0.2095538079738617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.97784698465701, "step_time": 0.9387222290039062} +{"epoch": 0, "iter": 2734, "iter_tflops": 20.458496331123936, "iter_time": 1.0084364547729492, "loss": 0.2960602045059204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.235719622531107, "step_time": 0.7575013179779052} +{"epoch": 0, "iter": 2735, "iter_tflops": 50.824272619563764, "iter_time": 0.405929931640625, "loss": 0.342538058757782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.98135132574331, "step_time": 0.3752380218505859} +{"epoch": 0, "iter": 2736, "iter_tflops": 50.910897424771335, "iter_time": 0.40523924255371097, "loss": 0.3213053047657013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.17678284909535, "step_time": 0.3739089603424072} +{"epoch": 0, "iter": 2737, "iter_tflops": 20.589207749045837, "iter_time": 1.0020343551635742, "loss": 0.004661664832383394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.484152986933946, "step_time": 0.9602935485839844} +{"epoch": 0, "iter": 2738, "iter_tflops": 19.389108063180032, "iter_time": 1.0640558319091795, "loss": 0.01417464017868042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.78672486706617, "step_time": 0.7166877651214599} +{"epoch": 0, "iter": 2739, "iter_tflops": 59.01625519928221, "iter_time": 0.34958323669433594, "loss": 0.0009076615679077804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.86635098057332, "step_time": 0.31805540466308596} +{"epoch": 0, "iter": 2740, "iter_tflops": 55.22430788411221, "iter_time": 0.3735871810913086, "loss": 
0.007623722776770592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.40694824016047, "step_time": 0.3415351066589356} +{"epoch": 0, "iter": 2741, "iter_tflops": 26.38561490503044, "iter_time": 0.781906867980957, "loss": 0.7899184823036194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.739826900729014, "step_time": 0.743735481262207} +{"epoch": 0, "iter": 2742, "iter_tflops": 11.894327322767507, "iter_time": 1.7345321807861327, "loss": 1.061452031135559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.7428372875603, "step_time": 1.3993977622985838} +{"epoch": 0, "iter": 2743, "iter_tflops": 45.41282811014036, "iter_time": 0.45430100631713866, "loss": 0.8356299996376038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1147272281149, "step_time": 0.4200592098236084} +{"epoch": 0, "iter": 2744, "iter_tflops": 48.63208899462223, "iter_time": 0.42422799301147457, "loss": 0.916677713394165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.6226139147733, "step_time": 0.3920575580596924} +{"epoch": 0, "iter": 2745, "iter_tflops": 30.291647067342964, "iter_time": 0.6810819320678712, "loss": 0.4664230942726135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.15319318132141, "step_time": 0.6416499099731445} +{"epoch": 0, "iter": 2746, "iter_tflops": 9.886498284528495, "iter_time": 2.0867948303222654, "loss": 0.7126191854476929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.061064728790205, "step_time": 1.8651996002197264} +{"epoch": 0, "iter": 2747, "iter_tflops": 11.69963524097842, "iter_time": 1.7633963012695313, "loss": 0.6757334470748901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.888756034123388, "step_time": 1.4854529418945313} +{"epoch": 0, "iter": 2748, "iter_tflops": 20.756874098735878, "iter_time": 0.9939402923583985, "loss": 0.7226216793060303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.487446531042412, "step_time": 0.7505642070770264} +{"epoch": 0, "iter": 2749, "iter_tflops": 16.85927279238924, "iter_time": 0.872135757446289, "loss": 
0.6853336691856384, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 18.038667401431386, "step_time": 0.8151142387390136} +{"epoch": 0, "iter": 2750, "iter_tflops": 6.913782194335468, "iter_time": 2.1267049255371093, "loss": 0.42038607597351074, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 7.824144975154903, "step_time": 1.8792564163208008} +{"epoch": 0, "iter": 2751, "iter_tflops": 10.310438271405745, "iter_time": 1.4260862884521484, "loss": 0.4115768373012543, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 12.057377462667631, "step_time": 1.2194670600891113} +{"epoch": 0, "iter": 2752, "iter_tflops": 22.61009928645759, "iter_time": 0.6503100433349609, "loss": 0.6334517598152161, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.307344928849485, "step_time": 0.6049025382995605} +{"epoch": 0, "iter": 2753, "iter_tflops": 10.103678192303923, "iter_time": 1.4916526184082033, "loss": 0.38303449749946594, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 10.73136141984318, "step_time": 1.4044050369262697} +{"epoch": 0, "iter": 2754, "iter_tflops": 11.850300353303698, "iter_time": 1.271797134399414, "loss": 0.4223622679710388, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 16.636401052906038, "step_time": 0.9059157676696777} +{"epoch": 0, "iter": 2755, "iter_tflops": 22.124600743730507, "iter_time": 0.6811954803466796, "loss": 0.522337794303894, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 23.72925041414863, "step_time": 0.6351308097839355} +{"epoch": 0, "iter": 2756, "iter_tflops": 26.118869998592046, "iter_time": 0.5770225906372071, "loss": 0.509895920753479, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 27.842157187913635, "step_time": 0.5413078422546387} +{"epoch": 0, "iter": 2757, "iter_tflops": 30.843326346523842, "iter_time": 0.668899757385254, "loss": 0.28298258781433105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.88336361088545, "step_time": 0.6088856391906738} +{"epoch": 0, "iter": 2758, "iter_tflops": 11.316462155914452, "iter_time": 1.8231045379638673, "loss": 
0.2584410607814789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.907888619275043, "step_time": 1.598332160949707} +{"epoch": 0, "iter": 2759, "iter_tflops": 15.78779810600508, "iter_time": 1.3067745971679687, "loss": 0.32207930088043213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.406563655018623, "step_time": 1.063098747253418} +{"epoch": 0, "iter": 2760, "iter_tflops": 39.59644987937988, "iter_time": 0.5210339202880859, "loss": 0.46687352657318115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.3216922099115, "step_time": 0.4762300930023194} +{"epoch": 0, "iter": 2761, "iter_tflops": 13.01717700601453, "iter_time": 1.154652374267578, "loss": 0.36787834763526917, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.827004578381457, "step_time": 1.0870260620117187} +{"epoch": 0, "iter": 2762, "iter_tflops": 18.266837823986243, "iter_time": 0.8228197174072266, "loss": 0.366540789604187, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 21.414005562737803, "step_time": 0.7018917732238769} +{"epoch": 0, "iter": 2763, "iter_tflops": 24.13206551731912, "iter_time": 0.6228358001708985, "loss": 0.48690465092658997, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 25.854652227786765, "step_time": 0.5813388710021972} +{"epoch": 0, "iter": 2764, "iter_tflops": 21.94404443272112, "iter_time": 0.6849382019042968, "loss": 0.491431325674057, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 23.63115799519917, "step_time": 0.6360379943847656} +{"epoch": 0, "iter": 2765, "iter_tflops": 20.767067960009392, "iter_time": 0.9934524002075196, "loss": 0.004670086316764355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.30952428627764, "step_time": 0.9247661781311035} +{"epoch": 0, "iter": 2766, "iter_tflops": 16.362566618099372, "iter_time": 1.2608714752197265, "loss": 0.0065680863335728645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.465500226829924, "step_time": 1.059879955291748} +{"epoch": 0, "iter": 2767, "iter_tflops": 45.43313090166091, "iter_time": 0.4540979919433594, "loss": 
0.0092084975913167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.36719260498773, "step_time": 0.40961372756958003} +{"epoch": 0, "iter": 2768, "iter_tflops": 50.243966006630174, "iter_time": 0.4106183319091797, "loss": 0.01893071085214615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.64461652075148, "step_time": 0.37076531028747556} +{"epoch": 0, "iter": 2769, "iter_tflops": 20.087003477236546, "iter_time": 1.027086669921875, "loss": 0.11194657534360886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.423918914857854, "step_time": 0.9629934463500976} +{"epoch": 0, "iter": 2770, "iter_tflops": 25.900717519469058, "iter_time": 0.7965452499389649, "loss": 0.07909287512302399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.599309627440974, "step_time": 0.6970126590728759} +{"epoch": 0, "iter": 2771, "iter_tflops": 42.941151582049514, "iter_time": 0.48045040130615235, "loss": 0.12921757996082306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32951594463191, "step_time": 0.4359033279418945} +{"epoch": 0, "iter": 2772, "iter_tflops": 41.35429618069371, "iter_time": 0.49888634109497076, "loss": 0.13308043777942657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.324213341049536, "step_time": 0.4551892242431641} +{"epoch": 0, "iter": 2773, "iter_tflops": 24.40898332694918, "iter_time": 0.8452254333496093, "loss": 0.30558258295059204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.457639529824025, "step_time": 0.7797783126831054} +{"epoch": 0, "iter": 2774, "iter_tflops": 17.08518461607822, "iter_time": 1.2075429077148436, "loss": 0.2632857859134674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.670507467691074, "step_time": 0.9980932273864747} +{"epoch": 0, "iter": 2775, "iter_tflops": 43.84310495901831, "iter_time": 0.47056643295288086, "loss": 0.23757024109363556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.9938497034798, "step_time": 0.42986952781677246} +{"epoch": 0, "iter": 2776, "iter_tflops": 43.53934260515983, "iter_time": 
0.4738494491577148, "loss": 0.1768464893102646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.40430534437445, "step_time": 0.4352156066894531} +{"epoch": 0, "iter": 2777, "iter_tflops": 32.560718546898435, "iter_time": 0.6336191101074218, "loss": 0.14455553889274597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.177678166374804, "step_time": 0.5864825248718262} +{"epoch": 0, "iter": 2778, "iter_tflops": 10.250285333008723, "iter_time": 2.0127335815429688, "loss": 0.061363525688648224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.825085813030256, "step_time": 1.3916340026855467} +{"epoch": 0, "iter": 2779, "iter_tflops": 12.093374479765401, "iter_time": 1.705983184814453, "loss": 0.12488431483507156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.803478918655726, "step_time": 1.3936652069091795} +{"epoch": 0, "iter": 2780, "iter_tflops": 44.777762716682226, "iter_time": 0.46074417877197266, "loss": 0.13507679104804993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.29884194284187, "step_time": 0.4184904289245605} +{"epoch": 0, "iter": 2781, "iter_tflops": 14.972032728988454, "iter_time": 1.1242815856933592, "loss": 0.3669324219226837, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 15.886495723250698, "step_time": 1.0595653686523439} +{"epoch": 0, "iter": 2782, "iter_tflops": 16.631358441388453, "iter_time": 1.0121109924316407, "loss": 0.4748555123806, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 20.044373515100833, "step_time": 0.8397758445739746} +{"epoch": 0, "iter": 2783, "iter_tflops": 29.925370457432045, "iter_time": 0.5624919738769532, "loss": 0.6312570571899414, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 31.92842137304499, "step_time": 0.5272036628723145} +{"epoch": 0, "iter": 2784, "iter_tflops": 27.819836704284413, "iter_time": 0.60506396484375, "loss": 0.36678817868232727, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 29.449097957187373, "step_time": 0.5715890083312989} +{"epoch": 0, "iter": 2785, "iter_tflops": 39.944632197578834, 
"iter_time": 0.5164922637939453, "loss": 0.33728891611099243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.61100806528255, "step_time": 0.4730707778930664} +{"epoch": 0, "iter": 2786, "iter_tflops": 46.337293278320885, "iter_time": 0.4452373466491699, "loss": 0.26138466596603394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.894462405860274, "step_time": 0.40537010383605954} +{"epoch": 0, "iter": 2787, "iter_tflops": 45.830428865808294, "iter_time": 0.45016147613525387, "loss": 0.4301320016384125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42377324858377, "step_time": 0.41743258666992183} +{"epoch": 0, "iter": 2788, "iter_tflops": 50.81041546306951, "iter_time": 0.4060406379699707, "loss": 0.30949389934539795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.683055917349456, "step_time": 0.3772849407196045} +{"epoch": 0, "iter": 2789, "iter_tflops": 25.65368506900271, "iter_time": 0.804215591430664, "loss": 0.9895336627960205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.16307155007796, "step_time": 0.7595272674560546} +{"epoch": 0, "iter": 2790, "iter_tflops": 20.09173007210587, "iter_time": 1.0268450469970705, "loss": 0.9051242470741272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.619406570206216, "step_time": 0.8380012512207031} +{"epoch": 0, "iter": 2791, "iter_tflops": 44.24235404072916, "iter_time": 0.46631997680664067, "loss": 0.9764167070388794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.558510437409296, "step_time": 0.4338044509887696} +{"epoch": 0, "iter": 2792, "iter_tflops": 49.51278159632941, "iter_time": 0.41668217468261726, "loss": 0.7597858309745789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.4869216265349, "step_time": 0.38572220802307133} +{"epoch": 0, "iter": 2793, "iter_tflops": 28.67206546264413, "iter_time": 0.7195537948608399, "loss": 0.011872295290231705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.40516426327343, "step_time": 0.6785391235351562} +{"epoch": 0, "iter": 2794, "iter_tflops": 
16.65951894840442, "iter_time": 1.2383967132568356, "loss": 0.009445004165172577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.560495865131518, "step_time": 1.0034336547851561} +{"epoch": 0, "iter": 2795, "iter_tflops": 51.054609245337566, "iter_time": 0.40409854888916014, "loss": 0.0330965593457222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.937798182508764, "step_time": 0.36882205200195317} +{"epoch": 0, "iter": 2796, "iter_tflops": 53.66402553340073, "iter_time": 0.38444923400878905, "loss": 0.035806458443403244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.89980260873449, "step_time": 0.3502744083404541} +{"epoch": 0, "iter": 2797, "iter_tflops": 31.102376269138496, "iter_time": 0.6633285293579103, "loss": 0.7822363972663879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.00150649091526, "step_time": 0.6251561126708984} +{"epoch": 0, "iter": 2798, "iter_tflops": 19.086770464241688, "iter_time": 1.0809106521606444, "loss": 0.9435408711433411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.958940193281205, "step_time": 0.7379068508148193} +{"epoch": 0, "iter": 2799, "iter_tflops": 47.00043887493802, "iter_time": 0.438955337524414, "loss": 1.0994558334350586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.88913526214631, "step_time": 0.4054125385284424} +{"epoch": 0, "iter": 2800, "iter_tflops": 50.09229646737393, "iter_time": 0.4118616027832031, "loss": 0.9638447761535645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.821038939288506, "step_time": 0.38332767105102533} +{"epoch": 0, "iter": 2801, "iter_tflops": 31.570681493511902, "iter_time": 0.653489013671875, "loss": 0.84627366065979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.544689964670184, "step_time": 0.6150330657958983} +{"epoch": 0, "iter": 2802, "iter_tflops": 14.77177052060408, "iter_time": 1.3966567840576172, "loss": 0.8912489414215088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.296734660314772, "step_time": 1.1927738914489745} +{"epoch": 0, "iter": 2803, 
"iter_tflops": 39.99376596133619, "iter_time": 0.5158577346801758, "loss": 0.8150864243507385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.45220769332665, "step_time": 0.47479966163635257} +{"epoch": 0, "iter": 2804, "iter_tflops": 46.57847502722237, "iter_time": 0.4429319229125976, "loss": 0.7885277271270752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.93292758984478, "step_time": 0.41317612457275393} +{"epoch": 0, "iter": 2805, "iter_tflops": 22.148315110204997, "iter_time": 0.9314972000122069, "loss": 0.14752265810966492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.045003864171473, "step_time": 0.8952523345947266} +{"epoch": 0, "iter": 2806, "iter_tflops": 17.01735134681408, "iter_time": 1.2123563232421877, "loss": 0.2244010716676712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.355883355585892, "step_time": 1.0135199317932129} +{"epoch": 0, "iter": 2807, "iter_tflops": 50.90848980208031, "iter_time": 0.4052584075927734, "loss": 0.12678059935569763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.19752289037676, "step_time": 0.3737684669494629} +{"epoch": 0, "iter": 2808, "iter_tflops": 47.72166448800691, "iter_time": 0.4323213310241699, "loss": 0.17526875436306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.59704448258272, "step_time": 0.3998502960205078} +{"epoch": 0, "iter": 2809, "iter_tflops": 21.271656286644074, "iter_time": 0.9698865585327148, "loss": 0.05476371571421623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.25513715011114, "step_time": 0.927026123046875} +{"epoch": 0, "iter": 2810, "iter_tflops": 21.477523443132917, "iter_time": 0.9605899658203125, "loss": 0.04402398318052292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.720320762101604, "step_time": 0.8021320457458496} +{"epoch": 0, "iter": 2811, "iter_tflops": 56.81998505855715, "iter_time": 0.3630957221984863, "loss": 0.032833196222782135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.0700676587523, "step_time": 0.3323839378356934} +{"epoch": 0, "iter": 2812, 
"iter_tflops": 52.68897296515974, "iter_time": 0.3915637817382812, "loss": 0.03802352398633957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.561355823969784, "step_time": 0.35841917228698733} +{"epoch": 0, "iter": 2813, "iter_tflops": 23.490430793377453, "iter_time": 0.8623833770751952, "loss": 0.12080703675746918, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 24.679878002062537, "step_time": 0.8208207931518555} +{"epoch": 0, "iter": 2814, "iter_tflops": 13.18263700376175, "iter_time": 1.536699905395508, "loss": 0.12516579031944275, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 15.76346786671476, "step_time": 1.2851078968048095} +{"epoch": 0, "iter": 2815, "iter_tflops": 49.23028530418002, "iter_time": 0.41148973464965816, "loss": 0.17990025877952576, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 53.79336490579038, "step_time": 0.3765846786499023} +{"epoch": 0, "iter": 2816, "iter_tflops": 54.35337203512907, "iter_time": 0.37270469665527345, "loss": 0.1877221167087555, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 58.972071209665124, "step_time": 0.34351442337036137} +{"epoch": 0, "iter": 2817, "iter_tflops": 29.855825016110003, "iter_time": 0.6910240631103515, "loss": 0.1643299162387848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.678699563560677, "step_time": 0.6512607460021973} +{"epoch": 0, "iter": 2818, "iter_tflops": 12.736207885149033, "iter_time": 1.6198772583007812, "loss": 0.11235073953866959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.388681664340401, "step_time": 1.3406667289733887} +{"epoch": 0, "iter": 2819, "iter_tflops": 39.567468789465494, "iter_time": 0.5214155502319335, "loss": 0.1308313012123108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.6908641654702, "step_time": 0.4722061214447022} +{"epoch": 0, "iter": 2820, "iter_tflops": 41.19820897277541, "iter_time": 0.5007764663696289, "loss": 0.17308560013771057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2275426723391, "step_time": 0.45616215896606443} +{"epoch": 0, "iter": 
2821, "iter_tflops": 17.85571854901797, "iter_time": 1.155433395385742, "loss": 0.04309415444731712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.95742847872126, "step_time": 1.0882854461669922} +{"epoch": 0, "iter": 2822, "iter_tflops": 18.80896759894453, "iter_time": 1.0968753814697265, "loss": 0.04419223591685295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.15100548765501, "step_time": 0.8911532382965088} +{"epoch": 0, "iter": 2823, "iter_tflops": 56.27706655277047, "iter_time": 0.36659859466552736, "loss": 0.0616670586168766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.75380784139975, "step_time": 0.33408617591857914} +{"epoch": 0, "iter": 2824, "iter_tflops": 57.44447519737298, "iter_time": 0.3591484375, "loss": 0.03159922733902931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.82429814503257, "step_time": 0.32839353752136236} +{"epoch": 0, "iter": 2825, "iter_tflops": 36.50390205111267, "iter_time": 0.5651750183105468, "loss": 0.5791358947753906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.3766816989918, "step_time": 0.5239419021606446} +{"epoch": 0, "iter": 2826, "iter_tflops": 17.97142634177632, "iter_time": 1.1479942169189452, "loss": 0.5634340643882751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.67671389176779, "step_time": 0.9517629661560059} +{"epoch": 0, "iter": 2827, "iter_tflops": 35.85550502069725, "iter_time": 0.5753954238891602, "loss": 0.5206077694892883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.326846053094634, "step_time": 0.5246058502197266} +{"epoch": 0, "iter": 2828, "iter_tflops": 37.940262753474144, "iter_time": 0.5437783508300781, "loss": 0.5712088942527771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.08810052017454, "step_time": 0.5021184539794922} +{"epoch": 0, "iter": 2829, "iter_tflops": 24.823804372155397, "iter_time": 0.8311011962890624, "loss": 0.43093299865722656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.454563041722075, "step_time": 0.7798689956665039} +{"epoch": 0, "iter": 2830, 
"iter_tflops": 14.775874387617723, "iter_time": 1.3962688751220704, "loss": 0.4475662112236023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.223870225047147, "step_time": 1.0732018718719483} +{"epoch": 0, "iter": 2831, "iter_tflops": 40.01989464850558, "iter_time": 0.5155209350585938, "loss": 0.40832188725471497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.03979627792315, "step_time": 0.46846478080749515} +{"epoch": 0, "iter": 2832, "iter_tflops": 42.522515086553454, "iter_time": 0.48518046188354486, "loss": 0.4679788649082184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.24492787339613, "step_time": 0.4461266231536865} +{"epoch": 0, "iter": 2833, "iter_tflops": 24.750320745073424, "iter_time": 0.8335687332153321, "loss": 0.004192732274532318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.359930419731775, "step_time": 0.7826687393188476} +{"epoch": 0, "iter": 2834, "iter_tflops": 8.011634962368133, "iter_time": 2.5751414794921876, "loss": 0.012636963278055191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.848362611724403, "step_time": 2.0948754959106446} +{"epoch": 0, "iter": 2835, "iter_tflops": 12.087215804222891, "iter_time": 1.7068524169921875, "loss": 0.0061250184662640095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.123430478641996, "step_time": 1.3641808013916015} +{"epoch": 0, "iter": 2836, "iter_tflops": 43.308328782527624, "iter_time": 0.4763770408630371, "loss": 0.004693188704550266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12399359973489, "step_time": 0.42870701217651364} +{"epoch": 0, "iter": 2837, "iter_tflops": 16.818368077252867, "iter_time": 1.0375171813964843, "loss": 0.4280685782432556, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 17.877932977233737, "step_time": 0.9760270309448241} +{"epoch": 0, "iter": 2838, "iter_tflops": 14.387975236171854, "iter_time": 1.2127728576660155, "loss": 0.47317755222320557, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 20.012899618429273, "step_time": 0.871904930114746} +{"epoch": 0, 
"iter": 2839, "iter_tflops": 25.13051364389542, "iter_time": 0.6943489532470704, "loss": 0.6098014712333679, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 26.931529625529667, "step_time": 0.6479151420593261} +{"epoch": 0, "iter": 2840, "iter_tflops": 25.34680072935435, "iter_time": 0.6884239959716796, "loss": 0.5051108598709106, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 27.154167749668343, "step_time": 0.6426028594970703} +{"epoch": 0, "iter": 2841, "iter_tflops": 26.398219511016862, "iter_time": 0.7815335235595704, "loss": 0.0500057227909565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.454889906179616, "step_time": 0.7250456275939942} +{"epoch": 0, "iter": 2842, "iter_tflops": 9.82433709583943, "iter_time": 2.09999853515625, "loss": 0.13640347123146057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.948877087322911, "step_time": 1.8843113632202146} +{"epoch": 0, "iter": 2843, "iter_tflops": 13.721631613295456, "iter_time": 1.503545211791992, "loss": 0.08427125215530396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.20332391839997, "step_time": 1.2732630424499511} +{"epoch": 0, "iter": 2844, "iter_tflops": 38.69328826976768, "iter_time": 0.5331956634521484, "loss": 0.10913489013910294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.509008422642076, "step_time": 0.48533462142944334} +{"epoch": 0, "iter": 2845, "iter_tflops": 16.759716044496674, "iter_time": 0.9725273971557616, "loss": 0.4988371431827545, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 17.80880996691106, "step_time": 0.9152370681762695} +{"epoch": 0, "iter": 2846, "iter_tflops": 10.073744455846565, "iter_time": 1.6179964752197267, "loss": 0.5684435367584229, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 11.698430617547482, "step_time": 1.39328800201416} +{"epoch": 0, "iter": 2847, "iter_tflops": 29.372575163300116, "iter_time": 0.5549150161743164, "loss": 0.35839852690696716, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 31.34170536162119, "step_time": 0.5200509300231934} +{"epoch": 0, 
"iter": 2848, "iter_tflops": 27.856358138604804, "iter_time": 0.5851189498901367, "loss": 0.26610979437828064, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.45528009456934, "step_time": 0.5533569183349609} +{"epoch": 0, "iter": 2849, "iter_tflops": 28.40619313381922, "iter_time": 0.72628857421875, "loss": 1.0086536407470703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.12031986906801, "step_time": 0.6849559898376465} +{"epoch": 0, "iter": 2850, "iter_tflops": 15.924276315687292, "iter_time": 1.295574951171875, "loss": 0.8671241402626038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.72601993359665, "step_time": 1.1017340354919434} +{"epoch": 0, "iter": 2851, "iter_tflops": 42.47175087240197, "iter_time": 0.4857603721618653, "loss": 0.9263570308685303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.007538805838244, "step_time": 0.4484285411834717} +{"epoch": 0, "iter": 2852, "iter_tflops": 44.84041386801431, "iter_time": 0.4601004257202148, "loss": 1.114150047302246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.1645224295906, "step_time": 0.4283462696075439} +{"epoch": 0, "iter": 2853, "iter_tflops": 26.968722252459813, "iter_time": 0.7650007781982422, "loss": 0.023280993103981018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.591305854590647, "step_time": 0.7215862617492674} +{"epoch": 0, "iter": 2854, "iter_tflops": 17.584677469260782, "iter_time": 1.1732426452636717, "loss": 0.011319335550069809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.51874134433539, "step_time": 0.9587500114440919} +{"epoch": 0, "iter": 2855, "iter_tflops": 52.51430522658629, "iter_time": 0.3928661613464355, "loss": 0.003614816814661026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.34545369696273, "step_time": 0.3597685985565186} +{"epoch": 0, "iter": 2856, "iter_tflops": 60.7858597135829, "iter_time": 0.3394061317443848, "loss": 0.019401894882321358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.13287318555217, "step_time": 0.30731730270385743} +{"epoch": 0, 
"iter": 2857, "iter_tflops": 25.972546483342967, "iter_time": 0.7943423461914062, "loss": 0.188742995262146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.376392500507258, "step_time": 0.7536089172363282} +{"epoch": 0, "iter": 2858, "iter_tflops": 16.342357721059454, "iter_time": 1.2624306640624998, "loss": 0.19945533573627472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.79364483243402, "step_time": 1.0423089675903319} +{"epoch": 0, "iter": 2859, "iter_tflops": 41.75596197468161, "iter_time": 0.4940873718261719, "loss": 0.25516414642333984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.99047501251199, "step_time": 0.44859492111206056} +{"epoch": 0, "iter": 2860, "iter_tflops": 41.05236524695827, "iter_time": 0.5025555381774902, "loss": 0.2161208838224411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.82402451524045, "step_time": 0.4602686557769775} +{"epoch": 0, "iter": 2861, "iter_tflops": 28.09071847749922, "iter_time": 0.7344452056884767, "loss": 0.1861933469772339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.118909846562968, "step_time": 0.6849880561828613} +{"epoch": 0, "iter": 2862, "iter_tflops": 12.114225499424865, "iter_time": 1.7030468444824218, "loss": 0.09346844255924225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.77013926070263, "step_time": 1.39681103515625} +{"epoch": 0, "iter": 2863, "iter_tflops": 18.121468881893353, "iter_time": 1.138489028930664, "loss": 0.1544647514820099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.252896056358836, "step_time": 0.9707426910400391} +{"epoch": 0, "iter": 2864, "iter_tflops": 24.23957427193684, "iter_time": 0.851132667541504, "loss": 0.14740914106369019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.967506083767006, "step_time": 0.7122150402069093} +{"epoch": 0, "iter": 2865, "iter_tflops": 18.026837196763154, "iter_time": 0.8201788864135742, "loss": 0.56370609998703, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 19.022228076077525, "step_time": 0.7772607498168946} +{"epoch": 0, 
"iter": 2866, "iter_tflops": 9.715608388189224, "iter_time": 1.5218018951416015, "loss": 0.4165346622467041, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 11.296113580601627, "step_time": 1.3088777084350585} +{"epoch": 0, "iter": 2867, "iter_tflops": 22.820423791746073, "iter_time": 0.6478946838378907, "loss": 0.46867501735687256, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.56888983145635, "step_time": 0.6017867050170899} +{"epoch": 0, "iter": 2868, "iter_tflops": 22.846239622090028, "iter_time": 0.6471625747680665, "loss": 0.2655279338359833, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.515711229584568, "step_time": 0.6030920791625977} +{"epoch": 0, "iter": 2869, "iter_tflops": 18.729128712647263, "iter_time": 1.1015511627197265, "loss": 0.4708390235900879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.002266311377998, "step_time": 1.0314377975463866} +{"epoch": 0, "iter": 2870, "iter_tflops": 29.96286826305601, "iter_time": 0.6885553588867188, "loss": 0.3708309829235077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.38099175216844, "step_time": 0.5670844173431396} +{"epoch": 0, "iter": 2871, "iter_tflops": 48.54916556595755, "iter_time": 0.4249525871276856, "loss": 0.3355664312839508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.507232205269396, "step_time": 0.3929190826416016} +{"epoch": 0, "iter": 2872, "iter_tflops": 46.972053868392294, "iter_time": 0.4392205963134766, "loss": 0.41035088896751404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96765266063138, "step_time": 0.4047879867553711} +{"epoch": 0, "iter": 2873, "iter_tflops": 45.380668621611704, "iter_time": 0.45462295150756843, "loss": 0.04384218901395798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.76276778407972, "step_time": 0.4145889472961426} +{"epoch": 0, "iter": 2874, "iter_tflops": 51.1362127508019, "iter_time": 0.4034536857604981, "loss": 0.056040845811367035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.886868521949715, "step_time": 0.36915815925598144} 
+{"epoch": 0, "iter": 2875, "iter_tflops": 52.24099789522273, "iter_time": 0.39492150497436523, "loss": 0.026932325214147568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.84939602557315, "step_time": 0.36290787506103517} +{"epoch": 0, "iter": 2876, "iter_tflops": 49.323356881664076, "iter_time": 0.41828242874145505, "loss": 0.04235656559467316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.61367719165428, "step_time": 0.3848102684020996} +{"epoch": 0, "iter": 2877, "iter_tflops": 27.42586605337478, "iter_time": 0.7522494812011719, "loss": 1.0369199514389038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.800410379224886, "step_time": 0.7163472061157227} +{"epoch": 0, "iter": 2878, "iter_tflops": 13.350648637865937, "iter_time": 1.545325180053711, "loss": 0.9697859287261963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.10256276863766, "step_time": 1.2063159065246583} +{"epoch": 0, "iter": 2879, "iter_tflops": 37.97283535258974, "iter_time": 0.5433119049072266, "loss": 0.857996940612793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.1995308672889, "step_time": 0.5007603988647461} +{"epoch": 0, "iter": 2880, "iter_tflops": 35.12289236776533, "iter_time": 0.5873973388671876, "loss": 0.940467894077301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.015139239098744, "step_time": 0.5427072982788087} +{"epoch": 0, "iter": 2881, "iter_tflops": 33.1317977437344, "iter_time": 0.6226976776123047, "loss": 0.13793818652629852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.99299597142138, "step_time": 0.557702693939209} +{"epoch": 0, "iter": 2882, "iter_tflops": 42.91782550459383, "iter_time": 0.48071152877807616, "loss": 0.06673292070627213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.373590970191366, "step_time": 0.4354977760314941} +{"epoch": 0, "iter": 2883, "iter_tflops": 45.83905230204562, "iter_time": 0.450076789855957, "loss": 0.12065412849187851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.489501459417966, "step_time": 0.4086214542388916} 
+{"epoch": 0, "iter": 2884, "iter_tflops": 44.54156299397068, "iter_time": 0.46318746185302734, "loss": 0.07538877427577972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.124036687082, "step_time": 0.4199796047210694} +{"epoch": 0, "iter": 2885, "iter_tflops": 22.30521923429501, "iter_time": 0.9249446640014649, "loss": 0.7532150149345398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.711680228048042, "step_time": 0.8700814666748047} +{"epoch": 0, "iter": 2886, "iter_tflops": 11.026860706828554, "iter_time": 1.8709852294921876, "loss": 0.8849668502807617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.462122491159823, "step_time": 1.6555039901733397} +{"epoch": 0, "iter": 2887, "iter_tflops": 10.808372920452971, "iter_time": 1.9088065948486328, "loss": 1.0706651210784912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.070191761781228, "step_time": 1.7092597961425782} +{"epoch": 0, "iter": 2888, "iter_tflops": 37.2711891547755, "iter_time": 0.5535399856567382, "loss": 1.1065667867660522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.67538243876929, "step_time": 0.507213264465332} +{"epoch": 0, "iter": 2889, "iter_tflops": 16.37278499903016, "iter_time": 1.073292434692383, "loss": 0.5462307929992676, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 17.680161095439225, "step_time": 0.9939268188476563} +{"epoch": 0, "iter": 2890, "iter_tflops": 13.291054492553528, "iter_time": 1.322151397705078, "loss": 0.5487629771232605, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 16.773348156420397, "step_time": 1.0476612129211427} +{"epoch": 0, "iter": 2891, "iter_tflops": 31.315557876218527, "iter_time": 0.5611519470214843, "loss": 0.4279188811779022, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 33.59750236584016, "step_time": 0.5230384712219238} +{"epoch": 0, "iter": 2892, "iter_tflops": 33.648318392056325, "iter_time": 0.5222485733032227, "loss": 0.43224719166755676, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 35.64003534890725, "step_time": 0.4930630989074707} 
+{"epoch": 0, "iter": 2893, "iter_tflops": 14.23183145658396, "iter_time": 0.8189442596435547, "loss": 0.04754951596260071, "lr": 3e-05, "seqlen": 4688.0, "step_tflops": 14.930531073828867, "step_time": 0.7806203689575196} +{"epoch": 0, "iter": 2894, "iter_tflops": 11.240382358854355, "iter_time": 1.0368932571411134, "loss": 0.0377904549241066, "lr": 3e-05, "seqlen": 4688.0, "step_tflops": 14.687220039823165, "step_time": 0.7935522613525391} +{"epoch": 0, "iter": 2895, "iter_tflops": 31.947974624390575, "iter_time": 0.3648142585754395, "loss": 0.0712963417172432, "lr": 3e-05, "seqlen": 4688.0, "step_tflops": 35.00100049275165, "step_time": 0.33299267196655274} +{"epoch": 0, "iter": 2896, "iter_tflops": 30.207265384863927, "iter_time": 0.38583686828613284, "loss": 0.03343644365668297, "lr": 3e-05, "seqlen": 4688.0, "step_tflops": 32.929305885914715, "step_time": 0.35394237327575684} +{"epoch": 0, "iter": 2897, "iter_tflops": 26.89520836930559, "iter_time": 0.7670917892456055, "loss": 0.8539607524871826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.302528197503168, "step_time": 0.7289487838745117} +{"epoch": 0, "iter": 2898, "iter_tflops": 15.057349592038792, "iter_time": 1.3701676635742188, "loss": 0.8107409477233887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.95826239971273, "step_time": 1.0882375755310059} +{"epoch": 0, "iter": 2899, "iter_tflops": 35.09145509315956, "iter_time": 0.5879235687255859, "loss": 0.9193890690803528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.25019122027079, "step_time": 0.5393722972869873} +{"epoch": 0, "iter": 2900, "iter_tflops": 35.730993182606746, "iter_time": 0.577400505065918, "loss": 1.1298662424087524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.036145830664346, "step_time": 0.5285125637054444} +{"epoch": 0, "iter": 2901, "iter_tflops": 21.73011000448909, "iter_time": 0.9494242553710937, "loss": 0.16850604116916656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.909176094261078, "step_time": 
0.9005602569580079} +{"epoch": 0, "iter": 2902, "iter_tflops": 17.796702510361865, "iter_time": 1.159264953613281, "loss": 0.22650039196014404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.988404512306715, "step_time": 1.032153091430664} +{"epoch": 0, "iter": 2903, "iter_tflops": 50.771139314234915, "iter_time": 0.4063547477722168, "loss": 0.17047595977783203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.20494149470071, "step_time": 0.37371823883056643} +{"epoch": 0, "iter": 2904, "iter_tflops": 51.921815529477705, "iter_time": 0.39734923171997066, "loss": 0.16732311248779297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.34713664050457, "step_time": 0.3661427135467529} +{"epoch": 0, "iter": 2905, "iter_tflops": 37.64171941851849, "iter_time": 0.5480911560058594, "loss": 0.748496949672699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.42919151171051, "step_time": 0.5103019065856934} +{"epoch": 0, "iter": 2906, "iter_tflops": 16.808366359223356, "iter_time": 1.2274300231933595, "loss": 0.813648521900177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.012901298626733, "step_time": 1.0851102199554443} +{"epoch": 0, "iter": 2907, "iter_tflops": 39.50298878028437, "iter_time": 0.5222666473388673, "loss": 0.6468536257743835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.18667838406442, "step_time": 0.47771892356872564} +{"epoch": 0, "iter": 2908, "iter_tflops": 40.47929808707389, "iter_time": 0.509670238494873, "loss": 0.6152210235595703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87947187715194, "step_time": 0.47017643165588385} +{"epoch": 0, "iter": 2909, "iter_tflops": 18.972490090448673, "iter_time": 1.0874214935302735, "loss": 1.2962692975997925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.682434934492463, "step_time": 0.9975176315307618} +{"epoch": 0, "iter": 2910, "iter_tflops": 16.813772460592208, "iter_time": 1.2270353698730467, "loss": 1.030382513999939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.95943531651291, "step_time": 
0.984334415435791} +{"epoch": 0, "iter": 2911, "iter_tflops": 38.686216621796426, "iter_time": 0.5332931289672851, "loss": 0.9490324258804321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.090426762100414, "step_time": 0.4901611862182617} +{"epoch": 0, "iter": 2912, "iter_tflops": 34.298666889581334, "iter_time": 0.6015129852294921, "loss": 0.7777646780014038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.975834142399044, "step_time": 0.5579615440368653} +{"epoch": 0, "iter": 2913, "iter_tflops": 15.817052738198122, "iter_time": 1.3043576354980468, "loss": 0.14272192120552063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.890585619276095, "step_time": 1.221455192565918} +{"epoch": 0, "iter": 2914, "iter_tflops": 18.63234826656152, "iter_time": 1.1072728576660156, "loss": 0.12439369410276413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.53134367291103, "step_time": 0.8410095176696778} +{"epoch": 0, "iter": 2915, "iter_tflops": 42.337240649112616, "iter_time": 0.48730368804931634, "loss": 0.23023438453674316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.327755538970145, "step_time": 0.4453290100097656} +{"epoch": 0, "iter": 2916, "iter_tflops": 43.57398309805078, "iter_time": 0.47347274780273435, "loss": 0.1474425047636032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.82522089954635, "step_time": 0.4313852214813232} +{"epoch": 0, "iter": 2917, "iter_tflops": 21.83410748945951, "iter_time": 0.9449020767211914, "loss": 0.29660549759864807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.92175897665502, "step_time": 0.9000658950805663} +{"epoch": 0, "iter": 2918, "iter_tflops": 14.883332465712362, "iter_time": 1.3861877746582032, "loss": 0.3329094350337982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.35525202059353, "step_time": 1.1887521705627442} +{"epoch": 0, "iter": 2919, "iter_tflops": 38.85403876404905, "iter_time": 0.5309896774291992, "loss": 0.3821215033531189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.62133553480621, 
"step_time": 0.48405553817749025} +{"epoch": 0, "iter": 2920, "iter_tflops": 45.63330990507952, "iter_time": 0.4521060066223144, "loss": 0.4436335563659668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.716890069120495, "step_time": 0.4149715213775635} +{"epoch": 0, "iter": 2921, "iter_tflops": 22.35953714738134, "iter_time": 0.9226977005004883, "loss": 0.7069206833839417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.79297623428452, "step_time": 0.8671085662841796} +{"epoch": 0, "iter": 2922, "iter_tflops": 9.784970269409131, "iter_time": 2.108447235107422, "loss": 0.9046001434326172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.814455953381183, "step_time": 1.609985908508301} +{"epoch": 0, "iter": 2923, "iter_tflops": 11.579406479429895, "iter_time": 1.7817056121826174, "loss": 0.7671791315078735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.021182962711347, "step_time": 1.4714231719970703} +{"epoch": 0, "iter": 2924, "iter_tflops": 27.030635693870938, "iter_time": 0.7632485504150391, "loss": 0.9891307353973389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.164262406447804, "step_time": 0.6038793773651122} +{"epoch": 0, "iter": 2925, "iter_tflops": 13.264992051920517, "iter_time": 1.210202178955078, "loss": 0.4910052716732025, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 14.18295045493388, "step_time": 1.1318746643066406} +{"epoch": 0, "iter": 2926, "iter_tflops": 13.855683154009924, "iter_time": 1.1586092224121094, "loss": 0.5723972320556641, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 15.525643539492545, "step_time": 1.0339875602722168} +{"epoch": 0, "iter": 2927, "iter_tflops": 26.645665513789478, "iter_time": 0.6024740600585937, "loss": 0.39896872639656067, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 28.506252392013955, "step_time": 0.5631509208679198} +{"epoch": 0, "iter": 2928, "iter_tflops": 26.3918912104207, "iter_time": 0.6082672195434571, "loss": 0.40076595544815063, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 28.291673849267568, 
"step_time": 0.5674221458435058} +{"epoch": 0, "iter": 2929, "iter_tflops": 19.792070507057403, "iter_time": 1.0423918762207032, "loss": 0.5833261609077454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.835350267156045, "step_time": 0.9901966247558593} +{"epoch": 0, "iter": 2930, "iter_tflops": 20.029178286086584, "iter_time": 1.0300519180297851, "loss": 0.4257272183895111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.31025196727342, "step_time": 0.7554340229034423} +{"epoch": 0, "iter": 2931, "iter_tflops": 53.01082261159598, "iter_time": 0.3891864433288574, "loss": 0.5068553686141968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.61864151465653, "step_time": 0.3580628242492675} +{"epoch": 0, "iter": 2932, "iter_tflops": 43.70618865959969, "iter_time": 0.4720405540466308, "loss": 0.3673437237739563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.23672985512696, "step_time": 0.43675956344604494} +{"epoch": 0, "iter": 2933, "iter_tflops": 27.31301047345614, "iter_time": 0.7553577270507812, "loss": 0.09968363493680954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.745287905024295, "step_time": 0.7177208862304687} +{"epoch": 0, "iter": 2934, "iter_tflops": 13.31127608700273, "iter_time": 1.54989599609375, "loss": 0.08266665041446686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.902863690020745, "step_time": 1.2205679397583007} +{"epoch": 0, "iter": 2935, "iter_tflops": 42.65900314507439, "iter_time": 0.48362812042236325, "loss": 0.12871001660823822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90221770316573, "step_time": 0.439874584197998} +{"epoch": 0, "iter": 2936, "iter_tflops": 43.52455641758996, "iter_time": 0.47401042556762696, "loss": 0.0620604082942009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69698908397629, "step_time": 0.43254498672485353} +{"epoch": 0, "iter": 2937, "iter_tflops": 21.775587255995937, "iter_time": 0.9474414291381836, "loss": 0.1333930790424347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.05100988494091, 
"step_time": 0.8950190734863283} +{"epoch": 0, "iter": 2938, "iter_tflops": 10.166915497954554, "iter_time": 2.0292382202148436, "loss": 0.08294960111379623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.34600991843056, "step_time": 1.6710737838745118} +{"epoch": 0, "iter": 2939, "iter_tflops": 12.208538313881446, "iter_time": 1.6898905487060547, "loss": 0.08255114406347275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.872144576247635, "step_time": 1.3872305641174316} +{"epoch": 0, "iter": 2940, "iter_tflops": 36.467586392853896, "iter_time": 0.5657378387451172, "loss": 0.11272723227739334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.09481503656558, "step_time": 0.4575047817230225} +{"epoch": 0, "iter": 2941, "iter_tflops": 17.127499865924, "iter_time": 1.0235956726074218, "loss": 0.3748525381088257, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 18.254639322627508, "step_time": 0.9603933792114258} +{"epoch": 0, "iter": 2942, "iter_tflops": 13.613589122245632, "iter_time": 1.2878040161132813, "loss": 0.45556002855300903, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 15.05912652940515, "step_time": 1.164186695098877} +{"epoch": 0, "iter": 2943, "iter_tflops": 28.512539617562645, "iter_time": 0.6148745422363282, "loss": 0.5081459879875183, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 30.582209759125004, "step_time": 0.5732625236511231} +{"epoch": 0, "iter": 2944, "iter_tflops": 25.90808788329089, "iter_time": 0.6766857833862305, "loss": 0.6154241561889648, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 27.876763206911455, "step_time": 0.628897789001465} +{"epoch": 0, "iter": 2945, "iter_tflops": 22.030362743354992, "iter_time": 0.9364845123291016, "loss": 0.8542014360427856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.58804680069772, "step_time": 0.8746418762207031} +{"epoch": 0, "iter": 2946, "iter_tflops": 16.45177846041511, "iter_time": 1.2540342407226561, "loss": 0.6618965268135071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.705885447578613, 
"step_time": 1.046950849533081} +{"epoch": 0, "iter": 2947, "iter_tflops": 37.54286495311403, "iter_time": 0.5495343399047852, "loss": 0.7312357425689697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.815368471667135, "step_time": 0.5054736557006836} +{"epoch": 0, "iter": 2948, "iter_tflops": 43.35049192645423, "iter_time": 0.4759137115478516, "loss": 0.8700261116027832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.12677393995099, "step_time": 0.43777860832214355} +{"epoch": 0, "iter": 2949, "iter_tflops": 29.09401360127285, "iter_time": 0.7091181640625001, "loss": 1.0710221529006958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.14776210083007, "step_time": 0.6623619842529297} +{"epoch": 0, "iter": 2950, "iter_tflops": 7.9680993015415975, "iter_time": 2.5892113952636717, "loss": 1.2487781047821045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.52576664328222, "step_time": 2.1658197479248047} +{"epoch": 0, "iter": 2951, "iter_tflops": 12.120355238604933, "iter_time": 1.702185546875, "loss": 0.847917377948761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.04268195948292, "step_time": 1.3715036697387695} +{"epoch": 0, "iter": 2952, "iter_tflops": 36.19768920715041, "iter_time": 0.5699560928344727, "loss": 0.9486321806907654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.670721941760775, "step_time": 0.5200584335327149} +{"epoch": 0, "iter": 2953, "iter_tflops": 21.661737485248253, "iter_time": 0.7903511428833008, "loss": 0.34331583976745605, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 23.698898678718706, "step_time": 0.7224124298095702} +{"epoch": 0, "iter": 2954, "iter_tflops": 26.483123156737122, "iter_time": 0.6464637451171875, "loss": 0.30833736062049866, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 28.50217096956615, "step_time": 0.6006692962646485} +{"epoch": 0, "iter": 2955, "iter_tflops": 25.1305894891559, "iter_time": 0.681256561279297, "loss": 0.5402224063873291, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 26.95359993536261, 
"step_time": 0.6351796798706054} +{"epoch": 0, "iter": 2956, "iter_tflops": 25.089547885630967, "iter_time": 0.682370964050293, "loss": 0.38497817516326904, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 26.85033722348644, "step_time": 0.6376224937438965} +{"epoch": 0, "iter": 2957, "iter_tflops": 21.666887000235757, "iter_time": 0.9158393173217774, "loss": 0.29079747200012207, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 23.63617427272394, "step_time": 0.8395346374511719} +{"epoch": 0, "iter": 2958, "iter_tflops": 12.818889715593315, "iter_time": 1.5479801635742187, "loss": 0.3746873438358307, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 16.493169244307825, "step_time": 1.203127592086792} +{"epoch": 0, "iter": 2959, "iter_tflops": 41.8769642777727, "iter_time": 0.4738497009277344, "loss": 0.28030121326446533, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 45.87374432079872, "step_time": 0.4325652351379394} +{"epoch": 0, "iter": 2960, "iter_tflops": 39.17986358809248, "iter_time": 0.5064690170288086, "loss": 0.4221588671207428, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 43.000384151429564, "step_time": 0.46146999359130864} +{"epoch": 0, "iter": 2961, "iter_tflops": 31.061052654521713, "iter_time": 0.6642110214233399, "loss": 0.048807352781295776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.6149951542253, "step_time": 0.6137467346191405} +{"epoch": 0, "iter": 2962, "iter_tflops": 12.055479666392719, "iter_time": 1.711345718383789, "loss": 0.06888599693775177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.992314430092458, "step_time": 1.4744589691162109} +{"epoch": 0, "iter": 2963, "iter_tflops": 17.23175732391522, "iter_time": 1.1972715911865235, "loss": 0.04098311439156532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.341647652542566, "step_time": 1.066666805267334} +{"epoch": 0, "iter": 2964, "iter_tflops": 49.87022585961377, "iter_time": 0.4136956100463867, "loss": 0.04829462990164757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
54.30071192583137, "step_time": 0.3799414920806885} +{"epoch": 0, "iter": 2965, "iter_tflops": 19.707399453259427, "iter_time": 0.7896480941772461, "loss": 0.49339088797569275, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 20.748610877661747, "step_time": 0.7500217971801758} +{"epoch": 0, "iter": 2966, "iter_tflops": 10.991301071525722, "iter_time": 1.4158387908935546, "loss": 0.3490137457847595, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 13.056730411898355, "step_time": 1.1918688621520996} +{"epoch": 0, "iter": 2967, "iter_tflops": 27.11856164394079, "iter_time": 0.5738471908569336, "loss": 0.33625322580337524, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 28.947492676029764, "step_time": 0.5375909614562988} +{"epoch": 0, "iter": 2968, "iter_tflops": 28.38408529267505, "iter_time": 0.5482618255615234, "loss": 0.45434340834617615, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 30.21815010579509, "step_time": 0.5149855422973633} +{"epoch": 0, "iter": 2969, "iter_tflops": 32.017211373626786, "iter_time": 0.6443750915527343, "loss": 0.1987011879682541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.108434030230235, "step_time": 0.6048678016662598} +{"epoch": 0, "iter": 2970, "iter_tflops": 36.42057417789245, "iter_time": 0.5664681015014649, "loss": 0.2807610034942627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33612589908157, "step_time": 0.4991056385040283} +{"epoch": 0, "iter": 2971, "iter_tflops": 42.114794073142164, "iter_time": 0.4898775825500488, "loss": 0.25431421399116516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.03207305109138, "step_time": 0.4481895370483399} +{"epoch": 0, "iter": 2972, "iter_tflops": 38.77675978672994, "iter_time": 0.5320478973388673, "loss": 0.22391141951084137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.26424828588063, "step_time": 0.4881452846527099} +{"epoch": 0, "iter": 2973, "iter_tflops": 18.050590597230126, "iter_time": 1.14295947265625, "loss": 0.17421948909759521, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 19.32659908939607, "step_time": 1.0674973602294922} +{"epoch": 0, "iter": 2974, "iter_tflops": 19.12523797420475, "iter_time": 1.0787365646362304, "loss": 0.21014991402626038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.746489604543395, "step_time": 0.8013167552947998} +{"epoch": 0, "iter": 2975, "iter_tflops": 49.79705509067243, "iter_time": 0.4143034858703613, "loss": 0.1500508338212967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.014744112358294, "step_time": 0.3819529991149902} +{"epoch": 0, "iter": 2976, "iter_tflops": 50.475535329846444, "iter_time": 0.4087345161437988, "loss": 0.1269574910402298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.47922795218325, "step_time": 0.3786965103149414} +{"epoch": 0, "iter": 2977, "iter_tflops": 37.25012192963891, "iter_time": 0.5538530464172363, "loss": 0.15333789587020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.96383911256628, "step_time": 0.5162440338134766} +{"epoch": 0, "iter": 2978, "iter_tflops": 15.448579504559968, "iter_time": 1.3354686431884766, "loss": 0.18430940806865692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.897673159321187, "step_time": 1.036859603881836} +{"epoch": 0, "iter": 2979, "iter_tflops": 33.00859560478251, "iter_time": 0.6250218505859375, "loss": 0.1323840618133545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.43375371459377, "step_time": 0.49792962646484373} +{"epoch": 0, "iter": 2980, "iter_tflops": 44.96559645423837, "iter_time": 0.458819522857666, "loss": 0.15301157534122467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30220149281999, "step_time": 0.4184619121551513} +{"epoch": 0, "iter": 2981, "iter_tflops": 22.394513299855078, "iter_time": 0.9212566146850586, "loss": 0.7805059552192688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.9273757619289, "step_time": 0.862238037109375} +{"epoch": 0, "iter": 2982, "iter_tflops": 18.449764175919064, "iter_time": 1.118230743408203, "loss": 1.0857982635498047, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 27.15495713446753, "step_time": 0.7597542285919191} +{"epoch": 0, "iter": 2983, "iter_tflops": 44.98570965831666, "iter_time": 0.45861438369750984, "loss": 0.8017856478691101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.48795189817682, "step_time": 0.4254890689849854} +{"epoch": 0, "iter": 2984, "iter_tflops": 40.90233157003448, "iter_time": 0.5043989601135254, "loss": 0.9518875479698181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.63937533676773, "step_time": 0.4727632637023926} +{"epoch": 0, "iter": 2985, "iter_tflops": 32.67875752312449, "iter_time": 0.6313304138183594, "loss": 0.021077027544379234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.84129979774479, "step_time": 0.5921447715759278} +{"epoch": 0, "iter": 2986, "iter_tflops": 15.120050734643145, "iter_time": 1.3644857330322266, "loss": 0.02481405809521675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.429145277248608, "step_time": 1.0618631553649904} +{"epoch": 0, "iter": 2987, "iter_tflops": 42.673690840997416, "iter_time": 0.48346166229248044, "loss": 0.0625838115811348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.081503734881004, "step_time": 0.4381995449066162} +{"epoch": 0, "iter": 2988, "iter_tflops": 48.460890513408984, "iter_time": 0.4257266693115234, "loss": 0.06512115895748138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.261350336617646, "step_time": 0.38735581016540527} +{"epoch": 0, "iter": 2989, "iter_tflops": 33.39216366931665, "iter_time": 0.6178423690795898, "loss": 0.8959465026855469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.599929495965185, "step_time": 0.5636921653747557} +{"epoch": 0, "iter": 2990, "iter_tflops": 12.215593000261588, "iter_time": 1.6889146118164062, "loss": 0.8688532114028931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.893270139660816, "step_time": 1.4849702987670899} +{"epoch": 0, "iter": 2991, "iter_tflops": 30.008768377825458, "iter_time": 0.6875021743774414, "loss": 0.825809895992279, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 37.16994717760261, "step_time": 0.5550476951599121} +{"epoch": 0, "iter": 2992, "iter_tflops": 37.081913632070616, "iter_time": 0.5563653945922852, "loss": 1.0432347059249878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.30271116944056, "step_time": 0.5119033660888672} +{"epoch": 0, "iter": 2993, "iter_tflops": 17.98711473177379, "iter_time": 0.8719964218139649, "loss": 0.4313645362854004, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 19.463690908775842, "step_time": 0.8058440589904785} +{"epoch": 0, "iter": 2994, "iter_tflops": 25.901891020388994, "iter_time": 0.6055426483154297, "loss": 0.4100660979747772, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.225705049445583, "step_time": 0.5556884994506837} +{"epoch": 0, "iter": 2995, "iter_tflops": 28.33956960927385, "iter_time": 0.5534558181762695, "loss": 0.4338008761405945, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 30.260085532122567, "step_time": 0.5183296546936036} +{"epoch": 0, "iter": 2996, "iter_tflops": 28.09308837647198, "iter_time": 0.5583116912841797, "loss": 0.4823104441165924, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.86792758448464, "step_time": 0.5251351852416992} +{"epoch": 0, "iter": 2997, "iter_tflops": 23.62056454222722, "iter_time": 0.8734377822875976, "loss": 1.0511261224746704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.91227036556792, "step_time": 0.8281498718261717} +{"epoch": 0, "iter": 2998, "iter_tflops": 13.709999247117771, "iter_time": 1.5048209075927736, "loss": 0.9867905378341675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.698539889788915, "step_time": 1.1656946640014647} +{"epoch": 0, "iter": 2999, "iter_tflops": 38.38993825490711, "iter_time": 0.5374088745117188, "loss": 1.0254087448120117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.746089656995814, "step_time": 0.4942042160034179} +{"epoch": 0, "iter": 3000, "iter_tflops": 4.017068640962217, "iter_time": 5.135857849121094, "loss": 0.8995550870895386, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 4.052618558155733, "step_time": 5.090805664062501} +{"epoch": 0, "iter": 3001, "iter_tflops": 12.54117597624564, "iter_time": 1.6450684967041016, "loss": 0.880676805973053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.035133690548927, "step_time": 1.582729721069336} +{"epoch": 0, "iter": 3002, "iter_tflops": 19.913659971746323, "iter_time": 1.0360272064208984, "loss": 0.8239127397537231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.106216303392685, "step_time": 0.8928806533813477} +{"epoch": 0, "iter": 3003, "iter_tflops": 24.523332023941595, "iter_time": 0.8412842712402344, "loss": 0.8732441067695618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.376867795129144, "step_time": 0.70229044342041} +{"epoch": 0, "iter": 3004, "iter_tflops": 27.980557887596774, "iter_time": 0.7373367462158202, "loss": 0.8362749218940735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.141317741559057, "step_time": 0.684478816986084} +{"epoch": 0, "iter": 3005, "iter_tflops": 17.230048351760267, "iter_time": 1.0031791152954102, "loss": 0.6424059271812439, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 18.452804760398255, "step_time": 0.9367044677734375} +{"epoch": 0, "iter": 3006, "iter_tflops": 20.56087895831523, "iter_time": 0.8406656494140624, "loss": 0.44302767515182495, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 22.109971650412387, "step_time": 0.781766025543213} +{"epoch": 0, "iter": 3007, "iter_tflops": 19.387998406208194, "iter_time": 0.891521873474121, "loss": 0.38760530948638916, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 22.40108092529176, "step_time": 0.7716067237854004} +{"epoch": 0, "iter": 3008, "iter_tflops": 20.254021901813523, "iter_time": 0.8534020919799804, "loss": 0.3963013291358948, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 21.536656805076834, "step_time": 0.8025769653320312} +{"epoch": 0, "iter": 3009, "iter_tflops": 9.005959145776805, "iter_time": 2.290826904296875, "loss": 0.5398116111755371, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 9.521051501533798, "step_time": 2.166892333984375} +{"epoch": 0, "iter": 3010, "iter_tflops": 19.967875232335153, "iter_time": 1.0332142639160158, "loss": 0.6262084245681763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.461630916919557, "step_time": 0.918503807067871} +{"epoch": 0, "iter": 3011, "iter_tflops": 20.853739862606716, "iter_time": 0.9893234329223632, "loss": 0.5184727311134338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.73370194034355, "step_time": 0.8692741470336914} +{"epoch": 0, "iter": 3012, "iter_tflops": 23.766829797935667, "iter_time": 0.8680624923706055, "loss": 0.7327942252159119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.84713401955254, "step_time": 0.7408695449829101} +{"epoch": 0, "iter": 3013, "iter_tflops": 4.8745357467550905, "iter_time": 4.232422241210937, "loss": 0.3838740587234497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 5.009544453503325, "step_time": 4.118357208251953} +{"epoch": 0, "iter": 3014, "iter_tflops": 15.295723412632645, "iter_time": 1.348814498901367, "loss": 0.37495648860931396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.689137252405224, "step_time": 0.8356344451904296} +{"epoch": 0, "iter": 3015, "iter_tflops": 32.727853412529875, "iter_time": 0.6303833389282226, "loss": 0.4238356649875641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.801402452330414, "step_time": 0.5928236236572266} +{"epoch": 0, "iter": 3016, "iter_tflops": 34.256143420656954, "iter_time": 0.6022596664428711, "loss": 0.32589107751846313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.31480369846713, "step_time": 0.5681179962158204} +{"epoch": 0, "iter": 3017, "iter_tflops": 7.231168268893293, "iter_time": 2.0503003234863284, "loss": 0.30084702372550964, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 7.395463002144166, "step_time": 2.0047516479492185} +{"epoch": 0, "iter": 3018, "iter_tflops": 15.56173959303656, "iter_time": 0.9527255325317382, "loss": 0.2986176311969757, "lr": 3e-05, "seqlen": 
5936.0, "step_tflops": 17.74699547985849, "step_time": 0.835412769317627} +{"epoch": 0, "iter": 3019, "iter_tflops": 21.717186456303168, "iter_time": 0.6826881866455078, "loss": 0.47633323073387146, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 23.2829700067721, "step_time": 0.6367772941589356} +{"epoch": 0, "iter": 3020, "iter_tflops": 17.279419356714925, "iter_time": 0.8580187988281249, "loss": 0.40746572613716125, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 18.28908729170121, "step_time": 0.8106509857177735} +{"epoch": 0, "iter": 3021, "iter_tflops": 6.858025367727286, "iter_time": 2.4544646301269526, "loss": 0.05471586808562279, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 7.345737212767783, "step_time": 2.2915032501220702} +{"epoch": 0, "iter": 3022, "iter_tflops": 23.41387477909092, "iter_time": 0.7189233245849609, "loss": 0.047803737223148346, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 28.93966309904089, "step_time": 0.5816508865356446} +{"epoch": 0, "iter": 3023, "iter_tflops": 27.071023212166036, "iter_time": 0.6218006820678711, "loss": 0.03576262667775154, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 29.172494465361257, "step_time": 0.577008617401123} +{"epoch": 0, "iter": 3024, "iter_tflops": 23.78563257406421, "iter_time": 0.7076869049072266, "loss": 0.0540553480386734, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 26.463097239515363, "step_time": 0.6360850563049316} +{"epoch": 0, "iter": 3025, "iter_tflops": 7.974495946066681, "iter_time": 2.587134490966797, "loss": 0.220259889960289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.38036040365057, "step_time": 2.4618384552001955} +{"epoch": 0, "iter": 3026, "iter_tflops": 22.73475588135938, "iter_time": 0.9074693222045898, "loss": 0.25345051288604736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.54605540434383, "step_time": 0.8076038818359376} +{"epoch": 0, "iter": 3027, "iter_tflops": 33.489397754553664, "iter_time": 0.6160485076904296, "loss": 0.2539426386356354, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 38.51937900806348, "step_time": 0.5356029624938965} +{"epoch": 0, "iter": 3028, "iter_tflops": 33.52997670053338, "iter_time": 0.6153029479980467, "loss": 0.24795424938201904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.095041591309176, "step_time": 0.5715769424438476} +{"epoch": 0, "iter": 3029, "iter_tflops": 8.772575415622747, "iter_time": 2.3517715759277347, "loss": 0.0699196383357048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.968256201309922, "step_time": 2.3004576416015623} +{"epoch": 0, "iter": 3030, "iter_tflops": 26.994161922301135, "iter_time": 0.7642798309326173, "loss": 0.03446582704782486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.41854600157627, "step_time": 0.656653350830078} +{"epoch": 0, "iter": 3031, "iter_tflops": 30.27205568568922, "iter_time": 0.6815227127075195, "loss": 0.047304101288318634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.856917725664246, "step_time": 0.6093612442016602} +{"epoch": 0, "iter": 3032, "iter_tflops": 48.61235359520957, "iter_time": 0.424400218963623, "loss": 0.059309665113687515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.10651208789796, "step_time": 0.3884851913452149} +{"epoch": 0, "iter": 3033, "iter_tflops": 35.96954718782128, "iter_time": 0.5735711212158203, "loss": 1.0405244827270508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.45728753674226, "step_time": 0.5364677238464355} +{"epoch": 0, "iter": 3034, "iter_tflops": 12.950728409199135, "iter_time": 1.5930450286865234, "loss": 1.1173442602157593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.88503882845547, "step_time": 1.2987751388549804} +{"epoch": 0, "iter": 3035, "iter_tflops": 15.011919944983187, "iter_time": 1.3743141174316404, "loss": 0.8958134651184082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.798510234524443, "step_time": 1.2281501884460448} +{"epoch": 0, "iter": 3036, "iter_tflops": 13.094465409346329, "iter_time": 1.5755582885742188, "loss": 1.0087963342666626, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 16.263035704918707, "step_time": 1.2685880966186525} +{"epoch": 0, "iter": 3037, "iter_tflops": 7.68826751739445, "iter_time": 2.0347581787109372, "loss": 0.43178442120552063, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 8.276380556558857, "step_time": 1.8901698760986327} +{"epoch": 0, "iter": 3038, "iter_tflops": 14.544950939484847, "iter_time": 1.0755460968017578, "loss": 0.5113351941108704, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 16.154695796624594, "step_time": 0.9683726272583009} +{"epoch": 0, "iter": 3039, "iter_tflops": 17.45917360440759, "iter_time": 0.8960197982788086, "loss": 0.3274250626564026, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 20.214933826029593, "step_time": 0.7738717002868654} +{"epoch": 0, "iter": 3040, "iter_tflops": 20.036111185088206, "iter_time": 0.7807785186767578, "loss": 0.33794310688972473, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 21.472081777456534, "step_time": 0.7285630416870117} +{"epoch": 0, "iter": 3041, "iter_tflops": 9.665881828924002, "iter_time": 2.134424346923828, "loss": 0.4968489706516266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.25487266554961, "step_time": 2.011833221435547} +{"epoch": 0, "iter": 3042, "iter_tflops": 27.285951958429578, "iter_time": 0.756106788635254, "loss": 0.5908165574073792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.339887021140537, "step_time": 0.6583014640808106} +{"epoch": 0, "iter": 3043, "iter_tflops": 35.65837322625789, "iter_time": 0.5785764083862304, "loss": 0.5526727437973022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25126808569779, "step_time": 0.4882952499389648} +{"epoch": 0, "iter": 3044, "iter_tflops": 45.77506029332281, "iter_time": 0.4507059822082519, "loss": 0.6450448632240295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.790322609175306, "step_time": 0.41435950660705567} +{"epoch": 0, "iter": 3045, "iter_tflops": 25.183132639967358, "iter_time": 0.8192425384521483, "loss": 0.8702005743980408, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 27.10237425879352, "step_time": 0.7612282714843749} +{"epoch": 0, "iter": 3046, "iter_tflops": 17.161912075656613, "iter_time": 1.2021442260742188, "loss": 0.9157800078392029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.53376741197031, "step_time": 1.0561758556365966} +{"epoch": 0, "iter": 3047, "iter_tflops": 38.22780268432198, "iter_time": 0.5396881866455079, "loss": 0.881183385848999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65108390121307, "step_time": 0.4953314914703369} +{"epoch": 0, "iter": 3048, "iter_tflops": 42.50547608733514, "iter_time": 0.48537495422363275, "loss": 0.9917723536491394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.32558430612478, "step_time": 0.4453498821258545} +{"epoch": 0, "iter": 3049, "iter_tflops": 8.418828897797367, "iter_time": 2.4505894775390624, "loss": 0.8197237849235535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.920789085907249, "step_time": 2.312698272705078} +{"epoch": 0, "iter": 3050, "iter_tflops": 20.819015494517576, "iter_time": 0.9909735412597654, "loss": 0.9192080497741699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.855068151241074, "step_time": 0.8648515853881836} +{"epoch": 0, "iter": 3051, "iter_tflops": 20.931916670669317, "iter_time": 0.9856284942626953, "loss": 0.9623388051986694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.747190016339374, "step_time": 0.833674186706543} +{"epoch": 0, "iter": 3052, "iter_tflops": 35.21284569131977, "iter_time": 0.5858967971801758, "loss": 0.9630069136619568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.69267982581493, "step_time": 0.5197707386016845} +{"epoch": 0, "iter": 3053, "iter_tflops": 7.873059720738916, "iter_time": 2.6204670410156248, "loss": 0.32230550050735474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.142856051900276, "step_time": 2.5336434020996093} +{"epoch": 0, "iter": 3054, "iter_tflops": 24.891120290496897, "iter_time": 0.8288535537719727, "loss": 0.2838937044143677, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 29.285529366573254, "step_time": 0.7044808120727539} +{"epoch": 0, "iter": 3055, "iter_tflops": 22.936025674980357, "iter_time": 0.899506034851074, "loss": 0.25170961022377014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.184134800989217, "step_time": 0.8192099380493164} +{"epoch": 0, "iter": 3056, "iter_tflops": 24.07633531603255, "iter_time": 0.8569033966064453, "loss": 0.2827610671520233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.274967252907317, "step_time": 0.7296593246459961} +{"epoch": 0, "iter": 3057, "iter_tflops": 7.582168183307393, "iter_time": 2.7210018310546875, "loss": 0.14508844912052155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.954208100869052, "step_time": 2.593733184814453} +{"epoch": 0, "iter": 3058, "iter_tflops": 19.82280615747311, "iter_time": 1.0407756271362305, "loss": 0.14736990630626678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.139825025624702, "step_time": 0.8915838165283203} +{"epoch": 0, "iter": 3059, "iter_tflops": 25.660784924383986, "iter_time": 0.8039930801391603, "loss": 0.19304612278938293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.809647483899525, "step_time": 0.7161175270080566} +{"epoch": 0, "iter": 3060, "iter_tflops": 28.03480159369319, "iter_time": 0.7359100952148437, "loss": 0.19473929703235626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.647666574832147, "step_time": 0.673170124053955} +{"epoch": 0, "iter": 3061, "iter_tflops": 7.110157079924665, "iter_time": 2.901636810302734, "loss": 0.6360053420066833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.4123011996083426, "step_time": 2.7833587646484377} +{"epoch": 0, "iter": 3062, "iter_tflops": 17.35782257200744, "iter_time": 1.1885761260986327, "loss": 0.6952977776527405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.679004388084497, "step_time": 0.9097001419067383} +{"epoch": 0, "iter": 3063, "iter_tflops": 39.505824123724445, "iter_time": 0.5222291641235352, "loss": 0.9283401966094971, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 43.073344899800524, "step_time": 0.47897588539123537} +{"epoch": 0, "iter": 3064, "iter_tflops": 39.00024370205158, "iter_time": 0.5289990921020508, "loss": 0.7304046154022217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53455287916281, "step_time": 0.4850431499481201} +{"epoch": 0, "iter": 3065, "iter_tflops": 15.478811934631585, "iter_time": 1.0344686279296875, "loss": 0.5252877473831177, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 16.52804826738196, "step_time": 0.9687983169555665} +{"epoch": 0, "iter": 3066, "iter_tflops": 10.683511607487862, "iter_time": 1.4987904663085938, "loss": 0.4512316584587097, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 12.680500305353302, "step_time": 1.2627534370422362} +{"epoch": 0, "iter": 3067, "iter_tflops": 25.134377710198166, "iter_time": 0.6370694961547851, "loss": 0.35785186290740967, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 27.070536175453793, "step_time": 0.5915045509338378} +{"epoch": 0, "iter": 3068, "iter_tflops": 24.817750815791335, "iter_time": 0.645197280883789, "loss": 0.3261169493198395, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 26.4498883668359, "step_time": 0.6053842315673827} +{"epoch": 0, "iter": 3069, "iter_tflops": 16.89138060794405, "iter_time": 1.221397705078125, "loss": 0.4593532681465149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.008125866813092, "step_time": 1.1456546707153321} +{"epoch": 0, "iter": 3070, "iter_tflops": 21.111275983011602, "iter_time": 0.9772546920776367, "loss": 0.2979870140552521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.368453724080116, "step_time": 0.7824157505035401} +{"epoch": 0, "iter": 3071, "iter_tflops": 41.66841496389843, "iter_time": 0.4951254692077637, "loss": 0.33849433064460754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.7105199852507, "step_time": 0.45134235000610345} +{"epoch": 0, "iter": 3072, "iter_tflops": 38.888526003429696, "iter_time": 0.5305187835693359, "loss": 0.27838513255119324, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 42.55136578720123, "step_time": 0.4848514995574951} +{"epoch": 0, "iter": 3073, "iter_tflops": 17.155745183829996, "iter_time": 1.2025763549804687, "loss": 0.7464648485183716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.233187328246885, "step_time": 1.1315132751464843} +{"epoch": 0, "iter": 3074, "iter_tflops": 18.376799742283197, "iter_time": 1.1226706390380858, "loss": 0.835892915725708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.674799176652026, "step_time": 0.9098688526153564} +{"epoch": 0, "iter": 3075, "iter_tflops": 43.675530309569794, "iter_time": 0.4723719062805176, "loss": 0.7162402868270874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.976288868627805, "step_time": 0.4391809997558594} +{"epoch": 0, "iter": 3076, "iter_tflops": 47.126098633485995, "iter_time": 0.4377848815917969, "loss": 0.6761791706085205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.66232324991526, "step_time": 0.4072275447845459} +{"epoch": 0, "iter": 3077, "iter_tflops": 45.02818198413258, "iter_time": 0.4581818008422852, "loss": 0.35594919323921204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.308935003894504, "step_time": 0.41840476799011234} +{"epoch": 0, "iter": 3078, "iter_tflops": 17.122588274341787, "iter_time": 1.2049050750732422, "loss": 0.2689967155456543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.597437115185166, "step_time": 1.109351432800293} +{"epoch": 0, "iter": 3079, "iter_tflops": 12.107925652393059, "iter_time": 1.7039329528808593, "loss": 0.4286609888076782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.869227265555065, "step_time": 1.4875445556640625} +{"epoch": 0, "iter": 3080, "iter_tflops": 32.79646529866531, "iter_time": 0.6290645446777343, "loss": 0.2724972069263458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.15876403466199, "step_time": 0.4893666591644287} +{"epoch": 0, "iter": 3081, "iter_tflops": 12.636904372034193, "iter_time": 1.2865712432861327, "loss": 0.5945916175842285, 
"lr": 3e-05, "seqlen": 6496.0, "step_tflops": 13.37405561888752, "step_time": 1.2156580047607424} +{"epoch": 0, "iter": 3082, "iter_tflops": 18.38623157789044, "iter_time": 0.8842637329101564, "loss": 0.2644495368003845, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 21.654549356561663, "step_time": 0.7508019447326659} +{"epoch": 0, "iter": 3083, "iter_tflops": 25.112228582243713, "iter_time": 0.6474247283935547, "loss": 0.3204122483730316, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 27.099452134120476, "step_time": 0.5999485778808593} +{"epoch": 0, "iter": 3084, "iter_tflops": 26.075865022175616, "iter_time": 0.6234990768432618, "loss": 0.4797837734222412, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 27.93066479370453, "step_time": 0.5820941925048828} +{"epoch": 0, "iter": 3085, "iter_tflops": 21.750227266576047, "iter_time": 0.948546112060547, "loss": 0.5247322916984558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.237636655529972, "step_time": 0.8878309707641602} +{"epoch": 0, "iter": 3086, "iter_tflops": 8.841190196312718, "iter_time": 2.3335199279785153, "loss": 0.5645389556884766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.647877312409802, "step_time": 2.1384075317382814} +{"epoch": 0, "iter": 3087, "iter_tflops": 13.45992711400138, "iter_time": 1.5327789916992187, "loss": 0.6614989042282104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.9599651276434, "step_time": 1.2926778564453125} +{"epoch": 0, "iter": 3088, "iter_tflops": 23.73782667090466, "iter_time": 0.8691231002807617, "loss": 0.5042843222618103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.777620462226555, "step_time": 0.6928388900756836} +{"epoch": 0, "iter": 3089, "iter_tflops": 21.840240826996, "iter_time": 0.7726037521362306, "loss": 0.4672839343547821, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 23.780448624256916, "step_time": 0.7095682792663575} +{"epoch": 0, "iter": 3090, "iter_tflops": 25.864574666998973, "iter_time": 0.6523924026489257, "loss": 0.5233039855957031, "lr": 
3e-05, "seqlen": 6736.0, "step_tflops": 27.878954754962862, "step_time": 0.6052541122436523} +{"epoch": 0, "iter": 3091, "iter_tflops": 25.880499381673847, "iter_time": 0.6519909744262695, "loss": 0.4937543272972107, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.860711563318024, "step_time": 0.60565043258667} +{"epoch": 0, "iter": 3092, "iter_tflops": 26.1270148057927, "iter_time": 0.6458392639160155, "loss": 0.3678986430168152, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 28.02772555121394, "step_time": 0.602041431427002} +{"epoch": 0, "iter": 3093, "iter_tflops": 15.725009178944706, "iter_time": 1.311992462158203, "loss": 0.6815158724784851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.863668710328554, "step_time": 1.2234048156738282} +{"epoch": 0, "iter": 3094, "iter_tflops": 22.01794054415153, "iter_time": 0.9370128631591796, "loss": 0.5910401940345764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.997128823782816, "step_time": 0.7641958389282226} +{"epoch": 0, "iter": 3095, "iter_tflops": 36.67007583292254, "iter_time": 0.5626138763427735, "loss": 0.50745689868927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.8294286768998, "step_time": 0.5179861774444581} +{"epoch": 0, "iter": 3096, "iter_tflops": 41.566449613249816, "iter_time": 0.49634004592895503, "loss": 0.7007686495780945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.48428985828099, "step_time": 0.4535872402191162} +{"epoch": 0, "iter": 3097, "iter_tflops": 16.9803108684001, "iter_time": 1.215000930786133, "loss": 0.37274274230003357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.066138138170142, "step_time": 1.1419758529663087} +{"epoch": 0, "iter": 3098, "iter_tflops": 14.48000361679837, "iter_time": 1.4247989196777346, "loss": 0.4686759114265442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.25585094289616, "step_time": 1.0185251445770265} +{"epoch": 0, "iter": 3099, "iter_tflops": 40.3318401021934, "iter_time": 0.5115336532592772, "loss": 0.33433130383491516, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 44.29770380112107, "step_time": 0.4657373123168945} +{"epoch": 0, "iter": 3100, "iter_tflops": 41.54112697861279, "iter_time": 0.4966426048278808, "loss": 0.3605678677558899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.49213088360352, "step_time": 0.4535090599060059} +{"epoch": 0, "iter": 3101, "iter_tflops": 21.146980977667624, "iter_time": 0.9756046752929688, "loss": 0.9122337102890015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.956001962993078, "step_time": 0.8987232856750488} +{"epoch": 0, "iter": 3102, "iter_tflops": 29.942470298863416, "iter_time": 0.6890244293212889, "loss": 0.508983314037323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.83958051160091, "step_time": 0.5600252017974854} +{"epoch": 0, "iter": 3103, "iter_tflops": 46.044993766396374, "iter_time": 0.4480637702941894, "loss": 0.6873936653137207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.933800300129874, "step_time": 0.41316890335083006} +{"epoch": 0, "iter": 3104, "iter_tflops": 50.220792555189476, "iter_time": 0.410807804107666, "loss": 0.5015548467636108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.67881802214433, "step_time": 0.3773141822814941} +{"epoch": 0, "iter": 3105, "iter_tflops": 27.546312178945218, "iter_time": 0.7489602737426756, "loss": 0.6354287266731262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.040977449066904, "step_time": 0.7104131927490234} +{"epoch": 0, "iter": 3106, "iter_tflops": 13.602268446370733, "iter_time": 1.5167391815185547, "loss": 0.8886764645576477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.906821847862894, "step_time": 1.1521359672546385} +{"epoch": 0, "iter": 3107, "iter_tflops": 45.33342476426024, "iter_time": 0.4550967330932618, "loss": 0.9149894118309021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.19844044022207, "step_time": 0.41934446144104004} +{"epoch": 0, "iter": 3108, "iter_tflops": 46.55651031366066, "iter_time": 0.4431408920288085, "loss": 0.7683597207069397, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 50.21024703638078, "step_time": 0.4108940849304199} +{"epoch": 0, "iter": 3109, "iter_tflops": 35.6995819441635, "iter_time": 0.5779085464477539, "loss": 0.25593137741088867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.27097614595823, "step_time": 0.5390793647766113} +{"epoch": 0, "iter": 3110, "iter_tflops": 7.995251195759783, "iter_time": 2.5804184265136714, "loss": 0.1839064359664917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.180632710883893, "step_time": 2.0265040588378906} +{"epoch": 0, "iter": 3111, "iter_tflops": 10.816965061091487, "iter_time": 1.9072903900146485, "loss": 0.323312371969223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.833318414502935, "step_time": 1.60761954498291} +{"epoch": 0, "iter": 3112, "iter_tflops": 34.490903182644935, "iter_time": 0.5981604309082031, "loss": 0.24782106280326843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.72457996686308, "step_time": 0.5327648105621338} +{"epoch": 0, "iter": 3113, "iter_tflops": 14.336430853254168, "iter_time": 1.202789337158203, "loss": 0.5288262963294983, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 15.463082943517637, "step_time": 1.1151531829833985} +{"epoch": 0, "iter": 3114, "iter_tflops": 17.026717617760912, "iter_time": 1.0127440032958985, "loss": 0.37936508655548096, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 22.706811937155177, "step_time": 0.7594067459106445} +{"epoch": 0, "iter": 3115, "iter_tflops": 30.3354661869975, "iter_time": 0.5684338607788085, "loss": 0.4454195201396942, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 32.379838407651874, "step_time": 0.5325445404052735} +{"epoch": 0, "iter": 3116, "iter_tflops": 31.887162975630474, "iter_time": 0.5407726669311523, "loss": 0.5801237225532532, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 33.851573413018485, "step_time": 0.5093915710449218} +{"epoch": 0, "iter": 3117, "iter_tflops": 26.20395543869548, "iter_time": 0.7873274536132814, "loss": 0.13405853509902954, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 27.68089005137068, "step_time": 0.7453190078735352} +{"epoch": 0, "iter": 3118, "iter_tflops": 17.71065243289868, "iter_time": 1.1648974304199218, "loss": 0.18576101958751678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.384178429391056, "step_time": 0.9647830791473389} +{"epoch": 0, "iter": 3119, "iter_tflops": 51.71929528144868, "iter_time": 0.3989051551818848, "loss": 0.1065433919429779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.35296262636899, "step_time": 0.36610486030578615} +{"epoch": 0, "iter": 3120, "iter_tflops": 55.009702744657204, "iter_time": 0.3750446281433105, "loss": 0.159132719039917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.65745018065937, "step_time": 0.34582593536376954} +{"epoch": 0, "iter": 3121, "iter_tflops": 24.68601319316574, "iter_time": 0.8357401962280274, "loss": 0.11172020435333252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.88851898120789, "step_time": 0.7969205780029297} +{"epoch": 0, "iter": 3122, "iter_tflops": 12.705137824245762, "iter_time": 1.623838623046875, "loss": 0.15808641910552979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.446387718220226, "step_time": 1.2544452838897706} +{"epoch": 0, "iter": 3123, "iter_tflops": 38.58240316144828, "iter_time": 0.5347280578613282, "loss": 0.21030448377132416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.20577716767573, "step_time": 0.48882155227661134} +{"epoch": 0, "iter": 3124, "iter_tflops": 41.62311684533445, "iter_time": 0.4956643104553223, "loss": 0.20800887048244476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.85963852466898, "step_time": 0.44987475204467775} +{"epoch": 0, "iter": 3125, "iter_tflops": 17.44541568838677, "iter_time": 1.1826083068847657, "loss": 1.0639574527740479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.73492180047078, "step_time": 1.101210548400879} +{"epoch": 0, "iter": 3126, "iter_tflops": 20.342833271877826, "iter_time": 1.0141701126098632, "loss": 0.8161430358886719, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 26.54259186520731, "step_time": 0.7772825508117676} +{"epoch": 0, "iter": 3127, "iter_tflops": 36.931355052788945, "iter_time": 0.5586335372924804, "loss": 0.8156578540802002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.22955712905262, "step_time": 0.5128342189788818} +{"epoch": 0, "iter": 3128, "iter_tflops": 38.30464155265206, "iter_time": 0.5386055755615234, "loss": 0.7545244097709656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.56305764778027, "step_time": 0.49638055229187017} +{"epoch": 0, "iter": 3129, "iter_tflops": 24.231921514200433, "iter_time": 0.851401466369629, "loss": 0.7430567145347595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.025720136759595, "step_time": 0.7927194099426269} +{"epoch": 0, "iter": 3130, "iter_tflops": 11.83835247787698, "iter_time": 1.7427335052490234, "loss": 0.73368239402771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.408826263770907, "step_time": 1.4318372039794922} +{"epoch": 0, "iter": 3131, "iter_tflops": 9.440464684636378, "iter_time": 2.1853896179199217, "loss": 0.8771347999572754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.766586026167149, "step_time": 1.7533627395629885} +{"epoch": 0, "iter": 3132, "iter_tflops": 31.0363179195134, "iter_time": 0.6647403717041016, "loss": 1.0718492269515991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.20804496669015, "step_time": 0.6031064777374268} +{"epoch": 0, "iter": 3133, "iter_tflops": 14.727015259194697, "iter_time": 1.0205947418212893, "loss": 0.3914313018321991, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 15.890612700438288, "step_time": 0.9458612213134765} +{"epoch": 0, "iter": 3134, "iter_tflops": 17.03416870624485, "iter_time": 0.8823626556396484, "loss": 0.42459720373153687, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 20.7667796086937, "step_time": 0.723767219543457} +{"epoch": 0, "iter": 3135, "iter_tflops": 24.82041324518133, "iter_time": 0.6055626144409181, "loss": 0.41925498843193054, "lr": 3e-05, 
"seqlen": 6016.0, "step_tflops": 26.471819474790706, "step_time": 0.5677854652404785} +{"epoch": 0, "iter": 3136, "iter_tflops": 28.24963790798179, "iter_time": 0.5320533447265624, "loss": 0.35907408595085144, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 30.022719354569652, "step_time": 0.5006313438415527} +{"epoch": 0, "iter": 3137, "iter_tflops": 41.57964068675042, "iter_time": 0.49618258285522454, "loss": 0.1542760133743286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.39552961543233, "step_time": 0.45447412300109863} +{"epoch": 0, "iter": 3138, "iter_tflops": 44.48686131270844, "iter_time": 0.4637570037841797, "loss": 0.17933577299118042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.627361539505785, "step_time": 0.4242692356109619} +{"epoch": 0, "iter": 3139, "iter_tflops": 49.163085557467504, "iter_time": 0.4196460266113281, "loss": 0.1355842649936676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.3183048268585, "step_time": 0.38694203758239754} +{"epoch": 0, "iter": 3140, "iter_tflops": 54.04913848823449, "iter_time": 0.38170994186401364, "loss": 0.17217309772968292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.34212870166368, "step_time": 0.3476635227203369} +{"epoch": 0, "iter": 3141, "iter_tflops": 24.837998185495945, "iter_time": 0.8306262588500976, "loss": 0.8456829786300659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.00477844922753, "step_time": 0.7933577880859375} +{"epoch": 0, "iter": 3142, "iter_tflops": 12.523999435682414, "iter_time": 1.6473246917724609, "loss": 0.9669007658958435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.73450994801451, "step_time": 1.4001886444091796} +{"epoch": 0, "iter": 3143, "iter_tflops": 35.192120766461215, "iter_time": 0.5862418365478516, "loss": 0.9189807772636414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.97982007261621, "step_time": 0.5432119865417481} +{"epoch": 0, "iter": 3144, "iter_tflops": 35.627511464515926, "iter_time": 0.5790775909423829, "loss": 0.9079598188400269, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 38.845123648115255, "step_time": 0.5311115417480469} +{"epoch": 0, "iter": 3145, "iter_tflops": 17.62539739915498, "iter_time": 1.1705321044921875, "loss": 0.04226387292146683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.920167975495534, "step_time": 1.0904286651611328} +{"epoch": 0, "iter": 3146, "iter_tflops": 16.75379899875901, "iter_time": 1.2314277801513673, "loss": 0.020351605489850044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.014337592170875, "step_time": 1.0850282535552978} +{"epoch": 0, "iter": 3147, "iter_tflops": 42.16945181154411, "iter_time": 0.4892426300048828, "loss": 0.04452529177069664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.4355805314832, "step_time": 0.4442949409484863} +{"epoch": 0, "iter": 3148, "iter_tflops": 38.67503070811163, "iter_time": 0.5334473724365235, "loss": 0.03174745664000511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.57875317614049, "step_time": 0.4845396347045899} +{"epoch": 0, "iter": 3149, "iter_tflops": 19.24950915964177, "iter_time": 1.071772445678711, "loss": 0.8073056936264038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.72994348142094, "step_time": 0.9952315368652344} +{"epoch": 0, "iter": 3150, "iter_tflops": 18.898525691632667, "iter_time": 1.091677406311035, "loss": 0.6210180521011353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.585206992050345, "step_time": 0.8063680515289307} +{"epoch": 0, "iter": 3151, "iter_tflops": 46.024828334598354, "iter_time": 0.4482600860595703, "loss": 0.6937562823295593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.58090006745321, "step_time": 0.4161097011566163} +{"epoch": 0, "iter": 3152, "iter_tflops": 48.37788944541637, "iter_time": 0.4264570808410645, "loss": 0.7942834496498108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.9552541182432, "step_time": 0.39709349632263186} +{"epoch": 0, "iter": 3153, "iter_tflops": 20.62700578373995, "iter_time": 1.0001981735229493, "loss": 0.9355106353759766, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 21.643869983406795, "step_time": 0.9532072372436524} +{"epoch": 0, "iter": 3154, "iter_tflops": 15.863098049965917, "iter_time": 1.300571517944336, "loss": 0.5551562309265137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.252835181608678, "step_time": 1.0715872917175293} +{"epoch": 0, "iter": 3155, "iter_tflops": 47.501219537376784, "iter_time": 0.43432765960693365, "loss": 0.7639307379722595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.30984265470001, "step_time": 0.40208841896057124} +{"epoch": 0, "iter": 3156, "iter_tflops": 50.6036571939197, "iter_time": 0.40769965362548827, "loss": 0.8701572418212891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.772965880725366, "step_time": 0.3766656265258789} +{"epoch": 0, "iter": 3157, "iter_tflops": 26.774051990430927, "iter_time": 0.7705629882812501, "loss": 0.7771794199943542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.40310402040143, "step_time": 0.7263675651550293} +{"epoch": 0, "iter": 3158, "iter_tflops": 19.103420630994982, "iter_time": 1.0799685516357422, "loss": 0.6427896022796631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.792798905972735, "step_time": 0.905158405303955} +{"epoch": 0, "iter": 3159, "iter_tflops": 45.20567250356391, "iter_time": 0.45638284683227537, "loss": 0.49090027809143066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.096463857440945, "step_time": 0.4202154674530029} +{"epoch": 0, "iter": 3160, "iter_tflops": 46.55423323172565, "iter_time": 0.4431625671386719, "loss": 0.6876593232154846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.505160899861664, "step_time": 0.408494758605957} +{"epoch": 0, "iter": 3161, "iter_tflops": 37.73592659860963, "iter_time": 0.5346323013305664, "loss": 0.03800368309020996, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 40.75157165846592, "step_time": 0.495069133758545} +{"epoch": 0, "iter": 3162, "iter_tflops": 36.71269216283847, "iter_time": 0.5495332565307617, "loss": 0.04190075024962425, 
"lr": 3e-05, "seqlen": 8016.0, "step_tflops": 45.924270015528904, "step_time": 0.4393068256378173} +{"epoch": 0, "iter": 3163, "iter_tflops": 50.15725742453223, "iter_time": 0.4022318267822266, "loss": 0.02378482185304165, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 54.694784989677736, "step_time": 0.36886231994628904} +{"epoch": 0, "iter": 3164, "iter_tflops": 51.82656651772424, "iter_time": 0.38927613067626954, "loss": 0.04506556689739227, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 56.279414576391105, "step_time": 0.35847645950317386} +{"epoch": 0, "iter": 3165, "iter_tflops": 24.77529842254831, "iter_time": 0.832728355407715, "loss": 0.16948795318603516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.033747370089674, "step_time": 0.792474983215332} +{"epoch": 0, "iter": 3166, "iter_tflops": 15.694427193939662, "iter_time": 1.3145489959716796, "loss": 0.08463090658187866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.605382010350393, "step_time": 1.1088777160644532} +{"epoch": 0, "iter": 3167, "iter_tflops": 44.82252091698372, "iter_time": 0.4602840957641602, "loss": 0.12566262483596802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96346753279947, "step_time": 0.4129235725402832} +{"epoch": 0, "iter": 3168, "iter_tflops": 54.4742602833415, "iter_time": 0.3787310447692871, "loss": 0.12850584089756012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.37152264515164, "step_time": 0.34749139976501464} +{"epoch": 0, "iter": 3169, "iter_tflops": 31.471741879656744, "iter_time": 0.6555434265136718, "loss": 0.7044779658317566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.45291498400498, "step_time": 0.6167203521728515} +{"epoch": 0, "iter": 3170, "iter_tflops": 9.214464127898184, "iter_time": 2.238990051269531, "loss": 0.5120579600334167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.242138109189208, "step_time": 1.685252471923828} +{"epoch": 0, "iter": 3171, "iter_tflops": 9.50668798637213, "iter_time": 2.170166259765625, "loss": 0.5445464849472046, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.875675184096682, "step_time": 1.896994270324707} +{"epoch": 0, "iter": 3172, "iter_tflops": 27.805067893560423, "iter_time": 0.7419904022216797, "loss": 0.45873820781707764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.54451272639693, "step_time": 0.597232147216797} +{"epoch": 0, "iter": 3173, "iter_tflops": 9.431175437322471, "iter_time": 1.489846237182617, "loss": 0.42630866169929504, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 9.962870940894634, "step_time": 1.4103365707397462} +{"epoch": 0, "iter": 3174, "iter_tflops": 11.89159693889801, "iter_time": 1.181590774536133, "loss": 0.4562409520149231, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 14.802974961861414, "step_time": 0.9492011756896973} +{"epoch": 0, "iter": 3175, "iter_tflops": 20.497326875317942, "iter_time": 0.6855040817260741, "loss": 0.5041749477386475, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 22.011789329350393, "step_time": 0.63833980178833} +{"epoch": 0, "iter": 3176, "iter_tflops": 22.06262116941386, "iter_time": 0.6368690795898437, "loss": 0.41339820623397827, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 23.611173701654273, "step_time": 0.5950996513366699} +{"epoch": 0, "iter": 3177, "iter_tflops": 17.635704233257908, "iter_time": 1.038479690551758, "loss": 0.07500430196523666, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 18.875452515427856, "step_time": 0.9702718734741211} +{"epoch": 0, "iter": 3178, "iter_tflops": 16.460864866328034, "iter_time": 1.1125977172851562, "loss": 0.0688624382019043, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 18.719408010048152, "step_time": 0.9783600349426269} +{"epoch": 0, "iter": 3179, "iter_tflops": 34.30398865734377, "iter_time": 0.5338831253051759, "loss": 0.05629771947860718, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 37.774546744486855, "step_time": 0.48483230781555176} +{"epoch": 0, "iter": 3180, "iter_tflops": 35.240901785780096, "iter_time": 0.5196893310546875, "loss": 
0.043732479214668274, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 39.06719613817475, "step_time": 0.46879025077819825} +{"epoch": 0, "iter": 3181, "iter_tflops": 23.97254411674706, "iter_time": 0.8606134338378906, "loss": 0.5013960599899292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.675135993082357, "step_time": 0.8035436897277832} +{"epoch": 0, "iter": 3182, "iter_tflops": 10.298647393793798, "iter_time": 2.0032818603515623, "loss": 0.5609229803085327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.633961100836466, "step_time": 1.7733507385253904} +{"epoch": 0, "iter": 3183, "iter_tflops": 13.02435679858594, "iter_time": 1.584039337158203, "loss": 0.5546357035636902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.172145329892842, "step_time": 1.3598006782531737} +{"epoch": 0, "iter": 3184, "iter_tflops": 34.24203050924656, "iter_time": 0.6025078887939453, "loss": 0.6549524068832397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.78917971264113, "step_time": 0.5459524040222168} +{"epoch": 0, "iter": 3185, "iter_tflops": 15.663223771692738, "iter_time": 0.9856951446533203, "loss": 0.4381379187107086, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 16.72855760613624, "step_time": 0.9229225845336915} +{"epoch": 0, "iter": 3186, "iter_tflops": 13.493077134983677, "iter_time": 1.1442285156249998, "loss": 0.44576403498649597, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 16.51976182323437, "step_time": 0.9345875434875488} +{"epoch": 0, "iter": 3187, "iter_tflops": 22.622757200021272, "iter_time": 0.6824616241455078, "loss": 0.4249265193939209, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.351247754771848, "step_time": 0.6340194053649901} +{"epoch": 0, "iter": 3188, "iter_tflops": 22.261525467614, "iter_time": 0.6935357437133788, "loss": 0.4649084508419037, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.934259719286924, "step_time": 0.6450654335021972} +{"epoch": 0, "iter": 3189, "iter_tflops": 36.593524760117, "iter_time": 0.5637908248901368, "loss": 
0.17196042835712433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.639252471618875, "step_time": 0.5076641979217529} +{"epoch": 0, "iter": 3190, "iter_tflops": 40.02775739222734, "iter_time": 0.5154196701049805, "loss": 0.19456563889980316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.79195914685761, "step_time": 0.4711160202026367} +{"epoch": 0, "iter": 3191, "iter_tflops": 44.98035158567802, "iter_time": 0.45866901397705073, "loss": 0.2867831885814667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22694343185559, "step_time": 0.41910165596008303} +{"epoch": 0, "iter": 3192, "iter_tflops": 36.349078979434694, "iter_time": 0.567582290649414, "loss": 0.26089179515838623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.991264234082635, "step_time": 0.5158900051116944} +{"epoch": 0, "iter": 3193, "iter_tflops": 16.145823388441627, "iter_time": 1.2777975463867188, "loss": 0.20270749926567078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.12210549675025, "step_time": 1.20493904876709} +{"epoch": 0, "iter": 3194, "iter_tflops": 14.936206304724093, "iter_time": 1.3812807006835939, "loss": 0.3298782408237457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.802328402318803, "step_time": 1.0418519020080568} +{"epoch": 0, "iter": 3195, "iter_tflops": 37.70672837306626, "iter_time": 0.5471462097167968, "loss": 0.253911554813385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.032897018743036, "step_time": 0.5027939777374267} +{"epoch": 0, "iter": 3196, "iter_tflops": 39.17131582999163, "iter_time": 0.5266888046264648, "loss": 0.19685058295726776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.402429073814936, "step_time": 0.4865545196533203} +{"epoch": 0, "iter": 3197, "iter_tflops": 1.1495869918510988, "iter_time": 1.2608478088378907, "loss": 0.5303342342376709, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.2318560729307808, "step_time": 1.1766425247192382} +{"epoch": 0, "iter": 3198, "iter_tflops": 1.0554861211487203, "iter_time": 1.3732575073242188, 
"loss": 0.5654733777046204, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.2497837711100193, "step_time": 1.1597640113830567} +{"epoch": 0, "iter": 3199, "iter_tflops": 3.050518715022106, "iter_time": 0.475150089263916, "loss": 0.7431116700172424, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.316457867855524, "step_time": 0.43704889297485355} +{"epoch": 0, "iter": 3200, "iter_tflops": 3.4227006812739065, "iter_time": 0.4234826164245606, "loss": 0.8332304358482361, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.707641773160159, "step_time": 0.3909369697570801} +{"epoch": 0, "iter": 3201, "iter_tflops": 35.27484040210104, "iter_time": 0.554321678161621, "loss": 0.13946865499019623, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 38.18868819491559, "step_time": 0.512026195526123} +{"epoch": 0, "iter": 3202, "iter_tflops": 45.77495999332366, "iter_time": 0.4271682319641113, "loss": 0.1723650097846985, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 50.249066364210854, "step_time": 0.38913377189636233} +{"epoch": 0, "iter": 3203, "iter_tflops": 45.55128883906888, "iter_time": 0.4292657623291016, "loss": 0.17214126884937286, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 49.4758884421883, "step_time": 0.39521490859985353} +{"epoch": 0, "iter": 3204, "iter_tflops": 48.354163620296966, "iter_time": 0.4043831443786621, "loss": 0.18491783738136292, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 52.717493306685355, "step_time": 0.3709130973815918} +{"epoch": 0, "iter": 3205, "iter_tflops": 29.337228857559754, "iter_time": 0.7032393417358399, "loss": 0.7283783555030823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.200781655575383, "step_time": 0.6612364311218262} +{"epoch": 0, "iter": 3206, "iter_tflops": 15.201149484399824, "iter_time": 1.3572061462402343, "loss": 0.8595107793807983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.8926963810037, "step_time": 1.2213025703430176} +{"epoch": 0, "iter": 3207, "iter_tflops": 43.07457459084694, "iter_time": 0.47896221160888675, 
"loss": 0.8614398837089539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.87139819088832, "step_time": 0.4401638164520264} +{"epoch": 0, "iter": 3208, "iter_tflops": 48.647367518029505, "iter_time": 0.42409475708007816, "loss": 1.0090018510818481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.62133954035914, "step_time": 0.3920670528411866} +{"epoch": 0, "iter": 3209, "iter_tflops": 16.538106267277197, "iter_time": 1.247488265991211, "loss": 0.48998627066612244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.123720029939392, "step_time": 1.204825439453125} +{"epoch": 0, "iter": 3210, "iter_tflops": 14.520397031809782, "iter_time": 1.420835357666016, "loss": 0.6575139164924622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.46846414126039, "step_time": 1.1810479354858399} +{"epoch": 0, "iter": 3211, "iter_tflops": 49.5690952144827, "iter_time": 0.41620879745483397, "loss": 0.46491268277168274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.708692597530266, "step_time": 0.3841295051574707} +{"epoch": 0, "iter": 3212, "iter_tflops": 47.768719844050565, "iter_time": 0.43189546585083005, "loss": 0.5874053239822388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.783334021023066, "step_time": 0.39841184234619137} +{"epoch": 0, "iter": 3213, "iter_tflops": 32.423954204926595, "iter_time": 0.6362917175292968, "loss": 0.32750269770622253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.68924361626734, "step_time": 0.5947403678894043} +{"epoch": 0, "iter": 3214, "iter_tflops": 10.839284262498476, "iter_time": 1.9033630828857424, "loss": 0.3596498370170593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.963702815701527, "step_time": 1.5914506683349607} +{"epoch": 0, "iter": 3215, "iter_tflops": 15.011745938365229, "iter_time": 1.374330047607422, "loss": 0.32139548659324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.541925009704144, "step_time": 1.176102024078369} +{"epoch": 0, "iter": 3216, "iter_tflops": 20.67405536186817, "iter_time": 
0.9979219436645507, "loss": 0.3500876724720001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.428834789375923, "step_time": 0.8805855560302733} +{"epoch": 0, "iter": 3217, "iter_tflops": 15.377146856008798, "iter_time": 1.1080198822021485, "loss": 0.49197497963905334, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 16.030753840518965, "step_time": 1.0628436203002931} +{"epoch": 0, "iter": 3218, "iter_tflops": 10.591207430998157, "iter_time": 1.6087102966308593, "loss": 0.32444190979003906, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 12.445224123861083, "step_time": 1.3690540466308594} +{"epoch": 0, "iter": 3219, "iter_tflops": 29.064149142083895, "iter_time": 0.5862268447875977, "loss": 0.45463329553604126, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 31.023960927211515, "step_time": 0.5491943626403808} +{"epoch": 0, "iter": 3220, "iter_tflops": 30.96822788549172, "iter_time": 0.5501827392578124, "loss": 0.4466870427131653, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 32.75943111734474, "step_time": 0.5201001319885254} +{"epoch": 0, "iter": 3221, "iter_tflops": 21.877401001440713, "iter_time": 0.9430321960449218, "loss": 0.13723620772361755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.011960424589628, "step_time": 0.8965378494262696} +{"epoch": 0, "iter": 3222, "iter_tflops": 17.37368860802386, "iter_time": 1.1874906921386719, "loss": 0.1627311110496521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.134224095093508, "step_time": 1.0246778526306153} +{"epoch": 0, "iter": 3223, "iter_tflops": 44.433520379067915, "iter_time": 0.4643137283325195, "loss": 0.1516660451889038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.63858864137957, "step_time": 0.4241713027954101} +{"epoch": 0, "iter": 3224, "iter_tflops": 41.67418571719285, "iter_time": 0.4950569076538086, "loss": 0.12016603350639343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.572534606398534, "step_time": 0.4527089328765869} +{"epoch": 0, "iter": 3225, "iter_tflops": 18.836267610330555, 
"iter_time": 1.09528564453125, "loss": 0.7438521385192871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.3203500696408, "step_time": 1.0152922286987305} +{"epoch": 0, "iter": 3226, "iter_tflops": 19.446026776050445, "iter_time": 1.0609413299560546, "loss": 0.7526898384094238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.564802574980813, "step_time": 0.8070116500854493} +{"epoch": 0, "iter": 3227, "iter_tflops": 46.83963824256273, "iter_time": 0.44046227264404303, "loss": 0.84235680103302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.7513720775411, "step_time": 0.40651301956176755} +{"epoch": 0, "iter": 3228, "iter_tflops": 47.11651789642382, "iter_time": 0.4378739013671875, "loss": 0.8563007116317749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52854023128756, "step_time": 0.40830574989318846} +{"epoch": 0, "iter": 3229, "iter_tflops": 28.757187796318792, "iter_time": 0.7174238891601562, "loss": 1.1727445125579834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.692571726443077, "step_time": 0.6721852340698242} +{"epoch": 0, "iter": 3230, "iter_tflops": 12.015289876836091, "iter_time": 1.7170699768066404, "loss": 1.0944268703460693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.612701477005047, "step_time": 1.411860328674316} +{"epoch": 0, "iter": 3231, "iter_tflops": 15.465376652538346, "iter_time": 1.3340181732177734, "loss": 0.8576275110244751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.10857744479306, "step_time": 1.205891815185547} +{"epoch": 0, "iter": 3232, "iter_tflops": 21.63295730165299, "iter_time": 0.9536880798339843, "loss": 0.9019759297370911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.346546071088156, "step_time": 0.7278168373107909} +{"epoch": 0, "iter": 3233, "iter_tflops": 25.319956402659795, "iter_time": 0.6113784027099609, "loss": 0.3735498785972595, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 27.238066362984366, "step_time": 0.5683250160217285} +{"epoch": 0, "iter": 3234, "iter_tflops": 23.266104164924396, 
"iter_time": 0.6653488006591798, "loss": 0.42937275767326355, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.083052977903616, "step_time": 0.6171527252197265} +{"epoch": 0, "iter": 3235, "iter_tflops": 24.03644620141285, "iter_time": 0.6440250930786133, "loss": 0.5755037665367126, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.711741529968375, "step_time": 0.6020624656677246} +{"epoch": 0, "iter": 3236, "iter_tflops": 22.09905799557046, "iter_time": 0.7004857177734375, "loss": 0.321087509393692, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 23.831848020330455, "step_time": 0.6495540962219238} +{"epoch": 0, "iter": 3237, "iter_tflops": 34.90791433142158, "iter_time": 0.5910147857666016, "loss": 0.4435991942882538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.593591457238894, "step_time": 0.534573040008545} +{"epoch": 0, "iter": 3238, "iter_tflops": 38.92809052670683, "iter_time": 0.5299795913696289, "loss": 0.4565950334072113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.500736604428546, "step_time": 0.48542908096313475} +{"epoch": 0, "iter": 3239, "iter_tflops": 39.17021790431057, "iter_time": 0.5267035675048828, "loss": 0.43317756056785583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.57427182054656, "step_time": 0.4845906372070312} +{"epoch": 0, "iter": 3240, "iter_tflops": 37.75436752973143, "iter_time": 0.546455810546875, "loss": 0.4516538977622986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27358754519249, "step_time": 0.4998618907928467} +{"epoch": 0, "iter": 3241, "iter_tflops": 18.164249755664173, "iter_time": 1.1358076324462891, "loss": 0.44171804189682007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.41038186301006, "step_time": 1.0628896255493165} +{"epoch": 0, "iter": 3242, "iter_tflops": 21.264437834257564, "iter_time": 0.9702157974243165, "loss": 0.26338404417037964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.342066851308456, "step_time": 0.8141046123504638} +{"epoch": 0, "iter": 3243, "iter_tflops": 35.87226673330234, 
"iter_time": 0.5751265640258788, "loss": 0.37897685170173645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.3326098496449, "step_time": 0.5245289745330811} +{"epoch": 0, "iter": 3244, "iter_tflops": 39.80020247108246, "iter_time": 0.5183665466308593, "loss": 0.3187958598136902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.455938596530935, "step_time": 0.47475889778137204} +{"epoch": 0, "iter": 3245, "iter_tflops": 22.563888571885006, "iter_time": 0.9143412246704102, "loss": 0.8080537915229797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.523838874680095, "step_time": 0.8412668838500976} +{"epoch": 0, "iter": 3246, "iter_tflops": 32.58748235645147, "iter_time": 0.6330987243652344, "loss": 0.6411774754524231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.75970065704116, "step_time": 0.518894588470459} +{"epoch": 0, "iter": 3247, "iter_tflops": 46.06012306599877, "iter_time": 0.4479165954589844, "loss": 0.8517841100692749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.674022584089506, "step_time": 0.41532963180541993} +{"epoch": 0, "iter": 3248, "iter_tflops": 41.9681479295614, "iter_time": 0.4915893249511719, "loss": 0.8498317003250122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.987375968175755, "step_time": 0.4585973968505859} +{"epoch": 0, "iter": 3249, "iter_tflops": 34.498229901486866, "iter_time": 0.5980333938598633, "loss": 0.24205733835697174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.961420918143745, "step_time": 0.5581791229248048} +{"epoch": 0, "iter": 3250, "iter_tflops": 12.767705646496177, "iter_time": 1.6158810424804686, "loss": 0.27737367153167725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.87779076993216, "step_time": 1.15400687789917} +{"epoch": 0, "iter": 3251, "iter_tflops": 49.6770768644658, "iter_time": 0.41530409622192377, "loss": 0.24909290671348572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.954509262311724, "step_time": 0.38237941169738765} +{"epoch": 0, "iter": 3252, "iter_tflops": 51.94572559908398, 
"iter_time": 0.39716633605957025, "loss": 0.2564496695995331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.4313970659398, "step_time": 0.36559600830078126} +{"epoch": 0, "iter": 3253, "iter_tflops": 29.56808902290937, "iter_time": 0.6977486267089844, "loss": 0.8873990774154663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.38866492703028, "step_time": 0.6572784652709962} +{"epoch": 0, "iter": 3254, "iter_tflops": 24.633632552717668, "iter_time": 0.8375173034667969, "loss": 0.6607227325439453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.765865699475118, "step_time": 0.6931124973297118} +{"epoch": 0, "iter": 3255, "iter_tflops": 40.94908392381285, "iter_time": 0.5038230781555175, "loss": 0.877168595790863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.065649117265956, "step_time": 0.46818993759155275} +{"epoch": 0, "iter": 3256, "iter_tflops": 44.72929244093424, "iter_time": 0.46124345779418946, "loss": 0.7146656513214111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.9996428738216, "step_time": 0.4298176460266113} +{"epoch": 0, "iter": 3257, "iter_tflops": 30.43584428564479, "iter_time": 0.6778551406860351, "loss": 0.967244565486908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.33072544935871, "step_time": 0.6381265258789062} +{"epoch": 0, "iter": 3258, "iter_tflops": 41.91086405585565, "iter_time": 0.49226123046875003, "loss": 0.9239931702613831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.312812815770684, "step_time": 0.44547269439697257} +{"epoch": 0, "iter": 3259, "iter_tflops": 46.910314997404875, "iter_time": 0.43979865646362304, "loss": 0.9739894270896912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.64695607858529, "step_time": 0.4073511047363282} +{"epoch": 0, "iter": 3260, "iter_tflops": 46.14485616813393, "iter_time": 0.4470941123962402, "loss": 0.8292282223701477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.31164286128177, "step_time": 0.4183817920684815} +{"epoch": 0, "iter": 3261, "iter_tflops": 38.84076720609068, 
"iter_time": 0.5311711120605469, "loss": 0.8845039010047913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96354769711187, "step_time": 0.4916432151794434} +{"epoch": 0, "iter": 3262, "iter_tflops": 38.926684547154125, "iter_time": 0.5299987335205079, "loss": 0.6752908229827881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.68216716425321, "step_time": 0.4949621124267578} +{"epoch": 0, "iter": 3263, "iter_tflops": 47.21849594590132, "iter_time": 0.43692822265625, "loss": 0.6559630632400513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24067107377711, "step_time": 0.4026312122344971} +{"epoch": 0, "iter": 3264, "iter_tflops": 47.985014942073704, "iter_time": 0.429948673248291, "loss": 0.8784626722335815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.637495227219986, "step_time": 0.39953706932067873} +{"epoch": 0, "iter": 3265, "iter_tflops": 28.334457583957757, "iter_time": 0.7281273498535156, "loss": 0.7435585260391235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.81552131812677, "step_time": 0.6919581680297853} +{"epoch": 0, "iter": 3266, "iter_tflops": 17.084317852427095, "iter_time": 1.2076041717529296, "loss": 0.6262438297271729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.389474363346377, "step_time": 0.9214639511108399} +{"epoch": 0, "iter": 3267, "iter_tflops": 48.8530956924151, "iter_time": 0.4223088264465332, "loss": 0.5947304964065552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.8927313478239, "step_time": 0.39005536270141605} +{"epoch": 0, "iter": 3268, "iter_tflops": 52.34516409273225, "iter_time": 0.3941356163024903, "loss": 0.7323378920555115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.66194857684277, "step_time": 0.3641084365844727} +{"epoch": 0, "iter": 3269, "iter_tflops": 21.866020880869762, "iter_time": 0.9435229949951173, "loss": 0.4081189036369324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.9094821667272, "step_time": 0.900548225402832} +{"epoch": 0, "iter": 3270, "iter_tflops": 13.09119780132678, 
"iter_time": 1.5759515533447266, "loss": 0.3478524088859558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.543578477393904, "step_time": 1.0556456451416016} +{"epoch": 0, "iter": 3271, "iter_tflops": 38.67719247066757, "iter_time": 0.5334175567626953, "loss": 0.3648255467414856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.00245252753325, "step_time": 0.4797655086517334} +{"epoch": 0, "iter": 3272, "iter_tflops": 43.725451977436975, "iter_time": 0.4718325958251953, "loss": 0.35899460315704346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.91536496550738, "step_time": 0.4305736484527588} +{"epoch": 0, "iter": 3273, "iter_tflops": 19.80870142962228, "iter_time": 1.0415167083740235, "loss": 0.35593271255493164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.945717614382144, "step_time": 0.9849790725708008} +{"epoch": 0, "iter": 3274, "iter_tflops": 10.863603849671343, "iter_time": 1.8991021575927736, "loss": 0.27738267183303833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.10343292151797, "step_time": 1.574480033874512} +{"epoch": 0, "iter": 3275, "iter_tflops": 14.091999538543256, "iter_time": 1.4640288238525392, "loss": 0.27584654092788696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.67768593229826, "step_time": 1.1670698070526122} +{"epoch": 0, "iter": 3276, "iter_tflops": 40.324408853903286, "iter_time": 0.5116279220581055, "loss": 0.31444230675697327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.12159109733357, "step_time": 0.4675963172912597} +{"epoch": 0, "iter": 3277, "iter_tflops": 12.758102024619532, "iter_time": 1.2069395141601564, "loss": 0.31229543685913086, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 13.617593662565973, "step_time": 1.1307620010375976} +{"epoch": 0, "iter": 3278, "iter_tflops": 12.463518869166924, "iter_time": 1.2354662933349612, "loss": 0.20359176397323608, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 13.914218651520518, "step_time": 1.1066562805175781} +{"epoch": 0, "iter": 3279, "iter_tflops": 
27.03356487073494, "iter_time": 0.5695977401733399, "loss": 0.36452940106391907, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.84078772728704, "step_time": 0.5339055786132813} +{"epoch": 0, "iter": 3280, "iter_tflops": 27.140433922672226, "iter_time": 0.5673548736572266, "loss": 0.4144026041030884, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.928020003315414, "step_time": 0.5322955894470215} +{"epoch": 0, "iter": 3281, "iter_tflops": 24.706519835728056, "iter_time": 0.8350465240478515, "loss": 0.6899281144142151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.040164949387062, "step_time": 0.7922796783447267} +{"epoch": 0, "iter": 3282, "iter_tflops": 20.152999604003348, "iter_time": 1.0237232131958007, "loss": 0.7448649406433105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.181129026045753, "step_time": 0.7590226840972901} +{"epoch": 0, "iter": 3283, "iter_tflops": 41.6299731851368, "iter_time": 0.49558267593383787, "loss": 0.8116070032119751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08065508369809, "step_time": 0.4576484851837158} +{"epoch": 0, "iter": 3284, "iter_tflops": 39.849678394265304, "iter_time": 0.5177229614257813, "loss": 1.107532024383545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.3888111164852, "step_time": 0.4754934043884278} +{"epoch": 0, "iter": 3285, "iter_tflops": 16.556030355032515, "iter_time": 1.2461376953125, "loss": 0.15541289746761322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.489864048292706, "step_time": 1.1796028518676758} +{"epoch": 0, "iter": 3286, "iter_tflops": 20.779755992524148, "iter_time": 0.9928458023071289, "loss": 0.16455213725566864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.8914036029201, "step_time": 0.863536268234253} +{"epoch": 0, "iter": 3287, "iter_tflops": 53.51890045541427, "iter_time": 0.38549172973632817, "loss": 0.11393770575523376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.32203062118581, "step_time": 0.3537444305419922} +{"epoch": 0, "iter": 3288, "iter_tflops": 
48.618049897774966, "iter_time": 0.4243504943847656, "loss": 0.182174414396286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.82283776545621, "step_time": 0.3905714721679688} +{"epoch": 0, "iter": 3289, "iter_tflops": 34.48823569409643, "iter_time": 0.5982066955566406, "loss": 0.783153235912323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.9538605999276, "step_time": 0.5582933197021485} +{"epoch": 0, "iter": 3290, "iter_tflops": 35.541653353281696, "iter_time": 0.5804764709472656, "loss": 0.7833049297332764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.33493872812354, "step_time": 0.5244979190826417} +{"epoch": 0, "iter": 3291, "iter_tflops": 38.993866289682636, "iter_time": 0.5290856094360352, "loss": 0.6814037561416626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.471601116836965, "step_time": 0.48576208496093753} +{"epoch": 0, "iter": 3292, "iter_tflops": 41.62188967068728, "iter_time": 0.49567892456054696, "loss": 0.7102776765823364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.57327691266239, "step_time": 0.45270155906677245} +{"epoch": 0, "iter": 3293, "iter_tflops": 32.79545939263045, "iter_time": 0.6290838394165039, "loss": 0.33331286907196045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.77627616502275, "step_time": 0.5766696739196777} +{"epoch": 0, "iter": 3294, "iter_tflops": 9.797873501287226, "iter_time": 2.1056705322265628, "loss": 0.3631284534931183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.88268253587348, "step_time": 1.7362319869995118} +{"epoch": 0, "iter": 3295, "iter_tflops": 12.42575479049112, "iter_time": 1.6603493194580077, "loss": 0.3912098705768585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.335119015889465, "step_time": 1.439199317932129} +{"epoch": 0, "iter": 3296, "iter_tflops": 30.409929669149545, "iter_time": 0.6784327926635743, "loss": 0.3192808926105499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.786152789500115, "step_time": 0.5930835075378418} +{"epoch": 0, "iter": 3297, "iter_tflops": 
14.36171885027009, "iter_time": 1.1177855834960937, "loss": 0.3723547160625458, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 14.906727959534262, "step_time": 1.0769179077148439} +{"epoch": 0, "iter": 3298, "iter_tflops": 12.99104932591075, "iter_time": 1.2357217559814453, "loss": 0.44912949204444885, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 17.006865298118488, "step_time": 0.9439318771362304} +{"epoch": 0, "iter": 3299, "iter_tflops": 28.185670928135597, "iter_time": 0.5695561523437499, "loss": 0.6373275518417358, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.133301182037126, "step_time": 0.5327435646057129} +{"epoch": 0, "iter": 3300, "iter_tflops": 27.371106160796426, "iter_time": 0.5865061569213866, "loss": 0.5314452648162842, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.06227262242812, "step_time": 0.5523767013549804} +{"epoch": 0, "iter": 3301, "iter_tflops": 23.272125775983177, "iter_time": 0.5304089279174805, "loss": 0.007378096226602793, "lr": 3e-05, "seqlen": 4960.0, "step_tflops": 25.132800123420814, "step_time": 0.4911407890319823} +{"epoch": 0, "iter": 3302, "iter_tflops": 10.702296376586137, "iter_time": 1.1533733367919923, "loss": 0.0060499561950564384, "lr": 3e-05, "seqlen": 4960.0, "step_tflops": 13.83634933819533, "step_time": 0.8921242866516113} +{"epoch": 0, "iter": 3303, "iter_tflops": 33.21035656221246, "iter_time": 0.37168355178833007, "loss": 0.05343854799866676, "lr": 3e-05, "seqlen": 4960.0, "step_tflops": 36.509508796204074, "step_time": 0.33809666824340817} +{"epoch": 0, "iter": 3304, "iter_tflops": 38.22892056358792, "iter_time": 0.3228901863098145, "loss": 0.004187194164842367, "lr": 3e-05, "seqlen": 4960.0, "step_tflops": 42.03495463344527, "step_time": 0.2936542549133301} +{"epoch": 0, "iter": 3305, "iter_tflops": 27.03081051238898, "iter_time": 0.7632436141967773, "loss": 0.403879851102829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.463699040841163, "step_time": 0.7248212356567383} +{"epoch": 0, "iter": 3306, 
"iter_tflops": 15.25418110992225, "iter_time": 1.3524877777099609, "loss": 0.4969249963760376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.27919377105125, "step_time": 1.0701222133636474} +{"epoch": 0, "iter": 3307, "iter_tflops": 43.32540923645275, "iter_time": 0.47618923568725585, "loss": 0.4138958752155304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.606774189428315, "step_time": 0.4426629791259766} +{"epoch": 0, "iter": 3308, "iter_tflops": 47.76997718404818, "iter_time": 0.4318840980529785, "loss": 0.3490009307861328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.60459008604509, "step_time": 0.39979183006286617} +{"epoch": 0, "iter": 3309, "iter_tflops": 24.620141329177585, "iter_time": 0.8379762420654298, "loss": 0.8175352215766907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.836230616574845, "step_time": 0.7985334167480469} +{"epoch": 0, "iter": 3310, "iter_tflops": 14.756579021086765, "iter_time": 1.3980946044921874, "loss": 0.8775489330291748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.896153608535027, "step_time": 1.091814447402954} +{"epoch": 0, "iter": 3311, "iter_tflops": 40.312313073133794, "iter_time": 0.5117814369201661, "loss": 0.8683063983917236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.769456196406104, "step_time": 0.47135823249816894} +{"epoch": 0, "iter": 3312, "iter_tflops": 38.45326179438862, "iter_time": 0.5365238876342774, "loss": 0.9205742478370667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.87776998260364, "step_time": 0.4926502418518066} +{"epoch": 0, "iter": 3313, "iter_tflops": 32.37615164829355, "iter_time": 0.6372311859130859, "loss": 0.28239619731903076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.19228198068546, "step_time": 0.5862391510009766} +{"epoch": 0, "iter": 3314, "iter_tflops": 12.83131419443941, "iter_time": 1.607870651245117, "loss": 0.40795180201530457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.839097206316437, "step_time": 1.3903199920654297} +{"epoch": 0, "iter": 3315, 
"iter_tflops": 29.617356879198915, "iter_time": 0.6965879364013672, "loss": 0.35858821868896484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.318084010645656, "step_time": 0.5680666828155518} +{"epoch": 0, "iter": 3316, "iter_tflops": 40.09072438192701, "iter_time": 0.5146101455688477, "loss": 0.26616591215133667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.044010227865286, "step_time": 0.4684199600219726} +{"epoch": 0, "iter": 3317, "iter_tflops": 7.903076917800952, "iter_time": 1.9018307037353517, "loss": 0.3573285937309265, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 8.268370925163367, "step_time": 1.81780842590332} +{"epoch": 0, "iter": 3318, "iter_tflops": 24.526113062987854, "iter_time": 0.6128290405273438, "loss": 0.6894354820251465, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 26.253211559422997, "step_time": 0.5725133590698243} +{"epoch": 0, "iter": 3319, "iter_tflops": 28.04636745716576, "iter_time": 0.5359094848632813, "loss": 0.3665536046028137, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.863837032297557, "step_time": 0.5032948150634765} +{"epoch": 0, "iter": 3320, "iter_tflops": 25.092609417680713, "iter_time": 0.5989936752319336, "loss": 0.47371456027030945, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 26.623229913063675, "step_time": 0.5645563812255859} +{"epoch": 0, "iter": 3321, "iter_tflops": 21.169788324416384, "iter_time": 0.9745536041259767, "loss": 0.23471122980117798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.15165056165184, "step_time": 0.9313569412231445} +{"epoch": 0, "iter": 3322, "iter_tflops": 12.613705549286783, "iter_time": 1.6356092529296875, "loss": 0.19831794500350952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.625597892521693, "step_time": 1.240923402786255} +{"epoch": 0, "iter": 3323, "iter_tflops": 40.42614956086862, "iter_time": 0.5103403053283692, "loss": 0.19235146045684814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.082320623579164, "step_time": 0.46801287269592284} +{"epoch": 0, "iter": 
3324, "iter_tflops": 46.010420503020676, "iter_time": 0.4484004554748535, "loss": 0.26718172430992126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.18722505911178, "step_time": 0.4110825710296631} +{"epoch": 0, "iter": 3325, "iter_tflops": 34.44738156680243, "iter_time": 0.5989161605834962, "loss": 0.04662579670548439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.38222685496768, "step_time": 0.5375168457031251} +{"epoch": 0, "iter": 3326, "iter_tflops": 42.34113688756552, "iter_time": 0.48725884628295896, "loss": 0.028386365622282028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.23804516440807, "step_time": 0.43674740219116204} +{"epoch": 0, "iter": 3327, "iter_tflops": 45.45777162104375, "iter_time": 0.45385184478759766, "loss": 0.039674289524555206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.21332099542616, "step_time": 0.4108689308166504} +{"epoch": 0, "iter": 3328, "iter_tflops": 48.16357771690726, "iter_time": 0.4283546714782715, "loss": 0.015280838124454021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.814420087418554, "step_time": 0.3906337223052979} +{"epoch": 0, "iter": 3329, "iter_tflops": 31.832661194998785, "iter_time": 0.6481108627319336, "loss": 0.20607414841651917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.41145724561977, "step_time": 0.5826106891632079} +{"epoch": 0, "iter": 3330, "iter_tflops": 39.336415124156, "iter_time": 0.5244782333374023, "loss": 0.21004682779312134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.60125943266369, "step_time": 0.47317654991149904} +{"epoch": 0, "iter": 3331, "iter_tflops": 38.15086994343838, "iter_time": 0.5407764892578125, "loss": 0.21377629041671753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.63450970305394, "step_time": 0.49552867698669434} +{"epoch": 0, "iter": 3332, "iter_tflops": 42.0097579579347, "iter_time": 0.49110241317749026, "loss": 0.16199713945388794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.81984919709676, "step_time": 0.45026541709899903} +{"epoch": 0, 
"iter": 3333, "iter_tflops": 23.046132185228277, "iter_time": 0.8952085037231445, "loss": 0.12876029312610626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.03573262502658, "step_time": 0.8240658988952637} +{"epoch": 0, "iter": 3334, "iter_tflops": 25.706299873251762, "iter_time": 0.8025695495605468, "loss": 0.2285395860671997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.698165282840005, "step_time": 0.6508608093261719} +{"epoch": 0, "iter": 3335, "iter_tflops": 53.33782303151606, "iter_time": 0.3868004417419433, "loss": 0.1824161559343338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.88366283922196, "step_time": 0.3564234275817871} +{"epoch": 0, "iter": 3336, "iter_tflops": 54.511206668232205, "iter_time": 0.37847434997558593, "loss": 0.1864955574274063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.01996193362942, "step_time": 0.34956128120422364} +{"epoch": 0, "iter": 3337, "iter_tflops": 25.253062002131998, "iter_time": 0.8169739379882812, "loss": 0.19731342792510986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.711254675519832, "step_time": 0.7723745574951171} +{"epoch": 0, "iter": 3338, "iter_tflops": 14.275777199245242, "iter_time": 1.4451818084716797, "loss": 0.21814832091331482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.083088774765983, "step_time": 1.2076910552978515} +{"epoch": 0, "iter": 3339, "iter_tflops": 38.533266122971895, "iter_time": 0.5354099349975586, "loss": 0.23604056239128113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.14424239362157, "step_time": 0.48953528022766113} +{"epoch": 0, "iter": 3340, "iter_tflops": 43.547212296701815, "iter_time": 0.4737638168334961, "loss": 0.23815757036209106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73949206248593, "step_time": 0.43215988731384275} +{"epoch": 0, "iter": 3341, "iter_tflops": 16.635987796161775, "iter_time": 1.240148391723633, "loss": 0.8182725310325623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.91326705403818, "step_time": 1.1517214279174803} 
+{"epoch": 0, "iter": 3342, "iter_tflops": 31.076741043342825, "iter_time": 0.6638757095336915, "loss": 0.7538104057312012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.8214521316135, "step_time": 0.5924822845458984} +{"epoch": 0, "iter": 3343, "iter_tflops": 45.19863607349187, "iter_time": 0.4564538955688477, "loss": 0.8215717077255249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.63023642581464, "step_time": 0.42424415397644044} +{"epoch": 0, "iter": 3344, "iter_tflops": 44.07839822896079, "iter_time": 0.4680545196533203, "loss": 0.8006852865219116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.35871689930124, "step_time": 0.4356345539093018} +{"epoch": 0, "iter": 3345, "iter_tflops": 39.69756471290891, "iter_time": 0.5197067794799806, "loss": 0.12310784310102463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.05964157295308, "step_time": 0.4791283149719238} +{"epoch": 0, "iter": 3346, "iter_tflops": 32.981579192693886, "iter_time": 0.6255338287353515, "loss": 0.17976593971252441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.65336196301044, "step_time": 0.4836920833587647} +{"epoch": 0, "iter": 3347, "iter_tflops": 50.12033714182781, "iter_time": 0.4116311798095703, "loss": 0.17861849069595337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.436572187311896, "step_time": 0.37899325180053706} +{"epoch": 0, "iter": 3348, "iter_tflops": 51.526081701004706, "iter_time": 0.40040097808837893, "loss": 0.12192658334970474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.01549086600451, "step_time": 0.3683105010986328} +{"epoch": 0, "iter": 3349, "iter_tflops": 24.056995012314964, "iter_time": 0.8575922927856446, "loss": 0.7602056264877319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.261043302646446, "step_time": 0.8167158126831054} +{"epoch": 0, "iter": 3350, "iter_tflops": 13.749299373778515, "iter_time": 1.5005196228027344, "loss": 0.9461866021156311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.59890706892762, "step_time": 
1.1722940196990967} +{"epoch": 0, "iter": 3351, "iter_tflops": 37.605383125004394, "iter_time": 0.5486207504272461, "loss": 0.8938320875167847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.25293082469255, "step_time": 0.5001121883392334} +{"epoch": 0, "iter": 3352, "iter_tflops": 43.60526365855889, "iter_time": 0.4731330986022949, "loss": 0.8076761364936829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43245654206383, "step_time": 0.43495730590820314} +{"epoch": 0, "iter": 3353, "iter_tflops": 19.224943598374878, "iter_time": 1.0731419525146484, "loss": 0.4704643189907074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.339288528321575, "step_time": 1.0143468627929688} +{"epoch": 0, "iter": 3354, "iter_tflops": 16.56676295223121, "iter_time": 1.24533039855957, "loss": 0.4527971148490906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.412547599922178, "step_time": 1.010706449508667} +{"epoch": 0, "iter": 3355, "iter_tflops": 48.14394852036095, "iter_time": 0.4285293197631836, "loss": 0.5117387175559998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.244217289468835, "step_time": 0.39489716911315914} +{"epoch": 0, "iter": 3356, "iter_tflops": 51.02195254009687, "iter_time": 0.4043571929931641, "loss": 0.49361661076545715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.05761257919314, "step_time": 0.3747182731628418} +{"epoch": 0, "iter": 3357, "iter_tflops": 27.43534443603076, "iter_time": 0.7519895935058593, "loss": 0.16879968345165253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.187974187014163, "step_time": 0.7068354034423827} +{"epoch": 0, "iter": 3358, "iter_tflops": 16.102891824867672, "iter_time": 1.2812042541503907, "loss": 0.12494228035211563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.246714336037844, "step_time": 0.9273771038055421} +{"epoch": 0, "iter": 3359, "iter_tflops": 41.52482985837195, "iter_time": 0.4968375205993652, "loss": 0.14863578975200653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.77890936814204, "step_time": 
0.4506680870056152} +{"epoch": 0, "iter": 3360, "iter_tflops": 43.38179229220407, "iter_time": 0.4755703353881836, "loss": 0.11766305565834045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19012804787324, "step_time": 0.43719087791442873} +{"epoch": 0, "iter": 3361, "iter_tflops": 16.737228917989977, "iter_time": 1.1016195373535158, "loss": 0.2564215660095215, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 18.14871553719382, "step_time": 1.0159428825378418} +{"epoch": 0, "iter": 3362, "iter_tflops": 13.80894469393528, "iter_time": 1.3352257385253905, "loss": 0.21900247037410736, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 17.052033591331206, "step_time": 1.0812820816040039} +{"epoch": 0, "iter": 3363, "iter_tflops": 43.462780584122704, "iter_time": 0.4242263870239258, "loss": 0.18899141252040863, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 47.18350155818639, "step_time": 0.3907734222412109} +{"epoch": 0, "iter": 3364, "iter_tflops": 41.72408797974044, "iter_time": 0.44190440750122073, "loss": 0.254997193813324, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 45.03968069577953, "step_time": 0.4093736476898193} +{"epoch": 0, "iter": 3365, "iter_tflops": 11.087108582071362, "iter_time": 0.9304091033935546, "loss": 0.015412922948598862, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 11.595353461212262, "step_time": 0.8896276245117187} +{"epoch": 0, "iter": 3366, "iter_tflops": 6.196334933938469, "iter_time": 1.6647819824218748, "loss": 0.0018896795809268951, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 8.482188213030678, "step_time": 1.2161421670913697} +{"epoch": 0, "iter": 3367, "iter_tflops": 26.389548897443618, "iter_time": 0.3908951530456543, "loss": 0.0046792714856565, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 29.546762796709434, "step_time": 0.3491261234283447} +{"epoch": 0, "iter": 3368, "iter_tflops": 27.21823294524828, "iter_time": 0.37899399185180666, "loss": 0.04278564453125, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 30.25348621808404, 
"step_time": 0.34097051429748537} +{"epoch": 0, "iter": 3369, "iter_tflops": 36.24035860412951, "iter_time": 0.569285026550293, "loss": 0.18909220397472382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.47871112523696, "step_time": 0.522587818145752} +{"epoch": 0, "iter": 3370, "iter_tflops": 11.563097744516648, "iter_time": 1.784218551635742, "loss": 0.2921682298183441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.154270927559091, "step_time": 1.5683950576782226} +{"epoch": 0, "iter": 3371, "iter_tflops": 16.184792094614355, "iter_time": 1.274720947265625, "loss": 0.13914047181606293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.327486803584385, "step_time": 1.067448329925537} +{"epoch": 0, "iter": 3372, "iter_tflops": 41.257554233093174, "iter_time": 0.5000561447143554, "loss": 0.19894568622112274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.03383832158069, "step_time": 0.45812425231933585} +{"epoch": 0, "iter": 3373, "iter_tflops": 13.62787538937058, "iter_time": 1.093916046142578, "loss": 0.4111182987689972, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 14.69529373078515, "step_time": 1.0144575424194335} +{"epoch": 0, "iter": 3374, "iter_tflops": 8.557643323040308, "iter_time": 1.7420393676757813, "loss": 0.33509963750839233, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 10.873122300217299, "step_time": 1.3710644607543945} +{"epoch": 0, "iter": 3375, "iter_tflops": 21.742063466049032, "iter_time": 0.6856640625, "loss": 0.5076225996017456, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 23.455585081674407, "step_time": 0.6355736389160156} +{"epoch": 0, "iter": 3376, "iter_tflops": 24.30561136314163, "iter_time": 0.6133460845947265, "loss": 0.37316179275512695, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 26.044036464634036, "step_time": 0.5724055709838867} +{"epoch": 0, "iter": 3377, "iter_tflops": 26.551896367840044, "iter_time": 0.7770101699829102, "loss": 0.7836248874664307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.48621995255801, 
"step_time": 0.7242481994628907} +{"epoch": 0, "iter": 3378, "iter_tflops": 32.12227797115857, "iter_time": 0.6422674484252929, "loss": 1.0095406770706177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.373564773378725, "step_time": 0.5110050010681151} +{"epoch": 0, "iter": 3379, "iter_tflops": 37.87307447497149, "iter_time": 0.5447430343627929, "loss": 0.8981257081031799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.37125417964419, "step_time": 0.498681848526001} +{"epoch": 0, "iter": 3380, "iter_tflops": 38.08893569323444, "iter_time": 0.5416558151245117, "loss": 1.0365077257156372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.21080085666795, "step_time": 0.5006234550476074} +{"epoch": 0, "iter": 3381, "iter_tflops": 20.38229214219049, "iter_time": 1.0122067413330078, "loss": 0.8720134496688843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.378351993919697, "step_time": 0.921921932220459} +{"epoch": 0, "iter": 3382, "iter_tflops": 21.619989300345093, "iter_time": 0.9542601165771484, "loss": 1.0589343309402466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.304935895104634, "step_time": 0.815299180984497} +{"epoch": 0, "iter": 3383, "iter_tflops": 38.78117251524464, "iter_time": 0.5319873580932617, "loss": 0.8812733292579651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.354092436236684, "step_time": 0.4871098003387452} +{"epoch": 0, "iter": 3384, "iter_tflops": 35.65542196904368, "iter_time": 0.578624298095703, "loss": 0.8556808233261108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.709664002822144, "step_time": 0.532970100402832} +{"epoch": 0, "iter": 3385, "iter_tflops": 20.777738001411354, "iter_time": 0.9929422302246094, "loss": 0.007385374046862125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.90272861578671, "step_time": 0.9419417037963868} +{"epoch": 0, "iter": 3386, "iter_tflops": 14.91588315380751, "iter_time": 1.3831627197265624, "loss": 0.004207611549645662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.434790292248028, 
"step_time": 1.1191390399932861} +{"epoch": 0, "iter": 3387, "iter_tflops": 43.95057986472119, "iter_time": 0.46941572952270505, "loss": 0.0036671545822173357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.439334034455044, "step_time": 0.4259161262512208} +{"epoch": 0, "iter": 3388, "iter_tflops": 42.318983129528874, "iter_time": 0.4875139236450195, "loss": 0.011756214313209057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.12590029540395, "step_time": 0.4377867240905761} +{"epoch": 0, "iter": 3389, "iter_tflops": 17.500873170884855, "iter_time": 1.1788608093261719, "loss": 0.3018970787525177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.76156767539394, "step_time": 1.0996465682983398} +{"epoch": 0, "iter": 3390, "iter_tflops": 32.28044753690699, "iter_time": 0.6391204299926758, "loss": 0.25290647149086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.97337445463671, "step_time": 0.5161208877563477} +{"epoch": 0, "iter": 3391, "iter_tflops": 50.02070703489776, "iter_time": 0.41245105743408206, "loss": 0.15558142960071564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.22797276182079, "step_time": 0.38045112991333013} +{"epoch": 0, "iter": 3392, "iter_tflops": 49.2801832167887, "iter_time": 0.41864888000488276, "loss": 0.2988741993904114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.52312280071957, "step_time": 0.3854613189697265} +{"epoch": 0, "iter": 3393, "iter_tflops": 28.99164338719207, "iter_time": 0.7116220779418946, "loss": 0.7940301895141602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.648357288480387, "step_time": 0.6731549530029296} +{"epoch": 0, "iter": 3394, "iter_tflops": 14.238895695644842, "iter_time": 1.4489251098632814, "loss": 0.9285637140274048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.96477493482183, "step_time": 1.0333747100830077} +{"epoch": 0, "iter": 3395, "iter_tflops": 48.022909713723, "iter_time": 0.4296094017028809, "loss": 0.8815939426422119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.1310894181722, 
"step_time": 0.3957541217803955} +{"epoch": 0, "iter": 3396, "iter_tflops": 49.886769555440985, "iter_time": 0.41355841827392575, "loss": 0.8573905229568481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.734411380184994, "step_time": 0.38394565010070797} +{"epoch": 0, "iter": 3397, "iter_tflops": 25.117737021000128, "iter_time": 0.82137548828125, "loss": 0.25240153074264526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.462123538313726, "step_time": 0.7796461791992187} +{"epoch": 0, "iter": 3398, "iter_tflops": 12.01313694737358, "iter_time": 1.7173777008056639, "loss": 0.15190938115119934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.058063839362362, "step_time": 1.370102672576904} +{"epoch": 0, "iter": 3399, "iter_tflops": 36.858152915196975, "iter_time": 0.5597430114746094, "loss": 0.2526617646217346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.38338214807483, "step_time": 0.5108807735443115} +{"epoch": 0, "iter": 3400, "iter_tflops": 39.14295406381556, "iter_time": 0.527070426940918, "loss": 0.21567794680595398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88495163352711, "step_time": 0.4810800228118897} +{"epoch": 0, "iter": 3401, "iter_tflops": 29.765067329513965, "iter_time": 0.693131088256836, "loss": 0.05301651358604431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.21930297868796, "step_time": 0.6403333282470703} +{"epoch": 0, "iter": 3402, "iter_tflops": 14.691819138289944, "iter_time": 1.4042572479248046, "loss": 0.053063247352838516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.91513983790965, "step_time": 1.219682113647461} +{"epoch": 0, "iter": 3403, "iter_tflops": 52.95850200786616, "iter_time": 0.3895709419250488, "loss": 0.034388523548841476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.01796180382052, "step_time": 0.35559838485717776} +{"epoch": 0, "iter": 3404, "iter_tflops": 58.3626507377979, "iter_time": 0.3534982261657714, "loss": 0.03585267812013626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
63.820502732593795, "step_time": 0.32326748657226567} +{"epoch": 0, "iter": 3405, "iter_tflops": 18.859173304414007, "iter_time": 0.7731582336425782, "loss": 0.4408566951751709, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 19.923180917258247, "step_time": 0.7318673248291016} +{"epoch": 0, "iter": 3406, "iter_tflops": 6.176128131549942, "iter_time": 2.3608844909667965, "loss": 0.3745359480381012, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 8.046168141398859, "step_time": 1.812182502746582} +{"epoch": 0, "iter": 3407, "iter_tflops": 8.099884530043397, "iter_time": 1.8001645660400392, "loss": 0.5407044291496277, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 9.4702523957313, "step_time": 1.5396765060424804} +{"epoch": 0, "iter": 3408, "iter_tflops": 17.494593602977304, "iter_time": 0.8334646377563477, "loss": 0.43240994215011597, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 19.28050595435351, "step_time": 0.7562625770568847} +{"epoch": 0, "iter": 3409, "iter_tflops": 22.058901771379077, "iter_time": 0.6961976699829102, "loss": 0.40225985646247864, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.44367457106907, "step_time": 0.6550746116638183} +{"epoch": 0, "iter": 3410, "iter_tflops": 7.923201303706641, "iter_time": 1.938276641845703, "loss": 0.4687178432941437, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 10.240178653654427, "step_time": 1.4997156333923338} +{"epoch": 0, "iter": 3411, "iter_tflops": 21.346669144932402, "iter_time": 0.7194263381958008, "loss": 0.47662341594696045, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.057928782153763, "step_time": 0.6660336303710938} +{"epoch": 0, "iter": 3412, "iter_tflops": 21.17472405093327, "iter_time": 0.7252682952880858, "loss": 0.4565017521381378, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 22.799309630983725, "step_time": 0.6735886421203613} +{"epoch": 0, "iter": 3413, "iter_tflops": 18.797408805257287, "iter_time": 1.0975498657226561, "loss": 0.03322711959481239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
20.188570102197037, "step_time": 1.0219195022583007} +{"epoch": 0, "iter": 3414, "iter_tflops": 32.62382356259014, "iter_time": 0.6323934860229492, "loss": 0.03490130603313446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.828299265903084, "step_time": 0.5601967487335205} +{"epoch": 0, "iter": 3415, "iter_tflops": 54.50808995120382, "iter_time": 0.3784959907531738, "loss": 0.03156465291976929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.70769144521672, "step_time": 0.34553493881225583} +{"epoch": 0, "iter": 3416, "iter_tflops": 54.82763824680942, "iter_time": 0.3762900276184082, "loss": 0.05145008862018585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.15078094079968, "step_time": 0.3429896202087402} +{"epoch": 0, "iter": 3417, "iter_tflops": 28.710133422638076, "iter_time": 0.718599708557129, "loss": 0.8212664127349854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.41275877752873, "step_time": 0.6783696823120117} +{"epoch": 0, "iter": 3418, "iter_tflops": 17.079847914405747, "iter_time": 1.2079202117919923, "loss": 0.8808885812759399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.053982437138718, "step_time": 1.082770679473877} +{"epoch": 0, "iter": 3419, "iter_tflops": 35.384162551186535, "iter_time": 0.5830601043701171, "loss": 1.035122036933899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.2557351861622, "step_time": 0.539294132232666} +{"epoch": 0, "iter": 3420, "iter_tflops": 36.71951624661221, "iter_time": 0.5618563537597656, "loss": 0.9115101099014282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.10150719041627, "step_time": 0.514471773147583} +{"epoch": 0, "iter": 3421, "iter_tflops": 17.996632887568758, "iter_time": 1.1463863067626954, "loss": 0.4221816956996918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.326679065282537, "step_time": 1.0674929428100586} +{"epoch": 0, "iter": 3422, "iter_tflops": 22.951173442385937, "iter_time": 0.8989123611450195, "loss": 0.3392338454723358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
28.154911995480024, "step_time": 0.7327706623077392} +{"epoch": 0, "iter": 3423, "iter_tflops": 47.50759567111377, "iter_time": 0.4342693672180176, "loss": 0.5062690377235413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.46840018894295, "step_time": 0.4008497142791748} +{"epoch": 0, "iter": 3424, "iter_tflops": 47.809161703816685, "iter_time": 0.4315301246643066, "loss": 0.4049301743507385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.92304901866171, "step_time": 0.39733979225158694} +{"epoch": 0, "iter": 3425, "iter_tflops": 45.77897020530368, "iter_time": 0.45066748809814455, "loss": 0.37377384305000305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.00746001428939, "step_time": 0.41256031608581545} +{"epoch": 0, "iter": 3426, "iter_tflops": 12.319336927284084, "iter_time": 1.674691879272461, "loss": 0.38216274976730347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.372676673026783, "step_time": 1.542779655456543} +{"epoch": 0, "iter": 3427, "iter_tflops": 17.17325923135272, "iter_time": 1.2013499145507813, "loss": 0.3781474828720093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.159783936952532, "step_time": 1.0767915534973145} +{"epoch": 0, "iter": 3428, "iter_tflops": 16.382530383185593, "iter_time": 1.2593349761962889, "loss": 0.2881908714771271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.398999034538534, "step_time": 1.1213160820007324} +{"epoch": 0, "iter": 3429, "iter_tflops": 16.833981440485996, "iter_time": 0.846787239074707, "loss": 0.3497532606124878, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 17.702268749870402, "step_time": 0.8052527542114258} +{"epoch": 0, "iter": 3430, "iter_tflops": 9.278526066540294, "iter_time": 1.5363216705322267, "loss": 0.2919875681400299, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 11.607710044910325, "step_time": 1.2280458946228028} +{"epoch": 0, "iter": 3431, "iter_tflops": 21.60725958609262, "iter_time": 0.6597227478027343, "loss": 0.4853450357913971, "lr": 3e-05, "seqlen": 5712.0, 
"step_tflops": 23.326164731460064, "step_time": 0.6111077766418457} +{"epoch": 0, "iter": 3432, "iter_tflops": 20.43092546929641, "iter_time": 0.6977070465087891, "loss": 0.40333670377731323, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 21.92116385914087, "step_time": 0.6502757225036622} +{"epoch": 0, "iter": 3433, "iter_tflops": 18.107450487744806, "iter_time": 1.1393704223632815, "loss": 0.5826085805892944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.383362041544594, "step_time": 1.0643712615966796} +{"epoch": 0, "iter": 3434, "iter_tflops": 17.73972930972647, "iter_time": 1.162988067626953, "loss": 0.5401875972747803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.292017340051817, "step_time": 0.8157156162261963} +{"epoch": 0, "iter": 3435, "iter_tflops": 49.92245698452134, "iter_time": 0.41326278305053704, "loss": 0.7577512860298157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.086002469161045, "step_time": 0.3814497756958008} +{"epoch": 0, "iter": 3436, "iter_tflops": 45.316731388421665, "iter_time": 0.4552643775939942, "loss": 0.6034895181655884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.77835125114644, "step_time": 0.42295594215393073} +{"epoch": 0, "iter": 3437, "iter_tflops": 32.51656165333352, "iter_time": 0.6344795532226563, "loss": 0.11808788031339645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.806007040216414, "step_time": 0.5927451972961426} +{"epoch": 0, "iter": 3438, "iter_tflops": 13.618307743957821, "iter_time": 1.5149528045654295, "loss": 0.07354652881622314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.34637273467395, "step_time": 1.2621205844879153} +{"epoch": 0, "iter": 3439, "iter_tflops": 48.37780203116336, "iter_time": 0.4264578514099121, "loss": 0.08086935430765152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.79249132887547, "step_time": 0.39079598236083984} +{"epoch": 0, "iter": 3440, "iter_tflops": 51.4065078485393, "iter_time": 0.40133232879638675, "loss": 0.06524398177862167, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 56.04198431014126, "step_time": 0.36813638496398926} +{"epoch": 0, "iter": 3441, "iter_tflops": 42.93965215526454, "iter_time": 0.4804671783447265, "loss": 0.5361522436141968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79389866144849, "step_time": 0.4408928108215333} +{"epoch": 0, "iter": 3442, "iter_tflops": 36.28640975984872, "iter_time": 0.5685625457763672, "loss": 0.5242502093315125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.93944136672599, "step_time": 0.5165593910217285} +{"epoch": 0, "iter": 3443, "iter_tflops": 40.20714816403725, "iter_time": 0.5131200408935548, "loss": 0.6432130336761475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.014630873693896, "step_time": 0.46873262596130366} +{"epoch": 0, "iter": 3444, "iter_tflops": 40.856067633637515, "iter_time": 0.5049701232910156, "loss": 0.6081194281578064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.72556069343153, "step_time": 0.4612819423675537} +{"epoch": 0, "iter": 3445, "iter_tflops": 20.117632953302216, "iter_time": 1.0255229110717774, "loss": 0.8440316915512085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.599006917423626, "step_time": 0.9551871337890625} +{"epoch": 0, "iter": 3446, "iter_tflops": 14.374870631425892, "iter_time": 1.4352194213867189, "loss": 0.8156693577766418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.328455902366542, "step_time": 1.0673948097229005} +{"epoch": 0, "iter": 3447, "iter_tflops": 41.87259334977582, "iter_time": 0.49271114730834953, "loss": 0.8932033181190491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.89766508338236, "step_time": 0.459513729095459} +{"epoch": 0, "iter": 3448, "iter_tflops": 44.97134589612647, "iter_time": 0.4587608642578125, "loss": 0.919810950756073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.613779839827814, "step_time": 0.424387767791748} +{"epoch": 0, "iter": 3449, "iter_tflops": 44.498790627277984, "iter_time": 0.4636326789855957, "loss": 0.19901524484157562, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 48.83677570067984, "step_time": 0.422449951171875} +{"epoch": 0, "iter": 3450, "iter_tflops": 37.11694327957424, "iter_time": 0.5558403167724609, "loss": 0.24083030223846436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60043862136985, "step_time": 0.49593451881408684} +{"epoch": 0, "iter": 3451, "iter_tflops": 38.49089102694752, "iter_time": 0.5359993743896485, "loss": 0.18810878694057465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.23212939632984, "step_time": 0.4885165348052979} +{"epoch": 0, "iter": 3452, "iter_tflops": 41.071298796260024, "iter_time": 0.5023238639831543, "loss": 0.21330176293849945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.784297303739756, "step_time": 0.4606769504547119} +{"epoch": 0, "iter": 3453, "iter_tflops": 27.74061657987662, "iter_time": 0.7437143096923828, "loss": 0.05860140174627304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.827679136514597, "step_time": 0.6916761245727541} +{"epoch": 0, "iter": 3454, "iter_tflops": 8.701357899022353, "iter_time": 2.3710199890136723, "loss": 0.05413954704999924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.78656005581492, "step_time": 1.912666633605957} +{"epoch": 0, "iter": 3455, "iter_tflops": 16.52303986540669, "iter_time": 1.2486257781982422, "loss": 0.08506956696510315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.734013056332305, "step_time": 1.1012639656066894} +{"epoch": 0, "iter": 3456, "iter_tflops": 42.649437482146354, "iter_time": 0.48373659133911134, "loss": 0.10011190176010132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.95016460977265, "step_time": 0.439425371170044} +{"epoch": 0, "iter": 3457, "iter_tflops": 12.576474000198933, "iter_time": 1.2634258880615232, "loss": 0.4356478750705719, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 13.331223360226367, "step_time": 1.1918968276977537} +{"epoch": 0, "iter": 3458, "iter_tflops": 11.051052545690407, "iter_time": 1.4378216705322264, "loss": 0.4053964912891388, "lr": 3e-05, 
"seqlen": 6352.0, "step_tflops": 14.372659173698702, "step_time": 1.1055325698852538} +{"epoch": 0, "iter": 3459, "iter_tflops": 27.866169118526898, "iter_time": 0.5702054977416992, "loss": 0.3737679123878479, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 29.626304498431487, "step_time": 0.5363288841247559} +{"epoch": 0, "iter": 3460, "iter_tflops": 27.611359681538637, "iter_time": 0.5754675979614257, "loss": 0.4493027329444885, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 29.295464945372967, "step_time": 0.5423857536315918} +{"epoch": 0, "iter": 3461, "iter_tflops": 28.19134612910699, "iter_time": 0.7318236389160157, "loss": 0.23884524405002594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.811694617458258, "step_time": 0.692046989440918} +{"epoch": 0, "iter": 3462, "iter_tflops": 15.180602577079394, "iter_time": 1.3590431213378908, "loss": 0.17473480105400085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.556892273833, "step_time": 1.054926990509033} +{"epoch": 0, "iter": 3463, "iter_tflops": 39.11750713782591, "iter_time": 0.5274132995605469, "loss": 0.2167811095714569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.83421499837552, "step_time": 0.4816498565673828} +{"epoch": 0, "iter": 3464, "iter_tflops": 40.18313113312686, "iter_time": 0.5134267272949219, "loss": 0.23821134865283966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.473953422660855, "step_time": 0.4745621662139893} +{"epoch": 0, "iter": 3465, "iter_tflops": 15.21104142873828, "iter_time": 1.1093160247802734, "loss": 0.03549828380346298, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 16.46538504154067, "step_time": 1.0248076171874998} +{"epoch": 0, "iter": 3466, "iter_tflops": 15.911865093161342, "iter_time": 1.060457206726074, "loss": 0.03882893919944763, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 19.57603049736171, "step_time": 0.8619649429321288} +{"epoch": 0, "iter": 3467, "iter_tflops": 45.66985264221453, "iter_time": 0.3694746322631836, "loss": 0.03778935968875885, "lr": 3e-05, 
"seqlen": 6736.0, "step_tflops": 50.02978639507141, "step_time": 0.3372761154174804} +{"epoch": 0, "iter": 3468, "iter_tflops": 41.852508441593294, "iter_time": 0.40317420959472655, "loss": 0.01810251548886299, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 45.847746126657, "step_time": 0.368041036605835} +{"epoch": 0, "iter": 3469, "iter_tflops": 11.093956762032914, "iter_time": 0.5691694412231446, "loss": 0.004030600190162659, "lr": 3e-05, "seqlen": 2560.0, "step_tflops": 11.994224453952366, "step_time": 0.5264484748840332} +{"epoch": 0, "iter": 3470, "iter_tflops": 13.037430023873027, "iter_time": 0.48432406997680666, "loss": 0.009639759548008442, "lr": 3e-05, "seqlen": 2560.0, "step_tflops": 14.467081680767661, "step_time": 0.43646267509460446} +{"epoch": 0, "iter": 3471, "iter_tflops": 14.33799885649754, "iter_time": 0.4403920822143555, "loss": 0.007383618038147688, "lr": 3e-05, "seqlen": 2560.0, "step_tflops": 15.934387890081878, "step_time": 0.39627133560180666} +{"epoch": 0, "iter": 3472, "iter_tflops": 13.950257317044333, "iter_time": 0.4526325950622559, "loss": 0.00951523520052433, "lr": 3e-05, "seqlen": 2560.0, "step_tflops": 15.495565473863246, "step_time": 0.40749343299865726} +{"epoch": 0, "iter": 3473, "iter_tflops": 28.73337770766277, "iter_time": 0.7180183868408203, "loss": 0.23247548937797546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.615835154439207, "step_time": 0.6525557022094727} +{"epoch": 0, "iter": 3474, "iter_tflops": 10.995498081721827, "iter_time": 1.8763218688964844, "loss": 0.16860520839691162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.14844716937496, "step_time": 1.5690897369384764} +{"epoch": 0, "iter": 3475, "iter_tflops": 16.906100193009543, "iter_time": 1.220334274291992, "loss": 0.26166507601737976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.57674767379885, "step_time": 1.1105869483947755} +{"epoch": 0, "iter": 3476, "iter_tflops": 25.10261450167013, "iter_time": 0.8218703079223633, "loss": 0.34831151366233826, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.52555588145639, "step_time": 0.6758629913330079} +{"epoch": 0, "iter": 3477, "iter_tflops": 15.363509152001104, "iter_time": 0.9729939270019531, "loss": 0.41623836755752563, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 16.352954759399715, "step_time": 0.914122329711914} +{"epoch": 0, "iter": 3478, "iter_tflops": 16.74266348711491, "iter_time": 0.8928448638916016, "loss": 0.43912383913993835, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 20.477399108740958, "step_time": 0.7300048713684082} +{"epoch": 0, "iter": 3479, "iter_tflops": 26.780048085830842, "iter_time": 0.5581991882324219, "loss": 0.3785986602306366, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.638797007013782, "step_time": 0.521970287322998} +{"epoch": 0, "iter": 3480, "iter_tflops": 27.117834383321416, "iter_time": 0.5512461242675781, "loss": 0.4052892029285431, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.87358544567163, "step_time": 0.5177258338928222} +{"epoch": 0, "iter": 3481, "iter_tflops": 22.78847337465228, "iter_time": 0.9053302154541016, "loss": 0.3092780113220215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.02639215875043, "step_time": 0.8586846237182618} +{"epoch": 0, "iter": 3482, "iter_tflops": 15.125543755225223, "iter_time": 1.363990203857422, "loss": 0.23468217253684998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.789704217115492, "step_time": 1.0979999084472656} +{"epoch": 0, "iter": 3483, "iter_tflops": 45.257955686920816, "iter_time": 0.4558556213378906, "loss": 0.3228687644004822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.018555091059405, "step_time": 0.4208833465576172} +{"epoch": 0, "iter": 3484, "iter_tflops": 47.73857574812225, "iter_time": 0.4321681823730469, "loss": 0.29475268721580505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.70082780301991, "step_time": 0.399047643661499} +{"epoch": 0, "iter": 3485, "iter_tflops": 31.463292678197686, "iter_time": 0.6557194671630859, "loss": 1.0516321659088135, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.38306021037236, "step_time": 0.6180108528137207} +{"epoch": 0, "iter": 3486, "iter_tflops": 10.214328952082514, "iter_time": 2.0198187866210935, "loss": 1.0137428045272827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.818715765261022, "step_time": 1.4929819717407227} +{"epoch": 0, "iter": 3487, "iter_tflops": 14.0684781725886, "iter_time": 1.4664765624999998, "loss": 1.134506344795227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.62116702768235, "step_time": 1.170813117980957} +{"epoch": 0, "iter": 3488, "iter_tflops": 18.190864140742498, "iter_time": 1.1341458740234376, "loss": 1.1542407274246216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.496488555616732, "step_time": 0.9597424926757813} +{"epoch": 0, "iter": 3489, "iter_tflops": 14.556422008391575, "iter_time": 1.0409783096313476, "loss": 0.5710678100585938, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 15.419052095773615, "step_time": 0.982740020751953} +{"epoch": 0, "iter": 3490, "iter_tflops": 7.025945901107048, "iter_time": 2.156708831787109, "loss": 0.547271192073822, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 9.714613714922644, "step_time": 1.5598067016601562} +{"epoch": 0, "iter": 3491, "iter_tflops": 11.145750999586962, "iter_time": 1.3595243225097655, "loss": 0.5314674973487854, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 13.69670516718229, "step_time": 1.1063185920715333} +{"epoch": 0, "iter": 3492, "iter_tflops": 22.69709489808193, "iter_time": 0.6676149368286133, "loss": 0.4811602532863617, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 27.794640697128123, "step_time": 0.5451741485595704} +{"epoch": 0, "iter": 3493, "iter_tflops": 17.214700965313703, "iter_time": 0.8304282913208008, "loss": 0.35663461685180664, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 18.116971122477665, "step_time": 0.7890708999633789} +{"epoch": 0, "iter": 3494, "iter_tflops": 7.159061324509889, "iter_time": 1.996850433349609, "loss": 0.44014307856559753, "lr": 
3e-05, "seqlen": 5728.0, "step_tflops": 8.648538385166214, "step_time": 1.6529468994140626} +{"epoch": 0, "iter": 3495, "iter_tflops": 11.537856142105385, "iter_time": 1.2390148162841796, "loss": 0.3452962636947632, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 12.774861516762645, "step_time": 1.119039505004883} +{"epoch": 0, "iter": 3496, "iter_tflops": 12.806451312805505, "iter_time": 1.1162791595458985, "loss": 0.28238773345947266, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 16.333345409098147, "step_time": 0.8752386207580566} +{"epoch": 0, "iter": 3497, "iter_tflops": 13.323830921231615, "iter_time": 1.1526231536865237, "loss": 0.5439944267272949, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 14.328107673435529, "step_time": 1.071834213256836} +{"epoch": 0, "iter": 3498, "iter_tflops": 12.49493272428217, "iter_time": 1.229086730957031, "loss": 0.5095589756965637, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 15.852417700428871, "step_time": 0.9687705879211426} +{"epoch": 0, "iter": 3499, "iter_tflops": 27.226186917962952, "iter_time": 0.5640656204223633, "loss": 0.34969159960746765, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.03092209361601, "step_time": 0.5289999389648438} +{"epoch": 0, "iter": 3500, "iter_tflops": 27.853336741716788, "iter_time": 0.5513650360107422, "loss": 0.37171491980552673, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.63863785875161, "step_time": 0.5181532325744629} +{"epoch": 0, "iter": 3501, "iter_tflops": 37.4963748729905, "iter_time": 0.5502156829833984, "loss": 0.823826789855957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.52867495985863, "step_time": 0.5090492973327637} +{"epoch": 0, "iter": 3502, "iter_tflops": 31.068145627678735, "iter_time": 0.6640593795776367, "loss": 0.7822495698928833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.97442376188653, "step_time": 0.5161073398590088} +{"epoch": 0, "iter": 3503, "iter_tflops": 44.827869319005586, "iter_time": 0.4602291793823242, "loss": 0.817809522151947, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 48.36474126203624, "step_time": 0.4265730152130127} +{"epoch": 0, "iter": 3504, "iter_tflops": 51.56952259420355, "iter_time": 0.4000636901855469, "loss": 0.6708001494407654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.64712480625102, "step_time": 0.3707485980987548} +{"epoch": 0, "iter": 3505, "iter_tflops": 36.82386956042622, "iter_time": 0.5602641372680665, "loss": 0.8234474062919617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.60701496563122, "step_time": 0.5208949356079101} +{"epoch": 0, "iter": 3506, "iter_tflops": 9.338651762929677, "iter_time": 2.2092154235839843, "loss": 0.6155245900154114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.813253249192705, "step_time": 1.7464362335205077} +{"epoch": 0, "iter": 3507, "iter_tflops": 12.09864069530385, "iter_time": 1.7052406158447266, "loss": 0.7835151553153992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.108205661932635, "step_time": 1.3655555114746094} +{"epoch": 0, "iter": 3508, "iter_tflops": 24.946170435533794, "iter_time": 0.8270244750976563, "loss": 0.5929478406906128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.639246481238978, "step_time": 0.746441967010498} +{"epoch": 0, "iter": 3509, "iter_tflops": 18.57795801024316, "iter_time": 0.8839683609008788, "loss": 0.44295769929885864, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 19.939303635046613, "step_time": 0.8236158790588379} +{"epoch": 0, "iter": 3510, "iter_tflops": 22.73950901628018, "iter_time": 0.7221935653686523, "loss": 0.34615233540534973, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 24.97276650968701, "step_time": 0.6576094436645508} +{"epoch": 0, "iter": 3511, "iter_tflops": 24.865016987777395, "iter_time": 0.660459114074707, "loss": 0.6153172254562378, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 26.686539690713488, "step_time": 0.615378662109375} +{"epoch": 0, "iter": 3512, "iter_tflops": 25.059485155431172, "iter_time": 0.6553337783813475, "loss": 0.3469817638397217, "lr": 
3e-05, "seqlen": 6560.0, "step_tflops": 26.96979371983521, "step_time": 0.6089155616760255} +{"epoch": 0, "iter": 3513, "iter_tflops": 27.832497684131354, "iter_time": 0.741259147644043, "loss": 1.2199859619140625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.561230947723146, "step_time": 0.6750740356445313} +{"epoch": 0, "iter": 3514, "iter_tflops": 35.756624396172576, "iter_time": 0.5769866104125976, "loss": 1.1417622566223145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.00552239139491, "step_time": 0.5289275016784668} +{"epoch": 0, "iter": 3515, "iter_tflops": 37.587757203865095, "iter_time": 0.5488780136108399, "loss": 0.847542941570282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.69398538931773, "step_time": 0.5069813957214355} +{"epoch": 0, "iter": 3516, "iter_tflops": 36.995898232412564, "iter_time": 0.5576589431762695, "loss": 1.0806515216827393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.054630014055405, "step_time": 0.5150738754272461} +{"epoch": 0, "iter": 3517, "iter_tflops": 11.3332385139043, "iter_time": 1.0391122512817383, "loss": 0.005105728283524513, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 12.05555945075705, "step_time": 0.9768527984619141} +{"epoch": 0, "iter": 3518, "iter_tflops": 15.147241740661965, "iter_time": 0.7774687423706056, "loss": 0.02878156490623951, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 25.3200755037647, "step_time": 0.4651055240631103} +{"epoch": 0, "iter": 3519, "iter_tflops": 33.858662476485314, "iter_time": 0.3478137092590332, "loss": 0.0052184127271175385, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 37.03529129671386, "step_time": 0.317980676651001} +{"epoch": 0, "iter": 3520, "iter_tflops": 36.73086764991216, "iter_time": 0.32061608505249023, "loss": 0.003216178622096777, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 40.44044816936949, "step_time": 0.29120614433288583} +{"epoch": 0, "iter": 3521, "iter_tflops": 30.788450834077764, "iter_time": 0.6700919647216798, "loss": 0.8136081695556641, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.65640310903073, "step_time": 0.6317625808715821} +{"epoch": 0, "iter": 3522, "iter_tflops": 14.813655200425078, "iter_time": 1.392707824707031, "loss": 0.6035851836204529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.788761430653928, "step_time": 1.098055004119873} +{"epoch": 0, "iter": 3523, "iter_tflops": 49.98772356846436, "iter_time": 0.4127232055664063, "loss": 0.8058184385299683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.065417336543156, "step_time": 0.3815950107574463} +{"epoch": 0, "iter": 3524, "iter_tflops": 44.090854220223314, "iter_time": 0.46792229080200193, "loss": 0.6628568172454834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.549855694904615, "step_time": 0.43388340950012205} +{"epoch": 0, "iter": 3525, "iter_tflops": 24.748762076797853, "iter_time": 0.8336212310791016, "loss": 0.24122440814971924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.025431461573927, "step_time": 0.7927282028198241} +{"epoch": 0, "iter": 3526, "iter_tflops": 14.211792988295988, "iter_time": 1.4516882934570314, "loss": 0.20526553690433502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.09432848497461, "step_time": 0.9780398330688478} +{"epoch": 0, "iter": 3527, "iter_tflops": 46.922696194145345, "iter_time": 0.43968260955810545, "loss": 0.2719118297100067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.02433095204381, "step_time": 0.4043383445739746} +{"epoch": 0, "iter": 3528, "iter_tflops": 50.74918809698467, "iter_time": 0.40653051376342775, "loss": 0.2635493874549866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.69823380456186, "step_time": 0.3771802501678467} +{"epoch": 0, "iter": 3529, "iter_tflops": 31.19341227981423, "iter_time": 0.6613926467895508, "loss": 0.9230687618255615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.04400917865199, "step_time": 0.62435200881958} +{"epoch": 0, "iter": 3530, "iter_tflops": 15.462468316303118, "iter_time": 1.3342690887451172, "loss": 
0.8730098605155945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.79002608329422, "step_time": 1.0424995613098145} +{"epoch": 0, "iter": 3531, "iter_tflops": 40.4239120579411, "iter_time": 0.5103685531616211, "loss": 0.7014150619506836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.37387888451632, "step_time": 0.42649243736267095} +{"epoch": 0, "iter": 3532, "iter_tflops": 47.85231223237615, "iter_time": 0.4311409950256348, "loss": 0.788374662399292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.582512867314314, "step_time": 0.39996294021606443} +{"epoch": 0, "iter": 3533, "iter_tflops": 28.748181574910888, "iter_time": 0.7176486434936523, "loss": 0.22425565123558044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.471873483581188, "step_time": 0.6770536613464356} +{"epoch": 0, "iter": 3534, "iter_tflops": 13.48756574558846, "iter_time": 1.5296380310058595, "loss": 0.21569259464740753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.502378223016983, "step_time": 1.178759437561035} +{"epoch": 0, "iter": 3535, "iter_tflops": 39.0978194118361, "iter_time": 0.5276788787841797, "loss": 0.2488013654947281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.76578367891166, "step_time": 0.4824205646514893} +{"epoch": 0, "iter": 3536, "iter_tflops": 38.20792255484769, "iter_time": 0.539968994140625, "loss": 0.3310357630252838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05780150287931, "step_time": 0.4905414161682129} +{"epoch": 0, "iter": 3537, "iter_tflops": 32.63954419235841, "iter_time": 0.6320888977050781, "loss": 0.006556331645697355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.82692973680326, "step_time": 0.5758543548583984} +{"epoch": 0, "iter": 3538, "iter_tflops": 12.252706672325516, "iter_time": 1.6837988586425783, "loss": 0.009660647250711918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.843459921435272, "step_time": 1.4903133773803712} +{"epoch": 0, "iter": 3539, "iter_tflops": 13.057682241550276, "iter_time": 1.5799965972900392, "loss": 
0.001555316848680377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.135316388364805, "step_time": 1.3631094970703126} +{"epoch": 0, "iter": 3540, "iter_tflops": 22.63260607360389, "iter_time": 0.9115650863647462, "loss": 0.035930123180150986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.236890245515415, "step_time": 0.78633913230896} +{"epoch": 0, "iter": 3541, "iter_tflops": 18.479655149022957, "iter_time": 0.8111328506469726, "loss": 0.36108145117759705, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 19.508134464361376, "step_time": 0.7683694915771484} +{"epoch": 0, "iter": 3542, "iter_tflops": 9.951547067058687, "iter_time": 1.506243728637695, "loss": 0.336017906665802, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 12.596397044758598, "step_time": 1.1899795875549315} +{"epoch": 0, "iter": 3543, "iter_tflops": 22.417809668060986, "iter_time": 0.668640495300293, "loss": 0.35655951499938965, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.150013957485474, "step_time": 0.620681022644043} +{"epoch": 0, "iter": 3544, "iter_tflops": 23.70532399737176, "iter_time": 0.6323244247436524, "loss": 0.39651763439178467, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 25.4691240266185, "step_time": 0.5885343894958497} +{"epoch": 0, "iter": 3545, "iter_tflops": 16.19944565060821, "iter_time": 1.2735678710937501, "loss": 0.3586723804473877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.351177539474424, "step_time": 1.1890313186645507} +{"epoch": 0, "iter": 3546, "iter_tflops": 19.16173171358551, "iter_time": 1.0766820983886718, "loss": 0.38630959391593933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.498849567330193, "step_time": 0.8779618530273436} +{"epoch": 0, "iter": 3547, "iter_tflops": 35.98475736487636, "iter_time": 0.5733286819458008, "loss": 0.27796027064323425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.22370655368236, "step_time": 0.5259853115081787} +{"epoch": 0, "iter": 3548, "iter_tflops": 37.6094793881146, "iter_time": 0.5485609970092773, "loss": 
0.32928386330604553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33176709131679, "step_time": 0.4991582736968994} +{"epoch": 0, "iter": 3549, "iter_tflops": 22.28917759542897, "iter_time": 0.9256103515625, "loss": 0.8977919220924377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.66513012989456, "step_time": 0.8717929458618164} +{"epoch": 0, "iter": 3550, "iter_tflops": 14.434844612526947, "iter_time": 1.429256362915039, "loss": 0.9469906687736511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.580415046080603, "step_time": 1.1103677425384522} +{"epoch": 0, "iter": 3551, "iter_tflops": 42.693701907214134, "iter_time": 0.48323505783081055, "loss": 0.7883197069168091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.89091098755866, "step_time": 0.44956818389892583} +{"epoch": 0, "iter": 3552, "iter_tflops": 44.59980266341839, "iter_time": 0.4625826187133789, "loss": 1.0566531419754028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.805883378170066, "step_time": 0.4315597171783448} +{"epoch": 0, "iter": 3553, "iter_tflops": 28.479842573865497, "iter_time": 0.7244103775024414, "loss": 0.9430753588676453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.17183243317349, "step_time": 0.6837865600585937} +{"epoch": 0, "iter": 3554, "iter_tflops": 14.833753109551996, "iter_time": 1.390820877075195, "loss": 0.9028454422950745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.454252442973306, "step_time": 1.1820095748901367} +{"epoch": 0, "iter": 3555, "iter_tflops": 34.130651972957075, "iter_time": 0.6044740524291993, "loss": 1.0449377298355103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.51532764360571, "step_time": 0.5499377136230469} +{"epoch": 0, "iter": 3556, "iter_tflops": 38.54084223789655, "iter_time": 0.5353046874999999, "loss": 0.8314034342765808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.073505554481045, "step_time": 0.490358320236206} +{"epoch": 0, "iter": 3557, "iter_tflops": 16.40550749763971, "iter_time": 1.164164535522461, "loss": 
0.009220628999173641, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 17.438425359787075, "step_time": 1.0952084045410155} +{"epoch": 0, "iter": 3558, "iter_tflops": 16.13398887119799, "iter_time": 1.1837562408447266, "loss": 0.006266250275075436, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 19.406710594082345, "step_time": 0.9841291713714598} +{"epoch": 0, "iter": 3559, "iter_tflops": 40.857735814149805, "iter_time": 0.46744416046142584, "loss": 0.00724301440641284, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 45.399672137966675, "step_time": 0.4206794700622559} +{"epoch": 0, "iter": 3560, "iter_tflops": 44.46544047797026, "iter_time": 0.42951806640625, "loss": 0.006906741298735142, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 49.025585944180705, "step_time": 0.38956617546081546} +{"epoch": 0, "iter": 3561, "iter_tflops": 17.279183451613367, "iter_time": 1.1939854431152344, "loss": 0.5436053276062012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.593220151604342, "step_time": 1.1096030349731445} +{"epoch": 0, "iter": 3562, "iter_tflops": 15.082899141048038, "iter_time": 1.3678466796875002, "loss": 0.48022979497909546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.992496736995665, "step_time": 1.0319418220520018} +{"epoch": 0, "iter": 3563, "iter_tflops": 38.67987509596052, "iter_time": 0.5333805618286133, "loss": 0.5934994220733643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.40621312987181, "step_time": 0.4865111026763916} +{"epoch": 0, "iter": 3564, "iter_tflops": 44.38799095535474, "iter_time": 0.464789981842041, "loss": 0.5706530213356018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41301007827741, "step_time": 0.42614771270751955} +{"epoch": 0, "iter": 3565, "iter_tflops": 31.44319454622814, "iter_time": 0.6561385955810547, "loss": 0.04444008693099022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.38322558874284, "step_time": 0.6000336837768555} +{"epoch": 0, "iter": 3566, "iter_tflops": 14.430074115755955, "iter_time": 1.4297288665771484, 
"loss": 0.038223277777433395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.515921947826758, "step_time": 1.2491639022827148} +{"epoch": 0, "iter": 3567, "iter_tflops": 13.253816904978736, "iter_time": 1.556615249633789, "loss": 0.02770206891000271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.755719301097004, "step_time": 1.309435203552246} +{"epoch": 0, "iter": 3568, "iter_tflops": 20.728231158105665, "iter_time": 0.9953137512207032, "loss": 0.10344891250133514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.103860325741426, "step_time": 0.79034645652771} +{"epoch": 0, "iter": 3569, "iter_tflops": 15.223697664867633, "iter_time": 1.0060932388305663, "loss": 0.4695926904678345, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 16.271249777249846, "step_time": 0.9413203964233399} +{"epoch": 0, "iter": 3570, "iter_tflops": 11.283637162589642, "iter_time": 1.3574044494628907, "loss": 0.42988041043281555, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 13.48716933986272, "step_time": 1.1356318664550782} +{"epoch": 0, "iter": 3571, "iter_tflops": 26.204415538746048, "iter_time": 0.5844991760253906, "loss": 0.4019514322280884, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 27.866113995122607, "step_time": 0.5496446075439453} +{"epoch": 0, "iter": 3572, "iter_tflops": 27.480241802335954, "iter_time": 0.5573626098632812, "loss": 0.29139456152915955, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 29.171755106313753, "step_time": 0.5250441474914551} +{"epoch": 0, "iter": 3573, "iter_tflops": 22.430603904932298, "iter_time": 0.9197743225097657, "loss": 0.09855630248785019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.473660041362567, "step_time": 0.8789039916992187} +{"epoch": 0, "iter": 3574, "iter_tflops": 14.342776227259046, "iter_time": 1.4384309692382813, "loss": 0.20009320974349976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.169492212695182, "step_time": 1.201613492965698} +{"epoch": 0, "iter": 3575, "iter_tflops": 46.86212679533186, "iter_time": 
0.4402509002685546, "loss": 0.11871474981307983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.234897962430004, "step_time": 0.40267658042907717} +{"epoch": 0, "iter": 3576, "iter_tflops": 47.020083440650495, "iter_time": 0.43877194595336916, "loss": 0.1628626435995102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.94818376495921, "step_time": 0.40494266891479497} +{"epoch": 0, "iter": 3577, "iter_tflops": 21.108473675590442, "iter_time": 0.9773844299316407, "loss": 0.9680812954902649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.10836066945776, "step_time": 0.9331806106567383} +{"epoch": 0, "iter": 3578, "iter_tflops": 14.852934579033246, "iter_time": 1.3890247344970703, "loss": 0.9741709232330322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.54766799030711, "step_time": 1.0040600967407227} +{"epoch": 0, "iter": 3579, "iter_tflops": 40.6652074903773, "iter_time": 0.5073401756286622, "loss": 0.8437231779098511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.142535629789, "step_time": 0.467374454498291} +{"epoch": 0, "iter": 3580, "iter_tflops": 41.2233372832129, "iter_time": 0.5004712104797364, "loss": 0.6959021687507629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6260272021937, "step_time": 0.4623107814788819} +{"epoch": 0, "iter": 3581, "iter_tflops": 23.014358785731176, "iter_time": 0.8964444198608399, "loss": 0.318846732378006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.680286978218493, "step_time": 0.8359341011047364} +{"epoch": 0, "iter": 3582, "iter_tflops": 20.69175651426186, "iter_time": 0.9970682525634766, "loss": 0.3318330645561218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.214959612313987, "step_time": 0.8182084693908691} +{"epoch": 0, "iter": 3583, "iter_tflops": 48.41842047989733, "iter_time": 0.42610009384155273, "loss": 0.42848557233810425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.60124878349186, "step_time": 0.3922168006896973} +{"epoch": 0, "iter": 3584, "iter_tflops": 45.82207150261378, "iter_time": 
0.450243579864502, "loss": 0.395220547914505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.74061146904093, "step_time": 0.41477362060546874} +{"epoch": 0, "iter": 3585, "iter_tflops": 32.86997598179124, "iter_time": 0.627657699584961, "loss": 0.2686152160167694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.00686158382792, "step_time": 0.5893442764282226} +{"epoch": 0, "iter": 3586, "iter_tflops": 16.610591530994714, "iter_time": 1.2420444793701173, "loss": 0.26745858788490295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.865461261166235, "step_time": 1.0385408744812012} +{"epoch": 0, "iter": 3587, "iter_tflops": 36.98413410242135, "iter_time": 0.5578363265991211, "loss": 0.3799022436141968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.78457771568157, "step_time": 0.5058552684783936} +{"epoch": 0, "iter": 3588, "iter_tflops": 42.5186649532985, "iter_time": 0.48522439575195314, "loss": 0.329938679933548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.40695880526796, "step_time": 0.44456896209716795} +{"epoch": 0, "iter": 3589, "iter_tflops": 18.118257658702877, "iter_time": 1.1386908111572265, "loss": 1.0101128816604614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.678730026190284, "step_time": 1.0483955764770507} +{"epoch": 0, "iter": 3590, "iter_tflops": 21.870473338004818, "iter_time": 0.9433309097290039, "loss": 0.9151425361633301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.016736245374254, "step_time": 0.7110066871643066} +{"epoch": 0, "iter": 3591, "iter_tflops": 46.80141062253171, "iter_time": 0.44082204437255856, "loss": 0.9835854172706604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.48012741753644, "step_time": 0.4086973342895508} +{"epoch": 0, "iter": 3592, "iter_tflops": 45.18620668651773, "iter_time": 0.4565794525146484, "loss": 0.9405184984207153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62216006170005, "step_time": 0.4243146228790283} +{"epoch": 0, "iter": 3593, "iter_tflops": 28.61194217313079, "iter_time": 
0.7210658187866211, "loss": 0.16655951738357544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.343750885934146, "step_time": 0.6799124336242676} +{"epoch": 0, "iter": 3594, "iter_tflops": 20.109910187983203, "iter_time": 1.0259167404174805, "loss": 0.1680310219526291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.8785094879515, "step_time": 0.9017673778533934} +{"epoch": 0, "iter": 3595, "iter_tflops": 36.06134833659116, "iter_time": 0.5721109848022461, "loss": 0.1667957901954651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.808649089064886, "step_time": 0.5182565593719483} +{"epoch": 0, "iter": 3596, "iter_tflops": 37.39060641531775, "iter_time": 0.5517721023559571, "loss": 0.1287730485200882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.80138679398923, "step_time": 0.5056468696594238} +{"epoch": 0, "iter": 3597, "iter_tflops": 22.260353826506808, "iter_time": 0.9268088760375975, "loss": 0.8463250398635864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.030160885785374, "step_time": 0.8585499534606934} +{"epoch": 0, "iter": 3598, "iter_tflops": 13.794885118824716, "iter_time": 1.495561096191406, "loss": 0.9177514910697937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.394183001237721, "step_time": 1.3401876220703126} +{"epoch": 0, "iter": 3599, "iter_tflops": 44.316895530632884, "iter_time": 0.4655356216430664, "loss": 0.9535275101661682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.80260106569407, "step_time": 0.4315893497467041} +{"epoch": 0, "iter": 3600, "iter_tflops": 44.907587278878744, "iter_time": 0.4594122009277344, "loss": 0.8609421253204346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0220846105087, "step_time": 0.4296167831420899} +{"epoch": 0, "iter": 3601, "iter_tflops": 3.0214168345361925, "iter_time": 0.6227604064941407, "loss": 0.05906510353088379, "lr": 3e-05, "seqlen": 768.0, "step_tflops": 3.2396356005535045, "step_time": 0.5808118591308593} +{"epoch": 0, "iter": 3602, "iter_tflops": 3.3830216598162015, "iter_time": 
0.5561947174072266, "loss": 0.1316542774438858, "lr": 3e-05, "seqlen": 768.0, "step_tflops": 4.391259508242313, "step_time": 0.4284918193817139} +{"epoch": 0, "iter": 3603, "iter_tflops": 4.086531680965867, "iter_time": 0.4604439468383789, "loss": 0.06272540241479874, "lr": 3e-05, "seqlen": 768.0, "step_tflops": 4.403318415834187, "step_time": 0.4273183536529541} +{"epoch": 0, "iter": 3604, "iter_tflops": 4.416925984485703, "iter_time": 0.42600188064575195, "loss": 0.05035385116934776, "lr": 3e-05, "seqlen": 768.0, "step_tflops": 4.758607374155638, "step_time": 0.3954137477874756} +{"epoch": 0, "iter": 3605, "iter_tflops": 24.34097494485455, "iter_time": 0.8475869827270507, "loss": 0.9244284629821777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.656934228860823, "step_time": 0.8041137466430663} +{"epoch": 0, "iter": 3606, "iter_tflops": 17.04905711045752, "iter_time": 1.2101017303466797, "loss": 0.7952520847320557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.548868171739077, "step_time": 1.0040014533996582} +{"epoch": 0, "iter": 3607, "iter_tflops": 35.90796272979625, "iter_time": 0.5745548324584961, "loss": 0.7888237237930298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.09807817599714, "step_time": 0.5276753864288329} +{"epoch": 0, "iter": 3608, "iter_tflops": 42.68943556427343, "iter_time": 0.4832833518981934, "loss": 1.015295147895813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35575415315269, "step_time": 0.4450600337982178} +{"epoch": 0, "iter": 3609, "iter_tflops": 19.729938734121124, "iter_time": 1.0456744842529297, "loss": 0.6605265140533447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.032460122603144, "step_time": 0.9809168014526367} +{"epoch": 0, "iter": 3610, "iter_tflops": 17.060596670971325, "iter_time": 1.2092832336425783, "loss": 1.0305709838867188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.10118254183467, "step_time": 1.0263621788024901} +{"epoch": 0, "iter": 3611, "iter_tflops": 33.521945464849765, "iter_time": 
0.6154503631591798, "loss": 0.9180850982666016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.490118499315294, "step_time": 0.5653885040283203} +{"epoch": 0, "iter": 3612, "iter_tflops": 36.80474821662395, "iter_time": 0.5605552139282226, "loss": 0.9271423816680908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.12003924295311, "step_time": 0.514234130859375} +{"epoch": 0, "iter": 3613, "iter_tflops": 19.75864946275626, "iter_time": 1.044155044555664, "loss": 0.48620495200157166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.975574171573133, "step_time": 0.9835770568847657} +{"epoch": 0, "iter": 3614, "iter_tflops": 13.696613861695512, "iter_time": 1.5062915344238281, "loss": 0.33542317152023315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.134707820789316, "step_time": 1.1376578941345215} +{"epoch": 0, "iter": 3615, "iter_tflops": 46.78332744833258, "iter_time": 0.44099243545532224, "loss": 0.4572927951812744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.754246398599655, "step_time": 0.4064899978637696} +{"epoch": 0, "iter": 3616, "iter_tflops": 51.71895649131487, "iter_time": 0.39890776824951163, "loss": 0.3861294686794281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.048526855363235, "step_time": 0.36809341239929194} +{"epoch": 0, "iter": 3617, "iter_tflops": 33.94735296187328, "iter_time": 0.6077379150390625, "loss": 0.25348955392837524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.19710825316844, "step_time": 0.5699652404785156} +{"epoch": 0, "iter": 3618, "iter_tflops": 19.409847000534885, "iter_time": 1.0629189147949218, "loss": 0.20058467984199524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.751826708874496, "step_time": 0.9484763641357422} +{"epoch": 0, "iter": 3619, "iter_tflops": 49.942143530110336, "iter_time": 0.4130998802185059, "loss": 0.2141752988100052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.17812879777869, "step_time": 0.3808011455535889} +{"epoch": 0, "iter": 3620, "iter_tflops": 50.90432827640738, 
"iter_time": 0.4052915382385254, "loss": 0.1536163091659546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.87634382329474, "step_time": 0.3759560508728027} +{"epoch": 0, "iter": 3621, "iter_tflops": 44.518823241982695, "iter_time": 0.4634240531921387, "loss": 0.23796270787715912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.56064370254609, "step_time": 0.42485214233398433} +{"epoch": 0, "iter": 3622, "iter_tflops": 33.615652465598785, "iter_time": 0.613734733581543, "loss": 0.27442920207977295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.64437443266863, "step_time": 0.5480524997711181} +{"epoch": 0, "iter": 3623, "iter_tflops": 39.538837069350365, "iter_time": 0.5217931289672851, "loss": 0.20197732746601105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.265635284395714, "step_time": 0.4768471183776855} +{"epoch": 0, "iter": 3624, "iter_tflops": 47.44548162192188, "iter_time": 0.43483789825439445, "loss": 0.2977655827999115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.84948797774782, "step_time": 0.39790351486206055} +{"epoch": 0, "iter": 3625, "iter_tflops": 20.73940640186061, "iter_time": 0.9947774353027344, "loss": 0.8643495440483093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.338848009690093, "step_time": 0.9235522575378418} +{"epoch": 0, "iter": 3626, "iter_tflops": 20.222501834961673, "iter_time": 1.0202048034667968, "loss": 0.8966971635818481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.331525514953583, "step_time": 0.8479161529541016} +{"epoch": 0, "iter": 3627, "iter_tflops": 44.9691292498837, "iter_time": 0.4587834777832031, "loss": 0.7657058835029602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.40299516341398, "step_time": 0.4262358856201172} +{"epoch": 0, "iter": 3628, "iter_tflops": 47.68542530534287, "iter_time": 0.43264987945556643, "loss": 0.9588671326637268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.32311858664797, "step_time": 0.40198440933227536} +{"epoch": 0, "iter": 3629, "iter_tflops": 
35.44647635396179, "iter_time": 0.5820351028442382, "loss": 1.0298404693603516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.12589255120731, "step_time": 0.5411307678222657} +{"epoch": 0, "iter": 3630, "iter_tflops": 45.34635807376882, "iter_time": 0.45496693420410156, "loss": 1.1951724290847778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23989801095588, "step_time": 0.4189913940429687} +{"epoch": 0, "iter": 3631, "iter_tflops": 45.99934971149639, "iter_time": 0.4485083732604981, "loss": 1.112847924232483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.743573746312144, "step_time": 0.41474892044067385} +{"epoch": 0, "iter": 3632, "iter_tflops": 43.94494490468432, "iter_time": 0.4694759216308594, "loss": 0.8527814149856567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17928158923184, "step_time": 0.43729138755798336} +{"epoch": 0, "iter": 3633, "iter_tflops": 16.60018206626802, "iter_time": 1.0338129577636719, "loss": 0.32615935802459717, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 17.42203142828938, "step_time": 0.9850449066162109} +{"epoch": 0, "iter": 3634, "iter_tflops": 12.486854947279198, "iter_time": 1.3743639526367186, "loss": 0.3108125925064087, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 15.978905348844217, "step_time": 1.0740086975097656} +{"epoch": 0, "iter": 3635, "iter_tflops": 34.65842770948603, "iter_time": 0.4951604690551758, "loss": 0.36822572350502014, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 37.91227063025663, "step_time": 0.4526630306243896} +{"epoch": 0, "iter": 3636, "iter_tflops": 36.26957018061721, "iter_time": 0.47316478347778324, "loss": 0.2595212459564209, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 39.693241582994546, "step_time": 0.43235277938842764} +{"epoch": 0, "iter": 3637, "iter_tflops": 30.67434530576995, "iter_time": 0.6725846405029297, "loss": 0.947487473487854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.78665059148183, "step_time": 0.6106285514831543} +{"epoch": 0, "iter": 3638, "iter_tflops": 
34.66790555141869, "iter_time": 0.595106430053711, "loss": 0.9218699932098389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.939203210568124, "step_time": 0.5437935371398925} +{"epoch": 0, "iter": 3639, "iter_tflops": 35.807460554571726, "iter_time": 0.5761674575805664, "loss": 0.7935268878936768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.77422577188598, "step_time": 0.5320826683044434} +{"epoch": 0, "iter": 3640, "iter_tflops": 38.968924446032766, "iter_time": 0.5294242477416992, "loss": 0.6657978296279907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.00070953676224, "step_time": 0.49120821380615237} +{"epoch": 0, "iter": 3641, "iter_tflops": 24.223580276584048, "iter_time": 0.8516946411132812, "loss": 0.7890952825546265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.38523052948748, "step_time": 0.7819182586669922} +{"epoch": 0, "iter": 3642, "iter_tflops": 38.15756845119822, "iter_time": 0.5406815567016601, "loss": 0.8229597210884094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.525904967041335, "step_time": 0.4968246574401855} +{"epoch": 0, "iter": 3643, "iter_tflops": 40.90917311621431, "iter_time": 0.5043146057128907, "loss": 0.8000662326812744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35651066646229, "step_time": 0.4651198482513428} +{"epoch": 0, "iter": 3644, "iter_tflops": 34.386375327585654, "iter_time": 0.5999787216186524, "loss": 0.7836364507675171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.53071828988265, "step_time": 0.549712194442749} +{"epoch": 0, "iter": 3645, "iter_tflops": 19.04101880992698, "iter_time": 1.0835078582763673, "loss": 0.014586400240659714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.08025443754892, "step_time": 1.0274318771362305} +{"epoch": 0, "iter": 3646, "iter_tflops": 15.629310782649465, "iter_time": 1.3200258026123046, "loss": 0.019110985100269318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.50941240495125, "step_time": 1.0574943561553956} +{"epoch": 0, "iter": 3647, "iter_tflops": 
57.44904556076326, "iter_time": 0.35911986541748053, "loss": 0.0037533268332481384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.45289396116293, "step_time": 0.3251403083801269} +{"epoch": 0, "iter": 3648, "iter_tflops": 57.6359174263372, "iter_time": 0.3579554977416992, "loss": 0.004396090749651194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.688945813335465, "step_time": 0.32910257530212406} +{"epoch": 0, "iter": 3649, "iter_tflops": 24.308842044090717, "iter_time": 0.8487073745727539, "loss": 0.7264459729194641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.4474099676285, "step_time": 0.8107345123291015} +{"epoch": 0, "iter": 3650, "iter_tflops": 15.884959232455591, "iter_time": 1.2987816467285156, "loss": 0.5852194428443909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.665072612269867, "step_time": 1.10533154296875} +{"epoch": 0, "iter": 3651, "iter_tflops": 37.38290776199046, "iter_time": 0.5518857345581055, "loss": 0.5529316663742065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.22971883689442, "step_time": 0.5003937473297119} +{"epoch": 0, "iter": 3652, "iter_tflops": 37.87370835070177, "iter_time": 0.5447339172363281, "loss": 0.5510192513465881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.457016844299005, "step_time": 0.49765021896362305} +{"epoch": 0, "iter": 3653, "iter_tflops": 20.309478831086214, "iter_time": 1.0158356933593748, "loss": 0.6945524215698242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.617412185488572, "step_time": 0.9543738784790039} +{"epoch": 0, "iter": 3654, "iter_tflops": 19.093581248571212, "iter_time": 1.0805250854492188, "loss": 0.7489485144615173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.76132337108752, "step_time": 0.8008553447723389} +{"epoch": 0, "iter": 3655, "iter_tflops": 48.9248851605762, "iter_time": 0.4216891555786133, "loss": 0.8039229512214661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76269653851746, "step_time": 0.3910166625976563} +{"epoch": 0, "iter": 3656, "iter_tflops": 
43.81772249884022, "iter_time": 0.4708390197753906, "loss": 0.9260936975479126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.12495605655918, "step_time": 0.4377954959869385} +{"epoch": 0, "iter": 3657, "iter_tflops": 35.950232915143864, "iter_time": 0.5738792724609375, "loss": 0.6087422370910645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.75879392179091, "step_time": 0.5322945175170899} +{"epoch": 0, "iter": 3658, "iter_tflops": 41.16605145088489, "iter_time": 0.5011676559448242, "loss": 0.7159999012947083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.29240570127079, "step_time": 0.4657930221557617} +{"epoch": 0, "iter": 3659, "iter_tflops": 42.3074491146719, "iter_time": 0.4876468315124512, "loss": 0.8027909994125366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.232320844694726, "step_time": 0.45611397171020507} +{"epoch": 0, "iter": 3660, "iter_tflops": 46.35230027166863, "iter_time": 0.4450931968688965, "loss": 1.0666019916534424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.79895727603048, "step_time": 0.4142876605987549} +{"epoch": 0, "iter": 3661, "iter_tflops": 28.92874116547439, "iter_time": 0.713169418334961, "loss": 0.07322381436824799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.66930118984797, "step_time": 0.6726952590942382} +{"epoch": 0, "iter": 3662, "iter_tflops": 15.906028560219351, "iter_time": 1.297061264038086, "loss": 0.06824463605880737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.36382288291531, "step_time": 1.06544527053833} +{"epoch": 0, "iter": 3663, "iter_tflops": 41.95474536389988, "iter_time": 0.49174636459350585, "loss": 0.10369705408811569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.20861905775203, "step_time": 0.4464771709442138} +{"epoch": 0, "iter": 3664, "iter_tflops": 34.921262901420604, "iter_time": 0.5907888717651368, "loss": 0.10516133904457092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.37504543108985, "step_time": 0.5376174354553223} +{"epoch": 0, "iter": 3665, "iter_tflops": 
25.2246538104188, "iter_time": 0.8178940200805663, "loss": 0.9235820174217224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.27807181054134, "step_time": 0.7563252143859863} +{"epoch": 0, "iter": 3666, "iter_tflops": 13.944582410160212, "iter_time": 1.4795060119628907, "loss": 0.8391478657722473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.996114175912457, "step_time": 1.1464193496704103} +{"epoch": 0, "iter": 3667, "iter_tflops": 40.9231148232809, "iter_time": 0.5041427955627442, "loss": 0.7237337231636047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.947310990507525, "step_time": 0.46945064544677734} +{"epoch": 0, "iter": 3668, "iter_tflops": 41.31555441573411, "iter_time": 0.4993541488647461, "loss": 0.7357843518257141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.198160213449476, "step_time": 0.4667862510681152} +{"epoch": 0, "iter": 3669, "iter_tflops": 23.659519186303054, "iter_time": 0.8719996948242187, "loss": 0.6853428483009338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.718307932221766, "step_time": 0.8346482925415039} +{"epoch": 0, "iter": 3670, "iter_tflops": 11.773413031882663, "iter_time": 1.7523460235595703, "loss": 0.7811911106109619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.348237929165776, "step_time": 1.2619765872955324} +{"epoch": 0, "iter": 3671, "iter_tflops": 46.04105200118647, "iter_time": 0.44810213088989254, "loss": 0.6802738308906555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.831258135929595, "step_time": 0.4140191173553467} +{"epoch": 0, "iter": 3672, "iter_tflops": 45.62086699768077, "iter_time": 0.45222931671142574, "loss": 0.6155421733856201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30455374242168, "step_time": 0.4184419479370117} +{"epoch": 0, "iter": 3673, "iter_tflops": 25.97163574526547, "iter_time": 0.7943702011108399, "loss": 0.11372725665569305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.439829361082143, "step_time": 0.751866683959961} +{"epoch": 0, "iter": 3674, "iter_tflops": 
15.197195466870664, "iter_time": 1.3575592651367185, "loss": 0.06404422968626022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.124116566886723, "step_time": 1.025192506790161} +{"epoch": 0, "iter": 3675, "iter_tflops": 39.95004894940559, "iter_time": 0.5164222335815429, "loss": 0.14762192964553833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.06366032629345, "step_time": 0.4682110691070556} +{"epoch": 0, "iter": 3676, "iter_tflops": 42.87468463626124, "iter_time": 0.4811952247619629, "loss": 0.08513599634170532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.92032941971105, "step_time": 0.43970478820800785} +{"epoch": 0, "iter": 3677, "iter_tflops": 16.355035768139917, "iter_time": 1.2614520568847656, "loss": 0.2283773422241211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.113199975420176, "step_time": 1.2055660858154296} +{"epoch": 0, "iter": 3678, "iter_tflops": 15.507512510551374, "iter_time": 1.3303934783935547, "loss": 0.20266883075237274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.624501681290074, "step_time": 0.9540609912872314} +{"epoch": 0, "iter": 3679, "iter_tflops": 48.80024736089611, "iter_time": 0.4227661666870117, "loss": 0.11977525800466537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.963049498485276, "step_time": 0.3895374927520752} +{"epoch": 0, "iter": 3680, "iter_tflops": 53.42485507677373, "iter_time": 0.38617032241821286, "loss": 0.224283367395401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.91433223297232, "step_time": 0.3562346782684326} +{"epoch": 0, "iter": 3681, "iter_tflops": 45.0663363467221, "iter_time": 0.4577938919067383, "loss": 0.3022834360599518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.32620376017351, "step_time": 0.4182582874298096} +{"epoch": 0, "iter": 3682, "iter_tflops": 47.78778619860327, "iter_time": 0.4317231483459472, "loss": 0.4227639138698578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.215711484797794, "step_time": 0.3951127529144287} +{"epoch": 0, "iter": 3683, 
"iter_tflops": 50.01578000624986, "iter_time": 0.41249168777465817, "loss": 0.4167385995388031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.36964361757788, "step_time": 0.379459789276123} +{"epoch": 0, "iter": 3684, "iter_tflops": 52.36535095040479, "iter_time": 0.39398367691040037, "loss": 0.3514450192451477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.59461084562953, "step_time": 0.36454166221618656} +{"epoch": 0, "iter": 3685, "iter_tflops": 31.788020209242188, "iter_time": 0.6490210266113281, "loss": 0.9283173084259033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.65533909111107, "step_time": 0.6130110130310059} +{"epoch": 0, "iter": 3686, "iter_tflops": 14.087793900093118, "iter_time": 1.4644658813476565, "loss": 0.9818248748779297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.77253902510817, "step_time": 1.2300519008636472} +{"epoch": 0, "iter": 3687, "iter_tflops": 39.360086343765545, "iter_time": 0.5241628112792969, "loss": 0.8792634010314941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.0503093671211, "step_time": 0.479232177734375} +{"epoch": 0, "iter": 3688, "iter_tflops": 38.61218872087061, "iter_time": 0.5343155670166015, "loss": 0.8398703932762146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.691504196919, "step_time": 0.49485126304626464} +{"epoch": 0, "iter": 3689, "iter_tflops": 21.183944986028976, "iter_time": 0.9739023361206055, "loss": 0.180577352643013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.819343400801063, "step_time": 0.904105484008789} +{"epoch": 0, "iter": 3690, "iter_tflops": 18.481225779578303, "iter_time": 1.116327117919922, "loss": 0.10907798260450363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.045441618116406, "step_time": 0.9803117408752441} +{"epoch": 0, "iter": 3691, "iter_tflops": 44.980784419370956, "iter_time": 0.4586646003723145, "loss": 0.14183646440505981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.90837896214318, "step_time": 0.4218314723968506} +{"epoch": 0, "iter": 3692, 
"iter_tflops": 48.303773618243184, "iter_time": 0.4271114234924316, "loss": 0.10284514725208282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51304603534103, "step_time": 0.392875581741333} +{"epoch": 0, "iter": 3693, "iter_tflops": 22.596665696322695, "iter_time": 0.9130149459838867, "loss": 0.8812592029571533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.648322235670694, "step_time": 0.872412567138672} +{"epoch": 0, "iter": 3694, "iter_tflops": 11.457695765496, "iter_time": 1.8006319885253905, "loss": 0.9655053615570068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.333985994471025, "step_time": 1.3454488296508789} +{"epoch": 0, "iter": 3695, "iter_tflops": 46.53816133423405, "iter_time": 0.4433156127929687, "loss": 0.915704071521759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.32611701172355, "step_time": 0.4099480495452881} +{"epoch": 0, "iter": 3696, "iter_tflops": 45.20486768947425, "iter_time": 0.45639097213745117, "loss": 0.8802410364151001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.39203848901909, "step_time": 0.4263323917388916} +{"epoch": 0, "iter": 3697, "iter_tflops": 22.95051427532504, "iter_time": 0.8989381790161134, "loss": 0.878081202507019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.551152138761463, "step_time": 0.8403309707641602} +{"epoch": 0, "iter": 3698, "iter_tflops": 16.0978535888838, "iter_time": 1.281605239868164, "loss": 0.824092447757721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.625759303205047, "step_time": 1.051225238800049} +{"epoch": 0, "iter": 3699, "iter_tflops": 43.171019509954114, "iter_time": 0.4778922004699707, "loss": 0.8271738290786743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44177167763876, "step_time": 0.4442357120513916} +{"epoch": 0, "iter": 3700, "iter_tflops": 42.20090604525269, "iter_time": 0.4888779754638672, "loss": 0.7348243594169617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.34849913303496, "step_time": 0.45494545364379885} +{"epoch": 0, "iter": 3701, 
"iter_tflops": 27.859698350144264, "iter_time": 0.7405354232788086, "loss": 0.6918330192565918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.500878985818378, "step_time": 0.699338264465332} +{"epoch": 0, "iter": 3702, "iter_tflops": 14.844794259556268, "iter_time": 1.3897864227294923, "loss": 0.7431172728538513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.93220017847789, "step_time": 1.1505054206848144} +{"epoch": 0, "iter": 3703, "iter_tflops": 36.57556196238713, "iter_time": 0.5640677108764649, "loss": 1.0033495426177979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85526267824965, "step_time": 0.5176504211425781} +{"epoch": 0, "iter": 3704, "iter_tflops": 37.56585865813257, "iter_time": 0.5491979751586915, "loss": 0.9664227962493896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.84247598661635, "step_time": 0.505138168334961} +{"epoch": 0, "iter": 3705, "iter_tflops": 32.90188308986857, "iter_time": 0.6270490188598632, "loss": 0.2887706458568573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.49233320440441, "step_time": 0.565354190826416} +{"epoch": 0, "iter": 3706, "iter_tflops": 41.18049053459145, "iter_time": 0.5009919319152834, "loss": 0.3445206880569458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.19411389168563, "step_time": 0.456499568939209} +{"epoch": 0, "iter": 3707, "iter_tflops": 37.52498045057135, "iter_time": 0.5497962493896484, "loss": 0.3146088719367981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.032589440309906, "step_time": 0.5027977466583252} +{"epoch": 0, "iter": 3708, "iter_tflops": 37.35832422494309, "iter_time": 0.5522489013671875, "loss": 0.39924782514572144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.077183779743635, "step_time": 0.5022518978118896} +{"epoch": 0, "iter": 3709, "iter_tflops": 37.820829169404114, "iter_time": 0.5454955368041992, "loss": 0.038672320544719696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.32560509295836, "step_time": 0.487437650680542} +{"epoch": 0, "iter": 3710, 
"iter_tflops": 43.881378208566005, "iter_time": 0.470156005859375, "loss": 0.04918864369392395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.55080341094974, "step_time": 0.42493825149536135} +{"epoch": 0, "iter": 3711, "iter_tflops": 45.65870100330873, "iter_time": 0.45185458755493163, "loss": 0.0424254909157753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52792205616536, "step_time": 0.4083107452392578} +{"epoch": 0, "iter": 3712, "iter_tflops": 45.07799725479764, "iter_time": 0.4576754684448242, "loss": 0.06024046987295151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.67840929444651, "step_time": 0.4152929573059082} +{"epoch": 0, "iter": 3713, "iter_tflops": 19.9443694534386, "iter_time": 1.0344319763183594, "loss": 0.45823901891708374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.337834725423793, "step_time": 0.9668784942626953} +{"epoch": 0, "iter": 3714, "iter_tflops": 15.301199557010976, "iter_time": 1.3483317718505858, "loss": 0.33077144622802734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.2419503464386, "step_time": 1.019224588394165} +{"epoch": 0, "iter": 3715, "iter_tflops": 48.5548065286042, "iter_time": 0.4249032173156738, "loss": 0.29140377044677734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.87369564992573, "step_time": 0.3901957912445068} +{"epoch": 0, "iter": 3716, "iter_tflops": 51.12096437823844, "iter_time": 0.40357402801513675, "loss": 0.24359232187271118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.269839894399006, "step_time": 0.3732794151306153} +{"epoch": 0, "iter": 3717, "iter_tflops": 27.83666609358063, "iter_time": 0.741148147583008, "loss": 0.19338272511959076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.489456541008998, "step_time": 0.6996091461181639} +{"epoch": 0, "iter": 3718, "iter_tflops": 14.654547492078802, "iter_time": 1.4078287658691406, "loss": 0.16917438805103302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.084511274500354, "step_time": 1.2075904998779297} +{"epoch": 0, "iter": 
3719, "iter_tflops": 45.02419573109751, "iter_time": 0.45822236633300784, "loss": 0.13479237258434296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.99830536027265, "step_time": 0.36195977020263675} +{"epoch": 0, "iter": 3720, "iter_tflops": 48.62954401717759, "iter_time": 0.42425019454956053, "loss": 0.1518448293209076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21016811161746, "step_time": 0.3877284030914307} +{"epoch": 0, "iter": 3721, "iter_tflops": 33.32261406138441, "iter_time": 0.6191319046020508, "loss": 0.7056963443756104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.61063765548363, "step_time": 0.5793519821166992} +{"epoch": 0, "iter": 3722, "iter_tflops": 27.818584429074296, "iter_time": 0.7416298828125001, "loss": 0.8149347305297852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.54820261187161, "step_time": 0.5216695613861084} +{"epoch": 0, "iter": 3723, "iter_tflops": 46.65059561519788, "iter_time": 0.4422471618652344, "loss": 0.7192169427871704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.694130235318795, "step_time": 0.406972038269043} +{"epoch": 0, "iter": 3724, "iter_tflops": 48.32195401230764, "iter_time": 0.4269507293701172, "loss": 0.636096179485321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14665054203774, "step_time": 0.39563602447509766} +{"epoch": 0, "iter": 3725, "iter_tflops": 44.92892692536333, "iter_time": 0.4591939964294433, "loss": 0.11563856899738312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17176309664892, "step_time": 0.4195719699859619} +{"epoch": 0, "iter": 3726, "iter_tflops": 51.33250165060842, "iter_time": 0.4019109306335449, "loss": 0.09568068385124207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.2835147425556, "step_time": 0.36655659484863284} +{"epoch": 0, "iter": 3727, "iter_tflops": 55.13231020909415, "iter_time": 0.37421057510375977, "loss": 0.06776385754346848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.118586592073044, "step_time": 0.3431732959747315} +{"epoch": 0, "iter": 
3728, "iter_tflops": 55.853479403262405, "iter_time": 0.36937884140014643, "loss": 0.045837271958589554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.604915720751684, "step_time": 0.3404194736480713} +{"epoch": 0, "iter": 3729, "iter_tflops": 41.050182520831754, "iter_time": 0.5025822601318359, "loss": 0.576033890247345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62260388491057, "step_time": 0.4623462486267089} +{"epoch": 0, "iter": 3730, "iter_tflops": 44.10434048192883, "iter_time": 0.46777920913696297, "loss": 0.5343247652053833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.2137226800672, "step_time": 0.427909158706665} +{"epoch": 0, "iter": 3731, "iter_tflops": 40.76282346214138, "iter_time": 0.5061252326965332, "loss": 0.5175106525421143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.431518329965876, "step_time": 0.4750258407592774} +{"epoch": 0, "iter": 3732, "iter_tflops": 52.39441277692975, "iter_time": 0.3937651443481445, "loss": 0.7348311543464661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.60057756644986, "step_time": 0.3645032329559327} +{"epoch": 0, "iter": 3733, "iter_tflops": 49.772184390402316, "iter_time": 0.4145105094909668, "loss": 0.0108897490426898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.31174271308704, "step_time": 0.37299662780761716} +{"epoch": 0, "iter": 3734, "iter_tflops": 52.59573534421371, "iter_time": 0.3922579154968261, "loss": 0.0026243869215250015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.00639045461316, "step_time": 0.3556693210601807} +{"epoch": 0, "iter": 3735, "iter_tflops": 57.591371763257115, "iter_time": 0.3582323684692383, "loss": 0.007915795780718327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.86726605819953, "step_time": 0.32816909027099606} +{"epoch": 0, "iter": 3736, "iter_tflops": 59.447784008044486, "iter_time": 0.34704562759399415, "loss": 0.0031132709700614214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.93818413502198, "step_time": 0.3177035789489746} +{"epoch": 0, 
"iter": 3737, "iter_tflops": 35.23557467467636, "iter_time": 0.5855188598632812, "loss": 0.5746685862541199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.62397540857613, "step_time": 0.5483496437072753} +{"epoch": 0, "iter": 3738, "iter_tflops": 8.98130935980452, "iter_time": 2.2971142272949217, "loss": 0.5378761887550354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.817283098926199, "step_time": 1.9072343139648438} +{"epoch": 0, "iter": 3739, "iter_tflops": 14.765802820487222, "iter_time": 1.3972212524414065, "loss": 0.42325788736343384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.114991839085395, "step_time": 1.2802422561645508} +{"epoch": 0, "iter": 3740, "iter_tflops": 20.843711709917418, "iter_time": 0.9897994079589844, "loss": 0.4325842261314392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.54349608996416, "step_time": 0.8076847991943359} +{"epoch": 0, "iter": 3741, "iter_tflops": 11.349053791591945, "iter_time": 1.4072867584228517, "loss": 0.2918277978897095, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 12.009404360259577, "step_time": 1.329905517578125} +{"epoch": 0, "iter": 3742, "iter_tflops": 12.052050965671775, "iter_time": 1.3251996002197264, "loss": 0.5009868144989014, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.516536017649088, "step_time": 1.0293130569458007} +{"epoch": 0, "iter": 3743, "iter_tflops": 25.761873165059747, "iter_time": 0.6199616394042968, "loss": 0.34610795974731445, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 27.79652985889472, "step_time": 0.5745815467834472} +{"epoch": 0, "iter": 3744, "iter_tflops": 26.786724421778512, "iter_time": 0.5962421112060546, "loss": 0.48979151248931885, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 28.6378141888325, "step_time": 0.5577022399902344} +{"epoch": 0, "iter": 3745, "iter_tflops": 29.888303096135918, "iter_time": 0.6902731628417969, "loss": 0.20736832916736603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.03664048177784, "step_time": 0.6244912681579591} +{"epoch": 
0, "iter": 3746, "iter_tflops": 35.86602587355898, "iter_time": 0.5752266387939453, "loss": 0.3974831700325012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.367334165187756, "step_time": 0.5240663089752197} +{"epoch": 0, "iter": 3747, "iter_tflops": 41.182264107173594, "iter_time": 0.5009703559875488, "loss": 0.39730045199394226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.109945575210205, "step_time": 0.4573513278961182} +{"epoch": 0, "iter": 3748, "iter_tflops": 41.11733743400366, "iter_time": 0.501761417388916, "loss": 0.31087103486061096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.953654152701844, "step_time": 0.4589414119720459} +{"epoch": 0, "iter": 3749, "iter_tflops": 30.081518339628726, "iter_time": 0.6858395004272462, "loss": 0.7826438546180725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.08767529212715, "step_time": 0.6235280456542969} +{"epoch": 0, "iter": 3750, "iter_tflops": 32.09402694901671, "iter_time": 0.6428328094482423, "loss": 1.0932421684265137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.80290313776408, "step_time": 0.5927980613708497} +{"epoch": 0, "iter": 3751, "iter_tflops": 34.016760329224624, "iter_time": 0.6064978942871093, "loss": 0.9323025941848755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.022378434612854, "step_time": 0.5572600784301758} +{"epoch": 0, "iter": 3752, "iter_tflops": 32.32891923103813, "iter_time": 0.6381621780395507, "loss": 0.9171904921531677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.93034644764621, "step_time": 0.5906352386474609} +{"epoch": 0, "iter": 3753, "iter_tflops": 35.89702006013946, "iter_time": 0.5320416564941405, "loss": 0.01068841852247715, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 40.29701950828871, "step_time": 0.4739484519958496} +{"epoch": 0, "iter": 3754, "iter_tflops": 40.76744423107142, "iter_time": 0.4684794540405274, "loss": 0.01045010332018137, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 44.939373897426215, "step_time": 0.42498834228515625} 
+{"epoch": 0, "iter": 3755, "iter_tflops": 42.563037373371216, "iter_time": 0.44871586227416993, "loss": 0.005923903081566095, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 47.2057764020347, "step_time": 0.40458417320251466} +{"epoch": 0, "iter": 3756, "iter_tflops": 36.83964612624335, "iter_time": 0.5184281616210937, "loss": 0.010053194127976894, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 40.75878379569419, "step_time": 0.46857899665832514} +{"epoch": 0, "iter": 3757, "iter_tflops": 20.972474161563603, "iter_time": 0.9837224426269531, "loss": 0.08045642822980881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.540476064607777, "step_time": 0.9152909393310548} +{"epoch": 0, "iter": 3758, "iter_tflops": 14.020845204525974, "iter_time": 1.4714586181640623, "loss": 0.163307324051857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.633710669846774, "step_time": 1.169980266571045} +{"epoch": 0, "iter": 3759, "iter_tflops": 52.147931191518026, "iter_time": 0.3956263084411621, "loss": 0.08919073641300201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.81577595705962, "step_time": 0.36312262153625485} +{"epoch": 0, "iter": 3760, "iter_tflops": 54.476985721936366, "iter_time": 0.3787120971679687, "loss": 0.08645452558994293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.373211428626284, "step_time": 0.3474815158843994} +{"epoch": 0, "iter": 3761, "iter_tflops": 44.9369806808428, "iter_time": 0.4591116981506348, "loss": 0.0030119731090962887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.34663278241486, "step_time": 0.41808513259887703} +{"epoch": 0, "iter": 3762, "iter_tflops": 39.70570998125197, "iter_time": 0.5196001663208009, "loss": 0.0050512379966676235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95417625950992, "step_time": 0.4693773212432861} +{"epoch": 0, "iter": 3763, "iter_tflops": 47.27217018258024, "iter_time": 0.43643212127685543, "loss": 0.009899892844259739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33787039612359, "step_time": 
0.3941905422210693} +{"epoch": 0, "iter": 3764, "iter_tflops": 50.88260978603699, "iter_time": 0.4054645309448242, "loss": 0.01815468817949295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.35716274513292, "step_time": 0.3660775756835938} +{"epoch": 0, "iter": 3765, "iter_tflops": 20.29950788188812, "iter_time": 1.0163346633911132, "loss": 0.2529195249080658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.950535881156085, "step_time": 0.9398901977539063} +{"epoch": 0, "iter": 3766, "iter_tflops": 17.27139919619892, "iter_time": 1.1945235748291017, "loss": 0.14493469893932343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.763157301914834, "step_time": 0.993639513015747} +{"epoch": 0, "iter": 3767, "iter_tflops": 46.95246734169777, "iter_time": 0.43940382003784173, "loss": 0.3041360080242157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74752594992427, "step_time": 0.4065438289642334} +{"epoch": 0, "iter": 3768, "iter_tflops": 49.66254661098437, "iter_time": 0.4154256057739258, "loss": 0.18721261620521545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.129546282884725, "step_time": 0.3811429233551025} +{"epoch": 0, "iter": 3769, "iter_tflops": 36.58031970326438, "iter_time": 0.5639943466186523, "loss": 0.012971173040568829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.348544046874565, "step_time": 0.5243165664672852} +{"epoch": 0, "iter": 3770, "iter_tflops": 19.620767111893567, "iter_time": 1.0514927062988282, "loss": 0.012631484307348728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.358247996515903, "step_time": 0.7027358551025391} +{"epoch": 0, "iter": 3771, "iter_tflops": 52.839892836308906, "iter_time": 0.3904454078674316, "loss": 0.0054526543244719505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.14576736903081, "step_time": 0.3548167724609375} +{"epoch": 0, "iter": 3772, "iter_tflops": 55.169333260436744, "iter_time": 0.3739594497680664, "loss": 0.02925381250679493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.87876500132312, 
"step_time": 0.3388881740570069} +{"epoch": 0, "iter": 3773, "iter_tflops": 34.44468131848077, "iter_time": 0.5989631118774413, "loss": 0.050989627838134766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.76858570553633, "step_time": 0.5611065292358399} +{"epoch": 0, "iter": 3774, "iter_tflops": 18.870830787734576, "iter_time": 1.093279556274414, "loss": 0.04779129475355148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.971035956724844, "step_time": 0.9390132331848144} +{"epoch": 0, "iter": 3775, "iter_tflops": 41.837900237573116, "iter_time": 0.4931197166442871, "loss": 0.05460565909743309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.36675291099499, "step_time": 0.444954460144043} +{"epoch": 0, "iter": 3776, "iter_tflops": 47.50687164072597, "iter_time": 0.4342759857177735, "loss": 0.08027493953704834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.38713909527945, "step_time": 0.3938198165893555} +{"epoch": 0, "iter": 3777, "iter_tflops": 16.25929418868454, "iter_time": 0.8466468505859375, "loss": 0.006790434010326862, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 17.352326602941826, "step_time": 0.7933161087036134} +{"epoch": 0, "iter": 3778, "iter_tflops": 14.136389472269205, "iter_time": 0.9737903900146485, "loss": 0.0026884444523602724, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 17.307632064743096, "step_time": 0.7953647365570069} +{"epoch": 0, "iter": 3779, "iter_tflops": 36.15307361352451, "iter_time": 0.3807665252685547, "loss": 0.004179972689598799, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 41.263429000637714, "step_time": 0.33360970115661626} +{"epoch": 0, "iter": 3780, "iter_tflops": 38.43120771816053, "iter_time": 0.3581953582763672, "loss": 0.0012590098194777966, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 42.18110941442372, "step_time": 0.3263517818450928} +{"epoch": 0, "iter": 3781, "iter_tflops": 39.67864718193951, "iter_time": 0.5199545593261719, "loss": 0.09534622728824615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.158259088126286, "step_time": 0.4780334968566895} +{"epoch": 0, "iter": 3782, "iter_tflops": 43.94503310162158, "iter_time": 0.4694749794006348, "loss": 0.09109032154083252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.399870656506636, "step_time": 0.41763456535339355} +{"epoch": 0, "iter": 3783, "iter_tflops": 46.90011613448124, "iter_time": 0.4398942947387695, "loss": 0.10988882184028625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.915103163951585, "step_time": 0.4052057685852051} +{"epoch": 0, "iter": 3784, "iter_tflops": 52.61281500510359, "iter_time": 0.3921305770874023, "loss": 0.08964903652667999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.51176251093818, "step_time": 0.3587282428741455} +{"epoch": 0, "iter": 3785, "iter_tflops": 24.167880197782075, "iter_time": 0.8536575546264648, "loss": 0.7824999094009399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.31963497147996, "step_time": 0.8148258666992187} +{"epoch": 0, "iter": 3786, "iter_tflops": 17.30017318838052, "iter_time": 1.192536819458008, "loss": 0.9258427023887634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.324849826145805, "step_time": 0.9674672355651855} +{"epoch": 0, "iter": 3787, "iter_tflops": 36.58838684235416, "iter_time": 0.5638699951171875, "loss": 0.9708341956138611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.94934901490712, "step_time": 0.5038198165893555} +{"epoch": 0, "iter": 3788, "iter_tflops": 37.89439176100944, "iter_time": 0.5444365921020508, "loss": 0.8130792379379272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.96842256371624, "step_time": 0.5035852546691895} +{"epoch": 0, "iter": 3789, "iter_tflops": 36.319225058783864, "iter_time": 0.5680488357543946, "loss": 0.6848952770233154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.09601822769519, "step_time": 0.5145422019958497} +{"epoch": 0, "iter": 3790, "iter_tflops": 36.22184072612807, "iter_time": 0.5695760650634766, "loss": 0.6407978534698486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.09395830346404, "step_time": 0.5145686378479004} +{"epoch": 0, "iter": 3791, "iter_tflops": 39.16172198413089, "iter_time": 0.5268178329467774, "loss": 0.5258235931396484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.91175339392227, "step_time": 0.4807795505523682} +{"epoch": 0, "iter": 3792, "iter_tflops": 40.651188540958046, "iter_time": 0.50751513671875, "loss": 0.7302985191345215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.17742936881768, "step_time": 0.4670052967071533} +{"epoch": 0, "iter": 3793, "iter_tflops": 22.271594979509448, "iter_time": 0.9263410873413085, "loss": 0.16787312924861908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.950869557181232, "step_time": 0.8613922538757325} +{"epoch": 0, "iter": 3794, "iter_tflops": 18.405736201501114, "iter_time": 1.1209056396484376, "loss": 0.16126176714897156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.93584192940516, "step_time": 0.9854436988830566} +{"epoch": 0, "iter": 3795, "iter_tflops": 42.81396017918832, "iter_time": 0.48187771987915035, "loss": 0.13881368935108185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.13319103561528, "step_time": 0.43771900558471677} +{"epoch": 0, "iter": 3796, "iter_tflops": 42.326365801752345, "iter_time": 0.4874288902282715, "loss": 0.23710674047470093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06450889555796, "step_time": 0.44787394905090333} +{"epoch": 0, "iter": 3797, "iter_tflops": 27.76532734849307, "iter_time": 0.7430524139404298, "loss": 0.5569498538970947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.94172334558346, "step_time": 0.689041618347168} +{"epoch": 0, "iter": 3798, "iter_tflops": 6.725257988194054, "iter_time": 3.067702911376953, "loss": 0.8258422613143921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.129337677757185, "step_time": 2.5378566284179684} +{"epoch": 0, "iter": 3799, "iter_tflops": 15.420701600301147, "iter_time": 1.3378829345703123, "loss": 0.7567961812019348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
18.592719819416534, "step_time": 1.1096328945159915} +{"epoch": 0, "iter": 3800, "iter_tflops": 36.4026479293991, "iter_time": 0.5667470550537109, "loss": 0.9060836434364319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.86221835125463, "step_time": 0.517560094833374} +{"epoch": 0, "iter": 3801, "iter_tflops": 14.980605404999471, "iter_time": 1.0442678909301757, "loss": 0.4047735035419464, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 15.899214638951776, "step_time": 0.983933204650879} +{"epoch": 0, "iter": 3802, "iter_tflops": 11.362555793182405, "iter_time": 1.376782257080078, "loss": 0.36086305975914, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 15.283929774977892, "step_time": 1.023543384552002} +{"epoch": 0, "iter": 3803, "iter_tflops": 26.770141367462173, "iter_time": 0.5843736495971679, "loss": 0.5838760137557983, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 28.375625790593705, "step_time": 0.5513099632263184} +{"epoch": 0, "iter": 3804, "iter_tflops": 29.481326389272038, "iter_time": 0.5306330184936523, "loss": 0.28979894518852234, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 31.316307679645597, "step_time": 0.49954053878784177} +{"epoch": 0, "iter": 3805, "iter_tflops": 35.09303577606278, "iter_time": 0.5878970870971679, "loss": 0.02414357289671898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.925623627056694, "step_time": 0.5439882469177246} +{"epoch": 0, "iter": 3806, "iter_tflops": 17.468915866224663, "iter_time": 1.1810173950195313, "loss": 0.047277115285396576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.011472955128834, "step_time": 0.9818965835571287} +{"epoch": 0, "iter": 3807, "iter_tflops": 38.90032821177551, "iter_time": 0.5303578262329102, "loss": 0.030994676053524017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53688393950944, "step_time": 0.4850165691375733} +{"epoch": 0, "iter": 3808, "iter_tflops": 46.88709361902536, "iter_time": 0.4400164718627929, "loss": 0.02242039144039154, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 51.68285776361618, "step_time": 0.39918639183044435} +{"epoch": 0, "iter": 3809, "iter_tflops": 21.962566211335332, "iter_time": 0.939375358581543, "loss": 1.1397974491119385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.600230018585318, "step_time": 0.8741903572082519} +{"epoch": 0, "iter": 3810, "iter_tflops": 14.21199316174996, "iter_time": 1.4516678466796875, "loss": 0.9490121006965637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.510622387142615, "step_time": 1.2495648574829101} +{"epoch": 0, "iter": 3811, "iter_tflops": 42.2914361991494, "iter_time": 0.487831470489502, "loss": 0.7544711828231812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.40389466766153, "step_time": 0.45439039230346684} +{"epoch": 0, "iter": 3812, "iter_tflops": 40.34020473468791, "iter_time": 0.5114275856018067, "loss": 0.6648231148719788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.95852058205341, "step_time": 0.4802561454772949} +{"epoch": 0, "iter": 3813, "iter_tflops": 45.72953313498064, "iter_time": 0.4511546936035157, "loss": 0.3913884460926056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.94215505965873, "step_time": 0.4130997848510742} +{"epoch": 0, "iter": 3814, "iter_tflops": 44.54122661038805, "iter_time": 0.46319095993042, "loss": 0.35497477650642395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.46748129606619, "step_time": 0.4256687774658203} +{"epoch": 0, "iter": 3815, "iter_tflops": 45.45279633997365, "iter_time": 0.45390152359008784, "loss": 0.27253493666648865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.14696982863641, "step_time": 0.4197836322784424} +{"epoch": 0, "iter": 3816, "iter_tflops": 53.184907802746494, "iter_time": 0.3879125556945801, "loss": 0.44951871037483215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.52449591758465, "step_time": 0.35864883613586424} +{"epoch": 0, "iter": 3817, "iter_tflops": 47.885700575369334, "iter_time": 0.43084038162231447, "loss": 0.0024634168948978186, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 52.81943472685647, "step_time": 0.3905966358184814} +{"epoch": 0, "iter": 3818, "iter_tflops": 41.474459157383926, "iter_time": 0.4974409294128418, "loss": 0.02254752814769745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.50726035115204, "step_time": 0.44361016654968266} +{"epoch": 0, "iter": 3819, "iter_tflops": 43.70207141337522, "iter_time": 0.47208502578735356, "loss": 0.006772915367037058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.401787015863796, "step_time": 0.426246524810791} +{"epoch": 0, "iter": 3820, "iter_tflops": 49.48947145484945, "iter_time": 0.4168784370422363, "loss": 0.0032310951501131058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.794014049768926, "step_time": 0.3765209369659424} +{"epoch": 0, "iter": 3821, "iter_tflops": 15.735520237627766, "iter_time": 1.3111160736083984, "loss": 0.3675098419189453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.847408095580473, "step_time": 1.2245856094360352} +{"epoch": 0, "iter": 3822, "iter_tflops": 19.64552007126601, "iter_time": 1.0501678466796875, "loss": 0.33841606974601746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.87736693330991, "step_time": 0.7400660743713379} +{"epoch": 0, "iter": 3823, "iter_tflops": 48.57878936062982, "iter_time": 0.4246934471130371, "loss": 0.45463988184928894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.75178466599308, "step_time": 0.3910975456237793} +{"epoch": 0, "iter": 3824, "iter_tflops": 47.81342810943309, "iter_time": 0.43149161911010747, "loss": 0.5074220895767212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.58756938753496, "step_time": 0.39992373657226565} +{"epoch": 0, "iter": 3825, "iter_tflops": 40.94103020151261, "iter_time": 0.5039221878051757, "loss": 0.8660465478897095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.37779522161569, "step_time": 0.4648967666625976} +{"epoch": 0, "iter": 3826, "iter_tflops": 37.086103648007864, "iter_time": 0.5563025360107421, "loss": 0.9872764348983765, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 40.51224986276, "step_time": 0.5092556838989258} +{"epoch": 0, "iter": 3827, "iter_tflops": 41.060261144207395, "iter_time": 0.502458896636963, "loss": 1.0260732173919678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.752162966320945, "step_time": 0.46100774002075195} +{"epoch": 0, "iter": 3828, "iter_tflops": 35.192672198501626, "iter_time": 0.5862326507568358, "loss": 0.9672534465789795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.348471762494164, "step_time": 0.5379899787902832} +{"epoch": 0, "iter": 3829, "iter_tflops": 21.05228217415405, "iter_time": 0.5786347198486328, "loss": 0.007776058278977871, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 23.623206389770715, "step_time": 0.515661642074585} +{"epoch": 0, "iter": 3830, "iter_tflops": 25.782793644019577, "iter_time": 0.4724694137573242, "loss": 0.0057993424125015736, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 28.788475960358472, "step_time": 0.42314089202880856} +{"epoch": 0, "iter": 3831, "iter_tflops": 24.710629057776206, "iter_time": 0.492969295501709, "loss": 0.00481398543342948, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 27.54075148600732, "step_time": 0.44231114768981933} +{"epoch": 0, "iter": 3832, "iter_tflops": 22.95855902375197, "iter_time": 0.5305899810791016, "loss": 0.007599686738103628, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 25.513698893726637, "step_time": 0.4774525814056396} +{"epoch": 0, "iter": 3833, "iter_tflops": 28.57919954974333, "iter_time": 0.7218919296264649, "loss": 0.025320475921034813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.98137814018308, "step_time": 0.6659191665649413} +{"epoch": 0, "iter": 3834, "iter_tflops": 7.914023443993516, "iter_time": 2.606903259277344, "loss": 0.11391554027795792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.982452873179518, "step_time": 2.296821792602539} +{"epoch": 0, "iter": 3835, "iter_tflops": 10.541892623377057, "iter_time": 1.9570578308105469, "loss": 0.07036709040403366, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 12.49906677541401, "step_time": 1.6506107120513915} +{"epoch": 0, "iter": 3836, "iter_tflops": 40.49485141791081, "iter_time": 0.5094744834899902, "loss": 0.052078720182180405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.82556350933189, "step_time": 0.4602528533935547} +{"epoch": 0, "iter": 3837, "iter_tflops": 11.644575053040805, "iter_time": 1.3997318878173828, "loss": 0.5863932967185974, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 12.415281771334016, "step_time": 1.312840362548828} +{"epoch": 0, "iter": 3838, "iter_tflops": 16.571591959273615, "iter_time": 0.9835677261352539, "loss": 0.3363650143146515, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 20.458908342081806, "step_time": 0.7966839065551757} +{"epoch": 0, "iter": 3839, "iter_tflops": 24.409446342871767, "iter_time": 0.667744888305664, "loss": 0.4563886821269989, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 26.145845819895065, "step_time": 0.6233985748291015} +{"epoch": 0, "iter": 3840, "iter_tflops": 26.19646318668417, "iter_time": 0.6221940307617189, "loss": 0.4743807911872864, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 28.077941242161838, "step_time": 0.5805013580322265} +{"epoch": 0, "iter": 3841, "iter_tflops": 18.148266830560566, "iter_time": 1.1368079223632812, "loss": 0.3173883557319641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.767039075455063, "step_time": 1.0437118797302247} +{"epoch": 0, "iter": 3842, "iter_tflops": 46.064541264236915, "iter_time": 0.4478736343383789, "loss": 0.2145756483078003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.0438084525481, "step_time": 0.41226066017150875} +{"epoch": 0, "iter": 3843, "iter_tflops": 52.99520292334478, "iter_time": 0.3893011512756348, "loss": 0.22274310886859894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.88606107018358, "step_time": 0.35640866088867185} +{"epoch": 0, "iter": 3844, "iter_tflops": 47.426926105285865, "iter_time": 0.4350080261230469, "loss": 0.1535276174545288, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 51.6923515930854, "step_time": 0.39911307716369626} +{"epoch": 0, "iter": 3845, "iter_tflops": 31.23617255052311, "iter_time": 0.6604872436523437, "loss": 0.51667720079422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.24529222758442, "step_time": 0.6205718803405762} +{"epoch": 0, "iter": 3846, "iter_tflops": 10.429966041659329, "iter_time": 1.9780595092773436, "loss": 0.4689770042896271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.45974418053093, "step_time": 1.6558199920654297} +{"epoch": 0, "iter": 3847, "iter_tflops": 12.705298640401374, "iter_time": 1.6238180694580078, "loss": 0.5009593963623047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.907417749967628, "step_time": 1.383948169708252} +{"epoch": 0, "iter": 3848, "iter_tflops": 32.97134074114585, "iter_time": 0.6257280731201171, "loss": 0.4924399256706238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.232387596721196, "step_time": 0.5127981395721435} +{"epoch": 0, "iter": 3849, "iter_tflops": 15.242476562839077, "iter_time": 1.0854806671142578, "loss": 0.4762348532676697, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 16.057998511835258, "step_time": 1.0303534164428712} +{"epoch": 0, "iter": 3850, "iter_tflops": 5.576203900011147, "iter_time": 2.967146453857422, "loss": 0.5365375280380249, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 6.797678899063101, "step_time": 2.4339798736572265} +{"epoch": 0, "iter": 3851, "iter_tflops": 14.022761318436169, "iter_time": 1.17989697265625, "loss": 0.5054122805595398, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 15.491627385826273, "step_time": 1.0680229530334473} +{"epoch": 0, "iter": 3852, "iter_tflops": 24.925590180284914, "iter_time": 0.6637922515869141, "loss": 0.5356930494308472, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 26.721876904847104, "step_time": 0.6191710891723633} +{"epoch": 0, "iter": 3853, "iter_tflops": 21.27804698889946, "iter_time": 0.7968780593872071, "loss": 0.3713846802711487, "lr": 3e-05, 
"seqlen": 6768.0, "step_tflops": 23.225291946325076, "step_time": 0.7300665512084961} +{"epoch": 0, "iter": 3854, "iter_tflops": 24.71907405629148, "iter_time": 0.6859483795166016, "loss": 0.34036025404930115, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 26.48721487895509, "step_time": 0.6401582374572753} +{"epoch": 0, "iter": 3855, "iter_tflops": 23.56923911059001, "iter_time": 0.7194126510620116, "loss": 0.47569698095321655, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 25.34540202956375, "step_time": 0.668997428894043} +{"epoch": 0, "iter": 3856, "iter_tflops": 26.28554827644103, "iter_time": 0.6450696258544922, "loss": 0.402642160654068, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 28.213763399787265, "step_time": 0.6009835891723633} +{"epoch": 0, "iter": 3857, "iter_tflops": 20.634661660523463, "iter_time": 0.9998270797729493, "loss": 0.30558404326438904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.285739626404105, "step_time": 0.9257531433105467} +{"epoch": 0, "iter": 3858, "iter_tflops": 30.825030121737537, "iter_time": 0.6692967834472656, "loss": 0.43180224299430847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.39213315897768, "step_time": 0.5998782749176026} +{"epoch": 0, "iter": 3859, "iter_tflops": 50.45223583923401, "iter_time": 0.4089232749938965, "loss": 0.23322521150112152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.141484430981336, "step_time": 0.3741483154296875} +{"epoch": 0, "iter": 3860, "iter_tflops": 53.162824796269625, "iter_time": 0.38807368850708, "loss": 0.3637230396270752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.46410147708437, "step_time": 0.3590257740020752} +{"epoch": 0, "iter": 3861, "iter_tflops": 27.334641854282136, "iter_time": 0.7547599716186523, "loss": 0.13535240292549133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.058014010552313, "step_time": 0.7099966812133789} +{"epoch": 0, "iter": 3862, "iter_tflops": 17.22543853661463, "iter_time": 1.1977107849121096, "loss": 0.22254501283168793, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 20.4549574310462, "step_time": 1.00861092376709} +{"epoch": 0, "iter": 3863, "iter_tflops": 37.662947467764525, "iter_time": 0.5477822341918945, "loss": 0.12984824180603027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62804501178632, "step_time": 0.4956056308746337} +{"epoch": 0, "iter": 3864, "iter_tflops": 37.47642063590615, "iter_time": 0.5505086441040039, "loss": 0.12772029638290405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.096034840912246, "step_time": 0.502021511077881} +{"epoch": 0, "iter": 3865, "iter_tflops": 19.796265834877918, "iter_time": 1.042170967102051, "loss": 0.7373583912849426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.15101899935689, "step_time": 0.9754184188842774} +{"epoch": 0, "iter": 3866, "iter_tflops": 13.857806426094484, "iter_time": 1.4887705078124998, "loss": 0.8384088277816772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.90551095501801, "step_time": 1.29710347366333} +{"epoch": 0, "iter": 3867, "iter_tflops": 39.09558098337131, "iter_time": 0.5277090911865235, "loss": 0.8461323380470276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.83173047779026, "step_time": 0.4816777954101562} +{"epoch": 0, "iter": 3868, "iter_tflops": 38.039984522145964, "iter_time": 0.5423528366088867, "loss": 0.8393421769142151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.139217257180924, "step_time": 0.5014945564270019} +{"epoch": 0, "iter": 3869, "iter_tflops": 36.70977946335156, "iter_time": 0.5620053787231446, "loss": 0.009765778668224812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.01859436482025, "step_time": 0.5029692955017089} +{"epoch": 0, "iter": 3870, "iter_tflops": 37.52364067191288, "iter_time": 0.5498158798217774, "loss": 0.00392143614590168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36305063876172, "step_time": 0.4987807521820068} +{"epoch": 0, "iter": 3871, "iter_tflops": 52.66982894775637, "iter_time": 0.39170610427856445, "loss": 0.0009997115703299642, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 57.80457601565838, "step_time": 0.35691107749938966} +{"epoch": 0, "iter": 3872, "iter_tflops": 61.781179825895265, "iter_time": 0.33393815994262693, "loss": 0.016663305461406708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.52631870016657, "step_time": 0.30552670288085937} +{"epoch": 0, "iter": 3873, "iter_tflops": 32.09300463355435, "iter_time": 0.6428532867431641, "loss": 1.07945716381073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.32884839701202, "step_time": 0.6009841423034669} +{"epoch": 0, "iter": 3874, "iter_tflops": 12.82000954333774, "iter_time": 1.6092884674072265, "loss": 0.836778461933136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.842840577556123, "step_time": 1.3899693527221677} +{"epoch": 0, "iter": 3875, "iter_tflops": 37.65606280731749, "iter_time": 0.5478823852539062, "loss": 1.0593957901000977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14113170735107, "step_time": 0.5014712200164795} +{"epoch": 0, "iter": 3876, "iter_tflops": 34.00505841537069, "iter_time": 0.6067066040039063, "loss": 0.6386620998382568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.87478955056784, "step_time": 0.5594904747009277} +{"epoch": 0, "iter": 3877, "iter_tflops": 19.787666276426606, "iter_time": 1.0426238861083983, "loss": 0.8466092348098755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.097226123804862, "step_time": 0.9779055023193359} +{"epoch": 0, "iter": 3878, "iter_tflops": 15.992424391714072, "iter_time": 1.2900541534423826, "loss": 0.7465381622314453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.57760543260567, "step_time": 1.0538108749389647} +{"epoch": 0, "iter": 3879, "iter_tflops": 36.72800905468312, "iter_time": 0.561726432800293, "loss": 0.6333348751068115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.026040244903626, "step_time": 0.5154417819976806} +{"epoch": 0, "iter": 3880, "iter_tflops": 39.39135721251875, "iter_time": 0.5237467041015625, "loss": 0.7938873767852783, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 42.90094148375511, "step_time": 0.4809007167816163} +{"epoch": 0, "iter": 3881, "iter_tflops": 15.487265122966276, "iter_time": 0.9230535278320312, "loss": 0.16396166384220123, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 16.64803901187378, "step_time": 0.8586942100524902} +{"epoch": 0, "iter": 3882, "iter_tflops": 13.188062598349788, "iter_time": 1.0839783782958985, "loss": 0.06155601888895035, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 16.021071295372643, "step_time": 0.8922983016967774} +{"epoch": 0, "iter": 3883, "iter_tflops": 35.65816302758581, "iter_time": 0.4009060897827148, "loss": 0.07209578901529312, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 39.01813113689929, "step_time": 0.3663828659057617} +{"epoch": 0, "iter": 3884, "iter_tflops": 34.81659756143539, "iter_time": 0.41059654617309577, "loss": 0.13984277844429016, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 37.93189315437699, "step_time": 0.3768748016357422} +{"epoch": 0, "iter": 3885, "iter_tflops": 43.14298028260688, "iter_time": 0.4782027893066406, "loss": 0.6970177292823792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.99190233691354, "step_time": 0.43903507804870606} +{"epoch": 0, "iter": 3886, "iter_tflops": 37.0574454482978, "iter_time": 0.5567327499389648, "loss": 0.7992129921913147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.985411820074255, "step_time": 0.5033765087127684} +{"epoch": 0, "iter": 3887, "iter_tflops": 38.62721241641153, "iter_time": 0.5341077499389648, "loss": 0.6054714322090149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25951984573548, "step_time": 0.48819990348815917} +{"epoch": 0, "iter": 3888, "iter_tflops": 37.898310093295926, "iter_time": 0.5443803024291992, "loss": 0.7616250514984131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.4874937188793, "step_time": 0.4972846431732178} +{"epoch": 0, "iter": 3889, "iter_tflops": 18.460093705429635, "iter_time": 1.1176050262451172, "loss": 0.7536527514457703, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 19.73818086696241, "step_time": 1.045237838745117} +{"epoch": 0, "iter": 3890, "iter_tflops": 17.77636676436255, "iter_time": 1.1605911254882812, "loss": 0.8813682198524475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.840336630461024, "step_time": 0.9899597053527831} +{"epoch": 0, "iter": 3891, "iter_tflops": 39.69788873445751, "iter_time": 0.5197025375366211, "loss": 1.090222954750061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.27114549004981, "step_time": 0.4767863960266114} +{"epoch": 0, "iter": 3892, "iter_tflops": 36.068568040388925, "iter_time": 0.571996467590332, "loss": 1.1726676225662231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.30339887135584, "step_time": 0.5249188137054444} +{"epoch": 0, "iter": 3893, "iter_tflops": 19.355665993330355, "iter_time": 1.065894271850586, "loss": 0.6698366403579712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.799427610587415, "step_time": 0.9919067916870117} +{"epoch": 0, "iter": 3894, "iter_tflops": 16.60683656806352, "iter_time": 1.2423253173828124, "loss": 0.4677417278289795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.151082940537943, "step_time": 1.077280776977539} +{"epoch": 0, "iter": 3895, "iter_tflops": 44.32011501839829, "iter_time": 0.46550180435180666, "loss": 0.6213307976722717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.48560845810125, "step_time": 0.42550963401794434} +{"epoch": 0, "iter": 3896, "iter_tflops": 44.49627765906603, "iter_time": 0.46365886306762705, "loss": 0.5859487652778625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.220170731588006, "step_time": 0.42785193824768064} +{"epoch": 0, "iter": 3897, "iter_tflops": 42.57231064441464, "iter_time": 0.4846129608154296, "loss": 0.5910395979881287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.141893242500714, "step_time": 0.44712282180786134} +{"epoch": 0, "iter": 3898, "iter_tflops": 42.89971658200663, "iter_time": 0.4809144477844238, "loss": 0.5754929780960083, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 46.172596039089804, "step_time": 0.4468255043029785} +{"epoch": 0, "iter": 3899, "iter_tflops": 49.790114275353275, "iter_time": 0.41436124038696287, "loss": 0.8017132878303528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.9024007508099, "step_time": 0.38274906539917} +{"epoch": 0, "iter": 3900, "iter_tflops": 48.565712095334135, "iter_time": 0.424807804107666, "loss": 0.7501996755599976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.27688481169303, "step_time": 0.3946504001617432} +{"epoch": 0, "iter": 3901, "iter_tflops": 35.78952110914, "iter_time": 0.5764562606811523, "loss": 0.3466646075248718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.36126203729906, "step_time": 0.537810604095459} +{"epoch": 0, "iter": 3902, "iter_tflops": 19.9430091786561, "iter_time": 1.0345025329589843, "loss": 0.3714008331298828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.475718831132774, "step_time": 0.9179280834197998} +{"epoch": 0, "iter": 3903, "iter_tflops": 39.58030895304393, "iter_time": 0.5212463989257813, "loss": 0.3269903063774109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.32491344566533, "step_time": 0.47619468498229983} +{"epoch": 0, "iter": 3904, "iter_tflops": 40.30753122611193, "iter_time": 0.5118421516418457, "loss": 0.29267656803131104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.63139556730544, "step_time": 0.4728497276306152} +{"epoch": 0, "iter": 3905, "iter_tflops": 28.243261401441504, "iter_time": 0.7304784393310547, "loss": 0.8960567116737366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.69312511405712, "step_time": 0.6721731147766113} +{"epoch": 0, "iter": 3906, "iter_tflops": 8.857613288192539, "iter_time": 2.3291932983398436, "loss": 0.6666468977928162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.09169204177688, "step_time": 2.0443641586303714} +{"epoch": 0, "iter": 3907, "iter_tflops": 17.320838980925185, "iter_time": 1.1911139831542967, "loss": 0.8745272755622864, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.512366560950074, "step_time": 1.0057880668640136} +{"epoch": 0, "iter": 3908, "iter_tflops": 36.39029276145211, "iter_time": 0.5669394760131836, "loss": 0.9188710451126099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.44767579533453, "step_time": 0.5229989624023437} +{"epoch": 0, "iter": 3909, "iter_tflops": 20.6664538968241, "iter_time": 0.824436767578125, "loss": 0.49675360321998596, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 22.536376176013043, "step_time": 0.7560303535461426} +{"epoch": 0, "iter": 3910, "iter_tflops": 25.833763296294816, "iter_time": 0.6595316467285156, "loss": 0.5109538435935974, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 27.813575360717667, "step_time": 0.6125851936340332} +{"epoch": 0, "iter": 3911, "iter_tflops": 25.312805093654994, "iter_time": 0.6731053466796876, "loss": 0.4549960792064667, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 27.110662379958015, "step_time": 0.6284680252075195} +{"epoch": 0, "iter": 3912, "iter_tflops": 25.86861552130727, "iter_time": 0.6586430740356445, "loss": 0.3995104432106018, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 27.81635412879594, "step_time": 0.6125239982604982} +{"epoch": 0, "iter": 3913, "iter_tflops": 35.05759049217185, "iter_time": 0.5884914855957032, "loss": 0.0352400541305542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.4593867401732, "step_time": 0.5228437442779541} +{"epoch": 0, "iter": 3914, "iter_tflops": 38.67951104703104, "iter_time": 0.5333855819702149, "loss": 0.06223258376121521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.65516408573066, "step_time": 0.48367164802551266} +{"epoch": 0, "iter": 3915, "iter_tflops": 41.80969006646757, "iter_time": 0.49345243835449215, "loss": 0.02182193100452423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.24059084417604, "step_time": 0.44616846656799314} +{"epoch": 0, "iter": 3916, "iter_tflops": 46.71596798381172, "iter_time": 0.4416282997131347, "loss": 0.04758227989077568, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 51.46728616196112, "step_time": 0.4008583908081055} +{"epoch": 0, "iter": 3917, "iter_tflops": 24.37346970981018, "iter_time": 0.8464569778442382, "loss": 0.34369736909866333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.436584072912314, "step_time": 0.7803993682861328} +{"epoch": 0, "iter": 3918, "iter_tflops": 24.76422311421957, "iter_time": 0.8331007766723632, "loss": 0.3897230625152588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.85253201647199, "step_time": 0.740725959777832} +{"epoch": 0, "iter": 3919, "iter_tflops": 46.75083377763201, "iter_time": 0.441298942565918, "loss": 0.35512614250183105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.46020450721167, "step_time": 0.40885869789123536} +{"epoch": 0, "iter": 3920, "iter_tflops": 47.12750018805184, "iter_time": 0.43777186203002927, "loss": 0.27639657258987427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.99589479437469, "step_time": 0.40456381034851074} +{"epoch": 0, "iter": 3921, "iter_tflops": 48.37564923031856, "iter_time": 0.4264768295288086, "loss": 0.18607580661773682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.007221786849755, "step_time": 0.3892128810882568} +{"epoch": 0, "iter": 3922, "iter_tflops": 37.80165426158229, "iter_time": 0.5457722396850586, "loss": 0.141121506690979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.73735466362598, "step_time": 0.48274147224426267} +{"epoch": 0, "iter": 3923, "iter_tflops": 37.21965005894503, "iter_time": 0.5543064880371094, "loss": 0.14252403378486633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.60758687679297, "step_time": 0.5080600719451904} +{"epoch": 0, "iter": 3924, "iter_tflops": 44.20699943017507, "iter_time": 0.46669291687011716, "loss": 0.1061495766043663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.530622873357565, "step_time": 0.42511495399475097} +{"epoch": 0, "iter": 3925, "iter_tflops": 18.482724414073893, "iter_time": 1.1162366027832031, "loss": 0.8862863183021545, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 19.752619738785732, "step_time": 1.0444737854003907} +{"epoch": 0, "iter": 3926, "iter_tflops": 15.62764738316355, "iter_time": 1.3201663055419923, "loss": 1.0257622003555298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.396163755268244, "step_time": 1.1214889030456543} +{"epoch": 0, "iter": 3927, "iter_tflops": 41.78821472843501, "iter_time": 0.49370602798461916, "loss": 1.0219172239303589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.78922318854759, "step_time": 0.4606262855529785} +{"epoch": 0, "iter": 3928, "iter_tflops": 43.7824418904213, "iter_time": 0.47121842956542964, "loss": 0.8337653279304504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.75159920438419, "step_time": 0.44129171752929686} +{"epoch": 0, "iter": 3929, "iter_tflops": 28.907921311158095, "iter_time": 0.7136830520629882, "loss": 0.3943275213241577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.856993849709568, "step_time": 0.6686034812927246} +{"epoch": 0, "iter": 3930, "iter_tflops": 12.379692557763743, "iter_time": 1.666527130126953, "loss": 0.5098618268966675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.022685696790363, "step_time": 1.4712654876708986} +{"epoch": 0, "iter": 3931, "iter_tflops": 38.41403617312502, "iter_time": 0.5370717468261719, "loss": 0.590108335018158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.90645398945395, "step_time": 0.49231303405761717} +{"epoch": 0, "iter": 3932, "iter_tflops": 47.38184770322298, "iter_time": 0.4354218864440918, "loss": 0.6110222935676575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.08100469574051, "step_time": 0.40388973617553714} +{"epoch": 0, "iter": 3933, "iter_tflops": 24.5299391843027, "iter_time": 0.8410576705932619, "loss": 0.07545152306556702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.74066181699042, "step_time": 0.801498176574707} +{"epoch": 0, "iter": 3934, "iter_tflops": 12.79449124513837, "iter_time": 1.6124981536865233, "loss": 0.08835706114768982, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 17.417273705392553, "step_time": 1.1845191078186035} +{"epoch": 0, "iter": 3935, "iter_tflops": 46.73690003781674, "iter_time": 0.4414305076599121, "loss": 0.05733535811305046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.436257604996655, "step_time": 0.40905282211303706} +{"epoch": 0, "iter": 3936, "iter_tflops": 53.01259761553557, "iter_time": 0.38917341232299807, "loss": 0.0847804844379425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.16998296135534, "step_time": 0.36087282943725585} +{"epoch": 0, "iter": 3937, "iter_tflops": 32.03179301229531, "iter_time": 0.6440817565917969, "loss": 0.48174238204956055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.3539829760544, "step_time": 0.6005444412231445} +{"epoch": 0, "iter": 3938, "iter_tflops": 9.802010459094134, "iter_time": 2.1047818298339847, "loss": 0.4674091637134552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.257630124256483, "step_time": 1.8326320266723635} +{"epoch": 0, "iter": 3939, "iter_tflops": 11.52735905670675, "iter_time": 1.7897502288818359, "loss": 0.42553603649139404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.487490233080633, "step_time": 1.5296465950012206} +{"epoch": 0, "iter": 3940, "iter_tflops": 32.66075754091725, "iter_time": 0.631678352355957, "loss": 0.49066877365112305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.874114422371925, "step_time": 0.4702338447570801} +{"epoch": 0, "iter": 3941, "iter_tflops": 26.434987044738328, "iter_time": 0.6227864303588868, "loss": 0.4649083614349365, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 28.447553227311406, "step_time": 0.5787264404296876} +{"epoch": 0, "iter": 3942, "iter_tflops": 25.286639604474725, "iter_time": 0.6510691604614258, "loss": 0.31934377551078796, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 27.236785743284916, "step_time": 0.6044527931213378} +{"epoch": 0, "iter": 3943, "iter_tflops": 25.728007138280244, "iter_time": 0.6398999786376953, "loss": 0.41029855608940125, 
"lr": 3e-05, "seqlen": 6576.0, "step_tflops": 27.51920298471013, "step_time": 0.5982495651245118} +{"epoch": 0, "iter": 3944, "iter_tflops": 24.313021151633922, "iter_time": 0.6771413192749024, "loss": 0.5232006311416626, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 26.147260353591758, "step_time": 0.6296396255493164} +{"epoch": 0, "iter": 3945, "iter_tflops": 18.74148894943288, "iter_time": 1.100824676513672, "loss": 0.007173530291765928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.36676691447324, "step_time": 1.012978328704834} +{"epoch": 0, "iter": 3946, "iter_tflops": 17.64252245969824, "iter_time": 1.1693959045410156, "loss": 0.003233768744394183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.279724684525473, "step_time": 0.8862258377075195} +{"epoch": 0, "iter": 3947, "iter_tflops": 55.547770164941944, "iter_time": 0.37141173171997066, "loss": 0.0033843691926449537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.56136663954458, "step_time": 0.33513053131103515} +{"epoch": 0, "iter": 3948, "iter_tflops": 51.14623376596672, "iter_time": 0.4033746376037598, "loss": 0.03347497433423996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.72578432630972, "step_time": 0.3702252693176269} +{"epoch": 0, "iter": 3949, "iter_tflops": 36.9762843812821, "iter_time": 0.5579547500610351, "loss": 0.8407534956932068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.81529977963725, "step_time": 0.5181699905395507} +{"epoch": 0, "iter": 3950, "iter_tflops": 13.631967453861526, "iter_time": 1.5134347686767577, "loss": 0.7513498663902283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.312529731741325, "step_time": 1.2647390594482422} +{"epoch": 0, "iter": 3951, "iter_tflops": 38.26114772040393, "iter_time": 0.5392178421020508, "loss": 0.7839629054069519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.71161493698282, "step_time": 0.4946126766204834} +{"epoch": 0, "iter": 3952, "iter_tflops": 36.397603150684475, "iter_time": 0.5668256072998046, "loss": 
0.6954159736633301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56628298164291, "step_time": 0.5214311771392822} +{"epoch": 0, "iter": 3953, "iter_tflops": 15.125415496967124, "iter_time": 1.3640017700195315, "loss": 0.8470838665962219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.932320367231606, "step_time": 1.294920829772949} +{"epoch": 0, "iter": 3954, "iter_tflops": 20.331738933548554, "iter_time": 1.0147235107421875, "loss": 0.9482080936431885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.491518871816066, "step_time": 0.8093316688537598} +{"epoch": 0, "iter": 3955, "iter_tflops": 45.30856638137453, "iter_time": 0.4553464202880859, "loss": 0.7057436108589172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62809432995653, "step_time": 0.42426284217834476} +{"epoch": 0, "iter": 3956, "iter_tflops": 45.82040916904413, "iter_time": 0.4502599143981934, "loss": 0.8509607315063477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.53557810389027, "step_time": 0.4164904155731201} +{"epoch": 0, "iter": 3957, "iter_tflops": 20.78650399630984, "iter_time": 0.9925234909057616, "loss": 0.03174574673175812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.669054999385832, "step_time": 0.9520993652343749} +{"epoch": 0, "iter": 3958, "iter_tflops": 15.187509800179905, "iter_time": 1.3584250335693362, "loss": 0.019175231456756592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.4140440218212, "step_time": 1.1203999233245852} +{"epoch": 0, "iter": 3959, "iter_tflops": 38.76978099529974, "iter_time": 0.5321436691284179, "loss": 0.022591527551412582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89242748988513, "step_time": 0.4809961738586426} +{"epoch": 0, "iter": 3960, "iter_tflops": 44.17080118528583, "iter_time": 0.4670753746032715, "loss": 0.07879235595464706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.491233790517576, "step_time": 0.42546027183532714} +{"epoch": 0, "iter": 3961, "iter_tflops": 27.349769005730952, "iter_time": 0.7543425140380859, 
"loss": 0.9253832697868347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.5794776737228, "step_time": 0.6974799804687501} +{"epoch": 0, "iter": 3962, "iter_tflops": 14.784019293596257, "iter_time": 1.3954996337890624, "loss": 0.7849485278129578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.0498333857199, "step_time": 1.2100466346740724} +{"epoch": 0, "iter": 3963, "iter_tflops": 27.32578709474765, "iter_time": 0.7550045471191407, "loss": 0.8051035404205322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.92505001242925, "step_time": 0.6081374530792236} +{"epoch": 0, "iter": 3964, "iter_tflops": 37.18397437472671, "iter_time": 0.5548383102416992, "loss": 0.8899968862533569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.68984788903663, "step_time": 0.5070329475402832} +{"epoch": 0, "iter": 3965, "iter_tflops": 20.484549814384238, "iter_time": 0.7497043457031249, "loss": 0.5275921821594238, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 22.379350293085295, "step_time": 0.6862288589477539} +{"epoch": 0, "iter": 3966, "iter_tflops": 22.543086908770235, "iter_time": 0.6812445907592775, "loss": 0.5099827647209167, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.281230562937054, "step_time": 0.6324784889221191} +{"epoch": 0, "iter": 3967, "iter_tflops": 22.759750118900662, "iter_time": 0.6747594299316407, "loss": 0.4279186427593231, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.481564533137973, "step_time": 0.6273028831481933} +{"epoch": 0, "iter": 3968, "iter_tflops": 22.12595490212111, "iter_time": 0.6940878295898437, "loss": 0.38783907890319824, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.84269369694058, "step_time": 0.6441116180419921} +{"epoch": 0, "iter": 3969, "iter_tflops": 23.794012217520603, "iter_time": 0.6884634475708008, "loss": 0.04763378947973251, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 25.775576082229396, "step_time": 0.6355360450744629} +{"epoch": 0, "iter": 3970, "iter_tflops": 38.12509063438911, "iter_time": 0.42967262268066403, 
"loss": 0.026819678023457527, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 41.86566473631898, "step_time": 0.3912826366424561} +{"epoch": 0, "iter": 3971, "iter_tflops": 39.19303159674191, "iter_time": 0.41796480178833006, "loss": 0.04227551817893982, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 42.75939931028246, "step_time": 0.3831042518615722} +{"epoch": 0, "iter": 3972, "iter_tflops": 43.105481947648414, "iter_time": 0.38002840805053706, "loss": 0.031851381063461304, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 47.17162264738778, "step_time": 0.3472703876495361} +{"epoch": 0, "iter": 3973, "iter_tflops": 32.55856038349609, "iter_time": 0.6336611099243163, "loss": 0.40779921412467957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.72562065706452, "step_time": 0.5941173439025879} +{"epoch": 0, "iter": 3974, "iter_tflops": 17.84765484134172, "iter_time": 1.1559554290771483, "loss": 0.3009113669395447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.62396726178198, "step_time": 1.051321235656738} +{"epoch": 0, "iter": 3975, "iter_tflops": 40.65095051778471, "iter_time": 0.5075181083679199, "loss": 0.35827720165252686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.81990940937154, "step_time": 0.46031091499328614} +{"epoch": 0, "iter": 3976, "iter_tflops": 40.4942180309811, "iter_time": 0.5094824523925782, "loss": 0.2111537903547287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.486747873823504, "step_time": 0.463758186340332} +{"epoch": 0, "iter": 3977, "iter_tflops": 21.75597424753563, "iter_time": 0.9482955474853516, "loss": 0.05920024216175079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.101929192296357, "step_time": 0.8930463485717772} +{"epoch": 0, "iter": 3978, "iter_tflops": 20.13185598224757, "iter_time": 1.0247983856201173, "loss": 0.03396744281053543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.900140043895252, "step_time": 0.8285533123016358} +{"epoch": 0, "iter": 3979, "iter_tflops": 42.864363950760065, "iter_time": 
0.48131108474731443, "loss": 0.03158050402998924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.411236935702554, "step_time": 0.43515197753906254} +{"epoch": 0, "iter": 3980, "iter_tflops": 45.776604650059554, "iter_time": 0.4506907768249511, "loss": 0.06531739979982376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.46491270444652, "step_time": 0.4088205528259277} +{"epoch": 0, "iter": 3981, "iter_tflops": 26.672844361693603, "iter_time": 0.5895748748779297, "loss": 0.0460524745285511, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 28.97051556683302, "step_time": 0.5428152923583984} +{"epoch": 0, "iter": 3982, "iter_tflops": 8.861036510374788, "iter_time": 1.7746951904296875, "loss": 0.018003422766923904, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 12.103391621709527, "step_time": 1.299275390625} +{"epoch": 0, "iter": 3983, "iter_tflops": 6.868108907381502, "iter_time": 2.289660675048828, "loss": 0.03719749301671982, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 8.637272910936485, "step_time": 1.8206717605590823} +{"epoch": 0, "iter": 3984, "iter_tflops": 16.513773297457785, "iter_time": 0.9522741165161134, "loss": 0.01860806532204151, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 20.424888640950634, "step_time": 0.7699253177642823} +{"epoch": 0, "iter": 3985, "iter_tflops": 12.854057894061855, "iter_time": 1.1597700653076173, "loss": 0.45116305351257324, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 13.501553049561496, "step_time": 1.1041508712768555} +{"epoch": 0, "iter": 3986, "iter_tflops": 11.572078554301902, "iter_time": 1.2882518463134764, "loss": 0.44858694076538086, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 14.445096092450063, "step_time": 1.032028549194336} +{"epoch": 0, "iter": 3987, "iter_tflops": 26.041746289808106, "iter_time": 0.5724559097290038, "loss": 0.42320141196250916, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 27.768046141550208, "step_time": 0.5368671417236328} +{"epoch": 0, "iter": 3988, "iter_tflops": 27.395150600166698, 
"iter_time": 0.5441748352050781, "loss": 0.5567373037338257, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 29.13177174017153, "step_time": 0.5117351493835449} +{"epoch": 0, "iter": 3989, "iter_tflops": 31.37713835223363, "iter_time": 0.6575199203491211, "loss": 1.0922682285308838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.492653622001455, "step_time": 0.6159886207580567} +{"epoch": 0, "iter": 3990, "iter_tflops": 19.549806582944488, "iter_time": 1.055309341430664, "loss": 0.9072164297103882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.892871633679736, "step_time": 0.9423658008575438} +{"epoch": 0, "iter": 3991, "iter_tflops": 42.12825152320711, "iter_time": 0.4897210960388184, "loss": 0.840312659740448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.32700190317139, "step_time": 0.4551612205505371} +{"epoch": 0, "iter": 3992, "iter_tflops": 44.17444002665953, "iter_time": 0.4670368995666504, "loss": 0.9062899351119995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86399332087958, "step_time": 0.4310357761383057} +{"epoch": 0, "iter": 3993, "iter_tflops": 39.331151473070925, "iter_time": 0.5245484237670899, "loss": 0.3571154773235321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.615483188577166, "step_time": 0.484122013092041} +{"epoch": 0, "iter": 3994, "iter_tflops": 37.6865822465445, "iter_time": 0.5474386978149414, "loss": 0.44510143995285034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.67769765905718, "step_time": 0.4950151920318604} +{"epoch": 0, "iter": 3995, "iter_tflops": 41.67734549077694, "iter_time": 0.49501937484741204, "loss": 0.3359706401824951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.62533777207054, "step_time": 0.45218500328063965} +{"epoch": 0, "iter": 3996, "iter_tflops": 38.52672214295843, "iter_time": 0.5355008773803711, "loss": 0.43197327852249146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.98039896754453, "step_time": 0.49144586563110354} +{"epoch": 0, "iter": 3997, "iter_tflops": 16.930488523878484, 
"iter_time": 1.218576385498047, "loss": 0.22699587047100067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.177420373314177, "step_time": 1.1349846725463866} +{"epoch": 0, "iter": 3998, "iter_tflops": 16.89940156982225, "iter_time": 1.2208179931640626, "loss": 0.24311335384845734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.04793122903009, "step_time": 0.8579155235290528} +{"epoch": 0, "iter": 3999, "iter_tflops": 46.326609480444596, "iter_time": 0.44534002685546875, "loss": 0.19661535322666168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.33057398868114, "step_time": 0.40991174697875976} +{"epoch": 0, "iter": 4000, "iter_tflops": 46.88035669283824, "iter_time": 0.44007970428466797, "loss": 0.21969960629940033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.01925308737576, "step_time": 0.40437858772277835} +{"epoch": 0, "iter": 4001, "iter_tflops": 36.04606566900632, "iter_time": 0.5723535461425782, "loss": 0.035286713391542435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.65638641380368, "step_time": 0.5337046585083007} +{"epoch": 0, "iter": 4002, "iter_tflops": 15.824432516200309, "iter_time": 1.3037493438720704, "loss": 0.029100151732563972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.591787001198146, "step_time": 1.0019088439941408} +{"epoch": 0, "iter": 4003, "iter_tflops": 51.05865462584412, "iter_time": 0.40406653213500976, "loss": 0.02419617958366871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.72076814239932, "step_time": 0.37025859832763675} +{"epoch": 0, "iter": 4004, "iter_tflops": 55.05800717126321, "iter_time": 0.3747155876159668, "loss": 0.04375382885336876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.87755898503453, "step_time": 0.34455468559265134} +{"epoch": 0, "iter": 4005, "iter_tflops": 24.000301340196124, "iter_time": 0.8596181030273438, "loss": 0.12306247651576996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.145670237212933, "step_time": 0.8204630584716797} +{"epoch": 0, "iter": 4006, "iter_tflops": 
14.1734163350694, "iter_time": 1.4556189575195309, "loss": 0.17430494725704193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.44311439233188, "step_time": 1.061100248336792} +{"epoch": 0, "iter": 4007, "iter_tflops": 38.33197440795701, "iter_time": 0.5382215194702148, "loss": 0.10666859149932861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.00461552314976, "step_time": 0.49116253662109377} +{"epoch": 0, "iter": 4008, "iter_tflops": 39.40050992052296, "iter_time": 0.5236250381469727, "loss": 0.16314639151096344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.401568414179266, "step_time": 0.4753536396026611} +{"epoch": 0, "iter": 4009, "iter_tflops": 18.02685305215508, "iter_time": 1.1444645080566405, "loss": 0.6737844347953796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.889369607439665, "step_time": 1.0922065658569335} +{"epoch": 0, "iter": 4010, "iter_tflops": 23.42123720051533, "iter_time": 0.8808712081909179, "loss": 0.7764543890953064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.034500173179396, "step_time": 0.7924520683288574} +{"epoch": 0, "iter": 4011, "iter_tflops": 45.926482191486436, "iter_time": 0.4492199821472168, "loss": 0.6540110111236572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.94450742946001, "step_time": 0.4130803279876709} +{"epoch": 0, "iter": 4012, "iter_tflops": 48.13626978015521, "iter_time": 0.42859767913818364, "loss": 0.6283607482910156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.011873717072696, "step_time": 0.3966612243652344} +{"epoch": 0, "iter": 4013, "iter_tflops": 44.19781202061716, "iter_time": 0.4667899284362793, "loss": 0.9281832575798035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.385193329603155, "step_time": 0.4263927059173584} +{"epoch": 0, "iter": 4014, "iter_tflops": 36.240062339992, "iter_time": 0.569289680480957, "loss": 0.7534361481666565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.151836731626716, "step_time": 0.5138268928527833} +{"epoch": 0, "iter": 4015, "iter_tflops": 
37.44900660791592, "iter_time": 0.5509116363525391, "loss": 0.9381629228591919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.8810103983546, "step_time": 0.5046620254516601} +{"epoch": 0, "iter": 4016, "iter_tflops": 39.267118305372385, "iter_time": 0.52540380859375, "loss": 0.8775017857551575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94839858269066, "step_time": 0.48036933135986326} +{"epoch": 0, "iter": 4017, "iter_tflops": 31.188556092787607, "iter_time": 0.6614956283569337, "loss": 0.4304080903530121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.62978003746508, "step_time": 0.5957616100311278} +{"epoch": 0, "iter": 4018, "iter_tflops": 36.49753703404104, "iter_time": 0.565273582458496, "loss": 0.5133703947067261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.0411073743321, "step_time": 0.5152478256225586} +{"epoch": 0, "iter": 4019, "iter_tflops": 39.02383237178945, "iter_time": 0.528679328918457, "loss": 0.41323795914649963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81812754643557, "step_time": 0.4818308200836181} +{"epoch": 0, "iter": 4020, "iter_tflops": 41.46701010747507, "iter_time": 0.49753028869628907, "loss": 0.35901913046836853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.345625042221364, "step_time": 0.45497428894042974} +{"epoch": 0, "iter": 4021, "iter_tflops": 23.68718161541089, "iter_time": 0.8709813537597656, "loss": 0.22181454300880432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.14976100072016, "step_time": 0.820329605102539} +{"epoch": 0, "iter": 4022, "iter_tflops": 15.490933990654197, "iter_time": 1.3318172760009765, "loss": 0.17418095469474792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.294184324527144, "step_time": 1.127740550994873} +{"epoch": 0, "iter": 4023, "iter_tflops": 46.133887451633946, "iter_time": 0.44720041275024414, "loss": 0.07536636292934418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99474639594612, "step_time": 0.4126652297973633} +{"epoch": 0, "iter": 4024, "iter_tflops": 
55.18450247588874, "iter_time": 0.3738566551208496, "loss": 0.21597853302955627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.99814721243773, "step_time": 0.3438621768951416} +{"epoch": 0, "iter": 4025, "iter_tflops": 44.39477395100695, "iter_time": 0.46471896743774416, "loss": 0.008113023824989796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.403535362200046, "step_time": 0.42623112869262697} +{"epoch": 0, "iter": 4026, "iter_tflops": 20.88027694333584, "iter_time": 0.9880660858154297, "loss": 0.019470306113362312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.06315293381602, "step_time": 0.7351666278839111} +{"epoch": 0, "iter": 4027, "iter_tflops": 52.3692446534477, "iter_time": 0.3939543838500976, "loss": 0.002662280108779669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.25089468590484, "step_time": 0.36036281394958497} +{"epoch": 0, "iter": 4028, "iter_tflops": 58.70374686071362, "iter_time": 0.35144423675537106, "loss": 0.011576293036341667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.14463844378288, "step_time": 0.3216339511871338} +{"epoch": 0, "iter": 4029, "iter_tflops": 36.4675303285995, "iter_time": 0.5657387084960936, "loss": 0.13389670848846436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.145428836952036, "step_time": 0.5270371055603027} +{"epoch": 0, "iter": 4030, "iter_tflops": 15.304381522913678, "iter_time": 1.3480514373779295, "loss": 0.2235003113746643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.82977055606272, "step_time": 1.0404100971221926} +{"epoch": 0, "iter": 4031, "iter_tflops": 40.00350318643962, "iter_time": 0.5157321701049805, "loss": 0.17302095890045166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77511196416912, "step_time": 0.4712973327636719} +{"epoch": 0, "iter": 4032, "iter_tflops": 41.19242024980359, "iter_time": 0.5008468399047852, "loss": 0.23926600813865662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87820958801857, "step_time": 0.4597129364013672} +{"epoch": 0, "iter": 4033, 
"iter_tflops": 20.142154511422877, "iter_time": 1.0242744140625, "loss": 0.11824019998311996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.47824878734915, "step_time": 0.9605575256347658} +{"epoch": 0, "iter": 4034, "iter_tflops": 13.312203199652306, "iter_time": 1.549788055419922, "loss": 0.15058207511901855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.714258011670601, "step_time": 1.3128900833129882} +{"epoch": 0, "iter": 4035, "iter_tflops": 38.8144289936874, "iter_time": 0.5315315475463868, "loss": 0.1038428395986557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.50302036946711, "step_time": 0.48540299797058106} +{"epoch": 0, "iter": 4036, "iter_tflops": 49.31629744720789, "iter_time": 0.4183423042297363, "loss": 0.18263381719589233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.31853216822211, "step_time": 0.3869403877258301} +{"epoch": 0, "iter": 4037, "iter_tflops": 43.00710673096703, "iter_time": 0.4797135887145997, "loss": 0.1461341232061386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.49573645504856, "step_time": 0.43437779998779297} +{"epoch": 0, "iter": 4038, "iter_tflops": 47.191941285947784, "iter_time": 0.43717407989501955, "loss": 0.1448420137166977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.831775594186446, "step_time": 0.39803948974609377} +{"epoch": 0, "iter": 4039, "iter_tflops": 50.59865963262319, "iter_time": 0.4077399215698242, "loss": 0.14392781257629395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.67635370523524, "step_time": 0.3773311882019043} +{"epoch": 0, "iter": 4040, "iter_tflops": 47.026358040940465, "iter_time": 0.43871340179443363, "loss": 0.13993977010250092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.99951824524644, "step_time": 0.40453506660461425} +{"epoch": 0, "iter": 4041, "iter_tflops": 49.67706135029109, "iter_time": 0.4153042259216309, "loss": 0.03275282308459282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.656931631618924, "step_time": 0.3774652709960938} +{"epoch": 0, "iter": 
4042, "iter_tflops": 52.979860234861874, "iter_time": 0.38941389083862304, "loss": 0.04518497735261917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.07095222707197, "step_time": 0.3552738971710205} +{"epoch": 0, "iter": 4043, "iter_tflops": 52.951655665987545, "iter_time": 0.3896213111877441, "loss": 0.04812992736697197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.75150705180487, "step_time": 0.357239049911499} +{"epoch": 0, "iter": 4044, "iter_tflops": 54.254260240559695, "iter_time": 0.38026679229736327, "loss": 0.03335457667708397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.91602720300102, "step_time": 0.3501779479980469} +{"epoch": 0, "iter": 4045, "iter_tflops": 25.89360826366691, "iter_time": 0.7967639465332031, "loss": 0.9068590998649597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.347354926311557, "step_time": 0.7544091033935546} +{"epoch": 0, "iter": 4046, "iter_tflops": 11.634828069282419, "iter_time": 1.7732185974121093, "loss": 0.8039078116416931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.965768993854626, "step_time": 1.4772615470886232} +{"epoch": 0, "iter": 4047, "iter_tflops": 45.300421011403984, "iter_time": 0.45542829513549804, "loss": 0.7098579406738281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.358008471978295, "step_time": 0.41798877525329586} +{"epoch": 0, "iter": 4048, "iter_tflops": 48.48903327805918, "iter_time": 0.4254795799255371, "loss": 0.8518994450569153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.560446392797346, "step_time": 0.392521276473999} +{"epoch": 0, "iter": 4049, "iter_tflops": 25.573377835223667, "iter_time": 0.8067410430908204, "loss": 0.8124874234199524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.983711950387644, "step_time": 0.764575813293457} +{"epoch": 0, "iter": 4050, "iter_tflops": 12.84100257994791, "iter_time": 1.6066575317382812, "loss": 0.5402548909187317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.126961071207713, "step_time": 1.2792920761108397} +{"epoch": 0, 
"iter": 4051, "iter_tflops": 39.7355321823802, "iter_time": 0.5192101974487305, "loss": 0.5247337818145752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.701086716094814, "step_time": 0.4720956630706787} +{"epoch": 0, "iter": 4052, "iter_tflops": 40.57835876730984, "iter_time": 0.5084260215759278, "loss": 0.6049799919128418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.17810545032456, "step_time": 0.4669981498718262} +{"epoch": 0, "iter": 4053, "iter_tflops": 17.948197086358164, "iter_time": 1.149479995727539, "loss": 0.10718770325183868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.094853190885395, "step_time": 1.080453109741211} +{"epoch": 0, "iter": 4054, "iter_tflops": 20.0043273727457, "iter_time": 1.0313315277099608, "loss": 0.1833088994026184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.869240110071157, "step_time": 0.8295827865600586} +{"epoch": 0, "iter": 4055, "iter_tflops": 50.96956198843768, "iter_time": 0.40477282333374026, "loss": 0.188828244805336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.24441440037048, "step_time": 0.37345121192932135} +{"epoch": 0, "iter": 4056, "iter_tflops": 51.40147308017893, "iter_time": 0.401371639251709, "loss": 0.12155332416296005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.039977422677055, "step_time": 0.3681495685577393} +{"epoch": 0, "iter": 4057, "iter_tflops": 31.104687374558015, "iter_time": 0.6632792434692383, "loss": 0.14660587906837463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.0852329775374, "step_time": 0.623574073791504} +{"epoch": 0, "iter": 4058, "iter_tflops": 13.412651259498023, "iter_time": 1.5381816101074217, "loss": 0.23153156042099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.300002903036145, "step_time": 1.2657110328674315} +{"epoch": 0, "iter": 4059, "iter_tflops": 41.62794840755033, "iter_time": 0.4956067810058594, "loss": 0.12810860574245453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91350978907801, "step_time": 0.4493469047546387} +{"epoch": 0, 
"iter": 4060, "iter_tflops": 41.98888477151504, "iter_time": 0.4913465461730957, "loss": 0.1258735954761505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17640462995105, "step_time": 0.44678865051269534} +{"epoch": 0, "iter": 4061, "iter_tflops": 20.867863487466483, "iter_time": 0.9886538467407227, "loss": 0.2109261006116867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.491338918678515, "step_time": 0.9172905883789063} +{"epoch": 0, "iter": 4062, "iter_tflops": 18.810284858518827, "iter_time": 1.096798568725586, "loss": 0.29668691754341125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.30832231393885, "step_time": 0.7287995834350587} +{"epoch": 0, "iter": 4063, "iter_tflops": 53.896821737297685, "iter_time": 0.3827886848449707, "loss": 0.26919588446617126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.4017955805414, "step_time": 0.35326128768920895} +{"epoch": 0, "iter": 4064, "iter_tflops": 54.29779721727058, "iter_time": 0.3799618873596192, "loss": 0.3023950755596161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.82828724594627, "step_time": 0.3507002239227295} +{"epoch": 0, "iter": 4065, "iter_tflops": 38.806491000128894, "iter_time": 0.5316402740478515, "loss": 0.07288039475679398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.988540527127824, "step_time": 0.4913505744934082} +{"epoch": 0, "iter": 4066, "iter_tflops": 29.478331825799863, "iter_time": 0.6998731689453125, "loss": 0.04422082379460335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.805906878104445, "step_time": 0.6288834991455079} +{"epoch": 0, "iter": 4067, "iter_tflops": 38.28260344029978, "iter_time": 0.5389156341552734, "loss": 0.09873779118061066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.86888010030221, "step_time": 0.4927548446655274} +{"epoch": 0, "iter": 4068, "iter_tflops": 45.89957083553874, "iter_time": 0.4494833641052246, "loss": 0.06163617968559265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.45055614297974, "step_time": 0.4089368896484375} +{"epoch": 
0, "iter": 4069, "iter_tflops": 19.627031983083388, "iter_time": 1.0511570739746094, "loss": 0.047336749732494354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.179889572950962, "step_time": 0.9740888137817382} +{"epoch": 0, "iter": 4070, "iter_tflops": 23.9165709527384, "iter_time": 0.862627571105957, "loss": 0.04735235497355461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.08236429555573, "step_time": 0.7094022102355957} +{"epoch": 0, "iter": 4071, "iter_tflops": 52.96477540001058, "iter_time": 0.38952479934692386, "loss": 0.05549318715929985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.75385024740745, "step_time": 0.3572245559692383} +{"epoch": 0, "iter": 4072, "iter_tflops": 49.340323797885674, "iter_time": 0.4181385917663574, "loss": 0.03956056386232376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.74802313564749, "step_time": 0.3838484153747559} +{"epoch": 0, "iter": 4073, "iter_tflops": 43.019068555669826, "iter_time": 0.47958020019531256, "loss": 0.12845179438591003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00585660187557, "step_time": 0.4389047451019287} +{"epoch": 0, "iter": 4074, "iter_tflops": 47.68956368376301, "iter_time": 0.4326123352050782, "loss": 0.19263629615306854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.01985686792607, "step_time": 0.39660035133361815} +{"epoch": 0, "iter": 4075, "iter_tflops": 49.85681550907302, "iter_time": 0.41380688476562505, "loss": 0.08163651078939438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.226796152180775, "step_time": 0.38045938491821285} +{"epoch": 0, "iter": 4076, "iter_tflops": 49.95949322027377, "iter_time": 0.4129564208984375, "loss": 0.13320216536521912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.35291888294554, "step_time": 0.37957655143737795} +{"epoch": 0, "iter": 4077, "iter_tflops": 31.227702572913483, "iter_time": 0.660666389465332, "loss": 0.35397276282310486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.16258757024978, "step_time": 0.6221195335388184} 
+{"epoch": 0, "iter": 4078, "iter_tflops": 13.888553877465394, "iter_time": 1.485474563598633, "loss": 0.3505985736846924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.859257146195148, "step_time": 1.155204460144043} +{"epoch": 0, "iter": 4079, "iter_tflops": 35.027151490179826, "iter_time": 0.5890028915405273, "loss": 0.3340826630592346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.76211781018619, "step_time": 0.5322488727569581} +{"epoch": 0, "iter": 4080, "iter_tflops": 39.67990771208727, "iter_time": 0.5199380416870117, "loss": 0.25969088077545166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.45021541741613, "step_time": 0.4748214321136474} +{"epoch": 0, "iter": 4081, "iter_tflops": 20.64763688730031, "iter_time": 0.9991987762451171, "loss": 1.0016131401062012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.89037511644948, "step_time": 0.9424732742309571} +{"epoch": 0, "iter": 4082, "iter_tflops": 7.011083043607984, "iter_time": 2.9426400146484375, "loss": 0.9019043445587158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.203003991175189, "step_time": 2.515065643310547} +{"epoch": 0, "iter": 4083, "iter_tflops": 13.37295793371279, "iter_time": 1.5427472076416016, "loss": 0.7982413172721863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.322308827494403, "step_time": 1.2639813232421873} +{"epoch": 0, "iter": 4084, "iter_tflops": 42.068794007639454, "iter_time": 0.4904132385253906, "loss": 0.9398494958877563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.45051383629724, "step_time": 0.4539243183135987} +{"epoch": 0, "iter": 4085, "iter_tflops": 24.4267733355122, "iter_time": 0.7210916595458985, "loss": 0.6032512187957764, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 25.895212925942815, "step_time": 0.6802007217407227} +{"epoch": 0, "iter": 4086, "iter_tflops": 13.351038883613873, "iter_time": 1.3192937774658202, "loss": 0.4818004071712494, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 15.623557371659187, "step_time": 1.127396411895752} 
+{"epoch": 0, "iter": 4087, "iter_tflops": 30.219255151493147, "iter_time": 0.5828714981079102, "loss": 0.37305161356925964, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 32.148150278163996, "step_time": 0.547899097442627} +{"epoch": 0, "iter": 4088, "iter_tflops": 32.99847681276299, "iter_time": 0.5337804718017578, "loss": 0.47512874007225037, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 35.02501428853372, "step_time": 0.5028960838317871} +{"epoch": 0, "iter": 4089, "iter_tflops": 47.83658369614378, "iter_time": 0.43128275299072266, "loss": 0.2643905580043793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.721019730046315, "step_time": 0.3913257675170898} +{"epoch": 0, "iter": 4090, "iter_tflops": 48.12616217459176, "iter_time": 0.42868769454956057, "loss": 0.16798581182956696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.56041727687612, "step_time": 0.3925214939117431} +{"epoch": 0, "iter": 4091, "iter_tflops": 49.20254234416515, "iter_time": 0.4193095016479492, "loss": 0.1966259926557541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.523605086639066, "step_time": 0.3854578456878662} +{"epoch": 0, "iter": 4092, "iter_tflops": 54.23603423720708, "iter_time": 0.3803945808410645, "loss": 0.20867449045181274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.87255118354632, "step_time": 0.3504365463256836} +{"epoch": 0, "iter": 4093, "iter_tflops": 29.627091620168656, "iter_time": 0.6963590545654297, "loss": 0.6054180860519409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.398094107751398, "step_time": 0.6570810775756836} +{"epoch": 0, "iter": 4094, "iter_tflops": 14.879278569460665, "iter_time": 1.386565444946289, "loss": 0.6834335923194885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.155056898667137, "step_time": 1.0770572814941406} +{"epoch": 0, "iter": 4095, "iter_tflops": 36.38672067806222, "iter_time": 0.566995132446289, "loss": 0.5474627017974854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.72290417824654, "step_time": 0.5193752555847168} 
+{"epoch": 0, "iter": 4096, "iter_tflops": 38.951226715355084, "iter_time": 0.529664794921875, "loss": 0.5136355757713318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.71215202684073, "step_time": 0.48302631759643555} +{"epoch": 0, "iter": 4097, "iter_tflops": 32.938875545954744, "iter_time": 0.6263448028564452, "loss": 0.03714810311794281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.096389706025846, "step_time": 0.5715555953979492} +{"epoch": 0, "iter": 4098, "iter_tflops": 7.990532485499322, "iter_time": 2.5819422607421876, "loss": 0.03429549187421799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.317886096400967, "step_time": 2.21413883972168} +{"epoch": 0, "iter": 4099, "iter_tflops": 10.796261517801554, "iter_time": 1.9109479217529297, "loss": 0.05949242040514946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.404816777697606, "step_time": 1.5390806045532224} +{"epoch": 0, "iter": 4100, "iter_tflops": 50.5367951092447, "iter_time": 0.40823905563354496, "loss": 0.032898299396038055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.68601955425433, "step_time": 0.37048964309692384} +{"epoch": 0, "iter": 4101, "iter_tflops": 15.726072134239958, "iter_time": 0.9869591064453126, "loss": 0.3274785578250885, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 16.436248051673186, "step_time": 0.9443146667480469} +{"epoch": 0, "iter": 4102, "iter_tflops": 9.748461370449188, "iter_time": 1.5921476745605472, "loss": 0.4444703757762909, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 13.236239475254104, "step_time": 1.1726132736206054} +{"epoch": 0, "iter": 4103, "iter_tflops": 27.340986225913998, "iter_time": 0.5676821594238282, "loss": 0.4949241876602173, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 29.18166215373536, "step_time": 0.5318747787475586} +{"epoch": 0, "iter": 4104, "iter_tflops": 25.795398599148466, "iter_time": 0.6016960754394531, "loss": 0.5409778356552124, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 27.29028899088703, "step_time": 
0.568736743927002} +{"epoch": 0, "iter": 4105, "iter_tflops": 37.196493272012724, "iter_time": 0.5546515731811524, "loss": 0.33820095658302307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17735013508165, "step_time": 0.513500602722168} +{"epoch": 0, "iter": 4106, "iter_tflops": 13.910458205834775, "iter_time": 1.4831354370117187, "loss": 0.27163517475128174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.63703912910539, "step_time": 1.1697594680786132} +{"epoch": 0, "iter": 4107, "iter_tflops": 38.04609707181265, "iter_time": 0.5422657012939454, "loss": 0.4363974630832672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78723948245161, "step_time": 0.4937175502777099} +{"epoch": 0, "iter": 4108, "iter_tflops": 40.45913529091883, "iter_time": 0.5099242324829102, "loss": 0.3774830400943756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.34383088244892, "step_time": 0.46525284576416015} +{"epoch": 0, "iter": 4109, "iter_tflops": 31.377587266886803, "iter_time": 0.657510513305664, "loss": 0.22789646685123444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.735440497063955, "step_time": 0.5939493846893311} +{"epoch": 0, "iter": 4110, "iter_tflops": 40.354180701275496, "iter_time": 0.5112504615783691, "loss": 0.21697665750980377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.23215690311868, "step_time": 0.4664274806976318} +{"epoch": 0, "iter": 4111, "iter_tflops": 44.26729289470112, "iter_time": 0.4660572662353516, "loss": 0.25084957480430603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.37076102850515, "step_time": 0.4265199279785157} +{"epoch": 0, "iter": 4112, "iter_tflops": 44.016977422342656, "iter_time": 0.46870763778686525, "loss": 0.24864721298217773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.240442382453764, "step_time": 0.4276721458435059} +{"epoch": 0, "iter": 4113, "iter_tflops": 21.889294755210173, "iter_time": 0.942519790649414, "loss": 0.04408466815948486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.653065100004262, 
"step_time": 0.8722376327514649} +{"epoch": 0, "iter": 4114, "iter_tflops": 15.565237412531957, "iter_time": 1.3254596099853515, "loss": 0.028022995218634605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.4749513716718, "step_time": 1.116706241607666} +{"epoch": 0, "iter": 4115, "iter_tflops": 51.29950318685809, "iter_time": 0.4021694602966308, "loss": 0.034911397844552994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.94810944735451, "step_time": 0.36875407791137693} +{"epoch": 0, "iter": 4116, "iter_tflops": 53.47331277793211, "iter_time": 0.38582037353515625, "loss": 0.034935228526592255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.09697465107677, "step_time": 0.35511476516723633} +{"epoch": 0, "iter": 4117, "iter_tflops": 18.33268961547159, "iter_time": 1.1253718872070313, "loss": 0.8289260268211365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.000618183416858, "step_time": 1.0858116989135742} +{"epoch": 0, "iter": 4118, "iter_tflops": 20.62531687301016, "iter_time": 1.0002800750732421, "loss": 0.9283225536346436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.51699330098827, "step_time": 0.7497582778930664} +{"epoch": 0, "iter": 4119, "iter_tflops": 44.668238710760576, "iter_time": 0.4618738975524902, "loss": 0.6832037568092346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12768894453852, "step_time": 0.4286740951538086} +{"epoch": 0, "iter": 4120, "iter_tflops": 42.856689523757325, "iter_time": 0.48139727401733395, "loss": 0.8126125931739807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.07310526048032, "step_time": 0.44779038429260254} +{"epoch": 0, "iter": 4121, "iter_tflops": 29.126511619971055, "iter_time": 0.7026285629272461, "loss": 0.13129112124443054, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 30.898733734540134, "step_time": 0.6623287277221679} +{"epoch": 0, "iter": 4122, "iter_tflops": 19.145154983533452, "iter_time": 1.068945068359375, "loss": 0.13299107551574707, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 
21.784560239645003, "step_time": 0.9394322757720948} +{"epoch": 0, "iter": 4123, "iter_tflops": 41.02088630230721, "iter_time": 0.4988950958251953, "loss": 0.15659968554973602, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 44.87233336057999, "step_time": 0.45607432174682616} +{"epoch": 0, "iter": 4124, "iter_tflops": 39.03692509868228, "iter_time": 0.5242502822875977, "loss": 0.15836101770401, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 42.90473573115695, "step_time": 0.47698974609375} +{"epoch": 0, "iter": 4125, "iter_tflops": 20.05413950298521, "iter_time": 1.0287698211669922, "loss": 0.08609554171562195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.07978106971035, "step_time": 0.9787147903442383} +{"epoch": 0, "iter": 4126, "iter_tflops": 22.430590880837556, "iter_time": 0.9197748565673828, "loss": 0.061683740466833115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.464957976906867, "step_time": 0.7511787757873535} +{"epoch": 0, "iter": 4127, "iter_tflops": 44.93615142955953, "iter_time": 0.45912017059326166, "loss": 0.08330672979354858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.01851199573107, "step_time": 0.4208837165832519} +{"epoch": 0, "iter": 4128, "iter_tflops": 49.69118645412476, "iter_time": 0.41518617248535156, "loss": 0.07116532325744629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.39725536411601, "step_time": 0.3792671775817871} +{"epoch": 0, "iter": 4129, "iter_tflops": 31.804639523112662, "iter_time": 0.6486818847656249, "loss": 0.9154776334762573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.196535455351565, "step_time": 0.5861683044433594} +{"epoch": 0, "iter": 4130, "iter_tflops": 32.91991626027052, "iter_time": 0.6267055282592773, "loss": 0.8117319941520691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.37487049862132, "step_time": 0.5671798477172851} +{"epoch": 0, "iter": 4131, "iter_tflops": 40.342564787104315, "iter_time": 0.5113976669311524, "loss": 0.8272556662559509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.08872945171501, "step_time": 0.46794484138488773} +{"epoch": 0, "iter": 4132, "iter_tflops": 43.417224573278524, "iter_time": 0.47518222808837896, "loss": 0.8700538277626038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.11692303639345, "step_time": 0.43787013626098636} +{"epoch": 0, "iter": 4133, "iter_tflops": 18.162202868025002, "iter_time": 1.1359356384277344, "loss": 0.15323126316070557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.295622171315372, "step_time": 1.0692111053466797} +{"epoch": 0, "iter": 4134, "iter_tflops": 44.040981711664614, "iter_time": 0.46845217132568356, "loss": 0.15929238498210907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.34310536260453, "step_time": 0.4267639274597168} +{"epoch": 0, "iter": 4135, "iter_tflops": 53.11070559847564, "iter_time": 0.388454517364502, "loss": 0.1608620136976242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.88753690807704, "step_time": 0.3563995742797852} +{"epoch": 0, "iter": 4136, "iter_tflops": 50.92491925816757, "iter_time": 0.40512766265869143, "loss": 0.10727717727422714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.366595540325726, "step_time": 0.37262709236145014} +{"epoch": 0, "iter": 4137, "iter_tflops": 36.37167470477349, "iter_time": 0.5672296829223633, "loss": 0.3066268861293793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.280280325654154, "step_time": 0.5252277565002441} +{"epoch": 0, "iter": 4138, "iter_tflops": 45.179277460872385, "iter_time": 0.45664947891235347, "loss": 0.36644887924194336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.00425586435446, "step_time": 0.41258675193786626} +{"epoch": 0, "iter": 4139, "iter_tflops": 50.76852423869087, "iter_time": 0.40637567901611327, "loss": 0.32267653942108154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.880163537381456, "step_time": 0.3759298839569092} +{"epoch": 0, "iter": 4140, "iter_tflops": 52.51640562770367, "iter_time": 0.39285044860839846, "loss": 0.22093185782432556, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 57.03032322609991, "step_time": 0.3617565593719482} +{"epoch": 0, "iter": 4141, "iter_tflops": 37.53543380012646, "iter_time": 0.5496431350708008, "loss": 0.4085904061794281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.482834392640804, "step_time": 0.5096257171630859} +{"epoch": 0, "iter": 4142, "iter_tflops": 22.911761955489496, "iter_time": 0.9004586181640625, "loss": 0.2706824839115143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.1044225844331, "step_time": 0.7340870800018311} +{"epoch": 0, "iter": 4143, "iter_tflops": 46.89282250465351, "iter_time": 0.4399627151489258, "loss": 0.3666566014289856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.0586804146104, "step_time": 0.40406632804870607} +{"epoch": 0, "iter": 4144, "iter_tflops": 41.87701023500785, "iter_time": 0.4926591796875, "loss": 0.23262079060077667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.696206306291025, "step_time": 0.45148372650146484} +{"epoch": 0, "iter": 4145, "iter_tflops": 35.743367169965104, "iter_time": 0.5772006149291992, "loss": 0.7582280039787292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.24822646688163, "step_time": 0.5256567077636718} +{"epoch": 0, "iter": 4146, "iter_tflops": 35.07042066435425, "iter_time": 0.5882761917114258, "loss": 0.726186990737915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.31927110015736, "step_time": 0.524706916809082} +{"epoch": 0, "iter": 4147, "iter_tflops": 44.54462773009839, "iter_time": 0.4631555938720703, "loss": 0.9112411737442017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.401913502490224, "step_time": 0.42624541091918944} +{"epoch": 0, "iter": 4148, "iter_tflops": 42.100313189291384, "iter_time": 0.49004608154296875, "loss": 0.8285517692565918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.80150870719122, "step_time": 0.4504457187652588} +{"epoch": 0, "iter": 4149, "iter_tflops": 31.844341563861978, "iter_time": 0.6478731384277344, "loss": 0.7271822094917297, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 34.999320081183114, "step_time": 0.5894712657928467} +{"epoch": 0, "iter": 4150, "iter_tflops": 39.85439685902106, "iter_time": 0.5176616668701173, "loss": 0.8469135761260986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.835209023024014, "step_time": 0.4816386795043945} +{"epoch": 0, "iter": 4151, "iter_tflops": 41.238821857418785, "iter_time": 0.500283290863037, "loss": 0.9760844707489014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.479015633219596, "step_time": 0.4638388061523437} +{"epoch": 0, "iter": 4152, "iter_tflops": 45.97405286893847, "iter_time": 0.4487551612854004, "loss": 0.8742648363113403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50205463687578, "step_time": 0.41677246856689454} +{"epoch": 0, "iter": 4153, "iter_tflops": 29.07839498444323, "iter_time": 0.7094990463256835, "loss": 0.24903932213783264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.779317646199612, "step_time": 0.6702908020019532} +{"epoch": 0, "iter": 4154, "iter_tflops": 11.807689046695076, "iter_time": 1.747259216308594, "loss": 0.27328255772590637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.207637356351388, "step_time": 1.5620578422546387} +{"epoch": 0, "iter": 4155, "iter_tflops": 40.208498099123894, "iter_time": 0.513102813720703, "loss": 0.17594046890735626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.234282494428236, "step_time": 0.46640506744384763} +{"epoch": 0, "iter": 4156, "iter_tflops": 39.17962404375011, "iter_time": 0.5265771179199219, "loss": 0.17065854370594025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.946798555653935, "step_time": 0.48038722801208494} +{"epoch": 0, "iter": 4157, "iter_tflops": 18.360757488176123, "iter_time": 1.1123526458740236, "loss": 0.12324097752571106, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 19.481745916937324, "step_time": 1.0483473739624025} +{"epoch": 0, "iter": 4158, "iter_tflops": 17.233633629967574, "iter_time": 1.1851033630371095, "loss": 0.14911028742790222, "lr": 3e-05, "seqlen": 
8112.0, "step_tflops": 20.97854515613106, "step_time": 0.9735487861633301} +{"epoch": 0, "iter": 4159, "iter_tflops": 38.76119223597434, "iter_time": 0.5269094161987304, "loss": 0.059783145785331726, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 42.69803795409938, "step_time": 0.47832729911804195} +{"epoch": 0, "iter": 4160, "iter_tflops": 37.960057379493506, "iter_time": 0.538029670715332, "loss": 0.10446729511022568, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 41.95551013748016, "step_time": 0.48679272651672356} +{"epoch": 0, "iter": 4161, "iter_tflops": 40.25801084516935, "iter_time": 0.5124717559814453, "loss": 0.004732534289360046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.021966563579866, "step_time": 0.45824505424499506} +{"epoch": 0, "iter": 4162, "iter_tflops": 41.6580545448569, "iter_time": 0.495248607635498, "loss": 0.005975720938295126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14366422000171, "step_time": 0.4471056613922119} +{"epoch": 0, "iter": 4163, "iter_tflops": 45.083557504501705, "iter_time": 0.45761902236938473, "loss": 0.0044550648890435696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89303955932706, "step_time": 0.4135064468383789} +{"epoch": 0, "iter": 4164, "iter_tflops": 44.69670595371212, "iter_time": 0.4615797309875488, "loss": 0.00575279351323843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.668618970183005, "step_time": 0.41537481689453126} +{"epoch": 0, "iter": 4165, "iter_tflops": 21.901788413080354, "iter_time": 0.9419821395874024, "loss": 0.940765917301178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.483354900263432, "step_time": 0.8785411453247072} +{"epoch": 0, "iter": 4166, "iter_tflops": 15.07216758259972, "iter_time": 1.3688206024169922, "loss": 0.9852179288864136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.093566149150266, "step_time": 1.0805259399414062} +{"epoch": 0, "iter": 4167, "iter_tflops": 33.26841560867374, "iter_time": 0.6201405487060547, "loss": 1.0369994640350342, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 36.16701066525955, "step_time": 0.5704395561218261} +{"epoch": 0, "iter": 4168, "iter_tflops": 39.0114170738162, "iter_time": 0.5288475799560547, "loss": 1.0402557849884033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25669725536974, "step_time": 0.4882325134277343} +{"epoch": 0, "iter": 4169, "iter_tflops": 26.544871551138304, "iter_time": 0.7772157974243165, "loss": 1.0825172662734985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.140288532721446, "step_time": 0.7079920806884765} +{"epoch": 0, "iter": 4170, "iter_tflops": 33.2954437891271, "iter_time": 0.6196371383666993, "loss": 0.8583795428276062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.36366909461398, "step_time": 0.5673545608520508} +{"epoch": 0, "iter": 4171, "iter_tflops": 37.96671326949874, "iter_time": 0.5433995132446289, "loss": 0.8098399639129639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.17508013803129, "step_time": 0.5010577621459961} +{"epoch": 0, "iter": 4172, "iter_tflops": 35.557259733374536, "iter_time": 0.5802216949462891, "loss": 0.8008418083190918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.68576487147766, "step_time": 0.5332993564605712} +{"epoch": 0, "iter": 4173, "iter_tflops": 29.88303565095086, "iter_time": 0.6903948364257813, "loss": 0.5611156821250916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.611115354828826, "step_time": 0.632639923095703} +{"epoch": 0, "iter": 4174, "iter_tflops": 8.821557746878778, "iter_time": 2.3387131958007816, "loss": 0.5632142424583435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.337879886663863, "step_time": 1.9956793594360351} +{"epoch": 0, "iter": 4175, "iter_tflops": 11.371347841958354, "iter_time": 1.8143050231933593, "loss": 0.6240252256393433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.66744787174617, "step_time": 1.6286700935363767} +{"epoch": 0, "iter": 4176, "iter_tflops": 30.6313810454599, "iter_time": 0.6735280227661133, "loss": 0.5130041241645813, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 39.29467892464601, "step_time": 0.5250352993011475} +{"epoch": 0, "iter": 4177, "iter_tflops": 24.22191332706508, "iter_time": 0.7373892745971681, "loss": 0.41585835814476013, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 25.85270587394633, "step_time": 0.6908746490478515} +{"epoch": 0, "iter": 4178, "iter_tflops": 16.020698635516556, "iter_time": 1.1148689270019532, "loss": 0.33481094241142273, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 19.36492090394674, "step_time": 0.9223367958068848} +{"epoch": 0, "iter": 4179, "iter_tflops": 32.234119799072374, "iter_time": 0.5541016540527344, "loss": 0.41518545150756836, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 34.288736766922746, "step_time": 0.5208993034362793} +{"epoch": 0, "iter": 4180, "iter_tflops": 29.538124589203164, "iter_time": 0.6046754608154297, "loss": 0.46682098507881165, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 31.283960625855897, "step_time": 0.5709308776855468} +{"epoch": 0, "iter": 4181, "iter_tflops": 47.125328286227955, "iter_time": 0.4377920379638671, "loss": 0.08005747944116592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.328144071818016, "step_time": 0.39426381111145015} +{"epoch": 0, "iter": 4182, "iter_tflops": 49.19372753039796, "iter_time": 0.41938463592529296, "loss": 0.07348419725894928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.7663070527162, "step_time": 0.3767114238739014} +{"epoch": 0, "iter": 4183, "iter_tflops": 51.94032377362714, "iter_time": 0.3972076416015625, "loss": 0.06430855393409729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.832747792718415, "step_time": 0.36301418304443356} +{"epoch": 0, "iter": 4184, "iter_tflops": 53.11855202141887, "iter_time": 0.3883971366882324, "loss": 0.08094626665115356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.866715781189534, "step_time": 0.356527811050415} +{"epoch": 0, "iter": 4185, "iter_tflops": 30.71598645856151, "iter_time": 0.6716728286743164, "loss": 0.11377260833978653, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.73739780467946, "step_time": 0.6301995544433594} +{"epoch": 0, "iter": 4186, "iter_tflops": 36.623725366154076, "iter_time": 0.5633259124755859, "loss": 0.1673981100320816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.619939239300344, "step_time": 0.5079055728912353} +{"epoch": 0, "iter": 4187, "iter_tflops": 39.29563351534246, "iter_time": 0.5250225448608399, "loss": 0.24762889742851257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.22146749779296, "step_time": 0.47733440589904785} +{"epoch": 0, "iter": 4188, "iter_tflops": 47.359167064213594, "iter_time": 0.43563041305541994, "loss": 0.2379124015569687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45065863274731, "step_time": 0.40098793792724613} +{"epoch": 0, "iter": 4189, "iter_tflops": 20.92836484433275, "iter_time": 0.9857957687377931, "loss": 0.07543031126260757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.880708108800967, "step_time": 0.942889663696289} +{"epoch": 0, "iter": 4190, "iter_tflops": 7.99166286945543, "iter_time": 2.5815770568847656, "loss": 0.07003737986087799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.22295279997767, "step_time": 2.0181149139404297} +{"epoch": 0, "iter": 4191, "iter_tflops": 17.051256857126237, "iter_time": 1.2099456176757815, "loss": 0.11943723261356354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.28429652673744, "step_time": 1.017096820831299} +{"epoch": 0, "iter": 4192, "iter_tflops": 34.72240601369102, "iter_time": 0.5941723480224609, "loss": 0.0626540407538414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.811480483599205, "step_time": 0.531571928024292} +{"epoch": 0, "iter": 4193, "iter_tflops": 22.087415219912632, "iter_time": 0.7101192932128906, "loss": 0.48176148533821106, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 23.46093578616803, "step_time": 0.6685453567504882} +{"epoch": 0, "iter": 4194, "iter_tflops": 12.677068008064573, "iter_time": 1.2372497863769532, "loss": 
0.46373286843299866, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.250133848513503, "step_time": 0.9652043380737305} +{"epoch": 0, "iter": 4195, "iter_tflops": 22.386126984123035, "iter_time": 0.7006437377929687, "loss": 0.468452125787735, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 24.120274693184832, "step_time": 0.6502703590393065} +{"epoch": 0, "iter": 4196, "iter_tflops": 22.30918482660152, "iter_time": 0.7030601882934571, "loss": 0.4897836148738861, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 23.876221321249712, "step_time": 0.6569171676635741} +{"epoch": 0, "iter": 4197, "iter_tflops": 30.59234075824092, "iter_time": 0.6743875427246094, "loss": 0.29575052857398987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.075359568286444, "step_time": 0.6054549026489258} +{"epoch": 0, "iter": 4198, "iter_tflops": 37.08890430062847, "iter_time": 0.5562605285644531, "loss": 0.40555867552757263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.943054106002066, "step_time": 0.5038972778320312} +{"epoch": 0, "iter": 4199, "iter_tflops": 38.54532889974405, "iter_time": 0.5352423782348632, "loss": 0.5102224349975586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.34283597811988, "step_time": 0.487239294052124} +{"epoch": 0, "iter": 4200, "iter_tflops": 37.56101119922778, "iter_time": 0.5492688522338868, "loss": 0.47293201088905334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.80510490437329, "step_time": 0.5056007957458497} +{"epoch": 0, "iter": 4201, "iter_tflops": 23.732192661427344, "iter_time": 0.8693294296264649, "loss": 0.05519145727157593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.61048574729972, "step_time": 0.8055721282958984} +{"epoch": 0, "iter": 4202, "iter_tflops": 42.90274603999432, "iter_time": 0.4808804893493652, "loss": 0.044141918420791626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.80125186281668, "step_time": 0.4408235397338867} +{"epoch": 0, "iter": 4203, "iter_tflops": 49.270504770961544, "iter_time": 0.4187311172485351, 
"loss": 0.08108269423246384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.87147396257171, "step_time": 0.38296879577636717} +{"epoch": 0, "iter": 4204, "iter_tflops": 52.401815917925425, "iter_time": 0.3937095146179199, "loss": 0.16825217008590698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.972981255105495, "step_time": 0.3621206588745117} +{"epoch": 0, "iter": 4205, "iter_tflops": 35.34907613358018, "iter_time": 0.5836388320922852, "loss": 0.4956175982952118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.80156786308242, "step_time": 0.5457734870910644} +{"epoch": 0, "iter": 4206, "iter_tflops": 17.008296936277233, "iter_time": 1.213001724243164, "loss": 0.5529592633247375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.4490311082715, "step_time": 1.0089032287597657} +{"epoch": 0, "iter": 4207, "iter_tflops": 38.154071130811914, "iter_time": 0.5407311172485352, "loss": 0.6338456273078918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93930544515758, "step_time": 0.4919274005889892} +{"epoch": 0, "iter": 4208, "iter_tflops": 41.97026488980201, "iter_time": 0.4915645294189453, "loss": 0.6427684426307678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74019163358646, "step_time": 0.4510495643615723} +{"epoch": 0, "iter": 4209, "iter_tflops": 24.157736334975056, "iter_time": 0.8540160064697266, "loss": 0.0851457267999649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.79170364646674, "step_time": 0.799912010192871} +{"epoch": 0, "iter": 4210, "iter_tflops": 23.70120100244233, "iter_time": 0.870466163635254, "loss": 0.07368670403957367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.64833233812386, "step_time": 0.6319187545776367} +{"epoch": 0, "iter": 4211, "iter_tflops": 44.64403795101001, "iter_time": 0.4621242713928223, "loss": 0.08970732986927032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.00012609111923, "step_time": 0.42104164123535154} +{"epoch": 0, "iter": 4212, "iter_tflops": 41.45814850896248, "iter_time": 0.49763663482666015, 
"loss": 0.11238696426153183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.81128362369902, "step_time": 0.45034960556030273} +{"epoch": 0, "iter": 4213, "iter_tflops": 17.36833438598015, "iter_time": 1.1878567657470704, "loss": 0.4456852972507477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.426707523777342, "step_time": 1.1196299438476562} +{"epoch": 0, "iter": 4214, "iter_tflops": 21.627978612828752, "iter_time": 0.9539076156616211, "loss": 0.4027057886123657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.14130033612685, "step_time": 0.7079674987792969} +{"epoch": 0, "iter": 4215, "iter_tflops": 45.964650389300985, "iter_time": 0.4488469581604004, "loss": 0.6451135873794556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.65765773976989, "step_time": 0.41546650505065913} +{"epoch": 0, "iter": 4216, "iter_tflops": 44.95205292217777, "iter_time": 0.45895775985717774, "loss": 0.42051824927330017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.60606845415351, "step_time": 0.42445509719848634} +{"epoch": 0, "iter": 4217, "iter_tflops": 35.3675406129873, "iter_time": 0.5833341293334962, "loss": 0.9598702192306519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.88893676111498, "step_time": 0.5445149765014649} +{"epoch": 0, "iter": 4218, "iter_tflops": 41.5773270578152, "iter_time": 0.4962101936340333, "loss": 0.8731115460395813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.93407451530973, "step_time": 0.45914139175415036} +{"epoch": 0, "iter": 4219, "iter_tflops": 44.25570247147286, "iter_time": 0.46617932510375976, "loss": 1.0082486867904663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.55948174975884, "step_time": 0.4337955913543701} +{"epoch": 0, "iter": 4220, "iter_tflops": 46.1656766635571, "iter_time": 0.44689247512817387, "loss": 0.9818437695503235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.61754054306175, "step_time": 0.4158024215698243} +{"epoch": 0, "iter": 4221, "iter_tflops": 38.033997508913664, "iter_time": 0.5424382095336915, 
"loss": 0.24407696723937988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.964400655191916, "step_time": 0.5036346969604493} +{"epoch": 0, "iter": 4222, "iter_tflops": 36.1884068350696, "iter_time": 0.5701022872924804, "loss": 0.2791755199432373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.818425801732744, "step_time": 0.5181293106079101} +{"epoch": 0, "iter": 4223, "iter_tflops": 39.3895365908209, "iter_time": 0.52377091217041, "loss": 0.3781748414039612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.22301102439535, "step_time": 0.4773173599243164} +{"epoch": 0, "iter": 4224, "iter_tflops": 43.870438770249685, "iter_time": 0.4702732429504395, "loss": 0.2843416929244995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.8647272186888, "step_time": 0.43102916717529294} +{"epoch": 0, "iter": 4225, "iter_tflops": 22.127442387408923, "iter_time": 0.9323758773803712, "loss": 0.12645968794822693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.598104102752995, "step_time": 0.8742691116333007} +{"epoch": 0, "iter": 4226, "iter_tflops": 10.878620962432386, "iter_time": 1.8964805908203126, "loss": 0.050637293606996536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.328655653092042, "step_time": 1.6734260482788084} +{"epoch": 0, "iter": 4227, "iter_tflops": 11.534938244018917, "iter_time": 1.788574249267578, "loss": 0.014566397294402122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.914725299364875, "step_time": 1.482680618286133} +{"epoch": 0, "iter": 4228, "iter_tflops": 33.9857483718633, "iter_time": 0.6070513229370117, "loss": 0.022209225222468376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.35243736296676, "step_time": 0.47589235496521} +{"epoch": 0, "iter": 4229, "iter_tflops": 14.759723598847769, "iter_time": 1.249214340209961, "loss": 0.3557431697845459, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 15.667668893793449, "step_time": 1.1768220596313477} +{"epoch": 0, "iter": 4230, "iter_tflops": 12.953925697296874, "iter_time": 1.4233568115234378, 
"loss": 0.7212072610855103, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 16.96816666656748, "step_time": 1.0866264305114746} +{"epoch": 0, "iter": 4231, "iter_tflops": 28.051694318760905, "iter_time": 0.6572885818481444, "loss": 0.3973303735256195, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 30.1796300412202, "step_time": 0.610943817138672} +{"epoch": 0, "iter": 4232, "iter_tflops": 29.346144791250516, "iter_time": 0.6282957611083985, "loss": 0.4144803285598755, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 31.267640271590793, "step_time": 0.5896849975585938} +{"epoch": 0, "iter": 4233, "iter_tflops": 17.721226990848898, "iter_time": 1.1642023162841797, "loss": 0.013482869602739811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.945695180501456, "step_time": 1.0889594345092775} +{"epoch": 0, "iter": 4234, "iter_tflops": 16.04760561260552, "iter_time": 1.285618179321289, "loss": 0.005841187667101622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.953452129648078, "step_time": 0.8267831401824951} +{"epoch": 0, "iter": 4235, "iter_tflops": 45.531460351742815, "iter_time": 0.4531173248291016, "loss": 0.01056742388755083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.58769340827665, "step_time": 0.4078283100128174} +{"epoch": 0, "iter": 4236, "iter_tflops": 47.300542720214146, "iter_time": 0.4361703338623047, "loss": 0.011934002861380577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40734336629765, "step_time": 0.393667989730835} +{"epoch": 0, "iter": 4237, "iter_tflops": 24.78422936166387, "iter_time": 0.8324282836914062, "loss": 0.06880602240562439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.71027873159338, "step_time": 0.7724027786254882} +{"epoch": 0, "iter": 4238, "iter_tflops": 40.65045370304308, "iter_time": 0.5075243110656739, "loss": 0.0433967150747776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.869830678457326, "step_time": 0.4597987823486328} +{"epoch": 0, "iter": 4239, "iter_tflops": 43.26801082164163, "iter_time": 
0.4768209381103516, "loss": 0.04002867639064789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90292773097529, "step_time": 0.4306854400634766} +{"epoch": 0, "iter": 4240, "iter_tflops": 47.75782342013416, "iter_time": 0.43199400711059566, "loss": 0.0778549462556839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.55658294083395, "step_time": 0.39255013084411616} +{"epoch": 0, "iter": 4241, "iter_tflops": 38.20271639225444, "iter_time": 0.540042579650879, "loss": 0.10156405717134476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.417193257726254, "step_time": 0.48638516426086426} +{"epoch": 0, "iter": 4242, "iter_tflops": 39.225962381109376, "iter_time": 0.525955062866211, "loss": 0.07444660365581512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24679598968032, "step_time": 0.47705484390258784} +{"epoch": 0, "iter": 4243, "iter_tflops": 42.28986837787532, "iter_time": 0.4878495559692383, "loss": 0.06693460047245026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.452562926855336, "step_time": 0.44413251304626467} +{"epoch": 0, "iter": 4244, "iter_tflops": 44.79517693854323, "iter_time": 0.46056506347656245, "loss": 0.07442021369934082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1463030445306, "step_time": 0.41978932762145993} +{"epoch": 0, "iter": 4245, "iter_tflops": 1.7283609101247681, "iter_time": 0.9295158843994141, "loss": 0.015209089033305645, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.850349598368548, "step_time": 0.8682353439331055} +{"epoch": 0, "iter": 4246, "iter_tflops": 2.1798554121830214, "iter_time": 0.7369933395385742, "loss": 0.07408375293016434, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.186918409428107, "step_time": 0.5041041889190674} +{"epoch": 0, "iter": 4247, "iter_tflops": 3.5828950908562414, "iter_time": 0.4483912811279297, "loss": 0.09936414659023285, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.8719614299253284, "step_time": 0.4149160442352295} +{"epoch": 0, "iter": 4248, "iter_tflops": 3.9959975424197065, 
"iter_time": 0.4020370140075683, "loss": 0.14202407002449036, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.310498684776285, "step_time": 0.37270372581481936} +{"epoch": 0, "iter": 4249, "iter_tflops": 29.342528180971957, "iter_time": 0.7031123352050782, "loss": 0.0629536584019661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.127774514489406, "step_time": 0.6627872962951661} +{"epoch": 0, "iter": 4250, "iter_tflops": 12.825281261555691, "iter_time": 1.608626983642578, "loss": 0.032870952039957047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.145792909811181, "step_time": 1.362166618347168} +{"epoch": 0, "iter": 4251, "iter_tflops": 41.28423549569798, "iter_time": 0.49973296737670897, "loss": 0.03673752024769783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.666851573264054, "step_time": 0.45177394104003904} +{"epoch": 0, "iter": 4252, "iter_tflops": 37.79623015297694, "iter_time": 0.5458505630493163, "loss": 0.04543781280517578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.46471058501035, "step_time": 0.49755788040161136} +{"epoch": 0, "iter": 4253, "iter_tflops": 26.17430596871757, "iter_time": 0.7882193145751955, "loss": 0.050649601966142654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.156242844042996, "step_time": 0.732736026763916} +{"epoch": 0, "iter": 4254, "iter_tflops": 21.606647180662172, "iter_time": 0.9548493728637696, "loss": 0.06794910132884979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.665684265006902, "step_time": 0.803839605331421} +{"epoch": 0, "iter": 4255, "iter_tflops": 53.20791370036451, "iter_time": 0.38774483108520513, "loss": 0.033005062490701675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.391362350527174, "step_time": 0.35332440757751465} +{"epoch": 0, "iter": 4256, "iter_tflops": 58.38492986331571, "iter_time": 0.35336333465576175, "loss": 0.055099889636039734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.803041013577236, "step_time": 0.3233559589385987} +{"epoch": 0, "iter": 4257, "iter_tflops": 
26.326804661409025, "iter_time": 0.7836535339355468, "loss": 0.9225960969924927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.7442374430999, "step_time": 0.7436172485351563} +{"epoch": 0, "iter": 4258, "iter_tflops": 13.063481327683094, "iter_time": 1.5792952117919923, "loss": 0.921107828617096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.789717689709551, "step_time": 1.3066157302856447} +{"epoch": 0, "iter": 4259, "iter_tflops": 14.751486990069795, "iter_time": 1.3985772094726563, "loss": 0.9320582151412964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.16870890556127, "step_time": 1.275988925933838} +{"epoch": 0, "iter": 4260, "iter_tflops": 26.96797106659691, "iter_time": 0.765022087097168, "loss": 0.8596043586730957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.51560647018363, "step_time": 0.6155667667388915} +{"epoch": 0, "iter": 4261, "iter_tflops": 18.117477012411307, "iter_time": 0.8341142883300781, "loss": 0.29005667567253113, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 19.07730042320077, "step_time": 0.7921480560302735} +{"epoch": 0, "iter": 4262, "iter_tflops": 13.112667566902108, "iter_time": 1.1524768981933593, "loss": 0.5028071403503418, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 14.516035079522117, "step_time": 1.0410588264465332} +{"epoch": 0, "iter": 4263, "iter_tflops": 28.033679366945332, "iter_time": 0.5390675354003907, "loss": 0.5034102201461792, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.84038580595184, "step_time": 0.5064293251037598} +{"epoch": 0, "iter": 4264, "iter_tflops": 27.447383162667016, "iter_time": 0.5505824127197265, "loss": 0.49308639764785767, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.160318155567637, "step_time": 0.5182401084899902} +{"epoch": 0, "iter": 4265, "iter_tflops": 30.75704464532049, "iter_time": 0.6707761993408203, "loss": 0.32703277468681335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.65113181677842, "step_time": 0.631864574432373} +{"epoch": 0, "iter": 4266, "iter_tflops": 
16.87579223769843, "iter_time": 1.2225259246826172, "loss": 0.3759082555770874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.141686652051433, "step_time": 1.0242982063293458} +{"epoch": 0, "iter": 4267, "iter_tflops": 38.33070243696381, "iter_time": 0.5382393798828125, "loss": 0.34587031602859497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95581713695559, "step_time": 0.4917338027954101} +{"epoch": 0, "iter": 4268, "iter_tflops": 39.87965499164281, "iter_time": 0.5173338012695312, "loss": 0.21646854281425476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.754076161297576, "step_time": 0.4715239200592041} +{"epoch": 0, "iter": 4269, "iter_tflops": 22.78018225082637, "iter_time": 0.9056597213745118, "loss": 0.9781255722045898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.55027472121352, "step_time": 0.8403610038757324} +{"epoch": 0, "iter": 4270, "iter_tflops": 17.48749483755727, "iter_time": 1.179762664794922, "loss": 0.7469932436943054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.39817939488709, "step_time": 0.9641518154144286} +{"epoch": 0, "iter": 4271, "iter_tflops": 45.6710939817297, "iter_time": 0.45173197555542, "loss": 0.8528258800506592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.24087084874964, "step_time": 0.4189831161499023} +{"epoch": 0, "iter": 4272, "iter_tflops": 44.10455592346586, "iter_time": 0.46777692413330074, "loss": 1.0429222583770752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.623263771373765, "step_time": 0.43321460723876953} +{"epoch": 0, "iter": 4273, "iter_tflops": 28.844398726876083, "iter_time": 0.7152547607421874, "loss": 0.35329553484916687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.65449935716442, "step_time": 0.6730200767517089} +{"epoch": 0, "iter": 4274, "iter_tflops": 14.87606499724834, "iter_time": 1.386864974975586, "loss": 0.40851670503616333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.262025227043406, "step_time": 1.195172248840332} +{"epoch": 0, "iter": 4275, "iter_tflops": 
49.649965581898385, "iter_time": 0.4155308723449707, "loss": 0.5030426979064941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.004009833257435, "step_time": 0.38202891921997073} +{"epoch": 0, "iter": 4276, "iter_tflops": 45.15829288159692, "iter_time": 0.4568616790771484, "loss": 0.3573463261127472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.586357873712025, "step_time": 0.42462729072570804} +{"epoch": 0, "iter": 4277, "iter_tflops": 39.66216519043425, "iter_time": 0.5201706314086915, "loss": 0.03518639877438545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87553709992213, "step_time": 0.48118565750122066} +{"epoch": 0, "iter": 4278, "iter_tflops": 21.058378307698007, "iter_time": 0.9797095108032227, "loss": 0.12240659445524216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.93479306316576, "step_time": 0.861970832824707} +{"epoch": 0, "iter": 4279, "iter_tflops": 50.663849777565595, "iter_time": 0.40721527481079106, "loss": 0.04914679750800133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.4597732469621, "step_time": 0.3720010433197022} +{"epoch": 0, "iter": 4280, "iter_tflops": 55.38792212824195, "iter_time": 0.37248361587524415, "loss": 0.061307914555072784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.59291785223811, "step_time": 0.3404868793487548} +{"epoch": 0, "iter": 4281, "iter_tflops": 38.4630444754428, "iter_time": 0.5363874282836915, "loss": 1.047868251800537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.39373225398156, "step_time": 0.4984110488891601} +{"epoch": 0, "iter": 4282, "iter_tflops": 29.286813590640676, "iter_time": 0.7044499206542969, "loss": 0.9649367928504944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.073809835201345, "step_time": 0.6237894458770752} +{"epoch": 0, "iter": 4283, "iter_tflops": 35.053609087612585, "iter_time": 0.5885583267211913, "loss": 0.9359124898910522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.19164836450292, "step_time": 0.5401990852355957} +{"epoch": 0, "iter": 4284, 
"iter_tflops": 34.003101752958166, "iter_time": 0.6067415161132812, "loss": 0.787394106388092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.73764895305503, "step_time": 0.5615790367126465} +{"epoch": 0, "iter": 4285, "iter_tflops": 13.275105304072868, "iter_time": 1.126062713623047, "loss": 0.34460723400115967, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 14.094495148982359, "step_time": 1.0605985488891603} +{"epoch": 0, "iter": 4286, "iter_tflops": 12.768476815537316, "iter_time": 1.170742706298828, "loss": 0.4206765592098236, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 15.119479936854113, "step_time": 0.9886981010437011} +{"epoch": 0, "iter": 4287, "iter_tflops": 22.731932680632134, "iter_time": 0.6576036148071289, "loss": 0.32773664593696594, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.307050361505112, "step_time": 0.6149903373718262} +{"epoch": 0, "iter": 4288, "iter_tflops": 22.07059395453589, "iter_time": 0.6773085098266601, "loss": 0.31051111221313477, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.648268925848676, "step_time": 0.6321224250793457} +{"epoch": 0, "iter": 4289, "iter_tflops": 22.982627755612555, "iter_time": 0.8976820983886719, "loss": 0.4414067268371582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.617981334580218, "step_time": 0.8380497665405272} +{"epoch": 0, "iter": 4290, "iter_tflops": 11.32148807539037, "iter_time": 1.8222952117919922, "loss": 0.5029545426368713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.527191939867453, "step_time": 1.5251571502685548} +{"epoch": 0, "iter": 4291, "iter_tflops": 14.694250898508837, "iter_time": 1.404024856567383, "loss": 0.38770994544029236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.425084153733227, "step_time": 1.1839881706237794} +{"epoch": 0, "iter": 4292, "iter_tflops": 24.839925659876254, "iter_time": 0.8305618057250976, "loss": 0.4100489020347595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.04172217730711, "step_time": 0.5887579784393311} +{"epoch": 0, "iter": 
4293, "iter_tflops": 21.95335564856058, "iter_time": 0.7704939651489258, "loss": 0.33570483326911926, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 23.315024459289205, "step_time": 0.7254947586059571} +{"epoch": 0, "iter": 4294, "iter_tflops": 11.857784383538963, "iter_time": 1.4264830169677734, "loss": 0.5202515125274658, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 15.86461396849247, "step_time": 1.0662048301696776} +{"epoch": 0, "iter": 4295, "iter_tflops": 30.766095048027697, "iter_time": 0.5497911911010742, "loss": 0.34942951798439026, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 32.81320961173377, "step_time": 0.5154914207458496} +{"epoch": 0, "iter": 4296, "iter_tflops": 30.186467057524595, "iter_time": 0.5603480529785156, "loss": 0.5241005420684814, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 32.04038452359833, "step_time": 0.52792525100708} +{"epoch": 0, "iter": 4297, "iter_tflops": 3.412159896685951, "iter_time": 0.4708275604248047, "loss": 0.6018694639205933, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.759542310139694, "step_time": 0.4273230056762696} +{"epoch": 0, "iter": 4298, "iter_tflops": 3.5461338642746068, "iter_time": 0.45303955841064447, "loss": 0.4334806203842163, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.8764697501672165, "step_time": 0.4144334983825683} +{"epoch": 0, "iter": 4299, "iter_tflops": 3.6334053331308884, "iter_time": 0.4421579132080078, "loss": 0.5905470848083496, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.9529504345136632, "step_time": 0.4064151439666748} +{"epoch": 0, "iter": 4300, "iter_tflops": 3.724146348334106, "iter_time": 0.43138447570800775, "loss": 0.6462217569351196, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.039558844984677, "step_time": 0.39770157623291014} +{"epoch": 0, "iter": 4301, "iter_tflops": 45.27533765461286, "iter_time": 0.4556806106567383, "loss": 0.27909985184669495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.395839098909015, "step_time": 0.41766865158081057} +{"epoch": 0, "iter": 
4302, "iter_tflops": 9.485727268657412, "iter_time": 2.174961700439453, "loss": 0.25167784094810486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.253609156764096, "step_time": 1.8332868347167968} +{"epoch": 0, "iter": 4303, "iter_tflops": 11.95881495169942, "iter_time": 1.725178756713867, "loss": 0.18557634949684143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.826557248167697, "step_time": 1.49213525390625} +{"epoch": 0, "iter": 4304, "iter_tflops": 22.09099077255786, "iter_time": 0.9339143600463867, "loss": 0.2658891975879669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.38110846479966, "step_time": 0.7820404338836671} +{"epoch": 0, "iter": 4305, "iter_tflops": 18.365308933950224, "iter_time": 0.876342674255371, "loss": 0.365225613117218, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 19.284064675159954, "step_time": 0.8345908508300781} +{"epoch": 0, "iter": 4306, "iter_tflops": 9.93435503435087, "iter_time": 1.6200653076171874, "loss": 0.30886197090148926, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 12.951516119638708, "step_time": 1.2426579093933106} +{"epoch": 0, "iter": 4307, "iter_tflops": 23.238622951439968, "iter_time": 0.6925670242309571, "loss": 0.3803977370262146, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 25.01765924450195, "step_time": 0.6433177375793457} +{"epoch": 0, "iter": 4308, "iter_tflops": 24.037562944568965, "iter_time": 0.6695480728149414, "loss": 0.5250867605209351, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 25.875628033160893, "step_time": 0.621986988067627} +{"epoch": 0, "iter": 4309, "iter_tflops": 20.854085306246255, "iter_time": 0.9893070449829102, "loss": 0.7767459154129028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.534315563741263, "step_time": 0.9155411643981934} +{"epoch": 0, "iter": 4310, "iter_tflops": 17.97806629481375, "iter_time": 1.1475702209472658, "loss": 0.7672544121742249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.752386664595736, "step_time": 0.8685903358459472} +{"epoch": 0, "iter": 
4311, "iter_tflops": 37.801314482603416, "iter_time": 0.5457771453857423, "loss": 1.1982227563858032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.46820113972888, "step_time": 0.497515998840332} +{"epoch": 0, "iter": 4312, "iter_tflops": 36.043175278199875, "iter_time": 0.5723994445800781, "loss": 0.8637481331825256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.1449522735965, "step_time": 0.5270435218811035} +{"epoch": 0, "iter": 4313, "iter_tflops": 35.44412637487641, "iter_time": 0.5820736923217774, "loss": 0.29777565598487854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.196674319111104, "step_time": 0.5263480606079102} +{"epoch": 0, "iter": 4314, "iter_tflops": 40.07534773326106, "iter_time": 0.5148075981140137, "loss": 0.28424108028411865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.082758160027126, "step_time": 0.45762713623046875} +{"epoch": 0, "iter": 4315, "iter_tflops": 44.178883141059664, "iter_time": 0.4669899291992187, "loss": 0.35080811381340027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.63701660064535, "step_time": 0.4241850128173828} +{"epoch": 0, "iter": 4316, "iter_tflops": 41.64289134515918, "iter_time": 0.4954289398193359, "loss": 0.3491986393928528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.43785813266505, "step_time": 0.45405074882507324} +{"epoch": 0, "iter": 4317, "iter_tflops": 20.930590244476047, "iter_time": 0.9856909561157225, "loss": 0.9300926923751831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.234273299732465, "step_time": 0.927896011352539} +{"epoch": 0, "iter": 4318, "iter_tflops": 15.07345217028137, "iter_time": 1.3687039489746093, "loss": 1.1117639541625977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.9761302577829, "step_time": 0.8979359569549561} +{"epoch": 0, "iter": 4319, "iter_tflops": 41.62828324006883, "iter_time": 0.4956027946472168, "loss": 0.7231948375701904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.660920112935216, "step_time": 0.46194958496093746} +{"epoch": 0, "iter": 
4320, "iter_tflops": 44.159552926467846, "iter_time": 0.4671943473815918, "loss": 0.7616785764694214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31780590028729, "step_time": 0.43601120376586916} +{"epoch": 0, "iter": 4321, "iter_tflops": 41.78975622975404, "iter_time": 0.49368781661987304, "loss": 0.028956955298781395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.99927694131257, "step_time": 0.44850908279418944} +{"epoch": 0, "iter": 4322, "iter_tflops": 51.21292587940862, "iter_time": 0.4028493423461914, "loss": 0.038639020174741745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.648526659891566, "step_time": 0.36419470596313475} +{"epoch": 0, "iter": 4323, "iter_tflops": 58.61925163198041, "iter_time": 0.35195081710815423, "loss": 0.0362103097140789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.25806961008931, "step_time": 0.3210661888122558} +{"epoch": 0, "iter": 4324, "iter_tflops": 61.080990086917865, "iter_time": 0.33776619338989256, "loss": 0.05988297611474991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.70650045836679, "step_time": 0.3092816047668457} +{"epoch": 0, "iter": 4325, "iter_tflops": 36.408966654354174, "iter_time": 0.566648696899414, "loss": 0.6122831106185913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.94468415967086, "step_time": 0.529753776550293} +{"epoch": 0, "iter": 4326, "iter_tflops": 22.754831296009897, "iter_time": 0.9066687088012694, "loss": 0.4399028718471527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.065251643759954, "step_time": 0.7351116523742676} +{"epoch": 0, "iter": 4327, "iter_tflops": 48.59273375080791, "iter_time": 0.42457157516479493, "loss": 0.6168749928474426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.67552749366222, "step_time": 0.39166372871398936} +{"epoch": 0, "iter": 4328, "iter_tflops": 50.61409200356246, "iter_time": 0.4076156005859375, "loss": 0.5825651288032532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.43260332394394, "step_time": 0.37902088546752927} +{"epoch": 0, 
"iter": 4329, "iter_tflops": 43.47729503073785, "iter_time": 0.474525691986084, "loss": 0.24401508271694183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.21533521995186, "step_time": 0.43695747184753425} +{"epoch": 0, "iter": 4330, "iter_tflops": 17.435158868056213, "iter_time": 1.1833040161132813, "loss": 0.24337002635002136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.617296323138532, "step_time": 0.9121821289062501} +{"epoch": 0, "iter": 4331, "iter_tflops": 42.92118758752727, "iter_time": 0.4806738739013672, "loss": 0.2107192724943161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.74866008615192, "step_time": 0.44131946182250975} +{"epoch": 0, "iter": 4332, "iter_tflops": 39.54388640833539, "iter_time": 0.5217265014648438, "loss": 0.20407766103744507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94748745814739, "step_time": 0.4803795223236084} +{"epoch": 0, "iter": 4333, "iter_tflops": 18.79215461189602, "iter_time": 1.0978567352294921, "loss": 0.18699519336223602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.17153036255255, "step_time": 1.0227827606201172} +{"epoch": 0, "iter": 4334, "iter_tflops": 25.95859433016161, "iter_time": 0.7947692871093751, "loss": 0.17035672068595886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.0135506042096, "step_time": 0.6444487762451172} +{"epoch": 0, "iter": 4335, "iter_tflops": 48.848102147844514, "iter_time": 0.42235199737548823, "loss": 0.18969301879405975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.96101383135248, "step_time": 0.3895524654388427} +{"epoch": 0, "iter": 4336, "iter_tflops": 47.48099672885131, "iter_time": 0.43451264572143555, "loss": 0.12059224396944046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.30136349203968, "step_time": 0.40215487670898437} +{"epoch": 0, "iter": 4337, "iter_tflops": 39.77628986484586, "iter_time": 0.5186781768798828, "loss": 0.9910297989845276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.16284287674472, "step_time": 0.47798273086547854} +{"epoch": 
0, "iter": 4338, "iter_tflops": 44.820531752991734, "iter_time": 0.46030452346801753, "loss": 0.9692586660385132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.64524578977026, "step_time": 0.4241132545471191} +{"epoch": 0, "iter": 4339, "iter_tflops": 46.59394093869471, "iter_time": 0.4427849006652832, "loss": 0.9645628929138184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34542472142956, "step_time": 0.40979083251953125} +{"epoch": 0, "iter": 4340, "iter_tflops": 44.669999279715334, "iter_time": 0.46185569381713865, "loss": 0.6518377661705017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.199882389770096, "step_time": 0.42803203010559077} +{"epoch": 0, "iter": 4341, "iter_tflops": 23.855804715435305, "iter_time": 0.8648248825073243, "loss": 0.8995907306671143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.065977061783382, "step_time": 0.8230715866088867} +{"epoch": 0, "iter": 4342, "iter_tflops": 15.709058177915415, "iter_time": 1.3133246612548828, "loss": 0.9272498488426208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.22189098680234, "step_time": 1.1322147369384765} +{"epoch": 0, "iter": 4343, "iter_tflops": 45.42748905623941, "iter_time": 0.4541543884277344, "loss": 0.8403728604316711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.194577501371576, "step_time": 0.4193773899078369} +{"epoch": 0, "iter": 4344, "iter_tflops": 45.086004945074215, "iter_time": 0.45759418106079097, "loss": 0.8652192950248718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.237370098462634, "step_time": 0.4276993846893311} +{"epoch": 0, "iter": 4345, "iter_tflops": 31.236573420088387, "iter_time": 0.6604787673950195, "loss": 0.2100362628698349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.29504880102959, "step_time": 0.6196444892883302} +{"epoch": 0, "iter": 4346, "iter_tflops": 11.925482634467098, "iter_time": 1.730000717163086, "loss": 0.17500261962413788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.0356548382184, "step_time": 1.372144660949707} 
+{"epoch": 0, "iter": 4347, "iter_tflops": 15.963036879756977, "iter_time": 1.2924291076660157, "loss": 0.2291577309370041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.479489087429812, "step_time": 1.116432029724121} +{"epoch": 0, "iter": 4348, "iter_tflops": 24.701165982465298, "iter_time": 0.8352275161743165, "loss": 0.2385912537574768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.29573790687054, "step_time": 0.6809899654388428} +{"epoch": 0, "iter": 4349, "iter_tflops": 24.80676224435459, "iter_time": 0.680211784362793, "loss": 0.36404135823249817, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.438480718665925, "step_time": 0.6382307739257812} +{"epoch": 0, "iter": 4350, "iter_tflops": 11.178131234033582, "iter_time": 1.5095414123535154, "loss": 0.4766920208930969, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 13.866083380505886, "step_time": 1.2169155158996583} +{"epoch": 0, "iter": 4351, "iter_tflops": 25.449265672613645, "iter_time": 0.6630388565063476, "loss": 0.5236998796463013, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.41406710880284, "step_time": 0.615518009185791} +{"epoch": 0, "iter": 4352, "iter_tflops": 26.002486369113175, "iter_time": 0.6489322509765626, "loss": 0.47466927766799927, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.95388246968787, "step_time": 0.6036317863464356} +{"epoch": 0, "iter": 4353, "iter_tflops": 16.678382924854496, "iter_time": 1.236996032714844, "loss": 0.10373180359601974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.83128175766923, "step_time": 1.1570168533325194} +{"epoch": 0, "iter": 4354, "iter_tflops": 20.75093430365471, "iter_time": 0.9942248001098634, "loss": 0.07880530506372452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.093083140857168, "step_time": 0.8563077373504638} +{"epoch": 0, "iter": 4355, "iter_tflops": 50.414321908651694, "iter_time": 0.40923080444335935, "loss": 0.12708482146263123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.93483706515271, "step_time": 
0.37555574226379396} +{"epoch": 0, "iter": 4356, "iter_tflops": 54.01937095200679, "iter_time": 0.3819202842712402, "loss": 0.06929556280374527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.86883314542264, "step_time": 0.35045867919921875} +{"epoch": 0, "iter": 4357, "iter_tflops": 33.4185586979232, "iter_time": 0.6173543777465821, "loss": 0.2998528778553009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.73807506095793, "step_time": 0.5772860870361328} +{"epoch": 0, "iter": 4358, "iter_tflops": 16.4647612472172, "iter_time": 1.25304541015625, "loss": 0.2123541533946991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.157361414315925, "step_time": 1.0235016918182374} +{"epoch": 0, "iter": 4359, "iter_tflops": 41.15909647145227, "iter_time": 0.5012523422241211, "loss": 0.21874871850013733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.12837323449724, "step_time": 0.4571645736694336} +{"epoch": 0, "iter": 4360, "iter_tflops": 39.86020514686799, "iter_time": 0.5175862350463867, "loss": 0.2574213743209839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.76375815437594, "step_time": 0.4714196033477784} +{"epoch": 0, "iter": 4361, "iter_tflops": 18.37293413991032, "iter_time": 1.1229068450927735, "loss": 0.9277072548866272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.471907974146767, "step_time": 1.0595311737060547} +{"epoch": 0, "iter": 4362, "iter_tflops": 16.561966892057455, "iter_time": 1.2456910247802733, "loss": 0.7513450980186462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.06291691358295, "step_time": 1.02831974029541} +{"epoch": 0, "iter": 4363, "iter_tflops": 40.26645370419651, "iter_time": 0.5123643035888673, "loss": 0.9931864738464355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.27294237390625, "step_time": 0.4767665977478027} +{"epoch": 0, "iter": 4364, "iter_tflops": 40.74821091437048, "iter_time": 0.5063067321777344, "loss": 0.7216176390647888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.75162483192605, "step_time": 
0.47155033874511715} +{"epoch": 0, "iter": 4365, "iter_tflops": 32.018687213551196, "iter_time": 0.6443453903198242, "loss": 0.024266144260764122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.398107102876175, "step_time": 0.5997740936279297} +{"epoch": 0, "iter": 4366, "iter_tflops": 48.09093463073061, "iter_time": 0.42900171661376957, "loss": 0.04677742347121239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.96522277826699, "step_time": 0.38952150917053224} +{"epoch": 0, "iter": 4367, "iter_tflops": 51.33744348405407, "iter_time": 0.40187224197387694, "loss": 0.058246318250894547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.33259982818722, "step_time": 0.3662371978759766} +{"epoch": 0, "iter": 4368, "iter_tflops": 55.059201631448396, "iter_time": 0.37470745849609377, "loss": 0.05159090831875801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.31531759432568, "step_time": 0.34205396461486814} +{"epoch": 0, "iter": 4369, "iter_tflops": 30.07814797021561, "iter_time": 0.6859163513183594, "loss": 0.363070011138916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.09234381972912, "step_time": 0.6428665237426758} +{"epoch": 0, "iter": 4370, "iter_tflops": 33.776344406185515, "iter_time": 0.6108148727416992, "loss": 0.5172395706176758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.08108839167407, "step_time": 0.5417674331665039} +{"epoch": 0, "iter": 4371, "iter_tflops": 38.32239893778352, "iter_time": 0.5383560028076172, "loss": 0.49934327602386475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.51041596340414, "step_time": 0.4970100402832031} +{"epoch": 0, "iter": 4372, "iter_tflops": 38.856010096704914, "iter_time": 0.5309627380371094, "loss": 0.2826118469238281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.15632570536226, "step_time": 0.4893949642181397} +{"epoch": 0, "iter": 4373, "iter_tflops": 18.077898471906856, "iter_time": 1.1412329559326173, "loss": 0.6298750042915344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.409063220420286, 
"step_time": 1.0629618377685546} +{"epoch": 0, "iter": 4374, "iter_tflops": 21.204365915329628, "iter_time": 0.9729644165039062, "loss": 0.6248207688331604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.885929247660382, "step_time": 0.7970003051757814} +{"epoch": 0, "iter": 4375, "iter_tflops": 37.33335034995816, "iter_time": 0.5526183242797852, "loss": 0.5749738812446594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.841998845112165, "step_time": 0.505144069671631} +{"epoch": 0, "iter": 4376, "iter_tflops": 38.50628707203921, "iter_time": 0.5357850646972656, "loss": 0.6350771188735962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.225835171726246, "step_time": 0.4885893535614013} +{"epoch": 0, "iter": 4377, "iter_tflops": 18.30161930427063, "iter_time": 1.1272824096679688, "loss": 0.0905858501791954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.132658582755038, "step_time": 1.0783181762695313} +{"epoch": 0, "iter": 4378, "iter_tflops": 23.37865883123175, "iter_time": 0.8824754943847658, "loss": 0.11257603019475937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.52162203978535, "step_time": 0.698846881866455} +{"epoch": 0, "iter": 4379, "iter_tflops": 47.27243545150473, "iter_time": 0.43642967224121093, "loss": 0.0829567238688469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.70300030088071, "step_time": 0.39903087615966804} +{"epoch": 0, "iter": 4380, "iter_tflops": 46.90369749621708, "iter_time": 0.4398607063293457, "loss": 0.11332771182060242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.738643725719534, "step_time": 0.4066149978637695} +{"epoch": 0, "iter": 4381, "iter_tflops": 16.257492824422535, "iter_time": 0.642356689453125, "loss": 0.07014250010251999, "lr": 3e-05, "seqlen": 4208.0, "step_tflops": 17.315315235214516, "step_time": 0.603114013671875} +{"epoch": 0, "iter": 4382, "iter_tflops": 9.237265052731448, "iter_time": 1.130541259765625, "loss": 0.031626515090465546, "lr": 3e-05, "seqlen": 4208.0, "step_tflops": 11.303377470791192, 
"step_time": 0.9238928184509277} +{"epoch": 0, "iter": 4383, "iter_tflops": 23.69330095199294, "iter_time": 0.44076210784912107, "loss": 0.025336742401123047, "lr": 3e-05, "seqlen": 4208.0, "step_tflops": 26.22460626864965, "step_time": 0.39821796226501466} +{"epoch": 0, "iter": 4384, "iter_tflops": 23.534466565296484, "iter_time": 0.44373681640624996, "loss": 0.026784269139170647, "lr": 3e-05, "seqlen": 4208.0, "step_tflops": 25.92258781683938, "step_time": 0.4028575134277344} +{"epoch": 0, "iter": 4385, "iter_tflops": 32.881650460838294, "iter_time": 0.6274348526000977, "loss": 0.7383078336715698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.68196180477527, "step_time": 0.5781939239501954} +{"epoch": 0, "iter": 4386, "iter_tflops": 39.6028177180868, "iter_time": 0.5209501419067384, "loss": 0.8415095210075378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.16073978611965, "step_time": 0.4780060214996338} +{"epoch": 0, "iter": 4387, "iter_tflops": 45.41055667256872, "iter_time": 0.45432373046875, "loss": 0.6075312495231628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.21128665711847, "step_time": 0.4192349948883057} +{"epoch": 0, "iter": 4388, "iter_tflops": 46.429028124597885, "iter_time": 0.4443576431274414, "loss": 0.6562749147415161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.98758195850501, "step_time": 0.4127243747711182} +{"epoch": 0, "iter": 4389, "iter_tflops": 13.423005390464054, "iter_time": 0.5506813201904297, "loss": 0.01409653015434742, "lr": 3e-05, "seqlen": 2992.0, "step_tflops": 14.565139676834143, "step_time": 0.5074993095397949} +{"epoch": 0, "iter": 4390, "iter_tflops": 15.44270633262665, "iter_time": 0.4786595153808594, "loss": 0.011446169577538967, "lr": 3e-05, "seqlen": 2992.0, "step_tflops": 17.20496551580698, "step_time": 0.429631685256958} +{"epoch": 0, "iter": 4391, "iter_tflops": 15.075072012103663, "iter_time": 0.49033253860473636, "loss": 0.007012072950601578, "lr": 3e-05, "seqlen": 2992.0, "step_tflops": 
16.795695025443283, "step_time": 0.44010077095031735} +{"epoch": 0, "iter": 4392, "iter_tflops": 15.787774109001344, "iter_time": 0.468197624206543, "loss": 0.002633434720337391, "lr": 3e-05, "seqlen": 2992.0, "step_tflops": 17.526094887755598, "step_time": 0.4217595748901367} +{"epoch": 0, "iter": 4393, "iter_tflops": 19.553365788291906, "iter_time": 1.055117248535156, "loss": 0.04244044050574303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.74958597499307, "step_time": 0.9942894058227539} +{"epoch": 0, "iter": 4394, "iter_tflops": 8.598125173602927, "iter_time": 2.39948745727539, "loss": 0.023325800895690918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.679547594764433, "step_time": 2.1314109268188477} +{"epoch": 0, "iter": 4395, "iter_tflops": 13.648122372147919, "iter_time": 1.511643356323242, "loss": 0.03713396564126015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.840941745658043, "step_time": 1.0950139217376706} +{"epoch": 0, "iter": 4396, "iter_tflops": 43.76030969673734, "iter_time": 0.4714567527770996, "loss": 0.020902005955576897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.21100555782591, "step_time": 0.4279332752227783} +{"epoch": 0, "iter": 4397, "iter_tflops": 15.651902511678564, "iter_time": 1.2043790893554687, "loss": 0.3965800404548645, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 16.674897989428676, "step_time": 1.1304911193847655} +{"epoch": 0, "iter": 4398, "iter_tflops": 21.454989957331875, "iter_time": 0.8786219024658203, "loss": 0.2878158390522003, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 23.459832977603757, "step_time": 0.8035361595153807} +{"epoch": 0, "iter": 4399, "iter_tflops": 33.89540975884819, "iter_time": 0.5561468124389648, "loss": 0.304714173078537, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 36.05840900341059, "step_time": 0.5227857971191406} +{"epoch": 0, "iter": 4400, "iter_tflops": 30.515675499047614, "iter_time": 0.6177423171997071, "loss": 0.4826975166797638, "lr": 3e-05, "seqlen": 7504.0, 
"step_tflops": 32.46055808797615, "step_time": 0.5807301292419433} +{"epoch": 0, "iter": 4401, "iter_tflops": 37.55886911494727, "iter_time": 0.549300178527832, "loss": 0.67274409532547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.480749379272524, "step_time": 0.5096519660949708} +{"epoch": 0, "iter": 4402, "iter_tflops": 14.55058928237216, "iter_time": 1.4178871459960936, "loss": 0.7430899739265442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.647998744261585, "step_time": 1.318449333190918} +{"epoch": 0, "iter": 4403, "iter_tflops": 29.54938174728808, "iter_time": 0.6981903610229492, "loss": 0.6141985654830933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.43679546762559, "step_time": 0.5662159156799317} +{"epoch": 0, "iter": 4404, "iter_tflops": 46.13357302324285, "iter_time": 0.44720346069335937, "loss": 0.7276445627212524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.91265476944986, "step_time": 0.41334394264221197} +{"epoch": 0, "iter": 4405, "iter_tflops": 28.59151489544357, "iter_time": 0.7215809860229492, "loss": 0.24107207357883453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.28083110025696, "step_time": 0.6813252067565918} +{"epoch": 0, "iter": 4406, "iter_tflops": 14.580972607473692, "iter_time": 1.4149326019287107, "loss": 0.3228451907634735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.04978379593415, "step_time": 1.285443702697754} +{"epoch": 0, "iter": 4407, "iter_tflops": 38.3254508133199, "iter_time": 0.5383131332397461, "loss": 0.331257164478302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.34178478338937, "step_time": 0.4872513904571534} +{"epoch": 0, "iter": 4408, "iter_tflops": 40.650829521193955, "iter_time": 0.5075196189880371, "loss": 0.3219180703163147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25318031121997, "step_time": 0.4662058944702149} +{"epoch": 0, "iter": 4409, "iter_tflops": 26.863059558510223, "iter_time": 0.7680098190307617, "loss": 0.7496360540390015, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 28.840006272563574, "step_time": 0.7153636970520021} +{"epoch": 0, "iter": 4410, "iter_tflops": 9.4224192743677, "iter_time": 2.189574981689453, "loss": 0.7091871500015259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.730805607445458, "step_time": 1.9226043472290038} +{"epoch": 0, "iter": 4411, "iter_tflops": 10.873083978939475, "iter_time": 1.8974463500976564, "loss": 0.7508276104927063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.191747142675458, "step_time": 1.5639394302368166} +{"epoch": 0, "iter": 4412, "iter_tflops": 35.02037121333824, "iter_time": 0.5891169281005859, "loss": 0.9139032363891602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.41613556556429, "step_time": 0.5370423965454101} +{"epoch": 0, "iter": 4413, "iter_tflops": 19.201041220558654, "iter_time": 0.9151996536254884, "loss": 0.6970827579498291, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 20.16548974723393, "step_time": 0.8714286880493164} +{"epoch": 0, "iter": 4414, "iter_tflops": 17.245862757011654, "iter_time": 1.0189566345214844, "loss": 0.42716655135154724, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 18.920803442516107, "step_time": 0.9287547607421875} +{"epoch": 0, "iter": 4415, "iter_tflops": 31.19234289133141, "iter_time": 0.5633685913085937, "loss": 0.384061723947525, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 33.207237143319134, "step_time": 0.5291854362487793} +{"epoch": 0, "iter": 4416, "iter_tflops": 30.45905092984167, "iter_time": 0.5769315109252929, "loss": 0.48435965180397034, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 32.32374547513325, "step_time": 0.5436494445800781} +{"epoch": 0, "iter": 4417, "iter_tflops": 31.969908819008364, "iter_time": 0.6453285064697266, "loss": 0.08160243928432465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.23207236895719, "step_time": 0.6026831588745117} +{"epoch": 0, "iter": 4418, "iter_tflops": 13.63650436420996, "iter_time": 1.5129312438964841, "loss": 0.1268034726381302, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 15.286417677623172, "step_time": 1.3496356010437012} +{"epoch": 0, "iter": 4419, "iter_tflops": 53.025627475326345, "iter_time": 0.38907778167724616, "loss": 0.140301913022995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.00450822510134, "step_time": 0.3556808624267578} +{"epoch": 0, "iter": 4420, "iter_tflops": 51.229281677120895, "iter_time": 0.40272072601318354, "loss": 0.06205720081925392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.98805306100433, "step_time": 0.3684909973144532} +{"epoch": 0, "iter": 4421, "iter_tflops": 38.597023004782265, "iter_time": 0.5345255126953125, "loss": 0.9138911962509155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.9314958734029, "step_time": 0.4920190200805664} +{"epoch": 0, "iter": 4422, "iter_tflops": 41.76467882752569, "iter_time": 0.49398424911499017, "loss": 0.7089589834213257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83729588777552, "step_time": 0.4500940361022949} +{"epoch": 0, "iter": 4423, "iter_tflops": 45.68452942491098, "iter_time": 0.4515991249084473, "loss": 0.8770109415054321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.263389107911706, "step_time": 0.4187915992736816} +{"epoch": 0, "iter": 4424, "iter_tflops": 40.07487290588801, "iter_time": 0.5148136978149414, "loss": 0.7535275816917419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.93479422721259, "step_time": 0.480521541595459} +{"epoch": 0, "iter": 4425, "iter_tflops": 44.66647422274771, "iter_time": 0.4618921432495117, "loss": 0.28503769636154175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35049892427783, "step_time": 0.41805237960815433} +{"epoch": 0, "iter": 4426, "iter_tflops": 38.12877772729858, "iter_time": 0.5410898208618165, "loss": 0.29140034317970276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.77272433863741, "step_time": 0.49388910675048825} +{"epoch": 0, "iter": 4427, "iter_tflops": 36.28365450012673, "iter_time": 0.5686057205200196, "loss": 0.3968678414821625, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.993631544131354, "step_time": 0.515859468460083} +{"epoch": 0, "iter": 4428, "iter_tflops": 38.56507503834882, "iter_time": 0.5349683227539063, "loss": 0.23154352605342865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.981087032054475, "step_time": 0.49143781089782707} +{"epoch": 0, "iter": 4429, "iter_tflops": 22.569019326840344, "iter_time": 0.9141333618164061, "loss": 0.18059393763542175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.127104984770614, "step_time": 0.8551002502441407} +{"epoch": 0, "iter": 4430, "iter_tflops": 46.207209840498635, "iter_time": 0.4464907875061035, "loss": 0.24530158936977386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.16979295704743, "step_time": 0.4112254066467286} +{"epoch": 0, "iter": 4431, "iter_tflops": 50.83951461831989, "iter_time": 0.40580823135375976, "loss": 0.21824945509433746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.89173562398143, "step_time": 0.3758506317138672} +{"epoch": 0, "iter": 4432, "iter_tflops": 44.57958137039261, "iter_time": 0.46279244613647463, "loss": 0.29724210500717163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.072302696860945, "step_time": 0.429167989730835} +{"epoch": 0, "iter": 4433, "iter_tflops": 35.469213546403466, "iter_time": 0.5816619949340821, "loss": 0.28376504778862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.017785593545796, "step_time": 0.5426695213317871} +{"epoch": 0, "iter": 4434, "iter_tflops": 13.029452778529626, "iter_time": 1.5834197998046875, "loss": 0.2825145423412323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.6496057322294, "step_time": 1.1689265937805176} +{"epoch": 0, "iter": 4435, "iter_tflops": 42.05812431620241, "iter_time": 0.49053765106201175, "loss": 0.31358957290649414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.718642883715, "step_time": 0.4512621593475342} +{"epoch": 0, "iter": 4436, "iter_tflops": 38.36889288007643, "iter_time": 0.5377036437988281, "loss": 0.32252922654151917, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 41.94788200949628, "step_time": 0.49182682228088376} +{"epoch": 0, "iter": 4437, "iter_tflops": 23.241436538693673, "iter_time": 0.8876858139038086, "loss": 0.17259985208511353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.85555202174626, "step_time": 0.8300396423339844} +{"epoch": 0, "iter": 4438, "iter_tflops": 48.36822602522182, "iter_time": 0.4265422821044922, "loss": 0.19542498886585236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.992638008687926, "step_time": 0.38931999397277833} +{"epoch": 0, "iter": 4439, "iter_tflops": 47.58040627449485, "iter_time": 0.43360482025146485, "loss": 0.21970641613006592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.74547863216244, "step_time": 0.3987033081054687} +{"epoch": 0, "iter": 4440, "iter_tflops": 47.74734254022401, "iter_time": 0.4320888328552246, "loss": 0.162192240357399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.07889284270626, "step_time": 0.39615077018737793} +{"epoch": 0, "iter": 4441, "iter_tflops": 16.797565234834877, "iter_time": 0.8850631942749023, "loss": 0.010260247625410557, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 17.692539244637967, "step_time": 0.8402924270629883} +{"epoch": 0, "iter": 4442, "iter_tflops": 10.566814839031077, "iter_time": 1.4069430541992185, "loss": 0.002521029906347394, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 14.472784666209277, "step_time": 1.0272319450378418} +{"epoch": 0, "iter": 4443, "iter_tflops": 33.34693986756896, "iter_time": 0.4458252182006836, "loss": 0.01014742162078619, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 37.04047679790283, "step_time": 0.40136920547485355} +{"epoch": 0, "iter": 4444, "iter_tflops": 32.90485772769121, "iter_time": 0.4518149528503418, "loss": 0.010906553827226162, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 36.1942860206834, "step_time": 0.4107528667449951} +{"epoch": 0, "iter": 4445, "iter_tflops": 21.135816260636307, "iter_time": 0.9761200256347656, "loss": 0.8610546588897705, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 22.617505451032944, "step_time": 0.9121736946105957} +{"epoch": 0, "iter": 4446, "iter_tflops": 23.997729086894427, "iter_time": 0.8597102432250977, "loss": 0.6002447009086609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.832860237387173, "step_time": 0.7688741836547852} +{"epoch": 0, "iter": 4447, "iter_tflops": 41.897876179078985, "iter_time": 0.4924138259887695, "loss": 0.8176438808441162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.88418627569759, "step_time": 0.4596517219543457} +{"epoch": 0, "iter": 4448, "iter_tflops": 46.064588346032956, "iter_time": 0.44787317657470704, "loss": 0.7933743596076965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.793826499811196, "step_time": 0.4143303489685059} +{"epoch": 0, "iter": 4449, "iter_tflops": 38.32037982906366, "iter_time": 0.5383843688964843, "loss": 0.9354880452156067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.403676642965124, "step_time": 0.49829133987426755} +{"epoch": 0, "iter": 4450, "iter_tflops": 11.710225884543254, "iter_time": 1.7618014984130859, "loss": 0.8561925292015076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.244305195111773, "step_time": 1.557733169555664} +{"epoch": 0, "iter": 4451, "iter_tflops": 12.30363853005552, "iter_time": 1.6768286437988282, "loss": 0.6605254411697388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.44364394514497, "step_time": 1.3358954391479494} +{"epoch": 0, "iter": 4452, "iter_tflops": 16.026360966608294, "iter_time": 1.2873224029541017, "loss": 0.9138621091842651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.722605804489003, "step_time": 1.1019349403381349} +{"epoch": 0, "iter": 4453, "iter_tflops": 10.786816209451747, "iter_time": 1.4502671508789065, "loss": 0.45376595854759216, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 11.320067151674621, "step_time": 1.3819498596191406} +{"epoch": 0, "iter": 4454, "iter_tflops": 12.965641162698502, "iter_time": 1.2065554656982422, "loss": 0.473906546831131, "lr": 
3e-05, "seqlen": 6256.0, "step_tflops": 17.28615880446533, "step_time": 0.9049879379272461} +{"epoch": 0, "iter": 4455, "iter_tflops": 27.284536386925215, "iter_time": 0.5733564605712891, "loss": 0.27516719698905945, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.111067879879595, "step_time": 0.5373820457458496} +{"epoch": 0, "iter": 4456, "iter_tflops": 27.5716735610075, "iter_time": 0.5673854064941406, "loss": 0.3173545002937317, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.36339133294776, "step_time": 0.5327642517089843} +{"epoch": 0, "iter": 4457, "iter_tflops": 27.134260424642466, "iter_time": 0.7603337326049805, "loss": 0.16478225588798523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.590926021731732, "step_time": 0.7215958480834962} +{"epoch": 0, "iter": 4458, "iter_tflops": 18.6058300572866, "iter_time": 1.1088510131835938, "loss": 0.07886476069688797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.340922057296215, "step_time": 0.9234665184020996} +{"epoch": 0, "iter": 4459, "iter_tflops": 39.67610773346714, "iter_time": 0.5199878387451172, "loss": 0.062258198857307434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.634108206570424, "step_time": 0.4728203315734863} +{"epoch": 0, "iter": 4460, "iter_tflops": 39.724236672655735, "iter_time": 0.5193578338623047, "loss": 0.10700417309999466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95861804867523, "step_time": 0.46932989311218265} +{"epoch": 0, "iter": 4461, "iter_tflops": 19.181295910486906, "iter_time": 1.0755839233398439, "loss": 0.11542946100234985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.164413458469905, "step_time": 1.0231437454223633} +{"epoch": 0, "iter": 4462, "iter_tflops": 12.99034691587964, "iter_time": 1.5881864929199219, "loss": 0.18966257572174072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.77836775254614, "step_time": 1.1604604988098144} +{"epoch": 0, "iter": 4463, "iter_tflops": 40.386185598170975, "iter_time": 0.5108453102111816, "loss": 0.1808352768421173, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44492491135043, "step_time": 0.4641945858001709} +{"epoch": 0, "iter": 4464, "iter_tflops": 41.72858076334109, "iter_time": 0.49441157913208006, "loss": 0.13831746578216553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.86476550778535, "step_time": 0.449824462890625} +{"epoch": 0, "iter": 4465, "iter_tflops": 15.69490377819283, "iter_time": 1.3145090789794922, "loss": 0.0043882643803954124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.773877785103416, "step_time": 1.229953727722168} +{"epoch": 0, "iter": 4466, "iter_tflops": 20.566304441791804, "iter_time": 1.0031502532958985, "loss": 0.006756874732673168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.193225967204302, "step_time": 0.7586850318908692} +{"epoch": 0, "iter": 4467, "iter_tflops": 45.25094083361808, "iter_time": 0.4559262886047364, "loss": 0.01041086670011282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24879220204942, "step_time": 0.41057889366149897} +{"epoch": 0, "iter": 4468, "iter_tflops": 49.04546536326189, "iter_time": 0.42065241622924804, "loss": 0.0018279565265402198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.254971596241695, "step_time": 0.3802618064880371} +{"epoch": 0, "iter": 4469, "iter_tflops": 19.044358134066638, "iter_time": 1.0833178710937499, "loss": 0.6851207613945007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.56537646930741, "step_time": 1.0031955184936525} +{"epoch": 0, "iter": 4470, "iter_tflops": 32.56345106278802, "iter_time": 0.6335659408569336, "loss": 0.6737427115440369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.182127149828084, "step_time": 0.5702012329101562} +{"epoch": 0, "iter": 4471, "iter_tflops": 42.90297883114656, "iter_time": 0.48087788009643556, "loss": 0.8629422783851624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.62014437029723, "step_time": 0.44253602790832525} +{"epoch": 0, "iter": 4472, "iter_tflops": 43.449106950498695, "iter_time": 0.4748335456848144, "loss": 
0.7223341464996338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.78141740019374, "step_time": 0.441010440826416} +{"epoch": 0, "iter": 4473, "iter_tflops": 39.011512749431745, "iter_time": 0.5288462829589844, "loss": 0.8563677668571472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.957718342551644, "step_time": 0.4917115211486816} +{"epoch": 0, "iter": 4474, "iter_tflops": 14.738070363297707, "iter_time": 1.3998503875732422, "loss": 0.9151664972305298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.707198069193105, "step_time": 1.1651246814727783} +{"epoch": 0, "iter": 4475, "iter_tflops": 36.077780364792936, "iter_time": 0.5718504104614258, "loss": 0.753871738910675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.64887321368503, "step_time": 0.5203450145721435} +{"epoch": 0, "iter": 4476, "iter_tflops": 42.792720894157064, "iter_time": 0.48211688995361324, "loss": 0.8496448993682861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.72734869823687, "step_time": 0.4415207386016846} +{"epoch": 0, "iter": 4477, "iter_tflops": 24.115360118863162, "iter_time": 0.8555167083740234, "loss": 0.1716005802154541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.855448781192184, "step_time": 0.7979398727416991} +{"epoch": 0, "iter": 4478, "iter_tflops": 9.416894978940181, "iter_time": 2.1908594665527343, "loss": 0.11706626415252686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.113709042530362, "step_time": 1.856364372253418} +{"epoch": 0, "iter": 4479, "iter_tflops": 9.931064274239338, "iter_time": 2.0774302673339844, "loss": 0.08117368817329407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.848595424043884, "step_time": 1.741226936340332} +{"epoch": 0, "iter": 4480, "iter_tflops": 31.758561164187324, "iter_time": 0.6496230545043945, "loss": 0.13530972599983215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.207423165604496, "step_time": 0.5131165313720704} +{"epoch": 0, "iter": 4481, "iter_tflops": 15.365535345602222, "iter_time": 1.154365219116211, 
"loss": 0.3902892768383026, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 16.35643954851692, "step_time": 1.0844315795898438} +{"epoch": 0, "iter": 4482, "iter_tflops": 12.602374460838158, "iter_time": 1.4074680633544923, "loss": 0.3214568495750427, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 16.580881887949367, "step_time": 1.0697524833679197} +{"epoch": 0, "iter": 4483, "iter_tflops": 30.95944322412124, "iter_time": 0.5729250183105468, "loss": 0.36190158128738403, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 32.94242728960136, "step_time": 0.5384375419616699} +{"epoch": 0, "iter": 4484, "iter_tflops": 32.11483113257534, "iter_time": 0.5523130264282228, "loss": 0.3222223222255707, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 34.150149487421814, "step_time": 0.5193956642150879} +{"epoch": 0, "iter": 4485, "iter_tflops": 21.354140391816443, "iter_time": 0.8789037094116211, "loss": 0.12934310734272003, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 22.391058945793326, "step_time": 0.8382021255493164} +{"epoch": 0, "iter": 4486, "iter_tflops": 13.80091037804745, "iter_time": 1.3599271850585937, "loss": 0.24932964146137238, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 16.881680601071444, "step_time": 1.1117514686584473} +{"epoch": 0, "iter": 4487, "iter_tflops": 46.75730015295884, "iter_time": 0.40139685440063483, "loss": 0.20453597605228424, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 50.74651629961878, "step_time": 0.3698427906036377} +{"epoch": 0, "iter": 4488, "iter_tflops": 46.970300274319214, "iter_time": 0.3995766067504883, "loss": 0.13195346295833588, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 51.13871284822952, "step_time": 0.3670063667297363} +{"epoch": 0, "iter": 4489, "iter_tflops": 27.303548046998074, "iter_time": 0.7556195068359375, "loss": 0.24909447133541107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.872481295425327, "step_time": 0.7145590744018555} +{"epoch": 0, "iter": 4490, "iter_tflops": 16.286966057591137, "iter_time": 
1.2667241668701172, "loss": 0.16362904012203217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.66069111030575, "step_time": 0.9985674438476564} +{"epoch": 0, "iter": 4491, "iter_tflops": 35.284557807375, "iter_time": 0.5847060241699218, "loss": 0.27192223072052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.93174099995989, "step_time": 0.5299298973083496} +{"epoch": 0, "iter": 4492, "iter_tflops": 38.94162652251846, "iter_time": 0.5297953720092773, "loss": 0.3011992275714874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.61925566207727, "step_time": 0.48407916069030765} +{"epoch": 0, "iter": 4493, "iter_tflops": 35.08659767935766, "iter_time": 0.5514004516601563, "loss": 0.005450287833809853, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 39.288535825703065, "step_time": 0.4924277629852295} +{"epoch": 0, "iter": 4494, "iter_tflops": 40.06328630194784, "iter_time": 0.48290511322021484, "loss": 0.012456106022000313, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 44.85368346750377, "step_time": 0.4313305912017822} +{"epoch": 0, "iter": 4495, "iter_tflops": 44.677537864526045, "iter_time": 0.43303115463256836, "loss": 0.008632674813270569, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 49.41552623100542, "step_time": 0.391511884689331} +{"epoch": 0, "iter": 4496, "iter_tflops": 41.45437252044656, "iter_time": 0.4667002449035645, "loss": 0.0029842008370906115, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 45.95669060565924, "step_time": 0.4209782199859619} +{"epoch": 0, "iter": 4497, "iter_tflops": 20.96217519604081, "iter_time": 0.9842057571411132, "loss": 1.0363773107528687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.560191807121374, "step_time": 0.9144910507202149} +{"epoch": 0, "iter": 4498, "iter_tflops": 22.27300307753488, "iter_time": 0.9262825241088867, "loss": 0.9130469560623169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.85747473340985, "step_time": 0.7681695213317871} +{"epoch": 0, "iter": 4499, "iter_tflops": 43.71360858323231, "iter_time": 
0.4719604301452637, "loss": 0.7492629289627075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.0826444106323, "step_time": 0.438188928604126} +{"epoch": 0, "iter": 4500, "iter_tflops": 43.031336958435325, "iter_time": 0.4794434700012208, "loss": 0.8279737234115601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.085204507141114, "step_time": 0.44767282104492184} +{"epoch": 0, "iter": 4501, "iter_tflops": 25.386520548291465, "iter_time": 0.8126790542602539, "loss": 0.5956138372421265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.737984028844274, "step_time": 0.7716024322509766} +{"epoch": 0, "iter": 4502, "iter_tflops": 11.857487886741657, "iter_time": 1.739921112060547, "loss": 0.5574770569801331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.38800417742678, "step_time": 1.258914342880249} +{"epoch": 0, "iter": 4503, "iter_tflops": 38.20149723792253, "iter_time": 0.540059814453125, "loss": 0.5206220149993896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76102209789157, "step_time": 0.4940275039672851} +{"epoch": 0, "iter": 4504, "iter_tflops": 36.87852525106229, "iter_time": 0.5594337997436524, "loss": 0.5044787526130676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.11415022917343, "step_time": 0.5143096237182617} +{"epoch": 0, "iter": 4505, "iter_tflops": 19.70762314607877, "iter_time": 1.0468585357666016, "loss": 0.8277531862258911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.748958682934592, "step_time": 0.994319465637207} +{"epoch": 0, "iter": 4506, "iter_tflops": 19.570947843702637, "iter_time": 1.0541693572998048, "loss": 1.0206778049468994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.07999798941405, "step_time": 0.9343793201446532} +{"epoch": 0, "iter": 4507, "iter_tflops": 36.06271028732859, "iter_time": 0.5720893783569336, "loss": 0.8600747585296631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.12756847952298, "step_time": 0.5272776794433593} +{"epoch": 0, "iter": 4508, "iter_tflops": 37.83063508678943, "iter_time": 
0.5453541412353515, "loss": 0.9471970796585083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.72366998672749, "step_time": 0.5066118431091309} +{"epoch": 0, "iter": 4509, "iter_tflops": 33.23106150865322, "iter_time": 0.6208376312255859, "loss": 0.7087640166282654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.617693728287584, "step_time": 0.5634187030792236} +{"epoch": 0, "iter": 4510, "iter_tflops": 32.550885442408166, "iter_time": 0.6338105163574219, "loss": 1.0049357414245605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.578656287241365, "step_time": 0.5798727569580078} +{"epoch": 0, "iter": 4511, "iter_tflops": 33.17947526894529, "iter_time": 0.6218028869628905, "loss": 0.9098429083824158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.08197881656594, "step_time": 0.5717838706970214} +{"epoch": 0, "iter": 4512, "iter_tflops": 37.34657402389387, "iter_time": 0.5524226531982422, "loss": 0.9513614177703857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.798572803047165, "step_time": 0.5056817455291748} +{"epoch": 0, "iter": 4513, "iter_tflops": 18.811033585891856, "iter_time": 1.096754913330078, "loss": 0.36070916056632996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.148986818489675, "step_time": 1.0239270935058595} +{"epoch": 0, "iter": 4514, "iter_tflops": 21.842864902955565, "iter_time": 0.9445232391357422, "loss": 0.3713926672935486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.71431271823958, "step_time": 0.6717094306945801} +{"epoch": 0, "iter": 4515, "iter_tflops": 46.59565304889129, "iter_time": 0.44276863098144537, "loss": 0.3618554174900055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.71417531918557, "step_time": 0.40681118011474604} +{"epoch": 0, "iter": 4516, "iter_tflops": 43.2073535840461, "iter_time": 0.4774903297424316, "loss": 0.320873498916626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.59456595472823, "step_time": 0.4427789611816407} +{"epoch": 0, "iter": 4517, "iter_tflops": 27.305798060709357, "iter_time": 
0.755557243347168, "loss": 0.858525812625885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.985838992894625, "step_time": 0.7117645797729493} +{"epoch": 0, "iter": 4518, "iter_tflops": 17.25567552740144, "iter_time": 1.195612045288086, "loss": 0.947567343711853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.380690836850007, "step_time": 1.0122862701416016} +{"epoch": 0, "iter": 4519, "iter_tflops": 43.52488287438126, "iter_time": 0.4740068702697754, "loss": 0.8770351409912109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.9730160603172, "step_time": 0.4392115993499756} +{"epoch": 0, "iter": 4520, "iter_tflops": 41.73907293345459, "iter_time": 0.494287296295166, "loss": 1.1090461015701294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.737556028355684, "step_time": 0.461158260345459} +{"epoch": 0, "iter": 4521, "iter_tflops": 27.399503667940913, "iter_time": 0.4431121406555176, "loss": 0.002692675916478038, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 30.040002931513712, "step_time": 0.40416283416748044} +{"epoch": 0, "iter": 4522, "iter_tflops": 7.650784846070749, "iter_time": 1.5869029083251953, "loss": 0.0010407380759716034, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 9.170011419902417, "step_time": 1.323995376586914} +{"epoch": 0, "iter": 4523, "iter_tflops": 6.42406997920786, "iter_time": 1.8899315795898435, "loss": 0.003277725540101528, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 8.003664206032123, "step_time": 1.516936794281006} +{"epoch": 0, "iter": 4524, "iter_tflops": 14.874173891689207, "iter_time": 0.8162505569458007, "loss": 0.0051711928099393845, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 16.73376357231452, "step_time": 0.7255422649383544} +{"epoch": 0, "iter": 4525, "iter_tflops": 20.518329902792182, "iter_time": 0.8263834762573242, "loss": 0.34998562932014465, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 21.624797698683118, "step_time": 0.784100227355957} +{"epoch": 0, "iter": 4526, "iter_tflops": 9.458105483626879, "iter_time": 
1.792748962402344, "loss": 0.35736092925071716, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 11.917681632720612, "step_time": 1.4227606773376467} +{"epoch": 0, "iter": 4527, "iter_tflops": 25.52175356231176, "iter_time": 0.664374755859375, "loss": 0.34869927167892456, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 27.466874223269553, "step_time": 0.6173257522583008} +{"epoch": 0, "iter": 4528, "iter_tflops": 26.44940397439309, "iter_time": 0.6410733795166015, "loss": 0.31974461674690247, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 28.553008006008998, "step_time": 0.5938431701660156} +{"epoch": 0, "iter": 4529, "iter_tflops": 30.912908751632756, "iter_time": 0.6673941192626953, "loss": 0.44018763303756714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.39165343918349, "step_time": 0.5998866424560546} +{"epoch": 0, "iter": 4530, "iter_tflops": 34.551877251662134, "iter_time": 0.597104850769043, "loss": 0.43740880489349365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.47474732001418, "step_time": 0.5362242755889892} +{"epoch": 0, "iter": 4531, "iter_tflops": 44.461557919032735, "iter_time": 0.46402093124389643, "loss": 0.4732663631439209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.47061836690947, "step_time": 0.42564122772216795} +{"epoch": 0, "iter": 4532, "iter_tflops": 40.45459996954707, "iter_time": 0.5099813995361328, "loss": 0.3933108448982239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35088297994589, "step_time": 0.46517886734008784} +{"epoch": 0, "iter": 4533, "iter_tflops": 16.870079491601548, "iter_time": 1.2229399108886718, "loss": 0.17554216086864471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.940454326765348, "step_time": 1.149976089477539} +{"epoch": 0, "iter": 4534, "iter_tflops": 22.68579417022387, "iter_time": 0.9094278717041016, "loss": 0.11611102521419525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.84168665996452, "step_time": 0.7410144996643067} +{"epoch": 0, "iter": 4535, "iter_tflops": 42.06559881375743, 
"iter_time": 0.4904504890441894, "loss": 0.16810978949069977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.48689079953058, "step_time": 0.4535613040924072} +{"epoch": 0, "iter": 4536, "iter_tflops": 53.331362619018854, "iter_time": 0.38684729766845705, "loss": 0.13701044023036957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.33290561029089, "step_time": 0.35367848205566405} +{"epoch": 0, "iter": 4537, "iter_tflops": 28.874137126779317, "iter_time": 0.7145180969238282, "loss": 0.7366646528244019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.473419598945277, "step_time": 0.6770193099975585} +{"epoch": 0, "iter": 4538, "iter_tflops": 14.815966733102275, "iter_time": 1.392490539550781, "loss": 0.5572232604026794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.025920988531407, "step_time": 1.211746109008789} +{"epoch": 0, "iter": 4539, "iter_tflops": 17.083194752542557, "iter_time": 1.2076835632324219, "loss": 0.8277567028999329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.940709997089126, "step_time": 1.0892460479736328} +{"epoch": 0, "iter": 4540, "iter_tflops": 18.646214554871857, "iter_time": 1.1064494323730467, "loss": 1.1402857303619385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.275865774561225, "step_time": 0.9261634864807128} +{"epoch": 0, "iter": 4541, "iter_tflops": 20.014734421980798, "iter_time": 0.8061705932617187, "loss": 0.3568224608898163, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 21.119807785186453, "step_time": 0.7639885025024414} +{"epoch": 0, "iter": 4542, "iter_tflops": 10.641626160915811, "iter_time": 1.516242919921875, "loss": 0.21172572672367096, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 13.396726421869925, "step_time": 1.2044203796386719} +{"epoch": 0, "iter": 4543, "iter_tflops": 24.632250147605486, "iter_time": 0.6550473556518556, "loss": 0.4654456377029419, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 26.549137585985576, "step_time": 0.6077519569396972} +{"epoch": 0, "iter": 4544, "iter_tflops": 
25.90436195052757, "iter_time": 0.622879280090332, "loss": 0.4721289873123169, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 27.6202349448888, "step_time": 0.5841836738586426} +{"epoch": 0, "iter": 4545, "iter_tflops": 13.233056784988769, "iter_time": 1.3777506408691405, "loss": 0.08156654983758926, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 14.091339199184006, "step_time": 1.2938339080810548} +{"epoch": 0, "iter": 4546, "iter_tflops": 18.794391257851583, "iter_time": 0.9700687942504883, "loss": 0.2022279053926468, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 21.420321985540042, "step_time": 0.8511474514007568} +{"epoch": 0, "iter": 4547, "iter_tflops": 44.09397705090899, "iter_time": 0.41347716140747065, "loss": 0.07954691350460052, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 47.94296408386276, "step_time": 0.38028212928771976} +{"epoch": 0, "iter": 4548, "iter_tflops": 47.78296476279445, "iter_time": 0.3815554885864258, "loss": 0.14079372584819794, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 51.87786571448526, "step_time": 0.3514379825592041} +{"epoch": 0, "iter": 4549, "iter_tflops": 35.979016796699035, "iter_time": 0.5734201583862305, "loss": 0.6459587812423706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.57702263479676, "step_time": 0.5348026390075684} +{"epoch": 0, "iter": 4550, "iter_tflops": 12.044416562709182, "iter_time": 1.7129176330566405, "loss": 0.7184744477272034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.90298140710088, "step_time": 1.384360145568848} +{"epoch": 0, "iter": 4551, "iter_tflops": 48.8078914292749, "iter_time": 0.42269995498657226, "loss": 0.5564817786216736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.03877375238637, "step_time": 0.38898134422302244} +{"epoch": 0, "iter": 4552, "iter_tflops": 47.32972676942027, "iter_time": 0.43590138626098635, "loss": 0.7049899697303772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.39454864814555, "step_time": 0.4014257164001465} +{"epoch": 0, "iter": 4553, "iter_tflops": 
21.548753134623635, "iter_time": 0.8231306686401367, "loss": 0.00282835541293025, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 22.604907862897146, "step_time": 0.7846720581054687} +{"epoch": 0, "iter": 4554, "iter_tflops": 14.819250065675629, "iter_time": 1.1969188385009766, "loss": 0.0036049203481525183, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 19.18008207330492, "step_time": 0.9247843418121338} +{"epoch": 0, "iter": 4555, "iter_tflops": 48.567562727340096, "iter_time": 0.36521164703369136, "loss": 0.00484835309907794, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 53.36869543504412, "step_time": 0.33235662651062015} +{"epoch": 0, "iter": 4556, "iter_tflops": 48.36292889806813, "iter_time": 0.36675693511962887, "loss": 0.007468902040272951, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 52.96800364971217, "step_time": 0.3348708343505859} +{"epoch": 0, "iter": 4557, "iter_tflops": 32.16936577536417, "iter_time": 0.6413273315429687, "loss": 0.005275716073811054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.11885027410024, "step_time": 0.6046831398010254} +{"epoch": 0, "iter": 4558, "iter_tflops": 15.253790973918358, "iter_time": 1.3525223693847657, "loss": 0.0016819173470139503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.859567597037792, "step_time": 1.0388490791320801} +{"epoch": 0, "iter": 4559, "iter_tflops": 42.337640349286254, "iter_time": 0.4872990875244141, "loss": 0.0282350592315197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.12798128584481, "step_time": 0.4377673931121826} +{"epoch": 0, "iter": 4560, "iter_tflops": 47.70715118154003, "iter_time": 0.4324528503417969, "loss": 0.017766805365681648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.854081699593614, "step_time": 0.3903405914306641} +{"epoch": 0, "iter": 4561, "iter_tflops": 30.4699261680798, "iter_time": 0.6770969314575195, "loss": 0.0793323740363121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.17682359636711, "step_time": 0.6218525848388672} +{"epoch": 0, "iter": 4562, 
"iter_tflops": 47.592312841635014, "iter_time": 0.43349634170532225, "loss": 0.0813901424407959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.16230321482245, "step_time": 0.3955173034667969} +{"epoch": 0, "iter": 4563, "iter_tflops": 50.944554647871854, "iter_time": 0.4049715156555176, "loss": 0.06408426910638809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.77971836776495, "step_time": 0.36986729431152343} +{"epoch": 0, "iter": 4564, "iter_tflops": 56.82061902960249, "iter_time": 0.3630916709899903, "loss": 0.12706446647644043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.03701872956297, "step_time": 0.3325610084533691} +{"epoch": 0, "iter": 4565, "iter_tflops": 36.40702257471289, "iter_time": 0.566678955078125, "loss": 0.29527828097343445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.15051167340816, "step_time": 0.5269686813354492} +{"epoch": 0, "iter": 4566, "iter_tflops": 8.233328383563277, "iter_time": 2.505802337646484, "loss": 0.2622251510620117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.213910776238201, "step_time": 2.019901481628418} +{"epoch": 0, "iter": 4567, "iter_tflops": 15.002457276943696, "iter_time": 1.3751809539794924, "loss": 0.21315476298332214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.693942751477458, "step_time": 1.1659975280761716} +{"epoch": 0, "iter": 4568, "iter_tflops": 21.314786694003658, "iter_time": 0.9679239959716798, "loss": 0.1783454716205597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.181736588836873, "step_time": 0.853168399810791} +{"epoch": 0, "iter": 4569, "iter_tflops": 16.74038762559543, "iter_time": 0.9173835372924805, "loss": 0.5070542097091675, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 17.632295120477423, "step_time": 0.8709788436889649} +{"epoch": 0, "iter": 4570, "iter_tflops": 13.71083581046785, "iter_time": 1.1200889739990234, "loss": 0.3814738690853119, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 16.87014684285913, "step_time": 0.9103273468017579} +{"epoch": 0, "iter": 4571, 
"iter_tflops": 23.963430788349417, "iter_time": 0.6408663330078125, "loss": 0.4083874225616455, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 25.59870908765421, "step_time": 0.599926971435547} +{"epoch": 0, "iter": 4572, "iter_tflops": 22.543978901292377, "iter_time": 0.6812176361083985, "loss": 0.312874436378479, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.25939789448941, "step_time": 0.6330476989746093} +{"epoch": 0, "iter": 4573, "iter_tflops": 33.44110434746805, "iter_time": 0.6169381637573242, "loss": 0.8150538206100464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.84237115836426, "step_time": 0.5599827823638915} +{"epoch": 0, "iter": 4574, "iter_tflops": 34.831248749118835, "iter_time": 0.592315643310547, "loss": 0.7481977343559265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.14443339895058, "step_time": 0.5408677406311035} +{"epoch": 0, "iter": 4575, "iter_tflops": 39.33076247650631, "iter_time": 0.5245536117553711, "loss": 0.9115933179855347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.78399663054022, "step_time": 0.4822152004241943} +{"epoch": 0, "iter": 4576, "iter_tflops": 35.708403020758844, "iter_time": 0.577765785217285, "loss": 0.8480628728866577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.80494664095318, "step_time": 0.5316614322662354} +{"epoch": 0, "iter": 4577, "iter_tflops": 34.490348008382135, "iter_time": 0.5981700592041016, "loss": 0.1899077147245407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.8325245785278, "step_time": 0.5453269042968751} +{"epoch": 0, "iter": 4578, "iter_tflops": 34.48952363836994, "iter_time": 0.5981843566894531, "loss": 0.19175809621810913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.10055730083136, "step_time": 0.5414905967712402} +{"epoch": 0, "iter": 4579, "iter_tflops": 41.975210941177636, "iter_time": 0.49150660705566407, "loss": 0.2989380359649658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.24598072108277, "step_time": 0.44611646652221687} +{"epoch": 0, "iter": 4580, 
"iter_tflops": 47.68806584590508, "iter_time": 0.4326259231567383, "loss": 0.22801262140274048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.26617394569212, "step_time": 0.3947312755584717} +{"epoch": 0, "iter": 4581, "iter_tflops": 19.55286500675377, "iter_time": 1.055144271850586, "loss": 0.9004611968994141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.569301959870895, "step_time": 1.0030040664672852} +{"epoch": 0, "iter": 4582, "iter_tflops": 16.866375044538874, "iter_time": 1.223208511352539, "loss": 1.1588845252990723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.82623657563212, "step_time": 0.9452428245544434} +{"epoch": 0, "iter": 4583, "iter_tflops": 35.435146081879836, "iter_time": 0.5822212066650391, "loss": 1.000963568687439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.655875818790626, "step_time": 0.5337117080688476} +{"epoch": 0, "iter": 4584, "iter_tflops": 33.35166977856562, "iter_time": 0.6185925216674805, "loss": 0.839125394821167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.19170806635446, "step_time": 0.5700502853393554} +{"epoch": 0, "iter": 4585, "iter_tflops": 36.38581638382722, "iter_time": 0.5670092239379882, "loss": 0.6675942540168762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03468198415039, "step_time": 0.5153305206298828} +{"epoch": 0, "iter": 4586, "iter_tflops": 44.64450376912459, "iter_time": 0.46211944961547846, "loss": 0.7347275018692017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78996267424403, "step_time": 0.4228552837371826} +{"epoch": 0, "iter": 4587, "iter_tflops": 51.100356534678774, "iter_time": 0.4037367820739746, "loss": 0.8265350461006165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.57940412204582, "step_time": 0.3712003364562988} +{"epoch": 0, "iter": 4588, "iter_tflops": 44.3922442954507, "iter_time": 0.4647454490661621, "loss": 0.8520163297653198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.795643910042024, "step_time": 0.43165217208862305} +{"epoch": 0, "iter": 4589, 
"iter_tflops": 31.158551314726083, "iter_time": 0.6621326293945312, "loss": 0.8814113736152649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.222055257684886, "step_time": 0.6210059356689454} +{"epoch": 0, "iter": 4590, "iter_tflops": 8.92082440075469, "iter_time": 2.3126891174316406, "loss": 0.875276505947113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.365814255722364, "step_time": 1.9903012924194334} +{"epoch": 0, "iter": 4591, "iter_tflops": 13.79288612110561, "iter_time": 1.4957778472900392, "loss": 1.0157135725021362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.456644499048085, "step_time": 1.3347718200683594} +{"epoch": 0, "iter": 4592, "iter_tflops": 34.80022143380719, "iter_time": 0.5928437423706054, "loss": 0.8041135668754578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.94114668269842, "step_time": 0.543765682220459} +{"epoch": 0, "iter": 4593, "iter_tflops": 19.182401145002395, "iter_time": 0.8326055221557618, "loss": 0.37416499853134155, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 20.504418914090685, "step_time": 0.7789234695434571} +{"epoch": 0, "iter": 4594, "iter_tflops": 8.063985639101068, "iter_time": 1.980580551147461, "loss": 0.506066083908081, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 10.050718915426641, "step_time": 1.589077682495117} +{"epoch": 0, "iter": 4595, "iter_tflops": 12.138925883135446, "iter_time": 1.3157155151367188, "loss": 0.29789474606513977, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 14.130748810011102, "step_time": 1.1302566719055176} +{"epoch": 0, "iter": 4596, "iter_tflops": 23.041553804641058, "iter_time": 0.6931552124023437, "loss": 0.42830246686935425, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 24.851163583195817, "step_time": 0.6426810989379883} +{"epoch": 0, "iter": 4597, "iter_tflops": 13.791463092300317, "iter_time": 1.1076103515625, "loss": 0.3875206410884857, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 14.804642814533466, "step_time": 1.0318092422485352} +{"epoch": 0, "iter": 4598, 
"iter_tflops": 11.58990861263368, "iter_time": 1.318005844116211, "loss": 0.37761834263801575, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 14.466378621774389, "step_time": 1.0559358139038086} +{"epoch": 0, "iter": 4599, "iter_tflops": 27.624469626416598, "iter_time": 0.5529723281860351, "loss": 0.45839741826057434, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.353807513899213, "step_time": 0.5203947486877442} +{"epoch": 0, "iter": 4600, "iter_tflops": 27.672246783774522, "iter_time": 0.5520176010131836, "loss": 0.5972104072570801, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.444661422726547, "step_time": 0.5187890281677247} +{"epoch": 0, "iter": 4601, "iter_tflops": 35.17203688213658, "iter_time": 0.5865765914916993, "loss": 0.1719387173652649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.64036822683805, "step_time": 0.5339259033203125} +{"epoch": 0, "iter": 4602, "iter_tflops": 49.801381930483416, "iter_time": 0.4142674903869629, "loss": 0.17827950417995453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.252825339975416, "step_time": 0.38027684974670406} +{"epoch": 0, "iter": 4603, "iter_tflops": 47.74156016727413, "iter_time": 0.4321411666870117, "loss": 0.20826150476932526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.74256619173221, "step_time": 0.3987257499694824} +{"epoch": 0, "iter": 4604, "iter_tflops": 52.48470383015695, "iter_time": 0.3930877380371094, "loss": 0.2252056896686554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.87889877012216, "step_time": 0.3627196369171143} +{"epoch": 0, "iter": 4605, "iter_tflops": 33.41658800897567, "iter_time": 0.6173907852172852, "loss": 0.12329880148172379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.52058448215906, "step_time": 0.5808207778930664} +{"epoch": 0, "iter": 4606, "iter_tflops": 10.807668798672355, "iter_time": 1.908930953979492, "loss": 0.12265153229236603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.24595809182478, "step_time": 1.5575387878417968} +{"epoch": 0, "iter": 
4607, "iter_tflops": 41.396290700693676, "iter_time": 0.49838024520874025, "loss": 0.176522359251976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.3861015666474, "step_time": 0.38645064735412604} +{"epoch": 0, "iter": 4608, "iter_tflops": 48.94031945369585, "iter_time": 0.42155616760253906, "loss": 0.09100313484668732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.97946943786313, "step_time": 0.38941676330566405} +{"epoch": 0, "iter": 4609, "iter_tflops": 46.699537456682876, "iter_time": 0.4417836799621582, "loss": 0.21224527060985565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.39505365439145, "step_time": 0.4014217720031738} +{"epoch": 0, "iter": 4610, "iter_tflops": 37.228948321096304, "iter_time": 0.5541680450439453, "loss": 0.2234872579574585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.89950425701905, "step_time": 0.5044338283538818} +{"epoch": 0, "iter": 4611, "iter_tflops": 37.893594704221215, "iter_time": 0.5444480438232422, "loss": 0.19498685002326965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18474943538162, "step_time": 0.5009401245117188} +{"epoch": 0, "iter": 4612, "iter_tflops": 40.58701847505412, "iter_time": 0.5083175430297852, "loss": 0.16360679268836975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35379603833616, "step_time": 0.46514831542968743} +{"epoch": 0, "iter": 4613, "iter_tflops": 17.750405275352396, "iter_time": 1.1622885894775392, "loss": 0.1804523915052414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.983834532578836, "step_time": 1.0867716674804688} +{"epoch": 0, "iter": 4614, "iter_tflops": 15.25899345285169, "iter_time": 1.3520612335205078, "loss": 0.1278991550207138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.220988358866588, "step_time": 0.9722022914886475} +{"epoch": 0, "iter": 4615, "iter_tflops": 38.958499423978154, "iter_time": 0.52956591796875, "loss": 0.14441095292568207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90530279710047, "step_time": 0.48085183334350584} +{"epoch": 0, 
"iter": 4616, "iter_tflops": 40.43911291455773, "iter_time": 0.5101767082214356, "loss": 0.21406736969947815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55828879982067, "step_time": 0.4630135955810547} +{"epoch": 0, "iter": 4617, "iter_tflops": 19.39612259088756, "iter_time": 1.0636710205078126, "loss": 0.19822965562343597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.513698629876828, "step_time": 1.0057227554321289} +{"epoch": 0, "iter": 4618, "iter_tflops": 15.574540864607194, "iter_time": 1.3246678466796875, "loss": 0.1498539000749588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.168665883334693, "step_time": 1.0762926139831543} +{"epoch": 0, "iter": 4619, "iter_tflops": 41.85183192178765, "iter_time": 0.49295556640625005, "loss": 0.14343516528606415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.133904570198794, "step_time": 0.447200246810913} +{"epoch": 0, "iter": 4620, "iter_tflops": 41.17000430496042, "iter_time": 0.5011195373535156, "loss": 0.1201539859175682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2865851901225, "step_time": 0.45556743621826173} +{"epoch": 0, "iter": 4621, "iter_tflops": 32.22442096639938, "iter_time": 0.6402316284179688, "loss": 0.35445889830589294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.69067065888084, "step_time": 0.5780528392791748} +{"epoch": 0, "iter": 4622, "iter_tflops": 32.18923033890706, "iter_time": 0.6409315567016601, "loss": 0.36464571952819824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.5242747130452, "step_time": 0.5807604427337646} +{"epoch": 0, "iter": 4623, "iter_tflops": 41.77159447033383, "iter_time": 0.49390246582031244, "loss": 0.3238602578639984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.955562390429414, "step_time": 0.4489357204437256} +{"epoch": 0, "iter": 4624, "iter_tflops": 40.9738604241683, "iter_time": 0.5035184211730956, "loss": 0.28896644711494446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.97877669500233, "step_time": 0.4586850738525391} +{"epoch": 0, 
"iter": 4625, "iter_tflops": 18.822081163468198, "iter_time": 1.0961111755371094, "loss": 0.8837212920188904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.179216812659124, "step_time": 1.0223931732177736} +{"epoch": 0, "iter": 4626, "iter_tflops": 14.796934004368445, "iter_time": 1.394281646728516, "loss": 0.8754461407661438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.043176868991562, "step_time": 0.9804172458648682} +{"epoch": 0, "iter": 4627, "iter_tflops": 47.15644142663712, "iter_time": 0.43750318908691405, "loss": 0.840671956539154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.952360385012284, "step_time": 0.4049094753265381} +{"epoch": 0, "iter": 4628, "iter_tflops": 51.34776875870807, "iter_time": 0.40179143142700197, "loss": 0.9075955748558044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.366835016362366, "step_time": 0.37262548065185547} +{"epoch": 0, "iter": 4629, "iter_tflops": 42.01484714397951, "iter_time": 0.49104292678833006, "loss": 0.004685091320425272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.770646123153476, "step_time": 0.4507494487762451} +{"epoch": 0, "iter": 4630, "iter_tflops": 23.974433764788376, "iter_time": 0.8605456008911133, "loss": 0.007932358421385288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.826349920281604, "step_time": 0.6917069492340088} +{"epoch": 0, "iter": 4631, "iter_tflops": 47.65094811692655, "iter_time": 0.43296291732788084, "loss": 0.0036343750543892384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.971764959982934, "step_time": 0.3894734020233155} +{"epoch": 0, "iter": 4632, "iter_tflops": 48.88799558843542, "iter_time": 0.42200735092163083, "loss": 0.012526392936706543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.06014658149479, "step_time": 0.3816322154998779} +{"epoch": 0, "iter": 4633, "iter_tflops": 17.52947929223329, "iter_time": 1.176937042236328, "loss": 0.1168510913848877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.087609605915315, "step_time": 1.0808631324768065} 
+{"epoch": 0, "iter": 4634, "iter_tflops": 22.162970714193797, "iter_time": 0.930881233215332, "loss": 0.21663615107536316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.291364894022987, "step_time": 0.755956823348999} +{"epoch": 0, "iter": 4635, "iter_tflops": 48.44183527750196, "iter_time": 0.4258941345214844, "loss": 0.3065851032733917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.12892752464931, "step_time": 0.39577053451538086} +{"epoch": 0, "iter": 4636, "iter_tflops": 50.09526273366154, "iter_time": 0.41183721542358404, "loss": 0.19077090919017792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.70592606590314, "step_time": 0.37712721443176267} +{"epoch": 0, "iter": 4637, "iter_tflops": 26.958930266241055, "iter_time": 0.7652786407470703, "loss": 0.7498361468315125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.3714340195561, "step_time": 0.7271783828735352} +{"epoch": 0, "iter": 4638, "iter_tflops": 15.47707320950384, "iter_time": 1.333010009765625, "loss": 0.6658361554145813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.841916198828606, "step_time": 1.1563272285461426} +{"epoch": 0, "iter": 4639, "iter_tflops": 48.24995623030468, "iter_time": 0.4275878181457519, "loss": 0.7809597849845886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.253106684193085, "step_time": 0.3948299884796142} +{"epoch": 0, "iter": 4640, "iter_tflops": 49.112638794753494, "iter_time": 0.4200770721435547, "loss": 0.7172720432281494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.271565751889966, "step_time": 0.38728153038024904} +{"epoch": 0, "iter": 4641, "iter_tflops": 27.888368099427204, "iter_time": 0.7397741394042969, "loss": 0.012392126955091953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.462778508643705, "step_time": 0.7002426300048828} +{"epoch": 0, "iter": 4642, "iter_tflops": 13.714567249200291, "iter_time": 1.5043196868896485, "loss": 0.003743890905752778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.681643980596768, "step_time": 
1.2367542152404785} +{"epoch": 0, "iter": 4643, "iter_tflops": 44.28907091373925, "iter_time": 0.46582809448242185, "loss": 0.00237196683883667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.277230524155335, "step_time": 0.4186739654541015} +{"epoch": 0, "iter": 4644, "iter_tflops": 44.699055417792266, "iter_time": 0.46155546951293946, "loss": 0.00742289237678051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.637745361994696, "step_time": 0.41563317108154296} +{"epoch": 0, "iter": 4645, "iter_tflops": 17.05324573945872, "iter_time": 1.2098045043945311, "loss": 0.7934309840202332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.42316505492006, "step_time": 1.119845230102539} +{"epoch": 0, "iter": 4646, "iter_tflops": 17.95204809072515, "iter_time": 1.149233413696289, "loss": 0.7255373597145081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.76973334319136, "step_time": 0.9476961975097656} +{"epoch": 0, "iter": 4647, "iter_tflops": 45.85789258637124, "iter_time": 0.4498918800354004, "loss": 0.6298006772994995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.52658937000693, "step_time": 0.4165660057067871} +{"epoch": 0, "iter": 4648, "iter_tflops": 47.548979253070925, "iter_time": 0.43389140701293943, "loss": 0.6544222235679626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.1823186171969, "step_time": 0.40309024810791017} +{"epoch": 0, "iter": 4649, "iter_tflops": 35.91473458004383, "iter_time": 0.5698251495361328, "loss": 0.08251911401748657, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 38.62733845740384, "step_time": 0.5298091926574707} +{"epoch": 0, "iter": 4650, "iter_tflops": 14.718902789728999, "iter_time": 1.3903970489501953, "loss": 0.050281889736652374, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 16.69892578979889, "step_time": 1.2255350589752196} +{"epoch": 0, "iter": 4651, "iter_tflops": 41.12305598038319, "iter_time": 0.49765559768676754, "loss": 0.030259396880865097, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 45.49029009439726, 
"step_time": 0.44987884140014645} +{"epoch": 0, "iter": 4652, "iter_tflops": 41.82323060378023, "iter_time": 0.4893242034912109, "loss": 0.022383010014891624, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 46.11998332368882, "step_time": 0.443736478805542} +{"epoch": 0, "iter": 4653, "iter_tflops": 32.13099933891254, "iter_time": 0.6420931167602539, "loss": 0.18879234790802002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.70867226601774, "step_time": 0.5777614288330077} +{"epoch": 0, "iter": 4654, "iter_tflops": 37.829551228259255, "iter_time": 0.5453697662353516, "loss": 0.06530127674341202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78714246080016, "step_time": 0.49371869659423834} +{"epoch": 0, "iter": 4655, "iter_tflops": 43.28567994465706, "iter_time": 0.47662630081176754, "loss": 0.12982021272182465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.247741022400604, "step_time": 0.43665777587890625} +{"epoch": 0, "iter": 4656, "iter_tflops": 46.30014924178616, "iter_time": 0.4455945358276367, "loss": 0.1541411578655243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.75963132822037, "step_time": 0.4064468746185303} +{"epoch": 0, "iter": 4657, "iter_tflops": 19.445883022198974, "iter_time": 1.060949172973633, "loss": 0.35766854882240295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.174757824388855, "step_time": 0.9743248863220215} +{"epoch": 0, "iter": 4658, "iter_tflops": 44.811511225376265, "iter_time": 0.4603971824645996, "loss": 0.36587950587272644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.58009603449379, "step_time": 0.42468202400207516} +{"epoch": 0, "iter": 4659, "iter_tflops": 51.259915513899074, "iter_time": 0.4024800529479981, "loss": 0.3031151294708252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.696347175856204, "step_time": 0.3704209442138672} +{"epoch": 0, "iter": 4660, "iter_tflops": 48.73870961117354, "iter_time": 0.4232999534606934, "loss": 0.34467989206314087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
52.90093184367969, "step_time": 0.3899948978424072} +{"epoch": 0, "iter": 4661, "iter_tflops": 46.89338237788536, "iter_time": 0.439957462310791, "loss": 0.15441903471946716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.720711323877744, "step_time": 0.3988942337036133} +{"epoch": 0, "iter": 4662, "iter_tflops": 47.45632299857215, "iter_time": 0.4347385597229004, "loss": 0.16422700881958008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.48913312449382, "step_time": 0.39305456733703614} +{"epoch": 0, "iter": 4663, "iter_tflops": 51.82940376447022, "iter_time": 0.3980577049255371, "loss": 0.06088457256555557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.19505269027188, "step_time": 0.36713362693786616} +{"epoch": 0, "iter": 4664, "iter_tflops": 53.54232772700962, "iter_time": 0.38532305908203124, "loss": 0.12657691538333893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.886753440680856, "step_time": 0.35640439796447754} +{"epoch": 0, "iter": 4665, "iter_tflops": 29.501413568937412, "iter_time": 0.6993255920410156, "loss": 0.6908861398696899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.421326754378644, "step_time": 0.6565952377319335} +{"epoch": 0, "iter": 4666, "iter_tflops": 17.419616463694492, "iter_time": 1.1843598022460937, "loss": 0.7382034063339233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.48859419724964, "step_time": 0.8783451805114746} +{"epoch": 0, "iter": 4667, "iter_tflops": 39.22487845913637, "iter_time": 0.525969596862793, "loss": 0.6907106041908264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.810422900823134, "step_time": 0.4819175357818604} +{"epoch": 0, "iter": 4668, "iter_tflops": 41.34593088920384, "iter_time": 0.4989872779846191, "loss": 0.5591379404067993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08763189592183, "step_time": 0.4575776691436768} +{"epoch": 0, "iter": 4669, "iter_tflops": 16.909305686383323, "iter_time": 1.2201029357910156, "loss": 1.083762288093567, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 17.98518315757703, "step_time": 1.1471161193847657} +{"epoch": 0, "iter": 4670, "iter_tflops": 13.58603221088141, "iter_time": 1.518551788330078, "loss": 0.932228147983551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.06702973565499, "step_time": 1.2088274192810058} +{"epoch": 0, "iter": 4671, "iter_tflops": 35.861472087534985, "iter_time": 0.5752996826171876, "loss": 0.7105681896209717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.80681623197301, "step_time": 0.5316358184814454} +{"epoch": 0, "iter": 4672, "iter_tflops": 32.826114911822444, "iter_time": 0.6284963531494141, "loss": 0.6932371258735657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.60324875802078, "step_time": 0.5794722175598144} +{"epoch": 0, "iter": 4673, "iter_tflops": 36.66436260436572, "iter_time": 0.562701545715332, "loss": 0.0044878143817186356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.95047204706115, "step_time": 0.5038059997558593} +{"epoch": 0, "iter": 4674, "iter_tflops": 40.85606609044203, "iter_time": 0.5049701423645019, "loss": 0.01663103885948658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.76813889570603, "step_time": 0.4507741413116455} +{"epoch": 0, "iter": 4675, "iter_tflops": 46.478101862549536, "iter_time": 0.4438884696960449, "loss": 0.005205201916396618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.31775124561423, "step_time": 0.4020264530181884} +{"epoch": 0, "iter": 4676, "iter_tflops": 39.95688940928692, "iter_time": 0.5163338241577148, "loss": 0.011003081686794758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.49187058216842, "step_time": 0.4637047901153565} +{"epoch": 0, "iter": 4677, "iter_tflops": 20.13733353400177, "iter_time": 1.024519630432129, "loss": 0.09881393611431122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.534447755467344, "step_time": 0.9580507354736327} +{"epoch": 0, "iter": 4678, "iter_tflops": 20.310093864993114, "iter_time": 1.015804931640625, "loss": 0.12248008698225021, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 27.59836957157437, "step_time": 0.7475475482940673} +{"epoch": 0, "iter": 4679, "iter_tflops": 55.955733512336934, "iter_time": 0.36870383453369143, "loss": 0.07003556936979294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.18165646416481, "step_time": 0.3372104434967041} +{"epoch": 0, "iter": 4680, "iter_tflops": 50.17117149724812, "iter_time": 0.41121410751342774, "loss": 0.06270971894264221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.77086553080932, "step_time": 0.3766800708770752} +{"epoch": 0, "iter": 4681, "iter_tflops": 40.1380300369154, "iter_time": 0.5140036392211914, "loss": 0.14655360579490662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.289592629802975, "step_time": 0.4765832214355469} +{"epoch": 0, "iter": 4682, "iter_tflops": 14.911967746433051, "iter_time": 1.383525894165039, "loss": 0.12657739222049713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.6649343573834, "step_time": 1.167912265777588} +{"epoch": 0, "iter": 4683, "iter_tflops": 40.25186855894911, "iter_time": 0.5125499572753907, "loss": 0.09816117584705353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.42139835795843, "step_time": 0.46444043350219727} +{"epoch": 0, "iter": 4684, "iter_tflops": 46.14682562578409, "iter_time": 0.4470750312805175, "loss": 0.06683138012886047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.65121841270524, "step_time": 0.4073168258666992} +{"epoch": 0, "iter": 4685, "iter_tflops": 24.39574424762302, "iter_time": 0.8456841201782226, "loss": 0.10605396330356598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.082395824561463, "step_time": 0.7909968719482421} +{"epoch": 0, "iter": 4686, "iter_tflops": 22.095512942165982, "iter_time": 0.9337232208251953, "loss": 0.16579166054725647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.536727256724575, "step_time": 0.7492209701538085} +{"epoch": 0, "iter": 4687, "iter_tflops": 52.02369560593416, "iter_time": 0.3965710868835449, "loss": 0.11655297130346298, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 56.69271358223763, "step_time": 0.3639108486175537} +{"epoch": 0, "iter": 4688, "iter_tflops": 53.736513076990676, "iter_time": 0.38393063354492185, "loss": 0.12216436862945557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.6050983250961, "step_time": 0.35203581428527836} +{"epoch": 0, "iter": 4689, "iter_tflops": 27.97609871018249, "iter_time": 0.4557275276184082, "loss": 0.05258515104651451, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 30.428010910914875, "step_time": 0.4190046577453613} +{"epoch": 0, "iter": 4690, "iter_tflops": 6.288863525530198, "iter_time": 2.0273103790283202, "loss": 0.1101735308766365, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 7.027447546124718, "step_time": 1.8142402648925777} +{"epoch": 0, "iter": 4691, "iter_tflops": 5.274756312335957, "iter_time": 2.417074371337891, "loss": 0.09668640792369843, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 6.067078704180858, "step_time": 2.1014196319580076} +{"epoch": 0, "iter": 4692, "iter_tflops": 18.467661178680075, "iter_time": 0.6903677825927733, "loss": 0.08048567920923233, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 33.466156001227574, "step_time": 0.3809663200378418} +{"epoch": 0, "iter": 4693, "iter_tflops": 23.874379120619476, "iter_time": 0.7033361587524414, "loss": 0.5368192195892334, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 25.39197000430666, "step_time": 0.6613001708984374} +{"epoch": 0, "iter": 4694, "iter_tflops": 15.99799311577729, "iter_time": 1.049613784790039, "loss": 0.4442894756793976, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 17.66293154472013, "step_time": 0.9506753768920897} +{"epoch": 0, "iter": 4695, "iter_tflops": 30.130767568705565, "iter_time": 0.5572946014404296, "loss": 0.5613682270050049, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 32.169357760490776, "step_time": 0.521978530883789} +{"epoch": 0, "iter": 4696, "iter_tflops": 29.483352291173986, "iter_time": 0.5695320510864258, "loss": 0.5169864296913147, "lr": 3e-05, "seqlen": 
6704.0, "step_tflops": 31.411435253487806, "step_time": 0.5345732841491699} +{"epoch": 0, "iter": 4697, "iter_tflops": 24.617325812977672, "iter_time": 0.8380720825195312, "loss": 0.9171178340911865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.876999509481603, "step_time": 0.7972753372192383} +{"epoch": 0, "iter": 4698, "iter_tflops": 16.343820146920603, "iter_time": 1.2623177032470703, "loss": 0.9062162041664124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.991813363221688, "step_time": 0.8973234596252443} +{"epoch": 0, "iter": 4699, "iter_tflops": 38.82617065394447, "iter_time": 0.5313708038330077, "loss": 0.9354645609855652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53075173810313, "step_time": 0.48508650016784666} +{"epoch": 0, "iter": 4700, "iter_tflops": 39.334509176484595, "iter_time": 0.5245036468505859, "loss": 0.8317376971244812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.642642867154194, "step_time": 0.4838136692047119} +{"epoch": 0, "iter": 4701, "iter_tflops": 23.69805735849677, "iter_time": 0.8705816345214844, "loss": 0.2580418586730957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.403102642137252, "step_time": 0.8121485710144043} +{"epoch": 0, "iter": 4702, "iter_tflops": 37.85860507181451, "iter_time": 0.5449512329101562, "loss": 0.25780200958251953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.42843746361487, "step_time": 0.49799352264404295} +{"epoch": 0, "iter": 4703, "iter_tflops": 38.14392413259854, "iter_time": 0.5408749618530274, "loss": 0.2055911123752594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.26157221429945, "step_time": 0.5000074501037597} +{"epoch": 0, "iter": 4704, "iter_tflops": 38.89623743795156, "iter_time": 0.5304136047363281, "loss": 0.26526281237602234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.350264947091986, "step_time": 0.4871538238525391} +{"epoch": 0, "iter": 4705, "iter_tflops": 15.742612220491567, "iter_time": 1.310525421142578, "loss": 0.3540728688240051, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 16.657656548429802, "step_time": 1.2385351715087891} +{"epoch": 0, "iter": 4706, "iter_tflops": 28.607058984395547, "iter_time": 0.7211889038085938, "loss": 0.5496546030044556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.360333981969475, "step_time": 0.5241595134735106} +{"epoch": 0, "iter": 4707, "iter_tflops": 51.5948353502333, "iter_time": 0.3998674163818359, "loss": 0.6128315329551697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.04665076297955, "step_time": 0.36810573387145995} +{"epoch": 0, "iter": 4708, "iter_tflops": 49.75317806797016, "iter_time": 0.41466885757446287, "loss": 0.41481760144233704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.82275211600486, "step_time": 0.3833154697418213} +{"epoch": 0, "iter": 4709, "iter_tflops": 13.042031052483068, "iter_time": 0.6527200088500976, "loss": 0.007432899437844753, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 13.892490586782253, "step_time": 0.6127623100280761} +{"epoch": 0, "iter": 4710, "iter_tflops": 19.146583467329176, "iter_time": 0.44461167907714844, "loss": 0.002257091458886862, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 22.013459266769445, "step_time": 0.38670862770080566} +{"epoch": 0, "iter": 4711, "iter_tflops": 22.293826270780137, "iter_time": 0.38184538269042967, "loss": 0.0007182105910032988, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 24.531043058421247, "step_time": 0.3470213069915771} +{"epoch": 0, "iter": 4712, "iter_tflops": 18.291065501231067, "iter_time": 0.46540725708007813, "loss": 0.01629718951880932, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 20.294707720127466, "step_time": 0.4194588432312012} +{"epoch": 0, "iter": 4713, "iter_tflops": 18.65332811623738, "iter_time": 1.1060274810791015, "loss": 0.08778820931911469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.729748142813655, "step_time": 1.045684585571289} +{"epoch": 0, "iter": 4714, "iter_tflops": 19.098450034635416, "iter_time": 1.0802496261596681, "loss": 
0.019456686452031136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.417145772653505, "step_time": 0.8116998538970948} +{"epoch": 0, "iter": 4715, "iter_tflops": 44.48578549623443, "iter_time": 0.46376821899414056, "loss": 0.042273491621017456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.20215313857533, "step_time": 0.4193128185272217} +{"epoch": 0, "iter": 4716, "iter_tflops": 43.93630403570583, "iter_time": 0.46956825256347656, "loss": 0.08330465853214264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.47115877292345, "step_time": 0.4256364822387695} +{"epoch": 0, "iter": 4717, "iter_tflops": 21.757956874603952, "iter_time": 0.9482091369628907, "loss": 0.8523772358894348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.12972516392505, "step_time": 0.8919731369018555} +{"epoch": 0, "iter": 4718, "iter_tflops": 13.944591470608376, "iter_time": 1.4795050506591796, "loss": 0.9068567156791687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.608612266425325, "step_time": 1.1716478958129883} +{"epoch": 0, "iter": 4719, "iter_tflops": 34.31579181310051, "iter_time": 0.6012128067016601, "loss": 0.9175214767456055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.39287826338078, "step_time": 0.5517385787963867} +{"epoch": 0, "iter": 4720, "iter_tflops": 35.004940190845666, "iter_time": 0.5893766250610352, "loss": 0.6045447587966919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92855304885202, "step_time": 0.5439462318420409} +{"epoch": 0, "iter": 4721, "iter_tflops": 19.8139338160361, "iter_time": 1.0412416687011719, "loss": 0.02749570459127426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.849577748440467, "step_time": 0.9895209274291994} +{"epoch": 0, "iter": 4722, "iter_tflops": 44.31954154127429, "iter_time": 0.4655078277587891, "loss": 0.05452142283320427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.8499816746611, "step_time": 0.40572469902038577} +{"epoch": 0, "iter": 4723, "iter_tflops": 53.305485648623026, "iter_time": 0.38703509140014647, 
"loss": 0.018805278465151787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.56903883135147, "step_time": 0.35225255393981936} +{"epoch": 0, "iter": 4724, "iter_tflops": 57.16713731433125, "iter_time": 0.36089079284667963, "loss": 0.043213073164224625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.395345704075154, "step_time": 0.33065116119384774} +{"epoch": 0, "iter": 4725, "iter_tflops": 41.95144218680958, "iter_time": 0.491785083770752, "loss": 0.6943466663360596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.520333004816045, "step_time": 0.4532280883789062} +{"epoch": 0, "iter": 4726, "iter_tflops": 44.21441109928488, "iter_time": 0.46661468505859377, "loss": 0.9648231863975525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.82181647480548, "step_time": 0.43141593170166015} +{"epoch": 0, "iter": 4727, "iter_tflops": 48.21864948079855, "iter_time": 0.42786543655395504, "loss": 0.9103425145149231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40207689105087, "step_time": 0.3937075538635254} +{"epoch": 0, "iter": 4728, "iter_tflops": 45.19068084073858, "iter_time": 0.45653424835205075, "loss": 0.8019387722015381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.003608667707674, "step_time": 0.42101171875000004} +{"epoch": 0, "iter": 4729, "iter_tflops": 27.00442231944304, "iter_time": 0.7639894409179688, "loss": 0.6685325503349304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.57226422478302, "step_time": 0.7220671539306641} +{"epoch": 0, "iter": 4730, "iter_tflops": 13.084647594997074, "iter_time": 1.576740478515625, "loss": 0.4306945204734802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.18925013992849, "step_time": 1.2743699264526367} +{"epoch": 0, "iter": 4731, "iter_tflops": 37.516981210126005, "iter_time": 0.549913475036621, "loss": 0.37779825925827026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.32065875128354, "step_time": 0.4874946212768555} +{"epoch": 0, "iter": 4732, "iter_tflops": 38.875694749937004, "iter_time": 
0.5306938858032226, "loss": 0.40556854009628296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41887600052243, "step_time": 0.48636586952209476} +{"epoch": 0, "iter": 4733, "iter_tflops": 32.428087034919095, "iter_time": 0.6362106246948243, "loss": 0.7526957392692566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.62708244231236, "step_time": 0.5790845642089844} +{"epoch": 0, "iter": 4734, "iter_tflops": 37.70648756654959, "iter_time": 0.5471497039794921, "loss": 0.9348726868629456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18282073190272, "step_time": 0.5009635848999023} +{"epoch": 0, "iter": 4735, "iter_tflops": 37.736733359008745, "iter_time": 0.5467111663818359, "loss": 0.8755850195884705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.24095927859879, "step_time": 0.5002573623657227} +{"epoch": 0, "iter": 4736, "iter_tflops": 34.71088866048074, "iter_time": 0.5943694992065429, "loss": 0.7850351929664612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.6165060968493, "step_time": 0.5484585266113281} +{"epoch": 0, "iter": 4737, "iter_tflops": 29.101138165049054, "iter_time": 0.7089445571899415, "loss": 0.17711859941482544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.27285878068885, "step_time": 0.6597124252319335} +{"epoch": 0, "iter": 4738, "iter_tflops": 10.700883977122423, "iter_time": 1.9279803009033203, "loss": 0.09224164485931396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.846519592802442, "step_time": 1.4899840621948244} +{"epoch": 0, "iter": 4739, "iter_tflops": 14.860044556424551, "iter_time": 1.388360137939453, "loss": 0.12693849205970764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.9028440091585, "step_time": 1.0914280147552489} +{"epoch": 0, "iter": 4740, "iter_tflops": 42.15375377081268, "iter_time": 0.48942482376098634, "loss": 0.13220149278640747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.20069251460115, "step_time": 0.4465537719726563} +{"epoch": 0, "iter": 4741, "iter_tflops": 13.949458164701818, "iter_time": 
1.1009284973144533, "loss": 0.29141873121261597, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 14.912268511586129, "step_time": 1.029847068786621} +{"epoch": 0, "iter": 4742, "iter_tflops": 14.679343960905124, "iter_time": 1.0461881713867187, "loss": 0.5514012575149536, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 17.376954548589225, "step_time": 0.8837771873474121} +{"epoch": 0, "iter": 4743, "iter_tflops": 25.808292754317108, "iter_time": 0.5950550918579102, "loss": 0.3976254463195801, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.38411618430141, "step_time": 0.5608125495910644} +{"epoch": 0, "iter": 4744, "iter_tflops": 28.467583036340827, "iter_time": 0.5394682083129883, "loss": 0.2820879817008972, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.25699260726255, "step_time": 0.5075638618469238} +{"epoch": 0, "iter": 4745, "iter_tflops": 21.623441837743318, "iter_time": 0.6027612838745118, "loss": 0.060087937861680984, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 23.247833202987014, "step_time": 0.560644660949707} +{"epoch": 0, "iter": 4746, "iter_tflops": 9.80241657308738, "iter_time": 1.3296490173339843, "loss": 0.06880287081003189, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 10.939779062415301, "step_time": 1.1914110412597656} +{"epoch": 0, "iter": 4747, "iter_tflops": 31.786877562323255, "iter_time": 0.4100362968444824, "loss": 0.024184608832001686, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 35.22127963709879, "step_time": 0.3700539474487305} +{"epoch": 0, "iter": 4748, "iter_tflops": 34.10311797840968, "iter_time": 0.3821871528625488, "loss": 0.02351563610136509, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 37.12010819578419, "step_time": 0.35112434196472164} +{"epoch": 0, "iter": 4749, "iter_tflops": 34.72151077270006, "iter_time": 0.5941876678466796, "loss": 0.8105750679969788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.99677616180377, "step_time": 0.5576457099914551} +{"epoch": 0, "iter": 4750, "iter_tflops": 15.665541412450214, 
"iter_time": 1.3169729003906252, "loss": 0.8647734522819519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.250568090481472, "step_time": 1.0187908515930177} +{"epoch": 0, "iter": 4751, "iter_tflops": 43.44329100056511, "iter_time": 0.47489711380004884, "loss": 0.8691025972366333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.67833125319137, "step_time": 0.44198438453674316} +{"epoch": 0, "iter": 4752, "iter_tflops": 47.90445437205676, "iter_time": 0.4306717147827149, "loss": 0.8856309652328491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.61919868505509, "step_time": 0.3996786861419678} +{"epoch": 0, "iter": 4753, "iter_tflops": 23.59808762827224, "iter_time": 0.8742697219848632, "loss": 0.9283500909805298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.813922911571154, "step_time": 0.8314321594238281} +{"epoch": 0, "iter": 4754, "iter_tflops": 25.963320824959876, "iter_time": 0.7946246032714843, "loss": 0.8106658458709717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.734613748621737, "step_time": 0.6501132698059082} +{"epoch": 0, "iter": 4755, "iter_tflops": 41.32294756710822, "iter_time": 0.4992648086547852, "loss": 0.9246610999107361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.232573829228194, "step_time": 0.4664230842590332} +{"epoch": 0, "iter": 4756, "iter_tflops": 46.99462812993133, "iter_time": 0.4390096130371094, "loss": 0.7578451633453369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.4945757753454, "step_time": 0.4085803909301758} +{"epoch": 0, "iter": 4757, "iter_tflops": 43.99375293744125, "iter_time": 0.4689550704956054, "loss": 0.31473177671432495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.02704205249131, "step_time": 0.4295724372863769} +{"epoch": 0, "iter": 4758, "iter_tflops": 46.10680448521263, "iter_time": 0.4474630966186523, "loss": 0.2433534562587738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.033530190982404, "step_time": 0.4042654590606689} +{"epoch": 0, "iter": 4759, "iter_tflops": 42.6503714885118, 
"iter_time": 0.4837259979248047, "loss": 0.38529738783836365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.679397280258605, "step_time": 0.44197429084777834} +{"epoch": 0, "iter": 4760, "iter_tflops": 39.010388871970086, "iter_time": 0.5288615188598633, "loss": 0.3404971957206726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89293571348184, "step_time": 0.48099047470092776} +{"epoch": 0, "iter": 4761, "iter_tflops": 31.046778395309573, "iter_time": 0.6645164031982421, "loss": 0.8860909938812256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.864118762028966, "step_time": 0.6092316665649414} +{"epoch": 0, "iter": 4762, "iter_tflops": 46.68685498950588, "iter_time": 0.44190369033813476, "loss": 0.8753108382225037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.72299617339296, "step_time": 0.4067404346466065} +{"epoch": 0, "iter": 4763, "iter_tflops": 47.53695939581812, "iter_time": 0.43400111770629884, "loss": 0.6978133916854858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.16417737977798, "step_time": 0.4032331714630127} +{"epoch": 0, "iter": 4764, "iter_tflops": 47.698193432192426, "iter_time": 0.432534065246582, "loss": 0.8795652985572815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.318125216204564, "step_time": 0.40202352333068847} +{"epoch": 0, "iter": 4765, "iter_tflops": 43.35453206894517, "iter_time": 0.47586936187744144, "loss": 0.2912655472755432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19455772503321, "step_time": 0.4371498432159424} +{"epoch": 0, "iter": 4766, "iter_tflops": 47.178077992433685, "iter_time": 0.43730254364013676, "loss": 0.20051945745944977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.15899782833104, "step_time": 0.3955423679351807} +{"epoch": 0, "iter": 4767, "iter_tflops": 49.21482954466925, "iter_time": 0.4192048149108887, "loss": 0.20736633241176605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.28873938275251, "step_time": 0.3871567192077636} +{"epoch": 0, "iter": 4768, "iter_tflops": 
51.06232653350949, "iter_time": 0.4040374755859375, "loss": 0.2223365753889084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.282193127732825, "step_time": 0.3731960029602051} +{"epoch": 0, "iter": 4769, "iter_tflops": 25.085108468096458, "iter_time": 0.822443862915039, "loss": 0.6889702677726746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.34641948240016, "step_time": 0.7830701065063477} +{"epoch": 0, "iter": 4770, "iter_tflops": 13.671559573750619, "iter_time": 1.5090519409179688, "loss": 0.8231952786445618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.155897897870883, "step_time": 1.2770007362365723} +{"epoch": 0, "iter": 4771, "iter_tflops": 42.65322320217467, "iter_time": 0.4836936569213867, "loss": 0.7086651921272278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.26698362643711, "step_time": 0.44591395187377925} +{"epoch": 0, "iter": 4772, "iter_tflops": 41.978745293566114, "iter_time": 0.4914652252197266, "loss": 0.7858056426048279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.06767852380808, "step_time": 0.457780258178711} +{"epoch": 0, "iter": 4773, "iter_tflops": 5.3967050044265665, "iter_time": 0.6261123809814453, "loss": 0.04794718325138092, "lr": 3e-05, "seqlen": 1376.0, "step_tflops": 5.780688627261538, "step_time": 0.5845227165222168} +{"epoch": 0, "iter": 4774, "iter_tflops": 3.397607460900386, "iter_time": 0.9945068283081056, "loss": 0.05907148867845535, "lr": 3e-05, "seqlen": 1376.0, "step_tflops": 3.7958672384626344, "step_time": 0.8901638565063477} +{"epoch": 0, "iter": 4775, "iter_tflops": 6.048677523367786, "iter_time": 0.5586252212524414, "loss": 0.16263611614704132, "lr": 3e-05, "seqlen": 1376.0, "step_tflops": 6.65587092457398, "step_time": 0.5076636638641356} +{"epoch": 0, "iter": 4776, "iter_tflops": 6.266340837153618, "iter_time": 0.5392211990356446, "loss": 0.23958761990070343, "lr": 3e-05, "seqlen": 1376.0, "step_tflops": 6.8358211481595355, "step_time": 0.49429962348937984} +{"epoch": 0, "iter": 4777, "iter_tflops": 
23.007540649565627, "iter_time": 0.896710075378418, "loss": 0.07246352732181549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.629591305404634, "step_time": 0.8376547241210938} +{"epoch": 0, "iter": 4778, "iter_tflops": 10.390993955300226, "iter_time": 1.9854783477783204, "loss": 0.035797569900751114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.876004731350985, "step_time": 1.7372082595825193} +{"epoch": 0, "iter": 4779, "iter_tflops": 12.131577507907274, "iter_time": 1.7006109466552732, "loss": 0.08849498629570007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.05500261580127, "step_time": 1.37038126373291} +{"epoch": 0, "iter": 4780, "iter_tflops": 33.85130283955074, "iter_time": 0.6094623184204102, "loss": 0.04346587508916855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.66230911340208, "step_time": 0.5336229000091552} +{"epoch": 0, "iter": 4781, "iter_tflops": 14.366779877516304, "iter_time": 1.1059849853515626, "loss": 0.48686450719833374, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 15.409862247122794, "step_time": 1.0311216659545899} +{"epoch": 0, "iter": 4782, "iter_tflops": 11.150045515577022, "iter_time": 1.4250563201904298, "loss": 0.44326913356781006, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 13.169262539193257, "step_time": 1.206555248260498} +{"epoch": 0, "iter": 4783, "iter_tflops": 22.621499841267845, "iter_time": 0.7024044799804687, "loss": 0.30933746695518494, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 24.274121354825215, "step_time": 0.6545836448669434} +{"epoch": 0, "iter": 4784, "iter_tflops": 24.164796375918616, "iter_time": 0.6575450744628906, "loss": 0.5127996802330017, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 25.81123169513245, "step_time": 0.6156018829345703} +{"epoch": 0, "iter": 4785, "iter_tflops": 20.301005010905378, "iter_time": 1.016259712219238, "loss": 0.4186848998069763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.95121922135327, "step_time": 0.939860939025879} +{"epoch": 0, "iter": 4786, 
"iter_tflops": 18.176118053422385, "iter_time": 1.1350659942626955, "loss": 0.47731655836105347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.982252360938222, "step_time": 0.9385341033935548} +{"epoch": 0, "iter": 4787, "iter_tflops": 47.7155208176802, "iter_time": 0.43237699508666994, "loss": 0.39506810903549194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.75952661420208, "step_time": 0.39859509658813475} +{"epoch": 0, "iter": 4788, "iter_tflops": 48.071967913659485, "iter_time": 0.42917097854614256, "loss": 0.4397444725036621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.16704356982814, "step_time": 0.39548136329650885} +{"epoch": 0, "iter": 4789, "iter_tflops": 40.7461036950629, "iter_time": 0.5063329162597656, "loss": 0.03342391550540924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.845133084846815, "step_time": 0.4600520076751709} +{"epoch": 0, "iter": 4790, "iter_tflops": 10.894798571434057, "iter_time": 1.8936645202636717, "loss": 0.034110989421606064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.055814721069227, "step_time": 1.7112981567382815} +{"epoch": 0, "iter": 4791, "iter_tflops": 11.976943825236152, "iter_time": 1.7225674438476564, "loss": 0.020517900586128235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.934028660478903, "step_time": 1.4806266021728516} +{"epoch": 0, "iter": 4792, "iter_tflops": 19.626089262501353, "iter_time": 1.051207565307617, "loss": 0.03922082856297493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.322674293639952, "step_time": 0.7550905628204345} +{"epoch": 0, "iter": 4793, "iter_tflops": 18.296038511552304, "iter_time": 0.8527985687255859, "loss": 0.5589280724525452, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 19.252876631300435, "step_time": 0.8104158020019532} +{"epoch": 0, "iter": 4794, "iter_tflops": 8.265075926412765, "iter_time": 1.8878030395507812, "loss": 0.47830483317375183, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 11.169788462122531, "step_time": 1.396878330230713} +{"epoch": 0, 
"iter": 4795, "iter_tflops": 9.85156869291447, "iter_time": 1.583791976928711, "loss": 0.33188962936401367, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 11.148096751576913, "step_time": 1.3995963439941408} +{"epoch": 0, "iter": 4796, "iter_tflops": 11.767367871958841, "iter_time": 1.3259409942626954, "loss": 0.3422406315803528, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.953120215018679, "step_time": 1.1182327117919924} +{"epoch": 0, "iter": 4797, "iter_tflops": 14.247671676050913, "iter_time": 1.0664056396484374, "loss": 0.3896980881690979, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 15.282898854291107, "step_time": 0.9941698608398438} +{"epoch": 0, "iter": 4798, "iter_tflops": 14.879224409271222, "iter_time": 1.021141761779785, "loss": 0.39459607005119324, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 16.566580031292936, "step_time": 0.9171354255676271} +{"epoch": 0, "iter": 4799, "iter_tflops": 27.713202710755834, "iter_time": 0.5482512283325196, "loss": 0.3327348232269287, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.577393564785147, "step_time": 0.5136962928771972} +{"epoch": 0, "iter": 4800, "iter_tflops": 25.66509218323382, "iter_time": 0.5920024490356445, "loss": 0.44754818081855774, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 27.200468117089446, "step_time": 0.558585880279541} +{"epoch": 0, "iter": 4801, "iter_tflops": 37.75448876569132, "iter_time": 0.5464540557861328, "loss": 0.003065818687900901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.311865458841076, "step_time": 0.49939873886108394} +{"epoch": 0, "iter": 4802, "iter_tflops": 23.85536782215642, "iter_time": 0.864840721130371, "loss": 0.003647761419415474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.60664352126195, "step_time": 0.6138992576599122} +{"epoch": 0, "iter": 4803, "iter_tflops": 56.11686582374414, "iter_time": 0.36764514923095704, "loss": 0.03530814126133919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.40171441763254, "step_time": 0.3360019130706787} 
+{"epoch": 0, "iter": 4804, "iter_tflops": 59.21134846253467, "iter_time": 0.3484314079284667, "loss": 0.008161697536706924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.66789870584793, "step_time": 0.31903145027160645} +{"epoch": 0, "iter": 4805, "iter_tflops": 40.043910199361285, "iter_time": 0.5152117614746095, "loss": 0.8928536176681519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.46238155135308, "step_time": 0.4746885185241699} +{"epoch": 0, "iter": 4806, "iter_tflops": 11.437832302344583, "iter_time": 1.803759048461914, "loss": 0.8304856419563293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.192629131639437, "step_time": 1.357967296600342} +{"epoch": 0, "iter": 4807, "iter_tflops": 11.425224953920873, "iter_time": 1.805749435424805, "loss": 1.054691195487976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.189235258740542, "step_time": 1.5642372817993164} +{"epoch": 0, "iter": 4808, "iter_tflops": 19.348632236882132, "iter_time": 1.066281753540039, "loss": 0.923458456993103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.152778885230052, "step_time": 0.8910849800109863} +{"epoch": 0, "iter": 4809, "iter_tflops": 16.894391732400276, "iter_time": 0.8534148712158203, "loss": 0.35860034823417664, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 18.31557056304424, "step_time": 0.7871949768066406} +{"epoch": 0, "iter": 4810, "iter_tflops": 20.32330337174595, "iter_time": 0.7094282302856445, "loss": 0.4564515948295593, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 21.970228479884003, "step_time": 0.6562483024597168} +{"epoch": 0, "iter": 4811, "iter_tflops": 23.331972656550246, "iter_time": 0.6179471130371095, "loss": 0.3908507525920868, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 25.011202839003428, "step_time": 0.5764586868286133} +{"epoch": 0, "iter": 4812, "iter_tflops": 20.593563423485246, "iter_time": 0.700118034362793, "loss": 0.4474687874317169, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 22.21749183141339, "step_time": 0.6489447708129883} 
+{"epoch": 0, "iter": 4813, "iter_tflops": 27.358724016254822, "iter_time": 0.754095603942871, "loss": 0.05878297984600067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.61009474204045, "step_time": 0.673996395111084} +{"epoch": 0, "iter": 4814, "iter_tflops": 41.62616379303047, "iter_time": 0.4956280288696289, "loss": 0.09014962613582611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8564997912326, "step_time": 0.4403037700653075} +{"epoch": 0, "iter": 4815, "iter_tflops": 44.19107101964455, "iter_time": 0.4668611335754395, "loss": 0.16602492332458496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.70455536046732, "step_time": 0.42359679412841794} +{"epoch": 0, "iter": 4816, "iter_tflops": 44.55103297406059, "iter_time": 0.4630890045166015, "loss": 0.07028653472661972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.805367877665795, "step_time": 0.42272181129455566} +{"epoch": 0, "iter": 4817, "iter_tflops": 34.13822592967515, "iter_time": 0.604339942932129, "loss": 0.05565262585878372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.17614201393923, "step_time": 0.5404185028076172} +{"epoch": 0, "iter": 4818, "iter_tflops": 51.107614357308584, "iter_time": 0.4036794471740723, "loss": 0.03720247745513916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.17322558044207, "step_time": 0.3672762832641601} +{"epoch": 0, "iter": 4819, "iter_tflops": 53.46780108562126, "iter_time": 0.3858601455688476, "loss": 0.03465650603175163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.88684359984954, "step_time": 0.3503514919281006} +{"epoch": 0, "iter": 4820, "iter_tflops": 57.300106615840285, "iter_time": 0.3600533180236817, "loss": 0.040271688252687454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.44599802862338, "step_time": 0.3303829574584961} +{"epoch": 0, "iter": 4821, "iter_tflops": 24.031449354227313, "iter_time": 0.8585039215087892, "loss": 0.7798146605491638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.146675966437922, "step_time": 
0.8204302444458008} +{"epoch": 0, "iter": 4822, "iter_tflops": 14.938607080068772, "iter_time": 1.3810587158203127, "loss": 0.9691309928894043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.370594396423147, "step_time": 1.1877022190093993} +{"epoch": 0, "iter": 4823, "iter_tflops": 36.2646959931726, "iter_time": 0.568902976989746, "loss": 0.8606839776039124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.28187687499536, "step_time": 0.5252064094543457} +{"epoch": 0, "iter": 4824, "iter_tflops": 35.818303832385205, "iter_time": 0.5759930343627929, "loss": 0.8562331795692444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.8369094072931, "step_time": 0.5312238750457764} +{"epoch": 0, "iter": 4825, "iter_tflops": 33.5247993091519, "iter_time": 0.6153979721069336, "loss": 0.7920799851417542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.89149984473254, "step_time": 0.5592370491027832} +{"epoch": 0, "iter": 4826, "iter_tflops": 45.98103923210926, "iter_time": 0.44868697738647456, "loss": 0.9467153549194336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.90875923293106, "step_time": 0.41337620544433595} +{"epoch": 0, "iter": 4827, "iter_tflops": 48.007072258149186, "iter_time": 0.42975112915039065, "loss": 0.7942693829536438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.782517925802836, "step_time": 0.39841812133789056} +{"epoch": 0, "iter": 4828, "iter_tflops": 49.248158491700394, "iter_time": 0.4189211158752441, "loss": 0.8163955807685852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.16938687662562, "step_time": 0.3880257930755615} +{"epoch": 0, "iter": 4829, "iter_tflops": 33.16452536311945, "iter_time": 0.6220831832885743, "loss": 0.7181913256645203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.43911454541987, "step_time": 0.5821560096740723} +{"epoch": 0, "iter": 4830, "iter_tflops": 9.572992651373657, "iter_time": 2.155135208129883, "loss": 0.6284587979316711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.857399860904936, "step_time": 
1.9001873168945314} +{"epoch": 0, "iter": 4831, "iter_tflops": 15.184731115652612, "iter_time": 1.3586736145019531, "loss": 0.6229391694068909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.312905038936755, "step_time": 1.1916598320007326} +{"epoch": 0, "iter": 4832, "iter_tflops": 34.73444408647968, "iter_time": 0.593966423034668, "loss": 0.6329171061515808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.71553446411846, "step_time": 0.46138537216186526} +{"epoch": 0, "iter": 4833, "iter_tflops": 18.312836862519276, "iter_time": 0.907969367980957, "loss": 0.3073693811893463, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 19.313288596966874, "step_time": 0.8609354553222658} +{"epoch": 0, "iter": 4834, "iter_tflops": 15.427286573549143, "iter_time": 1.0777977600097657, "loss": 0.3123649060726166, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 18.438520432734386, "step_time": 0.9017803230285644} +{"epoch": 0, "iter": 4835, "iter_tflops": 25.686815151301396, "iter_time": 0.6473163299560547, "loss": 0.24730658531188965, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 27.5825246833354, "step_time": 0.6028271560668945} +{"epoch": 0, "iter": 4836, "iter_tflops": 24.362708886576122, "iter_time": 0.682497787475586, "loss": 0.37937140464782715, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 26.243547687875616, "step_time": 0.633584114074707} +{"epoch": 0, "iter": 4837, "iter_tflops": 29.152929448474968, "iter_time": 0.6820859222412109, "loss": 0.06800981611013412, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 31.49599622383558, "step_time": 0.6313438262939453} +{"epoch": 0, "iter": 4838, "iter_tflops": 48.35626201778867, "iter_time": 0.4112146377563477, "loss": 0.0435606949031353, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 53.30903940986302, "step_time": 0.3730099620819092} +{"epoch": 0, "iter": 4839, "iter_tflops": 52.03102253459606, "iter_time": 0.38217205429077156, "loss": 0.03915276378393173, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 57.16670465363988, "step_time": 
0.3478388843536377} +{"epoch": 0, "iter": 4840, "iter_tflops": 57.19918504509217, "iter_time": 0.3476413650512695, "loss": 0.08788315951824188, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 62.40318512768728, "step_time": 0.3186504459381103} +{"epoch": 0, "iter": 4841, "iter_tflops": 37.78754668439739, "iter_time": 0.5459759979248047, "loss": 0.008502241224050522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.90035189952261, "step_time": 0.5044233741760255} +{"epoch": 0, "iter": 4842, "iter_tflops": 51.98007351176501, "iter_time": 0.39690389251708985, "loss": 0.005388651508837938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.950326572456945, "step_time": 0.3622647094726562} +{"epoch": 0, "iter": 4843, "iter_tflops": 58.68753547100328, "iter_time": 0.35154131698608393, "loss": 0.007339159958064556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.7619765175237, "step_time": 0.3185680027008057} +{"epoch": 0, "iter": 4844, "iter_tflops": 54.93916273589621, "iter_time": 0.3755261726379394, "loss": 0.029493896290659904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.183851289213564, "step_time": 0.3428011512756348} +{"epoch": 0, "iter": 4845, "iter_tflops": 34.166101294868284, "iter_time": 0.5892798919677735, "loss": 0.023548757657408714, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 36.508919805775555, "step_time": 0.5514651374816895} +{"epoch": 0, "iter": 4846, "iter_tflops": 14.906375981198646, "iter_time": 1.3506566925048828, "loss": 0.01785864867269993, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 18.88541832137594, "step_time": 1.066081573486328} +{"epoch": 0, "iter": 4847, "iter_tflops": 51.64914057407516, "iter_time": 0.38981087112426754, "loss": 0.0037390857469290495, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 57.13893622265904, "step_time": 0.35235861587524414} +{"epoch": 0, "iter": 4848, "iter_tflops": 54.86188465372764, "iter_time": 0.36698331832885744, "loss": 0.003332613268867135, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 
60.24530513196885, "step_time": 0.3341902980804443} +{"epoch": 0, "iter": 4849, "iter_tflops": 46.04352022922872, "iter_time": 0.4480781097412109, "loss": 0.11130888015031815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.53886544871091, "step_time": 0.40822233200073244} +{"epoch": 0, "iter": 4850, "iter_tflops": 37.70031124718172, "iter_time": 0.5472393417358399, "loss": 0.13186883926391602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.44037681499619, "step_time": 0.4978500461578369} +{"epoch": 0, "iter": 4851, "iter_tflops": 42.6494495899974, "iter_time": 0.48373645401000975, "loss": 0.15588562190532684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.89573606529469, "step_time": 0.4399353809356689} +{"epoch": 0, "iter": 4852, "iter_tflops": 45.23084930816953, "iter_time": 0.4561288108825684, "loss": 0.1787826269865036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.644042236772655, "step_time": 0.41558045196533194} +{"epoch": 0, "iter": 4853, "iter_tflops": 16.125159004842665, "iter_time": 1.2794350433349608, "loss": 0.1693752110004425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.37126684819472, "step_time": 1.1876562423706054} +{"epoch": 0, "iter": 4854, "iter_tflops": 18.830854771624978, "iter_time": 1.0956004791259766, "loss": 0.30905309319496155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.56290338903567, "step_time": 0.7223037948608398} +{"epoch": 0, "iter": 4855, "iter_tflops": 51.127590045860785, "iter_time": 0.403521728515625, "loss": 0.2279861867427826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.68294018820379, "step_time": 0.3705101318359375} +{"epoch": 0, "iter": 4856, "iter_tflops": 50.201719866087636, "iter_time": 0.4109638786315918, "loss": 0.18554991483688354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.24305028005786, "step_time": 0.3803453788757324} +{"epoch": 0, "iter": 4857, "iter_tflops": 38.14364973164393, "iter_time": 0.5408788528442383, "loss": 0.008662858977913857, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 41.24910438998867, "step_time": 0.5001585807800293} +{"epoch": 0, "iter": 4858, "iter_tflops": 19.172945049349675, "iter_time": 1.0760523986816408, "loss": 0.011537591926753521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.867823919055496, "step_time": 0.9434451999664307} +{"epoch": 0, "iter": 4859, "iter_tflops": 53.005016747281154, "iter_time": 0.3892290725708008, "loss": 0.005827574525028467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.05724845309073, "step_time": 0.35535775566101074} +{"epoch": 0, "iter": 4860, "iter_tflops": 55.690452008312164, "iter_time": 0.37046015548706057, "loss": 0.006463612429797649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.831448115714366, "step_time": 0.33915177345275876} +{"epoch": 0, "iter": 4861, "iter_tflops": 29.661474595994665, "iter_time": 0.6955518493652344, "loss": 0.3579619824886322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.372814793121837, "step_time": 0.6576105346679687} +{"epoch": 0, "iter": 4862, "iter_tflops": 19.543795433579714, "iter_time": 1.0556339263916017, "loss": 0.45395785570144653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.086399277735165, "step_time": 0.8936470890045167} +{"epoch": 0, "iter": 4863, "iter_tflops": 40.24638283511531, "iter_time": 0.5126198196411133, "loss": 0.4236983358860016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.05996271858873, "step_time": 0.4682503623962403} +{"epoch": 0, "iter": 4864, "iter_tflops": 44.70871482688087, "iter_time": 0.4614557495117188, "loss": 0.3874969184398651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.70970012309142, "step_time": 0.42355205345153807} +{"epoch": 0, "iter": 4865, "iter_tflops": 19.654568326998373, "iter_time": 1.0496843872070312, "loss": 0.38265854120254517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.254635687539064, "step_time": 0.9706632385253906} +{"epoch": 0, "iter": 4866, "iter_tflops": 16.669120380106598, "iter_time": 1.2376833953857422, "loss": 0.4031856060028076, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 18.829310821736193, "step_time": 1.095690315246582} +{"epoch": 0, "iter": 4867, "iter_tflops": 39.21751734068272, "iter_time": 0.5260683212280274, "loss": 0.6139678955078125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.00329811188387, "step_time": 0.47975607490539546} +{"epoch": 0, "iter": 4868, "iter_tflops": 38.20092465621427, "iter_time": 0.5400679092407226, "loss": 0.3822084069252014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.72885169639907, "step_time": 0.49440836906433105} +{"epoch": 0, "iter": 4869, "iter_tflops": 20.183532545660437, "iter_time": 1.022174560546875, "loss": 0.11288057267665863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.765425340445084, "step_time": 0.947883773803711} +{"epoch": 0, "iter": 4870, "iter_tflops": 18.694698204783073, "iter_time": 1.1035799179077148, "loss": 0.059905849397182465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.790703163048672, "step_time": 0.9052416400909424} +{"epoch": 0, "iter": 4871, "iter_tflops": 43.17426525032192, "iter_time": 0.4778562736511231, "loss": 0.13272008299827576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.458265652439074, "step_time": 0.4347207641601562} +{"epoch": 0, "iter": 4872, "iter_tflops": 42.531277682670144, "iter_time": 0.48508050155639654, "loss": 0.09831731766462326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.7444735012586, "step_time": 0.4413589878082275} +{"epoch": 0, "iter": 4873, "iter_tflops": 20.732279790733994, "iter_time": 0.9951193847656249, "loss": 0.9681040048599243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.362352692400712, "step_time": 0.9225815277099609} +{"epoch": 0, "iter": 4874, "iter_tflops": 40.947152427991405, "iter_time": 0.5038468437194824, "loss": 0.846792995929718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.478852851142136, "step_time": 0.463840503692627} +{"epoch": 0, "iter": 4875, "iter_tflops": 45.13400769100107, "iter_time": 0.4571075019836426, "loss": 0.6987949013710022, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 48.490487728551, "step_time": 0.42546681785583496} +{"epoch": 0, "iter": 4876, "iter_tflops": 40.857200061919734, "iter_time": 0.5049561271667481, "loss": 0.8534186482429504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73038970953641, "step_time": 0.47177931976318355} +{"epoch": 0, "iter": 4877, "iter_tflops": 46.76626394545519, "iter_time": 0.44115333938598633, "loss": 0.37152618169784546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.90768547490486, "step_time": 0.4052648105621338} +{"epoch": 0, "iter": 4878, "iter_tflops": 43.17012699967664, "iter_time": 0.4779020805358887, "loss": 0.30576878786087036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.529428350174506, "step_time": 0.4340698852539063} +{"epoch": 0, "iter": 4879, "iter_tflops": 50.25571649502848, "iter_time": 0.4105223236083984, "loss": 0.36102306842803955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.55186842295815, "step_time": 0.3781922435760498} +{"epoch": 0, "iter": 4880, "iter_tflops": 46.88384846790781, "iter_time": 0.4400469284057617, "loss": 0.5200068354606628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.15451584229628, "step_time": 0.40330932998657226} +{"epoch": 0, "iter": 4881, "iter_tflops": 39.17871355407224, "iter_time": 0.4885316314697266, "loss": 0.005102439317852259, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 42.322370866745125, "step_time": 0.4522440605163574} +{"epoch": 0, "iter": 4882, "iter_tflops": 18.44588848377508, "iter_time": 1.037631820678711, "loss": 0.013850344344973564, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 22.15249339677216, "step_time": 0.8640129356384278} +{"epoch": 0, "iter": 4883, "iter_tflops": 52.287444586722614, "iter_time": 0.36605424118041996, "loss": 0.0008997333934530616, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 57.760818155617315, "step_time": 0.33136720466613767} +{"epoch": 0, "iter": 4884, "iter_tflops": 54.12009123513369, "iter_time": 0.35365869522094723, "loss": 
0.010519429109990597, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 59.67074882890033, "step_time": 0.3207608623504639} +{"epoch": 0, "iter": 4885, "iter_tflops": 45.017369684595, "iter_time": 0.45829184722900396, "loss": 0.4143567383289337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45566844695732, "step_time": 0.41716337394714365} +{"epoch": 0, "iter": 4886, "iter_tflops": 43.94572762023855, "iter_time": 0.4694675598144532, "loss": 0.4380224049091339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.360697191245855, "step_time": 0.43561633872985844} +{"epoch": 0, "iter": 4887, "iter_tflops": 48.25293820329408, "iter_time": 0.4275613937377929, "loss": 0.36925172805786133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.27728628295887, "step_time": 0.3946473693847656} +{"epoch": 0, "iter": 4888, "iter_tflops": 50.714367442281436, "iter_time": 0.4068096389770508, "loss": 0.4705497920513153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.00156543517179, "step_time": 0.3751001148223877} +{"epoch": 0, "iter": 4889, "iter_tflops": 36.10921628532634, "iter_time": 0.5713525695800782, "loss": 0.8562126755714417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.82971059529317, "step_time": 0.5313223609924317} +{"epoch": 0, "iter": 4890, "iter_tflops": 15.720423744278662, "iter_time": 1.3123751525878906, "loss": 0.7882133722305298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.398100213859866, "step_time": 0.9211090812683105} +{"epoch": 0, "iter": 4891, "iter_tflops": 34.77698596910178, "iter_time": 0.5932398376464845, "loss": 0.7947124242782593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.190527540481874, "step_time": 0.5402149391174316} +{"epoch": 0, "iter": 4892, "iter_tflops": 38.36651395441303, "iter_time": 0.5377369842529297, "loss": 0.83888840675354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.906228964562814, "step_time": 0.4923156776428223} +{"epoch": 0, "iter": 4893, "iter_tflops": 18.786271918757333, "iter_time": 1.0982005157470704, "loss": 
0.8595993518829346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.9846487038443, "step_time": 1.032347068786621} +{"epoch": 0, "iter": 4894, "iter_tflops": 14.846398688946646, "iter_time": 1.38963623046875, "loss": 0.7526414394378662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.49690925797973, "step_time": 1.0065465602874755} +{"epoch": 0, "iter": 4895, "iter_tflops": 46.54273500665063, "iter_time": 0.44327204895019534, "loss": 0.8286600708961487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20514488083192, "step_time": 0.4109358425140381} +{"epoch": 0, "iter": 4896, "iter_tflops": 44.87397953052815, "iter_time": 0.4597562713623047, "loss": 0.7370689511299133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.677771778877215, "step_time": 0.42382986640930176} +{"epoch": 0, "iter": 4897, "iter_tflops": 31.4769064970924, "iter_time": 0.6554358673095704, "loss": 0.2687288522720337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.63485537415132, "step_time": 0.6133843383789063} +{"epoch": 0, "iter": 4898, "iter_tflops": 40.05492844544148, "iter_time": 0.5150700378417968, "loss": 0.3491221070289612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.2097371359192, "step_time": 0.46666401672363284} +{"epoch": 0, "iter": 4899, "iter_tflops": 45.9421514808113, "iter_time": 0.44906676864624023, "loss": 0.30296793580055237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.25284069381917, "step_time": 0.4105458164215088} +{"epoch": 0, "iter": 4900, "iter_tflops": 40.513810346889564, "iter_time": 0.5092360687255859, "loss": 0.33144107460975647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.421545395680425, "step_time": 0.46443889617919926} +{"epoch": 0, "iter": 4901, "iter_tflops": 34.34310603508468, "iter_time": 0.6007346420288086, "loss": 0.3844347596168518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92535541634203, "step_time": 0.543992094039917} +{"epoch": 0, "iter": 4902, "iter_tflops": 35.868843197963216, "iter_time": 0.5751814575195313, "loss": 
0.37853124737739563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.709524670911684, "step_time": 0.5195502510070802} +{"epoch": 0, "iter": 4903, "iter_tflops": 41.23452252895785, "iter_time": 0.5003354530334472, "loss": 0.2703319787979126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.01900142887562, "step_time": 0.45827523612976073} +{"epoch": 0, "iter": 4904, "iter_tflops": 39.76362746757827, "iter_time": 0.5188433456420899, "loss": 0.3213918209075928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.40622162718089, "step_time": 0.4753026809692383} +{"epoch": 0, "iter": 4905, "iter_tflops": 36.28808532204498, "iter_time": 0.5685362930297851, "loss": 0.824306070804596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.97326617363698, "step_time": 0.5161222858428954} +{"epoch": 0, "iter": 4906, "iter_tflops": 38.86827861420461, "iter_time": 0.5307951431274415, "loss": 0.6695822477340698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.556383118654686, "step_time": 0.4847943363189698} +{"epoch": 0, "iter": 4907, "iter_tflops": 38.79790607135666, "iter_time": 0.5317579116821289, "loss": 1.0095900297164917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.49133976177366, "step_time": 0.48553643226623533} +{"epoch": 0, "iter": 4908, "iter_tflops": 35.5044741817148, "iter_time": 0.581084327697754, "loss": 0.8510100841522217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.11621559033409, "step_time": 0.5412681503295899} +{"epoch": 0, "iter": 4909, "iter_tflops": 20.21229972403978, "iter_time": 1.0207197494506837, "loss": 0.37004488706588745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.553812843105927, "step_time": 0.9571899719238282} +{"epoch": 0, "iter": 4910, "iter_tflops": 13.821375040423339, "iter_time": 1.4926947174072265, "loss": 0.3239838182926178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.918525767087033, "step_time": 1.2194380168914796} +{"epoch": 0, "iter": 4911, "iter_tflops": 40.80451595830113, "iter_time": 0.5056080932617187, "loss": 
0.31335076689720154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.46863928726407, "step_time": 0.4639470386505127} +{"epoch": 0, "iter": 4912, "iter_tflops": 47.019920741160796, "iter_time": 0.4387734642028808, "loss": 0.2844773530960083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.42517123085686, "step_time": 0.4011866760253906} +{"epoch": 0, "iter": 4913, "iter_tflops": 17.687089175801727, "iter_time": 1.1664493408203125, "loss": 0.02310018427670002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.646990909901934, "step_time": 1.1064033660888672} +{"epoch": 0, "iter": 4914, "iter_tflops": 7.923606942167896, "iter_time": 2.603750244140625, "loss": 0.019978782162070274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.358388465460836, "step_time": 2.2045562210083007} +{"epoch": 0, "iter": 4915, "iter_tflops": 14.491437925337973, "iter_time": 1.4236746978759767, "loss": 0.04358727112412453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.355353410074564, "step_time": 1.188745225906372} +{"epoch": 0, "iter": 4916, "iter_tflops": 52.850172379104976, "iter_time": 0.3903694648742676, "loss": 0.0294167622923851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.900143766283335, "step_time": 0.35632197380065916} +{"epoch": 0, "iter": 4917, "iter_tflops": 21.22155042955174, "iter_time": 0.8009355545043946, "loss": 0.260979026556015, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 22.351698244624917, "step_time": 0.7604386062622072} +{"epoch": 0, "iter": 4918, "iter_tflops": 7.405139660872804, "iter_time": 2.2953104248046876, "loss": 0.3488021194934845, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 9.321098751347517, "step_time": 1.823507583618164} +{"epoch": 0, "iter": 4919, "iter_tflops": 10.048286493820067, "iter_time": 1.6915415649414065, "loss": 0.4952128529548645, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 11.3733700690286, "step_time": 1.4944641876220703} +{"epoch": 0, "iter": 4920, "iter_tflops": 18.745429492808366, "iter_time": 0.9067327194213867, "loss": 
0.24963794648647308, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 22.964047483770422, "step_time": 0.7401610832214355} +{"epoch": 0, "iter": 4921, "iter_tflops": 11.756269652196842, "iter_time": 1.2055921783447265, "loss": 0.39853811264038086, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 12.634403402853247, "step_time": 1.121799446105957} +{"epoch": 0, "iter": 4922, "iter_tflops": 14.54101683398497, "iter_time": 0.9747094650268555, "loss": 0.34452277421951294, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 17.210618955383424, "step_time": 0.8235187110900879} +{"epoch": 0, "iter": 4923, "iter_tflops": 21.345395279496277, "iter_time": 0.6639964523315429, "loss": 0.5297354459762573, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 22.95565559881797, "step_time": 0.6174193840026856} +{"epoch": 0, "iter": 4924, "iter_tflops": 21.692798478485695, "iter_time": 0.6533627624511719, "loss": 0.46043747663497925, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 23.309727064340212, "step_time": 0.6080408706665039} +{"epoch": 0, "iter": 4925, "iter_tflops": 25.041165608240338, "iter_time": 0.8238871078491211, "loss": 0.024986937642097473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.337065158479742, "step_time": 0.7546930656433105} +{"epoch": 0, "iter": 4926, "iter_tflops": 7.52784392341829, "iter_time": 2.7406377868652347, "loss": 0.04639439284801483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.486524566629766, "step_time": 2.4310415115356445} +{"epoch": 0, "iter": 4927, "iter_tflops": 15.347764814718998, "iter_time": 1.3442409210205077, "loss": 0.033151496201753616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.309007850151165, "step_time": 1.1919281387329101} +{"epoch": 0, "iter": 4928, "iter_tflops": 40.63166249351234, "iter_time": 0.5077590293884278, "loss": 0.05628862604498863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83255337687559, "step_time": 0.4601810951232911} +{"epoch": 0, "iter": 4929, "iter_tflops": 20.42969235779239, "iter_time": 0.7037373199462891, 
"loss": 0.39113375544548035, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 22.101824140983727, "step_time": 0.650495491027832} +{"epoch": 0, "iter": 4930, "iter_tflops": 24.401419700738554, "iter_time": 0.589192642211914, "loss": 0.4282968044281006, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 25.978174592232893, "step_time": 0.5534313774108887} +{"epoch": 0, "iter": 4931, "iter_tflops": 25.982806156208394, "iter_time": 0.5533327255249023, "loss": 0.4109356701374054, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 27.65746158254845, "step_time": 0.5198285064697266} +{"epoch": 0, "iter": 4932, "iter_tflops": 26.07255112635477, "iter_time": 0.5514280853271485, "loss": 0.24519209563732147, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 27.77002633746748, "step_time": 0.5177214012145996} +{"epoch": 0, "iter": 4933, "iter_tflops": 25.98404608393722, "iter_time": 0.7939907989501954, "loss": 0.23524759709835052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.58716106597794, "step_time": 0.7478512725830078} +{"epoch": 0, "iter": 4934, "iter_tflops": 15.909396880750391, "iter_time": 1.2967866516113282, "loss": 0.3931191861629486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.648819289791433, "step_time": 1.106294891357422} +{"epoch": 0, "iter": 4935, "iter_tflops": 37.52448733008272, "iter_time": 0.5498034744262695, "loss": 0.2898399233818054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.06732916695146, "step_time": 0.5023724193572998} +{"epoch": 0, "iter": 4936, "iter_tflops": 38.634033994548915, "iter_time": 0.534013442993164, "loss": 0.29049116373062134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30216273793934, "step_time": 0.4877077713012696} +{"epoch": 0, "iter": 4937, "iter_tflops": 22.23953488612098, "iter_time": 0.927676483154297, "loss": 0.36927786469459534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.850424004268792, "step_time": 0.8650199890136719} +{"epoch": 0, "iter": 4938, "iter_tflops": 18.644073806023293, "iter_time": 1.1065764770507813, 
"loss": 0.5168532133102417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.587192606883878, "step_time": 0.9133978652954103} +{"epoch": 0, "iter": 4939, "iter_tflops": 48.05440036988335, "iter_time": 0.4293278732299805, "loss": 0.415256142616272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.050647014932856, "step_time": 0.3963657455444336} +{"epoch": 0, "iter": 4940, "iter_tflops": 46.755071824674815, "iter_time": 0.44125894165039065, "loss": 0.3733561038970947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52873826767763, "step_time": 0.4083041496276856} +{"epoch": 0, "iter": 4941, "iter_tflops": 42.87591303765011, "iter_time": 0.4811814384460449, "loss": 0.15254467725753784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.98136310243892, "step_time": 0.43913356590270997} +{"epoch": 0, "iter": 4942, "iter_tflops": 33.78860500886668, "iter_time": 0.6105932312011719, "loss": 0.15961600840091705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.189348959593374, "step_time": 0.5547581253051758} +{"epoch": 0, "iter": 4943, "iter_tflops": 39.28165575829318, "iter_time": 0.5252093658447266, "loss": 0.13787008821964264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94434784483789, "step_time": 0.4804146423339844} +{"epoch": 0, "iter": 4944, "iter_tflops": 37.649058530291455, "iter_time": 0.5479843139648437, "loss": 0.14177286624908447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.54776147220609, "step_time": 0.49656329917907716} +{"epoch": 0, "iter": 4945, "iter_tflops": 19.70485584092559, "iter_time": 1.0470055541992187, "loss": 0.043226663023233414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.259579170583518, "step_time": 0.9704375305175781} +{"epoch": 0, "iter": 4946, "iter_tflops": 17.99472538314368, "iter_time": 1.1465078277587892, "loss": 0.03351520374417305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.30562139047737, "step_time": 0.7555621318817138} +{"epoch": 0, "iter": 4947, "iter_tflops": 54.34906789003304, "iter_time": 
0.3796034469604492, "loss": 0.03340382128953934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.19035689300196, "step_time": 0.3485549774169922} +{"epoch": 0, "iter": 4948, "iter_tflops": 53.03394282169549, "iter_time": 0.38901677703857424, "loss": 0.02303783781826496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.13831233591514, "step_time": 0.3548622703552246} +{"epoch": 0, "iter": 4949, "iter_tflops": 25.865305721561473, "iter_time": 0.7976357879638672, "loss": 0.8861591815948486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.26926836955536, "step_time": 0.756569381713867} +{"epoch": 0, "iter": 4950, "iter_tflops": 17.500660692467726, "iter_time": 1.1788751220703124, "loss": 0.7262172698974609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.897582730601894, "step_time": 0.9872478446960449} +{"epoch": 0, "iter": 4951, "iter_tflops": 35.7727367651961, "iter_time": 0.5767267303466798, "loss": 0.7964186072349548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.13808801710514, "step_time": 0.5271359577178956} +{"epoch": 0, "iter": 4952, "iter_tflops": 38.596554191693, "iter_time": 0.5345320053100586, "loss": 0.8225188851356506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.32122687016236, "step_time": 0.4874880771636963} +{"epoch": 0, "iter": 4953, "iter_tflops": 14.843348076393546, "iter_time": 1.3899218292236328, "loss": 0.2715960741043091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.660274818924195, "step_time": 1.317415802001953} +{"epoch": 0, "iter": 4954, "iter_tflops": 19.343750060349784, "iter_time": 1.0665508728027344, "loss": 0.28939417004585266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.19394518764553, "step_time": 0.7876283378601074} +{"epoch": 0, "iter": 4955, "iter_tflops": 51.77035144026124, "iter_time": 0.39851175308227543, "loss": 0.3810849189758301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.08603583655426, "step_time": 0.36784724044799805} +{"epoch": 0, "iter": 4956, "iter_tflops": 50.33590991088844, "iter_time": 
0.40986829376220696, "loss": 0.3053804636001587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.081087593278, "step_time": 0.38148444175720214} +{"epoch": 0, "iter": 4957, "iter_tflops": 41.006548706690005, "iter_time": 0.5031170425415039, "loss": 0.6860762238502502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.651776066908134, "step_time": 0.46204418563842775} +{"epoch": 0, "iter": 4958, "iter_tflops": 11.728717942029874, "iter_time": 1.7590237579345702, "loss": 0.6682614088058472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.99528734895695, "step_time": 1.5875827102661133} +{"epoch": 0, "iter": 4959, "iter_tflops": 9.10387179005375, "iter_time": 2.266188934326172, "loss": 0.9072040915489197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.241655160595707, "step_time": 1.8352362899780272} +{"epoch": 0, "iter": 4960, "iter_tflops": 22.803823904005952, "iter_time": 0.9047207870483398, "loss": 0.8343385457992554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.91536272309858, "step_time": 0.7390587654113769} +{"epoch": 0, "iter": 4961, "iter_tflops": 23.069477524585544, "iter_time": 0.7243161468505859, "loss": 0.2835044264793396, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 24.492258843143333, "step_time": 0.6822398529052736} +{"epoch": 0, "iter": 4962, "iter_tflops": 7.904398889690879, "iter_time": 2.1139615173339847, "loss": 0.3505253493785858, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 9.92494959457394, "step_time": 1.6835949554443361} +{"epoch": 0, "iter": 4963, "iter_tflops": 29.63256471638914, "iter_time": 0.5638929748535155, "loss": 0.3618268072605133, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 31.625743707700142, "step_time": 0.5283542175292969} +{"epoch": 0, "iter": 4964, "iter_tflops": 30.42938384869727, "iter_time": 0.5491269607543946, "loss": 0.4426080584526062, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 32.36157676740761, "step_time": 0.5163405723571778} +{"epoch": 0, "iter": 4965, "iter_tflops": 28.0127540526798, "iter_time": 
0.7364892959594727, "loss": 0.343315064907074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.813308568588955, "step_time": 0.6920095252990723} +{"epoch": 0, "iter": 4966, "iter_tflops": 18.70984849635541, "iter_time": 1.1026862945556641, "loss": 0.3688916862010956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.792035511275486, "step_time": 0.9051887226104736} +{"epoch": 0, "iter": 4967, "iter_tflops": 38.40955819969449, "iter_time": 0.5371343612670898, "loss": 0.5323130488395691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.13507259783298, "step_time": 0.4896418170928955} +{"epoch": 0, "iter": 4968, "iter_tflops": 38.36681007919806, "iter_time": 0.5377328338623047, "loss": 0.4928150475025177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.00022827060576, "step_time": 0.4912138423919678} +{"epoch": 0, "iter": 4969, "iter_tflops": 19.386560403017175, "iter_time": 1.0641956634521483, "loss": 0.4188007414340973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.876943765157392, "step_time": 0.9882238388061524} +{"epoch": 0, "iter": 4970, "iter_tflops": 19.674575201531027, "iter_time": 1.048616973876953, "loss": 0.4671972393989563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.351187965357152, "step_time": 0.8835136585235595} +{"epoch": 0, "iter": 4971, "iter_tflops": 37.27340438510473, "iter_time": 0.5535070877075196, "loss": 0.3904731869697571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.688819155535974, "step_time": 0.5070457668304443} +{"epoch": 0, "iter": 4972, "iter_tflops": 36.594833112271864, "iter_time": 0.5637706680297851, "loss": 0.2699809968471527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.748764604738, "step_time": 0.5190373516082764} +{"epoch": 0, "iter": 4973, "iter_tflops": 24.52420962958043, "iter_time": 0.841254165649414, "loss": 0.12865212559700012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.410834617700605, "step_time": 0.7811602249145508} +{"epoch": 0, "iter": 4974, "iter_tflops": 9.551458630403456, "iter_time": 
2.1599940185546873, "loss": 0.09742654860019684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.673560981588697, "step_time": 1.7673350524902343} +{"epoch": 0, "iter": 4975, "iter_tflops": 12.66080129798557, "iter_time": 1.6295251007080076, "loss": 0.19656997919082642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.809376905922422, "step_time": 1.3049909324645996} +{"epoch": 0, "iter": 4976, "iter_tflops": 38.606116154269486, "iter_time": 0.5343996124267578, "loss": 0.10436967015266418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45564769396694, "step_time": 0.4859446182250976} +{"epoch": 0, "iter": 4977, "iter_tflops": 21.33735898266137, "iter_time": 0.76580672454834, "loss": 0.5993088483810425, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 23.27560335588544, "step_time": 0.7020352058410644} +{"epoch": 0, "iter": 4978, "iter_tflops": 24.31739668418809, "iter_time": 0.6719589767456053, "loss": 0.371770977973938, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 26.255634575133556, "step_time": 0.6223537635803222} +{"epoch": 0, "iter": 4979, "iter_tflops": 26.515304680449024, "iter_time": 0.6162589187622071, "loss": 0.37602749466896057, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 28.479705185381444, "step_time": 0.573752182006836} +{"epoch": 0, "iter": 4980, "iter_tflops": 23.78182674817034, "iter_time": 0.6870915832519531, "loss": 0.3462373912334442, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 25.662830219771127, "step_time": 0.636729965209961} +{"epoch": 0, "iter": 4981, "iter_tflops": 30.35590534274785, "iter_time": 0.6796401977539063, "loss": 0.06734026223421097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.93115642863539, "step_time": 0.6264916191101074} +{"epoch": 0, "iter": 4982, "iter_tflops": 10.378941904255946, "iter_time": 1.987783889770508, "loss": 0.07819844037294388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.483086172111035, "step_time": 1.5301462326049806} +{"epoch": 0, "iter": 4983, "iter_tflops": 15.91969091191584, "iter_time": 
1.2959481201171876, "loss": 0.13593994081020355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.33703417180828, "step_time": 1.125105255126953} +{"epoch": 0, "iter": 4984, "iter_tflops": 42.46596020366785, "iter_time": 0.4858266105651856, "loss": 0.1436353623867035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.907085752284885, "step_time": 0.4398289337158203} +{"epoch": 0, "iter": 4985, "iter_tflops": 18.70877498831369, "iter_time": 0.8143066558837891, "loss": 0.6830873489379883, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 20.371184633258785, "step_time": 0.7478543968200684} +{"epoch": 0, "iter": 4986, "iter_tflops": 22.5119703381218, "iter_time": 0.6767368545532226, "loss": 0.3111996054649353, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.210203557387555, "step_time": 0.6292669105529785} +{"epoch": 0, "iter": 4987, "iter_tflops": 24.6645924246612, "iter_time": 0.6176741027832031, "loss": 0.3124788999557495, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 26.50133256498841, "step_time": 0.5748646774291992} +{"epoch": 0, "iter": 4988, "iter_tflops": 22.791238721144882, "iter_time": 0.668444580078125, "loss": 0.4522988200187683, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.528434098924254, "step_time": 0.6211028366088867} +{"epoch": 0, "iter": 4989, "iter_tflops": 32.58702721455528, "iter_time": 0.633107566833496, "loss": 0.8267494440078735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.7042955300541, "step_time": 0.5778322525024414} +{"epoch": 0, "iter": 4990, "iter_tflops": 39.515493783120625, "iter_time": 0.5221013717651367, "loss": 0.8661195635795593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.22196075110606, "step_time": 0.4773289585113525} +{"epoch": 0, "iter": 4991, "iter_tflops": 40.14367517266662, "iter_time": 0.5139313583374023, "loss": 0.8256317377090454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.647487302564336, "step_time": 0.4726753997802735} +{"epoch": 0, "iter": 4992, "iter_tflops": 40.26706589519052, "iter_time": 
0.5123565139770508, "loss": 0.9651365876197815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.89582232694982, "step_time": 0.47000129890441894} +{"epoch": 0, "iter": 4993, "iter_tflops": 20.658980580556438, "iter_time": 0.9986501235961915, "loss": 0.7334635257720947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.868335394690593, "step_time": 0.9434231338500976} +{"epoch": 0, "iter": 4994, "iter_tflops": 9.052127569562407, "iter_time": 2.279143035888672, "loss": 0.9467846155166626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.372725455116347, "step_time": 1.8140852508544922} +{"epoch": 0, "iter": 4995, "iter_tflops": 12.019779914394656, "iter_time": 1.7164285583496093, "loss": 0.762578547000885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.478930288269089, "step_time": 1.332850082397461} +{"epoch": 0, "iter": 4996, "iter_tflops": 34.295964676843745, "iter_time": 0.6015603790283203, "loss": 0.7676482200622559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.356156068432384, "step_time": 0.4988639049530029} +{"epoch": 0, "iter": 4997, "iter_tflops": 13.015290591544492, "iter_time": 1.2554689331054685, "loss": 0.49622824788093567, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 13.9184763098822, "step_time": 1.1740001296997071} +{"epoch": 0, "iter": 4998, "iter_tflops": 12.06203852104039, "iter_time": 1.354687515258789, "loss": 0.43249571323394775, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 14.4197141262199, "step_time": 1.1331911888122557} +{"epoch": 0, "iter": 4999, "iter_tflops": 24.421798303916585, "iter_time": 0.6690863952636719, "loss": 0.45699599385261536, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 26.32475188548207, "step_time": 0.6207197341918945} +{"epoch": 0, "iter": 5000, "iter_tflops": 24.225294912040717, "iter_time": 0.6745136871337891, "loss": 0.4634566903114319, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 26.113725232730673, "step_time": 0.6257358093261719} +{"epoch": 0, "iter": 5001, "iter_tflops": 17.9403805321619, "iter_time": 
1.1499808197021486, "loss": 0.6800439357757568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.20373167230042, "step_time": 1.0743273162841795} +{"epoch": 0, "iter": 5002, "iter_tflops": 23.906270433835644, "iter_time": 0.8629992523193359, "loss": 0.7106834650039673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.75959828965513, "step_time": 0.7173637580871581} +{"epoch": 0, "iter": 5003, "iter_tflops": 47.355340002273685, "iter_time": 0.43566561889648436, "loss": 0.7367545962333679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.49613219541296, "step_time": 0.40063384628295895} +{"epoch": 0, "iter": 5004, "iter_tflops": 48.25837661444213, "iter_time": 0.42751321029663086, "loss": 0.6513713002204895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.22283854913915, "step_time": 0.3950588302612305} +{"epoch": 0, "iter": 5005, "iter_tflops": 31.870041603775103, "iter_time": 0.6473506927490233, "loss": 0.1776689887046814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.90687027184712, "step_time": 0.6084635162353516} +{"epoch": 0, "iter": 5006, "iter_tflops": 10.170167335324436, "iter_time": 2.028589385986328, "loss": 0.1772235929965973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.287208160035902, "step_time": 1.5527034168243408} +{"epoch": 0, "iter": 5007, "iter_tflops": 38.4207892090747, "iter_time": 0.5369773483276367, "loss": 0.17061567306518555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.207026498145154, "step_time": 0.48880708312988286} +{"epoch": 0, "iter": 5008, "iter_tflops": 38.58828049598607, "iter_time": 0.534646614074707, "loss": 0.2776903510093689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.378527320020076, "step_time": 0.4868289394378661} +{"epoch": 0, "iter": 5009, "iter_tflops": 32.665339625265204, "iter_time": 0.6315897445678711, "loss": 0.9038929343223572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.600143049958135, "step_time": 0.5795227699279785} +{"epoch": 0, "iter": 5010, "iter_tflops": 43.98160103478203, "iter_time": 
0.46908464050292964, "loss": 0.7811673283576965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.66018674614492, "step_time": 0.4328789901733398} +{"epoch": 0, "iter": 5011, "iter_tflops": 45.73659428452118, "iter_time": 0.4510850410461426, "loss": 0.9080489873886108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.36864739960311, "step_time": 0.41789869880676267} +{"epoch": 0, "iter": 5012, "iter_tflops": 46.03863459366595, "iter_time": 0.448125659942627, "loss": 0.9906304478645325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.41140407963491, "step_time": 0.41753708267211914} +{"epoch": 0, "iter": 5013, "iter_tflops": 41.13769145768074, "iter_time": 0.5015131568908692, "loss": 0.5194514989852905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.34948241847277, "step_time": 0.46519355773925775} +{"epoch": 0, "iter": 5014, "iter_tflops": 45.87069390486846, "iter_time": 0.4497663269042969, "loss": 0.5972163081169128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64223502518232, "step_time": 0.4155955810546875} +{"epoch": 0, "iter": 5015, "iter_tflops": 47.84716771362793, "iter_time": 0.4311873512268066, "loss": 0.6528534293174744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.85141597636952, "step_time": 0.3978887195587158} +{"epoch": 0, "iter": 5016, "iter_tflops": 42.33966017686504, "iter_time": 0.48727584075927743, "loss": 0.4965064823627472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.485738338133864, "step_time": 0.4535727958679199} +{"epoch": 0, "iter": 5017, "iter_tflops": 24.74861190593348, "iter_time": 0.8336262893676758, "loss": 0.10459683835506439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.065916434795916, "step_time": 0.7914969558715821} +{"epoch": 0, "iter": 5018, "iter_tflops": 12.324124974723562, "iter_time": 1.674041244506836, "loss": 0.08742477744817734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.53638880246417, "step_time": 1.3279207782745361} +{"epoch": 0, "iter": 5019, "iter_tflops": 50.99093893015041, "iter_time": 
0.4046031303405762, "loss": 0.11448679864406586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.665689196133656, "step_time": 0.37062495422363284} +{"epoch": 0, "iter": 5020, "iter_tflops": 51.51438574460367, "iter_time": 0.40049188613891595, "loss": 0.09425593167543411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.75961992113468, "step_time": 0.3700006122589111} +{"epoch": 0, "iter": 5021, "iter_tflops": 27.789942240202365, "iter_time": 0.7423942565917969, "loss": 0.06559804826974869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.71496698965947, "step_time": 0.6942997283935546} +{"epoch": 0, "iter": 5022, "iter_tflops": 13.413091415714527, "iter_time": 1.5381311340332031, "loss": 0.06008034199476242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.85101784083139, "step_time": 1.1557376556396484} +{"epoch": 0, "iter": 5023, "iter_tflops": 49.10059647117427, "iter_time": 0.4201800994873047, "loss": 0.12452109903097153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.9152227304803, "step_time": 0.3826580410003662} +{"epoch": 0, "iter": 5024, "iter_tflops": 49.02545620820331, "iter_time": 0.4208241004943848, "loss": 0.05790337920188904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.39402219913973, "step_time": 0.38639332008361815} +{"epoch": 0, "iter": 5025, "iter_tflops": 47.94857208456092, "iter_time": 0.43027545166015624, "loss": 0.19622747600078583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.98187114005688, "step_time": 0.38939911079406736} +{"epoch": 0, "iter": 5026, "iter_tflops": 47.06575367528801, "iter_time": 0.43834618377685547, "loss": 0.11642716825008392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45520883676501, "step_time": 0.40095247840881343} +{"epoch": 0, "iter": 5027, "iter_tflops": 45.02839529892834, "iter_time": 0.458179630279541, "loss": 0.12229222059249878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.921270829504834, "step_time": 0.4217203102111816} +{"epoch": 0, "iter": 5028, "iter_tflops": 53.018233105424095, 
"iter_time": 0.38913204574584964, "loss": 0.088045634329319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.95940363276958, "step_time": 0.3559576568603516} +{"epoch": 0, "iter": 5029, "iter_tflops": 36.0151688555687, "iter_time": 0.5728445587158203, "loss": 0.13525809347629547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.694393119556096, "step_time": 0.5331804389953614} +{"epoch": 0, "iter": 5030, "iter_tflops": 14.836315112618173, "iter_time": 1.3905807037353515, "loss": 0.16541604697704315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.679833670782777, "step_time": 1.1669280319213866} +{"epoch": 0, "iter": 5031, "iter_tflops": 24.046518252584757, "iter_time": 0.857965934753418, "loss": 0.11280859261751175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.0726375087987, "step_time": 0.7620644092559815} +{"epoch": 0, "iter": 5032, "iter_tflops": 49.91580826180892, "iter_time": 0.41331782913208004, "loss": 0.1421331763267517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.0999996826332, "step_time": 0.3813510837554932} +{"epoch": 0, "iter": 5033, "iter_tflops": 21.44264382571504, "iter_time": 0.7658723068237304, "loss": 0.5448258519172668, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 22.711999621096187, "step_time": 0.7230683059692382} +{"epoch": 0, "iter": 5034, "iter_tflops": 9.840844193082875, "iter_time": 1.6687925109863284, "loss": 0.41981029510498047, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 11.234671205304211, "step_time": 1.461754135131836} +{"epoch": 0, "iter": 5035, "iter_tflops": 9.701174442067245, "iter_time": 1.6928184509277342, "loss": 0.44220995903015137, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 11.975350236353485, "step_time": 1.3713442001342775} +{"epoch": 0, "iter": 5036, "iter_tflops": 14.804195293959685, "iter_time": 1.1093022460937498, "loss": 0.5135505795478821, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 17.86698446769771, "step_time": 0.9191437492370605} +{"epoch": 0, "iter": 5037, "iter_tflops": 15.33715178395347, 
"iter_time": 1.102872833251953, "loss": 0.2713695168495178, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 16.006436614736863, "step_time": 1.0567578811645506} +{"epoch": 0, "iter": 5038, "iter_tflops": 11.333591482143616, "iter_time": 1.4924596557617187, "loss": 0.39228054881095886, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 14.47225163221732, "step_time": 1.1687834396362304} +{"epoch": 0, "iter": 5039, "iter_tflops": 26.107301485383566, "iter_time": 0.6479002838134765, "loss": 0.3143426775932312, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 28.108159432882196, "step_time": 0.6017799949645996} +{"epoch": 0, "iter": 5040, "iter_tflops": 25.785853364593798, "iter_time": 0.6559770507812499, "loss": 0.33798718452453613, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 27.61165913086294, "step_time": 0.6126009292602538} +{"epoch": 0, "iter": 5041, "iter_tflops": 20.68781801971206, "iter_time": 0.9972580718994142, "loss": 0.7514674663543701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.158452667734625, "step_time": 0.9310710372924805} +{"epoch": 0, "iter": 5042, "iter_tflops": 23.264553176460947, "iter_time": 0.8868037719726563, "loss": 0.8098284602165222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.443778332789094, "step_time": 0.7253288669586182} +{"epoch": 0, "iter": 5043, "iter_tflops": 46.70597727176975, "iter_time": 0.4417227668762207, "loss": 0.8590578436851501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.47589005968901, "step_time": 0.4087316436767578} +{"epoch": 0, "iter": 5044, "iter_tflops": 43.57610541288287, "iter_time": 0.47344968795776365, "loss": 0.7933089733123779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00710780963957, "step_time": 0.43889306259155275} +{"epoch": 0, "iter": 5045, "iter_tflops": 25.77536511499333, "iter_time": 0.8004190597534179, "loss": 0.0602966770529747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.207976449352675, "step_time": 0.7582737197875977} +{"epoch": 0, "iter": 5046, "iter_tflops": 
13.516683154627486, "iter_time": 1.5263429107666016, "loss": 0.041906002908945084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.26182452566647, "step_time": 1.2686825809478761} +{"epoch": 0, "iter": 5047, "iter_tflops": 41.96531642201766, "iter_time": 0.49162249374389644, "loss": 0.0409231074154377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.370174556770415, "step_time": 0.4449216270446777} +{"epoch": 0, "iter": 5048, "iter_tflops": 44.0177530330887, "iter_time": 0.4686993789672852, "loss": 0.03598332777619362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.11549675950907, "step_time": 0.42878271865844725} +{"epoch": 0, "iter": 5049, "iter_tflops": 20.318204141469543, "iter_time": 1.0153994598388671, "loss": 0.5754632949829102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.94346335983361, "step_time": 0.9401931304931641} +{"epoch": 0, "iter": 5050, "iter_tflops": 20.70368466722109, "iter_time": 0.9964938049316405, "loss": 0.7103698253631592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.15714478012171, "step_time": 0.8909169807434083} +{"epoch": 0, "iter": 5051, "iter_tflops": 39.67079585335405, "iter_time": 0.5200574645996094, "loss": 0.4617248773574829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.396192432437765, "step_time": 0.47541252708435056} +{"epoch": 0, "iter": 5052, "iter_tflops": 38.842812514270314, "iter_time": 0.5311431427001952, "loss": 0.5892362594604492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.43117192521413, "step_time": 0.4862249279022216} +{"epoch": 0, "iter": 5053, "iter_tflops": 22.336447425058154, "iter_time": 0.8735601654052734, "loss": 0.13590019941329956, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 23.879475781595062, "step_time": 0.8171130256652832} +{"epoch": 0, "iter": 5054, "iter_tflops": 35.36720059288563, "iter_time": 0.5517041320800782, "loss": 0.16289179027080536, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 39.059498206259455, "step_time": 0.49955149459838866} +{"epoch": 0, "iter": 5055, 
"iter_tflops": 40.47198132521337, "iter_time": 0.48211701202392576, "loss": 0.0799533873796463, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 44.37393260446179, "step_time": 0.4397228183746338} +{"epoch": 0, "iter": 5056, "iter_tflops": 42.491355600139386, "iter_time": 0.45920471191406254, "loss": 0.1333940178155899, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 46.4846873595221, "step_time": 0.4197560920715332} +{"epoch": 0, "iter": 5057, "iter_tflops": 28.598686106684116, "iter_time": 0.721400047302246, "loss": 0.0007543968968093395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.00560797720134, "step_time": 0.6653987731933594} +{"epoch": 0, "iter": 5058, "iter_tflops": 7.188431363769445, "iter_time": 2.8700411071777343, "loss": 0.009220048785209656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.358866219231425, "step_time": 2.468168884277344} +{"epoch": 0, "iter": 5059, "iter_tflops": 12.443664149906555, "iter_time": 1.6579596862792971, "loss": 0.004565556067973375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.39143095415977, "step_time": 1.121777503967285} +{"epoch": 0, "iter": 5060, "iter_tflops": 49.206792449151216, "iter_time": 0.41927328491210936, "loss": 0.005720391403883696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.701280739174756, "step_time": 0.3771592407226562} +{"epoch": 0, "iter": 5061, "iter_tflops": 15.981928086467589, "iter_time": 1.0070314331054688, "loss": 0.32638922333717346, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 17.11558402169896, "step_time": 0.9403303985595703} +{"epoch": 0, "iter": 5062, "iter_tflops": 18.959326978564196, "iter_time": 0.8488858261108397, "loss": 0.4978458881378174, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 20.768748813331957, "step_time": 0.7749289131164551} +{"epoch": 0, "iter": 5063, "iter_tflops": 28.20592378942981, "iter_time": 0.5706001358032227, "loss": 0.4174140989780426, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 29.958779573674313, "step_time": 0.5372149391174316} +{"epoch": 0, 
"iter": 5064, "iter_tflops": 28.359857875952535, "iter_time": 0.5675029830932617, "loss": 0.5045125484466553, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 30.185907254168093, "step_time": 0.5331727752685547} +{"epoch": 0, "iter": 5065, "iter_tflops": 24.475929182960737, "iter_time": 0.8429135971069337, "loss": 0.2317712903022766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.678123222438856, "step_time": 0.8034502105712892} +{"epoch": 0, "iter": 5066, "iter_tflops": 13.879821964781668, "iter_time": 1.4864090881347656, "loss": 0.17607374489307404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.586715376743523, "step_time": 1.2438323707580565} +{"epoch": 0, "iter": 5067, "iter_tflops": 41.65310659931093, "iter_time": 0.4953074378967285, "loss": 0.25778743624687195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.48755035889923, "step_time": 0.4535547275543213} +{"epoch": 0, "iter": 5068, "iter_tflops": 41.06045909491744, "iter_time": 0.5024564743041993, "loss": 0.2373514026403427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.761132570317706, "step_time": 0.4609153594970703} +{"epoch": 0, "iter": 5069, "iter_tflops": 30.68251633384004, "iter_time": 0.6724055252075195, "loss": 0.186599001288414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.71883023201284, "step_time": 0.6118567390441895} +{"epoch": 0, "iter": 5070, "iter_tflops": 41.56677898458403, "iter_time": 0.4963361129760742, "loss": 0.21096022427082062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.47080552425363, "step_time": 0.44395816421508794} +{"epoch": 0, "iter": 5071, "iter_tflops": 48.143880806477966, "iter_time": 0.4285299224853516, "loss": 0.2836865186691284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43776506124419, "step_time": 0.393439603805542} +{"epoch": 0, "iter": 5072, "iter_tflops": 52.69733555207008, "iter_time": 0.3915016441345215, "loss": 0.19518010318279266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.2715041811748, "step_time": 0.3602331352233886} +{"epoch": 0, 
"iter": 5073, "iter_tflops": 2.645392814368496, "iter_time": 0.6369978637695313, "loss": 1.2497103214263916, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 2.814373271819601, "step_time": 0.5987512702941894} +{"epoch": 0, "iter": 5074, "iter_tflops": 1.2344379215043544, "iter_time": 1.365082473754883, "loss": 1.2150744199752808, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 1.7343108743090623, "step_time": 0.9716306324005126} +{"epoch": 0, "iter": 5075, "iter_tflops": 2.939582349904194, "iter_time": 0.5732479553222656, "loss": 0.1780591607093811, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 3.207501946854267, "step_time": 0.5253650970458984} +{"epoch": 0, "iter": 5076, "iter_tflops": 3.002459580960841, "iter_time": 0.5612430496215821, "loss": 0.10447550565004349, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 3.254464376472926, "step_time": 0.5177839965820312} +{"epoch": 0, "iter": 5077, "iter_tflops": 16.378112972001574, "iter_time": 1.2596746368408203, "loss": 0.772681474685669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.326507324319707, "step_time": 1.1907243118286133} +{"epoch": 0, "iter": 5078, "iter_tflops": 20.149333780300367, "iter_time": 1.0239094619750977, "loss": 0.7049180269241333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.386874416192448, "step_time": 0.8821654891967774} +{"epoch": 0, "iter": 5079, "iter_tflops": 48.9896857682969, "iter_time": 0.4211313705444336, "loss": 0.8032094240188599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.04824729232756, "step_time": 0.38891187858581544} +{"epoch": 0, "iter": 5080, "iter_tflops": 49.285410565872944, "iter_time": 0.41860447692871094, "loss": 0.743714451789856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.129830684863464, "step_time": 0.38831468582153317} +{"epoch": 0, "iter": 5081, "iter_tflops": 32.20571499228467, "iter_time": 0.6406034927368164, "loss": 0.10462628304958344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.416998155560734, "step_time": 0.5994448852539063} +{"epoch": 0, 
"iter": 5082, "iter_tflops": 17.37110247558627, "iter_time": 1.18766748046875, "loss": 0.06371837109327316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.403320124840953, "step_time": 1.0632764587402344} +{"epoch": 0, "iter": 5083, "iter_tflops": 50.47393416116918, "iter_time": 0.40874748229980473, "loss": 0.06897804141044617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.371091233699495, "step_time": 0.37259683799743654} +{"epoch": 0, "iter": 5084, "iter_tflops": 48.862042236020905, "iter_time": 0.422231502532959, "loss": 0.09416580200195312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.903414921659326, "step_time": 0.38997659301757814} +{"epoch": 0, "iter": 5085, "iter_tflops": 34.67845823084525, "iter_time": 0.5949253387451172, "loss": 0.0026017630007117987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.301862569154004, "step_time": 0.5530848083496094} +{"epoch": 0, "iter": 5086, "iter_tflops": 38.72623939230174, "iter_time": 0.5327419815063477, "loss": 0.6262489557266235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.642427181867056, "step_time": 0.48381611633300786} +{"epoch": 0, "iter": 5087, "iter_tflops": 46.02908345756066, "iter_time": 0.4482186470031738, "loss": 0.8860529661178589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30897251705457, "step_time": 0.4100877532958984} +{"epoch": 0, "iter": 5088, "iter_tflops": 40.9212415088068, "iter_time": 0.5041658744812012, "loss": 0.6345326900482178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.57860946048303, "step_time": 0.46280253601074217} +{"epoch": 0, "iter": 5089, "iter_tflops": 18.141252305454806, "iter_time": 1.1372474822998049, "loss": 0.9735742211341858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.44543974792592, "step_time": 1.0609733581542968} +{"epoch": 0, "iter": 5090, "iter_tflops": 18.331197317587332, "iter_time": 1.1254635009765623, "loss": 0.9408844113349915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.98762528782464, "step_time": 0.9383047618865966} 
+{"epoch": 0, "iter": 5091, "iter_tflops": 31.362383385509034, "iter_time": 0.6578292617797852, "loss": 0.9357295036315918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.16316448215709, "step_time": 0.6038987846374512} +{"epoch": 0, "iter": 5092, "iter_tflops": 37.10317659690913, "iter_time": 0.5560465545654297, "loss": 0.8763498663902283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.2645995566046, "step_time": 0.5123878974914552} +{"epoch": 0, "iter": 5093, "iter_tflops": 34.91943656820237, "iter_time": 0.5908197708129883, "loss": 0.4317578077316284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.543819266174026, "step_time": 0.5352633419036865} +{"epoch": 0, "iter": 5094, "iter_tflops": 40.075925321437346, "iter_time": 0.514800178527832, "loss": 0.36594846844673157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.490303177150835, "step_time": 0.46372112655639647} +{"epoch": 0, "iter": 5095, "iter_tflops": 36.65282076325502, "iter_time": 0.5628787384033203, "loss": 0.5372569561004639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.23253619380982, "step_time": 0.5127962455749511} +{"epoch": 0, "iter": 5096, "iter_tflops": 38.83268627324985, "iter_time": 0.5312816467285157, "loss": 0.4180242121219635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.435564602738886, "step_time": 0.486174596786499} +{"epoch": 0, "iter": 5097, "iter_tflops": 35.35325574457145, "iter_time": 0.5835698318481445, "loss": 0.05281064286828041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.81112467603486, "step_time": 0.5315768013000489} +{"epoch": 0, "iter": 5098, "iter_tflops": 7.525525475608108, "iter_time": 2.7414821166992187, "loss": 0.026570342481136322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.708946835187895, "step_time": 2.1249568939208987} +{"epoch": 0, "iter": 5099, "iter_tflops": 11.91788918962182, "iter_time": 1.7311029815673826, "loss": 0.021910393610596657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.901122918175352, "step_time": 1.4841314353942874} 
+{"epoch": 0, "iter": 5100, "iter_tflops": 33.5201557743143, "iter_time": 0.6154832229614258, "loss": 0.039990514516830444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.399082223831336, "step_time": 0.4983466396331787} +{"epoch": 0, "iter": 5101, "iter_tflops": 19.313483030301832, "iter_time": 0.807872688293457, "loss": 0.2720324993133545, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 21.06379247456533, "step_time": 0.7407419853210448} +{"epoch": 0, "iter": 5102, "iter_tflops": 22.674710647675926, "iter_time": 0.6881161880493164, "loss": 0.3747648000717163, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.33818020357036, "step_time": 0.6410847206115723} +{"epoch": 0, "iter": 5103, "iter_tflops": 24.005656396351284, "iter_time": 0.6499649581909179, "loss": 0.36994192004203796, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.838580002885355, "step_time": 0.6038580856323242} +{"epoch": 0, "iter": 5104, "iter_tflops": 24.16912469906206, "iter_time": 0.6455689086914062, "loss": 0.32264116406440735, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.884505779312782, "step_time": 0.6027866859436035} +{"epoch": 0, "iter": 5105, "iter_tflops": 17.693041137005324, "iter_time": 1.1660569458007812, "loss": 0.9744179248809814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.01507621107937, "step_time": 1.0849861068725586} +{"epoch": 0, "iter": 5106, "iter_tflops": 21.307893542063596, "iter_time": 0.9682371215820312, "loss": 0.9979395270347595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.735240369294416, "step_time": 0.8692178039550781} +{"epoch": 0, "iter": 5107, "iter_tflops": 45.62174557335556, "iter_time": 0.4522206077575684, "loss": 1.1507583856582642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.25694235539744, "step_time": 0.4188464107513427} +{"epoch": 0, "iter": 5108, "iter_tflops": 45.79628358991133, "iter_time": 0.4504971122741699, "loss": 1.0068857669830322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.536000505618496, "step_time": 
0.4164868640899658} +{"epoch": 0, "iter": 5109, "iter_tflops": 41.70931603051912, "iter_time": 0.49463993835449216, "loss": 0.5500277280807495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.335203198371765, "step_time": 0.45507888031005855} +{"epoch": 0, "iter": 5110, "iter_tflops": 46.029155146846016, "iter_time": 0.4482179489135742, "loss": 0.7496114373207092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.68668336784057, "step_time": 0.41522380065917963} +{"epoch": 0, "iter": 5111, "iter_tflops": 49.440977917362254, "iter_time": 0.41728732681274416, "loss": 0.6782150864601135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.391492590824505, "step_time": 0.3864116268157959} +{"epoch": 0, "iter": 5112, "iter_tflops": 45.50951209213203, "iter_time": 0.4533358535766601, "loss": 0.5985314846038818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.9853362072717, "step_time": 0.4211687641143799} +{"epoch": 0, "iter": 5113, "iter_tflops": 33.86039363254592, "iter_time": 0.6092986907958985, "loss": 0.15898381173610687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.16726897299584, "step_time": 0.5704354820251465} +{"epoch": 0, "iter": 5114, "iter_tflops": 29.7541233236653, "iter_time": 0.6933860321044922, "loss": 0.14839139580726624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.01525136012845, "step_time": 0.5287956066131592} +{"epoch": 0, "iter": 5115, "iter_tflops": 49.74075419925278, "iter_time": 0.41477243041992184, "loss": 0.13590875267982483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.165946921360316, "step_time": 0.3808867874145508} +{"epoch": 0, "iter": 5116, "iter_tflops": 51.754610661593446, "iter_time": 0.3986329574584961, "loss": 0.12401796877384186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.944417115913986, "step_time": 0.36877841567993164} +{"epoch": 0, "iter": 5117, "iter_tflops": 25.8435773615978, "iter_time": 0.7983064117431642, "loss": 0.6705253720283508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.192803345741286, 
"step_time": 0.7586968231201172} +{"epoch": 0, "iter": 5118, "iter_tflops": 16.931062216559692, "iter_time": 1.2185350952148437, "loss": 0.7415085434913635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.676992673048215, "step_time": 0.9977801818847654} +{"epoch": 0, "iter": 5119, "iter_tflops": 36.28457855367766, "iter_time": 0.5685912399291991, "loss": 0.7730535268783569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.782330578469484, "step_time": 0.5185994186401367} +{"epoch": 0, "iter": 5120, "iter_tflops": 40.52847906625459, "iter_time": 0.5090517578125, "loss": 0.6135878562927246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35621072237634, "step_time": 0.46512299346923835} +{"epoch": 0, "iter": 5121, "iter_tflops": 31.90866238384401, "iter_time": 0.6465671691894531, "loss": 0.05112363025546074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.089719511612664, "step_time": 0.5879526481628419} +{"epoch": 0, "iter": 5122, "iter_tflops": 10.672036177159342, "iter_time": 1.9331918640136718, "loss": 0.032174594700336456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.860820081328997, "step_time": 1.4884468154907229} +{"epoch": 0, "iter": 5123, "iter_tflops": 11.964554823499304, "iter_time": 1.7243511199951174, "loss": 0.0421685092151165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.333546252714045, "step_time": 1.5473073043823242} +{"epoch": 0, "iter": 5124, "iter_tflops": 19.82607226664323, "iter_time": 1.0406041717529297, "loss": 0.04361725598573685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.308046210817725, "step_time": 0.8487351608276368} +{"epoch": 0, "iter": 5125, "iter_tflops": 20.70697149629724, "iter_time": 0.7475779113769532, "loss": 0.37383028864860535, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 22.59183519839188, "step_time": 0.6852065963745118} +{"epoch": 0, "iter": 5126, "iter_tflops": 20.294892310069823, "iter_time": 0.7627571640014648, "loss": 0.36088502407073975, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 
21.90313512047657, "step_time": 0.7067515411376952} +{"epoch": 0, "iter": 5127, "iter_tflops": 22.78493738888877, "iter_time": 0.6793994750976563, "loss": 0.47639375925064087, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.51220913681669, "step_time": 0.6315250663757324} +{"epoch": 0, "iter": 5128, "iter_tflops": 24.954692695298803, "iter_time": 0.6203271942138672, "loss": 0.4759996235370636, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 26.7277812325497, "step_time": 0.5791754417419434} +{"epoch": 0, "iter": 5129, "iter_tflops": 18.068139887528123, "iter_time": 1.1418493347167968, "loss": 0.8628635406494141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.17380544868426, "step_time": 1.0760041122436523} +{"epoch": 0, "iter": 5130, "iter_tflops": 20.094677148142587, "iter_time": 1.0266944503784179, "loss": 0.7476567029953003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.699067733834024, "step_time": 0.9088960723876953} +{"epoch": 0, "iter": 5131, "iter_tflops": 37.75490202803125, "iter_time": 0.5464480743408203, "loss": 0.7654052972793579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.07055570910228, "step_time": 0.5023329524993896} +{"epoch": 0, "iter": 5132, "iter_tflops": 36.46940267286956, "iter_time": 0.5657096633911133, "loss": 0.8060568571090698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7416736838963, "step_time": 0.5191299610137939} +{"epoch": 0, "iter": 5133, "iter_tflops": 33.78614550666006, "iter_time": 0.6106376800537109, "loss": 0.1866723597049713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.32415223046628, "step_time": 0.5527545108795167} +{"epoch": 0, "iter": 5134, "iter_tflops": 40.68454666461203, "iter_time": 0.5070990142822266, "loss": 0.2075868546962738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.644138005267905, "step_time": 0.46212323570251457} +{"epoch": 0, "iter": 5135, "iter_tflops": 42.401057451701355, "iter_time": 0.48657025909423823, "loss": 0.1756363958120346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
46.25905188641428, "step_time": 0.4459904098510742} +{"epoch": 0, "iter": 5136, "iter_tflops": 38.81302842687404, "iter_time": 0.5315507278442383, "loss": 0.2992362976074219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08950139641074, "step_time": 0.49017196273803715} +{"epoch": 0, "iter": 5137, "iter_tflops": 21.954172773759904, "iter_time": 0.9397344970703124, "loss": 0.885888397693634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.607462131957902, "step_time": 0.8739225502014161} +{"epoch": 0, "iter": 5138, "iter_tflops": 16.960285385223848, "iter_time": 1.2164355163574219, "loss": 0.8667829036712646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.32327184024908, "step_time": 0.9241966705322264} +{"epoch": 0, "iter": 5139, "iter_tflops": 36.79450958751769, "iter_time": 0.5607111968994141, "loss": 1.0373438596725464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.75435614419187, "step_time": 0.5189643478393555} +{"epoch": 0, "iter": 5140, "iter_tflops": 37.06683659792623, "iter_time": 0.5565916976928711, "loss": 1.0739554166793823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.334253621693726, "step_time": 0.511503044128418} +{"epoch": 0, "iter": 5141, "iter_tflops": 21.295990929964862, "iter_time": 0.9687782821655273, "loss": 0.9198050498962402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.88705225905644, "step_time": 0.9014307861328126} +{"epoch": 0, "iter": 5142, "iter_tflops": 22.362976287205235, "iter_time": 0.9225558013916015, "loss": 0.8964397311210632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.03212706033151, "step_time": 0.7632064418792724} +{"epoch": 0, "iter": 5143, "iter_tflops": 45.11682889619364, "iter_time": 0.457281551361084, "loss": 0.6206423044204712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71869340135322, "step_time": 0.4234738674163818} +{"epoch": 0, "iter": 5144, "iter_tflops": 44.50745749234378, "iter_time": 0.4635423965454102, "loss": 0.7464865446090698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
47.60385957262314, "step_time": 0.4333911933898926} +{"epoch": 0, "iter": 5145, "iter_tflops": 23.854242411636672, "iter_time": 0.8648815231323241, "loss": 0.0030909741763025522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.04207950932254, "step_time": 0.8238570404052734} +{"epoch": 0, "iter": 5146, "iter_tflops": 22.52911038523456, "iter_time": 0.9157526931762694, "loss": 0.003346079494804144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.75365242392588, "step_time": 0.7433649883270264} +{"epoch": 0, "iter": 5147, "iter_tflops": 48.30143586814889, "iter_time": 0.427132095336914, "loss": 0.017308378592133522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.61727291233294, "step_time": 0.38478446197509764} +{"epoch": 0, "iter": 5148, "iter_tflops": 42.84191022422464, "iter_time": 0.48156334304809567, "loss": 0.003726343857124448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32281153632682, "step_time": 0.4359650840759278} +{"epoch": 0, "iter": 5149, "iter_tflops": 23.857297554888685, "iter_time": 0.864770767211914, "loss": 0.4173603951931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.559298741841225, "step_time": 0.8071854286193847} +{"epoch": 0, "iter": 5150, "iter_tflops": 7.645569053796687, "iter_time": 2.6984379272460934, "loss": 0.38333266973495483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.686746142500674, "step_time": 2.129827003479004} +{"epoch": 0, "iter": 5151, "iter_tflops": 15.049887441137303, "iter_time": 1.3708470306396485, "loss": 0.38943618535995483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.385581682113237, "step_time": 1.0642493915557862} +{"epoch": 0, "iter": 5152, "iter_tflops": 37.5266166411674, "iter_time": 0.5497722778320312, "loss": 0.3838719129562378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.68084590104912, "step_time": 0.5071451454162598} +{"epoch": 0, "iter": 5153, "iter_tflops": 23.132066715896134, "iter_time": 0.7667900924682617, "loss": 0.43552929162979126, "lr": 3e-05, "seqlen": 7072.0, 
"step_tflops": 24.889050795526767, "step_time": 0.7126603469848634} +{"epoch": 0, "iter": 5154, "iter_tflops": 9.867342217938045, "iter_time": 1.7975903930664061, "loss": 0.5048196911811829, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 11.140511056218282, "step_time": 1.592156723022461} +{"epoch": 0, "iter": 5155, "iter_tflops": 12.823605991305318, "iter_time": 1.3831865692138672, "loss": 0.4103206992149353, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 15.06811860582905, "step_time": 1.1771502494812012} +{"epoch": 0, "iter": 5156, "iter_tflops": 30.780741816213652, "iter_time": 0.576251205444336, "loss": 0.3441525101661682, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 32.75485383468205, "step_time": 0.5415209503173828} +{"epoch": 0, "iter": 5157, "iter_tflops": 20.638347316161155, "iter_time": 0.7758540496826172, "loss": 0.30753540992736816, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 21.907150073741647, "step_time": 0.7309186859130858} +{"epoch": 0, "iter": 5158, "iter_tflops": 17.38312947686885, "iter_time": 0.9211428451538086, "loss": 0.3510649502277374, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 20.810452835560426, "step_time": 0.7694376220703125} +{"epoch": 0, "iter": 5159, "iter_tflops": 28.447924185945386, "iter_time": 0.5628651580810546, "loss": 0.5566352009773254, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.29885783357555, "step_time": 0.5284801635742188} +{"epoch": 0, "iter": 5160, "iter_tflops": 29.027419198726832, "iter_time": 0.5516282806396485, "loss": 0.3677028715610504, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.83806286497599, "step_time": 0.5192396621704103} +{"epoch": 0, "iter": 5161, "iter_tflops": 24.337663031164947, "iter_time": 0.8477023239135743, "loss": 0.004129483364522457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.18275761284695, "step_time": 0.7879648818969727} +{"epoch": 0, "iter": 5162, "iter_tflops": 13.927505574803845, "iter_time": 1.481320068359375, "loss": 0.019477466121315956, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 17.082029285127415, "step_time": 1.2077659606933593} +{"epoch": 0, "iter": 5163, "iter_tflops": 43.694625046158755, "iter_time": 0.4721654777526855, "loss": 0.0036515092942863703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.04366563928591, "step_time": 0.3494209461212159} +{"epoch": 0, "iter": 5164, "iter_tflops": 57.310144684503975, "iter_time": 0.35999025344848634, "loss": 0.005729569587856531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.64793090444358, "step_time": 0.32931803512573243} +{"epoch": 0, "iter": 5165, "iter_tflops": 54.45233884768367, "iter_time": 0.37888351440429685, "loss": 0.029110567644238472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.32948167543133, "step_time": 0.3419736576080322} +{"epoch": 0, "iter": 5166, "iter_tflops": 18.668450194528436, "iter_time": 1.1051315612792971, "loss": 0.005697394255548716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.051300797490686, "step_time": 1.0289154663085938} +{"epoch": 0, "iter": 5167, "iter_tflops": 8.65277916704556, "iter_time": 2.384331451416016, "loss": 0.0018688987474888563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.875895114025784, "step_time": 1.8969559097290039} +{"epoch": 0, "iter": 5168, "iter_tflops": 14.502306967986017, "iter_time": 1.422607696533203, "loss": 0.0038944564294070005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.75302505094091, "step_time": 1.162117073059082} +{"epoch": 0, "iter": 5169, "iter_tflops": 13.148757573384737, "iter_time": 1.217783905029297, "loss": 0.4201165437698364, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 13.987217807738242, "step_time": 1.1447841567993164} +{"epoch": 0, "iter": 5170, "iter_tflops": 14.11775552131475, "iter_time": 1.1341990814208984, "loss": 0.43892529606819153, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 16.635487528941606, "step_time": 0.9625413932800293} +{"epoch": 0, "iter": 5171, "iter_tflops": 25.82369384860065, "iter_time": 0.6200640945434571, "loss": 0.2914677560329437, "lr": 
3e-05, "seqlen": 6400.0, "step_tflops": 27.472521608362957, "step_time": 0.5828494949340821} +{"epoch": 0, "iter": 5172, "iter_tflops": 28.308913026920855, "iter_time": 0.5656291122436523, "loss": 0.44048571586608887, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.164872734688565, "step_time": 0.5308275451660156} +{"epoch": 0, "iter": 5173, "iter_tflops": 22.770042981848885, "iter_time": 0.4003000831604004, "loss": 0.013326406478881836, "lr": 3e-05, "seqlen": 3680.0, "step_tflops": 25.360246047380738, "step_time": 0.35941489219665523} +{"epoch": 0, "iter": 5174, "iter_tflops": 24.10571383635803, "iter_time": 0.3781198997497558, "loss": 0.004978957120329142, "lr": 3e-05, "seqlen": 3680.0, "step_tflops": 26.670743931128282, "step_time": 0.34175462532043455} +{"epoch": 0, "iter": 5175, "iter_tflops": 25.097447997255323, "iter_time": 0.3631783638000488, "loss": 0.006104431115090847, "lr": 3e-05, "seqlen": 3680.0, "step_tflops": 27.58575587075446, "step_time": 0.33041871833801273} +{"epoch": 0, "iter": 5176, "iter_tflops": 24.999067159678113, "iter_time": 0.364607608795166, "loss": 0.00545781385153532, "lr": 3e-05, "seqlen": 3680.0, "step_tflops": 27.502003105149445, "step_time": 0.3314249534606934} +{"epoch": 0, "iter": 5177, "iter_tflops": 31.156144969026577, "iter_time": 0.6621837692260742, "loss": 0.1971403807401657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.08712064901565, "step_time": 0.6235384979248046} +{"epoch": 0, "iter": 5178, "iter_tflops": 12.529084923138415, "iter_time": 1.6466560516357422, "loss": 0.23699018359184265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.7798702858834, "step_time": 1.3074311218261718} +{"epoch": 0, "iter": 5179, "iter_tflops": 40.49733118647567, "iter_time": 0.509443286895752, "loss": 0.1297285258769989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52042729004595, "step_time": 0.463407356262207} +{"epoch": 0, "iter": 5180, "iter_tflops": 43.534485065667695, "iter_time": 0.47390232086181644, "loss": 
0.27202776074409485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.627872658830725, "step_time": 0.433172685623169} +{"epoch": 0, "iter": 5181, "iter_tflops": 14.153545973633575, "iter_time": 1.4576625213623045, "loss": 0.4152977168560028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.101837253865225, "step_time": 1.366131362915039} +{"epoch": 0, "iter": 5182, "iter_tflops": 18.429997874142686, "iter_time": 1.1194300537109374, "loss": 0.41712629795074463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.817973537708575, "step_time": 0.904159761428833} +{"epoch": 0, "iter": 5183, "iter_tflops": 49.13563093241872, "iter_time": 0.4198805046081543, "loss": 0.48451733589172363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.43610422917787, "step_time": 0.3860890274047851} +{"epoch": 0, "iter": 5184, "iter_tflops": 49.89203433812581, "iter_time": 0.413514778137207, "loss": 0.39712685346603394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.812609888101626, "step_time": 0.3833877143859863} +{"epoch": 0, "iter": 5185, "iter_tflops": 32.47292600857878, "iter_time": 0.6353321380615234, "loss": 0.5065200924873352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.659011497786786, "step_time": 0.5952591438293456} +{"epoch": 0, "iter": 5186, "iter_tflops": 15.337932094142715, "iter_time": 1.3451026763916016, "loss": 0.5960841774940491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.767966990871184, "step_time": 1.0992716217041014} +{"epoch": 0, "iter": 5187, "iter_tflops": 47.53619686652529, "iter_time": 0.4340080795288086, "loss": 0.5993456840515137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.55569598882274, "step_time": 0.40017098236083987} +{"epoch": 0, "iter": 5188, "iter_tflops": 45.81087540337603, "iter_time": 0.4503536186218262, "loss": 0.5576039552688599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.85569822880779, "step_time": 0.4138161582946777} +{"epoch": 0, "iter": 5189, "iter_tflops": 35.044969992322905, "iter_time": 0.5887034149169921, "loss": 
0.040097396820783615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.72282442644734, "step_time": 0.5469127464294434} +{"epoch": 0, "iter": 5190, "iter_tflops": 9.847314436470887, "iter_time": 2.095098480224609, "loss": 0.07429578900337219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.273885888427946, "step_time": 1.8299895629882814} +{"epoch": 0, "iter": 5191, "iter_tflops": 12.184842662996797, "iter_time": 1.6931768493652344, "loss": 0.06913245469331741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.90298268015211, "step_time": 1.3843600273132326} +{"epoch": 0, "iter": 5192, "iter_tflops": 17.81703905384105, "iter_time": 1.1579417572021484, "loss": 0.040217556059360504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.453914421034575, "step_time": 0.8796439323425292} +{"epoch": 0, "iter": 5193, "iter_tflops": 15.104419229126576, "iter_time": 1.0628228759765626, "loss": 0.4588029682636261, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 16.121681130578008, "step_time": 0.9957598190307617} +{"epoch": 0, "iter": 5194, "iter_tflops": 11.219171533092563, "iter_time": 1.4308830413818359, "loss": 0.28290241956710815, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 14.092675240367466, "step_time": 1.139125255584717} +{"epoch": 0, "iter": 5195, "iter_tflops": 25.15449501309536, "iter_time": 0.6381890106201172, "loss": 0.3880475163459778, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 27.068553605337037, "step_time": 0.593061695098877} +{"epoch": 0, "iter": 5196, "iter_tflops": 24.026569623952533, "iter_time": 0.6681487426757812, "loss": 0.29674357175827026, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 25.874900629757164, "step_time": 0.6204206352233888} +{"epoch": 0, "iter": 5197, "iter_tflops": 14.416338248957167, "iter_time": 1.4310911102294923, "loss": 0.007706985343247652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.522652713900468, "step_time": 1.3290958633422851} +{"epoch": 0, "iter": 5198, "iter_tflops": 25.32974923953479, "iter_time": 0.814500503540039, 
"loss": 0.005321396514773369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.750574615414035, "step_time": 0.6497864608764649} +{"epoch": 0, "iter": 5199, "iter_tflops": 52.76851224918381, "iter_time": 0.3909735679626465, "loss": 0.018596580252051353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.13943262525861, "step_time": 0.354855432510376} +{"epoch": 0, "iter": 5200, "iter_tflops": 56.41758682846817, "iter_time": 0.36568550109863285, "loss": 0.023556353524327278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.13853686296496, "step_time": 0.33201769065856934} +{"epoch": 0, "iter": 5201, "iter_tflops": 28.577900524322924, "iter_time": 0.7219247436523438, "loss": 0.07259657233953476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.401709021739812, "step_time": 0.678616241455078} +{"epoch": 0, "iter": 5202, "iter_tflops": 14.504916178367234, "iter_time": 1.422351791381836, "loss": 0.1053733080625534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.07611666214327, "step_time": 1.2833381309509275} +{"epoch": 0, "iter": 5203, "iter_tflops": 40.74149429873792, "iter_time": 0.5063902015686036, "loss": 0.12914413213729858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.631418927017926, "step_time": 0.3995840892791748} +{"epoch": 0, "iter": 5204, "iter_tflops": 49.215871259156415, "iter_time": 0.4191959419250488, "loss": 0.19254451990127563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.01216528512496, "step_time": 0.38917658615112305} +{"epoch": 0, "iter": 5205, "iter_tflops": 35.65128247516119, "iter_time": 0.5786914825439454, "loss": 0.6023094654083252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.39749600201506, "step_time": 0.5373030967712402} +{"epoch": 0, "iter": 5206, "iter_tflops": 14.793877784736718, "iter_time": 1.3945696868896482, "loss": 0.5897449254989624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.162406035585565, "step_time": 1.1359229316711426} +{"epoch": 0, "iter": 5207, "iter_tflops": 41.724023562376715, "iter_time": 
0.49446557998657226, "loss": 0.48360732197761536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.986525210451916, "step_time": 0.4586060695648193} +{"epoch": 0, "iter": 5208, "iter_tflops": 45.095228831416726, "iter_time": 0.4575005836486816, "loss": 0.733377993106842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.30951930175788, "step_time": 0.42706062507629394} +{"epoch": 0, "iter": 5209, "iter_tflops": 35.25763610042808, "iter_time": 0.5851524887084961, "loss": 0.9252195954322815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.760370171806514, "step_time": 0.5463689422607423} +{"epoch": 0, "iter": 5210, "iter_tflops": 9.615711471115228, "iter_time": 2.145560791015625, "loss": 0.9530843496322632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.971307860402801, "step_time": 1.7233784103393555} +{"epoch": 0, "iter": 5211, "iter_tflops": 11.464713354815276, "iter_time": 1.7995298156738282, "loss": 0.765937328338623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.7171678809827, "step_time": 1.6223025207519532} +{"epoch": 0, "iter": 5212, "iter_tflops": 17.251080927073776, "iter_time": 1.1959304809570312, "loss": 0.9303866624832153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.08274333718547, "step_time": 0.9785772743225097} +{"epoch": 0, "iter": 5213, "iter_tflops": 12.221362715020344, "iter_time": 1.2298394775390626, "loss": 0.3930741250514984, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.019397422974235, "step_time": 1.154455451965332} +{"epoch": 0, "iter": 5214, "iter_tflops": 15.110855569972795, "iter_time": 0.9946699752807616, "loss": 0.34356364607810974, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 19.80385357857272, "step_time": 0.758959072113037} +{"epoch": 0, "iter": 5215, "iter_tflops": 27.245427600241776, "iter_time": 0.5516637344360351, "loss": 0.3545953333377838, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.02147126597504, "step_time": 0.5179032516479493} +{"epoch": 0, "iter": 5216, "iter_tflops": 26.40438627811864, "iter_time": 
0.5692355117797852, "loss": 0.3349195420742035, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.027094989151443, "step_time": 0.5362779960632325} +{"epoch": 0, "iter": 5217, "iter_tflops": 27.124317682005135, "iter_time": 0.7606124420166015, "loss": 0.8775953650474548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.650189598896258, "step_time": 0.7201032104492187} +{"epoch": 0, "iter": 5218, "iter_tflops": 9.709701028422476, "iter_time": 2.124791839599609, "loss": 1.015317440032959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.731834405372055, "step_time": 1.7585564880371092} +{"epoch": 0, "iter": 5219, "iter_tflops": 10.994235904916058, "iter_time": 1.8765372772216797, "loss": 0.687254786491394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.628373943902275, "step_time": 1.6337094230651856} +{"epoch": 0, "iter": 5220, "iter_tflops": 26.38177422728982, "iter_time": 0.7820206985473632, "loss": 0.674988329410553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.46710311483432, "step_time": 0.7001398620605468} +{"epoch": 0, "iter": 5221, "iter_tflops": 13.557227903371034, "iter_time": 1.1237312011718752, "loss": 0.36892572045326233, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 14.535385793031164, "step_time": 1.0481097793579102} +{"epoch": 0, "iter": 5222, "iter_tflops": 12.158895266709155, "iter_time": 1.2529658050537111, "loss": 0.23493225872516632, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 14.477319638906838, "step_time": 1.052313575744629} +{"epoch": 0, "iter": 5223, "iter_tflops": 27.10262219121803, "iter_time": 0.5621109237670898, "loss": 0.5885123014450073, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.82700956585177, "step_time": 0.528486312866211} +{"epoch": 0, "iter": 5224, "iter_tflops": 27.428634903569325, "iter_time": 0.5554297561645507, "loss": 0.5121245980262756, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.246164252828518, "step_time": 0.5209120712280273} +{"epoch": 0, "iter": 5225, "iter_tflops": 24.37644478251414, "iter_time": 
0.8140394210815429, "loss": 0.06275324523448944, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 25.675550282091766, "step_time": 0.7728514785766603} +{"epoch": 0, "iter": 5226, "iter_tflops": 13.52381127158149, "iter_time": 1.467292510986328, "loss": 0.10591915249824524, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 17.50824631891825, "step_time": 1.1333737621307374} +{"epoch": 0, "iter": 5227, "iter_tflops": 40.05368761940511, "iter_time": 0.49541972732543943, "loss": 0.14047959446907043, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 44.10848450569888, "step_time": 0.4498768711090088} +{"epoch": 0, "iter": 5228, "iter_tflops": 38.03077725937683, "iter_time": 0.5217717971801757, "loss": 0.09507235884666443, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 41.721617182644394, "step_time": 0.4756140422821045} +{"epoch": 0, "iter": 5229, "iter_tflops": 19.701504234952612, "iter_time": 1.0471836700439454, "loss": 0.08232779800891876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.020981132798937, "step_time": 0.9814524536132813} +{"epoch": 0, "iter": 5230, "iter_tflops": 22.968752723071624, "iter_time": 0.8982243728637695, "loss": 0.07613623887300491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.11977216945746, "step_time": 0.642317554473877} +{"epoch": 0, "iter": 5231, "iter_tflops": 51.07510591888994, "iter_time": 0.4039363822937012, "loss": 0.08335054665803909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.73353428850785, "step_time": 0.3701737880706787} +{"epoch": 0, "iter": 5232, "iter_tflops": 52.41001598114365, "iter_time": 0.3936479148864746, "loss": 0.06749865412712097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.28744230709717, "step_time": 0.3601329135894776} +{"epoch": 0, "iter": 5233, "iter_tflops": 30.073951539289425, "iter_time": 0.6860120620727539, "loss": 0.7380435466766357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.14677451650989, "step_time": 0.6417780265808105} +{"epoch": 0, "iter": 5234, "iter_tflops": 34.97903015802694, 
"iter_time": 0.5898131942749023, "loss": 0.7834606170654297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21849294537732, "step_time": 0.5398196506500245} +{"epoch": 0, "iter": 5235, "iter_tflops": 36.66720035142033, "iter_time": 0.5626579971313477, "loss": 0.8687318563461304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.98175597243399, "step_time": 0.5160126914978027} +{"epoch": 0, "iter": 5236, "iter_tflops": 36.30904851990695, "iter_time": 0.5682080459594726, "loss": 0.7970284819602966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.559736917170746, "step_time": 0.5215174598693847} +{"epoch": 0, "iter": 5237, "iter_tflops": 20.567589789937866, "iter_time": 1.0030875625610352, "loss": 0.4677046835422516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.161751306673445, "step_time": 0.9309324531555176} +{"epoch": 0, "iter": 5238, "iter_tflops": 15.003574672429405, "iter_time": 1.3750785369873049, "loss": 0.5876829624176025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.879040936466104, "step_time": 1.1539261856079102} +{"epoch": 0, "iter": 5239, "iter_tflops": 37.087366076542125, "iter_time": 0.5562835998535156, "loss": 0.6138269305229187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.820818923040626, "step_time": 0.5054061641693115} +{"epoch": 0, "iter": 5240, "iter_tflops": 36.67180342703924, "iter_time": 0.5625873718261719, "loss": 0.6742079257965088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.15936747185811, "step_time": 0.5137305393218994} +{"epoch": 0, "iter": 5241, "iter_tflops": 18.436153983641105, "iter_time": 1.1190562591552735, "loss": 0.010852120816707611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.87339792579022, "step_time": 1.038126121520996} +{"epoch": 0, "iter": 5242, "iter_tflops": 15.264754385331388, "iter_time": 1.3515509643554688, "loss": 0.009705989621579647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.45813748464642, "step_time": 1.0602810020446776} +{"epoch": 0, "iter": 5243, "iter_tflops": 
42.08168179783054, "iter_time": 0.49026304626464845, "loss": 0.007049591280519962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.71034576615572, "step_time": 0.4416814556121826} +{"epoch": 0, "iter": 5244, "iter_tflops": 50.752642739725054, "iter_time": 0.4065028419494629, "loss": 0.004588191397488117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.92782211250928, "step_time": 0.36888784027099614} +{"epoch": 0, "iter": 5245, "iter_tflops": 16.935430424664286, "iter_time": 1.2182207946777344, "loss": 0.7130538821220398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.075627533963495, "step_time": 1.141376335144043} +{"epoch": 0, "iter": 5246, "iter_tflops": 22.133912400438398, "iter_time": 0.9321033325195313, "loss": 0.5387229919433594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.892079934862473, "step_time": 0.6901859474182129} +{"epoch": 0, "iter": 5247, "iter_tflops": 49.19730973389056, "iter_time": 0.41935409927368167, "loss": 0.7165015339851379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.396603975082584, "step_time": 0.38637463760375973} +{"epoch": 0, "iter": 5248, "iter_tflops": 50.38091744250348, "iter_time": 0.40950214004516605, "loss": 0.5761054754257202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.24675268408065, "step_time": 0.38031941986083984} +{"epoch": 0, "iter": 5249, "iter_tflops": 31.778703470218744, "iter_time": 0.6492113037109376, "loss": 0.01482659112662077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.81554998297874, "step_time": 0.6101066970825195} +{"epoch": 0, "iter": 5250, "iter_tflops": 12.211179157874723, "iter_time": 1.6895250854492188, "loss": 0.013054641894996166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.880739920367319, "step_time": 1.2991267166137694} +{"epoch": 0, "iter": 5251, "iter_tflops": 36.167296303524616, "iter_time": 0.5704350509643554, "loss": 0.008816401474177837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.36088403152741, "step_time": 0.4356146202087403} +{"epoch": 0, "iter": 5252, 
"iter_tflops": 45.09815926523658, "iter_time": 0.4574708557128907, "loss": 0.011634397320449352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.773019426381126, "step_time": 0.41450355529785154} +{"epoch": 0, "iter": 5253, "iter_tflops": 25.98069557905891, "iter_time": 0.7940931930541992, "loss": 0.08109723031520844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.00488143416176, "step_time": 0.7366963348388672} +{"epoch": 0, "iter": 5254, "iter_tflops": 7.874397683418248, "iter_time": 2.6200217895507816, "loss": 0.0751824602484703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.100951606704276, "step_time": 2.2669160766601566} +{"epoch": 0, "iter": 5255, "iter_tflops": 12.167581248042662, "iter_time": 1.6955788574218749, "loss": 0.11286020278930664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.192643514127273, "step_time": 1.3579660110473633} +{"epoch": 0, "iter": 5256, "iter_tflops": 49.89768647216271, "iter_time": 0.4134679374694824, "loss": 0.09446444362401962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.31076465986201, "step_time": 0.379871166229248} +{"epoch": 0, "iter": 5257, "iter_tflops": 28.780201859079693, "iter_time": 0.5663366470336915, "loss": 0.34562158584594727, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 30.902851073930474, "step_time": 0.527436222076416} +{"epoch": 0, "iter": 5258, "iter_tflops": 27.354727129084228, "iter_time": 0.5958488616943359, "loss": 0.590786874294281, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.039852748932795, "step_time": 0.5612729225158691} +{"epoch": 0, "iter": 5259, "iter_tflops": 28.858280020333854, "iter_time": 0.5648043823242188, "loss": 0.28477999567985535, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 30.749845594210182, "step_time": 0.5300606460571289} +{"epoch": 0, "iter": 5260, "iter_tflops": 29.273230106575443, "iter_time": 0.5567982406616211, "loss": 0.5070170164108276, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 31.152580715489567, "step_time": 0.5232081146240234} +{"epoch": 0, "iter": 
5261, "iter_tflops": 40.72641247087638, "iter_time": 0.5065777282714844, "loss": 0.3479040265083313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.377118658429694, "step_time": 0.46490385437011716} +{"epoch": 0, "iter": 5262, "iter_tflops": 35.78841321981337, "iter_time": 0.576474105834961, "loss": 0.2217164784669876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.369921651379215, "step_time": 0.5240318660736084} +{"epoch": 0, "iter": 5263, "iter_tflops": 39.824430641799104, "iter_time": 0.5180511856079101, "loss": 0.3972375690937042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66667792145191, "step_time": 0.4724676685333252} +{"epoch": 0, "iter": 5264, "iter_tflops": 45.26929992577368, "iter_time": 0.4557413864135742, "loss": 0.2038760930299759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.58830758283492, "step_time": 0.4160475425720215} +{"epoch": 0, "iter": 5265, "iter_tflops": 20.125963338582864, "iter_time": 1.0250984344482421, "loss": 0.7976177334785461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.160546786815907, "step_time": 0.9749792251586914} +{"epoch": 0, "iter": 5266, "iter_tflops": 15.178614635073103, "iter_time": 1.3592211151123046, "loss": 0.7799943685531616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.403446647170554, "step_time": 1.0632695255279543} +{"epoch": 0, "iter": 5267, "iter_tflops": 45.070258702630326, "iter_time": 0.4577540512084961, "loss": 1.0799392461776733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42538127510238, "step_time": 0.4260388450622558} +{"epoch": 0, "iter": 5268, "iter_tflops": 37.80098897689989, "iter_time": 0.5457818450927734, "loss": 1.055779218673706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.205506970529065, "step_time": 0.5006877727508545} +{"epoch": 0, "iter": 5269, "iter_tflops": 17.870357743577816, "iter_time": 1.1544868774414063, "loss": 0.1588270217180252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.087806518836064, "step_time": 1.080851982116699} +{"epoch": 0, "iter": 
5270, "iter_tflops": 20.394137998984846, "iter_time": 1.0116188049316406, "loss": 0.15488837659358978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.831240659070822, "step_time": 0.7412926273345948} +{"epoch": 0, "iter": 5271, "iter_tflops": 53.05781140388353, "iter_time": 0.3888417739868164, "loss": 0.07594747096300125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.894471627461336, "step_time": 0.35635688400268556} +{"epoch": 0, "iter": 5272, "iter_tflops": 47.41045890072811, "iter_time": 0.43515911865234375, "loss": 0.1211857795715332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.21655526029082, "step_time": 0.4028207950592041} +{"epoch": 0, "iter": 5273, "iter_tflops": 37.72631181695224, "iter_time": 0.546862190246582, "loss": 0.2686901390552521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.611129928790454, "step_time": 0.5080157470703125} +{"epoch": 0, "iter": 5274, "iter_tflops": 18.722157624763955, "iter_time": 1.1019613189697264, "loss": 0.415071964263916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.472671755232216, "step_time": 0.9180525455474853} +{"epoch": 0, "iter": 5275, "iter_tflops": 39.4940087433143, "iter_time": 0.522385398864746, "loss": 0.2572490870952606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.03873430767525, "step_time": 0.4793610649108887} +{"epoch": 0, "iter": 5276, "iter_tflops": 40.90642793118194, "iter_time": 0.5043484497070312, "loss": 0.29685744643211365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.627005231505365, "step_time": 0.4623006496429444} +{"epoch": 0, "iter": 5277, "iter_tflops": 27.100859989524846, "iter_time": 0.7612708053588867, "loss": 0.7943018078804016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.792011778981998, "step_time": 0.692504207611084} +{"epoch": 0, "iter": 5278, "iter_tflops": 33.14714924353298, "iter_time": 0.6224092864990235, "loss": 0.9288374781608582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.951944241607634, "step_time": 0.5583222732543945} +{"epoch": 0, "iter": 
5279, "iter_tflops": 40.46588598369551, "iter_time": 0.5098391647338867, "loss": 0.7220893502235413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.104328612923695, "step_time": 0.46777933502197266} +{"epoch": 0, "iter": 5280, "iter_tflops": 42.054209353453395, "iter_time": 0.4905833168029785, "loss": 0.9643253684043884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.94346378703222, "step_time": 0.44905394172668456} +{"epoch": 0, "iter": 5281, "iter_tflops": 25.58825108958927, "iter_time": 0.6690718688964843, "loss": 0.06393096596002579, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 28.48401737055213, "step_time": 0.6010521183013916} +{"epoch": 0, "iter": 5282, "iter_tflops": 31.52233509581861, "iter_time": 0.5431189956665039, "loss": 0.11518439650535583, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 34.77362208929246, "step_time": 0.4923380985260009} +{"epoch": 0, "iter": 5283, "iter_tflops": 33.97264177869821, "iter_time": 0.5039460601806641, "loss": 0.12513230741024017, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 37.21727614465673, "step_time": 0.4600116062164307} +{"epoch": 0, "iter": 5284, "iter_tflops": 35.81544164646322, "iter_time": 0.4780166931152343, "loss": 0.13265052437782288, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 39.37162518399304, "step_time": 0.4348405456542969} +{"epoch": 0, "iter": 5285, "iter_tflops": 17.219720872743554, "iter_time": 1.1981084747314452, "loss": 0.842360258102417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.314651071421057, "step_time": 1.1264802932739257} +{"epoch": 0, "iter": 5286, "iter_tflops": 12.718490580236807, "iter_time": 1.622133804321289, "loss": 0.7636423707008362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.05652944267122, "step_time": 1.2095715942382812} +{"epoch": 0, "iter": 5287, "iter_tflops": 35.51297662521228, "iter_time": 0.5809452056884765, "loss": 0.8180320262908936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.5997854320682, "step_time": 0.5344872589111328} +{"epoch": 0, "iter": 
5288, "iter_tflops": 38.3447226863178, "iter_time": 0.5380425796508789, "loss": 0.6722899675369263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.580434277945486, "step_time": 0.49617311286926274} +{"epoch": 0, "iter": 5289, "iter_tflops": 11.120898280948017, "iter_time": 1.037118453979492, "loss": 0.023346930742263794, "lr": 3e-05, "seqlen": 4640.0, "step_tflops": 11.989946680722745, "step_time": 0.9619466323852539} +{"epoch": 0, "iter": 5290, "iter_tflops": 17.27322728456146, "iter_time": 0.6677205505371094, "loss": 0.0095867570489645, "lr": 3e-05, "seqlen": 4640.0, "step_tflops": 21.588743271292522, "step_time": 0.5342454948425293} +{"epoch": 0, "iter": 5291, "iter_tflops": 32.14531599186266, "iter_time": 0.3587984275817871, "loss": 0.03364967554807663, "lr": 3e-05, "seqlen": 4640.0, "step_tflops": 35.35585652340105, "step_time": 0.32621720886230465} +{"epoch": 0, "iter": 5292, "iter_tflops": 33.945556008205116, "iter_time": 0.33977021408081054, "loss": 0.005643240176141262, "lr": 3e-05, "seqlen": 4640.0, "step_tflops": 37.08434966884595, "step_time": 0.3110122985839843} +{"epoch": 0, "iter": 5293, "iter_tflops": 22.337700694190012, "iter_time": 0.9235996932983398, "loss": 0.44850340485572815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.374139719084702, "step_time": 0.882646110534668} +{"epoch": 0, "iter": 5294, "iter_tflops": 15.356310859647825, "iter_time": 1.3434928283691407, "loss": 0.4582047462463379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.664570048183418, "step_time": 1.1053613052368163} +{"epoch": 0, "iter": 5295, "iter_tflops": 44.57343057509762, "iter_time": 0.4628563079833984, "loss": 0.4353700876235962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.14017033404261, "step_time": 0.428562952041626} +{"epoch": 0, "iter": 5296, "iter_tflops": 46.67989465534998, "iter_time": 0.4419695816040039, "loss": 0.36932137608528137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.17229947128236, "step_time": 0.4112048625946045} +{"epoch": 0, 
"iter": 5297, "iter_tflops": 24.156854118168678, "iter_time": 0.8540471954345703, "loss": 0.050461381673812866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.286725556358494, "step_time": 0.8158863220214845} +{"epoch": 0, "iter": 5298, "iter_tflops": 18.5235991159117, "iter_time": 1.113773483276367, "loss": 0.04312517121434212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.403247204536928, "step_time": 0.9208974628448487} +{"epoch": 0, "iter": 5299, "iter_tflops": 55.20896968492077, "iter_time": 0.3736909713745117, "loss": 0.0536477267742157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.605389078732756, "step_time": 0.34041681480407715} +{"epoch": 0, "iter": 5300, "iter_tflops": 57.69809255222244, "iter_time": 0.357569766998291, "loss": 0.033604275435209274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.697286215677806, "step_time": 0.32905879592895504} +{"epoch": 0, "iter": 5301, "iter_tflops": 11.106480018514414, "iter_time": 0.7484230270385743, "loss": 0.004436961840838194, "lr": 3e-05, "seqlen": 3360.0, "step_tflops": 11.722359107532553, "step_time": 0.7091017532348632} +{"epoch": 0, "iter": 5302, "iter_tflops": 8.605685746503543, "iter_time": 0.9659131927490234, "loss": 0.0026988147292286158, "lr": 3e-05, "seqlen": 3360.0, "step_tflops": 9.897318872207167, "step_time": 0.8398582992553711} +{"epoch": 0, "iter": 5303, "iter_tflops": 22.29502959271292, "iter_time": 0.37283401489257817, "loss": 0.014691037125885487, "lr": 3e-05, "seqlen": 3360.0, "step_tflops": 24.435164314034566, "step_time": 0.3401796398162842} +{"epoch": 0, "iter": 5304, "iter_tflops": 24.408363330180922, "iter_time": 0.34055316543579106, "loss": 0.00287068635225296, "lr": 3e-05, "seqlen": 3360.0, "step_tflops": 26.89503099730128, "step_time": 0.3090662136077881} +{"epoch": 0, "iter": 5305, "iter_tflops": 27.879141534503635, "iter_time": 0.7400189666748047, "loss": 0.19001524150371552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.481895668424933, "step_time": 
0.6997885665893554} +{"epoch": 0, "iter": 5306, "iter_tflops": 12.90456485441086, "iter_time": 1.5987438354492187, "loss": 0.17654961347579956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.834433406508515, "step_time": 1.3029259071350097} +{"epoch": 0, "iter": 5307, "iter_tflops": 31.26655591927305, "iter_time": 0.6598454132080078, "loss": 0.17826345562934875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.19745108503813, "step_time": 0.5132438240051269} +{"epoch": 0, "iter": 5308, "iter_tflops": 40.476732675193375, "iter_time": 0.5097025413513183, "loss": 0.3095768392086029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.06969403072066, "step_time": 0.4681469650268555} +{"epoch": 0, "iter": 5309, "iter_tflops": 2.5414747179297055, "iter_time": 0.9416166000366212, "loss": 0.017230357974767685, "lr": 3e-05, "seqlen": 976.0, "step_tflops": 2.686636670319569, "step_time": 0.8907400131225586} +{"epoch": 0, "iter": 5310, "iter_tflops": 2.022070722966228, "iter_time": 1.1834871826171873, "loss": 0.01597617007791996, "lr": 3e-05, "seqlen": 976.0, "step_tflops": 2.605055679321421, "step_time": 0.9186347923278808} +{"epoch": 0, "iter": 5311, "iter_tflops": 4.727640113601801, "iter_time": 0.5061922492980957, "loss": 0.01707756519317627, "lr": 3e-05, "seqlen": 976.0, "step_tflops": 5.123500242214925, "step_time": 0.4670820083618164} +{"epoch": 0, "iter": 5312, "iter_tflops": 3.8247916782286593, "iter_time": 0.6256797714233399, "loss": 0.012770812027156353, "lr": 3e-05, "seqlen": 976.0, "step_tflops": 4.200367255859447, "step_time": 0.5697346534729004} +{"epoch": 0, "iter": 5313, "iter_tflops": 15.350605194340895, "iter_time": 1.168902603149414, "loss": 0.0004158066585659981, "lr": 3e-05, "seqlen": 7152.0, "step_tflops": 16.522463418326254, "step_time": 1.0859980087280274} +{"epoch": 0, "iter": 5314, "iter_tflops": 14.21430266322103, "iter_time": 1.2623455963134766, "loss": 0.03519534692168236, "lr": 3e-05, "seqlen": 7152.0, "step_tflops": 17.74142851906599, 
"step_time": 1.0113820514678955} +{"epoch": 0, "iter": 5315, "iter_tflops": 46.745238229218096, "iter_time": 0.38385433578491207, "loss": 0.007131213787943125, "lr": 3e-05, "seqlen": 7152.0, "step_tflops": 51.32031242364115, "step_time": 0.3496347064971924} +{"epoch": 0, "iter": 5316, "iter_tflops": 50.74168416403884, "iter_time": 0.3536217346191406, "loss": 0.008351863361895084, "lr": 3e-05, "seqlen": 7152.0, "step_tflops": 55.74947488068711, "step_time": 0.3218570652008057} +{"epoch": 0, "iter": 5317, "iter_tflops": 50.37168430590328, "iter_time": 0.4095772018432618, "loss": 0.07173944264650345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.406811469421974, "step_time": 0.3723566284179688} +{"epoch": 0, "iter": 5318, "iter_tflops": 46.62398877009878, "iter_time": 0.44249953842163087, "loss": 0.13308565318584442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.213761461976084, "step_time": 0.4028427696228028} +{"epoch": 0, "iter": 5319, "iter_tflops": 47.70980549244362, "iter_time": 0.43242879104614257, "loss": 0.12953700125217438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.83277952718448, "step_time": 0.3980317802429199} +{"epoch": 0, "iter": 5320, "iter_tflops": 51.02463422633679, "iter_time": 0.40433594131469736, "loss": 0.09959511458873749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.81768632582068, "step_time": 0.36961570549011236} +{"epoch": 0, "iter": 5321, "iter_tflops": 46.02785772285526, "iter_time": 0.4482305831909179, "loss": 0.059896089136600494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.18650693029089, "step_time": 0.4110884532928466} +{"epoch": 0, "iter": 5322, "iter_tflops": 47.42665327716824, "iter_time": 0.4350105285644531, "loss": 0.12661194801330566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.19484718971504, "step_time": 0.3952706947326661} +{"epoch": 0, "iter": 5323, "iter_tflops": 50.84866047754555, "iter_time": 0.40573524093627933, "loss": 0.03889627009630203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
55.47748951521788, "step_time": 0.3718822479248047} +{"epoch": 0, "iter": 5324, "iter_tflops": 53.040653364474764, "iter_time": 0.3889675598144531, "loss": 0.06194187328219414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.723066838718466, "step_time": 0.35741506195068357} +{"epoch": 0, "iter": 5325, "iter_tflops": 39.55662620856534, "iter_time": 0.5215584716796875, "loss": 0.6998457908630371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.55065923706512, "step_time": 0.4848595504760742} +{"epoch": 0, "iter": 5326, "iter_tflops": 28.995395509333537, "iter_time": 0.7115299911499023, "loss": 0.7345896363258362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.68731779010569, "step_time": 0.5623494644165039} +{"epoch": 0, "iter": 5327, "iter_tflops": 47.48252285759665, "iter_time": 0.4344986801147461, "loss": 0.7798742651939392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.664233190898436, "step_time": 0.39933029556274413} +{"epoch": 0, "iter": 5328, "iter_tflops": 48.86896293751728, "iter_time": 0.4221717071533203, "loss": 0.659368634223938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.087136459957875, "step_time": 0.38862697982788086} +{"epoch": 0, "iter": 5329, "iter_tflops": 38.44574958502062, "iter_time": 0.5366287231445312, "loss": 0.5661876797676086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.47000594151173, "step_time": 0.49749434661865233} +{"epoch": 0, "iter": 5330, "iter_tflops": 22.032735518028392, "iter_time": 0.936383659362793, "loss": 0.6050397753715515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.505727079503735, "step_time": 0.8418886508941651} +{"epoch": 0, "iter": 5331, "iter_tflops": 47.102463812665746, "iter_time": 0.4380045509338379, "loss": 0.382570743560791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.33469885101253, "step_time": 0.4018937282562256} +{"epoch": 0, "iter": 5332, "iter_tflops": 46.70684287830015, "iter_time": 0.44171458053588863, "loss": 0.5838658809661865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
50.68162696302238, "step_time": 0.40707243919372554} +{"epoch": 0, "iter": 5333, "iter_tflops": 30.352065413859858, "iter_time": 0.6797261810302735, "loss": 0.8125391006469727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.108985567247586, "step_time": 0.642533332824707} +{"epoch": 0, "iter": 5334, "iter_tflops": 13.871001412253843, "iter_time": 1.487354293823242, "loss": 0.8121206164360046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.454471964039957, "step_time": 1.1819947090148926} +{"epoch": 0, "iter": 5335, "iter_tflops": 44.34887145078713, "iter_time": 0.4651999664306641, "loss": 0.8467398881912231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.84899454307244, "step_time": 0.43117088890075683} +{"epoch": 0, "iter": 5336, "iter_tflops": 47.2053442559158, "iter_time": 0.43704995346069336, "loss": 0.8407115340232849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.71195554700812, "step_time": 0.4068289871215821} +{"epoch": 0, "iter": 5337, "iter_tflops": 25.384214938980413, "iter_time": 0.8127528686523438, "loss": 0.23226644098758698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.866917798937255, "step_time": 0.767899528503418} +{"epoch": 0, "iter": 5338, "iter_tflops": 17.526755469310746, "iter_time": 1.1771199493408202, "loss": 0.3344420790672302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.596888047233872, "step_time": 1.0527739639282225} +{"epoch": 0, "iter": 5339, "iter_tflops": 35.08057451501968, "iter_time": 0.5881059188842773, "loss": 0.42918580770492554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.53217292602841, "step_time": 0.5354251251220703} +{"epoch": 0, "iter": 5340, "iter_tflops": 38.354217381834566, "iter_time": 0.5379093856811524, "loss": 0.26298025250434875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03892750456138, "step_time": 0.49076165199279786} +{"epoch": 0, "iter": 5341, "iter_tflops": 13.56058249715365, "iter_time": 1.5214017181396486, "loss": 0.8310985565185547, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 14.473899895677327, "step_time": 1.4253997650146486} +{"epoch": 0, "iter": 5342, "iter_tflops": 29.082650953509695, "iter_time": 0.7093952178955079, "loss": 0.860519528388977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.88486326485098, "step_time": 0.5046144676208496} +{"epoch": 0, "iter": 5343, "iter_tflops": 47.236023958675716, "iter_time": 0.4367660903930664, "loss": 0.892021656036377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.98144410373366, "step_time": 0.4046784839630127} +{"epoch": 0, "iter": 5344, "iter_tflops": 45.82826846535221, "iter_time": 0.4501826972961426, "loss": 0.9309179782867432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.72593407403119, "step_time": 0.4148960475921631} +{"epoch": 0, "iter": 5345, "iter_tflops": 29.35946369749884, "iter_time": 0.7027067565917967, "loss": 0.7578251957893372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.14390180730788, "step_time": 0.6624440841674805} +{"epoch": 0, "iter": 5346, "iter_tflops": 15.720472546425576, "iter_time": 1.312371078491211, "loss": 0.6972851157188416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.93306421413666, "step_time": 1.089685920715332} +{"epoch": 0, "iter": 5347, "iter_tflops": 36.68610832956007, "iter_time": 0.5623680038452149, "loss": 0.7711557149887085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.95046342475979, "step_time": 0.5164168758392333} +{"epoch": 0, "iter": 5348, "iter_tflops": 40.59483965775297, "iter_time": 0.5082196083068848, "loss": 0.7089419960975647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.225750795275246, "step_time": 0.4664950428009033} +{"epoch": 0, "iter": 5349, "iter_tflops": 28.01242935842772, "iter_time": 0.5686906127929686, "loss": 0.1536635160446167, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 31.085395655284724, "step_time": 0.5124723453521728} +{"epoch": 0, "iter": 5350, "iter_tflops": 30.36245096590835, "iter_time": 0.524674560546875, "loss": 0.13044755160808563, "lr": 3e-05, "seqlen": 6368.0, 
"step_tflops": 33.47287913514825, "step_time": 0.47591979026794434} +{"epoch": 0, "iter": 5351, "iter_tflops": 29.614456472978713, "iter_time": 0.5379266586303711, "loss": 0.109795480966568, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 32.52753993883348, "step_time": 0.4897513198852539} +{"epoch": 0, "iter": 5352, "iter_tflops": 29.7820261720678, "iter_time": 0.5348999938964845, "loss": 0.1465325951576233, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 32.604134737507515, "step_time": 0.48860077857971196} +{"epoch": 0, "iter": 5353, "iter_tflops": 21.91306427842541, "iter_time": 0.9414974212646485, "loss": 0.17494718730449677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.288936252772334, "step_time": 0.8858753051757813} +{"epoch": 0, "iter": 5354, "iter_tflops": 19.395611746176805, "iter_time": 1.0636990356445313, "loss": 0.19723941385746002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.799929087252355, "step_time": 0.8668552513122558} +{"epoch": 0, "iter": 5355, "iter_tflops": 45.67210485635042, "iter_time": 0.45172197723388663, "loss": 0.21420596539974213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.49277754988533, "step_time": 0.41685058975219724} +{"epoch": 0, "iter": 5356, "iter_tflops": 50.496501685004205, "iter_time": 0.40856480789184574, "loss": 0.22013968229293823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.84720994020966, "step_time": 0.37615575218200686} +{"epoch": 0, "iter": 5357, "iter_tflops": 26.661416679513604, "iter_time": 0.7738183517456054, "loss": 0.07743921875953674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.244807641368055, "step_time": 0.7304384498596191} +{"epoch": 0, "iter": 5358, "iter_tflops": 22.648322952511336, "iter_time": 0.910932502746582, "loss": 0.10240380465984344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.473456625603283, "step_time": 0.8099055347442627} +{"epoch": 0, "iter": 5359, "iter_tflops": 52.513879455629954, "iter_time": 0.3928693466186523, "loss": 0.0653110072016716, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 57.468750380856264, "step_time": 0.3589967308044434} +{"epoch": 0, "iter": 5360, "iter_tflops": 56.32479998941879, "iter_time": 0.36628791427612306, "loss": 0.10226862877607346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.372398034782215, "step_time": 0.3361624145507812} +{"epoch": 0, "iter": 5361, "iter_tflops": 42.00828599864428, "iter_time": 0.49111962127685543, "loss": 0.9679208993911743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.80533294155351, "step_time": 0.4504081115722657} +{"epoch": 0, "iter": 5362, "iter_tflops": 10.134749206742294, "iter_time": 2.0356787414550785, "loss": 0.8591934442520142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.107811777564924, "step_time": 1.8573499374389648} +{"epoch": 0, "iter": 5363, "iter_tflops": 9.878085310706934, "iter_time": 2.088572113037109, "loss": 0.7967131733894348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.603580095333802, "step_time": 1.636923267364502} +{"epoch": 0, "iter": 5364, "iter_tflops": 23.802409140947752, "iter_time": 0.8667649307250976, "loss": 0.9557113647460938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.04304256651018, "step_time": 0.7103626785278319} +{"epoch": 0, "iter": 5365, "iter_tflops": 25.08468033479622, "iter_time": 0.7597220153808595, "loss": 0.5362820029258728, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 26.60805373483525, "step_time": 0.7162261505126954} +{"epoch": 0, "iter": 5366, "iter_tflops": 10.702074127335768, "iter_time": 1.780718734741211, "loss": 0.43536388874053955, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 14.0119107884387, "step_time": 1.3600845870971678} +{"epoch": 0, "iter": 5367, "iter_tflops": 11.91595677842281, "iter_time": 1.5993162994384766, "loss": 0.414754182100296, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 13.321751300487167, "step_time": 1.4305464401245116} +{"epoch": 0, "iter": 5368, "iter_tflops": 10.761196296604082, "iter_time": 1.7709354400634763, "loss": 0.5346108675003052, "lr": 3e-05, 
"seqlen": 7584.0, "step_tflops": 13.31103280893618, "step_time": 1.4316983642578123} +{"epoch": 0, "iter": 5369, "iter_tflops": 15.314433577384083, "iter_time": 1.029524383544922, "loss": 0.513633668422699, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 16.365018679290035, "step_time": 0.9634320068359374} +{"epoch": 0, "iter": 5370, "iter_tflops": 15.108440925866555, "iter_time": 1.0435612030029298, "loss": 0.33154675364494324, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 16.677596564025897, "step_time": 0.9453749961853027} +{"epoch": 0, "iter": 5371, "iter_tflops": 26.82687543718306, "iter_time": 0.5877159576416016, "loss": 0.3941275179386139, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 28.850526432078222, "step_time": 0.5464920310974121} +{"epoch": 0, "iter": 5372, "iter_tflops": 28.719635621986946, "iter_time": 0.5489826889038086, "loss": 0.4538469910621643, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.63133846708551, "step_time": 0.5147206611633302} +{"epoch": 0, "iter": 5373, "iter_tflops": 27.907627834710095, "iter_time": 0.7392636032104491, "loss": 0.2297752946615219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.797143262533957, "step_time": 0.6923849487304689} +{"epoch": 0, "iter": 5374, "iter_tflops": 33.48111773373269, "iter_time": 0.6162008590698242, "loss": 0.12185902893543243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.45632837368561, "step_time": 0.49765848350524905} +{"epoch": 0, "iter": 5375, "iter_tflops": 38.66895494932999, "iter_time": 0.5335311889648438, "loss": 0.1398347169160843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.584138251364045, "step_time": 0.4844783611297607} +{"epoch": 0, "iter": 5376, "iter_tflops": 44.46094861110184, "iter_time": 0.46402729034423823, "loss": 0.12801724672317505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.93631362415775, "step_time": 0.4215906753540039} +{"epoch": 0, "iter": 5377, "iter_tflops": 19.343132939565322, "iter_time": 1.0665848999023437, "loss": 0.728436291217804, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 21.187989252267794, "step_time": 0.9737164421081543} +{"epoch": 0, "iter": 5378, "iter_tflops": 27.235360757983244, "iter_time": 0.7575112991333008, "loss": 0.7341446876525879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.88413367725, "step_time": 0.6088718013763428} +{"epoch": 0, "iter": 5379, "iter_tflops": 44.08496942068545, "iter_time": 0.46798475265502926, "loss": 0.7971692681312561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.827657641234055, "step_time": 0.4313632431030273} +{"epoch": 0, "iter": 5380, "iter_tflops": 50.69293044794624, "iter_time": 0.4069816703796387, "loss": 0.7285252213478088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.7440787790219, "step_time": 0.37686438369750974} +{"epoch": 0, "iter": 5381, "iter_tflops": 30.65135288324057, "iter_time": 0.6730891647338868, "loss": 0.08049023151397705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.973287206219666, "step_time": 0.6256911354064941} +{"epoch": 0, "iter": 5382, "iter_tflops": 16.231618911741954, "iter_time": 1.2710434875488281, "loss": 0.0772903636097908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.787123161017547, "step_time": 1.0426525039672851} +{"epoch": 0, "iter": 5383, "iter_tflops": 50.405458452152544, "iter_time": 0.40930276489257816, "loss": 0.05870949849486351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.91004687038833, "step_time": 0.37572529411315914} +{"epoch": 0, "iter": 5384, "iter_tflops": 51.967968327566474, "iter_time": 0.3969963455200195, "loss": 0.076690174639225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.0507977921675, "step_time": 0.368078498840332} +{"epoch": 0, "iter": 5385, "iter_tflops": 38.093293065773636, "iter_time": 0.5415938568115235, "loss": 0.14743271470069885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14747656280262, "step_time": 0.5013938941955567} +{"epoch": 0, "iter": 5386, "iter_tflops": 38.17854913538844, "iter_time": 0.5403844299316406, "loss": 0.14377029240131378, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 41.982050001787435, "step_time": 0.49142653846740725} +{"epoch": 0, "iter": 5387, "iter_tflops": 40.074384727374444, "iter_time": 0.514819969177246, "loss": 0.19222426414489746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.659519046475154, "step_time": 0.47254513931274417} +{"epoch": 0, "iter": 5388, "iter_tflops": 41.146450385600886, "iter_time": 0.5014063987731933, "loss": 0.19002985954284668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.02476547389224, "step_time": 0.45821656799316407} +{"epoch": 0, "iter": 5389, "iter_tflops": 15.702863878442228, "iter_time": 1.3138427276611326, "loss": 0.7852795720100403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.888887221371782, "step_time": 1.221578025817871} +{"epoch": 0, "iter": 5390, "iter_tflops": 21.73139929358931, "iter_time": 0.9493679275512696, "loss": 1.011061429977417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.3898947035127, "step_time": 0.7817800617218018} +{"epoch": 0, "iter": 5391, "iter_tflops": 44.51677702996663, "iter_time": 0.46344535446166985, "loss": 1.061564564704895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.899228003044044, "step_time": 0.4307187061309815} +{"epoch": 0, "iter": 5392, "iter_tflops": 45.98231916193631, "iter_time": 0.448674488067627, "loss": 1.0249332189559937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.938821397215186, "step_time": 0.41312736129760746} +{"epoch": 0, "iter": 5393, "iter_tflops": 31.19233715416959, "iter_time": 0.66141544342041, "loss": 0.4767683446407318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.228867060942974, "step_time": 0.6208786315917969} +{"epoch": 0, "iter": 5394, "iter_tflops": 21.34111999029222, "iter_time": 0.9667296524047851, "loss": 0.4966038167476654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.501602643467546, "step_time": 0.7238573131561279} +{"epoch": 0, "iter": 5395, "iter_tflops": 39.91786154616683, "iter_time": 0.5168386459350586, "loss": 0.5230766534805298, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 42.790617659477526, "step_time": 0.4821405868530273} +{"epoch": 0, "iter": 5396, "iter_tflops": 52.76765553925268, "iter_time": 0.3909799156188965, "loss": 0.6126722097396851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.162534359433515, "step_time": 0.3609198532104493} +{"epoch": 0, "iter": 5397, "iter_tflops": 43.5335021240711, "iter_time": 0.4739130210876465, "loss": 0.5811099410057068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.467602812383376, "step_time": 0.4346352519989014} +{"epoch": 0, "iter": 5398, "iter_tflops": 46.56124071953411, "iter_time": 0.44309587097167974, "loss": 0.47792237997055054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.54904120548014, "step_time": 0.4002226428985596} +{"epoch": 0, "iter": 5399, "iter_tflops": 51.218120706271876, "iter_time": 0.40280848312377937, "loss": 0.505541980266571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.43453695672976, "step_time": 0.37217039489746095} +{"epoch": 0, "iter": 5400, "iter_tflops": 48.63329423495564, "iter_time": 0.42421747970581053, "loss": 0.548738420009613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.77478602581075, "step_time": 0.39092708969116213} +{"epoch": 0, "iter": 5401, "iter_tflops": 27.367312086725928, "iter_time": 0.7538589630126953, "loss": 0.5208656191825867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.784621506638924, "step_time": 0.7167401351928712} +{"epoch": 0, "iter": 5402, "iter_tflops": 14.331780001736437, "iter_time": 1.4395346221923828, "loss": 0.5719060301780701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.23330817445561, "step_time": 1.1971638469696044} +{"epoch": 0, "iter": 5403, "iter_tflops": 37.67618365779688, "iter_time": 0.5475897903442383, "loss": 0.4758293628692627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.74243561497113, "step_time": 0.5063785018920899} +{"epoch": 0, "iter": 5404, "iter_tflops": 43.48146686413977, "iter_time": 0.47448016357421874, "loss": 0.3651336133480072, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 47.30922632726754, "step_time": 0.436090274810791} +{"epoch": 0, "iter": 5405, "iter_tflops": 34.19606893681765, "iter_time": 0.6033176956176757, "loss": 0.8847820162773132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.71501756729923, "step_time": 0.5470259552001954} +{"epoch": 0, "iter": 5406, "iter_tflops": 37.43779016477247, "iter_time": 0.5510766906738283, "loss": 0.5843056440353394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.91861905990196, "step_time": 0.48070264053344725} +{"epoch": 0, "iter": 5407, "iter_tflops": 39.74080593036755, "iter_time": 0.5191412963867187, "loss": 0.7009309530258179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.834675365147866, "step_time": 0.4816446800231934} +{"epoch": 0, "iter": 5408, "iter_tflops": 40.12751444166813, "iter_time": 0.5141383361816406, "loss": 0.6703412532806396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.35716450909976, "step_time": 0.4758404693603516} +{"epoch": 0, "iter": 5409, "iter_tflops": 30.272308495607234, "iter_time": 0.6815170211791992, "loss": 0.870668888092041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.47697086613308, "step_time": 0.6162771892547607} +{"epoch": 0, "iter": 5410, "iter_tflops": 33.02331529746076, "iter_time": 0.6247432556152344, "loss": 0.7636458873748779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.143273359324354, "step_time": 0.5708141956329347} +{"epoch": 0, "iter": 5411, "iter_tflops": 37.11288483554811, "iter_time": 0.5559011001586914, "loss": 0.8300251364707947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.27807711830362, "step_time": 0.5122164459228515} +{"epoch": 0, "iter": 5412, "iter_tflops": 36.34257734741224, "iter_time": 0.5676838302612305, "loss": 0.9205623865127563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.37107565551372, "step_time": 0.5240165061950683} +{"epoch": 0, "iter": 5413, "iter_tflops": 18.45934795580313, "iter_time": 1.1176501770019531, "loss": 0.18870322406291962, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 19.839527221888364, "step_time": 1.0398984451293947} +{"epoch": 0, "iter": 5414, "iter_tflops": 18.581937567869883, "iter_time": 1.1102767639160156, "loss": 0.1764317750930786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.575767244888908, "step_time": 0.9138601264953612} +{"epoch": 0, "iter": 5415, "iter_tflops": 48.42283447399983, "iter_time": 0.42606125259399413, "loss": 0.26491934061050415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.66810810978155, "step_time": 0.3917189025878906} +{"epoch": 0, "iter": 5416, "iter_tflops": 51.96956681173753, "iter_time": 0.3969841346740723, "loss": 0.35302233695983887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.81836288538496, "step_time": 0.3631060886383057} +{"epoch": 0, "iter": 5417, "iter_tflops": 23.438904857876917, "iter_time": 0.8802072296142578, "loss": 0.22211657464504242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.63359283380871, "step_time": 0.837518653869629} +{"epoch": 0, "iter": 5418, "iter_tflops": 13.064351647520956, "iter_time": 1.5791900024414063, "loss": 0.23009978234767914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.61155480053996, "step_time": 1.3215271492004395} +{"epoch": 0, "iter": 5419, "iter_tflops": 45.227095621413476, "iter_time": 0.45616666793823246, "loss": 0.2418297380208969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.33738347274112, "step_time": 0.4181635112762451} +{"epoch": 0, "iter": 5420, "iter_tflops": 45.83117260642414, "iter_time": 0.4501541709899902, "loss": 0.202503502368927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.44427685818207, "step_time": 0.41725948524475104} +{"epoch": 0, "iter": 5421, "iter_tflops": 38.9008408081779, "iter_time": 0.5303508377075196, "loss": 0.7882739305496216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.21034599879658, "step_time": 0.48876864242553714} +{"epoch": 0, "iter": 5422, "iter_tflops": 40.39574283974214, "iter_time": 0.5107244491577149, "loss": 1.0177874565124512, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 43.66028457814277, "step_time": 0.4725368537902832} +{"epoch": 0, "iter": 5423, "iter_tflops": 42.793253168553875, "iter_time": 0.4821108932495118, "loss": 0.9828695058822632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.03918268172615, "step_time": 0.44812032508850097} +{"epoch": 0, "iter": 5424, "iter_tflops": 43.84111008387841, "iter_time": 0.47058784484863286, "loss": 1.0737096071243286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.24892113903363, "step_time": 0.4366468696594238} +{"epoch": 0, "iter": 5425, "iter_tflops": 28.573290405627795, "iter_time": 0.7220412216186523, "loss": 0.7321839332580566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.24233287566823, "step_time": 0.6821925277709961} +{"epoch": 0, "iter": 5426, "iter_tflops": 14.679297710224713, "iter_time": 1.4054550781250001, "loss": 0.5996955037117004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.112798513929413, "step_time": 1.2804165267944336} +{"epoch": 0, "iter": 5427, "iter_tflops": 32.78036509003368, "iter_time": 0.6293735122680664, "loss": 0.6486182808876038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.89395967701046, "step_time": 0.517148302078247} +{"epoch": 0, "iter": 5428, "iter_tflops": 38.50882484687369, "iter_time": 0.5357497558593749, "loss": 0.8812409043312073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75051373449685, "step_time": 0.4941518478393555} +{"epoch": 0, "iter": 5429, "iter_tflops": 27.251938878915137, "iter_time": 0.7570504837036133, "loss": 0.011409305036067963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.708377352204927, "step_time": 0.6944537315368653} +{"epoch": 0, "iter": 5430, "iter_tflops": 49.829985213333075, "iter_time": 0.41402969360351566, "loss": 0.00899859145283699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.68250606788729, "step_time": 0.37728873443603517} +{"epoch": 0, "iter": 5431, "iter_tflops": 58.58106737348309, "iter_time": 0.3521802253723144, "loss": 0.003228423185646534, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 64.36273088564289, "step_time": 0.3205440979003906} +{"epoch": 0, "iter": 5432, "iter_tflops": 61.40478915764046, "iter_time": 0.3359850883483887, "loss": 0.010415195487439632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.05679549771773, "step_time": 0.30766596221923825} +{"epoch": 0, "iter": 5433, "iter_tflops": 27.225326256468662, "iter_time": 0.7577904968261718, "loss": 0.08027436584234238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.606913721973488, "step_time": 0.7211925659179688} +{"epoch": 0, "iter": 5434, "iter_tflops": 13.485228714267555, "iter_time": 1.529903121948242, "loss": 0.05165687948465347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.4041889479903, "step_time": 1.1854096488952637} +{"epoch": 0, "iter": 5435, "iter_tflops": 52.07327050556626, "iter_time": 0.3961935424804688, "loss": 0.059304822236299515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.48589272545538, "step_time": 0.365243293762207} +{"epoch": 0, "iter": 5436, "iter_tflops": 51.96247450654896, "iter_time": 0.39703831863403316, "loss": 0.08279025554656982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.072776338009206, "step_time": 0.3679342250823974} +{"epoch": 0, "iter": 5437, "iter_tflops": 17.724737036246303, "iter_time": 0.6233413848876953, "loss": 0.005602196790277958, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 18.986153642347325, "step_time": 0.5819273529052734} +{"epoch": 0, "iter": 5438, "iter_tflops": 7.344279502254141, "iter_time": 1.5043765869140624, "loss": 0.008563889190554619, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 8.504835601275428, "step_time": 1.2990917930603028} +{"epoch": 0, "iter": 5439, "iter_tflops": 29.378402018310336, "iter_time": 0.37607770919799804, "loss": 0.003818139899522066, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 32.34407935408939, "step_time": 0.34159457778930663} +{"epoch": 0, "iter": 5440, "iter_tflops": 30.6599135293094, "iter_time": 0.3603585548400879, "loss": 
0.0032521733082830906, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 33.68189307487482, "step_time": 0.3280267562866211} +{"epoch": 0, "iter": 5441, "iter_tflops": 28.61084147134675, "iter_time": 0.7210935592651367, "loss": 0.801260232925415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.437133912998714, "step_time": 0.6778264198303223} +{"epoch": 0, "iter": 5442, "iter_tflops": 15.367056279538692, "iter_time": 1.3425533905029297, "loss": 0.9523984789848328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.280592779021035, "step_time": 1.1285790214538574} +{"epoch": 0, "iter": 5443, "iter_tflops": 36.40589573101159, "iter_time": 0.5666964950561524, "loss": 0.7862311601638794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.53243098318496, "step_time": 0.5218776836395264} +{"epoch": 0, "iter": 5444, "iter_tflops": 36.61643840457668, "iter_time": 0.5634380187988282, "loss": 0.9145979881286621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.818290801609315, "step_time": 0.518131067276001} +{"epoch": 0, "iter": 5445, "iter_tflops": 16.92499165323666, "iter_time": 1.2189721527099608, "loss": 0.9559400677680969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.02638186531343, "step_time": 1.1444944229125977} +{"epoch": 0, "iter": 5446, "iter_tflops": 15.613664127745139, "iter_time": 1.321348617553711, "loss": 0.7998447418212891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.26337139247634, "step_time": 1.129643211364746} +{"epoch": 0, "iter": 5447, "iter_tflops": 38.15881980344021, "iter_time": 0.5406638259887695, "loss": 0.8453624844551086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.703618672145105, "step_time": 0.49470751380920414} +{"epoch": 0, "iter": 5448, "iter_tflops": 40.66884057580865, "iter_time": 0.5072948532104492, "loss": 0.8259621262550354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.840006991966945, "step_time": 0.4705996856689453} +{"epoch": 0, "iter": 5449, "iter_tflops": 21.55866514411892, "iter_time": 0.9569745330810546, "loss": 
0.7858676314353943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.79582887923943, "step_time": 0.9050380935668945} +{"epoch": 0, "iter": 5450, "iter_tflops": 15.473659480708115, "iter_time": 1.3333040924072266, "loss": 0.8484523296356201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.632513610808868, "step_time": 0.8729961547851562} +{"epoch": 0, "iter": 5451, "iter_tflops": 43.05617241420915, "iter_time": 0.479166919708252, "loss": 0.7973271012306213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.310642190198635, "step_time": 0.44549357414245605} +{"epoch": 0, "iter": 5452, "iter_tflops": 39.30210831173073, "iter_time": 0.5249360504150391, "loss": 0.8699672222137451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.323410900230535, "step_time": 0.48746292114257816} +{"epoch": 0, "iter": 5453, "iter_tflops": 24.16294073969783, "iter_time": 0.8538320617675781, "loss": 0.3264504671096802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.34916057310822, "step_time": 0.8138767929077148} +{"epoch": 0, "iter": 5454, "iter_tflops": 13.928020057570007, "iter_time": 1.4812653503417967, "loss": 0.3726205825805664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.767836401823175, "step_time": 1.1611483268737792} +{"epoch": 0, "iter": 5455, "iter_tflops": 49.41860907994822, "iter_time": 0.4174762077331543, "loss": 0.34539052844047546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.712500848687576, "step_time": 0.38410227012634274} +{"epoch": 0, "iter": 5456, "iter_tflops": 54.32203645705944, "iter_time": 0.3797923431396484, "loss": 0.24585923552513123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.729917752569115, "step_time": 0.35128762817382814} +{"epoch": 0, "iter": 5457, "iter_tflops": 20.818421181106277, "iter_time": 0.9910018310546874, "loss": 0.10955242067575455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.705474767209566, "step_time": 0.9505018310546876} +{"epoch": 0, "iter": 5458, "iter_tflops": 15.63295169795975, "iter_time": 1.3197183685302734, 
"loss": 0.13735395669937134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.487574907902054, "step_time": 0.9174441261291504} +{"epoch": 0, "iter": 5459, "iter_tflops": 49.736005199785396, "iter_time": 0.4148120346069336, "loss": 0.19121240079402924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.95466697339469, "step_time": 0.3823782939910889} +{"epoch": 0, "iter": 5460, "iter_tflops": 46.92651105451176, "iter_time": 0.43964686584472656, "loss": 0.11834925413131714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.741717504684075, "step_time": 0.4065903663635254} +{"epoch": 0, "iter": 5461, "iter_tflops": 46.18994379346107, "iter_time": 0.44665768814086915, "loss": 0.2719426155090332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.39162328605047, "step_time": 0.40941514015197755} +{"epoch": 0, "iter": 5462, "iter_tflops": 12.203291441442186, "iter_time": 1.6906171264648437, "loss": 0.2439114898443222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.958495863396772, "step_time": 1.5920901412963866} +{"epoch": 0, "iter": 5463, "iter_tflops": 8.682110852008105, "iter_time": 2.3762762145996095, "loss": 0.23351523280143738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.421342920967287, "step_time": 1.9796962509155271} +{"epoch": 0, "iter": 5464, "iter_tflops": 18.631875320503163, "iter_time": 1.1073009643554688, "loss": 0.24157845973968506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.11111858583913, "step_time": 0.9772619781494141} +{"epoch": 0, "iter": 5465, "iter_tflops": 17.91380184018008, "iter_time": 0.8527261505126953, "loss": 0.3189604878425598, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 18.987162342618607, "step_time": 0.8045208129882812} +{"epoch": 0, "iter": 5466, "iter_tflops": 16.221975598270536, "iter_time": 0.9416588745117187, "loss": 0.3332791030406952, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 21.137502420584543, "step_time": 0.7226760749816895} +{"epoch": 0, "iter": 5467, "iter_tflops": 24.077337217558004, "iter_time": 
0.6344375686645507, "loss": 0.345366895198822, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 25.883730022082208, "step_time": 0.5901609725952148} +{"epoch": 0, "iter": 5468, "iter_tflops": 27.422814716004705, "iter_time": 0.5570386352539063, "loss": 0.29111993312835693, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.163167636057032, "step_time": 0.523796573638916} +{"epoch": 0, "iter": 5469, "iter_tflops": 24.226377631421016, "iter_time": 0.8515962982177735, "loss": 0.004560528323054314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.520574430658872, "step_time": 0.8084102325439455} +{"epoch": 0, "iter": 5470, "iter_tflops": 15.468961048968294, "iter_time": 1.3337090606689455, "loss": 0.007692880462855101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.353020467480054, "step_time": 0.9661908740997314} +{"epoch": 0, "iter": 5471, "iter_tflops": 53.62064317561195, "iter_time": 0.38476027679443353, "loss": 0.01821882463991642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.764335124649655, "step_time": 0.35108188438415533} +{"epoch": 0, "iter": 5472, "iter_tflops": 59.79415673255066, "iter_time": 0.34503527832031256, "loss": 0.002586578018963337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.60799765415668, "step_time": 0.31446003913879395} +{"epoch": 0, "iter": 5473, "iter_tflops": 28.790672034948752, "iter_time": 0.7165895080566407, "loss": 0.6127340197563171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.475225000930266, "step_time": 0.6769792022705079} +{"epoch": 0, "iter": 5474, "iter_tflops": 13.63151130392068, "iter_time": 1.5134854125976565, "loss": 0.8172830939292908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.29895607189885, "step_time": 1.1926207237243653} +{"epoch": 0, "iter": 5475, "iter_tflops": 39.55643525922756, "iter_time": 0.5215609893798828, "loss": 0.8258788585662842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.184588314694146, "step_time": 0.47774204444885254} +{"epoch": 0, "iter": 5476, "iter_tflops": 38.6176631647128, 
"iter_time": 0.5342398223876953, "loss": 0.7925420999526978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.230942058254456, "step_time": 0.4885302696228027} +{"epoch": 0, "iter": 5477, "iter_tflops": 15.602337844053741, "iter_time": 1.322307830810547, "loss": 0.7165764570236206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.586574926872586, "step_time": 1.2438429031372071} +{"epoch": 0, "iter": 5478, "iter_tflops": 14.245562354517546, "iter_time": 1.4482470397949216, "loss": 0.8262940645217896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.47207878889988, "step_time": 1.1168798999786376} +{"epoch": 0, "iter": 5479, "iter_tflops": 37.14765563640552, "iter_time": 0.5553807678222656, "loss": 0.8405570387840271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.185996277984515, "step_time": 0.513390121459961} +{"epoch": 0, "iter": 5480, "iter_tflops": 34.052112548441706, "iter_time": 0.6058682403564454, "loss": 0.7187845706939697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.80606770894946, "step_time": 0.5605351181030274} +{"epoch": 0, "iter": 5481, "iter_tflops": 14.601490492301702, "iter_time": 1.412944351196289, "loss": 0.010739881545305252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.513525138792268, "step_time": 1.3298778533935547} +{"epoch": 0, "iter": 5482, "iter_tflops": 28.481225391809996, "iter_time": 0.7243752059936523, "loss": 0.004053221549838781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.406130720596224, "step_time": 0.5826983375549315} +{"epoch": 0, "iter": 5483, "iter_tflops": 56.07003276393199, "iter_time": 0.3679522285461426, "loss": 0.005436121951788664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.48892267676783, "step_time": 0.3355253696441651} +{"epoch": 0, "iter": 5484, "iter_tflops": 59.806035429736504, "iter_time": 0.34496674728393556, "loss": 0.008855625987052917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.24828142925257, "step_time": 0.3114208106994629} +{"epoch": 0, "iter": 5485, "iter_tflops": 
30.950833452357664, "iter_time": 0.6665763473510742, "loss": 0.7341423630714417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.93535539467392, "step_time": 0.6264117469787597} +{"epoch": 0, "iter": 5486, "iter_tflops": 17.400375175898823, "iter_time": 1.185669464111328, "loss": 0.7518793344497681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.82303919777265, "step_time": 0.9039590797424315} +{"epoch": 0, "iter": 5487, "iter_tflops": 38.183564317796474, "iter_time": 0.5403134536743165, "loss": 0.8730059862136841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.61275652463229, "step_time": 0.4957877159118652} +{"epoch": 0, "iter": 5488, "iter_tflops": 37.82111058280961, "iter_time": 0.5454914779663086, "loss": 0.8194788098335266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.91267382344681, "step_time": 0.5042714538574219} +{"epoch": 0, "iter": 5489, "iter_tflops": 21.91982989979372, "iter_time": 0.9412068252563476, "loss": 0.7018327116966248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.35360194665027, "step_time": 0.8834223327636719} +{"epoch": 0, "iter": 5490, "iter_tflops": 8.557536130941644, "iter_time": 2.410868408203125, "loss": 0.9573752284049988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.986922558680979, "step_time": 1.7211334609985351} +{"epoch": 0, "iter": 5491, "iter_tflops": 10.534834094560015, "iter_time": 1.9583690948486325, "loss": 0.6856630444526672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.843942677293002, "step_time": 1.606289752960205} +{"epoch": 0, "iter": 5492, "iter_tflops": 41.76601281122892, "iter_time": 0.49396847152709955, "loss": 0.7878325581550598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50669145651228, "step_time": 0.4533639526367188} +{"epoch": 0, "iter": 5493, "iter_tflops": 19.18473122595578, "iter_time": 0.8047630920410156, "loss": 0.4722297191619873, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 20.176465569077468, "step_time": 0.7652065505981445} +{"epoch": 0, "iter": 5494, "iter_tflops": 
18.072215129666162, "iter_time": 0.8543038864135741, "loss": 0.5058196187019348, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 19.952715277603538, "step_time": 0.7737875976562499} +{"epoch": 0, "iter": 5495, "iter_tflops": 28.09204913775603, "iter_time": 0.5495919342041015, "loss": 0.37935590744018555, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.94389105795084, "step_time": 0.5156031188964842} +{"epoch": 0, "iter": 5496, "iter_tflops": 28.925290200722344, "iter_time": 0.5337600250244141, "loss": 0.3572066128253937, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 30.64910596701345, "step_time": 0.5037394447326661} +{"epoch": 0, "iter": 5497, "iter_tflops": 26.853590270847803, "iter_time": 0.7682806396484375, "loss": 0.28676989674568176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.45152736135182, "step_time": 0.725131317138672} +{"epoch": 0, "iter": 5498, "iter_tflops": 12.17825231276883, "iter_time": 1.6940931243896484, "loss": 0.34427860379219055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.142166369315717, "step_time": 1.2780870323181153} +{"epoch": 0, "iter": 5499, "iter_tflops": 13.148532327049866, "iter_time": 1.5690795745849608, "loss": 0.2941043972969055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.356789412251336, "step_time": 1.3434509620666504} +{"epoch": 0, "iter": 5500, "iter_tflops": 29.4762779218106, "iter_time": 0.6999219360351563, "loss": 0.31010541319847107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.71858290498647, "step_time": 0.6118612270355224} +{"epoch": 0, "iter": 5501, "iter_tflops": 15.361916718118461, "iter_time": 1.0370063781738281, "loss": 0.37555980682373047, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 16.38050662731346, "step_time": 0.9725221557617187} +{"epoch": 0, "iter": 5502, "iter_tflops": 9.730095994672423, "iter_time": 1.6372300567626954, "loss": 0.39944788813591003, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 11.723618299960833, "step_time": 1.3588301162719727} +{"epoch": 0, "iter": 5503, 
"iter_tflops": 24.1398236696696, "iter_time": 0.6599222030639648, "loss": 0.27599844336509705, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 25.950445916415177, "step_time": 0.6138779144287109} +{"epoch": 0, "iter": 5504, "iter_tflops": 22.945386746754036, "iter_time": 0.6942748794555662, "loss": 0.5455071330070496, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 24.740611378471424, "step_time": 0.6438970069885254} +{"epoch": 0, "iter": 5505, "iter_tflops": 18.533960444637035, "iter_time": 1.1131508331298827, "loss": 0.6036916375160217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.85600285288167, "step_time": 1.0390355834960938} +{"epoch": 0, "iter": 5506, "iter_tflops": 21.76857636150869, "iter_time": 0.9477465667724608, "loss": 0.594569981098175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.28999484226788, "step_time": 0.7847507629394532} +{"epoch": 0, "iter": 5507, "iter_tflops": 48.747063762259586, "iter_time": 0.42322740936279296, "loss": 0.7138676047325134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.81050734940309, "step_time": 0.39066266441345215} +{"epoch": 0, "iter": 5508, "iter_tflops": 50.29901911529749, "iter_time": 0.41016890335083006, "loss": 0.7623675465583801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.498599153023676, "step_time": 0.3785619049072266} +{"epoch": 0, "iter": 5509, "iter_tflops": 24.83013099661228, "iter_time": 0.8308894348144531, "loss": 0.7555676698684692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.052295261945524, "step_time": 0.7919107818603515} +{"epoch": 0, "iter": 5510, "iter_tflops": 24.678149411326025, "iter_time": 0.8360065078735351, "loss": 0.8475538492202759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.483077753490083, "step_time": 0.6997605094909668} +{"epoch": 0, "iter": 5511, "iter_tflops": 49.30821953720798, "iter_time": 0.41841083908081056, "loss": 0.9978787899017334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15070053851916, "step_time": 0.38816221237182613} +{"epoch": 0, "iter": 
5512, "iter_tflops": 45.952098777375376, "iter_time": 0.4489695587158203, "loss": 0.9018605351448059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64658422099415, "step_time": 0.4155591735839844} +{"epoch": 0, "iter": 5513, "iter_tflops": 23.990053943240127, "iter_time": 0.8599852905273437, "loss": 0.44081738591194153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.21422877171697, "step_time": 0.8182321853637694} +{"epoch": 0, "iter": 5514, "iter_tflops": 20.174731635947108, "iter_time": 1.0226204681396485, "loss": 0.44069963693618774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.780376194909746, "step_time": 0.8325577201843263} +{"epoch": 0, "iter": 5515, "iter_tflops": 47.09473843946174, "iter_time": 0.43807640075683596, "loss": 0.3281412124633789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.14543859555748, "step_time": 0.40338090896606443} +{"epoch": 0, "iter": 5516, "iter_tflops": 48.67565725469753, "iter_time": 0.4238482780456543, "loss": 0.43424320220947266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76129647036705, "step_time": 0.3910270385742188} +{"epoch": 0, "iter": 5517, "iter_tflops": 32.86608926007613, "iter_time": 0.6277319259643555, "loss": 0.21421827375888824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.28356221618152, "step_time": 0.5847225227355958} +{"epoch": 0, "iter": 5518, "iter_tflops": 12.122703722169797, "iter_time": 1.7018557891845703, "loss": 0.17964328825473785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.285973505620776, "step_time": 1.4441503410339354} +{"epoch": 0, "iter": 5519, "iter_tflops": 39.83306283932671, "iter_time": 0.5179389190673828, "loss": 0.2932741343975067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77332627780658, "step_time": 0.4713165588378906} +{"epoch": 0, "iter": 5520, "iter_tflops": 41.023969002530244, "iter_time": 0.5029034004211426, "loss": 0.1812262386083603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2292093575933, "step_time": 0.45614534950256347} +{"epoch": 0, 
"iter": 5521, "iter_tflops": 28.261870290305804, "iter_time": 0.729997459411621, "loss": 0.7974569201469421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.559154777975376, "step_time": 0.6751198997497558} +{"epoch": 0, "iter": 5522, "iter_tflops": 9.634896095031882, "iter_time": 2.1412886352539062, "loss": 0.8361499309539795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.144341654161973, "step_time": 1.8512617568969727} +{"epoch": 0, "iter": 5523, "iter_tflops": 9.757326609857902, "iter_time": 2.1144207153320314, "loss": 0.7944599390029907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.332502127554912, "step_time": 1.8205241241455077} +{"epoch": 0, "iter": 5524, "iter_tflops": 34.006774528654915, "iter_time": 0.6066759872436522, "loss": 0.7794280052185059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.07317436817351, "step_time": 0.5418800468444824} +{"epoch": 0, "iter": 5525, "iter_tflops": 16.567551694048323, "iter_time": 0.9368306427001953, "loss": 0.42779022455215454, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 17.63582399309935, "step_time": 0.8800830688476561} +{"epoch": 0, "iter": 5526, "iter_tflops": 6.331277307059002, "iter_time": 2.4514784851074216, "loss": 0.41496995091438293, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 7.635623834644358, "step_time": 2.032707534790039} +{"epoch": 0, "iter": 5527, "iter_tflops": 15.404237004156634, "iter_time": 1.007579284667969, "loss": 0.31337472796440125, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 16.978568227232458, "step_time": 0.9141518821716308} +{"epoch": 0, "iter": 5528, "iter_tflops": 27.254344884486173, "iter_time": 0.5694868164062501, "loss": 0.45376157760620117, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 29.099106239880413, "step_time": 0.5333837394714356} +{"epoch": 0, "iter": 5529, "iter_tflops": 15.9574495967873, "iter_time": 0.962394142150879, "loss": 0.35536739230155945, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 16.855785191039516, "step_time": 0.9111029739379882} +{"epoch": 0, 
"iter": 5530, "iter_tflops": 26.06688696609427, "iter_time": 0.5891519012451172, "loss": 0.35085856914520264, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.78044415876439, "step_time": 0.552811752319336} +{"epoch": 0, "iter": 5531, "iter_tflops": 28.76094360132845, "iter_time": 0.5339656524658203, "loss": 0.46990641951560974, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.54463412433935, "step_time": 0.5027840881347656} +{"epoch": 0, "iter": 5532, "iter_tflops": 26.52101222464502, "iter_time": 0.5790637207031248, "loss": 0.3728785216808319, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 28.265508084698943, "step_time": 0.5433249588012695} +{"epoch": 0, "iter": 5533, "iter_tflops": 26.859011679365004, "iter_time": 0.7681255645751953, "loss": 0.4416121244430542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.60243642877689, "step_time": 0.7213054580688476} +{"epoch": 0, "iter": 5534, "iter_tflops": 11.959790891442951, "iter_time": 1.7250379791259765, "loss": 0.5006855726242065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.868404028784308, "step_time": 1.387579559326172} +{"epoch": 0, "iter": 5535, "iter_tflops": 38.60872829243624, "iter_time": 0.5343634567260742, "loss": 0.4405132830142975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.153098638917434, "step_time": 0.48943243026733396} +{"epoch": 0, "iter": 5536, "iter_tflops": 38.848621560570116, "iter_time": 0.531063720703125, "loss": 0.4578477740287781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45667505329011, "step_time": 0.4859328594207764} +{"epoch": 0, "iter": 5537, "iter_tflops": 30.132660261819574, "iter_time": 0.6846754760742187, "loss": 0.25207313895225525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.62088257376007, "step_time": 0.6324505004882812} +{"epoch": 0, "iter": 5538, "iter_tflops": 15.233041026862104, "iter_time": 1.354364730834961, "loss": 0.18846172094345093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.03414813197455, "step_time": 1.0297963943481445} +{"epoch": 0, 
"iter": 5539, "iter_tflops": 37.799836013156416, "iter_time": 0.5457984924316406, "loss": 0.12915651500225067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59946844047419, "step_time": 0.4959460849761963} +{"epoch": 0, "iter": 5540, "iter_tflops": 36.88623137650076, "iter_time": 0.5593169250488281, "loss": 0.18443354964256287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45593721770963, "step_time": 0.509964542388916} +{"epoch": 0, "iter": 5541, "iter_tflops": 14.070343400733922, "iter_time": 1.1351089782714843, "loss": 0.17939741909503937, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.125120374813196, "step_time": 1.0559501495361328} +{"epoch": 0, "iter": 5542, "iter_tflops": 15.856088643783846, "iter_time": 1.0072706756591798, "loss": 0.3610791265964508, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 19.15554490288298, "step_time": 0.8337728424072266} +{"epoch": 0, "iter": 5543, "iter_tflops": 28.518393681459663, "iter_time": 0.5600376129150391, "loss": 0.25581926107406616, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.372853326127945, "step_time": 0.5258436851501465} +{"epoch": 0, "iter": 5544, "iter_tflops": 28.35079760746505, "iter_time": 0.5633482818603516, "loss": 0.5093888640403748, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.011497925487163, "step_time": 0.5321751403808594} +{"epoch": 0, "iter": 5545, "iter_tflops": 34.77739610347026, "iter_time": 0.5932328414916992, "loss": 0.7503026723861694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.360169926872246, "step_time": 0.5522216186523438} +{"epoch": 0, "iter": 5546, "iter_tflops": 8.996025695139055, "iter_time": 2.2933564453124995, "loss": 0.4842904806137085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.227886010838947, "step_time": 2.01714151763916} +{"epoch": 0, "iter": 5547, "iter_tflops": 13.991798661562212, "iter_time": 1.4745133209228516, "loss": 0.58802729845047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.31126308210093, "step_time": 1.264837272644043} +{"epoch": 0, 
"iter": 5548, "iter_tflops": 19.31044648817717, "iter_time": 1.0683902893066406, "loss": 0.5909136533737183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.76611699419025, "step_time": 0.8680885276794432} +{"epoch": 0, "iter": 5549, "iter_tflops": 15.79132954180768, "iter_time": 0.9595721206665039, "loss": 0.43008285760879517, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 16.537520145031618, "step_time": 0.9162751998901366} +{"epoch": 0, "iter": 5550, "iter_tflops": 9.796326102701405, "iter_time": 1.5467961578369143, "loss": 0.26711031794548035, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 11.545434155734442, "step_time": 1.3124599189758301} +{"epoch": 0, "iter": 5551, "iter_tflops": 23.30609433423551, "iter_time": 0.6501698379516602, "loss": 0.3357815444469452, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.972583100232292, "step_time": 0.6067822265624999} +{"epoch": 0, "iter": 5552, "iter_tflops": 21.613527263116325, "iter_time": 0.7010849914550782, "loss": 0.3645244538784027, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 23.14709685262024, "step_time": 0.654635856628418} +{"epoch": 0, "iter": 5553, "iter_tflops": 33.185220520663655, "iter_time": 0.6216952362060548, "loss": 0.2423562854528427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.06683037385021, "step_time": 0.556591791152954} +{"epoch": 0, "iter": 5554, "iter_tflops": 38.82034881398083, "iter_time": 0.5314504928588867, "loss": 0.2721116244792938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.93708721226291, "step_time": 0.4804958801269531} +{"epoch": 0, "iter": 5555, "iter_tflops": 37.898561854462834, "iter_time": 0.5443766860961914, "loss": 0.19855277240276337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.97854686144359, "step_time": 0.49146754837036133} +{"epoch": 0, "iter": 5556, "iter_tflops": 40.14557928930336, "iter_time": 0.5139069824218749, "loss": 0.270095556974411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.170504288633964, "step_time": 0.4670785140991211} +{"epoch": 0, 
"iter": 5557, "iter_tflops": 14.832949044424662, "iter_time": 1.390896270751953, "loss": 0.687367856502533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.922047876316851, "step_time": 1.2957562789916992} +{"epoch": 0, "iter": 5558, "iter_tflops": 18.736021133335242, "iter_time": 1.1011459350585937, "loss": 0.6873359680175781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.770618070442183, "step_time": 0.9060401191711427} +{"epoch": 0, "iter": 5559, "iter_tflops": 37.578842859985066, "iter_time": 0.5490082168579101, "loss": 0.7094786167144775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.95612798651707, "step_time": 0.5037364253997803} +{"epoch": 0, "iter": 5560, "iter_tflops": 34.85984661280529, "iter_time": 0.5918297271728515, "loss": 1.1036492586135864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.18131249803101, "step_time": 0.5403453197479249} +{"epoch": 0, "iter": 5561, "iter_tflops": 21.936716065470662, "iter_time": 0.9404823150634767, "loss": 0.04370373860001564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.90175558886172, "step_time": 0.8631622657775879} +{"epoch": 0, "iter": 5562, "iter_tflops": 16.49153200721303, "iter_time": 1.2510113372802736, "loss": 0.01830984652042389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.056315694061997, "step_time": 1.0286581954956056} +{"epoch": 0, "iter": 5563, "iter_tflops": 40.06242925502174, "iter_time": 0.5149736022949218, "loss": 0.035364653915166855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.0699710797046, "step_time": 0.468144021987915} +{"epoch": 0, "iter": 5564, "iter_tflops": 45.126715666802475, "iter_time": 0.45718136596679687, "loss": 0.06088937819004059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.385430387175, "step_time": 0.41775668144226075} +{"epoch": 0, "iter": 5565, "iter_tflops": 35.455054702688145, "iter_time": 0.5818942794799804, "loss": 0.8463007807731628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.77660882078076, "step_time": 0.5320499687194824} +{"epoch": 0, 
"iter": 5566, "iter_tflops": 44.82525700238284, "iter_time": 0.46025600051879884, "loss": 0.9006816148757935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.173494411718416, "step_time": 0.41955719757080073} +{"epoch": 0, "iter": 5567, "iter_tflops": 44.297893923575046, "iter_time": 0.4657353134155273, "loss": 0.8454052805900574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01514761596978, "step_time": 0.42967885208129886} +{"epoch": 0, "iter": 5568, "iter_tflops": 50.45726853797842, "iter_time": 0.40888248825073237, "loss": 0.9540684819221497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.56029922311409, "step_time": 0.3781338043212891} +{"epoch": 0, "iter": 5569, "iter_tflops": 30.28590307500125, "iter_time": 0.6812111053466797, "loss": 0.024784134700894356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.109516861545835, "step_time": 0.6425227012634277} +{"epoch": 0, "iter": 5570, "iter_tflops": 14.089046533992082, "iter_time": 1.464335678100586, "loss": 0.020885927602648735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.839687316514375, "step_time": 1.2251470661163328} +{"epoch": 0, "iter": 5571, "iter_tflops": 44.53220154229309, "iter_time": 0.4632848320007324, "loss": 0.04048231244087219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23461195958248, "step_time": 0.4190363788604736} +{"epoch": 0, "iter": 5572, "iter_tflops": 47.33857647724687, "iter_time": 0.43581989669799803, "loss": 0.0841701477766037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.34298516751105, "step_time": 0.3941520233154297} +{"epoch": 0, "iter": 5573, "iter_tflops": 24.61018905023519, "iter_time": 0.8383151168823243, "loss": 0.02294384315609932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.65378563252166, "step_time": 0.7740398979187011} +{"epoch": 0, "iter": 5574, "iter_tflops": 19.91593412314622, "iter_time": 1.035908905029297, "loss": 0.04734748974442482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.467315479762664, "step_time": 0.8432103443145751} 
+{"epoch": 0, "iter": 5575, "iter_tflops": 41.8178492859429, "iter_time": 0.49335615921020515, "loss": 0.024951886385679245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17998790444945, "step_time": 0.4467539825439452} +{"epoch": 0, "iter": 5576, "iter_tflops": 45.80223502402817, "iter_time": 0.4504385757446289, "loss": 0.029320772737264633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24136364898287, "step_time": 0.41063960075378414} +{"epoch": 0, "iter": 5577, "iter_tflops": 20.64107909199326, "iter_time": 0.9995162277221677, "loss": 0.4431944489479065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.26726283787463, "step_time": 0.9265213088989257} +{"epoch": 0, "iter": 5578, "iter_tflops": 13.722592258295645, "iter_time": 1.5034399566650394, "loss": 0.36246898770332336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.379909167900745, "step_time": 1.259536502838135} +{"epoch": 0, "iter": 5579, "iter_tflops": 38.432845564649334, "iter_time": 0.5368088989257813, "loss": 0.44940292835235596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41812344068135, "step_time": 0.4863744983673096} +{"epoch": 0, "iter": 5580, "iter_tflops": 40.0629005230716, "iter_time": 0.5149675445556641, "loss": 0.5193660259246826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.92545439191135, "step_time": 0.4696842365264893} +{"epoch": 0, "iter": 5581, "iter_tflops": 6.279695598757744, "iter_time": 0.9547843704223632, "loss": 0.0022627366706728935, "lr": 3e-05, "seqlen": 2432.0, "step_tflops": 6.743035593883504, "step_time": 0.8891774520874023} +{"epoch": 0, "iter": 5582, "iter_tflops": 5.810240994263028, "iter_time": 1.0319288330078125, "loss": 0.015150143764913082, "lr": 3e-05, "seqlen": 2432.0, "step_tflops": 8.975640521829252, "step_time": 0.6680030460357667} +{"epoch": 0, "iter": 5583, "iter_tflops": 16.88501998028309, "iter_time": 0.3550931663513184, "loss": 0.0029350710101425648, "lr": 3e-05, "seqlen": 2432.0, "step_tflops": 18.590280583623294, "step_time": 
0.3225209636688232} +{"epoch": 0, "iter": 5584, "iter_tflops": 16.53503909467676, "iter_time": 0.3626090736389159, "loss": 0.0027386234141886234, "lr": 3e-05, "seqlen": 2432.0, "step_tflops": 18.189032021055088, "step_time": 0.32963574981689453} +{"epoch": 0, "iter": 5585, "iter_tflops": 32.25122574672258, "iter_time": 0.6396995162963867, "loss": 0.9515935182571411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.33690472558357, "step_time": 0.6008431358337403} +{"epoch": 0, "iter": 5586, "iter_tflops": 17.431516297837444, "iter_time": 1.183551284790039, "loss": 1.0677697658538818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.038683451723507, "step_time": 0.9806266422271728} +{"epoch": 0, "iter": 5587, "iter_tflops": 37.289795610015744, "iter_time": 0.553263786315918, "loss": 0.7841423749923706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56517501545016, "step_time": 0.5085912609100341} +{"epoch": 0, "iter": 5588, "iter_tflops": 34.858829683753655, "iter_time": 0.5918469924926757, "loss": 0.9854950904846191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.53000716555676, "step_time": 0.5497226104736328} +{"epoch": 0, "iter": 5589, "iter_tflops": 11.848494115847087, "iter_time": 1.0760420837402342, "loss": 0.01860005594789982, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 12.670237211922938, "step_time": 1.0062541122436524} +{"epoch": 0, "iter": 5590, "iter_tflops": 15.5030935866062, "iter_time": 0.8223828506469726, "loss": 0.031073253601789474, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 22.362359236703465, "step_time": 0.570131181716919} +{"epoch": 0, "iter": 5591, "iter_tflops": 27.87425821713925, "iter_time": 0.45739255905151366, "loss": 0.0208057202398777, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 30.767368312902345, "step_time": 0.4143831272125244} +{"epoch": 0, "iter": 5592, "iter_tflops": 27.650480725632896, "iter_time": 0.4610942726135254, "loss": 0.0660603865981102, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 30.53082911824782, 
"step_time": 0.4175935821533203} +{"epoch": 0, "iter": 5593, "iter_tflops": 35.374958554749774, "iter_time": 0.5832118072509765, "loss": 0.5188143849372864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.03222884253976, "step_time": 0.528565601348877} +{"epoch": 0, "iter": 5594, "iter_tflops": 39.35515198946222, "iter_time": 0.5242285308837891, "loss": 0.36409637331962585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.642822016703626, "step_time": 0.4727259273529052} +{"epoch": 0, "iter": 5595, "iter_tflops": 46.469289036108925, "iter_time": 0.44397265243530265, "loss": 0.42580273747444153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52411916031909, "step_time": 0.40834147834777834} +{"epoch": 0, "iter": 5596, "iter_tflops": 39.30741104373419, "iter_time": 0.524865234375, "loss": 0.3885655999183655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.06940843950395, "step_time": 0.4790196628570556} +{"epoch": 0, "iter": 5597, "iter_tflops": 38.521927014713825, "iter_time": 0.5355675354003906, "loss": 0.7612318396568298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45573401356693, "step_time": 0.48594363021850584} +{"epoch": 0, "iter": 5598, "iter_tflops": 36.96264556859874, "iter_time": 0.5581606292724609, "loss": 0.8451955914497375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.63928300884454, "step_time": 0.5076638164520264} +{"epoch": 0, "iter": 5599, "iter_tflops": 45.54383880504823, "iter_time": 0.4529941711425781, "loss": 0.6224402785301208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64884023354361, "step_time": 0.41554029083251953} +{"epoch": 0, "iter": 5600, "iter_tflops": 36.25110217946017, "iter_time": 0.5691163101196289, "loss": 0.713470995426178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.49369438618973, "step_time": 0.5223895568847656} +{"epoch": 0, "iter": 5601, "iter_tflops": 18.916249056877426, "iter_time": 1.0906545715332032, "loss": 0.5684178471565247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.159897886578648, 
"step_time": 1.0233729171752928} +{"epoch": 0, "iter": 5602, "iter_tflops": 37.63440045177551, "iter_time": 0.5481977462768555, "loss": 0.4481823742389679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.039150152717625, "step_time": 0.448120641708374} +{"epoch": 0, "iter": 5603, "iter_tflops": 49.04778984178483, "iter_time": 0.4206324806213379, "loss": 0.6856626272201538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.40410664365018, "step_time": 0.38632035636901857} +{"epoch": 0, "iter": 5604, "iter_tflops": 50.13248341566077, "iter_time": 0.41153144836425776, "loss": 0.5094829797744751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.35942756673463, "step_time": 0.37953110313415533} +{"epoch": 0, "iter": 5605, "iter_tflops": 33.47316664907608, "iter_time": 0.6163472290039061, "loss": 0.6683693528175354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.349716371051585, "step_time": 0.567572338104248} +{"epoch": 0, "iter": 5606, "iter_tflops": 15.278543588700948, "iter_time": 1.3503311614990234, "loss": 0.838811457157135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.706799551021756, "step_time": 1.046902286529541} +{"epoch": 0, "iter": 5607, "iter_tflops": 36.175834157040164, "iter_time": 0.5703004226684569, "loss": 1.0131454467773438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.509046192304005, "step_time": 0.5221865749359131} +{"epoch": 0, "iter": 5608, "iter_tflops": 36.81838703321607, "iter_time": 0.5603475646972657, "loss": 0.8939952254295349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.00500297353829, "step_time": 0.5157128353118897} +{"epoch": 0, "iter": 5609, "iter_tflops": 34.625762082981986, "iter_time": 0.5958307418823242, "loss": 0.20639370381832123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.43491287070816, "step_time": 0.5367800254821777} +{"epoch": 0, "iter": 5610, "iter_tflops": 45.57213773399972, "iter_time": 0.45271287536621085, "loss": 0.17943298816680908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40271603554599, 
"step_time": 0.4093250350952148} +{"epoch": 0, "iter": 5611, "iter_tflops": 44.73422195796164, "iter_time": 0.46119263076782224, "loss": 0.33132174611091614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.20033859306211, "step_time": 0.4280279788970947} +{"epoch": 0, "iter": 5612, "iter_tflops": 53.25509118653959, "iter_time": 0.3874013366699218, "loss": 0.22441305220127106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.01418911959024, "step_time": 0.35562150955200195} +{"epoch": 0, "iter": 5613, "iter_tflops": 26.485362231466848, "iter_time": 0.7789621047973634, "loss": 0.8469895720481873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.81413370259145, "step_time": 0.7160060310363769} +{"epoch": 0, "iter": 5614, "iter_tflops": 39.57924417273879, "iter_time": 0.5212604217529297, "loss": 0.7709259986877441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.960350814902064, "step_time": 0.46931139373779296} +{"epoch": 0, "iter": 5615, "iter_tflops": 42.50120319065966, "iter_time": 0.4854237518310548, "loss": 0.744559645652771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.55135757882052, "step_time": 0.4529193992614746} +{"epoch": 0, "iter": 5616, "iter_tflops": 36.23641188918723, "iter_time": 0.5693470306396485, "loss": 0.5829162001609802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.16961307852247, "step_time": 0.526711700439453} +{"epoch": 0, "iter": 5617, "iter_tflops": 44.85786458591244, "iter_time": 0.45992143630981447, "loss": 0.4078517258167267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.987661867282426, "step_time": 0.42114876937866214} +{"epoch": 0, "iter": 5618, "iter_tflops": 42.83322983994452, "iter_time": 0.48166093444824215, "loss": 0.3866446912288666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.503868374640355, "step_time": 0.4253494453430176} +{"epoch": 0, "iter": 5619, "iter_tflops": 47.17525536773655, "iter_time": 0.4373287086486816, "loss": 0.4145079553127289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.04328554127622, 
"step_time": 0.404188196182251} +{"epoch": 0, "iter": 5620, "iter_tflops": 43.169916110746456, "iter_time": 0.4779044151306152, "loss": 0.4200330972671509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.253324505264, "step_time": 0.44604563522338864} +{"epoch": 0, "iter": 5621, "iter_tflops": 32.82856574830577, "iter_time": 0.6284494323730468, "loss": 0.7792567610740662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.90171325103947, "step_time": 0.5911197929382324} +{"epoch": 0, "iter": 5622, "iter_tflops": 15.517387799696943, "iter_time": 1.3295468139648439, "loss": 0.7812438011169434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.070838116461147, "step_time": 1.141678840637207} +{"epoch": 0, "iter": 5623, "iter_tflops": 42.40225952773892, "iter_time": 0.4865564651489258, "loss": 0.6594947576522827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.471690464950946, "step_time": 0.4537129211425781} +{"epoch": 0, "iter": 5624, "iter_tflops": 45.27690419522401, "iter_time": 0.4556648445129395, "loss": 0.7562376260757446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.57491033443899, "step_time": 0.4247273616790771} +{"epoch": 0, "iter": 5625, "iter_tflops": 34.67000126045234, "iter_time": 0.5950704574584961, "loss": 0.045267898589372635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.11332848566544, "step_time": 0.5558944549560547} +{"epoch": 0, "iter": 5626, "iter_tflops": 10.182378318801511, "iter_time": 2.0261566467285155, "loss": 0.054454922676086426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.91561943659249, "step_time": 1.7314327316284182} +{"epoch": 0, "iter": 5627, "iter_tflops": 13.151737793946438, "iter_time": 1.5686971435546877, "loss": 0.04118480533361435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.364056479693486, "step_time": 1.3428155212402344} +{"epoch": 0, "iter": 5628, "iter_tflops": 25.63176205218893, "iter_time": 0.8049034423828124, "loss": 0.048347197473049164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.97885076396724, 
"step_time": 0.6255855808258056} +{"epoch": 0, "iter": 5629, "iter_tflops": 16.621568154081316, "iter_time": 0.9337861480712891, "loss": 0.35903817415237427, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 17.604928824189898, "step_time": 0.8816275405883788} +{"epoch": 0, "iter": 5630, "iter_tflops": 7.870914848825131, "iter_time": 1.9719423217773437, "loss": 0.48276466131210327, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 9.45449722665083, "step_time": 1.6416515579223634} +{"epoch": 0, "iter": 5631, "iter_tflops": 7.662975571816324, "iter_time": 2.025452117919922, "loss": 0.3678411841392517, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 9.113681019465746, "step_time": 1.703042938232422} +{"epoch": 0, "iter": 5632, "iter_tflops": 23.752985964885063, "iter_time": 0.653433219909668, "loss": 0.4794099032878876, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 25.580579553116426, "step_time": 0.6067489624023437} +{"epoch": 0, "iter": 5633, "iter_tflops": 19.342239088949647, "iter_time": 0.9149028244018556, "loss": 0.4144526422023773, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 20.61978818022608, "step_time": 0.8582177963256836} +{"epoch": 0, "iter": 5634, "iter_tflops": 7.337012694686711, "iter_time": 2.41191748046875, "loss": 0.21903565526008606, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 8.216186121810594, "step_time": 2.153830123901367} +{"epoch": 0, "iter": 5635, "iter_tflops": 10.38203614987709, "iter_time": 1.7045085296630862, "loss": 0.42478957772254944, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 12.76560829031536, "step_time": 1.3862456665039062} +{"epoch": 0, "iter": 5636, "iter_tflops": 27.7056640768648, "iter_time": 0.6387238769531249, "loss": 0.48310309648513794, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 32.58046046824132, "step_time": 0.5431558952331542} +{"epoch": 0, "iter": 5637, "iter_tflops": 17.155921899653045, "iter_time": 0.9381194458007812, "loss": 0.24673525989055634, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 18.003780768296597, 
"step_time": 0.8939402313232422} +{"epoch": 0, "iter": 5638, "iter_tflops": 10.370503384422646, "iter_time": 1.5519308319091796, "loss": 0.44918957352638245, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 11.499831162228208, "step_time": 1.3995252380371093} +{"epoch": 0, "iter": 5639, "iter_tflops": 22.958404958596677, "iter_time": 0.7010201263427734, "loss": 0.3298813998699188, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 24.709322174636267, "step_time": 0.6513454246520995} +{"epoch": 0, "iter": 5640, "iter_tflops": 24.79738887160074, "iter_time": 0.6490322036743164, "loss": 0.4046138823032379, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 26.580157538187386, "step_time": 0.6055006980895997} +{"epoch": 0, "iter": 5641, "iter_tflops": 16.80718084291715, "iter_time": 1.2275166015624999, "loss": 0.8664068579673767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.120122113085614, "step_time": 1.1385736465454102} +{"epoch": 0, "iter": 5642, "iter_tflops": 16.692033417771274, "iter_time": 1.2359844360351562, "loss": 0.6133114695549011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.42971541356934, "step_time": 0.962732967376709} +{"epoch": 0, "iter": 5643, "iter_tflops": 48.692562811571825, "iter_time": 0.4237011222839356, "loss": 0.7150726318359375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.90960849630699, "step_time": 0.38993094253540034} +{"epoch": 0, "iter": 5644, "iter_tflops": 48.56308510289155, "iter_time": 0.4248307838439941, "loss": 0.7360977530479431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.3735416330279, "step_time": 0.39392206192016604} +{"epoch": 0, "iter": 5645, "iter_tflops": 28.6705202705074, "iter_time": 0.7195925750732423, "loss": 0.2826145589351654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.277412024385004, "step_time": 0.6814021453857422} +{"epoch": 0, "iter": 5646, "iter_tflops": 14.68558422718306, "iter_time": 1.4048534393310546, "loss": 0.2596736550331116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.421330833464918, 
"step_time": 1.1842432537078857} +{"epoch": 0, "iter": 5647, "iter_tflops": 39.953701468266964, "iter_time": 0.5163750228881836, "loss": 0.15251412987709045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.774889276876955, "step_time": 0.4712997303009032} +{"epoch": 0, "iter": 5648, "iter_tflops": 44.67491093811899, "iter_time": 0.46180491638183596, "loss": 0.20711229741573334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.755426711344, "step_time": 0.42315481376647945} +{"epoch": 0, "iter": 5649, "iter_tflops": 34.846178530356596, "iter_time": 0.592061866760254, "loss": 0.47595253586769104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.36805709231406, "step_time": 0.5377153568267823} +{"epoch": 0, "iter": 5650, "iter_tflops": 11.507293781466743, "iter_time": 1.7928710174560547, "loss": 0.4124346375465393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.88655081879494, "step_time": 1.485688835144043} +{"epoch": 0, "iter": 5651, "iter_tflops": 18.34879025437853, "iter_time": 1.1243843994140625, "loss": 0.4252869486808777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.576577322853066, "step_time": 1.0026494293212893} +{"epoch": 0, "iter": 5652, "iter_tflops": 17.28461762604618, "iter_time": 1.1936100616455076, "loss": 0.4063158929347992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.791991036452767, "step_time": 0.992261562347412} +{"epoch": 0, "iter": 5653, "iter_tflops": 13.571732300480205, "iter_time": 1.0894137115478515, "loss": 0.510873019695282, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 14.527811281217542, "step_time": 1.0177191162109374} +{"epoch": 0, "iter": 5654, "iter_tflops": 11.29132350975234, "iter_time": 1.3094329681396486, "loss": 0.4551030397415161, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 13.495117786081476, "step_time": 1.0955985336303709} +{"epoch": 0, "iter": 5655, "iter_tflops": 22.07398286222522, "iter_time": 0.6698035125732422, "loss": 0.3922027051448822, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 23.651525231652798, 
"step_time": 0.6251280250549318} +{"epoch": 0, "iter": 5656, "iter_tflops": 22.70555175054599, "iter_time": 0.6511725158691406, "loss": 0.3714466094970703, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.428067792159602, "step_time": 0.6052558631896973} +{"epoch": 0, "iter": 5657, "iter_tflops": 30.373108353710656, "iter_time": 0.679255256652832, "loss": 0.5151882171630859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.581091802542936, "step_time": 0.6143663711547852} +{"epoch": 0, "iter": 5658, "iter_tflops": 46.80178241607907, "iter_time": 0.44081854248046876, "loss": 0.5103144645690918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74545562077668, "step_time": 0.40656041526794434} +{"epoch": 0, "iter": 5659, "iter_tflops": 51.54210667830608, "iter_time": 0.4002764892578125, "loss": 0.5751038789749146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.73458506803086, "step_time": 0.37016680908203126} +{"epoch": 0, "iter": 5660, "iter_tflops": 50.680768523159685, "iter_time": 0.4070793342590332, "loss": 0.5771001577377319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.73719318713538, "step_time": 0.3769117908477783} +{"epoch": 0, "iter": 5661, "iter_tflops": 37.04923970915472, "iter_time": 0.5568560562133789, "loss": 0.8676283955574036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.782758114930125, "step_time": 0.5185938453674316} +{"epoch": 0, "iter": 5662, "iter_tflops": 16.949474504118413, "iter_time": 1.217211395263672, "loss": 0.8492394685745239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.53875517836421, "step_time": 1.0044958095550538} +{"epoch": 0, "iter": 5663, "iter_tflops": 40.00433347788088, "iter_time": 0.5157214660644531, "loss": 0.6479047536849976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.56972873400031, "step_time": 0.4735189800262451} +{"epoch": 0, "iter": 5664, "iter_tflops": 40.06055794359054, "iter_time": 0.5149976577758789, "loss": 0.7149320244789124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.48017974978971, 
"step_time": 0.4744942092895508} +{"epoch": 0, "iter": 5665, "iter_tflops": 15.259987665877139, "iter_time": 1.3519731445312502, "loss": 0.04754269868135452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.680608747529465, "step_time": 1.23683097076416} +{"epoch": 0, "iter": 5666, "iter_tflops": 18.7047290400051, "iter_time": 1.1029880981445312, "loss": 0.07836717367172241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.441828108287115, "step_time": 0.9193143005371094} +{"epoch": 0, "iter": 5667, "iter_tflops": 41.93145913700206, "iter_time": 0.49201945114135737, "loss": 0.06153419241309166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.24711607084293, "step_time": 0.4461055145263672} +{"epoch": 0, "iter": 5668, "iter_tflops": 41.05620747110191, "iter_time": 0.5025085067749023, "loss": 0.09620551764965057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.05762765769472, "step_time": 0.4578823738098145} +{"epoch": 0, "iter": 5669, "iter_tflops": 30.223912840262503, "iter_time": 0.6826082916259767, "loss": 0.1953897923231125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.27005280297108, "step_time": 0.639326301574707} +{"epoch": 0, "iter": 5670, "iter_tflops": 13.628826973615514, "iter_time": 1.5137835083007811, "loss": 0.3473232090473175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.001772119257236, "step_time": 1.2893005447387695} +{"epoch": 0, "iter": 5671, "iter_tflops": 34.48403954574071, "iter_time": 0.5982794876098632, "loss": 0.23371410369873047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.78904534783072, "step_time": 0.5318793830871582} +{"epoch": 0, "iter": 5672, "iter_tflops": 39.29547591260524, "iter_time": 0.5250246505737305, "loss": 0.22670632600784302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.22256489812895, "step_time": 0.477322286605835} +{"epoch": 0, "iter": 5673, "iter_tflops": 36.15055889122684, "iter_time": 0.5706991577148437, "loss": 0.8976346254348755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.77396283930505, 
"step_time": 0.5187085227966308} +{"epoch": 0, "iter": 5674, "iter_tflops": 36.024097648798175, "iter_time": 0.5727025756835937, "loss": 0.8244059085845947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.435387206349915, "step_time": 0.5231619358062743} +{"epoch": 0, "iter": 5675, "iter_tflops": 34.46540561479369, "iter_time": 0.5986029510498047, "loss": 0.9946377873420715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.579946476686295, "step_time": 0.548992094039917} +{"epoch": 0, "iter": 5676, "iter_tflops": 37.821802496733, "iter_time": 0.5454814987182617, "loss": 0.9157850742340088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.467168282538545, "step_time": 0.49752839088439943} +{"epoch": 0, "iter": 5677, "iter_tflops": 1.3127136460366515, "iter_time": 1.1939090270996096, "loss": 0.3583763539791107, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.3736502949441378, "step_time": 1.1409458999633788} +{"epoch": 0, "iter": 5678, "iter_tflops": 1.2129896873928736, "iter_time": 1.2920643005371093, "loss": 0.3934730291366577, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.456005167863028, "step_time": 1.076411476135254} +{"epoch": 0, "iter": 5679, "iter_tflops": 2.9198688782377062, "iter_time": 0.5367572097778321, "loss": 0.4566110372543335, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.1983403766511174, "step_time": 0.49002310180664066} +{"epoch": 0, "iter": 5680, "iter_tflops": 3.17132937431821, "iter_time": 0.49419675064086915, "loss": 0.43878039717674255, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.4637867502552, "step_time": 0.4524703121185303} +{"epoch": 0, "iter": 5681, "iter_tflops": 21.461633272715943, "iter_time": 0.9613011856079101, "loss": 0.8442724943161011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.05545306167824, "step_time": 0.8948465881347657} +{"epoch": 0, "iter": 5682, "iter_tflops": 24.23793110207345, "iter_time": 0.8511903686523438, "loss": 0.8406473398208618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.492609979231975, 
"step_time": 0.6995343418121338} +{"epoch": 0, "iter": 5683, "iter_tflops": 32.52730245850232, "iter_time": 0.6342700424194335, "loss": 0.9336322546005249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.87142542041483, "step_time": 0.5751400527954103} +{"epoch": 0, "iter": 5684, "iter_tflops": 42.82219502418343, "iter_time": 0.48178505325317383, "loss": 0.8450803756713867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53373249719605, "step_time": 0.4433578052520752} +{"epoch": 0, "iter": 5685, "iter_tflops": 23.38255936684987, "iter_time": 0.882328285217285, "loss": 0.4030565321445465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.188078315512687, "step_time": 0.8190816802978516} +{"epoch": 0, "iter": 5686, "iter_tflops": 21.511569268144942, "iter_time": 0.9590696640014649, "loss": 0.18569956719875336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.724759019236224, "step_time": 0.5775012645721436} +{"epoch": 0, "iter": 5687, "iter_tflops": 46.329323921475776, "iter_time": 0.4453139343261719, "loss": 0.2532026767730713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.584888308407244, "step_time": 0.40785092544555657} +{"epoch": 0, "iter": 5688, "iter_tflops": 46.317278039896486, "iter_time": 0.4454297485351563, "loss": 0.2602351903915405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.752756092741116, "step_time": 0.4065019340515137} +{"epoch": 0, "iter": 5689, "iter_tflops": 15.486357385997149, "iter_time": 1.3322108612060548, "loss": 0.1380840688943863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.706836746050822, "step_time": 1.234889274597168} +{"epoch": 0, "iter": 5690, "iter_tflops": 18.97171381430285, "iter_time": 1.0874659881591797, "loss": 0.24244855344295502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.99203654207611, "step_time": 0.737034387588501} +{"epoch": 0, "iter": 5691, "iter_tflops": 38.79229749194104, "iter_time": 0.5318347930908203, "loss": 0.10371854901313782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.8091287842577, 
"step_time": 0.4819321041107178} +{"epoch": 0, "iter": 5692, "iter_tflops": 41.2812144501723, "iter_time": 0.4997695388793945, "loss": 0.19530676305294037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.275851989528725, "step_time": 0.4556754341125488} +{"epoch": 0, "iter": 5693, "iter_tflops": 21.55484676815435, "iter_time": 0.9571440582275391, "loss": 0.013612662442028522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.13635462214433, "step_time": 0.8917175521850585} +{"epoch": 0, "iter": 5694, "iter_tflops": 15.604031876159981, "iter_time": 1.3221642761230468, "loss": 0.01700574904680252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.117457139071313, "step_time": 0.9327968120574952} +{"epoch": 0, "iter": 5695, "iter_tflops": 55.361639559973995, "iter_time": 0.37266044998168946, "loss": 0.005304331425577402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.66003177570482, "step_time": 0.3401101665496826} +{"epoch": 0, "iter": 5696, "iter_tflops": 57.8935909852636, "iter_time": 0.35636230468749996, "loss": 0.01098472997546196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.16823788289347, "step_time": 0.32660549354553225} +{"epoch": 0, "iter": 5697, "iter_tflops": 27.786074746500812, "iter_time": 0.742497589111328, "loss": 0.2696417570114136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.287851138172886, "step_time": 0.7044249649047852} +{"epoch": 0, "iter": 5698, "iter_tflops": 12.1041265879908, "iter_time": 1.7044677581787109, "loss": 0.3208434283733368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.824356315659276, "step_time": 1.3917024841308594} +{"epoch": 0, "iter": 5699, "iter_tflops": 45.71959344549343, "iter_time": 0.45125277709960937, "loss": 0.34486907720565796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.321869415882006, "step_time": 0.40998265266418465} +{"epoch": 0, "iter": 5700, "iter_tflops": 49.61089951641652, "iter_time": 0.41585808181762696, "loss": 0.3487962782382965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
53.638427797039235, "step_time": 0.3846327037811279} +{"epoch": 0, "iter": 5701, "iter_tflops": 43.67904108553873, "iter_time": 0.4723339385986328, "loss": 0.48607337474823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.62856472951468, "step_time": 0.43316639137268065} +{"epoch": 0, "iter": 5702, "iter_tflops": 38.65172138091745, "iter_time": 0.5337690734863282, "loss": 0.7719219326972961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.15108914101053, "step_time": 0.47811292648315434} +{"epoch": 0, "iter": 5703, "iter_tflops": 37.06351349276136, "iter_time": 0.5566416015625, "loss": 0.6910943388938904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.71074023998981, "step_time": 0.5067727432250977} +{"epoch": 0, "iter": 5704, "iter_tflops": 39.385967559018106, "iter_time": 0.5238183746337891, "loss": 0.5413533449172974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81602740650144, "step_time": 0.4818544540405274} +{"epoch": 0, "iter": 5705, "iter_tflops": 19.87802086515152, "iter_time": 1.0378846893310547, "loss": 0.2082446813583374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.945113287294237, "step_time": 0.9850074920654298} +{"epoch": 0, "iter": 5706, "iter_tflops": 17.88401626620899, "iter_time": 1.1536051635742186, "loss": 0.26588174700737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.589127083909375, "step_time": 0.9556242561340333} +{"epoch": 0, "iter": 5707, "iter_tflops": 48.344329596758755, "iter_time": 0.42675312042236324, "loss": 0.1827913224697113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.428415283104805, "step_time": 0.3935097675323487} +{"epoch": 0, "iter": 5708, "iter_tflops": 47.61941780826839, "iter_time": 0.4332495956420898, "loss": 0.17660193145275116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.833988918204135, "step_time": 0.3980224933624268} +{"epoch": 0, "iter": 5709, "iter_tflops": 36.95993719915137, "iter_time": 0.558201530456543, "loss": 0.4543096125125885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
39.720213494805655, "step_time": 0.5194104385375976} +{"epoch": 0, "iter": 5710, "iter_tflops": 9.735955669616354, "iter_time": 2.1190619812011713, "loss": 0.556954026222229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.241675216504579, "step_time": 1.5580425567626952} +{"epoch": 0, "iter": 5711, "iter_tflops": 11.29320804332921, "iter_time": 1.8268585357666016, "loss": 0.5818765759468079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.142021970779355, "step_time": 1.569856872558594} +{"epoch": 0, "iter": 5712, "iter_tflops": 15.851601888349576, "iter_time": 1.3015147399902345, "loss": 0.7002972960472107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.004748331065482, "step_time": 0.9822109355926514} +{"epoch": 0, "iter": 5713, "iter_tflops": 22.23364558282145, "iter_time": 0.7829668960571289, "loss": 0.4559369385242462, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 24.375977698498403, "step_time": 0.7141542663574219} +{"epoch": 0, "iter": 5714, "iter_tflops": 26.47382896557512, "iter_time": 0.6575629272460938, "loss": 0.5119259357452393, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 28.508612749794562, "step_time": 0.6106297988891602} +{"epoch": 0, "iter": 5715, "iter_tflops": 25.825498683313835, "iter_time": 0.6740705642700195, "loss": 0.47269803285598755, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 27.70980431858417, "step_time": 0.6282328186035157} +{"epoch": 0, "iter": 5716, "iter_tflops": 25.524043118903055, "iter_time": 0.6820317764282227, "loss": 0.39768728613853455, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 27.356057425368757, "step_time": 0.6363566284179688} +{"epoch": 0, "iter": 5717, "iter_tflops": 38.51051834510639, "iter_time": 0.5357261962890625, "loss": 0.032070327550172806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.20468287251695, "step_time": 0.4775198459625244} +{"epoch": 0, "iter": 5718, "iter_tflops": 40.99905076334639, "iter_time": 0.5032090530395508, "loss": 0.05320851504802704, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 45.1572574928568, "step_time": 0.4568721542358399} +{"epoch": 0, "iter": 5719, "iter_tflops": 44.20693944728246, "iter_time": 0.4666935501098633, "loss": 0.06060638651251793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.68049948628967, "step_time": 0.4238061180114746} +{"epoch": 0, "iter": 5720, "iter_tflops": 41.2940169849183, "iter_time": 0.49961459350585935, "loss": 0.0485420897603035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.35642482661299, "step_time": 0.4548659553527832} +{"epoch": 0, "iter": 5721, "iter_tflops": 20.929789642041612, "iter_time": 0.9857286605834961, "loss": 0.023257426917552948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.477669040265056, "step_time": 0.9178484420776367} +{"epoch": 0, "iter": 5722, "iter_tflops": 13.345476981166101, "iter_time": 1.5459240264892582, "loss": 0.0550839938223362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.027014748899216, "step_time": 1.287269889831543} +{"epoch": 0, "iter": 5723, "iter_tflops": 41.454737495026116, "iter_time": 0.49767758178710936, "loss": 0.024658048525452614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.54044483501534, "step_time": 0.4530279312133789} +{"epoch": 0, "iter": 5724, "iter_tflops": 40.94978060915427, "iter_time": 0.5038145065307617, "loss": 0.04596661031246185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.10430527540531, "step_time": 0.457408519744873} +{"epoch": 0, "iter": 5725, "iter_tflops": 33.54636242201187, "iter_time": 0.6150024032592774, "loss": 0.11117220669984818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.21391398144021, "step_time": 0.5543919277191163} +{"epoch": 0, "iter": 5726, "iter_tflops": 40.15758802614948, "iter_time": 0.513753303527832, "loss": 0.09832526743412018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.124169377892954, "step_time": 0.4675689945220947} +{"epoch": 0, "iter": 5727, "iter_tflops": 37.32411063080664, "iter_time": 0.552755126953125, "loss": 0.15766386687755585, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 40.98729878015938, "step_time": 0.5033533344268799} +{"epoch": 0, "iter": 5728, "iter_tflops": 42.37989565835057, "iter_time": 0.4868132209777831, "loss": 0.17893344163894653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66676923713289, "step_time": 0.4420938892364502} +{"epoch": 0, "iter": 5729, "iter_tflops": 26.27717314518163, "iter_time": 0.785133674621582, "loss": 0.4142265021800995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.87762946635189, "step_time": 0.7400591049194336} +{"epoch": 0, "iter": 5730, "iter_tflops": 18.716677762626095, "iter_time": 1.102283950805664, "loss": 0.45891138911247253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.723065550899616, "step_time": 0.9079361877441406} +{"epoch": 0, "iter": 5731, "iter_tflops": 36.53002337826981, "iter_time": 0.5647708816528321, "loss": 0.3614528477191925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.00710481765788, "step_time": 0.5156857414245605} +{"epoch": 0, "iter": 5732, "iter_tflops": 39.50434263053207, "iter_time": 0.5222487487792968, "loss": 0.3894451856613159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.020480624741566, "step_time": 0.47956445884704585} +{"epoch": 0, "iter": 5733, "iter_tflops": 14.180620724878493, "iter_time": 0.7591970443725586, "loss": 0.014262937009334564, "lr": 3e-05, "seqlen": 4336.0, "step_tflops": 15.710730614935166, "step_time": 0.6852568225860596} +{"epoch": 0, "iter": 5734, "iter_tflops": 22.691710752212934, "iter_time": 0.47444132614135737, "loss": 0.008840364404022694, "lr": 3e-05, "seqlen": 4336.0, "step_tflops": 25.18619086123233, "step_time": 0.42745190811157224} +{"epoch": 0, "iter": 5735, "iter_tflops": 22.830144213481137, "iter_time": 0.47156449127197264, "loss": 0.005545516964048147, "lr": 3e-05, "seqlen": 4336.0, "step_tflops": 25.36993721523832, "step_time": 0.42435601043701165} +{"epoch": 0, "iter": 5736, "iter_tflops": 23.898255545113496, "iter_time": 0.4504883346557617, "loss": 0.008679486811161041, "lr": 3e-05, 
"seqlen": 4336.0, "step_tflops": 26.47948281534394, "step_time": 0.4065746078491211} +{"epoch": 0, "iter": 5737, "iter_tflops": 32.43640137808227, "iter_time": 0.6360475463867187, "loss": 0.168526753783226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.3415863627644, "step_time": 0.5837625198364258} +{"epoch": 0, "iter": 5738, "iter_tflops": 7.929251210508022, "iter_time": 2.601896820068359, "loss": 0.16256864368915558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.97471482844935, "step_time": 2.2988021240234375} +{"epoch": 0, "iter": 5739, "iter_tflops": 14.305924768274261, "iter_time": 1.442136306762695, "loss": 0.11285721510648727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.24706249343518, "step_time": 1.19620912361145} +{"epoch": 0, "iter": 5740, "iter_tflops": 46.38073328012604, "iter_time": 0.4448203392028809, "loss": 0.07560567557811737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.35123279125618, "step_time": 0.4097435626983642} +{"epoch": 0, "iter": 5741, "iter_tflops": 19.984848027154463, "iter_time": 0.7582204055786134, "loss": 0.4087296426296234, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 21.12423950808481, "step_time": 0.7173237915039062} +{"epoch": 0, "iter": 5742, "iter_tflops": 11.703269744144777, "iter_time": 1.294759490966797, "loss": 0.31832775473594666, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 14.401724215309335, "step_time": 1.052160099029541} +{"epoch": 0, "iter": 5743, "iter_tflops": 26.37623668925781, "iter_time": 0.5744913406372071, "loss": 0.39393702149391174, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.169421341043346, "step_time": 0.5379208679199219} +{"epoch": 0, "iter": 5744, "iter_tflops": 28.64742901401392, "iter_time": 0.5289451828002929, "loss": 0.4142187535762787, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 30.361967282121796, "step_time": 0.49907568359375} +{"epoch": 0, "iter": 5745, "iter_tflops": 41.73394019257826, "iter_time": 0.494348087310791, "loss": 0.15348437428474426, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 45.61861895241835, "step_time": 0.45225160217285154} +{"epoch": 0, "iter": 5746, "iter_tflops": 11.661649239942257, "iter_time": 1.7691402893066406, "loss": 0.16895443201065063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.832051109384048, "step_time": 1.4915426025390623} +{"epoch": 0, "iter": 5747, "iter_tflops": 12.264867013245523, "iter_time": 1.682129409790039, "loss": 0.17563045024871826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.434191460040962, "step_time": 1.4293210372924803} +{"epoch": 0, "iter": 5748, "iter_tflops": 34.882864031912796, "iter_time": 0.591439208984375, "loss": 0.19902947545051575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56721665642848, "step_time": 0.521418872833252} +{"epoch": 0, "iter": 5749, "iter_tflops": 14.20413374583799, "iter_time": 1.10423486328125, "loss": 0.29215607047080994, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 15.093774475789322, "step_time": 1.039150260925293} +{"epoch": 0, "iter": 5750, "iter_tflops": 13.89405820085643, "iter_time": 1.1288782196044922, "loss": 0.4377899765968323, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 15.50575866118529, "step_time": 1.0115402946472167} +{"epoch": 0, "iter": 5751, "iter_tflops": 27.969735925314097, "iter_time": 0.5607739639282227, "loss": 0.33580896258354187, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.8035989734873, "step_time": 0.5262686462402344} +{"epoch": 0, "iter": 5752, "iter_tflops": 26.587051803778483, "iter_time": 0.5899375305175781, "loss": 0.32050803303718567, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.386298325955625, "step_time": 0.552544734954834} +{"epoch": 0, "iter": 5753, "iter_tflops": 24.800363866768663, "iter_time": 0.8318867263793946, "loss": 0.7730498313903809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.077683720884977, "step_time": 0.7911398010253906} +{"epoch": 0, "iter": 5754, "iter_tflops": 17.57101798802517, "iter_time": 1.1741547088623046, "loss": 0.8867568373680115, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 21.933449869941686, "step_time": 0.9406223659515383} +{"epoch": 0, "iter": 5755, "iter_tflops": 37.17756781330867, "iter_time": 0.5549339218139648, "loss": 1.0586860179901123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.459335206199796, "step_time": 0.5099217128753663} +{"epoch": 0, "iter": 5756, "iter_tflops": 37.18439876213464, "iter_time": 0.5548319778442383, "loss": 0.8268057107925415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17495923571559, "step_time": 0.5135311622619629} +{"epoch": 0, "iter": 5757, "iter_tflops": 20.919080013195703, "iter_time": 0.9090295791625977, "loss": 0.07889901846647263, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 22.09097653426837, "step_time": 0.8608067855834962} +{"epoch": 0, "iter": 5758, "iter_tflops": 18.28922541930614, "iter_time": 1.0397412719726562, "loss": 0.08840500563383102, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 25.043570504937122, "step_time": 0.759319143295288} +{"epoch": 0, "iter": 5759, "iter_tflops": 45.88567294541105, "iter_time": 0.41442265701293945, "loss": 0.12616758048534393, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 50.31453189936319, "step_time": 0.37794374275207515} +{"epoch": 0, "iter": 5760, "iter_tflops": 51.57405426141509, "iter_time": 0.36871374130249024, "loss": 0.11144537478685379, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 56.21685806796091, "step_time": 0.3382626342773437} +{"epoch": 0, "iter": 5761, "iter_tflops": 46.160560197931076, "iter_time": 0.44694200897216796, "loss": 0.5280887484550476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40162394346685, "step_time": 0.4093339042663574} +{"epoch": 0, "iter": 5762, "iter_tflops": 41.06374940874245, "iter_time": 0.5024162139892578, "loss": 0.39930421113967896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64766598765298, "step_time": 0.46208671951293945} +{"epoch": 0, "iter": 5763, "iter_tflops": 50.09599263842565, "iter_time": 0.41183121490478514, "loss": 0.4345920979976654, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 54.51693833627385, "step_time": 0.3784345588684082} +{"epoch": 0, "iter": 5764, "iter_tflops": 46.57663541934916, "iter_time": 0.4429494171142579, "loss": 0.37715598940849304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96498085469342, "step_time": 0.41291106605529787} +{"epoch": 0, "iter": 5765, "iter_tflops": 23.34396291968321, "iter_time": 0.8837871093749999, "loss": 0.048936568200588226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.58452154406932, "step_time": 0.8391903610229493} +{"epoch": 0, "iter": 5766, "iter_tflops": 21.92201151254426, "iter_time": 0.9411131591796875, "loss": 0.09263107925653458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.57286101549123, "step_time": 0.7763971481323242} +{"epoch": 0, "iter": 5767, "iter_tflops": 40.11265727142534, "iter_time": 0.5143287658691407, "loss": 0.09562122821807861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.238479468423, "step_time": 0.4663608188629151} +{"epoch": 0, "iter": 5768, "iter_tflops": 39.783189172286875, "iter_time": 0.5185882263183593, "loss": 0.08457542210817337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.040564085992536, "step_time": 0.4684566135406494} +{"epoch": 0, "iter": 5769, "iter_tflops": 22.288569315839805, "iter_time": 0.925635612487793, "loss": 0.004124268889427185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.91580334617284, "step_time": 0.8626552581787109} +{"epoch": 0, "iter": 5770, "iter_tflops": 27.198481598702887, "iter_time": 0.7585384292602539, "loss": 0.01714681275188923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.676656746269025, "step_time": 0.6725339622497558} +{"epoch": 0, "iter": 5771, "iter_tflops": 54.25965329356924, "iter_time": 0.3802289962768555, "loss": 0.013116316869854927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.20632030771808, "step_time": 0.34267321777343746} +{"epoch": 0, "iter": 5772, "iter_tflops": 54.168156024492504, "iter_time": 0.3808712539672851, "loss": 0.0006766668520867825, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.21042568197396, "step_time": 0.3484368381500244} +{"epoch": 0, "iter": 5773, "iter_tflops": 26.832517568429083, "iter_time": 0.7688840026855468, "loss": 0.7724006772041321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.268466604727745, "step_time": 0.7298271179199218} +{"epoch": 0, "iter": 5774, "iter_tflops": 18.666342476922434, "iter_time": 1.10525634765625, "loss": 0.6330420970916748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.275575470291077, "step_time": 0.9697078952789308} +{"epoch": 0, "iter": 5775, "iter_tflops": 39.560734959063126, "iter_time": 0.5215043029785156, "loss": 0.8664867877960205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.309100255698006, "step_time": 0.47636855506896975} +{"epoch": 0, "iter": 5776, "iter_tflops": 43.1974748941248, "iter_time": 0.4775995254516602, "loss": 0.6806967258453369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.15233914383101, "step_time": 0.4375412521362304} +{"epoch": 0, "iter": 5777, "iter_tflops": 16.883249402641972, "iter_time": 0.6066087951660156, "loss": 0.02328183874487877, "lr": 3e-05, "seqlen": 4128.0, "step_tflops": 18.888383175079333, "step_time": 0.5422130355834961} +{"epoch": 0, "iter": 5778, "iter_tflops": 19.16013572730669, "iter_time": 0.5345227050781249, "loss": 0.044342104345560074, "lr": 3e-05, "seqlen": 4128.0, "step_tflops": 21.394705186080895, "step_time": 0.4786944942474365} +{"epoch": 0, "iter": 5779, "iter_tflops": 21.616375335508582, "iter_time": 0.4737856101989747, "loss": 0.04172345623373985, "lr": 3e-05, "seqlen": 4128.0, "step_tflops": 23.88884324248869, "step_time": 0.4287159271240234} +{"epoch": 0, "iter": 5780, "iter_tflops": 25.464450948865558, "iter_time": 0.40218921661376955, "loss": 0.023257331922650337, "lr": 3e-05, "seqlen": 4128.0, "step_tflops": 28.055531023027424, "step_time": 0.3650448665618896} +{"epoch": 0, "iter": 5781, "iter_tflops": 23.115206081609394, "iter_time": 0.8925334014892577, "loss": 
1.0401701927185059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.54911633189452, "step_time": 0.8404006576538086} +{"epoch": 0, "iter": 5782, "iter_tflops": 19.939813527831223, "iter_time": 1.0346683273315431, "loss": 0.9308896660804749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.382024871105603, "step_time": 0.8461599731445312} +{"epoch": 0, "iter": 5783, "iter_tflops": 43.526448323755375, "iter_time": 0.4739898223876953, "loss": 0.9449926018714905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.56548294985554, "step_time": 0.44305550384521486} +{"epoch": 0, "iter": 5784, "iter_tflops": 41.87333413350885, "iter_time": 0.4927024307250976, "loss": 0.848705530166626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7033911229333, "step_time": 0.4615107040405274} +{"epoch": 0, "iter": 5785, "iter_tflops": 41.56954035036156, "iter_time": 0.4963031425476074, "loss": 0.04930463805794716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.133835936226916, "step_time": 0.45710924148559573} +{"epoch": 0, "iter": 5786, "iter_tflops": 42.28752562600878, "iter_time": 0.4878765830993652, "loss": 0.04903492331504822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.37812206476425, "step_time": 0.3794006252288818} +{"epoch": 0, "iter": 5787, "iter_tflops": 58.41762672987055, "iter_time": 0.3531655540466309, "loss": 0.04635557159781456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.86929444877956, "step_time": 0.3230205326080322} +{"epoch": 0, "iter": 5788, "iter_tflops": 52.80474244214661, "iter_time": 0.39070531463623054, "loss": 0.060372985899448395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.57960150582041, "step_time": 0.3583055973052978} +{"epoch": 0, "iter": 5789, "iter_tflops": 37.08702935271043, "iter_time": 0.5562886505126954, "loss": 0.5940819382667542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.025800155219585, "step_time": 0.5154448738098144} +{"epoch": 0, "iter": 5790, "iter_tflops": 43.83093240114207, "iter_time": 0.4706971168518067, "loss": 
0.8708282113075256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.024615443332166, "step_time": 0.42959414291381837} +{"epoch": 0, "iter": 5791, "iter_tflops": 48.33581914644428, "iter_time": 0.42682825851440426, "loss": 0.7287238836288452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44214067041666, "step_time": 0.39340677642822264} +{"epoch": 0, "iter": 5792, "iter_tflops": 49.58659671824566, "iter_time": 0.41606189727783205, "loss": 0.8354582786560059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.502410985252205, "step_time": 0.385610538482666} +{"epoch": 0, "iter": 5793, "iter_tflops": 34.986806110599396, "iter_time": 0.5896821060180665, "loss": 0.33338332176208496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.30622699114606, "step_time": 0.5530201034545899} +{"epoch": 0, "iter": 5794, "iter_tflops": 24.55918744304306, "iter_time": 0.8400560302734376, "loss": 0.3481709957122803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.434091712751346, "step_time": 0.5822385311126709} +{"epoch": 0, "iter": 5795, "iter_tflops": 39.239685288934425, "iter_time": 0.5257711257934571, "loss": 0.36955827474594116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.55173120719755, "step_time": 0.4848473358154297} +{"epoch": 0, "iter": 5796, "iter_tflops": 36.58053842285575, "iter_time": 0.5639909744262696, "loss": 0.40217503905296326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.91862317118088, "step_time": 0.516828784942627} +{"epoch": 0, "iter": 5797, "iter_tflops": 11.665174095788538, "iter_time": 1.1940480346679687, "loss": 0.0024388248566538095, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 12.36114254218588, "step_time": 1.1268196411132811} +{"epoch": 0, "iter": 5798, "iter_tflops": 19.56104683728609, "iter_time": 0.7120671157836914, "loss": 0.00843505933880806, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 28.714464265891408, "step_time": 0.4850788116455078} +{"epoch": 0, "iter": 5799, "iter_tflops": 37.69149184930137, "iter_time": 0.36954701232910153, 
"loss": 0.007699275854974985, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 41.54492454649857, "step_time": 0.3352702732086182} +{"epoch": 0, "iter": 5800, "iter_tflops": 36.40731045512383, "iter_time": 0.38258190536499026, "loss": 0.015120320953428745, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 40.08797300505579, "step_time": 0.34745528793334957} +{"epoch": 0, "iter": 5801, "iter_tflops": 35.45178191073046, "iter_time": 0.581947998046875, "loss": 0.7112911343574524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.193981378117094, "step_time": 0.540166088104248} +{"epoch": 0, "iter": 5802, "iter_tflops": 40.75243368379493, "iter_time": 0.5062542686462402, "loss": 0.8406822085380554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.190358614627854, "step_time": 0.46686865997314453} +{"epoch": 0, "iter": 5803, "iter_tflops": 48.33480744167564, "iter_time": 0.42683719253540037, "loss": 0.8007067441940308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.10742738946531, "step_time": 0.39593383407592775} +{"epoch": 0, "iter": 5804, "iter_tflops": 41.451340385660565, "iter_time": 0.4977183685302734, "loss": 0.7641920447349548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.73269255931737, "step_time": 0.4612083988189698} +{"epoch": 0, "iter": 5805, "iter_tflops": 2.0816268372434243, "iter_time": 0.7151706085205078, "loss": 1.0300540924072266, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 2.20838697680405, "step_time": 0.6741202278137207} +{"epoch": 0, "iter": 5806, "iter_tflops": 0.8329306151258372, "iter_time": 1.7873257446289064, "loss": 0.9305254220962524, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 1.0805227095371057, "step_time": 1.3777760696411134} +{"epoch": 0, "iter": 5807, "iter_tflops": 0.7772918805232464, "iter_time": 1.9152629394531249, "loss": 0.9427260160446167, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 0.8953569787664862, "step_time": 1.6627092514038087} +{"epoch": 0, "iter": 5808, "iter_tflops": 1.4433905689695854, "iter_time": 
1.0314036712646484, "loss": 1.1008657217025757, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 1.7778165852284389, "step_time": 0.8373857822418214} +{"epoch": 0, "iter": 5809, "iter_tflops": 16.251356509702646, "iter_time": 0.9651317214965821, "loss": 0.5025508999824524, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 17.013616256796244, "step_time": 0.921890998840332} +{"epoch": 0, "iter": 5810, "iter_tflops": 11.747580715719748, "iter_time": 1.3351429595947266, "loss": 0.3944953978061676, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 13.935519869445695, "step_time": 1.1255195236206055} +{"epoch": 0, "iter": 5811, "iter_tflops": 27.20269209295557, "iter_time": 0.5765863037109376, "loss": 0.46192413568496704, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.914761933989478, "step_time": 0.5424460945129395} +{"epoch": 0, "iter": 5812, "iter_tflops": 26.295300688440353, "iter_time": 0.5964829940795898, "loss": 0.5141177177429199, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.03287282957346, "step_time": 0.5595109634399413} +{"epoch": 0, "iter": 5813, "iter_tflops": 22.905060146281354, "iter_time": 0.9007220840454102, "loss": 0.18406763672828674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.01068733033487, "step_time": 0.8592462692260743} +{"epoch": 0, "iter": 5814, "iter_tflops": 13.918215418507426, "iter_time": 1.482308822631836, "loss": 0.2743622362613678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.70111683660842, "step_time": 1.103201145172119} +{"epoch": 0, "iter": 5815, "iter_tflops": 38.89324390159585, "iter_time": 0.5304544296264648, "loss": 0.2607326805591583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.48528548373032, "step_time": 0.474438497543335} +{"epoch": 0, "iter": 5816, "iter_tflops": 39.391632070440856, "iter_time": 0.523743049621582, "loss": 0.2718678116798401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.21015339645604, "step_time": 0.4774593906402587} +{"epoch": 0, "iter": 5817, "iter_tflops": 17.746749533358724, "iter_time": 
1.1625280151367188, "loss": 0.30604034662246704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.80365268001722, "step_time": 1.097185417175293} +{"epoch": 0, "iter": 5818, "iter_tflops": 18.460888666781354, "iter_time": 1.117556900024414, "loss": 0.23134079575538635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.205621477706934, "step_time": 0.8523265361785889} +{"epoch": 0, "iter": 5819, "iter_tflops": 42.88835635280352, "iter_time": 0.4810418319702149, "loss": 0.2729629874229431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81280890105781, "step_time": 0.4407147102355957} +{"epoch": 0, "iter": 5820, "iter_tflops": 44.99611629365171, "iter_time": 0.45850831604003905, "loss": 0.32109779119491577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17580801450157, "step_time": 0.4195374584197998} +{"epoch": 0, "iter": 5821, "iter_tflops": 22.14775149926397, "iter_time": 0.9315209045410157, "loss": 0.8145931959152222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.471122016030893, "step_time": 0.8789990310668945} +{"epoch": 0, "iter": 5822, "iter_tflops": 17.32962535652199, "iter_time": 1.1905100708007812, "loss": 0.8266822695732117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.998520600186797, "step_time": 0.9825022392272948} +{"epoch": 0, "iter": 5823, "iter_tflops": 37.94228352786399, "iter_time": 0.5437493896484374, "loss": 0.902353048324585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.49574431272887, "step_time": 0.49718576812744136} +{"epoch": 0, "iter": 5824, "iter_tflops": 38.05896815837192, "iter_time": 0.5420823135375976, "loss": 0.6888712048530579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.211983811126984, "step_time": 0.5006090850830078} +{"epoch": 0, "iter": 5825, "iter_tflops": 20.206350539789085, "iter_time": 1.0210202713012697, "loss": 0.12928389012813568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.764667332228232, "step_time": 0.9479167861938476} +{"epoch": 0, "iter": 5826, "iter_tflops": 15.904567470565546, "iter_time": 
1.297180419921875, "loss": 0.1553587168455124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.65218044203815, "step_time": 1.1060955352783204} +{"epoch": 0, "iter": 5827, "iter_tflops": 48.76593560548631, "iter_time": 0.42306362533569336, "loss": 0.22454065084457397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.139471752264946, "step_time": 0.388244234085083} +{"epoch": 0, "iter": 5828, "iter_tflops": 53.917978251924325, "iter_time": 0.38263848495483394, "loss": 0.14227555692195892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.49590097505464, "step_time": 0.35269297790527343} +{"epoch": 0, "iter": 5829, "iter_tflops": 41.43944218553374, "iter_time": 0.4978612747192383, "loss": 0.23470439016819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.988437261975314, "step_time": 0.45858657836914063} +{"epoch": 0, "iter": 5830, "iter_tflops": 43.31423908266075, "iter_time": 0.4763120384216308, "loss": 0.1314188688993454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.803233398931965, "step_time": 0.44080487632751464} +{"epoch": 0, "iter": 5831, "iter_tflops": 47.338378004028876, "iter_time": 0.43582172393798824, "loss": 0.20509636402130127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.29013996410669, "step_time": 0.40224287796020514} +{"epoch": 0, "iter": 5832, "iter_tflops": 49.991225955873496, "iter_time": 0.4126942901611328, "loss": 0.1416584998369217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.2199685831685, "step_time": 0.38050729370117187} +{"epoch": 0, "iter": 5833, "iter_tflops": 23.40606480787259, "iter_time": 0.44100305557250974, "loss": 0.09388627856969833, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 25.886063181666245, "step_time": 0.39875302886962893} +{"epoch": 0, "iter": 5834, "iter_tflops": 18.02070582395958, "iter_time": 0.5727936630249023, "loss": 0.1094709038734436, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 20.394461907205486, "step_time": 0.5061249542236328} +{"epoch": 0, "iter": 5835, "iter_tflops": 21.02857892154688, 
"iter_time": 0.4908627510070801, "loss": 0.1438601016998291, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 23.06069969517746, "step_time": 0.4476076717376709} +{"epoch": 0, "iter": 5836, "iter_tflops": 19.87796883750181, "iter_time": 0.5192756958007813, "loss": 0.10772839188575745, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 21.774252593908713, "step_time": 0.4740528316497803} +{"epoch": 0, "iter": 5837, "iter_tflops": 24.59239889339968, "iter_time": 0.8389215545654296, "loss": 0.0325491726398468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.232637786705244, "step_time": 0.7864666023254394} +{"epoch": 0, "iter": 5838, "iter_tflops": 19.454844571040567, "iter_time": 1.060460464477539, "loss": 0.05229989066720009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.860318294246298, "step_time": 0.9437691268920899} +{"epoch": 0, "iter": 5839, "iter_tflops": 49.12754515572642, "iter_time": 0.4199496116638183, "loss": 0.024464428424835205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.587472237110156, "step_time": 0.3849984455108643} +{"epoch": 0, "iter": 5840, "iter_tflops": 58.43057445339602, "iter_time": 0.35308729553222656, "loss": 0.04021763429045677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.04910384477472, "step_time": 0.3221136951446533} +{"epoch": 0, "iter": 5841, "iter_tflops": 26.786475174699632, "iter_time": 0.7702056121826172, "loss": 0.8351847529411316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.28633112993857, "step_time": 0.7293661880493164} +{"epoch": 0, "iter": 5842, "iter_tflops": 17.416775681909165, "iter_time": 1.184552978515625, "loss": 0.7451832890510559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.67660635339454, "step_time": 0.9977988243103028} +{"epoch": 0, "iter": 5843, "iter_tflops": 34.23374776433888, "iter_time": 0.6026536636352539, "loss": 0.8949428796768188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.176168141647395, "step_time": 0.5549548149108887} +{"epoch": 0, "iter": 5844, "iter_tflops": 
36.94113303993989, "iter_time": 0.5584856719970703, "loss": 0.7201191186904907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.19281995939456, "step_time": 0.5133029613494873} +{"epoch": 0, "iter": 5845, "iter_tflops": 19.150907708690227, "iter_time": 1.0772906341552735, "loss": 0.565984308719635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.66664347329943, "step_time": 0.9982798385620117} +{"epoch": 0, "iter": 5846, "iter_tflops": 14.60812608070477, "iter_time": 1.4123025360107422, "loss": 0.5741938352584839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.6194410553648, "step_time": 1.1080404319763184} +{"epoch": 0, "iter": 5847, "iter_tflops": 40.23916926112626, "iter_time": 0.5127117156982421, "loss": 0.5373215079307556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25545730415791, "step_time": 0.46618190765380857} +{"epoch": 0, "iter": 5848, "iter_tflops": 42.937026876383776, "iter_time": 0.48049655532836916, "loss": 0.5449874997138977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.83284701678516, "step_time": 0.44052614402771} +{"epoch": 0, "iter": 5849, "iter_tflops": 21.289817740422183, "iter_time": 0.9690591888427734, "loss": 0.024191027507185936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.62655462093395, "step_time": 0.9118088836669922} +{"epoch": 0, "iter": 5850, "iter_tflops": 28.817043553804194, "iter_time": 0.7159337310791015, "loss": 0.03634720668196678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.64036099764292, "step_time": 0.6320730800628661} +{"epoch": 0, "iter": 5851, "iter_tflops": 51.1710662571278, "iter_time": 0.4031788864135742, "loss": 0.030437227338552475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.79539696854297, "step_time": 0.36976336097717283} +{"epoch": 0, "iter": 5852, "iter_tflops": 56.45692435092064, "iter_time": 0.3654307022094727, "loss": 0.0632203072309494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.79141946953352, "step_time": 0.3338828220367432} +{"epoch": 0, "iter": 5853, "iter_tflops": 
33.467172988368354, "iter_time": 0.6164576110839844, "loss": 0.09917882829904556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.83082312306906, "step_time": 0.5757917823791504} +{"epoch": 0, "iter": 5854, "iter_tflops": 31.944564885232776, "iter_time": 0.6458404922485352, "loss": 0.12460324168205261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.36268387965782, "step_time": 0.4870110111236572} +{"epoch": 0, "iter": 5855, "iter_tflops": 54.555403709075556, "iter_time": 0.3781677360534668, "loss": 0.15457041561603546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.56393363834111, "step_time": 0.3463688888549805} +{"epoch": 0, "iter": 5856, "iter_tflops": 51.31868204319216, "iter_time": 0.40201916122436526, "loss": 0.18753552436828613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.74043156025185, "step_time": 0.3701279830932617} +{"epoch": 0, "iter": 5857, "iter_tflops": 30.382976614543548, "iter_time": 0.6790346374511718, "loss": 0.47485414147377014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.28194967190315, "step_time": 0.639090690612793} +{"epoch": 0, "iter": 5858, "iter_tflops": 10.81465696977975, "iter_time": 1.9076974487304688, "loss": 0.6010980606079102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.31441613158769, "step_time": 1.549530471801758} +{"epoch": 0, "iter": 5859, "iter_tflops": 13.8015651697927, "iter_time": 1.4948372344970702, "loss": 0.6802176833152771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.077163098971212, "step_time": 1.2832546005249026} +{"epoch": 0, "iter": 5860, "iter_tflops": 37.320995169997026, "iter_time": 0.5528012695312501, "loss": 0.5913181900978088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.8164797003358, "step_time": 0.5054598941802979} +{"epoch": 0, "iter": 5861, "iter_tflops": 12.018554155076103, "iter_time": 1.2607938842773436, "loss": 0.4554043412208557, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 12.852271576988091, "step_time": 1.179007110595703} +{"epoch": 0, "iter": 5862, "iter_tflops": 
10.968251353481298, "iter_time": 1.3815255584716797, "loss": 0.4135597050189972, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 14.981166274957689, "step_time": 1.011464614868164} +{"epoch": 0, "iter": 5863, "iter_tflops": 23.468899015847768, "iter_time": 0.6456595840454102, "loss": 0.41847214102745056, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.27770786349347, "step_time": 0.5994578170776367} +{"epoch": 0, "iter": 5864, "iter_tflops": 22.766923458475585, "iter_time": 0.6655672912597655, "loss": 0.26428818702697754, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.526095696221457, "step_time": 0.6178284454345703} +{"epoch": 0, "iter": 5865, "iter_tflops": 15.74849398175285, "iter_time": 1.3100359649658204, "loss": 0.2893845736980438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.981645278678794, "step_time": 1.2149054565429687} +{"epoch": 0, "iter": 5866, "iter_tflops": 16.954723228828495, "iter_time": 1.2168345794677733, "loss": 0.402490496635437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.670504860613264, "step_time": 0.9980933532714844} +{"epoch": 0, "iter": 5867, "iter_tflops": 48.67807167809919, "iter_time": 0.42382725524902354, "loss": 0.42813217639923096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.888106966326355, "step_time": 0.39008946800231936} +{"epoch": 0, "iter": 5868, "iter_tflops": 49.11567973758356, "iter_time": 0.42005106353759764, "loss": 0.4001074433326721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26366147267805, "step_time": 0.3873390026092529} +{"epoch": 0, "iter": 5869, "iter_tflops": 33.26757290010795, "iter_time": 0.6201562576293945, "loss": 0.8493341207504272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.61398205805439, "step_time": 0.5792975769042968} +{"epoch": 0, "iter": 5870, "iter_tflops": 8.530024088590748, "iter_time": 2.4186442260742185, "loss": 0.7963216304779053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.687416070762302, "step_time": 1.9304098739624023} +{"epoch": 0, "iter": 5871, 
"iter_tflops": 13.144417250801897, "iter_time": 1.5695708007812499, "loss": 0.860744297504425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.183355709660042, "step_time": 1.3587966918945313} +{"epoch": 0, "iter": 5872, "iter_tflops": 24.93021575090234, "iter_time": 0.8275537490844729, "loss": 0.83249431848526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.68519487010978, "step_time": 0.6723468303680419} +{"epoch": 0, "iter": 5873, "iter_tflops": 11.047663730050544, "iter_time": 1.4753601684570314, "loss": 0.3838096857070923, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 11.794914559160041, "step_time": 1.381890724182129} +{"epoch": 0, "iter": 5874, "iter_tflops": 11.45725637449778, "iter_time": 1.4226165924072267, "loss": 0.4534960687160492, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 14.485700986999108, "step_time": 1.125198085784912} +{"epoch": 0, "iter": 5875, "iter_tflops": 24.17682549546065, "iter_time": 0.674169692993164, "loss": 0.30063748359680176, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 26.073823629752717, "step_time": 0.6251205520629882} +{"epoch": 0, "iter": 5876, "iter_tflops": 25.513037081321276, "iter_time": 0.6388609466552735, "loss": 0.5413357615470886, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.20105178364973, "step_time": 0.5992151756286621} +{"epoch": 0, "iter": 5877, "iter_tflops": 21.170184426278574, "iter_time": 0.9745353698730469, "loss": 0.015903998166322708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.447456162741375, "step_time": 0.9190838088989258} +{"epoch": 0, "iter": 5878, "iter_tflops": 13.586842207753577, "iter_time": 1.5184612579345702, "loss": 0.0028344716411083937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.138522371639393, "step_time": 1.2783756179809571} +{"epoch": 0, "iter": 5879, "iter_tflops": 56.69659658733345, "iter_time": 0.36388592529296876, "loss": 0.008454223163425922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.81668598210766, "step_time": 0.3284333324432373} +{"epoch": 0, "iter": 
5880, "iter_tflops": 60.55413113171661, "iter_time": 0.3407049713134766, "loss": 0.009777980856597424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.54394217832187, "step_time": 0.31003713989257814} +{"epoch": 0, "iter": 5881, "iter_tflops": 46.550637322623615, "iter_time": 0.4431968002319336, "loss": 0.11692395806312561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.10497633732475, "step_time": 0.40370028495788574} +{"epoch": 0, "iter": 5882, "iter_tflops": 48.097477345738454, "iter_time": 0.42894335937499994, "loss": 0.14714321494102478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.23071332926834, "step_time": 0.3949992675781251} +{"epoch": 0, "iter": 5883, "iter_tflops": 48.883426577033546, "iter_time": 0.42204679489135744, "loss": 0.1909467875957489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.24980290702928, "step_time": 0.38743980979919435} +{"epoch": 0, "iter": 5884, "iter_tflops": 48.75649280296322, "iter_time": 0.4231455612182617, "loss": 0.11098125576972961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.33050935988149, "step_time": 0.38685348701477046} +{"epoch": 0, "iter": 5885, "iter_tflops": 31.82876564651724, "iter_time": 0.6481901855468749, "loss": 0.14198733866214752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.99411644097529, "step_time": 0.6069018898010253} +{"epoch": 0, "iter": 5886, "iter_tflops": 37.27914455672468, "iter_time": 0.5534218597412108, "loss": 0.1052241325378418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18808683271321, "step_time": 0.5008995342254638} +{"epoch": 0, "iter": 5887, "iter_tflops": 44.64813040711835, "iter_time": 0.4620819129943848, "loss": 0.13970564305782318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17806146587045, "step_time": 0.4195182342529296} +{"epoch": 0, "iter": 5888, "iter_tflops": 38.50010196178679, "iter_time": 0.5358711395263671, "loss": 0.1074928566813469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30004954965403, "step_time": 0.487732135772705} +{"epoch": 0, 
"iter": 5889, "iter_tflops": 19.93690611236745, "iter_time": 1.0348192138671874, "loss": 0.8720728754997253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.02265358515726, "step_time": 0.9813743743896485} +{"epoch": 0, "iter": 5890, "iter_tflops": 15.038065865430195, "iter_time": 1.3719246673583987, "loss": 0.8330143690109253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.314661180765, "step_time": 1.1264796714782714} +{"epoch": 0, "iter": 5891, "iter_tflops": 42.13803720856429, "iter_time": 0.48960736846923825, "loss": 0.6915080547332764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.251933573049, "step_time": 0.4559162864685059} +{"epoch": 0, "iter": 5892, "iter_tflops": 44.80776259141217, "iter_time": 0.46043569946289065, "loss": 1.1112812757492065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.31082684977293, "step_time": 0.4270490665435791} +{"epoch": 0, "iter": 5893, "iter_tflops": 25.349149642290918, "iter_time": 0.8138771438598633, "loss": 0.1954067200422287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.685202273327352, "step_time": 0.7731286163330078} +{"epoch": 0, "iter": 5894, "iter_tflops": 16.489320258404653, "iter_time": 1.2511791381835937, "loss": 0.1157345250248909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.073571643230842, "step_time": 0.894143907546997} +{"epoch": 0, "iter": 5895, "iter_tflops": 39.53287642737209, "iter_time": 0.5218718032836914, "loss": 0.16330425441265106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.30190957559562, "step_time": 0.476447660446167} +{"epoch": 0, "iter": 5896, "iter_tflops": 42.13260270840173, "iter_time": 0.4896705207824707, "loss": 0.14800599217414856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35009754137438, "step_time": 0.4451143493652344} +{"epoch": 0, "iter": 5897, "iter_tflops": 36.55007056441547, "iter_time": 0.5644611129760743, "loss": 0.14941905438899994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.70999099007979, "step_time": 0.5067820701599122} +{"epoch": 0, 
"iter": 5898, "iter_tflops": 37.026414065333924, "iter_time": 0.5571993408203125, "loss": 0.1557575911283493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.89843576259185, "step_time": 0.4924072494506836} +{"epoch": 0, "iter": 5899, "iter_tflops": 44.77104713311782, "iter_time": 0.46081328964233403, "loss": 0.1425161212682724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.05723546883231, "step_time": 0.42055149078369136} +{"epoch": 0, "iter": 5900, "iter_tflops": 37.715045972021144, "iter_time": 0.5470255432128907, "loss": 0.14418074488639832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.64090971535317, "step_time": 0.4954525165557861} +{"epoch": 0, "iter": 5901, "iter_tflops": 17.308777736226887, "iter_time": 1.1919439849853517, "loss": 0.02715008705854416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.410683875589964, "step_time": 1.1206044082641602} +{"epoch": 0, "iter": 5902, "iter_tflops": 24.390170917275793, "iter_time": 0.8458773651123046, "loss": 0.029549840837717056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.271518925049616, "step_time": 0.7565069465637208} +{"epoch": 0, "iter": 5903, "iter_tflops": 54.82601584601077, "iter_time": 0.37630116271972663, "loss": 0.039987172931432724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.151198394172816, "step_time": 0.3429872398376465} +{"epoch": 0, "iter": 5904, "iter_tflops": 53.80811636583072, "iter_time": 0.3834197311401368, "loss": 0.03915360942482948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.50289584643042, "step_time": 0.3526508083343506} +{"epoch": 0, "iter": 5905, "iter_tflops": 30.281846851353095, "iter_time": 0.6813023529052734, "loss": 0.027287108823657036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.23661296958877, "step_time": 0.6399894905090332} +{"epoch": 0, "iter": 5906, "iter_tflops": 17.954404070810643, "iter_time": 1.1490826110839842, "loss": 0.01668224297463894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.055043923654154, "step_time": 1.0287234268188477} 
+{"epoch": 0, "iter": 5907, "iter_tflops": 53.63029010492635, "iter_time": 0.38469106674194337, "loss": 0.04971907287836075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.6639764801543, "step_time": 0.35168249320983885} +{"epoch": 0, "iter": 5908, "iter_tflops": 54.49787590289374, "iter_time": 0.3785669288635254, "loss": 0.02778315171599388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.476870335920076, "step_time": 0.34687590980529787} +{"epoch": 0, "iter": 5909, "iter_tflops": 22.198206767294234, "iter_time": 0.9294036102294924, "loss": 0.15302640199661255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.253253351665144, "step_time": 0.8872347106933592} +{"epoch": 0, "iter": 5910, "iter_tflops": 14.145966955022722, "iter_time": 1.4584434967041013, "loss": 0.1316850632429123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.13677106764623, "step_time": 1.2785143585205079} +{"epoch": 0, "iter": 5911, "iter_tflops": 32.43063670258395, "iter_time": 0.6361606063842773, "loss": 0.16838262975215912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.98978224578047, "step_time": 0.49133604431152333} +{"epoch": 0, "iter": 5912, "iter_tflops": 40.697414145070205, "iter_time": 0.5069386825561524, "loss": 0.14279229938983917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.745015636325114, "step_time": 0.46108137893676754} +{"epoch": 0, "iter": 5913, "iter_tflops": 35.06768598057768, "iter_time": 0.5883220672607422, "loss": 0.426485151052475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.9070552953403, "step_time": 0.5302661266326905} +{"epoch": 0, "iter": 5914, "iter_tflops": 36.71135330894256, "iter_time": 0.5619812850952148, "loss": 0.33205848932266235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.99342335672201, "step_time": 0.5032781314849853} +{"epoch": 0, "iter": 5915, "iter_tflops": 36.074611052222586, "iter_time": 0.571900650024414, "loss": 0.3474351763725281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.37278808558488, "step_time": 
0.5239937152862548} +{"epoch": 0, "iter": 5916, "iter_tflops": 31.888002329781806, "iter_time": 0.6469860763549805, "loss": 0.34509676694869995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.256832989137344, "step_time": 0.6022475433349609} +{"epoch": 0, "iter": 5917, "iter_tflops": 22.459852823852575, "iter_time": 0.9185765228271484, "loss": 0.7405707240104675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.406908355957963, "step_time": 0.8452972908020019} +{"epoch": 0, "iter": 5918, "iter_tflops": 17.983730152098815, "iter_time": 1.1472088012695314, "loss": 0.8303045034408569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.41479140529812, "step_time": 0.8811137008666992} +{"epoch": 0, "iter": 5919, "iter_tflops": 47.12901640833738, "iter_time": 0.43775777816772465, "loss": 0.853007435798645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.82465829857132, "step_time": 0.40592685127258293} +{"epoch": 0, "iter": 5920, "iter_tflops": 43.69791328064293, "iter_time": 0.4721299476623535, "loss": 0.72120201587677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.948309178551085, "step_time": 0.43944273757934565} +{"epoch": 0, "iter": 5921, "iter_tflops": 24.876963894701692, "iter_time": 0.8293252182006836, "loss": 0.4243312478065491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.11095384632137, "step_time": 0.7901317443847656} +{"epoch": 0, "iter": 5922, "iter_tflops": 12.763640021458249, "iter_time": 1.6163957519531251, "loss": 0.37365463376045227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.144617823328199, "step_time": 1.362272310256958} +{"epoch": 0, "iter": 5923, "iter_tflops": 35.17940735728137, "iter_time": 0.5864536972045898, "loss": 0.5077843070030212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.44931958223207, "step_time": 0.5365788974761962} +{"epoch": 0, "iter": 5924, "iter_tflops": 40.70134979612734, "iter_time": 0.5068896636962891, "loss": 0.3710094690322876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52318196347919, "step_time": 
0.46337868499755863} +{"epoch": 0, "iter": 5925, "iter_tflops": 25.99472498163048, "iter_time": 0.7936646194458009, "loss": 0.04624423384666443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.00613831352257, "step_time": 0.736663272857666} +{"epoch": 0, "iter": 5926, "iter_tflops": 12.057976523987271, "iter_time": 1.7109913482666013, "loss": 0.07114233821630478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.183442325943478, "step_time": 1.3587889404296876} +{"epoch": 0, "iter": 5927, "iter_tflops": 48.6843316308367, "iter_time": 0.42377275848388674, "loss": 0.040629979223012924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.809244360503676, "step_time": 0.38341169357299804} +{"epoch": 0, "iter": 5928, "iter_tflops": 56.73132448805833, "iter_time": 0.36366317367553713, "loss": 0.05559401586651802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.88894088669839, "step_time": 0.3333567066192627} +{"epoch": 0, "iter": 5929, "iter_tflops": 34.304596545694274, "iter_time": 0.6014090118408203, "loss": 0.04372183606028557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.665799569065136, "step_time": 0.5626794929504394} +{"epoch": 0, "iter": 5930, "iter_tflops": 14.931981243890453, "iter_time": 1.3816715393066406, "loss": 0.030385565012693405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.521916052162425, "step_time": 1.1774450607299807} +{"epoch": 0, "iter": 5931, "iter_tflops": 48.95929338941938, "iter_time": 0.4213927955627441, "loss": 0.042433660477399826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.45608366116778, "step_time": 0.38594472503662103} +{"epoch": 0, "iter": 5932, "iter_tflops": 53.08578685969669, "iter_time": 0.3886368598937988, "loss": 0.051983725279569626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.32019986008493, "step_time": 0.35375553512573243} +{"epoch": 0, "iter": 5933, "iter_tflops": 17.682173764707183, "iter_time": 0.6889187698364257, "loss": 0.007680458016693592, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 
18.82302514139987, "step_time": 0.6471638488769531} +{"epoch": 0, "iter": 5934, "iter_tflops": 10.87722552801575, "iter_time": 1.119916229248047, "loss": 0.008036734536290169, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 12.136977459525282, "step_time": 1.0036750450134277} +{"epoch": 0, "iter": 5935, "iter_tflops": 26.277479330453072, "iter_time": 0.46357495880126953, "loss": 0.005748304072767496, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 29.325399000913098, "step_time": 0.4153935432434082} +{"epoch": 0, "iter": 5936, "iter_tflops": 28.18476733574351, "iter_time": 0.4322044334411621, "loss": 0.007704985793679953, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 31.212923855264076, "step_time": 0.3902736396789551} +{"epoch": 0, "iter": 5937, "iter_tflops": 17.390802002839752, "iter_time": 1.1863221435546876, "loss": 0.7283926010131836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.75581429804988, "step_time": 1.09998388671875} +{"epoch": 0, "iter": 5938, "iter_tflops": 18.039488501652333, "iter_time": 1.1436628875732422, "loss": 0.5893855690956116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.01485640484584, "step_time": 0.9371441326141359} +{"epoch": 0, "iter": 5939, "iter_tflops": 39.16169476136042, "iter_time": 0.5268181991577148, "loss": 0.6283610463142395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.8034827226235, "step_time": 0.4819956741333008} +{"epoch": 0, "iter": 5940, "iter_tflops": 39.508052055431094, "iter_time": 0.5221997146606445, "loss": 0.6746869087219238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.922003408870474, "step_time": 0.480664737701416} +{"epoch": 0, "iter": 5941, "iter_tflops": 20.465209490725794, "iter_time": 1.0081056594848632, "loss": 0.004869101569056511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.84880994125856, "step_time": 0.9442662353515624} +{"epoch": 0, "iter": 5942, "iter_tflops": 27.199934568699792, "iter_time": 0.7584979095458984, "loss": 0.0073571703396737576, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 31.03305625345628, "step_time": 0.6648102378845215} +{"epoch": 0, "iter": 5943, "iter_tflops": 43.6891296379388, "iter_time": 0.472224868774414, "loss": 0.004961194936186075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.66956618260815, "step_time": 0.42390132331848146} +{"epoch": 0, "iter": 5944, "iter_tflops": 44.52290046976963, "iter_time": 0.4633816146850587, "loss": 0.019812799990177155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.173022283756744, "step_time": 0.4195612258911133} +{"epoch": 0, "iter": 5945, "iter_tflops": 25.883041252711582, "iter_time": 0.7970892333984375, "loss": 0.518804132938385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.775017040848475, "step_time": 0.7427931900024414} +{"epoch": 0, "iter": 5946, "iter_tflops": 9.75474563093699, "iter_time": 2.1149801635742187, "loss": 0.5913954377174377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.59524108883628, "step_time": 1.9472037811279297} +{"epoch": 0, "iter": 5947, "iter_tflops": 13.371409923144, "iter_time": 1.5429258117675784, "loss": 0.4649011492729187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.235740846104772, "step_time": 1.2707207946777344} +{"epoch": 0, "iter": 5948, "iter_tflops": 37.93537380646708, "iter_time": 0.5438484306335449, "loss": 0.7544729709625244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.95868818847657, "step_time": 0.45889002418518066} +{"epoch": 0, "iter": 5949, "iter_tflops": 20.73007408751903, "iter_time": 0.7132261657714845, "loss": 0.5512011647224426, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 22.049151333459868, "step_time": 0.6705578384399414} +{"epoch": 0, "iter": 5950, "iter_tflops": 10.139618983042624, "iter_time": 1.4581643829345703, "loss": 0.41133078932762146, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 12.867917576437625, "step_time": 1.1489995307922363} +{"epoch": 0, "iter": 5951, "iter_tflops": 21.997397554576633, "iter_time": 0.6721354751586914, "loss": 0.37394559383392334, "lr": 3e-05, "seqlen": 5920.0, 
"step_tflops": 23.817852259323658, "step_time": 0.6207625732421874} +{"epoch": 0, "iter": 5952, "iter_tflops": 21.143122271831835, "iter_time": 0.699292709350586, "loss": 0.3556690216064453, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 22.716230713808173, "step_time": 0.650866397857666} +{"epoch": 0, "iter": 5953, "iter_tflops": 17.20824151409585, "iter_time": 1.1989077148437501, "loss": 0.843632698059082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.48801149176887, "step_time": 1.1159173889160157} +{"epoch": 0, "iter": 5954, "iter_tflops": 15.894420784760698, "iter_time": 1.2980085144042968, "loss": 0.8046153783798218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.477207891197573, "step_time": 1.0075149707794189} +{"epoch": 0, "iter": 5955, "iter_tflops": 40.39708102745821, "iter_time": 0.5107075309753418, "loss": 0.849425196647644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.942974134696605, "step_time": 0.469496976852417} +{"epoch": 0, "iter": 5956, "iter_tflops": 37.349561178238176, "iter_time": 0.5523784713745117, "loss": 0.8481555581092834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.67398384235971, "step_time": 0.5072307052612305} +{"epoch": 0, "iter": 5957, "iter_tflops": 18.967678138993087, "iter_time": 0.7579808578491211, "loss": 0.04313283413648605, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 21.018345315147688, "step_time": 0.6840280113220215} +{"epoch": 0, "iter": 5958, "iter_tflops": 35.078496265099155, "iter_time": 0.4098561363220215, "loss": 0.059898741543293, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 38.57460756521534, "step_time": 0.37270986938476564} +{"epoch": 0, "iter": 5959, "iter_tflops": 39.326904300615574, "iter_time": 0.36558018493652344, "loss": 0.04761430621147156, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 43.103839620150765, "step_time": 0.3335465488433838} +{"epoch": 0, "iter": 5960, "iter_tflops": 33.92407433692076, "iter_time": 0.423803367614746, "loss": 0.03563803806900978, "lr": 3e-05, "seqlen": 5760.0, 
"step_tflops": 37.17712584630573, "step_time": 0.3867199687957763} +{"epoch": 0, "iter": 5961, "iter_tflops": 23.22552092208561, "iter_time": 0.8882941131591796, "loss": 0.2243116796016693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.68923823800395, "step_time": 0.8356310272216796} +{"epoch": 0, "iter": 5962, "iter_tflops": 14.832938955515177, "iter_time": 1.3908972167968752, "loss": 0.3523092269897461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.930646662161177, "step_time": 1.0351442108154296} +{"epoch": 0, "iter": 5963, "iter_tflops": 50.7184180593636, "iter_time": 0.40677714920043945, "loss": 0.3058299124240875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.261601166854526, "step_time": 0.3733350658416748} +{"epoch": 0, "iter": 5964, "iter_tflops": 46.53978844934567, "iter_time": 0.44330011367797856, "loss": 0.2864243984222412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.57010380604791, "step_time": 0.40797016334533687} +{"epoch": 0, "iter": 5965, "iter_tflops": 31.881892637679133, "iter_time": 0.6471100616455079, "loss": 0.12355571240186691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.90606526748445, "step_time": 0.6084779624938964} +{"epoch": 0, "iter": 5966, "iter_tflops": 25.90011469956635, "iter_time": 0.7965637893676757, "loss": 0.21969346702098846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.890861011133012, "step_time": 0.6678704586029052} +{"epoch": 0, "iter": 5967, "iter_tflops": 48.32216686327986, "iter_time": 0.4269488487243652, "loss": 0.19162912666797638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.38189824174796, "step_time": 0.39385921859741213} +{"epoch": 0, "iter": 5968, "iter_tflops": 45.75206264405442, "iter_time": 0.45093253326416016, "loss": 0.18017923831939697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.56970877835304, "step_time": 0.41620364570617674} +{"epoch": 0, "iter": 5969, "iter_tflops": 29.370683560497593, "iter_time": 0.7024383163452148, "loss": 0.0018961407477036119, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 31.067468700619298, "step_time": 0.6640738487243651} +{"epoch": 0, "iter": 5970, "iter_tflops": 14.832248223301288, "iter_time": 1.3909619903564452, "loss": 0.002911145333200693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.760674891779637, "step_time": 1.0996988983154297} +{"epoch": 0, "iter": 5971, "iter_tflops": 53.60258425732344, "iter_time": 0.3848899040222168, "loss": 0.0021765008568763733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.59452068805489, "step_time": 0.3404778728485108} +{"epoch": 0, "iter": 5972, "iter_tflops": 60.75049503887743, "iter_time": 0.33960371017456054, "loss": 0.0027402190025895834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.43382751922444, "step_time": 0.31055102920532224} +{"epoch": 0, "iter": 5973, "iter_tflops": 47.90471023609986, "iter_time": 0.4306694145202637, "loss": 0.29914310574531555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.56607173416229, "step_time": 0.39247927093505863} +{"epoch": 0, "iter": 5974, "iter_tflops": 46.563848024133186, "iter_time": 0.443071060180664, "loss": 0.29636532068252563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.75332691383833, "step_time": 0.4064973621368408} +{"epoch": 0, "iter": 5975, "iter_tflops": 49.445690178936005, "iter_time": 0.41724755859374996, "loss": 0.2882106900215149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.504165072854754, "step_time": 0.3855978965759277} +{"epoch": 0, "iter": 5976, "iter_tflops": 49.1704722341527, "iter_time": 0.4195829849243164, "loss": 0.27600547671318054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.34650731723509, "step_time": 0.38673747444152834} +{"epoch": 0, "iter": 5977, "iter_tflops": 30.678136703461615, "iter_time": 0.6725015182495118, "loss": 0.8080319166183472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.54258006360685, "step_time": 0.6339722747802734} +{"epoch": 0, "iter": 5978, "iter_tflops": 15.893222622228054, "iter_time": 1.2981063690185546, "loss": 0.9243006110191345, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 19.165502196694415, "step_time": 1.0764702796936034} +{"epoch": 0, "iter": 5979, "iter_tflops": 35.584983904353436, "iter_time": 0.5797696456909179, "loss": 0.840816080570221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.98825783118283, "step_time": 0.5291617183685302} +{"epoch": 0, "iter": 5980, "iter_tflops": 39.47460009002711, "iter_time": 0.5226422424316406, "loss": 0.6933998465538025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41565442366906, "step_time": 0.47519941329956056} +{"epoch": 0, "iter": 5981, "iter_tflops": 17.326223680272193, "iter_time": 1.1907438049316408, "loss": 0.12381605058908463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.239908965002943, "step_time": 1.1310962982177735} +{"epoch": 0, "iter": 5982, "iter_tflops": 16.21000741229585, "iter_time": 1.272738067626953, "loss": 0.09532836824655533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.714621795625288, "step_time": 1.1024050464630128} +{"epoch": 0, "iter": 5983, "iter_tflops": 52.26253064504124, "iter_time": 0.3947587928771973, "loss": 0.14312386512756348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.04603291037213, "step_time": 0.3616569366455078} +{"epoch": 0, "iter": 5984, "iter_tflops": 55.21764457478447, "iter_time": 0.3736322631835937, "loss": 0.17598028481006622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.8295012060412, "step_time": 0.34483144760131834} +{"epoch": 0, "iter": 5985, "iter_tflops": 19.273409229248823, "iter_time": 1.070443389892578, "loss": 0.4800352454185486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.031533654754373, "step_time": 1.0299308013916015} +{"epoch": 0, "iter": 5986, "iter_tflops": 15.118646109430745, "iter_time": 1.3646125030517577, "loss": 0.3632473349571228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.420277152505836, "step_time": 1.0623480472564697} +{"epoch": 0, "iter": 5987, "iter_tflops": 38.134835491074675, "iter_time": 0.5410038681030274, "loss": 0.3203553259372711, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.54697215975165, "step_time": 0.49657273292541504} +{"epoch": 0, "iter": 5988, "iter_tflops": 36.979322326699176, "iter_time": 0.5579089126586915, "loss": 0.33641669154167175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.66450501382376, "step_time": 0.5073489398956299} +{"epoch": 0, "iter": 5989, "iter_tflops": 13.24591625154114, "iter_time": 0.9441385879516602, "loss": 0.010506833903491497, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 14.246183649043498, "step_time": 0.8778477783203126} +{"epoch": 0, "iter": 5990, "iter_tflops": 10.776887479636516, "iter_time": 1.160444580078125, "loss": 0.0019754364620894194, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 14.258418789453202, "step_time": 0.877094497680664} +{"epoch": 0, "iter": 5991, "iter_tflops": 25.470777932313887, "iter_time": 0.4909932746887207, "loss": 0.0014164309250190854, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 28.090901553199924, "step_time": 0.44519684219360356} +{"epoch": 0, "iter": 5992, "iter_tflops": 27.74627511797323, "iter_time": 0.4507264709472656, "loss": 0.007742753252387047, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 30.653071111332284, "step_time": 0.40798459053039554} +{"epoch": 0, "iter": 5993, "iter_tflops": 31.001155889617685, "iter_time": 0.6654943313598632, "loss": 0.3041727542877197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.08813131209966, "step_time": 0.6052280578613282} +{"epoch": 0, "iter": 5994, "iter_tflops": 37.19761893308215, "iter_time": 0.5546347885131836, "loss": 0.25405141711235046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27625086859785, "step_time": 0.49982963752746584} +{"epoch": 0, "iter": 5995, "iter_tflops": 36.38153249075199, "iter_time": 0.5670759887695312, "loss": 0.30425575375556946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.888290555479045, "step_time": 0.5172218017578125} +{"epoch": 0, "iter": 5996, "iter_tflops": 42.194095736289455, "iter_time": 0.4889568824768066, "loss": 
0.4853253662586212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.182206635435975, "step_time": 0.44673251914978024} +{"epoch": 0, "iter": 5997, "iter_tflops": 18.8079613306636, "iter_time": 1.096934066772461, "loss": 0.8013164401054382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.710445822382884, "step_time": 1.0467086181640626} +{"epoch": 0, "iter": 5998, "iter_tflops": 9.846633441186952, "iter_time": 2.095243377685547, "loss": 0.981870174407959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.087468607387759, "step_time": 1.7068167190551757} +{"epoch": 0, "iter": 5999, "iter_tflops": 10.752710834573396, "iter_time": 1.9186876525878906, "loss": 0.8390468955039978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.29469825376788, "step_time": 1.5518286399841308} +{"epoch": 0, "iter": 6000, "iter_tflops": 4.226514901550669, "iter_time": 4.881348815917969, "loss": 0.8264086842536926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.257478768143849, "step_time": 4.84584765625} +{"epoch": 0, "iter": 6001, "iter_tflops": 4.96825391870091, "iter_time": 3.0746349792480467, "loss": 0.45617038011550903, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 5.0584692162176, "step_time": 3.019800384521484} +{"epoch": 0, "iter": 6002, "iter_tflops": 15.507436540640057, "iter_time": 0.985047866821289, "loss": 0.37167850136756897, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 17.53567008842842, "step_time": 0.8711139755249023} +{"epoch": 0, "iter": 6003, "iter_tflops": 17.519466155318963, "iter_time": 0.871919677734375, "loss": 0.5057553648948669, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 20.317776522323467, "step_time": 0.7518326263427736} +{"epoch": 0, "iter": 6004, "iter_tflops": 24.661190912811765, "iter_time": 0.6194172592163085, "loss": 0.5327070355415344, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 27.131000504140705, "step_time": 0.5630300025939942} +{"epoch": 0, "iter": 6005, "iter_tflops": 44.51960381296254, "iter_time": 0.4634159278869629, "loss": 
0.10149253159761429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.603410231627905, "step_time": 0.4244783115386963} +{"epoch": 0, "iter": 6006, "iter_tflops": 23.562985462461462, "iter_time": 0.8755721359252929, "loss": 0.2996499240398407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.34947428976333, "step_time": 0.7277416610717773} +{"epoch": 0, "iter": 6007, "iter_tflops": 34.78932822483738, "iter_time": 0.5930293731689453, "loss": 0.13358521461486816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.488520608823535, "step_time": 0.5503309593200684} +{"epoch": 0, "iter": 6008, "iter_tflops": 34.07826891244397, "iter_time": 0.6054032135009766, "loss": 0.1351180374622345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.6406133791902, "step_time": 0.5339225158691406} +{"epoch": 0, "iter": 6009, "iter_tflops": 18.022105297978726, "iter_time": 1.1447660064697267, "loss": 0.9193934798240662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.651719786566037, "step_time": 1.0498365402221679} +{"epoch": 0, "iter": 6010, "iter_tflops": 23.988626160199573, "iter_time": 0.8600364761352539, "loss": 0.8506351113319397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.526867246359103, "step_time": 0.7777433090209961} +{"epoch": 0, "iter": 6011, "iter_tflops": 24.997533797465756, "iter_time": 0.8253251571655275, "loss": 0.7769275903701782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.87913351794171, "step_time": 0.7143944778442383} +{"epoch": 0, "iter": 6012, "iter_tflops": 26.775193523698363, "iter_time": 0.7705301361083985, "loss": 0.7357971668243408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.296335603522348, "step_time": 0.6592175445556641} +{"epoch": 0, "iter": 6013, "iter_tflops": 9.87510885376447, "iter_time": 2.089201629638672, "loss": 0.1066216379404068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.400967573853963, "step_time": 1.9835744476318358} +{"epoch": 0, "iter": 6014, "iter_tflops": 26.852434310467874, "iter_time": 0.7683137130737304, "loss": 
0.08267256617546082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.800600679289825, "step_time": 0.6487642707824708} +{"epoch": 0, "iter": 6015, "iter_tflops": 38.10813685922452, "iter_time": 0.5413828964233398, "loss": 0.11911846697330475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.461384749402974, "step_time": 0.474699405670166} +{"epoch": 0, "iter": 6016, "iter_tflops": 37.38055909964137, "iter_time": 0.55192041015625, "loss": 0.1872635781764984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.102067840421135, "step_time": 0.5144645805358886} +{"epoch": 0, "iter": 6017, "iter_tflops": 4.816014206325538, "iter_time": 4.283852294921875, "loss": 0.2508111596107483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.8979949679416706, "step_time": 4.212150817871094} +{"epoch": 0, "iter": 6018, "iter_tflops": 27.581229078015618, "iter_time": 0.7480121154785155, "loss": 0.3113694190979004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.004327722524316, "step_time": 0.6446344909667969} +{"epoch": 0, "iter": 6019, "iter_tflops": 34.32172652118344, "iter_time": 0.6011088485717773, "loss": 0.3096567690372467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.04465381135348, "step_time": 0.5152021942138671} +{"epoch": 0, "iter": 6020, "iter_tflops": 39.32228426324223, "iter_time": 0.5246667098999024, "loss": 0.2687595784664154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.81959346180668, "step_time": 0.49333558273315425} +{"epoch": 0, "iter": 6021, "iter_tflops": 5.920072644347843, "iter_time": 2.5664883422851563, "loss": 0.27343088388442993, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 6.067397334498952, "step_time": 2.504170501708985} +{"epoch": 0, "iter": 6022, "iter_tflops": 16.17562643269005, "iter_time": 0.9393019485473634, "loss": 0.3615652918815613, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 18.40936768679388, "step_time": 0.8253296737670899} +{"epoch": 0, "iter": 6023, "iter_tflops": 21.559855034338714, "iter_time": 0.7047263259887695, "loss": 
0.5182982087135315, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.455669249376545, "step_time": 0.5968728332519532} +{"epoch": 0, "iter": 6024, "iter_tflops": 19.721576633879373, "iter_time": 0.7704149475097657, "loss": 0.389919251203537, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 22.685689413233906, "step_time": 0.6697525100708007} +{"epoch": 0, "iter": 6025, "iter_tflops": 10.041011890732912, "iter_time": 2.054682708740235, "loss": 0.07817425578832626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.26000268626923, "step_time": 2.0108273010253903} +{"epoch": 0, "iter": 6026, "iter_tflops": 29.292639209746525, "iter_time": 0.7043098220825195, "loss": 0.10986730456352234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.12670840908527, "step_time": 0.6045439033508302} +{"epoch": 0, "iter": 6027, "iter_tflops": 47.13601684500539, "iter_time": 0.4376927642822266, "loss": 0.044980477541685104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.53551906277706, "step_time": 0.3853720645904541} +{"epoch": 0, "iter": 6028, "iter_tflops": 49.25704710696669, "iter_time": 0.41884552001953124, "loss": 0.09339670836925507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.38155806711698, "step_time": 0.3864835395812989} +{"epoch": 0, "iter": 6029, "iter_tflops": 11.130307568417702, "iter_time": 1.8535959930419919, "loss": 0.5029619336128235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.383107446892796, "step_time": 1.8124307098388674} +{"epoch": 0, "iter": 6030, "iter_tflops": 26.534043697765107, "iter_time": 0.7775329589843749, "loss": 0.7577030658721924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.25194393466304, "step_time": 0.6601539268493652} +{"epoch": 0, "iter": 6031, "iter_tflops": 26.70739010227091, "iter_time": 0.7724863204956055, "loss": 0.46662089228630066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.772046021509734, "step_time": 0.6704491958618164} +{"epoch": 0, "iter": 6032, "iter_tflops": 33.86311033076689, "iter_time": 0.6092498092651368, 
"loss": 0.5932550430297852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.373881143801725, "step_time": 0.5376337471008301} +{"epoch": 0, "iter": 6033, "iter_tflops": 12.005145290499811, "iter_time": 1.7185209350585937, "loss": 0.9146751761436462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.347037959889308, "step_time": 1.6709346466064454} +{"epoch": 0, "iter": 6034, "iter_tflops": 20.638391235963507, "iter_time": 0.9996464004516602, "loss": 0.7971264719963074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.17641274062134, "step_time": 0.8194612045288087} +{"epoch": 0, "iter": 6035, "iter_tflops": 21.960931704459455, "iter_time": 0.9394452743530274, "loss": 0.6088705658912659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.437631847241047, "step_time": 0.8442345657348632} +{"epoch": 0, "iter": 6036, "iter_tflops": 25.061938359716727, "iter_time": 0.8232042236328124, "loss": 0.9063757061958313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.332966415284574, "step_time": 0.6801541671752929} +{"epoch": 0, "iter": 6037, "iter_tflops": 22.565373793313352, "iter_time": 0.9142810440063476, "loss": 0.6824366450309753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.779695537560105, "step_time": 0.8002846069335937} +{"epoch": 0, "iter": 6038, "iter_tflops": 38.01302466509166, "iter_time": 0.5427374877929688, "loss": 0.8299887776374817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62620159835843, "step_time": 0.49562757873535157} +{"epoch": 0, "iter": 6039, "iter_tflops": 37.76309323590127, "iter_time": 0.5463295440673828, "loss": 0.8425344228744507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.149612665655255, "step_time": 0.5013678665161133} +{"epoch": 0, "iter": 6040, "iter_tflops": 22.52488613177579, "iter_time": 0.915924430847168, "loss": 0.8529111742973328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.996443702421203, "step_time": 0.7642152328491211} +{"epoch": 0, "iter": 6041, "iter_tflops": 4.478015704594949, "iter_time": 4.607195434570312, 
"loss": 0.4868590533733368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.593087798599421, "step_time": 4.491769897460938} +{"epoch": 0, "iter": 6042, "iter_tflops": 21.39674881999262, "iter_time": 0.9642162780761719, "loss": 0.516730546951294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.237715368587438, "step_time": 0.7863144035339357} +{"epoch": 0, "iter": 6043, "iter_tflops": 34.909886379397996, "iter_time": 0.5909813995361328, "loss": 0.6762954592704773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.524410047303036, "step_time": 0.5355330162048341} +{"epoch": 0, "iter": 6044, "iter_tflops": 24.325566588599994, "iter_time": 0.8481238632202148, "loss": 0.5020751953125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.19502712153969, "step_time": 0.7066646461486816} +{"epoch": 0, "iter": 6045, "iter_tflops": 11.304275800839262, "iter_time": 1.3657808685302733, "loss": 0.3500838875770569, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.976869385323266, "step_time": 1.1897448577880863} +{"epoch": 0, "iter": 6046, "iter_tflops": 15.024341542225033, "iter_time": 1.0276100006103517, "loss": 0.36467641592025757, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 17.330007868316006, "step_time": 0.8908918991088868} +{"epoch": 0, "iter": 6047, "iter_tflops": 20.16960087277454, "iter_time": 0.7654669876098633, "loss": 0.40298327803611755, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 22.916745832201087, "step_time": 0.673706630706787} +{"epoch": 0, "iter": 6048, "iter_tflops": 17.842021442629306, "iter_time": 0.8653259201049806, "loss": 0.4649682343006134, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 19.789033244464417, "step_time": 0.7801878662109375} +{"epoch": 0, "iter": 6049, "iter_tflops": 22.428033985846284, "iter_time": 0.9198797149658202, "loss": 0.11479349434375763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.97767878947385, "step_time": 0.8259812164306641} +{"epoch": 0, "iter": 6050, "iter_tflops": 22.093898120356684, "iter_time": 0.9337914657592773, 
"loss": 0.12364012748003006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.246506197738967, "step_time": 0.786051040649414} +{"epoch": 0, "iter": 6051, "iter_tflops": 28.079848843762687, "iter_time": 0.7347295074462891, "loss": 0.16359402239322662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.02615284748222, "step_time": 0.6441951866149902} +{"epoch": 0, "iter": 6052, "iter_tflops": 30.123184100633107, "iter_time": 0.6848908615112305, "loss": 0.12102297693490982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.147866197463046, "step_time": 0.6223958244323732} +{"epoch": 0, "iter": 6053, "iter_tflops": 18.462847628817133, "iter_time": 1.1174383239746093, "loss": 0.6742198467254639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.38313129962421, "step_time": 1.0121650695800781} +{"epoch": 0, "iter": 6054, "iter_tflops": 23.0550917734182, "iter_time": 0.894860610961914, "loss": 0.5820837616920471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.33880334567323, "step_time": 0.7832965393066406} +{"epoch": 0, "iter": 6055, "iter_tflops": 25.91492440374562, "iter_time": 0.7961085739135743, "loss": 0.8388804793357849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.056775111867477, "step_time": 0.6864040946960449} +{"epoch": 0, "iter": 6056, "iter_tflops": 33.111420107120864, "iter_time": 0.6230809020996093, "loss": 0.6087936162948608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.39132142627219, "step_time": 0.5669234504699707} +{"epoch": 0, "iter": 6057, "iter_tflops": 12.214117503166408, "iter_time": 1.689118637084961, "loss": 0.7113794088363647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.099215115134356, "step_time": 1.5749869995117185} +{"epoch": 0, "iter": 6058, "iter_tflops": 23.315804756162255, "iter_time": 0.8848544464111328, "loss": 0.6130163669586182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.142507130455673, "step_time": 0.7330936584472656} +{"epoch": 0, "iter": 6059, "iter_tflops": 36.41691277472882, "iter_time": 0.5665250549316405, 
"loss": 0.697395920753479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.62431334371695, "step_time": 0.5206675338745117} +{"epoch": 0, "iter": 6060, "iter_tflops": 37.62199493949302, "iter_time": 0.5483785095214844, "loss": 0.7097516655921936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.985346750228864, "step_time": 0.5033773078918456} +{"epoch": 0, "iter": 6061, "iter_tflops": 23.312426875985224, "iter_time": 0.8849826583862305, "loss": 0.6955771446228027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.255603897445045, "step_time": 0.8168917121887206} +{"epoch": 0, "iter": 6062, "iter_tflops": 45.269880435795386, "iter_time": 0.4557355422973633, "loss": 0.6694575548171997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.33678262278396, "step_time": 0.4181686038970947} +{"epoch": 0, "iter": 6063, "iter_tflops": 46.82560125330087, "iter_time": 0.44059431076049804, "loss": 0.67368483543396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.65630797021043, "step_time": 0.4072759017944336} +{"epoch": 0, "iter": 6064, "iter_tflops": 48.08155049565135, "iter_time": 0.4290854454040527, "loss": 0.7158787846565247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.07699377353131, "step_time": 0.39616521644592284} +{"epoch": 0, "iter": 6065, "iter_tflops": 30.018631868369404, "iter_time": 0.6872762756347656, "loss": 0.6532849669456482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.868783559501058, "step_time": 0.6473762474060059} +{"epoch": 0, "iter": 6066, "iter_tflops": 10.39288794520648, "iter_time": 1.9851165161132813, "loss": 0.8348523378372192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.757518867358563, "step_time": 1.7547148971557618} +{"epoch": 0, "iter": 6067, "iter_tflops": 15.551771434682642, "iter_time": 1.3266072998046874, "loss": 0.8107741475105286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.294629204623707, "step_time": 1.1277131271362304} +{"epoch": 0, "iter": 6068, "iter_tflops": 15.26588465228893, "iter_time": 1.3514508972167971, 
"loss": 0.8673800230026245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.150592211041488, "step_time": 1.1366622791290284} +{"epoch": 0, "iter": 6069, "iter_tflops": 24.56125231900551, "iter_time": 0.73391064453125, "loss": 0.41595813632011414, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 26.76838467485219, "step_time": 0.6733975448608398} +{"epoch": 0, "iter": 6070, "iter_tflops": 29.901920493660953, "iter_time": 0.6028296585083008, "loss": 0.41960811614990234, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 31.849575852100813, "step_time": 0.5659656066894532} +{"epoch": 0, "iter": 6071, "iter_tflops": 32.94077874297448, "iter_time": 0.5472173156738281, "loss": 0.31485715508461, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 35.057343154037795, "step_time": 0.5141794242858887} +{"epoch": 0, "iter": 6072, "iter_tflops": 33.69088651545826, "iter_time": 0.5350338439941407, "loss": 0.45247069001197815, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 35.7031592875285, "step_time": 0.5048786964416504} +{"epoch": 0, "iter": 6073, "iter_tflops": 29.553512829002965, "iter_time": 0.6980927658081055, "loss": 0.002361483173444867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.476202297804804, "step_time": 0.6554505310058594} +{"epoch": 0, "iter": 6074, "iter_tflops": 16.97167032457667, "iter_time": 1.2156195068359374, "loss": 0.0038416313473135233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.771480511105743, "step_time": 0.9060058040618897} +{"epoch": 0, "iter": 6075, "iter_tflops": 57.83466438778373, "iter_time": 0.3567253952026367, "loss": 0.001755784614942968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.36519487814074, "step_time": 0.325590311050415} +{"epoch": 0, "iter": 6076, "iter_tflops": 56.33354980191129, "iter_time": 0.3662310218811035, "loss": 0.00655024079605937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.995291547338795, "step_time": 0.33278484535217284} +{"epoch": 0, "iter": 6077, "iter_tflops": 40.35932178440799, "iter_time": 
0.5111853370666505, "loss": 0.9679447412490845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87939551341816, "step_time": 0.4701772499084472} +{"epoch": 0, "iter": 6078, "iter_tflops": 44.64709690268781, "iter_time": 0.46209260940551755, "loss": 0.9262890219688416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.26371740473887, "step_time": 0.42746590232849124} +{"epoch": 0, "iter": 6079, "iter_tflops": 45.556956555771315, "iter_time": 0.45286373519897455, "loss": 1.005391240119934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.28859152830224, "step_time": 0.41857746124267575} +{"epoch": 0, "iter": 6080, "iter_tflops": 44.71016884789161, "iter_time": 0.4614407424926758, "loss": 0.9889989495277405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.04288543386012, "step_time": 0.4294307746887207} +{"epoch": 0, "iter": 6081, "iter_tflops": 40.15852253231072, "iter_time": 0.5137413482666016, "loss": 0.29986411333084106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.74084266950898, "step_time": 0.471666576385498} +{"epoch": 0, "iter": 6082, "iter_tflops": 35.32298147553666, "iter_time": 0.5840699920654295, "loss": 0.2733164131641388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.86381532173198, "step_time": 0.5308561019897461} +{"epoch": 0, "iter": 6083, "iter_tflops": 40.5595918564838, "iter_time": 0.5086612701416016, "loss": 0.38320687413215637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.04613373725256, "step_time": 0.4683973770141602} +{"epoch": 0, "iter": 6084, "iter_tflops": 41.85115245973213, "iter_time": 0.49296356964111326, "loss": 0.3253799378871918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59741984269848, "step_time": 0.45246186256408694} +{"epoch": 0, "iter": 6085, "iter_tflops": 18.684248991103516, "iter_time": 1.1041970977783202, "loss": 0.46582871675491333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.16623090124537, "step_time": 1.0230515365600585} +{"epoch": 0, "iter": 6086, "iter_tflops": 24.131693205315507, "iter_time": 
0.8549376678466798, "loss": 0.4148207902908325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.74973322586564, "step_time": 0.5613943748474121} +{"epoch": 0, "iter": 6087, "iter_tflops": 46.127075271397224, "iter_time": 0.44726645660400394, "loss": 0.5124208927154541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88614489645122, "step_time": 0.41356359672546383} +{"epoch": 0, "iter": 6088, "iter_tflops": 44.0102406106752, "iter_time": 0.46877938461303703, "loss": 0.4391621947288513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32109298085578, "step_time": 0.43598091697692876} +{"epoch": 0, "iter": 6089, "iter_tflops": 42.26460978197462, "iter_time": 0.4881411094665527, "loss": 0.210154190659523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.07885031974903, "step_time": 0.4477345542907716} +{"epoch": 0, "iter": 6090, "iter_tflops": 17.60634214431905, "iter_time": 1.1717989654541014, "loss": 0.31822043657302856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.908919898111062, "step_time": 0.7962930755615234} +{"epoch": 0, "iter": 6091, "iter_tflops": 38.66610410445771, "iter_time": 0.533570526123047, "loss": 0.3140307366847992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.520973211190444, "step_time": 0.485198055267334} +{"epoch": 0, "iter": 6092, "iter_tflops": 41.6073110181564, "iter_time": 0.49585260391235353, "loss": 0.3086665868759155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.68378271827146, "step_time": 0.45160650634765626} +{"epoch": 0, "iter": 6093, "iter_tflops": 22.22194500936686, "iter_time": 0.9284107894897461, "loss": 0.36330151557922363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.825677891170578, "step_time": 0.8659184265136719} +{"epoch": 0, "iter": 6094, "iter_tflops": 21.042790131407823, "iter_time": 0.9804352645874024, "loss": 0.38557666540145874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.3047379925967, "step_time": 0.8153055572509765} +{"epoch": 0, "iter": 6095, "iter_tflops": 40.07908586988027, "iter_time": 
0.5147595825195312, "loss": 0.3841168284416199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.866531396131776, "step_time": 0.47031513214111326} +{"epoch": 0, "iter": 6096, "iter_tflops": 42.934659253118035, "iter_time": 0.4805230522155762, "loss": 0.42753395438194275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.823624917011045, "step_time": 0.44061290740966796} +{"epoch": 0, "iter": 6097, "iter_tflops": 15.710081605143321, "iter_time": 1.3132391052246093, "loss": 0.4428136944770813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.75548434992722, "step_time": 1.2313039169311524} +{"epoch": 0, "iter": 6098, "iter_tflops": 23.24687509334715, "iter_time": 0.887478141784668, "loss": 0.3072179853916168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.821240556712947, "step_time": 0.648343469619751} +{"epoch": 0, "iter": 6099, "iter_tflops": 49.01255668362577, "iter_time": 0.4209348564147949, "loss": 0.4158369302749634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.326955940963416, "step_time": 0.38687926483154295} +{"epoch": 0, "iter": 6100, "iter_tflops": 50.691210930933046, "iter_time": 0.406995475769043, "loss": 0.3815925717353821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.84993499932085, "step_time": 0.3761370639801025} +{"epoch": 0, "iter": 6101, "iter_tflops": 21.183458427043906, "iter_time": 0.9739247055053711, "loss": 0.9824472069740295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.037819165331175, "step_time": 0.9361676559448242} +{"epoch": 0, "iter": 6102, "iter_tflops": 44.26805995965729, "iter_time": 0.4660491905212403, "loss": 0.9252679944038391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08540862528694, "step_time": 0.42905101776123045} +{"epoch": 0, "iter": 6103, "iter_tflops": 45.04426123624658, "iter_time": 0.45801824569702154, "loss": 0.9205819964408875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.49136031029556, "step_time": 0.4254591617584229} +{"epoch": 0, "iter": 6104, "iter_tflops": 44.82006559723817, 
"iter_time": 0.4603093109130859, "loss": 0.6298949122428894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.19431306725916, "step_time": 0.4280814933776856} +{"epoch": 0, "iter": 6105, "iter_tflops": 37.537919730058164, "iter_time": 0.5496067352294922, "loss": 0.8646125793457031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.576987536724815, "step_time": 0.5084432029724121} +{"epoch": 0, "iter": 6106, "iter_tflops": 36.40038946348632, "iter_time": 0.5667822189331054, "loss": 0.8248304128646851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.714959792381606, "step_time": 0.5194791488647461} +{"epoch": 0, "iter": 6107, "iter_tflops": 39.3765739029742, "iter_time": 0.5239433364868165, "loss": 1.0121142864227295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.894539554264334, "step_time": 0.48097249031066897} +{"epoch": 0, "iter": 6108, "iter_tflops": 36.857997679494424, "iter_time": 0.5597453689575196, "loss": 0.7841967940330505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.79453112132474, "step_time": 0.5184404220581055} +{"epoch": 0, "iter": 6109, "iter_tflops": 18.406705652387977, "iter_time": 1.1208466033935547, "loss": 0.8374412059783936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.969827749160476, "step_time": 1.0331132431030274} +{"epoch": 0, "iter": 6110, "iter_tflops": 17.769377039188043, "iter_time": 1.1610476531982423, "loss": 0.7781743407249451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.134054814149113, "step_time": 0.9762013816833496} +{"epoch": 0, "iter": 6111, "iter_tflops": 35.54639167539736, "iter_time": 0.5803990936279296, "loss": 0.85959792137146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.917772612871374, "step_time": 0.5301201000213623} +{"epoch": 0, "iter": 6112, "iter_tflops": 37.02454036075553, "iter_time": 0.5572275390625, "loss": 0.60112065076828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.284848239824086, "step_time": 0.5121303520202637} +{"epoch": 0, "iter": 6113, "iter_tflops": 21.047499087099585, 
"iter_time": 0.9802159118652344, "loss": 0.41448765993118286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.375599164280658, "step_time": 0.9220353546142579} +{"epoch": 0, "iter": 6114, "iter_tflops": 8.837907915167166, "iter_time": 2.3343865661621095, "loss": 0.3792015016078949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.005414182307025, "step_time": 2.061992950439453} +{"epoch": 0, "iter": 6115, "iter_tflops": 13.69778568209706, "iter_time": 1.5061626739501954, "loss": 0.3961382210254669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.446564297011047, "step_time": 1.1825304489135742} +{"epoch": 0, "iter": 6116, "iter_tflops": 40.06639264425378, "iter_time": 0.5149226608276367, "loss": 0.5015131831169128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.972942706883806, "step_time": 0.4691770038604736} +{"epoch": 0, "iter": 6117, "iter_tflops": 14.566284091272678, "iter_time": 1.0908370819091797, "loss": 0.5652139186859131, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 15.520150471490563, "step_time": 1.0237943801879883} +{"epoch": 0, "iter": 6118, "iter_tflops": 11.202146642559871, "iter_time": 1.4184283905029296, "loss": 0.4503100514411926, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 14.013996157031832, "step_time": 1.133826686859131} +{"epoch": 0, "iter": 6119, "iter_tflops": 28.258180133280305, "iter_time": 0.5622953338623047, "loss": 0.42095014452934265, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.0761471457165, "step_time": 0.5283071250915528} +{"epoch": 0, "iter": 6120, "iter_tflops": 29.113820909877038, "iter_time": 0.5457697525024414, "loss": 0.34590405225753784, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.951399277764416, "step_time": 0.5133675117492675} +{"epoch": 0, "iter": 6121, "iter_tflops": 21.9953264834568, "iter_time": 0.9379762344360352, "loss": 0.7138915061950684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.96235095609818, "step_time": 0.8984747924804688} +{"epoch": 0, "iter": 6122, "iter_tflops": 11.908323551942976, 
"iter_time": 1.7324935302734377, "loss": 0.8118674755096436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.827211134295293, "step_time": 1.3035204582214357} +{"epoch": 0, "iter": 6123, "iter_tflops": 46.509688626991334, "iter_time": 0.4435870056152344, "loss": 0.9282997250556946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.4180481314592, "step_time": 0.40920055961608887} +{"epoch": 0, "iter": 6124, "iter_tflops": 44.2198912171375, "iter_time": 0.46655685806274416, "loss": 0.8637209534645081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.89069032759345, "step_time": 0.43079549217224117} +{"epoch": 0, "iter": 6125, "iter_tflops": 26.20740517877553, "iter_time": 0.7872238159179688, "loss": 0.793422520160675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.618946884575085, "step_time": 0.746990592956543} +{"epoch": 0, "iter": 6126, "iter_tflops": 14.463027581843875, "iter_time": 1.4264712829589843, "loss": 0.5626058578491211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.400525493079087, "step_time": 1.12122306060791} +{"epoch": 0, "iter": 6127, "iter_tflops": 35.2928741664208, "iter_time": 0.584568244934082, "loss": 0.6481854915618896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.35205784741647, "step_time": 0.5379396743774414} +{"epoch": 0, "iter": 6128, "iter_tflops": 39.46681259230931, "iter_time": 0.5227453689575196, "loss": 0.8557629585266113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.254896010116695, "step_time": 0.47696550941467286} +{"epoch": 0, "iter": 6129, "iter_tflops": 22.17334023499488, "iter_time": 0.9304458999633789, "loss": 0.1921229064464569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.64864031150284, "step_time": 0.8724008331298827} +{"epoch": 0, "iter": 6130, "iter_tflops": 9.012211258102836, "iter_time": 2.289237670898437, "loss": 0.1879291534423828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.225330826520745, "step_time": 2.017645576477051} +{"epoch": 0, "iter": 6131, "iter_tflops": 16.05054771453195, 
"iter_time": 1.2853825225830078, "loss": 0.2089037001132965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.66864027157973, "step_time": 1.0489333896636963} +{"epoch": 0, "iter": 6132, "iter_tflops": 39.91306441863023, "iter_time": 0.5169007644653321, "loss": 0.17694982886314392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.525702715844, "step_time": 0.4739979419708251} +{"epoch": 0, "iter": 6133, "iter_tflops": 14.157547440633042, "iter_time": 1.1020860443115237, "loss": 0.38546180725097656, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 15.20274796504735, "step_time": 1.0263167877197266} +{"epoch": 0, "iter": 6134, "iter_tflops": 13.792279326069353, "iter_time": 1.131273162841797, "loss": 0.35471776127815247, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.64989188931851, "step_time": 0.9371133193969727} +{"epoch": 0, "iter": 6135, "iter_tflops": 27.514471253014037, "iter_time": 0.5670774230957032, "loss": 0.5277761220932007, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.315162114090974, "step_time": 0.532244556427002} +{"epoch": 0, "iter": 6136, "iter_tflops": 28.302442576956587, "iter_time": 0.5512893600463867, "loss": 0.5192927122116089, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.082023827179754, "step_time": 0.5186763877868652} +{"epoch": 0, "iter": 6137, "iter_tflops": 31.962290499410855, "iter_time": 0.6454823226928712, "loss": 0.6372528076171875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.31533697319264, "step_time": 0.601220775604248} +{"epoch": 0, "iter": 6138, "iter_tflops": 41.44381770062572, "iter_time": 0.4978087120056152, "loss": 0.7463065981864929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.65777685108486, "step_time": 0.461982099533081} +{"epoch": 0, "iter": 6139, "iter_tflops": 47.58024302297262, "iter_time": 0.43360630798339844, "loss": 0.6093536019325256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.229865936648466, "step_time": 0.4027161331176758} +{"epoch": 0, "iter": 6140, "iter_tflops": 50.44964692285093, 
"iter_time": 0.40894425964355463, "loss": 0.6149965524673462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.32152930843085, "step_time": 0.37979588890075683} +{"epoch": 0, "iter": 6141, "iter_tflops": 44.38308754596159, "iter_time": 0.46484133148193363, "loss": 0.020329540595412254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.75320237592462, "step_time": 0.42317411994934073} +{"epoch": 0, "iter": 6142, "iter_tflops": 47.823109173010124, "iter_time": 0.43140427017211913, "loss": 0.02379613369703293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.08749830932093, "step_time": 0.3960853214263916} +{"epoch": 0, "iter": 6143, "iter_tflops": 53.862796294703976, "iter_time": 0.38303049468994144, "loss": 0.009530340321362019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.170602079900654, "step_time": 0.3486713466644287} +{"epoch": 0, "iter": 6144, "iter_tflops": 61.1515402745386, "iter_time": 0.3373765144348145, "loss": 0.0064665102399885654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.08210532973249, "step_time": 0.30754988098144537} +{"epoch": 0, "iter": 6145, "iter_tflops": 27.39822297855428, "iter_time": 0.7530084533691407, "loss": 0.2509654760360718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.845542088804105, "step_time": 0.7152264099121094} +{"epoch": 0, "iter": 6146, "iter_tflops": 15.696754642403205, "iter_time": 1.3143540802001952, "loss": 0.18323810398578644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.799113430147976, "step_time": 1.1591079292297364} +{"epoch": 0, "iter": 6147, "iter_tflops": 47.161824648474635, "iter_time": 0.4374532508850097, "loss": 0.1781717985868454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.272076796733195, "step_time": 0.40238458824157713} +{"epoch": 0, "iter": 6148, "iter_tflops": 52.90762817510602, "iter_time": 0.3899455375671387, "loss": 0.21619389951229095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.179115842236975, "step_time": 0.36081518936157225} +{"epoch": 0, "iter": 6149, "iter_tflops": 
35.67334579067739, "iter_time": 0.5783335723876953, "loss": 0.05968334525823593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.44462882764668, "step_time": 0.5366443672180176} +{"epoch": 0, "iter": 6150, "iter_tflops": 24.85179210021121, "iter_time": 0.8301652221679687, "loss": 0.03437867388129234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.015835332645395, "step_time": 0.5891932411193848} +{"epoch": 0, "iter": 6151, "iter_tflops": 56.99468753977963, "iter_time": 0.3619827461242676, "loss": 0.02445167303085327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.361986607421464, "step_time": 0.33082803535461425} +{"epoch": 0, "iter": 6152, "iter_tflops": 56.79305793834345, "iter_time": 0.3632678756713867, "loss": 0.06579459458589554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.01138410973668, "step_time": 0.3326984844207764} +{"epoch": 0, "iter": 6153, "iter_tflops": 31.721880382912662, "iter_time": 0.6503742294311523, "loss": 0.5910250544548035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.73773157326442, "step_time": 0.6115139503479003} +{"epoch": 0, "iter": 6154, "iter_tflops": 12.102275233687958, "iter_time": 1.704728500366211, "loss": 0.6286860108375549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.872069008372968, "step_time": 1.2998364295959475} +{"epoch": 0, "iter": 6155, "iter_tflops": 14.625525332273874, "iter_time": 1.41062239074707, "loss": 0.49066489934921265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.197293767391532, "step_time": 1.1337451477050782} +{"epoch": 0, "iter": 6156, "iter_tflops": 15.681515606098593, "iter_time": 1.31563134765625, "loss": 0.5385321378707886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.685022125762433, "step_time": 1.10415140914917} +{"epoch": 0, "iter": 6157, "iter_tflops": 21.015737970121677, "iter_time": 0.7638714523315431, "loss": 0.36750346422195435, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 22.2443571709394, "step_time": 0.7216806564331054} +{"epoch": 0, "iter": 6158, "iter_tflops": 
12.25295262413491, "iter_time": 1.3101595001220703, "loss": 0.5053577423095703, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 14.506369172472292, "step_time": 1.1066395797729491} +{"epoch": 0, "iter": 6159, "iter_tflops": 27.24629204698132, "iter_time": 0.5891929168701172, "loss": 0.3027479946613312, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.005728551267264, "step_time": 0.5534535102844238} +{"epoch": 0, "iter": 6160, "iter_tflops": 25.32212278582852, "iter_time": 0.6339643173217774, "loss": 0.36530715227127075, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.994340838114763, "step_time": 0.594692138671875} +{"epoch": 0, "iter": 6161, "iter_tflops": 21.66602507183737, "iter_time": 0.9522325134277344, "loss": 0.4391695559024811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.660432848211173, "step_time": 0.9104456939697265} +{"epoch": 0, "iter": 6162, "iter_tflops": 21.74965294538687, "iter_time": 0.948571159362793, "loss": 0.6867257952690125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.29295027690504, "step_time": 0.8492625751495361} +{"epoch": 0, "iter": 6163, "iter_tflops": 48.22387118736067, "iter_time": 0.42781910705566406, "loss": 0.5559424757957458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.803124905537494, "step_time": 0.3907172832489013} +{"epoch": 0, "iter": 6164, "iter_tflops": 52.56531814495235, "iter_time": 0.39248489761352534, "loss": 0.5558111667633057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.96254347606837, "step_time": 0.3621870136260986} +{"epoch": 0, "iter": 6165, "iter_tflops": 47.595766555049856, "iter_time": 0.43346488571166997, "loss": 0.14977125823497772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.436483101072874, "step_time": 0.3934492225646973} +{"epoch": 0, "iter": 6166, "iter_tflops": 45.125324789599176, "iter_time": 0.4571954574584961, "loss": 0.14398832619190216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.47720426876847, "step_time": 0.4169817962646485} +{"epoch": 0, "iter": 6167, 
"iter_tflops": 50.32699110989946, "iter_time": 0.40994092941284177, "loss": 0.1347339004278183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.864726184914105, "step_time": 0.37603565979003906} +{"epoch": 0, "iter": 6168, "iter_tflops": 41.48011522637228, "iter_time": 0.4973731002807617, "loss": 0.15754689276218414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.56819165856968, "step_time": 0.4527520790100098} +{"epoch": 0, "iter": 6169, "iter_tflops": 32.17348639782199, "iter_time": 0.6412451934814453, "loss": 0.30316880345344543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.43286645999749, "step_time": 0.5991686325073242} +{"epoch": 0, "iter": 6170, "iter_tflops": 20.11641340339858, "iter_time": 1.0255850830078126, "loss": 0.22171907126903534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.90552430393474, "step_time": 0.7667976760864258} +{"epoch": 0, "iter": 6171, "iter_tflops": 48.61058532539373, "iter_time": 0.424415657043457, "loss": 0.27962496876716614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.63000742565234, "step_time": 0.39200248146057126} +{"epoch": 0, "iter": 6172, "iter_tflops": 51.16200297972034, "iter_time": 0.4032503089904785, "loss": 0.30354347825050354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.87490282579148, "step_time": 0.3692372150421143} +{"epoch": 0, "iter": 6173, "iter_tflops": 27.00178490457736, "iter_time": 0.764064064025879, "loss": 0.8200792670249939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.445571247767766, "step_time": 0.7252831497192382} +{"epoch": 0, "iter": 6174, "iter_tflops": 13.945830561839154, "iter_time": 1.4793735961914063, "loss": 0.658417820930481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.20076862671808, "step_time": 1.1994285812377932} +{"epoch": 0, "iter": 6175, "iter_tflops": 12.882999796264244, "iter_time": 1.6014199981689454, "loss": 0.805223286151886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.006223283087097, "step_time": 1.374835834503174} +{"epoch": 0, "iter": 6176, 
"iter_tflops": 25.226782025170202, "iter_time": 0.8178250198364257, "loss": 0.8490720987319946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.133310019202053, "step_time": 0.6626694526672363} +{"epoch": 0, "iter": 6177, "iter_tflops": 13.96968498064158, "iter_time": 1.1990715789794923, "loss": 0.37967991828918457, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 14.871405943744378, "step_time": 1.1263664169311525} +{"epoch": 0, "iter": 6178, "iter_tflops": 11.237161478648664, "iter_time": 1.4906479949951172, "loss": 0.34205490350723267, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 14.083384155068774, "step_time": 1.1893911323547361} +{"epoch": 0, "iter": 6179, "iter_tflops": 23.140280801172626, "iter_time": 0.7238741989135743, "loss": 0.4258723258972168, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 24.999076870866894, "step_time": 0.6700508308410644} +{"epoch": 0, "iter": 6180, "iter_tflops": 25.488835262622217, "iter_time": 0.6571760559082032, "loss": 0.3319721817970276, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 27.382318680750803, "step_time": 0.6117324256896972} +{"epoch": 0, "iter": 6181, "iter_tflops": 2.007864921840759, "iter_time": 0.6827850799560549, "loss": 0.694749116897583, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 2.2336264043434535, "step_time": 0.6137732830047606} +{"epoch": 0, "iter": 6182, "iter_tflops": 2.5027013594698824, "iter_time": 0.5477841796874999, "loss": 0.8404660820960999, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 2.796032007866813, "step_time": 0.49031635093688963} +{"epoch": 0, "iter": 6183, "iter_tflops": 2.7927831959481657, "iter_time": 0.49088673019409174, "loss": 0.7387509942054749, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.051718810158929, "step_time": 0.44923542976379394} +{"epoch": 0, "iter": 6184, "iter_tflops": 2.7714715778624193, "iter_time": 0.4946614723205566, "loss": 0.7610824108123779, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.0273401538230664, "step_time": 0.45285304641723634} +{"epoch": 0, "iter": 
6185, "iter_tflops": 28.648627783764297, "iter_time": 0.7129010620117188, "loss": 0.21553824841976166, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 31.558509227629724, "step_time": 0.6471673622131348} +{"epoch": 0, "iter": 6186, "iter_tflops": 8.99117397550453, "iter_time": 2.2715206298828123, "loss": 0.21297000348567963, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 10.365188613981102, "step_time": 1.970406707763672} +{"epoch": 0, "iter": 6187, "iter_tflops": 10.147942647888392, "iter_time": 2.0125889434814455, "loss": 0.17210742831230164, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 11.394592830963859, "step_time": 1.7923972778320314} +{"epoch": 0, "iter": 6188, "iter_tflops": 30.83583350796334, "iter_time": 0.6623345260620118, "loss": 0.2452203780412674, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 40.59910275767263, "step_time": 0.5030563678741455} +{"epoch": 0, "iter": 6189, "iter_tflops": 20.734611624116134, "iter_time": 0.7209491729736328, "loss": 0.3476497232913971, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 22.307911163970324, "step_time": 0.6701031303405761} +{"epoch": 0, "iter": 6190, "iter_tflops": 5.546339127143848, "iter_time": 2.695219451904297, "loss": 0.3034222722053528, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 7.143553146661417, "step_time": 2.0926002502441405} +{"epoch": 0, "iter": 6191, "iter_tflops": 9.8380359532341, "iter_time": 1.5194700622558595, "loss": 0.5017874240875244, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 11.48005893287005, "step_time": 1.3021362686157225} +{"epoch": 0, "iter": 6192, "iter_tflops": 17.063401800506693, "iter_time": 0.8760621871948244, "loss": 0.28903594613075256, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.45087002767854, "step_time": 0.6374433479309082} +{"epoch": 0, "iter": 6193, "iter_tflops": 14.020764891872775, "iter_time": 1.2181417694091796, "loss": 0.295626163482666, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 14.971023506349937, "step_time": 1.140822425842285} +{"epoch": 0, "iter": 
6194, "iter_tflops": 10.813073747927048, "iter_time": 1.579502716064453, "loss": 0.3938775062561035, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 13.734000983481826, "step_time": 1.243576389312744} +{"epoch": 0, "iter": 6195, "iter_tflops": 30.890050915643293, "iter_time": 0.5529055099487306, "loss": 0.2912858724594116, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 32.93810821503576, "step_time": 0.5185264205932618} +{"epoch": 0, "iter": 6196, "iter_tflops": 31.599607220507668, "iter_time": 0.5404902420043947, "loss": 0.42603155970573425, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 33.466451042327805, "step_time": 0.5103403205871582} +{"epoch": 0, "iter": 6197, "iter_tflops": 32.00688506006159, "iter_time": 0.6445829849243164, "loss": 0.19607612490653992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.21068411054791, "step_time": 0.6030599517822266} +{"epoch": 0, "iter": 6198, "iter_tflops": 19.431719991331516, "iter_time": 1.061722457885742, "loss": 0.22127166390419006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.777729791372867, "step_time": 0.8676645622253419} +{"epoch": 0, "iter": 6199, "iter_tflops": 38.64972321492451, "iter_time": 0.5337966690063477, "loss": 0.24118280410766602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.547788082842, "step_time": 0.4848922691345215} +{"epoch": 0, "iter": 6200, "iter_tflops": 36.50164332203553, "iter_time": 0.5652099914550781, "loss": 0.24879516661167145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85354018143063, "step_time": 0.517672794342041} +{"epoch": 0, "iter": 6201, "iter_tflops": 21.303534928720545, "iter_time": 0.9684352188110351, "loss": 0.665435791015625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.016769401443398, "step_time": 0.8963505325317382} +{"epoch": 0, "iter": 6202, "iter_tflops": 37.577227176562616, "iter_time": 0.5490318222045899, "loss": 0.8787128329277039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.127961828914664, "step_time": 0.501631799697876} +{"epoch": 0, "iter": 
6203, "iter_tflops": 39.62600824339063, "iter_time": 0.520645263671875, "loss": 0.648928165435791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.271627933517024, "step_time": 0.47678108024597166} +{"epoch": 0, "iter": 6204, "iter_tflops": 38.64410548149324, "iter_time": 0.533874267578125, "loss": 0.741632342338562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.18493944330716, "step_time": 0.4890630111694336} +{"epoch": 0, "iter": 6205, "iter_tflops": 31.249193621799037, "iter_time": 0.6602120285034179, "loss": 0.6600205302238464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.87376889806873, "step_time": 0.6090581054687499} +{"epoch": 0, "iter": 6206, "iter_tflops": 33.37453178195293, "iter_time": 0.6181687774658203, "loss": 0.7255566716194153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.55959185104978, "step_time": 0.48475778579711915} +{"epoch": 0, "iter": 6207, "iter_tflops": 50.42316450731531, "iter_time": 0.4091590385437011, "loss": 0.6878973841667175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.872542178008224, "step_time": 0.3759820976257324} +{"epoch": 0, "iter": 6208, "iter_tflops": 49.28929272478315, "iter_time": 0.41857150650024416, "loss": 0.8444560766220093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.017833166495194, "step_time": 0.38913498115539547} +{"epoch": 0, "iter": 6209, "iter_tflops": 26.952804885379607, "iter_time": 0.7654525604248048, "loss": 0.07910695672035217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.34446635870278, "step_time": 0.7278702392578125} +{"epoch": 0, "iter": 6210, "iter_tflops": 14.785502605587395, "iter_time": 1.395359634399414, "loss": 0.22519013285636902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.68235609154494, "step_time": 1.1043089752197266} +{"epoch": 0, "iter": 6211, "iter_tflops": 40.252126197681235, "iter_time": 0.5125466766357422, "loss": 0.11066439747810364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.08852620496736, "step_time": 0.46794699859619143} +{"epoch": 0, "iter": 
6212, "iter_tflops": 40.54478662780057, "iter_time": 0.5088470115661621, "loss": 0.10857497900724411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.61013121688711, "step_time": 0.4624755172729493} +{"epoch": 0, "iter": 6213, "iter_tflops": 18.536250813888984, "iter_time": 1.1130132904052736, "loss": 0.00757176149636507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.937151292198077, "step_time": 1.0348064880371093} +{"epoch": 0, "iter": 6214, "iter_tflops": 16.969353480356343, "iter_time": 1.2157854766845704, "loss": 0.024008745327591896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.595795490750618, "step_time": 0.9553291759490968} +{"epoch": 0, "iter": 6215, "iter_tflops": 44.77357047652345, "iter_time": 0.4607873191833496, "loss": 0.022643430158495903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.49899579014778, "step_time": 0.41679822349548346} +{"epoch": 0, "iter": 6216, "iter_tflops": 52.00395077141037, "iter_time": 0.39672165679931637, "loss": 0.02308804728090763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.656978445803645, "step_time": 0.35782474327087405} +{"epoch": 0, "iter": 6217, "iter_tflops": 33.01020938977672, "iter_time": 0.6249912948608399, "loss": 0.6842687726020813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.55318414176391, "step_time": 0.5644130325317382} +{"epoch": 0, "iter": 6218, "iter_tflops": 34.5933749484644, "iter_time": 0.5963885726928712, "loss": 0.5385922789573669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.84190152514969, "step_time": 0.5451917762756348} +{"epoch": 0, "iter": 6219, "iter_tflops": 37.35108047215287, "iter_time": 0.5523560028076172, "loss": 0.524715006351471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.815722548911616, "step_time": 0.5054692707061768} +{"epoch": 0, "iter": 6220, "iter_tflops": 41.13348357878693, "iter_time": 0.5015644607543945, "loss": 0.521797776222229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.77028218037313, "step_time": 0.4608211631774902} +{"epoch": 0, 
"iter": 6221, "iter_tflops": 37.29852548659231, "iter_time": 0.553134292602539, "loss": 0.4368027150630951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.284556943068274, "step_time": 0.499729076385498} +{"epoch": 0, "iter": 6222, "iter_tflops": 36.82989545474659, "iter_time": 0.5601724700927735, "loss": 0.47580647468566895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.614001231664396, "step_time": 0.5079798316955566} +{"epoch": 0, "iter": 6223, "iter_tflops": 41.20885181490789, "iter_time": 0.5006471328735351, "loss": 0.4280683994293213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.06769035364816, "step_time": 0.4577801380157471} +{"epoch": 0, "iter": 6224, "iter_tflops": 42.297237586426256, "iter_time": 0.48776456069946283, "loss": 0.4458518624305725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.20183807448833, "step_time": 0.4465426998138427} +{"epoch": 0, "iter": 6225, "iter_tflops": 25.437196271329004, "iter_time": 0.811060043334961, "loss": 0.5250452756881714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.513033443757788, "step_time": 0.7498661880493164} +{"epoch": 0, "iter": 6226, "iter_tflops": 15.528016430810256, "iter_time": 1.3286367645263673, "loss": 0.36002951860427856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.98464196095153, "step_time": 1.1471506385803223} +{"epoch": 0, "iter": 6227, "iter_tflops": 38.12800464865894, "iter_time": 0.5411007919311523, "loss": 0.4701118469238281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76513826176051, "step_time": 0.4939788150787353} +{"epoch": 0, "iter": 6228, "iter_tflops": 38.6191676955452, "iter_time": 0.534219009399414, "loss": 0.5608624219894409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.17100529128266, "step_time": 0.4892246074676514} +{"epoch": 0, "iter": 6229, "iter_tflops": 20.255175884805777, "iter_time": 1.0185590896606445, "loss": 0.22265087068080902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.042213044297068, "step_time": 0.9359810409545899} +{"epoch": 0, 
"iter": 6230, "iter_tflops": 45.0095762667246, "iter_time": 0.4583712005615235, "loss": 0.2114645540714264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23304552511093, "step_time": 0.41904971122741697} +{"epoch": 0, "iter": 6231, "iter_tflops": 43.730233775268125, "iter_time": 0.47178100204467777, "loss": 0.19957639276981354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.346989915776554, "step_time": 0.43574245262146} +{"epoch": 0, "iter": 6232, "iter_tflops": 49.41106279949477, "iter_time": 0.41753996658325193, "loss": 0.1821051388978958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.467057099492614, "step_time": 0.385865514755249} +{"epoch": 0, "iter": 6233, "iter_tflops": 45.37611336948597, "iter_time": 0.45466859054565434, "loss": 0.5121447443962097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.5856874574477, "step_time": 0.41606952667236335} +{"epoch": 0, "iter": 6234, "iter_tflops": 40.652539734309386, "iter_time": 0.5074982681274414, "loss": 0.408511757850647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1601345279135, "step_time": 0.4568430480957031} +{"epoch": 0, "iter": 6235, "iter_tflops": 45.67654961014668, "iter_time": 0.4516780204772949, "loss": 0.3366043269634247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.12743470662563, "step_time": 0.4199505558013916} +{"epoch": 0, "iter": 6236, "iter_tflops": 49.894080263693844, "iter_time": 0.4134978218078613, "loss": 0.33075886964797974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.181693699427925, "step_time": 0.3807760906219482} +{"epoch": 0, "iter": 6237, "iter_tflops": 30.058428739844935, "iter_time": 0.6863663330078125, "loss": 0.6196037530899048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.83497435874018, "step_time": 0.6480637702941895} +{"epoch": 0, "iter": 6238, "iter_tflops": 16.598738119150433, "iter_time": 1.2429314422607423, "loss": 0.7984994649887085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.50635994449961, "step_time": 1.1148109931945802} +{"epoch": 0, 
"iter": 6239, "iter_tflops": 38.75670246026402, "iter_time": 0.5323232421875002, "loss": 0.9137102961540222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41003843414099, "step_time": 0.4864672203063965} +{"epoch": 0, "iter": 6240, "iter_tflops": 39.812773290023515, "iter_time": 0.5182028732299805, "loss": 0.7304983139038086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.353560364319684, "step_time": 0.47588002777099614} +{"epoch": 0, "iter": 6241, "iter_tflops": 16.78631504314802, "iter_time": 1.2290424346923827, "loss": 1.0167452096939087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.120957156594077, "step_time": 1.1385211791992187} +{"epoch": 0, "iter": 6242, "iter_tflops": 12.560813780380998, "iter_time": 1.6424965667724611, "loss": 0.8331059813499451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.978010068401225, "step_time": 1.2912179565429687} +{"epoch": 0, "iter": 6243, "iter_tflops": 33.05109859012209, "iter_time": 0.6242180862426757, "loss": 0.6972818374633789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.863601625737935, "step_time": 0.5752655220031739} +{"epoch": 0, "iter": 6244, "iter_tflops": 37.80986312740129, "iter_time": 0.5456537475585937, "loss": 1.0002278089523315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.15170155228334, "step_time": 0.5013424167633057} +{"epoch": 0, "iter": 6245, "iter_tflops": 21.163401861360626, "iter_time": 0.9748476943969727, "loss": 0.09990733116865158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.745408437608855, "step_time": 0.9070443191528321} +{"epoch": 0, "iter": 6246, "iter_tflops": 16.22136681998955, "iter_time": 1.2718468017578126, "loss": 0.11574768275022507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.02134357838979, "step_time": 0.9814355316162111} +{"epoch": 0, "iter": 6247, "iter_tflops": 50.305827409729716, "iter_time": 0.4101133918762207, "loss": 0.13019321858882904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.76752188866697, "step_time": 0.376703067779541} +{"epoch": 
0, "iter": 6248, "iter_tflops": 49.23203916036144, "iter_time": 0.419058277130127, "loss": 0.12799589335918427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.51635741235327, "step_time": 0.38551004791259763} +{"epoch": 0, "iter": 6249, "iter_tflops": 25.81916336289338, "iter_time": 0.7990612716674805, "loss": 0.4723743498325348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.219011014824883, "step_time": 0.7579663162231445} +{"epoch": 0, "iter": 6250, "iter_tflops": 13.866011083633659, "iter_time": 1.4878895874023437, "loss": 0.5533150434494019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.326929709837046, "step_time": 1.2636235885620115} +{"epoch": 0, "iter": 6251, "iter_tflops": 46.631614337131374, "iter_time": 0.4424271774291991, "loss": 0.5567045211791992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.81083243785694, "step_time": 0.40603730583190917} +{"epoch": 0, "iter": 6252, "iter_tflops": 49.18057786646122, "iter_time": 0.419496768951416, "loss": 0.47227081656455994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.90292356575466, "step_time": 0.38998021507263186} +{"epoch": 0, "iter": 6253, "iter_tflops": 44.9472654171184, "iter_time": 0.45900664520263673, "loss": 0.2672889530658722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.88554306317467, "step_time": 0.42202852249145506} +{"epoch": 0, "iter": 6254, "iter_tflops": 30.394807726823856, "iter_time": 0.6787703247070311, "loss": 0.3692951202392578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.40539189946722, "step_time": 0.5371926307678223} +{"epoch": 0, "iter": 6255, "iter_tflops": 40.24625165614019, "iter_time": 0.5126214904785156, "loss": 0.3801734447479248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.241761402346775, "step_time": 0.4663262233734131} +{"epoch": 0, "iter": 6256, "iter_tflops": 43.539085682164114, "iter_time": 0.4738522453308105, "loss": 0.31193381547927856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.74311510394088, "step_time": 0.4321270923614502} 
+{"epoch": 0, "iter": 6257, "iter_tflops": 27.839297742230535, "iter_time": 0.7410780868530273, "loss": 0.22171305119991302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.76234662421403, "step_time": 0.693194450378418} +{"epoch": 0, "iter": 6258, "iter_tflops": 10.34178665588388, "iter_time": 1.9949254608154297, "loss": 0.2578117251396179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.99480830616618, "step_time": 1.4741962203979493} +{"epoch": 0, "iter": 6259, "iter_tflops": 13.549180602070988, "iter_time": 1.5226820068359375, "loss": 0.25560805201530457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.68902151634572, "step_time": 1.2362074966430663} +{"epoch": 0, "iter": 6260, "iter_tflops": 20.673990083715385, "iter_time": 0.9979250946044922, "loss": 0.18984892964363098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.1812758184327, "step_time": 0.6835726108551025} +{"epoch": 0, "iter": 6261, "iter_tflops": 18.88146285086888, "iter_time": 0.8588994064331055, "loss": 0.32210418581962585, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 19.860097125561296, "step_time": 0.816575927734375} +{"epoch": 0, "iter": 6262, "iter_tflops": 7.714197086924612, "iter_time": 2.1022637939453124, "loss": 0.35551512241363525, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 8.891590654129544, "step_time": 1.8238893203735351} +{"epoch": 0, "iter": 6263, "iter_tflops": 8.998509830936802, "iter_time": 1.8022180938720704, "loss": 0.338849812746048, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 11.040798182047778, "step_time": 1.4688500747680664} +{"epoch": 0, "iter": 6264, "iter_tflops": 21.11739873531847, "iter_time": 0.7679580917358398, "loss": 0.35688209533691406, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 25.567813605512907, "step_time": 0.6342848663330077} +{"epoch": 0, "iter": 6265, "iter_tflops": 18.469196673399026, "iter_time": 0.833726432800293, "loss": 0.4764479696750641, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 19.493605075316168, "step_time": 0.7899132766723633} 
+{"epoch": 0, "iter": 6266, "iter_tflops": 12.07497463436345, "iter_time": 1.275220687866211, "loss": 0.3664562702178955, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 14.350013405353877, "step_time": 1.07304829788208} +{"epoch": 0, "iter": 6267, "iter_tflops": 27.077179844050452, "iter_time": 0.5686802520751952, "loss": 0.3037698268890381, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.961699594767076, "step_time": 0.5316765823364258} +{"epoch": 0, "iter": 6268, "iter_tflops": 29.126047896950823, "iter_time": 0.528676513671875, "loss": 0.31463855504989624, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 30.875549890572827, "step_time": 0.4987201042175293} +{"epoch": 0, "iter": 6269, "iter_tflops": 25.33969706503404, "iter_time": 0.81418074798584, "loss": 0.07910890877246857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.76805298561346, "step_time": 0.7707356796264648} +{"epoch": 0, "iter": 6270, "iter_tflops": 13.65122806016496, "iter_time": 1.5112994537353517, "loss": 0.07378031313419342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.32915840293787, "step_time": 1.2634511222839355} +{"epoch": 0, "iter": 6271, "iter_tflops": 41.764560140647426, "iter_time": 0.493985652923584, "loss": 0.057007238268852234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.05775288054254, "step_time": 0.4479396457672119} +{"epoch": 0, "iter": 6272, "iter_tflops": 43.99712643647469, "iter_time": 0.46891911315917967, "loss": 0.07447260618209839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.19583815150427, "step_time": 0.4280679473876953} +{"epoch": 0, "iter": 6273, "iter_tflops": 24.23823612430398, "iter_time": 0.8511796569824218, "loss": 0.4608304500579834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.866216687498035, "step_time": 0.7976076965332032} +{"epoch": 0, "iter": 6274, "iter_tflops": 7.37591892599916, "iter_time": 2.7970878906250003, "loss": 0.4105527698993683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.707249161350772, "step_time": 2.1253285217285156} 
+{"epoch": 0, "iter": 6275, "iter_tflops": 13.062272412976972, "iter_time": 1.5794413757324217, "loss": 0.4590524435043335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.241796953760634, "step_time": 1.2702469787597657} +{"epoch": 0, "iter": 6276, "iter_tflops": 45.135726817383095, "iter_time": 0.4570900917053222, "loss": 0.33772262930870056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.085219752702706, "step_time": 0.42031172752380375} +{"epoch": 0, "iter": 6277, "iter_tflops": 17.58661788872658, "iter_time": 0.8360660781860353, "loss": 0.368041455745697, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 18.53832056536156, "step_time": 0.7931449127197265} +{"epoch": 0, "iter": 6278, "iter_tflops": 11.18152782710625, "iter_time": 1.3149879760742187, "loss": 0.34303799271583557, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 14.668091489140188, "step_time": 1.0024190711975096} +{"epoch": 0, "iter": 6279, "iter_tflops": 26.681283797925122, "iter_time": 0.5510819778442382, "loss": 0.295453280210495, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 28.476178017557373, "step_time": 0.516346492767334} +{"epoch": 0, "iter": 6280, "iter_tflops": 24.86415264383416, "iter_time": 0.5913563537597656, "loss": 0.3367473781108856, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 26.3822900716929, "step_time": 0.5573274574279785} +{"epoch": 0, "iter": 6281, "iter_tflops": 37.8296972915106, "iter_time": 0.5453676605224609, "loss": 0.7883433699607849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.08469456980629, "step_time": 0.5021600799560546} +{"epoch": 0, "iter": 6282, "iter_tflops": 13.732440144834998, "iter_time": 1.502361801147461, "loss": 0.6686422824859619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.931603421971198, "step_time": 1.2949791030883788} +{"epoch": 0, "iter": 6283, "iter_tflops": 15.865869366065553, "iter_time": 1.3003443450927734, "loss": 0.7785895466804504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.502405404607646, "step_time": 1.1150492630004885} 
+{"epoch": 0, "iter": 6284, "iter_tflops": 15.499197674495598, "iter_time": 1.331107192993164, "loss": 0.648842453956604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.008295117672322, "step_time": 1.213001853942871} +{"epoch": 0, "iter": 6285, "iter_tflops": 5.070325232584596, "iter_time": 3.1257333679199215, "loss": 0.3801456391811371, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 6.1957546524944656, "step_time": 2.557958740234375} +{"epoch": 0, "iter": 6286, "iter_tflops": 24.018913220943666, "iter_time": 0.6598335494995118, "loss": 0.38986510038375854, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.899374607721604, "step_time": 0.6119253845214844} +{"epoch": 0, "iter": 6287, "iter_tflops": 24.354831726709183, "iter_time": 0.6507326736450194, "loss": 0.4116707742214203, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 26.200565218186227, "step_time": 0.6048909492492676} +{"epoch": 0, "iter": 6288, "iter_tflops": 24.24302588877192, "iter_time": 0.653733772277832, "loss": 0.40031784772872925, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 26.04598333262156, "step_time": 0.6084809532165527} +{"epoch": 0, "iter": 6289, "iter_tflops": 15.982544306501273, "iter_time": 1.2908516387939453, "loss": 0.859964907169342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.26006347578562, "step_time": 1.1953080902099609} +{"epoch": 0, "iter": 6290, "iter_tflops": 17.481957006280876, "iter_time": 1.1801363830566405, "loss": 0.90684974193573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.129625948808847, "step_time": 0.891976963043213} +{"epoch": 0, "iter": 6291, "iter_tflops": 37.095120045557316, "iter_time": 0.5561673202514649, "loss": 0.7550101280212402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.084128276500394, "step_time": 0.5146948280334472} +{"epoch": 0, "iter": 6292, "iter_tflops": 39.16180024980685, "iter_time": 0.5268167800903321, "loss": 0.8246751427650452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.043093734281435, "step_time": 0.49071302032470704} 
+{"epoch": 0, "iter": 6293, "iter_tflops": 16.897827262909313, "iter_time": 1.2209317321777342, "loss": 0.7449209690093994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.83886770897194, "step_time": 1.1565248336791991} +{"epoch": 0, "iter": 6294, "iter_tflops": 21.629068105271454, "iter_time": 0.9538595657348632, "loss": 0.7825891971588135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.422478195189, "step_time": 0.8447583961486816} +{"epoch": 0, "iter": 6295, "iter_tflops": 47.37302123144365, "iter_time": 0.4355030136108398, "loss": 0.7435598373413086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.189722807857216, "step_time": 0.40303194427490235} +{"epoch": 0, "iter": 6296, "iter_tflops": 50.64071613159535, "iter_time": 0.40740129852294926, "loss": 0.7727485299110413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.002920787004214, "step_time": 0.37509087181091316} +{"epoch": 0, "iter": 6297, "iter_tflops": 28.801367735048036, "iter_time": 0.7163233947753905, "loss": 1.0296509265899658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.45452311633901, "step_time": 0.6774393882751465} +{"epoch": 0, "iter": 6298, "iter_tflops": 16.473148714134172, "iter_time": 1.2524074096679687, "loss": 0.7617605328559875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.013564004122674, "step_time": 1.0850723991394042} +{"epoch": 0, "iter": 6299, "iter_tflops": 41.89250604106455, "iter_time": 0.49247694778442386, "loss": 0.6479330062866211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.97023380640821, "step_time": 0.45877220916748046} +{"epoch": 0, "iter": 6300, "iter_tflops": 44.3197718019969, "iter_time": 0.4655054092407227, "loss": 0.652654767036438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.65257651117562, "step_time": 0.4329481220245362} +{"epoch": 0, "iter": 6301, "iter_tflops": 35.28289583450616, "iter_time": 0.5847335662841796, "loss": 0.6888622045516968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.762110008376446, "step_time": 0.5463437690734864} 
+{"epoch": 0, "iter": 6302, "iter_tflops": 27.962607796549502, "iter_time": 0.7378100662231446, "loss": 0.8670846819877625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.863447477797486, "step_time": 0.668463674545288} +{"epoch": 0, "iter": 6303, "iter_tflops": 36.056362127910354, "iter_time": 0.5721901016235351, "loss": 0.9174540042877197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.06459605679907, "step_time": 0.5281276550292968} +{"epoch": 0, "iter": 6304, "iter_tflops": 37.09410031093149, "iter_time": 0.5561826095581055, "loss": 0.7464253902435303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.420008729575706, "step_time": 0.5104178390502929} +{"epoch": 0, "iter": 6305, "iter_tflops": 19.84177034969358, "iter_time": 1.0397808837890627, "loss": 0.16218826174736023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.15784014571447, "step_time": 0.9751039505004884} +{"epoch": 0, "iter": 6306, "iter_tflops": 19.342052657891234, "iter_time": 1.0666444702148439, "loss": 0.1805906593799591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.25459427993705, "step_time": 0.9270487365722656} +{"epoch": 0, "iter": 6307, "iter_tflops": 49.84679808345877, "iter_time": 0.4138900451660156, "loss": 0.20794051885604858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.989873061745804, "step_time": 0.3821289501190186} +{"epoch": 0, "iter": 6308, "iter_tflops": 52.40023084128169, "iter_time": 0.3937214241027832, "loss": 0.22111786901950836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.639113450526594, "step_time": 0.36425523376464847} +{"epoch": 0, "iter": 6309, "iter_tflops": 32.506900766519706, "iter_time": 0.6206326904296875, "loss": 0.24458420276641846, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 34.618628532745774, "step_time": 0.5827742500305176} +{"epoch": 0, "iter": 6310, "iter_tflops": 13.434841832940492, "iter_time": 1.501680892944336, "loss": 0.2654368281364441, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 18.714258856882957, "step_time": 
1.0780467147827149} +{"epoch": 0, "iter": 6311, "iter_tflops": 37.02500955671629, "iter_time": 0.5448977737426757, "loss": 0.2345806360244751, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 40.830104689647506, "step_time": 0.4941169128417968} +{"epoch": 0, "iter": 6312, "iter_tflops": 41.0792994361667, "iter_time": 0.4911195068359375, "loss": 0.1637064814567566, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 44.68868004272488, "step_time": 0.45145314788818364} +{"epoch": 0, "iter": 6313, "iter_tflops": 17.34207446086626, "iter_time": 1.1896554565429687, "loss": 0.05706531181931496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.209836797865258, "step_time": 1.1329642181396484} +{"epoch": 0, "iter": 6314, "iter_tflops": 17.793836706709897, "iter_time": 1.15945166015625, "loss": 0.05216594040393829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.316369068248573, "step_time": 0.967852144241333} +{"epoch": 0, "iter": 6315, "iter_tflops": 44.32225215948435, "iter_time": 0.46547935867309576, "loss": 0.01954949088394642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.97991259593963, "step_time": 0.4212154006958007} +{"epoch": 0, "iter": 6316, "iter_tflops": 40.867038378147654, "iter_time": 0.5048345642089843, "loss": 0.0782708004117012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.09411869017887, "step_time": 0.4575118465423584} +{"epoch": 0, "iter": 6317, "iter_tflops": 14.10071407458187, "iter_time": 1.4631240234375, "loss": 0.9647442102432251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.89576194259653, "step_time": 1.3850310974121094} +{"epoch": 0, "iter": 6318, "iter_tflops": 20.110630299789033, "iter_time": 1.0258800048828125, "loss": 0.8041354417800903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.904569969196654, "step_time": 0.8630606422424316} +{"epoch": 0, "iter": 6319, "iter_tflops": 46.3019520103567, "iter_time": 0.4455771865844727, "loss": 0.9364697337150574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.72315675509403, "step_time": 
0.41491922187805175} +{"epoch": 0, "iter": 6320, "iter_tflops": 45.229680847752434, "iter_time": 0.45614059448242195, "loss": 0.8640502691268921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.38579892970855, "step_time": 0.4263873691558838} +{"epoch": 0, "iter": 6321, "iter_tflops": 24.374934353448502, "iter_time": 0.5430571823120117, "loss": 0.0041886609978973866, "lr": 3e-05, "seqlen": 5312.0, "step_tflops": 26.218882131272757, "step_time": 0.5048645133972167} +{"epoch": 0, "iter": 6322, "iter_tflops": 11.846171004396046, "iter_time": 1.1174060516357422, "loss": 0.004795592278242111, "lr": 3e-05, "seqlen": 5312.0, "step_tflops": 13.28623362494812, "step_time": 0.9962931213378907} +{"epoch": 0, "iter": 6323, "iter_tflops": 37.01557845596618, "iter_time": 0.3576057357788086, "loss": 0.0017545060254633427, "lr": 3e-05, "seqlen": 5312.0, "step_tflops": 40.7837901447933, "step_time": 0.32456481170654294} +{"epoch": 0, "iter": 6324, "iter_tflops": 38.05800984353135, "iter_time": 0.34781070327758784, "loss": 0.01858973503112793, "lr": 3e-05, "seqlen": 5312.0, "step_tflops": 41.57829380158091, "step_time": 0.31836282730102544} +{"epoch": 0, "iter": 6325, "iter_tflops": 41.71832395548816, "iter_time": 0.4945331344604493, "loss": 0.04936825484037399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.66397535170832, "step_time": 0.45180239677429196} +{"epoch": 0, "iter": 6326, "iter_tflops": 10.910418177684745, "iter_time": 1.8909535064697263, "loss": 0.056984350085258484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.108849412204021, "step_time": 1.7038029632568361} +{"epoch": 0, "iter": 6327, "iter_tflops": 9.371557137573163, "iter_time": 2.2014584350585937, "loss": 0.0736406072974205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.226605258776607, "step_time": 1.687393440246582} +{"epoch": 0, "iter": 6328, "iter_tflops": 31.432179573797164, "iter_time": 0.6563685302734376, "loss": 0.10004887729883194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.61420466278672, "step_time": 0.5079772872924805} +{"epoch": 0, "iter": 6329, "iter_tflops": 22.534053225886865, "iter_time": 0.6869636077880859, "loss": 0.5584576725959778, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 23.915541674283872, "step_time": 0.6472809486389159} +{"epoch": 0, "iter": 6330, "iter_tflops": 9.345616729221753, "iter_time": 1.6563994598388674, "loss": 0.2918981909751892, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 10.690278252412977, "step_time": 1.448051597595215} +{"epoch": 0, "iter": 6331, "iter_tflops": 11.398099762433649, "iter_time": 1.3581276550292967, "loss": 0.3477325737476349, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 13.268334752041723, "step_time": 1.1666930923461913} +{"epoch": 0, "iter": 6332, "iter_tflops": 25.933027506754943, "iter_time": 0.596925079345703, "loss": 0.3598194718360901, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 27.557103999080276, "step_time": 0.5617453308105469} +{"epoch": 0, "iter": 6333, "iter_tflops": 28.002698270397268, "iter_time": 0.6143190994262695, "loss": 0.4224509000778198, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 30.019655140236416, "step_time": 0.5730443038940429} +{"epoch": 0, "iter": 6334, "iter_tflops": 30.571798618103248, "iter_time": 0.5626948089599609, "loss": 0.46578142046928406, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 32.738366029615534, "step_time": 0.5254566574096681} +{"epoch": 0, "iter": 6335, "iter_tflops": 30.377032068993646, "iter_time": 0.5663026046752929, "loss": 0.2610127925872803, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 32.44274077223737, "step_time": 0.5302447319030762} +{"epoch": 0, "iter": 6336, "iter_tflops": 31.429553779394833, "iter_time": 0.5473381042480469, "loss": 0.41805383563041687, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 33.215477333902875, "step_time": 0.5179089317321778} +{"epoch": 0, "iter": 6337, "iter_tflops": 37.05018946362661, "iter_time": 0.556841781616211, "loss": 0.7981784343719482, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.885540357901945, "step_time": 0.5172574653625488} +{"epoch": 0, "iter": 6338, "iter_tflops": 42.565693511221085, "iter_time": 0.48468829727172846, "loss": 0.7351603507995605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24038085118396, "step_time": 0.42767269134521485} +{"epoch": 0, "iter": 6339, "iter_tflops": 47.06819535962495, "iter_time": 0.4383234443664551, "loss": 0.8462162017822266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.01947640649418, "step_time": 0.404376817703247} +{"epoch": 0, "iter": 6340, "iter_tflops": 46.64811940985925, "iter_time": 0.442270637512207, "loss": 0.6402995586395264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.36214291959942, "step_time": 0.40965479850769043} +{"epoch": 0, "iter": 6341, "iter_tflops": 37.281234806690634, "iter_time": 0.5533908309936524, "loss": 0.8832495808601379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.27579148995056, "step_time": 0.5122455139160156} +{"epoch": 0, "iter": 6342, "iter_tflops": 17.011714246857068, "iter_time": 1.2127580566406249, "loss": 0.7616987228393555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.141015331402055, "step_time": 0.9318043098449708} +{"epoch": 0, "iter": 6343, "iter_tflops": 44.42753208044085, "iter_time": 0.4643763122558594, "loss": 0.8467470407485962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.748829770933675, "step_time": 0.43207537460327144} +{"epoch": 0, "iter": 6344, "iter_tflops": 48.63446892071735, "iter_time": 0.4242072334289551, "loss": 0.7715456485748291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.261292083973146, "step_time": 0.39476814842224117} +{"epoch": 0, "iter": 6345, "iter_tflops": 35.382966649724175, "iter_time": 0.5830798110961913, "loss": 0.003793994663283229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.8636232264882, "step_time": 0.5448790092468262} +{"epoch": 0, "iter": 6346, "iter_tflops": 19.904676390827902, "iter_time": 1.0364947967529299, "loss": 0.009684939868748188, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 22.300105964533028, "step_time": 0.9251567478179932} +{"epoch": 0, "iter": 6347, "iter_tflops": 49.83044249382151, "iter_time": 0.41402589416503904, "loss": 0.009898566640913486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.49183881639234, "step_time": 0.37178608512878414} +{"epoch": 0, "iter": 6348, "iter_tflops": 55.01510601055513, "iter_time": 0.3750077934265137, "loss": 0.007081240881234407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.294755792566136, "step_time": 0.34217061233520507} +{"epoch": 0, "iter": 6349, "iter_tflops": 23.5881243708361, "iter_time": 0.8746389999389649, "loss": 0.8956597447395325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.695011569459954, "step_time": 0.8354356689453125} +{"epoch": 0, "iter": 6350, "iter_tflops": 14.79475613441965, "iter_time": 1.3944868927001952, "loss": 0.8046959042549133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.641907524665278, "step_time": 1.1067050666809082} +{"epoch": 0, "iter": 6351, "iter_tflops": 33.11198732328075, "iter_time": 0.6230702285766601, "loss": 0.8706574440002441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.20871160486678, "step_time": 0.5697825908660888} +{"epoch": 0, "iter": 6352, "iter_tflops": 36.4108231843998, "iter_time": 0.5666198043823242, "loss": 0.7319827675819397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.27323007125212, "step_time": 0.5253220443725586} +{"epoch": 0, "iter": 6353, "iter_tflops": 24.383142590576302, "iter_time": 0.8461211853027345, "loss": 0.44784027338027954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.014404232341327, "step_time": 0.7930642318725585} +{"epoch": 0, "iter": 6354, "iter_tflops": 10.564827912731435, "iter_time": 1.9528092346191408, "loss": 0.591776967048645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.388975538952472, "step_time": 1.6652784118652344} +{"epoch": 0, "iter": 6355, "iter_tflops": 9.857312721216413, "iter_time": 2.092973419189453, "loss": 0.5714558959007263, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 10.893092762331998, "step_time": 1.8939610595703122} +{"epoch": 0, "iter": 6356, "iter_tflops": 31.043014007034905, "iter_time": 0.6645969848632813, "loss": 0.5118679404258728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.85165344191876, "step_time": 0.43114693069458004} +{"epoch": 0, "iter": 6357, "iter_tflops": 14.822881615637074, "iter_time": 1.077480987548828, "loss": 0.4951268136501312, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.461962835314209, "step_time": 1.032946029663086} +{"epoch": 0, "iter": 6358, "iter_tflops": 13.656571802276588, "iter_time": 1.1695009078979492, "loss": 0.3692285716533661, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 17.537012408965936, "step_time": 0.9107237167358398} +{"epoch": 0, "iter": 6359, "iter_tflops": 23.47566841948795, "iter_time": 0.6803373107910156, "loss": 0.36221206188201904, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 25.33826085269727, "step_time": 0.6303263359069824} +{"epoch": 0, "iter": 6360, "iter_tflops": 22.915440645998373, "iter_time": 0.6969699325561525, "loss": 0.3956994414329529, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 24.638343422180718, "step_time": 0.6482324256896972} +{"epoch": 0, "iter": 6361, "iter_tflops": 28.230132782947596, "iter_time": 0.7308181533813477, "loss": 0.7414618730545044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.49230000506009, "step_time": 0.6766001091003417} +{"epoch": 0, "iter": 6362, "iter_tflops": 14.639449559989606, "iter_time": 1.4092806854248048, "loss": 0.6459015011787415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.111320174885453, "step_time": 1.025844814300537} +{"epoch": 0, "iter": 6363, "iter_tflops": 12.88589655703329, "iter_time": 1.6010599975585937, "loss": 0.5774620771408081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.580857742502225, "step_time": 1.414943748474121} +{"epoch": 0, "iter": 6364, "iter_tflops": 36.054364665532134, "iter_time": 0.5722218017578125, "loss": 0.692305862903595, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 44.48029323954796, "step_time": 0.4638254833221436} +{"epoch": 0, "iter": 6365, "iter_tflops": 21.194271850720376, "iter_time": 0.7303895416259766, "loss": 0.3305037021636963, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 22.479435018495778, "step_time": 0.6886327209472657} +{"epoch": 0, "iter": 6366, "iter_tflops": 7.318535408239687, "iter_time": 2.115187484741211, "loss": 0.3092910945415497, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 9.021732861919453, "step_time": 1.71586487197876} +{"epoch": 0, "iter": 6367, "iter_tflops": 9.295379081037316, "iter_time": 1.6653516082763673, "loss": 0.44955897331237793, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 11.133901768812247, "step_time": 1.390354866027832} +{"epoch": 0, "iter": 6368, "iter_tflops": 15.48772265229347, "iter_time": 0.9995061798095703, "loss": 0.5291223526000977, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 17.170993351949853, "step_time": 0.9015246925354004} +{"epoch": 0, "iter": 6369, "iter_tflops": 14.54894442623854, "iter_time": 1.1034011688232424, "loss": 0.25957435369491577, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 15.314119040536799, "step_time": 1.0482693939208985} +{"epoch": 0, "iter": 6370, "iter_tflops": 5.925867441089364, "iter_time": 2.709024871826172, "loss": 0.3078276515007019, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 7.239486508699849, "step_time": 2.2174669799804687} +{"epoch": 0, "iter": 6371, "iter_tflops": 11.965758765381965, "iter_time": 1.3416050415039062, "loss": 0.5168704390525818, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 13.847349826818121, "step_time": 1.1593064727783204} +{"epoch": 0, "iter": 6372, "iter_tflops": 27.30236664309998, "iter_time": 0.587982810974121, "loss": 0.3813832998275757, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.06959832904203, "step_time": 0.5522374992370604} +{"epoch": 0, "iter": 6373, "iter_tflops": 20.64386053620914, "iter_time": 0.7657255477905274, "loss": 0.4374575614929199, "lr": 3e-05, 
"seqlen": 6320.0, "step_tflops": 21.798295008087713, "step_time": 0.725172836303711} +{"epoch": 0, "iter": 6374, "iter_tflops": 10.939238695779906, "iter_time": 1.4450303039550783, "loss": 0.4027324318885803, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 12.828897330551401, "step_time": 1.2321816139221193} +{"epoch": 0, "iter": 6375, "iter_tflops": 26.28190802885212, "iter_time": 0.6014605712890625, "loss": 0.5566576719284058, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 28.150447650390838, "step_time": 0.5615374794006348} +{"epoch": 0, "iter": 6376, "iter_tflops": 28.39253288141431, "iter_time": 0.5567496032714844, "loss": 0.307130366563797, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.26474761578005, "step_time": 0.5223083839416504} +{"epoch": 0, "iter": 6377, "iter_tflops": 23.272566970404284, "iter_time": 0.886498405456543, "loss": 0.3902602195739746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.504581938953674, "step_time": 0.8419279937744141} +{"epoch": 0, "iter": 6378, "iter_tflops": 20.44731694665972, "iter_time": 1.0089878082275392, "loss": 0.2659086585044861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.649755303686607, "step_time": 0.8043388042449952} +{"epoch": 0, "iter": 6379, "iter_tflops": 36.532627153359456, "iter_time": 0.5647306289672852, "loss": 0.3640702962875366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.960951081198864, "step_time": 0.516281343460083} +{"epoch": 0, "iter": 6380, "iter_tflops": 36.245165559468134, "iter_time": 0.5692095260620118, "loss": 0.2880493998527527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.34407439279937, "step_time": 0.5243761310577393} +{"epoch": 0, "iter": 6381, "iter_tflops": 20.425889000219534, "iter_time": 1.0100462951660154, "loss": 0.07846075296401978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.610963190950184, "step_time": 0.9546586761474608} +{"epoch": 0, "iter": 6382, "iter_tflops": 10.403910621130708, "iter_time": 1.9830133361816404, "loss": 0.11670701950788498, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 13.77382115444108, "step_time": 1.4978482208251953} +{"epoch": 0, "iter": 6383, "iter_tflops": 14.952654890248619, "iter_time": 1.37976123046875, "loss": 0.11942319571971893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.90226622783312, "step_time": 1.0914613761901857} +{"epoch": 0, "iter": 6384, "iter_tflops": 30.549951917848173, "iter_time": 0.6753232727050782, "loss": 0.11331168562173843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.88803615227896, "step_time": 0.5445279197692872} +{"epoch": 0, "iter": 6385, "iter_tflops": 13.356607378773573, "iter_time": 1.2264572296142577, "loss": 0.3723035454750061, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 14.40527821665692, "step_time": 1.1371739883422851} +{"epoch": 0, "iter": 6386, "iter_tflops": 17.333861228553598, "iter_time": 0.9450466613769531, "loss": 0.4578908681869507, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 20.905741955675925, "step_time": 0.7835793495178224} +{"epoch": 0, "iter": 6387, "iter_tflops": 23.29311348984192, "iter_time": 0.7032682723999024, "loss": 0.3367209732532501, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 25.10388934713528, "step_time": 0.6525406265258789} +{"epoch": 0, "iter": 6388, "iter_tflops": 24.06118417433564, "iter_time": 0.68081884765625, "loss": 0.40845659375190735, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 25.763943783067194, "step_time": 0.6358229866027831} +{"epoch": 0, "iter": 6389, "iter_tflops": 36.390034196437455, "iter_time": 0.566943504333496, "loss": 0.1828811764717102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.322629182971944, "step_time": 0.5116505031585694} +{"epoch": 0, "iter": 6390, "iter_tflops": 37.7888520395517, "iter_time": 0.5459571380615235, "loss": 0.10075531154870987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.153674424516886, "step_time": 0.48942574501037595} +{"epoch": 0, "iter": 6391, "iter_tflops": 40.049727001914086, "iter_time": 0.5151369323730469, "loss": 0.1172812208533287, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 44.1303471379493, "step_time": 0.4675035400390625} +{"epoch": 0, "iter": 6392, "iter_tflops": 42.920139839094404, "iter_time": 0.48068560791015624, "loss": 0.10498729348182678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.65890272131606, "step_time": 0.4421684246063232} +{"epoch": 0, "iter": 6393, "iter_tflops": 22.017227767390466, "iter_time": 0.9370431976318359, "loss": 0.8107489347457886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.687282247894142, "step_time": 0.870977653503418} +{"epoch": 0, "iter": 6394, "iter_tflops": 18.443241224718097, "iter_time": 1.1186262359619141, "loss": 0.9648363590240479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.76415837557881, "step_time": 0.9935916080474853} +{"epoch": 0, "iter": 6395, "iter_tflops": 38.62709489071224, "iter_time": 0.534109375, "loss": 0.8880277276039124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.26800590123952, "step_time": 0.4881018886566162} +{"epoch": 0, "iter": 6396, "iter_tflops": 35.26869810569551, "iter_time": 0.5849689559936524, "loss": 0.6989743113517761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.29476166491343, "step_time": 0.5387445335388183} +{"epoch": 0, "iter": 6397, "iter_tflops": 18.9534598369938, "iter_time": 1.0885133209228515, "loss": 0.0036850683391094208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.24239586556556, "step_time": 1.0192021560668945} +{"epoch": 0, "iter": 6398, "iter_tflops": 19.120316315767433, "iter_time": 1.0790142364501953, "loss": 0.005045527592301369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.4259526713081, "step_time": 0.8806938953399659} +{"epoch": 0, "iter": 6399, "iter_tflops": 39.465645625423434, "iter_time": 0.5227608261108398, "loss": 0.005577772855758667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73898195651433, "step_time": 0.47168664169311525} +{"epoch": 0, "iter": 6400, "iter_tflops": 47.99869801518829, "iter_time": 0.4298261070251465, "loss": 0.0031781226862221956, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 53.24972269036165, "step_time": 0.387440393447876} +{"epoch": 0, "iter": 6401, "iter_tflops": 22.69498636040524, "iter_time": 0.9090595245361328, "loss": 0.5976327061653137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.82032036179878, "step_time": 0.8312178573608399} +{"epoch": 0, "iter": 6402, "iter_tflops": 44.813130122270366, "iter_time": 0.46038055038452147, "loss": 0.7743894457817078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.116273875989364, "step_time": 0.42004598236083984} +{"epoch": 0, "iter": 6403, "iter_tflops": 52.625717676607685, "iter_time": 0.39203443527221676, "loss": 0.5568536520004272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.07653013427733, "step_time": 0.36146369552612306} +{"epoch": 0, "iter": 6404, "iter_tflops": 52.155542473405426, "iter_time": 0.3955685729980469, "loss": 0.5807366371154785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.125094218423435, "step_time": 0.36759124946594235} +{"epoch": 0, "iter": 6405, "iter_tflops": 29.818555897779078, "iter_time": 0.6918877487182616, "loss": 0.7312385439872742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.773267431895658, "step_time": 0.6493223762512207} +{"epoch": 0, "iter": 6406, "iter_tflops": 17.472998135872295, "iter_time": 1.180741470336914, "loss": 0.6797676682472229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.345158308609665, "step_time": 1.0664732322692874} +{"epoch": 0, "iter": 6407, "iter_tflops": 46.19076631350674, "iter_time": 0.44664973449707035, "loss": 0.6420633792877197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.1245568215184, "step_time": 0.4115965270996094} +{"epoch": 0, "iter": 6408, "iter_tflops": 47.11139288221547, "iter_time": 0.43792153549194335, "loss": 0.5175240635871887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.971000443196345, "step_time": 0.40476140022277834} +{"epoch": 0, "iter": 6409, "iter_tflops": 44.64519072117036, "iter_time": 0.4621123390197754, "loss": 0.22156597673892975, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 48.73183101941775, "step_time": 0.42335970306396487} +{"epoch": 0, "iter": 6410, "iter_tflops": 44.59969269352037, "iter_time": 0.46258375930786133, "loss": 0.15486720204353333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.49565867934855, "step_time": 0.42542145156860356} +{"epoch": 0, "iter": 6411, "iter_tflops": 47.579320463628974, "iter_time": 0.4336147155761718, "loss": 0.1624956727027893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.477196793695576, "step_time": 0.4007812156677246} +{"epoch": 0, "iter": 6412, "iter_tflops": 51.06787709623025, "iter_time": 0.4039935607910156, "loss": 0.16718609631061554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.69476759131229, "step_time": 0.3704314498901367} +{"epoch": 0, "iter": 6413, "iter_tflops": 25.903737250836727, "iter_time": 0.7964523925781248, "loss": 0.3242204785346985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.434174031127494, "step_time": 0.7520216751098633} +{"epoch": 0, "iter": 6414, "iter_tflops": 17.300260625937337, "iter_time": 1.1925307922363282, "loss": 0.3151071071624756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.754529296509794, "step_time": 0.9940525856018068} +{"epoch": 0, "iter": 6415, "iter_tflops": 39.98892751112542, "iter_time": 0.515920150756836, "loss": 0.3465729057788849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.00198341557932, "step_time": 0.468867353439331} +{"epoch": 0, "iter": 6416, "iter_tflops": 42.82575408653479, "iter_time": 0.4817450141906738, "loss": 0.28371986746788025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90880183097339, "step_time": 0.4398128433227539} +{"epoch": 0, "iter": 6417, "iter_tflops": 1.7599918813395434, "iter_time": 0.8904931259155274, "loss": 0.2980196177959442, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.8706405590138342, "step_time": 0.8378203201293944} +{"epoch": 0, "iter": 6418, "iter_tflops": 0.9316239356413926, "iter_time": 1.6822889709472657, "loss": 0.3085521161556244, 
"lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.1348457797660143, "step_time": 1.3810340576171876} +{"epoch": 0, "iter": 6419, "iter_tflops": 1.078068931089359, "iter_time": 1.4537666625976562, "loss": 0.3445639908313751, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.2092882162311556, "step_time": 1.2960191383361814} +{"epoch": 0, "iter": 6420, "iter_tflops": 2.953778630805751, "iter_time": 0.5305951690673828, "loss": 0.2523348331451416, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.236934810714101, "step_time": 0.48418048667907715} +{"epoch": 0, "iter": 6421, "iter_tflops": 14.33635047165446, "iter_time": 1.1455026245117186, "loss": 0.44891414046287537, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 15.230673532697894, "step_time": 1.0782403717041016} +{"epoch": 0, "iter": 6422, "iter_tflops": 11.596457236330671, "iter_time": 1.416150360107422, "loss": 0.2937941253185272, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 15.906621173201886, "step_time": 1.0324208335876466} +{"epoch": 0, "iter": 6423, "iter_tflops": 28.38545234050837, "iter_time": 0.5785473098754883, "loss": 0.3749465346336365, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 30.12099273724465, "step_time": 0.5452120132446289} +{"epoch": 0, "iter": 6424, "iter_tflops": 27.622441806355653, "iter_time": 0.5945284347534179, "loss": 0.2719822824001312, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 29.498968748963176, "step_time": 0.5567085151672364} +{"epoch": 0, "iter": 6425, "iter_tflops": 34.4228268271083, "iter_time": 0.5993433837890625, "loss": 0.16130855679512024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.87063080626846, "step_time": 0.559553581237793} +{"epoch": 0, "iter": 6426, "iter_tflops": 8.952585310421801, "iter_time": 2.3044844360351564, "loss": 0.06662445515394211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.713834865878189, "step_time": 1.9256497573852538} +{"epoch": 0, "iter": 6427, "iter_tflops": 10.253819422413935, "iter_time": 2.01203987121582, "loss": 0.10152444243431091, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.932616087492782, "step_time": 1.7289665031433106} +{"epoch": 0, "iter": 6428, "iter_tflops": 40.387515612732166, "iter_time": 0.5108284873962402, "loss": 0.05562237650156021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.265792720026184, "step_time": 0.46607306098937984} +{"epoch": 0, "iter": 6429, "iter_tflops": 11.64538147480034, "iter_time": 1.3855527496337892, "loss": 0.3139064610004425, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 12.278245836257241, "step_time": 1.3141364440917969} +{"epoch": 0, "iter": 6430, "iter_tflops": 12.156078045473636, "iter_time": 1.3273434295654296, "loss": 0.42270129919052124, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 17.038995500831003, "step_time": 0.9469625320434572} +{"epoch": 0, "iter": 6431, "iter_tflops": 27.906175845311132, "iter_time": 0.5781978302001954, "loss": 0.4143746495246887, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 29.71303864292044, "step_time": 0.543037368774414} +{"epoch": 0, "iter": 6432, "iter_tflops": 29.242127655449686, "iter_time": 0.5517823638916015, "loss": 0.5186702609062195, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.12468767409955, "step_time": 0.5184081039428712} +{"epoch": 0, "iter": 6433, "iter_tflops": 24.920678638005548, "iter_time": 0.8278704528808593, "loss": 0.17941276729106903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.279420617110276, "step_time": 0.7850665283203125} +{"epoch": 0, "iter": 6434, "iter_tflops": 17.386489082000157, "iter_time": 1.1866164245605468, "loss": 0.1311003714799881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.46512676851709, "step_time": 0.810170461654663} +{"epoch": 0, "iter": 6435, "iter_tflops": 47.99370766580381, "iter_time": 0.4298708000183106, "loss": 0.15446195006370544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.09677204275578, "step_time": 0.396014814376831} +{"epoch": 0, "iter": 6436, "iter_tflops": 53.22595276957305, "iter_time": 0.3876134185791016, "loss": 
0.1703554093837738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.90669555082424, "step_time": 0.35628165817260743} +{"epoch": 0, "iter": 6437, "iter_tflops": 30.13938792191213, "iter_time": 0.6845226440429687, "loss": 0.24106109142303467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.996120562703428, "step_time": 0.6447998428344727} +{"epoch": 0, "iter": 6438, "iter_tflops": 14.517488572049432, "iter_time": 1.4211200103759765, "loss": 0.21851353347301483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.58775030809095, "step_time": 1.1730376625061034} +{"epoch": 0, "iter": 6439, "iter_tflops": 36.03198803277337, "iter_time": 0.5725771636962891, "loss": 0.2551707625389099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.76460980305682, "step_time": 0.5188305282592773} +{"epoch": 0, "iter": 6440, "iter_tflops": 41.94400494590061, "iter_time": 0.4918722839355469, "loss": 0.1450374871492386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97188379164192, "step_time": 0.4487763347625732} +{"epoch": 0, "iter": 6441, "iter_tflops": 19.738844488709518, "iter_time": 1.0452026977539062, "loss": 0.6850558519363403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.81370554235448, "step_time": 0.9912263565063477} +{"epoch": 0, "iter": 6442, "iter_tflops": 16.188135325693263, "iter_time": 1.2744576873779299, "loss": 0.6222871541976929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.49679995328826, "step_time": 1.0581784477233886} +{"epoch": 0, "iter": 6443, "iter_tflops": 39.57907791460878, "iter_time": 0.5212626113891602, "loss": 0.7071706652641296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.34627084111101, "step_time": 0.4759600563049316} +{"epoch": 0, "iter": 6444, "iter_tflops": 37.51558839465103, "iter_time": 0.5499338912963867, "loss": 0.7871237397193909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.71379084519815, "step_time": 0.5067347717285156} +{"epoch": 0, "iter": 6445, "iter_tflops": 18.31575820392557, "iter_time": 1.1264122009277344, "loss": 
0.8499609231948853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.430464206799257, "step_time": 1.0617910766601562} +{"epoch": 0, "iter": 6446, "iter_tflops": 16.155754128401885, "iter_time": 1.2770121002197266, "loss": 0.9132283329963684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.12089325367889, "step_time": 1.138525194168091} +{"epoch": 0, "iter": 6447, "iter_tflops": 43.5446931109244, "iter_time": 0.4737912254333496, "loss": 0.9675140380859375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.85258387404175, "step_time": 0.4403405704498291} +{"epoch": 0, "iter": 6448, "iter_tflops": 47.36151693542231, "iter_time": 0.435608798980713, "loss": 0.9334514141082764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.88523997948142, "step_time": 0.4054435729980469} +{"epoch": 0, "iter": 6449, "iter_tflops": 46.25003517612624, "iter_time": 0.4460773582458496, "loss": 0.15050235390663147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.60274434094374, "step_time": 0.40770700836181645} +{"epoch": 0, "iter": 6450, "iter_tflops": 17.461445715122252, "iter_time": 1.1815226440429687, "loss": 0.12926261126995087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.657771763417408, "step_time": 0.9987085609436036} +{"epoch": 0, "iter": 6451, "iter_tflops": 39.521093958044034, "iter_time": 0.5220273895263672, "loss": 0.15755397081375122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.38053490718104, "step_time": 0.47558411979675297} +{"epoch": 0, "iter": 6452, "iter_tflops": 42.25404587946007, "iter_time": 0.4882631492614746, "loss": 0.09363459795713425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.58850998637082, "step_time": 0.44283651733398444} +{"epoch": 0, "iter": 6453, "iter_tflops": 36.143704879395784, "iter_time": 0.5708073806762695, "loss": 0.06198268383741379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.377728484044106, "step_time": 0.5109523067474365} +{"epoch": 0, "iter": 6454, "iter_tflops": 39.75223241568733, "iter_time": 0.518992073059082, "loss": 
0.041078995913267136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6288474381429, "step_time": 0.4622815666198731} +{"epoch": 0, "iter": 6455, "iter_tflops": 44.326202272336864, "iter_time": 0.46543787765502936, "loss": 0.02886388823390007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80562618891931, "step_time": 0.42271957397460935} +{"epoch": 0, "iter": 6456, "iter_tflops": 42.481020429141296, "iter_time": 0.48565437698364256, "loss": 0.05870207026600838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.468606289399, "step_time": 0.4439791755676269} +{"epoch": 0, "iter": 6457, "iter_tflops": 24.33950421072651, "iter_time": 0.847638198852539, "loss": 0.1354057639837265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.162255555258653, "step_time": 0.7885823707580567} +{"epoch": 0, "iter": 6458, "iter_tflops": 12.939188562206501, "iter_time": 1.594465789794922, "loss": 0.14708565175533295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.896202907261866, "step_time": 1.3849900970458986} +{"epoch": 0, "iter": 6459, "iter_tflops": 14.018096926100887, "iter_time": 1.471747100830078, "loss": 0.18948373198509216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.66372745517948, "step_time": 1.2380839500427245} +{"epoch": 0, "iter": 6460, "iter_tflops": 39.395675622070904, "iter_time": 0.5236892929077148, "loss": 0.18388935923576355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.55295264099045, "step_time": 0.4737013740539551} +{"epoch": 0, "iter": 6461, "iter_tflops": 15.683675410652702, "iter_time": 1.0811246948242188, "loss": 0.2809988260269165, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 16.834211591273412, "step_time": 1.0072350997924806} +{"epoch": 0, "iter": 6462, "iter_tflops": 11.424420043547029, "iter_time": 1.484189895629883, "loss": 0.44456714391708374, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 13.725734018460079, "step_time": 1.235344409942627} +{"epoch": 0, "iter": 6463, "iter_tflops": 26.33771186045214, "iter_time": 0.6437920227050781, 
"loss": 0.45569494366645813, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 28.330937462418266, "step_time": 0.5984979782104493} +{"epoch": 0, "iter": 6464, "iter_tflops": 26.769722725393745, "iter_time": 0.6334024810791016, "loss": 0.3897158205509186, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 28.749554422229963, "step_time": 0.5897833595275879} +{"epoch": 0, "iter": 6465, "iter_tflops": 17.36780652968336, "iter_time": 1.1878928680419922, "loss": 0.0998275950551033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.856931370589777, "step_time": 1.0940854110717773} +{"epoch": 0, "iter": 6466, "iter_tflops": 21.44079769740611, "iter_time": 0.9622353515625, "loss": 0.1768142282962799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.95710251688387, "step_time": 0.7948149642944337} +{"epoch": 0, "iter": 6467, "iter_tflops": 52.18868484785432, "iter_time": 0.3953173675537109, "loss": 0.14334894716739655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.021678185552595, "step_time": 0.36181140518188476} +{"epoch": 0, "iter": 6468, "iter_tflops": 47.34406228925231, "iter_time": 0.43576939773559575, "loss": 0.104083351790905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24472948611428, "step_time": 0.4025993251800537} +{"epoch": 0, "iter": 6469, "iter_tflops": 40.600274797907936, "iter_time": 0.5081515731811523, "loss": 0.29953840374946594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87870273383236, "step_time": 0.47018467330932623} +{"epoch": 0, "iter": 6470, "iter_tflops": 18.633128089447293, "iter_time": 1.107226516723633, "loss": 0.3907026946544647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.450991333661623, "step_time": 0.8437732944488525} +{"epoch": 0, "iter": 6471, "iter_tflops": 47.497364060853684, "iter_time": 0.4343629150390625, "loss": 0.20960168540477753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.62245128267383, "step_time": 0.39965350341796874} +{"epoch": 0, "iter": 6472, "iter_tflops": 47.56499399296935, "iter_time": 
0.4337453193664551, "loss": 0.2045699805021286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.615019170501604, "step_time": 0.3997110500335694} +{"epoch": 0, "iter": 6473, "iter_tflops": 31.493293177486578, "iter_time": 0.3970998077392578, "loss": 0.012420311570167542, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 35.00783194390708, "step_time": 0.3572337951660156} +{"epoch": 0, "iter": 6474, "iter_tflops": 30.268811882713432, "iter_time": 0.4131639099121094, "loss": 0.002594297518953681, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 33.287988485660165, "step_time": 0.37569048881530764} +{"epoch": 0, "iter": 6475, "iter_tflops": 36.84266741588343, "iter_time": 0.3394428672790527, "loss": 0.0034565527457743883, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 40.45822694139051, "step_time": 0.30910847091674803} +{"epoch": 0, "iter": 6476, "iter_tflops": 33.148839607021365, "iter_time": 0.3772675247192383, "loss": 0.0022817514836788177, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 36.31204351438346, "step_time": 0.3444031085968018} +{"epoch": 0, "iter": 6477, "iter_tflops": 23.443553110030894, "iter_time": 0.8800327072143554, "loss": 0.03863611817359924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.62423530880258, "step_time": 0.8378369216918945} +{"epoch": 0, "iter": 6478, "iter_tflops": 20.896686066330904, "iter_time": 0.9872902069091796, "loss": 0.05902779847383499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.71786038618713, "step_time": 0.8022087841033936} +{"epoch": 0, "iter": 6479, "iter_tflops": 50.69656372106005, "iter_time": 0.40695250320434573, "loss": 0.033491749316453934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.414891703218565, "step_time": 0.3723023338317871} +{"epoch": 0, "iter": 6480, "iter_tflops": 56.19603919646532, "iter_time": 0.3671271820068359, "loss": 0.021846849471330643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.326529947862944, "step_time": 0.3364138412475585} +{"epoch": 0, "iter": 6481, "iter_tflops": 
25.276928901018444, "iter_time": 0.8162025375366211, "loss": 0.8924626708030701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.604463577036633, "step_time": 0.7754748916625976} +{"epoch": 0, "iter": 6482, "iter_tflops": 14.787674193728574, "iter_time": 1.395154724121094, "loss": 0.7362114191055298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.765060735716002, "step_time": 1.1613297481536864} +{"epoch": 0, "iter": 6483, "iter_tflops": 33.174511361457334, "iter_time": 0.6218959274291991, "loss": 0.7477312088012695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.43520531002247, "step_time": 0.5662406272888184} +{"epoch": 0, "iter": 6484, "iter_tflops": 39.38135132383036, "iter_time": 0.5238797760009766, "loss": 0.7637822031974792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.912112261806406, "step_time": 0.48077552986145017} +{"epoch": 0, "iter": 6485, "iter_tflops": 19.39660508004572, "iter_time": 1.063644561767578, "loss": 0.4408472776412964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.839907886243058, "step_time": 0.9899800720214846} +{"epoch": 0, "iter": 6486, "iter_tflops": 21.264339177773365, "iter_time": 0.9702202987670898, "loss": 0.5875018239021301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.262790554122837, "step_time": 0.7855636463165282} +{"epoch": 0, "iter": 6487, "iter_tflops": 47.88795754885344, "iter_time": 0.43082007598876954, "loss": 0.4372725784778595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.72541058817839, "step_time": 0.39885799407958983} +{"epoch": 0, "iter": 6488, "iter_tflops": 49.33116707110156, "iter_time": 0.41821620559692385, "loss": 0.4936656653881073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26475521456089, "step_time": 0.38733104896545406} +{"epoch": 0, "iter": 6489, "iter_tflops": 25.558098978058286, "iter_time": 0.8072233200073242, "loss": 0.6143760085105896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.78156149130945, "step_time": 0.7703469238281251} +{"epoch": 0, "iter": 6490, 
"iter_tflops": 15.29012911112599, "iter_time": 1.3493079986572265, "loss": 0.4909675419330597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.20198563805648, "step_time": 1.1334529056549072} +{"epoch": 0, "iter": 6491, "iter_tflops": 47.17811791242337, "iter_time": 0.43730217361450197, "loss": 0.5069401860237122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.26263733619244, "step_time": 0.402458683013916} +{"epoch": 0, "iter": 6492, "iter_tflops": 48.03446797615193, "iter_time": 0.42950602722167963, "loss": 0.45149996876716614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.851204702283056, "step_time": 0.3978903408050537} +{"epoch": 0, "iter": 6493, "iter_tflops": 19.50967452486879, "iter_time": 1.0574801483154297, "loss": 0.7229737639427185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.324275901559822, "step_time": 1.0150961151123046} +{"epoch": 0, "iter": 6494, "iter_tflops": 19.90104703068262, "iter_time": 1.036683822631836, "loss": 0.9395167231559753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.808425002331344, "step_time": 0.8316164169311524} +{"epoch": 0, "iter": 6495, "iter_tflops": 44.25243693535124, "iter_time": 0.4662137260437012, "loss": 0.9869323372840881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.48681893025571, "step_time": 0.43445937156677245} +{"epoch": 0, "iter": 6496, "iter_tflops": 43.13999939029394, "iter_time": 0.4782358322143555, "loss": 0.9833440184593201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.6353602968014, "step_time": 0.4423916397094726} +{"epoch": 0, "iter": 6497, "iter_tflops": 2.625890786547533, "iter_time": 0.626767074584961, "loss": 0.6040644645690918, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 2.806038250296183, "step_time": 0.5865286712646485} +{"epoch": 0, "iter": 6498, "iter_tflops": 0.9964884103768071, "iter_time": 1.6516217041015624, "loss": 0.6198275089263916, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 1.1857301832327127, "step_time": 1.3880239448547362} +{"epoch": 0, "iter": 6499, 
"iter_tflops": 3.700400829748635, "iter_time": 0.4447685432434082, "loss": 0.7473374605178833, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 3.9986370249424485, "step_time": 0.4115957202911378} +{"epoch": 0, "iter": 6500, "iter_tflops": 3.9951653852869446, "iter_time": 0.41195338058471676, "loss": 0.7236379384994507, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 4.317522724502812, "step_time": 0.38119588279724115} +{"epoch": 0, "iter": 6501, "iter_tflops": 44.677252575329234, "iter_time": 0.46178071212768557, "loss": 0.5090716481208801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.96284817993594, "step_time": 0.42136220169067384} +{"epoch": 0, "iter": 6502, "iter_tflops": 37.50480382215872, "iter_time": 0.5500920257568358, "loss": 0.6111385226249695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.22862758050776, "step_time": 0.5004069919586182} +{"epoch": 0, "iter": 6503, "iter_tflops": 41.08109945775821, "iter_time": 0.5022040252685547, "loss": 0.6287657022476196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.989246925101924, "step_time": 0.4585783252716064} +{"epoch": 0, "iter": 6504, "iter_tflops": 36.903150969704335, "iter_time": 0.5590604858398438, "loss": 0.5106127262115479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.29228397435777, "step_time": 0.5120358409881591} +{"epoch": 0, "iter": 6505, "iter_tflops": 24.915591533637013, "iter_time": 0.8280394821166992, "loss": 0.6108376979827881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.526588424149438, "step_time": 0.7777514839172364} +{"epoch": 0, "iter": 6506, "iter_tflops": 14.424854617368755, "iter_time": 1.4302462005615233, "loss": 0.6711413264274597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.926079161997908, "step_time": 1.2188938331604005} +{"epoch": 0, "iter": 6507, "iter_tflops": 48.35758234470272, "iter_time": 0.4266361656188964, "loss": 0.5704962611198425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51857989065727, "step_time": 0.3928341846466064} +{"epoch": 0, "iter": 6508, 
"iter_tflops": 47.69133455056549, "iter_time": 0.4325962715148926, "loss": 0.5520709753036499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.64778841757055, "step_time": 0.39945744323730464} +{"epoch": 0, "iter": 6509, "iter_tflops": 41.874801513809054, "iter_time": 0.49268516540527346, "loss": 0.6948265433311462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.3340574604058, "step_time": 0.45509038162231447} +{"epoch": 0, "iter": 6510, "iter_tflops": 34.59109866640074, "iter_time": 0.5964278182983399, "loss": 0.643846869468689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.70438970932874, "step_time": 0.5471801471710205} +{"epoch": 0, "iter": 6511, "iter_tflops": 36.18062981831707, "iter_time": 0.5702248306274414, "loss": 0.8518320918083191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.44170075383371, "step_time": 0.523078191757202} +{"epoch": 0, "iter": 6512, "iter_tflops": 38.6631099535709, "iter_time": 0.533611846923828, "loss": 0.7526416778564453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93318760562909, "step_time": 0.4919991703033447} +{"epoch": 0, "iter": 6513, "iter_tflops": 20.940074151866575, "iter_time": 0.9852445297241211, "loss": 1.149298906326294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.35681528362421, "step_time": 0.9228100357055663} +{"epoch": 0, "iter": 6514, "iter_tflops": 22.848737530607362, "iter_time": 0.9029423828125, "loss": 1.0223197937011719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.117095157764904, "step_time": 0.7337562217712402} +{"epoch": 0, "iter": 6515, "iter_tflops": 43.29388689946218, "iter_time": 0.4765359497070313, "loss": 0.7967909574508667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.54643785664021, "step_time": 0.4432367858886719} +{"epoch": 0, "iter": 6516, "iter_tflops": 42.031640018164495, "iter_time": 0.4908467407226562, "loss": 0.5502470135688782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.71459543452567, "step_time": 0.46139506149292} +{"epoch": 0, "iter": 6517, 
"iter_tflops": 36.90027003929165, "iter_time": 0.559104133605957, "loss": 0.02442047744989395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.599699071101014, "step_time": 0.5209911689758301} +{"epoch": 0, "iter": 6518, "iter_tflops": 18.398058121100505, "iter_time": 1.1213734283447265, "loss": 0.03136925771832466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.50005576163956, "step_time": 0.9169352169036865} +{"epoch": 0, "iter": 6519, "iter_tflops": 38.30902562732761, "iter_time": 0.5385439376831055, "loss": 0.039902444928884506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.261400791431846, "step_time": 0.48817817497253413} +{"epoch": 0, "iter": 6520, "iter_tflops": 40.70565323729283, "iter_time": 0.5068360748291015, "loss": 0.06397871673107147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.89881141999165, "step_time": 0.4595019969940185} +{"epoch": 0, "iter": 6521, "iter_tflops": 30.95085435324998, "iter_time": 0.6665758972167969, "loss": 0.7213350534439087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.808081951102395, "step_time": 0.6102414665222167} +{"epoch": 0, "iter": 6522, "iter_tflops": 40.54649613719595, "iter_time": 0.5088255577087402, "loss": 0.7735570669174194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78190811635744, "step_time": 0.4712241744995117} +{"epoch": 0, "iter": 6523, "iter_tflops": 43.65928220233614, "iter_time": 0.4725477027893067, "loss": 0.8974648118019104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.839495044127396, "step_time": 0.4404636192321778} +{"epoch": 0, "iter": 6524, "iter_tflops": 46.971593694711444, "iter_time": 0.43922489929199215, "loss": 0.8601417541503906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.41270347503223, "step_time": 0.40924394226074223} +{"epoch": 0, "iter": 6525, "iter_tflops": 26.09732713196856, "iter_time": 0.7905443115234374, "loss": 0.32868269085884094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.52332993126936, "step_time": 0.7495856628417968} +{"epoch": 0, "iter": 
6526, "iter_tflops": 13.864002651322581, "iter_time": 1.4881051330566406, "loss": 0.3860907256603241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.194900916926773, "step_time": 1.1338942489624022} +{"epoch": 0, "iter": 6527, "iter_tflops": 39.872196592625244, "iter_time": 0.5174305725097657, "loss": 0.4503876566886902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.710566512582005, "step_time": 0.47199327659606943} +{"epoch": 0, "iter": 6528, "iter_tflops": 39.3196175618268, "iter_time": 0.5247022933959962, "loss": 0.3762332499027252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.09547325970008, "step_time": 0.478729944229126} +{"epoch": 0, "iter": 6529, "iter_tflops": 14.5881146133505, "iter_time": 1.4142398834228516, "loss": 0.7560258507728577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.694758395065978, "step_time": 1.3145212554931642} +{"epoch": 0, "iter": 6530, "iter_tflops": 16.647116411302097, "iter_time": 1.239319351196289, "loss": 0.6421906352043152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.123529030485923, "step_time": 1.025222438812256} +{"epoch": 0, "iter": 6531, "iter_tflops": 45.03317460506475, "iter_time": 0.45813100433349613, "loss": 0.95305335521698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.589818332658545, "step_time": 0.4245970497131348} +{"epoch": 0, "iter": 6532, "iter_tflops": 42.06567962834657, "iter_time": 0.4904495468139648, "loss": 0.8117479681968689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.98844175274658, "step_time": 0.45858653259277343} +{"epoch": 0, "iter": 6533, "iter_tflops": 32.52906634802237, "iter_time": 0.6342356491088867, "loss": 0.03412780165672302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.62355490510506, "step_time": 0.595868724822998} +{"epoch": 0, "iter": 6534, "iter_tflops": 33.75197162306746, "iter_time": 0.6112559509277344, "loss": 0.027801718562841415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95746347140018, "step_time": 0.49171450805664063} +{"epoch": 0, "iter": 
6535, "iter_tflops": 42.29607354307486, "iter_time": 0.4877779846191406, "loss": 0.03849970921874046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.809436276376836, "step_time": 0.4407464637756348} +{"epoch": 0, "iter": 6536, "iter_tflops": 46.63023810145084, "iter_time": 0.44244023513793934, "loss": 0.04538993909955025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.019641490633724, "step_time": 0.40437550926208493} +{"epoch": 0, "iter": 6537, "iter_tflops": 24.823460279475523, "iter_time": 0.8311127166748047, "loss": 0.04795153811573982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.577957800391, "step_time": 0.7762482604980468} +{"epoch": 0, "iter": 6538, "iter_tflops": 8.980195542117919, "iter_time": 2.297399139404297, "loss": 0.02378687635064125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.793224897038199, "step_time": 1.9114855575561525} +{"epoch": 0, "iter": 6539, "iter_tflops": 12.945288629780418, "iter_time": 1.5937144470214846, "loss": 0.02221588045358658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.068945806866857, "step_time": 1.2839108276367188} +{"epoch": 0, "iter": 6540, "iter_tflops": 48.31313292417802, "iter_time": 0.4270286827087403, "loss": 0.03840547055006027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.68615114153068, "step_time": 0.37048876762390137} +{"epoch": 0, "iter": 6541, "iter_tflops": 22.49144630897904, "iter_time": 0.7046449813842774, "loss": 0.43761155009269714, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 23.749480391337993, "step_time": 0.6673192214965821} +{"epoch": 0, "iter": 6542, "iter_tflops": 7.13722099341107, "iter_time": 2.2205400085449214, "loss": 0.4095419943332672, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 8.012131471000174, "step_time": 1.978060997009277} +{"epoch": 0, "iter": 6543, "iter_tflops": 9.535207690853868, "iter_time": 1.6621016845703123, "loss": 0.377829909324646, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 11.650460003052627, "step_time": 1.3603312454223633} +{"epoch": 0, 
"iter": 6544, "iter_tflops": 23.480137959321393, "iter_time": 0.674974090576172, "loss": 0.3884039521217346, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.317920566667457, "step_time": 0.6259789276123048} +{"epoch": 0, "iter": 6545, "iter_tflops": 12.870071348312072, "iter_time": 1.1170984649658202, "loss": 0.43652063608169556, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 13.810078611220785, "step_time": 1.0410611953735351} +{"epoch": 0, "iter": 6546, "iter_tflops": 13.42722854862328, "iter_time": 1.0707449340820312, "loss": 0.48092716932296753, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 16.38839073452244, "step_time": 0.8772756996154786} +{"epoch": 0, "iter": 6547, "iter_tflops": 22.923731166645634, "iter_time": 0.6271726379394531, "loss": 0.47706523537635803, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 24.633660933661357, "step_time": 0.5836378517150879} +{"epoch": 0, "iter": 6548, "iter_tflops": 20.288025414048196, "iter_time": 0.7086513671875, "loss": 0.3454822897911072, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 21.922853520736915, "step_time": 0.6558059120178223} +{"epoch": 0, "iter": 6549, "iter_tflops": 26.226631551955737, "iter_time": 0.7866467132568359, "loss": 0.3108387589454651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.183966911855002, "step_time": 0.7320152473449707} +{"epoch": 0, "iter": 6550, "iter_tflops": 12.018869159768366, "iter_time": 1.716558624267578, "loss": 0.29831230640411377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.857894983734841, "step_time": 1.4887609939575197} +{"epoch": 0, "iter": 6551, "iter_tflops": 13.220057881494842, "iter_time": 1.5605902557373048, "loss": 0.36349284648895264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.471455969220234, "step_time": 1.2525361175537109} +{"epoch": 0, "iter": 6552, "iter_tflops": 37.62706495882277, "iter_time": 0.5483046188354492, "loss": 0.28265321254730225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34398042296378, "step_time": 0.49901081848144535} 
+{"epoch": 0, "iter": 6553, "iter_tflops": 20.029365546653402, "iter_time": 0.8506602172851563, "loss": 0.38706541061401367, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 21.623307256728506, "step_time": 0.7879546012878418} +{"epoch": 0, "iter": 6554, "iter_tflops": 24.54042098483721, "iter_time": 0.6942906341552735, "loss": 0.46742627024650574, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 26.51944919641667, "step_time": 0.6424788208007812} +{"epoch": 0, "iter": 6555, "iter_tflops": 25.239888710939745, "iter_time": 0.6750499038696289, "loss": 0.3132375478744507, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 27.25163122938735, "step_time": 0.6252170486450196} +{"epoch": 0, "iter": 6556, "iter_tflops": 28.15159373898933, "iter_time": 0.6052298355102539, "loss": 0.27866223454475403, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 30.113640600971824, "step_time": 0.5657962341308593} +{"epoch": 0, "iter": 6557, "iter_tflops": 31.426618736001384, "iter_time": 0.6564846725463866, "loss": 0.722575843334198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.66475024271715, "step_time": 0.5951605987548828} +{"epoch": 0, "iter": 6558, "iter_tflops": 37.36237148490968, "iter_time": 0.552189079284668, "loss": 0.5607574582099915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.844810809681874, "step_time": 0.5051092929840088} +{"epoch": 0, "iter": 6559, "iter_tflops": 39.72840308143261, "iter_time": 0.519303367614746, "loss": 0.5136979222297668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37565811909995, "step_time": 0.4756375904083252} +{"epoch": 0, "iter": 6560, "iter_tflops": 39.81542464695179, "iter_time": 0.5181683654785155, "loss": 0.49657872319221497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.460594220871634, "step_time": 0.4747080402374268} +{"epoch": 0, "iter": 6561, "iter_tflops": 25.032252716213513, "iter_time": 0.8241804580688477, "loss": 0.6765220761299133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.99885013574758, "step_time": 0.7641471176147461} 
+{"epoch": 0, "iter": 6562, "iter_tflops": 8.330831422658349, "iter_time": 2.476474731445313, "loss": 0.5126335620880127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.041477067776748, "step_time": 2.0545875244140626} +{"epoch": 0, "iter": 6563, "iter_tflops": 15.69237070777245, "iter_time": 1.3147212677001954, "loss": 0.5766862630844116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.60270307597488, "step_time": 1.1090374031066896} +{"epoch": 0, "iter": 6564, "iter_tflops": 36.72172472690527, "iter_time": 0.5618225631713867, "loss": 0.7769229412078857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.37131018415815, "step_time": 0.5110335388183593} +{"epoch": 0, "iter": 6565, "iter_tflops": 12.645807898179518, "iter_time": 1.2338346099853517, "loss": 0.33602216839790344, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.491929016750271, "step_time": 1.1564569778442384} +{"epoch": 0, "iter": 6566, "iter_tflops": 13.593929723698633, "iter_time": 1.14777961730957, "loss": 0.510407030582428, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.00945179943261, "step_time": 0.9746014823913574} +{"epoch": 0, "iter": 6567, "iter_tflops": 24.00941145106156, "iter_time": 0.6498633041381835, "loss": 0.4467482268810272, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.822381002367138, "step_time": 0.6042369003295898} +{"epoch": 0, "iter": 6568, "iter_tflops": 22.992577169812396, "iter_time": 0.678603157043457, "loss": 0.40610942244529724, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.623115679466483, "step_time": 0.6336661720275879} +{"epoch": 0, "iter": 6569, "iter_tflops": 18.535237178176803, "iter_time": 1.113074157714844, "loss": 0.9174033999443054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.714188228025034, "step_time": 1.0465099182128905} +{"epoch": 0, "iter": 6570, "iter_tflops": 19.189331861151327, "iter_time": 1.075133499145508, "loss": 0.82109534740448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.0840616537065, "step_time": 0.9342073860168458} 
+{"epoch": 0, "iter": 6571, "iter_tflops": 37.110278668959815, "iter_time": 0.5559401397705077, "loss": 0.8179483413696289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.30580200668554, "step_time": 0.5118641109466553} +{"epoch": 0, "iter": 6572, "iter_tflops": 36.46505039549913, "iter_time": 0.5657771835327149, "loss": 0.868183434009552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.59515551188917, "step_time": 0.5210509529113769} +{"epoch": 0, "iter": 6573, "iter_tflops": 28.880508191090893, "iter_time": 0.7143604736328124, "loss": 0.3021267056465149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.227030214363875, "step_time": 0.6606806144714356} +{"epoch": 0, "iter": 6574, "iter_tflops": 9.304375926207952, "iter_time": 2.2173538208007812, "loss": 0.24983756244182587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.100985430350201, "step_time": 1.8584920806884764} +{"epoch": 0, "iter": 6575, "iter_tflops": 16.23834746896367, "iter_time": 1.2705168151855468, "loss": 0.20595352351665497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.557229334934206, "step_time": 1.111755054473877} +{"epoch": 0, "iter": 6576, "iter_tflops": 35.408989154310454, "iter_time": 0.5826512985229492, "loss": 0.295742005109787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.79955461633799, "step_time": 0.5056695766448975} +{"epoch": 0, "iter": 6577, "iter_tflops": 21.036893284932205, "iter_time": 0.7416891479492186, "loss": 0.42020079493522644, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 22.93998992873277, "step_time": 0.6801587753295898} +{"epoch": 0, "iter": 6578, "iter_tflops": 23.632145573098796, "iter_time": 0.6602377853393555, "loss": 0.32650938630104065, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.416552682407943, "step_time": 0.6138848037719726} +{"epoch": 0, "iter": 6579, "iter_tflops": 23.0102709360051, "iter_time": 0.6780813446044922, "loss": 0.28533124923706055, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.601816549837206, "step_time": 0.634214771270752} 
+{"epoch": 0, "iter": 6580, "iter_tflops": 22.622267173119784, "iter_time": 0.6897113952636718, "loss": 0.4187280833721161, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.342492735249554, "step_time": 0.6409711456298828} +{"epoch": 0, "iter": 6581, "iter_tflops": 15.690799171985086, "iter_time": 0.8513884735107422, "loss": 0.024563629180192947, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 17.073519159525954, "step_time": 0.7824377288818359} +{"epoch": 0, "iter": 6582, "iter_tflops": 32.85247577704215, "iter_time": 0.40663497161865236, "loss": 0.00659038545563817, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 36.27536918636031, "step_time": 0.3682654609680176} +{"epoch": 0, "iter": 6583, "iter_tflops": 35.207063449639016, "iter_time": 0.3794399261474609, "loss": 0.004339012783020735, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 38.76214531843961, "step_time": 0.34463947868347167} +{"epoch": 0, "iter": 6584, "iter_tflops": 37.59747900496528, "iter_time": 0.3553154602050781, "loss": 0.0071205454878509045, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 41.345140117434084, "step_time": 0.3231084842681885} +{"epoch": 0, "iter": 6585, "iter_tflops": 31.650320815366435, "iter_time": 0.6518446884155272, "loss": 0.17611733078956604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.6294397792406, "step_time": 0.6134831161499024} +{"epoch": 0, "iter": 6586, "iter_tflops": 14.134636094108295, "iter_time": 1.4596126403808596, "loss": 0.12939658761024475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.23014156968545, "step_time": 1.131702320098877} +{"epoch": 0, "iter": 6587, "iter_tflops": 38.21361938206516, "iter_time": 0.5398884963989258, "loss": 0.22625330090522766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.68929716488631, "step_time": 0.4948774604797363} +{"epoch": 0, "iter": 6588, "iter_tflops": 34.208586870563465, "iter_time": 0.603096923828125, "loss": 0.20196950435638428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.54493483936328, "step_time": 
0.5495040435791015} +{"epoch": 0, "iter": 6589, "iter_tflops": 19.956257097738458, "iter_time": 1.0338157806396484, "loss": 0.2687320113182068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.45433724643295, "step_time": 0.9616280975341798} +{"epoch": 0, "iter": 6590, "iter_tflops": 19.25345484760261, "iter_time": 1.071552803039551, "loss": 0.26167017221450806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.75284578673176, "step_time": 0.8011189785003662} +{"epoch": 0, "iter": 6591, "iter_tflops": 36.89493438076046, "iter_time": 0.5591849899291993, "loss": 0.19144995510578156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.24388938296933, "step_time": 0.5126515808105468} +{"epoch": 0, "iter": 6592, "iter_tflops": 44.86837965483732, "iter_time": 0.4598136520385742, "loss": 0.25200143456459045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1053628895876, "step_time": 0.42013931465148924} +{"epoch": 0, "iter": 6593, "iter_tflops": 23.52677616376643, "iter_time": 0.8769197006225586, "loss": 0.04272739589214325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.185827022906132, "step_time": 0.8191548957824708} +{"epoch": 0, "iter": 6594, "iter_tflops": 20.610170582989902, "iter_time": 1.0010151748657226, "loss": 0.03327682614326477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.255664625760826, "step_time": 0.8871427173614501} +{"epoch": 0, "iter": 6595, "iter_tflops": 52.03606258065396, "iter_time": 0.39647683715820314, "loss": 0.02768120914697647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.85126469163065, "step_time": 0.36289594650268553} +{"epoch": 0, "iter": 6596, "iter_tflops": 55.73143715556586, "iter_time": 0.37018771743774415, "loss": 0.03833107650279999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.7272635121033, "step_time": 0.33973362731933593} +{"epoch": 0, "iter": 6597, "iter_tflops": 41.455836305194296, "iter_time": 0.49766439056396483, "loss": 0.8058786392211914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.07734820374447, 
"step_time": 0.45768205833435055} +{"epoch": 0, "iter": 6598, "iter_tflops": 38.198456976887584, "iter_time": 0.540102798461914, "loss": 0.8017343282699585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76619278944203, "step_time": 0.4939663429260253} +{"epoch": 0, "iter": 6599, "iter_tflops": 37.238586603174504, "iter_time": 0.5540246124267578, "loss": 0.708228349685669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.7916560519917, "step_time": 0.5057674903869629} +{"epoch": 0, "iter": 6600, "iter_tflops": 38.96522516755102, "iter_time": 0.5294745101928711, "loss": 0.7278947234153748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03085671192219, "step_time": 0.49085588836669924} +{"epoch": 0, "iter": 6601, "iter_tflops": 36.18777242446129, "iter_time": 0.5701122817993164, "loss": 0.24777951836585999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.29360375849904, "step_time": 0.5120190696716309} +{"epoch": 0, "iter": 6602, "iter_tflops": 37.69835610604125, "iter_time": 0.5472677230834961, "loss": 0.13421769440174103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57958778174781, "step_time": 0.4961832141876221} +{"epoch": 0, "iter": 6603, "iter_tflops": 42.414982573493724, "iter_time": 0.486410514831543, "loss": 0.14061318337917328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.566705009530814, "step_time": 0.4430438766479492} +{"epoch": 0, "iter": 6604, "iter_tflops": 41.17045215911612, "iter_time": 0.5011140861511232, "loss": 0.08391943573951721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08841224253123, "step_time": 0.45756974983215326} +{"epoch": 0, "iter": 6605, "iter_tflops": 19.312740526900313, "iter_time": 1.0682633819580079, "loss": 0.5599151849746704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.50589400706506, "step_time": 1.0061055374145509} +{"epoch": 0, "iter": 6606, "iter_tflops": 16.21922110203161, "iter_time": 1.2720150604248048, "loss": 0.6865341663360596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.39966226009833, 
"step_time": 0.9210448474884032} +{"epoch": 0, "iter": 6607, "iter_tflops": 49.13038665746414, "iter_time": 0.4199253234863281, "loss": 0.7232663035392761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.25262663528245, "step_time": 0.3874192657470703} +{"epoch": 0, "iter": 6608, "iter_tflops": 47.119982129638366, "iter_time": 0.43784170913696285, "loss": 0.6617478728294373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.83822550509841, "step_time": 0.40581852149963377} +{"epoch": 0, "iter": 6609, "iter_tflops": 33.473438875530704, "iter_time": 0.6163422164916993, "loss": 0.48177483677864075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.86933920134368, "step_time": 0.5751735038757324} +{"epoch": 0, "iter": 6610, "iter_tflops": 7.843063205398036, "iter_time": 2.6304892578125, "loss": 0.5004656314849854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.63174540910365, "step_time": 2.141989082336426} +{"epoch": 0, "iter": 6611, "iter_tflops": 11.669112930886467, "iter_time": 1.7680087280273435, "loss": 0.39434245228767395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.235023130379286, "step_time": 1.4493192825317385} +{"epoch": 0, "iter": 6612, "iter_tflops": 35.28381381346004, "iter_time": 0.5847183532714844, "loss": 0.33060887455940247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33781606103854, "step_time": 0.49908523178100594} +{"epoch": 0, "iter": 6613, "iter_tflops": 19.901850966379026, "iter_time": 0.7408557434082031, "loss": 0.5061115026473999, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 21.047169604528435, "step_time": 0.7005407791137696} +{"epoch": 0, "iter": 6614, "iter_tflops": 14.645552721382849, "iter_time": 1.0067493438720705, "loss": 0.5610250234603882, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 18.91499592881134, "step_time": 0.7795085258483886} +{"epoch": 0, "iter": 6615, "iter_tflops": 27.103300107718322, "iter_time": 0.5440075759887695, "loss": 0.5392704606056213, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 28.985679867982793, 
"step_time": 0.5086787910461426} +{"epoch": 0, "iter": 6616, "iter_tflops": 25.196943801495383, "iter_time": 0.5851662292480468, "loss": 0.3621833622455597, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 26.641636457885244, "step_time": 0.5534344940185547} +{"epoch": 0, "iter": 6617, "iter_tflops": 19.324410084541498, "iter_time": 0.6450605316162109, "loss": 0.007517086807638407, "lr": 3e-05, "seqlen": 5008.0, "step_tflops": 20.61598033629375, "step_time": 0.6046481437683106} +{"epoch": 0, "iter": 6618, "iter_tflops": 11.068221064943648, "iter_time": 1.1262346649169923, "loss": 0.004248919431120157, "lr": 3e-05, "seqlen": 5008.0, "step_tflops": 12.528621617247865, "step_time": 0.9949549617767334} +{"epoch": 0, "iter": 6619, "iter_tflops": 34.34896462837756, "iter_time": 0.3629050941467285, "loss": 0.024559386074543, "lr": 3e-05, "seqlen": 5008.0, "step_tflops": 37.922377450840386, "step_time": 0.3287086696624756} +{"epoch": 0, "iter": 6620, "iter_tflops": 32.8509957300733, "iter_time": 0.37945316314697264, "loss": 0.016014523804187775, "lr": 3e-05, "seqlen": 5008.0, "step_tflops": 36.197077021991376, "step_time": 0.34437626647949215} +{"epoch": 0, "iter": 6621, "iter_tflops": 25.258038016212573, "iter_time": 0.81681298828125, "loss": 0.20388224720954895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.55508446075273, "step_time": 0.7769168853759767} +{"epoch": 0, "iter": 6622, "iter_tflops": 15.773736327566532, "iter_time": 1.3079395446777344, "loss": 0.14546804130077362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.018438577874413, "step_time": 1.1449989643096923} +{"epoch": 0, "iter": 6623, "iter_tflops": 45.52987346403943, "iter_time": 0.45313311767578124, "loss": 0.21234208345413208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.673301273920416, "step_time": 0.41533566284179685} +{"epoch": 0, "iter": 6624, "iter_tflops": 51.732820341742865, "iter_time": 0.39880086517333985, "loss": 0.18157990276813507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
56.23994939786377, "step_time": 0.36684054183959963} +{"epoch": 0, "iter": 6625, "iter_tflops": 24.141721863974613, "iter_time": 0.8339668807983398, "loss": 0.04486077278852463, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 25.468461971063803, "step_time": 0.7905226669311524} +{"epoch": 0, "iter": 6626, "iter_tflops": 18.298084834167454, "iter_time": 1.1003007507324218, "loss": 0.04019828885793686, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 23.555681244829177, "step_time": 0.8547151012420654} +{"epoch": 0, "iter": 6627, "iter_tflops": 51.308205996002094, "iter_time": 0.39240110015869145, "loss": 0.04862958565354347, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 56.50736195763714, "step_time": 0.35629687499999996} +{"epoch": 0, "iter": 6628, "iter_tflops": 53.41753282636287, "iter_time": 0.3769061470031739, "loss": 0.024765143170952797, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 58.41966681876489, "step_time": 0.3446338806152344} +{"epoch": 0, "iter": 6629, "iter_tflops": 19.966439511253164, "iter_time": 1.033288558959961, "loss": 0.14568032324314117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.83099730999015, "step_time": 0.9904035415649413} +{"epoch": 0, "iter": 6630, "iter_tflops": 20.031618532551043, "iter_time": 1.0299264373779298, "loss": 0.24437583982944489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.5438636088371, "step_time": 0.8405805149078369} +{"epoch": 0, "iter": 6631, "iter_tflops": 44.6536308266612, "iter_time": 0.4620249938964844, "loss": 0.27835047245025635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.92310603042298, "step_time": 0.42170449066162113} +{"epoch": 0, "iter": 6632, "iter_tflops": 42.133092757129305, "iter_time": 0.4896648254394531, "loss": 0.3069555461406708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.063760308983426, "step_time": 0.4478812274932861} +{"epoch": 0, "iter": 6633, "iter_tflops": 20.213992871901407, "iter_time": 0.8347609558105468, "loss": 0.10461774468421936, "lr": 3e-05, "seqlen": 6736.0, 
"step_tflops": 21.894171266970385, "step_time": 0.7707006492614746} +{"epoch": 0, "iter": 6634, "iter_tflops": 18.990613591026897, "iter_time": 0.8885364303588867, "loss": 0.09116803109645844, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.46483587760063, "step_time": 0.6375951881408691} +{"epoch": 0, "iter": 6635, "iter_tflops": 41.61979566910887, "iter_time": 0.4054285163879395, "loss": 0.07883848994970322, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 45.43410491823805, "step_time": 0.3713917560577393} +{"epoch": 0, "iter": 6636, "iter_tflops": 37.39759480297504, "iter_time": 0.451201530456543, "loss": 0.07196167856454849, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 41.11756040552479, "step_time": 0.41038067054748534} +{"epoch": 0, "iter": 6637, "iter_tflops": 30.47523908232669, "iter_time": 0.676978889465332, "loss": 0.07291895151138306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.18499746610501, "step_time": 0.6410158500671388} +{"epoch": 0, "iter": 6638, "iter_tflops": 12.671144425192765, "iter_time": 1.6281949615478517, "loss": 0.060424454510211945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.95153870067195, "step_time": 1.3798642349243164} +{"epoch": 0, "iter": 6639, "iter_tflops": 42.20748599895406, "iter_time": 0.48880176162719724, "loss": 0.16298459470272064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.49790736981847, "step_time": 0.4436993980407715} +{"epoch": 0, "iter": 6640, "iter_tflops": 41.7208895353162, "iter_time": 0.4945027236938477, "loss": 0.11421502381563187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.970648597401755, "step_time": 0.4487883930206299} +{"epoch": 0, "iter": 6641, "iter_tflops": 17.59872447297709, "iter_time": 1.1723061828613281, "loss": 0.8720680475234985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.956156041659703, "step_time": 1.088358497619629} +{"epoch": 0, "iter": 6642, "iter_tflops": 22.15783334607713, "iter_time": 0.9310970611572265, "loss": 0.9489257335662842, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 27.761601368098443, "step_time": 0.7431521415710449} +{"epoch": 0, "iter": 6643, "iter_tflops": 45.84301434378488, "iter_time": 0.4500378913879394, "loss": 0.8772529363632202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.40998211015211, "step_time": 0.4175490989685059} +{"epoch": 0, "iter": 6644, "iter_tflops": 42.43624669127289, "iter_time": 0.4861667823791504, "loss": 0.8872365355491638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.663477028542474, "step_time": 0.4518073272705079} +{"epoch": 0, "iter": 6645, "iter_tflops": 48.060306183086, "iter_time": 0.4292751159667969, "loss": 0.061197035014629364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.94754191998432, "step_time": 0.3896515827178955} +{"epoch": 0, "iter": 6646, "iter_tflops": 12.871406527807677, "iter_time": 1.6028623962402344, "loss": 0.03416530787944794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.80999835439623, "step_time": 1.4939244003295897} +{"epoch": 0, "iter": 6647, "iter_tflops": 11.59652957063125, "iter_time": 1.7790747985839843, "loss": 0.05523549020290375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.501559079836202, "step_time": 1.4226810646057129} +{"epoch": 0, "iter": 6648, "iter_tflops": 20.801918505601524, "iter_time": 0.9917880172729493, "loss": 0.040552929043769836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.148312594685777, "step_time": 0.854349281311035} +{"epoch": 0, "iter": 6649, "iter_tflops": 16.56383723667343, "iter_time": 1.0187163619995117, "loss": 0.3398495614528656, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 17.674234504276942, "step_time": 0.9547147293090821} +{"epoch": 0, "iter": 6650, "iter_tflops": 13.89157716670236, "iter_time": 1.2146822357177736, "loss": 0.35005098581314087, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 18.001244056667726, "step_time": 0.9373714370727539} +{"epoch": 0, "iter": 6651, "iter_tflops": 25.4377799242385, "iter_time": 0.6633382339477539, "loss": 0.3020877540111542, "lr": 3e-05, "seqlen": 6736.0, 
"step_tflops": 27.393736021339095, "step_time": 0.6159748344421386} +{"epoch": 0, "iter": 6652, "iter_tflops": 25.983267325391857, "iter_time": 0.6494122467041016, "loss": 0.43417149782180786, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.968485117077304, "step_time": 0.6033166236877441} +{"epoch": 0, "iter": 6653, "iter_tflops": 22.919280999946125, "iter_time": 0.9001632080078126, "loss": 0.6666085720062256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.872285810693235, "step_time": 0.8294812011718751} +{"epoch": 0, "iter": 6654, "iter_tflops": 43.65556986530497, "iter_time": 0.4725878868103027, "loss": 0.6749359369277954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.274404815146944, "step_time": 0.43641149139404306} +{"epoch": 0, "iter": 6655, "iter_tflops": 45.09272587393663, "iter_time": 0.4575259780883789, "loss": 0.6866325736045837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.9895373313735, "step_time": 0.4211326465606689} +{"epoch": 0, "iter": 6656, "iter_tflops": 49.744693784909586, "iter_time": 0.4147395820617676, "loss": 0.6741048097610474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.849898434459256, "step_time": 0.3831222362518311} +{"epoch": 0, "iter": 6657, "iter_tflops": 39.62513087228911, "iter_time": 0.5206567916870117, "loss": 0.7777295112609863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.664844227852704, "step_time": 0.48356190872192384} +{"epoch": 0, "iter": 6658, "iter_tflops": 19.71226684167174, "iter_time": 1.0466119232177735, "loss": 0.5677380561828613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.73827974930593, "step_time": 0.8339744606018066} +{"epoch": 0, "iter": 6659, "iter_tflops": 48.10846642263186, "iter_time": 0.4288453788757324, "loss": 0.6982308626174927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.988090630372255, "step_time": 0.3968426856994629} +{"epoch": 0, "iter": 6660, "iter_tflops": 50.071661110546216, "iter_time": 0.41203133773803713, "loss": 0.5510407090187073, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 54.314906145706324, "step_time": 0.3798422012329101} +{"epoch": 0, "iter": 6661, "iter_tflops": 33.58744965018309, "iter_time": 0.6142500762939453, "loss": 0.11634281277656555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.74990877217627, "step_time": 0.5770949974060058} +{"epoch": 0, "iter": 6662, "iter_tflops": 20.194707138678858, "iter_time": 1.0216089477539063, "loss": 0.0584593191742897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.005955796387155, "step_time": 0.896771848678589} +{"epoch": 0, "iter": 6663, "iter_tflops": 42.67021188235536, "iter_time": 0.48350107955932625, "loss": 0.09307993948459625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8761533945723, "step_time": 0.4401191654205322} +{"epoch": 0, "iter": 6664, "iter_tflops": 41.9683149988151, "iter_time": 0.4915873680114746, "loss": 0.09508228302001953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.198032193126295, "step_time": 0.44657948684692383} +{"epoch": 0, "iter": 6665, "iter_tflops": 34.68507115494901, "iter_time": 0.5948119125366211, "loss": 0.7041405439376831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.17412535367008, "step_time": 0.5404470520019531} +{"epoch": 0, "iter": 6666, "iter_tflops": 35.22012639899113, "iter_time": 0.5857756805419921, "loss": 0.6995761394500732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.333448337253415, "step_time": 0.5382008247375488} +{"epoch": 0, "iter": 6667, "iter_tflops": 39.77232223788307, "iter_time": 0.5187299194335937, "loss": 0.8148948550224304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.624003700588965, "step_time": 0.4729298496246338} +{"epoch": 0, "iter": 6668, "iter_tflops": 39.14175177982973, "iter_time": 0.5270866165161133, "loss": 0.7651376724243164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.739535921105606, "step_time": 0.48271683502197266} +{"epoch": 0, "iter": 6669, "iter_tflops": 20.618766957104523, "iter_time": 1.0005978317260742, "loss": 0.9682157635688782, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 22.253207736831524, "step_time": 0.9271064987182618} +{"epoch": 0, "iter": 6670, "iter_tflops": 15.121785742762157, "iter_time": 1.3643291778564453, "loss": 0.888359010219574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.81370885617387, "step_time": 1.0965989570617676} +{"epoch": 0, "iter": 6671, "iter_tflops": 44.32843809974508, "iter_time": 0.4654144020080566, "loss": 0.8404663801193237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.707699526940736, "step_time": 0.43244787979125976} +{"epoch": 0, "iter": 6672, "iter_tflops": 44.817479444674, "iter_time": 0.46033587265014647, "loss": 0.8553723096847534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08520042010548, "step_time": 0.42905287551879884} +{"epoch": 0, "iter": 6673, "iter_tflops": 39.32215217753555, "iter_time": 0.524668472290039, "loss": 0.8512585163116455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.498271916599826, "step_time": 0.4854572334289551} +{"epoch": 0, "iter": 6674, "iter_tflops": 34.41214798383238, "iter_time": 0.5995293731689453, "loss": 0.775181770324707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.8821280469964, "step_time": 0.5446128444671632} +{"epoch": 0, "iter": 6675, "iter_tflops": 36.38317670888939, "iter_time": 0.5670503616333007, "loss": 0.6375117301940918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.384998966191276, "step_time": 0.523831256866455} +{"epoch": 0, "iter": 6676, "iter_tflops": 39.78989649262339, "iter_time": 0.5185008087158203, "loss": 0.7137326598167419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.21313294309635, "step_time": 0.4774264698028564} +{"epoch": 0, "iter": 6677, "iter_tflops": 32.73691746066299, "iter_time": 0.6302088012695313, "loss": 0.8085808157920837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.10767122427733, "step_time": 0.5713770179748535} +{"epoch": 0, "iter": 6678, "iter_tflops": 30.47823734230575, "iter_time": 0.6769122924804686, "loss": 0.76140296459198, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 33.406391074429955, "step_time": 0.617579236984253} +{"epoch": 0, "iter": 6679, "iter_tflops": 38.09639121077097, "iter_time": 0.5415498123168946, "loss": 0.8074531555175781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75584914023893, "step_time": 0.4940887069702148} +{"epoch": 0, "iter": 6680, "iter_tflops": 41.69661444919234, "iter_time": 0.4947906150817871, "loss": 0.8188333511352539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.33614073251838, "step_time": 0.4550694694519043} +{"epoch": 0, "iter": 6681, "iter_tflops": 31.212146396155056, "iter_time": 0.6609956665039063, "loss": 0.49496203660964966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.014285898347346, "step_time": 0.6065420150756836} +{"epoch": 0, "iter": 6682, "iter_tflops": 10.000002324426243, "iter_time": 2.063108871459961, "loss": 0.7234355211257935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.871387494204349, "step_time": 1.7378839263916015} +{"epoch": 0, "iter": 6683, "iter_tflops": 10.220251131762499, "iter_time": 2.018648391723633, "loss": 0.6172308325767517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.138235520183803, "step_time": 1.6996781349182126} +{"epoch": 0, "iter": 6684, "iter_tflops": 46.29827487601614, "iter_time": 0.4456125755310058, "loss": 0.5545006990432739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.63822589352443, "step_time": 0.40742133331298824} +{"epoch": 0, "iter": 6685, "iter_tflops": 17.30657496449825, "iter_time": 0.9631335296630859, "loss": 0.4383142590522766, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 18.12432671258838, "step_time": 0.9196778945922851} +{"epoch": 0, "iter": 6686, "iter_tflops": 11.669620562231058, "iter_time": 1.428370574951172, "loss": 0.32898348569869995, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 13.264890871726701, "step_time": 1.2565910110473633} +{"epoch": 0, "iter": 6687, "iter_tflops": 28.562741667705225, "iter_time": 0.5835764236450195, "loss": 0.37324535846710205, "lr": 3e-05, "seqlen": 6656.0, 
"step_tflops": 30.322135222869186, "step_time": 0.54971533203125} +{"epoch": 0, "iter": 6688, "iter_tflops": 27.520020033237397, "iter_time": 0.6056878814697266, "loss": 0.29858559370040894, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 29.205459115448814, "step_time": 0.570733798980713} +{"epoch": 0, "iter": 6689, "iter_tflops": 30.42176980726977, "iter_time": 0.6781687469482423, "loss": 0.0056883604265749454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.41691985228792, "step_time": 0.6364297904968262} +{"epoch": 0, "iter": 6690, "iter_tflops": 43.87279755133411, "iter_time": 0.4702479591369629, "loss": 0.0215703547000885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.213261905705764, "step_time": 0.41921816825866703} +{"epoch": 0, "iter": 6691, "iter_tflops": 44.261886148898384, "iter_time": 0.4661141967773438, "loss": 0.0064964285120368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.968061624239134, "step_time": 0.42131734085083006} +{"epoch": 0, "iter": 6692, "iter_tflops": 42.2658870406296, "iter_time": 0.48812635803222654, "loss": 0.007048236206173897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.7587214206471, "step_time": 0.44122450065612795} +{"epoch": 0, "iter": 6693, "iter_tflops": 17.494347817101964, "iter_time": 1.179300521850586, "loss": 0.5052653551101685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.394793303950753, "step_time": 1.1215724563598632} +{"epoch": 0, "iter": 6694, "iter_tflops": 15.53079195430886, "iter_time": 1.3283993225097657, "loss": 0.6639542579650879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.420135428084322, "step_time": 1.1200294151306154} +{"epoch": 0, "iter": 6695, "iter_tflops": 49.229400085239696, "iter_time": 0.41908074188232425, "loss": 0.4916251301765442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.611417157328056, "step_time": 0.3848264904022217} +{"epoch": 0, "iter": 6696, "iter_tflops": 50.20814666754108, "iter_time": 0.4109112739562989, "loss": 0.5805140733718872, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 54.64783785728694, "step_time": 0.37752808380126945} +{"epoch": 0, "iter": 6697, "iter_tflops": 41.170096758309214, "iter_time": 0.5011184120178224, "loss": 0.8119087815284729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.80781734806578, "step_time": 0.4604351367950439} +{"epoch": 0, "iter": 6698, "iter_tflops": 44.870078977433664, "iter_time": 0.45979623794555663, "loss": 0.7436330914497375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.961962536711205, "step_time": 0.42136982345581053} +{"epoch": 0, "iter": 6699, "iter_tflops": 47.56753670181844, "iter_time": 0.4337221336364746, "loss": 0.7450276613235474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.404134471190645, "step_time": 0.4013508586883545} +{"epoch": 0, "iter": 6700, "iter_tflops": 42.61543987122187, "iter_time": 0.4841225051879883, "loss": 0.6588577628135681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.08381183878263, "step_time": 0.4476863498687744} +{"epoch": 0, "iter": 6701, "iter_tflops": 31.380768266157116, "iter_time": 0.6574438629150391, "loss": 0.22949472069740295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.33970114538573, "step_time": 0.6188145904541015} +{"epoch": 0, "iter": 6702, "iter_tflops": 13.679999083340036, "iter_time": 1.5081209716796875, "loss": 0.19944971799850464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.34853248677568, "step_time": 1.2619538497924807} +{"epoch": 0, "iter": 6703, "iter_tflops": 37.81950732299608, "iter_time": 0.5455146026611328, "loss": 0.24054642021656036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.447902062570755, "step_time": 0.4977596569061279} +{"epoch": 0, "iter": 6704, "iter_tflops": 44.67493271107749, "iter_time": 0.4618046913146972, "loss": 0.20273377001285553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.15327033537632, "step_time": 0.4197298240661621} +{"epoch": 0, "iter": 6705, "iter_tflops": 36.5658514239717, "iter_time": 0.5642175064086914, "loss": 0.264467716217041, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 39.640066575009214, "step_time": 0.5204606170654297} +{"epoch": 0, "iter": 6706, "iter_tflops": 36.00242145427741, "iter_time": 0.5730473861694336, "loss": 0.28125807642936707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.62042183605316, "step_time": 0.5207186737060546} +{"epoch": 0, "iter": 6707, "iter_tflops": 38.606879530113254, "iter_time": 0.5343890457153321, "loss": 0.3302503228187561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.403568392124235, "step_time": 0.486541446685791} +{"epoch": 0, "iter": 6708, "iter_tflops": 38.446840616680646, "iter_time": 0.5366134948730469, "loss": 0.32612839341163635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.795408674566126, "step_time": 0.49362104988098143} +{"epoch": 0, "iter": 6709, "iter_tflops": 13.154307721003871, "iter_time": 1.5683906707763673, "loss": 0.5030597448348999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.860523466496353, "step_time": 1.4884786682128905} +{"epoch": 0, "iter": 6710, "iter_tflops": 18.48830525081904, "iter_time": 1.115899658203125, "loss": 0.5988786220550537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.98814000293442, "step_time": 0.8256354217529296} +{"epoch": 0, "iter": 6711, "iter_tflops": 38.80466724104338, "iter_time": 0.5316652603149414, "loss": 0.578918993473053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.513007522192716, "step_time": 0.4852889671325683} +{"epoch": 0, "iter": 6712, "iter_tflops": 35.18488047330041, "iter_time": 0.5863624725341797, "loss": 0.655793309211731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.6154203739652, "step_time": 0.5342708511352539} +{"epoch": 0, "iter": 6713, "iter_tflops": 16.37023482233269, "iter_time": 1.2602808532714842, "loss": 0.7900277972221375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.679932386083212, "step_time": 1.1669215164184568} +{"epoch": 0, "iter": 6714, "iter_tflops": 15.488680836138958, "iter_time": 1.332011016845703, "loss": 0.7454200983047485, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.134664060189266, "step_time": 1.0246554622650146} +{"epoch": 0, "iter": 6715, "iter_tflops": 39.627112706195014, "iter_time": 0.5206307525634765, "loss": 0.7814093232154846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.30992534103486, "step_time": 0.4763594799041748} +{"epoch": 0, "iter": 6716, "iter_tflops": 40.36428523216503, "iter_time": 0.5111224784851074, "loss": 0.5550209879875183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.71924427170906, "step_time": 0.47189959144592286} +{"epoch": 0, "iter": 6717, "iter_tflops": 19.510375234948388, "iter_time": 1.057442169189453, "loss": 0.9280140399932861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.84012631151068, "step_time": 0.9899696960449219} +{"epoch": 0, "iter": 6718, "iter_tflops": 15.762382140478367, "iter_time": 1.3088816986083984, "loss": 0.884163498878479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.070059152686543, "step_time": 0.934800100326538} +{"epoch": 0, "iter": 6719, "iter_tflops": 45.635545535852856, "iter_time": 0.4520838584899903, "loss": 0.7567738890647888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35805486906585, "step_time": 0.41798838233947755} +{"epoch": 0, "iter": 6720, "iter_tflops": 44.998800231014606, "iter_time": 0.45848096847534175, "loss": 0.7511122226715088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12831381324613, "step_time": 0.42866852951049805} +{"epoch": 0, "iter": 6721, "iter_tflops": 27.220662642321113, "iter_time": 0.7579203262329102, "loss": 0.6908460855484009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.807514917178146, "step_time": 0.716170539855957} +{"epoch": 0, "iter": 6722, "iter_tflops": 15.847239330652334, "iter_time": 1.301873031616211, "loss": 0.6284741163253784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.386124589175754, "step_time": 1.0642195873260498} +{"epoch": 0, "iter": 6723, "iter_tflops": 38.994715928507546, "iter_time": 0.5290740814208985, "loss": 0.562032163143158, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 42.6749747614446, "step_time": 0.48344711685180664} +{"epoch": 0, "iter": 6724, "iter_tflops": 34.71652047185347, "iter_time": 0.594273078918457, "loss": 0.4430379867553711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.58472947781155, "step_time": 0.5489222297668457} +{"epoch": 0, "iter": 6725, "iter_tflops": 19.04609329627561, "iter_time": 1.0832191772460937, "loss": 0.6698633432388306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.37272384474201, "step_time": 1.0126821365356447} +{"epoch": 0, "iter": 6726, "iter_tflops": 42.610159507327445, "iter_time": 0.4841824989318847, "loss": 0.7347307801246643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.1161039950164, "step_time": 0.4473728637695312} +{"epoch": 0, "iter": 6727, "iter_tflops": 44.66785761676731, "iter_time": 0.46187783813476563, "loss": 0.9129496216773987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.44459605593383, "step_time": 0.4258698635101318} +{"epoch": 0, "iter": 6728, "iter_tflops": 47.57049014962672, "iter_time": 0.4336952056884766, "loss": 0.7849446535110474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.46866884588532, "step_time": 0.4008476219177246} +{"epoch": 0, "iter": 6729, "iter_tflops": 44.118015660398854, "iter_time": 0.4676342124938965, "loss": 0.06958045810461044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98148918477208, "step_time": 0.42998026657104493} +{"epoch": 0, "iter": 6730, "iter_tflops": 11.879884095693756, "iter_time": 1.7366409759521484, "loss": 0.1071498915553093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.274010461905798, "step_time": 1.3507319221496583} +{"epoch": 0, "iter": 6731, "iter_tflops": 12.834607942498689, "iter_time": 1.6074580230712892, "loss": 0.07059874385595322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.22555399901254, "step_time": 1.3550307273864746} +{"epoch": 0, "iter": 6732, "iter_tflops": 16.970898114888595, "iter_time": 1.215674819946289, "loss": 0.08454173803329468, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.094106895605925, "step_time": 1.0267235870361329} +{"epoch": 0, "iter": 6733, "iter_tflops": 17.640213786832412, "iter_time": 0.8752254257202148, "loss": 0.3970542550086975, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 18.54793539565537, "step_time": 0.8323925704956054} +{"epoch": 0, "iter": 6734, "iter_tflops": 11.15113782598723, "iter_time": 1.3845370635986327, "loss": 0.47430089116096497, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 13.523851866770833, "step_time": 1.1416247215270996} +{"epoch": 0, "iter": 6735, "iter_tflops": 24.042494858385922, "iter_time": 0.6421614608764649, "loss": 0.5317701101303101, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.900901125934254, "step_time": 0.5960859642028807} +{"epoch": 0, "iter": 6736, "iter_tflops": 25.584748466299725, "iter_time": 0.6034518432617186, "loss": 0.4238714575767517, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 27.367542688844743, "step_time": 0.5641413917541505} +{"epoch": 0, "iter": 6737, "iter_tflops": 28.109523080179667, "iter_time": 0.6853215408325196, "loss": 0.22343498468399048, "lr": 3e-05, "seqlen": 7664.0, "step_tflops": 30.412892291132792, "step_time": 0.6334176139831543} +{"epoch": 0, "iter": 6738, "iter_tflops": 8.342213457291958, "iter_time": 2.3092266540527344, "loss": 0.08099496364593506, "lr": 3e-05, "seqlen": 7664.0, "step_tflops": 10.377563893960914, "step_time": 1.8563182907104494} +{"epoch": 0, "iter": 6739, "iter_tflops": 14.767032841009351, "iter_time": 1.3045316467285157, "loss": 0.10768597573041916, "lr": 3e-05, "seqlen": 7664.0, "step_tflops": 17.596278678402122, "step_time": 1.0947804374694825} +{"epoch": 0, "iter": 6740, "iter_tflops": 39.54723307494917, "iter_time": 0.48711528396606446, "loss": 0.14737626910209656, "lr": 3e-05, "seqlen": 7664.0, "step_tflops": 43.297265874253284, "step_time": 0.44492559242248536} +{"epoch": 0, "iter": 6741, "iter_tflops": 16.77119417434073, "iter_time": 0.8913259811401367, "loss": 0.43179023265838623, 
"lr": 3e-05, "seqlen": 5984.0, "step_tflops": 17.762701125991484, "step_time": 0.8415725173950195} +{"epoch": 0, "iter": 6742, "iter_tflops": 8.2834438428556, "iter_time": 1.8046360168457034, "loss": 0.46094799041748047, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 9.627139084150151, "step_time": 1.5527563247680665} +{"epoch": 0, "iter": 6743, "iter_tflops": 11.12560043554627, "iter_time": 1.3436219635009765, "loss": 0.38340213894844055, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 13.21879975007676, "step_time": 1.1308591842651368} +{"epoch": 0, "iter": 6744, "iter_tflops": 23.4137857418995, "iter_time": 0.6384529724121093, "loss": 0.34236663579940796, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.926121031391606, "step_time": 0.5997163009643555} +{"epoch": 0, "iter": 6745, "iter_tflops": 14.437383020913362, "iter_time": 1.0977394409179688, "loss": 0.41085049510002136, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 15.152584142708422, "step_time": 1.0459262008666992} +{"epoch": 0, "iter": 6746, "iter_tflops": 18.817727041553404, "iter_time": 0.8422103652954102, "loss": 0.4203587770462036, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 23.71132620694291, "step_time": 0.6683930130004883} +{"epoch": 0, "iter": 6747, "iter_tflops": 24.063151665632407, "iter_time": 0.658620491027832, "loss": 0.4480957090854645, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.732388022918787, "step_time": 0.615896385192871} +{"epoch": 0, "iter": 6748, "iter_tflops": 24.789704974242238, "iter_time": 0.6393171997070313, "loss": 0.4621222913265228, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 26.5785173433416, "step_time": 0.5962892723083496} +{"epoch": 0, "iter": 6749, "iter_tflops": 25.98004035817759, "iter_time": 0.7941132202148438, "loss": 0.005788061767816544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.24054044211288, "step_time": 0.7305488204956054} +{"epoch": 0, "iter": 6750, "iter_tflops": 23.63808220319449, "iter_time": 0.8727904968261718, "loss": 0.008883940987288952, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.599792527890173, "step_time": 0.7213721389770508} +{"epoch": 0, "iter": 6751, "iter_tflops": 47.47062698780421, "iter_time": 0.4346075630187988, "loss": 0.0022549270652234554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.073146915537606, "step_time": 0.39619448280334474} +{"epoch": 0, "iter": 6752, "iter_tflops": 52.09621827561591, "iter_time": 0.39601902389526367, "loss": 0.01423645205795765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.15516984433466, "step_time": 0.36096635818481443} +{"epoch": 0, "iter": 6753, "iter_tflops": 41.89060327784487, "iter_time": 0.4924993171691895, "loss": 0.4504077136516571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.46556585749819, "step_time": 0.45377404022216794} +{"epoch": 0, "iter": 6754, "iter_tflops": 35.45869448173663, "iter_time": 0.5818345489501953, "loss": 0.6160717606544495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.74084564499686, "step_time": 0.5191407775878907} +{"epoch": 0, "iter": 6755, "iter_tflops": 41.53665050321596, "iter_time": 0.4966961288452148, "loss": 0.6386478543281555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.577142633920346, "step_time": 0.4526631622314453} +{"epoch": 0, "iter": 6756, "iter_tflops": 39.67786237498679, "iter_time": 0.51996484375, "loss": 0.43883731961250305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.3586941057422, "step_time": 0.4758236827850342} +{"epoch": 0, "iter": 6757, "iter_tflops": 21.036355679578374, "iter_time": 0.9807351531982422, "loss": 0.010579154826700687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.657979058473988, "step_time": 0.9105442924499512} +{"epoch": 0, "iter": 6758, "iter_tflops": 16.587117917928488, "iter_time": 1.2438021850585939, "loss": 0.02359604649245739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.76254356724712, "step_time": 1.0995893726348875} +{"epoch": 0, "iter": 6759, "iter_tflops": 43.78869184307044, "iter_time": 0.47115117263793943, "loss": 
0.006400774233043194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.65474403122328, "step_time": 0.424030460357666} +{"epoch": 0, "iter": 6760, "iter_tflops": 46.994687749341075, "iter_time": 0.4390090560913086, "loss": 0.007204934023320675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.56211923207374, "step_time": 0.4001211318969727} +{"epoch": 0, "iter": 6761, "iter_tflops": 14.976991664938076, "iter_time": 1.3775191955566406, "loss": 0.0006755923968739808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.922392410580082, "step_time": 1.295728240966797} +{"epoch": 0, "iter": 6762, "iter_tflops": 20.337521590795994, "iter_time": 1.0144349899291993, "loss": 0.0293301772326231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.892259346226282, "step_time": 0.7671759090423584} +{"epoch": 0, "iter": 6763, "iter_tflops": 52.887417813670446, "iter_time": 0.39009455108642577, "loss": 0.19202038645744324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.33202786019536, "step_time": 0.35985284805297846} +{"epoch": 0, "iter": 6764, "iter_tflops": 51.51409673801735, "iter_time": 0.4004941329956055, "loss": 0.2316364049911499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.131906693624984, "step_time": 0.3675466365814209} +{"epoch": 0, "iter": 6765, "iter_tflops": 33.97166058621262, "iter_time": 0.6073030624389649, "loss": 0.7645207643508911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.444449705120796, "step_time": 0.566096996307373} +{"epoch": 0, "iter": 6766, "iter_tflops": 13.755531869267669, "iter_time": 1.4998397521972655, "loss": 0.697483241558075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.611063691131317, "step_time": 1.108539192199707} +{"epoch": 0, "iter": 6767, "iter_tflops": 37.72783717734872, "iter_time": 0.5468400802612305, "loss": 0.7396439909934998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.29973795123031, "step_time": 0.4995453853607178} +{"epoch": 0, "iter": 6768, "iter_tflops": 37.04483827611927, "iter_time": 0.5569222183227539, 
"loss": 0.7135879993438721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45179064435873, "step_time": 0.5100168170928955} +{"epoch": 0, "iter": 6769, "iter_tflops": 19.455540228225388, "iter_time": 1.0604225463867187, "loss": 0.0629788339138031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.84801565607662, "step_time": 0.9895950698852539} +{"epoch": 0, "iter": 6770, "iter_tflops": 24.525769502890604, "iter_time": 0.8412006607055664, "loss": 0.05304856598377228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.51541571127489, "step_time": 0.6546349792480469} +{"epoch": 0, "iter": 6771, "iter_tflops": 43.18636577461597, "iter_time": 0.47772238159179686, "loss": 0.024495180696249008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.668046242370664, "step_time": 0.4328076171875} +{"epoch": 0, "iter": 6772, "iter_tflops": 44.524983900759956, "iter_time": 0.4633599319458008, "loss": 0.04710494726896286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22273800432669, "step_time": 0.4191374626159668} +{"epoch": 0, "iter": 6773, "iter_tflops": 16.535979890600824, "iter_time": 1.2476486816406251, "loss": 0.11606505513191223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.554405733758177, "step_time": 1.1752658462524412} +{"epoch": 0, "iter": 6774, "iter_tflops": 12.947106506164502, "iter_time": 1.5934906768798829, "loss": 0.10320239514112473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.254016083815188, "step_time": 1.2692920570373536} +{"epoch": 0, "iter": 6775, "iter_tflops": 40.670695133764596, "iter_time": 0.5072717208862305, "loss": 0.09308251738548279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62875150338877, "step_time": 0.46228256034851073} +{"epoch": 0, "iter": 6776, "iter_tflops": 42.758184566219406, "iter_time": 0.48250630187988286, "loss": 0.13891074061393738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.86476445556069, "step_time": 0.44022612190246585} +{"epoch": 0, "iter": 6777, "iter_tflops": 28.420327154067856, "iter_time": 
0.7259273757934571, "loss": 0.17473800480365753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.910913847846533, "step_time": 0.6674371910095215} +{"epoch": 0, "iter": 6778, "iter_tflops": 45.26799648493199, "iter_time": 0.455754508972168, "loss": 0.16248318552970886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.77713592654407, "step_time": 0.4144692764282227} +{"epoch": 0, "iter": 6779, "iter_tflops": 49.12788253060051, "iter_time": 0.41994672775268554, "loss": 0.11049163341522217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.67102964093678, "step_time": 0.3843990631103516} +{"epoch": 0, "iter": 6780, "iter_tflops": 48.300828064306444, "iter_time": 0.42713747024536136, "loss": 0.12698456645011902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.58302451448924, "step_time": 0.39235273551940925} +{"epoch": 0, "iter": 6781, "iter_tflops": 36.05194877694135, "iter_time": 0.5722601470947266, "loss": 0.07629312574863434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.678532630334026, "step_time": 0.5333990745544434} +{"epoch": 0, "iter": 6782, "iter_tflops": 14.760996259003532, "iter_time": 1.3976762237548828, "loss": 0.0520276241004467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.39086634607815, "step_time": 1.1218119430541993} +{"epoch": 0, "iter": 6783, "iter_tflops": 50.78241863276372, "iter_time": 0.4062644920349121, "loss": 0.1227651983499527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.313786598405244, "step_time": 0.37298284530639647} +{"epoch": 0, "iter": 6784, "iter_tflops": 51.59223808965527, "iter_time": 0.39988754653930664, "loss": 0.07002624869346619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.29507129894889, "step_time": 0.3664813461303711} +{"epoch": 0, "iter": 6785, "iter_tflops": 35.13819106553463, "iter_time": 0.5871415939331055, "loss": 0.7424914240837097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.593101255546955, "step_time": 0.5487999877929688} +{"epoch": 0, "iter": 6786, "iter_tflops": 28.465573199753372, 
"iter_time": 0.7247735137939453, "loss": 0.8172658681869507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.93157861833718, "step_time": 0.5906144046783448} +{"epoch": 0, "iter": 6787, "iter_tflops": 44.83608982936567, "iter_time": 0.4601447982788086, "loss": 0.7326770424842834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.18694390272702, "step_time": 0.42814695930480956} +{"epoch": 0, "iter": 6788, "iter_tflops": 46.72785395910081, "iter_time": 0.44151596450805664, "loss": 0.7155221700668335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.628715267766715, "step_time": 0.4074978675842285} +{"epoch": 0, "iter": 6789, "iter_tflops": 27.061576460299886, "iter_time": 0.7623758926391602, "loss": 0.8548223376274109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.472920304598944, "step_time": 0.7245864944458007} +{"epoch": 0, "iter": 6790, "iter_tflops": 15.302470833705943, "iter_time": 1.3482197570800782, "loss": 0.9553322792053223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.607253581578398, "step_time": 1.1087661819458008} +{"epoch": 0, "iter": 6791, "iter_tflops": 37.90425127386478, "iter_time": 0.5442949752807617, "loss": 0.833404004573822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.47952444953824, "step_time": 0.497380184173584} +{"epoch": 0, "iter": 6792, "iter_tflops": 37.90487928644433, "iter_time": 0.5442859573364258, "loss": 0.835444986820221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33734386336185, "step_time": 0.49909093284606937} +{"epoch": 0, "iter": 6793, "iter_tflops": 23.782923903732193, "iter_time": 0.8674750671386718, "loss": 0.4216342270374298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.856683918280464, "step_time": 0.797901756286621} +{"epoch": 0, "iter": 6794, "iter_tflops": 39.0683825256397, "iter_time": 0.5280764694213866, "loss": 0.38706621527671814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14924957678588, "step_time": 0.4673033790588379} +{"epoch": 0, "iter": 6795, "iter_tflops": 47.05687915429056, 
"iter_time": 0.4384288520812988, "loss": 0.39379245042800903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.85888331733922, "step_time": 0.4056536865234375} +{"epoch": 0, "iter": 6796, "iter_tflops": 45.65877655457082, "iter_time": 0.45185383987426764, "loss": 0.4534379541873932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42117635201698, "step_time": 0.4174545211791992} +{"epoch": 0, "iter": 6797, "iter_tflops": 19.53208392920559, "iter_time": 1.0053396759033204, "loss": 0.05712514370679855, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 20.413398157692182, "step_time": 0.961935821533203} +{"epoch": 0, "iter": 6798, "iter_tflops": 18.146652709985517, "iter_time": 1.082093719482422, "loss": 0.04246576130390167, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 22.209768190670076, "step_time": 0.88413254737854} +{"epoch": 0, "iter": 6799, "iter_tflops": 43.82622364729323, "iter_time": 0.4480508995056153, "loss": 0.053053174167871475, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 47.90650363354225, "step_time": 0.409889627456665} +{"epoch": 0, "iter": 6800, "iter_tflops": 41.60780008977604, "iter_time": 0.47193984985351567, "loss": 0.09330607205629349, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 45.80647371649506, "step_time": 0.42868130493164064} +{"epoch": 0, "iter": 6801, "iter_tflops": 22.74254878786549, "iter_time": 0.9071583709716797, "loss": 0.7457898855209351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.308049488442624, "step_time": 0.8487350463867189} +{"epoch": 0, "iter": 6802, "iter_tflops": 12.092339198549958, "iter_time": 1.7061292419433594, "loss": 0.622821569442749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.17280540370419, "step_time": 1.359741521835327} +{"epoch": 0, "iter": 6803, "iter_tflops": 35.92752706505452, "iter_time": 0.5742419586181641, "loss": 0.714766800403595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.344499144459704, "step_time": 0.5243704700469971} +{"epoch": 0, "iter": 6804, "iter_tflops": 40.027273915852525, 
"iter_time": 0.515425895690918, "loss": 0.6855177283287048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.400491509193266, "step_time": 0.4753654346466064} +{"epoch": 0, "iter": 6805, "iter_tflops": 18.80475957085993, "iter_time": 1.0971208343505858, "loss": 0.8742000460624695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.092928416534505, "step_time": 1.0267838058471679} +{"epoch": 0, "iter": 6806, "iter_tflops": 28.741694624614052, "iter_time": 0.7178106155395507, "loss": 0.8743466138839722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.20420050307899, "step_time": 0.5131576614379884} +{"epoch": 0, "iter": 6807, "iter_tflops": 42.127251973733976, "iter_time": 0.48973271560668946, "loss": 0.8316680788993835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.25594700146062, "step_time": 0.4558758544921875} +{"epoch": 0, "iter": 6808, "iter_tflops": 40.657601549336526, "iter_time": 0.5074350852966308, "loss": 1.0375759601593018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.46697567389201, "step_time": 0.4746383476257324} +{"epoch": 0, "iter": 6809, "iter_tflops": 46.77185938626287, "iter_time": 0.44110056304931644, "loss": 0.0652787834405899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.357401512833896, "step_time": 0.4017160701751709} +{"epoch": 0, "iter": 6810, "iter_tflops": 10.181747797626278, "iter_time": 2.0262821197509764, "loss": 0.08273283392190933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.120628827468197, "step_time": 1.572416519165039} +{"epoch": 0, "iter": 6811, "iter_tflops": 10.442589147355848, "iter_time": 1.9756684112548828, "loss": 0.07904808223247528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.074352430564696, "step_time": 1.7086708068847656} +{"epoch": 0, "iter": 6812, "iter_tflops": 16.7314228070732, "iter_time": 1.2330746612548829, "loss": 0.08518928289413452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.61632886137925, "step_time": 0.91222114944458} +{"epoch": 0, "iter": 6813, "iter_tflops": 14.369257656098283, 
"iter_time": 1.102943878173828, "loss": 0.40224602818489075, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 15.416563290244511, "step_time": 1.028016716003418} +{"epoch": 0, "iter": 6814, "iter_tflops": 10.07138796287985, "iter_time": 1.573614761352539, "loss": 0.3612702786922455, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 11.886597015331443, "step_time": 1.3333071479797363} +{"epoch": 0, "iter": 6815, "iter_tflops": 24.324452695045, "iter_time": 0.6515453796386719, "loss": 0.33002448081970215, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 26.31735897702792, "step_time": 0.6022065048217774} +{"epoch": 0, "iter": 6816, "iter_tflops": 24.49083735723844, "iter_time": 0.6471189422607422, "loss": 0.5024534463882446, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 26.344594243347753, "step_time": 0.6015839385986328} +{"epoch": 0, "iter": 6817, "iter_tflops": 17.696037148513753, "iter_time": 1.1658595275878907, "loss": 0.5419081449508667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.934762183726335, "step_time": 1.0895882034301758} +{"epoch": 0, "iter": 6818, "iter_tflops": 14.892856364294083, "iter_time": 1.385301315307617, "loss": 0.6040346622467041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.901111510002817, "step_time": 1.0366804637908935} +{"epoch": 0, "iter": 6819, "iter_tflops": 37.013903990384776, "iter_time": 0.5573876647949219, "loss": 0.4474843442440033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.284809530945715, "step_time": 0.5121308441162109} +{"epoch": 0, "iter": 6820, "iter_tflops": 41.176487701295684, "iter_time": 0.5010406341552734, "loss": 0.5074027180671692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.74934915463602, "step_time": 0.4610367279052735} +{"epoch": 0, "iter": 6821, "iter_tflops": 25.246622466336134, "iter_time": 0.8171823196411133, "loss": 0.3638470768928528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.236069306829688, "step_time": 0.7574915924072266} +{"epoch": 0, "iter": 6822, "iter_tflops": 17.854869456616793, 
"iter_time": 1.1554883422851563, "loss": 0.29977577924728394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.11576579840896, "step_time": 0.9770468997955323} +{"epoch": 0, "iter": 6823, "iter_tflops": 48.45869080009645, "iter_time": 0.4257459945678711, "loss": 0.32686468958854675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.62146421026279, "step_time": 0.3920661239624023} +{"epoch": 0, "iter": 6824, "iter_tflops": 40.58697461449621, "iter_time": 0.5083180923461914, "loss": 0.20910081267356873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.120429034761266, "step_time": 0.46760863304138184} +{"epoch": 0, "iter": 6825, "iter_tflops": 40.142883184853886, "iter_time": 0.5139414978027345, "loss": 0.32122454047203064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.586693392063104, "step_time": 0.4733346786499023} +{"epoch": 0, "iter": 6826, "iter_tflops": 28.133249525600185, "iter_time": 0.7333348922729491, "loss": 0.28829264640808105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.16625330931057, "step_time": 0.6038441886901855} +{"epoch": 0, "iter": 6827, "iter_tflops": 52.651797791997986, "iter_time": 0.3918402481079102, "loss": 0.26645374298095703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.21549921267916, "step_time": 0.3605857467651367} +{"epoch": 0, "iter": 6828, "iter_tflops": 51.96934558485387, "iter_time": 0.39698582458496084, "loss": 0.30390921235084534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.13601332665475, "step_time": 0.3675197486877441} +{"epoch": 0, "iter": 6829, "iter_tflops": 27.764749494481688, "iter_time": 0.7430678787231446, "loss": 0.7750826478004456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.257427091222514, "step_time": 0.7051574783325195} +{"epoch": 0, "iter": 6830, "iter_tflops": 17.797592699360933, "iter_time": 1.1592069702148438, "loss": 0.938964307308197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.424485777837948, "step_time": 1.0101156883239746} +{"epoch": 0, "iter": 6831, "iter_tflops": 
38.40845291694152, "iter_time": 0.5371498184204101, "loss": 0.8192679286003113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03740906303738, "step_time": 0.4907793788909912} +{"epoch": 0, "iter": 6832, "iter_tflops": 38.82109950753495, "iter_time": 0.5314402160644531, "loss": 0.6273751258850098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56327789405979, "step_time": 0.4847158050537109} +{"epoch": 0, "iter": 6833, "iter_tflops": 18.623300727401034, "iter_time": 1.1078107910156252, "loss": 0.48535245656967163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.910109573773166, "step_time": 1.0362119522094728} +{"epoch": 0, "iter": 6834, "iter_tflops": 24.751340180896193, "iter_time": 0.8335344009399414, "loss": 0.45385491847991943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.707750467942752, "step_time": 0.6944683856964112} +{"epoch": 0, "iter": 6835, "iter_tflops": 43.8547953128072, "iter_time": 0.4704409942626953, "loss": 0.3827938437461853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43466121488212, "step_time": 0.434937089920044} +{"epoch": 0, "iter": 6836, "iter_tflops": 49.48635733618631, "iter_time": 0.41690467071533205, "loss": 0.3001587986946106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.78062388011322, "step_time": 0.3836157341003419} +{"epoch": 0, "iter": 6837, "iter_tflops": 35.24126276893452, "iter_time": 0.5854243545532227, "loss": 0.1896795928478241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.66761507006205, "step_time": 0.54771435546875} +{"epoch": 0, "iter": 6838, "iter_tflops": 14.634371186129174, "iter_time": 1.4097697296142577, "loss": 0.24177323281764984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.45931503608838, "step_time": 1.1176521701812745} +{"epoch": 0, "iter": 6839, "iter_tflops": 36.94910163082856, "iter_time": 0.5583652267456055, "loss": 0.18813061714172363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.618044311799935, "step_time": 0.5079292678833007} +{"epoch": 0, "iter": 6840, "iter_tflops": 
35.37296554185444, "iter_time": 0.5832446670532226, "loss": 0.16735556721687317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.708006967198365, "step_time": 0.5329929161071777} +{"epoch": 0, "iter": 6841, "iter_tflops": 18.00062093546397, "iter_time": 1.14613232421875, "loss": 0.2915840148925781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.941054006829905, "step_time": 1.0892262649536133} +{"epoch": 0, "iter": 6842, "iter_tflops": 8.310921455735222, "iter_time": 2.482407470703125, "loss": 0.24166862666606903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.768836069243406, "step_time": 1.915814613342285} +{"epoch": 0, "iter": 6843, "iter_tflops": 13.821698876413937, "iter_time": 1.4926597442626952, "loss": 0.4317933917045593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.174190268624212, "step_time": 1.1351863937377928} +{"epoch": 0, "iter": 6844, "iter_tflops": 41.13832041382046, "iter_time": 0.5015054893493652, "loss": 0.3388466238975525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.933674872280776, "step_time": 0.4591454753875732} +{"epoch": 0, "iter": 6845, "iter_tflops": 16.01378068910183, "iter_time": 1.002469223022461, "loss": 0.45331770181655884, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 17.034514174399696, "step_time": 0.942399772644043} +{"epoch": 0, "iter": 6846, "iter_tflops": 7.244105224133083, "iter_time": 2.216053161621094, "loss": 0.2447793036699295, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 8.11653095345518, "step_time": 1.9778551177978514} +{"epoch": 0, "iter": 6847, "iter_tflops": 7.424349568418255, "iter_time": 2.162253021240234, "loss": 0.5236475467681885, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 9.128766813123487, "step_time": 1.7585422668457031} +{"epoch": 0, "iter": 6848, "iter_tflops": 27.894591107514966, "iter_time": 0.5754994659423828, "loss": 0.3069377839565277, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.843450584255383, "step_time": 0.537917766571045} +{"epoch": 0, "iter": 6849, "iter_tflops": 
24.95582442137734, "iter_time": 0.6121042938232422, "loss": 0.3006364703178406, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 26.904227816392684, "step_time": 0.5677757186889649} +{"epoch": 0, "iter": 6850, "iter_tflops": 23.041977767575602, "iter_time": 0.6629451446533203, "loss": 0.2502404451370239, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.82043335014526, "step_time": 0.6154432144165041} +{"epoch": 0, "iter": 6851, "iter_tflops": 21.638979670318168, "iter_time": 0.7059282608032227, "loss": 0.47334250807762146, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.164863030809116, "step_time": 0.6594283447265625} +{"epoch": 0, "iter": 6852, "iter_tflops": 21.712586515593266, "iter_time": 0.7035351257324219, "loss": 0.47661229968070984, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.265576700057846, "step_time": 0.6565737648010255} +{"epoch": 0, "iter": 6853, "iter_tflops": 16.329821197994537, "iter_time": 1.2633998413085938, "loss": 0.6112297773361206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.467375944526328, "step_time": 1.181121513366699} +{"epoch": 0, "iter": 6854, "iter_tflops": 29.10216666973128, "iter_time": 0.7089195022583008, "loss": 0.5890076756477356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.23131744495177, "step_time": 0.5694270858764648} +{"epoch": 0, "iter": 6855, "iter_tflops": 42.82161829205572, "iter_time": 0.4817915420532226, "loss": 0.6015482544898987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.679292327209225, "step_time": 0.441975284576416} +{"epoch": 0, "iter": 6856, "iter_tflops": 42.527830599472686, "iter_time": 0.4851198196411133, "loss": 0.6000749468803406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.241762105868474, "step_time": 0.4461571655273437} +{"epoch": 0, "iter": 6857, "iter_tflops": 16.955308345736892, "iter_time": 1.2167925872802736, "loss": 0.08345787972211838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.901290164815236, "step_time": 1.1524919891357421} +{"epoch": 0, "iter": 6858, 
"iter_tflops": 18.281769769425082, "iter_time": 1.128506362915039, "loss": 0.07916249334812164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.57794584470296, "step_time": 0.9137719459533692} +{"epoch": 0, "iter": 6859, "iter_tflops": 47.98988637999164, "iter_time": 0.429905029296875, "loss": 0.15708523988723755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.96404544201627, "step_time": 0.39702631568908686} +{"epoch": 0, "iter": 6860, "iter_tflops": 48.72300144330891, "iter_time": 0.4234364242553711, "loss": 0.1517234891653061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.49869618454472, "step_time": 0.39298296928405757} +{"epoch": 0, "iter": 6861, "iter_tflops": 33.22144059371677, "iter_time": 0.6210174255371094, "loss": 0.32461974024772644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.46731875173796, "step_time": 0.5816930694580077} +{"epoch": 0, "iter": 6862, "iter_tflops": 13.515185721948225, "iter_time": 1.5265120239257814, "loss": 0.38380593061447144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.93180610326108, "step_time": 1.2184815597534182} +{"epoch": 0, "iter": 6863, "iter_tflops": 48.365232381466605, "iter_time": 0.4265686836242676, "loss": 0.47024011611938477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.537746535744915, "step_time": 0.3926908721923828} +{"epoch": 0, "iter": 6864, "iter_tflops": 47.33007966698234, "iter_time": 0.435898136138916, "loss": 0.41214001178741455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.07963576674263, "step_time": 0.40390056037902833} +{"epoch": 0, "iter": 6865, "iter_tflops": 15.29932325065858, "iter_time": 1.0466048126220704, "loss": 0.21572521328926086, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 15.976464363967498, "step_time": 1.002245864868164} +{"epoch": 0, "iter": 6866, "iter_tflops": 11.99443987350983, "iter_time": 1.334980667114258, "loss": 0.23189446330070496, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 15.338577312190063, "step_time": 1.0439263706207276} +{"epoch": 0, "iter": 
6867, "iter_tflops": 30.188636482358284, "iter_time": 0.5304096908569337, "loss": 0.16277365386486053, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 33.4280056170101, "step_time": 0.4790098915100098} +{"epoch": 0, "iter": 6868, "iter_tflops": 33.306137413302814, "iter_time": 0.48076260375976565, "loss": 0.1459331065416336, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 36.54784717380244, "step_time": 0.43812006950378424} +{"epoch": 0, "iter": 6869, "iter_tflops": 21.700747026538917, "iter_time": 0.9507089080810548, "loss": 0.6143048405647278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.061629736267324, "step_time": 0.8946069183349609} +{"epoch": 0, "iter": 6870, "iter_tflops": 11.036713712584676, "iter_time": 1.869314910888672, "loss": 0.6169564723968506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.30467257245489, "step_time": 1.676687728881836} +{"epoch": 0, "iter": 6871, "iter_tflops": 8.830869950059645, "iter_time": 2.3362470092773435, "loss": 0.8375301361083984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.599869251236115, "step_time": 1.9463535842895507} +{"epoch": 0, "iter": 6872, "iter_tflops": 32.725831456145, "iter_time": 0.6304222869873047, "loss": 0.9792694449424744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.58502902952472, "step_time": 0.5211842460632324} +{"epoch": 0, "iter": 6873, "iter_tflops": 17.26825311783911, "iter_time": 0.9367642059326172, "loss": 0.3274599313735962, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 18.045608908573456, "step_time": 0.8964109497070312} +{"epoch": 0, "iter": 6874, "iter_tflops": 8.719217921994945, "iter_time": 1.8552445373535156, "loss": 0.3938549757003784, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 10.818884775080148, "step_time": 1.4951893615722658} +{"epoch": 0, "iter": 6875, "iter_tflops": 24.137577018864516, "iter_time": 0.6701700592041016, "loss": 0.36094480752944946, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 25.999228336737584, "step_time": 0.622183135986328} +{"epoch": 0, "iter": 
6876, "iter_tflops": 23.660282189724324, "iter_time": 0.6836892852783203, "loss": 0.3248409628868103, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 25.339307483800845, "step_time": 0.6383868789672852} +{"epoch": 0, "iter": 6877, "iter_tflops": 18.734457780272695, "iter_time": 1.101237823486328, "loss": 0.36323902010917664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.208162563106697, "step_time": 1.0209287185668945} +{"epoch": 0, "iter": 6878, "iter_tflops": 19.454599493207176, "iter_time": 1.0604738235473632, "loss": 0.3592178225517273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.925757078633122, "step_time": 0.7957759323120117} +{"epoch": 0, "iter": 6879, "iter_tflops": 39.95122880288551, "iter_time": 0.5164069824218751, "loss": 0.2519693672657013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.04787376863876, "step_time": 0.46837887382507326} +{"epoch": 0, "iter": 6880, "iter_tflops": 43.3435247453937, "iter_time": 0.4759902114868164, "loss": 0.2987709641456604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.40500256744373, "step_time": 0.4352092056274415} +{"epoch": 0, "iter": 6881, "iter_tflops": 26.796466476486955, "iter_time": 0.7699184341430664, "loss": 0.2074531614780426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.292768197147947, "step_time": 0.7043067207336426} +{"epoch": 0, "iter": 6882, "iter_tflops": 37.34042222885731, "iter_time": 0.5525136642456054, "loss": 0.11493541300296783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.318150714086066, "step_time": 0.49932277107238765} +{"epoch": 0, "iter": 6883, "iter_tflops": 42.70916696278112, "iter_time": 0.4830600776672363, "loss": 0.10204026848077774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.99677983304375, "step_time": 0.4389895133972168} +{"epoch": 0, "iter": 6884, "iter_tflops": 41.46881639541513, "iter_time": 0.4975086174011231, "loss": 0.07315682619810104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.64344804724571, "step_time": 0.45200558662414553} +{"epoch": 0, 
"iter": 6885, "iter_tflops": 19.736656694169383, "iter_time": 1.0453185577392579, "loss": 0.5578847527503967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.76400216569959, "step_time": 0.9935990829467772} +{"epoch": 0, "iter": 6886, "iter_tflops": 18.4867109238004, "iter_time": 1.1159958953857423, "loss": 0.6073644757270813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.974954159171887, "step_time": 0.9388458042144776} +{"epoch": 0, "iter": 6887, "iter_tflops": 43.50749606032648, "iter_time": 0.4741962966918945, "loss": 0.4400715231895447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.56988574871423, "step_time": 0.43370071601867677} +{"epoch": 0, "iter": 6888, "iter_tflops": 42.686186504003885, "iter_time": 0.4833201370239258, "loss": 0.5825600624084473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.63386139839915, "step_time": 0.4424058589935303} +{"epoch": 0, "iter": 6889, "iter_tflops": 22.369487088261657, "iter_time": 0.9222872848510744, "loss": 0.2905536890029907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.001897641291976, "step_time": 0.8595609321594239} +{"epoch": 0, "iter": 6890, "iter_tflops": 18.742210646547775, "iter_time": 1.1007822875976563, "loss": 0.2920699417591095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.669256954242904, "step_time": 0.9100912990570068} +{"epoch": 0, "iter": 6891, "iter_tflops": 40.57516952041885, "iter_time": 0.5084659843444824, "loss": 0.44830214977264404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55027607151545, "step_time": 0.4630968723297119} +{"epoch": 0, "iter": 6892, "iter_tflops": 41.005111388444405, "iter_time": 0.5031346778869629, "loss": 0.35930687189102173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.98025974520078, "step_time": 0.45866995048522957} +{"epoch": 0, "iter": 6893, "iter_tflops": 19.319182304736042, "iter_time": 1.0679071807861327, "loss": 0.35082533955574036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.424888342778726, "step_time": 1.0100957794189453} +{"epoch": 
0, "iter": 6894, "iter_tflops": 14.909658881518746, "iter_time": 1.3837401428222655, "loss": 0.2955264449119568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.733040761199852, "step_time": 1.1634267234802245} +{"epoch": 0, "iter": 6895, "iter_tflops": 48.56634010374529, "iter_time": 0.42480231094360354, "loss": 0.29969051480293274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.749182035753755, "step_time": 0.39111684226989746} +{"epoch": 0, "iter": 6896, "iter_tflops": 49.368704407013695, "iter_time": 0.41789821624755863, "loss": 0.3040497899055481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.603622364652146, "step_time": 0.3848824501037597} +{"epoch": 0, "iter": 6897, "iter_tflops": 42.5466972328537, "iter_time": 0.48490470123291013, "loss": 0.5575785040855408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.02102825794371, "step_time": 0.4482971000671387} +{"epoch": 0, "iter": 6898, "iter_tflops": 44.78168839416999, "iter_time": 0.4607037887573242, "loss": 0.778368353843689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.88186606204929, "step_time": 0.42206026840209965} +{"epoch": 0, "iter": 6899, "iter_tflops": 45.78918616634071, "iter_time": 0.45056694030761724, "loss": 0.5625153183937073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50019658536957, "step_time": 0.41678811264038085} +{"epoch": 0, "iter": 6900, "iter_tflops": 46.321697309679195, "iter_time": 0.4453872528076172, "loss": 0.6623334884643555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64119659877519, "step_time": 0.41560427474975586} +{"epoch": 0, "iter": 6901, "iter_tflops": 33.059037525754434, "iter_time": 0.6240681838989258, "loss": 0.6901909112930298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.373834999084124, "step_time": 0.5832303314208984} +{"epoch": 0, "iter": 6902, "iter_tflops": 30.822345474549532, "iter_time": 0.6693550796508789, "loss": 0.8563084602355957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.20259248040615, "step_time": 0.6032026233673096} 
+{"epoch": 0, "iter": 6903, "iter_tflops": 44.07408449080865, "iter_time": 0.4681003303527832, "loss": 0.6426408886909485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.72497507147938, "step_time": 0.43229134178161627} +{"epoch": 0, "iter": 6904, "iter_tflops": 42.38484506198228, "iter_time": 0.4867563743591309, "loss": 1.008955717086792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.628657407271646, "step_time": 0.4521521053314209} +{"epoch": 0, "iter": 6905, "iter_tflops": 37.912400583390436, "iter_time": 0.544177978515625, "loss": 0.4378145933151245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.93979488128165, "step_time": 0.5039373931884765} +{"epoch": 0, "iter": 6906, "iter_tflops": 43.82417819475427, "iter_time": 0.470769660949707, "loss": 0.5803396701812744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.72132846514221, "step_time": 0.4323243751525878} +{"epoch": 0, "iter": 6907, "iter_tflops": 48.2410047769278, "iter_time": 0.4276671600341797, "loss": 0.457460880279541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33003416045227, "step_time": 0.39424957084655765} +{"epoch": 0, "iter": 6908, "iter_tflops": 49.23956268222887, "iter_time": 0.4189942474365234, "loss": 0.447927862405777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.52718767575169, "step_time": 0.3854320468902588} +{"epoch": 0, "iter": 6909, "iter_tflops": 24.445185950158315, "iter_time": 0.8439736785888673, "loss": 0.1672547161579132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.717288487920502, "step_time": 0.8022266235351563} +{"epoch": 0, "iter": 6910, "iter_tflops": 13.157220751222686, "iter_time": 1.5680434265136718, "loss": 0.22093120217323303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.707369291527264, "step_time": 1.2348499126434325} +{"epoch": 0, "iter": 6911, "iter_tflops": 38.99529174714519, "iter_time": 0.5290662689208985, "loss": 0.23267927765846252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.00483054044184, "step_time": 0.4797389793395996} 
+{"epoch": 0, "iter": 6912, "iter_tflops": 44.784055701232944, "iter_time": 0.4606794357299805, "loss": 0.1487203985452652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.94339910975924, "step_time": 0.4215296421051026} +{"epoch": 0, "iter": 6913, "iter_tflops": 24.988067267677543, "iter_time": 0.825637825012207, "loss": 1.196310043334961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.62585727348283, "step_time": 0.7748518028259277} +{"epoch": 0, "iter": 6914, "iter_tflops": 19.21682918356802, "iter_time": 1.0735950927734375, "loss": 0.8048017024993896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.34254451037795, "step_time": 0.8838408126831054} +{"epoch": 0, "iter": 6915, "iter_tflops": 44.3016488135808, "iter_time": 0.46569583892822264, "loss": 0.8076441287994385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.882179646717105, "step_time": 0.43087206268310546} +{"epoch": 0, "iter": 6916, "iter_tflops": 46.83978103620864, "iter_time": 0.4404609298706054, "loss": 1.1499863862991333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.63125386608857, "step_time": 0.4074774360656739} +{"epoch": 0, "iter": 6917, "iter_tflops": 39.44089581005906, "iter_time": 0.5230888671875, "loss": 0.22411979734897614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.719809865057094, "step_time": 0.4829397315979004} +{"epoch": 0, "iter": 6918, "iter_tflops": 47.48043315735255, "iter_time": 0.4345178031921386, "loss": 0.33775731921195984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.2185811351835, "step_time": 0.39509103965759274} +{"epoch": 0, "iter": 6919, "iter_tflops": 51.237876585631255, "iter_time": 0.4026531715393067, "loss": 0.2636558711528778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.61025843894056, "step_time": 0.37099438285827635} +{"epoch": 0, "iter": 6920, "iter_tflops": 51.71189780710046, "iter_time": 0.39896221923828123, "loss": 0.35999780893325806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.04978704427571, "step_time": 0.3680851364135742} 
+{"epoch": 0, "iter": 6921, "iter_tflops": 29.783908709059364, "iter_time": 0.6926926116943359, "loss": 0.045735545456409454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.577346519094313, "step_time": 0.6533510818481445} +{"epoch": 0, "iter": 6922, "iter_tflops": 15.348656504122896, "iter_time": 1.3441628265380858, "loss": 0.042683348059654236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.60436971949155, "step_time": 1.1719302558898925} +{"epoch": 0, "iter": 6923, "iter_tflops": 50.12879533256368, "iter_time": 0.4115617256164551, "loss": 0.060988184064626694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.44583763980103, "step_time": 0.365502477645874} +{"epoch": 0, "iter": 6924, "iter_tflops": 57.780483341738055, "iter_time": 0.35705989837646485, "loss": 0.028347576037049294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.18935655889453, "step_time": 0.326496337890625} +{"epoch": 0, "iter": 6925, "iter_tflops": 21.63029906278307, "iter_time": 0.9538052825927734, "loss": 0.8040328025817871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.561367645259697, "step_time": 0.9144433898925781} +{"epoch": 0, "iter": 6926, "iter_tflops": 14.529855120385282, "iter_time": 1.4199104766845703, "loss": 0.8698047399520874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.654313526616097, "step_time": 1.1686148815155029} +{"epoch": 0, "iter": 6927, "iter_tflops": 44.04680490923972, "iter_time": 0.46839023971557614, "loss": 0.8213202953338623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.67865458030092, "step_time": 0.4327113189697266} +{"epoch": 0, "iter": 6928, "iter_tflops": 45.84080418837623, "iter_time": 0.4500595893859863, "loss": 0.770739734172821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.52058705484565, "step_time": 0.4166164970397949} +{"epoch": 0, "iter": 6929, "iter_tflops": 44.72090321730893, "iter_time": 0.4613299827575683, "loss": 0.9799287915229797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.782735853308594, "step_time": 
0.42291792678833007} +{"epoch": 0, "iter": 6930, "iter_tflops": 8.348128437710043, "iter_time": 2.4713435668945314, "loss": 0.8278326988220215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.275950766209368, "step_time": 2.0077065353393557} +{"epoch": 0, "iter": 6931, "iter_tflops": 11.90876008493264, "iter_time": 1.7324300231933596, "loss": 0.7500748634338379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.039511765572751, "step_time": 1.3717927703857422} +{"epoch": 0, "iter": 6932, "iter_tflops": 24.33944221287915, "iter_time": 0.8476403579711914, "loss": 0.6567568778991699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.79335233476193, "step_time": 0.5929607849121095} +{"epoch": 0, "iter": 6933, "iter_tflops": 22.844917629508874, "iter_time": 0.6794067001342773, "loss": 0.5450285077095032, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 24.375759959985107, "step_time": 0.6367387161254883} +{"epoch": 0, "iter": 6934, "iter_tflops": 10.208355341235844, "iter_time": 1.5204202423095703, "loss": 0.35746562480926514, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 13.662417004360366, "step_time": 1.1360354537963868} +{"epoch": 0, "iter": 6935, "iter_tflops": 22.880364632607243, "iter_time": 0.6783541412353516, "loss": 0.42051514983177185, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 24.66049604803342, "step_time": 0.629386775970459} +{"epoch": 0, "iter": 6936, "iter_tflops": 24.351239378651456, "iter_time": 0.6373798828124999, "loss": 0.5469490885734558, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 25.950763749884466, "step_time": 0.5980937690734863} +{"epoch": 0, "iter": 6937, "iter_tflops": 21.460799876131436, "iter_time": 0.9613385162353515, "loss": 0.7068977355957031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.343787095276685, "step_time": 0.8837937660217285} +{"epoch": 0, "iter": 6938, "iter_tflops": 15.150557044783824, "iter_time": 1.3617382812500003, "loss": 0.7447299957275391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.170153497620063, 
"step_time": 1.0762090930938721} +{"epoch": 0, "iter": 6939, "iter_tflops": 33.23739534459663, "iter_time": 0.6207193222045898, "loss": 1.0892702341079712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.252252023060535, "step_time": 0.569098258972168} +{"epoch": 0, "iter": 6940, "iter_tflops": 36.16278466524187, "iter_time": 0.570506217956543, "loss": 0.8142234086990356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.697987952545255, "step_time": 0.5197012386322022} +{"epoch": 0, "iter": 6941, "iter_tflops": 20.313005561520242, "iter_time": 1.015659324645996, "loss": 0.03479902073740959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.66111877568028, "step_time": 0.9524481964111329} +{"epoch": 0, "iter": 6942, "iter_tflops": 12.097662313885527, "iter_time": 1.7053785247802733, "loss": 0.08672722429037094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.952069490362643, "step_time": 1.4787120666503908} +{"epoch": 0, "iter": 6943, "iter_tflops": 11.468283348780377, "iter_time": 1.7989696350097657, "loss": 0.0301369521766901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.409188266025291, "step_time": 1.5385788536071776} +{"epoch": 0, "iter": 6944, "iter_tflops": 36.52955112600456, "iter_time": 0.5647781829833983, "loss": 0.01571572571992874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.32246353530282, "step_time": 0.5116526050567627} +{"epoch": 0, "iter": 6945, "iter_tflops": 16.320828660406598, "iter_time": 0.8909056243896484, "loss": 0.3627907633781433, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 17.6857046760153, "step_time": 0.8221509017944335} +{"epoch": 0, "iter": 6946, "iter_tflops": 21.412920181486122, "iter_time": 0.6790441436767578, "loss": 0.2983365058898926, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.030488248107748, "step_time": 0.6313508377075194} +{"epoch": 0, "iter": 6947, "iter_tflops": 24.005181545199513, "iter_time": 0.6057158126831055, "loss": 0.34318017959594727, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 25.75810540636406, 
"step_time": 0.564494857788086} +{"epoch": 0, "iter": 6948, "iter_tflops": 20.324079072396955, "iter_time": 0.7154232177734374, "loss": 0.3817295730113983, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 21.794267261867724, "step_time": 0.6671625099182129} +{"epoch": 0, "iter": 6949, "iter_tflops": 19.361786811066537, "iter_time": 1.0655573120117188, "loss": 0.6790963411331177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.599205853987282, "step_time": 1.0015480041503908} +{"epoch": 0, "iter": 6950, "iter_tflops": 21.604479206666127, "iter_time": 0.9549451904296875, "loss": 0.7480884790420532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.29829055496446, "step_time": 0.7845032157897949} +{"epoch": 0, "iter": 6951, "iter_tflops": 41.77287017417996, "iter_time": 0.49388738250732417, "loss": 0.8438452482223511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.30695172362805, "step_time": 0.45536264801025395} +{"epoch": 0, "iter": 6952, "iter_tflops": 42.57729101762563, "iter_time": 0.48455627441406257, "loss": 0.9446300268173218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60507226414034, "step_time": 0.45238594055175774} +{"epoch": 0, "iter": 6953, "iter_tflops": 44.27142746103865, "iter_time": 0.4660137405395508, "loss": 0.4301425516605377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.54534617295426, "step_time": 0.4249860210418701} +{"epoch": 0, "iter": 6954, "iter_tflops": 43.11701129144334, "iter_time": 0.4784908065795899, "loss": 0.5311247706413269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.20436661566651, "step_time": 0.42799221229553225} +{"epoch": 0, "iter": 6955, "iter_tflops": 50.41099633755208, "iter_time": 0.40925780105590825, "loss": 0.4161842465400696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.16759125142913, "step_time": 0.37397125816345217} +{"epoch": 0, "iter": 6956, "iter_tflops": 49.66399182005645, "iter_time": 0.41541351699829104, "loss": 0.4762691259384155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
53.79145812156473, "step_time": 0.38353846931457514} +{"epoch": 0, "iter": 6957, "iter_tflops": 41.51357230025741, "iter_time": 0.4969722518920898, "loss": 0.3955938220024109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.20752859723036, "step_time": 0.45636410903930663} +{"epoch": 0, "iter": 6958, "iter_tflops": 37.13094298371053, "iter_time": 0.555630744934082, "loss": 0.31487154960632324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.2520958799209, "step_time": 0.5001223106384278} +{"epoch": 0, "iter": 6959, "iter_tflops": 41.11520999424116, "iter_time": 0.501787380218506, "loss": 0.25632619857788086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7559954515184, "step_time": 0.46096826362609866} +{"epoch": 0, "iter": 6960, "iter_tflops": 40.01002394337487, "iter_time": 0.5156481170654297, "loss": 0.2462490350008011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.79045219088064, "step_time": 0.47113223266601556} +{"epoch": 0, "iter": 6961, "iter_tflops": 18.85430143914439, "iter_time": 1.0942380218505858, "loss": 0.37700703740119934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.24794496213891, "step_time": 1.0189228363037108} +{"epoch": 0, "iter": 6962, "iter_tflops": 21.93084066402004, "iter_time": 0.940734275817871, "loss": 0.5143179297447205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.869637995965522, "step_time": 0.7678217887878418} +{"epoch": 0, "iter": 6963, "iter_tflops": 37.73844705469258, "iter_time": 0.5466863403320312, "loss": 0.515340268611908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.420649953285725, "step_time": 0.49808715057373043} +{"epoch": 0, "iter": 6964, "iter_tflops": 39.54745751404611, "iter_time": 0.5216793899536132, "loss": 0.5148643255233765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.262153445966405, "step_time": 0.47688549613952635} +{"epoch": 0, "iter": 6965, "iter_tflops": 19.022778754243717, "iter_time": 1.0845467834472657, "loss": 0.668384313583374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
20.343443287592148, "step_time": 1.0141397018432619} +{"epoch": 0, "iter": 6966, "iter_tflops": 26.45889067397031, "iter_time": 0.779741439819336, "loss": 0.7337729334831238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.175574436652795, "step_time": 0.6218759994506836} +{"epoch": 0, "iter": 6967, "iter_tflops": 48.696043019809736, "iter_time": 0.423670841217041, "loss": 0.7629160284996033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.61326439068496, "step_time": 0.3921272277832031} +{"epoch": 0, "iter": 6968, "iter_tflops": 46.537667575556654, "iter_time": 0.4433203163146973, "loss": 0.7957742214202881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.32628911276768, "step_time": 0.409946647644043} +{"epoch": 0, "iter": 6969, "iter_tflops": 23.63864610852433, "iter_time": 0.8727696762084962, "loss": 0.1441977620124817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.76269829553346, "step_time": 0.8331520767211914} +{"epoch": 0, "iter": 6970, "iter_tflops": 14.429613193382243, "iter_time": 1.4297745361328125, "loss": 0.19814525544643402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.720192433403707, "step_time": 1.1020769996643065} +{"epoch": 0, "iter": 6971, "iter_tflops": 40.33978980565322, "iter_time": 0.5114328460693359, "loss": 0.1647350937128067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.117254146317165, "step_time": 0.4676422843933105} +{"epoch": 0, "iter": 6972, "iter_tflops": 39.35941606049972, "iter_time": 0.5241717376708985, "loss": 0.16036100685596466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.08129942432241, "step_time": 0.47888744735717775} +{"epoch": 0, "iter": 6973, "iter_tflops": 16.902434426288256, "iter_time": 1.2205989379882811, "loss": 0.15660639107227325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.257965076652795, "step_time": 1.1299777069091796} +{"epoch": 0, "iter": 6974, "iter_tflops": 22.404990403509796, "iter_time": 0.920825813293457, "loss": 0.19080445170402527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
26.20082716710722, "step_time": 0.7874214572906495} +{"epoch": 0, "iter": 6975, "iter_tflops": 48.813381644461984, "iter_time": 0.42265241241455076, "loss": 0.1647857427597046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.38988423459318, "step_time": 0.38642326736450194} +{"epoch": 0, "iter": 6976, "iter_tflops": 54.24349693564484, "iter_time": 0.3803422470092773, "loss": 0.26797932386398315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.87123200165132, "step_time": 0.35044439888000495} +{"epoch": 0, "iter": 6977, "iter_tflops": 29.85408005989891, "iter_time": 0.6910644531250001, "loss": 0.13256575167179108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.65273722914182, "step_time": 0.6517949256896973} +{"epoch": 0, "iter": 6978, "iter_tflops": 18.379799701313125, "iter_time": 1.1224873962402344, "loss": 0.15964379906654358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.25928441469921, "step_time": 0.9268534030914307} +{"epoch": 0, "iter": 6979, "iter_tflops": 36.237140755081626, "iter_time": 0.569335578918457, "loss": 0.14657191932201385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.78442299025709, "step_time": 0.5185721435546875} +{"epoch": 0, "iter": 6980, "iter_tflops": 41.4451312658274, "iter_time": 0.49779293441772465, "loss": 0.12840153276920319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.7206068547432, "step_time": 0.45124277496337895} +{"epoch": 0, "iter": 6981, "iter_tflops": 20.07196854486683, "iter_time": 1.0278560104370116, "loss": 0.7868683934211731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.601944097318363, "step_time": 0.9550572586059569} +{"epoch": 0, "iter": 6982, "iter_tflops": 15.6529996632348, "iter_time": 1.318028106689453, "loss": 0.7929353713989258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.629703851812792, "step_time": 0.9538315296173097} +{"epoch": 0, "iter": 6983, "iter_tflops": 37.1370876555585, "iter_time": 0.5555388107299805, "loss": 0.8179517388343811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.559272169999375, "step_time": 0.5086652793884276} +{"epoch": 0, "iter": 6984, "iter_tflops": 38.84865225659132, "iter_time": 0.5310633010864257, "loss": 1.0379668474197388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.992674766805955, "step_time": 0.49130220031738286} +{"epoch": 0, "iter": 6985, "iter_tflops": 22.924731701316478, "iter_time": 0.8999491806030273, "loss": 0.8539007902145386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.48816524514987, "step_time": 0.842492416381836} +{"epoch": 0, "iter": 6986, "iter_tflops": 8.931861258817781, "iter_time": 2.3098313903808596, "loss": 0.7815710306167603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.226358955950749, "step_time": 1.8377368469238282} +{"epoch": 0, "iter": 6987, "iter_tflops": 16.13565701817522, "iter_time": 1.2786026306152345, "loss": 0.7889236807823181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.654453284574192, "step_time": 0.8368100185394287} +{"epoch": 0, "iter": 6988, "iter_tflops": 38.458541942489475, "iter_time": 0.5364502258300781, "loss": 0.6803632974624634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.86278910714725, "step_time": 0.49282653999328613} +{"epoch": 0, "iter": 6989, "iter_tflops": 26.706568447231962, "iter_time": 0.7120369110107422, "loss": 0.5902590751647949, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 29.040678369925924, "step_time": 0.6548077926635743} +{"epoch": 0, "iter": 6990, "iter_tflops": 28.199825023596986, "iter_time": 0.6743326416015625, "loss": 0.3305407464504242, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 30.429913645491744, "step_time": 0.6249134559631347} +{"epoch": 0, "iter": 6991, "iter_tflops": 29.09876539609081, "iter_time": 0.6535006637573242, "loss": 0.5701702833175659, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 31.345769901444026, "step_time": 0.6066548233032226} +{"epoch": 0, "iter": 6992, "iter_tflops": 29.85487887196013, "iter_time": 0.6369499130249023, "loss": 0.3677423596382141, "lr": 3e-05, "seqlen": 7568.0, 
"step_tflops": 32.10389382101882, "step_time": 0.5923288497924805} +{"epoch": 0, "iter": 6993, "iter_tflops": 15.177641730237154, "iter_time": 1.3593082427978516, "loss": 0.7731140851974487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.15493711854591, "step_time": 1.2770766830444336} +{"epoch": 0, "iter": 6994, "iter_tflops": 18.51158965314887, "iter_time": 1.1144960479736328, "loss": 0.9028202295303345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.690559373071178, "step_time": 0.9971259422302248} +{"epoch": 0, "iter": 6995, "iter_tflops": 45.27658579929134, "iter_time": 0.4556680488586426, "loss": 0.801138162612915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.96618691739642, "step_time": 0.42133347129821774} +{"epoch": 0, "iter": 6996, "iter_tflops": 39.720938715239875, "iter_time": 0.5194009552001952, "loss": 1.0998624563217163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.54338586338553, "step_time": 0.4849424438476562} +{"epoch": 0, "iter": 6997, "iter_tflops": 27.920243365999948, "iter_time": 0.738929573059082, "loss": 0.305368572473526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.60959897993029, "step_time": 0.6967704467773437} +{"epoch": 0, "iter": 6998, "iter_tflops": 16.61262814294197, "iter_time": 1.2418922119140627, "loss": 0.1829051375389099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.395622527615906, "step_time": 1.063698444366455} +{"epoch": 0, "iter": 6999, "iter_tflops": 43.75731334214387, "iter_time": 0.4714890365600586, "loss": 0.19771842658519745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.389597181553356, "step_time": 0.4353506832122803} +{"epoch": 0, "iter": 7000, "iter_tflops": 44.37992882480443, "iter_time": 0.4648744163513183, "loss": 0.19109207391738892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.833970047717024, "step_time": 0.43130631828308114} +{"epoch": 0, "iter": 7001, "iter_tflops": 33.23391219168839, "iter_time": 0.6207843780517578, "loss": 0.12436499446630478, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 35.532711589352466, "step_time": 0.5806225471496582} +{"epoch": 0, "iter": 7002, "iter_tflops": 22.26197584550736, "iter_time": 0.9267413482666016, "loss": 0.14598268270492554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.245708669828083, "step_time": 0.757223596572876} +{"epoch": 0, "iter": 7003, "iter_tflops": 48.74289928808443, "iter_time": 0.42326356887817373, "loss": 0.12007220834493637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.024206400643145, "step_time": 0.38908820915222164} +{"epoch": 0, "iter": 7004, "iter_tflops": 51.70262860817322, "iter_time": 0.3990337448120117, "loss": 0.10044734925031662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.213175303234415, "step_time": 0.36701526641845705} +{"epoch": 0, "iter": 7005, "iter_tflops": 32.32444146860058, "iter_time": 0.6382505798339844, "loss": 0.15272530913352966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.472306439259874, "step_time": 0.5984831199645997} +{"epoch": 0, "iter": 7006, "iter_tflops": 16.005280110007565, "iter_time": 1.2890179595947264, "loss": 0.0024773685727268457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.382199846191977, "step_time": 0.9648723545074462} +{"epoch": 0, "iter": 7007, "iter_tflops": 55.62347226595402, "iter_time": 0.37090625, "loss": 0.011616597883403301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.746874080944814, "step_time": 0.3341236915588379} +{"epoch": 0, "iter": 7008, "iter_tflops": 58.133352332227304, "iter_time": 0.3548925476074219, "loss": 0.013112408109009266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.83940385242989, "step_time": 0.3231717758178711} +{"epoch": 0, "iter": 7009, "iter_tflops": 37.119946804109965, "iter_time": 0.5557953414916993, "loss": 0.4542032778263092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.021364124240094, "step_time": 0.5155020065307617} +{"epoch": 0, "iter": 7010, "iter_tflops": 45.9991963466465, "iter_time": 0.4485098686218262, "loss": 0.4582890272140503, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.16192434858172, "step_time": 0.4112899131774902} +{"epoch": 0, "iter": 7011, "iter_tflops": 49.21288775610519, "iter_time": 0.4192213554382324, "loss": 0.6731836199760437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.980551279787925, "step_time": 0.38940881156921386} +{"epoch": 0, "iter": 7012, "iter_tflops": 49.0811556485759, "iter_time": 0.4203465309143067, "loss": 0.47835999727249146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.845623854232485, "step_time": 0.3904030647277832} +{"epoch": 0, "iter": 7013, "iter_tflops": 29.6925538284758, "iter_time": 0.69482381439209, "loss": 0.245076522231102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.5426112061311, "step_time": 0.6540705642700195} +{"epoch": 0, "iter": 7014, "iter_tflops": 19.878931389899524, "iter_time": 1.0378371505737305, "loss": 0.25991982221603394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.08306596744516, "step_time": 0.7909765491485595} +{"epoch": 0, "iter": 7015, "iter_tflops": 41.29854033517031, "iter_time": 0.49955987167358396, "loss": 0.29660263657569885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.19210973620731, "step_time": 0.45651981353759763} +{"epoch": 0, "iter": 7016, "iter_tflops": 37.41508752017717, "iter_time": 0.5514110717773437, "loss": 0.20069792866706848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.1520879483195, "step_time": 0.5013377094268799} +{"epoch": 0, "iter": 7017, "iter_tflops": 34.290608924791606, "iter_time": 0.6016543350219725, "loss": 0.5612794160842896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.93687061492515, "step_time": 0.5438269729614258} +{"epoch": 0, "iter": 7018, "iter_tflops": 40.283570286013926, "iter_time": 0.512146598815918, "loss": 0.5557137727737427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.61908464743304, "step_time": 0.4623827152252198} +{"epoch": 0, "iter": 7019, "iter_tflops": 36.293753482269146, "iter_time": 0.5684475021362304, "loss": 0.5487964749336243, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 39.541405664732075, "step_time": 0.5217592334747314} +{"epoch": 0, "iter": 7020, "iter_tflops": 35.532173494441025, "iter_time": 0.5806313400268555, "loss": 0.5306110978126526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.22036589092971, "step_time": 0.5260301132202149} +{"epoch": 0, "iter": 7021, "iter_tflops": 38.0969654935936, "iter_time": 0.541541648864746, "loss": 0.034307412803173065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6557045503851, "step_time": 0.48366551971435545} +{"epoch": 0, "iter": 7022, "iter_tflops": 42.84095762935087, "iter_time": 0.4815740509033204, "loss": 0.02122042328119278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31362044486323, "step_time": 0.43604977416992186} +{"epoch": 0, "iter": 7023, "iter_tflops": 44.01835025407663, "iter_time": 0.46869301986694334, "loss": 0.04591688886284828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.49325427289505, "step_time": 0.4254425449371338} +{"epoch": 0, "iter": 7024, "iter_tflops": 45.88552277272852, "iter_time": 0.44962097549438484, "loss": 0.01928829587996006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.423191538563756, "step_time": 0.4091588191986084} +{"epoch": 0, "iter": 7025, "iter_tflops": 21.95720075371785, "iter_time": 0.9396049041748047, "loss": 0.2787916362285614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.462237649718688, "step_time": 0.8793318786621095} +{"epoch": 0, "iter": 7026, "iter_tflops": 12.983673540008908, "iter_time": 1.5890027923583985, "loss": 0.18828441202640533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.32338342851011, "step_time": 1.4403784980773926} +{"epoch": 0, "iter": 7027, "iter_tflops": 9.963635338852177, "iter_time": 2.0706391601562504, "loss": 0.18228156864643097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.823945076339115, "step_time": 1.608794593811035} +{"epoch": 0, "iter": 7028, "iter_tflops": 38.106161743830604, "iter_time": 0.5414109573364257, "loss": 0.25708475708961487, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 42.27656972208825, "step_time": 0.4880030155181885} +{"epoch": 0, "iter": 7029, "iter_tflops": 13.84207336978091, "iter_time": 1.0947001342773437, "loss": 0.5545167922973633, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 14.890859780561286, "step_time": 1.017598701477051} +{"epoch": 0, "iter": 7030, "iter_tflops": 14.279698659435136, "iter_time": 1.0611512145996094, "loss": 0.44213271141052246, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 16.965543340396476, "step_time": 0.8931585197448731} +{"epoch": 0, "iter": 7031, "iter_tflops": 25.800998446190167, "iter_time": 0.5872997360229492, "loss": 0.378141850233078, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 27.47289911649717, "step_time": 0.5515588111877442} +{"epoch": 0, "iter": 7032, "iter_tflops": 25.926832848390337, "iter_time": 0.5844493103027343, "loss": 0.47078824043273926, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 27.62349998043416, "step_time": 0.5485517616271972} +{"epoch": 0, "iter": 7033, "iter_tflops": 25.822043284196173, "iter_time": 0.798972152709961, "loss": 0.23469585180282593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.3138165940162, "step_time": 0.755335433959961} +{"epoch": 0, "iter": 7034, "iter_tflops": 15.036187480343843, "iter_time": 1.3720960540771485, "loss": 0.2174810767173767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.889322700048258, "step_time": 1.0922092781066894} +{"epoch": 0, "iter": 7035, "iter_tflops": 38.21255018634373, "iter_time": 0.5399036026000976, "loss": 0.19874656200408936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.967201880225105, "step_time": 0.4916004066467285} +{"epoch": 0, "iter": 7036, "iter_tflops": 43.300206833007294, "iter_time": 0.4764663963317871, "loss": 0.20928749442100525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.26791781578358, "step_time": 0.43647138404846186} +{"epoch": 0, "iter": 7037, "iter_tflops": 19.141934755497477, "iter_time": 1.077795623779297, "loss": 0.8382566571235657, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.473078339109023, "step_time": 1.0077181930541992} +{"epoch": 0, "iter": 7038, "iter_tflops": 33.474082375149756, "iter_time": 0.616330368041992, "loss": 0.8085339069366455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.09190430638479, "step_time": 0.5562155380249023} +{"epoch": 0, "iter": 7039, "iter_tflops": 38.9860087448905, "iter_time": 0.5291922454833984, "loss": 0.7797555923461914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.271580483835976, "step_time": 0.48806061363220216} +{"epoch": 0, "iter": 7040, "iter_tflops": 36.00642427378705, "iter_time": 0.5729836807250978, "loss": 0.7768577337265015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14073822455576, "step_time": 0.5271002655029297} +{"epoch": 0, "iter": 7041, "iter_tflops": 21.04186106955087, "iter_time": 0.9804785537719726, "loss": 0.2569226026535034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.69155326845129, "step_time": 0.9091970596313476} +{"epoch": 0, "iter": 7042, "iter_tflops": 25.6631756346698, "iter_time": 0.8039181823730469, "loss": 0.3662302792072296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.320387882840897, "step_time": 0.7036432666778565} +{"epoch": 0, "iter": 7043, "iter_tflops": 48.828021352186795, "iter_time": 0.422525691986084, "loss": 0.3031817674636841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.0927901549203, "step_time": 0.38858559608459475} +{"epoch": 0, "iter": 7044, "iter_tflops": 47.50127460822742, "iter_time": 0.4343271560668946, "loss": 0.2815466523170471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.58702737678387, "step_time": 0.3999279384613037} +{"epoch": 0, "iter": 7045, "iter_tflops": 26.32737034788123, "iter_time": 0.7836366958618164, "loss": 0.06853638589382172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.054726668708494, "step_time": 0.7353874359130859} +{"epoch": 0, "iter": 7046, "iter_tflops": 13.100440651653619, "iter_time": 1.5748396606445314, "loss": 0.05005548894405365, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 14.992907526526258, "step_time": 1.3760568771362305} +{"epoch": 0, "iter": 7047, "iter_tflops": 32.6030453310856, "iter_time": 0.632796516418457, "loss": 0.07395938038825989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3435161436627, "step_time": 0.4990164222717285} +{"epoch": 0, "iter": 7048, "iter_tflops": 45.23892767250443, "iter_time": 0.4560473594665527, "loss": 0.0909433364868164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.595675469835086, "step_time": 0.4159857349395752} +{"epoch": 0, "iter": 7049, "iter_tflops": 20.304637197824114, "iter_time": 1.0160779190063478, "loss": 0.5807374119758606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.654109423401845, "step_time": 0.9527565002441406} +{"epoch": 0, "iter": 7050, "iter_tflops": 15.15248568138604, "iter_time": 1.3615649566650392, "loss": 0.7169439196586609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.23176849655419, "step_time": 1.1316013317108153} +{"epoch": 0, "iter": 7051, "iter_tflops": 38.618777763727174, "iter_time": 0.5342244033813478, "loss": 0.7381177544593811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.111516963174665, "step_time": 0.48991570472717283} +{"epoch": 0, "iter": 7052, "iter_tflops": 43.74355407541555, "iter_time": 0.47163734054565426, "loss": 0.8989763855934143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.06278823584785, "step_time": 0.43837380409240717} +{"epoch": 0, "iter": 7053, "iter_tflops": 28.69864796718727, "iter_time": 0.7188872985839843, "loss": 0.7194528579711914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.386406600003113, "step_time": 0.6789579887390138} +{"epoch": 0, "iter": 7054, "iter_tflops": 23.016504142387976, "iter_time": 0.8963608627319336, "loss": 0.931471049785614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.288284925250675, "step_time": 0.7044145317077637} +{"epoch": 0, "iter": 7055, "iter_tflops": 49.03141198855549, "iter_time": 0.4207729835510254, "loss": 0.612960159778595, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 53.43692655162546, "step_time": 0.38608308601379404} +{"epoch": 0, "iter": 7056, "iter_tflops": 48.39942201096398, "iter_time": 0.42626735305786134, "loss": 0.7006516456604004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.39939366301008, "step_time": 0.39372771453857425} +{"epoch": 0, "iter": 7057, "iter_tflops": 31.127676515875454, "iter_time": 0.6627893829345703, "loss": 0.1667913794517517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.138213996522374, "step_time": 0.6225771102905274} +{"epoch": 0, "iter": 7058, "iter_tflops": 10.857058274753742, "iter_time": 1.900247100830078, "loss": 0.1795160174369812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.03245520864235, "step_time": 1.4702411804199218} +{"epoch": 0, "iter": 7059, "iter_tflops": 12.62354128840829, "iter_time": 1.6343348541259768, "loss": 0.18716290593147278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.095585839938128, "step_time": 1.2817858085632325} +{"epoch": 0, "iter": 7060, "iter_tflops": 19.700438810713763, "iter_time": 1.0472403030395507, "loss": 0.17520160973072052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.648279800673958, "step_time": 0.8043850765228273} +{"epoch": 0, "iter": 7061, "iter_tflops": 15.475440016831364, "iter_time": 1.0505858154296877, "loss": 0.4092313349246979, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 16.5090908267136, "step_time": 0.9848075790405274} +{"epoch": 0, "iter": 7062, "iter_tflops": 19.061450230801142, "iter_time": 0.8529402313232423, "loss": 0.41625428199768066, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 23.248520928321188, "step_time": 0.699325252532959} +{"epoch": 0, "iter": 7063, "iter_tflops": 28.709800529820267, "iter_time": 0.5662971343994141, "loss": 0.33464497327804565, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 30.62257017916395, "step_time": 0.5309246635437012} +{"epoch": 0, "iter": 7064, "iter_tflops": 28.282800745728238, "iter_time": 0.574846809387207, "loss": 0.3764423727989197, "lr": 
3e-05, "seqlen": 6496.0, "step_tflops": 29.97091827432355, "step_time": 0.5424684562683106} +{"epoch": 0, "iter": 7065, "iter_tflops": 23.204630065990173, "iter_time": 0.88909383392334, "loss": 0.07642452418804169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.46145587508293, "step_time": 0.8434123306274413} +{"epoch": 0, "iter": 7066, "iter_tflops": 16.333885426701915, "iter_time": 1.2630854797363282, "loss": 0.08823142945766449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.70937224822742, "step_time": 1.0467656326293946} +{"epoch": 0, "iter": 7067, "iter_tflops": 46.440338234495286, "iter_time": 0.44424942398071293, "loss": 0.11789476871490479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96328552105408, "step_time": 0.40482267379760745} +{"epoch": 0, "iter": 7068, "iter_tflops": 46.399201921562266, "iter_time": 0.44464328384399415, "loss": 0.145819753408432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.80342887167716, "step_time": 0.40609647750854494} +{"epoch": 0, "iter": 7069, "iter_tflops": 18.310962216162686, "iter_time": 1.1267072296142577, "loss": 0.7533986568450928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.400085890286032, "step_time": 1.0634537200927736} +{"epoch": 0, "iter": 7070, "iter_tflops": 15.91046136972276, "iter_time": 1.2966998901367186, "loss": 0.5938140153884888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.322875073378995, "step_time": 1.067703094482422} +{"epoch": 0, "iter": 7071, "iter_tflops": 37.79599242812905, "iter_time": 0.5458539962768554, "loss": 0.5466360449790955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.38225719500335, "step_time": 0.4985492553710938} +{"epoch": 0, "iter": 7072, "iter_tflops": 44.103519737292764, "iter_time": 0.46778791427612304, "loss": 0.6251818537712097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.85544215823228, "step_time": 0.43111279678344727} +{"epoch": 0, "iter": 7073, "iter_tflops": 22.777492469415286, "iter_time": 0.9057666702270508, "loss": 0.21136286854743958, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.683655965824595, "step_time": 0.8358200073242187} +{"epoch": 0, "iter": 7074, "iter_tflops": 16.163270247370047, "iter_time": 1.2764182739257812, "loss": 0.300804078578949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.56344985040461, "step_time": 1.111382511138916} +{"epoch": 0, "iter": 7075, "iter_tflops": 45.93700796327693, "iter_time": 0.4491170501708985, "loss": 0.22878402471542358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.62302804430917, "step_time": 0.40754364776611324} +{"epoch": 0, "iter": 7076, "iter_tflops": 47.68079833710976, "iter_time": 0.4326918640136719, "loss": 0.2097546011209488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.799848369087655, "step_time": 0.3982848243713379} +{"epoch": 0, "iter": 7077, "iter_tflops": 21.660198159971582, "iter_time": 0.9524886779785157, "loss": 0.8942693471908569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.706973472175044, "step_time": 0.9085796279907227} +{"epoch": 0, "iter": 7078, "iter_tflops": 14.627038246938943, "iter_time": 1.4104764862060546, "loss": 0.9131750464439392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.439571780748647, "step_time": 1.1188488407135009} +{"epoch": 0, "iter": 7079, "iter_tflops": 43.68539810888537, "iter_time": 0.47226520538330075, "loss": 0.7471974492073059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19472575369978, "step_time": 0.43714828681945794} +{"epoch": 0, "iter": 7080, "iter_tflops": 42.337195907025546, "iter_time": 0.4873042030334473, "loss": 0.7350786924362183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.29204795459022, "step_time": 0.4555124893188476} +{"epoch": 0, "iter": 7081, "iter_tflops": 22.298029237561206, "iter_time": 0.9252429122924806, "loss": 0.35864052176475525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.40206072443418, "step_time": 0.8815930252075197} +{"epoch": 0, "iter": 7082, "iter_tflops": 12.576461063741137, "iter_time": 1.6404530181884767, "loss": 
0.21789368987083435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.856858642003116, "step_time": 1.3010832710266116} +{"epoch": 0, "iter": 7083, "iter_tflops": 46.84835390083073, "iter_time": 0.44038032913208014, "loss": 0.23446796834468842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.87932410185428, "step_time": 0.4054907150268554} +{"epoch": 0, "iter": 7084, "iter_tflops": 42.78309551391452, "iter_time": 0.48222535705566405, "loss": 0.29870063066482544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91849015010832, "step_time": 0.44929816818237306} +{"epoch": 0, "iter": 7085, "iter_tflops": 24.3100448520221, "iter_time": 0.8486653823852539, "loss": 0.8789493441581726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.56634704615635, "step_time": 0.8069628982543946} +{"epoch": 0, "iter": 7086, "iter_tflops": 14.602624340241203, "iter_time": 1.4128346405029295, "loss": 0.9010991454124451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.76289661738752, "step_time": 1.1614712371826172} +{"epoch": 0, "iter": 7087, "iter_tflops": 25.89416242956256, "iter_time": 0.7967468948364258, "loss": 0.8035916686058044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.545449614536096, "step_time": 0.5972159500122071} +{"epoch": 0, "iter": 7088, "iter_tflops": 36.01810512918194, "iter_time": 0.5727978591918944, "loss": 0.6659965515136719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.97081281999067, "step_time": 0.5293985939025879} +{"epoch": 0, "iter": 7089, "iter_tflops": 35.32207663169318, "iter_time": 0.5103257293701172, "loss": 0.049374353140592575, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 39.21187849456532, "step_time": 0.45970163154602056} +{"epoch": 0, "iter": 7090, "iter_tflops": 44.7205858854932, "iter_time": 0.4030753211975098, "loss": 0.07356374710798264, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 49.24035653883045, "step_time": 0.3660770511627197} +{"epoch": 0, "iter": 7091, "iter_tflops": 45.74263666026905, "iter_time": 0.39406920623779296, 
"loss": 0.029234372079372406, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 49.989965334620344, "step_time": 0.3605876579284668} +{"epoch": 0, "iter": 7092, "iter_tflops": 49.104754056393226, "iter_time": 0.3670879707336426, "loss": 0.06815219670534134, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 53.82401614982803, "step_time": 0.33490188598632814} +{"epoch": 0, "iter": 7093, "iter_tflops": 24.564803481718435, "iter_time": 0.8398639755249023, "loss": 0.08906850963830948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.87432320045213, "step_time": 0.7973578033447266} +{"epoch": 0, "iter": 7094, "iter_tflops": 15.72650825264993, "iter_time": 1.3118674011230467, "loss": 0.11622180789709091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.41012980385921, "step_time": 1.1206381340026854} +{"epoch": 0, "iter": 7095, "iter_tflops": 37.8835135827345, "iter_time": 0.5445929260253906, "loss": 0.13887546956539154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6044089309946, "step_time": 0.495887191772461} +{"epoch": 0, "iter": 7096, "iter_tflops": 42.04416643067335, "iter_time": 0.4907005004882813, "loss": 0.1732570230960846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.440040949187406, "step_time": 0.4442522678375245} +{"epoch": 0, "iter": 7097, "iter_tflops": 22.851896042224677, "iter_time": 0.9028175811767578, "loss": 0.25416889786720276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.202893629482116, "step_time": 0.8524225997924805} +{"epoch": 0, "iter": 7098, "iter_tflops": 25.48124825461898, "iter_time": 0.8096578826904296, "loss": 0.24510855972766876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.06440713364211, "step_time": 0.7098405075073242} +{"epoch": 0, "iter": 7099, "iter_tflops": 45.0014399293132, "iter_time": 0.45845407485961914, "loss": 0.17859423160552979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.116186449316, "step_time": 0.4200467300415039} +{"epoch": 0, "iter": 7100, "iter_tflops": 49.01270814722291, "iter_time": 0.4209335556030274, 
"loss": 0.20090347528457642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.134670226767454, "step_time": 0.3882793178558349} +{"epoch": 0, "iter": 7101, "iter_tflops": 38.92750324190781, "iter_time": 0.5299875869750976, "loss": 0.3292335271835327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93216429207821, "step_time": 0.4920111770629883} +{"epoch": 0, "iter": 7102, "iter_tflops": 47.56917801874241, "iter_time": 0.4337071685791016, "loss": 0.545836865901947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.8604820021696, "step_time": 0.3978191623687744} +{"epoch": 0, "iter": 7103, "iter_tflops": 47.5551369773841, "iter_time": 0.43383522415161124, "loss": 0.4531725347042084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.74833297354843, "step_time": 0.39868131637573245} +{"epoch": 0, "iter": 7104, "iter_tflops": 48.05273180295897, "iter_time": 0.4293427810668945, "loss": 0.3407095968723297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.4456676718521, "step_time": 0.3933803195953369} +{"epoch": 0, "iter": 7105, "iter_tflops": 24.96622614568468, "iter_time": 0.8263601150512697, "loss": 0.6910585761070251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.421846103708877, "step_time": 0.7808346710205079} +{"epoch": 0, "iter": 7106, "iter_tflops": 34.82934927629137, "iter_time": 0.5923479461669923, "loss": 0.8429815769195557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.337863739428684, "step_time": 0.5381388397216798} +{"epoch": 0, "iter": 7107, "iter_tflops": 37.18377956881111, "iter_time": 0.5548412170410156, "loss": 0.6450859904289246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.12280996876688, "step_time": 0.5141986198425293} +{"epoch": 0, "iter": 7108, "iter_tflops": 36.55923396245454, "iter_time": 0.5643196334838867, "loss": 0.7401747107505798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7994397933776, "step_time": 0.5183764801025391} +{"epoch": 0, "iter": 7109, "iter_tflops": 19.61348790138287, "iter_time": 1.0518829498291016, "loss": 
0.6690530180931091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.971946847622817, "step_time": 0.9837471771240234} +{"epoch": 0, "iter": 7110, "iter_tflops": 42.693737969196945, "iter_time": 0.48323464965820306, "loss": 0.6922405362129211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.49876767977441, "step_time": 0.4436911888122559} +{"epoch": 0, "iter": 7111, "iter_tflops": 45.422254506140504, "iter_time": 0.45420672607421875, "loss": 0.6317275166511536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.14935508091802, "step_time": 0.4197632598876953} +{"epoch": 0, "iter": 7112, "iter_tflops": 46.754141376980385, "iter_time": 0.4412677230834961, "loss": 0.7956158518791199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.81158215043879, "step_time": 0.4060313148498535} +{"epoch": 0, "iter": 7113, "iter_tflops": 23.851555997001366, "iter_time": 0.8649789352416992, "loss": 0.13972054421901703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.10695094598206, "step_time": 0.8217283554077148} +{"epoch": 0, "iter": 7114, "iter_tflops": 11.788382530485348, "iter_time": 1.750120803833008, "loss": 0.0891396701335907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.45972298707618, "step_time": 1.2534289627075195} +{"epoch": 0, "iter": 7115, "iter_tflops": 39.920412588706554, "iter_time": 0.5168056182861328, "loss": 0.10517504066228867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.79394866487767, "step_time": 0.471094617843628} +{"epoch": 0, "iter": 7116, "iter_tflops": 40.03762855113547, "iter_time": 0.5152925949096681, "loss": 0.15400490164756775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.881498550293195, "step_time": 0.47015471649169926} +{"epoch": 0, "iter": 7117, "iter_tflops": 33.63623642704319, "iter_time": 0.6133591537475584, "loss": 0.40727242827415466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.21104014102646, "step_time": 0.5544347438812256} +{"epoch": 0, "iter": 7118, "iter_tflops": 38.670540899928454, "iter_time": 0.5335093078613281, 
"loss": 0.41111457347869873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.17859858578725, "step_time": 0.48913653373718263} +{"epoch": 0, "iter": 7119, "iter_tflops": 41.75385561739597, "iter_time": 0.49411229705810555, "loss": 0.4436666965484619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79768608087559, "step_time": 0.4504833164215088} +{"epoch": 0, "iter": 7120, "iter_tflops": 37.933199187514006, "iter_time": 0.5438796081542968, "loss": 0.46974435448646545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.45528641619067, "step_time": 0.497670991897583} +{"epoch": 0, "iter": 7121, "iter_tflops": 28.868198472009613, "iter_time": 0.7146650848388673, "loss": 0.24492208659648895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.258018693031502, "step_time": 0.6600256309509277} +{"epoch": 0, "iter": 7122, "iter_tflops": 12.104021914162882, "iter_time": 1.7044824981689455, "loss": 0.22858324646949768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.525579721061288, "step_time": 1.420328407287598} +{"epoch": 0, "iter": 7123, "iter_tflops": 13.119371887165796, "iter_time": 1.5725671691894532, "loss": 0.4367537200450897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.920054651574315, "step_time": 1.295918510437012} +{"epoch": 0, "iter": 7124, "iter_tflops": 26.53334282644287, "iter_time": 0.7775534973144531, "loss": 0.22691024839878082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.70140350635171, "step_time": 0.5330838584899903} +{"epoch": 0, "iter": 7125, "iter_tflops": 28.978419505491935, "iter_time": 0.5879611358642579, "loss": 0.4198012948036194, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 31.147777490676855, "step_time": 0.5470112419128418} +{"epoch": 0, "iter": 7126, "iter_tflops": 30.088966121616256, "iter_time": 0.5662602157592773, "loss": 0.26835864782333374, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 32.445739528609934, "step_time": 0.5251285591125489} +{"epoch": 0, "iter": 7127, "iter_tflops": 29.154895255531176, "iter_time": 
0.5844021835327148, "loss": 0.3421986699104309, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 31.09515809693826, "step_time": 0.547936897277832} +{"epoch": 0, "iter": 7128, "iter_tflops": 30.924988210357554, "iter_time": 0.5509520111083984, "loss": 0.35106658935546875, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 32.954296617828824, "step_time": 0.517024673461914} +{"epoch": 0, "iter": 7129, "iter_tflops": 28.29147008417051, "iter_time": 0.7292337036132812, "loss": 0.7839176654815674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.949319605140456, "step_time": 0.6888668518066406} +{"epoch": 0, "iter": 7130, "iter_tflops": 15.16672836142559, "iter_time": 1.360286346435547, "loss": 0.8692854642868042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.32667665480661, "step_time": 1.1907126750946042} +{"epoch": 0, "iter": 7131, "iter_tflops": 37.78324947143365, "iter_time": 0.5460380935668946, "loss": 0.8741467595100403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.30696832846281, "step_time": 0.49945794486999506} +{"epoch": 0, "iter": 7132, "iter_tflops": 47.41791944556828, "iter_time": 0.43509065246582024, "loss": 0.6418269872665405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.08213077022366, "step_time": 0.4038808326721191} +{"epoch": 0, "iter": 7133, "iter_tflops": 46.95434287271161, "iter_time": 0.4393862686157227, "loss": 0.08966778963804245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.53367309820739, "step_time": 0.4003419952392578} +{"epoch": 0, "iter": 7134, "iter_tflops": 42.4652693239984, "iter_time": 0.48583451461791993, "loss": 0.14895892143249512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.15635087248628, "step_time": 0.42841895484924314} +{"epoch": 0, "iter": 7135, "iter_tflops": 50.99003993407012, "iter_time": 0.40461026382446286, "loss": 0.10073666274547577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.81501143457588, "step_time": 0.3696334190368652} +{"epoch": 0, "iter": 7136, "iter_tflops": 52.26860688362469, "iter_time": 
0.39471290206909176, "loss": 0.1077929362654686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.956119013728625, "step_time": 0.3622278671264648} +{"epoch": 0, "iter": 7137, "iter_tflops": 32.92742462353396, "iter_time": 0.6265626220703124, "loss": 0.7178894877433777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.09460635356136, "step_time": 0.5878707771301269} +{"epoch": 0, "iter": 7138, "iter_tflops": 19.929982713387766, "iter_time": 1.0351786956787112, "loss": 0.7420117259025574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.120223850903873, "step_time": 0.9326801414489747} +{"epoch": 0, "iter": 7139, "iter_tflops": 47.226864483018005, "iter_time": 0.43685079956054684, "loss": 0.7662814259529114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.12176192877357, "step_time": 0.40356773185729977} +{"epoch": 0, "iter": 7140, "iter_tflops": 49.36991849332427, "iter_time": 0.41788793945312497, "loss": 0.7094259262084961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.006098334059566, "step_time": 0.3892211303710937} +{"epoch": 0, "iter": 7141, "iter_tflops": 33.18029886359354, "iter_time": 0.6217874526977539, "loss": 0.19877421855926514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.50052485655749, "step_time": 0.5811489715576171} +{"epoch": 0, "iter": 7142, "iter_tflops": 10.215054967224726, "iter_time": 2.019675231933594, "loss": 0.34382277727127075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.346532965342995, "step_time": 1.6710029907226562} +{"epoch": 0, "iter": 7143, "iter_tflops": 12.21095153567003, "iter_time": 1.689556579589844, "loss": 0.33063456416130066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.239631620028945, "step_time": 1.448850227355957} +{"epoch": 0, "iter": 7144, "iter_tflops": 31.824166926997112, "iter_time": 0.6482838516235351, "loss": 0.2634996175765991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.84259982315988, "step_time": 0.5051366367340089} +{"epoch": 0, "iter": 7145, "iter_tflops": 21.10141751899759, 
"iter_time": 0.7549448089599609, "loss": 0.4242296814918518, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 22.339405795768826, "step_time": 0.7131078491210938} +{"epoch": 0, "iter": 7146, "iter_tflops": 10.904168919675636, "iter_time": 1.460946334838867, "loss": 0.280200332403183, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 12.759700150874291, "step_time": 1.2484937286376951} +{"epoch": 0, "iter": 7147, "iter_tflops": 25.25681779233022, "iter_time": 0.6307368469238281, "loss": 0.5365562438964844, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 27.129711034526657, "step_time": 0.5871940765380861} +{"epoch": 0, "iter": 7148, "iter_tflops": 27.086076468604315, "iter_time": 0.5881400222778321, "loss": 0.4183542728424072, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.891378729674877, "step_time": 0.5513895950317382} +{"epoch": 0, "iter": 7149, "iter_tflops": 18.60666168959598, "iter_time": 1.108801452636719, "loss": 0.015957113355398178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.954449620698767, "step_time": 1.033909423828125} +{"epoch": 0, "iter": 7150, "iter_tflops": 33.57371757988022, "iter_time": 0.6145013122558594, "loss": 0.015192905440926552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.377264342473296, "step_time": 0.4756199779510498} +{"epoch": 0, "iter": 7151, "iter_tflops": 58.220837269242466, "iter_time": 0.3543592720031738, "loss": 0.0020421899389475584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.12356950390141, "step_time": 0.32173962974548337} +{"epoch": 0, "iter": 7152, "iter_tflops": 61.50481669567626, "iter_time": 0.33543866348266604, "loss": 0.0055394163355231285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.62443078944948, "step_time": 0.3050834331512451} +{"epoch": 0, "iter": 7153, "iter_tflops": 45.40668008192327, "iter_time": 0.4543625183105469, "loss": 0.40910962224006653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.76501147250096, "step_time": 0.414570255279541} +{"epoch": 0, "iter": 7154, "iter_tflops": 
43.83361910605697, "iter_time": 0.47066826629638675, "loss": 0.4283781349658966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.97361408364456, "step_time": 0.43005084991455084} +{"epoch": 0, "iter": 7155, "iter_tflops": 49.5613371340884, "iter_time": 0.4162739486694336, "loss": 0.4315930902957916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.75351208219375, "step_time": 0.38380921936035156} +{"epoch": 0, "iter": 7156, "iter_tflops": 47.975596341436685, "iter_time": 0.43003308105468746, "loss": 0.386538028717041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00486887323728, "step_time": 0.3967146530151367} +{"epoch": 0, "iter": 7157, "iter_tflops": 24.477657736429776, "iter_time": 0.8428540725708009, "loss": 0.7354884743690491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.796329937234606, "step_time": 0.7997685546875} +{"epoch": 0, "iter": 7158, "iter_tflops": 14.244553208625915, "iter_time": 1.448349639892578, "loss": 0.6849706172943115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.371418585142795, "step_time": 1.0650275001525877} +{"epoch": 0, "iter": 7159, "iter_tflops": 35.816942726527614, "iter_time": 0.5760149230957031, "loss": 0.6125412583351135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.10159154579784, "step_time": 0.5276279735565185} +{"epoch": 0, "iter": 7160, "iter_tflops": 33.858505726786284, "iter_time": 0.609332664489746, "loss": 0.7702419757843018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.583951183118934, "step_time": 0.5639383621215821} +{"epoch": 0, "iter": 7161, "iter_tflops": 28.163093071827372, "iter_time": 0.7325578002929688, "loss": 0.39388880133628845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.359371579319458, "step_time": 0.6578924407958985} +{"epoch": 0, "iter": 7162, "iter_tflops": 37.80331731150526, "iter_time": 0.5457482299804688, "loss": 0.2681367099285126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.64340421737839, "step_time": 0.4954228382110596} +{"epoch": 0, "iter": 7163, "iter_tflops": 
40.080234149714094, "iter_time": 0.5147448348999023, "loss": 0.25285109877586365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.854096909038375, "step_time": 0.470448486328125} +{"epoch": 0, "iter": 7164, "iter_tflops": 39.2168956973236, "iter_time": 0.52607666015625, "loss": 0.2253292202949524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.968076569729156, "step_time": 0.4801493377685548} +{"epoch": 0, "iter": 7165, "iter_tflops": 35.86313098910434, "iter_time": 0.5752730712890625, "loss": 0.3231862485408783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.74274400924731, "step_time": 0.5191159801483154} +{"epoch": 0, "iter": 7166, "iter_tflops": 37.301142210943944, "iter_time": 0.5530954895019532, "loss": 0.5032090544700623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.37775854214996, "step_time": 0.498603458404541} +{"epoch": 0, "iter": 7167, "iter_tflops": 43.04028244994684, "iter_time": 0.479343822479248, "loss": 0.41984015703201294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.9409600145223, "step_time": 0.4395115375518799} +{"epoch": 0, "iter": 7168, "iter_tflops": 43.2937863943907, "iter_time": 0.4765370559692383, "loss": 0.3751935362815857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20637041845333, "step_time": 0.4370404529571533} +{"epoch": 0, "iter": 7169, "iter_tflops": 22.116510485433214, "iter_time": 0.9328367385864258, "loss": 0.6977041959762573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.459531135877373, "step_time": 0.8794333267211913} +{"epoch": 0, "iter": 7170, "iter_tflops": 16.98483318632597, "iter_time": 1.2146774291992188, "loss": 0.7241433262825012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.145871961881763, "step_time": 0.9756558418273925} +{"epoch": 0, "iter": 7171, "iter_tflops": 38.64761699488672, "iter_time": 0.5338257598876953, "loss": 0.8009073138237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.13153731024688, "step_time": 0.48968290328979497} +{"epoch": 0, "iter": 7172, "iter_tflops": 
36.16287945223935, "iter_time": 0.5705047225952149, "loss": 0.9466947913169861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.6475001383105, "step_time": 0.5203630352020263} +{"epoch": 0, "iter": 7173, "iter_tflops": 13.82622766413917, "iter_time": 1.273951431274414, "loss": 0.12213259190320969, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 14.80783611318996, "step_time": 1.189501449584961} +{"epoch": 0, "iter": 7174, "iter_tflops": 17.100344189197195, "iter_time": 1.0300343856811522, "loss": 0.09291287511587143, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 19.608750916625084, "step_time": 0.8982694816589356} +{"epoch": 0, "iter": 7175, "iter_tflops": 35.194638204469435, "iter_time": 0.500472328186035, "loss": 0.15148130059242249, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 38.51600084313832, "step_time": 0.45731493759155273} +{"epoch": 0, "iter": 7176, "iter_tflops": 38.32510352486783, "iter_time": 0.4595928230285645, "loss": 0.12718088924884796, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 41.9378486846651, "step_time": 0.4200010986328125} +{"epoch": 0, "iter": 7177, "iter_tflops": 18.491002354699138, "iter_time": 1.1157368927001952, "loss": 0.6864760518074036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.664134109447996, "step_time": 1.0491737594604493} +{"epoch": 0, "iter": 7178, "iter_tflops": 17.150268599440913, "iter_time": 1.2029603729248046, "loss": 0.6636282801628113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.519110744766188, "step_time": 1.0569689254760741} +{"epoch": 0, "iter": 7179, "iter_tflops": 47.16599110485904, "iter_time": 0.437414608001709, "loss": 0.8615068793296814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.57075489150314, "step_time": 0.40005413055419925} +{"epoch": 0, "iter": 7180, "iter_tflops": 51.37485772643338, "iter_time": 0.4015795745849609, "loss": 0.7155060768127441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.570951801878614, "step_time": 0.37125679588317867} +{"epoch": 0, "iter": 7181, "iter_tflops": 
29.749994557650258, "iter_time": 0.6934822616577149, "loss": 0.09083960950374603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.519153371575758, "step_time": 0.6545573501586913} +{"epoch": 0, "iter": 7182, "iter_tflops": 12.750414904337681, "iter_time": 1.6180723266601564, "loss": 0.06750660389661789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.931896895678992, "step_time": 1.3816793441772461} +{"epoch": 0, "iter": 7183, "iter_tflops": 42.01288560202296, "iter_time": 0.4910658531188965, "loss": 0.08973977714776993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.28733412979613, "step_time": 0.44571790313720705} +{"epoch": 0, "iter": 7184, "iter_tflops": 42.025064157295866, "iter_time": 0.4909235458374023, "loss": 0.07080096751451492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.0836806852929, "step_time": 0.4476876239776611} +{"epoch": 0, "iter": 7185, "iter_tflops": 22.928412823159583, "iter_time": 0.8998046951293945, "loss": 0.20095369219779968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.13872422736236, "step_time": 0.8206897583007813} +{"epoch": 0, "iter": 7186, "iter_tflops": 44.95842040812943, "iter_time": 0.4588927574157715, "loss": 0.21966925263404846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.70876504960677, "step_time": 0.42356018447875976} +{"epoch": 0, "iter": 7187, "iter_tflops": 50.032327397102975, "iter_time": 0.41235526275634765, "loss": 0.18013885617256165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.12258123446744, "step_time": 0.38119197273254396} +{"epoch": 0, "iter": 7188, "iter_tflops": 51.152530705448015, "iter_time": 0.40332498168945313, "loss": 0.19446784257888794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.567875728127795, "step_time": 0.37127734756469727} +{"epoch": 0, "iter": 7189, "iter_tflops": 33.725847085651, "iter_time": 0.6117294387817382, "loss": 0.8430980443954468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.0975671001624, "step_time": 0.5715369529724121} +{"epoch": 0, "iter": 7190, 
"iter_tflops": 18.592061838090075, "iter_time": 1.1096721649169923, "loss": 0.6953726410865784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.789078619980057, "step_time": 0.9924005718231201} +{"epoch": 0, "iter": 7191, "iter_tflops": 38.359513894345305, "iter_time": 0.5378351135253907, "loss": 0.8361428380012512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.71311250693386, "step_time": 0.4945949192047119} +{"epoch": 0, "iter": 7192, "iter_tflops": 37.354426449901716, "iter_time": 0.552306526184082, "loss": 0.8291133046150208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.70805639807261, "step_time": 0.5068061542510987} +{"epoch": 0, "iter": 7193, "iter_tflops": 9.370353707329759, "iter_time": 0.989885528564453, "loss": 0.025019966065883636, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 9.993643032739318, "step_time": 0.9281477737426758} +{"epoch": 0, "iter": 7194, "iter_tflops": 11.221106363752595, "iter_time": 0.8266188049316407, "loss": 0.033371347934007645, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 14.186367447625502, "step_time": 0.6538373947143555} +{"epoch": 0, "iter": 7195, "iter_tflops": 23.994470319964428, "iter_time": 0.38657146453857416, "loss": 0.05612284690141678, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 26.29782833704722, "step_time": 0.35271268081665036} +{"epoch": 0, "iter": 7196, "iter_tflops": 23.179767651581216, "iter_time": 0.4001583480834961, "loss": 0.06186678260564804, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 25.470147484183844, "step_time": 0.36417447280883786} +{"epoch": 0, "iter": 7197, "iter_tflops": 30.45472907847241, "iter_time": 0.6774348068237306, "loss": 0.0011768892873078585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.335384940081504, "step_time": 0.6380345726013184} +{"epoch": 0, "iter": 7198, "iter_tflops": 14.298174313627422, "iter_time": 1.4429180297851563, "loss": 0.00846428144723177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.35644044880239, "step_time": 1.1886707744598388} +{"epoch": 0, 
"iter": 7199, "iter_tflops": 44.595901822071816, "iter_time": 0.46262308120727536, "loss": 0.0008171693189069629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.25479381282792, "step_time": 0.4188646812438964} +{"epoch": 0, "iter": 7200, "iter_tflops": 48.09342824816399, "iter_time": 0.42897947311401363, "loss": 0.0061873444356024265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.13777151518632, "step_time": 0.38825665664672854} +{"epoch": 0, "iter": 7201, "iter_tflops": 20.077820067549403, "iter_time": 1.0275564498901366, "loss": 0.06712458282709122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.54671772239185, "step_time": 0.9575051651000976} +{"epoch": 0, "iter": 7202, "iter_tflops": 18.58837526870515, "iter_time": 1.1098922424316406, "loss": 0.08610396087169647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.564235560214225, "step_time": 0.875525686264038} +{"epoch": 0, "iter": 7203, "iter_tflops": 48.82214176200064, "iter_time": 0.4225765762329101, "loss": 0.07389477640390396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.53637041544707, "step_time": 0.3853659362792969} +{"epoch": 0, "iter": 7204, "iter_tflops": 46.1746892868089, "iter_time": 0.44680524826049806, "loss": 0.03764086589217186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.148976532464744, "step_time": 0.41139610290527345} +{"epoch": 0, "iter": 7205, "iter_tflops": 26.327303192166575, "iter_time": 0.7836386947631835, "loss": 0.9048715829849243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.792793007176254, "step_time": 0.7423181076049804} +{"epoch": 0, "iter": 7206, "iter_tflops": 10.338043862975479, "iter_time": 1.9956477050781252, "loss": 0.6701793670654297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.550632732961406, "step_time": 1.643828956604004} +{"epoch": 0, "iter": 7207, "iter_tflops": 9.982208011475898, "iter_time": 2.0667865753173826, "loss": 0.7686836123466492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.590663994847384, "step_time": 1.6386025009155274} 
+{"epoch": 0, "iter": 7208, "iter_tflops": 33.516341009718886, "iter_time": 0.6155532760620116, "loss": 0.851367712020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.121266671269595, "step_time": 0.5411964321136475} +{"epoch": 0, "iter": 7209, "iter_tflops": 12.188857448752806, "iter_time": 1.2029637298583982, "loss": 0.2842468321323395, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 12.98269640591854, "step_time": 1.1294074020385743} +{"epoch": 0, "iter": 7210, "iter_tflops": 22.926378143102227, "iter_time": 0.6395582122802734, "loss": 0.3553428649902344, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.742987622577257, "step_time": 0.592602382659912} +{"epoch": 0, "iter": 7211, "iter_tflops": 26.533923209005444, "iter_time": 0.5526040496826172, "loss": 0.3794662654399872, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 28.305625388570437, "step_time": 0.518015525817871} +{"epoch": 0, "iter": 7212, "iter_tflops": 23.674503671530424, "iter_time": 0.6193478698730468, "loss": 0.242023766040802, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 25.450468034934513, "step_time": 0.5761290283203125} +{"epoch": 0, "iter": 7213, "iter_tflops": 26.26067783573666, "iter_time": 0.7856268463134767, "loss": 0.41234007477760315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.811217841428956, "step_time": 0.7418263244628907} +{"epoch": 0, "iter": 7214, "iter_tflops": 14.144628269450088, "iter_time": 1.4585815277099607, "loss": 0.25877878069877625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.139318023549563, "step_time": 1.2783125953674315} +{"epoch": 0, "iter": 7215, "iter_tflops": 38.38915781971913, "iter_time": 0.5374197998046875, "loss": 0.35607728362083435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.88999864158421, "step_time": 0.49250642585754395} +{"epoch": 0, "iter": 7216, "iter_tflops": 42.912207767980384, "iter_time": 0.4807744598388672, "loss": 0.25459879636764526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.77532328516156, "step_time": 
0.4410678977966309} +{"epoch": 0, "iter": 7217, "iter_tflops": 20.22885412339605, "iter_time": 1.0198844375610352, "loss": 0.14830291271209717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.809507427158024, "step_time": 0.9459678802490235} +{"epoch": 0, "iter": 7218, "iter_tflops": 27.230777888846514, "iter_time": 0.757638786315918, "loss": 0.2492779791355133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.059139897824476, "step_time": 0.6642519264221192} +{"epoch": 0, "iter": 7219, "iter_tflops": 49.689519157926654, "iter_time": 0.4152001037597656, "loss": 0.19929921627044678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.97071149160062, "step_time": 0.3822646198272705} +{"epoch": 0, "iter": 7220, "iter_tflops": 52.67473921153009, "iter_time": 0.39166958999633783, "loss": 0.22611266374588013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.06542425593176, "step_time": 0.3615340423583984} +{"epoch": 0, "iter": 7221, "iter_tflops": 31.978387669355875, "iter_time": 0.6451574020385741, "loss": 0.8159461617469788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.189554905637216, "step_time": 0.6034326438903809} +{"epoch": 0, "iter": 7222, "iter_tflops": 12.156200712169486, "iter_time": 1.6971662445068358, "loss": 1.0036097764968872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.016423247417492, "step_time": 1.5850048141479494} +{"epoch": 0, "iter": 7223, "iter_tflops": 11.63710122119905, "iter_time": 1.7728722229003906, "loss": 0.7998945713043213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.708523064072224, "step_time": 1.5049829521179199} +{"epoch": 0, "iter": 7224, "iter_tflops": 36.648276092689756, "iter_time": 0.5629485397338867, "loss": 1.0748385190963745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06321792300955, "step_time": 0.5149634647369384} +{"epoch": 0, "iter": 7225, "iter_tflops": 14.131284002737303, "iter_time": 1.0896573486328125, "loss": 0.5789421796798706, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 14.89954357250617, 
"step_time": 1.0334717559814453} +{"epoch": 0, "iter": 7226, "iter_tflops": 11.31756220254529, "iter_time": 1.3605630950927736, "loss": 0.35620832443237305, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 15.57591350849063, "step_time": 0.988594181060791} +{"epoch": 0, "iter": 7227, "iter_tflops": 26.91512267973184, "iter_time": 0.5721043014526368, "loss": 0.3616238534450531, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.6897175382568, "step_time": 0.536716941833496} +{"epoch": 0, "iter": 7228, "iter_tflops": 26.16050607781435, "iter_time": 0.5886070175170899, "loss": 0.271005243062973, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 27.76843653072377, "step_time": 0.5545237464904784} +{"epoch": 0, "iter": 7229, "iter_tflops": 43.878986820656785, "iter_time": 0.4701816291809082, "loss": 0.08013366907835007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01910104366819, "step_time": 0.42964347648620604} +{"epoch": 0, "iter": 7230, "iter_tflops": 36.08313071538089, "iter_time": 0.5717656173706054, "loss": 0.08498738706111908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.459772877739226, "step_time": 0.5099161968231201} +{"epoch": 0, "iter": 7231, "iter_tflops": 43.780469543101844, "iter_time": 0.4712396583557129, "loss": 0.08744747191667557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.945162189933356, "step_time": 0.43030605316162107} +{"epoch": 0, "iter": 7232, "iter_tflops": 42.74612613339007, "iter_time": 0.48264241409301756, "loss": 0.06995786726474762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.0782369453483, "step_time": 0.43822995185852054} +{"epoch": 0, "iter": 7233, "iter_tflops": 11.700350169614165, "iter_time": 1.2113540649414063, "loss": 0.26059338450431824, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 12.404202858022472, "step_time": 1.1426181030273437} +{"epoch": 0, "iter": 7234, "iter_tflops": 14.229945722992126, "iter_time": 0.9960169219970703, "loss": 0.15828777849674225, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 
18.386797351753586, "step_time": 0.7708393402099609} +{"epoch": 0, "iter": 7235, "iter_tflops": 28.42942001291533, "iter_time": 0.4985422401428223, "loss": 0.3449542820453644, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 31.329171364204026, "step_time": 0.4523983917236328} +{"epoch": 0, "iter": 7236, "iter_tflops": 30.85780770398587, "iter_time": 0.45930893325805666, "loss": 0.1957247406244278, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 33.6526074263029, "step_time": 0.4211639995574951} +{"epoch": 0, "iter": 7237, "iter_tflops": 28.883790725521077, "iter_time": 0.7142792892456056, "loss": 0.8867179155349731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.036753750678557, "step_time": 0.6647310371398926} +{"epoch": 0, "iter": 7238, "iter_tflops": 7.912208294267427, "iter_time": 2.607501312255859, "loss": 0.8284376859664917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.191934579842412, "step_time": 2.2444778442382813} +{"epoch": 0, "iter": 7239, "iter_tflops": 13.088426183950963, "iter_time": 1.5762852783203125, "loss": 0.760980486869812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.306641573168328, "step_time": 1.265195743560791} +{"epoch": 0, "iter": 7240, "iter_tflops": 41.73509024709251, "iter_time": 0.4943344650268555, "loss": 0.6604770421981812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87909740555281, "step_time": 0.4597038421630859} +{"epoch": 0, "iter": 7241, "iter_tflops": 17.48572956186124, "iter_time": 0.8338871459960937, "loss": 0.27943477034568787, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 18.413749605503355, "step_time": 0.791860725402832} +{"epoch": 0, "iter": 7242, "iter_tflops": 11.774468481818467, "iter_time": 1.2383680114746094, "loss": 0.5068195462226868, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 13.784365758255586, "step_time": 1.0578016700744628} +{"epoch": 0, "iter": 7243, "iter_tflops": 25.86574656480522, "iter_time": 0.5637233428955079, "loss": 0.33707377314567566, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 
27.60675596629289, "step_time": 0.5281723480224609} +{"epoch": 0, "iter": 7244, "iter_tflops": 25.479528746937383, "iter_time": 0.5722682418823243, "loss": 0.3161548376083374, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.136173569768246, "step_time": 0.5373316574096679} +{"epoch": 0, "iter": 7245, "iter_tflops": 22.84638475793526, "iter_time": 0.9030353698730469, "loss": 0.4730575978755951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.10650728708892, "step_time": 0.8558308868408203} +{"epoch": 0, "iter": 7246, "iter_tflops": 27.37789835793981, "iter_time": 0.7535674667358397, "loss": 0.5705111026763916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.887147456474914, "step_time": 0.5913665924072264} +{"epoch": 0, "iter": 7247, "iter_tflops": 51.103212577486005, "iter_time": 0.4037142181396484, "loss": 0.48204103112220764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.73167319359331, "step_time": 0.37018614959716795} +{"epoch": 0, "iter": 7248, "iter_tflops": 49.06535642808394, "iter_time": 0.4204818840026855, "loss": 0.5814318656921387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.27172736656303, "step_time": 0.38728035545349127} +{"epoch": 0, "iter": 7249, "iter_tflops": 27.536310574753408, "iter_time": 0.749232307434082, "loss": 0.3336775302886963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.888771865855652, "step_time": 0.690262336730957} +{"epoch": 0, "iter": 7250, "iter_tflops": 9.424172941999194, "iter_time": 2.189167541503906, "loss": 0.4375661015510559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.055991607781703, "step_time": 1.711273048400879} +{"epoch": 0, "iter": 7251, "iter_tflops": 12.385478324484536, "iter_time": 1.6657486267089843, "loss": 0.45784202218055725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.19290336373272, "step_time": 1.274082420349121} +{"epoch": 0, "iter": 7252, "iter_tflops": 20.623285512095208, "iter_time": 1.0003786010742188, "loss": 0.36753809452056885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.697217975527956, "step_time": 0.802853193283081} +{"epoch": 0, "iter": 7253, "iter_tflops": 16.09841887175253, "iter_time": 0.9768436889648437, "loss": 0.5247138738632202, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 17.222929161445556, "step_time": 0.9130641326904297} +{"epoch": 0, "iter": 7254, "iter_tflops": 21.823918604411876, "iter_time": 0.7205689849853517, "loss": 0.42274701595306396, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.536274064223065, "step_time": 0.640913890838623} +{"epoch": 0, "iter": 7255, "iter_tflops": 25.082479322108348, "iter_time": 0.6269571151733399, "loss": 0.521678626537323, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.990067447970418, "step_time": 0.582645408630371} +{"epoch": 0, "iter": 7256, "iter_tflops": 24.43625922084293, "iter_time": 0.6435370788574218, "loss": 0.5956202745437622, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.232546975727793, "step_time": 0.5994705314636231} +{"epoch": 0, "iter": 7257, "iter_tflops": 18.40222199283194, "iter_time": 0.7458429489135743, "loss": 0.04560280218720436, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 19.877786810164746, "step_time": 0.6904776496887207} +{"epoch": 0, "iter": 7258, "iter_tflops": 6.209774145300729, "iter_time": 2.210252288818359, "loss": 0.020441224798560143, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 8.17710966069729, "step_time": 1.6784864196777343} +{"epoch": 0, "iter": 7259, "iter_tflops": 7.666743966378059, "iter_time": 1.7902211914062498, "loss": 0.1032063364982605, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 9.97632655781625, "step_time": 1.375773681640625} +{"epoch": 0, "iter": 7260, "iter_tflops": 26.037925004851953, "iter_time": 0.5271221694946289, "loss": 0.02557908184826374, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 29.122401876224217, "step_time": 0.47129242897033685} +{"epoch": 0, "iter": 7261, "iter_tflops": 20.575225169612057, "iter_time": 0.8001541213989258, "loss": 0.4883074164390564, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 
21.738073356981673, "step_time": 0.7573509826660155} +{"epoch": 0, "iter": 7262, "iter_tflops": 13.165742374718626, "iter_time": 1.2504688873291014, "loss": 0.3683898448944092, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 18.1633502112313, "step_time": 0.906404987335205} +{"epoch": 0, "iter": 7263, "iter_tflops": 29.603326149895203, "iter_time": 0.5561318054199218, "loss": 0.3420741856098175, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.578198997329167, "step_time": 0.5213518104553222} +{"epoch": 0, "iter": 7264, "iter_tflops": 27.213996417356096, "iter_time": 0.6049589691162109, "loss": 0.4802892506122589, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 28.820731904291993, "step_time": 0.5712329330444337} +{"epoch": 0, "iter": 7265, "iter_tflops": 40.523197638163644, "iter_time": 0.5091181030273437, "loss": 0.47167763113975525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32662042830437, "step_time": 0.4654334869384765} +{"epoch": 0, "iter": 7266, "iter_tflops": 45.05495959410589, "iter_time": 0.4579094886779785, "loss": 0.5459075570106506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.85469872327112, "step_time": 0.4222949695587158} +{"epoch": 0, "iter": 7267, "iter_tflops": 45.58210368501836, "iter_time": 0.45261389541625974, "loss": 0.41254693269729614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45996578869092, "step_time": 0.4171271286010742} +{"epoch": 0, "iter": 7268, "iter_tflops": 44.928600341427284, "iter_time": 0.4591973342895508, "loss": 0.3190166652202606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62635070968832, "step_time": 0.42427805519104} +{"epoch": 0, "iter": 7269, "iter_tflops": 25.84260402475125, "iter_time": 0.7983364791870117, "loss": 0.7801192402839661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.256300573890822, "step_time": 0.7569293365478517} +{"epoch": 0, "iter": 7270, "iter_tflops": 14.416542842521203, "iter_time": 1.4310708007812496, "loss": 0.7099079489707947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
18.082642782049238, "step_time": 1.1409335327148438} +{"epoch": 0, "iter": 7271, "iter_tflops": 34.74632442416867, "iter_time": 0.5937633361816406, "loss": 0.6607335805892944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.801158730595354, "step_time": 0.5457793941497803} +{"epoch": 0, "iter": 7272, "iter_tflops": 35.75335809392257, "iter_time": 0.5770393218994141, "loss": 0.6864739656448364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.52369273658377, "step_time": 0.5355429878234863} +{"epoch": 0, "iter": 7273, "iter_tflops": 14.435339249933111, "iter_time": 1.0638721923828125, "loss": 0.2729661464691162, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 15.372286855879738, "step_time": 0.9990287170410157} +{"epoch": 0, "iter": 7274, "iter_tflops": 10.186209896486154, "iter_time": 1.5076614532470702, "loss": 0.20281441509723663, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 12.115892058346748, "step_time": 1.267538200378418} +{"epoch": 0, "iter": 7275, "iter_tflops": 22.863257451600145, "iter_time": 0.6717046356201172, "loss": 0.37999671697616577, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.689130243436917, "step_time": 0.6220290412902832} +{"epoch": 0, "iter": 7276, "iter_tflops": 24.176401643441668, "iter_time": 0.6352209167480469, "loss": 0.41640159487724304, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 25.75081033458667, "step_time": 0.5963834075927734} +{"epoch": 0, "iter": 7277, "iter_tflops": 18.812679373043558, "iter_time": 1.096658966064453, "loss": 0.6863043904304504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.15628153635904, "step_time": 1.023556526184082} +{"epoch": 0, "iter": 7278, "iter_tflops": 23.515126583321006, "iter_time": 0.8773541336059569, "loss": 0.6210793852806091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.117537819943536, "step_time": 0.789932559967041} +{"epoch": 0, "iter": 7279, "iter_tflops": 42.48499657589445, "iter_time": 0.4856089248657226, "loss": 0.7159281969070435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
45.71448785264937, "step_time": 0.4513031749725342} +{"epoch": 0, "iter": 7280, "iter_tflops": 42.93990409801201, "iter_time": 0.4804643592834472, "loss": 0.7869958281517029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.98605732460988, "step_time": 0.44863801574707035} +{"epoch": 0, "iter": 7281, "iter_tflops": 37.99896341653131, "iter_time": 0.5429383239746094, "loss": 0.6419326066970825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.09493566115991, "step_time": 0.5020349388122559} +{"epoch": 0, "iter": 7282, "iter_tflops": 35.39972126399692, "iter_time": 0.5828038406372069, "loss": 0.7667599320411682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.48326046431058, "step_time": 0.5361056537628174} +{"epoch": 0, "iter": 7283, "iter_tflops": 38.34936067484554, "iter_time": 0.5379775085449219, "loss": 0.6054538488388062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.443878041496504, "step_time": 0.4978079872131348} +{"epoch": 0, "iter": 7284, "iter_tflops": 38.75093364080809, "iter_time": 0.532402488708496, "loss": 0.7929855585098267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.209948039646704, "step_time": 0.4887732505798339} +{"epoch": 0, "iter": 7285, "iter_tflops": 34.18054251502301, "iter_time": 0.6035917510986328, "loss": 0.17521628737449646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.95358048525129, "step_time": 0.5435875415802002} +{"epoch": 0, "iter": 7286, "iter_tflops": 39.13863762984061, "iter_time": 0.5271285552978515, "loss": 0.17819738388061523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.309907306046625, "step_time": 0.4763596782684326} +{"epoch": 0, "iter": 7287, "iter_tflops": 43.75231289665147, "iter_time": 0.47154292297363287, "loss": 0.19947302341461182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.984929154699834, "step_time": 0.42994944190979006} +{"epoch": 0, "iter": 7288, "iter_tflops": 39.975914868621274, "iter_time": 0.5160880889892577, "loss": 0.18302161991596222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.44900432722985, "step_time": 0.4748346672058105} +{"epoch": 0, "iter": 7289, "iter_tflops": 30.689439897267174, "iter_time": 0.6722538299560548, "loss": 0.32832983136177063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.15567576958019, "step_time": 0.6040311908721924} +{"epoch": 0, "iter": 7290, "iter_tflops": 38.74074867922207, "iter_time": 0.5325424575805665, "loss": 0.21675419807434082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41704322130657, "step_time": 0.486386884689331} +{"epoch": 0, "iter": 7291, "iter_tflops": 40.68625849166302, "iter_time": 0.50707767868042, "loss": 0.3277451694011688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.369489815579996, "step_time": 0.46498378944396973} +{"epoch": 0, "iter": 7292, "iter_tflops": 40.38320499480459, "iter_time": 0.5108830146789551, "loss": 0.2741147577762604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84248049702256, "step_time": 0.47057313537597656} +{"epoch": 0, "iter": 7293, "iter_tflops": 16.915277812827767, "iter_time": 1.2196721649169922, "loss": 0.09687206149101257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.064848697510293, "step_time": 1.1420573654174806} +{"epoch": 0, "iter": 7294, "iter_tflops": 28.314983026054865, "iter_time": 0.7286281433105469, "loss": 0.09981094300746918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.52106377768819, "step_time": 0.6545176792144776} +{"epoch": 0, "iter": 7295, "iter_tflops": 40.104048007446984, "iter_time": 0.5144391784667969, "loss": 0.12207263708114624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.94896858074246, "step_time": 0.46943293952941895} +{"epoch": 0, "iter": 7296, "iter_tflops": 40.246441536486145, "iter_time": 0.5126190719604493, "loss": 0.15537451207637787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.04665173243111, "step_time": 0.4683918685913086} +{"epoch": 0, "iter": 7297, "iter_tflops": 19.673567220580374, "iter_time": 1.048670700073242, "loss": 0.23269033432006836, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 21.209978572524328, "step_time": 0.9727069473266601} +{"epoch": 0, "iter": 7298, "iter_tflops": 25.870829158980335, "iter_time": 0.7974654922485351, "loss": 0.22441034018993378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.03187857089673, "step_time": 0.7106358432769775} +{"epoch": 0, "iter": 7299, "iter_tflops": 52.056222128606, "iter_time": 0.39632329559326174, "loss": 0.22878625988960266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.32358987330448, "step_time": 0.36629578399658197} +{"epoch": 0, "iter": 7300, "iter_tflops": 52.11325821591973, "iter_time": 0.395889533996582, "loss": 0.2318163514137268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.13181610219558, "step_time": 0.36754722976684573} +{"epoch": 0, "iter": 7301, "iter_tflops": 38.54686681202043, "iter_time": 0.5352210235595704, "loss": 0.1120995506644249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.7372240221319, "step_time": 0.49430919265747064} +{"epoch": 0, "iter": 7302, "iter_tflops": 10.530105856406147, "iter_time": 1.9592484436035156, "loss": 0.10536167025566101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.237334068870785, "step_time": 1.5585535125732422} +{"epoch": 0, "iter": 7303, "iter_tflops": 15.860896467209034, "iter_time": 1.3007520446777345, "loss": 0.1355873942375183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.755611752084334, "step_time": 1.0999957656860353} +{"epoch": 0, "iter": 7304, "iter_tflops": 12.081844548854905, "iter_time": 1.7076112365722655, "loss": 0.12375517934560776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.180134977952052, "step_time": 1.4549292755126952} +{"epoch": 0, "iter": 7305, "iter_tflops": 16.813891442222328, "iter_time": 1.0255630722045899, "loss": 0.3558359146118164, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 18.14771292786173, "step_time": 0.9501861877441407} +{"epoch": 0, "iter": 7306, "iter_tflops": 10.256224172722403, "iter_time": 1.6812918548583984, "loss": 0.2958487868309021, "lr": 3e-05, "seqlen": 
6880.0, "step_tflops": 12.648422558735895, "step_time": 1.3633088302612304} +{"epoch": 0, "iter": 7307, "iter_tflops": 30.84659530924769, "iter_time": 0.559014892578125, "loss": 0.4341573119163513, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 32.86137998889948, "step_time": 0.524740779876709} +{"epoch": 0, "iter": 7308, "iter_tflops": 31.57261701211867, "iter_time": 0.5461601791381835, "loss": 0.3830113112926483, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 33.51662066427285, "step_time": 0.5144822425842286} +{"epoch": 0, "iter": 7309, "iter_tflops": 20.81438834364127, "iter_time": 0.9911938400268555, "loss": 0.08860291540622711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.774707381586346, "step_time": 0.9474797134399414} +{"epoch": 0, "iter": 7310, "iter_tflops": 13.832754706746211, "iter_time": 1.491466735839844, "loss": 0.09724588692188263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.373739656868825, "step_time": 1.2600110874176025} +{"epoch": 0, "iter": 7311, "iter_tflops": 49.842348513294894, "iter_time": 0.4139269943237304, "loss": 0.08101899176836014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.51484191485683, "step_time": 0.3784491119384766} +{"epoch": 0, "iter": 7312, "iter_tflops": 50.6083062476554, "iter_time": 0.40766220092773436, "loss": 0.08943098038434982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.80970935455211, "step_time": 0.37641311645507813} +{"epoch": 0, "iter": 7313, "iter_tflops": 30.579218722629182, "iter_time": 0.6746769332885743, "loss": 0.8789722919464111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.55364687567549, "step_time": 0.63375675201416} +{"epoch": 0, "iter": 7314, "iter_tflops": 14.793905949879038, "iter_time": 1.3945670318603518, "loss": 0.6781256794929504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.00388142701358, "step_time": 1.289130615234375} +{"epoch": 0, "iter": 7315, "iter_tflops": 35.77067218238516, "iter_time": 0.5767600173950196, "loss": 0.8016079664230347, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 38.95723281780943, "step_time": 0.5295831356048584} +{"epoch": 0, "iter": 7316, "iter_tflops": 37.379247697787655, "iter_time": 0.5519397735595704, "loss": 0.7581213712692261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56227410596862, "step_time": 0.5086276340484619} +{"epoch": 0, "iter": 7317, "iter_tflops": 17.2520364569872, "iter_time": 1.1958642425537112, "loss": 0.21791042387485504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.776587603667434, "step_time": 1.0987669296264648} +{"epoch": 0, "iter": 7318, "iter_tflops": 33.821330676466786, "iter_time": 0.6100024185180664, "loss": 0.17873118817806244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.34616129282722, "step_time": 0.4872010326385498} +{"epoch": 0, "iter": 7319, "iter_tflops": 40.19116149774744, "iter_time": 0.5133241424560547, "loss": 0.18367569148540497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.02139572407859, "step_time": 0.4686605949401855} +{"epoch": 0, "iter": 7320, "iter_tflops": 43.887497602970264, "iter_time": 0.4700904502868652, "loss": 0.26105743646621704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.982172201247856, "step_time": 0.42997414588928223} +{"epoch": 0, "iter": 7321, "iter_tflops": 20.165716281767786, "iter_time": 1.0230776443481444, "loss": 0.7375887036323547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.501948166231156, "step_time": 0.9594988021850586} +{"epoch": 0, "iter": 7322, "iter_tflops": 16.537925828658302, "iter_time": 1.2475018768310548, "loss": 0.8600667119026184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.410479113899854, "step_time": 1.0628843002319337} +{"epoch": 0, "iter": 7323, "iter_tflops": 46.68665630127637, "iter_time": 0.4419055709838867, "loss": 0.8395869135856628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34307215838125, "step_time": 0.4098099822998047} +{"epoch": 0, "iter": 7324, "iter_tflops": 43.51913521869344, "iter_time": 0.47406947326660154, "loss": 0.8329782485961914, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 47.12067758279142, "step_time": 0.43783524703979493} +{"epoch": 0, "iter": 7325, "iter_tflops": 28.684787680369336, "iter_time": 0.5311065979003906, "loss": 0.12271115183830261, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 31.15730502493525, "step_time": 0.48896013259887694} +{"epoch": 0, "iter": 7326, "iter_tflops": 28.631543932412743, "iter_time": 0.5320942535400391, "loss": 0.1633421778678894, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 31.607973752551082, "step_time": 0.48198850440979} +{"epoch": 0, "iter": 7327, "iter_tflops": 33.26555789008256, "iter_time": 0.4579715766906738, "loss": 0.1383959949016571, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 36.579227030903766, "step_time": 0.41648447036743164} +{"epoch": 0, "iter": 7328, "iter_tflops": 30.707345434123244, "iter_time": 0.496124942779541, "loss": 0.14445658028125763, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 33.523735285769284, "step_time": 0.4544445858001709} +{"epoch": 0, "iter": 7329, "iter_tflops": 20.13964001345401, "iter_time": 1.0244022979736327, "loss": 0.7073160409927368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.677948627732466, "step_time": 0.9517087554931641} +{"epoch": 0, "iter": 7330, "iter_tflops": 17.131185469927757, "iter_time": 1.2043003997802735, "loss": 0.6452150344848633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.333643547345527, "step_time": 0.9237674751281738} +{"epoch": 0, "iter": 7331, "iter_tflops": 47.20928104024427, "iter_time": 0.43701350784301757, "loss": 0.6850001811981201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.62141754138287, "step_time": 0.399661506652832} +{"epoch": 0, "iter": 7332, "iter_tflops": 47.73607328719434, "iter_time": 0.43219083786010737, "loss": 0.6783084273338318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.74858401221799, "step_time": 0.39867938232421873} +{"epoch": 0, "iter": 7333, "iter_tflops": 32.2227955066124, "iter_time": 0.5939906997680664, "loss": 0.04899941384792328, "lr": 
3e-05, "seqlen": 7616.0, "step_tflops": 34.408445381681325, "step_time": 0.5562599716186524} +{"epoch": 0, "iter": 7334, "iter_tflops": 7.016617165716907, "iter_time": 2.7278160400390625, "loss": 0.07056035101413727, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 8.376619591203417, "step_time": 2.2849361419677736} +{"epoch": 0, "iter": 7335, "iter_tflops": 10.069418713593791, "iter_time": 1.90080891418457, "loss": 0.05530498921871185, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 12.203472884583789, "step_time": 1.5684093399047851} +{"epoch": 0, "iter": 7336, "iter_tflops": 31.793352848274075, "iter_time": 0.602013916015625, "loss": 0.04443339630961418, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 40.38082924690841, "step_time": 0.4739883060455322} +{"epoch": 0, "iter": 7337, "iter_tflops": 16.241942967710166, "iter_time": 1.1022183990478516, "loss": 0.5254307389259338, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 17.579987791240207, "step_time": 1.0183265533447265} +{"epoch": 0, "iter": 7338, "iter_tflops": 27.03862501677386, "iter_time": 0.6620961074829103, "loss": 0.19302700459957123, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 29.261566046481004, "step_time": 0.6117980270385742} +{"epoch": 0, "iter": 7339, "iter_tflops": 25.589861413540422, "iter_time": 0.6995805130004884, "loss": 0.45453014969825745, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 27.486449101317177, "step_time": 0.651308879852295} +{"epoch": 0, "iter": 7340, "iter_tflops": 29.101302299641976, "iter_time": 0.6151672592163085, "loss": 0.4339691996574402, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 31.188768959107403, "step_time": 0.573994068145752} +{"epoch": 0, "iter": 7341, "iter_tflops": 18.54101998567475, "iter_time": 0.9056521224975586, "loss": 0.01262929942458868, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 20.20589374198946, "step_time": 0.8310305061340331} +{"epoch": 0, "iter": 7342, "iter_tflops": 13.618107931920733, "iter_time": 1.2330431060791016, "loss": 0.014761192724108696, 
"lr": 3e-05, "seqlen": 6704.0, "step_tflops": 16.34782841143859, "step_time": 1.0271525783538817} +{"epoch": 0, "iter": 7343, "iter_tflops": 35.17324284307658, "iter_time": 0.47740022659301756, "loss": 0.0034375041723251343, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 39.021939720918326, "step_time": 0.4303146953582763} +{"epoch": 0, "iter": 7344, "iter_tflops": 37.23466190263909, "iter_time": 0.4509699630737305, "loss": 0.002826737007126212, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 41.179867592532624, "step_time": 0.40776513099670403} +{"epoch": 0, "iter": 7345, "iter_tflops": 27.906655245164934, "iter_time": 0.7392893676757812, "loss": 0.006853431463241577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.455816385197917, "step_time": 0.6774106216430664} +{"epoch": 0, "iter": 7346, "iter_tflops": 18.408417036306016, "iter_time": 1.120742401123047, "loss": 0.008329108357429504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.257524699065193, "step_time": 0.8505028343200683} +{"epoch": 0, "iter": 7347, "iter_tflops": 56.25214510732297, "iter_time": 0.3667610092163086, "loss": 0.011005650274455547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.39878533865958, "step_time": 0.3306329345703125} +{"epoch": 0, "iter": 7348, "iter_tflops": 51.619741864735936, "iter_time": 0.3996744804382324, "loss": 0.005282315891236067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.51088359072478, "step_time": 0.365081771850586} +{"epoch": 0, "iter": 7349, "iter_tflops": 33.96853558305059, "iter_time": 0.6073589324951172, "loss": 0.004653533920645714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.34270800198698, "step_time": 0.5676817893981934} +{"epoch": 0, "iter": 7350, "iter_tflops": 29.940164246282944, "iter_time": 0.6890774993896485, "loss": 0.01258890051394701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2080718130809, "step_time": 0.4563586254119873} +{"epoch": 0, "iter": 7351, "iter_tflops": 58.840334818448085, "iter_time": 0.35062841796875, "loss": 
0.017665274441242218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.66750087538361, "step_time": 0.31903341293334964} +{"epoch": 0, "iter": 7352, "iter_tflops": 58.214675083365904, "iter_time": 0.3543967819213867, "loss": 0.013311224058270454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.64463599961945, "step_time": 0.324160758972168} +{"epoch": 0, "iter": 7353, "iter_tflops": 28.998149134043675, "iter_time": 0.7114624252319336, "loss": 0.8202030658721924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.81795463735186, "step_time": 0.6694504470825196} +{"epoch": 0, "iter": 7354, "iter_tflops": 15.980026709434812, "iter_time": 1.2910550079345704, "loss": 0.8723946809768677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.75443049619959, "step_time": 0.9940573177337646} +{"epoch": 0, "iter": 7355, "iter_tflops": 44.79289266899569, "iter_time": 0.46058855056762693, "loss": 0.800040602684021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.684247707202054, "step_time": 0.4237734889984131} +{"epoch": 0, "iter": 7356, "iter_tflops": 45.36140338069898, "iter_time": 0.45481603240966795, "loss": 0.8938106298446655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.045285232047036, "step_time": 0.42065396118164067} +{"epoch": 0, "iter": 7357, "iter_tflops": 43.11572263369334, "iter_time": 0.4785051078796387, "loss": 0.08247406035661697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.54919391876039, "step_time": 0.4338894481658936} +{"epoch": 0, "iter": 7358, "iter_tflops": 49.0158531161132, "iter_time": 0.42090654754638673, "loss": 0.13311238586902618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.73484209720305, "step_time": 0.3701651020050049} +{"epoch": 0, "iter": 7359, "iter_tflops": 53.11241427494236, "iter_time": 0.3884420204162598, "loss": 0.08963233232498169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.941377706430366, "step_time": 0.3560683975219726} +{"epoch": 0, "iter": 7360, "iter_tflops": 48.42548403725562, "iter_time": 0.42603794097900394, 
"loss": 0.08661964535713196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51501783474198, "step_time": 0.39286083030700686} +{"epoch": 0, "iter": 7361, "iter_tflops": 36.09766660517158, "iter_time": 0.5715353775024413, "loss": 0.036631714552640915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.763447886360254, "step_time": 0.5322306098937989} +{"epoch": 0, "iter": 7362, "iter_tflops": 12.118763186444268, "iter_time": 1.702409164428711, "loss": 0.046271342784166336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.872746645038395, "step_time": 1.2997809371948243} +{"epoch": 0, "iter": 7363, "iter_tflops": 10.49031837893373, "iter_time": 1.9666794433593748, "loss": 0.04711632803082466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.249828586355278, "step_time": 1.684194465637207} +{"epoch": 0, "iter": 7364, "iter_tflops": 17.599307693767912, "iter_time": 1.172267333984375, "loss": 0.027310743927955627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.85998265927637, "step_time": 1.0388273677825928} +{"epoch": 0, "iter": 7365, "iter_tflops": 18.466260794938343, "iter_time": 0.8671135711669922, "loss": 0.41932642459869385, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 19.453721645285206, "step_time": 0.823099334716797} +{"epoch": 0, "iter": 7366, "iter_tflops": 14.407755250964717, "iter_time": 1.1113698883056644, "loss": 0.4492841362953186, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 18.779037472486404, "step_time": 0.8526712493896484} +{"epoch": 0, "iter": 7367, "iter_tflops": 23.56760400688174, "iter_time": 0.6794218597412109, "loss": 0.29870301485061646, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 25.362737797978724, "step_time": 0.6313334732055664} +{"epoch": 0, "iter": 7368, "iter_tflops": 24.894460112972364, "iter_time": 0.6432091827392579, "loss": 0.5180456042289734, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 26.72984136118686, "step_time": 0.5990437850952148} +{"epoch": 0, "iter": 7369, "iter_tflops": 19.907134607263124, "iter_time": 
1.0363668060302735, "loss": 0.5320491194725037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.284906091385754, "step_time": 0.9692828063964843} +{"epoch": 0, "iter": 7370, "iter_tflops": 17.74622008703036, "iter_time": 1.1625626983642579, "loss": 0.6605730056762695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.426289282833924, "step_time": 0.9628869113922119} +{"epoch": 0, "iter": 7371, "iter_tflops": 43.52288077654934, "iter_time": 0.4740286750793457, "loss": 0.580300509929657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.587001107482784, "step_time": 0.43354472923278803} +{"epoch": 0, "iter": 7372, "iter_tflops": 40.023058212439715, "iter_time": 0.5154801864624023, "loss": 0.894536554813385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.26515885841794, "step_time": 0.4768523693084717} +{"epoch": 0, "iter": 7373, "iter_tflops": 14.300538302027476, "iter_time": 1.442679504394531, "loss": 0.10471145808696747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.124595490795985, "step_time": 1.3640757217407224} +{"epoch": 0, "iter": 7374, "iter_tflops": 17.084511274500354, "iter_time": 1.2075904998779297, "loss": 0.07543092221021652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.174396950050927, "step_time": 0.9743414916992188} +{"epoch": 0, "iter": 7375, "iter_tflops": 38.87483967134912, "iter_time": 0.5307055587768555, "loss": 0.09339924156665802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.09063837734028, "step_time": 0.47878365898132325} +{"epoch": 0, "iter": 7376, "iter_tflops": 45.550023650623444, "iter_time": 0.4529326629638672, "loss": 0.07628713548183441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.909498912042224, "step_time": 0.41337007904052736} +{"epoch": 0, "iter": 7377, "iter_tflops": 9.201024185734617, "iter_time": 0.9121279067993163, "loss": 0.0510130375623703, "lr": 3e-05, "seqlen": 3392.0, "step_tflops": 9.756167237656713, "step_time": 0.8602262268066406} +{"epoch": 0, "iter": 7378, "iter_tflops": 6.277235166411086, 
"iter_time": 1.3369757080078126, "loss": 0.07026641070842743, "lr": 3e-05, "seqlen": 3392.0, "step_tflops": 7.6841763826638045, "step_time": 1.0921809329986572} +{"epoch": 0, "iter": 7379, "iter_tflops": 17.393440074951688, "iter_time": 0.48251012420654293, "loss": 0.0884428322315216, "lr": 3e-05, "seqlen": 3392.0, "step_tflops": 19.194203792695436, "step_time": 0.4372419414520264} +{"epoch": 0, "iter": 7380, "iter_tflops": 18.412878839872686, "iter_time": 0.45579569625854494, "loss": 0.08951334655284882, "lr": 3e-05, "seqlen": 3392.0, "step_tflops": 20.11599539464405, "step_time": 0.41720584869384764} +{"epoch": 0, "iter": 7381, "iter_tflops": 24.21475797660744, "iter_time": 0.8520049438476562, "loss": 0.5172075033187866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.20078521646598, "step_time": 0.7874227180480958} +{"epoch": 0, "iter": 7382, "iter_tflops": 18.401414630269468, "iter_time": 1.1211688842773437, "loss": 0.3609999716281891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.06469591832706, "step_time": 0.9350273208618165} +{"epoch": 0, "iter": 7383, "iter_tflops": 48.13707824639608, "iter_time": 0.4285904808044434, "loss": 0.4843236804008484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.25503149440031, "step_time": 0.39481544494628906} +{"epoch": 0, "iter": 7384, "iter_tflops": 45.918798924398196, "iter_time": 0.44929514694213873, "loss": 0.40632838010787964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62622307454859, "step_time": 0.4157296733856201} +{"epoch": 0, "iter": 7385, "iter_tflops": 29.371470883122512, "iter_time": 0.7024194869995116, "loss": 0.23455466330051422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.178710928448048, "step_time": 0.6617045059204102} +{"epoch": 0, "iter": 7386, "iter_tflops": 14.449776510564924, "iter_time": 1.4277794189453124, "loss": 0.40688395500183105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.489565461713426, "step_time": 1.115823600769043} +{"epoch": 0, "iter": 7387, "iter_tflops": 
40.60034611808241, "iter_time": 0.5081506805419922, "loss": 0.19868023693561554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44548483628806, "step_time": 0.46418873786926274} +{"epoch": 0, "iter": 7388, "iter_tflops": 42.253436152977834, "iter_time": 0.4882701950073242, "loss": 0.3619198203086853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14798820443012, "step_time": 0.4470637683868408} +{"epoch": 0, "iter": 7389, "iter_tflops": 21.73405693766603, "iter_time": 0.949251838684082, "loss": 0.38102179765701294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.405396260174747, "step_time": 0.8814673881530761} +{"epoch": 0, "iter": 7390, "iter_tflops": 25.762940216619274, "iter_time": 0.8008050842285156, "loss": 0.2570621967315674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.062452357131207, "step_time": 0.7098882522583009} +{"epoch": 0, "iter": 7391, "iter_tflops": 50.91393123858768, "iter_time": 0.40521509552001944, "loss": 0.36608511209487915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.32359233347972, "step_time": 0.3729167366027831} +{"epoch": 0, "iter": 7392, "iter_tflops": 47.295613346378076, "iter_time": 0.43621579360961915, "loss": 0.26805204153060913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.11081511809251, "step_time": 0.403654167175293} +{"epoch": 0, "iter": 7393, "iter_tflops": 26.71402643906699, "iter_time": 0.7722944183349609, "loss": 0.4528340697288513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.277621220732826, "step_time": 0.7295908432006837} +{"epoch": 0, "iter": 7394, "iter_tflops": 15.647721396458175, "iter_time": 1.3184727020263671, "loss": 0.5329002737998962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.865595820393892, "step_time": 1.0935829277038573} +{"epoch": 0, "iter": 7395, "iter_tflops": 44.12262633301632, "iter_time": 0.4675853462219238, "loss": 0.593628466129303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.44983903824427, "step_time": 0.42582377815246586} +{"epoch": 0, "iter": 7396, 
"iter_tflops": 39.989779078628565, "iter_time": 0.5159091644287109, "loss": 0.5402450561523438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37769105360587, "step_time": 0.4756152992248535} +{"epoch": 0, "iter": 7397, "iter_tflops": 16.71482066835898, "iter_time": 1.2342994232177735, "loss": 0.6796079277992249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.03310470708965, "step_time": 1.1440677490234374} +{"epoch": 0, "iter": 7398, "iter_tflops": 21.433701174207762, "iter_time": 0.9625539398193359, "loss": 0.6373076438903809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.083395849138046, "step_time": 0.7346367092132567} +{"epoch": 0, "iter": 7399, "iter_tflops": 49.26689125624385, "iter_time": 0.4187618293762207, "loss": 0.5721290707588196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.18410237090306, "step_time": 0.3879184303283691} +{"epoch": 0, "iter": 7400, "iter_tflops": 47.05377461989681, "iter_time": 0.4384577789306641, "loss": 0.6233492493629456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.913121706927846, "step_time": 0.40522153854370113} +{"epoch": 0, "iter": 7401, "iter_tflops": 33.15712152651525, "iter_time": 0.6222220916748047, "loss": 0.19326259195804596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.64576973523115, "step_time": 0.5787809791564942} +{"epoch": 0, "iter": 7402, "iter_tflops": 16.49575462112424, "iter_time": 1.2506911010742188, "loss": 0.20340411365032196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.981955730496864, "step_time": 1.0868792343139648} +{"epoch": 0, "iter": 7403, "iter_tflops": 35.740702732817695, "iter_time": 0.5772436447143554, "loss": 0.20099468529224396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.05789463814668, "step_time": 0.5282182693481444} +{"epoch": 0, "iter": 7404, "iter_tflops": 43.80871142393182, "iter_time": 0.47093586730957027, "loss": 0.19808754324913025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.930866486597246, "step_time": 0.4304343948364258} +{"epoch": 0, "iter": 
7405, "iter_tflops": 16.711699428563843, "iter_time": 1.2345299530029297, "loss": 0.24361099302768707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.78079358245702, "step_time": 1.160302177429199} +{"epoch": 0, "iter": 7406, "iter_tflops": 15.522660109578704, "iter_time": 1.329095230102539, "loss": 0.18880654871463776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.400604123351062, "step_time": 1.1212182693481445} +{"epoch": 0, "iter": 7407, "iter_tflops": 40.96062119682873, "iter_time": 0.5036811676025391, "loss": 0.20206311345100403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.93987396578506, "step_time": 0.4590821399688721} +{"epoch": 0, "iter": 7408, "iter_tflops": 38.955667458578475, "iter_time": 0.5296044158935547, "loss": 0.21665461361408234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67910484223551, "step_time": 0.483400333404541} +{"epoch": 0, "iter": 7409, "iter_tflops": 19.76314882331238, "iter_time": 1.0439173278808593, "loss": 0.025774208828806877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.043657209739283, "step_time": 0.9803948669433593} +{"epoch": 0, "iter": 7410, "iter_tflops": 17.215950078212714, "iter_time": 1.1983708953857422, "loss": 0.041069284081459045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.744646061572976, "step_time": 0.8337598953247071} +{"epoch": 0, "iter": 7411, "iter_tflops": 52.92679320311097, "iter_time": 0.3898043365478515, "loss": 0.010921704582870007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.043865731394504, "step_time": 0.35543968772888185} +{"epoch": 0, "iter": 7412, "iter_tflops": 51.19132852673444, "iter_time": 0.4030193023681641, "loss": 0.03821001201868057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.813034017484384, "step_time": 0.3696465148925781} +{"epoch": 0, "iter": 7413, "iter_tflops": 41.75378695643302, "iter_time": 0.4941131095886231, "loss": 0.42663633823394775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2711195637807, "step_time": 0.45572306823730463} +{"epoch": 0, 
"iter": 7414, "iter_tflops": 43.398131525768065, "iter_time": 0.47539128494262695, "loss": 0.4798589050769806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.253956181309405, "step_time": 0.4366003437042236} +{"epoch": 0, "iter": 7415, "iter_tflops": 46.12912150770729, "iter_time": 0.44724661636352536, "loss": 0.44614365696907043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.05794044408173, "step_time": 0.4121442737579346} +{"epoch": 0, "iter": 7416, "iter_tflops": 45.99248294194599, "iter_time": 0.4485753364562989, "loss": 0.3542257845401764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.67541302443841, "step_time": 0.4153180065155029} +{"epoch": 0, "iter": 7417, "iter_tflops": 32.757476262115404, "iter_time": 0.6298132781982423, "loss": 0.09171881526708603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.979740087345334, "step_time": 0.5898012237548828} +{"epoch": 0, "iter": 7418, "iter_tflops": 11.549698260747395, "iter_time": 1.7862885284423826, "loss": 0.0645039901137352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.028197884379814, "step_time": 1.4706873741149902} +{"epoch": 0, "iter": 7419, "iter_tflops": 52.3813188642873, "iter_time": 0.3938635749816895, "loss": 0.13341006636619568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.65375806539661, "step_time": 0.3578447303771972} +{"epoch": 0, "iter": 7420, "iter_tflops": 50.20906817923741, "iter_time": 0.41090373229980465, "loss": 0.04612908512353897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.669159922534774, "step_time": 0.3773808403015137} +{"epoch": 0, "iter": 7421, "iter_tflops": 45.14365894413885, "iter_time": 0.4570097770690918, "loss": 0.21161071956157684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6381556126002, "step_time": 0.4156297359466553} +{"epoch": 0, "iter": 7422, "iter_tflops": 48.1474729051992, "iter_time": 0.42849795150756836, "loss": 0.23424065113067627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.078830993250314, "step_time": 0.38868778991699215} 
+{"epoch": 0, "iter": 7423, "iter_tflops": 50.30840766967308, "iter_time": 0.4100923576354981, "loss": 0.18163999915122986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.61092363421206, "step_time": 0.37778327369689946} +{"epoch": 0, "iter": 7424, "iter_tflops": 49.785324224312205, "iter_time": 0.4144011077880859, "loss": 0.3092118501663208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.85930326845233, "step_time": 0.38305533599853514} +{"epoch": 0, "iter": 7425, "iter_tflops": 34.49938522949567, "iter_time": 0.5980133666992188, "loss": 0.047674525529146194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.94454426833494, "step_time": 0.5584341049194336} +{"epoch": 0, "iter": 7426, "iter_tflops": 13.17385137217972, "iter_time": 1.5660639343261717, "loss": 0.05632917955517769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.595640871775334, "step_time": 1.1725116271972655} +{"epoch": 0, "iter": 7427, "iter_tflops": 52.00817100740187, "iter_time": 0.39668946456909177, "loss": 0.019215833395719528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.95161834431175, "step_time": 0.36225649261474613} +{"epoch": 0, "iter": 7428, "iter_tflops": 54.893912950378635, "iter_time": 0.3758357238769532, "loss": 0.05099669098854065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.07318420640476, "step_time": 0.34343266105651854} +{"epoch": 0, "iter": 7429, "iter_tflops": 27.862217227574465, "iter_time": 0.7404684753417968, "loss": 0.04631337895989418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.414360709715076, "step_time": 0.7013952713012696} +{"epoch": 0, "iter": 7430, "iter_tflops": 12.375933914274867, "iter_time": 1.667033264160156, "loss": 0.07949716597795486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.503314024813253, "step_time": 1.3307537651062011} +{"epoch": 0, "iter": 7431, "iter_tflops": 51.737090721119344, "iter_time": 0.3987679481506348, "loss": 0.07231039553880692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.56277673723311, "step_time": 
0.36474682998657226} +{"epoch": 0, "iter": 7432, "iter_tflops": 50.81969220355613, "iter_time": 0.40596651840209963, "loss": 0.05327253416180611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.37751180077634, "step_time": 0.37255363845825196} +{"epoch": 0, "iter": 7433, "iter_tflops": 44.98624325320347, "iter_time": 0.45860894393920904, "loss": 0.43887802958488464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.83503913159339, "step_time": 0.42246497344970707} +{"epoch": 0, "iter": 7434, "iter_tflops": 43.82323965374372, "iter_time": 0.4707797431945801, "loss": 0.5326967835426331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.81341606233778, "step_time": 0.4314917278289795} +{"epoch": 0, "iter": 7435, "iter_tflops": 44.22928927942892, "iter_time": 0.466457721710205, "loss": 0.6151560544967651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73755370615924, "step_time": 0.4321774349212646} +{"epoch": 0, "iter": 7436, "iter_tflops": 49.71844562308684, "iter_time": 0.41495853805541993, "loss": 0.5881710648536682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.6634786807222, "step_time": 0.3844531517028809} +{"epoch": 0, "iter": 7437, "iter_tflops": 38.797195238750085, "iter_time": 0.5317676544189454, "loss": 0.1363489031791687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.928576668075976, "step_time": 0.49205327606201166} +{"epoch": 0, "iter": 7438, "iter_tflops": 35.72066604937117, "iter_time": 0.5775674362182617, "loss": 0.1989474892616272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.39162149318216, "step_time": 0.45451325225830075} +{"epoch": 0, "iter": 7439, "iter_tflops": 51.86921168098596, "iter_time": 0.3977522087097168, "loss": 0.1344311535358429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.642652751592806, "step_time": 0.3642324733734131} +{"epoch": 0, "iter": 7440, "iter_tflops": 40.22228242824322, "iter_time": 0.5129269714355469, "loss": 0.11542879790067673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.860107145325934, 
"step_time": 0.4703840198516846} +{"epoch": 0, "iter": 7441, "iter_tflops": 45.75328728079951, "iter_time": 0.4509204635620117, "loss": 0.3899471163749695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04685372930882, "step_time": 0.41223557472229005} +{"epoch": 0, "iter": 7442, "iter_tflops": 44.34983882406342, "iter_time": 0.4651898193359375, "loss": 0.3482000231742859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.51412809319633, "step_time": 0.42525949287414555} +{"epoch": 0, "iter": 7443, "iter_tflops": 45.63912739128868, "iter_time": 0.4520483779907226, "loss": 0.41514915227890015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.56718897003392, "step_time": 0.41622480392456057} +{"epoch": 0, "iter": 7444, "iter_tflops": 46.181334930485214, "iter_time": 0.4467409515380859, "loss": 0.29605454206466675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.757309597648366, "step_time": 0.41463442611694334} +{"epoch": 0, "iter": 7445, "iter_tflops": 26.008401322491878, "iter_time": 0.7932472763061522, "loss": 0.6087126135826111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.3695008756066, "step_time": 0.7537986755371093} +{"epoch": 0, "iter": 7446, "iter_tflops": 16.388630762263183, "iter_time": 1.2588662109375002, "loss": 0.46492528915405273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.369567729901696, "step_time": 1.0651292686462401} +{"epoch": 0, "iter": 7447, "iter_tflops": 35.474821891970024, "iter_time": 0.5815700378417968, "loss": 0.6164616942405701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.76314213439985, "step_time": 0.5322348079681397} +{"epoch": 0, "iter": 7448, "iter_tflops": 40.09591805043728, "iter_time": 0.5145434875488282, "loss": 0.6322943568229675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73265158866205, "step_time": 0.471754919052124} +{"epoch": 0, "iter": 7449, "iter_tflops": 25.744590383729687, "iter_time": 0.8013758697509767, "loss": 0.3553810715675354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
28.06010820357189, "step_time": 0.7352463989257814} +{"epoch": 0, "iter": 7450, "iter_tflops": 44.82071153198479, "iter_time": 0.460302677154541, "loss": 0.5698668956756592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74344759146249, "step_time": 0.40657650375366217} +{"epoch": 0, "iter": 7451, "iter_tflops": 51.36362397274688, "iter_time": 0.4016674041748047, "loss": 0.26097342371940613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.50005436154715, "step_time": 0.371731050491333} +{"epoch": 0, "iter": 7452, "iter_tflops": 50.74066732242745, "iter_time": 0.4065987815856934, "loss": 0.3970329165458679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.9386632525753, "step_time": 0.37552958679199216} +{"epoch": 0, "iter": 7453, "iter_tflops": 29.83868852360925, "iter_time": 0.6914209213256836, "loss": 0.6350921392440796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.715144539641056, "step_time": 0.6505123596191407} +{"epoch": 0, "iter": 7454, "iter_tflops": 13.103645061004089, "iter_time": 1.574454544067383, "loss": 0.47707968950271606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.00393762870487, "step_time": 1.3750452728271485} +{"epoch": 0, "iter": 7455, "iter_tflops": 33.27575622269772, "iter_time": 0.6200037460327148, "loss": 0.46617862582206726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.54150968293848, "step_time": 0.484963830947876} +{"epoch": 0, "iter": 7456, "iter_tflops": 38.209622624285444, "iter_time": 0.5399449691772461, "loss": 0.5513120293617249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.793041582731014, "step_time": 0.4936490077972412} +{"epoch": 0, "iter": 7457, "iter_tflops": 17.90043531093201, "iter_time": 1.1525470275878906, "loss": 0.21468910574913025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.969668274216136, "step_time": 1.087583251953125} +{"epoch": 0, "iter": 7458, "iter_tflops": 13.811134284293278, "iter_time": 1.4938015289306639, "loss": 0.2106323391199112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
18.025993223804814, "step_time": 1.1445190982818603} +{"epoch": 0, "iter": 7459, "iter_tflops": 52.90214502208574, "iter_time": 0.38998595428466803, "loss": 0.26144129037857056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.635725790202954, "step_time": 0.3579566879272461} +{"epoch": 0, "iter": 7460, "iter_tflops": 45.16183377217842, "iter_time": 0.45682585906982426, "loss": 0.3714279234409332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.66894557500311, "step_time": 0.42390672874450686} +{"epoch": 0, "iter": 7461, "iter_tflops": 30.22791199594598, "iter_time": 0.6825179824829102, "loss": 0.023149700835347176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.22674821114617, "step_time": 0.6401853942871093} +{"epoch": 0, "iter": 7462, "iter_tflops": 21.09327607241408, "iter_time": 0.9780886306762695, "loss": 0.008858263492584229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.99178899000473, "step_time": 0.7937542705535889} +{"epoch": 0, "iter": 7463, "iter_tflops": 57.368782626561185, "iter_time": 0.35962229919433597, "loss": 0.0023445356637239456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.99747857864235, "step_time": 0.32749078178405766} +{"epoch": 0, "iter": 7464, "iter_tflops": 64.03694071858364, "iter_time": 0.32217487716674803, "loss": 0.006241380702704191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 70.40954121822156, "step_time": 0.29301559352874756} +{"epoch": 0, "iter": 7465, "iter_tflops": 41.829839309082075, "iter_time": 0.4932147445678711, "loss": 0.7615904808044434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.46147656904542, "step_time": 0.45381485748291017} +{"epoch": 0, "iter": 7466, "iter_tflops": 10.771159092894724, "iter_time": 1.9154014282226566, "loss": 0.8535724878311157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.752117861651518, "step_time": 1.755521324157715} +{"epoch": 0, "iter": 7467, "iter_tflops": 14.398574304048669, "iter_time": 1.432856689453125, "loss": 0.6773648858070374, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 16.72367041255681, "step_time": 1.2336462631225587} +{"epoch": 0, "iter": 7468, "iter_tflops": 22.933590708444594, "iter_time": 0.8996015396118165, "loss": 0.6596208214759827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.377563924032568, "step_time": 0.6791556282043457} +{"epoch": 0, "iter": 7469, "iter_tflops": 19.77671260023987, "iter_time": 0.7166644439697266, "loss": 0.4619177281856537, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 21.506110884445654, "step_time": 0.6590343933105469} +{"epoch": 0, "iter": 7470, "iter_tflops": 21.113274624613133, "iter_time": 0.6712964706420899, "loss": 0.5655698776245117, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 22.699084210823752, "step_time": 0.6243981742858886} +{"epoch": 0, "iter": 7471, "iter_tflops": 22.468655019597037, "iter_time": 0.6308017425537108, "loss": 0.3389230966567993, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 24.111232161243468, "step_time": 0.5878283882141113} +{"epoch": 0, "iter": 7472, "iter_tflops": 21.960748256290152, "iter_time": 0.645390884399414, "loss": 0.413906455039978, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 23.58929168568911, "step_time": 0.600834774017334} +{"epoch": 0, "iter": 7473, "iter_tflops": 33.493503416528206, "iter_time": 0.6159729919433594, "loss": 0.6347051858901978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.016453820903536, "step_time": 0.5573492698669433} +{"epoch": 0, "iter": 7474, "iter_tflops": 34.61390333251777, "iter_time": 0.5960348739624024, "loss": 0.5577437281608582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.81122527393364, "step_time": 0.5456340904235839} +{"epoch": 0, "iter": 7475, "iter_tflops": 37.50335417668325, "iter_time": 0.5501132888793947, "loss": 0.5631564259529114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.968112847765035, "step_time": 0.5035890617370605} +{"epoch": 0, "iter": 7476, "iter_tflops": 41.710849142819015, "iter_time": 0.4946217575073242, "loss": 0.6858587265014648, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 45.17732688807761, "step_time": 0.4566691951751709} +{"epoch": 0, "iter": 7477, "iter_tflops": 33.60723973492796, "iter_time": 0.6138883666992188, "loss": 0.6795185804367065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.580767536906805, "step_time": 0.5639874420166016} +{"epoch": 0, "iter": 7478, "iter_tflops": 39.148281376376204, "iter_time": 0.5269987030029297, "loss": 0.9520884156227112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14642730231849, "step_time": 0.47816458511352544} +{"epoch": 0, "iter": 7479, "iter_tflops": 44.79674382171239, "iter_time": 0.4605489540100098, "loss": 0.7288926243782043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.23066195711538, "step_time": 0.4277588710784912} +{"epoch": 0, "iter": 7480, "iter_tflops": 37.03686689453954, "iter_time": 0.5570420837402343, "loss": 0.6106812357902527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.30919855462135, "step_time": 0.5118209800720215} +{"epoch": 0, "iter": 7481, "iter_tflops": 0.7844426061405495, "iter_time": 1.3975397796630857, "loss": 1.135156512260437, "lr": 3e-05, "seqlen": 448.0, "step_tflops": 0.843160233329789, "step_time": 1.3002151947021483} +{"epoch": 0, "iter": 7482, "iter_tflops": 1.2622491194076124, "iter_time": 0.8685209045410158, "loss": 1.4028054475784302, "lr": 3e-05, "seqlen": 448.0, "step_tflops": 1.5355298976247416, "step_time": 0.7139488124847411} +{"epoch": 0, "iter": 7483, "iter_tflops": 2.642442193306565, "iter_time": 0.4148774757385254, "loss": 1.4872852563858032, "lr": 3e-05, "seqlen": 448.0, "step_tflops": 2.86573462584448, "step_time": 0.38255103492736825} +{"epoch": 0, "iter": 7484, "iter_tflops": 2.7396656293130586, "iter_time": 0.40015457916259767, "loss": 1.223780632019043, "lr": 3e-05, "seqlen": 448.0, "step_tflops": 2.968264760988618, "step_time": 0.3693369140625} +{"epoch": 0, "iter": 7485, "iter_tflops": 25.69372320042868, "iter_time": 0.8029623947143556, "loss": 0.22621740400791168, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 26.98148078051083, "step_time": 0.7646390380859375} +{"epoch": 0, "iter": 7486, "iter_tflops": 13.639124027831116, "iter_time": 1.512640655517578, "loss": 0.30397292971611023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.361218147198468, "step_time": 1.3430636367797852} +{"epoch": 0, "iter": 7487, "iter_tflops": 38.36489460284563, "iter_time": 0.5377596817016602, "loss": 0.21432526409626007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1069283579359, "step_time": 0.48996909332275396} +{"epoch": 0, "iter": 7488, "iter_tflops": 40.5081125215114, "iter_time": 0.5093076972961426, "loss": 0.24320343136787415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25629149976577, "step_time": 0.4661731204986572} +{"epoch": 0, "iter": 7489, "iter_tflops": 27.200653587731658, "iter_time": 0.7584778594970701, "loss": 0.49890270829200745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.366716007545975, "step_time": 0.702533218383789} +{"epoch": 0, "iter": 7490, "iter_tflops": 13.190747357454683, "iter_time": 1.5640579681396485, "loss": 0.5707064270973206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.851990052337943, "step_time": 1.3014828701019288} +{"epoch": 0, "iter": 7491, "iter_tflops": 39.170349538664325, "iter_time": 0.5267017974853515, "loss": 0.4734981954097748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.10183775732518, "step_time": 0.4786592540740967} +{"epoch": 0, "iter": 7492, "iter_tflops": 44.73023726719045, "iter_time": 0.4612337150573731, "loss": 0.5653574466705322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.51107785310239, "step_time": 0.4252862319946289} +{"epoch": 0, "iter": 7493, "iter_tflops": 39.29467535590285, "iter_time": 0.5250353469848633, "loss": 0.019300874322652817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.91638487116049, "step_time": 0.4697812347412109} +{"epoch": 0, "iter": 7494, "iter_tflops": 51.743423602320846, "iter_time": 0.3987191429138184, "loss": 0.016485584899783134, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 56.9042213128576, "step_time": 0.3625582256317139} +{"epoch": 0, "iter": 7495, "iter_tflops": 58.05906741679408, "iter_time": 0.355346622467041, "loss": 0.003943913150578737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.91960483696214, "step_time": 0.3227662868499756} +{"epoch": 0, "iter": 7496, "iter_tflops": 57.951726806251536, "iter_time": 0.35600481033325193, "loss": 0.012086858972907066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.89969245733435, "step_time": 0.32286686706542966} +{"epoch": 0, "iter": 7497, "iter_tflops": 29.800748816267735, "iter_time": 0.6923011779785156, "loss": 0.21494783461093903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.648918186496726, "step_time": 0.6518735771179198} +{"epoch": 0, "iter": 7498, "iter_tflops": 19.837182735474673, "iter_time": 1.0400213470458983, "loss": 0.2317061871290207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.229432431996482, "step_time": 0.9280980777740477} +{"epoch": 0, "iter": 7499, "iter_tflops": 38.962836844491015, "iter_time": 0.529506965637207, "loss": 0.2757363021373749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.68096976965617, "step_time": 0.4833792114257813} +{"epoch": 0, "iter": 7500, "iter_tflops": 41.69596509144801, "iter_time": 0.49479832077026364, "loss": 0.35086768865585327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.318378071857744, "step_time": 0.45524783515930173} +{"epoch": 0, "iter": 7501, "iter_tflops": 34.392204128462915, "iter_time": 0.5998770370483397, "loss": 0.9465025663375854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.62048440600832, "step_time": 0.5484005279541015} +{"epoch": 0, "iter": 7502, "iter_tflops": 33.82603772527186, "iter_time": 0.6099175338745116, "loss": 0.7724947929382324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.69356022266657, "step_time": 0.5622537956237793} +{"epoch": 0, "iter": 7503, "iter_tflops": 35.06027814473498, "iter_time": 0.5884463729858398, "loss": 0.7158573865890503, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 38.25055358997716, "step_time": 0.5393671875} +{"epoch": 0, "iter": 7504, "iter_tflops": 35.54203967872349, "iter_time": 0.5804701614379882, "loss": 0.7208837270736694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.60319493586633, "step_time": 0.5344400520324707} +{"epoch": 0, "iter": 7505, "iter_tflops": 35.70254287346639, "iter_time": 0.5778606185913087, "loss": 0.5787146091461182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.286701262431016, "step_time": 0.5251419143676758} +{"epoch": 0, "iter": 7506, "iter_tflops": 37.53716729899472, "iter_time": 0.5496177520751954, "loss": 0.4948190748691559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33318033354745, "step_time": 0.499141206741333} +{"epoch": 0, "iter": 7507, "iter_tflops": 34.30592347162468, "iter_time": 0.6013857498168945, "loss": 0.6054967045783997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.377476960428325, "step_time": 0.5519659214019775} +{"epoch": 0, "iter": 7508, "iter_tflops": 36.93427060238862, "iter_time": 0.5585894393920899, "loss": 0.5910941362380981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.3985765231847, "step_time": 0.5106886253356934} +{"epoch": 0, "iter": 7509, "iter_tflops": 20.84545087961866, "iter_time": 0.9897168273925782, "loss": 0.7146828174591064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.69117711408195, "step_time": 0.9092121315002442} +{"epoch": 0, "iter": 7510, "iter_tflops": 25.253269532739534, "iter_time": 0.8169672241210938, "loss": 0.797017514705658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.326452227306266, "step_time": 0.6585837860107422} +{"epoch": 0, "iter": 7511, "iter_tflops": 42.38527389643226, "iter_time": 0.48675144958496097, "loss": 0.6959864497184753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60965629569055, "step_time": 0.45234047317504883} +{"epoch": 0, "iter": 7512, "iter_tflops": 42.76992036367476, "iter_time": 0.4823739051818848, "loss": 0.7932291626930237, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 45.717299333317165, "step_time": 0.4512754211425781} +{"epoch": 0, "iter": 7513, "iter_tflops": 31.946392191767515, "iter_time": 0.6458035507202149, "loss": 0.7229781150817871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.015609714735795, "step_time": 0.6065184097290038} +{"epoch": 0, "iter": 7514, "iter_tflops": 12.456478304021001, "iter_time": 1.656254119873047, "loss": 0.6845548748970032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.238928512389453, "step_time": 1.196773540496826} +{"epoch": 0, "iter": 7515, "iter_tflops": 11.783048533609824, "iter_time": 1.750913055419922, "loss": 0.7107957005500793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.66173662721139, "step_time": 1.5101369667053224} +{"epoch": 0, "iter": 7516, "iter_tflops": 18.379040438500315, "iter_time": 1.1225337677001954, "loss": 0.8906726837158203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.97971176287522, "step_time": 0.8977960090637207} +{"epoch": 0, "iter": 7517, "iter_tflops": 20.91002491640902, "iter_time": 0.8109033279418945, "loss": 0.4348321259021759, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 21.97658441777423, "step_time": 0.7715488662719727} +{"epoch": 0, "iter": 7518, "iter_tflops": 12.436574700753367, "iter_time": 1.3633986206054685, "loss": 0.39769476652145386, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 15.065797985775658, "step_time": 1.1254637031555175} +{"epoch": 0, "iter": 7519, "iter_tflops": 24.75506733437011, "iter_time": 0.6849510269165039, "loss": 0.34404945373535156, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 26.642028361037244, "step_time": 0.6364383583068848} +{"epoch": 0, "iter": 7520, "iter_tflops": 27.495902994567835, "iter_time": 0.6166740112304688, "loss": 0.3072871267795563, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 29.470941114559604, "step_time": 0.5753467025756835} +{"epoch": 0, "iter": 7521, "iter_tflops": 27.02130550434356, "iter_time": 0.763512092590332, "loss": 0.798951268196106, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 28.94200612309693, "step_time": 0.7128425521850585} +{"epoch": 0, "iter": 7522, "iter_tflops": 9.86358243550794, "iter_time": 2.091643035888672, "loss": 0.8097448945045471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.40075297116625, "step_time": 1.8096255187988282} +{"epoch": 0, "iter": 7523, "iter_tflops": 15.193764578396515, "iter_time": 1.3578658142089843, "loss": 0.8916212916374207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.026848064968224, "step_time": 1.1444648246765137} +{"epoch": 0, "iter": 7524, "iter_tflops": 45.7299642670489, "iter_time": 0.4511504402160645, "loss": 1.005372166633606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35724180651991, "step_time": 0.417995267868042} +{"epoch": 0, "iter": 7525, "iter_tflops": 19.815709213729, "iter_time": 0.7296593780517578, "loss": 0.435814768075943, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 20.96778151702721, "step_time": 0.6895683288574219} +{"epoch": 0, "iter": 7526, "iter_tflops": 7.613227043081008, "iter_time": 1.8991576080322268, "loss": 0.4825608432292938, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 9.354528625540528, "step_time": 1.5456383361816406} +{"epoch": 0, "iter": 7527, "iter_tflops": 8.257203891938238, "iter_time": 1.7510428771972655, "loss": 0.3558351993560791, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 11.138778823660536, "step_time": 1.2980523529052732} +{"epoch": 0, "iter": 7528, "iter_tflops": 12.701232496210586, "iter_time": 1.1383712615966797, "loss": 0.34757858514785767, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 14.233711246488356, "step_time": 1.0158080215454102} +{"epoch": 0, "iter": 7529, "iter_tflops": 23.37535111135258, "iter_time": 0.672744499206543, "loss": 0.4425448179244995, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.87663597501141, "step_time": 0.6321449127197266} +{"epoch": 0, "iter": 7530, "iter_tflops": 7.93011884423458, "iter_time": 1.983026885986328, "loss": 0.425752729177475, "lr": 3e-05, "seqlen": 6288.0, 
"step_tflops": 9.080974464701521, "step_time": 1.7317127075195315} +{"epoch": 0, "iter": 7531, "iter_tflops": 9.628894185454014, "iter_time": 1.6331718444824217, "loss": 0.3539045453071594, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 10.734204448910688, "step_time": 1.4650027351379395} +{"epoch": 0, "iter": 7532, "iter_tflops": 24.069711137793526, "iter_time": 0.6533372497558594, "loss": 0.3866185247898102, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.877841736456276, "step_time": 0.6076874198913574} +{"epoch": 0, "iter": 7533, "iter_tflops": 11.206886156453333, "iter_time": 1.4178285217285156, "loss": 0.31580886244773865, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 11.86183811014754, "step_time": 1.3395430526733398} +{"epoch": 0, "iter": 7534, "iter_tflops": 12.208666305241305, "iter_time": 1.3014888305664063, "loss": 0.3350568115711212, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 13.542189512368283, "step_time": 1.1733289375305176} +{"epoch": 0, "iter": 7535, "iter_tflops": 24.127848627504232, "iter_time": 0.6585519943237305, "loss": 0.5022038817405701, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 25.978893209923548, "step_time": 0.6116289367675781} +{"epoch": 0, "iter": 7536, "iter_tflops": 24.06478896784638, "iter_time": 0.6602776718139649, "loss": 0.26424258947372437, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 25.836539045413456, "step_time": 0.6149988899230957} +{"epoch": 0, "iter": 7537, "iter_tflops": 16.037181877880776, "iter_time": 1.2864537963867186, "loss": 0.49962204694747925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.004478081329278, "step_time": 1.213274139404297} +{"epoch": 0, "iter": 7538, "iter_tflops": 20.024490117920855, "iter_time": 1.0302930755615234, "loss": 0.5923277139663696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.304439228763165, "step_time": 0.8488611202239991} +{"epoch": 0, "iter": 7539, "iter_tflops": 40.811420287444726, "iter_time": 0.5055225563049317, "loss": 0.6068445444107056, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 44.53391564974425, "step_time": 0.46326700019836425} +{"epoch": 0, "iter": 7540, "iter_tflops": 37.43212539305191, "iter_time": 0.5511600875854492, "loss": 0.5527947545051575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.93708695199691, "step_time": 0.5039707279205323} +{"epoch": 0, "iter": 7541, "iter_tflops": 20.385395464294014, "iter_time": 1.0120526504516603, "loss": 0.8459742069244385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.9689151639012, "step_time": 0.9391038818359375} +{"epoch": 0, "iter": 7542, "iter_tflops": 19.532292102917722, "iter_time": 1.056255630493164, "loss": 0.7587202191352844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.118790779784636, "step_time": 0.8553950195312499} +{"epoch": 0, "iter": 7543, "iter_tflops": 37.59869305159914, "iter_time": 0.5487183685302734, "loss": 0.6776575446128845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10887945958519, "step_time": 0.501864652633667} +{"epoch": 0, "iter": 7544, "iter_tflops": 40.72307541655606, "iter_time": 0.5066192398071289, "loss": 0.8142516016960144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.297288548878846, "step_time": 0.465741678237915} +{"epoch": 0, "iter": 7545, "iter_tflops": 13.834948035022608, "iter_time": 1.4912302856445312, "loss": 0.8418858647346497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.665326563131174, "step_time": 1.4067940063476563} +{"epoch": 0, "iter": 7546, "iter_tflops": 21.962373389649837, "iter_time": 0.9393836059570312, "loss": 0.6195603609085083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.494889416516074, "step_time": 0.7786820011138917} +{"epoch": 0, "iter": 7547, "iter_tflops": 36.14277784254814, "iter_time": 0.570822021484375, "loss": 0.7570594549179077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.23132825030573, "step_time": 0.5258831253051758} +{"epoch": 0, "iter": 7548, "iter_tflops": 37.70774526244899, "iter_time": 0.5471314544677734, "loss": 0.927070140838623, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 40.93992752122614, "step_time": 0.5039357604980468} +{"epoch": 0, "iter": 7549, "iter_tflops": 18.193019818998923, "iter_time": 1.1340114898681641, "loss": 0.9718024730682373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.535645089263955, "step_time": 1.0560743408203126} +{"epoch": 0, "iter": 7550, "iter_tflops": 26.666891986329553, "iter_time": 0.7736594696044923, "loss": 0.9996691346168518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.89306136059571, "step_time": 0.6272171897888184} +{"epoch": 0, "iter": 7551, "iter_tflops": 47.3001277975082, "iter_time": 0.4361741600036621, "loss": 0.697334885597229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.84493223686947, "step_time": 0.40576499176025393} +{"epoch": 0, "iter": 7552, "iter_tflops": 42.84558797046974, "iter_time": 0.4815220069885254, "loss": 0.5678889155387878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.96042552746153, "step_time": 0.4488882179260253} +{"epoch": 0, "iter": 7553, "iter_tflops": 29.10990502160741, "iter_time": 0.7087310485839843, "loss": 0.10151723027229309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.869791390123552, "step_time": 0.6683263015747071} +{"epoch": 0, "iter": 7554, "iter_tflops": 9.879465351502496, "iter_time": 2.0882803649902346, "loss": 0.07766049355268478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.008685733988731, "step_time": 1.7180142745971678} +{"epoch": 0, "iter": 7555, "iter_tflops": 12.545938980827895, "iter_time": 1.6444439544677734, "loss": 0.06457154452800751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.07768802245709, "step_time": 1.4655171699523926} +{"epoch": 0, "iter": 7556, "iter_tflops": 24.402551701623867, "iter_time": 0.8454482040405271, "loss": 0.062127791345119476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.32876670772954, "step_time": 0.7034422454833985} +{"epoch": 0, "iter": 7557, "iter_tflops": 27.892376287990594, "iter_time": 0.560861686706543, "loss": 0.46345868706703186, "lr": 3e-05, "seqlen": 
6256.0, "step_tflops": 29.962499505240288, "step_time": 0.5221114883422852} +{"epoch": 0, "iter": 7558, "iter_tflops": 27.033947366787796, "iter_time": 0.578671142578125, "loss": 0.28614458441734314, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 28.797328703414593, "step_time": 0.5432366790771485} +{"epoch": 0, "iter": 7559, "iter_tflops": 27.688148814154406, "iter_time": 0.5649985961914062, "loss": 0.3765283524990082, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.582760644535956, "step_time": 0.5288135681152344} +{"epoch": 0, "iter": 7560, "iter_tflops": 27.426214388676378, "iter_time": 0.5703946228027343, "loss": 0.3324931263923645, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.225231176654532, "step_time": 0.5352828559875488} +{"epoch": 0, "iter": 7561, "iter_tflops": 37.736272046501256, "iter_time": 0.5467178497314452, "loss": 0.24689219892024994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.67197788707689, "step_time": 0.5072557220458985} +{"epoch": 0, "iter": 7562, "iter_tflops": 15.664188590869932, "iter_time": 1.317086639404297, "loss": 0.30516231060028076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.157079254435082, "step_time": 1.1362561798095703} +{"epoch": 0, "iter": 7563, "iter_tflops": 44.26275699351598, "iter_time": 0.4661050262451172, "loss": 0.30765050649642944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.39626708175954, "step_time": 0.42629514122009277} +{"epoch": 0, "iter": 7564, "iter_tflops": 48.78823581441195, "iter_time": 0.42287025070190437, "loss": 0.27807366847991943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.564792944368115, "step_time": 0.3924888191223145} +{"epoch": 0, "iter": 7565, "iter_tflops": 29.348953794114554, "iter_time": 0.7029583969116212, "loss": 0.7848438024520874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.9345395268662, "step_time": 0.6669274482727051} +{"epoch": 0, "iter": 7566, "iter_tflops": 14.230819998888101, "iter_time": 1.449747344970703, "loss": 0.7391080260276794, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 16.903664906819174, "step_time": 1.2205100860595703} +{"epoch": 0, "iter": 7567, "iter_tflops": 36.129966996885905, "iter_time": 0.5710244216918945, "loss": 0.9657683968544006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.241984241147975, "step_time": 0.5257403240203857} +{"epoch": 0, "iter": 7568, "iter_tflops": 35.53886523888799, "iter_time": 0.5805220108032226, "loss": 0.9143439531326294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.75547476916876, "step_time": 0.5323401050567627} +{"epoch": 0, "iter": 7569, "iter_tflops": 20.259452986724387, "iter_time": 1.0183440551757814, "loss": 0.6407574415206909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.866518612944144, "step_time": 0.9435015182495118} +{"epoch": 0, "iter": 7570, "iter_tflops": 15.614025106465613, "iter_time": 1.3213180694580078, "loss": 0.6702382564544678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.609645596113463, "step_time": 0.8738417282104491} +{"epoch": 0, "iter": 7571, "iter_tflops": 41.35974144522991, "iter_time": 0.49882065963745115, "loss": 0.885037899017334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.254741269888584, "step_time": 0.45588800048828126} +{"epoch": 0, "iter": 7572, "iter_tflops": 43.21400357997734, "iter_time": 0.4774168510437012, "loss": 0.6942299008369446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.962523219529096, "step_time": 0.4393097324371338} +{"epoch": 0, "iter": 7573, "iter_tflops": 17.57344334756111, "iter_time": 1.1739926605224609, "loss": 0.1363096386194229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.909277502764677, "step_time": 1.0910566787719727} +{"epoch": 0, "iter": 7574, "iter_tflops": 25.49564349958873, "iter_time": 0.8092007369995117, "loss": 0.12629063427448273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.115848375435704, "step_time": 0.6423960304260253} +{"epoch": 0, "iter": 7575, "iter_tflops": 54.51567167181338, "iter_time": 0.37844335174560545, "loss": 0.0814691036939621, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 59.403403801170676, "step_time": 0.34730490493774413} +{"epoch": 0, "iter": 7576, "iter_tflops": 49.39397186994979, "iter_time": 0.417684440612793, "loss": 0.18417306244373322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.59548088895069, "step_time": 0.38494091606140135} +{"epoch": 0, "iter": 7577, "iter_tflops": 28.372598539881043, "iter_time": 0.727148536682129, "loss": 0.17398308217525482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.027246112036774, "step_time": 0.6870791091918946} +{"epoch": 0, "iter": 7578, "iter_tflops": 13.689297330833757, "iter_time": 1.5070966033935547, "loss": 0.14645324647426605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.24886304633701, "step_time": 1.1960842552185058} +{"epoch": 0, "iter": 7579, "iter_tflops": 35.89058553688701, "iter_time": 0.5748330154418946, "loss": 0.21141356229782104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.05997811154118, "step_time": 0.5281900939941406} +{"epoch": 0, "iter": 7580, "iter_tflops": 42.74834934006083, "iter_time": 0.48261731338500974, "loss": 0.1574963629245758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.73282822289792, "step_time": 0.4414689693450928} +{"epoch": 0, "iter": 7581, "iter_tflops": 20.476009670118554, "iter_time": 1.0075739288330077, "loss": 0.2692273259162903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.040209534356215, "step_time": 0.9360661239624024} +{"epoch": 0, "iter": 7582, "iter_tflops": 20.66389416294526, "iter_time": 0.9984126586914063, "loss": 0.21620452404022217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.62763346448019, "step_time": 0.8050331115722656} +{"epoch": 0, "iter": 7583, "iter_tflops": 48.07465998461462, "iter_time": 0.42914694595336916, "loss": 0.20159782469272614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.09165234096113, "step_time": 0.3960537357330322} +{"epoch": 0, "iter": 7584, "iter_tflops": 46.914517302783054, "iter_time": 0.43975926208496097, "loss": 0.20671677589416504, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.37458170207754, "step_time": 0.4095536441802979} +{"epoch": 0, "iter": 7585, "iter_tflops": 45.858477402566066, "iter_time": 0.44988614273071287, "loss": 0.14826738834381104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.955023047405405, "step_time": 0.4129933738708496} +{"epoch": 0, "iter": 7586, "iter_tflops": 9.939146108237033, "iter_time": 2.0757410430908205, "loss": 0.23645411431789398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.893250463008409, "step_time": 1.7346892318725586} +{"epoch": 0, "iter": 7587, "iter_tflops": 14.6581963339205, "iter_time": 1.4074783172607424, "loss": 0.12007635086774826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.411021823024047, "step_time": 1.2571486244201662} +{"epoch": 0, "iter": 7588, "iter_tflops": 22.378633953464522, "iter_time": 0.921910316467285, "loss": 0.20246997475624084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.243455206838593, "step_time": 0.7304734268188475} +{"epoch": 0, "iter": 7589, "iter_tflops": 13.762527654810592, "iter_time": 1.0832131958007811, "loss": 0.42890486121177673, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 14.71859506801115, "step_time": 1.0128515319824218} +{"epoch": 0, "iter": 7590, "iter_tflops": 11.06262758555095, "iter_time": 1.3475778198242188, "loss": 0.35998281836509705, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 13.01761258023513, "step_time": 1.1451985893249512} +{"epoch": 0, "iter": 7591, "iter_tflops": 22.476224330890705, "iter_time": 0.6632676086425782, "loss": 0.30758950114250183, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 24.228164864779256, "step_time": 0.6153066749572754} +{"epoch": 0, "iter": 7592, "iter_tflops": 23.846833485458127, "iter_time": 0.6251459579467774, "loss": 0.40893495082855225, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 25.62946924331498, "step_time": 0.5816644668579101} +{"epoch": 0, "iter": 7593, "iter_tflops": 20.283247696858332, "iter_time": 1.0171494140625001, "loss": 
0.2345990091562271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.039599139857277, "step_time": 0.9360920486450195} +{"epoch": 0, "iter": 7594, "iter_tflops": 14.339609980202827, "iter_time": 1.438748580932617, "loss": 0.2758350074291229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.627676459971575, "step_time": 1.1703807678222655} +{"epoch": 0, "iter": 7595, "iter_tflops": 49.05217385808597, "iter_time": 0.4205948867797852, "loss": 0.19237686693668365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.62021655340869, "step_time": 0.3847633380889893} +{"epoch": 0, "iter": 7596, "iter_tflops": 45.44041196040422, "iter_time": 0.45402523040771486, "loss": 0.151344433426857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45383942566678, "step_time": 0.4171788024902344} +{"epoch": 0, "iter": 7597, "iter_tflops": 25.223811471670285, "iter_time": 0.8179213333129883, "loss": 0.3128259778022766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.596465774762944, "step_time": 0.7757080841064453} +{"epoch": 0, "iter": 7598, "iter_tflops": 13.00463267931081, "iter_time": 1.586441848754883, "loss": 0.26194965839385986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.72717177879694, "step_time": 1.0458211517333984} +{"epoch": 0, "iter": 7599, "iter_tflops": 38.51950822479412, "iter_time": 0.5356011657714843, "loss": 0.2762191891670227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.250115317751785, "step_time": 0.48830857276916495} +{"epoch": 0, "iter": 7600, "iter_tflops": 40.64894683371284, "iter_time": 0.5075431251525879, "loss": 0.2773727476596832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.298600366151256, "step_time": 0.4657278861999512} +{"epoch": 0, "iter": 7601, "iter_tflops": 22.432223848125997, "iter_time": 0.9197079010009765, "loss": 0.7252815365791321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.030492626240576, "step_time": 0.858538101196289} +{"epoch": 0, "iter": 7602, "iter_tflops": 9.9998764457658, "iter_time": 2.0631348419189455, "loss": 
0.8405196666717529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.11787324369356, "step_time": 1.5727468261718751} +{"epoch": 0, "iter": 7603, "iter_tflops": 9.807645097304466, "iter_time": 2.1035726013183593, "loss": 0.6658799052238464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.882189383925457, "step_time": 1.7363040466308595} +{"epoch": 0, "iter": 7604, "iter_tflops": 27.946400219457267, "iter_time": 0.7382379608154297, "loss": 0.9972708821296692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25433169022252, "step_time": 0.5125185947418213} +{"epoch": 0, "iter": 7605, "iter_tflops": 15.14055647809101, "iter_time": 1.071115005493164, "loss": 0.36533787846565247, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 15.95722709991761, "step_time": 1.0162966995239258} +{"epoch": 0, "iter": 7606, "iter_tflops": 14.569022313391992, "iter_time": 1.1131342163085938, "loss": 0.3597109019756317, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 17.073425602723052, "step_time": 0.9498549156188965} +{"epoch": 0, "iter": 7607, "iter_tflops": 28.886046705324826, "iter_time": 0.5614225234985352, "loss": 0.453235924243927, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.796752389113323, "step_time": 0.5265904998779297} +{"epoch": 0, "iter": 7608, "iter_tflops": 28.51337003207621, "iter_time": 0.5687604522705079, "loss": 0.4108588397502899, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.2702334701173, "step_time": 0.5357499885559082} +{"epoch": 0, "iter": 7609, "iter_tflops": 45.28273466484383, "iter_time": 0.45560617446899415, "loss": 0.11397533863782883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.87137966140769, "step_time": 0.4136860389709473} +{"epoch": 0, "iter": 7610, "iter_tflops": 14.77953123871567, "iter_time": 1.395923400878906, "loss": 0.06351304799318314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.25696293426097, "step_time": 1.1955228500366213} +{"epoch": 0, "iter": 7611, "iter_tflops": 31.715610989686855, "iter_time": 0.6505027923583985, "loss": 
0.0784306451678276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.07347363080864, "step_time": 0.5022972660064697} +{"epoch": 0, "iter": 7612, "iter_tflops": 51.199105139146475, "iter_time": 0.40295808792114257, "loss": 0.053288571536540985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.93651901512525, "step_time": 0.3688304862976074} +{"epoch": 0, "iter": 7613, "iter_tflops": 25.167482499380043, "iter_time": 0.8197519760131837, "loss": 0.7205100059509277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.480748725099684, "step_time": 0.7790978164672852} +{"epoch": 0, "iter": 7614, "iter_tflops": 15.25182493711132, "iter_time": 1.352696716308594, "loss": 0.4561001658439636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.33572877353951, "step_time": 1.1251853561401368} +{"epoch": 0, "iter": 7615, "iter_tflops": 44.77805226193837, "iter_time": 0.46074119949340825, "loss": 0.4855585992336273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.850178512385384, "step_time": 0.4223340454101563} +{"epoch": 0, "iter": 7616, "iter_tflops": 47.035806420915215, "iter_time": 0.4386252746582031, "loss": 0.3781825602054596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.887076105664, "step_time": 0.4054289436340332} +{"epoch": 0, "iter": 7617, "iter_tflops": 30.241946972354796, "iter_time": 0.6822012329101562, "loss": 0.5541275143623352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.18966255799493, "step_time": 0.6409229507446289} +{"epoch": 0, "iter": 7618, "iter_tflops": 32.869809372025664, "iter_time": 0.6276608810424804, "loss": 0.5075516104698181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.49893874088634, "step_time": 0.5094230651855468} +{"epoch": 0, "iter": 7619, "iter_tflops": 45.79709370071685, "iter_time": 0.450489143371582, "loss": 0.4478616714477539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.73298688656066, "step_time": 0.4148372097015381} +{"epoch": 0, "iter": 7620, "iter_tflops": 45.582382211966255, "iter_time": 0.45261112976074225, "loss": 
0.4830421805381775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13544924547979, "step_time": 0.4198820571899414} +{"epoch": 0, "iter": 7621, "iter_tflops": 31.68268411479031, "iter_time": 0.651178840637207, "loss": 0.24831408262252808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.69456071446015, "step_time": 0.6122974472045898} +{"epoch": 0, "iter": 7622, "iter_tflops": 11.131052159830435, "iter_time": 1.8534720001220704, "loss": 0.21668073534965515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.682816700835716, "step_time": 1.405118236541748} +{"epoch": 0, "iter": 7623, "iter_tflops": 35.98185860384657, "iter_time": 0.573374870300293, "loss": 0.18575100600719452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.49987718087304, "step_time": 0.522307788848877} +{"epoch": 0, "iter": 7624, "iter_tflops": 44.124186117452794, "iter_time": 0.4675688171386719, "loss": 0.1437341719865799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.423065339973355, "step_time": 0.42605922126770013} +{"epoch": 0, "iter": 7625, "iter_tflops": 18.793217962915787, "iter_time": 1.0977946166992187, "loss": 0.00667554372921586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.163569961148674, "step_time": 1.0231865463256835} +{"epoch": 0, "iter": 7626, "iter_tflops": 18.22737731724413, "iter_time": 1.1318739471435546, "loss": 0.0010063203517347574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.74851209024945, "step_time": 0.9486209182739257} +{"epoch": 0, "iter": 7627, "iter_tflops": 53.618656051144676, "iter_time": 0.38477453613281243, "loss": 0.003036458743736148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.87537911658368, "step_time": 0.350419713973999} +{"epoch": 0, "iter": 7628, "iter_tflops": 54.85069738572243, "iter_time": 0.3761318359375, "loss": 0.004583383910357952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.096374316961295, "step_time": 0.3433001365661621} +{"epoch": 0, "iter": 7629, "iter_tflops": 29.1145363413588, "iter_time": 0.708618309020996, "loss": 
0.8786534667015076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.903595457888272, "step_time": 0.6675952491760254} +{"epoch": 0, "iter": 7630, "iter_tflops": 16.356323372837878, "iter_time": 1.261352752685547, "loss": 0.6323245167732239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.426277825466276, "step_time": 1.0620198936462402} +{"epoch": 0, "iter": 7631, "iter_tflops": 34.58857360055662, "iter_time": 0.5964713592529297, "loss": 0.9850410223007202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.674884893789525, "step_time": 0.5476086673736572} +{"epoch": 0, "iter": 7632, "iter_tflops": 36.89891709891533, "iter_time": 0.5591246337890625, "loss": 0.6462573409080505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.19673091638082, "step_time": 0.5132530193328857} +{"epoch": 0, "iter": 7633, "iter_tflops": 33.56781161600319, "iter_time": 0.6146094284057617, "loss": 0.15084229409694672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.166886260891516, "step_time": 0.5550934066772462} +{"epoch": 0, "iter": 7634, "iter_tflops": 37.126447188306145, "iter_time": 0.5556980285644532, "loss": 0.11596673727035522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.45290256742196, "step_time": 0.4976996116638184} +{"epoch": 0, "iter": 7635, "iter_tflops": 39.732280219758856, "iter_time": 0.5192526931762695, "loss": 0.1513027846813202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.80550158270642, "step_time": 0.4709703750610351} +{"epoch": 0, "iter": 7636, "iter_tflops": 42.20167067548496, "iter_time": 0.4888691177368164, "loss": 0.1015382632613182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.04370172126501, "step_time": 0.44807634353637693} +{"epoch": 0, "iter": 7637, "iter_tflops": 18.222132636117077, "iter_time": 1.1321997222900388, "loss": 0.5844138860702515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.49635976639393, "step_time": 1.0582023391723634} +{"epoch": 0, "iter": 7638, "iter_tflops": 17.05312593745916, "iter_time": 1.209813003540039, "loss": 
0.5997117161750793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.549615882202712, "step_time": 0.957376392364502} +{"epoch": 0, "iter": 7639, "iter_tflops": 42.5796579412058, "iter_time": 0.4845293388366699, "loss": 0.8563243746757507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.73343295265606, "step_time": 0.45111622238159177} +{"epoch": 0, "iter": 7640, "iter_tflops": 43.49008641123587, "iter_time": 0.47438612365722654, "loss": 0.8471232652664185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.868569901547865, "step_time": 0.4401903781890869} +{"epoch": 0, "iter": 7641, "iter_tflops": 18.534005413158194, "iter_time": 1.113148132324219, "loss": 0.7639917731285095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.366311935145763, "step_time": 1.065308334350586} +{"epoch": 0, "iter": 7642, "iter_tflops": 12.51330079597875, "iter_time": 1.648733123779297, "loss": 0.6658936738967896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.937344423238669, "step_time": 1.3811754570007324} +{"epoch": 0, "iter": 7643, "iter_tflops": 35.243365904706486, "iter_time": 0.585389419555664, "loss": 0.5965090990066528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.31302203829518, "step_time": 0.5384877624511719} +{"epoch": 0, "iter": 7644, "iter_tflops": 40.463226305102005, "iter_time": 0.5098726768493652, "loss": 1.0515813827514648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.62965783902458, "step_time": 0.47286856079101564} +{"epoch": 0, "iter": 7645, "iter_tflops": 24.47039666195593, "iter_time": 0.8431041717529296, "loss": 0.10710079967975616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.2861667455401, "step_time": 0.784865047454834} +{"epoch": 0, "iter": 7646, "iter_tflops": 8.1742075832631, "iter_time": 2.523925811767578, "loss": 0.04723842069506645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.718908981787395, "step_time": 2.1227787551879884} +{"epoch": 0, "iter": 7647, "iter_tflops": 11.214937010389189, "iter_time": 1.8396085052490234, "loss": 
0.06958404928445816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.979821981765724, "step_time": 1.2910715484619142} +{"epoch": 0, "iter": 7648, "iter_tflops": 49.91388677451167, "iter_time": 0.41333374023437497, "loss": 0.07981177419424057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.540760113132876, "step_time": 0.3782692699432373} +{"epoch": 0, "iter": 7649, "iter_tflops": 23.959965839877164, "iter_time": 0.6614569015502929, "loss": 0.21100182831287384, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.88539210464324, "step_time": 0.6122559280395509} +{"epoch": 0, "iter": 7650, "iter_tflops": 28.608712769005315, "iter_time": 0.5539740600585938, "loss": 0.3969825506210327, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.502313131541616, "step_time": 0.5195830459594726} +{"epoch": 0, "iter": 7651, "iter_tflops": 29.49051507674855, "iter_time": 0.5374095611572265, "loss": 0.33748960494995117, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 31.366884712959212, "step_time": 0.5052616767883301} +{"epoch": 0, "iter": 7652, "iter_tflops": 27.078442370295555, "iter_time": 0.5852805175781249, "loss": 0.4547331929206848, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 28.68322703360231, "step_time": 0.552534927368164} +{"epoch": 0, "iter": 7653, "iter_tflops": 20.950332813242163, "iter_time": 0.9135912551879882, "loss": 0.08483006805181503, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 21.940805074949417, "step_time": 0.8723490676879883} +{"epoch": 0, "iter": 7654, "iter_tflops": 16.04381944909491, "iter_time": 1.192985305786133, "loss": 0.06577139347791672, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 22.97431717941467, "step_time": 0.833105972290039} +{"epoch": 0, "iter": 7655, "iter_tflops": 49.86135979329643, "iter_time": 0.3838652000427247, "loss": 0.10768294334411621, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 54.294478405862634, "step_time": 0.3525227870941162} +{"epoch": 0, "iter": 7656, "iter_tflops": 51.72386535208086, "iter_time": 0.3700427398681641, 
"loss": 0.0891796126961708, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 56.323051802210045, "step_time": 0.33982606124877934} +{"epoch": 0, "iter": 7657, "iter_tflops": 28.58009996657884, "iter_time": 0.7218691864013672, "loss": 0.0896751880645752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.30660350331505, "step_time": 0.6807458152770997} +{"epoch": 0, "iter": 7658, "iter_tflops": 21.087700005655257, "iter_time": 0.9783472595214843, "loss": 0.10752588510513306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.73686377443934, "step_time": 0.7716347618103027} +{"epoch": 0, "iter": 7659, "iter_tflops": 52.13371031071681, "iter_time": 0.39573422622680665, "loss": 0.14707565307617188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.954866623839195, "step_time": 0.36223583221435546} +{"epoch": 0, "iter": 7660, "iter_tflops": 52.23026692400787, "iter_time": 0.3950026435852051, "loss": 0.04144229367375374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.54999186120207, "step_time": 0.36482929229736333} +{"epoch": 0, "iter": 7661, "iter_tflops": 31.372051550416337, "iter_time": 0.6576265335083008, "loss": 0.8659588694572449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.36210734931676, "step_time": 0.6183989906311036} +{"epoch": 0, "iter": 7662, "iter_tflops": 9.346715664091823, "iter_time": 2.2073094177246095, "loss": 0.7438540458679199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.585746206750954, "step_time": 1.9489503250122069} +{"epoch": 0, "iter": 7663, "iter_tflops": 11.679351848875411, "iter_time": 1.766458770751953, "loss": 0.8303669095039368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.388747857935527, "step_time": 1.4338352241516117} +{"epoch": 0, "iter": 7664, "iter_tflops": 35.59206329563264, "iter_time": 0.5796543273925782, "loss": 0.7757254838943481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.62479611045411, "step_time": 0.5341411628723145} +{"epoch": 0, "iter": 7665, "iter_tflops": 14.877993121629894, "iter_time": 
1.0459683837890625, "loss": 0.40891650319099426, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 15.906874994113586, "step_time": 0.978313491821289} +{"epoch": 0, "iter": 7666, "iter_tflops": 11.01628194855351, "iter_time": 1.4126281890869143, "loss": 0.4452440142631531, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 13.921893908316054, "step_time": 1.1178012504577637} +{"epoch": 0, "iter": 7667, "iter_tflops": 22.95555482914697, "iter_time": 0.6779148025512695, "loss": 0.4342038631439209, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 24.870181061188656, "step_time": 0.625725658416748} +{"epoch": 0, "iter": 7668, "iter_tflops": 24.47611789896456, "iter_time": 0.6357997817993164, "loss": 0.4259311854839325, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 26.28540058196908, "step_time": 0.5920362663269042} +{"epoch": 0, "iter": 7669, "iter_tflops": 20.98096948861419, "iter_time": 0.9833241271972656, "loss": 0.3721969425678253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.485230017028286, "step_time": 0.9175398025512694} +{"epoch": 0, "iter": 7670, "iter_tflops": 16.99368855097562, "iter_time": 1.2140444641113282, "loss": 0.3384131193161011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.174833401549435, "step_time": 0.9303832473754883} +{"epoch": 0, "iter": 7671, "iter_tflops": 47.27645079659678, "iter_time": 0.43639260482788084, "loss": 0.4622931480407715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.23842220655066, "step_time": 0.4026488838195801} +{"epoch": 0, "iter": 7672, "iter_tflops": 47.61331887812059, "iter_time": 0.43330509185791016, "loss": 0.5825884938240051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.20030864858213, "step_time": 0.40294861602783205} +{"epoch": 0, "iter": 7673, "iter_tflops": 25.838894853541962, "iter_time": 0.7984510803222656, "loss": 0.20002391934394836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.284854005503846, "step_time": 0.7561372146606445} +{"epoch": 0, "iter": 7674, "iter_tflops": 12.147308669965167, "iter_time": 
1.6984085998535154, "loss": 0.24861368536949158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.995250544457827, "step_time": 1.289826217651367} +{"epoch": 0, "iter": 7675, "iter_tflops": 49.94709157895546, "iter_time": 0.4130589561462403, "loss": 0.2008543312549591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.33459570258051, "step_time": 0.3797045555114746} +{"epoch": 0, "iter": 7676, "iter_tflops": 50.60476421438086, "iter_time": 0.4076907348632812, "loss": 0.18193431198596954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.71408738426865, "step_time": 0.37707096099853515} +{"epoch": 0, "iter": 7677, "iter_tflops": 30.357965062475536, "iter_time": 0.6795940856933593, "loss": 0.16230086982250214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.31458718740857, "step_time": 0.6384452133178711} +{"epoch": 0, "iter": 7678, "iter_tflops": 18.075589716010086, "iter_time": 1.1413787231445311, "loss": 0.13849706947803497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.246233953056937, "step_time": 1.019008945465088} +{"epoch": 0, "iter": 7679, "iter_tflops": 39.809635261965006, "iter_time": 0.5182437210083009, "loss": 0.19220410287380219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78936724679524, "step_time": 0.4711439056396485} +{"epoch": 0, "iter": 7680, "iter_tflops": 45.29167700316389, "iter_time": 0.4555162200927734, "loss": 0.2823027968406677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.582378484285336, "step_time": 0.4160972938537597} +{"epoch": 0, "iter": 7681, "iter_tflops": 23.13331647336549, "iter_time": 0.8918346633911134, "loss": 0.42850372195243835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.84943644722574, "step_time": 0.8302439193725586} +{"epoch": 0, "iter": 7682, "iter_tflops": 8.58277556996377, "iter_time": 2.403778747558594, "loss": 0.42507192492485046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.752130108838873, "step_time": 2.1155474014282225} +{"epoch": 0, "iter": 7683, "iter_tflops": 12.76344977237504, "iter_time": 
1.6164198455810546, "loss": 0.3130051791667938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.762425047620951, "step_time": 1.3088781356811523} +{"epoch": 0, "iter": 7684, "iter_tflops": 33.7914495450174, "iter_time": 0.6105418319702149, "loss": 0.38101926445961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.083355947291814, "step_time": 0.41193512535095217} +{"epoch": 0, "iter": 7685, "iter_tflops": 17.803464932381598, "iter_time": 0.8580109176635743, "loss": 0.30408981442451477, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 18.676130565645103, "step_time": 0.8179192810058594} +{"epoch": 0, "iter": 7686, "iter_tflops": 9.994402468746276, "iter_time": 1.5284122619628908, "loss": 0.29781100153923035, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 12.579292622059299, "step_time": 1.214342311859131} +{"epoch": 0, "iter": 7687, "iter_tflops": 24.090232735759763, "iter_time": 0.6340979537963868, "loss": 0.33078232407569885, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 26.000132287225252, "step_time": 0.5875188293457031} +{"epoch": 0, "iter": 7688, "iter_tflops": 22.92260832188127, "iter_time": 0.6663974304199219, "loss": 0.34531834721565247, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.70884814794453, "step_time": 0.6182225570678711} +{"epoch": 0, "iter": 7689, "iter_tflops": 23.93375674091206, "iter_time": 0.8620081558227539, "loss": 1.053288459777832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.601091547563765, "step_time": 0.8058677291870117} +{"epoch": 0, "iter": 7690, "iter_tflops": 9.308360323222917, "iter_time": 2.2164046936035153, "loss": 0.839862585067749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.816505518638447, "step_time": 1.9073714218139646} +{"epoch": 0, "iter": 7691, "iter_tflops": 20.929852657697655, "iter_time": 0.9857256927490234, "loss": 0.8414777517318726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.078719812526757, "step_time": 0.8226533756256103} +{"epoch": 0, "iter": 7692, "iter_tflops": 39.020784816933855, "iter_time": 
0.5287206192016602, "loss": 0.6096574068069458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.79511103735292, "step_time": 0.49362456512451164} +{"epoch": 0, "iter": 7693, "iter_tflops": 22.400850028289977, "iter_time": 0.6764439544677735, "loss": 0.39625465869903564, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 23.90148098005324, "step_time": 0.6339740867614746} +{"epoch": 0, "iter": 7694, "iter_tflops": 11.349444192046272, "iter_time": 1.3351243743896484, "loss": 0.4093864858150482, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 13.228671836137998, "step_time": 1.1454603881835939} +{"epoch": 0, "iter": 7695, "iter_tflops": 22.878870612732022, "iter_time": 0.662310646057129, "loss": 0.37646132707595825, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.632361448538752, "step_time": 0.6151630897521972} +{"epoch": 0, "iter": 7696, "iter_tflops": 22.40251335995018, "iter_time": 0.6763937301635742, "loss": 0.3751729130744934, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.042650253662885, "step_time": 0.6302516326904297} +{"epoch": 0, "iter": 7697, "iter_tflops": 21.51523263918765, "iter_time": 0.958906364440918, "loss": 0.2295609414577484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.0632417922628, "step_time": 0.8945443878173829} +{"epoch": 0, "iter": 7698, "iter_tflops": 15.96914130699465, "iter_time": 1.2919350585937501, "loss": 0.32933053374290466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.692567629818203, "step_time": 0.9970291690826416} +{"epoch": 0, "iter": 7699, "iter_tflops": 48.8830965277593, "iter_time": 0.42204964447021487, "loss": 0.30094197392463684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.08493180098841, "step_time": 0.3886431198120117} +{"epoch": 0, "iter": 7700, "iter_tflops": 47.25801938586348, "iter_time": 0.4365628051757812, "loss": 0.20894208550453186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.90329866135372, "step_time": 0.4052997360229492} +{"epoch": 0, "iter": 7701, "iter_tflops": 32.67053123517704, "iter_time": 
0.6314893798828126, "loss": 0.7950408458709717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.69120216045765, "step_time": 0.5947067909240722} +{"epoch": 0, "iter": 7702, "iter_tflops": 7.105474965773788, "iter_time": 2.9035488281250004, "loss": 0.7563464641571045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.83856413597045, "step_time": 2.334213249206543} +{"epoch": 0, "iter": 7703, "iter_tflops": 11.191789353092506, "iter_time": 1.843413314819336, "loss": 0.8486090898513794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.714058434154696, "step_time": 1.5043754997253418} +{"epoch": 0, "iter": 7704, "iter_tflops": 44.32799302349801, "iter_time": 0.465419075012207, "loss": 0.8843544721603394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.970766736558204, "step_time": 0.4300763759613037} +{"epoch": 0, "iter": 7705, "iter_tflops": 18.5651628076527, "iter_time": 0.8647014007568359, "loss": 0.34282490611076355, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 19.546012413959282, "step_time": 0.821309326171875} +{"epoch": 0, "iter": 7706, "iter_tflops": 9.587752977692464, "iter_time": 1.6743571014404297, "loss": 0.36703819036483765, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 11.207490683369176, "step_time": 1.4323743591308593} +{"epoch": 0, "iter": 7707, "iter_tflops": 23.44989802147907, "iter_time": 0.6845796203613281, "loss": 0.45462021231651306, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 25.18025090876913, "step_time": 0.6375362319946289} +{"epoch": 0, "iter": 7708, "iter_tflops": 24.539829544135703, "iter_time": 0.6541741561889648, "loss": 0.27289456129074097, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.249012396242627, "step_time": 0.6115781440734863} +{"epoch": 0, "iter": 7709, "iter_tflops": 19.104425021507435, "iter_time": 1.0799117736816406, "loss": 0.10796171426773071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.420134939430667, "step_time": 1.010330909729004} +{"epoch": 0, "iter": 7710, "iter_tflops": 16.642614587934244, "iter_time": 
1.2396545867919924, "loss": 0.1449732780456543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.490018477624297, "step_time": 1.0068850612640379} +{"epoch": 0, "iter": 7711, "iter_tflops": 37.37978402832331, "iter_time": 0.5519318542480468, "loss": 0.11641097813844681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.89535253803299, "step_time": 0.5044850387573242} +{"epoch": 0, "iter": 7712, "iter_tflops": 43.34573722715391, "iter_time": 0.4759659156799316, "loss": 0.11320358514785767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.120633859777705, "step_time": 0.43783565330505375} +{"epoch": 0, "iter": 7713, "iter_tflops": 21.996515567267505, "iter_time": 0.9379255294799805, "loss": 0.224678635597229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.370122828037857, "step_time": 0.8827978210449219} +{"epoch": 0, "iter": 7714, "iter_tflops": 34.701092826014936, "iter_time": 0.5945372848510742, "loss": 0.2766965627670288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.49247213044467, "step_time": 0.5359773578643798} +{"epoch": 0, "iter": 7715, "iter_tflops": 43.659308988151565, "iter_time": 0.4725474128723145, "loss": 0.305495023727417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.54639380204415, "step_time": 0.4339150009155273} +{"epoch": 0, "iter": 7716, "iter_tflops": 43.926108512377766, "iter_time": 0.46967724227905266, "loss": 0.3258885443210602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.9045405084029, "step_time": 0.4306709403991699} +{"epoch": 0, "iter": 7717, "iter_tflops": 18.950908761617864, "iter_time": 1.0886598510742187, "loss": 0.2102735936641693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.251917789090644, "step_time": 1.0187229537963867} +{"epoch": 0, "iter": 7718, "iter_tflops": 14.16622759645713, "iter_time": 1.4563576202392579, "loss": 0.2648390531539917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.881805451760176, "step_time": 1.1537477893829344} +{"epoch": 0, "iter": 7719, "iter_tflops": 45.71099055273329, "iter_time": 
0.45133770370483406, "loss": 0.2747238278388977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.86813268949918, "step_time": 0.4137129745483399} +{"epoch": 0, "iter": 7720, "iter_tflops": 47.26664772840052, "iter_time": 0.43648311233520504, "loss": 0.22490954399108887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.00075134581856, "step_time": 0.40452528572082513} +{"epoch": 0, "iter": 7721, "iter_tflops": 23.77229301722747, "iter_time": 0.8678629989624024, "loss": 0.18299786746501923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.833708985990114, "step_time": 0.8307697219848633} +{"epoch": 0, "iter": 7722, "iter_tflops": 13.11838348736938, "iter_time": 1.5726856536865235, "loss": 0.20862938463687897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.213231156277928, "step_time": 1.1985601844787597} +{"epoch": 0, "iter": 7723, "iter_tflops": 40.16278740243033, "iter_time": 0.5136867942810059, "loss": 0.2610452175140381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.11111836747603, "step_time": 0.467707332611084} +{"epoch": 0, "iter": 7724, "iter_tflops": 48.4651337539318, "iter_time": 0.425689395904541, "loss": 0.21595673263072968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.55142302319284, "step_time": 0.392588674545288} +{"epoch": 0, "iter": 7725, "iter_tflops": 29.958808485813087, "iter_time": 0.6886486663818359, "loss": 0.8086689710617065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.826027680706122, "step_time": 0.648245948791504} +{"epoch": 0, "iter": 7726, "iter_tflops": 12.823960340936827, "iter_time": 1.6087926788330078, "loss": 0.7400531768798828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.88879936527264, "step_time": 1.298467746734619} +{"epoch": 0, "iter": 7727, "iter_tflops": 38.35819166948112, "iter_time": 0.5378536529541016, "loss": 0.6992291212081909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05385488107698, "step_time": 0.49058745193481434} +{"epoch": 0, "iter": 7728, "iter_tflops": 43.18231622741558, "iter_time": 
0.47776718139648433, "loss": 0.7954143285751343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.91759148089337, "step_time": 0.4397304477691651} +{"epoch": 0, "iter": 7729, "iter_tflops": 22.057997305391957, "iter_time": 0.9353112716674804, "loss": 0.7786802649497986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.784026696369114, "step_time": 0.8674348449707032} +{"epoch": 0, "iter": 7730, "iter_tflops": 40.57082088485492, "iter_time": 0.5085204849243165, "loss": 0.9246309399604797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.30690393420859, "step_time": 0.46564060401916507} +{"epoch": 0, "iter": 7731, "iter_tflops": 44.411900980864765, "iter_time": 0.4645397529602051, "loss": 0.6919838190078735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86943147678984, "step_time": 0.43098680877685547} +{"epoch": 0, "iter": 7732, "iter_tflops": 46.22419209950465, "iter_time": 0.4463267517089844, "loss": 0.9327177405357361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.49052346977968, "step_time": 0.4168695755004883} +{"epoch": 0, "iter": 7733, "iter_tflops": 35.560958874988756, "iter_time": 0.5801613388061525, "loss": 0.5568453073501587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.26274587322646, "step_time": 0.5391953201293945} +{"epoch": 0, "iter": 7734, "iter_tflops": 9.835366673256154, "iter_time": 2.0976435546875, "loss": 0.7011999487876892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.020244106170273, "step_time": 1.7163622741699218} +{"epoch": 0, "iter": 7735, "iter_tflops": 10.900832786690783, "iter_time": 1.892616271972656, "loss": 0.5490468144416809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.696641373700466, "step_time": 1.624925277709961} +{"epoch": 0, "iter": 7736, "iter_tflops": 28.85225055059161, "iter_time": 0.7150601119995117, "loss": 0.499489963054657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.161440965907374, "step_time": 0.6414853591918946} +{"epoch": 0, "iter": 7737, "iter_tflops": 19.94995635889669, "iter_time": 
0.8293458557128908, "loss": 0.33984696865081787, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 21.424183682666015, "step_time": 0.7722774353027344} +{"epoch": 0, "iter": 7738, "iter_tflops": 6.204347159095517, "iter_time": 2.666745300292969, "loss": 0.4825625419616699, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 7.404526075148758, "step_time": 2.234500015258789} +{"epoch": 0, "iter": 7739, "iter_tflops": 11.188073176396063, "iter_time": 1.4788438873291014, "loss": 0.36426952481269836, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 13.184439783150063, "step_time": 1.2549197311401368} +{"epoch": 0, "iter": 7740, "iter_tflops": 25.237431258628096, "iter_time": 0.6555902404785157, "loss": 0.32490596175193787, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 27.184702909703418, "step_time": 0.6086295547485352} +{"epoch": 0, "iter": 7741, "iter_tflops": 15.934696826728947, "iter_time": 0.9971600341796876, "loss": 0.55417799949646, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 16.91521625072248, "step_time": 0.9393579483032225} +{"epoch": 0, "iter": 7742, "iter_tflops": 10.38500590583765, "iter_time": 1.5300369567871093, "loss": 0.558796763420105, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 12.234552697934598, "step_time": 1.2987350845336916} +{"epoch": 0, "iter": 7743, "iter_tflops": 23.442799769939914, "iter_time": 0.6777962951660156, "loss": 0.4092346429824829, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 25.292094893656603, "step_time": 0.6282375144958496} +{"epoch": 0, "iter": 7744, "iter_tflops": 25.605239865737797, "iter_time": 0.6205543441772461, "loss": 0.49210771918296814, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 27.468443832200304, "step_time": 0.5784617042541504} +{"epoch": 0, "iter": 7745, "iter_tflops": 31.50576434343732, "iter_time": 0.6548355178833007, "loss": 0.0016475511947646737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.71847674845413, "step_time": 0.5942395935058593} +{"epoch": 0, "iter": 7746, "iter_tflops": 10.4203655100254, 
"iter_time": 1.9798819427490235, "loss": 0.006164541468024254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.755219230848262, "step_time": 1.7550581665039062} +{"epoch": 0, "iter": 7747, "iter_tflops": 16.513291305330434, "iter_time": 1.2493628997802735, "loss": 0.002284263726323843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.5375188210945, "step_time": 1.0045562801361085} +{"epoch": 0, "iter": 7748, "iter_tflops": 54.708683313550054, "iter_time": 0.3771082077026367, "loss": 0.005379586014896631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.18691410646386, "step_time": 0.34278370666503905} +{"epoch": 0, "iter": 7749, "iter_tflops": 21.233224233045668, "iter_time": 0.7271229019165039, "loss": 0.17114253342151642, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 22.515902350941307, "step_time": 0.6857004165649414} +{"epoch": 0, "iter": 7750, "iter_tflops": 8.073949545698543, "iter_time": 1.9122194824218748, "loss": 0.30213630199432373, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 9.26846644618423, "step_time": 1.6657732658386228} +{"epoch": 0, "iter": 7751, "iter_tflops": 26.658056956015553, "iter_time": 0.5791556243896484, "loss": 0.27363160252571106, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 28.450118165931077, "step_time": 0.5426748504638672} +{"epoch": 0, "iter": 7752, "iter_tflops": 27.60456470078936, "iter_time": 0.5592974853515625, "loss": 0.29257088899612427, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.375280052527106, "step_time": 0.5255835380554199} +{"epoch": 0, "iter": 7753, "iter_tflops": 32.89390140721189, "iter_time": 0.6272011718749999, "loss": 0.20078332722187042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.20996405072588, "step_time": 0.5859447479248047} +{"epoch": 0, "iter": 7754, "iter_tflops": 12.440086321411451, "iter_time": 1.6584365234375003, "loss": 0.18465904891490936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.061648508419216, "step_time": 1.3697765884399415} +{"epoch": 0, "iter": 7755, "iter_tflops": 
11.316497579468706, "iter_time": 1.823098831176758, "loss": 0.2495642602443695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.86735612612336, "step_time": 1.6033669471740724} +{"epoch": 0, "iter": 7756, "iter_tflops": 25.33365803507524, "iter_time": 0.8143748321533202, "loss": 0.19556188583374023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.69599000553736, "step_time": 0.6509054775238038} +{"epoch": 0, "iter": 7757, "iter_tflops": 16.739134851914145, "iter_time": 1.0842497711181642, "loss": 0.36165720224380493, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 17.720120134877938, "step_time": 1.0242257385253906} +{"epoch": 0, "iter": 7758, "iter_tflops": 12.556543843560771, "iter_time": 1.4454139099121093, "loss": 0.2515285909175873, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 16.077768895875284, "step_time": 1.1288508529663086} +{"epoch": 0, "iter": 7759, "iter_tflops": 33.19595266823362, "iter_time": 0.5467354202270508, "loss": 0.40265557169914246, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 35.30655134560998, "step_time": 0.5140519943237304} +{"epoch": 0, "iter": 7760, "iter_tflops": 30.95083606593363, "iter_time": 0.5863945999145508, "loss": 0.2320127636194229, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 32.82173335309803, "step_time": 0.5529690628051758} +{"epoch": 0, "iter": 7761, "iter_tflops": 45.63573691848958, "iter_time": 0.4520819625854492, "loss": 0.018944354727864265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.59034501875088, "step_time": 0.4078069343566894} +{"epoch": 0, "iter": 7762, "iter_tflops": 46.454295985342256, "iter_time": 0.4441159439086914, "loss": 0.024840986356139183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.670301220993984, "step_time": 0.4071634273529052} +{"epoch": 0, "iter": 7763, "iter_tflops": 53.90781165990226, "iter_time": 0.38271064758300777, "loss": 0.06623265147209167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.85361119766573, "step_time": 0.3505493221282959} +{"epoch": 0, "iter": 7764, 
"iter_tflops": 57.85716542931692, "iter_time": 0.3565866622924805, "loss": 0.027444833889603615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.18124824618296, "step_time": 0.3265382385253906} +{"epoch": 0, "iter": 7765, "iter_tflops": 39.519729718472284, "iter_time": 0.5220454101562501, "loss": 0.5628660321235657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.505706591642976, "step_time": 0.48537232208251957} +{"epoch": 0, "iter": 7766, "iter_tflops": 44.217124779013744, "iter_time": 0.4665860481262206, "loss": 0.7341630458831787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.4195804122074, "step_time": 0.41746800231933595} +{"epoch": 0, "iter": 7767, "iter_tflops": 46.98782066989172, "iter_time": 0.43907321548461914, "loss": 0.703333854675293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.80908438786278, "step_time": 0.4060512752532959} +{"epoch": 0, "iter": 7768, "iter_tflops": 50.76329108391354, "iter_time": 0.4064175720214844, "loss": 0.7832221388816833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.737500930985824, "step_time": 0.3769096717834473} +{"epoch": 0, "iter": 7769, "iter_tflops": 40.68465133504394, "iter_time": 0.5070977096557617, "loss": 0.077665776014328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.07205597157727, "step_time": 0.46812187576293945} +{"epoch": 0, "iter": 7770, "iter_tflops": 40.28670665356842, "iter_time": 0.5121067276000977, "loss": 0.09851699322462082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.084724628405915, "step_time": 0.4576071758270263} +{"epoch": 0, "iter": 7771, "iter_tflops": 43.084816910257054, "iter_time": 0.4788483505249023, "loss": 0.08699589967727661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70867462814197, "step_time": 0.4324390411376954} +{"epoch": 0, "iter": 7772, "iter_tflops": 40.97252720780697, "iter_time": 0.5035348052978516, "loss": 0.08386054635047913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.8673487703755, "step_time": 0.45982421684265135} +{"epoch": 0, "iter": 
7773, "iter_tflops": 31.69796017968308, "iter_time": 0.6508650207519531, "loss": 0.42551878094673157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.00715275671003, "step_time": 0.5893393745422363} +{"epoch": 0, "iter": 7774, "iter_tflops": 44.422110204715516, "iter_time": 0.4644329910278321, "loss": 0.3316456079483032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.40063892005889, "step_time": 0.42625663566589356} +{"epoch": 0, "iter": 7775, "iter_tflops": 51.92888576995529, "iter_time": 0.3972951316833496, "loss": 0.32221338152885437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.450775462657354, "step_time": 0.3654705066680908} +{"epoch": 0, "iter": 7776, "iter_tflops": 48.05427270419539, "iter_time": 0.4293290138244628, "loss": 0.3081008195877075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.77260734748217, "step_time": 0.39849438858032227} +{"epoch": 0, "iter": 7777, "iter_tflops": 33.097820443791434, "iter_time": 0.6233369216918946, "loss": 0.2369431108236313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.3435970905561, "step_time": 0.5837293090820314} +{"epoch": 0, "iter": 7778, "iter_tflops": 19.547988323260654, "iter_time": 1.0554075012207031, "loss": 0.25966504216194153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.319796358389365, "step_time": 0.8847029876708985} +{"epoch": 0, "iter": 7779, "iter_tflops": 50.49087947759635, "iter_time": 0.40861030197143566, "loss": 0.18467311561107635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.93400147418434, "step_time": 0.3755614547729492} +{"epoch": 0, "iter": 7780, "iter_tflops": 48.36175777987482, "iter_time": 0.4265993309020996, "loss": 0.16637390851974487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.86978003390271, "step_time": 0.39774785041809085} +{"epoch": 0, "iter": 7781, "iter_tflops": 15.843492008846065, "iter_time": 1.0158305969238282, "loss": 0.3828492760658264, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 16.541183180509837, "step_time": 0.9729838409423828} +{"epoch": 0, 
"iter": 7782, "iter_tflops": 12.752321145187036, "iter_time": 1.2620685882568359, "loss": 0.4669685363769531, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 16.309121044600747, "step_time": 0.9868284072875977} +{"epoch": 0, "iter": 7783, "iter_tflops": 23.125018955279415, "iter_time": 0.6959693298339843, "loss": 0.35958319902420044, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 25.032429756704367, "step_time": 0.6429381446838378} +{"epoch": 0, "iter": 7784, "iter_tflops": 24.597903714226778, "iter_time": 0.6542957534790039, "loss": 0.22650867700576782, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 26.4531098155291, "step_time": 0.6084087677001953} +{"epoch": 0, "iter": 7785, "iter_tflops": 19.439632904087688, "iter_time": 1.061290283203125, "loss": 0.5237383246421814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.028488088226556, "step_time": 0.9811020851135255} +{"epoch": 0, "iter": 7786, "iter_tflops": 13.72338616619113, "iter_time": 1.5033529815673827, "loss": 0.6391311883926392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.710398068480302, "step_time": 1.2346260948181151} +{"epoch": 0, "iter": 7787, "iter_tflops": 47.89469666280244, "iter_time": 0.43075945663452153, "loss": 0.604499101638794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.013851579174926, "step_time": 0.39664614105224605} +{"epoch": 0, "iter": 7788, "iter_tflops": 42.45283333405568, "iter_time": 0.48597683334350583, "loss": 0.6478134989738464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79057058250232, "step_time": 0.45055331802368165} +{"epoch": 0, "iter": 7789, "iter_tflops": 40.9540917799821, "iter_time": 0.5037614707946778, "loss": 0.07388128340244293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.33446149601477, "step_time": 0.46535116958618167} +{"epoch": 0, "iter": 7790, "iter_tflops": 39.45489100703043, "iter_time": 0.5229033203125, "loss": 0.08954348415136337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.53415215712581, "step_time": 0.47390594482421877} +{"epoch": 
0, "iter": 7791, "iter_tflops": 45.674112853731415, "iter_time": 0.4517021179199219, "loss": 0.11493217945098877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30036897799457, "step_time": 0.41015789604187014} +{"epoch": 0, "iter": 7792, "iter_tflops": 40.40619392338603, "iter_time": 0.5105923500061035, "loss": 0.10415896028280258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32569312087862, "step_time": 0.46544322395324705} +{"epoch": 0, "iter": 7793, "iter_tflops": 20.124372850280643, "iter_time": 1.0251794509887695, "loss": 0.20431791245937347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.365011112642776, "step_time": 0.9656486206054687} +{"epoch": 0, "iter": 7794, "iter_tflops": 16.724090644209515, "iter_time": 1.2336152648925782, "loss": 0.26732245087623596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.593292822290124, "step_time": 1.0529671401977538} +{"epoch": 0, "iter": 7795, "iter_tflops": 38.102548746935035, "iter_time": 0.5414622955322266, "loss": 0.17657679319381714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.69069736182435, "step_time": 0.49486083984374996} +{"epoch": 0, "iter": 7796, "iter_tflops": 43.84223597719461, "iter_time": 0.4705757598876953, "loss": 0.1453305333852768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.74298402934379, "step_time": 0.43212827873229975} +{"epoch": 0, "iter": 7797, "iter_tflops": 24.594062741043665, "iter_time": 0.8388647994995119, "loss": 0.3984123468399048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.52925731711135, "step_time": 0.7776732406616211} +{"epoch": 0, "iter": 7798, "iter_tflops": 10.924173806889035, "iter_time": 1.8885724334716796, "loss": 0.5323470830917358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.482591097675458, "step_time": 1.4245443630218506} +{"epoch": 0, "iter": 7799, "iter_tflops": 39.296100049695546, "iter_time": 0.5250163116455078, "loss": 0.3921078145503998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.25636996348356, "step_time": 0.4769492568969726} 
+{"epoch": 0, "iter": 7800, "iter_tflops": 43.56743278624096, "iter_time": 0.4735439338684082, "loss": 0.28213971853256226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.799105442241974, "step_time": 0.4316209125518799} +{"epoch": 0, "iter": 7801, "iter_tflops": 18.504130199474147, "iter_time": 1.114945327758789, "loss": 0.4502858817577362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.745587115229963, "step_time": 1.0448457870483399} +{"epoch": 0, "iter": 7802, "iter_tflops": 29.440757491512848, "iter_time": 0.7007663955688477, "loss": 0.5708025693893433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.15708937885873, "step_time": 0.5552397632598877} +{"epoch": 0, "iter": 7803, "iter_tflops": 40.3280345230906, "iter_time": 0.5115819244384765, "loss": 0.41770005226135254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13989287965772, "step_time": 0.467402437210083} +{"epoch": 0, "iter": 7804, "iter_tflops": 46.24631171071051, "iter_time": 0.4461132736206055, "loss": 0.4808395504951477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.45101994257765, "step_time": 0.40893313026428224} +{"epoch": 0, "iter": 7805, "iter_tflops": 20.12481511829092, "iter_time": 1.0251569213867187, "loss": 0.03780382126569748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.472628650811345, "step_time": 0.9608089370727538} +{"epoch": 0, "iter": 7806, "iter_tflops": 22.651001284804284, "iter_time": 0.9108247909545899, "loss": 0.07510582357645035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.56917132476027, "step_time": 0.8068737640380859} +{"epoch": 0, "iter": 7807, "iter_tflops": 48.68350117209535, "iter_time": 0.4237799873352051, "loss": 0.09042070060968399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.498404909009565, "step_time": 0.38563941383361816} +{"epoch": 0, "iter": 7808, "iter_tflops": 42.92142330391117, "iter_time": 0.4806712341308594, "loss": 0.03578659147024155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31095788546767, "step_time": 
0.43607431411743164} +{"epoch": 0, "iter": 7809, "iter_tflops": 19.458600870839266, "iter_time": 1.0602557525634766, "loss": 0.22809091210365295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.84992208964405, "step_time": 0.9895045852661132} +{"epoch": 0, "iter": 7810, "iter_tflops": 34.89091421090371, "iter_time": 0.591302749633789, "loss": 0.2509690225124359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.48989855848636, "step_time": 0.522439769744873} +{"epoch": 0, "iter": 7811, "iter_tflops": 40.021015254653925, "iter_time": 0.5155065002441406, "loss": 0.2582971155643463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.93615144802121, "step_time": 0.46956988334655764} +{"epoch": 0, "iter": 7812, "iter_tflops": 45.55510580961801, "iter_time": 0.45288213348388673, "loss": 0.28989243507385254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96648618971005, "step_time": 0.4128986263275146} +{"epoch": 0, "iter": 7813, "iter_tflops": 16.778758755709116, "iter_time": 1.229595932006836, "loss": 0.03168868273496628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.23263940137879, "step_time": 1.1315472793579102} +{"epoch": 0, "iter": 7814, "iter_tflops": 15.429599715774211, "iter_time": 1.3371113891601563, "loss": 0.03890707343816757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.442373487983428, "step_time": 1.0092318058013916} +{"epoch": 0, "iter": 7815, "iter_tflops": 53.688478092571344, "iter_time": 0.3842741355895996, "loss": 0.024499567225575447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.945036734953426, "step_time": 0.3500056095123292} +{"epoch": 0, "iter": 7816, "iter_tflops": 55.410638117005284, "iter_time": 0.3723309135437012, "loss": 0.022896548733115196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.36126572024824, "step_time": 0.341793586730957} +{"epoch": 0, "iter": 7817, "iter_tflops": 24.511319414134398, "iter_time": 0.8416965713500977, "loss": 0.8372920751571655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.77235214878096, 
"step_time": 0.8005126342773438} +{"epoch": 0, "iter": 7818, "iter_tflops": 12.034783720175348, "iter_time": 1.7142886810302738, "loss": 0.7998053431510925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.11104233387195, "step_time": 1.2057181034088136} +{"epoch": 0, "iter": 7819, "iter_tflops": 36.277597224078754, "iter_time": 0.5687006607055664, "loss": 0.8562139272689819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.38404304017817, "step_time": 0.5238439712524414} +{"epoch": 0, "iter": 7820, "iter_tflops": 40.49897179701336, "iter_time": 0.5094226493835449, "loss": 0.686643123626709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96613698186842, "step_time": 0.4692496299743652} +{"epoch": 0, "iter": 7821, "iter_tflops": 18.95061791410312, "iter_time": 1.0886765594482422, "loss": 0.07183783501386642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.097577888717527, "step_time": 1.0265462646484376} +{"epoch": 0, "iter": 7822, "iter_tflops": 14.672825972164697, "iter_time": 1.406074981689453, "loss": 0.07018107175827026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.3283360799757, "step_time": 1.1905986480712891} +{"epoch": 0, "iter": 7823, "iter_tflops": 46.44490107441492, "iter_time": 0.4442057800292969, "loss": 0.030862506479024887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.585871016097805, "step_time": 0.407843002319336} +{"epoch": 0, "iter": 7824, "iter_tflops": 54.11182544168503, "iter_time": 0.38126774215698245, "loss": 0.034537605941295624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.38362032696221, "step_time": 0.3474206085205078} +{"epoch": 0, "iter": 7825, "iter_tflops": 21.541969902587464, "iter_time": 0.6939291610717775, "loss": 0.03043013997375965, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 22.872491685750987, "step_time": 0.6535624237060547} +{"epoch": 0, "iter": 7826, "iter_tflops": 10.694578444815633, "iter_time": 1.3977737579345701, "loss": 0.04075175151228905, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 
13.317281469254517, "step_time": 1.1224964447021484} +{"epoch": 0, "iter": 7827, "iter_tflops": 38.55069119942419, "iter_time": 0.3877648010253906, "loss": 0.047358229756355286, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 42.25357347596577, "step_time": 0.3537831211090088} +{"epoch": 0, "iter": 7828, "iter_tflops": 42.46555319515403, "iter_time": 0.35201710510253903, "loss": 0.032809533178806305, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 46.451113634295645, "step_time": 0.3218136215209961} +{"epoch": 0, "iter": 7829, "iter_tflops": 54.31739417714134, "iter_time": 0.3798248023986816, "loss": 0.010778017342090607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.235096852398485, "step_time": 0.3425095100402832} +{"epoch": 0, "iter": 7830, "iter_tflops": 55.73959282517697, "iter_time": 0.37013355255126956, "loss": 0.0019525319803506136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.85112117832247, "step_time": 0.3335605421066284} +{"epoch": 0, "iter": 7831, "iter_tflops": 54.21764382928085, "iter_time": 0.38052360916137695, "loss": 0.007584882900118828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.607583444142946, "step_time": 0.3461152477264405} +{"epoch": 0, "iter": 7832, "iter_tflops": 52.4327176578841, "iter_time": 0.39347747802734373, "loss": 0.006337150000035763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.34877959270113, "step_time": 0.35974773406982424} +{"epoch": 0, "iter": 7833, "iter_tflops": 25.237425897051935, "iter_time": 0.8174801025390626, "loss": 0.8226880431175232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.778360422765054, "step_time": 0.7704390106201171} +{"epoch": 0, "iter": 7834, "iter_tflops": 15.177078149954918, "iter_time": 1.3593587188720704, "loss": 0.8229334950447083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.843459231444932, "step_time": 1.1562272338867188} +{"epoch": 0, "iter": 7835, "iter_tflops": 38.27174392930055, "iter_time": 0.5390685501098633, "loss": 0.9218307137489319, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 41.920027815679425, "step_time": 0.492153621673584} +{"epoch": 0, "iter": 7836, "iter_tflops": 42.16979606948628, "iter_time": 0.4892386360168457, "loss": 0.9620947241783142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.94458725877419, "step_time": 0.44904296112060554} +{"epoch": 0, "iter": 7837, "iter_tflops": 18.42231033608358, "iter_time": 1.1198971862792968, "loss": 0.21623146533966064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.719812986355162, "step_time": 1.0462114181518554} +{"epoch": 0, "iter": 7838, "iter_tflops": 23.704746521204697, "iter_time": 0.8703359680175782, "loss": 0.2573549449443817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.06063424182145, "step_time": 0.7099326648712159} +{"epoch": 0, "iter": 7839, "iter_tflops": 52.889300936722236, "iter_time": 0.3900806617736816, "loss": 0.21211156249046326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.56121430626622, "step_time": 0.3584200534820557} +{"epoch": 0, "iter": 7840, "iter_tflops": 48.94653452556515, "iter_time": 0.4215026397705078, "loss": 0.18596893548965454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.38385672565503, "step_time": 0.38646689796447753} +{"epoch": 0, "iter": 7841, "iter_tflops": 40.400335894163355, "iter_time": 0.5106663856506348, "loss": 0.0774712860584259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.89441384262856, "step_time": 0.47001638031005866} +{"epoch": 0, "iter": 7842, "iter_tflops": 28.37720754883866, "iter_time": 0.727030433654785, "loss": 0.0923139750957489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.77606708513662, "step_time": 0.6492651672363281} +{"epoch": 0, "iter": 7843, "iter_tflops": 42.017643881907475, "iter_time": 0.49101024246215824, "loss": 0.08778230845928192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15801952684739, "step_time": 0.446966609954834} +{"epoch": 0, "iter": 7844, "iter_tflops": 41.433567384698776, "iter_time": 0.49793186569213865, "loss": 0.06669343262910843, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 45.49293352216776, "step_time": 0.4535010585784912} +{"epoch": 0, "iter": 7845, "iter_tflops": 19.081738778998414, "iter_time": 1.0811956787109376, "loss": 0.559175431728363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.40126379137935, "step_time": 1.0112654647827148} +{"epoch": 0, "iter": 7846, "iter_tflops": 22.54641688849637, "iter_time": 0.9150497665405274, "loss": 0.6722847819328308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.133938193478592, "step_time": 0.8208460350036622} +{"epoch": 0, "iter": 7847, "iter_tflops": 43.60278169360463, "iter_time": 0.4731600303649902, "loss": 0.6242800354957581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.924762126637056, "step_time": 0.43966325187683103} +{"epoch": 0, "iter": 7848, "iter_tflops": 49.20140853986431, "iter_time": 0.4193191642761231, "loss": 0.8735975027084351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.333063433936424, "step_time": 0.3868349609375} +{"epoch": 0, "iter": 7849, "iter_tflops": 30.255720108066054, "iter_time": 0.6818906784057618, "loss": 0.46738705039024353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.080010830657976, "step_time": 0.6431136703491211} +{"epoch": 0, "iter": 7850, "iter_tflops": 17.806171186712163, "iter_time": 1.1586484985351562, "loss": 0.46596476435661316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.664318215805164, "step_time": 1.0491639366149903} +{"epoch": 0, "iter": 7851, "iter_tflops": 42.84092810535581, "iter_time": 0.48157438278198245, "loss": 0.5562644004821777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.204129956792116, "step_time": 0.44652054977416994} +{"epoch": 0, "iter": 7852, "iter_tflops": 50.67940790212393, "iter_time": 0.40709026336669923, "loss": 0.499558687210083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.732760231469875, "step_time": 0.37694231796264643} +{"epoch": 0, "iter": 7853, "iter_tflops": 30.91934805259781, "iter_time": 0.6672551269531249, "loss": 0.990576446056366, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 32.87997845808827, "step_time": 0.6274667587280274} +{"epoch": 0, "iter": 7854, "iter_tflops": 35.90592446396617, "iter_time": 0.5745874481201172, "loss": 0.9084469676017761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17185271097683, "step_time": 0.45672453689575193} +{"epoch": 0, "iter": 7855, "iter_tflops": 42.45522709923903, "iter_time": 0.48594943237304694, "loss": 0.7881249189376831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.4729016674862, "step_time": 0.45370083618164064} +{"epoch": 0, "iter": 7856, "iter_tflops": 41.967499205420516, "iter_time": 0.49159692382812503, "loss": 0.6901819705963135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26404020599529, "step_time": 0.4557943439483643} +{"epoch": 0, "iter": 7857, "iter_tflops": 37.58348493680189, "iter_time": 0.5489404067993164, "loss": 0.5183976888656616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.8934220879465, "step_time": 0.5045088539123534} +{"epoch": 0, "iter": 7858, "iter_tflops": 34.296416610976465, "iter_time": 0.6015524520874023, "loss": 0.517776608467102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.86378386757627, "step_time": 0.5448766975402832} +{"epoch": 0, "iter": 7859, "iter_tflops": 43.14820764640772, "iter_time": 0.47814485549926755, "loss": 0.5121160745620728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.054671793571984, "step_time": 0.43844941902160645} +{"epoch": 0, "iter": 7860, "iter_tflops": 42.7764336746475, "iter_time": 0.48230045700073243, "loss": 0.466757208108902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.567292005879395, "step_time": 0.4430382919311523} +{"epoch": 0, "iter": 7861, "iter_tflops": 35.682678895170156, "iter_time": 0.5781823043823242, "loss": 0.034139715135097504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.67728848999946, "step_time": 0.5199723644256591} +{"epoch": 0, "iter": 7862, "iter_tflops": 37.58773996239104, "iter_time": 0.5488782653808595, "loss": 0.05437679961323738, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 42.00322042131403, "step_time": 0.49117885017395013} +{"epoch": 0, "iter": 7863, "iter_tflops": 46.25154846256698, "iter_time": 0.4460627632141113, "loss": 0.04680667072534561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.86541610378768, "step_time": 0.4056015872955322} +{"epoch": 0, "iter": 7864, "iter_tflops": 41.99364381133555, "iter_time": 0.49129086303710934, "loss": 0.021328123286366463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.05587455680744, "step_time": 0.447957914352417} +{"epoch": 0, "iter": 7865, "iter_tflops": 21.454247543461012, "iter_time": 0.9616321182250975, "loss": 0.7092719078063965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.85138179293703, "step_time": 0.9028378982543946} +{"epoch": 0, "iter": 7866, "iter_tflops": 9.781006312739521, "iter_time": 2.109301727294922, "loss": 0.6740898489952087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.513467232981887, "step_time": 1.7919096908569336} +{"epoch": 0, "iter": 7867, "iter_tflops": 10.318383696548999, "iter_time": 1.9994501190185545, "loss": 0.7352126836776733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.927906426204048, "step_time": 1.4812774353027347} +{"epoch": 0, "iter": 7868, "iter_tflops": 47.927084506205084, "iter_time": 0.43046836090087887, "loss": 0.5606307983398438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.81931633903057, "step_time": 0.3905975112915039} +{"epoch": 0, "iter": 7869, "iter_tflops": 23.996768086922366, "iter_time": 0.5940300216674804, "loss": 0.28654414415359497, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 25.783375636576917, "step_time": 0.5528678970336914} +{"epoch": 0, "iter": 7870, "iter_tflops": 24.72737605577408, "iter_time": 0.576478500366211, "loss": 0.5033748745918274, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 26.537766445651187, "step_time": 0.5371514854431152} +{"epoch": 0, "iter": 7871, "iter_tflops": 24.88272441491355, "iter_time": 0.5728794174194337, "loss": 0.5384923219680786, "lr": 3e-05, 
"seqlen": 5712.0, "step_tflops": 26.398108900607966, "step_time": 0.539993251800537} +{"epoch": 0, "iter": 7872, "iter_tflops": 26.193630515563342, "iter_time": 0.5442086639404297, "loss": 0.42146793007850647, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 27.85007479470712, "step_time": 0.5118406600952148} +{"epoch": 0, "iter": 7873, "iter_tflops": 30.790081654758588, "iter_time": 0.6700564727783204, "loss": 0.5381120443344116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.80440194878491, "step_time": 0.6289123497009278} +{"epoch": 0, "iter": 7874, "iter_tflops": 10.273633525123657, "iter_time": 2.0081593780517575, "loss": 0.5229642391204834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.76276432787044, "step_time": 1.753932403564453} +{"epoch": 0, "iter": 7875, "iter_tflops": 11.533708182563739, "iter_time": 1.7887649993896484, "loss": 0.6834017038345337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.38473844650723, "step_time": 1.5413893661499025} +{"epoch": 0, "iter": 7876, "iter_tflops": 24.452680158602096, "iter_time": 0.8437150192260742, "loss": 0.4370934069156647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.477595338534776, "step_time": 0.6554215240478515} +{"epoch": 0, "iter": 7877, "iter_tflops": 14.909079481754825, "iter_time": 1.1676246337890626, "loss": 0.3087266683578491, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 15.839165543680261, "step_time": 1.0990609588623046} +{"epoch": 0, "iter": 7878, "iter_tflops": 17.219913227746794, "iter_time": 1.0109347381591798, "loss": 0.3064970374107361, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 20.57242251258968, "step_time": 0.8461914710998535} +{"epoch": 0, "iter": 7879, "iter_tflops": 25.145749181838756, "iter_time": 0.6922922973632812, "loss": 0.3621656000614166, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 27.03121565113333, "step_time": 0.64400390625} +{"epoch": 0, "iter": 7880, "iter_tflops": 28.426270244828473, "iter_time": 0.6123986129760741, "loss": 0.4654458463191986, "lr": 3e-05, 
"seqlen": 6944.0, "step_tflops": 30.42796163121825, "step_time": 0.5721122131347656} +{"epoch": 0, "iter": 7881, "iter_tflops": 22.520766430663294, "iter_time": 0.9160919799804687, "loss": 0.7556313872337341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.068210508701494, "step_time": 0.8571926651000976} +{"epoch": 0, "iter": 7882, "iter_tflops": 9.913930461061765, "iter_time": 2.0810205993652344, "loss": 0.68949955701828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.79295938295682, "step_time": 1.6126912384033203} +{"epoch": 0, "iter": 7883, "iter_tflops": 14.99361272188383, "iter_time": 1.3759921569824216, "loss": 0.8058869242668152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.356615322309, "step_time": 1.1886587982177734} +{"epoch": 0, "iter": 7884, "iter_tflops": 36.57783924985196, "iter_time": 0.5640325927734374, "loss": 0.6867200136184692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.79487880768629, "step_time": 0.5057275352478028} +{"epoch": 0, "iter": 7885, "iter_tflops": 14.069871695670237, "iter_time": 1.0827874145507814, "loss": 0.36013883352279663, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 14.922667904454848, "step_time": 1.0209085998535157} +{"epoch": 0, "iter": 7886, "iter_tflops": 8.119994230276626, "iter_time": 1.8761934509277343, "loss": 0.41698628664016724, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 9.003883475984045, "step_time": 1.6920121231079102} +{"epoch": 0, "iter": 7887, "iter_tflops": 6.97718458966673, "iter_time": 2.1834996337890624, "loss": 0.48958656191825867, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 8.810432054356724, "step_time": 1.7291637802124025} +{"epoch": 0, "iter": 7888, "iter_tflops": 22.532596390569207, "iter_time": 0.6761173782348633, "loss": 0.34072160720825195, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.19003001476143, "step_time": 0.6297916946411133} +{"epoch": 0, "iter": 7889, "iter_tflops": 19.175696358734733, "iter_time": 0.875676887512207, "loss": 0.3462476134300232, "lr": 3e-05, 
"seqlen": 6704.0, "step_tflops": 20.76636189703514, "step_time": 0.8086016311645509} +{"epoch": 0, "iter": 7890, "iter_tflops": 29.647644179099977, "iter_time": 0.5663759994506836, "loss": 0.3911108374595642, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 31.681120404058607, "step_time": 0.5300227355957031} +{"epoch": 0, "iter": 7891, "iter_tflops": 31.356101934822917, "iter_time": 0.5355166320800782, "loss": 0.25872930884361267, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 33.3787055471453, "step_time": 0.5030666656494142} +{"epoch": 0, "iter": 7892, "iter_tflops": 30.8043081418896, "iter_time": 0.5451092758178712, "loss": 0.3799230754375458, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 32.704514587811765, "step_time": 0.5134371910095215} +{"epoch": 0, "iter": 7893, "iter_tflops": 29.37226399064543, "iter_time": 0.7024005203247071, "loss": 0.08162132650613785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.466671594622966, "step_time": 0.6556490554809571} +{"epoch": 0, "iter": 7894, "iter_tflops": 24.730783299868673, "iter_time": 0.8342272567749023, "loss": 0.06845367699861526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.856440416470097, "step_time": 0.7406220321655272} +{"epoch": 0, "iter": 7895, "iter_tflops": 51.227167001971395, "iter_time": 0.4027373504638672, "loss": 0.10768269002437592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.9005619725728, "step_time": 0.3690677299499512} +{"epoch": 0, "iter": 7896, "iter_tflops": 54.350528368317924, "iter_time": 0.379593246459961, "loss": 0.09673628211021423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.01376369566488, "step_time": 0.34959799575805667} +{"epoch": 0, "iter": 7897, "iter_tflops": 49.54006949696735, "iter_time": 0.41645265579223634, "loss": 0.03500031679868698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.31228198594734, "step_time": 0.37986055374145505} +{"epoch": 0, "iter": 7898, "iter_tflops": 47.26776351790234, "iter_time": 0.43647280883789064, "loss": 0.01914539374411106, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 51.9403387383444, "step_time": 0.3972075271606445} +{"epoch": 0, "iter": 7899, "iter_tflops": 56.27279961239796, "iter_time": 0.36662639236450195, "loss": 0.07007138431072235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.78948478400628, "step_time": 0.33389327621459963} +{"epoch": 0, "iter": 7900, "iter_tflops": 56.532258035288294, "iter_time": 0.36494373703002925, "loss": 0.08871611952781677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.7788851684928, "step_time": 0.33395056343078616} +{"epoch": 0, "iter": 7901, "iter_tflops": 23.853786786885912, "iter_time": 0.5515134887695312, "loss": 0.0039050444029271603, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 25.575147671592475, "step_time": 0.5143933219909669} +{"epoch": 0, "iter": 7902, "iter_tflops": 9.687313331535162, "iter_time": 1.3580323791503908, "loss": 0.006643067114055157, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 11.949171105311832, "step_time": 1.1009705238342284} +{"epoch": 0, "iter": 7903, "iter_tflops": 27.9051366058487, "iter_time": 0.4714431381225586, "loss": 0.0035496919881552458, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 30.976875026926944, "step_time": 0.4246937484741211} +{"epoch": 0, "iter": 7904, "iter_tflops": 31.208874499044885, "iter_time": 0.4215366744995117, "loss": 0.002011415781453252, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 34.60477917053564, "step_time": 0.3801696033477783} +{"epoch": 0, "iter": 7905, "iter_tflops": 20.934173942012983, "iter_time": 0.985522216796875, "loss": 0.5508870482444763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.598373358255124, "step_time": 0.9129459533691405} +{"epoch": 0, "iter": 7906, "iter_tflops": 14.807852115571237, "iter_time": 1.393253616333008, "loss": 0.7011198997497559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.888685350002774, "step_time": 1.1533040637969971} +{"epoch": 0, "iter": 7907, "iter_tflops": 38.28598288659789, "iter_time": 0.5388680648803711, "loss": 
0.5657399892807007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.90857918136116, "step_time": 0.49228806877136233} +{"epoch": 0, "iter": 7908, "iter_tflops": 39.15654183232182, "iter_time": 0.5268875274658203, "loss": 0.5299376249313354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.944835304792484, "step_time": 0.48040918922424314} +{"epoch": 0, "iter": 7909, "iter_tflops": 38.18672138309154, "iter_time": 0.5402687835693358, "loss": 0.004535075277090073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53860496659794, "step_time": 0.48499694633483886} +{"epoch": 0, "iter": 7910, "iter_tflops": 38.67648716077617, "iter_time": 0.5334272842407226, "loss": 0.005001699086278677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.1166187393663, "step_time": 0.4784951629638672} +{"epoch": 0, "iter": 7911, "iter_tflops": 40.35721514308458, "iter_time": 0.5112120208740234, "loss": 0.0020352888386696577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.77781962444907, "step_time": 0.46074359321594244} +{"epoch": 0, "iter": 7912, "iter_tflops": 44.33726117365149, "iter_time": 0.4653217849731445, "loss": 0.02433960512280464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.00286185219686, "step_time": 0.42101813507080077} +{"epoch": 0, "iter": 7913, "iter_tflops": 22.66831777364174, "iter_time": 0.9101290054321289, "loss": 0.2640779912471771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.612605295644627, "step_time": 0.8382328186035155} +{"epoch": 0, "iter": 7914, "iter_tflops": 16.954622028110965, "iter_time": 1.2168418426513674, "loss": 0.26785820722579956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.48814585545804, "step_time": 1.0069770908355715} +{"epoch": 0, "iter": 7915, "iter_tflops": 41.9495659535273, "iter_time": 0.4918070793151856, "loss": 0.25694915652275085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.867881226215935, "step_time": 0.4497939071655274} +{"epoch": 0, "iter": 7916, "iter_tflops": 39.59764836809049, "iter_time": 0.5210181503295899, 
"loss": 0.3025083839893341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.11228051810579, "step_time": 0.4785433120727539} +{"epoch": 0, "iter": 7917, "iter_tflops": 18.942948871397114, "iter_time": 1.0891173095703124, "loss": 0.6931072473526001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.191778143742532, "step_time": 1.0217571411132813} +{"epoch": 0, "iter": 7918, "iter_tflops": 19.12096973153178, "iter_time": 1.078977363586426, "loss": 0.6762545704841614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.87248855982999, "step_time": 0.8294744396209717} +{"epoch": 0, "iter": 7919, "iter_tflops": 37.25313088145056, "iter_time": 0.5538083114624023, "loss": 0.5170067548751831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.76428056164463, "step_time": 0.506107141494751} +{"epoch": 0, "iter": 7920, "iter_tflops": 41.59381000700486, "iter_time": 0.49601355361938476, "loss": 0.6112107038497925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.577009355381044, "step_time": 0.4526644859313965} +{"epoch": 0, "iter": 7921, "iter_tflops": 33.19562974443585, "iter_time": 0.6215002899169922, "loss": 0.7781804800033569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.41870635338624, "step_time": 0.5664971542358399} +{"epoch": 0, "iter": 7922, "iter_tflops": 35.12677225939697, "iter_time": 0.5873324584960937, "loss": 0.7684099078178406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.14445734260559, "step_time": 0.5408674011230469} +{"epoch": 0, "iter": 7923, "iter_tflops": 33.131545253314705, "iter_time": 0.6227024230957031, "loss": 0.8584994673728943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.991600321794415, "step_time": 0.5732196769714355} +{"epoch": 0, "iter": 7924, "iter_tflops": 37.48909827985594, "iter_time": 0.5503224792480469, "loss": 0.8571452498435974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.67307061570576, "step_time": 0.507242094039917} +{"epoch": 0, "iter": 7925, "iter_tflops": 27.0737038790536, "iter_time": 0.7620343933105469, "loss": 
0.8133835196495056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.92487663672111, "step_time": 0.7132647018432617} +{"epoch": 0, "iter": 7926, "iter_tflops": 7.83089276113283, "iter_time": 2.6345774536132813, "loss": 0.7044632434844971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.356791932012126, "step_time": 2.2049323806762695} +{"epoch": 0, "iter": 7927, "iter_tflops": 11.228241424919718, "iter_time": 1.837428741455078, "loss": 0.9947130084037781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.295975789248677, "step_time": 1.4431399307250976} +{"epoch": 0, "iter": 7928, "iter_tflops": 43.96880775926512, "iter_time": 0.46922112655639653, "loss": 0.6069133281707764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.679539171881814, "step_time": 0.4327032909393311} +{"epoch": 0, "iter": 7929, "iter_tflops": 20.003574527317678, "iter_time": 0.7472964935302735, "loss": 0.2200401872396469, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 21.155409872558046, "step_time": 0.7066089096069336} +{"epoch": 0, "iter": 7930, "iter_tflops": 12.542237944970072, "iter_time": 1.1918607482910155, "loss": 0.43705588579177856, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 16.238378264171942, "step_time": 0.9205722923278808} +{"epoch": 0, "iter": 7931, "iter_tflops": 27.155456600737413, "iter_time": 0.5504824066162108, "loss": 0.4237927496433258, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.980685162055927, "step_time": 0.5158125495910645} +{"epoch": 0, "iter": 7932, "iter_tflops": 25.875582158891916, "iter_time": 0.5777107162475587, "loss": 0.23959937691688538, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 27.33950525910964, "step_time": 0.5467765769958497} +{"epoch": 0, "iter": 7933, "iter_tflops": 37.01761852504803, "iter_time": 0.5573317337036132, "loss": 0.6029474139213562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.02596752156754, "step_time": 0.5154427185058594} +{"epoch": 0, "iter": 7934, "iter_tflops": 31.862084114024587, "iter_time": 0.6475123672485352, 
"loss": 0.7793747186660767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.7265326103181, "step_time": 0.5941017417907715} +{"epoch": 0, "iter": 7935, "iter_tflops": 36.75460607900726, "iter_time": 0.5613199462890626, "loss": 0.975951611995697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.94513610183751, "step_time": 0.5164857482910156} +{"epoch": 0, "iter": 7936, "iter_tflops": 38.4307306913339, "iter_time": 0.5368384399414062, "loss": 0.9102928042411804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.756812282416064, "step_time": 0.4940773105621339} +{"epoch": 0, "iter": 7937, "iter_tflops": 20.141617716456803, "iter_time": 1.0243017120361328, "loss": 0.36455199122428894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.87779623685311, "step_time": 0.9430151596069336} +{"epoch": 0, "iter": 7938, "iter_tflops": 44.470500254832686, "iter_time": 0.4639276237487792, "loss": 0.2864890396595001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.581111487967625, "step_time": 0.4246731472015381} +{"epoch": 0, "iter": 7939, "iter_tflops": 47.68370154331826, "iter_time": 0.4326655197143555, "loss": 0.4312238395214081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.51707748885113, "step_time": 0.40047096061706544} +{"epoch": 0, "iter": 7940, "iter_tflops": 48.10398504540554, "iter_time": 0.42888533020019526, "loss": 0.31572702527046204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.03515339004515, "step_time": 0.39648376464843754} +{"epoch": 0, "iter": 7941, "iter_tflops": 19.29435142605569, "iter_time": 1.0692815246582033, "loss": 0.869692862033844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.10116095055306, "step_time": 1.02636328125} +{"epoch": 0, "iter": 7942, "iter_tflops": 12.831072607422232, "iter_time": 1.6079009246826172, "loss": 0.7562624216079712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.875233420848836, "step_time": 1.2225664081573486} +{"epoch": 0, "iter": 7943, "iter_tflops": 34.4932868517852, "iter_time": 0.5981190948486328, "loss": 
0.7886684536933899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.43435746393644, "step_time": 0.5511272239685059} +{"epoch": 0, "iter": 7944, "iter_tflops": 41.912655301459516, "iter_time": 0.49224019241333006, "loss": 0.7492765188217163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.51157666978863, "step_time": 0.4533152885437011} +{"epoch": 0, "iter": 7945, "iter_tflops": 21.552454356601633, "iter_time": 0.9572503051757812, "loss": 0.8558090925216675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.837296543359926, "step_time": 0.9033947372436523} +{"epoch": 0, "iter": 7946, "iter_tflops": 19.6842233755827, "iter_time": 1.0481029968261717, "loss": 0.9242174625396729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.302945833763744, "step_time": 0.9250389461517334} +{"epoch": 0, "iter": 7947, "iter_tflops": 40.117778061886426, "iter_time": 0.5142631149291993, "loss": 0.6165598630905151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7750191426212, "step_time": 0.48231640624999994} +{"epoch": 0, "iter": 7948, "iter_tflops": 47.171234564773, "iter_time": 0.43736598587036135, "loss": 0.9052191376686096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.01246829479538, "step_time": 0.4044323711395264} +{"epoch": 0, "iter": 7949, "iter_tflops": 15.631820642422934, "iter_time": 1.3198138580322265, "loss": 0.6589282751083374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.16709153347532, "step_time": 1.2761165771484375} +{"epoch": 0, "iter": 7950, "iter_tflops": 15.617073716252802, "iter_time": 1.3210601348876954, "loss": 0.7064843773841858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.051058080117564, "step_time": 1.028927921295166} +{"epoch": 0, "iter": 7951, "iter_tflops": 45.40683562080793, "iter_time": 0.4543609619140625, "loss": 0.6276915669441223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.16919730852982, "step_time": 0.41959386444091795} +{"epoch": 0, "iter": 7952, "iter_tflops": 48.82797638705672, "iter_time": 0.4225260810852051, "loss": 
0.6924992799758911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.8094319250664, "step_time": 0.3906706199645996} +{"epoch": 0, "iter": 7953, "iter_tflops": 32.80855133536429, "iter_time": 0.6288328094482423, "loss": 0.03714590519666672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.978139054268944, "step_time": 0.5898282203674317} +{"epoch": 0, "iter": 7954, "iter_tflops": 13.217029623068113, "iter_time": 1.5609478149414062, "loss": 0.015429846942424774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.977558203538813, "step_time": 1.2151979255676268} +{"epoch": 0, "iter": 7955, "iter_tflops": 42.468080997890375, "iter_time": 0.48580234909057624, "loss": 0.02659556083381176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.895967863090256, "step_time": 0.37582165527343747} +{"epoch": 0, "iter": 7956, "iter_tflops": 53.22025889687029, "iter_time": 0.38765488815307614, "loss": 0.058377742767333984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.238132259395215, "step_time": 0.35425403785705567} +{"epoch": 0, "iter": 7957, "iter_tflops": 39.32657095467162, "iter_time": 0.524609519958496, "loss": 0.05584549903869629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.65395250558567, "step_time": 0.4836853866577148} +{"epoch": 0, "iter": 7958, "iter_tflops": 17.30818569462804, "iter_time": 1.1919847564697266, "loss": 0.08897065371274948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.76182801611102, "step_time": 1.0996313095092773} +{"epoch": 0, "iter": 7959, "iter_tflops": 48.69401963337132, "iter_time": 0.42368844604492184, "loss": 0.08301049470901489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.00543753436338, "step_time": 0.38201881980895996} +{"epoch": 0, "iter": 7960, "iter_tflops": 51.686214709952765, "iter_time": 0.3991604652404785, "loss": 0.1153828352689743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.33325846931791, "step_time": 0.36623291587829593} +{"epoch": 0, "iter": 7961, "iter_tflops": 29.594249533016093, "iter_time": 
0.6971318359374999, "loss": 0.6088429093360901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.49145991665402, "step_time": 0.6551329650878908} +{"epoch": 0, "iter": 7962, "iter_tflops": 19.14700868034775, "iter_time": 1.077510009765625, "loss": 0.5047914981842041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.266987464618726, "step_time": 0.8867109909057618} +{"epoch": 0, "iter": 7963, "iter_tflops": 38.89577195782858, "iter_time": 0.5304199523925781, "loss": 0.4714886248111725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.66241751250043, "step_time": 0.4835894145965576} +{"epoch": 0, "iter": 7964, "iter_tflops": 43.380697230498264, "iter_time": 0.4755823402404785, "loss": 0.5570463538169861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.382626875029736, "step_time": 0.43541472625732425} +{"epoch": 0, "iter": 7965, "iter_tflops": 17.242202929951663, "iter_time": 1.1965462646484375, "loss": 0.020832229405641556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.1724853975888, "step_time": 1.1352928924560548} +{"epoch": 0, "iter": 7966, "iter_tflops": 19.304716117832353, "iter_time": 1.0687074279785156, "loss": 0.023636458441615105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.088589117605167, "step_time": 0.7908090934753418} +{"epoch": 0, "iter": 7967, "iter_tflops": 44.558973835441456, "iter_time": 0.46300647735595707, "loss": 0.029237858951091766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.94393748592653, "step_time": 0.4215250053405762} +{"epoch": 0, "iter": 7968, "iter_tflops": 43.167859707095246, "iter_time": 0.47792718124389655, "loss": 0.035646986216306686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.56883744558153, "step_time": 0.43371027374267573} +{"epoch": 0, "iter": 7969, "iter_tflops": 29.92859359848122, "iter_time": 0.6893439025878907, "loss": 0.37809067964553833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.79653192372306, "step_time": 0.6290632667541505} +{"epoch": 0, "iter": 7970, "iter_tflops": 47.100996883767294, 
"iter_time": 0.43801819229125977, "loss": 0.5955992341041565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.26236791051094, "step_time": 0.40246079826354975} +{"epoch": 0, "iter": 7971, "iter_tflops": 46.6437072333822, "iter_time": 0.44231247329711915, "loss": 0.47030654549598694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.70968747153786, "step_time": 0.4068471832275391} +{"epoch": 0, "iter": 7972, "iter_tflops": 43.17300901026569, "iter_time": 0.47787017822265626, "loss": 0.4114012122154236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.19628643733467, "step_time": 0.446596363067627} +{"epoch": 0, "iter": 7973, "iter_tflops": 24.57388274125455, "iter_time": 0.8395536727905273, "loss": 0.7518137693405151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.76340952085801, "step_time": 0.8007904968261718} +{"epoch": 0, "iter": 7974, "iter_tflops": 14.688530277936177, "iter_time": 1.4045716705322266, "loss": 0.7553604245185852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.57126454709067, "step_time": 1.1741382331848145} +{"epoch": 0, "iter": 7975, "iter_tflops": 41.248089496292316, "iter_time": 0.5001708869934082, "loss": 1.0010974407196045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.89697611683871, "step_time": 0.45952078056335455} +{"epoch": 0, "iter": 7976, "iter_tflops": 42.63941588503018, "iter_time": 0.48385028457641605, "loss": 0.8447892665863037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.139168639511745, "step_time": 0.44714922523498535} +{"epoch": 0, "iter": 7977, "iter_tflops": 19.69767424008185, "iter_time": 1.0473872833251954, "loss": 0.001261663157492876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.196828671468158, "step_time": 0.9733103866577149} +{"epoch": 0, "iter": 7978, "iter_tflops": 12.332700875122502, "iter_time": 1.672877151489258, "loss": 0.0005513695068657398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.625924198637737, "step_time": 1.1704971199035643} +{"epoch": 0, "iter": 7979, "iter_tflops": 
42.06954469502507, "iter_time": 0.4904044876098632, "loss": 0.007491914555430412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.6178337351352, "step_time": 0.4425579624176026} +{"epoch": 0, "iter": 7980, "iter_tflops": 42.54972088905596, "iter_time": 0.48487024307250975, "loss": 0.0018314968328922987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.233019287178024, "step_time": 0.43679387474060055} +{"epoch": 0, "iter": 7981, "iter_tflops": 22.940463662507728, "iter_time": 0.8271300048828125, "loss": 0.1907731145620346, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 24.826086399609935, "step_time": 0.7643067665100098} +{"epoch": 0, "iter": 7982, "iter_tflops": 14.505443679841417, "iter_time": 1.308112060546875, "loss": 0.1595337986946106, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 17.413324559244742, "step_time": 1.089668188095093} +{"epoch": 0, "iter": 7983, "iter_tflops": 45.17048873193678, "iter_time": 0.4200695266723633, "loss": 0.15408171713352203, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 49.50180655045184, "step_time": 0.383314208984375} +{"epoch": 0, "iter": 7984, "iter_tflops": 46.98613061546014, "iter_time": 0.40383716583251955, "loss": 0.1373164802789688, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 50.89842811986605, "step_time": 0.37279630279541015} +{"epoch": 0, "iter": 7985, "iter_tflops": 43.312746787768596, "iter_time": 0.47632844924926754, "loss": 0.021206578239798546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43386847422594, "step_time": 0.4349443588256836} +{"epoch": 0, "iter": 7986, "iter_tflops": 39.52681011769069, "iter_time": 0.5219518966674804, "loss": 0.01851361244916916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78146242852919, "step_time": 0.4712289714813233} +{"epoch": 0, "iter": 7987, "iter_tflops": 44.7349146340187, "iter_time": 0.46118548965454104, "loss": 0.035198576748371124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.53436923993596, "step_time": 0.41650057983398436} +{"epoch": 0, "iter": 7988, 
"iter_tflops": 44.6872156727773, "iter_time": 0.46167775726318355, "loss": 0.05870869383215904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.86164625900031, "step_time": 0.42223492431640625} +{"epoch": 0, "iter": 7989, "iter_tflops": 21.88884913947992, "iter_time": 0.94253897857666, "loss": 0.050458818674087524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.55035764080628, "step_time": 0.8760416221618652} +{"epoch": 0, "iter": 7990, "iter_tflops": 14.660592018092686, "iter_time": 1.4072483215332032, "loss": 0.0634055957198143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.429875531269023, "step_time": 1.1836626987457273} +{"epoch": 0, "iter": 7991, "iter_tflops": 40.46945779932902, "iter_time": 0.5097941665649415, "loss": 0.05289435014128685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.581773375714896, "step_time": 0.46276969146728514} +{"epoch": 0, "iter": 7992, "iter_tflops": 45.379370563337645, "iter_time": 0.45463595581054694, "loss": 0.07546529918909073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6112776945098, "step_time": 0.4158549118041992} +{"epoch": 0, "iter": 7993, "iter_tflops": 34.11707569779797, "iter_time": 0.6047145919799805, "loss": 0.45194342732429504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.73311188517337, "step_time": 0.5467636375427246} +{"epoch": 0, "iter": 7994, "iter_tflops": 41.59119446363859, "iter_time": 0.4960447463989258, "loss": 0.3436713218688965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.680784946490114, "step_time": 0.4516361427307129} +{"epoch": 0, "iter": 7995, "iter_tflops": 42.89814416864167, "iter_time": 0.48093207550048833, "loss": 0.38460299372673035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.96357168093475, "step_time": 0.4392999248504639} +{"epoch": 0, "iter": 7996, "iter_tflops": 43.355815925753724, "iter_time": 0.47585527038574227, "loss": 0.41420283913612366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.30518203519862, "step_time": 0.4361275577545166} +{"epoch": 0, "iter": 
7997, "iter_tflops": 27.27934976077342, "iter_time": 0.5884789199829101, "loss": 0.027600498870015144, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.44201920944729, "step_time": 0.5273409156799316} +{"epoch": 0, "iter": 7998, "iter_tflops": 29.67415238479025, "iter_time": 0.5409867172241211, "loss": 0.04092378169298172, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 32.971612197941575, "step_time": 0.4868831462860107} +{"epoch": 0, "iter": 7999, "iter_tflops": 29.046590514301943, "iter_time": 0.5526749267578126, "loss": 0.056978799402713776, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 31.809905178472604, "step_time": 0.5046642608642578} +{"epoch": 0, "iter": 8000, "iter_tflops": 36.10289039849849, "iter_time": 0.4446547660827637, "loss": 0.03274141624569893, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 39.68364109033567, "step_time": 0.40453249359130855} +{"epoch": 0, "iter": 8001, "iter_tflops": 16.552627278877132, "iter_time": 1.2463938903808593, "loss": 0.29063886404037476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.446886452329547, "step_time": 1.1825086135864258} +{"epoch": 0, "iter": 8002, "iter_tflops": 17.640927957527282, "iter_time": 1.1695016021728517, "loss": 0.23187392950057983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.528394580147413, "step_time": 0.9583201122283935} +{"epoch": 0, "iter": 8003, "iter_tflops": 50.67951190513216, "iter_time": 0.4070894279479981, "loss": 0.23181019723415375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.6738321286052, "step_time": 0.37734859085083006} +{"epoch": 0, "iter": 8004, "iter_tflops": 50.1682888429573, "iter_time": 0.411237735748291, "loss": 0.25145137310028076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.60427713104833, "step_time": 0.37782925796508793} +{"epoch": 0, "iter": 8005, "iter_tflops": 39.5549933619323, "iter_time": 0.5215800018310548, "loss": 0.4086969792842865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.679070488951766, "step_time": 0.4834007225036621} +{"epoch": 0, 
"iter": 8006, "iter_tflops": 23.138296882400105, "iter_time": 0.8916427001953124, "loss": 0.38203293085098267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.408596399694282, "step_time": 0.7262271327972413} +{"epoch": 0, "iter": 8007, "iter_tflops": 46.882795434871845, "iter_time": 0.44005681228637694, "loss": 0.40815064311027527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.47175114854586, "step_time": 0.4087651615142822} +{"epoch": 0, "iter": 8008, "iter_tflops": 45.71942184288292, "iter_time": 0.4512544708251953, "loss": 0.401386022567749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.70760104858125, "step_time": 0.41504906845092776} +{"epoch": 0, "iter": 8009, "iter_tflops": 20.44083013474604, "iter_time": 1.009308006286621, "loss": 0.341721773147583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.36692126257405, "step_time": 0.9655622940063477} +{"epoch": 0, "iter": 8010, "iter_tflops": 23.171250112510602, "iter_time": 0.890374641418457, "loss": 0.31729215383529663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.588160786023362, "step_time": 0.7216656455993653} +{"epoch": 0, "iter": 8011, "iter_tflops": 36.56032237175601, "iter_time": 0.564302833557129, "loss": 0.3563641905784607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.83830660658064, "step_time": 0.5178707447052001} +{"epoch": 0, "iter": 8012, "iter_tflops": 41.14253460312166, "iter_time": 0.5014541206359863, "loss": 0.4848249852657318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.93725231303023, "step_time": 0.459108922958374} +{"epoch": 0, "iter": 8013, "iter_tflops": 17.844395082141006, "iter_time": 1.1561665954589844, "loss": 0.17407886683940887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.150243972549532, "step_time": 1.0773279724121094} +{"epoch": 0, "iter": 8014, "iter_tflops": 22.738883121865403, "iter_time": 0.9073046112060547, "loss": 0.2564099133014679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.021548711162982, "step_time": 0.7362581462860107} +{"epoch": 0, 
"iter": 8015, "iter_tflops": 48.90632749903261, "iter_time": 0.42184916687011725, "loss": 0.2865789532661438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.907588321757785, "step_time": 0.3899458312988281} +{"epoch": 0, "iter": 8016, "iter_tflops": 52.455494465505716, "iter_time": 0.39330662536621097, "loss": 0.27264028787612915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.750922259840955, "step_time": 0.3635375900268555} +{"epoch": 0, "iter": 8017, "iter_tflops": 25.92679411119316, "iter_time": 0.7957441024780274, "loss": 0.9110543727874756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.374368044390152, "step_time": 0.7536646499633789} +{"epoch": 0, "iter": 8018, "iter_tflops": 16.118128461321998, "iter_time": 1.2799931182861326, "loss": 0.906182587146759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.388288523765492, "step_time": 0.9215127582550048} +{"epoch": 0, "iter": 8019, "iter_tflops": 42.55573901929406, "iter_time": 0.4848016738891602, "loss": 0.7523903846740723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88991629771829, "step_time": 0.44957792854309075} +{"epoch": 0, "iter": 8020, "iter_tflops": 41.822695117199146, "iter_time": 0.49329899597167964, "loss": 0.9737573266029358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.98469676090921, "step_time": 0.45862471008300787} +{"epoch": 0, "iter": 8021, "iter_tflops": 21.63524645077245, "iter_time": 0.953587173461914, "loss": 0.891055166721344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.640303696505878, "step_time": 0.9112551574707032} +{"epoch": 0, "iter": 8022, "iter_tflops": 14.65021293225829, "iter_time": 1.4082453002929687, "loss": 0.7093361020088196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.949060353045446, "step_time": 1.0341887359619142} +{"epoch": 0, "iter": 8023, "iter_tflops": 41.313377065676086, "iter_time": 0.49938046646118167, "loss": 0.8732090592384338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.57923449106613, "step_time": 0.46279604721069334} 
+{"epoch": 0, "iter": 8024, "iter_tflops": 43.610685941828464, "iter_time": 0.4730742721557617, "loss": 0.7877342104911804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.77980328831294, "step_time": 0.4410256576538085} +{"epoch": 0, "iter": 8025, "iter_tflops": 41.17021021010466, "iter_time": 0.501117031097412, "loss": 0.37532591819763184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.859426183427985, "step_time": 0.4599054260253906} +{"epoch": 0, "iter": 8026, "iter_tflops": 36.128364409295166, "iter_time": 0.5710497512817383, "loss": 0.4757968485355377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.375539290314855, "step_time": 0.5109800109863282} +{"epoch": 0, "iter": 8027, "iter_tflops": 43.54904726084468, "iter_time": 0.47374385452270507, "loss": 0.4813043475151062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.49675943106692, "step_time": 0.4343684444427491} +{"epoch": 0, "iter": 8028, "iter_tflops": 44.437618143458856, "iter_time": 0.4642709121704101, "loss": 0.46194151043891907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.43874466378914, "step_time": 0.425921308517456} +{"epoch": 0, "iter": 8029, "iter_tflops": 32.11486376272379, "iter_time": 0.6424157257080079, "loss": 0.2608398199081421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.54690496823297, "step_time": 0.5803907127380371} +{"epoch": 0, "iter": 8030, "iter_tflops": 45.21577781470734, "iter_time": 0.45628084945678704, "loss": 0.44419410824775696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.26282573215656, "step_time": 0.41879638862609864} +{"epoch": 0, "iter": 8031, "iter_tflops": 48.976928802104226, "iter_time": 0.4212410621643067, "loss": 0.3376006782054901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.22573066946827, "step_time": 0.3876150360107422} +{"epoch": 0, "iter": 8032, "iter_tflops": 44.33542969452649, "iter_time": 0.465341007232666, "loss": 0.27067825198173523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86413247380005, "step_time": 0.4310345230102539} 
+{"epoch": 0, "iter": 8033, "iter_tflops": 35.19232411646481, "iter_time": 0.5862384490966797, "loss": 0.3430449962615967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.81716403649105, "step_time": 0.5455484046936036} +{"epoch": 0, "iter": 8034, "iter_tflops": 14.649481813451724, "iter_time": 1.4083155822753906, "loss": 0.5221582055091858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.214587030482715, "step_time": 1.1984657821655273} +{"epoch": 0, "iter": 8035, "iter_tflops": 48.35506860999354, "iter_time": 0.4266583442687988, "loss": 0.61165452003479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51178843406201, "step_time": 0.39288499069213867} +{"epoch": 0, "iter": 8036, "iter_tflops": 51.87775793884258, "iter_time": 0.39768668365478516, "loss": 0.5740784406661987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.06062718295741, "step_time": 0.3680139617919922} +{"epoch": 0, "iter": 8037, "iter_tflops": 43.64505265653161, "iter_time": 0.4727017669677735, "loss": 0.6426875591278076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.36258266601505, "step_time": 0.43559899711608885} +{"epoch": 0, "iter": 8038, "iter_tflops": 23.687245107146133, "iter_time": 0.8709790191650391, "loss": 0.6735321879386902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.47982292741966, "step_time": 0.7244108772277832} +{"epoch": 0, "iter": 8039, "iter_tflops": 36.6447649156289, "iter_time": 0.5630024795532227, "loss": 0.8151776790618896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.27454890087124, "step_time": 0.5122613182067871} +{"epoch": 0, "iter": 8040, "iter_tflops": 41.10697394619276, "iter_time": 0.5018879165649414, "loss": 0.6109914779663086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.84130000485814, "step_time": 0.4600913333892822} +{"epoch": 0, "iter": 8041, "iter_tflops": 19.635357437594454, "iter_time": 1.0507113800048828, "loss": 0.8108762502670288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.19796106835339, "step_time": 0.9732583923339843} 
+{"epoch": 0, "iter": 8042, "iter_tflops": 21.99109506080767, "iter_time": 0.9381567153930664, "loss": 0.6737443208694458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.769111895674072, "step_time": 0.8329363441467286} +{"epoch": 0, "iter": 8043, "iter_tflops": 43.74851323742933, "iter_time": 0.4715838775634766, "loss": 0.68226158618927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.33190305489106, "step_time": 0.43588134384155275} +{"epoch": 0, "iter": 8044, "iter_tflops": 43.58706434028338, "iter_time": 0.4733306503295899, "loss": 0.8369672894477844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.86851831879952, "step_time": 0.44019086265563967} +{"epoch": 0, "iter": 8045, "iter_tflops": 28.124253946918433, "iter_time": 0.733569450378418, "loss": 0.682317316532135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.6693804755224, "step_time": 0.6953665084838867} +{"epoch": 0, "iter": 8046, "iter_tflops": 10.779803614831659, "iter_time": 1.913865432739258, "loss": 0.6545200347900391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.901342234688967, "step_time": 1.4841080207824706} +{"epoch": 0, "iter": 8047, "iter_tflops": 38.0716244401937, "iter_time": 0.5419021072387696, "loss": 0.7620310187339783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78094224272561, "step_time": 0.49379196357727057} +{"epoch": 0, "iter": 8048, "iter_tflops": 38.37818658185731, "iter_time": 0.5375734329223633, "loss": 0.6647232174873352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.885355837392225, "step_time": 0.4925610179901123} +{"epoch": 0, "iter": 8049, "iter_tflops": 17.415089602343556, "iter_time": 1.1846676635742186, "loss": 0.49786263704299927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.505744249428307, "step_time": 1.1148480834960939} +{"epoch": 0, "iter": 8050, "iter_tflops": 15.2339685245335, "iter_time": 1.3542822723388672, "loss": 0.6573606133460999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.775411611114208, "step_time": 1.0988357505798338} 
+{"epoch": 0, "iter": 8051, "iter_tflops": 38.19635702946164, "iter_time": 0.5401324920654297, "loss": 0.7048787474632263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65729873765104, "step_time": 0.49525759315490725} +{"epoch": 0, "iter": 8052, "iter_tflops": 39.25866408575836, "iter_time": 0.5255169525146484, "loss": 0.858077347278595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.91409431485288, "step_time": 0.480753324508667} +{"epoch": 0, "iter": 8053, "iter_tflops": 12.315115090753459, "iter_time": 0.5807551193237304, "loss": 0.011032985523343086, "lr": 3e-05, "seqlen": 2896.0, "step_tflops": 13.764526736793952, "step_time": 0.5196013107299804} +{"epoch": 0, "iter": 8054, "iter_tflops": 14.71269292527933, "iter_time": 0.4861153678894043, "loss": 0.02127203531563282, "lr": 3e-05, "seqlen": 2896.0, "step_tflops": 16.297174702765748, "step_time": 0.43885313034057616} +{"epoch": 0, "iter": 8055, "iter_tflops": 14.082695758788551, "iter_time": 0.5078620071411133, "loss": 0.08150273561477661, "lr": 3e-05, "seqlen": 2896.0, "step_tflops": 15.535073308152242, "step_time": 0.46038187217712406} +{"epoch": 0, "iter": 8056, "iter_tflops": 15.706274739536179, "iter_time": 0.45536362075805664, "loss": 0.04879201203584671, "lr": 3e-05, "seqlen": 2896.0, "step_tflops": 17.256728701984102, "step_time": 0.4144508647918701} +{"epoch": 0, "iter": 8057, "iter_tflops": 33.34404483131783, "iter_time": 0.6187339782714844, "loss": 0.5664854645729065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.625808242254614, "step_time": 0.5632938766479492} +{"epoch": 0, "iter": 8058, "iter_tflops": 37.42535903900855, "iter_time": 0.5512597351074219, "loss": 0.8073731064796448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.9225058998531, "step_time": 0.504150297164917} +{"epoch": 0, "iter": 8059, "iter_tflops": 37.11554587651605, "iter_time": 0.5558612442016602, "loss": 0.8835790157318115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.510917992692974, "step_time": 
0.5092724266052246} +{"epoch": 0, "iter": 8060, "iter_tflops": 37.858010392164346, "iter_time": 0.5449597930908203, "loss": 0.8443818688392639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.072625347755974, "step_time": 0.5023076400756836} +{"epoch": 0, "iter": 8061, "iter_tflops": 32.72162793325973, "iter_time": 0.6305032730102539, "loss": 0.11340834200382233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.510835567247966, "step_time": 0.5650676898956299} +{"epoch": 0, "iter": 8062, "iter_tflops": 38.12972288271927, "iter_time": 0.5410764083862305, "loss": 0.17767807841300964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.57799275402714, "step_time": 0.48454828834533686} +{"epoch": 0, "iter": 8063, "iter_tflops": 40.52298327623802, "iter_time": 0.5091207962036133, "loss": 0.10414592921733856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.364501877306374, "step_time": 0.46503606796264657} +{"epoch": 0, "iter": 8064, "iter_tflops": 44.61858089281303, "iter_time": 0.46238793563842767, "loss": 0.08109161257743835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62184577036266, "step_time": 0.4243173656463623} +{"epoch": 0, "iter": 8065, "iter_tflops": 21.64757075963941, "iter_time": 0.9530442810058595, "loss": 0.05152449384331703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.20211454409596, "step_time": 0.889190227508545} +{"epoch": 0, "iter": 8066, "iter_tflops": 20.452574454850968, "iter_time": 1.0087284393310545, "loss": 0.02034078538417816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.266736064931845, "step_time": 0.8165318012237549} +{"epoch": 0, "iter": 8067, "iter_tflops": 53.60595692484033, "iter_time": 0.38486568832397466, "loss": 0.04259856045246124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.73662678723779, "step_time": 0.3512475032806396} +{"epoch": 0, "iter": 8068, "iter_tflops": 52.68362433456797, "iter_time": 0.39160353469848636, "loss": 0.04227062687277794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.45223304163827, 
"step_time": 0.3590999412536621} +{"epoch": 0, "iter": 8069, "iter_tflops": 43.438021902330675, "iter_time": 0.474954719543457, "loss": 0.007000667508691549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.63065385637428, "step_time": 0.4331473922729492} +{"epoch": 0, "iter": 8070, "iter_tflops": 9.08242088873446, "iter_time": 2.2715412292480464, "loss": 0.0063969953916966915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.234709948368383, "step_time": 1.836370819091797} +{"epoch": 0, "iter": 8071, "iter_tflops": 11.79020879296177, "iter_time": 1.7498497161865234, "loss": 0.00491897389292717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.310829474246539, "step_time": 1.44164204788208} +{"epoch": 0, "iter": 8072, "iter_tflops": 43.214764617510696, "iter_time": 0.47740844345092776, "loss": 0.003858281299471855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.16816866034287, "step_time": 0.42831384468078615} +{"epoch": 0, "iter": 8073, "iter_tflops": 11.47948337714345, "iter_time": 1.3235610809326173, "loss": 0.36967939138412476, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 12.071726451252875, "step_time": 1.2586267166137695} +{"epoch": 0, "iter": 8074, "iter_tflops": 13.550785561495143, "iter_time": 1.1212484588623046, "loss": 0.49434757232666016, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 16.779229255176908, "step_time": 0.905512237548828} +{"epoch": 0, "iter": 8075, "iter_tflops": 26.342278831132095, "iter_time": 0.5767837142944335, "loss": 0.5853064060211182, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.008182127452287, "step_time": 0.5424771003723144} +{"epoch": 0, "iter": 8076, "iter_tflops": 28.15663441361785, "iter_time": 0.539616958618164, "loss": 0.3472636938095093, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.879239281391786, "step_time": 0.5085068359375} +{"epoch": 0, "iter": 8077, "iter_tflops": 23.888062800731277, "iter_time": 0.8636570358276368, "loss": 0.7713958024978638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.00700588382534, "step_time": 0.8250125427246093} +{"epoch": 0, "iter": 8078, "iter_tflops": 13.427789884509885, "iter_time": 1.536447448730469, "loss": 0.587635338306427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.645316612190005, "step_time": 1.1692107295989989} +{"epoch": 0, "iter": 8079, "iter_tflops": 34.15647918800863, "iter_time": 0.6040169830322265, "loss": 0.5530368685722351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.71686488383454, "step_time": 0.5469991626739502} +{"epoch": 0, "iter": 8080, "iter_tflops": 39.50397847839685, "iter_time": 0.5222535629272461, "loss": 0.4596419930458069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.74290103650701, "step_time": 0.4826788311004639} +{"epoch": 0, "iter": 8081, "iter_tflops": 23.57635004488307, "iter_time": 0.8750758056640625, "loss": 0.6467541456222534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.443376100253545, "step_time": 0.8108630485534669} +{"epoch": 0, "iter": 8082, "iter_tflops": 14.708098198302762, "iter_time": 1.4027030029296874, "loss": 0.51207035779953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.05594780763401, "step_time": 1.2096128425598145} +{"epoch": 0, "iter": 8083, "iter_tflops": 45.82746773590465, "iter_time": 0.45019056320190426, "loss": 0.6568010449409485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.732767827855035, "step_time": 0.41483903694152835} +{"epoch": 0, "iter": 8084, "iter_tflops": 50.141193943658855, "iter_time": 0.41145995712280276, "loss": 0.6336233019828796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.38936331511021, "step_time": 0.3793222103118896} +{"epoch": 0, "iter": 8085, "iter_tflops": 33.806765151366044, "iter_time": 0.6102652359008789, "loss": 0.12867194414138794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.21517791304841, "step_time": 0.5696808547973633} +{"epoch": 0, "iter": 8086, "iter_tflops": 16.987983657875425, "iter_time": 1.214452163696289, "loss": 0.2393442690372467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.6964650314662, "step_time": 0.9090002994537354} +{"epoch": 0, "iter": 8087, "iter_tflops": 34.46635271363386, "iter_time": 0.5985865020751954, "loss": 0.2003360390663147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.676681954468584, "step_time": 0.5475825481414796} +{"epoch": 0, "iter": 8088, "iter_tflops": 39.43026103927979, "iter_time": 0.5232299499511719, "loss": 0.2170829176902771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.871241986489615, "step_time": 0.48123386573791505} +{"epoch": 0, "iter": 8089, "iter_tflops": 21.751345906698827, "iter_time": 0.948497329711914, "loss": 1.137336015701294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.085215622525293, "step_time": 0.8936929092407226} +{"epoch": 0, "iter": 8090, "iter_tflops": 22.5120671298552, "iter_time": 0.9164459838867187, "loss": 0.966421365737915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.10450290949987, "step_time": 0.734084981918335} +{"epoch": 0, "iter": 8091, "iter_tflops": 49.074383562810134, "iter_time": 0.42040453720092774, "loss": 0.6461685299873352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.704589058531, "step_time": 0.39144776344299315} +{"epoch": 0, "iter": 8092, "iter_tflops": 43.92833270337501, "iter_time": 0.4696534614562988, "loss": 0.6959327459335327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81787280227586, "step_time": 0.4406670417785644} +{"epoch": 0, "iter": 8093, "iter_tflops": 7.495445161768761, "iter_time": 0.6938362426757811, "loss": 0.0030000146944075823, "lr": 3e-05, "seqlen": 2112.0, "step_tflops": 7.943588529196064, "step_time": 0.6546929626464844} +{"epoch": 0, "iter": 8094, "iter_tflops": 7.257019687550644, "iter_time": 0.7166318588256837, "loss": 0.005571144167333841, "lr": 3e-05, "seqlen": 2112.0, "step_tflops": 8.161695361279195, "step_time": 0.6371974544525146} +{"epoch": 0, "iter": 8095, "iter_tflops": 15.407542407972842, "iter_time": 0.3375367317199707, "loss": 0.007163228001445532, "lr": 3e-05, "seqlen": 2112.0, "step_tflops": 
17.00020226292183, "step_time": 0.3059146842956543} +{"epoch": 0, "iter": 8096, "iter_tflops": 15.77240062134343, "iter_time": 0.32972859573364255, "loss": 0.003094853600487113, "lr": 3e-05, "seqlen": 2112.0, "step_tflops": 17.282895933433522, "step_time": 0.30091088485717776} +{"epoch": 0, "iter": 8097, "iter_tflops": 33.57863823294436, "iter_time": 0.6144112625122071, "loss": 0.004485453013330698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.83869816785005, "step_time": 0.5756652603149414} +{"epoch": 0, "iter": 8098, "iter_tflops": 12.672296393999417, "iter_time": 1.6280469512939453, "loss": 0.02095230109989643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.287481564719503, "step_time": 1.2666840744018555} +{"epoch": 0, "iter": 8099, "iter_tflops": 48.351524143385106, "iter_time": 0.4266896209716797, "loss": 0.003795750206336379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.555658567688965, "step_time": 0.3852271461486816} +{"epoch": 0, "iter": 8100, "iter_tflops": 47.2128302304448, "iter_time": 0.4369806556701661, "loss": 0.0038847392424941063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.20419169290176, "step_time": 0.39519994163513184} +{"epoch": 0, "iter": 8101, "iter_tflops": 22.003986870229863, "iter_time": 0.9376070632934571, "loss": 0.2523828446865082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.887225807526864, "step_time": 0.863687297821045} +{"epoch": 0, "iter": 8102, "iter_tflops": 23.781435755997293, "iter_time": 0.8675293502807617, "loss": 0.17241252958774567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.70514414148797, "step_time": 0.7187246093749999} +{"epoch": 0, "iter": 8103, "iter_tflops": 39.87820531297821, "iter_time": 0.5173526077270508, "loss": 0.27287158370018005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66691325928316, "step_time": 0.4724651222229004} +{"epoch": 0, "iter": 8104, "iter_tflops": 39.471717948643175, "iter_time": 0.522680404663086, "loss": 0.18928752839565277, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 43.43997729657995, "step_time": 0.47493334007263177} +{"epoch": 0, "iter": 8105, "iter_tflops": 21.16685580872926, "iter_time": 0.974688621520996, "loss": 0.006618501618504524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.47535432832339, "step_time": 0.917942970275879} +{"epoch": 0, "iter": 8106, "iter_tflops": 26.72357632732084, "iter_time": 0.7720184326171875, "loss": 0.006429625675082207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.53900770630498, "step_time": 0.6151372661590576} +{"epoch": 0, "iter": 8107, "iter_tflops": 52.340260879628964, "iter_time": 0.3941725387573242, "loss": 0.006604798138141632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.090630846416325, "step_time": 0.361374418258667} +{"epoch": 0, "iter": 8108, "iter_tflops": 60.22649621151791, "iter_time": 0.342558422088623, "loss": 0.006385189946740866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.28973794289864, "step_time": 0.311226053237915} +{"epoch": 0, "iter": 8109, "iter_tflops": 38.3254361475128, "iter_time": 0.5383133392333985, "loss": 0.6012480854988098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.41515311989111, "step_time": 0.4981532592773438} +{"epoch": 0, "iter": 8110, "iter_tflops": 47.164648545340626, "iter_time": 0.43742705917358404, "loss": 0.8700548410415649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.3122260757936, "step_time": 0.40206974220275876} +{"epoch": 0, "iter": 8111, "iter_tflops": 48.25581204267857, "iter_time": 0.42753593063354495, "loss": 0.6675649285316467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33817125225503, "step_time": 0.3941882762908936} +{"epoch": 0, "iter": 8112, "iter_tflops": 51.071716234123436, "iter_time": 0.403963191986084, "loss": 0.6232114434242249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.121528597878516, "step_time": 0.37428376960754395} +{"epoch": 0, "iter": 8113, "iter_tflops": 35.37885916104424, "iter_time": 0.5831475067138672, "loss": 0.6335560083389282, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 37.935727707849246, "step_time": 0.5438433570861816} +{"epoch": 0, "iter": 8114, "iter_tflops": 16.294443566268832, "iter_time": 1.2661428680419924, "loss": 0.772762656211853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.95449874090772, "step_time": 1.0884536590576173} +{"epoch": 0, "iter": 8115, "iter_tflops": 48.54886877830594, "iter_time": 0.42495518493652346, "loss": 0.7721924781799316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.62891885450374, "step_time": 0.3920105895996094} +{"epoch": 0, "iter": 8116, "iter_tflops": 47.30070033483477, "iter_time": 0.43616888046264646, "loss": 0.788267970085144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.76882304998347, "step_time": 0.4063732872009277} +{"epoch": 0, "iter": 8117, "iter_tflops": 40.962652933555475, "iter_time": 0.5036561851501465, "loss": 0.26982975006103516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.88875990593294, "step_time": 0.45960488891601564} +{"epoch": 0, "iter": 8118, "iter_tflops": 48.97367662810661, "iter_time": 0.42126903533935545, "loss": 0.2860139310359955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36905130165788, "step_time": 0.38657411003112796} +{"epoch": 0, "iter": 8119, "iter_tflops": 51.28134760234505, "iter_time": 0.4023118438720703, "loss": 0.26194649934768677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.77521388545173, "step_time": 0.36989716529846195} +{"epoch": 0, "iter": 8120, "iter_tflops": 50.72296739259989, "iter_time": 0.406740665435791, "loss": 0.23560506105422974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.852863668948224, "step_time": 0.37611698150634765} +{"epoch": 0, "iter": 8121, "iter_tflops": 39.42754000718717, "iter_time": 0.5232660598754882, "loss": 0.1605883538722992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.57487588454156, "step_time": 0.4734630470275879} +{"epoch": 0, "iter": 8122, "iter_tflops": 45.1712064249196, "iter_time": 0.45673107147216796, "loss": 0.13448046147823334, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 49.29123515668931, "step_time": 0.4185550117492676} +{"epoch": 0, "iter": 8123, "iter_tflops": 48.613764108732205, "iter_time": 0.4243879051208496, "loss": 0.21272160112857819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43680161516805, "step_time": 0.39344683265686037} +{"epoch": 0, "iter": 8124, "iter_tflops": 52.74824338198292, "iter_time": 0.39112380218505854, "loss": 0.14623495936393738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.17693510932633, "step_time": 0.360828950881958} +{"epoch": 0, "iter": 8125, "iter_tflops": 32.855223782296605, "iter_time": 0.6279395217895508, "loss": 0.6476151347160339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.936781360656816, "step_time": 0.5905264511108399} +{"epoch": 0, "iter": 8126, "iter_tflops": 12.947404555202246, "iter_time": 1.5934539947509765, "loss": 0.799397349357605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.369288816282456, "step_time": 1.342358371734619} +{"epoch": 0, "iter": 8127, "iter_tflops": 42.5518102177517, "iter_time": 0.484846435546875, "loss": 0.6656856536865234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.03837064600936, "step_time": 0.4481282291412354} +{"epoch": 0, "iter": 8128, "iter_tflops": 44.294118266653626, "iter_time": 0.4657750129699707, "loss": 0.8925701379776001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.720120000969445, "step_time": 0.43233532333374025} +{"epoch": 0, "iter": 8129, "iter_tflops": 42.9760467267408, "iter_time": 0.4800602912902832, "loss": 0.460958331823349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.02485025868859, "step_time": 0.43872746849060057} +{"epoch": 0, "iter": 8130, "iter_tflops": 35.86290506888298, "iter_time": 0.5752766952514649, "loss": 0.4696888327598572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.34243457680471, "step_time": 0.5243979873657226} +{"epoch": 0, "iter": 8131, "iter_tflops": 37.473424061113285, "iter_time": 0.5505526657104493, "loss": 0.4401579201221466, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 41.01279193479809, "step_time": 0.503040454864502} +{"epoch": 0, "iter": 8132, "iter_tflops": 41.253797585667, "iter_time": 0.5001016807556152, "loss": 0.6031839847564697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.233592344489715, "step_time": 0.4561011505126954} +{"epoch": 0, "iter": 8133, "iter_tflops": 35.340014616127704, "iter_time": 0.5837884826660157, "loss": 0.6921349763870239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.9954531368652, "step_time": 0.529064079284668} +{"epoch": 0, "iter": 8134, "iter_tflops": 37.18377343322488, "iter_time": 0.55484130859375, "loss": 0.8421196341514587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.72302160252629, "step_time": 0.506619909286499} +{"epoch": 0, "iter": 8135, "iter_tflops": 34.72470585330694, "iter_time": 0.5941329956054687, "loss": 0.6649383902549744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.81835904723516, "step_time": 0.5455311660766602} +{"epoch": 0, "iter": 8136, "iter_tflops": 37.23376324967563, "iter_time": 0.5540963821411132, "loss": 0.8118571043014526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.569510115555424, "step_time": 0.5085369148254394} +{"epoch": 0, "iter": 8137, "iter_tflops": 20.720391549521747, "iter_time": 0.9956903305053713, "loss": 0.10053713619709015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.26125432848842, "step_time": 0.9267713851928711} +{"epoch": 0, "iter": 8138, "iter_tflops": 18.46606288115924, "iter_time": 1.1172437591552735, "loss": 0.11063358932733536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.959060587330214, "step_time": 0.9395253238677976} +{"epoch": 0, "iter": 8139, "iter_tflops": 38.75434074109652, "iter_time": 0.5323556823730469, "loss": 0.11323940008878708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.659097527918966, "step_time": 0.4836270503997803} +{"epoch": 0, "iter": 8140, "iter_tflops": 42.402948358663856, "iter_time": 0.4865485610961914, "loss": 0.12038987129926682, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 46.60526387476176, "step_time": 0.442677324295044} +{"epoch": 0, "iter": 8141, "iter_tflops": 24.53762283425738, "iter_time": 0.840794303894043, "loss": 0.11137232184410095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.188718657577226, "step_time": 0.7877855262756347} +{"epoch": 0, "iter": 8142, "iter_tflops": 15.683893259515068, "iter_time": 1.3154319000244141, "loss": 0.15475067496299744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.376243465449797, "step_time": 1.1873160934448244} +{"epoch": 0, "iter": 8143, "iter_tflops": 11.45671054030589, "iter_time": 1.800786834716797, "loss": 0.13093845546245575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.839822668647205, "step_time": 1.490705047607422} +{"epoch": 0, "iter": 8144, "iter_tflops": 31.412888176582992, "iter_time": 0.6567716217041015, "loss": 0.18556638062000275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.6378629094921, "step_time": 0.5339605236053467} +{"epoch": 0, "iter": 8145, "iter_tflops": 13.37804052073504, "iter_time": 1.2398267059326171, "loss": 0.5591718554496765, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 14.276020293972827, "step_time": 1.1618400344848634} +{"epoch": 0, "iter": 8146, "iter_tflops": 12.36557350911423, "iter_time": 1.341341094970703, "loss": 0.35098597407341003, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 14.452812274308146, "step_time": 1.147627990722656} +{"epoch": 0, "iter": 8147, "iter_tflops": 24.689129275151732, "iter_time": 0.6718119430541992, "loss": 0.3233642280101776, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 26.508676021721275, "step_time": 0.6256989936828614} +{"epoch": 0, "iter": 8148, "iter_tflops": 24.293652169361888, "iter_time": 0.6827483901977538, "loss": 0.326903373003006, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 25.9811321163242, "step_time": 0.6384037399291992} +{"epoch": 0, "iter": 8149, "iter_tflops": 16.899859263689592, "iter_time": 0.6705067749023437, "loss": 0.0013681896962225437, "lr": 3e-05, "seqlen": 4560.0, 
"step_tflops": 18.361079275865137, "step_time": 0.6171461906433106} +{"epoch": 0, "iter": 8150, "iter_tflops": 6.2143458545087045, "iter_time": 1.8234373168945313, "loss": 0.005926354788243771, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 7.640613239520127, "step_time": 1.4830576782226559} +{"epoch": 0, "iter": 8151, "iter_tflops": 5.696939622958461, "iter_time": 1.9890451507568359, "loss": 0.01353514939546585, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 6.866931159824868, "step_time": 1.6501505355834962} +{"epoch": 0, "iter": 8152, "iter_tflops": 20.694385571797696, "iter_time": 0.5475625305175782, "loss": 0.0021516024135053158, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 23.061766284197912, "step_time": 0.4913530902862549} +{"epoch": 0, "iter": 8153, "iter_tflops": 24.82200814581709, "iter_time": 0.6599509429931641, "loss": 0.3899490535259247, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 26.435019880331975, "step_time": 0.6196820640563964} +{"epoch": 0, "iter": 8154, "iter_tflops": 12.622612723131846, "iter_time": 1.2977747192382814, "loss": 0.3172089159488678, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 15.171327150802076, "step_time": 1.0797544288635252} +{"epoch": 0, "iter": 8155, "iter_tflops": 28.80978541052172, "iter_time": 0.5686022109985351, "loss": 0.36113256216049194, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 30.84329512917436, "step_time": 0.5311140594482422} +{"epoch": 0, "iter": 8156, "iter_tflops": 30.03687274570432, "iter_time": 0.5453732757568359, "loss": 0.5788055658340454, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 31.90858811041244, "step_time": 0.5133824043273926} +{"epoch": 0, "iter": 8157, "iter_tflops": 21.13603961125347, "iter_time": 0.9761097106933594, "loss": 0.7138457894325256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.472320143705343, "step_time": 0.918066909790039} +{"epoch": 0, "iter": 8158, "iter_tflops": 17.90563039410453, "iter_time": 1.152212631225586, "loss": 0.7537142634391785, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 21.679049851794396, "step_time": 0.9516604118347167} +{"epoch": 0, "iter": 8159, "iter_tflops": 40.554466833924465, "iter_time": 0.5087255516052246, "loss": 0.639029324054718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.9534096746025, "step_time": 0.4693855075836182} +{"epoch": 0, "iter": 8160, "iter_tflops": 48.05207474046664, "iter_time": 0.42934865188598637, "loss": 0.7555699944496155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.96567514142715, "step_time": 0.397013864517212} +{"epoch": 0, "iter": 8161, "iter_tflops": 25.260465407239778, "iter_time": 0.8167344970703125, "loss": 0.630481481552124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.59280903371578, "step_time": 0.7758147506713867} +{"epoch": 0, "iter": 8162, "iter_tflops": 16.82413264623059, "iter_time": 1.2262797698974608, "loss": 0.7307085394859314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.418842977390327, "step_time": 1.010394836425781} +{"epoch": 0, "iter": 8163, "iter_tflops": 44.926556220472136, "iter_time": 0.4592182273864746, "loss": 0.7020749449729919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.414713511382445, "step_time": 0.426132719039917} +{"epoch": 0, "iter": 8164, "iter_tflops": 48.552183766872766, "iter_time": 0.4249261703491211, "loss": 0.892528772354126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.22330928470872, "step_time": 0.395055269241333} +{"epoch": 0, "iter": 8165, "iter_tflops": 41.5722231530488, "iter_time": 0.49627111434936516, "loss": 0.6447288990020752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.19555848933277, "step_time": 0.456484977722168} +{"epoch": 0, "iter": 8166, "iter_tflops": 11.386253203644651, "iter_time": 1.8119299774169921, "loss": 0.753840446472168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.342555481089136, "step_time": 1.546262523651123} +{"epoch": 0, "iter": 8167, "iter_tflops": 14.925409062807953, "iter_time": 1.3822799377441406, "loss": 0.5100582838058472, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 17.532271367695255, "step_time": 1.1767496109008788} +{"epoch": 0, "iter": 8168, "iter_tflops": 23.686051892915543, "iter_time": 0.8710228958129883, "loss": 0.6141955852508545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.29125532414741, "step_time": 0.7043430976867675} +{"epoch": 0, "iter": 8169, "iter_tflops": 20.350927667901047, "iter_time": 0.8311625061035156, "loss": 0.3734671473503113, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 21.409886631780633, "step_time": 0.7900522003173828} +{"epoch": 0, "iter": 8170, "iter_tflops": 7.67123914690193, "iter_time": 2.2049798889160157, "loss": 0.26836323738098145, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 9.64543946739886, "step_time": 1.7536710586547852} +{"epoch": 0, "iter": 8171, "iter_tflops": 9.971884841485425, "iter_time": 1.6962618713378905, "loss": 0.4165152907371521, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 12.284479484501407, "step_time": 1.3769348602294922} +{"epoch": 0, "iter": 8172, "iter_tflops": 25.846066004316558, "iter_time": 0.6544488449096679, "loss": 0.513267457485199, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 31.715767363625744, "step_time": 0.533328670501709} +{"epoch": 0, "iter": 8173, "iter_tflops": 27.343466969023282, "iter_time": 0.6020945053100586, "loss": 0.175928995013237, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 29.37001135935527, "step_time": 0.5605497055053711} +{"epoch": 0, "iter": 8174, "iter_tflops": 29.164227793916698, "iter_time": 0.5645049591064453, "loss": 0.41991886496543884, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.18383821346231, "step_time": 0.5279449920654297} +{"epoch": 0, "iter": 8175, "iter_tflops": 27.67524809751029, "iter_time": 0.5948763732910156, "loss": 0.34655994176864624, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 29.366119584767016, "step_time": 0.5606239929199218} +{"epoch": 0, "iter": 8176, "iter_tflops": 30.34280660646336, "iter_time": 0.5425783920288086, "loss": 0.3942289352416992, "lr": 3e-05, "seqlen": 6576.0, 
"step_tflops": 32.242817305243356, "step_time": 0.5106052322387695} +{"epoch": 0, "iter": 8177, "iter_tflops": 25.416407166137702, "iter_time": 0.8117234420776367, "loss": 0.9487358927726746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.07086781908379, "step_time": 0.7621142272949218} +{"epoch": 0, "iter": 8178, "iter_tflops": 22.694800843673317, "iter_time": 0.9090669555664062, "loss": 0.8654528260231018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.024935781769038, "step_time": 0.7927433013916015} +{"epoch": 0, "iter": 8179, "iter_tflops": 44.49443814418593, "iter_time": 0.4636780319213867, "loss": 0.8677510023117065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.77807067918033, "step_time": 0.43181093788146974} +{"epoch": 0, "iter": 8180, "iter_tflops": 45.99608193125118, "iter_time": 0.44854023742675775, "loss": 0.8808656930923462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.300998824226426, "step_time": 0.41847212028503417} +{"epoch": 0, "iter": 8181, "iter_tflops": 29.492643512107655, "iter_time": 0.6995335464477539, "loss": 0.11180858314037323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.366481585648188, "step_time": 0.6577433128356933} +{"epoch": 0, "iter": 8182, "iter_tflops": 14.57067058072545, "iter_time": 1.4159330139160158, "loss": 0.1995546519756317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.39134158267132, "step_time": 1.1217829551696776} +{"epoch": 0, "iter": 8183, "iter_tflops": 48.835280779823236, "iter_time": 0.4224628829956055, "loss": 0.11788677424192429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.37969847207943, "step_time": 0.3864970035552978} +{"epoch": 0, "iter": 8184, "iter_tflops": 50.728363053924795, "iter_time": 0.4066974029541015, "loss": 0.12420762330293655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.91378290271459, "step_time": 0.37569973182678224} +{"epoch": 0, "iter": 8185, "iter_tflops": 41.73689935217368, "iter_time": 0.4943130378723145, "loss": 0.5738921165466309, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 45.505646730661354, "step_time": 0.45337436103820794} +{"epoch": 0, "iter": 8186, "iter_tflops": 12.972449183348274, "iter_time": 1.5903776702880856, "loss": 0.4288162291049957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.094108294730411, "step_time": 1.4638097763061524} +{"epoch": 0, "iter": 8187, "iter_tflops": 12.365229484970198, "iter_time": 1.6684763946533203, "loss": 0.3167256712913513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.256297035624755, "step_time": 1.2691139602661132} +{"epoch": 0, "iter": 8188, "iter_tflops": 19.532077095009754, "iter_time": 1.0562672576904297, "loss": 0.40807846188545227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.045073585025417, "step_time": 0.8580174827575684} +{"epoch": 0, "iter": 8189, "iter_tflops": 23.17734515222227, "iter_time": 0.6573091049194336, "loss": 0.40852871537208557, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.6958167337255, "step_time": 0.6168931427001952} +{"epoch": 0, "iter": 8190, "iter_tflops": 11.568726199886237, "iter_time": 1.3168848266601563, "loss": 0.5411125421524048, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 13.599319171086721, "step_time": 1.1202531394958497} +{"epoch": 0, "iter": 8191, "iter_tflops": 27.429643527959538, "iter_time": 0.5554093322753906, "loss": 0.36574098467826843, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.269752779249607, "step_time": 0.5204922676086425} +{"epoch": 0, "iter": 8192, "iter_tflops": 26.861493655999094, "iter_time": 0.5671568450927734, "loss": 0.5128589868545532, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.384126118436626, "step_time": 0.5367323951721191} +{"epoch": 0, "iter": 8193, "iter_tflops": 24.509050305459297, "iter_time": 0.5583431167602539, "loss": 0.006131375674158335, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 26.657260596365198, "step_time": 0.5133483047485351} +{"epoch": 0, "iter": 8194, "iter_tflops": 19.907877333442734, "iter_time": 0.6873891830444336, "loss": 0.0052648731507360935, "lr": 
3e-05, "seqlen": 5488.0, "step_tflops": 25.66136007240612, "step_time": 0.5332710151672364} +{"epoch": 0, "iter": 8195, "iter_tflops": 34.33247858972063, "iter_time": 0.3985864143371582, "loss": 0.012211060151457787, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 37.48451937861913, "step_time": 0.3650696277618409} +{"epoch": 0, "iter": 8196, "iter_tflops": 36.41867464559874, "iter_time": 0.37575391387939455, "loss": 0.037457291036844254, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 40.00138048130158, "step_time": 0.34209968185424805} +{"epoch": 0, "iter": 8197, "iter_tflops": 27.746316977852274, "iter_time": 0.7435615158081055, "loss": 0.7772426605224609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.353190244149555, "step_time": 0.7028569412231447} +{"epoch": 0, "iter": 8198, "iter_tflops": 18.899946539225844, "iter_time": 1.0915953369140627, "loss": 0.6366478204727173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.770105267762663, "step_time": 0.9060605239868165} +{"epoch": 0, "iter": 8199, "iter_tflops": 47.76255350759593, "iter_time": 0.43195122528076174, "loss": 0.8979829549789429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.820534796809525, "step_time": 0.39812583160400383} +{"epoch": 0, "iter": 8200, "iter_tflops": 48.635595552776714, "iter_time": 0.42419740676879886, "loss": 0.6636717319488525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.24828152437889, "step_time": 0.39486645126342773} +{"epoch": 0, "iter": 8201, "iter_tflops": 45.951444417947556, "iter_time": 0.4489759521484375, "loss": 0.06945343315601349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.1108990866735, "step_time": 0.4117087078094483} +{"epoch": 0, "iter": 8202, "iter_tflops": 49.97580064909277, "iter_time": 0.41282167053222657, "loss": 0.055682193487882614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.514317147833864, "step_time": 0.37845275497436526} +{"epoch": 0, "iter": 8203, "iter_tflops": 51.46598876947456, "iter_time": 0.4008684959411621, "loss": 
0.07386395335197449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.02662535142204, "step_time": 0.3682373046875} +{"epoch": 0, "iter": 8204, "iter_tflops": 48.093331594823404, "iter_time": 0.4289803352355957, "loss": 0.06277258694171906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.436029104430155, "step_time": 0.3934526290893555} +{"epoch": 0, "iter": 8205, "iter_tflops": 35.87573090272179, "iter_time": 0.5750710296630859, "loss": 0.8924242854118347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.64535691653982, "step_time": 0.5338569793701171} +{"epoch": 0, "iter": 8206, "iter_tflops": 43.751453173060575, "iter_time": 0.47155218887329103, "loss": 0.8071290254592896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31794210250389, "step_time": 0.4360099487304687} +{"epoch": 0, "iter": 8207, "iter_tflops": 46.580996400538915, "iter_time": 0.4429079475402832, "loss": 0.8464695811271667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.07476425512903, "step_time": 0.4120058040618897} +{"epoch": 0, "iter": 8208, "iter_tflops": 45.66385293862698, "iter_time": 0.4518036079406738, "loss": 0.5956717729568481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.00387507589529, "step_time": 0.42100942993164064} +{"epoch": 0, "iter": 8209, "iter_tflops": 39.73948546413892, "iter_time": 0.5191585464477539, "loss": 0.7921515107154846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24619047347442, "step_time": 0.4770615234375} +{"epoch": 0, "iter": 8210, "iter_tflops": 40.51043642896943, "iter_time": 0.5092784805297852, "loss": 0.785014808177948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.373481714464276, "step_time": 0.47566145706176755} +{"epoch": 0, "iter": 8211, "iter_tflops": 49.9749329351195, "iter_time": 0.4128288383483886, "loss": 0.868476927280426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.019218527744265, "step_time": 0.3819213619232178} +{"epoch": 0, "iter": 8212, "iter_tflops": 44.36175661495163, "iter_time": 0.4650648460388183, "loss": 
0.7290540337562561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.924658644334485, "step_time": 0.43049015045166017} +{"epoch": 0, "iter": 8213, "iter_tflops": 40.17606317448306, "iter_time": 0.5135170516967773, "loss": 0.01600375398993492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59616754719382, "step_time": 0.47323181533813474} +{"epoch": 0, "iter": 8214, "iter_tflops": 29.45432848340574, "iter_time": 0.7004435195922851, "loss": 0.03890964388847351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.32723472701405, "step_time": 0.5527088642120361} +{"epoch": 0, "iter": 8215, "iter_tflops": 40.84915251044129, "iter_time": 0.505055606842041, "loss": 0.03665883466601372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.810226402050276, "step_time": 0.46041038322448735} +{"epoch": 0, "iter": 8216, "iter_tflops": 40.2942329866054, "iter_time": 0.5120110740661621, "loss": 0.07072403281927109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.53101518365556, "step_time": 0.46329717445373536} +{"epoch": 0, "iter": 8217, "iter_tflops": 22.780330592965306, "iter_time": 0.9056538238525391, "loss": 0.7682173848152161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.67085793828293, "step_time": 0.8362535896301269} +{"epoch": 0, "iter": 8218, "iter_tflops": 16.793629708939978, "iter_time": 1.228507110595703, "loss": 0.6687939763069153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.62469873214787, "step_time": 0.954052297592163} +{"epoch": 0, "iter": 8219, "iter_tflops": 47.476677328363856, "iter_time": 0.4345521774291992, "loss": 0.5507371425628662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.453225503829, "step_time": 0.40096793365478517} +{"epoch": 0, "iter": 8220, "iter_tflops": 49.92689874662305, "iter_time": 0.413226016998291, "loss": 0.6485932469367981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.87446517065011, "step_time": 0.3829475326538086} +{"epoch": 0, "iter": 8221, "iter_tflops": 29.401911477035615, "iter_time": 0.701692253112793, "loss": 
0.8043642640113831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.01315723146648, "step_time": 0.665236801147461} +{"epoch": 0, "iter": 8222, "iter_tflops": 13.814198876568966, "iter_time": 1.4934701385498046, "loss": 0.7924156785011292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.905048670601795, "step_time": 1.2971411743164063} +{"epoch": 0, "iter": 8223, "iter_tflops": 47.15698088593482, "iter_time": 0.4374981842041015, "loss": 0.8983696103096008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.030029500836676, "step_time": 0.40429319190979} +{"epoch": 0, "iter": 8224, "iter_tflops": 44.07347569993415, "iter_time": 0.46810679626464846, "loss": 0.7697674036026001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.119085132126266, "step_time": 0.43785004425048824} +{"epoch": 0, "iter": 8225, "iter_tflops": 29.838878502726995, "iter_time": 0.691416519165039, "loss": 0.3403264880180359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.69523036824521, "step_time": 0.6509210777282715} +{"epoch": 0, "iter": 8226, "iter_tflops": 12.571691926497126, "iter_time": 1.6410753326416017, "loss": 0.30638933181762695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.539646757975108, "step_time": 1.4189542465209959} +{"epoch": 0, "iter": 8227, "iter_tflops": 40.303490554570466, "iter_time": 0.5118934669494629, "loss": 0.40367668867111206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.4225615564067, "step_time": 0.4260636539459228} +{"epoch": 0, "iter": 8228, "iter_tflops": 48.150883773482896, "iter_time": 0.42846759796142575, "loss": 0.4509963393211365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.12946038146213, "step_time": 0.3957664890289307} +{"epoch": 0, "iter": 8229, "iter_tflops": 32.727693785733855, "iter_time": 0.6303864135742187, "loss": 0.7025589346885681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.91274660536838, "step_time": 0.5909329833984375} +{"epoch": 0, "iter": 8230, "iter_tflops": 23.092786448322116, "iter_time": 0.893399917602539, "loss": 
0.6746799349784851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.79881405862173, "step_time": 0.7163869132995605} +{"epoch": 0, "iter": 8231, "iter_tflops": 44.608819833250266, "iter_time": 0.46248911285400385, "loss": 0.6615261435508728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.225932879720126, "step_time": 0.427800817489624} +{"epoch": 0, "iter": 8232, "iter_tflops": 49.38531110923118, "iter_time": 0.4177576904296875, "loss": 0.6494933366775513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.473765351776926, "step_time": 0.3858171081542968} +{"epoch": 0, "iter": 8233, "iter_tflops": 31.931382170692377, "iter_time": 0.6461071243286134, "loss": 0.6869282722473145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.9688101659818, "step_time": 0.6073540229797363} +{"epoch": 0, "iter": 8234, "iter_tflops": 14.650076893554592, "iter_time": 1.4082583770751953, "loss": 0.8390122056007385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.161341911305264, "step_time": 1.1359894886016846} +{"epoch": 0, "iter": 8235, "iter_tflops": 37.99825913212707, "iter_time": 0.5429483871459961, "loss": 0.7842633128166199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.54657336569221, "step_time": 0.49657749938964846} +{"epoch": 0, "iter": 8236, "iter_tflops": 38.21440511706072, "iter_time": 0.5398773956298828, "loss": 0.665572464466095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.8207579434803, "step_time": 0.4933218460083007} +{"epoch": 0, "iter": 8237, "iter_tflops": 26.194681909749107, "iter_time": 0.7876061859130861, "loss": 0.10701838880777359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.199617429164807, "step_time": 0.7316089859008787} +{"epoch": 0, "iter": 8238, "iter_tflops": 8.67196465427918, "iter_time": 2.379056457519531, "loss": 0.0991637259721756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.482606091826101, "step_time": 1.9681263732910155} +{"epoch": 0, "iter": 8239, "iter_tflops": 14.77424399708136, "iter_time": 1.3964229583740233, "loss": 
0.1416780799627304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.435114071270426, "step_time": 1.1833070564270018} +{"epoch": 0, "iter": 8240, "iter_tflops": 35.85026805864658, "iter_time": 0.575479476928711, "loss": 0.09996265918016434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27994904492776, "step_time": 0.4186508693695069} +{"epoch": 0, "iter": 8241, "iter_tflops": 17.261086593664338, "iter_time": 0.9347783660888673, "loss": 0.3835405707359314, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 18.069549642892206, "step_time": 0.8929547576904298} +{"epoch": 0, "iter": 8242, "iter_tflops": 11.655681969602751, "iter_time": 1.3843282928466796, "loss": 0.47619712352752686, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 13.5320871197992, "step_time": 1.192372631072998} +{"epoch": 0, "iter": 8243, "iter_tflops": 24.54357059089859, "iter_time": 0.6574141387939454, "loss": 0.42259013652801514, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 26.337694302871448, "step_time": 0.6126310882568359} +{"epoch": 0, "iter": 8244, "iter_tflops": 23.19456677107806, "iter_time": 0.6956495666503907, "loss": 0.4971599876880646, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 24.98049759266325, "step_time": 0.6459154891967773} +{"epoch": 0, "iter": 8245, "iter_tflops": 17.453079616945427, "iter_time": 1.1820890045166015, "loss": 0.37369251251220703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.003774340355843, "step_time": 1.0856313667297364} +{"epoch": 0, "iter": 8246, "iter_tflops": 15.990528300081623, "iter_time": 1.2902071228027343, "loss": 0.1866617649793625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.66635629257792, "step_time": 0.998293710708618} +{"epoch": 0, "iter": 8247, "iter_tflops": 47.43609636960872, "iter_time": 0.43492393112182615, "loss": 0.2725025713443756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.394715191661774, "step_time": 0.40142441558837894} +{"epoch": 0, "iter": 8248, "iter_tflops": 55.76054865575724, "iter_time": 0.3699944496154785, "loss": 
0.23463749885559082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.39396876231797, "step_time": 0.3416085071563721} +{"epoch": 0, "iter": 8249, "iter_tflops": 28.46943195763504, "iter_time": 0.7246752777099609, "loss": 0.7224465012550354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.06547561182149, "step_time": 0.6862054595947266} +{"epoch": 0, "iter": 8250, "iter_tflops": 11.917484760426099, "iter_time": 1.7311617279052733, "loss": 0.697352945804596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.713199241451285, "step_time": 1.2344191684722903} +{"epoch": 0, "iter": 8251, "iter_tflops": 36.83208161403524, "iter_time": 0.5601392211914061, "loss": 0.7423904538154602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.787699885949536, "step_time": 0.5185294342041016} +{"epoch": 0, "iter": 8252, "iter_tflops": 34.59834584270247, "iter_time": 0.5963028869628907, "loss": 0.804608941078186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.605076673372494, "step_time": 0.5486252212524414} +{"epoch": 0, "iter": 8253, "iter_tflops": 20.761818741928312, "iter_time": 0.9937035751342773, "loss": 0.6321958899497986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.051438702718844, "step_time": 0.935589454650879} +{"epoch": 0, "iter": 8254, "iter_tflops": 22.274111740631174, "iter_time": 0.9262364196777345, "loss": 0.8293863534927368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.75549797112761, "step_time": 0.7710973472595215} +{"epoch": 0, "iter": 8255, "iter_tflops": 43.22214606942338, "iter_time": 0.47732691192626947, "loss": 0.8500622510910034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.39378423186249, "step_time": 0.44469520759582515} +{"epoch": 0, "iter": 8256, "iter_tflops": 47.37805183845656, "iter_time": 0.4354567718505859, "loss": 0.660475492477417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.97292299305402, "step_time": 0.40474613380432134} +{"epoch": 0, "iter": 8257, "iter_tflops": 40.02044191343547, "iter_time": 0.5155138854980469, "loss": 
0.621720552444458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.00689024995581, "step_time": 0.4797160034179688} +{"epoch": 0, "iter": 8258, "iter_tflops": 42.62616141984512, "iter_time": 0.48400073623657225, "loss": 0.46912598609924316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.834601893003665, "step_time": 0.450120491027832} +{"epoch": 0, "iter": 8259, "iter_tflops": 44.28624830734136, "iter_time": 0.46585778427124025, "loss": 0.8404853343963623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47479567766291, "step_time": 0.43456940078735345} +{"epoch": 0, "iter": 8260, "iter_tflops": 44.68035151627309, "iter_time": 0.46174868392944335, "loss": 0.7548061013221741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.117327440948316, "step_time": 0.42876640510559083} +{"epoch": 0, "iter": 8261, "iter_tflops": 25.351672534802532, "iter_time": 0.8137961502075195, "loss": 0.13325875997543335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.62198159429943, "step_time": 0.7749646072387695} +{"epoch": 0, "iter": 8262, "iter_tflops": 14.757192014801879, "iter_time": 1.3980365295410158, "loss": 0.09021804481744766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.9519413675247, "step_time": 1.1492402458190918} +{"epoch": 0, "iter": 8263, "iter_tflops": 40.7600373662234, "iter_time": 0.5061598281860351, "loss": 0.08868956565856934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.59534177802995, "step_time": 0.4626288909912109} +{"epoch": 0, "iter": 8264, "iter_tflops": 39.75919119026836, "iter_time": 0.5189012374877929, "loss": 0.06032268702983856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7095051290045, "step_time": 0.47200473785400393} +{"epoch": 0, "iter": 8265, "iter_tflops": 19.019474817319985, "iter_time": 1.0847351837158201, "loss": 0.8043902516365051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.300295584398267, "step_time": 1.0162952270507812} +{"epoch": 0, "iter": 8266, "iter_tflops": 40.300030254718074, "iter_time": 0.5119374198913573, 
"loss": 0.8005027174949646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.17325461347431, "step_time": 0.46704943275451655} +{"epoch": 0, "iter": 8267, "iter_tflops": 47.57097133821192, "iter_time": 0.433690818786621, "loss": 0.8487669229507446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.3641576405237, "step_time": 0.40166323089599604} +{"epoch": 0, "iter": 8268, "iter_tflops": 40.14428006025319, "iter_time": 0.5139236145019532, "loss": 0.824596643447876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.28406109746276, "step_time": 0.4766441268920898} +{"epoch": 0, "iter": 8269, "iter_tflops": 26.84313025501458, "iter_time": 0.7685800170898438, "loss": 0.6194846630096436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.234881346497556, "step_time": 0.7306952438354493} +{"epoch": 0, "iter": 8270, "iter_tflops": 12.513660971737572, "iter_time": 1.6486856689453124, "loss": 0.6925680637359619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.32061864800287, "step_time": 1.191129135131836} +{"epoch": 0, "iter": 8271, "iter_tflops": 34.25522519635073, "iter_time": 0.6022758102416993, "loss": 0.5939725041389465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82267403749088, "step_time": 0.5454689292907715} +{"epoch": 0, "iter": 8272, "iter_tflops": 37.77587009195838, "iter_time": 0.546144760131836, "loss": 0.8877922892570496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.122260053257506, "step_time": 0.5017013530731201} +{"epoch": 0, "iter": 8273, "iter_tflops": 21.14285076723556, "iter_time": 0.9757952575683594, "loss": 0.4842294752597809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.729795770073494, "step_time": 0.907667350769043} +{"epoch": 0, "iter": 8274, "iter_tflops": 25.855293284906157, "iter_time": 0.7979446716308595, "loss": 0.26856496930122375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.65719925598236, "step_time": 0.6517030563354491} +{"epoch": 0, "iter": 8275, "iter_tflops": 45.33026724750814, "iter_time": 0.45512843322753904, "loss": 
0.42739468812942505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.88966985443644, "step_time": 0.42199289894104003} +{"epoch": 0, "iter": 8276, "iter_tflops": 47.50152326305205, "iter_time": 0.4343248825073242, "loss": 0.33583012223243713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.37855060772818, "step_time": 0.4015507106781006} +{"epoch": 0, "iter": 8277, "iter_tflops": 34.582560853347196, "iter_time": 0.5965750656127929, "loss": 0.5825328230857849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.86822115991041, "step_time": 0.5595901527404785} +{"epoch": 0, "iter": 8278, "iter_tflops": 14.224651683034168, "iter_time": 1.4503760070800782, "loss": 0.612944483757019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.203106797764644, "step_time": 1.1992655601501465} +{"epoch": 0, "iter": 8279, "iter_tflops": 40.02465173398027, "iter_time": 0.5154596633911133, "loss": 0.7055317759513855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.793067623501926, "step_time": 0.47110409545898435} +{"epoch": 0, "iter": 8280, "iter_tflops": 36.71378311706051, "iter_time": 0.561944091796875, "loss": 0.6318891048431396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85814558702065, "step_time": 0.5176129798889161} +{"epoch": 0, "iter": 8281, "iter_tflops": 15.899419721020777, "iter_time": 1.1104243927001953, "loss": 0.06489597260951996, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 17.018668868979933, "step_time": 1.0373962631225586} +{"epoch": 0, "iter": 8282, "iter_tflops": 25.16854877345808, "iter_time": 0.7014748306274414, "loss": 0.03879234939813614, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 31.137568181108787, "step_time": 0.5670032863616943} +{"epoch": 0, "iter": 8283, "iter_tflops": 44.733009321147854, "iter_time": 0.3946773033142089, "loss": 0.030261686071753502, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 48.84308534322037, "step_time": 0.36146577072143554} +{"epoch": 0, "iter": 8284, "iter_tflops": 45.54363822857462, "iter_time": 0.38765246200561515, 
"loss": 0.05427887290716171, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 49.40435025220592, "step_time": 0.35735928916931153} +{"epoch": 0, "iter": 8285, "iter_tflops": 34.82900924137641, "iter_time": 0.592353729248047, "loss": 0.4396023750305176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.31789954150869, "step_time": 0.5528471260070801} +{"epoch": 0, "iter": 8286, "iter_tflops": 16.144723896594645, "iter_time": 1.277884567260742, "loss": 0.3526837229728699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.10552596400486, "step_time": 1.0798495445251466} +{"epoch": 0, "iter": 8287, "iter_tflops": 45.91275012171251, "iter_time": 0.44935433959960935, "loss": 0.2764835059642792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.55821192133367, "step_time": 0.41630019950866703} +{"epoch": 0, "iter": 8288, "iter_tflops": 46.53389845576917, "iter_time": 0.4433562240600586, "loss": 0.38027554750442505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.05827728211141, "step_time": 0.41214150047302245} +{"epoch": 0, "iter": 8289, "iter_tflops": 41.74321715131219, "iter_time": 0.494238224029541, "loss": 0.29620906710624695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.556045170170826, "step_time": 0.45287279510498046} +{"epoch": 0, "iter": 8290, "iter_tflops": 50.371905745195214, "iter_time": 0.40957540130615233, "loss": 0.25980329513549805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.01604592682391, "step_time": 0.375001386642456} +{"epoch": 0, "iter": 8291, "iter_tflops": 49.5566150654777, "iter_time": 0.41631361389160154, "loss": 0.2674286663532257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.88727493573067, "step_time": 0.3828565006256104} +{"epoch": 0, "iter": 8292, "iter_tflops": 51.60502856838821, "iter_time": 0.3997884330749511, "loss": 0.4239349663257599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.89918859898757, "step_time": 0.36907679748535155} +{"epoch": 0, "iter": 8293, "iter_tflops": 28.589430367371925, "iter_time": 0.7216335983276366, 
"loss": 0.5875121355056763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.312666939321876, "step_time": 0.6806096458435058} +{"epoch": 0, "iter": 8294, "iter_tflops": 15.05659303415973, "iter_time": 1.3702365112304689, "loss": 0.7667560577392578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.958310393109183, "step_time": 1.148832660675049} +{"epoch": 0, "iter": 8295, "iter_tflops": 34.99069221413885, "iter_time": 0.5896166152954101, "loss": 0.7683902382850647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.00333574325452, "step_time": 0.5428758583068848} +{"epoch": 0, "iter": 8296, "iter_tflops": 36.61805533004162, "iter_time": 0.5634131393432618, "loss": 0.7179693579673767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.853829897035894, "step_time": 0.5176690311431884} +{"epoch": 0, "iter": 8297, "iter_tflops": 32.803396152739175, "iter_time": 0.6289316329956054, "loss": 0.4965990483760834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.56654737050528, "step_time": 0.5642067680358887} +{"epoch": 0, "iter": 8298, "iter_tflops": 38.55212213430437, "iter_time": 0.5351480636596679, "loss": 0.31551748514175415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.2036008150029, "step_time": 0.48884675979614256} +{"epoch": 0, "iter": 8299, "iter_tflops": 40.44685692572695, "iter_time": 0.510079029083252, "loss": 0.5296934843063354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14230792759195, "step_time": 0.4673768653869629} +{"epoch": 0, "iter": 8300, "iter_tflops": 38.041453474158295, "iter_time": 0.5423318939208984, "loss": 0.4304291009902954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.680326181843164, "step_time": 0.49498397445678705} +{"epoch": 0, "iter": 8301, "iter_tflops": 12.38594237255217, "iter_time": 1.665686218261719, "loss": 0.6570433378219604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.999033429377263, "step_time": 1.5871251983642578} +{"epoch": 0, "iter": 8302, "iter_tflops": 21.068642365727545, "iter_time": 0.9792322235107422, 
"loss": 0.5116512775421143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.882647499285262, "step_time": 0.739925916671753} +{"epoch": 0, "iter": 8303, "iter_tflops": 41.67081034046699, "iter_time": 0.49509700775146487, "loss": 0.4343269169330597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.53620289205101, "step_time": 0.45307013320922856} +{"epoch": 0, "iter": 8304, "iter_tflops": 35.336969437348436, "iter_time": 0.5838387908935547, "loss": 0.505413293838501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.43202433038697, "step_time": 0.536820369720459} +{"epoch": 0, "iter": 8305, "iter_tflops": 35.53380907335151, "iter_time": 0.5806046142578124, "loss": 0.25034335255622864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.59413138365148, "step_time": 0.5210644302368164} +{"epoch": 0, "iter": 8306, "iter_tflops": 36.94570066581421, "iter_time": 0.5584166259765625, "loss": 0.1629204899072647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.74078135813651, "step_time": 0.5063990631103517} +{"epoch": 0, "iter": 8307, "iter_tflops": 41.32608587888974, "iter_time": 0.49922689437866213, "loss": 0.12962648272514343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.252097330019716, "step_time": 0.45591463661193843} +{"epoch": 0, "iter": 8308, "iter_tflops": 40.88576021822031, "iter_time": 0.5046033973693848, "loss": 0.18057170510292053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.88647632713818, "step_time": 0.4596282711029053} +{"epoch": 0, "iter": 8309, "iter_tflops": 21.10057255635489, "iter_time": 0.9777504119873046, "loss": 0.361092209815979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.470543905668833, "step_time": 0.9181394805908204} +{"epoch": 0, "iter": 8310, "iter_tflops": 19.599409609403466, "iter_time": 1.0526385192871095, "loss": 0.28031107783317566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.342431104548577, "step_time": 0.7831886672973633} +{"epoch": 0, "iter": 8311, "iter_tflops": 45.12394566927855, "iter_time": 0.4572094306945801, 
"loss": 0.35921603441238403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.65354079135262, "step_time": 0.42404094696044925} +{"epoch": 0, "iter": 8312, "iter_tflops": 48.05231254463495, "iter_time": 0.42934652709960935, "loss": 0.33725878596305847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.80367058256022, "step_time": 0.39825543785095213} +{"epoch": 0, "iter": 8313, "iter_tflops": 28.15185902046934, "iter_time": 0.38672416687011724, "loss": 0.00682280957698822, "lr": 3e-05, "seqlen": 4384.0, "step_tflops": 31.184230234256766, "step_time": 0.3491189022064209} +{"epoch": 0, "iter": 8314, "iter_tflops": 28.78273597989609, "iter_time": 0.3782477188110352, "loss": 0.0062244064174592495, "lr": 3e-05, "seqlen": 4384.0, "step_tflops": 32.05486484732509, "step_time": 0.33963656616210935} +{"epoch": 0, "iter": 8315, "iter_tflops": 29.52867844902052, "iter_time": 0.36869256591796873, "loss": 0.013589897193014622, "lr": 3e-05, "seqlen": 4384.0, "step_tflops": 32.39142153328379, "step_time": 0.3361076393127441} +{"epoch": 0, "iter": 8316, "iter_tflops": 28.759711184033577, "iter_time": 0.37855054092407225, "loss": 0.00191611482296139, "lr": 3e-05, "seqlen": 4384.0, "step_tflops": 31.557116351208897, "step_time": 0.3449936332702636} +{"epoch": 0, "iter": 8317, "iter_tflops": 47.60138294606603, "iter_time": 0.4334137420654297, "loss": 0.10037757456302643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.25740128829053, "step_time": 0.3947975406646729} +{"epoch": 0, "iter": 8318, "iter_tflops": 50.29982139889611, "iter_time": 0.4101623611450196, "loss": 0.08822689205408096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.84737513816512, "step_time": 0.37615461921691895} +{"epoch": 0, "iter": 8319, "iter_tflops": 55.60753535280336, "iter_time": 0.3710125503540039, "loss": 0.05537066236138344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.853706666353915, "step_time": 0.3390277214050293} +{"epoch": 0, "iter": 8320, "iter_tflops": 53.00900616154516, "iter_time": 
0.38919977951049806, "loss": 0.06779436022043228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.962510403455795, "step_time": 0.35593857765197756} +{"epoch": 0, "iter": 8321, "iter_tflops": 30.98179769880386, "iter_time": 0.6659101486206054, "loss": 0.6533831357955933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.016300280739856, "step_time": 0.6248759956359863} +{"epoch": 0, "iter": 8322, "iter_tflops": 17.641703189796765, "iter_time": 1.1694502105712892, "loss": 0.7834480404853821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.39753467112646, "step_time": 0.921132339477539} +{"epoch": 0, "iter": 8323, "iter_tflops": 44.08341132960157, "iter_time": 0.46800129318237305, "loss": 0.6917390823364258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.4425507506658, "step_time": 0.4348647613525391} +{"epoch": 0, "iter": 8324, "iter_tflops": 44.55558043545475, "iter_time": 0.4630417404174805, "loss": 0.7726690769195557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76654286151515, "step_time": 0.4319151496887207} +{"epoch": 0, "iter": 8325, "iter_tflops": 24.013904474316657, "iter_time": 0.6667947463989259, "loss": 0.17589853703975677, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 25.513638301451294, "step_time": 0.6275994491577148} +{"epoch": 0, "iter": 8326, "iter_tflops": 12.301185899311008, "iter_time": 1.3016911926269532, "loss": 0.14548414945602417, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 14.672123763018263, "step_time": 1.091344757080078} +{"epoch": 0, "iter": 8327, "iter_tflops": 26.462386386559754, "iter_time": 0.6050983123779297, "loss": 0.18158882856369019, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 35.12140139078192, "step_time": 0.4559141921997071} +{"epoch": 0, "iter": 8328, "iter_tflops": 38.747872571645544, "iter_time": 0.4132445030212402, "loss": 0.18446826934814453, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 41.98121193868345, "step_time": 0.3814169387817383} +{"epoch": 0, "iter": 8329, "iter_tflops": 21.610798377526528, 
"iter_time": 0.8131484069824219, "loss": 0.015331555157899857, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 22.646214591640653, "step_time": 0.7759701385498046} +{"epoch": 0, "iter": 8330, "iter_tflops": 11.680902912682127, "iter_time": 1.5044030761718747, "loss": 0.04258272796869278, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 17.149658182141593, "step_time": 1.0246726837158204} +{"epoch": 0, "iter": 8331, "iter_tflops": 36.89663442586944, "iter_time": 0.4762707099914551, "loss": 0.03404724970459938, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 40.602086995097636, "step_time": 0.43280500030517577} +{"epoch": 0, "iter": 8332, "iter_tflops": 35.148466547715245, "iter_time": 0.4999588317871093, "loss": 0.0376070998609066, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 38.81838654764106, "step_time": 0.4526923408508301} +{"epoch": 0, "iter": 8333, "iter_tflops": 28.768493069350043, "iter_time": 0.7171419601440431, "loss": 0.39587435126304626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.954065986757257, "step_time": 0.6665067367553712} +{"epoch": 0, "iter": 8334, "iter_tflops": 9.054452803493191, "iter_time": 2.2785577392578125, "loss": 0.4573950171470642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.697732199468732, "step_time": 1.9285483245849608} +{"epoch": 0, "iter": 8335, "iter_tflops": 14.312252413947846, "iter_time": 1.4414987182617187, "loss": 0.43726858496665955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.583179740848305, "step_time": 1.0535109100341797} +{"epoch": 0, "iter": 8336, "iter_tflops": 46.316441488508275, "iter_time": 0.4454377937316894, "loss": 0.37151047587394714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.03553468276461, "step_time": 0.4123288307189941} +{"epoch": 0, "iter": 8337, "iter_tflops": 22.809855644424832, "iter_time": 0.6678990097045898, "loss": 0.43955475091934204, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.200985212834, "step_time": 0.6295066032409667} +{"epoch": 0, "iter": 8338, "iter_tflops": 
21.239274705311363, "iter_time": 0.7172881469726562, "loss": 0.4772159159183502, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 22.853871928578013, "step_time": 0.6666126441955567} +{"epoch": 0, "iter": 8339, "iter_tflops": 22.217727124733045, "iter_time": 0.6856993026733398, "loss": 0.36935943365097046, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.014298377447236, "step_time": 0.6344003791809082} +{"epoch": 0, "iter": 8340, "iter_tflops": 23.75935954018941, "iter_time": 0.6412075195312501, "loss": 0.28675180673599243, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 25.495950357975964, "step_time": 0.5975333251953124} +{"epoch": 0, "iter": 8341, "iter_tflops": 31.76050378037157, "iter_time": 0.6495833206176758, "loss": 0.24493803083896637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.28431368091315, "step_time": 0.5847100696563721} +{"epoch": 0, "iter": 8342, "iter_tflops": 43.055545488541114, "iter_time": 0.4791738967895508, "loss": 0.137098029255867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.65551594815812, "step_time": 0.4240237331390381} +{"epoch": 0, "iter": 8343, "iter_tflops": 51.873614067605416, "iter_time": 0.39771845245361326, "loss": 0.16654832661151886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.18834104680176, "step_time": 0.36717748069763184} +{"epoch": 0, "iter": 8344, "iter_tflops": 51.16062365646272, "iter_time": 0.40326118087768553, "loss": 0.18090827763080597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.43548330442769, "step_time": 0.37216404151916505} +{"epoch": 0, "iter": 8345, "iter_tflops": 27.170434520197638, "iter_time": 0.7593214416503906, "loss": 0.6597766876220703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.6660482686044, "step_time": 0.719704833984375} +{"epoch": 0, "iter": 8346, "iter_tflops": 17.58573367044279, "iter_time": 1.173172180175781, "loss": 0.6159294843673706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.4257087936821, "step_time": 0.8807030639648437} +{"epoch": 0, "iter": 8347, 
"iter_tflops": 37.45213777264508, "iter_time": 0.550865577697754, "loss": 0.6940879821777344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.5709199492231, "step_time": 0.5085192432403565} +{"epoch": 0, "iter": 8348, "iter_tflops": 37.3569720646699, "iter_time": 0.5522688903808595, "loss": 0.9677546620368958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.300722447337066, "step_time": 0.5119286270141602} +{"epoch": 0, "iter": 8349, "iter_tflops": 34.585780396681706, "iter_time": 0.5965195312499999, "loss": 0.4487232565879822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.16354138181573, "step_time": 0.5405969352722169} +{"epoch": 0, "iter": 8350, "iter_tflops": 34.30714038209385, "iter_time": 0.6013644180297851, "loss": 0.5362642407417297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.434969224787295, "step_time": 0.5511182174682617} +{"epoch": 0, "iter": 8351, "iter_tflops": 36.027938247175726, "iter_time": 0.5726415252685547, "loss": 0.4940939247608185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.46968739224381, "step_time": 0.5227072944641113} +{"epoch": 0, "iter": 8352, "iter_tflops": 37.70514636207188, "iter_time": 0.5471691665649415, "loss": 0.5456550121307373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.29588643212782, "step_time": 0.4995919761657715} +{"epoch": 0, "iter": 8353, "iter_tflops": 31.530454393548826, "iter_time": 0.6543227462768554, "loss": 0.02047249674797058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.53878879501245, "step_time": 0.5973311233520507} +{"epoch": 0, "iter": 8354, "iter_tflops": 47.675152684849124, "iter_time": 0.43274310302734376, "loss": 0.01352613139897585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44344858821068, "step_time": 0.39339696502685545} +{"epoch": 0, "iter": 8355, "iter_tflops": 48.902048219338766, "iter_time": 0.4218860816955567, "loss": 0.04213287681341171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.34489772382592, "step_time": 0.38674914360046386} +{"epoch": 0, "iter": 8356, 
"iter_tflops": 50.56390429015807, "iter_time": 0.4080201835632324, "loss": 0.034077707678079605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.39951528292523, "step_time": 0.37240566825866706} +{"epoch": 0, "iter": 8357, "iter_tflops": 29.860394379234254, "iter_time": 0.6909183197021485, "loss": 0.6579136252403259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.721328907282007, "step_time": 0.6503855361938476} +{"epoch": 0, "iter": 8358, "iter_tflops": 12.632749080683194, "iter_time": 1.633143615722656, "loss": 0.7027940154075623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.02303330263047, "step_time": 1.2875897541046142} +{"epoch": 0, "iter": 8359, "iter_tflops": 36.48330593506454, "iter_time": 0.5654940795898438, "loss": 0.7080167531967163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.720636050636124, "step_time": 0.5194049129486084} +{"epoch": 0, "iter": 8360, "iter_tflops": 39.91928876979671, "iter_time": 0.5168201675415038, "loss": 0.7447919249534607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.16836310817658, "step_time": 0.47792160797119143} +{"epoch": 0, "iter": 8361, "iter_tflops": 18.771402766956168, "iter_time": 1.0990704193115235, "loss": 0.003806514898315072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.090358279826546, "step_time": 1.0269151611328124} +{"epoch": 0, "iter": 8362, "iter_tflops": 12.861990580845635, "iter_time": 1.6040358123779297, "loss": 0.012591850012540817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.189173391086104, "step_time": 1.1342512969970702} +{"epoch": 0, "iter": 8363, "iter_tflops": 43.94302287069398, "iter_time": 0.46949645614624025, "loss": 0.0025940006598830223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.620545154487715, "step_time": 0.42432871627807617} +{"epoch": 0, "iter": 8364, "iter_tflops": 46.05135312762164, "iter_time": 0.44800189590454104, "loss": 0.010902921669185162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.98828289689363, "step_time": 0.40462420654296877} +{"epoch": 0, 
"iter": 8365, "iter_tflops": 15.194518956220932, "iter_time": 1.1673577575683594, "loss": 0.008395790122449398, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 16.106587613085498, "step_time": 1.1012537231445314} +{"epoch": 0, "iter": 8366, "iter_tflops": 36.24025575783287, "iter_time": 0.489440242767334, "loss": 0.02148965559899807, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 42.13197673630857, "step_time": 0.4209970893859863} +{"epoch": 0, "iter": 8367, "iter_tflops": 49.50461100073266, "iter_time": 0.35829873657226563, "loss": 0.013039905577898026, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 54.296285670959975, "step_time": 0.32667869186401366} +{"epoch": 0, "iter": 8368, "iter_tflops": 51.993632482360596, "iter_time": 0.3411463813781738, "loss": 0.006504380609840155, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 56.89503165987539, "step_time": 0.31175726699829104} +{"epoch": 0, "iter": 8369, "iter_tflops": 31.980184052444088, "iter_time": 0.6451211624145508, "loss": 0.8157418966293335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.46741518677876, "step_time": 0.5985680503845214} +{"epoch": 0, "iter": 8370, "iter_tflops": 14.616843757183174, "iter_time": 1.411460220336914, "loss": 0.6196571588516235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.31359303665651, "step_time": 1.0156299514770508} +{"epoch": 0, "iter": 8371, "iter_tflops": 47.23444102855092, "iter_time": 0.4367807273864746, "loss": 0.8213743567466736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.260328724468266, "step_time": 0.40247680854797363} +{"epoch": 0, "iter": 8372, "iter_tflops": 49.90578001273973, "iter_time": 0.4134008827209472, "loss": 0.7040784955024719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84495534282716, "step_time": 0.38315740776062013} +{"epoch": 0, "iter": 8373, "iter_tflops": 46.11479498033412, "iter_time": 0.4473855628967285, "loss": 0.42265814542770386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30912063364927, "step_time": 0.41008654594421384} 
+{"epoch": 0, "iter": 8374, "iter_tflops": 46.42796792231318, "iter_time": 0.444367790222168, "loss": 0.4534238874912262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.90814334168224, "step_time": 0.40526116561889647} +{"epoch": 0, "iter": 8375, "iter_tflops": 53.19919100979341, "iter_time": 0.387808406829834, "loss": 0.4011152982711792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.73327985194579, "step_time": 0.3573518352508545} +{"epoch": 0, "iter": 8376, "iter_tflops": 44.1746572360331, "iter_time": 0.4670346031188965, "loss": 0.5372277498245239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.613755871432275, "step_time": 0.43330111503601076} +{"epoch": 0, "iter": 8377, "iter_tflops": 46.89698507735144, "iter_time": 0.43992366409301753, "loss": 0.11853635311126709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.52339097915797, "step_time": 0.40042188835144044} +{"epoch": 0, "iter": 8378, "iter_tflops": 46.48027762913472, "iter_time": 0.44386769104003904, "loss": 0.20318128168582916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.273317522806316, "step_time": 0.4023748512268066} +{"epoch": 0, "iter": 8379, "iter_tflops": 49.78281658699972, "iter_time": 0.4144219818115235, "loss": 0.11756622046232224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.770149234359614, "step_time": 0.38369046401977536} +{"epoch": 0, "iter": 8380, "iter_tflops": 57.91694667270808, "iter_time": 0.35621859741210943, "loss": 0.1424635946750641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.092892808743876, "step_time": 0.32699552345275884} +{"epoch": 0, "iter": 8381, "iter_tflops": 18.63700388129218, "iter_time": 0.6993491897583008, "loss": 0.08221818506717682, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 19.76081625433609, "step_time": 0.6595766792297364} +{"epoch": 0, "iter": 8382, "iter_tflops": 5.406733966700124, "iter_time": 2.4106556091308593, "loss": 0.14791542291641235, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 7.2754626708590235, "step_time": 
1.7914700622558593} +{"epoch": 0, "iter": 8383, "iter_tflops": 7.59989785454187, "iter_time": 1.7149932556152343, "loss": 0.060734208673238754, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 8.859964008998517, "step_time": 1.4710865135192872} +{"epoch": 0, "iter": 8384, "iter_tflops": 17.555441589433297, "iter_time": 0.7424349594116211, "loss": 0.06437856703996658, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 26.55213668857331, "step_time": 0.49087475395202634} +{"epoch": 0, "iter": 8385, "iter_tflops": 20.953535344423262, "iter_time": 0.7544087982177734, "loss": 0.5104706883430481, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 22.875163073902772, "step_time": 0.6910346984863283} +{"epoch": 0, "iter": 8386, "iter_tflops": 26.607079856161246, "iter_time": 0.5941099700927734, "loss": 0.3448027968406677, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 28.39954809589694, "step_time": 0.5566120758056642} +{"epoch": 0, "iter": 8387, "iter_tflops": 28.240612183399808, "iter_time": 0.5597446441650391, "loss": 0.39690902829170227, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.14222121173739, "step_time": 0.5244315376281737} +{"epoch": 0, "iter": 8388, "iter_tflops": 29.64846473354709, "iter_time": 0.5331652603149414, "loss": 0.44238704442977905, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 31.480061377829866, "step_time": 0.5021442375183105} +{"epoch": 0, "iter": 8389, "iter_tflops": 33.4135477728063, "iter_time": 0.6174469604492187, "loss": 0.07533356547355652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.93818480052037, "step_time": 0.574071662902832} +{"epoch": 0, "iter": 8390, "iter_tflops": 24.28331179254227, "iter_time": 0.8495996627807617, "loss": 0.05013347789645195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.628405784168535, "step_time": 0.5340912494659423} +{"epoch": 0, "iter": 8391, "iter_tflops": 38.9148702630991, "iter_time": 0.5301596374511719, "loss": 0.03800497576594353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.801765940305955, 
"step_time": 0.482015007019043} +{"epoch": 0, "iter": 8392, "iter_tflops": 43.21553776793332, "iter_time": 0.47739990234375, "loss": 0.0789349302649498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.35025244812565, "step_time": 0.4357124290466309} +{"epoch": 0, "iter": 8393, "iter_tflops": 18.210966246537904, "iter_time": 1.1328939514160155, "loss": 0.6904025077819824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.511313206706298, "step_time": 1.0573913345336914} +{"epoch": 0, "iter": 8394, "iter_tflops": 17.076374278629274, "iter_time": 1.2081659240722658, "loss": 0.808853268623352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.894535866471504, "step_time": 0.9422941703796386} +{"epoch": 0, "iter": 8395, "iter_tflops": 40.607845582975926, "iter_time": 0.5080568351745606, "loss": 0.5995771884918213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.88872346745941, "step_time": 0.4700773200988769} +{"epoch": 0, "iter": 8396, "iter_tflops": 43.49157486330624, "iter_time": 0.47436988830566407, "loss": 0.7307546138763428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.67892690894983, "step_time": 0.441978744506836} +{"epoch": 0, "iter": 8397, "iter_tflops": 22.298560438305884, "iter_time": 0.9252208709716796, "loss": 0.1264471709728241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.486468150562423, "step_time": 0.878424690246582} +{"epoch": 0, "iter": 8398, "iter_tflops": 18.26767673533941, "iter_time": 1.1293769760131835, "loss": 0.13444682955741882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.181280228022427, "step_time": 0.7880093460083007} +{"epoch": 0, "iter": 8399, "iter_tflops": 53.60150687831787, "iter_time": 0.38489764022827144, "loss": 0.10662465542554855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.332525282095084, "step_time": 0.35368078804016106} +{"epoch": 0, "iter": 8400, "iter_tflops": 53.245561978732646, "iter_time": 0.3874706687927246, "loss": 0.20865651965141296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.867985684604264, 
"step_time": 0.3565199871063232} +{"epoch": 0, "iter": 8401, "iter_tflops": 39.57756138053823, "iter_time": 0.521282585144043, "loss": 0.5659734606742859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.05948215765466, "step_time": 0.4791300888061524} +{"epoch": 0, "iter": 8402, "iter_tflops": 45.457442270613846, "iter_time": 0.45385513305664066, "loss": 0.5656747817993164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.16851986355673, "step_time": 0.41959964561462404} +{"epoch": 0, "iter": 8403, "iter_tflops": 48.275545939999205, "iter_time": 0.4273611640930176, "loss": 0.5388069152832031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.41559697503569, "step_time": 0.3936060009002685} +{"epoch": 0, "iter": 8404, "iter_tflops": 42.854963377108845, "iter_time": 0.4814166641235351, "loss": 0.35228872299194336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.08730456592669, "step_time": 0.44765242195129396} +{"epoch": 0, "iter": 8405, "iter_tflops": 34.8514092013301, "iter_time": 0.5919730072021485, "loss": 0.5514411330223083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.33001690729097, "step_time": 0.5526676712036133} +{"epoch": 0, "iter": 8406, "iter_tflops": 16.037571836345776, "iter_time": 1.2864225158691407, "loss": 0.5030533075332642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.51395073177742, "step_time": 1.0057103958129883} +{"epoch": 0, "iter": 8407, "iter_tflops": 42.007782534581985, "iter_time": 0.49112550735473637, "loss": 0.4221826493740082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.97831443154789, "step_time": 0.43916206359863286} +{"epoch": 0, "iter": 8408, "iter_tflops": 50.07330454475409, "iter_time": 0.4120178146362305, "loss": 0.4572083055973053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.040313857908075, "step_time": 0.38177227401733393} +{"epoch": 0, "iter": 8409, "iter_tflops": 35.222939507468865, "iter_time": 0.5857288970947265, "loss": 0.4362489879131317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
37.50545612115809, "step_time": 0.5500824584960937} +{"epoch": 0, "iter": 8410, "iter_tflops": 11.961933312939651, "iter_time": 1.7247290191650388, "loss": 0.4830015301704407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.695582560397368, "step_time": 1.4038976287841798} +{"epoch": 0, "iter": 8411, "iter_tflops": 11.588909484021208, "iter_time": 1.780244598388672, "loss": 0.3722700774669647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.540575953464472, "step_time": 1.5236496276855467} +{"epoch": 0, "iter": 8412, "iter_tflops": 20.622855979437194, "iter_time": 1.0003994369506837, "loss": 0.3086891174316406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.208888204139424, "step_time": 0.8889307117462157} +{"epoch": 0, "iter": 8413, "iter_tflops": 12.072982155399103, "iter_time": 1.3330843811035153, "loss": 0.33801010251045227, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 12.772250558757602, "step_time": 1.2600992965698243} +{"epoch": 0, "iter": 8414, "iter_tflops": 11.25927245391472, "iter_time": 1.4294266357421872, "loss": 0.3673000931739807, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 15.15380970436662, "step_time": 1.0620632209777832} +{"epoch": 0, "iter": 8415, "iter_tflops": 29.33827062588738, "iter_time": 0.5485771179199218, "loss": 0.3454482853412628, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 31.307393102684028, "step_time": 0.5140735893249512} +{"epoch": 0, "iter": 8416, "iter_tflops": 29.370416979046475, "iter_time": 0.547976692199707, "loss": 0.4143456816673279, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 31.090218405750917, "step_time": 0.51766455078125} +{"epoch": 0, "iter": 8417, "iter_tflops": 27.368840490373834, "iter_time": 0.7538168640136719, "loss": 0.8842553496360779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.05567452455763, "step_time": 0.7100538482666017} +{"epoch": 0, "iter": 8418, "iter_tflops": 16.067126228601634, "iter_time": 1.2840562286376953, "loss": 0.8926279544830322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
20.947731857170876, "step_time": 0.9848843612670899} +{"epoch": 0, "iter": 8419, "iter_tflops": 36.233956492159024, "iter_time": 0.569385612487793, "loss": 0.7665260434150696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56796524909745, "step_time": 0.521409008026123} +{"epoch": 0, "iter": 8420, "iter_tflops": 34.86083259183119, "iter_time": 0.59181298828125, "loss": 0.8150168657302856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82213021250623, "step_time": 0.5454767723083496} +{"epoch": 0, "iter": 8421, "iter_tflops": 21.562661452308785, "iter_time": 0.9567971725463867, "loss": 0.4068699777126312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.181162122804835, "step_time": 0.8899939270019531} +{"epoch": 0, "iter": 8422, "iter_tflops": 39.68116249946359, "iter_time": 0.5199216003417969, "loss": 0.39943990111351013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.60440864949431, "step_time": 0.4731423759460449} +{"epoch": 0, "iter": 8423, "iter_tflops": 40.29504326682064, "iter_time": 0.5120007781982422, "loss": 0.4291771948337555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.146974310219136, "step_time": 0.46732746315002444} +{"epoch": 0, "iter": 8424, "iter_tflops": 42.011414402577024, "iter_time": 0.49108304977416983, "loss": 0.2773432731628418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.87003388643435, "step_time": 0.44977279853820795} +{"epoch": 0, "iter": 8425, "iter_tflops": 18.463599208182, "iter_time": 1.117392837524414, "loss": 0.24554476141929626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.669656511005225, "step_time": 1.0488791961669923} +{"epoch": 0, "iter": 8426, "iter_tflops": 15.49226113332987, "iter_time": 1.3317031860351562, "loss": 0.26712486147880554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.552862059292234, "step_time": 0.8759484710693362} +{"epoch": 0, "iter": 8427, "iter_tflops": 41.576893001381244, "iter_time": 0.49621537399291993, "loss": 0.18503306806087494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
45.70571441562112, "step_time": 0.4513898048400879} +{"epoch": 0, "iter": 8428, "iter_tflops": 42.49711748566848, "iter_time": 0.4854704208374024, "loss": 0.1578894406557083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.356356504625985, "step_time": 0.4450542507171631} +{"epoch": 0, "iter": 8429, "iter_tflops": 15.44460141286048, "iter_time": 1.3358126220703124, "loss": 0.12689006328582764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.241713059512172, "step_time": 1.2702535400390624} +{"epoch": 0, "iter": 8430, "iter_tflops": 17.323303408281618, "iter_time": 1.190944534301758, "loss": 0.20721659064292908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.991031803756933, "step_time": 0.9828527584075928} +{"epoch": 0, "iter": 8431, "iter_tflops": 38.75909113317988, "iter_time": 0.5322904357910156, "loss": 0.11284427344799042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67997177576207, "step_time": 0.4833905143737793} +{"epoch": 0, "iter": 8432, "iter_tflops": 39.80173260082811, "iter_time": 0.5183466186523438, "loss": 0.11122241616249084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.577037435979214, "step_time": 0.47343956184387204} +{"epoch": 0, "iter": 8433, "iter_tflops": 40.22540985764886, "iter_time": 0.512887092590332, "loss": 0.00627230666577816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92467461424459, "step_time": 0.4592374610900879} +{"epoch": 0, "iter": 8434, "iter_tflops": 43.741530748913696, "iter_time": 0.47165915679931647, "loss": 0.009341444820165634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.791350720672106, "step_time": 0.4228432540893554} +{"epoch": 0, "iter": 8435, "iter_tflops": 44.970260355320136, "iter_time": 0.4587719383239746, "loss": 0.015982838347554207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89229852695352, "step_time": 0.41351258850097655} +{"epoch": 0, "iter": 8436, "iter_tflops": 39.87178447453444, "iter_time": 0.5174359207153321, "loss": 0.017015181481838226, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 43.8136603480352, "step_time": 0.4708826732635498} +{"epoch": 0, "iter": 8437, "iter_tflops": 15.927758568961654, "iter_time": 1.295291702270508, "loss": 0.7514840960502625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.070873238702667, "step_time": 1.2085552520751954} +{"epoch": 0, "iter": 8438, "iter_tflops": 19.446724879449217, "iter_time": 1.0609032440185546, "loss": 0.8000436425209045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.70738747276843, "step_time": 0.9504180793762208} +{"epoch": 0, "iter": 8439, "iter_tflops": 47.1766540864683, "iter_time": 0.43731574249267574, "loss": 0.7056782841682434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.94405173776222, "step_time": 0.40497551345825195} +{"epoch": 0, "iter": 8440, "iter_tflops": 43.82764329684627, "iter_time": 0.4707324409484863, "loss": 0.9987937211990356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.28523428789308, "step_time": 0.4363115425109863} +{"epoch": 0, "iter": 8441, "iter_tflops": 42.89300817204384, "iter_time": 0.48098966217041017, "loss": 0.3057667315006256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.88113205606265, "step_time": 0.44007242584228523} +{"epoch": 0, "iter": 8442, "iter_tflops": 43.53967559309015, "iter_time": 0.4738458251953125, "loss": 0.4132224917411804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.227791158451055, "step_time": 0.43684222793579097} +{"epoch": 0, "iter": 8443, "iter_tflops": 45.11679050648935, "iter_time": 0.45728194046020504, "loss": 0.43659549951553345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.76553766695979, "step_time": 0.42306707763671875} +{"epoch": 0, "iter": 8444, "iter_tflops": 45.86763654337309, "iter_time": 0.44979630661010744, "loss": 0.3499942719936371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.51297265719291, "step_time": 0.4166805667877197} +{"epoch": 0, "iter": 8445, "iter_tflops": 30.62780239342224, "iter_time": 0.6736067199707032, "loss": 0.11874475330114365, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 32.652368407678324, "step_time": 0.6318406448364258} +{"epoch": 0, "iter": 8446, "iter_tflops": 15.2513784940321, "iter_time": 1.352736312866211, "loss": 0.11365191638469696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.90551858688413, "step_time": 1.0364509429931639} +{"epoch": 0, "iter": 8447, "iter_tflops": 39.372467772877755, "iter_time": 0.5239979782104491, "loss": 0.10075575113296509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.25780907202109, "step_time": 0.47693338966369625} +{"epoch": 0, "iter": 8448, "iter_tflops": 39.811738752821995, "iter_time": 0.5182163391113281, "loss": 0.10273735225200653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.523830138851345, "step_time": 0.4740183353424073} +{"epoch": 0, "iter": 8449, "iter_tflops": 23.268907670826742, "iter_time": 0.8866378173828127, "loss": 0.040460750460624695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.695033444981352, "step_time": 0.8354349288940429} +{"epoch": 0, "iter": 8450, "iter_tflops": 13.26855984506605, "iter_time": 1.5548856658935548, "loss": 0.025547385215759277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.994041105808034, "step_time": 1.2899237518310547} +{"epoch": 0, "iter": 8451, "iter_tflops": 18.745965251384487, "iter_time": 1.100561813354492, "loss": 0.07909471541643143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.21961958341895, "step_time": 0.9285079536437989} +{"epoch": 0, "iter": 8452, "iter_tflops": 40.32774403659915, "iter_time": 0.5115856094360351, "loss": 0.04751301184296608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.67944533792466, "step_time": 0.46175804901123046} +{"epoch": 0, "iter": 8453, "iter_tflops": 10.414607303739832, "iter_time": 1.3883114013671873, "loss": 0.5163787603378296, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 10.949610557398948, "step_time": 1.3204778366088867} +{"epoch": 0, "iter": 8454, "iter_tflops": 11.296877578914636, "iter_time": 1.27988623046875, "loss": 0.4533109664916992, "lr": 3e-05, "seqlen": 
5792.0, "step_tflops": 13.418248812893166, "step_time": 1.077541358947754} +{"epoch": 0, "iter": 8455, "iter_tflops": 19.68482061996288, "iter_time": 0.7345110397338868, "loss": 0.4864295721054077, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 21.26731578557686, "step_time": 0.6798562736511231} +{"epoch": 0, "iter": 8456, "iter_tflops": 22.191599370810142, "iter_time": 0.6515401535034179, "loss": 0.23945049941539764, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 23.858414817767233, "step_time": 0.6060217399597169} +{"epoch": 0, "iter": 8457, "iter_tflops": 32.620166002070945, "iter_time": 0.6324643936157226, "loss": 0.1184091567993164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.633406637494694, "step_time": 0.578981788635254} +{"epoch": 0, "iter": 8458, "iter_tflops": 10.965809236779842, "iter_time": 1.8814018249511717, "loss": 0.13677772879600525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.19562654128901, "step_time": 1.691679672241211} +{"epoch": 0, "iter": 8459, "iter_tflops": 11.550305739502155, "iter_time": 1.786194580078125, "loss": 0.17163319885730743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.498839470247146, "step_time": 1.5283605346679687} +{"epoch": 0, "iter": 8460, "iter_tflops": 40.66907146884261, "iter_time": 0.5072919731140138, "loss": 0.16139420866966248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.75793870209819, "step_time": 0.46094824981689453} +{"epoch": 0, "iter": 8461, "iter_tflops": 12.107920732503848, "iter_time": 1.4004063262939452, "loss": 0.4529134929180145, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 12.893936553586245, "step_time": 1.3150373992919921} +{"epoch": 0, "iter": 8462, "iter_tflops": 14.709204122391878, "iter_time": 1.1527482147216797, "loss": 0.3570033311843872, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 21.084628843211483, "step_time": 0.8041881561279298} +{"epoch": 0, "iter": 8463, "iter_tflops": 29.253223086853893, "iter_time": 0.5796287384033204, "loss": 0.30838412046432495, "lr": 3e-05, 
"seqlen": 6768.0, "step_tflops": 31.316270700247777, "step_time": 0.5414440612792969} +{"epoch": 0, "iter": 8464, "iter_tflops": 30.477231375850565, "iter_time": 0.5563500366210937, "loss": 0.3496476113796234, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 32.45204804429877, "step_time": 0.5224942588806153} +{"epoch": 0, "iter": 8465, "iter_tflops": 41.38416884076921, "iter_time": 0.49852622604370117, "loss": 0.408500611782074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.0479760037916, "step_time": 0.45798047637939454} +{"epoch": 0, "iter": 8466, "iter_tflops": 45.3978325126894, "iter_time": 0.4544510688781738, "loss": 0.4272976219654083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34587206133346, "step_time": 0.4097871913909912} +{"epoch": 0, "iter": 8467, "iter_tflops": 49.509062209330445, "iter_time": 0.4167134780883789, "loss": 0.37538081407546997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.981073429137304, "step_time": 0.3821912422180176} +{"epoch": 0, "iter": 8468, "iter_tflops": 51.31629315322642, "iter_time": 0.4020378761291504, "loss": 0.3741162121295929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.526421516671164, "step_time": 0.3715545310974121} +{"epoch": 0, "iter": 8469, "iter_tflops": 38.59073326412614, "iter_time": 0.5346126327514649, "loss": 0.9256297945976257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.833967918131705, "step_time": 0.49316606903076166} +{"epoch": 0, "iter": 8470, "iter_tflops": 34.65081021130713, "iter_time": 0.5954000320434569, "loss": 0.5560453534126282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21052643825464, "step_time": 0.5399321975708007} +{"epoch": 0, "iter": 8471, "iter_tflops": 37.29005529224732, "iter_time": 0.5532599334716797, "loss": 0.7429631352424622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.78653835078157, "step_time": 0.5058309516906738} +{"epoch": 0, "iter": 8472, "iter_tflops": 42.19674288601536, "iter_time": 0.4889262084960938, "loss": 0.6294371485710144, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 45.97560950978911, "step_time": 0.44873996734619137} +{"epoch": 0, "iter": 8473, "iter_tflops": 14.084787489600814, "iter_time": 1.4647784729003908, "loss": 0.2674316465854645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.90355840906088, "step_time": 1.3843065490722655} +{"epoch": 0, "iter": 8474, "iter_tflops": 39.21833466390736, "iter_time": 0.5260573577880859, "loss": 0.16224315762519836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.42886813516304, "step_time": 0.4750548286437987} +{"epoch": 0, "iter": 8475, "iter_tflops": 49.48368866224665, "iter_time": 0.4169271545410156, "loss": 0.21982350945472717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.835886701053624, "step_time": 0.3832219505310059} +{"epoch": 0, "iter": 8476, "iter_tflops": 44.92446563954104, "iter_time": 0.45923959732055664, "loss": 0.21664270758628845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.585234831015946, "step_time": 0.4246371059417724} +{"epoch": 0, "iter": 8477, "iter_tflops": 33.66834846087212, "iter_time": 0.6127741470336914, "loss": 0.7580276131629944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.01280833515473, "step_time": 0.5728821067810058} +{"epoch": 0, "iter": 8478, "iter_tflops": 15.027797841485752, "iter_time": 1.3728620605468749, "loss": 0.7727380394935608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.033842252466926, "step_time": 1.2111826095581055} +{"epoch": 0, "iter": 8479, "iter_tflops": 16.916122217575957, "iter_time": 1.2196112823486327, "loss": 1.015091896057129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.9535917212034, "step_time": 1.0339538764953615} +{"epoch": 0, "iter": 8480, "iter_tflops": 34.085370227223336, "iter_time": 0.6052770843505859, "loss": 0.6338479518890381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.64945833456076, "step_time": 0.4837363548278809} +{"epoch": 0, "iter": 8481, "iter_tflops": 21.084956768131022, "iter_time": 0.7769191970825196, "loss": 0.2818535268306732, "lr": 3e-05, 
"seqlen": 6544.0, "step_tflops": 22.267320676247913, "step_time": 0.7356658630371093} +{"epoch": 0, "iter": 8482, "iter_tflops": 8.17231445498796, "iter_time": 2.0044881744384764, "loss": 0.39891859889030457, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 9.630545396252188, "step_time": 1.70097403717041} +{"epoch": 0, "iter": 8483, "iter_tflops": 12.390920148528581, "iter_time": 1.322041259765625, "loss": 0.4299129843711853, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 13.692544342334504, "step_time": 1.1963669624328612} +{"epoch": 0, "iter": 8484, "iter_tflops": 24.965823620117412, "iter_time": 0.6561492996215821, "loss": 0.4846337139606476, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 29.610858347985292, "step_time": 0.5532196159362792} +{"epoch": 0, "iter": 8485, "iter_tflops": 18.895574165474297, "iter_time": 0.7565567779541016, "loss": 0.44732680916786194, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 19.968448705147292, "step_time": 0.7159081268310546} +{"epoch": 0, "iter": 8486, "iter_tflops": 8.528115621659634, "iter_time": 1.676287628173828, "loss": 0.5219850540161133, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 10.648451479559332, "step_time": 1.342502685546875} +{"epoch": 0, "iter": 8487, "iter_tflops": 21.069871219189512, "iter_time": 0.6784841995239257, "loss": 0.4264882802963257, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 22.753864492418828, "step_time": 0.6282701873779296} +{"epoch": 0, "iter": 8488, "iter_tflops": 21.569032299848693, "iter_time": 0.6627823867797852, "loss": 0.5502417683601379, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 23.13500622470819, "step_time": 0.6179196395874025} +{"epoch": 0, "iter": 8489, "iter_tflops": 16.07442306795433, "iter_time": 1.2834733428955076, "loss": 0.739870548248291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.992123215333052, "step_time": 1.2141563034057619} +{"epoch": 0, "iter": 8490, "iter_tflops": 16.707651693495425, "iter_time": 1.2348290405273439, "loss": 0.6889200210571289, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 19.70847137229263, "step_time": 1.0468134803771973} +{"epoch": 0, "iter": 8491, "iter_tflops": 40.41691711558304, "iter_time": 0.5104568824768067, "loss": 0.88968425989151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.9443735970655, "step_time": 0.4694820251464844} +{"epoch": 0, "iter": 8492, "iter_tflops": 39.19664932033963, "iter_time": 0.5263483963012695, "loss": 0.8441784381866455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41768113293355, "step_time": 0.4863795700073242} +{"epoch": 0, "iter": 8493, "iter_tflops": 20.924757066812436, "iter_time": 0.9859657363891602, "loss": 0.33326056599617004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.602915223484832, "step_time": 0.9127625045776367} +{"epoch": 0, "iter": 8494, "iter_tflops": 37.250981690516966, "iter_time": 0.5538402633666992, "loss": 0.3173845112323761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.68170891115081, "step_time": 0.4949675540924072} +{"epoch": 0, "iter": 8495, "iter_tflops": 41.039694963073664, "iter_time": 0.5027106933593749, "loss": 0.3100072145462036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.65182767809476, "step_time": 0.4620436515808105} +{"epoch": 0, "iter": 8496, "iter_tflops": 41.5208337606846, "iter_time": 0.4968853378295898, "loss": 0.32536444067955017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38997672069094, "step_time": 0.4545297222137451} +{"epoch": 0, "iter": 8497, "iter_tflops": 31.735977799470007, "iter_time": 0.6500853271484375, "loss": 0.5808287262916565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.86895488441994, "step_time": 0.5916751327514649} +{"epoch": 0, "iter": 8498, "iter_tflops": 41.51465638615299, "iter_time": 0.49695927429199216, "loss": 0.8438360691070557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.9412396166635, "step_time": 0.45906818962097173} +{"epoch": 0, "iter": 8499, "iter_tflops": 43.69275167283565, "iter_time": 0.47218572235107426, "loss": 0.7679101228713989, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 46.94934049898873, "step_time": 0.43943308448791507} +{"epoch": 0, "iter": 8500, "iter_tflops": 48.1523057900399, "iter_time": 0.4284549446105957, "loss": 0.7600333094596863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.926133122681186, "step_time": 0.3973161926269531} +{"epoch": 0, "iter": 8501, "iter_tflops": 40.12272811242732, "iter_time": 0.5141996688842773, "loss": 0.7623735070228577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24494387585212, "step_time": 0.47707527542114264} +{"epoch": 0, "iter": 8502, "iter_tflops": 42.018676105442495, "iter_time": 0.49099818038940435, "loss": 0.6535378098487854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.354479571828634, "step_time": 0.4450722713470459} +{"epoch": 0, "iter": 8503, "iter_tflops": 47.15555331931367, "iter_time": 0.43751142883300786, "loss": 0.7999207973480225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.13760212091339, "step_time": 0.40344272422790534} +{"epoch": 0, "iter": 8504, "iter_tflops": 46.11871162901447, "iter_time": 0.4473475685119629, "loss": 0.8336422443389893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.93563341377187, "step_time": 0.41315373611450196} +{"epoch": 0, "iter": 8505, "iter_tflops": 28.11252597214272, "iter_time": 0.7338754806518555, "loss": 0.38236960768699646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.714646997211087, "step_time": 0.6943072052001954} +{"epoch": 0, "iter": 8506, "iter_tflops": 18.122694274213377, "iter_time": 1.1384120483398439, "loss": 0.5542227625846863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.60063990253923, "step_time": 0.8741751747131348} +{"epoch": 0, "iter": 8507, "iter_tflops": 46.795623069075155, "iter_time": 0.44087656402587894, "loss": 0.5303326845169067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.80799132105375, "step_time": 0.4060600109100342} +{"epoch": 0, "iter": 8508, "iter_tflops": 50.494550317547926, "iter_time": 0.40858059692382814, "loss": 0.4695865511894226, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 54.438797118768164, "step_time": 0.37897776222229} +{"epoch": 0, "iter": 8509, "iter_tflops": 29.892037809095232, "iter_time": 0.6818591690063477, "loss": 0.14808866381645203, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 31.774436490710073, "step_time": 0.6414640922546386} +{"epoch": 0, "iter": 8510, "iter_tflops": 40.56920986756216, "iter_time": 0.5024046592712402, "loss": 0.06120133772492409, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 45.340941463379906, "step_time": 0.44953102874755857} +{"epoch": 0, "iter": 8511, "iter_tflops": 51.83534781206583, "iter_time": 0.3932096710205078, "loss": 0.07398684322834015, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 56.33278014441019, "step_time": 0.36181704521179203} +{"epoch": 0, "iter": 8512, "iter_tflops": 47.98070454929828, "iter_time": 0.4247990989685059, "loss": 0.030966369435191154, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 52.2491909340082, "step_time": 0.39009522819519044} +{"epoch": 0, "iter": 8513, "iter_tflops": 26.300687574411022, "iter_time": 0.7844317169189453, "loss": 0.49610334634780884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.725892124470295, "step_time": 0.7441092758178711} +{"epoch": 0, "iter": 8514, "iter_tflops": 19.092327808299284, "iter_time": 1.0805960235595702, "loss": 0.36150896549224854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.03882333253837, "step_time": 0.8954925003051758} +{"epoch": 0, "iter": 8515, "iter_tflops": 45.023670980753394, "iter_time": 0.45822770690917974, "loss": 0.43643397092819214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80301501485532, "step_time": 0.42274219131469726} +{"epoch": 0, "iter": 8516, "iter_tflops": 51.16527977561701, "iter_time": 0.4032244834899902, "loss": 0.5157976746559143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.34257288028924, "step_time": 0.37278883934021} +{"epoch": 0, "iter": 8517, "iter_tflops": 31.21580477566777, "iter_time": 0.5814171142578125, "loss": 0.03122326172888279, 
"lr": 3e-05, "seqlen": 7232.0, "step_tflops": 33.524168030677245, "step_time": 0.5413826560974122} +{"epoch": 0, "iter": 8518, "iter_tflops": 19.256609084212705, "iter_time": 0.9425025482177735, "loss": 0.03891206905245781, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 23.957894085610338, "step_time": 0.7575541934967042} +{"epoch": 0, "iter": 8519, "iter_tflops": 45.175653669766284, "iter_time": 0.40175186538696284, "loss": 0.02930554933845997, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 49.43880722290296, "step_time": 0.367108434677124} +{"epoch": 0, "iter": 8520, "iter_tflops": 48.96878329385791, "iter_time": 0.3706321029663086, "loss": 0.04253252223134041, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 53.45485275924701, "step_time": 0.3395276985168457} +{"epoch": 0, "iter": 8521, "iter_tflops": 23.112036706182312, "iter_time": 0.8066957702636719, "loss": 0.023620108142495155, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 24.3219909835302, "step_time": 0.7665648040771484} +{"epoch": 0, "iter": 8522, "iter_tflops": 14.157786331831211, "iter_time": 1.3168995361328126, "loss": 0.04121170938014984, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 21.823007905929565, "step_time": 0.8543452091217042} +{"epoch": 0, "iter": 8523, "iter_tflops": 49.471255569500826, "iter_time": 0.37687303543090817, "loss": 0.029250260442495346, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 54.45039249376797, "step_time": 0.3424104290008545} +{"epoch": 0, "iter": 8524, "iter_tflops": 47.16944274158355, "iter_time": 0.3952639923095703, "loss": 0.03028777800500393, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 51.03972571209274, "step_time": 0.3652915840148926} +{"epoch": 0, "iter": 8525, "iter_tflops": 21.42024754571832, "iter_time": 0.9631585006713868, "loss": 0.6769362092018127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.471201185585418, "step_time": 0.9181126251220704} +{"epoch": 0, "iter": 8526, "iter_tflops": 16.0669774954877, "iter_time": 1.2840681152343751, "loss": 
0.6807723045349121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.077460263218015, "step_time": 1.1412606201171875} +{"epoch": 0, "iter": 8527, "iter_tflops": 45.86792012674943, "iter_time": 0.4497935256958008, "loss": 0.5768712759017944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.715321130919406, "step_time": 0.4149846172332764} +{"epoch": 0, "iter": 8528, "iter_tflops": 45.94338358296953, "iter_time": 0.44905472564697263, "loss": 0.5436117053031921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.57902018494549, "step_time": 0.4161254787445068} +{"epoch": 0, "iter": 8529, "iter_tflops": 27.948327614114657, "iter_time": 0.7381870498657226, "loss": 0.538752019405365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.640842778275665, "step_time": 0.6960359954833986} +{"epoch": 0, "iter": 8530, "iter_tflops": 17.464444333055127, "iter_time": 1.1813197784423828, "loss": 0.7110819816589355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.594059649094127, "step_time": 1.0017982788085937} +{"epoch": 0, "iter": 8531, "iter_tflops": 46.204648636985965, "iter_time": 0.44651553726196286, "loss": 0.5242749452590942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9065447021232, "step_time": 0.4133945484161377} +{"epoch": 0, "iter": 8532, "iter_tflops": 42.31349954200508, "iter_time": 0.48757710266113286, "loss": 0.583962082862854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42537944846789, "step_time": 0.45417547988891604} +{"epoch": 0, "iter": 8533, "iter_tflops": 25.890050513384182, "iter_time": 0.796873435974121, "loss": 0.34163936972618103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.328119761787516, "step_time": 0.7549401016235352} +{"epoch": 0, "iter": 8534, "iter_tflops": 18.039835814583114, "iter_time": 1.143640869140625, "loss": 0.22517848014831543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.48636226485193, "step_time": 0.9601948089599608} +{"epoch": 0, "iter": 8535, "iter_tflops": 34.018766923536425, "iter_time": 0.6064621200561523, 
"loss": 0.36309269070625305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.23410751376609, "step_time": 0.5540912590026856} +{"epoch": 0, "iter": 8536, "iter_tflops": 41.29346870049186, "iter_time": 0.4996212272644043, "loss": 0.26298385858535767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27354079583611, "step_time": 0.4556986961364746} +{"epoch": 0, "iter": 8537, "iter_tflops": 14.51539685418705, "iter_time": 1.4213247985839843, "loss": 0.07558906078338623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.640209492022825, "step_time": 1.31910595703125} +{"epoch": 0, "iter": 8538, "iter_tflops": 20.156722804031528, "iter_time": 1.0235341186523437, "loss": 0.03791392594575882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.179387851718097, "step_time": 0.8900620517730712} +{"epoch": 0, "iter": 8539, "iter_tflops": 40.92451667011839, "iter_time": 0.5041255264282226, "loss": 0.044013217091560364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.834766800031005, "step_time": 0.4601583766937256} +{"epoch": 0, "iter": 8540, "iter_tflops": 43.95999706866443, "iter_time": 0.469315170288086, "loss": 0.0410437136888504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41994872668796, "step_time": 0.4260866451263428} +{"epoch": 0, "iter": 8541, "iter_tflops": 16.972878094910225, "iter_time": 1.2155330047607422, "loss": 0.6166728734970093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.290584679704228, "step_time": 1.1279624938964843} +{"epoch": 0, "iter": 8542, "iter_tflops": 18.382402507549703, "iter_time": 1.1223284606933595, "loss": 0.7658370733261108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.763053770009673, "step_time": 0.9063412017822265} +{"epoch": 0, "iter": 8543, "iter_tflops": 48.354056963898294, "iter_time": 0.42666727066040033, "loss": 0.5558160543441772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43728663847721, "step_time": 0.39344319343566897} +{"epoch": 0, "iter": 8544, "iter_tflops": 46.82020331054291, "iter_time": 
0.4406451072692871, "loss": 0.6615860462188721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34331935461875, "step_time": 0.4098079700469971} +{"epoch": 0, "iter": 8545, "iter_tflops": 25.94618612929584, "iter_time": 0.7951493682861328, "loss": 0.07427996397018433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.338541126572306, "step_time": 0.7546523208618164} +{"epoch": 0, "iter": 8546, "iter_tflops": 16.82149635549576, "iter_time": 1.2264719543457032, "loss": 0.12812897562980652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.886499581181845, "step_time": 1.0923725395202637} +{"epoch": 0, "iter": 8547, "iter_tflops": 41.01838997343662, "iter_time": 0.5029718017578125, "loss": 0.07247953116893768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2073557156199, "step_time": 0.45636585426330567} +{"epoch": 0, "iter": 8548, "iter_tflops": 44.03069310480154, "iter_time": 0.46856163406372064, "loss": 0.04954925552010536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32097397218743, "step_time": 0.42695938873291017} +{"epoch": 0, "iter": 8549, "iter_tflops": 35.764623659624085, "iter_time": 0.5768575592041014, "loss": 0.47523680329322815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.51035427500079, "step_time": 0.5221692867279053} +{"epoch": 0, "iter": 8550, "iter_tflops": 41.16921831156606, "iter_time": 0.5011291046142579, "loss": 0.40792638063430786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22499588624021, "step_time": 0.4561878471374512} +{"epoch": 0, "iter": 8551, "iter_tflops": 36.894042905330274, "iter_time": 0.5591985015869141, "loss": 0.31135034561157227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56950661582042, "step_time": 0.5085369586944579} +{"epoch": 0, "iter": 8552, "iter_tflops": 40.784569257784916, "iter_time": 0.5058553733825684, "loss": 0.33245131373405457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.480080513458766, "step_time": 0.4638277015686035} +{"epoch": 0, "iter": 8553, "iter_tflops": 15.684851882285026, 
"iter_time": 1.159757553100586, "loss": 0.6703814268112183, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 16.66290985479609, "step_time": 1.0916836013793945} +{"epoch": 0, "iter": 8554, "iter_tflops": 25.30093501525475, "iter_time": 0.7189704818725586, "loss": 0.8366960287094116, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 28.787021658921727, "step_time": 0.6319036979675293} +{"epoch": 0, "iter": 8555, "iter_tflops": 42.13174146536317, "iter_time": 0.43175584030151365, "loss": 0.7099462151527405, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 45.75922953397294, "step_time": 0.3975291023254394} +{"epoch": 0, "iter": 8556, "iter_tflops": 43.83497276445482, "iter_time": 0.41497973632812496, "loss": 0.7076712846755981, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 47.67945294207217, "step_time": 0.381519172668457} +{"epoch": 0, "iter": 8557, "iter_tflops": 37.683864428569656, "iter_time": 0.5474781799316406, "loss": 0.317939817905426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.51956016892751, "step_time": 0.5091638069152832} +{"epoch": 0, "iter": 8558, "iter_tflops": 9.305115256296064, "iter_time": 2.2171776428222656, "loss": 0.40386077761650085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.868981996063711, "step_time": 1.7382361450195314} +{"epoch": 0, "iter": 8559, "iter_tflops": 11.582307664223675, "iter_time": 1.7812593231201173, "loss": 0.24880538880825043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.002475701423231, "step_time": 1.4733889884948728} +{"epoch": 0, "iter": 8560, "iter_tflops": 26.82594811128032, "iter_time": 0.769072296142578, "loss": 0.2495071440935135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.87593129226097, "step_time": 0.627544002532959} +{"epoch": 0, "iter": 8561, "iter_tflops": 22.169577407113724, "iter_time": 0.7518656005859377, "loss": 0.39427807927131653, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 24.18098550553854, "step_time": 0.6893243713378905} +{"epoch": 0, "iter": 8562, "iter_tflops": 24.277584908189567, 
"iter_time": 0.6865815811157225, "loss": 0.25117072463035583, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 26.225516897041874, "step_time": 0.6355849037170411} +{"epoch": 0, "iter": 8563, "iter_tflops": 23.966231905853352, "iter_time": 0.6955011825561523, "loss": 0.42434075474739075, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 25.739818827005866, "step_time": 0.6475780868530274} +{"epoch": 0, "iter": 8564, "iter_tflops": 25.598931947438345, "iter_time": 0.6511421127319336, "loss": 0.3455922603607178, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 27.49201107884117, "step_time": 0.6063049583435058} +{"epoch": 0, "iter": 8565, "iter_tflops": 22.48004608605874, "iter_time": 0.9177513885498046, "loss": 0.6786423325538635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.183920952281117, "step_time": 0.8530913391113283} +{"epoch": 0, "iter": 8566, "iter_tflops": 44.29519716740312, "iter_time": 0.4657636680603028, "loss": 0.5604018568992615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.95387066792454, "step_time": 0.4302279090881347} +{"epoch": 0, "iter": 8567, "iter_tflops": 51.989484947030896, "iter_time": 0.39683204269409184, "loss": 0.5508130788803101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.41112700635766, "step_time": 0.36572737693786617} +{"epoch": 0, "iter": 8568, "iter_tflops": 51.40629676420716, "iter_time": 0.40133397674560545, "loss": 0.5150859951972961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.52467940124779, "step_time": 0.37156618881225584} +{"epoch": 0, "iter": 8569, "iter_tflops": 49.81530114776902, "iter_time": 0.4141517372131348, "loss": 0.04584677144885063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.864710600798155, "step_time": 0.3760357666015625} +{"epoch": 0, "iter": 8570, "iter_tflops": 49.48238702973657, "iter_time": 0.41693812179565426, "loss": 0.025738254189491272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.93061309838247, "step_time": 0.3688694324493408} +{"epoch": 0, "iter": 8571, "iter_tflops": 
54.072558787216394, "iter_time": 0.38154461288452146, "loss": 0.049267470836639404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.176661397628926, "step_time": 0.34863564491271976} +{"epoch": 0, "iter": 8572, "iter_tflops": 53.78013828821791, "iter_time": 0.38361919784545895, "loss": 0.03663615882396698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.30497522392238, "step_time": 0.3538479080200196} +{"epoch": 0, "iter": 8573, "iter_tflops": 38.48278523764024, "iter_time": 0.5361122741699219, "loss": 0.6021236181259155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.55374867978311, "step_time": 0.49649175262451173} +{"epoch": 0, "iter": 8574, "iter_tflops": 45.67696431280364, "iter_time": 0.45167391967773435, "loss": 0.724233090877533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.12161495529351, "step_time": 0.4116206855773926} +{"epoch": 0, "iter": 8575, "iter_tflops": 49.177102734460995, "iter_time": 0.4195264129638672, "loss": 0.7331497073173523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.90837815385513, "step_time": 0.3899400100708008} +{"epoch": 0, "iter": 8576, "iter_tflops": 43.25926557710846, "iter_time": 0.4769173316955566, "loss": 0.8727607727050781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53239946638859, "step_time": 0.4433705062866211} +{"epoch": 0, "iter": 8577, "iter_tflops": 39.99112687659951, "iter_time": 0.5158917770385743, "loss": 0.008847378194332123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.639325335302836, "step_time": 0.47276380538940427} +{"epoch": 0, "iter": 8578, "iter_tflops": 11.642271992451272, "iter_time": 1.7720848236083984, "loss": 0.008007162250578403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.587358387103942, "step_time": 1.5184035720825197} +{"epoch": 0, "iter": 8579, "iter_tflops": 14.681939913829863, "iter_time": 1.4052021484375001, "loss": 0.010279941372573376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.522185336859483, "step_time": 1.248690357208252} +{"epoch": 0, "iter": 8580, 
"iter_tflops": 41.06390873142777, "iter_time": 0.5024142646789551, "loss": 0.003775252727791667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.853016328889666, "step_time": 0.4499397239685059} +{"epoch": 0, "iter": 8581, "iter_tflops": 13.196301009754231, "iter_time": 1.1854659271240235, "loss": 0.40596944093704224, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 13.861745182707056, "step_time": 1.1285566864013672} +{"epoch": 0, "iter": 8582, "iter_tflops": 7.870019224619359, "iter_time": 1.98776708984375, "loss": 0.3116219937801361, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 9.864113868138439, "step_time": 1.585927070617676} +{"epoch": 0, "iter": 8583, "iter_tflops": 9.651205451332379, "iter_time": 1.6209130859375, "loss": 0.445849746465683, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 11.20140298058472, "step_time": 1.3965898056030275} +{"epoch": 0, "iter": 8584, "iter_tflops": 21.219811816806818, "iter_time": 0.7372245025634766, "loss": 0.31975334882736206, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.667985316857852, "step_time": 0.527294490814209} +{"epoch": 0, "iter": 8585, "iter_tflops": 14.783436292344424, "iter_time": 1.038821807861328, "loss": 0.33721086382865906, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 15.458046664319946, "step_time": 0.993486198425293} +{"epoch": 0, "iter": 8586, "iter_tflops": 15.813580938408734, "iter_time": 0.971149803161621, "loss": 0.4611862301826477, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 17.441421446997428, "step_time": 0.8805105743408203} +{"epoch": 0, "iter": 8587, "iter_tflops": 26.406930060508017, "iter_time": 0.5815653686523438, "loss": 0.321485310792923, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 28.01543242684081, "step_time": 0.5481748695373535} +{"epoch": 0, "iter": 8588, "iter_tflops": 27.629003139384704, "iter_time": 0.5558418426513673, "loss": 0.3678343892097473, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.37874566907134, "step_time": 0.5227369537353516} +{"epoch": 0, "iter": 8589, 
"iter_tflops": 39.266244210897185, "iter_time": 0.5254155044555664, "loss": 0.45665398240089417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81225780689847, "step_time": 0.4818968811035156} +{"epoch": 0, "iter": 8590, "iter_tflops": 16.157284132838516, "iter_time": 1.2768911743164062, "loss": 0.5405259132385254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.056022299153625, "step_time": 1.0826547737121581} +{"epoch": 0, "iter": 8591, "iter_tflops": 37.92379210395282, "iter_time": 0.544014518737793, "loss": 0.6211943030357361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.666714513215034, "step_time": 0.4951456756591797} +{"epoch": 0, "iter": 8592, "iter_tflops": 38.13643332791024, "iter_time": 0.540981201171875, "loss": 0.6619084477424622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.81006548425049, "step_time": 0.49344800758361823} +{"epoch": 0, "iter": 8593, "iter_tflops": 32.613522390951864, "iter_time": 0.632593231201172, "loss": 0.28828054666519165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.3282489002882, "step_time": 0.5679077339172364} +{"epoch": 0, "iter": 8594, "iter_tflops": 47.36221082656211, "iter_time": 0.43560241699218755, "loss": 0.14489804208278656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.53595360545252, "step_time": 0.4003242797851563} +{"epoch": 0, "iter": 8595, "iter_tflops": 51.27810308324015, "iter_time": 0.4023372993469238, "loss": 0.24361741542816162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.67105591731809, "step_time": 0.37058922576904296} +{"epoch": 0, "iter": 8596, "iter_tflops": 51.650593515997976, "iter_time": 0.3994357490539551, "loss": 0.16373150050640106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.92895831176921, "step_time": 0.36888034629821775} +{"epoch": 0, "iter": 8597, "iter_tflops": 37.35429744967305, "iter_time": 0.5523084335327149, "loss": 0.5727025270462036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.469579838418305, "step_time": 0.5097926292419434} +{"epoch": 0, "iter": 
8598, "iter_tflops": 41.41524192037571, "iter_time": 0.4981521911621094, "loss": 0.6556904911994934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.48766528164317, "step_time": 0.46374862289428714} +{"epoch": 0, "iter": 8599, "iter_tflops": 42.99828680902211, "iter_time": 0.4798119888305664, "loss": 0.762492835521698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17214706020367, "step_time": 0.4468298492431641} +{"epoch": 0, "iter": 8600, "iter_tflops": 44.74726466643452, "iter_time": 0.4610582046508789, "loss": 0.8780874609947205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24409645952681, "step_time": 0.42763975334167476} +{"epoch": 0, "iter": 8601, "iter_tflops": 35.6844894059258, "iter_time": 0.5781529693603515, "loss": 0.27682986855506897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.556316484120025, "step_time": 0.5350898475646972} +{"epoch": 0, "iter": 8602, "iter_tflops": 46.71688884196045, "iter_time": 0.4416195945739746, "loss": 0.22183331847190857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24828859071185, "step_time": 0.40257136535644533} +{"epoch": 0, "iter": 8603, "iter_tflops": 38.73704209617894, "iter_time": 0.5325934143066406, "loss": 0.3631214201450348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.33632544368224, "step_time": 0.48731422233581545} +{"epoch": 0, "iter": 8604, "iter_tflops": 49.590606046764634, "iter_time": 0.41602825927734377, "loss": 0.24575576186180115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.74816148072257, "step_time": 0.383847427368164} +{"epoch": 0, "iter": 8605, "iter_tflops": 20.80925001039513, "iter_time": 0.991438591003418, "loss": 0.538702666759491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.80396103249304, "step_time": 0.946208511352539} +{"epoch": 0, "iter": 8606, "iter_tflops": 23.37187485905828, "iter_time": 0.8827316436767578, "loss": 0.616598904132843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.143192788644793, "step_time": 0.7079215259552002} +{"epoch": 0, "iter": 8607, 
"iter_tflops": 50.931357506527384, "iter_time": 0.40507645034790046, "loss": 0.48031267523765564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.24783374135238, "step_time": 0.3734280986785889} +{"epoch": 0, "iter": 8608, "iter_tflops": 50.055151627141136, "iter_time": 0.41216723632812496, "loss": 0.46051743626594543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.99010724406781, "step_time": 0.3821272926330566} +{"epoch": 0, "iter": 8609, "iter_tflops": 44.520837024305656, "iter_time": 0.4634030914306641, "loss": 0.45831596851348877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.37703825174776, "step_time": 0.426464584350586} +{"epoch": 0, "iter": 8610, "iter_tflops": 47.29059361368385, "iter_time": 0.43626209640502933, "loss": 0.42463165521621704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.47281219966154, "step_time": 0.4008153553009033} +{"epoch": 0, "iter": 8611, "iter_tflops": 49.883709220933774, "iter_time": 0.4135837898254394, "loss": 0.4888092875480652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.21207626450946, "step_time": 0.38056268882751465} +{"epoch": 0, "iter": 8612, "iter_tflops": 50.25544377360247, "iter_time": 0.41052455139160154, "loss": 0.45957306027412415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.41862459416587, "step_time": 0.3791182460784912} +{"epoch": 0, "iter": 8613, "iter_tflops": 36.09654871118383, "iter_time": 0.5715530776977539, "loss": 0.11118868738412857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.64782578379017, "step_time": 0.5338228759765624} +{"epoch": 0, "iter": 8614, "iter_tflops": 12.708353726576508, "iter_time": 1.6234277038574216, "loss": 0.15087394416332245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.86464710016134, "step_time": 1.38793025970459} +{"epoch": 0, "iter": 8615, "iter_tflops": 45.131063558108764, "iter_time": 0.45713732147216796, "loss": 0.1729959398508072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.70606202710705, "step_time": 0.42358369064331053} +{"epoch": 0, "iter": 
8616, "iter_tflops": 53.91495856137085, "iter_time": 0.3826599159240722, "loss": 0.05899375304579735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.58839743322687, "step_time": 0.3521361637115479} +{"epoch": 0, "iter": 8617, "iter_tflops": 32.39770158699042, "iter_time": 0.6368073196411131, "loss": 0.013664278201758862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.38387066108434, "step_time": 0.6000224266052245} +{"epoch": 0, "iter": 8618, "iter_tflops": 13.712547930435964, "iter_time": 1.5045412139892578, "loss": 0.011229289695620537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.51659765694629, "step_time": 1.177802556991577} +{"epoch": 0, "iter": 8619, "iter_tflops": 37.87897697074574, "iter_time": 0.5446581497192382, "loss": 0.02522362396121025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.26253023183369, "step_time": 0.48816512870788575} +{"epoch": 0, "iter": 8620, "iter_tflops": 45.25316225252931, "iter_time": 0.45590390777587897, "loss": 0.0015889494679868221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.15054040873269, "step_time": 0.41138327407836917} +{"epoch": 0, "iter": 8621, "iter_tflops": 2.943657723741706, "iter_time": 0.6926454315185545, "loss": 1.8976614475250244, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 3.2706338773235353, "step_time": 0.6233993625640869} +{"epoch": 0, "iter": 8622, "iter_tflops": 3.7410356641142783, "iter_time": 0.545012466430664, "loss": 0.6629416942596436, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 4.131192036154664, "step_time": 0.4935406188964843} +{"epoch": 0, "iter": 8623, "iter_tflops": 3.8178334356350394, "iter_time": 0.5340492477416993, "loss": 0.054980382323265076, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 4.163604951745822, "step_time": 0.4896984939575195} +{"epoch": 0, "iter": 8624, "iter_tflops": 4.373901531660476, "iter_time": 0.4661538581848144, "loss": 0.9389881491661072, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 4.772327152928912, "step_time": 0.42723623275756833} +{"epoch": 0, 
"iter": 8625, "iter_tflops": 28.946604928540342, "iter_time": 0.7127293014526367, "loss": 0.10207457840442657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.88339380863534, "step_time": 0.6470795936584472} +{"epoch": 0, "iter": 8626, "iter_tflops": 43.20674537488821, "iter_time": 0.47749705123901365, "loss": 0.14684927463531494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.13736414292714, "step_time": 0.4376802539825439} +{"epoch": 0, "iter": 8627, "iter_tflops": 49.45483430240579, "iter_time": 0.41717041015625006, "loss": 0.09578867256641388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.945786799243066, "step_time": 0.38244123840332034} +{"epoch": 0, "iter": 8628, "iter_tflops": 50.04629035295445, "iter_time": 0.41224021530151367, "loss": 0.08541159331798553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.209215890144165, "step_time": 0.3805827693939209} +{"epoch": 0, "iter": 8629, "iter_tflops": 43.190404354282734, "iter_time": 0.4776777114868164, "loss": 0.20313164591789246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.092759002600026, "step_time": 0.43809481430053715} +{"epoch": 0, "iter": 8630, "iter_tflops": 12.982329765131729, "iter_time": 1.589167266845703, "loss": 0.33163154125213623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.165642901388189, "step_time": 1.4564177322387695} +{"epoch": 0, "iter": 8631, "iter_tflops": 13.21096048560795, "iter_time": 1.5616649169921875, "loss": 0.2771777808666229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.902697390625152, "step_time": 1.297332962036133} +{"epoch": 0, "iter": 8632, "iter_tflops": 21.261548581773567, "iter_time": 0.970347640991211, "loss": 0.2721899151802063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.98843630082215, "step_time": 0.7938566703796387} +{"epoch": 0, "iter": 8633, "iter_tflops": 20.904772094848227, "iter_time": 0.8681942596435547, "loss": 0.39134645462036133, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 22.72224127300372, "step_time": 0.7987505683898926} 
+{"epoch": 0, "iter": 8634, "iter_tflops": 32.25865137125687, "iter_time": 0.5626212615966797, "loss": 0.4787155091762543, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 34.44074150292997, "step_time": 0.5269748077392579} +{"epoch": 0, "iter": 8635, "iter_tflops": 33.22082690069866, "iter_time": 0.5463260498046875, "loss": 0.5098007321357727, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 35.240184110473066, "step_time": 0.5150200996398926} +{"epoch": 0, "iter": 8636, "iter_tflops": 32.650274138433396, "iter_time": 0.555872917175293, "loss": 0.43050673604011536, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 34.53786500083915, "step_time": 0.5254929084777832} +{"epoch": 0, "iter": 8637, "iter_tflops": 29.36498661882854, "iter_time": 0.702574592590332, "loss": 0.152218759059906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.148350322671984, "step_time": 0.6623494758605958} +{"epoch": 0, "iter": 8638, "iter_tflops": 12.784227249458176, "iter_time": 1.6137927703857422, "loss": 0.16393153369426727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.513628942640732, "step_time": 1.1780022048950194} +{"epoch": 0, "iter": 8639, "iter_tflops": 45.330916193378584, "iter_time": 0.45512191772460936, "loss": 0.15942487120628357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.70164430393804, "step_time": 0.4150988121032715} +{"epoch": 0, "iter": 8640, "iter_tflops": 45.54841168519714, "iter_time": 0.45294869232177737, "loss": 0.08535792678594589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.96955337895457, "step_time": 0.4213045063018799} +{"epoch": 0, "iter": 8641, "iter_tflops": 30.657854326933215, "iter_time": 0.6729464263916016, "loss": 0.1581762731075287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.659757575454776, "step_time": 0.6316976928710938} +{"epoch": 0, "iter": 8642, "iter_tflops": 14.976363924765353, "iter_time": 1.377576934814453, "loss": 0.284406840801239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.147324898098052, "step_time": 1.1368669281005859} 
+{"epoch": 0, "iter": 8643, "iter_tflops": 42.72381497742214, "iter_time": 0.48289445877075193, "loss": 0.20946255326271057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.852100873732965, "step_time": 0.44034510993957515} +{"epoch": 0, "iter": 8644, "iter_tflops": 37.63692465248147, "iter_time": 0.5481609802246095, "loss": 0.20505203306674957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14364791164414, "step_time": 0.5014405517578125} +{"epoch": 0, "iter": 8645, "iter_tflops": 23.29482829907791, "iter_time": 0.885651237487793, "loss": 0.10452152043581009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.93946000293146, "step_time": 0.8272470016479493} +{"epoch": 0, "iter": 8646, "iter_tflops": 18.222654512448845, "iter_time": 1.1321672973632813, "loss": 0.07133890688419342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.7994389293063, "step_time": 0.9919062519073486} +{"epoch": 0, "iter": 8647, "iter_tflops": 39.13593687283016, "iter_time": 0.5271649322509766, "loss": 0.0791437029838562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.135252218252035, "step_time": 0.47828846359252936} +{"epoch": 0, "iter": 8648, "iter_tflops": 38.088476459239274, "iter_time": 0.5416623458862304, "loss": 0.07393023371696472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03031155062186, "step_time": 0.49086225509643555} +{"epoch": 0, "iter": 8649, "iter_tflops": 29.879004373861, "iter_time": 0.5551206359863281, "loss": 0.09813349694013596, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 33.283256434620625, "step_time": 0.4983422203063964} +{"epoch": 0, "iter": 8650, "iter_tflops": 37.470525723471404, "iter_time": 0.4426533012390137, "loss": 0.04933198541402817, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 42.36846396135632, "step_time": 0.3914810771942138} +{"epoch": 0, "iter": 8651, "iter_tflops": 37.956441441403115, "iter_time": 0.4369864845275879, "loss": 0.06688078492879868, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 41.207483432635776, "step_time": 
0.4025106735229492} +{"epoch": 0, "iter": 8652, "iter_tflops": 42.39111158043265, "iter_time": 0.3912719268798828, "loss": 0.05753201246261597, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 46.041785084490435, "step_time": 0.360247802734375} +{"epoch": 0, "iter": 8653, "iter_tflops": 47.48552162574906, "iter_time": 0.4344712409973145, "loss": 0.09394028037786484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.978212115202545, "step_time": 0.3969181060791016} +{"epoch": 0, "iter": 8654, "iter_tflops": 49.91153014864523, "iter_time": 0.41335325622558594, "loss": 0.06975909322500229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.38959824096974, "step_time": 0.3793205718994141} +{"epoch": 0, "iter": 8655, "iter_tflops": 53.26198055503439, "iter_time": 0.38735122680664064, "loss": 0.05165718123316765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.966045253178514, "step_time": 0.35591687202453615} +{"epoch": 0, "iter": 8656, "iter_tflops": 53.31084358693491, "iter_time": 0.3869961929321289, "loss": 0.07702632248401642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.69342862697801, "step_time": 0.35759867286682123} +{"epoch": 0, "iter": 8657, "iter_tflops": 26.270240351128525, "iter_time": 0.7853408737182617, "loss": 0.04927773028612137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.75123565605306, "step_time": 0.7434297256469726} +{"epoch": 0, "iter": 8658, "iter_tflops": 16.683006633929065, "iter_time": 1.2366531982421876, "loss": 0.1149725392460823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.071748717266516, "step_time": 1.0278672676086424} +{"epoch": 0, "iter": 8659, "iter_tflops": 40.22152323091119, "iter_time": 0.512936653137207, "loss": 0.20180004835128784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.15951813168442, "step_time": 0.467194715499878} +{"epoch": 0, "iter": 8660, "iter_tflops": 38.48351471745245, "iter_time": 0.5361021118164062, "loss": 0.0858098715543747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.43129110212684, 
"step_time": 0.48622356224060054} +{"epoch": 0, "iter": 8661, "iter_tflops": 30.895702498443857, "iter_time": 0.6677658004760743, "loss": 0.33766308426856995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.33997435658727, "step_time": 0.6007894268035889} +{"epoch": 0, "iter": 8662, "iter_tflops": 36.65937225785578, "iter_time": 0.5627781448364257, "loss": 0.20502829551696777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.73572780648724, "step_time": 0.5064618854522704} +{"epoch": 0, "iter": 8663, "iter_tflops": 39.741959439061574, "iter_time": 0.5191262283325195, "loss": 0.20361830294132233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14792588649866, "step_time": 0.47814797782897944} +{"epoch": 0, "iter": 8664, "iter_tflops": 34.397576351639465, "iter_time": 0.5997833480834962, "loss": 0.25288403034210205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.529841791668815, "step_time": 0.5497250328063965} +{"epoch": 0, "iter": 8665, "iter_tflops": 21.391772652987747, "iter_time": 0.9644405746459962, "loss": 0.8670823574066162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.012227341418384, "step_time": 0.8965274505615234} +{"epoch": 0, "iter": 8666, "iter_tflops": 23.585436256093075, "iter_time": 0.8747386856079102, "loss": 0.866048276424408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.584403548201337, "step_time": 0.6532051010131836} +{"epoch": 0, "iter": 8667, "iter_tflops": 44.43295016919508, "iter_time": 0.46431968688964836, "loss": 0.898135244846344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.66209614272345, "step_time": 0.4328616485595703} +{"epoch": 0, "iter": 8668, "iter_tflops": 43.40141464197219, "iter_time": 0.4753553237915039, "loss": 0.7411419153213501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.648883086362844, "step_time": 0.4422633972167969} +{"epoch": 0, "iter": 8669, "iter_tflops": 27.37686117498739, "iter_time": 0.7535960159301758, "loss": 0.03871474415063858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
28.879078311679447, "step_time": 0.7143958435058594} +{"epoch": 0, "iter": 8670, "iter_tflops": 18.39212231481145, "iter_time": 1.121735336303711, "loss": 0.08703552186489105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.54582945108925, "step_time": 0.9150736083984377} +{"epoch": 0, "iter": 8671, "iter_tflops": 41.90686930147922, "iter_time": 0.4923081550598144, "loss": 0.08703933656215668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.16789559599012, "step_time": 0.44687099647521966} +{"epoch": 0, "iter": 8672, "iter_tflops": 47.167631154889854, "iter_time": 0.4373993988037109, "loss": 0.08678868412971497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.7451310825228, "step_time": 0.39870598602294915} +{"epoch": 0, "iter": 8673, "iter_tflops": 21.522341809199535, "iter_time": 0.9585896224975587, "loss": 0.23716747760772705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.20466351837102, "step_time": 0.8890925521850584} +{"epoch": 0, "iter": 8674, "iter_tflops": 34.49981833239292, "iter_time": 0.5980058593750001, "loss": 0.28179484605789185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.18054672148537, "step_time": 0.5403561573028565} +{"epoch": 0, "iter": 8675, "iter_tflops": 45.89201027804616, "iter_time": 0.44955741500854496, "loss": 0.3830239474773407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88645963979903, "step_time": 0.41356098747253417} +{"epoch": 0, "iter": 8676, "iter_tflops": 52.21503268619061, "iter_time": 0.3951178894042969, "loss": 0.20494091510772705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.53675413090218, "step_time": 0.3649147148132324} +{"epoch": 0, "iter": 8677, "iter_tflops": 26.732690116664102, "iter_time": 0.7717552337646484, "loss": 0.3996483087539673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.24708844408598, "step_time": 0.7303794708251954} +{"epoch": 0, "iter": 8678, "iter_tflops": 15.240037461249354, "iter_time": 1.353742965698242, "loss": 0.2555865943431854, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 19.684728185846495, "step_time": 1.0480761184692384} +{"epoch": 0, "iter": 8679, "iter_tflops": 38.32155987940918, "iter_time": 0.538367790222168, "loss": 0.30132031440734863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80603595361797, "step_time": 0.493495569229126} +{"epoch": 0, "iter": 8680, "iter_tflops": 39.01422000105315, "iter_time": 0.528809585571289, "loss": 0.41912841796875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.66537450384398, "step_time": 0.4835558986663818} +{"epoch": 0, "iter": 8681, "iter_tflops": 33.24634369847675, "iter_time": 0.6205522537231445, "loss": 0.34218859672546387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.981202457156805, "step_time": 0.5578805484771728} +{"epoch": 0, "iter": 8682, "iter_tflops": 37.03754309332467, "iter_time": 0.5570319137573242, "loss": 0.3995678424835205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27792967919128, "step_time": 0.4998093090057373} +{"epoch": 0, "iter": 8683, "iter_tflops": 39.84497513712772, "iter_time": 0.5177840728759765, "loss": 0.4490797817707062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73625537821116, "step_time": 0.4717160472869873} +{"epoch": 0, "iter": 8684, "iter_tflops": 40.7760971300446, "iter_time": 0.5059604759216308, "loss": 0.3641956150531769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.483722745387496, "step_time": 0.4637897243499756} +{"epoch": 0, "iter": 8685, "iter_tflops": 31.241403426151276, "iter_time": 0.6603766555786132, "loss": 0.6658332943916321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.68969852083892, "step_time": 0.5947325687408447} +{"epoch": 0, "iter": 8686, "iter_tflops": 35.1630754936913, "iter_time": 0.5867260818481446, "loss": 0.6592299342155457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.76652678986257, "step_time": 0.5321883392333985} +{"epoch": 0, "iter": 8687, "iter_tflops": 36.1491642020541, "iter_time": 0.570721176147461, "loss": 0.66945481300354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
39.4973999365982, "step_time": 0.5223405475616456} +{"epoch": 0, "iter": 8688, "iter_tflops": 38.74254811780069, "iter_time": 0.5325177230834961, "loss": 0.7358304262161255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03811892978187, "step_time": 0.49077109146118164} +{"epoch": 0, "iter": 8689, "iter_tflops": 32.697993091764424, "iter_time": 0.6309590148925782, "loss": 0.8476465940475464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.46574975633607, "step_time": 0.5817188034057618} +{"epoch": 0, "iter": 8690, "iter_tflops": 19.567758872564916, "iter_time": 1.0543411560058595, "loss": 0.8535547852516174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.58170115267727, "step_time": 0.6974275550842284} +{"epoch": 0, "iter": 8691, "iter_tflops": 44.76880497642943, "iter_time": 0.460836368560791, "loss": 0.559837281703949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.26878431319432, "step_time": 0.4274210300445557} +{"epoch": 0, "iter": 8692, "iter_tflops": 46.476608462683984, "iter_time": 0.4439027328491211, "loss": 0.7755836844444275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.71396684167467, "step_time": 0.414995922088623} +{"epoch": 0, "iter": 8693, "iter_tflops": 20.065876021420877, "iter_time": 0.7490485000610352, "loss": 0.3306176960468292, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 21.156646123243537, "step_time": 0.710429916381836} +{"epoch": 0, "iter": 8694, "iter_tflops": 10.41325946829702, "iter_time": 1.443382293701172, "loss": 0.5388795137405396, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.930149009078264, "step_time": 1.0789772834777833} +{"epoch": 0, "iter": 8695, "iter_tflops": 26.770000245159633, "iter_time": 0.5614611206054687, "loss": 0.298200398683548, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.53334808358962, "step_time": 0.5267630805969238} +{"epoch": 0, "iter": 8696, "iter_tflops": 27.460099509662975, "iter_time": 0.5473510513305664, "loss": 0.27615711092948914, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 
29.16147337982627, "step_time": 0.515416835784912} +{"epoch": 0, "iter": 8697, "iter_tflops": 25.317632334366632, "iter_time": 0.8148903198242189, "loss": 0.5492961406707764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.6426762188087, "step_time": 0.7743626556396483} +{"epoch": 0, "iter": 8698, "iter_tflops": 11.690419645793963, "iter_time": 1.764786392211914, "loss": 0.5367012619972229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.570905708499824, "step_time": 1.5202444076538089} +{"epoch": 0, "iter": 8699, "iter_tflops": 39.60956762412532, "iter_time": 0.5208613662719727, "loss": 0.5100467801094055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.55911812123756, "step_time": 0.47363432502746583} +{"epoch": 0, "iter": 8700, "iter_tflops": 40.04207382139905, "iter_time": 0.5152353897094726, "loss": 0.5097401738166809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.01500502226815, "step_time": 0.4687286415100098} +{"epoch": 0, "iter": 8701, "iter_tflops": 24.98222604662544, "iter_time": 0.8258308715820313, "loss": 0.002520030364394188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.926705040993237, "step_time": 0.7661945075988769} +{"epoch": 0, "iter": 8702, "iter_tflops": 8.624692701355988, "iter_time": 2.3920960693359374, "loss": 0.004220816772431135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.653648829103053, "step_time": 2.1371290664672853} +{"epoch": 0, "iter": 8703, "iter_tflops": 17.049676921035243, "iter_time": 1.2100577392578127, "loss": 0.011316744610667229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.517376313522522, "step_time": 1.0055424823760988} +{"epoch": 0, "iter": 8704, "iter_tflops": 44.99828692990787, "iter_time": 0.45848619842529303, "loss": 0.006624218076467514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.973692604400426, "step_time": 0.4128390846252442} +{"epoch": 0, "iter": 8705, "iter_tflops": 15.969363477543054, "iter_time": 0.9642373962402343, "loss": 0.4834587574005127, "lr": 3e-05, "seqlen": 6160.0, 
"step_tflops": 17.119262660372513, "step_time": 0.8994696655273435} +{"epoch": 0, "iter": 8706, "iter_tflops": 10.20744617691932, "iter_time": 1.5085318298339843, "loss": 0.4972144067287445, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 14.46983519052036, "step_time": 1.0641626014709473} +{"epoch": 0, "iter": 8707, "iter_tflops": 27.3034413058618, "iter_time": 0.563967643737793, "loss": 0.2889540493488312, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 29.127764591174056, "step_time": 0.5286453552246093} +{"epoch": 0, "iter": 8708, "iter_tflops": 28.102348872519467, "iter_time": 0.5479348907470702, "loss": 0.2837929427623749, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 29.82779210334328, "step_time": 0.5162385940551757} +{"epoch": 0, "iter": 8709, "iter_tflops": 32.51543795738649, "iter_time": 0.6345014801025392, "loss": 0.3704884350299835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.790604738207385, "step_time": 0.5930076141357422} +{"epoch": 0, "iter": 8710, "iter_tflops": 11.525028662400796, "iter_time": 1.7901121215820313, "loss": 0.38303959369659424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.30893237465392, "step_time": 1.5501689338684081} +{"epoch": 0, "iter": 8711, "iter_tflops": 18.879516520056892, "iter_time": 1.092776580810547, "loss": 0.33221861720085144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.33806750152924, "step_time": 0.9668679466247558} +{"epoch": 0, "iter": 8712, "iter_tflops": 40.22012695698285, "iter_time": 0.512954460144043, "loss": 0.5548693537712097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.97850420696786, "step_time": 0.46911767196655274} +{"epoch": 0, "iter": 8713, "iter_tflops": 13.790560977383223, "iter_time": 1.2206015930175782, "loss": 0.3588184118270874, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 14.700039989058377, "step_time": 1.1450840072631836} +{"epoch": 0, "iter": 8714, "iter_tflops": 17.07912312497937, "iter_time": 0.985576400756836, "loss": 0.3276705741882324, "lr": 3e-05, "seqlen": 6720.0, 
"step_tflops": 24.8767295077429, "step_time": 0.6766476554870605} +{"epoch": 0, "iter": 8715, "iter_tflops": 24.765825448488304, "iter_time": 0.6796777572631837, "loss": 0.39736539125442505, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 26.689882168559546, "step_time": 0.6306802177429199} +{"epoch": 0, "iter": 8716, "iter_tflops": 25.96607956199243, "iter_time": 0.648260383605957, "loss": 0.32297855615615845, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 27.838877636678493, "step_time": 0.6046501197814942} +{"epoch": 0, "iter": 8717, "iter_tflops": 15.182314686534005, "iter_time": 1.358889862060547, "loss": 0.13979114592075348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.298798564919526, "step_time": 1.2658045578002928} +{"epoch": 0, "iter": 8718, "iter_tflops": 18.165039696085522, "iter_time": 1.1357582397460937, "loss": 0.16396260261535645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.833888813381254, "step_time": 0.8307637062072755} +{"epoch": 0, "iter": 8719, "iter_tflops": 43.634214522426426, "iter_time": 0.4728191795349121, "loss": 0.13232114911079407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.97440028587083, "step_time": 0.4300438022613525} +{"epoch": 0, "iter": 8720, "iter_tflops": 46.06955209192209, "iter_time": 0.4478249206542969, "loss": 0.13546176254749298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.51958442498248, "step_time": 0.4083781318664551} +{"epoch": 0, "iter": 8721, "iter_tflops": 18.048283510479312, "iter_time": 1.1431055755615236, "loss": 0.008986271917819977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.35661561249639, "step_time": 1.0658419799804688} +{"epoch": 0, "iter": 8722, "iter_tflops": 24.676115457272875, "iter_time": 0.8360754165649414, "loss": 0.0062432121485471725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.458163620310195, "step_time": 0.6773584175109864} +{"epoch": 0, "iter": 8723, "iter_tflops": 58.00806499811802, "iter_time": 0.35565905380249024, "loss": 0.006239799782633781, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 63.65935169266046, "step_time": 0.3240858249664307} +{"epoch": 0, "iter": 8724, "iter_tflops": 62.63023196646346, "iter_time": 0.32941109848022465, "loss": 0.009187927469611168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 68.7560731978746, "step_time": 0.30006212615966793} +{"epoch": 0, "iter": 8725, "iter_tflops": 41.091082438820436, "iter_time": 0.5020820159912109, "loss": 0.048186592757701874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44513437933457, "step_time": 0.4641923980712891} +{"epoch": 0, "iter": 8726, "iter_tflops": 13.97543083875539, "iter_time": 1.476240249633789, "loss": 0.02556137926876545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.12981480620374, "step_time": 1.0784784755706787} +{"epoch": 0, "iter": 8727, "iter_tflops": 34.33514052750962, "iter_time": 0.600874008178711, "loss": 0.027680985629558563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.76482369830975, "step_time": 0.5463045101165771} +{"epoch": 0, "iter": 8728, "iter_tflops": 41.88414704744075, "iter_time": 0.49257523345947263, "loss": 0.039129838347435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.812512791084224, "step_time": 0.4503375225067139} +{"epoch": 0, "iter": 8729, "iter_tflops": 20.057517553760153, "iter_time": 1.0285965576171876, "loss": 0.15733011066913605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.336230434253487, "step_time": 0.9669511947631836} +{"epoch": 0, "iter": 8730, "iter_tflops": 15.238377227004603, "iter_time": 1.3538904571533203, "loss": 0.24108441174030304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.626051609456873, "step_time": 1.1704886589050294} +{"epoch": 0, "iter": 8731, "iter_tflops": 38.118794092519984, "iter_time": 0.5412315368652344, "loss": 0.18899936974048615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.896763548149465, "step_time": 0.49242690277099604} +{"epoch": 0, "iter": 8732, "iter_tflops": 42.293442359787974, "iter_time": 0.4878083305358887, "loss": 0.19690538942813873, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.42319362634588, "step_time": 0.4444134902954101} +{"epoch": 0, "iter": 8733, "iter_tflops": 21.071473184750573, "iter_time": 0.9791006698608398, "loss": 0.24326564371585846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.75673013361343, "step_time": 0.9065930557250976} +{"epoch": 0, "iter": 8734, "iter_tflops": 26.047765393854263, "iter_time": 0.7920485000610352, "loss": 0.27701500058174133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.465790428965217, "step_time": 0.7001710529327393} +{"epoch": 0, "iter": 8735, "iter_tflops": 50.69891379379861, "iter_time": 0.4069336395263672, "loss": 0.19686004519462585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.04036927481262, "step_time": 0.3748356666564942} +{"epoch": 0, "iter": 8736, "iter_tflops": 50.10909788490024, "iter_time": 0.4117235069274902, "loss": 0.1539647877216339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.99002154765444, "step_time": 0.3821278991699219} +{"epoch": 0, "iter": 8737, "iter_tflops": 39.93745146845549, "iter_time": 0.5165851287841797, "loss": 0.3833102285861969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.08985441211061, "step_time": 0.4787923698425293} +{"epoch": 0, "iter": 8738, "iter_tflops": 12.2035696657279, "iter_time": 1.690578582763672, "loss": 0.5038836002349854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.285235718188282, "step_time": 1.5529339447021484} +{"epoch": 0, "iter": 8739, "iter_tflops": 10.879364385981718, "iter_time": 1.8963509979248045, "loss": 0.41527271270751953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.168555905399119, "step_time": 1.5666936950683592} +{"epoch": 0, "iter": 8740, "iter_tflops": 14.68571630032858, "iter_time": 1.404840805053711, "loss": 0.4905287027359009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.553392089940402, "step_time": 1.175333713531494} +{"epoch": 0, "iter": 8741, "iter_tflops": 18.09666610231146, "iter_time": 0.9801495742797851, "loss": 0.2591588795185089, 
"lr": 3e-05, "seqlen": 7072.0, "step_tflops": 19.319727980290217, "step_time": 0.9180998611450194} +{"epoch": 0, "iter": 8742, "iter_tflops": 15.266101093824599, "iter_time": 1.161884063720703, "loss": 0.3002385199069977, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 16.87285201827441, "step_time": 1.051241340637207} +{"epoch": 0, "iter": 8743, "iter_tflops": 25.22846930689679, "iter_time": 0.7030723648071289, "loss": 0.2703874707221985, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 27.206346187572425, "step_time": 0.6519596366882324} +{"epoch": 0, "iter": 8744, "iter_tflops": 28.07112200761383, "iter_time": 0.6318749771118164, "loss": 0.39197421073913574, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 30.086232697418108, "step_time": 0.5895533599853516} +{"epoch": 0, "iter": 8745, "iter_tflops": 18.275511776629877, "iter_time": 1.128892791748047, "loss": 0.4399811327457428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.64030548994069, "step_time": 1.0504466705322266} +{"epoch": 0, "iter": 8746, "iter_tflops": 14.807143610075066, "iter_time": 1.393320281982422, "loss": 0.3944326341152191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.342018589751255, "step_time": 1.066646348953247} +{"epoch": 0, "iter": 8747, "iter_tflops": 38.09357425543893, "iter_time": 0.5415898590087891, "loss": 0.4392164945602417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75906612814216, "step_time": 0.49405064392089837} +{"epoch": 0, "iter": 8748, "iter_tflops": 39.31252546203965, "iter_time": 0.5247969512939453, "loss": 0.24769757688045502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.92892210507056, "step_time": 0.4805872707366943} +{"epoch": 0, "iter": 8749, "iter_tflops": 19.233455558385682, "iter_time": 1.0726670227050783, "loss": 0.04457586258649826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.473910882610873, "step_time": 1.007677215576172} +{"epoch": 0, "iter": 8750, "iter_tflops": 15.874330398583721, "iter_time": 1.2996512603759764, "loss": 0.03804726526141167, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.437960729947495, "step_time": 1.0094497089385988} +{"epoch": 0, "iter": 8751, "iter_tflops": 44.99266309998603, "iter_time": 0.45854350662231447, "loss": 0.03993379324674606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.773101419905075, "step_time": 0.414502872467041} +{"epoch": 0, "iter": 8752, "iter_tflops": 47.62083250703115, "iter_time": 0.4332367248535156, "loss": 0.039506420493125916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33003086926044, "step_time": 0.39424959564208983} +{"epoch": 0, "iter": 8753, "iter_tflops": 17.65932470578964, "iter_time": 1.1682832641601562, "loss": 0.865262508392334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.936425582405143, "step_time": 1.0894924926757812} +{"epoch": 0, "iter": 8754, "iter_tflops": 16.52591286811615, "iter_time": 1.248408706665039, "loss": 0.6554067134857178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.57289697352098, "step_time": 1.0028287963867186} +{"epoch": 0, "iter": 8755, "iter_tflops": 43.002381237368226, "iter_time": 0.47976630401611325, "loss": 0.5574365258216858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.28111101798641, "step_time": 0.44577783584594727} +{"epoch": 0, "iter": 8756, "iter_tflops": 46.138252113119194, "iter_time": 0.44715810775756837, "loss": 0.6979327201843262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.874088017317305, "step_time": 0.41366357421875005} +{"epoch": 0, "iter": 8757, "iter_tflops": 11.6093906323711, "iter_time": 0.7332679977416992, "loss": 0.014504875056445599, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 12.271974768766492, "step_time": 0.6936776504516602} +{"epoch": 0, "iter": 8758, "iter_tflops": 5.3294710016507745, "iter_time": 1.5973057403564452, "loss": 0.002703556092455983, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 6.171319148309129, "step_time": 1.3794124755859376} +{"epoch": 0, "iter": 8759, "iter_tflops": 16.860921265400684, "iter_time": 0.5048831253051758, "loss": 
0.0066960775293409824, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 21.82494421852143, "step_time": 0.39004886054992677} +{"epoch": 0, "iter": 8760, "iter_tflops": 20.519014755353076, "iter_time": 0.41487345886230464, "loss": 0.003244334366172552, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 22.653723374076648, "step_time": 0.3757790489196777} +{"epoch": 0, "iter": 8761, "iter_tflops": 31.45216192383254, "iter_time": 0.6559515228271484, "loss": 0.6867411136627197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.59624377971387, "step_time": 0.6140892906188965} +{"epoch": 0, "iter": 8762, "iter_tflops": 18.215791710344924, "iter_time": 1.1325938415527343, "loss": 0.7896966338157654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.806730659642675, "step_time": 0.8316732177734374} +{"epoch": 0, "iter": 8763, "iter_tflops": 41.138442765234366, "iter_time": 0.5015039978027344, "loss": 0.8178022503852844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.2317302176808, "step_time": 0.4664319801330566} +{"epoch": 0, "iter": 8764, "iter_tflops": 43.50233979565131, "iter_time": 0.4742525024414063, "loss": 0.6101785898208618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.745964367198155, "step_time": 0.4413449115753174} +{"epoch": 0, "iter": 8765, "iter_tflops": 38.9121795811657, "iter_time": 0.5301962966918945, "loss": 0.13985387980937958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03135354289561, "step_time": 0.4908500862121582} +{"epoch": 0, "iter": 8766, "iter_tflops": 12.209441217061018, "iter_time": 1.6897655792236328, "loss": 0.11451737582683563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.5931632126239, "step_time": 1.4137506179809571} +{"epoch": 0, "iter": 8767, "iter_tflops": 12.236258883851502, "iter_time": 1.6860621948242187, "loss": 0.1298709660768509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.026112477559256, "step_time": 1.470906036376953} +{"epoch": 0, "iter": 8768, "iter_tflops": 21.203009889852087, "iter_time": 0.9730266418457031, 
"loss": 0.16848228871822357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.81620152298089, "step_time": 0.8662629718780518} +{"epoch": 0, "iter": 8769, "iter_tflops": 13.936176482680006, "iter_time": 1.2225867309570313, "loss": 0.44242262840270996, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 14.886793387876587, "step_time": 1.1445167541503904} +{"epoch": 0, "iter": 8770, "iter_tflops": 18.811352792248186, "iter_time": 0.9057394561767578, "loss": 0.35323405265808105, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 23.19358293805516, "step_time": 0.7346076927185059} +{"epoch": 0, "iter": 8771, "iter_tflops": 25.23284844944568, "iter_time": 0.6752382507324219, "loss": 0.3777596950531006, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 27.196167089741177, "step_time": 0.6264921226501464} +{"epoch": 0, "iter": 8772, "iter_tflops": 29.79501890046352, "iter_time": 0.5718467407226563, "loss": 0.4749259054660797, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 31.807053606621828, "step_time": 0.5356731452941894} +{"epoch": 0, "iter": 8773, "iter_tflops": 26.546679528369715, "iter_time": 0.7771628646850586, "loss": 0.6919639110565186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.133457044215643, "step_time": 0.7333294830322266} +{"epoch": 0, "iter": 8774, "iter_tflops": 19.872216861018593, "iter_time": 1.0381878204345703, "loss": 0.5165395736694336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.69969014220812, "step_time": 0.8705216560363769} +{"epoch": 0, "iter": 8775, "iter_tflops": 45.63852812977748, "iter_time": 0.452054313659668, "loss": 0.8941183686256409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11956509214873, "step_time": 0.4200178375244141} +{"epoch": 0, "iter": 8776, "iter_tflops": 40.36590483694203, "iter_time": 0.5111019706726074, "loss": 0.6194658279418945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.27162187477743, "step_time": 0.47678114700317376} +{"epoch": 0, "iter": 8777, "iter_tflops": 30.665730016970233, "iter_time": 
0.6727735977172852, "loss": 0.3875235319137573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.65370899142338, "step_time": 0.6318147048950196} +{"epoch": 0, "iter": 8778, "iter_tflops": 16.904619746215257, "iter_time": 1.220441146850586, "loss": 0.3455771505832672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.79930451893737, "step_time": 1.0420110206604005} +{"epoch": 0, "iter": 8779, "iter_tflops": 25.98853108623124, "iter_time": 0.7938537750244141, "loss": 0.360769122838974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.73938915955707, "step_time": 0.5615524368286132} +{"epoch": 0, "iter": 8780, "iter_tflops": 49.253917313328714, "iter_time": 0.4188721351623535, "loss": 0.4075625538825989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26991214381559, "step_time": 0.38729355239868163} +{"epoch": 0, "iter": 8781, "iter_tflops": 43.619786002364194, "iter_time": 0.4729755783081055, "loss": 0.13162247836589813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.662849064974786, "step_time": 0.43285481071472165} +{"epoch": 0, "iter": 8782, "iter_tflops": 44.00542602843052, "iter_time": 0.4688306732177734, "loss": 0.06312859058380127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.692723034077694, "step_time": 0.4325836772918702} +{"epoch": 0, "iter": 8783, "iter_tflops": 48.343100609257085, "iter_time": 0.42676396942138667, "loss": 0.16798870265483856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.712222686901974, "step_time": 0.39139107513427734} +{"epoch": 0, "iter": 8784, "iter_tflops": 48.78557721301675, "iter_time": 0.42289329528808595, "loss": 0.16380037367343903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.87381712467862, "step_time": 0.3901948947906494} +{"epoch": 0, "iter": 8785, "iter_tflops": 35.538657397319824, "iter_time": 0.580525405883789, "loss": 0.36898499727249146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.11981264057885, "step_time": 0.5412170753479004} +{"epoch": 0, "iter": 8786, "iter_tflops": 10.682800543574082, 
"iter_time": 1.9312439117431641, "loss": 0.4805428385734558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.967159342787527, "step_time": 1.5910264511108398} +{"epoch": 0, "iter": 8787, "iter_tflops": 13.758171149387628, "iter_time": 1.4995520324707032, "loss": 0.4073212742805481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.568299266935277, "step_time": 1.2452149238586423} +{"epoch": 0, "iter": 8788, "iter_tflops": 25.332512465966428, "iter_time": 0.8144116592407227, "loss": 0.40971875190734863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.399148862108206, "step_time": 0.6786733932495117} +{"epoch": 0, "iter": 8789, "iter_tflops": 11.836610040874739, "iter_time": 1.3493198699951172, "loss": 0.42687734961509705, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 12.704844424760184, "step_time": 1.2571089096069334} +{"epoch": 0, "iter": 8790, "iter_tflops": 13.747502151312654, "iter_time": 1.1617654571533205, "loss": 0.3223209083080292, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.89627199270924, "step_time": 1.004724449157715} +{"epoch": 0, "iter": 8791, "iter_tflops": 29.433401458048316, "iter_time": 0.5426275024414062, "loss": 0.2668705880641937, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 31.460648622869005, "step_time": 0.5076619148254394} +{"epoch": 0, "iter": 8792, "iter_tflops": 30.286931559514475, "iter_time": 0.5273354644775391, "loss": 0.2378801703453064, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 32.11441842992185, "step_time": 0.4973271789550781} +{"epoch": 0, "iter": 8793, "iter_tflops": 15.333481535835196, "iter_time": 0.8738808441162109, "loss": 0.0010231435298919678, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 16.0939502040202, "step_time": 0.8325883712768556} +{"epoch": 0, "iter": 8794, "iter_tflops": 15.99880418663564, "iter_time": 0.8375398330688477, "loss": 0.005963606294244528, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 20.127134595657584, "step_time": 0.6657497978210449} +{"epoch": 0, "iter": 8795, "iter_tflops": 
37.39168586655003, "iter_time": 0.35835869598388675, "loss": 0.0035960113164037466, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 41.19150125246699, "step_time": 0.32530098152160647} +{"epoch": 0, "iter": 8796, "iter_tflops": 36.55657786746809, "iter_time": 0.3665451354980469, "loss": 0.0022641890682280064, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 40.2128566274903, "step_time": 0.3332177047729492} +{"epoch": 0, "iter": 8797, "iter_tflops": 53.00464428020777, "iter_time": 0.3892318077087402, "loss": 0.013544651679694653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.634666923660994, "step_time": 0.3518582878112793} +{"epoch": 0, "iter": 8798, "iter_tflops": 41.003981005165215, "iter_time": 0.5031485481262207, "loss": 0.010849795304238796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.349611567466944, "step_time": 0.45493429374694827} +{"epoch": 0, "iter": 8799, "iter_tflops": 55.88788697440676, "iter_time": 0.36915143203735346, "loss": 0.004088710993528366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.38362461952235, "step_time": 0.3361009330749512} +{"epoch": 0, "iter": 8800, "iter_tflops": 60.71018051242123, "iter_time": 0.3398292236328125, "loss": 0.002870118012651801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.40055061000781, "step_time": 0.3107066631317138} +{"epoch": 0, "iter": 8801, "iter_tflops": 44.93247523924487, "iter_time": 0.45915773391723635, "loss": 0.726534366607666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.92154076843498, "step_time": 0.4217179832458496} +{"epoch": 0, "iter": 8802, "iter_tflops": 19.559545665662625, "iter_time": 1.0547838821411133, "loss": 0.7469592690467834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.512906589612104, "step_time": 0.8086531982421875} +{"epoch": 0, "iter": 8803, "iter_tflops": 43.62360309415473, "iter_time": 0.4729341926574707, "loss": 0.7287999987602234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.98799641498797, "step_time": 0.4390715732574462} +{"epoch": 0, "iter": 8804, 
"iter_tflops": 42.84593181567703, "iter_time": 0.4815181427001953, "loss": 0.5725019574165344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.723747085372565, "step_time": 0.45121178436279297} +{"epoch": 0, "iter": 8805, "iter_tflops": 25.330962659496038, "iter_time": 0.8144614868164062, "loss": 0.816183865070343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.672172232483337, "step_time": 0.7735063095092773} +{"epoch": 0, "iter": 8806, "iter_tflops": 13.605333310559804, "iter_time": 1.516397506713867, "loss": 0.9648890495300293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.661028228540836, "step_time": 1.1055711002349853} +{"epoch": 0, "iter": 8807, "iter_tflops": 42.13168139513984, "iter_time": 0.48968122863769536, "loss": 0.8041561245918274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.54030582725291, "step_time": 0.45302931404113767} +{"epoch": 0, "iter": 8808, "iter_tflops": 44.274785326130846, "iter_time": 0.46597839736938484, "loss": 0.7919408082962036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.38065844670288, "step_time": 0.4354328155517578} +{"epoch": 0, "iter": 8809, "iter_tflops": 21.60868906974722, "iter_time": 0.7770815734863281, "loss": 0.002466945443302393, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 22.751360546544625, "step_time": 0.7380531845092773} +{"epoch": 0, "iter": 8810, "iter_tflops": 10.737256943049612, "iter_time": 1.5638737335205077, "loss": 0.008428565226495266, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 13.901357878067193, "step_time": 1.2079189853668213} +{"epoch": 0, "iter": 8811, "iter_tflops": 33.74393174104808, "iter_time": 0.4976217422485351, "loss": 0.003206598339602351, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 37.290006549481205, "step_time": 0.45030064773559564} +{"epoch": 0, "iter": 8812, "iter_tflops": 35.94378257478164, "iter_time": 0.4671660270690918, "loss": 0.018450796604156494, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 39.95445562018988, "step_time": 0.42027137756347654} +{"epoch": 0, 
"iter": 8813, "iter_tflops": 17.605453102948914, "iter_time": 1.1718581390380858, "loss": 0.7230429649353027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.879808484530642, "step_time": 1.0927596817016603} +{"epoch": 0, "iter": 8814, "iter_tflops": 16.322145283446, "iter_time": 1.2639939880371094, "loss": 1.006516933441162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.39823180115003, "step_time": 0.9641494541168214} +{"epoch": 0, "iter": 8815, "iter_tflops": 49.53880709660015, "iter_time": 0.41646326828002933, "loss": 0.8601781725883484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.508519031097435, "step_time": 0.385566520690918} +{"epoch": 0, "iter": 8816, "iter_tflops": 47.995206450546306, "iter_time": 0.4298573760986328, "loss": 0.918002188205719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.457995013245935, "step_time": 0.4009307689666748} +{"epoch": 0, "iter": 8817, "iter_tflops": 41.45055346173542, "iter_time": 0.49772781753540046, "loss": 0.0739160031080246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.86346779151884, "step_time": 0.4598639945983887} +{"epoch": 0, "iter": 8818, "iter_tflops": 17.14668145811892, "iter_time": 1.2032120361328125, "loss": 0.12555445730686188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.34145934012752, "step_time": 1.0142386131286623} +{"epoch": 0, "iter": 8819, "iter_tflops": 46.3456533907889, "iter_time": 0.44515703201293944, "loss": 0.148760125041008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.45127596549105, "step_time": 0.4089310550689697} +{"epoch": 0, "iter": 8820, "iter_tflops": 52.13480437450111, "iter_time": 0.39572592163085935, "loss": 0.1174759715795517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.88296496796286, "step_time": 0.3626937084197998} +{"epoch": 0, "iter": 8821, "iter_tflops": 45.66798180250548, "iter_time": 0.4517627601623535, "loss": 0.3527712821960449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88490413771727, "step_time": 0.41357388305664056} +{"epoch": 0, 
"iter": 8822, "iter_tflops": 39.93795519155854, "iter_time": 0.51657861328125, "loss": 0.32549840211868286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.440565746134325, "step_time": 0.47492690658569336} +{"epoch": 0, "iter": 8823, "iter_tflops": 44.656373617053255, "iter_time": 0.4619966163635254, "loss": 0.3878931999206543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.4997655716772, "step_time": 0.42538542747497554} +{"epoch": 0, "iter": 8824, "iter_tflops": 48.3802731201638, "iter_time": 0.4264360694885254, "loss": 0.5468035936355591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.28452670948772, "step_time": 0.3945927181243896} +{"epoch": 0, "iter": 8825, "iter_tflops": 45.70480054463261, "iter_time": 0.45139883041381834, "loss": 0.1587386280298233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9474619840959, "step_time": 0.413055892944336} +{"epoch": 0, "iter": 8826, "iter_tflops": 48.57155494533808, "iter_time": 0.4247567024230957, "loss": 0.18043367564678192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.80038163535143, "step_time": 0.3907375831604004} +{"epoch": 0, "iter": 8827, "iter_tflops": 49.962872568757334, "iter_time": 0.4129284896850586, "loss": 0.15170837938785553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.06553975478549, "step_time": 0.3815941467285156} +{"epoch": 0, "iter": 8828, "iter_tflops": 49.30809006760392, "iter_time": 0.41841193771362306, "loss": 0.15999172627925873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.070723934564384, "step_time": 0.38874716567993156} +{"epoch": 0, "iter": 8829, "iter_tflops": 23.541482572991512, "iter_time": 0.8763718872070312, "loss": 0.014500475488603115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.705759823232583, "step_time": 0.8350722122192382} +{"epoch": 0, "iter": 8830, "iter_tflops": 15.55808446343716, "iter_time": 1.3260690002441404, "loss": 0.026173142716288567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.50812879796562, "step_time": 1.0059959011077884} +{"epoch": 
0, "iter": 8831, "iter_tflops": 52.56094979685132, "iter_time": 0.3925175170898438, "loss": 0.031611401587724686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.26948395431924, "step_time": 0.35406343269348145} +{"epoch": 0, "iter": 8832, "iter_tflops": 53.92463157966349, "iter_time": 0.3825912742614746, "loss": 0.07684434950351715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.01725887939722, "step_time": 0.34957729148864747} +{"epoch": 0, "iter": 8833, "iter_tflops": 33.3851178293598, "iter_time": 0.6179727630615235, "loss": 0.24942126870155334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.6252333997501, "step_time": 0.5791146202087403} +{"epoch": 0, "iter": 8834, "iter_tflops": 10.960339447415501, "iter_time": 1.8823407440185547, "loss": 0.31385207176208496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.15535079758873, "step_time": 1.697284912109375} +{"epoch": 0, "iter": 8835, "iter_tflops": 10.983768431822813, "iter_time": 1.8783256072998047, "loss": 0.35947105288505554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.327214584786782, "step_time": 1.5480424194335938} +{"epoch": 0, "iter": 8836, "iter_tflops": 28.11795017242181, "iter_time": 0.7337339096069336, "loss": 0.39473384618759155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.417063828165155, "step_time": 0.6566843299865722} +{"epoch": 0, "iter": 8837, "iter_tflops": 15.0898927588054, "iter_time": 1.0855814514160156, "loss": 0.5093793272972107, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 15.947240802203996, "step_time": 1.0272189331054689} +{"epoch": 0, "iter": 8838, "iter_tflops": 16.16892024571299, "iter_time": 1.0131355361938477, "loss": 0.46183404326438904, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 19.63810557219725, "step_time": 0.8341592636108399} +{"epoch": 0, "iter": 8839, "iter_tflops": 23.830974964505526, "iter_time": 0.6873956146240234, "loss": 0.4270431697368622, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 25.68684941400532, "step_time": 0.6377312927246095} 
+{"epoch": 0, "iter": 8840, "iter_tflops": 25.848042061461523, "iter_time": 0.6337542953491211, "loss": 0.4410695433616638, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 27.756796222601942, "step_time": 0.5901728553771972} +{"epoch": 0, "iter": 8841, "iter_tflops": 23.362895296703073, "iter_time": 0.8830709228515624, "loss": 0.15300850570201874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.961005227783087, "step_time": 0.8265329589843751} +{"epoch": 0, "iter": 8842, "iter_tflops": 19.18845440947864, "iter_time": 1.0751826629638674, "loss": 0.13439175486564636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.389956866816522, "step_time": 0.9645224456787109} +{"epoch": 0, "iter": 8843, "iter_tflops": 43.72503200700535, "iter_time": 0.47183712768554686, "loss": 0.14204157888889313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.973107693689734, "step_time": 0.4300553894042969} +{"epoch": 0, "iter": 8844, "iter_tflops": 45.185083190006594, "iter_time": 0.45659080505371086, "loss": 0.18801620602607727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.40957562249012, "step_time": 0.4175525341033936} +{"epoch": 0, "iter": 8845, "iter_tflops": 20.829444097315424, "iter_time": 0.9904773941040039, "loss": 0.6873378753662109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.172931158454922, "step_time": 0.9304630661010742} +{"epoch": 0, "iter": 8846, "iter_tflops": 22.732072038766095, "iter_time": 0.9075764617919921, "loss": 0.7069114446640015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.098620459079946, "step_time": 0.7342386627197266} +{"epoch": 0, "iter": 8847, "iter_tflops": 40.120477938030454, "iter_time": 0.5142285079956055, "loss": 0.7511497735977173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66875691487607, "step_time": 0.47244517517089846} +{"epoch": 0, "iter": 8848, "iter_tflops": 36.3517928738535, "iter_time": 0.5675399169921875, "loss": 0.6467279195785522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.591586510901394, "step_time": 
0.5210979232788087} +{"epoch": 0, "iter": 8849, "iter_tflops": 18.26202564900636, "iter_time": 1.1297264556884767, "loss": 0.010607246309518814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.508015008383918, "step_time": 1.0575701065063476} +{"epoch": 0, "iter": 8850, "iter_tflops": 24.668534096733396, "iter_time": 0.8363323669433594, "loss": 0.006178486626595259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.60835145092668, "step_time": 0.674034782409668} +{"epoch": 0, "iter": 8851, "iter_tflops": 55.98388314782246, "iter_time": 0.3685184440612793, "loss": 0.009403498843312263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.326039347169676, "step_time": 0.3364165325164795} +{"epoch": 0, "iter": 8852, "iter_tflops": 55.35507848049391, "iter_time": 0.3727046203613282, "loss": 0.01100493036210537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.490783742845586, "step_time": 0.34106176567077634} +{"epoch": 0, "iter": 8853, "iter_tflops": 39.94337249255157, "iter_time": 0.5165085525512696, "loss": 0.1474863737821579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.29477448347087, "step_time": 0.47652618026733395} +{"epoch": 0, "iter": 8854, "iter_tflops": 26.188164620043175, "iter_time": 0.7878021926879882, "loss": 0.17934490740299225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.66910705206023, "step_time": 0.6514580116271973} +{"epoch": 0, "iter": 8855, "iter_tflops": 44.6894740362798, "iter_time": 0.46165442657470707, "loss": 0.14845748245716095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.042986579055345, "step_time": 0.4294298706054688} +{"epoch": 0, "iter": 8856, "iter_tflops": 51.73606624088499, "iter_time": 0.39877584457397464, "loss": 0.2202519327402115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.4060522804317, "step_time": 0.3657602806091309} +{"epoch": 0, "iter": 8857, "iter_tflops": 27.356810660679557, "iter_time": 0.7541483459472655, "loss": 0.5526484847068787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.83085973665851, 
"step_time": 0.7155906448364258} +{"epoch": 0, "iter": 8858, "iter_tflops": 12.31057301699686, "iter_time": 1.675884094238281, "loss": 0.7913550734519958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.86835821455733, "step_time": 1.4876377716064453} +{"epoch": 0, "iter": 8859, "iter_tflops": 14.806874917895358, "iter_time": 1.3933455657958986, "loss": 0.6551917791366577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.43584208972189, "step_time": 1.1832576484680175} +{"epoch": 0, "iter": 8860, "iter_tflops": 44.75588711744033, "iter_time": 0.46096937942504884, "loss": 0.7795944213867188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.545971258101844, "step_time": 0.42498054885864256} +{"epoch": 0, "iter": 8861, "iter_tflops": 20.474804913018037, "iter_time": 0.7660487976074218, "loss": 0.26046645641326904, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 21.637831980543137, "step_time": 0.7248739013671874} +{"epoch": 0, "iter": 8862, "iter_tflops": 10.280873047934273, "iter_time": 1.5256194305419921, "loss": 0.5845083594322205, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 12.04490757323946, "step_time": 1.3021851425170898} +{"epoch": 0, "iter": 8863, "iter_tflops": 23.644796635014036, "iter_time": 0.6633467788696289, "loss": 0.5398418307304382, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.551904202707664, "step_time": 0.6138368225097657} +{"epoch": 0, "iter": 8864, "iter_tflops": 24.05212245171446, "iter_time": 0.6521129150390625, "loss": 0.426465779542923, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.885267407846555, "step_time": 0.6059315299987793} +{"epoch": 0, "iter": 8865, "iter_tflops": 17.72537646294663, "iter_time": 1.1639297790527343, "loss": 0.5207024216651917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.84028576807927, "step_time": 1.095052047729492} +{"epoch": 0, "iter": 8866, "iter_tflops": 18.737743018728363, "iter_time": 1.1010447463989257, "loss": 0.4551321566104889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.136853183190198, 
"step_time": 0.7602610874176026} +{"epoch": 0, "iter": 8867, "iter_tflops": 36.728558786273794, "iter_time": 0.5617180252075196, "loss": 0.4149901568889618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.12178976497634, "step_time": 0.5142116947174072} +{"epoch": 0, "iter": 8868, "iter_tflops": 36.64133682844067, "iter_time": 0.5630551528930664, "loss": 0.4991052746772766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.957379600341504, "step_time": 0.5163274898529052} +{"epoch": 0, "iter": 8869, "iter_tflops": 16.934993915118486, "iter_time": 1.1864162750244143, "loss": 0.3046226501464844, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 17.969864560904256, "step_time": 1.1180914764404297} +{"epoch": 0, "iter": 8870, "iter_tflops": 19.58201548259446, "iter_time": 1.0260410842895509, "loss": 0.29621514678001404, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 23.062037847200884, "step_time": 0.8712132263183594} +{"epoch": 0, "iter": 8871, "iter_tflops": 39.90048477834824, "iter_time": 0.5035515861511229, "loss": 0.20142289996147156, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 43.58249662552745, "step_time": 0.4610096702575684} +{"epoch": 0, "iter": 8872, "iter_tflops": 41.836353627153564, "iter_time": 0.48025104141235353, "loss": 0.41233623027801514, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 45.63284401093543, "step_time": 0.440295862197876} +{"epoch": 0, "iter": 8873, "iter_tflops": 16.023997612953163, "iter_time": 1.2875122680664064, "loss": 0.16833265125751495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.366327096727655, "step_time": 1.1879940643310545} +{"epoch": 0, "iter": 8874, "iter_tflops": 18.648760387171308, "iter_time": 1.1062983856201172, "loss": 0.19965197145938873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.168996471958483, "step_time": 0.6838508377075195} +{"epoch": 0, "iter": 8875, "iter_tflops": 50.971853370643444, "iter_time": 0.40475462722778316, "loss": 0.20488691329956055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
55.66364271146713, "step_time": 0.37063858032226565} +{"epoch": 0, "iter": 8876, "iter_tflops": 47.257118776927435, "iter_time": 0.4365711250305176, "loss": 0.20370659232139587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.322244366197715, "step_time": 0.40199125671386715} +{"epoch": 0, "iter": 8877, "iter_tflops": 22.21419304862079, "iter_time": 0.9287347717285157, "loss": 0.9018278121948242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.414965969345094, "step_time": 0.8811071319580078} +{"epoch": 0, "iter": 8878, "iter_tflops": 15.453874215849604, "iter_time": 1.3350110931396486, "loss": 0.8002496957778931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.158498586588184, "step_time": 1.136167366027832} +{"epoch": 0, "iter": 8879, "iter_tflops": 35.55523537422602, "iter_time": 0.5802547302246094, "loss": 0.625757098197937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.652679518720696, "step_time": 0.5337558422088623} +{"epoch": 0, "iter": 8880, "iter_tflops": 35.792409806687665, "iter_time": 0.5764097366333008, "loss": 0.8056496381759644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.72553672909622, "step_time": 0.5327516479492187} +{"epoch": 0, "iter": 8881, "iter_tflops": 28.71876018279008, "iter_time": 0.7183838500976563, "loss": 0.4732873737812042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.72521271582183, "step_time": 0.6503059158325195} +{"epoch": 0, "iter": 8882, "iter_tflops": 36.56903395788102, "iter_time": 0.5641684036254884, "loss": 0.8088383674621582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.77989897501271, "step_time": 0.518631118774414} +{"epoch": 0, "iter": 8883, "iter_tflops": 35.49726975935715, "iter_time": 0.5812022628784179, "loss": 0.7050187587738037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.5987921743785, "step_time": 0.534501012802124} +{"epoch": 0, "iter": 8884, "iter_tflops": 38.72338120399759, "iter_time": 0.5327813034057617, "loss": 0.6183034777641296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
42.09804120017734, "step_time": 0.49007252883911134} +{"epoch": 0, "iter": 8885, "iter_tflops": 17.39156077578065, "iter_time": 1.1862703857421875, "loss": 0.0792943686246872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.68478321341499, "step_time": 1.10416552734375} +{"epoch": 0, "iter": 8886, "iter_tflops": 22.354712966305218, "iter_time": 0.9228968200683594, "loss": 0.20361222326755524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.152219618689152, "step_time": 0.820249418258667} +{"epoch": 0, "iter": 8887, "iter_tflops": 47.04140885903698, "iter_time": 0.43857303619384763, "loss": 0.135455921292305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.15876846298268, "step_time": 0.40327580451965334} +{"epoch": 0, "iter": 8888, "iter_tflops": 53.79707019253397, "iter_time": 0.3834984588623047, "loss": 0.11528994143009186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.70636933646229, "step_time": 0.3514285373687744} +{"epoch": 0, "iter": 8889, "iter_tflops": 35.59773305456918, "iter_time": 0.5795620040893554, "loss": 0.47217535972595215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.34261124788079, "step_time": 0.538072208404541} +{"epoch": 0, "iter": 8890, "iter_tflops": 13.871770032279704, "iter_time": 1.4872718811035155, "loss": 0.7250139713287354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.04769542686154, "step_time": 1.210198387145996} +{"epoch": 0, "iter": 8891, "iter_tflops": 39.99390023156937, "iter_time": 0.5158560028076171, "loss": 0.4878185987472534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.47678203922844, "step_time": 0.4007844448089599} +{"epoch": 0, "iter": 8892, "iter_tflops": 47.475942154057506, "iter_time": 0.4345589065551758, "loss": 0.5338451266288757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.008251506958196, "step_time": 0.4044658050537109} +{"epoch": 0, "iter": 8893, "iter_tflops": 42.96013393817667, "iter_time": 0.480238109588623, "loss": 0.21088065207004547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
46.57108217733684, "step_time": 0.4430022354125977} +{"epoch": 0, "iter": 8894, "iter_tflops": 12.93642288024989, "iter_time": 1.5948066711425781, "loss": 0.2944280505180359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.576399336742355, "step_time": 1.3245097961425782} +{"epoch": 0, "iter": 8895, "iter_tflops": 13.21603846807419, "iter_time": 1.5610648803710938, "loss": 0.2577976882457733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.086694010150914, "step_time": 1.2824943084716798} +{"epoch": 0, "iter": 8896, "iter_tflops": 16.12685268003913, "iter_time": 1.2793006744384765, "loss": 0.1487092673778534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.225834091112755, "step_time": 1.1319697856903077} +{"epoch": 0, "iter": 8897, "iter_tflops": 13.86666746772137, "iter_time": 1.0957064819335938, "loss": 0.3025597035884857, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 14.617852349446299, "step_time": 1.039400115966797} +{"epoch": 0, "iter": 8898, "iter_tflops": 7.891623880158445, "iter_time": 1.925306838989258, "loss": 0.4848979413509369, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 8.70075491747406, "step_time": 1.7462619705200193} +{"epoch": 0, "iter": 8899, "iter_tflops": 7.639592935895603, "iter_time": 1.9888229064941405, "loss": 0.37769612669944763, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 9.153035485944029, "step_time": 1.6599736175537108} +{"epoch": 0, "iter": 8900, "iter_tflops": 25.228385097542642, "iter_time": 0.6022500991821289, "loss": 0.5468704700469971, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 27.476870011309124, "step_time": 0.5529668197631836} +{"epoch": 0, "iter": 8901, "iter_tflops": 18.049613823768848, "iter_time": 0.8100969390869139, "loss": 0.353818416595459, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 19.04916316451074, "step_time": 0.7675894622802735} +{"epoch": 0, "iter": 8902, "iter_tflops": 7.996065492513477, "iter_time": 1.8286414642333984, "loss": 0.406110554933548, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 
9.858653249445005, "step_time": 1.483157642364502} +{"epoch": 0, "iter": 8903, "iter_tflops": 21.1711746675797, "iter_time": 0.690653076171875, "loss": 0.32921355962753296, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 22.787892001672443, "step_time": 0.6416537742614746} +{"epoch": 0, "iter": 8904, "iter_tflops": 22.639706136918175, "iter_time": 0.6458536529541015, "loss": 0.3380107879638672, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 24.300283581737176, "step_time": 0.6017187767028809} +{"epoch": 0, "iter": 8905, "iter_tflops": 28.78384725446627, "iter_time": 0.7167594146728515, "loss": 0.7265344262123108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.945846279968812, "step_time": 0.6458145866394043} +{"epoch": 0, "iter": 8906, "iter_tflops": 35.19231678849593, "iter_time": 0.5862385711669923, "loss": 0.7791706323623657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.25676864295934, "step_time": 0.5392795639038086} +{"epoch": 0, "iter": 8907, "iter_tflops": 35.41892435714326, "iter_time": 0.5824878616333007, "loss": 0.8604532480239868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.2959675262681, "step_time": 0.5387275695800781} +{"epoch": 0, "iter": 8908, "iter_tflops": 37.973638412664684, "iter_time": 0.5433004150390625, "loss": 0.8434604406356812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.04124340023132, "step_time": 0.5026917266845703} +{"epoch": 0, "iter": 8909, "iter_tflops": 31.228529132187766, "iter_time": 0.6606489028930663, "loss": 0.15893034636974335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.725629798656534, "step_time": 0.5941171875} +{"epoch": 0, "iter": 8910, "iter_tflops": 36.46571521855451, "iter_time": 0.5657668685913085, "loss": 0.11184552311897278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.855180161154415, "step_time": 0.5049810924530029} +{"epoch": 0, "iter": 8911, "iter_tflops": 42.165772844163115, "iter_time": 0.4892853164672852, "loss": 0.14721018075942993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
46.0575144045573, "step_time": 0.44794196510314943} +{"epoch": 0, "iter": 8912, "iter_tflops": 43.987109809516014, "iter_time": 0.46902589416503904, "loss": 0.10921474546194077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.119391164588116, "step_time": 0.4287480163574219} +{"epoch": 0, "iter": 8913, "iter_tflops": 15.169172506707394, "iter_time": 1.3600671691894533, "loss": 0.9492086172103882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.185556906683185, "step_time": 1.2746607131958008} +{"epoch": 0, "iter": 8914, "iter_tflops": 17.316093632982163, "iter_time": 1.1914403991699218, "loss": 0.6632364988327026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.913285890300813, "step_time": 0.9414878997802734} +{"epoch": 0, "iter": 8915, "iter_tflops": 37.683753098297814, "iter_time": 0.5474797973632812, "loss": 0.740475594997406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.1777735583939, "step_time": 0.5010249881744384} +{"epoch": 0, "iter": 8916, "iter_tflops": 40.439918146297416, "iter_time": 0.5101665496826171, "loss": 0.7940280437469482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14656494209547, "step_time": 0.4673317966461181} +{"epoch": 0, "iter": 8917, "iter_tflops": 19.85089832054515, "iter_time": 1.0393027648925781, "loss": 0.7990031838417053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.65538589424192, "step_time": 0.9527003402709961} +{"epoch": 0, "iter": 8918, "iter_tflops": 18.34492099353406, "iter_time": 1.124621551513672, "loss": 0.7246425151824951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.970375511828138, "step_time": 0.8606912937164306} +{"epoch": 0, "iter": 8919, "iter_tflops": 42.80039952337328, "iter_time": 0.48203039550781246, "loss": 0.697166919708252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.07077883997253, "step_time": 0.4382993869781494} +{"epoch": 0, "iter": 8920, "iter_tflops": 37.70676570360949, "iter_time": 0.5471456680297851, "loss": 0.6068134903907776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
41.161963862572534, "step_time": 0.5012174243927002} +{"epoch": 0, "iter": 8921, "iter_tflops": 18.34825191562826, "iter_time": 1.1244173889160156, "loss": 0.20103155076503754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.859023954831443, "step_time": 1.0388775177001954} +{"epoch": 0, "iter": 8922, "iter_tflops": 14.06926371019548, "iter_time": 1.4663946838378905, "loss": 0.20351354777812958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.57090382860132, "step_time": 1.0541717281341552} +{"epoch": 0, "iter": 8923, "iter_tflops": 42.08127218166377, "iter_time": 0.49026781845092776, "loss": 0.1862463355064392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.96873751813406, "step_time": 0.448807050704956} +{"epoch": 0, "iter": 8924, "iter_tflops": 43.45576802797872, "iter_time": 0.4747607612609863, "loss": 0.19089564681053162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.45626303493819, "step_time": 0.4347391090393066} +{"epoch": 0, "iter": 8925, "iter_tflops": 22.964836890111687, "iter_time": 0.8983775329589843, "loss": 0.4711492657661438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.835489699831133, "step_time": 0.8307101554870605} +{"epoch": 0, "iter": 8926, "iter_tflops": 37.465090702200555, "iter_time": 0.5506751251220704, "loss": 0.431238055229187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1108851119944, "step_time": 0.4899230556488037} +{"epoch": 0, "iter": 8927, "iter_tflops": 40.920558178765454, "iter_time": 0.5041742935180664, "loss": 0.40466853976249695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.617120123949704, "step_time": 0.46240307426452637} +{"epoch": 0, "iter": 8928, "iter_tflops": 33.20368842059968, "iter_time": 0.6213494491577148, "loss": 0.2705143690109253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.14167212707034, "step_time": 0.5708394851684571} +{"epoch": 0, "iter": 8929, "iter_tflops": 26.11914261104174, "iter_time": 0.7898840255737305, "loss": 0.8628859519958496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
28.055903757645694, "step_time": 0.7353565826416016} +{"epoch": 0, "iter": 8930, "iter_tflops": 9.3757454426517, "iter_time": 2.2004750061035154, "loss": 0.7319769263267517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.113046280966461, "step_time": 1.856475082397461} +{"epoch": 0, "iter": 8931, "iter_tflops": 11.93219294103413, "iter_time": 1.7290278167724609, "loss": 0.7163529396057129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.955707963044766, "step_time": 1.379479564666748} +{"epoch": 0, "iter": 8932, "iter_tflops": 36.023011659588875, "iter_time": 0.5727198410034179, "loss": 0.7900886535644531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.23714293215124, "step_time": 0.5258051929473877} +{"epoch": 0, "iter": 8933, "iter_tflops": 13.946663329713143, "iter_time": 1.0982167510986327, "loss": 0.5801840424537659, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 14.93763915697394, "step_time": 1.025360107421875} +{"epoch": 0, "iter": 8934, "iter_tflops": 14.674075741215102, "iter_time": 1.0437767639160156, "loss": 0.49259015917778015, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 17.46616947683646, "step_time": 0.876921485900879} +{"epoch": 0, "iter": 8935, "iter_tflops": 23.75325996839046, "iter_time": 0.6448150405883788, "loss": 0.2825503945350647, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 25.620363763012477, "step_time": 0.5978236465454102} +{"epoch": 0, "iter": 8936, "iter_tflops": 23.06384348705457, "iter_time": 0.6640896301269532, "loss": 0.3783310651779175, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 24.677104828430323, "step_time": 0.6206748886108397} +{"epoch": 0, "iter": 8937, "iter_tflops": 15.058413157065706, "iter_time": 0.8331921615600586, "loss": 0.003140372224152088, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 16.228092071167136, "step_time": 0.773137825012207} +{"epoch": 0, "iter": 8938, "iter_tflops": 5.426495020750066, "iter_time": 2.312091278076172, "loss": 0.0044839875772595406, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 
6.193015748539695, "step_time": 2.0259195709228517} +{"epoch": 0, "iter": 8939, "iter_tflops": 7.545434683355824, "iter_time": 1.6628003997802732, "loss": 0.006030624266713858, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 9.027507314802527, "step_time": 1.3898135299682615} +{"epoch": 0, "iter": 8940, "iter_tflops": 32.3888407246454, "iter_time": 0.387372673034668, "loss": 0.005589101929217577, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 36.363078790336715, "step_time": 0.34503546524047846} +{"epoch": 0, "iter": 8941, "iter_tflops": 15.948321499508543, "iter_time": 0.9680745162963867, "loss": 0.3931335508823395, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 16.671957386362283, "step_time": 0.9260558471679687} +{"epoch": 0, "iter": 8942, "iter_tflops": 6.172764731373899, "iter_time": 2.5011748046874995, "loss": 0.31671643257141113, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 7.026714725932378, "step_time": 2.1972093963623047} +{"epoch": 0, "iter": 8943, "iter_tflops": 10.256356729418364, "iter_time": 1.5053263092041018, "loss": 0.3013558089733124, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.832817928128001, "step_time": 1.2031000289916993} +{"epoch": 0, "iter": 8944, "iter_tflops": 21.102283971119075, "iter_time": 0.7316347198486328, "loss": 0.36053407192230225, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.313772179282363, "step_time": 0.6349966392517089} +{"epoch": 0, "iter": 8945, "iter_tflops": 22.344926527369843, "iter_time": 0.6671649398803711, "loss": 0.36666110157966614, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 23.880887864475312, "step_time": 0.6242544937133789} +{"epoch": 0, "iter": 8946, "iter_tflops": 22.053841216492113, "iter_time": 0.6759707489013671, "loss": 0.4174775183200836, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 24.00940103555622, "step_time": 0.6209130973815918} +{"epoch": 0, "iter": 8947, "iter_tflops": 23.215955170994988, "iter_time": 0.6421338882446289, "loss": 0.34719493985176086, "lr": 3e-05, "seqlen": 5968.0, 
"step_tflops": 24.964059825576626, "step_time": 0.5971685562133789} +{"epoch": 0, "iter": 8948, "iter_tflops": 22.860496925235505, "iter_time": 0.6521184387207031, "loss": 0.36403557658195496, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 24.547376578015946, "step_time": 0.6073052864074706} +{"epoch": 0, "iter": 8949, "iter_tflops": 17.745804969684364, "iter_time": 0.8078728179931641, "loss": 0.0013699005357921124, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 19.52665692924237, "step_time": 0.7341939544677735} +{"epoch": 0, "iter": 8950, "iter_tflops": 12.029827977325949, "iter_time": 1.191733871459961, "loss": 0.006323717068880796, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 16.062457453375192, "step_time": 0.8925379886627197} +{"epoch": 0, "iter": 8951, "iter_tflops": 39.62081480992933, "iter_time": 0.36183893585205085, "loss": 0.002853056648746133, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 43.686724674105434, "step_time": 0.32816269874572757} +{"epoch": 0, "iter": 8952, "iter_tflops": 39.81110489447877, "iter_time": 0.36010940933227537, "loss": 0.00365931517444551, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 43.69972574888028, "step_time": 0.3280650672912597} +{"epoch": 0, "iter": 8953, "iter_tflops": 28.742503576648993, "iter_time": 0.7177904129028321, "loss": 0.06749282032251358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.333457403372357, "step_time": 0.6801431579589844} +{"epoch": 0, "iter": 8954, "iter_tflops": 13.68466262495654, "iter_time": 1.5076070251464844, "loss": 0.10492613166570663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.504971854798065, "step_time": 1.1148946170806886} +{"epoch": 0, "iter": 8955, "iter_tflops": 41.10557644245099, "iter_time": 0.5019049797058106, "loss": 0.07621712982654572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.063712497514246, "step_time": 0.4578205471038818} +{"epoch": 0, "iter": 8956, "iter_tflops": 44.497739495465325, "iter_time": 0.4636436309814453, "loss": 0.08401378989219666, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 48.94757111501083, "step_time": 0.4214937133789063} +{"epoch": 0, "iter": 8957, "iter_tflops": 29.759708293634176, "iter_time": 0.6932559051513671, "loss": 0.8054133057594299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.85355496487809, "step_time": 0.6279714183807372} +{"epoch": 0, "iter": 8958, "iter_tflops": 37.093346231864274, "iter_time": 0.5561939163208007, "loss": 0.7938650250434875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.33984667359483, "step_time": 0.5114321250915528} +{"epoch": 0, "iter": 8959, "iter_tflops": 38.63200178316221, "iter_time": 0.5340415344238281, "loss": 0.8471618294715881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93321361583804, "step_time": 0.49199886512756347} +{"epoch": 0, "iter": 8960, "iter_tflops": 41.59985961119883, "iter_time": 0.49594142150878906, "loss": 0.6629787683486938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.214161636561215, "step_time": 0.45629715919494634} +{"epoch": 0, "iter": 8961, "iter_tflops": 22.133338290355738, "iter_time": 0.9321275100708009, "loss": 0.45325008034706116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.025642034601873, "step_time": 0.8587114334106445} +{"epoch": 0, "iter": 8962, "iter_tflops": 37.130873134816504, "iter_time": 0.5556317901611328, "loss": 0.5555785298347473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.66259171001784, "step_time": 0.5073728122711182} +{"epoch": 0, "iter": 8963, "iter_tflops": 38.51822926424793, "iter_time": 0.5356189498901367, "loss": 0.5910571813583374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.11852565107895, "step_time": 0.48983418083190916} +{"epoch": 0, "iter": 8964, "iter_tflops": 38.139654147819094, "iter_time": 0.5409355163574219, "loss": 0.4530763626098633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.52595661957111, "step_time": 0.4968240394592285} +{"epoch": 0, "iter": 8965, "iter_tflops": 6.9959285065142325, "iter_time": 2.949014343261719, "loss": 0.4404844343662262, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 8.801803953280514, "step_time": 2.343961944580078} +{"epoch": 0, "iter": 8966, "iter_tflops": 39.29457029239469, "iter_time": 0.525036750793457, "loss": 0.4619148373603821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.06680688084364, "step_time": 0.4790485992431641} +{"epoch": 0, "iter": 8967, "iter_tflops": 40.6890920610203, "iter_time": 0.507042366027832, "loss": 0.4925943613052368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.41864696186043, "step_time": 0.464469202041626} +{"epoch": 0, "iter": 8968, "iter_tflops": 41.33048581206227, "iter_time": 0.49917374801635744, "loss": 0.46167701482772827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.16395291572777, "step_time": 0.4568044242858887} +{"epoch": 0, "iter": 8969, "iter_tflops": 10.662639854327665, "iter_time": 1.2605049896240237, "loss": 0.021469373255968094, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 11.292357704208737, "step_time": 1.1902129821777343} +{"epoch": 0, "iter": 8970, "iter_tflops": 9.143603642519688, "iter_time": 1.4699139709472655, "loss": 0.03682594746351242, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 11.764615984528803, "step_time": 1.1424351425170898} +{"epoch": 0, "iter": 8971, "iter_tflops": 29.309930398146598, "iter_time": 0.45855826187133786, "loss": 0.039950139820575714, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 32.374739536813976, "step_time": 0.4151480731964111} +{"epoch": 0, "iter": 8972, "iter_tflops": 28.897686238753508, "iter_time": 0.46509989166259763, "loss": 0.04249805957078934, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 31.855266945802473, "step_time": 0.4219180068969727} +{"epoch": 0, "iter": 8973, "iter_tflops": 17.12031502317739, "iter_time": 1.2050650634765625, "loss": 0.08750609308481216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.052539129296726, "step_time": 1.1428361053466796} +{"epoch": 0, "iter": 8974, "iter_tflops": 15.000146101733495, "iter_time": 1.375392837524414, "loss": 0.15510354936122894, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.02662721530363, "step_time": 1.2116958484649656} +{"epoch": 0, "iter": 8975, "iter_tflops": 42.132277766100344, "iter_time": 0.4896742973327637, "loss": 0.1492617279291153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.37316687329317, "step_time": 0.4448929176330566} +{"epoch": 0, "iter": 8976, "iter_tflops": 43.97806505708743, "iter_time": 0.46912235641479494, "loss": 0.12633655965328217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.16372912579921, "step_time": 0.4283533248901367} +{"epoch": 0, "iter": 8977, "iter_tflops": 20.869882917115476, "iter_time": 0.9885581817626953, "loss": 0.023593036457896233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.425904863147732, "step_time": 0.9199670486450195} +{"epoch": 0, "iter": 8978, "iter_tflops": 18.115870377041865, "iter_time": 1.1388408660888671, "loss": 0.028560994192957878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.034260123556876, "step_time": 0.9363188686370849} +{"epoch": 0, "iter": 8979, "iter_tflops": 44.37166503084352, "iter_time": 0.464960994720459, "loss": 0.03500708192586899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.02303142007623, "step_time": 0.4208449153900146} +{"epoch": 0, "iter": 8980, "iter_tflops": 44.42105430078983, "iter_time": 0.46444403076171875, "loss": 0.034430839121341705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.721698263087724, "step_time": 0.4234477500915527} +{"epoch": 0, "iter": 8981, "iter_tflops": 22.461509458104484, "iter_time": 0.9185087738037111, "loss": 0.11756064742803574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.071693979002447, "step_time": 0.8570686187744141} +{"epoch": 0, "iter": 8982, "iter_tflops": 20.700189901320584, "iter_time": 0.9966620407104494, "loss": 0.06204298511147499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.99641449659261, "step_time": 0.8253621139526367} +{"epoch": 0, "iter": 8983, "iter_tflops": 39.32984607678058, "iter_time": 0.5245658340454101, "loss": 
0.1599813997745514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.22005291111602, "step_time": 0.47735002899169926} +{"epoch": 0, "iter": 8984, "iter_tflops": 44.966021900702735, "iter_time": 0.4588151817321777, "loss": 0.12169527262449265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.4726663401753, "step_time": 0.4170200443267822} +{"epoch": 0, "iter": 8985, "iter_tflops": 18.667020770878384, "iter_time": 1.1052161865234376, "loss": 0.0007805746863596141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.97445450104821, "step_time": 1.03287393951416} +{"epoch": 0, "iter": 8986, "iter_tflops": 18.168299018258566, "iter_time": 1.135554489135742, "loss": 0.0018745089182630181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.441942323527083, "step_time": 0.9193096218109131} +{"epoch": 0, "iter": 8987, "iter_tflops": 39.90038604104933, "iter_time": 0.5170650100708007, "loss": 0.0028545057866722345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.120205879681485, "step_time": 0.46761099815368656} +{"epoch": 0, "iter": 8988, "iter_tflops": 44.62877636168279, "iter_time": 0.4622823028564453, "loss": 0.004776918329298496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.225431922111646, "step_time": 0.4191145248413086} +{"epoch": 0, "iter": 8989, "iter_tflops": 19.014862678568864, "iter_time": 1.084998291015625, "loss": 0.2319299727678299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.34300115263418, "step_time": 1.0141617431640624} +{"epoch": 0, "iter": 8990, "iter_tflops": 21.909238451299238, "iter_time": 0.9416618270874023, "loss": 0.14712446928024292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.66853944137047, "step_time": 0.8363321857452393} +{"epoch": 0, "iter": 8991, "iter_tflops": 47.906115627638926, "iter_time": 0.4306567802429199, "loss": 0.15872032940387726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.21685386228932, "step_time": 0.3951041088104248} +{"epoch": 0, "iter": 8992, "iter_tflops": 56.849491039526974, "iter_time": 
0.36290726852416993, "loss": 0.2131120264530182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.68748047889957, "step_time": 0.3344453907012939} +{"epoch": 0, "iter": 8993, "iter_tflops": 20.57263264699335, "iter_time": 0.8762011566162109, "loss": 0.08029130846261978, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 21.623132159308255, "step_time": 0.8336333694458007} +{"epoch": 0, "iter": 8994, "iter_tflops": 12.75200547321362, "iter_time": 1.4135631103515625, "loss": 0.1756109893321991, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 13.996149674042144, "step_time": 1.2879088134765624} +{"epoch": 0, "iter": 8995, "iter_tflops": 43.09719116191316, "iter_time": 0.41825845336914064, "loss": 0.11069750785827637, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 46.849998961831474, "step_time": 0.3847548542022705} +{"epoch": 0, "iter": 8996, "iter_tflops": 44.674668935205666, "iter_time": 0.40348960494995123, "loss": 0.135499507188797, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 48.32741170018124, "step_time": 0.3729925498962402} +{"epoch": 0, "iter": 8997, "iter_tflops": 30.384526865919806, "iter_time": 0.6789999923706056, "loss": 0.43912169337272644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.30655748266397, "step_time": 0.6386038970947265} +{"epoch": 0, "iter": 8998, "iter_tflops": 29.442332649695008, "iter_time": 0.700728904724121, "loss": 0.5717840194702148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.40019545693492, "step_time": 0.5827960338592528} +{"epoch": 0, "iter": 8999, "iter_tflops": 44.761052921808556, "iter_time": 0.4609161796569824, "loss": 0.3098834156990051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.40193949297478, "step_time": 0.42624518203735356} +{"epoch": 0, "iter": 9000, "iter_tflops": 4.241940091184807, "iter_time": 4.863598510742188, "loss": 0.32270383834838867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.269237695805836, "step_time": 4.832500549316406} +{"epoch": 0, "iter": 9001, "iter_tflops": 10.806221706110827, 
"iter_time": 1.909186584472656, "loss": 0.17078366875648499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.493835545147387, "step_time": 1.7949703063964844} +{"epoch": 0, "iter": 9002, "iter_tflops": 26.767907251334083, "iter_time": 0.770739875793457, "loss": 0.07176000624895096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.190411648751127, "step_time": 0.6833657569885254} +{"epoch": 0, "iter": 9003, "iter_tflops": 20.077858379555575, "iter_time": 1.0275544891357422, "loss": 0.04289956018328667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.663874764310126, "step_time": 0.9103074264526367} +{"epoch": 0, "iter": 9004, "iter_tflops": 26.711245704637218, "iter_time": 0.7723748168945314, "loss": 0.11301197111606598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.853426358731664, "step_time": 0.6686807899475098} +{"epoch": 0, "iter": 9005, "iter_tflops": 8.26212341245953, "iter_time": 2.4970691528320312, "loss": 0.13470685482025146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.57273974137104, "step_time": 2.4065927734375} +{"epoch": 0, "iter": 9006, "iter_tflops": 20.09324105163424, "iter_time": 1.0267678298950196, "loss": 0.09420257806777954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.72770386841096, "step_time": 0.8694938888549805} +{"epoch": 0, "iter": 9007, "iter_tflops": 20.308143030100805, "iter_time": 1.0159025115966795, "loss": 0.08050712198019028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.149555187467616, "step_time": 0.8912090682983397} +{"epoch": 0, "iter": 9008, "iter_tflops": 24.362512961074682, "iter_time": 0.846837661743164, "loss": 0.12150198221206665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.591711533846244, "step_time": 0.7477279357910156} +{"epoch": 0, "iter": 9009, "iter_tflops": 8.25453257612615, "iter_time": 2.4993654479980467, "loss": 0.3265073299407959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.984527811899467, "step_time": 2.2962913513183594} +{"epoch": 0, "iter": 9010, "iter_tflops": 25.56562095227007, 
"iter_time": 0.8069858169555665, "loss": 0.31767773628234863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.879960436302742, "step_time": 0.7399972305297852} +{"epoch": 0, "iter": 9011, "iter_tflops": 27.25072722432265, "iter_time": 0.7570841445922851, "loss": 0.4017598628997803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.924145133446668, "step_time": 0.6462535934448242} +{"epoch": 0, "iter": 9012, "iter_tflops": 35.04708428495806, "iter_time": 0.5886679000854492, "loss": 0.33307355642318726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.46149382433901, "step_time": 0.5364090538024903} +{"epoch": 0, "iter": 9013, "iter_tflops": 5.400927382669944, "iter_time": 2.609135559082031, "loss": 0.2681410610675812, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 5.491227128275534, "step_time": 2.5662299804687505} +{"epoch": 0, "iter": 9014, "iter_tflops": 15.404884839778438, "iter_time": 0.9147586517333985, "loss": 0.26697415113449097, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 17.455363570004, "step_time": 0.8073021011352539} +{"epoch": 0, "iter": 9015, "iter_tflops": 18.611023731773987, "iter_time": 0.7571723022460937, "loss": 0.16514213383197784, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 22.611333600129896, "step_time": 0.6232163009643554} +{"epoch": 0, "iter": 9016, "iter_tflops": 22.123486050001173, "iter_time": 0.6369589157104492, "loss": 0.17382875084877014, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 23.847719831185092, "step_time": 0.5909056205749511} +{"epoch": 0, "iter": 9017, "iter_tflops": 7.542938005766284, "iter_time": 2.6966531677246093, "loss": 0.06155117228627205, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 7.860920652138996, "step_time": 2.5875706634521483} +{"epoch": 0, "iter": 9018, "iter_tflops": 21.201502946456916, "iter_time": 0.9593983840942383, "loss": 0.017951063811779022, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 23.808758317970007, "step_time": 0.8543363494873047} +{"epoch": 0, "iter": 9019, "iter_tflops": 
27.16658031188612, "iter_time": 0.7487393493652343, "loss": 0.048475004732608795, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 33.04242199490829, "step_time": 0.6155931205749512} +{"epoch": 0, "iter": 9020, "iter_tflops": 41.987288868937746, "iter_time": 0.4844487037658691, "loss": 0.045192901045084, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 46.28739483999261, "step_time": 0.43944334602355956} +{"epoch": 0, "iter": 9021, "iter_tflops": 17.033480342801326, "iter_time": 1.2112083435058592, "loss": 0.32904621958732605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.542879786149477, "step_time": 1.1126153945922852} +{"epoch": 0, "iter": 9022, "iter_tflops": 21.475892279986503, "iter_time": 0.9606629257202148, "loss": 0.2965635359287262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.57297053860641, "step_time": 0.8395848388671875} +{"epoch": 0, "iter": 9023, "iter_tflops": 21.724226585888314, "iter_time": 0.9496813812255859, "loss": 0.34182417392730713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.741059740852005, "step_time": 0.8014857864379883} +{"epoch": 0, "iter": 9024, "iter_tflops": 32.15770622364695, "iter_time": 0.6415598602294922, "loss": 0.32689368724823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.57844164302632, "step_time": 0.5640233039855957} +{"epoch": 0, "iter": 9025, "iter_tflops": 14.80002080945581, "iter_time": 1.3939908447265625, "loss": 0.766481876373291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.697390829727667, "step_time": 1.314300811767578} +{"epoch": 0, "iter": 9026, "iter_tflops": 24.312535200076383, "iter_time": 0.8485784530639648, "loss": 0.7572106719017029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.468841908727626, "step_time": 0.7794482879638672} +{"epoch": 0, "iter": 9027, "iter_tflops": 23.539544576076377, "iter_time": 0.8764440383911134, "loss": 0.7049851417541504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.55565751666271, "step_time": 0.7487062683105469} +{"epoch": 0, "iter": 9028, "iter_tflops": 
24.811322193112492, "iter_time": 0.8315193099975585, "loss": 0.7373316287994385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.457184551103296, "step_time": 0.7249871635437011} +{"epoch": 0, "iter": 9029, "iter_tflops": 8.966436065149683, "iter_time": 2.300924621582031, "loss": 0.694778323173523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.331444767205593, "step_time": 2.210921676635742} +{"epoch": 0, "iter": 9030, "iter_tflops": 25.143645694097874, "iter_time": 0.8205291213989256, "loss": 0.871978223323822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.4074457267663, "step_time": 0.7527550621032715} +{"epoch": 0, "iter": 9031, "iter_tflops": 23.188750484920828, "iter_time": 0.8897026824951172, "loss": 0.679206907749176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.89713448003421, "step_time": 0.7670368576049804} +{"epoch": 0, "iter": 9032, "iter_tflops": 26.811786282800426, "iter_time": 0.769478515625, "loss": 0.7263743877410889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.27501968991316, "step_time": 0.6814559898376464} +{"epoch": 0, "iter": 9033, "iter_tflops": 33.95064627253504, "iter_time": 0.6076789627075196, "loss": 0.008572674356400967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.90307588075627, "step_time": 0.5303203678131104} +{"epoch": 0, "iter": 9034, "iter_tflops": 27.68695541922118, "iter_time": 0.745155731201172, "loss": 0.004191793501377106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.686792295192486, "step_time": 0.6949586639404296} +{"epoch": 0, "iter": 9035, "iter_tflops": 31.59417551345053, "iter_time": 0.6530030670166016, "loss": 0.009839493781328201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.65578408049892, "step_time": 0.5953145790100097} +{"epoch": 0, "iter": 9036, "iter_tflops": 33.741670371231365, "iter_time": 0.6114425659179688, "loss": 0.019692832604050636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.3880052252423, "step_time": 0.5669751167297363} +{"epoch": 0, "iter": 9037, "iter_tflops": 
10.416158620718965, "iter_time": 1.9806815795898438, "loss": 0.3851589858531952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.054954545837548, "step_time": 1.8662305145263671} +{"epoch": 0, "iter": 9038, "iter_tflops": 33.69457121055066, "iter_time": 0.6122972564697267, "loss": 0.38499146699905396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.35270537656685, "step_time": 0.4989055328369141} +{"epoch": 0, "iter": 9039, "iter_tflops": 44.64556037203345, "iter_time": 0.462108512878418, "loss": 0.3485601544380188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.152336657776225, "step_time": 0.4284546699523926} +{"epoch": 0, "iter": 9040, "iter_tflops": 30.348455335885784, "iter_time": 0.6798070373535157, "loss": 0.3322363793849945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.930508741385026, "step_time": 0.5741943054199219} +{"epoch": 0, "iter": 9041, "iter_tflops": 6.523994370474516, "iter_time": 3.1623407897949223, "loss": 0.08573631197214127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 6.731034401208007, "step_time": 3.0650702819824223} +{"epoch": 0, "iter": 9042, "iter_tflops": 27.66957783778863, "iter_time": 0.7456237182617188, "loss": 0.09964914619922638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.4036787710826, "step_time": 0.6569642257690429} +{"epoch": 0, "iter": 9043, "iter_tflops": 45.580890883906655, "iter_time": 0.4526259384155274, "loss": 0.037338316440582275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.620774053284855, "step_time": 0.3920712661743164} +{"epoch": 0, "iter": 9044, "iter_tflops": 47.27120540559955, "iter_time": 0.43644102859497075, "loss": 0.07540300488471985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.42368478069191, "step_time": 0.40119827270507813} +{"epoch": 0, "iter": 9045, "iter_tflops": 17.959445924279777, "iter_time": 0.8938651199340819, "loss": 0.3667030334472656, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 18.869466495655296, "step_time": 0.8507565536499024} +{"epoch": 0, "iter": 9046, 
"iter_tflops": 20.125224049523137, "iter_time": 0.7976717300415039, "loss": 0.2918029725551605, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 23.527466179542785, "step_time": 0.6823226165771485} +{"epoch": 0, "iter": 9047, "iter_tflops": 20.93038431534862, "iter_time": 0.7669865036010742, "loss": 0.39344194531440735, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 22.423663505470945, "step_time": 0.7159098815917969} +{"epoch": 0, "iter": 9048, "iter_tflops": 18.987674130718453, "iter_time": 0.8454601745605468, "loss": 0.5774633884429932, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 22.255570818358212, "step_time": 0.7213170318603516} +{"epoch": 0, "iter": 9049, "iter_tflops": 33.134670397646815, "iter_time": 0.6226436920166016, "loss": 1.0076076984405518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.38470553922206, "step_time": 0.5670265350341797} +{"epoch": 0, "iter": 9050, "iter_tflops": 29.68577940919759, "iter_time": 0.6949823760986328, "loss": 0.781398355960846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.07147668121035, "step_time": 0.6432848014831543} +{"epoch": 0, "iter": 9051, "iter_tflops": 26.258598796201245, "iter_time": 0.7856890487670899, "loss": 0.8822306394577026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.121026257825452, "step_time": 0.7336536483764649} +{"epoch": 0, "iter": 9052, "iter_tflops": 24.586037239238014, "iter_time": 0.8391386260986328, "loss": 0.9644660353660583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.332605251105587, "step_time": 0.7281749534606934} +{"epoch": 0, "iter": 9053, "iter_tflops": 9.632665634622244, "iter_time": 2.141784454345703, "loss": 0.14583618938922882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.365114050774885, "step_time": 1.9904357452392576} +{"epoch": 0, "iter": 9054, "iter_tflops": 20.60960886714794, "iter_time": 1.0010424575805665, "loss": 0.12222089618444443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.433067436441625, "step_time": 0.880426498413086} +{"epoch": 0, "iter": 
9055, "iter_tflops": 19.63753538006797, "iter_time": 1.0505948486328125, "loss": 0.11642342805862427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.97608960354378, "step_time": 0.8979375457763672} +{"epoch": 0, "iter": 9056, "iter_tflops": 22.918593555922214, "iter_time": 0.9001902084350586, "loss": 0.0650169849395752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.773186570399634, "step_time": 0.8004867172241211} +{"epoch": 0, "iter": 9057, "iter_tflops": 9.17404590724977, "iter_time": 2.248854400634766, "loss": 0.48041844367980957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.056916009602288, "step_time": 2.051433410644531} +{"epoch": 0, "iter": 9058, "iter_tflops": 27.58707241347424, "iter_time": 0.7478536758422851, "loss": 0.32418686151504517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.167688319168484, "step_time": 0.6413607749938964} +{"epoch": 0, "iter": 9059, "iter_tflops": 48.456742227599456, "iter_time": 0.4257631149291992, "loss": 0.4299471080303192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.72983055520089, "step_time": 0.3839783840179444} +{"epoch": 0, "iter": 9060, "iter_tflops": 50.68076994793205, "iter_time": 0.40707932281494136, "loss": 0.33221814036369324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.922776609579245, "step_time": 0.3756382102966309} +{"epoch": 0, "iter": 9061, "iter_tflops": 26.621022907725358, "iter_time": 0.7749925155639649, "loss": 0.7372113466262817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.076081847250386, "step_time": 0.7348280868530273} +{"epoch": 0, "iter": 9062, "iter_tflops": 12.636370108035537, "iter_time": 1.6326756286621094, "loss": 0.8425837755203247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.79134305406014, "step_time": 1.1596141700744629} +{"epoch": 0, "iter": 9063, "iter_tflops": 45.6760577632689, "iter_time": 0.4516828842163086, "loss": 0.7214904427528381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35984685439308, "step_time": 0.4179732074737549} +{"epoch": 0, "iter": 
9064, "iter_tflops": 45.72826337260303, "iter_time": 0.45116722106933593, "loss": 0.6768102049827576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.3629279498223, "step_time": 0.4179471187591553} +{"epoch": 0, "iter": 9065, "iter_tflops": 7.7848595276286146, "iter_time": 0.908454818725586, "loss": 0.0021731285378336906, "lr": 3e-05, "seqlen": 2864.0, "step_tflops": 8.13416908478039, "step_time": 0.8694426040649413} +{"epoch": 0, "iter": 9066, "iter_tflops": 6.107087027176629, "iter_time": 1.158030517578125, "loss": 0.004269343335181475, "lr": 3e-05, "seqlen": 2864.0, "step_tflops": 8.277710402526779, "step_time": 0.854365858078003} +{"epoch": 0, "iter": 9067, "iter_tflops": 15.441706798462539, "iter_time": 0.4579929695129395, "loss": 0.0034251888282597065, "lr": 3e-05, "seqlen": 2864.0, "step_tflops": 17.113380593258196, "step_time": 0.4132551784515381} +{"epoch": 0, "iter": 9068, "iter_tflops": 17.08452323522872, "iter_time": 0.41395320510864253, "loss": 0.004077204503118992, "lr": 3e-05, "seqlen": 2864.0, "step_tflops": 18.89654038696879, "step_time": 0.3742586212158203} +{"epoch": 0, "iter": 9069, "iter_tflops": 15.180120028569897, "iter_time": 1.0710243225097655, "loss": 0.0059304325841367245, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 16.238910806498776, "step_time": 1.0011926269531248} +{"epoch": 0, "iter": 9070, "iter_tflops": 17.80164428305189, "iter_time": 0.9133020248413086, "loss": 0.01661897636950016, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 21.97689401862487, "step_time": 0.7397896060943604} +{"epoch": 0, "iter": 9071, "iter_tflops": 43.9704915106752, "iter_time": 0.3697542877197266, "loss": 0.002843197202309966, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 48.272070533749485, "step_time": 0.33680506324768067} +{"epoch": 0, "iter": 9072, "iter_tflops": 43.856572134640174, "iter_time": 0.3707147407531738, "loss": 0.007998967543244362, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 48.02719021725789, "step_time": 0.3385223598480224} +{"epoch": 
0, "iter": 9073, "iter_tflops": 37.05881308631058, "iter_time": 0.5567122039794922, "loss": 0.45232638716697693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.777161362841724, "step_time": 0.5186668128967286} +{"epoch": 0, "iter": 9074, "iter_tflops": 9.054902615085899, "iter_time": 2.2784445495605468, "loss": 0.562677264213562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.32923658709535, "step_time": 1.9973493041992185} +{"epoch": 0, "iter": 9075, "iter_tflops": 12.408879650207583, "iter_time": 1.6626072692871092, "loss": 0.6517701148986816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.852038237452962, "step_time": 1.489390453338623} +{"epoch": 0, "iter": 9076, "iter_tflops": 40.61273158691808, "iter_time": 0.5079957122802734, "loss": 0.6110771894454956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62221141981872, "step_time": 0.46235031509399416} +{"epoch": 0, "iter": 9077, "iter_tflops": 17.24549104908102, "iter_time": 0.9808319168090822, "loss": 0.38074228167533875, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 18.162935023879232, "step_time": 0.9312882537841797} +{"epoch": 0, "iter": 9078, "iter_tflops": 6.876637399109171, "iter_time": 2.4597673339843746, "loss": 0.3547925055027008, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 9.220361801660593, "step_time": 1.8345189056396485} +{"epoch": 0, "iter": 9079, "iter_tflops": 10.626332421343962, "iter_time": 1.5917936096191407, "loss": 0.4554736316204071, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 12.759745662775076, "step_time": 1.3256477432250977} +{"epoch": 0, "iter": 9080, "iter_tflops": 30.957190713895077, "iter_time": 0.5463973846435547, "loss": 0.4759902060031891, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 32.86960017589365, "step_time": 0.5146070518493653} +{"epoch": 0, "iter": 9081, "iter_tflops": 22.905652341672003, "iter_time": 0.7492248229980469, "loss": 0.3550052344799042, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 24.658078832355503, "step_time": 0.6959781188964844} +{"epoch": 
0, "iter": 9082, "iter_tflops": 18.077971892712338, "iter_time": 0.9493035736083985, "loss": 0.4105365574359894, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 22.227999201306307, "step_time": 0.7720660400390624} +{"epoch": 0, "iter": 9083, "iter_tflops": 30.396951473689167, "iter_time": 0.5645790939331055, "loss": 0.28252387046813965, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 32.42001780738336, "step_time": 0.5293483619689942} +{"epoch": 0, "iter": 9084, "iter_tflops": 29.981503085209642, "iter_time": 0.5724023666381837, "loss": 0.247606560587883, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 32.05660145868452, "step_time": 0.5353494300842285} +{"epoch": 0, "iter": 9085, "iter_tflops": 43.96834449624353, "iter_time": 0.4692260704040527, "loss": 0.39961278438568115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.369377342327205, "step_time": 0.4265321292877197} +{"epoch": 0, "iter": 9086, "iter_tflops": 40.689836255990876, "iter_time": 0.5070330924987794, "loss": 0.3820779025554657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.951893551275916, "step_time": 0.46940169906616214} +{"epoch": 0, "iter": 9087, "iter_tflops": 47.310863937734084, "iter_time": 0.43607518005371093, "loss": 0.5403863191604614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.9870415065567, "step_time": 0.40463405799865726} +{"epoch": 0, "iter": 9088, "iter_tflops": 50.95225405906268, "iter_time": 0.4049103202819824, "loss": 0.47211992740631104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.202125266554994, "step_time": 0.37373730468750005} +{"epoch": 0, "iter": 9089, "iter_tflops": 28.850151224083028, "iter_time": 0.7151121444702149, "loss": 0.808617889881134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.6386644411158, "step_time": 0.6733679122924806} +{"epoch": 0, "iter": 9090, "iter_tflops": 9.781205849477594, "iter_time": 2.1092586975097656, "loss": 0.7232327461242676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.34505514376229, "step_time": 1.671203025817871} 
+{"epoch": 0, "iter": 9091, "iter_tflops": 9.85678224543684, "iter_time": 2.0930860595703122, "loss": 0.8932252526283264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.798052752130884, "step_time": 1.7486863250732423} +{"epoch": 0, "iter": 9092, "iter_tflops": 28.94537019599913, "iter_time": 0.7127597045898438, "loss": 0.7619919776916504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.364942835687934, "step_time": 0.5521510791778564} +{"epoch": 0, "iter": 9093, "iter_tflops": 23.67475787725863, "iter_time": 0.7231490631103514, "loss": 0.4027736186981201, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 25.76138032277267, "step_time": 0.6645753746032714} +{"epoch": 0, "iter": 9094, "iter_tflops": 24.842033650074775, "iter_time": 0.6891697845458985, "loss": 0.3095359206199646, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 26.676795359634017, "step_time": 0.6417704505920411} +{"epoch": 0, "iter": 9095, "iter_tflops": 25.10287512803836, "iter_time": 0.682008689880371, "loss": 0.2783336639404297, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 27.017940224244594, "step_time": 0.6336670684814453} +{"epoch": 0, "iter": 9096, "iter_tflops": 26.125705161269323, "iter_time": 0.6553078231811522, "loss": 0.28907760977745056, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 28.037276513297144, "step_time": 0.6106291732788086} +{"epoch": 0, "iter": 9097, "iter_tflops": 20.107282628634355, "iter_time": 1.0260508041381835, "loss": 0.20677325129508972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.459845458513207, "step_time": 0.9613812713623047} +{"epoch": 0, "iter": 9098, "iter_tflops": 19.698744100733315, "iter_time": 1.0473303985595703, "loss": 0.31294599175453186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.35111180513546, "step_time": 0.8472341499328614} +{"epoch": 0, "iter": 9099, "iter_tflops": 37.24749876251753, "iter_time": 0.5538920516967774, "loss": 0.2643052935600281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.90694278001543, "step_time": 0.5043421020507812} 
+{"epoch": 0, "iter": 9100, "iter_tflops": 41.105564570481064, "iter_time": 0.5019051246643067, "loss": 0.3392060101032257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.85760005156987, "step_time": 0.4599241485595703} +{"epoch": 0, "iter": 9101, "iter_tflops": 18.419763719099308, "iter_time": 1.1200520172119142, "loss": 0.11540413647890091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.93045305483041, "step_time": 1.0351542663574218} +{"epoch": 0, "iter": 9102, "iter_tflops": 16.566379920285073, "iter_time": 1.2453591918945313, "loss": 0.08602894842624664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.509700281181626, "step_time": 0.9591529979705811} +{"epoch": 0, "iter": 9103, "iter_tflops": 41.36721401878306, "iter_time": 0.4987305526733399, "loss": 0.1060197576880455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.64846207343896, "step_time": 0.45195593833923337} +{"epoch": 0, "iter": 9104, "iter_tflops": 44.8483860837587, "iter_time": 0.4600186386108398, "loss": 0.07407600432634354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.076193752578696, "step_time": 0.42038903045654297} +{"epoch": 0, "iter": 9105, "iter_tflops": 18.92554857747572, "iter_time": 1.09011865234375, "loss": 0.31167662143707275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.421401156088535, "step_time": 1.010268264770508} +{"epoch": 0, "iter": 9106, "iter_tflops": 20.227397877413228, "iter_time": 1.0199578628540038, "loss": 0.2244417369365692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.95666745267319, "step_time": 0.7653428802490235} +{"epoch": 0, "iter": 9107, "iter_tflops": 46.484543473735556, "iter_time": 0.4438269577026367, "loss": 0.29690948128700256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.35443280564501, "step_time": 0.4097175235748291} +{"epoch": 0, "iter": 9108, "iter_tflops": 46.70098351804331, "iter_time": 0.4417700004577637, "loss": 0.16402693092823029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5876264532093, "step_time": 
0.40782884979248046} +{"epoch": 0, "iter": 9109, "iter_tflops": 48.098319585517054, "iter_time": 0.428935848236084, "loss": 0.07204336673021317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.73456839574604, "step_time": 0.391225227355957} +{"epoch": 0, "iter": 9110, "iter_tflops": 10.996352274090295, "iter_time": 1.8761761169433593, "loss": 0.05797813832759857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.935703756063376, "step_time": 1.7285192337036135} +{"epoch": 0, "iter": 9111, "iter_tflops": 11.824300094816207, "iter_time": 1.7448046264648436, "loss": 0.07570303231477737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.594715729189122, "step_time": 1.5175818252563478} +{"epoch": 0, "iter": 9112, "iter_tflops": 29.684086205887883, "iter_time": 0.6950220184326172, "loss": 0.0971255823969841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.249782929079046, "step_time": 0.6023715114593506} +{"epoch": 0, "iter": 9113, "iter_tflops": 13.072359996400484, "iter_time": 1.1685393676757811, "loss": 0.394112765789032, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 13.827705972443765, "step_time": 1.1047072677612304} +{"epoch": 0, "iter": 9114, "iter_tflops": 13.131541048038082, "iter_time": 1.1632730102539064, "loss": 0.44103875756263733, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 15.229962509819678, "step_time": 1.0029944114685059} +{"epoch": 0, "iter": 9115, "iter_tflops": 27.59894831004833, "iter_time": 0.5534836730957031, "loss": 0.21717390418052673, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.347813750062024, "step_time": 0.5205010299682618} +{"epoch": 0, "iter": 9116, "iter_tflops": 26.680505503417027, "iter_time": 0.5725366516113282, "loss": 0.2967984974384308, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.409707157445172, "step_time": 0.5376883049011231} +{"epoch": 0, "iter": 9117, "iter_tflops": 29.745298918768984, "iter_time": 0.6935917358398438, "loss": 0.09625383466482162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.59485916099582, 
"step_time": 0.6529889373779297} +{"epoch": 0, "iter": 9118, "iter_tflops": 7.11269781979881, "iter_time": 2.9006003112792973, "loss": 0.727413535118103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.429507797135138, "step_time": 2.447484954833984} +{"epoch": 0, "iter": 9119, "iter_tflops": 13.84627775554479, "iter_time": 1.4900100860595704, "loss": 1.148063063621521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.06160223161438, "step_time": 1.2844978485107423} +{"epoch": 0, "iter": 9120, "iter_tflops": 45.14086011548347, "iter_time": 0.45703811264038086, "loss": 0.7617822289466858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.26335074141197, "step_time": 0.4187919254302978} +{"epoch": 0, "iter": 9121, "iter_tflops": 22.370273034543647, "iter_time": 0.7304467391967774, "loss": 0.42054980993270874, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 23.71951205950893, "step_time": 0.6888966751098632} +{"epoch": 0, "iter": 9122, "iter_tflops": 15.516634715898924, "iter_time": 1.053082275390625, "loss": 0.32337886095046997, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 17.08849753597985, "step_time": 0.9562158966064453} +{"epoch": 0, "iter": 9123, "iter_tflops": 27.94855373211735, "iter_time": 0.584656120300293, "loss": 0.24542036652565002, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 29.698702523347325, "step_time": 0.5502022514343262} +{"epoch": 0, "iter": 9124, "iter_tflops": 29.342180878828234, "iter_time": 0.5568874740600586, "loss": 0.3859221637248993, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.22629682794461, "step_time": 0.5232862892150879} +{"epoch": 0, "iter": 9125, "iter_tflops": 32.14438678137762, "iter_time": 0.641825698852539, "loss": 0.06703340262174606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.219786020841816, "step_time": 0.6028995475769043} +{"epoch": 0, "iter": 9126, "iter_tflops": 12.142531267637551, "iter_time": 1.69907682800293, "loss": 0.07741790264844894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.051203795695269, 
"step_time": 1.3707271385192872} +{"epoch": 0, "iter": 9127, "iter_tflops": 42.002627535173005, "iter_time": 0.4911857833862305, "loss": 0.0931507870554924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15301011458524, "step_time": 0.4470151233673096} +{"epoch": 0, "iter": 9128, "iter_tflops": 39.74654084373832, "iter_time": 0.5190663909912109, "loss": 0.06329140812158585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.83443996133533, "step_time": 0.4706594524383545} +{"epoch": 0, "iter": 9129, "iter_tflops": 17.52463348745894, "iter_time": 1.1772624816894532, "loss": 0.719710111618042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.417554226683464, "step_time": 1.1201863861083985} +{"epoch": 0, "iter": 9130, "iter_tflops": 17.38243305174413, "iter_time": 1.1868933105468749, "loss": 0.6951228380203247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.332795923288202, "step_time": 0.9238025360107422} +{"epoch": 0, "iter": 9131, "iter_tflops": 45.021268155246425, "iter_time": 0.4582521629333496, "loss": 0.7226824760437012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.993825279775, "step_time": 0.42109578895568844} +{"epoch": 0, "iter": 9132, "iter_tflops": 42.52918869759078, "iter_time": 0.4851043281555176, "loss": 0.8201768398284912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.90105231427699, "step_time": 0.4494688568115234} +{"epoch": 0, "iter": 9133, "iter_tflops": 27.38549946499728, "iter_time": 0.7533583068847657, "loss": 0.7383076548576355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.009591946388525, "step_time": 0.7111817893981933} +{"epoch": 0, "iter": 9134, "iter_tflops": 12.983456976354878, "iter_time": 1.5890292968750002, "loss": 0.8150653839111328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.57397198552272, "step_time": 1.1739573459625243} +{"epoch": 0, "iter": 9135, "iter_tflops": 34.25621372152191, "iter_time": 0.602258430480957, "loss": 0.7929983735084534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.6708814270324, 
"step_time": 0.5476668643951416} +{"epoch": 0, "iter": 9136, "iter_tflops": 38.06927258579547, "iter_time": 0.5419355850219726, "loss": 0.6219505071640015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60706678809548, "step_time": 0.4958555145263672} +{"epoch": 0, "iter": 9137, "iter_tflops": 18.917535331402163, "iter_time": 1.0905804138183595, "loss": 0.7904366850852966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.44687739693978, "step_time": 1.0090094985961915} +{"epoch": 0, "iter": 9138, "iter_tflops": 15.45989534037282, "iter_time": 1.3344911499023437, "loss": 0.6836974024772644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.58602280165395, "step_time": 0.874716932296753} +{"epoch": 0, "iter": 9139, "iter_tflops": 36.14259620879994, "iter_time": 0.5708248901367187, "loss": 0.7010219097137451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.66626858611588, "step_time": 0.5335682563781738} +{"epoch": 0, "iter": 9140, "iter_tflops": 42.9788899026451, "iter_time": 0.4800285339355469, "loss": 1.0021770000457764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.07135460841669, "step_time": 0.44780739974975586} +{"epoch": 0, "iter": 9141, "iter_tflops": 29.967513293747388, "iter_time": 0.688448631286621, "loss": 0.2528242766857147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.757949110281103, "step_time": 0.6496355743408203} +{"epoch": 0, "iter": 9142, "iter_tflops": 13.329087836992436, "iter_time": 1.5478248596191404, "loss": 0.35748013854026794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.975951850975203, "step_time": 1.2153129138946535} +{"epoch": 0, "iter": 9143, "iter_tflops": 37.99140773805556, "iter_time": 0.5430463027954102, "loss": 0.30643799901008606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.40569519465105, "step_time": 0.49826704788208004} +{"epoch": 0, "iter": 9144, "iter_tflops": 38.5059887903113, "iter_time": 0.5357892150878907, "loss": 0.20047399401664734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.74515504235452, 
"step_time": 0.4942152805328369} +{"epoch": 0, "iter": 9145, "iter_tflops": 14.776939227827585, "iter_time": 1.3961682586669923, "loss": 0.03903336450457573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.580462588256257, "step_time": 1.3241643753051757} +{"epoch": 0, "iter": 9146, "iter_tflops": 16.15323049571018, "iter_time": 1.2772116088867187, "loss": 0.025350095704197884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.648375923017408, "step_time": 0.9109303722381592} +{"epoch": 0, "iter": 9147, "iter_tflops": 43.96364771774906, "iter_time": 0.4692761993408203, "loss": 0.026331549510359764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.36808154669035, "step_time": 0.4265435562133789} +{"epoch": 0, "iter": 9148, "iter_tflops": 45.94217138437548, "iter_time": 0.4490665740966797, "loss": 0.034573204815387726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.642231399150916, "step_time": 0.40738910865783684} +{"epoch": 0, "iter": 9149, "iter_tflops": 9.292431809174653, "iter_time": 0.8772806854248048, "loss": 0.01975961960852146, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 9.946843428183502, "step_time": 0.8195636138916015} +{"epoch": 0, "iter": 9150, "iter_tflops": 9.968480655000038, "iter_time": 0.817784698486328, "loss": 0.0036074614617973566, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 11.37124214392114, "step_time": 0.7169024143218995} +{"epoch": 0, "iter": 9151, "iter_tflops": 22.24734010773806, "iter_time": 0.36642901611328127, "loss": 0.01580965705215931, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 24.924190269452307, "step_time": 0.3270746555328369} +{"epoch": 0, "iter": 9152, "iter_tflops": 22.49116193384457, "iter_time": 0.3624566383361817, "loss": 0.005394778214395046, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 24.692570324416373, "step_time": 0.3301426639556884} +{"epoch": 0, "iter": 9153, "iter_tflops": 27.539590253292488, "iter_time": 0.7491430816650391, "loss": 0.7195701003074646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
29.002456919095312, "step_time": 0.7113567504882812} +{"epoch": 0, "iter": 9154, "iter_tflops": 16.732647564576098, "iter_time": 1.232984405517578, "loss": 0.8716991543769836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.788852811996826, "step_time": 0.9468646049499513} +{"epoch": 0, "iter": 9155, "iter_tflops": 44.08165717158438, "iter_time": 0.4680199165344238, "loss": 0.8136969804763794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98854611435255, "step_time": 0.42991703605651854} +{"epoch": 0, "iter": 9156, "iter_tflops": 45.22288575805929, "iter_time": 0.45620913314819334, "loss": 0.8458744287490845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.47568666713243, "step_time": 0.42559672546386723} +{"epoch": 0, "iter": 9157, "iter_tflops": 45.14586645041322, "iter_time": 0.45698743057250973, "loss": 0.12620241940021515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.240989878317215, "step_time": 0.4189821033477783} +{"epoch": 0, "iter": 9158, "iter_tflops": 41.83511151870854, "iter_time": 0.493152587890625, "loss": 0.16680654883384705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.18138836361256, "step_time": 0.44674043464660645} +{"epoch": 0, "iter": 9159, "iter_tflops": 44.29926982020181, "iter_time": 0.46572084808349606, "loss": 0.21702417731285095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.74852203135394, "step_time": 0.43207815933227534} +{"epoch": 0, "iter": 9160, "iter_tflops": 52.91885419951135, "iter_time": 0.38986281585693355, "loss": 0.2263033241033554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.35891715225754, "step_time": 0.35968415260314945} +{"epoch": 0, "iter": 9161, "iter_tflops": 41.190314208461885, "iter_time": 0.5008724479675293, "loss": 0.7416307926177979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.96058927772151, "step_time": 0.4588706207275391} +{"epoch": 0, "iter": 9162, "iter_tflops": 45.40929312411147, "iter_time": 0.4543363723754883, "loss": 0.6644394993782043, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 49.18262556672309, "step_time": 0.4194793033599854} +{"epoch": 0, "iter": 9163, "iter_tflops": 49.28415572131313, "iter_time": 0.41861513519287114, "loss": 0.6813626885414124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.33805185443582, "step_time": 0.3867987823486328} +{"epoch": 0, "iter": 9164, "iter_tflops": 43.96452759483803, "iter_time": 0.46926680755615235, "loss": 0.5437382459640503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.54852965283, "step_time": 0.43389550971984864} +{"epoch": 0, "iter": 9165, "iter_tflops": 29.65661626176661, "iter_time": 0.6956657943725586, "loss": 0.636898934841156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.412472551699675, "step_time": 0.6567803115844727} +{"epoch": 0, "iter": 9166, "iter_tflops": 12.198409711955408, "iter_time": 1.691293701171875, "loss": 0.6392676830291748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.908353729739272, "step_time": 1.3838612823486327} +{"epoch": 0, "iter": 9167, "iter_tflops": 40.793977837114745, "iter_time": 0.5057387046813965, "loss": 0.8058667778968811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.88525048128893, "step_time": 0.47011452102661133} +{"epoch": 0, "iter": 9168, "iter_tflops": 39.83646394938797, "iter_time": 0.5178946990966796, "loss": 0.6021180748939514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.585944421431975, "step_time": 0.48445781326293946} +{"epoch": 0, "iter": 9169, "iter_tflops": 25.357966982803656, "iter_time": 0.8135941467285155, "loss": 0.3892166316509247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.66816352571245, "step_time": 0.7736225814819335} +{"epoch": 0, "iter": 9170, "iter_tflops": 19.810566919157665, "iter_time": 1.041418632507324, "loss": 0.36036011576652527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.532685766111236, "step_time": 0.8766994857788086} +{"epoch": 0, "iter": 9171, "iter_tflops": 41.182417451864936, "iter_time": 0.500968490600586, "loss": 0.3455224931240082, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.36489255167864, "step_time": 0.4650319728851318} +{"epoch": 0, "iter": 9172, "iter_tflops": 47.12900819453022, "iter_time": 0.43775785446166987, "loss": 0.3939015865325928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.031473537515396, "step_time": 0.40428175163269037} +{"epoch": 0, "iter": 9173, "iter_tflops": 23.080651329095392, "iter_time": 0.8024285583496094, "loss": 0.07261665165424347, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 24.33929075686686, "step_time": 0.7609331741333009} +{"epoch": 0, "iter": 9174, "iter_tflops": 12.789115528558531, "iter_time": 1.4481512603759765, "loss": 0.07380630075931549, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 16.504928152179158, "step_time": 1.1221238651275636} +{"epoch": 0, "iter": 9175, "iter_tflops": 37.282410008127556, "iter_time": 0.4967643928527832, "loss": 0.00890771672129631, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 41.30652723212512, "step_time": 0.44836918067932124} +{"epoch": 0, "iter": 9176, "iter_tflops": 39.05743173369188, "iter_time": 0.4741882133483887, "loss": 0.026768414303660393, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 43.016535607728365, "step_time": 0.43054545211791995} +{"epoch": 0, "iter": 9177, "iter_tflops": 33.790466548877596, "iter_time": 0.6105595932006835, "loss": 0.4840750992298126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.38262637202926, "step_time": 0.5518898887634278} +{"epoch": 0, "iter": 9178, "iter_tflops": 44.28577579256521, "iter_time": 0.46586275482177736, "loss": 0.5198858380317688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.11166183396445, "step_time": 0.4288168964385986} +{"epoch": 0, "iter": 9179, "iter_tflops": 45.72111048405959, "iter_time": 0.4512378044128418, "loss": 0.5956697463989258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.38199703625142, "step_time": 0.41778572654724117} +{"epoch": 0, "iter": 9180, "iter_tflops": 46.42874792357242, "iter_time": 0.4443603248596191, "loss": 0.7518453598022461, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.36458145375753, "step_time": 0.4096349639892578} +{"epoch": 0, "iter": 9181, "iter_tflops": 43.78048336486366, "iter_time": 0.47123950958251953, "loss": 0.07976916432380676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.64913784766965, "step_time": 0.43297936630249023} +{"epoch": 0, "iter": 9182, "iter_tflops": 7.827309763078909, "iter_time": 2.635783447265625, "loss": 0.10407940298318863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.740408733423783, "step_time": 2.1180932006835937} +{"epoch": 0, "iter": 9183, "iter_tflops": 12.780697744318408, "iter_time": 1.6142384338378908, "loss": 0.08384409546852112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.329567448634906, "step_time": 1.4397568931579587} +{"epoch": 0, "iter": 9184, "iter_tflops": 26.041311968296753, "iter_time": 0.7922447814941406, "loss": 0.07819882035255432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99843876347636, "step_time": 0.4798102931976318} +{"epoch": 0, "iter": 9185, "iter_tflops": 12.45612487061745, "iter_time": 1.1804292907714844, "loss": 0.4768742620944977, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 13.288728474929437, "step_time": 1.10646964263916} +{"epoch": 0, "iter": 9186, "iter_tflops": 12.133297800891116, "iter_time": 1.2118366241455079, "loss": 0.34986674785614014, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 13.428406142677451, "step_time": 1.094960525512695} +{"epoch": 0, "iter": 9187, "iter_tflops": 26.59560330401418, "iter_time": 0.5528573455810546, "loss": 0.5115538239479065, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 28.36866007107394, "step_time": 0.5183034591674804} +{"epoch": 0, "iter": 9188, "iter_tflops": 28.104791375637287, "iter_time": 0.5231696777343751, "loss": 0.49369320273399353, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 29.746699498452422, "step_time": 0.4942926406860351} +{"epoch": 0, "iter": 9189, "iter_tflops": 29.0351208629968, "iter_time": 0.7105564880371094, "loss": 0.08665785938501358, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 30.855082905579017, "step_time": 0.668644889831543} +{"epoch": 0, "iter": 9190, "iter_tflops": 16.39732799246133, "iter_time": 1.258198501586914, "loss": 0.15087251365184784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.996333945036987, "step_time": 1.1464053497314453} +{"epoch": 0, "iter": 9191, "iter_tflops": 39.00718249092253, "iter_time": 0.5289049911499024, "loss": 0.0965510755777359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80925839570614, "step_time": 0.4819306449890137} +{"epoch": 0, "iter": 9192, "iter_tflops": 39.78894862007026, "iter_time": 0.5185131607055663, "loss": 0.1906988024711609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.85004383265901, "step_time": 0.47049197006225585} +{"epoch": 0, "iter": 9193, "iter_tflops": 20.410438654679563, "iter_time": 1.0108108825683595, "loss": 0.5173727869987488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.00465456218251, "step_time": 0.93757861328125} +{"epoch": 0, "iter": 9194, "iter_tflops": 21.15634986820145, "iter_time": 0.9751726379394531, "loss": 0.52983158826828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.666881787203067, "step_time": 0.803802101135254} +{"epoch": 0, "iter": 9195, "iter_tflops": 48.06016012161059, "iter_time": 0.42927642059326165, "loss": 0.5298399329185486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.29953525406215, "step_time": 0.3944794807434082} +{"epoch": 0, "iter": 9196, "iter_tflops": 50.93253599011778, "iter_time": 0.40506707763671873, "loss": 0.5304549336433411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.761293547685135, "step_time": 0.3767459125518799} +{"epoch": 0, "iter": 9197, "iter_tflops": 41.77692622985575, "iter_time": 0.49383943176269535, "loss": 0.40166035294532776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59332871098393, "step_time": 0.45250246238708497} +{"epoch": 0, "iter": 9198, "iter_tflops": 47.50657285421495, "iter_time": 0.4342787170410156, "loss": 0.39016273617744446, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 52.093343492511075, "step_time": 0.3960408782958984} +{"epoch": 0, "iter": 9199, "iter_tflops": 48.737037104264864, "iter_time": 0.42331447982788084, "loss": 0.2453235238790512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.77064230302695, "step_time": 0.3909577865600586} +{"epoch": 0, "iter": 9200, "iter_tflops": 48.29559916750695, "iter_time": 0.4271837158203125, "loss": 0.33832886815071106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.45793946416294, "step_time": 0.39328829383850095} +{"epoch": 0, "iter": 9201, "iter_tflops": 21.12323462653363, "iter_time": 0.8260735702514648, "loss": 0.08780617266893387, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 22.177779496526103, "step_time": 0.786794090270996} +{"epoch": 0, "iter": 9202, "iter_tflops": 11.028137540728114, "iter_time": 1.5822568206787109, "loss": 0.03688620403409004, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 14.2124385410908, "step_time": 1.2277517185211182} +{"epoch": 0, "iter": 9203, "iter_tflops": 37.279950212782154, "iter_time": 0.4680624771118165, "loss": 0.05675056576728821, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 41.21462402892892, "step_time": 0.4233775329589844} +{"epoch": 0, "iter": 9204, "iter_tflops": 37.74706937996581, "iter_time": 0.46227021408081054, "loss": 0.04869726300239563, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 41.55940955534906, "step_time": 0.4198651046752929} +{"epoch": 0, "iter": 9205, "iter_tflops": 19.237031871725744, "iter_time": 1.0724676055908204, "loss": 0.9943467974662781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.47801460155481, "step_time": 1.0074752807617187} +{"epoch": 0, "iter": 9206, "iter_tflops": 10.41489910150113, "iter_time": 1.980921112060547, "loss": 0.8108356595039368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.687181993266917, "step_time": 1.7652752838134766} +{"epoch": 0, "iter": 9207, "iter_tflops": 12.052498424828203, "iter_time": 1.7117690277099609, "loss": 0.6187580823898315, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 15.86024881839925, "step_time": 1.300805160522461} +{"epoch": 0, "iter": 9208, "iter_tflops": 25.07655749747893, "iter_time": 0.8227243118286134, "loss": 0.7396330237388611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.499039787756406, "step_time": 0.6549753150939941} +{"epoch": 0, "iter": 9209, "iter_tflops": 27.150437339007624, "iter_time": 0.6699938278198242, "loss": 0.2600674331188202, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 28.892735734244376, "step_time": 0.6295916595458985} +{"epoch": 0, "iter": 9210, "iter_tflops": 24.04609408006211, "iter_time": 0.7564898223876954, "loss": 0.3302452862262726, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 29.262248676696277, "step_time": 0.6216414070129395} +{"epoch": 0, "iter": 9211, "iter_tflops": 31.98658731206998, "iter_time": 0.568695411682129, "loss": 0.40596601366996765, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 34.101609097413665, "step_time": 0.5334242553710937} +{"epoch": 0, "iter": 9212, "iter_tflops": 32.68184558566722, "iter_time": 0.5565972518920899, "loss": 0.2770113945007324, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 34.792937057338996, "step_time": 0.5228252334594727} +{"epoch": 0, "iter": 9213, "iter_tflops": 37.363937768784986, "iter_time": 0.5521659317016601, "loss": 0.5543664693832397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.96124506033906, "step_time": 0.5036734962463378} +{"epoch": 0, "iter": 9214, "iter_tflops": 43.67515573784011, "iter_time": 0.4723759574890137, "loss": 0.5319174528121948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.25393037689789, "step_time": 0.4366005821228027} +{"epoch": 0, "iter": 9215, "iter_tflops": 44.17595873295357, "iter_time": 0.4670208435058594, "loss": 0.6983885765075684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.82306519390519, "step_time": 0.4314046669006348} +{"epoch": 0, "iter": 9216, "iter_tflops": 48.39085448557381, "iter_time": 0.4263428230285644, "loss": 0.6162095069885254, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 52.3366226904827, "step_time": 0.39419993972778317} +{"epoch": 0, "iter": 9217, "iter_tflops": 34.15461592769432, "iter_time": 0.604049934387207, "loss": 0.028232082724571228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.59687803319559, "step_time": 0.5637391662597655} +{"epoch": 0, "iter": 9218, "iter_tflops": 9.22037169774955, "iter_time": 2.2375555114746093, "loss": 0.04739297181367874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.887725066294243, "step_time": 1.8948947906494142} +{"epoch": 0, "iter": 9219, "iter_tflops": 12.210112362390742, "iter_time": 1.6896726989746094, "loss": 0.06174386292695999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.965712699001381, "step_time": 1.4772675018310548} +{"epoch": 0, "iter": 9220, "iter_tflops": 25.95119699840576, "iter_time": 0.794995834350586, "loss": 0.06807420402765274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.13569741039308, "step_time": 0.5709338684082031} +{"epoch": 0, "iter": 9221, "iter_tflops": 11.709463522064354, "iter_time": 1.2905840148925782, "loss": 0.2923739552497864, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 12.128100942175523, "step_time": 1.246035675048828} +{"epoch": 0, "iter": 9222, "iter_tflops": 12.744287823407733, "iter_time": 1.185789794921875, "loss": 0.3520643413066864, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 16.5579391197716, "step_time": 0.9126767730712891} +{"epoch": 0, "iter": 9223, "iter_tflops": 27.995644539062408, "iter_time": 0.5397999114990234, "loss": 0.31501883268356323, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.826137458896177, "step_time": 0.5066712532043457} +{"epoch": 0, "iter": 9224, "iter_tflops": 26.16032322681997, "iter_time": 0.5776704788208007, "loss": 0.3149268627166748, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.673421633323397, "step_time": 0.5460852165222168} +{"epoch": 0, "iter": 9225, "iter_tflops": 22.179069437603758, "iter_time": 0.930205551147461, "loss": 0.3680456578731537, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 23.281997577364432, "step_time": 0.8861393203735352} +{"epoch": 0, "iter": 9226, "iter_tflops": 12.031891614188906, "iter_time": 1.7147007446289062, "loss": 0.30529218912124634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.912052775837743, "step_time": 1.2965702037811282} +{"epoch": 0, "iter": 9227, "iter_tflops": 37.08607618275006, "iter_time": 0.5563029479980468, "loss": 0.3295919895172119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54520502480488, "step_time": 0.508841760635376} +{"epoch": 0, "iter": 9228, "iter_tflops": 36.58525736585634, "iter_time": 0.5639182281494141, "loss": 0.38659340143203735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.07216981131959, "step_time": 0.514848424911499} +{"epoch": 0, "iter": 9229, "iter_tflops": 1.400296184635697, "iter_time": 1.119235122680664, "loss": 3.2021100521087646, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.4801735021924038, "step_time": 1.0588357849121095} +{"epoch": 0, "iter": 9230, "iter_tflops": 1.13713983655587, "iter_time": 1.3782479705810546, "loss": 3.1997978687286377, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.6414516583190661, "step_time": 0.954801601409912} +{"epoch": 0, "iter": 9231, "iter_tflops": 3.051179803494946, "iter_time": 0.5136572647094726, "loss": 4.109203338623047, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.3487727030147427, "step_time": 0.46801046562194826} +{"epoch": 0, "iter": 9232, "iter_tflops": 3.261276547947637, "iter_time": 0.4805666275024414, "loss": 4.13677453994751, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.5690204170146815, "step_time": 0.43912908554077146} +{"epoch": 0, "iter": 9233, "iter_tflops": 16.18946579371769, "iter_time": 1.2743529510498048, "loss": 0.6705510020256042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.419286786750543, "step_time": 1.1843822174072265} +{"epoch": 0, "iter": 9234, "iter_tflops": 20.43689869096948, "iter_time": 1.0095021667480468, "loss": 0.6783425211906433, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 24.686592090784615, "step_time": 0.8357205982208251} +{"epoch": 0, "iter": 9235, "iter_tflops": 44.79160613085896, "iter_time": 0.46060177993774415, "loss": 0.7592717409133911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.181322627472944, "step_time": 0.42819691085815437} +{"epoch": 0, "iter": 9236, "iter_tflops": 45.86866664076973, "iter_time": 0.449786205291748, "loss": 0.6957961320877075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.668397057101515, "step_time": 0.415376672744751} +{"epoch": 0, "iter": 9237, "iter_tflops": 43.21343834394848, "iter_time": 0.477423095703125, "loss": 0.23597685992717743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.02701474867058, "step_time": 0.438707275390625} +{"epoch": 0, "iter": 9238, "iter_tflops": 10.018843095160664, "iter_time": 2.0592291259765623, "loss": 0.16226613521575928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.147881047407344, "step_time": 1.6983285751342776} +{"epoch": 0, "iter": 9239, "iter_tflops": 15.534435476077604, "iter_time": 1.3280877532958986, "loss": 0.16025109589099884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.28950187690918, "step_time": 1.1932728691101075} +{"epoch": 0, "iter": 9240, "iter_tflops": 40.44127438264174, "iter_time": 0.5101494407653809, "loss": 0.2374405860900879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.61891531687985, "step_time": 0.46238446998596194} +{"epoch": 0, "iter": 9241, "iter_tflops": 16.034051604937645, "iter_time": 1.1705234374999998, "loss": 0.47982731461524963, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 17.080755145022415, "step_time": 1.0987941131591796} +{"epoch": 0, "iter": 9242, "iter_tflops": 16.217423049352103, "iter_time": 1.1572882537841795, "loss": 0.36166954040527344, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 20.70917970844001, "step_time": 0.9062760314941407} +{"epoch": 0, "iter": 9243, "iter_tflops": 33.65582858157369, "iter_time": 0.5576517944335938, "loss": 0.2951221168041229, "lr": 
3e-05, "seqlen": 7472.0, "step_tflops": 35.87555326268106, "step_time": 0.5231482582092285} +{"epoch": 0, "iter": 9244, "iter_tflops": 33.724798453858355, "iter_time": 0.5565113525390626, "loss": 0.4214016795158386, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 35.84409419063198, "step_time": 0.5236074066162109} +{"epoch": 0, "iter": 9245, "iter_tflops": 22.24540577114953, "iter_time": 0.9274316558837891, "loss": 0.37261804938316345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.52158089359466, "step_time": 0.8771133880615235} +{"epoch": 0, "iter": 9246, "iter_tflops": 22.29434536094014, "iter_time": 0.9253957977294923, "loss": 0.299069344997406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.100031626823306, "step_time": 0.7612940750122071} +{"epoch": 0, "iter": 9247, "iter_tflops": 47.30408454516937, "iter_time": 0.43613767623901367, "loss": 0.27885833382606506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.13206878097426, "step_time": 0.40348638343811033} +{"epoch": 0, "iter": 9248, "iter_tflops": 50.33665434111357, "iter_time": 0.409862232208252, "loss": 0.3117491900920868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.31586701923233, "step_time": 0.37983548164367675} +{"epoch": 0, "iter": 9249, "iter_tflops": 2.572718802554353, "iter_time": 0.6244518127441405, "loss": 1.4558839797973633, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.7458284683500604, "step_time": 0.58508349609375} +{"epoch": 0, "iter": 9250, "iter_tflops": 0.8908585726038714, "iter_time": 1.8033602294921875, "loss": 1.1150670051574707, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.1013818771008914, "step_time": 1.4586574859619141} +{"epoch": 0, "iter": 9251, "iter_tflops": 3.689205770647042, "iter_time": 0.43547013092041015, "loss": 1.3313754796981812, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.019410013943786, "step_time": 0.39969520759582516} +{"epoch": 0, "iter": 9252, "iter_tflops": 3.726794737339223, "iter_time": 0.4310779190063476, "loss": 1.1862713098526, "lr": 3e-05, 
"seqlen": 656.0, "step_tflops": 4.024555050739965, "step_time": 0.39918423271179204} +{"epoch": 0, "iter": 9253, "iter_tflops": 25.34849499857419, "iter_time": 0.8138981628417968, "loss": 0.04793417826294899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.718054490444082, "step_time": 0.7721779861450195} +{"epoch": 0, "iter": 9254, "iter_tflops": 15.531784969735218, "iter_time": 1.3283143920898437, "loss": 0.02270113304257393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.452595667155546, "step_time": 1.118059154510498} +{"epoch": 0, "iter": 9255, "iter_tflops": 51.643720653690075, "iter_time": 0.39948890686035154, "loss": 0.035241205245256424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.58255567833479, "step_time": 0.3646193294525147} +{"epoch": 0, "iter": 9256, "iter_tflops": 61.49109097341576, "iter_time": 0.3355135383605957, "loss": 0.03982754051685333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.18175090390599, "step_time": 0.30709371566772464} +{"epoch": 0, "iter": 9257, "iter_tflops": 26.915496397950406, "iter_time": 0.7665135803222656, "loss": 0.2705918252468109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.570464736966834, "step_time": 0.7221126327514649} +{"epoch": 0, "iter": 9258, "iter_tflops": 12.652282448519511, "iter_time": 1.6306222686767577, "loss": 0.2609257996082306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.66979924338653, "step_time": 1.2376329917907714} +{"epoch": 0, "iter": 9259, "iter_tflops": 38.84807294713588, "iter_time": 0.5310712203979492, "loss": 0.30435711145401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.705208601917676, "step_time": 0.4831048526763916} +{"epoch": 0, "iter": 9260, "iter_tflops": 37.048119457412746, "iter_time": 0.5568728942871094, "loss": 0.3367564082145691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.563982656443834, "step_time": 0.5086062107086182} +{"epoch": 0, "iter": 9261, "iter_tflops": 19.513730275613067, "iter_time": 1.0572603607177735, "loss": 0.6787652373313904, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 20.90937458211035, "step_time": 0.9866910858154297} +{"epoch": 0, "iter": 9262, "iter_tflops": 17.90951676509631, "iter_time": 1.1519626007080077, "loss": 0.7433039546012878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.136571397603657, "step_time": 1.0245584068298341} +{"epoch": 0, "iter": 9263, "iter_tflops": 37.45082289976775, "iter_time": 0.5508849182128907, "loss": 0.9362439513206482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.892555253631265, "step_time": 0.5045195484161378} +{"epoch": 0, "iter": 9264, "iter_tflops": 38.32330266375816, "iter_time": 0.5383433074951173, "loss": 0.6606774926185608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.52586941614642, "step_time": 0.4968250827789306} +{"epoch": 0, "iter": 9265, "iter_tflops": 31.757142025900745, "iter_time": 0.649652084350586, "loss": 0.5313242077827454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.22800717245986, "step_time": 0.5856446380615234} +{"epoch": 0, "iter": 9266, "iter_tflops": 34.94024371898559, "iter_time": 0.5904679336547851, "loss": 0.737762451171875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.36101946444124, "step_time": 0.5378140048980713} +{"epoch": 0, "iter": 9267, "iter_tflops": 41.189629705125455, "iter_time": 0.500880771636963, "loss": 0.5756574869155884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.67456460331941, "step_time": 0.4618084964752197} +{"epoch": 0, "iter": 9268, "iter_tflops": 39.69648195984586, "iter_time": 0.5197209548950196, "loss": 0.654632031917572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.352804160816895, "step_time": 0.47588832855224605} +{"epoch": 0, "iter": 9269, "iter_tflops": 15.134480861649036, "iter_time": 1.120356155395508, "loss": 0.019182998687028885, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 16.229351798705597, "step_time": 1.0447742462158203} +{"epoch": 0, "iter": 9270, "iter_tflops": 14.71361958093213, "iter_time": 1.1524022827148437, "loss": 0.01403964776545763, "lr": 
3e-05, "seqlen": 6768.0, "step_tflops": 17.909580437288493, "step_time": 0.9467563381195068} +{"epoch": 0, "iter": 9271, "iter_tflops": 34.180593359302826, "iter_time": 0.4960712242126465, "loss": 0.01048246119171381, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 37.88766688324553, "step_time": 0.44753372764587407} +{"epoch": 0, "iter": 9272, "iter_tflops": 37.04012744767021, "iter_time": 0.45777404022216794, "loss": 0.0022613334003835917, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 41.02294202985841, "step_time": 0.41332990646362305} +{"epoch": 0, "iter": 9273, "iter_tflops": 21.31131184832781, "iter_time": 0.9680818176269531, "loss": 0.26395389437675476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.645263318614095, "step_time": 0.9110555801391602} +{"epoch": 0, "iter": 9274, "iter_tflops": 11.914990826795595, "iter_time": 1.7315240783691408, "loss": 0.2325754016637802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.71396977810212, "step_time": 1.4021432571411132} +{"epoch": 0, "iter": 9275, "iter_tflops": 14.71005681796796, "iter_time": 1.4025162353515626, "loss": 0.2930925190448761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.80021049081504, "step_time": 1.228025894165039} +{"epoch": 0, "iter": 9276, "iter_tflops": 21.022405985287463, "iter_time": 0.9813859329223632, "loss": 0.02876666747033596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.453334989631944, "step_time": 0.8105457897186279} +{"epoch": 0, "iter": 9277, "iter_tflops": 20.40208114969023, "iter_time": 0.8190142440795899, "loss": 0.21037514507770538, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 21.491381977944002, "step_time": 0.7775021209716797} +{"epoch": 0, "iter": 9278, "iter_tflops": 10.37513689597137, "iter_time": 1.6105421295166016, "loss": 0.32918158173561096, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 11.873380662822, "step_time": 1.407315704345703} +{"epoch": 0, "iter": 9279, "iter_tflops": 21.207225828093815, "iter_time": 0.787919891357422, "loss": 0.5353105664253235, 
"lr": 3e-05, "seqlen": 6672.0, "step_tflops": 24.497818911920255, "step_time": 0.6820850105285645} +{"epoch": 0, "iter": 9280, "iter_tflops": 24.117830326876632, "iter_time": 0.6928316040039063, "loss": 0.3636763095855713, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 25.9642691029929, "step_time": 0.6435611572265625} +{"epoch": 0, "iter": 9281, "iter_tflops": 13.456043637906182, "iter_time": 1.1899742431640625, "loss": 0.3882121443748474, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 14.322508243245379, "step_time": 1.117984718322754} +{"epoch": 0, "iter": 9282, "iter_tflops": 11.377073325645707, "iter_time": 1.4074221801757811, "loss": 0.368191659450531, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 13.449362084554993, "step_time": 1.190565414428711} +{"epoch": 0, "iter": 9283, "iter_tflops": 23.220285792025027, "iter_time": 0.6895843353271485, "loss": 0.4196390211582184, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 24.932538649370844, "step_time": 0.6422268333435058} +{"epoch": 0, "iter": 9284, "iter_tflops": 23.263443458162627, "iter_time": 0.6883050384521484, "loss": 0.21670567989349365, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 25.064114601536215, "step_time": 0.6388554153442383} +{"epoch": 0, "iter": 9285, "iter_tflops": 22.460872333918182, "iter_time": 0.9185348281860352, "loss": 0.400080144405365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.572559567107394, "step_time": 0.8395988807678222} +{"epoch": 0, "iter": 9286, "iter_tflops": 34.31650991424466, "iter_time": 0.6012002258300781, "loss": 0.2866884469985962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.816355417494115, "step_time": 0.5455600700378418} +{"epoch": 0, "iter": 9287, "iter_tflops": 38.67245881312487, "iter_time": 0.5334828491210938, "loss": 0.294888973236084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.173014840822766, "step_time": 0.48920129585266114} +{"epoch": 0, "iter": 9288, "iter_tflops": 38.17341995012472, "iter_time": 0.5404570388793946, "loss": 0.2207464724779129, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.556100875750275, "step_time": 0.4964636497497559} +{"epoch": 0, "iter": 9289, "iter_tflops": 24.843462194140592, "iter_time": 0.8304435729980468, "loss": 0.2759609818458557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.874176487492125, "step_time": 0.7676921195983887} +{"epoch": 0, "iter": 9290, "iter_tflops": 17.369994464420238, "iter_time": 1.1877432403564452, "loss": 0.30619513988494873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.764954966921557, "step_time": 0.9935534915924071} +{"epoch": 0, "iter": 9291, "iter_tflops": 36.74725096768554, "iter_time": 0.5614322967529297, "loss": 0.46559301018714905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06725166994765, "step_time": 0.51491162109375} +{"epoch": 0, "iter": 9292, "iter_tflops": 39.02853415308838, "iter_time": 0.5286156387329101, "loss": 0.3833317756652832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.332338473140325, "step_time": 0.4873601188659668} +{"epoch": 0, "iter": 9293, "iter_tflops": 29.91027707316129, "iter_time": 0.6897660446166993, "loss": 0.32584676146507263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.12140213777035, "step_time": 0.6228931198120117} +{"epoch": 0, "iter": 9294, "iter_tflops": 39.68290710602844, "iter_time": 0.5198987426757812, "loss": 0.45027801394462585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.42189195144617, "step_time": 0.4751311511993408} +{"epoch": 0, "iter": 9295, "iter_tflops": 39.344203334298236, "iter_time": 0.5243744125366212, "loss": 0.3430613875389099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.10931469050327, "step_time": 0.478576234817505} +{"epoch": 0, "iter": 9296, "iter_tflops": 43.58991586425474, "iter_time": 0.4732996864318848, "loss": 0.40857669711112976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59837895246447, "step_time": 0.4334410953521728} +{"epoch": 0, "iter": 9297, "iter_tflops": 18.548196124096197, "iter_time": 1.1122964935302735, "loss": 0.6156461238861084, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.087288920157686, "step_time": 1.0270720748901367} +{"epoch": 0, "iter": 9298, "iter_tflops": 22.24296776097013, "iter_time": 0.9275333099365235, "loss": 0.7723097801208496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.18388153269015, "step_time": 0.6835135993957518} +{"epoch": 0, "iter": 9299, "iter_tflops": 47.921441953116485, "iter_time": 0.43051904678344727, "loss": 0.5723116397857666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.93688411274797, "step_time": 0.39723394775390625} +{"epoch": 0, "iter": 9300, "iter_tflops": 46.69610210949946, "iter_time": 0.4418161811828613, "loss": 0.581352174282074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.404854559359634, "step_time": 0.40930766868591306} +{"epoch": 0, "iter": 9301, "iter_tflops": 19.242001895550406, "iter_time": 1.0721905975341797, "loss": 0.1178450882434845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.050599200509964, "step_time": 1.0289514694213866} +{"epoch": 0, "iter": 9302, "iter_tflops": 21.319521705374573, "iter_time": 0.9677090225219727, "loss": 0.1354043036699295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.541026574749633, "step_time": 0.7773283920288085} +{"epoch": 0, "iter": 9303, "iter_tflops": 45.204229904107976, "iter_time": 0.4563974113464355, "loss": 0.13672330975532532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.46671147302835, "step_time": 0.41707024574279783} +{"epoch": 0, "iter": 9304, "iter_tflops": 37.81738166614914, "iter_time": 0.5455452651977539, "loss": 0.1969287246465683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.420867572905266, "step_time": 0.49808453369140626} +{"epoch": 0, "iter": 9305, "iter_tflops": 15.926328043181345, "iter_time": 1.0543368225097656, "loss": 0.07703236490488052, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 17.16601955789352, "step_time": 0.9781949768066406} +{"epoch": 0, "iter": 9306, "iter_tflops": 20.15955002456246, "iter_time": 0.83294091796875, "loss": 
0.030921468511223793, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 24.399082854699532, "step_time": 0.6882108726501465} +{"epoch": 0, "iter": 9307, "iter_tflops": 41.507408140589135, "iter_time": 0.4045474014282227, "loss": 0.04810870438814163, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 45.20555905449222, "step_time": 0.3714524154663086} +{"epoch": 0, "iter": 9308, "iter_tflops": 43.373536459531174, "iter_time": 0.38714191818237303, "loss": 0.04748059809207916, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 47.43137267253782, "step_time": 0.35402125549316404} +{"epoch": 0, "iter": 9309, "iter_tflops": 21.706440181830498, "iter_time": 0.9504595565795899, "loss": 0.7107007503509521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.69590884643923, "step_time": 0.909022575378418} +{"epoch": 0, "iter": 9310, "iter_tflops": 14.50955474067531, "iter_time": 1.4218970794677734, "loss": 0.7866030931472778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.411547748474554, "step_time": 1.010755958557129} +{"epoch": 0, "iter": 9311, "iter_tflops": 36.409366672288364, "iter_time": 0.5666424713134766, "loss": 0.6084254384040833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.5311954202637, "step_time": 0.44338197898864745} +{"epoch": 0, "iter": 9312, "iter_tflops": 42.04021158326879, "iter_time": 0.4907466621398926, "loss": 0.7952077984809875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.049183876080626, "step_time": 0.4579681968688965} +{"epoch": 0, "iter": 9313, "iter_tflops": 33.26625019160611, "iter_time": 0.6201809158325196, "loss": 0.7335816025733948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.52277265350233, "step_time": 0.5807849998474122} +{"epoch": 0, "iter": 9314, "iter_tflops": 15.149525776305142, "iter_time": 1.3618309783935547, "loss": 0.9499200582504272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.989235805573145, "step_time": 1.2143626556396485} +{"epoch": 0, "iter": 9315, "iter_tflops": 45.381012853687686, "iter_time": 0.4546195030212402, 
"loss": 0.8010504841804504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.00146862703071, "step_time": 0.4210301055908203} +{"epoch": 0, "iter": 9316, "iter_tflops": 45.99474177877517, "iter_time": 0.4485533065795898, "loss": 0.6151505708694458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.212131861403, "step_time": 0.4192277946472167} +{"epoch": 0, "iter": 9317, "iter_tflops": 22.16177343332049, "iter_time": 0.5533245315551758, "loss": 0.00647682324051857, "lr": 3e-05, "seqlen": 4928.0, "step_tflops": 23.872539772512194, "step_time": 0.5136719017028809} +{"epoch": 0, "iter": 9318, "iter_tflops": 9.584502982141592, "iter_time": 1.2794250183105471, "loss": 0.00511690741404891, "lr": 3e-05, "seqlen": 4928.0, "step_tflops": 11.222366765615977, "step_time": 1.0926975708007813} +{"epoch": 0, "iter": 9319, "iter_tflops": 25.471825729482855, "iter_time": 0.4814202575683594, "loss": 0.002828487427905202, "lr": 3e-05, "seqlen": 4928.0, "step_tflops": 28.297126300098373, "step_time": 0.4333532943725586} +{"epoch": 0, "iter": 9320, "iter_tflops": 26.970288649206218, "iter_time": 0.45467266082763674, "loss": 0.008882462978363037, "lr": 3e-05, "seqlen": 4928.0, "step_tflops": 29.968401148137808, "step_time": 0.40918609046936033} +{"epoch": 0, "iter": 9321, "iter_tflops": 12.45925851250581, "iter_time": 1.6558845367431643, "loss": 0.5271279811859131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.231503812894006, "step_time": 1.5592402648925783} +{"epoch": 0, "iter": 9322, "iter_tflops": 19.91820351236019, "iter_time": 1.0357908782958984, "loss": 0.7755290269851685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.410652962620148, "step_time": 0.7526669845581055} +{"epoch": 0, "iter": 9323, "iter_tflops": 44.07441780644758, "iter_time": 0.4680967903137207, "loss": 0.6353572010993958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.67192947918874, "step_time": 0.4327723617553711} +{"epoch": 0, "iter": 9324, "iter_tflops": 49.1093065893448, "iter_time": 
0.4201055755615234, "loss": 0.760586142539978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.821290312583635, "step_time": 0.39058291435241704} +{"epoch": 0, "iter": 9325, "iter_tflops": 29.406994349399138, "iter_time": 0.7015709686279297, "loss": 0.1462555080652237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.279764977379465, "step_time": 0.6595667686462402} +{"epoch": 0, "iter": 9326, "iter_tflops": 15.741396699674523, "iter_time": 1.3106266174316406, "loss": 0.10871560871601105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.384321978728103, "step_time": 0.9216760520935058} +{"epoch": 0, "iter": 9327, "iter_tflops": 43.38453732282504, "iter_time": 0.4755402450561524, "loss": 0.16621889173984528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.80891848161997, "step_time": 0.43153232002258296} +{"epoch": 0, "iter": 9328, "iter_tflops": 45.13104472774151, "iter_time": 0.45713751220703125, "loss": 0.1253100484609604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.174839757877876, "step_time": 0.4195457191467285} +{"epoch": 0, "iter": 9329, "iter_tflops": 19.434697438431492, "iter_time": 1.061559799194336, "loss": 0.46462520956993103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.456572594837983, "step_time": 1.0085312881469726} +{"epoch": 0, "iter": 9330, "iter_tflops": 22.66587376997154, "iter_time": 0.9102271423339844, "loss": 0.3825072944164276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.335637845378773, "step_time": 0.7547324714660645} +{"epoch": 0, "iter": 9331, "iter_tflops": 40.5950032849638, "iter_time": 0.508217559814453, "loss": 0.3691779375076294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.34589813320171, "step_time": 0.4652311573028564} +{"epoch": 0, "iter": 9332, "iter_tflops": 43.197727456220505, "iter_time": 0.4775967330932617, "loss": 0.4208694100379944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.986839909841734, "step_time": 0.43908238029479985} +{"epoch": 0, "iter": 9333, "iter_tflops": 17.385189319012177, 
"iter_time": 1.1867051391601562, "loss": 0.23542599380016327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.755294087646845, "step_time": 1.1000143966674805} +{"epoch": 0, "iter": 9334, "iter_tflops": 24.308282202069844, "iter_time": 0.848726921081543, "loss": 0.2969636619091034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.697459018931315, "step_time": 0.6720782165527344} +{"epoch": 0, "iter": 9335, "iter_tflops": 48.76274524459444, "iter_time": 0.4230913047790527, "loss": 0.3030965030193329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.89479022195066, "step_time": 0.3900401802062988} +{"epoch": 0, "iter": 9336, "iter_tflops": 53.73292269989261, "iter_time": 0.3839562873840332, "loss": 0.35703742504119873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.31579166274789, "step_time": 0.3537822761535645} +{"epoch": 0, "iter": 9337, "iter_tflops": 46.615805989877735, "iter_time": 0.4425772132873535, "loss": 0.7632845044136047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.858179794604546, "step_time": 0.4056592979431152} +{"epoch": 0, "iter": 9338, "iter_tflops": 42.07356561539225, "iter_time": 0.49035762023925783, "loss": 0.5553796887397766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38836825419666, "step_time": 0.4545458297729492} +{"epoch": 0, "iter": 9339, "iter_tflops": 48.55240257378698, "iter_time": 0.4249242553710938, "loss": 0.8056426048278809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.333997055748604, "step_time": 0.39421971702575687} +{"epoch": 0, "iter": 9340, "iter_tflops": 52.569417931217856, "iter_time": 0.392454288482666, "loss": 0.6663766503334045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.803127336191565, "step_time": 0.36320347976684564} +{"epoch": 0, "iter": 9341, "iter_tflops": 34.28739540690481, "iter_time": 0.6017107238769531, "loss": 0.3857203423976898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.825014403831, "step_time": 0.5602467193603515} +{"epoch": 0, "iter": 9342, "iter_tflops": 44.88338951552346, 
"iter_time": 0.4596598815917968, "loss": 0.5436618328094482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.73584252674883, "step_time": 0.42332485580444335} +{"epoch": 0, "iter": 9343, "iter_tflops": 45.88290056883561, "iter_time": 0.449646671295166, "loss": 0.4956984221935272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.71088654828073, "step_time": 0.4150216369628906} +{"epoch": 0, "iter": 9344, "iter_tflops": 48.77551315243807, "iter_time": 0.4229805526733398, "loss": 0.5603640079498291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.42573138992423, "step_time": 0.3935299129486084} +{"epoch": 0, "iter": 9345, "iter_tflops": 26.362641569256414, "iter_time": 0.782588249206543, "loss": 0.7303962707519531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.89542627230593, "step_time": 0.7395869598388672} +{"epoch": 0, "iter": 9346, "iter_tflops": 8.02641247267869, "iter_time": 2.570400360107422, "loss": 0.7674864530563354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.90215305603723, "step_time": 2.083495719909668} +{"epoch": 0, "iter": 9347, "iter_tflops": 13.7079312414507, "iter_time": 1.5050479278564455, "loss": 0.7189649343490601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.381764210298613, "step_time": 1.2593938751220701} +{"epoch": 0, "iter": 9348, "iter_tflops": 35.99333524223818, "iter_time": 0.5731920471191405, "loss": 0.7218886613845825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.916750236622896, "step_time": 0.45931848144531245} +{"epoch": 0, "iter": 9349, "iter_tflops": 27.337286889695385, "iter_time": 0.5887308425903319, "loss": 0.451232373714447, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 29.538008999849797, "step_time": 0.5448675956726075} +{"epoch": 0, "iter": 9350, "iter_tflops": 22.924844318092465, "iter_time": 0.7020463790893554, "loss": 0.38518786430358887, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 24.673106326168792, "step_time": 0.6523014869689941} +{"epoch": 0, "iter": 9351, "iter_tflops": 24.506584525472615, 
"iter_time": 0.6567338638305664, "loss": 0.34136348962783813, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 26.411090055485147, "step_time": 0.6093767395019531} +{"epoch": 0, "iter": 9352, "iter_tflops": 23.232935273024317, "iter_time": 0.692736572265625, "loss": 0.4565946161746979, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 25.081077640734474, "step_time": 0.6416910858154297} +{"epoch": 0, "iter": 9353, "iter_tflops": 20.43423008265067, "iter_time": 1.0096340026855468, "loss": 0.025090644136071205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.10295997183767, "step_time": 0.9334086265563966} +{"epoch": 0, "iter": 9354, "iter_tflops": 15.68963690764911, "iter_time": 1.3149503479003908, "loss": 0.007801515981554985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.438435705920934, "step_time": 1.1189177780151367} +{"epoch": 0, "iter": 9355, "iter_tflops": 42.491796960897496, "iter_time": 0.48553120803833016, "loss": 0.011140295304358006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.0261204693741, "step_time": 0.4387156181335449} +{"epoch": 0, "iter": 9356, "iter_tflops": 44.01393005626201, "iter_time": 0.46874008941650397, "loss": 0.004335086792707443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.84614176462789, "step_time": 0.42236894798278807} +{"epoch": 0, "iter": 9357, "iter_tflops": 13.037534835322772, "iter_time": 1.0527425384521483, "loss": 0.00817401334643364, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 13.826795817225644, "step_time": 0.99264990234375} +{"epoch": 0, "iter": 9358, "iter_tflops": 6.847309116930658, "iter_time": 2.0044615020751952, "loss": 0.008684180676937103, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 7.989736017430021, "step_time": 1.7178499374389649} +{"epoch": 0, "iter": 9359, "iter_tflops": 7.009966660648661, "iter_time": 1.957950469970703, "loss": 0.009086700156331062, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 8.521444260581127, "step_time": 1.610662124633789} +{"epoch": 0, "iter": 9360, "iter_tflops": 
36.63280561762195, "iter_time": 0.37466875076293943, "loss": 0.005414285231381655, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 40.53669156446826, "step_time": 0.33858627796173096} +{"epoch": 0, "iter": 9361, "iter_tflops": 23.990826028306074, "iter_time": 0.6554855117797852, "loss": 0.48142364621162415, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.52087890160662, "step_time": 0.616187198638916} +{"epoch": 0, "iter": 9362, "iter_tflops": 17.85268451968022, "iter_time": 0.8808556976318359, "loss": 0.5107806921005249, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 21.24671480335501, "step_time": 0.7401444892883301} +{"epoch": 0, "iter": 9363, "iter_tflops": 28.470064979220798, "iter_time": 0.5523569717407226, "loss": 0.30280253291130066, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 30.363161311444813, "step_time": 0.5179183654785156} +{"epoch": 0, "iter": 9364, "iter_tflops": 28.857051056684295, "iter_time": 0.5449496154785156, "loss": 0.3780669867992401, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 30.679271770584972, "step_time": 0.5125818824768067} +{"epoch": 0, "iter": 9365, "iter_tflops": 22.65018015059891, "iter_time": 0.910857810974121, "loss": 0.7940959334373474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.850013181223943, "step_time": 0.8650348892211914} +{"epoch": 0, "iter": 9366, "iter_tflops": 13.558234779315752, "iter_time": 1.5216651611328127, "loss": 0.7017539143562317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.766371864508024, "step_time": 1.0993650588989259} +{"epoch": 0, "iter": 9367, "iter_tflops": 37.152692018432305, "iter_time": 0.5553054809570312, "loss": 0.7170283794403076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.57736199769051, "step_time": 0.5084385108947753} +{"epoch": 0, "iter": 9368, "iter_tflops": 36.75984925433399, "iter_time": 0.5612398834228516, "loss": 0.7760571837425232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.880603956031344, "step_time": 0.5173214912414551} +{"epoch": 0, "iter": 9369, 
"iter_tflops": 34.254454116895424, "iter_time": 0.6022893676757813, "loss": 0.17451168596744537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.834696403985305, "step_time": 0.5452956008911133} +{"epoch": 0, "iter": 9370, "iter_tflops": 35.58326822589164, "iter_time": 0.5797975997924805, "loss": 0.14187876880168915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.3085128169735, "step_time": 0.5248505229949951} +{"epoch": 0, "iter": 9371, "iter_tflops": 38.13081594511193, "iter_time": 0.5410608978271485, "loss": 0.12646810710430145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.838378114434235, "step_time": 0.4931140842437744} +{"epoch": 0, "iter": 9372, "iter_tflops": 37.45399483125861, "iter_time": 0.550838264465332, "loss": 0.18526983261108398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.12805096591344, "step_time": 0.5016307125091553} +{"epoch": 0, "iter": 9373, "iter_tflops": 20.872104596216907, "iter_time": 0.9884529571533203, "loss": 0.31107980012893677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.400791408654104, "step_time": 0.9209984207153321} +{"epoch": 0, "iter": 9374, "iter_tflops": 38.209856942319995, "iter_time": 0.5399416580200195, "loss": 0.23108454048633575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.09796206347905, "step_time": 0.490073450088501} +{"epoch": 0, "iter": 9375, "iter_tflops": 40.62262551007352, "iter_time": 0.5078719863891602, "loss": 0.34001395106315613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.47472974068031, "step_time": 0.4638835048675537} +{"epoch": 0, "iter": 9376, "iter_tflops": 43.89066604375436, "iter_time": 0.4700565147399902, "loss": 0.2886592447757721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.87194285476298, "step_time": 0.4309641990661621} +{"epoch": 0, "iter": 9377, "iter_tflops": 17.815493133963983, "iter_time": 1.158042236328125, "loss": 0.8680915832519531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.10170401826606, "step_time": 1.0800656051635742} +{"epoch": 0, "iter": 9378, 
"iter_tflops": 16.345112506778207, "iter_time": 1.2622178955078123, "loss": 0.879161536693573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.870607928162617, "step_time": 1.0382718830108644} +{"epoch": 0, "iter": 9379, "iter_tflops": 36.310915839574456, "iter_time": 0.568178825378418, "loss": 0.7517458200454712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.73956547220024, "step_time": 0.5191575012207031} +{"epoch": 0, "iter": 9380, "iter_tflops": 40.32405919080753, "iter_time": 0.5116323585510254, "loss": 0.8554884791374207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.915740846685026, "step_time": 0.4697881240844727} +{"epoch": 0, "iter": 9381, "iter_tflops": 18.469931442747136, "iter_time": 1.1170097503662109, "loss": 0.6544521450996399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.126066244450413, "step_time": 1.025093193054199} +{"epoch": 0, "iter": 9382, "iter_tflops": 16.333442054852547, "iter_time": 1.2631197662353515, "loss": 0.733845055103302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.414987397161468, "step_time": 0.9204151287078858} +{"epoch": 0, "iter": 9383, "iter_tflops": 47.500010514058296, "iter_time": 0.4343387145996093, "loss": 0.5570551753044128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.17177483160339, "step_time": 0.403173303604126} +{"epoch": 0, "iter": 9384, "iter_tflops": 42.887949247590164, "iter_time": 0.4810463981628418, "loss": 0.504356324672699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.0266332285369, "step_time": 0.4482425079345703} +{"epoch": 0, "iter": 9385, "iter_tflops": 17.800762144324594, "iter_time": 0.6752236175537107, "loss": 0.002966142725199461, "lr": 3e-05, "seqlen": 4832.0, "step_tflops": 18.988032894389182, "step_time": 0.6330036964416504} +{"epoch": 0, "iter": 9386, "iter_tflops": 10.117396001809666, "iter_time": 1.1880028228759765, "loss": 0.0070295133627951145, "lr": 3e-05, "seqlen": 4832.0, "step_tflops": 11.037244964066767, "step_time": 1.0889941329956054} +{"epoch": 0, "iter": 
9387, "iter_tflops": 21.32276528051121, "iter_time": 0.5636930694580078, "loss": 0.006069645751267672, "lr": 3e-05, "seqlen": 4832.0, "step_tflops": 28.423711129325902, "step_time": 0.42286860275268556} +{"epoch": 0, "iter": 9388, "iter_tflops": 26.567388468595215, "iter_time": 0.4524153747558594, "loss": 0.004931288305670023, "lr": 3e-05, "seqlen": 4832.0, "step_tflops": 29.345720165388073, "step_time": 0.4095825538635253} +{"epoch": 0, "iter": 9389, "iter_tflops": 22.5035384076916, "iter_time": 0.9167933120727538, "loss": 0.1619081348180771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.446446489566974, "step_time": 0.843930160522461} +{"epoch": 0, "iter": 9390, "iter_tflops": 14.34484938177537, "iter_time": 1.4382230834960938, "loss": 0.26246675848960876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.105495478855378, "step_time": 1.1394934501647949} +{"epoch": 0, "iter": 9391, "iter_tflops": 45.33647668852426, "iter_time": 0.45506609725952146, "loss": 0.22133012115955353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.08573274139357, "step_time": 0.42030733489990235} +{"epoch": 0, "iter": 9392, "iter_tflops": 46.32231464644128, "iter_time": 0.4453813171386719, "loss": 0.2845116853713989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.74531170360811, "step_time": 0.4147344303131103} +{"epoch": 0, "iter": 9393, "iter_tflops": 31.158738725889265, "iter_time": 0.6621286468505859, "loss": 0.7600191235542297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.271503995004984, "step_time": 0.6200829849243163} +{"epoch": 0, "iter": 9394, "iter_tflops": 10.692739943979639, "iter_time": 1.92944873046875, "loss": 0.742336094379425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.340577846147523, "step_time": 1.6718093566894532} +{"epoch": 0, "iter": 9395, "iter_tflops": 36.100919010333165, "iter_time": 0.5714838867187501, "loss": 0.9482772946357727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35435988136456, "step_time": 0.5242390823364258} +{"epoch": 0, 
"iter": 9396, "iter_tflops": 37.29607116204601, "iter_time": 0.5531706924438476, "loss": 0.8129869103431702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.4900361734423, "step_time": 0.5095350723266602} +{"epoch": 0, "iter": 9397, "iter_tflops": 19.85081438444193, "iter_time": 1.0393071594238281, "loss": 0.08196484297513962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.121785367870114, "step_time": 0.9767684478759767} +{"epoch": 0, "iter": 9398, "iter_tflops": 18.128363596638586, "iter_time": 1.1380560302734377, "loss": 0.15029719471931458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.541860452869788, "step_time": 0.777303970336914} +{"epoch": 0, "iter": 9399, "iter_tflops": 41.304811914730756, "iter_time": 0.4994840202331543, "loss": 0.10451862961053848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.4603402978614, "step_time": 0.45382620048522954} +{"epoch": 0, "iter": 9400, "iter_tflops": 37.84845091086589, "iter_time": 0.5450974349975586, "loss": 0.10720019042491913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.617485923580205, "step_time": 0.4957313747406006} +{"epoch": 0, "iter": 9401, "iter_tflops": 17.36070032705663, "iter_time": 1.1883791046142578, "loss": 0.31156790256500244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.45850058618065, "step_time": 1.1177014846801758} +{"epoch": 0, "iter": 9402, "iter_tflops": 22.34378377888398, "iter_time": 0.9233482437133789, "loss": 0.18293046951293945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.06323862860547, "step_time": 0.8231615161895752} +{"epoch": 0, "iter": 9403, "iter_tflops": 42.30958223979253, "iter_time": 0.4876222457885742, "loss": 0.2445429563522339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.23906285693788, "step_time": 0.44618321037292485} +{"epoch": 0, "iter": 9404, "iter_tflops": 42.154582407120564, "iter_time": 0.48941520309448244, "loss": 0.33785051107406616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.264550153402006, "step_time": 0.445937406539917} +{"epoch": 
0, "iter": 9405, "iter_tflops": 17.375007193509337, "iter_time": 1.1874005737304687, "loss": 0.24691756069660187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.490821545499053, "step_time": 1.115747802734375} +{"epoch": 0, "iter": 9406, "iter_tflops": 23.47683675899892, "iter_time": 0.8787850646972656, "loss": 0.29557281732559204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.384228450999284, "step_time": 0.7819479560852052} +{"epoch": 0, "iter": 9407, "iter_tflops": 45.742395579513946, "iter_time": 0.45102783203125, "loss": 0.2794753313064575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.32565829106485, "step_time": 0.4182629127502441} +{"epoch": 0, "iter": 9408, "iter_tflops": 49.69906196892646, "iter_time": 0.41512038040161137, "loss": 0.2729949653148651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.6677707354415, "step_time": 0.3844224052429199} +{"epoch": 0, "iter": 9409, "iter_tflops": 26.062080594621552, "iter_time": 0.7916134490966796, "loss": 0.4920419156551361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.352142288596408, "step_time": 0.7542770614624024} +{"epoch": 0, "iter": 9410, "iter_tflops": 13.417226408847142, "iter_time": 1.5376571044921876, "loss": 0.2664377689361572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.931370862769963, "step_time": 1.218512882232666} +{"epoch": 0, "iter": 9411, "iter_tflops": 38.54966547884301, "iter_time": 0.5351821670532226, "loss": 0.3050574064254761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.17589566073628, "step_time": 0.4891678810119629} +{"epoch": 0, "iter": 9412, "iter_tflops": 41.82005394333657, "iter_time": 0.49333015060424806, "loss": 0.3387891948223114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.57490427238973, "step_time": 0.45268539428710936} +{"epoch": 0, "iter": 9413, "iter_tflops": 22.35440490607403, "iter_time": 0.9229095382690429, "loss": 0.600356936454773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.028691920580304, "step_time": 0.8586024398803711} +{"epoch": 0, 
"iter": 9414, "iter_tflops": 16.001066804744276, "iter_time": 1.2893573760986325, "loss": 0.6219732761383057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.48453101357307, "step_time": 1.0071547889709473} +{"epoch": 0, "iter": 9415, "iter_tflops": 34.7317490560762, "iter_time": 0.5940125122070312, "loss": 0.6538580656051636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.5931294769253, "step_time": 0.5487995758056641} +{"epoch": 0, "iter": 9416, "iter_tflops": 39.13308358361595, "iter_time": 0.5272033691406249, "loss": 0.7412201762199402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.886757565475854, "step_time": 0.48105976486206053} +{"epoch": 0, "iter": 9417, "iter_tflops": 25.55221762243609, "iter_time": 0.8074091186523438, "loss": 0.6915227174758911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.400399223507677, "step_time": 0.75294864654541} +{"epoch": 0, "iter": 9418, "iter_tflops": 9.424657873843033, "iter_time": 2.189054901123047, "loss": 0.5575672388076782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.351802940456404, "step_time": 1.6702900466918946} +{"epoch": 0, "iter": 9419, "iter_tflops": 23.074018220694466, "iter_time": 0.8941266021728516, "loss": 0.6265476942062378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.427257789433835, "step_time": 0.7257503929138184} +{"epoch": 0, "iter": 9420, "iter_tflops": 44.619655043176685, "iter_time": 0.46237680435180667, "loss": 0.5003089904785156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.124653272883826, "step_time": 0.428701135635376} +{"epoch": 0, "iter": 9421, "iter_tflops": 26.186430259314818, "iter_time": 0.6428054733276367, "loss": 0.4285833537578583, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 28.047291461795503, "step_time": 0.6001570854187012} +{"epoch": 0, "iter": 9422, "iter_tflops": 30.275544434355457, "iter_time": 0.5559860610961914, "loss": 0.5745561718940735, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 32.35946830441461, "step_time": 0.5201810035705566} +{"epoch": 0, 
"iter": 9423, "iter_tflops": 29.917826778832758, "iter_time": 0.5626338043212891, "loss": 0.34778034687042236, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 31.80942288805328, "step_time": 0.5291759223937988} +{"epoch": 0, "iter": 9424, "iter_tflops": 28.91253380545292, "iter_time": 0.5821966629028321, "loss": 0.44666019082069397, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 30.708661807774252, "step_time": 0.5481443901062012} +{"epoch": 0, "iter": 9425, "iter_tflops": 25.179548919410493, "iter_time": 0.8193591384887695, "loss": 0.7194196581840515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.615501190147956, "step_time": 0.7751532974243165} +{"epoch": 0, "iter": 9426, "iter_tflops": 14.163432716709492, "iter_time": 1.4566450042724608, "loss": 0.47780779004096985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.606749441453736, "step_time": 1.1717718582153323} +{"epoch": 0, "iter": 9427, "iter_tflops": 36.503245690029544, "iter_time": 0.5651851806640625, "loss": 0.8541112542152405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.81844734906842, "step_time": 0.5181290302276612} +{"epoch": 0, "iter": 9428, "iter_tflops": 36.41751110509449, "iter_time": 0.5665157470703125, "loss": 0.8729366660118103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71388061038364, "step_time": 0.5194932651519776} +{"epoch": 0, "iter": 9429, "iter_tflops": 26.3925657184789, "iter_time": 0.7817009429931641, "loss": 0.25698456168174744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.279451140212487, "step_time": 0.7295436325073242} +{"epoch": 0, "iter": 9430, "iter_tflops": 9.045370214179773, "iter_time": 2.2808456726074224, "loss": 0.31263911724090576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.290660253642889, "step_time": 1.8272707748413086} +{"epoch": 0, "iter": 9431, "iter_tflops": 12.979420639775102, "iter_time": 1.589523452758789, "loss": 0.329556941986084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.509038709652314, "step_time": 1.421947650909424} +{"epoch": 
0, "iter": 9432, "iter_tflops": 39.67020485930165, "iter_time": 0.5200652122497559, "loss": 0.32398223876953125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.50318240826814, "step_time": 0.4742433166503906} +{"epoch": 0, "iter": 9433, "iter_tflops": 19.596000518817906, "iter_time": 0.8066713104248047, "loss": 0.5385245680809021, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 20.60804246094889, "step_time": 0.7670564270019531} +{"epoch": 0, "iter": 9434, "iter_tflops": 9.834619354453721, "iter_time": 1.6073353576660157, "loss": 0.3635326027870178, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 12.422034077135127, "step_time": 1.272539691925049} +{"epoch": 0, "iter": 9435, "iter_tflops": 27.928972829267334, "iter_time": 0.5659904327392579, "loss": 0.3295764923095703, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 29.785944891854026, "step_time": 0.5307043800354004} +{"epoch": 0, "iter": 9436, "iter_tflops": 28.722557219541994, "iter_time": 0.5503525085449219, "loss": 0.3276944160461426, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.546397563884476, "step_time": 0.5174924926757813} +{"epoch": 0, "iter": 9437, "iter_tflops": 21.063073571409955, "iter_time": 0.9794911193847656, "loss": 0.22704382240772247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.047031438792118, "step_time": 0.9357764816284181} +{"epoch": 0, "iter": 9438, "iter_tflops": 19.213755654574136, "iter_time": 1.073766830444336, "loss": 0.209634467959404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.641226799099385, "step_time": 0.8726744041442871} +{"epoch": 0, "iter": 9439, "iter_tflops": 46.477326592710554, "iter_time": 0.44389587402343755, "loss": 0.2992761433124542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.371059876352, "step_time": 0.40958227920532225} +{"epoch": 0, "iter": 9440, "iter_tflops": 49.10405009847418, "iter_time": 0.4201505470275879, "loss": 0.23294112086296082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.17329259421096, "step_time": 0.3879972915649414} +{"epoch": 
0, "iter": 9441, "iter_tflops": 27.274197332241737, "iter_time": 0.7564326553344727, "loss": 0.04826422780752182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.860422979126074, "step_time": 0.7148576278686523} +{"epoch": 0, "iter": 9442, "iter_tflops": 16.018612538790407, "iter_time": 1.287945098876953, "loss": 0.04048570245504379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.413142885316486, "step_time": 1.0627384567260743} +{"epoch": 0, "iter": 9443, "iter_tflops": 46.615458038247134, "iter_time": 0.4425805168151855, "loss": 0.020724380388855934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.42307636214127, "step_time": 0.4091597537994385} +{"epoch": 0, "iter": 9444, "iter_tflops": 50.16672153289513, "iter_time": 0.41125058364868167, "loss": 0.05470028892159462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.69274934791538, "step_time": 0.3772180728912353} +{"epoch": 0, "iter": 9445, "iter_tflops": 27.480935595353106, "iter_time": 0.7507420349121093, "loss": 0.23590819537639618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.967284449470238, "step_time": 0.7122204895019532} +{"epoch": 0, "iter": 9446, "iter_tflops": 13.418694887392174, "iter_time": 1.5374888305664063, "loss": 0.32269591093063354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.12796080840501, "step_time": 1.2045271320343018} +{"epoch": 0, "iter": 9447, "iter_tflops": 38.93950406887596, "iter_time": 0.5298242492675781, "loss": 0.2868044972419739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.59644077627293, "step_time": 0.484338436126709} +{"epoch": 0, "iter": 9448, "iter_tflops": 41.69931175055806, "iter_time": 0.49475860977172853, "loss": 0.17595937848091125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.3183129465112, "step_time": 0.4552484893798828} +{"epoch": 0, "iter": 9449, "iter_tflops": 25.34531374524483, "iter_time": 0.8140003204345703, "loss": 0.750108003616333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.514862748311028, "step_time": 0.7498163337707519} 
+{"epoch": 0, "iter": 9450, "iter_tflops": 39.35389310808232, "iter_time": 0.5242453002929688, "loss": 0.7417725324630737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39702240654811, "step_time": 0.4754034347534179} +{"epoch": 0, "iter": 9451, "iter_tflops": 44.32821501551303, "iter_time": 0.4654167442321777, "loss": 0.6271631717681885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.13142747735001, "step_time": 0.4286407985687256} +{"epoch": 0, "iter": 9452, "iter_tflops": 50.88662986932915, "iter_time": 0.4054324989318848, "loss": 0.751911997795105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.07132684959319, "step_time": 0.37462495803833007} +{"epoch": 0, "iter": 9453, "iter_tflops": 27.97063343303867, "iter_time": 0.7375983657836915, "loss": 0.5912260413169861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.57682378583457, "step_time": 0.69754256439209} +{"epoch": 0, "iter": 9454, "iter_tflops": 20.11004613037517, "iter_time": 1.0259098052978517, "loss": 0.8096736669540405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.07372129457922, "step_time": 0.7620339031219483} +{"epoch": 0, "iter": 9455, "iter_tflops": 45.61556701413426, "iter_time": 0.4522818603515625, "loss": 0.6957478523254395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.073703161925785, "step_time": 0.42041036605834964} +{"epoch": 0, "iter": 9456, "iter_tflops": 43.78976540793882, "iter_time": 0.4711396217346192, "loss": 0.8049421906471252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.018777788544284, "step_time": 0.4387841300964355} +{"epoch": 0, "iter": 9457, "iter_tflops": 44.68312933880888, "iter_time": 0.4617199783325196, "loss": 0.5487858653068542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.03531069697013, "step_time": 0.4207395286560058} +{"epoch": 0, "iter": 9458, "iter_tflops": 45.40599503731179, "iter_time": 0.45436937332153327, "loss": 0.6792490482330322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.221148807284614, "step_time": 0.4191509952545166} 
+{"epoch": 0, "iter": 9459, "iter_tflops": 46.460098393098875, "iter_time": 0.4440604782104492, "loss": 0.6985743045806885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5631032510405, "step_time": 0.40802664756774903} +{"epoch": 0, "iter": 9460, "iter_tflops": 46.18257358554328, "iter_time": 0.4467289695739746, "loss": 0.6761601567268372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.144148104541124, "step_time": 0.4114357166290283} +{"epoch": 0, "iter": 9461, "iter_tflops": 27.887636421854904, "iter_time": 0.7397935485839844, "loss": 0.38223105669021606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.691927693326473, "step_time": 0.6948384666442871} +{"epoch": 0, "iter": 9462, "iter_tflops": 15.24068681326796, "iter_time": 1.3536852874755858, "loss": 0.25345003604888916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.18695491186006, "step_time": 1.2003926010131836} +{"epoch": 0, "iter": 9463, "iter_tflops": 34.18976823202865, "iter_time": 0.6034288787841796, "loss": 0.2843709886074066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.46792604755359, "step_time": 0.5506334533691406} +{"epoch": 0, "iter": 9464, "iter_tflops": 40.28163925630091, "iter_time": 0.5121711502075195, "loss": 0.2873060405254364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.83785871632905, "step_time": 0.4706227474212646} +{"epoch": 0, "iter": 9465, "iter_tflops": 32.043979209199755, "iter_time": 0.6438368148803713, "loss": 0.5922229886054993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.068283772493764, "step_time": 0.5883120384216309} +{"epoch": 0, "iter": 9466, "iter_tflops": 35.914960867827666, "iter_time": 0.5744428787231445, "loss": 0.5009503960609436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.18569838056484, "step_time": 0.5264954910278321} +{"epoch": 0, "iter": 9467, "iter_tflops": 39.087850202199746, "iter_time": 0.527813461303711, "loss": 0.7055180668830872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79107759743474, "step_time": 0.48213540458679194} 
+{"epoch": 0, "iter": 9468, "iter_tflops": 39.2531158099223, "iter_time": 0.5255912322998048, "loss": 0.6575766801834106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90585949035178, "step_time": 0.480845594406128} +{"epoch": 0, "iter": 9469, "iter_tflops": 21.410238200746065, "iter_time": 0.9636087799072265, "loss": 0.7053587436676025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.032013156687835, "step_time": 0.8957572822570802} +{"epoch": 0, "iter": 9470, "iter_tflops": 27.659585276034242, "iter_time": 0.7458930892944335, "loss": 0.8228746652603149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.868761373742004, "step_time": 0.6091481552124023} +{"epoch": 0, "iter": 9471, "iter_tflops": 36.46489156931821, "iter_time": 0.5657796478271484, "loss": 0.7099713087081909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.5632289817588, "step_time": 0.5214714279174805} +{"epoch": 0, "iter": 9472, "iter_tflops": 39.413881307683354, "iter_time": 0.523447395324707, "loss": 0.7781336903572083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.02517501338981, "step_time": 0.4795121345520019} +{"epoch": 0, "iter": 9473, "iter_tflops": 16.642983330319815, "iter_time": 1.2396271209716796, "loss": 0.8369839787483215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.819485841276336, "step_time": 1.1577827606201172} +{"epoch": 0, "iter": 9474, "iter_tflops": 14.12622641418603, "iter_time": 1.4604815826416016, "loss": 0.6723659038543701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.9701159114942, "step_time": 1.2157308540344238} +{"epoch": 0, "iter": 9475, "iter_tflops": 34.27822165238074, "iter_time": 0.6018717575073241, "loss": 0.7169915437698364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.80859213468386, "step_time": 0.5456720905303954} +{"epoch": 0, "iter": 9476, "iter_tflops": 38.91860479580324, "iter_time": 0.5301087646484375, "loss": 0.7443966865539551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67771290492772, "step_time": 0.48341609954833986} 
+{"epoch": 0, "iter": 9477, "iter_tflops": 3.326434181876273, "iter_time": 0.636593246459961, "loss": 0.24462465941905975, "lr": 3e-05, "seqlen": 864.0, "step_tflops": 3.65141108331392, "step_time": 0.5799362182617188} +{"epoch": 0, "iter": 9478, "iter_tflops": 3.8113344636087154, "iter_time": 0.5556021270751953, "loss": 0.19784949719905853, "lr": 3e-05, "seqlen": 864.0, "step_tflops": 4.21233143559854, "step_time": 0.5027110443115235} +{"epoch": 0, "iter": 9479, "iter_tflops": 4.028821468700365, "iter_time": 0.5256091766357422, "loss": 0.009329407475888729, "lr": 3e-05, "seqlen": 864.0, "step_tflops": 4.371307937132405, "step_time": 0.4844283599853515} +{"epoch": 0, "iter": 9480, "iter_tflops": 3.626195530154257, "iter_time": 0.5839689331054687, "loss": 0.02716429904103279, "lr": 3e-05, "seqlen": 864.0, "step_tflops": 3.961754471531092, "step_time": 0.5345070095062255} +{"epoch": 0, "iter": 9481, "iter_tflops": 19.756729215851717, "iter_time": 1.0442565307617189, "loss": 0.1334446370601654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.07154937110217, "step_time": 0.9790971298217773} +{"epoch": 0, "iter": 9482, "iter_tflops": 31.779870565052253, "iter_time": 0.6491874618530274, "loss": 0.18482211232185364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.016552281567975, "step_time": 0.5287779750823974} +{"epoch": 0, "iter": 9483, "iter_tflops": 48.30998599765411, "iter_time": 0.42705649948120117, "loss": 0.09738213568925858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.73592770768547, "step_time": 0.3912151432037354} +{"epoch": 0, "iter": 9484, "iter_tflops": 48.16101935082391, "iter_time": 0.42837742614746094, "loss": 0.13378646969795227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.304181713514616, "step_time": 0.39444443702697757} +{"epoch": 0, "iter": 9485, "iter_tflops": 19.289559895580517, "iter_time": 1.0695471343994143, "loss": 0.5405950546264648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.077264783488136, "step_time": 1.0275848693847656} 
+{"epoch": 0, "iter": 9486, "iter_tflops": 18.979928629841137, "iter_time": 1.0869953155517578, "loss": 0.4898059070110321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.16279438967092, "step_time": 0.9748756771087648} +{"epoch": 0, "iter": 9487, "iter_tflops": 49.36317462528304, "iter_time": 0.41794503021240226, "loss": 0.6004952192306519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.76759348409198, "step_time": 0.3837087020874024} +{"epoch": 0, "iter": 9488, "iter_tflops": 48.694447533796094, "iter_time": 0.42368472290039066, "loss": 0.4100888967514038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.71652244015385, "step_time": 0.39135915184021} +{"epoch": 0, "iter": 9489, "iter_tflops": 20.478392837975154, "iter_time": 1.007456672668457, "loss": 0.5925628542900085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.337428284421797, "step_time": 0.9668969116210938} +{"epoch": 0, "iter": 9490, "iter_tflops": 17.931942434903707, "iter_time": 1.1505219573974612, "loss": 0.6299117207527161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.16884936309544, "step_time": 0.890466901779175} +{"epoch": 0, "iter": 9491, "iter_tflops": 43.10660592225133, "iter_time": 0.4786063079833984, "loss": 0.8802359700202942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.246182483708985, "step_time": 0.43667218017578124} +{"epoch": 0, "iter": 9492, "iter_tflops": 40.2553295816138, "iter_time": 0.5125058898925782, "loss": 0.74360191822052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.49323165545325, "step_time": 0.4743518180847169} +{"epoch": 0, "iter": 9493, "iter_tflops": 24.256649430819326, "iter_time": 0.8505335235595703, "loss": 0.20601779222488403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.58157466064276, "step_time": 0.7761426391601562} +{"epoch": 0, "iter": 9494, "iter_tflops": 23.8590383494382, "iter_time": 0.8647076721191406, "loss": 0.40762779116630554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.996288840932024, "step_time": 0.7115080699920654} 
+{"epoch": 0, "iter": 9495, "iter_tflops": 48.436452610288946, "iter_time": 0.425941463470459, "loss": 0.2073664516210556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.53301128760336, "step_time": 0.39272626876831057} +{"epoch": 0, "iter": 9496, "iter_tflops": 50.17049292207773, "iter_time": 0.41121966934204096, "loss": 0.3015720546245575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.100712951867344, "step_time": 0.38134605598449706} +{"epoch": 0, "iter": 9497, "iter_tflops": 30.684530436695987, "iter_time": 0.6723613891601562, "loss": 0.15067578852176666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.6157409545671, "step_time": 0.6325502014160156} +{"epoch": 0, "iter": 9498, "iter_tflops": 13.08490768936895, "iter_time": 1.5767091369628905, "loss": 0.2513248026371002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.95221090776521, "step_time": 1.3798022003173829} +{"epoch": 0, "iter": 9499, "iter_tflops": 40.95687810464199, "iter_time": 0.5037271995544434, "loss": 0.17366063594818115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.94084414072646, "step_time": 0.459072229385376} +{"epoch": 0, "iter": 9500, "iter_tflops": 38.53360271493194, "iter_time": 0.5354052581787109, "loss": 0.14335088431835175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.092432570492385, "step_time": 0.4901378288269043} +{"epoch": 0, "iter": 9501, "iter_tflops": 15.798758994782997, "iter_time": 1.3058679809570313, "loss": 0.14486432075500488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.865762072637583, "step_time": 1.2232529678344726} +{"epoch": 0, "iter": 9502, "iter_tflops": 13.188514619905883, "iter_time": 1.5643227539062499, "loss": 0.13962627947330475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.72946334234841, "step_time": 1.0456996803283691} +{"epoch": 0, "iter": 9503, "iter_tflops": 41.608650973544066, "iter_time": 0.4958366355895996, "loss": 0.31809043884277344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.51551639295342, "step_time": 
0.45327605056762693} +{"epoch": 0, "iter": 9504, "iter_tflops": 40.87131114041437, "iter_time": 0.5047817878723144, "loss": 0.2139502614736557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.981948285765114, "step_time": 0.4586527328491211} +{"epoch": 0, "iter": 9505, "iter_tflops": 32.62180184636279, "iter_time": 0.6324326782226561, "loss": 0.7687400579452515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.398607719386305, "step_time": 0.5828221740722656} +{"epoch": 0, "iter": 9506, "iter_tflops": 7.9702001816462245, "iter_time": 2.588528900146484, "loss": 0.9300331473350525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.73646194252449, "step_time": 1.921591453552246} +{"epoch": 0, "iter": 9507, "iter_tflops": 15.566608681160872, "iter_time": 1.3253428497314454, "loss": 0.6489912867546082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.230794554798727, "step_time": 1.1316617851257325} +{"epoch": 0, "iter": 9508, "iter_tflops": 23.54673994847307, "iter_time": 0.8761762161254882, "loss": 0.7226736545562744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.250335278102504, "step_time": 0.7053284454345703} +{"epoch": 0, "iter": 9509, "iter_tflops": 17.26293740673749, "iter_time": 0.9204367980957032, "loss": 0.24592743813991547, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 18.058307851638755, "step_time": 0.8798965530395507} +{"epoch": 0, "iter": 9510, "iter_tflops": 11.076543773568996, "iter_time": 1.434512710571289, "loss": 0.43622544407844543, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 12.949115567641051, "step_time": 1.2270678062438967} +{"epoch": 0, "iter": 9511, "iter_tflops": 28.37581004528776, "iter_time": 0.5599643783569337, "loss": 0.26790112257003784, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.305275019747988, "step_time": 0.524312774658203} +{"epoch": 0, "iter": 9512, "iter_tflops": 28.611334654306397, "iter_time": 0.5553548278808593, "loss": 0.41691073775291443, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.47295275765203, 
"step_time": 0.5214277381896972} +{"epoch": 0, "iter": 9513, "iter_tflops": 27.31207640421735, "iter_time": 0.755383560180664, "loss": 0.09278604388237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.33613147762376, "step_time": 0.7032656478881836} +{"epoch": 0, "iter": 9514, "iter_tflops": 13.842549950713357, "iter_time": 1.490411346435547, "loss": 0.10656516999006271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.337132760752638, "step_time": 1.189994550704956} +{"epoch": 0, "iter": 9515, "iter_tflops": 46.19815728964476, "iter_time": 0.4465782775878906, "loss": 0.1155451089143753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.636384687817234, "step_time": 0.40743614768981934} +{"epoch": 0, "iter": 9516, "iter_tflops": 46.99178290822855, "iter_time": 0.43903619384765624, "loss": 0.11089787632226944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.0104203148093, "step_time": 0.4044486083984375} +{"epoch": 0, "iter": 9517, "iter_tflops": 34.381000919708946, "iter_time": 0.600072509765625, "loss": 0.7095537185668945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.79846890014226, "step_time": 0.5606508674621582} +{"epoch": 0, "iter": 9518, "iter_tflops": 19.782817228736967, "iter_time": 1.0428794479370116, "loss": 0.9821050763130188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.918510917729133, "step_time": 0.8625576057434081} +{"epoch": 0, "iter": 9519, "iter_tflops": 40.56852625730929, "iter_time": 0.5085492477416992, "loss": 0.7506631016731262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77826543816201, "step_time": 0.47126338386535643} +{"epoch": 0, "iter": 9520, "iter_tflops": 42.3670462908037, "iter_time": 0.4869608650207519, "loss": 0.8245130777359009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.67255033419013, "step_time": 0.45171757125854495} +{"epoch": 0, "iter": 9521, "iter_tflops": 32.25226701479891, "iter_time": 0.6396788635253906, "loss": 0.754705011844635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.39758707151474, 
"step_time": 0.5997831611633302} +{"epoch": 0, "iter": 9522, "iter_tflops": 14.275704698876515, "iter_time": 1.4451891479492187, "loss": 0.8087166547775269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.0287374539943, "step_time": 1.2115456924438477} +{"epoch": 0, "iter": 9523, "iter_tflops": 33.447247121140194, "iter_time": 0.6168248596191407, "loss": 0.7737108469009399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.09016647736585, "step_time": 0.5716541519165038} +{"epoch": 0, "iter": 9524, "iter_tflops": 35.56697051091921, "iter_time": 0.5800632781982422, "loss": 0.8307793736457825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.50004714785346, "step_time": 0.5358719024658203} +{"epoch": 0, "iter": 9525, "iter_tflops": 17.85009826259951, "iter_time": 1.1557971954345703, "loss": 0.323629230260849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.169854964253368, "step_time": 1.0762258529663087} +{"epoch": 0, "iter": 9526, "iter_tflops": 15.24625237964357, "iter_time": 1.3531911315917968, "loss": 0.3376704156398773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.126489974566738, "step_time": 1.025071611404419} +{"epoch": 0, "iter": 9527, "iter_tflops": 38.500120050419035, "iter_time": 0.5358708877563476, "loss": 0.31907013058662415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25860271427916, "step_time": 0.4882104988098145} +{"epoch": 0, "iter": 9528, "iter_tflops": 40.033796498385286, "iter_time": 0.5153419189453126, "loss": 0.43293654918670654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.09353406043711, "step_time": 0.4678938522338867} +{"epoch": 0, "iter": 9529, "iter_tflops": 18.495714502798677, "iter_time": 1.11545263671875, "loss": 0.39414939284324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.260451540689353, "step_time": 1.0182938652038573} +{"epoch": 0, "iter": 9530, "iter_tflops": 17.885636325934392, "iter_time": 1.1535006713867189, "loss": 0.4653641879558563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.750364990961756, 
"step_time": 0.9485401058197022} +{"epoch": 0, "iter": 9531, "iter_tflops": 45.575445023187086, "iter_time": 0.4526800231933594, "loss": 0.3704512417316437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.468328783543384, "step_time": 0.4170566101074219} +{"epoch": 0, "iter": 9532, "iter_tflops": 53.37885736334549, "iter_time": 0.3865030937194825, "loss": 0.38753148913383484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.85044291925353, "step_time": 0.35662809944152835} +{"epoch": 0, "iter": 9533, "iter_tflops": 33.73472373651812, "iter_time": 0.611568473815918, "loss": 0.9208087921142578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.96091273602662, "step_time": 0.573708839416504} +{"epoch": 0, "iter": 9534, "iter_tflops": 34.88082169751818, "iter_time": 0.5914738388061523, "loss": 0.6572862267494202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.022516385652075, "step_time": 0.45823945808410643} +{"epoch": 0, "iter": 9535, "iter_tflops": 43.85256611708624, "iter_time": 0.47046490859985346, "loss": 0.9040111303329468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20646724797877, "step_time": 0.43703955650329596} +{"epoch": 0, "iter": 9536, "iter_tflops": 44.62717038865262, "iter_time": 0.4622989387512207, "loss": 0.7361650466918945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.092675132192774, "step_time": 0.42898619079589845} +{"epoch": 0, "iter": 9537, "iter_tflops": 31.030573287888704, "iter_time": 0.6648634338378907, "loss": 0.13195759057998657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.06772429599804, "step_time": 0.6239042434692383} +{"epoch": 0, "iter": 9538, "iter_tflops": 20.916738680238325, "iter_time": 0.9863437042236328, "loss": 0.14221951365470886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.269483375233484, "step_time": 0.816443027496338} +{"epoch": 0, "iter": 9539, "iter_tflops": 50.334147071741235, "iter_time": 0.4098826484680176, "loss": 0.11997111886739731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
54.86268696724479, "step_time": 0.3760496368408203} +{"epoch": 0, "iter": 9540, "iter_tflops": 46.61478223941382, "iter_time": 0.44258693313598635, "loss": 0.11899610608816147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.16231423095626, "step_time": 0.4112867164611817} +{"epoch": 0, "iter": 9541, "iter_tflops": 23.993248502316415, "iter_time": 0.8598707885742187, "loss": 0.27748820185661316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.1507286941264, "step_time": 0.8202980422973634} +{"epoch": 0, "iter": 9542, "iter_tflops": 14.614340715528852, "iter_time": 1.4117019653320315, "loss": 0.44086897373199463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.07393333241494, "step_time": 1.2835124473571775} +{"epoch": 0, "iter": 9543, "iter_tflops": 41.017248903415656, "iter_time": 0.5029857940673828, "loss": 0.38824155926704407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2377629569681, "step_time": 0.4560591011047363} +{"epoch": 0, "iter": 9544, "iter_tflops": 36.59111628981895, "iter_time": 0.5638279342651367, "loss": 0.33950063586235046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.307838845355434, "step_time": 0.5118382453918457} +{"epoch": 0, "iter": 9545, "iter_tflops": 27.526938843338424, "iter_time": 0.7494873886108399, "loss": 0.23799408972263336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.785688778543356, "step_time": 0.6926512145996093} +{"epoch": 0, "iter": 9546, "iter_tflops": 9.180600982285656, "iter_time": 2.2472486877441407, "loss": 0.28694310784339905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.857930736153644, "step_time": 1.7398561325073243} +{"epoch": 0, "iter": 9547, "iter_tflops": 9.108000524142739, "iter_time": 2.265161651611328, "loss": 0.324271559715271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.113386672301047, "step_time": 1.8564182205200193} +{"epoch": 0, "iter": 9548, "iter_tflops": 29.812356216041742, "iter_time": 0.6920316314697265, "loss": 0.24420778453350067, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 41.47188960857823, "step_time": 0.4974717502593994} +{"epoch": 0, "iter": 9549, "iter_tflops": 18.29575843318238, "iter_time": 0.8774340972900391, "loss": 0.44613921642303467, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 19.61718985110498, "step_time": 0.8183293533325195} +{"epoch": 0, "iter": 9550, "iter_tflops": 5.703880706200603, "iter_time": 2.8144561767578127, "loss": 0.3716523349285126, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 6.658523133789456, "step_time": 2.4109433822631834} +{"epoch": 0, "iter": 9551, "iter_tflops": 8.022611092529667, "iter_time": 2.0010096588134765, "loss": 0.339916467666626, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 10.264037510429281, "step_time": 1.564035816192627} +{"epoch": 0, "iter": 9552, "iter_tflops": 28.42308270416506, "iter_time": 0.5647987747192383, "loss": 0.29665035009384155, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.340088343100213, "step_time": 0.5291125755310059} +{"epoch": 0, "iter": 9553, "iter_tflops": 19.318872235333878, "iter_time": 0.8076473236083985, "loss": 0.37077096104621887, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 20.39965132167832, "step_time": 0.7648579483032227} +{"epoch": 0, "iter": 9554, "iter_tflops": 11.769691555546647, "iter_time": 1.3256792144775391, "loss": 0.37645575404167175, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 14.901072769473666, "step_time": 1.0470947761535645} +{"epoch": 0, "iter": 9555, "iter_tflops": 26.209614013201737, "iter_time": 0.5953096237182617, "loss": 0.32114195823669434, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 27.95188238810043, "step_time": 0.5582033882141113} +{"epoch": 0, "iter": 9556, "iter_tflops": 27.88800094005931, "iter_time": 0.5594820327758789, "loss": 0.2899920642375946, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.671472294500653, "step_time": 0.5258530921936035} +{"epoch": 0, "iter": 9557, "iter_tflops": 26.64859687823816, "iter_time": 0.7508492279052734, "loss": 0.027098800987005234, "lr": 3e-05, "seqlen": 
7952.0, "step_tflops": 28.20893154149082, "step_time": 0.7093171310424804} +{"epoch": 0, "iter": 9558, "iter_tflops": 11.179422538802463, "iter_time": 1.789813232421875, "loss": 0.023904887959361076, "lr": 3e-05, "seqlen": 7952.0, "step_tflops": 13.806169590922565, "step_time": 1.4492852821350097} +{"epoch": 0, "iter": 9559, "iter_tflops": 47.020441571821685, "iter_time": 0.42553999328613284, "loss": 0.02187567763030529, "lr": 3e-05, "seqlen": 7952.0, "step_tflops": 51.282298707736715, "step_time": 0.39017514610290527} +{"epoch": 0, "iter": 9560, "iter_tflops": 49.3658630977441, "iter_time": 0.40532216262817383, "loss": 0.05090576410293579, "lr": 3e-05, "seqlen": 7952.0, "step_tflops": 53.91275728825113, "step_time": 0.3711381015777588} +{"epoch": 0, "iter": 9561, "iter_tflops": 27.773267222747787, "iter_time": 0.742839988708496, "loss": 0.5564402937889099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.307462582333347, "step_time": 0.7039535903930664} +{"epoch": 0, "iter": 9562, "iter_tflops": 17.617980630907137, "iter_time": 1.1710248718261718, "loss": 0.652919590473175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.38251462127758, "step_time": 0.9648581504821778} +{"epoch": 0, "iter": 9563, "iter_tflops": 46.74301484764362, "iter_time": 0.44137276077270504, "loss": 0.5567843914031982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.75762884600745, "step_time": 0.4064629096984863} +{"epoch": 0, "iter": 9564, "iter_tflops": 54.768970556015226, "iter_time": 0.3766931037902832, "loss": 0.7837703227996826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.480825195483106, "step_time": 0.3468528461456299} +{"epoch": 0, "iter": 9565, "iter_tflops": 28.26232694311191, "iter_time": 0.7299856643676758, "loss": 0.3090435266494751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.934187269878887, "step_time": 0.6892150878906251} +{"epoch": 0, "iter": 9566, "iter_tflops": 17.001101087151554, "iter_time": 1.21351513671875, "loss": 0.3019205927848816, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 22.146701176979327, "step_time": 0.9315650825500488} +{"epoch": 0, "iter": 9567, "iter_tflops": 40.88496742316395, "iter_time": 0.5046131820678711, "loss": 0.24225610494613647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64138990077367, "step_time": 0.46215168380737304} +{"epoch": 0, "iter": 9568, "iter_tflops": 41.650705880069864, "iter_time": 0.49533598709106447, "loss": 0.22739271819591522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.809741776442266, "step_time": 0.4503647632598877} +{"epoch": 0, "iter": 9569, "iter_tflops": 17.351802476732022, "iter_time": 1.1889884948730467, "loss": 0.2515694499015808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.629179960700633, "step_time": 1.1074611740112303} +{"epoch": 0, "iter": 9570, "iter_tflops": 20.36924784850842, "iter_time": 1.0128549499511719, "loss": 0.39459389448165894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.553898416314464, "step_time": 0.8402369823455811} +{"epoch": 0, "iter": 9571, "iter_tflops": 39.45794859864281, "iter_time": 0.5228628005981445, "loss": 0.441348135471344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36084191040596, "step_time": 0.4758001136779785} +{"epoch": 0, "iter": 9572, "iter_tflops": 45.097070602026754, "iter_time": 0.45748189926147453, "loss": 0.3759281635284424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.39780347328556, "step_time": 0.41765204238891607} +{"epoch": 0, "iter": 9573, "iter_tflops": 18.275602928471848, "iter_time": 1.128887161254883, "loss": 0.03998032957315445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.261536020072043, "step_time": 1.0711032333374024} +{"epoch": 0, "iter": 9574, "iter_tflops": 14.963788365845875, "iter_time": 1.3787346496582031, "loss": 0.020122431218624115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.99085901404643, "step_time": 1.0863696842193602} +{"epoch": 0, "iter": 9575, "iter_tflops": 48.30029878271806, "iter_time": 0.4271421508789062, "loss": 0.03242886811494827, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.248556430230046, "step_time": 0.3874488792419434} +{"epoch": 0, "iter": 9576, "iter_tflops": 41.879154655398196, "iter_time": 0.4926339530944824, "loss": 0.021572496742010117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06951441835976, "step_time": 0.4478252868652344} +{"epoch": 0, "iter": 9577, "iter_tflops": 27.31253625843661, "iter_time": 0.7553708419799805, "loss": 0.10118506848812103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.45803189302355, "step_time": 0.7003554611206054} +{"epoch": 0, "iter": 9578, "iter_tflops": 12.634617540806277, "iter_time": 1.632902099609375, "loss": 0.10558727383613586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.633750475758422, "step_time": 1.3196509399414063} +{"epoch": 0, "iter": 9579, "iter_tflops": 13.996497874709915, "iter_time": 1.4740182647705078, "loss": 0.1042267382144928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.058675101245715, "step_time": 1.2847319831848145} +{"epoch": 0, "iter": 9580, "iter_tflops": 39.37381383789811, "iter_time": 0.5239800643920898, "loss": 0.143021821975708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.13248873913571, "step_time": 0.38112220573425293} +{"epoch": 0, "iter": 9581, "iter_tflops": 19.43996935364795, "iter_time": 0.8173594589233397, "loss": 0.3902508020401001, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 20.43206889349448, "step_time": 0.7776717529296876} +{"epoch": 0, "iter": 9582, "iter_tflops": 10.172366647172897, "iter_time": 1.562020263671875, "loss": 0.31963691115379333, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 13.640918922366868, "step_time": 1.1648366889953614} +{"epoch": 0, "iter": 9583, "iter_tflops": 24.55951886034903, "iter_time": 0.6469769592285156, "loss": 0.4454478919506073, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 26.34705191414128, "step_time": 0.6030823822021484} +{"epoch": 0, "iter": 9584, "iter_tflops": 23.788182087521808, "iter_time": 0.6679553222656249, "loss": 
0.46103766560554504, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 25.639905166232626, "step_time": 0.6197153511047363} +{"epoch": 0, "iter": 9585, "iter_tflops": 19.001137672460658, "iter_time": 1.085782012939453, "loss": 0.38363751769065857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.308008361584047, "step_time": 1.0159092483520507} +{"epoch": 0, "iter": 9586, "iter_tflops": 15.289956548887018, "iter_time": 1.349323226928711, "loss": 0.5522520542144775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.377353139456876, "step_time": 1.1872402744293213} +{"epoch": 0, "iter": 9587, "iter_tflops": 37.87664832625987, "iter_time": 0.544691635131836, "loss": 0.40848308801651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.44664644728413, "step_time": 0.497774736404419} +{"epoch": 0, "iter": 9588, "iter_tflops": 40.84052770328541, "iter_time": 0.5051622657775879, "loss": 0.6142030358314514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.677541375802654, "step_time": 0.46177772712707515} +{"epoch": 0, "iter": 9589, "iter_tflops": 20.622475690464796, "iter_time": 1.00041788482666, "loss": 0.2705879509449005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.178472064151432, "step_time": 0.9302306060791016} +{"epoch": 0, "iter": 9590, "iter_tflops": 31.39065583759088, "iter_time": 0.6572367782592774, "loss": 0.1511053889989853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.13478230088274, "step_time": 0.5140452327728271} +{"epoch": 0, "iter": 9591, "iter_tflops": 52.4685633921364, "iter_time": 0.3932086601257324, "loss": 0.196257084608078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.00232706800226, "step_time": 0.361934232711792} +{"epoch": 0, "iter": 9592, "iter_tflops": 52.405661726626576, "iter_time": 0.39368062210083005, "loss": 0.14974229037761688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.447946463467, "step_time": 0.36548882293701174} +{"epoch": 0, "iter": 9593, "iter_tflops": 35.55597449573398, "iter_time": 0.5802426681518555, "loss": 
0.8072477579116821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.178051085356316, "step_time": 0.5403914794921874} +{"epoch": 0, "iter": 9594, "iter_tflops": 16.71562471897019, "iter_time": 1.234240051269531, "loss": 0.6016047596931458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.229530711120862, "step_time": 1.072885959625244} +{"epoch": 0, "iter": 9595, "iter_tflops": 37.48468940880127, "iter_time": 0.55038720703125, "loss": 0.6804540157318115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.88448202522574, "step_time": 0.5046191730499268} +{"epoch": 0, "iter": 9596, "iter_tflops": 38.57250190717413, "iter_time": 0.5348653182983398, "loss": 0.9421342611312866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.030502959749015, "step_time": 0.4908600196838379} +{"epoch": 0, "iter": 9597, "iter_tflops": 15.077136261132425, "iter_time": 1.3683695068359376, "loss": 0.5154715180397034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.132041990801678, "step_time": 1.2788891525268558} +{"epoch": 0, "iter": 9598, "iter_tflops": 14.586979083162886, "iter_time": 1.4143499755859374, "loss": 0.6522889733314514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.353113041024162, "step_time": 1.0136578845977784} +{"epoch": 0, "iter": 9599, "iter_tflops": 38.782961249269235, "iter_time": 0.5319628219604492, "loss": 0.4538951516151428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45540290148923, "step_time": 0.4859474201202392} +{"epoch": 0, "iter": 9600, "iter_tflops": 39.082115124841906, "iter_time": 0.5278909149169921, "loss": 0.48825666308403015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.326034551365865, "step_time": 0.4874327049255372} +{"epoch": 0, "iter": 9601, "iter_tflops": 20.185779101884496, "iter_time": 1.0220607986450194, "loss": 0.6260009407997131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.74197765742166, "step_time": 0.948906021118164} +{"epoch": 0, "iter": 9602, "iter_tflops": 18.932584229699117, "iter_time": 1.0897135467529298, "loss": 
0.7854559421539307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.41133819004774, "step_time": 0.920564998626709} +{"epoch": 0, "iter": 9603, "iter_tflops": 42.80959962489332, "iter_time": 0.4819268035888672, "loss": 0.6894441843032837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.366118290380335, "step_time": 0.44496055030822756} +{"epoch": 0, "iter": 9604, "iter_tflops": 44.10289610688459, "iter_time": 0.4677945289611816, "loss": 0.5079049468040466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.27264576807944, "step_time": 0.4364277305603027} +{"epoch": 0, "iter": 9605, "iter_tflops": 36.63190842887589, "iter_time": 0.5632000732421876, "loss": 0.018123041838407516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.6700395819888, "step_time": 0.5200673789978028} +{"epoch": 0, "iter": 9606, "iter_tflops": 13.773368089496351, "iter_time": 1.4978974914550782, "loss": 0.03151831030845642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.19130294807883, "step_time": 1.200088996887207} +{"epoch": 0, "iter": 9607, "iter_tflops": 9.952250288004295, "iter_time": 2.0730079040527345, "loss": 0.05073917284607887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.01146542832762, "step_time": 1.7176166915893556} +{"epoch": 0, "iter": 9608, "iter_tflops": 35.98456295036826, "iter_time": 0.5733317794799805, "loss": 0.02436860464513302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.747390902631494, "step_time": 0.5063169212341309} +{"epoch": 0, "iter": 9609, "iter_tflops": 18.75266726840629, "iter_time": 0.7840791091918946, "loss": 0.5068916082382202, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 19.816681763615943, "step_time": 0.7419796524047853} +{"epoch": 0, "iter": 9610, "iter_tflops": 10.396965035521566, "iter_time": 1.414217956542969, "loss": 0.42932382225990295, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 12.50282349929451, "step_time": 1.1760203323364258} +{"epoch": 0, "iter": 9611, "iter_tflops": 25.671321394860808, "iter_time": 0.5727626724243163, "loss": 
0.3551281690597534, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.378157375797578, "step_time": 0.5370549392700195} +{"epoch": 0, "iter": 9612, "iter_tflops": 27.13204975186, "iter_time": 0.5419264221191405, "loss": 0.2988765239715576, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 28.824942285908513, "step_time": 0.5100990142822266} +{"epoch": 0, "iter": 9613, "iter_tflops": 24.96559689263766, "iter_time": 0.8263809432983398, "loss": 0.5181463360786438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.679390547670693, "step_time": 0.7732970314025879} +{"epoch": 0, "iter": 9614, "iter_tflops": 18.36802008181682, "iter_time": 1.123207260131836, "loss": 0.43657320737838745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.935899770158038, "step_time": 0.94051731300354} +{"epoch": 0, "iter": 9615, "iter_tflops": 39.33160626557603, "iter_time": 0.5245423583984374, "loss": 0.5233826041221619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.862253328244826, "step_time": 0.48133478546142583} +{"epoch": 0, "iter": 9616, "iter_tflops": 37.278961086420466, "iter_time": 0.5534245834350586, "loss": 0.5973750948905945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.65584353893755, "step_time": 0.5074570274353027} +{"epoch": 0, "iter": 9617, "iter_tflops": 22.851537050140216, "iter_time": 0.9028317642211913, "loss": 0.05738863721489906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.840741412948443, "step_time": 0.8305345306396483} +{"epoch": 0, "iter": 9618, "iter_tflops": 25.079997981119238, "iter_time": 0.8226114501953126, "loss": 0.025493746623396873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.28176726828547, "step_time": 0.72948388671875} +{"epoch": 0, "iter": 9619, "iter_tflops": 53.94779878339881, "iter_time": 0.38242697525024416, "loss": 0.03174722567200661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.02521224676605, "step_time": 0.34953018760681154} +{"epoch": 0, "iter": 9620, "iter_tflops": 54.148893199930804, "iter_time": 0.3810067443847656, "loss": 
0.06000744178891182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.21092158786766, "step_time": 0.34843391990661615} +{"epoch": 0, "iter": 9621, "iter_tflops": 28.550228344730574, "iter_time": 0.7226244659423827, "loss": 0.7368993759155273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.28068699153352, "step_time": 0.6813284492492677} +{"epoch": 0, "iter": 9622, "iter_tflops": 12.833859934752933, "iter_time": 1.6075517120361327, "loss": 0.8322977423667908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.190750594568504, "step_time": 1.358135227203369} +{"epoch": 0, "iter": 9623, "iter_tflops": 45.82847505964399, "iter_time": 0.45018066787719724, "loss": 0.7403877377510071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.72316544087539, "step_time": 0.4149191493988037} +{"epoch": 0, "iter": 9624, "iter_tflops": 48.19976404465823, "iter_time": 0.4280330810546875, "loss": 0.8027661442756653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.11862525575287, "step_time": 0.3958487663269043} +{"epoch": 0, "iter": 9625, "iter_tflops": 26.35833431459662, "iter_time": 0.7827161331176758, "loss": 0.33101996779441833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.659827455168067, "step_time": 0.7458865585327148} +{"epoch": 0, "iter": 9626, "iter_tflops": 15.525428191969587, "iter_time": 1.3288582611083983, "loss": 0.3093377947807312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.039816734786168, "step_time": 1.0835762653350829} +{"epoch": 0, "iter": 9627, "iter_tflops": 41.2060822758636, "iter_time": 0.5006807823181152, "loss": 0.3187249004840851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.96248080896815, "step_time": 0.45885131645202637} +{"epoch": 0, "iter": 9628, "iter_tflops": 41.34575293432266, "iter_time": 0.4989894256591797, "loss": 0.2383238971233368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.96268826847846, "step_time": 0.458849199295044} +{"epoch": 0, "iter": 9629, "iter_tflops": 29.54918930165754, "iter_time": 0.6981949081420897, "loss": 
0.23820942640304565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.837466357829584, "step_time": 0.6282790908813476} +{"epoch": 0, "iter": 9630, "iter_tflops": 41.17773577941625, "iter_time": 0.501025447845459, "loss": 0.19899815320968628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.95841405469169, "step_time": 0.458892822265625} +{"epoch": 0, "iter": 9631, "iter_tflops": 50.43534795949551, "iter_time": 0.4090601997375488, "loss": 0.23693184554576874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.03726064532419, "step_time": 0.3748568382263184} +{"epoch": 0, "iter": 9632, "iter_tflops": 46.773034861882266, "iter_time": 0.4410894775390626, "loss": 0.24600966274738312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74725334058057, "step_time": 0.40654601287841796} +{"epoch": 0, "iter": 9633, "iter_tflops": 10.981048037368037, "iter_time": 0.9473397674560546, "loss": 0.0026128203608095646, "lr": 3e-05, "seqlen": 4192.0, "step_tflops": 11.481670537508274, "step_time": 0.9060339660644531} +{"epoch": 0, "iter": 9634, "iter_tflops": 10.338345750542576, "iter_time": 1.0062328872680664, "loss": 0.002677922835573554, "lr": 3e-05, "seqlen": 4192.0, "step_tflops": 12.85314145481323, "step_time": 0.809357271194458} +{"epoch": 0, "iter": 9635, "iter_tflops": 26.016262131405128, "iter_time": 0.39985696029663087, "loss": 0.006971822585910559, "lr": 3e-05, "seqlen": 4192.0, "step_tflops": 28.479576826011478, "step_time": 0.3652717018127441} +{"epoch": 0, "iter": 9636, "iter_tflops": 28.54530833386214, "iter_time": 0.3644305877685547, "loss": 0.0035355351865291595, "lr": 3e-05, "seqlen": 4192.0, "step_tflops": 31.083577092921804, "step_time": 0.3346713752746582} +{"epoch": 0, "iter": 9637, "iter_tflops": 46.45242507774927, "iter_time": 0.44413383102417, "loss": 0.3196095824241638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.862006121902574, "step_time": 0.4056287803649902} +{"epoch": 0, "iter": 9638, "iter_tflops": 8.26957830574087, "iter_time": 2.4948180847167967, 
"loss": 0.2470131516456604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.310420964278592, "step_time": 2.0009942932128904} +{"epoch": 0, "iter": 9639, "iter_tflops": 13.602654352729246, "iter_time": 1.5166961517333983, "loss": 0.2590652108192444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.52245876224446, "step_time": 1.2486696929931642} +{"epoch": 0, "iter": 9640, "iter_tflops": 26.691416849681612, "iter_time": 0.7729486083984375, "loss": 0.2749881148338318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.37857965897323, "step_time": 0.6371834011077881} +{"epoch": 0, "iter": 9641, "iter_tflops": 12.871533287756488, "iter_time": 1.1804186096191405, "loss": 0.4010951817035675, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 13.706980338011562, "step_time": 1.1084715270996095} +{"epoch": 0, "iter": 9642, "iter_tflops": 16.351523381972584, "iter_time": 0.9291976699829101, "loss": 0.5116404891014099, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 18.040867247555322, "step_time": 0.8421877517700196} +{"epoch": 0, "iter": 9643, "iter_tflops": 24.52463198550402, "iter_time": 0.619532127380371, "loss": 0.3369615972042084, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 26.09661897656201, "step_time": 0.5822132530212403} +{"epoch": 0, "iter": 9644, "iter_tflops": 25.829639034176473, "iter_time": 0.588231117248535, "loss": 0.47749611735343933, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 27.303276005921862, "step_time": 0.5564825782775878} +{"epoch": 0, "iter": 9645, "iter_tflops": 29.987695991462477, "iter_time": 0.6879852828979491, "loss": 0.7871206998825073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.9802838994455, "step_time": 0.6451191482543945} +{"epoch": 0, "iter": 9646, "iter_tflops": 14.409990741493305, "iter_time": 1.4317214965820313, "loss": 0.5890207290649414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.777804278329413, "step_time": 1.0431437797546388} +{"epoch": 0, "iter": 9647, "iter_tflops": 35.06352178509319, "iter_time": 0.5883919372558594, 
"loss": 0.7285685539245605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.29831248915756, "step_time": 0.5386945838928222} +{"epoch": 0, "iter": 9648, "iter_tflops": 39.07318820443885, "iter_time": 0.5280115203857423, "loss": 0.7851160168647766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.389632323248314, "step_time": 0.4867014026641845} +{"epoch": 0, "iter": 9649, "iter_tflops": 23.58071224676656, "iter_time": 0.8749139251708985, "loss": 0.3482617139816284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.43151882738758, "step_time": 0.8112411079406738} +{"epoch": 0, "iter": 9650, "iter_tflops": 16.663042720311783, "iter_time": 1.2381348266601564, "loss": 0.41641122102737427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.783460516905713, "step_time": 1.0428455371856689} +{"epoch": 0, "iter": 9651, "iter_tflops": 39.652845756323245, "iter_time": 0.5202928848266601, "loss": 0.5194636583328247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.449639272852465, "step_time": 0.47482772827148445} +{"epoch": 0, "iter": 9652, "iter_tflops": 42.72919444301506, "iter_time": 0.48283366394042965, "loss": 0.37513357400894165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.46883506720188, "step_time": 0.44397698974609373} +{"epoch": 0, "iter": 9653, "iter_tflops": 19.985842875717122, "iter_time": 1.0322853851318359, "loss": 0.6124330163002014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.27631155304211, "step_time": 0.9696743469238281} +{"epoch": 0, "iter": 9654, "iter_tflops": 13.936873648954258, "iter_time": 1.4803243560791015, "loss": 0.5566208958625793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.511588668098682, "step_time": 1.1781394538879393} +{"epoch": 0, "iter": 9655, "iter_tflops": 34.41320295721865, "iter_time": 0.5995109939575196, "loss": 0.5411494970321655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.25840894658575, "step_time": 0.5537298583984376} +{"epoch": 0, "iter": 9656, "iter_tflops": 32.892384593113704, "iter_time": 
0.627230094909668, "loss": 0.765055775642395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.97982103619603, "step_time": 0.573407341003418} +{"epoch": 0, "iter": 9657, "iter_tflops": 29.91790537831184, "iter_time": 0.689590171813965, "loss": 0.9585870504379272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.96795191152453, "step_time": 0.6257923927307127} +{"epoch": 0, "iter": 9658, "iter_tflops": 35.70989075767367, "iter_time": 0.577741714477539, "loss": 0.7938107252120972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.76326729625034, "step_time": 0.5322330894470214} +{"epoch": 0, "iter": 9659, "iter_tflops": 32.72243727001443, "iter_time": 0.630487678527832, "loss": 0.7972503900527954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.643520577855064, "step_time": 0.5788175010681151} +{"epoch": 0, "iter": 9660, "iter_tflops": 37.70867439185599, "iter_time": 0.5471179733276368, "loss": 0.8031502366065979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.26330287857242, "step_time": 0.499986478805542} +{"epoch": 0, "iter": 9661, "iter_tflops": 22.06402457776346, "iter_time": 0.9350557708740234, "loss": 0.32746121287345886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.37656021655656, "step_time": 0.8825547180175781} +{"epoch": 0, "iter": 9662, "iter_tflops": 14.2948906470234, "iter_time": 1.443249481201172, "loss": 0.33025193214416504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.591127277227248, "step_time": 1.1097279472351076} +{"epoch": 0, "iter": 9663, "iter_tflops": 45.70589904826339, "iter_time": 0.45138798141479497, "loss": 0.36496254801750183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.55237809510453, "step_time": 0.4163492107391357} +{"epoch": 0, "iter": 9664, "iter_tflops": 48.83330180414187, "iter_time": 0.4224800033569336, "loss": 0.3313843607902527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.058997692953874, "step_time": 0.38883308029174807} +{"epoch": 0, "iter": 9665, "iter_tflops": 35.25685049116789, "iter_time": 
0.58516552734375, "loss": 0.482463538646698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.1870867304956, "step_time": 0.540263614654541} +{"epoch": 0, "iter": 9666, "iter_tflops": 34.36902220876967, "iter_time": 0.60028165435791, "loss": 0.30466535687446594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88519731664377, "step_time": 0.44962416458129884} +{"epoch": 0, "iter": 9667, "iter_tflops": 43.888208823969315, "iter_time": 0.4700828323364258, "loss": 0.3344254493713379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.388148230254146, "step_time": 0.4353639945983887} +{"epoch": 0, "iter": 9668, "iter_tflops": 48.15060898286099, "iter_time": 0.4284700431823731, "loss": 0.5234901309013367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.53392749840265, "step_time": 0.39271941947937017} +{"epoch": 0, "iter": 9669, "iter_tflops": 25.41541271015988, "iter_time": 0.8117552032470703, "loss": 0.7368029952049255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.796332381639697, "step_time": 0.7699222869873048} +{"epoch": 0, "iter": 9670, "iter_tflops": 20.913229040305048, "iter_time": 0.9865092315673829, "loss": 0.7390401363372803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.549864679280955, "step_time": 0.8074834747314453} +{"epoch": 0, "iter": 9671, "iter_tflops": 32.88959819688002, "iter_time": 0.6272832336425782, "loss": 0.7182179093360901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.024469577963, "step_time": 0.572696662902832} +{"epoch": 0, "iter": 9672, "iter_tflops": 37.40962989883275, "iter_time": 0.5514915161132813, "loss": 0.6465502977371216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.82512301232649, "step_time": 0.5053528804779053} +{"epoch": 0, "iter": 9673, "iter_tflops": 18.898447366896672, "iter_time": 0.5739427337646484, "loss": 0.0465799942612648, "lr": 3e-05, "seqlen": 4368.0, "step_tflops": 21.134259018787322, "step_time": 0.51322483253479} +{"epoch": 0, "iter": 9674, "iter_tflops": 19.861805287498274, "iter_time": 
0.5461047668457031, "loss": 0.05380741134285927, "lr": 3e-05, "seqlen": 4368.0, "step_tflops": 21.73183143610584, "step_time": 0.4991124000549317} +{"epoch": 0, "iter": 9675, "iter_tflops": 20.631390049403567, "iter_time": 0.5257341613769531, "loss": 0.02909119613468647, "lr": 3e-05, "seqlen": 4368.0, "step_tflops": 22.76829022763812, "step_time": 0.47639179039001467} +{"epoch": 0, "iter": 9676, "iter_tflops": 22.47341859276873, "iter_time": 0.4826424827575683, "loss": 0.03561684489250183, "lr": 3e-05, "seqlen": 4368.0, "step_tflops": 24.802952605073255, "step_time": 0.4373119087219238} +{"epoch": 0, "iter": 9677, "iter_tflops": 33.9751319212083, "iter_time": 0.607241012573242, "loss": 0.14084987342357635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.2049417420363, "step_time": 0.5545256233215332} +{"epoch": 0, "iter": 9678, "iter_tflops": 29.860020799437642, "iter_time": 0.6909269638061524, "loss": 0.15938019752502441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.72007664958298, "step_time": 0.5469525871276855} +{"epoch": 0, "iter": 9679, "iter_tflops": 36.10075177355022, "iter_time": 0.5714865341186524, "loss": 0.1704564094543457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.531811596939725, "step_time": 0.5218858604431152} +{"epoch": 0, "iter": 9680, "iter_tflops": 39.581287465981454, "iter_time": 0.521233512878418, "loss": 0.25627580285072327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.433856308139134, "step_time": 0.47500027084350593} +{"epoch": 0, "iter": 9681, "iter_tflops": 13.024322465984998, "iter_time": 1.223127395629883, "loss": 0.40867918729782104, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 13.883469616228782, "step_time": 1.1474369201660155} +{"epoch": 0, "iter": 9682, "iter_tflops": 23.79548410528421, "iter_time": 0.6694718017578124, "loss": 0.4499433934688568, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.746989734092203, "step_time": 0.5541590881347657} +{"epoch": 0, "iter": 9683, "iter_tflops": 27.98243603637697, 
"iter_time": 0.5693001708984375, "loss": 0.3579724133014679, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.767212283885303, "step_time": 0.5351661911010742} +{"epoch": 0, "iter": 9684, "iter_tflops": 28.291366042971045, "iter_time": 0.5630836486816406, "loss": 0.4586029648780823, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.133497986449065, "step_time": 0.5286610145568847} +{"epoch": 0, "iter": 9685, "iter_tflops": 34.26487725672225, "iter_time": 0.6021061553955078, "loss": 0.01429653912782669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.891968164339865, "step_time": 0.5592299499511719} +{"epoch": 0, "iter": 9686, "iter_tflops": 21.89607885910112, "iter_time": 0.9422277679443359, "loss": 0.05203654617071152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.28846897717354, "step_time": 0.7847963123321533} +{"epoch": 0, "iter": 9687, "iter_tflops": 51.60137617148015, "iter_time": 0.39981673049926764, "loss": 0.05285104736685753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.43208157257832, "step_time": 0.36559157371520995} +{"epoch": 0, "iter": 9688, "iter_tflops": 49.2502151872539, "iter_time": 0.41890362167358397, "loss": 0.03517472371459007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.11066442373907, "step_time": 0.38127592277526856} +{"epoch": 0, "iter": 9689, "iter_tflops": 34.40027272016565, "iter_time": 0.5997363357543946, "loss": 0.3503984808921814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.008283190712845, "step_time": 0.5574723205566406} +{"epoch": 0, "iter": 9690, "iter_tflops": 36.59386644790659, "iter_time": 0.5637855606079101, "loss": 0.2713243067264557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.40989968396382, "step_time": 0.5105455265045166} +{"epoch": 0, "iter": 9691, "iter_tflops": 34.232594124793444, "iter_time": 0.602673973083496, "loss": 0.3497370183467865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.18819515942348, "step_time": 0.5547753372192383} +{"epoch": 0, "iter": 9692, "iter_tflops": 
43.07818396541514, "iter_time": 0.47892208099365235, "loss": 0.46077585220336914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.036785135369485, "step_time": 0.4386161479949951} +{"epoch": 0, "iter": 9693, "iter_tflops": 32.553374904445924, "iter_time": 0.6337620468139649, "loss": 0.22334854304790497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.045574375569174, "step_time": 0.5723613471984863} +{"epoch": 0, "iter": 9694, "iter_tflops": 37.203288695514004, "iter_time": 0.5545502624511719, "loss": 0.3600407838821411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62102995478743, "step_time": 0.4956891632080078} +{"epoch": 0, "iter": 9695, "iter_tflops": 42.007789060286534, "iter_time": 0.4911254310607911, "loss": 0.427898108959198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.90440303720092, "step_time": 0.44943604850769053} +{"epoch": 0, "iter": 9696, "iter_tflops": 38.69532471945813, "iter_time": 0.5331676025390625, "loss": 0.36113378405570984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.418393088253104, "step_time": 0.4863714065551758} +{"epoch": 0, "iter": 9697, "iter_tflops": 19.47269066837583, "iter_time": 1.0594885864257813, "loss": 0.22827833890914917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.78264291687317, "step_time": 0.9927078857421874} +{"epoch": 0, "iter": 9698, "iter_tflops": 16.504927191917332, "iter_time": 1.2499960327148436, "loss": 0.26621735095977783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.5288857224312, "step_time": 1.0564398708343505} +{"epoch": 0, "iter": 9699, "iter_tflops": 45.75123748331858, "iter_time": 0.4509406661987304, "loss": 0.35621634125709534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.588373282752656, "step_time": 0.4160469913482666} +{"epoch": 0, "iter": 9700, "iter_tflops": 46.5879340930987, "iter_time": 0.44284199142456054, "loss": 0.292915403842926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.126222081076165, "step_time": 0.41158285331726074} +{"epoch": 0, "iter": 9701, 
"iter_tflops": 24.362333201227646, "iter_time": 0.8468439102172851, "loss": 0.388461172580719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.634985247444437, "step_time": 0.8048022384643554} +{"epoch": 0, "iter": 9702, "iter_tflops": 10.903143258514403, "iter_time": 1.8922152099609373, "loss": 0.5498428344726562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.563675260753538, "step_time": 1.64212247467041} +{"epoch": 0, "iter": 9703, "iter_tflops": 28.53304661022339, "iter_time": 0.7230596084594727, "loss": 0.4020323157310486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.957619032512333, "step_time": 0.6455766773223877} +{"epoch": 0, "iter": 9704, "iter_tflops": 47.83944116058388, "iter_time": 0.4312569923400879, "loss": 0.5159503221511841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.92747401084931, "step_time": 0.3973059329986572} +{"epoch": 0, "iter": 9705, "iter_tflops": 16.158487000765827, "iter_time": 1.0646165313720704, "loss": 0.32639986276626587, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 16.84474387375535, "step_time": 1.021243927001953} +{"epoch": 0, "iter": 9706, "iter_tflops": 10.127592773143595, "iter_time": 1.698586502075195, "loss": 0.3460305333137512, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 11.961869367873126, "step_time": 1.4381190643310546} +{"epoch": 0, "iter": 9707, "iter_tflops": 24.462336964958293, "iter_time": 0.7032276763916016, "loss": 0.5038393139839172, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 26.202674977819584, "step_time": 0.6565204658508301} +{"epoch": 0, "iter": 9708, "iter_tflops": 26.413223495933252, "iter_time": 0.651287124633789, "loss": 0.2977234125137329, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 28.32791096328123, "step_time": 0.6072665367126465} +{"epoch": 0, "iter": 9709, "iter_tflops": 19.874242005038084, "iter_time": 1.03808203125, "loss": 0.26568758487701416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.17860866258423, "step_time": 0.9741477279663086} +{"epoch": 0, "iter": 9710, 
"iter_tflops": 17.333435673411927, "iter_time": 1.1902483673095703, "loss": 0.24521303176879883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.0682663086711, "step_time": 0.8943495464324952} +{"epoch": 0, "iter": 9711, "iter_tflops": 40.27517393488891, "iter_time": 0.5122533683776855, "loss": 0.31903529167175293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4768381100449, "step_time": 0.46386151504516604} +{"epoch": 0, "iter": 9712, "iter_tflops": 39.98948575683831, "iter_time": 0.5159129486083985, "loss": 0.2607003152370453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59494637445615, "step_time": 0.47324507141113287} +{"epoch": 0, "iter": 9713, "iter_tflops": 25.42181437422029, "iter_time": 0.8115507888793946, "loss": 0.3517087996006012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.095669842621827, "step_time": 0.7614166259765626} +{"epoch": 0, "iter": 9714, "iter_tflops": 8.664619887493679, "iter_time": 2.3810731201171875, "loss": 0.39929118752479553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.09441523959402, "step_time": 2.0438126449584963} +{"epoch": 0, "iter": 9715, "iter_tflops": 12.212044729090403, "iter_time": 1.689405334472656, "loss": 0.377543568611145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.517124055613161, "step_time": 1.3295694122314452} +{"epoch": 0, "iter": 9716, "iter_tflops": 46.918174942586006, "iter_time": 0.43972497940063476, "loss": 0.324759304523468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.02477118233803, "step_time": 0.40433485603332525} +{"epoch": 0, "iter": 9717, "iter_tflops": 22.930438519341788, "iter_time": 0.6840121994018554, "loss": 0.22661906480789185, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 24.354971967178717, "step_time": 0.6440040130615234} +{"epoch": 0, "iter": 9718, "iter_tflops": 7.391288074161231, "iter_time": 2.122052276611328, "loss": 0.4339572787284851, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 10.204523240710618, "step_time": 1.537034049987793} +{"epoch": 0, "iter": 9719, 
"iter_tflops": 23.591480536713945, "iter_time": 0.6648459243774414, "loss": 0.49897855520248413, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.41573935169566, "step_time": 0.6171254539489746} +{"epoch": 0, "iter": 9720, "iter_tflops": 24.18604450755214, "iter_time": 0.6485020599365234, "loss": 0.2621905207633972, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.847491393859773, "step_time": 0.6068170967102051} +{"epoch": 0, "iter": 9721, "iter_tflops": 24.714237049799177, "iter_time": 0.834785774230957, "loss": 0.057487983256578445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.607442165297915, "step_time": 0.7753880805969238} +{"epoch": 0, "iter": 9722, "iter_tflops": 10.803174085760919, "iter_time": 1.909725173950195, "loss": 0.04461224004626274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.703235755490315, "step_time": 1.762853790283203} +{"epoch": 0, "iter": 9723, "iter_tflops": 14.391292657881579, "iter_time": 1.4335816802978516, "loss": 0.014075440354645252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.88640226856997, "step_time": 1.2217577896118164} +{"epoch": 0, "iter": 9724, "iter_tflops": 28.491966148616346, "iter_time": 0.72410213470459, "loss": 0.04500432312488556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.1544944463542, "step_time": 0.5868692989349366} +{"epoch": 0, "iter": 9725, "iter_tflops": 18.859938183998437, "iter_time": 0.7774550094604493, "loss": 0.49237126111984253, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 19.902272277006848, "step_time": 0.7367376556396485} +{"epoch": 0, "iter": 9726, "iter_tflops": 9.819044705785847, "iter_time": 1.4932973480224612, "loss": 0.43825915455818176, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 11.821682964577699, "step_time": 1.2403270721435546} +{"epoch": 0, "iter": 9727, "iter_tflops": 22.433768370241612, "iter_time": 0.6536018905639649, "loss": 0.38158702850341797, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.14461711611881, "step_time": 0.6072887115478516} +{"epoch": 0, "iter": 
9728, "iter_tflops": 22.4902548873755, "iter_time": 0.651960304260254, "loss": 0.42335420846939087, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.150319395880125, "step_time": 0.607145320892334} +{"epoch": 0, "iter": 9729, "iter_tflops": 20.947447886165136, "iter_time": 0.9848977127075196, "loss": 0.6832605004310608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.764965958136997, "step_time": 0.9062650718688965} +{"epoch": 0, "iter": 9730, "iter_tflops": 12.940206120609039, "iter_time": 1.5943404083251955, "loss": 0.7897193431854248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.163717375155795, "step_time": 1.2763829650878906} +{"epoch": 0, "iter": 9731, "iter_tflops": 30.739890858893034, "iter_time": 0.6711505126953125, "loss": 0.7972899675369263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.663865605620764, "step_time": 0.6128557472229004} +{"epoch": 0, "iter": 9732, "iter_tflops": 35.126499397982144, "iter_time": 0.5873370208740234, "loss": 0.8246828317642212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.35193709473689, "step_time": 0.5379413681030274} +{"epoch": 0, "iter": 9733, "iter_tflops": 17.67821612216486, "iter_time": 1.1670348052978516, "loss": 0.6606968641281128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.693168747071645, "step_time": 1.103670211791992} +{"epoch": 0, "iter": 9734, "iter_tflops": 19.2229712667537, "iter_time": 1.0732520599365234, "loss": 0.7225903272628784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.387558387890532, "step_time": 0.8126458320617675} +{"epoch": 0, "iter": 9735, "iter_tflops": 41.084357501321804, "iter_time": 0.5021641998291015, "loss": 0.629279375076294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25074605646912, "step_time": 0.46623154067993167} +{"epoch": 0, "iter": 9736, "iter_tflops": 43.89423643024702, "iter_time": 0.4700182800292969, "loss": 0.6923592686653137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.316053970864274, "step_time": 0.43602734756469724} +{"epoch": 0, "iter": 
9737, "iter_tflops": 27.11752599187783, "iter_time": 0.7608029403686524, "loss": 0.6645073294639587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.569506370526693, "step_time": 0.7221368560791015} +{"epoch": 0, "iter": 9738, "iter_tflops": 12.989380606326936, "iter_time": 1.5883046417236328, "loss": 0.8112886548042297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.592579200717275, "step_time": 1.1096412868499756} +{"epoch": 0, "iter": 9739, "iter_tflops": 37.15458177953346, "iter_time": 0.5552772369384766, "loss": 0.8668735027313232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.422266495600056, "step_time": 0.5103893299102784} +{"epoch": 0, "iter": 9740, "iter_tflops": 36.93855194759715, "iter_time": 0.5585246963500977, "loss": 0.8023152947425842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.82060410616451, "step_time": 0.5181009674072266} +{"epoch": 0, "iter": 9741, "iter_tflops": 19.056998141922016, "iter_time": 1.0825993347167968, "loss": 0.4130746126174927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.379483572301126, "step_time": 1.0123462371826173} +{"epoch": 0, "iter": 9742, "iter_tflops": 12.306680689884027, "iter_time": 1.6764141387939453, "loss": 0.44349536299705505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.639723870122012, "step_time": 1.1695814323425293} +{"epoch": 0, "iter": 9743, "iter_tflops": 38.25566185317201, "iter_time": 0.539295166015625, "loss": 0.2650708258152008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.86124610143632, "step_time": 0.49284470558166504} +{"epoch": 0, "iter": 9744, "iter_tflops": 40.43305731732996, "iter_time": 0.510253116607666, "loss": 0.3160698115825653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66452985382538, "step_time": 0.4724909114837647} +{"epoch": 0, "iter": 9745, "iter_tflops": 19.64929896928032, "iter_time": 1.0499658813476564, "loss": 0.6799951791763306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.922754686389876, "step_time": 0.9860600967407226} +{"epoch": 0, "iter": 
9746, "iter_tflops": 14.479014320990865, "iter_time": 1.4248962707519532, "loss": 0.9093257188796997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.413182093454964, "step_time": 1.1204523696899413} +{"epoch": 0, "iter": 9747, "iter_tflops": 32.16185713836849, "iter_time": 0.6414770584106445, "loss": 0.8307687640190125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.73330552898778, "step_time": 0.5939858932495117} +{"epoch": 0, "iter": 9748, "iter_tflops": 35.112920545378074, "iter_time": 0.5875641555786133, "loss": 0.7857950329780579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.174925228764025, "step_time": 0.5404357280731201} +{"epoch": 0, "iter": 9749, "iter_tflops": 32.56478278645667, "iter_time": 0.6335400314331054, "loss": 0.22862528264522552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.350009574422174, "step_time": 0.583623420715332} +{"epoch": 0, "iter": 9750, "iter_tflops": 11.912373566260355, "iter_time": 1.7319045104980466, "loss": 0.27828845381736755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.374330477697448, "step_time": 1.43527335357666} +{"epoch": 0, "iter": 9751, "iter_tflops": 13.84675945322599, "iter_time": 1.4899582519531251, "loss": 0.2601948082447052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.57462673321697, "step_time": 1.2447395553588867} +{"epoch": 0, "iter": 9752, "iter_tflops": 34.028692539970336, "iter_time": 0.6062852249145507, "loss": 0.132034569978714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.40092812150998, "step_time": 0.5516198272705077} +{"epoch": 0, "iter": 9753, "iter_tflops": 13.791227367005748, "iter_time": 1.17293994140625, "loss": 0.3253149688243866, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 14.679230706599942, "step_time": 1.101984275817871} +{"epoch": 0, "iter": 9754, "iter_tflops": 17.90507825184094, "iter_time": 0.9034465637207031, "loss": 0.30413222312927246, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 19.896574107923158, "step_time": 0.8130184288024902} +{"epoch": 0, "iter": 
9755, "iter_tflops": 25.443124385731235, "iter_time": 0.6357820358276367, "loss": 0.5018202662467957, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 27.37714906629733, "step_time": 0.5908680038452149} +{"epoch": 0, "iter": 9756, "iter_tflops": 24.306172123045673, "iter_time": 0.6655215530395509, "loss": 0.46483317017555237, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 26.13807546480679, "step_time": 0.618878059387207} +{"epoch": 0, "iter": 9757, "iter_tflops": 20.473536066897744, "iter_time": 1.0076956634521483, "loss": 0.3721332550048828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.111391557569657, "step_time": 0.9330526962280273} +{"epoch": 0, "iter": 9758, "iter_tflops": 17.44855809139459, "iter_time": 1.1823953247070313, "loss": 0.55608069896698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.91311985370014, "step_time": 0.9004052543640138} +{"epoch": 0, "iter": 9759, "iter_tflops": 34.975293210484594, "iter_time": 0.5898762130737305, "loss": 0.600449800491333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.54659152824425, "step_time": 0.5352248458862305} +{"epoch": 0, "iter": 9760, "iter_tflops": 41.27487915899425, "iter_time": 0.49984624862670896, "loss": 0.43448853492736816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.134444051937024, "step_time": 0.4571030826568604} +{"epoch": 0, "iter": 9761, "iter_tflops": 17.782716223111667, "iter_time": 1.160176727294922, "loss": 0.6139323115348816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.020069184406985, "step_time": 1.084701286315918} +{"epoch": 0, "iter": 9762, "iter_tflops": 18.55833408592841, "iter_time": 1.1116888732910157, "loss": 0.7060814499855042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.33225107603239, "step_time": 0.7834914474487306} +{"epoch": 0, "iter": 9763, "iter_tflops": 41.44970652734244, "iter_time": 0.4977379875183106, "loss": 0.647347092628479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.85989669319964, "step_time": 0.45990060234069824} +{"epoch": 0, "iter": 9764, 
"iter_tflops": 42.368318131573986, "iter_time": 0.4869462471008301, "loss": 0.6371804475784302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59717092500415, "step_time": 0.4524643325805664} +{"epoch": 0, "iter": 9765, "iter_tflops": 27.102900150452314, "iter_time": 0.7612135009765625, "loss": 0.3997821807861328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.65465209917708, "step_time": 0.7199910659790039} +{"epoch": 0, "iter": 9766, "iter_tflops": 16.4229265696645, "iter_time": 1.2562373352050782, "loss": 0.5035399198532104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.59704753682493, "step_time": 1.1093746719360351} +{"epoch": 0, "iter": 9767, "iter_tflops": 45.17041377824564, "iter_time": 0.456739086151123, "loss": 0.413926362991333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04995906453551, "step_time": 0.42061387825012203} +{"epoch": 0, "iter": 9768, "iter_tflops": 50.54616682696647, "iter_time": 0.4081633644104004, "loss": 0.4303589463233948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.47113873741962, "step_time": 0.37875274848937984} +{"epoch": 0, "iter": 9769, "iter_tflops": 34.68820613582701, "iter_time": 0.5947581558227539, "loss": 0.3787490725517273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.06030018344588, "step_time": 0.5566898651123047} +{"epoch": 0, "iter": 9770, "iter_tflops": 12.576428426172416, "iter_time": 1.640457275390625, "loss": 0.27108481526374817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.684596179608524, "step_time": 1.315372947692871} +{"epoch": 0, "iter": 9771, "iter_tflops": 35.21118038023155, "iter_time": 0.5859245071411132, "loss": 0.35093116760253906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.72605457332424, "step_time": 0.5327445240020752} +{"epoch": 0, "iter": 9772, "iter_tflops": 35.550131552898556, "iter_time": 0.5803380355834961, "loss": 0.3641190230846405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.65415680421606, "step_time": 0.5337354431152344} +{"epoch": 0, "iter": 9773, 
"iter_tflops": 11.498275011552236, "iter_time": 1.471083969116211, "loss": 0.36556947231292725, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 12.200435499399797, "step_time": 1.386420021057129} +{"epoch": 0, "iter": 9774, "iter_tflops": 13.763618987019829, "iter_time": 1.2289593353271484, "loss": 0.3962947726249695, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 15.349655442072258, "step_time": 1.1019744453430176} +{"epoch": 0, "iter": 9775, "iter_tflops": 24.632388757356132, "iter_time": 0.6866945877075196, "loss": 0.23443201184272766, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 26.696607398897573, "step_time": 0.6335984115600586} +{"epoch": 0, "iter": 9776, "iter_tflops": 25.198458831923254, "iter_time": 0.6712683563232422, "loss": 0.3973340094089508, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 27.136665091567362, "step_time": 0.6233237571716309} +{"epoch": 0, "iter": 9777, "iter_tflops": 15.52792120199543, "iter_time": 1.3286449127197266, "loss": 0.423468679189682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.314488786664906, "step_time": 1.2645871887207032} +{"epoch": 0, "iter": 9778, "iter_tflops": 17.266824444690236, "iter_time": 1.1948400573730469, "loss": 0.27606257796287537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.730299293212024, "step_time": 0.949415985107422} +{"epoch": 0, "iter": 9779, "iter_tflops": 47.82733661914612, "iter_time": 0.43136613845825195, "loss": 0.3667835295200348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.25053712581756, "step_time": 0.3948494052886963} +{"epoch": 0, "iter": 9780, "iter_tflops": 47.81777602781268, "iter_time": 0.43145238494873045, "loss": 0.4429450035095215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.978991677435914, "step_time": 0.3969121532440186} +{"epoch": 0, "iter": 9781, "iter_tflops": 28.23170809143007, "iter_time": 0.7307773742675782, "loss": 0.6537290811538696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.873500150229766, "step_time": 0.690615207672119} +{"epoch": 0, "iter": 
9782, "iter_tflops": 19.938193226750247, "iter_time": 1.034752410888672, "loss": 0.7385160326957703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.915298474476526, "step_time": 0.862673469543457} +{"epoch": 0, "iter": 9783, "iter_tflops": 43.202683722336566, "iter_time": 0.47754194259643556, "loss": 0.5896104574203491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.67568231021395, "step_time": 0.4420094680786133} +{"epoch": 0, "iter": 9784, "iter_tflops": 45.925858200559375, "iter_time": 0.4492260856628417, "loss": 0.6835753321647644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.481038384320705, "step_time": 0.4169494857788086} +{"epoch": 0, "iter": 9785, "iter_tflops": 38.64465884215786, "iter_time": 0.5338666229248047, "loss": 0.6155148148536682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.11419393380475, "step_time": 0.48988456344604486} +{"epoch": 0, "iter": 9786, "iter_tflops": 36.5269369346707, "iter_time": 0.564818603515625, "loss": 0.48563897609710693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.81572716935993, "step_time": 0.5054692134857178} +{"epoch": 0, "iter": 9787, "iter_tflops": 38.53275712802911, "iter_time": 0.5354170074462891, "loss": 0.39438170194625854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05068124616312, "step_time": 0.4906244773864746} +{"epoch": 0, "iter": 9788, "iter_tflops": 36.78539751889158, "iter_time": 0.5608500900268555, "loss": 0.49609917402267456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.37675089116157, "step_time": 0.5109646778106689} +{"epoch": 0, "iter": 9789, "iter_tflops": 19.307982596600155, "iter_time": 1.068526626586914, "loss": 0.3319680392742157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.881171313088572, "step_time": 0.9880237655639648} +{"epoch": 0, "iter": 9790, "iter_tflops": 17.859400691395603, "iter_time": 1.1551951751708984, "loss": 0.29962119460105896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.50852158887381, "step_time": 0.959205560684204} +{"epoch": 0, "iter": 
9791, "iter_tflops": 41.027649671342076, "iter_time": 0.5028582839965821, "loss": 0.22091534733772278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.128814005329346, "step_time": 0.45716010856628414} +{"epoch": 0, "iter": 9792, "iter_tflops": 38.56631090945099, "iter_time": 0.5349511795043946, "loss": 0.34053418040275574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25531127486959, "step_time": 0.48824852752685544} +{"epoch": 0, "iter": 9793, "iter_tflops": 22.848986194560368, "iter_time": 0.9029325561523438, "loss": 0.21909622848033905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.878931412149708, "step_time": 0.8292596321105956} +{"epoch": 0, "iter": 9794, "iter_tflops": 45.162645351621016, "iter_time": 0.4568176498413085, "loss": 0.15190665423870087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.748595555043465, "step_time": 0.4147070541381836} +{"epoch": 0, "iter": 9795, "iter_tflops": 53.267052218732, "iter_time": 0.3873143463134766, "loss": 0.16606631875038147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.882933372837165, "step_time": 0.35642791938781737} +{"epoch": 0, "iter": 9796, "iter_tflops": 55.042911330485175, "iter_time": 0.37481835556030274, "loss": 0.18045003712177277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.73902610549045, "step_time": 0.3453536968231201} +{"epoch": 0, "iter": 9797, "iter_tflops": 30.874373967881045, "iter_time": 0.6682271041870117, "loss": 0.6772322654724121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.85040809422022, "step_time": 0.6280315742492676} +{"epoch": 0, "iter": 9798, "iter_tflops": 13.254154708499845, "iter_time": 1.5565755767822265, "loss": 0.6503474116325378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.49154774720791, "step_time": 1.2510101432800291} +{"epoch": 0, "iter": 9799, "iter_tflops": 40.72036434759422, "iter_time": 0.5066529693603516, "loss": 0.49058955907821655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.8700604920562, "step_time": 0.4702772979736328} +{"epoch": 0, 
"iter": 9800, "iter_tflops": 42.683119168459704, "iter_time": 0.4833548698425293, "loss": 0.7974753379821777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.93422440405734, "step_time": 0.4491442661285401} +{"epoch": 0, "iter": 9801, "iter_tflops": 46.76028980827635, "iter_time": 0.4412097015380859, "loss": 0.5092731714248657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.064720523744775, "step_time": 0.4040185337066651} +{"epoch": 0, "iter": 9802, "iter_tflops": 38.2878379060794, "iter_time": 0.5388419570922852, "loss": 0.4829547107219696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.232301212201605, "step_time": 0.48851454734802247} +{"epoch": 0, "iter": 9803, "iter_tflops": 40.5689334282406, "iter_time": 0.5085441436767578, "loss": 0.5451631546020508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.404464156297614, "step_time": 0.46461755371093755} +{"epoch": 0, "iter": 9804, "iter_tflops": 40.27370045723152, "iter_time": 0.5122721099853516, "loss": 0.5879824161529541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.05172507015335, "step_time": 0.4683379249572754} +{"epoch": 0, "iter": 9805, "iter_tflops": 35.90614520532569, "iter_time": 0.5745839157104492, "loss": 0.14568263292312622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85605363758179, "step_time": 0.5176401481628419} +{"epoch": 0, "iter": 9806, "iter_tflops": 39.0232607782097, "iter_time": 0.5286870727539063, "loss": 0.13166244328022003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.53400935844324, "step_time": 0.4739074993133545} +{"epoch": 0, "iter": 9807, "iter_tflops": 40.93549260675718, "iter_time": 0.5039903564453125, "loss": 0.10273215919733047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.977440742971005, "step_time": 0.4586986980438232} +{"epoch": 0, "iter": 9808, "iter_tflops": 43.956460991163375, "iter_time": 0.46935292434692377, "loss": 0.18876849114894867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.23083722948856, "step_time": 0.42775731658935545} +{"epoch": 0, 
"iter": 9809, "iter_tflops": 19.460380411714564, "iter_time": 1.0601587982177734, "loss": 0.7453339695930481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.0676868097519, "step_time": 0.9792766380310058} +{"epoch": 0, "iter": 9810, "iter_tflops": 15.427142222873478, "iter_time": 1.3373243865966795, "loss": 0.6941309571266174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.641969001047226, "step_time": 1.1694325904846192} +{"epoch": 0, "iter": 9811, "iter_tflops": 33.42942933183136, "iter_time": 0.6171536254882811, "loss": 0.7037272453308105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.16659781589299, "step_time": 0.5704460678100586} +{"epoch": 0, "iter": 9812, "iter_tflops": 32.85163268696269, "iter_time": 0.6280081634521485, "loss": 0.5363401770591736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.60345829330362, "step_time": 0.579468807220459} +{"epoch": 0, "iter": 9813, "iter_tflops": 20.10362788644909, "iter_time": 1.026237335205078, "loss": 0.6077786684036255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.499882004548816, "step_time": 0.9595910110473632} +{"epoch": 0, "iter": 9814, "iter_tflops": 21.61105697258252, "iter_time": 0.9546545333862304, "loss": 0.6091252565383911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.713867833501517, "step_time": 0.7722990036010742} +{"epoch": 0, "iter": 9815, "iter_tflops": 43.492842715918705, "iter_time": 0.4743560600280762, "loss": 0.4259565472602844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00478521241136, "step_time": 0.4389147491455078} +{"epoch": 0, "iter": 9816, "iter_tflops": 44.90789715013747, "iter_time": 0.4594090309143066, "loss": 0.4891608655452728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.48276433649678, "step_time": 0.42553459548950195} +{"epoch": 0, "iter": 9817, "iter_tflops": 37.67777268881254, "iter_time": 0.5475666961669922, "loss": 0.5692794919013977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.47491030332923, "step_time": 0.5097254905700684} +{"epoch": 0, 
"iter": 9818, "iter_tflops": 46.589817939030226, "iter_time": 0.44282408523559574, "loss": 0.59113609790802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.497333619690856, "step_time": 0.4085580768585205} +{"epoch": 0, "iter": 9819, "iter_tflops": 45.903229322701534, "iter_time": 0.4494475402832031, "loss": 0.673580527305603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.8615623253761, "step_time": 0.4137674903869629} +{"epoch": 0, "iter": 9820, "iter_tflops": 45.27406532286865, "iter_time": 0.455693416595459, "loss": 0.7647528648376465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.91576361944655, "step_time": 0.4217677898406983} +{"epoch": 0, "iter": 9821, "iter_tflops": 29.217572618603906, "iter_time": 0.706119354248047, "loss": 0.7141706347465515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.044365047239662, "step_time": 0.6645680618286133} +{"epoch": 0, "iter": 9822, "iter_tflops": 14.540748268387206, "iter_time": 1.4188467559814455, "loss": 0.6854853630065918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.01627826807747, "step_time": 1.2881328086853028} +{"epoch": 0, "iter": 9823, "iter_tflops": 33.61844036997602, "iter_time": 0.613683837890625, "loss": 0.567388117313385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.431629661422186, "step_time": 0.4862196826934814} +{"epoch": 0, "iter": 9824, "iter_tflops": 44.76338840788472, "iter_time": 0.4608921318054199, "loss": 0.60972660779953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.18153746061691, "step_time": 0.4281950016021729} +{"epoch": 0, "iter": 9825, "iter_tflops": 27.847797900469235, "iter_time": 0.7408518829345704, "loss": 0.0298662930727005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.533743063339838, "step_time": 0.6985600662231444} +{"epoch": 0, "iter": 9826, "iter_tflops": 22.66902563181167, "iter_time": 0.9101005859375001, "loss": 0.05732496455311775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.563767788665885, "step_time": 0.7484859714508056} +{"epoch": 0, "iter": 
9827, "iter_tflops": 48.20365362897211, "iter_time": 0.42799854278564453, "loss": 0.06981107592582703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37827100198867, "step_time": 0.3938864936828613} +{"epoch": 0, "iter": 9828, "iter_tflops": 53.032468519023446, "iter_time": 0.38902759170532225, "loss": 0.04114523530006409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.75467761139573, "step_time": 0.3572194385528564} +{"epoch": 0, "iter": 9829, "iter_tflops": 32.2275132798594, "iter_time": 0.6401701965332032, "loss": 0.11951503157615662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.3802818654261, "step_time": 0.6000850601196289} +{"epoch": 0, "iter": 9830, "iter_tflops": 11.146733863354138, "iter_time": 1.850864456176758, "loss": 0.09072619676589966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.41898773140175, "step_time": 1.4308281478881837} +{"epoch": 0, "iter": 9831, "iter_tflops": 13.22261009803499, "iter_time": 1.5602890319824219, "loss": 0.09745511412620544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.044107552757623, "step_time": 1.210453140258789} +{"epoch": 0, "iter": 9832, "iter_tflops": 23.61450926678078, "iter_time": 0.873661750793457, "loss": 0.14257493615150452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.585315524769037, "step_time": 0.6973423519134522} +{"epoch": 0, "iter": 9833, "iter_tflops": 20.08373739469909, "iter_time": 0.7605938568115235, "loss": 0.4346896708011627, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 21.180965925424665, "step_time": 0.7211931381225586} +{"epoch": 0, "iter": 9834, "iter_tflops": 6.7083592781560055, "iter_time": 2.2770943908691406, "loss": 0.46687138080596924, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 8.611806766587042, "step_time": 1.7737935485839842} +{"epoch": 0, "iter": 9835, "iter_tflops": 8.613187501469872, "iter_time": 1.7735092010498048, "loss": 0.40646716952323914, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 11.041676362758253, "step_time": 1.383446388244629} +{"epoch": 0, "iter": 
9836, "iter_tflops": 18.589644910500947, "iter_time": 0.8217245330810548, "loss": 0.3435133993625641, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 20.539081621154928, "step_time": 0.7437317581176758} +{"epoch": 0, "iter": 9837, "iter_tflops": 23.60242862726154, "iter_time": 0.6298888549804688, "loss": 0.3405477702617645, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 25.309194300655605, "step_time": 0.5874113006591797} +{"epoch": 0, "iter": 9838, "iter_tflops": 21.080093959204852, "iter_time": 0.7052580871582032, "loss": 0.3957694172859192, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 23.120376482417093, "step_time": 0.643021827697754} +{"epoch": 0, "iter": 9839, "iter_tflops": 24.43775616278058, "iter_time": 0.6083580932617186, "loss": 0.45639488101005554, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 26.033209933527342, "step_time": 0.5710746688842774} +{"epoch": 0, "iter": 9840, "iter_tflops": 26.611573727684057, "iter_time": 0.5586631927490234, "loss": 0.41446325182914734, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 28.36825157916468, "step_time": 0.524068489074707} +{"epoch": 0, "iter": 9841, "iter_tflops": 29.343641005110424, "iter_time": 0.7030856704711914, "loss": 0.7250415086746216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.340062999351918, "step_time": 0.6582977676391601} +{"epoch": 0, "iter": 9842, "iter_tflops": 20.197922712983488, "iter_time": 1.0214463043212891, "loss": 0.5920595526695251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.597744739863764, "step_time": 0.838739231109619} +{"epoch": 0, "iter": 9843, "iter_tflops": 42.62877738124947, "iter_time": 0.4839710350036621, "loss": 0.9099225401878357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.85098090902112, "step_time": 0.44995969772338873} +{"epoch": 0, "iter": 9844, "iter_tflops": 46.61134930716477, "iter_time": 0.44261952972412105, "loss": 0.661514401435852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.417019528952856, "step_time": 0.4092089080810547} +{"epoch": 0, 
"iter": 9845, "iter_tflops": 37.427714888657356, "iter_time": 0.5512250366210938, "loss": 0.2245907187461853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.02809808418759, "step_time": 0.5154152832031249} +{"epoch": 0, "iter": 9846, "iter_tflops": 29.788317936003924, "iter_time": 0.6925900802612305, "loss": 0.20404291152954102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.742727628936414, "step_time": 0.5466243381500244} +{"epoch": 0, "iter": 9847, "iter_tflops": 40.63238810467082, "iter_time": 0.5077499618530273, "loss": 0.1673731803894043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.31779305428103, "step_time": 0.4655261936187744} +{"epoch": 0, "iter": 9848, "iter_tflops": 39.285687035423464, "iter_time": 0.5251554718017578, "loss": 0.1529380828142166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.352794256672134, "step_time": 0.4758884372711182} +{"epoch": 0, "iter": 9849, "iter_tflops": 22.827649186006923, "iter_time": 0.9037765274047852, "loss": 0.03363565355539322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.372220528934342, "step_time": 0.8465003623962403} +{"epoch": 0, "iter": 9850, "iter_tflops": 9.9841873213741, "iter_time": 2.0663768463134766, "loss": 0.021899092942476273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.36000973913107, "step_time": 1.8161158294677735} +{"epoch": 0, "iter": 9851, "iter_tflops": 13.031682848819296, "iter_time": 1.5831488342285156, "loss": 0.051169320940971375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.927366950083297, "step_time": 1.3820986366271972} +{"epoch": 0, "iter": 9852, "iter_tflops": 39.88453703763416, "iter_time": 0.5172704772949218, "loss": 0.03726033866405487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.3780018724497, "step_time": 0.46489460182189934} +{"epoch": 0, "iter": 9853, "iter_tflops": 13.336208861307167, "iter_time": 1.1883800659179689, "loss": 0.3232085704803467, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 14.371807528739048, "step_time": 1.102748191833496} 
+{"epoch": 0, "iter": 9854, "iter_tflops": 16.969221638473336, "iter_time": 0.9339547271728514, "loss": 0.5068947076797485, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 20.41974532230736, "step_time": 0.7761352806091308} +{"epoch": 0, "iter": 9855, "iter_tflops": 22.780754195886054, "iter_time": 0.6956962280273437, "loss": 0.43703678250312805, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 24.422583461188175, "step_time": 0.648927448272705} +{"epoch": 0, "iter": 9856, "iter_tflops": 26.57961359285885, "iter_time": 0.5962646789550781, "loss": 0.38711172342300415, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 28.390711031502835, "step_time": 0.5582278213500976} +{"epoch": 0, "iter": 9857, "iter_tflops": 17.651957716757142, "iter_time": 1.1687708435058595, "loss": 0.41652682423591614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.193809012391153, "step_time": 1.074882713317871} +{"epoch": 0, "iter": 9858, "iter_tflops": 29.01712342860112, "iter_time": 0.710997200012207, "loss": 0.3585819900035858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.75551541619175, "step_time": 0.629850980758667} +{"epoch": 0, "iter": 9859, "iter_tflops": 44.22885378764815, "iter_time": 0.4664623146057129, "loss": 0.3357088565826416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.421265510800865, "step_time": 0.42607505798339845} +{"epoch": 0, "iter": 9860, "iter_tflops": 49.08396238547846, "iter_time": 0.4203224945068359, "loss": 0.43484339118003845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.18430581859189, "step_time": 0.38791694641113283} +{"epoch": 0, "iter": 9861, "iter_tflops": 41.98677929686326, "iter_time": 0.4913711853027344, "loss": 0.6184256076812744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.98554275846041, "step_time": 0.4486430358886719} +{"epoch": 0, "iter": 9862, "iter_tflops": 47.62837619068965, "iter_time": 0.43316810607910156, "loss": 0.6498143672943115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.58207719482827, "step_time": 0.39235980415344235} 
+{"epoch": 0, "iter": 9863, "iter_tflops": 45.524274244181804, "iter_time": 0.4531888504028321, "loss": 0.6837682723999023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.73451532062799, "step_time": 0.4148244609832764} +{"epoch": 0, "iter": 9864, "iter_tflops": 39.40746151886965, "iter_time": 0.5235326690673828, "loss": 0.7211848497390747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.27067292399399, "step_time": 0.476791603088379} +{"epoch": 0, "iter": 9865, "iter_tflops": 18.301022296846515, "iter_time": 1.1273191833496095, "loss": 0.3221309781074524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.833905400313913, "step_time": 1.040193199157715} +{"epoch": 0, "iter": 9866, "iter_tflops": 29.205290631442843, "iter_time": 0.7064163055419922, "loss": 0.3530637323856354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.16044863375958, "step_time": 0.5867699165344239} +{"epoch": 0, "iter": 9867, "iter_tflops": 47.24461538097433, "iter_time": 0.4366866645812988, "loss": 0.3010372817516327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.20131032372028, "step_time": 0.40294073295593263} +{"epoch": 0, "iter": 9868, "iter_tflops": 36.76721590901295, "iter_time": 0.5611274337768555, "loss": 0.2582785189151764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.48691171281845, "step_time": 0.5095743942260742} +{"epoch": 0, "iter": 9869, "iter_tflops": 15.896461794940972, "iter_time": 1.2978418579101563, "loss": 0.7794305086135864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.78842478575555, "step_time": 1.2288879852294923} +{"epoch": 0, "iter": 9870, "iter_tflops": 38.300180909035376, "iter_time": 0.5386683044433593, "loss": 0.6906718611717224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.75777553395969, "step_time": 0.4825109176635742} +{"epoch": 0, "iter": 9871, "iter_tflops": 43.64857368191941, "iter_time": 0.4726636352539063, "loss": 0.4501033425331116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.980698486617, "step_time": 0.43913977813720706} 
+{"epoch": 0, "iter": 9872, "iter_tflops": 43.85846194191368, "iter_time": 0.4704016647338867, "loss": 0.6826703548431396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19593061194416, "step_time": 0.4371371269226074} +{"epoch": 0, "iter": 9873, "iter_tflops": 45.32329681055255, "iter_time": 0.455198429107666, "loss": 0.0812692642211914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62539887639101, "step_time": 0.41573657798767094} +{"epoch": 0, "iter": 9874, "iter_tflops": 37.419422556734254, "iter_time": 0.5513471908569336, "loss": 0.11220686882734299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.26242643126292, "step_time": 0.4999970989227294} +{"epoch": 0, "iter": 9875, "iter_tflops": 39.9983677446767, "iter_time": 0.5157983856201172, "loss": 0.09066367894411087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.93283457862254, "step_time": 0.46960533523559567} +{"epoch": 0, "iter": 9876, "iter_tflops": 38.93790270878427, "iter_time": 0.5298460388183593, "loss": 0.13333642482757568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.11461550530593, "step_time": 0.48987965965270996} +{"epoch": 0, "iter": 9877, "iter_tflops": 18.057420138210727, "iter_time": 1.1425271911621093, "loss": 0.7825163006782532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.525647104442054, "step_time": 1.0566150970458985} +{"epoch": 0, "iter": 9878, "iter_tflops": 21.317891933062498, "iter_time": 0.9677830047607421, "loss": 0.6382572650909424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.750737070988656, "step_time": 0.8011845817565918} +{"epoch": 0, "iter": 9879, "iter_tflops": 35.798670567313614, "iter_time": 0.5763089294433594, "loss": 0.8995851278305054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.7566088636107, "step_time": 0.5323245277404784} +{"epoch": 0, "iter": 9880, "iter_tflops": 46.69193239007113, "iter_time": 0.4418556365966797, "loss": 0.7534675598144531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49191463154031, "step_time": 0.40860192489624025} 
+{"epoch": 0, "iter": 9881, "iter_tflops": 40.45778239612827, "iter_time": 0.5099412841796875, "loss": 0.44286414980888367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.00682857633457, "step_time": 0.468815731048584} +{"epoch": 0, "iter": 9882, "iter_tflops": 38.05631901339034, "iter_time": 0.5421200485229493, "loss": 0.418680876493454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.91284536495679, "step_time": 0.4217929534912109} +{"epoch": 0, "iter": 9883, "iter_tflops": 50.477730683558484, "iter_time": 0.408716739654541, "loss": 0.3476683795452118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.81406367405074, "step_time": 0.37638321495056154} +{"epoch": 0, "iter": 9884, "iter_tflops": 46.47595984818576, "iter_time": 0.4439089279174805, "loss": 0.4574768543243408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.47023381990319, "step_time": 0.4087774505615235} +{"epoch": 0, "iter": 9885, "iter_tflops": 22.230487646551545, "iter_time": 0.9280540237426759, "loss": 0.19807802140712738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.25219842964689, "step_time": 0.8872749633789064} +{"epoch": 0, "iter": 9886, "iter_tflops": 18.398737336652722, "iter_time": 1.12133203125, "loss": 0.20494654774665833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.786421428059917, "step_time": 0.946970275878906} +{"epoch": 0, "iter": 9887, "iter_tflops": 47.989153536032646, "iter_time": 0.4299115943908691, "loss": 0.21430358290672302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.3094945540427, "step_time": 0.3944043750762939} +{"epoch": 0, "iter": 9888, "iter_tflops": 51.97568350714397, "iter_time": 0.39693741607666017, "loss": 0.2468147724866867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.454198745209865, "step_time": 0.3654483451843262} +{"epoch": 0, "iter": 9889, "iter_tflops": 32.407174366307586, "iter_time": 0.6366211776733399, "loss": 0.6693094968795776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.4621498104925, "step_time": 0.5986595039367676} 
+{"epoch": 0, "iter": 9890, "iter_tflops": 16.14284433025297, "iter_time": 1.2780333557128905, "loss": 0.6835675239562988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.604058769140174, "step_time": 0.9127163276672363} +{"epoch": 0, "iter": 9891, "iter_tflops": 36.28882600783475, "iter_time": 0.5685246887207032, "loss": 0.5449647903442383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56355211643413, "step_time": 0.5214671688079835} +{"epoch": 0, "iter": 9892, "iter_tflops": 36.615832032366335, "iter_time": 0.5634473495483397, "loss": 0.6131250858306885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.87168688433682, "step_time": 0.5174371871948242} +{"epoch": 0, "iter": 9893, "iter_tflops": 23.714105841836698, "iter_time": 0.8699924697875977, "loss": 0.16011440753936768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.395022392151823, "step_time": 0.812406982421875} +{"epoch": 0, "iter": 9894, "iter_tflops": 11.773007174815776, "iter_time": 1.752406433105469, "loss": 0.11292348802089691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.499097967007403, "step_time": 1.4229225540161132} +{"epoch": 0, "iter": 9895, "iter_tflops": 11.025604446600164, "iter_time": 1.8711984100341796, "loss": 0.1041913852095604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.867051128603013, "step_time": 1.6034049530029297} +{"epoch": 0, "iter": 9896, "iter_tflops": 21.003017348807038, "iter_time": 0.9822918853759764, "loss": 0.07263854891061783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.558775624716805, "step_time": 0.7224081935882568} +{"epoch": 0, "iter": 9897, "iter_tflops": 16.594975260973097, "iter_time": 0.9426808395385743, "loss": 0.4369550049304962, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 18.034930674810052, "step_time": 0.8674147682189941} +{"epoch": 0, "iter": 9898, "iter_tflops": 26.25357387903214, "iter_time": 0.5958718338012695, "loss": 0.5242744088172913, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 28.399900272959048, "step_time": 
0.550838737487793} +{"epoch": 0, "iter": 9899, "iter_tflops": 28.359364581621712, "iter_time": 0.5516260833740235, "loss": 0.4043136239051819, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 30.264694935516644, "step_time": 0.5168981628417969} +{"epoch": 0, "iter": 9900, "iter_tflops": 27.003699411862403, "iter_time": 0.5793193359375001, "loss": 0.38704025745391846, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 28.60293587622024, "step_time": 0.546928653717041} +{"epoch": 0, "iter": 9901, "iter_tflops": 32.76103370311144, "iter_time": 0.6297448883056641, "loss": 0.10865701735019684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.045627871080114, "step_time": 0.5886923637390137} +{"epoch": 0, "iter": 9902, "iter_tflops": 27.27261539241433, "iter_time": 0.7564765319824218, "loss": 0.1443256437778473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.79804592343208, "step_time": 0.6104226722717285} +{"epoch": 0, "iter": 9903, "iter_tflops": 51.346496396592165, "iter_time": 0.40180138778686525, "loss": 0.1141735315322876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.716415811802875, "step_time": 0.3702875213623047} +{"epoch": 0, "iter": 9904, "iter_tflops": 48.61974877743467, "iter_time": 0.42433566665649414, "loss": 0.2663710415363312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37868823581755, "step_time": 0.39388335609436037} +{"epoch": 0, "iter": 9905, "iter_tflops": 32.89695746622643, "iter_time": 0.6271429061889648, "loss": 0.3445897102355957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.984311654479136, "step_time": 0.5897241516113282} +{"epoch": 0, "iter": 9906, "iter_tflops": 9.040342059787907, "iter_time": 2.2821142578125, "loss": 0.3174739480018616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.475915573296538, "step_time": 1.969383331298828} +{"epoch": 0, "iter": 9907, "iter_tflops": 12.761897869795163, "iter_time": 1.6166164093017579, "loss": 0.436148464679718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.395577834524746, "step_time": 
1.4331549415588376} +{"epoch": 0, "iter": 9908, "iter_tflops": 23.57602096128422, "iter_time": 0.8750880203247071, "loss": 0.4074709117412567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.60552626609988, "step_time": 0.6327483673095703} +{"epoch": 0, "iter": 9909, "iter_tflops": 18.024628685912138, "iter_time": 0.8202793807983397, "loss": 0.3496586084365845, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 18.986876040996084, "step_time": 0.7787079467773438} +{"epoch": 0, "iter": 9910, "iter_tflops": 9.858039154213035, "iter_time": 1.4998146209716796, "loss": 0.34668415784835815, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 11.68860956708942, "step_time": 1.2649264373779294} +{"epoch": 0, "iter": 9911, "iter_tflops": 26.49046929710623, "iter_time": 0.5581339874267578, "loss": 0.39814817905426025, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 28.253384703036662, "step_time": 0.5233083190917969} +{"epoch": 0, "iter": 9912, "iter_tflops": 25.80849202307798, "iter_time": 0.5728824157714845, "loss": 0.4066696763038635, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.34157144481357, "step_time": 0.5407601127624512} +{"epoch": 0, "iter": 9913, "iter_tflops": 26.04952234695424, "iter_time": 0.7919950790405273, "loss": 0.06918779015541077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.621053666055253, "step_time": 0.7469336166381836} +{"epoch": 0, "iter": 9914, "iter_tflops": 15.102552143341326, "iter_time": 1.366066696166992, "loss": 0.07356804609298706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.169562097249447, "step_time": 1.1354755496978761} +{"epoch": 0, "iter": 9915, "iter_tflops": 46.69787254044911, "iter_time": 0.44179943084716794, "loss": 0.09073316305875778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.840349052315, "step_time": 0.405801570892334} +{"epoch": 0, "iter": 9916, "iter_tflops": 52.24728061157099, "iter_time": 0.3948740158081055, "loss": 0.1210353672504425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.558556858445115, "step_time": 
0.3647740440368653} +{"epoch": 0, "iter": 9917, "iter_tflops": 38.46557874207323, "iter_time": 0.5363520889282226, "loss": 0.06547584384679794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.661949047169344, "step_time": 0.49520231246948243} +{"epoch": 0, "iter": 9918, "iter_tflops": 50.158419866584126, "iter_time": 0.4113186492919922, "loss": 0.0224470105022192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.94362388748424, "step_time": 0.3754956817626953} +{"epoch": 0, "iter": 9919, "iter_tflops": 55.23130953055183, "iter_time": 0.37353982162475585, "loss": 0.02336677350103855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.759938159305825, "step_time": 0.33955093002319336} +{"epoch": 0, "iter": 9920, "iter_tflops": 52.6685209980585, "iter_time": 0.39171583175659186, "loss": 0.036607950925827026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.517495394105275, "step_time": 0.35869248771667483} +{"epoch": 0, "iter": 9921, "iter_tflops": 36.76279526207032, "iter_time": 0.5611949081420898, "loss": 0.07441315799951553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.39069303619163, "step_time": 0.5237555351257325} +{"epoch": 0, "iter": 9922, "iter_tflops": 31.014400028168176, "iter_time": 0.6652101440429689, "loss": 0.06929738819599152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.125410849210795, "step_time": 0.5273067569732667} +{"epoch": 0, "iter": 9923, "iter_tflops": 52.9246790100416, "iter_time": 0.3898199081420899, "loss": 0.03155631944537163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.987246622149634, "step_time": 0.3557867412567139} +{"epoch": 0, "iter": 9924, "iter_tflops": 52.961801888255465, "iter_time": 0.38954666900634766, "loss": 0.12659704685211182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.865377520577184, "step_time": 0.35653605651855463} +{"epoch": 0, "iter": 9925, "iter_tflops": 29.398030084047946, "iter_time": 0.7017848968505859, "loss": 0.19639049470424652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
31.218933714335854, "step_time": 0.6608519592285156} +{"epoch": 0, "iter": 9926, "iter_tflops": 15.068971094425434, "iter_time": 1.3691109619140625, "loss": 0.2495414912700653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.427720562767384, "step_time": 1.1195683937072753} +{"epoch": 0, "iter": 9927, "iter_tflops": 39.39219155300918, "iter_time": 0.523735610961914, "loss": 0.1520376205444336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.40846316092402, "step_time": 0.47527813720703127} +{"epoch": 0, "iter": 9928, "iter_tflops": 40.87107670983797, "iter_time": 0.504784683227539, "loss": 0.15034234523773193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.74352899420415, "step_time": 0.46109669876098636} +{"epoch": 0, "iter": 9929, "iter_tflops": 31.86304859449071, "iter_time": 0.6474927673339843, "loss": 0.7701792120933533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.01315743605455, "step_time": 0.5892383041381835} +{"epoch": 0, "iter": 9930, "iter_tflops": 44.971320467771335, "iter_time": 0.45876112365722654, "loss": 0.6986883282661438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.81564672358408, "step_time": 0.4226328010559082} +{"epoch": 0, "iter": 9931, "iter_tflops": 47.026886758738755, "iter_time": 0.4387084693908691, "loss": 0.7480898499488831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.6414364117658, "step_time": 0.4073955039978028} +{"epoch": 0, "iter": 9932, "iter_tflops": 42.88119453783672, "iter_time": 0.48112217330932616, "loss": 0.7300355434417725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83421384400333, "step_time": 0.4501243019104004} +{"epoch": 0, "iter": 9933, "iter_tflops": 43.40800814638305, "iter_time": 0.47528311920166016, "loss": 0.21120265126228333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43646063272719, "step_time": 0.4349205913543701} +{"epoch": 0, "iter": 9934, "iter_tflops": 40.58848054789785, "iter_time": 0.5082992324829102, "loss": 0.24397976696491241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.01639420972589, "step_time": 0.4687138481140136} +{"epoch": 0, "iter": 9935, "iter_tflops": 49.22340820810587, "iter_time": 0.4191317558288575, "loss": 0.2722393274307251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.45050367742385, "step_time": 0.3859850158691407} +{"epoch": 0, "iter": 9936, "iter_tflops": 50.88499485349549, "iter_time": 0.4054455261230468, "loss": 0.16143915057182312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.11338934412226, "step_time": 0.3743390445709229} +{"epoch": 0, "iter": 9937, "iter_tflops": 24.18714788420578, "iter_time": 0.8529775238037108, "loss": 0.1509075164794922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.621596986091582, "step_time": 0.8052227783203125} +{"epoch": 0, "iter": 9938, "iter_tflops": 13.89464241369809, "iter_time": 1.4848236389160157, "loss": 0.23493286967277527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.940647954548677, "step_time": 1.2178455963134764} +{"epoch": 0, "iter": 9939, "iter_tflops": 37.671538597451885, "iter_time": 0.5476573104858399, "loss": 0.2327488213777542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.38349413734077, "step_time": 0.42640768051147465} +{"epoch": 0, "iter": 9940, "iter_tflops": 48.30524609849232, "iter_time": 0.4270984039306641, "loss": 0.22689974308013916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.34204470398854, "step_time": 0.3941591053009033} +{"epoch": 0, "iter": 9941, "iter_tflops": 44.40563484884361, "iter_time": 0.46460530471801764, "loss": 0.3227895200252533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.74203761767021, "step_time": 0.42327105140686033} +{"epoch": 0, "iter": 9942, "iter_tflops": 49.236998554723904, "iter_time": 0.4190160675048828, "loss": 0.30504974722862244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.604553985988886, "step_time": 0.3848757610321046} +{"epoch": 0, "iter": 9943, "iter_tflops": 49.14815226490113, "iter_time": 0.41977353286743163, "loss": 0.3207481801509857, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 53.24309201943805, "step_time": 0.38748864364624025} +{"epoch": 0, "iter": 9944, "iter_tflops": 50.1455635938165, "iter_time": 0.4114241027832031, "loss": 0.1798686683177948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.17922703926528, "step_time": 0.38079342651367193} +{"epoch": 0, "iter": 9945, "iter_tflops": 25.48944534547149, "iter_time": 0.809397506713867, "loss": 0.5189737677574158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.746428016560607, "step_time": 0.7713588333129882} +{"epoch": 0, "iter": 9946, "iter_tflops": 16.719639309799355, "iter_time": 1.2339436950683593, "loss": 0.311472088098526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.70790905102852, "step_time": 0.9962905216217041} +{"epoch": 0, "iter": 9947, "iter_tflops": 38.01131158910648, "iter_time": 0.5427619476318359, "loss": 0.3847227692604065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.69987809957428, "step_time": 0.4947518901824952} +{"epoch": 0, "iter": 9948, "iter_tflops": 40.554575397615785, "iter_time": 0.5087241897583008, "loss": 0.29625052213668823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1193913827966, "step_time": 0.4676196308135986} +{"epoch": 0, "iter": 9949, "iter_tflops": 26.36301937559046, "iter_time": 0.7825770339965821, "loss": 0.036839332431554794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.56675940418741, "step_time": 0.6977800045013427} +{"epoch": 0, "iter": 9950, "iter_tflops": 48.18604167148773, "iter_time": 0.4281549758911133, "loss": 0.021657662466168404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.768934407476344, "step_time": 0.3836991329193115} +{"epoch": 0, "iter": 9951, "iter_tflops": 53.7542109015252, "iter_time": 0.3838042297363281, "loss": 0.01849985122680664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.79067926657574, "step_time": 0.35092456436157227} +{"epoch": 0, "iter": 9952, "iter_tflops": 51.44982362272862, "iter_time": 0.40099444580078125, "loss": 0.038986630737781525, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 56.35171107975912, "step_time": 0.3661129913330078} +{"epoch": 0, "iter": 9953, "iter_tflops": 25.931919343782145, "iter_time": 0.7955868301391602, "loss": 0.6680278182029724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.324835032570174, "step_time": 0.7550308532714844} +{"epoch": 0, "iter": 9954, "iter_tflops": 11.139516583244092, "iter_time": 1.8520636291503905, "loss": 0.6505666971206665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.957398590877226, "step_time": 1.379323642730713} +{"epoch": 0, "iter": 9955, "iter_tflops": 37.72729870788509, "iter_time": 0.5468478851318359, "loss": 0.6430885791778564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.37773812330118, "step_time": 0.4986037044525146} +{"epoch": 0, "iter": 9956, "iter_tflops": 36.63314210927913, "iter_time": 0.5631811065673828, "loss": 0.591828465461731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.956665498856275, "step_time": 0.5163367176055909} +{"epoch": 0, "iter": 9957, "iter_tflops": 17.430924594155925, "iter_time": 1.1835914611816407, "loss": 0.0026756024453788996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.691815253292003, "step_time": 1.103750129699707} +{"epoch": 0, "iter": 9958, "iter_tflops": 18.564093095643834, "iter_time": 1.1113440017700196, "loss": 0.0026021809317171574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.47284928754581, "step_time": 0.7793302974700927} +{"epoch": 0, "iter": 9959, "iter_tflops": 46.5902253096435, "iter_time": 0.442820213317871, "loss": 0.03810678422451019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.43673718339057, "step_time": 0.4010964660644531} +{"epoch": 0, "iter": 9960, "iter_tflops": 44.54841062297964, "iter_time": 0.46311626434326175, "loss": 0.00394040159881115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.518627865170224, "step_time": 0.41663298034667967} +{"epoch": 0, "iter": 9961, "iter_tflops": 35.03492067066507, "iter_time": 0.5793996276855468, "loss": 0.022627374157309532, "lr": 3e-05, "seqlen": 
8064.0, "step_tflops": 39.174618118083714, "step_time": 0.5181727600097656} +{"epoch": 0, "iter": 9962, "iter_tflops": 33.36834966474716, "iter_time": 0.6083375473022461, "loss": 0.037099018692970276, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 37.1431270021067, "step_time": 0.5465134906768799} +{"epoch": 0, "iter": 9963, "iter_tflops": 42.6157269084293, "iter_time": 0.4763316612243652, "loss": 0.06754189729690552, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 47.29792437330756, "step_time": 0.42917781829833984} +{"epoch": 0, "iter": 9964, "iter_tflops": 46.68124210050065, "iter_time": 0.434847469329834, "loss": 0.05663588270545006, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 51.516808806209944, "step_time": 0.39403100585937495} +{"epoch": 0, "iter": 9965, "iter_tflops": 18.02165417641785, "iter_time": 1.144794662475586, "loss": 0.6332066059112549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.955588118103655, "step_time": 1.0883911056518554} +{"epoch": 0, "iter": 9966, "iter_tflops": 17.59647327347957, "iter_time": 1.1724561614990232, "loss": 0.6519610285758972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.625090250607276, "step_time": 0.9540350246429443} +{"epoch": 0, "iter": 9967, "iter_tflops": 34.62084403252775, "iter_time": 0.5959153823852539, "loss": 0.9194054007530212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.6120651167065, "step_time": 0.5485232849121093} +{"epoch": 0, "iter": 9968, "iter_tflops": 36.126027878967854, "iter_time": 0.5710866851806641, "loss": 0.7496799230575562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.50473045128168, "step_time": 0.5222436218261719} +{"epoch": 0, "iter": 9969, "iter_tflops": 24.29139549247681, "iter_time": 0.8493169326782227, "loss": 0.2995292842388153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.010211011147877, "step_time": 0.7931920852661134} +{"epoch": 0, "iter": 9970, "iter_tflops": 10.302374897749388, "iter_time": 2.0025570526123047, "loss": 0.28280022740364075, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 11.257341293922453, "step_time": 1.8326790466308596} +{"epoch": 0, "iter": 9971, "iter_tflops": 14.153674280580416, "iter_time": 1.4576493072509766, "loss": 0.3129308521747589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.776897012395382, "step_time": 1.307677516937256} +{"epoch": 0, "iter": 9972, "iter_tflops": 47.49318139735209, "iter_time": 0.43440116882324215, "loss": 0.2634420394897461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.7229876398035, "step_time": 0.39887667846679686} +{"epoch": 0, "iter": 9973, "iter_tflops": 15.691362695195016, "iter_time": 0.9396507415771485, "loss": 0.3640718460083008, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 16.358552616088335, "step_time": 0.9013267211914062} +{"epoch": 0, "iter": 9974, "iter_tflops": 9.273476862907286, "iter_time": 1.589953887939453, "loss": 0.3342269957065582, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 11.367750543501751, "step_time": 1.2970376625061035} +{"epoch": 0, "iter": 9975, "iter_tflops": 22.45844918398142, "iter_time": 0.6565190887451171, "loss": 0.3486211895942688, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.17525251062601, "step_time": 0.6098964462280274} +{"epoch": 0, "iter": 9976, "iter_tflops": 23.239352832261392, "iter_time": 0.6344583129882813, "loss": 0.2931416630744934, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.904676443584286, "step_time": 0.5920334129333495} +{"epoch": 0, "iter": 9977, "iter_tflops": 17.251295092163115, "iter_time": 1.1959156341552735, "loss": 0.6108402013778687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.553376885298036, "step_time": 1.1119859008789061} +{"epoch": 0, "iter": 9978, "iter_tflops": 16.64689238955882, "iter_time": 1.2393360290527342, "loss": 0.713696300983429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.700600264742587, "step_time": 0.9507153377532959} +{"epoch": 0, "iter": 9979, "iter_tflops": 44.5735466606482, "iter_time": 0.46285510253906254, "loss": 0.6668168902397156, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 47.701026813920166, "step_time": 0.43250837326049807} +{"epoch": 0, "iter": 9980, "iter_tflops": 45.735924389111695, "iter_time": 0.45109164810180663, "loss": 0.645918071269989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42941080819853, "step_time": 0.41738497734069824} +{"epoch": 0, "iter": 9981, "iter_tflops": 48.09327343193842, "iter_time": 0.4289808540344238, "loss": 0.05475583299994469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.0756360528801, "step_time": 0.3887111873626709} +{"epoch": 0, "iter": 9982, "iter_tflops": 44.846486104008065, "iter_time": 0.4600381278991699, "loss": 0.09861526638269424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.8266254880187, "step_time": 0.4225377712249756} +{"epoch": 0, "iter": 9983, "iter_tflops": 46.04178181968656, "iter_time": 0.4480950279235839, "loss": 0.04088744893670082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.39151435753772, "step_time": 0.4094160251617432} +{"epoch": 0, "iter": 9984, "iter_tflops": 50.838069955726766, "iter_time": 0.40581976318359375, "loss": 0.05542388930916786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.3637948164061, "step_time": 0.3726459426879883} +{"epoch": 0, "iter": 9985, "iter_tflops": 24.764595730083244, "iter_time": 0.8330882415771484, "loss": 0.8068490624427795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.086212231272995, "step_time": 0.7908811492919922} +{"epoch": 0, "iter": 9986, "iter_tflops": 12.944852365832588, "iter_time": 1.5937681579589846, "loss": 0.5256139636039734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.671756349712645, "step_time": 1.104935878753662} +{"epoch": 0, "iter": 9987, "iter_tflops": 35.7945091183887, "iter_time": 0.5763759307861328, "loss": 0.8828896284103394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.07074838298326, "step_time": 0.5280444927215575} +{"epoch": 0, "iter": 9988, "iter_tflops": 33.145091804697486, "iter_time": 0.6224479217529296, "loss": 0.7300952076911926, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 36.078197446396494, "step_time": 0.5718437995910645} +{"epoch": 0, "iter": 9989, "iter_tflops": 17.483487405034776, "iter_time": 1.1800330810546875, "loss": 0.30602115392684937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.77059894945926, "step_time": 1.0991174850463867} +{"epoch": 0, "iter": 9990, "iter_tflops": 25.744589893533668, "iter_time": 0.8013758850097655, "loss": 0.35678571462631226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.09429748881097, "step_time": 0.6428273906707763} +{"epoch": 0, "iter": 9991, "iter_tflops": 49.64737494144943, "iter_time": 0.4155525550842285, "loss": 0.3709999620914459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.856049347122756, "step_time": 0.3830784797668457} +{"epoch": 0, "iter": 9992, "iter_tflops": 43.290436723350794, "iter_time": 0.4765739288330078, "loss": 0.25105977058410645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53464258131884, "step_time": 0.4433491344451904} +{"epoch": 0, "iter": 9993, "iter_tflops": 25.96037615915153, "iter_time": 0.7947147369384766, "loss": 0.0019984745886176825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.490563036025957, "step_time": 0.7504791183471681} +{"epoch": 0, "iter": 9994, "iter_tflops": 15.521650976493307, "iter_time": 1.329181640625, "loss": 0.0036820205859839916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.978715114504904, "step_time": 1.0326536712646484} +{"epoch": 0, "iter": 9995, "iter_tflops": 52.15879636447125, "iter_time": 0.39554389572143556, "loss": 0.003010871121659875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.25370806893735, "step_time": 0.3603451061248779} +{"epoch": 0, "iter": 9996, "iter_tflops": 62.84692843986521, "iter_time": 0.3282752876281738, "loss": 0.015315371565520763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 68.78690901584906, "step_time": 0.2999276142120362} +{"epoch": 0, "iter": 9997, "iter_tflops": 27.0266349403679, "iter_time": 0.7633615341186523, "loss": 0.08759219944477081, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 28.528845528496138, "step_time": 0.7231660842895506} +{"epoch": 0, "iter": 9998, "iter_tflops": 12.543432628606014, "iter_time": 1.644772537231445, "loss": 0.07430731505155563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.109381503819268, "step_time": 1.2058351440429687} +{"epoch": 0, "iter": 9999, "iter_tflops": 37.30170923343272, "iter_time": 0.5530870819091798, "loss": 0.10307598859071732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.85933468552714, "step_time": 0.5049297466278077} +{"epoch": 0, "iter": 10000, "iter_tflops": 41.078058459269094, "iter_time": 0.5022412033081054, "loss": 0.04980181157588959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1865530714577, "step_time": 0.4565759525299073} +{"epoch": 0, "iter": 10001, "iter_tflops": 14.962564127128847, "iter_time": 1.3788474578857421, "loss": 0.6387504935264587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.927302448377455, "step_time": 1.2953287963867188} +{"epoch": 0, "iter": 10002, "iter_tflops": 21.041415725314486, "iter_time": 0.9804993057250978, "loss": 0.6603997945785522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.958286804938712, "step_time": 0.7124417839050293} +{"epoch": 0, "iter": 10003, "iter_tflops": 35.25547656357755, "iter_time": 0.5851883316040039, "loss": 0.7185941338539124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.316504583641795, "step_time": 0.5384388198852539} +{"epoch": 0, "iter": 10004, "iter_tflops": 34.50705152534033, "iter_time": 0.5978805084228515, "loss": 0.6234740018844604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.493022652381896, "step_time": 0.5502648773193359} +{"epoch": 0, "iter": 10005, "iter_tflops": 18.16437445284441, "iter_time": 1.135799835205078, "loss": 0.5740410089492798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.299664887563676, "step_time": 1.0689871368408204} +{"epoch": 0, "iter": 10006, "iter_tflops": 21.92293461277926, "iter_time": 0.9410735321044923, "loss": 0.6827502846717834, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 24.935602454599977, "step_time": 0.8273749771118164} +{"epoch": 0, "iter": 10007, "iter_tflops": 43.431949422633835, "iter_time": 0.475021125793457, "loss": 0.6673375964164734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81786246749505, "step_time": 0.4406671390533447} +{"epoch": 0, "iter": 10008, "iter_tflops": 45.38949654960359, "iter_time": 0.4545345306396484, "loss": 0.6999483108520508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.9258256734475, "step_time": 0.42168104934692385} +{"epoch": 0, "iter": 10009, "iter_tflops": 43.597032776307174, "iter_time": 0.47322242355346683, "loss": 0.009881201200187206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70852711890597, "step_time": 0.4324403781890869} +{"epoch": 0, "iter": 10010, "iter_tflops": 52.10393694902636, "iter_time": 0.39596035766601567, "loss": 0.005926040001213551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.92096445772815, "step_time": 0.35619388771057126} +{"epoch": 0, "iter": 10011, "iter_tflops": 55.92946179380172, "iter_time": 0.368877025604248, "loss": 0.0006866208277642727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.431460392265485, "step_time": 0.3358392162322998} +{"epoch": 0, "iter": 10012, "iter_tflops": 58.53467175544098, "iter_time": 0.3524593696594238, "loss": 0.00306831207126379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.34352899064378, "step_time": 0.3206397571563721} +{"epoch": 0, "iter": 10013, "iter_tflops": 43.89428773006538, "iter_time": 0.47001773071289066, "loss": 0.2918689548969269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.79295807779511, "step_time": 0.43167642974853515} +{"epoch": 0, "iter": 10014, "iter_tflops": 9.07369788856165, "iter_time": 2.2737249755859374, "loss": 0.3333684206008911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.379642139427437, "step_time": 1.8129826278686523} +{"epoch": 0, "iter": 10015, "iter_tflops": 15.458671651881058, "iter_time": 1.3345967864990234, "loss": 
0.3595443069934845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.831086231023626, "step_time": 1.0403410720825195} +{"epoch": 0, "iter": 10016, "iter_tflops": 21.98743233873818, "iter_time": 0.9383129959106444, "loss": 0.43498802185058594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.444772790942668, "step_time": 0.78015771484375} +{"epoch": 0, "iter": 10017, "iter_tflops": 18.430401569706625, "iter_time": 0.862132209777832, "loss": 0.39220520853996277, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 19.40219361450506, "step_time": 0.8189508438110351} +{"epoch": 0, "iter": 10018, "iter_tflops": 10.009106505700808, "iter_time": 1.5874986267089843, "loss": 0.3337933421134949, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 14.064430402648632, "step_time": 1.1297608489990234} +{"epoch": 0, "iter": 10019, "iter_tflops": 24.362259417472416, "iter_time": 0.6522154846191407, "loss": 0.6188707947731018, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 26.24488367111375, "step_time": 0.6054301109313965} +{"epoch": 0, "iter": 10020, "iter_tflops": 24.1226645773275, "iter_time": 0.6586935195922852, "loss": 0.13664600253105164, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 25.982782973854366, "step_time": 0.6115373725891113} +{"epoch": 0, "iter": 10021, "iter_tflops": 14.962780544312215, "iter_time": 1.3788275146484377, "loss": 0.2813929319381714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.052711544938386, "step_time": 1.2852092590332032} +{"epoch": 0, "iter": 10022, "iter_tflops": 22.22454535913999, "iter_time": 0.9283021621704102, "loss": 0.2905920743942261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.47835164411761, "step_time": 0.7508126316070557} +{"epoch": 0, "iter": 10023, "iter_tflops": 50.03613647072699, "iter_time": 0.41232387161254885, "loss": 0.3269577622413635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.58131588166408, "step_time": 0.37798820304870606} +{"epoch": 0, "iter": 10024, "iter_tflops": 50.45902353471336, "iter_time": 0.4088682670593262, 
"loss": 0.3704080879688263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.96838016798676, "step_time": 0.3753265686035156} +{"epoch": 0, "iter": 10025, "iter_tflops": 20.716584961535656, "iter_time": 0.5333196640014648, "loss": 0.006253181956708431, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 22.31092784488432, "step_time": 0.4952085456848144} +{"epoch": 0, "iter": 10026, "iter_tflops": 8.737531879040615, "iter_time": 1.2644946289062502, "loss": 0.0012341891415417194, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 10.042026090788063, "step_time": 1.1002323665618898} +{"epoch": 0, "iter": 10027, "iter_tflops": 23.974663113930266, "iter_time": 0.4608432693481446, "loss": 0.008769907988607883, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 26.541658918400262, "step_time": 0.4162724781036377} +{"epoch": 0, "iter": 10028, "iter_tflops": 24.048310397635788, "iter_time": 0.45943194961547856, "loss": 0.004127937369048595, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 26.640445519121837, "step_time": 0.41472887992858887} +{"epoch": 0, "iter": 10029, "iter_tflops": 19.568499162783656, "iter_time": 1.05430126953125, "loss": 0.11383791267871857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.07863152429134, "step_time": 0.978768165588379} +{"epoch": 0, "iter": 10030, "iter_tflops": 26.234047802747018, "iter_time": 0.786424331665039, "loss": 0.10462011396884918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.59606382956442, "step_time": 0.6529640407562256} +{"epoch": 0, "iter": 10031, "iter_tflops": 42.433008755477815, "iter_time": 0.4862038803100587, "loss": 0.05718628317117691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8946963231612, "step_time": 0.4399451351165772} +{"epoch": 0, "iter": 10032, "iter_tflops": 43.00430564187031, "iter_time": 0.47974483489990233, "loss": 0.1028163880109787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32502673370813, "step_time": 0.4359446773529053} +{"epoch": 0, "iter": 10033, "iter_tflops": 36.88188619266443, "iter_time": 
0.5593828201293946, "loss": 0.0889790952205658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.86326960771271, "step_time": 0.5048811244964599} +{"epoch": 0, "iter": 10034, "iter_tflops": 38.662929744365755, "iter_time": 0.5336143341064453, "loss": 0.19859753549098969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.792565142066785, "step_time": 0.4821186447143555} +{"epoch": 0, "iter": 10035, "iter_tflops": 38.223567929882805, "iter_time": 0.5397479782104492, "loss": 0.14027822017669678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.12661423607731, "step_time": 0.4897401294708252} +{"epoch": 0, "iter": 10036, "iter_tflops": 43.46354186685122, "iter_time": 0.47467584609985347, "loss": 0.1262763887643814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59366415307159, "step_time": 0.4334840335845947} +{"epoch": 0, "iter": 10037, "iter_tflops": 26.088429671844427, "iter_time": 0.7908139266967773, "loss": 0.4246923327445984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.45050032920364, "step_time": 0.7251574935913085} +{"epoch": 0, "iter": 10038, "iter_tflops": 40.13379275955839, "iter_time": 0.5140579071044922, "loss": 0.35653233528137207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.17070576666782, "step_time": 0.46707638359069825} +{"epoch": 0, "iter": 10039, "iter_tflops": 47.62223009726358, "iter_time": 0.43322401046752934, "loss": 0.3738958239555359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99140712766314, "step_time": 0.3968173713684082} +{"epoch": 0, "iter": 10040, "iter_tflops": 46.20581709165471, "iter_time": 0.44650424575805664, "loss": 0.2624412178993225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26212653430129, "step_time": 0.4104699687957763} +{"epoch": 0, "iter": 10041, "iter_tflops": 38.407921029108195, "iter_time": 0.5371572570800781, "loss": 0.1470857411623001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62549292007302, "step_time": 0.49563601684570313} +{"epoch": 0, "iter": 10042, "iter_tflops": 14.767591027362498, 
"iter_time": 1.3970520629882812, "loss": 0.16562120616436005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.582346093601878, "step_time": 1.17339821434021} +{"epoch": 0, "iter": 10043, "iter_tflops": 37.68886814303858, "iter_time": 0.5474054946899414, "loss": 0.06619765609502792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.210457476579705, "step_time": 0.5006276264190674} +{"epoch": 0, "iter": 10044, "iter_tflops": 40.418316816565685, "iter_time": 0.5104392051696778, "loss": 0.06035199016332626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25930967829397, "step_time": 0.46614133071899416} +{"epoch": 0, "iter": 10045, "iter_tflops": 33.98483689789677, "iter_time": 0.6070676040649414, "loss": 0.2399502396583557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.51008216503028, "step_time": 0.5500146179199219} +{"epoch": 0, "iter": 10046, "iter_tflops": 35.876266364340125, "iter_time": 0.5750624465942382, "loss": 0.31176403164863586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.09341119599945, "step_time": 0.527738380432129} +{"epoch": 0, "iter": 10047, "iter_tflops": 40.71282541435697, "iter_time": 0.5067467880249024, "loss": 0.353875994682312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.565807723134085, "step_time": 0.4629354782104492} +{"epoch": 0, "iter": 10048, "iter_tflops": 41.60996764540312, "iter_time": 0.49582094573974606, "loss": 0.19885042309761047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50516832081758, "step_time": 0.45337912750244136} +{"epoch": 0, "iter": 10049, "iter_tflops": 20.76333207231336, "iter_time": 0.9936311492919923, "loss": 0.6882151961326599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.04315150638651, "step_time": 0.9359411926269531} +{"epoch": 0, "iter": 10050, "iter_tflops": 9.165089945339398, "iter_time": 2.2510519409179683, "loss": 0.5548601746559143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.872136656367118, "step_time": 1.7377742614746094} +{"epoch": 0, "iter": 10051, "iter_tflops": 
14.48678439517729, "iter_time": 1.424132019042969, "loss": 0.7227595448493958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.376970585494625, "step_time": 1.1226602020263674} +{"epoch": 0, "iter": 10052, "iter_tflops": 31.593787929441703, "iter_time": 0.6530110778808593, "loss": 0.7150390148162842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.07956918877837, "step_time": 0.541789047241211} +{"epoch": 0, "iter": 10053, "iter_tflops": 20.826663912306977, "iter_time": 0.7727739791870117, "loss": 0.41303929686546326, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 21.94053617337039, "step_time": 0.7335419616699219} +{"epoch": 0, "iter": 10054, "iter_tflops": 6.572918372900006, "iter_time": 2.4485780944824223, "loss": 0.42229628562927246, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 7.7566948715869195, "step_time": 2.074891975402832} +{"epoch": 0, "iter": 10055, "iter_tflops": 9.829048482318914, "iter_time": 1.6374223785400392, "loss": 0.34915441274642944, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 12.411834253092564, "step_time": 1.2966902084350587} +{"epoch": 0, "iter": 10056, "iter_tflops": 23.759462805757423, "iter_time": 0.6773850097656251, "loss": 0.41920554637908936, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 25.604531814105105, "step_time": 0.6285724754333496} +{"epoch": 0, "iter": 10057, "iter_tflops": 13.473705993379403, "iter_time": 1.1124968414306642, "loss": 0.16568510234355927, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.464411843128273, "step_time": 1.036298988342285} +{"epoch": 0, "iter": 10058, "iter_tflops": 17.348448162083894, "iter_time": 0.864022834777832, "loss": 0.5120847225189209, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 23.678234740041603, "step_time": 0.6330478401184082} +{"epoch": 0, "iter": 10059, "iter_tflops": 25.981812142633753, "iter_time": 0.5769210891723632, "loss": 0.4543965458869934, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.56380167546229, "step_time": 0.5438094329833985} +{"epoch": 0, "iter": 10060, 
"iter_tflops": 26.561109604568358, "iter_time": 0.5643384475708009, "loss": 0.38441184163093567, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.059703591636904, "step_time": 0.5341986351013184} +{"epoch": 0, "iter": 10061, "iter_tflops": 29.72095799799764, "iter_time": 0.6941597747802735, "loss": 0.4321857690811157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.768813682795724, "step_time": 0.6494134063720703} +{"epoch": 0, "iter": 10062, "iter_tflops": 43.443482234840886, "iter_time": 0.4748950233459473, "loss": 0.30376291275024414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.231492242935396, "step_time": 0.4368079967498779} +{"epoch": 0, "iter": 10063, "iter_tflops": 44.904392610100565, "iter_time": 0.4594448852539062, "loss": 0.3807184398174286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.4910061870121, "step_time": 0.42546226882934574} +{"epoch": 0, "iter": 10064, "iter_tflops": 45.31490732261255, "iter_time": 0.4552827033996582, "loss": 0.33963945508003235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.55607398593143, "step_time": 0.42489212608337396} +{"epoch": 0, "iter": 10065, "iter_tflops": 41.63580092701498, "iter_time": 0.4955133094787598, "loss": 0.31636354327201843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38679513377369, "step_time": 0.4545615844726562} +{"epoch": 0, "iter": 10066, "iter_tflops": 8.86957465643631, "iter_time": 2.326052185058594, "loss": 0.26529496908187866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.070473448182625, "step_time": 1.8636143798828129} +{"epoch": 0, "iter": 10067, "iter_tflops": 11.680214900239175, "iter_time": 1.7663282470703126, "loss": 0.3174005448818207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.419876341764509, "step_time": 1.53735347366333} +{"epoch": 0, "iter": 10068, "iter_tflops": 29.150743767241906, "iter_time": 0.707738151550293, "loss": 0.32082250714302063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.6645681622096, "step_time": 0.5626983909606934} +{"epoch": 0, 
"iter": 10069, "iter_tflops": 16.876445818446186, "iter_time": 0.9925462036132813, "loss": 0.5009821653366089, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 17.637966327600665, "step_time": 0.9496929473876953} +{"epoch": 0, "iter": 10070, "iter_tflops": 16.05419449072202, "iter_time": 1.0433816680908203, "loss": 0.2067815065383911, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 21.485162265971653, "step_time": 0.7796381530761719} +{"epoch": 0, "iter": 10071, "iter_tflops": 29.35118109131832, "iter_time": 0.5706977233886719, "loss": 0.402279794216156, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 31.333702496132716, "step_time": 0.5345889854431153} +{"epoch": 0, "iter": 10072, "iter_tflops": 28.18819054576698, "iter_time": 0.5942436141967773, "loss": 0.3386777341365814, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 29.9173186889255, "step_time": 0.559898178100586} +{"epoch": 0, "iter": 10073, "iter_tflops": 25.137367919543383, "iter_time": 0.8207340393066407, "loss": 0.1337163746356964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.683140781744253, "step_time": 0.7731883468627928} +{"epoch": 0, "iter": 10074, "iter_tflops": 22.767404447208293, "iter_time": 0.9061680068969726, "loss": 0.2520182728767395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.68572631568574, "step_time": 0.6723351860046387} +{"epoch": 0, "iter": 10075, "iter_tflops": 48.84622051221985, "iter_time": 0.4223682670593261, "loss": 0.20185714960098267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.111052697082954, "step_time": 0.3884519786834717} +{"epoch": 0, "iter": 10076, "iter_tflops": 49.48978438272749, "iter_time": 0.4168758010864258, "loss": 0.2706902027130127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.22684249812976, "step_time": 0.3876069393157959} +{"epoch": 0, "iter": 10077, "iter_tflops": 37.44526359006881, "iter_time": 0.5509667053222657, "loss": 0.22410336136817932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45066015577295, "step_time": 0.5100310707092286} 
+{"epoch": 0, "iter": 10078, "iter_tflops": 14.12931905329794, "iter_time": 1.4601619110107422, "loss": 0.17511212825775146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.820571363993725, "step_time": 1.226539400100708} +{"epoch": 0, "iter": 10079, "iter_tflops": 36.53662187628317, "iter_time": 0.5646688842773437, "loss": 0.15445663034915924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.9401862655273, "step_time": 0.5165497570037842} +{"epoch": 0, "iter": 10080, "iter_tflops": 43.59536033556391, "iter_time": 0.47324057769775385, "loss": 0.3149365186691284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.75105952591235, "step_time": 0.43205519866943354} +{"epoch": 0, "iter": 10081, "iter_tflops": 20.72594579983193, "iter_time": 0.9954235000610351, "loss": 0.4033166766166687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.360169644867547, "step_time": 0.922671600341797} +{"epoch": 0, "iter": 10082, "iter_tflops": 28.217856118933405, "iter_time": 0.7311361083984376, "loss": 0.4043984115123749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.74299234318629, "step_time": 0.593820281982422} +{"epoch": 0, "iter": 10083, "iter_tflops": 48.99953076391712, "iter_time": 0.42104675674438474, "loss": 0.32131683826446533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36203943708928, "step_time": 0.386624906539917} +{"epoch": 0, "iter": 10084, "iter_tflops": 49.273978302092026, "iter_time": 0.4187015991210938, "loss": 0.37217068672180176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.57617625407929, "step_time": 0.3850796184539795} +{"epoch": 0, "iter": 10085, "iter_tflops": 39.14956256014548, "iter_time": 0.5153274765014648, "loss": 0.046300675719976425, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 42.18184203142432, "step_time": 0.478282699584961} +{"epoch": 0, "iter": 10086, "iter_tflops": 17.104377497490056, "iter_time": 1.1795135650634763, "loss": 0.10417443513870239, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 20.429814815146106, "step_time": 
0.987519733428955} +{"epoch": 0, "iter": 10087, "iter_tflops": 47.97605194686536, "iter_time": 0.4205190811157226, "loss": 0.10548756271600723, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 52.500309170914875, "step_time": 0.3842805042266845} +{"epoch": 0, "iter": 10088, "iter_tflops": 52.514099899454536, "iter_time": 0.3841795883178711, "loss": 0.11224103718996048, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 57.00380962762899, "step_time": 0.35392099952697753} +{"epoch": 0, "iter": 10089, "iter_tflops": 29.168730877538586, "iter_time": 0.7073017196655274, "loss": 0.19355322420597076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.903523764207673, "step_time": 0.6675967979431152} +{"epoch": 0, "iter": 10090, "iter_tflops": 9.837776181583264, "iter_time": 2.097129791259766, "loss": 0.18887752294540405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.454478310984063, "step_time": 1.6565200881958009} +{"epoch": 0, "iter": 10091, "iter_tflops": 15.717878227709067, "iter_time": 1.3125876922607422, "loss": 0.15903730690479279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.422092338995, "step_time": 1.1199104385375978} +{"epoch": 0, "iter": 10092, "iter_tflops": 37.69397459346034, "iter_time": 0.5473313369750976, "loss": 0.20284998416900635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.299955406020956, "step_time": 0.4995427551269531} +{"epoch": 0, "iter": 10093, "iter_tflops": 15.517201243020555, "iter_time": 0.9870632629394531, "loss": 0.39686742424964905, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 16.455058658128507, "step_time": 0.9308055114746094} +{"epoch": 0, "iter": 10094, "iter_tflops": 6.779142280561809, "iter_time": 2.259350616455078, "loss": 0.5346519351005554, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 9.226157591143968, "step_time": 1.6601124725341798} +{"epoch": 0, "iter": 10095, "iter_tflops": 9.492693810554195, "iter_time": 1.613499771118164, "loss": 0.29799601435661316, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 10.963533641754664, 
"step_time": 1.3970367393493652} +{"epoch": 0, "iter": 10096, "iter_tflops": 17.028091702073056, "iter_time": 0.8994818420410157, "loss": 0.32685786485671997, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 19.955120161663928, "step_time": 0.7675453300476075} +{"epoch": 0, "iter": 10097, "iter_tflops": 16.45646305419425, "iter_time": 1.0103929901123048, "loss": 0.38009917736053467, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 17.993955378220697, "step_time": 0.9240600280761718} +{"epoch": 0, "iter": 10098, "iter_tflops": 28.84158863556812, "iter_time": 0.5765110626220702, "loss": 0.3265761137008667, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 30.775621100696416, "step_time": 0.5402813758850098} +{"epoch": 0, "iter": 10099, "iter_tflops": 28.38456105674573, "iter_time": 0.5857936248779296, "loss": 0.2682253420352936, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 30.169537722095377, "step_time": 0.5511352233886719} +{"epoch": 0, "iter": 10100, "iter_tflops": 30.50111124784567, "iter_time": 0.5451439056396484, "loss": 0.2535493075847626, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 32.37627621723469, "step_time": 0.5135703315734863} +{"epoch": 0, "iter": 10101, "iter_tflops": 16.15305970852484, "iter_time": 1.2772251129150392, "loss": 0.7458932995796204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.75942473329502, "step_time": 1.231014419555664} +{"epoch": 0, "iter": 10102, "iter_tflops": 21.03459858167915, "iter_time": 0.9808170776367187, "loss": 0.7381841540336609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.357628134160233, "step_time": 0.8136050186157227} +{"epoch": 0, "iter": 10103, "iter_tflops": 45.27229168212891, "iter_time": 0.455711269378662, "loss": 0.8353560566902161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.802518927773136, "step_time": 0.42274648857116703} +{"epoch": 0, "iter": 10104, "iter_tflops": 44.31165056515703, "iter_time": 0.4655907249450683, "loss": 0.603293776512146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
47.63408062257535, "step_time": 0.4331162319183349} +{"epoch": 0, "iter": 10105, "iter_tflops": 32.69873522887881, "iter_time": 0.6309446945190429, "loss": 0.1575811505317688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.72702963385842, "step_time": 0.5940932388305664} +{"epoch": 0, "iter": 10106, "iter_tflops": 12.283970676800955, "iter_time": 1.6795134124755862, "loss": 0.21342137455940247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.957513790541379, "step_time": 1.4781352767944338} +{"epoch": 0, "iter": 10107, "iter_tflops": 10.777868571675985, "iter_time": 1.9142090454101561, "loss": 0.1389581710100174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.13343454531775, "step_time": 1.5708833389282226} +{"epoch": 0, "iter": 10108, "iter_tflops": 27.12036150124331, "iter_time": 0.7607233963012696, "loss": 0.17452922463417053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.33551804894817, "step_time": 0.455075719833374} +{"epoch": 0, "iter": 10109, "iter_tflops": 18.253676237176286, "iter_time": 0.8166985855102541, "loss": 0.43480151891708374, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 19.232961414552253, "step_time": 0.7751147232055665} +{"epoch": 0, "iter": 10110, "iter_tflops": 9.129208127206518, "iter_time": 1.632973129272461, "loss": 0.2617795467376709, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 10.672645579399932, "step_time": 1.396818759918213} +{"epoch": 0, "iter": 10111, "iter_tflops": 21.794654794618282, "iter_time": 0.6840095291137696, "loss": 0.41911980509757996, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 23.467994629283133, "step_time": 0.6352375564575196} +{"epoch": 0, "iter": 10112, "iter_tflops": 23.7593714525668, "iter_time": 0.6274472198486328, "loss": 0.3507460653781891, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 25.489288217271525, "step_time": 0.5848633918762207} +{"epoch": 0, "iter": 10113, "iter_tflops": 29.248000469861406, "iter_time": 0.705384750366211, "loss": 0.7762466669082642, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 32.11375049649702, "step_time": 0.6424379959106444} +{"epoch": 0, "iter": 10114, "iter_tflops": 42.96002917554478, "iter_time": 0.4802392807006836, "loss": 0.6714850664138794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.25361268006344, "step_time": 0.4460428562164306} +{"epoch": 0, "iter": 10115, "iter_tflops": 44.171097003659355, "iter_time": 0.4670722465515137, "loss": 0.696485698223114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.231842645898226, "step_time": 0.43680475616455083} +{"epoch": 0, "iter": 10116, "iter_tflops": 43.53975445939861, "iter_time": 0.47384496688842775, "loss": 0.6329512000083923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.507992026782695, "step_time": 0.4436031875610351} +{"epoch": 0, "iter": 10117, "iter_tflops": 31.875226943679714, "iter_time": 0.6472453842163086, "loss": 0.3959386944770813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.02306184795765, "step_time": 0.6063855628967285} +{"epoch": 0, "iter": 10118, "iter_tflops": 16.998499873371095, "iter_time": 1.2137008361816406, "loss": 0.3121362626552582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.512138031378733, "step_time": 1.0057992725372316} +{"epoch": 0, "iter": 10119, "iter_tflops": 49.72989943447412, "iter_time": 0.41486296463012695, "loss": 0.3525990843772888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.765071649516116, "step_time": 0.38372669982910157} +{"epoch": 0, "iter": 10120, "iter_tflops": 52.35164976109807, "iter_time": 0.3940867881774902, "loss": 0.3774465322494507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.863242728032205, "step_time": 0.3628195037841797} +{"epoch": 0, "iter": 10121, "iter_tflops": 36.41389788119095, "iter_time": 0.5665719604492188, "loss": 0.6821398735046387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.912897993228164, "step_time": 0.5301865081787109} +{"epoch": 0, "iter": 10122, "iter_tflops": 34.14689844031192, "iter_time": 0.6041864547729492, "loss": 0.6906672120094299, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 41.98884206664896, "step_time": 0.4913470458984375} +{"epoch": 0, "iter": 10123, "iter_tflops": 42.468061656357065, "iter_time": 0.48580257034301755, "loss": 0.616646945476532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61746153794085, "step_time": 0.45226307678222655} +{"epoch": 0, "iter": 10124, "iter_tflops": 45.56182416414873, "iter_time": 0.4528153533935547, "loss": 0.8544456958770752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.15649100478271, "step_time": 0.4197023239135742} +{"epoch": 0, "iter": 10125, "iter_tflops": 19.32757940791151, "iter_time": 0.7438664016723633, "loss": 0.5276081562042236, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 20.39398838558067, "step_time": 0.7049693603515625} +{"epoch": 0, "iter": 10126, "iter_tflops": 16.988727347846876, "iter_time": 0.8462751007080077, "loss": 0.4430694878101349, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 20.84735739500264, "step_time": 0.689638339996338} +{"epoch": 0, "iter": 10127, "iter_tflops": 21.089686819144465, "iter_time": 0.6817141036987305, "loss": 0.2155105620622635, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 22.6697662049458, "step_time": 0.63419873046875} +{"epoch": 0, "iter": 10128, "iter_tflops": 21.375154703342538, "iter_time": 0.6726097259521484, "loss": 0.29525384306907654, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 22.96700273467025, "step_time": 0.6259909973144531} +{"epoch": 0, "iter": 10129, "iter_tflops": 19.07124434133597, "iter_time": 1.0817906341552734, "loss": 0.007201638538390398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.651058258513498, "step_time": 0.9990332336425781} +{"epoch": 0, "iter": 10130, "iter_tflops": 21.640411017251743, "iter_time": 0.9533595962524415, "loss": 0.0015805738512426615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.585134862885642, "step_time": 0.8391694259643555} +{"epoch": 0, "iter": 10131, "iter_tflops": 56.37494494564579, "iter_time": 0.3659621047973633, "loss": 0.00722832977771759, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.44684920382764, "step_time": 0.33037845420837403} +{"epoch": 0, "iter": 10132, "iter_tflops": 55.87485289285448, "iter_time": 0.36923754501342776, "loss": 0.0014618730638176203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.73176217974245, "step_time": 0.3397084617614746} +{"epoch": 0, "iter": 10133, "iter_tflops": 20.084548674770126, "iter_time": 1.0272122039794922, "loss": 0.3573375344276428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.057437046506408, "step_time": 0.9797533035278321} +{"epoch": 0, "iter": 10134, "iter_tflops": 14.607981826411956, "iter_time": 1.4123164825439452, "loss": 0.21240337193012238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.567400387698953, "step_time": 1.0030967998504638} +{"epoch": 0, "iter": 10135, "iter_tflops": 37.05969375032611, "iter_time": 0.556698974609375, "loss": 0.1646214723587036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56649934935162, "step_time": 0.5085746574401855} +{"epoch": 0, "iter": 10136, "iter_tflops": 39.85129807486782, "iter_time": 0.517701919555664, "loss": 0.19456151127815247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.638770925330334, "step_time": 0.472769811630249} +{"epoch": 0, "iter": 10137, "iter_tflops": 18.53798433956915, "iter_time": 1.1129092102050782, "loss": 0.2779262661933899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.089686632664158, "step_time": 1.0269494934082033} +{"epoch": 0, "iter": 10138, "iter_tflops": 21.28563373040331, "iter_time": 0.9692496719360351, "loss": 0.24558120965957642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.24865268907401, "step_time": 0.7859867610931396} +{"epoch": 0, "iter": 10139, "iter_tflops": 40.57989359600263, "iter_time": 0.5084067916870117, "loss": 0.33031654357910156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.54730595918311, "step_time": 0.4631277484893799} +{"epoch": 0, "iter": 10140, "iter_tflops": 43.390506736102736, "iter_time": 0.4754748229980469, "loss": 
0.2291068732738495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.48367283394645, "step_time": 0.43448815727233886} +{"epoch": 0, "iter": 10141, "iter_tflops": 18.473778151262586, "iter_time": 1.1167771606445314, "loss": 0.21608875691890717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.81534884934754, "step_time": 1.0411673126220702} +{"epoch": 0, "iter": 10142, "iter_tflops": 21.370794774799542, "iter_time": 0.9653872833251953, "loss": 0.32119429111480713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.571462091607106, "step_time": 0.8396363811492922} +{"epoch": 0, "iter": 10143, "iter_tflops": 45.29485266538863, "iter_time": 0.4554842834472656, "loss": 0.1311589926481247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.74699236402348, "step_time": 0.4232280292510986} +{"epoch": 0, "iter": 10144, "iter_tflops": 46.19141922197589, "iter_time": 0.44664342117309574, "loss": 0.2947065234184265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.107502238335975, "step_time": 0.4117366180419922} +{"epoch": 0, "iter": 10145, "iter_tflops": 30.699650042955934, "iter_time": 0.6720302505493164, "loss": 0.7642710208892822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.616214995324604, "step_time": 0.6325410079956055} +{"epoch": 0, "iter": 10146, "iter_tflops": 16.27119525880378, "iter_time": 1.2679519348144532, "loss": 0.7257228493690491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.13565550743484, "step_time": 1.0781492958068848} +{"epoch": 0, "iter": 10147, "iter_tflops": 41.88835516392923, "iter_time": 0.49252574920654296, "loss": 0.8246152400970459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.97318429281299, "step_time": 0.4587421112060547} +{"epoch": 0, "iter": 10148, "iter_tflops": 43.584645211169445, "iter_time": 0.47335692214965824, "loss": 0.733505129814148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.756382274720394, "step_time": 0.44124657440185544} +{"epoch": 0, "iter": 10149, "iter_tflops": 37.4784011310938, "iter_time": 
0.5504795532226563, "loss": 0.5837518572807312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.217735748999296, "step_time": 0.5129849586486817} +{"epoch": 0, "iter": 10150, "iter_tflops": 19.95783364289725, "iter_time": 1.033734115600586, "loss": 0.5058164000511169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.385757061466922, "step_time": 0.9216169662475586} +{"epoch": 0, "iter": 10151, "iter_tflops": 48.02757005371492, "iter_time": 0.42956771469116206, "loss": 0.7961608171463013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00706149755585, "step_time": 0.3966979274749756} +{"epoch": 0, "iter": 10152, "iter_tflops": 49.40827630617695, "iter_time": 0.41756351470947267, "loss": 0.750672459602356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.27353064354098, "step_time": 0.38726724624633785} +{"epoch": 0, "iter": 10153, "iter_tflops": 22.475268731663185, "iter_time": 0.44671607589721685, "loss": 0.002702526981011033, "lr": 3e-05, "seqlen": 4048.0, "step_tflops": 24.66251509533318, "step_time": 0.40709813308715825} +{"epoch": 0, "iter": 10154, "iter_tflops": 7.885325881757164, "iter_time": 1.2732592163085936, "loss": 0.0018255303148180246, "lr": 3e-05, "seqlen": 4048.0, "step_tflops": 10.003556161604752, "step_time": 1.003649471282959} +{"epoch": 0, "iter": 10155, "iter_tflops": 16.686059342130214, "iter_time": 0.601703712463379, "loss": 0.0044899289496243, "lr": 3e-05, "seqlen": 4048.0, "step_tflops": 18.750727624889606, "step_time": 0.5354492931365967} +{"epoch": 0, "iter": 10156, "iter_tflops": 21.137580858324142, "iter_time": 0.4749864196777344, "loss": 0.002876320155337453, "lr": 3e-05, "seqlen": 4048.0, "step_tflops": 23.380722751784397, "step_time": 0.42941631698608407} +{"epoch": 0, "iter": 10157, "iter_tflops": 19.356808762084, "iter_time": 1.0658313446044922, "loss": 0.00445379177108407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.821997511036397, "step_time": 0.9908316192626955} +{"epoch": 0, "iter": 10158, "iter_tflops": 
15.220981146541583, "iter_time": 1.3554378204345703, "loss": 0.014765406027436256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.366927172264568, "step_time": 1.0652744922637938} +{"epoch": 0, "iter": 10159, "iter_tflops": 49.34337319407572, "iter_time": 0.41811275100708006, "loss": 0.02892787754535675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.42961582663574, "step_time": 0.37904168891906737} +{"epoch": 0, "iter": 10160, "iter_tflops": 47.418355977600065, "iter_time": 0.4350866470336914, "loss": 0.01403934508562088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.245736666463294, "step_time": 0.394885684967041} +{"epoch": 0, "iter": 10161, "iter_tflops": 16.489946494980916, "iter_time": 1.2511316223144533, "loss": 0.6684349775314331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.408506340995604, "step_time": 1.1851156616210936} +{"epoch": 0, "iter": 10162, "iter_tflops": 17.409973914266267, "iter_time": 1.1850157623291013, "loss": 0.5770204663276672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.24706340042954, "step_time": 0.8874709529876709} +{"epoch": 0, "iter": 10163, "iter_tflops": 43.13791108392207, "iter_time": 0.4782589836120605, "loss": 0.7772951126098633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.3931936422562, "step_time": 0.4447008686065673} +{"epoch": 0, "iter": 10164, "iter_tflops": 50.05427744947136, "iter_time": 0.41217443466186526, "loss": 0.6552273035049438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.22517132951151, "step_time": 0.3804707851409912} +{"epoch": 0, "iter": 10165, "iter_tflops": 43.99655055008439, "iter_time": 0.46892525100708, "loss": 0.7970970869064331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86539865429117, "step_time": 0.43102312088012695} +{"epoch": 0, "iter": 10166, "iter_tflops": 45.652977951185804, "iter_time": 0.45191123199462896, "loss": 0.7232039570808411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.52731141468021, "step_time": 0.4165599327087402} +{"epoch": 0, "iter": 10167, 
"iter_tflops": 43.27386164675794, "iter_time": 0.47675646972656255, "loss": 0.6872109174728394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.57934173104808, "step_time": 0.4429236812591552} +{"epoch": 0, "iter": 10168, "iter_tflops": 44.64530754934586, "iter_time": 0.4621111297607422, "loss": 0.7812500596046448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.10764180023381, "step_time": 0.4288527297973633} +{"epoch": 0, "iter": 10169, "iter_tflops": 31.23370549884468, "iter_time": 0.6605394134521484, "loss": 0.1968967169523239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.21804606656917, "step_time": 0.6210808868408204} +{"epoch": 0, "iter": 10170, "iter_tflops": 34.974159616468704, "iter_time": 0.5898953323364258, "loss": 0.1718747317790985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.46308605397963, "step_time": 0.536386848449707} +{"epoch": 0, "iter": 10171, "iter_tflops": 48.6849535074113, "iter_time": 0.4237673454284668, "loss": 0.12480413168668747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.078656481807755, "step_time": 0.3886890678405761} +{"epoch": 0, "iter": 10172, "iter_tflops": 47.72359776411115, "iter_time": 0.43230381774902343, "loss": 0.19401995837688446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.48909424800452, "step_time": 0.4006886081695557} +{"epoch": 0, "iter": 10173, "iter_tflops": 45.584438038207026, "iter_time": 0.4525907173156739, "loss": 0.3515809178352356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.812506267339984, "step_time": 0.41417497444152834} +{"epoch": 0, "iter": 10174, "iter_tflops": 47.350959689959694, "iter_time": 0.43570592117309564, "loss": 0.2993900179862976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.59339173916346, "step_time": 0.399878604888916} +{"epoch": 0, "iter": 10175, "iter_tflops": 44.41704203180135, "iter_time": 0.46448598480224607, "loss": 0.390450656414032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69144452442739, "step_time": 0.4325952739715576} +{"epoch": 0, "iter": 
10176, "iter_tflops": 45.154695241837636, "iter_time": 0.456898078918457, "loss": 0.2426072359085083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.83834634812753, "step_time": 0.4224363651275635} +{"epoch": 0, "iter": 10177, "iter_tflops": 36.46776203409673, "iter_time": 0.40319377517700195, "loss": 0.004803641699254513, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 40.35142446505885, "step_time": 0.36438799476623535} +{"epoch": 0, "iter": 10178, "iter_tflops": 30.07678573332904, "iter_time": 0.48886788558959965, "loss": 0.0031092611607164145, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 33.36412924361927, "step_time": 0.4407000865936279} +{"epoch": 0, "iter": 10179, "iter_tflops": 34.54870928579533, "iter_time": 0.42558969497680665, "loss": 0.036709100008010864, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 38.281536336900636, "step_time": 0.3840905055999756} +{"epoch": 0, "iter": 10180, "iter_tflops": 30.768231812065842, "iter_time": 0.47788169097900385, "loss": 0.011348571628332138, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 34.150650312816616, "step_time": 0.43055035591125496} +{"epoch": 0, "iter": 10181, "iter_tflops": 16.74506386196038, "iter_time": 1.2320701599121093, "loss": 0.640309751033783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.24331988807325, "step_time": 1.1308848190307617} +{"epoch": 0, "iter": 10182, "iter_tflops": 20.24518268963566, "iter_time": 1.0190618591308596, "loss": 0.588728666305542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.95734469455642, "step_time": 0.8986707210540772} +{"epoch": 0, "iter": 10183, "iter_tflops": 45.989819559677834, "iter_time": 0.4486013145446778, "loss": 0.7807493805885315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.08286015692907, "step_time": 0.4119392032623291} +{"epoch": 0, "iter": 10184, "iter_tflops": 43.43995095367298, "iter_time": 0.47493362808227546, "loss": 0.6987648010253906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.558821687441394, "step_time": 0.44311889266967774} 
+{"epoch": 0, "iter": 10185, "iter_tflops": 44.618601138486284, "iter_time": 0.46238772583007814, "loss": 0.4739387333393097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71457019238222, "step_time": 0.42350971031188966} +{"epoch": 0, "iter": 10186, "iter_tflops": 35.191646750238824, "iter_time": 0.5862497329711914, "loss": 0.32589200139045715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.42145124133977, "step_time": 0.536968095779419} +{"epoch": 0, "iter": 10187, "iter_tflops": 39.98483990067123, "iter_time": 0.5159728927612305, "loss": 0.4293115437030792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.732293364117766, "step_time": 0.47175878334045407} +{"epoch": 0, "iter": 10188, "iter_tflops": 40.39512642819215, "iter_time": 0.5107322425842284, "loss": 0.32658320665359497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.966395931558495, "step_time": 0.46924686622619627} +{"epoch": 0, "iter": 10189, "iter_tflops": 24.507763081348397, "iter_time": 0.8418187103271484, "loss": 0.0060048410668969154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.322767770458565, "step_time": 0.7837737159729005} +{"epoch": 0, "iter": 10190, "iter_tflops": 18.097843357323406, "iter_time": 1.1399752502441407, "loss": 0.005487933289259672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.568560310593913, "step_time": 0.8753650302886964} +{"epoch": 0, "iter": 10191, "iter_tflops": 47.515450419449394, "iter_time": 0.4341975784301758, "loss": 0.009163947775959969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.675076530804205, "step_time": 0.39166708183288573} +{"epoch": 0, "iter": 10192, "iter_tflops": 49.87723548813424, "iter_time": 0.41363747024536135, "loss": 0.009963667020201683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.06135862937181, "step_time": 0.3746927795410156} +{"epoch": 0, "iter": 10193, "iter_tflops": 13.006693727988138, "iter_time": 1.5861904602050783, "loss": 0.8642650246620178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.846278464524698, 
"step_time": 1.490010009765625} +{"epoch": 0, "iter": 10194, "iter_tflops": 18.265385012812548, "iter_time": 1.1295186767578125, "loss": 0.62680983543396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.718701802968287, "step_time": 0.9499229602813721} +{"epoch": 0, "iter": 10195, "iter_tflops": 34.404033114045454, "iter_time": 0.599670783996582, "loss": 0.7835801243782043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.32667220941104, "step_time": 0.5527171936035156} +{"epoch": 0, "iter": 10196, "iter_tflops": 39.393506256135964, "iter_time": 0.5237181320190429, "loss": 0.5911948680877686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.694822384849516, "step_time": 0.4832223758697509} +{"epoch": 0, "iter": 10197, "iter_tflops": 20.14645110216968, "iter_time": 1.0240559692382811, "loss": 0.6221774816513062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.49969038415828, "step_time": 0.9595995635986329} +{"epoch": 0, "iter": 10198, "iter_tflops": 17.694105070365705, "iter_time": 1.165986831665039, "loss": 0.7160335779190063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.70368002292746, "step_time": 0.9505804309844972} +{"epoch": 0, "iter": 10199, "iter_tflops": 38.64427612695431, "iter_time": 0.5338719100952148, "loss": 0.8488444685935974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22857961731004, "step_time": 0.4885576000213623} +{"epoch": 0, "iter": 10200, "iter_tflops": 35.25857759052764, "iter_time": 0.5851368637084962, "loss": 0.7297731041908264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.45860771422232, "step_time": 0.5364493083953857} +{"epoch": 0, "iter": 10201, "iter_tflops": 21.02724260681309, "iter_time": 0.7361913681030273, "loss": 0.025001874193549156, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 22.818342016112137, "step_time": 0.6784048767089844} +{"epoch": 0, "iter": 10202, "iter_tflops": 5.159120528975623, "iter_time": 3.000525848388672, "loss": 0.01809466816484928, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 
5.96106960699706, "step_time": 2.596861892700195} +{"epoch": 0, "iter": 10203, "iter_tflops": 10.361364957375754, "iter_time": 1.494018844604492, "loss": 0.024418199434876442, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 14.409053641081922, "step_time": 1.07432971572876} +{"epoch": 0, "iter": 10204, "iter_tflops": 29.11215401351248, "iter_time": 0.5317392349243164, "loss": 0.05502207204699516, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 31.993708324445446, "step_time": 0.48384745979309085} +{"epoch": 0, "iter": 10205, "iter_tflops": 15.190733581547917, "iter_time": 1.1676486511230468, "loss": 0.4043521285057068, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 16.29856383286087, "step_time": 1.0882823638916017} +{"epoch": 0, "iter": 10206, "iter_tflops": 20.177251202949613, "iter_time": 0.8790810699462891, "loss": 0.4192694425582886, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 23.994067375084228, "step_time": 0.7392427177429199} +{"epoch": 0, "iter": 10207, "iter_tflops": 31.261940973105062, "iter_time": 0.5673812637329102, "loss": 0.4024643301963806, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 33.19717937146603, "step_time": 0.53430562210083} +{"epoch": 0, "iter": 10208, "iter_tflops": 30.72186418016826, "iter_time": 0.5773555755615234, "loss": 0.328078031539917, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 32.70037235964878, "step_time": 0.542423168182373} +{"epoch": 0, "iter": 10209, "iter_tflops": 30.351571779197727, "iter_time": 0.6797372360229492, "loss": 0.5339614152908325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.299099251548085, "step_time": 0.6387513580322265} +{"epoch": 0, "iter": 10210, "iter_tflops": 13.462762340367448, "iter_time": 1.5324561920166015, "loss": 0.46853479743003845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.505798826589555, "step_time": 1.2499300231933594} +{"epoch": 0, "iter": 10211, "iter_tflops": 34.83457130584266, "iter_time": 0.592259147644043, "loss": 0.5172591805458069, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 38.162510936004466, "step_time": 0.5406115322113038} +{"epoch": 0, "iter": 10212, "iter_tflops": 35.241369320552195, "iter_time": 0.5854225845336914, "loss": 0.4496995806694031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.269027712418506, "step_time": 0.5391068115234374} +{"epoch": 0, "iter": 10213, "iter_tflops": 19.388505282588703, "iter_time": 1.064088912963867, "loss": 0.613972544670105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.76826511888474, "step_time": 0.9933951339721679} +{"epoch": 0, "iter": 10214, "iter_tflops": 27.103932706824832, "iter_time": 0.7611845016479492, "loss": 0.6969607472419739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.073924782227856, "step_time": 0.6860126724243164} +{"epoch": 0, "iter": 10215, "iter_tflops": 34.670084827380286, "iter_time": 0.5950690231323243, "loss": 0.6162508130073547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82912455538438, "step_time": 0.5453759174346924} +{"epoch": 0, "iter": 10216, "iter_tflops": 40.11589860075673, "iter_time": 0.5142872085571288, "loss": 0.6663702726364136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.368666154165325, "step_time": 0.4757142734527588} +{"epoch": 0, "iter": 10217, "iter_tflops": 8.892364142004702, "iter_time": 1.0159856872558592, "loss": 0.00988141167908907, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 9.38659880224763, "step_time": 0.9624907684326173} +{"epoch": 0, "iter": 10218, "iter_tflops": 9.832737366833957, "iter_time": 0.9188198928833008, "loss": 0.0065014478750526905, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 11.35463922618267, "step_time": 0.7956672611236572} +{"epoch": 0, "iter": 10219, "iter_tflops": 25.472532603729586, "iter_time": 0.3546767349243164, "loss": 0.017583105713129044, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 28.174032428286928, "step_time": 0.32066814422607426} +{"epoch": 0, "iter": 10220, "iter_tflops": 25.57833911253658, "iter_time": 0.3532095909118652, "loss": 0.0012472759699448943, "lr": 3e-05, 
"seqlen": 3648.0, "step_tflops": 28.186729515793385, "step_time": 0.3205236949920654} +{"epoch": 0, "iter": 10221, "iter_tflops": 33.80288867936433, "iter_time": 0.6103352203369141, "loss": 0.14394958317279816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.02231801405103, "step_time": 0.5727308692932129} +{"epoch": 0, "iter": 10222, "iter_tflops": 14.060074927906806, "iter_time": 1.46735302734375, "loss": 0.09395086020231247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.442209436096963, "step_time": 1.118688819885254} +{"epoch": 0, "iter": 10223, "iter_tflops": 41.761974998192834, "iter_time": 0.49401623153686525, "loss": 0.04303542152047157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.795986155750896, "step_time": 0.45050003814697265} +{"epoch": 0, "iter": 10224, "iter_tflops": 45.25803294761485, "iter_time": 0.4558548431396484, "loss": 0.045654598623514175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.726272630993996, "step_time": 0.4148932228088379} +{"epoch": 0, "iter": 10225, "iter_tflops": 15.16067244087839, "iter_time": 1.3608297119140627, "loss": 0.29931414127349854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.139441753723393, "step_time": 1.2783027954101562} +{"epoch": 0, "iter": 10226, "iter_tflops": 17.023878457355917, "iter_time": 1.2118914947509765, "loss": 0.4137590527534485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.856614485615122, "step_time": 0.9891870765686035} +{"epoch": 0, "iter": 10227, "iter_tflops": 46.59442465226981, "iter_time": 0.44278030395507806, "loss": 0.23012496531009674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.54472201824625, "step_time": 0.4081750316619873} +{"epoch": 0, "iter": 10228, "iter_tflops": 45.01725539760378, "iter_time": 0.4582930107116699, "loss": 0.3467617630958557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.75944452226316, "step_time": 0.4231199455261231} +{"epoch": 0, "iter": 10229, "iter_tflops": 22.33168934090578, "iter_time": 0.9238483123779299, "loss": 
0.7364405989646912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.349443721887347, "step_time": 0.8835796585083008} +{"epoch": 0, "iter": 10230, "iter_tflops": 15.400313280949652, "iter_time": 1.3396541442871093, "loss": 0.6376814246177673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.603299165147757, "step_time": 0.9549973526000977} +{"epoch": 0, "iter": 10231, "iter_tflops": 48.26689685915168, "iter_time": 0.42743774414062496, "loss": 0.6473441123962402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.189023776612814, "step_time": 0.39531480026245114} +{"epoch": 0, "iter": 10232, "iter_tflops": 51.74968474898346, "iter_time": 0.39867090225219726, "loss": 0.6051418781280518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.914705232017894, "step_time": 0.3689743766784668} +{"epoch": 0, "iter": 10233, "iter_tflops": 34.4088464095979, "iter_time": 0.599586898803711, "loss": 0.827851414680481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.774228443517345, "step_time": 0.5610204315185547} +{"epoch": 0, "iter": 10234, "iter_tflops": 10.074470964510116, "iter_time": 2.0478587493896483, "loss": 0.5736692547798157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.71150390466018, "step_time": 1.7616092414855955} +{"epoch": 0, "iter": 10235, "iter_tflops": 14.543358972646825, "iter_time": 1.418592056274414, "loss": 0.8668508529663086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.972407450961757, "step_time": 1.215566711425781} +{"epoch": 0, "iter": 10236, "iter_tflops": 39.708928440671755, "iter_time": 0.5195580520629882, "loss": 0.6169605255126953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36025753073039, "step_time": 0.47580652618408203} +{"epoch": 0, "iter": 10237, "iter_tflops": 14.671153152694375, "iter_time": 1.0244807739257813, "loss": 0.30858150124549866, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 15.66067490117425, "step_time": 0.9597488250732422} +{"epoch": 0, "iter": 10238, "iter_tflops": 10.457202302166351, "iter_time": 
1.4373169708251954, "loss": 0.3581301271915436, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.237735999386933, "step_time": 1.1354142684936521} +{"epoch": 0, "iter": 10239, "iter_tflops": 22.858003323739187, "iter_time": 0.6575514984130859, "loss": 0.23362189531326294, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.646070103619174, "step_time": 0.6098462867736816} +{"epoch": 0, "iter": 10240, "iter_tflops": 22.777248585461294, "iter_time": 0.6598827896118165, "loss": 0.3168242573738098, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.552162743699743, "step_time": 0.6121788330078125} +{"epoch": 0, "iter": 10241, "iter_tflops": 16.151308624565008, "iter_time": 1.277363586425781, "loss": 0.5433973670005798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.368757408098848, "step_time": 1.1878278350830078} +{"epoch": 0, "iter": 10242, "iter_tflops": 15.90061569734265, "iter_time": 1.2975028076171873, "loss": 0.6862659454345703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.13344304646669, "step_time": 0.9321230983734131} +{"epoch": 0, "iter": 10243, "iter_tflops": 39.84798251014349, "iter_time": 0.5177449951171874, "loss": 0.6815184950828552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94167834960224, "step_time": 0.480444507598877} +{"epoch": 0, "iter": 10244, "iter_tflops": 48.32164920261888, "iter_time": 0.42695342254638674, "loss": 0.6561105251312256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.2353478197546, "step_time": 0.3949642219543458} +{"epoch": 0, "iter": 10245, "iter_tflops": 21.062972837055018, "iter_time": 0.9794958038330076, "loss": 0.007142570335417986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.91237833994834, "step_time": 0.9415268936157227} +{"epoch": 0, "iter": 10246, "iter_tflops": 14.911808383663407, "iter_time": 1.3835406799316405, "loss": 0.0047017731703817844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.026054275456545, "step_time": 0.9366676959991455} +{"epoch": 0, "iter": 10247, "iter_tflops": 46.06855494058678, 
"iter_time": 0.4478346138000488, "loss": 0.00295756827108562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.86364253086311, "step_time": 0.4056157302856445} +{"epoch": 0, "iter": 10248, "iter_tflops": 45.90990967961781, "iter_time": 0.44938214111328123, "loss": 0.02312341146171093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.51986167247286, "step_time": 0.4083758907318115} +{"epoch": 0, "iter": 10249, "iter_tflops": 18.635842994185072, "iter_time": 1.0670192260742186, "loss": 0.03576349467039108, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 20.141307168457924, "step_time": 0.9872647590637208} +{"epoch": 0, "iter": 10250, "iter_tflops": 20.203143781920044, "iter_time": 0.9842429962158203, "loss": 0.022391708567738533, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 24.788279449869055, "step_time": 0.8021856784820557} +{"epoch": 0, "iter": 10251, "iter_tflops": 42.045020577543134, "iter_time": 0.472940731048584, "loss": 0.061365239322185516, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 46.669818410683156, "step_time": 0.42607414054870607} +{"epoch": 0, "iter": 10252, "iter_tflops": 45.51006926185419, "iter_time": 0.43693193817138676, "loss": 0.042762283235788345, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 50.08699446856573, "step_time": 0.39700531005859374} +{"epoch": 0, "iter": 10253, "iter_tflops": 22.649416367097515, "iter_time": 0.9108885269165039, "loss": 0.48178455233573914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.249056777462886, "step_time": 0.8507998352050782} +{"epoch": 0, "iter": 10254, "iter_tflops": 10.987289064298121, "iter_time": 1.8777237396240236, "loss": 0.4603565037250519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.472204222506578, "step_time": 1.6541657867431643} +{"epoch": 0, "iter": 10255, "iter_tflops": 10.198756089388525, "iter_time": 2.022902923583984, "loss": 0.37581440806388855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.929241570499583, "step_time": 1.7294555892944334} +{"epoch": 0, "iter": 10256, 
"iter_tflops": 29.576876516016853, "iter_time": 0.6975413208007814, "loss": 0.3946993350982666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.39500707519808, "step_time": 0.566866039276123} +{"epoch": 0, "iter": 10257, "iter_tflops": 13.443241425813591, "iter_time": 1.408397964477539, "loss": 0.39919841289520264, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 14.477082188804582, "step_time": 1.3078211212158204} +{"epoch": 0, "iter": 10258, "iter_tflops": 17.07911111946953, "iter_time": 1.108572555541992, "loss": 0.41760438680648804, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 22.05785775385717, "step_time": 0.8583532485961914} +{"epoch": 0, "iter": 10259, "iter_tflops": 34.197196400941195, "iter_time": 0.5536545639038086, "loss": 0.38603582978248596, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 36.419451540446325, "step_time": 0.5198714714050293} +{"epoch": 0, "iter": 10260, "iter_tflops": 32.66787913582087, "iter_time": 0.5795734024047852, "loss": 0.4687477648258209, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 34.73735197991251, "step_time": 0.5450453987121582} +{"epoch": 0, "iter": 10261, "iter_tflops": 24.540532391775095, "iter_time": 0.8406946182250977, "loss": 0.21013674139976501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.93560999997713, "step_time": 0.7954736175537109} +{"epoch": 0, "iter": 10262, "iter_tflops": 20.99238525387931, "iter_time": 0.9827893905639649, "loss": 0.22009333968162537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.345106064969034, "step_time": 0.8140069904327392} +{"epoch": 0, "iter": 10263, "iter_tflops": 48.31065099554915, "iter_time": 0.42705062103271485, "loss": 0.16717302799224854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.341940350840964, "step_time": 0.39415989112854} +{"epoch": 0, "iter": 10264, "iter_tflops": 46.49562882161939, "iter_time": 0.44372114181518557, "loss": 0.15564942359924316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30940984960896, "step_time": 0.4100841884613037} +{"epoch": 0, 
"iter": 10265, "iter_tflops": 35.38044917792182, "iter_time": 0.5831212997436523, "loss": 0.40208250284194946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.95692406606525, "step_time": 0.5435396575927735} +{"epoch": 0, "iter": 10266, "iter_tflops": 15.309756562668541, "iter_time": 1.347578155517578, "loss": 0.22917385399341583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.634839011383566, "step_time": 1.1699054069519044} +{"epoch": 0, "iter": 10267, "iter_tflops": 37.18313125306283, "iter_time": 0.5548508911132812, "loss": 0.3275235593318939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.932398001110016, "step_time": 0.5040284595489501} +{"epoch": 0, "iter": 10268, "iter_tflops": 41.71110681711136, "iter_time": 0.49461870193481444, "loss": 0.29429879784584045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.73977713597725, "step_time": 0.4510536518096923} +{"epoch": 0, "iter": 10269, "iter_tflops": 23.56153128002336, "iter_time": 0.8756261749267578, "loss": 0.20014800131320953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.141430620313894, "step_time": 0.8206014137268067} +{"epoch": 0, "iter": 10270, "iter_tflops": 7.374538638579252, "iter_time": 2.7976114196777346, "loss": 0.18330086767673492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.210676365661996, "step_time": 2.020541320800781} +{"epoch": 0, "iter": 10271, "iter_tflops": 17.56071904276005, "iter_time": 1.1748433227539061, "loss": 0.18752890825271606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.651667374188825, "step_time": 1.0498393402099608} +{"epoch": 0, "iter": 10272, "iter_tflops": 49.07427847348343, "iter_time": 0.42040543746948245, "loss": 0.1791355311870575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.093086201253435, "step_time": 0.3885834293365479} +{"epoch": 0, "iter": 10273, "iter_tflops": 18.74009422056543, "iter_time": 0.8020404891967774, "loss": 0.3526313304901123, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 19.841403404293228, "step_time": 0.7575227432250977} 
+{"epoch": 0, "iter": 10274, "iter_tflops": 9.601906857230727, "iter_time": 1.5653468170166016, "loss": 0.36555394530296326, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 11.285610791341844, "step_time": 1.3318122177124023} +{"epoch": 0, "iter": 10275, "iter_tflops": 25.639107537535274, "iter_time": 0.5862261123657226, "loss": 0.2950179874897003, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.27534751997066, "step_time": 0.5510585823059082} +{"epoch": 0, "iter": 10276, "iter_tflops": 26.930313408982766, "iter_time": 0.5581188049316407, "loss": 0.4425945580005646, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.643130885097932, "step_time": 0.5247441139221192} +{"epoch": 0, "iter": 10277, "iter_tflops": 45.04892533568135, "iter_time": 0.4579708251953125, "loss": 0.41008976101875305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13225588164523, "step_time": 0.41990934753417963} +{"epoch": 0, "iter": 10278, "iter_tflops": 8.487656453142428, "iter_time": 2.4307173156738284, "loss": 0.4921301305294037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.624725897712374, "step_time": 1.9418000717163086} +{"epoch": 0, "iter": 10279, "iter_tflops": 15.662086137655818, "iter_time": 1.317263442993164, "loss": 0.6017374396324158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.274719053320734, "step_time": 1.1289417610168455} +{"epoch": 0, "iter": 10280, "iter_tflops": 32.62585891294599, "iter_time": 0.6323540344238281, "loss": 0.5880155563354492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.4531159190563, "step_time": 0.49769705009460447} +{"epoch": 0, "iter": 10281, "iter_tflops": 23.272354366701798, "iter_time": 0.6598969650268555, "loss": 0.41652917861938477, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.855657829097193, "step_time": 0.6178615798950196} +{"epoch": 0, "iter": 10282, "iter_tflops": 19.311575741386946, "iter_time": 0.7952409591674805, "loss": 0.3674130141735077, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 20.90310446826762, "step_time": 
0.7346925926208496} +{"epoch": 0, "iter": 10283, "iter_tflops": 26.590102522739883, "iter_time": 0.5775591125488282, "loss": 0.2995685935020447, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 28.343771703427404, "step_time": 0.5418247146606445} +{"epoch": 0, "iter": 10284, "iter_tflops": 27.324448658688734, "iter_time": 0.5620371780395508, "loss": 0.34755977988243103, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.076442648963404, "step_time": 0.5281717643737792} +{"epoch": 0, "iter": 10285, "iter_tflops": 31.80059039507769, "iter_time": 0.6487644805908204, "loss": 0.643333911895752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.86605267544834, "step_time": 0.6091968765258788} +{"epoch": 0, "iter": 10286, "iter_tflops": 11.411464171040363, "iter_time": 1.8079269409179688, "loss": 0.5545969605445862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.707532391950926, "step_time": 1.4027569656372068} +{"epoch": 0, "iter": 10287, "iter_tflops": 10.406103322330635, "iter_time": 1.9825954895019533, "loss": 0.6850066781044006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.324682498010977, "step_time": 1.5483365936279296} +{"epoch": 0, "iter": 10288, "iter_tflops": 29.320370477156104, "iter_time": 0.7036436843872069, "loss": 0.573705792427063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.80743532957192, "step_time": 0.6288542003631592} +{"epoch": 0, "iter": 10289, "iter_tflops": 16.960047347143867, "iter_time": 0.9127376937866211, "loss": 0.3573383092880249, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 18.098719761777986, "step_time": 0.855313232421875} +{"epoch": 0, "iter": 10290, "iter_tflops": 6.938139560359304, "iter_time": 2.2311564025878905, "loss": 0.5424304008483887, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 7.869859186003612, "step_time": 1.9670078125} +{"epoch": 0, "iter": 10291, "iter_tflops": 10.315649692595402, "iter_time": 1.5006398010253905, "loss": 0.41064488887786865, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 12.859382249110844, 
"step_time": 1.2037961235046386} +{"epoch": 0, "iter": 10292, "iter_tflops": 23.392312831373545, "iter_time": 0.6617590408325195, "loss": 0.5457818508148193, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.158528929825824, "step_time": 0.6153012580871582} +{"epoch": 0, "iter": 10293, "iter_tflops": 17.772563294419037, "iter_time": 0.9147964477539062, "loss": 0.49834707379341125, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 19.09318294385388, "step_time": 0.8515226516723634} +{"epoch": 0, "iter": 10294, "iter_tflops": 7.139461819737282, "iter_time": 2.2772413635253907, "loss": 0.3446978032588959, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 8.252289279860912, "step_time": 1.9701536407470703} +{"epoch": 0, "iter": 10295, "iter_tflops": 8.209960822692858, "iter_time": 1.9803112487792969, "loss": 0.4110353887081146, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 9.489665608063081, "step_time": 1.7132613983154297} +{"epoch": 0, "iter": 10296, "iter_tflops": 27.88610586944728, "iter_time": 0.5830243148803711, "loss": 0.3451063930988312, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 29.690059475301236, "step_time": 0.547600040435791} +{"epoch": 0, "iter": 10297, "iter_tflops": 17.98603848027772, "iter_time": 0.8629465637207032, "loss": 0.4524689018726349, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 18.891232575327425, "step_time": 0.8215975341796875} +{"epoch": 0, "iter": 10298, "iter_tflops": 9.105664554875466, "iter_time": 1.7045422668457033, "loss": 0.39853912591934204, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 12.12052437939472, "step_time": 1.280554340362549} +{"epoch": 0, "iter": 10299, "iter_tflops": 22.624961241554885, "iter_time": 0.6860117874145507, "loss": 0.3778161108493805, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 24.430823164714948, "step_time": 0.6353036079406739} +{"epoch": 0, "iter": 10300, "iter_tflops": 24.159184020436502, "iter_time": 0.6424467849731446, "loss": 0.3715701699256897, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 
25.927538442975525, "step_time": 0.5986295280456544} +{"epoch": 0, "iter": 10301, "iter_tflops": 16.330971097312332, "iter_time": 1.2633108825683592, "loss": 0.10261990875005722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.450342659778574, "step_time": 1.1822744064331052} +{"epoch": 0, "iter": 10302, "iter_tflops": 22.598231531098328, "iter_time": 0.9129516830444336, "loss": 0.1633863002061844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.593622975306324, "step_time": 0.747676139831543} +{"epoch": 0, "iter": 10303, "iter_tflops": 41.1387703952384, "iter_time": 0.5015000038146972, "loss": 0.17347264289855957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.03839730777452, "step_time": 0.45807787895202645} +{"epoch": 0, "iter": 10304, "iter_tflops": 43.951484578446056, "iter_time": 0.46940606689453124, "loss": 0.13262735307216644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.056044926843576, "step_time": 0.4293131809234619} +{"epoch": 0, "iter": 10305, "iter_tflops": 20.101228040569563, "iter_time": 1.0263598556518556, "loss": 0.6603021621704102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.11863472413602, "step_time": 0.9327471504211426} +{"epoch": 0, "iter": 10306, "iter_tflops": 26.72661479927114, "iter_time": 0.7719306640624999, "loss": 0.6353406310081482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.07020970170377, "step_time": 0.5419222450256348} +{"epoch": 0, "iter": 10307, "iter_tflops": 47.65565747640997, "iter_time": 0.4329201316833496, "loss": 0.7436997294425964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.756301049570084, "step_time": 0.39861993789672845} +{"epoch": 0, "iter": 10308, "iter_tflops": 50.07236992972884, "iter_time": 0.41202550506591795, "loss": 0.49290144443511963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.25428935842536, "step_time": 0.3802665882110596} +{"epoch": 0, "iter": 10309, "iter_tflops": 27.645601816912777, "iter_time": 0.7462703704833984, "loss": 0.07157360762357712, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 29.144083000771104, "step_time": 0.70789990234375} +{"epoch": 0, "iter": 10310, "iter_tflops": 15.656945697682094, "iter_time": 1.3176959228515626, "loss": 0.07704153656959534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.78948755650509, "step_time": 1.0980125694274903} +{"epoch": 0, "iter": 10311, "iter_tflops": 41.24787431756099, "iter_time": 0.5001734962463379, "loss": 0.059480875730514526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.548170398904794, "step_time": 0.4529510917663574} +{"epoch": 0, "iter": 10312, "iter_tflops": 38.09462499707116, "iter_time": 0.5415749206542969, "loss": 0.05178908631205559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.91906931364157, "step_time": 0.4921648750305176} +{"epoch": 0, "iter": 10313, "iter_tflops": 17.864168363299257, "iter_time": 1.1548868713378908, "loss": 0.7946034669876099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.397920917133792, "step_time": 1.063572410583496} +{"epoch": 0, "iter": 10314, "iter_tflops": 12.903602151549373, "iter_time": 1.59886311340332, "loss": 0.820227861404419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.431708074692747, "step_time": 1.2555659713745118} +{"epoch": 0, "iter": 10315, "iter_tflops": 45.13612158706542, "iter_time": 0.4570860939025879, "loss": 0.5866914391517639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.25048338655656, "step_time": 0.41890134048461913} +{"epoch": 0, "iter": 10316, "iter_tflops": 47.48813882219039, "iter_time": 0.4344472961425781, "loss": 0.666810154914856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.49286779584233, "step_time": 0.4006592445373535} +{"epoch": 0, "iter": 10317, "iter_tflops": 27.68473086842023, "iter_time": 0.7452156066894531, "loss": 0.7362158298492432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.203427872604102, "step_time": 0.7064613647460938} +{"epoch": 0, "iter": 10318, "iter_tflops": 16.068425994430402, "iter_time": 1.2839523620605469, "loss": 0.7904480695724487, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 20.31406166781066, "step_time": 1.0156065216064454} +{"epoch": 0, "iter": 10319, "iter_tflops": 43.6566943541322, "iter_time": 0.4725757141113281, "loss": 0.7252160906791687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.044796989198254, "step_time": 0.4385414505004883} +{"epoch": 0, "iter": 10320, "iter_tflops": 45.342631569151756, "iter_time": 0.4550043258666992, "loss": 0.7861800193786621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.89405990587677, "step_time": 0.4219550094604493} +{"epoch": 0, "iter": 10321, "iter_tflops": 49.19581458715473, "iter_time": 0.4193668441772461, "loss": 0.022761095315217972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.263184288262416, "step_time": 0.38020425415039066} +{"epoch": 0, "iter": 10322, "iter_tflops": 38.313906322266526, "iter_time": 0.5384753341674804, "loss": 0.022622425109148026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.291734286839656, "step_time": 0.47655964469909673} +{"epoch": 0, "iter": 10323, "iter_tflops": 47.47616012002891, "iter_time": 0.43455691146850584, "loss": 0.07969485968351364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.32671482409085, "step_time": 0.3942745800018311} +{"epoch": 0, "iter": 10324, "iter_tflops": 40.77663422090989, "iter_time": 0.5059538116455078, "loss": 0.08026625216007233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87202395706194, "step_time": 0.4597763080596924} +{"epoch": 0, "iter": 10325, "iter_tflops": 27.382533657822922, "iter_time": 0.7534399032592773, "loss": 0.02785135805606842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.64226233177669, "step_time": 0.6960026626586915} +{"epoch": 0, "iter": 10326, "iter_tflops": 10.91079905253016, "iter_time": 1.8908874969482423, "loss": 0.045944832265377045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.294130693441277, "step_time": 1.678125442504883} +{"epoch": 0, "iter": 10327, "iter_tflops": 17.157870649465053, "iter_time": 1.2024273834228516, "loss": 0.055825695395469666, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.890554396571098, "step_time": 0.9875799903869629} +{"epoch": 0, "iter": 10328, "iter_tflops": 49.85138951647157, "iter_time": 0.41385192489624023, "loss": 0.04126778244972229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.28287258846835, "step_time": 0.38006635475158695} +{"epoch": 0, "iter": 10329, "iter_tflops": 28.037400331333124, "iter_time": 0.6018336868286133, "loss": 0.44009512662887573, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 30.162925309996293, "step_time": 0.5594235916137695} +{"epoch": 0, "iter": 10330, "iter_tflops": 27.420057214299078, "iter_time": 0.615383544921875, "loss": 0.31499508023262024, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 30.781984131437426, "step_time": 0.5481729812622069} +{"epoch": 0, "iter": 10331, "iter_tflops": 28.97670250979669, "iter_time": 0.5823247833251952, "loss": 0.2301347404718399, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 30.782921755700592, "step_time": 0.5481562843322754} +{"epoch": 0, "iter": 10332, "iter_tflops": 29.895651028222666, "iter_time": 0.5644249725341797, "loss": 0.4147634208202362, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 31.840822297263387, "step_time": 0.529943977355957} +{"epoch": 0, "iter": 10333, "iter_tflops": 40.65131778962555, "iter_time": 0.5075135231018066, "loss": 0.1980343610048294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.77471215306687, "step_time": 0.4607755699157715} +{"epoch": 0, "iter": 10334, "iter_tflops": 45.79001967486414, "iter_time": 0.4505587387084961, "loss": 0.22929053008556366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.92265654653819, "step_time": 0.40514566421508785} +{"epoch": 0, "iter": 10335, "iter_tflops": 50.56389625364076, "iter_time": 0.40802024841308593, "loss": 0.1997777670621872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.904761484466476, "step_time": 0.3757614631652832} +{"epoch": 0, "iter": 10336, "iter_tflops": 50.320219223277924, "iter_time": 0.40999609756469724, "loss": 
0.2445199191570282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.56769314204804, "step_time": 0.3780825672149658} +{"epoch": 0, "iter": 10337, "iter_tflops": 42.75318105058426, "iter_time": 0.48256277084350585, "loss": 0.544937789440155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.45212145351557, "step_time": 0.44413673400878906} +{"epoch": 0, "iter": 10338, "iter_tflops": 45.61168342450363, "iter_time": 0.4523203697204589, "loss": 0.5196283459663391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.215138090124725, "step_time": 0.4108540630340576} +{"epoch": 0, "iter": 10339, "iter_tflops": 47.04042892928977, "iter_time": 0.4385821723937989, "loss": 0.4621841013431549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.82017546988113, "step_time": 0.40596265792846675} +{"epoch": 0, "iter": 10340, "iter_tflops": 48.72980904850252, "iter_time": 0.4233772697448731, "loss": 0.5817632079124451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.02391034217436, "step_time": 0.38909038162231446} +{"epoch": 0, "iter": 10341, "iter_tflops": 48.6340066492957, "iter_time": 0.4242112655639648, "loss": 0.13918651640415192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.412380767209804, "step_time": 0.38626051139831546} +{"epoch": 0, "iter": 10342, "iter_tflops": 40.33232194081101, "iter_time": 0.5115275421142578, "loss": 0.13686995208263397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.06735280964947, "step_time": 0.4681718368530274} +{"epoch": 0, "iter": 10343, "iter_tflops": 46.20423416557663, "iter_time": 0.4465195426940918, "loss": 0.2051074355840683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.31728266783762, "step_time": 0.4100200252532959} +{"epoch": 0, "iter": 10344, "iter_tflops": 49.28080065086924, "iter_time": 0.4186436347961426, "loss": 0.13216517865657806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.743248797452985, "step_time": 0.38388251495361336} +{"epoch": 0, "iter": 10345, "iter_tflops": 43.19702394701665, "iter_time": 0.4776045112609863, 
"loss": 0.7411505579948425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08412105915357, "step_time": 0.4381751861572266} +{"epoch": 0, "iter": 10346, "iter_tflops": 45.707244053234284, "iter_time": 0.451374698638916, "loss": 0.8116165399551392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.21614028752132, "step_time": 0.41084586334228523} +{"epoch": 0, "iter": 10347, "iter_tflops": 49.10870459296522, "iter_time": 0.42011072540283206, "loss": 0.776788592338562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.40510886387753, "step_time": 0.3863131065368653} +{"epoch": 0, "iter": 10348, "iter_tflops": 42.933962582144915, "iter_time": 0.48053084945678715, "loss": 0.6147359013557434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.172962836486505, "step_time": 0.4468219547271729} +{"epoch": 0, "iter": 10349, "iter_tflops": 32.996489393728815, "iter_time": 0.6252511672973633, "loss": 0.667955219745636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.348885523627814, "step_time": 0.5836419792175294} +{"epoch": 0, "iter": 10350, "iter_tflops": 13.269878876481714, "iter_time": 1.5547311096191405, "loss": 0.6550570130348206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.602978013820657, "step_time": 1.412800422668457} +{"epoch": 0, "iter": 10351, "iter_tflops": 36.48975806580217, "iter_time": 0.5653940887451172, "loss": 0.7572125196456909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.19597417703799, "step_time": 0.5132626819610595} +{"epoch": 0, "iter": 10352, "iter_tflops": 39.75016792962957, "iter_time": 0.5190190277099609, "loss": 0.676454484462738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.499052064036505, "step_time": 0.4742883472442627} +{"epoch": 0, "iter": 10353, "iter_tflops": 34.316360978729385, "iter_time": 0.6012028350830079, "loss": 0.15051274001598358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.55850942942917, "step_time": 0.5493054389953613} +{"epoch": 0, "iter": 10354, "iter_tflops": 39.413457354475845, "iter_time": 
0.5234530258178711, "loss": 0.20206159353256226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.45158886014191, "step_time": 0.46412499618530273} +{"epoch": 0, "iter": 10355, "iter_tflops": 46.48819511211667, "iter_time": 0.44379209518432616, "loss": 0.20826183259487152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.477718198711, "step_time": 0.4087168407440185} +{"epoch": 0, "iter": 10356, "iter_tflops": 48.643619504694826, "iter_time": 0.42412743377685547, "loss": 0.20254409313201904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.12694977134002, "step_time": 0.3883357429504395} +{"epoch": 0, "iter": 10357, "iter_tflops": 22.036768139827164, "iter_time": 0.8948303070068359, "loss": 0.47053882479667664, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 23.180761815693383, "step_time": 0.8506695404052733} +{"epoch": 0, "iter": 10358, "iter_tflops": 20.555279296846997, "iter_time": 0.9593237686157225, "loss": 0.4507373869419098, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 27.565111675190565, "step_time": 0.7153668823242189} +{"epoch": 0, "iter": 10359, "iter_tflops": 32.65938350740997, "iter_time": 0.6037826156616211, "loss": 0.2054109275341034, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 34.698745992054604, "step_time": 0.5682962722778321} +{"epoch": 0, "iter": 10360, "iter_tflops": 32.915712152049366, "iter_time": 0.5990807037353516, "loss": 0.38272470235824585, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 35.00114864308755, "step_time": 0.5633863105773925} +{"epoch": 0, "iter": 10361, "iter_tflops": 24.985670242912118, "iter_time": 0.8257170333862304, "loss": 0.0016526005929335952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.28127026254591, "step_time": 0.7850112762451171} +{"epoch": 0, "iter": 10362, "iter_tflops": 16.692158091495063, "iter_time": 1.2359752044677734, "loss": 0.008850839920341969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.005351841278426, "step_time": 0.9375489044189452} +{"epoch": 0, "iter": 10363, "iter_tflops": 
43.63971830765058, "iter_time": 0.4727595481872559, "loss": 0.0010105324909090996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.543468399820426, "step_time": 0.4250024604797363} +{"epoch": 0, "iter": 10364, "iter_tflops": 47.04309181486487, "iter_time": 0.4385573463439941, "loss": 0.0009559881291352212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.98853465463993, "step_time": 0.3968392963409424} +{"epoch": 0, "iter": 10365, "iter_tflops": 30.595902473336636, "iter_time": 0.6743090362548827, "loss": 0.5461370348930359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.36564562753753, "step_time": 0.6183334121704102} +{"epoch": 0, "iter": 10366, "iter_tflops": 13.150022253363709, "iter_time": 1.5689017944335935, "loss": 0.5381452441215515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.033746715767181, "step_time": 1.3723188171386718} +{"epoch": 0, "iter": 10367, "iter_tflops": 44.48638816660441, "iter_time": 0.46376193618774414, "loss": 0.40745627880096436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.20349079857286, "step_time": 0.4279999885559082} +{"epoch": 0, "iter": 10368, "iter_tflops": 46.545222468573364, "iter_time": 0.44324835968017584, "loss": 0.3305789530277252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.920214282527645, "step_time": 0.41328134918212894} +{"epoch": 0, "iter": 10369, "iter_tflops": 24.040762656570934, "iter_time": 0.6762796173095702, "loss": 0.4327986538410187, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 25.53006825529466, "step_time": 0.6368286056518555} +{"epoch": 0, "iter": 10370, "iter_tflops": 9.31704450086663, "iter_time": 1.7450037689208981, "loss": 0.2839718759059906, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 11.598016179846088, "step_time": 1.4018154067993165} +{"epoch": 0, "iter": 10371, "iter_tflops": 8.21076967870366, "iter_time": 1.9801161651611328, "loss": 0.31012293696403503, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 9.76227336623494, "step_time": 1.6654192276000976} +{"epoch": 0, "iter": 10372, 
"iter_tflops": 17.18793688858144, "iter_time": 0.9459121170043946, "loss": 0.3834913671016693, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 22.669490560524178, "step_time": 0.7171876106262206} +{"epoch": 0, "iter": 10373, "iter_tflops": 11.180901943320194, "iter_time": 1.3552501983642578, "loss": 0.21631577610969543, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 11.98336707518018, "step_time": 1.2644959869384766} +{"epoch": 0, "iter": 10374, "iter_tflops": 15.659482300062193, "iter_time": 0.9676513748168946, "loss": 0.35181474685668945, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 18.86521497262933, "step_time": 0.803220085144043} +{"epoch": 0, "iter": 10375, "iter_tflops": 23.828922937986533, "iter_time": 0.6359045104980469, "loss": 0.2577090561389923, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.629177016165727, "step_time": 0.5912370719909668} +{"epoch": 0, "iter": 10376, "iter_tflops": 23.55578397298121, "iter_time": 0.6432780838012695, "loss": 0.38552016019821167, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.32381338793753, "step_time": 0.5983664207458496} +{"epoch": 0, "iter": 10377, "iter_tflops": 16.4774221596355, "iter_time": 1.2520825958251953, "loss": 0.04835096374154091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.773131589641654, "step_time": 1.1608023834228516} +{"epoch": 0, "iter": 10378, "iter_tflops": 48.92725620752507, "iter_time": 0.4216687202453614, "loss": 0.0492134727537632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.05770961682836, "step_time": 0.38164941978454586} +{"epoch": 0, "iter": 10379, "iter_tflops": 52.17955808942244, "iter_time": 0.3953865127563476, "loss": 0.06912412494421005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.80614688286569, "step_time": 0.3631841735839844} +{"epoch": 0, "iter": 10380, "iter_tflops": 55.87696747212381, "iter_time": 0.3692235717773438, "loss": 0.028903940692543983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.99595137894264, "step_time": 0.338237096786499} +{"epoch": 0, 
"iter": 10381, "iter_tflops": 20.630330292434753, "iter_time": 1.000036994934082, "loss": 0.002731232438236475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.520028535933257, "step_time": 0.9586926651000978} +{"epoch": 0, "iter": 10382, "iter_tflops": 14.413086897806014, "iter_time": 1.4314139404296875, "loss": 0.010738885030150414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.51320638765377, "step_time": 1.00574688911438} +{"epoch": 0, "iter": 10383, "iter_tflops": 51.81965745099511, "iter_time": 0.3981325721740722, "loss": 0.010827102698385715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.13899281978054, "step_time": 0.36106855392456055} +{"epoch": 0, "iter": 10384, "iter_tflops": 53.13522540936128, "iter_time": 0.38827526092529296, "loss": 0.010151143185794353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.90548997563547, "step_time": 0.35628907585144043} +{"epoch": 0, "iter": 10385, "iter_tflops": 41.82556886819755, "iter_time": 0.4932651023864746, "loss": 0.03052918054163456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.348678989697355, "step_time": 0.4549436492919922} +{"epoch": 0, "iter": 10386, "iter_tflops": 40.42930817024102, "iter_time": 0.5103004341125488, "loss": 0.04875516518950462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6656060484252, "step_time": 0.46190112113952636} +{"epoch": 0, "iter": 10387, "iter_tflops": 37.91899539968329, "iter_time": 0.5440833358764648, "loss": 0.03865573927760124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75912094181175, "step_time": 0.4940499954223633} +{"epoch": 0, "iter": 10388, "iter_tflops": 41.425433353680425, "iter_time": 0.49802963638305664, "loss": 0.02943168208003044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.44326886922529, "step_time": 0.4539966869354248} +{"epoch": 0, "iter": 10389, "iter_tflops": 19.775471825464642, "iter_time": 1.0432668151855469, "loss": 0.322061687707901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.087847515748148, "step_time": 
0.9783404159545899} +{"epoch": 0, "iter": 10390, "iter_tflops": 18.11753927176051, "iter_time": 1.1387359619140625, "loss": 0.4522117078304291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.937738110673656, "step_time": 0.9404384994506836} +{"epoch": 0, "iter": 10391, "iter_tflops": 38.46815876279543, "iter_time": 0.5363161163330078, "loss": 0.37998223304748535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08086290089262, "step_time": 0.4902725868225098} +{"epoch": 0, "iter": 10392, "iter_tflops": 39.79889621297747, "iter_time": 0.5183835601806641, "loss": 0.3855895698070526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.738726563018304, "step_time": 0.47168939590454106} +{"epoch": 0, "iter": 10393, "iter_tflops": 18.43856583051493, "iter_time": 1.1189098815917968, "loss": 0.29887670278549194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.770892408866604, "step_time": 1.043508460998535} +{"epoch": 0, "iter": 10394, "iter_tflops": 13.38026321432061, "iter_time": 1.5419049072265625, "loss": 0.3332080841064453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.87264691668442, "step_time": 1.22275382232666} +{"epoch": 0, "iter": 10395, "iter_tflops": 35.4844299490749, "iter_time": 0.5814125671386718, "loss": 0.16893310844898224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.81545775601408, "step_time": 0.5315174598693847} +{"epoch": 0, "iter": 10396, "iter_tflops": 39.459627568234275, "iter_time": 0.5228405532836914, "loss": 0.16987133026123047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.957729645040814, "step_time": 0.48026498794555667} +{"epoch": 0, "iter": 10397, "iter_tflops": 16.69905036271049, "iter_time": 0.5964118423461914, "loss": 0.022626521065831184, "lr": 3e-05, "seqlen": 4016.0, "step_tflops": 18.584053747633973, "step_time": 0.5359170570373535} +{"epoch": 0, "iter": 10398, "iter_tflops": 19.589661144512135, "iter_time": 0.5084065170288086, "loss": 0.032592546194791794, "lr": 3e-05, "seqlen": 4016.0, "step_tflops": 21.85643869484444, 
"step_time": 0.4556786003112793} +{"epoch": 0, "iter": 10399, "iter_tflops": 21.88036225072216, "iter_time": 0.4551803703308105, "loss": 0.04266563430428505, "lr": 3e-05, "seqlen": 4016.0, "step_tflops": 24.1202664125616, "step_time": 0.41291050529479983} +{"epoch": 0, "iter": 10400, "iter_tflops": 21.103235978522644, "iter_time": 0.4719423789978027, "loss": 0.02939414419233799, "lr": 3e-05, "seqlen": 4016.0, "step_tflops": 23.234668184145097, "step_time": 0.42864874649047857} +{"epoch": 0, "iter": 10401, "iter_tflops": 19.109904871554637, "iter_time": 1.0796021041870116, "loss": 0.46590229868888855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.70729272003868, "step_time": 0.9963201751708984} +{"epoch": 0, "iter": 10402, "iter_tflops": 16.503483334001587, "iter_time": 1.2501053924560546, "loss": 0.42349353432655334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.5408032725058, "step_time": 0.9152776527404786} +{"epoch": 0, "iter": 10403, "iter_tflops": 47.27223877207908, "iter_time": 0.4364314880371094, "loss": 0.5523043870925903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24355957395177, "step_time": 0.40260851669311526} +{"epoch": 0, "iter": 10404, "iter_tflops": 46.22572898152516, "iter_time": 0.4463119125366211, "loss": 0.3979508876800537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.960751076488606, "step_time": 0.4129460239410401} +{"epoch": 0, "iter": 10405, "iter_tflops": 36.315623019885265, "iter_time": 0.5681051788330077, "loss": 0.8447703123092651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.0396454010957, "step_time": 0.5284651870727538} +{"epoch": 0, "iter": 10406, "iter_tflops": 8.434267001475758, "iter_time": 2.4461039123535158, "loss": 0.7027064561843872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.544546317888916, "step_time": 1.9565653076171876} +{"epoch": 0, "iter": 10407, "iter_tflops": 11.41800278338694, "iter_time": 1.8068916168212892, "loss": 0.8262242078781128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
14.014483582977002, "step_time": 1.4721265602111815} +{"epoch": 0, "iter": 10408, "iter_tflops": 24.362417703389433, "iter_time": 0.8468409729003906, "loss": 0.6889622211456299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.35888034216399, "step_time": 0.5522406806945801} +{"epoch": 0, "iter": 10409, "iter_tflops": 22.906471603869164, "iter_time": 0.6561601715087891, "loss": 0.32294997572898865, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.546149603467956, "step_time": 0.6123288002014161} +{"epoch": 0, "iter": 10410, "iter_tflops": 25.509444255705162, "iter_time": 0.5892058715820312, "loss": 0.41481462121009827, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.4923918357085, "step_time": 0.546708137512207} +{"epoch": 0, "iter": 10411, "iter_tflops": 27.198362121468712, "iter_time": 0.5526183624267579, "loss": 0.3464399576187134, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.997569910224353, "step_time": 0.518330135345459} +{"epoch": 0, "iter": 10412, "iter_tflops": 26.04161332706712, "iter_time": 0.5771652526855469, "loss": 0.3777168095111847, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.68337444001787, "step_time": 0.5429364967346191} +{"epoch": 0, "iter": 10413, "iter_tflops": 21.34756025174177, "iter_time": 0.9664380035400391, "loss": 0.5980451107025146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.269403389612727, "step_time": 0.9264322509765625} +{"epoch": 0, "iter": 10414, "iter_tflops": 17.794424969008954, "iter_time": 1.159413330078125, "loss": 0.7316075563430786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.830560540375725, "step_time": 0.903661277770996} +{"epoch": 0, "iter": 10415, "iter_tflops": 36.30208311335453, "iter_time": 0.5683170700073242, "loss": 0.8939942717552185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.5272240907742, "step_time": 0.5219464302062988} +{"epoch": 0, "iter": 10416, "iter_tflops": 41.40933815148438, "iter_time": 0.49822321319580076, "loss": 0.7453412413597107, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 45.01963399814592, "step_time": 0.4582687969207763} +{"epoch": 0, "iter": 10417, "iter_tflops": 23.537990228647935, "iter_time": 0.8765019149780273, "loss": 0.6885876655578613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.142166129736225, "step_time": 0.820577407836914} +{"epoch": 0, "iter": 10418, "iter_tflops": 42.776290221071484, "iter_time": 0.482302074432373, "loss": 0.8532105684280396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.72006753420681, "step_time": 0.4415895481109619} +{"epoch": 0, "iter": 10419, "iter_tflops": 49.312115181993704, "iter_time": 0.41837778472900394, "loss": 0.6040219068527222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.408147922597465, "step_time": 0.386291124343872} +{"epoch": 0, "iter": 10420, "iter_tflops": 51.738387959377405, "iter_time": 0.39875794982910157, "loss": 0.76881343126297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.93392932867169, "step_time": 0.368847562789917} +{"epoch": 0, "iter": 10421, "iter_tflops": 26.902362853471185, "iter_time": 0.7668877868652344, "loss": 0.6460687518119812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.38594227307454, "step_time": 0.7268067169189453} +{"epoch": 0, "iter": 10422, "iter_tflops": 12.875578626932858, "iter_time": 1.602343017578125, "loss": 0.6197739243507385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.590657368078654, "step_time": 1.323298500061035} +{"epoch": 0, "iter": 10423, "iter_tflops": 37.142647148383965, "iter_time": 0.5554556579589843, "loss": 0.5396721959114075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.61501200249007, "step_time": 0.5079671897888184} +{"epoch": 0, "iter": 10424, "iter_tflops": 40.099174513534955, "iter_time": 0.5145017013549805, "loss": 0.5876717567443848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.890954203655276, "step_time": 0.47005342864990235} +{"epoch": 0, "iter": 10425, "iter_tflops": 17.96415400032251, "iter_time": 1.148458953857422, "loss": 0.4090217351913452, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 19.244776030011177, "step_time": 1.0720360412597656} +{"epoch": 0, "iter": 10426, "iter_tflops": 18.533277559676808, "iter_time": 1.1131918487548829, "loss": 0.4030973017215729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.242904686104463, "step_time": 0.9275359401702881} +{"epoch": 0, "iter": 10427, "iter_tflops": 49.76568736605326, "iter_time": 0.41456462478637696, "loss": 0.41231706738471985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.81136770990607, "step_time": 0.3833965644836426} +{"epoch": 0, "iter": 10428, "iter_tflops": 49.76450135903673, "iter_time": 0.4145745048522949, "loss": 0.3271805942058563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.198358833342475, "step_time": 0.380659008026123} +{"epoch": 0, "iter": 10429, "iter_tflops": 31.68881720815539, "iter_time": 0.6510528106689454, "loss": 0.2658717930316925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.407350838593, "step_time": 0.5996129608154298} +{"epoch": 0, "iter": 10430, "iter_tflops": 14.878189761129143, "iter_time": 1.3866669158935545, "loss": 0.15938697755336761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.242362302264834, "step_time": 1.019203845977783} +{"epoch": 0, "iter": 10431, "iter_tflops": 17.056944082895285, "iter_time": 1.2095421905517578, "loss": 0.1296570748090744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.036238355377883, "step_time": 1.0837799530029297} +{"epoch": 0, "iter": 10432, "iter_tflops": 31.603941707771828, "iter_time": 0.6528012771606445, "loss": 0.14234526455402374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.3429032248124, "step_time": 0.5243917407989502} +{"epoch": 0, "iter": 10433, "iter_tflops": 21.312538485795898, "iter_time": 0.7493885879516602, "loss": 0.36844387650489807, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 22.527405981952988, "step_time": 0.7089752426147461} +{"epoch": 0, "iter": 10434, "iter_tflops": 10.610139956577017, "iter_time": 1.5052933502197265, "loss": 0.271109014749527, "lr": 3e-05, 
"seqlen": 6384.0, "step_tflops": 12.328151813458494, "step_time": 1.2955204772949218} +{"epoch": 0, "iter": 10435, "iter_tflops": 8.30158310412822, "iter_time": 1.9238948669433593, "loss": 0.379975289106369, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 10.612591587973547, "step_time": 1.5049456100463867} +{"epoch": 0, "iter": 10436, "iter_tflops": 15.059625033043856, "iter_time": 1.060542549133301, "loss": 0.43037742376327515, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 19.141405635776252, "step_time": 0.8343887290954589} +{"epoch": 0, "iter": 10437, "iter_tflops": 21.99492173635849, "iter_time": 0.7914648971557618, "loss": 0.48609521985054016, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 23.263226067375946, "step_time": 0.7483144607543945} +{"epoch": 0, "iter": 10438, "iter_tflops": 10.007231942543703, "iter_time": 1.739562805175781, "loss": 0.3257906138896942, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 13.212551202809319, "step_time": 1.317550880432129} +{"epoch": 0, "iter": 10439, "iter_tflops": 10.423668058329092, "iter_time": 1.6700655059814453, "loss": 0.30034205317497253, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 11.853717955131925, "step_time": 1.4685863571166993} +{"epoch": 0, "iter": 10440, "iter_tflops": 12.803772660907974, "iter_time": 1.3596155548095703, "loss": 0.16648662090301514, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 15.365674551779327, "step_time": 1.1329283599853515} +{"epoch": 0, "iter": 10441, "iter_tflops": 20.209378771466962, "iter_time": 0.7558652572631837, "loss": 0.5562644004821777, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 21.547908756414934, "step_time": 0.7089118232727051} +{"epoch": 0, "iter": 10442, "iter_tflops": 10.168581847279984, "iter_time": 1.5022318267822266, "loss": 0.5555854439735413, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 11.744900656413975, "step_time": 1.3006127281188964} +{"epoch": 0, "iter": 10443, "iter_tflops": 9.517047948505894, "iter_time": 1.6050741119384764, "loss": 0.32741284370422363, 
"lr": 3e-05, "seqlen": 6112.0, "step_tflops": 10.973852032773257, "step_time": 1.3919968338012694} +{"epoch": 0, "iter": 10444, "iter_tflops": 11.072767430495531, "iter_time": 1.3795618286132811, "loss": 0.3984643518924713, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 14.21538135141047, "step_time": 1.0745801963806152} +{"epoch": 0, "iter": 10445, "iter_tflops": 15.845586253610923, "iter_time": 1.0001829223632812, "loss": 0.30289730429649353, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 16.91280996660803, "step_time": 0.9370698776245118} +{"epoch": 0, "iter": 10446, "iter_tflops": 12.883843172693059, "iter_time": 1.2301053771972656, "loss": 0.32165056467056274, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 15.496061908030926, "step_time": 1.0227427368164062} +{"epoch": 0, "iter": 10447, "iter_tflops": 28.738602383774197, "iter_time": 0.5514702682495116, "loss": 0.3369845747947693, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.76412293956792, "step_time": 0.515161273956299} +{"epoch": 0, "iter": 10448, "iter_tflops": 29.876355847224193, "iter_time": 0.5304691390991211, "loss": 0.40061885118484497, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 31.6932404881577, "step_time": 0.5000588302612305} +{"epoch": 0, "iter": 10449, "iter_tflops": 40.545401840039915, "iter_time": 0.5088392906188964, "loss": 0.6623156070709229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.3952407772099, "step_time": 0.46471408081054694} +{"epoch": 0, "iter": 10450, "iter_tflops": 44.28827120522082, "iter_time": 0.46583650588989256, "loss": 0.6047207713127136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.75441466876896, "step_time": 0.43202484321594237} +{"epoch": 0, "iter": 10451, "iter_tflops": 49.84822003699548, "iter_time": 0.41387823867797857, "loss": 0.6379539370536804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.909394412420724, "step_time": 0.3826994113922119} +{"epoch": 0, "iter": 10452, "iter_tflops": 47.32196352612245, "iter_time": 0.43597289657592775, "loss": 
0.5981753468513489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.27769031532954, "step_time": 0.40234053802490233} +{"epoch": 0, "iter": 10453, "iter_tflops": 41.08458190092896, "iter_time": 0.5021614570617675, "loss": 0.17850922048091888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6250994764325, "step_time": 0.46232039260864266} +{"epoch": 0, "iter": 10454, "iter_tflops": 43.196616826115566, "iter_time": 0.47760901260375976, "loss": 0.18672576546669006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.208887773003326, "step_time": 0.42795207405090335} +{"epoch": 0, "iter": 10455, "iter_tflops": 48.334508085404195, "iter_time": 0.4268398361206055, "loss": 0.20727327466011047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.35773178843774, "step_time": 0.3940410099029541} +{"epoch": 0, "iter": 10456, "iter_tflops": 49.52360934431486, "iter_time": 0.4165910720825195, "loss": 0.1433815360069275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.68756460331254, "step_time": 0.3842806739807129} +{"epoch": 0, "iter": 10457, "iter_tflops": 27.195848263972792, "iter_time": 0.7586118774414062, "loss": 0.8326181769371033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.89566263046866, "step_time": 0.713985824584961} +{"epoch": 0, "iter": 10458, "iter_tflops": 9.939858374893076, "iter_time": 2.075592300415039, "loss": 0.7653448581695557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.756959017013802, "step_time": 1.754798454284668} +{"epoch": 0, "iter": 10459, "iter_tflops": 13.577703709019147, "iter_time": 1.5194832611083984, "loss": 0.6902868747711182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.28818836773259, "step_time": 1.2666291084289552} +{"epoch": 0, "iter": 10460, "iter_tflops": 28.07538255251834, "iter_time": 0.7348463897705079, "loss": 0.6684978604316711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.009865295087984, "step_time": 0.6066208534240722} +{"epoch": 0, "iter": 10461, "iter_tflops": 14.121724016889605, "iter_time": 
1.1367820434570313, "loss": 0.36456218361854553, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 15.052467772114706, "step_time": 1.0664910583496094} +{"epoch": 0, "iter": 10462, "iter_tflops": 14.314256922699446, "iter_time": 1.1214918365478517, "loss": 0.41254723072052, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 17.089666441511003, "step_time": 0.9393584327697755} +{"epoch": 0, "iter": 10463, "iter_tflops": 27.05969618945492, "iter_time": 0.5932558212280273, "loss": 0.29487982392311096, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 28.91778518477855, "step_time": 0.555136646270752} +{"epoch": 0, "iter": 10464, "iter_tflops": 27.577792517745536, "iter_time": 0.5821104888916014, "loss": 0.3231643736362457, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.22033950281884, "step_time": 0.5493886299133302} +{"epoch": 0, "iter": 10465, "iter_tflops": 38.11256254834836, "iter_time": 0.5413200302124024, "loss": 0.8092858791351318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.01686659071856, "step_time": 0.5029904823303223} +{"epoch": 0, "iter": 10466, "iter_tflops": 40.07255683610513, "iter_time": 0.5148434524536133, "loss": 0.7950156331062317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.51507449466029, "step_time": 0.474113712310791} +{"epoch": 0, "iter": 10467, "iter_tflops": 47.361490806027675, "iter_time": 0.43560903930664063, "loss": 0.9327142238616943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.22073209766741, "step_time": 0.4027879467010498} +{"epoch": 0, "iter": 10468, "iter_tflops": 44.5639246332067, "iter_time": 0.4629550399780274, "loss": 0.9401113390922546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.99988015931325, "step_time": 0.42981552124023437} +{"epoch": 0, "iter": 10469, "iter_tflops": 33.15274389196963, "iter_time": 0.6223042526245118, "loss": 0.5939034223556519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.29077386861626, "step_time": 0.5846030349731446} +{"epoch": 0, "iter": 10470, "iter_tflops": 11.254681639771134, 
"iter_time": 1.8331121368408203, "loss": 0.7558803558349609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.172353692047672, "step_time": 1.455728099822998} +{"epoch": 0, "iter": 10471, "iter_tflops": 10.807652816604781, "iter_time": 1.9089337768554686, "loss": 0.6217803359031677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.771742254168995, "step_time": 1.615370330810547} +{"epoch": 0, "iter": 10472, "iter_tflops": 23.627737074801235, "iter_time": 0.873172637939453, "loss": 0.5875895023345947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.97749058970724, "step_time": 0.6256113834381103} +{"epoch": 0, "iter": 10473, "iter_tflops": 12.008503111548466, "iter_time": 1.3402423095703124, "loss": 0.3520800769329071, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 12.728692149693417, "step_time": 1.2644114379882812} +{"epoch": 0, "iter": 10474, "iter_tflops": 17.23148999538271, "iter_time": 0.934005355834961, "loss": 0.3787972629070282, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 23.495362357165444, "step_time": 0.6849991798400877} +{"epoch": 0, "iter": 10475, "iter_tflops": 28.321347278785787, "iter_time": 0.5682746582031251, "loss": 0.33189746737480164, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 30.18540707257594, "step_time": 0.5331816101074218} +{"epoch": 0, "iter": 10476, "iter_tflops": 28.582817200206588, "iter_time": 0.5630761947631836, "loss": 0.2248271107673645, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 30.192152496612987, "step_time": 0.5330624885559083} +{"epoch": 0, "iter": 10477, "iter_tflops": 29.00755974344829, "iter_time": 0.7112316131591797, "loss": 0.6775740385055542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.61139450057699, "step_time": 0.6739677772521973} +{"epoch": 0, "iter": 10478, "iter_tflops": 15.640462420161661, "iter_time": 1.3190846252441406, "loss": 0.6083024144172668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.66239384966657, "step_time": 1.1054902000427245} +{"epoch": 0, "iter": 10479, "iter_tflops": 
36.12080472004954, "iter_time": 0.5711692657470703, "loss": 0.855319619178772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.184055254744635, "step_time": 0.4566011924743652} +{"epoch": 0, "iter": 10480, "iter_tflops": 43.93132469185131, "iter_time": 0.4696214752197265, "loss": 0.7403815984725952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.10637976247506, "step_time": 0.43796813964843756} +{"epoch": 0, "iter": 10481, "iter_tflops": 26.27539862645442, "iter_time": 0.7851866989135742, "loss": 0.37410539388656616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.56511512950385, "step_time": 0.7484493865966796} +{"epoch": 0, "iter": 10482, "iter_tflops": 10.518514227566213, "iter_time": 1.9614075775146484, "loss": 0.4574095904827118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.326596934661657, "step_time": 1.4400554161071777} +{"epoch": 0, "iter": 10483, "iter_tflops": 40.48505902752638, "iter_time": 0.509597713470459, "loss": 0.356356143951416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.543912134804266, "step_time": 0.4631630344390869} +{"epoch": 0, "iter": 10484, "iter_tflops": 38.7494860128095, "iter_time": 0.532422378540039, "loss": 0.3229225277900696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.19617596441816, "step_time": 0.48893277740478513} +{"epoch": 0, "iter": 10485, "iter_tflops": 21.576791652192984, "iter_time": 0.9561705856323242, "loss": 0.039768286049366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.24445580110002, "step_time": 0.8875705108642579} +{"epoch": 0, "iter": 10486, "iter_tflops": 41.41873842497819, "iter_time": 0.4981101379394531, "loss": 0.03790408372879028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88157323130184, "step_time": 0.44965967941284174} +{"epoch": 0, "iter": 10487, "iter_tflops": 43.360128211479235, "iter_time": 0.47580794525146486, "loss": 0.014465276151895523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.807958718439764, "step_time": 0.4315409832000732} +{"epoch": 0, "iter": 10488, 
"iter_tflops": 44.24117239886183, "iter_time": 0.46633243179321293, "loss": 0.055424369871616364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71430231326386, "step_time": 0.42351203918457037} +{"epoch": 0, "iter": 10489, "iter_tflops": 22.442312541617575, "iter_time": 0.9192944564819335, "loss": 0.03921973705291748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.087327262058302, "step_time": 0.8565123596191405} +{"epoch": 0, "iter": 10490, "iter_tflops": 13.954509649976185, "iter_time": 1.4784534912109375, "loss": 0.033897362649440765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.224577753690337, "step_time": 1.020100086212158} +{"epoch": 0, "iter": 10491, "iter_tflops": 45.14220409772332, "iter_time": 0.4570245056152344, "loss": 0.02429022639989853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.809634861802664, "step_time": 0.41419885063171386} +{"epoch": 0, "iter": 10492, "iter_tflops": 48.91999817395829, "iter_time": 0.42173128128051757, "loss": 0.05043691024184227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.93232371349717, "step_time": 0.3825367069244385} +{"epoch": 0, "iter": 10493, "iter_tflops": 20.63384858356977, "iter_time": 0.9998664779663086, "loss": 0.12710745632648468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.42811527512093, "step_time": 0.9198763809204101} +{"epoch": 0, "iter": 10494, "iter_tflops": 19.999290662658726, "iter_time": 1.031591262817383, "loss": 0.13271525502204895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.29050088334915, "step_time": 0.7559807567596435} +{"epoch": 0, "iter": 10495, "iter_tflops": 53.819105486443185, "iter_time": 0.3833414421081543, "loss": 0.17618319392204285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.52187321833242, "step_time": 0.35253645133972167} +{"epoch": 0, "iter": 10496, "iter_tflops": 48.190756916555564, "iter_time": 0.42811308288574224, "loss": 0.21903935074806213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40349451810698, "step_time": 0.3936969032287598} 
+{"epoch": 0, "iter": 10497, "iter_tflops": 31.676580111292544, "iter_time": 0.6513043212890626, "loss": 0.6845051646232605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.72692770211166, "step_time": 0.6117098388671875} +{"epoch": 0, "iter": 10498, "iter_tflops": 18.867712904807103, "iter_time": 1.093460220336914, "loss": 0.6639227271080017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.368600382617895, "step_time": 0.922323844909668} +{"epoch": 0, "iter": 10499, "iter_tflops": 43.95713765026744, "iter_time": 0.46934569931030273, "loss": 0.7694246172904968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.34489161197541, "step_time": 0.4357617645263672} +{"epoch": 0, "iter": 10500, "iter_tflops": 43.432902670056805, "iter_time": 0.4750107002258301, "loss": 0.8350074291229248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.51407988468204, "step_time": 0.4435451278686524} +{"epoch": 0, "iter": 10501, "iter_tflops": 38.14816977983118, "iter_time": 0.5408147659301759, "loss": 0.06701024621725082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27793802790571, "step_time": 0.4998092079162597} +{"epoch": 0, "iter": 10502, "iter_tflops": 10.761775784290998, "iter_time": 1.917071487426758, "loss": 0.08096712827682495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.270853821739399, "step_time": 1.4456803894042967} +{"epoch": 0, "iter": 10503, "iter_tflops": 13.572862744698208, "iter_time": 1.5200252075195313, "loss": 0.06348942220211029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.110901774478451, "step_time": 1.365311866760254} +{"epoch": 0, "iter": 10504, "iter_tflops": 30.827490668466485, "iter_time": 0.6692433624267577, "loss": 0.10102155059576035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.74600659393113, "step_time": 0.5190733680725097} +{"epoch": 0, "iter": 10505, "iter_tflops": 19.406659817689857, "iter_time": 0.9076236038208008, "loss": 0.3278527855873108, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 20.31462848821163, "step_time": 
0.8670570831298827} +{"epoch": 0, "iter": 10506, "iter_tflops": 11.618327817402223, "iter_time": 1.5160479888916016, "loss": 0.36496660113334656, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 13.021206325783451, "step_time": 1.3527120361328127} +{"epoch": 0, "iter": 10507, "iter_tflops": 24.581549440855593, "iter_time": 0.7165513534545899, "loss": 0.49927425384521484, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 26.646072813446857, "step_time": 0.6610333404541014} +{"epoch": 0, "iter": 10508, "iter_tflops": 25.668185413478362, "iter_time": 0.6862168960571289, "loss": 0.49158838391304016, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 27.901770919040057, "step_time": 0.6312840347290039} +{"epoch": 0, "iter": 10509, "iter_tflops": 20.36072991554712, "iter_time": 1.013278678894043, "loss": 0.628610372543335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.350623036123753, "step_time": 0.9230657005310058} +{"epoch": 0, "iter": 10510, "iter_tflops": 19.22540052311488, "iter_time": 1.0731164474487307, "loss": 0.714107096195221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.50816643963465, "step_time": 0.9166048049926758} +{"epoch": 0, "iter": 10511, "iter_tflops": 44.355810196938414, "iter_time": 0.4651271934509278, "loss": 0.727210283279419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.242411896294456, "step_time": 0.4276546859741211} +{"epoch": 0, "iter": 10512, "iter_tflops": 45.61839731531235, "iter_time": 0.45225379943847654, "loss": 0.7105218172073364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.114795247350735, "step_time": 0.4200586280822754} +{"epoch": 0, "iter": 10513, "iter_tflops": 27.443701797987192, "iter_time": 0.6058765335083007, "loss": 0.03931949660181999, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 29.28583289003113, "step_time": 0.5677658195495605} +{"epoch": 0, "iter": 10514, "iter_tflops": 18.771019062288243, "iter_time": 0.8858067245483399, "loss": 0.032451070845127106, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 
22.978777326167414, "step_time": 0.7236022472381591} +{"epoch": 0, "iter": 10515, "iter_tflops": 43.46823960651639, "iter_time": 0.38252054977416994, "loss": 0.037617478519678116, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 47.8204368138136, "step_time": 0.34770688056945803} +{"epoch": 0, "iter": 10516, "iter_tflops": 47.793798178538005, "iter_time": 0.3479006805419922, "loss": 0.020286573097109795, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 52.416392220483296, "step_time": 0.3172193698883057} +{"epoch": 0, "iter": 10517, "iter_tflops": 21.676541608149616, "iter_time": 0.9517705307006836, "loss": 0.022441202774643898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.591637684426487, "step_time": 0.913218147277832} +{"epoch": 0, "iter": 10518, "iter_tflops": 15.44670040690068, "iter_time": 1.335631103515625, "loss": 0.07540205121040344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.60956558610865, "step_time": 1.1715844669342041} +{"epoch": 0, "iter": 10519, "iter_tflops": 41.87116567248374, "iter_time": 0.49272794723510743, "loss": 0.04798096418380737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.24065924027397, "step_time": 0.4461678066253662} +{"epoch": 0, "iter": 10520, "iter_tflops": 48.050052859714285, "iter_time": 0.4293667182922363, "loss": 0.04477230831980705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.89820918646826, "step_time": 0.39001497077941893} +{"epoch": 0, "iter": 10521, "iter_tflops": 14.368680186565685, "iter_time": 1.3781346435546875, "loss": 0.08449529111385345, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 15.313114556959007, "step_time": 1.2931383666992187} +{"epoch": 0, "iter": 10522, "iter_tflops": 17.131881866711144, "iter_time": 1.155855270385742, "loss": 0.09606410562992096, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 25.04240872293963, "step_time": 0.7907376708984376} +{"epoch": 0, "iter": 10523, "iter_tflops": 49.148644816016855, "iter_time": 0.4028997344970703, "loss": 0.1423620879650116, "lr": 3e-05, "seqlen": 
7872.0, "step_tflops": 53.64062767863287, "step_time": 0.3691600341796875} +{"epoch": 0, "iter": 10524, "iter_tflops": 48.03072473399161, "iter_time": 0.4122772674560547, "loss": 0.12388482689857483, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 52.11896877082906, "step_time": 0.37993798446655275} +{"epoch": 0, "iter": 10525, "iter_tflops": 43.042784716232106, "iter_time": 0.4793159561157227, "loss": 0.26273447275161743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.86905750628636, "step_time": 0.4401857986450195} +{"epoch": 0, "iter": 10526, "iter_tflops": 10.671971822034207, "iter_time": 1.9332035217285153, "loss": 0.29257434606552124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.618641475197464, "step_time": 1.5149156799316408} +{"epoch": 0, "iter": 10527, "iter_tflops": 16.212646019684207, "iter_time": 1.2725309295654297, "loss": 0.1926059126853943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.67373040482382, "step_time": 1.1048190727233889} +{"epoch": 0, "iter": 10528, "iter_tflops": 16.846768793471306, "iter_time": 1.224632080078125, "loss": 0.24390223622322083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.415440513580993, "step_time": 0.9633746967315673} +{"epoch": 0, "iter": 10529, "iter_tflops": 15.600360963858977, "iter_time": 0.9896670761108399, "loss": 0.3094038665294647, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 16.784328110945378, "step_time": 0.9198559226989746} +{"epoch": 0, "iter": 10530, "iter_tflops": 13.529608877411821, "iter_time": 1.1411389465332031, "loss": 0.31423047184944153, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 14.986957960345814, "step_time": 1.0301732788085936} +{"epoch": 0, "iter": 10531, "iter_tflops": 26.20968882716975, "iter_time": 0.5890632171630859, "loss": 0.22987507283687592, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 28.030602423609036, "step_time": 0.5507967109680175} +{"epoch": 0, "iter": 10532, "iter_tflops": 27.352571361728373, "iter_time": 0.5644501724243164, "loss": 0.22778382897377014, "lr": 
3e-05, "seqlen": 6176.0, "step_tflops": 29.159134719376826, "step_time": 0.5294794845581055} +{"epoch": 0, "iter": 10533, "iter_tflops": 11.735257139250663, "iter_time": 1.1973321990966797, "loss": 0.11301827430725098, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 12.16119157374471, "step_time": 1.1553967514038086} +{"epoch": 0, "iter": 10534, "iter_tflops": 13.498119411571214, "iter_time": 1.0409599151611326, "loss": 0.12680931389331818, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 17.189449760344054, "step_time": 0.8174200706481933} +{"epoch": 0, "iter": 10535, "iter_tflops": 33.59521941290854, "iter_time": 0.41824406814575193, "loss": 0.11902013421058655, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 36.50972969303047, "step_time": 0.38485634803771973} +{"epoch": 0, "iter": 10536, "iter_tflops": 34.09904854364076, "iter_time": 0.41206431961059575, "loss": 0.10829043388366699, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 37.10830263978888, "step_time": 0.3786484489440918} +{"epoch": 0, "iter": 10537, "iter_tflops": 34.01703590631609, "iter_time": 0.6064929809570312, "loss": 0.5082082152366638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.521813257823275, "step_time": 0.5648978424072265} +{"epoch": 0, "iter": 10538, "iter_tflops": 13.045897969511614, "iter_time": 1.5814237976074217, "loss": 0.4982541799545288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.760172600605841, "step_time": 1.309065200805664} +{"epoch": 0, "iter": 10539, "iter_tflops": 39.02966696284936, "iter_time": 0.5286002960205078, "loss": 0.4708850681781769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67857826635632, "step_time": 0.4834062976837159} +{"epoch": 0, "iter": 10540, "iter_tflops": 35.75231389188729, "iter_time": 0.5770561752319336, "loss": 0.49638187885284424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.19232700085536, "step_time": 0.5264064445495605} +{"epoch": 0, "iter": 10541, "iter_tflops": 22.316644622666505, "iter_time": 0.9244711227416992, "loss": 
0.5766763091087341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.94020979424682, "step_time": 0.8617758026123047} +{"epoch": 0, "iter": 10542, "iter_tflops": 24.55755261952043, "iter_time": 0.8401119537353515, "loss": 0.6367827653884888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.52061938685715, "step_time": 0.7496594905853271} +{"epoch": 0, "iter": 10543, "iter_tflops": 44.600789106619374, "iter_time": 0.4625723876953125, "loss": 0.6058636903762817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.259923627380914, "step_time": 0.4274995059967041} +{"epoch": 0, "iter": 10544, "iter_tflops": 46.561651599170084, "iter_time": 0.44309196090698244, "loss": 0.5412660241127014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.454359269018674, "step_time": 0.4089060649871826} +{"epoch": 0, "iter": 10545, "iter_tflops": 36.99846254087322, "iter_time": 0.5576202926635742, "loss": 0.47200828790664673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66847620821188, "step_time": 0.520087875366211} +{"epoch": 0, "iter": 10546, "iter_tflops": 33.39457232919667, "iter_time": 0.6177978057861329, "loss": 0.6641101241111755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.381369858634905, "step_time": 0.5519084396362305} +{"epoch": 0, "iter": 10547, "iter_tflops": 41.51613251653287, "iter_time": 0.4969416046142578, "loss": 0.6787548661231995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.629805573031305, "step_time": 0.45214072799682614} +{"epoch": 0, "iter": 10548, "iter_tflops": 39.20082001101814, "iter_time": 0.5262923965454102, "loss": 0.3795962631702423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79251994031372, "step_time": 0.48211915397644045} +{"epoch": 0, "iter": 10549, "iter_tflops": 23.32995041780588, "iter_time": 0.8843179321289063, "loss": 0.0705232173204422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.28670533926201, "step_time": 0.8158869743347168} +{"epoch": 0, "iter": 10550, "iter_tflops": 18.849644620089308, "iter_time": 1.0945083541870118, 
"loss": 0.059298474341630936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.51978813610643, "step_time": 0.8084351406097412} +{"epoch": 0, "iter": 10551, "iter_tflops": 46.8249700237622, "iter_time": 0.4406002502441406, "loss": 0.07136065512895584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.41674129413439, "step_time": 0.40921116638183597} +{"epoch": 0, "iter": 10552, "iter_tflops": 51.53505984255963, "iter_time": 0.4003312225341797, "loss": 0.09326948970556259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.76511229376314, "step_time": 0.3699641704559326} +{"epoch": 0, "iter": 10553, "iter_tflops": 46.57356063826758, "iter_time": 0.4429786605834961, "loss": 0.031676698476076126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.10993724769408, "step_time": 0.40366110038757325} +{"epoch": 0, "iter": 10554, "iter_tflops": 50.298984966184484, "iter_time": 0.41016918182373047, "loss": 0.030150074511766434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.340523752617074, "step_time": 0.3728026428222656} +{"epoch": 0, "iter": 10555, "iter_tflops": 55.775663409367155, "iter_time": 0.3698941841125489, "loss": 0.02867458574473858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.81142697859925, "step_time": 0.33926343345642096} +{"epoch": 0, "iter": 10556, "iter_tflops": 60.58558736792927, "iter_time": 0.340528076171875, "loss": 0.03897630050778389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.089423569054, "step_time": 0.31216936683654783} +{"epoch": 0, "iter": 10557, "iter_tflops": 23.156853570086124, "iter_time": 0.8909281845092774, "loss": 0.5196951627731323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.30760767257686, "step_time": 0.8487504730224609} +{"epoch": 0, "iter": 10558, "iter_tflops": 14.054715792286625, "iter_time": 1.4679125366210939, "loss": 0.5549355149269104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.768341628582917, "step_time": 1.1611153106689454} +{"epoch": 0, "iter": 10559, "iter_tflops": 44.304969525926886, "iter_time": 
0.4656609344482422, "loss": 0.6536993980407715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.71087032994242, "step_time": 0.4324191398620606} +{"epoch": 0, "iter": 10560, "iter_tflops": 43.15351581632506, "iter_time": 0.4780860404968261, "loss": 0.6469963192939758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.32124939375604, "step_time": 0.4453915596008301} +{"epoch": 0, "iter": 10561, "iter_tflops": 30.405545112576284, "iter_time": 0.6785306243896484, "loss": 0.007065596524626017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.266735555941786, "step_time": 0.6393920288085938} +{"epoch": 0, "iter": 10562, "iter_tflops": 12.06258289834347, "iter_time": 1.7103379669189454, "loss": 0.012153392657637596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.519707229334495, "step_time": 1.4209028587341308} +{"epoch": 0, "iter": 10563, "iter_tflops": 54.028958952660446, "iter_time": 0.3818525085449219, "loss": 0.004802971612662077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.389544985593055, "step_time": 0.3473859500885009} +{"epoch": 0, "iter": 10564, "iter_tflops": 59.19078185168272, "iter_time": 0.34855247497558595, "loss": 0.006040329113602638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.74730363296347, "step_time": 0.3186401958465576} +{"epoch": 0, "iter": 10565, "iter_tflops": 24.151881200667066, "iter_time": 0.8542230453491211, "loss": 0.13823619484901428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.26896400674966, "step_time": 0.8164598083496095} +{"epoch": 0, "iter": 10566, "iter_tflops": 14.247596839440408, "iter_time": 1.4480402374267576, "loss": 0.17310038208961487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.349149197346005, "step_time": 1.189170331954956} +{"epoch": 0, "iter": 10567, "iter_tflops": 40.08874047874674, "iter_time": 0.514635612487793, "loss": 0.15834695100784302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.05181782254837, "step_time": 0.4683369388580322} +{"epoch": 0, "iter": 10568, "iter_tflops": 
37.848593411881474, "iter_time": 0.5450953826904297, "loss": 0.11202596873044968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.47430203987061, "step_time": 0.49744281387329103} +{"epoch": 0, "iter": 10569, "iter_tflops": 18.1433404876002, "iter_time": 1.1371165924072266, "loss": 0.8672668933868408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.156181800833526, "step_time": 1.0769940338134767} +{"epoch": 0, "iter": 10570, "iter_tflops": 16.187013397856, "iter_time": 1.2745460205078125, "loss": 0.5496259927749634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.464634430989147, "step_time": 1.0081339874267579} +{"epoch": 0, "iter": 10571, "iter_tflops": 37.15161909539406, "iter_time": 0.555321517944336, "loss": 0.7384427785873413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.49319613371812, "step_time": 0.5094953098297119} +{"epoch": 0, "iter": 10572, "iter_tflops": 39.42156517290438, "iter_time": 0.5233453674316406, "loss": 0.652849018573761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.04789839664578, "step_time": 0.47925901794433595} +{"epoch": 0, "iter": 10573, "iter_tflops": 19.802181724982788, "iter_time": 1.0418596191406249, "loss": 0.5327209830284119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.32245296452697, "step_time": 0.9675759887695314} +{"epoch": 0, "iter": 10574, "iter_tflops": 23.004086520893992, "iter_time": 0.8968447189331054, "loss": 0.5610958933830261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.1553663877472, "step_time": 0.6622003173828126} +{"epoch": 0, "iter": 10575, "iter_tflops": 46.30594571401283, "iter_time": 0.44553875732421877, "loss": 0.4939519762992859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.17105607301545, "step_time": 0.4112150535583496} +{"epoch": 0, "iter": 10576, "iter_tflops": 52.74682710439118, "iter_time": 0.39113430404663085, "loss": 0.6346331834793091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.211877069693834, "step_time": 0.36060857582092287} +{"epoch": 0, "iter": 10577, 
"iter_tflops": 30.540803061255694, "iter_time": 0.6755255737304687, "loss": 0.6054093837738037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.36036223434657, "step_time": 0.6375421066284179} +{"epoch": 0, "iter": 10578, "iter_tflops": 12.435492088527761, "iter_time": 1.6590492248535156, "loss": 0.5454204082489014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.214502452046812, "step_time": 1.2723852348327638} +{"epoch": 0, "iter": 10579, "iter_tflops": 13.020629526872609, "iter_time": 1.5844927825927735, "loss": 0.5642269849777222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.610294697023573, "step_time": 1.4120929069519044} +{"epoch": 0, "iter": 10580, "iter_tflops": 27.025058359923026, "iter_time": 0.7634060668945313, "loss": 0.7994222640991211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.00998393285611, "step_time": 0.6874743270874023} +{"epoch": 0, "iter": 10581, "iter_tflops": 16.207291865004102, "iter_time": 0.9072197113037109, "loss": 0.5235403180122375, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 17.28295167545726, "step_time": 0.850755989074707} +{"epoch": 0, "iter": 10582, "iter_tflops": 9.158742290837997, "iter_time": 1.6054141693115236, "loss": 0.35198065638542175, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 12.810250897762051, "step_time": 1.1477975540161134} +{"epoch": 0, "iter": 10583, "iter_tflops": 21.16697387678354, "iter_time": 0.6946469879150391, "loss": 0.5331472754478455, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 22.883910253974225, "step_time": 0.6425289421081544} +{"epoch": 0, "iter": 10584, "iter_tflops": 21.105133724683316, "iter_time": 0.696682373046875, "loss": 0.46332189440727234, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 22.78112347530015, "step_time": 0.6454279861450195} +{"epoch": 0, "iter": 10585, "iter_tflops": 18.005443214541504, "iter_time": 1.1458253631591797, "loss": 0.2627491056919098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.53945637775804, "step_time": 1.0558683471679688} +{"epoch": 0, 
"iter": 10586, "iter_tflops": 21.572171423966836, "iter_time": 0.956375373840332, "loss": 0.3056241571903229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.28313597984298, "step_time": 0.8496058139801025} +{"epoch": 0, "iter": 10587, "iter_tflops": 41.499849265886276, "iter_time": 0.497136589050293, "loss": 0.2710774838924408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37483498681418, "step_time": 0.4546814002990723} +{"epoch": 0, "iter": 10588, "iter_tflops": 39.557268506231054, "iter_time": 0.5215500030517578, "loss": 0.29609215259552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.46632048398154, "step_time": 0.47464550209045414} +{"epoch": 0, "iter": 10589, "iter_tflops": 19.514499999881618, "iter_time": 1.0572186584472656, "loss": 0.46974316239356995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.005815710430657, "step_time": 0.9821610260009767} +{"epoch": 0, "iter": 10590, "iter_tflops": 13.333512657379076, "iter_time": 1.5473112030029297, "loss": 0.2509535551071167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.95982036545695, "step_time": 1.148736072540283} +{"epoch": 0, "iter": 10591, "iter_tflops": 38.9098033548843, "iter_time": 0.5302286758422852, "loss": 0.42477530241012573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.64692424288836, "step_time": 0.48376509857177735} +{"epoch": 0, "iter": 10592, "iter_tflops": 42.520749227323826, "iter_time": 0.48520061111450197, "loss": 0.4200182259082794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.358797062411575, "step_time": 0.44503082084655765} +{"epoch": 0, "iter": 10593, "iter_tflops": 17.27342829338282, "iter_time": 1.1943832550048827, "loss": 0.8591286540031433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.227267602488883, "step_time": 1.131880760192871} +{"epoch": 0, "iter": 10594, "iter_tflops": 23.190542249175603, "iter_time": 0.8896339416503907, "loss": 0.8551868796348572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.05899363746725, "step_time": 0.7917072238922119} 
+{"epoch": 0, "iter": 10595, "iter_tflops": 45.037222830070355, "iter_time": 0.45808982467651366, "loss": 0.8952151536941528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.61036162461603, "step_time": 0.42441761016845697} +{"epoch": 0, "iter": 10596, "iter_tflops": 47.39167162614181, "iter_time": 0.4353316268920899, "loss": 0.8744842410087585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.325146210888065, "step_time": 0.4019685287475586} +{"epoch": 0, "iter": 10597, "iter_tflops": 42.43811909288627, "iter_time": 0.48614533233642576, "loss": 0.7436528205871582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.190065887510904, "step_time": 0.4466565074920654} +{"epoch": 0, "iter": 10598, "iter_tflops": 14.081834866647515, "iter_time": 1.4650856018066405, "loss": 0.6852639317512512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.221427296325217, "step_time": 1.2718420600891114} +{"epoch": 0, "iter": 10599, "iter_tflops": 13.04506207508455, "iter_time": 1.581525131225586, "loss": 0.6940060257911682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.30472466089422, "step_time": 1.348021213531494} +{"epoch": 0, "iter": 10600, "iter_tflops": 21.805314838105566, "iter_time": 0.9461497650146484, "loss": 0.7351943850517273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.78228687704269, "step_time": 0.7703260593414307} +{"epoch": 0, "iter": 10601, "iter_tflops": 13.090892051407842, "iter_time": 1.2576187438964843, "loss": 0.39190635085105896, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 13.965003359247124, "step_time": 1.1789006271362306} +{"epoch": 0, "iter": 10602, "iter_tflops": 17.589768040141944, "iter_time": 0.9359618148803711, "loss": 0.30384618043899536, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 21.321462315984117, "step_time": 0.7721492538452148} +{"epoch": 0, "iter": 10603, "iter_tflops": 29.223218249601526, "iter_time": 0.5633654403686523, "loss": 0.3966708481311798, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.162673124387844, "step_time": 
0.5283035621643066} +{"epoch": 0, "iter": 10604, "iter_tflops": 28.778881576848796, "iter_time": 0.5720636215209962, "loss": 0.2558577358722687, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 30.673409248954375, "step_time": 0.5367304000854493} +{"epoch": 0, "iter": 10605, "iter_tflops": 24.665666281510095, "iter_time": 0.7207838821411134, "loss": 0.15038815140724182, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 26.157409330727184, "step_time": 0.6796779632568359} +{"epoch": 0, "iter": 10606, "iter_tflops": 10.59732472594256, "iter_time": 1.6776512145996094, "loss": 0.11189362406730652, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 14.74187858627128, "step_time": 1.2059938354492188} +{"epoch": 0, "iter": 10607, "iter_tflops": 33.20745061159187, "iter_time": 0.5353802947998046, "loss": 0.20422229170799255, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 36.36937026447672, "step_time": 0.4888348236083984} +{"epoch": 0, "iter": 10608, "iter_tflops": 36.266122353293774, "iter_time": 0.49022651290893554, "loss": 0.1841658055782318, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 39.88812232635666, "step_time": 0.44571199798583977} +{"epoch": 0, "iter": 10609, "iter_tflops": 22.896869715562676, "iter_time": 0.9010442810058594, "loss": 0.8466612696647644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.52723480346788, "step_time": 0.8411504058837892} +{"epoch": 0, "iter": 10610, "iter_tflops": 12.903401426950357, "iter_time": 1.5988879852294922, "loss": 0.7033640742301941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.972558630109859, "step_time": 1.3779270477294923} +{"epoch": 0, "iter": 10611, "iter_tflops": 42.351706579436176, "iter_time": 0.48713724136352543, "loss": 0.7191024422645569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.567197664036215, "step_time": 0.4527619552612304} +{"epoch": 0, "iter": 10612, "iter_tflops": 40.809309912985064, "iter_time": 0.505548698425293, "loss": 0.5278114080429077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.93832864444197, "step_time": 0.46954661560058597} +{"epoch": 0, "iter": 10613, "iter_tflops": 33.700739004384495, "iter_time": 0.6121851959228516, "loss": 0.0030042685102671385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.06076549927628, "step_time": 0.572120231628418} +{"epoch": 0, "iter": 10614, "iter_tflops": 7.116704720103733, "iter_time": 2.8989671936035153, "loss": 0.01394736859947443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.545477819534383, "step_time": 2.414270324707031} +{"epoch": 0, "iter": 10615, "iter_tflops": 13.59453031116102, "iter_time": 1.5176025238037107, "loss": 0.0040675881318748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.956504091365446, "step_time": 1.379406135559082} +{"epoch": 0, "iter": 10616, "iter_tflops": 33.44858508989653, "iter_time": 0.6168001861572264, "loss": 0.007733407896012068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39662334643686, "step_time": 0.4754078063964844} +{"epoch": 0, "iter": 10617, "iter_tflops": 15.807768593664402, "iter_time": 1.0025757064819336, "loss": 0.2929545044898987, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 16.494091692623066, "step_time": 0.9608582916259765} +{"epoch": 0, "iter": 10618, "iter_tflops": 8.739416394431624, "iter_time": 1.8134488677978515, "loss": 0.40536564588546753, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 11.356832696762671, "step_time": 1.3955021781921388} +{"epoch": 0, "iter": 10619, "iter_tflops": 22.831962566673326, "iter_time": 0.6941358947753905, "loss": 0.37850329279899597, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 24.455744089525552, "step_time": 0.6480475387573241} +{"epoch": 0, "iter": 10620, "iter_tflops": 22.05267697882352, "iter_time": 0.7186648941040039, "loss": 0.3830283582210541, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 23.654313866640898, "step_time": 0.6700039939880371} +{"epoch": 0, "iter": 10621, "iter_tflops": 19.400531274766927, "iter_time": 1.0634293060302735, "loss": 0.6086980700492859, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.691923395171482, "step_time": 0.9970602111816407} +{"epoch": 0, "iter": 10622, "iter_tflops": 14.459621377364064, "iter_time": 1.4268073120117188, "loss": 0.4814194440841675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.87415198828431, "step_time": 1.2226447601318358} +{"epoch": 0, "iter": 10623, "iter_tflops": 38.4424167465811, "iter_time": 0.5366752471923828, "loss": 0.416287362575531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.11464469259895, "step_time": 0.48987932014465335} +{"epoch": 0, "iter": 10624, "iter_tflops": 42.663283599945, "iter_time": 0.4835795974731445, "loss": 0.35820379853248596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.48876874198507, "step_time": 0.4437866191864014} +{"epoch": 0, "iter": 10625, "iter_tflops": 13.915390708113776, "iter_time": 1.3546744384765623, "loss": 0.028403375297784805, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 14.963710443478046, "step_time": 1.2597693710327147} +{"epoch": 0, "iter": 10626, "iter_tflops": 16.045583304462472, "iter_time": 1.1748294677734374, "loss": 0.027612652629613876, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 20.72014814369331, "step_time": 0.9097823028564453} +{"epoch": 0, "iter": 10627, "iter_tflops": 35.79521334555961, "iter_time": 0.5266297454833984, "loss": 0.03548464551568031, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 39.358584790879455, "step_time": 0.4789507598876953} +{"epoch": 0, "iter": 10628, "iter_tflops": 41.16796632779873, "iter_time": 0.45790029907226565, "loss": 0.012970450334250927, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 45.34997666020269, "step_time": 0.41567439460754396} +{"epoch": 0, "iter": 10629, "iter_tflops": 16.628246164386322, "iter_time": 1.2407257690429685, "loss": 0.1313493400812149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.73697594213937, "step_time": 1.163168601989746} +{"epoch": 0, "iter": 10630, "iter_tflops": 14.163428117359887, "iter_time": 1.4566454772949218, "loss": 0.09827359020709991, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 19.883055404462255, "step_time": 1.03762188911438} +{"epoch": 0, "iter": 10631, "iter_tflops": 51.17941207450766, "iter_time": 0.40311314010620114, "loss": 0.12543538212776184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.00358485913461, "step_time": 0.36838880157470705} +{"epoch": 0, "iter": 10632, "iter_tflops": 47.73267501225035, "iter_time": 0.43222160720825187, "loss": 0.11327236890792847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.75435758341574, "step_time": 0.3986349067687988} +{"epoch": 0, "iter": 10633, "iter_tflops": 21.546623201063323, "iter_time": 0.7831320877075196, "loss": 0.019237302243709564, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 23.007988728578077, "step_time": 0.7333910064697264} +{"epoch": 0, "iter": 10634, "iter_tflops": 15.588234733268491, "iter_time": 1.082473564147949, "loss": 0.030949581414461136, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 18.856356807967316, "step_time": 0.8948627872467041} +{"epoch": 0, "iter": 10635, "iter_tflops": 35.91267919434753, "iter_time": 0.4698577880859375, "loss": 0.03460168465971947, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 39.27311142244784, "step_time": 0.42965406608581547} +{"epoch": 0, "iter": 10636, "iter_tflops": 38.3626757916924, "iter_time": 0.43985075759887693, "loss": 0.028437241911888123, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 42.077037589781014, "step_time": 0.40102281379699706} +{"epoch": 0, "iter": 10637, "iter_tflops": 36.96509814479992, "iter_time": 0.5581235961914063, "loss": 0.04283225163817406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.31217218931978, "step_time": 0.4993950309753418} +{"epoch": 0, "iter": 10638, "iter_tflops": 43.025264691266784, "iter_time": 0.4795111351013184, "loss": 0.0367082878947258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.40468386996799, "step_time": 0.43521213150024407} +{"epoch": 0, "iter": 10639, "iter_tflops": 41.27281255271888, "iter_time": 0.4998712768554688, "loss": 
0.014456480741500854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.24020219279133, "step_time": 0.4560345115661621} +{"epoch": 0, "iter": 10640, "iter_tflops": 45.98451639760848, "iter_time": 0.4486530494689941, "loss": 0.06512290239334106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.844794571568634, "step_time": 0.4057660903930664} +{"epoch": 0, "iter": 10641, "iter_tflops": 18.337468143818533, "iter_time": 1.125078628540039, "loss": 0.1516902595758438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.649476444204232, "step_time": 1.049956398010254} +{"epoch": 0, "iter": 10642, "iter_tflops": 17.339974277052363, "iter_time": 1.189799545288086, "loss": 0.11804047226905823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.570754556447586, "step_time": 0.9140630836486815} +{"epoch": 0, "iter": 10643, "iter_tflops": 40.42941304260345, "iter_time": 0.5102991104125976, "loss": 0.09715690463781357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.54945552646535, "step_time": 0.46310540199279787} +{"epoch": 0, "iter": 10644, "iter_tflops": 39.51169636203956, "iter_time": 0.5221515502929688, "loss": 0.0829182043671608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.30219525702162, "step_time": 0.47644451713562014} +{"epoch": 0, "iter": 10645, "iter_tflops": 21.457804747511414, "iter_time": 0.9614727020263673, "loss": 0.1418309509754181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.132662633512574, "step_time": 0.8918598709106443} +{"epoch": 0, "iter": 10646, "iter_tflops": 19.1843402806101, "iter_time": 1.0754132385253905, "loss": 0.13520625233650208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.48893700019302, "step_time": 0.9600797615051269} +{"epoch": 0, "iter": 10647, "iter_tflops": 48.137787765375826, "iter_time": 0.42858416366577157, "loss": 0.1484219878911972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33419101300195, "step_time": 0.394218255996704} +{"epoch": 0, "iter": 10648, "iter_tflops": 50.05315686043479, "iter_time": 
0.4121836624145508, "loss": 0.09136145561933517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.97221068412009, "step_time": 0.3822540016174317} +{"epoch": 0, "iter": 10649, "iter_tflops": 33.1247538121622, "iter_time": 0.6228300933837891, "loss": 0.8291724324226379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.85291700871682, "step_time": 0.5754369583129882} +{"epoch": 0, "iter": 10650, "iter_tflops": 10.561740002365143, "iter_time": 1.9533801727294922, "loss": 0.7932500243186951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.738116137817837, "step_time": 1.6196345901489257} +{"epoch": 0, "iter": 10651, "iter_tflops": 17.222449907435372, "iter_time": 1.1979186248779297, "loss": 0.8742668628692627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.380267308807678, "step_time": 1.0645412254333495} +{"epoch": 0, "iter": 10652, "iter_tflops": 32.10462111731814, "iter_time": 0.6426206817626954, "loss": 0.6823885440826416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.131093548499635, "step_time": 0.5872602138519287} +{"epoch": 0, "iter": 10653, "iter_tflops": 19.47466466257964, "iter_time": 0.7864812850952149, "loss": 0.36728325486183167, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 21.15660397295202, "step_time": 0.7239564208984375} +{"epoch": 0, "iter": 10654, "iter_tflops": 26.97202253960098, "iter_time": 0.5678646926879883, "loss": 0.267773300409317, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 28.780701731188582, "step_time": 0.5321781044006347} +{"epoch": 0, "iter": 10655, "iter_tflops": 26.90031802867435, "iter_time": 0.5693783721923827, "loss": 0.37881597876548767, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 28.746711501637787, "step_time": 0.5328073539733886} +{"epoch": 0, "iter": 10656, "iter_tflops": 29.02341235306373, "iter_time": 0.5277277221679687, "loss": 0.3025113642215729, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 30.756768828138394, "step_time": 0.4979866180419922} +{"epoch": 0, "iter": 10657, "iter_tflops": 33.26933817589013, 
"iter_time": 0.6201233520507813, "loss": 0.6940417289733887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.35173472131991, "step_time": 0.5835949401855468} +{"epoch": 0, "iter": 10658, "iter_tflops": 14.39662171451315, "iter_time": 1.433051025390625, "loss": 0.5169450044631958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.197583010566287, "step_time": 1.273714324951172} +{"epoch": 0, "iter": 10659, "iter_tflops": 36.33243360609313, "iter_time": 0.5678423233032227, "loss": 0.656940758228302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.43695454792524, "step_time": 0.5231411437988281} +{"epoch": 0, "iter": 10660, "iter_tflops": 34.98823025103857, "iter_time": 0.5896581039428711, "loss": 0.8411176800727844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.96416569346398, "step_time": 0.543435977935791} +{"epoch": 0, "iter": 10661, "iter_tflops": 13.176048115712112, "iter_time": 0.9953700180053711, "loss": 0.05394919961690903, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 14.007529750887569, "step_time": 0.9362852325439454} +{"epoch": 0, "iter": 10662, "iter_tflops": 10.750369307852896, "iter_time": 1.2199621124267577, "loss": 0.0487884059548378, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 15.832183272101185, "step_time": 0.8283786907196043} +{"epoch": 0, "iter": 10663, "iter_tflops": 33.62363833663232, "iter_time": 0.390054256439209, "loss": 0.010662194341421127, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 36.6760620195791, "step_time": 0.35759136962890625} +{"epoch": 0, "iter": 10664, "iter_tflops": 36.74982281591666, "iter_time": 0.35687364578247066, "loss": 0.037678975611925125, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 40.09459043048804, "step_time": 0.3271025619506836} +{"epoch": 0, "iter": 10665, "iter_tflops": 31.633706920558694, "iter_time": 0.6521870346069336, "loss": 0.38615351915359497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.778818726490165, "step_time": 0.6107701301574707} +{"epoch": 0, "iter": 10666, "iter_tflops": 
16.05109895473587, "iter_time": 1.28533837890625, "loss": 0.4492548704147339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.755219356114825, "step_time": 1.1000187797546386} +{"epoch": 0, "iter": 10667, "iter_tflops": 34.81272450088409, "iter_time": 0.5926308212280275, "loss": 0.3652026355266571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.737667764696134, "step_time": 0.5325848121643066} +{"epoch": 0, "iter": 10668, "iter_tflops": 37.20277277238213, "iter_time": 0.5545579528808594, "loss": 0.27300524711608887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59290274088835, "step_time": 0.5082438583374024} +{"epoch": 0, "iter": 10669, "iter_tflops": 18.418965018999813, "iter_time": 1.1201005859375002, "loss": 0.7146634459495544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.39431764320825, "step_time": 1.0637700119018556} +{"epoch": 0, "iter": 10670, "iter_tflops": 13.360609224241868, "iter_time": 1.544173110961914, "loss": 0.7259823679924011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.370382583123934, "step_time": 1.260269477844238} +{"epoch": 0, "iter": 10671, "iter_tflops": 46.3504057918426, "iter_time": 0.44511138916015625, "loss": 0.7937180995941162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.09314923875192, "step_time": 0.4118545913696289} +{"epoch": 0, "iter": 10672, "iter_tflops": 47.48651018078243, "iter_time": 0.43446219635009764, "loss": 0.6215975284576416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96674896201456, "step_time": 0.4047951641082764} +{"epoch": 0, "iter": 10673, "iter_tflops": 45.92968080119347, "iter_time": 0.4491886978149414, "loss": 0.22618509829044342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.350026914207845, "step_time": 0.40975337600708006} +{"epoch": 0, "iter": 10674, "iter_tflops": 47.41162596276588, "iter_time": 0.4351484069824219, "loss": 0.3009934425354004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.91775656996892, "step_time": 0.3973802967071533} +{"epoch": 0, "iter": 10675, 
"iter_tflops": 46.66851348360182, "iter_time": 0.44207736587524415, "loss": 0.27322977781295776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.136889899136946, "step_time": 0.40344834327697754} +{"epoch": 0, "iter": 10676, "iter_tflops": 52.71085817503371, "iter_time": 0.39140120697021485, "loss": 0.29537343978881836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.454885578513014, "step_time": 0.3590833625793457} +{"epoch": 0, "iter": 10677, "iter_tflops": 51.74665795932756, "iter_time": 0.398694221496582, "loss": 0.01437249407172203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.142662190379305, "step_time": 0.3610453681945801} +{"epoch": 0, "iter": 10678, "iter_tflops": 52.953405452427596, "iter_time": 0.3896084365844727, "loss": 0.006921537686139345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.21675772241003, "step_time": 0.3543841037750244} +{"epoch": 0, "iter": 10679, "iter_tflops": 55.46189832868989, "iter_time": 0.3719867897033692, "loss": 0.015089933760464191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.6164234026948, "step_time": 0.3403548469543457} +{"epoch": 0, "iter": 10680, "iter_tflops": 52.653591381182906, "iter_time": 0.3918269004821778, "loss": 0.003373679704964161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.572579597105666, "step_time": 0.35834929847717284} +{"epoch": 0, "iter": 10681, "iter_tflops": 38.147013298865296, "iter_time": 0.5408311614990233, "loss": 0.2759787142276764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.217469876779504, "step_time": 0.5005424537658691} +{"epoch": 0, "iter": 10682, "iter_tflops": 14.650001811431405, "iter_time": 1.408265594482422, "loss": 0.19718623161315918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.259943173230024, "step_time": 1.071191816329956} +{"epoch": 0, "iter": 10683, "iter_tflops": 38.585507602469875, "iter_time": 0.5346850357055664, "loss": 0.2841312885284424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.262324153994754, "step_time": 0.4881675090789795} 
+{"epoch": 0, "iter": 10684, "iter_tflops": 38.953407120570276, "iter_time": 0.5296351470947266, "loss": 0.29827386140823364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6241548127152, "step_time": 0.4840235214233399} +{"epoch": 0, "iter": 10685, "iter_tflops": 15.626523226395555, "iter_time": 1.0588032531738283, "loss": 0.4823642075061798, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 16.53668844050715, "step_time": 1.0005276260375977} +{"epoch": 0, "iter": 10686, "iter_tflops": 8.762058191120824, "iter_time": 1.8883021850585937, "loss": 0.5020644068717957, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 10.223772090699775, "step_time": 1.618327705383301} +{"epoch": 0, "iter": 10687, "iter_tflops": 11.882887821224083, "iter_time": 1.3923731231689453, "loss": 0.40324535965919495, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 13.875554643765827, "step_time": 1.192414577484131} +{"epoch": 0, "iter": 10688, "iter_tflops": 29.195623805406296, "iter_time": 0.5667086868286132, "loss": 0.5092355012893677, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.063001829420454, "step_time": 0.532640525817871} +{"epoch": 0, "iter": 10689, "iter_tflops": 19.7137622572814, "iter_time": 0.855942756652832, "loss": 0.4036935865879059, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 21.080280522986072, "step_time": 0.8004567108154297} +{"epoch": 0, "iter": 10690, "iter_tflops": 11.298084661483431, "iter_time": 1.4935143890380862, "loss": 0.5001630187034607, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 13.954365524197438, "step_time": 1.2092167129516602} +{"epoch": 0, "iter": 10691, "iter_tflops": 24.540930094665296, "iter_time": 0.6875799713134766, "loss": 0.36669760942459106, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.54124397336161, "step_time": 0.6357596511840821} +{"epoch": 0, "iter": 10692, "iter_tflops": 27.377487123715674, "iter_time": 0.6163404235839844, "loss": 0.3363630473613739, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 29.340810223833373, "step_time": 
0.5750983657836914} +{"epoch": 0, "iter": 10693, "iter_tflops": 14.492588609778915, "iter_time": 1.4235616607666017, "loss": 0.22564661502838135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.392073811054104, "step_time": 1.3403712692260743} +{"epoch": 0, "iter": 10694, "iter_tflops": 17.503557004455452, "iter_time": 1.1786800537109374, "loss": 0.33106809854507446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.689109201842097, "step_time": 0.9092949981689452} +{"epoch": 0, "iter": 10695, "iter_tflops": 34.94197334949109, "iter_time": 0.590438705444336, "loss": 0.351590096950531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.027163836866215, "step_time": 0.5425356884002686} +{"epoch": 0, "iter": 10696, "iter_tflops": 34.362419637753106, "iter_time": 0.6003969955444336, "loss": 0.30907246470451355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.3322113052227, "step_time": 0.5526351852416993} +{"epoch": 0, "iter": 10697, "iter_tflops": 32.246792166840876, "iter_time": 0.639787467956543, "loss": 0.5397400259971619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.60104960666878, "step_time": 0.5795080127716065} +{"epoch": 0, "iter": 10698, "iter_tflops": 37.614790377126596, "iter_time": 0.548483543395996, "loss": 0.5362840294837952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.007189982734516, "step_time": 0.5031091747283936} +{"epoch": 0, "iter": 10699, "iter_tflops": 38.8317661704581, "iter_time": 0.5312942352294923, "loss": 0.6122450828552246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08885317331868, "step_time": 0.49017951202392573} +{"epoch": 0, "iter": 10700, "iter_tflops": 39.97170052253716, "iter_time": 0.5161425018310547, "loss": 0.690093457698822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.492436295528066, "step_time": 0.4743604927062989} +{"epoch": 0, "iter": 10701, "iter_tflops": 22.145946395881715, "iter_time": 0.9315968322753906, "loss": 0.6991010308265686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.688556100806508, 
"step_time": 0.8709308166503907} +{"epoch": 0, "iter": 10702, "iter_tflops": 8.430061958446585, "iter_time": 2.447324066162109, "loss": 0.7321653366088867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.09920285537903, "step_time": 2.0428437576293947} +{"epoch": 0, "iter": 10703, "iter_tflops": 11.832492061719607, "iter_time": 1.743596649169922, "loss": 0.666471004486084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.617986747218954, "step_time": 1.411349857330322} +{"epoch": 0, "iter": 10704, "iter_tflops": 35.43218058923895, "iter_time": 0.5822699356079102, "loss": 0.6777989268302917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.62066668993333, "step_time": 0.5341982746124267} +{"epoch": 0, "iter": 10705, "iter_tflops": 12.725547319172135, "iter_time": 1.3809061279296873, "loss": 0.3978269696235657, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 13.480454233292498, "step_time": 1.3035752334594726} +{"epoch": 0, "iter": 10706, "iter_tflops": 14.720105857352573, "iter_time": 1.1937948303222656, "loss": 0.3831225037574768, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 20.197614981775754, "step_time": 0.8700426406860352} +{"epoch": 0, "iter": 10707, "iter_tflops": 31.634615362790747, "iter_time": 0.555492332458496, "loss": 0.35628581047058105, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 33.76311824162283, "step_time": 0.5204728469848633} +{"epoch": 0, "iter": 10708, "iter_tflops": 30.513661092282632, "iter_time": 0.5758989791870117, "loss": 0.5087925791740417, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 32.4294319562511, "step_time": 0.5418777084350586} +{"epoch": 0, "iter": 10709, "iter_tflops": 24.66022199519487, "iter_time": 0.8366142654418945, "loss": 0.9079089760780334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.018363735716, "step_time": 0.7929435424804687} +{"epoch": 0, "iter": 10710, "iter_tflops": 12.97737868764167, "iter_time": 1.5897735595703126, "loss": 0.6608427166938782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.46163832175455, 
"step_time": 1.3343407135009766} +{"epoch": 0, "iter": 10711, "iter_tflops": 36.10592767477245, "iter_time": 0.5714046096801757, "loss": 0.6572785377502441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.74558013911144, "step_time": 0.5190789375305176} +{"epoch": 0, "iter": 10712, "iter_tflops": 38.53947452745233, "iter_time": 0.5353236846923828, "loss": 0.6656246185302734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.190096671499234, "step_time": 0.48900322914123534} +{"epoch": 0, "iter": 10713, "iter_tflops": 18.600191615864055, "iter_time": 1.1091871490478518, "loss": 0.5782050490379333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.14464832772129, "step_time": 1.0241476135253906} +{"epoch": 0, "iter": 10714, "iter_tflops": 17.821394896006833, "iter_time": 1.1576587371826172, "loss": 0.6004908680915833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.499263566716767, "step_time": 0.9596186141967773} +{"epoch": 0, "iter": 10715, "iter_tflops": 38.625604086874674, "iter_time": 0.5341299896240235, "loss": 0.48584988713264465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.356891728735384, "step_time": 0.4870776081085205} +{"epoch": 0, "iter": 10716, "iter_tflops": 35.45567948836885, "iter_time": 0.5818840255737304, "loss": 0.4230601191520691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.46119705671905, "step_time": 0.5364131927490234} +{"epoch": 0, "iter": 10717, "iter_tflops": 31.49562401977627, "iter_time": 0.6550463485717773, "loss": 0.8536824584007263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.76155426052064, "step_time": 0.5935031948089599} +{"epoch": 0, "iter": 10718, "iter_tflops": 38.804275224833766, "iter_time": 0.5316706314086914, "loss": 0.7277311682701111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.396794387783956, "step_time": 0.48661918449401853} +{"epoch": 0, "iter": 10719, "iter_tflops": 38.93040677586574, "iter_time": 0.5299480590820312, "loss": 0.7369775176048279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
42.24215889131393, "step_time": 0.4884005470275879} +{"epoch": 0, "iter": 10720, "iter_tflops": 38.534588364687835, "iter_time": 0.5353915634155273, "loss": 0.6063802242279053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.867577782601415, "step_time": 0.49277017211914065} +{"epoch": 0, "iter": 10721, "iter_tflops": 17.792614641186038, "iter_time": 1.1595312957763673, "loss": 0.11392821371555328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.14338092139328, "step_time": 1.0777142028808593} +{"epoch": 0, "iter": 10722, "iter_tflops": 32.53297787239586, "iter_time": 0.6341593933105469, "loss": 0.08621621131896973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.81222233135365, "step_time": 0.5604414024353027} +{"epoch": 0, "iter": 10723, "iter_tflops": 50.088613830643986, "iter_time": 0.4118918838500977, "loss": 0.06342043727636337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.81431117194017, "step_time": 0.37638151550292964} +{"epoch": 0, "iter": 10724, "iter_tflops": 51.4862623056959, "iter_time": 0.4007106475830078, "loss": 0.08601053059101105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.936180285663006, "step_time": 0.3688327198028565} +{"epoch": 0, "iter": 10725, "iter_tflops": 40.681205145205446, "iter_time": 0.5071406669616699, "loss": 0.6613567471504211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44417160994027, "step_time": 0.4642024536132812} +{"epoch": 0, "iter": 10726, "iter_tflops": 39.24410890096887, "iter_time": 0.5257118606567383, "loss": 0.4482734501361847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73302396452276, "step_time": 0.47175090217590326} +{"epoch": 0, "iter": 10727, "iter_tflops": 39.28800722103735, "iter_time": 0.5251244583129883, "loss": 0.5713366270065308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.12887509798277, "step_time": 0.47835918426513674} +{"epoch": 0, "iter": 10728, "iter_tflops": 35.60029794754993, "iter_time": 0.5795202484130859, "loss": 0.5040144920349121, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.2076838369403, "step_time": 0.5262002620697022} +{"epoch": 0, "iter": 10729, "iter_tflops": 24.80230825280382, "iter_time": 0.8318215103149414, "loss": 0.7661156058311462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.52885130782942, "step_time": 0.7776851425170899} +{"epoch": 0, "iter": 10730, "iter_tflops": 16.073969783266506, "iter_time": 1.283509536743164, "loss": 0.5049542188644409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.20351285892919, "step_time": 1.0743395576477053} +{"epoch": 0, "iter": 10731, "iter_tflops": 44.20081193240732, "iter_time": 0.46675824737548827, "loss": 0.7101510763168335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.637539981338776, "step_time": 0.4330847797393799} +{"epoch": 0, "iter": 10732, "iter_tflops": 45.0355817018145, "iter_time": 0.45810651779174805, "loss": 0.4472126364707947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.40550088647388, "step_time": 0.4262138214111328} +{"epoch": 0, "iter": 10733, "iter_tflops": 30.993590096661947, "iter_time": 0.6656567840576173, "loss": 0.14824536442756653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.94976611957439, "step_time": 0.6261377830505371} +{"epoch": 0, "iter": 10734, "iter_tflops": 10.1137358568007, "iter_time": 2.0399082794189454, "loss": 0.153891921043396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.765022696009938, "step_time": 1.4988056297302246} +{"epoch": 0, "iter": 10735, "iter_tflops": 14.09682083715645, "iter_time": 1.463528106689453, "loss": 0.0912293791770935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.483367020534693, "step_time": 1.1800412063598633} +{"epoch": 0, "iter": 10736, "iter_tflops": 17.70452842402997, "iter_time": 1.1653003692626953, "loss": 0.17557117342948914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.177283625826842, "step_time": 0.9742086791992187} +{"epoch": 0, "iter": 10737, "iter_tflops": 15.97066587168603, "iter_time": 0.9487969818115234, "loss": 0.31871306896209717, "lr": 3e-05, "seqlen": 
6064.0, "step_tflops": 16.808827626363048, "step_time": 0.9014858093261718} +{"epoch": 0, "iter": 10738, "iter_tflops": 12.391778641768614, "iter_time": 1.2228203887939455, "loss": 0.38709551095962524, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 14.665532362009735, "step_time": 1.0332335166931153} +{"epoch": 0, "iter": 10739, "iter_tflops": 22.70415141366018, "iter_time": 0.6674074401855469, "loss": 0.3868105113506317, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.498467007408184, "step_time": 0.6185252151489258} +{"epoch": 0, "iter": 10740, "iter_tflops": 22.153594559667184, "iter_time": 0.6839937210083008, "loss": 0.27660438418388367, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 23.726044520504207, "step_time": 0.6386618537902832} +{"epoch": 0, "iter": 10741, "iter_tflops": 36.131740145747685, "iter_time": 0.5709963989257812, "loss": 0.17407989501953125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.185287866040234, "step_time": 0.5133991718292237} +{"epoch": 0, "iter": 10742, "iter_tflops": 37.826038111855624, "iter_time": 0.5454204177856445, "loss": 0.29638054966926575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57257178944438, "step_time": 0.4962669525146485} +{"epoch": 0, "iter": 10743, "iter_tflops": 42.70036160001807, "iter_time": 0.4831596908569336, "loss": 0.20474570989608765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.0498330287687, "step_time": 0.43849451065063483} +{"epoch": 0, "iter": 10744, "iter_tflops": 42.70569570628739, "iter_time": 0.4830993423461914, "loss": 0.2336554080247879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.87379212542489, "step_time": 0.44014133644104003} +{"epoch": 0, "iter": 10745, "iter_tflops": 19.708598422197685, "iter_time": 1.0468067321777343, "loss": 0.6438900232315063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.231350197900028, "step_time": 0.971727813720703} +{"epoch": 0, "iter": 10746, "iter_tflops": 19.303129177423116, "iter_time": 1.0687952880859377, "loss": 0.6990970373153687, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 23.363967958073914, "step_time": 0.8830303802490235} +{"epoch": 0, "iter": 10747, "iter_tflops": 44.75702011413058, "iter_time": 0.46095771026611326, "loss": 0.9055846929550171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.564803910508736, "step_time": 0.4248157482147217} +{"epoch": 0, "iter": 10748, "iter_tflops": 42.95540750215525, "iter_time": 0.4802909507751465, "loss": 0.7753401398658752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15381398947272, "step_time": 0.44700733757019046} +{"epoch": 0, "iter": 10749, "iter_tflops": 39.890824873661934, "iter_time": 0.5171889419555664, "loss": 0.37158122658729553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.26626592031504, "step_time": 0.47684016799926754} +{"epoch": 0, "iter": 10750, "iter_tflops": 42.926054361508136, "iter_time": 0.48061937713623043, "loss": 0.31838569045066833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35867506815649, "step_time": 0.44503199195861814} +{"epoch": 0, "iter": 10751, "iter_tflops": 44.89418971725053, "iter_time": 0.45954930114746095, "loss": 0.4243852198123932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42507992868671, "step_time": 0.42604149627685545} +{"epoch": 0, "iter": 10752, "iter_tflops": 47.50214199096891, "iter_time": 0.4343192253112793, "loss": 0.33279404044151306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.43322375890801, "step_time": 0.4011238651275635} +{"epoch": 0, "iter": 10753, "iter_tflops": 29.28768333660016, "iter_time": 0.7044290008544921, "loss": 0.6358116865158081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.205427387360515, "step_time": 0.6611379890441895} +{"epoch": 0, "iter": 10754, "iter_tflops": 34.6349582493496, "iter_time": 0.5956725387573243, "loss": 0.7488399744033813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.57681988334104, "step_time": 0.5490377731323243} +{"epoch": 0, "iter": 10755, "iter_tflops": 35.173547971323316, "iter_time": 0.5865513916015626, "loss": 
0.8471336960792542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.952809034792786, "step_time": 0.5435985908508301} +{"epoch": 0, "iter": 10756, "iter_tflops": 33.45365329645988, "iter_time": 0.6167067413330077, "loss": 0.654785692691803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.426744559668336, "step_time": 0.5663721466064453} +{"epoch": 0, "iter": 10757, "iter_tflops": 18.536486895881342, "iter_time": 1.1129991149902343, "loss": 0.13751861453056335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.012705313286403, "step_time": 1.0308997802734374} +{"epoch": 0, "iter": 10758, "iter_tflops": 23.93280248018491, "iter_time": 0.8620425262451172, "loss": 0.12646570801734924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.03688083343409, "step_time": 0.7105134201049804} +{"epoch": 0, "iter": 10759, "iter_tflops": 38.920068448691424, "iter_time": 0.5300888290405273, "loss": 0.10169980674982071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.63590285073966, "step_time": 0.48389015197753915} +{"epoch": 0, "iter": 10760, "iter_tflops": 39.91799236092987, "iter_time": 0.5168369522094727, "loss": 0.13096949458122253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.68659912302595, "step_time": 0.47225222206115725} +{"epoch": 0, "iter": 10761, "iter_tflops": 26.899156925660854, "iter_time": 0.7669791870117189, "loss": 0.13337074220180511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.074683716852608, "step_time": 0.7095896110534669} +{"epoch": 0, "iter": 10762, "iter_tflops": 10.772960225471818, "iter_time": 1.9150811920166018, "loss": 0.06743129342794418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.024160601706315, "step_time": 1.7158032226562498} +{"epoch": 0, "iter": 10763, "iter_tflops": 9.459346166950136, "iter_time": 2.1810274353027346, "loss": 0.09807566553354263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.853459409108321, "step_time": 1.7405124359130857} +{"epoch": 0, "iter": 10764, "iter_tflops": 24.233462233582163, "iter_time": 
0.8513473358154298, "loss": 0.09965250641107559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.03271609504181, "step_time": 0.5889093341827393} +{"epoch": 0, "iter": 10765, "iter_tflops": 15.887432930794695, "iter_time": 0.9460505294799805, "loss": 0.48083943128585815, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 16.659540274948085, "step_time": 0.9022046279907227} +{"epoch": 0, "iter": 10766, "iter_tflops": 11.721160459374143, "iter_time": 1.2823230590820314, "loss": 0.46358585357666016, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.986922387837424, "step_time": 1.0745976791381837} +{"epoch": 0, "iter": 10767, "iter_tflops": 25.9663865226261, "iter_time": 0.5788373489379883, "loss": 0.3512953221797943, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.682899078728184, "step_time": 0.5429458198547363} +{"epoch": 0, "iter": 10768, "iter_tflops": 26.595098549204398, "iter_time": 0.5651535491943359, "loss": 0.3144228756427765, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.365232951182115, "step_time": 0.5298851013183593} +{"epoch": 0, "iter": 10769, "iter_tflops": 21.982916718089893, "iter_time": 0.7881550979614258, "loss": 0.07882405072450638, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 23.26811094557311, "step_time": 0.7446220245361329} +{"epoch": 0, "iter": 10770, "iter_tflops": 14.093451825196173, "iter_time": 1.2293615570068361, "loss": 0.09293463826179504, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 16.830957885636813, "step_time": 1.0294094963073732} +{"epoch": 0, "iter": 10771, "iter_tflops": 40.90318222440771, "iter_time": 0.4235843505859375, "loss": 0.059290848672389984, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 45.05426449331367, "step_time": 0.38455733489990235} +{"epoch": 0, "iter": 10772, "iter_tflops": 45.70422773943234, "iter_time": 0.37908851623535156, "loss": 0.08235432207584381, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 49.88439914980715, "step_time": 0.34732197189331054} +{"epoch": 0, "iter": 10773, "iter_tflops": 
27.757535898751005, "iter_time": 0.743260986328125, "loss": 0.6980245113372803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.351046857912454, "step_time": 0.7029082679748535} +{"epoch": 0, "iter": 10774, "iter_tflops": 14.726288766608542, "iter_time": 1.4009703216552734, "loss": 0.6188778877258301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.300467545385434, "step_time": 1.0689426803588866} +{"epoch": 0, "iter": 10775, "iter_tflops": 40.43638721785698, "iter_time": 0.5102110977172851, "loss": 0.697422981262207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.31872402608039, "step_time": 0.465516414642334} +{"epoch": 0, "iter": 10776, "iter_tflops": 35.763564136847116, "iter_time": 0.5768746490478516, "loss": 0.6389966607093811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.2583749793503, "step_time": 0.5255208225250244} +{"epoch": 0, "iter": 10777, "iter_tflops": 15.108552257105893, "iter_time": 1.365524185180664, "loss": 0.8530751466751099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.146783710692766, "step_time": 1.277721549987793} +{"epoch": 0, "iter": 10778, "iter_tflops": 20.17683908341936, "iter_time": 1.022513656616211, "loss": 0.5640695691108704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.694937741916515, "step_time": 0.6721334209442138} +{"epoch": 0, "iter": 10779, "iter_tflops": 46.35141875717773, "iter_time": 0.44510166168212895, "loss": 0.7407795786857605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24743903724115, "step_time": 0.4105899505615235} +{"epoch": 0, "iter": 10780, "iter_tflops": 45.58495019869414, "iter_time": 0.4525856323242188, "loss": 0.7490716576576233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.340601307059565, "step_time": 0.4181362400054932} +{"epoch": 0, "iter": 10781, "iter_tflops": 23.822719405242175, "iter_time": 0.8660259628295899, "loss": 0.14519432187080383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.062184570615212, "step_time": 0.8231961364746093} +{"epoch": 0, "iter": 10782, 
"iter_tflops": 33.24939405565963, "iter_time": 0.6204953231811524, "loss": 0.1572539359331131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.682220009370404, "step_time": 0.4949614849090576} +{"epoch": 0, "iter": 10783, "iter_tflops": 45.15023311007395, "iter_time": 0.4569432334899902, "loss": 0.17726173996925354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.09509783871553, "step_time": 0.42022715950012207} +{"epoch": 0, "iter": 10784, "iter_tflops": 48.23915197981458, "iter_time": 0.42768358612060553, "loss": 0.18377713859081268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43735374932726, "step_time": 0.39344268989562986} +{"epoch": 0, "iter": 10785, "iter_tflops": 25.708008373831404, "iter_time": 0.802516212463379, "loss": 0.4957471191883087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.082591554184486, "step_time": 0.7617843170166015} +{"epoch": 0, "iter": 10786, "iter_tflops": 16.398031384873143, "iter_time": 1.25814453125, "loss": 0.4186211824417114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.236526019988823, "step_time": 1.0724958076477051} +{"epoch": 0, "iter": 10787, "iter_tflops": 37.02054467033388, "iter_time": 0.5572876815795899, "loss": 0.5649519562721252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.3827144014348, "step_time": 0.5108892211914062} +{"epoch": 0, "iter": 10788, "iter_tflops": 36.728016537307546, "iter_time": 0.561726318359375, "loss": 0.5307563543319702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.13500478686995, "step_time": 0.5140423831939697} +{"epoch": 0, "iter": 10789, "iter_tflops": 20.548923371122026, "iter_time": 1.0039987564086914, "loss": 0.09857551008462906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.166065925393763, "step_time": 0.930751247406006} +{"epoch": 0, "iter": 10790, "iter_tflops": 20.51822698141476, "iter_time": 1.0055007934570312, "loss": 0.22600510716438293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.273314716790566, "step_time": 0.8163192577362061} +{"epoch": 0, "iter": 
10791, "iter_tflops": 47.013851383082255, "iter_time": 0.43883010864257815, "loss": 0.2497997283935547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.36442764942342, "step_time": 0.40166111946105953} +{"epoch": 0, "iter": 10792, "iter_tflops": 49.284662771424976, "iter_time": 0.4186108283996583, "loss": 0.22164152562618256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.5439256674055, "step_time": 0.38531155967712405} +{"epoch": 0, "iter": 10793, "iter_tflops": 36.32268371134324, "iter_time": 0.48832954406738277, "loss": 0.08897067606449127, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 39.71271301031988, "step_time": 0.4466438636779785} +{"epoch": 0, "iter": 10794, "iter_tflops": 9.23064814094401, "iter_time": 1.9215811614990235, "loss": 0.059110045433044434, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 11.387964701595772, "step_time": 1.5575601119995115} +{"epoch": 0, "iter": 10795, "iter_tflops": 14.778405318830348, "iter_time": 1.2002268981933595, "loss": 0.09274262189865112, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 17.497224846803416, "step_time": 1.0137287330627442} +{"epoch": 0, "iter": 10796, "iter_tflops": 17.057336581326975, "iter_time": 1.03987158203125, "loss": 0.08438693732023239, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 19.350402277270064, "step_time": 0.9166444873809815} +{"epoch": 0, "iter": 10797, "iter_tflops": 15.66581866907845, "iter_time": 0.9620421600341796, "loss": 0.27256521582603455, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 16.46216421566846, "step_time": 0.9155040512084961} +{"epoch": 0, "iter": 10798, "iter_tflops": 10.53012472766407, "iter_time": 1.4312440185546875, "loss": 0.3588269352912903, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 12.2416231956101, "step_time": 1.2311421279907226} +{"epoch": 0, "iter": 10799, "iter_tflops": 26.45828317091758, "iter_time": 0.5696204071044922, "loss": 0.3499147295951843, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 28.251116542568763, "step_time": 0.5334719429016113} 
+{"epoch": 0, "iter": 10800, "iter_tflops": 25.18885457926716, "iter_time": 0.598327247619629, "loss": 0.27989661693573, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 26.693742522813142, "step_time": 0.5645959167480469} +{"epoch": 0, "iter": 10801, "iter_tflops": 17.082366744808, "iter_time": 1.0238913726806642, "loss": 0.19333329796791077, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 17.952941923293892, "step_time": 0.9742407684326172} +{"epoch": 0, "iter": 10802, "iter_tflops": 14.247148741656039, "iter_time": 1.2276483001708984, "loss": 0.05976198986172676, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 18.85348351820447, "step_time": 0.9277059020996092} +{"epoch": 0, "iter": 10803, "iter_tflops": 42.68716338702211, "iter_time": 0.40973647689819337, "loss": 0.1338551938533783, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 46.462778624275536, "step_time": 0.3764408512115478} +{"epoch": 0, "iter": 10804, "iter_tflops": 44.032576593229365, "iter_time": 0.397216999053955, "loss": 0.19580084085464478, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 47.82379987702883, "step_time": 0.36572769165039065} +{"epoch": 0, "iter": 10805, "iter_tflops": 19.923418382411004, "iter_time": 0.8799511413574219, "loss": 0.004593132995069027, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 20.92089356339432, "step_time": 0.8379964599609375} +{"epoch": 0, "iter": 10806, "iter_tflops": 9.375690625832714, "iter_time": 1.8699032897949222, "loss": 0.00679301330819726, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 12.301952835147873, "step_time": 1.4251098976135255} +{"epoch": 0, "iter": 10807, "iter_tflops": 36.54729017634172, "iter_time": 0.4796972541809082, "loss": 0.0030792271718382835, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 40.846530816766915, "step_time": 0.42920743560791014} +{"epoch": 0, "iter": 10808, "iter_tflops": 40.46705264159449, "iter_time": 0.43323231124877937, "loss": 0.007746738847345114, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 44.50137798118258, "step_time": 
0.39395712089538576} +{"epoch": 0, "iter": 10809, "iter_tflops": 22.557658346170346, "iter_time": 0.9145937576293945, "loss": 0.10249627381563187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.026498042311573, "step_time": 0.8586808395385741} +{"epoch": 0, "iter": 10810, "iter_tflops": 11.671885638123957, "iter_time": 1.7675887298583985, "loss": 0.13216224312782288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.384044246011872, "step_time": 1.5414693145751954} +{"epoch": 0, "iter": 10811, "iter_tflops": 10.082604829191812, "iter_time": 2.046206695556641, "loss": 0.07890630513429642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.699857926148562, "step_time": 1.6245137252807618} +{"epoch": 0, "iter": 10812, "iter_tflops": 48.337609388800736, "iter_time": 0.4268124504089355, "loss": 0.11979854106903076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.55928765511724, "step_time": 0.39252993011474613} +{"epoch": 0, "iter": 10813, "iter_tflops": 23.597364861051528, "iter_time": 0.7255207977294922, "loss": 0.36371976137161255, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 25.185713733370136, "step_time": 0.6797654876708984} +{"epoch": 0, "iter": 10814, "iter_tflops": 8.776184793676178, "iter_time": 1.9507769470214846, "loss": 0.45869651436805725, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 10.073579220728202, "step_time": 1.6995328674316406} +{"epoch": 0, "iter": 10815, "iter_tflops": 8.348726238473516, "iter_time": 2.050657608032227, "loss": 0.40621325373649597, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 9.86430386344936, "step_time": 1.7355891723632813} +{"epoch": 0, "iter": 10816, "iter_tflops": 25.62150600138928, "iter_time": 0.6682034606933593, "loss": 0.47120216488838196, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 27.958201951132395, "step_time": 0.6123562240600585} +{"epoch": 0, "iter": 10817, "iter_tflops": 22.161987364598435, "iter_time": 0.744715705871582, "loss": 0.3450642228126526, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 
23.511694085112598, "step_time": 0.7019647331237793} +{"epoch": 0, "iter": 10818, "iter_tflops": 6.13829404100403, "iter_time": 2.6887568359375, "loss": 0.2476940155029297, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 7.239720150932367, "step_time": 2.2796986236572265} +{"epoch": 0, "iter": 10819, "iter_tflops": 8.875777604094694, "iter_time": 1.8594855346679688, "loss": 0.2894178628921509, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 10.929965160144956, "step_time": 1.5100121383666991} +{"epoch": 0, "iter": 10820, "iter_tflops": 24.883659448818772, "iter_time": 0.6632617721557618, "loss": 0.2745569944381714, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 26.881092693436237, "step_time": 0.6139772758483887} +{"epoch": 0, "iter": 10821, "iter_tflops": 13.149700613840684, "iter_time": 1.2114652709960938, "loss": 0.34594041109085083, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 14.15383603530293, "step_time": 1.1255185928344726} +{"epoch": 0, "iter": 10822, "iter_tflops": 15.639680715419804, "iter_time": 1.0185889282226563, "loss": 0.4187050461769104, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 22.72501910763388, "step_time": 0.7010073585510254} +{"epoch": 0, "iter": 10823, "iter_tflops": 28.25145506584925, "iter_time": 0.5638791198730468, "loss": 0.3563540577888489, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.143833022644937, "step_time": 0.5284797592163085} +{"epoch": 0, "iter": 10824, "iter_tflops": 28.514716155096956, "iter_time": 0.5586731262207032, "loss": 0.272222101688385, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.343487759768507, "step_time": 0.525002456665039} +{"epoch": 0, "iter": 10825, "iter_tflops": 27.831730550908155, "iter_time": 0.7412795791625977, "loss": 0.02565586008131504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.544138338838515, "step_time": 0.6983142738342285} +{"epoch": 0, "iter": 10826, "iter_tflops": 17.01493786477625, "iter_time": 1.2125282897949219, "loss": 0.03479014337062836, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 22.53174482434653, "step_time": 0.9156456222534181} +{"epoch": 0, "iter": 10827, "iter_tflops": 44.107087429268404, "iter_time": 0.46775007629394527, "loss": 0.048235874623060226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.655224643482995, "step_time": 0.42402627182006836} +{"epoch": 0, "iter": 10828, "iter_tflops": 42.00930373116182, "iter_time": 0.491107723236084, "loss": 0.04997028782963753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.39326109765342, "step_time": 0.44470022201538084} +{"epoch": 0, "iter": 10829, "iter_tflops": 20.00189243901675, "iter_time": 1.0314570770263671, "loss": 0.31180623173713684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.594556601360598, "step_time": 0.9553839836120606} +{"epoch": 0, "iter": 10830, "iter_tflops": 18.274922399404048, "iter_time": 1.12892919921875, "loss": 0.23898166418075562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.254868082420504, "step_time": 0.8505959892272948} +{"epoch": 0, "iter": 10831, "iter_tflops": 35.655171861049546, "iter_time": 0.5786283569335938, "loss": 0.43163013458251953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.27320910995367, "step_time": 0.5253223247528077} +{"epoch": 0, "iter": 10832, "iter_tflops": 38.19459286362346, "iter_time": 0.5401574401855469, "loss": 0.21889793872833252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.97082657148554, "step_time": 0.49155795097351074} +{"epoch": 0, "iter": 10833, "iter_tflops": 34.803184208745016, "iter_time": 0.5927932739257813, "loss": 0.30523648858070374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.70566641408688, "step_time": 0.5330251464843749} +{"epoch": 0, "iter": 10834, "iter_tflops": 39.77547954346954, "iter_time": 0.5186887435913086, "loss": 0.3005844056606293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14105182470518, "step_time": 0.46739016532897953} +{"epoch": 0, "iter": 10835, "iter_tflops": 41.73462970239176, "iter_time": 0.49433992004394534, "loss": 0.27041691541671753, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 45.6235926937614, "step_time": 0.452202299118042} +{"epoch": 0, "iter": 10836, "iter_tflops": 36.208420584992986, "iter_time": 0.5697871704101563, "loss": 0.18478505313396454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.43290654375904, "step_time": 0.5231948471069335} +{"epoch": 0, "iter": 10837, "iter_tflops": 17.23589186128279, "iter_time": 1.196984390258789, "loss": 0.8002432584762573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.50865876210975, "step_time": 1.1146725311279297} +{"epoch": 0, "iter": 10838, "iter_tflops": 27.819697710237808, "iter_time": 0.7416002044677735, "loss": 0.6281394958496094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.1505427066729, "step_time": 0.622345573425293} +{"epoch": 0, "iter": 10839, "iter_tflops": 34.88287798126256, "iter_time": 0.5914389724731446, "loss": 0.7680477499961853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.031992895995565, "step_time": 0.5424668006896973} +{"epoch": 0, "iter": 10840, "iter_tflops": 36.68918390027634, "iter_time": 0.5623208618164063, "loss": 0.6970941424369812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.86874455846289, "step_time": 0.5174753742218018} +{"epoch": 0, "iter": 10841, "iter_tflops": 31.02662878046833, "iter_time": 0.6649479598999023, "loss": 0.12032244354486465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.911107431289615, "step_time": 0.6083874893188477} +{"epoch": 0, "iter": 10842, "iter_tflops": 8.131899758810688, "iter_time": 2.5370570373535153, "loss": 0.16938021779060364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.118302160532588, "step_time": 2.0389876861572267} +{"epoch": 0, "iter": 10843, "iter_tflops": 12.392060391479223, "iter_time": 1.6648638610839843, "loss": 0.07110331952571869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.092674809523695, "step_time": 1.3669607124328613} +{"epoch": 0, "iter": 10844, "iter_tflops": 34.61965795149909, "iter_time": 0.5959357986450194, "loss": 0.11094135791063309, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 38.85692213603186, "step_time": 0.5309502754211426} +{"epoch": 0, "iter": 10845, "iter_tflops": 13.992322740899905, "iter_time": 1.120950393676758, "loss": 0.2811080515384674, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 14.98868569264053, "step_time": 1.0464359588623047} +{"epoch": 0, "iter": 10846, "iter_tflops": 16.206814136803374, "iter_time": 0.9677842636108398, "loss": 0.2755676507949829, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 19.140660571245274, "step_time": 0.8194440116882323} +{"epoch": 0, "iter": 10847, "iter_tflops": 27.131663039690654, "iter_time": 0.5780957717895508, "loss": 0.26862218976020813, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.919445615900425, "step_time": 0.5423582420349121} +{"epoch": 0, "iter": 10848, "iter_tflops": 26.341496728974917, "iter_time": 0.5954369201660157, "loss": 0.4306788742542267, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.015662021653174, "step_time": 0.5598546867370605} +{"epoch": 0, "iter": 10849, "iter_tflops": 23.98861615844356, "iter_time": 0.8600368347167969, "loss": 0.3717525005340576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.296548266481242, "step_time": 0.8155695114135744} +{"epoch": 0, "iter": 10850, "iter_tflops": 17.282980232611145, "iter_time": 1.19372314453125, "loss": 0.25822868943214417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.643614781038686, "step_time": 0.9993934555053712} +{"epoch": 0, "iter": 10851, "iter_tflops": 38.262327915219785, "iter_time": 0.5392012100219726, "loss": 0.22331033647060394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.83624304583913, "step_time": 0.4931392498016357} +{"epoch": 0, "iter": 10852, "iter_tflops": 37.02520799835022, "iter_time": 0.5572174911499024, "loss": 0.3226590156555176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.90732567629787, "step_time": 0.5043373813629151} +{"epoch": 0, "iter": 10853, "iter_tflops": 16.85979426398364, "iter_time": 1.2236859588623048, "loss": 
0.3979344666004181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.94883169727405, "step_time": 1.1494393539428711} +{"epoch": 0, "iter": 10854, "iter_tflops": 18.243074353514217, "iter_time": 1.1309000396728515, "loss": 0.31422898173332214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.598869777769288, "step_time": 0.775637975692749} +{"epoch": 0, "iter": 10855, "iter_tflops": 48.59072941781993, "iter_time": 0.4245890884399414, "loss": 0.45113736391067505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.780882296743506, "step_time": 0.39088193702697754} +{"epoch": 0, "iter": 10856, "iter_tflops": 44.602896018831274, "iter_time": 0.462550537109375, "loss": 0.42154043912887573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.13606841758927, "step_time": 0.4285994720458985} +{"epoch": 0, "iter": 10857, "iter_tflops": 40.95109033014766, "iter_time": 0.5037983932495118, "loss": 0.12540414929389954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.72508375081138, "step_time": 0.46128686141967773} +{"epoch": 0, "iter": 10858, "iter_tflops": 11.105024146883366, "iter_time": 1.8578161773681638, "loss": 0.13642163574695587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.893885391469697, "step_time": 1.4849045410156252} +{"epoch": 0, "iter": 10859, "iter_tflops": 13.223532789466589, "iter_time": 1.560180160522461, "loss": 0.09878360480070114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.2933658564633, "step_time": 1.2662266159057618} +{"epoch": 0, "iter": 10860, "iter_tflops": 18.46611710448824, "iter_time": 1.117240478515625, "loss": 0.06360079348087311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.17510577295112, "step_time": 0.8902265090942384} +{"epoch": 0, "iter": 10861, "iter_tflops": 12.113898011950557, "iter_time": 1.3895428771972655, "loss": 0.2450868934392929, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 12.805898678700757, "step_time": 1.3144552459716796} +{"epoch": 0, "iter": 10862, "iter_tflops": 22.91056489212344, "iter_time": 
0.7347169647216797, "loss": 0.2740723490715027, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 25.66731612788252, "step_time": 0.6558060302734374} +{"epoch": 0, "iter": 10863, "iter_tflops": 29.529776828379344, "iter_time": 0.570027359008789, "loss": 0.6290265917778015, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 31.526480144390504, "step_time": 0.5339251518249513} +{"epoch": 0, "iter": 10864, "iter_tflops": 30.03340312107241, "iter_time": 0.5604686431884766, "loss": 0.24130752682685852, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 32.005740731135496, "step_time": 0.525930046081543} +{"epoch": 0, "iter": 10865, "iter_tflops": 42.50410280116794, "iter_time": 0.48539063644409175, "loss": 0.23693251609802246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.470751618950025, "step_time": 0.4439586791992187} +{"epoch": 0, "iter": 10866, "iter_tflops": 45.13670772730885, "iter_time": 0.45708015823364256, "loss": 0.20162902772426605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.39397998999238, "step_time": 0.41768437194824215} +{"epoch": 0, "iter": 10867, "iter_tflops": 49.833706140832916, "iter_time": 0.41399877929687495, "loss": 0.17326287925243378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.18083472582521, "step_time": 0.3807821273803711} +{"epoch": 0, "iter": 10868, "iter_tflops": 46.861757696829784, "iter_time": 0.44025436782836913, "loss": 0.32858431339263916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.77246196732262, "step_time": 0.40634416198730466} +{"epoch": 0, "iter": 10869, "iter_tflops": 32.13547560150949, "iter_time": 0.6420036773681641, "loss": 0.24392208456993103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.40338137609464, "step_time": 0.5996821441650391} +{"epoch": 0, "iter": 10870, "iter_tflops": 20.076239861957188, "iter_time": 1.0276373291015626, "loss": 0.1407366245985031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.786996963304453, "step_time": 0.8323353385925294} +{"epoch": 0, "iter": 10871, "iter_tflops": 
48.361944601613736, "iter_time": 0.4265976829528809, "loss": 0.19983381032943726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.625400447823765, "step_time": 0.3920367984771729} +{"epoch": 0, "iter": 10872, "iter_tflops": 47.424350999041174, "iter_time": 0.43503164672851563, "loss": 0.10280602425336838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.30898568005648, "step_time": 0.40209513473510744} +{"epoch": 0, "iter": 10873, "iter_tflops": 28.17789193722346, "iter_time": 0.7321730651855469, "loss": 0.07500040531158447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.85363808504517, "step_time": 0.6910746841430664} +{"epoch": 0, "iter": 10874, "iter_tflops": 14.899918418706246, "iter_time": 1.3846447296142577, "loss": 0.06777621805667877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.964432451700407, "step_time": 1.148441152572632} +{"epoch": 0, "iter": 10875, "iter_tflops": 41.875170482437845, "iter_time": 0.4926808242797852, "loss": 0.1259220987558365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.222998222101765, "step_time": 0.4463382797241211} +{"epoch": 0, "iter": 10876, "iter_tflops": 46.18001708392313, "iter_time": 0.44675370025634764, "loss": 0.12003696709871292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.70419143964617, "step_time": 0.40689128303527833} +{"epoch": 0, "iter": 10877, "iter_tflops": 20.447796406717828, "iter_time": 1.0089641494750976, "loss": 0.27392062544822693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.733603121977435, "step_time": 0.9492716598510743} +{"epoch": 0, "iter": 10878, "iter_tflops": 16.42978495488688, "iter_time": 1.255712936401367, "loss": 0.24430260062217712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.80142321510023, "step_time": 0.9918116321563721} +{"epoch": 0, "iter": 10879, "iter_tflops": 47.71278673629894, "iter_time": 0.43240177154541015, "loss": 0.264789342880249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.67354910646711, "step_time": 0.39925830268859863} +{"epoch": 0, "iter": 
10880, "iter_tflops": 48.65647655605909, "iter_time": 0.4240153617858887, "loss": 0.2940993010997772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.42942899310277, "step_time": 0.39350215911865233} +{"epoch": 0, "iter": 10881, "iter_tflops": 10.90868060307567, "iter_time": 0.7473012771606445, "loss": 0.03197426721453667, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 11.562137943909743, "step_time": 0.7050660514831543} +{"epoch": 0, "iter": 10882, "iter_tflops": 6.676232651042406, "iter_time": 1.221058547973633, "loss": 0.05078558623790741, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 7.574664933567397, "step_time": 1.0762285881042482} +{"epoch": 0, "iter": 10883, "iter_tflops": 21.021604379311697, "iter_time": 0.38779489898681646, "loss": 0.01648862101137638, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 23.06679449136527, "step_time": 0.3534115219116211} +{"epoch": 0, "iter": 10884, "iter_tflops": 23.005197878131167, "iter_time": 0.35435778427124026, "loss": 0.07198351621627808, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 25.19079149978642, "step_time": 0.3236131324768067} +{"epoch": 0, "iter": 10885, "iter_tflops": 42.508700702257784, "iter_time": 0.48533813476562504, "loss": 0.281177818775177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.293369900694145, "step_time": 0.44565979003906253} +{"epoch": 0, "iter": 10886, "iter_tflops": 47.2324840679691, "iter_time": 0.43679882431030276, "loss": 0.3395928740501404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.153506078806046, "step_time": 0.4033172912597656} +{"epoch": 0, "iter": 10887, "iter_tflops": 49.50948959767333, "iter_time": 0.4167098808288574, "loss": 0.41531965136528015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.439627590371146, "step_time": 0.3860635719299316} +{"epoch": 0, "iter": 10888, "iter_tflops": 49.351920624459254, "iter_time": 0.4180403366088868, "loss": 0.32251691818237305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.156555081832025, "step_time": 0.3881194610595703} 
+{"epoch": 0, "iter": 10889, "iter_tflops": 30.95892845989624, "iter_time": 0.6664020538330078, "loss": 0.7330981492996216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.02257125836927, "step_time": 0.6247573318481445} +{"epoch": 0, "iter": 10890, "iter_tflops": 9.161237659637887, "iter_time": 2.251998504638672, "loss": 0.6873185634613037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.973036159947075, "step_time": 1.8801627197265627} +{"epoch": 0, "iter": 10891, "iter_tflops": 13.235834528697563, "iter_time": 1.5587300872802734, "loss": 0.9846627116203308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.18149367181162, "step_time": 1.2749807853698731} +{"epoch": 0, "iter": 10892, "iter_tflops": 34.78819859758779, "iter_time": 0.5930486297607421, "loss": 0.8390570282936096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.50611709475289, "step_time": 0.5357874298095703} +{"epoch": 0, "iter": 10893, "iter_tflops": 16.688674726306353, "iter_time": 1.0135572967529298, "loss": 0.39814648032188416, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 17.6124295394599, "step_time": 0.960397201538086} +{"epoch": 0, "iter": 10894, "iter_tflops": 15.44300227762408, "iter_time": 1.0953134460449219, "loss": 0.34034356474876404, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 18.079625703955596, "step_time": 0.9355795478820801} +{"epoch": 0, "iter": 10895, "iter_tflops": 24.28369868850496, "iter_time": 0.6965548477172852, "loss": 0.30445536971092224, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 26.02581908526336, "step_time": 0.6499287490844726} +{"epoch": 0, "iter": 10896, "iter_tflops": 25.919403057447454, "iter_time": 0.6525971298217773, "loss": 0.5126877427101135, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 27.89818306259175, "step_time": 0.6063093070983887} +{"epoch": 0, "iter": 10897, "iter_tflops": 19.207340323184564, "iter_time": 1.074125473022461, "loss": 0.6350447535514832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.731524167510603, "step_time": 
0.9951556549072265} +{"epoch": 0, "iter": 10898, "iter_tflops": 21.575863041216262, "iter_time": 0.9562117385864257, "loss": 0.7576298713684082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.59171985871186, "step_time": 0.8061628379821777} +{"epoch": 0, "iter": 10899, "iter_tflops": 46.519136177976655, "iter_time": 0.4434969177246093, "loss": 0.5982660055160522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.63535569065672, "step_time": 0.40744442749023435} +{"epoch": 0, "iter": 10900, "iter_tflops": 51.16281415332021, "iter_time": 0.4032439155578613, "loss": 0.5808899402618408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.258489517043046, "step_time": 0.37335608863830566} +{"epoch": 0, "iter": 10901, "iter_tflops": 51.280223424561555, "iter_time": 0.40232066345214845, "loss": 0.06932423263788223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.325136989323795, "step_time": 0.3662857227325439} +{"epoch": 0, "iter": 10902, "iter_tflops": 40.670867630931724, "iter_time": 0.5072695693969727, "loss": 0.08016218990087509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1043180648847, "step_time": 0.45740839004516604} +{"epoch": 0, "iter": 10903, "iter_tflops": 37.42418226125888, "iter_time": 0.5512770690917969, "loss": 0.09039684385061264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.80708691128336, "step_time": 0.5055762386322021} +{"epoch": 0, "iter": 10904, "iter_tflops": 41.82631251760532, "iter_time": 0.4932563323974609, "loss": 0.08474984765052795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.9013144949414, "step_time": 0.4494662895202637} +{"epoch": 0, "iter": 10905, "iter_tflops": 28.036126706707613, "iter_time": 0.7358753128051758, "loss": 0.22100484371185303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.501330887191894, "step_time": 0.6763997802734375} +{"epoch": 0, "iter": 10906, "iter_tflops": 32.57995588809466, "iter_time": 0.6332449798583983, "loss": 0.20183613896369934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
36.52005352806647, "step_time": 0.5649250621795654} +{"epoch": 0, "iter": 10907, "iter_tflops": 37.33928687723558, "iter_time": 0.5525304641723633, "loss": 0.2224096655845642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.70006549574095, "step_time": 0.5069056587219238} +{"epoch": 0, "iter": 10908, "iter_tflops": 42.76567495862886, "iter_time": 0.4824217910766602, "loss": 0.2685827910900116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.773219116772296, "step_time": 0.441087739944458} +{"epoch": 0, "iter": 10909, "iter_tflops": 13.908192882530948, "iter_time": 1.1896163482666013, "loss": 0.4108416438102722, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 14.81349710398756, "step_time": 1.116914764404297} +{"epoch": 0, "iter": 10910, "iter_tflops": 11.525258057313705, "iter_time": 1.435578582763672, "loss": 0.24555401504039764, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 15.38626462743907, "step_time": 1.0753366088867187} +{"epoch": 0, "iter": 10911, "iter_tflops": 29.192976459689685, "iter_time": 0.5667600784301757, "loss": 0.3192618489265442, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.12561886649565, "step_time": 0.5315689849853515} +{"epoch": 0, "iter": 10912, "iter_tflops": 28.954186509368938, "iter_time": 0.5714342422485352, "loss": 0.28890326619148254, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 30.75421046460537, "step_time": 0.5379885673522949} +{"epoch": 0, "iter": 10913, "iter_tflops": 40.82579283463018, "iter_time": 0.5053445892333984, "loss": 0.858223021030426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.24768361361923, "step_time": 0.4662638092041016} +{"epoch": 0, "iter": 10914, "iter_tflops": 40.098581389980616, "iter_time": 0.5145093116760254, "loss": 0.6436586380004883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.73451910358577, "step_time": 0.45110550880432126} +{"epoch": 0, "iter": 10915, "iter_tflops": 42.18179698747136, "iter_time": 0.4890994453430175, "loss": 0.5722318887710571, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 45.212762906950786, "step_time": 0.45631127548217776} +{"epoch": 0, "iter": 10916, "iter_tflops": 41.71095208347777, "iter_time": 0.49462053680419926, "loss": 0.506846010684967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.5173416982655, "step_time": 0.4634394760131836} +{"epoch": 0, "iter": 10917, "iter_tflops": 39.6214180077978, "iter_time": 0.5207055816650391, "loss": 0.6301732659339905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.85386520863038, "step_time": 0.4814290008544922} +{"epoch": 0, "iter": 10918, "iter_tflops": 39.20044552173363, "iter_time": 0.5262974243164062, "loss": 0.5593178868293762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.11281406389277, "step_time": 0.4676893539428711} +{"epoch": 0, "iter": 10919, "iter_tflops": 51.36637584654633, "iter_time": 0.4016458854675292, "loss": 0.7251065373420715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.68918011425178, "step_time": 0.37046861648559576} +{"epoch": 0, "iter": 10920, "iter_tflops": 45.68406441868414, "iter_time": 0.4516037216186523, "loss": 0.6330376863479614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62320395621992, "step_time": 0.41575496673583984} +{"epoch": 0, "iter": 10921, "iter_tflops": 31.52833102088535, "iter_time": 0.6543668136596679, "loss": 0.793785572052002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.58168189698142, "step_time": 0.6143555755615233} +{"epoch": 0, "iter": 10922, "iter_tflops": 13.640819972604321, "iter_time": 1.5124525909423827, "loss": 0.7128695249557495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.64946171261292, "step_time": 1.1062567825317382} +{"epoch": 0, "iter": 10923, "iter_tflops": 37.93664282632048, "iter_time": 0.5438302383422852, "loss": 0.6088088750839233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.52945478326255, "step_time": 0.49678219032287596} +{"epoch": 0, "iter": 10924, "iter_tflops": 37.531956864288475, "iter_time": 0.5496940536499023, "loss": 0.7118020057678223, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 40.61850098185176, "step_time": 0.5079235572814942} +{"epoch": 0, "iter": 10925, "iter_tflops": 22.646611147438254, "iter_time": 0.9110013580322265, "loss": 0.6247423887252808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.232947770044966, "step_time": 0.8513654098510742} +{"epoch": 0, "iter": 10926, "iter_tflops": 25.459518888348946, "iter_time": 0.8103489151000975, "loss": 0.5993991494178772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.710337727387234, "step_time": 0.6717963733673096} +{"epoch": 0, "iter": 10927, "iter_tflops": 35.42819952155603, "iter_time": 0.5823353652954102, "loss": 0.7029116153717041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.916083571581595, "step_time": 0.5301431083679199} +{"epoch": 0, "iter": 10928, "iter_tflops": 38.82308867931611, "iter_time": 0.5314129867553711, "loss": 0.709199070930481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45323088766671, "step_time": 0.485972282409668} +{"epoch": 0, "iter": 10929, "iter_tflops": 17.639050465281557, "iter_time": 1.1696260833740233, "loss": 0.37498247623443604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.713653731464515, "step_time": 1.102462074279785} +{"epoch": 0, "iter": 10930, "iter_tflops": 18.92075707863187, "iter_time": 1.0903947143554686, "loss": 0.4823436439037323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.642548377298667, "step_time": 0.9111648197174071} +{"epoch": 0, "iter": 10931, "iter_tflops": 40.50032896441098, "iter_time": 0.5094055786132812, "loss": 0.5841563940048218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.33882726464531, "step_time": 0.4653053493499756} +{"epoch": 0, "iter": 10932, "iter_tflops": 40.20808258686113, "iter_time": 0.5131081161499024, "loss": 0.5475308299064636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.05138061593485, "step_time": 0.4683415870666504} +{"epoch": 0, "iter": 10933, "iter_tflops": 13.791687000535086, "iter_time": 1.4959078979492189, "loss": 0.46971240639686584, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 14.703583599295506, "step_time": 1.403133689880371} +{"epoch": 0, "iter": 10934, "iter_tflops": 16.458686614245472, "iter_time": 1.2535078887939453, "loss": 0.6720610857009888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.936381207369777, "step_time": 0.9854183158874512} +{"epoch": 0, "iter": 10935, "iter_tflops": 37.1436292484684, "iter_time": 0.5554409713745116, "loss": 0.8125366568565369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.356062681996455, "step_time": 0.511226619720459} +{"epoch": 0, "iter": 10936, "iter_tflops": 37.01857121986756, "iter_time": 0.5573173904418945, "loss": 0.7555320858955383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.16456701068413, "step_time": 0.5136640338897704} +{"epoch": 0, "iter": 10937, "iter_tflops": 16.837881319588988, "iter_time": 1.2252784729003907, "loss": 0.017522433772683144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.083828462627373, "step_time": 1.140858726501465} +{"epoch": 0, "iter": 10938, "iter_tflops": 16.274145096394314, "iter_time": 1.2677221069335936, "loss": 0.549440324306488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.828936384998567, "step_time": 0.9905015373229981} +{"epoch": 0, "iter": 10939, "iter_tflops": 41.05381024274655, "iter_time": 0.5025378494262696, "loss": 0.73067706823349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.85887364006972, "step_time": 0.45991109085083004} +{"epoch": 0, "iter": 10940, "iter_tflops": 36.87833715300746, "iter_time": 0.559436653137207, "loss": 0.6748172640800476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.101167477740994, "step_time": 0.514476131439209} +{"epoch": 0, "iter": 10941, "iter_tflops": 26.406128164441732, "iter_time": 0.7812994537353516, "loss": 0.12775646150112152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.911460454357517, "step_time": 0.713595687866211} +{"epoch": 0, "iter": 10942, "iter_tflops": 41.86876211696407, "iter_time": 0.4927562332153321, "loss": 0.09538932889699936, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 46.60272983445805, "step_time": 0.44270139503479} +{"epoch": 0, "iter": 10943, "iter_tflops": 46.24097651383684, "iter_time": 0.4461647453308106, "loss": 0.09250035881996155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.504057053826735, "step_time": 0.40850368690490724} +{"epoch": 0, "iter": 10944, "iter_tflops": 47.68965031061609, "iter_time": 0.43261154937744145, "loss": 0.11591100692749023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.19343982135596, "step_time": 0.39528135299682615} +{"epoch": 0, "iter": 10945, "iter_tflops": 25.412535843049845, "iter_time": 0.8118470993041992, "loss": 0.4823753833770752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.028192254098666, "step_time": 0.7633175506591797} +{"epoch": 0, "iter": 10946, "iter_tflops": 8.6315772207965, "iter_time": 2.3901881408691406, "loss": 0.4210737943649292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.669527702134761, "step_time": 1.933646369934082} +{"epoch": 0, "iter": 10947, "iter_tflops": 12.17822829066635, "iter_time": 1.694096466064453, "loss": 0.5072426199913025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.129180446038873, "step_time": 1.4601762351989744} +{"epoch": 0, "iter": 10948, "iter_tflops": 37.48919391042632, "iter_time": 0.5503210754394531, "loss": 0.43334951996803284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.33770946400373, "step_time": 0.43582787895202635} +{"epoch": 0, "iter": 10949, "iter_tflops": 19.545121438355654, "iter_time": 0.8528236923217773, "loss": 0.37359118461608887, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 20.52062797187613, "step_time": 0.8122822875976562} +{"epoch": 0, "iter": 10950, "iter_tflops": 10.357036536259246, "iter_time": 1.6093930511474608, "loss": 0.26074379682540894, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 12.45509421292991, "step_time": 1.3382911720275878} +{"epoch": 0, "iter": 10951, "iter_tflops": 28.88023439865745, "iter_time": 0.5771609191894531, "loss": 
0.2239677608013153, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 30.767201432410975, "step_time": 0.5417633666992188} +{"epoch": 0, "iter": 10952, "iter_tflops": 29.583411822194027, "iter_time": 0.5634421997070312, "loss": 0.32163625955581665, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 31.482476932116917, "step_time": 0.5294546127319336} +{"epoch": 0, "iter": 10953, "iter_tflops": 34.846839519290754, "iter_time": 0.5920506362915038, "loss": 0.16610398888587952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.199655017680286, "step_time": 0.5546044311523438} +{"epoch": 0, "iter": 10954, "iter_tflops": 8.768851693129562, "iter_time": 2.352770263671875, "loss": 0.2877119481563568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.653915331982288, "step_time": 1.9364799575805665} +{"epoch": 0, "iter": 10955, "iter_tflops": 14.68361106931171, "iter_time": 1.4050422210693359, "loss": 0.25036272406578064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.90859696159405, "step_time": 1.1520217666625978} +{"epoch": 0, "iter": 10956, "iter_tflops": 35.38359121225753, "iter_time": 0.5830695190429687, "loss": 0.34200072288513184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.241004859552326, "step_time": 0.45602642059326176} +{"epoch": 0, "iter": 10957, "iter_tflops": 14.643912280721793, "iter_time": 1.0319678344726562, "loss": 0.3755459785461426, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 15.460390852971877, "step_time": 0.9774685897827149} +{"epoch": 0, "iter": 10958, "iter_tflops": 6.50055100587005, "iter_time": 2.324733154296875, "loss": 0.29751768708229065, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 8.50510679280228, "step_time": 1.7768203048706055} +{"epoch": 0, "iter": 10959, "iter_tflops": 11.041928143911205, "iter_time": 1.368605758666992, "loss": 0.2327713519334793, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 12.661792685422217, "step_time": 1.1935155487060545} +{"epoch": 0, "iter": 10960, "iter_tflops": 20.60772554862995, "iter_time": 
0.7333194732666016, "loss": 0.30437779426574707, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.38301911065039, "step_time": 0.6197774925231934} +{"epoch": 0, "iter": 10961, "iter_tflops": 17.859116320548402, "iter_time": 0.8438927078247069, "loss": 0.40263646841049194, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 18.79153992910945, "step_time": 0.8020193176269532} +{"epoch": 0, "iter": 10962, "iter_tflops": 6.868418895629114, "iter_time": 2.1942718200683595, "loss": 0.4372657537460327, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 7.94739491378759, "step_time": 1.8963670730590823} +{"epoch": 0, "iter": 10963, "iter_tflops": 11.446903645293853, "iter_time": 1.3166161346435548, "loss": 0.28692373633384705, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 13.562312369879994, "step_time": 1.1112543067932128} +{"epoch": 0, "iter": 10964, "iter_tflops": 12.054850631649211, "iter_time": 1.2502169036865234, "loss": 0.2321663498878479, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 13.243475946717975, "step_time": 1.1380077323913573} +{"epoch": 0, "iter": 10965, "iter_tflops": 21.725490090550828, "iter_time": 0.7483503341674804, "loss": 0.28626134991645813, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 22.986812285930327, "step_time": 0.7072871856689453} +{"epoch": 0, "iter": 10966, "iter_tflops": 7.413351495410667, "iter_time": 2.193107635498047, "loss": 0.3572711646556854, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 9.94400838179803, "step_time": 1.6349823074340821} +{"epoch": 0, "iter": 10967, "iter_tflops": 9.999233114418423, "iter_time": 1.6259524688720703, "loss": 0.3011735677719116, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 11.23882989965871, "step_time": 1.446616588592529} +{"epoch": 0, "iter": 10968, "iter_tflops": 24.54847085694006, "iter_time": 0.6622928924560547, "loss": 0.33858609199523926, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 28.90587436822455, "step_time": 0.5624558372497559} +{"epoch": 0, "iter": 10969, "iter_tflops": 21.012335385949097, 
"iter_time": 0.71530908203125, "loss": 0.4570770561695099, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 22.38588937981433, "step_time": 0.6714191284179687} +{"epoch": 0, "iter": 10970, "iter_tflops": 11.112493363887534, "iter_time": 1.352560028076172, "loss": 0.4086573123931885, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.145188218961826, "step_time": 1.143408073425293} +{"epoch": 0, "iter": 10971, "iter_tflops": 22.08258961501815, "iter_time": 0.6806409301757812, "loss": 0.2608606815338135, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 23.80196001634856, "step_time": 0.6314738082885742} +{"epoch": 0, "iter": 10972, "iter_tflops": 22.468669152695753, "iter_time": 0.6689454650878905, "loss": 0.2952534258365631, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.12682854279037, "step_time": 0.6229709930419922} +{"epoch": 0, "iter": 10973, "iter_tflops": 20.795536781143753, "iter_time": 0.9920923767089844, "loss": 0.3474438488483429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.37322471815607, "step_time": 0.9221332092285156} +{"epoch": 0, "iter": 10974, "iter_tflops": 17.873866112595202, "iter_time": 1.154260269165039, "loss": 0.16332614421844482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.8362528073869, "step_time": 0.9448092441558837} +{"epoch": 0, "iter": 10975, "iter_tflops": 45.76186854377688, "iter_time": 0.4508359069824219, "loss": 0.43335339426994324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64258702468575, "step_time": 0.41559263420104986} +{"epoch": 0, "iter": 10976, "iter_tflops": 48.1067346198745, "iter_time": 0.42886081695556644, "loss": 0.2382718175649643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.23743958768834, "step_time": 0.39494840621948246} +{"epoch": 0, "iter": 10977, "iter_tflops": 36.742349361730795, "iter_time": 0.561507194519043, "loss": 0.6430314779281616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.541080000803284, "step_time": 0.5217635307312012} +{"epoch": 0, "iter": 10978, "iter_tflops": 
34.326903318093265, "iter_time": 0.6010181961059571, "loss": 0.8636268377304077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92299593722693, "step_time": 0.5440259399414062} +{"epoch": 0, "iter": 10979, "iter_tflops": 35.138701084238136, "iter_time": 0.5871330718994141, "loss": 0.580929160118103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.022031540104216, "step_time": 0.5426089210510254} +{"epoch": 0, "iter": 10980, "iter_tflops": 34.10219675454158, "iter_time": 0.6049784317016601, "loss": 0.6157548427581787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.96017109042283, "step_time": 0.558197998046875} +{"epoch": 0, "iter": 10981, "iter_tflops": 22.495221801655173, "iter_time": 0.9171322555541993, "loss": 0.6977236270904541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.960836897615547, "step_time": 0.8610339279174803} +{"epoch": 0, "iter": 10982, "iter_tflops": 8.156577112066927, "iter_time": 2.529381286621094, "loss": 0.6111639738082886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.494245365986986, "step_time": 1.9659435043334963} +{"epoch": 0, "iter": 10983, "iter_tflops": 12.598422726064314, "iter_time": 1.6375933685302733, "loss": 0.567181408405304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.550475122586624, "step_time": 1.326717887878418} +{"epoch": 0, "iter": 10984, "iter_tflops": 43.10370219566038, "iter_time": 0.4786385498046875, "loss": 0.7183448672294617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44983302851724, "step_time": 0.4441586151123047} +{"epoch": 0, "iter": 10985, "iter_tflops": 20.190390713381635, "iter_time": 0.801187141418457, "loss": 0.3594355881214142, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 21.645697425487423, "step_time": 0.7473208694458008} +{"epoch": 0, "iter": 10986, "iter_tflops": 7.807064814688906, "iter_time": 2.07200552368164, "loss": 0.35760849714279175, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 9.768446862351173, "step_time": 1.6559727096557615} +{"epoch": 0, "iter": 10987, 
"iter_tflops": 8.639721950199531, "iter_time": 1.8723150482177737, "loss": 0.3718337416648865, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 10.290662347440977, "step_time": 1.571937828063965} +{"epoch": 0, "iter": 10988, "iter_tflops": 21.46867769569921, "iter_time": 0.753482894897461, "loss": 0.2946424186229706, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 26.947660848465272, "step_time": 0.6002851791381836} +{"epoch": 0, "iter": 10989, "iter_tflops": 20.00761176784679, "iter_time": 0.7777994995117188, "loss": 0.21058455109596252, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 21.1733955967904, "step_time": 0.7349747161865234} +{"epoch": 0, "iter": 10990, "iter_tflops": 8.382937169173696, "iter_time": 1.8563792266845704, "loss": 0.290828138589859, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 10.776919825227264, "step_time": 1.4440035438537597} +{"epoch": 0, "iter": 10991, "iter_tflops": 27.167305118230647, "iter_time": 0.5728175964355469, "loss": 0.48383021354675293, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 28.969533172299293, "step_time": 0.5371819534301757} +{"epoch": 0, "iter": 10992, "iter_tflops": 27.289543021883684, "iter_time": 0.5702517776489258, "loss": 0.35443300008773804, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 29.05630528423251, "step_time": 0.5355777435302734} +{"epoch": 0, "iter": 10993, "iter_tflops": 36.28103984913119, "iter_time": 0.5686466979980469, "loss": 0.7262732982635498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.04293293978704, "step_time": 0.5284206886291504} +{"epoch": 0, "iter": 10994, "iter_tflops": 34.21205918437083, "iter_time": 0.6030357131958007, "loss": 0.7400044202804565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.10579378172973, "step_time": 0.5414161853790284} +{"epoch": 0, "iter": 10995, "iter_tflops": 44.757224200376655, "iter_time": 0.46095560836791993, "loss": 0.5632539987564087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08597510641612, "step_time": 0.42904596328735356} +{"epoch": 0, "iter": 
10996, "iter_tflops": 48.163320795781566, "iter_time": 0.42835695648193356, "loss": 0.6881785988807678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.98171021212071, "step_time": 0.39689139556884767} +{"epoch": 0, "iter": 10997, "iter_tflops": 37.52470290597748, "iter_time": 0.5498003158569336, "loss": 0.20869001746177673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.69447867565297, "step_time": 0.5069752502441407} +{"epoch": 0, "iter": 10998, "iter_tflops": 45.12453638780921, "iter_time": 0.45720344543457025, "loss": 0.1435352861881256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11806262268297, "step_time": 0.4200306854248047} +{"epoch": 0, "iter": 10999, "iter_tflops": 48.84657697508381, "iter_time": 0.4223651847839356, "loss": 0.11802272498607635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.33200527659237, "step_time": 0.3868426361083985} +{"epoch": 0, "iter": 11000, "iter_tflops": 50.201523219790865, "iter_time": 0.4109654884338379, "loss": 0.15462273359298706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.441859607909855, "step_time": 0.3789564437866211} +{"epoch": 0, "iter": 11001, "iter_tflops": 28.561152742456454, "iter_time": 0.7223480682373045, "loss": 0.38242390751838684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.209994829987874, "step_time": 0.6829227752685547} +{"epoch": 0, "iter": 11002, "iter_tflops": 9.621313547329514, "iter_time": 2.1443115234375, "loss": 0.30513396859169006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.279935029391435, "step_time": 1.829008186340332} +{"epoch": 0, "iter": 11003, "iter_tflops": 12.729320927952184, "iter_time": 1.620753662109375, "loss": 0.3558948338031769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.740575875387671, "step_time": 1.3996124496459958} +{"epoch": 0, "iter": 11004, "iter_tflops": 23.763895722882445, "iter_time": 0.8681696701049805, "loss": 0.2578376829624176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.07825384410188, "step_time": 0.6237056407928467} +{"epoch": 
0, "iter": 11005, "iter_tflops": 15.487967334169218, "iter_time": 1.0127022705078126, "loss": 0.36667340993881226, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.740063686826787, "step_time": 0.9369557952880859} +{"epoch": 0, "iter": 11006, "iter_tflops": 12.766310347574969, "iter_time": 1.2286008453369142, "loss": 0.3031717836856842, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 14.966726226431495, "step_time": 1.0479713096618652} +{"epoch": 0, "iter": 11007, "iter_tflops": 23.625527196291138, "iter_time": 0.6638878173828124, "loss": 0.17272718250751495, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.49077145702224, "step_time": 0.6153089447021484} +{"epoch": 0, "iter": 11008, "iter_tflops": 23.727590589312936, "iter_time": 0.661032127380371, "loss": 0.3662737011909485, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.545147615586362, "step_time": 0.6139991798400878} +{"epoch": 0, "iter": 11009, "iter_tflops": 30.85968295158827, "iter_time": 0.6685452194213868, "loss": 0.002483805175870657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.646152921781905, "step_time": 0.6131783790588379} +{"epoch": 0, "iter": 11010, "iter_tflops": 9.446465843519741, "iter_time": 2.1840012817382815, "loss": 0.012088887393474579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.190270408071248, "step_time": 1.8436635360717775} +{"epoch": 0, "iter": 11011, "iter_tflops": 15.363769640120587, "iter_time": 1.342840591430664, "loss": 0.0006553416606038809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.786782190704905, "step_time": 1.0426704711914063} +{"epoch": 0, "iter": 11012, "iter_tflops": 44.4850522144178, "iter_time": 0.46377586364746093, "loss": 0.006839876063168049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42874040402591, "step_time": 0.4173906383514404} +{"epoch": 0, "iter": 11013, "iter_tflops": 15.429782838578719, "iter_time": 0.987355438232422, "loss": 0.47776511311531067, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 16.261164253007355, "step_time": 
0.9368751068115235} +{"epoch": 0, "iter": 11014, "iter_tflops": 6.712336092559532, "iter_time": 2.269653930664062, "loss": 0.31459540128707886, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 8.100207328745611, "step_time": 1.8807765502929688} +{"epoch": 0, "iter": 11015, "iter_tflops": 7.867696211896888, "iter_time": 1.9363584442138673, "loss": 0.44171521067619324, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 9.663103224262208, "step_time": 1.5765825576782229} +{"epoch": 0, "iter": 11016, "iter_tflops": 20.106143131327908, "iter_time": 0.7577126998901367, "loss": 0.4533096253871918, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 23.79204373895512, "step_time": 0.6403266639709473} +{"epoch": 0, "iter": 11017, "iter_tflops": 10.153722645213906, "iter_time": 1.4521177215576173, "loss": 0.3080529570579529, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 10.815311086662799, "step_time": 1.36328955078125} +{"epoch": 0, "iter": 11018, "iter_tflops": 14.466368343742182, "iter_time": 1.0192192153930664, "loss": 0.3996995985507965, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 18.84544640527993, "step_time": 0.7823853187561034} +{"epoch": 0, "iter": 11019, "iter_tflops": 27.03154067321699, "iter_time": 0.5454517288208007, "loss": 0.5327652096748352, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 28.83241564312901, "step_time": 0.51138277053833} +{"epoch": 0, "iter": 11020, "iter_tflops": 26.406704489269465, "iter_time": 0.5583582229614257, "loss": 0.40331071615219116, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 27.9552169948827, "step_time": 0.5274293022155762} +{"epoch": 0, "iter": 11021, "iter_tflops": 27.919069849842995, "iter_time": 0.7389606323242187, "loss": 0.006801141891628504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.99699225209621, "step_time": 0.6877720718383789} +{"epoch": 0, "iter": 11022, "iter_tflops": 14.43418783885775, "iter_time": 1.4293213958740232, "loss": 0.0031889043748378754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.51791059693656, 
"step_time": 1.1141156234741212} +{"epoch": 0, "iter": 11023, "iter_tflops": 44.3409842058549, "iter_time": 0.46528271484375, "loss": 0.0061322832480072975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.28805600985669, "step_time": 0.3665270214080811} +{"epoch": 0, "iter": 11024, "iter_tflops": 58.28675132726266, "iter_time": 0.3539585418701172, "loss": 0.017484135925769806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.78068206326879, "step_time": 0.3234693145751953} +{"epoch": 0, "iter": 11025, "iter_tflops": 26.342854780282174, "iter_time": 0.7831760711669921, "loss": 0.5817654132843018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.830314416052378, "step_time": 0.7413172988891603} +{"epoch": 0, "iter": 11026, "iter_tflops": 14.490676132361104, "iter_time": 1.4237495422363282, "loss": 0.5754164457321167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.89204129149353, "step_time": 1.2213499336242675} +{"epoch": 0, "iter": 11027, "iter_tflops": 46.410931215812326, "iter_time": 0.4445309104919434, "loss": 0.6019741296768188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.41251762497758, "step_time": 0.4092454509735107} +{"epoch": 0, "iter": 11028, "iter_tflops": 48.46846685377722, "iter_time": 0.4256601219177246, "loss": 0.7087734341621399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.50219563007962, "step_time": 0.3929567756652832} +{"epoch": 0, "iter": 11029, "iter_tflops": 30.099433482225468, "iter_time": 0.6854312896728515, "loss": 0.0371362678706646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.01890691417481, "step_time": 0.6443409690856934} +{"epoch": 0, "iter": 11030, "iter_tflops": 16.50614319399663, "iter_time": 1.2499039459228516, "loss": 0.04872290417551994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.12837624880445, "step_time": 1.0249755496978759} +{"epoch": 0, "iter": 11031, "iter_tflops": 40.06166124243648, "iter_time": 0.5149834747314453, "loss": 0.03129323571920395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.31219606359625, "step_time": 0.4655849933624267} +{"epoch": 0, "iter": 11032, "iter_tflops": 41.94921779827846, "iter_time": 0.49181116104125977, "loss": 0.033691804856061935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.3875682739128, "step_time": 0.44475479698181153} +{"epoch": 0, "iter": 11033, "iter_tflops": 16.460798368109856, "iter_time": 1.2533470764160155, "loss": 0.712644100189209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.540067336035634, "step_time": 1.1762265853881837} +{"epoch": 0, "iter": 11034, "iter_tflops": 20.542713232200356, "iter_time": 1.0043022689819336, "loss": 0.4757601320743561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.01415964876154, "step_time": 0.7364523429870606} +{"epoch": 0, "iter": 11035, "iter_tflops": 49.47939964593192, "iter_time": 0.4169632949829101, "loss": 0.722497820854187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.537569986310764, "step_time": 0.38535730171203614} +{"epoch": 0, "iter": 11036, "iter_tflops": 44.264954180998146, "iter_time": 0.4660818901062012, "loss": 0.6776739358901978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70835246578311, "step_time": 0.43244196128845214} +{"epoch": 0, "iter": 11037, "iter_tflops": 29.038322952293885, "iter_time": 0.7104781341552734, "loss": 0.07176438719034195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.807560366932, "step_time": 0.6696763153076173} +{"epoch": 0, "iter": 11038, "iter_tflops": 17.84571024307147, "iter_time": 1.1560813903808593, "loss": 0.05600965768098831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.350791027717616, "step_time": 0.9662917633056641} +{"epoch": 0, "iter": 11039, "iter_tflops": 52.29689589261377, "iter_time": 0.3944993896484375, "loss": 0.11307297646999359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.00287889934903, "step_time": 0.3619307289123535} +{"epoch": 0, "iter": 11040, "iter_tflops": 54.158245765181775, "iter_time": 0.38094094848632815, "loss": 0.08265309035778046, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 58.94775116733859, "step_time": 0.3499894924163818} +{"epoch": 0, "iter": 11041, "iter_tflops": 40.63078855873479, "iter_time": 0.5077699508666992, "loss": 0.45827516913414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35476126200631, "step_time": 0.46513819313049315} +{"epoch": 0, "iter": 11042, "iter_tflops": 47.336528015363086, "iter_time": 0.4358387565612793, "loss": 0.32235366106033325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.7032882181205, "step_time": 0.39902865409851074} +{"epoch": 0, "iter": 11043, "iter_tflops": 45.724899461836415, "iter_time": 0.4512004127502442, "loss": 0.32177403569221497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.74939822327793, "step_time": 0.41470036315917974} +{"epoch": 0, "iter": 11044, "iter_tflops": 47.09640306675374, "iter_time": 0.43806091690063476, "loss": 0.3457406163215637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.121524424755115, "step_time": 0.40356960678100584} +{"epoch": 0, "iter": 11045, "iter_tflops": 27.542369687717855, "iter_time": 0.7490674819946289, "loss": 0.0055452315136790276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.18580839768679, "step_time": 0.7068878555297851} +{"epoch": 0, "iter": 11046, "iter_tflops": 11.937977267847474, "iter_time": 1.7281900482177734, "loss": 0.005270033609122038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.063608886318594, "step_time": 1.3695983257293702} +{"epoch": 0, "iter": 11047, "iter_tflops": 40.56175496756672, "iter_time": 0.5086341438293457, "loss": 0.007360788527876139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.306762044955185, "step_time": 0.4553645544052124} +{"epoch": 0, "iter": 11048, "iter_tflops": 41.85976654939157, "iter_time": 0.49286212539672847, "loss": 0.007526145316660404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.29282129130477, "step_time": 0.44566507148742673} +{"epoch": 0, "iter": 11049, "iter_tflops": 19.909021761813484, "iter_time": 1.036268569946289, "loss": 0.5870650410652161, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 21.516079342232818, "step_time": 0.9588686294555665} +{"epoch": 0, "iter": 11050, "iter_tflops": 19.42903241395524, "iter_time": 1.0618693237304688, "loss": 0.7027292847633362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.909558328670023, "step_time": 0.9416480789184571} +{"epoch": 0, "iter": 11051, "iter_tflops": 40.07377006696455, "iter_time": 0.5148278656005859, "loss": 0.5657745003700256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.49575171147361, "step_time": 0.47432433509826655} +{"epoch": 0, "iter": 11052, "iter_tflops": 42.10180668135294, "iter_time": 0.49002869796752935, "loss": 0.8136383891105652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.44476934362825, "step_time": 0.4539816970825195} +{"epoch": 0, "iter": 11053, "iter_tflops": 37.686326991307546, "iter_time": 0.5474424057006836, "loss": 0.16073225438594818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.85384431848373, "step_time": 0.5049976043701172} +{"epoch": 0, "iter": 11054, "iter_tflops": 22.628847561693927, "iter_time": 0.9117164916992188, "loss": 0.23151329159736633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.075313632336517, "step_time": 0.7095742378234864} +{"epoch": 0, "iter": 11055, "iter_tflops": 45.51607948512141, "iter_time": 0.4532704429626465, "loss": 0.13679943978786469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.328220413603354, "step_time": 0.4182411880493164} +{"epoch": 0, "iter": 11056, "iter_tflops": 45.89742961915915, "iter_time": 0.44950433349609387, "loss": 0.15102460980415344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.825335523823, "step_time": 0.41406833076477045} +{"epoch": 0, "iter": 11057, "iter_tflops": 20.906917209419664, "iter_time": 0.9868070602416992, "loss": 0.5934098362922668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.92286867447007, "step_time": 0.9410763626098633} +{"epoch": 0, "iter": 11058, "iter_tflops": 14.46899098665577, "iter_time": 1.4258833618164062, "loss": 
0.6891838312149048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.543480874607617, "step_time": 1.2470829849243164} +{"epoch": 0, "iter": 11059, "iter_tflops": 35.49181455798242, "iter_time": 0.5812915954589843, "loss": 0.6685827374458313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.77303255047164, "step_time": 0.5320990428924561} +{"epoch": 0, "iter": 11060, "iter_tflops": 38.28827810658182, "iter_time": 0.5388357620239258, "loss": 0.4614388048648834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.38594099945599, "step_time": 0.4985048789978027} +{"epoch": 0, "iter": 11061, "iter_tflops": 19.12119701117022, "iter_time": 1.0789645385742188, "loss": 0.05882621556520462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.646334263864865, "step_time": 0.9992618179321289} +{"epoch": 0, "iter": 11062, "iter_tflops": 15.845985314526024, "iter_time": 1.301976058959961, "loss": 0.09098667651414871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.21152087490536, "step_time": 1.0207590827941895} +{"epoch": 0, "iter": 11063, "iter_tflops": 49.768304566093526, "iter_time": 0.4145428237915039, "loss": 0.08131232112646103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.41344269125963, "step_time": 0.3791543502807617} +{"epoch": 0, "iter": 11064, "iter_tflops": 49.53073869271663, "iter_time": 0.4165311088562012, "loss": 0.07056022435426712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.99256695488507, "step_time": 0.382109884262085} +{"epoch": 0, "iter": 11065, "iter_tflops": 43.13807968246997, "iter_time": 0.47825711441040036, "loss": 0.02223239839076996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.45571774759644, "step_time": 0.43474410438537603} +{"epoch": 0, "iter": 11066, "iter_tflops": 16.297975078324203, "iter_time": 1.2658685150146485, "loss": 0.05069988593459129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.714103009885733, "step_time": 1.1646705169677736} +{"epoch": 0, "iter": 11067, "iter_tflops": 12.025094189835011, "iter_time": 
1.7156700134277347, "loss": 0.023683657869696617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.016283428703149, "step_time": 1.4719375228881835} +{"epoch": 0, "iter": 11068, "iter_tflops": 24.271704353680704, "iter_time": 0.8500059661865232, "loss": 0.04065503180027008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.8446839919874, "step_time": 0.6912820224761963} +{"epoch": 0, "iter": 11069, "iter_tflops": 11.601049185093077, "iter_time": 1.330842010498047, "loss": 0.380530446767807, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.43656975253587, "step_time": 1.2414326400756837} +{"epoch": 0, "iter": 11070, "iter_tflops": 14.511135582052205, "iter_time": 1.0639528198242187, "loss": 0.40018653869628906, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 19.1677384282559, "step_time": 0.8054765396118164} +{"epoch": 0, "iter": 11071, "iter_tflops": 27.6887418566264, "iter_time": 0.5575971527099609, "loss": 0.31883031129837036, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.554894288403077, "step_time": 0.5223894042968751} +{"epoch": 0, "iter": 11072, "iter_tflops": 27.741374853500865, "iter_time": 0.5565392379760742, "loss": 0.41742822527885437, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.527873317739715, "step_time": 0.5228674430847168} +{"epoch": 0, "iter": 11073, "iter_tflops": 21.559911134700144, "iter_time": 0.9569192276000977, "loss": 0.6842749118804932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.736962026083415, "step_time": 0.9073812713623046} +{"epoch": 0, "iter": 11074, "iter_tflops": 16.455197066327344, "iter_time": 1.253773712158203, "loss": 0.49460840225219727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.860948223743353, "step_time": 0.9889815788269044} +{"epoch": 0, "iter": 11075, "iter_tflops": 37.78887263446974, "iter_time": 0.5459568405151367, "loss": 0.6324200630187988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.25452576226511, "step_time": 0.5000928535461425} +{"epoch": 0, "iter": 11076, "iter_tflops": 38.21447640183425, 
"iter_time": 0.5398763885498047, "loss": 0.7969635128974915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.82553910983285, "step_time": 0.493265453338623} +{"epoch": 0, "iter": 11077, "iter_tflops": 15.654879808386228, "iter_time": 1.3178698120117187, "loss": 0.01068058144301176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.94990562930119, "step_time": 1.217180435180664} +{"epoch": 0, "iter": 11078, "iter_tflops": 22.418441154552895, "iter_time": 0.9202733306884766, "loss": 0.011596456170082092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.670332954574285, "step_time": 0.8036940364837646} +{"epoch": 0, "iter": 11079, "iter_tflops": 58.85050614801743, "iter_time": 0.3505678176879883, "loss": 0.018488585948944092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.24006914997106, "step_time": 0.3162334709167481} +{"epoch": 0, "iter": 11080, "iter_tflops": 59.177756661570626, "iter_time": 0.34862919235229495, "loss": 0.0008333455771207809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.99988963554883, "step_time": 0.31740197753906246} +{"epoch": 0, "iter": 11081, "iter_tflops": 30.18338206716504, "iter_time": 0.6835249099731445, "loss": 0.39162951707839966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.14209644044016, "step_time": 0.6418714332580566} +{"epoch": 0, "iter": 11082, "iter_tflops": 10.331342898496132, "iter_time": 1.996942092895508, "loss": 0.4156738817691803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.52322618129118, "step_time": 1.6474264068603515} +{"epoch": 0, "iter": 11083, "iter_tflops": 13.425080139458517, "iter_time": 1.5367575683593748, "loss": 0.412925660610199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.590744289251179, "step_time": 1.3232911224365234} +{"epoch": 0, "iter": 11084, "iter_tflops": 21.892057977758338, "iter_time": 0.9424008255004883, "loss": 0.4447198212146759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.466517597374636, "step_time": 0.7001537742614745} +{"epoch": 0, "iter": 11085, "iter_tflops": 
21.709826521761073, "iter_time": 0.71116015625, "loss": 0.3200663924217224, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.05425629656284, "step_time": 0.6696882095336913} +{"epoch": 0, "iter": 11086, "iter_tflops": 12.440658836745326, "iter_time": 1.2410245971679688, "loss": 0.3276611268520355, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 15.034899612599306, "step_time": 1.0268883743286132} +{"epoch": 0, "iter": 11087, "iter_tflops": 21.45170443194171, "iter_time": 0.7197173385620117, "loss": 0.4195804297924042, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.209124728257343, "step_time": 0.6652195549011231} +{"epoch": 0, "iter": 11088, "iter_tflops": 23.160425672601416, "iter_time": 0.6666183013916016, "loss": 0.3849315941333771, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.956338773766188, "step_time": 0.6186469802856446} +{"epoch": 0, "iter": 11089, "iter_tflops": 17.852617312518248, "iter_time": 1.1556341094970703, "loss": 0.5346283912658691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.0368956863595, "step_time": 1.0837425308227537} +{"epoch": 0, "iter": 11090, "iter_tflops": 13.97829059302186, "iter_time": 1.475938232421875, "loss": 0.5422832369804382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.036500179174187, "step_time": 1.2109936485290527} +{"epoch": 0, "iter": 11091, "iter_tflops": 45.83452964314243, "iter_time": 0.4501212005615235, "loss": 0.5113758444786072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.59331969065095, "step_time": 0.4160054950714111} +{"epoch": 0, "iter": 11092, "iter_tflops": 42.76488603453604, "iter_time": 0.4824306907653808, "loss": 0.4626656174659729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.22228496706645, "step_time": 0.44634516716003414} +{"epoch": 0, "iter": 11093, "iter_tflops": 24.940974694029325, "iter_time": 0.827196762084961, "loss": 0.7942543029785156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.296108768575365, "step_time": 0.7845683059692383} +{"epoch": 0, "iter": 11094, 
"iter_tflops": 11.462889929504463, "iter_time": 1.7998160705566404, "loss": 0.664099395275116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.066497151194804, "step_time": 1.466683090209961} +{"epoch": 0, "iter": 11095, "iter_tflops": 38.87465692413753, "iter_time": 0.5307080535888672, "loss": 0.5815490484237671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.800826477222884, "step_time": 0.49355707168579105} +{"epoch": 0, "iter": 11096, "iter_tflops": 44.241006286428515, "iter_time": 0.4663341827392578, "loss": 0.685429036617279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.64708802654004, "step_time": 0.43299799346923834} +{"epoch": 0, "iter": 11097, "iter_tflops": 29.11741422473415, "iter_time": 0.7085482711791993, "loss": 0.03155537322163582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.940270424767427, "step_time": 0.6668039169311524} +{"epoch": 0, "iter": 11098, "iter_tflops": 31.115781882856616, "iter_time": 0.6630427474975586, "loss": 0.037628062069416046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14083212776398, "step_time": 0.5270990009307861} +{"epoch": 0, "iter": 11099, "iter_tflops": 45.58058241168933, "iter_time": 0.4526290016174317, "loss": 0.03491517901420593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24015812833612, "step_time": 0.4106494541168213} +{"epoch": 0, "iter": 11100, "iter_tflops": 47.92735250425874, "iter_time": 0.4304659538269043, "loss": 0.05364052578806877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.892434427945815, "step_time": 0.39005755233764644} +{"epoch": 0, "iter": 11101, "iter_tflops": 29.8305776515808, "iter_time": 0.6916089172363282, "loss": 0.6331352591514587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.1766777748126, "step_time": 0.6218553180694579} +{"epoch": 0, "iter": 11102, "iter_tflops": 33.018156106488135, "iter_time": 0.6248408737182618, "loss": 0.6007387042045593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.13853480434281, "step_time": 0.5708890419006347} +{"epoch": 0, 
"iter": 11103, "iter_tflops": 34.27597753130599, "iter_time": 0.6019111633300781, "loss": 0.7613049149513245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.42695288284873, "step_time": 0.5512362594604492} +{"epoch": 0, "iter": 11104, "iter_tflops": 35.23940510828269, "iter_time": 0.5854552154541015, "loss": 0.7324104309082031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.524863525492876, "step_time": 0.5355267124176025} +{"epoch": 0, "iter": 11105, "iter_tflops": 19.90746229808709, "iter_time": 1.0363497467041016, "loss": 0.005332034546881914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.297794151836573, "step_time": 0.9686962585449219} +{"epoch": 0, "iter": 11106, "iter_tflops": 15.420388371861849, "iter_time": 1.3379101104736328, "loss": 0.007863030768930912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.95616364919615, "step_time": 1.088358060836792} +{"epoch": 0, "iter": 11107, "iter_tflops": 42.429710065127516, "iter_time": 0.4862416801452637, "loss": 0.007490402087569237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.16003901464769, "step_time": 0.4374698143005371} +{"epoch": 0, "iter": 11108, "iter_tflops": 44.60605124196513, "iter_time": 0.4625178184509277, "loss": 0.015253830701112747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.59599906586714, "step_time": 0.41598302078247074} +{"epoch": 0, "iter": 11109, "iter_tflops": 20.093968033038696, "iter_time": 1.0267306823730469, "loss": 0.7133271098136902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.727163707179304, "step_time": 0.9495530014038086} +{"epoch": 0, "iter": 11110, "iter_tflops": 17.36761868662031, "iter_time": 1.187905715942383, "loss": 0.9190104603767395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.884970134423355, "step_time": 0.987844051361084} +{"epoch": 0, "iter": 11111, "iter_tflops": 32.38156620541923, "iter_time": 0.6371246337890625, "loss": 0.7997145652770996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.24145635349386, "step_time": 0.5854211387634277} 
+{"epoch": 0, "iter": 11112, "iter_tflops": 36.85274153740691, "iter_time": 0.5598252029418945, "loss": 1.0653159618377686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.154796693992935, "step_time": 0.5137890167236328} +{"epoch": 0, "iter": 11113, "iter_tflops": 20.405311644898834, "iter_time": 1.0110648574829102, "loss": 0.0883520171046257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.714066856415347, "step_time": 0.9501257247924805} +{"epoch": 0, "iter": 11114, "iter_tflops": 19.00967545936003, "iter_time": 1.0852943572998046, "loss": 0.1205703541636467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.16230637192599, "step_time": 0.8907184448242187} +{"epoch": 0, "iter": 11115, "iter_tflops": 44.93127249737834, "iter_time": 0.4591700248718261, "loss": 0.08243387192487717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62477246515904, "step_time": 0.4242918262481689} +{"epoch": 0, "iter": 11116, "iter_tflops": 49.900715832323385, "iter_time": 0.4134428367614746, "loss": 0.09150456637144089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.539949667493644, "step_time": 0.3782748908996582} +{"epoch": 0, "iter": 11117, "iter_tflops": 21.741603221157497, "iter_time": 0.94892236328125, "loss": 0.7344366908073425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.72465945630994, "step_time": 0.9078725051879881} +{"epoch": 0, "iter": 11118, "iter_tflops": 12.338400680674708, "iter_time": 1.6721043548583987, "loss": 0.5973013043403625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.312709874515717, "step_time": 1.3473182525634766} +{"epoch": 0, "iter": 11119, "iter_tflops": 41.02607767389837, "iter_time": 0.5028775520324706, "loss": 0.9209067821502686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.31298356556478, "step_time": 0.4655767192840576} +{"epoch": 0, "iter": 11120, "iter_tflops": 43.65544018808765, "iter_time": 0.4725892906188965, "loss": 0.7269246578216553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.930347915073874, "step_time": 
0.4396109218597412} +{"epoch": 0, "iter": 11121, "iter_tflops": 38.14065148592696, "iter_time": 0.540921371459961, "loss": 0.5031589269638062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.368145234753015, "step_time": 0.49871932601928715} +{"epoch": 0, "iter": 11122, "iter_tflops": 34.87724875001737, "iter_time": 0.5915344314575195, "loss": 0.6864200830459595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.737388361667875, "step_time": 0.5325886535644531} +{"epoch": 0, "iter": 11123, "iter_tflops": 45.41181800349985, "iter_time": 0.4543111114501953, "loss": 0.5635690689086914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04135247462888, "step_time": 0.4206876945495605} +{"epoch": 0, "iter": 11124, "iter_tflops": 46.37955198492526, "iter_time": 0.44483166885375974, "loss": 0.696790337562561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.10143604145541, "step_time": 0.411786470413208} +{"epoch": 0, "iter": 11125, "iter_tflops": 32.68555258448795, "iter_time": 0.6311991653442384, "loss": 0.9662764668464661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.88873994532485, "step_time": 0.5913395996093751} +{"epoch": 0, "iter": 11126, "iter_tflops": 12.051590547073182, "iter_time": 1.711897979736328, "loss": 0.7264132499694824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.020099545992764, "step_time": 1.2878255500793458} +{"epoch": 0, "iter": 11127, "iter_tflops": 41.32107093348915, "iter_time": 0.499287483215332, "loss": 0.6162391901016235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.45101234094307, "step_time": 0.464131015777588} +{"epoch": 0, "iter": 11128, "iter_tflops": 43.667776715240265, "iter_time": 0.4724557800292969, "loss": 0.7100663185119629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.25258239363053, "step_time": 0.436613037109375} +{"epoch": 0, "iter": 11129, "iter_tflops": 28.956098380169433, "iter_time": 0.7124956283569337, "loss": 0.44258931279182434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.836231912276936, 
"step_time": 0.6690536499023436} +{"epoch": 0, "iter": 11130, "iter_tflops": 16.21348530602174, "iter_time": 1.272465057373047, "loss": 0.5164900422096252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.623244935765285, "step_time": 1.1078141098022463} +{"epoch": 0, "iter": 11131, "iter_tflops": 35.083378573985286, "iter_time": 0.5880589141845703, "loss": 0.32864439487457275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.92934192832774, "step_time": 0.5299625549316406} +{"epoch": 0, "iter": 11132, "iter_tflops": 39.02637292772371, "iter_time": 0.5286449127197267, "loss": 0.35216253995895386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6566914846122, "step_time": 0.4836543292999267} +{"epoch": 0, "iter": 11133, "iter_tflops": 17.58073807540102, "iter_time": 1.1735055389404299, "loss": 0.4434922933578491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.71845647418978, "step_time": 1.1021792068481446} +{"epoch": 0, "iter": 11134, "iter_tflops": 16.372491859411145, "iter_time": 1.2601071166992188, "loss": 0.46336567401885986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.722083999604447, "step_time": 1.1019656524658203} +{"epoch": 0, "iter": 11135, "iter_tflops": 34.83987345366834, "iter_time": 0.5921690139770508, "loss": 0.44181546568870544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.90408869531894, "step_time": 0.5442973098754884} +{"epoch": 0, "iter": 11136, "iter_tflops": 38.59156762880373, "iter_time": 0.5346010742187499, "loss": 0.4864357113838196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08806298394762, "step_time": 0.49018871498107913} +{"epoch": 0, "iter": 11137, "iter_tflops": 18.27326562802039, "iter_time": 1.1290315551757812, "loss": 0.6610965728759766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.806821058331455, "step_time": 1.0416155853271485} +{"epoch": 0, "iter": 11138, "iter_tflops": 27.035752299912108, "iter_time": 0.763104103088379, "loss": 0.7410920858383179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
33.95234560135659, "step_time": 0.6076485481262207} +{"epoch": 0, "iter": 11139, "iter_tflops": 43.97706877725554, "iter_time": 0.46913298416137694, "loss": 0.7127463221549988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.623593172693745, "step_time": 0.4332116107940674} +{"epoch": 0, "iter": 11140, "iter_tflops": 40.544871127189836, "iter_time": 0.5088459510803223, "loss": 0.5634393692016602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.555951239036034, "step_time": 0.47366876220703125} +{"epoch": 0, "iter": 11141, "iter_tflops": 33.47183251343208, "iter_time": 0.6163717956542968, "loss": 0.5048844218254089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.96753302050859, "step_time": 0.5736032409667968} +{"epoch": 0, "iter": 11142, "iter_tflops": 37.42427186365183, "iter_time": 0.551275749206543, "loss": 0.44898557662963867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10481524436137, "step_time": 0.5019142742156983} +{"epoch": 0, "iter": 11143, "iter_tflops": 36.90774547610607, "iter_time": 0.5589908905029297, "loss": 0.4103409945964813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.69030234835807, "step_time": 0.5070272846221924} +{"epoch": 0, "iter": 11144, "iter_tflops": 38.47141615701806, "iter_time": 0.5362707061767579, "loss": 0.4464911222457886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.9171496795872, "step_time": 0.49218741416931155} +{"epoch": 0, "iter": 11145, "iter_tflops": 27.936431576191474, "iter_time": 0.7385013885498045, "loss": 0.20021983981132507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.895248381838876, "step_time": 0.6677756156921387} +{"epoch": 0, "iter": 11146, "iter_tflops": 44.59709550194669, "iter_time": 0.4626106986999512, "loss": 0.30437353253364563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.46454809824522, "step_time": 0.4256945400238037} +{"epoch": 0, "iter": 11147, "iter_tflops": 45.19429743544174, "iter_time": 0.4564977149963379, "loss": 0.2841290235519409, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 49.25773753637831, "step_time": 0.4188396492004394} +{"epoch": 0, "iter": 11148, "iter_tflops": 44.67680526490532, "iter_time": 0.4617853355407715, "loss": 0.24830977618694305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32046972033749, "step_time": 0.4269638442993164} +{"epoch": 0, "iter": 11149, "iter_tflops": 25.672036129083075, "iter_time": 0.8036407165527344, "loss": 0.732193648815155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.319875055697047, "step_time": 0.7551679306030274} +{"epoch": 0, "iter": 11150, "iter_tflops": 24.70598621945141, "iter_time": 0.8350645599365236, "loss": 0.6481518745422363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.115191965944717, "step_time": 0.7086023521423339} +{"epoch": 0, "iter": 11151, "iter_tflops": 34.46445725025579, "iter_time": 0.5986194229125975, "loss": 0.9053546786308289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.65750527271227, "step_time": 0.5478613986968994} +{"epoch": 0, "iter": 11152, "iter_tflops": 38.821991237678624, "iter_time": 0.5314280090332031, "loss": 0.6389710903167725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22020875608852, "step_time": 0.48865446472167967} +{"epoch": 0, "iter": 11153, "iter_tflops": 25.080751883138078, "iter_time": 0.8225867233276368, "loss": 0.12137407809495926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.96918298973525, "step_time": 0.76498770904541} +{"epoch": 0, "iter": 11154, "iter_tflops": 7.088583016380584, "iter_time": 2.910467926025391, "loss": 0.09966391324996948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.401710424979916, "step_time": 2.4555825500488284} +{"epoch": 0, "iter": 11155, "iter_tflops": 14.396206918773016, "iter_time": 1.4330923156738282, "loss": 0.16929450631141663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.171176275239045, "step_time": 1.135374683380127} +{"epoch": 0, "iter": 11156, "iter_tflops": 46.124476132416646, "iter_time": 0.44729166030883794, "loss": 0.1325445920228958, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.13412340918612, "step_time": 0.4115179862976074} +{"epoch": 0, "iter": 11157, "iter_tflops": 22.939051535081664, "iter_time": 0.6944666213989258, "loss": 0.3398986756801605, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 24.499388675368575, "step_time": 0.6502368621826171} +{"epoch": 0, "iter": 11158, "iter_tflops": 22.122947898321737, "iter_time": 0.7200851211547851, "loss": 0.2608153820037842, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 26.861687834414912, "step_time": 0.5930530395507813} +{"epoch": 0, "iter": 11159, "iter_tflops": 26.871158272782594, "iter_time": 0.5928440246582032, "loss": 0.2024586796760559, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.571423441761524, "step_time": 0.5575642967224121} +{"epoch": 0, "iter": 11160, "iter_tflops": 27.916618503419382, "iter_time": 0.5706423797607422, "loss": 0.31252533197402954, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.718833858017465, "step_time": 0.5360373725891113} +{"epoch": 0, "iter": 11161, "iter_tflops": 18.059683785934126, "iter_time": 1.0186672134399413, "loss": 0.10259920358657837, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 18.97084083695125, "step_time": 0.9697412948608398} +{"epoch": 0, "iter": 11162, "iter_tflops": 12.849351572448397, "iter_time": 1.431730438232422, "loss": 0.08667036145925522, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 18.292557159626277, "step_time": 1.0056990718841552} +{"epoch": 0, "iter": 11163, "iter_tflops": 33.19624178228623, "iter_time": 0.5541834487915039, "loss": 0.10449770838022232, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 36.391646249527774, "step_time": 0.5055228233337403} +{"epoch": 0, "iter": 11164, "iter_tflops": 38.024974320571225, "iter_time": 0.48380855178833004, "loss": 0.08332355320453644, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 41.72262159726791, "step_time": 0.44093125152587886} +{"epoch": 0, "iter": 11165, "iter_tflops": 19.151267577438144, "iter_time": 1.0404649810791016, "loss": 0.08703126758337021, 
"lr": 3e-05, "seqlen": 7920.0, "step_tflops": 20.41057355544254, "step_time": 0.9762696380615234} +{"epoch": 0, "iter": 11166, "iter_tflops": 15.79126366317147, "iter_time": 1.2618510894775388, "loss": 0.09660953283309937, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 19.162057875881764, "step_time": 1.03987908744812} +{"epoch": 0, "iter": 11167, "iter_tflops": 41.38896347082648, "iter_time": 0.4814380836486816, "loss": 0.13076630234718323, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 45.44047222342428, "step_time": 0.4385126800537109} +{"epoch": 0, "iter": 11168, "iter_tflops": 39.0505390831408, "iter_time": 0.5102675590515137, "loss": 0.05802566185593605, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 42.81438136215881, "step_time": 0.4654095802307129} +{"epoch": 0, "iter": 11169, "iter_tflops": 19.968226453703817, "iter_time": 1.0331960906982423, "loss": 0.403939813375473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.581831888334072, "step_time": 0.9559472808837891} +{"epoch": 0, "iter": 11170, "iter_tflops": 16.933077021176622, "iter_time": 1.218390106201172, "loss": 0.4749448001384735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.195005522266456, "step_time": 0.929537660598755} +{"epoch": 0, "iter": 11171, "iter_tflops": 35.50480236062613, "iter_time": 0.581078956604004, "loss": 0.5060909390449524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.832733534252384, "step_time": 0.5312810001373292} +{"epoch": 0, "iter": 11172, "iter_tflops": 35.43314396007919, "iter_time": 0.5822541046142579, "loss": 0.3845296800136566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.9878740418331, "step_time": 0.5291669273376465} +{"epoch": 0, "iter": 11173, "iter_tflops": 18.531167488799138, "iter_time": 1.113318603515625, "loss": 0.719768226146698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.094012827205525, "step_time": 1.0267283935546874} +{"epoch": 0, "iter": 11174, "iter_tflops": 15.124132357305337, "iter_time": 1.3641174926757813, "loss": 0.6160856485366821, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.372849532128154, "step_time": 1.0649488334655761} +{"epoch": 0, "iter": 11175, "iter_tflops": 31.99395142234992, "iter_time": 0.6448435592651368, "loss": 0.6620904803276062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.86317058269771, "step_time": 0.5917733001708985} +{"epoch": 0, "iter": 11176, "iter_tflops": 36.24097494619181, "iter_time": 0.5692753448486328, "loss": 0.5172016620635986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66689724721794, "step_time": 0.5201085777282715} +{"epoch": 0, "iter": 11177, "iter_tflops": 19.8828244547683, "iter_time": 1.0376339416503906, "loss": 0.6991353631019592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.448539585807914, "step_time": 0.9618880310058595} +{"epoch": 0, "iter": 11178, "iter_tflops": 20.665118146617605, "iter_time": 0.9983535232543945, "loss": 0.5645349025726318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.312345287770043, "step_time": 0.8150605278015137} +{"epoch": 0, "iter": 11179, "iter_tflops": 33.0798257867285, "iter_time": 0.6236760025024414, "loss": 0.5512926578521729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.844401882826105, "step_time": 0.5755736579895019} +{"epoch": 0, "iter": 11180, "iter_tflops": 35.680817721537636, "iter_time": 0.5782124633789062, "loss": 0.7251498103141785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.792087278273584, "step_time": 0.5318376750946046} +{"epoch": 0, "iter": 11181, "iter_tflops": 12.77330318898069, "iter_time": 1.6151729278564453, "loss": 0.11986135691404343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.586605601681965, "step_time": 1.5184877014160156} +{"epoch": 0, "iter": 11182, "iter_tflops": 19.684462523172712, "iter_time": 1.0480902633666993, "loss": 0.12989236414432526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.6704733827045, "step_time": 0.7735555801391601} +{"epoch": 0, "iter": 11183, "iter_tflops": 42.2728157054366, "iter_time": 0.48804635238647465, "loss": 
0.1105017438530922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.77032338836531, "step_time": 0.4411150493621826} +{"epoch": 0, "iter": 11184, "iter_tflops": 40.80916118185413, "iter_time": 0.5055505409240723, "loss": 0.1566445231437683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.995755414791525, "step_time": 0.4585119934082031} +{"epoch": 0, "iter": 11185, "iter_tflops": 24.872526021597864, "iter_time": 0.8294731903076173, "loss": 0.513505756855011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.583853207913297, "step_time": 0.7760761146545411} +{"epoch": 0, "iter": 11186, "iter_tflops": 34.65570791076222, "iter_time": 0.5953158874511719, "loss": 0.5542334318161011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.756235175840104, "step_time": 0.5323296604156494} +{"epoch": 0, "iter": 11187, "iter_tflops": 41.460243888322495, "iter_time": 0.49761148452758786, "loss": 0.5108433365821838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.49393811528874, "step_time": 0.4636832427978515} +{"epoch": 0, "iter": 11188, "iter_tflops": 46.90277658049713, "iter_time": 0.43986934280395507, "loss": 0.5823376178741455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.62165630725991, "step_time": 0.40755469131469724} +{"epoch": 0, "iter": 11189, "iter_tflops": 29.21913724496989, "iter_time": 0.7060815429687499, "loss": 0.06163714453577995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.048587850504905, "step_time": 0.6644776763916015} +{"epoch": 0, "iter": 11190, "iter_tflops": 48.69567559716651, "iter_time": 0.42367403793334957, "loss": 0.09423162788152695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.301043847994336, "step_time": 0.38706734466552734} +{"epoch": 0, "iter": 11191, "iter_tflops": 55.42220358357908, "iter_time": 0.3722532157897949, "loss": 0.10113735496997833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.75485143615249, "step_time": 0.33957935905456543} +{"epoch": 0, "iter": 11192, "iter_tflops": 47.99565538013648, "iter_time": 
0.42985335540771485, "loss": 0.09649833291769028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.101606902333266, "step_time": 0.3959780654907226} +{"epoch": 0, "iter": 11193, "iter_tflops": 27.742943193302537, "iter_time": 0.7436519393920898, "loss": 0.19709137082099915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.391975585066476, "step_time": 0.7019294586181641} +{"epoch": 0, "iter": 11194, "iter_tflops": 16.09636049646831, "iter_time": 1.2817241210937502, "loss": 0.14491549134254456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.096254390905585, "step_time": 1.0803738307952881} +{"epoch": 0, "iter": 11195, "iter_tflops": 51.74486324474651, "iter_time": 0.39870804977416996, "loss": 0.1677234172821045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.31627455887375, "step_time": 0.36634336471557616} +{"epoch": 0, "iter": 11196, "iter_tflops": 48.04591912133569, "iter_time": 0.4294036598205566, "loss": 0.14094771444797516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.31327192065825, "step_time": 0.39437589645385746} +{"epoch": 0, "iter": 11197, "iter_tflops": 26.566588242799405, "iter_time": 0.7765804672241212, "loss": 0.6075717806816101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.98314615816377, "step_time": 0.7372685470581055} +{"epoch": 0, "iter": 11198, "iter_tflops": 14.858076163927864, "iter_time": 1.3885440673828122, "loss": 0.6128141283988953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.03492903356989, "step_time": 1.1439520206451417} +{"epoch": 0, "iter": 11199, "iter_tflops": 40.523269902326746, "iter_time": 0.5091171951293946, "loss": 0.7065024971961975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.49048800258852, "step_time": 0.4637192001342773} +{"epoch": 0, "iter": 11200, "iter_tflops": 37.100418072226894, "iter_time": 0.5560878982543945, "loss": 0.6236433982849121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.35047597056927, "step_time": 0.5112974014282227} +{"epoch": 0, "iter": 11201, "iter_tflops": 
19.348726654895895, "iter_time": 1.0662765502929688, "loss": 0.3881818652153015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.778520467302062, "step_time": 0.9929048385620116} +{"epoch": 0, "iter": 11202, "iter_tflops": 25.775477884599468, "iter_time": 0.8004155578613281, "loss": 0.38006722927093506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.454195890749716, "step_time": 0.5819083747863769} +{"epoch": 0, "iter": 11203, "iter_tflops": 44.71589825626078, "iter_time": 0.46138161849975584, "loss": 0.27829158306121826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.28852860838753, "step_time": 0.427246265411377} +{"epoch": 0, "iter": 11204, "iter_tflops": 50.802048772263575, "iter_time": 0.4061075096130371, "loss": 0.3889828622341156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.97045770087988, "step_time": 0.37531238365173336} +{"epoch": 0, "iter": 11205, "iter_tflops": 34.41700866852475, "iter_time": 0.5994447021484375, "loss": 0.0531172938644886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.96019937984856, "step_time": 0.5581975708007813} +{"epoch": 0, "iter": 11206, "iter_tflops": 21.01988439432329, "iter_time": 0.9815036621093749, "loss": 0.05575660988688469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.348307221594506, "step_time": 0.8473317394256591} +{"epoch": 0, "iter": 11207, "iter_tflops": 51.0714842587733, "iter_time": 0.4039650268554688, "loss": 0.06329981982707977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.736705988606474, "step_time": 0.3701527233123779} +{"epoch": 0, "iter": 11208, "iter_tflops": 53.981492612302596, "iter_time": 0.38218827438354497, "loss": 0.061962731182575226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.8091330413386, "step_time": 0.3508144474029541} +{"epoch": 0, "iter": 11209, "iter_tflops": 2.0431620832428146, "iter_time": 0.9594224090576172, "loss": 0.07854827493429184, "lr": 3e-05, "seqlen": 800.0, "step_tflops": 2.133413261119281, "step_time": 0.9188353347778321} +{"epoch": 0, "iter": 11210, 
"iter_tflops": 1.2153245286525753, "iter_time": 1.6129481811523438, "loss": 0.19104857742786407, "lr": 3e-05, "seqlen": 800.0, "step_tflops": 1.5117415538383783, "step_time": 1.2966869125366212} +{"epoch": 0, "iter": 11211, "iter_tflops": 4.313600310119957, "iter_time": 0.45443605041503904, "loss": 0.09633812308311462, "lr": 3e-05, "seqlen": 800.0, "step_tflops": 4.691184612298221, "step_time": 0.41785937881469726} +{"epoch": 0, "iter": 11212, "iter_tflops": 4.438371494656704, "iter_time": 0.44166097640991214, "loss": 0.05130660906434059, "lr": 3e-05, "seqlen": 800.0, "step_tflops": 4.802014483489832, "step_time": 0.408215238571167} +{"epoch": 0, "iter": 11213, "iter_tflops": 29.96709086814804, "iter_time": 0.6884583358764649, "loss": 0.7047653794288635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.800218298058297, "step_time": 0.6487720718383789} +{"epoch": 0, "iter": 11214, "iter_tflops": 8.143107635380058, "iter_time": 2.533565124511719, "loss": 0.859860360622406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.636912711219896, "step_time": 1.9395753326416014} +{"epoch": 0, "iter": 11215, "iter_tflops": 12.351741048674521, "iter_time": 1.6702984161376955, "loss": 0.6717679500579834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.0040367610576, "step_time": 1.4732247467041015} +{"epoch": 0, "iter": 11216, "iter_tflops": 38.34310354265935, "iter_time": 0.538065299987793, "loss": 0.8415614366531372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.94938951720529, "step_time": 0.4130399532318115} +{"epoch": 0, "iter": 11217, "iter_tflops": 21.674439465741983, "iter_time": 0.7482212982177735, "loss": 0.24283142387866974, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 22.97203385576444, "step_time": 0.7059573974609376} +{"epoch": 0, "iter": 11218, "iter_tflops": 7.74951037184864, "iter_time": 2.0926841125488282, "loss": 0.5855781435966492, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 8.869169236827224, "step_time": 1.828500144958496} +{"epoch": 0, "iter": 
11219, "iter_tflops": 6.612278796030246, "iter_time": 2.452600341796875, "loss": 0.36086028814315796, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 7.7034477397727725, "step_time": 2.105197280883789} +{"epoch": 0, "iter": 11220, "iter_tflops": 24.06272420746833, "iter_time": 0.6739584884643555, "loss": 0.3853036165237427, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 25.95357991913905, "step_time": 0.6248570442199707} +{"epoch": 0, "iter": 11221, "iter_tflops": 13.900596856990177, "iter_time": 1.181411651611328, "loss": 0.43354639410972595, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 14.987902220395858, "step_time": 1.0957055130004882} +{"epoch": 0, "iter": 11222, "iter_tflops": 16.382891384778052, "iter_time": 1.0024071273803712, "loss": 0.47213810682296753, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 19.399964207729866, "step_time": 0.8465132675170899} +{"epoch": 0, "iter": 11223, "iter_tflops": 29.933389935146998, "iter_time": 0.5486290435791015, "loss": 0.32344645261764526, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 31.810483171794132, "step_time": 0.5162551918029785} +{"epoch": 0, "iter": 11224, "iter_tflops": 31.478562571534212, "iter_time": 0.5216987609863281, "loss": 0.4963797330856323, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 33.39819838696891, "step_time": 0.49171296310424806} +{"epoch": 0, "iter": 11225, "iter_tflops": 31.334596066953274, "iter_time": 0.6584126205444336, "loss": 0.09191439300775528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.24313206722045, "step_time": 0.6206122055053711} +{"epoch": 0, "iter": 11226, "iter_tflops": 13.115936104663042, "iter_time": 1.5729791107177733, "loss": 0.08486758917570114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.2615664798822, "step_time": 1.3518332824707031} +{"epoch": 0, "iter": 11227, "iter_tflops": 38.09406527406368, "iter_time": 0.5415828781127929, "loss": 0.10911347717046738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.84127085373531, "step_time": 0.49307999229431154} 
+{"epoch": 0, "iter": 11228, "iter_tflops": 42.465680448043095, "iter_time": 0.48582981109619144, "loss": 0.10120035707950592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.50068090415552, "step_time": 0.44367293357849125} +{"epoch": 0, "iter": 11229, "iter_tflops": 35.56701074189056, "iter_time": 0.5800626220703125, "loss": 0.605390191078186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.2733887789521, "step_time": 0.5253199214935302} +{"epoch": 0, "iter": 11230, "iter_tflops": 37.78101651043343, "iter_time": 0.5460703659057617, "loss": 0.5829380750656128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.21021191735075, "step_time": 0.48877019500732427} +{"epoch": 0, "iter": 11231, "iter_tflops": 39.24307181385195, "iter_time": 0.5257257537841796, "loss": 0.6041392683982849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.97605014174145, "step_time": 0.4800602531433106} +{"epoch": 0, "iter": 11232, "iter_tflops": 38.23452012825565, "iter_time": 0.5395933685302734, "loss": 0.7003893852233887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80878152838718, "step_time": 0.4934631614685059} +{"epoch": 0, "iter": 11233, "iter_tflops": 23.51887049043835, "iter_time": 0.877214469909668, "loss": 0.7566754817962646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.183588211119787, "step_time": 0.8192277183532715} +{"epoch": 0, "iter": 11234, "iter_tflops": 17.059548148623374, "iter_time": 1.2093575592041013, "loss": 0.6765511631965637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.4246644193749, "step_time": 1.0101068534851074} +{"epoch": 0, "iter": 11235, "iter_tflops": 45.29155183687576, "iter_time": 0.4555174789428711, "loss": 0.6534662246704102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.84876307091253, "step_time": 0.42234628295898435} +{"epoch": 0, "iter": 11236, "iter_tflops": 46.72885522745228, "iter_time": 0.4415065040588379, "loss": 0.6998836398124695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.56473986553001, "step_time": 
0.4080134410858154} +{"epoch": 0, "iter": 11237, "iter_tflops": 26.26777730519315, "iter_time": 0.7854145126342773, "loss": 0.08193309605121613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.679341613449964, "step_time": 0.7453607025146485} +{"epoch": 0, "iter": 11238, "iter_tflops": 14.653997471627108, "iter_time": 1.407881607055664, "loss": 0.0443839468061924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.4457704971998, "step_time": 1.182584255218506} +{"epoch": 0, "iter": 11239, "iter_tflops": 47.94563526151972, "iter_time": 0.4303018074035645, "loss": 0.10071911662817001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.99887430669554, "step_time": 0.3892741832733154} +{"epoch": 0, "iter": 11240, "iter_tflops": 51.74425925845883, "iter_time": 0.39871270370483397, "loss": 0.07074637711048126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.472123911689806, "step_time": 0.36533234596252445} +{"epoch": 0, "iter": 11241, "iter_tflops": 27.153549932659793, "iter_time": 0.759793601989746, "loss": 0.045269761234521866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.554457737754795, "step_time": 0.7225174331665039} +{"epoch": 0, "iter": 11242, "iter_tflops": 14.133555286965446, "iter_time": 1.4597242584228516, "loss": 0.09563646465539932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.08761806896423, "step_time": 1.2073709411621094} +{"epoch": 0, "iter": 11243, "iter_tflops": 47.42576286455695, "iter_time": 0.4350186958312988, "loss": 0.06452896445989609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.709089506883245, "step_time": 0.39898388671875} +{"epoch": 0, "iter": 11244, "iter_tflops": 53.37882786048243, "iter_time": 0.38650330734252936, "loss": 0.05750039592385292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.14339257691123, "step_time": 0.3548312644958496} +{"epoch": 0, "iter": 11245, "iter_tflops": 32.65752631510892, "iter_time": 0.6317408523559571, "loss": 0.6836186051368713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.86090247532362, 
"step_time": 0.5918118019104004} +{"epoch": 0, "iter": 11246, "iter_tflops": 14.835952244037594, "iter_time": 1.390614715576172, "loss": 0.4312249720096588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.25789251716194, "step_time": 1.19545845413208} +{"epoch": 0, "iter": 11247, "iter_tflops": 37.801602475278074, "iter_time": 0.5457729873657227, "loss": 0.5621160864830017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60762455230003, "step_time": 0.4958488674163818} +{"epoch": 0, "iter": 11248, "iter_tflops": 37.06189051397228, "iter_time": 0.5566659774780274, "loss": 0.6547399759292603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.62600868165612, "step_time": 0.5078296928405762} +{"epoch": 0, "iter": 11249, "iter_tflops": 31.54440606216638, "iter_time": 0.6540333480834961, "loss": 0.6225735545158386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.85944913612385, "step_time": 0.5918364753723144} +{"epoch": 0, "iter": 11250, "iter_tflops": 37.73833961498764, "iter_time": 0.5466878967285156, "loss": 0.5363169312477112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.25522831655849, "step_time": 0.5000843372344971} +{"epoch": 0, "iter": 11251, "iter_tflops": 35.81941025188053, "iter_time": 0.5759752426147461, "loss": 0.355611115694046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.111600011256876, "step_time": 0.527492956161499} +{"epoch": 0, "iter": 11252, "iter_tflops": 34.492988985119, "iter_time": 0.5981242599487304, "loss": 0.3187834918498993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.36621050776492, "step_time": 0.5521323471069336} +{"epoch": 0, "iter": 11253, "iter_tflops": 19.833930858233323, "iter_time": 1.040191864013672, "loss": 0.6445445418357849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.405784076248654, "step_time": 0.9638092880249024} +{"epoch": 0, "iter": 11254, "iter_tflops": 17.978877657842858, "iter_time": 1.1475184326171877, "loss": 0.6328709125518799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.555246242332352, 
"step_time": 0.9571263198852538} +{"epoch": 0, "iter": 11255, "iter_tflops": 45.503207692168665, "iter_time": 0.4533986625671387, "loss": 0.7597125768661499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.06869780818136, "step_time": 0.4204532508850098} +{"epoch": 0, "iter": 11256, "iter_tflops": 47.0234844447668, "iter_time": 0.4387402114868164, "loss": 0.5723993182182312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.27179290741849, "step_time": 0.4103910427093506} +{"epoch": 0, "iter": 11257, "iter_tflops": 27.675257005424058, "iter_time": 0.7454707107543945, "loss": 0.1175721287727356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.273531327615416, "step_time": 0.7047695503234864} +{"epoch": 0, "iter": 11258, "iter_tflops": 8.981550866577777, "iter_time": 2.2970524597167965, "loss": 0.13227687776088715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.815206631147227, "step_time": 1.7461474990844728} +{"epoch": 0, "iter": 11259, "iter_tflops": 12.19765732190111, "iter_time": 1.6913980255126955, "loss": 0.14869621396064758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.114356914779274, "step_time": 1.4617097778320314} +{"epoch": 0, "iter": 11260, "iter_tflops": 33.423438102925736, "iter_time": 0.6172642517089844, "loss": 0.11552704125642776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4723909994264, "step_time": 0.46390789985656733} +{"epoch": 0, "iter": 11261, "iter_tflops": 20.537387460320865, "iter_time": 0.8256166381835938, "loss": 0.3727251887321472, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 21.79651845394743, "step_time": 0.7779228057861328} +{"epoch": 0, "iter": 11262, "iter_tflops": 11.082564157558023, "iter_time": 1.5299716339111329, "loss": 0.3407166004180908, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 13.815668680521032, "step_time": 1.2273027954101563} +{"epoch": 0, "iter": 11263, "iter_tflops": 26.22727541396585, "iter_time": 0.6465028686523437, "loss": 0.3199651539325714, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 
28.244167102506616, "step_time": 0.600336654663086} +{"epoch": 0, "iter": 11264, "iter_tflops": 24.633929936757422, "iter_time": 0.6883192749023438, "loss": 0.27988484501838684, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 26.403860670179878, "step_time": 0.6421791496276856} +{"epoch": 0, "iter": 11265, "iter_tflops": 22.767029320020526, "iter_time": 0.9061829376220704, "loss": 0.4952954053878784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.468674226146856, "step_time": 0.8431635208129884} +{"epoch": 0, "iter": 11266, "iter_tflops": 17.112691195014374, "iter_time": 1.2056019287109376, "loss": 0.2927602231502533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.36172797586563, "step_time": 1.0132290115356444} +{"epoch": 0, "iter": 11267, "iter_tflops": 37.13827041954328, "iter_time": 0.5555211181640625, "loss": 0.23428796231746674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.551487489233295, "step_time": 0.5087629280090332} +{"epoch": 0, "iter": 11268, "iter_tflops": 39.38350902670645, "iter_time": 0.52385107421875, "loss": 0.397939532995224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24919695399092, "step_time": 0.4770283603668213} +{"epoch": 0, "iter": 11269, "iter_tflops": 1.6696717267473034, "iter_time": 0.8681073150634765, "loss": 1.3230836391448975, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.80165901777689, "step_time": 0.804510856628418} +{"epoch": 0, "iter": 11270, "iter_tflops": 0.950658315756539, "iter_time": 1.5246847534179686, "loss": 1.4412413835525513, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.3599051126652044, "step_time": 1.0658495407104494} +{"epoch": 0, "iter": 11271, "iter_tflops": 3.3509610860782133, "iter_time": 0.43254881286621094, "loss": 1.9372680187225342, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.646339905250536, "step_time": 0.3975093593597412} +{"epoch": 0, "iter": 11272, "iter_tflops": 3.542713587633084, "iter_time": 0.40913672637939447, "loss": 1.3551207780838013, "lr": 3e-05, "seqlen": 592.0, 
"step_tflops": 3.8531390658599047, "step_time": 0.3761749095916748} +{"epoch": 0, "iter": 11273, "iter_tflops": 25.62127431728853, "iter_time": 0.8036131439208986, "loss": 0.11882318556308746, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 27.033866133095884, "step_time": 0.7616222076416015} +{"epoch": 0, "iter": 11274, "iter_tflops": 13.170970125229402, "iter_time": 1.5632555999755862, "loss": 0.10576730966567993, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 17.699607875078346, "step_time": 1.1632796020507812} +{"epoch": 0, "iter": 11275, "iter_tflops": 51.457018867505035, "iter_time": 0.40013186264038086, "loss": 0.1303820163011551, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 56.29330668491362, "step_time": 0.3657556114196777} +{"epoch": 0, "iter": 11276, "iter_tflops": 48.96636438452512, "iter_time": 0.4204844093322754, "loss": 0.08155848830938339, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 52.934479029737794, "step_time": 0.38896373748779295} +{"epoch": 0, "iter": 11277, "iter_tflops": 29.78102942309333, "iter_time": 0.6927595825195312, "loss": 0.5410369038581848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.50022257447226, "step_time": 0.6549507217407226} +{"epoch": 0, "iter": 11278, "iter_tflops": 9.757361042464334, "iter_time": 2.1144132537841798, "loss": 0.6444550156593323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.272494626873886, "step_time": 1.6810839309692382} +{"epoch": 0, "iter": 11279, "iter_tflops": 13.785578423972806, "iter_time": 1.496570755004883, "loss": 0.491847425699234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.131965626323396, "step_time": 1.278895206451416} +{"epoch": 0, "iter": 11280, "iter_tflops": 26.665392330577934, "iter_time": 0.7737029800415038, "loss": 0.4775928556919098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.41106750663292, "step_time": 0.6365447082519531} +{"epoch": 0, "iter": 11281, "iter_tflops": 15.47329970972446, "iter_time": 0.9107140655517577, "loss": 0.25117355585098267, "lr": 3e-05, 
"seqlen": 5648.0, "step_tflops": 16.33144693133455, "step_time": 0.8628599624633789} +{"epoch": 0, "iter": 11282, "iter_tflops": 12.378476213472847, "iter_time": 1.1384076232910156, "loss": 0.34516409039497375, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 14.610797502941846, "step_time": 0.9644751892089845} +{"epoch": 0, "iter": 11283, "iter_tflops": 24.626022706137515, "iter_time": 0.572230110168457, "loss": 0.2233777940273285, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 26.264283608791676, "step_time": 0.5365366859436036} +{"epoch": 0, "iter": 11284, "iter_tflops": 24.332347612674713, "iter_time": 0.5791365432739258, "loss": 0.3262307941913605, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 25.795646081219125, "step_time": 0.5462841148376465} +{"epoch": 0, "iter": 11285, "iter_tflops": 29.0694520966509, "iter_time": 0.709717315673828, "loss": 0.6396591067314148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.94579167122405, "step_time": 0.6666849479675293} +{"epoch": 0, "iter": 11286, "iter_tflops": 21.690274847714335, "iter_time": 0.9511679153442383, "loss": 0.6886385083198547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.713538504165786, "step_time": 0.8348093700408936} +{"epoch": 0, "iter": 11287, "iter_tflops": 44.56311680409306, "iter_time": 0.4629634323120117, "loss": 0.5222111940383911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.15310450545391, "step_time": 0.42844783782958984} +{"epoch": 0, "iter": 11288, "iter_tflops": 46.22573411780607, "iter_time": 0.44631186294555675, "loss": 0.5767805576324463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.81245602980769, "step_time": 0.41417539215087884} +{"epoch": 0, "iter": 11289, "iter_tflops": 32.40298823529716, "iter_time": 0.6367034225463868, "loss": 0.6369626522064209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.544245967495776, "step_time": 0.597236759185791} +{"epoch": 0, "iter": 11290, "iter_tflops": 10.229467803302644, "iter_time": 2.016829605102539, "loss": 0.562501072883606, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 12.53693874545819, "step_time": 1.645624496459961} +{"epoch": 0, "iter": 11291, "iter_tflops": 17.16933586062637, "iter_time": 1.2016244354248045, "loss": 0.5024962425231934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.31569945248155, "step_time": 1.015524646759033} +{"epoch": 0, "iter": 11292, "iter_tflops": 38.37975912042154, "iter_time": 0.5375514068603516, "loss": 0.7278652787208557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.10038381397037, "step_time": 0.49004525947570804} +{"epoch": 0, "iter": 11293, "iter_tflops": 12.856574549721163, "iter_time": 1.226343978881836, "loss": 0.35914576053619385, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 13.885046206832401, "step_time": 1.1355081253051758} +{"epoch": 0, "iter": 11294, "iter_tflops": 12.851883921168824, "iter_time": 1.2267915649414065, "loss": 0.34033268690109253, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 15.640968941236956, "step_time": 1.008031078338623} +{"epoch": 0, "iter": 11295, "iter_tflops": 28.813316604572115, "iter_time": 0.5471977767944336, "loss": 0.24589498341083527, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.678052316659457, "step_time": 0.5139368896484374} +{"epoch": 0, "iter": 11296, "iter_tflops": 28.192400528156842, "iter_time": 0.5592493896484375, "loss": 0.2762768268585205, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.05030875198281, "step_time": 0.5246729049682617} +{"epoch": 0, "iter": 11297, "iter_tflops": 29.682834350279524, "iter_time": 0.6950513305664061, "loss": 0.21102574467658997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.442935877443087, "step_time": 0.6561439933776856} +{"epoch": 0, "iter": 11298, "iter_tflops": 13.640090905475446, "iter_time": 1.512533432006836, "loss": 0.22259491682052612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.7638771373117, "step_time": 1.308757568359375} +{"epoch": 0, "iter": 11299, "iter_tflops": 49.02724279038088, "iter_time": 0.4208087654113769, "loss": 
0.22544994950294495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.46632872673689, "step_time": 0.38587077140808107} +{"epoch": 0, "iter": 11300, "iter_tflops": 48.001225531468236, "iter_time": 0.4298034744262696, "loss": 0.2372184544801712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.20459557580424, "step_time": 0.39519688415527343} +{"epoch": 0, "iter": 11301, "iter_tflops": 32.77843636811068, "iter_time": 0.6294105453491211, "loss": 0.7425243258476257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.162404968161404, "step_time": 0.5867372703552246} +{"epoch": 0, "iter": 11302, "iter_tflops": 10.210736048620534, "iter_time": 2.0205295104980467, "loss": 0.8479712009429932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.163061370489022, "step_time": 1.6962089462280272} +{"epoch": 0, "iter": 11303, "iter_tflops": 11.533247653503958, "iter_time": 1.78883642578125, "loss": 0.7939987182617188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.45944716694365, "step_time": 1.5328336486816407} +{"epoch": 0, "iter": 11304, "iter_tflops": 33.285931014790755, "iter_time": 0.6198142242431641, "loss": 0.7173967957496643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.447757851939286, "step_time": 0.49776138877868653} +{"epoch": 0, "iter": 11305, "iter_tflops": 14.755875643184918, "iter_time": 1.0851504669189453, "loss": 0.33893489837646484, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 15.804601506759688, "step_time": 1.013144515991211} +{"epoch": 0, "iter": 11306, "iter_tflops": 10.972384762618157, "iter_time": 1.4593313751220705, "loss": 0.4224012792110443, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 14.79092250328826, "step_time": 1.0825792198181152} +{"epoch": 0, "iter": 11307, "iter_tflops": 27.318804796587713, "iter_time": 0.5861290588378906, "loss": 0.43269261717796326, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 28.992665342911476, "step_time": 0.5522895240783692} +{"epoch": 0, "iter": 11308, "iter_tflops": 28.137585594133668, "iter_time": 
0.5690731811523437, "loss": 0.2975631654262543, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 29.69775326575959, "step_time": 0.5391769943237305} +{"epoch": 0, "iter": 11309, "iter_tflops": 25.778892642099507, "iter_time": 0.8003095321655272, "loss": 0.038084059953689575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.229798983894643, "step_time": 0.7576660232543946} +{"epoch": 0, "iter": 11310, "iter_tflops": 15.809212711638358, "iter_time": 1.3050044860839844, "loss": 0.04702483117580414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.360794232802824, "step_time": 1.1236492958068849} +{"epoch": 0, "iter": 11311, "iter_tflops": 52.06801907325222, "iter_time": 0.3962335014343262, "loss": 0.023478427901864052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.1493905939778, "step_time": 0.3610028610229492} +{"epoch": 0, "iter": 11312, "iter_tflops": 55.03548972369919, "iter_time": 0.37486890029907227, "loss": 0.04431390389800072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.049406915312595, "step_time": 0.34356864738464354} +{"epoch": 0, "iter": 11313, "iter_tflops": 23.621886323544327, "iter_time": 0.8733889083862305, "loss": 0.5768213272094727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.868640664977818, "step_time": 0.829602783203125} +{"epoch": 0, "iter": 11314, "iter_tflops": 21.059327197264025, "iter_time": 0.9796653671264649, "loss": 0.8311367630958557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.874022257953566, "step_time": 0.8641649608612062} +{"epoch": 0, "iter": 11315, "iter_tflops": 36.99695306873421, "iter_time": 0.5576430435180664, "loss": 0.715400755405426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.09300019870884, "step_time": 0.5145809345245361} +{"epoch": 0, "iter": 11316, "iter_tflops": 38.575290540719735, "iter_time": 0.5348266525268555, "loss": 0.6174653172492981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.8875887060106, "step_time": 0.49253476142883296} +{"epoch": 0, "iter": 11317, "iter_tflops": 19.56051949923665, 
"iter_time": 1.0547313690185547, "loss": 0.14149868488311768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.875558222912403, "step_time": 0.9882894287109374} +{"epoch": 0, "iter": 11318, "iter_tflops": 21.15810352770482, "iter_time": 0.975091812133789, "loss": 0.12992164492607117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.30399364073677, "step_time": 0.7843331241607666} +{"epoch": 0, "iter": 11319, "iter_tflops": 49.429295835301524, "iter_time": 0.41738594818115227, "loss": 0.17534074187278748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.68806264829896, "step_time": 0.3842771091461182} +{"epoch": 0, "iter": 11320, "iter_tflops": 47.49580944881925, "iter_time": 0.43437713241577147, "loss": 0.14492042362689972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.56742915150132, "step_time": 0.40007993125915525} +{"epoch": 0, "iter": 11321, "iter_tflops": 32.65355679020023, "iter_time": 0.6318176498413085, "loss": 0.38733288645744324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.06545052403039, "step_time": 0.5883595733642578} +{"epoch": 0, "iter": 11322, "iter_tflops": 36.54760112680016, "iter_time": 0.564499252319336, "loss": 0.34487977623939514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.011058155391545, "step_time": 0.5156347885131836} +{"epoch": 0, "iter": 11323, "iter_tflops": 43.990671215490565, "iter_time": 0.46898792266845707, "loss": 0.26801741123199463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98253936522676, "step_time": 0.42997085571289057} +{"epoch": 0, "iter": 11324, "iter_tflops": 38.49852941203447, "iter_time": 0.5358930282592773, "loss": 0.3015507459640503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.155918413560656, "step_time": 0.4893996925354004} +{"epoch": 0, "iter": 11325, "iter_tflops": 15.324033795680934, "iter_time": 1.3463226318359376, "loss": 0.010732164606451988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.481976389464702, "step_time": 1.2517366256713867} +{"epoch": 0, "iter": 11326, "iter_tflops": 
26.754994077803044, "iter_time": 0.7711118698120119, "loss": 0.002746516140177846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.33030145619024, "step_time": 0.583948980331421} +{"epoch": 0, "iter": 11327, "iter_tflops": 55.610982067098796, "iter_time": 0.3709895553588868, "loss": 0.009447997435927391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.83849431879023, "step_time": 0.33911249351501466} +{"epoch": 0, "iter": 11328, "iter_tflops": 50.66599699095731, "iter_time": 0.4071980171203613, "loss": 0.0036855044309049845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.800955404489706, "step_time": 0.3697265281677246} +{"epoch": 0, "iter": 11329, "iter_tflops": 30.0779398772549, "iter_time": 0.6859210968017578, "loss": 0.475199818611145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.979781645091673, "step_time": 0.6451292800903321} +{"epoch": 0, "iter": 11330, "iter_tflops": 14.422599966121659, "iter_time": 1.430469787597656, "loss": 0.7525027394294739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.12588641584885, "step_time": 1.1382115631103515} +{"epoch": 0, "iter": 11331, "iter_tflops": 43.86239633705595, "iter_time": 0.47035947036743164, "loss": 0.7145851850509644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32594877575743, "step_time": 0.4359361839294434} +{"epoch": 0, "iter": 11332, "iter_tflops": 49.12461429493062, "iter_time": 0.419974666595459, "loss": 0.6124508380889893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.95543094222888, "step_time": 0.38959353446960454} +{"epoch": 0, "iter": 11333, "iter_tflops": 22.50515429387831, "iter_time": 0.9167274856567382, "loss": 0.1412794142961502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.624965000904673, "step_time": 0.8732750930786133} +{"epoch": 0, "iter": 11334, "iter_tflops": 13.30126510707596, "iter_time": 1.5510625000000002, "loss": 0.10399222373962402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.983468959422185, "step_time": 1.2907769622802734} +{"epoch": 0, "iter": 11335, 
"iter_tflops": 50.93820805532943, "iter_time": 0.40502197265624995, "loss": 0.05988715961575508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.686502617348246, "step_time": 0.37048642921447755} +{"epoch": 0, "iter": 11336, "iter_tflops": 53.50130587384828, "iter_time": 0.38561850357055666, "loss": 0.12210416793823242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.14062733563485, "step_time": 0.35484814071655274} +{"epoch": 0, "iter": 11337, "iter_tflops": 29.235808262296185, "iter_time": 0.7056789169311524, "loss": 0.2004808485507965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.0014196024115, "step_time": 0.6654886703491212} +{"epoch": 0, "iter": 11338, "iter_tflops": 13.693246323944194, "iter_time": 1.5066619720458985, "loss": 0.2541946768760681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.012983756505236, "step_time": 1.2126675605773927} +{"epoch": 0, "iter": 11339, "iter_tflops": 11.482886723781371, "iter_time": 1.7966817932128907, "loss": 0.19758154451847076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.425387070747611, "step_time": 1.5367224349975586} +{"epoch": 0, "iter": 11340, "iter_tflops": 19.87314890762738, "iter_time": 1.0381391296386717, "loss": 0.21306155622005463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.046482221985418, "step_time": 0.8579672203063965} +{"epoch": 0, "iter": 11341, "iter_tflops": 21.674257113412867, "iter_time": 0.7557955780029297, "loss": 0.5337237119674683, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 23.64555778788737, "step_time": 0.6927858428955078} +{"epoch": 0, "iter": 11342, "iter_tflops": 22.19453609589715, "iter_time": 0.7380783996582031, "loss": 0.380651593208313, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 23.895837594168658, "step_time": 0.6855297546386718} +{"epoch": 0, "iter": 11343, "iter_tflops": 26.07093011032793, "iter_time": 0.6283361434936523, "loss": 0.2586647570133209, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 27.873720427033106, "step_time": 0.5876972084045411} +{"epoch": 0, 
"iter": 11344, "iter_tflops": 25.142326652190047, "iter_time": 0.6515430297851563, "loss": 0.3442547619342804, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 26.76478144283786, "step_time": 0.6120471305847168} +{"epoch": 0, "iter": 11345, "iter_tflops": 17.542130127186834, "iter_time": 1.1760882720947266, "loss": 0.31889602541923523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.679564816283175, "step_time": 1.104473991394043} +{"epoch": 0, "iter": 11346, "iter_tflops": 22.170309608910777, "iter_time": 0.9305730895996094, "loss": 0.314932256937027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.972556552856823, "step_time": 0.6661088333129883} +{"epoch": 0, "iter": 11347, "iter_tflops": 38.06473102766934, "iter_time": 0.5420002441406251, "loss": 0.24333800375461578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.71687250872211, "step_time": 0.49455034065246584} +{"epoch": 0, "iter": 11348, "iter_tflops": 38.48616889913096, "iter_time": 0.5360651397705078, "loss": 0.22710581123828888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.66444879890682, "step_time": 0.4951726016998291} +{"epoch": 0, "iter": 11349, "iter_tflops": 17.600737962414165, "iter_time": 1.1721720733642575, "loss": 0.13317011296749115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.093445219828002, "step_time": 1.0805327835083007} +{"epoch": 0, "iter": 11350, "iter_tflops": 13.081198586984492, "iter_time": 1.5771562042236327, "loss": 0.0590587817132473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.826474578807082, "step_time": 1.3035811233520507} +{"epoch": 0, "iter": 11351, "iter_tflops": 49.16140614244077, "iter_time": 0.4196603622436524, "loss": 0.049715638160705566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.82686611372387, "step_time": 0.38328617286682126} +{"epoch": 0, "iter": 11352, "iter_tflops": 45.23180522858344, "iter_time": 0.4561191711425781, "loss": 0.027275629341602325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.931783168571, "step_time": 0.42162970924377446} 
+{"epoch": 0, "iter": 11353, "iter_tflops": 39.02844965971968, "iter_time": 0.5286167831420898, "loss": 0.684319257736206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.190252841170405, "step_time": 0.48900141906738287} +{"epoch": 0, "iter": 11354, "iter_tflops": 45.86224212502296, "iter_time": 0.4498492126464844, "loss": 0.5811915993690491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.82851724358416, "step_time": 0.4140418910980225} +{"epoch": 0, "iter": 11355, "iter_tflops": 47.34936407287507, "iter_time": 0.4357206039428711, "loss": 0.7266178131103516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.1766426658846, "step_time": 0.4031349544525147} +{"epoch": 0, "iter": 11356, "iter_tflops": 43.997604623330126, "iter_time": 0.46891401672363286, "loss": 0.5265192985534668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76554050371716, "step_time": 0.43192421340942383} +{"epoch": 0, "iter": 11357, "iter_tflops": 26.44208531679231, "iter_time": 0.7802370071411133, "loss": 0.7205799221992493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.8937159183251, "step_time": 0.7396323089599609} +{"epoch": 0, "iter": 11358, "iter_tflops": 14.825829525628354, "iter_time": 1.391564193725586, "loss": 0.5896043181419373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.974885855614616, "step_time": 1.1477732696533203} +{"epoch": 0, "iter": 11359, "iter_tflops": 37.31975695998287, "iter_time": 0.5528196105957031, "loss": 0.8561861515045166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.812200685473364, "step_time": 0.5055128898620606} +{"epoch": 0, "iter": 11360, "iter_tflops": 38.808722066486744, "iter_time": 0.5316097106933594, "loss": 0.6412367224693298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.924583411484015, "step_time": 0.4921001434326172} +{"epoch": 0, "iter": 11361, "iter_tflops": 21.526199569510787, "iter_time": 0.9584178314208985, "loss": 0.5131055116653442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.785583111305705, "step_time": 
0.905445053100586} +{"epoch": 0, "iter": 11362, "iter_tflops": 15.39666875132917, "iter_time": 1.3399712524414062, "loss": 0.4494386613368988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.871105697356285, "step_time": 1.0382458744049072} +{"epoch": 0, "iter": 11363, "iter_tflops": 38.845275985043024, "iter_time": 0.5311094589233398, "loss": 0.4328266978263855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37535794957544, "step_time": 0.4868653507232666} +{"epoch": 0, "iter": 11364, "iter_tflops": 36.35794290281958, "iter_time": 0.5674439163208008, "loss": 0.497077614068985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.62265864835899, "step_time": 0.5206892776489258} +{"epoch": 0, "iter": 11365, "iter_tflops": 19.88181285672559, "iter_time": 1.037686737060547, "loss": 0.13872011005878448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.400153696646907, "step_time": 0.9640628662109376} +{"epoch": 0, "iter": 11366, "iter_tflops": 23.542088198422974, "iter_time": 0.8763493423461914, "loss": 0.18806946277618408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.551832270443274, "step_time": 0.7225838718414307} +{"epoch": 0, "iter": 11367, "iter_tflops": 50.57400442922226, "iter_time": 0.4079386978149414, "loss": 0.1816217005252838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.89543155392514, "step_time": 0.37582532691955567} +{"epoch": 0, "iter": 11368, "iter_tflops": 52.79532113620686, "iter_time": 0.3907750358581543, "loss": 0.14703041315078735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.25939598358215, "step_time": 0.360309310913086} +{"epoch": 0, "iter": 11369, "iter_tflops": 25.28821367071113, "iter_time": 0.8158383102416993, "loss": 0.2227337658405304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.648413264979, "step_time": 0.774195945739746} +{"epoch": 0, "iter": 11370, "iter_tflops": 14.795447912254987, "iter_time": 1.3944216918945311, "loss": 0.2419111579656601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.682564063804815, 
"step_time": 1.166747844696045} +{"epoch": 0, "iter": 11371, "iter_tflops": 38.174243911446766, "iter_time": 0.5404453735351562, "loss": 0.22350502014160156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93319849740016, "step_time": 0.4919990425109863} +{"epoch": 0, "iter": 11372, "iter_tflops": 41.8232572222291, "iter_time": 0.49329236602783205, "loss": 0.2170100212097168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.87585371326313, "step_time": 0.44971574020385735} +{"epoch": 0, "iter": 11373, "iter_tflops": 18.386575540692935, "iter_time": 0.5745558700561524, "loss": 0.00859525240957737, "lr": 3e-05, "seqlen": 4256.0, "step_tflops": 20.562473374036518, "step_time": 0.5137570133209228} +{"epoch": 0, "iter": 11374, "iter_tflops": 25.475150726612213, "iter_time": 0.4146831169128418, "loss": 0.00391842145472765, "lr": 3e-05, "seqlen": 4256.0, "step_tflops": 27.86342161289599, "step_time": 0.3791391830444336} +{"epoch": 0, "iter": 11375, "iter_tflops": 31.27024696486912, "iter_time": 0.3378327941894531, "loss": 0.004266667645424604, "lr": 3e-05, "seqlen": 4256.0, "step_tflops": 34.36765508384496, "step_time": 0.30738538551330563} +{"epoch": 0, "iter": 11376, "iter_tflops": 29.031309431816485, "iter_time": 0.3638869590759277, "loss": 0.0025879517197608948, "lr": 3e-05, "seqlen": 4256.0, "step_tflops": 31.730161730474375, "step_time": 0.33293605613708493} +{"epoch": 0, "iter": 11377, "iter_tflops": 40.32766976173951, "iter_time": 0.5115865516662598, "loss": 0.5677588582038879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.089958496510675, "step_time": 0.4679317970275879} +{"epoch": 0, "iter": 11378, "iter_tflops": 36.08523826671521, "iter_time": 0.5717322235107423, "loss": 0.19733041524887085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.01468142714235, "step_time": 0.515588098526001} +{"epoch": 0, "iter": 11379, "iter_tflops": 40.81012318043082, "iter_time": 0.5055386238098145, "loss": 0.23663131892681122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.744666547888386, "step_time": 0.4610849761962891} +{"epoch": 0, "iter": 11380, "iter_tflops": 47.29732777667113, "iter_time": 0.4361999816894531, "loss": 0.4912792146205902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.77983580133058, "step_time": 0.39843875885009766} +{"epoch": 0, "iter": 11381, "iter_tflops": 16.5185132685882, "iter_time": 1.2489679412841797, "loss": 0.3344000577926636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.6372916282581, "step_time": 1.1697427215576173} +{"epoch": 0, "iter": 11382, "iter_tflops": 22.636802043554628, "iter_time": 0.9113961181640626, "loss": 0.37708449363708496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.697802814428353, "step_time": 0.7448639030456542} +{"epoch": 0, "iter": 11383, "iter_tflops": 51.565454365994185, "iter_time": 0.4000952529907227, "loss": 0.35714036226272583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.904216445644856, "step_time": 0.36904360389709473} +{"epoch": 0, "iter": 11384, "iter_tflops": 46.51086096125919, "iter_time": 0.44357582473754886, "loss": 0.3766380846500397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26914397943275, "step_time": 0.4104126682281494} +{"epoch": 0, "iter": 11385, "iter_tflops": 3.5771466970031, "iter_time": 0.4820607376098633, "loss": 0.23599067330360413, "lr": 3e-05, "seqlen": 704.0, "step_tflops": 3.8931338730258225, "step_time": 0.44293415832519534} +{"epoch": 0, "iter": 11386, "iter_tflops": 0.8414152913964068, "iter_time": 2.0494065093994136, "loss": 0.3355845510959625, "lr": 3e-05, "seqlen": 704.0, "step_tflops": 0.9918155010856455, "step_time": 1.738631805419922} +{"epoch": 0, "iter": 11387, "iter_tflops": 1.0805680202014212, "iter_time": 1.595829177856445, "loss": 0.37099209427833557, "lr": 3e-05, "seqlen": 704.0, "step_tflops": 1.3029105661826221, "step_time": 1.3234998779296876} +{"epoch": 0, "iter": 11388, "iter_tflops": 2.0005343741184345, "iter_time": 0.8619706802368163, "loss": 0.24704118072986603, "lr": 3e-05, "seqlen": 704.0, 
"step_tflops": 2.2644216740288687, "step_time": 0.7615198154449463} +{"epoch": 0, "iter": 11389, "iter_tflops": 19.464687047115188, "iter_time": 0.7805826721191406, "loss": 0.3778136968612671, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 20.564380850413993, "step_time": 0.7388404998779297} +{"epoch": 0, "iter": 11390, "iter_tflops": 11.170991667624328, "iter_time": 1.360111785888672, "loss": 0.2486104518175125, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 14.13533500709109, "step_time": 1.0748806037902832} +{"epoch": 0, "iter": 11391, "iter_tflops": 22.83931122621006, "iter_time": 0.6652476196289062, "loss": 0.3760356605052948, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 24.522132007888235, "step_time": 0.6195952873229981} +{"epoch": 0, "iter": 11392, "iter_tflops": 21.298846658861667, "iter_time": 0.713362449645996, "loss": 0.3317882716655731, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 22.92761355448905, "step_time": 0.6626855163574218} +{"epoch": 0, "iter": 11393, "iter_tflops": 36.0482510660484, "iter_time": 0.5723188476562501, "loss": 0.15442407131195068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.1277899925183, "step_time": 0.5141348056793212} +{"epoch": 0, "iter": 11394, "iter_tflops": 40.427024082404635, "iter_time": 0.5103292655944823, "loss": 0.17025858163833618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.88093236460487, "step_time": 0.4596850471496582} +{"epoch": 0, "iter": 11395, "iter_tflops": 39.48659932174933, "iter_time": 0.5224834213256836, "loss": 0.217557892203331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.453705249644294, "step_time": 0.47478329849243156} +{"epoch": 0, "iter": 11396, "iter_tflops": 38.82288969722788, "iter_time": 0.5314157104492188, "loss": 0.1599118560552597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52070426368277, "step_time": 0.48520112419128414} +{"epoch": 0, "iter": 11397, "iter_tflops": 17.019847568777486, "iter_time": 1.2121785125732423, "loss": 0.357528954744339, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 18.15633998744463, "step_time": 1.1363024444580079} +{"epoch": 0, "iter": 11398, "iter_tflops": 24.4059299803334, "iter_time": 0.8453311767578124, "loss": 0.3516238033771515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.282366868768936, "step_time": 0.7562061462402344} +{"epoch": 0, "iter": 11399, "iter_tflops": 45.32022197530214, "iter_time": 0.4552293128967285, "loss": 0.37073349952697754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.92732304454756, "step_time": 0.42166814422607424} +{"epoch": 0, "iter": 11400, "iter_tflops": 49.02848814183689, "iter_time": 0.42079807662963864, "loss": 0.3524322211742401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.691545016086096, "step_time": 0.3915446681976318} +{"epoch": 0, "iter": 11401, "iter_tflops": 28.420142263908957, "iter_time": 0.7259320983886719, "loss": 0.6657181978225708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.976327834447655, "step_time": 0.688246192932129} +{"epoch": 0, "iter": 11402, "iter_tflops": 10.643413120561931, "iter_time": 1.9383907470703126, "loss": 0.5773642063140869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.176781213818888, "step_time": 1.3593853149414064} +{"epoch": 0, "iter": 11403, "iter_tflops": 8.926378430756843, "iter_time": 2.3112501525878906, "loss": 0.7450023293495178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.763151149344129, "step_time": 1.9168265151977542} +{"epoch": 0, "iter": 11404, "iter_tflops": 43.03746573606042, "iter_time": 0.47937519454956057, "loss": 0.672092616558075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.63657995366787, "step_time": 0.4330935077667237} +{"epoch": 0, "iter": 11405, "iter_tflops": 21.867702219890322, "iter_time": 0.8148910369873047, "loss": 0.2761746346950531, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 23.096960480799726, "step_time": 0.7715211944580078} +{"epoch": 0, "iter": 11406, "iter_tflops": 13.637139009728813, "iter_time": 1.3067106323242188, "loss": 0.35874104499816895, "lr": 3e-05, 
"seqlen": 7104.0, "step_tflops": 15.19619903989351, "step_time": 1.1726481399536133} +{"epoch": 0, "iter": 11407, "iter_tflops": 31.15466668520398, "iter_time": 0.5719783401489258, "loss": 0.3416168987751007, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 33.08698048352843, "step_time": 0.5385742149353028} +{"epoch": 0, "iter": 11408, "iter_tflops": 32.00883980013777, "iter_time": 0.5567147903442382, "loss": 0.3533034920692444, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 33.931972748320824, "step_time": 0.5251623497009277} +{"epoch": 0, "iter": 11409, "iter_tflops": 36.07282088941973, "iter_time": 0.5719290313720703, "loss": 0.06721016019582748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.84720234114206, "step_time": 0.5310831222534179} +{"epoch": 0, "iter": 11410, "iter_tflops": 11.477361869616349, "iter_time": 1.7975466613769528, "loss": 0.07626844942569733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.217798606979525, "step_time": 1.560857002258301} +{"epoch": 0, "iter": 11411, "iter_tflops": 10.50436584056385, "iter_time": 1.9640494079589845, "loss": 0.07234719395637512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.436866122993036, "step_time": 1.4290562324523925} +{"epoch": 0, "iter": 11412, "iter_tflops": 40.15522482015102, "iter_time": 0.5137835388183594, "loss": 0.04396991431713104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.99153535661527, "step_time": 0.46897871017456055} +{"epoch": 0, "iter": 11413, "iter_tflops": 16.4143857298897, "iter_time": 0.9380952606201172, "loss": 0.19173891842365265, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 17.534661106237206, "step_time": 0.8781611099243165} +{"epoch": 0, "iter": 11414, "iter_tflops": 8.029740643869882, "iter_time": 1.9176531524658205, "loss": 0.34129443764686584, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 9.119293525958332, "step_time": 1.6885362243652342} +{"epoch": 0, "iter": 11415, "iter_tflops": 10.306410005299675, "iter_time": 1.4940466613769532, "loss": 0.3492184579372406, 
"lr": 3e-05, "seqlen": 6160.0, "step_tflops": 12.77677225511342, "step_time": 1.2051758575439455} +{"epoch": 0, "iter": 11416, "iter_tflops": 26.46565531151785, "iter_time": 0.5818203735351563, "loss": 0.45554259419441223, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.301344356950672, "step_time": 0.5440821914672852} +{"epoch": 0, "iter": 11417, "iter_tflops": 14.748468036807626, "iter_time": 1.0940316162109376, "loss": 0.46905753016471863, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 15.354097321871688, "step_time": 1.0508784713745116} +{"epoch": 0, "iter": 11418, "iter_tflops": 15.859800939046728, "iter_time": 1.0173702926635744, "loss": 0.3001291751861572, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 19.155377219874616, "step_time": 0.8423373832702636} +{"epoch": 0, "iter": 11419, "iter_tflops": 27.827590868928088, "iter_time": 0.5798306579589845, "loss": 0.4417373239994049, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 29.556908621227947, "step_time": 0.5459058837890625} +{"epoch": 0, "iter": 11420, "iter_tflops": 28.108702810361756, "iter_time": 0.5740318374633789, "loss": 0.37588053941726685, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 30.01666126468257, "step_time": 0.5375444717407226} +{"epoch": 0, "iter": 11421, "iter_tflops": 18.52612705829941, "iter_time": 1.1136215057373047, "loss": 0.22326765954494476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.31198732592081, "step_time": 1.068305046081543} +{"epoch": 0, "iter": 11422, "iter_tflops": 16.805972933291756, "iter_time": 1.2276048278808596, "loss": 0.18573622405529022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.635891080875833, "step_time": 0.9997675132751466} +{"epoch": 0, "iter": 11423, "iter_tflops": 37.35669235518321, "iter_time": 0.5522730255126953, "loss": 0.25104349851608276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.48705355797352, "step_time": 0.5095726089477539} +{"epoch": 0, "iter": 11424, "iter_tflops": 40.172704691192315, "iter_time": 0.5135599822998047, "loss": 
0.3086044490337372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.11020311257348, "step_time": 0.4677170372009277} +{"epoch": 0, "iter": 11425, "iter_tflops": 14.036908126439869, "iter_time": 1.003907096862793, "loss": 0.0008227336220443249, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 15.112499123550672, "step_time": 0.9324567413330078} +{"epoch": 0, "iter": 11426, "iter_tflops": 10.822777844945339, "iter_time": 1.3020457305908202, "loss": 0.11965475231409073, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 13.286673442514426, "step_time": 1.0605929126739502} +{"epoch": 0, "iter": 11427, "iter_tflops": 31.949278092504247, "iter_time": 0.44106635665893557, "loss": 0.003381311194971204, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 35.54313865681629, "step_time": 0.3964689731597901} +{"epoch": 0, "iter": 11428, "iter_tflops": 32.848611430714, "iter_time": 0.4289907875061036, "loss": 0.0021604890935122967, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 36.32722103398286, "step_time": 0.3879116344451905} +{"epoch": 0, "iter": 11429, "iter_tflops": 17.917200072252935, "iter_time": 1.1514686126708984, "loss": 0.1516444981098175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.227210803681785, "step_time": 1.0730154113769532} +{"epoch": 0, "iter": 11430, "iter_tflops": 16.12979697488376, "iter_time": 1.2790671539306642, "loss": 0.14797770977020264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.026020487901476, "step_time": 1.084361993789673} +{"epoch": 0, "iter": 11431, "iter_tflops": 38.954857679729216, "iter_time": 0.5296154251098633, "loss": 0.1854020655155182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89905115907946, "step_time": 0.4809219074249267} +{"epoch": 0, "iter": 11432, "iter_tflops": 41.65677621832809, "iter_time": 0.49526380538940434, "loss": 0.1349477916955948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.53121330362156, "step_time": 0.4531197834014893} +{"epoch": 0, "iter": 11433, "iter_tflops": 22.90971856818015, "iter_time": 
0.9005389328002931, "loss": 0.1820930391550064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.525325965322587, "step_time": 0.8412158737182617} +{"epoch": 0, "iter": 11434, "iter_tflops": 27.10181036045413, "iter_time": 0.7612441101074218, "loss": 0.1653345823287964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.64781478743382, "step_time": 0.6131480941772461} +{"epoch": 0, "iter": 11435, "iter_tflops": 51.04085502461115, "iter_time": 0.4042074432373047, "loss": 0.1770854890346527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.338130511518884, "step_time": 0.37281876564025873} +{"epoch": 0, "iter": 11436, "iter_tflops": 41.95937068581128, "iter_time": 0.49169215774536135, "loss": 0.1239987164735794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.914034048277294, "step_time": 0.4493417739868164} +{"epoch": 0, "iter": 11437, "iter_tflops": 30.844119313902283, "iter_time": 0.6688825607299805, "loss": 0.5940989851951599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.84950023446832, "step_time": 0.6280489311218261} +{"epoch": 0, "iter": 11438, "iter_tflops": 8.770467440173892, "iter_time": 2.3523368225097654, "loss": 0.4905986189842224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.350601647621078, "step_time": 1.9932265014648438} +{"epoch": 0, "iter": 11439, "iter_tflops": 12.83003086761093, "iter_time": 1.608031478881836, "loss": 0.6451441049575806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.083653465062401, "step_time": 1.3677782745361327} +{"epoch": 0, "iter": 11440, "iter_tflops": 28.961898312769563, "iter_time": 0.7123529434204101, "loss": 0.7393929958343506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.0492684088846, "step_time": 0.6437305603027343} +{"epoch": 0, "iter": 11441, "iter_tflops": 18.950466209535307, "iter_time": 0.8341500015258789, "loss": 0.455352783203125, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 20.64099118339315, "step_time": 0.7658319931030274} +{"epoch": 0, "iter": 11442, "iter_tflops": 24.110237755216097, 
"iter_time": 0.6556356506347655, "loss": 0.46998587250709534, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.92236805787947, "step_time": 0.6098027534484863} +{"epoch": 0, "iter": 11443, "iter_tflops": 22.86075838297232, "iter_time": 0.6914701232910158, "loss": 0.35098162293434143, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 24.48266927088461, "step_time": 0.6456620903015137} +{"epoch": 0, "iter": 11444, "iter_tflops": 22.073063149594752, "iter_time": 0.7161457977294923, "loss": 0.4547613561153412, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 23.61311148952724, "step_time": 0.6694387321472168} +{"epoch": 0, "iter": 11445, "iter_tflops": 16.101543717327193, "iter_time": 1.2813115234374999, "loss": 0.10273662954568863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.214006126716342, "step_time": 1.1985062255859376} +{"epoch": 0, "iter": 11446, "iter_tflops": 17.958282545687904, "iter_time": 1.1488344421386718, "loss": 0.08332264423370361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.11741661757839, "step_time": 0.9327985210418703} +{"epoch": 0, "iter": 11447, "iter_tflops": 52.330649620601754, "iter_time": 0.3942449340820312, "loss": 0.10165904462337494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.79733286558648, "step_time": 0.36324053382873533} +{"epoch": 0, "iter": 11448, "iter_tflops": 49.175344113993454, "iter_time": 0.41954141616821294, "loss": 0.03406384214758873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.877943203946735, "step_time": 0.3901644477844239} +{"epoch": 0, "iter": 11449, "iter_tflops": 1.5869075371148837, "iter_time": 0.8639067993164062, "loss": 0.882888913154602, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 1.6669013640258021, "step_time": 0.8224483108520508} +{"epoch": 0, "iter": 11450, "iter_tflops": 1.0078648611777337, "iter_time": 1.3602420959472656, "loss": 0.7704801559448242, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 1.1930360826601747, "step_time": 1.1491188163757324} +{"epoch": 0, "iter": 11451, "iter_tflops": 
3.139363198682394, "iter_time": 0.4366937255859375, "loss": 0.9782440066337585, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.408499398096785, "step_time": 0.4022122497558594} +{"epoch": 0, "iter": 11452, "iter_tflops": 3.1934512728461413, "iter_time": 0.42929736328125007, "loss": 0.8658189177513123, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.4691011364895616, "step_time": 0.3951860027313232} +{"epoch": 0, "iter": 11453, "iter_tflops": 30.828929507742362, "iter_time": 0.6692121276855469, "loss": 0.7085647583007812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.77336849543284, "step_time": 0.6295078735351562} +{"epoch": 0, "iter": 11454, "iter_tflops": 16.18544385137901, "iter_time": 1.2746696166992189, "loss": 0.6580370664596558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.77143065228057, "step_time": 1.0990687866210935} +{"epoch": 0, "iter": 11455, "iter_tflops": 35.079849109998946, "iter_time": 0.5881180801391602, "loss": 0.7154107093811035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.11696858007148, "step_time": 0.5412574577331543} +{"epoch": 0, "iter": 11456, "iter_tflops": 35.96100742416373, "iter_time": 0.5737073287963868, "loss": 0.7037584781646729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.27373628627478, "step_time": 0.5253152732849121} +{"epoch": 0, "iter": 11457, "iter_tflops": 19.77056124862832, "iter_time": 1.0435259399414063, "loss": 0.4730711579322815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.88616197071059, "step_time": 0.9877876815795898} +{"epoch": 0, "iter": 11458, "iter_tflops": 32.01777697613627, "iter_time": 0.6443637084960937, "loss": 0.6311402916908264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.33753378466802, "step_time": 0.42681311798095706} +{"epoch": 0, "iter": 11459, "iter_tflops": 49.32306269821094, "iter_time": 0.4182849235534667, "loss": 0.510271430015564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.28252340052863, "step_time": 0.38720188522338866} +{"epoch": 0, "iter": 11460, 
"iter_tflops": 46.693407006076086, "iter_time": 0.441841682434082, "loss": 0.6898908019065857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.61488210628201, "step_time": 0.40760923767089846} +{"epoch": 0, "iter": 11461, "iter_tflops": 34.90030808118893, "iter_time": 0.5911435928344727, "loss": 0.30625730752944946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.48096523015061, "step_time": 0.5504418945312499} +{"epoch": 0, "iter": 11462, "iter_tflops": 15.736945119807489, "iter_time": 1.3109973602294922, "loss": 0.22810712456703186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.790288161598134, "step_time": 1.0979657859802248} +{"epoch": 0, "iter": 11463, "iter_tflops": 34.20747906384273, "iter_time": 0.6031164550781251, "loss": 0.21732263267040253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.507971247896215, "step_time": 0.5500455722808838} +{"epoch": 0, "iter": 11464, "iter_tflops": 39.45300867360255, "iter_time": 0.5229282684326172, "loss": 0.23594725131988525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.221103264389875, "step_time": 0.47733842849731445} +{"epoch": 0, "iter": 11465, "iter_tflops": 17.35299213073611, "iter_time": 1.188906982421875, "loss": 0.3574525713920593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.384579825312382, "step_time": 1.122195541381836} +{"epoch": 0, "iter": 11466, "iter_tflops": 14.561531588088979, "iter_time": 1.4168216705322263, "loss": 0.466810405254364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.6630995498816, "step_time": 1.105448398590088} +{"epoch": 0, "iter": 11467, "iter_tflops": 47.563669616240176, "iter_time": 0.43375739669799807, "loss": 0.4627755582332611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.49793102799879, "step_time": 0.40061985206604006} +{"epoch": 0, "iter": 11468, "iter_tflops": 48.41853014806079, "iter_time": 0.42609912872314454, "loss": 0.4450865089893341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.41924920106108, "step_time": 0.39357857704162597} +{"epoch": 0, 
"iter": 11469, "iter_tflops": 29.699558039196873, "iter_time": 0.6946599502563476, "loss": 0.6031021475791931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.571025016953197, "step_time": 0.6534819030761719} +{"epoch": 0, "iter": 11470, "iter_tflops": 10.85937874586562, "iter_time": 1.8998410491943358, "loss": 0.6242026686668396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.106048051428614, "step_time": 1.4625707664489744} +{"epoch": 0, "iter": 11471, "iter_tflops": 11.404615489811448, "iter_time": 1.8090126342773438, "loss": 0.3829537034034729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.229392135816846, "step_time": 1.5594891510009765} +{"epoch": 0, "iter": 11472, "iter_tflops": 22.36396963532895, "iter_time": 0.9225148239135743, "loss": 0.6038943529129028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.7501455033266, "step_time": 0.7434589309692383} +{"epoch": 0, "iter": 11473, "iter_tflops": 15.449927793891872, "iter_time": 1.0709055633544922, "loss": 0.2183898538351059, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 16.513642077015515, "step_time": 1.001923957824707} +{"epoch": 0, "iter": 11474, "iter_tflops": 10.42214581258975, "iter_time": 1.5875246734619142, "loss": 0.35816431045532227, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 13.339256036874199, "step_time": 1.240355052947998} +{"epoch": 0, "iter": 11475, "iter_tflops": 23.677296274801147, "iter_time": 0.6987881317138672, "loss": 0.40481045842170715, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 25.618076908089204, "step_time": 0.6458491668701172} +{"epoch": 0, "iter": 11476, "iter_tflops": 24.193422829961335, "iter_time": 0.6838806457519531, "loss": 0.38490432500839233, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 25.943946222844758, "step_time": 0.6377369689941407} +{"epoch": 0, "iter": 11477, "iter_tflops": 21.074294089101667, "iter_time": 0.9789696121215821, "loss": 0.12725074589252472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.338019919318675, "step_time": 0.9235864944458009} 
+{"epoch": 0, "iter": 11478, "iter_tflops": 8.38000861156995, "iter_time": 2.461941802978515, "loss": 0.14461131393909454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.632090204820802, "step_time": 1.9404550857543945} +{"epoch": 0, "iter": 11479, "iter_tflops": 16.047736845008252, "iter_time": 1.285607666015625, "loss": 0.07452142983675003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.574056530573845, "step_time": 1.1739516983032228} +{"epoch": 0, "iter": 11480, "iter_tflops": 40.45796035564403, "iter_time": 0.5099390411376953, "loss": 0.12466521561145782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.683487435416744, "step_time": 0.4617162780761719} +{"epoch": 0, "iter": 11481, "iter_tflops": 19.878561913849914, "iter_time": 0.7437777099609375, "loss": 0.37424585223197937, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 21.07447044570305, "step_time": 0.7015707130432127} +{"epoch": 0, "iter": 11482, "iter_tflops": 22.255179611442735, "iter_time": 0.6643501205444335, "loss": 0.3597746193408966, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.258933856061947, "step_time": 0.6094757232666015} +{"epoch": 0, "iter": 11483, "iter_tflops": 23.052449032480535, "iter_time": 0.6413735580444336, "loss": 0.31704118847846985, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.79109825345119, "step_time": 0.5963927497863769} +{"epoch": 0, "iter": 11484, "iter_tflops": 22.343191996145343, "iter_time": 0.661733169555664, "loss": 0.3183862566947937, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.116168672103818, "step_time": 0.6130837554931641} +{"epoch": 0, "iter": 11485, "iter_tflops": 37.93582004260424, "iter_time": 0.5438420333862305, "loss": 0.0344635546207428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29587656509513, "step_time": 0.48778025627136234} +{"epoch": 0, "iter": 11486, "iter_tflops": 40.97290652153567, "iter_time": 0.5035301437377929, "loss": 0.05166331306099892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.5915451992269, "step_time": 
0.4428076686859131} +{"epoch": 0, "iter": 11487, "iter_tflops": 40.68933849031799, "iter_time": 0.5070392951965332, "loss": 0.03594787418842316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.93490667666602, "step_time": 0.4591328887939453} +{"epoch": 0, "iter": 11488, "iter_tflops": 38.37351330251877, "iter_time": 0.5376389007568358, "loss": 0.07627426832914352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96734337783489, "step_time": 0.4915987491607665} +{"epoch": 0, "iter": 11489, "iter_tflops": 15.581338756996379, "iter_time": 1.0987745819091796, "loss": 0.016596456989645958, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 16.665362775010124, "step_time": 1.027303108215332} +{"epoch": 0, "iter": 11490, "iter_tflops": 16.454070470659993, "iter_time": 1.0404950561523438, "loss": 0.007006180007010698, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 22.401503469208514, "step_time": 0.7642513370513916} +{"epoch": 0, "iter": 11491, "iter_tflops": 36.55597004346028, "iter_time": 0.468333324432373, "loss": 0.0041457656770944595, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 40.62946479506242, "step_time": 0.42137840270996096} +{"epoch": 0, "iter": 11492, "iter_tflops": 40.35598498187486, "iter_time": 0.42423395156860355, "loss": 0.003129261778667569, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 44.68144862352306, "step_time": 0.38316526222229} +{"epoch": 0, "iter": 11493, "iter_tflops": 15.018741381595557, "iter_time": 1.3736899108886718, "loss": 0.7863703966140747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.956789570234676, "step_time": 1.2929351119995116} +{"epoch": 0, "iter": 11494, "iter_tflops": 15.2735753573335, "iter_time": 1.3507704010009767, "loss": 0.614665687084198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.75663734149536, "step_time": 1.044261386871338} +{"epoch": 0, "iter": 11495, "iter_tflops": 44.557465389916366, "iter_time": 0.4630221519470215, "loss": 0.6861121654510498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.10250598480567, 
"step_time": 0.42889851760864256} +{"epoch": 0, "iter": 11496, "iter_tflops": 47.97466945168819, "iter_time": 0.43004138946533205, "loss": 0.8370996713638306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.64678103395296, "step_time": 0.3994652347564697} +{"epoch": 0, "iter": 11497, "iter_tflops": 31.319103356595807, "iter_time": 0.6587383193969727, "loss": 0.708321750164032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.256617482494335, "step_time": 0.6203605499267577} +{"epoch": 0, "iter": 11498, "iter_tflops": 13.704215042282701, "iter_time": 1.5054560546875002, "loss": 0.5102865099906921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.765718442542244, "step_time": 1.2305523071289062} +{"epoch": 0, "iter": 11499, "iter_tflops": 41.18673132718145, "iter_time": 0.5009160194396972, "loss": 0.5776106119155884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.535887339012305, "step_time": 0.4632464904785156} +{"epoch": 0, "iter": 11500, "iter_tflops": 45.67512656219267, "iter_time": 0.4516920928955078, "loss": 0.6891909837722778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.44896284759564, "step_time": 0.4172199440002442} +{"epoch": 0, "iter": 11501, "iter_tflops": 26.584178970564842, "iter_time": 0.7760666046142577, "loss": 0.14492732286453247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.03634703814771, "step_time": 0.735869529724121} +{"epoch": 0, "iter": 11502, "iter_tflops": 13.960280668196447, "iter_time": 1.4778423156738278, "loss": 0.1844058483839035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.424839155347154, "step_time": 1.2560910530090332} +{"epoch": 0, "iter": 11503, "iter_tflops": 47.3022599901963, "iter_time": 0.4361544990539551, "loss": 0.23360736668109894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.61109718069541, "step_time": 0.3997414245605469} +{"epoch": 0, "iter": 11504, "iter_tflops": 51.27473987463844, "iter_time": 0.4023636894226075, "loss": 0.17149177193641663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
55.74923147007658, "step_time": 0.37006955909729006} +{"epoch": 0, "iter": 11505, "iter_tflops": 13.526360540363672, "iter_time": 0.7660939102172852, "loss": 0.006631130818277597, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 14.236602139161501, "step_time": 0.7278746948242187} +{"epoch": 0, "iter": 11506, "iter_tflops": 6.592349757549867, "iter_time": 1.5718920898437498, "loss": 0.009689564816653728, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 9.069788681235174, "step_time": 1.142525234222412} +{"epoch": 0, "iter": 11507, "iter_tflops": 26.152209724890064, "iter_time": 0.3962365913391113, "loss": 0.004065365996211767, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 28.933664585591778, "step_time": 0.358145523071289} +{"epoch": 0, "iter": 11508, "iter_tflops": 21.844828669147223, "iter_time": 0.47436684417724606, "loss": 0.026226406916975975, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 24.273510012412597, "step_time": 0.4269041614532471} +{"epoch": 0, "iter": 11509, "iter_tflops": 20.13820258234351, "iter_time": 1.0244754180908204, "loss": 0.31353268027305603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.516437490628153, "step_time": 0.958852668762207} +{"epoch": 0, "iter": 11510, "iter_tflops": 20.25249187691009, "iter_time": 1.0186940765380859, "loss": 0.32347097992897034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.206843393974392, "step_time": 0.8184719200134277} +{"epoch": 0, "iter": 11511, "iter_tflops": 51.298679887953405, "iter_time": 0.4021759147644043, "loss": 0.4117002785205841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.702879829770026, "step_time": 0.37037750244140627} +{"epoch": 0, "iter": 11512, "iter_tflops": 49.907555343826644, "iter_time": 0.4133861770629883, "loss": 0.33245527744293213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.89197179181104, "step_time": 0.38282313346862795} +{"epoch": 0, "iter": 11513, "iter_tflops": 20.685874336477518, "iter_time": 0.9973517761230469, "loss": 0.14961059391498566, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 21.59789802615419, "step_time": 0.9552361755371093} +{"epoch": 0, "iter": 11514, "iter_tflops": 15.564772074781366, "iter_time": 1.325499237060547, "loss": 0.1085481122136116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.031915647602084, "step_time": 1.0299111614227294} +{"epoch": 0, "iter": 11515, "iter_tflops": 38.413774788362616, "iter_time": 0.5370754013061523, "loss": 0.0960308313369751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41979361403477, "step_time": 0.4863553485870361} +{"epoch": 0, "iter": 11516, "iter_tflops": 34.950127176767815, "iter_time": 0.5903009567260742, "loss": 0.09792809188365936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.50025434534153, "step_time": 0.5358690185546875} +{"epoch": 0, "iter": 11517, "iter_tflops": 18.223353013949506, "iter_time": 1.1321239013671875, "loss": 0.642228901386261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.526723316607615, "step_time": 1.0565568618774415} +{"epoch": 0, "iter": 11518, "iter_tflops": 19.394352556543453, "iter_time": 1.0637680969238281, "loss": 0.641749918460846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.33471049717094, "step_time": 0.8841375389099122} +{"epoch": 0, "iter": 11519, "iter_tflops": 45.72945348277072, "iter_time": 0.4511554794311524, "loss": 0.8555615544319153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.81407102221549, "step_time": 0.4141619644165039} +{"epoch": 0, "iter": 11520, "iter_tflops": 47.68420856867382, "iter_time": 0.4326609191894531, "loss": 0.7631734609603882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.63241018422931, "step_time": 0.3995764179229736} +{"epoch": 0, "iter": 11521, "iter_tflops": 31.250152411380924, "iter_time": 0.6601917724609374, "loss": 0.6763204336166382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.19470229599825, "step_time": 0.6215176544189454} +{"epoch": 0, "iter": 11522, "iter_tflops": 16.5276729898517, "iter_time": 1.2482757568359375, "loss": 0.5789321064949036, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 19.610676854349162, "step_time": 1.0520337295532227} +{"epoch": 0, "iter": 11523, "iter_tflops": 35.6293558144045, "iter_time": 0.5790476150512696, "loss": 0.5651562213897705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.80642305854845, "step_time": 0.5316412048339844} +{"epoch": 0, "iter": 11524, "iter_tflops": 35.056897396461146, "iter_time": 0.5885031204223633, "loss": 0.7203077673912048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.96783485502445, "step_time": 0.5433834609985353} +{"epoch": 0, "iter": 11525, "iter_tflops": 22.10140159420539, "iter_time": 0.9334744415283204, "loss": 0.10428088903427124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.880570354847986, "step_time": 0.8639280052185059} +{"epoch": 0, "iter": 11526, "iter_tflops": 25.220095029613322, "iter_time": 0.8180418624877929, "loss": 0.14297707378864288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.58593221644892, "step_time": 0.6331288414001465} +{"epoch": 0, "iter": 11527, "iter_tflops": 50.77158400396167, "iter_time": 0.40635118865966796, "loss": 0.10303784161806107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.241845799416375, "step_time": 0.3734685764312744} +{"epoch": 0, "iter": 11528, "iter_tflops": 50.31963258703926, "iter_time": 0.4100008773803711, "loss": 0.06482494622468948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.30445355859246, "step_time": 0.37991531372070314} +{"epoch": 0, "iter": 11529, "iter_tflops": 25.073970256168202, "iter_time": 0.8228092041015624, "loss": 0.7628176808357239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.39520242796619, "step_time": 0.7816228561401368} +{"epoch": 0, "iter": 11530, "iter_tflops": 12.044340814681982, "iter_time": 1.7129284057617187, "loss": 0.6670318245887756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.918549398241264, "step_time": 1.2194363136291504} +{"epoch": 0, "iter": 11531, "iter_tflops": 38.07971075427616, "iter_time": 0.5417870330810547, "loss": 0.9095625281333923, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80581879212083, "step_time": 0.49349813270568843} +{"epoch": 0, "iter": 11532, "iter_tflops": 35.84144991896113, "iter_time": 0.5756210632324219, "loss": 0.9099810123443604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.224107374734686, "step_time": 0.5259799365997314} +{"epoch": 0, "iter": 11533, "iter_tflops": 19.717309518682754, "iter_time": 1.046344253540039, "loss": 0.6199197769165039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.080280461050407, "step_time": 0.9786916046142577} +{"epoch": 0, "iter": 11534, "iter_tflops": 19.729735187344843, "iter_time": 1.0456852722167969, "loss": 0.9272565245628357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.966594483474204, "step_time": 0.6884697399139404} +{"epoch": 0, "iter": 11535, "iter_tflops": 44.86686879899038, "iter_time": 0.4598291358947754, "loss": 0.6179575324058533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.468150854156356, "step_time": 0.4256628971099854} +{"epoch": 0, "iter": 11536, "iter_tflops": 44.63010770188503, "iter_time": 0.4622685127258301, "loss": 0.5207500457763672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.58205768941232, "step_time": 0.4335897712707519} +{"epoch": 0, "iter": 11537, "iter_tflops": 25.21455509968061, "iter_time": 0.8182215957641601, "loss": 0.6093628406524658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.59086638226331, "step_time": 0.7758714294433595} +{"epoch": 0, "iter": 11538, "iter_tflops": 14.563686352072784, "iter_time": 1.416612045288086, "loss": 0.4856060743331909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.412329452807, "step_time": 1.184855453491211} +{"epoch": 0, "iter": 11539, "iter_tflops": 38.020888841931665, "iter_time": 0.5426252288818358, "loss": 0.47110381722450256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.222138752635956, "step_time": 0.5004857616424561} +{"epoch": 0, "iter": 11540, "iter_tflops": 34.54226669660489, "iter_time": 0.5972709808349609, "loss": 
0.5831912755966187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.975468354049084, "step_time": 0.5432742347717285} +{"epoch": 0, "iter": 11541, "iter_tflops": 19.298924688778822, "iter_time": 1.0690281372070312, "loss": 0.6135913133621216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.83714953704671, "step_time": 0.9901111221313477} +{"epoch": 0, "iter": 11542, "iter_tflops": 19.224504599310798, "iter_time": 1.0731664581298828, "loss": 0.7416477203369141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.194228317430248, "step_time": 0.8894925594329833} +{"epoch": 0, "iter": 11543, "iter_tflops": 46.28415641431658, "iter_time": 0.44574850463867194, "loss": 0.7702320218086243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.865570496701146, "step_time": 0.4137342319488525} +{"epoch": 0, "iter": 11544, "iter_tflops": 48.26872206341167, "iter_time": 0.4274215812683106, "loss": 0.8030137419700623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.19457492803235, "step_time": 0.39527275657653804} +{"epoch": 0, "iter": 11545, "iter_tflops": 18.162914065869153, "iter_time": 1.135891159057617, "loss": 0.6673716306686401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.004176270971815, "step_time": 1.0856084060668945} +{"epoch": 0, "iter": 11546, "iter_tflops": 13.65329746591794, "iter_time": 1.5110703887939454, "loss": 0.6370643377304077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.398552117248624, "step_time": 1.185793701171875} +{"epoch": 0, "iter": 11547, "iter_tflops": 41.90104886490608, "iter_time": 0.4923765411376953, "loss": 0.6724616289138794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.02451695930809, "step_time": 0.4582190971374511} +{"epoch": 0, "iter": 11548, "iter_tflops": 43.17193376820324, "iter_time": 0.477882080078125, "loss": 0.6525025367736816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.170181747058, "step_time": 0.4468488693237305} +{"epoch": 0, "iter": 11549, "iter_tflops": 34.40046833491169, "iter_time": 0.5997329254150391, 
"loss": 0.3436712324619293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.91454489568176, "step_time": 0.5588879280090332} +{"epoch": 0, "iter": 11550, "iter_tflops": 10.182412902727778, "iter_time": 2.0261497650146483, "loss": 0.5366882681846619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.362222796732341, "step_time": 1.6688821945190428} +{"epoch": 0, "iter": 11551, "iter_tflops": 15.457134847742582, "iter_time": 1.334729476928711, "loss": 0.5239423513412476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.85218100025707, "step_time": 1.155662353515625} +{"epoch": 0, "iter": 11552, "iter_tflops": 14.276482194446967, "iter_time": 1.4451104431152344, "loss": 0.5246775150299072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.981837342683768, "step_time": 1.214891716003418} +{"epoch": 0, "iter": 11553, "iter_tflops": 11.247980929107372, "iter_time": 1.4090070800781247, "loss": 0.3776163160800934, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 12.021203128457408, "step_time": 1.318377586364746} +{"epoch": 0, "iter": 11554, "iter_tflops": 20.766476507484146, "iter_time": 0.763176399230957, "loss": 0.2683875560760498, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.248565743860578, "step_time": 0.6276984176635743} +{"epoch": 0, "iter": 11555, "iter_tflops": 29.295281258539067, "iter_time": 0.5409910430908204, "loss": 0.44073522090911865, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 31.168446400742553, "step_time": 0.5084784965515137} +{"epoch": 0, "iter": 11556, "iter_tflops": 28.10079461031667, "iter_time": 0.5639870681762694, "loss": 0.2922162711620331, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 29.743873028609364, "step_time": 0.5328319129943847} +{"epoch": 0, "iter": 11557, "iter_tflops": 24.974381413870407, "iter_time": 0.8260902709960937, "loss": 0.07040292024612427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.202609463592605, "step_time": 0.7873678970336914} +{"epoch": 0, "iter": 11558, "iter_tflops": 12.880000781526277, "iter_time": 
1.6017928771972658, "loss": 0.05537789314985275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.086683483412543, "step_time": 1.282495147705078} +{"epoch": 0, "iter": 11559, "iter_tflops": 37.74452840179009, "iter_time": 0.546598258972168, "loss": 0.10335969924926758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.2262177970222, "step_time": 0.4191078338623047} +{"epoch": 0, "iter": 11560, "iter_tflops": 52.09075199368774, "iter_time": 0.39606058120727544, "loss": 0.1319970190525055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.9847638671544, "step_time": 0.36204578399658194} +{"epoch": 0, "iter": 11561, "iter_tflops": 26.50012234088221, "iter_time": 0.7785282363891601, "loss": 0.08652828633785248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.85961654808613, "step_time": 0.7405375976562499} +{"epoch": 0, "iter": 11562, "iter_tflops": 10.394428773829725, "iter_time": 1.984822250366211, "loss": 0.057059742510318756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.48955124244873, "step_time": 1.651868278503418} +{"epoch": 0, "iter": 11563, "iter_tflops": 10.283021771855662, "iter_time": 2.0063259582519533, "loss": 0.056479938328266144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.851312242205129, "step_time": 1.7408277740478517} +{"epoch": 0, "iter": 11564, "iter_tflops": 46.0904570751406, "iter_time": 0.44762180328369144, "loss": 0.10193675011396408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.20631221085474, "step_time": 0.3951838893890381} +{"epoch": 0, "iter": 11565, "iter_tflops": 26.14026120936208, "iter_time": 0.571861198425293, "loss": 0.40238460898399353, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.072622861400003, "step_time": 0.532497486114502} +{"epoch": 0, "iter": 11566, "iter_tflops": 25.754084583075652, "iter_time": 0.5804361267089844, "loss": 0.33840879797935486, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 27.436328754624153, "step_time": 0.5448469886779784} +{"epoch": 0, "iter": 11567, "iter_tflops": 27.55694031724263, 
"iter_time": 0.5424622955322265, "loss": 0.2512971758842468, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 29.38245178682425, "step_time": 0.5087594871520996} +{"epoch": 0, "iter": 11568, "iter_tflops": 26.27278676292271, "iter_time": 0.5689766082763672, "loss": 0.31293442845344543, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 27.944001122319047, "step_time": 0.534948486328125} +{"epoch": 0, "iter": 11569, "iter_tflops": 34.277986147075794, "iter_time": 0.6018758926391602, "loss": 0.3491685092449188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.580109893263575, "step_time": 0.5639975814819336} +{"epoch": 0, "iter": 11570, "iter_tflops": 16.04982580528062, "iter_time": 1.2854403381347657, "loss": 0.34765467047691345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.619859009334043, "step_time": 1.1080155601501465} +{"epoch": 0, "iter": 11571, "iter_tflops": 40.448713074013654, "iter_time": 0.51005562210083, "loss": 0.3901573419570923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.93725623351883, "step_time": 0.4591088829040527} +{"epoch": 0, "iter": 11572, "iter_tflops": 49.155236461650794, "iter_time": 0.41971303558349615, "loss": 0.30717387795448303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.18512224001164, "step_time": 0.3879109916687012} +{"epoch": 0, "iter": 11573, "iter_tflops": 20.01701960146806, "iter_time": 1.030677589416504, "loss": 0.5640332698822021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.864055995728947, "step_time": 0.9888342666625977} +{"epoch": 0, "iter": 11574, "iter_tflops": 19.974383533269037, "iter_time": 1.0328776092529297, "loss": 0.5765557289123535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.9410385290155, "step_time": 0.8271946449279786} +{"epoch": 0, "iter": 11575, "iter_tflops": 37.94700891290648, "iter_time": 0.5436816787719727, "loss": 0.5818625688552856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.4671703491554, "step_time": 0.4975283660888672} +{"epoch": 0, "iter": 11576, "iter_tflops": 
40.21811048738419, "iter_time": 0.5129801788330078, "loss": 0.6271968483924866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.01626972377631, "step_time": 0.46871517372131355} +{"epoch": 0, "iter": 11577, "iter_tflops": 14.710827626737482, "iter_time": 0.8639135665893554, "loss": 0.08093278110027313, "lr": 3e-05, "seqlen": 5104.0, "step_tflops": 15.709047777062933, "step_time": 0.8090167999267577} +{"epoch": 0, "iter": 11578, "iter_tflops": 17.69652165649749, "iter_time": 0.7181571502685546, "loss": 0.15138690173625946, "lr": 3e-05, "seqlen": 5104.0, "step_tflops": 22.141322449505385, "step_time": 0.5739893627166748} +{"epoch": 0, "iter": 11579, "iter_tflops": 33.750554902688656, "iter_time": 0.376553321838379, "loss": 0.06364090740680695, "lr": 3e-05, "seqlen": 5104.0, "step_tflops": 36.89011232916453, "step_time": 0.34450650215148926} +{"epoch": 0, "iter": 11580, "iter_tflops": 33.73793451809398, "iter_time": 0.3766941795349121, "loss": 0.07290273159742355, "lr": 3e-05, "seqlen": 5104.0, "step_tflops": 36.817596062351846, "step_time": 0.34518504524230953} +{"epoch": 0, "iter": 11581, "iter_tflops": 28.18827338452769, "iter_time": 0.5390112838745117, "loss": 0.005070901941508055, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 30.40131640802993, "step_time": 0.4997743263244629} +{"epoch": 0, "iter": 11582, "iter_tflops": 9.78199536284374, "iter_time": 1.5532411193847657, "loss": 0.009185973554849625, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 12.102565409426228, "step_time": 1.2554195671081543} +{"epoch": 0, "iter": 11583, "iter_tflops": 12.52055642046707, "iter_time": 1.2135081634521483, "loss": 0.005208294838666916, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 14.968974276120413, "step_time": 1.0150192756652832} +{"epoch": 0, "iter": 11584, "iter_tflops": 10.615474801905654, "iter_time": 1.4312875976562498, "loss": 0.005608131177723408, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 13.172657316468449, "step_time": 1.1534345016479493} +{"epoch": 0, "iter": 
11585, "iter_tflops": 5.55585188370831, "iter_time": 3.0297389221191406, "loss": 0.4902469217777252, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 6.874696086883839, "step_time": 2.448512702941894} +{"epoch": 0, "iter": 11586, "iter_tflops": 25.825609277413694, "iter_time": 0.6517863922119141, "loss": 0.41036537289619446, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 27.923415936708057, "step_time": 0.6028195381164551} +{"epoch": 0, "iter": 11587, "iter_tflops": 27.62741588592373, "iter_time": 0.6092781448364258, "loss": 0.4463013708591461, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 29.64680920040201, "step_time": 0.5677771453857422} +{"epoch": 0, "iter": 11588, "iter_tflops": 25.81700843765308, "iter_time": 0.652003532409668, "loss": 0.2622872591018677, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 27.695549055470323, "step_time": 0.6077792739868164} +{"epoch": 0, "iter": 11589, "iter_tflops": 22.05639515838332, "iter_time": 0.9353792114257812, "loss": 0.7121508121490479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.95410153740516, "step_time": 0.8612760314941407} +{"epoch": 0, "iter": 11590, "iter_tflops": 18.62224984869687, "iter_time": 1.107873306274414, "loss": 0.7261491417884827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.563020133679977, "step_time": 0.914376417160034} +{"epoch": 0, "iter": 11591, "iter_tflops": 36.44137696768984, "iter_time": 0.5661447296142579, "loss": 0.5696024298667908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.65809437835864, "step_time": 0.5202240257263184} +{"epoch": 0, "iter": 11592, "iter_tflops": 39.64917652967482, "iter_time": 0.5203410339355468, "loss": 0.6442337036132812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.8870869378852, "step_time": 0.4810560703277588} +{"epoch": 0, "iter": 11593, "iter_tflops": 18.950493343788725, "iter_time": 1.0886837158203124, "loss": 0.507687509059906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.530841345439757, "step_time": 1.0048830032348635} +{"epoch": 0, "iter": 
11594, "iter_tflops": 21.134209167464697, "iter_time": 0.9761942520141602, "loss": 0.5005101561546326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.26824023939002, "step_time": 0.7854006710052491} +{"epoch": 0, "iter": 11595, "iter_tflops": 47.088585384341336, "iter_time": 0.43813364410400396, "loss": 0.46612197160720825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.954436830147614, "step_time": 0.4048929748535156} +{"epoch": 0, "iter": 11596, "iter_tflops": 47.10552020802896, "iter_time": 0.437976131439209, "loss": 0.4032381772994995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.00056594408843, "step_time": 0.4045267562866211} +{"epoch": 0, "iter": 11597, "iter_tflops": 23.486000416865902, "iter_time": 0.8784421844482422, "loss": 0.8555659055709839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.742293244514162, "step_time": 0.8338391799926758} +{"epoch": 0, "iter": 11598, "iter_tflops": 18.162325585323465, "iter_time": 1.1359279632568362, "loss": 0.6135673522949219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.201299422550036, "step_time": 0.7874072647094726} +{"epoch": 0, "iter": 11599, "iter_tflops": 36.76527487216721, "iter_time": 0.5611570587158203, "loss": 0.5900139212608337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.016259181548605, "step_time": 0.5155677700042725} +{"epoch": 0, "iter": 11600, "iter_tflops": 38.96383952274627, "iter_time": 0.5294933395385742, "loss": 0.6633446216583252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.346933516895824, "step_time": 0.4871921482086181} +{"epoch": 0, "iter": 11601, "iter_tflops": 15.348722191476353, "iter_time": 1.3441570739746094, "loss": 0.902652382850647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.153688068782067, "step_time": 1.2771754302978515} +{"epoch": 0, "iter": 11602, "iter_tflops": 17.478156582564193, "iter_time": 1.1803929901123047, "loss": 0.6382072567939758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.83667707238174, "step_time": 0.9034192428588868} +{"epoch": 
0, "iter": 11603, "iter_tflops": 42.97627041043022, "iter_time": 0.4800577926635742, "loss": 0.6166554093360901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35549132461512, "step_time": 0.445062557220459} +{"epoch": 0, "iter": 11604, "iter_tflops": 41.30667255947123, "iter_time": 0.4994615211486817, "loss": 0.6581189632415771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.50785618269534, "step_time": 0.4635382442474365} +{"epoch": 0, "iter": 11605, "iter_tflops": 24.327670976417494, "iter_time": 0.8480504989624024, "loss": 0.37477898597717285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.51217799203559, "step_time": 0.8086762924194335} +{"epoch": 0, "iter": 11606, "iter_tflops": 11.724044977845221, "iter_time": 1.759724868774414, "loss": 0.31331732869148254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.728061186366313, "step_time": 1.5028410224914552} +{"epoch": 0, "iter": 11607, "iter_tflops": 40.08700160466968, "iter_time": 0.5146579360961914, "loss": 0.5378267168998718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.026303041841125, "step_time": 0.4686083564758301} +{"epoch": 0, "iter": 11608, "iter_tflops": 43.14702348780525, "iter_time": 0.4781579780578613, "loss": 0.46040064096450806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.04454613626316, "step_time": 0.4385437889099121} +{"epoch": 0, "iter": 11609, "iter_tflops": 15.202301460551833, "iter_time": 1.3571033020019532, "loss": 0.5817239284515381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.260942801290522, "step_time": 1.2687513732910158} +{"epoch": 0, "iter": 11610, "iter_tflops": 16.105520228826926, "iter_time": 1.2809951629638672, "loss": 0.6296307444572449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.678333087847147, "step_time": 0.871306837081909} +{"epoch": 0, "iter": 11611, "iter_tflops": 35.151983219365555, "iter_time": 0.5869112243652344, "loss": 0.7346200346946716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.312954185164735, "step_time": 0.5384887161254882} 
+{"epoch": 0, "iter": 11612, "iter_tflops": 38.82898883977038, "iter_time": 0.5313322372436524, "loss": 0.7522764801979065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.473960806972435, "step_time": 0.48573509788513186} +{"epoch": 0, "iter": 11613, "iter_tflops": 18.773699805006547, "iter_time": 1.0989359436035158, "loss": 0.22164437174797058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.33610869001397, "step_time": 1.014505470275879} +{"epoch": 0, "iter": 11614, "iter_tflops": 16.19214192940944, "iter_time": 1.2741423339843752, "loss": 0.2098378837108612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.98036245824742, "step_time": 1.0869704704284668} +{"epoch": 0, "iter": 11615, "iter_tflops": 49.84458927743587, "iter_time": 0.41390838623046877, "loss": 0.40547606348991394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.418766686761835, "step_time": 0.3791172561645508} +{"epoch": 0, "iter": 11616, "iter_tflops": 44.942619355628565, "iter_time": 0.4590540962219238, "loss": 0.23937442898750305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.52918125517103, "step_time": 0.4251275825500489} +{"epoch": 0, "iter": 11617, "iter_tflops": 45.67088070485753, "iter_time": 0.4517340850830078, "loss": 0.2621728181838989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9277069484197, "step_time": 0.41321932792663574} +{"epoch": 0, "iter": 11618, "iter_tflops": 44.360532928398726, "iter_time": 0.4650776748657227, "loss": 0.2727309465408325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.350181540551205, "step_time": 0.42670146942138676} +{"epoch": 0, "iter": 11619, "iter_tflops": 40.16360433533529, "iter_time": 0.5136763458251954, "loss": 0.22653049230575562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.01592069223055, "step_time": 0.47961529541015624} +{"epoch": 0, "iter": 11620, "iter_tflops": 52.49029692200123, "iter_time": 0.39304585266113284, "loss": 0.2276298999786377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.942917707732164, "step_time": 
0.3623118438720704} +{"epoch": 0, "iter": 11621, "iter_tflops": 25.437914846643235, "iter_time": 0.7947309722900391, "loss": 0.1497153788805008, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 26.73086733909348, "step_time": 0.7562904167175293} +{"epoch": 0, "iter": 11622, "iter_tflops": 13.877698824348544, "iter_time": 1.4567471923828126, "loss": 0.1510777771472931, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 17.716176926927794, "step_time": 1.1411208457946778} +{"epoch": 0, "iter": 11623, "iter_tflops": 47.32768722172811, "iter_time": 0.42715585708618165, "loss": 0.2518879473209381, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 51.60834688367007, "step_time": 0.39172537040710453} +{"epoch": 0, "iter": 11624, "iter_tflops": 49.49310003187998, "iter_time": 0.4084670143127441, "loss": 0.19529713690280914, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 53.615426360197446, "step_time": 0.3770612335205078} +{"epoch": 0, "iter": 11625, "iter_tflops": 31.278842901579026, "iter_time": 0.6595862121582031, "loss": 0.4822016954421997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.14822844469372, "step_time": 0.6223890228271484} +{"epoch": 0, "iter": 11626, "iter_tflops": 16.185504108593925, "iter_time": 1.2746648712158204, "loss": 0.3794988691806793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.839817170315133, "step_time": 1.0950792846679687} +{"epoch": 0, "iter": 11627, "iter_tflops": 46.73947858139308, "iter_time": 0.4414061546325683, "loss": 0.4133469760417938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.37171151655881, "step_time": 0.4095769805908203} +{"epoch": 0, "iter": 11628, "iter_tflops": 46.29580066022764, "iter_time": 0.4456363906860351, "loss": 0.37316185235977173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.76321696346172, "step_time": 0.414585205078125} +{"epoch": 0, "iter": 11629, "iter_tflops": 42.258236533217634, "iter_time": 0.48821472930908205, "loss": 0.32942435145378113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.73493659992854, 
"step_time": 0.45110139083862305} +{"epoch": 0, "iter": 11630, "iter_tflops": 35.96285633387557, "iter_time": 0.5736778335571289, "loss": 0.40117383003234863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.57490106506428, "step_time": 0.521317626953125} +{"epoch": 0, "iter": 11631, "iter_tflops": 41.56184407290217, "iter_time": 0.49639504623413083, "loss": 0.3125423491001129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.34525625419011, "step_time": 0.45497798919677734} +{"epoch": 0, "iter": 11632, "iter_tflops": 43.73455791580696, "iter_time": 0.4717343559265137, "loss": 0.4127342402935028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.56693321266855, "step_time": 0.43372763633728034} +{"epoch": 0, "iter": 11633, "iter_tflops": 12.909947506667276, "iter_time": 1.0663002471923826, "loss": 0.020648282021284103, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 13.818097851289252, "step_time": 0.9962210693359376} +{"epoch": 0, "iter": 11634, "iter_tflops": 13.594140521804967, "iter_time": 1.0126333618164063, "loss": 0.004805908538401127, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 16.86204283525003, "step_time": 0.8163827095031738} +{"epoch": 0, "iter": 11635, "iter_tflops": 40.71526622411221, "iter_time": 0.3381011962890625, "loss": 0.006055591627955437, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 44.698912182453306, "step_time": 0.30796902084350586} +{"epoch": 0, "iter": 11636, "iter_tflops": 41.03132723534514, "iter_time": 0.33549682998657226, "loss": 0.006377932149916887, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 45.04072490419407, "step_time": 0.3056318531036377} +{"epoch": 0, "iter": 11637, "iter_tflops": 28.30092394646909, "iter_time": 0.728990104675293, "loss": 0.8235345482826233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.900536224934672, "step_time": 0.6899907531738282} +{"epoch": 0, "iter": 11638, "iter_tflops": 18.68898601849802, "iter_time": 1.103917221069336, "loss": 0.5818528532981873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.377635228261866, "step_time": 0.9219514617919922} +{"epoch": 0, "iter": 11639, "iter_tflops": 42.11193192298039, "iter_time": 0.48991087722778315, "loss": 0.573946475982666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.322145787358224, "step_time": 0.4552099895477295} +{"epoch": 0, "iter": 11640, "iter_tflops": 47.51789723716592, "iter_time": 0.43417522048950197, "loss": 0.6710184216499329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.12052925134395, "step_time": 0.4035774631500244} +{"epoch": 0, "iter": 11641, "iter_tflops": 23.884874458535403, "iter_time": 0.8637723236083983, "loss": 0.4447990655899048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.097297026236667, "step_time": 0.8220444412231446} +{"epoch": 0, "iter": 11642, "iter_tflops": 13.614448047795648, "iter_time": 1.5153822937011718, "loss": 0.527474045753479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.800362898818477, "step_time": 1.1590265674591065} +{"epoch": 0, "iter": 11643, "iter_tflops": 38.3998065206599, "iter_time": 0.5372707672119141, "loss": 0.5410418510437012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05706954938511, "step_time": 0.49054995346069336} +{"epoch": 0, "iter": 11644, "iter_tflops": 43.532795695065154, "iter_time": 0.47392071151733395, "loss": 0.48856890201568604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.369586076064756, "step_time": 0.43553459548950196} +{"epoch": 0, "iter": 11645, "iter_tflops": 18.26268426502452, "iter_time": 1.1069730224609375, "loss": 0.06061038002371788, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 19.714712690433288, "step_time": 1.025442222595215} +{"epoch": 0, "iter": 11646, "iter_tflops": 39.38098715298407, "iter_time": 0.5133517532348633, "loss": 0.04171217605471611, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 48.04847324586674, "step_time": 0.4207479953765869} +{"epoch": 0, "iter": 11647, "iter_tflops": 53.9760727007195, "iter_time": 0.374541862487793, "loss": 0.017343997955322266, "lr": 3e-05, "seqlen": 8032.0, 
"step_tflops": 59.28159211066049, "step_time": 0.3410215225219726} +{"epoch": 0, "iter": 11648, "iter_tflops": 57.135938722928366, "iter_time": 0.353828067779541, "loss": 0.046121109277009964, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 62.3772178288889, "step_time": 0.3240974750518799} +{"epoch": 0, "iter": 11649, "iter_tflops": 28.41170021566824, "iter_time": 0.7261477966308595, "loss": 0.09067012369632721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.067439438961554, "step_time": 0.6861606407165528} +{"epoch": 0, "iter": 11650, "iter_tflops": 19.27096906207238, "iter_time": 1.0705789337158202, "loss": 0.09318481385707855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.305093733768704, "step_time": 0.8852611255645751} +{"epoch": 0, "iter": 11651, "iter_tflops": 37.97016086358168, "iter_time": 0.5433501739501954, "loss": 0.07177194952964783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65680926644469, "step_time": 0.495263412475586} +{"epoch": 0, "iter": 11652, "iter_tflops": 42.42791262960507, "iter_time": 0.486262279510498, "loss": 0.1185135766863823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.701966898989056, "step_time": 0.4417606983184814} +{"epoch": 0, "iter": 11653, "iter_tflops": 20.56880598950993, "iter_time": 1.0030282516479492, "loss": 0.6622617244720459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.917523303765076, "step_time": 0.9413058776855467} +{"epoch": 0, "iter": 11654, "iter_tflops": 14.276027112605256, "iter_time": 1.445156509399414, "loss": 0.5569821000099182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.998602574264858, "step_time": 1.2895559730529786} +{"epoch": 0, "iter": 11655, "iter_tflops": 36.081330559774, "iter_time": 0.5717941436767578, "loss": 0.6464516520500183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.503672186058296, "step_time": 0.5222576122283935} +{"epoch": 0, "iter": 11656, "iter_tflops": 43.632981712771965, "iter_time": 0.4728325386047364, "loss": 0.8437759876251221, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 47.6813895855241, "step_time": 0.43268649864196773} +{"epoch": 0, "iter": 11657, "iter_tflops": 15.869940794049526, "iter_time": 1.3000107421875, "loss": 0.5705651044845581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.83794464547668, "step_time": 1.225273864746094} +{"epoch": 0, "iter": 11658, "iter_tflops": 18.06704884714336, "iter_time": 1.1419182891845703, "loss": 0.5848639607429504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.49492919557092, "step_time": 0.8422597732543945} +{"epoch": 0, "iter": 11659, "iter_tflops": 48.80713823444412, "iter_time": 0.4227064781188965, "loss": 0.6452142596244812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.83707786451542, "step_time": 0.3904662094116211} +{"epoch": 0, "iter": 11660, "iter_tflops": 43.87869917387044, "iter_time": 0.4701847114562988, "loss": 0.5566876530647278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.13270921531914, "step_time": 0.43772348022460933} +{"epoch": 0, "iter": 11661, "iter_tflops": 29.1800266311315, "iter_time": 0.4508455505371094, "loss": 0.005535310134291649, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 31.954032802148856, "step_time": 0.4117065677642822} +{"epoch": 0, "iter": 11662, "iter_tflops": 25.58237303247514, "iter_time": 0.5142480392456055, "loss": 0.03749168664216995, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 28.500998614116693, "step_time": 0.46158681488037107} +{"epoch": 0, "iter": 11663, "iter_tflops": 26.79495454971234, "iter_time": 0.4909762077331543, "loss": 0.00449482211843133, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 29.624875434249414, "step_time": 0.4440756282806396} +{"epoch": 0, "iter": 11664, "iter_tflops": 27.83853864192383, "iter_time": 0.4725709686279297, "loss": 0.003724867943674326, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 31.01779976895738, "step_time": 0.4241334095001221} +{"epoch": 0, "iter": 11665, "iter_tflops": 34.87025883137865, "iter_time": 0.5916530075073242, "loss": 0.12669524550437927, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 38.80296559174739, "step_time": 0.5316885757446289} +{"epoch": 0, "iter": 11666, "iter_tflops": 40.852089354283905, "iter_time": 0.5050192985534668, "loss": 0.13011083006858826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.489216175777976, "step_time": 0.45353811836242675} +{"epoch": 0, "iter": 11667, "iter_tflops": 40.85133026403246, "iter_time": 0.5050286827087402, "loss": 0.15103021264076233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.000735417832544, "step_time": 0.4584612522125245} +{"epoch": 0, "iter": 11668, "iter_tflops": 39.32266165586526, "iter_time": 0.5246616744995117, "loss": 0.09767993539571762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.26873481874011, "step_time": 0.47681295967102055} +{"epoch": 0, "iter": 11669, "iter_tflops": 16.903911954300078, "iter_time": 1.2204922485351561, "loss": 0.12809520959854126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.990787704175194, "step_time": 1.1467587661743164} +{"epoch": 0, "iter": 11670, "iter_tflops": 20.677468102397004, "iter_time": 0.9977572402954102, "loss": 0.10253863036632538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.36753676982711, "step_time": 0.8828955192565918} +{"epoch": 0, "iter": 11671, "iter_tflops": 52.9654689043681, "iter_time": 0.3895196990966797, "loss": 0.11475540697574615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.69421333234058, "step_time": 0.3575938091278076} +{"epoch": 0, "iter": 11672, "iter_tflops": 54.39934822303904, "iter_time": 0.37925258636474607, "loss": 0.07920174300670624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.03269834399193, "step_time": 0.3494858627319336} +{"epoch": 0, "iter": 11673, "iter_tflops": 31.785776819034112, "iter_time": 0.6490668334960937, "loss": 0.6222306489944458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.83407861399318, "step_time": 0.6097725830078125} +{"epoch": 0, "iter": 11674, "iter_tflops": 16.823351405896084, "iter_time": 1.226336715698242, "loss": 
0.7126970887184143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.92383711823203, "step_time": 1.0354980010986328} +{"epoch": 0, "iter": 11675, "iter_tflops": 46.175324788643444, "iter_time": 0.44679909896850595, "loss": 0.6269325613975525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.14401258069107, "step_time": 0.41143682861328124} +{"epoch": 0, "iter": 11676, "iter_tflops": 44.47762086722101, "iter_time": 0.46385335159301766, "loss": 0.8017399311065674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32094655753979, "step_time": 0.4269596309661865} +{"epoch": 0, "iter": 11677, "iter_tflops": 27.58731979759844, "iter_time": 0.7478469696044923, "loss": 0.8726565837860107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.160920971225934, "step_time": 0.7074911499023439} +{"epoch": 0, "iter": 11678, "iter_tflops": 19.20084829085553, "iter_time": 1.0744886474609374, "loss": 0.7756500840187073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.4484615311615, "step_time": 0.8798484916687013} +{"epoch": 0, "iter": 11679, "iter_tflops": 43.84202273538511, "iter_time": 0.47057804870605463, "loss": 0.6322108507156372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31505925393772, "step_time": 0.43603651428222656} +{"epoch": 0, "iter": 11680, "iter_tflops": 46.41558029749175, "iter_time": 0.4444863853454589, "loss": 0.7574348449707031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.91841777799183, "step_time": 0.4132962226867676} +{"epoch": 0, "iter": 11681, "iter_tflops": 25.241116349076282, "iter_time": 0.817360580444336, "loss": 0.8271113038063049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.539220148791053, "step_time": 0.777381301879883} +{"epoch": 0, "iter": 11682, "iter_tflops": 15.464437036319836, "iter_time": 1.3340992279052735, "loss": 0.7939333319664001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.224081696926323, "step_time": 0.9283215293884277} +{"epoch": 0, "iter": 11683, "iter_tflops": 36.4573462157405, "iter_time": 0.5658967437744141, 
"loss": 0.6325848698616028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.77047851434206, "step_time": 0.5187539672851562} +{"epoch": 0, "iter": 11684, "iter_tflops": 35.493674700864815, "iter_time": 0.5812611312866212, "loss": 0.6534976959228516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.47905666958798, "step_time": 0.5361642227172851} +{"epoch": 0, "iter": 11685, "iter_tflops": 31.647067096375064, "iter_time": 0.6466671600341797, "loss": 0.05868405103683472, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 35.395607201381736, "step_time": 0.5781824531555176} +{"epoch": 0, "iter": 11686, "iter_tflops": 38.5026283689721, "iter_time": 0.5315252456665039, "loss": 0.04415224492549896, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 42.8155168776173, "step_time": 0.4779836959838867} +{"epoch": 0, "iter": 11687, "iter_tflops": 40.59816667124777, "iter_time": 0.5040897331237792, "loss": 0.05785627290606499, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 44.491382990775286, "step_time": 0.4599793853759766} +{"epoch": 0, "iter": 11688, "iter_tflops": 45.99013258914035, "iter_time": 0.44498934555053715, "loss": 0.10353880375623703, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 50.45029633578493, "step_time": 0.405649133682251} +{"epoch": 0, "iter": 11689, "iter_tflops": 17.553766573568325, "iter_time": 1.1753086395263672, "loss": 0.04658323526382446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.892623609310252, "step_time": 1.0920184478759765} +{"epoch": 0, "iter": 11690, "iter_tflops": 20.922195713779523, "iter_time": 0.9860864410400391, "loss": 0.0681341364979744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.609271048135223, "step_time": 0.7753347873687744} +{"epoch": 0, "iter": 11691, "iter_tflops": 43.9602482636391, "iter_time": 0.4693124885559082, "loss": 0.031966809183359146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.70395754448507, "step_time": 0.4236019935607911} +{"epoch": 0, "iter": 11692, "iter_tflops": 40.671454254587516, "iter_time": 
0.5072622528076172, "loss": 0.0309884212911129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.731313285718464, "step_time": 0.46122262001037595} +{"epoch": 0, "iter": 11693, "iter_tflops": 13.232808288155125, "iter_time": 0.8563163528442383, "loss": 0.005265960469841957, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 14.335943975792333, "step_time": 0.7904237174987793} +{"epoch": 0, "iter": 11694, "iter_tflops": 14.019079244792602, "iter_time": 0.8082891845703125, "loss": 0.002692748326808214, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 17.535207595897138, "step_time": 0.6462124881744385} +{"epoch": 0, "iter": 11695, "iter_tflops": 31.419287201450267, "iter_time": 0.3606533164978027, "loss": 0.001689329743385315, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 34.641162620255216, "step_time": 0.3271099834442139} +{"epoch": 0, "iter": 11696, "iter_tflops": 29.48208965826051, "iter_time": 0.3843509826660157, "loss": 0.023612339049577713, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 32.30376948034987, "step_time": 0.3507785720825196} +{"epoch": 0, "iter": 11697, "iter_tflops": 22.439670669041057, "iter_time": 0.919402687072754, "loss": 0.7829303741455078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.486815954158054, "step_time": 0.8784116821289063} +{"epoch": 0, "iter": 11698, "iter_tflops": 17.150593828320122, "iter_time": 1.202937561035156, "loss": 0.7979795932769775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.227542646019668, "step_time": 0.9719021110534667} +{"epoch": 0, "iter": 11699, "iter_tflops": 35.393003061584665, "iter_time": 0.5829144668579103, "loss": 0.6039268374443054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.52562629851677, "step_time": 0.5355161094665527} +{"epoch": 0, "iter": 11700, "iter_tflops": 39.9904686383334, "iter_time": 0.5159002685546875, "loss": 0.6281788349151611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.29110204195532, "step_time": 0.4765666046142578} +{"epoch": 0, "iter": 11701, "iter_tflops": 
25.176198736266187, "iter_time": 0.8194681701660157, "loss": 0.6683952212333679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.156300939388633, "step_time": 0.7597166328430176} +{"epoch": 0, "iter": 11702, "iter_tflops": 10.277780428599629, "iter_time": 2.0073491210937497, "loss": 0.43057355284690857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.732034946438574, "step_time": 1.7585264282226563} +{"epoch": 0, "iter": 11703, "iter_tflops": 13.305879144859658, "iter_time": 1.550524642944336, "loss": 0.6854147911071777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.608479199854694, "step_time": 1.321787551879883} +{"epoch": 0, "iter": 11704, "iter_tflops": 37.81579881790604, "iter_time": 0.545568099975586, "loss": 0.6341329216957092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.40344256329802, "step_time": 0.4982941570281982} +{"epoch": 0, "iter": 11705, "iter_tflops": 11.193482316450478, "iter_time": 1.4854622039794922, "loss": 0.3156752288341522, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 12.09122619089749, "step_time": 1.3751702804565429} +{"epoch": 0, "iter": 11706, "iter_tflops": 16.11021947663779, "iter_time": 1.032108528137207, "loss": 0.37115979194641113, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 17.81910012666099, "step_time": 0.9331276435852051} +{"epoch": 0, "iter": 11707, "iter_tflops": 29.224647166006665, "iter_time": 0.5689545135498046, "loss": 0.40710708498954773, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 31.313796743907695, "step_time": 0.5309958114624024} +{"epoch": 0, "iter": 11708, "iter_tflops": 30.716101729250283, "iter_time": 0.5413282928466796, "loss": 0.47129714488983154, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 32.620216127000674, "step_time": 0.5097297592163086} +{"epoch": 0, "iter": 11709, "iter_tflops": 30.754074875518082, "iter_time": 0.6708409729003906, "loss": 0.22857925295829773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.81445633412528, "step_time": 0.6287196502685547} +{"epoch": 0, "iter": 11710, 
"iter_tflops": 14.163523814120376, "iter_time": 1.4566356353759766, "loss": 0.27248695492744446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.051695225897998, "step_time": 1.1428895320892334} +{"epoch": 0, "iter": 11711, "iter_tflops": 39.65144100939248, "iter_time": 0.5203113174438476, "loss": 0.31284213066101074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65127841666307, "step_time": 0.4726343479156494} +{"epoch": 0, "iter": 11712, "iter_tflops": 41.065850946293565, "iter_time": 0.5023905029296876, "loss": 0.23779891431331635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.90180398661856, "step_time": 0.45947137260437015} +{"epoch": 0, "iter": 11713, "iter_tflops": 14.866317413419944, "iter_time": 1.2569125671386718, "loss": 0.3401382267475128, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 15.765719579358132, "step_time": 1.1852082672119142} +{"epoch": 0, "iter": 11714, "iter_tflops": 14.279458570980117, "iter_time": 1.308569305419922, "loss": 0.4207032322883606, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 18.195026192006733, "step_time": 1.0269653358459472} +{"epoch": 0, "iter": 11715, "iter_tflops": 40.2912329903583, "iter_time": 0.4637649383544922, "loss": 0.2955629527568817, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 43.45548920022628, "step_time": 0.4299954166412353} +{"epoch": 0, "iter": 11716, "iter_tflops": 40.93673413740337, "iter_time": 0.4564521713256835, "loss": 0.3127610683441162, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 44.488168976220656, "step_time": 0.42001416587829593} +{"epoch": 0, "iter": 11717, "iter_tflops": 19.66865303964246, "iter_time": 1.0489327087402343, "loss": 0.5670592784881592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.52001222387953, "step_time": 1.0054133148193358} +{"epoch": 0, "iter": 11718, "iter_tflops": 13.608375861180464, "iter_time": 1.5160584716796877, "loss": 0.6969663500785828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.382938165866126, "step_time": 1.1222957572937013} +{"epoch": 0, 
"iter": 11719, "iter_tflops": 34.069194688939305, "iter_time": 0.6055644607543944, "loss": 0.5964818596839905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.95110807480947, "step_time": 0.5583349075317382} +{"epoch": 0, "iter": 11720, "iter_tflops": 36.36392043908787, "iter_time": 0.5673506393432618, "loss": 0.8474035859107971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.41012378011813, "step_time": 0.5234973030090333} +{"epoch": 0, "iter": 11721, "iter_tflops": 21.688697838727748, "iter_time": 0.951237075805664, "loss": 0.6025794744491577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.786742487281682, "step_time": 0.8673358078002931} +{"epoch": 0, "iter": 11722, "iter_tflops": 33.979888285378266, "iter_time": 0.6071560134887696, "loss": 0.8721827864646912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.08561156830374, "step_time": 0.5563099174499512} +{"epoch": 0, "iter": 11723, "iter_tflops": 40.64130207934483, "iter_time": 0.5076385955810547, "loss": 0.7325631380081177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.06960317808582, "step_time": 0.4681479301452636} +{"epoch": 0, "iter": 11724, "iter_tflops": 36.933033707481826, "iter_time": 0.5586081466674805, "loss": 0.6921617984771729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.99301802757115, "step_time": 0.5158673820495605} +{"epoch": 0, "iter": 11725, "iter_tflops": 21.15871557264957, "iter_time": 0.9750636062622071, "loss": 0.7212198972702026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.10135458608636, "step_time": 0.8930685615539551} +{"epoch": 0, "iter": 11726, "iter_tflops": 15.552237963168364, "iter_time": 1.3265675048828123, "loss": 0.557731568813324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.486038565534756, "step_time": 1.0070806732177733} +{"epoch": 0, "iter": 11727, "iter_tflops": 37.63400710714366, "iter_time": 0.5482034759521484, "loss": 0.8588694334030151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.947300151533526, "step_time": 0.5038450260162354} 
+{"epoch": 0, "iter": 11728, "iter_tflops": 34.76866014180867, "iter_time": 0.5933818969726562, "loss": 0.7537769079208374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.330298794133626, "step_time": 0.5526634979248047} +{"epoch": 0, "iter": 11729, "iter_tflops": 21.449497591753786, "iter_time": 0.961845069885254, "loss": 0.35464081168174744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.97424533108231, "step_time": 0.8980096282958985} +{"epoch": 0, "iter": 11730, "iter_tflops": 16.64308248383771, "iter_time": 1.2396197357177736, "loss": 0.2469019889831543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.42034307293604, "step_time": 0.9631542053222657} +{"epoch": 0, "iter": 11731, "iter_tflops": 39.10147606295182, "iter_time": 0.5276295318603516, "loss": 0.230768084526062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.06235249938154, "step_time": 0.47909815216064455} +{"epoch": 0, "iter": 11732, "iter_tflops": 38.378716557077006, "iter_time": 0.5375660095214844, "loss": 0.29145070910453796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.21917204532262, "step_time": 0.4886664638519288} +{"epoch": 0, "iter": 11733, "iter_tflops": 21.57619581011671, "iter_time": 0.9561969909667969, "loss": 0.4991585314273834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.92216700537687, "step_time": 0.9000498733520508} +{"epoch": 0, "iter": 11734, "iter_tflops": 21.757821198600016, "iter_time": 0.9482150497436523, "loss": 0.49652764201164246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.308905058407866, "step_time": 0.7841867027282715} +{"epoch": 0, "iter": 11735, "iter_tflops": 37.783860810323695, "iter_time": 0.5460292587280273, "loss": 0.47048407793045044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.07400517075423, "step_time": 0.5022907657623291} +{"epoch": 0, "iter": 11736, "iter_tflops": 37.32217937532046, "iter_time": 0.5527837295532226, "loss": 0.46768805384635925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.83146201088472, "step_time": 
0.5052744255065919} +{"epoch": 0, "iter": 11737, "iter_tflops": 19.25515140533348, "iter_time": 1.0714583892822265, "loss": 0.5421127676963806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.243045636635102, "step_time": 1.0191694412231445} +{"epoch": 0, "iter": 11738, "iter_tflops": 9.800732176696844, "iter_time": 2.105056350708008, "loss": 0.6986112594604492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.204404029143198, "step_time": 1.690463005065918} +{"epoch": 0, "iter": 11739, "iter_tflops": 14.952052337171619, "iter_time": 1.3798168334960936, "loss": 0.767045795917511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.271342992403323, "step_time": 1.1945274620056152} +{"epoch": 0, "iter": 11740, "iter_tflops": 24.022755966402666, "iter_time": 0.8588145980834959, "loss": 0.725867509841919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.56441397060705, "step_time": 0.6978353614807129} +{"epoch": 0, "iter": 11741, "iter_tflops": 14.26477359616967, "iter_time": 1.1282551269531251, "loss": 0.5016736388206482, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 15.174888313563143, "step_time": 1.0605879669189455} +{"epoch": 0, "iter": 11742, "iter_tflops": 13.027292373466857, "iter_time": 1.235429702758789, "loss": 0.40572842955589294, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 15.83529766398546, "step_time": 1.0163562622070315} +{"epoch": 0, "iter": 11743, "iter_tflops": 29.629496237107965, "iter_time": 0.5431852035522461, "loss": 0.36873725056648254, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 31.559017150816935, "step_time": 0.5099748153686523} +{"epoch": 0, "iter": 11744, "iter_tflops": 27.870968855943964, "iter_time": 0.5774576416015624, "loss": 0.5544134378433228, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 29.5453180823387, "step_time": 0.5447328033447265} +{"epoch": 0, "iter": 11745, "iter_tflops": 30.807151831561065, "iter_time": 0.6696851959228516, "loss": 0.14155113697052002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.87356669280345, 
"step_time": 0.6275891418457031} +{"epoch": 0, "iter": 11746, "iter_tflops": 13.695325992489968, "iter_time": 1.5064331817626955, "loss": 0.23747725784778595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.640296462892, "step_time": 1.1695434684753416} +{"epoch": 0, "iter": 11747, "iter_tflops": 37.91612593407282, "iter_time": 0.5441245117187501, "loss": 0.21165670454502106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.5651224370377, "step_time": 0.49635589408874514} +{"epoch": 0, "iter": 11748, "iter_tflops": 44.08323202683174, "iter_time": 0.4680031967163086, "loss": 0.2548558712005615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.31161378613551, "step_time": 0.42704211044311524} +{"epoch": 0, "iter": 11749, "iter_tflops": 23.505398532128766, "iter_time": 0.8777172393798828, "loss": 0.8194135427474976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.46535476583264, "step_time": 0.8101632080078125} +{"epoch": 0, "iter": 11750, "iter_tflops": 35.65918200397241, "iter_time": 0.5785632858276367, "loss": 0.5870296955108643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.91661002482977, "step_time": 0.5301359367370605} +{"epoch": 0, "iter": 11751, "iter_tflops": 35.516388072384395, "iter_time": 0.5808894042968751, "loss": 0.7070491909980774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.62828011221422, "step_time": 0.5340929870605469} +{"epoch": 0, "iter": 11752, "iter_tflops": 39.2463284857518, "iter_time": 0.52568212890625, "loss": 0.7345194816589355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56709071463674, "step_time": 0.48467238807678215} +{"epoch": 0, "iter": 11753, "iter_tflops": 16.933188780321736, "iter_time": 1.218382064819336, "loss": 0.0636238381266594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.40625599388542, "step_time": 1.1208739852905274} +{"epoch": 0, "iter": 11754, "iter_tflops": 37.59270197984576, "iter_time": 0.5488058166503906, "loss": 0.06248115003108978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
48.32445437403342, "step_time": 0.42692863845825196} +{"epoch": 0, "iter": 11755, "iter_tflops": 55.367689211666494, "iter_time": 0.3726197319030761, "loss": 0.07961313426494598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.521654375622674, "step_time": 0.34088779830932614} +{"epoch": 0, "iter": 11756, "iter_tflops": 51.121140267280865, "iter_time": 0.40357263946533206, "loss": 0.04559461772441864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.670076869072886, "step_time": 0.37059574317932126} +{"epoch": 0, "iter": 11757, "iter_tflops": 27.234257272747445, "iter_time": 0.7575419921875, "loss": 0.3103278577327728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.010627061877525, "step_time": 0.7111564140319825} +{"epoch": 0, "iter": 11758, "iter_tflops": 20.735580923793027, "iter_time": 0.9949609603881836, "loss": 0.20497365295886993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.17245735069088, "step_time": 0.890328254699707} +{"epoch": 0, "iter": 11759, "iter_tflops": 39.47806878257957, "iter_time": 0.522596321105957, "loss": 0.2230791449546814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36240270894876, "step_time": 0.47578298759460447} +{"epoch": 0, "iter": 11760, "iter_tflops": 41.39270516223986, "iter_time": 0.4984234161376954, "loss": 0.22038350999355316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.252592394427694, "step_time": 0.4559096488952637} +{"epoch": 0, "iter": 11761, "iter_tflops": 14.950256871482278, "iter_time": 1.3799825439453128, "loss": 0.5242682099342346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.043185344157187, "step_time": 1.2859723968505858} +{"epoch": 0, "iter": 11762, "iter_tflops": 18.353146183051244, "iter_time": 1.1241175384521485, "loss": 0.5225235819816589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.80696447943904, "step_time": 0.9045962047576904} +{"epoch": 0, "iter": 11763, "iter_tflops": 47.46681105329019, "iter_time": 0.4346425018310547, "loss": 0.42806848883628845, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 51.32421712704862, "step_time": 0.40197580528259275} +{"epoch": 0, "iter": 11764, "iter_tflops": 46.8810499671279, "iter_time": 0.44007319641113285, "loss": 0.4490341544151306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.92232643472676, "step_time": 0.4051482906341553} +{"epoch": 0, "iter": 11765, "iter_tflops": 25.424439718628474, "iter_time": 0.8114669876098634, "loss": 0.5196148753166199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.8369050313703, "step_time": 0.76875830078125} +{"epoch": 0, "iter": 11766, "iter_tflops": 16.708974152311203, "iter_time": 1.2347313079833984, "loss": 0.677012026309967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.010252162843557, "step_time": 1.0310261631011963} +{"epoch": 0, "iter": 11767, "iter_tflops": 39.083617651819964, "iter_time": 0.5278706207275391, "loss": 0.569564938545227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.83989496083436, "step_time": 0.48158599662780766} +{"epoch": 0, "iter": 11768, "iter_tflops": 38.90650097193779, "iter_time": 0.530273681640625, "loss": 0.5942138433456421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.4508340155368, "step_time": 0.4859997215270995} +{"epoch": 0, "iter": 11769, "iter_tflops": 15.909218624886554, "iter_time": 0.9936082839965821, "loss": 0.03359876945614815, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 16.96067152199073, "step_time": 0.9320109405517578} +{"epoch": 0, "iter": 11770, "iter_tflops": 9.750789284783215, "iter_time": 1.6211540374755862, "loss": 0.036116402596235275, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 13.384524893215277, "step_time": 1.1810304470062256} +{"epoch": 0, "iter": 11771, "iter_tflops": 39.469195064591595, "iter_time": 0.4005030097961426, "loss": 0.023050757125020027, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 43.35607745180481, "step_time": 0.364597822189331} +{"epoch": 0, "iter": 11772, "iter_tflops": 41.114530339525906, "iter_time": 0.3844755439758301, "loss": 0.024088764563202858, "lr": 3e-05, "seqlen": 
6320.0, "step_tflops": 44.755842273143344, "step_time": 0.35319481468200686} +{"epoch": 0, "iter": 11773, "iter_tflops": 45.0679752105616, "iter_time": 0.4577772445678711, "loss": 0.5597299337387085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.38242043000306, "step_time": 0.4177821445465088} +{"epoch": 0, "iter": 11774, "iter_tflops": 40.32329915299119, "iter_time": 0.5116420021057129, "loss": 0.548755407333374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.679304601692714, "step_time": 0.47233108901977544} +{"epoch": 0, "iter": 11775, "iter_tflops": 40.90207946071951, "iter_time": 0.5044020690917969, "loss": 0.5935770869255066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.82502834946016, "step_time": 0.4707605285644531} +{"epoch": 0, "iter": 11776, "iter_tflops": 50.37558464335644, "iter_time": 0.4095454902648926, "loss": 0.7809045910835266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.3794386629085, "step_time": 0.3793914394378662} +{"epoch": 0, "iter": 11777, "iter_tflops": 25.502528422230654, "iter_time": 0.8089822769165038, "loss": 0.18219919502735138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.953810185518204, "step_time": 0.7654240112304688} +{"epoch": 0, "iter": 11778, "iter_tflops": 16.145876795594052, "iter_time": 1.2777933197021483, "loss": 0.11857695877552032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.938512138822908, "step_time": 1.0893724575042725} +{"epoch": 0, "iter": 11779, "iter_tflops": 37.21588001668004, "iter_time": 0.5543626403808594, "loss": 0.16615308821201324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.941076690182015, "step_time": 0.5039216156005859} +{"epoch": 0, "iter": 11780, "iter_tflops": 40.446228971490626, "iter_time": 0.5100869483947754, "loss": 0.12862727046012878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.21863919108702, "step_time": 0.466570068359375} +{"epoch": 0, "iter": 11781, "iter_tflops": 22.752176020845926, "iter_time": 0.9067745208740234, "loss": 0.04666697978973389, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 25.016166128508484, "step_time": 0.8247104454040527} +{"epoch": 0, "iter": 11782, "iter_tflops": 45.88655717756668, "iter_time": 0.44961083984375, "loss": 0.014681501314043999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.881054152552316, "step_time": 0.3976614170074463} +{"epoch": 0, "iter": 11783, "iter_tflops": 52.42286509625062, "iter_time": 0.39355142974853513, "loss": 0.06255500763654709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.47715127310149, "step_time": 0.35894425964355464} +{"epoch": 0, "iter": 11784, "iter_tflops": 56.70532318001892, "iter_time": 0.3638299255371094, "loss": 0.028710195794701576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.91071299296463, "step_time": 0.33323947525024417} +{"epoch": 0, "iter": 11785, "iter_tflops": 26.43300113309784, "iter_time": 0.7805051498413086, "loss": 0.04958736151456833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.76787937332268, "step_time": 0.7429841232299804} +{"epoch": 0, "iter": 11786, "iter_tflops": 12.37982574422488, "iter_time": 1.6665092010498048, "loss": 0.09213126450777054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.48248786068781, "step_time": 1.4245545177459715} +{"epoch": 0, "iter": 11787, "iter_tflops": 42.496546471279025, "iter_time": 0.48547694396972657, "loss": 0.05906755104660988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.58209730530019, "step_time": 0.44289748001098633} +{"epoch": 0, "iter": 11788, "iter_tflops": 46.51537245960438, "iter_time": 0.4435328025817872, "loss": 0.08867251127958298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.237502812882695, "step_time": 0.4026561088562012} +{"epoch": 0, "iter": 11789, "iter_tflops": 11.656650081622887, "iter_time": 1.1007177734375, "loss": 0.027300547808408737, "lr": 3e-05, "seqlen": 5152.0, "step_tflops": 12.582537940522503, "step_time": 1.0197212982177735} +{"epoch": 0, "iter": 11790, "iter_tflops": 17.64000865193116, "iter_time": 0.727362564086914, "loss": 
0.03648366779088974, "lr": 3e-05, "seqlen": 5152.0, "step_tflops": 24.530029731822765, "step_time": 0.5230601863861084} +{"epoch": 0, "iter": 11791, "iter_tflops": 34.458733228650516, "iter_time": 0.37234920501708985, "loss": 0.019539538770914078, "lr": 3e-05, "seqlen": 5152.0, "step_tflops": 37.88801361617523, "step_time": 0.3386475219726562} +{"epoch": 0, "iter": 11792, "iter_tflops": 36.55136494748605, "iter_time": 0.35103153991699215, "loss": 0.03836173936724663, "lr": 3e-05, "seqlen": 5152.0, "step_tflops": 39.84344916267887, "step_time": 0.32202738952636717} +{"epoch": 0, "iter": 11793, "iter_tflops": 24.459093770866808, "iter_time": 0.8434937820434572, "loss": 0.021718809381127357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.779673664324427, "step_time": 0.8002852859497072} +{"epoch": 0, "iter": 11794, "iter_tflops": 11.746163656425223, "iter_time": 1.7564112091064452, "loss": 0.01393272913992405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.616218034894084, "step_time": 1.4115206451416016} +{"epoch": 0, "iter": 11795, "iter_tflops": 41.614358754502014, "iter_time": 0.4957686271667481, "loss": 0.06168016418814659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.011136824685636, "step_time": 0.4483934745788574} +{"epoch": 0, "iter": 11796, "iter_tflops": 43.675855155226664, "iter_time": 0.4723683929443359, "loss": 0.025284865871071815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.95359705838253, "step_time": 0.4302303638458252} +{"epoch": 0, "iter": 11797, "iter_tflops": 28.108263439733516, "iter_time": 0.7339867706298829, "loss": 0.2544953227043152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.939698708514957, "step_time": 0.6668162384033203} +{"epoch": 0, "iter": 11798, "iter_tflops": 35.208861495198946, "iter_time": 0.5859630966186523, "loss": 0.11941073089838028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.368866297659494, "step_time": 0.5240459136962891} +{"epoch": 0, "iter": 11799, "iter_tflops": 39.497802188204595, "iter_time": 
0.5223352279663086, "loss": 0.1148369163274765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.23571575639421, "step_time": 0.47717710113525386} +{"epoch": 0, "iter": 11800, "iter_tflops": 37.3971595393338, "iter_time": 0.5516754150390625, "loss": 0.18693727254867554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.73484386681715, "step_time": 0.5064728755950927} +{"epoch": 0, "iter": 11801, "iter_tflops": 19.484915140547855, "iter_time": 1.0588238830566405, "loss": 0.6813135743141174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.795337200456636, "step_time": 0.9921018981933594} +{"epoch": 0, "iter": 11802, "iter_tflops": 27.650344049294684, "iter_time": 0.746142379760742, "loss": 0.6866151094436646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.000456823639254, "step_time": 0.6655093383789062} +{"epoch": 0, "iter": 11803, "iter_tflops": 33.97184324790752, "iter_time": 0.6072997970581054, "loss": 0.7135671377182007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.82527567680068, "step_time": 0.5602427444458008} +{"epoch": 0, "iter": 11804, "iter_tflops": 40.06379976176173, "iter_time": 0.5149559860229492, "loss": 0.8627698421478271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.71543321616828, "step_time": 0.471940731048584} +{"epoch": 0, "iter": 11805, "iter_tflops": 21.298017248598438, "iter_time": 0.9686861114501953, "loss": 0.022284502163529396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.065472213960195, "step_time": 0.8944578857421875} +{"epoch": 0, "iter": 11806, "iter_tflops": 15.2085008582715, "iter_time": 1.3565501098632813, "loss": 0.03802163153886795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.430139186945798, "step_time": 1.061808837890625} +{"epoch": 0, "iter": 11807, "iter_tflops": 52.68260615965151, "iter_time": 0.39161110305786134, "loss": 0.050749849528074265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.570227187155616, "step_time": 0.35836394119262693} +{"epoch": 0, "iter": 11808, "iter_tflops": 58.078180123965055, 
"iter_time": 0.3552296829223633, "loss": 0.01714373379945755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.347317305112966, "step_time": 0.3256821975708008} +{"epoch": 0, "iter": 11809, "iter_tflops": 36.7872255769616, "iter_time": 0.5608222198486328, "loss": 0.17552104592323303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.45112047644872, "step_time": 0.5229532966613769} +{"epoch": 0, "iter": 11810, "iter_tflops": 10.496232634728923, "iter_time": 1.9655712890625, "loss": 0.18985414505004883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.09501483014972, "step_time": 1.5754921836853029} +{"epoch": 0, "iter": 11811, "iter_tflops": 40.13013403971672, "iter_time": 0.5141047744750977, "loss": 0.1650828719139099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.3396995037081, "step_time": 0.4652961959838867} +{"epoch": 0, "iter": 11812, "iter_tflops": 38.82096686617609, "iter_time": 0.5314420318603515, "loss": 0.13911595940589905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.358175400559404, "step_time": 0.4870628471374512} +{"epoch": 0, "iter": 11813, "iter_tflops": 26.399724831837283, "iter_time": 0.7814889602661133, "loss": 0.529812216758728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.073919726958536, "step_time": 0.7096082572937013} +{"epoch": 0, "iter": 11814, "iter_tflops": 36.500872737586164, "iter_time": 0.565221923828125, "loss": 0.6348938941955566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.90699498296159, "step_time": 0.5169793796539307} +{"epoch": 0, "iter": 11815, "iter_tflops": 39.00813118267077, "iter_time": 0.5288921279907226, "loss": 0.49038031697273254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.714845182468515, "step_time": 0.4829958629608154} +{"epoch": 0, "iter": 11816, "iter_tflops": 37.37219157947512, "iter_time": 0.5520439834594727, "loss": 0.6462563276290894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87427372214583, "step_time": 0.5047452011108398} +{"epoch": 0, "iter": 11817, "iter_tflops": 
20.85081016305383, "iter_time": 0.9894624404907228, "loss": 0.19744572043418884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.697115162928927, "step_time": 0.908974262237549} +{"epoch": 0, "iter": 11818, "iter_tflops": 16.039129196154605, "iter_time": 1.286297607421875, "loss": 0.14813734591007233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.22118232938622, "step_time": 0.9721934051513671} +{"epoch": 0, "iter": 11819, "iter_tflops": 49.043355909574274, "iter_time": 0.42067050933837896, "loss": 0.23958714306354523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.72134333305393, "step_time": 0.3840390472412109} +{"epoch": 0, "iter": 11820, "iter_tflops": 47.515984765230876, "iter_time": 0.43419269561767576, "loss": 0.19619625806808472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.21578263701902, "step_time": 0.4028268718719482} +{"epoch": 0, "iter": 11821, "iter_tflops": 19.687430070126496, "iter_time": 1.0479322814941407, "loss": 0.1425335556268692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.445902817341278, "step_time": 1.0090575942993165} +{"epoch": 0, "iter": 11822, "iter_tflops": 24.73929740128562, "iter_time": 0.8339401550292969, "loss": 0.18576815724372864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.02686111877799, "step_time": 0.5890077743530273} +{"epoch": 0, "iter": 11823, "iter_tflops": 41.94661787951559, "iter_time": 0.4918416442871093, "loss": 0.18346646428108215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.92202590317439, "step_time": 0.44926357460021976} +{"epoch": 0, "iter": 11824, "iter_tflops": 39.18726680668234, "iter_time": 0.5264744186401367, "loss": 0.1245904341340065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37148528715317, "step_time": 0.4869098491668702} +{"epoch": 0, "iter": 11825, "iter_tflops": 16.801600465814406, "iter_time": 1.227924301147461, "loss": 0.7764800190925598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.930230515933555, "step_time": 1.150631805419922} +{"epoch": 0, "iter": 11826, 
"iter_tflops": 18.403826914934754, "iter_time": 1.121021926879883, "loss": 0.7000100016593933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.645667204753135, "step_time": 0.9110393314361573} +{"epoch": 0, "iter": 11827, "iter_tflops": 37.16459326648967, "iter_time": 0.555127655029297, "loss": 0.5841628909111023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.6807560910014, "step_time": 0.5071462650299072} +{"epoch": 0, "iter": 11828, "iter_tflops": 37.25267310557676, "iter_time": 0.5538151168823242, "loss": 0.6964136362075806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.8001240281401, "step_time": 0.505662519454956} +{"epoch": 0, "iter": 11829, "iter_tflops": 16.24436121904131, "iter_time": 1.2700464630126953, "loss": 0.5097148418426514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.55168053912272, "step_time": 1.17544832611084} +{"epoch": 0, "iter": 11830, "iter_tflops": 18.569477458386523, "iter_time": 1.1110217590332032, "loss": 0.4487631022930145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.91921745832139, "step_time": 0.9862268295288086} +{"epoch": 0, "iter": 11831, "iter_tflops": 46.67891079361256, "iter_time": 0.4419788970947266, "loss": 0.45438894629478455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24575007609528, "step_time": 0.41060375213623046} +{"epoch": 0, "iter": 11832, "iter_tflops": 45.27369618191263, "iter_time": 0.45569713211059565, "loss": 0.43458637595176697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87523940162858, "step_time": 0.42211749267578125} +{"epoch": 0, "iter": 11833, "iter_tflops": 18.960630858907226, "iter_time": 1.0881016387939455, "loss": 0.18484027683734894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.68189567981464, "step_time": 1.048226951599121} +{"epoch": 0, "iter": 11834, "iter_tflops": 15.643930061028085, "iter_time": 1.318792236328125, "loss": 0.15258094668388367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.616132049933082, "step_time": 0.9122290878295899} +{"epoch": 0, "iter": 
11835, "iter_tflops": 45.93286347193181, "iter_time": 0.4491575736999512, "loss": 0.19231779873371124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.86871896174323, "step_time": 0.4137081108093261} +{"epoch": 0, "iter": 11836, "iter_tflops": 45.74260875107424, "iter_time": 0.4510257301330567, "loss": 0.22207340598106384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.254712620928714, "step_time": 0.4188653717041016} +{"epoch": 0, "iter": 11837, "iter_tflops": 26.846593099817774, "iter_time": 0.7684808807373047, "loss": 0.5877155065536499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.368184465648206, "step_time": 0.7272616806030274} +{"epoch": 0, "iter": 11838, "iter_tflops": 15.079274962314317, "iter_time": 1.3681754302978515, "loss": 0.4581849277019501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.561662315136164, "step_time": 1.1114895401000977} +{"epoch": 0, "iter": 11839, "iter_tflops": 40.05707989353613, "iter_time": 0.5150423736572265, "loss": 0.5164551734924316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.68976473891541, "step_time": 0.47221800422668453} +{"epoch": 0, "iter": 11840, "iter_tflops": 41.5043633592249, "iter_time": 0.49708251953125004, "loss": 0.6254180073738098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.102869702442824, "step_time": 0.45742307853698727} +{"epoch": 0, "iter": 11841, "iter_tflops": 19.826066161577383, "iter_time": 1.0406044921875, "loss": 0.03905528783798218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.43237290388968, "step_time": 0.9626135940551758} +{"epoch": 0, "iter": 11842, "iter_tflops": 21.633838564982376, "iter_time": 0.9536492309570314, "loss": 0.02672826126217842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.39005140978701, "step_time": 0.8458815097808838} +{"epoch": 0, "iter": 11843, "iter_tflops": 52.357142047909505, "iter_time": 0.39404544830322263, "loss": 0.0306351650506258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.18337653429315, "step_time": 0.36078830528259276} 
+{"epoch": 0, "iter": 11844, "iter_tflops": 59.03302954271681, "iter_time": 0.34948390197753904, "loss": 0.030650241300463676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.75446596104834, "step_time": 0.3186049518585205} +{"epoch": 0, "iter": 11845, "iter_tflops": 25.80825847456007, "iter_time": 0.7993989028930666, "loss": 0.6420555114746094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.171525194826394, "step_time": 0.7592909622192383} +{"epoch": 0, "iter": 11846, "iter_tflops": 10.295107974731787, "iter_time": 2.0039705810546877, "loss": 0.961644172668457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.00456794981949, "step_time": 1.5864497451782225} +{"epoch": 0, "iter": 11847, "iter_tflops": 12.221370631618486, "iter_time": 1.688116180419922, "loss": 0.700803279876709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.129072404397032, "step_time": 1.460187400817871} +{"epoch": 0, "iter": 11848, "iter_tflops": 21.651752136972952, "iter_time": 0.9528602294921875, "loss": 0.7635342478752136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.262347586054194, "step_time": 0.850333770751953} +{"epoch": 0, "iter": 11849, "iter_tflops": 15.686294017919478, "iter_time": 1.0416923828125, "loss": 0.34728410840034485, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 16.813016308707503, "step_time": 0.9718834915161133} +{"epoch": 0, "iter": 11850, "iter_tflops": 27.11208974006736, "iter_time": 0.6026939697265625, "loss": 0.40622514486312866, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 28.839898627622432, "step_time": 0.5665863533020019} +{"epoch": 0, "iter": 11851, "iter_tflops": 28.7760211716551, "iter_time": 0.5678440704345703, "loss": 0.26794326305389404, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 30.669525758016775, "step_time": 0.5327859687805175} +{"epoch": 0, "iter": 11852, "iter_tflops": 28.97998680138845, "iter_time": 0.5638474960327149, "loss": 0.5287423133850098, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 30.880777282014368, "step_time": 
0.5291412467956544} +{"epoch": 0, "iter": 11853, "iter_tflops": 30.292438729146298, "iter_time": 0.6810641326904298, "loss": 0.5060595870018005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.24073181119109, "step_time": 0.639907730102539} +{"epoch": 0, "iter": 11854, "iter_tflops": 14.983187614657576, "iter_time": 1.3769495544433594, "loss": 0.534367024898529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.948116646345134, "step_time": 1.1494851474761962} +{"epoch": 0, "iter": 11855, "iter_tflops": 38.80666196892896, "iter_time": 0.5316379318237305, "loss": 0.6086015701293945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.555462097722454, "step_time": 0.4848048286437988} +{"epoch": 0, "iter": 11856, "iter_tflops": 40.6215661534173, "iter_time": 0.5078852310180664, "loss": 0.5140560269355774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.233654979664536, "step_time": 0.4664116840362549} +{"epoch": 0, "iter": 11857, "iter_tflops": 29.3255244163348, "iter_time": 0.70352001953125, "loss": 0.07872577756643295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.585846123698914, "step_time": 0.6331305141448975} +{"epoch": 0, "iter": 11858, "iter_tflops": 39.44646335087342, "iter_time": 0.523015037536621, "loss": 0.10606855899095535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.249820834727494, "step_time": 0.4662412891387939} +{"epoch": 0, "iter": 11859, "iter_tflops": 42.07619733539479, "iter_time": 0.49032695007324223, "loss": 0.03672763332724571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.20886285178035, "step_time": 0.4464748153686523} +{"epoch": 0, "iter": 11860, "iter_tflops": 40.8258057782728, "iter_time": 0.5053444290161133, "loss": 0.04882929101586342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.9005851795535, "step_time": 0.45948384475708004} +{"epoch": 0, "iter": 11861, "iter_tflops": 21.540989460594815, "iter_time": 0.9577597885131837, "loss": 0.21850819885730743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.200692419050178, 
"step_time": 0.8892447319030762} +{"epoch": 0, "iter": 11862, "iter_tflops": 16.80195081421611, "iter_time": 1.227898696899414, "loss": 0.16951268911361694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.801761643835203, "step_time": 1.041881721496582} +{"epoch": 0, "iter": 11863, "iter_tflops": 47.4544579506506, "iter_time": 0.43475564575195313, "loss": 0.11004019528627396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.57729231360759, "step_time": 0.4000034236907959} +{"epoch": 0, "iter": 11864, "iter_tflops": 54.38878653834232, "iter_time": 0.3793262329101562, "loss": 0.1980496644973755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.25317116099579, "step_time": 0.348185474395752} +{"epoch": 0, "iter": 11865, "iter_tflops": 31.663768133107176, "iter_time": 0.651567855834961, "loss": 0.47999489307403564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.684825468209766, "step_time": 0.6124744071960448} +{"epoch": 0, "iter": 11866, "iter_tflops": 13.474778959131596, "iter_time": 1.531089569091797, "loss": 0.8247668743133545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.74358823702295, "step_time": 1.3104441757202148} +{"epoch": 0, "iter": 11867, "iter_tflops": 44.88525909705529, "iter_time": 0.4596407356262207, "loss": 0.7034080028533936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.43326812873679, "step_time": 0.42596946907043454} +{"epoch": 0, "iter": 11868, "iter_tflops": 46.44738686536729, "iter_time": 0.4441820068359375, "loss": 0.8258987665176392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.1091686862368, "step_time": 0.4117229251861573} +{"epoch": 0, "iter": 11869, "iter_tflops": 29.947329222797077, "iter_time": 0.6889126358032226, "loss": 0.028562430292367935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.89286927624815, "step_time": 0.6468873443603514} +{"epoch": 0, "iter": 11870, "iter_tflops": 23.13226923920725, "iter_time": 0.8918750381469727, "loss": 0.030483653768897057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
27.9669466591516, "step_time": 0.7376956005096436} +{"epoch": 0, "iter": 11871, "iter_tflops": 54.56960559844831, "iter_time": 0.3780693168640137, "loss": 0.05480147525668144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.45767848819288, "step_time": 0.3469878749847412} +{"epoch": 0, "iter": 11872, "iter_tflops": 55.63914470866767, "iter_time": 0.37080177307128914, "loss": 0.03582393750548363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.36347143349375, "step_time": 0.3417810974121094} +{"epoch": 0, "iter": 11873, "iter_tflops": 41.02510640090485, "iter_time": 0.5028894577026366, "loss": 0.48454225063323975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.02274951126641, "step_time": 0.4582370853424072} +{"epoch": 0, "iter": 11874, "iter_tflops": 9.469965771870436, "iter_time": 2.1785816345214846, "loss": 0.4187893867492676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.593643350054528, "step_time": 1.9474974594116212} +{"epoch": 0, "iter": 11875, "iter_tflops": 10.40372657684595, "iter_time": 1.9830484161376953, "loss": 0.4190865457057953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.397021312537174, "step_time": 1.6641976318359375} +{"epoch": 0, "iter": 11876, "iter_tflops": 22.455714358365043, "iter_time": 0.9187458114624023, "loss": 0.4562190771102905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.19720822318066, "step_time": 0.758573944091797} +{"epoch": 0, "iter": 11877, "iter_tflops": 16.66662811940739, "iter_time": 0.9484540786743164, "loss": 0.2856481075286865, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 17.575500867098008, "step_time": 0.8994071655273438} +{"epoch": 0, "iter": 11878, "iter_tflops": 11.05475720373328, "iter_time": 1.429930221557617, "loss": 0.28638163208961487, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 12.071186874897462, "step_time": 1.309525863647461} +{"epoch": 0, "iter": 11879, "iter_tflops": 22.453630193180256, "iter_time": 0.7040078277587891, "loss": 0.3761961758136749, "lr": 3e-05, "seqlen": 6320.0, 
"step_tflops": 24.329358890393774, "step_time": 0.6497307014465331} +{"epoch": 0, "iter": 11880, "iter_tflops": 23.8812301567827, "iter_time": 0.6619228286743164, "loss": 0.36053961515426636, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.683364957447104, "step_time": 0.6154774284362793} +{"epoch": 0, "iter": 11881, "iter_tflops": 20.17863603008332, "iter_time": 1.0224225997924805, "loss": 0.6429924964904785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.48820914099747, "step_time": 0.9601122817993164} +{"epoch": 0, "iter": 11882, "iter_tflops": 15.453679391782506, "iter_time": 1.3350279235839844, "loss": 0.7286688089370728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.910011429815487, "step_time": 1.0362170600891114} +{"epoch": 0, "iter": 11883, "iter_tflops": 37.235407469309315, "iter_time": 0.5540719146728516, "loss": 0.5300812125205994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.40268563275569, "step_time": 0.5106366863250732} +{"epoch": 0, "iter": 11884, "iter_tflops": 42.68201665242168, "iter_time": 0.4833673553466797, "loss": 0.7065533399581909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.65294008434673, "step_time": 0.44222493743896485} +{"epoch": 0, "iter": 11885, "iter_tflops": 32.69566409915891, "iter_time": 0.6310039596557617, "loss": 0.0025396188721060753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.504715759735994, "step_time": 0.5651624202728271} +{"epoch": 0, "iter": 11886, "iter_tflops": 41.014252185956316, "iter_time": 0.5030225448608399, "loss": 0.009100314229726791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.66102528274912, "step_time": 0.45183158683776864} +{"epoch": 0, "iter": 11887, "iter_tflops": 46.53822821070808, "iter_time": 0.4433149757385254, "loss": 0.007739978842437267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.545589797774404, "step_time": 0.40024944114685057} +{"epoch": 0, "iter": 11888, "iter_tflops": 49.462931826485175, "iter_time": 0.4171021156311035, "loss": 0.0026220115832984447, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 54.44697402320686, "step_time": 0.3789208469390869} +{"epoch": 0, "iter": 11889, "iter_tflops": 22.58301444056095, "iter_time": 0.9135668563842774, "loss": 0.5125659108161926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.898508571015235, "step_time": 0.8632795410156249} +{"epoch": 0, "iter": 11890, "iter_tflops": 12.840484174854357, "iter_time": 1.6067223968505857, "loss": 0.6281924843788147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.229321795116302, "step_time": 1.2712233924865721} +{"epoch": 0, "iter": 11891, "iter_tflops": 38.910069854032564, "iter_time": 0.5302250442504883, "loss": 0.7876259684562683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41335818099367, "step_time": 0.4864291439056396} +{"epoch": 0, "iter": 11892, "iter_tflops": 36.66843094064431, "iter_time": 0.5626391143798828, "loss": 0.404666006565094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.05262995624711, "step_time": 0.5150995960235596} +{"epoch": 0, "iter": 11893, "iter_tflops": 1.8903474630668033, "iter_time": 0.8083102035522461, "loss": 0.11708009988069534, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 2.031381130755706, "step_time": 0.7521912651062012} +{"epoch": 0, "iter": 11894, "iter_tflops": 0.6375457384488341, "iter_time": 2.396670623779297, "loss": 0.09153670072555542, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 0.726256408029143, "step_time": 2.1039224243164063} +{"epoch": 0, "iter": 11895, "iter_tflops": 0.8711895612031111, "iter_time": 1.753908920288086, "loss": 0.14519378542900085, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 1.0990980725441353, "step_time": 1.390219108581543} +{"epoch": 0, "iter": 11896, "iter_tflops": 2.5802002227506384, "iter_time": 0.5921971206665039, "loss": 0.36126989126205444, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 2.983785366793742, "step_time": 0.5120968685150147} +{"epoch": 0, "iter": 11897, "iter_tflops": 20.933651233965993, "iter_time": 0.743392074584961, "loss": 0.4973095953464508, 
"lr": 3e-05, "seqlen": 6224.0, "step_tflops": 22.85313287928514, "step_time": 0.6809530448913573} +{"epoch": 0, "iter": 11898, "iter_tflops": 26.81467661212698, "iter_time": 0.5803504791259765, "loss": 0.29982471466064453, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 28.58349239260888, "step_time": 0.5444369850158691} +{"epoch": 0, "iter": 11899, "iter_tflops": 26.213108740854004, "iter_time": 0.5936690139770509, "loss": 0.3201262354850769, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 27.8343689153602, "step_time": 0.5590897521972656} +{"epoch": 0, "iter": 11900, "iter_tflops": 28.415572576084354, "iter_time": 0.547654296875, "loss": 0.5126838684082031, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 30.223531446789828, "step_time": 0.5148938484191895} +{"epoch": 0, "iter": 11901, "iter_tflops": 29.664894112927023, "iter_time": 0.6954716720581056, "loss": 0.6482717990875244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.63351560252176, "step_time": 0.6521909790039062} +{"epoch": 0, "iter": 11902, "iter_tflops": 42.745621720659045, "iter_time": 0.4826481094360352, "loss": 0.8621408343315125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.32515893261594, "step_time": 0.4453539714813233} +{"epoch": 0, "iter": 11903, "iter_tflops": 42.658167008700914, "iter_time": 0.4836375999450684, "loss": 0.6882299184799194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.92211402615571, "step_time": 0.4492627124786377} +{"epoch": 0, "iter": 11904, "iter_tflops": 46.48452869094048, "iter_time": 0.4438270988464355, "loss": 0.6983724236488342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96095345626176, "step_time": 0.41294435119628903} +{"epoch": 0, "iter": 11905, "iter_tflops": 25.34759280999602, "iter_time": 0.813927131652832, "loss": 0.046564679592847824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.677253237202684, "step_time": 0.7733589859008789} +{"epoch": 0, "iter": 11906, "iter_tflops": 19.235071014941244, "iter_time": 1.072576934814453, "loss": 
0.06985481083393097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.165166620354203, "step_time": 0.8198274154663087} +{"epoch": 0, "iter": 11907, "iter_tflops": 50.11127262184973, "iter_time": 0.41170563888549805, "loss": 0.10444913804531097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.595694952273504, "step_time": 0.377888650894165} +{"epoch": 0, "iter": 11908, "iter_tflops": 50.27669595964363, "iter_time": 0.4103510208129883, "loss": 0.061216358095407486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.750525739449024, "step_time": 0.3768200073242188} +{"epoch": 0, "iter": 11909, "iter_tflops": 29.648687600798887, "iter_time": 0.6958518295288086, "loss": 0.14785128831863403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.380598203289942, "step_time": 0.6574474258422851} +{"epoch": 0, "iter": 11910, "iter_tflops": 10.97192924641428, "iter_time": 1.8803524017333986, "loss": 0.17343087494373322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.016723045565586, "step_time": 1.471891357421875} +{"epoch": 0, "iter": 11911, "iter_tflops": 14.366988674619124, "iter_time": 1.436006805419922, "loss": 0.1468902975320816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.814395930483403, "step_time": 1.2269898719787597} +{"epoch": 0, "iter": 11912, "iter_tflops": 20.48800665470802, "iter_time": 1.006983932495117, "loss": 0.1626143902540207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.185406521371128, "step_time": 0.7589032554626465} +{"epoch": 0, "iter": 11913, "iter_tflops": 18.162828326815884, "iter_time": 0.951659309387207, "loss": 0.410418301820755, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 19.340947954101566, "step_time": 0.8936906661987306} +{"epoch": 0, "iter": 11914, "iter_tflops": 11.394083609674308, "iter_time": 1.5169999847412108, "loss": 0.44960543513298035, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 12.438628119384513, "step_time": 1.3896086044311524} +{"epoch": 0, "iter": 11915, "iter_tflops": 11.50565622379557, "iter_time": 
1.5022893371582031, "loss": 0.247437983751297, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 15.255212720870501, "step_time": 1.133043830871582} +{"epoch": 0, "iter": 11916, "iter_tflops": 29.11367187218588, "iter_time": 0.5937012939453125, "loss": 0.23044857382774353, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 30.889982025489104, "step_time": 0.5595608520507813} +{"epoch": 0, "iter": 11917, "iter_tflops": 17.18791885978224, "iter_time": 0.912542106628418, "loss": 0.432435005903244, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 18.0607563486789, "step_time": 0.8684409103393556} +{"epoch": 0, "iter": 11918, "iter_tflops": 10.43519248069449, "iter_time": 1.5030580139160157, "loss": 0.2682269215583801, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 12.174279938735383, "step_time": 1.2883472175598145} +{"epoch": 0, "iter": 11919, "iter_tflops": 27.850613942173418, "iter_time": 0.563172492980957, "loss": 0.3378448486328125, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.684464915201897, "step_time": 0.528380744934082} +{"epoch": 0, "iter": 11920, "iter_tflops": 27.706633268541474, "iter_time": 0.5660990829467774, "loss": 0.42505529522895813, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.543761608888115, "step_time": 0.5308971786499024} +{"epoch": 0, "iter": 11921, "iter_tflops": 36.319484081940374, "iter_time": 0.5680447845458985, "loss": 0.7701130509376526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.100190107901234, "step_time": 0.5276468849182129} +{"epoch": 0, "iter": 11922, "iter_tflops": 29.564559907896005, "iter_time": 0.6978319168090821, "loss": 0.7407438158988953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.868586518626465, "step_time": 0.5751855735778809} +{"epoch": 0, "iter": 11923, "iter_tflops": 34.54951593604601, "iter_time": 0.5971456604003905, "loss": 0.5940396189689636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.535074823829326, "step_time": 0.5496483917236328} +{"epoch": 0, "iter": 11924, "iter_tflops": 35.926167191627734, 
"iter_time": 0.5742636947631836, "loss": 0.6873631477355957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.29895888520273, "step_time": 0.5249781188964844} +{"epoch": 0, "iter": 11925, "iter_tflops": 1.7813532271774741, "iter_time": 0.8136815414428711, "loss": 0.6830791234970093, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.9309998454321426, "step_time": 0.7506236953735351} +{"epoch": 0, "iter": 11926, "iter_tflops": 1.2846001479108784, "iter_time": 1.1283310546875, "loss": 0.4957459270954132, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.7249291351023697, "step_time": 0.8402978477478027} +{"epoch": 0, "iter": 11927, "iter_tflops": 3.456947853356577, "iter_time": 0.4192872734069824, "loss": 0.6330955624580383, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.762771438241149, "step_time": 0.3852092170715332} +{"epoch": 0, "iter": 11928, "iter_tflops": 3.2798736684460734, "iter_time": 0.4419238014221191, "loss": 0.6928502917289734, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.5198970976762194, "step_time": 0.4117888107299804} +{"epoch": 0, "iter": 11929, "iter_tflops": 1.5096758636386713, "iter_time": 0.8561076507568359, "loss": 2.378520965576172, "lr": 3e-05, "seqlen": 528.0, "step_tflops": 1.5988652709502367, "step_time": 0.808351448059082} +{"epoch": 0, "iter": 11930, "iter_tflops": 2.911524428486085, "iter_time": 0.44390665054321293, "loss": 2.3944408893585205, "lr": 3e-05, "seqlen": 528.0, "step_tflops": 3.1932535865650675, "step_time": 0.4047423801422119} +{"epoch": 0, "iter": 11931, "iter_tflops": 3.0727973620149474, "iter_time": 0.4206086196899414, "loss": 2.3801469802856445, "lr": 3e-05, "seqlen": 528.0, "step_tflops": 3.324793311383487, "step_time": 0.3887294445037842} +{"epoch": 0, "iter": 11932, "iter_tflops": 3.0384665560473283, "iter_time": 0.4253609619140625, "loss": 2.6398885250091553, "lr": 3e-05, "seqlen": 528.0, "step_tflops": 3.3019133178054827, "step_time": 0.39142307281494143} +{"epoch": 0, "iter": 11933, "iter_tflops": 26.189188764571426, 
"iter_time": 0.787771385192871, "loss": 0.6079895496368408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.628662533777053, "step_time": 0.746727912902832} +{"epoch": 0, "iter": 11934, "iter_tflops": 13.837207340981394, "iter_time": 1.4909868011474607, "loss": 0.5462476015090942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.42381886635267, "step_time": 1.2561690845489502} +{"epoch": 0, "iter": 11935, "iter_tflops": 47.95394210030029, "iter_time": 0.43022726821899415, "loss": 0.6539013981819153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99989920921805, "step_time": 0.39675256729125974} +{"epoch": 0, "iter": 11936, "iter_tflops": 46.27951382050575, "iter_time": 0.4457932205200195, "loss": 0.6312940120697021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.22230611524273, "step_time": 0.41079542350769044} +{"epoch": 0, "iter": 11937, "iter_tflops": 38.938315932305564, "iter_time": 0.5298404159545897, "loss": 0.17969289422035217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.24567630466815, "step_time": 0.4883598823547363} +{"epoch": 0, "iter": 11938, "iter_tflops": 45.89072641472341, "iter_time": 0.4495699920654297, "loss": 0.1068454384803772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.592751926763796, "step_time": 0.41601025772094724} +{"epoch": 0, "iter": 11939, "iter_tflops": 48.56282128552352, "iter_time": 0.42483309173583983, "loss": 0.12165122479200363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.74461589822817, "step_time": 0.3911507015228271} +{"epoch": 0, "iter": 11940, "iter_tflops": 50.34543550061088, "iter_time": 0.40979074478149413, "loss": 0.08504242449998856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.856151249933205, "step_time": 0.37609444046020507} +{"epoch": 0, "iter": 11941, "iter_tflops": 28.618438522534884, "iter_time": 0.7209021377563476, "loss": 0.12916319072246552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.29382567920754, "step_time": 0.6810329513549805} +{"epoch": 0, "iter": 11942, "iter_tflops": 
13.55217961318313, "iter_time": 1.5223450469970703, "loss": 0.10141818225383759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.91313444328021, "step_time": 1.0360545482635497} +{"epoch": 0, "iter": 11943, "iter_tflops": 41.24464155883143, "iter_time": 0.5002126998901367, "loss": 0.15061978995800018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.220793975886764, "step_time": 0.4562302360534668} +{"epoch": 0, "iter": 11944, "iter_tflops": 39.45431650212714, "iter_time": 0.5229109344482422, "loss": 0.10348164290189743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.07227272962945, "step_time": 0.47898780822753906} +{"epoch": 0, "iter": 11945, "iter_tflops": 18.353017385868142, "iter_time": 1.1241254272460937, "loss": 0.05476975440979004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.660329063489346, "step_time": 1.0493768157958985} +{"epoch": 0, "iter": 11946, "iter_tflops": 18.660035312607537, "iter_time": 1.105629928588867, "loss": 0.08429588377475739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.9693517243049, "step_time": 0.8982009487152101} +{"epoch": 0, "iter": 11947, "iter_tflops": 52.359996343178594, "iter_time": 0.3940239677429199, "loss": 0.029302457347512245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.62607745275796, "step_time": 0.35801662063598627} +{"epoch": 0, "iter": 11948, "iter_tflops": 55.61421017553831, "iter_time": 0.37096802139282226, "loss": 0.032990165054798126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.6965370389985, "step_time": 0.339905611038208} +{"epoch": 0, "iter": 11949, "iter_tflops": 22.437886555787436, "iter_time": 0.9194757919311523, "loss": 0.313518762588501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.510840939963515, "step_time": 0.8775140609741212} +{"epoch": 0, "iter": 11950, "iter_tflops": 17.15889841159928, "iter_time": 1.2023553619384768, "loss": 0.2298550009727478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.13151115842687, "step_time": 0.97631888961792} +{"epoch": 0, "iter": 11951, 
"iter_tflops": 47.705174633545695, "iter_time": 0.4324707679748535, "loss": 0.24063608050346375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.86408189566817, "step_time": 0.39779154968261715} +{"epoch": 0, "iter": 11952, "iter_tflops": 46.32417945251931, "iter_time": 0.4453633880615234, "loss": 0.28952860832214355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.193247298820616, "step_time": 0.41103324890136717} +{"epoch": 0, "iter": 11953, "iter_tflops": 19.440423627418735, "iter_time": 1.0612471160888672, "loss": 0.03348696976900101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.226669681751545, "step_time": 1.019994583129883} +{"epoch": 0, "iter": 11954, "iter_tflops": 17.286423294256775, "iter_time": 1.1934853820800781, "loss": 0.04473438113927841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.653730636749792, "step_time": 0.9107150535583495} +{"epoch": 0, "iter": 11955, "iter_tflops": 45.31276298008333, "iter_time": 0.45530424880981446, "loss": 0.019907476380467415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04140736310908, "step_time": 0.4122804412841797} +{"epoch": 0, "iter": 11956, "iter_tflops": 41.5847470526152, "iter_time": 0.496121654510498, "loss": 0.043904706835746765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.77843643253676, "step_time": 0.45067274284362796} +{"epoch": 0, "iter": 11957, "iter_tflops": 34.2572821566024, "iter_time": 0.6022396469116211, "loss": 0.5216329097747803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.67246515616881, "step_time": 0.5476438407897949} +{"epoch": 0, "iter": 11958, "iter_tflops": 36.19149978425692, "iter_time": 0.5700535659790039, "loss": 0.5442444086074829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.696448265803895, "step_time": 0.5069507141113282} +{"epoch": 0, "iter": 11959, "iter_tflops": 35.60517099685541, "iter_time": 0.5794409332275391, "loss": 0.6225987076759338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.79403438085372, "step_time": 0.5318109817504882} +{"epoch": 0, 
"iter": 11960, "iter_tflops": 41.455103229016565, "iter_time": 0.49767319107055663, "loss": 0.6888718605041504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.21112445659084, "step_time": 0.4563278121948242} +{"epoch": 0, "iter": 11961, "iter_tflops": 16.26753598566998, "iter_time": 1.2682371520996094, "loss": 0.06443332880735397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.4027414451293, "step_time": 1.1855082473754883} +{"epoch": 0, "iter": 11962, "iter_tflops": 33.42652063185415, "iter_time": 0.6172073287963866, "loss": 0.05003020539879799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.65747157940359, "step_time": 0.547861888885498} +{"epoch": 0, "iter": 11963, "iter_tflops": 48.799992849372565, "iter_time": 0.42276837158203123, "loss": 0.041317351162433624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.43296986842074, "step_time": 0.3861116752624512} +{"epoch": 0, "iter": 11964, "iter_tflops": 53.77051061919871, "iter_time": 0.3836878852844238, "loss": 0.068483866751194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.797505746167005, "step_time": 0.3508838214874267} +{"epoch": 0, "iter": 11965, "iter_tflops": 33.32897093586752, "iter_time": 0.6190138168334961, "loss": 0.17991523444652557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.540683138871394, "step_time": 0.580492317199707} +{"epoch": 0, "iter": 11966, "iter_tflops": 18.170808563082325, "iter_time": 1.1353976593017578, "loss": 0.27827808260917664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.959537096138824, "step_time": 0.861080638885498} +{"epoch": 0, "iter": 11967, "iter_tflops": 45.333693800463074, "iter_time": 0.4550940322875977, "loss": 0.28160330653190613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.8896340566032, "step_time": 0.4219932079315186} +{"epoch": 0, "iter": 11968, "iter_tflops": 46.72006813959789, "iter_time": 0.441589542388916, "loss": 0.2034429907798767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.57374124768236, "step_time": 0.4079408206939697} 
+{"epoch": 0, "iter": 11969, "iter_tflops": 44.68801397597749, "iter_time": 0.46166950988769534, "loss": 0.33229774236679077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0934074679244, "step_time": 0.4202416286468506} +{"epoch": 0, "iter": 11970, "iter_tflops": 38.61540934539449, "iter_time": 0.5342710037231445, "loss": 0.2656688094139099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.91648980952434, "step_time": 0.48072649002075196} +{"epoch": 0, "iter": 11971, "iter_tflops": 46.046043998492955, "iter_time": 0.44805355072021485, "loss": 0.2502346932888031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.83329265149257, "step_time": 0.41400221443176266} +{"epoch": 0, "iter": 11972, "iter_tflops": 50.64419490918236, "iter_time": 0.40737331390380854, "loss": 0.313306987285614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.69784656528317, "step_time": 0.3771829204559326} +{"epoch": 0, "iter": 11973, "iter_tflops": 34.5485143836687, "iter_time": 0.597162971496582, "loss": 0.18062636256217957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.250136297146064, "step_time": 0.5538528327941895} +{"epoch": 0, "iter": 11974, "iter_tflops": 42.994464883864644, "iter_time": 0.4798546409606934, "loss": 0.20834288001060486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81034968133886, "step_time": 0.4407378635406494} +{"epoch": 0, "iter": 11975, "iter_tflops": 46.007454072769846, "iter_time": 0.4484293670654297, "loss": 0.18442504107952118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.075632889256596, "step_time": 0.41199865722656254} +{"epoch": 0, "iter": 11976, "iter_tflops": 48.947429356597645, "iter_time": 0.4214949340820313, "loss": 0.293459415435791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.77282093335913, "step_time": 0.3909416465759278} +{"epoch": 0, "iter": 11977, "iter_tflops": 39.8232389123138, "iter_time": 0.5180666885375976, "loss": 0.7510402202606201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.969144412211286, "step_time": 
0.4801374053955078} +{"epoch": 0, "iter": 11978, "iter_tflops": 33.43745805259969, "iter_time": 0.6170054397583008, "loss": 0.8379231691360474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.15960574215231, "step_time": 0.555202163696289} +{"epoch": 0, "iter": 11979, "iter_tflops": 35.4085435875276, "iter_time": 0.5826586303710938, "loss": 0.6930155754089355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.43582628167583, "step_time": 0.5367672691345216} +{"epoch": 0, "iter": 11980, "iter_tflops": 36.152291529216974, "iter_time": 0.5706718063354492, "loss": 0.7870843410491943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.34418444381494, "step_time": 0.5243746643066406} +{"epoch": 0, "iter": 11981, "iter_tflops": 21.908665464576938, "iter_time": 0.9416864547729493, "loss": 0.5165497064590454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.5986386091677, "step_time": 0.874249309539795} +{"epoch": 0, "iter": 11982, "iter_tflops": 26.722354160554804, "iter_time": 0.7720537414550781, "loss": 0.5057677626609802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.280663764726455, "step_time": 0.6813289718627931} +{"epoch": 0, "iter": 11983, "iter_tflops": 48.65054323647575, "iter_time": 0.42406707382202147, "loss": 0.4205985963344574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.600579620122886, "step_time": 0.3922217903137207} +{"epoch": 0, "iter": 11984, "iter_tflops": 49.944575921538934, "iter_time": 0.41307976150512693, "loss": 0.6526198387145996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.9474742943918, "step_time": 0.3824292755126953} +{"epoch": 0, "iter": 11985, "iter_tflops": 33.24174305932601, "iter_time": 0.6206381378173829, "loss": 0.7669734358787537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.39918162865671, "step_time": 0.5828127250671387} +{"epoch": 0, "iter": 11986, "iter_tflops": 40.549309055903215, "iter_time": 0.5087902603149415, "loss": 0.658381998538971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.29225534573255, 
"step_time": 0.46579460334777834} +{"epoch": 0, "iter": 11987, "iter_tflops": 42.569290478264215, "iter_time": 0.48464734268188475, "loss": 0.6663942933082581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.752213204266354, "step_time": 0.4509310493469238} +{"epoch": 0, "iter": 11988, "iter_tflops": 42.78965820350813, "iter_time": 0.48215139770507814, "loss": 0.7623074650764465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.214720603958064, "step_time": 0.4464182243347168} +{"epoch": 0, "iter": 11989, "iter_tflops": 32.3683807411105, "iter_time": 0.6373841705322265, "loss": 0.039525482803583145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.59033495769507, "step_time": 0.5964409866333009} +{"epoch": 0, "iter": 11990, "iter_tflops": 8.690725255160325, "iter_time": 2.3739208068847653, "loss": 0.07531661540269852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.2108136436003, "step_time": 1.8402851181030273} +{"epoch": 0, "iter": 11991, "iter_tflops": 9.57396462891641, "iter_time": 2.1549164123535154, "loss": 0.07595685124397278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.20659873639413, "step_time": 1.6901590652465819} +{"epoch": 0, "iter": 11992, "iter_tflops": 36.95608271348649, "iter_time": 0.5582597503662109, "loss": 0.07836848497390747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.166117535164034, "step_time": 0.4892813167572021} +{"epoch": 0, "iter": 11993, "iter_tflops": 16.13914714325474, "iter_time": 0.9743785552978514, "loss": 0.32389143109321594, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 17.175333064438536, "step_time": 0.9155944061279297} +{"epoch": 0, "iter": 11994, "iter_tflops": 10.440109157616858, "iter_time": 1.506271499633789, "loss": 0.34401825070381165, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 11.658554148256243, "step_time": 1.348849838256836} +{"epoch": 0, "iter": 11995, "iter_tflops": 22.358609823678513, "iter_time": 0.7033370590209961, "loss": 0.38655728101730347, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 
24.08423141650555, "step_time": 0.6529433555603028} +{"epoch": 0, "iter": 11996, "iter_tflops": 23.064722454986427, "iter_time": 0.681804817199707, "loss": 0.3740335702896118, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.87512286955452, "step_time": 0.6321833648681641} +{"epoch": 0, "iter": 11997, "iter_tflops": 22.695042319209797, "iter_time": 0.6352896347045899, "loss": 0.08520311862230301, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 25.348487436774192, "step_time": 0.5687883815765381} +{"epoch": 0, "iter": 11998, "iter_tflops": 34.25570359972751, "iter_time": 0.42089122772216797, "loss": 0.03653346374630928, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 37.57893917929164, "step_time": 0.38367036056518555} +{"epoch": 0, "iter": 11999, "iter_tflops": 38.89996179035698, "iter_time": 0.37064111328125005, "loss": 0.05318355932831764, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 42.53819864603644, "step_time": 0.3389406604766846} +{"epoch": 0, "iter": 12000, "iter_tflops": 3.0111725103115248, "iter_time": 4.788143188476563, "loss": 0.021963944658637047, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 3.032720308797722, "step_time": 4.754122924804688} +{"epoch": 0, "iter": 12001, "iter_tflops": 0.9016036667984149, "iter_time": 2.610595153808594, "loss": 1.1818550825119019, "lr": 3e-05, "seqlen": 960.0, "step_tflops": 0.9230525470099747, "step_time": 2.549933013916016} +{"epoch": 0, "iter": 12002, "iter_tflops": 2.1380612824519107, "iter_time": 1.100867492675781, "loss": 1.0096499919891357, "lr": 3e-05, "seqlen": 960.0, "step_tflops": 2.4338521283166417, "step_time": 0.9670768966674804} +{"epoch": 0, "iter": 12003, "iter_tflops": 2.93612678606233, "iter_time": 0.8016418685913087, "loss": 0.7567586302757263, "lr": 3e-05, "seqlen": 960.0, "step_tflops": 3.536792099628982, "step_time": 0.6654963302612306} +{"epoch": 0, "iter": 12004, "iter_tflops": 3.266953584832838, "iter_time": 0.7204639129638672, "loss": 0.36500242352485657, "lr": 3e-05, "seqlen": 960.0, 
"step_tflops": 3.4903100367374194, "step_time": 0.6743590507507324} +{"epoch": 0, "iter": 12005, "iter_tflops": 17.907207176013962, "iter_time": 1.1521111755371094, "loss": 0.9416970610618591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.248137887970792, "step_time": 1.0718488006591798} +{"epoch": 0, "iter": 12006, "iter_tflops": 28.66015449398496, "iter_time": 0.7198528366088868, "loss": 0.8610883355140686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.85780653480801, "step_time": 0.6685858726501466} +{"epoch": 0, "iter": 12007, "iter_tflops": 30.76589719152053, "iter_time": 0.6705831909179687, "loss": 0.6373754143714905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.1188384288497, "step_time": 0.6229413375854492} +{"epoch": 0, "iter": 12008, "iter_tflops": 25.43953951643059, "iter_time": 0.8109853363037111, "loss": 0.639386773109436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.19771608526171, "step_time": 0.731658317565918} +{"epoch": 0, "iter": 12009, "iter_tflops": 12.266448498916112, "iter_time": 1.6819125366210939, "loss": 0.07472702115774155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.08561622672943, "step_time": 1.576623764038086} +{"epoch": 0, "iter": 12010, "iter_tflops": 25.60790307986563, "iter_time": 0.8056533737182617, "loss": 0.08731347322463989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.09054760669086, "step_time": 0.7092026519775391} +{"epoch": 0, "iter": 12011, "iter_tflops": 23.90808855971375, "iter_time": 0.8629336242675781, "loss": 0.05951180309057236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.22548962292086, "step_time": 0.7577859497070313} +{"epoch": 0, "iter": 12012, "iter_tflops": 25.796718509987276, "iter_time": 0.7997565078735351, "loss": 0.04727571830153465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.956985183359567, "step_time": 0.7124738082885742} +{"epoch": 0, "iter": 12013, "iter_tflops": 10.994641697528987, "iter_time": 1.876468017578125, "loss": 0.64503014087677, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 11.875816397168483, "step_time": 1.7372358093261715} +{"epoch": 0, "iter": 12014, "iter_tflops": 19.599516718788095, "iter_time": 1.0526327667236328, "loss": 0.7590225338935852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.31529287578958, "step_time": 0.9245271224975585} +{"epoch": 0, "iter": 12015, "iter_tflops": 31.17089834111961, "iter_time": 0.6618703536987305, "loss": 0.6837884783744812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.60956347954368, "step_time": 0.5635438270568848} +{"epoch": 0, "iter": 12016, "iter_tflops": 28.043602469314106, "iter_time": 0.7356791458129882, "loss": 0.5675267577171326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.225582883586178, "step_time": 0.6825705757141114} +{"epoch": 0, "iter": 12017, "iter_tflops": 8.815224033078817, "iter_time": 2.3403935546874997, "loss": 0.04479261487722397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.429007279343859, "step_time": 2.1880451354980472} +{"epoch": 0, "iter": 12018, "iter_tflops": 21.833220059117526, "iter_time": 0.9449404830932617, "loss": 0.02146993950009346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.254600167974285, "step_time": 0.8169241790771484} +{"epoch": 0, "iter": 12019, "iter_tflops": 31.599033301154954, "iter_time": 0.6529026794433593, "loss": 0.032541561871767044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.68297453055382, "step_time": 0.5071186103820801} +{"epoch": 0, "iter": 12020, "iter_tflops": 51.33060741969422, "iter_time": 0.4019257621765137, "loss": 0.027977600693702698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.16235206343116, "step_time": 0.36734739112854} +{"epoch": 0, "iter": 12021, "iter_tflops": 9.995963141552155, "iter_time": 2.0639425354003906, "loss": 0.7169902920722961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.229226728556899, "step_time": 2.016877136230469} +{"epoch": 0, "iter": 12022, "iter_tflops": 21.531495469220634, "iter_time": 0.9581820983886719, "loss": 0.5465930700302124, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 24.657866542766904, "step_time": 0.8366941833496093} +{"epoch": 0, "iter": 12023, "iter_tflops": 22.809115159666124, "iter_time": 0.9045109100341797, "loss": 0.7241535186767578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.95459196325594, "step_time": 0.794891845703125} +{"epoch": 0, "iter": 12024, "iter_tflops": 32.318595232328384, "iter_time": 0.6383660354614258, "loss": 0.7165864109992981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.053702768939175, "step_time": 0.542157320022583} +{"epoch": 0, "iter": 12025, "iter_tflops": 9.129700667652548, "iter_time": 2.2597776489257813, "loss": 0.3056217133998871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.61702901970782, "step_time": 2.1452668457031248} +{"epoch": 0, "iter": 12026, "iter_tflops": 20.27008215918873, "iter_time": 1.0178100585937502, "loss": 0.1579882949590683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.635333937283193, "step_time": 0.8728919830322266} +{"epoch": 0, "iter": 12027, "iter_tflops": 21.668044292761685, "iter_time": 0.9521437759399415, "loss": 0.31793493032455444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.427422510122444, "step_time": 0.8445874099731447} +{"epoch": 0, "iter": 12028, "iter_tflops": 28.8787039015936, "iter_time": 0.7144051055908203, "loss": 0.224721297621727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.55053185005773, "step_time": 0.5803315010070801} +{"epoch": 0, "iter": 12029, "iter_tflops": 20.537532508186604, "iter_time": 1.0045556106567384, "loss": 0.25842350721359253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.44520085760367, "step_time": 0.9191761589050292} +{"epoch": 0, "iter": 12030, "iter_tflops": 24.620211041656834, "iter_time": 0.8379738693237304, "loss": 0.3009684085845947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.6330741461712, "step_time": 0.7746418380737304} +{"epoch": 0, "iter": 12031, "iter_tflops": 29.38317627797196, "iter_time": 0.702139663696289, "loss": 0.3450009822845459, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 34.75862012598933, "step_time": 0.593553295135498} +{"epoch": 0, "iter": 12032, "iter_tflops": 32.805688383169546, "iter_time": 0.6288876876831054, "loss": 0.3472352623939514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.580631206277936, "step_time": 0.5639895439147949} +{"epoch": 0, "iter": 12033, "iter_tflops": 20.545775996544585, "iter_time": 1.004152557373047, "loss": 0.4051523208618164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.471682387292415, "step_time": 0.9608512802124024} +{"epoch": 0, "iter": 12034, "iter_tflops": 14.332696861987152, "iter_time": 1.4394425354003906, "loss": 0.6435694694519043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.468693457879304, "step_time": 1.1170846252441404} +{"epoch": 0, "iter": 12035, "iter_tflops": 34.00402660706212, "iter_time": 0.6067250137329101, "loss": 0.9612782001495361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.36864553045114, "step_time": 0.567276927947998} +{"epoch": 0, "iter": 12036, "iter_tflops": 37.421223033724864, "iter_time": 0.5513206634521484, "loss": 0.6360288858413696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.92531847208831, "step_time": 0.516742115020752} +{"epoch": 0, "iter": 12037, "iter_tflops": 8.800919013806046, "iter_time": 2.3441976318359377, "loss": 0.6727917194366455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.097884105590802, "step_time": 2.267680404663086} +{"epoch": 0, "iter": 12038, "iter_tflops": 31.561243421361965, "iter_time": 0.6536844329833984, "loss": 0.6343767642974854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.182599778051184, "step_time": 0.5403271026611328} +{"epoch": 0, "iter": 12039, "iter_tflops": 35.26179365489779, "iter_time": 0.58508349609375, "loss": 0.6656488180160522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.159467588152374, "step_time": 0.5406546478271484} +{"epoch": 0, "iter": 12040, "iter_tflops": 31.10943695133255, "iter_time": 0.663177978515625, "loss": 0.9409313201904297, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 32.86368613678928, "step_time": 0.6277778282165527} +{"epoch": 0, "iter": 12041, "iter_tflops": 6.520717260152303, "iter_time": 3.1639300842285154, "loss": 0.6469425559043884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 6.780949712172085, "step_time": 3.0425079650878906} +{"epoch": 0, "iter": 12042, "iter_tflops": 9.535685117605672, "iter_time": 2.1635669860839846, "loss": 0.7131525278091431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.53370599497664, "step_time": 1.1766533279418945} +{"epoch": 0, "iter": 12043, "iter_tflops": 41.11573073713671, "iter_time": 0.5017810249328614, "loss": 0.7568348050117493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.418082423289896, "step_time": 0.4644751052856445} +{"epoch": 0, "iter": 12044, "iter_tflops": 32.753814063943366, "iter_time": 0.6298836975097656, "loss": 0.7025952339172363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.25797360013516, "step_time": 0.5255261955261231} +{"epoch": 0, "iter": 12045, "iter_tflops": 3.504832995691432, "iter_time": 4.486843994140625, "loss": 0.3213735520839691, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 3.5847495189441543, "step_time": 4.3868166503906245} +{"epoch": 0, "iter": 12046, "iter_tflops": 12.723357975042019, "iter_time": 1.2359660797119139, "loss": 0.28338387608528137, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 14.005360096858793, "step_time": 1.1228300285339354} +{"epoch": 0, "iter": 12047, "iter_tflops": 21.663901286486993, "iter_time": 0.7258913650512696, "loss": 0.37597572803497314, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 23.165925319820996, "step_time": 0.6788262786865235} +{"epoch": 0, "iter": 12048, "iter_tflops": 19.46279190076755, "iter_time": 0.8079847412109374, "loss": 0.44651174545288086, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 22.576501168789914, "step_time": 0.6965489807128906} +{"epoch": 0, "iter": 12049, "iter_tflops": 5.501827984879965, "iter_time": 2.598331817626953, "loss": 0.430194228887558, 
"lr": 3e-05, "seqlen": 5728.0, "step_tflops": 5.837550898724264, "step_time": 2.4488993682861326} +{"epoch": 0, "iter": 12050, "iter_tflops": 13.63983231855143, "iter_time": 1.0480755462646485, "loss": 0.1920141875743866, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 15.566387237507957, "step_time": 0.9183617553710937} +{"epoch": 0, "iter": 12051, "iter_tflops": 14.610641637899473, "iter_time": 0.978435791015625, "loss": 0.3138045072555542, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 16.579336873766213, "step_time": 0.8622525024414063} +{"epoch": 0, "iter": 12052, "iter_tflops": 14.452184958425716, "iter_time": 0.989163558959961, "loss": 0.42951512336730957, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 16.77344713298584, "step_time": 0.8522741088867188} +{"epoch": 0, "iter": 12053, "iter_tflops": 4.057820647820174, "iter_time": 5.084279296875, "loss": 0.4058051109313965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.14471321990654, "step_time": 4.977689025878906} +{"epoch": 0, "iter": 12054, "iter_tflops": 24.297397069107944, "iter_time": 0.8491071472167969, "loss": 0.43711286783218384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.088800862458353, "step_time": 0.7344953460693359} +{"epoch": 0, "iter": 12055, "iter_tflops": 35.55462764395197, "iter_time": 0.5802646484375, "loss": 0.43653830885887146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.585579251885456, "step_time": 0.48446196746826165} +{"epoch": 0, "iter": 12056, "iter_tflops": 45.99752661389892, "iter_time": 0.44852614974975585, "loss": 0.3911915123462677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04497446820345, "step_time": 0.4122510547637939} +{"epoch": 0, "iter": 12057, "iter_tflops": 26.062881449181013, "iter_time": 0.6647748413085939, "loss": 0.4121195673942566, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 27.85456807253847, "step_time": 0.6220145950317383} +{"epoch": 0, "iter": 12058, "iter_tflops": 31.010887049671677, "iter_time": 0.5587053298950195, "loss": 
0.36234235763549805, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 33.09164027023264, "step_time": 0.5235747680664062} +{"epoch": 0, "iter": 12059, "iter_tflops": 32.20991977998169, "iter_time": 0.5379072036743164, "loss": 0.3429317772388458, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 34.28084572443862, "step_time": 0.5054119148254395} +{"epoch": 0, "iter": 12060, "iter_tflops": 29.443902219726464, "iter_time": 0.5884392547607422, "loss": 0.35894373059272766, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 31.23435215305075, "step_time": 0.5547080917358399} +{"epoch": 0, "iter": 12061, "iter_tflops": 24.24983113827933, "iter_time": 0.8507726669311524, "loss": 0.7971237897872925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.82797625518266, "step_time": 0.7987886199951171} +{"epoch": 0, "iter": 12062, "iter_tflops": 19.863114362240466, "iter_time": 1.0386635818481447, "loss": 0.779860258102417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.061249150758478, "step_time": 0.7916387042999268} +{"epoch": 0, "iter": 12063, "iter_tflops": 45.37856221783963, "iter_time": 0.45464405441284184, "loss": 0.8087730407714844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.18086901099777, "step_time": 0.41949428558349605} +{"epoch": 0, "iter": 12064, "iter_tflops": 46.21472810725555, "iter_time": 0.44641815185546874, "loss": 0.7179063558578491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.596185766552914, "step_time": 0.4159814548492432} +{"epoch": 0, "iter": 12065, "iter_tflops": 45.148937270520314, "iter_time": 0.4569563484191894, "loss": 0.0948285162448883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45204128751853, "step_time": 0.41719397163391114} +{"epoch": 0, "iter": 12066, "iter_tflops": 39.23953781441513, "iter_time": 0.5257731018066406, "loss": 0.14824038743972778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.295851347448185, "step_time": 0.4765143280029297} +{"epoch": 0, "iter": 12067, "iter_tflops": 39.451602509295576, "iter_time": 
0.522946907043457, "loss": 0.10418549925088882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.42224563177363, "step_time": 0.47512728118896486} +{"epoch": 0, "iter": 12068, "iter_tflops": 44.09013030586255, "iter_time": 0.46792997360229494, "loss": 0.11549539119005203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.981744169639214, "step_time": 0.4299779815673828} +{"epoch": 0, "iter": 12069, "iter_tflops": 31.828769018225376, "iter_time": 0.6481901168823243, "loss": 0.5686266422271729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.80080812620158, "step_time": 0.5928337478637695} +{"epoch": 0, "iter": 12070, "iter_tflops": 33.66890180145316, "iter_time": 0.6127640762329101, "loss": 0.8473644852638245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.63390215930694, "step_time": 0.5631694221496583} +{"epoch": 0, "iter": 12071, "iter_tflops": 34.439905802726216, "iter_time": 0.5990461654663085, "loss": 0.7860288619995117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.430754414269934, "step_time": 0.5511802749633788} +{"epoch": 0, "iter": 12072, "iter_tflops": 41.70027791298279, "iter_time": 0.49474714660644525, "loss": 0.8515492677688599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.19852067540855, "step_time": 0.4564550609588623} +{"epoch": 0, "iter": 12073, "iter_tflops": 19.436645003565925, "iter_time": 1.0614534301757812, "loss": 0.635143518447876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.52455255466725, "step_time": 1.005190902709961} +{"epoch": 0, "iter": 12074, "iter_tflops": 24.23918056873985, "iter_time": 0.8511464920043945, "loss": 0.47815918922424316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.03633507752715, "step_time": 0.7630876541137694} +{"epoch": 0, "iter": 12075, "iter_tflops": 45.40452132748608, "iter_time": 0.4543841209411621, "loss": 0.6715205311775208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.804278067706846, "step_time": 0.42273125076293944} +{"epoch": 0, "iter": 12076, "iter_tflops": 42.2029398515985, 
"iter_time": 0.4888544158935547, "loss": 0.49390754103660583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.279717645311806, "step_time": 0.455636531829834} +{"epoch": 0, "iter": 12077, "iter_tflops": 39.76026216710024, "iter_time": 0.5188872604370118, "loss": 0.6379827260971069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.17102881430967, "step_time": 0.47789209747314454} +{"epoch": 0, "iter": 12078, "iter_tflops": 42.34465555347431, "iter_time": 0.4872183570861816, "loss": 0.7193881273269653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.64356457280824, "step_time": 0.45200443267822266} +{"epoch": 0, "iter": 12079, "iter_tflops": 44.28930339678114, "iter_time": 0.4658256492614746, "loss": 0.7991388440132141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.72232833656014, "step_time": 0.4323153171539307} +{"epoch": 0, "iter": 12080, "iter_tflops": 43.048800251655074, "iter_time": 0.4792489776611328, "loss": 0.7880329489707947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.30592529576905, "step_time": 0.4455389537811279} +{"epoch": 0, "iter": 12081, "iter_tflops": 23.260995840462293, "iter_time": 0.8869393920898437, "loss": 0.3112115263938904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.415077543447378, "step_time": 0.8450144577026367} +{"epoch": 0, "iter": 12082, "iter_tflops": 14.234357814424909, "iter_time": 1.4493870239257813, "loss": 0.21323873102664948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.37428638512541, "step_time": 1.1228242053985595} +{"epoch": 0, "iter": 12083, "iter_tflops": 50.299252546777474, "iter_time": 0.41016699981689453, "loss": 0.22339332103729248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.7091047417653, "step_time": 0.37710530281066895} +{"epoch": 0, "iter": 12084, "iter_tflops": 51.96306212964023, "iter_time": 0.39703382873535153, "loss": 0.2263062447309494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.452784775184846, "step_time": 0.36545749855041504} +{"epoch": 0, "iter": 12085, "iter_tflops": 
32.310667386798535, "iter_time": 0.6385226669311523, "loss": 0.21989808976650238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.53738711149158, "step_time": 0.5973553657531739} +{"epoch": 0, "iter": 12086, "iter_tflops": 12.532549410414745, "iter_time": 1.6462008514404296, "loss": 0.18722137808799744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.159072876859977, "step_time": 1.3609733047485353} +{"epoch": 0, "iter": 12087, "iter_tflops": 16.462057382692294, "iter_time": 1.253251220703125, "loss": 0.22708436846733093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.912238710603113, "step_time": 1.0908858451843262} +{"epoch": 0, "iter": 12088, "iter_tflops": 30.88719351683866, "iter_time": 0.6679497604370118, "loss": 0.1546330749988556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.43585548293532, "step_time": 0.5991166248321533} +{"epoch": 0, "iter": 12089, "iter_tflops": 22.711370993176565, "iter_time": 0.7122547302246094, "loss": 0.3341572880744934, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 24.677239768570956, "step_time": 0.6555142135620118} +{"epoch": 0, "iter": 12090, "iter_tflops": 24.011198076801666, "iter_time": 0.6736973876953125, "loss": 0.5352033376693726, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 25.814696507917784, "step_time": 0.6266307029724121} +{"epoch": 0, "iter": 12091, "iter_tflops": 23.890501119380538, "iter_time": 0.6771009674072266, "loss": 0.3070600926876068, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 25.78722916310835, "step_time": 0.6272981605529786} +{"epoch": 0, "iter": 12092, "iter_tflops": 26.09386190810609, "iter_time": 0.6199266891479491, "loss": 0.43615132570266724, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 27.935561440467946, "step_time": 0.5790569648742676} +{"epoch": 0, "iter": 12093, "iter_tflops": 17.9852692831516, "iter_time": 1.1471106262207031, "loss": 0.09415152668952942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.290501250178657, "step_time": 1.0694949417114257} +{"epoch": 0, "iter": 12094, 
"iter_tflops": 19.019973264449778, "iter_time": 1.084706756591797, "loss": 0.05456727743148804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.439970795732165, "step_time": 0.8801672019958496} +{"epoch": 0, "iter": 12095, "iter_tflops": 49.20369813529647, "iter_time": 0.4192996520996094, "loss": 0.07597470283508301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.38548448003816, "step_time": 0.38645511436462404} +{"epoch": 0, "iter": 12096, "iter_tflops": 51.23220601992345, "iter_time": 0.40269773864746095, "loss": 0.061299849301576614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.87344412809848, "step_time": 0.3692468547821045} +{"epoch": 0, "iter": 12097, "iter_tflops": 20.40999708747163, "iter_time": 0.6904337921142578, "loss": 0.051150280982255936, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 21.72004337844521, "step_time": 0.6487902183532714} +{"epoch": 0, "iter": 12098, "iter_tflops": 11.803494832153795, "iter_time": 1.1938626556396483, "loss": 0.03443000465631485, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 14.320682777447164, "step_time": 0.9840139541625976} +{"epoch": 0, "iter": 12099, "iter_tflops": 35.344934448292655, "iter_time": 0.3986922569274902, "loss": 0.033715032041072845, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 38.773446523529564, "step_time": 0.3634382019042969} +{"epoch": 0, "iter": 12100, "iter_tflops": 38.30134446242174, "iter_time": 0.3679179382324219, "loss": 0.0618550106883049, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 41.91402757607942, "step_time": 0.33620609855651856} +{"epoch": 0, "iter": 12101, "iter_tflops": 31.758402274068768, "iter_time": 0.6496263046264648, "loss": 0.3995421826839447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.78287791156005, "step_time": 0.6106967430114746} +{"epoch": 0, "iter": 12102, "iter_tflops": 12.331429755777759, "iter_time": 1.6730495910644532, "loss": 0.7022374868392944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.67734141718776, "step_time": 1.4056424064636233} +{"epoch": 
0, "iter": 12103, "iter_tflops": 47.36249867499794, "iter_time": 0.4355997695922851, "loss": 0.5082240104675293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.484350338737634, "step_time": 0.40072552871704104} +{"epoch": 0, "iter": 12104, "iter_tflops": 47.011830110287136, "iter_time": 0.43884897613525387, "loss": 0.5910149216651917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.92544696543593, "step_time": 0.4051234645843506} +{"epoch": 0, "iter": 12105, "iter_tflops": 25.97070237361071, "iter_time": 0.7943987503051757, "loss": 0.49841707944869995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.398681296598472, "step_time": 0.7529958572387696} +{"epoch": 0, "iter": 12106, "iter_tflops": 13.764218323696785, "iter_time": 1.4988932189941406, "loss": 0.6943694949150085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.821952601866798, "step_time": 1.0408204441070557} +{"epoch": 0, "iter": 12107, "iter_tflops": 35.66066893407556, "iter_time": 0.5785391616821288, "loss": 0.6696780920028687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.5740865587025, "step_time": 0.5348433456420898} +{"epoch": 0, "iter": 12108, "iter_tflops": 37.280191451246544, "iter_time": 0.5534063186645508, "loss": 0.559383749961853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.46589173636733, "step_time": 0.5098390922546387} +{"epoch": 0, "iter": 12109, "iter_tflops": 22.071435170935807, "iter_time": 0.9347418212890626, "loss": 0.5138952732086182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.381679889135107, "step_time": 0.8823614730834961} +{"epoch": 0, "iter": 12110, "iter_tflops": 25.39890507990178, "iter_time": 0.8122827911376954, "loss": 0.5093327760696411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.830743448459945, "step_time": 0.6691727542877196} +{"epoch": 0, "iter": 12111, "iter_tflops": 51.84291790251944, "iter_time": 0.39795394134521483, "loss": 0.6522268056869507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.191836786290594, "step_time": 0.36715463829040523} 
+{"epoch": 0, "iter": 12112, "iter_tflops": 52.085275823294076, "iter_time": 0.39610222244262694, "loss": 0.5616047978401184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.6473809133409, "step_time": 0.3642020721435547} +{"epoch": 0, "iter": 12113, "iter_tflops": 39.61904220499481, "iter_time": 0.4445827445983887, "loss": 0.002471586922183633, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 43.62165830675301, "step_time": 0.4037889251708984} +{"epoch": 0, "iter": 12114, "iter_tflops": 12.276267730690538, "iter_time": 1.4347962188720702, "loss": 0.002928895642980933, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 14.699515735410161, "step_time": 1.1982668571472168} +{"epoch": 0, "iter": 12115, "iter_tflops": 9.790783506828443, "iter_time": 1.7990329895019532, "loss": 0.002887698821723461, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 11.472535871061481, "step_time": 1.5353137893676758} +{"epoch": 0, "iter": 12116, "iter_tflops": 19.555559263727968, "iter_time": 0.9007127990722656, "loss": 0.006804392207413912, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 24.005977418198732, "step_time": 0.7337315292358398} +{"epoch": 0, "iter": 12117, "iter_tflops": 19.642226756767332, "iter_time": 0.7964354248046874, "loss": 0.2270885556936264, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 20.645882316917163, "step_time": 0.7577184143066408} +{"epoch": 0, "iter": 12118, "iter_tflops": 10.295030891846887, "iter_time": 1.5195452423095703, "loss": 0.5327723622322083, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 12.131619989942042, "step_time": 1.2895033988952638} +{"epoch": 0, "iter": 12119, "iter_tflops": 23.21238940619801, "iter_time": 0.6739403228759767, "loss": 0.21002566814422607, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 25.04916040418006, "step_time": 0.6245225372314454} +{"epoch": 0, "iter": 12120, "iter_tflops": 25.809637975140397, "iter_time": 0.6061210632324219, "loss": 0.35253268480300903, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 27.6198619554055, "step_time": 
0.5663954887390137} +{"epoch": 0, "iter": 12121, "iter_tflops": 2.260681077602798, "iter_time": 0.7106437683105469, "loss": 0.3955402672290802, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.509439388515326, "step_time": 0.640198335647583} +{"epoch": 0, "iter": 12122, "iter_tflops": 3.54028094305315, "iter_time": 0.45378853988647455, "loss": 0.3559221625328064, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.8402635423142626, "step_time": 0.418340799331665} +{"epoch": 0, "iter": 12123, "iter_tflops": 3.8795079368558327, "iter_time": 0.4141089401245117, "loss": 0.36600637435913086, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.192042183666553, "step_time": 0.38323538970947263} +{"epoch": 0, "iter": 12124, "iter_tflops": 3.937010332981469, "iter_time": 0.4080606307983398, "loss": 0.2850836515426636, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.256915781185817, "step_time": 0.3773950443267822} +{"epoch": 0, "iter": 12125, "iter_tflops": 24.020493826700918, "iter_time": 0.8588954772949219, "loss": 0.47140127420425415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.233611020468764, "step_time": 0.8176036911010743} +{"epoch": 0, "iter": 12126, "iter_tflops": 12.961970117268152, "iter_time": 1.5916634063720703, "loss": 0.4634719789028168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.988298011562563, "step_time": 1.2144296913146972} +{"epoch": 0, "iter": 12127, "iter_tflops": 41.18390674222539, "iter_time": 0.5009503746032715, "loss": 0.7606831192970276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.15346700151832, "step_time": 0.42844461250305177} +{"epoch": 0, "iter": 12128, "iter_tflops": 42.413436512208925, "iter_time": 0.4864282455444336, "loss": 0.5937932729721069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42762833006063, "step_time": 0.4541529960632324} +{"epoch": 0, "iter": 12129, "iter_tflops": 24.04444404951111, "iter_time": 0.8580399475097656, "loss": 0.33507055044174194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.194077940625696, 
"step_time": 0.8188866271972657} +{"epoch": 0, "iter": 12130, "iter_tflops": 13.59434441980031, "iter_time": 1.517623275756836, "loss": 0.24855569005012512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.395383265880454, "step_time": 1.340083137512207} +{"epoch": 0, "iter": 12131, "iter_tflops": 40.658716277597755, "iter_time": 0.507421173095703, "loss": 0.37933963537216187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.72869352906406, "step_time": 0.46124963378906253} +{"epoch": 0, "iter": 12132, "iter_tflops": 37.56502735267525, "iter_time": 0.5492101287841797, "loss": 0.4372103214263916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.91494951159031, "step_time": 0.5042434062957764} +{"epoch": 0, "iter": 12133, "iter_tflops": 21.534350693346255, "iter_time": 0.9580550537109375, "loss": 0.024580612778663635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.93002205733244, "step_time": 0.8997415466308594} +{"epoch": 0, "iter": 12134, "iter_tflops": 10.870803009009613, "iter_time": 1.897844482421875, "loss": 0.04057168588042259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.943730780600534, "step_time": 1.4795963745117189} +{"epoch": 0, "iter": 12135, "iter_tflops": 13.086667713481843, "iter_time": 1.576497085571289, "loss": 0.03680500015616417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.590701997080814, "step_time": 1.3232947120666505} +{"epoch": 0, "iter": 12136, "iter_tflops": 40.76398483191787, "iter_time": 0.5061108131408691, "loss": 0.030511831864714622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.06700104110655, "step_time": 0.45778713989257813} +{"epoch": 0, "iter": 12137, "iter_tflops": 19.785812840498945, "iter_time": 0.7679137344360351, "loss": 0.32001450657844543, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 21.48892103738881, "step_time": 0.7070525970458984} +{"epoch": 0, "iter": 12138, "iter_tflops": 21.11255875741905, "iter_time": 0.7196568450927735, "loss": 0.39658406376838684, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 
22.73255518374081, "step_time": 0.6683717384338379} +{"epoch": 0, "iter": 12139, "iter_tflops": 23.35747551665052, "iter_time": 0.6504897079467773, "loss": 0.4853735566139221, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.114424021452024, "step_time": 0.6049829139709472} +{"epoch": 0, "iter": 12140, "iter_tflops": 23.29268920609819, "iter_time": 0.6522989807128906, "loss": 0.4257490634918213, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.04822020927827, "step_time": 0.606581916809082} +{"epoch": 0, "iter": 12141, "iter_tflops": 13.01540711839661, "iter_time": 1.3944552764892577, "loss": 0.00896786991506815, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 13.620531590192588, "step_time": 1.3325032882690429} +{"epoch": 0, "iter": 12142, "iter_tflops": 17.50317988683025, "iter_time": 1.0369203338623048, "loss": 0.011829359456896782, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 24.238015475676953, "step_time": 0.7487990570068359} +{"epoch": 0, "iter": 12143, "iter_tflops": 52.649341949887486, "iter_time": 0.3447223167419433, "loss": 0.0036944423336535692, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 57.890423149245755, "step_time": 0.3135130500793457} +{"epoch": 0, "iter": 12144, "iter_tflops": 47.853638989428426, "iter_time": 0.37926902770996096, "loss": 0.0193213801831007, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 52.02854466446439, "step_time": 0.3488354949951172} +{"epoch": 0, "iter": 12145, "iter_tflops": 21.878868913658042, "iter_time": 0.9429689254760742, "loss": 0.010622234083712101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.910156059599643, "step_time": 0.9005217361450195} +{"epoch": 0, "iter": 12146, "iter_tflops": 15.266909758524944, "iter_time": 1.3513601531982422, "loss": 0.012634003534913063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.33632482603142, "step_time": 1.0144946880340577} +{"epoch": 0, "iter": 12147, "iter_tflops": 44.15682972439812, "iter_time": 0.46722315979003903, "loss": 0.0014389089774340391, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 48.967875410682325, "step_time": 0.42131894302368167} +{"epoch": 0, "iter": 12148, "iter_tflops": 46.823637483955785, "iter_time": 0.4406127891540527, "loss": 0.00516639044508338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.79392724760449, "step_time": 0.3983303565979004} +{"epoch": 0, "iter": 12149, "iter_tflops": 28.83992461022309, "iter_time": 0.71536572265625, "loss": 0.6685270667076111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.411543730049047, "step_time": 0.6567997322082519} +{"epoch": 0, "iter": 12150, "iter_tflops": 35.26276893486979, "iter_time": 0.5850673141479492, "loss": 0.6326088905334473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.76839212474496, "step_time": 0.532162733078003} +{"epoch": 0, "iter": 12151, "iter_tflops": 39.622029900358136, "iter_time": 0.5206975402832031, "loss": 0.6410462260246277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.10215807542107, "step_time": 0.47865569686889653} +{"epoch": 0, "iter": 12152, "iter_tflops": 33.419242629219745, "iter_time": 0.6173417434692383, "loss": 0.656097948551178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.40547839011788, "step_time": 0.5667029914855957} +{"epoch": 0, "iter": 12153, "iter_tflops": 10.214009755889847, "iter_time": 1.5075624389648437, "loss": 0.39824676513671875, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 10.841961921627853, "step_time": 1.4202464065551759} +{"epoch": 0, "iter": 12154, "iter_tflops": 13.776799091405463, "iter_time": 1.1176948547363281, "loss": 0.3142811357975006, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 16.480155488443472, "step_time": 0.9343514671325683} +{"epoch": 0, "iter": 12155, "iter_tflops": 23.575423716576985, "iter_time": 0.6531487045288086, "loss": 0.48252594470977783, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.425258223004143, "step_time": 0.6056283607482911} +{"epoch": 0, "iter": 12156, "iter_tflops": 23.896193087333153, "iter_time": 0.6443811950683594, "loss": 0.3812394440174103, "lr": 
3e-05, "seqlen": 6160.0, "step_tflops": 25.702949009902973, "step_time": 0.5990852432250977} +{"epoch": 0, "iter": 12157, "iter_tflops": 23.15859300689897, "iter_time": 0.8908612670898437, "loss": 0.6794339418411255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.226291355574773, "step_time": 0.8178409271240233} +{"epoch": 0, "iter": 12158, "iter_tflops": 18.944433608277684, "iter_time": 1.0890319519042968, "loss": 0.4599698483943939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.12861300933701, "step_time": 0.976452808380127} +{"epoch": 0, "iter": 12159, "iter_tflops": 35.94908016649823, "iter_time": 0.5738976745605469, "loss": 0.4630661606788635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.2361467756918, "step_time": 0.5258185424804688} +{"epoch": 0, "iter": 12160, "iter_tflops": 36.3706072821007, "iter_time": 0.5672463302612305, "loss": 0.463956743478775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56117568770461, "step_time": 0.5214984931945801} +{"epoch": 0, "iter": 12161, "iter_tflops": 25.813819422573545, "iter_time": 0.7992266921997072, "loss": 0.6023815274238586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.820371446023348, "step_time": 0.7415822448730469} +{"epoch": 0, "iter": 12162, "iter_tflops": 25.84672237546186, "iter_time": 0.7982092742919923, "loss": 0.6393612027168274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.93413094058849, "step_time": 0.6460515098571777} +{"epoch": 0, "iter": 12163, "iter_tflops": 40.061119971503686, "iter_time": 0.5149904327392578, "loss": 0.5741325616836548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.771387343299075, "step_time": 0.4713374366760254} +{"epoch": 0, "iter": 12164, "iter_tflops": 41.67937764629398, "iter_time": 0.49499523925781247, "loss": 0.8765461444854736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.492536121655704, "step_time": 0.4535050201416016} +{"epoch": 0, "iter": 12165, "iter_tflops": 14.22885798907133, "iter_time": 1.0649427795410156, "loss": 0.22829487919807434, 
"lr": 3e-05, "seqlen": 6064.0, "step_tflops": 15.041319440216059, "step_time": 1.0074195709228515} +{"epoch": 0, "iter": 12166, "iter_tflops": 7.563623579515155, "iter_time": 2.003394195556641, "loss": 0.2350883036851883, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 9.172707679491076, "step_time": 1.6519570999145508} +{"epoch": 0, "iter": 12167, "iter_tflops": 6.411320221884807, "iter_time": 2.3634632263183595, "loss": 0.5847545862197876, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 7.7645098280423985, "step_time": 1.9515616455078126} +{"epoch": 0, "iter": 12168, "iter_tflops": 25.82598685046133, "iter_time": 0.5867314834594726, "loss": 0.44182220101356506, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 27.713337340368184, "step_time": 0.5467735404968261} +{"epoch": 0, "iter": 12169, "iter_tflops": 21.857064799998586, "iter_time": 0.6970139923095704, "loss": 0.3665519058704376, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 23.264072489741576, "step_time": 0.6548586883544922} +{"epoch": 0, "iter": 12170, "iter_tflops": 24.286479765427046, "iter_time": 0.6272905807495116, "loss": 0.3211868703365326, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 25.996014043714403, "step_time": 0.5860390739440918} +{"epoch": 0, "iter": 12171, "iter_tflops": 29.01693901820632, "iter_time": 0.5250271224975586, "loss": 0.2517643868923187, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 30.833648331527183, "step_time": 0.4940926818847656} +{"epoch": 0, "iter": 12172, "iter_tflops": 27.67025734681324, "iter_time": 0.5505796279907227, "loss": 0.337235689163208, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.24662451769117, "step_time": 0.5209038734436036} +{"epoch": 0, "iter": 12173, "iter_tflops": 32.04835605209889, "iter_time": 0.6437488861083984, "loss": 0.04370683431625366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.55061598908667, "step_time": 0.5971266479492189} +{"epoch": 0, "iter": 12174, "iter_tflops": 11.220261862563028, "iter_time": 1.8387354736328123, "loss": 
0.050344910472631454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.415929639927981, "step_time": 1.537805732727051} +{"epoch": 0, "iter": 12175, "iter_tflops": 39.97902656223619, "iter_time": 0.5160479202270508, "loss": 0.03504672273993492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35442896714858, "step_time": 0.4651416778564453} +{"epoch": 0, "iter": 12176, "iter_tflops": 41.54405056028437, "iter_time": 0.49660765457153316, "loss": 0.08591308444738388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.364596058421974, "step_time": 0.4547840232849121} +{"epoch": 0, "iter": 12177, "iter_tflops": 15.27072145825745, "iter_time": 1.3510228424072266, "loss": 0.02345857210457325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.380748149319395, "step_time": 1.2594719924926758} +{"epoch": 0, "iter": 12178, "iter_tflops": 19.68267084867042, "iter_time": 1.0481856689453126, "loss": 0.03461087867617607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.323342700341286, "step_time": 0.7837565975189209} +{"epoch": 0, "iter": 12179, "iter_tflops": 59.08841256957903, "iter_time": 0.3491563339233399, "loss": 0.0324103981256485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.73086239869666, "step_time": 0.3187211284637452} +{"epoch": 0, "iter": 12180, "iter_tflops": 59.000068368897566, "iter_time": 0.34967914581298826, "loss": 0.04240811616182327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.35988431684075, "step_time": 0.3205582752227783} +{"epoch": 0, "iter": 12181, "iter_tflops": 27.468687062652045, "iter_time": 0.7510767974853516, "loss": 0.460588663816452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.183486398553246, "step_time": 0.7069440994262695} +{"epoch": 0, "iter": 12182, "iter_tflops": 25.775955262293273, "iter_time": 0.800400733947754, "loss": 0.6489614844322205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.50856156279492, "step_time": 0.5651028861999512} +{"epoch": 0, "iter": 12183, "iter_tflops": 40.02203108030951, "iter_time": 
0.5154934158325195, "loss": 0.7578719854354858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.554450485385104, "step_time": 0.4736850833892823} +{"epoch": 0, "iter": 12184, "iter_tflops": 34.637534903198514, "iter_time": 0.5956282272338868, "loss": 0.6792581677436829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.520695630082635, "step_time": 0.5498590354919433} +{"epoch": 0, "iter": 12185, "iter_tflops": 28.271879271790446, "iter_time": 0.7297390213012696, "loss": 0.7065990567207336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.322091922745383, "step_time": 0.6586754665374756} +{"epoch": 0, "iter": 12186, "iter_tflops": 39.31406633425544, "iter_time": 0.524776382446289, "loss": 0.4744158983230591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14529779629768, "step_time": 0.4781771030426025} +{"epoch": 0, "iter": 12187, "iter_tflops": 38.95680670550204, "iter_time": 0.5295889282226561, "loss": 0.7010512948036194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.40545121075114, "step_time": 0.4865198440551758} +{"epoch": 0, "iter": 12188, "iter_tflops": 37.48663596068819, "iter_time": 0.5503586273193359, "loss": 0.6803702116012573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.884860792256525, "step_time": 0.5046144981384277} +{"epoch": 0, "iter": 12189, "iter_tflops": 33.54302308306293, "iter_time": 0.6150636291503906, "loss": 0.19485457241535187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.22726200654374, "step_time": 0.5541931476593017} +{"epoch": 0, "iter": 12190, "iter_tflops": 38.22667001038415, "iter_time": 0.5397041778564453, "loss": 0.11556051671504974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.71778542332586, "step_time": 0.4945395183563232} +{"epoch": 0, "iter": 12191, "iter_tflops": 34.42355467345332, "iter_time": 0.5993307113647461, "loss": 0.20757503807544708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.852458830120206, "step_time": 0.5450397186279297} +{"epoch": 0, "iter": 12192, "iter_tflops": 39.20715615854408, 
"iter_time": 0.5262073440551758, "loss": 0.180598646402359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.944706064593916, "step_time": 0.48041063499450676} +{"epoch": 0, "iter": 12193, "iter_tflops": 19.01404570278593, "iter_time": 1.063229736328125, "loss": 0.03951001912355423, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 20.352701432682654, "step_time": 0.9932980575561523} +{"epoch": 0, "iter": 12194, "iter_tflops": 16.470389278971965, "iter_time": 1.2274329681396483, "loss": 0.038123637437820435, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 20.10024633751487, "step_time": 1.0057736835479736} +{"epoch": 0, "iter": 12195, "iter_tflops": 53.55146864671904, "iter_time": 0.37751156616210935, "loss": 0.03851785138249397, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 58.643334252748836, "step_time": 0.34473310661315926} +{"epoch": 0, "iter": 12196, "iter_tflops": 55.9073464921453, "iter_time": 0.36160361862182616, "loss": 0.019631966948509216, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 60.78728193369199, "step_time": 0.3325744819641113} +{"epoch": 0, "iter": 12197, "iter_tflops": 30.57975056690044, "iter_time": 0.6746651992797852, "loss": 0.46098053455352783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.52409914839123, "step_time": 0.6343325119018555} +{"epoch": 0, "iter": 12198, "iter_tflops": 8.09894354092226, "iter_time": 2.547380828857422, "loss": 0.5889045596122742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.949731411693246, "step_time": 2.0735327072143557} +{"epoch": 0, "iter": 12199, "iter_tflops": 10.488319477003628, "iter_time": 1.967054260253906, "loss": 0.6086510419845581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.810630807578102, "step_time": 1.6104666366577147} +{"epoch": 0, "iter": 12200, "iter_tflops": 28.796479476626534, "iter_time": 0.7164449920654298, "loss": 0.5612725615501404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15325430683726, "step_time": 0.4470127582550049} +{"epoch": 0, "iter": 12201, "iter_tflops": 
20.130939972313232, "iter_time": 0.7791339950561524, "loss": 0.38691213726997375, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 21.379767678484917, "step_time": 0.7336234855651855} +{"epoch": 0, "iter": 12202, "iter_tflops": 23.536973351810985, "iter_time": 0.6663855819702148, "loss": 0.45867758989334106, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.385918036097873, "step_time": 0.6178504028320313} +{"epoch": 0, "iter": 12203, "iter_tflops": 24.712877452608744, "iter_time": 0.6346771926879883, "loss": 0.347982794046402, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 26.55604233916074, "step_time": 0.5906263999938964} +{"epoch": 0, "iter": 12204, "iter_tflops": 26.455917864554035, "iter_time": 0.592861671447754, "loss": 0.38253217935562134, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.268511144613083, "step_time": 0.5548470382690429} +{"epoch": 0, "iter": 12205, "iter_tflops": 27.64871874124974, "iter_time": 0.7461862411499025, "loss": 0.834324300289154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.58999087845178, "step_time": 0.6744393482208253} +{"epoch": 0, "iter": 12206, "iter_tflops": 35.61055840296645, "iter_time": 0.5793532714843749, "loss": 0.6720495223999023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.632832414072965, "step_time": 0.5340300521850586} +{"epoch": 0, "iter": 12207, "iter_tflops": 38.89416971720011, "iter_time": 0.5304418029785156, "loss": 0.6463269591331482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.139309947530485, "step_time": 0.4895925807952881} +{"epoch": 0, "iter": 12208, "iter_tflops": 36.312349856930425, "iter_time": 0.5681563873291016, "loss": 0.8303823471069336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.480769413747616, "step_time": 0.5225605735778809} +{"epoch": 0, "iter": 12209, "iter_tflops": 16.55620632339434, "iter_time": 1.2461244506835936, "loss": 0.5588846802711487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.814210822696932, "step_time": 1.1581255950927734} +{"epoch": 0, "iter": 12210, 
"iter_tflops": 21.486111986938703, "iter_time": 0.9602059936523437, "loss": 0.7928615212440491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.122276020458756, "step_time": 0.708429983139038} +{"epoch": 0, "iter": 12211, "iter_tflops": 37.01741988422134, "iter_time": 0.5573347244262695, "loss": 0.7402915358543396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59707814364983, "step_time": 0.5081915855407715} +{"epoch": 0, "iter": 12212, "iter_tflops": 35.83861837194733, "iter_time": 0.5756665420532225, "loss": 0.6430532336235046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.97915881621888, "step_time": 0.5292852420806884} +{"epoch": 0, "iter": 12213, "iter_tflops": 10.891335120295334, "iter_time": 1.0924261932373047, "loss": 0.009914071299135685, "lr": 3e-05, "seqlen": 4784.0, "step_tflops": 11.664060062277771, "step_time": 1.0200547409057616} +{"epoch": 0, "iter": 12214, "iter_tflops": 9.895519427478773, "iter_time": 1.2023603057861327, "loss": 0.0015238525811582804, "lr": 3e-05, "seqlen": 4784.0, "step_tflops": 10.951345899738726, "step_time": 1.0864399566650391} +{"epoch": 0, "iter": 12215, "iter_tflops": 22.436149674033604, "iter_time": 0.5303039932250977, "loss": 0.004713236354291439, "lr": 3e-05, "seqlen": 4784.0, "step_tflops": 25.099903156021213, "step_time": 0.4740249271392822} +{"epoch": 0, "iter": 12216, "iter_tflops": 25.94687342063573, "iter_time": 0.4585515785217285, "loss": 0.009160485118627548, "lr": 3e-05, "seqlen": 4784.0, "step_tflops": 28.598809523721545, "step_time": 0.4160305957794189} +{"epoch": 0, "iter": 12217, "iter_tflops": 20.00924728564735, "iter_time": 1.0310779418945313, "loss": 0.5641060471534729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.534530927768984, "step_time": 0.9580470352172852} +{"epoch": 0, "iter": 12218, "iter_tflops": 19.20738507154676, "iter_time": 1.0741229705810547, "loss": 0.5982029438018799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.14137574886258, "step_time": 0.8915240707397462} +{"epoch": 0, 
"iter": 12219, "iter_tflops": 38.13484893579406, "iter_time": 0.5410036773681641, "loss": 0.613493025302887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80301835422156, "step_time": 0.493531192779541} +{"epoch": 0, "iter": 12220, "iter_tflops": 41.30889526853614, "iter_time": 0.4994346466064453, "loss": 0.6588578820228577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08114677976117, "step_time": 0.45764349365234375} +{"epoch": 0, "iter": 12221, "iter_tflops": 24.720495964255637, "iter_time": 0.8345744171142577, "loss": 0.4422491490840912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.461963896125408, "step_time": 0.7796508827209472} +{"epoch": 0, "iter": 12222, "iter_tflops": 11.314234232718078, "iter_time": 1.8234635314941408, "loss": 0.7186288833618164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.03720747285111, "step_time": 1.3720029830932616} +{"epoch": 0, "iter": 12223, "iter_tflops": 12.717737982422161, "iter_time": 1.6222297973632813, "loss": 0.5296982526779175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.241345535222152, "step_time": 1.4486758613586426} +{"epoch": 0, "iter": 12224, "iter_tflops": 32.67773039268575, "iter_time": 0.6313502578735352, "loss": 0.5502250790596008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.080327779451075, "step_time": 0.4290963573455811} +{"epoch": 0, "iter": 12225, "iter_tflops": 20.359310561920335, "iter_time": 0.7583343048095703, "loss": 0.3089628219604492, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 21.575236916798048, "step_time": 0.7155964813232422} +{"epoch": 0, "iter": 12226, "iter_tflops": 16.248796424332134, "iter_time": 0.9501727523803711, "loss": 0.2995724678039551, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 19.81657790216333, "step_time": 0.7791034202575683} +{"epoch": 0, "iter": 12227, "iter_tflops": 26.87946822929079, "iter_time": 0.574385009765625, "loss": 0.44231748580932617, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 28.640323440710304, "step_time": 0.5390708541870117} 
+{"epoch": 0, "iter": 12228, "iter_tflops": 28.019422253638194, "iter_time": 0.551016487121582, "loss": 0.40074294805526733, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.673378049389925, "step_time": 0.520303539276123} +{"epoch": 0, "iter": 12229, "iter_tflops": 24.531606173750376, "iter_time": 0.8088906555175782, "loss": 0.10534535348415375, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 25.843716667260708, "step_time": 0.7678224945068359} +{"epoch": 0, "iter": 12230, "iter_tflops": 13.198206184664485, "iter_time": 1.5034912109375, "loss": 0.08148784190416336, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 17.112838336587142, "step_time": 1.1595614128112792} +{"epoch": 0, "iter": 12231, "iter_tflops": 39.05468973168183, "iter_time": 0.5080922966003418, "loss": 0.10360169410705566, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 42.963140566701874, "step_time": 0.46187002944946287} +{"epoch": 0, "iter": 12232, "iter_tflops": 39.336659064745525, "iter_time": 0.5044502372741699, "loss": 0.14684127271175385, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 43.38867885030906, "step_time": 0.45734019851684565} +{"epoch": 0, "iter": 12233, "iter_tflops": 19.210052341348423, "iter_time": 1.0739738311767577, "loss": 0.43175968527793884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.89910436951748, "step_time": 0.9871759643554687} +{"epoch": 0, "iter": 12234, "iter_tflops": 41.93513212027257, "iter_time": 0.49197635650634763, "loss": 0.49742743372917175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50980409345829, "step_time": 0.45333294486999515} +{"epoch": 0, "iter": 12235, "iter_tflops": 47.47329838202614, "iter_time": 0.43458310699462893, "loss": 0.4245007336139679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.695772034136695, "step_time": 0.399086669921875} +{"epoch": 0, "iter": 12236, "iter_tflops": 48.54025349452435, "iter_time": 0.4250306091308593, "loss": 0.5094574093818665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.289754668202, "step_time": 
0.39455326652526856} +{"epoch": 0, "iter": 12237, "iter_tflops": 30.501454053007144, "iter_time": 0.6763970489501954, "loss": 0.06578454375267029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.38625626579761, "step_time": 0.6370323677062988} +{"epoch": 0, "iter": 12238, "iter_tflops": 22.963775602674957, "iter_time": 0.8984190521240234, "loss": 0.0640198364853859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.237445099035515, "step_time": 0.7056394100189208} +{"epoch": 0, "iter": 12239, "iter_tflops": 51.970248482036894, "iter_time": 0.39697892761230463, "loss": 0.10601858794689178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.59842978109943, "step_time": 0.3645170650482178} +{"epoch": 0, "iter": 12240, "iter_tflops": 48.61461272848407, "iter_time": 0.42438049697875974, "loss": 0.1467154324054718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.912716439842875, "step_time": 0.3899080390930176} +{"epoch": 0, "iter": 12241, "iter_tflops": 39.471651690845675, "iter_time": 0.5226812820434571, "loss": 0.20941469073295593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88344575798894, "step_time": 0.4810969161987305} +{"epoch": 0, "iter": 12242, "iter_tflops": 36.55625871462564, "iter_time": 0.5643655624389649, "loss": 0.1320382058620453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.19174304915897, "step_time": 0.500855073928833} +{"epoch": 0, "iter": 12243, "iter_tflops": 39.3379630200274, "iter_time": 0.5244575958251952, "loss": 0.11889085918664932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.955814524335324, "step_time": 0.4802863998413086} +{"epoch": 0, "iter": 12244, "iter_tflops": 40.53818430235938, "iter_time": 0.5089298858642578, "loss": 0.19241440296173096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.333196069073, "step_time": 0.46536445236206053} +{"epoch": 0, "iter": 12245, "iter_tflops": 17.400194465114442, "iter_time": 1.1856817779541016, "loss": 0.7651602029800415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.657477647677382, 
"step_time": 1.105781494140625} +{"epoch": 0, "iter": 12246, "iter_tflops": 16.50907765008777, "iter_time": 1.2496817779541016, "loss": 0.6795067191123962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.66160830386049, "step_time": 1.0493085403442384} +{"epoch": 0, "iter": 12247, "iter_tflops": 45.730060934547616, "iter_time": 0.4511494865417481, "loss": 0.7992186546325684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.37953256882962, "step_time": 0.41780657768249513} +{"epoch": 0, "iter": 12248, "iter_tflops": 45.93866354995648, "iter_time": 0.4491008644104004, "loss": 0.6030999422073364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.551470769776245, "step_time": 0.4163568344116211} +{"epoch": 0, "iter": 12249, "iter_tflops": 49.104266774391824, "iter_time": 0.4201486930847168, "loss": 0.03703925013542175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.10519729178954, "step_time": 0.3813144493103027} +{"epoch": 0, "iter": 12250, "iter_tflops": 40.423841658424664, "iter_time": 0.5103694419860839, "loss": 0.01663062907755375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.695845099499884, "step_time": 0.46158862113952637} +{"epoch": 0, "iter": 12251, "iter_tflops": 40.74048765719047, "iter_time": 0.5064027137756348, "loss": 0.038636740297079086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.243367234112945, "step_time": 0.45600260925292974} +{"epoch": 0, "iter": 12252, "iter_tflops": 37.27217035835813, "iter_time": 0.5535254135131836, "loss": 0.02402322366833687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.1676882490224, "step_time": 0.5011477298736572} +{"epoch": 0, "iter": 12253, "iter_tflops": 21.882139456964715, "iter_time": 0.9428279876708985, "loss": 0.18245700001716614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.828144627359674, "step_time": 0.8658287849426268} +{"epoch": 0, "iter": 12254, "iter_tflops": 14.407918983066889, "iter_time": 1.4319273681640627, "loss": 0.16757820546627045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
18.767327872311174, "step_time": 1.0993090572357178} +{"epoch": 0, "iter": 12255, "iter_tflops": 39.81076756767636, "iter_time": 0.5182289810180665, "loss": 0.1218634694814682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.69345800784559, "step_time": 0.4721780891418457} +{"epoch": 0, "iter": 12256, "iter_tflops": 40.251210097268924, "iter_time": 0.5125583419799805, "loss": 0.1615906059741974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10004090326759, "step_time": 0.4678248157501221} +{"epoch": 0, "iter": 12257, "iter_tflops": 31.441225114525505, "iter_time": 0.6561796951293946, "loss": 0.7993852496147156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.91448647487741, "step_time": 0.5909035358428955} +{"epoch": 0, "iter": 12258, "iter_tflops": 39.95435141273334, "iter_time": 0.5163666229248046, "loss": 0.5981201529502869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.741413296295434, "step_time": 0.4716604232788086} +{"epoch": 0, "iter": 12259, "iter_tflops": 40.369966169767956, "iter_time": 0.511050552368164, "loss": 0.5685349702835083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.48753665197975, "step_time": 0.46374996376037597} +{"epoch": 0, "iter": 12260, "iter_tflops": 38.66114322128782, "iter_time": 0.5336389923095703, "loss": 0.5423466563224792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.17374146294596, "step_time": 0.48919286727905276} +{"epoch": 0, "iter": 12261, "iter_tflops": 16.829670185713447, "iter_time": 1.2258762817382813, "loss": 0.16594770550727844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.92960756223281, "step_time": 1.1506717834472657} +{"epoch": 0, "iter": 12262, "iter_tflops": 19.21785004207741, "iter_time": 1.0735380630493165, "loss": 0.17787352204322815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.112588517763736, "step_time": 0.8926344833374023} +{"epoch": 0, "iter": 12263, "iter_tflops": 41.76255224152164, "iter_time": 0.49400940322875975, "loss": 0.16797320544719696, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 46.03844549945456, "step_time": 0.44812750053405764} +{"epoch": 0, "iter": 12264, "iter_tflops": 44.25957227403951, "iter_time": 0.4661385650634765, "loss": 0.2750091254711151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.35629323396249, "step_time": 0.42664753913879394} +{"epoch": 0, "iter": 12265, "iter_tflops": 27.35608668329216, "iter_time": 0.7541683044433594, "loss": 0.8778558373451233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.421942838310635, "step_time": 0.7012145195007324} +{"epoch": 0, "iter": 12266, "iter_tflops": 9.349918822854535, "iter_time": 2.2065532226562503, "loss": 0.6173155307769775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.603372141717703, "step_time": 1.9457105941772461} +{"epoch": 0, "iter": 12267, "iter_tflops": 14.249878201052523, "iter_time": 1.4478084106445315, "loss": 0.7309989929199219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.648590637049598, "step_time": 1.2392096099853516} +{"epoch": 0, "iter": 12268, "iter_tflops": 46.23049001352386, "iter_time": 0.44626594924926755, "loss": 0.7751336693763733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.034142987821674, "step_time": 0.4123402996063233} +{"epoch": 0, "iter": 12269, "iter_tflops": 19.772195222589986, "iter_time": 0.78292138671875, "loss": 0.453811913728714, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 20.815267186723922, "step_time": 0.7436884841918945} +{"epoch": 0, "iter": 12270, "iter_tflops": 9.057637805527412, "iter_time": 1.7090630950927734, "loss": 0.31668156385421753, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 10.353125610608046, "step_time": 1.495207832336426} +{"epoch": 0, "iter": 12271, "iter_tflops": 26.69538313404574, "iter_time": 0.5798783416748047, "loss": 0.4550774097442627, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.491268991324834, "step_time": 0.5433269577026367} +{"epoch": 0, "iter": 12272, "iter_tflops": 25.913824356537003, "iter_time": 0.5973674240112306, "loss": 0.3826061189174652, "lr": 3e-05, "seqlen": 
6192.0, "step_tflops": 27.545596844644592, "step_time": 0.5619799995422363} +{"epoch": 0, "iter": 12273, "iter_tflops": 32.92810985051498, "iter_time": 0.6265495834350585, "loss": 0.2570003271102905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.70441550705716, "step_time": 0.5778303108215331} +{"epoch": 0, "iter": 12274, "iter_tflops": 10.179095985807903, "iter_time": 2.026809997558594, "loss": 0.3112379014492035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.418682794920388, "step_time": 1.8067840118408203} +{"epoch": 0, "iter": 12275, "iter_tflops": 11.503953552422951, "iter_time": 1.793391586303711, "loss": 0.28354886174201965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.465284411622797, "step_time": 1.2530055961608886} +{"epoch": 0, "iter": 12276, "iter_tflops": 20.106811229934895, "iter_time": 1.0260748596191407, "loss": 0.24180686473846436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.883298483626024, "step_time": 0.8638293209075928} +{"epoch": 0, "iter": 12277, "iter_tflops": 10.53255049162813, "iter_time": 1.3417934112548828, "loss": 0.29859471321105957, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 11.32568455087287, "step_time": 1.2478280487060547} +{"epoch": 0, "iter": 12278, "iter_tflops": 13.673160150951901, "iter_time": 1.0335947723388672, "loss": 0.4799554944038391, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 16.710582627982536, "step_time": 0.8457219696044922} +{"epoch": 0, "iter": 12279, "iter_tflops": 25.684589206940007, "iter_time": 0.5502329330444335, "loss": 0.2690798044204712, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 27.35961283024322, "step_time": 0.5165463027954101} +{"epoch": 0, "iter": 12280, "iter_tflops": 25.99614297056299, "iter_time": 0.5436386032104492, "loss": 0.34041741490364075, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 27.592807429385417, "step_time": 0.5121808242797852} +{"epoch": 0, "iter": 12281, "iter_tflops": 26.994710570709916, "iter_time": 0.7642642974853516, "loss": 0.2889486849308014, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 28.471935495157137, "step_time": 0.7246115570068359} +{"epoch": 0, "iter": 12282, "iter_tflops": 17.555036735014998, "iter_time": 1.1752236022949218, "loss": 0.3956061601638794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.062436943203274, "step_time": 0.9795207252502443} +{"epoch": 0, "iter": 12283, "iter_tflops": 49.781407526674556, "iter_time": 0.41443371200561524, "loss": 0.34120631217956543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.15583140087257, "step_time": 0.3809579315185547} +{"epoch": 0, "iter": 12284, "iter_tflops": 50.784403758321055, "iter_time": 0.4062486114501953, "loss": 0.29542097449302673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.871327967329684, "step_time": 0.37599041748046874} +{"epoch": 0, "iter": 12285, "iter_tflops": 36.350322023735906, "iter_time": 0.5675628814697264, "loss": 0.3603020906448364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.06845082314151, "step_time": 0.5280755462646485} +{"epoch": 0, "iter": 12286, "iter_tflops": 11.712665367527611, "iter_time": 1.761434555053711, "loss": 0.4938141703605652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.70874285302077, "step_time": 1.402641525268555} +{"epoch": 0, "iter": 12287, "iter_tflops": 12.240810119897835, "iter_time": 1.685435302734375, "loss": 0.6039247512817383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.159507113290683, "step_time": 1.2767155189514159} +{"epoch": 0, "iter": 12288, "iter_tflops": 17.518965652349113, "iter_time": 1.1776433563232422, "loss": 0.5709116458892822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.058315413284426, "step_time": 0.8947355060577392} +{"epoch": 0, "iter": 12289, "iter_tflops": 14.983509663875454, "iter_time": 1.1151990051269531, "loss": 0.3828592896461487, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 16.113172870777053, "step_time": 1.0370145721435544} +{"epoch": 0, "iter": 12290, "iter_tflops": 14.86487610356416, "iter_time": 1.1240991821289061, "loss": 
0.3203328549861908, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 16.515467762563, "step_time": 1.0117542724609374} +{"epoch": 0, "iter": 12291, "iter_tflops": 29.89783911037763, "iter_time": 0.5588897247314453, "loss": 0.3626917004585266, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 31.880681876146664, "step_time": 0.5241291618347167} +{"epoch": 0, "iter": 12292, "iter_tflops": 29.315624488615015, "iter_time": 0.5699893951416015, "loss": 0.3222610354423523, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 31.214933396737155, "step_time": 0.5353077278137207} +{"epoch": 0, "iter": 12293, "iter_tflops": 27.4761371058199, "iter_time": 0.750873146057129, "loss": 0.6695572733879089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.13544748264623, "step_time": 0.7081097183227538} +{"epoch": 0, "iter": 12294, "iter_tflops": 14.803120571773304, "iter_time": 1.3936989440917968, "loss": 0.4909391403198242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.773778289555697, "step_time": 1.2299610233306884} +{"epoch": 0, "iter": 12295, "iter_tflops": 35.103568188536364, "iter_time": 0.5877206954956056, "loss": 0.6701045036315918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21522571810071, "step_time": 0.5398658027648926} +{"epoch": 0, "iter": 12296, "iter_tflops": 41.08425326089168, "iter_time": 0.5021654739379883, "loss": 0.5908719897270203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.93966092874434, "step_time": 0.45908431625366214} +{"epoch": 0, "iter": 12297, "iter_tflops": 34.38295016705835, "iter_time": 0.6000384902954101, "loss": 0.3624793291091919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.589909116250276, "step_time": 0.5488465919494628} +{"epoch": 0, "iter": 12298, "iter_tflops": 46.0675299720836, "iter_time": 0.44784457778930664, "loss": 0.44858548045158386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.10284865618244, "step_time": 0.41177486038208} +{"epoch": 0, "iter": 12299, "iter_tflops": 52.16767231081898, "iter_time": 0.3954765968322753, 
"loss": 0.6132457852363586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.76954199233917, "step_time": 0.36341835403442385} +{"epoch": 0, "iter": 12300, "iter_tflops": 46.3329583575806, "iter_time": 0.44527900314331054, "loss": 0.46372008323669434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.13989886225535, "step_time": 0.41147058486938476} +{"epoch": 0, "iter": 12301, "iter_tflops": 28.522663555420415, "iter_time": 0.7233228225708008, "loss": 0.5738843679428101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.258999187408868, "step_time": 0.6818167839050293} +{"epoch": 0, "iter": 12302, "iter_tflops": 25.289906782250178, "iter_time": 0.8157836914062501, "loss": 0.6659884452819824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.241915014083865, "step_time": 0.7305132637023927} +{"epoch": 0, "iter": 12303, "iter_tflops": 37.37624856070973, "iter_time": 0.5519840621948242, "loss": 0.6463093757629395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.73711561700318, "step_time": 0.5064446315765381} +{"epoch": 0, "iter": 12304, "iter_tflops": 37.228330718369975, "iter_time": 0.5541772384643555, "loss": 0.741231381893158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.526071855524, "step_time": 0.509081995010376} +{"epoch": 0, "iter": 12305, "iter_tflops": 19.20453148959769, "iter_time": 0.786900032043457, "loss": 0.2049356997013092, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 20.61472855085319, "step_time": 0.7330703582763672} +{"epoch": 0, "iter": 12306, "iter_tflops": 6.608384372073156, "iter_time": 2.2867989501953128, "loss": 0.45800480246543884, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 7.5296512037381085, "step_time": 2.0070048446655275} +{"epoch": 0, "iter": 12307, "iter_tflops": 6.779696830856762, "iter_time": 2.2290150756835936, "loss": 0.4855133295059204, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 8.326302542588767, "step_time": 1.8149768600463867} +{"epoch": 0, "iter": 12308, "iter_tflops": 21.15786935609848, "iter_time": 
0.7142518081665039, "loss": 0.34608352184295654, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 22.793687453564566, "step_time": 0.6629926147460938} +{"epoch": 0, "iter": 12309, "iter_tflops": 22.89656635736319, "iter_time": 0.7172397308349611, "loss": 0.3013761043548584, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 24.930949829418342, "step_time": 0.6587124519348144} +{"epoch": 0, "iter": 12310, "iter_tflops": 24.632482105494923, "iter_time": 0.6666939620971679, "loss": 0.2891559898853302, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 26.563467601484074, "step_time": 0.618229793548584} +{"epoch": 0, "iter": 12311, "iter_tflops": 23.526463137755357, "iter_time": 0.698036376953125, "loss": 0.21762438118457794, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 25.3239246279927, "step_time": 0.6484906005859375} +{"epoch": 0, "iter": 12312, "iter_tflops": 27.111857055155482, "iter_time": 0.6057249069213868, "loss": 0.3486713469028473, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 28.95414193889633, "step_time": 0.5671840362548828} +{"epoch": 0, "iter": 12313, "iter_tflops": 12.959061537992168, "iter_time": 1.5920206451416017, "loss": 0.0033659907057881355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.937064213340282, "step_time": 1.4803041152954102} +{"epoch": 0, "iter": 12314, "iter_tflops": 22.2496957266207, "iter_time": 0.9272528381347656, "loss": 0.00818326324224472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.894282248433214, "step_time": 0.7396172924041747} +{"epoch": 0, "iter": 12315, "iter_tflops": 57.81520169985523, "iter_time": 0.3568454818725586, "loss": 0.005064700730144978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.80311515438671, "step_time": 0.32335558319091795} +{"epoch": 0, "iter": 12316, "iter_tflops": 55.13453813841093, "iter_time": 0.37419545364379886, "loss": 0.005297977011650801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.162813145982305, "step_time": 0.34292102432250976} +{"epoch": 0, "iter": 12317, "iter_tflops": 
29.67538079500547, "iter_time": 0.6952259063720704, "loss": 0.41911065578460693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.549533823042598, "step_time": 0.6539270477294923} +{"epoch": 0, "iter": 12318, "iter_tflops": 9.665854810729238, "iter_time": 2.134430313110352, "loss": 0.362114280462265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.391549920466646, "step_time": 1.81108748626709} +{"epoch": 0, "iter": 12319, "iter_tflops": 10.325343983927848, "iter_time": 1.9981022949218752, "loss": 0.3454653024673462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.658496176897849, "step_time": 1.6298218383789063} +{"epoch": 0, "iter": 12320, "iter_tflops": 27.049395505110375, "iter_time": 0.7627192077636717, "loss": 0.3766445517539978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.48595816380561, "step_time": 0.5224919052124024} +{"epoch": 0, "iter": 12321, "iter_tflops": 18.575905469739645, "iter_time": 0.848765235900879, "loss": 0.3226163983345032, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 19.56795906132694, "step_time": 0.8057346572875976} +{"epoch": 0, "iter": 12322, "iter_tflops": 10.110375411350045, "iter_time": 1.559445831298828, "loss": 0.32685232162475586, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 12.585606395308323, "step_time": 1.2527471694946288} +{"epoch": 0, "iter": 12323, "iter_tflops": 23.54945911026705, "iter_time": 0.6695093383789062, "loss": 0.38089144229888916, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 25.394853811357212, "step_time": 0.6208573951721191} +{"epoch": 0, "iter": 12324, "iter_tflops": 27.216950010095193, "iter_time": 0.5792927856445313, "loss": 0.3458642363548279, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 29.00087347807726, "step_time": 0.543658893585205} +{"epoch": 0, "iter": 12325, "iter_tflops": 29.40359182465009, "iter_time": 0.7016521530151367, "loss": 0.008141836151480675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.549634495909338, "step_time": 0.6539249610900879} +{"epoch": 0, "iter": 12326, 
"iter_tflops": 21.235410668832643, "iter_time": 0.971542007446289, "loss": 0.06993620842695236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.552731918695862, "step_time": 0.8073928680419922} +{"epoch": 0, "iter": 12327, "iter_tflops": 52.02549870481802, "iter_time": 0.39655734252929686, "loss": 0.03759695217013359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.95661176747316, "step_time": 0.36222473335266114} +{"epoch": 0, "iter": 12328, "iter_tflops": 54.01119251485544, "iter_time": 0.3819781150817871, "loss": 0.043998733162879944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.17031659292399, "step_time": 0.3486730289459229} +{"epoch": 0, "iter": 12329, "iter_tflops": 46.76435569471508, "iter_time": 0.44117134094238286, "loss": 0.006680634804069996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.13335060107604, "step_time": 0.40347626876831055} +{"epoch": 0, "iter": 12330, "iter_tflops": 23.434759461264377, "iter_time": 0.8803629302978515, "loss": 0.010874506086111069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.66169811328745, "step_time": 0.7198140678405762} +{"epoch": 0, "iter": 12331, "iter_tflops": 55.33785561206699, "iter_time": 0.3728206176757812, "loss": 0.012923013418912888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.890377610922165, "step_time": 0.33882354354858396} +{"epoch": 0, "iter": 12332, "iter_tflops": 57.01109424890579, "iter_time": 0.3618785743713379, "loss": 0.0154417734593153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.71230620635077, "step_time": 0.32897998428344727} +{"epoch": 0, "iter": 12333, "iter_tflops": 51.45074233102566, "iter_time": 0.40098728561401364, "loss": 0.08529991656541824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.71061067366214, "step_time": 0.3637960033416748} +{"epoch": 0, "iter": 12334, "iter_tflops": 46.288608586794666, "iter_time": 0.44570563125610346, "loss": 0.020751215517520905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.64969240895656, "step_time": 0.40732909774780274} 
+{"epoch": 0, "iter": 12335, "iter_tflops": 51.79993271119267, "iter_time": 0.39828417587280274, "loss": 0.014669610187411308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.48512284655604, "step_time": 0.36524827194213866} +{"epoch": 0, "iter": 12336, "iter_tflops": 50.31142017960823, "iter_time": 0.41006780242919916, "loss": 0.046123553067445755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.13101366409665, "step_time": 0.37421937561035157} +{"epoch": 0, "iter": 12337, "iter_tflops": 28.473232513364366, "iter_time": 0.46917628860473626, "loss": 0.018491044640541077, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 31.2291361491353, "step_time": 0.42777249717712396} +{"epoch": 0, "iter": 12338, "iter_tflops": 5.161388514461954, "iter_time": 2.5882503356933593, "loss": 0.03191960230469704, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 6.308044875439888, "step_time": 2.117766410827637} +{"epoch": 0, "iter": 12339, "iter_tflops": 8.40682077075724, "iter_time": 1.5890627288818358, "loss": 0.0636044591665268, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 10.890177577742175, "step_time": 1.2266985969543456} +{"epoch": 0, "iter": 12340, "iter_tflops": 16.267305684598824, "iter_time": 0.8212156219482422, "loss": 0.06742507219314575, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 20.11888831360288, "step_time": 0.6640011787414551} +{"epoch": 0, "iter": 12341, "iter_tflops": 15.95997941437192, "iter_time": 1.05211376953125, "loss": 0.1917642056941986, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 17.305732870147473, "step_time": 0.9702977752685547} +{"epoch": 0, "iter": 12342, "iter_tflops": 12.37099571609864, "iter_time": 1.357345397949219, "loss": 0.35312387347221375, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 13.861077091827584, "step_time": 1.21142924118042} +{"epoch": 0, "iter": 12343, "iter_tflops": 27.5696418932017, "iter_time": 0.6090653686523437, "loss": 0.19272156059741974, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 29.25730841408351, "step_time": 
0.5739322929382324} +{"epoch": 0, "iter": 12344, "iter_tflops": 28.92448790473775, "iter_time": 0.580536262512207, "loss": 0.2760379910469055, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 30.647499597916347, "step_time": 0.5478983383178712} +{"epoch": 0, "iter": 12345, "iter_tflops": 30.952780899079226, "iter_time": 0.6665344085693359, "loss": 0.5661031007766724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.90585336090289, "step_time": 0.6269733619689941} +{"epoch": 0, "iter": 12346, "iter_tflops": 11.337234770231197, "iter_time": 1.81976416015625, "loss": 0.570965588092804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.99340838048409, "step_time": 1.3760109100341797} +{"epoch": 0, "iter": 12347, "iter_tflops": 36.9124801929426, "iter_time": 0.5589191894531249, "loss": 0.5569344162940979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.542407263519586, "step_time": 0.508876874923706} +{"epoch": 0, "iter": 12348, "iter_tflops": 38.629023947583654, "iter_time": 0.5340827026367188, "loss": 0.5690649747848511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.027700590652636, "step_time": 0.4908927497863769} +{"epoch": 0, "iter": 12349, "iter_tflops": 32.1471499908527, "iter_time": 0.6417705307006836, "loss": 0.5711855888366699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.76491728625286, "step_time": 0.5768528232574464} +{"epoch": 0, "iter": 12350, "iter_tflops": 38.529344408980656, "iter_time": 0.5354644317626953, "loss": 0.5989254713058472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.38414070943424, "step_time": 0.48676446342468266} +{"epoch": 0, "iter": 12351, "iter_tflops": 37.653307970105296, "iter_time": 0.5479224700927735, "loss": 0.6342432498931885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.28004547746235, "step_time": 0.49978369140625} +{"epoch": 0, "iter": 12352, "iter_tflops": 40.38075757414106, "iter_time": 0.5109139785766601, "loss": 0.5489616394042969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.99996958195047, "step_time": 
0.4688888130187988} +{"epoch": 0, "iter": 12353, "iter_tflops": 18.162174567650734, "iter_time": 1.1359374084472655, "loss": 0.4334525763988495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.290960333197507, "step_time": 1.0694694900512696} +{"epoch": 0, "iter": 12354, "iter_tflops": 21.121889800191973, "iter_time": 0.9767636184692383, "loss": 0.5833185315132141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.739548614773245, "step_time": 0.8690600585937498} +{"epoch": 0, "iter": 12355, "iter_tflops": 47.9775719532476, "iter_time": 0.43001537322998046, "loss": 0.5294236540794373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.669667827749066, "step_time": 0.39928829383850095} +{"epoch": 0, "iter": 12356, "iter_tflops": 48.07216275819741, "iter_time": 0.4291692390441895, "loss": 0.5777681469917297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14098064027981, "step_time": 0.39567904663085934} +{"epoch": 0, "iter": 12357, "iter_tflops": 30.35961876575987, "iter_time": 0.6795570678710937, "loss": 0.7878062129020691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.23073872036047, "step_time": 0.6401061325073243} +{"epoch": 0, "iter": 12358, "iter_tflops": 8.62833384347787, "iter_time": 2.391086608886719, "loss": 0.5542132258415222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.360798811790312, "step_time": 1.9912647552490235} +{"epoch": 0, "iter": 12359, "iter_tflops": 14.834456025987187, "iter_time": 1.3907549743652345, "loss": 0.6988027095794678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.365610113040184, "step_time": 1.1880431137084961} +{"epoch": 0, "iter": 12360, "iter_tflops": 17.917972232361493, "iter_time": 1.1514189910888672, "loss": 0.561096727848053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.74849459876865, "step_time": 0.948621681213379} +{"epoch": 0, "iter": 12361, "iter_tflops": 11.540491585443284, "iter_time": 1.2882386016845704, "loss": 0.4221067428588867, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 12.20838962841033, 
"step_time": 1.2177614898681641} +{"epoch": 0, "iter": 12362, "iter_tflops": 15.266449139941079, "iter_time": 0.973828727722168, "loss": 0.2489272654056549, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 19.495161381951323, "step_time": 0.7625946998596191} +{"epoch": 0, "iter": 12363, "iter_tflops": 27.725826671583963, "iter_time": 0.5362114868164062, "loss": 0.4531393051147461, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 29.521860482795283, "step_time": 0.5035897636413574} +{"epoch": 0, "iter": 12364, "iter_tflops": 25.5782864484022, "iter_time": 0.5812315368652343, "loss": 0.3133887052536011, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 27.271745891962176, "step_time": 0.5451395301818848} +{"epoch": 0, "iter": 12365, "iter_tflops": 23.91692378485074, "iter_time": 0.8626148452758788, "loss": 0.01274170633405447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.10970167972185, "step_time": 0.8216383361816406} +{"epoch": 0, "iter": 12366, "iter_tflops": 18.294200847020473, "iter_time": 1.1277395324707031, "loss": 0.00987340696156025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.754115292851488, "step_time": 0.9066972389221191} +{"epoch": 0, "iter": 12367, "iter_tflops": 52.366577462704996, "iter_time": 0.3939744491577149, "loss": 0.009211898781359196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.53216492366767, "step_time": 0.3586010284423828} +{"epoch": 0, "iter": 12368, "iter_tflops": 56.673759068389955, "iter_time": 0.3640325584411621, "loss": 0.009376738220453262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.24829508752108, "step_time": 0.3314322662353515} +{"epoch": 0, "iter": 12369, "iter_tflops": 31.860776660478553, "iter_time": 0.3963402519226074, "loss": 0.01134487520903349, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 35.60663372021431, "step_time": 0.3546448211669922} +{"epoch": 0, "iter": 12370, "iter_tflops": 30.415544832933804, "iter_time": 0.4151728439331055, "loss": 0.003004426369443536, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 
33.91418069169724, "step_time": 0.3723430137634277} +{"epoch": 0, "iter": 12371, "iter_tflops": 33.51654664073219, "iter_time": 0.37676042175292973, "loss": 0.01286823395639658, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 36.953130943113294, "step_time": 0.34172228240966795} +{"epoch": 0, "iter": 12372, "iter_tflops": 37.20285479704792, "iter_time": 0.33942847442626956, "loss": 0.008045286871492863, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 40.93796778150073, "step_time": 0.30845957756042486} +{"epoch": 0, "iter": 12373, "iter_tflops": 29.44501599951536, "iter_time": 0.7006650466918944, "loss": 0.24859145283699036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.424245120963032, "step_time": 0.6565342597961425} +{"epoch": 0, "iter": 12374, "iter_tflops": 14.26094504733151, "iter_time": 1.4466848754882813, "loss": 0.21466931700706482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.992157623134936, "step_time": 1.214153844833374} +{"epoch": 0, "iter": 12375, "iter_tflops": 37.93588124456276, "iter_time": 0.5438411560058594, "loss": 0.23948994278907776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.55591184731236, "step_time": 0.4964659080505372} +{"epoch": 0, "iter": 12376, "iter_tflops": 39.47145061419601, "iter_time": 0.5226839447021484, "loss": 0.2593025267124176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.2105267667168, "step_time": 0.477455265045166} +{"epoch": 0, "iter": 12377, "iter_tflops": 35.45463772628271, "iter_time": 0.5819011230468749, "loss": 0.5918043851852417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.33355183790594, "step_time": 0.5245164127349853} +{"epoch": 0, "iter": 12378, "iter_tflops": 36.62224234800665, "iter_time": 0.5633487243652343, "loss": 0.5010305643081665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.47497315667432, "step_time": 0.5097246990203858} +{"epoch": 0, "iter": 12379, "iter_tflops": 36.554125433873565, "iter_time": 0.5643984985351563, "loss": 0.6286554336547852, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.982772909710874, "step_time": 0.5159995670318605} +{"epoch": 0, "iter": 12380, "iter_tflops": 34.60860949681556, "iter_time": 0.5961260452270507, "loss": 0.46992015838623047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.77228331181208, "step_time": 0.5461966209411622} +{"epoch": 0, "iter": 12381, "iter_tflops": 31.795045368791897, "iter_time": 0.6488776245117187, "loss": 0.1710175722837448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.88669219702668, "step_time": 0.591374309539795} +{"epoch": 0, "iter": 12382, "iter_tflops": 38.7553943699885, "iter_time": 0.5323412094116211, "loss": 0.1988610327243805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60938621360371, "step_time": 0.4841912860870361} +{"epoch": 0, "iter": 12383, "iter_tflops": 43.4013495111507, "iter_time": 0.47535603713989255, "loss": 0.157072514295578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.7652823274613, "step_time": 0.43192654800415037} +{"epoch": 0, "iter": 12384, "iter_tflops": 43.52534139331565, "iter_time": 0.47400187683105466, "loss": 0.2567906081676483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.436380956193005, "step_time": 0.4349213218688965} +{"epoch": 0, "iter": 12385, "iter_tflops": 21.715536473143327, "iter_time": 0.9500614242553711, "loss": 0.124160535633564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.350724444899072, "step_time": 0.8835311965942383} +{"epoch": 0, "iter": 12386, "iter_tflops": 20.886356361730492, "iter_time": 0.9877784881591797, "loss": 0.07537658512592316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.397689811961996, "step_time": 0.8817577152252196} +{"epoch": 0, "iter": 12387, "iter_tflops": 39.3243554374226, "iter_time": 0.5246390762329102, "loss": 0.12102456390857697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.136681558648455, "step_time": 0.47827261543273925} +{"epoch": 0, "iter": 12388, "iter_tflops": 43.52533438760609, "iter_time": 0.474001953125, "loss": 0.15513299405574799, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 47.702715433914165, "step_time": 0.4324930629730225} +{"epoch": 0, "iter": 12389, "iter_tflops": 18.330417710938192, "iter_time": 1.1255113677978517, "loss": 0.6345952153205872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.644314989624398, "step_time": 1.0502322692871093} +{"epoch": 0, "iter": 12390, "iter_tflops": 22.155854327101057, "iter_time": 0.9311802291870117, "loss": 0.6243546009063721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.086878584428188, "step_time": 0.7616637496948243} +{"epoch": 0, "iter": 12391, "iter_tflops": 40.9560506035562, "iter_time": 0.503737377166748, "loss": 0.8418845534324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13373477597452, "step_time": 0.4674676551818847} +{"epoch": 0, "iter": 12392, "iter_tflops": 44.55982814035017, "iter_time": 0.46299760055541994, "loss": 0.6738560199737549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.74886391762469, "step_time": 0.43207506561279296} +{"epoch": 0, "iter": 12393, "iter_tflops": 19.275324634225854, "iter_time": 0.8137190933227537, "loss": 0.009135471656918526, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 20.267619807280266, "step_time": 0.7738797073364259} +{"epoch": 0, "iter": 12394, "iter_tflops": 12.30329233790498, "iter_time": 1.2748376007080078, "loss": 0.025416310876607895, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 17.596417648877047, "step_time": 0.8913575477600097} +{"epoch": 0, "iter": 12395, "iter_tflops": 37.28102572414846, "iter_time": 0.42071534729003907, "loss": 0.015412564389407635, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 41.11452400015692, "step_time": 0.381488052368164} +{"epoch": 0, "iter": 12396, "iter_tflops": 31.62357136362235, "iter_time": 0.4959812889099121, "loss": 0.0025179805234074593, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 34.98476889994066, "step_time": 0.44832937812805174} +{"epoch": 0, "iter": 12397, "iter_tflops": 23.182428227325616, "iter_time": 0.8899453201293945, "loss": 0.3048609495162964, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 24.86943966468688, "step_time": 0.8295761299133302} +{"epoch": 0, "iter": 12398, "iter_tflops": 28.950903920849655, "iter_time": 0.7126234664916993, "loss": 0.24181263148784637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.12537704487107, "step_time": 0.6422054901123047} +{"epoch": 0, "iter": 12399, "iter_tflops": 47.98730385396261, "iter_time": 0.42992816543579104, "loss": 0.3347475528717041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99748047306038, "step_time": 0.3967710227966309} +{"epoch": 0, "iter": 12400, "iter_tflops": 47.65121765393739, "iter_time": 0.43296046829223633, "loss": 0.12740187346935272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.51868025425007, "step_time": 0.40045850181579584} +{"epoch": 0, "iter": 12401, "iter_tflops": 32.11231392886775, "iter_time": 0.6424667358398437, "loss": 0.27882543206214905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.29812462938933, "step_time": 0.6015224952697754} +{"epoch": 0, "iter": 12402, "iter_tflops": 17.763319708153144, "iter_time": 1.1614435729980468, "loss": 0.2106279879808426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.865315495549314, "step_time": 0.9022877254486084} +{"epoch": 0, "iter": 12403, "iter_tflops": 37.21921718239939, "iter_time": 0.5543129348754883, "loss": 0.178892582654953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.85281499070728, "step_time": 0.5050103282928466} +{"epoch": 0, "iter": 12404, "iter_tflops": 35.679803644894946, "iter_time": 0.5782288970947266, "loss": 0.2992374897003174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.800676869389335, "step_time": 0.5317199382781982} +{"epoch": 0, "iter": 12405, "iter_tflops": 19.40660056227896, "iter_time": 1.0630967254638672, "loss": 0.33065763115882874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.48014494185249, "step_time": 1.0073704833984376} +{"epoch": 0, "iter": 12406, "iter_tflops": 17.7587885744295, "iter_time": 1.1617399139404294, "loss": 
0.3076547384262085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.678547235534715, "step_time": 0.951682476043701} +{"epoch": 0, "iter": 12407, "iter_tflops": 38.14246016845673, "iter_time": 0.5408957214355468, "loss": 0.35486623644828796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.8070392237803, "step_time": 0.4934837265014648} +{"epoch": 0, "iter": 12408, "iter_tflops": 41.70699534582637, "iter_time": 0.4946674613952637, "loss": 0.26390472054481506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.694423759016466, "step_time": 0.4515013389587403} +{"epoch": 0, "iter": 12409, "iter_tflops": 25.059828112656177, "iter_time": 0.8232735443115236, "loss": 0.6915609240531921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.05472709601922, "step_time": 0.7625689010620117} +{"epoch": 0, "iter": 12410, "iter_tflops": 9.8891037517833, "iter_time": 2.0862450256347658, "loss": 0.6322633624076843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.84708751857127, "step_time": 1.9019938278198243} +{"epoch": 0, "iter": 12411, "iter_tflops": 11.911778198233762, "iter_time": 1.7319910736083985, "loss": 0.7505683898925781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.302641872107941, "step_time": 1.4424673213958739} +{"epoch": 0, "iter": 12412, "iter_tflops": 36.32261362728633, "iter_time": 0.5679958419799804, "loss": 0.7674131393432617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.76250281152253, "step_time": 0.5188580207824708} +{"epoch": 0, "iter": 12413, "iter_tflops": 15.162926694947682, "iter_time": 1.0128226776123046, "loss": 0.3034936189651489, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 16.351938417407062, "step_time": 0.9391764831542969} +{"epoch": 0, "iter": 12414, "iter_tflops": 9.398621865348003, "iter_time": 1.6340008392333982, "loss": 0.31691625714302063, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 11.050113989833806, "step_time": 1.3897916374206543} +{"epoch": 0, "iter": 12415, "iter_tflops": 22.370841724676065, "iter_time": 0.686489860534668, 
"loss": 0.3511916995048523, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.061049184079263, "step_time": 0.6382662658691406} +{"epoch": 0, "iter": 12416, "iter_tflops": 22.379589402456602, "iter_time": 0.6862215270996094, "loss": 0.2497400939464569, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.145275837376033, "step_time": 0.6360397834777832} +{"epoch": 0, "iter": 12417, "iter_tflops": 20.096242783931412, "iter_time": 1.0266144638061525, "loss": 0.3710190951824188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.623779091334924, "step_time": 0.9540928726196289} +{"epoch": 0, "iter": 12418, "iter_tflops": 17.081504660871083, "iter_time": 1.2078030548095704, "loss": 0.34014469385147095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.167709122993113, "step_time": 0.9306822547912598} +{"epoch": 0, "iter": 12419, "iter_tflops": 39.616929825358426, "iter_time": 0.5207645721435546, "loss": 0.35371074080467224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73260844580986, "step_time": 0.4717553844451904} +{"epoch": 0, "iter": 12420, "iter_tflops": 43.52033813964668, "iter_time": 0.4740563697814941, "loss": 0.33588358759880066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59025597169924, "step_time": 0.43351507759094243} +{"epoch": 0, "iter": 12421, "iter_tflops": 16.559607018034594, "iter_time": 1.2458685455322265, "loss": 0.02339143119752407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.71195630571125, "step_time": 1.1648116760253906} +{"epoch": 0, "iter": 12422, "iter_tflops": 17.333796775118387, "iter_time": 1.1902235717773437, "loss": 0.02378181926906109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.493957711224922, "step_time": 0.9598554992675781} +{"epoch": 0, "iter": 12423, "iter_tflops": 51.15036237473026, "iter_time": 0.4033420791625977, "loss": 0.044674210250377655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.318729402429845, "step_time": 0.3663273963928222} +{"epoch": 0, "iter": 12424, "iter_tflops": 57.029259102781744, "iter_time": 
0.36176330947875973, "loss": 0.0317806676030159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.20988638278573, "step_time": 0.33163689422607423} +{"epoch": 0, "iter": 12425, "iter_tflops": 27.569832878825792, "iter_time": 0.7483213119506836, "loss": 0.9725203514099121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.070073972778754, "step_time": 0.709702133178711} +{"epoch": 0, "iter": 12426, "iter_tflops": 14.445796816628079, "iter_time": 1.4281727600097656, "loss": 0.567712128162384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.727248797518687, "step_time": 1.233382354736328} +{"epoch": 0, "iter": 12427, "iter_tflops": 34.052830805526156, "iter_time": 0.6058554611206055, "loss": 0.7356300950050354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62357909667689, "step_time": 0.4956588058471679} +{"epoch": 0, "iter": 12428, "iter_tflops": 39.80913008216342, "iter_time": 0.5182502975463867, "loss": 0.6037171483039856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.31105671974549, "step_time": 0.47634703636169434} +{"epoch": 0, "iter": 12429, "iter_tflops": 18.94394840230199, "iter_time": 1.0890598449707032, "loss": 0.21621257066726685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.15051438071077, "step_time": 1.0238494720458986} +{"epoch": 0, "iter": 12430, "iter_tflops": 15.793206732132104, "iter_time": 1.3063270721435547, "loss": 0.2628818154335022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.38450180057065, "step_time": 1.0120970191955565} +{"epoch": 0, "iter": 12431, "iter_tflops": 35.98475113974871, "iter_time": 0.5733287811279297, "loss": 0.2079450935125351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.423885486888075, "step_time": 0.5233145656585694} +{"epoch": 0, "iter": 12432, "iter_tflops": 40.03155628763206, "iter_time": 0.5153707580566406, "loss": 0.218634232878685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.613219275839036, "step_time": 0.47304679298400876} +{"epoch": 0, "iter": 12433, "iter_tflops": 30.5150417696006, 
"iter_time": 0.6760958633422851, "loss": 0.025106212124228477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.047119033762144, "step_time": 0.6059570999145507} +{"epoch": 0, "iter": 12434, "iter_tflops": 42.56571126673751, "iter_time": 0.48468809509277344, "loss": 0.002014179015532136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.53506878586326, "step_time": 0.4340183792114258} +{"epoch": 0, "iter": 12435, "iter_tflops": 42.25615645463893, "iter_time": 0.48823876190185544, "loss": 0.009748795069754124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.779353145362016, "step_time": 0.4410299015045166} +{"epoch": 0, "iter": 12436, "iter_tflops": 44.08222846130345, "iter_time": 0.4680138511657715, "loss": 0.004085162654519081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.819832892723106, "step_time": 0.4225965614318848} +{"epoch": 0, "iter": 12437, "iter_tflops": 20.54437833997759, "iter_time": 1.0042208709716798, "loss": 0.4027958810329437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.169735063204634, "step_time": 0.9305972061157228} +{"epoch": 0, "iter": 12438, "iter_tflops": 18.19974567456734, "iter_time": 1.1335924072265626, "loss": 0.4259425103664398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.681305901854554, "step_time": 0.8711974582672118} +{"epoch": 0, "iter": 12439, "iter_tflops": 47.189792263085444, "iter_time": 0.43719398880004884, "loss": 0.5225604176521301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.30118709058423, "step_time": 0.40215625953674317} +{"epoch": 0, "iter": 12440, "iter_tflops": 48.18924546452642, "iter_time": 0.42812651062011725, "loss": 0.5148102641105652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.121059796455214, "step_time": 0.39583027648925784} +{"epoch": 0, "iter": 12441, "iter_tflops": 32.40123488623629, "iter_time": 0.6367378768920898, "loss": 0.07127957046031952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.38063505064058, "step_time": 0.6000788955688476} +{"epoch": 0, "iter": 12442, 
"iter_tflops": 12.955940727642446, "iter_time": 1.5924041290283202, "loss": 0.029967548325657845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.493125486926806, "step_time": 1.331628890991211} +{"epoch": 0, "iter": 12443, "iter_tflops": 51.915059670432356, "iter_time": 0.39740093994140624, "loss": 0.08490405976772308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.113286639306956, "step_time": 0.3612310676574707} +{"epoch": 0, "iter": 12444, "iter_tflops": 52.42552938484656, "iter_time": 0.3935314292907715, "loss": 0.07503753900527954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.2580856657336, "step_time": 0.36031755638122553} +{"epoch": 0, "iter": 12445, "iter_tflops": 27.413856919455096, "iter_time": 0.7525790176391601, "loss": 0.8836584687232971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.952026914400342, "step_time": 0.7125958251953125} +{"epoch": 0, "iter": 12446, "iter_tflops": 13.843453755823422, "iter_time": 1.4903140411376952, "loss": 0.7101678252220154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.516274425459642, "step_time": 1.1778242912292483} +{"epoch": 0, "iter": 12447, "iter_tflops": 39.73498042052653, "iter_time": 0.5192174072265624, "loss": 0.7301235795021057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.775637925435845, "step_time": 0.4823094291687012} +{"epoch": 0, "iter": 12448, "iter_tflops": 42.247397938941596, "iter_time": 0.4883399810791015, "loss": 0.677539587020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37644230389754, "step_time": 0.4546652946472168} +{"epoch": 0, "iter": 12449, "iter_tflops": 30.5570498854635, "iter_time": 0.6751664047241211, "loss": 0.48670196533203125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.43347599361191, "step_time": 0.6361049156188965} +{"epoch": 0, "iter": 12450, "iter_tflops": 9.707781367521235, "iter_time": 2.1252120056152344, "loss": 0.5913506150245667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.42792048340409, "step_time": 1.8053235092163087} +{"epoch": 0, 
"iter": 12451, "iter_tflops": 14.539373693020321, "iter_time": 1.4189808959960937, "loss": 0.5637441277503967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.160039110419394, "step_time": 1.2022754364013672} +{"epoch": 0, "iter": 12452, "iter_tflops": 17.680719026901933, "iter_time": 1.1668695983886719, "loss": 0.5353057384490967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.14839823647252, "step_time": 0.9755392951965333} +{"epoch": 0, "iter": 12453, "iter_tflops": 19.646764749382022, "iter_time": 0.7962514648437501, "loss": 0.33391058444976807, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 20.742876411456542, "step_time": 0.7541753082275391} +{"epoch": 0, "iter": 12454, "iter_tflops": 11.127846501452344, "iter_time": 1.4058214416503905, "loss": 0.35260945558547974, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 12.04175092537154, "step_time": 1.2991271209716797} +{"epoch": 0, "iter": 12455, "iter_tflops": 27.709866423838715, "iter_time": 0.564555778503418, "loss": 0.24987101554870605, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.58606936826951, "step_time": 0.5287544288635254} +{"epoch": 0, "iter": 12456, "iter_tflops": 27.998662889060096, "iter_time": 0.5587325820922852, "loss": 0.38167938590049744, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.74662799447606, "step_time": 0.5259004554748535} +{"epoch": 0, "iter": 12457, "iter_tflops": 42.09991926851441, "iter_time": 0.490050666809082, "loss": 0.23121345043182373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71012862575028, "step_time": 0.4513462142944336} +{"epoch": 0, "iter": 12458, "iter_tflops": 45.601015901360256, "iter_time": 0.4524261817932129, "loss": 0.2924943268299103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.69491407844859, "step_time": 0.415155029296875} +{"epoch": 0, "iter": 12459, "iter_tflops": 47.27091825260431, "iter_time": 0.43644367980957033, "loss": 0.32202616333961487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.49084748793003, "step_time": 0.4006749649047851} 
+{"epoch": 0, "iter": 12460, "iter_tflops": 49.715615213600344, "iter_time": 0.4149821624755859, "loss": 0.286600798368454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.85607777097349, "step_time": 0.3830782775878906} +{"epoch": 0, "iter": 12461, "iter_tflops": 50.85054464453607, "iter_time": 0.4057202072143555, "loss": 0.11621571332216263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.98012641122545, "step_time": 0.36854317474365234} +{"epoch": 0, "iter": 12462, "iter_tflops": 51.056369410759594, "iter_time": 0.40408461761474607, "loss": 0.07656804472208023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.731838019386835, "step_time": 0.3701850547790527} +{"epoch": 0, "iter": 12463, "iter_tflops": 48.03414971784338, "iter_time": 0.4295088729858399, "loss": 0.05039076507091522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.260018737742705, "step_time": 0.3947777671813965} +{"epoch": 0, "iter": 12464, "iter_tflops": 52.517183823333774, "iter_time": 0.39284462738037107, "loss": 0.06034982576966286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.92555137021844, "step_time": 0.3624223747253419} +{"epoch": 0, "iter": 12465, "iter_tflops": 38.1909404254116, "iter_time": 0.5402090988159179, "loss": 0.01842862367630005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.162976245578214, "step_time": 0.5012050971984863} +{"epoch": 0, "iter": 12466, "iter_tflops": 12.201241277838596, "iter_time": 1.6909011993408205, "loss": 0.012408741749823093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.54174609665172, "step_time": 1.2472137699127197} +{"epoch": 0, "iter": 12467, "iter_tflops": 39.743710564035766, "iter_time": 0.5191033554077148, "loss": 0.031721316277980804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.820324857783845, "step_time": 0.47081105804443363} +{"epoch": 0, "iter": 12468, "iter_tflops": 48.271934275648825, "iter_time": 0.427393138885498, "loss": 0.0494280606508255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15992410532508, "step_time": 
0.3880948638916016} +{"epoch": 0, "iter": 12469, "iter_tflops": 28.800096898135696, "iter_time": 0.7163550033569336, "loss": 0.10189507156610489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.33410299651735, "step_time": 0.6584229812622071} +{"epoch": 0, "iter": 12470, "iter_tflops": 37.198462711893214, "iter_time": 0.5546222076416015, "loss": 0.06707368046045303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.09266894472071, "step_time": 0.5020626316070557} +{"epoch": 0, "iter": 12471, "iter_tflops": 45.87797579318044, "iter_time": 0.449694938659668, "loss": 0.06282562762498856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.433010970076595, "step_time": 0.40907915496826175} +{"epoch": 0, "iter": 12472, "iter_tflops": 45.562471696234084, "iter_time": 0.4528089179992676, "loss": 0.09846542775630951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.06737802597849, "step_time": 0.41206658554077147} +{"epoch": 0, "iter": 12473, "iter_tflops": 20.282426096464313, "iter_time": 0.7955305862426758, "loss": 0.1268845498561859, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 21.828589687530002, "step_time": 0.7391815299987793} +{"epoch": 0, "iter": 12474, "iter_tflops": 7.655309732713261, "iter_time": 2.107725341796875, "loss": 0.09547131508588791, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 8.746773694445668, "step_time": 1.8447133636474613} +{"epoch": 0, "iter": 12475, "iter_tflops": 11.56079051257771, "iter_time": 1.39569091796875, "loss": 0.1490335315465927, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 13.2200840683689, "step_time": 1.220513442993164} +{"epoch": 0, "iter": 12476, "iter_tflops": 22.36627092153619, "iter_time": 0.721411735534668, "loss": 0.16098342835903168, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 28.632248700575644, "step_time": 0.5635355606079101} +{"epoch": 0, "iter": 12477, "iter_tflops": 25.196852091868177, "iter_time": 0.5867888259887696, "loss": 0.5109971165657043, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.065462410108008, 
"step_time": 0.5462766914367675} +{"epoch": 0, "iter": 12478, "iter_tflops": 26.51646281654776, "iter_time": 0.5575868606567383, "loss": 0.4434540569782257, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 28.27423863889813, "step_time": 0.522922348022461} +{"epoch": 0, "iter": 12479, "iter_tflops": 27.784316269400794, "iter_time": 0.5321430664062501, "loss": 0.28889432549476624, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 29.531154932999144, "step_time": 0.50066552734375} +{"epoch": 0, "iter": 12480, "iter_tflops": 25.73769530456582, "iter_time": 0.5744582443237305, "loss": 0.4742498993873596, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.447891275109807, "step_time": 0.538665470123291} +{"epoch": 0, "iter": 12481, "iter_tflops": 34.08839128788653, "iter_time": 0.6052234420776367, "loss": 0.45352333784103394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.48349223997332, "step_time": 0.5654911918640136} +{"epoch": 0, "iter": 12482, "iter_tflops": 16.77910940151173, "iter_time": 1.2295702362060545, "loss": 0.3975726068019867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.48335512839992, "step_time": 1.116198513031006} +{"epoch": 0, "iter": 12483, "iter_tflops": 46.065476248258264, "iter_time": 0.44786454391479497, "loss": 0.48229438066482544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88085605377929, "step_time": 0.4136074466705322} +{"epoch": 0, "iter": 12484, "iter_tflops": 49.430705363134244, "iter_time": 0.4173740463256836, "loss": 0.5350111722946167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.32373868459803, "step_time": 0.38690260696411133} +{"epoch": 0, "iter": 12485, "iter_tflops": 20.400130421617927, "iter_time": 1.011321647644043, "loss": 0.3740433156490326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.26604740195579, "step_time": 0.9701423645019531} +{"epoch": 0, "iter": 12486, "iter_tflops": 14.58290333313464, "iter_time": 1.4147452697753906, "loss": 0.3131870925426483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
17.225199283026566, "step_time": 1.1977274208068847} +{"epoch": 0, "iter": 12487, "iter_tflops": 33.32188070031922, "iter_time": 0.6191455307006836, "loss": 0.3428305983543396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.12389697145545, "step_time": 0.48977172088623044} +{"epoch": 0, "iter": 12488, "iter_tflops": 40.355295716021814, "iter_time": 0.5112363357543945, "loss": 0.2775604724884033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.16142489452836, "step_time": 0.4671745433807373} +{"epoch": 0, "iter": 12489, "iter_tflops": 19.34897253369597, "iter_time": 1.0662630004882814, "loss": 0.31865331530570984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.701171600418657, "step_time": 0.9966147766113281} +{"epoch": 0, "iter": 12490, "iter_tflops": 15.114894733683835, "iter_time": 1.3649511871337892, "loss": 0.3192467987537384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.567202877528388, "step_time": 1.054371114730835} +{"epoch": 0, "iter": 12491, "iter_tflops": 37.272377907515526, "iter_time": 0.553522331237793, "loss": 0.1841737926006317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.71571050292486, "step_time": 0.506710880279541} +{"epoch": 0, "iter": 12492, "iter_tflops": 40.82103382865175, "iter_time": 0.5054035034179687, "loss": 0.13753573596477509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.523508544625926, "step_time": 0.4633752861022949} +{"epoch": 0, "iter": 12493, "iter_tflops": 23.659884761788216, "iter_time": 0.8719862213134765, "loss": 0.5289744138717651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.341415245028323, "step_time": 0.814125545501709} +{"epoch": 0, "iter": 12494, "iter_tflops": 21.69414240431357, "iter_time": 0.9509983444213868, "loss": 0.5477682948112488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.483687937812327, "step_time": 0.7790113506317138} +{"epoch": 0, "iter": 12495, "iter_tflops": 40.580952911690645, "iter_time": 0.5083935203552246, "loss": 0.3093099296092987, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.397873554684395, "step_time": 0.46468652343749994} +{"epoch": 0, "iter": 12496, "iter_tflops": 39.10354779032941, "iter_time": 0.5276015777587891, "loss": 0.5529066920280457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.943224290940165, "step_time": 0.48042721176147457} +{"epoch": 0, "iter": 12497, "iter_tflops": 15.49924813319013, "iter_time": 1.3311028594970702, "loss": 0.3798691928386688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.25921028744522, "step_time": 1.2688865661621094} +{"epoch": 0, "iter": 12498, "iter_tflops": 18.45683858165305, "iter_time": 1.117802131652832, "loss": 0.4055292010307312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.099174256862245, "step_time": 0.7613181610107421} +{"epoch": 0, "iter": 12499, "iter_tflops": 48.66603917846245, "iter_time": 0.42393204498291015, "loss": 0.32349127531051636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.92113182326238, "step_time": 0.3898460369110107} +{"epoch": 0, "iter": 12500, "iter_tflops": 48.51136852212053, "iter_time": 0.4252836837768555, "loss": 0.35160353779792786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.2450363945377, "step_time": 0.3948909778594971} +{"epoch": 0, "iter": 12501, "iter_tflops": 22.4679034097504, "iter_time": 0.9182473831176758, "loss": 0.5668036341667175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.596589164520122, "step_time": 0.8743252410888672} +{"epoch": 0, "iter": 12502, "iter_tflops": 14.581904802474485, "iter_time": 1.4148421478271482, "loss": 0.7159093022346497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.1620348814428, "step_time": 1.2021356239318848} +{"epoch": 0, "iter": 12503, "iter_tflops": 32.14575017538965, "iter_time": 0.6417984771728517, "loss": 0.655381441116333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.064690506446546, "step_time": 0.5883723258972168} +{"epoch": 0, "iter": 12504, "iter_tflops": 35.21634785190013, "iter_time": 0.5858385314941407, "loss": 0.638181209564209, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 38.095493056416885, "step_time": 0.5415625801086427} +{"epoch": 0, "iter": 12505, "iter_tflops": 19.744148149886172, "iter_time": 1.0449219360351563, "loss": 0.6755246520042419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.844624802095375, "step_time": 0.9897560501098632} +{"epoch": 0, "iter": 12506, "iter_tflops": 14.252971736569327, "iter_time": 1.447494171142578, "loss": 0.5919486880302429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.687613752949627, "step_time": 1.0479225044250489} +{"epoch": 0, "iter": 12507, "iter_tflops": 36.798429590733015, "iter_time": 0.5606514663696289, "loss": 0.5370917320251465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.04918293944641, "step_time": 0.5151439304351807} +{"epoch": 0, "iter": 12508, "iter_tflops": 37.502861104302575, "iter_time": 0.5501205215454101, "loss": 0.7186264991760254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.02961320341342, "step_time": 0.5028342189788819} +{"epoch": 0, "iter": 12509, "iter_tflops": 19.090668597879915, "iter_time": 1.0437677001953125, "loss": 0.0031583758536726236, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 20.193669203479978, "step_time": 0.9867559509277344} +{"epoch": 0, "iter": 12510, "iter_tflops": 22.297555872037723, "iter_time": 0.8936505584716796, "loss": 0.006992243696004152, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 28.345477576873158, "step_time": 0.7029771575927735} +{"epoch": 0, "iter": 12511, "iter_tflops": 53.829569470713515, "iter_time": 0.3701724433898926, "loss": 0.006004643626511097, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 59.77651854417774, "step_time": 0.333345329284668} +{"epoch": 0, "iter": 12512, "iter_tflops": 56.638252552093476, "iter_time": 0.3518156433105469, "loss": 0.004386972635984421, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 62.347083050577915, "step_time": 0.31960153198242186} +{"epoch": 0, "iter": 12513, "iter_tflops": 20.776645106489006, "iter_time": 0.9929944610595703, "loss": 0.3473363220691681, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.697591758890898, "step_time": 0.9508471603393553} +{"epoch": 0, "iter": 12514, "iter_tflops": 12.777425567129873, "iter_time": 1.6146518249511717, "loss": 0.31678658723831177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.2675942448409, "step_time": 1.3512995681762696} +{"epoch": 0, "iter": 12515, "iter_tflops": 36.644926802349225, "iter_time": 0.5629999923706055, "loss": 0.3605080544948578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.22331552706872, "step_time": 0.51291379737854} +{"epoch": 0, "iter": 12516, "iter_tflops": 41.44298088613833, "iter_time": 0.4978187637329102, "loss": 0.32262662053108215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.36019620826606, "step_time": 0.45482813644409176} +{"epoch": 0, "iter": 12517, "iter_tflops": 14.424878009242088, "iter_time": 1.430243881225586, "loss": 0.21891161799430847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.351775705697543, "step_time": 1.343889717102051} +{"epoch": 0, "iter": 12518, "iter_tflops": 18.53292699096478, "iter_time": 1.1132129058837892, "loss": 0.12826620042324066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.422703675391457, "step_time": 0.8447505970001221} +{"epoch": 0, "iter": 12519, "iter_tflops": 48.67462864552128, "iter_time": 0.423857234954834, "loss": 0.14396342635154724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44170691581834, "step_time": 0.3934100303649902} +{"epoch": 0, "iter": 12520, "iter_tflops": 49.78067942887378, "iter_time": 0.4144397735595703, "loss": 0.16989241540431976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.92861570683153, "step_time": 0.3825630092620849} +{"epoch": 0, "iter": 12521, "iter_tflops": 37.35794695205978, "iter_time": 0.5522544784545899, "loss": 0.32220959663391113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.41943417012091, "step_time": 0.5104250946044921} +{"epoch": 0, "iter": 12522, "iter_tflops": 15.593535810134998, "iter_time": 1.3230542297363281, "loss": 
0.34147822856903076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.300835213635015, "step_time": 1.1273307075500487} +{"epoch": 0, "iter": 12523, "iter_tflops": 46.419093218318615, "iter_time": 0.44445274734497076, "loss": 0.348528653383255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.27642978742876, "step_time": 0.41035319328308106} +{"epoch": 0, "iter": 12524, "iter_tflops": 41.53539205710037, "iter_time": 0.4967111778259278, "loss": 0.2661990523338318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.84999221099034, "step_time": 0.4600021648406982} +{"epoch": 0, "iter": 12525, "iter_tflops": 30.325939819539723, "iter_time": 0.6134387969970704, "loss": 0.005071628373116255, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 32.452827387911, "step_time": 0.5732353553771972} +{"epoch": 0, "iter": 12526, "iter_tflops": 13.230547300390242, "iter_time": 1.4060724487304688, "loss": 0.009515207260847092, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 17.33649983395199, "step_time": 1.073060203552246} +{"epoch": 0, "iter": 12527, "iter_tflops": 41.079864038088914, "iter_time": 0.45285223007202147, "loss": 0.013562473468482494, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 45.39657031508551, "step_time": 0.40979104614257816} +{"epoch": 0, "iter": 12528, "iter_tflops": 43.55901135332796, "iter_time": 0.4270782890319824, "loss": 0.008082645945250988, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 48.068048553679304, "step_time": 0.3870160865783691} +{"epoch": 0, "iter": 12529, "iter_tflops": 16.86797075930313, "iter_time": 1.1151030883789061, "loss": 0.05962512269616127, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 17.870129834422013, "step_time": 1.0525679702758788} +{"epoch": 0, "iter": 12530, "iter_tflops": 15.784967863790282, "iter_time": 1.1916100463867187, "loss": 0.061458900570869446, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 19.33348115090075, "step_time": 0.9728990936279298} +{"epoch": 0, "iter": 12531, "iter_tflops": 38.31558133116356, "iter_time": 
0.4909106330871582, "loss": 0.0648234561085701, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 42.266777170474356, "step_time": 0.4450191745758056} +{"epoch": 0, "iter": 12532, "iter_tflops": 36.47604107675903, "iter_time": 0.5156679763793945, "loss": 0.06497049331665039, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 39.89826081626919, "step_time": 0.47143724822998045} +{"epoch": 0, "iter": 12533, "iter_tflops": 19.22758758392801, "iter_time": 1.0729943847656251, "loss": 0.3610304296016693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.397194773094075, "step_time": 1.0114672012329102} +{"epoch": 0, "iter": 12534, "iter_tflops": 13.463186085132026, "iter_time": 1.532407958984375, "loss": 0.42646855115890503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.204799763141413, "step_time": 1.0742675666809083} +{"epoch": 0, "iter": 12535, "iter_tflops": 48.58568635515581, "iter_time": 0.4246331596374512, "loss": 0.47614482045173645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.56102080032996, "step_time": 0.3925169868469239} +{"epoch": 0, "iter": 12536, "iter_tflops": 48.310937973040694, "iter_time": 0.4270480842590332, "loss": 0.6749312281608582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.647103718673115, "step_time": 0.3918751850128173} +{"epoch": 0, "iter": 12537, "iter_tflops": 26.593946933256607, "iter_time": 0.7757815551757813, "loss": 0.11352279037237167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.040349069018042, "step_time": 0.7357645034790039} +{"epoch": 0, "iter": 12538, "iter_tflops": 11.422386383415297, "iter_time": 1.8061981811523438, "loss": 0.10864447802305222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.049826296915, "step_time": 1.3708526000976562} +{"epoch": 0, "iter": 12539, "iter_tflops": 49.60442041464579, "iter_time": 0.4159123992919922, "loss": 0.10361634194850922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.08251396456154, "step_time": 0.38147438049316407} +{"epoch": 0, "iter": 12540, "iter_tflops": 55.01212837534418, 
"iter_time": 0.37502809143066407, "loss": 0.03938553109765053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.87758848532077, "step_time": 0.34455451583862307} +{"epoch": 0, "iter": 12541, "iter_tflops": 26.14630852092027, "iter_time": 0.7890633392333984, "loss": 0.0822039470076561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.502582981342584, "step_time": 0.7501511230468749} +{"epoch": 0, "iter": 12542, "iter_tflops": 13.643581583989407, "iter_time": 1.5121464538574219, "loss": 0.12396950274705887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.828570667864593, "step_time": 1.0957333869934083} +{"epoch": 0, "iter": 12543, "iter_tflops": 43.42050116684297, "iter_time": 0.47514636993408205, "loss": 0.1558007150888443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.50570252224135, "step_time": 0.37851256942749023} +{"epoch": 0, "iter": 12544, "iter_tflops": 49.20904672632681, "iter_time": 0.4192540779113769, "loss": 0.08380018919706345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.59818841939151, "step_time": 0.3849214706420898} +{"epoch": 0, "iter": 12545, "iter_tflops": 35.94078991441524, "iter_time": 0.5740300521850586, "loss": 0.3337281048297882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.63946798120399, "step_time": 0.5339383430480957} +{"epoch": 0, "iter": 12546, "iter_tflops": 11.310202111914071, "iter_time": 1.8241136016845703, "loss": 0.34266865253448486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.987376374896407, "step_time": 1.5885497512817381} +{"epoch": 0, "iter": 12547, "iter_tflops": 12.902997533799722, "iter_time": 1.5989380340576171, "loss": 0.39368993043899536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.655390312150141, "step_time": 1.407747802734375} +{"epoch": 0, "iter": 12548, "iter_tflops": 14.54844606245901, "iter_time": 1.41809602355957, "loss": 0.3698418140411377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.352994715758015, "step_time": 1.1241268157958986} +{"epoch": 0, "iter": 12549, "iter_tflops": 
16.139186880726623, "iter_time": 0.9718395233154298, "loss": 0.3004934787750244, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.89482045998647, "step_time": 0.9283732681274414} +{"epoch": 0, "iter": 12550, "iter_tflops": 11.68357204741137, "iter_time": 1.3424575653076172, "loss": 0.2990533411502838, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 14.623535072593338, "step_time": 1.0725655326843262} +{"epoch": 0, "iter": 12551, "iter_tflops": 21.299821162565873, "iter_time": 0.7363770599365235, "loss": 0.45847490429878235, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 23.08984772110748, "step_time": 0.6792898712158203} +{"epoch": 0, "iter": 12552, "iter_tflops": 24.22158735661325, "iter_time": 0.6475504455566405, "loss": 0.41683444380760193, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.981094718985695, "step_time": 0.6036966438293456} +{"epoch": 0, "iter": 12553, "iter_tflops": 17.790651820056457, "iter_time": 1.1596592254638671, "loss": 0.5305684208869934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.338074172291538, "step_time": 1.0668639144897463} +{"epoch": 0, "iter": 12554, "iter_tflops": 16.277977839517536, "iter_time": 1.267423614501953, "loss": 0.4713766276836395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.753404781456517, "step_time": 1.0444322757720947} +{"epoch": 0, "iter": 12555, "iter_tflops": 33.56962848611084, "iter_time": 0.6145761642456055, "loss": 0.35927966237068176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.461953780330504, "step_time": 0.5658252334594727} +{"epoch": 0, "iter": 12556, "iter_tflops": 36.779981468568884, "iter_time": 0.5609326782226562, "loss": 0.6285750269889832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03209809339698, "step_time": 0.5153637828826905} +{"epoch": 0, "iter": 12557, "iter_tflops": 15.666613392346338, "iter_time": 1.1900709991455076, "loss": 0.018594548106193542, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 16.832610364771504, "step_time": 1.1076346359252929} +{"epoch": 0, "iter": 12558, 
"iter_tflops": 16.324601482195444, "iter_time": 1.1421033630371094, "loss": 0.049228012561798096, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 21.435025062510995, "step_time": 0.869809211730957} +{"epoch": 0, "iter": 12559, "iter_tflops": 50.49422366563632, "iter_time": 0.36923792266845706, "loss": 0.03976859152317047, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 55.424720137034164, "step_time": 0.3363910942077637} +{"epoch": 0, "iter": 12560, "iter_tflops": 51.9749679576645, "iter_time": 0.35871849441528325, "loss": 0.04251066967844963, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 56.62405455350425, "step_time": 0.3292661113739014} +{"epoch": 0, "iter": 12561, "iter_tflops": 32.917847262137116, "iter_time": 0.6267449188232421, "loss": 0.3097187876701355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.26733268361167, "step_time": 0.5849916038513183} +{"epoch": 0, "iter": 12562, "iter_tflops": 16.82246977163777, "iter_time": 1.2264009857177733, "loss": 0.27650687098503113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.838297785553745, "step_time": 1.039962890625} +{"epoch": 0, "iter": 12563, "iter_tflops": 46.92762957353167, "iter_time": 0.43963638687133794, "loss": 0.3209781348705292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.875042435302475, "step_time": 0.40552484130859373} +{"epoch": 0, "iter": 12564, "iter_tflops": 46.127672874808965, "iter_time": 0.4472606620788574, "loss": 0.4074634313583374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.60668124190038, "step_time": 0.41589344406127926} +{"epoch": 0, "iter": 12565, "iter_tflops": 46.15740853106464, "iter_time": 0.44697252655029296, "loss": 0.47478845715522766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69579839050934, "step_time": 0.40695864677429194} +{"epoch": 0, "iter": 12566, "iter_tflops": 42.16076436537458, "iter_time": 0.48934344100952143, "loss": 0.5325683951377869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.7587199776634, "step_time": 0.45086692810058593} +{"epoch": 0, 
"iter": 12567, "iter_tflops": 45.563449746394134, "iter_time": 0.4527991981506348, "loss": 0.46496251225471497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.171188403001075, "step_time": 0.41957687377929687} +{"epoch": 0, "iter": 12568, "iter_tflops": 50.31077758663215, "iter_time": 0.4100730400085449, "loss": 0.5295323729515076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.356725077318, "step_time": 0.37954997253417977} +{"epoch": 0, "iter": 12569, "iter_tflops": 31.17469452940076, "iter_time": 0.6617897567749024, "loss": 0.5052409768104553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.114426290371, "step_time": 0.6230243377685547} +{"epoch": 0, "iter": 12570, "iter_tflops": 12.744679545504246, "iter_time": 1.6188004913330079, "loss": 0.4101213812828064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.868991110572937, "step_time": 1.387524772644043} +{"epoch": 0, "iter": 12571, "iter_tflops": 30.462939992832112, "iter_time": 0.6772522125244141, "loss": 0.43258216977119446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.471135405864764, "step_time": 0.5505862922668457} +{"epoch": 0, "iter": 12572, "iter_tflops": 37.54945748261013, "iter_time": 0.549437858581543, "loss": 0.369645893573761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.99811340242913, "step_time": 0.5032205581665039} +{"epoch": 0, "iter": 12573, "iter_tflops": 16.4255449599557, "iter_time": 1.256037078857422, "loss": 0.6130580902099609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.149827112816972, "step_time": 1.202991340637207} +{"epoch": 0, "iter": 12574, "iter_tflops": 17.29711254548634, "iter_time": 1.1927478332519532, "loss": 0.665520966053009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.12898082697436, "step_time": 0.9764358100891113} +{"epoch": 0, "iter": 12575, "iter_tflops": 39.976222174827754, "iter_time": 0.5160841217041016, "loss": 0.607570230960846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.247984939478734, "step_time": 0.47704172897338865} +{"epoch": 0, 
"iter": 12576, "iter_tflops": 42.31040575976049, "iter_time": 0.48761275482177735, "loss": 0.711921751499176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.52239166740638, "step_time": 0.4532075920104981} +{"epoch": 0, "iter": 12577, "iter_tflops": 24.785976746486597, "iter_time": 0.8323695983886719, "loss": 0.4960324764251709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.13574445939361, "step_time": 0.7893822784423827} +{"epoch": 0, "iter": 12578, "iter_tflops": 12.651213781670508, "iter_time": 1.6307600097656252, "loss": 0.6061932444572449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.678735302910358, "step_time": 1.2369698982238768} +{"epoch": 0, "iter": 12579, "iter_tflops": 45.11627074643302, "iter_time": 0.4572872085571289, "loss": 0.39070865511894226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.72081912766557, "step_time": 0.42345539093017576} +{"epoch": 0, "iter": 12580, "iter_tflops": 48.153902818618654, "iter_time": 0.42844073486328127, "loss": 0.42187219858169556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.71170027671633, "step_time": 0.3989637432098389} +{"epoch": 0, "iter": 12581, "iter_tflops": 36.495411586208256, "iter_time": 0.5653065032958985, "loss": 0.13930602371692657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.34396834942534, "step_time": 0.5243775444030762} +{"epoch": 0, "iter": 12582, "iter_tflops": 33.64304293819097, "iter_time": 0.6132350616455079, "loss": 0.08298688381910324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94880359618573, "step_time": 0.48036480140686033} +{"epoch": 0, "iter": 12583, "iter_tflops": 44.44547625285736, "iter_time": 0.4641888275146484, "loss": 0.20278796553611755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.77626801090301, "step_time": 0.422974006652832} +{"epoch": 0, "iter": 12584, "iter_tflops": 41.01231578166245, "iter_time": 0.5030462951660156, "loss": 0.07987099885940552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.937363953878005, "step_time": 0.45910778236389155} 
+{"epoch": 0, "iter": 12585, "iter_tflops": 15.810010562394293, "iter_time": 1.3049386291503908, "loss": 0.09006363898515701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.098755691233105, "step_time": 1.2065844955444336} +{"epoch": 0, "iter": 12586, "iter_tflops": 22.142683917558646, "iter_time": 0.9317340927124024, "loss": 0.14115187525749207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.216316435052544, "step_time": 0.73117600440979} +{"epoch": 0, "iter": 12587, "iter_tflops": 39.762637579792575, "iter_time": 0.5188562622070312, "loss": 0.13594645261764526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.79556562494719, "step_time": 0.4710772247314453} +{"epoch": 0, "iter": 12588, "iter_tflops": 42.01539353620065, "iter_time": 0.49103654098510746, "loss": 0.15949247777462006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.27123653575288, "step_time": 0.44587296676635746} +{"epoch": 0, "iter": 12589, "iter_tflops": 24.01306596935059, "iter_time": 0.8591611557006836, "loss": 0.17235317826271057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.748747672230287, "step_time": 0.801246482849121} +{"epoch": 0, "iter": 12590, "iter_tflops": 30.177161106002103, "iter_time": 0.6836658172607422, "loss": 0.19574134051799774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.749946609985905, "step_time": 0.5324160499572753} +{"epoch": 0, "iter": 12591, "iter_tflops": 40.886566644422224, "iter_time": 0.5045934448242188, "loss": 0.22839124500751495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.95016712492236, "step_time": 0.458977014541626} +{"epoch": 0, "iter": 12592, "iter_tflops": 44.26359961539408, "iter_time": 0.4660961532592774, "loss": 0.19217875599861145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.539312717501765, "step_time": 0.4250388469696045} +{"epoch": 0, "iter": 12593, "iter_tflops": 23.61411271704275, "iter_time": 0.8736764221191406, "loss": 0.5541179776191711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.136214098168033, "step_time": 
0.820771713256836} +{"epoch": 0, "iter": 12594, "iter_tflops": 18.641605587245163, "iter_time": 1.1067229919433592, "loss": 0.5994267463684082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.18533870595259, "step_time": 0.9299426879882813} +{"epoch": 0, "iter": 12595, "iter_tflops": 39.41346597132016, "iter_time": 0.5234529113769532, "loss": 0.6627355813980103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.03449495453492, "step_time": 0.4794082870483398} +{"epoch": 0, "iter": 12596, "iter_tflops": 37.8177069253499, "iter_time": 0.5455405731201172, "loss": 0.7772308588027954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.257766208081684, "step_time": 0.500053575515747} +{"epoch": 0, "iter": 12597, "iter_tflops": 23.611951208675368, "iter_time": 0.8737564010620118, "loss": 0.15416331589221954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.894170364101093, "step_time": 0.7967466506958008} +{"epoch": 0, "iter": 12598, "iter_tflops": 41.30070697516467, "iter_time": 0.49953366470336913, "loss": 0.12068752199411392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.38002807544908, "step_time": 0.4448271026611328} +{"epoch": 0, "iter": 12599, "iter_tflops": 49.97012711193459, "iter_time": 0.41286854171752935, "loss": 0.10437103360891342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.54499726313433, "step_time": 0.37823988533020014} +{"epoch": 0, "iter": 12600, "iter_tflops": 46.61875735894577, "iter_time": 0.44254919433593753, "loss": 0.09442984312772751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.77318957506016, "step_time": 0.4063383388519287} +{"epoch": 0, "iter": 12601, "iter_tflops": 19.046881575182983, "iter_time": 0.48698667526245115, "loss": 0.003751780604943633, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 20.856960808751406, "step_time": 0.44472335243225103} +{"epoch": 0, "iter": 12602, "iter_tflops": 8.088892689381199, "iter_time": 1.1467054748535157, "loss": 0.0031138400081545115, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 
9.764989305743056, "step_time": 0.9498809719085693} +{"epoch": 0, "iter": 12603, "iter_tflops": 18.9003671493598, "iter_time": 0.4907617645263672, "loss": 0.002026030793786049, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 20.96808378221792, "step_time": 0.44236648559570313} +{"epoch": 0, "iter": 12604, "iter_tflops": 18.621855660839735, "iter_time": 0.498101676940918, "loss": 0.0008961109560914338, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 20.627709740427363, "step_time": 0.44966589355468745} +{"epoch": 0, "iter": 12605, "iter_tflops": 24.605902261606463, "iter_time": 0.8384611663818359, "loss": 0.42906343936920166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.14127231755408, "step_time": 0.7892153549194336} +{"epoch": 0, "iter": 12606, "iter_tflops": 9.734558658136988, "iter_time": 2.1193660888671877, "loss": 0.4461270272731781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.1553753572459, "step_time": 1.8494306869506834} +{"epoch": 0, "iter": 12607, "iter_tflops": 12.857174862301488, "iter_time": 1.6046366119384765, "loss": 0.45783233642578125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.05543776171312, "step_time": 1.3703416557312014} +{"epoch": 0, "iter": 12608, "iter_tflops": 37.179986117741834, "iter_time": 0.5548978271484374, "loss": 0.39049962162971497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.61113755253028, "step_time": 0.5080156517028809} +{"epoch": 0, "iter": 12609, "iter_tflops": 17.16568142273442, "iter_time": 0.9662565383911133, "loss": 0.46275410056114197, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 18.128385385962744, "step_time": 0.9149436950683594} +{"epoch": 0, "iter": 12610, "iter_tflops": 7.288412682094546, "iter_time": 2.275728973388672, "loss": 0.2793419063091278, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 9.441061598170638, "step_time": 1.7568418273925783} +{"epoch": 0, "iter": 12611, "iter_tflops": 10.36227507824045, "iter_time": 1.6006573638916017, "loss": 0.33361804485321045, "lr": 3e-05, "seqlen": 6624.0, 
"step_tflops": 12.830569225144904, "step_time": 1.292729232788086} +{"epoch": 0, "iter": 12612, "iter_tflops": 24.67623597591329, "iter_time": 0.6721629638671875, "loss": 0.30462121963500977, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 26.76540342539936, "step_time": 0.6196974372863769} +{"epoch": 0, "iter": 12613, "iter_tflops": 16.753986638476533, "iter_time": 0.9215217819213867, "loss": 0.425906240940094, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 17.837975263880764, "step_time": 0.8655222015380859} +{"epoch": 0, "iter": 12614, "iter_tflops": 7.677333515577041, "iter_time": 2.0110059814453125, "loss": 0.39372724294662476, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 8.69609087546258, "step_time": 1.775414245605469} +{"epoch": 0, "iter": 12615, "iter_tflops": 8.047731961870431, "iter_time": 1.9184490356445312, "loss": 0.3866850733757019, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 9.345823992811395, "step_time": 1.6519852752685549} +{"epoch": 0, "iter": 12616, "iter_tflops": 25.111726671819717, "iter_time": 0.6148188781738282, "loss": 0.3712902069091797, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 26.653171079370935, "step_time": 0.579261791229248} +{"epoch": 0, "iter": 12617, "iter_tflops": 15.050320384472831, "iter_time": 1.14573681640625, "loss": 0.38494443893432617, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 15.670627630424287, "step_time": 1.1003838882446288} +{"epoch": 0, "iter": 12618, "iter_tflops": 13.242959360367278, "iter_time": 1.3021036834716797, "loss": 0.445904403924942, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 15.04143323551179, "step_time": 1.1464137687683105} +{"epoch": 0, "iter": 12619, "iter_tflops": 26.12170841217316, "iter_time": 0.6601293411254883, "loss": 0.4293815791606903, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 28.15636480837057, "step_time": 0.6124265785217285} +{"epoch": 0, "iter": 12620, "iter_tflops": 28.82694502606591, "iter_time": 0.5981801452636719, "loss": 0.29525116086006165, "lr": 3e-05, "seqlen": 
6880.0, "step_tflops": 30.753570534821794, "step_time": 0.5607058258056641} +{"epoch": 0, "iter": 12621, "iter_tflops": 35.4846367514359, "iter_time": 0.5615450820922852, "loss": 0.09572937339544296, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 39.751460330990206, "step_time": 0.5012702198028565} +{"epoch": 0, "iter": 12622, "iter_tflops": 36.2502288194711, "iter_time": 0.5496854476928711, "loss": 0.08939430862665176, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 40.10304126322542, "step_time": 0.4968756141662598} +{"epoch": 0, "iter": 12623, "iter_tflops": 35.72442626359817, "iter_time": 0.55777587890625, "loss": 0.07933526486158371, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 39.2618237403751, "step_time": 0.5075215911865234} +{"epoch": 0, "iter": 12624, "iter_tflops": 44.340114541693616, "iter_time": 0.44939494323730467, "loss": 0.05759119614958763, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 48.4654915357527, "step_time": 0.4111424980163574} +{"epoch": 0, "iter": 12625, "iter_tflops": 28.03658306986483, "iter_time": 0.7358633346557618, "loss": 0.19483527541160583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.062479970050852, "step_time": 0.6862738380432128} +{"epoch": 0, "iter": 12626, "iter_tflops": 12.571762179061352, "iter_time": 1.6410661621093752, "loss": 0.16582326591014862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.14847021942647, "step_time": 1.2030865287780763} +{"epoch": 0, "iter": 12627, "iter_tflops": 38.255737080375994, "iter_time": 0.5392941055297852, "loss": 0.1399131566286087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93298293892326, "step_time": 0.4920015716552734} +{"epoch": 0, "iter": 12628, "iter_tflops": 38.009039837539945, "iter_time": 0.5427943878173828, "loss": 0.15954096615314484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.54977542406779, "step_time": 0.49653923034667974} +{"epoch": 0, "iter": 12629, "iter_tflops": 39.798866339916096, "iter_time": 0.518383949279785, "loss": 0.018769552931189537, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 44.76904104086143, "step_time": 0.4608339385986328} +{"epoch": 0, "iter": 12630, "iter_tflops": 40.595130653053936, "iter_time": 0.5082159652709961, "loss": 0.0077621061354875565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17603156833433, "step_time": 0.45668228912353515} +{"epoch": 0, "iter": 12631, "iter_tflops": 43.2491047838579, "iter_time": 0.47702937698364256, "loss": 0.002359397243708372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12949528344779, "step_time": 0.42865800666809084} +{"epoch": 0, "iter": 12632, "iter_tflops": 45.203284594212896, "iter_time": 0.45640695571899403, "loss": 0.004360128194093704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04502795433928, "step_time": 0.4122506141662598} +{"epoch": 0, "iter": 12633, "iter_tflops": 15.948569825444714, "iter_time": 1.001429801940918, "loss": 0.03044275753200054, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 17.180682486593263, "step_time": 0.9296122627258301} +{"epoch": 0, "iter": 12634, "iter_tflops": 34.00962466915434, "iter_time": 0.4696133308410644, "loss": 0.06910539418458939, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 38.250670774986524, "step_time": 0.41754491615295414} +{"epoch": 0, "iter": 12635, "iter_tflops": 39.11415123893099, "iter_time": 0.4083272323608398, "loss": 0.051851511001586914, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 42.832949024570595, "step_time": 0.37287586975097653} +{"epoch": 0, "iter": 12636, "iter_tflops": 41.9187184241998, "iter_time": 0.38100814437866215, "loss": 0.1301044523715973, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 45.82955235741044, "step_time": 0.34849507141113284} +{"epoch": 0, "iter": 12637, "iter_tflops": 23.9865117191236, "iter_time": 0.860112289428711, "loss": 0.7884483933448792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.120218725570723, "step_time": 0.8212943420410157} +{"epoch": 0, "iter": 12638, "iter_tflops": 13.858642335989623, "iter_time": 1.4886807098388672, "loss": 
0.6873295307159424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.923040727504834, "step_time": 1.219112678527832} +{"epoch": 0, "iter": 12639, "iter_tflops": 38.06274112171234, "iter_time": 0.5420285797119141, "loss": 0.712675154209137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.58306061747622, "step_time": 0.4961417751312256} +{"epoch": 0, "iter": 12640, "iter_tflops": 36.72813675855802, "iter_time": 0.561724479675293, "loss": 0.6446701884269714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.75172284810665, "step_time": 0.5189987258911133} +{"epoch": 0, "iter": 12641, "iter_tflops": 20.852992887908766, "iter_time": 0.989358871459961, "loss": 0.006058590020984411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.529546975490383, "step_time": 0.9157349472045897} +{"epoch": 0, "iter": 12642, "iter_tflops": 15.508322000773603, "iter_time": 1.3303240356445312, "loss": 0.01487292256206274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.001574573922348, "step_time": 1.0857570476531984} +{"epoch": 0, "iter": 12643, "iter_tflops": 44.71262692765749, "iter_time": 0.46141537475585936, "loss": 0.011678434908390045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6577493844271, "step_time": 0.4154657382965088} +{"epoch": 0, "iter": 12644, "iter_tflops": 44.59996007946698, "iter_time": 0.4625809860229492, "loss": 0.020232820883393288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.24511636825168, "step_time": 0.41894699478149416} +{"epoch": 0, "iter": 12645, "iter_tflops": 23.316510697926343, "iter_time": 0.8741513824462891, "loss": 0.0066085211001336575, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 24.9874038680008, "step_time": 0.8156973876953125} +{"epoch": 0, "iter": 12646, "iter_tflops": 8.860189194301054, "iter_time": 2.3004204101562498, "loss": 0.009947397746145725, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 10.37819664646337, "step_time": 1.9639404373168947} +{"epoch": 0, "iter": 12647, "iter_tflops": 11.387180924042399, "iter_time": 
1.7899215087890625, "loss": 0.0115859629586339, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 14.839380884757821, "step_time": 1.3735182228088378} +{"epoch": 0, "iter": 12648, "iter_tflops": 41.2477137955291, "iter_time": 0.49414035797119144, "loss": 0.004409555345773697, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 45.827650134522884, "step_time": 0.44475682258605953} +{"epoch": 0, "iter": 12649, "iter_tflops": 17.491697232310838, "iter_time": 0.8990344772338869, "loss": 0.3340386748313904, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 18.672139602253605, "step_time": 0.8421980133056641} +{"epoch": 0, "iter": 12650, "iter_tflops": 9.761032650334894, "iter_time": 1.6110630340576173, "loss": 0.4722924828529358, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 12.25447330022158, "step_time": 1.2832570190429688} +{"epoch": 0, "iter": 12651, "iter_tflops": 24.225689106884417, "iter_time": 0.6491307144165039, "loss": 0.33117568492889404, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.119665417555563, "step_time": 0.6020612678527832} +{"epoch": 0, "iter": 12652, "iter_tflops": 24.48401527107793, "iter_time": 0.6422818603515625, "loss": 0.3157571256160736, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.271573270053825, "step_time": 0.598580020904541} +{"epoch": 0, "iter": 12653, "iter_tflops": 20.501172615403746, "iter_time": 1.0063372421264647, "loss": 0.04328763484954834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.189488991135768, "step_time": 0.9297687530517578} +{"epoch": 0, "iter": 12654, "iter_tflops": 20.835298575049336, "iter_time": 0.9901990814208985, "loss": 0.05008798837661743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.953547163487034, "step_time": 0.7949238452911376} +{"epoch": 0, "iter": 12655, "iter_tflops": 51.588839946710095, "iter_time": 0.3999138870239258, "loss": 0.05731736496090889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.11544017111249, "step_time": 0.3676544895172119} +{"epoch": 0, "iter": 12656, "iter_tflops": 
48.84426491187316, "iter_time": 0.4223851776123047, "loss": 0.1342344582080841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.89441231859688, "step_time": 0.39004296684265133} +{"epoch": 0, "iter": 12657, "iter_tflops": 23.842035545292052, "iter_time": 0.865324333190918, "loss": 0.5905939936637878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.00780466427, "step_time": 0.8249861907958984} +{"epoch": 0, "iter": 12658, "iter_tflops": 21.029713042931146, "iter_time": 0.981044937133789, "loss": 0.5926874876022339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.53054164609661, "step_time": 0.7231230926513672} +{"epoch": 0, "iter": 12659, "iter_tflops": 42.78285623786832, "iter_time": 0.48222805404663094, "loss": 0.7436834573745728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.82213944267917, "step_time": 0.4502429122924805} +{"epoch": 0, "iter": 12660, "iter_tflops": 46.94956934782074, "iter_time": 0.4394309425354003, "loss": 0.6645355820655823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.38878355154683, "step_time": 0.40943821334838865} +{"epoch": 0, "iter": 12661, "iter_tflops": 2.236975004752263, "iter_time": 0.630402603149414, "loss": 0.45136791467666626, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.37931693865245, "step_time": 0.5926889533996581} +{"epoch": 0, "iter": 12662, "iter_tflops": 0.9245209733268888, "iter_time": 1.5253249053955078, "loss": 0.4291406571865082, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.1678621935543814, "step_time": 1.2075010852813721} +{"epoch": 0, "iter": 12663, "iter_tflops": 2.6742498688631797, "iter_time": 0.5273235244750976, "loss": 0.2727089524269104, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.953733225819409, "step_time": 0.47742797279357907} +{"epoch": 0, "iter": 12664, "iter_tflops": 2.6665840330739665, "iter_time": 0.5288394622802735, "loss": 0.4168078601360321, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.922520395148314, "step_time": 0.4825269546508789} +{"epoch": 0, "iter": 12665, "iter_tflops": 
14.964925515799175, "iter_time": 1.3786298828125, "loss": 0.5270504355430603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.784058549359676, "step_time": 1.307084197998047} +{"epoch": 0, "iter": 12666, "iter_tflops": 21.836367818664737, "iter_time": 0.9448042678833009, "loss": 0.6065546870231628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.83563204026846, "step_time": 0.7411756801605225} +{"epoch": 0, "iter": 12667, "iter_tflops": 41.76435373029194, "iter_time": 0.493988094329834, "loss": 0.6522973775863647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.0691009657675, "step_time": 0.4577658100128174} +{"epoch": 0, "iter": 12668, "iter_tflops": 46.983735426162085, "iter_time": 0.4391113929748536, "loss": 0.6201702356338501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.722428888577, "step_time": 0.4067449836730957} +{"epoch": 0, "iter": 12669, "iter_tflops": 23.30353313544602, "iter_time": 0.552332160949707, "loss": 0.05005793273448944, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 24.994781481921965, "step_time": 0.5149591255187989} +{"epoch": 0, "iter": 12670, "iter_tflops": 8.031444662141805, "iter_time": 1.6026121520996097, "loss": 0.025346366688609123, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 9.128035448472861, "step_time": 1.410083351135254} +{"epoch": 0, "iter": 12671, "iter_tflops": 20.72441611246383, "iter_time": 0.6210689239501953, "loss": 0.023533983156085014, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 25.72926756919355, "step_time": 0.5002587337493897} +{"epoch": 0, "iter": 12672, "iter_tflops": 26.829901715001952, "iter_time": 0.4797367858886719, "loss": 0.022827347740530968, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 29.529738485264584, "step_time": 0.4358755435943603} +{"epoch": 0, "iter": 12673, "iter_tflops": 21.226535466129544, "iter_time": 0.971948226928711, "loss": 0.5259758234024048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.828552520662647, "step_time": 0.9037407646179201} +{"epoch": 0, "iter": 12674, 
"iter_tflops": 35.45580407624197, "iter_time": 0.5818819808959961, "loss": 0.586899995803833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.92420681845289, "step_time": 0.530032470703125} +{"epoch": 0, "iter": 12675, "iter_tflops": 36.1832750426737, "iter_time": 0.5701831436157226, "loss": 0.4442683756351471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.40467027829828, "step_time": 0.5235697536468507} +{"epoch": 0, "iter": 12676, "iter_tflops": 39.908102354934194, "iter_time": 0.5169650344848633, "loss": 0.45936912298202515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.47376349258002, "step_time": 0.4745642395019531} +{"epoch": 0, "iter": 12677, "iter_tflops": 23.281062911706652, "iter_time": 0.8861748962402343, "loss": 0.20646964013576508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.026681268670885, "step_time": 0.8243639373779296} +{"epoch": 0, "iter": 12678, "iter_tflops": 10.473626259291905, "iter_time": 1.9698137969970704, "loss": 0.22985820472240448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.434691128334936, "step_time": 1.659156089782715} +{"epoch": 0, "iter": 12679, "iter_tflops": 14.159467109575035, "iter_time": 1.4570529632568359, "loss": 0.23451316356658936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.99590677268726, "step_time": 1.2138860130310059} +{"epoch": 0, "iter": 12680, "iter_tflops": 40.06321807139779, "iter_time": 0.5149634628295898, "loss": 0.23397673666477203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.868176315444614, "step_time": 0.47029749679565425} +{"epoch": 0, "iter": 12681, "iter_tflops": 13.93559199901319, "iter_time": 1.1961129913330077, "loss": 0.2540284991264343, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 14.880382124962447, "step_time": 1.120168991088867} +{"epoch": 0, "iter": 12682, "iter_tflops": 11.762458339343642, "iter_time": 1.4170968475341796, "loss": 0.41230952739715576, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 13.231065252764488, "step_time": 1.2598035240173338} +{"epoch": 0, 
"iter": 12683, "iter_tflops": 23.976174468988912, "iter_time": 0.6952127685546875, "loss": 0.2857024669647217, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 25.91734698000503, "step_time": 0.6431423187255859} +{"epoch": 0, "iter": 12684, "iter_tflops": 23.94571036547318, "iter_time": 0.6960972290039062, "loss": 0.376874178647995, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 25.700016653212966, "step_time": 0.6485810050964355} +{"epoch": 0, "iter": 12685, "iter_tflops": 25.401102647145837, "iter_time": 0.8122125167846679, "loss": 0.32026147842407227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.169006947142396, "step_time": 0.7593613395690917} +{"epoch": 0, "iter": 12686, "iter_tflops": 9.775450150998692, "iter_time": 2.1105006103515622, "loss": 0.33212733268737793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.710195990726856, "step_time": 1.623192398071289} +{"epoch": 0, "iter": 12687, "iter_tflops": 10.353299346442666, "iter_time": 1.9927071380615236, "loss": 0.24670329689979553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.811905588390836, "step_time": 1.7466354904174808} +{"epoch": 0, "iter": 12688, "iter_tflops": 34.231500362562926, "iter_time": 0.6026932296752929, "loss": 0.19601179659366608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1197746928739, "step_time": 0.4676155681610107} +{"epoch": 0, "iter": 12689, "iter_tflops": 18.866745942547308, "iter_time": 0.8074884796142578, "loss": 0.3463444709777832, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 19.858131137161322, "step_time": 0.7671759185791015} +{"epoch": 0, "iter": 12690, "iter_tflops": 11.986480714321347, "iter_time": 1.270988571166992, "loss": 0.4126962721347809, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 15.16273077281036, "step_time": 1.0047451362609863} +{"epoch": 0, "iter": 12691, "iter_tflops": 27.14207189455219, "iter_time": 0.5612939224243164, "loss": 0.3347890377044678, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.821364478098516, "step_time": 0.5285898246765137} 
+{"epoch": 0, "iter": 12692, "iter_tflops": 29.167586434490197, "iter_time": 0.522315414428711, "loss": 0.2952454090118408, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 30.892383805899176, "step_time": 0.4931532669067383} +{"epoch": 0, "iter": 12693, "iter_tflops": 28.768604780521038, "iter_time": 0.7171391754150391, "loss": 0.07619214057922363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.55249542873923, "step_time": 0.6752670516967774} +{"epoch": 0, "iter": 12694, "iter_tflops": 24.74748806057855, "iter_time": 0.8336641464233397, "loss": 0.0645039975643158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.545591338406208, "step_time": 0.6540087738037109} +{"epoch": 0, "iter": 12695, "iter_tflops": 38.64694976928232, "iter_time": 0.5338349761962891, "loss": 0.048419203609228134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.513846833832766, "step_time": 0.4852793865203857} +{"epoch": 0, "iter": 12696, "iter_tflops": 40.76095557761378, "iter_time": 0.5061484260559083, "loss": 0.07707066833972931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.772657362532144, "step_time": 0.4607967166900635} +{"epoch": 0, "iter": 12697, "iter_tflops": 21.173369690314697, "iter_time": 0.9743887634277344, "loss": 0.3508205711841583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.532186398152277, "step_time": 0.9156276779174806} +{"epoch": 0, "iter": 12698, "iter_tflops": 13.968673681092858, "iter_time": 1.476954360961914, "loss": 0.3413316607475281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.98007005922181, "step_time": 1.2150181617736815} +{"epoch": 0, "iter": 12699, "iter_tflops": 48.55394604670667, "iter_time": 0.4249107475280761, "loss": 0.29719990491867065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.86308762948825, "step_time": 0.39027409172058103} +{"epoch": 0, "iter": 12700, "iter_tflops": 49.05355751044432, "iter_time": 0.4205830230712891, "loss": 0.2639455497264862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.11812552581332, "step_time": 
0.3884002552032471} +{"epoch": 0, "iter": 12701, "iter_tflops": 23.35559517698638, "iter_time": 0.883346939086914, "loss": 0.36497217416763306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.600435039727603, "step_time": 0.8386475067138672} +{"epoch": 0, "iter": 12702, "iter_tflops": 14.399218946581708, "iter_time": 1.4327925415039064, "loss": 0.3893815279006958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.191326588224687, "step_time": 1.134117042541504} +{"epoch": 0, "iter": 12703, "iter_tflops": 37.22331377597352, "iter_time": 0.5542519302368165, "loss": 0.3066844642162323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.814990067066354, "step_time": 0.5054783420562744} +{"epoch": 0, "iter": 12704, "iter_tflops": 38.19622646481554, "iter_time": 0.5401343383789062, "loss": 0.3203582465648651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78040193132937, "step_time": 0.4937983493804931} +{"epoch": 0, "iter": 12705, "iter_tflops": 29.040626586076304, "iter_time": 0.7104217758178711, "loss": 0.6146224737167358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.198955858088265, "step_time": 0.6612751274108887} +{"epoch": 0, "iter": 12706, "iter_tflops": 8.598858127718938, "iter_time": 2.3992829284667967, "loss": 0.7923468947410583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.13325238809335, "step_time": 2.0359794387817383} +{"epoch": 0, "iter": 12707, "iter_tflops": 24.441186189519154, "iter_time": 0.8441117935180664, "loss": 0.6686880588531494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.612879130322092, "step_time": 0.7471547393798829} +{"epoch": 0, "iter": 12708, "iter_tflops": 45.97582564224804, "iter_time": 0.44873785781860354, "loss": 0.7369931936264038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.5842142984221, "step_time": 0.41608188819885256} +{"epoch": 0, "iter": 12709, "iter_tflops": 21.847525117621842, "iter_time": 0.7179165420532226, "loss": 0.24408335983753204, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 23.1824027457349, 
"step_time": 0.6765778274536134} +{"epoch": 0, "iter": 12710, "iter_tflops": 7.095172214265492, "iter_time": 2.210615783691406, "loss": 0.29034602642059326, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 8.52760803268067, "step_time": 1.8392847824096676} +{"epoch": 0, "iter": 12711, "iter_tflops": 8.237738805151826, "iter_time": 1.9040054626464842, "loss": 0.4958024024963379, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 10.02201573446385, "step_time": 1.5650244522094725} +{"epoch": 0, "iter": 12712, "iter_tflops": 18.508248900664004, "iter_time": 0.8474437408447265, "loss": 0.25233080983161926, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 26.013166327422734, "step_time": 0.6029523468017578} +{"epoch": 0, "iter": 12713, "iter_tflops": 20.298215788605912, "iter_time": 0.7404746551513672, "loss": 0.2370608150959015, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 21.471480958355738, "step_time": 0.7000129318237304} +{"epoch": 0, "iter": 12714, "iter_tflops": 13.605522796780944, "iter_time": 1.1047215576171876, "loss": 0.34434282779693604, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 15.999422417667711, "step_time": 0.9394285583496094} +{"epoch": 0, "iter": 12715, "iter_tflops": 25.38350781636566, "iter_time": 0.5921291275024414, "loss": 0.30742642283439636, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.107843455560158, "step_time": 0.5544636688232422} +{"epoch": 0, "iter": 12716, "iter_tflops": 27.72124859428225, "iter_time": 0.542194709777832, "loss": 0.2649289071559906, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.435732012785735, "step_time": 0.5106145935058594} +{"epoch": 0, "iter": 12717, "iter_tflops": 36.68156037675497, "iter_time": 0.562437728881836, "loss": 0.10381971299648285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.844579139499906, "step_time": 0.517789218902588} +{"epoch": 0, "iter": 12718, "iter_tflops": 8.58362993333779, "iter_time": 2.4035394897460938, "loss": 0.06292957812547684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
9.781116410727266, "step_time": 2.1092779846191405} +{"epoch": 0, "iter": 12719, "iter_tflops": 14.734647077588564, "iter_time": 1.4001756134033203, "loss": 0.10328442603349686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.644166638680133, "step_time": 1.1692869338989258} +{"epoch": 0, "iter": 12720, "iter_tflops": 23.411738070556705, "iter_time": 0.8812286148071289, "loss": 0.04752776771783829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.877844707242993, "step_time": 0.7144263610839844} +{"epoch": 0, "iter": 12721, "iter_tflops": 29.394182668516855, "iter_time": 0.5894345855712891, "loss": 0.3830379843711853, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 31.622512742711546, "step_time": 0.5478991508483886} +{"epoch": 0, "iter": 12722, "iter_tflops": 29.800697678831487, "iter_time": 0.5813940353393555, "loss": 0.39297839999198914, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 31.705267172112478, "step_time": 0.5464690704345703} +{"epoch": 0, "iter": 12723, "iter_tflops": 31.009543441292816, "iter_time": 0.5587295379638672, "loss": 0.36712607741355896, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 33.06549060322653, "step_time": 0.5239888343811036} +{"epoch": 0, "iter": 12724, "iter_tflops": 31.37239436647781, "iter_time": 0.5522673110961914, "loss": 0.5073129534721375, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 33.37482049519408, "step_time": 0.5191323165893555} +{"epoch": 0, "iter": 12725, "iter_tflops": 37.316046427456904, "iter_time": 0.5528745803833008, "loss": 0.7959004640579224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.246402901380975, "step_time": 0.5126195640563965} +{"epoch": 0, "iter": 12726, "iter_tflops": 9.543704192860826, "iter_time": 2.1617490539550785, "loss": 0.6618284583091736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.63004369720737, "step_time": 1.633493438720703} +{"epoch": 0, "iter": 12727, "iter_tflops": 9.193532731175036, "iter_time": 2.244087677001953, "loss": 0.5906015038490295, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 10.315058475498097, "step_time": 2.000094673156738} +{"epoch": 0, "iter": 12728, "iter_tflops": 23.418814541118376, "iter_time": 0.8809623336791992, "loss": 0.8512362837791443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.03942239192386, "step_time": 0.6439283847808838} +{"epoch": 0, "iter": 12729, "iter_tflops": 12.185199340917874, "iter_time": 1.2838333435058593, "loss": 0.4094524681568146, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 13.18841290196318, "step_time": 1.186174964904785} +{"epoch": 0, "iter": 12730, "iter_tflops": 14.413437478175682, "iter_time": 1.0853597717285155, "loss": 0.3099197447299957, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 17.446366892703313, "step_time": 0.896677532196045} +{"epoch": 0, "iter": 12731, "iter_tflops": 26.498761883843557, "iter_time": 0.5903583450317382, "loss": 0.3858608901500702, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 28.187261735760092, "step_time": 0.5549941444396973} +{"epoch": 0, "iter": 12732, "iter_tflops": 27.633190272488925, "iter_time": 0.566122299194336, "loss": 0.4858230948448181, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.399050964076547, "step_time": 0.532118034362793} +{"epoch": 0, "iter": 12733, "iter_tflops": 30.809923345918044, "iter_time": 0.6696249542236329, "loss": 0.04056093841791153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.89421691091586, "step_time": 0.6271951560974122} +{"epoch": 0, "iter": 12734, "iter_tflops": 8.584605575828709, "iter_time": 2.403266326904297, "loss": 0.07555056363344193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.683061023687689, "step_time": 1.931196823120117} +{"epoch": 0, "iter": 12735, "iter_tflops": 10.416938731823057, "iter_time": 1.9805332489013674, "loss": 0.06122290715575218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.58589351282269, "step_time": 1.6392235870361331} +{"epoch": 0, "iter": 12736, "iter_tflops": 38.18648842721591, "iter_time": 0.5402720794677734, "loss": 0.023215994238853455, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 53.31015204392506, "step_time": 0.3870012130737305} +{"epoch": 0, "iter": 12737, "iter_tflops": 23.252371437700848, "iter_time": 0.7592818450927734, "loss": 0.36221709847450256, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 24.591688584239403, "step_time": 0.7179296951293945} +{"epoch": 0, "iter": 12738, "iter_tflops": 12.367017758312253, "iter_time": 1.4275958709716794, "loss": 0.4307800233364105, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 14.431910580774296, "step_time": 1.223337921142578} +{"epoch": 0, "iter": 12739, "iter_tflops": 26.845774377104874, "iter_time": 0.6576492538452148, "loss": 0.3464675843715668, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 28.968400210636627, "step_time": 0.6094607696533203} +{"epoch": 0, "iter": 12740, "iter_tflops": 27.683795983875026, "iter_time": 0.6377414245605468, "loss": 0.4092157483100891, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 29.68640923478796, "step_time": 0.5947200736999512} +{"epoch": 0, "iter": 12741, "iter_tflops": 20.959500379028427, "iter_time": 0.9843313598632812, "loss": 0.5934984087944031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.308212508906223, "step_time": 0.924820556640625} +{"epoch": 0, "iter": 12742, "iter_tflops": 28.498437257665216, "iter_time": 0.7239377136230468, "loss": 0.5636243224143982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.44878342774334, "step_time": 0.5819972229003906} +{"epoch": 0, "iter": 12743, "iter_tflops": 40.19736175344391, "iter_time": 0.5132449645996093, "loss": 0.6117978692054749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.988597057516046, "step_time": 0.46901003646850586} +{"epoch": 0, "iter": 12744, "iter_tflops": 37.73362656336067, "iter_time": 0.5467561798095704, "loss": 0.5871133804321289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.006835218574935, "step_time": 0.5031135272979737} +{"epoch": 0, "iter": 12745, "iter_tflops": 18.480739508238035, "iter_time": 1.1163564910888673, "loss": 0.3458644449710846, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 19.834686188067113, "step_time": 1.0401522521972657} +{"epoch": 0, "iter": 12746, "iter_tflops": 20.759867663622266, "iter_time": 0.9937969665527344, "loss": 0.1873365193605423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.60792072192007, "step_time": 0.8056528186798095} +{"epoch": 0, "iter": 12747, "iter_tflops": 47.97273492942826, "iter_time": 0.43005873107910153, "loss": 0.3289577066898346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.16924234249024, "step_time": 0.39546469497680664} +{"epoch": 0, "iter": 12748, "iter_tflops": 51.64191433404408, "iter_time": 0.39950288009643553, "loss": 0.2265838235616684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.980610824031466, "step_time": 0.36853998565673834} +{"epoch": 0, "iter": 12749, "iter_tflops": 41.180070368279296, "iter_time": 0.5009970436096192, "loss": 0.05914343148469925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.845698118924496, "step_time": 0.4600462112426758} +{"epoch": 0, "iter": 12750, "iter_tflops": 36.129381939396595, "iter_time": 0.5710336685180664, "loss": 0.04085567593574524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.26768679938953, "step_time": 0.5123486137390137} +{"epoch": 0, "iter": 12751, "iter_tflops": 39.36589871413241, "iter_time": 0.5240854187011719, "loss": 0.10127926617860794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.293780156160274, "step_time": 0.476537124633789} +{"epoch": 0, "iter": 12752, "iter_tflops": 42.12668068327267, "iter_time": 0.4897393569946289, "loss": 0.08769699931144714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.113769919188236, "step_time": 0.44739550781250004} +{"epoch": 0, "iter": 12753, "iter_tflops": 31.107747076573638, "iter_time": 0.6632140045166016, "loss": 0.16737329959869385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.45555773007308, "step_time": 0.598774040222168} +{"epoch": 0, "iter": 12754, "iter_tflops": 47.68747422166861, "iter_time": 0.432631290435791, "loss": 
0.12618039548397064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40528037023854, "step_time": 0.3936834869384766} +{"epoch": 0, "iter": 12755, "iter_tflops": 47.08938528078286, "iter_time": 0.43812620162963867, "loss": 0.12934306263923645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.14047296746283, "step_time": 0.40342007637023924} +{"epoch": 0, "iter": 12756, "iter_tflops": 53.88632701863284, "iter_time": 0.3828632354736328, "loss": 0.1885182112455368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.68524071180511, "step_time": 0.35155506324768065} +{"epoch": 0, "iter": 12757, "iter_tflops": 30.55074333269545, "iter_time": 0.675305778503418, "loss": 0.14424006640911102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.398875968291335, "step_time": 0.6367842369079589} +{"epoch": 0, "iter": 12758, "iter_tflops": 14.49552657293069, "iter_time": 1.4232731323242187, "loss": 0.1522424966096878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.050839483002957, "step_time": 1.2099752349853514} +{"epoch": 0, "iter": 12759, "iter_tflops": 48.57456851418666, "iter_time": 0.4247303504943848, "loss": 0.15700064599514008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.769944880677244, "step_time": 0.3909629535675049} +{"epoch": 0, "iter": 12760, "iter_tflops": 50.586744460952055, "iter_time": 0.4078359603881836, "loss": 0.18353548645973206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.81194045065785, "step_time": 0.37639779472351076} +{"epoch": 0, "iter": 12761, "iter_tflops": 37.714762978739074, "iter_time": 0.5470296478271484, "loss": 0.13707955181598663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56709513763857, "step_time": 0.5085671882629395} +{"epoch": 0, "iter": 12762, "iter_tflops": 12.574851847323787, "iter_time": 1.6406629486083983, "loss": 0.1520557552576065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.091249313266914, "step_time": 1.282131244659424} +{"epoch": 0, "iter": 12763, "iter_tflops": 39.180662889242555, "iter_time": 
0.5265631561279297, "loss": 0.09443674236536026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.184681244515474, "step_time": 0.4777410163879394} +{"epoch": 0, "iter": 12764, "iter_tflops": 44.858753086964064, "iter_time": 0.4599123268127441, "loss": 0.13862930238246918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11465497182156, "step_time": 0.42005982780456547} +{"epoch": 0, "iter": 12765, "iter_tflops": 15.948641032360037, "iter_time": 1.293595703125, "loss": 0.6075612306594849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.06130375905656, "step_time": 1.2092331161499024} +{"epoch": 0, "iter": 12766, "iter_tflops": 22.883798233726594, "iter_time": 0.9015589675903319, "loss": 0.6392437219619751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.703235336101216, "step_time": 0.5778494110107423} +{"epoch": 0, "iter": 12767, "iter_tflops": 46.803628106134326, "iter_time": 0.44080115890502924, "loss": 0.870830237865448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.314401220714934, "step_time": 0.4100435066223144} +{"epoch": 0, "iter": 12768, "iter_tflops": 44.82122004690431, "iter_time": 0.4602974548339843, "loss": 0.7288351058959961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.35751532568056, "step_time": 0.4266367568969727} +{"epoch": 0, "iter": 12769, "iter_tflops": 29.89131649474618, "iter_time": 0.6902035751342773, "loss": 0.27315109968185425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.734884128632267, "step_time": 0.6501077308654785} +{"epoch": 0, "iter": 12770, "iter_tflops": 11.164100317895597, "iter_time": 1.8479853210449217, "loss": 0.22013288736343384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.898192622384856, "step_time": 1.5995336799621582} +{"epoch": 0, "iter": 12771, "iter_tflops": 45.204203078222164, "iter_time": 0.45639768218994137, "loss": 0.31403061747550964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.82131518902705, "step_time": 0.42258373069763183} +{"epoch": 0, "iter": 12772, "iter_tflops": 52.81979092523682, 
"iter_time": 0.39059400177001946, "loss": 0.2692814767360687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.124213253332485, "step_time": 0.36116197204589845} +{"epoch": 0, "iter": 12773, "iter_tflops": 8.494847675870085, "iter_time": 0.8137289886474609, "loss": 0.002522988710552454, "lr": 3e-05, "seqlen": 2800.0, "step_tflops": 8.984153134807725, "step_time": 0.7694107284545898} +{"epoch": 0, "iter": 12774, "iter_tflops": 5.989542347993294, "iter_time": 1.154095489501953, "loss": 0.00870540365576744, "lr": 3e-05, "seqlen": 2800.0, "step_tflops": 7.817992696532402, "step_time": 0.8841788520812989} +{"epoch": 0, "iter": 12775, "iter_tflops": 14.7094134567919, "iter_time": 0.4699374198913574, "loss": 0.0016827984945848584, "lr": 3e-05, "seqlen": 2800.0, "step_tflops": 16.333983763635434, "step_time": 0.42319766616821286} +{"epoch": 0, "iter": 12776, "iter_tflops": 16.748426932194604, "iter_time": 0.41272555541992195, "loss": 0.0016640872927382588, "lr": 3e-05, "seqlen": 2800.0, "step_tflops": 18.58231313491915, "step_time": 0.3719937210083008} +{"epoch": 0, "iter": 12777, "iter_tflops": 22.260112495558698, "iter_time": 0.9268189239501953, "loss": 0.2834493815898895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.977775542953204, "step_time": 0.8604256668090821} +{"epoch": 0, "iter": 12778, "iter_tflops": 45.615110335854105, "iter_time": 0.4522863883972168, "loss": 0.46647909283638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.60041089917715, "step_time": 0.41594602012634274} +{"epoch": 0, "iter": 12779, "iter_tflops": 46.836614629257745, "iter_time": 0.4404907073974609, "loss": 0.2522302567958832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.78063057901659, "step_time": 0.40627879714965814} +{"epoch": 0, "iter": 12780, "iter_tflops": 48.60445701810131, "iter_time": 0.42446916961669917, "loss": 0.29042598605155945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.47342805314574, "step_time": 0.3931722068786621} +{"epoch": 0, "iter": 12781, "iter_tflops": 
29.428737798037673, "iter_time": 0.7010526123046875, "loss": 0.3668029308319092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.272946664824886, "step_time": 0.6597105712890625} +{"epoch": 0, "iter": 12782, "iter_tflops": 11.489821268224544, "iter_time": 1.7955974273681643, "loss": 0.322557270526886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.080141780713653, "step_time": 1.3680967864990234} +{"epoch": 0, "iter": 12783, "iter_tflops": 14.706219757686585, "iter_time": 1.4028821716308595, "loss": 0.317349374294281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.1960154576595, "step_time": 1.1997601165771485} +{"epoch": 0, "iter": 12784, "iter_tflops": 24.170677016701134, "iter_time": 0.8535587768554687, "loss": 0.25186803936958313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.68638198561886, "step_time": 0.7191946868896485} +{"epoch": 0, "iter": 12785, "iter_tflops": 17.0283777918753, "iter_time": 1.0488949279785156, "loss": 0.44078123569488525, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 18.374136060617147, "step_time": 0.9720717773437499} +{"epoch": 0, "iter": 12786, "iter_tflops": 15.998991489193477, "iter_time": 1.1163815612792969, "loss": 0.3749281167984009, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 21.183598439836533, "step_time": 0.84315132522583} +{"epoch": 0, "iter": 12787, "iter_tflops": 32.249803938578026, "iter_time": 0.5538321762084961, "loss": 0.4523787200450897, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 34.205691090963604, "step_time": 0.522163959503174} +{"epoch": 0, "iter": 12788, "iter_tflops": 31.13969604418194, "iter_time": 0.5735758972167969, "loss": 0.4285881519317627, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 33.18644324030358, "step_time": 0.5382010650634765} +{"epoch": 0, "iter": 12789, "iter_tflops": 15.338926270159298, "iter_time": 0.7677530212402344, "loss": 0.004188205115497112, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 16.21828784893818, "step_time": 0.7261251678466797} +{"epoch": 0, "iter": 12790, 
"iter_tflops": 7.573487689700864, "iter_time": 1.554964828491211, "loss": 0.01786627061665058, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 9.710989404998998, "step_time": 1.2126989841461182} +{"epoch": 0, "iter": 12791, "iter_tflops": 25.202873870808457, "iter_time": 0.4672684173583984, "loss": 0.006761176977306604, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 27.976618226530032, "step_time": 0.42094104766845697} +{"epoch": 0, "iter": 12792, "iter_tflops": 24.872876928118146, "iter_time": 0.4734678268432617, "loss": 0.00518884276971221, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 27.46640344369775, "step_time": 0.4287604312896728} +{"epoch": 0, "iter": 12793, "iter_tflops": 23.176314207481877, "iter_time": 0.8901800918579102, "loss": 0.5478388667106628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.776285865878638, "step_time": 0.832695167541504} +{"epoch": 0, "iter": 12794, "iter_tflops": 9.050239067452745, "iter_time": 2.279618621826172, "loss": 0.509384274482727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.32140235077135, "step_time": 1.9988653488159178} +{"epoch": 0, "iter": 12795, "iter_tflops": 12.234383940548126, "iter_time": 1.6863205871582032, "loss": 0.4077141582965851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.829684940727194, "step_time": 1.3912024154663083} +{"epoch": 0, "iter": 12796, "iter_tflops": 45.15094815286598, "iter_time": 0.4569359970092774, "loss": 0.5374616980552673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.82544767261825, "step_time": 0.4225479640960693} +{"epoch": 0, "iter": 12797, "iter_tflops": 23.782616683356668, "iter_time": 0.6698340148925781, "loss": 0.30055707693099976, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 25.261578481298436, "step_time": 0.6306179809570311} +{"epoch": 0, "iter": 12798, "iter_tflops": 10.310211239544529, "iter_time": 1.5451095275878906, "loss": 0.2602837383747101, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 12.965019046092346, "step_time": 1.2287221145629883} +{"epoch": 0, 
"iter": 12799, "iter_tflops": 24.04429093843003, "iter_time": 0.662544204711914, "loss": 0.45676547288894653, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 25.862334625262463, "step_time": 0.6159693565368651} +{"epoch": 0, "iter": 12800, "iter_tflops": 24.86012896837426, "iter_time": 0.6408014068603516, "loss": 0.5269665718078613, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 26.693326093833466, "step_time": 0.5967935791015625} +{"epoch": 0, "iter": 12801, "iter_tflops": 19.23585777267782, "iter_time": 1.0725330657958985, "loss": 0.5627024173736572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.02445993321678, "step_time": 0.9812900581359864} +{"epoch": 0, "iter": 12802, "iter_tflops": 18.205526314386404, "iter_time": 1.1332324676513674, "loss": 0.901971161365509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.789664783899877, "step_time": 0.9468293209075928} +{"epoch": 0, "iter": 12803, "iter_tflops": 36.54083868061849, "iter_time": 0.5646037216186524, "loss": 0.7482203841209412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.964268639723514, "step_time": 0.5162384853363037} +{"epoch": 0, "iter": 12804, "iter_tflops": 38.41073501931597, "iter_time": 0.5371179046630858, "loss": 0.5316342711448669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.58287006482185, "step_time": 0.49614404869079587} +{"epoch": 0, "iter": 12805, "iter_tflops": 16.540607325436863, "iter_time": 1.2472996368408202, "loss": 0.02910430170595646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.4812729346165, "step_time": 1.1801825637817382} +{"epoch": 0, "iter": 12806, "iter_tflops": 22.243678212662104, "iter_time": 0.9275036849975585, "loss": 0.027584651485085487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.46438432908782, "step_time": 0.751194465637207} +{"epoch": 0, "iter": 12807, "iter_tflops": 55.43382046999803, "iter_time": 0.37217520523071285, "loss": 0.007930942811071873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.715860258851066, "step_time": 0.3397974338531494} 
+{"epoch": 0, "iter": 12808, "iter_tflops": 53.29561960374166, "iter_time": 0.3871067390441894, "loss": 0.005831224378198385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.52757364458239, "step_time": 0.35250211524963376} +{"epoch": 0, "iter": 12809, "iter_tflops": 35.84911743448008, "iter_time": 0.5754979476928711, "loss": 0.8188677430152893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.49625796493682, "step_time": 0.535924648284912} +{"epoch": 0, "iter": 12810, "iter_tflops": 11.699550100653905, "iter_time": 1.7634091339111326, "loss": 0.6336495280265808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.185755397202474, "step_time": 1.5646501007080078} +{"epoch": 0, "iter": 12811, "iter_tflops": 14.071653654073254, "iter_time": 1.4661456298828124, "loss": 0.7676674127578735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.004220703111486, "step_time": 1.2891032867431642} +{"epoch": 0, "iter": 12812, "iter_tflops": 15.56092841363858, "iter_time": 1.325826644897461, "loss": 0.4925142824649811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.79502167067999, "step_time": 1.0976892642974851} +{"epoch": 0, "iter": 12813, "iter_tflops": 13.516296538209541, "iter_time": 1.1816382598876953, "loss": 0.40323832631111145, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 14.142993676161002, "step_time": 1.129278106689453} +{"epoch": 0, "iter": 12814, "iter_tflops": 8.011075850176807, "iter_time": 1.9936614532470702, "loss": 0.39728328585624695, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 9.838141031375766, "step_time": 1.6234137191772462} +{"epoch": 0, "iter": 12815, "iter_tflops": 11.21029897113805, "iter_time": 1.4247053680419923, "loss": 0.27681463956832886, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 13.364621927338298, "step_time": 1.195048629760742} +{"epoch": 0, "iter": 12816, "iter_tflops": 28.86562626191937, "iter_time": 0.5533007659912109, "loss": 0.34543177485466003, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.71988045492585, "step_time": 
0.519903491973877} +{"epoch": 0, "iter": 12817, "iter_tflops": 16.172331941713963, "iter_time": 0.9243318252563476, "loss": 0.34510916471481323, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 16.92850402732285, "step_time": 0.8830432434082031} +{"epoch": 0, "iter": 12818, "iter_tflops": 9.612070421492584, "iter_time": 1.5551905517578124, "loss": 0.40440255403518677, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 11.484411976092856, "step_time": 1.301642707824707} +{"epoch": 0, "iter": 12819, "iter_tflops": 21.499422856216327, "iter_time": 0.6953024368286134, "loss": 0.3222120702266693, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.130432286458035, "step_time": 0.6462741775512696} +{"epoch": 0, "iter": 12820, "iter_tflops": 22.03424705190471, "iter_time": 0.6784257736206055, "loss": 0.2469559609889984, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.77386501881576, "step_time": 0.6287829551696777} +{"epoch": 0, "iter": 12821, "iter_tflops": 21.274027766024567, "iter_time": 0.9697784423828125, "loss": 0.03723093867301941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.93707506026584, "step_time": 0.8994648818969726} +{"epoch": 0, "iter": 12822, "iter_tflops": 26.567610616525112, "iter_time": 0.7765505828857422, "loss": 0.040188394486904144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.96706297253709, "step_time": 0.6884589767456055} +{"epoch": 0, "iter": 12823, "iter_tflops": 54.38764286639154, "iter_time": 0.3793342094421387, "loss": 0.03188175708055496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.29855848514046, "step_time": 0.34791897201538086} +{"epoch": 0, "iter": 12824, "iter_tflops": 55.51222497886277, "iter_time": 0.3716495513916016, "loss": 0.04365336149930954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.311552300690664, "step_time": 0.3420753192901611} +{"epoch": 0, "iter": 12825, "iter_tflops": 43.272812536255785, "iter_time": 0.47676802825927733, "loss": 0.06494571268558502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.9794293001705, 
"step_time": 0.43915164184570316} +{"epoch": 0, "iter": 12826, "iter_tflops": 17.647047194216256, "iter_time": 1.1690960693359376, "loss": 0.06809355318546295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.150837765606568, "step_time": 0.9754267768859862} +{"epoch": 0, "iter": 12827, "iter_tflops": 39.0487306717649, "iter_time": 0.5283422317504882, "loss": 0.11369960010051727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.08632048552845, "step_time": 0.4788316402435303} +{"epoch": 0, "iter": 12828, "iter_tflops": 41.388063609408434, "iter_time": 0.4984793128967285, "loss": 0.04427046701312065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.649429947775715, "step_time": 0.4519463558197021} +{"epoch": 0, "iter": 12829, "iter_tflops": 20.66967773912858, "iter_time": 0.9981332931518554, "loss": 0.2955838739871979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.56462202616033, "step_time": 0.9143115043640136} +{"epoch": 0, "iter": 12830, "iter_tflops": 24.384398278472293, "iter_time": 0.8460776138305663, "loss": 0.2877976596355438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.37693219657589, "step_time": 0.6791697521209716} +{"epoch": 0, "iter": 12831, "iter_tflops": 37.21395840859766, "iter_time": 0.5543912658691407, "loss": 0.2574118971824646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.74531477188182, "step_time": 0.5063427200317383} +{"epoch": 0, "iter": 12832, "iter_tflops": 39.96568545081981, "iter_time": 0.5162201843261719, "loss": 0.365540474653244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.54351443543572, "step_time": 0.4738040504455566} +{"epoch": 0, "iter": 12833, "iter_tflops": 20.188163158846407, "iter_time": 1.0219401016235352, "loss": 0.5363616347312927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.945199985580608, "step_time": 0.9401187286376953} +{"epoch": 0, "iter": 12834, "iter_tflops": 16.261485705523143, "iter_time": 1.2687090148925781, "loss": 0.8894248008728027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
20.694854462212078, "step_time": 0.9969189949035644} +{"epoch": 0, "iter": 12835, "iter_tflops": 48.17927569207046, "iter_time": 0.428215103149414, "loss": 0.8629773259162903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.02831169857338, "step_time": 0.3965359020233154} +{"epoch": 0, "iter": 12836, "iter_tflops": 43.29742532193379, "iter_time": 0.47649700546264645, "loss": 0.8251786828041077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.676277294111564, "step_time": 0.44200383377075186} +{"epoch": 0, "iter": 12837, "iter_tflops": 21.02646082006577, "iter_time": 0.9811966781616211, "loss": 0.021258924156427383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.973934223422056, "step_time": 0.9388893814086914} +{"epoch": 0, "iter": 12838, "iter_tflops": 12.94875041003133, "iter_time": 1.5932883758544922, "loss": 0.0368381030857563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.70786248267154, "step_time": 1.1650809650421143} +{"epoch": 0, "iter": 12839, "iter_tflops": 50.55002383388337, "iter_time": 0.4081322212219238, "loss": 0.023088401183485985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.00144880979233, "step_time": 0.3751009101867676} +{"epoch": 0, "iter": 12840, "iter_tflops": 54.67896562287253, "iter_time": 0.37731316375732427, "loss": 0.025579378008842468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.72784186377381, "step_time": 0.3454183654785156} +{"epoch": 0, "iter": 12841, "iter_tflops": 23.853102168481364, "iter_time": 0.864922866821289, "loss": 0.6149443984031677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.315931242498014, "step_time": 0.8149450759887695} +{"epoch": 0, "iter": 12842, "iter_tflops": 12.579362272698333, "iter_time": 1.640074676513672, "loss": 0.6012442111968994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.379134505775061, "step_time": 1.3414989967346191} +{"epoch": 0, "iter": 12843, "iter_tflops": 41.47263965684747, "iter_time": 0.49746275329589845, "loss": 0.5236310958862305, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.779560664211246, "step_time": 0.460725679397583} +{"epoch": 0, "iter": 12844, "iter_tflops": 45.169591356557504, "iter_time": 0.4567474021911621, "loss": 0.7888249754905701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.53167218835959, "step_time": 0.42510576248168946} +{"epoch": 0, "iter": 12845, "iter_tflops": 35.11451501807454, "iter_time": 0.5875374755859375, "loss": 0.004228492267429829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.72167859377253, "step_time": 0.5469293594360352} +{"epoch": 0, "iter": 12846, "iter_tflops": 20.464275441592207, "iter_time": 1.0081516723632813, "loss": 0.006508784368634224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.674215121196212, "step_time": 0.8361398086547852} +{"epoch": 0, "iter": 12847, "iter_tflops": 42.33179904795822, "iter_time": 0.48736632919311523, "loss": 0.006923765875399113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90181217969965, "step_time": 0.4398783874511718} +{"epoch": 0, "iter": 12848, "iter_tflops": 46.933170021530636, "iter_time": 0.43958448791503907, "loss": 0.0021098461002111435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.462226750096086, "step_time": 0.4008978004455567} +{"epoch": 0, "iter": 12849, "iter_tflops": 24.216397572172745, "iter_time": 0.8519472579956056, "loss": 0.7359440922737122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.809570397915603, "step_time": 0.799358268737793} +{"epoch": 0, "iter": 12850, "iter_tflops": 18.174847802389245, "iter_time": 1.1351453247070311, "loss": 0.5497898459434509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.803487679743455, "step_time": 0.9462290534973146} +{"epoch": 0, "iter": 12851, "iter_tflops": 36.58723735837783, "iter_time": 0.5638877105712891, "loss": 0.486186146736145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.8592031007932, "step_time": 0.5175992469787598} +{"epoch": 0, "iter": 12852, "iter_tflops": 41.318003774868714, "iter_time": 0.4993245468139648, "loss": 0.6198455691337585, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 45.03868198203883, "step_time": 0.45807498359680177} +{"epoch": 0, "iter": 12853, "iter_tflops": 18.94074767436884, "iter_time": 1.089243881225586, "loss": 0.557226836681366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.492987702508383, "step_time": 1.0067391738891602} +{"epoch": 0, "iter": 12854, "iter_tflops": 23.500277233653005, "iter_time": 0.8779085159301758, "loss": 0.5029216408729553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.763560891353794, "step_time": 0.7172649307250977} +{"epoch": 0, "iter": 12855, "iter_tflops": 41.5629498461173, "iter_time": 0.4963818397521973, "loss": 0.546374499797821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.505800540716955, "step_time": 0.4635596542358399} +{"epoch": 0, "iter": 12856, "iter_tflops": 40.47604078463613, "iter_time": 0.509711254119873, "loss": 0.6479240655899048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36077968243357, "step_time": 0.47580079650878904} +{"epoch": 0, "iter": 12857, "iter_tflops": 45.178426034097264, "iter_time": 0.45665808486938475, "loss": 0.6697943806648254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50271094614564, "step_time": 0.4167669429779053} +{"epoch": 0, "iter": 12858, "iter_tflops": 45.98807464446209, "iter_time": 0.448618335723877, "loss": 0.5232229232788086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.82621182190438, "step_time": 0.41406104850769043} +{"epoch": 0, "iter": 12859, "iter_tflops": 48.118203126558264, "iter_time": 0.428758602142334, "loss": 0.733538806438446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.27095972199002, "step_time": 0.3946951351165771} +{"epoch": 0, "iter": 12860, "iter_tflops": 46.5556426558569, "iter_time": 0.4431491508483887, "loss": 0.5732845067977905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.41969769611717, "step_time": 0.4091871719360351} +{"epoch": 0, "iter": 12861, "iter_tflops": 27.606506277325565, "iter_time": 0.7473272171020507, "loss": 0.6143215298652649, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 29.25013215396448, "step_time": 0.7053333435058593} +{"epoch": 0, "iter": 12862, "iter_tflops": 8.187648834671627, "iter_time": 2.5197824096679686, "loss": 0.7523542642593384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.72487178941121, "step_time": 2.1214771728515625} +{"epoch": 0, "iter": 12863, "iter_tflops": 13.208377409040992, "iter_time": 1.5619703216552736, "loss": 0.6873903274536133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.326466364084936, "step_time": 1.346108947753906} +{"epoch": 0, "iter": 12864, "iter_tflops": 48.918068519041476, "iter_time": 0.4217479171752929, "loss": 0.8306037187576294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.6656441360196, "step_time": 0.3774051113128662} +{"epoch": 0, "iter": 12865, "iter_tflops": 18.884036411544702, "iter_time": 0.8805053405761719, "loss": 0.4502563774585724, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 19.84334975840628, "step_time": 0.8379379043579102} +{"epoch": 0, "iter": 12866, "iter_tflops": 7.886552078297625, "iter_time": 2.108335144042969, "loss": 0.28244850039482117, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 8.825646447280615, "step_time": 1.8839973945617676} +{"epoch": 0, "iter": 12867, "iter_tflops": 11.664314248615856, "iter_time": 1.4255012817382813, "loss": 0.38222387433052063, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 13.448879296405856, "step_time": 1.2363479919433593} +{"epoch": 0, "iter": 12868, "iter_tflops": 23.135495105452737, "iter_time": 0.7187006301879882, "loss": 0.36528652906417847, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 26.97314079174179, "step_time": 0.6164463768005372} +{"epoch": 0, "iter": 12869, "iter_tflops": 17.60262565517819, "iter_time": 0.8608329162597655, "loss": 0.22523918747901917, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 18.50572568678851, "step_time": 0.8188233108520507} +{"epoch": 0, "iter": 12870, "iter_tflops": 7.449616528539273, "iter_time": 2.0340536346435547, "loss": 0.41887032985687256, "lr": 
3e-05, "seqlen": 6064.0, "step_tflops": 9.520451703931816, "step_time": 1.5916177139282226} +{"epoch": 0, "iter": 12871, "iter_tflops": 9.665180021818836, "iter_time": 1.5677845153808594, "loss": 0.4089144170284271, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 11.116342178969798, "step_time": 1.3631210098266602} +{"epoch": 0, "iter": 12872, "iter_tflops": 26.334878540456504, "iter_time": 0.5753935623168945, "loss": 0.30758848786354065, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.051624105942313, "step_time": 0.540179759979248} +{"epoch": 0, "iter": 12873, "iter_tflops": 18.937027131129355, "iter_time": 0.7915421600341798, "loss": 0.3849402070045471, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 20.178682366944415, "step_time": 0.7428361816406249} +{"epoch": 0, "iter": 12874, "iter_tflops": 21.07684042542457, "iter_time": 0.7111813278198242, "loss": 0.31005504727363586, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 23.549181580511117, "step_time": 0.6365170402526855} +{"epoch": 0, "iter": 12875, "iter_tflops": 21.961324586477826, "iter_time": 0.6825387649536132, "loss": 0.37137410044670105, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 23.67580595024868, "step_time": 0.6331127815246582} +{"epoch": 0, "iter": 12876, "iter_tflops": 22.474523960381646, "iter_time": 0.6669531860351562, "loss": 0.35943782329559326, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.162861538014205, "step_time": 0.620351001739502} +{"epoch": 0, "iter": 12877, "iter_tflops": 19.058089796855192, "iter_time": 1.0825373229980468, "loss": 0.7296434640884399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.600340737077737, "step_time": 1.0014928283691407} +{"epoch": 0, "iter": 12878, "iter_tflops": 16.98076467658631, "iter_time": 1.214968460083008, "loss": 0.7279084920883179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.813448945800257, "step_time": 0.9457969512939454} +{"epoch": 0, "iter": 12879, "iter_tflops": 32.2521227641934, "iter_time": 0.6396817245483399, "loss": 
0.8306583762168884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.338481108591374, "step_time": 0.5838138160705567} +{"epoch": 0, "iter": 12880, "iter_tflops": 36.227440160564896, "iter_time": 0.5694880294799805, "loss": 0.8060355186462402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.468490300127264, "step_time": 0.5227231483459474} +{"epoch": 0, "iter": 12881, "iter_tflops": 21.008441810331004, "iter_time": 0.9820382537841796, "loss": 0.20515106618404388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.50560662626045, "step_time": 0.9167090606689452} +{"epoch": 0, "iter": 12882, "iter_tflops": 19.9208455743694, "iter_time": 1.0356535034179688, "loss": 0.18419064581394196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.972365142575878, "step_time": 0.7648974571228027} +{"epoch": 0, "iter": 12883, "iter_tflops": 51.2776650341187, "iter_time": 0.4023407363891602, "loss": 0.2215043306350708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.59226728286439, "step_time": 0.37111444664001464} +{"epoch": 0, "iter": 12884, "iter_tflops": 49.95626614308272, "iter_time": 0.41298309707641606, "loss": 0.15018346905708313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.24542073975611, "step_time": 0.38032875823974605} +{"epoch": 0, "iter": 12885, "iter_tflops": 33.69697120837046, "iter_time": 0.6122536468505859, "loss": 0.1629241704940796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.95758749014261, "step_time": 0.5737618942260742} +{"epoch": 0, "iter": 12886, "iter_tflops": 9.723142601357834, "iter_time": 2.1218544616699218, "loss": 0.13337579369544983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.651911239784, "step_time": 1.7706188354492187} +{"epoch": 0, "iter": 12887, "iter_tflops": 12.557058071414055, "iter_time": 1.6429878234863282, "loss": 0.20896171033382416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.810015390375682, "step_time": 1.3930501060485843} +{"epoch": 0, "iter": 12888, "iter_tflops": 47.25504969495362, "iter_time": 
0.43659024047851563, "loss": 0.197991743683815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.427676142690316, "step_time": 0.40116713523864744} +{"epoch": 0, "iter": 12889, "iter_tflops": 19.71421953117345, "iter_time": 0.8184595031738282, "loss": 0.4656673073768616, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 20.739623234057657, "step_time": 0.7779934158325196} +{"epoch": 0, "iter": 12890, "iter_tflops": 9.516465116042927, "iter_time": 1.6955130004882812, "loss": 0.3324950933456421, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 11.861838449222903, "step_time": 1.3602689323425292} +{"epoch": 0, "iter": 12891, "iter_tflops": 29.536699424147063, "iter_time": 0.5462793960571288, "loss": 0.28257009387016296, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.640611371043704, "step_time": 0.5099550743103027} +{"epoch": 0, "iter": 12892, "iter_tflops": 29.80633918847582, "iter_time": 0.5413375396728516, "loss": 0.3755809962749481, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.649738731522287, "step_time": 0.5098080101013184} +{"epoch": 0, "iter": 12893, "iter_tflops": 25.56064406549186, "iter_time": 0.8071429443359375, "loss": 0.2025957703590393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.9744291887533, "step_time": 0.7648389282226562} +{"epoch": 0, "iter": 12894, "iter_tflops": 15.390032641582073, "iter_time": 1.3405490417480468, "loss": 0.1176852360367775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.273663183770044, "step_time": 1.1290069923400878} +{"epoch": 0, "iter": 12895, "iter_tflops": 37.2118378023141, "iter_time": 0.5544228591918945, "loss": 0.10280702263116837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.00456313233514, "step_time": 0.5031414051055908} +{"epoch": 0, "iter": 12896, "iter_tflops": 40.16183211924827, "iter_time": 0.5136990127563477, "loss": 0.13461580872535706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.9954986747068, "step_time": 0.4689364624023437} +{"epoch": 0, "iter": 12897, "iter_tflops": 20.91245499454928, 
"iter_time": 0.9865457458496094, "loss": 0.4985750913619995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.576907573903824, "step_time": 0.9138139686584472} +{"epoch": 0, "iter": 12898, "iter_tflops": 16.255050437901808, "iter_time": 1.2692112884521483, "loss": 0.5908117294311523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.855805298596906, "step_time": 1.0390459213256835} +{"epoch": 0, "iter": 12899, "iter_tflops": 44.633544142020604, "iter_time": 0.4622329216003419, "loss": 0.5389387607574463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.1793617465568, "step_time": 0.42821433830261224} +{"epoch": 0, "iter": 12900, "iter_tflops": 48.078251591217985, "iter_time": 0.4291148872375488, "loss": 0.5718206763267517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.74728898579999, "step_time": 0.398689359664917} +{"epoch": 0, "iter": 12901, "iter_tflops": 46.190923325458996, "iter_time": 0.4466482162475586, "loss": 0.18709805607795715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.004225893713404, "step_time": 0.40449772834777836} +{"epoch": 0, "iter": 12902, "iter_tflops": 38.36191154361054, "iter_time": 0.537801498413086, "loss": 0.17973266541957855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.20477722706591, "step_time": 0.5006966400146485} +{"epoch": 0, "iter": 12903, "iter_tflops": 49.77308630376023, "iter_time": 0.4145029983520508, "loss": 0.11029139906167984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.278052605746524, "step_time": 0.3801001052856446} +{"epoch": 0, "iter": 12904, "iter_tflops": 48.76760569898621, "iter_time": 0.42304913711547854, "loss": 0.1404198706150055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.88680159969462, "step_time": 0.39009909629821776} +{"epoch": 0, "iter": 12905, "iter_tflops": 28.413726960439682, "iter_time": 0.7260960006713867, "loss": 0.5363594889640808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.254622839117463, "step_time": 0.6819154090881347} +{"epoch": 0, "iter": 12906, "iter_tflops": 
15.444648164669465, "iter_time": 1.335808578491211, "loss": 0.6378378868103027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.84100715126809, "step_time": 1.15638614654541} +{"epoch": 0, "iter": 12907, "iter_tflops": 44.60052759561418, "iter_time": 0.46257509994506835, "loss": 0.601100742816925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.852367798887606, "step_time": 0.42231511878967287} +{"epoch": 0, "iter": 12908, "iter_tflops": 50.60767783204333, "iter_time": 0.4076672630310058, "loss": 0.5579642057418823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.813041484223625, "step_time": 0.3763902339935302} +{"epoch": 0, "iter": 12909, "iter_tflops": 45.872531863535116, "iter_time": 0.44974830627441403, "loss": 0.11836117506027222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.15654849792125, "step_time": 0.4113339958190917} +{"epoch": 0, "iter": 12910, "iter_tflops": 44.54440099708364, "iter_time": 0.46315795135498045, "loss": 0.2156193107366562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42285593468766, "step_time": 0.42606106376647945} +{"epoch": 0, "iter": 12911, "iter_tflops": 50.28139825481343, "iter_time": 0.41031264495849606, "loss": 0.18859431147575378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.51832596468378, "step_time": 0.37842492675781253} +{"epoch": 0, "iter": 12912, "iter_tflops": 46.340785225971345, "iter_time": 0.4452037963867187, "loss": 0.16886204481124878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.19807633495197, "step_time": 0.41099370765686033} +{"epoch": 0, "iter": 12913, "iter_tflops": 27.016868612185295, "iter_time": 0.7636374816894531, "loss": 0.08043638616800308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.47986596966842, "step_time": 0.724409782409668} +{"epoch": 0, "iter": 12914, "iter_tflops": 13.659336955904214, "iter_time": 1.5104022674560547, "loss": 0.06401536613702774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.702595400196632, "step_time": 1.2352028541564941} +{"epoch": 0, "iter": 12915, 
"iter_tflops": 37.42678919292351, "iter_time": 0.5512386703491211, "loss": 0.07065002620220184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75855523941917, "step_time": 0.49405668830871585} +{"epoch": 0, "iter": 12916, "iter_tflops": 42.86678328624545, "iter_time": 0.4812839202880859, "loss": 0.09118638932704926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.93557067764679, "step_time": 0.4395620040893554} +{"epoch": 0, "iter": 12917, "iter_tflops": 33.07830432117848, "iter_time": 0.6237046890258788, "loss": 0.3883953392505646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.17309880854016, "step_time": 0.5703435478210449} +{"epoch": 0, "iter": 12918, "iter_tflops": 38.43965056885063, "iter_time": 0.5367138671875, "loss": 0.6145293712615967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.18465531656781, "step_time": 0.4890663051605224} +{"epoch": 0, "iter": 12919, "iter_tflops": 36.48318977233283, "iter_time": 0.5654958801269532, "loss": 0.456970751285553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.96238434063888, "step_time": 0.5162628269195556} +{"epoch": 0, "iter": 12920, "iter_tflops": 37.782880460383026, "iter_time": 0.5460434265136719, "loss": 0.5056114196777344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.458545607288805, "step_time": 0.4976318683624268} +{"epoch": 0, "iter": 12921, "iter_tflops": 19.99995065805884, "iter_time": 1.0315572204589842, "loss": 0.6728858351707458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.413067119292034, "step_time": 0.9634814758300781} +{"epoch": 0, "iter": 12922, "iter_tflops": 16.69031517083205, "iter_time": 1.2361116790771483, "loss": 0.489021897315979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.609193916928575, "step_time": 0.9125090255737305} +{"epoch": 0, "iter": 12923, "iter_tflops": 45.64220636468865, "iter_time": 0.45201788330078124, "loss": 0.4702639579772949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45222441895769, "step_time": 0.41719242668151857} +{"epoch": 0, "iter": 
12924, "iter_tflops": 47.94955280598929, "iter_time": 0.4302666511535644, "loss": 0.5847504138946533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.71992613534263, "step_time": 0.3989002895355224} +{"epoch": 0, "iter": 12925, "iter_tflops": 41.86118372906994, "iter_time": 0.49284543991088864, "loss": 0.33976951241493225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.45517324463117, "step_time": 0.4538777885437012} +{"epoch": 0, "iter": 12926, "iter_tflops": 33.6904883050233, "iter_time": 0.6123714599609374, "loss": 0.33849069476127625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.236801221939785, "step_time": 0.554051176071167} +{"epoch": 0, "iter": 12927, "iter_tflops": 39.25098946259283, "iter_time": 0.5256197052001952, "loss": 0.3233160674571991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.964124341565594, "step_time": 0.4801935062408448} +{"epoch": 0, "iter": 12928, "iter_tflops": 38.945315151531666, "iter_time": 0.5297451934814453, "loss": 0.33275604248046875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.42181712871894, "step_time": 0.4863321495056152} +{"epoch": 0, "iter": 12929, "iter_tflops": 19.985904028247734, "iter_time": 1.0322822265625, "loss": 0.0592840202152729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.32734091696849, "step_time": 0.967354232788086} +{"epoch": 0, "iter": 12930, "iter_tflops": 27.055831239647002, "iter_time": 0.7625377807617189, "loss": 0.10374755412340164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.211151136866892, "step_time": 0.6828966369628906} +{"epoch": 0, "iter": 12931, "iter_tflops": 48.71858214980407, "iter_time": 0.4234748344421386, "loss": 0.11134648323059082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.88891018154203, "step_time": 0.39008354377746585} +{"epoch": 0, "iter": 12932, "iter_tflops": 47.28820033151726, "iter_time": 0.4362841758728028, "loss": 0.12124455720186234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.028677327518636, "step_time": 0.3965331153869629} +{"epoch": 0, 
"iter": 12933, "iter_tflops": 36.48883879562532, "iter_time": 0.5654083328247071, "loss": 0.464517742395401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.31224742062636, "step_time": 0.5248006629943848} +{"epoch": 0, "iter": 12934, "iter_tflops": 27.026449911549495, "iter_time": 0.7633667602539063, "loss": 0.6381587982177734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.20046624898335, "step_time": 0.6407079124450684} +{"epoch": 0, "iter": 12935, "iter_tflops": 37.81259703590401, "iter_time": 0.5456142959594726, "loss": 0.4304437041282654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.29014352077495, "step_time": 0.4996614627838135} +{"epoch": 0, "iter": 12936, "iter_tflops": 39.63795155350391, "iter_time": 0.5204883880615234, "loss": 0.384870707988739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19672291832291, "step_time": 0.47760783958435055} +{"epoch": 0, "iter": 12937, "iter_tflops": 13.582846800041064, "iter_time": 1.2030094451904296, "loss": 0.3707849383354187, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 14.741783178558277, "step_time": 1.1084339523315427} +{"epoch": 0, "iter": 12938, "iter_tflops": 27.48800862895801, "iter_time": 0.5944516830444336, "loss": 0.46404147148132324, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 29.54960153943784, "step_time": 0.5529784545898437} +{"epoch": 0, "iter": 12939, "iter_tflops": 29.163354434045832, "iter_time": 0.56030224609375, "loss": 0.2981838583946228, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.113582967132714, "step_time": 0.5251819763183594} +{"epoch": 0, "iter": 12940, "iter_tflops": 29.619892396146568, "iter_time": 0.5516661834716797, "loss": 0.4416824281215668, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.518577930650693, "step_time": 0.5184337005615235} +{"epoch": 0, "iter": 12941, "iter_tflops": 23.777170206935196, "iter_time": 0.8676849822998047, "loss": 0.054854877293109894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.930592689243195, "step_time": 0.8275412368774413} 
+{"epoch": 0, "iter": 12942, "iter_tflops": 17.569242784354664, "iter_time": 1.1742733459472658, "loss": 0.0351303368806839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.091510399214272, "step_time": 0.9781705112457277} +{"epoch": 0, "iter": 12943, "iter_tflops": 40.13450993016434, "iter_time": 0.5140487213134766, "loss": 0.06510890275239944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.24205419086705, "step_time": 0.4663231372833252} +{"epoch": 0, "iter": 12944, "iter_tflops": 45.549252179350255, "iter_time": 0.4529403343200684, "loss": 0.05191885307431221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.17686959995134, "step_time": 0.4111674098968505} +{"epoch": 0, "iter": 12945, "iter_tflops": 30.70259815551108, "iter_time": 0.6719657211303712, "loss": 0.5080748200416565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.86577169295328, "step_time": 0.6092019309997558} +{"epoch": 0, "iter": 12946, "iter_tflops": 33.977104577006486, "iter_time": 0.6072057571411132, "loss": 0.3658580780029297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.05661313280006, "step_time": 0.5567452545166015} +{"epoch": 0, "iter": 12947, "iter_tflops": 42.47331686523341, "iter_time": 0.48574246215820316, "loss": 0.5198747515678406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.48867563356232, "step_time": 0.44378750801086425} +{"epoch": 0, "iter": 12948, "iter_tflops": 42.41073283902681, "iter_time": 0.48645925521850586, "loss": 0.4387426972389221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.203862134477795, "step_time": 0.44652313804626464} +{"epoch": 0, "iter": 12949, "iter_tflops": 40.17731491567487, "iter_time": 0.5135010528564453, "loss": 0.011562269181013107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.757483107385234, "step_time": 0.46095294189453123} +{"epoch": 0, "iter": 12950, "iter_tflops": 35.43044386842833, "iter_time": 0.5822984771728515, "loss": 0.005398332607001066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.21394090694909, "step_time": 
0.5130333671569824} +{"epoch": 0, "iter": 12951, "iter_tflops": 42.895875074135915, "iter_time": 0.48095751571655276, "loss": 0.0058287023566663265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.5563023916127, "step_time": 0.433824592590332} +{"epoch": 0, "iter": 12952, "iter_tflops": 49.66728296024238, "iter_time": 0.4153859901428223, "loss": 0.012826652266085148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.75623411560691, "step_time": 0.37678072357177733} +{"epoch": 0, "iter": 12953, "iter_tflops": 22.378349123432134, "iter_time": 0.9219220504760741, "loss": 0.14307403564453125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.06536292793097, "step_time": 0.8572940940856933} +{"epoch": 0, "iter": 12954, "iter_tflops": 16.50056093465894, "iter_time": 1.2503267974853516, "loss": 0.15816080570220947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.92386152307274, "step_time": 1.0354967327117919} +{"epoch": 0, "iter": 12955, "iter_tflops": 44.390158329969445, "iter_time": 0.46476728820800783, "loss": 0.1487543135881424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76142963156176, "step_time": 0.431961389541626} +{"epoch": 0, "iter": 12956, "iter_tflops": 45.02164480958445, "iter_time": 0.4582483291625976, "loss": 0.15051215887069702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.89938847410579, "step_time": 0.42190902900695804} +{"epoch": 0, "iter": 12957, "iter_tflops": 24.942413648455187, "iter_time": 0.827149040222168, "loss": 0.6017645001411438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.21575825519467, "step_time": 0.7869729843139648} +{"epoch": 0, "iter": 12958, "iter_tflops": 11.807973654749853, "iter_time": 1.7472171020507812, "loss": 0.5222653746604919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.832925001925085, "step_time": 1.390898525238037} +{"epoch": 0, "iter": 12959, "iter_tflops": 36.875011051444034, "iter_time": 0.5594871139526367, "loss": 0.7088703513145447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.0723081714507, 
"step_time": 0.5148466472625732} +{"epoch": 0, "iter": 12960, "iter_tflops": 38.79626178866347, "iter_time": 0.5317804489135742, "loss": 0.6739215850830078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.12296816503757, "step_time": 0.48978252029418945} +{"epoch": 0, "iter": 12961, "iter_tflops": 19.06484369911105, "iter_time": 1.082153823852539, "loss": 0.2091507911682129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.223315030698824, "step_time": 1.0201637802124024} +{"epoch": 0, "iter": 12962, "iter_tflops": 9.048423290870195, "iter_time": 2.2800760803222655, "loss": 0.291958212852478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.672591843831182, "step_time": 1.7674817886352536} +{"epoch": 0, "iter": 12963, "iter_tflops": 16.75714532606627, "iter_time": 1.2311818695068362, "loss": 0.20671264827251434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.772336865558795, "step_time": 1.043432228088379} +{"epoch": 0, "iter": 12964, "iter_tflops": 35.39664169410318, "iter_time": 0.5828545455932617, "loss": 0.23329946398735046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.32968301740934, "step_time": 0.47614226722717284} +{"epoch": 0, "iter": 12965, "iter_tflops": 16.212535510466093, "iter_time": 0.924559600830078, "loss": 0.24771302938461304, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 16.98584571810698, "step_time": 0.8824674148559571} +{"epoch": 0, "iter": 12966, "iter_tflops": 10.161423243276554, "iter_time": 1.4751334533691407, "loss": 0.23792938888072968, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.593230535333479, "step_time": 1.0271512756347656} +{"epoch": 0, "iter": 12967, "iter_tflops": 22.065739700340483, "iter_time": 0.6793089904785156, "loss": 0.4780716300010681, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 23.734667717659057, "step_time": 0.631542667388916} +{"epoch": 0, "iter": 12968, "iter_tflops": 25.73448364576093, "iter_time": 0.5824657516479492, "loss": 0.31408417224884033, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 
27.483267449204867, "step_time": 0.5454029579162598} +{"epoch": 0, "iter": 12969, "iter_tflops": 15.387729409738098, "iter_time": 1.3407496948242186, "loss": 0.40730106830596924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.762314556075903, "step_time": 1.2308021926879884} +{"epoch": 0, "iter": 12970, "iter_tflops": 25.668749762446012, "iter_time": 0.8037436065673829, "loss": 0.508049488067627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.035852955673214, "step_time": 0.5888566074371339} +{"epoch": 0, "iter": 12971, "iter_tflops": 49.05208443500808, "iter_time": 0.4205956535339355, "loss": 0.35690850019454956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.05162003374497, "step_time": 0.38888715362548826} +{"epoch": 0, "iter": 12972, "iter_tflops": 49.41208078528284, "iter_time": 0.417531364440918, "loss": 0.4847983419895172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.34248667254034, "step_time": 0.38676662445068366} +{"epoch": 0, "iter": 12973, "iter_tflops": 24.6438849862068, "iter_time": 0.8371688766479493, "loss": 0.6373127698898315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.92161598986106, "step_time": 0.7959030609130859} +{"epoch": 0, "iter": 12974, "iter_tflops": 19.654208625188076, "iter_time": 1.049703598022461, "loss": 0.744342565536499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.930400202477696, "step_time": 0.9407531700134277} +{"epoch": 0, "iter": 12975, "iter_tflops": 42.56027743016288, "iter_time": 0.4847499771118164, "loss": 0.6515435576438904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.395289427814646, "step_time": 0.4446807804107667} +{"epoch": 0, "iter": 12976, "iter_tflops": 43.57646213654628, "iter_time": 0.4734458122253418, "loss": 0.8207575082778931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.65565386216822, "step_time": 0.4421992149353027} +{"epoch": 0, "iter": 12977, "iter_tflops": 36.135494118359325, "iter_time": 0.5709370803833007, "loss": 0.43688368797302246, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 38.83064677085572, "step_time": 0.5313095512390137} +{"epoch": 0, "iter": 12978, "iter_tflops": 38.845084587557004, "iter_time": 0.5311120758056641, "loss": 0.8631625175476074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.58789880530387, "step_time": 0.48443558120727537} +{"epoch": 0, "iter": 12979, "iter_tflops": 41.81666330359077, "iter_time": 0.49337015151977537, "loss": 0.5968197584152222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.613537808953254, "step_time": 0.4523019809722901} +{"epoch": 0, "iter": 12980, "iter_tflops": 36.60257916581357, "iter_time": 0.5636513595581055, "loss": 0.6293626427650452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03450550620131, "step_time": 0.5153327922821045} +{"epoch": 0, "iter": 12981, "iter_tflops": 31.78954487897894, "iter_time": 0.6489898986816406, "loss": 0.040212444961071014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.552239048384386, "step_time": 0.5970985984802246} +{"epoch": 0, "iter": 12982, "iter_tflops": 9.428632489894472, "iter_time": 2.188132110595703, "loss": 0.055492550134658813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.943420783921617, "step_time": 1.7274023818969726} +{"epoch": 0, "iter": 12983, "iter_tflops": 14.91001599270914, "iter_time": 1.383707000732422, "loss": 0.03283102810382843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.809667950552047, "step_time": 1.2273349819183348} +{"epoch": 0, "iter": 12984, "iter_tflops": 37.420676192367345, "iter_time": 0.5513287200927734, "loss": 0.021000316366553307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.38893463292362, "step_time": 0.49846882247924806} +{"epoch": 0, "iter": 12985, "iter_tflops": 14.402634762746205, "iter_time": 1.16017626953125, "loss": 0.439568430185318, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 15.494591619442497, "step_time": 1.078414680480957} +{"epoch": 0, "iter": 12986, "iter_tflops": 24.40809915084473, "iter_time": 0.6845922317504883, "loss": 0.32237860560417175, "lr": 3e-05, 
"seqlen": 6672.0, "step_tflops": 26.989163583832177, "step_time": 0.6191223754882813} +{"epoch": 0, "iter": 12987, "iter_tflops": 23.650118513809208, "iter_time": 0.7065332489013673, "loss": 0.445617139339447, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 25.485171013374686, "step_time": 0.6556595230102539} +{"epoch": 0, "iter": 12988, "iter_tflops": 24.052039946880463, "iter_time": 0.6947267303466798, "loss": 0.45884841680526733, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 25.94178631427428, "step_time": 0.6441189079284667} +{"epoch": 0, "iter": 12989, "iter_tflops": 18.086554364059527, "iter_time": 1.140686782836914, "loss": 0.7147031426429749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.375390049688836, "step_time": 1.064809196472168} +{"epoch": 0, "iter": 12990, "iter_tflops": 20.148720787496906, "iter_time": 1.0239406127929687, "loss": 0.7400234341621399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.069701395839886, "step_time": 0.857139570236206} +{"epoch": 0, "iter": 12991, "iter_tflops": 44.08613770190878, "iter_time": 0.46797235107421875, "loss": 0.5892713665962219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.37440223874355, "step_time": 0.4354903182983398} +{"epoch": 0, "iter": 12992, "iter_tflops": 43.8879523971161, "iter_time": 0.4700855789184571, "loss": 0.6170243620872498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.35438820530595, "step_time": 0.4356743755340575} +{"epoch": 0, "iter": 12993, "iter_tflops": 29.033141034363652, "iter_time": 0.7106049423217773, "loss": 0.4614085853099823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.80243112102743, "step_time": 0.6697878303527832} +{"epoch": 0, "iter": 12994, "iter_tflops": 15.849862782834112, "iter_time": 1.3016575469970704, "loss": 0.355876624584198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.9160663195133, "step_time": 1.0906651077270508} +{"epoch": 0, "iter": 12995, "iter_tflops": 44.66196458437405, "iter_time": 0.46193878173828123, "loss": 0.43107038736343384, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 48.274996744312936, "step_time": 0.4273660259246826} +{"epoch": 0, "iter": 12996, "iter_tflops": 45.3626924211256, "iter_time": 0.45480310821533204, "loss": 0.3294900357723236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.14111810920428, "step_time": 0.4198336200714111} +{"epoch": 0, "iter": 12997, "iter_tflops": 18.13632946210264, "iter_time": 0.7208959274291993, "loss": 0.037765610963106155, "lr": 3e-05, "seqlen": 5248.0, "step_tflops": 19.23782423199488, "step_time": 0.6796197891235353} +{"epoch": 0, "iter": 12998, "iter_tflops": 11.066203223329452, "iter_time": 1.181471710205078, "loss": 0.015584954060614109, "lr": 3e-05, "seqlen": 5248.0, "step_tflops": 13.257232295105203, "step_time": 0.9862093200683593} +{"epoch": 0, "iter": 12999, "iter_tflops": 25.784583606208102, "iter_time": 0.5070629119873046, "loss": 0.04374707490205765, "lr": 3e-05, "seqlen": 5248.0, "step_tflops": 28.50684495083235, "step_time": 0.4586409358978271} +{"epoch": 0, "iter": 13000, "iter_tflops": 29.74936008788977, "iter_time": 0.4394852867126464, "loss": 0.02688332460820675, "lr": 3e-05, "seqlen": 5248.0, "step_tflops": 32.83345938844769, "step_time": 0.3982037315368653} +{"epoch": 0, "iter": 13001, "iter_tflops": 15.411414798505671, "iter_time": 1.3386891326904296, "loss": 0.5319148898124695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.687351979788726, "step_time": 1.2363311767578125} +{"epoch": 0, "iter": 13002, "iter_tflops": 20.116275728810628, "iter_time": 1.0255921020507814, "loss": 0.4770151972770691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.97407147903355, "step_time": 0.8261005229949951} +{"epoch": 0, "iter": 13003, "iter_tflops": 48.999585368270445, "iter_time": 0.4210462875366211, "loss": 0.4336625933647156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.16220423812857, "step_time": 0.388078218460083} +{"epoch": 0, "iter": 13004, "iter_tflops": 48.715287842114435, "iter_time": 0.4235034713745117, "loss": 
0.5481091737747192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.59451111647292, "step_time": 0.39226704597473144} +{"epoch": 0, "iter": 13005, "iter_tflops": 32.65105434815639, "iter_time": 0.6318660736083985, "loss": 0.56931471824646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.629902103869064, "step_time": 0.5957595100402833} +{"epoch": 0, "iter": 13006, "iter_tflops": 17.077756617685026, "iter_time": 1.208068130493164, "loss": 0.5497029423713684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.640788209884366, "step_time": 0.9995303134918212} +{"epoch": 0, "iter": 13007, "iter_tflops": 47.73946488227105, "iter_time": 0.4321601333618164, "loss": 0.5028398036956787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.87713542025185, "step_time": 0.39769145584106447} +{"epoch": 0, "iter": 13008, "iter_tflops": 47.74640379461337, "iter_time": 0.43209732818603513, "loss": 0.5505529046058655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.59074737922069, "step_time": 0.39989910125732425} +{"epoch": 0, "iter": 13009, "iter_tflops": 17.345579113372594, "iter_time": 0.6439554443359374, "loss": 0.010802913457155228, "lr": 3e-05, "seqlen": 4496.0, "step_tflops": 18.55048296123364, "step_time": 0.6021288032531739} +{"epoch": 0, "iter": 13010, "iter_tflops": 7.4862674868126495, "iter_time": 1.4920359344482421, "loss": 0.0005353603628464043, "lr": 3e-05, "seqlen": 4496.0, "step_tflops": 10.179336178041858, "step_time": 1.0972994613647462} +{"epoch": 0, "iter": 13011, "iter_tflops": 28.67440540085398, "iter_time": 0.3895383338928223, "loss": 0.026643235236406326, "lr": 3e-05, "seqlen": 4496.0, "step_tflops": 31.72761289254403, "step_time": 0.35205233192443847} +{"epoch": 0, "iter": 13012, "iter_tflops": 30.78320252176566, "iter_time": 0.36285308837890623, "loss": 0.0058338032104074955, "lr": 3e-05, "seqlen": 4496.0, "step_tflops": 33.81041804882856, "step_time": 0.33036503982543947} +{"epoch": 0, "iter": 13013, "iter_tflops": 31.050454918867548, "iter_time": 
0.6644377212524413, "loss": 0.7614516615867615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.9695880538294, "step_time": 0.6257613372802734} +{"epoch": 0, "iter": 13014, "iter_tflops": 10.990090472974094, "iter_time": 1.8772451019287109, "loss": 0.6374179720878601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.59548822104871, "step_time": 1.3228885955810548} +{"epoch": 0, "iter": 13015, "iter_tflops": 9.45697212881071, "iter_time": 2.181574951171875, "loss": 0.7483693361282349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.90889718857464, "step_time": 1.891217155456543} +{"epoch": 0, "iter": 13016, "iter_tflops": 35.55762115014653, "iter_time": 0.5802157974243164, "loss": 0.8913210034370422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.613752665344, "step_time": 0.5208063392639161} +{"epoch": 0, "iter": 13017, "iter_tflops": 18.774724730902808, "iter_time": 0.8310553512573242, "loss": 0.3444543182849884, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 19.748476993626205, "step_time": 0.7900779113769532} +{"epoch": 0, "iter": 13018, "iter_tflops": 11.048593224310002, "iter_time": 1.4122010955810549, "loss": 0.445269912481308, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.983089901288597, "step_time": 1.115836025238037} +{"epoch": 0, "iter": 13019, "iter_tflops": 22.614041984165894, "iter_time": 0.6899622573852539, "loss": 0.30702003836631775, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.397966770005247, "step_time": 0.6395137596130371} +{"epoch": 0, "iter": 13020, "iter_tflops": 23.309218839089393, "iter_time": 0.6693847427368163, "loss": 0.39680105447769165, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.125761953474772, "step_time": 0.6209895439147949} +{"epoch": 0, "iter": 13021, "iter_tflops": 22.61781654893682, "iter_time": 0.912161148071289, "loss": 0.2385581135749817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.671823905643656, "step_time": 0.8362208480834961} +{"epoch": 0, "iter": 13022, "iter_tflops": 16.383434400558926, 
"iter_time": 1.2592654876708984, "loss": 0.3302723467350006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.221395674855657, "step_time": 1.1322455139160157} +{"epoch": 0, "iter": 13023, "iter_tflops": 37.95969633521896, "iter_time": 0.5434999618530274, "loss": 0.4494098722934723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.73337388406722, "step_time": 0.4943547954559326} +{"epoch": 0, "iter": 13024, "iter_tflops": 39.64460184893091, "iter_time": 0.5204010772705078, "loss": 0.37337958812713623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.042353093069686, "step_time": 0.47932076263427736} +{"epoch": 0, "iter": 13025, "iter_tflops": 20.66987949694725, "iter_time": 0.998123550415039, "loss": 0.06612279266119003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.331009216018273, "step_time": 0.9238764495849611} +{"epoch": 0, "iter": 13026, "iter_tflops": 16.206071409934143, "iter_time": 1.2730471801757812, "loss": 0.09413178265094757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.939643483081397, "step_time": 0.985264793395996} +{"epoch": 0, "iter": 13027, "iter_tflops": 51.15861384943586, "iter_time": 0.4032770233154297, "loss": 0.13806529343128204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.282769699085, "step_time": 0.3665614471435547} +{"epoch": 0, "iter": 13028, "iter_tflops": 52.94651481440537, "iter_time": 0.38965914154052733, "loss": 0.06509825587272644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.45279361835492, "step_time": 0.3590964374542237} +{"epoch": 0, "iter": 13029, "iter_tflops": 20.081242636962713, "iter_time": 1.0273813171386719, "loss": 0.6380450129508972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.98157621220554, "step_time": 0.9832956924438476} +{"epoch": 0, "iter": 13030, "iter_tflops": 15.844963230191501, "iter_time": 1.302060043334961, "loss": 0.5489147901535034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.532631979797074, "step_time": 1.00479536819458} +{"epoch": 0, "iter": 13031, "iter_tflops": 
36.93832438471514, "iter_time": 0.5585281372070313, "loss": 0.6885131001472473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17995027386394, "step_time": 0.5134673728942871} +{"epoch": 0, "iter": 13032, "iter_tflops": 38.66157491238635, "iter_time": 0.5336330337524414, "loss": 0.6679983735084534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.10416905350865, "step_time": 0.49000120353698723} +{"epoch": 0, "iter": 13033, "iter_tflops": 28.814823459055727, "iter_time": 0.7159888916015624, "loss": 0.13058191537857056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.340749496715482, "step_time": 0.658283348083496} +{"epoch": 0, "iter": 13034, "iter_tflops": 9.457866636438695, "iter_time": 2.181368621826172, "loss": 0.13212940096855164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.816468607917525, "step_time": 1.7459610137939452} +{"epoch": 0, "iter": 13035, "iter_tflops": 17.552367853722554, "iter_time": 1.1754022979736327, "loss": 0.19350755214691162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.72079116794133, "step_time": 0.8697472763061525} +{"epoch": 0, "iter": 13036, "iter_tflops": 47.562731386392755, "iter_time": 0.43376595306396487, "loss": 0.18725129961967468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45213538445957, "step_time": 0.4009764289855957} +{"epoch": 0, "iter": 13037, "iter_tflops": 22.587788160473604, "iter_time": 0.7070799942016601, "loss": 0.4530033469200134, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 24.013585374359792, "step_time": 0.665097396850586} +{"epoch": 0, "iter": 13038, "iter_tflops": 12.056787565293591, "iter_time": 1.324678985595703, "loss": 0.39283716678619385, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 14.28192746113967, "step_time": 1.1182925529479981} +{"epoch": 0, "iter": 13039, "iter_tflops": 23.938186207177793, "iter_time": 0.6671922836303711, "loss": 0.26073744893074036, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 25.645859183381784, "step_time": 0.6227661552429199} +{"epoch": 0, "iter": 13040, 
"iter_tflops": 22.936556593492725, "iter_time": 0.6963282852172851, "loss": 0.38994157314300537, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 24.73916502162511, "step_time": 0.6455906295776367} +{"epoch": 0, "iter": 13041, "iter_tflops": 18.95413577585467, "iter_time": 1.0884745025634766, "loss": 0.06966497004032135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.54486642112582, "step_time": 1.0041970138549805} +{"epoch": 0, "iter": 13042, "iter_tflops": 37.845685332287395, "iter_time": 0.5451372680664062, "loss": 0.04962483420968056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.83977534393687, "step_time": 0.4815873413085938} +{"epoch": 0, "iter": 13043, "iter_tflops": 42.22959172989469, "iter_time": 0.4885458908081055, "loss": 0.12229172885417938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.6539574625, "step_time": 0.44221529388427727} +{"epoch": 0, "iter": 13044, "iter_tflops": 39.61542838035974, "iter_time": 0.5207843093872071, "loss": 0.10291750729084015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.67967483491399, "step_time": 0.47232708549499514} +{"epoch": 0, "iter": 13045, "iter_tflops": 16.694353478421856, "iter_time": 1.2358126678466796, "loss": 0.6197549700737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.071526476761854, "step_time": 1.141635353088379} +{"epoch": 0, "iter": 13046, "iter_tflops": 13.78912048880948, "iter_time": 1.4961863250732423, "loss": 0.5458050966262817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.122444688800396, "step_time": 1.1384277267456056} +{"epoch": 0, "iter": 13047, "iter_tflops": 38.63145099245996, "iter_time": 0.5340491485595703, "loss": 0.515910267829895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.084373970127515, "step_time": 0.49023168373107906} +{"epoch": 0, "iter": 13048, "iter_tflops": 43.4635967056912, "iter_time": 0.47467524719238285, "loss": 0.47459468245506287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.394195837477376, "step_time": 0.43530844116210937} +{"epoch": 0, "iter": 
13049, "iter_tflops": 28.681688737719192, "iter_time": 0.7193123703002929, "loss": 0.29730018973350525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.168477693823785, "step_time": 0.6619217567443847} +{"epoch": 0, "iter": 13050, "iter_tflops": 8.25016387562142, "iter_time": 2.500688934326172, "loss": 0.40376684069633484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.601421487851654, "step_time": 1.946068603515625} +{"epoch": 0, "iter": 13051, "iter_tflops": 15.849939519157912, "iter_time": 1.3016512451171873, "loss": 0.3715607821941376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.9733431090946, "step_time": 1.087372604370117} +{"epoch": 0, "iter": 13052, "iter_tflops": 37.209833152885345, "iter_time": 0.5544527282714844, "loss": 0.27740678191185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59202133276148, "step_time": 0.5082548942565919} +{"epoch": 0, "iter": 13053, "iter_tflops": 15.43610551256799, "iter_time": 0.9710645828247071, "loss": 0.3711085617542267, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 16.533494566951862, "step_time": 0.906611442565918} +{"epoch": 0, "iter": 13054, "iter_tflops": 25.848819842898948, "iter_time": 0.5798893508911132, "loss": 0.35349345207214355, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.636346989864048, "step_time": 0.542381935119629} +{"epoch": 0, "iter": 13055, "iter_tflops": 27.43402290468874, "iter_time": 0.5463819656372071, "loss": 0.38871562480926514, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.238385130513166, "step_time": 0.5126635856628418} +{"epoch": 0, "iter": 13056, "iter_tflops": 27.47006079571149, "iter_time": 0.5456651687622069, "loss": 0.24520795047283173, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.155067633439863, "step_time": 0.5141286430358887} +{"epoch": 0, "iter": 13057, "iter_tflops": 23.384192338167566, "iter_time": 0.8822666702270507, "loss": 0.40221184492111206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.584706832861855, "step_time": 0.8391840362548828} +{"epoch": 0, 
"iter": 13058, "iter_tflops": 14.666911205132669, "iter_time": 1.4066420135498046, "loss": 0.4062449336051941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.54927905972908, "step_time": 1.1122315559387208} +{"epoch": 0, "iter": 13059, "iter_tflops": 44.607720817741786, "iter_time": 0.4625005073547363, "loss": 0.42698702216148376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.03404788321898, "step_time": 0.4207503643035889} +{"epoch": 0, "iter": 13060, "iter_tflops": 44.68233785491719, "iter_time": 0.461728157043457, "loss": 0.4364396929740906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.22192876960388, "step_time": 0.42783633995056153} +{"epoch": 0, "iter": 13061, "iter_tflops": 32.11684946540446, "iter_time": 0.5715243606567383, "loss": 0.12669138610363007, "lr": 3e-05, "seqlen": 7312.0, "step_tflops": 34.58802755379669, "step_time": 0.5306912002563476} +{"epoch": 0, "iter": 13062, "iter_tflops": 18.84857167945623, "iter_time": 0.9738436508178712, "loss": 0.09897401183843613, "lr": 3e-05, "seqlen": 7312.0, "step_tflops": 21.25515893789218, "step_time": 0.8635814914703369} +{"epoch": 0, "iter": 13063, "iter_tflops": 35.68163853386235, "iter_time": 0.5144259796142578, "loss": 0.13970226049423218, "lr": 3e-05, "seqlen": 7312.0, "step_tflops": 39.32865881648976, "step_time": 0.46672229385375974} +{"epoch": 0, "iter": 13064, "iter_tflops": 35.13415727942174, "iter_time": 0.5224420700073241, "loss": 0.17232662439346313, "lr": 3e-05, "seqlen": 7312.0, "step_tflops": 38.68067686683293, "step_time": 0.474540864944458} +{"epoch": 0, "iter": 13065, "iter_tflops": 28.801684311437466, "iter_time": 0.7163155212402343, "loss": 0.5158571004867554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.281338981623424, "step_time": 0.6595335807800292} +{"epoch": 0, "iter": 13066, "iter_tflops": 10.379291435694013, "iter_time": 1.9877169494628906, "loss": 0.5194070935249329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.89573596020081, "step_time": 1.599838394165039} 
+{"epoch": 0, "iter": 13067, "iter_tflops": 11.553009039868229, "iter_time": 1.785776626586914, "loss": 0.6508949995040894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.145159085647204, "step_time": 1.5694822235107422} +{"epoch": 0, "iter": 13068, "iter_tflops": 24.21920476466974, "iter_time": 0.8518485107421875, "loss": 0.5810197591781616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.890909430737, "step_time": 0.6902129745483399} +{"epoch": 0, "iter": 13069, "iter_tflops": 16.263404864684468, "iter_time": 1.0628047943115233, "loss": 0.3475026488304138, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 17.39666144244711, "step_time": 0.9935713653564453} +{"epoch": 0, "iter": 13070, "iter_tflops": 13.24462494210049, "iter_time": 1.3050444793701172, "loss": 0.7189635038375854, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 15.834828513766645, "step_time": 1.091570056915283} +{"epoch": 0, "iter": 13071, "iter_tflops": 26.034590449566473, "iter_time": 0.6639176712036132, "loss": 0.5469508171081543, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 28.070846050104755, "step_time": 0.6157571678161621} +{"epoch": 0, "iter": 13072, "iter_tflops": 26.850419272075168, "iter_time": 0.6437450561523438, "loss": 0.5261383056640625, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 28.73836564419976, "step_time": 0.6014546852111816} +{"epoch": 0, "iter": 13073, "iter_tflops": 26.84415137356158, "iter_time": 0.7685507812499999, "loss": 0.0032758493907749653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.99246849098533, "step_time": 0.7116018257141113} +{"epoch": 0, "iter": 13074, "iter_tflops": 11.223141984907334, "iter_time": 1.8382636108398438, "loss": 0.004572948906570673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.412765452167184, "step_time": 1.6620867919921873} +{"epoch": 0, "iter": 13075, "iter_tflops": 11.320815040809585, "iter_time": 1.822403549194336, "loss": 0.005616685375571251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.21517536578418, "step_time": 
1.5611668357849122} +{"epoch": 0, "iter": 13076, "iter_tflops": 54.28707711863103, "iter_time": 0.3800369186401367, "loss": 0.019464895129203796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.680265707293444, "step_time": 0.34569372749328614} +{"epoch": 0, "iter": 13077, "iter_tflops": 22.059708897661572, "iter_time": 0.7295791625976563, "loss": 0.3484345078468323, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 23.35742435063474, "step_time": 0.689044464111328} +{"epoch": 0, "iter": 13078, "iter_tflops": 10.510485927727983, "iter_time": 1.5312616424560546, "loss": 0.34230199456214905, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 12.768712261305225, "step_time": 1.2604484786987304} +{"epoch": 0, "iter": 13079, "iter_tflops": 24.70983345859478, "iter_time": 0.6513319473266601, "loss": 0.2987252175807953, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 26.650347410882965, "step_time": 0.6039059715270996} +{"epoch": 0, "iter": 13080, "iter_tflops": 23.183992168816726, "iter_time": 0.6941989898681641, "loss": 0.3481396734714508, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 24.878897505472217, "step_time": 0.6469058341979981} +{"epoch": 0, "iter": 13081, "iter_tflops": 23.081483786700783, "iter_time": 0.89383740234375, "loss": 0.47722363471984863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.68737915339269, "step_time": 0.8356939544677734} +{"epoch": 0, "iter": 13082, "iter_tflops": 12.973018255129906, "iter_time": 1.5903079071044919, "loss": 0.512338399887085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.316202188797014, "step_time": 1.1914329299926756} +{"epoch": 0, "iter": 13083, "iter_tflops": 38.07807638033017, "iter_time": 0.541810287475586, "loss": 0.5019831657409668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.943883448504465, "step_time": 0.4918737087249756} +{"epoch": 0, "iter": 13084, "iter_tflops": 40.0037623899034, "iter_time": 0.5157288284301758, "loss": 0.4975077211856842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.792126514014534, 
"step_time": 0.47111421966552736} +{"epoch": 0, "iter": 13085, "iter_tflops": 19.805837777922406, "iter_time": 1.0416672973632812, "loss": 0.3544853627681732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.849901351686672, "step_time": 0.9895055694580078} +{"epoch": 0, "iter": 13086, "iter_tflops": 15.479110154982566, "iter_time": 1.3328345947265627, "loss": 0.5561794638633728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.714982640730106, "step_time": 0.9500856552124024} +{"epoch": 0, "iter": 13087, "iter_tflops": 45.159610374951825, "iter_time": 0.45684835052490236, "loss": 0.35225096344947815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.48583557626056, "step_time": 0.425507640838623} +{"epoch": 0, "iter": 13088, "iter_tflops": 44.01518305402904, "iter_time": 0.4687267456054688, "loss": 0.3899383842945099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31226346649122, "step_time": 0.43606228065490726} +{"epoch": 0, "iter": 13089, "iter_tflops": 25.41074890687659, "iter_time": 0.8119041900634765, "loss": 0.07010795176029205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.728254498241068, "step_time": 0.7718833084106446} +{"epoch": 0, "iter": 13090, "iter_tflops": 16.595494065885468, "iter_time": 1.2431744079589846, "loss": 0.07387054711580276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.602112201780084, "step_time": 1.0014067153930664} +{"epoch": 0, "iter": 13091, "iter_tflops": 40.60949424536258, "iter_time": 0.5080362091064452, "loss": 0.03993159160017967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.757107526523534, "step_time": 0.4609568099975586} +{"epoch": 0, "iter": 13092, "iter_tflops": 48.05730099754181, "iter_time": 0.4293019599914551, "loss": 0.10537475347518921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.93027069815781, "step_time": 0.38977872657775875} +{"epoch": 0, "iter": 13093, "iter_tflops": 20.222264709298187, "iter_time": 1.020216766357422, "loss": 0.17367154359817505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
21.596800632621303, "step_time": 0.9552847137451173} +{"epoch": 0, "iter": 13094, "iter_tflops": 23.29399132609541, "iter_time": 0.8856830596923828, "loss": 0.185940220952034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.438380301355657, "step_time": 0.7803463478088378} +{"epoch": 0, "iter": 13095, "iter_tflops": 51.1554254860877, "iter_time": 0.4033021583557128, "loss": 0.12698882818222046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.809313420241786, "step_time": 0.36967115783691407} +{"epoch": 0, "iter": 13096, "iter_tflops": 52.739950005141466, "iter_time": 0.39118530654907224, "loss": 0.20914998650550842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.44088287746972, "step_time": 0.3591708984375} +{"epoch": 0, "iter": 13097, "iter_tflops": 45.22685394527868, "iter_time": 0.4561691055297852, "loss": 0.08744356781244278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.37244577800318, "step_time": 0.417866548538208} +{"epoch": 0, "iter": 13098, "iter_tflops": 45.877588176912724, "iter_time": 0.44969873809814453, "loss": 0.0911625474691391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.28914610580207, "step_time": 0.41024942970275874} +{"epoch": 0, "iter": 13099, "iter_tflops": 52.622129297060994, "iter_time": 0.39206116867065427, "loss": 0.10270915925502777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.25977457143001, "step_time": 0.36030692863464353} +{"epoch": 0, "iter": 13100, "iter_tflops": 49.83443946417881, "iter_time": 0.4139926872253418, "loss": 0.09913269430398941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.09302843915295, "step_time": 0.38140023040771487} +{"epoch": 0, "iter": 13101, "iter_tflops": 28.61878349835329, "iter_time": 0.7208934478759766, "loss": 0.6293461918830872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.275210520430228, "step_time": 0.6814516944885254} +{"epoch": 0, "iter": 13102, "iter_tflops": 15.481108285678449, "iter_time": 1.332662567138672, "loss": 0.5422634482383728, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 18.093131320444478, "step_time": 1.1402721366882327} +{"epoch": 0, "iter": 13103, "iter_tflops": 42.65704559435517, "iter_time": 0.4836503143310547, "loss": 0.7648130655288696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.809107955407846, "step_time": 0.4503709945678711} +{"epoch": 0, "iter": 13104, "iter_tflops": 43.261232075124404, "iter_time": 0.4768956527709961, "loss": 0.7288305759429932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.27125830898666, "step_time": 0.4458727569580078} +{"epoch": 0, "iter": 13105, "iter_tflops": 25.646355880552996, "iter_time": 0.8044454193115234, "loss": 0.6734001040458679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.014170744011274, "step_time": 0.7637137451171875} +{"epoch": 0, "iter": 13106, "iter_tflops": 14.219859212101728, "iter_time": 1.4508648223876954, "loss": 0.7006839513778687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.864939735935188, "step_time": 1.1548370056152344} +{"epoch": 0, "iter": 13107, "iter_tflops": 40.38326892057139, "iter_time": 0.5108822059631347, "loss": 0.5151638388633728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.38918810277942, "step_time": 0.475489273071289} +{"epoch": 0, "iter": 13108, "iter_tflops": 47.9661584653283, "iter_time": 0.4301176948547364, "loss": 0.630469024181366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.715315395816326, "step_time": 0.3989358539581299} +{"epoch": 0, "iter": 13109, "iter_tflops": 35.472609145254275, "iter_time": 0.5816063156127931, "loss": 0.36909040808677673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.07645474603422, "step_time": 0.5418333625793457} +{"epoch": 0, "iter": 13110, "iter_tflops": 13.924926990658228, "iter_time": 1.4815943756103516, "loss": 0.3453454375267029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.952843710463696, "step_time": 1.2932549133300781} +{"epoch": 0, "iter": 13111, "iter_tflops": 38.03826687516147, "iter_time": 0.542377326965332, "loss": 0.6246477961540222, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 41.59742711262786, "step_time": 0.495970422744751} +{"epoch": 0, "iter": 13112, "iter_tflops": 37.03306988573251, "iter_time": 0.5570991973876953, "loss": 0.4475207030773163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.48182003908019, "step_time": 0.5096384868621826} +{"epoch": 0, "iter": 13113, "iter_tflops": 21.289362509040917, "iter_time": 0.9690799102783203, "loss": 0.28311076760292053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.686360755758095, "step_time": 0.909405158996582} +{"epoch": 0, "iter": 13114, "iter_tflops": 11.56603340430969, "iter_time": 1.7837656860351563, "loss": 0.24044637382030487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.878587771680582, "step_time": 1.3866298217773436} +{"epoch": 0, "iter": 13115, "iter_tflops": 11.271417233107295, "iter_time": 1.8303903656005862, "loss": 0.43612197041511536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.905234840953462, "step_time": 1.5986608352661134} +{"epoch": 0, "iter": 13116, "iter_tflops": 20.774996243676984, "iter_time": 0.9930732727050782, "loss": 0.22761639952659607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.738648359820104, "step_time": 0.8339620342254639} +{"epoch": 0, "iter": 13117, "iter_tflops": 13.017951367546882, "iter_time": 1.1608621063232423, "loss": 0.348758727312088, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 13.995540834686132, "step_time": 1.0797758102416992} +{"epoch": 0, "iter": 13118, "iter_tflops": 14.840323442009858, "iter_time": 1.0183097763061522, "loss": 0.2952006757259369, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 16.56106135458262, "step_time": 0.9125047073364259} +{"epoch": 0, "iter": 13119, "iter_tflops": 25.381457503070184, "iter_time": 0.5953971099853516, "loss": 0.4208536446094513, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 26.950439428100918, "step_time": 0.5607346954345703} +{"epoch": 0, "iter": 13120, "iter_tflops": 24.799769260787627, "iter_time": 0.6093623809814452, "loss": 0.43871524930000305, "lr": 
3e-05, "seqlen": 6048.0, "step_tflops": 26.292181164364276, "step_time": 0.5747734031677246} +{"epoch": 0, "iter": 13121, "iter_tflops": 29.17597015371549, "iter_time": 0.707126220703125, "loss": 0.601597011089325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.108875607682183, "step_time": 0.6631899452209472} +{"epoch": 0, "iter": 13122, "iter_tflops": 20.38177964794619, "iter_time": 1.012232192993164, "loss": 0.5731263756752014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.430979326303955, "step_time": 0.7805648536682129} +{"epoch": 0, "iter": 13123, "iter_tflops": 48.39201792163353, "iter_time": 0.42633257293701177, "loss": 0.5113239884376526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.50012184713858, "step_time": 0.3929722976684571} +{"epoch": 0, "iter": 13124, "iter_tflops": 50.41702563887353, "iter_time": 0.4092088584899902, "loss": 0.6006004810333252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.79498750851134, "step_time": 0.37651424789428717} +{"epoch": 0, "iter": 13125, "iter_tflops": 34.597514088820084, "iter_time": 0.5963172225952149, "loss": 0.16710500419139862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.091646613445, "step_time": 0.5562194023132324} +{"epoch": 0, "iter": 13126, "iter_tflops": 19.889855565326442, "iter_time": 1.0372671356201173, "loss": 0.1586928367614746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.994752959404487, "step_time": 0.8972087478637696} +{"epoch": 0, "iter": 13127, "iter_tflops": 39.396773021973665, "iter_time": 0.5236747055053711, "loss": 0.20706795156002045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.33565765752297, "step_time": 0.47607662200927736} +{"epoch": 0, "iter": 13128, "iter_tflops": 43.242938750732925, "iter_time": 0.47709739685058594, "loss": 0.1395297795534134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19758118844478, "step_time": 0.4371218395233154} +{"epoch": 0, "iter": 13129, "iter_tflops": 32.161437523349214, "iter_time": 0.6414854278564454, "loss": 
0.0861906111240387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.62499943761644, "step_time": 0.5791184234619141} +{"epoch": 0, "iter": 13130, "iter_tflops": 39.44739555119293, "iter_time": 0.5230026779174805, "loss": 0.13621561229228973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.762523856767444, "step_time": 0.47143289947509764} +{"epoch": 0, "iter": 13131, "iter_tflops": 42.59391029712715, "iter_time": 0.48436721038818364, "loss": 0.1654394268989563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79840024387035, "step_time": 0.44085040092468264} +{"epoch": 0, "iter": 13132, "iter_tflops": 41.799925431726166, "iter_time": 0.49356771087646484, "loss": 0.11958180367946625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.85543158354318, "step_time": 0.4499160251617432} +{"epoch": 0, "iter": 13133, "iter_tflops": 17.821381036975296, "iter_time": 1.1576596374511718, "loss": 0.534256637096405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.38103591608082, "step_time": 1.0644990081787111} +{"epoch": 0, "iter": 13134, "iter_tflops": 16.05193112668717, "iter_time": 1.2852717437744139, "loss": 0.5602426528930664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.787164413660424, "step_time": 0.8323297157287597} +{"epoch": 0, "iter": 13135, "iter_tflops": 46.824391512984796, "iter_time": 0.4406056938171387, "loss": 0.5208624601364136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5406745372375, "step_time": 0.4082077198028565} +{"epoch": 0, "iter": 13136, "iter_tflops": 46.8317810662634, "iter_time": 0.4405361709594727, "loss": 0.5957115888595581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.7787713750667, "step_time": 0.40629367256164556} +{"epoch": 0, "iter": 13137, "iter_tflops": 38.37626234473881, "iter_time": 0.5376003875732422, "loss": 0.37799492478370667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25182526566808, "step_time": 0.4882888107299805} +{"epoch": 0, "iter": 13138, "iter_tflops": 27.306114321987927, "iter_time": 0.7555484924316406, 
"loss": 0.38150250911712646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.028020796401236, "step_time": 0.6062971935272218} +{"epoch": 0, "iter": 13139, "iter_tflops": 48.713844665828546, "iter_time": 0.4235160179138184, "loss": 0.3787875771522522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.908119359613664, "step_time": 0.3899419174194336} +{"epoch": 0, "iter": 13140, "iter_tflops": 49.43237386006766, "iter_time": 0.4173599586486816, "loss": 0.31958726048469543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.54008721951638, "step_time": 0.3853391838073731} +{"epoch": 0, "iter": 13141, "iter_tflops": 33.780438021394424, "iter_time": 0.610740852355957, "loss": 0.09958944469690323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.08023485895999, "step_time": 0.5718115081787108} +{"epoch": 0, "iter": 13142, "iter_tflops": 17.999039880533825, "iter_time": 1.1462330017089846, "loss": 0.05915835127234459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.35390508071504, "step_time": 0.9661508483886719} +{"epoch": 0, "iter": 13143, "iter_tflops": 37.31257397568949, "iter_time": 0.5529260330200195, "loss": 0.08168917149305344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.35574195500191, "step_time": 0.49886890029907227} +{"epoch": 0, "iter": 13144, "iter_tflops": 43.512740717904016, "iter_time": 0.47413914108276367, "loss": 0.06297685205936432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.91465646959909, "step_time": 0.43058001518249506} +{"epoch": 0, "iter": 13145, "iter_tflops": 31.100276944670966, "iter_time": 0.6313891983032227, "loss": 0.055279482156038284, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 34.78510815353096, "step_time": 0.5645053291320802} +{"epoch": 0, "iter": 13146, "iter_tflops": 37.5686561976987, "iter_time": 0.5226798324584961, "loss": 0.07538768649101257, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 42.25613818457486, "step_time": 0.4646988525390625} +{"epoch": 0, "iter": 13147, "iter_tflops": 41.88027839515464, "iter_time": 
0.4688693504333496, "loss": 0.06061693653464317, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 46.16872755588048, "step_time": 0.425317741394043} +{"epoch": 0, "iter": 13148, "iter_tflops": 43.280195845262696, "iter_time": 0.45370355987548827, "loss": 0.10300841182470322, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 47.587180461384506, "step_time": 0.41264010047912597} +{"epoch": 0, "iter": 13149, "iter_tflops": 21.965631835241922, "iter_time": 0.9392442550659179, "loss": 0.0958537831902504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.319307287519855, "step_time": 0.8847215423583985} +{"epoch": 0, "iter": 13150, "iter_tflops": 19.536034478279785, "iter_time": 1.0560532913208007, "loss": 0.03633199259638786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.860531127609164, "step_time": 0.7977830543518066} +{"epoch": 0, "iter": 13151, "iter_tflops": 40.57699332102864, "iter_time": 0.508443130493164, "loss": 0.09816429018974304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.70326456828089, "step_time": 0.4615120105743408} +{"epoch": 0, "iter": 13152, "iter_tflops": 46.14939696204428, "iter_time": 0.4470501213073731, "loss": 0.08611275255680084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.57930127395957, "step_time": 0.4078959770202637} +{"epoch": 0, "iter": 13153, "iter_tflops": 21.208644617508508, "iter_time": 0.9727681274414062, "loss": 0.6628450155258179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.601459247803017, "step_time": 0.9128213043212889} +{"epoch": 0, "iter": 13154, "iter_tflops": 8.14509103015209, "iter_time": 2.532948181152344, "loss": 0.5750767588615417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.85565710278071, "step_time": 2.3297078094482417} +{"epoch": 0, "iter": 13155, "iter_tflops": 12.093146685382495, "iter_time": 1.7060153198242187, "loss": 0.48630350828170776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.03757695013309, "step_time": 1.3719692726135253} +{"epoch": 0, "iter": 13156, "iter_tflops": 37.762023263219525, 
"iter_time": 0.5463450241088866, "loss": 0.5418081283569336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.98142042370712, "step_time": 0.5034255352020263} +{"epoch": 0, "iter": 13157, "iter_tflops": 13.549744092133103, "iter_time": 1.100223846435547, "loss": 0.4347708523273468, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 14.556506760017422, "step_time": 1.0241297454833984} +{"epoch": 0, "iter": 13158, "iter_tflops": 12.363626569948432, "iter_time": 1.2057749786376952, "loss": 0.3438178598880768, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 13.841039986707571, "step_time": 1.0770687446594238} +{"epoch": 0, "iter": 13159, "iter_tflops": 22.404210934539115, "iter_time": 0.6653995361328124, "loss": 0.4709259271621704, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 24.12784294620653, "step_time": 0.6178650779724121} +{"epoch": 0, "iter": 13160, "iter_tflops": 21.748370567467102, "iter_time": 0.6854652175903321, "loss": 0.3317081034183502, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 23.411761260350517, "step_time": 0.6367633514404296} +{"epoch": 0, "iter": 13161, "iter_tflops": 33.426486750279594, "iter_time": 0.6172079544067383, "loss": 0.6776591539382935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.04322099195041, "step_time": 0.556946533203125} +{"epoch": 0, "iter": 13162, "iter_tflops": 37.58519675698809, "iter_time": 0.5489154052734375, "loss": 0.4412268102169037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.194823025019076, "step_time": 0.500817626953125} +{"epoch": 0, "iter": 13163, "iter_tflops": 35.96562766321176, "iter_time": 0.5736336288452148, "loss": 0.5926560163497925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.00865956658781, "step_time": 0.5288849639892578} +{"epoch": 0, "iter": 13164, "iter_tflops": 35.08667838898117, "iter_time": 0.5880036087036133, "loss": 0.5007195472717285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21537099434885, "step_time": 0.5398637504577637} +{"epoch": 0, "iter": 13165, "iter_tflops": 
31.28277146315658, "iter_time": 0.6595033798217773, "loss": 0.10752100497484207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.618808885295834, "step_time": 0.5959504146575928} +{"epoch": 0, "iter": 13166, "iter_tflops": 38.26217957480671, "iter_time": 0.5392033004760741, "loss": 0.08472757786512375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93100009996942, "step_time": 0.49202483749389647} +{"epoch": 0, "iter": 13167, "iter_tflops": 38.80227239062635, "iter_time": 0.5316980743408203, "loss": 0.08686231821775436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.62864953236495, "step_time": 0.4839724864959717} +{"epoch": 0, "iter": 13168, "iter_tflops": 41.17137297034924, "iter_time": 0.5011028785705566, "loss": 0.16087159514427185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22109116958577, "step_time": 0.45622723770141593} +{"epoch": 0, "iter": 13169, "iter_tflops": 20.503385351263606, "iter_time": 1.0062286376953127, "loss": 0.2282423973083496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.353333136994816, "step_time": 0.9229537887573243} +{"epoch": 0, "iter": 13170, "iter_tflops": 17.928455211993963, "iter_time": 1.1507457427978516, "loss": 0.17872890830039978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.807841014116672, "step_time": 0.9460401649475099} +{"epoch": 0, "iter": 13171, "iter_tflops": 47.537246447429496, "iter_time": 0.43399849700927734, "loss": 0.16262875497341156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.8888804497745, "step_time": 0.3976014385223389} +{"epoch": 0, "iter": 13172, "iter_tflops": 51.316447504051, "iter_time": 0.4020366668701172, "loss": 0.2067117989063263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.52179654297612, "step_time": 0.3715854816436768} +{"epoch": 0, "iter": 13173, "iter_tflops": 27.91934743760906, "iter_time": 0.7389532852172852, "loss": 0.5912100076675415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.601171469442303, "step_time": 0.6969688186645508} +{"epoch": 0, "iter": 13174, 
"iter_tflops": 12.688858700110762, "iter_time": 1.6259219207763673, "loss": 0.6688603162765503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.821562331175304, "step_time": 1.2264671440124513} +{"epoch": 0, "iter": 13175, "iter_tflops": 34.730504067393, "iter_time": 0.5940338058471679, "loss": 0.7326264381408691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.48427580555823, "step_time": 0.5503932800292969} +{"epoch": 0, "iter": 13176, "iter_tflops": 38.862072737362304, "iter_time": 0.5308799057006837, "loss": 0.8727412819862366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.16260957706602, "step_time": 0.4893220252990723} +{"epoch": 0, "iter": 13177, "iter_tflops": 22.24215234351531, "iter_time": 0.9275673141479491, "loss": 0.14318330585956573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.15631582061012, "step_time": 0.8540662269592286} +{"epoch": 0, "iter": 13178, "iter_tflops": 28.54476114362979, "iter_time": 0.7227628707885742, "loss": 0.14711520075798035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.32187333023899, "step_time": 0.5840883159637451} +{"epoch": 0, "iter": 13179, "iter_tflops": 44.874817286683516, "iter_time": 0.459747688293457, "loss": 0.1557876616716385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30451508703643, "step_time": 0.41844227600097655} +{"epoch": 0, "iter": 13180, "iter_tflops": 36.31818168653559, "iter_time": 0.5680651550292969, "loss": 0.08501406013965607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.68692884545509, "step_time": 0.5198460578918457} +{"epoch": 0, "iter": 13181, "iter_tflops": 33.37624869476281, "iter_time": 0.6181369781494142, "loss": 0.27450138330459595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.6231807522241, "step_time": 0.5633342895507812} +{"epoch": 0, "iter": 13182, "iter_tflops": 35.546082819032186, "iter_time": 0.5804041366577148, "loss": 0.3842518925666809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.94055503301955, "step_time": 0.5298099498748778} +{"epoch": 0, "iter": 
13183, "iter_tflops": 41.678095762514936, "iter_time": 0.4950104637145996, "loss": 0.30628859996795654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.343426850087205, "step_time": 0.45499634552001955} +{"epoch": 0, "iter": 13184, "iter_tflops": 39.5584634668468, "iter_time": 0.5215342483520508, "loss": 0.5496063828468323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.50371850604249, "step_time": 0.4742374725341797} +{"epoch": 0, "iter": 13185, "iter_tflops": 24.555405822433332, "iter_time": 0.8401854019165038, "loss": 0.08151686936616898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.572672355753507, "step_time": 0.7764026603698732} +{"epoch": 0, "iter": 13186, "iter_tflops": 29.29610116218522, "iter_time": 0.7042265930175782, "loss": 0.09618689119815826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.56581285719428, "step_time": 0.5642181015014648} +{"epoch": 0, "iter": 13187, "iter_tflops": 48.86523589065631, "iter_time": 0.42220390701293947, "loss": 0.056188568472862244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.27526450727928, "step_time": 0.3872546424865722} +{"epoch": 0, "iter": 13188, "iter_tflops": 51.83270700029104, "iter_time": 0.3980323371887208, "loss": 0.1192634180188179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.83621726184231, "step_time": 0.36299202346801757} +{"epoch": 0, "iter": 13189, "iter_tflops": 27.3582191500241, "iter_time": 0.7541095199584961, "loss": 0.7440893650054932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.87277693263706, "step_time": 0.7145517578124999} +{"epoch": 0, "iter": 13190, "iter_tflops": 12.11225560484399, "iter_time": 1.7033238220214844, "loss": 0.724332332611084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.673021190948763, "step_time": 1.4060562744140626} +{"epoch": 0, "iter": 13191, "iter_tflops": 40.15958053881127, "iter_time": 0.5137278137207031, "loss": 0.5328586101531982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.957055656938365, "step_time": 0.46934657478332514} +{"epoch": 
0, "iter": 13192, "iter_tflops": 42.66274916918771, "iter_time": 0.48358565521240227, "loss": 0.6008079051971436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.0244315745937, "step_time": 0.4482639503479004} +{"epoch": 0, "iter": 13193, "iter_tflops": 32.545270366683816, "iter_time": 0.6339198684692384, "loss": 0.5420305132865906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.906157431570755, "step_time": 0.5910445327758789} +{"epoch": 0, "iter": 13194, "iter_tflops": 34.91720328996816, "iter_time": 0.5908575592041015, "loss": 0.5009036064147949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.34188859509283, "step_time": 0.5380823497772217} +{"epoch": 0, "iter": 13195, "iter_tflops": 40.36639562453942, "iter_time": 0.5110957565307618, "loss": 0.35286492109298706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44028644112787, "step_time": 0.46424303627014163} +{"epoch": 0, "iter": 13196, "iter_tflops": 37.71614169052594, "iter_time": 0.5470096511840821, "loss": 0.5541008710861206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.469711966874875, "step_time": 0.4974978733062744} +{"epoch": 0, "iter": 13197, "iter_tflops": 18.758787767269858, "iter_time": 1.0115123672485353, "loss": 0.04703764617443085, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 20.255846681275916, "step_time": 0.936754020690918} +{"epoch": 0, "iter": 13198, "iter_tflops": 20.663191628049788, "iter_time": 0.918287269592285, "loss": 0.06658954918384552, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 23.305853823330672, "step_time": 0.8141622257232666} +{"epoch": 0, "iter": 13199, "iter_tflops": 45.22962103641327, "iter_time": 0.41952033615112305, "loss": 0.05529897287487984, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 49.19292613630007, "step_time": 0.3857210235595703} +{"epoch": 0, "iter": 13200, "iter_tflops": 50.22085676290392, "iter_time": 0.3778260078430176, "loss": 0.1266058087348938, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 54.56758111183366, "step_time": 0.3477292823791504} 
+{"epoch": 0, "iter": 13201, "iter_tflops": 30.400886347004242, "iter_time": 0.6786346054077148, "loss": 0.2833884358406067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.42664164943297, "step_time": 0.6362389831542969} +{"epoch": 0, "iter": 13202, "iter_tflops": 12.050626633965495, "iter_time": 1.712034912109375, "loss": 0.2261982262134552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.104306127470498, "step_time": 1.3659080619812012} +{"epoch": 0, "iter": 13203, "iter_tflops": 13.812453622257626, "iter_time": 1.4936588439941407, "loss": 0.2794400155544281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.1730499030816, "step_time": 1.2756464385986328} +{"epoch": 0, "iter": 13204, "iter_tflops": 31.651262144804733, "iter_time": 0.6518253021240233, "loss": 0.2534511983394623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.684914985023994, "step_time": 0.5781460742950439} +{"epoch": 0, "iter": 13205, "iter_tflops": 12.645618833129504, "iter_time": 1.1562848052978516, "loss": 0.35312312841415405, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 13.612819664566148, "step_time": 1.0741299209594728} +{"epoch": 0, "iter": 13206, "iter_tflops": 11.597013949768609, "iter_time": 1.2608363647460938, "loss": 0.2449612021446228, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 14.001268963596642, "step_time": 1.0443294067382813} +{"epoch": 0, "iter": 13207, "iter_tflops": 26.072681668491402, "iter_time": 0.5608144607543946, "loss": 0.3554578721523285, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 27.774388261425667, "step_time": 0.5264539680480957} +{"epoch": 0, "iter": 13208, "iter_tflops": 25.123466591279733, "iter_time": 0.5820031585693359, "loss": 0.4880985915660858, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 26.685537200231987, "step_time": 0.5479348907470702} +{"epoch": 0, "iter": 13209, "iter_tflops": 29.9132386517071, "iter_time": 0.68969775390625, "loss": 0.2536596655845642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.699428100518677, "step_time": 
0.6508348808288574} +{"epoch": 0, "iter": 13210, "iter_tflops": 12.866470378363346, "iter_time": 1.6034773254394534, "loss": 0.2770712077617645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.842470410014903, "step_time": 1.302264923095703} +{"epoch": 0, "iter": 13211, "iter_tflops": 47.72529830067132, "iter_time": 0.43228841400146484, "loss": 0.298062801361084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.77085865534331, "step_time": 0.398507848739624} +{"epoch": 0, "iter": 13212, "iter_tflops": 50.68019719590021, "iter_time": 0.4070839233398438, "loss": 0.3782033920288086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.861820456940606, "step_time": 0.3760555763244629} +{"epoch": 0, "iter": 13213, "iter_tflops": 23.934089320245597, "iter_time": 0.8619961776733398, "loss": 0.03528156876564026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.015730710893738, "step_time": 0.8247248001098634} +{"epoch": 0, "iter": 13214, "iter_tflops": 13.90707482332235, "iter_time": 1.4834962615966796, "loss": 0.06848562508821487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.152583444609082, "step_time": 1.1365375938415525} +{"epoch": 0, "iter": 13215, "iter_tflops": 41.09003222463203, "iter_time": 0.5020948486328125, "loss": 0.103903628885746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.43414959032648, "step_time": 0.45408781051635744} +{"epoch": 0, "iter": 13216, "iter_tflops": 41.24773401217052, "iter_time": 0.5001751976013183, "loss": 0.07369942963123322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.011594983262675, "step_time": 0.458350643157959} +{"epoch": 0, "iter": 13217, "iter_tflops": 20.359228716621068, "iter_time": 1.0133533935546877, "loss": 0.022708380594849586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.96688935603099, "step_time": 0.939190486907959} +{"epoch": 0, "iter": 13218, "iter_tflops": 21.595277368923167, "iter_time": 0.9553520965576171, "loss": 0.02285641059279442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.992561106654776, 
"step_time": 0.7643251571655272} +{"epoch": 0, "iter": 13219, "iter_tflops": 52.5146825623062, "iter_time": 0.39286333847045896, "loss": 0.03491244837641716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.645576739682056, "step_time": 0.3578955173492432} +{"epoch": 0, "iter": 13220, "iter_tflops": 50.470472611191255, "iter_time": 0.4087755165100098, "loss": 0.035968292504549026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.786666620843505, "step_time": 0.3765714321136475} +{"epoch": 0, "iter": 13221, "iter_tflops": 24.878583386205, "iter_time": 0.8292712326049804, "loss": 0.03303392231464386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.167739812260923, "step_time": 0.7884170989990235} +{"epoch": 0, "iter": 13222, "iter_tflops": 11.919311948029046, "iter_time": 1.7308963470458987, "loss": 0.031043533235788345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.080768167894536, "step_time": 1.465196590423584} +{"epoch": 0, "iter": 13223, "iter_tflops": 38.655218806741615, "iter_time": 0.5337207794189454, "loss": 0.01282317005097866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.8290639417798, "step_time": 0.48170778465270997} +{"epoch": 0, "iter": 13224, "iter_tflops": 43.52939282260296, "iter_time": 0.4739577598571777, "loss": 0.022700339555740356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01447495646694, "step_time": 0.429684871673584} +{"epoch": 0, "iter": 13225, "iter_tflops": 23.20496041247757, "iter_time": 0.8890811767578124, "loss": 0.43947744369506836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.15760305076457, "step_time": 0.8200738945007324} +{"epoch": 0, "iter": 13226, "iter_tflops": 13.371884008767504, "iter_time": 1.542871109008789, "loss": 0.63249272108078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.860440993975313, "step_time": 1.2236390209197998} +{"epoch": 0, "iter": 13227, "iter_tflops": 40.901565662276724, "iter_time": 0.504408405303955, "loss": 0.4849779009819031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.7232040042912, "step_time": 0.4613062496185303} +{"epoch": 0, "iter": 13228, "iter_tflops": 36.95649888446296, "iter_time": 0.5582534637451172, "loss": 0.4284213185310364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.39728349830455, "step_time": 0.5107049713134766} +{"epoch": 0, "iter": 13229, "iter_tflops": 18.676623194507464, "iter_time": 1.10464794921875, "loss": 0.032228175550699234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.909492726850743, "step_time": 1.03624405670166} +{"epoch": 0, "iter": 13230, "iter_tflops": 15.044643910679959, "iter_time": 1.3713248138427734, "loss": 0.04373826086521149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.94883178662547, "step_time": 1.1494393482208252} +{"epoch": 0, "iter": 13231, "iter_tflops": 39.680375846217565, "iter_time": 0.5199319076538086, "loss": 0.023711616173386574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.984398711792984, "step_time": 0.4690548038482666} +{"epoch": 0, "iter": 13232, "iter_tflops": 40.18855960916416, "iter_time": 0.5133573760986329, "loss": 0.01580961048603058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.074917070984455, "step_time": 0.4680914878845215} +{"epoch": 0, "iter": 13233, "iter_tflops": 16.389093425414888, "iter_time": 1.2588306732177734, "loss": 0.32629069685935974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.71262780549592, "step_time": 1.1647675170898437} +{"epoch": 0, "iter": 13234, "iter_tflops": 45.18138654092223, "iter_time": 0.4566281623840332, "loss": 0.23209840059280396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.27754310243992, "step_time": 0.4103441066741943} +{"epoch": 0, "iter": 13235, "iter_tflops": 43.195888512868095, "iter_time": 0.4776170654296875, "loss": 0.27411070466041565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.46275804833548, "step_time": 0.4440350589752197} +{"epoch": 0, "iter": 13236, "iter_tflops": 45.04781240755509, "iter_time": 0.4579821395874023, "loss": 0.29321908950805664, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 48.94003757170066, "step_time": 0.42155859565734866} +{"epoch": 0, "iter": 13237, "iter_tflops": 34.27902595781051, "iter_time": 0.6018576354980469, "loss": 0.643832802772522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.77185413235219, "step_time": 0.5610566558837891} +{"epoch": 0, "iter": 13238, "iter_tflops": 15.812704920463577, "iter_time": 1.3047162780761719, "loss": 0.6014690399169922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.807927924966112, "step_time": 1.1585341987609863} +{"epoch": 0, "iter": 13239, "iter_tflops": 33.66758219804059, "iter_time": 0.6127880935668945, "loss": 0.8286662101745605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.604740155628235, "step_time": 0.5636180839538574} +{"epoch": 0, "iter": 13240, "iter_tflops": 40.382694802070226, "iter_time": 0.5108894691467285, "loss": 0.5500152707099915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.5735962235639, "step_time": 0.47347695159912107} +{"epoch": 0, "iter": 13241, "iter_tflops": 22.936784008672948, "iter_time": 0.8994762954711915, "loss": 0.46476906538009644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.90009332762005, "step_time": 0.8285548667907715} +{"epoch": 0, "iter": 13242, "iter_tflops": 20.149261864689816, "iter_time": 1.023913116455078, "loss": 0.4661950170993805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.924941476306046, "step_time": 0.8277288646697999} +{"epoch": 0, "iter": 13243, "iter_tflops": 37.7351003845, "iter_time": 0.5467348251342773, "loss": 0.5615071058273315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.192298361456494, "step_time": 0.5008483219146728} +{"epoch": 0, "iter": 13244, "iter_tflops": 45.038432001085845, "iter_time": 0.45807752609252933, "loss": 0.6385267376899719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.059343891581065, "step_time": 0.4205334167480469} +{"epoch": 0, "iter": 13245, "iter_tflops": 17.063370462800204, "iter_time": 1.2090866546630858, "loss": 0.5952365398406982, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 18.255631422511346, "step_time": 1.1301221542358397} +{"epoch": 0, "iter": 13246, "iter_tflops": 18.624900229782213, "iter_time": 1.1077156524658203, "loss": 0.83554607629776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.492574977642384, "step_time": 0.878196346282959} +{"epoch": 0, "iter": 13247, "iter_tflops": 40.77654321882898, "iter_time": 0.5059549407958984, "loss": 0.5848103165626526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.82033799462264, "step_time": 0.4708109169006348} +{"epoch": 0, "iter": 13248, "iter_tflops": 41.65285156589043, "iter_time": 0.49531047058105465, "loss": 0.6791882514953613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7742291574757, "step_time": 0.4607805404663086} +{"epoch": 0, "iter": 13249, "iter_tflops": 34.68635001648672, "iter_time": 0.52680908203125, "loss": 0.0017418169882148504, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 37.556882433074975, "step_time": 0.48654422378540035} +{"epoch": 0, "iter": 13250, "iter_tflops": 37.007110116541554, "iter_time": 0.49377225494384763, "loss": 0.011252575553953648, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 41.31099871964686, "step_time": 0.44232976150512693} +{"epoch": 0, "iter": 13251, "iter_tflops": 39.7617906992388, "iter_time": 0.45956391525268553, "loss": 0.0012420180719345808, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 44.054664157369075, "step_time": 0.4147820568084717} +{"epoch": 0, "iter": 13252, "iter_tflops": 37.377622326979264, "iter_time": 0.48887765121459964, "loss": 0.00364202749915421, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 41.45631953137812, "step_time": 0.4407792205810547} +{"epoch": 0, "iter": 13253, "iter_tflops": 18.845963545525464, "iter_time": 0.6463761520385742, "loss": 0.004045184701681137, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 21.161127714845282, "step_time": 0.5756584224700928} +{"epoch": 0, "iter": 13254, "iter_tflops": 24.11189773403872, "iter_time": 0.5052103958129882, "loss": 0.005014396272599697, 
"lr": 3e-05, "seqlen": 4896.0, "step_tflops": 26.873761297439664, "step_time": 0.45328903770446777} +{"epoch": 0, "iter": 13255, "iter_tflops": 26.059423033715653, "iter_time": 0.46745399475097654, "loss": 0.004311606753617525, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 28.959769065394884, "step_time": 0.42063807106018064} +{"epoch": 0, "iter": 13256, "iter_tflops": 28.62144573537275, "iter_time": 0.42561027526855466, "loss": 0.0015319779049605131, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 31.57190482441855, "step_time": 0.385836124420166} +{"epoch": 0, "iter": 13257, "iter_tflops": 31.87470092513702, "iter_time": 0.5810430603027343, "loss": 0.02330140396952629, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 35.82034780194387, "step_time": 0.5170405902862549} +{"epoch": 0, "iter": 13258, "iter_tflops": 37.114791544755896, "iter_time": 0.4990078887939453, "loss": 0.005522445309907198, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 41.229435810688756, "step_time": 0.44920754814147956} +{"epoch": 0, "iter": 13259, "iter_tflops": 36.04608881556779, "iter_time": 0.5138025894165039, "loss": 0.006659124977886677, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 40.32607077393954, "step_time": 0.4592704772949218} +{"epoch": 0, "iter": 13260, "iter_tflops": 40.59013845863089, "iter_time": 0.45628259658813475, "loss": 0.003447478637099266, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 44.79764628311649, "step_time": 0.4134273853302002} +{"epoch": 0, "iter": 13261, "iter_tflops": 38.99249828591013, "iter_time": 0.5291041717529297, "loss": 0.030594130977988243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37203316021739, "step_time": 0.47567734336853024} +{"epoch": 0, "iter": 13262, "iter_tflops": 40.23126574657521, "iter_time": 0.5128124389648437, "loss": 0.022146141156554222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.45713834811573, "step_time": 0.46406706047058105} +{"epoch": 0, "iter": 13263, "iter_tflops": 42.81018009975209, "iter_time": 0.48192026901245116, 
"loss": 0.04041867330670357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.40228982854491, "step_time": 0.43523411178588867} +{"epoch": 0, "iter": 13264, "iter_tflops": 42.49344689891659, "iter_time": 0.4855123558044433, "loss": 0.053916990756988525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.154561433434004, "step_time": 0.4375206317901611} +{"epoch": 0, "iter": 13265, "iter_tflops": 19.92345795175571, "iter_time": 1.035517707824707, "loss": 0.1289442777633667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.331676632697064, "step_time": 0.9671576156616211} +{"epoch": 0, "iter": 13266, "iter_tflops": 18.23874935224941, "iter_time": 1.1311682128906249, "loss": 0.18657225370407104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.731980218718963, "step_time": 0.9075801277160646} +{"epoch": 0, "iter": 13267, "iter_tflops": 35.62385384192371, "iter_time": 0.5791370468139647, "loss": 0.2461146116256714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.167037955276086, "step_time": 0.5267463302612305} +{"epoch": 0, "iter": 13268, "iter_tflops": 41.51455887346409, "iter_time": 0.49696044158935543, "loss": 0.12923714518547058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.07377772397681, "step_time": 0.4577183132171631} +{"epoch": 0, "iter": 13269, "iter_tflops": 29.530426272368576, "iter_time": 0.698638526916504, "loss": 0.8578553795814514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.54647681968806, "step_time": 0.633896369934082} +{"epoch": 0, "iter": 13270, "iter_tflops": 38.82701077059807, "iter_time": 0.5313593063354493, "loss": 0.7987322807312012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.43986527375864, "step_time": 0.48612532997131347} +{"epoch": 0, "iter": 13271, "iter_tflops": 35.111793057439854, "iter_time": 0.5875830230712891, "loss": 0.5642448663711548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.16000150206105, "step_time": 0.5406470832824707} +{"epoch": 0, "iter": 13272, "iter_tflops": 41.670590407667795, "iter_time": 
0.4950996208190918, "loss": 0.7645284533500671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.034246873197645, "step_time": 0.45812009620666505} +{"epoch": 0, "iter": 13273, "iter_tflops": 15.489923831976746, "iter_time": 1.3319041290283202, "loss": 0.09213971346616745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.58113497377968, "step_time": 1.2442509841918945} +{"epoch": 0, "iter": 13274, "iter_tflops": 22.384414532261363, "iter_time": 0.9216722412109375, "loss": 0.05655399337410927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.043639633082307, "step_time": 0.7628815422058105} +{"epoch": 0, "iter": 13275, "iter_tflops": 48.069610676273875, "iter_time": 0.42919202423095704, "loss": 0.09114290773868561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37766786355906, "step_time": 0.39389102935791015} +{"epoch": 0, "iter": 13276, "iter_tflops": 55.137109698437804, "iter_time": 0.3741780014038086, "loss": 0.041919462382793427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.188764472390105, "step_time": 0.3427731685638428} +{"epoch": 0, "iter": 13277, "iter_tflops": 30.55652366448167, "iter_time": 0.6751780319213866, "loss": 0.24473565816879272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.66382566717741, "step_time": 0.6316190185546875} +{"epoch": 0, "iter": 13278, "iter_tflops": 10.233682990752751, "iter_time": 2.0159988861083984, "loss": 0.19639952480793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.545273646871635, "step_time": 1.418405319213867} +{"epoch": 0, "iter": 13279, "iter_tflops": 12.003447701728541, "iter_time": 1.7187639770507814, "loss": 0.2115193009376526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.907856816882346, "step_time": 1.4834128494262697} +{"epoch": 0, "iter": 13280, "iter_tflops": 42.83027090546892, "iter_time": 0.48169421005249025, "loss": 0.31783729791641235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.85544338929451, "step_time": 0.41381827354431144} +{"epoch": 0, "iter": 13281, "iter_tflops": 
21.646668109958373, "iter_time": 0.7075665969848632, "loss": 0.3540276885032654, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 23.04830769817587, "step_time": 0.6645372619628906} +{"epoch": 0, "iter": 13282, "iter_tflops": 9.511673022649802, "iter_time": 1.6102802581787112, "loss": 0.35303184390068054, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 11.680860282232484, "step_time": 1.3112441139221191} +{"epoch": 0, "iter": 13283, "iter_tflops": 25.198403226785395, "iter_time": 0.6078345184326172, "loss": 0.300746351480484, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 26.82560416689776, "step_time": 0.5709641876220704} +{"epoch": 0, "iter": 13284, "iter_tflops": 27.48264003261118, "iter_time": 0.5573139724731445, "loss": 0.4871516227722168, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 29.2305278532603, "step_time": 0.5239884605407715} +{"epoch": 0, "iter": 13285, "iter_tflops": 41.08055275625363, "iter_time": 0.5022107086181641, "loss": 0.10604019463062286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.73684957838483, "step_time": 0.461165542602539} +{"epoch": 0, "iter": 13286, "iter_tflops": 37.40631645446439, "iter_time": 0.5515403671264648, "loss": 0.0716497004032135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.9358705661046, "step_time": 0.4919676933288575} +{"epoch": 0, "iter": 13287, "iter_tflops": 40.93144215579424, "iter_time": 0.5040402297973633, "loss": 0.048037976026535034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.377185469570904, "step_time": 0.45465784835815426} +{"epoch": 0, "iter": 13288, "iter_tflops": 42.23570830294748, "iter_time": 0.48847513961791994, "loss": 0.0911516547203064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.488863249413605, "step_time": 0.44378571701049807} +{"epoch": 0, "iter": 13289, "iter_tflops": 31.069228272579384, "iter_time": 0.6640362396240235, "loss": 0.48365357518196106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.5862288316631, "step_time": 0.596511796951294} +{"epoch": 0, "iter": 13290, 
"iter_tflops": 35.17543804491293, "iter_time": 0.5865198745727539, "loss": 0.5041950345039368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.21403713094131, "step_time": 0.5261150093078614} +{"epoch": 0, "iter": 13291, "iter_tflops": 41.56680773693658, "iter_time": 0.4963357696533203, "loss": 0.5558963418006897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50272071979995, "step_time": 0.45340351486206054} +{"epoch": 0, "iter": 13292, "iter_tflops": 40.57352881837117, "iter_time": 0.5084865455627442, "loss": 0.4556019902229309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.42750397873656, "step_time": 0.4643766059875489} +{"epoch": 0, "iter": 13293, "iter_tflops": 23.345762224118285, "iter_time": 0.8837189941406249, "loss": 0.31468018889427185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.02599685297674, "step_time": 0.8243864822387694} +{"epoch": 0, "iter": 13294, "iter_tflops": 29.64158844117792, "iter_time": 0.6960184860229492, "loss": 0.3183095157146454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.93001041203234, "step_time": 0.6265134220123292} +{"epoch": 0, "iter": 13295, "iter_tflops": 46.92750171682008, "iter_time": 0.43963758468627934, "loss": 0.45320671796798706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.03748674156077, "step_time": 0.4042341194152832} +{"epoch": 0, "iter": 13296, "iter_tflops": 46.338142493791594, "iter_time": 0.44522918701171876, "loss": 0.22197012603282928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.17090132160137, "step_time": 0.4112163219451904} +{"epoch": 0, "iter": 13297, "iter_tflops": 22.510680362714982, "iter_time": 0.9165024414062499, "loss": 0.2550072968006134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.597617291549522, "step_time": 0.8742871475219727} +{"epoch": 0, "iter": 13298, "iter_tflops": 16.51443007509677, "iter_time": 1.2492767486572265, "loss": 0.21736109256744385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.775897516910593, "step_time": 1.0432443580627442} +{"epoch": 0, 
"iter": 13299, "iter_tflops": 37.904313967924985, "iter_time": 0.5442940750122071, "loss": 0.30049413442611694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.48712916350318, "step_time": 0.49728901290893557} +{"epoch": 0, "iter": 13300, "iter_tflops": 36.89758540299283, "iter_time": 0.5591448135375976, "loss": 0.27068066596984863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.18354329351029, "step_time": 0.5134214611053467} +{"epoch": 0, "iter": 13301, "iter_tflops": 16.037943537644377, "iter_time": 1.2553602294921875, "loss": 0.001974307931959629, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 17.509041878506785, "step_time": 1.1498856773376465} +{"epoch": 0, "iter": 13302, "iter_tflops": 20.118901329791115, "iter_time": 1.0007204742431641, "loss": 0.0010533345630392432, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 24.937386962365174, "step_time": 0.8073579044342042} +{"epoch": 0, "iter": 13303, "iter_tflops": 42.323599043025624, "iter_time": 0.4757014274597168, "loss": 0.0030214833095669746, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 47.483753662366745, "step_time": 0.42400600051879883} +{"epoch": 0, "iter": 13304, "iter_tflops": 43.97117710110499, "iter_time": 0.4578771324157715, "loss": 0.003794887103140354, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 48.805168402925666, "step_time": 0.4125259094238281} +{"epoch": 0, "iter": 13305, "iter_tflops": 14.982582432315173, "iter_time": 1.377005172729492, "loss": 0.2466605305671692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.88451013187297, "step_time": 1.2988183670043945} +{"epoch": 0, "iter": 13306, "iter_tflops": 21.198865244939146, "iter_time": 0.9732168807983398, "loss": 0.14671269059181213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.356124468813523, "step_time": 0.8136532669067383} +{"epoch": 0, "iter": 13307, "iter_tflops": 42.36047422840821, "iter_time": 0.48703641510009765, "loss": 0.19588199257850647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.351867058126615, "step_time": 
0.44509735679626466} +{"epoch": 0, "iter": 13308, "iter_tflops": 44.821296938038834, "iter_time": 0.4602966651916504, "loss": 0.27301234006881714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.01767698316758, "step_time": 0.42089088630676275} +{"epoch": 0, "iter": 13309, "iter_tflops": 11.444803861537133, "iter_time": 1.299009307861328, "loss": 0.016572123393416405, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 12.067325147782492, "step_time": 1.2319968643188477} +{"epoch": 0, "iter": 13310, "iter_tflops": 12.21184811501682, "iter_time": 1.2174166107177733, "loss": 0.0043000574223697186, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 14.630491096550912, "step_time": 1.0161591053009034} +{"epoch": 0, "iter": 13311, "iter_tflops": 31.755919743262854, "iter_time": 0.4681617431640625, "loss": 0.01675749197602272, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 35.105817362044924, "step_time": 0.4234884090423584} +{"epoch": 0, "iter": 13312, "iter_tflops": 35.81224739442737, "iter_time": 0.41513470458984375, "loss": 0.004705686122179031, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 39.598333526539584, "step_time": 0.37544273757934565} +{"epoch": 0, "iter": 13313, "iter_tflops": 18.032522203051165, "iter_time": 1.144104705810547, "loss": 0.6117881536483765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.152131821724794, "step_time": 1.077221778869629} +{"epoch": 0, "iter": 13314, "iter_tflops": 17.45999087124371, "iter_time": 1.18162109375, "loss": 0.6290991306304932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.82481420466671, "step_time": 0.945304428100586} +{"epoch": 0, "iter": 13315, "iter_tflops": 43.14356845698106, "iter_time": 0.4781962699890137, "loss": 0.7270361185073853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.33500861048239, "step_time": 0.4452593002319336} +{"epoch": 0, "iter": 13316, "iter_tflops": 45.273033335079454, "iter_time": 0.45570380401611327, "loss": 0.8077878952026367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
48.582458873051834, "step_time": 0.4246613693237304} +{"epoch": 0, "iter": 13317, "iter_tflops": 34.81814373301236, "iter_time": 0.5925385818481446, "loss": 0.8826454877853394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.3064225675049, "step_time": 0.553017204284668} +{"epoch": 0, "iter": 13318, "iter_tflops": 25.71114516692871, "iter_time": 0.8024183044433594, "loss": 0.810365617275238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.11781499040443, "step_time": 0.555827262878418} +{"epoch": 0, "iter": 13319, "iter_tflops": 39.583549991684066, "iter_time": 0.5212037200927735, "loss": 0.6892377734184265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80782017616079, "step_time": 0.4819468364715576} +{"epoch": 0, "iter": 13320, "iter_tflops": 33.93285882606546, "iter_time": 0.6079975051879882, "loss": 0.5575253963470459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.79350781496542, "step_time": 0.560726463317871} +{"epoch": 0, "iter": 13321, "iter_tflops": 15.774046408753795, "iter_time": 1.307913833618164, "loss": 0.7269326448440552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.85602467044616, "step_time": 1.223959617614746} +{"epoch": 0, "iter": 13322, "iter_tflops": 16.93038210031702, "iter_time": 1.2185840454101562, "loss": 0.5629158616065979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.514631034345975, "step_time": 0.9589331779479979} +{"epoch": 0, "iter": 13323, "iter_tflops": 34.171176256671984, "iter_time": 0.603757194519043, "loss": 0.5224778056144714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.109105071299, "step_time": 0.5559577217102051} +{"epoch": 0, "iter": 13324, "iter_tflops": 38.78893657221448, "iter_time": 0.5318808746337891, "loss": 0.6575818657875061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37696930518555, "step_time": 0.4868468379974365} +{"epoch": 0, "iter": 13325, "iter_tflops": 20.504414237908886, "iter_time": 1.0061781463623045, "loss": 0.7535086274147034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.09116420287246, "step_time": 0.9339070281982422} +{"epoch": 0, "iter": 13326, "iter_tflops": 21.026626930656096, "iter_time": 0.9811889266967774, "loss": 0.705471396446228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.153894669703146, "step_time": 0.7888344650268555} +{"epoch": 0, "iter": 13327, "iter_tflops": 46.30373747625631, "iter_time": 0.4455600051879883, "loss": 0.7356874942779541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.16730972804199, "step_time": 0.4112457618713379} +{"epoch": 0, "iter": 13328, "iter_tflops": 46.225017420101125, "iter_time": 0.4463187828063965, "loss": 0.6515209674835205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64762287281936, "step_time": 0.415550479888916} +{"epoch": 0, "iter": 13329, "iter_tflops": 27.98627275803338, "iter_time": 0.7371861801147461, "loss": 0.10674703866243362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.53025052000435, "step_time": 0.6986426849365235} +{"epoch": 0, "iter": 13330, "iter_tflops": 14.61633890888858, "iter_time": 1.4115089721679688, "loss": 0.09281337261199951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.970362071444324, "step_time": 1.0875434761047365} +{"epoch": 0, "iter": 13331, "iter_tflops": 36.03526654873266, "iter_time": 0.5725250701904298, "loss": 0.08433854579925537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.45153876517161, "step_time": 0.5229477519989014} +{"epoch": 0, "iter": 13332, "iter_tflops": 37.86895877722028, "iter_time": 0.5448022384643555, "loss": 0.17738842964172363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62408268362907, "step_time": 0.4956528091430663} +{"epoch": 0, "iter": 13333, "iter_tflops": 18.051432860958677, "iter_time": 1.1429061431884766, "loss": 0.17110459506511688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.63924410691552, "step_time": 1.0505034408569336} +{"epoch": 0, "iter": 13334, "iter_tflops": 32.85659464858472, "iter_time": 0.6279133224487305, "loss": 0.14554500579833984, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 38.078610566026, "step_time": 0.5418026866912842} +{"epoch": 0, "iter": 13335, "iter_tflops": 45.24492853501203, "iter_time": 0.455986873626709, "loss": 0.11488997936248779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0380530562156, "step_time": 0.4207159996032715} +{"epoch": 0, "iter": 13336, "iter_tflops": 51.169336420462066, "iter_time": 0.4031925163269043, "loss": 0.30426469445228577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.71005357449788, "step_time": 0.3703298091888428} +{"epoch": 0, "iter": 13337, "iter_tflops": 27.796300929945463, "iter_time": 0.7422244262695312, "loss": 0.6385152339935303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.291093035916155, "step_time": 0.7043470001220703} +{"epoch": 0, "iter": 13338, "iter_tflops": 18.574393764766054, "iter_time": 1.1107276916503908, "loss": 0.6141946911811829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.57279273439595, "step_time": 1.0028338775634766} +{"epoch": 0, "iter": 13339, "iter_tflops": 43.42345958875829, "iter_time": 0.47511399841308594, "loss": 0.7252957224845886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.80819313865124, "step_time": 0.44075816917419436} +{"epoch": 0, "iter": 13340, "iter_tflops": 46.40601269527407, "iter_time": 0.44457802581787104, "loss": 0.6957072019577026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.98075121265928, "step_time": 0.4127807807922363} +{"epoch": 0, "iter": 13341, "iter_tflops": 27.338014618404188, "iter_time": 0.7546668548583985, "loss": 0.250995397567749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.905767836998603, "step_time": 0.7137362213134766} +{"epoch": 0, "iter": 13342, "iter_tflops": 14.750246875196275, "iter_time": 1.3986947937011718, "loss": 0.2232813686132431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.78821227122964, "step_time": 1.159818265914917} +{"epoch": 0, "iter": 13343, "iter_tflops": 37.81602568644883, "iter_time": 0.545564826965332, "loss": 0.22125375270843506, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 41.55670996134212, "step_time": 0.4964563732147217} +{"epoch": 0, "iter": 13344, "iter_tflops": 41.43242404782773, "iter_time": 0.4979456062316895, "loss": 0.13512560725212097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2519791977044, "step_time": 0.4559158267974853} +{"epoch": 0, "iter": 13345, "iter_tflops": 19.412174473288268, "iter_time": 1.062791473388672, "loss": 0.6651111841201782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.7354855235475, "step_time": 0.9949655380249023} +{"epoch": 0, "iter": 13346, "iter_tflops": 21.939438408337786, "iter_time": 0.9403656158447266, "loss": 0.4857534170150757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.5347178621222, "step_time": 0.840893856048584} +{"epoch": 0, "iter": 13347, "iter_tflops": 36.1669804328537, "iter_time": 0.5704400329589844, "loss": 0.7151945233345032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.33750407479453, "step_time": 0.5244637145996094} +{"epoch": 0, "iter": 13348, "iter_tflops": 38.8891356022201, "iter_time": 0.5305104675292969, "loss": 0.6796134114265442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.204876858973606, "step_time": 0.488831979751587} +{"epoch": 0, "iter": 13349, "iter_tflops": 33.06656258845117, "iter_time": 0.6239261627197266, "loss": 0.2653781771659851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.69506906495348, "step_time": 0.562230676651001} +{"epoch": 0, "iter": 13350, "iter_tflops": 39.285319483808344, "iter_time": 0.5251603851318359, "loss": 0.21927887201309204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.79535585137264, "step_time": 0.4710794811248779} +{"epoch": 0, "iter": 13351, "iter_tflops": 40.325934454123654, "iter_time": 0.5116085662841797, "loss": 0.2234085202217102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.37421291211268, "step_time": 0.4649342975616455} +{"epoch": 0, "iter": 13352, "iter_tflops": 42.31229156912044, "iter_time": 0.48759102249145503, "loss": 0.18632574379444122, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 46.13634222612055, "step_time": 0.4471766185760498} +{"epoch": 0, "iter": 13353, "iter_tflops": 29.718344309736956, "iter_time": 0.6942208251953125, "loss": 0.6373436450958252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.70430529769216, "step_time": 0.6308372344970703} +{"epoch": 0, "iter": 13354, "iter_tflops": 36.323292295099705, "iter_time": 0.5679852294921874, "loss": 0.6237474083900452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.549610898858646, "step_time": 0.5216509857177735} +{"epoch": 0, "iter": 13355, "iter_tflops": 34.32223053891542, "iter_time": 0.6011000213623048, "loss": 0.6156049370765686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.27981395599187, "step_time": 0.553411922454834} +{"epoch": 0, "iter": 13356, "iter_tflops": 38.35112284989751, "iter_time": 0.5379527893066407, "loss": 0.6426436901092529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.557519442171845, "step_time": 0.4964467029571533} +{"epoch": 0, "iter": 13357, "iter_tflops": 17.164979315519556, "iter_time": 1.2019294128417968, "loss": 0.12775468826293945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.331065597431692, "step_time": 1.1254715881347654} +{"epoch": 0, "iter": 13358, "iter_tflops": 29.18929713049882, "iter_time": 0.706803367614746, "loss": 0.25170257687568665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.604329092143814, "step_time": 0.6327716007232665} +{"epoch": 0, "iter": 13359, "iter_tflops": 39.097128071850115, "iter_time": 0.5276882095336913, "loss": 0.1936541497707367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88714645331556, "step_time": 0.4810554027557373} +{"epoch": 0, "iter": 13360, "iter_tflops": 40.941315643608746, "iter_time": 0.5039186744689941, "loss": 0.12527886033058167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.886822229966405, "step_time": 0.4596247291564941} +{"epoch": 0, "iter": 13361, "iter_tflops": 13.993330295421586, "iter_time": 1.4743519287109375, "loss": 0.060489922761917114, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 14.835775374423454, "step_time": 1.3906312942504881} +{"epoch": 0, "iter": 13362, "iter_tflops": 28.473557993807614, "iter_time": 0.7245702667236328, "loss": 0.10241130739450455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.513780919550776, "step_time": 0.5809320487976074} +{"epoch": 0, "iter": 13363, "iter_tflops": 52.658510922842154, "iter_time": 0.39179029464721676, "loss": 0.07372012734413147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.55677890601426, "step_time": 0.3584476737976074} +{"epoch": 0, "iter": 13364, "iter_tflops": 48.016476365695524, "iter_time": 0.4296669616699219, "loss": 0.08131150901317596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.28795342688282, "step_time": 0.3945668582916259} +{"epoch": 0, "iter": 13365, "iter_tflops": 41.35476323190035, "iter_time": 0.4988807067871094, "loss": 0.11026274412870407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.16179342024429, "step_time": 0.45682626724243164} +{"epoch": 0, "iter": 13366, "iter_tflops": 49.3588853015939, "iter_time": 0.4179813499450683, "loss": 0.09425334632396698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.8090168304806, "step_time": 0.383413314819336} +{"epoch": 0, "iter": 13367, "iter_tflops": 50.92027657694098, "iter_time": 0.40516460037231433, "loss": 0.09900622814893723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.38702902094328, "step_time": 0.3724896221160888} +{"epoch": 0, "iter": 13368, "iter_tflops": 46.805548880020034, "iter_time": 0.44078306961059566, "loss": 0.0936773493885994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.89915666630833, "step_time": 0.4053327178955078} +{"epoch": 0, "iter": 13369, "iter_tflops": 2.4517108911970866, "iter_time": 0.5912011260986327, "loss": 0.39891529083251953, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 2.6362319374226644, "step_time": 0.5498204536437988} +{"epoch": 0, "iter": 13370, "iter_tflops": 1.3624506360179438, "iter_time": 1.0638581695556641, "loss": 
0.3687581717967987, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.5242466561799917, "step_time": 0.9509315528869628} +{"epoch": 0, "iter": 13371, "iter_tflops": 3.339589436768414, "iter_time": 0.43402168655395507, "loss": 0.43548572063446045, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.6369228109950296, "step_time": 0.398538631439209} +{"epoch": 0, "iter": 13372, "iter_tflops": 3.150110335712507, "iter_time": 0.4601280860900879, "loss": 0.5125643610954285, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.4334643106361376, "step_time": 0.4221550331115722} +{"epoch": 0, "iter": 13373, "iter_tflops": 39.995644051989444, "iter_time": 0.5158335113525391, "loss": 0.059931375086307526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.5470446925499, "step_time": 0.4737656402587891} +{"epoch": 0, "iter": 13374, "iter_tflops": 11.83374981061902, "iter_time": 1.7434113311767576, "loss": 0.05436135455965996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.944596012510443, "step_time": 1.5937997207641603} +{"epoch": 0, "iter": 13375, "iter_tflops": 10.920476340676526, "iter_time": 1.8892118682861327, "loss": 0.0579967126250267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.196264699083704, "step_time": 1.5634040374755858} +{"epoch": 0, "iter": 13376, "iter_tflops": 17.281814737148515, "iter_time": 1.1938036499023437, "loss": 0.06588532775640488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.15540512477761, "step_time": 0.9752161865234376} +{"epoch": 0, "iter": 13377, "iter_tflops": 14.810895929317269, "iter_time": 1.0949558563232422, "loss": 0.39434534311294556, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 15.905596256972474, "step_time": 1.0195956802368165} +{"epoch": 0, "iter": 13378, "iter_tflops": 15.983642794880424, "iter_time": 1.0146170959472656, "loss": 0.31942301988601685, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 21.28720235082496, "step_time": 0.7618322486877441} +{"epoch": 0, "iter": 13379, "iter_tflops": 25.23017824562341, "iter_time": 
0.6427729949951172, "loss": 0.34863701462745667, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 27.12900286345716, "step_time": 0.5977837562561036} +{"epoch": 0, "iter": 13380, "iter_tflops": 23.625716152671476, "iter_time": 0.6864247894287109, "loss": 0.29880672693252563, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 25.32156675641364, "step_time": 0.6404531517028809} +{"epoch": 0, "iter": 13381, "iter_tflops": 16.096093183396203, "iter_time": 1.2817454071044923, "loss": 0.6427410244941711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.21537730226623, "step_time": 1.1984107666015624} +{"epoch": 0, "iter": 13382, "iter_tflops": 16.69923313001602, "iter_time": 1.2354515533447266, "loss": 0.521309494972229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.267050052383205, "step_time": 0.78543625831604} +{"epoch": 0, "iter": 13383, "iter_tflops": 39.895985121700846, "iter_time": 0.5171220474243164, "loss": 0.6190428733825684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.741998610027615, "step_time": 0.482689022064209} +{"epoch": 0, "iter": 13384, "iter_tflops": 43.600276461151886, "iter_time": 0.47318721771240235, "loss": 0.6187973022460938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19040824962777, "step_time": 0.4371882820129394} +{"epoch": 0, "iter": 13385, "iter_tflops": 32.964199707608564, "iter_time": 0.6258636245727539, "loss": 0.5075768232345581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.314043780139954, "step_time": 0.58421781539917} +{"epoch": 0, "iter": 13386, "iter_tflops": 33.92318946682544, "iter_time": 0.6081708068847655, "loss": 0.34779006242752075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.05679889584333, "step_time": 0.5421132125854493} +{"epoch": 0, "iter": 13387, "iter_tflops": 42.52016688031397, "iter_time": 0.4852072563171386, "loss": 0.4303143322467804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.33513008324028, "step_time": 0.4452581329345703} +{"epoch": 0, "iter": 13388, "iter_tflops": 39.868968660208886, 
"iter_time": 0.5174724655151367, "loss": 0.45884034037590027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77434346343043, "step_time": 0.47130560684204104} +{"epoch": 0, "iter": 13389, "iter_tflops": 20.520370524250932, "iter_time": 1.0053957595825194, "loss": 0.9047180414199829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.57213442468735, "step_time": 0.9563770141601563} +{"epoch": 0, "iter": 13390, "iter_tflops": 9.386523810774992, "iter_time": 2.1979482421875, "loss": 0.5814483165740967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.512912001553248, "step_time": 1.791996109008789} +{"epoch": 0, "iter": 13391, "iter_tflops": 15.436658069692486, "iter_time": 1.3365, "loss": 0.47406479716300964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.01519970296914, "step_time": 1.145204818725586} +{"epoch": 0, "iter": 13392, "iter_tflops": 20.36779509827909, "iter_time": 1.0129271926879884, "loss": 0.4646933674812317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.16005196104252, "step_time": 0.6840536460876465} +{"epoch": 0, "iter": 13393, "iter_tflops": 17.56521663864931, "iter_time": 0.8766335067749024, "loss": 0.28979939222335815, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 18.667735460555193, "step_time": 0.8248594207763672} +{"epoch": 0, "iter": 13394, "iter_tflops": 11.011492118385178, "iter_time": 1.3983806457519532, "loss": 0.3197445571422577, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 12.560191210323353, "step_time": 1.2259572486877444} +{"epoch": 0, "iter": 13395, "iter_tflops": 22.36354632415168, "iter_time": 0.688542739868164, "loss": 0.3045940101146698, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.158701547642575, "step_time": 0.6373793487548829} +{"epoch": 0, "iter": 13396, "iter_tflops": 23.46507937451189, "iter_time": 0.6562201309204101, "loss": 0.46647879481315613, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.182857943273863, "step_time": 0.6114579010009766} +{"epoch": 0, "iter": 13397, "iter_tflops": 24.363065426439537, 
"iter_time": 0.8468184585571289, "loss": 0.5117736458778381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.42291365120007, "step_time": 0.7808031234741212} +{"epoch": 0, "iter": 13398, "iter_tflops": 38.41924114434252, "iter_time": 0.5369989852905273, "loss": 0.609784722328186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53338635689118, "step_time": 0.496735164642334} +{"epoch": 0, "iter": 13399, "iter_tflops": 41.69857196726643, "iter_time": 0.4947673873901368, "loss": 0.5739313364028931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.91375698295119, "step_time": 0.45934909248352046} +{"epoch": 0, "iter": 13400, "iter_tflops": 42.280981514798256, "iter_time": 0.4879520950317383, "loss": 0.7560513019561768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.56815115311676, "step_time": 0.45275248146057134} +{"epoch": 0, "iter": 13401, "iter_tflops": 23.78705655564735, "iter_time": 0.8673243560791015, "loss": 0.848251461982727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.956274526698394, "step_time": 0.8266896362304688} +{"epoch": 0, "iter": 13402, "iter_tflops": 12.131612993433242, "iter_time": 1.700605972290039, "loss": 0.7198792695999146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.408977283590676, "step_time": 1.4318221969604492} +{"epoch": 0, "iter": 13403, "iter_tflops": 14.695322853010778, "iter_time": 1.4039224395751952, "loss": 0.6525822281837463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.46480725881082, "step_time": 1.1173197326660156} +{"epoch": 0, "iter": 13404, "iter_tflops": 19.173714497466833, "iter_time": 1.0760092163085937, "loss": 0.5099740028381348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.316023388198488, "step_time": 0.9678678398132323} +{"epoch": 0, "iter": 13405, "iter_tflops": 24.05483513408328, "iter_time": 0.7219785842895508, "loss": 0.23488043248653412, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 25.657802615410123, "step_time": 0.6768730773925782} +{"epoch": 0, "iter": 13406, "iter_tflops": 
12.172386533590794, "iter_time": 1.4267601318359375, "loss": 0.3032539188861847, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 14.299613985729275, "step_time": 1.2145136108398438} +{"epoch": 0, "iter": 13407, "iter_tflops": 28.341127415265532, "iter_time": 0.6127870483398438, "loss": 0.2890056073665619, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 30.463485108280246, "step_time": 0.5700948448181152} +{"epoch": 0, "iter": 13408, "iter_tflops": 31.482418793679752, "iter_time": 0.5516436309814453, "loss": 0.36570611596107483, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 33.528439310927034, "step_time": 0.5179804420471191} +{"epoch": 0, "iter": 13409, "iter_tflops": 28.08997701921396, "iter_time": 0.7344645919799804, "loss": 0.23702490329742432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.142784966256364, "step_time": 0.684445499420166} +{"epoch": 0, "iter": 13410, "iter_tflops": 13.476650942179885, "iter_time": 1.530876892089844, "loss": 0.16027958691120148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.105213486378975, "step_time": 1.206128969192505} +{"epoch": 0, "iter": 13411, "iter_tflops": 44.1415117089482, "iter_time": 0.46738529586791994, "loss": 0.1735232025384903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.754507645510614, "step_time": 0.4320240020751953} +{"epoch": 0, "iter": 13412, "iter_tflops": 42.700049417379, "iter_time": 0.48316322326660155, "loss": 0.20325590670108795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.9736550298674, "step_time": 0.44875904464721683} +{"epoch": 0, "iter": 13413, "iter_tflops": 29.75074637713608, "iter_time": 0.6934647369384767, "loss": 0.0040366994217038155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.657455902930494, "step_time": 0.6516977729797363} +{"epoch": 0, "iter": 13414, "iter_tflops": 15.672695976266784, "iter_time": 1.3163717041015626, "loss": 0.004265307914465666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.784475032252868, "step_time": 1.160061990737915} +{"epoch": 0, "iter": 13415, 
"iter_tflops": 41.576540776679934, "iter_time": 0.49621957778930664, "loss": 0.0013141282834112644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.34730758446311, "step_time": 0.4451411437988281} +{"epoch": 0, "iter": 13416, "iter_tflops": 46.10809024705462, "iter_time": 0.4474506187438965, "loss": 0.01683414727449417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.97935128107952, "step_time": 0.4046950969696045} +{"epoch": 0, "iter": 13417, "iter_tflops": 28.678114381115456, "iter_time": 0.6429313354492188, "loss": 0.05770936235785484, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 30.995779316955954, "step_time": 0.5948570671081542} +{"epoch": 0, "iter": 13418, "iter_tflops": 21.117240059491245, "iter_time": 0.8731282272338867, "loss": 0.07958690822124481, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 23.818009810636216, "step_time": 0.7741225452423095} +{"epoch": 0, "iter": 13419, "iter_tflops": 45.196408623208825, "iter_time": 0.40795405960083014, "loss": 0.05850963294506073, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 49.245988721298744, "step_time": 0.37440731430053714} +{"epoch": 0, "iter": 13420, "iter_tflops": 50.19934042905955, "iter_time": 0.36729682540893555, "loss": 0.041338760405778885, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 54.883445478344385, "step_time": 0.33594935989379887} +{"epoch": 0, "iter": 13421, "iter_tflops": 40.54515775903684, "iter_time": 0.5088423538208008, "loss": 0.5760563015937805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.949839120142634, "step_time": 0.46942364120483393} +{"epoch": 0, "iter": 13422, "iter_tflops": 42.64171407425995, "iter_time": 0.4838242073059082, "loss": 0.6061823964118958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.039298885125994, "step_time": 0.4481191940307617} +{"epoch": 0, "iter": 13423, "iter_tflops": 40.602980263463884, "iter_time": 0.5081177139282226, "loss": 0.5654498934745789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.72091403751374, "step_time": 0.4718815689086914} 
+{"epoch": 0, "iter": 13424, "iter_tflops": 40.222722165573686, "iter_time": 0.5129213638305664, "loss": 0.5071130394935608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.03569828566591, "step_time": 0.47939488220214843} +{"epoch": 0, "iter": 13425, "iter_tflops": 42.03500876374763, "iter_time": 0.4908074035644531, "loss": 0.7702997922897339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60305245211694, "step_time": 0.45240597724914555} +{"epoch": 0, "iter": 13426, "iter_tflops": 47.494740425327954, "iter_time": 0.43438690948486325, "loss": 0.5148646235466003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.794098621632294, "step_time": 0.3983290386199952} +{"epoch": 0, "iter": 13427, "iter_tflops": 44.459580561817184, "iter_time": 0.4640415687561035, "loss": 0.5850614309310913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.816711697600766, "step_time": 0.43146198844909667} +{"epoch": 0, "iter": 13428, "iter_tflops": 49.01788912120035, "iter_time": 0.4208890647888184, "loss": 0.5498858094215393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.25844203120319, "step_time": 0.38737696266174315} +{"epoch": 0, "iter": 13429, "iter_tflops": 33.536739022832926, "iter_time": 0.6151788787841798, "loss": 0.7409844994544983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.64424552070752, "step_time": 0.5788057289123535} +{"epoch": 0, "iter": 13430, "iter_tflops": 42.57591007062078, "iter_time": 0.4845719909667969, "loss": 0.5665286183357239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66797366535467, "step_time": 0.44208247947692875} +{"epoch": 0, "iter": 13431, "iter_tflops": 44.417668376659066, "iter_time": 0.464479434967041, "loss": 0.5630562901496887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.93810972744991, "step_time": 0.4303693580627441} +{"epoch": 0, "iter": 13432, "iter_tflops": 48.4990686099877, "iter_time": 0.42539154052734374, "loss": 0.5875011682510376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.27790909356959, "step_time": 
0.3946426677703857} +{"epoch": 0, "iter": 13433, "iter_tflops": 28.101840378189454, "iter_time": 0.7341545333862305, "loss": 0.6503808498382568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.80475584635158, "step_time": 0.6922081031799318} +{"epoch": 0, "iter": 13434, "iter_tflops": 15.348836492296458, "iter_time": 1.3441470642089843, "loss": 0.447492390871048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.532987704451582, "step_time": 1.113209259033203} +{"epoch": 0, "iter": 13435, "iter_tflops": 36.762056591859434, "iter_time": 0.5612061843872072, "loss": 0.4649924039840698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.957521449499865, "step_time": 0.5163256568908691} +{"epoch": 0, "iter": 13436, "iter_tflops": 40.87017885633882, "iter_time": 0.5047957725524903, "loss": 0.4986191689968109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55562191374915, "step_time": 0.4630413093566894} +{"epoch": 0, "iter": 13437, "iter_tflops": 32.13555847166305, "iter_time": 0.6420020217895508, "loss": 0.15558531880378723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.72069730967431, "step_time": 0.5775669307708741} +{"epoch": 0, "iter": 13438, "iter_tflops": 36.434385738381025, "iter_time": 0.5662533645629882, "loss": 0.17443379759788513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.90731128855038, "step_time": 0.5043375587463379} +{"epoch": 0, "iter": 13439, "iter_tflops": 39.94428348439351, "iter_time": 0.5164967727661134, "loss": 0.13774080574512482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.85229570606087, "step_time": 0.470467809677124} +{"epoch": 0, "iter": 13440, "iter_tflops": 43.363970226024655, "iter_time": 0.47576578903198236, "loss": 0.13447336852550507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.4275682619026, "step_time": 0.4350021362304687} +{"epoch": 0, "iter": 13441, "iter_tflops": 22.390867928257276, "iter_time": 0.9214066009521484, "loss": 0.702470600605011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.838639867498046, 
"step_time": 0.8654475936889648} +{"epoch": 0, "iter": 13442, "iter_tflops": 9.495766494100586, "iter_time": 2.172662261962891, "loss": 0.8058028221130371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.614438411226141, "step_time": 1.6355142288208007} +{"epoch": 0, "iter": 13443, "iter_tflops": 19.55257009207878, "iter_time": 1.0551601867675782, "loss": 0.6186327338218689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.489212762174937, "step_time": 0.7788488731384278} +{"epoch": 0, "iter": 13444, "iter_tflops": 45.173309091773426, "iter_time": 0.45670981216430667, "loss": 0.7687230110168457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.097185003365915, "step_time": 0.42020929527282713} +{"epoch": 0, "iter": 13445, "iter_tflops": 27.139991751657867, "iter_time": 0.6399071884155274, "loss": 0.26346516609191895, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 28.981470016909167, "step_time": 0.5992475814819336} +{"epoch": 0, "iter": 13446, "iter_tflops": 31.103934356694047, "iter_time": 0.5583562393188477, "loss": 0.42163747549057007, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 33.25915218065011, "step_time": 0.5221743392944336} +{"epoch": 0, "iter": 13447, "iter_tflops": 30.800498088761255, "iter_time": 0.5638569793701173, "loss": 0.26461243629455566, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 32.79439584307138, "step_time": 0.5295745010375976} +{"epoch": 0, "iter": 13448, "iter_tflops": 32.38091498060746, "iter_time": 0.5363367843627929, "loss": 0.2891221344470978, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 34.37281901458179, "step_time": 0.5052560806274414} +{"epoch": 0, "iter": 13449, "iter_tflops": 1.7762575243326857, "iter_time": 1.0150560455322266, "loss": 1.9428731203079224, "lr": 3e-05, "seqlen": 736.0, "step_tflops": 1.8531045322195845, "step_time": 0.9729623489379882} +{"epoch": 0, "iter": 13450, "iter_tflops": 1.5105050575374819, "iter_time": 1.19364111328125, "loss": 1.9829862117767334, "lr": 3e-05, "seqlen": 736.0, "step_tflops": 
1.9611894465282378, "step_time": 0.9193405265808106} +{"epoch": 0, "iter": 13451, "iter_tflops": 3.4312186283488995, "iter_time": 0.5254695587158204, "loss": 1.944692850112915, "lr": 3e-05, "seqlen": 736.0, "step_tflops": 3.782324067359558, "step_time": 0.47669128990173343} +{"epoch": 0, "iter": 13452, "iter_tflops": 3.8902798505722376, "iter_time": 0.4634630432128906, "loss": 1.9903223514556885, "lr": 3e-05, "seqlen": 736.0, "step_tflops": 4.241614730815305, "step_time": 0.4250741882324219} +{"epoch": 0, "iter": 13453, "iter_tflops": 34.8816342830844, "iter_time": 0.5914600601196289, "loss": 0.13497315347194672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.441830908824365, "step_time": 0.5366834259033203} +{"epoch": 0, "iter": 13454, "iter_tflops": 37.77779475483022, "iter_time": 0.5461169357299805, "loss": 0.190378800034523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.30077699265063, "step_time": 0.4995328178405762} +{"epoch": 0, "iter": 13455, "iter_tflops": 40.69366174363848, "iter_time": 0.5069854278564452, "loss": 0.17787499725818634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.77783779047678, "step_time": 0.4607434062957763} +{"epoch": 0, "iter": 13456, "iter_tflops": 45.21798179428545, "iter_time": 0.4562586097717285, "loss": 0.15064942836761475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.413719130749946, "step_time": 0.41751752090454103} +{"epoch": 0, "iter": 13457, "iter_tflops": 20.36728609405194, "iter_time": 1.012952507019043, "loss": 0.49181467294692993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.651973349150694, "step_time": 0.9528504943847657} +{"epoch": 0, "iter": 13458, "iter_tflops": 26.188703566750174, "iter_time": 0.7877859802246093, "loss": 0.5093137621879578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.89497319265215, "step_time": 0.6468446731567383} +{"epoch": 0, "iter": 13459, "iter_tflops": 47.49085430684799, "iter_time": 0.43442245483398445, "loss": 0.33311817049980164, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 51.43369822050172, "step_time": 0.4011201648712158} +{"epoch": 0, "iter": 13460, "iter_tflops": 49.75722857169135, "iter_time": 0.41463510131835934, "loss": 0.31874775886535645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.002902237259384, "step_time": 0.3820367546081543} +{"epoch": 0, "iter": 13461, "iter_tflops": 32.252558981973316, "iter_time": 0.6396730728149415, "loss": 0.5526350140571594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.52548080963649, "step_time": 0.5975613670349121} +{"epoch": 0, "iter": 13462, "iter_tflops": 43.03396830431218, "iter_time": 0.4794141540527344, "loss": 0.7976635098457336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.483369868924555, "step_time": 0.4438381633758545} +{"epoch": 0, "iter": 13463, "iter_tflops": 43.79525814686877, "iter_time": 0.47108053207397454, "loss": 0.6400043964385986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.10307834191593, "step_time": 0.43799883651733396} +{"epoch": 0, "iter": 13464, "iter_tflops": 49.01489403551197, "iter_time": 0.4209147834777832, "loss": 0.7633964419364929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.11842759355297, "step_time": 0.38839804649353027} +{"epoch": 0, "iter": 13465, "iter_tflops": 32.67110318273135, "iter_time": 0.6314783248901368, "loss": 0.18113307654857635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.950494425627575, "step_time": 0.5902947540283203} +{"epoch": 0, "iter": 13466, "iter_tflops": 10.083939802447954, "iter_time": 2.045935806274414, "loss": 0.2378629893064499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.57292850193404, "step_time": 1.7827029266357421} +{"epoch": 0, "iter": 13467, "iter_tflops": 14.22222549768528, "iter_time": 1.4506234283447268, "loss": 0.17222389578819275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.035529739922925, "step_time": 1.2110626335144041} +{"epoch": 0, "iter": 13468, "iter_tflops": 20.410332512141874, "iter_time": 1.0108161392211914, "loss": 0.20215368270874023, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 25.65874080274382, "step_time": 0.8040571308135986} +{"epoch": 0, "iter": 13469, "iter_tflops": 14.040587112692219, "iter_time": 1.1783989868164062, "loss": 0.2595897316932678, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 14.795058795218887, "step_time": 1.1183067169189453} +{"epoch": 0, "iter": 13470, "iter_tflops": 16.238565301346487, "iter_time": 1.018896270751953, "loss": 0.4698549509048462, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 18.30197504794829, "step_time": 0.9040233955383299} +{"epoch": 0, "iter": 13471, "iter_tflops": 29.844401972355772, "iter_time": 0.5543891830444336, "loss": 0.26202988624572754, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.837905640991195, "step_time": 0.519676570892334} +{"epoch": 0, "iter": 13472, "iter_tflops": 29.417648920361266, "iter_time": 0.5624315414428711, "loss": 0.38359540700912476, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.187801264738148, "step_time": 0.5305091400146484} +{"epoch": 0, "iter": 13473, "iter_tflops": 34.54695236037138, "iter_time": 0.5971899719238281, "loss": 0.7524604201316833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.071769517846874, "step_time": 0.556517635345459} +{"epoch": 0, "iter": 13474, "iter_tflops": 8.33226255425604, "iter_time": 2.476049377441406, "loss": 0.6033074855804443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.849228339529262, "step_time": 2.094691360473633} +{"epoch": 0, "iter": 13475, "iter_tflops": 10.84953322968302, "iter_time": 1.9015650787353517, "loss": 0.72209233045578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.640023078288111, "step_time": 1.6322037849426267} +{"epoch": 0, "iter": 13476, "iter_tflops": 32.94435956660237, "iter_time": 0.6262405395507812, "loss": 0.6973688006401062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.73553986308943, "step_time": 0.5616112785339356} +{"epoch": 0, "iter": 13477, "iter_tflops": 11.165220065463789, "iter_time": 1.459826400756836, "loss": 0.29893457889556885, "lr": 
3e-05, "seqlen": 6512.0, "step_tflops": 11.840092146082926, "step_time": 1.376617919921875} +{"epoch": 0, "iter": 13478, "iter_tflops": 15.370656913323332, "iter_time": 1.060415512084961, "loss": 0.4309292137622833, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 17.114797261552777, "step_time": 0.9523503417968749} +{"epoch": 0, "iter": 13479, "iter_tflops": 23.38713108141834, "iter_time": 0.696933837890625, "loss": 0.3898327052593231, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 25.247230124256948, "step_time": 0.6455869789123535} +{"epoch": 0, "iter": 13480, "iter_tflops": 25.895890089304658, "iter_time": 0.6294158248901367, "loss": 0.5089841485023499, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.811892233754648, "step_time": 0.5860544433593751} +{"epoch": 0, "iter": 13481, "iter_tflops": 31.99440831803902, "iter_time": 0.6448343505859375, "loss": 0.05660923570394516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.928298324019856, "step_time": 0.5906698722839355} +{"epoch": 0, "iter": 13482, "iter_tflops": 48.62642656418713, "iter_time": 0.4242773933410645, "loss": 0.06041315197944641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.594473900829044, "step_time": 0.384948148727417} +{"epoch": 0, "iter": 13483, "iter_tflops": 51.95152528117272, "iter_time": 0.39712199783325197, "loss": 0.03218363970518112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.83210130823793, "step_time": 0.3630183124542236} +{"epoch": 0, "iter": 13484, "iter_tflops": 48.922811734051976, "iter_time": 0.42170702743530275, "loss": 0.02682092785835266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.20022794215461, "step_time": 0.38780084800720216} +{"epoch": 0, "iter": 13485, "iter_tflops": 31.94285966318982, "iter_time": 0.6458749694824218, "loss": 0.0016034397995099425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.09272508809402, "step_time": 0.6051465072631835} +{"epoch": 0, "iter": 13486, "iter_tflops": 33.075681335705454, "iter_time": 0.623754150390625, "loss": 
0.005219240672886372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.032770028248336, "step_time": 0.5571037082672119} +{"epoch": 0, "iter": 13487, "iter_tflops": 44.21503608005162, "iter_time": 0.4666080894470215, "loss": 0.00496979895979166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.93890542637533, "step_time": 0.42156834793090825} +{"epoch": 0, "iter": 13488, "iter_tflops": 50.27911103458615, "iter_time": 0.4103313102722168, "loss": 0.00750885671004653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.53930521934848, "step_time": 0.3714683399200439} +{"epoch": 0, "iter": 13489, "iter_tflops": 16.970425237718516, "iter_time": 1.215708694458008, "loss": 0.42229554057121277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.041008786547128, "step_time": 1.1435665130615236} +{"epoch": 0, "iter": 13490, "iter_tflops": 23.26138382059413, "iter_time": 0.8869245986938477, "loss": 0.28652310371398926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.734551275107908, "step_time": 0.6501145496368408} +{"epoch": 0, "iter": 13491, "iter_tflops": 45.23832222484464, "iter_time": 0.4560534629821777, "loss": 0.3370015025138855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.65400671693731, "step_time": 0.42403688621520996} +{"epoch": 0, "iter": 13492, "iter_tflops": 43.70402646119624, "iter_time": 0.47206390762329103, "loss": 0.4792839288711548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.206331892773896, "step_time": 0.4370408096313476} +{"epoch": 0, "iter": 13493, "iter_tflops": 18.734656365124312, "iter_time": 1.1012261505126952, "loss": 0.5819531083106995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.43703660320165, "step_time": 1.0614320449829102} +{"epoch": 0, "iter": 13494, "iter_tflops": 15.792803848134676, "iter_time": 1.3063603973388673, "loss": 0.5607743859291077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.50133423249464, "step_time": 1.1151138210296632} +{"epoch": 0, "iter": 13495, "iter_tflops": 35.48603319496817, "iter_time": 
0.5813862991333009, "loss": 0.5676993727684021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.79490065737065, "step_time": 0.5317991065979004} +{"epoch": 0, "iter": 13496, "iter_tflops": 42.16304838915338, "iter_time": 0.4893169326782226, "loss": 0.46626371145248413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.739302300771705, "step_time": 0.45105833435058595} +{"epoch": 0, "iter": 13497, "iter_tflops": 19.204612431205824, "iter_time": 1.0742780456542969, "loss": 0.008478697389364243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.840267085809888, "step_time": 0.9899630088806153} +{"epoch": 0, "iter": 13498, "iter_tflops": 23.176579786004233, "iter_time": 0.8901698913574219, "loss": 0.0014936975203454494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.124140087506834, "step_time": 0.7606174221038818} +{"epoch": 0, "iter": 13499, "iter_tflops": 58.64142955949221, "iter_time": 0.35181771087646485, "loss": 0.01357041485607624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.58455298036807, "step_time": 0.31944315719604494} +{"epoch": 0, "iter": 13500, "iter_tflops": 53.6340688953368, "iter_time": 0.3846639633178711, "loss": 0.004182983189821243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.26107204308239, "step_time": 0.35411455345153803} +{"epoch": 0, "iter": 13501, "iter_tflops": 29.199535328776932, "iter_time": 0.7065555419921874, "loss": 0.09706488996744156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.60159884377652, "step_time": 0.6528496742248535} +{"epoch": 0, "iter": 13502, "iter_tflops": 10.81142019006031, "iter_time": 1.9082685852050782, "loss": 0.08876533806324005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.306259073897497, "step_time": 1.3478860778808592} +{"epoch": 0, "iter": 13503, "iter_tflops": 12.113214969388716, "iter_time": 1.7031889190673828, "loss": 0.1106988936662674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.02114438084078, "step_time": 1.2120861587524416} +{"epoch": 0, "iter": 13504, "iter_tflops": 
19.63410213899607, "iter_time": 1.0507785568237304, "loss": 0.02612668089568615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.27485842194812, "step_time": 0.8162693996429443} +{"epoch": 0, "iter": 13505, "iter_tflops": 13.817468399390203, "iter_time": 1.0936914062499998, "loss": 0.3015429377555847, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 14.81544601248951, "step_time": 1.020019676208496} +{"epoch": 0, "iter": 13506, "iter_tflops": 15.791133621065693, "iter_time": 0.9569956665039062, "loss": 0.2430560439825058, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 17.72811545721336, "step_time": 0.8524338912963867} +{"epoch": 0, "iter": 13507, "iter_tflops": 23.192017692590518, "iter_time": 0.6516055068969727, "loss": 0.3447393774986267, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.979356041541674, "step_time": 0.6049814262390136} +{"epoch": 0, "iter": 13508, "iter_tflops": 20.01161832887111, "iter_time": 0.7551636352539062, "loss": 0.4064040184020996, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 21.479225654602562, "step_time": 0.7035657005310059} +{"epoch": 0, "iter": 13509, "iter_tflops": 16.428188737011148, "iter_time": 1.255834945678711, "loss": 0.7213701009750366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.40843450417817, "step_time": 1.1851205520629884} +{"epoch": 0, "iter": 13510, "iter_tflops": 13.374988678724597, "iter_time": 1.5425129699707032, "loss": 0.6059600114822388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.777677708775492, "step_time": 1.0431504554748534} +{"epoch": 0, "iter": 13511, "iter_tflops": 34.40177817955832, "iter_time": 0.5997100906372069, "loss": 0.6068092584609985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.09567853176892, "step_time": 0.5561589469909667} +{"epoch": 0, "iter": 13512, "iter_tflops": 37.00135123032187, "iter_time": 0.5575767593383789, "loss": 0.811091423034668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.215991348501156, "step_time": 0.513007209777832} +{"epoch": 0, "iter": 13513, 
"iter_tflops": 13.884096798809852, "iter_time": 1.485951431274414, "loss": 0.08104636520147324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.592794658232416, "step_time": 1.4137863235473633} +{"epoch": 0, "iter": 13514, "iter_tflops": 21.746050615880897, "iter_time": 0.9487282943725586, "loss": 0.1374426931142807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.812077729536114, "step_time": 0.8314939899444579} +{"epoch": 0, "iter": 13515, "iter_tflops": 39.221937662384306, "iter_time": 0.5260090332031251, "loss": 0.1026107594370842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.2203908768307, "step_time": 0.4773462963104248} +{"epoch": 0, "iter": 13516, "iter_tflops": 37.99951020271374, "iter_time": 0.5429305114746094, "loss": 0.09857819229364395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.460379286417144, "step_time": 0.49760985946655273} +{"epoch": 0, "iter": 13517, "iter_tflops": 30.044668695987856, "iter_time": 0.6866806793212891, "loss": 0.5276453495025635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.59747040345796, "step_time": 0.6329047393798829} +{"epoch": 0, "iter": 13518, "iter_tflops": 8.775486956625908, "iter_time": 2.3509913024902347, "loss": 0.38719016313552856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.017772384145612, "step_time": 2.05944921875} +{"epoch": 0, "iter": 13519, "iter_tflops": 13.05075131894425, "iter_time": 1.5808356933593748, "loss": 0.3833073079586029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.080562442601853, "step_time": 1.141064807891846} +{"epoch": 0, "iter": 13520, "iter_tflops": 23.197859139641, "iter_time": 0.8893533401489258, "loss": 0.4052313268184662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.155201002567104, "step_time": 0.6622038326263426} +{"epoch": 0, "iter": 13521, "iter_tflops": 23.412765826981296, "iter_time": 0.6926681518554688, "loss": 0.501880943775177, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 24.8972856892418, "step_time": 0.6513672790527343} +{"epoch": 0, "iter": 
13522, "iter_tflops": 10.124374634033366, "iter_time": 1.6018053283691405, "loss": 0.25667712092399597, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 12.112113618837945, "step_time": 1.3389304084777833} +{"epoch": 0, "iter": 13523, "iter_tflops": 25.94819155448639, "iter_time": 0.624986801147461, "loss": 0.4245009124279022, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 27.9748978929407, "step_time": 0.5797081832885741} +{"epoch": 0, "iter": 13524, "iter_tflops": 26.815004893164225, "iter_time": 0.6047836761474609, "loss": 0.40835335850715637, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 28.714290142992013, "step_time": 0.5647807121276855} +{"epoch": 0, "iter": 13525, "iter_tflops": 27.800040884494198, "iter_time": 0.6959258041381836, "loss": 0.1844915747642517, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 30.162152546190267, "step_time": 0.6414252357482909} +{"epoch": 0, "iter": 13526, "iter_tflops": 7.238987410720296, "iter_time": 2.6725790100097657, "loss": 0.1587725132703781, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 8.75931673930571, "step_time": 2.20870718383789} +{"epoch": 0, "iter": 13527, "iter_tflops": 12.728774555590276, "iter_time": 1.5199236755371093, "loss": 0.1071154773235321, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 15.792617050258297, "step_time": 1.2250512847900392} +{"epoch": 0, "iter": 13528, "iter_tflops": 33.658453289639425, "iter_time": 0.5747966384887695, "loss": 0.17995254695415497, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 41.27986211699529, "step_time": 0.4686732177734375} +{"epoch": 0, "iter": 13529, "iter_tflops": 15.29245536799274, "iter_time": 1.0042439651489257, "loss": 0.40313440561294556, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 15.98940091152308, "step_time": 0.9604710083007812} +{"epoch": 0, "iter": 13530, "iter_tflops": 14.160181547966253, "iter_time": 1.0845451354980469, "loss": 0.3940902054309845, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 17.028432938821023, "step_time": 0.9018654899597168} +{"epoch": 0, 
"iter": 13531, "iter_tflops": 27.52917529323741, "iter_time": 0.5578574676513671, "loss": 0.38029399514198303, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.373285028548587, "step_time": 0.5228341331481933} +{"epoch": 0, "iter": 13532, "iter_tflops": 27.444728869559384, "iter_time": 0.559573974609375, "loss": 0.21001257002353668, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.238228344528974, "step_time": 0.5252491989135742} +{"epoch": 0, "iter": 13533, "iter_tflops": 8.595338843585957, "iter_time": 0.595811408996582, "loss": 0.015309303067624569, "lr": 3e-05, "seqlen": 2080.0, "step_tflops": 9.243138651143884, "step_time": 0.5540543251037597} +{"epoch": 0, "iter": 13534, "iter_tflops": 4.481119196705531, "iter_time": 1.1428397064208984, "loss": 0.0011886109132319689, "lr": 3e-05, "seqlen": 2080.0, "step_tflops": 5.470780211068657, "step_time": 0.9361006565093994} +{"epoch": 0, "iter": 13535, "iter_tflops": 10.113476366800846, "iter_time": 0.5063739471435547, "loss": 0.006972606293857098, "lr": 3e-05, "seqlen": 2080.0, "step_tflops": 11.193973275270057, "step_time": 0.45749626350402833} +{"epoch": 0, "iter": 13536, "iter_tflops": 11.206728613732093, "iter_time": 0.4569755477905274, "loss": 0.008820749819278717, "lr": 3e-05, "seqlen": 2080.0, "step_tflops": 12.329300585362333, "step_time": 0.4153683261871338} +{"epoch": 0, "iter": 13537, "iter_tflops": 22.548766756249353, "iter_time": 0.9149544067382813, "loss": 0.8431511521339417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.861008469063197, "step_time": 0.8646362762451173} +{"epoch": 0, "iter": 13538, "iter_tflops": 13.648492836332554, "iter_time": 1.5116023254394533, "loss": 0.6665381789207458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.982365656202502, "step_time": 1.2908660678863526} +{"epoch": 0, "iter": 13539, "iter_tflops": 46.67931206882163, "iter_time": 0.44197509765625, "loss": 0.7838402986526489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.59527538231177, "step_time": 
0.4077671947479248} +{"epoch": 0, "iter": 13540, "iter_tflops": 44.122498545733315, "iter_time": 0.4675867004394531, "loss": 0.6781612634658813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.62172984026833, "step_time": 0.4332285614013672} +{"epoch": 0, "iter": 13541, "iter_tflops": 31.3409627165679, "iter_time": 0.6582788696289061, "loss": 0.49889951944351196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.40887350322415, "step_time": 0.6175333480834961} +{"epoch": 0, "iter": 13542, "iter_tflops": 29.165538341449025, "iter_time": 0.7073791427612304, "loss": 0.44158297777175903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.800189457618174, "step_time": 0.5606246547698974} +{"epoch": 0, "iter": 13543, "iter_tflops": 46.70797838006261, "iter_time": 0.44170384216308595, "loss": 0.4224452078342438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.73020238941825, "step_time": 0.40668265724182134} +{"epoch": 0, "iter": 13544, "iter_tflops": 52.26021164204725, "iter_time": 0.39477630996704094, "loss": 0.5122334957122803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.6820013028779, "step_time": 0.3639796237945556} +{"epoch": 0, "iter": 13545, "iter_tflops": 26.907532214745487, "iter_time": 0.7667404556274413, "loss": 0.66951584815979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.458217007497264, "step_time": 0.7249608612060546} +{"epoch": 0, "iter": 13546, "iter_tflops": 12.944217852578777, "iter_time": 1.5938462829589841, "loss": 0.7347725033760071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.239733160952584, "step_time": 1.3537699966430665} +{"epoch": 0, "iter": 13547, "iter_tflops": 43.79479179562435, "iter_time": 0.4710855484008789, "loss": 0.5677729845046997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.295490300974635, "step_time": 0.4362169284820556} +{"epoch": 0, "iter": 13548, "iter_tflops": 49.46804191756359, "iter_time": 0.4170590286254883, "loss": 0.6897426843643188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.406356352337646, 
"step_time": 0.3863040828704834} +{"epoch": 0, "iter": 13549, "iter_tflops": 32.14402187911359, "iter_time": 0.6418329849243164, "loss": 0.3048710525035858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.27146563013052, "step_time": 0.6019904060363769} +{"epoch": 0, "iter": 13550, "iter_tflops": 7.4685960336304715, "iter_time": 2.762379089355469, "loss": 0.32599446177482605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.8309363175997, "step_time": 2.0985888671875} +{"epoch": 0, "iter": 13551, "iter_tflops": 11.626812038355133, "iter_time": 1.774441131591797, "loss": 0.2711957097053528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.571302165215323, "step_time": 1.5201999969482423} +{"epoch": 0, "iter": 13552, "iter_tflops": 34.00477105923168, "iter_time": 0.6067117309570311, "loss": 0.41551700234413147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.03338384056294, "step_time": 0.5285499610900879} +{"epoch": 0, "iter": 13553, "iter_tflops": 13.927464811140165, "iter_time": 1.105603759765625, "loss": 0.3247133493423462, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 14.928211504139743, "step_time": 1.0314870910644531} +{"epoch": 0, "iter": 13554, "iter_tflops": 21.47474691001867, "iter_time": 0.7170402297973634, "loss": 0.5243992209434509, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 27.311397484345434, "step_time": 0.563803352355957} +{"epoch": 0, "iter": 13555, "iter_tflops": 28.05836237323394, "iter_time": 0.5487938766479492, "loss": 0.3982067108154297, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 29.93132511157733, "step_time": 0.5144529151916504} +{"epoch": 0, "iter": 13556, "iter_tflops": 26.496511823584413, "iter_time": 0.5811428146362305, "loss": 0.31178173422813416, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.101724380381988, "step_time": 0.5479470672607422} +{"epoch": 0, "iter": 13557, "iter_tflops": 35.72220530089846, "iter_time": 0.5775425491333008, "loss": 0.08045332133769989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
38.49018455140626, "step_time": 0.5360092124938964} +{"epoch": 0, "iter": 13558, "iter_tflops": 9.48942415405764, "iter_time": 2.1741143798828126, "loss": 0.09693098813295364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.159444497301212, "step_time": 1.848756317138672} +{"epoch": 0, "iter": 13559, "iter_tflops": 10.255702315859565, "iter_time": 2.0116704711914064, "loss": 0.04302050918340683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.331497348749565, "step_time": 1.6730404205322265} +{"epoch": 0, "iter": 13560, "iter_tflops": 30.296587061882416, "iter_time": 0.6809708786010743, "loss": 0.09948493540287018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.96097590891113, "step_time": 0.5434816417694092} +{"epoch": 0, "iter": 13561, "iter_tflops": 25.691668174155133, "iter_time": 0.6376116790771484, "loss": 0.33639881014823914, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 27.378910236186705, "step_time": 0.5983184700012207} +{"epoch": 0, "iter": 13562, "iter_tflops": 29.164525673543483, "iter_time": 0.5616860656738281, "loss": 0.381909042596817, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 31.11034537830799, "step_time": 0.5265549926757813} +{"epoch": 0, "iter": 13563, "iter_tflops": 27.92401712895689, "iter_time": 0.5866386489868164, "loss": 0.35500210523605347, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 29.73619419189245, "step_time": 0.5508878364562988} +{"epoch": 0, "iter": 13564, "iter_tflops": 29.036116316413178, "iter_time": 0.5641700668334961, "loss": 0.31938350200653076, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 30.87050464029052, "step_time": 0.5306459312438965} +{"epoch": 0, "iter": 13565, "iter_tflops": 23.667466271083708, "iter_time": 0.8717068939208985, "loss": 0.6340814232826233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.87885324565888, "step_time": 0.8292622375488281} +{"epoch": 0, "iter": 13566, "iter_tflops": 14.37194424668494, "iter_time": 1.4355116577148437, "loss": 0.7698502540588379, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 19.72814460098502, "step_time": 1.0457695808410643} +{"epoch": 0, "iter": 13567, "iter_tflops": 45.27432834894734, "iter_time": 0.45569076919555657, "loss": 0.903340220451355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.949651957201795, "step_time": 0.4214757957458496} +{"epoch": 0, "iter": 13568, "iter_tflops": 49.36148466619691, "iter_time": 0.4179593391418457, "loss": 0.5851194858551025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.17700072093172, "step_time": 0.387970235824585} +{"epoch": 0, "iter": 13569, "iter_tflops": 33.78822208651688, "iter_time": 0.6106001510620117, "loss": 0.5613991618156433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.193545412050646, "step_time": 0.5700213470458985} +{"epoch": 0, "iter": 13570, "iter_tflops": 40.624565874829756, "iter_time": 0.507847728729248, "loss": 0.4970207214355469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.49247686727398, "step_time": 0.47436005020141603} +{"epoch": 0, "iter": 13571, "iter_tflops": 40.17573905964864, "iter_time": 0.5135211944580078, "loss": 0.5365212559700012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.917625587777, "step_time": 0.480713768005371} +{"epoch": 0, "iter": 13572, "iter_tflops": 45.896954426313, "iter_time": 0.4495089874267578, "loss": 0.7741537690162659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.491132599170626, "step_time": 0.41686444473266604} +{"epoch": 0, "iter": 13573, "iter_tflops": 27.958097738615916, "iter_time": 0.7097545471191407, "loss": 0.2002781331539154, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 29.734576933341675, "step_time": 0.6673505744934082} +{"epoch": 0, "iter": 13574, "iter_tflops": 14.168964951101248, "iter_time": 1.4004824676513672, "loss": 0.18182392418384552, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 17.05351366872683, "step_time": 1.1635952205657958} +{"epoch": 0, "iter": 13575, "iter_tflops": 47.11323999993004, "iter_time": 0.42118493652343747, "loss": 0.1639091521501541, "lr": 3e-05, "seqlen": 
7888.0, "step_tflops": 51.44662699936747, "step_time": 0.38570822143554684} +{"epoch": 0, "iter": 13576, "iter_tflops": 50.36616467805182, "iter_time": 0.3939824905395508, "loss": 0.17883703112602234, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 54.55316544294478, "step_time": 0.3637440071105957} +{"epoch": 0, "iter": 13577, "iter_tflops": 26.71549725891862, "iter_time": 0.7722518997192384, "loss": 0.05463423579931259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.161450038797785, "step_time": 0.7326005401611327} +{"epoch": 0, "iter": 13578, "iter_tflops": 11.462574873950427, "iter_time": 1.799865539550781, "loss": 0.0536579005420208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.318549405488715, "step_time": 1.549049591064453} +{"epoch": 0, "iter": 13579, "iter_tflops": 10.824317681747804, "iter_time": 1.9059948272705078, "loss": 0.07044877111911774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.857895602356813, "step_time": 1.6045466651916502} +{"epoch": 0, "iter": 13580, "iter_tflops": 24.67448596874802, "iter_time": 0.8361306304931639, "loss": 0.11762967705726624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.896673129953008, "step_time": 0.739553903579712} +{"epoch": 0, "iter": 13581, "iter_tflops": 21.11428624691852, "iter_time": 0.7447866668701171, "loss": 0.35108014941215515, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 23.000252760746978, "step_time": 0.6837159156799317} +{"epoch": 0, "iter": 13582, "iter_tflops": 22.52256477533469, "iter_time": 0.6982170562744141, "loss": 0.3741501271724701, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.32280854800686, "step_time": 0.6465387763977051} +{"epoch": 0, "iter": 13583, "iter_tflops": 23.442045774335575, "iter_time": 0.6708304824829101, "loss": 0.5227606892585754, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.28771188049471, "step_time": 0.6218687934875488} +{"epoch": 0, "iter": 13584, "iter_tflops": 23.87002396243006, "iter_time": 0.6588028106689453, "loss": 0.28456252813339233, "lr": 3e-05, 
"seqlen": 6288.0, "step_tflops": 25.44950391813221, "step_time": 0.617915340423584} +{"epoch": 0, "iter": 13585, "iter_tflops": 25.14481820128056, "iter_time": 0.8204908599853515, "loss": 0.716373860836029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.11174856637843, "step_time": 0.7609650650024413} +{"epoch": 0, "iter": 13586, "iter_tflops": 6.224521904118293, "iter_time": 3.3144864501953126, "loss": 0.7718666791915894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.331119419723884, "step_time": 2.8141805267333986} +{"epoch": 0, "iter": 13587, "iter_tflops": 18.75829021338228, "iter_time": 1.0998386993408205, "loss": 0.5154799222946167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.386058839012026, "step_time": 0.8460200004577637} +{"epoch": 0, "iter": 13588, "iter_tflops": 43.06750478314049, "iter_time": 0.4790408363342285, "loss": 0.743850588798523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.30332221629259, "step_time": 0.445564001083374} +{"epoch": 0, "iter": 13589, "iter_tflops": 19.1157844370965, "iter_time": 0.7862776641845703, "loss": 0.3644147515296936, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 20.09805730280161, "step_time": 0.7478491134643555} +{"epoch": 0, "iter": 13590, "iter_tflops": 7.565715719141152, "iter_time": 1.986634826660156, "loss": 0.31843477487564087, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 9.149993039606509, "step_time": 1.6426585540771483} +{"epoch": 0, "iter": 13591, "iter_tflops": 11.32062319228621, "iter_time": 1.3276931915283203, "loss": 0.39550164341926575, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 12.605367287078229, "step_time": 1.1923741683959963} +{"epoch": 0, "iter": 13592, "iter_tflops": 26.176117725584806, "iter_time": 0.5741995239257812, "loss": 0.3221279978752136, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.054417212564204, "step_time": 0.5357557144165039} +{"epoch": 0, "iter": 13593, "iter_tflops": 18.755848291754134, "iter_time": 0.7904769973754884, "loss": 0.3231984078884125, "lr": 3e-05, 
"seqlen": 5936.0, "step_tflops": 19.79792510746603, "step_time": 0.7488697204589844} +{"epoch": 0, "iter": 13594, "iter_tflops": 13.826216581989078, "iter_time": 1.0723155212402344, "loss": 0.36580944061279297, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.36169694255258, "step_time": 0.9651320877075196} +{"epoch": 0, "iter": 13595, "iter_tflops": 26.35468436400246, "iter_time": 0.5625590667724609, "loss": 0.34558817744255066, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.131131717923843, "step_time": 0.5270341339111329} +{"epoch": 0, "iter": 13596, "iter_tflops": 26.5367037703087, "iter_time": 0.5587003860473633, "loss": 0.28395307064056396, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.181358436510074, "step_time": 0.5260948181152343} +{"epoch": 0, "iter": 13597, "iter_tflops": 32.163196384060214, "iter_time": 0.6414503479003906, "loss": 0.05214668810367584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.474080366548066, "step_time": 0.5984523239135742} +{"epoch": 0, "iter": 13598, "iter_tflops": 47.251854968760746, "iter_time": 0.43661975860595703, "loss": 0.11536867916584015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.038124898922895, "step_time": 0.396461124420166} +{"epoch": 0, "iter": 13599, "iter_tflops": 52.20845784844187, "iter_time": 0.39516764831542966, "loss": 0.08543272316455841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.78396866266588, "step_time": 0.36332602310180667} +{"epoch": 0, "iter": 13600, "iter_tflops": 49.619305440796204, "iter_time": 0.4157876319885254, "loss": 0.08543877303600311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.593147772199934, "step_time": 0.3849576740264892} +{"epoch": 0, "iter": 13601, "iter_tflops": 24.255549152125862, "iter_time": 0.8505721054077149, "loss": 0.09077094495296478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.642995110048275, "step_time": 0.8045508499145507} +{"epoch": 0, "iter": 13602, "iter_tflops": 17.219630519244905, "iter_time": 1.1981147613525391, "loss": 
0.20162957906723022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.543610432301268, "step_time": 0.7490337390899658} +{"epoch": 0, "iter": 13603, "iter_tflops": 51.347611294383555, "iter_time": 0.4017926635742188, "loss": 0.2958873212337494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.781916368240104, "step_time": 0.36985272026062005} +{"epoch": 0, "iter": 13604, "iter_tflops": 51.29507218459731, "iter_time": 0.4022042007446289, "loss": 0.18766029179096222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.3478837067689, "step_time": 0.3727530689239502} +{"epoch": 0, "iter": 13605, "iter_tflops": 27.247855055737737, "iter_time": 0.7571639480590822, "loss": 0.08874081820249557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.67793129121676, "step_time": 0.7194066162109375} +{"epoch": 0, "iter": 13606, "iter_tflops": 13.486502257572804, "iter_time": 1.5297586517333985, "loss": 0.05159955844283104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.491323869978576, "step_time": 1.2510271263122559} +{"epoch": 0, "iter": 13607, "iter_tflops": 39.869692268882815, "iter_time": 0.5174630737304687, "loss": 0.12699274718761444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78959079051739, "step_time": 0.4711415004730225} +{"epoch": 0, "iter": 13608, "iter_tflops": 45.337055882289036, "iter_time": 0.45506028366088863, "loss": 0.14687180519104004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63946955357082, "step_time": 0.41561873435974117} +{"epoch": 0, "iter": 13609, "iter_tflops": 28.060514323783256, "iter_time": 0.6086588134765626, "loss": 0.016172271221876144, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 31.37487690050591, "step_time": 0.5443616371154785} +{"epoch": 0, "iter": 13610, "iter_tflops": 34.47392919563822, "iter_time": 0.4954259567260742, "loss": 0.02394944243133068, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 38.52137130530325, "step_time": 0.4433715305328369} +{"epoch": 0, "iter": 13611, "iter_tflops": 35.23415777097666, "iter_time": 
0.4847364158630372, "loss": 0.02854834496974945, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 38.669398037160825, "step_time": 0.441674301147461} +{"epoch": 0, "iter": 13612, "iter_tflops": 41.27299423844323, "iter_time": 0.4138124618530273, "loss": 0.023008594289422035, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 45.53302333050858, "step_time": 0.37509653663635256} +{"epoch": 0, "iter": 13613, "iter_tflops": 18.68112198922817, "iter_time": 1.1043819274902344, "loss": 0.3956877589225769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.812153188144432, "step_time": 1.0413352508544922} +{"epoch": 0, "iter": 13614, "iter_tflops": 17.066507270982115, "iter_time": 1.2088644256591796, "loss": 0.31401291489601135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.936598093446683, "step_time": 0.9854081077575684} +{"epoch": 0, "iter": 13615, "iter_tflops": 47.96733985802705, "iter_time": 0.4301071014404297, "loss": 0.337610125541687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.15611636410605, "step_time": 0.3955642204284668} +{"epoch": 0, "iter": 13616, "iter_tflops": 47.63953295951934, "iter_time": 0.4330666618347168, "loss": 0.5342090725898743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.605151915971355, "step_time": 0.39978747749328614} +{"epoch": 0, "iter": 13617, "iter_tflops": 31.879457834883667, "iter_time": 0.6471594848632812, "loss": 0.14214587211608887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.05334196573842, "step_time": 0.6058463668823243} +{"epoch": 0, "iter": 13618, "iter_tflops": 12.747014361815967, "iter_time": 1.6185039825439451, "loss": 0.12729352712631226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.643200013143192, "step_time": 1.3188537826538087} +{"epoch": 0, "iter": 13619, "iter_tflops": 47.00018604414324, "iter_time": 0.4389576988220215, "loss": 0.07579652220010757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.34210138249332, "step_time": 0.4018357830047607} +{"epoch": 0, "iter": 13620, "iter_tflops": 
47.291801097453344, "iter_time": 0.43625095748901366, "loss": 0.0911879613995552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.20097731631304, "step_time": 0.40294335365295414} +{"epoch": 0, "iter": 13621, "iter_tflops": 41.021968205783786, "iter_time": 0.5029279289245605, "loss": 0.7131050825119019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.18636841575095, "step_time": 0.46691082000732426} +{"epoch": 0, "iter": 13622, "iter_tflops": 14.51572909485459, "iter_time": 1.4212922668457033, "loss": 0.5741080641746521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.8109270070323, "step_time": 1.3048629913330079} +{"epoch": 0, "iter": 13623, "iter_tflops": 9.971338393388326, "iter_time": 2.0690395507812496, "loss": 0.5664921998977661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.463934151081347, "step_time": 1.6552633590698242} +{"epoch": 0, "iter": 13624, "iter_tflops": 15.715646439311389, "iter_time": 1.3127740936279297, "loss": 0.5771965980529785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.56222847001462, "step_time": 1.0033491039276123} +{"epoch": 0, "iter": 13625, "iter_tflops": 24.65263007563834, "iter_time": 0.7011351165771484, "loss": 0.4081714153289795, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 26.20344789675345, "step_time": 0.6596393241882323} +{"epoch": 0, "iter": 13626, "iter_tflops": 19.0314517077709, "iter_time": 0.9082241821289063, "loss": 0.32689937949180603, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 23.20297946985717, "step_time": 0.7449398765563966} +{"epoch": 0, "iter": 13627, "iter_tflops": 30.29931834052363, "iter_time": 0.5704690933227539, "loss": 0.2732257843017578, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 32.35716381618681, "step_time": 0.5341884956359864} +{"epoch": 0, "iter": 13628, "iter_tflops": 28.868975509904896, "iter_time": 0.5987335662841797, "loss": 0.30336934328079224, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 30.606069046213882, "step_time": 0.5647515411376953} +{"epoch": 0, "iter": 13629, 
"iter_tflops": 27.554630968356744, "iter_time": 0.7487341613769531, "loss": 0.4710313677787781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.358691680645382, "step_time": 0.7027252349853514} +{"epoch": 0, "iter": 13630, "iter_tflops": 16.085263839722103, "iter_time": 1.2826083374023438, "loss": 0.5091506242752075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.18932355466833, "step_time": 1.0751339645385742} +{"epoch": 0, "iter": 13631, "iter_tflops": 39.01839080430892, "iter_time": 0.5287530593872071, "loss": 0.3665834069252014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.604441624401716, "step_time": 0.4842474803924561} +{"epoch": 0, "iter": 13632, "iter_tflops": 38.45656260574448, "iter_time": 0.5364778366088867, "loss": 0.3390987813472748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.94475785509928, "step_time": 0.4918634548187255} +{"epoch": 0, "iter": 13633, "iter_tflops": 20.146626113939824, "iter_time": 1.024047073364258, "loss": 0.6370583176612854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.724047001632794, "step_time": 0.9496892318725586} +{"epoch": 0, "iter": 13634, "iter_tflops": 23.38590077217802, "iter_time": 0.8822022171020507, "loss": 0.6513645052909851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.70518489656304, "step_time": 0.7187235889434815} +{"epoch": 0, "iter": 13635, "iter_tflops": 44.09650113674477, "iter_time": 0.4678623695373535, "loss": 0.7282211780548096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.55296896546507, "step_time": 0.4338550033569336} +{"epoch": 0, "iter": 13636, "iter_tflops": 45.75109737924525, "iter_time": 0.4509420471191407, "loss": 0.7554240822792053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.02676146699471, "step_time": 0.4208128967285156} +{"epoch": 0, "iter": 13637, "iter_tflops": 26.1619843451538, "iter_time": 0.7885905456542969, "loss": 0.6230331659317017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.580675177140577, "step_time": 0.7480271377563475} +{"epoch": 0, "iter": 
13638, "iter_tflops": 19.761809689108528, "iter_time": 1.043988067626953, "loss": 0.6212202906608582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.47519458843203, "step_time": 0.8429388961791994} +{"epoch": 0, "iter": 13639, "iter_tflops": 47.842230405551724, "iter_time": 0.43123184967041017, "loss": 0.661881685256958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.78579064271976, "step_time": 0.39839294242858886} +{"epoch": 0, "iter": 13640, "iter_tflops": 44.3579544201292, "iter_time": 0.4651047096252442, "loss": 0.5601829886436462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.410929376090124, "step_time": 0.435154800415039} +{"epoch": 0, "iter": 13641, "iter_tflops": 40.79401537676068, "iter_time": 0.5057382392883301, "loss": 0.050478775054216385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.402038383237276, "step_time": 0.46464293670654294} +{"epoch": 0, "iter": 13642, "iter_tflops": 49.88349343343921, "iter_time": 0.41358557891845704, "loss": 0.06769564002752304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.40956259072742, "step_time": 0.37918138885498043} +{"epoch": 0, "iter": 13643, "iter_tflops": 56.66330447582185, "iter_time": 0.36409972381591804, "loss": 0.05047483369708061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.031921202281396, "step_time": 0.3325883369445801} +{"epoch": 0, "iter": 13644, "iter_tflops": 48.32411887617482, "iter_time": 0.42693160247802736, "loss": 0.07254566252231598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.753160302390626, "step_time": 0.3910873470306397} +{"epoch": 0, "iter": 13645, "iter_tflops": 27.307906699545136, "iter_time": 0.7554989013671874, "loss": 0.7178789973258972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.841908782263367, "step_time": 0.7153165092468261} +{"epoch": 0, "iter": 13646, "iter_tflops": 24.803186147000666, "iter_time": 0.8317920684814452, "loss": 0.827377200126648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.467687311795064, "step_time": 0.6771466865539552} 
+{"epoch": 0, "iter": 13647, "iter_tflops": 37.56285924320413, "iter_time": 0.5492418289184571, "loss": 0.6427647471427917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.971302091062924, "step_time": 0.503549861907959} +{"epoch": 0, "iter": 13648, "iter_tflops": 36.16767071174127, "iter_time": 0.5704291458129883, "loss": 0.598936915397644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.15048701689306, "step_time": 0.5269690132141113} +{"epoch": 0, "iter": 13649, "iter_tflops": 19.592118696360096, "iter_time": 1.0530302429199216, "loss": 0.018400467932224274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.15636890296727, "step_time": 0.9751717605590821} +{"epoch": 0, "iter": 13650, "iter_tflops": 18.68148180059017, "iter_time": 1.1043606567382813, "loss": 0.044721391052007675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.91367634822423, "step_time": 0.9003833866119386} +{"epoch": 0, "iter": 13651, "iter_tflops": 39.12621825204177, "iter_time": 0.5272958755493165, "loss": 0.030362704768776894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37782639219646, "step_time": 0.47561381530761715} +{"epoch": 0, "iter": 13652, "iter_tflops": 42.795093206351545, "iter_time": 0.48209016418457035, "loss": 0.015145348384976387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19294401063963, "step_time": 0.43716479110717776} +{"epoch": 0, "iter": 13653, "iter_tflops": 34.503519957138465, "iter_time": 0.5979417037963868, "loss": 0.7076253890991211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82421989780319, "step_time": 0.5454466361999512} +{"epoch": 0, "iter": 13654, "iter_tflops": 36.77922810101077, "iter_time": 0.5609441680908203, "loss": 0.7484861016273499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.408661486708205, "step_time": 0.510561170578003} +{"epoch": 0, "iter": 13655, "iter_tflops": 37.8593412920217, "iter_time": 0.5449406356811524, "loss": 0.7241870164871216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.546448574566014, "step_time": 
0.4965789909362792} +{"epoch": 0, "iter": 13656, "iter_tflops": 39.59449951875293, "iter_time": 0.5210595855712891, "loss": 0.6244367361068726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.330530063750025, "step_time": 0.4761329593658448} +{"epoch": 0, "iter": 13657, "iter_tflops": 19.773972253345438, "iter_time": 1.043345932006836, "loss": 0.12322234362363815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.398082329683913, "step_time": 0.9641561889648438} +{"epoch": 0, "iter": 13658, "iter_tflops": 17.02284001562884, "iter_time": 1.2119654235839845, "loss": 0.2124217301607132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.785216104813905, "step_time": 0.9925849895477294} +{"epoch": 0, "iter": 13659, "iter_tflops": 40.836494789240525, "iter_time": 0.5052121543884277, "loss": 0.1842731535434723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.19544140739163, "step_time": 0.4564861602783203} +{"epoch": 0, "iter": 13660, "iter_tflops": 39.103649007526144, "iter_time": 0.527600212097168, "loss": 0.15273374319076538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.64228630676238, "step_time": 0.4838177146911621} +{"epoch": 0, "iter": 13661, "iter_tflops": 14.778152660952978, "iter_time": 1.3960536193847655, "loss": 0.05682066082954407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.856829491981427, "step_time": 1.3010856628417968} +{"epoch": 0, "iter": 13662, "iter_tflops": 16.940674274205158, "iter_time": 1.2178437042236328, "loss": 0.10636941343545914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.176773632970367, "step_time": 0.8533435363769531} +{"epoch": 0, "iter": 13663, "iter_tflops": 51.79732964350598, "iter_time": 0.3983041915893555, "loss": 0.14146959781646729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.46501815148473, "step_time": 0.36537832069396975} +{"epoch": 0, "iter": 13664, "iter_tflops": 54.55110606743458, "iter_time": 0.3781975288391113, "loss": 0.08144573867321014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.56449813264407, 
"step_time": 0.3463656063079834} +{"epoch": 0, "iter": 13665, "iter_tflops": 17.516665141356928, "iter_time": 1.1777980194091797, "loss": 0.5637027025222778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.648027736552095, "step_time": 1.1063418502807616} +{"epoch": 0, "iter": 13666, "iter_tflops": 14.475131788831032, "iter_time": 1.4252784576416015, "loss": 0.5080415606498718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.341719881542005, "step_time": 1.189679780960083} +{"epoch": 0, "iter": 13667, "iter_tflops": 37.1383739596059, "iter_time": 0.5555195693969726, "loss": 0.5071793794631958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44709498476623, "step_time": 0.5100760269165039} +{"epoch": 0, "iter": 13668, "iter_tflops": 44.782993274484234, "iter_time": 0.46069036483764647, "loss": 0.42842116951942444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.08573162764185, "step_time": 0.4203073444366455} +{"epoch": 0, "iter": 13669, "iter_tflops": 28.076253542696477, "iter_time": 0.7348235931396484, "loss": 0.8237269520759583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.448471885623288, "step_time": 0.6775740203857421} +{"epoch": 0, "iter": 13670, "iter_tflops": 33.24118446487207, "iter_time": 0.620648567199707, "loss": 0.7110356092453003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.90828800759167, "step_time": 0.5589826736450196} +{"epoch": 0, "iter": 13671, "iter_tflops": 37.6094119117928, "iter_time": 0.5485619812011718, "loss": 0.7216424942016602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.86751070067811, "step_time": 0.5048287296295166} +{"epoch": 0, "iter": 13672, "iter_tflops": 36.44748513445807, "iter_time": 0.5660498504638671, "loss": 0.7783604264259338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.725333049684004, "step_time": 0.5193435001373291} +{"epoch": 0, "iter": 13673, "iter_tflops": 39.971951042105395, "iter_time": 0.5161392669677735, "loss": 0.6093831062316895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.54388710314923, "step_time": 0.4737999954223633} +{"epoch": 0, "iter": 13674, "iter_tflops": 41.58151370649541, "iter_time": 0.49616023254394526, "loss": 0.7483851313591003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92665698526769, "step_time": 0.45921719741821293} +{"epoch": 0, "iter": 13675, "iter_tflops": 42.11753197493826, "iter_time": 0.4898457374572754, "loss": 0.6446463465690613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37248893467177, "step_time": 0.4547049102783203} +{"epoch": 0, "iter": 13676, "iter_tflops": 45.26319334718302, "iter_time": 0.4558028717041016, "loss": 0.8741702437400818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.536708617328166, "step_time": 0.4250616512298584} +{"epoch": 0, "iter": 13677, "iter_tflops": 25.429715231001445, "iter_time": 0.8112986450195312, "loss": 0.14620353281497955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.762644127893974, "step_time": 0.7708914489746095} +{"epoch": 0, "iter": 13678, "iter_tflops": 13.378413675268927, "iter_time": 1.5421180725097656, "loss": 0.17958658933639526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.41748511737212, "step_time": 1.1845047302246092} +{"epoch": 0, "iter": 13679, "iter_tflops": 48.08786147791264, "iter_time": 0.4290291328430176, "loss": 0.16859474778175354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.26630325217207, "step_time": 0.3947302989959717} +{"epoch": 0, "iter": 13680, "iter_tflops": 49.12989527328459, "iter_time": 0.4199295234680176, "loss": 0.233194038271904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36143080401353, "step_time": 0.38662931632995606} +{"epoch": 0, "iter": 13681, "iter_tflops": 39.141458868860205, "iter_time": 0.5270905609130859, "loss": 0.4087490141391754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.344494425978944, "step_time": 0.4872202110290528} +{"epoch": 0, "iter": 13682, "iter_tflops": 17.250206052984467, "iter_time": 1.1959911346435548, "loss": 0.5121850967407227, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.44735142497839, "step_time": 1.0089861068725585} +{"epoch": 0, "iter": 13683, "iter_tflops": 47.20275030936533, "iter_time": 0.4370739707946778, "loss": 0.5334783792495728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.1198155722983, "step_time": 0.40358309745788573} +{"epoch": 0, "iter": 13684, "iter_tflops": 51.179812606604386, "iter_time": 0.4031099853515625, "loss": 0.47774410247802734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.48276308876976, "step_time": 0.37184690093994144} +{"epoch": 0, "iter": 13685, "iter_tflops": 45.85929827013913, "iter_time": 0.44987808990478517, "loss": 0.02374262362718582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.95467998419437, "step_time": 0.41299621009826654} +{"epoch": 0, "iter": 13686, "iter_tflops": 10.740941193467776, "iter_time": 1.9207901000976562, "loss": 0.032182205468416214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.284479104718976, "step_time": 1.679443901062012} +{"epoch": 0, "iter": 13687, "iter_tflops": 10.403405335504194, "iter_time": 1.9831096496582035, "loss": 0.031314972788095474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.889230616198743, "step_time": 1.6006458511352537} +{"epoch": 0, "iter": 13688, "iter_tflops": 23.09838925148779, "iter_time": 0.8931832122802735, "loss": 0.019727125763893127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.845961949380364, "step_time": 0.7152159996032714} +{"epoch": 0, "iter": 13689, "iter_tflops": 20.948436866166208, "iter_time": 0.7077409515380859, "loss": 0.28502729535102844, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 22.370607797705954, "step_time": 0.66274760055542} +{"epoch": 0, "iter": 13690, "iter_tflops": 6.74555070313601, "iter_time": 2.197903076171875, "loss": 0.38724854588508606, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 8.78448409104434, "step_time": 1.687756103515625} +{"epoch": 0, "iter": 13691, "iter_tflops": 9.8669751544494, "iter_time": 1.502594909667969, "loss": 0.3265231251716614, "lr": 3e-05, 
"seqlen": 5936.0, "step_tflops": 12.3402062677126, "step_time": 1.201443988800049} +{"epoch": 0, "iter": 13692, "iter_tflops": 12.06225992345208, "iter_time": 1.2291284332275392, "loss": 0.43092963099479675, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.772057367015591, "step_time": 0.940021095275879} +{"epoch": 0, "iter": 13693, "iter_tflops": 14.419163635944441, "iter_time": 1.0537225189208985, "loss": 0.29674357175827026, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 15.654849108253531, "step_time": 0.9705489540100097} +{"epoch": 0, "iter": 13694, "iter_tflops": 26.495709974945694, "iter_time": 0.5734436798095703, "loss": 0.34311002492904663, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.303381373608367, "step_time": 0.5368191604614257} +{"epoch": 0, "iter": 13695, "iter_tflops": 27.747627191774146, "iter_time": 0.5475710525512696, "loss": 0.26424047350883484, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.53911733353867, "step_time": 0.514361930847168} +{"epoch": 0, "iter": 13696, "iter_tflops": 27.326641562668147, "iter_time": 0.5560067596435546, "loss": 0.3484509587287903, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.04869114227285, "step_time": 0.5230458526611328} +{"epoch": 0, "iter": 13697, "iter_tflops": 31.687957563240108, "iter_time": 0.6510704727172851, "loss": 0.9141330718994141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.93496817238744, "step_time": 0.6079597129821778} +{"epoch": 0, "iter": 13698, "iter_tflops": 23.024189532329313, "iter_time": 0.8960616607666017, "loss": 0.6614891290664673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.84296923357972, "step_time": 0.7983251972198486} +{"epoch": 0, "iter": 13699, "iter_tflops": 39.82203845772634, "iter_time": 0.5180823059082031, "loss": 0.6833766102790833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.69933674107303, "step_time": 0.4831712875366211} +{"epoch": 0, "iter": 13700, "iter_tflops": 44.25116206586202, "iter_time": 0.4662271575927734, "loss": 0.767303466796875, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 47.673079613515576, "step_time": 0.4327619209289551} +{"epoch": 0, "iter": 13701, "iter_tflops": 27.5289744394484, "iter_time": 0.7494319686889649, "loss": 0.33903998136520386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.142514153271375, "step_time": 0.7079380111694337} +{"epoch": 0, "iter": 13702, "iter_tflops": 14.35878994151874, "iter_time": 1.4368267517089843, "loss": 0.33244267106056213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.780353119124925, "step_time": 1.0430093631744386} +{"epoch": 0, "iter": 13703, "iter_tflops": 34.672302609709476, "iter_time": 0.5950309600830078, "loss": 0.24807754158973694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.71063901769209, "step_time": 0.547089469909668} +{"epoch": 0, "iter": 13704, "iter_tflops": 41.72773949273922, "iter_time": 0.49442154693603513, "loss": 0.27403923869132996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.455397118920494, "step_time": 0.4538755531311035} +{"epoch": 0, "iter": 13705, "iter_tflops": 27.977762838389314, "iter_time": 0.7374104080200197, "loss": 0.25238803029060364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.463865898199106, "step_time": 0.6772316284179688} +{"epoch": 0, "iter": 13706, "iter_tflops": 8.472516076945416, "iter_time": 2.4350610046386723, "loss": 0.30229896306991577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.505444086348081, "step_time": 1.9638478240966795} +{"epoch": 0, "iter": 13707, "iter_tflops": 11.512462542039929, "iter_time": 1.7920660705566405, "loss": 0.19711934030056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.910298099416153, "step_time": 1.4831525077819823} +{"epoch": 0, "iter": 13708, "iter_tflops": 35.711530948559286, "iter_time": 0.5777151794433594, "loss": 0.26452744007110596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.3058035928816, "step_time": 0.46565216827392575} +{"epoch": 0, "iter": 13709, "iter_tflops": 24.199666619444205, "iter_time": 0.7108607177734375, "loss": 
0.5160112380981445, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 26.274897055447124, "step_time": 0.6547158813476563} +{"epoch": 0, "iter": 13710, "iter_tflops": 25.99632398670779, "iter_time": 0.6617317276000977, "loss": 0.3320339024066925, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 28.048114460967142, "step_time": 0.6133243789672852} +{"epoch": 0, "iter": 13711, "iter_tflops": 23.92417864269813, "iter_time": 0.7190463104248047, "loss": 0.4554659128189087, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 25.733918103757457, "step_time": 0.6684793319702149} +{"epoch": 0, "iter": 13712, "iter_tflops": 26.070995944820517, "iter_time": 0.6598364105224609, "loss": 0.26922035217285156, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 28.029722817899135, "step_time": 0.6137268104553223} +{"epoch": 0, "iter": 13713, "iter_tflops": 22.077388138966093, "iter_time": 0.9344897766113281, "loss": 0.5872986912727356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.456046785419428, "step_time": 0.87956396484375} +{"epoch": 0, "iter": 13714, "iter_tflops": 8.248846356601362, "iter_time": 2.5010883483886714, "loss": 0.7631319165229797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.823138695151345, "step_time": 2.1002547302246093} +{"epoch": 0, "iter": 13715, "iter_tflops": 16.511225139786095, "iter_time": 1.2495192413330078, "loss": 0.697372317314148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.430510998403335, "step_time": 1.1193988876342773} +{"epoch": 0, "iter": 13716, "iter_tflops": 37.82287638881425, "iter_time": 0.5454660110473633, "loss": 0.7805893421173096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60247704461063, "step_time": 0.49591021919250483} +{"epoch": 0, "iter": 13717, "iter_tflops": 13.219026388237697, "iter_time": 1.1277495880126953, "loss": 0.29474303126335144, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 13.999241965366267, "step_time": 1.0648970565795899} +{"epoch": 0, "iter": 13718, "iter_tflops": 14.15569669645253, "iter_time": 
1.0531273651123048, "loss": 0.35153090953826904, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 17.008871151145406, "step_time": 0.8764691925048829} +{"epoch": 0, "iter": 13719, "iter_tflops": 27.132030393659694, "iter_time": 0.5494521179199219, "loss": 0.16239033639431, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 28.888593670446877, "step_time": 0.5160428276062011} +{"epoch": 0, "iter": 13720, "iter_tflops": 28.087909001613163, "iter_time": 0.5307533416748046, "loss": 0.4637290835380554, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 29.77098656160049, "step_time": 0.5007476501464844} +{"epoch": 0, "iter": 13721, "iter_tflops": 27.24305609621146, "iter_time": 0.7572973251342774, "loss": 0.06535176187753677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.243728387943783, "step_time": 0.7054877967834472} +{"epoch": 0, "iter": 13722, "iter_tflops": 15.434586294551469, "iter_time": 1.336679397583008, "loss": 0.04143476486206055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.405138769309115, "step_time": 1.0110734233856202} +{"epoch": 0, "iter": 13723, "iter_tflops": 40.31354477175257, "iter_time": 0.5117658004760742, "loss": 0.03756656497716904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.331152895032005, "step_time": 0.4653859004974365} +{"epoch": 0, "iter": 13724, "iter_tflops": 40.70639681266536, "iter_time": 0.5068268165588379, "loss": 0.021404391154646873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.96526933699235, "step_time": 0.45882286071777345} +{"epoch": 0, "iter": 13725, "iter_tflops": 37.99478241081371, "iter_time": 0.5429980697631837, "loss": 0.07101353257894516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96451425814292, "step_time": 0.49163189125061035} +{"epoch": 0, "iter": 13726, "iter_tflops": 10.311318761941955, "iter_time": 2.000820068359375, "loss": 0.18894560635089874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.798098873076281, "step_time": 1.7486794891357424} +{"epoch": 0, "iter": 13727, "iter_tflops": 
16.503854196233394, "iter_time": 1.2500773010253907, "loss": 0.11965823173522949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.301726497645156, "step_time": 0.9685174350738526} +{"epoch": 0, "iter": 13728, "iter_tflops": 37.10806139818367, "iter_time": 0.5559733581542968, "loss": 0.12715253233909607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.65313942770835, "step_time": 0.5074907817840576} +{"epoch": 0, "iter": 13729, "iter_tflops": 14.891168668281843, "iter_time": 0.9846610260009765, "loss": 0.26802369952201843, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 16.29684469635301, "step_time": 0.8997295913696288} +{"epoch": 0, "iter": 13730, "iter_tflops": 22.18518367280852, "iter_time": 0.6609254913330078, "loss": 0.3088340759277344, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 23.928205093414466, "step_time": 0.6127811660766601} +{"epoch": 0, "iter": 13731, "iter_tflops": 21.260046694686125, "iter_time": 0.6896858520507813, "loss": 0.314502477645874, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 22.882358737693995, "step_time": 0.6407885475158692} +{"epoch": 0, "iter": 13732, "iter_tflops": 21.951152531540906, "iter_time": 0.6679719161987305, "loss": 0.30027520656585693, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 23.65068362186515, "step_time": 0.6199716529846191} +{"epoch": 0, "iter": 13733, "iter_tflops": 19.229981221585945, "iter_time": 1.072860824584961, "loss": 0.5201111435890198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.813196914950936, "step_time": 0.9912505798339843} +{"epoch": 0, "iter": 13734, "iter_tflops": 18.767473067942657, "iter_time": 1.0993005523681643, "loss": 0.6502071022987366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.200347165971024, "step_time": 0.8892579650878908} +{"epoch": 0, "iter": 13735, "iter_tflops": 44.989255719846376, "iter_time": 0.4585782356262207, "loss": 0.5912237167358398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.64440966352269, "step_time": 0.4241205444335937} +{"epoch": 0, "iter": 13736, 
"iter_tflops": 44.29072047178111, "iter_time": 0.4658107452392578, "loss": 0.5561650991439819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.7335571859001, "step_time": 0.4322136192321777} +{"epoch": 0, "iter": 13737, "iter_tflops": 34.45311298940674, "iter_time": 0.5988165283203125, "loss": 0.16077281534671783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.92187336847267, "step_time": 0.5587769966125489} +{"epoch": 0, "iter": 13738, "iter_tflops": 27.838564624908376, "iter_time": 0.7410976028442383, "loss": 0.21333445608615875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.257037351753624, "step_time": 0.5851624259948731} +{"epoch": 0, "iter": 13739, "iter_tflops": 47.81344966740828, "iter_time": 0.43149142456054695, "loss": 0.33560988306999207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.10945746156045, "step_time": 0.39591840934753414} +{"epoch": 0, "iter": 13740, "iter_tflops": 48.88259417215736, "iter_time": 0.4220539817810059, "loss": 0.3987267315387726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15050988399158, "step_time": 0.3881636047363281} +{"epoch": 0, "iter": 13741, "iter_tflops": 34.786735203854455, "iter_time": 0.5930735778808595, "loss": 0.658855140209198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.30820575233243, "step_time": 0.5529907722473145} +{"epoch": 0, "iter": 13742, "iter_tflops": 24.589733046386254, "iter_time": 0.8390125045776367, "loss": 0.6813560128211975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.041649247656192, "step_time": 0.6867496967315674} +{"epoch": 0, "iter": 13743, "iter_tflops": 35.26469758223167, "iter_time": 0.5850353164672851, "loss": 0.4281836748123169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.30503357689758, "step_time": 0.5386000633239746} +{"epoch": 0, "iter": 13744, "iter_tflops": 37.840480508980505, "iter_time": 0.5452122497558594, "loss": 0.6172018051147461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.20648193808428, "step_time": 0.500675926208496} +{"epoch": 0, "iter": 
13745, "iter_tflops": 24.599586432457617, "iter_time": 0.8386764373779296, "loss": 0.06696358323097229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.110399186438393, "step_time": 0.7901485290527344} +{"epoch": 0, "iter": 13746, "iter_tflops": 16.536951083178867, "iter_time": 1.2475754089355469, "loss": 0.054206784814596176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.00198808805493, "step_time": 1.0314521446228027} +{"epoch": 0, "iter": 13747, "iter_tflops": 40.08178826383095, "iter_time": 0.5147248764038085, "loss": 0.11256919056177139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.000022382096596, "step_time": 0.46888825035095216} +{"epoch": 0, "iter": 13748, "iter_tflops": 42.955254657595276, "iter_time": 0.48029265975952146, "loss": 0.09714293479919434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.04600648163536, "step_time": 0.4385301761627197} +{"epoch": 0, "iter": 13749, "iter_tflops": 28.272921530419993, "iter_time": 0.7297121200561524, "loss": 0.11925874650478363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.572139141260188, "step_time": 0.6748331680297852} +{"epoch": 0, "iter": 13750, "iter_tflops": 43.01462780658204, "iter_time": 0.479629711151123, "loss": 0.11638122797012329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.744870652188084, "step_time": 0.44135523796081544} +{"epoch": 0, "iter": 13751, "iter_tflops": 45.84711154396254, "iter_time": 0.44999767303466803, "loss": 0.07751075178384781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.69544126007186, "step_time": 0.4151506252288819} +{"epoch": 0, "iter": 13752, "iter_tflops": 55.45661336701159, "iter_time": 0.37202223968505854, "loss": 0.08651416003704071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.3807322917162, "step_time": 0.34168339347839355} +{"epoch": 0, "iter": 13753, "iter_tflops": 2.8369622045933847, "iter_time": 0.48324232482910157, "loss": 1.3781366348266602, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.094717906290382, "step_time": 0.44299359512329106} 
+{"epoch": 0, "iter": 13754, "iter_tflops": 3.104995895801345, "iter_time": 0.44152722167968755, "loss": 1.2726905345916748, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.3768361252598904, "step_time": 0.4059836368560791} +{"epoch": 0, "iter": 13755, "iter_tflops": 3.281557924821815, "iter_time": 0.41777114486694333, "loss": 1.3048138618469238, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.5612967987826765, "step_time": 0.384955337524414} +{"epoch": 0, "iter": 13756, "iter_tflops": 3.4209837833450027, "iter_time": 0.4007444343566894, "loss": 1.2481942176818848, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.700474601918046, "step_time": 0.3704768600463867} +{"epoch": 0, "iter": 13757, "iter_tflops": 35.973769059396034, "iter_time": 0.573503807067871, "loss": 0.6672948598861694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.66822201646355, "step_time": 0.533541301727295} +{"epoch": 0, "iter": 13758, "iter_tflops": 36.59933085301152, "iter_time": 0.5637013854980468, "loss": 0.8350012898445129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.49248564157074, "step_time": 0.5095042495727539} +{"epoch": 0, "iter": 13759, "iter_tflops": 38.28521751306029, "iter_time": 0.5388788375854492, "loss": 0.7976689338684082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.70051938020219, "step_time": 0.49474428176879887} +{"epoch": 0, "iter": 13760, "iter_tflops": 38.172552916089025, "iter_time": 0.5404693145751953, "loss": 0.7071389555931091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.7647960637321, "step_time": 0.49398286247253415} +{"epoch": 0, "iter": 13761, "iter_tflops": 33.503358540136134, "iter_time": 0.6157918014526367, "loss": 0.0532851368188858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.32106792511695, "step_time": 0.5528001918792724} +{"epoch": 0, "iter": 13762, "iter_tflops": 36.61295860691096, "iter_time": 0.563491569519043, "loss": 0.04621043801307678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.883418393443904, "step_time": 
0.5046323013305665} +{"epoch": 0, "iter": 13763, "iter_tflops": 41.3724889317545, "iter_time": 0.49866696548461914, "loss": 0.05042153596878052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50306852879675, "step_time": 0.45340004920959476} +{"epoch": 0, "iter": 13764, "iter_tflops": 41.946881403092895, "iter_time": 0.4918385543823242, "loss": 0.043976351618766785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.25906158029028, "step_time": 0.44599031639099124} +{"epoch": 0, "iter": 13765, "iter_tflops": 14.82550098240371, "iter_time": 1.391595031738281, "loss": 0.13053730130195618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.798591652003408, "step_time": 1.3058818130493166} +{"epoch": 0, "iter": 13766, "iter_tflops": 15.448702358084292, "iter_time": 1.335458023071289, "loss": 0.10006984323263168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.401554464682466, "step_time": 1.0112510566711426} +{"epoch": 0, "iter": 13767, "iter_tflops": 42.57648053835423, "iter_time": 0.48456549835205076, "loss": 0.18592654168605804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.863793698719824, "step_time": 0.4402352409362793} +{"epoch": 0, "iter": 13768, "iter_tflops": 46.95618960799639, "iter_time": 0.4393689880371094, "loss": 0.18130618333816528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.10369907813638, "step_time": 0.4037103748321534} +{"epoch": 0, "iter": 13769, "iter_tflops": 20.197985019379296, "iter_time": 1.0214431533813477, "loss": 0.5937020182609558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.07327832544596, "step_time": 0.9790167999267578} +{"epoch": 0, "iter": 13770, "iter_tflops": 29.13879325236869, "iter_time": 0.7080284118652344, "loss": 0.6446799039840698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.6166921197828, "step_time": 0.5484558143615722} +{"epoch": 0, "iter": 13771, "iter_tflops": 35.18245702360657, "iter_time": 0.586402862548828, "loss": 0.8472350835800171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.15320847447874, 
"step_time": 0.5407433433532716} +{"epoch": 0, "iter": 13772, "iter_tflops": 36.79916671872901, "iter_time": 0.560640235900879, "loss": 0.6835115551948547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.182510445395955, "step_time": 0.5134346580505371} +{"epoch": 0, "iter": 13773, "iter_tflops": 26.91473531574666, "iter_time": 0.766535255432129, "loss": 0.03804657980799675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.708239418840847, "step_time": 0.7186471176147461} +{"epoch": 0, "iter": 13774, "iter_tflops": 18.06179250260515, "iter_time": 1.1422506103515624, "loss": 0.024931350722908974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.262916368815674, "step_time": 0.9702852210998534} +{"epoch": 0, "iter": 13775, "iter_tflops": 50.9552654392431, "iter_time": 0.4048863906860352, "loss": 0.08515872061252594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.05080331071869, "step_time": 0.368078462600708} +{"epoch": 0, "iter": 13776, "iter_tflops": 59.052928902777445, "iter_time": 0.3493661346435547, "loss": 0.017106283456087112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.65394586215885, "step_time": 0.3191002998352051} +{"epoch": 0, "iter": 13777, "iter_tflops": 20.755151907449207, "iter_time": 0.9940227661132813, "loss": 0.11253838986158371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.65775073687771, "step_time": 0.9525963134765625} +{"epoch": 0, "iter": 13778, "iter_tflops": 22.728414906884115, "iter_time": 0.9077224960327147, "loss": 0.16509483754634857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.76313338527891, "step_time": 0.8007990798950195} +{"epoch": 0, "iter": 13779, "iter_tflops": 40.16118972208328, "iter_time": 0.5137072296142579, "loss": 0.08052023500204086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.001702208823275, "step_time": 0.4688703498840333} +{"epoch": 0, "iter": 13780, "iter_tflops": 36.137278434107465, "iter_time": 0.5709088897705078, "loss": 0.13874343037605286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
39.668930688312166, "step_time": 0.520081916809082} +{"epoch": 0, "iter": 13781, "iter_tflops": 14.501584312549307, "iter_time": 1.1154837341308594, "loss": 0.02243844047188759, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 15.370263809931664, "step_time": 1.0524400634765625} +{"epoch": 0, "iter": 13782, "iter_tflops": 23.09038018226224, "iter_time": 0.7005636672973632, "loss": 0.0487450510263443, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 29.234262340294546, "step_time": 0.5533329772949219} +{"epoch": 0, "iter": 13783, "iter_tflops": 43.71661418000009, "iter_time": 0.37002594375610354, "loss": 0.01075498852878809, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 47.88507186151944, "step_time": 0.3378147048950195} +{"epoch": 0, "iter": 13784, "iter_tflops": 40.57719246775316, "iter_time": 0.398654525756836, "loss": 0.050046712160110474, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 44.48972618332239, "step_time": 0.36359588623046873} +{"epoch": 0, "iter": 13785, "iter_tflops": 42.368664980317206, "iter_time": 0.4869422607421875, "loss": 0.723487138748169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.283024196177706, "step_time": 0.4457594089508056} +{"epoch": 0, "iter": 13786, "iter_tflops": 45.38726629665955, "iter_time": 0.4545568656921387, "loss": 0.6865861415863037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.18342327005656, "step_time": 0.4194724998474122} +{"epoch": 0, "iter": 13787, "iter_tflops": 44.33786091690817, "iter_time": 0.46531549072265627, "loss": 0.5585106015205383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.715845392257265, "step_time": 0.4323740539550781} +{"epoch": 0, "iter": 13788, "iter_tflops": 42.58108673707291, "iter_time": 0.48451308059692383, "loss": 0.6438961625099182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83229350603064, "step_time": 0.45014316177368163} +{"epoch": 0, "iter": 13789, "iter_tflops": 19.820607993146897, "iter_time": 1.0408910522460937, "loss": 0.8049079775810242, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.704558582714608, "step_time": 0.9964517440795897} +{"epoch": 0, "iter": 13790, "iter_tflops": 18.875622327171502, "iter_time": 1.0930020294189453, "loss": 0.4494723379611969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.92575945079611, "step_time": 0.8999088363647461} +{"epoch": 0, "iter": 13791, "iter_tflops": 42.301534747929956, "iter_time": 0.48771501159667974, "loss": 0.5487329959869385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.75318045146967, "step_time": 0.45092151641845707} +{"epoch": 0, "iter": 13792, "iter_tflops": 45.47775744816398, "iter_time": 0.45365239334106444, "loss": 0.6615316271781921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.01859796432209, "step_time": 0.420882978439331} +{"epoch": 0, "iter": 13793, "iter_tflops": 40.8948228288756, "iter_time": 0.5044915733337403, "loss": 0.6029701232910156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.193706720701776, "step_time": 0.46683329010009766} +{"epoch": 0, "iter": 13794, "iter_tflops": 42.94895042612254, "iter_time": 0.48036315917968747, "loss": 0.7205824851989746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.348783550575284, "step_time": 0.44512696838378907} +{"epoch": 0, "iter": 13795, "iter_tflops": 41.56094020609398, "iter_time": 0.49640584182739256, "loss": 0.5354915857315063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.8088024338434, "step_time": 0.46042501449584966} +{"epoch": 0, "iter": 13796, "iter_tflops": 44.89634531262004, "iter_time": 0.4595272369384765, "loss": 0.5863263607025146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.442037036898085, "step_time": 0.4258923606872559} +{"epoch": 0, "iter": 13797, "iter_tflops": 32.000920594489145, "iter_time": 0.644703125, "loss": 0.05571288615465164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.97281335099442, "step_time": 0.6072824554443359} +{"epoch": 0, "iter": 13798, "iter_tflops": 20.344057013173074, "iter_time": 1.0141091079711915, "loss": 0.053348831832408905, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 24.732642310977923, "step_time": 0.8341645526885987} +{"epoch": 0, "iter": 13799, "iter_tflops": 53.320352020292326, "iter_time": 0.38692718124389647, "loss": 0.014653973281383514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.21216402051085, "step_time": 0.3544120693206787} +{"epoch": 0, "iter": 13800, "iter_tflops": 55.62347226595402, "iter_time": 0.37090625, "loss": 0.03088057041168213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.783049190451024, "step_time": 0.33942182540893556} +{"epoch": 0, "iter": 13801, "iter_tflops": 25.099400076576483, "iter_time": 0.8219755630493164, "loss": 0.44527870416641235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.68602416762817, "step_time": 0.7731048049926758} +{"epoch": 0, "iter": 13802, "iter_tflops": 27.322582708774423, "iter_time": 0.7550930938720702, "loss": 0.38829752802848816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.63075796275463, "step_time": 0.6134590702056885} +{"epoch": 0, "iter": 13803, "iter_tflops": 42.682642516131686, "iter_time": 0.4833602676391602, "loss": 0.4341939091682434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.51805505184378, "step_time": 0.4435072250366211} +{"epoch": 0, "iter": 13804, "iter_tflops": 38.65011100980561, "iter_time": 0.5337913131713867, "loss": 0.5211575031280518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.411667228612, "step_time": 0.4864485378265381} +{"epoch": 0, "iter": 13805, "iter_tflops": 19.920176408752617, "iter_time": 1.035688293457031, "loss": 0.22601211071014404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.15620305337599, "step_time": 0.9751794052124023} +{"epoch": 0, "iter": 13806, "iter_tflops": 16.35200314047428, "iter_time": 1.2616860046386718, "loss": 0.3642314672470093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.060921188044553, "step_time": 1.0823765182495118} +{"epoch": 0, "iter": 13807, "iter_tflops": 41.96047980795386, "iter_time": 0.4916791610717774, "loss": 0.24083733558654785, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 45.86404770781173, "step_time": 0.4498315029144287} +{"epoch": 0, "iter": 13808, "iter_tflops": 48.48072864843644, "iter_time": 0.4255524635314941, "loss": 0.4041624963283539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.6677603654226, "step_time": 0.39172148895263675} +{"epoch": 0, "iter": 13809, "iter_tflops": 39.05560973016585, "iter_time": 0.5282491722106933, "loss": 0.8300029039382935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.07960465795126, "step_time": 0.49028724670410156} +{"epoch": 0, "iter": 13810, "iter_tflops": 15.919122982688023, "iter_time": 1.295994354248047, "loss": 0.6982433199882507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.07322100596608, "step_time": 1.027791877746582} +{"epoch": 0, "iter": 13811, "iter_tflops": 38.42444643428699, "iter_time": 0.5369262390136719, "loss": 0.44050148129463196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.7828862205454, "step_time": 0.49376898956298826} +{"epoch": 0, "iter": 13812, "iter_tflops": 40.21217889636412, "iter_time": 0.5130558471679687, "loss": 0.5430104732513428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.43459563319434, "step_time": 0.47499218559265133} +{"epoch": 0, "iter": 13813, "iter_tflops": 17.826552839001632, "iter_time": 1.1061683197021486, "loss": 0.038109369575977325, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 19.151918939220106, "step_time": 1.029618392944336} +{"epoch": 0, "iter": 13814, "iter_tflops": 20.014988125745997, "iter_time": 0.9852200698852539, "loss": 0.0236822422593832, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 22.916305385647984, "step_time": 0.8604863510131836} +{"epoch": 0, "iter": 13815, "iter_tflops": 48.79301151975008, "iter_time": 0.4041391868591308, "loss": 0.038795679807662964, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 53.71852924604819, "step_time": 0.3670831699371338} +{"epoch": 0, "iter": 13816, "iter_tflops": 52.975291123047946, "iter_time": 0.3722333106994628, "loss": 0.01160941831767559, 
"lr": 3e-05, "seqlen": 7840.0, "step_tflops": 57.90176056176629, "step_time": 0.3405624942779541} +{"epoch": 0, "iter": 13817, "iter_tflops": 27.401709190359696, "iter_time": 0.7529126510620117, "loss": 0.095600426197052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.95892176584821, "step_time": 0.7124261627197266} +{"epoch": 0, "iter": 13818, "iter_tflops": 14.676155752773495, "iter_time": 1.4057559661865235, "loss": 0.07472719997167587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.268460730505687, "step_time": 1.2681650619506837} +{"epoch": 0, "iter": 13819, "iter_tflops": 51.093879835996304, "iter_time": 0.4037879600524902, "loss": 0.08431784808635712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.84611960730578, "step_time": 0.36942752075195306} +{"epoch": 0, "iter": 13820, "iter_tflops": 55.82489082630071, "iter_time": 0.36956800460815425, "loss": 0.19040660560131073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.751945850762496, "step_time": 0.3395956001281738} +{"epoch": 0, "iter": 13821, "iter_tflops": 22.24908136236686, "iter_time": 0.9272784423828127, "loss": 0.30401045083999634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.189861107500395, "step_time": 0.8896600723266603} +{"epoch": 0, "iter": 13822, "iter_tflops": 12.422324435708349, "iter_time": 1.660807815551758, "loss": 0.213422030210495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.141187085881278, "step_time": 1.2781645736694336} +{"epoch": 0, "iter": 13823, "iter_tflops": 37.614857349526346, "iter_time": 0.5484825668334962, "loss": 0.29406237602233887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.348147709570824, "step_time": 0.4989605255126954} +{"epoch": 0, "iter": 13824, "iter_tflops": 38.36959953770745, "iter_time": 0.5376937408447265, "loss": 0.2603262960910797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.19307610766633, "step_time": 0.488968698501587} +{"epoch": 0, "iter": 13825, "iter_tflops": 17.411152724911467, "iter_time": 1.184935531616211, "loss": 
0.025935115292668343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.616758455611, "step_time": 1.108200096130371} +{"epoch": 0, "iter": 13826, "iter_tflops": 16.072395333577763, "iter_time": 1.283635269165039, "loss": 0.009230964817106724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.699756927977162, "step_time": 0.8352751636505127} +{"epoch": 0, "iter": 13827, "iter_tflops": 55.69014865228885, "iter_time": 0.370462173461914, "loss": 0.02097110077738762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.93565199400968, "step_time": 0.3385718021392823} +{"epoch": 0, "iter": 13828, "iter_tflops": 55.82084024449694, "iter_time": 0.36959482192993165, "loss": 0.024674467742443085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.532272386081445, "step_time": 0.3408280029296875} +{"epoch": 0, "iter": 13829, "iter_tflops": 31.088676661372702, "iter_time": 0.6382834243774413, "loss": 0.05476739630103111, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 33.3499480141895, "step_time": 0.5950050354003906} +{"epoch": 0, "iter": 13830, "iter_tflops": 22.63521916503182, "iter_time": 0.876659812927246, "loss": 0.03423875942826271, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 28.150763799228137, "step_time": 0.70489693069458} +{"epoch": 0, "iter": 13831, "iter_tflops": 46.85703039064699, "iter_time": 0.4234879341125488, "loss": 0.07651950418949127, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 50.79810262609337, "step_time": 0.39063244438171385} +{"epoch": 0, "iter": 13832, "iter_tflops": 49.3980531669457, "iter_time": 0.40170382690429685, "loss": 0.043795522302389145, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 53.676071454205506, "step_time": 0.3696877670288086} +{"epoch": 0, "iter": 13833, "iter_tflops": 30.29564142849409, "iter_time": 0.6809921340942383, "loss": 0.18790078163146973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.251507694092595, "step_time": 0.6396939239501953} +{"epoch": 0, "iter": 13834, "iter_tflops": 15.588512331228493, "iter_time": 
1.3234805908203122, "loss": 0.21692778170108795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.139252044604795, "step_time": 1.2037335968017577} +{"epoch": 0, "iter": 13835, "iter_tflops": 34.022904961090106, "iter_time": 0.6063883590698241, "loss": 0.21085475385189056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.0671551217584, "step_time": 0.49043234443664546} +{"epoch": 0, "iter": 13836, "iter_tflops": 38.28179809815176, "iter_time": 0.5389269714355469, "loss": 0.24285610020160675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95409282315882, "step_time": 0.49175401306152344} +{"epoch": 0, "iter": 13837, "iter_tflops": 31.138419905227487, "iter_time": 0.6625607070922851, "loss": 0.6042621731758118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.862235105220606, "step_time": 0.6092655563354492} +{"epoch": 0, "iter": 13838, "iter_tflops": 11.639919459934589, "iter_time": 1.7724429779052737, "loss": 0.48377013206481934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.17888223534878, "step_time": 1.565466110229492} +{"epoch": 0, "iter": 13839, "iter_tflops": 13.355479715553251, "iter_time": 1.5447661895751952, "loss": 0.5600253343582153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.654558580403323, "step_time": 1.3178968544006349} +{"epoch": 0, "iter": 13840, "iter_tflops": 25.490318254262213, "iter_time": 0.8093697891235353, "loss": 0.7071465849876404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.714813600138143, "step_time": 0.6716984767913818} +{"epoch": 0, "iter": 13841, "iter_tflops": 18.773187587777404, "iter_time": 0.8311233978271485, "loss": 0.4989144504070282, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 20.522479591504517, "step_time": 0.7602802276611328} +{"epoch": 0, "iter": 13842, "iter_tflops": 23.576480641400313, "iter_time": 0.661796630859375, "loss": 0.3845987319946289, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.417320447886016, "step_time": 0.6138662605285644} +{"epoch": 0, "iter": 13843, "iter_tflops": 
24.99087996027884, "iter_time": 0.624341178894043, "loss": 0.46194884181022644, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 26.8179117414654, "step_time": 0.5818065032958984} +{"epoch": 0, "iter": 13844, "iter_tflops": 23.92186240886816, "iter_time": 0.6522416687011718, "loss": 0.36760213971138, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.71546519773873, "step_time": 0.6067491035461426} +{"epoch": 0, "iter": 13845, "iter_tflops": 18.99028090750965, "iter_time": 1.0864027557373048, "loss": 0.013732608407735825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.092512032743844, "step_time": 1.0268050842285157} +{"epoch": 0, "iter": 13846, "iter_tflops": 8.471545149989163, "iter_time": 2.435340087890625, "loss": 0.012439829297363758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.35876575617586, "step_time": 2.2044673461914064} +{"epoch": 0, "iter": 13847, "iter_tflops": 15.211547347171232, "iter_time": 1.3562784271240234, "loss": 0.37133902311325073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.533064476120053, "step_time": 1.1766963806152344} +{"epoch": 0, "iter": 13848, "iter_tflops": 46.885570975900485, "iter_time": 0.44003076171875005, "loss": 0.4005265533924103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.973752203498755, "step_time": 0.40473954963684083} +{"epoch": 0, "iter": 13849, "iter_tflops": 16.184283877351938, "iter_time": 0.9691315231323241, "loss": 0.4485827386379242, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.885697357114033, "step_time": 0.9288748550415038} +{"epoch": 0, "iter": 13850, "iter_tflops": 10.032316751376824, "iter_time": 1.5634175109863282, "loss": 0.29078593850135803, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 11.91163513702613, "step_time": 1.3167545433044432} +{"epoch": 0, "iter": 13851, "iter_tflops": 23.529221822776936, "iter_time": 0.6666051177978516, "loss": 0.44709473848342896, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.35161693758102, "step_time": 0.6186863632202148} +{"epoch": 0, "iter": 13852, 
"iter_tflops": 23.436083169366466, "iter_time": 0.6692543106079102, "loss": 0.419635146856308, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.17736005363448, "step_time": 0.6229683990478515} +{"epoch": 0, "iter": 13853, "iter_tflops": 25.840906726390777, "iter_time": 0.7983889160156251, "loss": 0.08520293235778809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.391882084026587, "step_time": 0.7266546630859376} +{"epoch": 0, "iter": 13854, "iter_tflops": 23.749894287217348, "iter_time": 0.8686814880371094, "loss": 0.052390653640031815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.491967481545533, "step_time": 0.69954958152771} +{"epoch": 0, "iter": 13855, "iter_tflops": 49.90249770986712, "iter_time": 0.41342807388305663, "loss": 0.025586163625121117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.72403185922655, "step_time": 0.3770024394989014} +{"epoch": 0, "iter": 13856, "iter_tflops": 56.659619835247206, "iter_time": 0.36412340164184576, "loss": 0.05872118100523949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.9517242155765, "step_time": 0.3330188751220703} +{"epoch": 0, "iter": 13857, "iter_tflops": 46.94693456263651, "iter_time": 0.43945560455322263, "loss": 0.006495589856058359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.42917553902067, "step_time": 0.40115543937683107} +{"epoch": 0, "iter": 13858, "iter_tflops": 40.901526377797566, "iter_time": 0.5044088897705078, "loss": 0.0011955219088122249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.11945059747355, "step_time": 0.4473404006958008} +{"epoch": 0, "iter": 13859, "iter_tflops": 44.902180322279364, "iter_time": 0.45946752166748045, "loss": 0.000892935146111995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.77117075025702, "step_time": 0.41451895141601564} +{"epoch": 0, "iter": 13860, "iter_tflops": 50.60911037720638, "iter_time": 0.4076557235717773, "loss": 0.0030218237079679966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.16787433662898, "step_time": 0.36731127452850343} 
+{"epoch": 0, "iter": 13861, "iter_tflops": 19.682434753075604, "iter_time": 1.0481982421875, "loss": 0.6145639419555664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.78925662397331, "step_time": 0.9923920745849608} +{"epoch": 0, "iter": 13862, "iter_tflops": 20.756236647536987, "iter_time": 0.993970817565918, "loss": 0.3306407928466797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.198693419693768, "step_time": 0.8187366371154784} +{"epoch": 0, "iter": 13863, "iter_tflops": 49.955806088956045, "iter_time": 0.41298690032958985, "loss": 0.6092451214790344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.20533851084735, "step_time": 0.38060999298095705} +{"epoch": 0, "iter": 13864, "iter_tflops": 48.526805961799866, "iter_time": 0.4251483917236328, "loss": 0.5126468539237976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.667311074200626, "step_time": 0.3917248306274414} +{"epoch": 0, "iter": 13865, "iter_tflops": 51.459364222831816, "iter_time": 0.40092010116577137, "loss": 0.003347446909174323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.972189639252385, "step_time": 0.36212569046020504} +{"epoch": 0, "iter": 13866, "iter_tflops": 51.17090842223339, "iter_time": 0.4031801300048828, "loss": 0.0036469735205173492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.43284647319023, "step_time": 0.36558661842346196} +{"epoch": 0, "iter": 13867, "iter_tflops": 55.275909610172874, "iter_time": 0.3732384262084961, "loss": 0.0018015513196587563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.63645262860545, "step_time": 0.34024242210388184} +{"epoch": 0, "iter": 13868, "iter_tflops": 52.94586897382234, "iter_time": 0.3896638946533203, "loss": 0.001652090111747384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.849913230203036, "step_time": 0.35663136482238766} +{"epoch": 0, "iter": 13869, "iter_tflops": 41.87208470426317, "iter_time": 0.49271713256835936, "loss": 0.14822497963905334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.473770156245585, 
"step_time": 0.45369217109680177} +{"epoch": 0, "iter": 13870, "iter_tflops": 36.91638858446238, "iter_time": 0.5588600158691406, "loss": 0.07441689074039459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.22136647650113, "step_time": 0.500495138168335} +{"epoch": 0, "iter": 13871, "iter_tflops": 37.76602397425832, "iter_time": 0.5462871475219726, "loss": 0.12671233713626862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.377119077693195, "step_time": 0.49861116409301753} +{"epoch": 0, "iter": 13872, "iter_tflops": 41.94619136874196, "iter_time": 0.4918466453552246, "loss": 0.09139718115329742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.09848669819362, "step_time": 0.4475438346862793} +{"epoch": 0, "iter": 13873, "iter_tflops": 32.48913463289529, "iter_time": 0.6350151748657226, "loss": 0.17691998183727264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.304107863355505, "step_time": 0.5843822364807129} +{"epoch": 0, "iter": 13874, "iter_tflops": 48.502840923265836, "iter_time": 0.425358455657959, "loss": 0.21776153147220612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.93384682797186, "step_time": 0.38975239372253423} +{"epoch": 0, "iter": 13875, "iter_tflops": 46.430638050650494, "iter_time": 0.44434223556518554, "loss": 0.21253815293312073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.472224522884915, "step_time": 0.4087613277435303} +{"epoch": 0, "iter": 13876, "iter_tflops": 49.61941424308853, "iter_time": 0.4157867202758789, "loss": 0.2247667908668518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.740152527256534, "step_time": 0.3839046325683594} +{"epoch": 0, "iter": 13877, "iter_tflops": 30.133108858132687, "iter_time": 0.6846652832031249, "loss": 0.6358264684677124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.06352607636265, "step_time": 0.6434443130493164} +{"epoch": 0, "iter": 13878, "iter_tflops": 21.263115244641767, "iter_time": 0.9702761459350587, "loss": 0.6291343569755554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
23.75399517499897, "step_time": 0.8685315189361573} +{"epoch": 0, "iter": 13879, "iter_tflops": 41.99316906522411, "iter_time": 0.4912964172363281, "loss": 0.620215892791748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.14179226623406, "step_time": 0.4570286750793457} +{"epoch": 0, "iter": 13880, "iter_tflops": 45.95374529997535, "iter_time": 0.4489534721374511, "loss": 0.5752459764480591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.172115385355134, "step_time": 0.41956896400451665} +{"epoch": 0, "iter": 13881, "iter_tflops": 33.31888681972941, "iter_time": 0.6192011642456055, "loss": 0.21255485713481903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.506387867212794, "step_time": 0.5810530090332031} +{"epoch": 0, "iter": 13882, "iter_tflops": 11.9540371987347, "iter_time": 1.7258682708740234, "loss": 0.24425376951694489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.846843343219073, "step_time": 1.301905563354492} +{"epoch": 0, "iter": 13883, "iter_tflops": 37.658633443399395, "iter_time": 0.547844985961914, "loss": 0.246312215924263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10391662015025, "step_time": 0.5019252471923829} +{"epoch": 0, "iter": 13884, "iter_tflops": 47.312126262244085, "iter_time": 0.4360635452270507, "loss": 0.2548659145832062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.259522957882965, "step_time": 0.40248313522338874} +{"epoch": 0, "iter": 13885, "iter_tflops": 30.382340650987217, "iter_time": 0.6790488510131836, "loss": 0.0036209712270647287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.336621897502134, "step_time": 0.6380101661682129} +{"epoch": 0, "iter": 13886, "iter_tflops": 16.628558644387578, "iter_time": 1.2407024536132814, "loss": 0.01296247262507677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.894915971676436, "step_time": 1.0370032997131347} +{"epoch": 0, "iter": 13887, "iter_tflops": 56.38400837273396, "iter_time": 0.3659032783508301, "loss": 0.005526652559638023, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 62.311606112269175, "step_time": 0.33109551811218263} +{"epoch": 0, "iter": 13888, "iter_tflops": 58.19250877310546, "iter_time": 0.35453177642822264, "loss": 0.009432647377252579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.570096542161195, "step_time": 0.3245408554077148} +{"epoch": 0, "iter": 13889, "iter_tflops": 33.108632556609194, "iter_time": 0.6231333618164063, "loss": 0.22792717814445496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.199789237509734, "step_time": 0.5861141204833984} +{"epoch": 0, "iter": 13890, "iter_tflops": 10.979738238755662, "iter_time": 1.8790150604248046, "loss": 0.3058253824710846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.469894825778272, "step_time": 1.4257942962646484} +{"epoch": 0, "iter": 13891, "iter_tflops": 38.47733637262163, "iter_time": 0.5361881942749023, "loss": 0.3648751676082611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30693315907146, "step_time": 0.48765277862548834} +{"epoch": 0, "iter": 13892, "iter_tflops": 43.355063814164865, "iter_time": 0.475863525390625, "loss": 0.3262091875076294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.4155317575727, "step_time": 0.4351125621795654} +{"epoch": 0, "iter": 13893, "iter_tflops": 14.618027403677019, "iter_time": 1.4113459320068358, "loss": 0.5949055552482605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.566885939033725, "step_time": 1.3253192443847657} +{"epoch": 0, "iter": 13894, "iter_tflops": 19.81089477660436, "iter_time": 1.041401397705078, "loss": 0.5759022831916809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.827794199818978, "step_time": 0.8658415184020998} +{"epoch": 0, "iter": 13895, "iter_tflops": 36.36083510829542, "iter_time": 0.567398780822754, "loss": 0.5646666288375854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.88119123675224, "step_time": 0.5173138732910156} +{"epoch": 0, "iter": 13896, "iter_tflops": 37.623069554303335, "iter_time": 0.5483628463745117, "loss": 0.554746150970459, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 40.86986120416796, "step_time": 0.504799695968628} +{"epoch": 0, "iter": 13897, "iter_tflops": 19.466615853153815, "iter_time": 1.0598192138671876, "loss": 0.4517918825149536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.977946649521993, "step_time": 0.9834658203125001} +{"epoch": 0, "iter": 13898, "iter_tflops": 15.434738174117554, "iter_time": 1.336666244506836, "loss": 0.48573625087738037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.87792464461204, "step_time": 1.0378897132873535} +{"epoch": 0, "iter": 13899, "iter_tflops": 40.66241240433938, "iter_time": 0.5073750495910645, "loss": 0.4541814625263214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.26854659046106, "step_time": 0.4660440673828125} +{"epoch": 0, "iter": 13900, "iter_tflops": 42.079912745350114, "iter_time": 0.4902836570739746, "loss": 0.45890793204307556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.060307238739796, "step_time": 0.44791480445861814} +{"epoch": 0, "iter": 13901, "iter_tflops": 17.50454320174311, "iter_time": 1.1786136474609377, "loss": 0.5544112324714661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.529698190936717, "step_time": 1.113406883239746} +{"epoch": 0, "iter": 13902, "iter_tflops": 24.112533084565527, "iter_time": 0.8556170120239257, "loss": 0.5888855457305908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.73679430542252, "step_time": 0.6302111721038818} +{"epoch": 0, "iter": 13903, "iter_tflops": 47.01035125026846, "iter_time": 0.43886278152465824, "loss": 0.6673948764801025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.947733336646664, "step_time": 0.4049462490081787} +{"epoch": 0, "iter": 13904, "iter_tflops": 44.75893807973567, "iter_time": 0.4609379577636719, "loss": 0.4655711054801941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.60346417508241, "step_time": 0.42447784042358394} +{"epoch": 0, "iter": 13905, "iter_tflops": 24.46067546291369, "iter_time": 0.8434392395019531, "loss": 0.5505282878875732, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.74946630660661, "step_time": 0.80122412109375} +{"epoch": 0, "iter": 13906, "iter_tflops": 8.410424853063805, "iter_time": 2.4530382080078126, "loss": 0.6402049660682678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.83075168693323, "step_time": 2.0986282806396486} +{"epoch": 0, "iter": 13907, "iter_tflops": 15.692899987424335, "iter_time": 1.31467692565918, "loss": 0.6961643099784851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.432296383333064, "step_time": 1.1834983215332031} +{"epoch": 0, "iter": 13908, "iter_tflops": 27.37402747743234, "iter_time": 0.7536740264892577, "loss": 0.7096824049949646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.929838476299466, "step_time": 0.5299557952880859} +{"epoch": 0, "iter": 13909, "iter_tflops": 12.827697323682443, "iter_time": 1.241875701904297, "loss": 0.40265119075775146, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 13.697612170777715, "step_time": 1.1630060348510742} +{"epoch": 0, "iter": 13910, "iter_tflops": 15.250970417506357, "iter_time": 1.04455029296875, "loss": 0.29100412130355835, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 18.319274707549955, "step_time": 0.8695980529785156} +{"epoch": 0, "iter": 13911, "iter_tflops": 27.703987970253568, "iter_time": 0.5750221099853516, "loss": 0.32670289278030396, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.60038889244218, "step_time": 0.5381823081970214} +{"epoch": 0, "iter": 13912, "iter_tflops": 27.735802928997437, "iter_time": 0.5743625183105467, "loss": 0.4466877579689026, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.547587608266628, "step_time": 0.5391440353393555} +{"epoch": 0, "iter": 13913, "iter_tflops": 32.66851398722424, "iter_time": 0.6315283737182618, "loss": 0.15187351405620575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.9728370322193, "step_time": 0.5899176406860351} +{"epoch": 0, "iter": 13914, "iter_tflops": 11.600577364955283, "iter_time": 1.7784540252685548, "loss": 
0.08723612129688263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.004937986840334, "step_time": 1.4731299438476562} +{"epoch": 0, "iter": 13915, "iter_tflops": 37.08586816017389, "iter_time": 0.5563060684204102, "loss": 0.11507334560155869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33012432658894, "step_time": 0.499178113937378} +{"epoch": 0, "iter": 13916, "iter_tflops": 42.07143560351411, "iter_time": 0.49038244628906247, "loss": 0.09558255225419998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.57068657040701, "step_time": 0.4430059986114501} +{"epoch": 0, "iter": 13917, "iter_tflops": 16.473117404727827, "iter_time": 1.2524097900390627, "loss": 0.19288286566734314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.593929486052566, "step_time": 1.1726256790161134} +{"epoch": 0, "iter": 13918, "iter_tflops": 18.595546290796783, "iter_time": 1.1094642333984375, "loss": 0.1032693088054657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.436533624826836, "step_time": 0.9195312366485595} +{"epoch": 0, "iter": 13919, "iter_tflops": 39.020735267129176, "iter_time": 0.5287212905883789, "loss": 0.11821175366640091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.675317894101, "step_time": 0.48344322967529296} +{"epoch": 0, "iter": 13920, "iter_tflops": 41.00202909562497, "iter_time": 0.5031725006103516, "loss": 0.16495096683502197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.786708279837235, "step_time": 0.4606521511077881} +{"epoch": 0, "iter": 13921, "iter_tflops": 31.356520334601232, "iter_time": 0.657952262878418, "loss": 0.1058955043554306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.097067139313985, "step_time": 0.5878295593261719} +{"epoch": 0, "iter": 13922, "iter_tflops": 37.01777308124754, "iter_time": 0.5573294067382812, "loss": 0.08042749017477036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.199606976096426, "step_time": 0.5007594738006591} +{"epoch": 0, "iter": 13923, "iter_tflops": 38.544082286353174, "iter_time": 
0.5352596893310547, "loss": 0.11066213250160217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53305846533759, "step_time": 0.48506019210815426} +{"epoch": 0, "iter": 13924, "iter_tflops": 46.257078772637975, "iter_time": 0.4460094337463379, "loss": 0.10045478492975235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.984612978681916, "step_time": 0.4046533317565918} +{"epoch": 0, "iter": 13925, "iter_tflops": 13.113506168973244, "iter_time": 1.5732705841064454, "loss": 0.6393482089042664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.833728143872456, "step_time": 1.4913617858886719} +{"epoch": 0, "iter": 13926, "iter_tflops": 21.026723557216712, "iter_time": 0.9811844177246094, "loss": 0.6378763914108276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.783593805678663, "step_time": 0.800163610458374} +{"epoch": 0, "iter": 13927, "iter_tflops": 44.53398147380788, "iter_time": 0.4632663154602051, "loss": 0.8325090408325195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.103716779759274, "step_time": 0.4288877220153809} +{"epoch": 0, "iter": 13928, "iter_tflops": 45.05703906501126, "iter_time": 0.45788835525512694, "loss": 0.7342183589935303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.5694921662778, "step_time": 0.4247747421264649} +{"epoch": 0, "iter": 13929, "iter_tflops": 32.928581386640744, "iter_time": 0.6265406112670898, "loss": 0.0347268283367157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.090657291733734, "step_time": 0.5879369354248047} +{"epoch": 0, "iter": 13930, "iter_tflops": 14.404724986904379, "iter_time": 1.4322448730468749, "loss": 0.06384953111410141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.012101673943064, "step_time": 1.1454017906188965} +{"epoch": 0, "iter": 13931, "iter_tflops": 47.090555859924194, "iter_time": 0.43811531066894527, "loss": 0.06179822236299515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.11309917145256, "step_time": 0.40363612937927246} +{"epoch": 0, "iter": 13932, "iter_tflops": 
54.41691214927429, "iter_time": 0.37913017654418946, "loss": 0.04442445561289787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.280494174361046, "step_time": 0.3480249919891357} +{"epoch": 0, "iter": 13933, "iter_tflops": 32.351606494534025, "iter_time": 0.6377146530151367, "loss": 0.49019184708595276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.616293620159375, "step_time": 0.5959937171936035} +{"epoch": 0, "iter": 13934, "iter_tflops": 13.209845308900338, "iter_time": 1.5617967529296872, "loss": 0.61750727891922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.805203148334762, "step_time": 1.0970949554443359} +{"epoch": 0, "iter": 13935, "iter_tflops": 40.49921683861006, "iter_time": 0.5094195671081543, "loss": 0.5263371467590332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.749482180581424, "step_time": 0.4509579677581787} +{"epoch": 0, "iter": 13936, "iter_tflops": 50.43321178557597, "iter_time": 0.4090775260925293, "loss": 0.5689831376075745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.59440836790287, "step_time": 0.37789755630493166} +{"epoch": 0, "iter": 13937, "iter_tflops": 38.82754485171291, "iter_time": 0.5313519973754882, "loss": 0.744057297706604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.757776113852, "step_time": 0.4940659065246582} +{"epoch": 0, "iter": 13938, "iter_tflops": 11.11102966288228, "iter_time": 1.8568120269775392, "loss": 0.6354014873504639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.070079058077615, "step_time": 1.4663097076416016} +{"epoch": 0, "iter": 13939, "iter_tflops": 10.792540881600008, "iter_time": 1.911606704711914, "loss": 0.7680668234825134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.387173785435998, "step_time": 1.5411089630126953} +{"epoch": 0, "iter": 13940, "iter_tflops": 23.269122715215055, "iter_time": 0.8866296234130859, "loss": 0.46242308616638184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.652140248200585, "step_time": 0.8042640228271485} +{"epoch": 0, "iter": 13941, 
"iter_tflops": 19.953126171686478, "iter_time": 0.781974479675293, "loss": 0.28103312849998474, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 21.773107717782665, "step_time": 0.7166104011535644} +{"epoch": 0, "iter": 13942, "iter_tflops": 25.54742914326082, "iter_time": 0.6107399444580078, "loss": 0.25255006551742554, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 27.189874084199367, "step_time": 0.5738472862243652} +{"epoch": 0, "iter": 13943, "iter_tflops": 28.90347981317477, "iter_time": 0.5398255004882813, "loss": 0.2894752621650696, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.756230152691327, "step_time": 0.5073064994812011} +{"epoch": 0, "iter": 13944, "iter_tflops": 29.21605640115726, "iter_time": 0.5340500183105469, "loss": 0.5168237686157227, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 31.054793640313203, "step_time": 0.5024292106628419} +{"epoch": 0, "iter": 13945, "iter_tflops": 22.494600233885127, "iter_time": 0.731880142211914, "loss": 0.11800392717123032, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 23.905223569783566, "step_time": 0.6886926269531248} +{"epoch": 0, "iter": 13946, "iter_tflops": 11.464498532373986, "iter_time": 1.4360289001464845, "loss": 0.11105635017156601, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 13.848780901956827, "step_time": 1.1887942581176756} +{"epoch": 0, "iter": 13947, "iter_tflops": 39.13039747450641, "iter_time": 0.4207304878234864, "loss": 0.2295374870300293, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 42.51465265820023, "step_time": 0.3872394618988037} +{"epoch": 0, "iter": 13948, "iter_tflops": 40.901820710354755, "iter_time": 0.4025090065002441, "loss": 0.16970297694206238, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 44.30568765975853, "step_time": 0.37158550262451173} +{"epoch": 0, "iter": 13949, "iter_tflops": 39.43061520925719, "iter_time": 0.5232252502441406, "loss": 0.005300781223922968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.55187684128598, "step_time": 0.4848456764221191} +{"epoch": 0, 
"iter": 13950, "iter_tflops": 9.061668031070033, "iter_time": 2.276743469238281, "loss": 0.02352195791900158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.962521668556946, "step_time": 1.8819660415649413} +{"epoch": 0, "iter": 13951, "iter_tflops": 11.813982394938314, "iter_time": 1.7463284454345704, "loss": 0.01946130581200123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.077173804422344, "step_time": 1.283253746032715} +{"epoch": 0, "iter": 13952, "iter_tflops": 26.517381217111232, "iter_time": 0.7780215301513673, "loss": 0.004893783945590258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.36527706904371, "step_time": 0.6183402423858644} +{"epoch": 0, "iter": 13953, "iter_tflops": 16.92361668472645, "iter_time": 0.9050346374511719, "loss": 0.4959598183631897, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 17.734743485759928, "step_time": 0.8636414337158203} +{"epoch": 0, "iter": 13954, "iter_tflops": 12.719650845292252, "iter_time": 1.204157211303711, "loss": 0.5568364858627319, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 15.40566019289062, "step_time": 0.9942098617553711} +{"epoch": 0, "iter": 13955, "iter_tflops": 25.79476313124543, "iter_time": 0.5937817382812499, "loss": 0.34875619411468506, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 27.7538776854395, "step_time": 0.5518673629760742} +{"epoch": 0, "iter": 13956, "iter_tflops": 27.00375317796823, "iter_time": 0.5671974258422852, "loss": 0.48533371090888977, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 28.68811245245042, "step_time": 0.533895679473877} +{"epoch": 0, "iter": 13957, "iter_tflops": 44.06519242802024, "iter_time": 0.4681947898864746, "loss": 0.47066375613212585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.04490521762584, "step_time": 0.42941272163391114} +{"epoch": 0, "iter": 13958, "iter_tflops": 47.743539310102825, "iter_time": 0.4321232528686524, "loss": 0.4369845688343048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.84077338370595, "step_time": 0.3979704036712646} 
+{"epoch": 0, "iter": 13959, "iter_tflops": 51.288836895509476, "iter_time": 0.4022530975341797, "loss": 0.3754708766937256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.43820235165029, "step_time": 0.372145788192749} +{"epoch": 0, "iter": 13960, "iter_tflops": 46.045755464110755, "iter_time": 0.4480563583374023, "loss": 0.45584768056869507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.0418060238526, "step_time": 0.41227715682983396} +{"epoch": 0, "iter": 13961, "iter_tflops": 38.50753672532065, "iter_time": 0.5357676773071289, "loss": 0.32802143692970276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.58734035939635, "step_time": 0.49609071731567383} +{"epoch": 0, "iter": 13962, "iter_tflops": 37.73515567477893, "iter_time": 0.5467340240478517, "loss": 0.47705578804016113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34194560422637, "step_time": 0.4990353794097901} +{"epoch": 0, "iter": 13963, "iter_tflops": 39.720729256256334, "iter_time": 0.5194036941528319, "loss": 0.3862128257751465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.42515599989887, "step_time": 0.47509543800354004} +{"epoch": 0, "iter": 13964, "iter_tflops": 37.45151741086035, "iter_time": 0.5508747024536133, "loss": 0.3776637315750122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.02516941866419, "step_time": 0.5028886852264405} +{"epoch": 0, "iter": 13965, "iter_tflops": 30.761371601391776, "iter_time": 0.6706818466186524, "loss": 0.6455441117286682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.63486708814912, "step_time": 0.6133841247558594} +{"epoch": 0, "iter": 13966, "iter_tflops": 35.66998129020875, "iter_time": 0.5783881225585938, "loss": 0.46846073865890503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.80577665086165, "step_time": 0.5316500606536865} +{"epoch": 0, "iter": 13967, "iter_tflops": 36.401235191601955, "iter_time": 0.5667690505981445, "loss": 0.5847503542900085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.36786301180117, "step_time": 
0.524059268951416} +{"epoch": 0, "iter": 13968, "iter_tflops": 36.33125182383327, "iter_time": 0.5678607940673828, "loss": 0.7848529815673828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.53858833615092, "step_time": 0.5217964115142822} +{"epoch": 0, "iter": 13969, "iter_tflops": 17.704844179877284, "iter_time": 1.165279586791992, "loss": 0.5646390914916992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.833401432524823, "step_time": 1.095452331542969} +{"epoch": 0, "iter": 13970, "iter_tflops": 21.59730843494238, "iter_time": 0.9552622528076171, "loss": 0.5531312823295593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.074836273676024, "step_time": 0.7620025215148926} +{"epoch": 0, "iter": 13971, "iter_tflops": 40.25433903078324, "iter_time": 0.5125185012817383, "loss": 0.47222280502319336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84191469366004, "step_time": 0.4705792083740235} +{"epoch": 0, "iter": 13972, "iter_tflops": 39.85383298070158, "iter_time": 0.5176689910888672, "loss": 0.5700386762619019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.23810442884619, "step_time": 0.4771507396697998} +{"epoch": 0, "iter": 13973, "iter_tflops": 18.424908626003248, "iter_time": 1.1197392578125, "loss": 0.2770613729953766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.789369515205546, "step_time": 1.042534149169922} +{"epoch": 0, "iter": 13974, "iter_tflops": 24.401965735167828, "iter_time": 0.845468505859375, "loss": 0.2552777826786041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.28968819614224, "step_time": 0.6811259784698487} +{"epoch": 0, "iter": 13975, "iter_tflops": 36.54202228604983, "iter_time": 0.564585433959961, "loss": 0.30060964822769165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.9324833396009, "step_time": 0.516649398803711} +{"epoch": 0, "iter": 13976, "iter_tflops": 39.76736410953935, "iter_time": 0.5187945938110352, "loss": 0.36015084385871887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.47034049267324, "step_time": 
0.47460160827636727} +{"epoch": 0, "iter": 13977, "iter_tflops": 32.87787130515077, "iter_time": 0.6275069732666015, "loss": 0.05967961251735687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.81460386338011, "step_time": 0.5604051475524903} +{"epoch": 0, "iter": 13978, "iter_tflops": 39.40498020521001, "iter_time": 0.5235656356811524, "loss": 0.056762829422950745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19565046222936, "step_time": 0.4776196975708008} +{"epoch": 0, "iter": 13979, "iter_tflops": 38.61327543550007, "iter_time": 0.5343005294799804, "loss": 0.06747190654277802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.38836734201808, "step_time": 0.4867159271240235} +{"epoch": 0, "iter": 13980, "iter_tflops": 41.94506380842716, "iter_time": 0.49185986709594726, "loss": 0.1288621723651886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.21343106025877, "step_time": 0.4464306812286377} +{"epoch": 0, "iter": 13981, "iter_tflops": 22.01007996451763, "iter_time": 0.9373475036621094, "loss": 0.5973798632621765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.122805354395407, "step_time": 0.8552526626586914} +{"epoch": 0, "iter": 13982, "iter_tflops": 21.299652204034878, "iter_time": 0.9686117553710938, "loss": 0.5244287252426147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.85832952049033, "step_time": 0.8299468994140625} +{"epoch": 0, "iter": 13983, "iter_tflops": 36.17110701893646, "iter_time": 0.5703749542236328, "loss": 0.5442456603050232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.189669660929475, "step_time": 0.5264421386718751} +{"epoch": 0, "iter": 13984, "iter_tflops": 42.79161135454025, "iter_time": 0.4821293907165527, "loss": 0.7225795984268188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.069814042613885, "step_time": 0.44782237434387206} +{"epoch": 0, "iter": 13985, "iter_tflops": 31.72436561018249, "iter_time": 0.6503232803344726, "loss": 0.0770120769739151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.84764386704882, 
"step_time": 0.6095282020568847} +{"epoch": 0, "iter": 13986, "iter_tflops": 15.125209070037663, "iter_time": 1.3640203857421878, "loss": 0.14398567378520966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.73697187665946, "step_time": 1.1010900611877443} +{"epoch": 0, "iter": 13987, "iter_tflops": 38.23620796790151, "iter_time": 0.5395695495605468, "loss": 0.0971449688076973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.044777323373005, "step_time": 0.49069337081909176} +{"epoch": 0, "iter": 13988, "iter_tflops": 42.9087810477704, "iter_time": 0.4808128547668457, "loss": 0.11195783317089081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.15047324835562, "step_time": 0.43755856704711915} +{"epoch": 0, "iter": 13989, "iter_tflops": 1.2144598224171628, "iter_time": 1.0965288848876953, "loss": 2.0709445476531982, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 1.3180159199801862, "step_time": 1.0103749542236329} +{"epoch": 0, "iter": 13990, "iter_tflops": 1.1924307855542486, "iter_time": 1.1167862243652344, "loss": 1.9692652225494385, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 1.480419903946473, "step_time": 0.8995355110168456} +{"epoch": 0, "iter": 13991, "iter_tflops": 2.5875231563277046, "iter_time": 0.5146583023071288, "loss": 1.7072384357452393, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 2.845081186869355, "step_time": 0.46806758308410645} +{"epoch": 0, "iter": 13992, "iter_tflops": 2.685974544042368, "iter_time": 0.4957940788269043, "loss": 1.744093656539917, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 2.9447364560994704, "step_time": 0.45222731971740726} +{"epoch": 0, "iter": 13993, "iter_tflops": 17.61138350062859, "iter_time": 1.1714635314941406, "loss": 0.8272507190704346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.115212746418173, "step_time": 1.0793023223876952} +{"epoch": 0, "iter": 13994, "iter_tflops": 14.250899216712382, "iter_time": 1.4477046813964844, "loss": 0.7060722708702087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
18.185690927202106, "step_time": 1.1344685001373291} +{"epoch": 0, "iter": 13995, "iter_tflops": 43.55740351001163, "iter_time": 0.47365296936035156, "loss": 0.6955286264419556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8953530184408, "step_time": 0.4399389743804931} +{"epoch": 0, "iter": 13996, "iter_tflops": 45.077777834000116, "iter_time": 0.45767769622802734, "loss": 0.6373025178909302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.48099592053126, "step_time": 0.42555011749267574} +{"epoch": 0, "iter": 13997, "iter_tflops": 32.618273396832, "iter_time": 0.632501091003418, "loss": 0.7125245332717896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.88511744653856, "step_time": 0.5914010047912598} +{"epoch": 0, "iter": 13998, "iter_tflops": 19.08543265168362, "iter_time": 1.0809864196777346, "loss": 0.5634843111038208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.595645433424583, "step_time": 0.9130561714172364} +{"epoch": 0, "iter": 13999, "iter_tflops": 39.87716913667582, "iter_time": 0.5173660507202149, "loss": 0.6727498769760132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80839297884447, "step_time": 0.4819403877258301} +{"epoch": 0, "iter": 14000, "iter_tflops": 42.18286854816796, "iter_time": 0.48908702087402345, "loss": 0.619306206703186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41205136502892, "step_time": 0.4543087768554687} +{"epoch": 0, "iter": 14001, "iter_tflops": 26.179337421668162, "iter_time": 0.7880678253173828, "loss": 0.19424733519554138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.581161562243985, "step_time": 0.7480139465332031} +{"epoch": 0, "iter": 14002, "iter_tflops": 24.59086944366464, "iter_time": 0.838973731994629, "loss": 0.32608655095100403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.52345970513886, "step_time": 0.7495821285247803} +{"epoch": 0, "iter": 14003, "iter_tflops": 50.33560586736874, "iter_time": 0.4098707695007324, "loss": 0.19202136993408203, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 54.74974645940047, "step_time": 0.3768253707885742} +{"epoch": 0, "iter": 14004, "iter_tflops": 49.452097583335075, "iter_time": 0.41719349670410155, "loss": 0.2891139090061188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.229270086648256, "step_time": 0.38758926200866695} +{"epoch": 0, "iter": 14005, "iter_tflops": 28.112635861873173, "iter_time": 0.7338726119995117, "loss": 0.014756672084331512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.735477527226195, "step_time": 0.6938208236694335} +{"epoch": 0, "iter": 14006, "iter_tflops": 8.950055203645283, "iter_time": 2.3051358947753906, "loss": 0.004552584141492844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.283774722916949, "step_time": 1.828385803222656} +{"epoch": 0, "iter": 14007, "iter_tflops": 17.004199857306386, "iter_time": 1.2132939910888672, "loss": 0.014461664482951164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.01100287255352, "step_time": 1.085218578338623} +{"epoch": 0, "iter": 14008, "iter_tflops": 15.206092249911832, "iter_time": 1.3567649841308593, "loss": 0.006158510223031044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.33164453793341, "step_time": 1.0147282218933105} +{"epoch": 0, "iter": 14009, "iter_tflops": 19.296164672257667, "iter_time": 0.8510669021606444, "loss": 0.3610840141773224, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 20.27240498172416, "step_time": 0.8100828247070312} +{"epoch": 0, "iter": 14010, "iter_tflops": 12.834536546941186, "iter_time": 1.2795418853759766, "loss": 0.40360891819000244, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 15.13412962279451, "step_time": 1.0851187019348143} +{"epoch": 0, "iter": 14011, "iter_tflops": 24.818307901862788, "iter_time": 0.6617021255493163, "loss": 0.338570773601532, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 26.743540739149545, "step_time": 0.6140670471191406} +{"epoch": 0, "iter": 14012, "iter_tflops": 24.80048245039977, "iter_time": 0.6621777267456054, "loss": 0.2912239730358124, "lr": 3e-05, 
"seqlen": 6560.0, "step_tflops": 26.637950699894063, "step_time": 0.6165011444091797} +{"epoch": 0, "iter": 14013, "iter_tflops": 35.65574119092279, "iter_time": 0.5786191177368164, "loss": 0.009928472340106964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.961264653279144, "step_time": 0.5162772922515869} +{"epoch": 0, "iter": 14014, "iter_tflops": 41.55788842900561, "iter_time": 0.4964422950744629, "loss": 0.010361520573496819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.157703193678046, "step_time": 0.44696967315673825} +{"epoch": 0, "iter": 14015, "iter_tflops": 45.058217391934406, "iter_time": 0.45787638092041016, "loss": 0.00621666619554162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.650745698394196, "step_time": 0.4155243434906006} +{"epoch": 0, "iter": 14016, "iter_tflops": 48.11862182299665, "iter_time": 0.4287548713684082, "loss": 0.015070850029587746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.005564808071675, "step_time": 0.3892250480651856} +{"epoch": 0, "iter": 14017, "iter_tflops": 16.983974441130936, "iter_time": 1.2147388458251953, "loss": 0.25647440552711487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.894751554767407, "step_time": 1.1529131011962892} +{"epoch": 0, "iter": 14018, "iter_tflops": 14.934112279938944, "iter_time": 1.3814743804931642, "loss": 0.18890169262886047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.119480522408146, "step_time": 1.138613962173462} +{"epoch": 0, "iter": 14019, "iter_tflops": 37.59869880210533, "iter_time": 0.5487182846069336, "loss": 0.27276697754859924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.28375065538357, "step_time": 0.4997388362884521} +{"epoch": 0, "iter": 14020, "iter_tflops": 44.11941855620245, "iter_time": 0.46761934280395506, "loss": 0.26996615529060364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.33162313761755, "step_time": 0.42686531448364257} +{"epoch": 0, "iter": 14021, "iter_tflops": 30.635885472260835, "iter_time": 0.6734289932250976, "loss": 
0.12707287073135376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.70368764459667, "step_time": 0.6121316375732422} +{"epoch": 0, "iter": 14022, "iter_tflops": 37.699795637854194, "iter_time": 0.547246826171875, "loss": 0.15211714804172516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.97684560292163, "step_time": 0.49148746681213384} +{"epoch": 0, "iter": 14023, "iter_tflops": 38.64145763260106, "iter_time": 0.5339108505249023, "loss": 0.16073593497276306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37115333017381, "step_time": 0.48691366386413576} +{"epoch": 0, "iter": 14024, "iter_tflops": 36.98482304644021, "iter_time": 0.5578259353637695, "loss": 0.20187734067440033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.434115328863, "step_time": 0.5102397651672363} +{"epoch": 0, "iter": 14025, "iter_tflops": 19.6958427178924, "iter_time": 1.0474846801757811, "loss": 0.5997959971427917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.156500326545938, "step_time": 0.9751657028198242} +{"epoch": 0, "iter": 14026, "iter_tflops": 41.83400027722055, "iter_time": 0.4931656875610352, "loss": 0.5954673290252686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.01379184028571, "step_time": 0.4583282737731934} +{"epoch": 0, "iter": 14027, "iter_tflops": 42.68425003922247, "iter_time": 0.4833420639038085, "loss": 0.4945049285888672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17127317287068, "step_time": 0.44683830642700206} +{"epoch": 0, "iter": 14028, "iter_tflops": 40.81512852198543, "iter_time": 0.5054766273498535, "loss": 0.6835741400718689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.60102902002377, "step_time": 0.47317905044555664} +{"epoch": 0, "iter": 14029, "iter_tflops": 31.514414866742644, "iter_time": 0.6546557693481445, "loss": 0.4130305051803589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.55337261881238, "step_time": 0.6148739128112792} +{"epoch": 0, "iter": 14030, "iter_tflops": 16.031740227354888, "iter_time": 1.2868904571533204, 
"loss": 0.3658217787742615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.603850166810098, "step_time": 1.17196484375} +{"epoch": 0, "iter": 14031, "iter_tflops": 31.322566746703583, "iter_time": 0.6586654815673829, "loss": 0.35898351669311523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.58924041047522, "step_time": 0.508289716720581} +{"epoch": 0, "iter": 14032, "iter_tflops": 48.02297538207029, "iter_time": 0.4296088142395019, "loss": 0.3274340331554413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.90474885008472, "step_time": 0.3974798831939697} +{"epoch": 0, "iter": 14033, "iter_tflops": 27.81966451084733, "iter_time": 0.7416010894775391, "loss": 0.07357575744390488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.502258932116312, "step_time": 0.6993055534362792} +{"epoch": 0, "iter": 14034, "iter_tflops": 13.726230781887406, "iter_time": 1.5030414276123047, "loss": 0.10173811763525009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.12162681164884, "step_time": 1.2049727363586427} +{"epoch": 0, "iter": 14035, "iter_tflops": 50.18983531845026, "iter_time": 0.4110611915588379, "loss": 0.09761334210634232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.942758725772485, "step_time": 0.3755015945434571} +{"epoch": 0, "iter": 14036, "iter_tflops": 50.756960513732324, "iter_time": 0.40646826171875, "loss": 0.12768352031707764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.436263191946196, "step_time": 0.372158805847168} +{"epoch": 0, "iter": 14037, "iter_tflops": 21.09382217520486, "iter_time": 0.9780633087158204, "loss": 0.5946421027183533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.033302266583657, "step_time": 0.9363595733642579} +{"epoch": 0, "iter": 14038, "iter_tflops": 20.045142425696877, "iter_time": 1.029231575012207, "loss": 0.7157167196273804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.477022239774985, "step_time": 0.8428759555816651} +{"epoch": 0, "iter": 14039, "iter_tflops": 39.46824173115291, "iter_time": 
0.5227264404296875, "loss": 0.8665686845779419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.11619113385413, "step_time": 0.47849990844726564} +{"epoch": 0, "iter": 14040, "iter_tflops": 36.88446641513949, "iter_time": 0.5593436889648438, "loss": 0.4515467882156372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.69799567438493, "step_time": 0.5197011375427246} +{"epoch": 0, "iter": 14041, "iter_tflops": 18.184664549489778, "iter_time": 1.1345325317382813, "loss": 0.06104469299316406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.671778244339464, "step_time": 1.0487660675048829} +{"epoch": 0, "iter": 14042, "iter_tflops": 24.407526613488297, "iter_time": 0.84527587890625, "loss": 0.0723625123500824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.18146731246114, "step_time": 0.706993013381958} +{"epoch": 0, "iter": 14043, "iter_tflops": 51.557852128719, "iter_time": 0.4001542472839356, "loss": 0.06211661919951439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.430279810131715, "step_time": 0.36560324668884275} +{"epoch": 0, "iter": 14044, "iter_tflops": 52.575821805518395, "iter_time": 0.39240648651123045, "loss": 0.10431909561157227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.442029222864726, "step_time": 0.35916373062133794} +{"epoch": 0, "iter": 14045, "iter_tflops": 18.424466488411298, "iter_time": 1.119766128540039, "loss": 0.027941184118390083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.12549051501779, "step_time": 1.0787223205566405} +{"epoch": 0, "iter": 14046, "iter_tflops": 17.39411854302854, "iter_time": 1.186095947265625, "loss": 0.03720223531126976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.58524603038502, "step_time": 0.9134765892028808} +{"epoch": 0, "iter": 14047, "iter_tflops": 54.62177009661649, "iter_time": 0.3777082557678223, "loss": 0.033170610666275024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.13166758364329, "step_time": 0.34309864234924314} +{"epoch": 0, "iter": 14048, "iter_tflops": 54.87324868295384, 
"iter_time": 0.37597725677490235, "loss": 0.02361932024359703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.89724744135581, "step_time": 0.34444142913818365} +{"epoch": 0, "iter": 14049, "iter_tflops": 44.6243347162313, "iter_time": 0.4623283157348633, "loss": 0.056613728404045105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.64259924898715, "step_time": 0.4241363296508789} +{"epoch": 0, "iter": 14050, "iter_tflops": 47.14945588573923, "iter_time": 0.43756800842285154, "loss": 0.07371262460947037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.60534075463125, "step_time": 0.3997860145568848} +{"epoch": 0, "iter": 14051, "iter_tflops": 52.28961285955063, "iter_time": 0.3945543365478516, "loss": 0.05485960468649864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.15006515203991, "step_time": 0.3609986000061035} +{"epoch": 0, "iter": 14052, "iter_tflops": 56.88282287753873, "iter_time": 0.36269461441040035, "loss": 0.10219645500183105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.02858769891706, "step_time": 0.3326062107086182} +{"epoch": 0, "iter": 14053, "iter_tflops": 28.951741430828605, "iter_time": 0.7126028518676758, "loss": 0.5354772210121155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.57221794656401, "step_time": 0.6748314285278318} +{"epoch": 0, "iter": 14054, "iter_tflops": 13.475949928509376, "iter_time": 1.5309565277099608, "loss": 0.5258055925369263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.571245610270037, "step_time": 1.2449935264587402} +{"epoch": 0, "iter": 14055, "iter_tflops": 49.040650755213804, "iter_time": 0.42069371414184575, "loss": 0.4603287875652313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36302612608868, "step_time": 0.3866177577972412} +{"epoch": 0, "iter": 14056, "iter_tflops": 48.00702964458234, "iter_time": 0.42975151062011724, "loss": 0.36191123723983765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.55908755478366, "step_time": 0.40014465904235835} +{"epoch": 0, "iter": 14057, "iter_tflops": 
13.006668593007333, "iter_time": 0.8991963348388672, "loss": 0.06861677765846252, "lr": 3e-05, "seqlen": 4704.0, "step_tflops": 13.677130729034504, "step_time": 0.8551171264648438} +{"epoch": 0, "iter": 14058, "iter_tflops": 9.493190170856492, "iter_time": 1.2319935150146484, "loss": 0.07306813448667526, "lr": 3e-05, "seqlen": 4704.0, "step_tflops": 11.355808263135014, "step_time": 1.029917770385742} +{"epoch": 0, "iter": 14059, "iter_tflops": 22.71880948568057, "iter_time": 0.5147958450317383, "loss": 0.06752942502498627, "lr": 3e-05, "seqlen": 4704.0, "step_tflops": 25.15977428491927, "step_time": 0.46485109901428223} +{"epoch": 0, "iter": 14060, "iter_tflops": 24.502515495644833, "iter_time": 0.4773203277587891, "loss": 0.07723723351955414, "lr": 3e-05, "seqlen": 4704.0, "step_tflops": 26.913616614206965, "step_time": 0.4345587921142578} +{"epoch": 0, "iter": 14061, "iter_tflops": 15.010059749862684, "iter_time": 1.3744844360351562, "loss": 0.5337163209915161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.787050698087318, "step_time": 1.3068364639282226} +{"epoch": 0, "iter": 14062, "iter_tflops": 22.941317352964845, "iter_time": 0.8992985534667968, "loss": 0.7178860306739807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.644135039416096, "step_time": 0.7463099670410155} +{"epoch": 0, "iter": 14063, "iter_tflops": 38.5638700440416, "iter_time": 0.5349850387573242, "loss": 0.7515221834182739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.898735035547155, "step_time": 0.49240373229980466} +{"epoch": 0, "iter": 14064, "iter_tflops": 38.17957384412716, "iter_time": 0.5403699264526367, "loss": 0.7916416525840759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.668096981590416, "step_time": 0.4951292476654053} +{"epoch": 0, "iter": 14065, "iter_tflops": 18.137764472772066, "iter_time": 1.1374661712646486, "loss": 0.07196919620037079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.62725022626333, "step_time": 1.0511453857421875} +{"epoch": 0, "iter": 14066, 
"iter_tflops": 18.850886634078684, "iter_time": 1.0944362411499022, "loss": 0.06594785302877426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.804174733896033, "step_time": 0.8317589168548584} +{"epoch": 0, "iter": 14067, "iter_tflops": 47.76013921125112, "iter_time": 0.4319730606079102, "loss": 0.11129525303840637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.10926692376949, "step_time": 0.3959198570251464} +{"epoch": 0, "iter": 14068, "iter_tflops": 52.69472468605697, "iter_time": 0.39152104187011716, "loss": 0.07202571630477905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.59674329469897, "step_time": 0.35819895935058593} +{"epoch": 0, "iter": 14069, "iter_tflops": 30.219905286513487, "iter_time": 0.6826988143920899, "loss": 0.22328853607177734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.007213137972116, "step_time": 0.6445763778686524} +{"epoch": 0, "iter": 14070, "iter_tflops": 14.643137669146585, "iter_time": 1.4089257354736329, "loss": 0.2941361963748932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.663085007973947, "step_time": 1.168034547805786} +{"epoch": 0, "iter": 14071, "iter_tflops": 39.96198290542648, "iter_time": 0.5162680130004883, "loss": 0.2637161612510681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.81365023220189, "step_time": 0.47088278198242184} +{"epoch": 0, "iter": 14072, "iter_tflops": 39.223230215965685, "iter_time": 0.52599169921875, "loss": 0.24591340124607086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.01880935276954, "step_time": 0.4795830898284912} +{"epoch": 0, "iter": 14073, "iter_tflops": 18.302472522735126, "iter_time": 1.1272298583984375, "loss": 0.2348700761795044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.41036375048781, "step_time": 1.0628906173706054} +{"epoch": 0, "iter": 14074, "iter_tflops": 24.163366516471765, "iter_time": 0.8538170166015624, "loss": 0.21933740377426147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.10872081946373, "step_time": 0.7610500564575196} +{"epoch": 0, 
"iter": 14075, "iter_tflops": 46.23654535748542, "iter_time": 0.4462075042724609, "loss": 0.15914331376552582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.987854783628975, "step_time": 0.41272212219238286} +{"epoch": 0, "iter": 14076, "iter_tflops": 45.48155477243209, "iter_time": 0.4536145172119141, "loss": 0.21257811784744263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13576440844621, "step_time": 0.4198793640136719} +{"epoch": 0, "iter": 14077, "iter_tflops": 31.123637813030413, "iter_time": 0.5977163772583007, "loss": 0.007999398745596409, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 33.26160406810382, "step_time": 0.5592967796325683} +{"epoch": 0, "iter": 14078, "iter_tflops": 8.220596174280358, "iter_time": 2.262987701416016, "loss": 0.027953222393989563, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 10.234962758724551, "step_time": 1.8176038818359377} +{"epoch": 0, "iter": 14079, "iter_tflops": 10.356022741070252, "iter_time": 1.7963564300537107, "loss": 0.0041170911863446236, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 12.18437979511512, "step_time": 1.5267997512817382} +{"epoch": 0, "iter": 14080, "iter_tflops": 25.556363844976662, "iter_time": 0.7279246826171875, "loss": 0.0037483004853129387, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 33.25505234876989, "step_time": 0.5594069690704346} +{"epoch": 0, "iter": 14081, "iter_tflops": 20.571264100856393, "iter_time": 0.7943261489868164, "loss": 0.26285597681999207, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 21.75744383701267, "step_time": 0.7510208053588868} +{"epoch": 0, "iter": 14082, "iter_tflops": 19.026159754560762, "iter_time": 0.8588329544067383, "loss": 0.38306498527526855, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 22.55822035823839, "step_time": 0.7243609085083007} +{"epoch": 0, "iter": 14083, "iter_tflops": 27.546748248435208, "iter_time": 0.5931840972900391, "loss": 0.22914791107177734, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 29.263334192705933, "step_time": 
0.5583879432678223} +{"epoch": 0, "iter": 14084, "iter_tflops": 28.886524215728674, "iter_time": 0.5656718292236328, "loss": 0.39483797550201416, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 30.742468843372166, "step_time": 0.5315218200683594} +{"epoch": 0, "iter": 14085, "iter_tflops": 36.81843666194171, "iter_time": 0.560346809387207, "loss": 0.04614339768886566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7668088316987, "step_time": 0.5188018379211425} +{"epoch": 0, "iter": 14086, "iter_tflops": 9.279536285138898, "iter_time": 2.223289276123047, "loss": 0.039609719067811966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.131509767364053, "step_time": 1.8533958053588864} +{"epoch": 0, "iter": 14087, "iter_tflops": 10.97056031640483, "iter_time": 1.8805870361328125, "loss": 0.025564013049006462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.414040085176913, "step_time": 1.5380223541259765} +{"epoch": 0, "iter": 14088, "iter_tflops": 48.34477254885306, "iter_time": 0.426749210357666, "loss": 0.037709902971982956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.424420218024146, "step_time": 0.38617346572875977} +{"epoch": 0, "iter": 14089, "iter_tflops": 22.327016872415133, "iter_time": 0.7557593612670899, "loss": 0.4163666069507599, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 23.649247089926767, "step_time": 0.7135048294067382} +{"epoch": 0, "iter": 14090, "iter_tflops": 23.39377157393073, "iter_time": 0.721296775817871, "loss": 0.33077096939086914, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 25.341132195857842, "step_time": 0.6658681182861328} +{"epoch": 0, "iter": 14091, "iter_tflops": 25.126770887569084, "iter_time": 0.6715487670898438, "loss": 0.3177368640899658, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.972526146775376, "step_time": 0.6255940551757813} +{"epoch": 0, "iter": 14092, "iter_tflops": 26.105936433528957, "iter_time": 0.6463607254028321, "loss": 0.3729216754436493, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 
27.901851108923506, "step_time": 0.604757438659668} +{"epoch": 0, "iter": 14093, "iter_tflops": 17.567485514831436, "iter_time": 1.1743908081054688, "loss": 0.02474016696214676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.100400537898974, "step_time": 1.0801393127441405} +{"epoch": 0, "iter": 14094, "iter_tflops": 19.505424891668255, "iter_time": 1.0577105407714846, "loss": 0.045796021819114685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.29700099415396, "step_time": 0.8491209888458252} +{"epoch": 0, "iter": 14095, "iter_tflops": 47.70090101893474, "iter_time": 0.4325095138549805, "loss": 0.0361437126994133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.50999786470441, "step_time": 0.3928983879089355} +{"epoch": 0, "iter": 14096, "iter_tflops": 46.068345391249565, "iter_time": 0.4478366508483887, "loss": 0.027419237419962883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.878044695406246, "step_time": 0.40550091171264646} +{"epoch": 0, "iter": 14097, "iter_tflops": 26.052955387525525, "iter_time": 0.7918907165527344, "loss": 0.5449286699295044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.674839519263394, "step_time": 0.7454819564819336} +{"epoch": 0, "iter": 14098, "iter_tflops": 11.735550108958863, "iter_time": 1.7579996948242187, "loss": 0.6491418480873108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.14345734595069, "step_time": 1.2034383201599124} +{"epoch": 0, "iter": 14099, "iter_tflops": 12.106819147888238, "iter_time": 1.7040886840820313, "loss": 0.6346419453620911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.586292460400832, "step_time": 1.4144165534973145} +{"epoch": 0, "iter": 14100, "iter_tflops": 43.97699761613768, "iter_time": 0.46913374328613283, "loss": 0.5308079719543457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.47377669802953, "step_time": 0.42561349487304684} +{"epoch": 0, "iter": 14101, "iter_tflops": 16.40077416801121, "iter_time": 0.9613316192626953, "loss": 0.366762638092041, "lr": 3e-05, "seqlen": 6304.0, 
"step_tflops": 17.206412503609844, "step_time": 0.9163201675415039} +{"epoch": 0, "iter": 14102, "iter_tflops": 10.660725910382686, "iter_time": 1.4789408264160158, "loss": 0.3340910077095032, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 13.454867494202198, "step_time": 1.1718125648498536} +{"epoch": 0, "iter": 14103, "iter_tflops": 26.90410488188722, "iter_time": 0.5860288925170899, "loss": 0.43165457248687744, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 28.744633688028888, "step_time": 0.5485052604675292} +{"epoch": 0, "iter": 14104, "iter_tflops": 27.233600622852848, "iter_time": 0.5789386062622071, "loss": 0.363604336977005, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 28.931815848544908, "step_time": 0.5449565582275391} +{"epoch": 0, "iter": 14105, "iter_tflops": 39.864534705968694, "iter_time": 0.5175300216674805, "loss": 0.5329800844192505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78507550809032, "step_time": 0.47119008636474613} +{"epoch": 0, "iter": 14106, "iter_tflops": 46.22259369258948, "iter_time": 0.4463421859741211, "loss": 0.6757024526596069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.07615960175485, "step_time": 0.41199432373046874} +{"epoch": 0, "iter": 14107, "iter_tflops": 50.91055524476619, "iter_time": 0.4052419662475586, "loss": 0.5970438122749329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.988912368584, "step_time": 0.37518642616271974} +{"epoch": 0, "iter": 14108, "iter_tflops": 50.59075909919525, "iter_time": 0.40780359649658204, "loss": 0.779343843460083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.64012990416717, "step_time": 0.37758134078979494} +{"epoch": 0, "iter": 14109, "iter_tflops": 27.600956988255753, "iter_time": 0.7474774703979492, "loss": 0.1759098619222641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.154454519408347, "step_time": 0.7076480712890625} +{"epoch": 0, "iter": 14110, "iter_tflops": 22.00915882595643, "iter_time": 0.9373867340087889, "loss": 0.14384229481220245, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 24.765236324463853, "step_time": 0.8330666923522949} +{"epoch": 0, "iter": 14111, "iter_tflops": 49.29523999647399, "iter_time": 0.41852100753784177, "loss": 0.18374843895435333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.44281210961427, "step_time": 0.3860405673980713} +{"epoch": 0, "iter": 14112, "iter_tflops": 47.91252749967805, "iter_time": 0.43059914779663083, "loss": 0.13850528001785278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.09200807325952, "step_time": 0.39605103111267087} +{"epoch": 0, "iter": 14113, "iter_tflops": 27.27165129486867, "iter_time": 0.6670159149169922, "loss": 0.17755275964736938, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 28.95341058436813, "step_time": 0.6282722854614258} +{"epoch": 0, "iter": 14114, "iter_tflops": 13.338474776769322, "iter_time": 1.3637710266113279, "loss": 0.15841060876846313, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 19.638470929939658, "step_time": 0.9262750396728516} +{"epoch": 0, "iter": 14115, "iter_tflops": 43.42073481673735, "iter_time": 0.41893868255615235, "loss": 0.18890246748924255, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 47.273444877615034, "step_time": 0.3847958507537842} +{"epoch": 0, "iter": 14116, "iter_tflops": 39.99026086567758, "iter_time": 0.45487638854980467, "loss": 0.1118163987994194, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 43.32124002499336, "step_time": 0.41990084838867187} +{"epoch": 0, "iter": 14117, "iter_tflops": 29.081807416468845, "iter_time": 0.7094157943725585, "loss": 0.5400000810623169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.97033065898838, "step_time": 0.6661567077636719} +{"epoch": 0, "iter": 14118, "iter_tflops": 30.8071100660984, "iter_time": 0.6696861038208007, "loss": 0.5855271816253662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.520475926649276, "step_time": 0.6154773445129395} +{"epoch": 0, "iter": 14119, "iter_tflops": 43.52206892162715, "iter_time": 0.47403751754760737, "loss": 
0.6582148671150208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.76986574319352, "step_time": 0.4411193656921386} +{"epoch": 0, "iter": 14120, "iter_tflops": 43.85839543212416, "iter_time": 0.47040237808227536, "loss": 0.6607261300086975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.305995720905884, "step_time": 0.4361200561523438} +{"epoch": 0, "iter": 14121, "iter_tflops": 28.708430209964, "iter_time": 0.7186423416137695, "loss": 0.5867477655410767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.537996465779855, "step_time": 0.6755876579284668} +{"epoch": 0, "iter": 14122, "iter_tflops": 9.026932691947746, "iter_time": 2.2855043029785156, "loss": 0.49923059344291687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.018418848963536, "step_time": 1.872418701171875} +{"epoch": 0, "iter": 14123, "iter_tflops": 11.179874604233733, "iter_time": 1.845377899169922, "loss": 0.537455677986145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.326566039083245, "step_time": 1.5481177558898926} +{"epoch": 0, "iter": 14124, "iter_tflops": 40.837501555465565, "iter_time": 0.5051996994018555, "loss": 0.427493691444397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1238765276481, "step_time": 0.4675720977783203} +{"epoch": 0, "iter": 14125, "iter_tflops": 16.10830959973049, "iter_time": 0.9533809814453125, "loss": 0.3212403357028961, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 16.848765770759005, "step_time": 0.911482551574707} +{"epoch": 0, "iter": 14126, "iter_tflops": 9.688134718225225, "iter_time": 1.585171600341797, "loss": 0.2619481086730957, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 11.351239263126416, "step_time": 1.352923294067383} +{"epoch": 0, "iter": 14127, "iter_tflops": 26.05135695188339, "iter_time": 0.5895031127929687, "loss": 0.38024112582206726, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.817765300678435, "step_time": 0.552070083618164} +{"epoch": 0, "iter": 14128, "iter_tflops": 22.924660735355324, "iter_time": 0.6699054870605469, "loss": 
0.21166017651557922, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.452072224290834, "step_time": 0.6280594902038574} +{"epoch": 0, "iter": 14129, "iter_tflops": 28.252187173020577, "iter_time": 0.730247657775879, "loss": 0.7939616441726685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.970324454184425, "step_time": 0.6883840560913086} +{"epoch": 0, "iter": 14130, "iter_tflops": 16.0896529525196, "iter_time": 1.2822584533691406, "loss": 0.8312409520149231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.080913953766157, "step_time": 1.081242416381836} +{"epoch": 0, "iter": 14131, "iter_tflops": 46.708614930599985, "iter_time": 0.4416978225708008, "loss": 0.5229458212852478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.37487237498389, "step_time": 0.40955128097534177} +{"epoch": 0, "iter": 14132, "iter_tflops": 44.33095903494338, "iter_time": 0.46538793563842773, "loss": 0.6496122479438782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.700931100283945, "step_time": 0.4325092411041259} +{"epoch": 0, "iter": 14133, "iter_tflops": 27.95156807324398, "iter_time": 0.7381014709472656, "loss": 0.2490709125995636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.481049543377125, "step_time": 0.6998086509704589} +{"epoch": 0, "iter": 14134, "iter_tflops": 19.989375564868862, "iter_time": 1.0321029510498048, "loss": 0.36273640394210815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.852622137468803, "step_time": 0.8649402732849121} +{"epoch": 0, "iter": 14135, "iter_tflops": 45.82533168817386, "iter_time": 0.4502115478515625, "loss": 0.2970390319824219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.83455495194518, "step_time": 0.41399172782897947} +{"epoch": 0, "iter": 14136, "iter_tflops": 51.48714065239738, "iter_time": 0.40070381164550783, "loss": 0.30853602290153503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.85924817227079, "step_time": 0.36934069442749023} +{"epoch": 0, "iter": 14137, "iter_tflops": 14.48869597881769, "iter_time": 
1.0176485595703124, "loss": 0.041897714138031006, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 15.094920323692506, "step_time": 0.9767789611816405} +{"epoch": 0, "iter": 14138, "iter_tflops": 10.101595492674372, "iter_time": 1.4596110687255857, "loss": 0.08656724542379379, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 12.074354227767197, "step_time": 1.2211336784362794} +{"epoch": 0, "iter": 14139, "iter_tflops": 37.04703371375949, "iter_time": 0.3979913940429687, "loss": 0.10595671832561493, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 40.54344322207103, "step_time": 0.36366917610168453} +{"epoch": 0, "iter": 14140, "iter_tflops": 37.67864793440036, "iter_time": 0.39131978988647464, "loss": 0.05575428530573845, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 41.14214856115064, "step_time": 0.35837701988220216} +{"epoch": 0, "iter": 14141, "iter_tflops": 47.76364896328532, "iter_time": 0.4319413185119629, "loss": 0.004907286260277033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33739379441601, "step_time": 0.39419413185119634} +{"epoch": 0, "iter": 14142, "iter_tflops": 12.043384926418415, "iter_time": 1.7130643615722656, "loss": 0.005304234568029642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.094353944789347, "step_time": 1.4637842636108398} +{"epoch": 0, "iter": 14143, "iter_tflops": 13.115998703113362, "iter_time": 1.5729716033935546, "loss": 0.004732708912342787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.020389640934082, "step_time": 1.3735391693115233} +{"epoch": 0, "iter": 14144, "iter_tflops": 22.556607264287724, "iter_time": 0.914636375427246, "loss": 0.0038609025068581104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.436491192682332, "step_time": 0.8110825252532959} +{"epoch": 0, "iter": 14145, "iter_tflops": 16.00500552407625, "iter_time": 1.0004585952758789, "loss": 0.3078353703022003, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 17.0037209652655, "step_time": 0.9416965484619141} +{"epoch": 0, "iter": 14146, "iter_tflops": 
10.311717662845943, "iter_time": 1.552830078125, "loss": 0.3497355580329895, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 11.789557696824483, "step_time": 1.3581803283691407} +{"epoch": 0, "iter": 14147, "iter_tflops": 8.649629136839497, "iter_time": 1.8512175598144531, "loss": 0.20916713774204254, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 10.89982928306453, "step_time": 1.4690455169677734} +{"epoch": 0, "iter": 14148, "iter_tflops": 14.426598336138676, "iter_time": 1.1099182891845703, "loss": 0.40921372175216675, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 17.070133896470075, "step_time": 0.9380327911376953} +{"epoch": 0, "iter": 14149, "iter_tflops": 12.713196153521993, "iter_time": 1.1790469665527343, "loss": 0.3756551742553711, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 13.56703788760406, "step_time": 1.1048436279296876} +{"epoch": 0, "iter": 14150, "iter_tflops": 16.81857755125665, "iter_time": 0.8912439422607422, "loss": 0.30321961641311646, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 18.348574187112973, "step_time": 0.8169275283813476} +{"epoch": 0, "iter": 14151, "iter_tflops": 24.588459715581028, "iter_time": 0.6096134338378907, "loss": 0.3439124822616577, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 26.143851523548246, "step_time": 0.5733453369140625} +{"epoch": 0, "iter": 14152, "iter_tflops": 25.731721557955442, "iter_time": 0.5825282745361329, "loss": 0.28601762652397156, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.372398734759347, "step_time": 0.5476120491027832} +{"epoch": 0, "iter": 14153, "iter_tflops": 28.176566044014564, "iter_time": 0.5131469192504883, "loss": 0.005581788718700409, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 30.86951125249856, "step_time": 0.4683818264007568} +{"epoch": 0, "iter": 14154, "iter_tflops": 12.030975788048304, "iter_time": 1.2017909698486329, "loss": 0.004820899106562138, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 13.608845224618056, "step_time": 1.0624500331878663} +{"epoch": 0, "iter": 14155, 
"iter_tflops": 29.431547356757005, "iter_time": 0.49126598358154294, "loss": 0.012447001412510872, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 32.61066313637503, "step_time": 0.44337393569946293} +{"epoch": 0, "iter": 14156, "iter_tflops": 31.630051088652383, "iter_time": 0.4571196556091308, "loss": 0.0034877562429755926, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 34.855883638956215, "step_time": 0.41481427383422853} +{"epoch": 0, "iter": 14157, "iter_tflops": 32.88844617099514, "iter_time": 0.6273052062988281, "loss": 0.7147381901741028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.108271971888946, "step_time": 0.5713675117492676} +{"epoch": 0, "iter": 14158, "iter_tflops": 36.94533672846512, "iter_time": 0.5584221267700196, "loss": 0.6514450311660767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.863110448773945, "step_time": 0.5048830909729003} +{"epoch": 0, "iter": 14159, "iter_tflops": 32.63052216476854, "iter_time": 0.6322636642456055, "loss": 0.5300695300102234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.37702885132348, "step_time": 0.583177677154541} +{"epoch": 0, "iter": 14160, "iter_tflops": 36.23787839139093, "iter_time": 0.569323989868164, "loss": 0.5407789945602417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.157907195263085, "step_time": 0.526869155883789} +{"epoch": 0, "iter": 14161, "iter_tflops": 20.569359224866034, "iter_time": 1.0030012741088865, "loss": 0.269381046295166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.11931310117021, "step_time": 0.9327185440063477} +{"epoch": 0, "iter": 14162, "iter_tflops": 17.747528503467596, "iter_time": 1.1624769897460938, "loss": 0.33487045764923096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.678621612388127, "step_time": 0.9097154960632324} +{"epoch": 0, "iter": 14163, "iter_tflops": 48.92734163501012, "iter_time": 0.4216679840087891, "loss": 0.2661217749118805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.134382068866394, "step_time": 0.3882814235687256} +{"epoch": 0, 
"iter": 14164, "iter_tflops": 45.99161310447638, "iter_time": 0.44858382034301764, "loss": 0.4075099527835846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.235063190716296, "step_time": 0.41069110298156736} +{"epoch": 0, "iter": 14165, "iter_tflops": 41.008577228603045, "iter_time": 0.503092155456543, "loss": 0.22630776464939117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.612580700927445, "step_time": 0.4624501247406006} +{"epoch": 0, "iter": 14166, "iter_tflops": 7.85900763438488, "iter_time": 2.6251524963378907, "loss": 0.19236448407173157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.693734635755442, "step_time": 2.128291549682617} +{"epoch": 0, "iter": 14167, "iter_tflops": 10.75001672857911, "iter_time": 1.919168502807617, "loss": 0.15358079969882965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.000001198843332, "step_time": 1.4736494102478028} +{"epoch": 0, "iter": 14168, "iter_tflops": 25.18463579413543, "iter_time": 0.8191936416625976, "loss": 0.11566724628210068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.93131240591329, "step_time": 0.5166645488739013} +{"epoch": 0, "iter": 14169, "iter_tflops": 12.581478813869374, "iter_time": 1.2661791076660156, "loss": 0.3796602189540863, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 13.617958062553946, "step_time": 1.1698086853027345} +{"epoch": 0, "iter": 14170, "iter_tflops": 16.211570089016824, "iter_time": 0.9826565551757812, "loss": 0.3893057405948639, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 17.870700433396802, "step_time": 0.8914259223937988} +{"epoch": 0, "iter": 14171, "iter_tflops": 26.551838131527017, "iter_time": 0.5999737396240233, "loss": 0.4488445520401001, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.306779448645393, "step_time": 0.562777042388916} +{"epoch": 0, "iter": 14172, "iter_tflops": 28.577325934172624, "iter_time": 0.5574491348266601, "loss": 0.40287575125694275, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.411195007976147, "step_time": 0.5238335952758789} 
+{"epoch": 0, "iter": 14173, "iter_tflops": 27.17419942448539, "iter_time": 0.7592162399291993, "loss": 0.5595910549163818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.66913177641514, "step_time": 0.7196274261474609} +{"epoch": 0, "iter": 14174, "iter_tflops": 13.111422067311349, "iter_time": 1.5735206604003906, "loss": 0.5850121378898621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.546424319718806, "step_time": 1.246861141204834} +{"epoch": 0, "iter": 14175, "iter_tflops": 34.80733070112673, "iter_time": 0.59272265625, "loss": 0.4063290059566498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.86427587252443, "step_time": 0.5448696174621581} +{"epoch": 0, "iter": 14176, "iter_tflops": 35.89713658630532, "iter_time": 0.5747281112670899, "loss": 0.5852094888687134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.16030730089998, "step_time": 0.5268368644714355} +{"epoch": 0, "iter": 14177, "iter_tflops": 1.1299936775800985, "iter_time": 1.2479670410156247, "loss": 1.2976493835449219, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.209922672441322, "step_time": 1.1655247879028319} +{"epoch": 0, "iter": 14178, "iter_tflops": 1.412981913493681, "iter_time": 0.9980275421142578, "loss": 1.3308266401290894, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.9582247312970336, "step_time": 0.7201394424438478} +{"epoch": 0, "iter": 14179, "iter_tflops": 3.3490620384363816, "iter_time": 0.42107158660888666, "loss": 1.1595388650894165, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.6397181837708645, "step_time": 0.38744616889953615} +{"epoch": 0, "iter": 14180, "iter_tflops": 3.481489277602951, "iter_time": 0.4050550651550293, "loss": 1.1516245603561401, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.766840915196929, "step_time": 0.374370698928833} +{"epoch": 0, "iter": 14181, "iter_tflops": 44.36029314759724, "iter_time": 0.46508018875122076, "loss": 0.08108876645565033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.52564234885401, "step_time": 0.42515858650207516} 
+{"epoch": 0, "iter": 14182, "iter_tflops": 44.71757939296729, "iter_time": 0.4613642730712891, "loss": 0.13495266437530518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1504840306829, "step_time": 0.4197536182403565} +{"epoch": 0, "iter": 14183, "iter_tflops": 48.27879009064205, "iter_time": 0.42733244705200196, "loss": 0.09076552093029022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.55183153080609, "step_time": 0.39258562278747566} +{"epoch": 0, "iter": 14184, "iter_tflops": 54.424999778842896, "iter_time": 0.37907383728027344, "loss": 0.13815969228744507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.2723247335186, "step_time": 0.34807295989990233} +{"epoch": 0, "iter": 14185, "iter_tflops": 42.75323140777316, "iter_time": 0.4825622024536133, "loss": 0.7616850137710571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.28094982791815, "step_time": 0.44577938842773435} +{"epoch": 0, "iter": 14186, "iter_tflops": 43.13599947502223, "iter_time": 0.4782801780700684, "loss": 0.5268097519874573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.521552277763256, "step_time": 0.43414182662963874} +{"epoch": 0, "iter": 14187, "iter_tflops": 45.68664271769894, "iter_time": 0.4515782356262206, "loss": 0.614377498626709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50529398778742, "step_time": 0.41674519729614257} +{"epoch": 0, "iter": 14188, "iter_tflops": 46.23247666814427, "iter_time": 0.44624677276611324, "loss": 0.6544610857963562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96982516274873, "step_time": 0.41287103652954105} +{"epoch": 0, "iter": 14189, "iter_tflops": 26.353967596361326, "iter_time": 0.7828458251953125, "loss": 0.6317350268363953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.70360696045467, "step_time": 0.7447078475952148} +{"epoch": 0, "iter": 14190, "iter_tflops": 15.856144613541035, "iter_time": 1.3011418609619139, "loss": 0.5624030232429504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.694042890230687, "step_time": 
0.9969580917358398} +{"epoch": 0, "iter": 14191, "iter_tflops": 46.43881137291869, "iter_time": 0.44426403045654306, "loss": 0.5112481713294983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.659476413641045, "step_time": 0.4072504291534424} +{"epoch": 0, "iter": 14192, "iter_tflops": 45.11560083440938, "iter_time": 0.4572939987182617, "loss": 0.5482224225997925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.65463110185136, "step_time": 0.42403144454956054} +{"epoch": 0, "iter": 14193, "iter_tflops": 40.13570606573735, "iter_time": 0.5140334014892578, "loss": 0.29406678676605225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.726854058361724, "step_time": 0.4718174667358399} +{"epoch": 0, "iter": 14194, "iter_tflops": 45.00501281421892, "iter_time": 0.45841767883300777, "loss": 0.2790535092353821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.343293735731386, "step_time": 0.41811342430114745} +{"epoch": 0, "iter": 14195, "iter_tflops": 45.92184049508575, "iter_time": 0.4492653884887695, "loss": 0.20656098425388336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.43445455628939, "step_time": 0.41734239196777345} +{"epoch": 0, "iter": 14196, "iter_tflops": 48.48493798293329, "iter_time": 0.42551551818847655, "loss": 0.31861868500709534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.600109476532324, "step_time": 0.3922252960205078} +{"epoch": 0, "iter": 14197, "iter_tflops": 41.18217159916972, "iter_time": 0.5009714813232422, "loss": 0.15728537738323212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.805343408977635, "step_time": 0.4604605598449707} +{"epoch": 0, "iter": 14198, "iter_tflops": 48.06433304611832, "iter_time": 0.42923915100097654, "loss": 0.18147405982017517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.859948056451344, "step_time": 0.3902972717285156} +{"epoch": 0, "iter": 14199, "iter_tflops": 46.48432972342433, "iter_time": 0.44382899856567387, "loss": 0.1255444437265396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
50.16757800358894, "step_time": 0.41124356269836426} +{"epoch": 0, "iter": 14200, "iter_tflops": 48.39816596680496, "iter_time": 0.4262784156799317, "loss": 0.1461613029241562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.21450564010468, "step_time": 0.39512187767028806} +{"epoch": 0, "iter": 14201, "iter_tflops": 42.48971952996676, "iter_time": 0.4855549468994141, "loss": 0.36033862829208374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.16447674367235, "step_time": 0.44690409088134764} +{"epoch": 0, "iter": 14202, "iter_tflops": 37.13375194276942, "iter_time": 0.5555887145996093, "loss": 0.38596221804618835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.03014438693179, "step_time": 0.502827709197998} +{"epoch": 0, "iter": 14203, "iter_tflops": 36.82832998706297, "iter_time": 0.5601962814331055, "loss": 0.33854201436042786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.60560865334019, "step_time": 0.5080848236083985} +{"epoch": 0, "iter": 14204, "iter_tflops": 39.77546433197602, "iter_time": 0.5186889419555664, "loss": 0.419569730758667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65430501675118, "step_time": 0.4726015796661377} +{"epoch": 0, "iter": 14205, "iter_tflops": 33.15251303016031, "iter_time": 0.6223085861206055, "loss": 0.42769014835357666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.510290608491836, "step_time": 0.5650761241912842} +{"epoch": 0, "iter": 14206, "iter_tflops": 32.72868920107526, "iter_time": 0.6303672409057617, "loss": 0.5870949625968933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.203080152575325, "step_time": 0.5698712215423584} +{"epoch": 0, "iter": 14207, "iter_tflops": 37.933357226788736, "iter_time": 0.543877342224121, "loss": 0.678823709487915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.79255343770595, "step_time": 0.49365477371215816} +{"epoch": 0, "iter": 14208, "iter_tflops": 37.7844362682865, "iter_time": 0.5460209426879883, "loss": 0.5301109552383423, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 41.39154047906719, "step_time": 0.4984374408721924} +{"epoch": 0, "iter": 14209, "iter_tflops": 20.197875191414578, "iter_time": 1.0214487075805665, "loss": 0.6943933963775635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.758034254450358, "step_time": 0.9482057647705079} +{"epoch": 0, "iter": 14210, "iter_tflops": 40.135090118495384, "iter_time": 0.5140412902832032, "loss": 0.8003326654434204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37974383761803, "step_time": 0.47559279251098635} +{"epoch": 0, "iter": 14211, "iter_tflops": 47.83303360393753, "iter_time": 0.4313147621154786, "loss": 0.5422476530075073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.43327267206966, "step_time": 0.40112348365783695} +{"epoch": 0, "iter": 14212, "iter_tflops": 47.71141539361523, "iter_time": 0.43241419982910156, "loss": 0.6480799317359924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.28452542160429, "step_time": 0.40228691482543943} +{"epoch": 0, "iter": 14213, "iter_tflops": 35.38087109426383, "iter_time": 0.5153025321960449, "loss": 0.10298215597867966, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 38.17835255855754, "step_time": 0.47754424285888675} +{"epoch": 0, "iter": 14214, "iter_tflops": 35.25698413414374, "iter_time": 0.5171132164001465, "loss": 0.06685464829206467, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 44.882696638243395, "step_time": 0.4062111644744873} +{"epoch": 0, "iter": 14215, "iter_tflops": 44.05894307478752, "iter_time": 0.41380594253540043, "loss": 0.0397765226662159, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 48.14648092787424, "step_time": 0.3786746635437012} +{"epoch": 0, "iter": 14216, "iter_tflops": 48.01026434770978, "iter_time": 0.37974905395507813, "loss": 0.05501599982380867, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 52.02785684454432, "step_time": 0.35042482185363766} +{"epoch": 0, "iter": 14217, "iter_tflops": 47.825750604878365, "iter_time": 0.43138044357299804, "loss": 0.04238845407962799, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 52.6861039911828, "step_time": 0.39158510398864743} +{"epoch": 0, "iter": 14218, "iter_tflops": 50.35769394778211, "iter_time": 0.40969099044799806, "loss": 0.0727103054523468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.93778876007305, "step_time": 0.3755355644226074} +{"epoch": 0, "iter": 14219, "iter_tflops": 50.24993067229533, "iter_time": 0.41056959152221684, "loss": 0.0606439970433712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.04798241897482, "step_time": 0.37478382682800293} +{"epoch": 0, "iter": 14220, "iter_tflops": 51.91552711626719, "iter_time": 0.39739736175537105, "loss": 0.07705280929803848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.553793541088666, "step_time": 0.36480476760864267} +{"epoch": 0, "iter": 14221, "iter_tflops": 28.50872964365637, "iter_time": 0.7236763534545898, "loss": 0.7580973505973816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.281989784242295, "step_time": 0.6812991371154784} +{"epoch": 0, "iter": 14222, "iter_tflops": 17.64318571199821, "iter_time": 1.1693519439697264, "loss": 0.7450766563415527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.390311285357704, "step_time": 0.88203586769104} +{"epoch": 0, "iter": 14223, "iter_tflops": 42.74264717770488, "iter_time": 0.482681697845459, "loss": 0.7696699500083923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.87233732241366, "step_time": 0.4497502136230469} +{"epoch": 0, "iter": 14224, "iter_tflops": 45.62710011333123, "iter_time": 0.4521675376892089, "loss": 0.5827208757400513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27825602388768, "step_time": 0.4186652526855469} +{"epoch": 0, "iter": 14225, "iter_tflops": 44.704838146185196, "iter_time": 0.4614957656860352, "loss": 0.15585307776927948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6149564235431, "step_time": 0.42437749671936037} +{"epoch": 0, "iter": 14226, "iter_tflops": 44.51444925279532, "iter_time": 0.4634695892333984, "loss": 0.1767292022705078, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 49.54795187858048, "step_time": 0.4163864040374756} +{"epoch": 0, "iter": 14227, "iter_tflops": 46.593597728929666, "iter_time": 0.4427881622314453, "loss": 0.18594865500926971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.42547143485605, "step_time": 0.4091403198242187} +{"epoch": 0, "iter": 14228, "iter_tflops": 49.076461841235634, "iter_time": 0.420386734008789, "loss": 0.26896387338638306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.897381372107496, "step_time": 0.3900210742950439} +{"epoch": 0, "iter": 14229, "iter_tflops": 43.6164293150883, "iter_time": 0.47301197814941404, "loss": 0.18723064661026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.64479388850416, "step_time": 0.4330188426971435} +{"epoch": 0, "iter": 14230, "iter_tflops": 48.66734332431569, "iter_time": 0.4239206848144531, "loss": 0.26296430826187134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.16910043287353, "step_time": 0.3880278835296631} +{"epoch": 0, "iter": 14231, "iter_tflops": 51.71798762548769, "iter_time": 0.39891524124145505, "loss": 0.26012587547302246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.37596716099979, "step_time": 0.3659554691314697} +{"epoch": 0, "iter": 14232, "iter_tflops": 52.08578747238309, "iter_time": 0.39609833145141604, "loss": 0.20118080079555511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.79987695401827, "step_time": 0.3632242641448975} +{"epoch": 0, "iter": 14233, "iter_tflops": 32.96392686067395, "iter_time": 0.6258688049316405, "loss": 0.491728276014328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.941453900569236, "step_time": 0.5904474830627441} +{"epoch": 0, "iter": 14234, "iter_tflops": 27.08844881853407, "iter_time": 0.7616195983886718, "loss": 0.45172837376594543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.328460546777826, "step_time": 0.6190232963562011} +{"epoch": 0, "iter": 14235, "iter_tflops": 36.228167694869924, "iter_time": 0.569476593017578, "loss": 
0.5625333189964294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.453960179336256, "step_time": 0.522915657043457} +{"epoch": 0, "iter": 14236, "iter_tflops": 39.214027179433785, "iter_time": 0.5261151428222656, "loss": 0.5960243940353394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.01886119309971, "step_time": 0.47958251190185547} +{"epoch": 0, "iter": 14237, "iter_tflops": 15.884752984692108, "iter_time": 1.2257576446533203, "loss": 0.009133078157901764, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 16.94522474002447, "step_time": 1.1490468673706054} +{"epoch": 0, "iter": 14238, "iter_tflops": 16.150289518632164, "iter_time": 1.205604232788086, "loss": 0.008412324823439121, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 22.05429764933759, "step_time": 0.8828600082397461} +{"epoch": 0, "iter": 14239, "iter_tflops": 43.873033460243136, "iter_time": 0.4438001174926758, "loss": 0.0076414053328335285, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 48.48209938935731, "step_time": 0.4016092052459717} +{"epoch": 0, "iter": 14240, "iter_tflops": 45.69841057348277, "iter_time": 0.42607296752929685, "loss": 0.019609469920396805, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 50.5288427031493, "step_time": 0.3853414478302002} +{"epoch": 0, "iter": 14241, "iter_tflops": 16.54339069736526, "iter_time": 1.2470897827148437, "loss": 0.67721027135849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.656803182060205, "step_time": 1.1684501037597657} +{"epoch": 0, "iter": 14242, "iter_tflops": 17.803198015691912, "iter_time": 1.1588419952392577, "loss": 0.6620005369186401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.938975010967738, "step_time": 0.7953704223632813} +{"epoch": 0, "iter": 14243, "iter_tflops": 36.37707833145814, "iter_time": 0.5671454238891602, "loss": 0.649468719959259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.73642495967585, "step_time": 0.5191985321044922} +{"epoch": 0, "iter": 14244, "iter_tflops": 41.117724435399005, "iter_time": 
0.5017566947937012, "loss": 0.6493955254554749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.74847331271319, "step_time": 0.4610457515716552} +{"epoch": 0, "iter": 14245, "iter_tflops": 27.68591197874637, "iter_time": 0.7451838150024414, "loss": 0.031854212284088135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.98725254558393, "step_time": 0.6879954566955566} +{"epoch": 0, "iter": 14246, "iter_tflops": 11.437355984314669, "iter_time": 1.8038341674804688, "loss": 0.08242341876029968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.024541104102022, "step_time": 1.5840169219970703} +{"epoch": 0, "iter": 14247, "iter_tflops": 16.7318727267372, "iter_time": 1.2330415039062501, "loss": 0.06729567795991898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.77871214254956, "step_time": 1.09864262008667} +{"epoch": 0, "iter": 14248, "iter_tflops": 23.97345904241098, "iter_time": 0.8605805892944336, "loss": 0.03622414916753769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.853817472137358, "step_time": 0.7150212802886963} +{"epoch": 0, "iter": 14249, "iter_tflops": 14.605187382510811, "iter_time": 1.0403014373779296, "loss": 0.33183878660202026, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 15.580099304387709, "step_time": 0.9752054290771485} +{"epoch": 0, "iter": 14250, "iter_tflops": 14.807698043287822, "iter_time": 1.0260742340087892, "loss": 0.43732282519340515, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 17.50190409247558, "step_time": 0.8681225395202637} +{"epoch": 0, "iter": 14251, "iter_tflops": 22.432467913476238, "iter_time": 0.6773127899169922, "loss": 0.43985942006111145, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 24.078844173958757, "step_time": 0.6310019416809082} +{"epoch": 0, "iter": 14252, "iter_tflops": 22.038420251131864, "iter_time": 0.6894231643676758, "loss": 0.34207603335380554, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 23.639321393063288, "step_time": 0.6427340774536133} +{"epoch": 0, "iter": 14253, "iter_tflops": 
22.891283623845915, "iter_time": 0.9012641601562501, "loss": 0.4009796380996704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.539423022104568, "step_time": 0.840732624053955} +{"epoch": 0, "iter": 14254, "iter_tflops": 9.069212545832372, "iter_time": 2.2748494873046874, "loss": 0.5529444217681885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.163498691564275, "step_time": 2.029920417785645} +{"epoch": 0, "iter": 14255, "iter_tflops": 15.58947625305541, "iter_time": 1.3233987579345703, "loss": 0.4320540130138397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.639838880743326, "step_time": 1.1068278884887697} +{"epoch": 0, "iter": 14256, "iter_tflops": 41.502620855060144, "iter_time": 0.49710338973999024, "loss": 0.4488195776939392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37451254728967, "step_time": 0.4546846313476563} +{"epoch": 0, "iter": 14257, "iter_tflops": 20.234240567534307, "iter_time": 0.7266679763793944, "loss": 0.30889853835105896, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 21.43346977594667, "step_time": 0.6860100021362304} +{"epoch": 0, "iter": 14258, "iter_tflops": 12.87637196121851, "iter_time": 1.1419035339355468, "loss": 0.3035651743412018, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 16.362457117364734, "step_time": 0.8986165428161621} +{"epoch": 0, "iter": 14259, "iter_tflops": 25.482296980956892, "iter_time": 0.5770113525390624, "loss": 0.33295968174934387, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.255407717257903, "step_time": 0.5394736633300782} +{"epoch": 0, "iter": 14260, "iter_tflops": 25.62045700693318, "iter_time": 0.5738997802734375, "loss": 0.2792406678199768, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.118312377264218, "step_time": 0.5422009468078614} +{"epoch": 0, "iter": 14261, "iter_tflops": 42.18957584001565, "iter_time": 0.4890092658996582, "loss": 0.10282571613788605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.301312225493255, "step_time": 0.4455833435058594} +{"epoch": 0, "iter": 14262, 
"iter_tflops": 34.7102047502286, "iter_time": 0.5943812103271484, "loss": 0.13680599629878998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.91507592938238, "step_time": 0.5301568355560303} +{"epoch": 0, "iter": 14263, "iter_tflops": 43.85470285481896, "iter_time": 0.47044198608398435, "loss": 0.09563286602497101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.291568173325544, "step_time": 0.42721937370300295} +{"epoch": 0, "iter": 14264, "iter_tflops": 40.003654683946635, "iter_time": 0.5157302169799804, "loss": 0.06095295399427414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13854127427934, "step_time": 0.46741674995422366} +{"epoch": 0, "iter": 14265, "iter_tflops": 17.50701419057, "iter_time": 1.0061077423095703, "loss": 0.05599874630570412, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 18.8301806612229, "step_time": 0.9354101715087889} +{"epoch": 0, "iter": 14266, "iter_tflops": 16.58478852233661, "iter_time": 1.0620540924072264, "loss": 0.08768294006586075, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 20.18165501967975, "step_time": 0.8727699737548829} +{"epoch": 0, "iter": 14267, "iter_tflops": 33.43928050565703, "iter_time": 0.5267440643310548, "loss": 0.07360709458589554, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 36.931920892588515, "step_time": 0.47693004035949704} +{"epoch": 0, "iter": 14268, "iter_tflops": 37.953490439501735, "iter_time": 0.46409282302856447, "loss": 0.027810780331492424, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 41.77514039067471, "step_time": 0.4216369438171387} +{"epoch": 0, "iter": 14269, "iter_tflops": 17.842948252241907, "iter_time": 1.1562603454589844, "loss": 0.05322714149951935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.928365894091403, "step_time": 1.089956398010254} +{"epoch": 0, "iter": 14270, "iter_tflops": 14.702491973783623, "iter_time": 1.4032378692626952, "loss": 0.05036328732967377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.772659838900637, "step_time": 1.0989968223571778} +{"epoch": 0, 
"iter": 14271, "iter_tflops": 41.280277061136665, "iter_time": 0.4997808876037598, "loss": 0.08039724826812744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.442300744479766, "step_time": 0.45400635910034176} +{"epoch": 0, "iter": 14272, "iter_tflops": 44.7181243938184, "iter_time": 0.46135865020751954, "loss": 0.07651323825120926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.91715396622879, "step_time": 0.421755802154541} +{"epoch": 0, "iter": 14273, "iter_tflops": 18.495671996985244, "iter_time": 1.1154552001953124, "loss": 0.13838458061218262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.765075808078077, "step_time": 1.0438155517578125} +{"epoch": 0, "iter": 14274, "iter_tflops": 18.640415153393175, "iter_time": 1.106793670654297, "loss": 0.14928802847862244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.09592467145585, "step_time": 0.8932785243988037} +{"epoch": 0, "iter": 14275, "iter_tflops": 51.4395340803123, "iter_time": 0.4010746574401856, "loss": 0.0936078280210495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.18981710885957, "step_time": 0.3671678352355957} +{"epoch": 0, "iter": 14276, "iter_tflops": 44.84059789606955, "iter_time": 0.46009853744506835, "loss": 0.1319880485534668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.60895240074826, "step_time": 0.4244299144744873} +{"epoch": 0, "iter": 14277, "iter_tflops": 28.010421715777294, "iter_time": 0.7365506210327148, "loss": 0.2552770972251892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.62332189387649, "step_time": 0.6964476699829101} +{"epoch": 0, "iter": 14278, "iter_tflops": 23.911276125639905, "iter_time": 0.862818588256836, "loss": 0.22532537579536438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.535915797538735, "step_time": 0.6985086784362794} +{"epoch": 0, "iter": 14279, "iter_tflops": 49.51317278539619, "iter_time": 0.4166788825988769, "loss": 0.16590136289596558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.715014286215045, "step_time": 0.38408429718017584} 
+{"epoch": 0, "iter": 14280, "iter_tflops": 49.26766409360429, "iter_time": 0.41875526046752937, "loss": 0.16741803288459778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26147989340975, "step_time": 0.3873548679351806} +{"epoch": 0, "iter": 14281, "iter_tflops": 35.51847752257706, "iter_time": 0.5808552322387694, "loss": 0.5765808820724487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.9596598343302, "step_time": 0.5435004844665527} +{"epoch": 0, "iter": 14282, "iter_tflops": 13.733651304374476, "iter_time": 1.5022293090820313, "loss": 0.5209129452705383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.156672482418315, "step_time": 1.2769395141601563} +{"epoch": 0, "iter": 14283, "iter_tflops": 37.746645876356666, "iter_time": 0.5465675964355469, "loss": 0.8391734957695007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.250125384601155, "step_time": 0.4883084564208985} +{"epoch": 0, "iter": 14284, "iter_tflops": 45.89918947639508, "iter_time": 0.4494870986938476, "loss": 0.6510781645774841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.61510826725005, "step_time": 0.4158228054046631} +{"epoch": 0, "iter": 14285, "iter_tflops": 31.35427708476454, "iter_time": 0.6579993362426758, "loss": 0.2762604355812073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.46666539888907, "step_time": 0.6164669609069824} +{"epoch": 0, "iter": 14286, "iter_tflops": 9.00479038911184, "iter_time": 2.291124237060547, "loss": 0.27084484696388245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.502638872262352, "step_time": 1.964372360229492} +{"epoch": 0, "iter": 14287, "iter_tflops": 11.04706820115055, "iter_time": 1.8675627899169922, "loss": 0.43040603399276733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.518723054638029, "step_time": 1.6480190048217775} +{"epoch": 0, "iter": 14288, "iter_tflops": 34.25024828784261, "iter_time": 0.6023633270263672, "loss": 0.3078101873397827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.39942495542732, "step_time": 
0.510677900314331} +{"epoch": 0, "iter": 14289, "iter_tflops": 15.685383682493375, "iter_time": 0.9530274429321288, "loss": 0.30201345682144165, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 17.01606264512069, "step_time": 0.8784994163513185} +{"epoch": 0, "iter": 14290, "iter_tflops": 24.986994674039355, "iter_time": 0.5982552642822264, "loss": 0.2121087908744812, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 26.66326337995107, "step_time": 0.5606440925598145} +{"epoch": 0, "iter": 14291, "iter_tflops": 27.131095224111093, "iter_time": 0.550976692199707, "loss": 0.33378368616104126, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.99058208793632, "step_time": 0.5156364593505859} +{"epoch": 0, "iter": 14292, "iter_tflops": 26.710082791604382, "iter_time": 0.5596613540649414, "loss": 0.3882484436035156, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.309438434170396, "step_time": 0.5280430107116699} +{"epoch": 0, "iter": 14293, "iter_tflops": 31.88277073128718, "iter_time": 0.6470922393798829, "loss": 0.04634593054652214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.04283967647951, "step_time": 0.6060332717895508} +{"epoch": 0, "iter": 14294, "iter_tflops": 9.370293777791463, "iter_time": 2.2017552490234373, "loss": 0.0638340637087822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.20319972681041, "step_time": 1.8415358123779297} +{"epoch": 0, "iter": 14295, "iter_tflops": 10.885563010071193, "iter_time": 1.8952711486816407, "loss": 0.07214813679456711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.603557802427556, "step_time": 1.6369261627197267} +{"epoch": 0, "iter": 14296, "iter_tflops": 26.690166274040532, "iter_time": 0.7729848251342774, "loss": 0.07486363500356674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.72662315391368, "step_time": 0.6117153625488282} +{"epoch": 0, "iter": 14297, "iter_tflops": 21.82419536084227, "iter_time": 0.7093079147338867, "loss": 0.18376930058002472, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 23.12266036506589, 
"step_time": 0.6694763603210448} +{"epoch": 0, "iter": 14298, "iter_tflops": 26.01976886703996, "iter_time": 0.5949351272583009, "loss": 0.4193113148212433, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.889204004246118, "step_time": 0.5358428878784179} +{"epoch": 0, "iter": 14299, "iter_tflops": 26.390920829097396, "iter_time": 0.5865681838989258, "loss": 0.3540883958339691, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.127650869764548, "step_time": 0.5503507766723633} +{"epoch": 0, "iter": 14300, "iter_tflops": 28.075311761489843, "iter_time": 0.5513767623901367, "loss": 0.5111654996871948, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 29.826029216422526, "step_time": 0.51901224899292} +{"epoch": 0, "iter": 14301, "iter_tflops": 21.031532790525528, "iter_time": 0.9809600524902344, "loss": 0.06493712216615677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.010808402888337, "step_time": 0.9373164825439452} +{"epoch": 0, "iter": 14302, "iter_tflops": 13.77086407183404, "iter_time": 1.4981698608398437, "loss": 0.0323965959250927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.132808446070104, "step_time": 1.2041863174438476} +{"epoch": 0, "iter": 14303, "iter_tflops": 52.98078353513429, "iter_time": 0.3894071044921875, "loss": 0.061160292476415634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.0327843899296, "step_time": 0.35550755882263185} +{"epoch": 0, "iter": 14304, "iter_tflops": 55.82389512382419, "iter_time": 0.3695745964050293, "loss": 0.03344211354851723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.27815092613115, "step_time": 0.3366794395446777} +{"epoch": 0, "iter": 14305, "iter_tflops": 31.08958625582452, "iter_time": 0.6636014175415039, "loss": 0.590643048286438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.09860757973849, "step_time": 0.6233220977783203} +{"epoch": 0, "iter": 14306, "iter_tflops": 10.92997165254115, "iter_time": 1.8875706329345703, "loss": 0.5989658832550049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
12.558256866148376, "step_time": 1.642830986022949} +{"epoch": 0, "iter": 14307, "iter_tflops": 37.57508324235869, "iter_time": 0.5490631484985352, "loss": 0.7376694083213806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.03159513429187, "step_time": 0.5028099308013916} +{"epoch": 0, "iter": 14308, "iter_tflops": 37.95569870616554, "iter_time": 0.5435572052001952, "loss": 0.7390717267990112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.06246770115988, "step_time": 0.5024318962097167} +{"epoch": 0, "iter": 14309, "iter_tflops": 17.225229182503917, "iter_time": 1.197725341796875, "loss": 0.6105673313140869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.303302158255693, "step_time": 1.1271787643432618} +{"epoch": 0, "iter": 14310, "iter_tflops": 14.523757671582947, "iter_time": 1.4205065917968749, "loss": 0.5607779026031494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.69697637825168, "step_time": 1.1034454498291015} +{"epoch": 0, "iter": 14311, "iter_tflops": 41.642140095936156, "iter_time": 0.49543787765502934, "loss": 0.39882519841194153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.279387458107855, "step_time": 0.45563985443115235} +{"epoch": 0, "iter": 14312, "iter_tflops": 47.400894316172156, "iter_time": 0.4352469253540039, "loss": 0.6014282703399658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.081108906204385, "step_time": 0.4038889122009277} +{"epoch": 0, "iter": 14313, "iter_tflops": 24.68618874698649, "iter_time": 0.8357342529296874, "loss": 0.5755777955055237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.910672817869877, "step_time": 0.7962392044067382} +{"epoch": 0, "iter": 14314, "iter_tflops": 12.005497593929562, "iter_time": 1.718470504760742, "loss": 0.8444244861602783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.896186290609734, "step_time": 1.384991641998291} +{"epoch": 0, "iter": 14315, "iter_tflops": 41.15952999247247, "iter_time": 0.5012470626831055, "loss": 0.9275941848754883, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.14832372924076, "step_time": 0.4673131790161133} +{"epoch": 0, "iter": 14316, "iter_tflops": 42.220334990760435, "iter_time": 0.488653003692627, "loss": 0.713785707950592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37250682511361, "step_time": 0.4547047309875488} +{"epoch": 0, "iter": 14317, "iter_tflops": 24.106328446176825, "iter_time": 0.5947132720947266, "loss": 0.020681416615843773, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 25.89438209830782, "step_time": 0.5536472511291504} +{"epoch": 0, "iter": 14318, "iter_tflops": 6.458397596640857, "iter_time": 2.219800384521484, "loss": 0.00738232908770442, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 7.689496237466767, "step_time": 1.8644073715209961} +{"epoch": 0, "iter": 14319, "iter_tflops": 10.121117897197445, "iter_time": 1.4164792480468749, "loss": 0.007949408143758774, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 11.750028876868004, "step_time": 1.220112190246582} +{"epoch": 0, "iter": 14320, "iter_tflops": 17.88123557035677, "iter_time": 0.8017540740966798, "loss": 0.008827133104205132, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 25.449626027818148, "step_time": 0.5633227558135986} +{"epoch": 0, "iter": 14321, "iter_tflops": 12.524698805599552, "iter_time": 1.2980973052978515, "loss": 0.40397122502326965, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 13.258657198609816, "step_time": 1.226238639831543} +{"epoch": 0, "iter": 14322, "iter_tflops": 13.95989271128048, "iter_time": 1.1646420288085937, "loss": 0.3503311276435852, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 17.439011343487152, "step_time": 0.9322935485839845} +{"epoch": 0, "iter": 14323, "iter_tflops": 26.944961192350295, "iter_time": 0.6033884277343751, "loss": 0.3165973424911499, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 28.592048238129898, "step_time": 0.5686293487548828} +{"epoch": 0, "iter": 14324, "iter_tflops": 28.970558867782078, "iter_time": 0.5612000045776367, "loss": 0.32048773765563965, "lr": 3e-05, 
"seqlen": 6496.0, "step_tflops": 30.809052334296453, "step_time": 0.5277110633850097} +{"epoch": 0, "iter": 14325, "iter_tflops": 27.003267898304866, "iter_time": 0.764022102355957, "loss": 0.6680038571357727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.65116492160616, "step_time": 0.7200786972045898} +{"epoch": 0, "iter": 14326, "iter_tflops": 20.43487678554066, "iter_time": 1.00960205078125, "loss": 0.6236410140991211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.71107310225077, "step_time": 0.8348926582336426} +{"epoch": 0, "iter": 14327, "iter_tflops": 36.29078329745462, "iter_time": 0.568494026184082, "loss": 0.4942220449447632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.387154487201485, "step_time": 0.5238025894165039} +{"epoch": 0, "iter": 14328, "iter_tflops": 42.350003626501085, "iter_time": 0.48715682983398434, "loss": 0.6071872711181641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.9541457230584, "step_time": 0.44894956016540527} +{"epoch": 0, "iter": 14329, "iter_tflops": 31.65882444806647, "iter_time": 0.6516696014404297, "loss": 0.4759523868560791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.097709892808304, "step_time": 0.5878187942504882} +{"epoch": 0, "iter": 14330, "iter_tflops": 38.81972242227795, "iter_time": 0.5314590682983398, "loss": 0.4596042037010193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.64057318531529, "step_time": 0.4838371524810791} +{"epoch": 0, "iter": 14331, "iter_tflops": 36.82544467875765, "iter_time": 0.5602401733398437, "loss": 0.5969531536102295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.30971920996448, "step_time": 0.5118143692016601} +{"epoch": 0, "iter": 14332, "iter_tflops": 37.96274613916814, "iter_time": 0.543456298828125, "loss": 0.6222997307777405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34114686975753, "step_time": 0.49904502105712895} +{"epoch": 0, "iter": 14333, "iter_tflops": 19.48030271249022, "iter_time": 1.0590745849609375, "loss": 0.1764308214187622, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.95343838501363, "step_time": 0.9846161346435547} +{"epoch": 0, "iter": 14334, "iter_tflops": 17.870687236834023, "iter_time": 1.154465591430664, "loss": 0.16938133537769318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.933736684718493, "step_time": 0.795531078338623} +{"epoch": 0, "iter": 14335, "iter_tflops": 49.67466087665553, "iter_time": 0.4153242950439453, "loss": 0.008401203900575638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.11969157902009, "step_time": 0.3742962436676026} +{"epoch": 0, "iter": 14336, "iter_tflops": 48.33865145385552, "iter_time": 0.42680324935913083, "loss": 0.009758648462593555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.53585795777925, "step_time": 0.3853696250915527} +{"epoch": 0, "iter": 14337, "iter_tflops": 20.71099593580969, "iter_time": 0.9961420288085938, "loss": 0.12303349375724792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.284743849525192, "step_time": 0.9257945098876954} +{"epoch": 0, "iter": 14338, "iter_tflops": 13.839853834752363, "iter_time": 1.490701690673828, "loss": 0.08952793478965759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.393760965251772, "step_time": 1.1861203308105468} +{"epoch": 0, "iter": 14339, "iter_tflops": 45.61917228500997, "iter_time": 0.4522461166381836, "loss": 0.09975866973400116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.38737093310925, "step_time": 0.4177402667999268} +{"epoch": 0, "iter": 14340, "iter_tflops": 53.630392744836506, "iter_time": 0.38469033050537105, "loss": 0.03581696003675461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.30423949979483, "step_time": 0.353852373123169} +{"epoch": 0, "iter": 14341, "iter_tflops": 24.739553836085022, "iter_time": 0.8339315109252929, "loss": 0.2544763386249542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.089469187560297, "step_time": 0.7907824172973633} +{"epoch": 0, "iter": 14342, "iter_tflops": 18.431430925242765, "iter_time": 1.119343017578125, "loss": 0.2014656364917755, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.951013049249276, "step_time": 0.8268639621734619} +{"epoch": 0, "iter": 14343, "iter_tflops": 49.45892954608678, "iter_time": 0.41713586807250974, "loss": 0.18162809312343597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.507632036600064, "step_time": 0.38557291221618656} +{"epoch": 0, "iter": 14344, "iter_tflops": 52.377984902633465, "iter_time": 0.3938886451721192, "loss": 0.22672909498214722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.890799586328754, "step_time": 0.3626437606811524} +{"epoch": 0, "iter": 14345, "iter_tflops": 25.59977383171245, "iter_time": 0.8059092102050781, "loss": 0.10452594608068466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.892587696704727, "step_time": 0.7671665420532228} +{"epoch": 0, "iter": 14346, "iter_tflops": 12.66652167486405, "iter_time": 1.6287891845703129, "loss": 0.10758930444717407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.98573501559374, "step_time": 1.2146129379272461} +{"epoch": 0, "iter": 14347, "iter_tflops": 48.003494260454744, "iter_time": 0.42978316116333004, "loss": 0.09208523482084274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.262989722954686, "step_time": 0.3947553253173828} +{"epoch": 0, "iter": 14348, "iter_tflops": 49.070830368303426, "iter_time": 0.42043497848510747, "loss": 0.10434604436159134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.5705549877321, "step_time": 0.3851200256347656} +{"epoch": 0, "iter": 14349, "iter_tflops": 41.038601286241764, "iter_time": 0.5027240905761718, "loss": 0.055741701275110245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.606153149465634, "step_time": 0.4625167617797851} +{"epoch": 0, "iter": 14350, "iter_tflops": 11.528564958831009, "iter_time": 1.7895630187988283, "loss": 0.07924249023199081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.946698200592095, "step_time": 1.4792815628051759} +{"epoch": 0, "iter": 14351, "iter_tflops": 10.330041535949123, "iter_time": 1.9971936645507813, 
"loss": 0.08478907495737076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.459129926277944, "step_time": 1.8004066314697265} +{"epoch": 0, "iter": 14352, "iter_tflops": 25.512685623016864, "iter_time": 0.8086602020263671, "loss": 0.10944828391075134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.63634359537389, "step_time": 0.5481694431304932} +{"epoch": 0, "iter": 14353, "iter_tflops": 22.538379642919082, "iter_time": 0.6977271270751954, "loss": 0.23289385437965393, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 23.852119528947508, "step_time": 0.6592973365783692} +{"epoch": 0, "iter": 14354, "iter_tflops": 7.558253625444794, "iter_time": 2.080591583251953, "loss": 0.41795283555984497, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 8.782860194757932, "step_time": 1.7904917678833006} +{"epoch": 0, "iter": 14355, "iter_tflops": 24.146441323611544, "iter_time": 0.6512611389160157, "loss": 0.3454958200454712, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.059658445537714, "step_time": 0.6034476203918457} +{"epoch": 0, "iter": 14356, "iter_tflops": 23.597357644270204, "iter_time": 0.6664152450561523, "loss": 0.36942073702812195, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.39512780294409, "step_time": 0.6192384223937989} +{"epoch": 0, "iter": 14357, "iter_tflops": 14.89519120536401, "iter_time": 1.3850841674804686, "loss": 0.3736197054386139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.732533401289517, "step_time": 1.3113649902343751} +{"epoch": 0, "iter": 14358, "iter_tflops": 18.35795582201026, "iter_time": 1.1238230285644533, "loss": 0.34998229146003723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.890258720835778, "step_time": 0.9013045139312745} +{"epoch": 0, "iter": 14359, "iter_tflops": 36.26662441568891, "iter_time": 0.5688727264404297, "loss": 0.5393399596214294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.707961098842695, "step_time": 0.5195707092285157} +{"epoch": 0, "iter": 14360, "iter_tflops": 34.22084590538066, "iter_time": 
0.6028808746337889, "loss": 0.3498304784297943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.44142839530648, "step_time": 0.5510231418609619} +{"epoch": 0, "iter": 14361, "iter_tflops": 21.61763598003418, "iter_time": 0.9543639984130858, "loss": 0.2937180697917938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.083358716381028, "step_time": 0.8937648010253907} +{"epoch": 0, "iter": 14362, "iter_tflops": 16.24488818304557, "iter_time": 1.2700052642822264, "loss": 0.37215861678123474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.514721239953786, "step_time": 1.057206672668457} +{"epoch": 0, "iter": 14363, "iter_tflops": 48.943917111129494, "iter_time": 0.4215251808166504, "loss": 0.31009984016418457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.16813004919583, "step_time": 0.3880349655151367} +{"epoch": 0, "iter": 14364, "iter_tflops": 49.028473919050555, "iter_time": 0.42079819869995116, "loss": 0.35318711400032043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.80864865387338, "step_time": 0.3906764144897461} +{"epoch": 0, "iter": 14365, "iter_tflops": 13.910896547265802, "iter_time": 0.6956939392089844, "loss": 0.01314616110175848, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 14.730558996599141, "step_time": 0.6569829711914064} +{"epoch": 0, "iter": 14366, "iter_tflops": 6.962432696641151, "iter_time": 1.3899920959472656, "loss": 0.00371249346062541, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 8.4783416612308, "step_time": 1.1414645462036135} +{"epoch": 0, "iter": 14367, "iter_tflops": 19.69775237720011, "iter_time": 0.49131120300292974, "loss": 0.013548437505960464, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 21.887368423897378, "step_time": 0.4421603469848633} +{"epoch": 0, "iter": 14368, "iter_tflops": 21.137943213600632, "iter_time": 0.4578367118835449, "loss": 0.0022268688771873713, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 23.56273884214093, "step_time": 0.41072162628173825} +{"epoch": 0, "iter": 14369, "iter_tflops": 
34.740289291538495, "iter_time": 0.5938664855957032, "loss": 0.1556454747915268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.389751999422735, "step_time": 0.5374114818572998} +{"epoch": 0, "iter": 14370, "iter_tflops": 35.833316540476645, "iter_time": 0.5757517166137696, "loss": 0.192874014377594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06699565031676, "step_time": 0.5149149112701417} +{"epoch": 0, "iter": 14371, "iter_tflops": 44.69742480439013, "iter_time": 0.4615723075866699, "loss": 0.12799261510372162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.82088902237489, "step_time": 0.4225874195098877} +{"epoch": 0, "iter": 14372, "iter_tflops": 43.5194605435932, "iter_time": 0.47406592941284187, "loss": 0.1379249393939972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.360144351721885, "step_time": 0.43562142372131346} +{"epoch": 0, "iter": 14373, "iter_tflops": 25.343432691175206, "iter_time": 0.8140607376098632, "loss": 0.6154264211654663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.449952137580137, "step_time": 0.7515894165039062} +{"epoch": 0, "iter": 14374, "iter_tflops": 18.29388104615914, "iter_time": 1.1277592468261717, "loss": 0.5959091782569885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.170687551171028, "step_time": 0.9305572261810302} +{"epoch": 0, "iter": 14375, "iter_tflops": 44.06511559601135, "iter_time": 0.46819560623168943, "loss": 0.49683642387390137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.000248020363934, "step_time": 0.4298122272491455} +{"epoch": 0, "iter": 14376, "iter_tflops": 44.798041787105966, "iter_time": 0.4605356101989746, "loss": 0.481082946062088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.18595773972591, "step_time": 0.42815572166442867} +{"epoch": 0, "iter": 14377, "iter_tflops": 31.713803665166353, "iter_time": 0.6505398635864258, "loss": 0.32447943091392517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.84474983290882, "step_time": 0.6095803222656249} +{"epoch": 0, "iter": 14378, 
"iter_tflops": 19.918153043267274, "iter_time": 1.035793502807617, "loss": 0.26954737305641174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.486820366815945, "step_time": 0.7789192218780517} +{"epoch": 0, "iter": 14379, "iter_tflops": 45.01808164941092, "iter_time": 0.4582845993041993, "loss": 0.19666217267513275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.76059541598855, "step_time": 0.4231099586486816} +{"epoch": 0, "iter": 14380, "iter_tflops": 47.57039391307784, "iter_time": 0.4336960830688476, "loss": 0.21747849881649017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.436052070746115, "step_time": 0.4011018085479736} +{"epoch": 0, "iter": 14381, "iter_tflops": 41.8005538010553, "iter_time": 0.4935602912902832, "loss": 0.538500964641571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.35341243246049, "step_time": 0.454896167755127} +{"epoch": 0, "iter": 14382, "iter_tflops": 44.410868901965145, "iter_time": 0.46455054855346684, "loss": 0.5881876349449158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.953969737883725, "step_time": 0.4302270202636719} +{"epoch": 0, "iter": 14383, "iter_tflops": 44.73519955507624, "iter_time": 0.4611825523376465, "loss": 0.543639063835144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.89171596869804, "step_time": 0.4307862663269043} +{"epoch": 0, "iter": 14384, "iter_tflops": 46.56843236075707, "iter_time": 0.4430274429321289, "loss": 0.7841493487358093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.07117412770198, "step_time": 0.41203534507751466} +{"epoch": 0, "iter": 14385, "iter_tflops": 34.197092539585746, "iter_time": 0.6032996368408203, "loss": 0.35681408643722534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.57899828680054, "step_time": 0.564014720916748} +{"epoch": 0, "iter": 14386, "iter_tflops": 9.010557463009418, "iter_time": 2.2896578369140625, "loss": 0.342827171087265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.864809051521247, "step_time": 1.7388474960327147} +{"epoch": 0, "iter": 
14387, "iter_tflops": 10.384665405829244, "iter_time": 1.9866883239746094, "loss": 0.32738128304481506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.765734732763153, "step_time": 1.6161305198669433} +{"epoch": 0, "iter": 14388, "iter_tflops": 33.72744216549695, "iter_time": 0.6117005081176757, "loss": 0.24624668061733246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.04396037089899, "step_time": 0.46842049026489263} +{"epoch": 0, "iter": 14389, "iter_tflops": 20.544413534643546, "iter_time": 0.8053485488891601, "loss": 0.26856541633605957, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 21.629468866878668, "step_time": 0.76494775390625} +{"epoch": 0, "iter": 14390, "iter_tflops": 9.577561266792232, "iter_time": 1.7275184326171875, "loss": 0.2689226567745209, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 11.586644364029915, "step_time": 1.4279728546142578} +{"epoch": 0, "iter": 14391, "iter_tflops": 29.224372259926056, "iter_time": 0.5661512069702148, "loss": 0.2822999060153961, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.32390063333855, "step_time": 0.5282041282653809} +{"epoch": 0, "iter": 14392, "iter_tflops": 30.793956435083896, "iter_time": 0.5372941818237305, "loss": 0.32799792289733887, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 32.75580498489514, "step_time": 0.5051139373779296} +{"epoch": 0, "iter": 14393, "iter_tflops": 21.57271420938018, "iter_time": 0.9563513107299805, "loss": 0.773648738861084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.67928132603541, "step_time": 0.9096890335083008} +{"epoch": 0, "iter": 14394, "iter_tflops": 15.776331448096101, "iter_time": 1.307724395751953, "loss": 0.731934130191803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.998323309379778, "step_time": 1.146278636932373} +{"epoch": 0, "iter": 14395, "iter_tflops": 36.67410564695592, "iter_time": 0.5625520553588867, "loss": 0.6958043575286865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06765596500148, "step_time": 0.5149064254760741} +{"epoch": 0, 
"iter": 14396, "iter_tflops": 36.41221019031232, "iter_time": 0.5665982208251953, "loss": 0.6684834957122803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.4394859073236, "step_time": 0.5231075668334961} +{"epoch": 0, "iter": 14397, "iter_tflops": 22.94774809550465, "iter_time": 0.8990465393066406, "loss": 0.31952017545700073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.555094214602168, "step_time": 0.8401960639953613} +{"epoch": 0, "iter": 14398, "iter_tflops": 16.185896082072205, "iter_time": 1.2746340026855472, "loss": 0.3188810646533966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.967726445331746, "step_time": 0.9839451866149902} +{"epoch": 0, "iter": 14399, "iter_tflops": 50.0451562289145, "iter_time": 0.41224955749511716, "loss": 0.27212271094322205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.30668815219495, "step_time": 0.3798996810913086} +{"epoch": 0, "iter": 14400, "iter_tflops": 48.07505057360482, "iter_time": 0.42914345932006837, "loss": 0.34616029262542725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.957072441726474, "step_time": 0.39707959938049314} +{"epoch": 0, "iter": 14401, "iter_tflops": 23.815705514607206, "iter_time": 0.8662810134887696, "loss": 0.14882245659828186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.022977995864213, "step_time": 0.8244859390258789} +{"epoch": 0, "iter": 14402, "iter_tflops": 25.21806789198515, "iter_time": 0.8181076202392579, "loss": 0.11748070269823074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.740985569683883, "step_time": 0.6499827632904053} +{"epoch": 0, "iter": 14403, "iter_tflops": 45.41075151083016, "iter_time": 0.4543217811584473, "loss": 0.06336133182048798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.85037155354346, "step_time": 0.4223323764801025} +{"epoch": 0, "iter": 14404, "iter_tflops": 45.51429640771717, "iter_time": 0.453288200378418, "loss": 0.1075066477060318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35410940126549, "step_time": 0.4180217971801758} 
+{"epoch": 0, "iter": 14405, "iter_tflops": 36.6984256601939, "iter_time": 0.5621792526245117, "loss": 0.13250015676021576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71197800416822, "step_time": 0.5195181541442871} +{"epoch": 0, "iter": 14406, "iter_tflops": 35.12449778340401, "iter_time": 0.587370491027832, "loss": 0.19620630145072937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.89415139625237, "step_time": 0.5304420528411865} +{"epoch": 0, "iter": 14407, "iter_tflops": 49.65047289413278, "iter_time": 0.415526626586914, "loss": 0.17182864248752594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.03879684107204, "step_time": 0.3817829914093017} +{"epoch": 0, "iter": 14408, "iter_tflops": 48.4407471074133, "iter_time": 0.4259037017822266, "loss": 0.16512912511825562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.69200267804629, "step_time": 0.39154126739501954} +{"epoch": 0, "iter": 14409, "iter_tflops": 24.07336923948043, "iter_time": 0.7882048263549805, "loss": 0.028431043028831482, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 25.405429440638578, "step_time": 0.746877586364746} +{"epoch": 0, "iter": 14410, "iter_tflops": 13.993324660229668, "iter_time": 1.355985534667969, "loss": 0.024443259462714195, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 16.346977108239397, "step_time": 1.1607495193481445} +{"epoch": 0, "iter": 14411, "iter_tflops": 47.191852402923104, "iter_time": 0.4020767326354981, "loss": 0.030009962618350983, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 51.99736032823954, "step_time": 0.3649174823760986} +{"epoch": 0, "iter": 14412, "iter_tflops": 51.156836685707546, "iter_time": 0.3709131965637207, "loss": 0.10279726982116699, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 55.78037599918754, "step_time": 0.34016884040832523} +{"epoch": 0, "iter": 14413, "iter_tflops": 42.22345583095238, "iter_time": 0.488616886138916, "loss": 0.5117378830909729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.64925675227882, "step_time": 
0.45194807052612307} +{"epoch": 0, "iter": 14414, "iter_tflops": 47.15808163499744, "iter_time": 0.4374879722595215, "loss": 0.6036422848701477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.03591594331715, "step_time": 0.40424656105041507} +{"epoch": 0, "iter": 14415, "iter_tflops": 44.62769293403368, "iter_time": 0.4622935256958008, "loss": 0.6176550388336182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.18136812644044, "step_time": 0.42819650650024416} +{"epoch": 0, "iter": 14416, "iter_tflops": 43.56133600224713, "iter_time": 0.4736102104187012, "loss": 0.41941383481025696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.74004126194086, "step_time": 0.4414008407592773} +{"epoch": 0, "iter": 14417, "iter_tflops": 22.818448978107764, "iter_time": 0.9041409225463868, "loss": 0.16220808029174805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.876406518918312, "step_time": 0.8640786666870117} +{"epoch": 0, "iter": 14418, "iter_tflops": 14.486379442985303, "iter_time": 1.4241718292236327, "loss": 0.20940975844860077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.461717115884383, "step_time": 1.1815042800903321} +{"epoch": 0, "iter": 14419, "iter_tflops": 40.17126406382049, "iter_time": 0.5135783996582031, "loss": 0.16339726746082306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.075908090720816, "step_time": 0.4680809631347656} +{"epoch": 0, "iter": 14420, "iter_tflops": 39.368058162799315, "iter_time": 0.5240566711425781, "loss": 0.1434156447649002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.1182139133792, "step_time": 0.47847746086120607} +{"epoch": 0, "iter": 14421, "iter_tflops": 22.154484234684016, "iter_time": 0.9312378158569334, "loss": 0.36599719524383545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.78659288401439, "step_time": 0.8673412628173827} +{"epoch": 0, "iter": 14422, "iter_tflops": 22.852885404766766, "iter_time": 0.9027784957885744, "loss": 0.20651225745677948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.646522069298566, "step_time": 0.8044402065277099} +{"epoch": 0, "iter": 14423, "iter_tflops": 41.4368358605206, "iter_time": 0.4978925895690918, "loss": 0.3809894025325775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.316424203026905, "step_time": 0.45526746368408205} +{"epoch": 0, "iter": 14424, "iter_tflops": 41.18078904601682, "iter_time": 0.5009883003234863, "loss": 0.25027042627334595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.21066336855945, "step_time": 0.4563324661254883} +{"epoch": 0, "iter": 14425, "iter_tflops": 25.099281962835743, "iter_time": 0.8219794311523438, "loss": 0.05626566708087921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.930485034951836, "step_time": 0.7660869636535644} +{"epoch": 0, "iter": 14426, "iter_tflops": 8.69639774459552, "iter_time": 2.372372344970703, "loss": 0.01615786738693714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.10937616592734, "step_time": 2.0407879943847655} +{"epoch": 0, "iter": 14427, "iter_tflops": 15.837143748056391, "iter_time": 1.3027029266357422, "loss": 0.04441475868225098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.962150147992478, "step_time": 1.0880144577026367} +{"epoch": 0, "iter": 14428, "iter_tflops": 35.35432622408702, "iter_time": 0.5835521621704101, "loss": 0.03757989779114723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.497708295110236, "step_time": 0.5223364696502686} +{"epoch": 0, "iter": 14429, "iter_tflops": 16.108908389997367, "iter_time": 0.9838335647583007, "loss": 0.37877070903778076, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 16.87652156897751, "step_time": 0.9390847930908204} +{"epoch": 0, "iter": 14430, "iter_tflops": 9.468632106423609, "iter_time": 1.6737882080078124, "loss": 0.3941052556037903, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 13.129786025436639, "step_time": 1.207063446044922} +{"epoch": 0, "iter": 14431, "iter_tflops": 23.789981382859498, "iter_time": 0.6661831512451172, "loss": 0.3216489553451538, "lr": 3e-05, "seqlen": 6336.0, 
"step_tflops": 25.631622632778882, "step_time": 0.6183176536560058} +{"epoch": 0, "iter": 14432, "iter_tflops": 23.433846484951754, "iter_time": 0.6763074417114259, "loss": 0.3928167521953583, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.106955980003267, "step_time": 0.631238800048828} +{"epoch": 0, "iter": 14433, "iter_tflops": 17.06365170325822, "iter_time": 1.2090667266845703, "loss": 0.5843260884284973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.22220274999892, "step_time": 1.132195365905762} +{"epoch": 0, "iter": 14434, "iter_tflops": 19.295515604015634, "iter_time": 1.0692170104980467, "loss": 0.727095901966095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.667428363879566, "step_time": 0.8717082901000976} +{"epoch": 0, "iter": 14435, "iter_tflops": 45.564392138210856, "iter_time": 0.4527898330688477, "loss": 0.6899566054344177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13136967847673, "step_time": 0.41991692161560057} +{"epoch": 0, "iter": 14436, "iter_tflops": 46.97307460496071, "iter_time": 0.4392110519409179, "loss": 0.7344467639923096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.35156819706365, "step_time": 0.4097408332824707} +{"epoch": 0, "iter": 14437, "iter_tflops": 49.397504340507034, "iter_time": 0.4176545715332032, "loss": 0.30323323607444763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.27187191404923, "step_time": 0.3801433925628662} +{"epoch": 0, "iter": 14438, "iter_tflops": 41.95183788976387, "iter_time": 0.491780445098877, "loss": 0.23976321518421173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.80124999268575, "step_time": 0.450448263168335} +{"epoch": 0, "iter": 14439, "iter_tflops": 48.6457410910881, "iter_time": 0.4241089363098145, "loss": 0.22724129259586334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.81252063262963, "step_time": 0.3906477718353272} +{"epoch": 0, "iter": 14440, "iter_tflops": 46.431375887692035, "iter_time": 0.4443351745605469, "loss": 0.19612522423267365, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.19436951087048, "step_time": 0.4110240592956543} +{"epoch": 0, "iter": 14441, "iter_tflops": 26.33033194798471, "iter_time": 0.7835485534667969, "loss": 0.10625966638326645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.792427096042644, "step_time": 0.742327880859375} +{"epoch": 0, "iter": 14442, "iter_tflops": 15.584035962072962, "iter_time": 1.3238607482910156, "loss": 0.07954887300729752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.385226108789176, "step_time": 1.122156093597412} +{"epoch": 0, "iter": 14443, "iter_tflops": 39.822716380195494, "iter_time": 0.518073486328125, "loss": 0.10042664408683777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.700351356088994, "step_time": 0.47210360717773436} +{"epoch": 0, "iter": 14444, "iter_tflops": 43.160450284901636, "iter_time": 0.47800922775268556, "loss": 0.09671211242675781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.51165649256688, "step_time": 0.43423225021362305} +{"epoch": 0, "iter": 14445, "iter_tflops": 20.812461972241834, "iter_time": 0.9912855834960937, "loss": 0.0014067236334085464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.137003939890192, "step_time": 0.9319731597900391} +{"epoch": 0, "iter": 14446, "iter_tflops": 14.587700199103926, "iter_time": 1.414280059814453, "loss": 0.0007766029448248446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.451352576973342, "step_time": 1.118134479522705} +{"epoch": 0, "iter": 14447, "iter_tflops": 46.10490800929684, "iter_time": 0.44748150253295904, "loss": 0.001563442638143897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.62735815107972, "step_time": 0.40750879096984866} +{"epoch": 0, "iter": 14448, "iter_tflops": 54.07119916346517, "iter_time": 0.38155420684814456, "loss": 0.011379183270037174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.37883542477033, "step_time": 0.34744860458374016} +{"epoch": 0, "iter": 14449, "iter_tflops": 27.43051900275744, "iter_time": 0.7521218795776368, "loss": 
0.07650869339704514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.064667665422192, "step_time": 0.7098341445922851} +{"epoch": 0, "iter": 14450, "iter_tflops": 16.019149061327596, "iter_time": 1.2879019622802734, "loss": 0.12473743408918381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.41392589465251, "step_time": 0.9634428367614745} +{"epoch": 0, "iter": 14451, "iter_tflops": 50.26036722451339, "iter_time": 0.4104843368530273, "loss": 0.08077574521303177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.069649901990495, "step_time": 0.3746363658905029} +{"epoch": 0, "iter": 14452, "iter_tflops": 52.44223321921991, "iter_time": 0.3934060821533203, "loss": 0.07508555054664612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.233346982140816, "step_time": 0.3604733009338379} +{"epoch": 0, "iter": 14453, "iter_tflops": 40.326702409302946, "iter_time": 0.5115988235473633, "loss": 0.6083078980445862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.61099962530602, "step_time": 0.47307086944580073} +{"epoch": 0, "iter": 14454, "iter_tflops": 13.02117838062679, "iter_time": 1.5844259948730468, "loss": 0.9512271881103516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.765143238255245, "step_time": 1.3972836685180663} +{"epoch": 0, "iter": 14455, "iter_tflops": 31.883017327917038, "iter_time": 0.6470872344970703, "loss": 0.7396710515022278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.37343187972946, "step_time": 0.5239851474761963} +{"epoch": 0, "iter": 14456, "iter_tflops": 37.05030469636976, "iter_time": 0.5568400497436523, "loss": 0.6548120975494385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.293609762491165, "step_time": 0.5120189933776855} +{"epoch": 0, "iter": 14457, "iter_tflops": 16.728992154876423, "iter_time": 0.9939328536987305, "loss": 0.13640379905700684, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 17.996863859713383, "step_time": 0.9239106903076172} +{"epoch": 0, "iter": 14458, "iter_tflops": 13.430948446171946, "iter_time": 
1.2379985656738282, "loss": 0.05030830577015877, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 16.320971063512644, "step_time": 1.0187809810638426} +{"epoch": 0, "iter": 14459, "iter_tflops": 33.44823304005131, "iter_time": 0.4971113090515137, "loss": 0.09287957847118378, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 36.8419014949341, "step_time": 0.4513202152252197} +{"epoch": 0, "iter": 14460, "iter_tflops": 37.37763756374465, "iter_time": 0.4448514137268066, "loss": 0.07909220457077026, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 41.1146749787054, "step_time": 0.40441752052307134} +{"epoch": 0, "iter": 14461, "iter_tflops": 35.53906000499089, "iter_time": 0.5805188293457031, "loss": 0.13180331885814667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.30183812894341, "step_time": 0.5249396591186524} +{"epoch": 0, "iter": 14462, "iter_tflops": 40.06529205286222, "iter_time": 0.5149368057250976, "loss": 0.137596994638443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.752246841780355, "step_time": 0.4610068759918212} +{"epoch": 0, "iter": 14463, "iter_tflops": 43.342616057134585, "iter_time": 0.47600019073486327, "loss": 0.08764021843671799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.53115288138565, "step_time": 0.4340541362762451} +{"epoch": 0, "iter": 14464, "iter_tflops": 38.15196906307444, "iter_time": 0.5407609100341798, "loss": 0.091630719602108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.90330077656671, "step_time": 0.49235008049011236} +{"epoch": 0, "iter": 14465, "iter_tflops": 25.360297555632066, "iter_time": 0.8135193786621093, "loss": 0.21226389706134796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.43612954003411, "step_time": 0.751968074798584} +{"epoch": 0, "iter": 14466, "iter_tflops": 18.870335386303452, "iter_time": 1.0933082580566404, "loss": 0.21643617749214172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.38906417480548, "step_time": 0.8125976352691651} +{"epoch": 0, "iter": 14467, "iter_tflops": 50.31493250001504, 
"iter_time": 0.4100391769409179, "loss": 0.27189046144485474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.89667580723401, "step_time": 0.37581680870056156} +{"epoch": 0, "iter": 14468, "iter_tflops": 52.001258159713196, "iter_time": 0.3967421989440918, "loss": 0.19996711611747742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.43142680115198, "step_time": 0.36559581565856936} +{"epoch": 0, "iter": 14469, "iter_tflops": 18.072510923113043, "iter_time": 1.1415731658935548, "loss": 0.14195288717746735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.845029499532394, "step_time": 1.0947763977050782} +{"epoch": 0, "iter": 14470, "iter_tflops": 18.29369441831519, "iter_time": 1.127770751953125, "loss": 0.12280844151973724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.592508121730916, "step_time": 0.9554746208190918} +{"epoch": 0, "iter": 14471, "iter_tflops": 41.31020345015581, "iter_time": 0.499418830871582, "loss": 0.1303909420967102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.9878138019337, "step_time": 0.45859293365478515} +{"epoch": 0, "iter": 14472, "iter_tflops": 41.11924812881861, "iter_time": 0.5017381019592285, "loss": 0.21513380110263824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.071932596706816, "step_time": 0.4577370510101318} +{"epoch": 0, "iter": 14473, "iter_tflops": 24.95629986167228, "iter_time": 0.8266887969970702, "loss": 0.5583321452140808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.878248174632436, "step_time": 0.7675758247375488} +{"epoch": 0, "iter": 14474, "iter_tflops": 9.04522316936027, "iter_time": 2.280882751464844, "loss": 0.49843209981918335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.478611999882741, "step_time": 1.9688765563964843} +{"epoch": 0, "iter": 14475, "iter_tflops": 12.730022160903458, "iter_time": 1.6206643829345704, "loss": 0.492186963558197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.39866503209101, "step_time": 1.3397975387573242} +{"epoch": 0, "iter": 14476, "iter_tflops": 
37.50858476682106, "iter_time": 0.5500365753173828, "loss": 0.6003733277320862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.72240961289705, "step_time": 0.45122498321533205} +{"epoch": 0, "iter": 14477, "iter_tflops": 16.090525801491335, "iter_time": 0.9493516540527344, "loss": 0.266276478767395, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 16.849143492908933, "step_time": 0.906607940673828} +{"epoch": 0, "iter": 14478, "iter_tflops": 7.281945766545127, "iter_time": 2.0977315368652345, "loss": 0.43171754479408264, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 8.767021677969108, "step_time": 1.7423895874023436} +{"epoch": 0, "iter": 14479, "iter_tflops": 7.295154706015154, "iter_time": 2.0939332885742186, "loss": 0.35048410296440125, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 8.824343817196821, "step_time": 1.7310711822509768} +{"epoch": 0, "iter": 14480, "iter_tflops": 22.294245336167705, "iter_time": 0.6851798324584961, "loss": 0.30159512162208557, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.25390463199815, "step_time": 0.6298188896179199} +{"epoch": 0, "iter": 14481, "iter_tflops": 17.257723023805994, "iter_time": 1.03018310546875, "loss": 0.3109642565250397, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 18.677616867390054, "step_time": 0.9518674049377441} +{"epoch": 0, "iter": 14482, "iter_tflops": 27.654231768803132, "iter_time": 0.6428894805908203, "loss": 0.35488757491111755, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 29.848612559542104, "step_time": 0.5956261672973634} +{"epoch": 0, "iter": 14483, "iter_tflops": 31.665470756726236, "iter_time": 0.5614511413574218, "loss": 0.32899218797683716, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 33.906962585042976, "step_time": 0.524335220336914} +{"epoch": 0, "iter": 14484, "iter_tflops": 32.161111073766726, "iter_time": 0.5527985229492187, "loss": 0.3743527829647064, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 34.191305371698284, "step_time": 0.5199747276306153} +{"epoch": 0, "iter": 14485, 
"iter_tflops": 23.015557957894533, "iter_time": 0.8963977127075196, "loss": 0.5078050494194031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.31666220523101, "step_time": 0.8484344329833985} +{"epoch": 0, "iter": 14486, "iter_tflops": 21.017864420544246, "iter_time": 0.9815979919433593, "loss": 0.5379805564880371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.88123320239266, "step_time": 0.8639040260314941} +{"epoch": 0, "iter": 14487, "iter_tflops": 48.02735851063531, "iter_time": 0.4295696067810059, "loss": 0.684758186340332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.217536489467335, "step_time": 0.39509894371032717} +{"epoch": 0, "iter": 14488, "iter_tflops": 49.20686184288473, "iter_time": 0.41927269363403324, "loss": 0.6568993330001831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.320507885726514, "step_time": 0.3869260501861572} +{"epoch": 0, "iter": 14489, "iter_tflops": 33.26921661020228, "iter_time": 0.6201256179809571, "loss": 0.3656901717185974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.66468972118491, "step_time": 0.5784739379882813} +{"epoch": 0, "iter": 14490, "iter_tflops": 14.585418902799635, "iter_time": 1.4145012664794923, "loss": 0.40732091665267944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.395387637656217, "step_time": 1.1860094146728515} +{"epoch": 0, "iter": 14491, "iter_tflops": 36.788439712096135, "iter_time": 0.5608037109375, "loss": 0.34986791014671326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.24099247305969, "step_time": 0.5126884860992431} +{"epoch": 0, "iter": 14492, "iter_tflops": 34.39102797553064, "iter_time": 0.5998975524902344, "loss": 0.4238681197166443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.29982351296676, "step_time": 0.5531150436401367} +{"epoch": 0, "iter": 14493, "iter_tflops": 20.164520366645196, "iter_time": 1.0231383209228515, "loss": 0.04257557913661003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.484184222977518, "step_time": 0.9602921524047852} +{"epoch": 0, 
"iter": 14494, "iter_tflops": 16.75306205614097, "iter_time": 1.2314819488525393, "loss": 0.04412536695599556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.711940300713557, "step_time": 1.0466292610168457} +{"epoch": 0, "iter": 14495, "iter_tflops": 37.873422440946385, "iter_time": 0.5447380294799805, "loss": 0.03468136489391327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.455709993080845, "step_time": 0.4976659069061279} +{"epoch": 0, "iter": 14496, "iter_tflops": 40.947072753826674, "iter_time": 0.5038478240966797, "loss": 0.05453362688422203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.838309921274636, "step_time": 0.46012201499938965} +{"epoch": 0, "iter": 14497, "iter_tflops": 17.317690283584717, "iter_time": 1.1913305511474608, "loss": 0.09138800203800201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.591953185689956, "step_time": 1.1096786499023437} +{"epoch": 0, "iter": 14498, "iter_tflops": 22.180683816613612, "iter_time": 0.9301378479003907, "loss": 0.10150900483131409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.988574864696414, "step_time": 0.8256210536956788} +{"epoch": 0, "iter": 14499, "iter_tflops": 49.61234126597588, "iter_time": 0.41584599685668944, "loss": 0.09503847360610962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.22714902096854, "step_time": 0.3804569091796875} +{"epoch": 0, "iter": 14500, "iter_tflops": 52.59322148091517, "iter_time": 0.3922766647338867, "loss": 0.21878740191459656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.17741325440443, "step_time": 0.3608259334564209} +{"epoch": 0, "iter": 14501, "iter_tflops": 26.526964048347647, "iter_time": 0.7777404708862306, "loss": 0.604207456111908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.061323017074308, "step_time": 0.7352145690917968} +{"epoch": 0, "iter": 14502, "iter_tflops": 13.027963183332792, "iter_time": 1.5836008453369144, "loss": 0.5594724416732788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.487767767880882, "step_time": 
1.2512969493865969} +{"epoch": 0, "iter": 14503, "iter_tflops": 39.23659368060637, "iter_time": 0.5258125534057617, "loss": 0.4241553246974945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.706781235334006, "step_time": 0.48308706283569336} +{"epoch": 0, "iter": 14504, "iter_tflops": 40.78980367486412, "iter_time": 0.5057904586791993, "loss": 0.41952335834503174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.48877778544414, "step_time": 0.4637370262145996} +{"epoch": 0, "iter": 14505, "iter_tflops": 16.134775131317046, "iter_time": 1.2786725158691405, "loss": 0.5573809742927551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.58673372491173, "step_time": 1.17310546875} +{"epoch": 0, "iter": 14506, "iter_tflops": 24.880571419469376, "iter_time": 0.8292049713134766, "loss": 0.7052934765815735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.08380545389682, "step_time": 0.6637248306274415} +{"epoch": 0, "iter": 14507, "iter_tflops": 43.551952031009925, "iter_time": 0.47371225738525397, "loss": 0.7275822162628174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.902204890631985, "step_time": 0.439874704360962} +{"epoch": 0, "iter": 14508, "iter_tflops": 43.232087884935076, "iter_time": 0.4772171440124512, "loss": 0.680043637752533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.563669624521864, "step_time": 0.4430727577209473} +{"epoch": 0, "iter": 14509, "iter_tflops": 25.66380327907022, "iter_time": 0.8038985214233398, "loss": 0.3902060091495514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.038711859383945, "step_time": 0.7630205764770508} +{"epoch": 0, "iter": 14510, "iter_tflops": 21.274388445896783, "iter_time": 0.9697620010375976, "loss": 0.5051078796386719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.263180460322427, "step_time": 0.7050188388824463} +{"epoch": 0, "iter": 14511, "iter_tflops": 47.8212963706119, "iter_time": 0.43142062377929685, "loss": 0.4528796076774597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.72071330233566, 
"step_time": 0.39889421844482426} +{"epoch": 0, "iter": 14512, "iter_tflops": 50.05339403755786, "iter_time": 0.4121817092895508, "loss": 0.4496115744113922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.040422933053804, "step_time": 0.3817715034484863} +{"epoch": 0, "iter": 14513, "iter_tflops": 46.86204152453439, "iter_time": 0.4402517013549805, "loss": 0.0018390512559562922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.48273574153329, "step_time": 0.4007380962371826} +{"epoch": 0, "iter": 14514, "iter_tflops": 10.056160840525505, "iter_time": 2.051587463378906, "loss": 0.011183885857462883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.421513930354084, "step_time": 1.6609161834716797} +{"epoch": 0, "iter": 14515, "iter_tflops": 11.49474781451332, "iter_time": 1.794827850341797, "loss": 0.004088645800948143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.111775110172342, "step_time": 1.4619772033691407} +{"epoch": 0, "iter": 14516, "iter_tflops": 17.391810433712376, "iter_time": 1.1862533569335938, "loss": 0.007108862046152353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.22467544639118, "step_time": 0.9720334033966065} +{"epoch": 0, "iter": 14517, "iter_tflops": 14.863564935510288, "iter_time": 1.044230651855469, "loss": 0.4371532201766968, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 16.017062371806084, "step_time": 0.9690285110473633} +{"epoch": 0, "iter": 14518, "iter_tflops": 14.941761552379536, "iter_time": 1.0387657470703124, "loss": 0.5489240288734436, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 17.551603212251667, "step_time": 0.8843061180114746} +{"epoch": 0, "iter": 14519, "iter_tflops": 26.084480852950787, "iter_time": 0.5950277557373046, "loss": 0.2990093231201172, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 27.757480287823572, "step_time": 0.5591642303466796} +{"epoch": 0, "iter": 14520, "iter_tflops": 26.921789598752103, "iter_time": 0.5765214843750001, "loss": 0.4446846544742584, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 
28.58209565274313, "step_time": 0.5430319137573242} +{"epoch": 0, "iter": 14521, "iter_tflops": 35.24976366266093, "iter_time": 0.5852831726074219, "loss": 0.17323313653469086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92209211311431, "step_time": 0.5440389060974121} +{"epoch": 0, "iter": 14522, "iter_tflops": 38.144115677565495, "iter_time": 0.5408722457885742, "loss": 0.1547131985425949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1606831850283, "step_time": 0.48934438323974616} +{"epoch": 0, "iter": 14523, "iter_tflops": 38.725853950369476, "iter_time": 0.5327472839355469, "loss": 0.11703147739171982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.527950152769364, "step_time": 0.48511845588684077} +{"epoch": 0, "iter": 14524, "iter_tflops": 38.27140593834042, "iter_time": 0.5390733108520508, "loss": 0.17897827923297882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.92156719669255, "step_time": 0.49213554954528804} +{"epoch": 0, "iter": 14525, "iter_tflops": 14.005532194952007, "iter_time": 1.4730674438476563, "loss": 0.4621477425098419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.825759784161864, "step_time": 1.3915707397460935} +{"epoch": 0, "iter": 14526, "iter_tflops": 22.717109146670992, "iter_time": 0.9081742477416994, "loss": 0.5860675573348999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.908206898969304, "step_time": 0.7392482643127442} +{"epoch": 0, "iter": 14527, "iter_tflops": 49.03481765615496, "iter_time": 0.4207437591552734, "loss": 0.5042080879211426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.01921179884287, "step_time": 0.3891248626708984} +{"epoch": 0, "iter": 14528, "iter_tflops": 48.51443468323037, "iter_time": 0.42525680541992195, "loss": 0.5348234176635742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.32673001228538, "step_time": 0.39427446556091306} +{"epoch": 0, "iter": 14529, "iter_tflops": 32.425786611173486, "iter_time": 0.6362557601928711, "loss": 0.09833604097366333, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 34.70256082441561, "step_time": 0.594512134552002} +{"epoch": 0, "iter": 14530, "iter_tflops": 19.189579424880012, "iter_time": 1.07511962890625, "loss": 0.07046446204185486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.718497845857417, "step_time": 0.9499318809509277} +{"epoch": 0, "iter": 14531, "iter_tflops": 40.94434076861683, "iter_time": 0.5038814430236817, "loss": 0.10422620177268982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.03439218361402, "step_time": 0.45811861801147463} +{"epoch": 0, "iter": 14532, "iter_tflops": 43.19999719730258, "iter_time": 0.4775716400146484, "loss": 0.08461198210716248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.3433133955961, "step_time": 0.4357762908935547} +{"epoch": 0, "iter": 14533, "iter_tflops": 36.841817638113156, "iter_time": 0.5599911956787109, "loss": 0.05053657665848732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.99055904899412, "step_time": 0.5033132991790772} +{"epoch": 0, "iter": 14534, "iter_tflops": 36.9656913796008, "iter_time": 0.5581146392822265, "loss": 0.0416625440120697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.2803633936747, "step_time": 0.49977984237670897} +{"epoch": 0, "iter": 14535, "iter_tflops": 39.24741872018131, "iter_time": 0.5256675262451171, "loss": 0.0913606584072113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.93739059863191, "step_time": 0.48049248504638675} +{"epoch": 0, "iter": 14536, "iter_tflops": 42.61682237630735, "iter_time": 0.4841068000793457, "loss": 0.09618216753005981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.07760462333088, "step_time": 0.43823583793640136} +{"epoch": 0, "iter": 14537, "iter_tflops": 37.46017995681022, "iter_time": 0.550747314453125, "loss": 0.3688243329524994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.282230974597276, "step_time": 0.4997572326660156} +{"epoch": 0, "iter": 14538, "iter_tflops": 40.69117716138252, "iter_time": 0.5070163841247558, "loss": 0.20980986952781677, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 44.77968524124127, "step_time": 0.4607243976593018} +{"epoch": 0, "iter": 14539, "iter_tflops": 37.77268773040348, "iter_time": 0.5461907730102539, "loss": 0.21814090013504028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33336512985277, "step_time": 0.49913897514343264} +{"epoch": 0, "iter": 14540, "iter_tflops": 39.168419935700584, "iter_time": 0.5267277450561523, "loss": 0.24275262653827667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.82429576922324, "step_time": 0.48176141929626465} +{"epoch": 0, "iter": 14541, "iter_tflops": 22.23906376579826, "iter_time": 0.5787694549560546, "loss": 0.03338483348488808, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 24.774717443620094, "step_time": 0.5195333042144775} +{"epoch": 0, "iter": 14542, "iter_tflops": 24.06313723473888, "iter_time": 0.5348966217041016, "loss": 0.04520125314593315, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 26.571342473724684, "step_time": 0.4844049873352051} +{"epoch": 0, "iter": 14543, "iter_tflops": 27.691970019309114, "iter_time": 0.4648022804260254, "loss": 0.027552057057619095, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 30.428313742152394, "step_time": 0.42300374984741207} +{"epoch": 0, "iter": 14544, "iter_tflops": 24.302684296485065, "iter_time": 0.5296242446899414, "loss": 0.04355844482779503, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 26.791663447830427, "step_time": 0.4804214878082276} +{"epoch": 0, "iter": 14545, "iter_tflops": 19.24890735449186, "iter_time": 1.0718059539794922, "loss": 0.005682765506207943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.503995705829603, "step_time": 1.0061986846923827} +{"epoch": 0, "iter": 14546, "iter_tflops": 15.596268602702107, "iter_time": 1.3228224029541016, "loss": 0.002543005160987377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.30025120551845, "step_time": 1.068954662322998} +{"epoch": 0, "iter": 14547, "iter_tflops": 55.09328378654772, "iter_time": 0.3744756546020508, "loss": 0.002037435071542859, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.44233782141675, "step_time": 0.34133513450622555} +{"epoch": 0, "iter": 14548, "iter_tflops": 46.68850259519223, "iter_time": 0.4418880958557129, "loss": 0.016988417133688927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.56351144070914, "step_time": 0.4001103286743164} +{"epoch": 0, "iter": 14549, "iter_tflops": 35.2255933797903, "iter_time": 0.5856847686767578, "loss": 0.23867490887641907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.85697690246179, "step_time": 0.5449746704101562} +{"epoch": 0, "iter": 14550, "iter_tflops": 24.5240855244124, "iter_time": 0.8412584228515625, "loss": 0.28920015692710876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.633765571885583, "step_time": 0.6734755954742431} +{"epoch": 0, "iter": 14551, "iter_tflops": 34.74339364116798, "iter_time": 0.5938134231567382, "loss": 0.2149289846420288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.7542478758736, "step_time": 0.5464575424194336} +{"epoch": 0, "iter": 14552, "iter_tflops": 45.02102829905204, "iter_time": 0.4582546043395996, "loss": 0.21173863112926483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.18653024356363, "step_time": 0.4194460029602051} +{"epoch": 0, "iter": 14553, "iter_tflops": 20.518731881124843, "iter_time": 1.0054760513305663, "loss": 0.5988755822181702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.078559973869105, "step_time": 0.9344401779174805} +{"epoch": 0, "iter": 14554, "iter_tflops": 13.406057299230792, "iter_time": 1.5389381866455079, "loss": 0.6208556890487671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.026454266178497, "step_time": 1.1444898262023926} +{"epoch": 0, "iter": 14555, "iter_tflops": 35.64632443319337, "iter_time": 0.57877197265625, "loss": 0.6946743726730347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.7910016047737, "step_time": 0.531852560043335} +{"epoch": 0, "iter": 14556, "iter_tflops": 36.41905900831006, "iter_time": 0.5664916687011718, "loss": 
0.5255038738250732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.59179446516421, "step_time": 0.5210951862335205} +{"epoch": 0, "iter": 14557, "iter_tflops": 20.16868525454677, "iter_time": 1.0229270401000976, "loss": 0.4040939211845398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.47268389471773, "step_time": 0.9608064651489258} +{"epoch": 0, "iter": 14558, "iter_tflops": 14.333671429966525, "iter_time": 1.4393446655273436, "loss": 0.5509949326515198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.926144759420012, "step_time": 1.035378080368042} +{"epoch": 0, "iter": 14559, "iter_tflops": 47.998475650800145, "iter_time": 0.4298280982971192, "loss": 0.5054457187652588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.03384297985364, "step_time": 0.3964937496185303} +{"epoch": 0, "iter": 14560, "iter_tflops": 49.02785256912756, "iter_time": 0.4208035316467285, "loss": 0.49610382318496704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15207590334594, "step_time": 0.3881521682739258} +{"epoch": 0, "iter": 14561, "iter_tflops": 31.816680997797423, "iter_time": 0.4493106842041016, "loss": 0.006600504275411367, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 34.74035595503162, "step_time": 0.41149764633178715} +{"epoch": 0, "iter": 14562, "iter_tflops": 9.76376568384897, "iter_time": 1.4641456146240233, "loss": 0.03321242332458496, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 10.493990531454758, "step_time": 1.3622629699707032} +{"epoch": 0, "iter": 14563, "iter_tflops": 8.53594292284518, "iter_time": 1.674750503540039, "loss": 0.008960489183664322, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 11.547992625037814, "step_time": 1.23792724609375} +{"epoch": 0, "iter": 14564, "iter_tflops": 8.326302066001134, "iter_time": 1.7169176177978518, "loss": 0.019653456285595894, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 9.92059732432619, "step_time": 1.440999391555786} +{"epoch": 0, "iter": 14565, "iter_tflops": 18.670203915911205, "iter_time": 0.8159889450073241, 
"loss": 0.27854886651039124, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 19.802255082333133, "step_time": 0.7693406600952148} +{"epoch": 0, "iter": 14566, "iter_tflops": 9.605552554795588, "iter_time": 1.5860284881591797, "loss": 0.3142727315425873, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 11.87230763811772, "step_time": 1.2832113571166994} +{"epoch": 0, "iter": 14567, "iter_tflops": 26.451863017306007, "iter_time": 0.5759397735595704, "loss": 0.21447865664958954, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.085075857972186, "step_time": 0.5424475288391113} +{"epoch": 0, "iter": 14568, "iter_tflops": 26.203755902652333, "iter_time": 0.5813929901123047, "loss": 0.2671133577823639, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 27.812531973084802, "step_time": 0.5477631454467773} +{"epoch": 0, "iter": 14569, "iter_tflops": 33.09519355233798, "iter_time": 0.6233863983154297, "loss": 0.034880440682172775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.44279355765412, "step_time": 0.5820955810546875} +{"epoch": 0, "iter": 14570, "iter_tflops": 20.429561077309593, "iter_time": 1.00986474609375, "loss": 0.010438873432576656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.13209521481454, "step_time": 0.8209062290191651} +{"epoch": 0, "iter": 14571, "iter_tflops": 41.77792374654786, "iter_time": 0.49382764053344724, "loss": 0.029886316508054733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.857804709694875, "step_time": 0.4498927421569824} +{"epoch": 0, "iter": 14572, "iter_tflops": 45.22294247935962, "iter_time": 0.4562085609436035, "loss": 0.05907251313328743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.711655100804876, "step_time": 0.4150152206420899} +{"epoch": 0, "iter": 14573, "iter_tflops": 24.317271634913453, "iter_time": 0.8484131698608398, "loss": 0.7313672304153442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.133124729289136, "step_time": 0.789461410522461} +{"epoch": 0, "iter": 14574, "iter_tflops": 11.25570279156847, "iter_time": 
1.832945831298828, "loss": 0.7953317165374756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.350989681601906, "step_time": 1.5452857055664062} +{"epoch": 0, "iter": 14575, "iter_tflops": 13.228896518243287, "iter_time": 1.559547576904297, "loss": 0.6391535997390747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.080240116948552, "step_time": 1.2830090446472167} +{"epoch": 0, "iter": 14576, "iter_tflops": 25.625771225645813, "iter_time": 0.8050916137695313, "loss": 0.7114633321762085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.2081599916206, "step_time": 0.6405548629760742} +{"epoch": 0, "iter": 14577, "iter_tflops": 13.515940858956272, "iter_time": 1.1362402496337891, "loss": 0.30157119035720825, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 14.62425562193523, "step_time": 1.0501290740966795} +{"epoch": 0, "iter": 14578, "iter_tflops": 12.573175581810126, "iter_time": 1.2214381256103517, "loss": 0.29637035727500916, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 15.892592591255704, "step_time": 0.9663216323852538} +{"epoch": 0, "iter": 14579, "iter_tflops": 21.80934741737102, "iter_time": 0.7041639404296874, "loss": 0.5031230449676514, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.503177792874002, "step_time": 0.6534161529541016} +{"epoch": 0, "iter": 14580, "iter_tflops": 22.947408516814363, "iter_time": 0.6692414093017578, "loss": 0.3883384168148041, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.701322058740477, "step_time": 0.6217220268249511} +{"epoch": 0, "iter": 14581, "iter_tflops": 19.0427227890115, "iter_time": 1.0834109039306643, "loss": 0.6699952483177185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.54446418563663, "step_time": 1.0042166748046875} +{"epoch": 0, "iter": 14582, "iter_tflops": 15.449513487499889, "iter_time": 1.3353879089355472, "loss": 0.7173711657524109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.867598684957127, "step_time": 1.1546651496887206} +{"epoch": 0, "iter": 14583, "iter_tflops": 36.933759591309254, 
"iter_time": 0.55859716796875, "loss": 0.4335724711418152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.51461157564567, "step_time": 0.5092259979248047} +{"epoch": 0, "iter": 14584, "iter_tflops": 43.6431042900138, "iter_time": 0.47272286987304685, "loss": 0.6644086837768555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.6592797733641, "step_time": 0.4328872280120849} +{"epoch": 0, "iter": 14585, "iter_tflops": 20.540681414144103, "iter_time": 1.004401611328125, "loss": 0.35976043343544006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.202276192239438, "step_time": 0.9292332611083984} +{"epoch": 0, "iter": 14586, "iter_tflops": 36.595242672464046, "iter_time": 0.5637643585205079, "loss": 0.30225682258605957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.10247729877847, "step_time": 0.5144593276977539} +{"epoch": 0, "iter": 14587, "iter_tflops": 41.10292572671447, "iter_time": 0.5019373474121094, "loss": 0.3269102871417999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22645720867959, "step_time": 0.45617310714721676} +{"epoch": 0, "iter": 14588, "iter_tflops": 38.58956080866408, "iter_time": 0.5346288757324219, "loss": 0.23845556378364563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.00302501985089, "step_time": 0.49118113517761225} +{"epoch": 0, "iter": 14589, "iter_tflops": 20.014270644334626, "iter_time": 1.0308191528320312, "loss": 0.40550675988197327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.610216421141615, "step_time": 0.9546916656494141} +{"epoch": 0, "iter": 14590, "iter_tflops": 21.345309500717246, "iter_time": 0.9665399093627929, "loss": 0.5845303535461426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.610479016505227, "step_time": 0.8055723400115966} +{"epoch": 0, "iter": 14591, "iter_tflops": 37.94919070748459, "iter_time": 0.5436504211425781, "loss": 0.4824170470237732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.40293399987322, "step_time": 0.49830027770996094} +{"epoch": 0, "iter": 14592, "iter_tflops": 
41.42505576848839, "iter_time": 0.4980341758728027, "loss": 0.37827348709106445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08967322095862, "step_time": 0.4575569534301758} +{"epoch": 0, "iter": 14593, "iter_tflops": 23.10842522675987, "iter_time": 0.8927953033447265, "loss": 0.24863004684448242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.89395765122184, "step_time": 0.8287590827941895} +{"epoch": 0, "iter": 14594, "iter_tflops": 11.648182573451312, "iter_time": 1.7711856231689451, "loss": 0.3193163275718689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.026267221888194, "step_time": 1.470889808654785} +{"epoch": 0, "iter": 14595, "iter_tflops": 12.64610143039869, "iter_time": 1.6314192657470703, "loss": 0.23136965930461884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.067522465910965, "step_time": 1.2840245628356932} +{"epoch": 0, "iter": 14596, "iter_tflops": 36.920997982115466, "iter_time": 0.5587902450561524, "loss": 0.19869641959667206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.07639972829226, "step_time": 0.5147940845489501} +{"epoch": 0, "iter": 14597, "iter_tflops": 15.196895270650193, "iter_time": 1.129275619506836, "loss": 0.38369640707969666, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 16.30836101953683, "step_time": 1.0523119583129883} +{"epoch": 0, "iter": 14598, "iter_tflops": 15.492595822245212, "iter_time": 1.1077216186523438, "loss": 0.307802677154541, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 19.928043634860597, "step_time": 0.861172508239746} +{"epoch": 0, "iter": 14599, "iter_tflops": 30.87648924779451, "iter_time": 0.5558107070922851, "loss": 0.47512346506118774, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 32.94108568550394, "step_time": 0.5209750366210938} +{"epoch": 0, "iter": 14600, "iter_tflops": 30.175794197415957, "iter_time": 0.5687168731689453, "loss": 0.26412346959114075, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 32.117171543039326, "step_time": 0.5343398094177246} +{"epoch": 0, "iter": 14601, 
"iter_tflops": 26.05735699608755, "iter_time": 0.7917569503784179, "loss": 0.23199406266212463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.528473078708767, "step_time": 0.7494456176757813} +{"epoch": 0, "iter": 14602, "iter_tflops": 18.3181863008844, "iter_time": 1.1262628936767578, "loss": 0.3468738794326782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.589193532728615, "step_time": 0.8745993576049805} +{"epoch": 0, "iter": 14603, "iter_tflops": 38.46503049805743, "iter_time": 0.536359733581543, "loss": 0.2970678210258484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.13863343115076, "step_time": 0.4896004409790039} +{"epoch": 0, "iter": 14604, "iter_tflops": 39.058452578766705, "iter_time": 0.5282107238769531, "loss": 0.2082844078540802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.747490945398205, "step_time": 0.48262700462341307} +{"epoch": 0, "iter": 14605, "iter_tflops": 23.240683293760277, "iter_time": 0.8877145843505858, "loss": 0.8121236562728882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.881674646031755, "step_time": 0.8291682052612304} +{"epoch": 0, "iter": 14606, "iter_tflops": 21.14559127440218, "iter_time": 0.9756687927246094, "loss": 0.5791666507720947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.96903172681271, "step_time": 0.7376406059265136} +{"epoch": 0, "iter": 14607, "iter_tflops": 36.07406291629857, "iter_time": 0.5719093399047852, "loss": 0.6919605731964111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.216608343227335, "step_time": 0.5260805149078369} +{"epoch": 0, "iter": 14608, "iter_tflops": 35.97693360062372, "iter_time": 0.5734533615112305, "loss": 0.5899950861930847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.948602248694584, "step_time": 0.5297004852294921} +{"epoch": 0, "iter": 14609, "iter_tflops": 17.538499321214324, "iter_time": 0.7524208068847655, "loss": 0.002766924211755395, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 18.86845421066494, "step_time": 0.6993859519958495} +{"epoch": 0, 
"iter": 14610, "iter_tflops": 6.756598426729342, "iter_time": 1.9531028747558594, "loss": 0.005716659594327211, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 7.813844044412282, "step_time": 1.6888399276733397} +{"epoch": 0, "iter": 14611, "iter_tflops": 8.504357005164842, "iter_time": 1.5517142333984375, "loss": 0.0027245087549090385, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 9.666773703922011, "step_time": 1.365122657775879} +{"epoch": 0, "iter": 14612, "iter_tflops": 11.66200607335259, "iter_time": 1.1315661926269531, "loss": 0.014754805713891983, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 16.147649064167556, "step_time": 0.8172292919158937} +{"epoch": 0, "iter": 14613, "iter_tflops": 25.218351959945384, "iter_time": 0.6837760925292969, "loss": 0.30983978509902954, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 26.859697731098468, "step_time": 0.6419918174743653} +{"epoch": 0, "iter": 14614, "iter_tflops": 13.782577267210293, "iter_time": 1.2511234893798828, "loss": 0.47858569025993347, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 17.45357039329173, "step_time": 0.9879758567810057} +{"epoch": 0, "iter": 14615, "iter_tflops": 26.513693970299595, "iter_time": 0.6503698120117188, "loss": 0.39712366461753845, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 28.55205896447352, "step_time": 0.6039391479492188} +{"epoch": 0, "iter": 14616, "iter_tflops": 27.388772168032737, "iter_time": 0.62959033203125, "loss": 0.23535823822021484, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 29.303116993990916, "step_time": 0.5884597930908202} +{"epoch": 0, "iter": 14617, "iter_tflops": 19.615220190931613, "iter_time": 1.0517900543212892, "loss": 0.6050578951835632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.804925558371327, "step_time": 0.9916446685791014} +{"epoch": 0, "iter": 14618, "iter_tflops": 10.610671352170877, "iter_time": 1.9443721160888672, "loss": 0.6042631268501282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.043812335202059, "step_time": 
1.581676658630371} +{"epoch": 0, "iter": 14619, "iter_tflops": 10.58598660068746, "iter_time": 1.9489060668945313, "loss": 0.6068395376205444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.60194430879241, "step_time": 1.5167753257751466} +{"epoch": 0, "iter": 14620, "iter_tflops": 42.00495611627563, "iter_time": 0.4911585540771484, "loss": 0.3350156545639038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61668047027153, "step_time": 0.45227082061767576} +{"epoch": 0, "iter": 14621, "iter_tflops": 19.238088190545522, "iter_time": 0.8238076782226563, "loss": 0.4485711455345154, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 20.303538793530233, "step_time": 0.7805774612426758} +{"epoch": 0, "iter": 14622, "iter_tflops": 5.910256273043432, "iter_time": 2.6815224304199217, "loss": 0.3585827648639679, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 7.562266521139597, "step_time": 2.095732109069824} +{"epoch": 0, "iter": 14623, "iter_tflops": 11.719117545381193, "iter_time": 1.3523616180419922, "loss": 0.2393743246793747, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 13.872481205981952, "step_time": 1.142440528869629} +{"epoch": 0, "iter": 14624, "iter_tflops": 27.320860246367978, "iter_time": 0.5800873260498047, "loss": 0.29931843280792236, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 29.06554228584574, "step_time": 0.5452671279907226} +{"epoch": 0, "iter": 14625, "iter_tflops": 16.728536529974704, "iter_time": 0.9841477279663087, "loss": 0.4537242650985718, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 17.412304265035708, "step_time": 0.9455010070800781} +{"epoch": 0, "iter": 14626, "iter_tflops": 12.89587772277173, "iter_time": 1.2766367340087892, "loss": 0.4198809862136841, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 15.391774430865349, "step_time": 1.0696200942993164} +{"epoch": 0, "iter": 14627, "iter_tflops": 23.20164519520075, "iter_time": 0.7095768890380859, "loss": 0.26514387130737305, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 24.900296930821042, 
"step_time": 0.6611708793640138} +{"epoch": 0, "iter": 14628, "iter_tflops": 25.208020192785156, "iter_time": 0.6530997314453125, "loss": 0.3399271070957184, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 27.084053341609994, "step_time": 0.6078614234924317} +{"epoch": 0, "iter": 14629, "iter_tflops": 21.682361844887488, "iter_time": 0.9515150451660155, "loss": 0.003503250190988183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.437978272670843, "step_time": 0.880242027282715} +{"epoch": 0, "iter": 14630, "iter_tflops": 27.926052152307676, "iter_time": 0.7387758712768555, "loss": 0.004751463886350393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.312141709992183, "step_time": 0.6588847770690918} +{"epoch": 0, "iter": 14631, "iter_tflops": 53.0298607381401, "iter_time": 0.38904672241210936, "loss": 0.00913013331592083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.076931537664386, "step_time": 0.35523731994628904} +{"epoch": 0, "iter": 14632, "iter_tflops": 55.63127991611187, "iter_time": 0.3708541946411133, "loss": 0.00588045921176672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.06680228756944, "step_time": 0.3378446674346924} +{"epoch": 0, "iter": 14633, "iter_tflops": 33.72784222105357, "iter_time": 0.6116932525634765, "loss": 0.8515603542327881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.872829454698675, "step_time": 0.5751175422668456} +{"epoch": 0, "iter": 14634, "iter_tflops": 13.010683071207808, "iter_time": 1.5857041015625002, "loss": 0.6848787665367126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.37567681129282, "step_time": 1.4351389350891113} +{"epoch": 0, "iter": 14635, "iter_tflops": 33.00378983762851, "iter_time": 0.6251128616333008, "loss": 0.6820414662361145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.7897207638695, "step_time": 0.5764530448913574} +{"epoch": 0, "iter": 14636, "iter_tflops": 36.11112772495613, "iter_time": 0.5713223266601563, "loss": 0.7164592742919922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
39.15865625748937, "step_time": 0.5268590774536133} +{"epoch": 0, "iter": 14637, "iter_tflops": 31.915021146669346, "iter_time": 0.6464383468627929, "loss": 0.6828125715255737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.226055118247466, "step_time": 0.5856770915985108} +{"epoch": 0, "iter": 14638, "iter_tflops": 38.66640763640752, "iter_time": 0.5335663375854492, "loss": 0.6686290502548218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29323746868191, "step_time": 0.4878106937408447} +{"epoch": 0, "iter": 14639, "iter_tflops": 34.4417985664493, "iter_time": 0.5990132446289063, "loss": 0.524530291557312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.39502782850435, "step_time": 0.5517068634033203} +{"epoch": 0, "iter": 14640, "iter_tflops": 34.143557894165546, "iter_time": 0.6042455673217774, "loss": 0.5743210315704346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.18586463496431, "step_time": 0.5548101062774659} +{"epoch": 0, "iter": 14641, "iter_tflops": 29.291587996846957, "iter_time": 0.7043350982666016, "loss": 0.4528990685939789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.28323650158305, "step_time": 0.6390652160644531} +{"epoch": 0, "iter": 14642, "iter_tflops": 34.44159370805306, "iter_time": 0.5990168075561524, "loss": 0.25172489881515503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.40444870912112, "step_time": 0.5372058238983155} +{"epoch": 0, "iter": 14643, "iter_tflops": 41.58416640086571, "iter_time": 0.4961285820007325, "loss": 0.39448440074920654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26518297579947, "step_time": 0.4557828369140625} +{"epoch": 0, "iter": 14644, "iter_tflops": 41.949068125835566, "iter_time": 0.4918129158020019, "loss": 0.2926871180534363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.670656438468704, "step_time": 0.4517363033294678} +{"epoch": 0, "iter": 14645, "iter_tflops": 19.367205173925957, "iter_time": 1.0652592010498048, "loss": 0.2960183918476105, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.62968516646367, "step_time": 1.0000682678222657} +{"epoch": 0, "iter": 14646, "iter_tflops": 25.767374989627626, "iter_time": 0.8006672592163085, "loss": 0.26475033164024353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.699918707335073, "step_time": 0.6508248081207275} +{"epoch": 0, "iter": 14647, "iter_tflops": 39.30460010906962, "iter_time": 0.5249027709960937, "loss": 0.30836790800094604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08710120384373, "step_time": 0.4901999168395996} +{"epoch": 0, "iter": 14648, "iter_tflops": 48.85093215371511, "iter_time": 0.4223275299072265, "loss": 0.33674007654190063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.7263751856941, "step_time": 0.39128602027893067} +{"epoch": 0, "iter": 14649, "iter_tflops": 28.210236263467976, "iter_time": 0.7313335952758788, "loss": 0.5934004783630371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.13098603432861, "step_time": 0.6847135200500489} +{"epoch": 0, "iter": 14650, "iter_tflops": 21.592875974521736, "iter_time": 0.9554583435058595, "loss": 0.5760747194290161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.239513317434184, "step_time": 0.7862605247497558} +{"epoch": 0, "iter": 14651, "iter_tflops": 48.496878475740644, "iter_time": 0.4254107513427735, "loss": 0.7886229753494263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44631052158108, "step_time": 0.39337549781799314} +{"epoch": 0, "iter": 14652, "iter_tflops": 48.5391251763608, "iter_time": 0.4250404891967774, "loss": 0.6895422339439392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.35934445574808, "step_time": 0.3940288734436036} +{"epoch": 0, "iter": 14653, "iter_tflops": 1.9416740104053765, "iter_time": 0.7869431915283203, "loss": 0.12068267911672592, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 2.0454838306147725, "step_time": 0.747005241394043} +{"epoch": 0, "iter": 14654, "iter_tflops": 1.0630115897360328, "iter_time": 1.4374134368896485, "loss": 0.07947341352701187, "lr": 3e-05, 
"seqlen": 624.0, "step_tflops": 1.277159780571966, "step_time": 1.1963946609497071} +{"epoch": 0, "iter": 14655, "iter_tflops": 2.8206368655346057, "iter_time": 0.5417170715332031, "loss": 0.06502217054367065, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.095892884815243, "step_time": 0.4935529747009278} +{"epoch": 0, "iter": 14656, "iter_tflops": 2.9718672585295023, "iter_time": 0.5141505355834961, "loss": 0.1396608203649521, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.258723328176938, "step_time": 0.4688913383483887} +{"epoch": 0, "iter": 14657, "iter_tflops": 16.952134911225333, "iter_time": 1.2170203704833986, "loss": 0.18070146441459656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.886786727597737, "step_time": 1.1534264831542966} +{"epoch": 0, "iter": 14658, "iter_tflops": 16.63667987926414, "iter_time": 1.2400968017578125, "loss": 0.17524705827236176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.090467791072445, "step_time": 0.9339364700317382} +{"epoch": 0, "iter": 14659, "iter_tflops": 38.055612601308965, "iter_time": 0.542130111694336, "loss": 0.2716940641403198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.481751482908145, "step_time": 0.4973534812927246} +{"epoch": 0, "iter": 14660, "iter_tflops": 41.164360109435904, "iter_time": 0.5011882476806641, "loss": 0.26802363991737366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.741661577048625, "step_time": 0.46111594390869143} +{"epoch": 0, "iter": 14661, "iter_tflops": 15.716828207047358, "iter_time": 1.3126753845214847, "loss": 0.32811975479125977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.66962624694137, "step_time": 1.2376458358764648} +{"epoch": 0, "iter": 14662, "iter_tflops": 21.11368585369007, "iter_time": 0.9771431503295899, "loss": 0.39968249201774597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.3530714997573, "step_time": 0.8137512454986571} +{"epoch": 0, "iter": 14663, "iter_tflops": 48.454301088872064, "iter_time": 0.42578456497192385, "loss": 0.3612065315246582, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.5606371795004, "step_time": 0.3925198516845703} +{"epoch": 0, "iter": 14664, "iter_tflops": 52.412706392461104, "iter_time": 0.39362770843505857, "loss": 0.44675078988075256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.758505807967495, "step_time": 0.36348901748657225} +{"epoch": 0, "iter": 14665, "iter_tflops": 19.825816729244796, "iter_time": 1.0406175842285155, "loss": 0.5296785831451416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.681998189036285, "step_time": 0.9975386962890626} +{"epoch": 0, "iter": 14666, "iter_tflops": 16.32263099915256, "iter_time": 1.2639563751220704, "loss": 0.5428684949874878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.779124839206574, "step_time": 0.9928759593963623} +{"epoch": 0, "iter": 14667, "iter_tflops": 44.86374132689264, "iter_time": 0.45986119079589843, "loss": 0.4926148056983948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.492857945937544, "step_time": 0.42544602203369136} +{"epoch": 0, "iter": 14668, "iter_tflops": 44.70461421357982, "iter_time": 0.4614980773925781, "loss": 0.632877767086029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.28001624144411, "step_time": 0.42732159423828125} +{"epoch": 0, "iter": 14669, "iter_tflops": 37.460140518283765, "iter_time": 0.5507478942871094, "loss": 0.3502717614173889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.659987882134025, "step_time": 0.5074053039550781} +{"epoch": 0, "iter": 14670, "iter_tflops": 47.81002050294084, "iter_time": 0.4315223731994629, "loss": 0.2996255159378052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.55965567713125, "step_time": 0.3925271816253662} +{"epoch": 0, "iter": 14671, "iter_tflops": 46.369262545587006, "iter_time": 0.4449303779602051, "loss": 0.23720991611480713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96631330930598, "step_time": 0.41290005493164067} +{"epoch": 0, "iter": 14672, "iter_tflops": 54.700836471373904, "iter_time": 0.3771623039245605, "loss": 
0.3066871166229248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.14361147157529, "step_time": 0.3488304653167724} +{"epoch": 0, "iter": 14673, "iter_tflops": 45.75320715875536, "iter_time": 0.45092125320434573, "loss": 0.022406022995710373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.992747429421414, "step_time": 0.41268173027038574} +{"epoch": 0, "iter": 14674, "iter_tflops": 48.897113185691616, "iter_time": 0.4219286613464356, "loss": 0.0537058524787426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.632682833808374, "step_time": 0.3846739044189453} +{"epoch": 0, "iter": 14675, "iter_tflops": 51.576502620097465, "iter_time": 0.4000095481872559, "loss": 0.06173809990286827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.27459748060146, "step_time": 0.3666146793365479} +{"epoch": 0, "iter": 14676, "iter_tflops": 52.525920983538235, "iter_time": 0.39277928161621095, "loss": 0.03596894070506096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.35828388701333, "step_time": 0.3596881237030029} +{"epoch": 0, "iter": 14677, "iter_tflops": 36.26142762734424, "iter_time": 0.5689542541503906, "loss": 0.5526414513587952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.712177947686456, "step_time": 0.5329354896545411} +{"epoch": 0, "iter": 14678, "iter_tflops": 31.9959475506634, "iter_time": 0.6448033294677734, "loss": 0.70759117603302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.72835163729713, "step_time": 0.5617211933135986} +{"epoch": 0, "iter": 14679, "iter_tflops": 44.08572802380586, "iter_time": 0.4679766998291016, "loss": 0.7025840282440186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.493940461858415, "step_time": 0.43439422607421874} +{"epoch": 0, "iter": 14680, "iter_tflops": 48.64703933595787, "iter_time": 0.4240976181030273, "loss": 0.689691424369812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43344965795377, "step_time": 0.3934719848632813} +{"epoch": 0, "iter": 14681, "iter_tflops": 25.177785220859302, "iter_time": 
0.8194165344238282, "loss": 0.5736948251724243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.70168643308172, "step_time": 0.7726513290405274} +{"epoch": 0, "iter": 14682, "iter_tflops": 17.488539622557088, "iter_time": 1.1796921844482422, "loss": 0.633335530757904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.707990361728218, "step_time": 0.9962866096496583} +{"epoch": 0, "iter": 14683, "iter_tflops": 40.00654164086101, "iter_time": 0.515693000793457, "loss": 0.6237199306488037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.03949534953783, "step_time": 0.4793525886535645} +{"epoch": 0, "iter": 14684, "iter_tflops": 45.4227187757118, "iter_time": 0.45420208358764647, "loss": 0.5307775139808655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.86727794697473, "step_time": 0.42218626403808596} +{"epoch": 0, "iter": 14685, "iter_tflops": 22.229408904736427, "iter_time": 0.9280990600585937, "loss": 0.5462313890457153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.21680633089959, "step_time": 0.8886275405883789} +{"epoch": 0, "iter": 14686, "iter_tflops": 13.858680689415351, "iter_time": 1.4886765899658203, "loss": 0.5359328985214233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.357159629669832, "step_time": 1.3434185752868653} +{"epoch": 0, "iter": 14687, "iter_tflops": 36.294912853251425, "iter_time": 0.5684293441772461, "loss": 0.7010988593101501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.51355389968346, "step_time": 0.5092392921447754} +{"epoch": 0, "iter": 14688, "iter_tflops": 45.5025488271682, "iter_time": 0.4534052276611328, "loss": 0.7109196782112122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.920705737488476, "step_time": 0.4217251815795898} +{"epoch": 0, "iter": 14689, "iter_tflops": 25.604383157879706, "iter_time": 0.8057641296386718, "loss": 0.3248993754386902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.02859991364345, "step_time": 0.763306037902832} +{"epoch": 0, "iter": 14690, "iter_tflops": 14.409411166480572, 
"iter_time": 1.431779083251953, "loss": 0.3739107847213745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.630341620466236, "step_time": 1.1073921203613282} +{"epoch": 0, "iter": 14691, "iter_tflops": 37.55282084584731, "iter_time": 0.5493886489868164, "loss": 0.28059592843055725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.21256636343139, "step_time": 0.5006020088195801} +{"epoch": 0, "iter": 14692, "iter_tflops": 39.58875217444046, "iter_time": 0.5211352310180665, "loss": 0.2559998333454132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.40317533797811, "step_time": 0.47533604049682615} +{"epoch": 0, "iter": 14693, "iter_tflops": 18.769959619062675, "iter_time": 1.0991549224853514, "loss": 0.5526624321937561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.99464840746977, "step_time": 1.0318307723999023} +{"epoch": 0, "iter": 14694, "iter_tflops": 16.341170272891652, "iter_time": 1.2625223999023438, "loss": 0.6366678476333618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.2181841137667, "step_time": 0.9723307800292968} +{"epoch": 0, "iter": 14695, "iter_tflops": 41.489674979103654, "iter_time": 0.4972584991455078, "loss": 0.6511589884757996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.687487247746354, "step_time": 0.46167495155334476} +{"epoch": 0, "iter": 14696, "iter_tflops": 44.399478011022005, "iter_time": 0.46466973114013677, "loss": 0.605252742767334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.83740624531984, "step_time": 0.4312753372192383} +{"epoch": 0, "iter": 14697, "iter_tflops": 30.261645686172375, "iter_time": 0.6817571563720703, "loss": 0.08730936050415039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.167349098980424, "step_time": 0.6413675384521484} +{"epoch": 0, "iter": 14698, "iter_tflops": 13.13056448976332, "iter_time": 1.5712266998291013, "loss": 0.14076434075832367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.264295415837875, "step_time": 1.0181007080078124} +{"epoch": 0, "iter": 14699, "iter_tflops": 
32.81066712853216, "iter_time": 0.6287922592163085, "loss": 0.12647651135921478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.863372534776296, "step_time": 0.5596637554168701} +{"epoch": 0, "iter": 14700, "iter_tflops": 39.18878140536513, "iter_time": 0.5264540710449219, "loss": 0.10895687341690063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81064706643436, "step_time": 0.48191501235961914} +{"epoch": 0, "iter": 14701, "iter_tflops": 23.060115639536484, "iter_time": 0.894665657043457, "loss": 0.22067758440971375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.281596415953103, "step_time": 0.8160518493652342} +{"epoch": 0, "iter": 14702, "iter_tflops": 43.57325850390192, "iter_time": 0.47348062133789065, "loss": 0.19209586083889008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.35496593030798, "step_time": 0.42665925025939944} +{"epoch": 0, "iter": 14703, "iter_tflops": 49.3298932475753, "iter_time": 0.4182270050048828, "loss": 0.18454262614250183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.47371644593969, "step_time": 0.385817461013794} +{"epoch": 0, "iter": 14704, "iter_tflops": 50.608927101615116, "iter_time": 0.4076571998596191, "loss": 0.24442172050476074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.70648634711421, "step_time": 0.37712335205078124} +{"epoch": 0, "iter": 14705, "iter_tflops": 36.77668008648752, "iter_time": 0.5609830322265625, "loss": 0.17671962082386017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.5477562438604, "step_time": 0.5216754493713379} +{"epoch": 0, "iter": 14706, "iter_tflops": 18.208430640000735, "iter_time": 1.1330517120361328, "loss": 0.1552964448928833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.156970164411707, "step_time": 0.8540430927276611} +{"epoch": 0, "iter": 14707, "iter_tflops": 42.9729802752103, "iter_time": 0.48009454727172857, "loss": 0.16367599368095398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.25888988324481, "step_time": 0.43655476379394526} +{"epoch": 0, "iter": 14708, 
"iter_tflops": 42.279219794069064, "iter_time": 0.48797242736816404, "loss": 0.2111203521490097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.152601098352, "step_time": 0.44701908493042} +{"epoch": 0, "iter": 14709, "iter_tflops": 22.29239168521478, "iter_time": 0.9254768981933592, "loss": 0.21679072082042694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.677315070896398, "step_time": 0.8713442993164063} +{"epoch": 0, "iter": 14710, "iter_tflops": 11.01259489410963, "iter_time": 1.8734089202880857, "loss": 0.2248508632183075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.315575463967521, "step_time": 1.6752033691406252} +{"epoch": 0, "iter": 14711, "iter_tflops": 13.647390044710221, "iter_time": 1.5117244720458984, "loss": 0.16709758341312408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.178676138161748, "step_time": 1.2009710960388182} +{"epoch": 0, "iter": 14712, "iter_tflops": 37.67575899397576, "iter_time": 0.5475959625244141, "loss": 0.11163529008626938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.372246026470776, "step_time": 0.4986698932647705} +{"epoch": 0, "iter": 14713, "iter_tflops": 14.144382346626697, "iter_time": 1.0481947021484377, "loss": 0.2910160720348358, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.069801759727552, "step_time": 0.9838262557983399} +{"epoch": 0, "iter": 14714, "iter_tflops": 14.373834777151664, "iter_time": 1.0314621582031251, "loss": 0.21527674794197083, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 17.055994899457943, "step_time": 0.869258388519287} +{"epoch": 0, "iter": 14715, "iter_tflops": 26.85117464781977, "iter_time": 0.5521570968627929, "loss": 0.4466245770454407, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.611011971182272, "step_time": 0.5181944160461426} +{"epoch": 0, "iter": 14716, "iter_tflops": 24.595413037573085, "iter_time": 0.6027980346679688, "loss": 0.3020632565021515, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 26.096521631438986, "step_time": 0.5681242446899414} +{"epoch": 0, 
"iter": 14717, "iter_tflops": 32.93677648319639, "iter_time": 0.6263847198486329, "loss": 0.265286386013031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.12354199824594, "step_time": 0.5873864746093751} +{"epoch": 0, "iter": 14718, "iter_tflops": 12.956099885867461, "iter_time": 1.5923845672607422, "loss": 0.3007230758666992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.362034442173602, "step_time": 1.2609124851226805} +{"epoch": 0, "iter": 14719, "iter_tflops": 46.440713884150476, "iter_time": 0.44424583053588873, "loss": 0.18117062747478485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.58930653481421, "step_time": 0.40781530570983887} +{"epoch": 0, "iter": 14720, "iter_tflops": 50.62720957645832, "iter_time": 0.40750998687744144, "loss": 0.17584121227264404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.83913175080813, "step_time": 0.3762111625671386} +{"epoch": 0, "iter": 14721, "iter_tflops": 24.059749098960957, "iter_time": 0.857494125366211, "loss": 0.4199649393558502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.253142655667435, "step_time": 0.8169713287353515} +{"epoch": 0, "iter": 14722, "iter_tflops": 13.992193376006238, "iter_time": 1.474471725463867, "loss": 0.4810366630554199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.511199029104997, "step_time": 1.178165668487549} +{"epoch": 0, "iter": 14723, "iter_tflops": 47.8400471409389, "iter_time": 0.4312515296936036, "loss": 0.4754764437675476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.89228861099606, "step_time": 0.39757532501220705} +{"epoch": 0, "iter": 14724, "iter_tflops": 46.74380627864881, "iter_time": 0.44136528778076173, "loss": 0.48462769389152527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.534159497081106, "step_time": 0.408260347366333} +{"epoch": 0, "iter": 14725, "iter_tflops": 32.96742800247977, "iter_time": 0.6258023376464843, "loss": 0.04931182414293289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.09450615303155, "step_time": 0.5878724555969238} 
+{"epoch": 0, "iter": 14726, "iter_tflops": 29.17684245788513, "iter_time": 0.7071050796508789, "loss": 0.07874982804059982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.763394816390026, "step_time": 0.4714235172271728} +{"epoch": 0, "iter": 14727, "iter_tflops": 47.81663116133425, "iter_time": 0.4314627151489258, "loss": 0.04774969816207886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.59207740744745, "step_time": 0.3922851982116699} +{"epoch": 0, "iter": 14728, "iter_tflops": 52.47365461072267, "iter_time": 0.39317050933837894, "loss": 0.1128745973110199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.98292754097364, "step_time": 0.36205745124816896} +{"epoch": 0, "iter": 14729, "iter_tflops": 31.62434615252172, "iter_time": 0.6523800811767578, "loss": 0.03012194111943245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.688145887741555, "step_time": 0.6124140396118165} +{"epoch": 0, "iter": 14730, "iter_tflops": 9.762411323992499, "iter_time": 2.1133194274902345, "loss": 0.03762819990515709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.37816542560607, "step_time": 1.666732734680176} +{"epoch": 0, "iter": 14731, "iter_tflops": 10.918989272333322, "iter_time": 1.8894691619873045, "loss": 0.05073559284210205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.703101500130611, "step_time": 1.6240989265441894} +{"epoch": 0, "iter": 14732, "iter_tflops": 25.918565010859524, "iter_time": 0.7959967498779297, "loss": 0.019859010353684425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.633180165041985, "step_time": 0.6322121658325195} +{"epoch": 0, "iter": 14733, "iter_tflops": 11.465128604695675, "iter_time": 1.3073953094482422, "loss": 0.3901720941066742, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 12.170626034927384, "step_time": 1.2316092300415038} +{"epoch": 0, "iter": 14734, "iter_tflops": 12.38539162040029, "iter_time": 1.2102528381347657, "loss": 0.28175806999206543, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.973536895046877, "step_time": 
1.0010631065368654} +{"epoch": 0, "iter": 14735, "iter_tflops": 26.931817384151312, "iter_time": 0.5565705108642578, "loss": 0.32205483317375183, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.740936414796266, "step_time": 0.5215367774963379} +{"epoch": 0, "iter": 14736, "iter_tflops": 26.13774398241783, "iter_time": 0.5734793090820312, "loss": 0.35663896799087524, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.633819402745832, "step_time": 0.5424315452575683} +{"epoch": 0, "iter": 14737, "iter_tflops": 31.359234477890926, "iter_time": 0.6578953170776367, "loss": 0.37743955850601196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.38970938749988, "step_time": 0.6178877830505372} +{"epoch": 0, "iter": 14738, "iter_tflops": 13.77267150241196, "iter_time": 1.4979732513427737, "loss": 0.28309014439582825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.079198185532604, "step_time": 1.2079661636352539} +{"epoch": 0, "iter": 14739, "iter_tflops": 14.473012752585658, "iter_time": 1.4254871368408202, "loss": 0.2090975046157837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.894896547208152, "step_time": 1.221143524169922} +{"epoch": 0, "iter": 14740, "iter_tflops": 16.16647546763297, "iter_time": 1.2761652069091798, "loss": 0.25738421082496643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.756808033463393, "step_time": 1.0999256095886232} +{"epoch": 0, "iter": 14741, "iter_tflops": 25.461681534630774, "iter_time": 0.6562643966674805, "loss": 0.22794003784656525, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 27.23115593847293, "step_time": 0.6136204833984376} +{"epoch": 0, "iter": 14742, "iter_tflops": 24.24905414370337, "iter_time": 0.6890823440551759, "loss": 0.3461017608642578, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 26.34553626427045, "step_time": 0.634247673034668} +{"epoch": 0, "iter": 14743, "iter_tflops": 26.100290809590764, "iter_time": 0.6402072372436524, "loss": 0.20580238103866577, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 28.10345376570126, 
"step_time": 0.5945744323730469} +{"epoch": 0, "iter": 14744, "iter_tflops": 24.34610739610326, "iter_time": 0.6863353881835937, "loss": 0.49804550409317017, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 26.10182618838748, "step_time": 0.6401695785522461} +{"epoch": 0, "iter": 14745, "iter_tflops": 16.233891486055285, "iter_time": 1.2708655548095702, "loss": 0.7741925120353699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.447178563610308, "step_time": 1.1824888153076172} +{"epoch": 0, "iter": 14746, "iter_tflops": 16.14917484148727, "iter_time": 1.2775323638916014, "loss": 0.5450438857078552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.308844650050958, "step_time": 0.9681939048767091} +{"epoch": 0, "iter": 14747, "iter_tflops": 45.16671500248662, "iter_time": 0.4567764892578125, "loss": 0.556192934513092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95053560444629, "step_time": 0.4214681873321533} +{"epoch": 0, "iter": 14748, "iter_tflops": 49.78781561416286, "iter_time": 0.41438037109375003, "loss": 0.6433454155921936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.76545675117036, "step_time": 0.3837239513397217} +{"epoch": 0, "iter": 14749, "iter_tflops": 42.699093007190285, "iter_time": 0.4831740455627441, "loss": 0.32878348231315613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.56836359304894, "step_time": 0.44302809715270997} +{"epoch": 0, "iter": 14750, "iter_tflops": 36.58704875485567, "iter_time": 0.5638906173706055, "loss": 0.4396999478340149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.177561751292046, "step_time": 0.5134978981018067} +{"epoch": 0, "iter": 14751, "iter_tflops": 36.84176443278069, "iter_time": 0.5599920043945312, "loss": 0.30625778436660767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44120331783246, "step_time": 0.5101503372192383} +{"epoch": 0, "iter": 14752, "iter_tflops": 40.980070102746886, "iter_time": 0.5034421234130859, "loss": 0.37216421961784363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.90576915533977, "step_time": 0.4594308013916015} +{"epoch": 0, "iter": 14753, "iter_tflops": 20.413259153821294, "iter_time": 1.0106712188720701, "loss": 0.12224076688289642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.613236458977777, "step_time": 0.9545582656860351} +{"epoch": 0, "iter": 14754, "iter_tflops": 8.631489717535612, "iter_time": 2.390212371826172, "loss": 0.15480199456214905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.690185467832704, "step_time": 1.929909782409668} +{"epoch": 0, "iter": 14755, "iter_tflops": 17.296596089960328, "iter_time": 1.192783447265625, "loss": 0.17105934023857117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.856241210314725, "step_time": 0.9892047805786133} +{"epoch": 0, "iter": 14756, "iter_tflops": 53.50508768228445, "iter_time": 0.38559124755859375, "loss": 0.2396814525127411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.34619095278227, "step_time": 0.3535979499816895} +{"epoch": 0, "iter": 14757, "iter_tflops": 22.797219656887712, "iter_time": 0.7023815002441407, "loss": 0.2905886471271515, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 24.26995649285613, "step_time": 0.6597599525451661} +{"epoch": 0, "iter": 14758, "iter_tflops": 12.301216613550942, "iter_time": 1.3016879425048828, "loss": 0.5014002323150635, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 13.885000536382723, "step_time": 1.1532117195129394} +{"epoch": 0, "iter": 14759, "iter_tflops": 23.65632653914862, "iter_time": 0.6768737030029297, "loss": 0.44687727093696594, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 25.48525529774354, "step_time": 0.6282984085083008} +{"epoch": 0, "iter": 14760, "iter_tflops": 26.340783921932584, "iter_time": 0.6078917541503908, "loss": 0.4048403799533844, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 28.14864718334152, "step_time": 0.568849552154541} +{"epoch": 0, "iter": 14761, "iter_tflops": 22.11178516758168, "iter_time": 0.9330360870361327, "loss": 0.32524922490119934, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 23.835917361348454, "step_time": 0.865546443939209} +{"epoch": 0, "iter": 14762, "iter_tflops": 20.1119207973706, "iter_time": 1.025814178466797, "loss": 0.2588827311992645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.586818182421617, "step_time": 0.8391119728088379} +{"epoch": 0, "iter": 14763, "iter_tflops": 39.94796917730575, "iter_time": 0.5164491195678711, "loss": 0.2728379964828491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.86220566570572, "step_time": 0.47036151504516605} +{"epoch": 0, "iter": 14764, "iter_tflops": 40.45570450173002, "iter_time": 0.5099674758911134, "loss": 0.26958754658699036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.45828879560632, "step_time": 0.4640550518035889} +{"epoch": 0, "iter": 14765, "iter_tflops": 16.375496591169252, "iter_time": 1.2598759002685547, "loss": 0.656947910785675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.309363392071504, "step_time": 1.1919036560058593} +{"epoch": 0, "iter": 14766, "iter_tflops": 16.77695391130979, "iter_time": 1.2297282104492189, "loss": 0.6668906807899475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.47663590804369, "step_time": 0.917890630722046} +{"epoch": 0, "iter": 14767, "iter_tflops": 34.534762045591485, "iter_time": 0.5974007720947265, "loss": 0.4948802888393402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.63813082559128, "step_time": 0.5481434135437011} +{"epoch": 0, "iter": 14768, "iter_tflops": 35.53219450434874, "iter_time": 0.5806309967041016, "loss": 0.5821583271026611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.52951733628357, "step_time": 0.535462028503418} +{"epoch": 0, "iter": 14769, "iter_tflops": 17.095746332027602, "iter_time": 1.2067968902587891, "loss": 0.2142229825258255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.139860369921394, "step_time": 1.137334747314453} +{"epoch": 0, "iter": 14770, "iter_tflops": 19.112633885449153, "iter_time": 1.0794479522705078, "loss": 0.2633531093597412, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 25.206020217990343, "step_time": 0.818498649597168} +{"epoch": 0, "iter": 14771, "iter_tflops": 51.447138167585706, "iter_time": 0.40101537704467766, "loss": 0.23326680064201355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.05475341986939, "step_time": 0.3680525245666504} +{"epoch": 0, "iter": 14772, "iter_tflops": 47.17664379844316, "iter_time": 0.43731583786010736, "loss": 0.2719997763633728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.027840489379514, "step_time": 0.4043105354309082} +{"epoch": 0, "iter": 14773, "iter_tflops": 31.12786713897512, "iter_time": 0.6627853240966797, "loss": 0.3716791570186615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.127461288552674, "step_time": 0.6227791900634766} +{"epoch": 0, "iter": 14774, "iter_tflops": 18.387369525242967, "iter_time": 1.1220252838134765, "loss": 0.5755277276039124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.62237220648055, "step_time": 1.0004229049682616} +{"epoch": 0, "iter": 14775, "iter_tflops": 44.64139727036344, "iter_time": 0.4621516075134277, "loss": 0.4098009169101715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87049657420293, "step_time": 0.42215845870971686} +{"epoch": 0, "iter": 14776, "iter_tflops": 49.69569857625576, "iter_time": 0.41514847564697266, "loss": 0.5918168425559998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.9425576604865, "step_time": 0.3824641323089599} +{"epoch": 0, "iter": 14777, "iter_tflops": 24.398431745865125, "iter_time": 0.8082145690917969, "loss": 0.006124843843281269, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 25.6058304680609, "step_time": 0.770104606628418} +{"epoch": 0, "iter": 14778, "iter_tflops": 13.49415309110265, "iter_time": 1.46131201171875, "loss": 0.020926443859934807, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 17.570183169006814, "step_time": 1.1223086185455322} +{"epoch": 0, "iter": 14779, "iter_tflops": 38.776778805716745, "iter_time": 0.5085303268432617, "loss": 0.006155609618872404, "lr": 
3e-05, "seqlen": 7840.0, "step_tflops": 43.24421761934007, "step_time": 0.4559954853057861} +{"epoch": 0, "iter": 14780, "iter_tflops": 41.427951355483394, "iter_time": 0.4759870414733886, "loss": 0.0020447855349630117, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 45.90241369115265, "step_time": 0.42958891296386714} +{"epoch": 0, "iter": 14781, "iter_tflops": 17.824651191331984, "iter_time": 1.1574472503662108, "loss": 0.2843385636806488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.571202027372742, "step_time": 1.110918586730957} +{"epoch": 0, "iter": 14782, "iter_tflops": 17.663354088744182, "iter_time": 1.1680167541503905, "loss": 0.2774055302143097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.68093426195404, "step_time": 0.9515776977539061} +{"epoch": 0, "iter": 14783, "iter_tflops": 49.47770353021261, "iter_time": 0.4169775886535645, "loss": 0.3527160882949829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.082471510339346, "step_time": 0.38147467994689943} +{"epoch": 0, "iter": 14784, "iter_tflops": 48.41990862821315, "iter_time": 0.42608699798583993, "loss": 0.3106076419353485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.22177986634907, "step_time": 0.3950668392181396} +{"epoch": 0, "iter": 14785, "iter_tflops": 26.952652565577946, "iter_time": 0.7654568862915039, "loss": 0.1867828071117401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.398714598394125, "step_time": 0.7264798355102539} +{"epoch": 0, "iter": 14786, "iter_tflops": 13.618465622994721, "iter_time": 1.5149352416992188, "loss": 0.12894602119922638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.223255081548512, "step_time": 1.2716987686157226} +{"epoch": 0, "iter": 14787, "iter_tflops": 38.32897201821244, "iter_time": 0.5382636795043946, "loss": 0.08742375671863556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01974233265073, "step_time": 0.4909857215881347} +{"epoch": 0, "iter": 14788, "iter_tflops": 41.0215101970296, "iter_time": 0.5029335441589355, "loss": 
0.10146376490592957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.80795396185958, "step_time": 0.4604337329864502} +{"epoch": 0, "iter": 14789, "iter_tflops": 34.24234747200882, "iter_time": 0.602502311706543, "loss": 0.6630762815475464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.84428097992205, "step_time": 0.5451574974060058} +{"epoch": 0, "iter": 14790, "iter_tflops": 35.65737641117879, "iter_time": 0.5785925827026367, "loss": 0.47851690649986267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.8711203774615, "step_time": 0.5307563381195068} +{"epoch": 0, "iter": 14791, "iter_tflops": 39.21828631746595, "iter_time": 0.526058006286621, "loss": 0.5180723071098328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.06379021402372, "step_time": 0.4790821571350098} +{"epoch": 0, "iter": 14792, "iter_tflops": 36.98263793795265, "iter_time": 0.5578588943481445, "loss": 0.4816387891769409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.41456058731996, "step_time": 0.5104866466522217} +{"epoch": 0, "iter": 14793, "iter_tflops": 18.884752424960183, "iter_time": 1.0924736022949217, "loss": 0.44002315402030945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.47862988255862, "step_time": 1.007445011138916} +{"epoch": 0, "iter": 14794, "iter_tflops": 24.37668282258747, "iter_time": 0.8463454055786134, "loss": 0.43503978848457336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.34704241052455, "step_time": 0.754417724609375} +{"epoch": 0, "iter": 14795, "iter_tflops": 40.70760583168764, "iter_time": 0.5068117637634277, "loss": 0.46369999647140503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.624127053566646, "step_time": 0.4623304672241211} +{"epoch": 0, "iter": 14796, "iter_tflops": 38.291963827625246, "iter_time": 0.5387838973999023, "loss": 0.3345969617366791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.47713447247641, "step_time": 0.4974088439941406} +{"epoch": 0, "iter": 14797, "iter_tflops": 23.09039813418668, "iter_time": 0.8934923248291016, 
"loss": 0.13865846395492554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.606735183074882, "step_time": 0.8384327850341797} +{"epoch": 0, "iter": 14798, "iter_tflops": 13.809552285869696, "iter_time": 1.49397265625, "loss": 0.135946124792099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.3845952880978, "step_time": 1.2591762657165526} +{"epoch": 0, "iter": 14799, "iter_tflops": 46.82079306724469, "iter_time": 0.44063955688476564, "loss": 0.1087925061583519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.05396439583115, "step_time": 0.40410365295410156} +{"epoch": 0, "iter": 14800, "iter_tflops": 44.56464729788701, "iter_time": 0.4629475326538086, "loss": 0.12740586698055267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42270809477957, "step_time": 0.42606236457824703} +{"epoch": 0, "iter": 14801, "iter_tflops": 33.78556297877318, "iter_time": 0.6106482086181642, "loss": 0.6367164254188538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.38816901336303, "step_time": 0.5669725646972656} +{"epoch": 0, "iter": 14802, "iter_tflops": 38.93360278914092, "iter_time": 0.529904556274414, "loss": 0.5148093700408936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.84316509125608, "step_time": 0.49305767059326167} +{"epoch": 0, "iter": 14803, "iter_tflops": 45.781515823574985, "iter_time": 0.45064242935180665, "loss": 0.6728223562240601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.49657987880109, "step_time": 0.416818567276001} +{"epoch": 0, "iter": 14804, "iter_tflops": 48.50987909777521, "iter_time": 0.42529674148559576, "loss": 0.8452956080436707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.27848315365237, "step_time": 0.39463833427429207} +{"epoch": 0, "iter": 14805, "iter_tflops": 39.17519452532122, "iter_time": 0.5266366577148437, "loss": 0.06336463242769241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.529859250808705, "step_time": 0.4850966796875} +{"epoch": 0, "iter": 14806, "iter_tflops": 32.774466792130674, "iter_time": 
0.6294867782592772, "loss": 0.07547274231910706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.070664745462956, "step_time": 0.43830044937133794} +{"epoch": 0, "iter": 14807, "iter_tflops": 53.838100047908824, "iter_time": 0.38320619583129883, "loss": 0.05210613086819649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.81659992546881, "step_time": 0.35076991081237796} +{"epoch": 0, "iter": 14808, "iter_tflops": 56.027776306557946, "iter_time": 0.3682297401428223, "loss": 0.061288993805646896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.996722894380795, "step_time": 0.3382328186035156} +{"epoch": 0, "iter": 14809, "iter_tflops": 43.72648355521624, "iter_time": 0.47182146453857426, "loss": 0.30921971797943115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.44707706374528, "step_time": 0.4348232765197754} +{"epoch": 0, "iter": 14810, "iter_tflops": 47.62356905977546, "iter_time": 0.4332118301391601, "loss": 0.27209606766700745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.68254463879408, "step_time": 0.3991888103485107} +{"epoch": 0, "iter": 14811, "iter_tflops": 50.664653300081895, "iter_time": 0.40720881652832036, "loss": 0.36429861187934875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.17578480102323, "step_time": 0.3739157238006592} +{"epoch": 0, "iter": 14812, "iter_tflops": 50.51052730244504, "iter_time": 0.40845135879516603, "loss": 0.3406064808368683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.859479569014525, "step_time": 0.3760716228485107} +{"epoch": 0, "iter": 14813, "iter_tflops": 45.97071443888718, "iter_time": 0.44878775024414064, "loss": 0.08482567965984344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30677848127503, "step_time": 0.4101056385040283} +{"epoch": 0, "iter": 14814, "iter_tflops": 47.55502240440096, "iter_time": 0.43383626937866215, "loss": 0.05104517564177513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.26579385905993, "step_time": 0.3947341461181641} +{"epoch": 0, "iter": 14815, "iter_tflops": 
53.12950604456771, "iter_time": 0.3883170585632324, "loss": 0.05214393138885498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.85776983553028, "step_time": 0.35658293724060064} +{"epoch": 0, "iter": 14816, "iter_tflops": 52.615860536022886, "iter_time": 0.3921078796386719, "loss": 0.09546858072280884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.466203114639406, "step_time": 0.35901264381408693} +{"epoch": 0, "iter": 14817, "iter_tflops": 22.759479758601024, "iter_time": 0.9064835281372071, "loss": 0.6943108439445496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.74322256875806, "step_time": 0.8689255828857423} +{"epoch": 0, "iter": 14818, "iter_tflops": 24.293458616034147, "iter_time": 0.8492448043823242, "loss": 0.5598251819610596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.378148588014938, "step_time": 0.7535605792999268} +{"epoch": 0, "iter": 14819, "iter_tflops": 36.03730127639583, "iter_time": 0.5724927444458008, "loss": 0.7330012321472168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.04983927849714, "step_time": 0.5283272323608399} +{"epoch": 0, "iter": 14820, "iter_tflops": 37.353091598627834, "iter_time": 0.5523262634277344, "loss": 0.6879605650901794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.160500823169116, "step_time": 0.5137160415649414} +{"epoch": 0, "iter": 14821, "iter_tflops": 34.830829268117206, "iter_time": 0.5923227767944335, "loss": 0.3816268742084503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.63177881589568, "step_time": 0.5340446166992188} +{"epoch": 0, "iter": 14822, "iter_tflops": 43.91143713702273, "iter_time": 0.4698341674804687, "loss": 0.24352166056632996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.793765189681714, "step_time": 0.42282233047485357} +{"epoch": 0, "iter": 14823, "iter_tflops": 47.854778623788576, "iter_time": 0.43111877441406254, "loss": 0.3299528956413269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.83319407691948, "step_time": 0.3980285968780518} +{"epoch": 0, "iter": 14824, 
"iter_tflops": 49.147207651189504, "iter_time": 0.41978160095214845, "loss": 0.31694915890693665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.35745739251262, "step_time": 0.38665810775756837} +{"epoch": 0, "iter": 14825, "iter_tflops": 39.301507974047745, "iter_time": 0.5249440689086915, "loss": 0.7520068883895874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.289429898148434, "step_time": 0.4878546142578125} +{"epoch": 0, "iter": 14826, "iter_tflops": 10.350550619128686, "iter_time": 1.993236328125, "loss": 0.514620840549469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.136942287123384, "step_time": 1.699859241485596} +{"epoch": 0, "iter": 14827, "iter_tflops": 15.0841882512783, "iter_time": 1.367729782104492, "loss": 0.6781737804412842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.88953431289067, "step_time": 1.2215312232971192} +{"epoch": 0, "iter": 14828, "iter_tflops": 35.491399511383975, "iter_time": 0.5812983932495117, "loss": 0.7898178100585938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.51798456504566, "step_time": 0.5356223526000977} +{"epoch": 0, "iter": 14829, "iter_tflops": 15.268145247238214, "iter_time": 1.1186217498779296, "loss": 0.2799288034439087, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 16.51627768871918, "step_time": 1.0340876846313478} +{"epoch": 0, "iter": 14830, "iter_tflops": 17.575556857908666, "iter_time": 0.9717631988525391, "loss": 0.4073611795902252, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 19.554990675528714, "step_time": 0.8733974685668945} +{"epoch": 0, "iter": 14831, "iter_tflops": 29.787520512293607, "iter_time": 0.5733702926635742, "loss": 0.4460544288158417, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 31.799539542926073, "step_time": 0.5370920333862305} +{"epoch": 0, "iter": 14832, "iter_tflops": 31.376261446715443, "iter_time": 0.5443376159667969, "loss": 0.3598819375038147, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 33.3011384879965, "step_time": 0.5128737373352051} +{"epoch": 0, "iter": 
14833, "iter_tflops": 32.102231827283646, "iter_time": 0.6426685104370117, "loss": 0.5376025438308716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.425161434209315, "step_time": 0.5993027381896973} +{"epoch": 0, "iter": 14834, "iter_tflops": 15.568294783847234, "iter_time": 1.3251993103027342, "loss": 0.7253211140632629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.345738737401664, "step_time": 1.1245714225769043} +{"epoch": 0, "iter": 14835, "iter_tflops": 47.63604352149107, "iter_time": 0.4330983848571778, "loss": 0.7121954560279846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.595753337919035, "step_time": 0.3998603019714355} +{"epoch": 0, "iter": 14836, "iter_tflops": 48.417300852726434, "iter_time": 0.42610994720458995, "loss": 0.672154426574707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.09821134037032, "step_time": 0.39600387382507324} +{"epoch": 0, "iter": 14837, "iter_tflops": 33.3542885869617, "iter_time": 0.6185439529418946, "loss": 0.4494186043739319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.519414799462055, "step_time": 0.5808399047851562} +{"epoch": 0, "iter": 14838, "iter_tflops": 11.575170591957049, "iter_time": 1.782357620239258, "loss": 0.4382692575454712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.565642202038235, "step_time": 1.5208342666625978} +{"epoch": 0, "iter": 14839, "iter_tflops": 12.864200408758794, "iter_time": 1.603760269165039, "loss": 0.36299192905426025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.16159074414986, "step_time": 1.2765509185791015} +{"epoch": 0, "iter": 14840, "iter_tflops": 37.91332334748263, "iter_time": 0.5441647338867188, "loss": 0.4785941541194916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.66965307047769, "step_time": 0.4951107578277588} +{"epoch": 0, "iter": 14841, "iter_tflops": 17.131904546527164, "iter_time": 0.9418270034790039, "loss": 0.33506858348846436, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 18.216615186999835, "step_time": 0.8857457962036132} +{"epoch": 
0, "iter": 14842, "iter_tflops": 10.082991946463482, "iter_time": 1.6002482604980468, "loss": 0.5026620626449585, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 11.99814372765412, "step_time": 1.3448155555725099} +{"epoch": 0, "iter": 14843, "iter_tflops": 23.14686092595045, "iter_time": 0.6970833053588867, "loss": 0.3745109438896179, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 24.97868986955023, "step_time": 0.6459622344970704} +{"epoch": 0, "iter": 14844, "iter_tflops": 23.15917607414542, "iter_time": 0.6967126235961915, "loss": 0.5047786831855774, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 24.952259075675805, "step_time": 0.6466464729309082} +{"epoch": 0, "iter": 14845, "iter_tflops": 16.628535330683636, "iter_time": 1.2407041931152345, "loss": 0.12739089131355286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.667571037036836, "step_time": 1.1677379684448241} +{"epoch": 0, "iter": 14846, "iter_tflops": 17.378009251226395, "iter_time": 1.1871954498291015, "loss": 0.09779471904039383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.77021729742498, "step_time": 0.9933017654418945} +{"epoch": 0, "iter": 14847, "iter_tflops": 37.26661259964524, "iter_time": 0.5536079635620117, "loss": 0.08984677493572235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.914825390687774, "step_time": 0.5042449359893798} +{"epoch": 0, "iter": 14848, "iter_tflops": 37.55180603743997, "iter_time": 0.5494034957885743, "loss": 0.06524299830198288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.96990060638592, "step_time": 0.5035670871734619} +{"epoch": 0, "iter": 14849, "iter_tflops": 17.48031229590619, "iter_time": 1.1802474212646485, "loss": 0.5250831842422485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.61346553277296, "step_time": 1.1083961486816405} +{"epoch": 0, "iter": 14850, "iter_tflops": 26.604945718338218, "iter_time": 0.775460838317871, "loss": 0.6471682786941528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.562013516911215, "step_time": 0.5969297332763672} 
+{"epoch": 0, "iter": 14851, "iter_tflops": 32.60427297523607, "iter_time": 0.6327726898193359, "loss": 0.7263823747634888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.49987495445489, "step_time": 0.581159610748291} +{"epoch": 0, "iter": 14852, "iter_tflops": 37.86989268634956, "iter_time": 0.544788803100586, "loss": 0.6605786681175232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.8303544359259, "step_time": 0.5052881317138672} +{"epoch": 0, "iter": 14853, "iter_tflops": 17.163753203125015, "iter_time": 1.2020152740478518, "loss": 0.02151741459965706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.17632599178502, "step_time": 1.1350530090332032} +{"epoch": 0, "iter": 14854, "iter_tflops": 16.737965277058816, "iter_time": 1.2325926818847657, "loss": 0.028286583721637726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.685842974704887, "step_time": 0.9513623027801513} +{"epoch": 0, "iter": 14855, "iter_tflops": 39.61656591617939, "iter_time": 0.5207693557739258, "loss": 0.04569010064005852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.039853118471676, "step_time": 0.4684641761779785} +{"epoch": 0, "iter": 14856, "iter_tflops": 39.09340441402528, "iter_time": 0.5277384719848632, "loss": 0.14322882890701294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.253058941648014, "step_time": 0.47698576736450193} +{"epoch": 0, "iter": 14857, "iter_tflops": 19.167912261376657, "iter_time": 1.0763349304199217, "loss": 0.6268188953399658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.7736668889751, "step_time": 0.9931368217468262} +{"epoch": 0, "iter": 14858, "iter_tflops": 20.241580983108644, "iter_time": 1.0192431869506837, "loss": 0.47555139660835266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.598602721652224, "step_time": 0.9129366874694823} +{"epoch": 0, "iter": 14859, "iter_tflops": 42.31175464086036, "iter_time": 0.48759720993041994, "loss": 0.5922842621803284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06539895271411, "step_time": 
0.44786529541015624} +{"epoch": 0, "iter": 14860, "iter_tflops": 47.764004142994445, "iter_time": 0.43193810653686526, "loss": 0.55801922082901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.421821949679654, "step_time": 0.40121280670166015} +{"epoch": 0, "iter": 14861, "iter_tflops": 29.970113532425565, "iter_time": 0.688388900756836, "loss": 0.1585802435874939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.81151560296913, "step_time": 0.6485416717529298} +{"epoch": 0, "iter": 14862, "iter_tflops": 10.51588858516725, "iter_time": 1.9618973083496094, "loss": 0.3337520360946655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.063734557531845, "step_time": 1.710174690246582} +{"epoch": 0, "iter": 14863, "iter_tflops": 16.439944652687977, "iter_time": 1.2549369201660157, "loss": 0.18801718950271606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.385830560265486, "step_time": 1.1221192016601562} +{"epoch": 0, "iter": 14864, "iter_tflops": 39.700593588313374, "iter_time": 0.5196671295166015, "loss": 0.22209224104881287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.530122265084096, "step_time": 0.47394981765747063} +{"epoch": 0, "iter": 14865, "iter_tflops": 14.742792958246605, "iter_time": 1.1417633514404297, "loss": 0.4170825481414795, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 15.814536626790074, "step_time": 1.0643865890502928} +{"epoch": 0, "iter": 14866, "iter_tflops": 27.30857810968767, "iter_time": 0.6163916931152345, "loss": 0.3596975803375244, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 30.023669977777384, "step_time": 0.5606503372192383} +{"epoch": 0, "iter": 14867, "iter_tflops": 29.578113919829295, "iter_time": 0.5690958099365234, "loss": 0.2822337746620178, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 31.395996070464566, "step_time": 0.5361441841125487} +{"epoch": 0, "iter": 14868, "iter_tflops": 30.208093947164624, "iter_time": 0.5572275009155273, "loss": 0.3053065538406372, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 
32.20023665050457, "step_time": 0.5227533226013183} +{"epoch": 0, "iter": 14869, "iter_tflops": 30.878308767314966, "iter_time": 0.6681419525146485, "loss": 0.16439291834831238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.86483483101019, "step_time": 0.6277558860778809} +{"epoch": 0, "iter": 14870, "iter_tflops": 13.444994061984962, "iter_time": 1.534481414794922, "loss": 0.11422992497682571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.830130797072904, "step_time": 1.3032800407409668} +{"epoch": 0, "iter": 14871, "iter_tflops": 40.2307270652579, "iter_time": 0.5128193054199218, "loss": 0.1843033879995346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.37375945266991, "step_time": 0.46493904876708986} +{"epoch": 0, "iter": 14872, "iter_tflops": 37.949663098783354, "iter_time": 0.543643653869629, "loss": 0.25108468532562256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.69498193159543, "step_time": 0.49480998802185056} +{"epoch": 0, "iter": 14873, "iter_tflops": 26.11599022779993, "iter_time": 0.7899793701171873, "loss": 0.45551443099975586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.09361713502444, "step_time": 0.7343694267272949} +{"epoch": 0, "iter": 14874, "iter_tflops": 10.85136528673943, "iter_time": 1.9012440338134766, "loss": 0.439106285572052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.953129125559723, "step_time": 1.478599769592285} +{"epoch": 0, "iter": 14875, "iter_tflops": 13.533699991141527, "iter_time": 1.524423736572266, "loss": 0.31269338726997375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.722360343640837, "step_time": 1.3122135009765623} +{"epoch": 0, "iter": 14876, "iter_tflops": 23.304869237033667, "iter_time": 0.8852696533203125, "loss": 0.3941679298877716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.41196290894693, "step_time": 0.726141082763672} +{"epoch": 0, "iter": 14877, "iter_tflops": 20.84688006325057, "iter_time": 0.7131478042602539, "loss": 0.4229058027267456, "lr": 3e-05, "seqlen": 5952.0, 
"step_tflops": 22.13992898819552, "step_time": 0.6714974899291992} +{"epoch": 0, "iter": 14878, "iter_tflops": 10.048096234654487, "iter_time": 1.4795744781494142, "loss": 0.5186228156089783, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 12.270324254936245, "step_time": 1.2116148223876952} +{"epoch": 0, "iter": 14879, "iter_tflops": 26.62751457753819, "iter_time": 0.558328742980957, "loss": 0.4287853538990021, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 28.3806409714484, "step_time": 0.5238397102355957} +{"epoch": 0, "iter": 14880, "iter_tflops": 25.369170359405434, "iter_time": 0.5860225830078125, "loss": 0.293454647064209, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 26.873673057445412, "step_time": 0.5532145423889161} +{"epoch": 0, "iter": 14881, "iter_tflops": 31.796026731241678, "iter_time": 0.6488575973510743, "loss": 0.6166427731513977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.1136176876902, "step_time": 0.6047758903503417} +{"epoch": 0, "iter": 14882, "iter_tflops": 15.013222956060458, "iter_time": 1.3741948394775392, "loss": 0.6255779266357422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.04478518004069, "step_time": 1.2104050178527832} +{"epoch": 0, "iter": 14883, "iter_tflops": 36.37355727837426, "iter_time": 0.5672003250122071, "loss": 0.7296825051307678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.61810038718224, "step_time": 0.5207491855621338} +{"epoch": 0, "iter": 14884, "iter_tflops": 35.36378216787842, "iter_time": 0.583396125793457, "loss": 0.7729800939559937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.386736056674515, "step_time": 0.5374537048339844} +{"epoch": 0, "iter": 14885, "iter_tflops": 15.070772847311275, "iter_time": 1.368947280883789, "loss": 0.6371292471885681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.06353821378862, "step_time": 1.2843430404663085} +{"epoch": 0, "iter": 14886, "iter_tflops": 16.007714321698383, "iter_time": 1.2888219451904297, "loss": 0.5855612754821777, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 18.34345526074599, "step_time": 1.1247114143371582} +{"epoch": 0, "iter": 14887, "iter_tflops": 43.31947021124725, "iter_time": 0.47625452041625976, "loss": 0.6620343327522278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.806893961151516, "step_time": 0.4407704029083252} +{"epoch": 0, "iter": 14888, "iter_tflops": 46.65225716195581, "iter_time": 0.44223141098022467, "loss": 0.49822312593460083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.207939250502875, "step_time": 0.410912971496582} +{"epoch": 0, "iter": 14889, "iter_tflops": 25.223967700082493, "iter_time": 0.8179162673950195, "loss": 0.4664198160171509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.98987078450963, "step_time": 0.7644013442993165} +{"epoch": 0, "iter": 14890, "iter_tflops": 15.131831583608674, "iter_time": 1.3634234161376952, "loss": 0.5419549942016602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.857397341660384, "step_time": 1.1553247718811035} +{"epoch": 0, "iter": 14891, "iter_tflops": 47.64232202418014, "iter_time": 0.4330413093566894, "loss": 0.5383754372596741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.775850310739216, "step_time": 0.3984694290161133} +{"epoch": 0, "iter": 14892, "iter_tflops": 48.134418593291606, "iter_time": 0.4286141624450684, "loss": 0.5214881300926208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.01793283327738, "step_time": 0.3966150207519531} +{"epoch": 0, "iter": 14893, "iter_tflops": 35.70769526931168, "iter_time": 0.5777772369384765, "loss": 0.2920877933502197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.322563223740595, "step_time": 0.5383536949157715} +{"epoch": 0, "iter": 14894, "iter_tflops": 9.052937913592192, "iter_time": 2.2789390258789064, "loss": 0.3257680833339691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.254157737806304, "step_time": 1.8331974716186523} +{"epoch": 0, "iter": 14895, "iter_tflops": 12.148971655118462, "iter_time": 1.6981761169433593, "loss": 0.350049763917923, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 13.566713682194845, "step_time": 1.520714153289795} +{"epoch": 0, "iter": 14896, "iter_tflops": 37.08595920155208, "iter_time": 0.5563047027587891, "loss": 0.250602662563324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44908819638094, "step_time": 0.5100508918762207} +{"epoch": 0, "iter": 14897, "iter_tflops": 17.27031943943214, "iter_time": 0.8726635360717774, "loss": 0.4314596354961395, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 18.392375925450924, "step_time": 0.81942529296875} +{"epoch": 0, "iter": 14898, "iter_tflops": 26.69198127739375, "iter_time": 0.564633171081543, "loss": 0.3758651316165924, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 28.48966078093768, "step_time": 0.5290051765441894} +{"epoch": 0, "iter": 14899, "iter_tflops": 26.048861159533317, "iter_time": 0.5785733947753906, "loss": 0.2455594688653946, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 27.786916875251435, "step_time": 0.542383960723877} +{"epoch": 0, "iter": 14900, "iter_tflops": 27.34390688151188, "iter_time": 0.5511713485717774, "loss": 0.22540566325187683, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 28.962434072791357, "step_time": 0.5203698692321778} +{"epoch": 0, "iter": 14901, "iter_tflops": 30.77336657422372, "iter_time": 0.6704204254150391, "loss": 0.484190434217453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.777371774274776, "step_time": 0.6294309883117676} +{"epoch": 0, "iter": 14902, "iter_tflops": 20.88254373193326, "iter_time": 0.9879588317871094, "loss": 0.4650898873806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.59878678490576, "step_time": 0.8059402847290039} +{"epoch": 0, "iter": 14903, "iter_tflops": 50.04450421190464, "iter_time": 0.4122549285888672, "loss": 0.5250163674354553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.23026822190835, "step_time": 0.3804350261688233} +{"epoch": 0, "iter": 14904, "iter_tflops": 51.05024164823393, "iter_time": 0.40413312149047853, "loss": 0.5269806981086731, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 55.38590565821888, "step_time": 0.3724971771240234} +{"epoch": 0, "iter": 14905, "iter_tflops": 30.62728871613082, "iter_time": 0.6114463806152344, "loss": 0.03345876559615135, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 32.64481906330306, "step_time": 0.573657485961914} +{"epoch": 0, "iter": 14906, "iter_tflops": 7.450259598367482, "iter_time": 2.5135962829589844, "loss": 0.06123637780547142, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 8.860167010552203, "step_time": 2.1136108169555663} +{"epoch": 0, "iter": 14907, "iter_tflops": 11.49209230262116, "iter_time": 1.6295505065917968, "loss": 0.03916379436850548, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 14.495120151029742, "step_time": 1.2919482307434083} +{"epoch": 0, "iter": 14908, "iter_tflops": 37.66537694240943, "iter_time": 0.4971925506591797, "loss": 0.028925146907567978, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 41.646396236853846, "step_time": 0.449665433883667} +{"epoch": 0, "iter": 14909, "iter_tflops": 16.773118452390783, "iter_time": 0.8498598937988281, "loss": 0.4154857099056244, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 17.88404464527838, "step_time": 0.7970680541992187} +{"epoch": 0, "iter": 14910, "iter_tflops": 7.040877981669093, "iter_time": 2.0245771484375004, "loss": 0.25519227981567383, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 8.649897842121067, "step_time": 1.6479732971191405} +{"epoch": 0, "iter": 14911, "iter_tflops": 11.99057930325624, "iter_time": 1.1888333587646485, "loss": 0.23453161120414734, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 14.017671158965376, "step_time": 1.0169164695739745} +{"epoch": 0, "iter": 14912, "iter_tflops": 25.576017387149665, "iter_time": 0.5573502883911133, "loss": 0.3527849316596985, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 27.213708938498428, "step_time": 0.5238095512390136} +{"epoch": 0, "iter": 14913, "iter_tflops": 21.554836966218538, "iter_time": 0.7485693511962892, "loss": 0.27138376235961914, 
"lr": 3e-05, "seqlen": 6448.0, "step_tflops": 22.80648928400465, "step_time": 0.7074868087768555} +{"epoch": 0, "iter": 14914, "iter_tflops": 13.98280298691666, "iter_time": 1.1539381866455078, "loss": 0.30873817205429077, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 16.654584530339847, "step_time": 0.9688197441101073} +{"epoch": 0, "iter": 14915, "iter_tflops": 28.249663709955655, "iter_time": 0.5711675186157227, "loss": 0.1485130786895752, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 30.203325127538932, "step_time": 0.5342223167419434} +{"epoch": 0, "iter": 14916, "iter_tflops": 27.85309675562907, "iter_time": 0.5792996902465821, "loss": 0.31337735056877136, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 29.424727182041405, "step_time": 0.5483581962585449} +{"epoch": 0, "iter": 14917, "iter_tflops": 34.16226224083805, "iter_time": 0.6039147338867188, "loss": 0.033590707927942276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.83955453949132, "step_time": 0.5600255966186524} +{"epoch": 0, "iter": 14918, "iter_tflops": 16.453129995460902, "iter_time": 1.2539312286376953, "loss": 0.0729251354932785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.938576697449477, "step_time": 0.9853149909973143} +{"epoch": 0, "iter": 14919, "iter_tflops": 50.735215737560544, "iter_time": 0.4066424713134766, "loss": 0.06915745884180069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.2696797668528, "step_time": 0.37328049659729007} +{"epoch": 0, "iter": 14920, "iter_tflops": 55.37105183440083, "iter_time": 0.3725971031188965, "loss": 0.050013113766908646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.409433605081354, "step_time": 0.3415210552215576} +{"epoch": 0, "iter": 14921, "iter_tflops": 45.92933365579804, "iter_time": 0.4491920928955079, "loss": 0.14626017212867737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.38023904815358, "step_time": 0.4095076541900635} +{"epoch": 0, "iter": 14922, "iter_tflops": 44.06964303828659, "iter_time": 0.4681475067138673, "loss": 
0.1289873719215393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.155820679095164, "step_time": 0.4284236717224121} +{"epoch": 0, "iter": 14923, "iter_tflops": 49.48250881436153, "iter_time": 0.41693709564208986, "loss": 0.14113229513168335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.428856478929845, "step_time": 0.3861414012908936} +{"epoch": 0, "iter": 14924, "iter_tflops": 56.378773669167565, "iter_time": 0.3659372520446777, "loss": 0.15732020139694214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.22495139095103, "step_time": 0.3369719867706299} +{"epoch": 0, "iter": 14925, "iter_tflops": 24.639768979288814, "iter_time": 0.8373087234497071, "loss": 0.36577072739601135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.834975712359515, "step_time": 0.7985722045898438} +{"epoch": 0, "iter": 14926, "iter_tflops": 16.892382811764744, "iter_time": 1.221325241088867, "loss": 0.38462263345718384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.35027877558456, "step_time": 1.0137990608215333} +{"epoch": 0, "iter": 14927, "iter_tflops": 36.92228498832627, "iter_time": 0.5587707672119141, "loss": 0.5375725626945496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44245364132875, "step_time": 0.5101345653533935} +{"epoch": 0, "iter": 14928, "iter_tflops": 40.19811764869848, "iter_time": 0.5132353134155274, "loss": 0.6980799436569214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.90954441368144, "step_time": 0.469854419708252} +{"epoch": 0, "iter": 14929, "iter_tflops": 14.443549809400638, "iter_time": 1.4283949432373049, "loss": 0.3299596905708313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.38123302073435, "step_time": 1.3413159713745118} +{"epoch": 0, "iter": 14930, "iter_tflops": 23.778823633912406, "iter_time": 0.8676246490478515, "loss": 0.45312103629112244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.136985194914633, "step_time": 0.7080723476409913} +{"epoch": 0, "iter": 14931, "iter_tflops": 37.138787615567374, "iter_time": 
0.5555133819580078, "loss": 0.4539252817630768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.813410145805435, "step_time": 0.5054979095458985} +{"epoch": 0, "iter": 14932, "iter_tflops": 42.06537469395068, "iter_time": 0.49045310211181636, "loss": 0.5317050218582153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.01549179664113, "step_time": 0.448351037979126} +{"epoch": 0, "iter": 14933, "iter_tflops": 18.88129349101413, "iter_time": 1.0926737365722656, "loss": 0.1416253000497818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.386658298386255, "step_time": 1.011989959716797} +{"epoch": 0, "iter": 14934, "iter_tflops": 18.08859899674902, "iter_time": 1.1405578460693357, "loss": 0.12206874787807465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.900257119859482, "step_time": 0.9420480041503907} +{"epoch": 0, "iter": 14935, "iter_tflops": 41.1417693753863, "iter_time": 0.5014634475708009, "loss": 0.12553294003009796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37106136095819, "step_time": 0.454719217300415} +{"epoch": 0, "iter": 14936, "iter_tflops": 40.354266515776764, "iter_time": 0.5112493743896485, "loss": 0.1729350984096527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52958672380191, "step_time": 0.4633120365142822} +{"epoch": 0, "iter": 14937, "iter_tflops": 28.458138840664677, "iter_time": 0.7249628524780274, "loss": 0.7365461587905884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.404135010178518, "step_time": 0.6569546813964844} +{"epoch": 0, "iter": 14938, "iter_tflops": 33.58439075909664, "iter_time": 0.614306022644043, "loss": 0.5677704811096191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.90291508960774, "step_time": 0.5443141632080079} +{"epoch": 0, "iter": 14939, "iter_tflops": 36.77197414072374, "iter_time": 0.5610548248291015, "loss": 0.6344488263130188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.26019015930152, "step_time": 0.5124440155029297} +{"epoch": 0, "iter": 14940, "iter_tflops": 36.41713935325926, 
"iter_time": 0.5665215301513673, "loss": 0.6516103148460388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.67360629867214, "step_time": 0.5200206241607667} +{"epoch": 0, "iter": 14941, "iter_tflops": 14.853145225781667, "iter_time": 1.3890050354003907, "loss": 0.08835898339748383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.09384475166923, "step_time": 1.2819244766235351} +{"epoch": 0, "iter": 14942, "iter_tflops": 18.977440877365403, "iter_time": 1.0871378097534181, "loss": 0.11500667780637741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.58237985077692, "step_time": 0.7479809074401855} +{"epoch": 0, "iter": 14943, "iter_tflops": 38.1180772994543, "iter_time": 0.5412417144775391, "loss": 0.11989698559045792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.04017481952258, "step_time": 0.49074709129333494} +{"epoch": 0, "iter": 14944, "iter_tflops": 39.03675598074116, "iter_time": 0.5285043029785156, "loss": 0.1589725911617279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.98736718994071, "step_time": 0.47993387031555174} +{"epoch": 0, "iter": 14945, "iter_tflops": 27.39535521901092, "iter_time": 0.572531349182129, "loss": 0.03785101696848869, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 30.559927914266936, "step_time": 0.5132440013885498} +{"epoch": 0, "iter": 14946, "iter_tflops": 32.425637445262296, "iter_time": 0.48371291732788085, "loss": 0.02150673419237137, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 35.770670609753104, "step_time": 0.43847933006286627} +{"epoch": 0, "iter": 14947, "iter_tflops": 32.6491847914437, "iter_time": 0.48040095901489255, "loss": 0.047174371778964996, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 36.01852326337042, "step_time": 0.4354620418548584} +{"epoch": 0, "iter": 14948, "iter_tflops": 30.546862090132652, "iter_time": 0.5134635314941407, "loss": 0.024775493890047073, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 33.62695438863978, "step_time": 0.4664323596954345} +{"epoch": 0, "iter": 14949, "iter_tflops": 
19.35731342967927, "iter_time": 0.9674351196289063, "loss": 0.015748649835586548, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 20.797795969982534, "step_time": 0.9004292984008789} +{"epoch": 0, "iter": 14950, "iter_tflops": 20.25180758417593, "iter_time": 0.9247048568725585, "loss": 0.014341793023049831, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 25.189193681278965, "step_time": 0.7434515399932862} +{"epoch": 0, "iter": 14951, "iter_tflops": 49.309180442840216, "iter_time": 0.37978617095947265, "loss": 0.0048178574070334435, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 54.37077865157573, "step_time": 0.3444303226470947} +{"epoch": 0, "iter": 14952, "iter_tflops": 50.95537854678952, "iter_time": 0.3675165481567383, "loss": 0.009877030737698078, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 55.99706295047268, "step_time": 0.33442726898193365} +{"epoch": 0, "iter": 14953, "iter_tflops": 24.940046765760847, "iter_time": 0.8272275390625, "loss": 0.46189209818840027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.138692662638334, "step_time": 0.7892932434082031} +{"epoch": 0, "iter": 14954, "iter_tflops": 12.050105965914012, "iter_time": 1.7121088867187502, "loss": 0.5666761994361877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.23412994250834, "step_time": 1.2708468875885008} +{"epoch": 0, "iter": 14955, "iter_tflops": 38.512072677609716, "iter_time": 0.5357045745849609, "loss": 0.6379587054252625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.06125017079626, "step_time": 0.49050119590759284} +{"epoch": 0, "iter": 14956, "iter_tflops": 32.802544607072846, "iter_time": 0.6289479598999023, "loss": 0.5278768539428711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.459548629341306, "step_time": 0.5818205337524414} +{"epoch": 0, "iter": 14957, "iter_tflops": 34.12570774901131, "iter_time": 0.6045616302490234, "loss": 0.6132887601852417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.65850902009673, "step_time": 0.5478467960357666} +{"epoch": 0, "iter": 
14958, "iter_tflops": 35.41713605450895, "iter_time": 0.5825172729492187, "loss": 0.5700128078460693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.083473466353496, "step_time": 0.5278725681304932} +{"epoch": 0, "iter": 14959, "iter_tflops": 41.159327326523574, "iter_time": 0.5012495307922362, "loss": 0.6628183126449585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87885626875038, "step_time": 0.4597063121795654} +{"epoch": 0, "iter": 14960, "iter_tflops": 36.915965252997346, "iter_time": 0.5588664245605468, "loss": 0.5886859893798828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.04205336544104, "step_time": 0.515235652923584} +{"epoch": 0, "iter": 14961, "iter_tflops": 19.569041247074516, "iter_time": 1.0542720642089844, "loss": 0.6328477263450623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.68804640510627, "step_time": 0.9972470626831055} +{"epoch": 0, "iter": 14962, "iter_tflops": 14.40634191627943, "iter_time": 1.4320841217041014, "loss": 0.6290611624717712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.780617178862904, "step_time": 0.9928046569824218} +{"epoch": 0, "iter": 14963, "iter_tflops": 48.93479976316866, "iter_time": 0.42160371780395506, "loss": 0.6825670003890991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.80854758831512, "step_time": 0.39067716217041015} +{"epoch": 0, "iter": 14964, "iter_tflops": 44.417835818375735, "iter_time": 0.4644776840209961, "loss": 0.6497192978858948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.670180431693915, "step_time": 0.4327882404327393} +{"epoch": 0, "iter": 14965, "iter_tflops": 35.58994082192377, "iter_time": 0.5796888961791992, "loss": 0.5711765885353088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.28992217147297, "step_time": 0.5388126258850098} +{"epoch": 0, "iter": 14966, "iter_tflops": 36.154942762645035, "iter_time": 0.5706299591064453, "loss": 0.5866510272026062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.263122506253374, "step_time": 0.5124066944122314} +{"epoch": 0, 
"iter": 14967, "iter_tflops": 38.9232778915714, "iter_time": 0.5300451202392578, "loss": 0.7005478739738464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.21587274031348, "step_time": 0.4887046546936035} +{"epoch": 0, "iter": 14968, "iter_tflops": 42.48707688273729, "iter_time": 0.485585147857666, "loss": 0.538119912147522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.391865069588825, "step_time": 0.44471360397338866} +{"epoch": 0, "iter": 14969, "iter_tflops": 33.17629201022392, "iter_time": 0.6218625488281251, "loss": 0.38909944891929626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.22559791618576, "step_time": 0.5695169906616211} +{"epoch": 0, "iter": 14970, "iter_tflops": 40.256296209667795, "iter_time": 0.5124935836791992, "loss": 0.5378977656364441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.229193246243526, "step_time": 0.4664587345123291} +{"epoch": 0, "iter": 14971, "iter_tflops": 37.95259357227172, "iter_time": 0.543601676940918, "loss": 0.511005699634552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.38546184293944, "step_time": 0.49851065063476563} +{"epoch": 0, "iter": 14972, "iter_tflops": 38.70692017736742, "iter_time": 0.5330078811645508, "loss": 0.3680158257484436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.331637189950776, "step_time": 0.48736819267272946} +{"epoch": 0, "iter": 14973, "iter_tflops": 17.38248482167487, "iter_time": 0.8482331924438477, "loss": 0.08956795930862427, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 18.782436779239767, "step_time": 0.7850099945068358} +{"epoch": 0, "iter": 14974, "iter_tflops": 10.594696353643423, "iter_time": 1.391677505493164, "loss": 0.09001903235912323, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 13.4797845456804, "step_time": 1.093815746307373} +{"epoch": 0, "iter": 14975, "iter_tflops": 35.39621706764957, "iter_time": 0.41655300521850586, "loss": 0.08068007230758667, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 38.5221019955594, "step_time": 0.3827517147064209} +{"epoch": 
0, "iter": 14976, "iter_tflops": 38.36057999592281, "iter_time": 0.384363338470459, "loss": 0.04541072994470596, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 41.94491356698053, "step_time": 0.3515182018280029} +{"epoch": 0, "iter": 14977, "iter_tflops": 30.53705691045675, "iter_time": 0.6756084442138672, "loss": 0.5675269961357117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.37428143772302, "step_time": 0.6372679977416992} +{"epoch": 0, "iter": 14978, "iter_tflops": 15.434774646801822, "iter_time": 1.3366630859375, "loss": 0.7173579335212708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.144343609127286, "step_time": 1.1370537261962892} +{"epoch": 0, "iter": 14979, "iter_tflops": 44.0908315750743, "iter_time": 0.4679225311279297, "loss": 0.4892261028289795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73527612793803, "step_time": 0.43219805526733396} +{"epoch": 0, "iter": 14980, "iter_tflops": 43.44523762080553, "iter_time": 0.47487583541870115, "loss": 0.5669119358062744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.6314165208689, "step_time": 0.4424290542602539} +{"epoch": 0, "iter": 14981, "iter_tflops": 25.558440065252192, "iter_time": 0.8072125473022461, "loss": 0.21862590312957764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.93881020630496, "step_time": 0.765850212097168} +{"epoch": 0, "iter": 14982, "iter_tflops": 20.743695700428624, "iter_time": 0.9945717391967773, "loss": 0.21094238758087158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.624586004565064, "step_time": 0.8732891025543215} +{"epoch": 0, "iter": 14983, "iter_tflops": 39.382116415232964, "iter_time": 0.5238695983886718, "loss": 0.2336398959159851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39945018035847, "step_time": 0.4753768405914307} +{"epoch": 0, "iter": 14984, "iter_tflops": 41.90781100896511, "iter_time": 0.49229709243774417, "loss": 0.1693670153617859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71902897824335, "step_time": 0.4512583484649658} 
+{"epoch": 0, "iter": 14985, "iter_tflops": 16.90598731143566, "iter_time": 1.2203424224853516, "loss": 0.05051532760262489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.009846336018516, "step_time": 1.145545227050781} +{"epoch": 0, "iter": 14986, "iter_tflops": 18.315247355736204, "iter_time": 1.126443618774414, "loss": 0.05558943375945091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.951935206993532, "step_time": 0.8613539295196533} +{"epoch": 0, "iter": 14987, "iter_tflops": 50.9310491048884, "iter_time": 0.4050789031982422, "loss": 0.05232707038521767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.653053468992915, "step_time": 0.3707091026306152} +{"epoch": 0, "iter": 14988, "iter_tflops": 50.26096462400942, "iter_time": 0.4104794578552246, "loss": 0.055509116500616074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.71381228441771, "step_time": 0.3770728569030761} +{"epoch": 0, "iter": 14989, "iter_tflops": 21.95597117120865, "iter_time": 0.9396575241088866, "loss": 0.19496914744377136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.999876345169028, "step_time": 0.8970088882446289} +{"epoch": 0, "iter": 14990, "iter_tflops": 15.450044342348065, "iter_time": 1.335342025756836, "loss": 0.16319575905799866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.387976305800997, "step_time": 1.1219882583618164} +{"epoch": 0, "iter": 14991, "iter_tflops": 36.30133360187578, "iter_time": 0.5683288040161133, "loss": 0.2668565809726715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.86199873243034, "step_time": 0.51756294631958} +{"epoch": 0, "iter": 14992, "iter_tflops": 36.45026140220801, "iter_time": 0.566006736755371, "loss": 0.26603952050209045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.19467795741245, "step_time": 0.5132792339324951} +{"epoch": 0, "iter": 14993, "iter_tflops": 36.260178499852856, "iter_time": 0.5689738540649415, "loss": 0.19233180582523346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.1021882684818, "step_time": 
0.514463035583496} +{"epoch": 0, "iter": 14994, "iter_tflops": 36.289640748427175, "iter_time": 0.5685119247436523, "loss": 0.12204907834529877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59387940285833, "step_time": 0.5082316303253174} +{"epoch": 0, "iter": 14995, "iter_tflops": 40.383283092820676, "iter_time": 0.5108820266723633, "loss": 0.22524794936180115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.230996243932445, "step_time": 0.4664397201538086} +{"epoch": 0, "iter": 14996, "iter_tflops": 39.015220259541834, "iter_time": 0.5287960281372069, "loss": 0.21772561967372894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81947950081063, "step_time": 0.48181560707092286} +{"epoch": 0, "iter": 14997, "iter_tflops": 21.506007280291495, "iter_time": 0.9593177032470702, "loss": 0.648943305015564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.871905027227097, "step_time": 0.9020277709960938} +{"epoch": 0, "iter": 14998, "iter_tflops": 9.783839081680217, "iter_time": 2.1086910095214844, "loss": 0.43102940917015076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.419499359344542, "step_time": 1.806654815673828} +{"epoch": 0, "iter": 14999, "iter_tflops": 9.411146569691498, "iter_time": 2.1921976623535158, "loss": 0.5987833142280579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.627608663217195, "step_time": 1.94127335357666} +{"epoch": 0, "iter": 15000, "iter_tflops": 4.102397354577449, "iter_time": 5.029033447265625, "loss": 0.6023762226104736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.131689357317003, "step_time": 4.9933796386718745} +{"epoch": 0, "iter": 15001, "iter_tflops": 13.930504698668555, "iter_time": 1.0467047271728516, "loss": 0.3584556579589844, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 14.87362298649502, "step_time": 0.980334457397461} +{"epoch": 0, "iter": 15002, "iter_tflops": 12.26406100604102, "iter_time": 1.1889312286376952, "loss": 0.30013108253479004, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 14.247733355418672, 
"step_time": 1.0233996353149415} +{"epoch": 0, "iter": 15003, "iter_tflops": 13.942818847902938, "iter_time": 1.0457802886962893, "loss": 0.26671361923217773, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 16.31377840160813, "step_time": 0.8937920303344726} +{"epoch": 0, "iter": 15004, "iter_tflops": 19.986178833512724, "iter_time": 0.7295604248046875, "loss": 0.2725948393344879, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 22.324692240471336, "step_time": 0.6531389083862305} +{"epoch": 0, "iter": 15005, "iter_tflops": 6.219537037259773, "iter_time": 2.3837894287109376, "loss": 0.4182254672050476, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 6.336483146403204, "step_time": 2.3397942199707034} +{"epoch": 0, "iter": 15006, "iter_tflops": 16.029152148125004, "iter_time": 0.9249439086914063, "loss": 0.3453940749168396, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 18.703033983227474, "step_time": 0.7927091751098633} +{"epoch": 0, "iter": 15007, "iter_tflops": 18.809157065957564, "iter_time": 0.7882366333007813, "loss": 0.261189341545105, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 20.104736640441775, "step_time": 0.7374414749145507} +{"epoch": 0, "iter": 15008, "iter_tflops": 18.40266324625251, "iter_time": 0.8056478805541991, "loss": 0.3712502717971802, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 21.375041956100542, "step_time": 0.6936157913208006} +{"epoch": 0, "iter": 15009, "iter_tflops": 11.543331621521265, "iter_time": 1.7872737426757814, "loss": 0.7351655960083008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.965602965872332, "step_time": 1.7242000732421874} +{"epoch": 0, "iter": 15010, "iter_tflops": 26.52523446981861, "iter_time": 0.7777911834716796, "loss": 0.4856800436973572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.87682574958832, "step_time": 0.6472129211425781} +{"epoch": 0, "iter": 15011, "iter_tflops": 26.902125727800527, "iter_time": 0.766894546508789, "loss": 0.6132371425628662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
30.23337052220231, "step_time": 0.6823947563171386} +{"epoch": 0, "iter": 15012, "iter_tflops": 32.088624726316006, "iter_time": 0.6429410324096679, "loss": 0.7258410453796387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.19372368277896, "step_time": 0.570018539428711} +{"epoch": 0, "iter": 15013, "iter_tflops": 9.314790897839396, "iter_time": 2.2148745727539065, "loss": 0.16041716933250427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.645239134539521, "step_time": 2.138992431640625} +{"epoch": 0, "iter": 15014, "iter_tflops": 31.726225133394504, "iter_time": 0.6502851638793945, "loss": 0.022713232785463333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.601810955086314, "step_time": 0.5962431716918946} +{"epoch": 0, "iter": 15015, "iter_tflops": 34.75596445035415, "iter_time": 0.5935986480712891, "loss": 0.054225482046604156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.12275409962843, "step_time": 0.5557533111572266} +{"epoch": 0, "iter": 15016, "iter_tflops": 25.781030120921336, "iter_time": 0.8002431793212891, "loss": 0.03254201263189316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.77225063804486, "step_time": 0.7428671798706055} +{"epoch": 0, "iter": 15017, "iter_tflops": 19.59486691976084, "iter_time": 1.052882553100586, "loss": 0.22247911989688873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.84412243420715, "step_time": 0.9444688644409179} +{"epoch": 0, "iter": 15018, "iter_tflops": 34.84252288821116, "iter_time": 0.5921239852905275, "loss": 0.2467215657234192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44907866299685, "step_time": 0.46415120697021484} +{"epoch": 0, "iter": 15019, "iter_tflops": 39.13639735726138, "iter_time": 0.5271587295532226, "loss": 0.34361717104911804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78001639509221, "step_time": 0.493802906036377} +{"epoch": 0, "iter": 15020, "iter_tflops": 37.260695049505735, "iter_time": 0.5536958847045899, "loss": 0.2135268747806549, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.888606812799864, "step_time": 0.5172177009582519} +{"epoch": 0, "iter": 15021, "iter_tflops": 10.336262732080089, "iter_time": 1.9959915924072267, "loss": 0.5694659352302551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.715990684108785, "step_time": 1.9252623596191405} +{"epoch": 0, "iter": 15022, "iter_tflops": 24.09177501578298, "iter_time": 0.8563542327880859, "loss": 0.635439932346344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.419299685101272, "step_time": 0.7259536209106445} +{"epoch": 0, "iter": 15023, "iter_tflops": 39.40314397015217, "iter_time": 0.5235900344848633, "loss": 0.5244579911231995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.218198304481696, "step_time": 0.42786944007873534} +{"epoch": 0, "iter": 15024, "iter_tflops": 38.19375237954152, "iter_time": 0.5401693267822266, "loss": 0.5608811378479004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.610390744569976, "step_time": 0.5080249938964844} +{"epoch": 0, "iter": 15025, "iter_tflops": 9.84055169155196, "iter_time": 2.096538299560547, "loss": 0.09436887502670288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.061703079367218, "step_time": 2.0504573974609377} +{"epoch": 0, "iter": 15026, "iter_tflops": 22.68017875797961, "iter_time": 0.9096530380249023, "loss": 0.052535124123096466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.993350942682333, "step_time": 0.793706573486328} +{"epoch": 0, "iter": 15027, "iter_tflops": 29.143202603688835, "iter_time": 0.7079212875366211, "loss": 0.04136422276496887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.94799558928551, "step_time": 0.5739149894714356} +{"epoch": 0, "iter": 15028, "iter_tflops": 42.75184172808227, "iter_time": 0.48257788848876954, "loss": 0.06250949949026108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.87105555286145, "step_time": 0.44016703414916997} +{"epoch": 0, "iter": 15029, "iter_tflops": 25.004503723354308, "iter_time": 0.825095100402832, "loss": 0.05372811108827591, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 27.122442140329333, "step_time": 0.7606650390625} +{"epoch": 0, "iter": 15030, "iter_tflops": 27.165396439737627, "iter_time": 0.7594622650146485, "loss": 0.04470133036375046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.656902480387757, "step_time": 0.6956590805053712} +{"epoch": 0, "iter": 15031, "iter_tflops": 31.344715788228605, "iter_time": 0.6582000503540039, "loss": 0.04593784362077713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.94429783366178, "step_time": 0.6077926139831542} +{"epoch": 0, "iter": 15032, "iter_tflops": 32.431971587825146, "iter_time": 0.6361344223022461, "loss": 0.05297499895095825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.0133025075731, "step_time": 0.5892358627319335} +{"epoch": 0, "iter": 15033, "iter_tflops": 7.955486161301301, "iter_time": 2.5933164978027343, "loss": 0.10734259337186813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.350022842832251, "step_time": 2.47078288269043} +{"epoch": 0, "iter": 15034, "iter_tflops": 43.35793615895465, "iter_time": 0.4758320007324218, "loss": 0.12401961535215378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69168865559694, "step_time": 0.43259305953979493} +{"epoch": 0, "iter": 15035, "iter_tflops": 28.735514732070765, "iter_time": 0.717964988708496, "loss": 0.12443503737449646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.79460835617881, "step_time": 0.6104847640991211} +{"epoch": 0, "iter": 15036, "iter_tflops": 26.545626973187794, "iter_time": 0.7771936798095703, "loss": 0.12453479319810867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.210581868817087, "step_time": 0.6610288009643553} +{"epoch": 0, "iter": 15037, "iter_tflops": 7.38012250896208, "iter_time": 2.7954947204589846, "loss": 0.08022495359182358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.823243131623375, "step_time": 2.6371535644531248} +{"epoch": 0, "iter": 15038, "iter_tflops": 30.357546211982765, "iter_time": 0.6796034622192383, "loss": 0.06279775500297546, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.15850911151116, "step_time": 0.5406682281494141} +{"epoch": 0, "iter": 15039, "iter_tflops": 31.380693248904986, "iter_time": 0.6574454345703125, "loss": 0.0834631696343422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.80802996187844, "step_time": 0.6102424049377441} +{"epoch": 0, "iter": 15040, "iter_tflops": 37.44254469305223, "iter_time": 0.5510067138671875, "loss": 0.10556326061487198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.188521682379694, "step_time": 0.5133578605651855} +{"epoch": 0, "iter": 15041, "iter_tflops": 6.8268003570831635, "iter_time": 2.3275095214843753, "loss": 0.04859849438071251, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 6.950572455695604, "step_time": 2.286062469482422} +{"epoch": 0, "iter": 15042, "iter_tflops": 21.101555762253323, "iter_time": 0.7529986419677733, "loss": 0.0500929020345211, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 24.50490809974291, "step_time": 0.6484187889099122} +{"epoch": 0, "iter": 15043, "iter_tflops": 38.66028804089063, "iter_time": 0.4110016670227051, "loss": 0.02703595533967018, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 44.10997620826714, "step_time": 0.3602233371734619} +{"epoch": 0, "iter": 15044, "iter_tflops": 28.386974832183395, "iter_time": 0.5597441406249999, "loss": 0.02729814499616623, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.249901451063582, "step_time": 0.5252725486755371} +{"epoch": 0, "iter": 15045, "iter_tflops": 8.831057405211105, "iter_time": 2.3361974182128904, "loss": 0.39332690834999084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.164711242729956, "step_time": 2.2511449584960936} +{"epoch": 0, "iter": 15046, "iter_tflops": 25.447817795222562, "iter_time": 0.8107215194702149, "loss": 0.4143565893173218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.752403819999824, "step_time": 0.6934261054992675} +{"epoch": 0, "iter": 15047, "iter_tflops": 31.01894772034031, "iter_time": 0.6651126174926757, "loss": 
0.26037266850471497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.340949006219994, "step_time": 0.5677092666625976} +{"epoch": 0, "iter": 15048, "iter_tflops": 45.93046053228488, "iter_time": 0.44918107223510745, "loss": 0.23081274330615997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.69759690771167, "step_time": 0.41513261795043943} +{"epoch": 0, "iter": 15049, "iter_tflops": 10.628458826035795, "iter_time": 1.9411180725097656, "loss": 0.11864491552114487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.92215271043243, "step_time": 1.8889219055175783} +{"epoch": 0, "iter": 15050, "iter_tflops": 20.16440909796361, "iter_time": 1.0231439666748048, "loss": 0.12445129454135895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.801983340448796, "step_time": 0.9047938156127932} +{"epoch": 0, "iter": 15051, "iter_tflops": 22.979811648848564, "iter_time": 0.8977921066284178, "loss": 0.1843564361333847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.939365194989914, "step_time": 0.7658344345092774} +{"epoch": 0, "iter": 15052, "iter_tflops": 28.19393474329136, "iter_time": 0.7317564468383789, "loss": 0.13783298432826996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.97851945749075, "step_time": 0.6255918655395507} +{"epoch": 0, "iter": 15053, "iter_tflops": 8.549195824428686, "iter_time": 2.369551177978516, "loss": 0.0012431308859959245, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 8.998492408324411, "step_time": 2.251238998413086} +{"epoch": 0, "iter": 15054, "iter_tflops": 25.852033849744082, "iter_time": 0.7836039962768555, "loss": 0.0032529975287616253, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 29.127759317674066, "step_time": 0.6954794158935547} +{"epoch": 0, "iter": 15055, "iter_tflops": 27.07715280244846, "iter_time": 0.7481494522094727, "loss": 0.0028617053758352995, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 31.81324612649474, "step_time": 0.6367711410522461} +{"epoch": 0, "iter": 15056, "iter_tflops": 32.82219913922255, "iter_time": 
0.6171968231201173, "loss": 0.007187420967966318, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 36.197102186586775, "step_time": 0.5596513481140137} +{"epoch": 0, "iter": 15057, "iter_tflops": 16.972790313252975, "iter_time": 1.215539291381836, "loss": 0.48209813237190247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.143322349651793, "step_time": 1.1371177291870118} +{"epoch": 0, "iter": 15058, "iter_tflops": 12.681484721136098, "iter_time": 1.6268673553466797, "loss": 0.3730714023113251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.737132537198725, "step_time": 1.5018486175537111} +{"epoch": 0, "iter": 15059, "iter_tflops": 12.248813672185918, "iter_time": 1.684334014892578, "loss": 0.4754924476146698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.81878529512351, "step_time": 1.3922256851196289} +{"epoch": 0, "iter": 15060, "iter_tflops": 34.11232044224325, "iter_time": 0.6047988891601562, "loss": 0.34736764430999756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.36723944103782, "step_time": 0.5377268161773681} +{"epoch": 0, "iter": 15061, "iter_tflops": 23.111890648009187, "iter_time": 0.783500831604004, "loss": 0.3813716173171997, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 24.415400299709205, "step_time": 0.7416706390380858} +{"epoch": 0, "iter": 15062, "iter_tflops": 24.859937642531122, "iter_time": 0.7284083251953125, "loss": 0.14120861887931824, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 27.251907538748746, "step_time": 0.6644740562438964} +{"epoch": 0, "iter": 15063, "iter_tflops": 31.92197203525054, "iter_time": 0.5672639999389648, "loss": 0.26950639486312866, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 33.86492786400951, "step_time": 0.5347179718017578} +{"epoch": 0, "iter": 15064, "iter_tflops": 32.81772590747813, "iter_time": 0.5517806320190429, "loss": 0.45262429118156433, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 34.92480504776017, "step_time": 0.5184906692504883} +{"epoch": 0, "iter": 15065, "iter_tflops": 26.245595505146436, 
"iter_time": 0.7860783157348633, "loss": 0.06184767931699753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.58600553231802, "step_time": 0.7478825988769532} +{"epoch": 0, "iter": 15066, "iter_tflops": 15.699533033424025, "iter_time": 1.3141214752197266, "loss": 0.09819915890693665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.56915864013798, "step_time": 1.1110408344268798} +{"epoch": 0, "iter": 15067, "iter_tflops": 44.40418562004254, "iter_time": 0.4646204681396484, "loss": 0.08898469060659409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.89753435007884, "step_time": 0.43073393630981444} +{"epoch": 0, "iter": 15068, "iter_tflops": 52.128273841543034, "iter_time": 0.39577549743652346, "loss": 0.09467780590057373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.50102997457285, "step_time": 0.36514544105529784} +{"epoch": 0, "iter": 15069, "iter_tflops": 39.52320172584647, "iter_time": 0.5219995498657226, "loss": 0.5673654675483704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.75141254079557, "step_time": 0.48258273315429684} +{"epoch": 0, "iter": 15070, "iter_tflops": 42.6230311549311, "iter_time": 0.48403628158569334, "loss": 0.61793053150177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.87719005979361, "step_time": 0.4497026405334473} +{"epoch": 0, "iter": 15071, "iter_tflops": 44.78134911549639, "iter_time": 0.4607072792053223, "loss": 0.6374764442443848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.10885328169803, "step_time": 0.4288419303894042} +{"epoch": 0, "iter": 15072, "iter_tflops": 41.427691393325304, "iter_time": 0.4980024909973145, "loss": 0.685911238193512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.21733553962702, "step_time": 0.4665838241577148} +{"epoch": 0, "iter": 15073, "iter_tflops": 41.5010708468356, "iter_time": 0.49712195587158203, "loss": 0.18269318342208862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.0979039229296, "step_time": 0.4574734458923339} +{"epoch": 0, "iter": 15074, "iter_tflops": 
10.161266247646232, "iter_time": 2.030366394042969, "loss": 0.20459210872650146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.267912401401173, "step_time": 1.6817118377685547} +{"epoch": 0, "iter": 15075, "iter_tflops": 16.705577682114278, "iter_time": 1.2349823455810547, "loss": 0.20561154186725616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.680977260395412, "step_time": 1.0482758674621582} +{"epoch": 0, "iter": 15076, "iter_tflops": 13.088623963846674, "iter_time": 1.576261459350586, "loss": 0.21307526528835297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.815397052138511, "step_time": 1.3044941864013673} +{"epoch": 0, "iter": 15077, "iter_tflops": 12.982670708247445, "iter_time": 1.1829119262695311, "loss": 0.4048900306224823, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 14.030469766882185, "step_time": 1.0945717620849609} +{"epoch": 0, "iter": 15078, "iter_tflops": 16.957905016854887, "iter_time": 0.9056163482666015, "loss": 0.3104468286037445, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 20.36655260918911, "step_time": 0.7540478897094727} +{"epoch": 0, "iter": 15079, "iter_tflops": 22.893070403740538, "iter_time": 0.6708298950195313, "loss": 0.24700264632701874, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.65982585347849, "step_time": 0.6227682266235351} +{"epoch": 0, "iter": 15080, "iter_tflops": 24.74105649869079, "iter_time": 0.6207235336303711, "loss": 0.35482481122016907, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 26.491919958963397, "step_time": 0.5796996231079101} +{"epoch": 0, "iter": 15081, "iter_tflops": 31.402454540317052, "iter_time": 0.6569898376464844, "loss": 0.5555633306503296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.56617520082288, "step_time": 0.5968578643798828} +{"epoch": 0, "iter": 15082, "iter_tflops": 32.91156652287317, "iter_time": 0.6268645248413085, "loss": 0.6887034773826599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.85303608519749, "step_time": 0.5754350471496581} +{"epoch": 0, "iter": 15083, 
"iter_tflops": 32.85192443166372, "iter_time": 0.6280025863647462, "loss": 0.7737621665000916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.77755181741971, "step_time": 0.576649112701416} +{"epoch": 0, "iter": 15084, "iter_tflops": 38.54185023121965, "iter_time": 0.5352906875610352, "loss": 0.7996581196784973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.00252200808774, "step_time": 0.49118701744079596} +{"epoch": 0, "iter": 15085, "iter_tflops": 19.284891221208223, "iter_time": 1.0698060607910156, "loss": 0.6044324040412903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.886411614610246, "step_time": 0.9877758750915528} +{"epoch": 0, "iter": 15086, "iter_tflops": 19.29033460404471, "iter_time": 1.0695041809082033, "loss": 0.7156021595001221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.153793184419875, "step_time": 0.8910459442138672} +{"epoch": 0, "iter": 15087, "iter_tflops": 41.89191286918473, "iter_time": 0.49248392105102545, "loss": 0.5589011311531067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.95026033693871, "step_time": 0.45897606277465813} +{"epoch": 0, "iter": 15088, "iter_tflops": 45.29621190296998, "iter_time": 0.4554706153869629, "loss": 0.5713205337524414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.46562561141139, "step_time": 0.4256850757598877} +{"epoch": 0, "iter": 15089, "iter_tflops": 33.72065108264116, "iter_time": 0.6118236999511719, "loss": 0.059307780116796494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.064543216038636, "step_time": 0.5720603027343749} +{"epoch": 0, "iter": 15090, "iter_tflops": 14.610679569078947, "iter_time": 1.4120557098388673, "loss": 0.0636829063296318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.94345828237882, "step_time": 1.1497835693359375} +{"epoch": 0, "iter": 15091, "iter_tflops": 38.63190740808922, "iter_time": 0.5340428390502929, "loss": 0.08793724328279495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.47608023200374, "step_time": 0.48571086120605467} +{"epoch": 0, 
"iter": 15092, "iter_tflops": 48.388358937231665, "iter_time": 0.42636481094360357, "loss": 0.04916929826140404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.11232273591594, "step_time": 0.3884426898956299} +{"epoch": 0, "iter": 15093, "iter_tflops": 20.145788756679696, "iter_time": 1.0240896377563475, "loss": 0.42368441820144653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.399018730256394, "step_time": 0.964113998413086} +{"epoch": 0, "iter": 15094, "iter_tflops": 17.501568178973137, "iter_time": 1.1788139953613281, "loss": 0.3272918164730072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.905037913662007, "step_time": 0.9868957710266113} +{"epoch": 0, "iter": 15095, "iter_tflops": 36.79059842806964, "iter_time": 0.5607708053588868, "loss": 0.41110435128211975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44384830668366, "step_time": 0.5101169738769532} +{"epoch": 0, "iter": 15096, "iter_tflops": 41.037603574966724, "iter_time": 0.502736312866211, "loss": 0.4306708872318268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.696005225846385, "step_time": 0.46158696746826167} +{"epoch": 0, "iter": 15097, "iter_tflops": 22.26711156798314, "iter_time": 0.9265276031494141, "loss": 0.6047973036766052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.217561861025597, "step_time": 0.8519062995910645} +{"epoch": 0, "iter": 15098, "iter_tflops": 15.241493770380192, "iter_time": 1.3536136169433592, "loss": 0.6563652157783508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.57575139248502, "step_time": 1.1106465129852294} +{"epoch": 0, "iter": 15099, "iter_tflops": 44.279804770950776, "iter_time": 0.4659255752563477, "loss": 0.5826144814491272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.842387418692866, "step_time": 0.43123043441772463} +{"epoch": 0, "iter": 15100, "iter_tflops": 48.97999066648302, "iter_time": 0.421214729309082, "loss": 0.6580652594566345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.9082290880629, "step_time": 0.38994110870361326} 
+{"epoch": 0, "iter": 15101, "iter_tflops": 30.37161725707463, "iter_time": 0.6792886047363281, "loss": 0.6521030068397522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.89494897484596, "step_time": 0.6271811981201172} +{"epoch": 0, "iter": 15102, "iter_tflops": 12.650128370505135, "iter_time": 1.6308999328613283, "loss": 0.9475385546684265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.092161784763544, "step_time": 1.2820585441589356} +{"epoch": 0, "iter": 15103, "iter_tflops": 41.03872055397558, "iter_time": 0.5027226295471191, "loss": 0.6645932197570801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.15611553885879, "step_time": 0.46723071670532224} +{"epoch": 0, "iter": 15104, "iter_tflops": 43.34490417642186, "iter_time": 0.4759750633239746, "loss": 0.45957669615745544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.30640958964527, "step_time": 0.44553429412841794} +{"epoch": 0, "iter": 15105, "iter_tflops": 26.7716088637456, "iter_time": 0.7706333084106446, "loss": 0.44087401032447815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.32597971511263, "step_time": 0.7283452758789062} +{"epoch": 0, "iter": 15106, "iter_tflops": 27.50492886621181, "iter_time": 0.7500871429443359, "loss": 0.3464728593826294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.57312568118898, "step_time": 0.6145121459960937} +{"epoch": 0, "iter": 15107, "iter_tflops": 37.95958123776425, "iter_time": 0.5435016098022462, "loss": 0.5158138275146484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.776893474939094, "step_time": 0.4938398189544678} +{"epoch": 0, "iter": 15108, "iter_tflops": 40.19967972311412, "iter_time": 0.5132153701782227, "loss": 0.4116079807281494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.82997705372929, "step_time": 0.4707073764801026} +{"epoch": 0, "iter": 15109, "iter_tflops": 18.698263775597905, "iter_time": 1.1033694763183592, "loss": 0.8431144952774048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.073115659713046, "step_time": 
1.0277972717285155} +{"epoch": 0, "iter": 15110, "iter_tflops": 16.358787956099043, "iter_time": 1.2611627197265627, "loss": 0.6540870666503906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.956955789933986, "step_time": 1.0337795867919921} +{"epoch": 0, "iter": 15111, "iter_tflops": 37.26113094512129, "iter_time": 0.5536894073486328, "loss": 0.5829970836639404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.58764273681252, "step_time": 0.5083097248077393} +{"epoch": 0, "iter": 15112, "iter_tflops": 35.544535814441645, "iter_time": 0.5804293975830078, "loss": 0.6644275784492493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.623734951397594, "step_time": 0.5341558380126953} +{"epoch": 0, "iter": 15113, "iter_tflops": 24.047808155013563, "iter_time": 0.8579199142456055, "loss": 0.7116501331329346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.201550122788042, "step_time": 0.7873997306823732} +{"epoch": 0, "iter": 15114, "iter_tflops": 35.61367812341675, "iter_time": 0.5793025207519531, "loss": 0.6970986723899841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.110899133115566, "step_time": 0.5275024089813233} +{"epoch": 0, "iter": 15115, "iter_tflops": 34.924221966692194, "iter_time": 0.5907388153076172, "loss": 0.6195823550224304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.0661817970275, "step_time": 0.5419795875549316} +{"epoch": 0, "iter": 15116, "iter_tflops": 36.39242508349809, "iter_time": 0.5669062576293945, "loss": 0.6237012147903442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71417733948758, "step_time": 0.5194893836975097} +{"epoch": 0, "iter": 15117, "iter_tflops": 18.50350648625891, "iter_time": 1.11498291015625, "loss": 0.1706962287425995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.853928656051572, "step_time": 1.0391441345214845} +{"epoch": 0, "iter": 15118, "iter_tflops": 29.524542446797586, "iter_time": 0.6987777557373047, "loss": 0.18343904614448547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.719931469496245, 
"step_time": 0.5618500003814697} +{"epoch": 0, "iter": 15119, "iter_tflops": 46.874811236584904, "iter_time": 0.44013176727294917, "loss": 0.10735560953617096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.656644368684276, "step_time": 0.4072731971740723} +{"epoch": 0, "iter": 15120, "iter_tflops": 51.127285546463035, "iter_time": 0.4035241317749023, "loss": 0.19118262827396393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.505419382523876, "step_time": 0.37169511985778814} +{"epoch": 0, "iter": 15121, "iter_tflops": 23.29437196808227, "iter_time": 0.4431175956726074, "loss": 0.01471575628966093, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 25.610478988226003, "step_time": 0.403043851852417} +{"epoch": 0, "iter": 15122, "iter_tflops": 25.780734076761306, "iter_time": 0.40038216400146487, "loss": 0.00518740713596344, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 28.44997957429987, "step_time": 0.362817346572876} +{"epoch": 0, "iter": 15123, "iter_tflops": 28.704771478061375, "iter_time": 0.35959687423706055, "loss": 0.004283254966139793, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 31.592855564693544, "step_time": 0.32672406196594234} +{"epoch": 0, "iter": 15124, "iter_tflops": 30.103368574584724, "iter_time": 0.34289006805419925, "loss": 0.002710648812353611, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 33.126798350071695, "step_time": 0.3115950412750244} +{"epoch": 0, "iter": 15125, "iter_tflops": 29.481703136557883, "iter_time": 0.6997931365966796, "loss": 0.134940966963768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.09842475264958, "step_time": 0.6634128150939942} +{"epoch": 0, "iter": 15126, "iter_tflops": 13.616318863961288, "iter_time": 1.515174087524414, "loss": 0.12844906747341156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.28922230587409, "step_time": 1.128046516418457} +{"epoch": 0, "iter": 15127, "iter_tflops": 45.322635169221044, "iter_time": 0.45520507431030277, "loss": 0.20190145075321198, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 57.470512196531054, "step_time": 0.3589857254028321} +{"epoch": 0, "iter": 15128, "iter_tflops": 52.09359337379087, "iter_time": 0.3960389785766602, "loss": 0.09741929918527603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.437979027437535, "step_time": 0.3655533714294434} +{"epoch": 0, "iter": 15129, "iter_tflops": 27.36802917982917, "iter_time": 0.7538392105102538, "loss": 0.2622634768486023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.936002829287013, "step_time": 0.7129904441833497} +{"epoch": 0, "iter": 15130, "iter_tflops": 13.699243070867434, "iter_time": 1.50600244140625, "loss": 0.18693488836288452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.28362561934039, "step_time": 1.1936785697937011} +{"epoch": 0, "iter": 15131, "iter_tflops": 48.10831065370287, "iter_time": 0.4288467674255372, "loss": 0.11875264346599579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.516244738889114, "step_time": 0.3928516521453857} +{"epoch": 0, "iter": 15132, "iter_tflops": 50.65641330183321, "iter_time": 0.40727505493164057, "loss": 0.14478352665901184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.230848715194156, "step_time": 0.3735429382324219} +{"epoch": 0, "iter": 15133, "iter_tflops": 32.97299551329704, "iter_time": 0.6256966705322267, "loss": 0.7087041139602661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.11810504944619, "step_time": 0.5874774131774902} +{"epoch": 0, "iter": 15134, "iter_tflops": 18.33369265371771, "iter_time": 1.125310317993164, "loss": 0.638666033744812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.389793391215754, "step_time": 1.011834358215332} +{"epoch": 0, "iter": 15135, "iter_tflops": 40.27862456779965, "iter_time": 0.5122094841003417, "loss": 0.5655966997146606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.05038455626826, "step_time": 0.41220649337768556} +{"epoch": 0, "iter": 15136, "iter_tflops": 49.03715757639323, "iter_time": 0.42072368240356445, "loss": 0.617904543876648, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 52.98207719574632, "step_time": 0.389397596359253} +{"epoch": 0, "iter": 15137, "iter_tflops": 2.580115174766592, "iter_time": 0.8512328796386719, "loss": 0.31057581305503845, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 2.7097057606396517, "step_time": 0.8105230102539063} +{"epoch": 0, "iter": 15138, "iter_tflops": 1.5108972989349065, "iter_time": 1.453625518798828, "loss": 0.00455682585015893, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 1.7847356485112966, "step_time": 1.230590576171875} +{"epoch": 0, "iter": 15139, "iter_tflops": 4.404940687270572, "iter_time": 0.49859442520141606, "loss": 0.15888002514839172, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 4.752509696000259, "step_time": 0.4621303291320801} +{"epoch": 0, "iter": 15140, "iter_tflops": 5.159053881678855, "iter_time": 0.42571349716186524, "loss": 0.20910055935382843, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 5.5763651277132, "step_time": 0.39385492515563963} +{"epoch": 0, "iter": 15141, "iter_tflops": 33.204313028424814, "iter_time": 0.621337760925293, "loss": 0.4090912342071533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.37502195355172, "step_time": 0.5832107620239259} +{"epoch": 0, "iter": 15142, "iter_tflops": 14.016531386368897, "iter_time": 1.4719114837646485, "loss": 0.5758054256439209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.736364553578696, "step_time": 1.2327105712890625} +{"epoch": 0, "iter": 15143, "iter_tflops": 37.43154869926351, "iter_time": 0.5511685791015625, "loss": 0.5499514937400818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.8797329658162, "step_time": 0.5046777954101562} +{"epoch": 0, "iter": 15144, "iter_tflops": 42.348946770154726, "iter_time": 0.48716898727416996, "loss": 0.47928115725517273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06112828657698, "step_time": 0.4479068202972412} +{"epoch": 0, "iter": 15145, "iter_tflops": 16.211184037611165, "iter_time": 1.2726456909179689, "loss": 0.5158852338790894, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 17.478610955477837, "step_time": 1.1803623046875} +{"epoch": 0, "iter": 15146, "iter_tflops": 16.559472553461884, "iter_time": 1.2458786621093751, "loss": 0.36837002635002136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.01708701750921, "step_time": 0.8590173110961915} +{"epoch": 0, "iter": 15147, "iter_tflops": 42.911324905359145, "iter_time": 0.48078435134887704, "loss": 0.44194597005844116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.072829927726396, "step_time": 0.44779306030273436} +{"epoch": 0, "iter": 15148, "iter_tflops": 46.425491382784095, "iter_time": 0.4443914947509766, "loss": 0.7418129444122314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.27978413726289, "step_time": 0.4103258171081543} +{"epoch": 0, "iter": 15149, "iter_tflops": 24.65047121124364, "iter_time": 0.8369451980590821, "loss": 0.03580816462635994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.912206724131558, "step_time": 0.7961920700073242} +{"epoch": 0, "iter": 15150, "iter_tflops": 15.799090737170916, "iter_time": 1.3058405609130859, "loss": 0.09114144742488861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.06706040666822, "step_time": 0.9793057556152345} +{"epoch": 0, "iter": 15151, "iter_tflops": 51.04778131850504, "iter_time": 0.4041525993347168, "loss": 0.06766404956579208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.648716863259246, "step_time": 0.3707379913330078} +{"epoch": 0, "iter": 15152, "iter_tflops": 48.076022800237595, "iter_time": 0.4291347808837891, "loss": 0.06267895549535751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40075885103691, "step_time": 0.39371745681762693} +{"epoch": 0, "iter": 15153, "iter_tflops": 26.178859685021507, "iter_time": 0.7880822067260742, "loss": 0.19016292691230774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.6084755798843, "step_time": 0.747273910522461} +{"epoch": 0, "iter": 15154, "iter_tflops": 13.529217003898946, "iter_time": 1.5249288635253906, "loss": 
0.2989467680454254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.027869030083806, "step_time": 1.2872012786865237} +{"epoch": 0, "iter": 15155, "iter_tflops": 39.531453006170054, "iter_time": 0.5218905944824218, "loss": 0.19910994172096252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.21810932229628, "step_time": 0.4773714962005615} +{"epoch": 0, "iter": 15156, "iter_tflops": 42.36510050893097, "iter_time": 0.48698323059082027, "loss": 0.13986101746559143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.3901239328314, "step_time": 0.4447302951812744} +{"epoch": 0, "iter": 15157, "iter_tflops": 27.078758167740396, "iter_time": 0.7618921585083007, "loss": 0.6420628428459167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.328515733739597, "step_time": 0.7034482650756836} +{"epoch": 0, "iter": 15158, "iter_tflops": 7.351538888999492, "iter_time": 2.806363922119141, "loss": 0.688441812992096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.917630362438176, "step_time": 2.3135174560546874} +{"epoch": 0, "iter": 15159, "iter_tflops": 13.754914606380872, "iter_time": 1.4999070587158203, "loss": 0.6312602162361145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.28792461190435, "step_time": 1.0696378135681153} +{"epoch": 0, "iter": 15160, "iter_tflops": 34.76660567990253, "iter_time": 0.5934169616699219, "loss": 0.778989315032959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.353231780926144, "step_time": 0.4758836345672607} +{"epoch": 0, "iter": 15161, "iter_tflops": 23.931959573487013, "iter_time": 0.6690779037475586, "loss": 0.3824504315853119, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 25.49920670045257, "step_time": 0.627954647064209} +{"epoch": 0, "iter": 15162, "iter_tflops": 28.40234160703961, "iter_time": 0.5637684936523437, "loss": 0.36527639627456665, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.326577278700455, "step_time": 0.5279971160888672} +{"epoch": 0, "iter": 15163, "iter_tflops": 29.519008961382614, "iter_time": 0.5424418334960938, 
"loss": 0.34866735339164734, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 31.40031899468908, "step_time": 0.509942123413086} +{"epoch": 0, "iter": 15164, "iter_tflops": 29.00077489781626, "iter_time": 0.5521350860595703, "loss": 0.3417317569255829, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.90116736723392, "step_time": 0.5181793022155762} +{"epoch": 0, "iter": 15165, "iter_tflops": 36.70485596241225, "iter_time": 0.5620807647705077, "loss": 0.42031797766685486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.70168530901706, "step_time": 0.5196528396606446} +{"epoch": 0, "iter": 15166, "iter_tflops": 30.76153011978382, "iter_time": 0.6706783905029298, "loss": 0.44772183895111084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.3539591143068, "step_time": 0.524244421005249} +{"epoch": 0, "iter": 15167, "iter_tflops": 50.172156348185936, "iter_time": 0.4112060356140137, "loss": 0.37797826528549194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.41379990948292, "step_time": 0.37915186119079586} +{"epoch": 0, "iter": 15168, "iter_tflops": 45.221339212101284, "iter_time": 0.4562247352600098, "loss": 0.322762131690979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.65523296017215, "step_time": 0.42402619934082036} +{"epoch": 0, "iter": 15169, "iter_tflops": 31.45811239905154, "iter_time": 0.6558274459838866, "loss": 0.1225413903594017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.399832299965354, "step_time": 0.6177005119323731} +{"epoch": 0, "iter": 15170, "iter_tflops": 14.51481063922135, "iter_time": 1.4213822021484372, "loss": 0.06328853964805603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.880126739943957, "step_time": 1.2991768798828125} +{"epoch": 0, "iter": 15171, "iter_tflops": 26.20467812263566, "iter_time": 0.7873057403564452, "loss": 0.10102095454931259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.699573952502167, "step_time": 0.6508318862915039} +{"epoch": 0, "iter": 15172, "iter_tflops": 39.91462739433998, "iter_time": 
0.5168805236816406, "loss": 0.11249690502882004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95890960262638, "step_time": 0.4693267803192138} +{"epoch": 0, "iter": 15173, "iter_tflops": 21.273859229784968, "iter_time": 0.9697861251831055, "loss": 0.09771807491779327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.96332338595967, "step_time": 0.8984367446899414} +{"epoch": 0, "iter": 15174, "iter_tflops": 22.48859572068797, "iter_time": 0.9174024810791015, "loss": 0.1648237705230713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.118230067916517, "step_time": 0.8213593654632568} +{"epoch": 0, "iter": 15175, "iter_tflops": 38.912945589664176, "iter_time": 0.5301858596801758, "loss": 0.14595313370227814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6507063214246, "step_time": 0.4837222003936768} +{"epoch": 0, "iter": 15176, "iter_tflops": 43.53819541311287, "iter_time": 0.4738619346618652, "loss": 0.11814706027507782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.50811544255928, "step_time": 0.43426461601257327} +{"epoch": 0, "iter": 15177, "iter_tflops": 31.790192160453127, "iter_time": 0.6489766845703124, "loss": 0.10008955001831055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.432203454180886, "step_time": 0.5822695598602295} +{"epoch": 0, "iter": 15178, "iter_tflops": 38.65095512585279, "iter_time": 0.533779655456543, "loss": 0.08011821657419205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90973938483193, "step_time": 0.4808021163940429} +{"epoch": 0, "iter": 15179, "iter_tflops": 42.62936976335617, "iter_time": 0.48396430969238285, "loss": 0.13720665872097015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.67005689709278, "step_time": 0.4420627460479737} +{"epoch": 0, "iter": 15180, "iter_tflops": 45.82318302739758, "iter_time": 0.45023265838623044, "loss": 0.14235098659992218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26407305877334, "step_time": 0.41045407295227054} +{"epoch": 0, "iter": 15181, "iter_tflops": 17.73811858541503, 
"iter_time": 1.1630936737060544, "loss": 0.4736129939556122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.257509397288256, "step_time": 1.0713271942138671} +{"epoch": 0, "iter": 15182, "iter_tflops": 34.74336462598632, "iter_time": 0.5938139190673828, "loss": 0.3766380846500397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.53717433075748, "step_time": 0.5218150730133057} +{"epoch": 0, "iter": 15183, "iter_tflops": 47.01977194160824, "iter_time": 0.4387748527526855, "loss": 0.29150688648223877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.38018332674366, "step_time": 0.40153795051574703} +{"epoch": 0, "iter": 15184, "iter_tflops": 51.59005299053212, "iter_time": 0.399904483795166, "loss": 0.3311856985092163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.70507349991841, "step_time": 0.3703629169464111} +{"epoch": 0, "iter": 15185, "iter_tflops": 25.614597893135365, "iter_time": 0.8054428024291992, "loss": 0.4148986339569092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.964784169457456, "step_time": 0.7651125030517578} +{"epoch": 0, "iter": 15186, "iter_tflops": 14.530654144135779, "iter_time": 1.4198323974609375, "loss": 0.3199251890182495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.406263323055178, "step_time": 1.1208735389709474} +{"epoch": 0, "iter": 15187, "iter_tflops": 46.38200373680131, "iter_time": 0.4448081550598144, "loss": 0.39935386180877686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.16131325039569, "step_time": 0.4112949237823486} +{"epoch": 0, "iter": 15188, "iter_tflops": 49.81245098312055, "iter_time": 0.41417543411254876, "loss": 0.3511371910572052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.91201525300705, "step_time": 0.38268080711364744} +{"epoch": 0, "iter": 15189, "iter_tflops": 18.53335834478198, "iter_time": 1.113186996459961, "loss": 0.760422945022583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.243410362435164, "step_time": 1.0721121215820313} +{"epoch": 0, "iter": 15190, "iter_tflops": 
20.068575348511068, "iter_time": 1.0280298004150392, "loss": 0.8120729923248291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.969046206133232, "step_time": 0.8607390270233154} +{"epoch": 0, "iter": 15191, "iter_tflops": 35.11468190577278, "iter_time": 0.5875346832275391, "loss": 0.6198129653930664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.183059936686284, "step_time": 0.5403205909729004} +{"epoch": 0, "iter": 15192, "iter_tflops": 35.77242396297967, "iter_time": 0.5767317733764648, "loss": 0.668159544467926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.88151824858896, "step_time": 0.5306144008636475} +{"epoch": 0, "iter": 15193, "iter_tflops": 22.62002313984248, "iter_time": 0.9120721664428711, "loss": 0.16432678699493408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.483858343455758, "step_time": 0.8426406173706055} +{"epoch": 0, "iter": 15194, "iter_tflops": 46.150684312901696, "iter_time": 0.44703765106201176, "loss": 0.10953035950660706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.23826898896583, "step_time": 0.41066489601135253} +{"epoch": 0, "iter": 15195, "iter_tflops": 46.541636362147685, "iter_time": 0.4432825126647949, "loss": 0.06815309822559357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40287362951467, "step_time": 0.4093237552642822} +{"epoch": 0, "iter": 15196, "iter_tflops": 47.7813021332012, "iter_time": 0.43178173446655266, "loss": 0.11818835884332657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.796384874521515, "step_time": 0.3983114566802978} +{"epoch": 0, "iter": 15197, "iter_tflops": 38.78852081913384, "iter_time": 0.44031788253784176, "loss": 0.030186599120497704, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 42.70006222841309, "step_time": 0.39998254013061524} +{"epoch": 0, "iter": 15198, "iter_tflops": 30.14714149344969, "iter_time": 0.5665306396484375, "loss": 0.04204006865620613, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 33.322934862497384, "step_time": 0.5125382690429687} +{"epoch": 0, "iter": 
15199, "iter_tflops": 34.475749968813865, "iter_time": 0.4953997917175293, "loss": 0.017138194292783737, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 37.934660357601786, "step_time": 0.45022887229919434} +{"epoch": 0, "iter": 15200, "iter_tflops": 39.819431044566485, "iter_time": 0.42891821670532226, "loss": 0.01222978811711073, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 43.68046267234496, "step_time": 0.3910050010681152} +{"epoch": 0, "iter": 15201, "iter_tflops": 22.450817463270063, "iter_time": 0.9189462051391601, "loss": 0.5660467743873596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.018914785203744, "step_time": 0.8589519424438476} +{"epoch": 0, "iter": 15202, "iter_tflops": 9.323392074939107, "iter_time": 2.212831268310547, "loss": 0.6968611478805542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.2333816911522, "step_time": 2.0160582427978513} +{"epoch": 0, "iter": 15203, "iter_tflops": 16.46821814666292, "iter_time": 1.2527823791503905, "loss": 0.5983027815818787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.506164553318172, "step_time": 1.0576704330444335} +{"epoch": 0, "iter": 15204, "iter_tflops": 36.44689613468704, "iter_time": 0.5660589981079102, "loss": 0.6232922673225403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06647308359012, "step_time": 0.5149216270446777} +{"epoch": 0, "iter": 15205, "iter_tflops": 11.35898524939731, "iter_time": 1.3556015014648437, "loss": 0.37986239790916443, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 12.227749623179625, "step_time": 1.2592879257202148} +{"epoch": 0, "iter": 15206, "iter_tflops": 13.071119677377201, "iter_time": 1.178036605834961, "loss": 0.30668219923973083, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 14.562950077344738, "step_time": 1.0573583908081055} +{"epoch": 0, "iter": 15207, "iter_tflops": 22.525752847337422, "iter_time": 0.683584587097168, "loss": 0.46655333042144775, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.223438233680533, "step_time": 0.6356759643554688} 
+{"epoch": 0, "iter": 15208, "iter_tflops": 25.064986998906836, "iter_time": 0.6143333511352539, "loss": 0.3811788558959961, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.83724809109045, "step_time": 0.5737643966674806} +{"epoch": 0, "iter": 15209, "iter_tflops": 18.236526496803183, "iter_time": 1.1313060913085937, "loss": 0.13354367017745972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.81356194234765, "step_time": 1.0412612113952637} +{"epoch": 0, "iter": 15210, "iter_tflops": 19.648926039661436, "iter_time": 1.049985809326172, "loss": 0.15954414010047913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.121957079005735, "step_time": 0.9326070671081542} +{"epoch": 0, "iter": 15211, "iter_tflops": 35.72432564597648, "iter_time": 0.5775082702636719, "loss": 0.1357964426279068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.07036875400583, "step_time": 0.5280496234893799} +{"epoch": 0, "iter": 15212, "iter_tflops": 39.11405002945381, "iter_time": 0.5274599151611328, "loss": 0.15450666844844818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.687743585503526, "step_time": 0.4833025074005128} +{"epoch": 0, "iter": 15213, "iter_tflops": 29.817372548330713, "iter_time": 0.5273985443115234, "loss": 0.01024886965751648, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 33.354541235578566, "step_time": 0.47146920013427734} +{"epoch": 0, "iter": 15214, "iter_tflops": 31.865493431153048, "iter_time": 0.4935005607604981, "loss": 0.014510597102344036, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 35.087923282507035, "step_time": 0.44817810249328616} +{"epoch": 0, "iter": 15215, "iter_tflops": 34.893951325843275, "iter_time": 0.4506694793701172, "loss": 0.02289053052663803, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 38.666626201040195, "step_time": 0.40669798278808594} +{"epoch": 0, "iter": 15216, "iter_tflops": 31.58638018645426, "iter_time": 0.49786138153076176, "loss": 0.010053310543298721, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 34.847285902299376, 
"step_time": 0.45127298927307136} +{"epoch": 0, "iter": 15217, "iter_tflops": 18.660988211112862, "iter_time": 1.1055734710693361, "loss": 0.11412986367940903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.392685771671434, "step_time": 1.011690845489502} +{"epoch": 0, "iter": 15218, "iter_tflops": 26.111650229957284, "iter_time": 0.7901106719970704, "loss": 0.1450337916612625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.67734071024842, "step_time": 0.651288682937622} +{"epoch": 0, "iter": 15219, "iter_tflops": 48.5250264947264, "iter_time": 0.42516398239135744, "loss": 0.1303388625383377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.66707668673132, "step_time": 0.39172657394409177} +{"epoch": 0, "iter": 15220, "iter_tflops": 49.410459250787994, "iter_time": 0.4175450668334961, "loss": 0.15233094990253448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.53161744236667, "step_time": 0.3854001522064209} +{"epoch": 0, "iter": 15221, "iter_tflops": 24.507735761404692, "iter_time": 0.8418196487426757, "loss": 0.728287398815155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.66404489503244, "step_time": 0.8038909530639649} +{"epoch": 0, "iter": 15222, "iter_tflops": 16.54862726153399, "iter_time": 1.2466951599121092, "loss": 0.6662672162055969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.464977047924563, "step_time": 0.9611514358520508} +{"epoch": 0, "iter": 15223, "iter_tflops": 44.51950193407027, "iter_time": 0.4634169883728027, "loss": 0.5738298892974854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.990305827192294, "step_time": 0.42990127182006843} +{"epoch": 0, "iter": 15224, "iter_tflops": 43.863597320081674, "iter_time": 0.4703465919494629, "loss": 0.5378323793411255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.30191536783846, "step_time": 0.4361576766967773} +{"epoch": 0, "iter": 15225, "iter_tflops": 46.92252602592574, "iter_time": 0.43968420410156245, "loss": 0.4147303104400635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
51.62163212059562, "step_time": 0.3996598453521729} +{"epoch": 0, "iter": 15226, "iter_tflops": 46.519506301171226, "iter_time": 0.44349338912963865, "loss": 0.3923051655292511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.311759033497786, "step_time": 0.4100650405883789} +{"epoch": 0, "iter": 15227, "iter_tflops": 46.30087144224359, "iter_time": 0.4455875854492188, "loss": 0.26975923776626587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.116221734061206, "step_time": 0.41166498184204103} +{"epoch": 0, "iter": 15228, "iter_tflops": 48.708199592139515, "iter_time": 0.42356510162353517, "loss": 0.2890204191207886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.39087794778753, "step_time": 0.393791711807251} +{"epoch": 0, "iter": 15229, "iter_tflops": 35.40749144287153, "iter_time": 0.45686042022705076, "loss": 0.041778627783060074, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 38.721851939322384, "step_time": 0.41775588226318366} +{"epoch": 0, "iter": 15230, "iter_tflops": 12.241303585803676, "iter_time": 1.321450881958008, "loss": 0.020580420270562172, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 13.163260076476943, "step_time": 1.2288962860107422} +{"epoch": 0, "iter": 15231, "iter_tflops": 13.38974284531093, "iter_time": 1.2081099395751953, "loss": 0.02035423181951046, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 16.02639585231535, "step_time": 1.0093524188995362} +{"epoch": 0, "iter": 15232, "iter_tflops": 40.30299069750564, "iter_time": 0.4013667755126953, "loss": 0.021737296134233475, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 44.15021420956598, "step_time": 0.3663919124603272} +{"epoch": 0, "iter": 15233, "iter_tflops": 26.246395027692994, "iter_time": 0.5944753723144531, "loss": 0.3258903920650482, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 28.14466444729787, "step_time": 0.5543798713684082} +{"epoch": 0, "iter": 15234, "iter_tflops": 26.09217644817631, "iter_time": 0.5979890365600586, "loss": 0.45751050114631653, "lr": 3e-05, "seqlen": 
6240.0, "step_tflops": 27.876990026782106, "step_time": 0.5597030181884766} +{"epoch": 0, "iter": 15235, "iter_tflops": 26.754786289589415, "iter_time": 0.5831792221069335, "loss": 0.36051174998283386, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 28.58207311240088, "step_time": 0.5458958625793457} +{"epoch": 0, "iter": 15236, "iter_tflops": 27.270184199335088, "iter_time": 0.5721573181152344, "loss": 0.3278590440750122, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 28.953806258097377, "step_time": 0.5388871955871581} +{"epoch": 0, "iter": 15237, "iter_tflops": 24.151352507669753, "iter_time": 0.8542417449951172, "loss": 0.40044674277305603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.543686464449515, "step_time": 0.8076787796020508} +{"epoch": 0, "iter": 15238, "iter_tflops": 13.03127465297109, "iter_time": 1.5831984252929687, "loss": 0.6010333299636841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.25730905357891, "step_time": 1.2690349578857423} +{"epoch": 0, "iter": 15239, "iter_tflops": 33.3568608990076, "iter_time": 0.6184962539672851, "loss": 0.679990828037262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.35929655023225, "step_time": 0.5674227905273438} +{"epoch": 0, "iter": 15240, "iter_tflops": 38.87655042505355, "iter_time": 0.5306822052001953, "loss": 0.6018473505973816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.334201384981306, "step_time": 0.48733867263793945} +{"epoch": 0, "iter": 15241, "iter_tflops": 19.67973123199514, "iter_time": 1.048342239379883, "loss": 0.12044371664524078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.247688888110794, "step_time": 0.9709805908203126} +{"epoch": 0, "iter": 15242, "iter_tflops": 21.53283691188184, "iter_time": 0.9581224060058594, "loss": 0.08083529770374298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.290554986097334, "step_time": 0.7847340431213379} +{"epoch": 0, "iter": 15243, "iter_tflops": 48.26634419884851, "iter_time": 0.42744263839721686, "loss": 0.11288970708847046, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 52.277451522137085, "step_time": 0.3946461219787597} +{"epoch": 0, "iter": 15244, "iter_tflops": 50.67098650005877, "iter_time": 0.4071579208374023, "loss": 0.1689872145652771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.30746987983655, "step_time": 0.3730254440307617} +{"epoch": 0, "iter": 15245, "iter_tflops": 23.38515694174581, "iter_time": 0.8822302780151368, "loss": 0.09621264040470123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.556468581679685, "step_time": 0.840149040222168} +{"epoch": 0, "iter": 15246, "iter_tflops": 12.49879393513865, "iter_time": 1.6506467437744141, "loss": 0.06031205877661705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.593321296776445, "step_time": 1.1726662158966064} +{"epoch": 0, "iter": 15247, "iter_tflops": 44.132326648109526, "iter_time": 0.4674825706481934, "loss": 0.09447295218706131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.77131750542159, "step_time": 0.4318719806671142} +{"epoch": 0, "iter": 15248, "iter_tflops": 54.668975902375266, "iter_time": 0.3773821105957031, "loss": 0.11285201460123062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.608347501818535, "step_time": 0.3461108112335205} +{"epoch": 0, "iter": 15249, "iter_tflops": 39.38173386581569, "iter_time": 0.5238746871948242, "loss": 0.3835219442844391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.55441639132651, "step_time": 0.4848167419433594} +{"epoch": 0, "iter": 15250, "iter_tflops": 19.883492867639024, "iter_time": 1.037599060058594, "loss": 0.45734918117523193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.176237392901932, "step_time": 0.6836867446899415} +{"epoch": 0, "iter": 15251, "iter_tflops": 35.72140545867127, "iter_time": 0.5775554809570312, "loss": 0.38976916670799255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.257865598183635, "step_time": 0.5255276412963867} +{"epoch": 0, "iter": 15252, "iter_tflops": 42.242770601991026, "iter_time": 0.4883934745788574, "loss": 
0.34521275758743286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.244330589007305, "step_time": 0.44613238525390625} +{"epoch": 0, "iter": 15253, "iter_tflops": 10.947907517503271, "iter_time": 1.402766326904297, "loss": 0.36744725704193115, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 11.543702458657773, "step_time": 1.330366584777832} +{"epoch": 0, "iter": 15254, "iter_tflops": 13.87385984890259, "iter_time": 1.1069274291992186, "loss": 0.2678106427192688, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 16.679112086319304, "step_time": 0.9207538108825684} +{"epoch": 0, "iter": 15255, "iter_tflops": 28.314009281949073, "iter_time": 0.5423942565917969, "loss": 0.27725282311439514, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.110839368240324, "step_time": 0.5100274963378906} +{"epoch": 0, "iter": 15256, "iter_tflops": 26.1518753851654, "iter_time": 0.5872372741699218, "loss": 0.30464237928390503, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.69877330564933, "step_time": 0.5544417381286622} +{"epoch": 0, "iter": 15257, "iter_tflops": 3.0730478528810083, "iter_time": 0.45889128112792965, "loss": 0.5573551654815674, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.358481901699213, "step_time": 0.41989056587219237} +{"epoch": 0, "iter": 15258, "iter_tflops": 0.6126138165988178, "iter_time": 2.3019312133789063, "loss": 0.44379374384880066, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 0.6701398261849242, "step_time": 2.104329292297363} +{"epoch": 0, "iter": 15259, "iter_tflops": 0.7928182491176479, "iter_time": 1.778711410522461, "loss": 0.5227165222167969, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.1108992955667438, "step_time": 1.2694173736572267} +{"epoch": 0, "iter": 15260, "iter_tflops": 2.1721674493138057, "iter_time": 0.6492109374999999, "loss": 0.37460964918136597, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.4090439127916095, "step_time": 0.5853753261566162} +{"epoch": 0, "iter": 15261, "iter_tflops": 21.084761653282094, "iter_time": 
0.7652583694458007, "loss": 0.32033780217170715, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 22.38863365706309, "step_time": 0.7206911582946777} +{"epoch": 0, "iter": 15262, "iter_tflops": 12.714867557422727, "iter_time": 1.2690097045898436, "loss": 0.25328901410102844, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 13.961622071920267, "step_time": 1.1556888046264648} +{"epoch": 0, "iter": 15263, "iter_tflops": 29.02633416910283, "iter_time": 0.5558845367431641, "loss": 0.4534027576446533, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 30.980789482729463, "step_time": 0.5208159828186035} +{"epoch": 0, "iter": 15264, "iter_tflops": 28.544860857667373, "iter_time": 0.5652607803344726, "loss": 0.4397674798965454, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 30.39769033395811, "step_time": 0.5308064575195313} +{"epoch": 0, "iter": 15265, "iter_tflops": 24.96806222460427, "iter_time": 0.826299346923828, "loss": 0.003136002691462636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.35195516487302, "step_time": 0.7829056091308595} +{"epoch": 0, "iter": 15266, "iter_tflops": 13.736999668480196, "iter_time": 1.5018631439208985, "loss": 0.0022862255573272705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.318258024522123, "step_time": 1.126258483886719} +{"epoch": 0, "iter": 15267, "iter_tflops": 45.573196075757814, "iter_time": 0.4527023620605469, "loss": 0.008884905837476254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.558328924824025, "step_time": 0.3523169784545898} +{"epoch": 0, "iter": 15268, "iter_tflops": 59.36549601613381, "iter_time": 0.3475266761779785, "loss": 0.018723858520388603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.56870756456074, "step_time": 0.3146484699249268} +{"epoch": 0, "iter": 15269, "iter_tflops": 30.70252076816328, "iter_time": 0.671967414855957, "loss": 0.5845130681991577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.61211470365131, "step_time": 0.6326205368041993} +{"epoch": 0, "iter": 15270, "iter_tflops": 
13.866285109276454, "iter_time": 1.4878601837158203, "loss": 0.42945149540901184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.58621839530802, "step_time": 1.2438696403503418} +{"epoch": 0, "iter": 15271, "iter_tflops": 45.71443472167981, "iter_time": 0.4513036994934082, "loss": 0.4538881182670593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.417117159064, "step_time": 0.41748881149291994} +{"epoch": 0, "iter": 15272, "iter_tflops": 50.38246484131862, "iter_time": 0.40948956298828126, "loss": 0.8027298450469971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.437852986987295, "step_time": 0.3789843349456787} +{"epoch": 0, "iter": 15273, "iter_tflops": 24.808374051156896, "iter_time": 0.604209503173828, "loss": 0.03055388480424881, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 26.47557997656651, "step_time": 0.566161548614502} +{"epoch": 0, "iter": 15274, "iter_tflops": 9.68072939407911, "iter_time": 1.5483807830810545, "loss": 0.037503816187381744, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 10.89648086354129, "step_time": 1.3756235198974611} +{"epoch": 0, "iter": 15275, "iter_tflops": 38.02207864463132, "iter_time": 0.39423029708862306, "loss": 0.05923006683588028, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 42.26776273349681, "step_time": 0.3546309146881103} +{"epoch": 0, "iter": 15276, "iter_tflops": 36.343763404268294, "iter_time": 0.41243542098999025, "loss": 0.031958453357219696, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 39.33337560461367, "step_time": 0.381087438583374} +{"epoch": 0, "iter": 15277, "iter_tflops": 19.242739651244925, "iter_time": 1.0721494903564452, "loss": 0.26424822211265564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.161752997944177, "step_time": 1.0232787551879883} +{"epoch": 0, "iter": 15278, "iter_tflops": 13.775679467479764, "iter_time": 1.4976461639404297, "loss": 0.2036200612783432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.95079514144035, "step_time": 1.1493136291503907} +{"epoch": 0, "iter": 15279, 
"iter_tflops": 40.29474845194464, "iter_time": 0.512004524230957, "loss": 0.28964927792549133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.07923420544097, "step_time": 0.4680456428527832} +{"epoch": 0, "iter": 15280, "iter_tflops": 42.45794478146402, "iter_time": 0.48591832733154294, "loss": 0.20295299589633942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44284467441913, "step_time": 0.4442254486083985} +{"epoch": 0, "iter": 15281, "iter_tflops": 21.430623249495397, "iter_time": 0.9626921844482421, "loss": 0.0930078998208046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.234776678152016, "step_time": 0.8879402542114258} +{"epoch": 0, "iter": 15282, "iter_tflops": 18.316974033024135, "iter_time": 1.126337432861328, "loss": 0.06784675270318985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.827877868969143, "step_time": 0.8658384780883789} +{"epoch": 0, "iter": 15283, "iter_tflops": 49.871430247801655, "iter_time": 0.4136856193542481, "loss": 0.11974281817674637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.536667430214436, "step_time": 0.3782976570129395} +{"epoch": 0, "iter": 15284, "iter_tflops": 54.367410945045485, "iter_time": 0.3794753723144531, "loss": 0.13197501003742218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.28538216762525, "step_time": 0.34799629783630376} +{"epoch": 0, "iter": 15285, "iter_tflops": 24.33813901674248, "iter_time": 0.8476857452392579, "loss": 0.5659768581390381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.637566574943754, "step_time": 0.804721206665039} +{"epoch": 0, "iter": 15286, "iter_tflops": 16.725282673851556, "iter_time": 1.23352734375, "loss": 0.6003637909889221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.005395654682786, "step_time": 1.031276454925537} +{"epoch": 0, "iter": 15287, "iter_tflops": 36.217998940412414, "iter_time": 0.5696364822387695, "loss": 0.5626616477966309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.41486812569028, "step_time": 0.523434289932251} +{"epoch": 0, "iter": 
15288, "iter_tflops": 37.6907708170518, "iter_time": 0.5473778610229492, "loss": 0.6630455255508423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.78898798698808, "step_time": 0.5058005733489991} +{"epoch": 0, "iter": 15289, "iter_tflops": 1.7374554727645197, "iter_time": 0.879439598083496, "loss": 0.30965927243232727, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 1.8819379068658326, "step_time": 0.8119221878051758} +{"epoch": 0, "iter": 15290, "iter_tflops": 1.7261758694999478, "iter_time": 0.8851862487792969, "loss": 0.3237375020980835, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 2.317009472458999, "step_time": 0.6594652118682862} +{"epoch": 0, "iter": 15291, "iter_tflops": 3.5772304638334935, "iter_time": 0.4271424942016601, "loss": 0.25187090039253235, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.871748209465265, "step_time": 0.3946504421234131} +{"epoch": 0, "iter": 15292, "iter_tflops": 3.5503348157045846, "iter_time": 0.43037832260131836, "loss": 0.1999296396970749, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.853077670665431, "step_time": 0.3965627670288086} +{"epoch": 0, "iter": 15293, "iter_tflops": 28.76938739516235, "iter_time": 0.7171196670532227, "loss": 0.3879125118255615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.424227322984596, "step_time": 0.6781139678955078} +{"epoch": 0, "iter": 15294, "iter_tflops": 18.062444703284484, "iter_time": 1.1422093658447265, "loss": 0.5710511207580566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.072212122537987, "step_time": 0.7620763835906982} +{"epoch": 0, "iter": 15295, "iter_tflops": 46.40661794638011, "iter_time": 0.44457222747802744, "loss": 0.323779821395874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.208021517954535, "step_time": 0.4109122982025147} +{"epoch": 0, "iter": 15296, "iter_tflops": 46.155617788123116, "iter_time": 0.4469898681640625, "loss": 0.43084219098091125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.783200827534465, "step_time": 0.41441878318786624} +{"epoch": 0, 
"iter": 15297, "iter_tflops": 29.651553057113336, "iter_time": 0.69578458404541, "loss": 0.2602306604385376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.424197831253267, "step_time": 0.6565352478027343} +{"epoch": 0, "iter": 15298, "iter_tflops": 13.018704703736297, "iter_time": 1.5847270507812499, "loss": 0.2124524712562561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.89648796034912, "step_time": 1.2978397216796878} +{"epoch": 0, "iter": 15299, "iter_tflops": 19.70822151465175, "iter_time": 1.0468267517089842, "loss": 0.25651127099990845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.47128723482188, "step_time": 0.8789928436279296} +{"epoch": 0, "iter": 15300, "iter_tflops": 48.949441516961244, "iter_time": 0.42147760772705084, "loss": 0.19387340545654297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15178703426248, "step_time": 0.3881542778015136} +{"epoch": 0, "iter": 15301, "iter_tflops": 18.823580317334297, "iter_time": 0.8245503692626953, "loss": 0.5161415934562683, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 19.84195908312532, "step_time": 0.782230728149414} +{"epoch": 0, "iter": 15302, "iter_tflops": 6.690770980467818, "iter_time": 2.3197610778808593, "loss": 0.29862406849861145, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 7.948053610611883, "step_time": 1.9528039016723633} +{"epoch": 0, "iter": 15303, "iter_tflops": 9.63628251932717, "iter_time": 1.6106823425292969, "loss": 0.2501564621925354, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 11.81410603290585, "step_time": 1.3137676315307616} +{"epoch": 0, "iter": 15304, "iter_tflops": 22.287783576703426, "iter_time": 0.6963900222778321, "loss": 0.3284895420074463, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 24.0049709236736, "step_time": 0.646574001312256} +{"epoch": 0, "iter": 15305, "iter_tflops": 13.954733063835784, "iter_time": 1.1856488800048826, "loss": 0.29737335443496704, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 14.858415833674416, "step_time": 1.113538200378418} 
+{"epoch": 0, "iter": 15306, "iter_tflops": 17.600764159180798, "iter_time": 0.9400395050048829, "loss": 0.35242271423339844, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 20.937481615773155, "step_time": 0.790229404449463} +{"epoch": 0, "iter": 15307, "iter_tflops": 28.628117956373245, "iter_time": 0.5779427642822266, "loss": 0.2538580894470215, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 30.474235232644727, "step_time": 0.5429312171936036} +{"epoch": 0, "iter": 15308, "iter_tflops": 29.68010105329729, "iter_time": 0.5574581298828125, "loss": 0.45410674810409546, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.57111610689008, "step_time": 0.5240680618286133} +{"epoch": 0, "iter": 15309, "iter_tflops": 40.288217697624525, "iter_time": 0.5120875205993652, "loss": 0.6510302424430847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.793795645890505, "step_time": 0.471096263885498} +{"epoch": 0, "iter": 15310, "iter_tflops": 30.349078641470847, "iter_time": 0.6797930755615235, "loss": 0.6782790422439575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.00273311108845, "step_time": 0.6251328773498536} +{"epoch": 0, "iter": 15311, "iter_tflops": 34.22459619354452, "iter_time": 0.6028148117065429, "loss": 0.6156560778617859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.303838422989394, "step_time": 0.553055513381958} +{"epoch": 0, "iter": 15312, "iter_tflops": 37.23234934717712, "iter_time": 0.5541174240112305, "loss": 0.8223155736923218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.42462800171409, "step_time": 0.5103595142364503} +{"epoch": 0, "iter": 15313, "iter_tflops": 17.343193817760483, "iter_time": 1.1895786743164063, "loss": 0.2422153502702713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.633567843844176, "step_time": 1.1072003860473634} +{"epoch": 0, "iter": 15314, "iter_tflops": 35.23331592654633, "iter_time": 0.585556396484375, "loss": 0.20993274450302124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14086937757772, "step_time": 
0.5270984992980957} +{"epoch": 0, "iter": 15315, "iter_tflops": 40.20813280659205, "iter_time": 0.5131074752807616, "loss": 0.137794628739357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95203499692554, "step_time": 0.4694001884460449} +{"epoch": 0, "iter": 15316, "iter_tflops": 38.9997599803939, "iter_time": 0.5290056533813476, "loss": 0.11052870750427246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.28263363321692, "step_time": 0.4879330291748047} +{"epoch": 0, "iter": 15317, "iter_tflops": 2.249391513250708, "iter_time": 1.1514352416992186, "loss": 0.008104094304144382, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 2.417140402999774, "step_time": 1.0715259475708008} +{"epoch": 0, "iter": 15318, "iter_tflops": 2.986571571565618, "iter_time": 0.867224708557129, "loss": 0.13990634679794312, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 3.654556797170842, "step_time": 0.7087121105194092} +{"epoch": 0, "iter": 15319, "iter_tflops": 6.279644068660631, "iter_time": 0.41244832229614253, "loss": 0.018796060234308243, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 6.7807115918222, "step_time": 0.3819700374603271} +{"epoch": 0, "iter": 15320, "iter_tflops": 5.834909401226339, "iter_time": 0.44388498306274415, "loss": 0.2846490144729614, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 6.335141509501096, "step_time": 0.40883517074584963} +{"epoch": 0, "iter": 15321, "iter_tflops": 34.32049420136474, "iter_time": 0.6011304321289063, "loss": 0.19181609153747559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.720079436209794, "step_time": 0.5618477363586425} +{"epoch": 0, "iter": 15322, "iter_tflops": 13.978218192544816, "iter_time": 1.4759458770751952, "loss": 0.31694135069847107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.17706208207629, "step_time": 1.2010839462280274} +{"epoch": 0, "iter": 15323, "iter_tflops": 15.879050268206292, "iter_time": 1.2992649536132812, "loss": 0.20260624587535858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.33391670544691, 
"step_time": 1.1252965660095215} +{"epoch": 0, "iter": 15324, "iter_tflops": 19.199615625246828, "iter_time": 1.074557632446289, "loss": 0.24665559828281403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.707690853352947, "step_time": 0.9504047966003418} +{"epoch": 0, "iter": 15325, "iter_tflops": 21.34743197333986, "iter_time": 0.7308998794555663, "loss": 0.2839173376560211, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 22.69706867622815, "step_time": 0.6874383506774903} +{"epoch": 0, "iter": 15326, "iter_tflops": 13.504864734408692, "iter_time": 1.1553492584228517, "loss": 0.5562580227851868, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.165046567847952, "step_time": 0.9652205696105957} +{"epoch": 0, "iter": 15327, "iter_tflops": 27.774926305657168, "iter_time": 0.5617597427368164, "loss": 0.4332207441329956, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.57217808596502, "step_time": 0.5276187438964843} +{"epoch": 0, "iter": 15328, "iter_tflops": 29.003724981832136, "iter_time": 0.5379597091674805, "loss": 0.3226214051246643, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.88438093651662, "step_time": 0.5052014961242676} +{"epoch": 0, "iter": 15329, "iter_tflops": 43.07675180848173, "iter_time": 0.47893800354003907, "loss": 0.4063836634159088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.82686639880429, "step_time": 0.44058240699768064} +{"epoch": 0, "iter": 15330, "iter_tflops": 45.62318338561224, "iter_time": 0.45220635604858395, "loss": 0.38363948464393616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62894289501278, "step_time": 0.41570689010620115} +{"epoch": 0, "iter": 15331, "iter_tflops": 44.57819609009677, "iter_time": 0.462806827545166, "loss": 0.18978093564510345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.230097223139786, "step_time": 0.427763879776001} +{"epoch": 0, "iter": 15332, "iter_tflops": 47.605590137960974, "iter_time": 0.4333754386901856, "loss": 0.4032679796218872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
51.542287196519204, "step_time": 0.40027508735656736} +{"epoch": 0, "iter": 15333, "iter_tflops": 45.031560768108676, "iter_time": 0.45814742279052734, "loss": 0.39770272374153137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.48480857375003, "step_time": 0.4169177188873291} +{"epoch": 0, "iter": 15334, "iter_tflops": 42.49458410197246, "iter_time": 0.4854993629455567, "loss": 0.36399152874946594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.21636467414719, "step_time": 0.44640234375} +{"epoch": 0, "iter": 15335, "iter_tflops": 46.95945089774238, "iter_time": 0.43933847427368167, "loss": 0.35538890957832336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69332078837745, "step_time": 0.406978536605835} +{"epoch": 0, "iter": 15336, "iter_tflops": 46.71079538887509, "iter_time": 0.44167720413208006, "loss": 0.28022700548171997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.54368894916946, "step_time": 0.4081833744049072} +{"epoch": 0, "iter": 15337, "iter_tflops": 31.078792949389584, "iter_time": 0.6638318786621095, "loss": 0.07063357532024384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.06400554246392, "step_time": 0.6239744148254395} +{"epoch": 0, "iter": 15338, "iter_tflops": 20.754486845314055, "iter_time": 0.9940546188354492, "loss": 0.05111706256866455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.324689302138395, "step_time": 0.8845173988342284} +{"epoch": 0, "iter": 15339, "iter_tflops": 47.523490253548, "iter_time": 0.4341241226196289, "loss": 0.038746099919080734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.54245543572016, "step_time": 0.4002737808227539} +{"epoch": 0, "iter": 15340, "iter_tflops": 51.01619540036365, "iter_time": 0.4044028244018555, "loss": 0.07244088500738144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.80492738109024, "step_time": 0.3697002124786377} +{"epoch": 0, "iter": 15341, "iter_tflops": 14.727682269955613, "iter_time": 0.8684379196166991, "loss": 0.08686243742704391, "lr": 3e-05, "seqlen": 5136.0, 
"step_tflops": 15.445216269861032, "step_time": 0.828093147277832} +{"epoch": 0, "iter": 15342, "iter_tflops": 9.626975308404289, "iter_time": 1.3285665893554688, "loss": 0.08128727972507477, "lr": 3e-05, "seqlen": 5136.0, "step_tflops": 10.883664518698007, "step_time": 1.1751628074645994} +{"epoch": 0, "iter": 15343, "iter_tflops": 23.44837196810436, "iter_time": 0.5454569625854493, "loss": 0.031165288761258125, "lr": 3e-05, "seqlen": 5136.0, "step_tflops": 25.836383188815034, "step_time": 0.4950413398742676} +{"epoch": 0, "iter": 15344, "iter_tflops": 26.842851021413267, "iter_time": 0.47647985458374026, "loss": 0.04982700198888779, "lr": 3e-05, "seqlen": 5136.0, "step_tflops": 29.67721961189325, "step_time": 0.4309729118347168} +{"epoch": 0, "iter": 15345, "iter_tflops": 30.080945460735176, "iter_time": 0.6858525619506837, "loss": 0.0043018292635679245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.84041861306092, "step_time": 0.6282226104736328} +{"epoch": 0, "iter": 15346, "iter_tflops": 8.81705412615873, "iter_time": 2.339907775878906, "loss": 0.02785729058086872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.445839378784667, "step_time": 1.975053680419922} +{"epoch": 0, "iter": 15347, "iter_tflops": 11.78602215937466, "iter_time": 1.7504712982177735, "loss": 0.0031263548880815506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.61972134717149, "step_time": 1.4111824035644531} +{"epoch": 0, "iter": 15348, "iter_tflops": 50.47966143354395, "iter_time": 0.4087011070251465, "loss": 0.008689135313034058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.647952458397704, "step_time": 0.37074308395385747} +{"epoch": 0, "iter": 15349, "iter_tflops": 22.562196613790764, "iter_time": 0.700620231628418, "loss": 0.3259194493293762, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 23.94170784330829, "step_time": 0.6602507858276367} +{"epoch": 0, "iter": 15350, "iter_tflops": 11.168934500734574, "iter_time": 1.4153123931884766, "loss": 0.3086870610713959, "lr": 3e-05, 
"seqlen": 6320.0, "step_tflops": 13.247201728902313, "step_time": 1.1932732467651366} +{"epoch": 0, "iter": 15351, "iter_tflops": 23.758060566972834, "iter_time": 0.6653544540405274, "loss": 0.3483586311340332, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.6271261944773, "step_time": 0.6168280944824219} +{"epoch": 0, "iter": 15352, "iter_tflops": 24.080003906491044, "iter_time": 0.656458839416504, "loss": 0.27005669474601746, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.92079827608004, "step_time": 0.6098396835327149} +{"epoch": 0, "iter": 15353, "iter_tflops": 21.51958995046373, "iter_time": 0.9587122039794921, "loss": 0.020733322948217392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.98373239538233, "step_time": 0.8976389541625976} +{"epoch": 0, "iter": 15354, "iter_tflops": 7.079628201216851, "iter_time": 2.9141492919921874, "loss": 0.03326826170086861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.84917785641258, "step_time": 2.331413589477539} +{"epoch": 0, "iter": 15355, "iter_tflops": 16.69673954510844, "iter_time": 1.2356360626220704, "loss": 0.030519351363182068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.790514480573655, "step_time": 0.9467924003601074} +{"epoch": 0, "iter": 15356, "iter_tflops": 52.63541509842941, "iter_time": 0.3919622077941895, "loss": 0.0548914410173893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.54347313205679, "step_time": 0.35853055763244623} +{"epoch": 0, "iter": 15357, "iter_tflops": 16.04339209877626, "iter_time": 0.944495994567871, "loss": 0.31813371181488037, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 16.755194774419884, "step_time": 0.9043714370727538} +{"epoch": 0, "iter": 15358, "iter_tflops": 10.820622678319697, "iter_time": 1.4003740844726562, "loss": 0.4365054965019226, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 13.996561385096275, "step_time": 1.082617305755615} +{"epoch": 0, "iter": 15359, "iter_tflops": 25.84604722871042, "iter_time": 0.5862760925292968, "loss": 0.290500670671463, "lr": 
3e-05, "seqlen": 6064.0, "step_tflops": 27.53457680736634, "step_time": 0.550323314666748} +{"epoch": 0, "iter": 15360, "iter_tflops": 26.75479783306845, "iter_time": 0.5663627014160155, "loss": 0.24369503557682037, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.483276698604104, "step_time": 0.5319935531616211} +{"epoch": 0, "iter": 15361, "iter_tflops": 24.776178200630945, "iter_time": 0.6562060394287109, "loss": 0.0044364179484546185, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.385152364942712, "step_time": 0.6161904067993165} +{"epoch": 0, "iter": 15362, "iter_tflops": 7.363711949314087, "iter_time": 2.2078916015625, "loss": 0.004720423370599747, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 8.783619891220704, "step_time": 1.8509769287109374} +{"epoch": 0, "iter": 15363, "iter_tflops": 10.88532631188016, "iter_time": 1.4935958099365234, "loss": 0.005803944543004036, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 13.00911156572224, "step_time": 1.2497608070373536} +{"epoch": 0, "iter": 15364, "iter_tflops": 40.91646256036612, "iter_time": 0.39735296630859374, "loss": 0.006919152103364468, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 44.86139563763165, "step_time": 0.3624113235473633} +{"epoch": 0, "iter": 15365, "iter_tflops": 14.899165432266669, "iter_time": 1.0033180160522461, "loss": 0.5334299802780151, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 15.549632874157048, "step_time": 0.9613475265502929} +{"epoch": 0, "iter": 15366, "iter_tflops": 12.69455975255206, "iter_time": 1.1775596313476564, "loss": 0.1846228986978531, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 16.69500381298067, "step_time": 0.8953936920166015} +{"epoch": 0, "iter": 15367, "iter_tflops": 27.473701532748493, "iter_time": 0.544105827331543, "loss": 0.3487089276313782, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 29.189832690653187, "step_time": 0.5121167106628418} +{"epoch": 0, "iter": 15368, "iter_tflops": 26.939317659321073, "iter_time": 0.554899024963379, "loss": 
0.3237810432910919, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.670865181681013, "step_time": 0.5213864669799805} +{"epoch": 0, "iter": 15369, "iter_tflops": 27.927319991917617, "iter_time": 0.7387423324584961, "loss": 0.5959790945053101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.546770060616225, "step_time": 0.6982520751953125} +{"epoch": 0, "iter": 15370, "iter_tflops": 14.433658395197531, "iter_time": 1.4293738250732422, "loss": 0.6337729096412659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.076506392857592, "step_time": 1.14132084274292} +{"epoch": 0, "iter": 15371, "iter_tflops": 34.914624982207634, "iter_time": 0.5909011917114257, "loss": 0.49778133630752563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92779205784495, "step_time": 0.543957145690918} +{"epoch": 0, "iter": 15372, "iter_tflops": 36.765690255810625, "iter_time": 0.5611507186889649, "loss": 0.5409696102142334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.824465245246365, "step_time": 0.5180507354736328} +{"epoch": 0, "iter": 15373, "iter_tflops": 25.411597329935333, "iter_time": 0.8118770828247069, "loss": 0.38800445199012756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.259822770849976, "step_time": 0.730050350189209} +{"epoch": 0, "iter": 15374, "iter_tflops": 39.3492632022614, "iter_time": 0.5243069839477539, "loss": 0.40246155858039856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.19139094138302, "step_time": 0.4668577537536621} +{"epoch": 0, "iter": 15375, "iter_tflops": 48.104721828650526, "iter_time": 0.428878761291504, "loss": 0.3563843369483948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.94370527064784, "step_time": 0.3971817836761474} +{"epoch": 0, "iter": 15376, "iter_tflops": 47.01583400124144, "iter_time": 0.43881160354614257, "loss": 0.3354059159755707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.36842440062779, "step_time": 0.40960371017456054} +{"epoch": 0, "iter": 15377, "iter_tflops": 28.8749624929051, "iter_time": 0.7144976730346679, 
"loss": 0.262480765581131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.62987697300329, "step_time": 0.6735610961914062} +{"epoch": 0, "iter": 15378, "iter_tflops": 15.64097463374588, "iter_time": 1.3190414276123046, "loss": 0.2666991949081421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.407352239433195, "step_time": 1.1208072319030762} +{"epoch": 0, "iter": 15379, "iter_tflops": 45.122146501436674, "iter_time": 0.4572276611328125, "loss": 0.37175026535987854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.955647605930665, "step_time": 0.4214241771697998} +{"epoch": 0, "iter": 15380, "iter_tflops": 47.4707344878021, "iter_time": 0.43460657882690434, "loss": 0.3311530649662018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.168311548623194, "step_time": 0.4032005920410156} +{"epoch": 0, "iter": 15381, "iter_tflops": 31.041754662637548, "iter_time": 0.6646239471435547, "loss": 0.1518576741218567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.14153069089456, "step_time": 0.6225148048400878} +{"epoch": 0, "iter": 15382, "iter_tflops": 14.733116316813257, "iter_time": 1.4003210906982422, "loss": 0.16484613716602325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.46880578220746, "step_time": 1.1810248374938965} +{"epoch": 0, "iter": 15383, "iter_tflops": 48.15120315185966, "iter_time": 0.4284647560119629, "loss": 0.1867007315158844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.54393416168166, "step_time": 0.39264462852478027} +{"epoch": 0, "iter": 15384, "iter_tflops": 44.48776262350452, "iter_time": 0.4637476081848145, "loss": 0.1718008816242218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.35699604265018, "step_time": 0.4266413383483887} +{"epoch": 0, "iter": 15385, "iter_tflops": 25.12296396491947, "iter_time": 0.8212045974731446, "loss": 0.5278633236885071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.372322995084957, "step_time": 0.7823009567260742} +{"epoch": 0, "iter": 15386, "iter_tflops": 11.398571592248375, "iter_time": 
1.8099718322753906, "loss": 0.6285538077354431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.060550964949542, "step_time": 1.3698764114379882} +{"epoch": 0, "iter": 15387, "iter_tflops": 39.09887766724793, "iter_time": 0.5276645965576172, "loss": 0.7289758920669556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.73521583915629, "step_time": 0.48276563262939454} +{"epoch": 0, "iter": 15388, "iter_tflops": 39.89245025485185, "iter_time": 0.5171678695678711, "loss": 0.5465827584266663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.38959433634067, "step_time": 0.4754848213195801} +{"epoch": 0, "iter": 15389, "iter_tflops": 21.500489195374758, "iter_time": 0.9595639114379882, "loss": 0.05408751219511032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.322888014325553, "step_time": 0.8845857124328612} +{"epoch": 0, "iter": 15390, "iter_tflops": 13.987222266541439, "iter_time": 1.4749957580566404, "loss": 0.08702220022678375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.860638790806345, "step_time": 1.1551150970458983} +{"epoch": 0, "iter": 15391, "iter_tflops": 42.7128708656177, "iter_time": 0.4830181884765625, "loss": 0.07114757597446442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.03217704470318, "step_time": 0.43865912246704103} +{"epoch": 0, "iter": 15392, "iter_tflops": 43.55964840633991, "iter_time": 0.47362855911254886, "loss": 0.10133720934391022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.52923829807461, "step_time": 0.43407162094116214} +{"epoch": 0, "iter": 15393, "iter_tflops": 12.925116074074175, "iter_time": 1.5962017974853515, "loss": 0.44321826100349426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.606030872293013, "step_time": 1.5163197631835938} +{"epoch": 0, "iter": 15394, "iter_tflops": 16.37412228478212, "iter_time": 1.259981643676758, "loss": 0.5532949566841125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.83522838175296, "step_time": 0.9902024173736571} +{"epoch": 0, "iter": 15395, "iter_tflops": 39.70860190651477, 
"iter_time": 0.5195623245239258, "loss": 0.44173288345336914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14301332181892, "step_time": 0.47820242309570316} +{"epoch": 0, "iter": 15396, "iter_tflops": 43.024685214483064, "iter_time": 0.479517593383789, "loss": 0.4668361246585846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.902580321110506, "step_time": 0.4398711833953857} +{"epoch": 0, "iter": 15397, "iter_tflops": 20.610447368107685, "iter_time": 1.0010017318725586, "loss": 0.5582324266433716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.193019994582443, "step_time": 0.9296208229064942} +{"epoch": 0, "iter": 15398, "iter_tflops": 21.48278022693172, "iter_time": 0.9603549118041993, "loss": 0.7389982342720032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.90308869727168, "step_time": 0.796472333908081} +{"epoch": 0, "iter": 15399, "iter_tflops": 44.15984643076076, "iter_time": 0.46719124221801756, "loss": 0.6078968644142151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.34880962099146, "step_time": 0.4267135772705078} +{"epoch": 0, "iter": 15400, "iter_tflops": 34.92263751226654, "iter_time": 0.5907656173706055, "loss": 0.5304210186004639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.00706149244706, "step_time": 0.5428226413726807} +{"epoch": 0, "iter": 15401, "iter_tflops": 36.04163033919415, "iter_time": 0.5724239807128907, "loss": 0.401063472032547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.807538439387805, "step_time": 0.5182710189819336} +{"epoch": 0, "iter": 15402, "iter_tflops": 47.24107461515385, "iter_time": 0.4367193946838379, "loss": 0.4880489110946655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.19768356474926, "step_time": 0.3952492160797119} +{"epoch": 0, "iter": 15403, "iter_tflops": 48.17625217721916, "iter_time": 0.42824197769165034, "loss": 0.41132375597953796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14034575642541, "step_time": 0.39568386459350585} +{"epoch": 0, "iter": 15404, "iter_tflops": 
50.21653754082608, "iter_time": 0.41084261322021487, "loss": 0.42546218633651733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.4119897784221, "step_time": 0.37916447448730467} +{"epoch": 0, "iter": 15405, "iter_tflops": 21.991344901329352, "iter_time": 0.9381460571289063, "loss": 0.6430211067199707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.898367491842116, "step_time": 0.9009853439331054} +{"epoch": 0, "iter": 15406, "iter_tflops": 19.385564206472726, "iter_time": 1.0642503509521486, "loss": 0.6726341247558594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.885312506091, "step_time": 0.9014993133544922} +{"epoch": 0, "iter": 15407, "iter_tflops": 40.7423475277795, "iter_time": 0.5063795967102052, "loss": 0.5208584070205688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7048465315196, "step_time": 0.47205504989624025} +{"epoch": 0, "iter": 15408, "iter_tflops": 42.195659097956494, "iter_time": 0.4889387664794922, "loss": 0.5619292855262756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.33045531831608, "step_time": 0.4551265449523926} +{"epoch": 0, "iter": 15409, "iter_tflops": 31.700841899413703, "iter_time": 0.6508058547973634, "loss": 0.07034725695848465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.826094001171626, "step_time": 0.6099165191650391} +{"epoch": 0, "iter": 15410, "iter_tflops": 13.901520787309588, "iter_time": 1.4840889587402344, "loss": 0.10431799292564392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.307618285582823, "step_time": 1.1920238342285159} +{"epoch": 0, "iter": 15411, "iter_tflops": 40.33934118640208, "iter_time": 0.511438533782959, "loss": 0.07199127227067947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.460988999623034, "step_time": 0.46402686882019045} +{"epoch": 0, "iter": 15412, "iter_tflops": 44.75787054750238, "iter_time": 0.4609489517211914, "loss": 0.13728347420692444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0608022714253, "step_time": 0.42052091598510744} +{"epoch": 0, "iter": 15413, 
"iter_tflops": 18.550343924470585, "iter_time": 1.112167709350586, "loss": 0.5963214635848999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.613305670245968, "step_time": 1.0518927230834962} +{"epoch": 0, "iter": 15414, "iter_tflops": 9.815726675646934, "iter_time": 2.101840667724609, "loss": 0.8129182457923889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.23078625975329, "step_time": 1.8370123901367186} +{"epoch": 0, "iter": 15415, "iter_tflops": 12.215633835030914, "iter_time": 1.688908966064453, "loss": 0.5126136541366577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.257627376096147, "step_time": 1.4470215110778808} +{"epoch": 0, "iter": 15416, "iter_tflops": 42.83023732596102, "iter_time": 0.48169458770751955, "loss": 0.4419756233692169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.18145757054063, "step_time": 0.4467397651672363} +{"epoch": 0, "iter": 15417, "iter_tflops": 24.880007477335123, "iter_time": 0.7344485015869141, "loss": 0.49998003244400024, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 26.376849364793884, "step_time": 0.6927697830200196} +{"epoch": 0, "iter": 15418, "iter_tflops": 13.795405552802821, "iter_time": 1.3245775299072267, "loss": 0.3304643929004669, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 16.12774221855921, "step_time": 1.133021842956543} +{"epoch": 0, "iter": 15419, "iter_tflops": 24.18463319435219, "iter_time": 0.7555659027099609, "loss": 0.3113005459308624, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 26.31101757845656, "step_time": 0.6945031356811523} +{"epoch": 0, "iter": 15420, "iter_tflops": 26.758473585464387, "iter_time": 0.6828896331787109, "loss": 0.21136625111103058, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 28.70300884447286, "step_time": 0.6366260871887207} +{"epoch": 0, "iter": 15421, "iter_tflops": 34.8751773875397, "iter_time": 0.591569564819336, "loss": 0.2420634776353836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.92364612341591, "step_time": 0.5300401058197022} +{"epoch": 0, "iter": 
15422, "iter_tflops": 38.58629163679358, "iter_time": 0.534674171447754, "loss": 0.2787543535232544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.68130592525543, "step_time": 0.48337540435791015} +{"epoch": 0, "iter": 15423, "iter_tflops": 37.372319670331294, "iter_time": 0.5520420913696289, "loss": 0.2441621571779251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.7677423779674, "step_time": 0.5060641651153565} +{"epoch": 0, "iter": 15424, "iter_tflops": 43.3271269781177, "iter_time": 0.4761703567504883, "loss": 0.20175671577453613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.33631070815574, "step_time": 0.4358407573699951} +{"epoch": 0, "iter": 15425, "iter_tflops": 21.0237337771672, "iter_time": 0.9813239517211915, "loss": 0.36680662631988525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.59931709240573, "step_time": 0.912907829284668} +{"epoch": 0, "iter": 15426, "iter_tflops": 19.28785905774463, "iter_time": 1.0696414489746093, "loss": 0.33468058705329895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.535051134832003, "step_time": 0.876611373901367} +{"epoch": 0, "iter": 15427, "iter_tflops": 50.50853192597867, "iter_time": 0.40846749496459955, "loss": 0.33294928073883057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.91694646677751, "step_time": 0.3756780891418457} +{"epoch": 0, "iter": 15428, "iter_tflops": 50.53836721055322, "iter_time": 0.40822635650634764, "loss": 0.36342743039131165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.35859163207528, "step_time": 0.3795369396209716} +{"epoch": 0, "iter": 15429, "iter_tflops": 29.670721049382994, "iter_time": 0.695335090637207, "loss": 0.3276829719543457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.370385066067154, "step_time": 0.6576614685058595} +{"epoch": 0, "iter": 15430, "iter_tflops": 13.16962409923935, "iter_time": 1.566566619873047, "loss": 0.4834657311439514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.587812961728066, "step_time": 1.2437500686645508} +{"epoch": 0, 
"iter": 15431, "iter_tflops": 48.475787035913285, "iter_time": 0.4255958442687989, "loss": 0.38922232389450073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.03645244497001, "step_time": 0.388998369216919} +{"epoch": 0, "iter": 15432, "iter_tflops": 46.78231210883369, "iter_time": 0.4410020065307617, "loss": 0.41422441601753235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.946495599697606, "step_time": 0.40495608711242675} +{"epoch": 0, "iter": 15433, "iter_tflops": 26.0365066492902, "iter_time": 0.792390998840332, "loss": 0.24986660480499268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.439154997507234, "step_time": 0.7518851623535157} +{"epoch": 0, "iter": 15434, "iter_tflops": 19.502661470697063, "iter_time": 1.0578604125976563, "loss": 0.280154824256897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.83542364042674, "step_time": 0.7985583591461182} +{"epoch": 0, "iter": 15435, "iter_tflops": 50.589726513499954, "iter_time": 0.40781192016601564, "loss": 0.3118831217288971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.90533560041964, "step_time": 0.37575753402709966} +{"epoch": 0, "iter": 15436, "iter_tflops": 50.09004079971533, "iter_time": 0.41188014984130855, "loss": 0.2580054998397827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.1390441820469, "step_time": 0.381076057434082} +{"epoch": 0, "iter": 15437, "iter_tflops": 28.727303931207025, "iter_time": 0.7181701965332031, "loss": 0.4136653244495392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.45761627139599, "step_time": 0.6773705902099608} +{"epoch": 0, "iter": 15438, "iter_tflops": 13.380239909860455, "iter_time": 1.5419075927734374, "loss": 0.4814422130584717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.312957003567865, "step_time": 1.3472965087890625} +{"epoch": 0, "iter": 15439, "iter_tflops": 33.857916037866595, "iter_time": 0.6093432769775391, "loss": 0.4853931963443756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.10189311160384, "step_time": 0.5560657901763916} 
+{"epoch": 0, "iter": 15440, "iter_tflops": 36.34598004539453, "iter_time": 0.5676306838989258, "loss": 0.5148645639419556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.892180869069506, "step_time": 0.5171713619232178} +{"epoch": 0, "iter": 15441, "iter_tflops": 18.81489148397808, "iter_time": 1.096530029296875, "loss": 0.5414457321166992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.158201649843004, "step_time": 1.023459030151367} +{"epoch": 0, "iter": 15442, "iter_tflops": 17.108541941505607, "iter_time": 1.205894317626953, "loss": 0.47377029061317444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.34992979779605, "step_time": 1.0138164463043213} +{"epoch": 0, "iter": 15443, "iter_tflops": 42.82075305347331, "iter_time": 0.4818012771606445, "loss": 0.7016595005989075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.029644833729115, "step_time": 0.4482131805419922} +{"epoch": 0, "iter": 15444, "iter_tflops": 45.211420011406446, "iter_time": 0.45632482910156247, "loss": 0.4799143671989441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.373995057458785, "step_time": 0.42649141311645505} +{"epoch": 0, "iter": 15445, "iter_tflops": 37.71682398236378, "iter_time": 0.5469997558593751, "loss": 0.740361750125885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.52106505049844, "step_time": 0.5091448974609375} +{"epoch": 0, "iter": 15446, "iter_tflops": 32.42584804488821, "iter_time": 0.6362545547485352, "loss": 0.6129108667373657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.57247434499458, "step_time": 0.5799735298156738} +{"epoch": 0, "iter": 15447, "iter_tflops": 34.00372986152147, "iter_time": 0.6067303085327148, "loss": 0.4715440571308136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.96071592206658, "step_time": 0.558189769744873} +{"epoch": 0, "iter": 15448, "iter_tflops": 36.245734939766855, "iter_time": 0.5692005844116211, "loss": 0.597008228302002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.213429813904625, "step_time": 
0.5261231575012207} +{"epoch": 0, "iter": 15449, "iter_tflops": 18.908369284244817, "iter_time": 1.0911090850830079, "loss": 0.4399574398994446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.20751155632645, "step_time": 1.0209616088867186} +{"epoch": 0, "iter": 15450, "iter_tflops": 18.60263966533831, "iter_time": 1.1090411834716796, "loss": 0.5989832282066345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.541241584390455, "step_time": 0.9152598552703857} +{"epoch": 0, "iter": 15451, "iter_tflops": 38.57777357824381, "iter_time": 0.5347922286987306, "loss": 0.7930372953414917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.23457419404113, "step_time": 0.48848825645446775} +{"epoch": 0, "iter": 15452, "iter_tflops": 35.16476232254405, "iter_time": 0.5866979370117187, "loss": 0.6591982245445251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.002802999852854, "step_time": 0.5428834686279297} +{"epoch": 0, "iter": 15453, "iter_tflops": 19.526697513198563, "iter_time": 1.0565582580566406, "loss": 0.16923008859157562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.74760679053562, "step_time": 0.9943842544555664} +{"epoch": 0, "iter": 15454, "iter_tflops": 8.634816861177907, "iter_time": 2.389291381835937, "loss": 0.14724579453468323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.605667370408911, "step_time": 1.9452895126342775} +{"epoch": 0, "iter": 15455, "iter_tflops": 15.910931130608285, "iter_time": 1.296661605834961, "loss": 0.11096740514039993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.46741018838595, "step_time": 1.1811191978454587} +{"epoch": 0, "iter": 15456, "iter_tflops": 38.95825022144096, "iter_time": 0.5295693054199219, "loss": 0.19977831840515137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.40728296490005, "step_time": 0.4752910594940186} +{"epoch": 0, "iter": 15457, "iter_tflops": 12.155710053864045, "iter_time": 1.2903153839111328, "loss": 0.40377476811408997, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 12.95997098922258, 
"step_time": 1.2102418823242187} +{"epoch": 0, "iter": 15458, "iter_tflops": 10.185558614351658, "iter_time": 1.5398958740234376, "loss": 0.359632670879364, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 14.768036880123887, "step_time": 1.0620707283020019} +{"epoch": 0, "iter": 15459, "iter_tflops": 22.864216718451903, "iter_time": 0.6859933090209961, "loss": 0.2666766345500946, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 24.708317657593557, "step_time": 0.634794319152832} +{"epoch": 0, "iter": 15460, "iter_tflops": 25.07283671987729, "iter_time": 0.6255654220581054, "loss": 0.4139542877674103, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 26.918560788094005, "step_time": 0.5826722984313966} +{"epoch": 0, "iter": 15461, "iter_tflops": 16.610956814953358, "iter_time": 1.2420171661376953, "loss": 0.5436475872993469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.899498786265408, "step_time": 1.1526073303222657} +{"epoch": 0, "iter": 15462, "iter_tflops": 15.881054875700425, "iter_time": 1.2991009521484376, "loss": 0.5277446508407593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.68443769844164, "step_time": 0.9974210472106935} +{"epoch": 0, "iter": 15463, "iter_tflops": 37.454140083831625, "iter_time": 0.5508361282348633, "loss": 0.5484180450439453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.866732817998596, "step_time": 0.5048383388519286} +{"epoch": 0, "iter": 15464, "iter_tflops": 40.3883558893061, "iter_time": 0.5108178596496582, "loss": 0.6416375637054443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.81032699013301, "step_time": 0.47091850090026854} +{"epoch": 0, "iter": 15465, "iter_tflops": 17.875049502597076, "iter_time": 1.154183853149414, "loss": 0.17348507046699524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.967028755467663, "step_time": 1.0877346038818358} +{"epoch": 0, "iter": 15466, "iter_tflops": 35.33522495354677, "iter_time": 0.5838676147460938, "loss": 0.3470296263694763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.635679234494944, "step_time": 0.47280330848693847} +{"epoch": 0, "iter": 15467, "iter_tflops": 51.44066721100904, "iter_time": 0.4010658226013184, "loss": 0.2016088366508484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.82509970936567, "step_time": 0.3695666217803955} +{"epoch": 0, "iter": 15468, "iter_tflops": 46.57889102498707, "iter_time": 0.4429279670715332, "loss": 0.15483757853507996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.92469850545497, "step_time": 0.4132442283630371} +{"epoch": 0, "iter": 15469, "iter_tflops": 23.81347674711353, "iter_time": 0.8663620910644532, "loss": 0.5604591369628906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.02779888068809, "step_time": 0.8243271255493163} +{"epoch": 0, "iter": 15470, "iter_tflops": 23.339854459530844, "iter_time": 0.8839426803588867, "loss": 0.5382684469223022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.21258919235478, "step_time": 0.7312726020812989} +{"epoch": 0, "iter": 15471, "iter_tflops": 43.33925118562671, "iter_time": 0.4760371475219726, "loss": 0.4980776309967041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.36011897293017, "step_time": 0.44501813125610346} +{"epoch": 0, "iter": 15472, "iter_tflops": 44.797439919586616, "iter_time": 0.4605417976379394, "loss": 0.6091862320899963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.077894286943334, "step_time": 0.4291180763244629} +{"epoch": 0, "iter": 15473, "iter_tflops": 21.28646832307231, "iter_time": 0.969211669921875, "loss": 0.39262622594833374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.270987108940282, "step_time": 0.9263663711547852} +{"epoch": 0, "iter": 15474, "iter_tflops": 16.463644552566656, "iter_time": 1.253130401611328, "loss": 0.3008165657520294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.519434711589312, "step_time": 0.9587191200256349} +{"epoch": 0, "iter": 15475, "iter_tflops": 36.689970919091785, "iter_time": 0.5623087997436523, "loss": 0.429761677980423, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 40.113817294132986, "step_time": 0.514313892364502} +{"epoch": 0, "iter": 15476, "iter_tflops": 42.70291351026281, "iter_time": 0.4831308174133301, "loss": 0.5039047002792358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8049250770355, "step_time": 0.4407889442443848} +{"epoch": 0, "iter": 15477, "iter_tflops": 5.496443280739582, "iter_time": 1.141556655883789, "loss": 0.0181756392121315, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 5.951292625474713, "step_time": 1.0543090057373048} +{"epoch": 0, "iter": 15478, "iter_tflops": 5.0797544247076285, "iter_time": 1.235197784423828, "loss": 0.006982714403420687, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 6.569414332371961, "step_time": 0.9551081867218018} +{"epoch": 0, "iter": 15479, "iter_tflops": 12.554992669628597, "iter_time": 0.49976145553588863, "loss": 0.006480113603174686, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 13.873001706495039, "step_time": 0.4522814559936523} +{"epoch": 0, "iter": 15480, "iter_tflops": 13.937306784610296, "iter_time": 0.4501946830749512, "loss": 0.013271840289235115, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 15.45943108917253, "step_time": 0.4058688430786132} +{"epoch": 0, "iter": 15481, "iter_tflops": 30.642253379209652, "iter_time": 0.6017200546264647, "loss": 0.09789805114269257, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 33.98956371405899, "step_time": 0.5424623432159423} +{"epoch": 0, "iter": 15482, "iter_tflops": 34.06228251084144, "iter_time": 0.5413042526245118, "loss": 0.12309834361076355, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 37.47211614792402, "step_time": 0.49204742813110347} +{"epoch": 0, "iter": 15483, "iter_tflops": 35.37291365596697, "iter_time": 0.5212479400634766, "loss": 0.16892248392105103, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 38.83919832967539, "step_time": 0.47472808837890623} +{"epoch": 0, "iter": 15484, "iter_tflops": 35.61538558014425, "iter_time": 0.5176992492675782, "loss": 0.08004099875688553, "lr": 3e-05, 
"seqlen": 7344.0, "step_tflops": 39.11225517482024, "step_time": 0.47141383934021} +{"epoch": 0, "iter": 15485, "iter_tflops": 16.570216468390317, "iter_time": 0.9811747360229492, "loss": 0.009161454625427723, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 17.89409899605127, "step_time": 0.9085832023620607} +{"epoch": 0, "iter": 15486, "iter_tflops": 34.95501277030556, "iter_time": 0.4651200637817383, "loss": 0.0004554221814032644, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 39.06546127788296, "step_time": 0.41618036079406734} +{"epoch": 0, "iter": 15487, "iter_tflops": 42.713184090289715, "iter_time": 0.3806383934020996, "loss": 0.008894271217286587, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 47.30155306643181, "step_time": 0.34371551704406733} +{"epoch": 0, "iter": 15488, "iter_tflops": 44.31874406602165, "iter_time": 0.3668487930297852, "loss": 0.00468728831037879, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 48.54736107531765, "step_time": 0.3348951911926269} +{"epoch": 0, "iter": 15489, "iter_tflops": 29.923645832853186, "iter_time": 0.6451557235717774, "loss": 0.13063949346542358, "lr": 3e-05, "seqlen": 7680.0, "step_tflops": 31.775335723284446, "step_time": 0.6075596351623536} +{"epoch": 0, "iter": 15490, "iter_tflops": 9.43066129043638, "iter_time": 2.0470898895263674, "loss": 0.11555295437574387, "lr": 3e-05, "seqlen": 7680.0, "step_tflops": 12.137905695374442, "step_time": 1.5905059623718263} +{"epoch": 0, "iter": 15491, "iter_tflops": 37.774965193460815, "iter_time": 0.5110636444091797, "loss": 0.16146361827850342, "lr": 3e-05, "seqlen": 7680.0, "step_tflops": 41.39157341022243, "step_time": 0.46640921783447264} +{"epoch": 0, "iter": 15492, "iter_tflops": 36.324058755059546, "iter_time": 0.5314772644042969, "loss": 0.15767869353294373, "lr": 3e-05, "seqlen": 7680.0, "step_tflops": 39.941013428127654, "step_time": 0.4833480606079102} +{"epoch": 0, "iter": 15493, "iter_tflops": 15.946972919439608, "iter_time": 1.293731018066406, "loss": 
0.11681035161018372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.02354419174922, "step_time": 1.2119152908325197} +{"epoch": 0, "iter": 15494, "iter_tflops": 19.11441312781151, "iter_time": 1.0793474731445312, "loss": 0.168061763048172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.23296851975686, "step_time": 0.6400618515014649} +{"epoch": 0, "iter": 15495, "iter_tflops": 43.1725609862307, "iter_time": 0.4778751373291016, "loss": 0.12382635474205017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.209663875424, "step_time": 0.43700996398925784} +{"epoch": 0, "iter": 15496, "iter_tflops": 37.371164301908166, "iter_time": 0.5520591583251954, "loss": 0.14866678416728973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.11142203356092, "step_time": 0.5018336143493652} +{"epoch": 0, "iter": 15497, "iter_tflops": 21.549710763603418, "iter_time": 0.9573721771240236, "loss": 0.5989936590194702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.22746112418322, "step_time": 0.888219913482666} +{"epoch": 0, "iter": 15498, "iter_tflops": 27.48465770773879, "iter_time": 0.750640365600586, "loss": 0.6955862641334534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.192299389646315, "step_time": 0.6033842086791993} +{"epoch": 0, "iter": 15499, "iter_tflops": 45.37074601650057, "iter_time": 0.4547223777770996, "loss": 0.6184049248695374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.86512264431168, "step_time": 0.42220488548278806} +{"epoch": 0, "iter": 15500, "iter_tflops": 42.58255452070389, "iter_time": 0.48449637985229493, "loss": 0.7354633808135986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91058721473745, "step_time": 0.44937550926208497} +{"epoch": 0, "iter": 15501, "iter_tflops": 33.8831825362692, "iter_time": 0.6088888931274414, "loss": 0.5812157392501831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.307556021162426, "step_time": 0.5682314033508301} +{"epoch": 0, "iter": 15502, "iter_tflops": 12.935909242325472, "iter_time": 1.5948699951171876, 
"loss": 0.5889078378677368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.966726370539096, "step_time": 1.378464000701904} +{"epoch": 0, "iter": 15503, "iter_tflops": 31.641562579308076, "iter_time": 0.6520251159667969, "loss": 0.7086270451545715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.08458842475935, "step_time": 0.5278575096130371} +{"epoch": 0, "iter": 15504, "iter_tflops": 47.23836657640072, "iter_time": 0.43674443054199213, "loss": 0.6665205955505371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.21745789616069, "step_time": 0.4028136959075928} +{"epoch": 0, "iter": 15505, "iter_tflops": 20.41281235355029, "iter_time": 0.7523390579223632, "loss": 0.32154443860054016, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 21.5556421779282, "step_time": 0.7124517974853516} +{"epoch": 0, "iter": 15506, "iter_tflops": 9.848686834552524, "iter_time": 1.559330322265625, "loss": 0.3279229998588562, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 12.180121230636656, "step_time": 1.260854118347168} +{"epoch": 0, "iter": 15507, "iter_tflops": 9.601155312224005, "iter_time": 1.5995320892333984, "loss": 0.2034616470336914, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 10.627699238787983, "step_time": 1.4450311088562011} +{"epoch": 0, "iter": 15508, "iter_tflops": 16.76993182636189, "iter_time": 0.9157673492431642, "loss": 0.35992100834846497, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 20.453579989870864, "step_time": 0.7508395118713379} +{"epoch": 0, "iter": 15509, "iter_tflops": 13.404175393185602, "iter_time": 1.1945789184570312, "loss": 0.29908615350723267, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 14.392093036459817, "step_time": 1.1125793380737303} +{"epoch": 0, "iter": 15510, "iter_tflops": 27.599187720842586, "iter_time": 0.5801745147705079, "loss": 0.3878687024116516, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 29.48978460374391, "step_time": 0.5429793930053711} +{"epoch": 0, "iter": 15511, "iter_tflops": 28.290107684546264, "iter_time": 
0.5660051040649414, "loss": 0.3655046224594116, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.137988285618384, "step_time": 0.5313010673522949} +{"epoch": 0, "iter": 15512, "iter_tflops": 28.717979813369737, "iter_time": 0.5575721359252929, "loss": 0.37931257486343384, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.552114302180886, "step_time": 0.5240994186401366} +{"epoch": 0, "iter": 15513, "iter_tflops": 34.157285988002855, "iter_time": 0.6040027160644531, "loss": 0.9088078737258911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.18429905578868, "step_time": 0.5548334655761719} +{"epoch": 0, "iter": 15514, "iter_tflops": 13.761012698591058, "iter_time": 1.499242385864258, "loss": 0.7040383815765381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.237946632910205, "step_time": 1.1968417091369632} +{"epoch": 0, "iter": 15515, "iter_tflops": 35.88571310715764, "iter_time": 0.5749110641479492, "loss": 0.6350651383399963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.236808312445206, "step_time": 0.5258096771240234} +{"epoch": 0, "iter": 15516, "iter_tflops": 35.375446314887505, "iter_time": 0.5832037658691407, "loss": 0.5662841200828552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.61822666065109, "step_time": 0.534232027053833} +{"epoch": 0, "iter": 15517, "iter_tflops": 18.02600274662828, "iter_time": 1.1445184936523438, "loss": 0.056633297353982925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.277183461392394, "step_time": 1.0702338104248048} +{"epoch": 0, "iter": 15518, "iter_tflops": 23.966759280656852, "iter_time": 0.8608211593627929, "loss": 0.08290194720029831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.160592674986994, "step_time": 0.7074991149902343} +{"epoch": 0, "iter": 15519, "iter_tflops": 50.29992993135402, "iter_time": 0.41016147613525394, "loss": 0.08366086333990097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.903364426239435, "step_time": 0.3757710247039795} +{"epoch": 0, "iter": 15520, "iter_tflops": 52.0410196300831, 
"iter_time": 0.3964390716552735, "loss": 0.025324461981654167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.95051967483067, "step_time": 0.36226348114013673} +{"epoch": 0, "iter": 15521, "iter_tflops": 30.977177146334025, "iter_time": 0.666009475708008, "loss": 0.471174418926239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.84078673505354, "step_time": 0.6282155685424804} +{"epoch": 0, "iter": 15522, "iter_tflops": 9.52679362883176, "iter_time": 2.1655862731933593, "loss": 0.49360933899879456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.804361949855197, "step_time": 1.9095152130126953} +{"epoch": 0, "iter": 15523, "iter_tflops": 14.389035469208837, "iter_time": 1.4338065643310547, "loss": 0.3766480088233948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.734472092063477, "step_time": 1.2328499755859375} +{"epoch": 0, "iter": 15524, "iter_tflops": 38.86659260928702, "iter_time": 0.5308181686401368, "loss": 0.4292256832122803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56048809862562, "step_time": 0.4847475776672363} +{"epoch": 0, "iter": 15525, "iter_tflops": 15.708965848048944, "iter_time": 1.0532465209960937, "loss": 0.3739960491657257, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 16.527058117715935, "step_time": 1.0011106338500977} +{"epoch": 0, "iter": 15526, "iter_tflops": 10.960239770743884, "iter_time": 1.5095850067138672, "loss": 0.508739173412323, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 13.688289368603096, "step_time": 1.2087276344299316} +{"epoch": 0, "iter": 15527, "iter_tflops": 29.36697428655107, "iter_time": 0.5634020538330078, "loss": 0.4416808784008026, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.34786088682267, "step_time": 0.5278004035949707} +{"epoch": 0, "iter": 15528, "iter_tflops": 29.80929738897959, "iter_time": 0.5550420532226563, "loss": 0.32782813906669617, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.70221476444464, "step_time": 0.5219008750915527} +{"epoch": 0, "iter": 15529, "iter_tflops": 
24.561318616192565, "iter_time": 0.839983139038086, "loss": 0.2678459882736206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.845358503466418, "step_time": 0.7982513961791993} +{"epoch": 0, "iter": 15530, "iter_tflops": 11.914006437316798, "iter_time": 1.731667144775391, "loss": 0.2222701907157898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.144744933598613, "step_time": 1.2778829021453857} +{"epoch": 0, "iter": 15531, "iter_tflops": 42.81674262753836, "iter_time": 0.4818464050292969, "loss": 0.22378094494342804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00641018925825, "step_time": 0.4388995761871337} +{"epoch": 0, "iter": 15532, "iter_tflops": 38.96230015773756, "iter_time": 0.5295142593383789, "loss": 0.19814515113830566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.62954297859179, "step_time": 0.4839623432159424} +{"epoch": 0, "iter": 15533, "iter_tflops": 24.41916468440344, "iter_time": 0.8448730239868164, "loss": 0.2253435105085373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.159596091306543, "step_time": 0.7886625404357911} +{"epoch": 0, "iter": 15534, "iter_tflops": 8.069616075216809, "iter_time": 2.5566387939453126, "loss": 0.21214774250984192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.223800546041492, "step_time": 2.2367237243652345} +{"epoch": 0, "iter": 15535, "iter_tflops": 10.530029670284149, "iter_time": 1.9592626190185547, "loss": 0.277489572763443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.320661211486236, "step_time": 1.4406522998809814} +{"epoch": 0, "iter": 15536, "iter_tflops": 48.09709708564074, "iter_time": 0.42894675064086907, "loss": 0.3132111728191376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.274999823584075, "step_time": 0.39466463088989256} +{"epoch": 0, "iter": 15537, "iter_tflops": 15.296100619121018, "iter_time": 0.966601333618164, "loss": 0.29503899812698364, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 16.00544891620809, "step_time": 0.9237623596191407} +{"epoch": 0, "iter": 15538, 
"iter_tflops": 9.651439686348862, "iter_time": 1.5319197692871094, "loss": 0.312186062335968, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 11.350072718186494, "step_time": 1.302655200958252} +{"epoch": 0, "iter": 15539, "iter_tflops": 20.602431502340725, "iter_time": 0.7176449661254882, "loss": 0.5419442057609558, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 22.179006160472312, "step_time": 0.6666318206787109} +{"epoch": 0, "iter": 15540, "iter_tflops": 22.810646363610573, "iter_time": 0.6481723937988282, "loss": 0.36211657524108887, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.468497731558678, "step_time": 0.6042557830810548} +{"epoch": 0, "iter": 15541, "iter_tflops": 11.942192350035125, "iter_time": 1.1016139068603514, "loss": 0.014647332020103931, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 12.958064796829476, "step_time": 1.0152507629394532} +{"epoch": 0, "iter": 15542, "iter_tflops": 15.662085746842688, "iter_time": 0.8399701919555664, "loss": 0.009247035719454288, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 20.04816519583156, "step_time": 0.6562039489746094} +{"epoch": 0, "iter": 15543, "iter_tflops": 35.047933977005286, "iter_time": 0.3753626441955566, "loss": 0.002298732055351138, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 38.4754557453474, "step_time": 0.3419240894317627} +{"epoch": 0, "iter": 15544, "iter_tflops": 36.71849040954336, "iter_time": 0.3582850227355957, "loss": 0.004630562849342823, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 40.31436671134464, "step_time": 0.3263274669647217} +{"epoch": 0, "iter": 15545, "iter_tflops": 38.3597788944086, "iter_time": 0.5378313980102539, "loss": 0.558333694934845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.81638654094553, "step_time": 0.49337341690063474} +{"epoch": 0, "iter": 15546, "iter_tflops": 39.77120089199914, "iter_time": 0.5187445449829102, "loss": 0.45386871695518494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.38619936051341, "step_time": 0.4648087425231934} +{"epoch": 0, 
"iter": 15547, "iter_tflops": 45.2796262841914, "iter_time": 0.45563745117187504, "loss": 0.6730715036392212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.05899254937317, "step_time": 0.42053642845153805} +{"epoch": 0, "iter": 15548, "iter_tflops": 45.09346729489868, "iter_time": 0.4575184555053711, "loss": 0.46849775314331055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.70609580207083, "step_time": 0.42358339691162106} +{"epoch": 0, "iter": 15549, "iter_tflops": 21.502217962472827, "iter_time": 0.9594867630004883, "loss": 0.4740698039531708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.544056233728487, "step_time": 0.9151455841064454} +{"epoch": 0, "iter": 15550, "iter_tflops": 17.021169986967582, "iter_time": 1.2120843353271484, "loss": 0.6125352382659912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.721598417514475, "step_time": 0.995632339477539} +{"epoch": 0, "iter": 15551, "iter_tflops": 45.082592432068815, "iter_time": 0.45762881851196285, "loss": 0.7329521179199219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.993234765809376, "step_time": 0.4211008644104004} +{"epoch": 0, "iter": 15552, "iter_tflops": 47.89733328314083, "iter_time": 0.43073574447631835, "loss": 0.7126726508140564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.835343682000534, "step_time": 0.39801209068298343} +{"epoch": 0, "iter": 15553, "iter_tflops": 46.45230099430427, "iter_time": 0.44413501739501954, "loss": 0.11797187477350235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.65493869897494, "step_time": 0.4072869110107422} +{"epoch": 0, "iter": 15554, "iter_tflops": 38.287559262224406, "iter_time": 0.5388458786010742, "loss": 0.08328751474618912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.921199847058546, "step_time": 0.49213986206054683} +{"epoch": 0, "iter": 15555, "iter_tflops": 37.07626809281992, "iter_time": 0.5564501113891601, "loss": 0.12008121609687805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.979994337115286, "step_time": 
0.5034430541992188} +{"epoch": 0, "iter": 15556, "iter_tflops": 39.19737997940541, "iter_time": 0.5263385848999024, "loss": 0.09982743114233017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.01456879209005, "step_time": 0.47963036918640134} +{"epoch": 0, "iter": 15557, "iter_tflops": 18.42600512679174, "iter_time": 1.056705307006836, "loss": 0.08642791956663132, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 19.64977632957043, "step_time": 0.9908946075439453} +{"epoch": 0, "iter": 15558, "iter_tflops": 13.23856168288486, "iter_time": 1.470768341064453, "loss": 0.07205221801996231, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 15.686539523847754, "step_time": 1.2412461891174318} +{"epoch": 0, "iter": 15559, "iter_tflops": 40.87991009201991, "iter_time": 0.47629403686523436, "loss": 0.11181928962469101, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 44.78289783729894, "step_time": 0.4347833290100097} +{"epoch": 0, "iter": 15560, "iter_tflops": 36.55249742514077, "iter_time": 0.5326819992065429, "loss": 0.10225073993206024, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 39.99973984901762, "step_time": 0.486774600982666} +{"epoch": 0, "iter": 15561, "iter_tflops": 23.005678211421614, "iter_time": 0.8967826690673828, "loss": 0.5503516793251038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.003026964552873, "step_time": 0.8251438331604004} +{"epoch": 0, "iter": 15562, "iter_tflops": 28.92595924427745, "iter_time": 0.7132380065917969, "loss": 0.6671833992004395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.171611777956876, "step_time": 0.6412825584411621} +{"epoch": 0, "iter": 15563, "iter_tflops": 41.453673058363044, "iter_time": 0.4976903610229493, "loss": 0.6398431062698364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.57524062333852, "step_time": 0.4628375129699707} +{"epoch": 0, "iter": 15564, "iter_tflops": 41.33561456504221, "iter_time": 0.49911181259155274, "loss": 0.43703097105026245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4215191258423, 
"step_time": 0.4644391708374024} +{"epoch": 0, "iter": 15565, "iter_tflops": 41.48926089280763, "iter_time": 0.4972634620666504, "loss": 0.1086309403181076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.23663632770923, "step_time": 0.4560704593658448} +{"epoch": 0, "iter": 15566, "iter_tflops": 38.1990942368139, "iter_time": 0.5400937881469726, "loss": 0.07779408991336823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22759986468278, "step_time": 0.48856893539428714} +{"epoch": 0, "iter": 15567, "iter_tflops": 38.510715234909, "iter_time": 0.5357234573364257, "loss": 0.047353219240903854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.39385971891722, "step_time": 0.48665287017822273} +{"epoch": 0, "iter": 15568, "iter_tflops": 36.42823485076267, "iter_time": 0.5663489761352538, "loss": 0.10094918310642242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.351544266224366, "step_time": 0.5112838649749756} +{"epoch": 0, "iter": 15569, "iter_tflops": 38.37686089088777, "iter_time": 0.5375920028686523, "loss": 0.47379571199417114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37260319053239, "step_time": 0.4868970031738281} +{"epoch": 0, "iter": 15570, "iter_tflops": 38.40458929158819, "iter_time": 0.537203857421875, "loss": 0.395972341299057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.36051885381615, "step_time": 0.48703590202331537} +{"epoch": 0, "iter": 15571, "iter_tflops": 36.3527155143424, "iter_time": 0.5675255126953125, "loss": 0.5031735897064209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.89314970384793, "step_time": 0.5171588020324707} +{"epoch": 0, "iter": 15572, "iter_tflops": 41.76913587905955, "iter_time": 0.4939315376281739, "loss": 0.47250938415527344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.44088022829435, "step_time": 0.45402055168151856} +{"epoch": 0, "iter": 15573, "iter_tflops": 22.456875227817985, "iter_time": 0.9186983184814453, "loss": 0.30002644658088684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
23.82185372268264, "step_time": 0.8660574340820312} +{"epoch": 0, "iter": 15574, "iter_tflops": 18.63884510292805, "iter_time": 1.1068869018554688, "loss": 0.24379056692123413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.82013134996872, "step_time": 0.9040742664337158} +{"epoch": 0, "iter": 15575, "iter_tflops": 40.00511408979238, "iter_time": 0.5157114028930665, "loss": 0.17434193193912506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.660983085196534, "step_time": 0.48360567474365235} +{"epoch": 0, "iter": 15576, "iter_tflops": 46.98142207601302, "iter_time": 0.4391330146789551, "loss": 0.2621387541294098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69026617274729, "step_time": 0.40700306129455566} +{"epoch": 0, "iter": 15577, "iter_tflops": 31.671361638032852, "iter_time": 0.651411636352539, "loss": 0.3304605782032013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.719805490556475, "step_time": 0.6118390426635743} +{"epoch": 0, "iter": 15578, "iter_tflops": 13.289743412630779, "iter_time": 1.5524072113037108, "loss": 0.3830196261405945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.484390407531667, "step_time": 1.2515533180236817} +{"epoch": 0, "iter": 15579, "iter_tflops": 34.496097699278415, "iter_time": 0.5980703582763672, "loss": 0.30232301354408264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.68686114047848, "step_time": 0.5474346466064453} +{"epoch": 0, "iter": 15580, "iter_tflops": 41.91546671275024, "iter_time": 0.4922071762084961, "loss": 0.2185852825641632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.066313960261944, "step_time": 0.4478563995361329} +{"epoch": 0, "iter": 15581, "iter_tflops": 24.87899218321621, "iter_time": 0.8292576065063477, "loss": 0.030015291646122932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.636429348366086, "step_time": 0.774544261932373} +{"epoch": 0, "iter": 15582, "iter_tflops": 12.06845106049439, "iter_time": 1.7095063323974606, "loss": 0.0550101175904274, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 13.285687524942835, "step_time": 1.5528811340332032} +{"epoch": 0, "iter": 15583, "iter_tflops": 22.325070027063973, "iter_time": 0.9241222305297851, "loss": 0.04790464788675308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.009428862064432, "step_time": 0.8249326133728027} +{"epoch": 0, "iter": 15584, "iter_tflops": 52.622127761044396, "iter_time": 0.3920611801147461, "loss": 0.03661103546619415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.6731392519641, "step_time": 0.35772447586059575} +{"epoch": 0, "iter": 15585, "iter_tflops": 26.397621383047582, "iter_time": 0.707853973388672, "loss": 0.5000559687614441, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 28.043724418822247, "step_time": 0.6663045501708985} +{"epoch": 0, "iter": 15586, "iter_tflops": 18.26028590682709, "iter_time": 1.0232951049804688, "loss": 0.34704262018203735, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 21.292531209801414, "step_time": 0.8775688056945802} +{"epoch": 0, "iter": 15587, "iter_tflops": 28.309184275430674, "iter_time": 0.6600565032958984, "loss": 0.4637684226036072, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 30.54664276818935, "step_time": 0.6117091598510742} +{"epoch": 0, "iter": 15588, "iter_tflops": 28.303947803419568, "iter_time": 0.6601786193847656, "loss": 0.46079468727111816, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 30.446291412009966, "step_time": 0.6137253608703613} +{"epoch": 0, "iter": 15589, "iter_tflops": 22.342409359558506, "iter_time": 0.9234050445556641, "loss": 0.5046365857124329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.121091482430746, "step_time": 0.8553134307861329} +{"epoch": 0, "iter": 15590, "iter_tflops": 16.87697439221459, "iter_time": 1.2224402923583986, "loss": 0.6480757594108582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.361542231116573, "step_time": 1.013238254547119} +{"epoch": 0, "iter": 15591, "iter_tflops": 34.70671700438846, "iter_time": 0.5944409408569336, "loss": 0.6308785080909729, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 37.59971771501564, "step_time": 0.5487034149169921} +{"epoch": 0, "iter": 15592, "iter_tflops": 35.92731274331432, "iter_time": 0.5742453842163086, "loss": 0.5902613401412964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.16068215790002, "step_time": 0.5268318214416504} +{"epoch": 0, "iter": 15593, "iter_tflops": 15.431130271455748, "iter_time": 0.934340187072754, "loss": 0.08506646007299423, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 16.55773650322859, "step_time": 0.870766674041748} +{"epoch": 0, "iter": 15594, "iter_tflops": 10.969871784556295, "iter_time": 1.314320297241211, "loss": 0.06242743507027626, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 13.23159877679756, "step_time": 1.0896585807800294} +{"epoch": 0, "iter": 15595, "iter_tflops": 28.825946648180164, "iter_time": 0.5001717834472656, "loss": 0.061367303133010864, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 31.8071904834859, "step_time": 0.4532913761138916} +{"epoch": 0, "iter": 15596, "iter_tflops": 30.69662348172888, "iter_time": 0.4696909141540528, "loss": 0.05813201516866684, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 33.668549238842225, "step_time": 0.4282312564849853} +{"epoch": 0, "iter": 15597, "iter_tflops": 20.569789348367966, "iter_time": 1.00298030090332, "loss": 0.1692250370979309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.14764465768197, "step_time": 0.9315253982543945} +{"epoch": 0, "iter": 15598, "iter_tflops": 23.839344733674523, "iter_time": 0.8654220046997071, "loss": 0.1844003051519394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.016658638720862, "step_time": 0.7110085887908935} +{"epoch": 0, "iter": 15599, "iter_tflops": 49.18845295271142, "iter_time": 0.41942960739135743, "loss": 0.12812325358390808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.304485325210784, "step_time": 0.3870423545837402} +{"epoch": 0, "iter": 15600, "iter_tflops": 50.21266925110436, "iter_time": 0.4108742637634277, "loss": 0.14234136044979095, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.53533633381277, "step_time": 0.3783068904876709} +{"epoch": 0, "iter": 15601, "iter_tflops": 41.37035969105644, "iter_time": 0.4986926307678223, "loss": 0.08920031785964966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26306701242946, "step_time": 0.45580414390563967} +{"epoch": 0, "iter": 15602, "iter_tflops": 49.46349458601957, "iter_time": 0.41709737014770504, "loss": 0.09735672175884247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.854714264388804, "step_time": 0.3830879764556885} +{"epoch": 0, "iter": 15603, "iter_tflops": 49.7420511603505, "iter_time": 0.41476161575317383, "loss": 0.1271670013666153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.78519812616714, "step_time": 0.38358310890197755} +{"epoch": 0, "iter": 15604, "iter_tflops": 48.68453892126299, "iter_time": 0.4237709541320801, "loss": 0.16184592247009277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.79681887691015, "step_time": 0.3907639503479004} +{"epoch": 0, "iter": 15605, "iter_tflops": 31.794909665152293, "iter_time": 0.6488803939819336, "loss": 0.14349423348903656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.93759867554742, "step_time": 0.6079125900268554} +{"epoch": 0, "iter": 15606, "iter_tflops": 25.472430166886582, "iter_time": 0.8099381713867188, "loss": 0.138401061296463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.724413621551125, "step_time": 0.6503222961425781} +{"epoch": 0, "iter": 15607, "iter_tflops": 48.71108887107716, "iter_time": 0.4235399780273437, "loss": 0.18719922006130219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.906623060946806, "step_time": 0.38995294570922856} +{"epoch": 0, "iter": 15608, "iter_tflops": 49.707491859204865, "iter_time": 0.41504998016357425, "loss": 0.16265271604061127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.51578542192113, "step_time": 0.3784425621032715} +{"epoch": 0, "iter": 15609, "iter_tflops": 27.997000485473, "iter_time": 0.7369037094116211, "loss": 
0.6825748682022095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.6065730679598, "step_time": 0.6968416595458984} +{"epoch": 0, "iter": 15610, "iter_tflops": 12.049875932603415, "iter_time": 1.7121415710449222, "loss": 0.7634950280189514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.944279251476216, "step_time": 1.4795381774902343} +{"epoch": 0, "iter": 15611, "iter_tflops": 13.911538940056214, "iter_time": 1.4830202178955079, "loss": 0.6657004356384277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.411914992953974, "step_time": 1.2570802078247072} +{"epoch": 0, "iter": 15612, "iter_tflops": 15.648328079250218, "iter_time": 1.3184215850830077, "loss": 0.5763970613479614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.12640528457201, "step_time": 1.1381789817810057} +{"epoch": 0, "iter": 15613, "iter_tflops": 20.455570184791284, "iter_time": 0.7227973175048829, "loss": 0.47112685441970825, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 22.593536359068864, "step_time": 0.6544009323120118} +{"epoch": 0, "iter": 15614, "iter_tflops": 20.98225510822615, "iter_time": 0.7046540603637695, "loss": 0.43370452523231506, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 22.600153941964347, "step_time": 0.6542093162536621} +{"epoch": 0, "iter": 15615, "iter_tflops": 22.973627293507448, "iter_time": 0.6435740890502929, "loss": 0.38147929310798645, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.777001392352293, "step_time": 0.5967320671081543} +{"epoch": 0, "iter": 15616, "iter_tflops": 21.783031191070908, "iter_time": 0.6787499465942382, "loss": 0.4580279588699341, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 23.33504653530945, "step_time": 0.6336062469482422} +{"epoch": 0, "iter": 15617, "iter_tflops": 13.079939449889576, "iter_time": 1.136618927001953, "loss": 0.0998564288020134, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 14.056800803439105, "step_time": 1.0576308898925781} +{"epoch": 0, "iter": 15618, "iter_tflops": 11.897605610682607, "iter_time": 
1.2495713195800782, "loss": 0.08247661590576172, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 13.601154704961584, "step_time": 1.0930621013641355} +{"epoch": 0, "iter": 15619, "iter_tflops": 28.476267099611103, "iter_time": 0.5220806045532226, "loss": 0.13227413594722748, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 31.449196409598024, "step_time": 0.4727277145385742} +{"epoch": 0, "iter": 15620, "iter_tflops": 31.59943268951372, "iter_time": 0.4704801788330078, "loss": 0.1403009295463562, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 34.639316356221556, "step_time": 0.4291916904449463} +{"epoch": 0, "iter": 15621, "iter_tflops": 34.88263004448301, "iter_time": 0.5914431762695312, "loss": 0.20280463993549347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.704331990593374, "step_time": 0.5330435237884521} +{"epoch": 0, "iter": 15622, "iter_tflops": 38.49776702651892, "iter_time": 0.5359036407470704, "loss": 0.24081557989120483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.811869428763046, "step_time": 0.481901252746582} +{"epoch": 0, "iter": 15623, "iter_tflops": 43.462864950401524, "iter_time": 0.47468323898315434, "loss": 0.2886955440044403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59233943574252, "step_time": 0.43349609947204587} +{"epoch": 0, "iter": 15624, "iter_tflops": 40.9180278580429, "iter_time": 0.5042054710388183, "loss": 0.12494830042123795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44426437877156, "step_time": 0.4642014846801758} +{"epoch": 0, "iter": 15625, "iter_tflops": 17.50498670949472, "iter_time": 1.178583786010742, "loss": 0.23735478520393372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.783410627289825, "step_time": 1.098367805480957} +{"epoch": 0, "iter": 15626, "iter_tflops": 19.00560556951886, "iter_time": 1.0855267639160155, "loss": 0.23183494806289673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.058441347715164, "step_time": 0.894730619430542} +{"epoch": 0, "iter": 15627, "iter_tflops": 49.88451027356942, 
"iter_time": 0.41357714843749993, "loss": 0.25208431482315063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.29640716441207, "step_time": 0.37997161483764647} +{"epoch": 0, "iter": 15628, "iter_tflops": 50.27115670000035, "iter_time": 0.41039623641967776, "loss": 0.20852810144424438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.92779657671924, "step_time": 0.3756038799285889} +{"epoch": 0, "iter": 15629, "iter_tflops": 26.415941684697735, "iter_time": 0.7810092010498046, "loss": 0.6197015047073364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.804355732292915, "step_time": 0.742009407043457} +{"epoch": 0, "iter": 15630, "iter_tflops": 9.85327946345219, "iter_time": 2.0938301391601564, "loss": 0.4603002965450287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.212767709329686, "step_time": 1.8399644088745117} +{"epoch": 0, "iter": 15631, "iter_tflops": 12.914278362919744, "iter_time": 1.5975413360595705, "loss": 0.4415610730648041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.371929313749613, "step_time": 1.4355131492614746} +{"epoch": 0, "iter": 15632, "iter_tflops": 40.913478835633434, "iter_time": 0.504261531829834, "loss": 0.40650978684425354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.9336263408214, "step_time": 0.44915011405944827} +{"epoch": 0, "iter": 15633, "iter_tflops": 25.95439777905652, "iter_time": 0.6122061843872071, "loss": 0.400107204914093, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 27.816421261009594, "step_time": 0.5712252731323242} +{"epoch": 0, "iter": 15634, "iter_tflops": 26.060460158555163, "iter_time": 0.6097145919799803, "loss": 0.44949036836624146, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 27.7347025469932, "step_time": 0.5729083557128907} +{"epoch": 0, "iter": 15635, "iter_tflops": 28.847626388593937, "iter_time": 0.5508059005737305, "loss": 0.3437144160270691, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.805349473697245, "step_time": 0.5158014144897461} +{"epoch": 0, "iter": 15636, "iter_tflops": 
29.81548442430315, "iter_time": 0.5329258651733398, "loss": 0.3280241787433624, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 31.65796769582323, "step_time": 0.5019097557067871} +{"epoch": 0, "iter": 15637, "iter_tflops": 30.372541712919315, "iter_time": 0.6792679290771484, "loss": 0.2924767732620239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.435706505287655, "step_time": 0.6360611724853515} +{"epoch": 0, "iter": 15638, "iter_tflops": 14.839142318502287, "iter_time": 1.3903157653808595, "loss": 0.2664463520050049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.55442927562758, "step_time": 1.24625821685791} +{"epoch": 0, "iter": 15639, "iter_tflops": 42.869489011832094, "iter_time": 0.48125354385375974, "loss": 0.26653867959976196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.28002931367427, "step_time": 0.4363595752716065} +{"epoch": 0, "iter": 15640, "iter_tflops": 47.38330311896211, "iter_time": 0.4354085121154786, "loss": 0.3116309642791748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.276549039352744, "step_time": 0.4023494930267334} +{"epoch": 0, "iter": 15641, "iter_tflops": 28.992448441758555, "iter_time": 0.7116023178100586, "loss": 0.1974647045135498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.674264059362734, "step_time": 0.6725864219665528} +{"epoch": 0, "iter": 15642, "iter_tflops": 16.867224673914336, "iter_time": 1.2231468963623047, "loss": 0.15956076979637146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.40466115364746, "step_time": 1.011097089767456} +{"epoch": 0, "iter": 15643, "iter_tflops": 32.848039982484124, "iter_time": 0.6280768508911133, "loss": 0.13616400957107544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.03908011050481, "step_time": 0.5724644870758057} +{"epoch": 0, "iter": 15644, "iter_tflops": 43.049707627199496, "iter_time": 0.47923887634277346, "loss": 0.17009171843528748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.0830778655146, "step_time": 0.43818489456176757} +{"epoch": 0, "iter": 15645, 
"iter_tflops": 35.13775228779076, "iter_time": 0.5871489257812501, "loss": 0.007480138447135687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.68031273831869, "step_time": 0.5333745269775391} +{"epoch": 0, "iter": 15646, "iter_tflops": 10.411456034460741, "iter_time": 1.9815762023925783, "loss": 0.009473491460084915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.667390935464038, "step_time": 1.6286774139404296} +{"epoch": 0, "iter": 15647, "iter_tflops": 12.879094736983863, "iter_time": 1.601905563354492, "loss": 0.008024892769753933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.900146163628142, "step_time": 1.3846235656738282} +{"epoch": 0, "iter": 15648, "iter_tflops": 43.39235845815419, "iter_time": 0.47545453262329096, "loss": 0.004257498309016228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.37836135801552, "step_time": 0.42645292091369624} +{"epoch": 0, "iter": 15649, "iter_tflops": 22.59068345740035, "iter_time": 0.7178745727539062, "loss": 0.33179426193237305, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 24.377885831375508, "step_time": 0.665245433807373} +{"epoch": 0, "iter": 15650, "iter_tflops": 23.782578060505703, "iter_time": 0.6818973617553712, "loss": 0.3847704827785492, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 25.62762502202054, "step_time": 0.6328045310974121} +{"epoch": 0, "iter": 15651, "iter_tflops": 24.193521975773116, "iter_time": 0.6703148574829102, "loss": 0.25210052728652954, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 26.05535505281785, "step_time": 0.6224162826538086} +{"epoch": 0, "iter": 15652, "iter_tflops": 24.64630998507626, "iter_time": 0.6580002136230468, "loss": 0.34770387411117554, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 26.546406491762404, "step_time": 0.6109029197692871} +{"epoch": 0, "iter": 15653, "iter_tflops": 8.484710723193796, "iter_time": 1.0790025939941406, "loss": 0.004956053104251623, "lr": 3e-05, "seqlen": 3696.0, "step_tflops": 9.106151015507809, "step_time": 1.0053671264648438} 
+{"epoch": 0, "iter": 15654, "iter_tflops": 15.592560491532291, "iter_time": 0.5871405715942383, "loss": 0.00106436712667346, "lr": 3e-05, "seqlen": 3696.0, "step_tflops": 17.5627748727436, "step_time": 0.5212743968963623} +{"epoch": 0, "iter": 15655, "iter_tflops": 25.75160920050208, "iter_time": 0.35551272964477537, "loss": 0.008085595443844795, "lr": 3e-05, "seqlen": 3696.0, "step_tflops": 28.392433240000432, "step_time": 0.32244594192504883} +{"epoch": 0, "iter": 15656, "iter_tflops": 24.035668960142946, "iter_time": 0.3808932838439941, "loss": 0.009251314215362072, "lr": 3e-05, "seqlen": 3696.0, "step_tflops": 26.39616043830583, "step_time": 0.34683168792724606} +{"epoch": 0, "iter": 15657, "iter_tflops": 36.33368234680203, "iter_time": 0.5678228073120117, "loss": 0.5984112024307251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.15243837851231, "step_time": 0.5269427490234374} +{"epoch": 0, "iter": 15658, "iter_tflops": 43.433795963303226, "iter_time": 0.4750009307861328, "loss": 0.6695636510848999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.083821827959284, "step_time": 0.4381779708862305} +{"epoch": 0, "iter": 15659, "iter_tflops": 44.76010901594535, "iter_time": 0.4609258995056152, "loss": 0.5937247276306152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.93894214574763, "step_time": 0.43036188507080075} +{"epoch": 0, "iter": 15660, "iter_tflops": 40.958577564232044, "iter_time": 0.5037062988281251, "loss": 0.579291045665741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.55284286274647, "step_time": 0.47370256805419925} +{"epoch": 0, "iter": 15661, "iter_tflops": 25.481542151956134, "iter_time": 0.8096485443115234, "loss": 0.0470665767788887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.788908807847662, "step_time": 0.7701356430053711} +{"epoch": 0, "iter": 15662, "iter_tflops": 15.479506939784983, "iter_time": 1.3328004302978518, "loss": 0.029051095247268677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.672750698077845, "step_time": 
0.8715123043060302} +{"epoch": 0, "iter": 15663, "iter_tflops": 43.10269704497816, "iter_time": 0.47864971160888675, "loss": 0.027638066560029984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.64321009465133, "step_time": 0.4330332374572754} +{"epoch": 0, "iter": 15664, "iter_tflops": 49.5583919799287, "iter_time": 0.41629868698120115, "loss": 0.03961969539523125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.84550462149638, "step_time": 0.37616744804382324} +{"epoch": 0, "iter": 15665, "iter_tflops": 24.502306298835258, "iter_time": 0.8420061874389648, "loss": 0.5676011443138123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.93951295630068, "step_time": 0.7953539276123046} +{"epoch": 0, "iter": 15666, "iter_tflops": 17.54039488792874, "iter_time": 1.1762046203613283, "loss": 0.672150731086731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.839993730039247, "step_time": 0.9899759941101074} +{"epoch": 0, "iter": 15667, "iter_tflops": 44.33410570095594, "iter_time": 0.46535490417480463, "loss": 0.5444153547286987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.00086574992487, "step_time": 0.4298066959381103} +{"epoch": 0, "iter": 15668, "iter_tflops": 49.60907833514761, "iter_time": 0.41587334823608396, "loss": 0.5273775458335876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.533550165042435, "step_time": 0.38538623809814454} +{"epoch": 0, "iter": 15669, "iter_tflops": 31.102183454038865, "iter_time": 0.6633326416015625, "loss": 0.062006626278162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.222573617487555, "step_time": 0.6209962463378906} +{"epoch": 0, "iter": 15670, "iter_tflops": 49.68544087153136, "iter_time": 0.4152341842651367, "loss": 0.13713404536247253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.30363076408116, "step_time": 0.37992107009887693} +{"epoch": 0, "iter": 15671, "iter_tflops": 50.39665775265838, "iter_time": 0.4093742408752441, "loss": 0.044736262410879135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.07215120168916, 
"step_time": 0.37461935043334965} +{"epoch": 0, "iter": 15672, "iter_tflops": 53.80067182355362, "iter_time": 0.38347278594970696, "loss": 0.08327663689851761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.747252693801485, "step_time": 0.35118397140502927} +{"epoch": 0, "iter": 15673, "iter_tflops": 26.774931328277933, "iter_time": 0.7705376815795899, "loss": 0.5579570531845093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.431617627378017, "step_time": 0.725639102935791} +{"epoch": 0, "iter": 15674, "iter_tflops": 10.357400143617745, "iter_time": 1.9919181671142576, "loss": 0.6814358830451965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.220974339460165, "step_time": 1.688170921325684} +{"epoch": 0, "iter": 15675, "iter_tflops": 15.707083250924942, "iter_time": 1.3134897918701174, "loss": 0.48587119579315186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.370029141631726, "step_time": 1.123084419250488} +{"epoch": 0, "iter": 15676, "iter_tflops": 23.34280827027904, "iter_time": 0.8838308258056641, "loss": 0.5679200291633606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.816478882797714, "step_time": 0.741686019897461} +{"epoch": 0, "iter": 15677, "iter_tflops": 15.339787033896924, "iter_time": 1.025153663635254, "loss": 0.31832611560821533, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 16.052102079281727, "step_time": 0.9796622772216798} +{"epoch": 0, "iter": 15678, "iter_tflops": 13.704530713770632, "iter_time": 1.1474773712158206, "loss": 0.32561707496643066, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 16.51252126319116, "step_time": 0.9523463211059573} +{"epoch": 0, "iter": 15679, "iter_tflops": 27.01785141611038, "iter_time": 0.5820462417602539, "loss": 0.3750758767127991, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 28.676681686679355, "step_time": 0.5483772163391114} +{"epoch": 0, "iter": 15680, "iter_tflops": 27.63530905044745, "iter_time": 0.5690415420532227, "loss": 0.3884265720844269, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 
29.264372530771485, "step_time": 0.5373646354675293} +{"epoch": 0, "iter": 15681, "iter_tflops": 23.187452871623204, "iter_time": 0.889752471923828, "loss": 0.46590906381607056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.350388959432298, "step_time": 0.8472593002319335} +{"epoch": 0, "iter": 15682, "iter_tflops": 27.81387443286499, "iter_time": 0.7417554702758787, "loss": 0.6448919177055359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.18854074213638, "step_time": 0.5701001777648925} +{"epoch": 0, "iter": 15683, "iter_tflops": 47.978860316842486, "iter_time": 0.4300038261413574, "loss": 0.8169060349464417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.91180696889597, "step_time": 0.3974258403778076} +{"epoch": 0, "iter": 15684, "iter_tflops": 47.15035897270934, "iter_time": 0.43755962753295896, "loss": 0.581778883934021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.7481973670859, "step_time": 0.40653845024108887} +{"epoch": 0, "iter": 15685, "iter_tflops": 38.05954667014619, "iter_time": 0.5420740737915039, "loss": 0.5863226056098938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.37780602792272, "step_time": 0.4986028861999512} +{"epoch": 0, "iter": 15686, "iter_tflops": 35.505215389867075, "iter_time": 0.5810721969604491, "loss": 0.539597749710083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.57930995268449, "step_time": 0.5347709312438965} +{"epoch": 0, "iter": 15687, "iter_tflops": 39.79277141626929, "iter_time": 0.5184633483886719, "loss": 0.6909729838371277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.075873312762205, "step_time": 0.47894777107238773} +{"epoch": 0, "iter": 15688, "iter_tflops": 31.58231864128407, "iter_time": 0.6532482223510742, "loss": 0.5671383142471313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.96348080734589, "step_time": 0.6074493255615235} +{"epoch": 0, "iter": 15689, "iter_tflops": 11.395215927108204, "iter_time": 1.0974764099121093, "loss": 0.03469005599617958, "lr": 3e-05, "seqlen": 5024.0, 
"step_tflops": 12.187304938733599, "step_time": 1.0261481704711914} +{"epoch": 0, "iter": 15690, "iter_tflops": 15.549150290493436, "iter_time": 0.8042870788574219, "loss": 0.03221738338470459, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 17.41790234982028, "step_time": 0.7179957962036133} +{"epoch": 0, "iter": 15691, "iter_tflops": 31.516314603685558, "iter_time": 0.39680974197387703, "loss": 0.06096594035625458, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 34.655936325541454, "step_time": 0.36086113929748537} +{"epoch": 0, "iter": 15692, "iter_tflops": 33.60152909440053, "iter_time": 0.37218486785888666, "loss": 0.038269270211458206, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 36.654495292138165, "step_time": 0.3411854553222656} +{"epoch": 0, "iter": 15693, "iter_tflops": 31.354271268002307, "iter_time": 0.6579994583129882, "loss": 0.13523682951927185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.40994793693318, "step_time": 0.6175134887695312} +{"epoch": 0, "iter": 15694, "iter_tflops": 8.679386735891912, "iter_time": 2.3770220336914063, "loss": 0.11165188252925873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.688464976642498, "step_time": 2.129449150085449} +{"epoch": 0, "iter": 15695, "iter_tflops": 13.859018492829755, "iter_time": 1.4886403045654297, "loss": 0.10353906452655792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.847922232582844, "step_time": 1.3018169326782227} +{"epoch": 0, "iter": 15696, "iter_tflops": 32.79424514856892, "iter_time": 0.6291071319580079, "loss": 0.14781424403190613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.50467629929624, "step_time": 0.4970787715911865} +{"epoch": 0, "iter": 15697, "iter_tflops": 20.761933491449593, "iter_time": 0.7495405197143554, "loss": 0.25816214084625244, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 21.9674276913455, "step_time": 0.7084084053039551} +{"epoch": 0, "iter": 15698, "iter_tflops": 8.049987560333793, "iter_time": 1.9331595611572265, "loss": 0.35146448016166687, "lr": 3e-05, 
"seqlen": 6224.0, "step_tflops": 10.044138354599413, "step_time": 1.5493524551391604} +{"epoch": 0, "iter": 15699, "iter_tflops": 27.848017599689374, "iter_time": 0.5588157348632813, "loss": 0.44564539194107056, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 29.66572980394186, "step_time": 0.5245753440856933} +{"epoch": 0, "iter": 15700, "iter_tflops": 28.456740852798696, "iter_time": 0.5468620071411132, "loss": 0.34991469979286194, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 30.242424177036334, "step_time": 0.5145721893310548} +{"epoch": 0, "iter": 15701, "iter_tflops": 23.522485455006997, "iter_time": 0.8770796585083007, "loss": 0.4968831539154053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.58331757022053, "step_time": 0.8392314605712891} +{"epoch": 0, "iter": 15702, "iter_tflops": 14.413738679464725, "iter_time": 1.4313492126464842, "loss": 0.6919400691986084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.41420510757349, "step_time": 1.1847278347015382} +{"epoch": 0, "iter": 15703, "iter_tflops": 40.88357570176164, "iter_time": 0.5046303596496583, "loss": 0.7100551724433899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.61425002947285, "step_time": 0.4624328212738037} +{"epoch": 0, "iter": 15704, "iter_tflops": 38.25077813008261, "iter_time": 0.5393640213012695, "loss": 0.6830446720123291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.708893045179465, "step_time": 0.49464495468139646} +{"epoch": 0, "iter": 15705, "iter_tflops": 11.463763647360874, "iter_time": 1.0237497787475585, "loss": 0.022901397198438644, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 12.382227631072693, "step_time": 0.9478121261596679} +{"epoch": 0, "iter": 15706, "iter_tflops": 9.826555542320053, "iter_time": 1.1943173217773437, "loss": 0.0020328741520643234, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 12.178925597241252, "step_time": 0.9636338939666749} +{"epoch": 0, "iter": 15707, "iter_tflops": 27.625699673188763, "iter_time": 0.42482274246215823, "loss": 
0.006680158898234367, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 30.65721371734608, "step_time": 0.38281448554992675} +{"epoch": 0, "iter": 15708, "iter_tflops": 25.01352431657659, "iter_time": 0.4691872024536133, "loss": 0.00608938978984952, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 27.779919819427366, "step_time": 0.4224643402099609} +{"epoch": 0, "iter": 15709, "iter_tflops": 22.33916237717627, "iter_time": 0.9235392608642579, "loss": 0.5109757781028748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.647647438619003, "step_time": 0.8724374618530274} +{"epoch": 0, "iter": 15710, "iter_tflops": 18.598573073567387, "iter_time": 1.1092836761474607, "loss": 0.6866022944450378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.441509051525802, "step_time": 0.8441006431579591} +{"epoch": 0, "iter": 15711, "iter_tflops": 34.9476031707867, "iter_time": 0.5903435897827148, "loss": 0.70180344581604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.08668911973302, "step_time": 0.54168776512146} +{"epoch": 0, "iter": 15712, "iter_tflops": 34.89448860813544, "iter_time": 0.5912421798706055, "loss": 0.43771329522132874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.93977755479077, "step_time": 0.5437853050231934} +{"epoch": 0, "iter": 15713, "iter_tflops": 24.329491162822823, "iter_time": 0.8479870529174804, "loss": 0.3153708875179291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.24206987397588, "step_time": 0.7861839256286621} +{"epoch": 0, "iter": 15714, "iter_tflops": 36.378421658764005, "iter_time": 0.5671244812011719, "loss": 0.3619234561920166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.48102404575949, "step_time": 0.5096485080718994} +{"epoch": 0, "iter": 15715, "iter_tflops": 39.411313026295176, "iter_time": 0.5234815063476563, "loss": 0.3403145968914032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19931811324496, "step_time": 0.47757914733886714} +{"epoch": 0, "iter": 15716, "iter_tflops": 42.66661130475298, "iter_time": 0.4835418815612793, 
"loss": 0.2979971170425415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.89186867734498, "step_time": 0.43997166442871094} +{"epoch": 0, "iter": 15717, "iter_tflops": 18.722760387722257, "iter_time": 1.1019258422851563, "loss": 0.24776801466941833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.998114848716895, "step_time": 1.0316519165039062} +{"epoch": 0, "iter": 15718, "iter_tflops": 17.299713214978222, "iter_time": 1.1925685272216797, "loss": 0.23971392214298248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.08027825006349, "step_time": 0.9343674602508546} +{"epoch": 0, "iter": 15719, "iter_tflops": 38.43000266509497, "iter_time": 0.5368486099243164, "loss": 0.1590428352355957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.00558201948145, "step_time": 0.49115123558044427} +{"epoch": 0, "iter": 15720, "iter_tflops": 41.009536524444734, "iter_time": 0.5030803871154784, "loss": 0.22290414571762085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.82084228124924, "step_time": 0.4603013343811035} +{"epoch": 0, "iter": 15721, "iter_tflops": 34.80849652005738, "iter_time": 0.5927028045654297, "loss": 0.16590479016304016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.42355472014704, "step_time": 0.53693869972229} +{"epoch": 0, "iter": 15722, "iter_tflops": 32.78573006955666, "iter_time": 0.6292705230712891, "loss": 0.07830514758825302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.438696572972525, "step_time": 0.5661863746643067} +{"epoch": 0, "iter": 15723, "iter_tflops": 42.44778408471188, "iter_time": 0.48603464126586915, "loss": 0.08801780641078949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.51191555262402, "step_time": 0.443565767288208} +{"epoch": 0, "iter": 15724, "iter_tflops": 38.204831612642856, "iter_time": 0.540012680053711, "loss": 0.0777980387210846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.89434390971513, "step_time": 0.49245534324646} +{"epoch": 0, "iter": 15725, "iter_tflops": 18.537326573137783, "iter_time": 
1.112948699951172, "loss": 0.11722861975431442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.830773556537657, "step_time": 1.0403574752807616} +{"epoch": 0, "iter": 15726, "iter_tflops": 24.975895967030315, "iter_time": 0.8260401763916014, "loss": 0.08450375497341156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.36275309932473, "step_time": 0.6794869174957275} +{"epoch": 0, "iter": 15727, "iter_tflops": 50.06604318836319, "iter_time": 0.4120775718688965, "loss": 0.11889415979385376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.390750473669236, "step_time": 0.379312536239624} +{"epoch": 0, "iter": 15728, "iter_tflops": 49.25033672871643, "iter_time": 0.418902587890625, "loss": 0.1117158755660057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.42752217178185, "step_time": 0.38615104484558105} +{"epoch": 0, "iter": 15729, "iter_tflops": 42.92630035264483, "iter_time": 0.48061662292480467, "loss": 0.22621746361255646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.80752936215286, "step_time": 0.44076441955566403} +{"epoch": 0, "iter": 15730, "iter_tflops": 8.849117913053286, "iter_time": 2.3314293823242194, "loss": 0.13718688488006592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.066578837788303, "step_time": 1.864270233154297} +{"epoch": 0, "iter": 15731, "iter_tflops": 12.746571051004143, "iter_time": 1.6185602722167969, "loss": 0.1473076343536377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.270310010052409, "step_time": 1.445735481262207} +{"epoch": 0, "iter": 15732, "iter_tflops": 26.960332756341582, "iter_time": 0.7652388305664063, "loss": 0.2170611172914505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.53999630234057, "step_time": 0.6151191349029541} +{"epoch": 0, "iter": 15733, "iter_tflops": 18.556667184198282, "iter_time": 0.8275923614501953, "loss": 0.29907575249671936, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 19.490421049161924, "step_time": 0.7879437789916992} +{"epoch": 0, "iter": 15734, "iter_tflops": 12.654899169726276, 
"iter_time": 1.213550247192383, "loss": 0.17289119958877563, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 13.913807750020176, "step_time": 1.1037493324279786} +{"epoch": 0, "iter": 15735, "iter_tflops": 27.30631323075643, "iter_time": 0.5624104537963868, "loss": 0.3276004195213318, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.071018225237562, "step_time": 0.5282703170776367} +{"epoch": 0, "iter": 15736, "iter_tflops": 28.993569661163953, "iter_time": 0.5296814498901368, "loss": 0.38738134503364563, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.76032260174019, "step_time": 0.49925861358642576} +{"epoch": 0, "iter": 15737, "iter_tflops": 18.005865615156544, "iter_time": 0.7238626480102539, "loss": 0.040686458349227905, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 19.08218912338233, "step_time": 0.6830334548950194} +{"epoch": 0, "iter": 15738, "iter_tflops": 12.378441286152023, "iter_time": 1.052941421508789, "loss": 0.05752621963620186, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 14.90098296094274, "step_time": 0.8746921997070312} +{"epoch": 0, "iter": 15739, "iter_tflops": 27.25911388888702, "iter_time": 0.4781436996459961, "loss": 0.041416652500629425, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 30.214187845314463, "step_time": 0.43137924575805664} +{"epoch": 0, "iter": 15740, "iter_tflops": 29.312415563025752, "iter_time": 0.4446502723693848, "loss": 0.0464995913207531, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 32.287402407560336, "step_time": 0.4036798439025879} +{"epoch": 0, "iter": 15741, "iter_tflops": 18.444352510816564, "iter_time": 1.118558837890625, "loss": 0.6485708951950073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.917543766521895, "step_time": 1.0358251876831055} +{"epoch": 0, "iter": 15742, "iter_tflops": 18.802202869770873, "iter_time": 1.09727001953125, "loss": 0.7284994125366211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.44438166180087, "step_time": 0.8440014476776122} +{"epoch": 0, "iter": 15743, "iter_tflops": 
49.79695467803142, "iter_time": 0.4143043212890624, "loss": 0.7117643356323242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.94057942220325, "step_time": 0.3824781589508056} +{"epoch": 0, "iter": 15744, "iter_tflops": 46.55774552942908, "iter_time": 0.44312913513183594, "loss": 0.6258044242858887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.33182132488092, "step_time": 0.40990158843994146} +{"epoch": 0, "iter": 15745, "iter_tflops": 22.334289061154337, "iter_time": 0.9237407760620118, "loss": 0.6307645440101624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.42429240492829, "step_time": 0.880756317138672} +{"epoch": 0, "iter": 15746, "iter_tflops": 19.558621441138996, "iter_time": 1.054833724975586, "loss": 0.6315653920173645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.59652574603879, "step_time": 0.8743275909423829} +{"epoch": 0, "iter": 15747, "iter_tflops": 43.51063122154304, "iter_time": 0.47416212844848626, "loss": 0.7513037323951721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.904151052364796, "step_time": 0.4398564529418945} +{"epoch": 0, "iter": 15748, "iter_tflops": 47.11809375355541, "iter_time": 0.43785925674438475, "loss": 0.7474564909934998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.928254475240806, "step_time": 0.405101131439209} +{"epoch": 0, "iter": 15749, "iter_tflops": 38.22573463227419, "iter_time": 0.5397173843383789, "loss": 0.12999878823757172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.272510815908824, "step_time": 0.4998749313354492} +{"epoch": 0, "iter": 15750, "iter_tflops": 10.323082009521931, "iter_time": 1.9985401153564455, "loss": 0.11505967378616333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.652443706763156, "step_time": 1.7705379257202147} +{"epoch": 0, "iter": 15751, "iter_tflops": 15.601477104102349, "iter_time": 1.3223807830810546, "loss": 0.160786971449852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.356934363467534, "step_time": 1.1238855628967286} +{"epoch": 0, "iter": 15752, 
"iter_tflops": 17.657577273659992, "iter_time": 1.1683988800048828, "loss": 0.08201467245817184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.06952946976261, "step_time": 0.9791909942626953} +{"epoch": 0, "iter": 15753, "iter_tflops": 25.198758564419318, "iter_time": 0.6354418334960937, "loss": 0.34550610184669495, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 27.144995098475974, "step_time": 0.5898820495605469} +{"epoch": 0, "iter": 15754, "iter_tflops": 26.611649146770166, "iter_time": 0.601704360961914, "loss": 0.2967802882194519, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 28.308424852795326, "step_time": 0.5656388664245606} +{"epoch": 0, "iter": 15755, "iter_tflops": 26.66845063193004, "iter_time": 0.6004227828979493, "loss": 0.22083401679992676, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 28.337030419201806, "step_time": 0.5650678672790528} +{"epoch": 0, "iter": 15756, "iter_tflops": 27.7072876527737, "iter_time": 0.5779109649658203, "loss": 0.49673396348953247, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 29.523634399990883, "step_time": 0.5423568496704102} +{"epoch": 0, "iter": 15757, "iter_tflops": 22.431103669478333, "iter_time": 0.9197538299560547, "loss": 0.005727379582822323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.543033897198526, "step_time": 0.8763141403198242} +{"epoch": 0, "iter": 15758, "iter_tflops": 13.025439495656814, "iter_time": 1.5839076690673828, "loss": 0.0028371296357363462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.234228266435915, "step_time": 1.3542591819763183} +{"epoch": 0, "iter": 15759, "iter_tflops": 53.87854950563544, "iter_time": 0.38291850280761724, "loss": 0.0027570936363190413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.344457214432396, "step_time": 0.347649881362915} +{"epoch": 0, "iter": 15760, "iter_tflops": 48.25745685084188, "iter_time": 0.42752135848999023, "loss": 0.007042768411338329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21751845769244, "step_time": 0.3876748504638672} 
+{"epoch": 0, "iter": 15761, "iter_tflops": 26.307978897040012, "iter_time": 0.7842143096923828, "loss": 0.08266454935073853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.791704157091726, "step_time": 0.7423471908569337} +{"epoch": 0, "iter": 15762, "iter_tflops": 14.677285294048373, "iter_time": 1.4056477813720702, "loss": 0.10271298885345459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.749472930851315, "step_time": 1.231745834350586} +{"epoch": 0, "iter": 15763, "iter_tflops": 36.89651608802588, "iter_time": 0.559161018371582, "loss": 0.0843147337436676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.576431642378445, "step_time": 0.5084501686096192} +{"epoch": 0, "iter": 15764, "iter_tflops": 41.6905450292068, "iter_time": 0.4948626480102539, "loss": 0.16156832873821259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88305491086725, "step_time": 0.44964515876770017} +{"epoch": 0, "iter": 15765, "iter_tflops": 24.50069590995844, "iter_time": 0.8420615310668946, "loss": 0.5650947690010071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.24209495819853, "step_time": 0.7861831741333009} +{"epoch": 0, "iter": 15766, "iter_tflops": 8.924025911162826, "iter_time": 2.311859436035156, "loss": 0.5954970121383667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.652920282127512, "step_time": 1.770465515136719} +{"epoch": 0, "iter": 15767, "iter_tflops": 9.984283166666, "iter_time": 2.0663570098876956, "loss": 0.8126844167709351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.922732102165511, "step_time": 1.7303998222351074} +{"epoch": 0, "iter": 15768, "iter_tflops": 42.55398680605766, "iter_time": 0.48482163619995117, "loss": 0.6454469561576843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.11513423382242, "step_time": 0.43788676071167} +{"epoch": 0, "iter": 15769, "iter_tflops": 20.640102027535224, "iter_time": 0.8553786926269532, "loss": 0.3042236864566803, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 21.679463981319817, "step_time": 
0.8143699264526367} +{"epoch": 0, "iter": 15770, "iter_tflops": 11.581442074155735, "iter_time": 1.5244304962158202, "loss": 0.3242723345756531, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 14.300788523365938, "step_time": 1.2345545463562013} +{"epoch": 0, "iter": 15771, "iter_tflops": 31.885046276499903, "iter_time": 0.5537110824584962, "loss": 0.2618122398853302, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 33.976140327223455, "step_time": 0.5196324043273925} +{"epoch": 0, "iter": 15772, "iter_tflops": 33.22107644803768, "iter_time": 0.5314428482055664, "loss": 0.4033564031124115, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 35.25637531755331, "step_time": 0.5007634315490722} +{"epoch": 0, "iter": 15773, "iter_tflops": 12.19179991303525, "iter_time": 0.9493386383056642, "loss": 0.0008104583248496056, "lr": 3e-05, "seqlen": 4656.0, "step_tflops": 12.732716915772578, "step_time": 0.9090084075927735} +{"epoch": 0, "iter": 15774, "iter_tflops": 8.576726578458942, "iter_time": 1.3494830017089843, "loss": 0.014623659662902355, "lr": 3e-05, "seqlen": 4656.0, "step_tflops": 12.48135606260324, "step_time": 0.9273148422241211} +{"epoch": 0, "iter": 15775, "iter_tflops": 23.695646030152645, "iter_time": 0.4884503555297851, "loss": 0.0010680586565285921, "lr": 3e-05, "seqlen": 4656.0, "step_tflops": 26.21053941343369, "step_time": 0.44158369064331054} +{"epoch": 0, "iter": 15776, "iter_tflops": 22.72057625047514, "iter_time": 0.5094125518798828, "loss": 0.0008838985813781619, "lr": 3e-05, "seqlen": 4656.0, "step_tflops": 25.256625703392334, "step_time": 0.4582617988586426} +{"epoch": 0, "iter": 15777, "iter_tflops": 24.195113771799438, "iter_time": 0.8526966934204101, "loss": 0.5642990469932556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.902426033759212, "step_time": 0.7964927101135254} +{"epoch": 0, "iter": 15778, "iter_tflops": 19.001246353535382, "iter_time": 1.0857758026123048, "loss": 0.5365721583366394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.498297339484207, "step_time": 0.8091165161132812} +{"epoch": 0, "iter": 15779, "iter_tflops": 40.9941254357327, "iter_time": 0.5032695121765137, "loss": 0.5240090489387512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.00253850069962, "step_time": 0.4688614387512207} +{"epoch": 0, "iter": 15780, "iter_tflops": 46.652483727391925, "iter_time": 0.44222926330566403, "loss": 0.7117668390274048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.08269551403118, "step_time": 0.4119405574798584} +{"epoch": 0, "iter": 15781, "iter_tflops": 34.97480601841765, "iter_time": 0.5898844299316406, "loss": 0.13583232462406158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.40923502841123, "step_time": 0.5514973373413087} +{"epoch": 0, "iter": 15782, "iter_tflops": 44.71558437427545, "iter_time": 0.46138485717773436, "loss": 0.20634540915489197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.624021632708455, "step_time": 0.4242983779907227} +{"epoch": 0, "iter": 15783, "iter_tflops": 49.42041630331001, "iter_time": 0.41746094131469724, "loss": 0.27999216318130493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.5707078090176, "step_time": 0.3851189270019531} +{"epoch": 0, "iter": 15784, "iter_tflops": 52.69455679851175, "iter_time": 0.391522289276123, "loss": 0.1459575593471527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.13420789097531, "step_time": 0.3610987930297851} +{"epoch": 0, "iter": 15785, "iter_tflops": 21.797797618676398, "iter_time": 0.6259236145019531, "loss": 0.007693285122513771, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 23.237461254502325, "step_time": 0.587144874572754} +{"epoch": 0, "iter": 15786, "iter_tflops": 9.782429977920474, "iter_time": 1.394720565795898, "loss": 0.005419385153800249, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 12.568010444834739, "step_time": 1.085593963623047} +{"epoch": 0, "iter": 15787, "iter_tflops": 30.918878335172263, "iter_time": 0.4412759132385254, "loss": 0.005716205574572086, "lr": 3e-05, "seqlen": 5472.0, 
"step_tflops": 40.14932839551288, "step_time": 0.33982526779174804} +{"epoch": 0, "iter": 15788, "iter_tflops": 39.58953188654359, "iter_time": 0.34463040161132813, "loss": 0.011115891858935356, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 43.40831464236749, "step_time": 0.314312047958374} +{"epoch": 0, "iter": 15789, "iter_tflops": 28.146216097187793, "iter_time": 0.732997055053711, "loss": 0.5826643109321594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.72238066212715, "step_time": 0.6941265487670898} +{"epoch": 0, "iter": 15790, "iter_tflops": 15.385379247720472, "iter_time": 1.3409544982910155, "loss": 0.6427098512649536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.150370516066744, "step_time": 1.1366761627197266} +{"epoch": 0, "iter": 15791, "iter_tflops": 36.155125003910385, "iter_time": 0.5706270828247071, "loss": 0.653580367565155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.498484404891116, "step_time": 0.5223262062072754} +{"epoch": 0, "iter": 15792, "iter_tflops": 36.76298818083, "iter_time": 0.5611919631958008, "loss": 0.652748167514801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.98229450972471, "step_time": 0.5160057411193848} +{"epoch": 0, "iter": 15793, "iter_tflops": 20.918262867458473, "iter_time": 0.9862718353271485, "loss": 0.07063495367765427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.533965729204365, "step_time": 0.915555377960205} +{"epoch": 0, "iter": 15794, "iter_tflops": 28.300279157514915, "iter_time": 0.7290067138671876, "loss": 0.0799139142036438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.97785654589842, "step_time": 0.6451681175231934} +{"epoch": 0, "iter": 15795, "iter_tflops": 39.74944865298943, "iter_time": 0.5190284194946289, "loss": 0.12261345982551575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7863646075149, "step_time": 0.4711762142181397} +{"epoch": 0, "iter": 15796, "iter_tflops": 38.31978847359933, "iter_time": 0.538392677307129, "loss": 0.1125028133392334, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 42.221281115641176, "step_time": 0.488642053604126} +{"epoch": 0, "iter": 15797, "iter_tflops": 15.39265531186607, "iter_time": 1.3403206329345705, "loss": 0.06832791864871979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.33125277830364, "step_time": 1.2632890930175782} +{"epoch": 0, "iter": 15798, "iter_tflops": 17.996422572796718, "iter_time": 1.1463997039794922, "loss": 0.08778846263885498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.562379606182425, "step_time": 0.956809679031372} +{"epoch": 0, "iter": 15799, "iter_tflops": 39.69942149800405, "iter_time": 0.5196824722290039, "loss": 0.09254424273967743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.8726082131338, "step_time": 0.4702499885559081} +{"epoch": 0, "iter": 15800, "iter_tflops": 40.18178648321224, "iter_time": 0.5134439086914062, "loss": 0.07254528254270554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.939059719680635, "step_time": 0.4695388031005859} +{"epoch": 0, "iter": 15801, "iter_tflops": 22.606639611475494, "iter_time": 0.9126121292114258, "loss": 0.029925396665930748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.489286285801903, "step_time": 0.8424538497924805} +{"epoch": 0, "iter": 15802, "iter_tflops": 15.386367411956599, "iter_time": 1.3408683776855468, "loss": 0.052845969796180725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.77708560175599, "step_time": 1.0431816864013672} +{"epoch": 0, "iter": 15803, "iter_tflops": 51.54658685093228, "iter_time": 0.40024169921874997, "loss": 0.060217734426259995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.12750096815753, "step_time": 0.36757548713684085} +{"epoch": 0, "iter": 15804, "iter_tflops": 56.387489700719286, "iter_time": 0.36588068771362303, "loss": 0.04883510246872902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.14886176483046, "step_time": 0.33739129257202144} +{"epoch": 0, "iter": 15805, "iter_tflops": 37.553967427500666, "iter_time": 0.39805650711059576, "loss": 0.005604174453765154, 
"lr": 3e-05, "seqlen": 5984.0, "step_tflops": 41.41119750382143, "step_time": 0.3609796867370605} +{"epoch": 0, "iter": 15806, "iter_tflops": 35.70761056719735, "iter_time": 0.41863907623291013, "loss": 0.0017246773932129145, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 39.4680563694998, "step_time": 0.37875189399719233} +{"epoch": 0, "iter": 15807, "iter_tflops": 42.66284751921436, "iter_time": 0.3503892021179199, "loss": 0.002001133980229497, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 46.85861885121585, "step_time": 0.3190149745941162} +{"epoch": 0, "iter": 15808, "iter_tflops": 43.508024818933926, "iter_time": 0.34358261871337886, "loss": 0.004249426070600748, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 47.85204889843908, "step_time": 0.3123920803070068} +{"epoch": 0, "iter": 15809, "iter_tflops": 23.927743945351637, "iter_time": 0.8622247695922851, "loss": 0.22922517359256744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.109826886368307, "step_time": 0.8216342391967774} +{"epoch": 0, "iter": 15810, "iter_tflops": 13.529506580340783, "iter_time": 1.524896224975586, "loss": 0.17443722486495972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.21276300325018, "step_time": 1.2725217475891113} +{"epoch": 0, "iter": 15811, "iter_tflops": 48.23013313690212, "iter_time": 0.4277635612487793, "loss": 0.21663165092468262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.73468923206302, "step_time": 0.39122433090209957} +{"epoch": 0, "iter": 15812, "iter_tflops": 49.62569144083484, "iter_time": 0.4157341270446777, "loss": 0.23171518743038177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.98404610640947, "step_time": 0.38217019653320317} +{"epoch": 0, "iter": 15813, "iter_tflops": 45.758296046189734, "iter_time": 0.45087110519409185, "loss": 0.5830850601196289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.029785097247604, "step_time": 0.41237621688842774} +{"epoch": 0, "iter": 15814, "iter_tflops": 45.915950717070665, "iter_time": 0.4493230171203613, 
"loss": 0.5441579222679138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.96288963567251, "step_time": 0.39703514671325685} +{"epoch": 0, "iter": 15815, "iter_tflops": 44.74161975165036, "iter_time": 0.46111637496948243, "loss": 0.42975252866744995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.28685387438296, "step_time": 0.42726108360290527} +{"epoch": 0, "iter": 15816, "iter_tflops": 48.74849484670576, "iter_time": 0.42321498489379883, "loss": 0.44096341729164124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.579904061045326, "step_time": 0.3923760204315186} +{"epoch": 0, "iter": 15817, "iter_tflops": 42.751909993311166, "iter_time": 0.48257711791992186, "loss": 0.2783782184123993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.72191505902439, "step_time": 0.4415720863342285} +{"epoch": 0, "iter": 15818, "iter_tflops": 45.98818257352021, "iter_time": 0.44861728286743163, "loss": 0.29510778188705444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74839712902528, "step_time": 0.406536849975586} +{"epoch": 0, "iter": 15819, "iter_tflops": 46.87873249567762, "iter_time": 0.44009495162963863, "loss": 0.32454580068588257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.87942485844349, "step_time": 0.40548991203308105} +{"epoch": 0, "iter": 15820, "iter_tflops": 47.96741473399417, "iter_time": 0.43010643005371096, "loss": 0.3723316490650177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.85918335825993, "step_time": 0.3978291244506836} +{"epoch": 0, "iter": 15821, "iter_tflops": 36.928802055054575, "iter_time": 0.5586721572875977, "loss": 0.4104885756969452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.859415785340616, "step_time": 0.5175964851379395} +{"epoch": 0, "iter": 15822, "iter_tflops": 10.836788059041238, "iter_time": 1.9038015136718751, "loss": 0.4253714680671692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.10331875890262, "step_time": 1.5744937515258788} +{"epoch": 0, "iter": 15823, "iter_tflops": 9.790301898399006, "iter_time": 
2.107299011230469, "loss": 0.5681207776069641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.873227446861105, "step_time": 1.7376146125793455} +{"epoch": 0, "iter": 15824, "iter_tflops": 31.23656836856271, "iter_time": 0.6604788742065429, "loss": 0.5383794903755188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.258007398971095, "step_time": 0.5124717998504639} +{"epoch": 0, "iter": 15825, "iter_tflops": 12.458874893418892, "iter_time": 1.212954345703125, "loss": 0.370424747467041, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 13.435992762125652, "step_time": 1.12474356842041} +{"epoch": 0, "iter": 15826, "iter_tflops": 12.588329067006173, "iter_time": 1.200480728149414, "loss": 0.232977956533432, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 15.576132347607162, "step_time": 0.9702053184509278} +{"epoch": 0, "iter": 15827, "iter_tflops": 25.280810967077638, "iter_time": 0.5977674713134765, "loss": 0.2609550952911377, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 26.80369158119823, "step_time": 0.5638046684265137} +{"epoch": 0, "iter": 15828, "iter_tflops": 26.614117106041412, "iter_time": 0.5678206939697266, "loss": 0.48665598034858704, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.30311605770854, "step_time": 0.5339357833862304} +{"epoch": 0, "iter": 15829, "iter_tflops": 19.437248825223083, "iter_time": 1.061420455932617, "loss": 0.5084648728370667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.320815042315957, "step_time": 1.0152689971923827} +{"epoch": 0, "iter": 15830, "iter_tflops": 18.94579566223066, "iter_time": 1.0889536590576172, "loss": 0.42398470640182495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.919314697609714, "step_time": 0.8279157657623291} +{"epoch": 0, "iter": 15831, "iter_tflops": 47.79443379100056, "iter_time": 0.43166310119628903, "loss": 0.4396686851978302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.893605842988336, "step_time": 0.39756523323059084} +{"epoch": 0, "iter": 15832, "iter_tflops": 48.6627677353362, 
"iter_time": 0.4239605445861817, "loss": 0.36413607001304626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.333468111746754, "step_time": 0.39422370147705077} +{"epoch": 0, "iter": 15833, "iter_tflops": 41.11699482818542, "iter_time": 0.5017655982971191, "loss": 0.3170469105243683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.448551524737816, "step_time": 0.46415671157836913} +{"epoch": 0, "iter": 15834, "iter_tflops": 43.68590447817807, "iter_time": 0.47225973129272464, "loss": 0.19025975465774536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.227034803812224, "step_time": 0.4368492240905762} +{"epoch": 0, "iter": 15835, "iter_tflops": 47.09746366672376, "iter_time": 0.4380510520935058, "loss": 0.35682618618011475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.930049583098636, "step_time": 0.40508685302734376} +{"epoch": 0, "iter": 15836, "iter_tflops": 51.668597394479086, "iter_time": 0.3992965660095215, "loss": 0.3593129813671112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.91502520257725, "step_time": 0.3689722652435302} +{"epoch": 0, "iter": 15837, "iter_tflops": 37.18599157959429, "iter_time": 0.5548082122802734, "loss": 0.11517426371574402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.273473431892384, "step_time": 0.5122749977111817} +{"epoch": 0, "iter": 15838, "iter_tflops": 9.453210659119627, "iter_time": 2.182443008422852, "loss": 0.142708882689476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.720587350921308, "step_time": 1.760243995666504} +{"epoch": 0, "iter": 15839, "iter_tflops": 20.621522827796436, "iter_time": 1.0004641113281252, "loss": 0.13568872213363647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.157482154667893, "step_time": 0.8909040012359619} +{"epoch": 0, "iter": 15840, "iter_tflops": 43.25019978351052, "iter_time": 0.4770172996520996, "loss": 0.14179030060768127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44831292243034, "step_time": 0.4441731510162354} +{"epoch": 0, "iter": 15841, "iter_tflops": 
23.451355923313454, "iter_time": 0.6531161499023438, "loss": 0.4370655119419098, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 25.044644927771493, "step_time": 0.6115662384033203} +{"epoch": 0, "iter": 15842, "iter_tflops": 26.263202017873972, "iter_time": 0.5831908569335936, "loss": 0.3444124162197113, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 28.222272854884096, "step_time": 0.5427082138061523} +{"epoch": 0, "iter": 15843, "iter_tflops": 26.79749089600396, "iter_time": 0.5715631866455078, "loss": 0.34363535046577454, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 28.59733638684568, "step_time": 0.5355904159545899} +{"epoch": 0, "iter": 15844, "iter_tflops": 25.872425791983506, "iter_time": 0.5919993515014648, "loss": 0.4575921297073364, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 27.472825173940915, "step_time": 0.5575130767822265} +{"epoch": 0, "iter": 15845, "iter_tflops": 21.79140976918776, "iter_time": 0.9467535018920898, "loss": 0.6982011198997498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.78765760555039, "step_time": 0.9053626251220701} +{"epoch": 0, "iter": 15846, "iter_tflops": 14.496524965312213, "iter_time": 1.4231751098632812, "loss": 0.5808258056640625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.50174190065807, "step_time": 1.1150892505645753} +{"epoch": 0, "iter": 15847, "iter_tflops": 37.191092581593566, "iter_time": 0.5547321166992187, "loss": 0.41396570205688477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.4504789325146, "step_time": 0.5100333557128907} +{"epoch": 0, "iter": 15848, "iter_tflops": 39.63021040826387, "iter_time": 0.5205900573730469, "loss": 0.6600207090377808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.26151427862509, "step_time": 0.476892541885376} +{"epoch": 0, "iter": 15849, "iter_tflops": 19.304278153538185, "iter_time": 1.068731674194336, "loss": 0.24476100504398346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.62869418425565, "step_time": 1.0001163101196289} +{"epoch": 0, "iter": 15850, 
"iter_tflops": 21.250437103946282, "iter_time": 0.9708550186157225, "loss": 0.23280517756938934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.041897741732935, "step_time": 0.7922269611358642} +{"epoch": 0, "iter": 15851, "iter_tflops": 54.3357884587872, "iter_time": 0.3796962203979492, "loss": 0.19595631957054138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.98005249432666, "step_time": 0.349797815322876} +{"epoch": 0, "iter": 15852, "iter_tflops": 47.347942658905914, "iter_time": 0.43573368453979494, "loss": 0.23201431334018707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19762252045182, "step_time": 0.40296975708007815} +{"epoch": 0, "iter": 15853, "iter_tflops": 33.15756711900133, "iter_time": 0.6222137298583985, "loss": 0.09256601333618164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.418458824402045, "step_time": 0.582495517730713} +{"epoch": 0, "iter": 15854, "iter_tflops": 10.21458614781083, "iter_time": 2.0197679290771484, "loss": 0.12197976559400558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.258457011949044, "step_time": 1.6830090026855467} +{"epoch": 0, "iter": 15855, "iter_tflops": 11.590579571014084, "iter_time": 1.7799880828857424, "loss": 0.1246223896741867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.166619819651155, "step_time": 1.45631729888916} +{"epoch": 0, "iter": 15856, "iter_tflops": 22.183079985364255, "iter_time": 0.9300373764038086, "loss": 0.07134731113910675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.24583308964315, "step_time": 0.6821135807037353} +{"epoch": 0, "iter": 15857, "iter_tflops": 16.48225221967757, "iter_time": 0.8797047576904298, "loss": 0.42037245631217957, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 17.688606195805402, "step_time": 0.8197093391418457} +{"epoch": 0, "iter": 15858, "iter_tflops": 6.4326482846016635, "iter_time": 2.254050750732422, "loss": 0.3620559573173523, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 7.433460076679487, "step_time": 1.950574234008789} +{"epoch": 0, 
"iter": 15859, "iter_tflops": 7.263802258924898, "iter_time": 1.996133041381836, "loss": 0.292946994304657, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 8.152689025222603, "step_time": 1.7784948806762695} +{"epoch": 0, "iter": 15860, "iter_tflops": 20.925359158086078, "iter_time": 0.6929159774780274, "loss": 0.3505637049674988, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 22.47126343119493, "step_time": 0.6452470169067382} +{"epoch": 0, "iter": 15861, "iter_tflops": 14.079160548540461, "iter_time": 1.0733627471923828, "loss": 0.35965386033058167, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 15.32076913075303, "step_time": 0.9863764877319336} +{"epoch": 0, "iter": 15862, "iter_tflops": 12.335696658411873, "iter_time": 1.2250663146972653, "loss": 0.36696165800094604, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 14.404040259655236, "step_time": 1.049153305053711} +{"epoch": 0, "iter": 15863, "iter_tflops": 25.547212134494334, "iter_time": 0.5915340728759766, "loss": 0.32071664929389954, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.287887129034008, "step_time": 0.5538005332946777} +{"epoch": 0, "iter": 15864, "iter_tflops": 28.393513232505953, "iter_time": 0.5322358779907226, "loss": 0.3824182152748108, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 30.147215786794664, "step_time": 0.5012750282287597} +{"epoch": 0, "iter": 15865, "iter_tflops": 33.02866126644238, "iter_time": 0.6246421356201172, "loss": 0.5208780765533447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.54464374062492, "step_time": 0.5804276351928711} +{"epoch": 0, "iter": 15866, "iter_tflops": 14.528297300210838, "iter_time": 1.4200627288818357, "loss": 0.5398300290107727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.542889894715625, "step_time": 1.1126147880554198} +{"epoch": 0, "iter": 15867, "iter_tflops": 35.684819508442686, "iter_time": 0.5781476211547851, "loss": 0.5496559143066406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.521036394980975, "step_time": 0.5355799179077149} 
+{"epoch": 0, "iter": 15868, "iter_tflops": 38.76373211081831, "iter_time": 0.5322267074584961, "loss": 0.6964524984359741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52278489280524, "step_time": 0.4851773834228516} +{"epoch": 0, "iter": 15869, "iter_tflops": 18.529514719667876, "iter_time": 1.1134179077148438, "loss": 0.13958458602428436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.895584685454224, "step_time": 1.0369684448242187} +{"epoch": 0, "iter": 15870, "iter_tflops": 7.710634498255901, "iter_time": 2.6756674194335934, "loss": 0.2006201446056366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.401264755948313, "step_time": 2.1945019149780274} +{"epoch": 0, "iter": 15871, "iter_tflops": 13.991519797504068, "iter_time": 1.4745427093505858, "loss": 0.32005575299263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.96965067543555, "step_time": 1.0875842609405517} +{"epoch": 0, "iter": 15872, "iter_tflops": 42.14519759126893, "iter_time": 0.48952418518066404, "loss": 0.3165082335472107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.932166749588475, "step_time": 0.4491643867492676} +{"epoch": 0, "iter": 15873, "iter_tflops": 10.663253027148146, "iter_time": 1.6209710693359376, "loss": 0.2866944968700409, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 11.280580342917709, "step_time": 1.5322637786865232} +{"epoch": 0, "iter": 15874, "iter_tflops": 15.575097859082545, "iter_time": 1.1097731018066406, "loss": 0.26416417956352234, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 20.354390958766192, "step_time": 0.8491939010620118} +{"epoch": 0, "iter": 15875, "iter_tflops": 25.634644008851883, "iter_time": 0.6742759780883788, "loss": 0.4041752815246582, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 27.48146659813912, "step_time": 0.6289629631042482} +{"epoch": 0, "iter": 15876, "iter_tflops": 29.056226751106806, "iter_time": 0.5948750610351563, "loss": 0.34389397501945496, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 31.07986079395507, "step_time": 
0.5561422805786133} +{"epoch": 0, "iter": 15877, "iter_tflops": 23.75785545575827, "iter_time": 0.868390396118164, "loss": 0.26971346139907837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.599083157524426, "step_time": 0.8059309539794921} +{"epoch": 0, "iter": 15878, "iter_tflops": 8.704572694881453, "iter_time": 2.370144317626953, "loss": 0.2543294131755829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.313952668973005, "step_time": 2.000309112548828} +{"epoch": 0, "iter": 15879, "iter_tflops": 10.320407043810965, "iter_time": 1.9990581207275393, "loss": 0.28521013259887695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.296224368297548, "step_time": 1.5516505241394043} +{"epoch": 0, "iter": 15880, "iter_tflops": 46.638144422002895, "iter_time": 0.44236523056030275, "loss": 0.38730138540267944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.76148436644779, "step_time": 0.40643203735351563} +{"epoch": 0, "iter": 15881, "iter_tflops": 20.97733462413964, "iter_time": 0.7613633193969727, "loss": 0.3910549283027649, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 22.214737555583056, "step_time": 0.7189539413452148} +{"epoch": 0, "iter": 15882, "iter_tflops": 7.952996403485453, "iter_time": 2.0082208404541015, "loss": 0.393511027097702, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 9.606981702631035, "step_time": 1.6624756469726563} +{"epoch": 0, "iter": 15883, "iter_tflops": 23.590571872699368, "iter_time": 0.6770235671997071, "loss": 0.3125503659248352, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 25.527065295095895, "step_time": 0.6256642875671387} +{"epoch": 0, "iter": 15884, "iter_tflops": 24.63573632621153, "iter_time": 0.648301025390625, "loss": 0.2707791328430176, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 26.462912141796306, "step_time": 0.6035380020141602} +{"epoch": 0, "iter": 15885, "iter_tflops": 19.877599167459707, "iter_time": 1.0379067077636719, "loss": 0.41400396823883057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.446167141424183, 
"step_time": 0.9619944381713866} +{"epoch": 0, "iter": 15886, "iter_tflops": 29.281998885937252, "iter_time": 0.7045657501220703, "loss": 0.6651637554168701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.39994824054472, "step_time": 0.6367631626129151} +{"epoch": 0, "iter": 15887, "iter_tflops": 45.23449465712012, "iter_time": 0.4560920524597168, "loss": 0.6747381091117859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.164616928255924, "step_time": 0.41963295555114744} +{"epoch": 0, "iter": 15888, "iter_tflops": 49.377607741092596, "iter_time": 0.4178228645324707, "loss": 0.7547492980957031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.42483106436443, "step_time": 0.3861704959869384} +{"epoch": 0, "iter": 15889, "iter_tflops": 42.67692621255677, "iter_time": 0.48342501068115235, "loss": 0.8813371062278748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.385077145556714, "step_time": 0.44477868270874027} +{"epoch": 0, "iter": 15890, "iter_tflops": 40.726503249338386, "iter_time": 0.5065765991210937, "loss": 0.5979470014572144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.15348215680427, "step_time": 0.4672585830688476} +{"epoch": 0, "iter": 15891, "iter_tflops": 42.5509755998967, "iter_time": 0.48485594558715817, "loss": 0.5912969708442688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79444417474209, "step_time": 0.4505152072906495} +{"epoch": 0, "iter": 15892, "iter_tflops": 41.14500762570652, "iter_time": 0.5014239807128907, "loss": 0.7186410427093506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84230030554775, "step_time": 0.4705750694274902} +{"epoch": 0, "iter": 15893, "iter_tflops": 30.838524734165567, "iter_time": 0.6690039062500001, "loss": 0.4264325499534607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.86535768122397, "step_time": 0.6277458992004394} +{"epoch": 0, "iter": 15894, "iter_tflops": 37.564566575857086, "iter_time": 0.5492168655395508, "loss": 0.46091577410697937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
41.29647435450025, "step_time": 0.49958486366271976} +{"epoch": 0, "iter": 15895, "iter_tflops": 40.38631135770266, "iter_time": 0.5108437194824219, "loss": 0.3695710003376007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.20673547053129, "step_time": 0.4666957035064697} +{"epoch": 0, "iter": 15896, "iter_tflops": 39.81776737330412, "iter_time": 0.5181378784179687, "loss": 0.35042473673820496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.18495262257249, "step_time": 0.47773801422119133} +{"epoch": 0, "iter": 15897, "iter_tflops": 20.561370866604573, "iter_time": 1.003390953063965, "loss": 0.6421026587486267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.138795532648285, "step_time": 0.9318977394104004} +{"epoch": 0, "iter": 15898, "iter_tflops": 28.089501117578635, "iter_time": 0.7344770355224609, "loss": 0.6891549825668335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.25747311096517, "step_time": 0.66003715133667} +{"epoch": 0, "iter": 15899, "iter_tflops": 46.00925916987962, "iter_time": 0.4484117736816406, "loss": 0.5430384278297424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.71382015238075, "step_time": 0.4149971466064453} +{"epoch": 0, "iter": 15900, "iter_tflops": 45.622683450784244, "iter_time": 0.45221131134033205, "loss": 0.5034153461456299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42254544703805, "step_time": 0.4174429569244385} +{"epoch": 0, "iter": 15901, "iter_tflops": 27.092415236765802, "iter_time": 0.7615080947875977, "loss": 0.36074548959732056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.721911783972526, "step_time": 0.7183050231933594} +{"epoch": 0, "iter": 15902, "iter_tflops": 19.257023929547096, "iter_time": 1.0713542022705078, "loss": 0.4714587926864624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.75396695634076, "step_time": 0.9483830490112305} +{"epoch": 0, "iter": 15903, "iter_tflops": 40.22803028435941, "iter_time": 0.5128536834716797, "loss": 0.3520345985889435, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 43.98533665619898, "step_time": 0.46904480171203616} +{"epoch": 0, "iter": 15904, "iter_tflops": 32.78624404560056, "iter_time": 0.6292606582641601, "loss": 0.3506139814853668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.907229761830436, "step_time": 0.5745665607452393} +{"epoch": 0, "iter": 15905, "iter_tflops": 21.345711355645108, "iter_time": 0.966521713256836, "loss": 0.38911956548690796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.79328487331749, "step_time": 0.9051391067504883} +{"epoch": 0, "iter": 15906, "iter_tflops": 22.005622241313585, "iter_time": 0.9375373840332031, "loss": 0.4497922956943512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.668567177261103, "step_time": 0.8363312454223631} +{"epoch": 0, "iter": 15907, "iter_tflops": 43.43849568854208, "iter_time": 0.47494953918457034, "loss": 0.6771438717842102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.622206855869834, "step_time": 0.442516450881958} +{"epoch": 0, "iter": 15908, "iter_tflops": 48.0118194569565, "iter_time": 0.4297086372375488, "loss": 0.6165488362312317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.571750459604026, "step_time": 0.40004640769958494} +{"epoch": 0, "iter": 15909, "iter_tflops": 24.45025851160476, "iter_time": 0.843798583984375, "loss": 0.001664034673012793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.623165080048086, "step_time": 0.805173500061035} +{"epoch": 0, "iter": 15910, "iter_tflops": 15.730512670612502, "iter_time": 1.311533447265625, "loss": 0.0037241526879370213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.57213197026497, "step_time": 1.0541055793762208} +{"epoch": 0, "iter": 15911, "iter_tflops": 43.62158205276612, "iter_time": 0.47295610427856444, "loss": 0.0022872104309499264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24142410712535, "step_time": 0.4276634426116943} +{"epoch": 0, "iter": 15912, "iter_tflops": 44.03622745151113, "iter_time": 0.4685027465820313, "loss": 0.01763979159295559, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 48.78734525409272, "step_time": 0.4228779697418213} +{"epoch": 0, "iter": 15913, "iter_tflops": 21.441768783136812, "iter_time": 0.9621917724609376, "loss": 0.29698315262794495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.100367273530374, "step_time": 0.8931067314147949} +{"epoch": 0, "iter": 15914, "iter_tflops": 25.347986277023068, "iter_time": 0.8139144973754884, "loss": 0.29645034670829773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.454202677694237, "step_time": 0.725063138961792} +{"epoch": 0, "iter": 15915, "iter_tflops": 48.276545684898174, "iter_time": 0.4273523139953613, "loss": 0.29372721910476685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.53352028965749, "step_time": 0.39272246360778806} +{"epoch": 0, "iter": 15916, "iter_tflops": 51.62227882533899, "iter_time": 0.3996548385620117, "loss": 0.26763299107551575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.68432990274832, "step_time": 0.37050088500976563} +{"epoch": 0, "iter": 15917, "iter_tflops": 40.043626756445924, "iter_time": 0.5152154083251954, "loss": 0.5134344100952148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24277866684812, "step_time": 0.47709916305542} +{"epoch": 0, "iter": 15918, "iter_tflops": 38.14951093351179, "iter_time": 0.540795753479004, "loss": 0.7046346664428711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.92669435379248, "step_time": 0.49207536697387694} +{"epoch": 0, "iter": 15919, "iter_tflops": 39.49716124330111, "iter_time": 0.5223437042236329, "loss": 0.5057069063186646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.28214101692218, "step_time": 0.47666527175903317} +{"epoch": 0, "iter": 15920, "iter_tflops": 40.21021524231026, "iter_time": 0.5130809020996093, "loss": 0.4688345789909363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.56406590404645, "step_time": 0.4735805320739745} +{"epoch": 0, "iter": 15921, "iter_tflops": 19.01812729476457, "iter_time": 1.084812042236328, "loss": 0.36418843269348145, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.31514993381095, "step_time": 1.0155521163940429} +{"epoch": 0, "iter": 15922, "iter_tflops": 17.910005467672644, "iter_time": 1.1519311676025392, "loss": 0.2699887454509735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.015239819444393, "step_time": 0.9371278114318848} +{"epoch": 0, "iter": 15923, "iter_tflops": 45.40379594363382, "iter_time": 0.45439138031005855, "loss": 0.40274322032928467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95278861731609, "step_time": 0.4214487895965576} +{"epoch": 0, "iter": 15924, "iter_tflops": 48.12685723905224, "iter_time": 0.42868150329589844, "loss": 0.5619999170303345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.20871589119577, "step_time": 0.39516569519042966} +{"epoch": 0, "iter": 15925, "iter_tflops": 17.794410683516215, "iter_time": 1.159414260864258, "loss": 0.17838206887245178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.729999555545984, "step_time": 1.1014999465942383} +{"epoch": 0, "iter": 15926, "iter_tflops": 13.744600902686695, "iter_time": 1.5010325622558593, "loss": 0.3249775767326355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.63447318254198, "step_time": 1.2402613105773925} +{"epoch": 0, "iter": 15927, "iter_tflops": 39.41690094555211, "iter_time": 0.5234072952270508, "loss": 0.259065181016922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.31787763284086, "step_time": 0.476272029876709} +{"epoch": 0, "iter": 15928, "iter_tflops": 43.958543552973495, "iter_time": 0.4693306884765625, "loss": 0.24653825163841248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.17256932354037, "step_time": 0.4282747173309326} +{"epoch": 0, "iter": 15929, "iter_tflops": 21.114502544808573, "iter_time": 0.9201963806152343, "loss": 0.026113023981451988, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 22.5890226296026, "step_time": 0.860129680633545} +{"epoch": 0, "iter": 15930, "iter_tflops": 17.419927799312106, "iter_time": 1.1153598937988283, "loss": 
0.02453511580824852, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 19.775515800832196, "step_time": 0.9825022525787352} +{"epoch": 0, "iter": 15931, "iter_tflops": 40.602996405343134, "iter_time": 0.4785235214233398, "loss": 0.0689266249537468, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 45.03324445238777, "step_time": 0.43144767951965335} +{"epoch": 0, "iter": 15932, "iter_tflops": 41.544994738190006, "iter_time": 0.46767339706420896, "loss": 0.09298599511384964, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 45.963035519584814, "step_time": 0.42271987915039066} +{"epoch": 0, "iter": 15933, "iter_tflops": 15.652323037496757, "iter_time": 1.3180850830078126, "loss": 0.618384838104248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.29991393596901, "step_time": 1.26571794128418} +{"epoch": 0, "iter": 15934, "iter_tflops": 13.776918761817045, "iter_time": 1.4975114440917965, "loss": 0.5526122450828552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.247521849725283, "step_time": 1.353078468322754} +{"epoch": 0, "iter": 15935, "iter_tflops": 10.731871343595717, "iter_time": 1.9224134216308593, "loss": 0.6741594076156616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.044707171451664, "step_time": 1.712876304626465} +{"epoch": 0, "iter": 15936, "iter_tflops": 37.09076621078054, "iter_time": 0.5562326049804688, "loss": 0.6877478957176208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.49680400556075, "step_time": 0.5094499187469482} +{"epoch": 0, "iter": 15937, "iter_tflops": 14.19454995257473, "iter_time": 1.0444900817871094, "loss": 0.4579225480556488, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.113745777480506, "step_time": 0.9809657287597654} +{"epoch": 0, "iter": 15938, "iter_tflops": 14.609554277051163, "iter_time": 1.0148199157714843, "loss": 0.24899083375930786, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 17.46323009014723, "step_time": 0.8489876480102538} +{"epoch": 0, "iter": 15939, "iter_tflops": 27.43937557722014, "iter_time": 
0.5403208465576171, "loss": 0.30405882000923157, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 29.18861306459195, "step_time": 0.5079400863647461} +{"epoch": 0, "iter": 15940, "iter_tflops": 27.171462429030722, "iter_time": 0.5456484603881837, "loss": 0.3958033323287964, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.815683710114403, "step_time": 0.5145137901306153} +{"epoch": 0, "iter": 15941, "iter_tflops": 35.08771184259241, "iter_time": 0.5879862899780274, "loss": 0.6713835000991821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.849338248778004, "step_time": 0.5450846557617186} +{"epoch": 0, "iter": 15942, "iter_tflops": 32.73817145767979, "iter_time": 0.6301846618652345, "loss": 0.530025839805603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.07288537069405, "step_time": 0.5719280090332031} +{"epoch": 0, "iter": 15943, "iter_tflops": 36.500869288751915, "iter_time": 0.5652219772338867, "loss": 0.5548482537269592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.63701991165664, "step_time": 0.5205006217956543} +{"epoch": 0, "iter": 15944, "iter_tflops": 37.15757097571492, "iter_time": 0.5552325668334961, "loss": 0.6129528880119324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17686154594322, "step_time": 0.5135068473815918} +{"epoch": 0, "iter": 15945, "iter_tflops": 25.175635966711344, "iter_time": 0.8194864883422851, "loss": 0.185262992978096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.349784772774157, "step_time": 0.7543420791625975} +{"epoch": 0, "iter": 15946, "iter_tflops": 45.62598807304068, "iter_time": 0.4521785583496094, "loss": 0.15237131714820862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.44227375625551, "step_time": 0.4172763900756836} +{"epoch": 0, "iter": 15947, "iter_tflops": 48.1413438159752, "iter_time": 0.42855250549316404, "loss": 0.18484430015087128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.329439474126104, "step_time": 0.39425405120849605} +{"epoch": 0, "iter": 15948, "iter_tflops": 45.583069515784004, 
"iter_time": 0.452604305267334, "loss": 0.10459157079458237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.344966241563085, "step_time": 0.41809925270080567} +{"epoch": 0, "iter": 15949, "iter_tflops": 28.963839895677978, "iter_time": 0.7123051910400391, "loss": 0.351121187210083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.74971972589115, "step_time": 0.6709359855651856} +{"epoch": 0, "iter": 15950, "iter_tflops": 11.504034890922265, "iter_time": 1.79337890625, "loss": 0.4205048084259033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.197840018663578, "step_time": 1.453114944458008} +{"epoch": 0, "iter": 15951, "iter_tflops": 15.826928740553202, "iter_time": 1.303543716430664, "loss": 0.35874393582344055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.380271589031054, "step_time": 0.9218428573608398} +{"epoch": 0, "iter": 15952, "iter_tflops": 38.726735209465694, "iter_time": 0.5327351608276367, "loss": 0.36822423338890076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.43206859810892, "step_time": 0.4862146530151368} +{"epoch": 0, "iter": 15953, "iter_tflops": 15.793526534655971, "iter_time": 1.052804443359375, "loss": 0.33399003744125366, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 17.094295074143883, "step_time": 0.9726926345825195} +{"epoch": 0, "iter": 15954, "iter_tflops": 18.70030627197405, "iter_time": 0.8891562881469727, "loss": 0.35673025250434875, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 20.791969105505583, "step_time": 0.79970756149292} +{"epoch": 0, "iter": 15955, "iter_tflops": 25.290321136578974, "iter_time": 0.6574647598266601, "loss": 0.3407857120037079, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 27.250548546396757, "step_time": 0.6101710166931151} +{"epoch": 0, "iter": 15956, "iter_tflops": 27.005056171888736, "iter_time": 0.6157178421020507, "loss": 0.40542376041412354, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 28.96458868914044, "step_time": 0.5740628700256347} +{"epoch": 0, "iter": 15957, "iter_tflops": 
21.983590509948197, "iter_time": 0.9384769744873047, "loss": 0.02934182994067669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.617451326988775, "step_time": 0.8735529174804686} +{"epoch": 0, "iter": 15958, "iter_tflops": 27.561737789137492, "iter_time": 0.7485410995483399, "loss": 0.030899984762072563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.981892917475655, "step_time": 0.6659081020355224} +{"epoch": 0, "iter": 15959, "iter_tflops": 51.4030217588178, "iter_time": 0.40135954666137696, "loss": 0.01887083612382412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.12566181299358, "step_time": 0.367587532043457} +{"epoch": 0, "iter": 15960, "iter_tflops": 57.63668828327562, "iter_time": 0.35795071029663084, "loss": 0.022720810025930405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.15621670456059, "step_time": 0.3266676597595215} +{"epoch": 0, "iter": 15961, "iter_tflops": 20.847518368940204, "iter_time": 0.9896186752319337, "loss": 0.567101776599884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.822984921860833, "step_time": 0.9453836669921875} +{"epoch": 0, "iter": 15962, "iter_tflops": 14.208335209863321, "iter_time": 1.4520415802001951, "loss": 0.5686691999435425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.04647758759579, "step_time": 1.1432199668884278} +{"epoch": 0, "iter": 15963, "iter_tflops": 37.79930182694239, "iter_time": 0.5458062057495117, "loss": 0.6872657537460327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.141726341952136, "step_time": 0.5014639720916748} +{"epoch": 0, "iter": 15964, "iter_tflops": 41.22890903237999, "iter_time": 0.5004035758972168, "loss": 0.6811670660972595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.90521099559457, "step_time": 0.4594365119934081} +{"epoch": 0, "iter": 15965, "iter_tflops": 18.281141429129516, "iter_time": 1.128545150756836, "loss": 0.2963097393512726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.57490441659933, "step_time": 1.053956283569336} +{"epoch": 0, "iter": 15966, 
"iter_tflops": 23.000506071271996, "iter_time": 0.8969843292236329, "loss": 0.20483560860157013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.75878157491039, "step_time": 0.5769518032073975} +{"epoch": 0, "iter": 15967, "iter_tflops": 48.64077103332172, "iter_time": 0.42415227127075195, "loss": 0.2637779414653778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.75246411386668, "step_time": 0.39109250831604003} +{"epoch": 0, "iter": 15968, "iter_tflops": 50.318527706764314, "iter_time": 0.410009880065918, "loss": 0.27545487880706787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.32131215677632, "step_time": 0.37979740715026855} +{"epoch": 0, "iter": 15969, "iter_tflops": 22.083952990968868, "iter_time": 0.9342119827270507, "loss": 0.040271054953336716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.156471448586714, "step_time": 0.890942886352539} +{"epoch": 0, "iter": 15970, "iter_tflops": 20.42083456987619, "iter_time": 1.0102962951660155, "loss": 0.03458872064948082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.09274544847332, "step_time": 0.822193552017212} +{"epoch": 0, "iter": 15971, "iter_tflops": 41.7883148224707, "iter_time": 0.4937048454284668, "loss": 0.014271477237343788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.61167992333924, "step_time": 0.4426163902282715} +{"epoch": 0, "iter": 15972, "iter_tflops": 43.49869433255482, "iter_time": 0.47429224777221674, "loss": 0.03479573130607605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.96210677724209, "step_time": 0.43015402984619133} +{"epoch": 0, "iter": 15973, "iter_tflops": 20.204511215517506, "iter_time": 1.0211132202148439, "loss": 0.5217616558074951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.442181759240583, "step_time": 0.962173240661621} +{"epoch": 0, "iter": 15974, "iter_tflops": 7.841522299764954, "iter_time": 2.6310061645507807, "loss": 0.4306797385215759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.69982377234031, "step_time": 1.9281713371276854} +{"epoch": 0, 
"iter": 15975, "iter_tflops": 14.933614637414644, "iter_time": 1.3815204162597656, "loss": 0.45680147409439087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.966188388954098, "step_time": 1.1483289089202882} +{"epoch": 0, "iter": 15976, "iter_tflops": 36.77828218319332, "iter_time": 0.5609585952758789, "loss": 0.4617934226989746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14673451163042, "step_time": 0.5138921947479247} +{"epoch": 0, "iter": 15977, "iter_tflops": 13.821327468267985, "iter_time": 1.1733516387939453, "loss": 0.19516834616661072, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 14.670189506756888, "step_time": 1.1054579238891602} +{"epoch": 0, "iter": 15978, "iter_tflops": 13.404428077040691, "iter_time": 1.209844772338867, "loss": 0.4808901846408844, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 16.86290748149871, "step_time": 0.9617129936218262} +{"epoch": 0, "iter": 15979, "iter_tflops": 29.071916011890917, "iter_time": 0.557833106994629, "loss": 0.37706026434898376, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.856040540788015, "step_time": 0.525578685760498} +{"epoch": 0, "iter": 15980, "iter_tflops": 29.74146557908741, "iter_time": 0.5452749862670898, "loss": 0.32616734504699707, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 31.519762570973, "step_time": 0.5145114021301269} +{"epoch": 0, "iter": 15981, "iter_tflops": 40.64344734205793, "iter_time": 0.5076118011474609, "loss": 0.20495326817035675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1420132152615, "step_time": 0.46737998580932616} +{"epoch": 0, "iter": 15982, "iter_tflops": 44.307448589812154, "iter_time": 0.46563488006591797, "loss": 0.2550673484802246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12659449975642, "step_time": 0.42868384361267087} +{"epoch": 0, "iter": 15983, "iter_tflops": 43.89339142549408, "iter_time": 0.470027328491211, "loss": 0.2030065804719925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.318087620328114, "step_time": 0.4360086078643799} 
+{"epoch": 0, "iter": 15984, "iter_tflops": 50.90586726683614, "iter_time": 0.4052792854309082, "loss": 0.22141703963279724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.50624396410861, "step_time": 0.37168959808349605} +{"epoch": 0, "iter": 15985, "iter_tflops": 22.871932357690238, "iter_time": 0.8223846588134767, "loss": 0.16718803346157074, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 24.104562412208743, "step_time": 0.7803305435180664} +{"epoch": 0, "iter": 15986, "iter_tflops": 14.793276358596064, "iter_time": 1.2714915771484374, "loss": 0.2566920816898346, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 19.449685857729726, "step_time": 0.9670863800048828} +{"epoch": 0, "iter": 15987, "iter_tflops": 46.22450537365142, "iter_time": 0.4069167671203613, "loss": 0.23064711689949036, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 50.1458373665337, "step_time": 0.37509646415710457} +{"epoch": 0, "iter": 15988, "iter_tflops": 44.75886581657761, "iter_time": 0.4202413520812988, "loss": 0.1988644003868103, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 48.223400729688166, "step_time": 0.390049768447876} +{"epoch": 0, "iter": 15989, "iter_tflops": 30.023403541321272, "iter_time": 0.6871670455932617, "loss": 0.3633589744567871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.78571591861829, "step_time": 0.6490680770874023} +{"epoch": 0, "iter": 15990, "iter_tflops": 15.420361464004381, "iter_time": 1.3379124450683595, "loss": 0.5494016408920288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.68591182297836, "step_time": 1.1040988368988036} +{"epoch": 0, "iter": 15991, "iter_tflops": 39.58691152215867, "iter_time": 0.5211594619750977, "loss": 0.44626373052597046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39418718443051, "step_time": 0.47543449592590326} +{"epoch": 0, "iter": 15992, "iter_tflops": 41.7444453689644, "iter_time": 0.49422368240356446, "loss": 0.30092698335647583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.45872664965046, "step_time": 
0.45384230995178226} +{"epoch": 0, "iter": 15993, "iter_tflops": 32.788396321391716, "iter_time": 0.629219352722168, "loss": 0.060337115079164505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.281094732970665, "step_time": 0.5686458377838135} +{"epoch": 0, "iter": 15994, "iter_tflops": 37.996516954678, "iter_time": 0.5429732818603517, "loss": 0.10310002416372299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.84432360164333, "step_time": 0.4930440196990966} +{"epoch": 0, "iter": 15995, "iter_tflops": 43.44466980946623, "iter_time": 0.47488204193115235, "loss": 0.034307483583688736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.798823668420454, "step_time": 0.431623456954956} +{"epoch": 0, "iter": 15996, "iter_tflops": 47.328521489963116, "iter_time": 0.43591248703002927, "loss": 0.06693728268146515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.01427503328919, "step_time": 0.39664291191101075} +{"epoch": 0, "iter": 15997, "iter_tflops": 27.59963128241556, "iter_time": 0.7475133743286133, "loss": 0.07043135911226273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.799332924894067, "step_time": 0.6923340721130371} +{"epoch": 0, "iter": 15998, "iter_tflops": 9.818802343732566, "iter_time": 2.1011822814941405, "loss": 0.09762910008430481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.560070815478225, "step_time": 1.7846857376098633} +{"epoch": 0, "iter": 15999, "iter_tflops": 14.612238526048817, "iter_time": 1.411905059814453, "loss": 0.08168575167655945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.773999970623322, "step_time": 1.1607456703186037} +{"epoch": 0, "iter": 16000, "iter_tflops": 41.47406509364537, "iter_time": 0.4974456558227539, "loss": 0.09703528881072998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.8394493690037, "step_time": 0.45007289123535155} +{"epoch": 0, "iter": 16001, "iter_tflops": 18.684814028635664, "iter_time": 0.7934821624755859, "loss": 0.28156372904777527, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 
19.968914228520315, "step_time": 0.7424573249816895} +{"epoch": 0, "iter": 16002, "iter_tflops": 6.192024182908581, "iter_time": 2.3943812561035154, "loss": 0.3093295395374298, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 7.728991524359208, "step_time": 1.918240768432617} +{"epoch": 0, "iter": 16003, "iter_tflops": 8.182896466987339, "iter_time": 1.8118360290527344, "loss": 0.38913172483444214, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 9.267000191069044, "step_time": 1.5998776664733887} +{"epoch": 0, "iter": 16004, "iter_tflops": 19.751507505768824, "iter_time": 0.7506296234130859, "loss": 0.2615493834018707, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 21.868676151036787, "step_time": 0.6779590377807617} +{"epoch": 0, "iter": 16005, "iter_tflops": 13.534645010814751, "iter_time": 1.0923988952636718, "loss": 0.24348093569278717, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 14.271359845360516, "step_time": 1.0360071792602539} +{"epoch": 0, "iter": 16006, "iter_tflops": 6.800821594950451, "iter_time": 2.1740360412597655, "loss": 0.37584465742111206, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 8.014984309194993, "step_time": 1.844698715209961} +{"epoch": 0, "iter": 16007, "iter_tflops": 10.075801215837338, "iter_time": 1.4674000549316406, "loss": 0.5293256044387817, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 12.62391072784576, "step_time": 1.1712084770202638} +{"epoch": 0, "iter": 16008, "iter_tflops": 26.74349445629442, "iter_time": 0.5528533782958984, "loss": 0.4387188255786896, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 28.428226140265416, "step_time": 0.520089828491211} +{"epoch": 0, "iter": 16009, "iter_tflops": 26.343835064156014, "iter_time": 0.6842498245239257, "loss": 0.34145164489746094, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 28.311328270576645, "step_time": 0.6366979446411133} +{"epoch": 0, "iter": 16010, "iter_tflops": 17.00079096752307, "iter_time": 1.0602897567749023, "loss": 0.49776333570480347, "lr": 3e-05, "seqlen": 7184.0, 
"step_tflops": 20.14982008131267, "step_time": 0.8945868721008301} +{"epoch": 0, "iter": 16011, "iter_tflops": 28.584188154619817, "iter_time": 0.630620132446289, "loss": 0.34678035974502563, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 30.730547775209807, "step_time": 0.5865747871398926} +{"epoch": 0, "iter": 16012, "iter_tflops": 30.68138578596137, "iter_time": 0.5875146789550781, "loss": 0.33381178975105286, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 32.502682336371144, "step_time": 0.5545931358337403} +{"epoch": 0, "iter": 16013, "iter_tflops": 25.529834421470124, "iter_time": 0.8081170120239258, "loss": 0.023726681247353554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.911965672076057, "step_time": 0.7666141433715821} +{"epoch": 0, "iter": 16014, "iter_tflops": 21.97133380904976, "iter_time": 0.939000503540039, "loss": 0.018190598115324974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.26558099519838, "step_time": 0.7566716995239258} +{"epoch": 0, "iter": 16015, "iter_tflops": 46.93313377326101, "iter_time": 0.43958482742309574, "loss": 0.03547872230410576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.6633855587368, "step_time": 0.3993368473052979} +{"epoch": 0, "iter": 16016, "iter_tflops": 48.491710960484724, "iter_time": 0.4254560852050781, "loss": 0.027130957692861557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.25273963258968, "step_time": 0.3874184436798096} +{"epoch": 0, "iter": 16017, "iter_tflops": 31.86270619536315, "iter_time": 0.6474997253417969, "loss": 0.7112321853637695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.80102265454287, "step_time": 0.5928300933837891} +{"epoch": 0, "iter": 16018, "iter_tflops": 36.43735149095474, "iter_time": 0.566207275390625, "loss": 0.7306561470031738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.91478087066188, "step_time": 0.5168785362243653} +{"epoch": 0, "iter": 16019, "iter_tflops": 38.59507290531962, "iter_time": 0.5345525207519531, "loss": 0.6523478031158447, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 41.85195061940089, "step_time": 0.4929541683197022} +{"epoch": 0, "iter": 16020, "iter_tflops": 36.59743276173405, "iter_time": 0.5637306213378906, "loss": 0.590401828289032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.881175503167434, "step_time": 0.5173140773773193} +{"epoch": 0, "iter": 16021, "iter_tflops": 17.51075889170961, "iter_time": 1.178195281982422, "loss": 0.5092945098876953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.636515568668933, "step_time": 1.1070252609252929} +{"epoch": 0, "iter": 16022, "iter_tflops": 28.01762541497088, "iter_time": 0.73636124420166, "loss": 0.7003849148750305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.79599527292372, "step_time": 0.5763520011901856} +{"epoch": 0, "iter": 16023, "iter_tflops": 43.592017244428426, "iter_time": 0.4732768707275391, "loss": 0.7559700012207031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.93274482063328, "step_time": 0.4395884704589844} +{"epoch": 0, "iter": 16024, "iter_tflops": 38.66689528787874, "iter_time": 0.5335596084594727, "loss": 0.5732628107070923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.39602612838533, "step_time": 0.498383430480957} +{"epoch": 0, "iter": 16025, "iter_tflops": 43.117141227067904, "iter_time": 0.47848936462402347, "loss": 0.0404667928814888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.28144623955575, "step_time": 0.4363464984893799} +{"epoch": 0, "iter": 16026, "iter_tflops": 51.04161467168527, "iter_time": 0.40420142745971677, "loss": 0.05744548514485359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.71044380115857, "step_time": 0.37032721519470213} +{"epoch": 0, "iter": 16027, "iter_tflops": 53.424635007808156, "iter_time": 0.3861719131469727, "loss": 0.041160114109516144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.27917029559136, "step_time": 0.3540045852661133} +{"epoch": 0, "iter": 16028, "iter_tflops": 52.69806828406715, "iter_time": 0.3914962005615234, "loss": 0.0560767687857151, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 57.464906822059504, "step_time": 0.3590207424163818} +{"epoch": 0, "iter": 16029, "iter_tflops": 33.802224025338326, "iter_time": 0.6103472213745117, "loss": 0.4071333408355713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.184780824444005, "step_time": 0.5701594161987305} +{"epoch": 0, "iter": 16030, "iter_tflops": 32.75648742758407, "iter_time": 0.6298322906494142, "loss": 0.534248948097229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.18749881148433, "step_time": 0.5701165924072266} +{"epoch": 0, "iter": 16031, "iter_tflops": 40.32915260435194, "iter_time": 0.511567741394043, "loss": 0.5462672114372253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13771907615995, "step_time": 0.4674254570007324} +{"epoch": 0, "iter": 16032, "iter_tflops": 38.44085491654411, "iter_time": 0.5366970520019531, "loss": 0.40720075368881226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1480486629676, "step_time": 0.48949107170104983} +{"epoch": 0, "iter": 16033, "iter_tflops": 37.72509077536842, "iter_time": 0.5468798904418946, "loss": 0.10576746612787247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.91020752067622, "step_time": 0.49226894187927245} +{"epoch": 0, "iter": 16034, "iter_tflops": 41.36398087639388, "iter_time": 0.49876953506469734, "loss": 0.052892401814460754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.860029832813574, "step_time": 0.440270601272583} +{"epoch": 0, "iter": 16035, "iter_tflops": 41.300520263029405, "iter_time": 0.49953592300415034, "loss": 0.05742868781089783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.57607853982233, "step_time": 0.4526737308502197} +{"epoch": 0, "iter": 16036, "iter_tflops": 40.929480109887926, "iter_time": 0.5040643920898438, "loss": 0.08685214817523956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.81765845639152, "step_time": 0.4603340339660644} +{"epoch": 0, "iter": 16037, "iter_tflops": 29.925975058225465, "iter_time": 0.6894042205810547, "loss": 
0.027999771758913994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.562154148628004, "step_time": 0.6335911750793457} +{"epoch": 0, "iter": 16038, "iter_tflops": 50.62528410929166, "iter_time": 0.4075254859924316, "loss": 0.02782326750457287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.7106876955976, "step_time": 0.37032559394836423} +{"epoch": 0, "iter": 16039, "iter_tflops": 50.92607058998685, "iter_time": 0.4051185035705567, "loss": 0.030765490606427193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.4074091886176, "step_time": 0.37235261154174804} +{"epoch": 0, "iter": 16040, "iter_tflops": 49.43208198922101, "iter_time": 0.4173624229431152, "loss": 0.045191530138254166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84565118088407, "step_time": 0.38315245628356936} +{"epoch": 0, "iter": 16041, "iter_tflops": 32.3262162716869, "iter_time": 0.6382155380249023, "loss": 0.17419631779193878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.495420686693684, "step_time": 0.5980820960998535} +{"epoch": 0, "iter": 16042, "iter_tflops": 14.42605108586738, "iter_time": 1.4301275787353518, "loss": 0.19258692860603333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.322167226806698, "step_time": 1.1910226497650147} +{"epoch": 0, "iter": 16043, "iter_tflops": 49.21502525464589, "iter_time": 0.4192031478881836, "loss": 0.33658358454704285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.647862674482745, "step_time": 0.3845650596618652} +{"epoch": 0, "iter": 16044, "iter_tflops": 45.15902590218473, "iter_time": 0.4568542633056641, "loss": 0.270645409822464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.656404109711566, "step_time": 0.42401599311828614} +{"epoch": 0, "iter": 16045, "iter_tflops": 42.22979023429061, "iter_time": 0.48854359436035155, "loss": 0.603186845779419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.884618047378694, "step_time": 0.4496298408508301} +{"epoch": 0, "iter": 16046, "iter_tflops": 41.604663691598965, "iter_time": 
0.4958841552734375, "loss": 0.5870197415351868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.96659914498129, "step_time": 0.45880929183959956} +{"epoch": 0, "iter": 16047, "iter_tflops": 45.7908202592947, "iter_time": 0.45055086135864264, "loss": 0.8441755175590515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.3701729001945, "step_time": 0.41788578605651855} +{"epoch": 0, "iter": 16048, "iter_tflops": 44.43711354898785, "iter_time": 0.46427618408203125, "loss": 0.6022472381591797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86048871322403, "step_time": 0.4310673389434814} +{"epoch": 0, "iter": 16049, "iter_tflops": 30.094654694581838, "iter_time": 0.6855401306152343, "loss": 0.4396387040615082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.897424851798718, "step_time": 0.6467949562072753} +{"epoch": 0, "iter": 16050, "iter_tflops": 10.670207496768231, "iter_time": 1.933523178100586, "loss": 0.4841321110725403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.157746000671494, "step_time": 1.5679808311462402} +{"epoch": 0, "iter": 16051, "iter_tflops": 16.2272055709528, "iter_time": 1.271389175415039, "loss": 0.5298093557357788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.884074370363994, "step_time": 1.0925128288269044} +{"epoch": 0, "iter": 16052, "iter_tflops": 34.248577349730525, "iter_time": 0.6023927154541016, "loss": 0.7015776038169861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.6675829723553, "step_time": 0.4724578762054443} +{"epoch": 0, "iter": 16053, "iter_tflops": 23.721787154345815, "iter_time": 0.6129520492553712, "loss": 0.1708790510892868, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 25.522760857371274, "step_time": 0.5697000465393066} +{"epoch": 0, "iter": 16054, "iter_tflops": 21.96165867637889, "iter_time": 0.662077407836914, "loss": 0.32455015182495117, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.671475195400376, "step_time": 0.6142548332214356} +{"epoch": 0, "iter": 16055, "iter_tflops": 22.616456566332594, 
"iter_time": 0.6429087600708008, "loss": 0.31485116481781006, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.413065218850363, "step_time": 0.5955957565307617} +{"epoch": 0, "iter": 16056, "iter_tflops": 21.623434087075506, "iter_time": 0.6724333419799804, "loss": 0.40437668561935425, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.0229000940528, "step_time": 0.631558925628662} +{"epoch": 0, "iter": 16057, "iter_tflops": 19.008561283699873, "iter_time": 1.0853579711914063, "loss": 0.8220240473747253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.885245881160653, "step_time": 0.9878310089111328} +{"epoch": 0, "iter": 16058, "iter_tflops": 38.308844362121086, "iter_time": 0.538546485900879, "loss": 0.695399284362793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.05550555649676, "step_time": 0.4791743412017822} +{"epoch": 0, "iter": 16059, "iter_tflops": 45.517774208471195, "iter_time": 0.4532535667419434, "loss": 0.6745728254318237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.46906541994367, "step_time": 0.4170503997802735} +{"epoch": 0, "iter": 16060, "iter_tflops": 42.91720873144438, "iter_time": 0.4807184371948242, "loss": 0.5826249718666077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.94434800171944, "step_time": 0.4490452995300293} +{"epoch": 0, "iter": 16061, "iter_tflops": 24.095194460390744, "iter_time": 0.8562327041625977, "loss": 0.6329743266105652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.33456089342703, "step_time": 0.8143458099365234} +{"epoch": 0, "iter": 16062, "iter_tflops": 12.902741788925036, "iter_time": 1.5989697265625, "loss": 0.4811197519302368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.05094056219017, "step_time": 1.2853510627746583} +{"epoch": 0, "iter": 16063, "iter_tflops": 29.033513848722, "iter_time": 0.7105958175659179, "loss": 0.6189665794372559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.3613928532378, "step_time": 0.5834355449676514} +{"epoch": 0, "iter": 16064, "iter_tflops": 36.85533626880666, 
"iter_time": 0.5597857894897461, "loss": 0.5424844026565552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.23560879309673, "step_time": 0.5127570858001709} +{"epoch": 0, "iter": 16065, "iter_tflops": 19.6366398470035, "iter_time": 1.0506427612304687, "loss": 0.5736517906188965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.895341173826246, "step_time": 0.9873537521362304} +{"epoch": 0, "iter": 16066, "iter_tflops": 14.549914418603265, "iter_time": 1.4179529113769531, "loss": 0.5571622252464294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.14539055289777, "step_time": 1.1369881210327148} +{"epoch": 0, "iter": 16067, "iter_tflops": 43.640027831175864, "iter_time": 0.47275619506835936, "loss": 0.6011204719543457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.9781318217639, "step_time": 0.43916377067565915} +{"epoch": 0, "iter": 16068, "iter_tflops": 47.71831920816795, "iter_time": 0.4323516387939453, "loss": 0.5184959173202515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.47505180370771, "step_time": 0.4007979164123535} +{"epoch": 0, "iter": 16069, "iter_tflops": 37.05846418428899, "iter_time": 0.5567174453735351, "loss": 0.5084795951843262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.661586960305286, "step_time": 0.5201782150268555} +{"epoch": 0, "iter": 16070, "iter_tflops": 11.42040468938219, "iter_time": 1.8065115966796874, "loss": 0.5802261233329773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.935235355026805, "step_time": 1.4804983901977538} +{"epoch": 0, "iter": 16071, "iter_tflops": 11.371044301182161, "iter_time": 1.8143534545898437, "loss": 0.74498051404953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.837869063229626, "step_time": 1.3904350700378416} +{"epoch": 0, "iter": 16072, "iter_tflops": 17.343456104509404, "iter_time": 1.1895606842041015, "loss": 0.6242581009864807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.457091952504747, "step_time": 1.0085056838989257} +{"epoch": 0, "iter": 16073, "iter_tflops": 
17.84561842804971, "iter_time": 0.8010691680908204, "loss": 0.3256132900714874, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 19.479838631696477, "step_time": 0.7338651504516601} +{"epoch": 0, "iter": 16074, "iter_tflops": 21.110512566251234, "iter_time": 0.6771780014038086, "loss": 0.3939371705055237, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 22.77294347580129, "step_time": 0.6277438278198243} +{"epoch": 0, "iter": 16075, "iter_tflops": 21.379386199949444, "iter_time": 0.6686616058349609, "loss": 0.29057827591896057, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 22.93779299900491, "step_time": 0.6232323532104492} +{"epoch": 0, "iter": 16076, "iter_tflops": 22.45136500803595, "iter_time": 0.6367352142333985, "loss": 0.3599991500377655, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 24.1242580811257, "step_time": 0.5925809059143066} +{"epoch": 0, "iter": 16077, "iter_tflops": 15.686052890275649, "iter_time": 1.3152507934570314, "loss": 0.6075945496559143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.631769709030273, "step_time": 1.2404629135131837} +{"epoch": 0, "iter": 16078, "iter_tflops": 21.94276821672025, "iter_time": 0.9402229156494142, "loss": 0.7701853513717651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.72232438998882, "step_time": 0.6941278629302978} +{"epoch": 0, "iter": 16079, "iter_tflops": 42.19152725522862, "iter_time": 0.4889866485595703, "loss": 0.799650251865387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.33657017942958, "step_time": 0.4550651588439941} +{"epoch": 0, "iter": 16080, "iter_tflops": 45.90267414256254, "iter_time": 0.4494529762268066, "loss": 0.7049946188926697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.36567801380079, "step_time": 0.41792383575439457} +{"epoch": 0, "iter": 16081, "iter_tflops": 41.19203623336222, "iter_time": 0.5008515090942383, "loss": 0.546196699142456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64505325536199, "step_time": 0.4621137619018555} +{"epoch": 0, "iter": 16082, 
"iter_tflops": 48.7880940971773, "iter_time": 0.42287147903442385, "loss": 0.5462638139724731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.327891641827094, "step_time": 0.38687247657775875} +{"epoch": 0, "iter": 16083, "iter_tflops": 47.96301871956641, "iter_time": 0.4301458511352539, "loss": 0.43488696217536926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.72841163883696, "step_time": 0.3988348541259766} +{"epoch": 0, "iter": 16084, "iter_tflops": 50.51602554532918, "iter_time": 0.40840690231323246, "loss": 0.39547187089920044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.49006415791445, "step_time": 0.37862120056152343} +{"epoch": 0, "iter": 16085, "iter_tflops": 28.18512064677716, "iter_time": 0.7319852828979492, "loss": 0.4495600759983063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.888916397892807, "step_time": 0.6902589988708496} +{"epoch": 0, "iter": 16086, "iter_tflops": 17.195178961813962, "iter_time": 1.1998184814453123, "loss": 0.44110339879989624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.96203260613177, "step_time": 1.0880212020874023} +{"epoch": 0, "iter": 16087, "iter_tflops": 36.33072417141011, "iter_time": 0.5678690414428711, "loss": 0.5146772861480713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.76161733947068, "step_time": 0.5188695755004883} +{"epoch": 0, "iter": 16088, "iter_tflops": 39.241857107706075, "iter_time": 0.5257420272827149, "loss": 0.5083373188972473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79943658040723, "step_time": 0.4820412406921387} +{"epoch": 0, "iter": 16089, "iter_tflops": 20.010131225766578, "iter_time": 1.0310323944091797, "loss": 0.60209059715271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.507273020809492, "step_time": 0.9592612457275391} +{"epoch": 0, "iter": 16090, "iter_tflops": 19.33239857350138, "iter_time": 1.0671771240234376, "loss": 0.433441162109375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.583499140167177, "step_time": 0.8748105354309081} +{"epoch": 0, 
"iter": 16091, "iter_tflops": 43.135370910622704, "iter_time": 0.47828714752197266, "loss": 0.5846071839332581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.55537595276255, "step_time": 0.44315168952941897} +{"epoch": 0, "iter": 16092, "iter_tflops": 41.75725058726521, "iter_time": 0.4940721244812012, "loss": 0.43402841687202454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.603409903139095, "step_time": 0.4625452079772949} +{"epoch": 0, "iter": 16093, "iter_tflops": 31.60034343913462, "iter_time": 0.6528756103515625, "loss": 0.610791027545929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.645115568782195, "step_time": 0.6131972846984863} +{"epoch": 0, "iter": 16094, "iter_tflops": 15.46488471949052, "iter_time": 1.3340606079101562, "loss": 0.5638380646705627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.169529165302013, "step_time": 1.1354776077270508} +{"epoch": 0, "iter": 16095, "iter_tflops": 47.12267291651824, "iter_time": 0.43781670761108393, "loss": 0.512234091758728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.16957582133423, "step_time": 0.40319062995910643} +{"epoch": 0, "iter": 16096, "iter_tflops": 48.40681753476658, "iter_time": 0.4262022285461426, "loss": 0.48366880416870117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.09859501593852, "step_time": 0.39600095748901365} +{"epoch": 0, "iter": 16097, "iter_tflops": 35.42690503174801, "iter_time": 0.5823566436767578, "loss": 0.2488659769296646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.32589839742369, "step_time": 0.5383068466186524} +{"epoch": 0, "iter": 16098, "iter_tflops": 10.132038448985334, "iter_time": 2.036223373413086, "loss": 0.17590872943401337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.75326652663452, "step_time": 1.755349754333496} +{"epoch": 0, "iter": 16099, "iter_tflops": 15.509537728886729, "iter_time": 1.330219757080078, "loss": 0.2332180142402649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.73495626218843, "step_time": 1.1012085227966308} 
+{"epoch": 0, "iter": 16100, "iter_tflops": 38.682610661523455, "iter_time": 0.5333428421020507, "loss": 0.19539761543273926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52595813142227, "step_time": 0.4851411800384521} +{"epoch": 0, "iter": 16101, "iter_tflops": 15.558302024283389, "iter_time": 1.0028623580932616, "loss": 0.25546854734420776, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.582758682028377, "step_time": 0.9409071044921875} +{"epoch": 0, "iter": 16102, "iter_tflops": 12.196387687499032, "iter_time": 1.2792997283935545, "loss": 0.3608841896057129, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 14.925400516051406, "step_time": 1.04538805770874} +{"epoch": 0, "iter": 16103, "iter_tflops": 24.59489840425517, "iter_time": 0.6343931655883789, "loss": 0.32789671421051025, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 26.434294644437745, "step_time": 0.5902497367858887} +{"epoch": 0, "iter": 16104, "iter_tflops": 24.63794144702548, "iter_time": 0.6332848663330078, "loss": 0.4390384554862976, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 26.405493912274064, "step_time": 0.5908935279846191} +{"epoch": 0, "iter": 16105, "iter_tflops": 19.213904734222957, "iter_time": 1.0737584991455078, "loss": 0.5266429781913757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.80780803311617, "step_time": 0.9915072975158692} +{"epoch": 0, "iter": 16106, "iter_tflops": 13.429723532991584, "iter_time": 1.5362262268066407, "loss": 0.6904769539833069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.922709371832442, "step_time": 1.2957024478912353} +{"epoch": 0, "iter": 16107, "iter_tflops": 37.8978554444733, "iter_time": 0.544386833190918, "loss": 0.5265282988548279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.52814750659678, "step_time": 0.49679782867431643} +{"epoch": 0, "iter": 16108, "iter_tflops": 40.009513073023236, "iter_time": 0.5156547012329101, "loss": 0.5570939779281616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59281923735905, "step_time": 
0.47326816368103025} +{"epoch": 0, "iter": 16109, "iter_tflops": 15.37963641905985, "iter_time": 1.1158575439453124, "loss": 0.004083238076418638, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 16.6653727759924, "step_time": 1.0297689437866213} +{"epoch": 0, "iter": 16110, "iter_tflops": 32.05494205004029, "iter_time": 0.5353771438598633, "loss": 0.008206285536289215, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 39.55444258449911, "step_time": 0.4338699321746826} +{"epoch": 0, "iter": 16111, "iter_tflops": 35.22322957628925, "iter_time": 0.48722060775756837, "loss": 0.01457906886935234, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 38.98448923557254, "step_time": 0.44021311187744144} +{"epoch": 0, "iter": 16112, "iter_tflops": 36.40684315910232, "iter_time": 0.4713807029724121, "loss": 0.014953230507671833, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 40.42561406675326, "step_time": 0.4245200405120849} +{"epoch": 0, "iter": 16113, "iter_tflops": 21.217435441377578, "iter_time": 0.9723650894165039, "loss": 0.16464337706565857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.613111740755247, "step_time": 0.912350929260254} +{"epoch": 0, "iter": 16114, "iter_tflops": 19.802653304343057, "iter_time": 1.0418348083496094, "loss": 0.15003159642219543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.63070407544342, "step_time": 0.8376168804168702} +{"epoch": 0, "iter": 16115, "iter_tflops": 51.120488904028285, "iter_time": 0.4035777816772461, "loss": 0.1452452391386032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.59253071550631, "step_time": 0.3711126880645752} +{"epoch": 0, "iter": 16116, "iter_tflops": 52.81845127285428, "iter_time": 0.3906039085388184, "loss": 0.14470888674259186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.14825257754947, "step_time": 0.3610100498199463} +{"epoch": 0, "iter": 16117, "iter_tflops": 45.99716670613242, "iter_time": 0.4485296592712402, "loss": 0.3455515205860138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40316392441215, 
"step_time": 0.4093213977813721} +{"epoch": 0, "iter": 16118, "iter_tflops": 43.890026335174376, "iter_time": 0.4700633659362793, "loss": 0.32326579093933105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.61058769447331, "step_time": 0.433329948425293} +{"epoch": 0, "iter": 16119, "iter_tflops": 48.15972676090891, "iter_time": 0.42838892364501946, "loss": 0.25781017541885376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.383626810908375, "step_time": 0.39384622192382807} +{"epoch": 0, "iter": 16120, "iter_tflops": 53.06965533137539, "iter_time": 0.3887549934387207, "loss": 0.4085725247859955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.14919614062669, "step_time": 0.36100408935546874} +{"epoch": 0, "iter": 16121, "iter_tflops": 39.93848134213773, "iter_time": 0.5165718078613282, "loss": 0.6196653246879578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.53932472910973, "step_time": 0.4738496437072754} +{"epoch": 0, "iter": 16122, "iter_tflops": 46.4840612398374, "iter_time": 0.44383156204223634, "loss": 0.6156379580497742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.269141876848984, "step_time": 0.41041268539428716} +{"epoch": 0, "iter": 16123, "iter_tflops": 47.98279734119208, "iter_time": 0.42996854400634765, "loss": 0.6293512582778931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.44229531756809, "step_time": 0.4010531291961669} +{"epoch": 0, "iter": 16124, "iter_tflops": 43.98585518992744, "iter_time": 0.4690392723083496, "loss": 0.4859190583229065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32528120792644, "step_time": 0.43594233322143555} +{"epoch": 0, "iter": 16125, "iter_tflops": 30.272911054233518, "iter_time": 0.6815034561157227, "loss": 0.36562180519104004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.10121565786646, "step_time": 0.6426888542175293} +{"epoch": 0, "iter": 16126, "iter_tflops": 13.843726607121361, "iter_time": 1.49028466796875, "loss": 0.41303765773773193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
16.016019252314432, "step_time": 1.2881536407470704} +{"epoch": 0, "iter": 16127, "iter_tflops": 43.44869820559686, "iter_time": 0.47483801269531256, "loss": 0.4264240264892578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.99370813177954, "step_time": 0.43901820755004883} +{"epoch": 0, "iter": 16128, "iter_tflops": 48.453296138799864, "iter_time": 0.4257933959960937, "loss": 0.5223796963691711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.727396595309564, "step_time": 0.39127844047546384} +{"epoch": 0, "iter": 16129, "iter_tflops": 41.25543806472936, "iter_time": 0.5000817947387696, "loss": 0.04323519021272659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.80391511149767, "step_time": 0.46047523880004887} +{"epoch": 0, "iter": 16130, "iter_tflops": 11.488608228810532, "iter_time": 1.7957870178222657, "loss": 0.062442366033792496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.330129994126636, "step_time": 1.5477038497924804} +{"epoch": 0, "iter": 16131, "iter_tflops": 13.735507723674928, "iter_time": 1.5020262756347655, "loss": 0.08869640529155731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.209051249065764, "step_time": 1.35650101852417} +{"epoch": 0, "iter": 16132, "iter_tflops": 40.7769336698137, "iter_time": 0.5059500961303711, "loss": 0.0753144919872284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.80485140069463, "step_time": 0.4604656162261963} +{"epoch": 0, "iter": 16133, "iter_tflops": 24.439548287536756, "iter_time": 0.5999600830078126, "loss": 0.22007302939891815, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 26.22797735064114, "step_time": 0.5590501022338867} +{"epoch": 0, "iter": 16134, "iter_tflops": 20.422881314131466, "iter_time": 0.7179571380615235, "loss": 0.4043242931365967, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 22.100606315073282, "step_time": 0.6634548034667969} +{"epoch": 0, "iter": 16135, "iter_tflops": 23.06420760045436, "iter_time": 0.6357362747192382, "loss": 0.3300932049751282, "lr": 3e-05, "seqlen": 5872.0, 
"step_tflops": 24.785495165173458, "step_time": 0.5915860595703124} +{"epoch": 0, "iter": 16136, "iter_tflops": 22.688477655533262, "iter_time": 0.6462643127441406, "loss": 0.3616281747817993, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.367946074169975, "step_time": 0.6017229919433593} +{"epoch": 0, "iter": 16137, "iter_tflops": 15.683679494059318, "iter_time": 1.3154498291015624, "loss": 0.09821843355894089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.7655735416087, "step_time": 1.2305629425048827} +{"epoch": 0, "iter": 16138, "iter_tflops": 22.752378940186677, "iter_time": 0.9067664337158203, "loss": 0.06332442909479141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.11351515678507, "step_time": 0.7338496589660645} +{"epoch": 0, "iter": 16139, "iter_tflops": 40.250362337112, "iter_time": 0.5125691375732422, "loss": 0.044714342802762985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.46725979635109, "step_time": 0.46396143150329594} +{"epoch": 0, "iter": 16140, "iter_tflops": 44.49842486731469, "iter_time": 0.4636364898681641, "loss": 0.08914745599031448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.98697100140767, "step_time": 0.4211547088623047} +{"epoch": 0, "iter": 16141, "iter_tflops": 18.8527056678574, "iter_time": 1.0943306427001955, "loss": 0.642850399017334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.034153772158994, "step_time": 1.0297961044311523} +{"epoch": 0, "iter": 16142, "iter_tflops": 20.789596098624514, "iter_time": 0.9923758697509765, "loss": 0.47730520367622375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.294619432830682, "step_time": 0.8156317024230957} +{"epoch": 0, "iter": 16143, "iter_tflops": 46.26183953774741, "iter_time": 0.44596353530883787, "loss": 0.49164894223213196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.02971035485723, "step_time": 0.41237683296203614} +{"epoch": 0, "iter": 16144, "iter_tflops": 50.347172883769815, "iter_time": 0.4097766036987304, "loss": 0.5736474990844727, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 54.552646757381865, "step_time": 0.3781868476867675} +{"epoch": 0, "iter": 16145, "iter_tflops": 31.196181393414218, "iter_time": 0.6613339385986328, "loss": 0.1711791306734085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.134707953303554, "step_time": 0.6226429862976074} +{"epoch": 0, "iter": 16146, "iter_tflops": 11.459885459409746, "iter_time": 1.8002879333496093, "loss": 0.26371416449546814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.085633178816279, "step_time": 1.464690528869629} +{"epoch": 0, "iter": 16147, "iter_tflops": 13.436464845789319, "iter_time": 1.5354554748535156, "loss": 0.2725487947463989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.861012616085661, "step_time": 1.3007425193786621} +{"epoch": 0, "iter": 16148, "iter_tflops": 45.94924175305978, "iter_time": 0.4489974746704101, "loss": 0.15651027858257294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.29125442631629, "step_time": 0.4102322311401367} +{"epoch": 0, "iter": 16149, "iter_tflops": 21.764871267917567, "iter_time": 0.7338142700195314, "loss": 0.4920475482940674, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 23.08391095303901, "step_time": 0.6918833274841308} +{"epoch": 0, "iter": 16150, "iter_tflops": 10.721761215969964, "iter_time": 1.4896221618652343, "loss": 0.293015718460083, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 12.252570041968287, "step_time": 1.3035120849609374} +{"epoch": 0, "iter": 16151, "iter_tflops": 9.416629867792677, "iter_time": 1.6960816497802735, "loss": 0.31445714831352234, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 10.696446640453814, "step_time": 1.4931475524902342} +{"epoch": 0, "iter": 16152, "iter_tflops": 11.463396228102958, "iter_time": 1.3932496795654297, "loss": 0.3866000771522522, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 13.111067901683986, "step_time": 1.2181595916748047} +{"epoch": 0, "iter": 16153, "iter_tflops": 12.702502551043265, "iter_time": 1.1061600799560547, "loss": 
0.45287665724754333, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 16.407695008175015, "step_time": 0.8563665542602539} +{"epoch": 0, "iter": 16154, "iter_tflops": 5.789342076205796, "iter_time": 2.427046295166016, "loss": 0.3674773871898651, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 7.053983758293847, "step_time": 1.9919242401123043} +{"epoch": 0, "iter": 16155, "iter_tflops": 7.838984421042694, "iter_time": 1.7924517364501955, "loss": 0.2772165834903717, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 9.50925512859264, "step_time": 1.4776132354736329} +{"epoch": 0, "iter": 16156, "iter_tflops": 24.029406385267468, "iter_time": 0.5847419204711913, "loss": 0.45167019963264465, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 25.603033358908547, "step_time": 0.5488022079467774} +{"epoch": 0, "iter": 16157, "iter_tflops": 22.46941439232097, "iter_time": 0.7967349777221681, "loss": 0.4159592390060425, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 24.097112091306883, "step_time": 0.7429175872802734} +{"epoch": 0, "iter": 16158, "iter_tflops": 8.458406224750606, "iter_time": 2.1164942779541014, "loss": 0.532088041305542, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 10.974931024872262, "step_time": 1.6311873245239257} +{"epoch": 0, "iter": 16159, "iter_tflops": 28.302320716719237, "iter_time": 0.6325335845947266, "loss": 0.3264940083026886, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 30.425675437979958, "step_time": 0.5883901710510254} +{"epoch": 0, "iter": 16160, "iter_tflops": 29.649835188321124, "iter_time": 0.6037864379882812, "loss": 0.2662239968776703, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 31.721528541660998, "step_time": 0.5643539009094238} +{"epoch": 0, "iter": 16161, "iter_tflops": 20.033613664694915, "iter_time": 1.0298238677978515, "loss": 0.6236674785614014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.53040412730184, "step_time": 0.9582306671142578} +{"epoch": 0, "iter": 16162, "iter_tflops": 17.544803181059837, "iter_time": 1.1759090881347656, 
"loss": 0.6121023893356323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.323286411647572, "step_time": 0.9675381698608398} +{"epoch": 0, "iter": 16163, "iter_tflops": 34.473315224026884, "iter_time": 0.5984656066894531, "loss": 0.7806170582771301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.58170915997792, "step_time": 0.548966344833374} +{"epoch": 0, "iter": 16164, "iter_tflops": 34.862116155837654, "iter_time": 0.5917911987304688, "loss": 0.5915030241012573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.942209661469256, "step_time": 0.5437504482269286} +{"epoch": 0, "iter": 16165, "iter_tflops": 19.491176713343243, "iter_time": 1.0584837341308595, "loss": 0.23598995804786682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.570020298783582, "step_time": 1.0029690399169922} +{"epoch": 0, "iter": 16166, "iter_tflops": 21.502831269738728, "iter_time": 0.9594593963623047, "loss": 0.21878331899642944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.676834680526273, "step_time": 0.6951918468475341} +{"epoch": 0, "iter": 16167, "iter_tflops": 48.27353494066194, "iter_time": 0.4273789672851563, "loss": 0.17287090420722961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.16149651196192, "step_time": 0.39552342033386234} +{"epoch": 0, "iter": 16168, "iter_tflops": 52.0884773067414, "iter_time": 0.39607787704467773, "loss": 0.2098308652639389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.57221459135321, "step_time": 0.36468597984313966} +{"epoch": 0, "iter": 16169, "iter_tflops": 29.322662800888363, "iter_time": 0.7035886764526367, "loss": 0.1316583752632141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.14938187237583, "step_time": 0.6623275413513183} +{"epoch": 0, "iter": 16170, "iter_tflops": 15.814266448656225, "iter_time": 1.304587448120117, "loss": 0.11143217235803604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.490355235469412, "step_time": 1.0585283470153808} +{"epoch": 0, "iter": 16171, "iter_tflops": 40.69446030350801, "iter_time": 
0.5069754791259766, "loss": 0.15665844082832336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.68716305666263, "step_time": 0.4616783008575439} +{"epoch": 0, "iter": 16172, "iter_tflops": 42.597886132459905, "iter_time": 0.48432200241088874, "loss": 0.14561423659324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.55118344509128, "step_time": 0.4431916007995606} +{"epoch": 0, "iter": 16173, "iter_tflops": 19.513156894438726, "iter_time": 1.0572914276123049, "loss": 0.36022239923477173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.974648432402216, "step_time": 0.9836204681396483} +{"epoch": 0, "iter": 16174, "iter_tflops": 32.013144703809964, "iter_time": 0.6444569473266601, "loss": 0.49985626339912415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.83731115381361, "step_time": 0.470628625869751} +{"epoch": 0, "iter": 16175, "iter_tflops": 49.58424407441574, "iter_time": 0.4160816383361816, "loss": 0.39099565148353577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.62602238745324, "step_time": 0.3847216815948486} +{"epoch": 0, "iter": 16176, "iter_tflops": 45.946398355473654, "iter_time": 0.44902526092529293, "loss": 0.3436754643917084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.657847412668694, "step_time": 0.4154649181365967} +{"epoch": 0, "iter": 16177, "iter_tflops": 32.817265457961796, "iter_time": 0.6286658325195312, "loss": 0.024654876440763474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.964008043856516, "step_time": 0.5900666046142578} +{"epoch": 0, "iter": 16178, "iter_tflops": 36.15088124114514, "iter_time": 0.5706940689086915, "loss": 0.03966674581170082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.168884555506324, "step_time": 0.5011331672668456} +{"epoch": 0, "iter": 16179, "iter_tflops": 47.213053618137785, "iter_time": 0.43697858810424806, "loss": 0.022165458649396896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.77276148188603, "step_time": 0.3984932022094727} +{"epoch": 0, "iter": 16180, "iter_tflops": 
47.48858165184351, "iter_time": 0.4344432449340821, "loss": 0.03796030208468437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.17392582523653, "step_time": 0.39542919540405275} +{"epoch": 0, "iter": 16181, "iter_tflops": 18.925988599591896, "iter_time": 1.0900933074951171, "loss": 0.07282496988773346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.797558623720782, "step_time": 1.042102912902832} +{"epoch": 0, "iter": 16182, "iter_tflops": 17.218772208296226, "iter_time": 1.1981744842529296, "loss": 0.06881428509950638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.575740753527068, "step_time": 1.0026901950836182} +{"epoch": 0, "iter": 16183, "iter_tflops": 44.327323790976735, "iter_time": 0.46542610168457027, "loss": 0.07545138150453568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.54186198314963, "step_time": 0.4250165252685547} +{"epoch": 0, "iter": 16184, "iter_tflops": 37.34921761311699, "iter_time": 0.5523835525512695, "loss": 0.06914521753787994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.92595747980644, "step_time": 0.5041077785491943} +{"epoch": 0, "iter": 16185, "iter_tflops": 19.053948939890315, "iter_time": 1.0827725830078125, "loss": 0.27361205220222473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.341786107429524, "step_time": 1.0142223205566407} +{"epoch": 0, "iter": 16186, "iter_tflops": 16.692176226010034, "iter_time": 1.2359738616943359, "loss": 0.4122655689716339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.579294858290133, "step_time": 0.9560596694946288} +{"epoch": 0, "iter": 16187, "iter_tflops": 49.26442839162102, "iter_time": 0.41878276443481444, "loss": 0.2749144732952118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.51956114598814, "step_time": 0.3854869709014893} +{"epoch": 0, "iter": 16188, "iter_tflops": 41.91616483276917, "iter_time": 0.49219897842407223, "loss": 0.19637323915958405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.218235662679156, "step_time": 0.45625604820251464} +{"epoch": 0, "iter": 
16189, "iter_tflops": 33.08992401390894, "iter_time": 0.6234856719970703, "loss": 0.5248598456382751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.4195225858422, "step_time": 0.5824780235290528} +{"epoch": 0, "iter": 16190, "iter_tflops": 19.912734972217223, "iter_time": 1.0360753326416015, "loss": 0.5184296369552612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.088092776290623, "step_time": 0.8564851398468017} +{"epoch": 0, "iter": 16191, "iter_tflops": 48.42659927314571, "iter_time": 0.4260281295776367, "loss": 0.5472889542579651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.420087521998056, "step_time": 0.3935722827911377} +{"epoch": 0, "iter": 16192, "iter_tflops": 46.66823682770697, "iter_time": 0.4420799865722656, "loss": 0.696984589099884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52633833172356, "step_time": 0.4083235435485839} +{"epoch": 0, "iter": 16193, "iter_tflops": 28.52412455101897, "iter_time": 0.7232857742309571, "loss": 0.5383446216583252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.31846001039866, "step_time": 0.6804795989990234} +{"epoch": 0, "iter": 16194, "iter_tflops": 13.173774229402552, "iter_time": 1.5660731048583985, "loss": 0.3774464428424835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.623075002080927, "step_time": 1.241111738204956} +{"epoch": 0, "iter": 16195, "iter_tflops": 36.462257616724195, "iter_time": 0.5658205184936523, "loss": 0.4750351011753082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.239457874194365, "step_time": 0.5127080383300782} +{"epoch": 0, "iter": 16196, "iter_tflops": 40.41169220025531, "iter_time": 0.5105228805541993, "loss": 0.4129831790924072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.07452340534169, "step_time": 0.4680956687927246} +{"epoch": 0, "iter": 16197, "iter_tflops": 19.914487675751396, "iter_time": 1.035984146118164, "loss": 0.5759890079498291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.2344296712444, "step_time": 0.9715868911743165} +{"epoch": 0, 
"iter": 16198, "iter_tflops": 16.079183527481618, "iter_time": 1.2830933532714845, "loss": 0.5379657745361328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.186540742999718, "step_time": 1.0752899017333983} +{"epoch": 0, "iter": 16199, "iter_tflops": 36.78800679816612, "iter_time": 0.5608103103637696, "loss": 0.45277366042137146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.056746718427625, "step_time": 0.5150466575622559} +{"epoch": 0, "iter": 16200, "iter_tflops": 34.99805753492622, "iter_time": 0.5894925308227539, "loss": 0.5529457926750183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.14895847656329, "step_time": 0.5408035850524902} +{"epoch": 0, "iter": 16201, "iter_tflops": 19.493017018882043, "iter_time": 1.058383804321289, "loss": 0.5778354406356812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.16410357576871, "step_time": 0.974815372467041} +{"epoch": 0, "iter": 16202, "iter_tflops": 15.2193029621689, "iter_time": 1.3555872802734374, "loss": 0.5237417817115784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.401953745185065, "step_time": 1.1211360378265383} +{"epoch": 0, "iter": 16203, "iter_tflops": 38.713979998410515, "iter_time": 0.5329106826782227, "loss": 0.49977898597717285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.32849818460055, "step_time": 0.48740433502197267} +{"epoch": 0, "iter": 16204, "iter_tflops": 37.361547096213705, "iter_time": 0.5522012634277343, "loss": 0.5459550023078918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.73860171926012, "step_time": 0.5064261569976807} +{"epoch": 0, "iter": 16205, "iter_tflops": 19.334640875342195, "iter_time": 1.0670533599853516, "loss": 0.08549245446920395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.603235003878236, "step_time": 1.0013521423339844} +{"epoch": 0, "iter": 16206, "iter_tflops": 17.18884457431315, "iter_time": 1.2002606353759764, "loss": 0.09556964039802551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.822763030911773, "step_time": 0.9907951927185059} 
+{"epoch": 0, "iter": 16207, "iter_tflops": 51.91187448540567, "iter_time": 0.3974253234863281, "loss": 0.024028921499848366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.813834423340424, "step_time": 0.3631350307464599} +{"epoch": 0, "iter": 16208, "iter_tflops": 53.44517572715135, "iter_time": 0.38602349472045894, "loss": 0.09242143481969833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.065343513982675, "step_time": 0.3553082141876221} +{"epoch": 0, "iter": 16209, "iter_tflops": 25.50697672087869, "iter_time": 0.8088411941528321, "loss": 0.5515589118003845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.87247357237973, "step_time": 0.7677407684326171} +{"epoch": 0, "iter": 16210, "iter_tflops": 12.433311719590963, "iter_time": 1.6593401641845702, "loss": 0.34654751420021057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.868153083415985, "step_time": 1.3001572017669678} +{"epoch": 0, "iter": 16211, "iter_tflops": 36.511992703452385, "iter_time": 0.5650497817993165, "loss": 0.43560197949409485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.58818100913992, "step_time": 0.521142749786377} +{"epoch": 0, "iter": 16212, "iter_tflops": 35.274595680346955, "iter_time": 0.5848711547851563, "loss": 0.5182310342788696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.22920159684912, "step_time": 0.5396684379577636} +{"epoch": 0, "iter": 16213, "iter_tflops": 29.401067219611992, "iter_time": 0.70171240234375, "loss": 0.3903641998767853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.322671599932644, "step_time": 0.6586632766723632} +{"epoch": 0, "iter": 16214, "iter_tflops": 20.111383518635236, "iter_time": 1.0258415832519532, "loss": 0.36831533908843994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.23199866416545, "step_time": 0.9279909477233887} +{"epoch": 0, "iter": 16215, "iter_tflops": 46.03156215607143, "iter_time": 0.44819451141357425, "loss": 0.42976412177085876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.17202974892434, "step_time": 
0.4112070732116699} +{"epoch": 0, "iter": 16216, "iter_tflops": 48.979229932870524, "iter_time": 0.4212212715148926, "loss": 0.3860663175582886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.928524253231366, "step_time": 0.38979158782958984} +{"epoch": 0, "iter": 16217, "iter_tflops": 41.25495657547561, "iter_time": 0.500087631225586, "loss": 0.09066150337457657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83500207387594, "step_time": 0.4601559619903564} +{"epoch": 0, "iter": 16218, "iter_tflops": 12.008626379741267, "iter_time": 1.7180227661132812, "loss": 0.07568628340959549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.907180676996788, "step_time": 1.2202562866210938} +{"epoch": 0, "iter": 16219, "iter_tflops": 12.080869853437516, "iter_time": 1.707749008178711, "loss": 0.07109171152114868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.011315953299956, "step_time": 1.472459373474121} +{"epoch": 0, "iter": 16220, "iter_tflops": 26.15814674913166, "iter_time": 0.7887062377929688, "loss": 0.032480597496032715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.756706612708193, "step_time": 0.6496609916687013} +{"epoch": 0, "iter": 16221, "iter_tflops": 14.804770387411391, "iter_time": 1.023516014099121, "loss": 0.39466628432273865, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 15.777989891488978, "step_time": 0.9603833999633788} +{"epoch": 0, "iter": 16222, "iter_tflops": 10.398684303398378, "iter_time": 1.4571958465576174, "loss": 0.41093167662620544, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 14.031054492464484, "step_time": 1.0799558639526368} +{"epoch": 0, "iter": 16223, "iter_tflops": 23.81111858005468, "iter_time": 0.636379997253418, "loss": 0.4826224744319916, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.446163234406658, "step_time": 0.5954893646240235} +{"epoch": 0, "iter": 16224, "iter_tflops": 22.74193963999149, "iter_time": 0.6662984695434571, "loss": 0.44025400280952454, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 
24.484999120899808, "step_time": 0.6188654327392579} +{"epoch": 0, "iter": 16225, "iter_tflops": 18.662534946800648, "iter_time": 1.1054818420410157, "loss": 0.11865221709012985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.992032552523895, "step_time": 1.0319657821655275} +{"epoch": 0, "iter": 16226, "iter_tflops": 17.53246494910026, "iter_time": 1.1767366180419923, "loss": 0.1790098398923874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.630201900579095, "step_time": 1.050987331390381} +{"epoch": 0, "iter": 16227, "iter_tflops": 43.81886600758514, "iter_time": 0.4708267326354981, "loss": 0.14479665458202362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.286069199400146, "step_time": 0.43630383872985845} +{"epoch": 0, "iter": 16228, "iter_tflops": 54.620991719256736, "iter_time": 0.377713638305664, "loss": 0.1737273633480072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.5999935325811, "step_time": 0.3461593246459961} +{"epoch": 0, "iter": 16229, "iter_tflops": 34.34433212781635, "iter_time": 0.6007131958007813, "loss": 0.13590222597122192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.92758947956198, "step_time": 0.5586905021667481} +{"epoch": 0, "iter": 16230, "iter_tflops": 36.54926681204233, "iter_time": 0.5644735260009766, "loss": 0.1421019434928894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.066649249833176, "step_time": 0.5149193630218506} +{"epoch": 0, "iter": 16231, "iter_tflops": 41.59049657187064, "iter_time": 0.4960530700683594, "loss": 0.13079005479812622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.62494055680371, "step_time": 0.45218894004821775} +{"epoch": 0, "iter": 16232, "iter_tflops": 42.30487872817589, "iter_time": 0.4876764602661132, "loss": 0.13543732464313507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.98141862965662, "step_time": 0.4486832752227783} +{"epoch": 0, "iter": 16233, "iter_tflops": 23.10502306416364, "iter_time": 0.8929267654418945, "loss": 0.4398239850997925, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 25.285659887508377, "step_time": 0.8159207077026367} +{"epoch": 0, "iter": 16234, "iter_tflops": 35.50390081550584, "iter_time": 0.5810937118530273, "loss": 0.4077773690223694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.73104715284075, "step_time": 0.5192688083648682} +{"epoch": 0, "iter": 16235, "iter_tflops": 42.417927985762894, "iter_time": 0.48637673950195315, "loss": 0.49433210492134094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.634164588316864, "step_time": 0.442402982711792} +{"epoch": 0, "iter": 16236, "iter_tflops": 38.037584140192166, "iter_time": 0.5423870620727539, "loss": 0.3498761057853699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.399038491977805, "step_time": 0.49834716606140134} +{"epoch": 0, "iter": 16237, "iter_tflops": 22.60718542905929, "iter_time": 0.9125900955200195, "loss": 0.6593656539916992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.58536050664432, "step_time": 0.8391617240905761} +{"epoch": 0, "iter": 16238, "iter_tflops": 45.97157371687009, "iter_time": 0.4487793617248535, "loss": 0.4384731352329254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.36049407061808, "step_time": 0.4096682109832764} +{"epoch": 0, "iter": 16239, "iter_tflops": 46.49885483190821, "iter_time": 0.443690357208252, "loss": 0.5755254626274109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.97767748956741, "step_time": 0.4128061676025391} +{"epoch": 0, "iter": 16240, "iter_tflops": 48.61206826893946, "iter_time": 0.42440270996093754, "loss": 0.38666480779647827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.71348553982002, "step_time": 0.39138169860839844} +{"epoch": 0, "iter": 16241, "iter_tflops": 29.045822096182448, "iter_time": 0.7102947006225586, "loss": 0.11557286232709885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.835551516171098, "step_time": 0.6690684127807618} +{"epoch": 0, "iter": 16242, "iter_tflops": 12.223235842000422, "iter_time": 1.6878585815429688, "loss": 0.17279012501239777, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 15.366303447397895, "step_time": 1.3426191654205322} +{"epoch": 0, "iter": 16243, "iter_tflops": 38.72209033292866, "iter_time": 0.5327990646362304, "loss": 0.1182936355471611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.578708592564816, "step_time": 0.48454014205932616} +{"epoch": 0, "iter": 16244, "iter_tflops": 38.16862346630449, "iter_time": 0.5405249557495118, "loss": 0.09814134985208511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.55853253509562, "step_time": 0.4964346008300781} +{"epoch": 0, "iter": 16245, "iter_tflops": 22.517372153727166, "iter_time": 0.9162300720214844, "loss": 0.01147050503641367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.540071060858743, "step_time": 0.8407104225158692} +{"epoch": 0, "iter": 16246, "iter_tflops": 21.817927369490345, "iter_time": 0.9456028137207031, "loss": 0.04404822364449501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.152547944997025, "step_time": 0.7598216400146484} +{"epoch": 0, "iter": 16247, "iter_tflops": 41.5835633843303, "iter_time": 0.49613577651977536, "loss": 0.03426847606897354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.789474595624185, "step_time": 0.4505641021728516} +{"epoch": 0, "iter": 16248, "iter_tflops": 40.923064040263256, "iter_time": 0.5041434211730957, "loss": 0.030489323660731316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.25613464542697, "step_time": 0.4558739643096924} +{"epoch": 0, "iter": 16249, "iter_tflops": 18.01191637196706, "iter_time": 1.14541357421875, "loss": 0.14168941974639893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.18075672824401, "step_time": 1.0756141586303711} +{"epoch": 0, "iter": 16250, "iter_tflops": 24.9976384770949, "iter_time": 0.8253217010498046, "loss": 0.1567629724740982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.29891955473725, "step_time": 0.7290417385101318} +{"epoch": 0, "iter": 16251, "iter_tflops": 49.56468418058225, "iter_time": 0.41624583816528316, "loss": 0.13205313682556152, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.44807937324292, "step_time": 0.38600252342224123} +{"epoch": 0, "iter": 16252, "iter_tflops": 47.67176896065239, "iter_time": 0.43277381896972655, "loss": 0.12505033612251282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.61469800054734, "step_time": 0.3997135372161865} +{"epoch": 0, "iter": 16253, "iter_tflops": 24.235305056606645, "iter_time": 0.851282600402832, "loss": 0.08989009261131287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.454964682194547, "step_time": 0.810493896484375} +{"epoch": 0, "iter": 16254, "iter_tflops": 13.84684198471687, "iter_time": 1.4899493713378906, "loss": 0.23446424305438995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.08299848840043, "step_time": 1.2827889976501463} +{"epoch": 0, "iter": 16255, "iter_tflops": 46.913644795543554, "iter_time": 0.4397674407958984, "loss": 0.16945405304431915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.8054837697421, "step_time": 0.39824149894714356} +{"epoch": 0, "iter": 16256, "iter_tflops": 50.02132188295059, "iter_time": 0.412445987701416, "loss": 0.16812554001808167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.2717245968262, "step_time": 0.38014442443847657} +{"epoch": 0, "iter": 16257, "iter_tflops": 23.633668595018865, "iter_time": 0.8729534912109375, "loss": 0.20449140667915344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.815593414504153, "step_time": 0.8313761901855469} +{"epoch": 0, "iter": 16258, "iter_tflops": 14.02832636667694, "iter_time": 1.4706739044189452, "loss": 0.4026165306568146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.54350014107611, "step_time": 1.0042637996673585} +{"epoch": 0, "iter": 16259, "iter_tflops": 37.68185533343232, "iter_time": 0.5475073699951172, "loss": 0.2991560995578766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36951302433228, "step_time": 0.4987028369903565} +{"epoch": 0, "iter": 16260, "iter_tflops": 43.733533383557024, "iter_time": 0.4717454071044922, "loss": 
0.2610051929950714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.488447384353115, "step_time": 0.4344444732666015} +{"epoch": 0, "iter": 16261, "iter_tflops": 23.448687532070707, "iter_time": 0.8798400115966797, "loss": 0.5088703036308289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.154044608699685, "step_time": 0.8201899070739747} +{"epoch": 0, "iter": 16262, "iter_tflops": 13.285149694322891, "iter_time": 1.5529440002441404, "loss": 0.5002435445785522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.834562334557685, "step_time": 1.3029152984619141} +{"epoch": 0, "iter": 16263, "iter_tflops": 38.22877545406512, "iter_time": 0.5396744537353515, "loss": 0.5028400421142578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.77713905738801, "step_time": 0.4938369159698487} +{"epoch": 0, "iter": 16264, "iter_tflops": 42.861988368591966, "iter_time": 0.48133776092529296, "loss": 0.41793587803840637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.609798526819425, "step_time": 0.44263425636291504} +{"epoch": 0, "iter": 16265, "iter_tflops": 20.73287173936528, "iter_time": 0.9950909729003906, "loss": 0.05165581405162811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.161019835535182, "step_time": 0.9309631805419921} +{"epoch": 0, "iter": 16266, "iter_tflops": 36.582989052839274, "iter_time": 0.5639531936645508, "loss": 0.0614120215177536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.517938379654474, "step_time": 0.5091841869354249} +{"epoch": 0, "iter": 16267, "iter_tflops": 45.039287161222745, "iter_time": 0.45806882858276365, "loss": 0.051941849291324615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.394010214618824, "step_time": 0.41768411636352537} +{"epoch": 0, "iter": 16268, "iter_tflops": 43.96045372396747, "iter_time": 0.46931029510498046, "loss": 0.07066408544778824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98470372482464, "step_time": 0.42995146179199223} +{"epoch": 0, "iter": 16269, "iter_tflops": 28.846894104422077, "iter_time": 
0.5125415306091308, "loss": 0.018655749037861824, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 32.23729624084873, "step_time": 0.4586374473571777} +{"epoch": 0, "iter": 16270, "iter_tflops": 28.947720352389084, "iter_time": 0.5107563247680663, "loss": 0.0066230762749910355, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 32.17572218821175, "step_time": 0.45951513290405277} +{"epoch": 0, "iter": 16271, "iter_tflops": 33.77136945563273, "iter_time": 0.4378037223815918, "loss": 0.022380465641617775, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 37.17648245468871, "step_time": 0.39770387840271} +{"epoch": 0, "iter": 16272, "iter_tflops": 33.77640968211173, "iter_time": 0.4377383918762207, "loss": 0.00916315708309412, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 37.19769611616458, "step_time": 0.3974770698547363} +{"epoch": 0, "iter": 16273, "iter_tflops": 23.54276661788761, "iter_time": 0.876324089050293, "loss": 0.718741774559021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.010110868305876, "step_time": 0.8249101181030274} +{"epoch": 0, "iter": 16274, "iter_tflops": 9.988579072009207, "iter_time": 2.0654683074951175, "loss": 0.5136817097663879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.512329756580643, "step_time": 1.6488610763549805} +{"epoch": 0, "iter": 16275, "iter_tflops": 11.934500457931314, "iter_time": 1.7286935119628906, "loss": 0.6804215908050537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.724031546082978, "step_time": 1.4011850929260254} +{"epoch": 0, "iter": 16276, "iter_tflops": 33.36109669750441, "iter_time": 0.6184177246093749, "loss": 0.6226083636283875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.447668617424824, "step_time": 0.5660470008850097} +{"epoch": 0, "iter": 16277, "iter_tflops": 10.541877819221062, "iter_time": 1.5656015319824217, "loss": 0.2900625467300415, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 11.30518776219216, "step_time": 1.4598943786621092} +{"epoch": 0, "iter": 16278, "iter_tflops": 11.994747301600636, 
"iter_time": 1.3759673004150392, "loss": 0.5598382949829102, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 16.826080912746484, "step_time": 0.9808808212280272} +{"epoch": 0, "iter": 16279, "iter_tflops": 25.257859954687238, "iter_time": 0.6534354095458985, "loss": 0.3439759314060211, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 27.22376046208709, "step_time": 0.6062490921020508} +{"epoch": 0, "iter": 16280, "iter_tflops": 24.65726174738616, "iter_time": 0.6693516998291015, "loss": 0.33196404576301575, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 26.56217210684683, "step_time": 0.6213490371704102} +{"epoch": 0, "iter": 16281, "iter_tflops": 20.507562793570983, "iter_time": 1.006023666381836, "loss": 0.2838286757469177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.09366823730653, "step_time": 0.9338011817932128} +{"epoch": 0, "iter": 16282, "iter_tflops": 28.357194836436832, "iter_time": 0.7275435256958007, "loss": 0.24090062081813812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.01946088199923, "step_time": 0.5891322422027588} +{"epoch": 0, "iter": 16283, "iter_tflops": 48.09630236572868, "iter_time": 0.42895383834838874, "loss": 0.2117854207754135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33423177965709, "step_time": 0.3942179489135742} +{"epoch": 0, "iter": 16284, "iter_tflops": 48.73202466322151, "iter_time": 0.42335802078247076, "loss": 0.25712138414382935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.83103853468106, "step_time": 0.3905108451843262} +{"epoch": 0, "iter": 16285, "iter_tflops": 28.80646202412148, "iter_time": 0.7161967163085937, "loss": 0.5554782748222351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.66727326120451, "step_time": 0.6727397422790528} +{"epoch": 0, "iter": 16286, "iter_tflops": 19.85650266090596, "iter_time": 1.0390094299316408, "loss": 0.3766089081764221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.832626767484886, "step_time": 0.8656659507751464} +{"epoch": 0, "iter": 16287, "iter_tflops": 
47.956974771023454, "iter_time": 0.4302000617980957, "loss": 0.39778751134872437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.94402780420605, "step_time": 0.3971793174743652} +{"epoch": 0, "iter": 16288, "iter_tflops": 47.45157010139713, "iter_time": 0.4347821044921875, "loss": 0.2883222997188568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.551851312016915, "step_time": 0.4002008266448974} +{"epoch": 0, "iter": 16289, "iter_tflops": 33.10659165053983, "iter_time": 0.623171775817871, "loss": 0.5942792892456055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.381830317294515, "step_time": 0.5830985374450683} +{"epoch": 0, "iter": 16290, "iter_tflops": 12.529298552899952, "iter_time": 1.646627975463867, "loss": 0.5626587867736816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.127484730607115, "step_time": 1.3638151931762694} +{"epoch": 0, "iter": 16291, "iter_tflops": 10.643654339370634, "iter_time": 1.9383468170166016, "loss": 0.5751399993896484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.418157941339741, "step_time": 1.6613650436401366} +{"epoch": 0, "iter": 16292, "iter_tflops": 14.57798529166306, "iter_time": 1.4152225494384765, "loss": 0.5601459741592407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.24583100601706, "step_time": 1.1962945423126223} +{"epoch": 0, "iter": 16293, "iter_tflops": 11.798480990699511, "iter_time": 1.2462260742187503, "loss": 0.20446816086769104, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 12.78160496555543, "step_time": 1.1503699798583984} +{"epoch": 0, "iter": 16294, "iter_tflops": 12.127157332761861, "iter_time": 1.212450225830078, "loss": 0.3103679120540619, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 16.138832281391053, "step_time": 0.9110680618286132} +{"epoch": 0, "iter": 16295, "iter_tflops": 26.242607061767156, "iter_time": 0.5602939758300781, "loss": 0.31696638464927673, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.962873856428796, "step_time": 0.5258248748779297} +{"epoch": 0, "iter": 16296, 
"iter_tflops": 25.880857917811923, "iter_time": 0.568125473022461, "loss": 0.3446028530597687, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.363948643579537, "step_time": 0.5373338050842285} +{"epoch": 0, "iter": 16297, "iter_tflops": 19.20643148541463, "iter_time": 1.0741763000488282, "loss": 0.08862670511007309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.012473526465477, "step_time": 1.0309117202758789} +{"epoch": 0, "iter": 16298, "iter_tflops": 15.907991696383839, "iter_time": 1.2969011993408202, "loss": 0.11068608611822128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.454202357058413, "step_time": 1.0604954719543458} +{"epoch": 0, "iter": 16299, "iter_tflops": 49.16336576906814, "iter_time": 0.4196436347961426, "loss": 0.14450429379940033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.442024987672546, "step_time": 0.38604625320434577} +{"epoch": 0, "iter": 16300, "iter_tflops": 51.14108397665799, "iter_time": 0.4034152565002442, "loss": 0.08641193062067032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.550963265257025, "step_time": 0.3713903827667236} +{"epoch": 0, "iter": 16301, "iter_tflops": 26.26254677874285, "iter_time": 0.7855709381103515, "loss": 0.4140559434890747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.600413281335555, "step_time": 0.7474921951293945} +{"epoch": 0, "iter": 16302, "iter_tflops": 14.224464770754249, "iter_time": 1.4503950653076172, "loss": 0.665911853313446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.099173603427523, "step_time": 1.2065550060272219} +{"epoch": 0, "iter": 16303, "iter_tflops": 44.27818176392034, "iter_time": 0.4659426536560059, "loss": 0.5283170342445374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.840714075611324, "step_time": 0.4312455177307129} +{"epoch": 0, "iter": 16304, "iter_tflops": 47.394677619457305, "iter_time": 0.4353040161132812, "loss": 0.587990403175354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.130945096020284, "step_time": 0.4034952507019043} +{"epoch": 0, 
"iter": 16305, "iter_tflops": 19.801026943439453, "iter_time": 1.041920379638672, "loss": 0.4274909496307373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.66926317630124, "step_time": 0.9981533126831055} +{"epoch": 0, "iter": 16306, "iter_tflops": 16.436715409051285, "iter_time": 1.2551834716796877, "loss": 0.4151860475540161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.618502428328036, "step_time": 0.9543257484436034} +{"epoch": 0, "iter": 16307, "iter_tflops": 37.765184841095206, "iter_time": 0.5462992858886719, "loss": 0.4000512957572937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.28025358757107, "step_time": 0.49978117179870607} +{"epoch": 0, "iter": 16308, "iter_tflops": 41.710756818347896, "iter_time": 0.49462285232543945, "loss": 0.47575101256370544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.39920651262079, "step_time": 0.45443731498718265} +{"epoch": 0, "iter": 16309, "iter_tflops": 18.357663698060843, "iter_time": 1.1238409118652344, "loss": 0.11451371759176254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.209543609033794, "step_time": 1.0740022735595702} +{"epoch": 0, "iter": 16310, "iter_tflops": 14.817964095054311, "iter_time": 1.3923028411865235, "loss": 0.0720047727227211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.64947921426367, "step_time": 0.9991096286773682} +{"epoch": 0, "iter": 16311, "iter_tflops": 48.13114412774821, "iter_time": 0.42864332199096683, "loss": 0.12690146267414093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.29159799786941, "step_time": 0.3945393581390381} +{"epoch": 0, "iter": 16312, "iter_tflops": 49.07408699832497, "iter_time": 0.42040707778930664, "loss": 0.1291476935148239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.27015848634509, "step_time": 0.3872917613983154} +{"epoch": 0, "iter": 16313, "iter_tflops": 37.09293256993309, "iter_time": 0.5562001190185547, "loss": 0.4090704321861267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.959779658414696, "step_time": 0.5162964782714844} 
+{"epoch": 0, "iter": 16314, "iter_tflops": 12.78781566973316, "iter_time": 1.6133399200439456, "loss": 0.3545938730239868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.216890244938998, "step_time": 1.2721978874206543} +{"epoch": 0, "iter": 16315, "iter_tflops": 13.2293522675819, "iter_time": 1.5594938507080076, "loss": 0.4584432542324066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.476738686797178, "step_time": 1.3330388221740723} +{"epoch": 0, "iter": 16316, "iter_tflops": 25.109838311245518, "iter_time": 0.8216338653564452, "loss": 0.3832947015762329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.0851412845758, "step_time": 0.7345910530090332} +{"epoch": 0, "iter": 16317, "iter_tflops": 12.037135365681282, "iter_time": 1.2792294006347658, "loss": 0.22859176993370056, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 12.997465812106899, "step_time": 1.1847122879028322} +{"epoch": 0, "iter": 16318, "iter_tflops": 15.668931703338673, "iter_time": 0.9827254180908204, "loss": 0.5476539134979248, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 19.069020748395452, "step_time": 0.8075012168884278} +{"epoch": 0, "iter": 16319, "iter_tflops": 23.87019372704499, "iter_time": 0.6450830535888672, "loss": 0.4620184898376465, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.75597302188143, "step_time": 0.5978519020080566} +{"epoch": 0, "iter": 16320, "iter_tflops": 23.153158889826315, "iter_time": 0.665060760498047, "loss": 0.4501623511314392, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.71581619527895, "step_time": 0.6230122985839844} +{"epoch": 0, "iter": 16321, "iter_tflops": 15.61315261955079, "iter_time": 1.321391906738281, "loss": 0.1234908401966095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.740966785367405, "step_time": 1.2323716888427736} +{"epoch": 0, "iter": 16322, "iter_tflops": 20.260932979505064, "iter_time": 1.0182696685791015, "loss": 0.1438557505607605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.047997189264585, "step_time": 
0.8236624011993409} +{"epoch": 0, "iter": 16323, "iter_tflops": 45.86700559266622, "iter_time": 0.44980249404907224, "loss": 0.17450128495693207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40000139340362, "step_time": 0.40934708213806154} +{"epoch": 0, "iter": 16324, "iter_tflops": 49.542545023068186, "iter_time": 0.41643184661865235, "loss": 0.10400897264480591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84161926895984, "step_time": 0.3831811485290528} +{"epoch": 0, "iter": 16325, "iter_tflops": 34.691245107610385, "iter_time": 0.5947060546875, "loss": 0.5783023834228516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.044980626216116, "step_time": 0.5569200782775879} +{"epoch": 0, "iter": 16326, "iter_tflops": 10.27813060199019, "iter_time": 2.007280731201172, "loss": 0.7208265066146851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.35587285391264, "step_time": 1.6697398681640623} +{"epoch": 0, "iter": 16327, "iter_tflops": 14.454033278146447, "iter_time": 1.427358932495117, "loss": 0.6281683444976807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.996871100271377, "step_time": 1.2138171424865725} +{"epoch": 0, "iter": 16328, "iter_tflops": 17.952883090128058, "iter_time": 1.1491799621582033, "loss": 0.6297021508216858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.43069670511553, "step_time": 0.9626888847351074} +{"epoch": 0, "iter": 16329, "iter_tflops": 20.361662313633484, "iter_time": 0.7341542587280273, "loss": 0.3984033465385437, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 21.527944795899415, "step_time": 0.6943812446594237} +{"epoch": 0, "iter": 16330, "iter_tflops": 9.914940171840737, "iter_time": 1.5076844482421874, "loss": 0.2742389738559723, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 11.448661338808144, "step_time": 1.3057073364257812} +{"epoch": 0, "iter": 16331, "iter_tflops": 26.81685202346495, "iter_time": 0.55743310546875, "loss": 0.3483067750930786, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.638290091574437, 
"step_time": 0.5219795265197754} +{"epoch": 0, "iter": 16332, "iter_tflops": 26.620726761876693, "iter_time": 0.5615399322509766, "loss": 0.468820720911026, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.31991524310608, "step_time": 0.5278476638793945} +{"epoch": 0, "iter": 16333, "iter_tflops": 30.832719492211208, "iter_time": 0.669129867553711, "loss": 0.3232525885105133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.764405135085305, "step_time": 0.6296800880432128} +{"epoch": 0, "iter": 16334, "iter_tflops": 13.812840117072456, "iter_time": 1.4936170501708983, "loss": 0.35980424284935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.042242729601096, "step_time": 1.2860479583740234} +{"epoch": 0, "iter": 16335, "iter_tflops": 26.020001053070757, "iter_time": 0.7928936462402343, "loss": 0.35779812932014465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.67031858552558, "step_time": 0.6314934902191163} +{"epoch": 0, "iter": 16336, "iter_tflops": 38.49090965480399, "iter_time": 0.5359991149902344, "loss": 0.2846008539199829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.095110719333306, "step_time": 0.49010664558410644} +{"epoch": 0, "iter": 16337, "iter_tflops": 17.494471182368173, "iter_time": 1.1792922058105468, "loss": 0.5614989399909973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.626254698490424, "step_time": 1.1076351013183594} +{"epoch": 0, "iter": 16338, "iter_tflops": 18.555169636663997, "iter_time": 1.1118784637451171, "loss": 0.37552696466445923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.309308759544958, "step_time": 0.9247751121520997} +{"epoch": 0, "iter": 16339, "iter_tflops": 45.43694978822937, "iter_time": 0.4540598258972168, "loss": 0.44833147525787354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0485540410947, "step_time": 0.4206259269714356} +{"epoch": 0, "iter": 16340, "iter_tflops": 46.23413623064378, "iter_time": 0.4462307548522949, "loss": 0.37721872329711914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
49.330906991578864, "step_time": 0.41821841049194336} +{"epoch": 0, "iter": 16341, "iter_tflops": 34.245065093991805, "iter_time": 0.6024544982910157, "loss": 0.10453154891729355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.85446579263465, "step_time": 0.5597990112304687} +{"epoch": 0, "iter": 16342, "iter_tflops": 9.448563093324967, "iter_time": 2.1835165100097655, "loss": 0.09549479931592941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.134508787156054, "step_time": 1.5707548599243162} +{"epoch": 0, "iter": 16343, "iter_tflops": 13.004710980967307, "iter_time": 1.5864322967529296, "loss": 0.10281424224376678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.982202170473156, "step_time": 1.2908792724609377} +{"epoch": 0, "iter": 16344, "iter_tflops": 21.190080832605926, "iter_time": 0.9736203308105468, "loss": 0.09303383529186249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.44353377758059, "step_time": 0.7801942691802978} +{"epoch": 0, "iter": 16345, "iter_tflops": 13.651827426937672, "iter_time": 1.151907241821289, "loss": 0.257793128490448, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 14.484326605926631, "step_time": 1.0857003784179688} +{"epoch": 0, "iter": 16346, "iter_tflops": 10.875109648043042, "iter_time": 1.4460211791992186, "loss": 0.2123599499464035, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 13.795614726197165, "step_time": 1.1399012794494627} +{"epoch": 0, "iter": 16347, "iter_tflops": 23.645653332437238, "iter_time": 0.6650541076660157, "loss": 0.31208908557891846, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.512338660904913, "step_time": 0.616393466949463} +{"epoch": 0, "iter": 16348, "iter_tflops": 22.372619073044014, "iter_time": 0.7028966445922852, "loss": 0.4329644739627838, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 23.997338194398218, "step_time": 0.655307632446289} +{"epoch": 0, "iter": 16349, "iter_tflops": 17.243375403350125, "iter_time": 1.196464904785156, "loss": 0.3808690905570984, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 18.531935435987407, "step_time": 1.1132724685668944} +{"epoch": 0, "iter": 16350, "iter_tflops": 16.855894700034764, "iter_time": 1.2239690551757814, "loss": 0.37756702303886414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.30264968425354, "step_time": 1.0161773872375488} +{"epoch": 0, "iter": 16351, "iter_tflops": 49.67668536266736, "iter_time": 0.4153073692321777, "loss": 0.3041745722293854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.102852331196075, "step_time": 0.38133097648620606} +{"epoch": 0, "iter": 16352, "iter_tflops": 45.28570810003993, "iter_time": 0.45557625961303705, "loss": 0.3198424279689789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87733706808976, "step_time": 0.4220993766784668} +{"epoch": 0, "iter": 16353, "iter_tflops": 36.06789933876769, "iter_time": 0.5720070724487305, "loss": 0.17365215718746185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.00261326782234, "step_time": 0.5289669532775879} +{"epoch": 0, "iter": 16354, "iter_tflops": 40.649646175841305, "iter_time": 0.5075343933105468, "loss": 0.17990604043006897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.75228183627027, "step_time": 0.4610065155029297} +{"epoch": 0, "iter": 16355, "iter_tflops": 41.38194277357926, "iter_time": 0.49855304336547845, "loss": 0.14147596061229706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.631826408116034, "step_time": 0.45212070465087895} +{"epoch": 0, "iter": 16356, "iter_tflops": 41.670268057803774, "iter_time": 0.49510345077514645, "loss": 0.24442768096923828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.68420662082837, "step_time": 0.45160231590271} +{"epoch": 0, "iter": 16357, "iter_tflops": 20.447248454201194, "iter_time": 1.0089911880493163, "loss": 0.1456729918718338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.05673760786879, "step_time": 0.935364688873291} +{"epoch": 0, "iter": 16358, "iter_tflops": 20.38517786137066, "iter_time": 1.0120634536743165, "loss": 0.06019414961338043, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 25.064382275241133, "step_time": 0.8231239566802979} +{"epoch": 0, "iter": 16359, "iter_tflops": 52.90956553428962, "iter_time": 0.38993125915527344, "loss": 0.11542341858148575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.65497776635232, "step_time": 0.35783716011047356} +{"epoch": 0, "iter": 16360, "iter_tflops": 55.79363649559554, "iter_time": 0.36977502822875985, "loss": 0.0899646133184433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.77931133780158, "step_time": 0.3394426994323731} +{"epoch": 0, "iter": 16361, "iter_tflops": 28.68154758374629, "iter_time": 0.7193159103393556, "loss": 0.5049610733985901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.377806214052516, "step_time": 0.6791502113342286} +{"epoch": 0, "iter": 16362, "iter_tflops": 11.943827344658972, "iter_time": 1.7273435821533203, "loss": 0.6438164114952087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.056017931357491, "step_time": 1.4677765502929687} +{"epoch": 0, "iter": 16363, "iter_tflops": 12.319795695475419, "iter_time": 1.6746295166015623, "loss": 0.4419086277484894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.276723049848785, "step_time": 1.4450860633850096} +{"epoch": 0, "iter": 16364, "iter_tflops": 27.37054165645194, "iter_time": 0.7537700119018554, "loss": 0.46550190448760986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.6662697435453, "step_time": 0.6727617568969726} +{"epoch": 0, "iter": 16365, "iter_tflops": 24.30617294069032, "iter_time": 0.641928924560547, "loss": 0.3769087493419647, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.994205046835507, "step_time": 0.6002428398132325} +{"epoch": 0, "iter": 16366, "iter_tflops": 27.885512127410355, "iter_time": 0.559531967163086, "loss": 0.32396239042282104, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.8841152710403, "step_time": 0.5221113395690918} +{"epoch": 0, "iter": 16367, "iter_tflops": 27.860725460036317, "iter_time": 0.5600297622680664, "loss": 0.37994712591171265, 
"lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.666496860644443, "step_time": 0.5259412841796874} +{"epoch": 0, "iter": 16368, "iter_tflops": 28.977009231984653, "iter_time": 0.5384556884765626, "loss": 0.3418268859386444, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.735237607179492, "step_time": 0.5076529960632324} +{"epoch": 0, "iter": 16369, "iter_tflops": 31.797483371728102, "iter_time": 0.6488278732299804, "loss": 0.12024300545454025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.95904098792903, "step_time": 0.6075287437438964} +{"epoch": 0, "iter": 16370, "iter_tflops": 12.689177369215434, "iter_time": 1.6258810882568362, "loss": 0.1502411961555481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.975859627376291, "step_time": 1.291391761779785} +{"epoch": 0, "iter": 16371, "iter_tflops": 11.702866427104334, "iter_time": 1.7629094238281249, "loss": 0.17182421684265137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.43666787608593, "step_time": 1.5354322738647461} +{"epoch": 0, "iter": 16372, "iter_tflops": 38.741224886985414, "iter_time": 0.5325359115600586, "loss": 0.1785142570734024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.926341303336166, "step_time": 0.4696747531890869} +{"epoch": 0, "iter": 16373, "iter_tflops": 18.16609572972097, "iter_time": 0.8049025573730468, "loss": 0.42066138982772827, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 19.16600878825658, "step_time": 0.7629098510742187} +{"epoch": 0, "iter": 16374, "iter_tflops": 7.385275749246993, "iter_time": 1.9798769073486326, "loss": 0.30190277099609375, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 8.848987079773682, "step_time": 1.652385383605957} +{"epoch": 0, "iter": 16375, "iter_tflops": 9.485598129793374, "iter_time": 1.5414881286621096, "loss": 0.4311378598213196, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 11.164652468279051, "step_time": 1.3096634178161621} +{"epoch": 0, "iter": 16376, "iter_tflops": 20.346542903837744, "iter_time": 0.7186447830200196, "loss": 
0.40356096625328064, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 21.981188236493267, "step_time": 0.6652022972106935} +{"epoch": 0, "iter": 16377, "iter_tflops": 18.016135931858248, "iter_time": 1.0784492797851564, "loss": 0.3221583664417267, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 19.088539816981747, "step_time": 1.0178614501953125} +{"epoch": 0, "iter": 16378, "iter_tflops": 8.218858065202278, "iter_time": 2.36401318359375, "loss": 0.28081706166267395, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 9.575739984264075, "step_time": 2.0290326232910156} +{"epoch": 0, "iter": 16379, "iter_tflops": 12.053343444676557, "iter_time": 1.6119584503173827, "loss": 0.43745243549346924, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 13.370969559879173, "step_time": 1.4531099433898929} +{"epoch": 0, "iter": 16380, "iter_tflops": 32.04607282446266, "iter_time": 0.6062985916137695, "loss": 0.2650977075099945, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 34.05736357851777, "step_time": 0.5704930381774902} +{"epoch": 0, "iter": 16381, "iter_tflops": 15.944524466651826, "iter_time": 0.970870880126953, "loss": 0.399042010307312, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 16.791514428713324, "step_time": 0.9218986511230469} +{"epoch": 0, "iter": 16382, "iter_tflops": 9.466368928202813, "iter_time": 1.6352705688476563, "loss": 0.3539625406265259, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 13.65791395481404, "step_time": 1.1334142646789551} +{"epoch": 0, "iter": 16383, "iter_tflops": 23.216133701863985, "iter_time": 0.6667808990478515, "loss": 0.28984618186950684, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.970845463458925, "step_time": 0.619925926208496} +{"epoch": 0, "iter": 16384, "iter_tflops": 25.817114055199053, "iter_time": 0.5996051483154298, "loss": 0.31767624616622925, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 27.650389454632972, "step_time": 0.5598501434326172} +{"epoch": 0, "iter": 16385, "iter_tflops": 1.2786636232140696, "iter_time": 
1.102866180419922, "loss": 0.039172857999801636, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.3906637339683934, "step_time": 1.0140444679260254} +{"epoch": 0, "iter": 16386, "iter_tflops": 1.1733732610303291, "iter_time": 1.2018297271728513, "loss": 0.04495875537395477, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.323247774438512, "step_time": 1.0657073402404784} +{"epoch": 0, "iter": 16387, "iter_tflops": 2.879674261651802, "iter_time": 0.4897063827514649, "loss": 0.018754782155156136, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.100098607972891, "step_time": 0.45488710021972656} +{"epoch": 0, "iter": 16388, "iter_tflops": 3.3505345795370367, "iter_time": 0.4208865280151367, "loss": 0.02599809691309929, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.626637955642055, "step_time": 0.3888435745239258} +{"epoch": 0, "iter": 16389, "iter_tflops": 17.53377000164859, "iter_time": 1.1766490325927736, "loss": 0.0195333082228899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.171662327708415, "step_time": 1.1353443145751954} +{"epoch": 0, "iter": 16390, "iter_tflops": 23.917300742890173, "iter_time": 0.8626012496948241, "loss": 0.006247019860893488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.635381422196378, "step_time": 0.6521525135040283} +{"epoch": 0, "iter": 16391, "iter_tflops": 52.924479615443154, "iter_time": 0.3898213768005371, "loss": 0.00878095906227827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.83178589139599, "step_time": 0.3567431507110596} +{"epoch": 0, "iter": 16392, "iter_tflops": 60.46548195029259, "iter_time": 0.34120448303222656, "loss": 0.000941579753998667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.11710226410698, "step_time": 0.31203868293762205} +{"epoch": 0, "iter": 16393, "iter_tflops": 34.05883447559879, "iter_time": 0.605748664855957, "loss": 0.13779230415821075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.346408236489076, "step_time": 0.5676239967346192} +{"epoch": 0, "iter": 16394, "iter_tflops": 
13.461116544055548, "iter_time": 1.5326435546875, "loss": 0.16371478140354156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.876127192806365, "step_time": 1.386859176635742} +{"epoch": 0, "iter": 16395, "iter_tflops": 50.363391586549675, "iter_time": 0.40964464187622074, "loss": 0.1534373015165329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.15768519231148, "step_time": 0.3740384216308593} +{"epoch": 0, "iter": 16396, "iter_tflops": 48.062134589124554, "iter_time": 0.42925878524780275, "loss": 0.15244008600711823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44014281462825, "step_time": 0.39342176437377924} +{"epoch": 0, "iter": 16397, "iter_tflops": 31.97996015537417, "iter_time": 0.6451256790161133, "loss": 0.520771861076355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.315998661470275, "step_time": 0.6012091827392578} +{"epoch": 0, "iter": 16398, "iter_tflops": 21.266243242350548, "iter_time": 0.970133430480957, "loss": 0.6086730360984802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.59319500474993, "step_time": 0.8744510231018066} +{"epoch": 0, "iter": 16399, "iter_tflops": 44.7362771075296, "iter_time": 0.461171443939209, "loss": 0.6210343837738037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.590192235779696, "step_time": 0.42459378242492674} +{"epoch": 0, "iter": 16400, "iter_tflops": 44.99858232933316, "iter_time": 0.4584831886291504, "loss": 0.6758221983909607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.64192508119987, "step_time": 0.4241422080993652} +{"epoch": 0, "iter": 16401, "iter_tflops": 15.047916751920306, "iter_time": 0.8904645080566406, "loss": 0.002314598299562931, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 15.754662645701407, "step_time": 0.8505187377929688} +{"epoch": 0, "iter": 16402, "iter_tflops": 13.329967203160253, "iter_time": 1.0052264633178711, "loss": 0.015621963888406754, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 18.60250134564165, "step_time": 0.7203136577606202} +{"epoch": 0, "iter": 16403, 
"iter_tflops": 36.275420316677135, "iter_time": 0.36938609313964843, "loss": 0.005915298592299223, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 40.26940070478047, "step_time": 0.33274981880187987} +{"epoch": 0, "iter": 16404, "iter_tflops": 38.43156581376995, "iter_time": 0.348662239074707, "loss": 0.002630416536703706, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 42.08888492726852, "step_time": 0.3183651885986328} +{"epoch": 0, "iter": 16405, "iter_tflops": 28.438917453278183, "iter_time": 0.7254528427124023, "loss": 0.15808366239070892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.006278296155703, "step_time": 0.687559226989746} +{"epoch": 0, "iter": 16406, "iter_tflops": 14.870018635711835, "iter_time": 1.3874288940429689, "loss": 0.06555154174566269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.677357392911553, "step_time": 1.1670914974212647} +{"epoch": 0, "iter": 16407, "iter_tflops": 40.614038745796066, "iter_time": 0.507979362487793, "loss": 0.061499014496803284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.446831928117106, "step_time": 0.46417466926574713} +{"epoch": 0, "iter": 16408, "iter_tflops": 43.29103793484531, "iter_time": 0.47656731033325195, "loss": 0.13828088343143463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.401927128679944, "step_time": 0.43523744201660153} +{"epoch": 0, "iter": 16409, "iter_tflops": 17.31026444436054, "iter_time": 1.1918416137695313, "loss": 0.14282330870628357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.407497776333187, "step_time": 1.1207983703613282} +{"epoch": 0, "iter": 16410, "iter_tflops": 15.458365185633697, "iter_time": 1.334623245239258, "loss": 0.10530516505241394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.184613236336943, "step_time": 1.0221198329925538} +{"epoch": 0, "iter": 16411, "iter_tflops": 36.04941643114624, "iter_time": 0.5723003463745118, "loss": 0.07974265515804291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.253783189764825, "step_time": 0.5255822963714599} 
+{"epoch": 0, "iter": 16412, "iter_tflops": 38.514526738103825, "iter_time": 0.5356704406738281, "loss": 0.09624437987804413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.403504559424015, "step_time": 0.48654217910766595} +{"epoch": 0, "iter": 16413, "iter_tflops": 11.908266752002412, "iter_time": 1.0501931915283205, "loss": 0.009994560852646828, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 12.708465994898743, "step_time": 0.98406689453125} +{"epoch": 0, "iter": 16414, "iter_tflops": 10.458050243227198, "iter_time": 1.1958233489990233, "loss": 0.022304687649011612, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 13.888587648345364, "step_time": 0.900450138092041} +{"epoch": 0, "iter": 16415, "iter_tflops": 33.532435444084925, "iter_time": 0.37295175552368165, "loss": 0.0020783531945198774, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 36.92321781379577, "step_time": 0.33870235061645515} +{"epoch": 0, "iter": 16416, "iter_tflops": 36.77428097991763, "iter_time": 0.340074104309082, "loss": 0.014493443071842194, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 40.28799508898175, "step_time": 0.3104145698547363} +{"epoch": 0, "iter": 16417, "iter_tflops": 39.58993570690052, "iter_time": 0.5211196517944335, "loss": 0.5038915276527405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.93252534365272, "step_time": 0.48054693603515625} +{"epoch": 0, "iter": 16418, "iter_tflops": 38.26895189135736, "iter_time": 0.5391078796386719, "loss": 0.4825453758239746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.89514598945602, "step_time": 0.49244591522216796} +{"epoch": 0, "iter": 16419, "iter_tflops": 39.353814072720866, "iter_time": 0.5242463531494141, "loss": 0.5434063076972961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99122834958221, "step_time": 0.47989076614379883} +{"epoch": 0, "iter": 16420, "iter_tflops": 35.28084689731961, "iter_time": 0.5847675247192383, "loss": 0.5194425582885742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.42738216282998, "step_time": 
0.5368852195739746} +{"epoch": 0, "iter": 16421, "iter_tflops": 25.04869354007367, "iter_time": 0.8236395034790038, "loss": 0.12154689431190491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.600148055914552, "step_time": 0.7474993782043458} +{"epoch": 0, "iter": 16422, "iter_tflops": 45.4183312922445, "iter_time": 0.4542459602355957, "loss": 0.0772375762462616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.29370766629525, "step_time": 0.41853401756286623} +{"epoch": 0, "iter": 16423, "iter_tflops": 42.25853370401762, "iter_time": 0.48821129608154296, "loss": 0.14092746376991272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.52853906909418, "step_time": 0.45314639854431155} +{"epoch": 0, "iter": 16424, "iter_tflops": 48.09340216027714, "iter_time": 0.42897970581054695, "loss": 0.07262160629034042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14506955212517, "step_time": 0.3956480197906494} +{"epoch": 0, "iter": 16425, "iter_tflops": 27.038115461971792, "iter_time": 0.7630374069213868, "loss": 0.5578913688659668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.598455940694972, "step_time": 0.7214058532714844} +{"epoch": 0, "iter": 16426, "iter_tflops": 12.793544047614983, "iter_time": 1.6126175384521484, "loss": 0.62449049949646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.792114159922702, "step_time": 1.3064174499511718} +{"epoch": 0, "iter": 16427, "iter_tflops": 46.89804281466498, "iter_time": 0.43991374206542966, "loss": 0.6592842936515808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.77841618939102, "step_time": 0.4062965145111084} +{"epoch": 0, "iter": 16428, "iter_tflops": 51.1819354829487, "iter_time": 0.40309326553344726, "loss": 0.684951901435852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.12289211581428, "step_time": 0.37427451133728024} +{"epoch": 0, "iter": 16429, "iter_tflops": 25.406741810979064, "iter_time": 0.8120322418212892, "loss": 0.001061873510479927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.700631832400525, 
"step_time": 0.7726818466186524} +{"epoch": 0, "iter": 16430, "iter_tflops": 21.366911639221748, "iter_time": 0.9655627288818358, "loss": 0.006436456926167011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.116427121903232, "step_time": 0.8554788570404053} +{"epoch": 0, "iter": 16431, "iter_tflops": 56.12688790304382, "iter_time": 0.3675795021057129, "loss": 0.004106543492525816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.9858772311909, "step_time": 0.33283538818359376} +{"epoch": 0, "iter": 16432, "iter_tflops": 59.78508541981713, "iter_time": 0.34508763122558594, "loss": 0.004762155003845692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.82790843886464, "step_time": 0.3134095249176026} +{"epoch": 0, "iter": 16433, "iter_tflops": 14.47474828362942, "iter_time": 0.6075146560668945, "loss": 0.004690001253038645, "lr": 3e-05, "seqlen": 3552.0, "step_tflops": 15.489224910941703, "step_time": 0.56772509765625} +{"epoch": 0, "iter": 16434, "iter_tflops": 9.29945325025178, "iter_time": 0.945606315612793, "loss": 0.013813210651278496, "lr": 3e-05, "seqlen": 3552.0, "step_tflops": 10.44509218960268, "step_time": 0.841890293121338} +{"epoch": 0, "iter": 16435, "iter_tflops": 23.223829439038308, "iter_time": 0.37864649963378905, "loss": 0.004710397683084011, "lr": 3e-05, "seqlen": 3552.0, "step_tflops": 25.64652600992165, "step_time": 0.34287769508361815} +{"epoch": 0, "iter": 16436, "iter_tflops": 23.490095575161003, "iter_time": 0.3743544464111328, "loss": 0.0018264559330418706, "lr": 3e-05, "seqlen": 3552.0, "step_tflops": 25.753964888303745, "step_time": 0.3414472980499268} +{"epoch": 0, "iter": 16437, "iter_tflops": 27.669265558268556, "iter_time": 0.7456321334838867, "loss": 0.46556738018989563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.231158821949176, "step_time": 0.7057911605834961} +{"epoch": 0, "iter": 16438, "iter_tflops": 16.19518014302832, "iter_time": 1.273903305053711, "loss": 0.49462181329727173, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.5040043343831, "step_time": 1.0061982612609865} +{"epoch": 0, "iter": 16439, "iter_tflops": 37.272109224269784, "iter_time": 0.5535263214111329, "loss": 0.49160492420196533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.91753826953196, "step_time": 0.504211503982544} +{"epoch": 0, "iter": 16440, "iter_tflops": 40.83992169769439, "iter_time": 0.5051697616577148, "loss": 0.6022838354110718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.838881288995026, "step_time": 0.46011615180969234} +{"epoch": 0, "iter": 16441, "iter_tflops": 18.246272112398618, "iter_time": 1.1307018432617186, "loss": 0.5985606908798218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.3981157268404, "step_time": 1.0635617294311523} +{"epoch": 0, "iter": 16442, "iter_tflops": 18.811983123869805, "iter_time": 1.0966995544433593, "loss": 0.6258997917175293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.286539746534935, "step_time": 0.9692084178924562} +{"epoch": 0, "iter": 16443, "iter_tflops": 46.009557032344965, "iter_time": 0.4484088706970215, "loss": 0.6893492937088013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.67766641028498, "step_time": 0.41529916763305663} +{"epoch": 0, "iter": 16444, "iter_tflops": 48.015936672144946, "iter_time": 0.42967179107666015, "loss": 0.5552416443824768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.80082774668801, "step_time": 0.39827729415893554} +{"epoch": 0, "iter": 16445, "iter_tflops": 31.37020710954038, "iter_time": 0.6576651992797852, "loss": 0.19961926341056824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.464134101300814, "step_time": 0.6165135917663573} +{"epoch": 0, "iter": 16446, "iter_tflops": 13.09869316408565, "iter_time": 1.575049758911133, "loss": 0.18891958892345428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.220273409724815, "step_time": 1.2719325370788572} +{"epoch": 0, "iter": 16447, "iter_tflops": 33.73509997611854, "iter_time": 0.611561653137207, "loss": 0.19588446617126465, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 38.0409393280492, "step_time": 0.5423392238616944} +{"epoch": 0, "iter": 16448, "iter_tflops": 38.45003316919273, "iter_time": 0.5365689392089844, "loss": 0.1862410604953766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.180270017072615, "step_time": 0.48911715126037597} +{"epoch": 0, "iter": 16449, "iter_tflops": 18.067995260968093, "iter_time": 1.1418584747314453, "loss": 0.5281868577003479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.416031553978776, "step_time": 1.0625803451538087} +{"epoch": 0, "iter": 16450, "iter_tflops": 14.632285558104176, "iter_time": 1.4099706726074217, "loss": 0.5028283596038818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.612853044372066, "step_time": 1.1713657894134522} +{"epoch": 0, "iter": 16451, "iter_tflops": 36.376088394540254, "iter_time": 0.5671608581542968, "loss": 0.47207656502723694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.18624635560494, "step_time": 0.5133869266510009} +{"epoch": 0, "iter": 16452, "iter_tflops": 37.20098506611991, "iter_time": 0.5545846023559571, "loss": 0.37895530462265015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56763008401403, "step_time": 0.5085604820251465} +{"epoch": 0, "iter": 16453, "iter_tflops": 31.847002793016046, "iter_time": 0.6478190002441406, "loss": 0.11220495402812958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.247551795305014, "step_time": 0.5853199005126954} +{"epoch": 0, "iter": 16454, "iter_tflops": 38.04297070762702, "iter_time": 0.5423102645874024, "loss": 0.19643515348434448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.68726757656474, "step_time": 0.49490155410766606} +{"epoch": 0, "iter": 16455, "iter_tflops": 37.58849442229479, "iter_time": 0.5488672485351563, "loss": 0.11512286216020584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.04470725235852, "step_time": 0.5026493034362792} +{"epoch": 0, "iter": 16456, "iter_tflops": 40.984817813041985, "iter_time": 0.503383804321289, "loss": 0.14241854846477509, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.943606642692345, "step_time": 0.45904401206970213} +{"epoch": 0, "iter": 16457, "iter_tflops": 19.86725053580458, "iter_time": 1.0384473419189453, "loss": 0.2530989646911621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.447592807507014, "step_time": 0.961930492401123} +{"epoch": 0, "iter": 16458, "iter_tflops": 28.504803436988407, "iter_time": 0.7237760314941406, "loss": 0.20485927164554596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.59572376387571, "step_time": 0.5795947189331055} +{"epoch": 0, "iter": 16459, "iter_tflops": 44.990480655504705, "iter_time": 0.4585657501220703, "loss": 0.3307439684867859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.76779457010311, "step_time": 0.4230474987030029} +{"epoch": 0, "iter": 16460, "iter_tflops": 48.401656631368915, "iter_time": 0.42624767303466804, "loss": 0.31976455450057983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.65174038275911, "step_time": 0.3918406753540039} +{"epoch": 0, "iter": 16461, "iter_tflops": 12.495508194497296, "iter_time": 1.1995874938964843, "loss": 0.18837305903434753, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 12.967655311325634, "step_time": 1.1559109954833986} +{"epoch": 0, "iter": 16462, "iter_tflops": 11.651661346183875, "iter_time": 1.286465072631836, "loss": 0.07840345054864883, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 15.11246853870141, "step_time": 0.9918601531982422} +{"epoch": 0, "iter": 16463, "iter_tflops": 29.411114118896585, "iter_time": 0.5096527557373047, "loss": 0.1863003373146057, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 32.175403794252894, "step_time": 0.4658668918609619} +{"epoch": 0, "iter": 16464, "iter_tflops": 29.301533406739413, "iter_time": 0.5115587348937988, "loss": 0.21417485177516937, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 32.20880067931378, "step_time": 0.4653838405609131} +{"epoch": 0, "iter": 16465, "iter_tflops": 29.461519249904008, "iter_time": 0.7002725601196289, "loss": 
0.509796142578125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.560881877190333, "step_time": 0.6536919212341308} +{"epoch": 0, "iter": 16466, "iter_tflops": 12.340500016668715, "iter_time": 1.6718199005126955, "loss": 0.4345422387123108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.067455611935115, "step_time": 1.4665831604003905} +{"epoch": 0, "iter": 16467, "iter_tflops": 17.00160902588607, "iter_time": 1.2134788818359374, "loss": 0.45866337418556213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.460036556042745, "step_time": 1.117608486175537} +{"epoch": 0, "iter": 16468, "iter_tflops": 34.29508955048086, "iter_time": 0.6015757293701172, "loss": 0.5332930684089661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.99572453837503, "step_time": 0.5429846057891845} +{"epoch": 0, "iter": 16469, "iter_tflops": 12.681351687397136, "iter_time": 1.1948978271484374, "loss": 0.3403773009777069, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 13.508480834592184, "step_time": 1.1217338027954102} +{"epoch": 0, "iter": 16470, "iter_tflops": 13.128588958507754, "iter_time": 1.1541925506591797, "loss": 0.2866397500038147, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 17.010298219445712, "step_time": 0.8908085784912108} +{"epoch": 0, "iter": 16471, "iter_tflops": 22.21729165814567, "iter_time": 0.6820327072143555, "loss": 0.3910697102546692, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 23.95626416265416, "step_time": 0.632524314880371} +{"epoch": 0, "iter": 16472, "iter_tflops": 22.953917509823636, "iter_time": 0.660145248413086, "loss": 0.3048371374607086, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.686238426718923, "step_time": 0.613820514678955} +{"epoch": 0, "iter": 16473, "iter_tflops": 21.612731534871656, "iter_time": 0.9545805664062501, "loss": 0.3215077817440033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.033592528054836, "step_time": 0.8956958618164061} +{"epoch": 0, "iter": 16474, "iter_tflops": 9.611345979355656, "iter_time": 2.1465353088378905, 
"loss": 0.2132750302553177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.575328898705502, "step_time": 1.7823332443237303} +{"epoch": 0, "iter": 16475, "iter_tflops": 11.930236637540814, "iter_time": 1.729311340332031, "loss": 0.33050137758255005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.991386848706185, "step_time": 1.4745567207336427} +{"epoch": 0, "iter": 16476, "iter_tflops": 48.420775637069546, "iter_time": 0.42607936859130857, "loss": 0.26132506132125854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.61851862571113, "step_time": 0.3920880718231201} +{"epoch": 0, "iter": 16477, "iter_tflops": 18.170992955131418, "iter_time": 0.8699321746826173, "loss": 0.40957576036453247, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 19.122644980525955, "step_time": 0.8266393814086914} +{"epoch": 0, "iter": 16478, "iter_tflops": 10.56415135915173, "iter_time": 1.4963370819091797, "loss": 0.4449305236339569, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 12.933299214698108, "step_time": 1.222235034942627} +{"epoch": 0, "iter": 16479, "iter_tflops": 28.29647089724739, "iter_time": 0.5586396789550782, "loss": 0.3241026997566223, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.200581970718222, "step_time": 0.5234181060791016} +{"epoch": 0, "iter": 16480, "iter_tflops": 26.790139013644694, "iter_time": 0.5900503692626954, "loss": 0.467780739068985, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 28.385317163805524, "step_time": 0.5568911323547364} +{"epoch": 0, "iter": 16481, "iter_tflops": 28.37398465799971, "iter_time": 0.7271130142211913, "loss": 0.5122230052947998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.12483715759025, "step_time": 0.6848532791137695} +{"epoch": 0, "iter": 16482, "iter_tflops": 23.117608028800692, "iter_time": 0.8924406661987304, "loss": 0.4690655469894409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.528281429725347, "step_time": 0.7231803836822509} +{"epoch": 0, "iter": 16483, "iter_tflops": 39.38308802092394, "iter_time": 
0.5238566741943359, "loss": 0.5567221641540527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.93411970606009, "step_time": 0.48052909088134765} +{"epoch": 0, "iter": 16484, "iter_tflops": 42.07465132366367, "iter_time": 0.49034496688842777, "loss": 0.5741760730743408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74398493544091, "step_time": 0.45101216125488286} +{"epoch": 0, "iter": 16485, "iter_tflops": 20.561010038678326, "iter_time": 1.003408561706543, "loss": 0.17884229123592377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.207410118434314, "step_time": 0.929018440246582} +{"epoch": 0, "iter": 16486, "iter_tflops": 19.470872984991598, "iter_time": 1.0595874938964844, "loss": 0.15936556458473206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.905668486283222, "step_time": 0.8630209827423097} +{"epoch": 0, "iter": 16487, "iter_tflops": 45.29365586031545, "iter_time": 0.4554963188171387, "loss": 0.16044482588768005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11181707335049, "step_time": 0.4200841007232666} +{"epoch": 0, "iter": 16488, "iter_tflops": 49.17822602800549, "iter_time": 0.41951683044433596, "loss": 0.2011440545320511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.385416237775104, "step_time": 0.3864556083679199} +{"epoch": 0, "iter": 16489, "iter_tflops": 26.576459511013148, "iter_time": 0.7762920227050781, "loss": 0.4615500867366791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.000648290472235, "step_time": 0.7368077087402344} +{"epoch": 0, "iter": 16490, "iter_tflops": 16.48723576021808, "iter_time": 1.2513373260498044, "loss": 0.507807731628418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.386513356555007, "step_time": 1.0641982460021973} +{"epoch": 0, "iter": 16491, "iter_tflops": 46.962416173886396, "iter_time": 0.43931073379516605, "loss": 0.5262103080749512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.86117647743357, "step_time": 0.4056353969573974} +{"epoch": 0, "iter": 16492, "iter_tflops": 45.82259095806078, 
"iter_time": 0.4502384757995605, "loss": 0.2378283143043518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.60351777926042, "step_time": 0.41591996765136713} +{"epoch": 0, "iter": 16493, "iter_tflops": 31.62446561048314, "iter_time": 0.6523776168823243, "loss": 0.0022688868921250105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.67049904560315, "step_time": 0.612735008239746} +{"epoch": 0, "iter": 16494, "iter_tflops": 11.44733395199041, "iter_time": 1.8022618713378906, "loss": 0.011336339637637138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.60642406096919, "step_time": 1.5162759456634523} +{"epoch": 0, "iter": 16495, "iter_tflops": 46.758419640897316, "iter_time": 0.4412273483276367, "loss": 0.0014699301682412624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.979029394753475, "step_time": 0.396911865234375} +{"epoch": 0, "iter": 16496, "iter_tflops": 49.49177163504199, "iter_time": 0.41685906219482427, "loss": 0.0023265453055500984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.697600117997396, "step_time": 0.37718461990356444} +{"epoch": 0, "iter": 16497, "iter_tflops": 34.66053546874131, "iter_time": 0.5952329711914063, "loss": 0.1610419601202011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.582152967127605, "step_time": 0.5347315254211427} +{"epoch": 0, "iter": 16498, "iter_tflops": 37.62755920829186, "iter_time": 0.5482974166870117, "loss": 0.13496792316436768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.864188992339, "step_time": 0.49281006050109855} +{"epoch": 0, "iter": 16499, "iter_tflops": 41.76032268706958, "iter_time": 0.4940357780456543, "loss": 0.10235026478767395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.43591608720041, "step_time": 0.4540701560974121} +{"epoch": 0, "iter": 16500, "iter_tflops": 43.26972263009776, "iter_time": 0.47680207443237305, "loss": 0.1194075420498848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.48724568242368, "step_time": 0.4344554672241211} +{"epoch": 0, "iter": 16501, "iter_tflops": 
28.219127034946084, "iter_time": 0.7311031799316405, "loss": 0.13450421392917633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.36971137115085, "step_time": 0.6576755924224853} +{"epoch": 0, "iter": 16502, "iter_tflops": 47.72259636564686, "iter_time": 0.4323128890991211, "loss": 0.11868032813072205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.138342681081525, "step_time": 0.39569906616210937} +{"epoch": 0, "iter": 16503, "iter_tflops": 51.04818798414476, "iter_time": 0.4041493797302246, "loss": 0.15578912198543549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.23706190906142, "step_time": 0.37350092124938966} +{"epoch": 0, "iter": 16504, "iter_tflops": 53.72922551336331, "iter_time": 0.38398270797729495, "loss": 0.17139579355716705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.40520445711813, "step_time": 0.35324066925048825} +{"epoch": 0, "iter": 16505, "iter_tflops": 45.41517719174711, "iter_time": 0.4542775077819824, "loss": 0.1948823630809784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.98352458493989, "step_time": 0.41275787734985353} +{"epoch": 0, "iter": 16506, "iter_tflops": 47.352540489152375, "iter_time": 0.4356913757324219, "loss": 0.30369362235069275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.58402499605992, "step_time": 0.3999512157440186} +{"epoch": 0, "iter": 16507, "iter_tflops": 50.15776349940855, "iter_time": 0.41132403182983396, "loss": 0.378572553396225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.353243897744804, "step_time": 0.3795742816925049} +{"epoch": 0, "iter": 16508, "iter_tflops": 51.66145473924232, "iter_time": 0.3993517723083496, "loss": 0.2855308949947357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.09782796960476, "step_time": 0.3677699165344238} +{"epoch": 0, "iter": 16509, "iter_tflops": 27.480933640433673, "iter_time": 0.750742088317871, "loss": 0.09254639595746994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.092101787479898, "step_time": 0.709164764404297} +{"epoch": 0, "iter": 16510, 
"iter_tflops": 17.197875066229848, "iter_time": 1.199630386352539, "loss": 0.08289854228496552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.012206632113386, "step_time": 0.981862298965454} +{"epoch": 0, "iter": 16511, "iter_tflops": 41.17063424956109, "iter_time": 0.5011118698120117, "loss": 0.07211881875991821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41709572511011, "step_time": 0.4542583179473877} +{"epoch": 0, "iter": 16512, "iter_tflops": 40.16437268226026, "iter_time": 0.513666519165039, "loss": 0.06999190896749496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.42619290952165, "step_time": 0.46439031028747557} +{"epoch": 0, "iter": 16513, "iter_tflops": 25.939070804410335, "iter_time": 0.7953674850463869, "loss": 0.29905930161476135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.17102819949872, "step_time": 0.7323514556884765} +{"epoch": 0, "iter": 16514, "iter_tflops": 39.256017403586846, "iter_time": 0.5255523834228516, "loss": 0.4657100737094879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79437498359157, "step_time": 0.48209825515747073} +{"epoch": 0, "iter": 16515, "iter_tflops": 39.42708989166228, "iter_time": 0.5232720336914063, "loss": 0.5083808302879333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.302428416076204, "step_time": 0.47644195175170895} +{"epoch": 0, "iter": 16516, "iter_tflops": 39.5470914098959, "iter_time": 0.5216842193603516, "loss": 0.44669798016548157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36917685789083, "step_time": 0.4757086715698242} +{"epoch": 0, "iter": 16517, "iter_tflops": 20.098400636986742, "iter_time": 1.0265042419433594, "loss": 0.002710955450311303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.387433958697386, "step_time": 0.9646362228393555} +{"epoch": 0, "iter": 16518, "iter_tflops": 16.517565427842115, "iter_time": 1.2490396118164062, "loss": 0.004247528966516256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.915545054448973, "step_time": 1.0906951637268065} +{"epoch": 0, 
"iter": 16519, "iter_tflops": 53.16267533841607, "iter_time": 0.388074779510498, "loss": 0.001462303102016449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.34527039326748, "step_time": 0.35360352897644043} +{"epoch": 0, "iter": 16520, "iter_tflops": 58.61314838505642, "iter_time": 0.3519874649047851, "loss": 0.007820774801075459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.26964020439934, "step_time": 0.3210083866119385} +{"epoch": 0, "iter": 16521, "iter_tflops": 43.228393590826684, "iter_time": 0.47725792694091795, "loss": 0.821469247341156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.96832582345821, "step_time": 0.43925545883178707} +{"epoch": 0, "iter": 16522, "iter_tflops": 43.49904454199369, "iter_time": 0.4742884292602539, "loss": 0.5964888334274292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.265836221884044, "step_time": 0.43649060630798336} +{"epoch": 0, "iter": 16523, "iter_tflops": 46.969622556295974, "iter_time": 0.43924333190917975, "loss": 0.634497344493866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.75135945696521, "step_time": 0.4065131206512451} +{"epoch": 0, "iter": 16524, "iter_tflops": 44.09665645817109, "iter_time": 0.46786072158813474, "loss": 0.4233987331390381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.50037638522104, "step_time": 0.4343353691101074} +{"epoch": 0, "iter": 16525, "iter_tflops": 36.25648170347191, "iter_time": 0.569031867980957, "loss": 0.44621652364730835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.99596458802558, "step_time": 0.5290571403503418} +{"epoch": 0, "iter": 16526, "iter_tflops": 25.935813975487335, "iter_time": 0.7954673614501953, "loss": 0.5568154454231262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.75044970093338, "step_time": 0.5770862655639648} +{"epoch": 0, "iter": 16527, "iter_tflops": 36.175861742493225, "iter_time": 0.5702999877929686, "loss": 0.37024593353271484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66046283378359, "step_time": 0.5201929588317871} 
+{"epoch": 0, "iter": 16528, "iter_tflops": 40.3200518774342, "iter_time": 0.5116832084655761, "loss": 0.46678563952445984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.743909653964984, "step_time": 0.47163350677490234} +{"epoch": 0, "iter": 16529, "iter_tflops": 17.013571456516313, "iter_time": 1.2126256713867187, "loss": 0.20951992273330688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.347473596397478, "step_time": 1.1244650878906248} +{"epoch": 0, "iter": 16530, "iter_tflops": 17.445392729011505, "iter_time": 1.18260986328125, "loss": 0.296142041683197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.36348976806889, "step_time": 0.8134169902801512} +{"epoch": 0, "iter": 16531, "iter_tflops": 43.24269603221806, "iter_time": 0.47710007476806643, "loss": 0.3412910997867584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.470397614436436, "step_time": 0.4346096630096436} +{"epoch": 0, "iter": 16532, "iter_tflops": 38.35984364857414, "iter_time": 0.5378304901123047, "loss": 0.43263378739356995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.010216926213275, "step_time": 0.49109704780578617} +{"epoch": 0, "iter": 16533, "iter_tflops": 29.55745380496926, "iter_time": 0.6979996871948242, "loss": 0.04342957213521004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.300663890333446, "step_time": 0.6387204170227051} +{"epoch": 0, "iter": 16534, "iter_tflops": 44.10505443232061, "iter_time": 0.4677716369628907, "loss": 0.04320681840181351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.1030647210852, "step_time": 0.4117730846405029} +{"epoch": 0, "iter": 16535, "iter_tflops": 51.67304330262395, "iter_time": 0.39926221084594726, "loss": 0.027922354638576508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.419736801758354, "step_time": 0.3656715660095215} +{"epoch": 0, "iter": 16536, "iter_tflops": 53.88783038608236, "iter_time": 0.38285255432128906, "loss": 0.04154975339770317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.61680020640823, "step_time": 
0.35196553611755377} +{"epoch": 0, "iter": 16537, "iter_tflops": 30.762074623658172, "iter_time": 0.6706665191650392, "loss": 0.4586237668991089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.168709604291145, "step_time": 0.6220047073364258} +{"epoch": 0, "iter": 16538, "iter_tflops": 16.095880489015073, "iter_time": 1.2817623443603519, "loss": 0.4605181813240051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.489982807280896, "step_time": 1.0068868141174316} +{"epoch": 0, "iter": 16539, "iter_tflops": 48.32703877044348, "iter_time": 0.4269058074951172, "loss": 0.6648609638214111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.41667800936097, "step_time": 0.39359788322448724} +{"epoch": 0, "iter": 16540, "iter_tflops": 49.25318931315131, "iter_time": 0.4188783264160157, "loss": 0.6303028464317322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.1454011132472, "step_time": 0.3882009181976318} +{"epoch": 0, "iter": 16541, "iter_tflops": 31.259327264327293, "iter_time": 0.6599980010986328, "loss": 0.0330909863114357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.33312261266639, "step_time": 0.6189367179870605} +{"epoch": 0, "iter": 16542, "iter_tflops": 11.940503084110105, "iter_time": 1.7278244781494139, "loss": 0.04728458821773529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.970173804783613, "step_time": 1.4767957649230956} +{"epoch": 0, "iter": 16543, "iter_tflops": 19.210581844584713, "iter_time": 1.0739442291259764, "loss": 0.06631769984960556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.691806187151546, "step_time": 0.9091869258880616} +{"epoch": 0, "iter": 16544, "iter_tflops": 41.25384132588649, "iter_time": 0.5001011505126953, "loss": 0.11634533107280731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41809691263613, "step_time": 0.45424830436706537} +{"epoch": 0, "iter": 16545, "iter_tflops": 17.837023531165226, "iter_time": 0.9990340881347657, "loss": 0.2875593900680542, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 19.22208511168071, 
"step_time": 0.9270479469299316} +{"epoch": 0, "iter": 16546, "iter_tflops": 26.421131784061465, "iter_time": 0.6744523544311524, "loss": 0.4311358630657196, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 28.460770535383418, "step_time": 0.6261177825927735} +{"epoch": 0, "iter": 16547, "iter_tflops": 27.71150691191574, "iter_time": 0.6430467529296875, "loss": 0.49318256974220276, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 29.750045409816337, "step_time": 0.5989837760925293} +{"epoch": 0, "iter": 16548, "iter_tflops": 26.8425989812114, "iter_time": 0.6638624877929687, "loss": 0.224850594997406, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 28.919551851316907, "step_time": 0.61618501663208} +{"epoch": 0, "iter": 16549, "iter_tflops": 18.281667926916263, "iter_time": 1.1285126495361326, "loss": 0.7261288166046143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.58975568177405, "step_time": 1.0531572647094727} +{"epoch": 0, "iter": 16550, "iter_tflops": 18.430444548143203, "iter_time": 1.1194029235839844, "loss": 0.5764641761779785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.175682625179036, "step_time": 0.9303476181030275} +{"epoch": 0, "iter": 16551, "iter_tflops": 41.54353582434377, "iter_time": 0.4966138076782226, "loss": 0.7319439053535461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.783875474506424, "step_time": 0.4606812896728516} +{"epoch": 0, "iter": 16552, "iter_tflops": 44.98774120326821, "iter_time": 0.45859367370605464, "loss": 0.6200736165046692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.25359323633878, "step_time": 0.42755558967590335} +{"epoch": 0, "iter": 16553, "iter_tflops": 25.08503865774269, "iter_time": 0.8224461517333984, "loss": 0.042930010706186295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.399766584322208, "step_time": 0.7814877243041993} +{"epoch": 0, "iter": 16554, "iter_tflops": 17.39111539130277, "iter_time": 1.1863007659912108, "loss": 0.025227194651961327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.78812674246231, "step_time": 0.9053439865112305} +{"epoch": 0, "iter": 16555, "iter_tflops": 58.48463752502339, "iter_time": 0.35276090240478514, "loss": 0.0475047305226326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.72782359126077, "step_time": 0.31873609161376953} +{"epoch": 0, "iter": 16556, "iter_tflops": 50.56826424103084, "iter_time": 0.4079850044250489, "loss": 0.02907293289899826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.69401208600183, "step_time": 0.3772093639373779} +{"epoch": 0, "iter": 16557, "iter_tflops": 2.686236430998498, "iter_time": 0.49574574279785155, "loss": 0.7259613275527954, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 2.9152715116774037, "step_time": 0.45679802703857425} +{"epoch": 0, "iter": 16558, "iter_tflops": 3.0190813937903735, "iter_time": 0.44109121322631833, "loss": 0.6327593922615051, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 3.3347464948070336, "step_time": 0.39933778381347657} +{"epoch": 0, "iter": 16559, "iter_tflops": 3.125221764818426, "iter_time": 0.4261106491088867, "loss": 0.6152473092079163, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 3.381312375098395, "step_time": 0.3938382873535156} +{"epoch": 0, "iter": 16560, "iter_tflops": 3.2232258824854054, "iter_time": 0.41315449905395507, "loss": 0.32591530680656433, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 3.4668418956813616, "step_time": 0.38412200927734375} +{"epoch": 0, "iter": 16561, "iter_tflops": 27.650386458567954, "iter_time": 0.7461412353515625, "loss": 0.645581841468811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.252305918517028, "step_time": 0.7052809295654296} +{"epoch": 0, "iter": 16562, "iter_tflops": 16.965967003567297, "iter_time": 1.2160281524658203, "loss": 0.6553512215614319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.854860811716428, "step_time": 1.0942055587768555} +{"epoch": 0, "iter": 16563, "iter_tflops": 35.51209285291595, "iter_time": 0.5809596633911133, "loss": 0.4241970479488373, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 38.57366892436241, "step_time": 0.5348491363525391} +{"epoch": 0, "iter": 16564, "iter_tflops": 38.08882839458059, "iter_time": 0.541657341003418, "loss": 0.5635390281677246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27755288789729, "step_time": 0.49981387138366695} +{"epoch": 0, "iter": 16565, "iter_tflops": 15.066959559352929, "iter_time": 1.3692937469482422, "loss": 0.38884061574935913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.0377658715256, "step_time": 1.2864069519042969} +{"epoch": 0, "iter": 16566, "iter_tflops": 17.204579950282163, "iter_time": 1.199162872314453, "loss": 0.4573695957660675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.89806200712942, "step_time": 0.9009973640441895} +{"epoch": 0, "iter": 16567, "iter_tflops": 48.47492196625826, "iter_time": 0.4256034393310547, "loss": 0.3833365738391876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.50698343765174, "step_time": 0.3929209442138672} +{"epoch": 0, "iter": 16568, "iter_tflops": 47.6319584738798, "iter_time": 0.43313552856445314, "loss": 0.33533501625061035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.21554425858709, "step_time": 0.4028287467956544} +{"epoch": 0, "iter": 16569, "iter_tflops": 36.09513313272427, "iter_time": 0.5715754928588868, "loss": 0.1619826853275299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.76056392453245, "step_time": 0.5322702102661132} +{"epoch": 0, "iter": 16570, "iter_tflops": 9.399741615994103, "iter_time": 2.1948575134277344, "loss": 0.21249979734420776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.971587404777395, "step_time": 1.880410987854004} +{"epoch": 0, "iter": 16571, "iter_tflops": 12.308503794718419, "iter_time": 1.6761658325195312, "loss": 0.10453867167234421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.183170034667294, "step_time": 1.4546179351806643} +{"epoch": 0, "iter": 16572, "iter_tflops": 20.938871372028586, "iter_time": 0.9853011245727539, "loss": 0.16617931425571442, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 26.410306350869327, "step_time": 0.7811758499145509} +{"epoch": 0, "iter": 16573, "iter_tflops": 11.649941510252265, "iter_time": 1.3147241363525393, "loss": 0.44534391164779663, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 12.3947565708304, "step_time": 1.2357208633422854} +{"epoch": 0, "iter": 16574, "iter_tflops": 15.046309096920739, "iter_time": 1.0179545822143554, "loss": 0.45437175035476685, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 17.87013541199887, "step_time": 0.8570981101989746} +{"epoch": 0, "iter": 16575, "iter_tflops": 28.058654354848798, "iter_time": 0.5458729095458984, "loss": 0.3377187252044678, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 29.835329527864012, "step_time": 0.5133665199279785} +{"epoch": 0, "iter": 16576, "iter_tflops": 28.23287598224557, "iter_time": 0.5425043945312501, "loss": 0.2579709589481354, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 29.991735179943365, "step_time": 0.5106893348693848} +{"epoch": 0, "iter": 16577, "iter_tflops": 24.16801476380276, "iter_time": 0.8536528015136718, "loss": 0.2486475557088852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.41774262578914, "step_time": 0.8116807937622069} +{"epoch": 0, "iter": 16578, "iter_tflops": 22.145541593958882, "iter_time": 0.9316138610839844, "loss": 0.24744853377342224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.908685688657176, "step_time": 0.7963002738952637} +{"epoch": 0, "iter": 16579, "iter_tflops": 51.867054291867625, "iter_time": 0.3977687530517578, "loss": 0.2551191449165344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.60120161572706, "step_time": 0.36449921417236325} +{"epoch": 0, "iter": 16580, "iter_tflops": 47.520799846248224, "iter_time": 0.4341487007141113, "loss": 0.15434636175632477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.30995388582819, "step_time": 0.4020875473022461} +{"epoch": 0, "iter": 16581, "iter_tflops": 23.407395870155437, "iter_time": 0.4237653045654297, "loss": 0.03113144263625145, "lr": 
3e-05, "seqlen": 4000.0, "step_tflops": 25.799754925406862, "step_time": 0.38447040557861323} +{"epoch": 0, "iter": 16582, "iter_tflops": 23.937495942095467, "iter_time": 0.4143809471130371, "loss": 0.03416857123374939, "lr": 3e-05, "seqlen": 4000.0, "step_tflops": 26.736288391138782, "step_time": 0.37100296401977545} +{"epoch": 0, "iter": 16583, "iter_tflops": 27.759687859784908, "iter_time": 0.357325424194336, "loss": 0.03927934542298317, "lr": 3e-05, "seqlen": 4000.0, "step_tflops": 30.368196289210033, "step_time": 0.3266325778961181} +{"epoch": 0, "iter": 16584, "iter_tflops": 26.430199184680035, "iter_time": 0.3752995643615723, "loss": 0.03419584780931473, "lr": 3e-05, "seqlen": 4000.0, "step_tflops": 28.889203921188695, "step_time": 0.3433546409606934} +{"epoch": 0, "iter": 16585, "iter_tflops": 22.9000527971196, "iter_time": 0.9009190368652344, "loss": 0.11977220326662064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.982490905050753, "step_time": 0.8602564926147461} +{"epoch": 0, "iter": 16586, "iter_tflops": 12.267157089600126, "iter_time": 1.6818153839111327, "loss": 0.11865534633398056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.466081452047836, "step_time": 1.4261701469421386} +{"epoch": 0, "iter": 16587, "iter_tflops": 38.42859366040726, "iter_time": 0.536868293762207, "loss": 0.12460733950138092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.142064008723324, "step_time": 0.4895605850219727} +{"epoch": 0, "iter": 16588, "iter_tflops": 40.887332299657125, "iter_time": 0.5045839958190919, "loss": 0.1632416844367981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.89493058664196, "step_time": 0.45954171752929684} +{"epoch": 0, "iter": 16589, "iter_tflops": 34.58467987262119, "iter_time": 0.5965385131835939, "loss": 0.11007649451494217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.38382369734126, "step_time": 0.5374944839477539} +{"epoch": 0, "iter": 16590, "iter_tflops": 38.35955144033287, "iter_time": 0.537834587097168, "loss": 
0.07296445965766907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.238426084160416, "step_time": 0.4771471900939942} +{"epoch": 0, "iter": 16591, "iter_tflops": 42.0740195963276, "iter_time": 0.4903523292541504, "loss": 0.15855243802070618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.38641865743704, "step_time": 0.4447658195495605} +{"epoch": 0, "iter": 16592, "iter_tflops": 43.96556369347554, "iter_time": 0.4692557487487793, "loss": 0.07205870002508163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.876992254927394, "step_time": 0.43091874694824217} +{"epoch": 0, "iter": 16593, "iter_tflops": 25.592303927434738, "iter_time": 0.8061444396972657, "loss": 0.026772940531373024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.627296766112735, "step_time": 0.7467648277282715} +{"epoch": 0, "iter": 16594, "iter_tflops": 23.774283324451282, "iter_time": 0.8677903442382813, "loss": 0.013602462597191334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.071866303541967, "step_time": 0.6860596313476562} +{"epoch": 0, "iter": 16595, "iter_tflops": 43.1775590487184, "iter_time": 0.47781982040405274, "loss": 0.014998096972703934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86808077203702, "step_time": 0.43099897003173826} +{"epoch": 0, "iter": 16596, "iter_tflops": 39.08837114267967, "iter_time": 0.5278064270019531, "loss": 0.059606339782476425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.05475525915108, "step_time": 0.47918269157409665} +{"epoch": 0, "iter": 16597, "iter_tflops": 16.744965978142982, "iter_time": 1.2320773620605467, "loss": 0.0042840163223445415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.818725316223095, "step_time": 1.1578321762084962} +{"epoch": 0, "iter": 16598, "iter_tflops": 20.252302886943117, "iter_time": 1.018703582763672, "loss": 0.006911635864526033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.970483022683677, "step_time": 0.8262192401885986} +{"epoch": 0, "iter": 16599, "iter_tflops": 56.5415287650242, "iter_time": 
0.3648838996887207, "loss": 0.03948165848851204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.3181682281075, "step_time": 0.3310606536865235} +{"epoch": 0, "iter": 16600, "iter_tflops": 55.30582913797611, "iter_time": 0.3730365104675292, "loss": 0.016805533319711685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.8290596064123, "step_time": 0.3391650905609131} +{"epoch": 0, "iter": 16601, "iter_tflops": 19.469663720907544, "iter_time": 1.059653305053711, "loss": 0.4886608123779297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.243807215683777, "step_time": 0.9711580085754395} +{"epoch": 0, "iter": 16602, "iter_tflops": 23.56778966690632, "iter_time": 0.8753936538696289, "loss": 0.4676075279712677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.75339810010419, "step_time": 0.6708557357788085} +{"epoch": 0, "iter": 16603, "iter_tflops": 43.792053822613376, "iter_time": 0.4711150016784668, "loss": 0.6468489766120911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08627030673826, "step_time": 0.43815518569946293} +{"epoch": 0, "iter": 16604, "iter_tflops": 44.852297000909964, "iter_time": 0.4599785270690918, "loss": 0.7730520367622375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.379952210543486, "step_time": 0.42643889808654784} +{"epoch": 0, "iter": 16605, "iter_tflops": 30.258616075188463, "iter_time": 0.6818254165649414, "loss": 0.056127823889255524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.29464825795341, "step_time": 0.6388393936157226} +{"epoch": 0, "iter": 16606, "iter_tflops": 40.358693232375586, "iter_time": 0.5111932983398438, "loss": 0.10157904028892517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.796106925951705, "step_time": 0.4605555019378661} +{"epoch": 0, "iter": 16607, "iter_tflops": 43.46666056542861, "iter_time": 0.474641788482666, "loss": 0.05616149678826332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69759819136515, "step_time": 0.43253946304321295} +{"epoch": 0, "iter": 16608, "iter_tflops": 45.899683803202414, 
"iter_time": 0.44948225784301765, "loss": 0.055896658450365067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.419513204610595, "step_time": 0.4091886692047119} +{"epoch": 0, "iter": 16609, "iter_tflops": 15.83117359031964, "iter_time": 1.1100115966796875, "loss": 0.005812604445964098, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 17.025242086680617, "step_time": 1.0321607284545897} +{"epoch": 0, "iter": 16610, "iter_tflops": 16.568863236587244, "iter_time": 1.0605909423828126, "loss": 0.0013324107276275754, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 21.15393322915996, "step_time": 0.8307101135253907} +{"epoch": 0, "iter": 16611, "iter_tflops": 48.45278772043656, "iter_time": 0.36267853927612304, "loss": 0.005670292768627405, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 53.35495241924016, "step_time": 0.32935623550415044} +{"epoch": 0, "iter": 16612, "iter_tflops": 47.81933123501531, "iter_time": 0.36748289489746094, "loss": 0.0021119234152138233, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 52.2062369752221, "step_time": 0.3366031970977783} +{"epoch": 0, "iter": 16613, "iter_tflops": 39.14757239942904, "iter_time": 0.4436309776306152, "loss": 0.004770569037646055, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 42.92478538274015, "step_time": 0.40459318923950194} +{"epoch": 0, "iter": 16614, "iter_tflops": 31.839962847308602, "iter_time": 0.5454489974975587, "loss": 0.014861205592751503, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 35.237123992296645, "step_time": 0.4928630332946778} +{"epoch": 0, "iter": 16615, "iter_tflops": 37.323422491982406, "iter_time": 0.4653130569458008, "loss": 0.006714839022606611, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 41.397084408248176, "step_time": 0.4195241298675537} +{"epoch": 0, "iter": 16616, "iter_tflops": 36.820749740837215, "iter_time": 0.47166545867919923, "loss": 0.0048594726249575615, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 40.728377790168196, "step_time": 0.42641216659545894} +{"epoch": 0, "iter": 16617, 
"iter_tflops": 20.19375810796662, "iter_time": 1.0216569595336915, "loss": 0.17855489253997803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.74417051859863, "step_time": 0.9488103256225587} +{"epoch": 0, "iter": 16618, "iter_tflops": 27.94641581546985, "iter_time": 0.738237548828125, "loss": 0.2318791300058365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.95393670500137, "step_time": 0.6076200733184814} +{"epoch": 0, "iter": 16619, "iter_tflops": 44.75814872327211, "iter_time": 0.4609460868835449, "loss": 0.16361622512340546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32275512749412, "step_time": 0.42694365119934086} +{"epoch": 0, "iter": 16620, "iter_tflops": 47.26738634954457, "iter_time": 0.43647629165649415, "loss": 0.17665839195251465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.25930749188289, "step_time": 0.40248482704162597} +{"epoch": 0, "iter": 16621, "iter_tflops": 26.598585057608826, "iter_time": 0.7756462783813476, "loss": 0.18614087998867035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.99485973065489, "step_time": 0.736960060119629} +{"epoch": 0, "iter": 16622, "iter_tflops": 20.62682248382099, "iter_time": 1.000207061767578, "loss": 0.11004158109426498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.242864013034183, "step_time": 0.817303991317749} +{"epoch": 0, "iter": 16623, "iter_tflops": 47.437870930875086, "iter_time": 0.4349076614379883, "loss": 0.1511545181274414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.561890645680215, "step_time": 0.4001229057312012} +{"epoch": 0, "iter": 16624, "iter_tflops": 48.93698712093709, "iter_time": 0.42158487319946286, "loss": 0.18583349883556366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.07729272743118, "step_time": 0.3886990547180176} +{"epoch": 0, "iter": 16625, "iter_tflops": 25.401682938977306, "iter_time": 0.8121939620971679, "loss": 0.7563718557357788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.658573824971747, "step_time": 0.7739008712768555} +{"epoch": 0, 
"iter": 16626, "iter_tflops": 12.779823010790988, "iter_time": 1.6143489227294923, "loss": 0.8008362650871277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.020613151690593, "step_time": 1.2877842636108399} +{"epoch": 0, "iter": 16627, "iter_tflops": 38.07279565200392, "iter_time": 0.5418854370117188, "loss": 0.6184598803520203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86991364770094, "step_time": 0.43098246765136716} +{"epoch": 0, "iter": 16628, "iter_tflops": 47.09826796637478, "iter_time": 0.43804357147216794, "loss": 0.7072964310646057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.640544244194515, "step_time": 0.407402681350708} +{"epoch": 0, "iter": 16629, "iter_tflops": 25.021379241479536, "iter_time": 0.8245386199951171, "loss": 0.445666640996933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.508911515834473, "step_time": 0.7782701110839844} +{"epoch": 0, "iter": 16630, "iter_tflops": 14.112449269852489, "iter_time": 1.4619073638916016, "loss": 0.3589513301849365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.47310129849304, "step_time": 1.252411014556885} +{"epoch": 0, "iter": 16631, "iter_tflops": 34.4001046770596, "iter_time": 0.5997392654418946, "loss": 0.3827795684337616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.472324355284535, "step_time": 0.5505688228607177} +{"epoch": 0, "iter": 16632, "iter_tflops": 37.18136997307844, "iter_time": 0.5548771743774414, "loss": 0.36691105365753174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.373039001513526, "step_time": 0.5110116558074952} +{"epoch": 0, "iter": 16633, "iter_tflops": 18.93735493227797, "iter_time": 1.0894390258789062, "loss": 0.553459882736206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.0049955443159, "step_time": 1.0312970809936524} +{"epoch": 0, "iter": 16634, "iter_tflops": 9.392547389052131, "iter_time": 2.1965386657714845, "loss": 0.5999644994735718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.661694195117162, "step_time": 1.9350670852661134} 
+{"epoch": 0, "iter": 16635, "iter_tflops": 11.922362851434322, "iter_time": 1.7304534149169921, "loss": 0.3885553479194641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.922888158375667, "step_time": 1.481811336517334} +{"epoch": 0, "iter": 16636, "iter_tflops": 38.8473887368347, "iter_time": 0.5310805740356446, "loss": 0.6335987448692322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56958182034151, "step_time": 0.48464402580261234} +{"epoch": 0, "iter": 16637, "iter_tflops": 14.443787930218026, "iter_time": 1.1256242370605467, "loss": 0.369683176279068, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 15.511691914197405, "step_time": 1.0481305236816407} +{"epoch": 0, "iter": 16638, "iter_tflops": 11.78692393115793, "iter_time": 1.379348663330078, "loss": 0.34761565923690796, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 15.105675123775276, "step_time": 1.0763026237487794} +{"epoch": 0, "iter": 16639, "iter_tflops": 24.691222086765343, "iter_time": 0.6584638748168946, "loss": 0.34383952617645264, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.629369438708554, "step_time": 0.6105393447875976} +{"epoch": 0, "iter": 16640, "iter_tflops": 25.511810603043788, "iter_time": 0.6372843551635743, "loss": 0.3536706566810608, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 27.458162349924585, "step_time": 0.5921109199523926} +{"epoch": 0, "iter": 16641, "iter_tflops": 18.432635272515874, "iter_time": 1.1192698822021485, "loss": 0.5511844754219055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.79522549436769, "step_time": 1.0422257385253908} +{"epoch": 0, "iter": 16642, "iter_tflops": 24.781099587246576, "iter_time": 0.832533416748047, "loss": 0.4680926203727722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.015172557970335, "step_time": 0.6651935749053955} +{"epoch": 0, "iter": 16643, "iter_tflops": 42.53577073265076, "iter_time": 0.4850292625427246, "loss": 0.4910445213317871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44916481584327, "step_time": 
0.44416500473022463} +{"epoch": 0, "iter": 16644, "iter_tflops": 35.87181276205724, "iter_time": 0.5751338424682617, "loss": 0.5066912770271301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.212676095934256, "step_time": 0.526133270263672} +{"epoch": 0, "iter": 16645, "iter_tflops": 18.191886476061207, "iter_time": 1.1340821380615236, "loss": 0.3323550522327423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.44997118718869, "step_time": 1.0607261734008788} +{"epoch": 0, "iter": 16646, "iter_tflops": 14.281426201076657, "iter_time": 1.4446101684570312, "loss": 0.24478943645954132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.48135919519502, "step_time": 1.0590171508789061} +{"epoch": 0, "iter": 16647, "iter_tflops": 47.66146778063397, "iter_time": 0.4328673553466797, "loss": 0.23341423273086548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.66005501619324, "step_time": 0.3993625926971435} +{"epoch": 0, "iter": 16648, "iter_tflops": 51.2150063996364, "iter_time": 0.40283297729492185, "loss": 0.3987666666507721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.41126430473611, "step_time": 0.3723267059326172} +{"epoch": 0, "iter": 16649, "iter_tflops": 30.041510159879056, "iter_time": 0.6867528762817384, "loss": 0.31035107374191284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.86527924962986, "step_time": 0.6474474411010741} +{"epoch": 0, "iter": 16650, "iter_tflops": 23.84000466405842, "iter_time": 0.865398048400879, "loss": 0.45966246724128723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.11914812383522, "step_time": 0.760757432937622} +{"epoch": 0, "iter": 16651, "iter_tflops": 49.31836343590605, "iter_time": 0.41832477951049807, "loss": 0.3373439311981201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.52547710576078, "step_time": 0.3854443645477295} +{"epoch": 0, "iter": 16652, "iter_tflops": 50.74549442599078, "iter_time": 0.40656010437011714, "loss": 0.2846675217151642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.693505988177286, 
"step_time": 0.377212854385376} +{"epoch": 0, "iter": 16653, "iter_tflops": 13.290901229005705, "iter_time": 0.8252091827392578, "loss": 0.01487098541110754, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 13.935046561331989, "step_time": 0.7870640182495117} +{"epoch": 0, "iter": 16654, "iter_tflops": 10.708752601164681, "iter_time": 1.0241877975463867, "loss": 0.0037776269018650055, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 13.507642737398104, "step_time": 0.8119680061340333} +{"epoch": 0, "iter": 16655, "iter_tflops": 30.12021019344947, "iter_time": 0.36413337326049805, "loss": 0.007028828840702772, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 33.07640525057711, "step_time": 0.33158904838562014} +{"epoch": 0, "iter": 16656, "iter_tflops": 26.158029668983506, "iter_time": 0.4192889862060547, "loss": 0.0012642575893551111, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 29.038191265016803, "step_time": 0.37770168399810794} +{"epoch": 0, "iter": 16657, "iter_tflops": 25.6462308603623, "iter_time": 0.8044493408203125, "loss": 0.6537046432495117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.08389111101463, "step_time": 0.7617477645874022} +{"epoch": 0, "iter": 16658, "iter_tflops": 21.157648614343717, "iter_time": 0.975112777709961, "loss": 0.5425620079040527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.32184587335484, "step_time": 0.8147547225952149} +{"epoch": 0, "iter": 16659, "iter_tflops": 45.69074945559086, "iter_time": 0.4515376472473144, "loss": 0.5827504992485046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.52115498132081, "step_time": 0.4166117191314697} +{"epoch": 0, "iter": 16660, "iter_tflops": 48.13602514627906, "iter_time": 0.42859985733032224, "loss": 0.7044923901557922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.194191852343174, "step_time": 0.3952756576538086} +{"epoch": 0, "iter": 16661, "iter_tflops": 30.63961494611053, "iter_time": 0.673347023010254, "loss": 0.29805174469947815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
32.41812438638568, "step_time": 0.6364061431884765} +{"epoch": 0, "iter": 16662, "iter_tflops": 14.275922805120919, "iter_time": 1.4451670684814453, "loss": 0.2560058534145355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.873367921084217, "step_time": 1.1542924423217773} +{"epoch": 0, "iter": 16663, "iter_tflops": 37.65560241628944, "iter_time": 0.5478890838623047, "loss": 0.1961420476436615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57488488916793, "step_time": 0.49623934173583983} +{"epoch": 0, "iter": 16664, "iter_tflops": 43.54672421456272, "iter_time": 0.47376912689208983, "loss": 0.1610325574874878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.42861554685072, "step_time": 0.4349925308227539} +{"epoch": 0, "iter": 16665, "iter_tflops": 17.266126788108323, "iter_time": 1.1948883361816407, "loss": 0.838256299495697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.355069636510958, "step_time": 1.123999740600586} +{"epoch": 0, "iter": 16666, "iter_tflops": 17.52177651835618, "iter_time": 1.1774544372558595, "loss": 0.5371467471122742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.945627733729832, "step_time": 0.984983299255371} +{"epoch": 0, "iter": 16667, "iter_tflops": 35.31249085244817, "iter_time": 0.5842435073852538, "loss": 0.4457738399505615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.268232418597165, "step_time": 0.5391180152893067} +{"epoch": 0, "iter": 16668, "iter_tflops": 36.76280975589643, "iter_time": 0.5611946868896485, "loss": 0.5296408534049988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.99210924732191, "step_time": 0.5158791046142578} +{"epoch": 0, "iter": 16669, "iter_tflops": 21.064040604612753, "iter_time": 0.9794461517333984, "loss": 0.1956821233034134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.64308649763212, "step_time": 0.9111431655883789} +{"epoch": 0, "iter": 16670, "iter_tflops": 27.743871101511765, "iter_time": 0.743627067565918, "loss": 0.13529960811138153, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 31.387222093742977, "step_time": 0.6573086795806885} +{"epoch": 0, "iter": 16671, "iter_tflops": 51.10208557335009, "iter_time": 0.40372312164306645, "loss": 0.10275085270404816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.41387820944322, "step_time": 0.3723091430664063} +{"epoch": 0, "iter": 16672, "iter_tflops": 52.07439161676135, "iter_time": 0.39618501281738283, "loss": 0.1382969170808792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.40959693226557, "step_time": 0.3657372970581055} +{"epoch": 0, "iter": 16673, "iter_tflops": 27.11649675021837, "iter_time": 0.7608318176269531, "loss": 0.09639941900968552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.577052793456932, "step_time": 0.721946159362793} +{"epoch": 0, "iter": 16674, "iter_tflops": 11.420979927283526, "iter_time": 1.8064206085205077, "loss": 0.10503590852022171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.808227649842385, "step_time": 1.4941159744262693} +{"epoch": 0, "iter": 16675, "iter_tflops": 16.199054379152535, "iter_time": 1.2735986328125, "loss": 0.11855527758598328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.307491055871886, "step_time": 1.0159351272583008} +{"epoch": 0, "iter": 16676, "iter_tflops": 24.364459102561103, "iter_time": 0.8467700195312501, "loss": 0.1653621941804886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.32579896563632, "step_time": 0.6803149204254151} +{"epoch": 0, "iter": 16677, "iter_tflops": 14.320523132511344, "iter_time": 1.072401885986328, "loss": 0.3529646396636963, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 14.910151462754605, "step_time": 1.029993293762207} +{"epoch": 0, "iter": 16678, "iter_tflops": 10.12043777065498, "iter_time": 1.5174596557617188, "loss": 0.40881964564323425, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 11.970818552574322, "step_time": 1.2828994064331054} +{"epoch": 0, "iter": 16679, "iter_tflops": 23.455315777405858, "iter_time": 0.6547494888305664, "loss": 0.35916316509246826, "lr": 3e-05, 
"seqlen": 6144.0, "step_tflops": 25.24624706093876, "step_time": 0.6083025321960449} +{"epoch": 0, "iter": 16680, "iter_tflops": 23.604990804811397, "iter_time": 0.6505978393554688, "loss": 0.42730265855789185, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 25.247467929713014, "step_time": 0.6082731170654296} +{"epoch": 0, "iter": 16681, "iter_tflops": 26.75123439958666, "iter_time": 0.5374382629394532, "loss": 0.0470154695212841, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 29.702937054108677, "step_time": 0.484030818939209} +{"epoch": 0, "iter": 16682, "iter_tflops": 28.061253714422563, "iter_time": 0.512348346710205, "loss": 0.03462113067507744, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 31.447401509196148, "step_time": 0.4571804428100586} +{"epoch": 0, "iter": 16683, "iter_tflops": 31.073543413600138, "iter_time": 0.4626809616088867, "loss": 0.027494380250573158, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 34.254360837472014, "step_time": 0.41971698188781736} +{"epoch": 0, "iter": 16684, "iter_tflops": 31.88002453923671, "iter_time": 0.45097634506225587, "loss": 0.03305458277463913, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 35.18956605522421, "step_time": 0.4085624961853027} +{"epoch": 0, "iter": 16685, "iter_tflops": 29.010654605797097, "iter_time": 0.7111557388305664, "loss": 0.013024551793932915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.142580286513414, "step_time": 0.6624721946716309} +{"epoch": 0, "iter": 16686, "iter_tflops": 8.660894358847388, "iter_time": 2.382097351074219, "loss": 0.01566457562148571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.721532360584748, "step_time": 1.9242672424316405} +{"epoch": 0, "iter": 16687, "iter_tflops": 11.899514464765048, "iter_time": 1.7337760772705078, "loss": 0.032050129026174545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.190648815678124, "step_time": 1.5640696525573732} +{"epoch": 0, "iter": 16688, "iter_tflops": 26.52407070256478, "iter_time": 0.7778253097534179, "loss": 
0.036452632397413254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.64294378857952, "step_time": 0.563030460357666} +{"epoch": 0, "iter": 16689, "iter_tflops": 22.20075704581856, "iter_time": 0.7397192382812501, "loss": 0.47794631123542786, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 24.130901673440345, "step_time": 0.6805517387390136} +{"epoch": 0, "iter": 16690, "iter_tflops": 23.339610843393388, "iter_time": 0.7036247177124024, "loss": 0.327642023563385, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 25.072307960399684, "step_time": 0.6549986190795898} +{"epoch": 0, "iter": 16691, "iter_tflops": 24.49478893184948, "iter_time": 0.6704416656494141, "loss": 0.30138692259788513, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 26.39601856058473, "step_time": 0.6221516723632812} +{"epoch": 0, "iter": 16692, "iter_tflops": 24.658477305585535, "iter_time": 0.6659911270141601, "loss": 0.4524490535259247, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 26.450294644816637, "step_time": 0.6208750152587892} +{"epoch": 0, "iter": 16693, "iter_tflops": 29.04989564525072, "iter_time": 0.7101950988769532, "loss": 0.3470695912837982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.77909299025956, "step_time": 0.6492033462524414} +{"epoch": 0, "iter": 16694, "iter_tflops": 43.52592707861039, "iter_time": 0.4739954986572265, "loss": 0.2961977422237396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19879525445352, "step_time": 0.437110595703125} +{"epoch": 0, "iter": 16695, "iter_tflops": 47.95124695116228, "iter_time": 0.4302514495849609, "loss": 0.3103846311569214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.10511183940701, "step_time": 0.3959514293670654} +{"epoch": 0, "iter": 16696, "iter_tflops": 44.13701523406585, "iter_time": 0.4674329109191894, "loss": 0.40925562381744385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.50062565643061, "step_time": 0.4343330898284912} +{"epoch": 0, "iter": 16697, "iter_tflops": 32.17430865836694, "iter_time": 0.6412288055419921, 
"loss": 0.4807802140712738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.33022906398144, "step_time": 0.6009599723815918} +{"epoch": 0, "iter": 16698, "iter_tflops": 8.295037260512172, "iter_time": 2.487161041259766, "loss": 0.3881243169307709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.946271835113185, "step_time": 2.0742539367675783} +{"epoch": 0, "iter": 16699, "iter_tflops": 10.18607238109259, "iter_time": 2.025421844482422, "loss": 0.5413137078285217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.871034575377754, "step_time": 1.737935592651367} +{"epoch": 0, "iter": 16700, "iter_tflops": 38.55098280079258, "iter_time": 0.5351638793945312, "loss": 0.47567787766456604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30230435259373, "step_time": 0.4877061386108398} +{"epoch": 0, "iter": 16701, "iter_tflops": 17.095325065790696, "iter_time": 1.0255221633911134, "loss": 0.37502312660217285, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 18.339042385529073, "step_time": 0.9559732933044434} +{"epoch": 0, "iter": 16702, "iter_tflops": 26.209307145881382, "iter_time": 0.6689087448120117, "loss": 0.33088669180870056, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 28.79202608004383, "step_time": 0.6089059066772461} +{"epoch": 0, "iter": 16703, "iter_tflops": 30.7533953082986, "iter_time": 0.570071517944336, "loss": 0.3836638927459717, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 32.66215805834481, "step_time": 0.5367567787170411} +{"epoch": 0, "iter": 16704, "iter_tflops": 33.48648846546391, "iter_time": 0.5235435409545899, "loss": 0.22964122891426086, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 35.465290168574626, "step_time": 0.4943321952819824} +{"epoch": 0, "iter": 16705, "iter_tflops": 27.608425970416445, "iter_time": 0.7472752532958984, "loss": 0.40523219108581543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.290723888445516, "step_time": 0.7043558769226075} +{"epoch": 0, "iter": 16706, "iter_tflops": 16.01936485714465, "iter_time": 
1.2878846130371093, "loss": 0.41480550169944763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.107717023372256, "step_time": 1.0797257194519043} +{"epoch": 0, "iter": 16707, "iter_tflops": 48.91911452267009, "iter_time": 0.42173889923095703, "loss": 0.4891032874584198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.86473984619327, "step_time": 0.3902618942260742} +{"epoch": 0, "iter": 16708, "iter_tflops": 42.49910346631116, "iter_time": 0.48544773483276366, "loss": 0.46315982937812805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.854336183001706, "step_time": 0.4499267730712891} +{"epoch": 0, "iter": 16709, "iter_tflops": 32.89227816931004, "iter_time": 0.6272321243286133, "loss": 0.29783424735069275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.95915049752614, "step_time": 0.5901485939025879} +{"epoch": 0, "iter": 16710, "iter_tflops": 12.63003331499655, "iter_time": 1.6334947814941407, "loss": 0.23994065821170807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.372605436379253, "step_time": 1.2600983753204344} +{"epoch": 0, "iter": 16711, "iter_tflops": 40.99115881476132, "iter_time": 0.5033059349060058, "loss": 0.34987205266952515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.99079316886501, "step_time": 0.4585625648498535} +{"epoch": 0, "iter": 16712, "iter_tflops": 37.84989980949062, "iter_time": 0.5450765686035156, "loss": 0.210958793759346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.32225107114613, "step_time": 0.49927322387695316} +{"epoch": 0, "iter": 16713, "iter_tflops": 18.544501748022547, "iter_time": 1.112518081665039, "loss": 0.1397053450345993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.11711213098319, "step_time": 1.025549461364746} +{"epoch": 0, "iter": 16714, "iter_tflops": 13.920052996470227, "iter_time": 1.4821131439208983, "loss": 0.15015295147895813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.569725450961034, "step_time": 1.2451077461242677} +{"epoch": 0, "iter": 16715, "iter_tflops": 46.35894707251421, 
"iter_time": 0.4450293807983398, "loss": 0.14760638773441315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.275748829375544, "step_time": 0.410358751296997} +{"epoch": 0, "iter": 16716, "iter_tflops": 52.68632494676457, "iter_time": 0.39158346176147457, "loss": 0.1556306630373001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.08233532445576, "step_time": 0.36142693519592284} +{"epoch": 0, "iter": 16717, "iter_tflops": 30.449140792467333, "iter_time": 0.6775591354370116, "loss": 0.1850666105747223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.44969353442933, "step_time": 0.6357870063781739} +{"epoch": 0, "iter": 16718, "iter_tflops": 14.793081273592023, "iter_time": 1.394644775390625, "loss": 0.3047543168067932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.37823489968349, "step_time": 1.187180034637451} +{"epoch": 0, "iter": 16719, "iter_tflops": 42.04746101629631, "iter_time": 0.49066205215454095, "loss": 0.2149934321641922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.42062974054344, "step_time": 0.44443803596496584} +{"epoch": 0, "iter": 16720, "iter_tflops": 50.086894709959914, "iter_time": 0.4119060211181641, "loss": 0.12952543795108795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.616069037680504, "step_time": 0.3777476825714111} +{"epoch": 0, "iter": 16721, "iter_tflops": 29.588055352650333, "iter_time": 0.6972777786254881, "loss": 0.13077165186405182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.415865379410462, "step_time": 0.6567093811035156} +{"epoch": 0, "iter": 16722, "iter_tflops": 12.974535027972996, "iter_time": 1.590121994018555, "loss": 0.14253957569599152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.314769505753496, "step_time": 1.191531513214111} +{"epoch": 0, "iter": 16723, "iter_tflops": 50.22513970947642, "iter_time": 0.4107722473144531, "loss": 0.19563868641853333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.86288898995944, "step_time": 0.3760482521057129} +{"epoch": 0, "iter": 16724, "iter_tflops": 
51.81051337367276, "iter_time": 0.3982028388977051, "loss": 0.17802439630031586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.001812963929076, "step_time": 0.3684004573822021} +{"epoch": 0, "iter": 16725, "iter_tflops": 24.672438436655124, "iter_time": 0.8362000198364258, "loss": 0.49272421002388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.964512939041644, "step_time": 0.7945881195068359} +{"epoch": 0, "iter": 16726, "iter_tflops": 14.162079449869834, "iter_time": 1.456784194946289, "loss": 0.4323905408382416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.71028729467836, "step_time": 1.2346342792510987} +{"epoch": 0, "iter": 16727, "iter_tflops": 40.25754696222222, "iter_time": 0.5124776611328126, "loss": 0.42800775170326233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.919070828043985, "step_time": 0.46975250434875493} +{"epoch": 0, "iter": 16728, "iter_tflops": 41.25045472275986, "iter_time": 0.5001422080993653, "loss": 0.44368234276771545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.97601211210095, "step_time": 0.45871326828002923} +{"epoch": 0, "iter": 16729, "iter_tflops": 20.619669251865574, "iter_time": 1.0005540466308596, "loss": 0.08550845831632614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.985272005039757, "step_time": 0.9384051971435547} +{"epoch": 0, "iter": 16730, "iter_tflops": 8.585163419033197, "iter_time": 2.4031101684570313, "loss": 0.09994926303625107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.01602658578082, "step_time": 2.0598081817626954} +{"epoch": 0, "iter": 16731, "iter_tflops": 13.453163850007437, "iter_time": 1.533549560546875, "loss": 0.10409492254257202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.21756832116565, "step_time": 1.2721446952819824} +{"epoch": 0, "iter": 16732, "iter_tflops": 37.28537128344429, "iter_time": 0.5533294372558594, "loss": 0.0839753970503807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.663175801970105, "step_time": 0.5073655242919921} +{"epoch": 0, "iter": 16733, 
"iter_tflops": 18.39873753974599, "iter_time": 0.8013811035156251, "loss": 0.43832114338874817, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 20.104667008847255, "step_time": 0.7333819847106934} +{"epoch": 0, "iter": 16734, "iter_tflops": 20.274295196269485, "iter_time": 0.7272460250854492, "loss": 0.4050077497959137, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 21.7842403144368, "step_time": 0.6768379516601561} +{"epoch": 0, "iter": 16735, "iter_tflops": 22.609802565164955, "iter_time": 0.6521242523193359, "loss": 0.4133785665035248, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.3498194587323, "step_time": 0.6055240211486818} +{"epoch": 0, "iter": 16736, "iter_tflops": 22.330659491872755, "iter_time": 0.660276092529297, "loss": 0.24135622382164001, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.049748956114307, "step_time": 0.6130791893005372} +{"epoch": 0, "iter": 16737, "iter_tflops": 17.712582325672276, "iter_time": 1.1647705078125, "loss": 0.42371389269828796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.03947934981315, "step_time": 1.0835954666137695} +{"epoch": 0, "iter": 16738, "iter_tflops": 22.750825550837774, "iter_time": 0.9068283462524415, "loss": 0.36544278264045715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.870380658216835, "step_time": 0.7402515869140626} +{"epoch": 0, "iter": 16739, "iter_tflops": 37.3415726033005, "iter_time": 0.5524966430664062, "loss": 0.4325173497200012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.97940685246397, "step_time": 0.5034502716064453} +{"epoch": 0, "iter": 16740, "iter_tflops": 43.80701489717374, "iter_time": 0.47095410537719723, "loss": 0.44057509303092957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.96184540837566, "step_time": 0.4301563739776611} +{"epoch": 0, "iter": 16741, "iter_tflops": 19.078721767929267, "iter_time": 1.0813666534423827, "loss": 0.07029331475496292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.380192400498856, "step_time": 1.0123110275268554} +{"epoch": 0, 
"iter": 16742, "iter_tflops": 21.188325354954692, "iter_time": 0.9737009963989258, "loss": 0.04033338278532028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.30636695167111, "step_time": 0.7039799079895019} +{"epoch": 0, "iter": 16743, "iter_tflops": 56.28467803383088, "iter_time": 0.3665490188598633, "loss": 0.08674389868974686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.598559278795086, "step_time": 0.3349281826019287} +{"epoch": 0, "iter": 16744, "iter_tflops": 57.82392737641691, "iter_time": 0.356791633605957, "loss": 0.04596903920173645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.895233773763195, "step_time": 0.3280231628417969} +{"epoch": 0, "iter": 16745, "iter_tflops": 25.80882968427628, "iter_time": 0.7993812103271485, "loss": 0.1901136040687561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.11768534776943, "step_time": 0.7607984695434569} +{"epoch": 0, "iter": 16746, "iter_tflops": 15.086136553466153, "iter_time": 1.3675531463623045, "loss": 0.09346380829811096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.61037450463845, "step_time": 1.052049949645996} +{"epoch": 0, "iter": 16747, "iter_tflops": 38.07307868333443, "iter_time": 0.5418814086914062, "loss": 0.12337541580200195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80769333855294, "step_time": 0.49347600555419924} +{"epoch": 0, "iter": 16748, "iter_tflops": 39.13090170411871, "iter_time": 0.5272327651977539, "loss": 0.1563633233308792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.04616692039127, "step_time": 0.4792782955169677} +{"epoch": 0, "iter": 16749, "iter_tflops": 29.041015186835757, "iter_time": 0.7104122695922851, "loss": 0.09475843608379364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.389376145731713, "step_time": 0.6572635726928711} +{"epoch": 0, "iter": 16750, "iter_tflops": 48.577884395454475, "iter_time": 0.424701358795166, "loss": 0.05806120112538338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.82498179257076, "step_time": 0.38329959106445316} 
+{"epoch": 0, "iter": 16751, "iter_tflops": 49.16236426191661, "iter_time": 0.41965218353271483, "loss": 0.09868801385164261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.23492737375988, "step_time": 0.38754807281494136} +{"epoch": 0, "iter": 16752, "iter_tflops": 50.6584270230362, "iter_time": 0.4072588653564453, "loss": 0.06974954158067703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.0266779749268, "step_time": 0.3749289302825927} +{"epoch": 0, "iter": 16753, "iter_tflops": 39.58940768734911, "iter_time": 0.5211266021728516, "loss": 0.5977591872215271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.727812064113834, "step_time": 0.4828492851257324} +{"epoch": 0, "iter": 16754, "iter_tflops": 37.444789154542754, "iter_time": 0.5509736862182617, "loss": 0.6651209592819214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.03822947430937, "step_time": 0.502728645324707} +{"epoch": 0, "iter": 16755, "iter_tflops": 39.308697808941915, "iter_time": 0.5248480529785157, "loss": 0.5733534693717957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.953516980133486, "step_time": 0.4803120899200439} +{"epoch": 0, "iter": 16756, "iter_tflops": 36.639971035547894, "iter_time": 0.5630761413574219, "loss": 0.6920698285102844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.238637852665455, "step_time": 0.5127184867858886} +{"epoch": 0, "iter": 16757, "iter_tflops": 16.314162999412563, "iter_time": 1.2646124420166016, "loss": 0.3885495066642761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.19096633760419, "step_time": 1.2001124954223632} +{"epoch": 0, "iter": 16758, "iter_tflops": 15.81030709650059, "iter_time": 1.3049141540527343, "loss": 0.3195244073867798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.254886820298815, "step_time": 0.9270365505218506} +{"epoch": 0, "iter": 16759, "iter_tflops": 40.01350329967876, "iter_time": 0.5156032791137696, "loss": 0.42287954688072205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.57201088227025, "step_time": 
0.4734941787719727} +{"epoch": 0, "iter": 16760, "iter_tflops": 35.04124028450308, "iter_time": 0.5887660751342774, "loss": 0.3024624288082123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.974132743424384, "step_time": 0.543293342590332} +{"epoch": 0, "iter": 16761, "iter_tflops": 20.71541329628716, "iter_time": 0.9959296112060547, "loss": 0.08674749732017517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.196682757290674, "step_time": 0.9294674224853515} +{"epoch": 0, "iter": 16762, "iter_tflops": 15.615714283116636, "iter_time": 1.3211751403808591, "loss": 0.08663677424192429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.917463793154237, "step_time": 0.9413084335327149} +{"epoch": 0, "iter": 16763, "iter_tflops": 49.107997823459236, "iter_time": 0.42011677169799805, "loss": 0.05164511501789093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36311563543382, "step_time": 0.3866171092987061} +{"epoch": 0, "iter": 16764, "iter_tflops": 53.340935717541036, "iter_time": 0.3867778701782227, "loss": 0.09952489286661148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.775460454622184, "step_time": 0.3570909404754639} +{"epoch": 0, "iter": 16765, "iter_tflops": 28.941192097176458, "iter_time": 0.7128626022338868, "loss": 0.22756947576999664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.59658323952237, "step_time": 0.6742940330505371} +{"epoch": 0, "iter": 16766, "iter_tflops": 17.123115208421964, "iter_time": 1.2048679962158202, "loss": 0.3039493262767792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.70594474528334, "step_time": 0.996385036468506} +{"epoch": 0, "iter": 16767, "iter_tflops": 45.82110483436724, "iter_time": 0.45025307846069335, "loss": 0.3005247116088867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.474490870751175, "step_time": 0.4170046653747559} +{"epoch": 0, "iter": 16768, "iter_tflops": 45.720870456731376, "iter_time": 0.4512401733398438, "loss": 0.2477281242609024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
49.06297620604971, "step_time": 0.4205022830963135} +{"epoch": 0, "iter": 16769, "iter_tflops": 25.465535583856045, "iter_time": 0.810157455444336, "loss": 0.20058465003967285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.815293428828873, "step_time": 0.7693778762817383} +{"epoch": 0, "iter": 16770, "iter_tflops": 15.049617237683732, "iter_time": 1.3708716430664063, "loss": 0.18100643157958984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.686019376994697, "step_time": 1.1665198974609376} +{"epoch": 0, "iter": 16771, "iter_tflops": 43.46415243702929, "iter_time": 0.4746691780090332, "loss": 0.19638119637966156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.3538404884769, "step_time": 0.43567941474914557} +{"epoch": 0, "iter": 16772, "iter_tflops": 50.96056178086979, "iter_time": 0.40484431076049804, "loss": 0.1741247922182083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.43312644440408, "step_time": 0.3721798648834229} +{"epoch": 0, "iter": 16773, "iter_tflops": 37.965142414863614, "iter_time": 0.5434219970703125, "loss": 0.5292290449142456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.749793621137265, "step_time": 0.5062870674133301} +{"epoch": 0, "iter": 16774, "iter_tflops": 9.793317813676213, "iter_time": 2.1066500549316407, "loss": 0.6922910213470459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.375219168603042, "step_time": 1.8136875610351562} +{"epoch": 0, "iter": 16775, "iter_tflops": 15.715058270571198, "iter_time": 1.312823226928711, "loss": 0.606423556804657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.505614757941995, "step_time": 1.1148558845520022} +{"epoch": 0, "iter": 16776, "iter_tflops": 23.84559136639897, "iter_time": 0.8651952972412109, "loss": 0.6631036996841431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.925973709502628, "step_time": 0.7132376499176025} +{"epoch": 0, "iter": 16777, "iter_tflops": 26.88012427652426, "iter_time": 0.5850285034179687, "loss": 0.3822767734527588, "lr": 3e-05, "seqlen": 6288.0, 
"step_tflops": 28.91860218673353, "step_time": 0.5437897300720216} +{"epoch": 0, "iter": 16778, "iter_tflops": 25.36230921703199, "iter_time": 0.6200397109985352, "loss": 0.34253770112991333, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.983741673161422, "step_time": 0.5827819976806641} +{"epoch": 0, "iter": 16779, "iter_tflops": 29.21251198009874, "iter_time": 0.5383186111450196, "loss": 0.4424571990966797, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 31.123446508175935, "step_time": 0.5052666282653809} +{"epoch": 0, "iter": 16780, "iter_tflops": 27.786258549687638, "iter_time": 0.565950210571289, "loss": 0.3674483597278595, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 29.351936858059442, "step_time": 0.5357615394592286} +{"epoch": 0, "iter": 16781, "iter_tflops": 27.74089660663183, "iter_time": 0.743706802368164, "loss": 0.11920272558927536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.370467915473277, "step_time": 0.7024434738159179} +{"epoch": 0, "iter": 16782, "iter_tflops": 16.102480273365934, "iter_time": 1.2812369995117188, "loss": 0.14849421381950378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.7414209522189, "step_time": 1.1008286705017092} +{"epoch": 0, "iter": 16783, "iter_tflops": 39.91774251726257, "iter_time": 0.5168401870727539, "loss": 0.08609619736671448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.74358998681756, "step_time": 0.4716369533538819} +{"epoch": 0, "iter": 16784, "iter_tflops": 46.95782529264687, "iter_time": 0.4393536834716797, "loss": 0.057610440999269485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.84997952320778, "step_time": 0.4057247161865235} +{"epoch": 0, "iter": 16785, "iter_tflops": 28.159730661334386, "iter_time": 0.7326452713012696, "loss": 0.23683515191078186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.82726689490977, "step_time": 0.6916856842041015} +{"epoch": 0, "iter": 16786, "iter_tflops": 12.736969629779194, "iter_time": 1.6197803802490236, "loss": 0.2673912048339844, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 14.482188842399278, "step_time": 1.4245839309692385} +{"epoch": 0, "iter": 16787, "iter_tflops": 37.5801035446422, "iter_time": 0.5489897994995117, "loss": 0.21301902830600739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.12976403515942, "step_time": 0.5016098194122315} +{"epoch": 0, "iter": 16788, "iter_tflops": 39.267527141781706, "iter_time": 0.5253983383178711, "loss": 0.3377684950828552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84357779144131, "step_time": 0.481544599533081} +{"epoch": 0, "iter": 16789, "iter_tflops": 33.849466792800214, "iter_time": 0.6094953765869141, "loss": 0.17783264815807343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.01473540173784, "step_time": 0.5573751449584962} +{"epoch": 0, "iter": 16790, "iter_tflops": 46.17407272482469, "iter_time": 0.4468112144470215, "loss": 0.23129771649837494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40527148107544, "step_time": 0.4093042831420899} +{"epoch": 0, "iter": 16791, "iter_tflops": 51.57919815441224, "iter_time": 0.3999886436462402, "loss": 0.195877805352211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.182220171964744, "step_time": 0.36721748352050787} +{"epoch": 0, "iter": 16792, "iter_tflops": 51.85678515297722, "iter_time": 0.39784752273559576, "loss": 0.20865021646022797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.16174173595903, "step_time": 0.3673513832092285} +{"epoch": 0, "iter": 16793, "iter_tflops": 30.851362744878113, "iter_time": 0.6687255172729492, "loss": 0.1728724092245102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.743544836530305, "step_time": 0.6300812454223633} +{"epoch": 0, "iter": 16794, "iter_tflops": 10.348433301107338, "iter_time": 1.9936441497802735, "loss": 0.18272989988327026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.515092788536643, "step_time": 1.6484970474243161} +{"epoch": 0, "iter": 16795, "iter_tflops": 13.418808219145077, "iter_time": 1.5374758453369142, "loss": 0.33814436197280884, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.522407992209402, "step_time": 1.329116817474365} +{"epoch": 0, "iter": 16796, "iter_tflops": 19.769244812095693, "iter_time": 1.043595428466797, "loss": 0.2289998084306717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.64987044385256, "step_time": 0.804335193634033} +{"epoch": 0, "iter": 16797, "iter_tflops": 22.896425661475117, "iter_time": 0.8178981781005858, "loss": 0.38830217719078064, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 24.14054087332859, "step_time": 0.775746696472168} +{"epoch": 0, "iter": 16798, "iter_tflops": 8.575656231938172, "iter_time": 2.183733154296875, "loss": 0.3666975796222687, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 10.275348099133366, "step_time": 1.8225119628906248} +{"epoch": 0, "iter": 16799, "iter_tflops": 11.218315406528028, "iter_time": 1.6693188018798828, "loss": 0.466230571269989, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 12.980282952179842, "step_time": 1.4427223892211913} +{"epoch": 0, "iter": 16800, "iter_tflops": 27.856499336064168, "iter_time": 0.6722648315429688, "loss": 0.3966004550457001, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 29.97307159655391, "step_time": 0.6247923164367675} +{"epoch": 0, "iter": 16801, "iter_tflops": 22.56961683768355, "iter_time": 0.76948917388916, "loss": 0.4001499116420746, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 24.35789383445732, "step_time": 0.7129957923889161} +{"epoch": 0, "iter": 16802, "iter_tflops": 9.256836237153614, "iter_time": 1.8761351470947265, "loss": 0.49836307764053345, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 11.28796939993074, "step_time": 1.5385473861694339} +{"epoch": 0, "iter": 16803, "iter_tflops": 29.683578369069036, "iter_time": 0.5850735244750976, "loss": 0.5414737462997437, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 31.712941352030683, "step_time": 0.5476337127685547} +{"epoch": 0, "iter": 16804, "iter_tflops": 31.68178015194665, "iter_time": 0.5481723480224608, "loss": 
0.41053202748298645, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 33.631153358530405, "step_time": 0.5163984603881836} +{"epoch": 0, "iter": 16805, "iter_tflops": 15.967092897096334, "iter_time": 1.0156687469482422, "loss": 0.4019882082939148, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 16.699521268813093, "step_time": 0.9711222839355469} +{"epoch": 0, "iter": 16806, "iter_tflops": 9.360891870432804, "iter_time": 1.7324500122070314, "loss": 0.37702274322509766, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 11.433887297563533, "step_time": 1.4183520278930666} +{"epoch": 0, "iter": 16807, "iter_tflops": 24.50154769550124, "iter_time": 0.6618878707885741, "loss": 0.3580720126628876, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 26.493652628118234, "step_time": 0.6121193428039551} +{"epoch": 0, "iter": 16808, "iter_tflops": 22.816782209173088, "iter_time": 0.7107609252929687, "loss": 0.4007771909236908, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 24.42570508514417, "step_time": 0.6639430541992188} +{"epoch": 0, "iter": 16809, "iter_tflops": 17.054115160952737, "iter_time": 1.2097428283691407, "loss": 0.5329797267913818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.428820873776445, "step_time": 1.11950154876709} +{"epoch": 0, "iter": 16810, "iter_tflops": 13.409259912100099, "iter_time": 1.5385706329345703, "loss": 0.7910332679748535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.753223916734317, "step_time": 1.162104055404663} +{"epoch": 0, "iter": 16811, "iter_tflops": 43.35360032985272, "iter_time": 0.4758795890808106, "loss": 0.6927911639213562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.701888864171806, "step_time": 0.44176143646240235} +{"epoch": 0, "iter": 16812, "iter_tflops": 43.7146590359145, "iter_time": 0.471949089050293, "loss": 0.6909469962120056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.128399148706016, "step_time": 0.43776351165771493} +{"epoch": 0, "iter": 16813, "iter_tflops": 26.814359323274598, "iter_time": 
0.7694046783447265, "loss": 0.7847604751586914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.315066041660785, "step_time": 0.7286260070800781} +{"epoch": 0, "iter": 16814, "iter_tflops": 12.900289886286302, "iter_time": 1.5992736358642579, "loss": 0.6646891236305237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.07081718092044, "step_time": 1.2837613220214843} +{"epoch": 0, "iter": 16815, "iter_tflops": 14.674351715047214, "iter_time": 1.4059287872314452, "loss": 0.7598575949668884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.45164879399347, "step_time": 1.2540441246032714} +{"epoch": 0, "iter": 16816, "iter_tflops": 36.092748389480256, "iter_time": 0.5716132583618163, "loss": 0.6951277852058411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.236389583277266, "step_time": 0.5258152885437012} +{"epoch": 0, "iter": 16817, "iter_tflops": 13.81664271598746, "iter_time": 1.230189895629883, "loss": 0.3718383014202118, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 14.647110382479799, "step_time": 1.1604401016235353} +{"epoch": 0, "iter": 16818, "iter_tflops": 15.553751629668229, "iter_time": 1.0927970733642578, "loss": 0.3140593469142914, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 18.767919614400164, "step_time": 0.9056461563110352} +{"epoch": 0, "iter": 16819, "iter_tflops": 30.436549860822062, "iter_time": 0.5584435272216797, "loss": 0.26953956484794617, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 32.42987532175964, "step_time": 0.5241183967590332} +{"epoch": 0, "iter": 16820, "iter_tflops": 31.80677938342683, "iter_time": 0.5343858947753907, "loss": 0.38166362047195435, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 33.715835836557616, "step_time": 0.5041279220581054} +{"epoch": 0, "iter": 16821, "iter_tflops": 28.105217603742545, "iter_time": 0.7340663146972658, "loss": 0.08450902998447418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.888479502465557, "step_time": 0.690269088745117} +{"epoch": 0, "iter": 16822, "iter_tflops": 16.5773895552194, 
"iter_time": 1.2445321044921875, "loss": 0.04286840930581093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.122122425663697, "step_time": 0.9767528610229491} +{"epoch": 0, "iter": 16823, "iter_tflops": 52.31652730427953, "iter_time": 0.39435135650634767, "loss": 0.0660911276936531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.24763194425796, "step_time": 0.36038335227966306} +{"epoch": 0, "iter": 16824, "iter_tflops": 50.48702442107852, "iter_time": 0.408641502380371, "loss": 0.0684407651424408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.826872610624356, "step_time": 0.37629528236389165} +{"epoch": 0, "iter": 16825, "iter_tflops": 27.32451695697594, "iter_time": 0.7550396423339845, "loss": 0.506909966468811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.826151978370095, "step_time": 0.7157075119018554} +{"epoch": 0, "iter": 16826, "iter_tflops": 14.988544737740389, "iter_time": 1.3764574127197267, "loss": 0.47193455696105957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.70637510586854, "step_time": 1.1028910408020018} +{"epoch": 0, "iter": 16827, "iter_tflops": 43.34925563903763, "iter_time": 0.4759272842407226, "loss": 0.4935247600078583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.211841497387844, "step_time": 0.4369898071289063} +{"epoch": 0, "iter": 16828, "iter_tflops": 46.87667934969766, "iter_time": 0.44011422729492194, "loss": 0.48207682371139526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49407723148251, "step_time": 0.40858442497253417} +{"epoch": 0, "iter": 16829, "iter_tflops": 34.54208270278586, "iter_time": 0.5972741622924805, "loss": 0.5386789441108704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.02337572790385, "step_time": 0.5572450675964356} +{"epoch": 0, "iter": 16830, "iter_tflops": 16.74371515689114, "iter_time": 1.2321694030761718, "loss": 0.4548628330230713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.19539585633687, "step_time": 1.0215741081237795} +{"epoch": 0, "iter": 16831, "iter_tflops": 
39.51986140218171, "iter_time": 0.5220436706542969, "loss": 0.6715359687805176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19723527339126, "step_time": 0.4776021747589111} +{"epoch": 0, "iter": 16832, "iter_tflops": 38.974864526141026, "iter_time": 0.5293435592651368, "loss": 0.7067148089408875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.73223345920752, "step_time": 0.48279932594299313} +{"epoch": 0, "iter": 16833, "iter_tflops": 28.89141706842784, "iter_time": 0.7140907440185548, "loss": 0.4982818365097046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.670973085270965, "step_time": 0.6514196281433106} +{"epoch": 0, "iter": 16834, "iter_tflops": 36.95228760608412, "iter_time": 0.5583170852661132, "loss": 0.6465562582015991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.315748284739875, "step_time": 0.511737829208374} +{"epoch": 0, "iter": 16835, "iter_tflops": 36.83355960072906, "iter_time": 0.5601167449951172, "loss": 0.600996196269989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.8527789821939, "step_time": 0.5176826820373536} +{"epoch": 0, "iter": 16836, "iter_tflops": 35.72944798073338, "iter_time": 0.5774254760742188, "loss": 0.4534236192703247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.826060136991096, "step_time": 0.5313723163604737} +{"epoch": 0, "iter": 16837, "iter_tflops": 17.456657956031787, "iter_time": 1.1818466949462891, "loss": 0.5509473085403442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.792443880563955, "step_time": 1.0978398361206056} +{"epoch": 0, "iter": 16838, "iter_tflops": 25.99712008961554, "iter_time": 0.7935914993286133, "loss": 0.5591443777084351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.424853802059367, "step_time": 0.701145149230957} +{"epoch": 0, "iter": 16839, "iter_tflops": 36.72579682576807, "iter_time": 0.5617602691650391, "loss": 0.49960798025131226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17025124050731, "step_time": 0.5135913486480713} +{"epoch": 0, "iter": 16840, 
"iter_tflops": 37.122723012709926, "iter_time": 0.5557537765502929, "loss": 0.502536416053772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.49003662814279, "step_time": 0.5095350666046143} +{"epoch": 0, "iter": 16841, "iter_tflops": 18.25724247126439, "iter_time": 1.130022430419922, "loss": 0.4969118535518646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.683738220433284, "step_time": 1.0481288299560545} +{"epoch": 0, "iter": 16842, "iter_tflops": 16.61065581164383, "iter_time": 1.2420396728515626, "loss": 0.633773148059845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.360296578913065, "step_time": 1.065639331817627} +{"epoch": 0, "iter": 16843, "iter_tflops": 33.350364230360334, "iter_time": 0.6186167373657226, "loss": 0.5807434320449829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.2937388688219, "step_time": 0.5684477310180663} +{"epoch": 0, "iter": 16844, "iter_tflops": 36.86234326601038, "iter_time": 0.5596793823242187, "loss": 0.48224860429763794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17046472133239, "step_time": 0.5135886192321778} +{"epoch": 0, "iter": 16845, "iter_tflops": 17.44143303278307, "iter_time": 0.9556865310668944, "loss": 0.03054105117917061, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 18.76550942364794, "step_time": 0.8882542037963868} +{"epoch": 0, "iter": 16846, "iter_tflops": 15.177985735788928, "iter_time": 1.0982051849365235, "loss": 0.03741634264588356, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 17.167757811655473, "step_time": 0.9709213523864746} +{"epoch": 0, "iter": 16847, "iter_tflops": 46.33502430534823, "iter_time": 0.3597395896911621, "loss": 0.03968023881316185, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 50.63310365083699, "step_time": 0.3292024669647217} +{"epoch": 0, "iter": 16848, "iter_tflops": 48.155781552495064, "iter_time": 0.3461379318237305, "loss": 0.02875817008316517, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 52.50053495228459, "step_time": 0.3174928150177002} +{"epoch": 0, "iter": 
16849, "iter_tflops": 29.634762233856033, "iter_time": 0.6961788101196289, "loss": 0.18106861412525177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.448364244161446, "step_time": 0.6560307350158691} +{"epoch": 0, "iter": 16850, "iter_tflops": 17.880531104728185, "iter_time": 1.153830017089844, "loss": 0.14822304248809814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.968451760902123, "step_time": 1.0331844329833986} +{"epoch": 0, "iter": 16851, "iter_tflops": 47.49420614016795, "iter_time": 0.4343917961120605, "loss": 0.28089770674705505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.52112262666239, "step_time": 0.40043951797485355} +{"epoch": 0, "iter": 16852, "iter_tflops": 48.638101796533626, "iter_time": 0.4241755485534668, "loss": 0.26723745465278625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.5089769583362, "step_time": 0.39290602684021} +{"epoch": 0, "iter": 16853, "iter_tflops": 30.904738893442307, "iter_time": 0.6675705490112305, "loss": 0.5427778959274292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.795145979396366, "step_time": 0.6290898513793945} +{"epoch": 0, "iter": 16854, "iter_tflops": 19.695794516989245, "iter_time": 1.0474872436523437, "loss": 0.6306638121604919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.177430057388136, "step_time": 0.9302743129730225} +{"epoch": 0, "iter": 16855, "iter_tflops": 36.957172151775396, "iter_time": 0.558243293762207, "loss": 0.677452027797699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.57762838673095, "step_time": 0.508435173034668} +{"epoch": 0, "iter": 16856, "iter_tflops": 40.29790247437618, "iter_time": 0.5119644508361817, "loss": 0.5605264902114868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.01718771225522, "step_time": 0.4687053985595704} +{"epoch": 0, "iter": 16857, "iter_tflops": 25.49799440021464, "iter_time": 0.8091261291503906, "loss": 0.6017146706581116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.60624417580045, "step_time": 0.7473343124389649} +{"epoch": 0, 
"iter": 16858, "iter_tflops": 27.03745610307253, "iter_time": 0.7630560150146484, "loss": 0.5328711867332458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.708418307613044, "step_time": 0.6120457305908203} +{"epoch": 0, "iter": 16859, "iter_tflops": 44.368001269296585, "iter_time": 0.46499938964843746, "loss": 0.4859679341316223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.55783252881093, "step_time": 0.43381063461303715} +{"epoch": 0, "iter": 16860, "iter_tflops": 46.88932492102427, "iter_time": 0.43999553298950195, "loss": 0.4164033532142639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.12283523275057, "step_time": 0.4116106643676758} +{"epoch": 0, "iter": 16861, "iter_tflops": 31.323207849913594, "iter_time": 0.6586520004272461, "loss": 0.517063558101654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.68558475318456, "step_time": 0.6124606018066405} +{"epoch": 0, "iter": 16862, "iter_tflops": 10.030995993562984, "iter_time": 2.0567342987060546, "loss": 0.516046941280365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.641656605927801, "step_time": 1.7721784973144532} +{"epoch": 0, "iter": 16863, "iter_tflops": 22.4382864763546, "iter_time": 0.9194594039916992, "loss": 0.3377044200897217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.103057726335003, "step_time": 0.8218557968139648} +{"epoch": 0, "iter": 16864, "iter_tflops": 38.85591015744092, "iter_time": 0.5309641036987304, "loss": 0.38886937499046326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.312133998132005, "step_time": 0.48759283828735345} +{"epoch": 0, "iter": 16865, "iter_tflops": 14.065625304412865, "iter_time": 1.1675504455566406, "loss": 0.3856915533542633, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 15.105038161988986, "step_time": 1.0872085800170899} +{"epoch": 0, "iter": 16866, "iter_tflops": 21.688556348133055, "iter_time": 0.7571885757446289, "loss": 0.30562347173690796, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 25.64619213441345, "step_time": 0.6403417320251465} 
+{"epoch": 0, "iter": 16867, "iter_tflops": 30.459478466682608, "iter_time": 0.5391532592773438, "loss": 0.3196827173233032, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 32.37479408700582, "step_time": 0.5072565727233886} +{"epoch": 0, "iter": 16868, "iter_tflops": 27.613190402610588, "iter_time": 0.5947276229858398, "loss": 0.3854500949382782, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 29.26924880896507, "step_time": 0.5610778465270997} +{"epoch": 0, "iter": 16869, "iter_tflops": 34.219672782719, "iter_time": 0.6029015426635742, "loss": 0.25438764691352844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.51106171388698, "step_time": 0.5650641899108887} +{"epoch": 0, "iter": 16870, "iter_tflops": 14.60082997694994, "iter_time": 1.4130082702636722, "loss": 0.2742205858230591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.846980455897263, "step_time": 1.1559991092681887} +{"epoch": 0, "iter": 16871, "iter_tflops": 39.55998779741957, "iter_time": 0.5215141525268554, "loss": 0.2456578016281128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.372087420560355, "step_time": 0.4756767482757569} +{"epoch": 0, "iter": 16872, "iter_tflops": 36.929038578332865, "iter_time": 0.5586685791015625, "loss": 0.1762411743402481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.364418085475975, "step_time": 0.5111207962036133} +{"epoch": 0, "iter": 16873, "iter_tflops": 29.030597050172688, "iter_time": 0.7106672134399414, "loss": 0.05399363860487938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.31656570430793, "step_time": 0.6587916984558105} +{"epoch": 0, "iter": 16874, "iter_tflops": 9.46316835552704, "iter_time": 2.1801465148925785, "loss": 0.06149892881512642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.271718579117167, "step_time": 1.8303414306640626} +{"epoch": 0, "iter": 16875, "iter_tflops": 15.85595346063843, "iter_time": 1.3011575469970704, "loss": 0.08401469141244888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.464443712700472, "step_time": 
1.1813198204040527} +{"epoch": 0, "iter": 16876, "iter_tflops": 32.833171919863354, "iter_time": 0.6283612670898436, "loss": 0.06417633593082428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.95431762645518, "step_time": 0.5582864151000976} +{"epoch": 0, "iter": 16877, "iter_tflops": 19.88877097907256, "iter_time": 0.8112763900756834, "loss": 0.34390953183174133, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 21.093189975428597, "step_time": 0.7649525909423829} +{"epoch": 0, "iter": 16878, "iter_tflops": 12.604018306113565, "iter_time": 1.2801703338623047, "loss": 0.4144324064254761, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 14.605952529704178, "step_time": 1.1047064743041992} +{"epoch": 0, "iter": 16879, "iter_tflops": 28.627130105942612, "iter_time": 0.5636363220214844, "loss": 0.3487841486930847, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 30.603829128325017, "step_time": 0.5272310943603515} +{"epoch": 0, "iter": 16880, "iter_tflops": 29.903392403159003, "iter_time": 0.5395805969238281, "loss": 0.37444251775741577, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.797469801510445, "step_time": 0.5074394416809083} +{"epoch": 0, "iter": 16881, "iter_tflops": 23.31689521339644, "iter_time": 0.8848130645751954, "loss": 0.6331372857093811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.33794537839365, "step_time": 0.8476924896240235} +{"epoch": 0, "iter": 16882, "iter_tflops": 14.134326240901393, "iter_time": 1.4596446380615236, "loss": 0.6579588055610657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.177604819573578, "step_time": 1.134973155975342} +{"epoch": 0, "iter": 16883, "iter_tflops": 34.852174599890425, "iter_time": 0.5919600067138672, "loss": 0.5599557757377625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82781787169832, "step_time": 0.5453947563171386} +{"epoch": 0, "iter": 16884, "iter_tflops": 35.59204362024912, "iter_time": 0.5796546478271484, "loss": 0.5220679640769958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.553954387568496, 
"step_time": 0.535122631072998} +{"epoch": 0, "iter": 16885, "iter_tflops": 21.128124118957324, "iter_time": 0.9764754028320313, "loss": 0.7207455039024353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.700325741988422, "step_time": 0.9088457031250001} +{"epoch": 0, "iter": 16886, "iter_tflops": 15.346914165033366, "iter_time": 1.3443154296875, "loss": 0.6427220106124878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.476915352088202, "step_time": 0.9178792190551757} +{"epoch": 0, "iter": 16887, "iter_tflops": 44.29707249017541, "iter_time": 0.46574394989013673, "loss": 0.6668742895126343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.766909264164056, "step_time": 0.4319118366241455} +{"epoch": 0, "iter": 16888, "iter_tflops": 45.82360971504845, "iter_time": 0.45022846603393557, "loss": 0.6607098579406738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.292976243051875, "step_time": 0.4185402278900146} +{"epoch": 0, "iter": 16889, "iter_tflops": 28.817080097644478, "iter_time": 0.7159328231811524, "loss": 0.5224646329879761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.377153063593365, "step_time": 0.6791648139953613} +{"epoch": 0, "iter": 16890, "iter_tflops": 11.9915378083398, "iter_time": 1.7204710388183595, "loss": 0.5313310623168945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.998342052501041, "step_time": 1.473824073791504} +{"epoch": 0, "iter": 16891, "iter_tflops": 14.636799967982771, "iter_time": 1.4095357971191407, "loss": 0.5344506502151489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.278663484364696, "step_time": 1.1940213737487795} +{"epoch": 0, "iter": 16892, "iter_tflops": 43.93879912854838, "iter_time": 0.4695415878295899, "loss": 0.6832340359687805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47172367730162, "step_time": 0.4345975227355957} +{"epoch": 0, "iter": 16893, "iter_tflops": 15.192568514294374, "iter_time": 1.118776870727539, "loss": 0.46054673194885254, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 
15.790486619049243, "step_time": 1.0764135818481446} +{"epoch": 0, "iter": 16894, "iter_tflops": 11.505329309857357, "iter_time": 1.4773235778808598, "loss": 0.2714652419090271, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 15.812785826686296, "step_time": 1.074895622253418} +{"epoch": 0, "iter": 16895, "iter_tflops": 26.607495046282548, "iter_time": 0.6388085098266602, "loss": 0.32853713631629944, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 28.593515248295255, "step_time": 0.594438777923584} +{"epoch": 0, "iter": 16896, "iter_tflops": 25.159400359395814, "iter_time": 0.6755762863159179, "loss": 0.3260796368122101, "lr": 3e-05, "seqlen": 6784.0, "step_tflops": 26.96010073650474, "step_time": 0.6304536628723144} +{"epoch": 0, "iter": 16897, "iter_tflops": 13.507423524854607, "iter_time": 1.5273892517089842, "loss": 0.6616715788841248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.190424162422477, "step_time": 1.4538743362426758} +{"epoch": 0, "iter": 16898, "iter_tflops": 20.231776326609058, "iter_time": 1.019737129211426, "loss": 0.6796205043792725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.834401934628136, "step_time": 0.8656014766693114} +{"epoch": 0, "iter": 16899, "iter_tflops": 35.53834540600131, "iter_time": 0.580530502319336, "loss": 0.4305760860443115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.57083376193934, "step_time": 0.5348884506225586} +{"epoch": 0, "iter": 16900, "iter_tflops": 35.60163701257594, "iter_time": 0.5794984512329101, "loss": 0.5605558753013611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.64662638304226, "step_time": 0.5338394432067871} +{"epoch": 0, "iter": 16901, "iter_tflops": 17.31066646893034, "iter_time": 1.1918139343261718, "loss": 0.6588323712348938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.59778104740102, "step_time": 1.1093309173583983} +{"epoch": 0, "iter": 16902, "iter_tflops": 20.914427581900046, "iter_time": 0.9864526977539063, "loss": 0.6236255168914795, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 23.266859141835017, "step_time": 0.8867158813476562} +{"epoch": 0, "iter": 16903, "iter_tflops": 42.86436700830704, "iter_time": 0.4813110504150391, "loss": 0.732438325881958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.02620686441008, "step_time": 0.448246660232544} +{"epoch": 0, "iter": 16904, "iter_tflops": 45.52211732156738, "iter_time": 0.45321032333374023, "loss": 0.5964227914810181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.682649488297486, "step_time": 0.4237874011993408} +{"epoch": 0, "iter": 16905, "iter_tflops": 30.55536666357887, "iter_time": 0.6752035980224609, "loss": 0.056527115404605865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.423297380675585, "step_time": 0.6363046073913575} +{"epoch": 0, "iter": 16906, "iter_tflops": 13.16995262164324, "iter_time": 1.5665275421142577, "loss": 0.12649790942668915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.904094383290053, "step_time": 1.3842567672729493} +{"epoch": 0, "iter": 16907, "iter_tflops": 52.930583324738244, "iter_time": 0.38977642440795895, "loss": 0.08713210374116898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.95445299262792, "step_time": 0.35598806381225584} +{"epoch": 0, "iter": 16908, "iter_tflops": 53.72383171527626, "iter_time": 0.3840212593078613, "loss": 0.06864579021930695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.22257091288583, "step_time": 0.35434872055053707} +{"epoch": 0, "iter": 16909, "iter_tflops": 40.33193604791648, "iter_time": 0.5115324363708496, "loss": 0.22415508329868317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.90133903340145, "step_time": 0.4699422378540039} +{"epoch": 0, "iter": 16910, "iter_tflops": 35.073753525640484, "iter_time": 0.5882202911376953, "loss": 0.1996525526046753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.4226296758888, "step_time": 0.5233312358856201} +{"epoch": 0, "iter": 16911, "iter_tflops": 36.67305969008791, "iter_time": 0.562568099975586, "loss": 0.20510122179985046, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 40.1385739839287, "step_time": 0.5139966735839844} +{"epoch": 0, "iter": 16912, "iter_tflops": 36.876645362629574, "iter_time": 0.5594623184204102, "loss": 0.2711560130119324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.40555606193078, "step_time": 0.5106004104614258} +{"epoch": 0, "iter": 16913, "iter_tflops": 12.357382763319466, "iter_time": 0.8222556610107422, "loss": 0.008215841837227345, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 13.251112368638426, "step_time": 0.7667981109619141} +{"epoch": 0, "iter": 16914, "iter_tflops": 6.2943478031025615, "iter_time": 1.6142940063476559, "loss": 0.006069253198802471, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 8.466298321002578, "step_time": 1.2001618118286133} +{"epoch": 0, "iter": 16915, "iter_tflops": 6.078154667627714, "iter_time": 1.6717126312255857, "loss": 0.001079230452887714, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 7.787014611292233, "step_time": 1.3048553829193115} +{"epoch": 0, "iter": 16916, "iter_tflops": 20.87539236325918, "iter_time": 0.4867418899536133, "loss": 0.007178532425314188, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 23.075047163236007, "step_time": 0.4403426723480225} +{"epoch": 0, "iter": 16917, "iter_tflops": 14.977970142447775, "iter_time": 1.047184600830078, "loss": 0.3374670147895813, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.00385019636508, "step_time": 0.9800578918457031} +{"epoch": 0, "iter": 16918, "iter_tflops": 11.457083828123439, "iter_time": 1.3689958038330077, "loss": 0.4860929250717163, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 13.584443754702432, "step_time": 1.1546074295043944} +{"epoch": 0, "iter": 16919, "iter_tflops": 24.53093604985624, "iter_time": 0.6393844757080078, "loss": 0.26347020268440247, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 26.422716549957617, "step_time": 0.5936066284179686} +{"epoch": 0, "iter": 16920, "iter_tflops": 24.084740107680307, "iter_time": 0.6512297668457031, "loss": 
0.28445908427238464, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.911583009350498, "step_time": 0.6053161506652833} +{"epoch": 0, "iter": 16921, "iter_tflops": 19.377857706634096, "iter_time": 1.0646735992431642, "loss": 0.40442395210266113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.596793387513294, "step_time": 1.0016653137207032} +{"epoch": 0, "iter": 16922, "iter_tflops": 9.419124654651535, "iter_time": 2.1903408508300783, "loss": 0.3847877085208893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.04065291406871, "step_time": 1.8686479568481444} +{"epoch": 0, "iter": 16923, "iter_tflops": 17.687382560196728, "iter_time": 1.166429992675781, "loss": 0.3807847797870636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.335445717494125, "step_time": 0.966986759185791} +{"epoch": 0, "iter": 16924, "iter_tflops": 52.234460912292136, "iter_time": 0.39497092819213864, "loss": 0.4239276647567749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.71597914984968, "step_time": 0.3637615680694581} +{"epoch": 0, "iter": 16925, "iter_tflops": 16.228540759323202, "iter_time": 1.0068861541748046, "loss": 0.49426761269569397, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 16.994724741406557, "step_time": 0.9614920654296876} +{"epoch": 0, "iter": 16926, "iter_tflops": 19.740840415471155, "iter_time": 0.827740493774414, "loss": 0.20985576510429382, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 24.075982054389474, "step_time": 0.6786968421936036} +{"epoch": 0, "iter": 16927, "iter_tflops": 28.387125889967898, "iter_time": 0.5756233673095703, "loss": 0.3482416868209839, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 30.10404010100094, "step_time": 0.5427940216064454} +{"epoch": 0, "iter": 16928, "iter_tflops": 29.488898261359484, "iter_time": 0.5541167678833008, "loss": 0.3074533939361572, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.307988470407548, "step_time": 0.5219208831787109} +{"epoch": 0, "iter": 16929, "iter_tflops": 3.345386848045934, "iter_time": 
0.4567445297241211, "loss": 0.721471905708313, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.6736702932344354, "step_time": 0.41592930793762206} +{"epoch": 0, "iter": 16930, "iter_tflops": 3.3859469748961737, "iter_time": 0.45127320480346683, "loss": 0.9904221892356873, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.718297365162785, "step_time": 0.4109373168945312} +{"epoch": 0, "iter": 16931, "iter_tflops": 3.723356994699235, "iter_time": 0.41037889862060556, "loss": 0.926485538482666, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 4.034093666303172, "step_time": 0.37876838493347165} +{"epoch": 0, "iter": 16932, "iter_tflops": 3.7518288489191547, "iter_time": 0.4072646179199219, "loss": 1.2801216840744019, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 4.059463406497993, "step_time": 0.3764012603759766} +{"epoch": 0, "iter": 16933, "iter_tflops": 30.81147288749039, "iter_time": 0.6695912780761719, "loss": 0.3355500102043152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.668031518639815, "step_time": 0.6315377006530762} +{"epoch": 0, "iter": 16934, "iter_tflops": 14.040018311188314, "iter_time": 1.469449188232422, "loss": 0.5345916748046875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.71459262175427, "step_time": 1.1024067649841307} +{"epoch": 0, "iter": 16935, "iter_tflops": 48.223958476215465, "iter_time": 0.42781833267211916, "loss": 0.3744368851184845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.35663671141697, "step_time": 0.3940492515563965} +{"epoch": 0, "iter": 16936, "iter_tflops": 51.90423601931069, "iter_time": 0.3974838104248047, "loss": 0.36741694808006287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.25747453009293, "step_time": 0.36672626495361327} +{"epoch": 0, "iter": 16937, "iter_tflops": 42.06782574555879, "iter_time": 0.49042452621459964, "loss": 0.002398513024672866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97868206461303, "step_time": 0.44870998001098633} +{"epoch": 0, "iter": 16938, "iter_tflops": 12.371689064598431, 
"iter_time": 1.667605239868164, "loss": 0.00787135399878025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.747193148597638, "step_time": 1.5007495193481444} +{"epoch": 0, "iter": 16939, "iter_tflops": 12.670161616989562, "iter_time": 1.628321258544922, "loss": 0.0022363527677953243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.922057438699309, "step_time": 1.295755500793457} +{"epoch": 0, "iter": 16940, "iter_tflops": 25.318204551799937, "iter_time": 0.8148719024658204, "loss": 0.0024933090899139643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.43632262298067, "step_time": 0.6562820262908935} +{"epoch": 0, "iter": 16941, "iter_tflops": 15.87444160290606, "iter_time": 0.9880473327636718, "loss": 0.33097735047340393, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.889128604449745, "step_time": 0.9286861419677735} +{"epoch": 0, "iter": 16942, "iter_tflops": 5.655671548219761, "iter_time": 2.773269195556641, "loss": 0.33761876821517944, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 6.246045172688123, "step_time": 2.511140930175781} +{"epoch": 0, "iter": 16943, "iter_tflops": 9.130639608660573, "iter_time": 1.717809524536133, "loss": 0.2502082586288452, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 11.464329239775022, "step_time": 1.3681306037902832} +{"epoch": 0, "iter": 16944, "iter_tflops": 22.696964318366362, "iter_time": 0.6910483474731445, "loss": 0.34271055459976196, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 24.52188577494653, "step_time": 0.6396204528808593} +{"epoch": 0, "iter": 16945, "iter_tflops": 14.090908340665749, "iter_time": 1.2850971069335937, "loss": 0.3230988681316376, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 15.110294549985, "step_time": 1.1984005661010741} +{"epoch": 0, "iter": 16946, "iter_tflops": 23.613409987605067, "iter_time": 0.7668602523803711, "loss": 0.2511069178581238, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 26.334532319592565, "step_time": 0.6876213073730469} +{"epoch": 0, "iter": 16947, "iter_tflops": 
33.22392627541417, "iter_time": 0.5450344848632813, "loss": 0.4096001982688904, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 35.29588818683884, "step_time": 0.5130395202636718} +{"epoch": 0, "iter": 16948, "iter_tflops": 33.276377072257056, "iter_time": 0.544175392150879, "loss": 0.2907785177230835, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 35.33894755202191, "step_time": 0.5124143981933594} +{"epoch": 0, "iter": 16949, "iter_tflops": 28.894917435241272, "iter_time": 0.6409630279541016, "loss": 0.021636275574564934, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 30.803375616945, "step_time": 0.6012514343261719} +{"epoch": 0, "iter": 16950, "iter_tflops": 13.132320224142694, "iter_time": 1.4103047637939452, "loss": 0.02809472382068634, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 17.31730617485459, "step_time": 1.0694835319519045} +{"epoch": 0, "iter": 16951, "iter_tflops": 33.12888983219327, "iter_time": 0.5590460128784179, "loss": 0.02785065583884716, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 36.49566800652009, "step_time": 0.5074732093811035} +{"epoch": 0, "iter": 16952, "iter_tflops": 42.72550978106897, "iter_time": 0.4334781227111817, "loss": 0.008512000553309917, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 47.084572381999074, "step_time": 0.3933469676971436} +{"epoch": 0, "iter": 16953, "iter_tflops": 16.754093171142383, "iter_time": 1.2314061584472658, "loss": 0.37296316027641296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.83347759683192, "step_time": 1.1568743896484375} +{"epoch": 0, "iter": 16954, "iter_tflops": 17.430443710006895, "iter_time": 1.1836241149902345, "loss": 0.32067713141441345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.890426617080106, "step_time": 0.9012979030609131} +{"epoch": 0, "iter": 16955, "iter_tflops": 41.10998141656022, "iter_time": 0.5018512001037598, "loss": 0.3163565993309021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17507158904156, "step_time": 0.45669199371337893} +{"epoch": 0, "iter": 16956, 
"iter_tflops": 41.39006796858387, "iter_time": 0.4984551734924316, "loss": 0.3360119163990021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60452734910891, "step_time": 0.45239134597778324} +{"epoch": 0, "iter": 16957, "iter_tflops": 22.921633462169783, "iter_time": 0.9000708236694337, "loss": 0.11473779380321503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.58031976984495, "step_time": 0.8393338127136231} +{"epoch": 0, "iter": 16958, "iter_tflops": 10.850417224463065, "iter_time": 1.90141015625, "loss": 0.06981293857097626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.850049511200977, "step_time": 1.7410132751464844} +{"epoch": 0, "iter": 16959, "iter_tflops": 11.394924818648427, "iter_time": 1.8105510864257814, "loss": 0.09429772198200226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.725741853006133, "step_time": 1.3119313354492188} +{"epoch": 0, "iter": 16960, "iter_tflops": 45.508775308064386, "iter_time": 0.45334319305419923, "loss": 0.14027608931064606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50328729835789, "step_time": 0.41676209068298337} +{"epoch": 0, "iter": 16961, "iter_tflops": 20.929924048638917, "iter_time": 0.8415674362182617, "loss": 0.5019400119781494, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 22.02364326336398, "step_time": 0.7997742385864258} +{"epoch": 0, "iter": 16962, "iter_tflops": 11.48380524165867, "iter_time": 1.5338071441650392, "loss": 0.24949505925178528, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 13.86886291097915, "step_time": 1.2700350875854491} +{"epoch": 0, "iter": 16963, "iter_tflops": 11.898491395545454, "iter_time": 1.4803509063720703, "loss": 0.37152931094169617, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 15.358130409731867, "step_time": 1.1468806457519531} +{"epoch": 0, "iter": 16964, "iter_tflops": 11.698471094036876, "iter_time": 1.5056619262695312, "loss": 0.48408424854278564, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 14.478343350259623, "step_time": 1.2165716819763184} +{"epoch": 0, 
"iter": 16965, "iter_tflops": 19.56218383324201, "iter_time": 0.7934180679321288, "loss": 0.39497047662734985, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 21.303755053472482, "step_time": 0.7285565414428711} +{"epoch": 0, "iter": 16966, "iter_tflops": 23.73401660866997, "iter_time": 0.6539554748535157, "loss": 0.3257620632648468, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 25.59809492173354, "step_time": 0.606333797454834} +{"epoch": 0, "iter": 16967, "iter_tflops": 23.51716519568919, "iter_time": 0.6599855880737305, "loss": 0.2972675859928131, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 25.390000981960544, "step_time": 0.6113032493591307} +{"epoch": 0, "iter": 16968, "iter_tflops": 23.47091584985354, "iter_time": 0.6612860870361329, "loss": 0.2568918466567993, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 25.168862942282573, "step_time": 0.6166742668151856} +{"epoch": 0, "iter": 16969, "iter_tflops": 22.607148385204002, "iter_time": 0.9125915908813476, "loss": 0.12937888503074646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.125254042454472, "step_time": 0.8551658554077148} +{"epoch": 0, "iter": 16970, "iter_tflops": 12.051875754848584, "iter_time": 1.7118574676513674, "loss": 0.07370919734239578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.829673717948626, "step_time": 1.303317672729492} +{"epoch": 0, "iter": 16971, "iter_tflops": 9.72087815963687, "iter_time": 2.1223487396240235, "loss": 0.0899411290884018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.129573108176496, "step_time": 1.8537183151245118} +{"epoch": 0, "iter": 16972, "iter_tflops": 28.006030269962345, "iter_time": 0.736666114807129, "loss": 0.07119645178318024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.02676226293569, "step_time": 0.5286396389007568} +{"epoch": 0, "iter": 16973, "iter_tflops": 14.368634272999527, "iter_time": 1.0147885208129883, "loss": 0.27645421028137207, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 15.000748019175502, "step_time": 0.9720265350341797} 
+{"epoch": 0, "iter": 16974, "iter_tflops": 11.633692063779833, "iter_time": 1.2533531951904298, "loss": 0.31659814715385437, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 14.105095109165985, "step_time": 1.0337487983703613} +{"epoch": 0, "iter": 16975, "iter_tflops": 22.217554482698905, "iter_time": 0.6562884826660156, "loss": 0.32755035161972046, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 23.92495835557565, "step_time": 0.609452476501465} +{"epoch": 0, "iter": 16976, "iter_tflops": 22.732618575487827, "iter_time": 0.6414186325073242, "loss": 0.27879616618156433, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 24.48190576701576, "step_time": 0.5955878295898438} +{"epoch": 0, "iter": 16977, "iter_tflops": 17.892444588197833, "iter_time": 1.1530617523193358, "loss": 0.27129316329956055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.20516482200375, "step_time": 1.0742471466064454} +{"epoch": 0, "iter": 16978, "iter_tflops": 17.830990281952577, "iter_time": 1.1570357666015625, "loss": 0.3325883150100708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.58157933833005, "step_time": 0.9559584674835205} +{"epoch": 0, "iter": 16979, "iter_tflops": 47.699689802841284, "iter_time": 0.43252049636840817, "loss": 0.2126864194869995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.98276967583337, "step_time": 0.3968833065032959} +{"epoch": 0, "iter": 16980, "iter_tflops": 51.349865125010275, "iter_time": 0.40177502822875977, "loss": 0.2799309194087982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.719753188374646, "step_time": 0.3702653427124023} +{"epoch": 0, "iter": 16981, "iter_tflops": 19.42564025105205, "iter_time": 0.6250014190673828, "loss": 0.009719548746943474, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 20.80904010605987, "step_time": 0.5834508781433106} +{"epoch": 0, "iter": 16982, "iter_tflops": 10.888184030435552, "iter_time": 1.1150668182373047, "loss": 0.006880003493279219, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 13.236444937754008, "step_time": 
0.9172442283630371} +{"epoch": 0, "iter": 16983, "iter_tflops": 26.146860210117904, "iter_time": 0.46434075164794925, "loss": 0.013565142638981342, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 29.063142767415606, "step_time": 0.41774741363525386} +{"epoch": 0, "iter": 16984, "iter_tflops": 28.251268330207182, "iter_time": 0.4297524833679199, "loss": 0.015233434736728668, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 31.32187315374792, "step_time": 0.3876221790313721} +{"epoch": 0, "iter": 16985, "iter_tflops": 17.662260395267044, "iter_time": 1.1680890808105469, "loss": 0.250461220741272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.811165882086936, "step_time": 1.0967472000122072} +{"epoch": 0, "iter": 16986, "iter_tflops": 18.141256199961237, "iter_time": 1.1372472381591796, "loss": 0.23861494660377502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.040173112723593, "step_time": 0.9360676708221436} +{"epoch": 0, "iter": 16987, "iter_tflops": 47.407603830919605, "iter_time": 0.4351853256225587, "loss": 0.2820074260234833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.31320949624435, "step_time": 0.40206203651428224} +{"epoch": 0, "iter": 16988, "iter_tflops": 43.520651225176394, "iter_time": 0.47405295944213865, "loss": 0.2775641679763794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.691907800489886, "step_time": 0.4418558692932129} +{"epoch": 0, "iter": 16989, "iter_tflops": 41.49123750521471, "iter_time": 0.4972397727966308, "loss": 0.44870615005493164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17350152216654, "step_time": 0.4567078666687012} +{"epoch": 0, "iter": 16990, "iter_tflops": 42.587530584058435, "iter_time": 0.48443976974487307, "loss": 0.3086770474910736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.381691287104616, "step_time": 0.4448111515045166} +{"epoch": 0, "iter": 16991, "iter_tflops": 45.89325721679984, "iter_time": 0.4495452003479004, "loss": 0.3056226968765259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
49.492835297250885, "step_time": 0.416850103378296} +{"epoch": 0, "iter": 16992, "iter_tflops": 50.01595623567666, "iter_time": 0.412490234375, "loss": 0.38395926356315613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.03211990177912, "step_time": 0.38183016967773437} +{"epoch": 0, "iter": 16993, "iter_tflops": 28.654735600251872, "iter_time": 0.7199889678955078, "loss": 0.4913141131401062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.329687444348263, "step_time": 0.6802276992797852} +{"epoch": 0, "iter": 16994, "iter_tflops": 14.019848309381704, "iter_time": 1.4715632476806642, "loss": 0.5728440880775452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.926173930352025, "step_time": 1.2188870086669923} +{"epoch": 0, "iter": 16995, "iter_tflops": 39.926913357519425, "iter_time": 0.5167214736938477, "loss": 0.35487818717956543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.135162769360846, "step_time": 0.4782894554138183} +{"epoch": 0, "iter": 16996, "iter_tflops": 48.84582743853397, "iter_time": 0.4223716659545898, "loss": 0.550337553024292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.801345858596626, "step_time": 0.39073044776916505} +{"epoch": 0, "iter": 16997, "iter_tflops": 22.92727694909839, "iter_time": 0.8998492736816407, "loss": 0.21796631813049316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.132765692374694, "step_time": 0.8548996734619141} +{"epoch": 0, "iter": 16998, "iter_tflops": 16.02138568654716, "iter_time": 1.28772216796875, "loss": 0.17395739257335663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.2363313525724, "step_time": 1.0195075950622556} +{"epoch": 0, "iter": 16999, "iter_tflops": 48.686516814885884, "iter_time": 0.4237537384033203, "loss": 0.2780197262763977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.94621936370828, "step_time": 0.38966131591796876} +{"epoch": 0, "iter": 17000, "iter_tflops": 45.08934617551058, "iter_time": 0.45756027221679685, "loss": 0.22270943224430084, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 48.621061152559896, "step_time": 0.4243242130279541} +{"epoch": 0, "iter": 17001, "iter_tflops": 2.1437885311863596, "iter_time": 0.7493924407958985, "loss": 0.019416693598031998, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.2722371141750575, "step_time": 0.7070296096801758} +{"epoch": 0, "iter": 17002, "iter_tflops": 1.1827185042785922, "iter_time": 1.3583442840576172, "loss": 0.508537232875824, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.470759661975419, "step_time": 1.092319133758545} +{"epoch": 0, "iter": 17003, "iter_tflops": 3.610364037153986, "iter_time": 0.4449797592163085, "loss": 1.091965913772583, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.95816436807371, "step_time": 0.405879789352417} +{"epoch": 0, "iter": 17004, "iter_tflops": 3.8361177113406075, "iter_time": 0.41879291534423824, "loss": 0.62296062707901, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.145702055399119, "step_time": 0.38751914596557613} +{"epoch": 0, "iter": 17005, "iter_tflops": 27.192724045862857, "iter_time": 0.7586990356445313, "loss": 0.7196771502494812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.642650315853498, "step_time": 0.7202927551269531} +{"epoch": 0, "iter": 17006, "iter_tflops": 15.235038101820217, "iter_time": 1.3541871948242186, "loss": 0.603207528591156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.225994092993567, "step_time": 1.0730833168029785} +{"epoch": 0, "iter": 17007, "iter_tflops": 31.076669258324063, "iter_time": 0.6638772430419921, "loss": 0.5962166786193848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.01486585758253, "step_time": 0.6065316734313966} +{"epoch": 0, "iter": 17008, "iter_tflops": 33.64453601018298, "iter_time": 0.613207847595215, "loss": 0.5341929793357849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.55269795002352, "step_time": 0.564420539855957} +{"epoch": 0, "iter": 17009, "iter_tflops": 16.4178003425193, "iter_time": 1.256629577636719, "loss": 0.011018700897693634, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 17.54082860866837, "step_time": 1.176175537109375} +{"epoch": 0, "iter": 17010, "iter_tflops": 23.372542695617007, "iter_time": 0.8827064208984375, "loss": 0.031208960339426994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.469754369643805, "step_time": 0.7000768737792968} +{"epoch": 0, "iter": 17011, "iter_tflops": 50.78401511354769, "iter_time": 0.4062517204284668, "loss": 0.0244568083435297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.677952635957666, "step_time": 0.3705433216094971} +{"epoch": 0, "iter": 17012, "iter_tflops": 49.63069767326985, "iter_time": 0.41569219207763675, "loss": 0.05456770583987236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.01637199406871, "step_time": 0.38194148826599117} +{"epoch": 0, "iter": 17013, "iter_tflops": 16.628643674892995, "iter_time": 0.8866869049072266, "loss": 0.1497691422700882, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 17.375410545591876, "step_time": 0.8485785446166991} +{"epoch": 0, "iter": 17014, "iter_tflops": 8.988717420085782, "iter_time": 1.6403230743408206, "loss": 0.1251542717218399, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 11.817622580581714, "step_time": 1.2476621665954588} +{"epoch": 0, "iter": 17015, "iter_tflops": 29.465985908860855, "iter_time": 0.5003871459960938, "loss": 0.09488780796527863, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 32.39416252298618, "step_time": 0.45515609741210933} +{"epoch": 0, "iter": 17016, "iter_tflops": 30.29340504356689, "iter_time": 0.48671981811523435, "loss": 0.09032509475946426, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 33.165914576122574, "step_time": 0.4445648727416992} +{"epoch": 0, "iter": 17017, "iter_tflops": 26.799866561162215, "iter_time": 0.7698207550048828, "loss": 0.14438527822494507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.91401528929804, "step_time": 0.7135326347351074} +{"epoch": 0, "iter": 17018, "iter_tflops": 10.666609825709445, "iter_time": 1.934175323486328, "loss": 0.0974937453866005, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 12.713068029876382, "step_time": 1.622825698852539} +{"epoch": 0, "iter": 17019, "iter_tflops": 12.32230184407279, "iter_time": 1.6742889251708983, "loss": 0.1400233954191208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.332290574524126, "step_time": 1.2632088203430176} +{"epoch": 0, "iter": 17020, "iter_tflops": 28.68000836551836, "iter_time": 0.7193545150756836, "loss": 0.07811444252729416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.952333334854686, "step_time": 0.57384574508667} +{"epoch": 0, "iter": 17021, "iter_tflops": 23.10361813546322, "iter_time": 0.6682573928833009, "loss": 0.33485621213912964, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.61464550679475, "step_time": 0.6272348556518554} +{"epoch": 0, "iter": 17022, "iter_tflops": 21.965794101130157, "iter_time": 0.7028730010986328, "loss": 0.372402548789978, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.025176168173438, "step_time": 0.6426243667602539} +{"epoch": 0, "iter": 17023, "iter_tflops": 23.499267244542715, "iter_time": 0.6570061721801759, "loss": 0.3412306308746338, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.3215888721687, "step_time": 0.6097233352661132} +{"epoch": 0, "iter": 17024, "iter_tflops": 23.613972345165934, "iter_time": 0.6538147583007813, "loss": 0.3054142892360687, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.389954249548, "step_time": 0.6080815849304199} +{"epoch": 0, "iter": 17025, "iter_tflops": 17.398529281059005, "iter_time": 1.1857952575683595, "loss": 0.2839379608631134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.775757790396266, "step_time": 1.0988154907226562} +{"epoch": 0, "iter": 17026, "iter_tflops": 15.215754380018172, "iter_time": 1.3559034271240233, "loss": 0.30847156047821045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.813642152725215, "step_time": 0.9457885742187501} +{"epoch": 0, "iter": 17027, "iter_tflops": 46.68135360284707, "iter_time": 0.4419557685852051, "loss": 0.2547741234302521, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 50.62788847717474, "step_time": 0.40750452232360834} +{"epoch": 0, "iter": 17028, "iter_tflops": 47.927281575797586, "iter_time": 0.43046659088134764, "loss": 0.1976086050271988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.831879910005576, "step_time": 0.39803868865966796} +{"epoch": 0, "iter": 17029, "iter_tflops": 34.25466413172025, "iter_time": 0.6022856750488281, "loss": 0.39588165283203125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.67830499589551, "step_time": 0.5624876480102539} +{"epoch": 0, "iter": 17030, "iter_tflops": 24.79073855271943, "iter_time": 0.832209716796875, "loss": 0.2511460483074188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.449710723600237, "step_time": 0.6775464534759521} +{"epoch": 0, "iter": 17031, "iter_tflops": 37.91596431691214, "iter_time": 0.5441268310546874, "loss": 0.2172883152961731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.52974370585876, "step_time": 0.49677873420715335} +{"epoch": 0, "iter": 17032, "iter_tflops": 38.81319109690845, "iter_time": 0.5315485000610352, "loss": 0.19472858309745789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.443395705466116, "step_time": 0.48608489418029793} +{"epoch": 0, "iter": 17033, "iter_tflops": 27.594257647072617, "iter_time": 0.7476589431762696, "loss": 0.18702258169651031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.770842573091723, "step_time": 0.6929966278076172} +{"epoch": 0, "iter": 17034, "iter_tflops": 28.21665745204361, "iter_time": 0.7311671676635741, "loss": 0.23538027703762054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.13354111031402, "step_time": 0.5709679393768311} +{"epoch": 0, "iter": 17035, "iter_tflops": 35.05961403395473, "iter_time": 0.5884575195312501, "loss": 0.2532486319541931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.2219265867101, "step_time": 0.5397711563110352} +{"epoch": 0, "iter": 17036, "iter_tflops": 36.63369545632407, "iter_time": 0.5631725997924806, "loss": 
0.2711043059825897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06180620700882, "step_time": 0.5149816112518311} +{"epoch": 0, "iter": 17037, "iter_tflops": 13.776907724512322, "iter_time": 1.1414490966796875, "loss": 0.2414388209581375, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 14.827198883082747, "step_time": 1.0605940475463869} +{"epoch": 0, "iter": 17038, "iter_tflops": 14.174510995858958, "iter_time": 1.1094307861328125, "loss": 0.30710065364837646, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 17.26963411771022, "step_time": 0.9105947914123534} +{"epoch": 0, "iter": 17039, "iter_tflops": 22.77326031513528, "iter_time": 0.6905308532714844, "loss": 0.26362812519073486, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.499116311733115, "step_time": 0.6418859634399415} +{"epoch": 0, "iter": 17040, "iter_tflops": 26.690225857126677, "iter_time": 0.5891909255981446, "loss": 0.38862311840057373, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 28.545303090979676, "step_time": 0.5509011001586914} +{"epoch": 0, "iter": 17041, "iter_tflops": 19.960238854837836, "iter_time": 1.0336095504760743, "loss": 0.1935720443725586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.14583698922093, "step_time": 0.9756574554443359} +{"epoch": 0, "iter": 17042, "iter_tflops": 9.891074021066983, "iter_time": 2.085829452514649, "loss": 0.1805635690689087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.895476432333647, "step_time": 1.7343646240234376} +{"epoch": 0, "iter": 17043, "iter_tflops": 12.153141269907321, "iter_time": 1.697593490600586, "loss": 0.1458466798067093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.250124392420725, "step_time": 1.4477833976745604} +{"epoch": 0, "iter": 17044, "iter_tflops": 44.10648492036307, "iter_time": 0.46775646591186526, "loss": 0.18363642692565918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.99627235257766, "step_time": 0.4298478298187256} +{"epoch": 0, "iter": 17045, "iter_tflops": 19.745663047846435, "iter_time": 
0.8254614181518555, "loss": 0.4316907227039337, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 20.75964530787911, "step_time": 0.7851426544189453} +{"epoch": 0, "iter": 17046, "iter_tflops": 13.44651117194887, "iter_time": 1.2121570281982423, "loss": 0.3107037842273712, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 14.788296662958409, "step_time": 1.1021744689941406} +{"epoch": 0, "iter": 17047, "iter_tflops": 28.01015287569777, "iter_time": 0.5819062500000001, "loss": 0.355081170797348, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.749033639593662, "step_time": 0.5478928565979004} +{"epoch": 0, "iter": 17048, "iter_tflops": 29.26616827361367, "iter_time": 0.5569325942993164, "loss": 0.4110301733016968, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 31.120205968033996, "step_time": 0.5237524147033692} +{"epoch": 0, "iter": 17049, "iter_tflops": 27.651932223994823, "iter_time": 0.7460995254516603, "loss": 0.04378592222929001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.623318324207272, "step_time": 0.6964477539062499} +{"epoch": 0, "iter": 17050, "iter_tflops": 14.48194135355874, "iter_time": 1.4246082763671872, "loss": 0.05914716050028801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.81285298530574, "step_time": 1.0966488456726076} +{"epoch": 0, "iter": 17051, "iter_tflops": 50.064132825857634, "iter_time": 0.4120932960510254, "loss": 0.0719737634062767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.59273113210913, "step_time": 0.3779091663360596} +{"epoch": 0, "iter": 17052, "iter_tflops": 57.75602432572589, "iter_time": 0.3572111091613769, "loss": 0.08394894748926163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.840914200347456, "step_time": 0.32830670547485347} +{"epoch": 0, "iter": 17053, "iter_tflops": 22.729090395718767, "iter_time": 0.7369697570800782, "loss": 0.01446609292179346, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 24.107436306583146, "step_time": 0.6948334121704102} +{"epoch": 0, "iter": 17054, "iter_tflops": 12.813891146291414, 
"iter_time": 1.3072260437011718, "loss": 0.0037188646383583546, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 16.498661177536118, "step_time": 1.0152734241485595} +{"epoch": 0, "iter": 17055, "iter_tflops": 34.67584664164794, "iter_time": 0.48306397247314453, "loss": 0.004004144109785557, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 38.27195444124809, "step_time": 0.4376743354797363} +{"epoch": 0, "iter": 17056, "iter_tflops": 38.56603000996795, "iter_time": 0.4343369598388672, "loss": 0.009244529530405998, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 42.876110881833135, "step_time": 0.3906756439208984} +{"epoch": 0, "iter": 17057, "iter_tflops": 17.979689811381352, "iter_time": 1.1474665985107422, "loss": 0.46184784173965454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.104944668372212, "step_time": 1.0798824005126955} +{"epoch": 0, "iter": 17058, "iter_tflops": 17.200796089083976, "iter_time": 1.1994266662597657, "loss": 0.6160488128662109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.486840937481052, "step_time": 0.9174740715026856} +{"epoch": 0, "iter": 17059, "iter_tflops": 32.64275688700138, "iter_time": 0.6320266876220704, "loss": 0.6117401123046875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.580251908026675, "step_time": 0.5798467521667481} +{"epoch": 0, "iter": 17060, "iter_tflops": 33.87236572554734, "iter_time": 0.6090833358764649, "loss": 0.6706423163414001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.710301488691115, "step_time": 0.561997386932373} +{"epoch": 0, "iter": 17061, "iter_tflops": 36.312392279433936, "iter_time": 0.5681557235717773, "loss": 0.0506884790956974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.29029012806801, "step_time": 0.5120611801147461} +{"epoch": 0, "iter": 17062, "iter_tflops": 37.8252936626208, "iter_time": 0.5454311523437501, "loss": 0.058480001986026764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.75184392014607, "step_time": 0.4715479774475098} +{"epoch": 0, "iter": 17063, "iter_tflops": 
41.007446343303094, "iter_time": 0.503106029510498, "loss": 0.056563034653663635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26328994544638, "step_time": 0.45580189895629886} +{"epoch": 0, "iter": 17064, "iter_tflops": 44.30050718419983, "iter_time": 0.4657078399658203, "loss": 0.053979597985744476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71381263521785, "step_time": 0.4235162963867188} +{"epoch": 0, "iter": 17065, "iter_tflops": 32.66573303270698, "iter_time": 0.6315821380615235, "loss": 0.15927033126354218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.25525782840573, "step_time": 0.569051076889038} +{"epoch": 0, "iter": 17066, "iter_tflops": 40.093572797146415, "iter_time": 0.5145735855102539, "loss": 0.20538485050201416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22779797188302, "step_time": 0.45615958404541024} +{"epoch": 0, "iter": 17067, "iter_tflops": 41.5778816276385, "iter_time": 0.4962035751342774, "loss": 0.12424328178167343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.76790089186563, "step_time": 0.45077648544311527} +{"epoch": 0, "iter": 17068, "iter_tflops": 40.66391293437216, "iter_time": 0.5073563270568847, "loss": 0.12734611332416534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.68035982153996, "step_time": 0.46174859809875485} +{"epoch": 0, "iter": 17069, "iter_tflops": 22.151537875797715, "iter_time": 0.9313616790771484, "loss": 0.47564250230789185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.496786282373172, "step_time": 0.878038948059082} +{"epoch": 0, "iter": 17070, "iter_tflops": 24.32435611780616, "iter_time": 0.8481660690307616, "loss": 0.42837759852409363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.48612823967378, "step_time": 0.6767370834350586} +{"epoch": 0, "iter": 17071, "iter_tflops": 40.12376161043404, "iter_time": 0.5141864242553711, "loss": 0.527127206325531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.30424870319026, "step_time": 0.4764219245910644} +{"epoch": 0, "iter": 17072, 
"iter_tflops": 45.23926105404062, "iter_time": 0.45604399871826173, "loss": 0.4412514567375183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.81213133681282, "step_time": 0.42266323852539056} +{"epoch": 0, "iter": 17073, "iter_tflops": 30.195645227902947, "iter_time": 0.683247314453125, "loss": 0.06175241991877556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.999500540795275, "step_time": 0.6447317352294921} +{"epoch": 0, "iter": 17074, "iter_tflops": 13.000091921032363, "iter_time": 1.5869959716796875, "loss": 0.04530579596757889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.24908574474115, "step_time": 1.269677188873291} +{"epoch": 0, "iter": 17075, "iter_tflops": 49.006625465729904, "iter_time": 0.42098580169677735, "loss": 0.05057336390018463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.50272537873429, "step_time": 0.38560827255249025} +{"epoch": 0, "iter": 17076, "iter_tflops": 51.509955320718916, "iter_time": 0.40052633285522454, "loss": 0.037953656166791916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.08506656574564, "step_time": 0.36785359764099124} +{"epoch": 0, "iter": 17077, "iter_tflops": 37.41937051791348, "iter_time": 0.551347957611084, "loss": 0.41431960463523865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.32054636027249, "step_time": 0.5116769332885742} +{"epoch": 0, "iter": 17078, "iter_tflops": 21.14331165709359, "iter_time": 0.9757739868164061, "loss": 0.4938207268714905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.304492836657488, "step_time": 0.755593360900879} +{"epoch": 0, "iter": 17079, "iter_tflops": 47.02041293100577, "iter_time": 0.438768871307373, "loss": 0.4184608459472656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.919837668489514, "step_time": 0.4051680927276611} +{"epoch": 0, "iter": 17080, "iter_tflops": 44.01493481295561, "iter_time": 0.46872938919067386, "loss": 0.3543069362640381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.525375139856855, "step_time": 0.4341069049835205} +{"epoch": 0, 
"iter": 17081, "iter_tflops": 35.95979803590167, "iter_time": 0.5737266235351562, "loss": 0.531141996383667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.44487505525551, "step_time": 0.5366409301757813} +{"epoch": 0, "iter": 17082, "iter_tflops": 14.717382260077127, "iter_time": 1.4018181457519532, "loss": 0.5978613495826721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.456974680415914, "step_time": 1.181825252532959} +{"epoch": 0, "iter": 17083, "iter_tflops": 44.16994802399913, "iter_time": 0.4670843963623047, "loss": 0.6725665330886841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.43302870769775, "step_time": 0.4259715747833252} +{"epoch": 0, "iter": 17084, "iter_tflops": 40.998858687889665, "iter_time": 0.5032114105224609, "loss": 0.7374622821807861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.74201024845606, "step_time": 0.46111235046386717} +{"epoch": 0, "iter": 17085, "iter_tflops": 28.924802382527897, "iter_time": 0.7132665328979491, "loss": 0.34844133257865906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.587836497559756, "step_time": 0.6744868507385254} +{"epoch": 0, "iter": 17086, "iter_tflops": 15.565642746714664, "iter_time": 1.325425094604492, "loss": 0.25718849897384644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.479837953096965, "step_time": 1.1164109535217284} +{"epoch": 0, "iter": 17087, "iter_tflops": 41.37633148724878, "iter_time": 0.4986206550598144, "loss": 0.23931679129600525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.24528263412141, "step_time": 0.45598330497741696} +{"epoch": 0, "iter": 17088, "iter_tflops": 37.145931392573175, "iter_time": 0.5554065475463866, "loss": 0.32261157035827637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.58550716797346, "step_time": 0.5083364715576172} +{"epoch": 0, "iter": 17089, "iter_tflops": 24.61950653788527, "iter_time": 0.8379978485107422, "loss": 0.4860115051269531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.51205186427167, "step_time": 0.7781779251098633} 
+{"epoch": 0, "iter": 17090, "iter_tflops": 13.468118822007975, "iter_time": 1.5318467102050781, "loss": 0.4687296152114868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.63853421295393, "step_time": 1.239958595275879} +{"epoch": 0, "iter": 17091, "iter_tflops": 35.43348475054844, "iter_time": 0.5822485046386718, "loss": 0.91100013256073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.50572916834101, "step_time": 0.5357928276062012} +{"epoch": 0, "iter": 17092, "iter_tflops": 35.22411222746057, "iter_time": 0.5857093963623047, "loss": 0.5994856357574463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.3644390317667, "step_time": 0.5377660675048828} +{"epoch": 0, "iter": 17093, "iter_tflops": 20.634470824159134, "iter_time": 0.9998363265991211, "loss": 0.35717886686325073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.456079856122734, "step_time": 0.9187308578491211} +{"epoch": 0, "iter": 17094, "iter_tflops": 13.134231725666226, "iter_time": 1.5707879943847656, "loss": 0.5219440460205078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.797305252283355, "step_time": 1.1592256927490237} +{"epoch": 0, "iter": 17095, "iter_tflops": 47.184955927607646, "iter_time": 0.43723880004882815, "loss": 0.3608418107032776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.4751390108697, "step_time": 0.40079723739624024} +{"epoch": 0, "iter": 17096, "iter_tflops": 52.10250185253303, "iter_time": 0.3959712638854981, "loss": 0.4428604543209076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.26109124844118, "step_time": 0.3667026901245118} +{"epoch": 0, "iter": 17097, "iter_tflops": 47.1779372441431, "iter_time": 0.4373038482666016, "loss": 0.01439225859940052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.571658990642256, "step_time": 0.4000471172332763} +{"epoch": 0, "iter": 17098, "iter_tflops": 10.082500620953105, "iter_time": 2.0462278442382815, "loss": 0.004086938686668873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.963262336273225, "step_time": 
1.8818388977050784} +{"epoch": 0, "iter": 17099, "iter_tflops": 13.630905808253864, "iter_time": 1.5135526428222659, "loss": 0.0038049265276640654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.985272897900856, "step_time": 1.2906312980651853} +{"epoch": 0, "iter": 17100, "iter_tflops": 43.83187908689801, "iter_time": 0.47068695068359373, "loss": 0.015129391103982925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.722431924346786, "step_time": 0.42344137382507324} +{"epoch": 0, "iter": 17101, "iter_tflops": 13.505987401573616, "iter_time": 1.1189145965576173, "loss": 0.40141940116882324, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 14.4609321406847, "step_time": 1.045025749206543} +{"epoch": 0, "iter": 17102, "iter_tflops": 10.204506753477714, "iter_time": 1.4809188537597657, "loss": 0.2640281021595001, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 13.946611199348133, "step_time": 1.0835640449523927} +{"epoch": 0, "iter": 17103, "iter_tflops": 26.193569043328097, "iter_time": 0.5769372787475586, "loss": 0.338769793510437, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.81831542642776, "step_time": 0.5432408905029298} +{"epoch": 0, "iter": 17104, "iter_tflops": 25.416105722583417, "iter_time": 0.5945854415893554, "loss": 0.21552243828773499, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 26.98129202793908, "step_time": 0.560093505859375} +{"epoch": 0, "iter": 17105, "iter_tflops": 28.88166459704155, "iter_time": 0.7143318710327148, "loss": 0.5040379166603088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.61252265294601, "step_time": 0.6739429397583008} +{"epoch": 0, "iter": 17106, "iter_tflops": 14.781750200181998, "iter_time": 1.395713851928711, "loss": 0.5071263909339905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.60601745499421, "step_time": 1.1718205757141114} +{"epoch": 0, "iter": 17107, "iter_tflops": 36.67155079229221, "iter_time": 0.5625912475585937, "loss": 0.6727480888366699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.74076037544931, 
"step_time": 0.5191418914794922} +{"epoch": 0, "iter": 17108, "iter_tflops": 36.52316334975166, "iter_time": 0.5648769607543945, "loss": 0.4460315406322479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.62010589868863, "step_time": 0.5207228260040282} +{"epoch": 0, "iter": 17109, "iter_tflops": 31.86745238301048, "iter_time": 0.6474032897949219, "loss": 0.003409769618883729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.730659038489954, "step_time": 0.5774059047698975} +{"epoch": 0, "iter": 17110, "iter_tflops": 43.07500137128794, "iter_time": 0.47895746612548834, "loss": 0.002165029523894191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08822513023095, "step_time": 0.4290258884429932} +{"epoch": 0, "iter": 17111, "iter_tflops": 43.617006549717246, "iter_time": 0.47300571823120113, "loss": 0.012333466671407223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.04331455701661, "step_time": 0.4294269390106201} +{"epoch": 0, "iter": 17112, "iter_tflops": 50.502929204587545, "iter_time": 0.40851280975341797, "loss": 0.004554413259029388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.92946381815903, "step_time": 0.3688770122528076} +{"epoch": 0, "iter": 17113, "iter_tflops": 10.9874844286138, "iter_time": 1.5021072540283205, "loss": 0.12449424713850021, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 11.838765306707591, "step_time": 1.3940963973999025} +{"epoch": 0, "iter": 17114, "iter_tflops": 12.76267680089052, "iter_time": 1.2931754302978515, "loss": 0.09519278258085251, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 16.61771075409493, "step_time": 0.9931801261901855} +{"epoch": 0, "iter": 17115, "iter_tflops": 32.357362081583254, "iter_time": 0.5100656852722167, "loss": 0.0744565948843956, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 35.77937246184997, "step_time": 0.46128198814392085} +{"epoch": 0, "iter": 17116, "iter_tflops": 34.14344917166679, "iter_time": 0.4833835029602051, "loss": 0.05217515304684639, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 
37.59089954680137, "step_time": 0.43905254364013674} +{"epoch": 0, "iter": 17117, "iter_tflops": 15.918990659124308, "iter_time": 1.296005126953125, "loss": 0.029833050444722176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.874065119558345, "step_time": 1.2226510543823241} +{"epoch": 0, "iter": 17118, "iter_tflops": 16.310660476645264, "iter_time": 1.2648840026855468, "loss": 0.033293090760707855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.64416929570839, "step_time": 0.953194055557251} +{"epoch": 0, "iter": 17119, "iter_tflops": 54.56348687035891, "iter_time": 0.37811171340942384, "loss": 0.05485754460096359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.70707677416573, "step_time": 0.34553849601745606} +{"epoch": 0, "iter": 17120, "iter_tflops": 52.65411323145034, "iter_time": 0.3918230171203613, "loss": 0.03885984048247337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.682707074493, "step_time": 0.3576651401519776} +{"epoch": 0, "iter": 17121, "iter_tflops": 26.432709165657933, "iter_time": 0.7805137710571289, "loss": 0.30402812361717224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.823170918672908, "step_time": 0.7415076293945312} +{"epoch": 0, "iter": 17122, "iter_tflops": 13.001545643827631, "iter_time": 1.5868185272216797, "loss": 0.29831066727638245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.59447189014196, "step_time": 1.322974811553955} +{"epoch": 0, "iter": 17123, "iter_tflops": 37.3171937555162, "iter_time": 0.5528575820922852, "loss": 0.26969048380851746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.895214156762556, "step_time": 0.5044867458343506} +{"epoch": 0, "iter": 17124, "iter_tflops": 37.75789845058436, "iter_time": 0.5464047088623046, "loss": 0.35312238335609436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65558780440013, "step_time": 0.49527793502807615} +{"epoch": 0, "iter": 17125, "iter_tflops": 38.991388434760005, "iter_time": 0.5291192321777345, "loss": 0.6297018527984619, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 42.309213519779114, "step_time": 0.48762649536132807} +{"epoch": 0, "iter": 17126, "iter_tflops": 37.50550085700859, "iter_time": 0.5500818023681641, "loss": 0.6411946415901184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.016878100435335, "step_time": 0.5029903411865234} +{"epoch": 0, "iter": 17127, "iter_tflops": 35.791850789798595, "iter_time": 0.5764187393188477, "loss": 0.5923441648483276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.692138362831024, "step_time": 0.5332115097045899} +{"epoch": 0, "iter": 17128, "iter_tflops": 36.35086734830036, "iter_time": 0.5675543670654297, "loss": 0.5268941521644592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.5425002123745, "step_time": 0.5217447910308838} +{"epoch": 0, "iter": 17129, "iter_tflops": 28.186230974703555, "iter_time": 0.5942849273681641, "loss": 0.0181876290589571, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 31.175845342953142, "step_time": 0.5372958469390869} +{"epoch": 0, "iter": 17130, "iter_tflops": 11.272496182410084, "iter_time": 1.4859754180908202, "loss": 0.011120800860226154, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 12.971745424129065, "step_time": 1.2913182983398437} +{"epoch": 0, "iter": 17131, "iter_tflops": 9.405457577697913, "iter_time": 1.7809502716064454, "loss": 0.006766056176275015, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 11.258513054533221, "step_time": 1.4878210067749023} +{"epoch": 0, "iter": 17132, "iter_tflops": 25.095448213768094, "iter_time": 0.6674777069091797, "loss": 0.0016487340908497572, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 33.204692680007135, "step_time": 0.5044664134979249} +{"epoch": 0, "iter": 17133, "iter_tflops": 19.085944490311558, "iter_time": 0.7468742599487305, "loss": 0.2624048888683319, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 20.210707096993502, "step_time": 0.7053093490600586} +{"epoch": 0, "iter": 17134, "iter_tflops": 10.771189982538548, "iter_time": 1.3234192962646483, "loss": 0.5333642959594727, "lr": 
3e-05, "seqlen": 5712.0, "step_tflops": 12.610545596425133, "step_time": 1.130387306213379} +{"epoch": 0, "iter": 17135, "iter_tflops": 23.739971360604418, "iter_time": 0.6004556808471679, "loss": 0.32843631505966187, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 25.296742495381544, "step_time": 0.5635034103393554} +{"epoch": 0, "iter": 17136, "iter_tflops": 24.455469847313953, "iter_time": 0.5828880310058594, "loss": 0.2724160850048065, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 25.881486023326634, "step_time": 0.5507721099853515} +{"epoch": 0, "iter": 17137, "iter_tflops": 19.170418037760964, "iter_time": 0.7180792922973632, "loss": 0.09164319932460785, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 20.334286281137185, "step_time": 0.6769787750244141} +{"epoch": 0, "iter": 17138, "iter_tflops": 10.09685813148615, "iter_time": 1.363382553100586, "loss": 0.07687384635210037, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 12.03592541861531, "step_time": 1.1437325954437256} +{"epoch": 0, "iter": 17139, "iter_tflops": 26.247639451728453, "iter_time": 0.5244616470336914, "loss": 0.11705534160137177, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 28.98339565985785, "step_time": 0.4749574680328369} +{"epoch": 0, "iter": 17140, "iter_tflops": 27.03720253230918, "iter_time": 0.5091458778381348, "loss": 0.07527658343315125, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 29.744983557058244, "step_time": 0.4627966995239258} +{"epoch": 0, "iter": 17141, "iter_tflops": 25.114944173253182, "iter_time": 0.8214668273925781, "loss": 0.20608898997306824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.41647232575074, "step_time": 0.7525072250366212} +{"epoch": 0, "iter": 17142, "iter_tflops": 45.84267666736036, "iter_time": 0.4500412063598632, "loss": 0.24947896599769592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.547512507807255, "step_time": 0.40815249824523925} +{"epoch": 0, "iter": 17143, "iter_tflops": 46.562597652048254, "iter_time": 0.44308295822143556, "loss": 
0.208092600107193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.29370878362478, "step_time": 0.41021221160888666} +{"epoch": 0, "iter": 17144, "iter_tflops": 45.59177152157234, "iter_time": 0.4525179176330566, "loss": 0.16444574296474457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.25048473204537, "step_time": 0.4189013290405274} +{"epoch": 0, "iter": 17145, "iter_tflops": 33.357452603359945, "iter_time": 0.6184852828979492, "loss": 0.2676186263561249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.54791384095701, "step_time": 0.5803742408752441} +{"epoch": 0, "iter": 17146, "iter_tflops": 18.255652497053855, "iter_time": 1.1301208496093749, "loss": 0.32405388355255127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.347686899026645, "step_time": 0.966432270050049} +{"epoch": 0, "iter": 17147, "iter_tflops": 44.07360499924417, "iter_time": 0.46810542297363283, "loss": 0.30940476059913635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.505115906521226, "step_time": 0.43429203605651856} +{"epoch": 0, "iter": 17148, "iter_tflops": 47.49122587535442, "iter_time": 0.43441905593872066, "loss": 0.2747398912906647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.315249240164626, "step_time": 0.40204605484008793} +{"epoch": 0, "iter": 17149, "iter_tflops": 26.742858931805003, "iter_time": 0.5421827087402343, "loss": 0.014679432846605778, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 28.765943455817727, "step_time": 0.504051456451416} +{"epoch": 0, "iter": 17150, "iter_tflops": 11.740477360215179, "iter_time": 1.2350022277832031, "loss": 0.007231398951262236, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 13.022660035450654, "step_time": 1.1134066047668458} +{"epoch": 0, "iter": 17151, "iter_tflops": 26.476079862659443, "iter_time": 0.547645866394043, "loss": 0.0033254085574299097, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 33.45469896330689, "step_time": 0.43340744781494145} +{"epoch": 0, "iter": 17152, "iter_tflops": 36.37409263367068, "iter_time": 
0.3986220588684082, "loss": 0.005397442728281021, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 40.163454809963724, "step_time": 0.3610126609802246} +{"epoch": 0, "iter": 17153, "iter_tflops": 30.624700762041385, "iter_time": 0.6736749420166017, "loss": 0.19190528988838196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.324472040207056, "step_time": 0.6190973854064942} +{"epoch": 0, "iter": 17154, "iter_tflops": 46.983405225161896, "iter_time": 0.43911447906494133, "loss": 0.13612951338291168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.01045018964708, "step_time": 0.39667208099365237} +{"epoch": 0, "iter": 17155, "iter_tflops": 46.65386811817925, "iter_time": 0.4422161407470703, "loss": 0.17669448256492615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.705199705032605, "step_time": 0.4068831920623779} +{"epoch": 0, "iter": 17156, "iter_tflops": 49.34039491916072, "iter_time": 0.4181379890441895, "loss": 0.1923777163028717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.54117299680897, "step_time": 0.38533136940002444} +{"epoch": 0, "iter": 17157, "iter_tflops": 26.672215114273037, "iter_time": 0.7735050659179687, "loss": 0.6778101921081543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.20645185725556, "step_time": 0.7314317169189454} +{"epoch": 0, "iter": 17158, "iter_tflops": 18.134055194662658, "iter_time": 1.1376988372802734, "loss": 0.6630234718322754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.79535297130461, "step_time": 0.8670219573974609} +{"epoch": 0, "iter": 17159, "iter_tflops": 47.48464817927597, "iter_time": 0.434479232788086, "loss": 0.739343523979187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.55302846377265, "step_time": 0.4001916885375976} +{"epoch": 0, "iter": 17160, "iter_tflops": 51.040079986315774, "iter_time": 0.40421358108520516, "loss": 0.7079408764839172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.342251216381754, "step_time": 0.3727910060882568} +{"epoch": 0, "iter": 17161, "iter_tflops": 
20.369135076128416, "iter_time": 1.0128605575561525, "loss": 0.18322637677192688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.181512414591644, "step_time": 0.9740141830444335} +{"epoch": 0, "iter": 17162, "iter_tflops": 14.857426026401614, "iter_time": 1.3886048278808591, "loss": 0.09184153378009796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.537498444424596, "step_time": 1.1129383811950684} +{"epoch": 0, "iter": 17163, "iter_tflops": 46.68551297003073, "iter_time": 0.4419163932800293, "loss": 0.13959664106369019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.111611144391475, "step_time": 0.40364788055419926} +{"epoch": 0, "iter": 17164, "iter_tflops": 47.5426931460926, "iter_time": 0.43394877624511724, "loss": 0.21455565094947815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.44041988482987, "step_time": 0.40106775093078617} +{"epoch": 0, "iter": 17165, "iter_tflops": 43.6242040945349, "iter_time": 0.472927677154541, "loss": 0.055008675903081894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.869646291628676, "step_time": 0.4309848747253418} +{"epoch": 0, "iter": 17166, "iter_tflops": 46.085017975410125, "iter_time": 0.447674633026123, "loss": 0.0388876274228096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.355659265252164, "step_time": 0.39405660820007327} +{"epoch": 0, "iter": 17167, "iter_tflops": 51.47932432214871, "iter_time": 0.40076465225219726, "loss": 0.026036715134978294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.336776372504936, "step_time": 0.36621004676818847} +{"epoch": 0, "iter": 17168, "iter_tflops": 58.12725425633757, "iter_time": 0.3549297790527344, "loss": 0.034663766622543335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.367023839297616, "step_time": 0.32558091354370117} +{"epoch": 0, "iter": 17169, "iter_tflops": 26.328718404705665, "iter_time": 0.7835965728759766, "loss": 0.6884602904319763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.70617603849846, "step_time": 0.7446387939453125} +{"epoch": 0, "iter": 
17170, "iter_tflops": 14.6310699946949, "iter_time": 1.4100878143310547, "loss": 0.5962799787521362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.330583276616043, "step_time": 1.190444267272949} +{"epoch": 0, "iter": 17171, "iter_tflops": 35.82598707734965, "iter_time": 0.5758695068359374, "loss": 0.6287246942520142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.96798187192504, "step_time": 0.5294370536804199} +{"epoch": 0, "iter": 17172, "iter_tflops": 38.229885012436014, "iter_time": 0.5396587905883788, "loss": 0.566987931728363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.747104553144155, "step_time": 0.4941922016143798} +{"epoch": 0, "iter": 17173, "iter_tflops": 15.151376727506308, "iter_time": 1.3616646118164062, "loss": 0.4639570116996765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.242225998594694, "step_time": 1.2702134246826173} +{"epoch": 0, "iter": 17174, "iter_tflops": 19.038745703607834, "iter_time": 1.083637222290039, "loss": 0.6285983324050903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.587777058114014, "step_time": 0.8746518783569336} +{"epoch": 0, "iter": 17175, "iter_tflops": 37.57612907156467, "iter_time": 0.549047866821289, "loss": 0.5832618474960327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.064152550533606, "step_time": 0.5024112815856934} +{"epoch": 0, "iter": 17176, "iter_tflops": 36.55341785438832, "iter_time": 0.564409423828125, "loss": 0.4695332646369934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.62449957817141, "step_time": 0.5206650867462158} +{"epoch": 0, "iter": 17177, "iter_tflops": 16.081262392521612, "iter_time": 0.9168683547973633, "loss": 0.009865670464932919, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 17.4165021798525, "step_time": 0.8465764503479004} +{"epoch": 0, "iter": 17178, "iter_tflops": 12.280701893427034, "iter_time": 1.2006154632568358, "loss": 0.006740255281329155, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 14.764889140786847, "step_time": 0.9986123466491699} +{"epoch": 0, 
"iter": 17179, "iter_tflops": 29.089978276076423, "iter_time": 0.5068549880981446, "loss": 0.005996535532176495, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 32.19619682170343, "step_time": 0.45795472908020024} +{"epoch": 0, "iter": 17180, "iter_tflops": 33.32432060274259, "iter_time": 0.4424516487121582, "loss": 0.0029638144187629223, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 36.810226709971225, "step_time": 0.40055174636840823} +{"epoch": 0, "iter": 17181, "iter_tflops": 20.464512391765584, "iter_time": 1.0081399993896485, "loss": 0.038828082382678986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.186300134847666, "step_time": 0.9299023895263672} +{"epoch": 0, "iter": 17182, "iter_tflops": 21.651553812656477, "iter_time": 0.9528689575195313, "loss": 0.034216560423374176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.531247257298595, "step_time": 0.8410128231048585} +{"epoch": 0, "iter": 17183, "iter_tflops": 44.20406913093697, "iter_time": 0.4667238540649414, "loss": 0.04173092171549797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6489321322339, "step_time": 0.4240811176300049} +{"epoch": 0, "iter": 17184, "iter_tflops": 42.92672965140962, "iter_time": 0.48061181640624995, "loss": 0.04163907468318939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.375791211610846, "step_time": 0.43547755050659176} +{"epoch": 0, "iter": 17185, "iter_tflops": 22.377414124443654, "iter_time": 0.9219605712890625, "loss": 0.7050108909606934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.378185085192705, "step_time": 0.8462932510375977} +{"epoch": 0, "iter": 17186, "iter_tflops": 18.87377818694494, "iter_time": 1.0931088256835937, "loss": 0.5683380961418152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.809267708226827, "step_time": 0.8315881690979003} +{"epoch": 0, "iter": 17187, "iter_tflops": 44.88148619641842, "iter_time": 0.4596793746948243, "loss": 0.6314537525177002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.70513411119691, "step_time": 
0.423591760635376} +{"epoch": 0, "iter": 17188, "iter_tflops": 47.63206712531381, "iter_time": 0.4331345405578613, "loss": 0.6109305620193481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.569649705945494, "step_time": 0.4000627040863037} +{"epoch": 0, "iter": 17189, "iter_tflops": 45.71891322649658, "iter_time": 0.45125949096679696, "loss": 0.6053202748298645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.09193434989933, "step_time": 0.411864580154419} +{"epoch": 0, "iter": 17190, "iter_tflops": 46.33218315917799, "iter_time": 0.44528645324707034, "loss": 0.5281515717506409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.2813323418153, "step_time": 0.41031318283081053} +{"epoch": 0, "iter": 17191, "iter_tflops": 45.500988070877725, "iter_time": 0.45342078018188475, "loss": 0.5301708579063416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1717499082738, "step_time": 0.41957208251953126} +{"epoch": 0, "iter": 17192, "iter_tflops": 43.48457415054227, "iter_time": 0.47444625854492195, "loss": 0.4983936846256256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.92718492835162, "step_time": 0.4396405525207519} +{"epoch": 0, "iter": 17193, "iter_tflops": 29.30125165197617, "iter_time": 0.7041028060913085, "loss": 0.014204449020326138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.335655422084276, "step_time": 0.6583903617858886} +{"epoch": 0, "iter": 17194, "iter_tflops": 14.853318348922464, "iter_time": 1.3889888458251953, "loss": 0.03133905678987503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.508890974632674, "step_time": 1.1146585464477539} +{"epoch": 0, "iter": 17195, "iter_tflops": 55.08893016571007, "iter_time": 0.3745052490234375, "loss": 0.020373094826936722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.60319383310827, "step_time": 0.3404291458129883} +{"epoch": 0, "iter": 17196, "iter_tflops": 57.1001540549794, "iter_time": 0.36131414794921873, "loss": 0.021678457036614418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.6741712394884, 
"step_time": 0.3291801567077637} +{"epoch": 0, "iter": 17197, "iter_tflops": 32.024659062933345, "iter_time": 0.6442252349853517, "loss": 0.1075546070933342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.972237812408174, "step_time": 0.6072927436828613} +{"epoch": 0, "iter": 17198, "iter_tflops": 9.757478847473228, "iter_time": 2.114387725830078, "loss": 0.10517743974924088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.987896182920956, "step_time": 1.720993675231934} +{"epoch": 0, "iter": 17199, "iter_tflops": 9.010402781092196, "iter_time": 2.289697143554687, "loss": 0.08529287576675415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.900468645328242, "step_time": 1.8926794967651366} +{"epoch": 0, "iter": 17200, "iter_tflops": 37.4843875191023, "iter_time": 0.5503916397094727, "loss": 0.09858088195323944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.036814941429135, "step_time": 0.42948504257202147} +{"epoch": 0, "iter": 17201, "iter_tflops": 24.18125894304543, "iter_time": 0.6740460891723632, "loss": 0.2854161858558655, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 25.68970167773685, "step_time": 0.634467586517334} +{"epoch": 0, "iter": 17202, "iter_tflops": 13.358433873868071, "iter_time": 1.2201492462158203, "loss": 0.20578782260417938, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 16.929457377002294, "step_time": 0.9627764587402344} +{"epoch": 0, "iter": 17203, "iter_tflops": 28.787289816507116, "iter_time": 0.5661972045898438, "loss": 0.39951053261756897, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 30.682180046506357, "step_time": 0.5312296257019042} +{"epoch": 0, "iter": 17204, "iter_tflops": 27.991808665391133, "iter_time": 0.58228759765625, "loss": 0.25686511397361755, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.707157266746606, "step_time": 0.5486651878356933} +{"epoch": 0, "iter": 17205, "iter_tflops": 28.280771680092325, "iter_time": 0.7295095672607423, "loss": 0.10014493763446808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
29.924873262060274, "step_time": 0.68942960357666} +{"epoch": 0, "iter": 17206, "iter_tflops": 14.929399449696177, "iter_time": 1.3819104766845705, "loss": 0.06104784458875656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.75994283631609, "step_time": 1.161664409637451} +{"epoch": 0, "iter": 17207, "iter_tflops": 42.539297398099, "iter_time": 0.48498905181884766, "loss": 0.12215556204319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.89574724775439, "step_time": 0.4399352760314941} +{"epoch": 0, "iter": 17208, "iter_tflops": 42.064322835085896, "iter_time": 0.49046536636352533, "loss": 0.10830710828304291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14737904922798, "step_time": 0.4470696697235107} +{"epoch": 0, "iter": 17209, "iter_tflops": 17.941009948207768, "iter_time": 1.1499404754638671, "loss": 0.30373916029930115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.452944014918465, "step_time": 1.0605640716552733} +{"epoch": 0, "iter": 17210, "iter_tflops": 22.908630346782896, "iter_time": 0.9005817108154296, "loss": 0.4141291081905365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.239639425872515, "step_time": 0.7573923130035402} +{"epoch": 0, "iter": 17211, "iter_tflops": 46.60014567828564, "iter_time": 0.44272594451904296, "loss": 0.30903688073158264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.37625516999937, "step_time": 0.4095400390625} +{"epoch": 0, "iter": 17212, "iter_tflops": 50.60947030212728, "iter_time": 0.40765282440185546, "loss": 0.30011871457099915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.62568879954459, "step_time": 0.3776811599731445} +{"epoch": 0, "iter": 17213, "iter_tflops": 28.56545326701575, "iter_time": 0.7222393188476561, "loss": 0.10916373878717422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.246300539589022, "step_time": 0.6821030387878417} +{"epoch": 0, "iter": 17214, "iter_tflops": 21.10182181424671, "iter_time": 0.977692527770996, "loss": 0.17540955543518066, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 25.909461252171116, "step_time": 0.7962764377593996} +{"epoch": 0, "iter": 17215, "iter_tflops": 44.22760053066728, "iter_time": 0.46647553253173824, "loss": 0.14797550439834595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.7885988362824, "step_time": 0.4317158069610596} +{"epoch": 0, "iter": 17216, "iter_tflops": 52.0171243358991, "iter_time": 0.3966211853027344, "loss": 0.12235359102487564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.778783122277126, "step_time": 0.3633592052459717} +{"epoch": 0, "iter": 17217, "iter_tflops": 36.83197927311554, "iter_time": 0.5601407775878906, "loss": 0.5170090794563293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.4813677467761, "step_time": 0.5225526542663574} +{"epoch": 0, "iter": 17218, "iter_tflops": 11.223369577964872, "iter_time": 1.8382263336181641, "loss": 0.4808642864227295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.729849067454706, "step_time": 1.5026453247070313} +{"epoch": 0, "iter": 17219, "iter_tflops": 11.5643810653644, "iter_time": 1.7840205535888671, "loss": 0.6263238191604614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.348206011178632, "step_time": 1.5456079635620117} +{"epoch": 0, "iter": 17220, "iter_tflops": 27.35984536201746, "iter_time": 0.754064697265625, "loss": 0.6935341358184814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.57716465812095, "step_time": 0.6144382266998291} +{"epoch": 0, "iter": 17221, "iter_tflops": 15.160376550847854, "iter_time": 1.1455570220947267, "loss": 0.3993892967700958, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 16.21250285504842, "step_time": 1.071214973449707} +{"epoch": 0, "iter": 17222, "iter_tflops": 13.586026865998813, "iter_time": 1.2783042449951172, "loss": 0.4650660753250122, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 16.222224662253083, "step_time": 1.0705730056762697} +{"epoch": 0, "iter": 17223, "iter_tflops": 26.020476031660454, "iter_time": 0.667438819885254, "loss": 0.32186567783355713, "lr": 3e-05, "seqlen": 
6928.0, "step_tflops": 28.069961350200977, "step_time": 0.6187067947387695} +{"epoch": 0, "iter": 17224, "iter_tflops": 26.393359808186894, "iter_time": 0.6580092849731446, "loss": 0.33441630005836487, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 28.392253053121216, "step_time": 0.6116836090087889} +{"epoch": 0, "iter": 17225, "iter_tflops": 16.161098338944537, "iter_time": 1.2765898132324218, "loss": 0.4108971059322357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.125742556306573, "step_time": 1.204683151245117} +{"epoch": 0, "iter": 17226, "iter_tflops": 17.277052331149633, "iter_time": 1.1941327209472656, "loss": 0.4678153097629547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.66340648903761, "step_time": 0.9984362220764159} +{"epoch": 0, "iter": 17227, "iter_tflops": 43.80102611912316, "iter_time": 0.47101849746704105, "loss": 0.4656413495540619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.89497469039663, "step_time": 0.43075695610046383} +{"epoch": 0, "iter": 17228, "iter_tflops": 37.03060726771204, "iter_time": 0.5571362457275391, "loss": 0.49528077244758606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.61389677194746, "step_time": 0.5079811382293701} +{"epoch": 0, "iter": 17229, "iter_tflops": 18.663424983374, "iter_time": 1.1054291229248048, "loss": 0.052892304956912994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.914967846001492, "step_time": 1.0359591674804687} +{"epoch": 0, "iter": 17230, "iter_tflops": 8.329961874059956, "iter_time": 2.4767332458496094, "loss": 0.08878640830516815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.634644675565736, "step_time": 2.1413445129394533} +{"epoch": 0, "iter": 17231, "iter_tflops": 13.594889943364095, "iter_time": 1.5175623779296874, "loss": 0.03445826843380928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.34701979405882, "step_time": 1.1244929008483886} +{"epoch": 0, "iter": 17232, "iter_tflops": 48.510638375123804, "iter_time": 0.42529008483886716, "loss": 0.06682021915912628, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 53.089537771703036, "step_time": 0.38860940170288083} +{"epoch": 0, "iter": 17233, "iter_tflops": 22.774177251902938, "iter_time": 0.6671502227783203, "loss": 0.14833727478981018, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 24.27456692138754, "step_time": 0.6259142532348634} +{"epoch": 0, "iter": 17234, "iter_tflops": 8.904605908368948, "iter_time": 1.7062852172851564, "loss": 0.3117729127407074, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 10.284698136285865, "step_time": 1.477320697784424} +{"epoch": 0, "iter": 17235, "iter_tflops": 26.96910338923957, "iter_time": 0.5633779220581054, "loss": 0.35109561681747437, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.93385768652984, "step_time": 0.5251217308044434} +{"epoch": 0, "iter": 17236, "iter_tflops": 26.31253947334552, "iter_time": 0.5774356155395508, "loss": 0.36560338735580444, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 27.84457640723675, "step_time": 0.5456645202636718} +{"epoch": 0, "iter": 17237, "iter_tflops": 19.82289436156195, "iter_time": 1.04077099609375, "loss": 0.4956931173801422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.64964908012583, "step_time": 0.9991014099121094} +{"epoch": 0, "iter": 17238, "iter_tflops": 17.157244471713923, "iter_time": 1.2024712677001952, "loss": 0.5088619589805603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.67219493801211, "step_time": 0.9980117530822754} +{"epoch": 0, "iter": 17239, "iter_tflops": 37.80594879109494, "iter_time": 0.5457102432250976, "loss": 0.4139748513698578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.08435718922392, "step_time": 0.5021642036437988} +{"epoch": 0, "iter": 17240, "iter_tflops": 39.52508151988576, "iter_time": 0.5219747238159179, "loss": 0.430429607629776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.83947401411775, "step_time": 0.48159072875976566} +{"epoch": 0, "iter": 17241, "iter_tflops": 19.68041413255518, "iter_time": 1.0483058624267578, "loss": 0.2106122225522995, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.855782456301547, "step_time": 0.9892265396118164} +{"epoch": 0, "iter": 17242, "iter_tflops": 7.006967710972484, "iter_time": 2.9443682861328124, "loss": 0.2664807438850403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.37578533614468, "step_time": 2.4631831741333006} +{"epoch": 0, "iter": 17243, "iter_tflops": 15.105755807451324, "iter_time": 1.3657769775390625, "loss": 0.2723928987979889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.022844499514697, "step_time": 1.0303777523040771} +{"epoch": 0, "iter": 17244, "iter_tflops": 42.63412117603571, "iter_time": 0.48391037368774414, "loss": 0.1943538784980774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.77850973229715, "step_time": 0.4410378532409668} +{"epoch": 0, "iter": 17245, "iter_tflops": 14.628554511488623, "iter_time": 1.1759598236083986, "loss": 0.3165978193283081, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 15.749365662956647, "step_time": 1.0922720794677734} +{"epoch": 0, "iter": 17246, "iter_tflops": 17.937962346314464, "iter_time": 0.9590048217773437, "loss": 0.35302335023880005, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 19.7399099773431, "step_time": 0.8714625549316407} +{"epoch": 0, "iter": 17247, "iter_tflops": 28.771457947907983, "iter_time": 0.59790478515625, "loss": 0.338092178106308, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 30.51100354739085, "step_time": 0.5638160133361816} +{"epoch": 0, "iter": 17248, "iter_tflops": 28.5898161073198, "iter_time": 0.601703498840332, "loss": 0.32786843180656433, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 30.25690314167371, "step_time": 0.5685509948730468} +{"epoch": 0, "iter": 17249, "iter_tflops": 27.76770715177039, "iter_time": 0.7429887313842773, "loss": 0.09649710357189178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.389112165958874, "step_time": 0.7019978485107422} +{"epoch": 0, "iter": 17250, "iter_tflops": 22.62369127502616, "iter_time": 0.9119242858886719, "loss": 
0.059801213443279266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.8125397745647, "step_time": 0.7160456409454345} +{"epoch": 0, "iter": 17251, "iter_tflops": 50.86030046539043, "iter_time": 0.40564238357543947, "loss": 0.055143021047115326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.70639058824063, "step_time": 0.3703541603088379} +{"epoch": 0, "iter": 17252, "iter_tflops": 54.2180672381591, "iter_time": 0.38052063751220705, "loss": 0.08682234585285187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.96268372969846, "step_time": 0.3499008560180664} +{"epoch": 0, "iter": 17253, "iter_tflops": 29.753490169640788, "iter_time": 0.6934007873535156, "loss": 0.5450799465179443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.436173427580776, "step_time": 0.6562851409912109} +{"epoch": 0, "iter": 17254, "iter_tflops": 12.076332773375073, "iter_time": 1.708390609741211, "loss": 0.4144788980484009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.19665486884817, "step_time": 1.4532362518310546} +{"epoch": 0, "iter": 17255, "iter_tflops": 12.911618874365018, "iter_time": 1.5978703918457031, "loss": 0.5423955321311951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.925625971763067, "step_time": 1.3822598495483398} +{"epoch": 0, "iter": 17256, "iter_tflops": 18.095894963359406, "iter_time": 1.1400979919433594, "loss": 0.39318814873695374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.7076597481757, "step_time": 0.9504061584472657} +{"epoch": 0, "iter": 17257, "iter_tflops": 19.23446145814313, "iter_time": 0.8580630187988282, "loss": 0.20295552909374237, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 20.30369448880665, "step_time": 0.8128757095336914} +{"epoch": 0, "iter": 17258, "iter_tflops": 28.121158150208355, "iter_time": 0.586902572631836, "loss": 0.2229113131761551, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 30.62793605550019, "step_time": 0.538866870880127} +{"epoch": 0, "iter": 17259, "iter_tflops": 27.888942484743378, "iter_time": 
0.5917893829345703, "loss": 0.367036372423172, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 29.58530420808652, "step_time": 0.5578573722839355} +{"epoch": 0, "iter": 17260, "iter_tflops": 28.411247511822392, "iter_time": 0.5809100799560547, "loss": 0.40019723773002625, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 30.3572573290991, "step_time": 0.543671646118164} +{"epoch": 0, "iter": 17261, "iter_tflops": 24.502530979784627, "iter_time": 0.8419984664916993, "loss": 0.2789415121078491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.87091256930726, "step_time": 0.7974629211425781} +{"epoch": 0, "iter": 17262, "iter_tflops": 22.14267521455002, "iter_time": 0.9317344589233397, "loss": 0.2483452558517456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.989973728102207, "step_time": 0.6657344627380372} +{"epoch": 0, "iter": 17263, "iter_tflops": 50.478102876187855, "iter_time": 0.4087137260437012, "loss": 0.18075504899024963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.71243571967721, "step_time": 0.37708234405517577} +{"epoch": 0, "iter": 17264, "iter_tflops": 49.16277853502691, "iter_time": 0.4196486473083496, "loss": 0.24641256034374237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.413468492455266, "step_time": 0.38625264549255367} +{"epoch": 0, "iter": 17265, "iter_tflops": 32.09514075280855, "iter_time": 0.6428105010986327, "loss": 0.0016797102289274335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.14862631272037, "step_time": 0.6041558837890625} +{"epoch": 0, "iter": 17266, "iter_tflops": 16.64366575168967, "iter_time": 1.2395762939453125, "loss": 0.005795542150735855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.64709086679742, "step_time": 1.050083885192871} +{"epoch": 0, "iter": 17267, "iter_tflops": 41.53982518860813, "iter_time": 0.4966581687927246, "loss": 0.0010767903877422214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.22852644810773, "step_time": 0.4462849044799805} +{"epoch": 0, "iter": 17268, "iter_tflops": 47.00878994639722, 
"iter_time": 0.4388773574829102, "loss": 0.0035994513891637325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.071235481175506, "step_time": 0.39620902633666993} +{"epoch": 0, "iter": 17269, "iter_tflops": 31.37406983849794, "iter_time": 0.657584228515625, "loss": 0.5293569564819336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.91799970102229, "step_time": 0.6082638626098633} +{"epoch": 0, "iter": 17270, "iter_tflops": 11.015272906197287, "iter_time": 1.8729534606933596, "loss": 0.47313225269317627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.053046902766253, "step_time": 1.4680868606567383} +{"epoch": 0, "iter": 17271, "iter_tflops": 15.704263517511633, "iter_time": 1.3137256317138672, "loss": 0.4063553214073181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.800620545100383, "step_time": 1.0973623695373536} +{"epoch": 0, "iter": 17272, "iter_tflops": 17.696347969266885, "iter_time": 1.1658390502929685, "loss": 0.5595450401306152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.877859448905927, "step_time": 0.9881804962158203} +{"epoch": 0, "iter": 17273, "iter_tflops": 17.114402537354632, "iter_time": 0.9045056915283204, "loss": 0.17688694596290588, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 17.96364123510985, "step_time": 0.8617448043823244} +{"epoch": 0, "iter": 17274, "iter_tflops": 13.608256130476255, "iter_time": 1.1375502014160157, "loss": 0.27215179800987244, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 15.191422807283804, "step_time": 1.0190009651184082} +{"epoch": 0, "iter": 17275, "iter_tflops": 27.495858387836535, "iter_time": 0.5629965896606445, "loss": 0.4061967730522156, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 29.507343450224905, "step_time": 0.5246176948547364} +{"epoch": 0, "iter": 17276, "iter_tflops": 27.24221478364871, "iter_time": 0.5682384719848632, "loss": 0.38920527696609497, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.949759677485492, "step_time": 0.534722038269043} +{"epoch": 0, "iter": 17277, "iter_tflops": 
31.86102321540929, "iter_time": 0.6475339279174804, "loss": 0.22907471656799316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.04263803659473, "step_time": 0.6060368614196777} +{"epoch": 0, "iter": 17278, "iter_tflops": 18.127396509074753, "iter_time": 1.1381167449951173, "loss": 0.20663432776927948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.168794013155306, "step_time": 1.0229215240478515} +{"epoch": 0, "iter": 17279, "iter_tflops": 37.094480923884724, "iter_time": 0.5561769027709962, "loss": 0.2060302048921585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.52615551773746, "step_time": 0.5090809440612794} +{"epoch": 0, "iter": 17280, "iter_tflops": 42.83984898035928, "iter_time": 0.48158651351928705, "loss": 0.2802790105342865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.82441908009763, "step_time": 0.44060543441772465} +{"epoch": 0, "iter": 17281, "iter_tflops": 20.15912479883597, "iter_time": 1.0234121627807617, "loss": 0.10788898915052414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.730227174518202, "step_time": 0.9494191360473633} +{"epoch": 0, "iter": 17282, "iter_tflops": 26.40063955078116, "iter_time": 0.781461883544922, "loss": 0.15286168456077576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.64125426909004, "step_time": 0.6960263328552245} +{"epoch": 0, "iter": 17283, "iter_tflops": 50.681495642398616, "iter_time": 0.40707349395751957, "loss": 0.06943527609109879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.07987719703864, "step_time": 0.37456680297851563} +{"epoch": 0, "iter": 17284, "iter_tflops": 57.49625129136456, "iter_time": 0.3588250198364258, "loss": 0.10695216804742813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.449013838615556, "step_time": 0.33036700248718265} +{"epoch": 0, "iter": 17285, "iter_tflops": 12.198561680405888, "iter_time": 0.6420222930908204, "loss": 0.010869390331208706, "lr": 3e-05, "seqlen": 3168.0, "step_tflops": 12.920395621573299, "step_time": 0.6061539268493652} +{"epoch": 0, "iter": 
17286, "iter_tflops": 6.464840423706697, "iter_time": 1.211437255859375, "loss": 0.016282448545098305, "lr": 3e-05, "seqlen": 3168.0, "step_tflops": 7.258575179960144, "step_time": 1.0789649963378904} +{"epoch": 0, "iter": 17287, "iter_tflops": 16.07924427827973, "iter_time": 0.48707192993164067, "loss": 0.0030045732855796814, "lr": 3e-05, "seqlen": 3168.0, "step_tflops": 21.34734083315668, "step_time": 0.3668723239898682} +{"epoch": 0, "iter": 17288, "iter_tflops": 22.410952267187017, "iter_time": 0.349460765838623, "loss": 0.0016707011964172125, "lr": 3e-05, "seqlen": 3168.0, "step_tflops": 24.686557497914993, "step_time": 0.3172474956512451} +{"epoch": 0, "iter": 17289, "iter_tflops": 35.043834670502136, "iter_time": 0.5544338684082031, "loss": 0.00171366473659873, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 37.85775649586453, "step_time": 0.513223461151123} +{"epoch": 0, "iter": 17290, "iter_tflops": 23.140317876630093, "iter_time": 0.839637939453125, "loss": 0.006029564421623945, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 26.241362825348673, "step_time": 0.7404146251678467} +{"epoch": 0, "iter": 17291, "iter_tflops": 40.426591259363256, "iter_time": 0.48061160278320314, "loss": 0.022333454340696335, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 44.85017792195749, "step_time": 0.4332087345123291} +{"epoch": 0, "iter": 17292, "iter_tflops": 46.049544108569265, "iter_time": 0.42192575836181645, "loss": 0.02382834628224373, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 50.855744172802616, "step_time": 0.38205101776123046} +{"epoch": 0, "iter": 17293, "iter_tflops": 28.396604710552197, "iter_time": 0.7265338134765627, "loss": 0.6436867117881775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.554322123433717, "step_time": 0.6752266807556152} +{"epoch": 0, "iter": 17294, "iter_tflops": 10.715446988142261, "iter_time": 1.9253600463867186, "loss": 0.7149210572242737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.422249967992967, "step_time": 
1.5370816040039064} +{"epoch": 0, "iter": 17295, "iter_tflops": 12.030576516603622, "iter_time": 1.71488818359375, "loss": 0.5296129584312439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.904271982027977, "step_time": 1.3842402725219727} +{"epoch": 0, "iter": 17296, "iter_tflops": 22.596912869274647, "iter_time": 0.9130049591064454, "loss": 0.6252536177635193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.282213979380295, "step_time": 0.6595151329040526} +{"epoch": 0, "iter": 17297, "iter_tflops": 19.842262966378446, "iter_time": 0.8939222106933594, "loss": 0.30639639496803284, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 20.84277257260167, "step_time": 0.8510115203857422} +{"epoch": 0, "iter": 17298, "iter_tflops": 6.887110934750211, "iter_time": 2.5754543151855467, "loss": 0.2844223082065582, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 8.513305800965046, "step_time": 2.083496116638184} +{"epoch": 0, "iter": 17299, "iter_tflops": 10.26768760196105, "iter_time": 1.7275009002685549, "loss": 0.32043856382369995, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 12.129388218235514, "step_time": 1.4623523674011232} +{"epoch": 0, "iter": 17300, "iter_tflops": 27.570016584047195, "iter_time": 0.6433597717285157, "loss": 0.4689360558986664, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 29.64733900715611, "step_time": 0.5982809982299805} +{"epoch": 0, "iter": 17301, "iter_tflops": 18.966128850549918, "iter_time": 0.9417303466796876, "loss": 0.42845332622528076, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 20.440330316966488, "step_time": 0.8738106880187987} +{"epoch": 0, "iter": 17302, "iter_tflops": 26.336623986346368, "iter_time": 0.6781802825927735, "loss": 0.40929991006851196, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 28.35458673677759, "step_time": 0.6299149856567383} +{"epoch": 0, "iter": 17303, "iter_tflops": 27.24810008761281, "iter_time": 0.6554944763183593, "loss": 0.3555516004562378, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 29.39055700784527, 
"step_time": 0.6077114868164063} +{"epoch": 0, "iter": 17304, "iter_tflops": 27.760385787643493, "iter_time": 0.6433980865478516, "loss": 0.3681286573410034, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 29.798607228260195, "step_time": 0.5993897285461425} +{"epoch": 0, "iter": 17305, "iter_tflops": 20.038177071585427, "iter_time": 1.029589340209961, "loss": 0.5539938807487488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.944489328376303, "step_time": 0.9401491737365723} +{"epoch": 0, "iter": 17306, "iter_tflops": 17.365988727473418, "iter_time": 1.1880172119140624, "loss": 0.6410040259361267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.037449514073803, "step_time": 0.980684160232544} +{"epoch": 0, "iter": 17307, "iter_tflops": 36.271087080503285, "iter_time": 0.5688027343750001, "loss": 0.6021235585212708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.581415504705525, "step_time": 0.5212318267822266} +{"epoch": 0, "iter": 17308, "iter_tflops": 36.495708592195676, "iter_time": 0.5653019027709961, "loss": 0.652838945388794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.62726601189182, "step_time": 0.5206287384033202} +{"epoch": 0, "iter": 17309, "iter_tflops": 19.35375678434326, "iter_time": 1.0659994201660157, "loss": 0.5316991209983826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.489228519220223, "step_time": 1.0069238815307617} +{"epoch": 0, "iter": 17310, "iter_tflops": 7.805001857566854, "iter_time": 2.643316925048828, "loss": 0.557937741279602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.23378551947255, "step_time": 2.2343050384521486} +{"epoch": 0, "iter": 17311, "iter_tflops": 14.825930156080226, "iter_time": 1.3915547485351563, "loss": 0.5612563490867615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.71446442253926, "step_time": 1.1646467552185058} +{"epoch": 0, "iter": 17312, "iter_tflops": 37.839738138653225, "iter_time": 0.5452229461669922, "loss": 0.422246515750885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
41.28448335639023, "step_time": 0.49972996711730966} +{"epoch": 0, "iter": 17313, "iter_tflops": 17.218533823546718, "iter_time": 0.8349803237915039, "loss": 0.1531773805618286, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 18.72057733118941, "step_time": 0.767985767364502} +{"epoch": 0, "iter": 17314, "iter_tflops": 21.592459927020766, "iter_time": 0.6658406219482421, "loss": 0.38815224170684814, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 23.284290416380564, "step_time": 0.6174608154296874} +{"epoch": 0, "iter": 17315, "iter_tflops": 21.595108556926668, "iter_time": 0.6657589569091797, "loss": 0.3782179057598114, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 23.267374872715553, "step_time": 0.6179097137451172} +{"epoch": 0, "iter": 17316, "iter_tflops": 21.82362064997983, "iter_time": 0.6587878875732422, "loss": 0.2896778881549835, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 23.45147891498671, "step_time": 0.6130588607788086} +{"epoch": 0, "iter": 17317, "iter_tflops": 23.572350482826902, "iter_time": 0.8752242813110352, "loss": 0.16955456137657166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.776747900646605, "step_time": 0.8003761215209961} +{"epoch": 0, "iter": 17318, "iter_tflops": 44.24227586584114, "iter_time": 0.46632080078125, "loss": 0.14778508245944977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.46288342252523, "step_time": 0.41710252380371093} +{"epoch": 0, "iter": 17319, "iter_tflops": 46.460533831675704, "iter_time": 0.44405631637573245, "loss": 0.10719391703605652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.23337972719174, "step_time": 0.4107048664093017} +{"epoch": 0, "iter": 17320, "iter_tflops": 52.96577805651593, "iter_time": 0.38951742553710944, "loss": 0.11704009771347046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.604293529494306, "step_time": 0.35815200996398927} +{"epoch": 0, "iter": 17321, "iter_tflops": 26.776798485809152, "iter_time": 0.7286991653442383, "loss": 0.016926724463701248, "lr": 3e-05, "seqlen": 
7760.0, "step_tflops": 28.226465032011234, "step_time": 0.6912743301391602} +{"epoch": 0, "iter": 17322, "iter_tflops": 12.9932567103043, "iter_time": 1.5017197875976562, "loss": 0.002393245929852128, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 15.104127972129, "step_time": 1.2918475494384765} +{"epoch": 0, "iter": 17323, "iter_tflops": 52.98332758123034, "iter_time": 0.36827114486694335, "loss": 0.006424014922231436, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 58.27492335511095, "step_time": 0.3348306541442871} +{"epoch": 0, "iter": 17324, "iter_tflops": 54.27216691967331, "iter_time": 0.35952555084228516, "loss": 0.0020373763982206583, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 59.23138152683412, "step_time": 0.3294238662719726} +{"epoch": 0, "iter": 17325, "iter_tflops": 29.570180963497812, "iter_time": 0.6976992645263672, "loss": 0.36087194085121155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.2888918821591, "step_time": 0.6593743743896485} +{"epoch": 0, "iter": 17326, "iter_tflops": 11.49364433480444, "iter_time": 1.7950001678466796, "loss": 0.3885936141014099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.866983945024058, "step_time": 1.487785202026367} +{"epoch": 0, "iter": 17327, "iter_tflops": 16.238013989822985, "iter_time": 1.2705429077148438, "loss": 0.4668141007423401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.624497442275093, "step_time": 1.1077396087646485} +{"epoch": 0, "iter": 17328, "iter_tflops": 16.573513065649923, "iter_time": 1.2448231964111327, "loss": 0.40074053406715393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.432747261109768, "step_time": 1.061666332244873} +{"epoch": 0, "iter": 17329, "iter_tflops": 19.801030855132264, "iter_time": 0.7817812423706054, "loss": 0.2972428798675537, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 20.89276418954713, "step_time": 0.7409299392700196} +{"epoch": 0, "iter": 17330, "iter_tflops": 6.01668293375295, "iter_time": 2.572858612060547, "loss": 0.3314945697784424, "lr": 3e-05, 
"seqlen": 6192.0, "step_tflops": 7.689754696533318, "step_time": 2.01307780456543} +{"epoch": 0, "iter": 17331, "iter_tflops": 9.07449281853154, "iter_time": 1.705888671875, "loss": 0.3217528462409973, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 10.092429169926268, "step_time": 1.5338303833007814} +{"epoch": 0, "iter": 17332, "iter_tflops": 19.324025552088493, "iter_time": 0.8010791778564454, "loss": 0.4093171954154968, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 21.379246033625076, "step_time": 0.7240701789855957} +{"epoch": 0, "iter": 17333, "iter_tflops": 14.568869247795268, "iter_time": 1.0934551849365235, "loss": 0.3189607262611389, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 15.653407470628292, "step_time": 1.0176957092285157} +{"epoch": 0, "iter": 17334, "iter_tflops": 16.840729862172847, "iter_time": 0.945945083618164, "loss": 0.43004870414733887, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 18.841008519355743, "step_time": 0.8455176696777342} +{"epoch": 0, "iter": 17335, "iter_tflops": 28.210234075612824, "iter_time": 0.5647030639648438, "loss": 0.33241474628448486, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.111436522989376, "step_time": 0.5290483436584473} +{"epoch": 0, "iter": 17336, "iter_tflops": 27.334759639794548, "iter_time": 0.582789306640625, "loss": 0.3526472747325897, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.06811946769686, "step_time": 0.5480370216369629} +{"epoch": 0, "iter": 17337, "iter_tflops": 38.16490897618196, "iter_time": 0.5155985336303711, "loss": 0.0013942209770902991, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 41.66692315358025, "step_time": 0.47226359939575197} +{"epoch": 0, "iter": 17338, "iter_tflops": 23.276977475830424, "iter_time": 0.8453748397827148, "loss": 0.0012233086163178086, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 28.804433513320053, "step_time": 0.6831507759094239} +{"epoch": 0, "iter": 17339, "iter_tflops": 39.19302594861653, "iter_time": 0.5020732803344726, "loss": 0.012098043225705624, 
"lr": 3e-05, "seqlen": 7824.0, "step_tflops": 43.44380743494257, "step_time": 0.4529476642608643} +{"epoch": 0, "iter": 17340, "iter_tflops": 44.225763737742284, "iter_time": 0.44493909072875976, "loss": 0.035799540579319, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 49.04135545811505, "step_time": 0.4012485160827637} +{"epoch": 0, "iter": 17341, "iter_tflops": 27.87124529477807, "iter_time": 0.7402286224365235, "loss": 0.006205890793353319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.986046458691174, "step_time": 0.6658188400268554} +{"epoch": 0, "iter": 17342, "iter_tflops": 53.50937773119454, "iter_time": 0.38556033325195316, "loss": 0.003744838759303093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.680816316432555, "step_time": 0.3515815696716309} +{"epoch": 0, "iter": 17343, "iter_tflops": 54.315350712541246, "iter_time": 0.3798390922546387, "loss": 0.008523567579686642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.60731901773863, "step_time": 0.3461167831420898} +{"epoch": 0, "iter": 17344, "iter_tflops": 60.3960423116809, "iter_time": 0.34159677886962886, "loss": 0.0201729629188776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.93870861854352, "step_time": 0.31288288688659666} +{"epoch": 0, "iter": 17345, "iter_tflops": 44.49711455139656, "iter_time": 0.46365014266967775, "loss": 0.4852505922317505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.714180989784, "step_time": 0.4235130939483642} +{"epoch": 0, "iter": 17346, "iter_tflops": 43.50008890631929, "iter_time": 0.474277042388916, "loss": 0.5723642706871033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41994049023907, "step_time": 0.42608671760559086} +{"epoch": 0, "iter": 17347, "iter_tflops": 49.048512227478206, "iter_time": 0.42062628555297854, "loss": 0.5892022252082825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.35138641514876, "step_time": 0.38670210647583003} +{"epoch": 0, "iter": 17348, "iter_tflops": 48.49240705852709, "iter_time": 0.4254499778747559, "loss": 
0.5672464370727539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.89718761555058, "step_time": 0.3900225028991699} +{"epoch": 0, "iter": 17349, "iter_tflops": 30.17006145173314, "iter_time": 0.6838266983032227, "loss": 0.5442655086517334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.86986845012425, "step_time": 0.6473542098999023} +{"epoch": 0, "iter": 17350, "iter_tflops": 20.4523684091107, "iter_time": 1.0087386016845703, "loss": 0.5774182677268982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.017115255417117, "step_time": 0.7636305103302001} +{"epoch": 0, "iter": 17351, "iter_tflops": 42.12294552773792, "iter_time": 0.4897827835083008, "loss": 0.5786085724830627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.48066571565564, "step_time": 0.453623384475708} +{"epoch": 0, "iter": 17352, "iter_tflops": 45.919812606563326, "iter_time": 0.449285228729248, "loss": 0.6507077217102051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.344774899351684, "step_time": 0.4181008739471435} +{"epoch": 0, "iter": 17353, "iter_tflops": 35.275562510047834, "iter_time": 0.5601773071289062, "loss": 0.5037528872489929, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 38.030302174307884, "step_time": 0.5196006469726563} +{"epoch": 0, "iter": 17354, "iter_tflops": 38.79400092211219, "iter_time": 0.5093717880249023, "loss": 0.5961182713508606, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 42.063558358049285, "step_time": 0.4697788391113281} +{"epoch": 0, "iter": 17355, "iter_tflops": 43.6674813926109, "iter_time": 0.4525236854553223, "loss": 0.5363288521766663, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 47.21896494931385, "step_time": 0.4184879875183105} +{"epoch": 0, "iter": 17356, "iter_tflops": 41.23845213371022, "iter_time": 0.47917825698852534, "loss": 0.27162855863571167, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 44.30706408293632, "step_time": 0.44599140167236334} +{"epoch": 0, "iter": 17357, "iter_tflops": 28.430383843187165, "iter_time": 0.7256705932617186, 
"loss": 0.7574601769447327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.06192084970186, "step_time": 0.6862866020202637} +{"epoch": 0, "iter": 17358, "iter_tflops": 10.546097816187292, "iter_time": 1.9562774658203126, "loss": 0.5838083624839783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.573734038494727, "step_time": 1.7825788497924804} +{"epoch": 0, "iter": 17359, "iter_tflops": 12.707251562301527, "iter_time": 1.6235685119628906, "loss": 0.6191744804382324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.459848584595253, "step_time": 1.3344951858520508} +{"epoch": 0, "iter": 17360, "iter_tflops": 38.998937118677176, "iter_time": 0.5290168151855469, "loss": 0.4858121871948242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.72660207670394, "step_time": 0.49443502426147456} +{"epoch": 0, "iter": 17361, "iter_tflops": 25.644526197973658, "iter_time": 0.6212010116577148, "loss": 0.2797410190105438, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 27.50045696945473, "step_time": 0.5792778511047363} +{"epoch": 0, "iter": 17362, "iter_tflops": 27.663767040292914, "iter_time": 0.5758581466674804, "loss": 0.39368298649787903, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.554368661034395, "step_time": 0.5390203323364258} +{"epoch": 0, "iter": 17363, "iter_tflops": 25.93291122397658, "iter_time": 0.6142929916381837, "loss": 0.23221316933631897, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 27.59813092396742, "step_time": 0.5772276992797851} +{"epoch": 0, "iter": 17364, "iter_tflops": 28.83915983621606, "iter_time": 0.5523879928588866, "loss": 0.21615511178970337, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.671034059779416, "step_time": 0.5193957786560058} +{"epoch": 0, "iter": 17365, "iter_tflops": 27.089620032688316, "iter_time": 0.761586669921875, "loss": 0.04840851202607155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.743054563819058, "step_time": 0.7177766532897949} +{"epoch": 0, "iter": 17366, "iter_tflops": 17.437388318834792, "iter_time": 
1.1831527252197265, "loss": 0.017409183084964752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.758284405457903, "step_time": 0.9938727645874024} +{"epoch": 0, "iter": 17367, "iter_tflops": 52.79970533404559, "iter_time": 0.39074258804321294, "loss": 0.03278727829456329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.96728690156593, "step_time": 0.3559092483520508} +{"epoch": 0, "iter": 17368, "iter_tflops": 59.34382330170524, "iter_time": 0.34765359497070314, "loss": 0.04902532696723938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.95635745167228, "step_time": 0.3176146926879883} +{"epoch": 0, "iter": 17369, "iter_tflops": 22.78114046380384, "iter_time": 0.9056216278076172, "loss": 0.13833358883857727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.843152656442573, "step_time": 0.865283790588379} +{"epoch": 0, "iter": 17370, "iter_tflops": 14.279073804642596, "iter_time": 1.444848159790039, "loss": 0.17964288592338562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.814522992038626, "step_time": 1.1581053009033202} +{"epoch": 0, "iter": 17371, "iter_tflops": 36.85796100587863, "iter_time": 0.5597459259033203, "loss": 0.17014580965042114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.31237557273917, "step_time": 0.5117806434631348} +{"epoch": 0, "iter": 17372, "iter_tflops": 40.581494618204054, "iter_time": 0.5083867340087891, "loss": 0.14287422597408295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64852793573987, "step_time": 0.4620777988433838} +{"epoch": 0, "iter": 17373, "iter_tflops": 37.48904526746733, "iter_time": 0.5503232574462891, "loss": 0.15318314731121063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6432530317881, "step_time": 0.49542463684082033} +{"epoch": 0, "iter": 17374, "iter_tflops": 36.31625606853608, "iter_time": 0.5680952758789063, "loss": 0.15770378708839417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.808269587627784, "step_time": 0.5055615863800048} +{"epoch": 0, "iter": 17375, "iter_tflops": 
39.17405892724333, "iter_time": 0.5266519241333008, "loss": 0.0853336900472641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87449004952772, "step_time": 0.48119740867614746} +{"epoch": 0, "iter": 17376, "iter_tflops": 40.64270972974381, "iter_time": 0.5076210136413574, "loss": 0.1456013321876526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62369884747159, "step_time": 0.462334903717041} +{"epoch": 0, "iter": 17377, "iter_tflops": 12.377070671624823, "iter_time": 1.6668801574707033, "loss": 0.21530738472938538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.001427748784286, "step_time": 1.5868329162597656} +{"epoch": 0, "iter": 17378, "iter_tflops": 20.46112282298254, "iter_time": 1.0083070068359374, "loss": 0.18856841325759888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.923873505781387, "step_time": 0.7958337516784668} +{"epoch": 0, "iter": 17379, "iter_tflops": 45.79605479406039, "iter_time": 0.4504993629455566, "loss": 0.2439272552728653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.604667917183626, "step_time": 0.41591032409667966} +{"epoch": 0, "iter": 17380, "iter_tflops": 47.674236107054355, "iter_time": 0.43275142288208, "loss": 0.17944084107875824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.58405451629298, "step_time": 0.39995098686218267} +{"epoch": 0, "iter": 17381, "iter_tflops": 33.35295979677986, "iter_time": 0.6185685958862304, "loss": 0.05359906330704689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.6343854428636, "step_time": 0.5789658851623535} +{"epoch": 0, "iter": 17382, "iter_tflops": 8.958011838515803, "iter_time": 2.303088439941406, "loss": 0.07093585282564163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.360233918980773, "step_time": 1.9913733291625977} +{"epoch": 0, "iter": 17383, "iter_tflops": 12.111419092682192, "iter_time": 1.703441467285156, "loss": 0.05068786442279816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.521836111590543, "step_time": 1.525761245727539} +{"epoch": 0, "iter": 17384, 
"iter_tflops": 24.52554550774056, "iter_time": 0.8412083435058594, "loss": 0.1101786196231842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.813141524091975, "step_time": 0.5760760612487793} +{"epoch": 0, "iter": 17385, "iter_tflops": 15.078281684403784, "iter_time": 0.9562047882080077, "loss": 0.41922375559806824, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 16.089375747866956, "step_time": 0.8961146392822266} +{"epoch": 0, "iter": 17386, "iter_tflops": 6.286175407607, "iter_time": 2.293592559814453, "loss": 0.228502556681633, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 7.112794762001258, "step_time": 2.0270407943725584} +{"epoch": 0, "iter": 17387, "iter_tflops": 10.907399011169202, "iter_time": 1.32184814453125, "loss": 0.23911800980567932, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 12.213396697589802, "step_time": 1.180500846862793} +{"epoch": 0, "iter": 17388, "iter_tflops": 25.45728282000235, "iter_time": 0.5663575820922852, "loss": 0.3463529646396637, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 27.069813445974816, "step_time": 0.5326200408935546} +{"epoch": 0, "iter": 17389, "iter_tflops": 17.954761870668264, "iter_time": 0.841673454284668, "loss": 0.3147958517074585, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 18.86243958132774, "step_time": 0.8011713638305664} +{"epoch": 0, "iter": 17390, "iter_tflops": 26.722720107200193, "iter_time": 0.5655130310058594, "loss": 0.3833068609237671, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.464164614250866, "step_time": 0.5309148063659669} +{"epoch": 0, "iter": 17391, "iter_tflops": 27.933307237252833, "iter_time": 0.5410045547485351, "loss": 0.4122946560382843, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.71856774459413, "step_time": 0.5085052070617676} +{"epoch": 0, "iter": 17392, "iter_tflops": 27.019610608391297, "iter_time": 0.5592991943359376, "loss": 0.3831545412540436, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.703762932708006, "step_time": 0.5264831123352051} +{"epoch": 0, "iter": 
17393, "iter_tflops": 2.036323935280845, "iter_time": 0.7310812911987303, "loss": 0.6951448917388916, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 2.171841797664714, "step_time": 0.6854635238647462} +{"epoch": 0, "iter": 17394, "iter_tflops": 0.8298782839303289, "iter_time": 1.7938996124267579, "loss": 0.6960944533348083, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 0.9665903187283674, "step_time": 1.5401750907897949} +{"epoch": 0, "iter": 17395, "iter_tflops": 3.5545615425501826, "iter_time": 0.4188191184997559, "loss": 0.6140637993812561, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 3.870068750382985, "step_time": 0.38467490577697755} +{"epoch": 0, "iter": 17396, "iter_tflops": 3.5458732407814173, "iter_time": 0.4198453330993653, "loss": 0.827483594417572, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 3.8352497292033396, "step_time": 0.3881672477722168} +{"epoch": 0, "iter": 17397, "iter_tflops": 31.71759188136407, "iter_time": 0.6504621658325195, "loss": 0.2180013656616211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.92219028049792, "step_time": 0.6081887207031249} +{"epoch": 0, "iter": 17398, "iter_tflops": 12.829406099688693, "iter_time": 1.6081097869873044, "loss": 0.18509571254253387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.97755020925907, "step_time": 1.2151984977722168} +{"epoch": 0, "iter": 17399, "iter_tflops": 13.788665432505296, "iter_time": 1.4962357025146484, "loss": 0.28070542216300964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.96429196150963, "step_time": 1.2923274993896485} +{"epoch": 0, "iter": 17400, "iter_tflops": 19.863111881899545, "iter_time": 1.0386637115478514, "loss": 0.24714237451553345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.83261206436759, "step_time": 0.8656664848327638} +{"epoch": 0, "iter": 17401, "iter_tflops": 14.715919520153088, "iter_time": 1.0574881439208985, "loss": 0.34759703278541565, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 15.743811548765416, "step_time": 0.9884461822509765} +{"epoch": 0, 
"iter": 17402, "iter_tflops": 12.630735603277115, "iter_time": 1.2320668334960938, "loss": 0.3411516547203064, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 14.812893808553744, "step_time": 1.0505651779174805} +{"epoch": 0, "iter": 17403, "iter_tflops": 23.224589033599383, "iter_time": 0.6700618209838868, "loss": 0.39558371901512146, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 25.00010905284766, "step_time": 0.6224737014770508} +{"epoch": 0, "iter": 17404, "iter_tflops": 22.029163556380066, "iter_time": 0.7064231185913086, "loss": 0.31904587149620056, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 23.769975161687476, "step_time": 0.654687702178955} +{"epoch": 0, "iter": 17405, "iter_tflops": 31.924706698776294, "iter_time": 0.6462422256469726, "loss": 0.04648589715361595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.76179276917488, "step_time": 0.5769032230377197} +{"epoch": 0, "iter": 17406, "iter_tflops": 42.84891905464218, "iter_time": 0.4814845733642579, "loss": 0.044018566608428955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.55085487382665, "step_time": 0.43387429237365727} +{"epoch": 0, "iter": 17407, "iter_tflops": 45.42327919262702, "iter_time": 0.4541964797973633, "loss": 0.02886069566011429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.059537800820024, "step_time": 0.41213112258911133} +{"epoch": 0, "iter": 17408, "iter_tflops": 43.38127763763397, "iter_time": 0.4755759773254395, "loss": 0.02608996443450451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47850202601556, "step_time": 0.43453547668457027} +{"epoch": 0, "iter": 17409, "iter_tflops": 29.071256235793133, "iter_time": 0.7096732711791993, "loss": 0.46599385142326355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.9473372501447, "step_time": 0.6457844467163085} +{"epoch": 0, "iter": 17410, "iter_tflops": 42.38154787735361, "iter_time": 0.4867942428588867, "loss": 0.42370617389678955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01561119931604, "step_time": 
0.4296747035980225} +{"epoch": 0, "iter": 17411, "iter_tflops": 43.595542719905026, "iter_time": 0.4732385978698731, "loss": 0.5553035140037537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.858751729884226, "step_time": 0.44028260993957513} +{"epoch": 0, "iter": 17412, "iter_tflops": 44.80868586217214, "iter_time": 0.460426212310791, "loss": 0.5133745670318604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41845580772973, "step_time": 0.42609978294372564} +{"epoch": 0, "iter": 17413, "iter_tflops": 33.724854443347326, "iter_time": 0.6117474441528321, "loss": 0.44275757670402527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.26596877765934, "step_time": 0.5688830108642577} +{"epoch": 0, "iter": 17414, "iter_tflops": 36.916472747946365, "iter_time": 0.558858741760254, "loss": 0.49218955636024475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54660891395467, "step_time": 0.5088241424560547} +{"epoch": 0, "iter": 17415, "iter_tflops": 39.248980127953104, "iter_time": 0.525646614074707, "loss": 0.4732106029987335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.1916029679674, "step_time": 0.4776644554138184} +{"epoch": 0, "iter": 17416, "iter_tflops": 36.0182130719047, "iter_time": 0.5727961425781251, "loss": 0.554421067237854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.17307859754724, "step_time": 0.5266651039123535} +{"epoch": 0, "iter": 17417, "iter_tflops": 31.534064709999434, "iter_time": 0.6542478332519531, "loss": 0.7217879295349121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.59640507208031, "step_time": 0.596336338043213} +{"epoch": 0, "iter": 17418, "iter_tflops": 35.989563806152624, "iter_time": 0.5732521133422852, "loss": 0.5563561320304871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.11550776831713, "step_time": 0.527440258026123} +{"epoch": 0, "iter": 17419, "iter_tflops": 34.20814071008663, "iter_time": 0.6031047897338867, "loss": 0.5894160866737366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.02273932946301, 
"step_time": 0.5572546463012695} +{"epoch": 0, "iter": 17420, "iter_tflops": 36.39460320920723, "iter_time": 0.566872329711914, "loss": 0.54878830909729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.33994255863981, "step_time": 0.5244312057495117} +{"epoch": 0, "iter": 17421, "iter_tflops": 14.405086924936239, "iter_time": 0.9443231964111327, "loss": 0.004074550233781338, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 15.47922515533892, "step_time": 0.8787944869995117} +{"epoch": 0, "iter": 17422, "iter_tflops": 11.667773815759315, "iter_time": 1.165865737915039, "loss": 0.002699966309592128, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 14.230928903310707, "step_time": 0.9558798179626465} +{"epoch": 0, "iter": 17423, "iter_tflops": 29.970101130488825, "iter_time": 0.4538876152038574, "loss": 0.0009321546531282365, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 33.34542031536328, "step_time": 0.4079438076019287} +{"epoch": 0, "iter": 17424, "iter_tflops": 31.22590902348784, "iter_time": 0.43563368225097654, "loss": 0.004485640674829483, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 34.54646404519664, "step_time": 0.39376121711730955} +{"epoch": 0, "iter": 17425, "iter_tflops": 15.496845491382869, "iter_time": 1.3313092346191406, "loss": 0.5492759346961975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.450267330427607, "step_time": 1.2541494369506836} +{"epoch": 0, "iter": 17426, "iter_tflops": 15.902314711660585, "iter_time": 1.2973641815185548, "loss": 0.39471760392189026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.472712404865874, "step_time": 1.1168415908813476} +{"epoch": 0, "iter": 17427, "iter_tflops": 38.98594298357275, "iter_time": 0.5291931381225586, "loss": 0.3958786427974701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.70650318761049, "step_time": 0.48309020805358893} +{"epoch": 0, "iter": 17428, "iter_tflops": 41.64336525852637, "iter_time": 0.49542330169677734, "loss": 0.22065943479537964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
45.543918003722, "step_time": 0.4529933834075928} +{"epoch": 0, "iter": 17429, "iter_tflops": 8.791415942104981, "iter_time": 1.0916624755859377, "loss": 0.001782997278496623, "lr": 3e-05, "seqlen": 3872.0, "step_tflops": 9.515867629263816, "step_time": 1.0085532150268555} +{"epoch": 0, "iter": 17430, "iter_tflops": 7.863723211591322, "iter_time": 1.2204471893310547, "loss": 0.008423480205237865, "lr": 3e-05, "seqlen": 3872.0, "step_tflops": 10.365205096153844, "step_time": 0.9259111423492432} +{"epoch": 0, "iter": 17431, "iter_tflops": 25.977240153486623, "iter_time": 0.3694487495422363, "loss": 0.004652708303183317, "lr": 3e-05, "seqlen": 3872.0, "step_tflops": 28.58260733159939, "step_time": 0.33577268791198733} +{"epoch": 0, "iter": 17432, "iter_tflops": 28.50441729978575, "iter_time": 0.33669374084472653, "loss": 0.002639701357111335, "lr": 3e-05, "seqlen": 3872.0, "step_tflops": 31.29396042499736, "step_time": 0.3066808662414551} +{"epoch": 0, "iter": 17433, "iter_tflops": 35.6885311098571, "iter_time": 0.5780874938964843, "loss": 0.12551264464855194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.24890046078798, "step_time": 0.5393904991149903} +{"epoch": 0, "iter": 17434, "iter_tflops": 12.183972269760185, "iter_time": 1.6932978057861328, "loss": 0.11620069295167923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.904801969379795, "step_time": 1.3841910514831544} +{"epoch": 0, "iter": 17435, "iter_tflops": 47.45631467028059, "iter_time": 0.4347386360168457, "loss": 0.09464903175830841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.70390409992623, "step_time": 0.39902390098571777} +{"epoch": 0, "iter": 17436, "iter_tflops": 49.63431237011052, "iter_time": 0.4156619186401367, "loss": 0.15103638172149658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.970244811880185, "step_time": 0.3822679252624512} +{"epoch": 0, "iter": 17437, "iter_tflops": 37.18471015616545, "iter_time": 0.5548273315429688, "loss": 0.1384817212820053, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.96048973762602, "step_time": 0.5162873039245606} +{"epoch": 0, "iter": 17438, "iter_tflops": 47.959867901756986, "iter_time": 0.4301741104125977, "loss": 0.12437620759010315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.86052980386973, "step_time": 0.39029297637939453} +{"epoch": 0, "iter": 17439, "iter_tflops": 49.189153092374504, "iter_time": 0.41942363739013677, "loss": 0.1431218981742859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.24388980381547, "step_time": 0.3874828376770019} +{"epoch": 0, "iter": 17440, "iter_tflops": 55.561872441548694, "iter_time": 0.37131746292114254, "loss": 0.16954393684864044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.52907575284891, "step_time": 0.3408460025787354} +{"epoch": 0, "iter": 17441, "iter_tflops": 26.637498431780703, "iter_time": 0.7745131759643554, "loss": 0.533523678779602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.1907859670082, "step_time": 0.7318381805419922} +{"epoch": 0, "iter": 17442, "iter_tflops": 11.988878629213165, "iter_time": 1.7208526458740234, "loss": 0.4971838891506195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.031668991462727, "step_time": 1.2868961753845214} +{"epoch": 0, "iter": 17443, "iter_tflops": 48.279888236146554, "iter_time": 0.4273227272033692, "loss": 0.4585823714733124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44769519408689, "step_time": 0.3933651123046875} +{"epoch": 0, "iter": 17444, "iter_tflops": 48.55634928248639, "iter_time": 0.4248897171020508, "loss": 0.554739773273468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.34045741600911, "step_time": 0.39417105865478513} +{"epoch": 0, "iter": 17445, "iter_tflops": 37.98131772327604, "iter_time": 0.5431905670166015, "loss": 0.11890309303998947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.83802299662829, "step_time": 0.5051932487487794} +{"epoch": 0, "iter": 17446, "iter_tflops": 34.43685193923744, "iter_time": 0.5990992889404297, "loss": 0.13731378316879272, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 38.23067398382853, "step_time": 0.539647653579712} +{"epoch": 0, "iter": 17447, "iter_tflops": 37.472215699388194, "iter_time": 0.5505704193115234, "loss": 0.2060505896806717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.172796265749014, "step_time": 0.5010855560302734} +{"epoch": 0, "iter": 17448, "iter_tflops": 40.99007114060573, "iter_time": 0.5033192901611327, "loss": 0.13737353682518005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.32132466745588, "step_time": 0.45521823692321783} +{"epoch": 0, "iter": 17449, "iter_tflops": 20.31145372247211, "iter_time": 1.0157369232177733, "loss": 0.5933725237846375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.76197417616544, "step_time": 0.9480340957641602} +{"epoch": 0, "iter": 17450, "iter_tflops": 20.957515544922412, "iter_time": 0.9844245834350586, "loss": 0.6356760859489441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.012777002914703, "step_time": 0.7931138420104981} +{"epoch": 0, "iter": 17451, "iter_tflops": 45.05854473676174, "iter_time": 0.4578730545043946, "loss": 0.5307803750038147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.660839055488715, "step_time": 0.4239773483276367} +{"epoch": 0, "iter": 17452, "iter_tflops": 45.28823252765365, "iter_time": 0.4555508651733398, "loss": 0.6180930733680725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.544209557104516, "step_time": 0.42499597167968756} +{"epoch": 0, "iter": 17453, "iter_tflops": 31.36857502575512, "iter_time": 0.6576994171142578, "loss": 0.3208273947238922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.27427256882097, "step_time": 0.6200313911437988} +{"epoch": 0, "iter": 17454, "iter_tflops": 8.089731397553457, "iter_time": 2.5502816467285156, "loss": 0.39138591289520264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.86819373365743, "step_time": 2.0906656341552736} +{"epoch": 0, "iter": 17455, "iter_tflops": 10.043517704121202, "iter_time": 2.0541700744628906, "loss": 0.4117061495780945, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 12.256744032014385, "step_time": 1.6832442169189452} +{"epoch": 0, "iter": 17456, "iter_tflops": 45.5576837717014, "iter_time": 0.4528565063476562, "loss": 0.3374999761581421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.43347111745131, "step_time": 0.41735069465637203} +{"epoch": 0, "iter": 17457, "iter_tflops": 25.280475488463775, "iter_time": 0.7146611099243163, "loss": 0.3035973608493805, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 26.751924025656773, "step_time": 0.6753522720336913} +{"epoch": 0, "iter": 17458, "iter_tflops": 14.745330852212701, "iter_time": 1.2252673645019532, "loss": 0.27614879608154297, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 17.19087250974876, "step_time": 1.0509631004333495} +{"epoch": 0, "iter": 17459, "iter_tflops": 25.998576337830013, "iter_time": 0.6949216156005859, "loss": 0.29750537872314453, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 27.898074212775697, "step_time": 0.6476064453125001} +{"epoch": 0, "iter": 17460, "iter_tflops": 31.033551184711047, "iter_time": 0.5821754837036133, "loss": 0.23595482110977173, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 33.15250792104689, "step_time": 0.5449654884338379} +{"epoch": 0, "iter": 17461, "iter_tflops": 30.41233943979592, "iter_time": 0.6783790359497069, "loss": 0.3870663642883301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.67903218582295, "step_time": 0.6313251075744629} +{"epoch": 0, "iter": 17462, "iter_tflops": 20.269431107123058, "iter_time": 1.0178427505493164, "loss": 0.4359443187713623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.653388489271713, "step_time": 0.8368461608886719} +{"epoch": 0, "iter": 17463, "iter_tflops": 45.61133337610712, "iter_time": 0.45232384109497065, "loss": 0.3790254294872284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.5482489777924, "step_time": 0.41638390731811525} +{"epoch": 0, "iter": 17464, "iter_tflops": 49.16493849919116, "iter_time": 0.4196302108764648, "loss": 
0.4823363423347473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.37352525058787, "step_time": 0.3865417060852051} +{"epoch": 0, "iter": 17465, "iter_tflops": 44.83704334275668, "iter_time": 0.45920942306518553, "loss": 0.08601848036050797, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 48.96401429766027, "step_time": 0.4205045909881592} +{"epoch": 0, "iter": 17466, "iter_tflops": 15.069854556055706, "iter_time": 1.366276809692383, "loss": 0.13789846003055573, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 16.22488262887004, "step_time": 1.2690133590698243} +{"epoch": 0, "iter": 17467, "iter_tflops": 13.526224853144463, "iter_time": 1.5221980285644532, "loss": 0.10989341139793396, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 15.557427184822718, "step_time": 1.3234574432373045} +{"epoch": 0, "iter": 17468, "iter_tflops": 21.927546900918987, "iter_time": 0.9389829559326173, "loss": 0.07945804297924042, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 28.527597097391137, "step_time": 0.7217429752349853} +{"epoch": 0, "iter": 17469, "iter_tflops": 14.885288622282053, "iter_time": 1.0344614639282226, "loss": 0.24994498491287231, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 15.566721086771635, "step_time": 0.989177963256836} +{"epoch": 0, "iter": 17470, "iter_tflops": 10.05677492331635, "iter_time": 1.5311327514648438, "loss": 0.2075902372598648, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 11.855442470093314, "step_time": 1.2988344802856446} +{"epoch": 0, "iter": 17471, "iter_tflops": 24.08983828871832, "iter_time": 0.6392013626098633, "loss": 0.41470590233802795, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.93793610536473, "step_time": 0.5936577758789063} +{"epoch": 0, "iter": 17472, "iter_tflops": 23.070571700813783, "iter_time": 0.6674415206909179, "loss": 0.3037208318710327, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.843490083421027, "step_time": 0.6198105583190918} +{"epoch": 0, "iter": 17473, "iter_tflops": 22.571857921844813, "iter_time": 
0.9140184020996095, "loss": 0.28739988803863525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.3821518295569, "step_time": 0.8461555671691894} +{"epoch": 0, "iter": 17474, "iter_tflops": 14.82655185171228, "iter_time": 1.3914963989257811, "loss": 0.32002443075180054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.918742499880654, "step_time": 1.1513694953918456} +{"epoch": 0, "iter": 17475, "iter_tflops": 45.1370471380985, "iter_time": 0.45707672119140624, "loss": 0.4055714011192322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35630129999261, "step_time": 0.4180032329559326} +{"epoch": 0, "iter": 17476, "iter_tflops": 49.32181043360084, "iter_time": 0.41829554367065425, "loss": 0.37587279081344604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.455038051943795, "step_time": 0.3859522743225097} +{"epoch": 0, "iter": 17477, "iter_tflops": 46.442011557209746, "iter_time": 0.44423341751098633, "loss": 0.5040334463119507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96998085885306, "step_time": 0.4047694969177246} +{"epoch": 0, "iter": 17478, "iter_tflops": 45.01701633357728, "iter_time": 0.4582954444885254, "loss": 0.4752826988697052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.67355846499869, "step_time": 0.4238665542602539} +{"epoch": 0, "iter": 17479, "iter_tflops": 48.0426165689562, "iter_time": 0.42943317794799807, "loss": 0.4458136260509491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.249059113217605, "step_time": 0.39486057472229} +{"epoch": 0, "iter": 17480, "iter_tflops": 47.306154613219924, "iter_time": 0.4361185913085938, "loss": 0.43708962202072144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.36087361317638, "step_time": 0.4016889133453369} +{"epoch": 0, "iter": 17481, "iter_tflops": 23.006516124469716, "iter_time": 0.8967500076293946, "loss": 0.04139132797718048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.129158164618673, "step_time": 0.8550274887084961} +{"epoch": 0, "iter": 17482, "iter_tflops": 9.856904116153222, 
"iter_time": 2.0930601806640627, "loss": 0.06076222285628319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.126906606192689, "step_time": 1.701265968322754} +{"epoch": 0, "iter": 17483, "iter_tflops": 26.816757071501478, "iter_time": 0.7693358840942384, "loss": 0.07423118501901627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.384040624377253, "step_time": 0.6790108585357666} +{"epoch": 0, "iter": 17484, "iter_tflops": 38.82098079904934, "iter_time": 0.5314418411254883, "loss": 0.04284577816724777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.457066679421814, "step_time": 0.4859283771514892} +{"epoch": 0, "iter": 17485, "iter_tflops": 12.675991916273787, "iter_time": 1.2083045959472658, "loss": 0.3120800256729126, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 13.494625086601992, "step_time": 1.1350044326782227} +{"epoch": 0, "iter": 17486, "iter_tflops": 11.347658242450276, "iter_time": 1.3497462615966795, "loss": 0.3055514991283417, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 15.683469246679817, "step_time": 0.9765989303588869} +{"epoch": 0, "iter": 17487, "iter_tflops": 22.98491581357068, "iter_time": 0.6663700408935547, "loss": 0.37901294231414795, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 24.711561030992552, "step_time": 0.6198094596862792} +{"epoch": 0, "iter": 17488, "iter_tflops": 21.044748156017082, "iter_time": 0.7278043518066406, "loss": 0.4498719871044159, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 22.65710928672537, "step_time": 0.6760111846923829} +{"epoch": 0, "iter": 17489, "iter_tflops": 28.90456531892997, "iter_time": 0.7137659149169922, "loss": 0.1925506442785263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.93830807613884, "step_time": 0.6459670143127442} +{"epoch": 0, "iter": 17490, "iter_tflops": 47.727655163387034, "iter_time": 0.43226706695556644, "loss": 0.21904878318309784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.19367278242266, "step_time": 0.3952795886993408} +{"epoch": 0, "iter": 17491, "iter_tflops": 
46.990716038909454, "iter_time": 0.43904616165161137, "loss": 0.23706698417663574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19103102022065, "step_time": 0.4030216445922851} +{"epoch": 0, "iter": 17492, "iter_tflops": 47.44299148195805, "iter_time": 0.4348607215881347, "loss": 0.19600358605384827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.41244359487993, "step_time": 0.40128599357604977} +{"epoch": 0, "iter": 17493, "iter_tflops": 39.3395127546316, "iter_time": 0.5244369354248046, "loss": 0.446182519197464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29215946657945, "step_time": 0.4878231277465821} +{"epoch": 0, "iter": 17494, "iter_tflops": 43.35503705281923, "iter_time": 0.47586381912231446, "loss": 0.41798239946365356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.18451009760531, "step_time": 0.4372429313659668} +{"epoch": 0, "iter": 17495, "iter_tflops": 44.066441524291236, "iter_time": 0.4681815185546875, "loss": 0.3907035291194916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.42118656316048, "step_time": 0.4350606765747071} +{"epoch": 0, "iter": 17496, "iter_tflops": 44.37786804988382, "iter_time": 0.4648960037231446, "loss": 0.32596325874328613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.93585524706245, "step_time": 0.4303895988464355} +{"epoch": 0, "iter": 17497, "iter_tflops": 29.3252413780293, "iter_time": 0.7035268096923828, "loss": 0.002147589810192585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.06173269156479, "step_time": 0.6641964797973632} +{"epoch": 0, "iter": 17498, "iter_tflops": 14.623674249265193, "iter_time": 1.4108009490966797, "loss": 0.007153843063861132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.645560142169536, "step_time": 1.106488265991211} +{"epoch": 0, "iter": 17499, "iter_tflops": 54.12460994887048, "iter_time": 0.3811776847839355, "loss": 0.0032467974815517664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.31467664779911, "step_time": 0.3478244285583496} +{"epoch": 0, "iter": 17500, 
"iter_tflops": 57.800970949344276, "iter_time": 0.35693333816528316, "loss": 0.005587846040725708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.753775126567376, "step_time": 0.32360583305358886} +{"epoch": 0, "iter": 17501, "iter_tflops": 23.14488727827421, "iter_time": 0.8913888092041016, "loss": 0.07543148845434189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.244254279129592, "step_time": 0.8509683685302734} +{"epoch": 0, "iter": 17502, "iter_tflops": 13.520767888556666, "iter_time": 1.525881790161133, "loss": 0.13022814691066742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.90610423584668, "step_time": 1.2203339824676513} +{"epoch": 0, "iter": 17503, "iter_tflops": 39.13139604584234, "iter_time": 0.5272261047363281, "loss": 0.10120128095149994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.40655223362757, "step_time": 0.47529906082153317} +{"epoch": 0, "iter": 17504, "iter_tflops": 41.052070464105896, "iter_time": 0.5025591468811036, "loss": 0.13356231153011322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.152922836477494, "step_time": 0.45691601371765145} +{"epoch": 0, "iter": 17505, "iter_tflops": 15.576388928924796, "iter_time": 1.3245106811523437, "loss": 0.49562379717826843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.673489130556412, "step_time": 1.2373591003417967} +{"epoch": 0, "iter": 17506, "iter_tflops": 17.073990598450653, "iter_time": 1.2083345947265625, "loss": 0.5634313821792603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.049334026652563, "step_time": 1.0290163993835448} +{"epoch": 0, "iter": 17507, "iter_tflops": 37.792426386051574, "iter_time": 0.5459055023193359, "loss": 0.4847097396850586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.00269618698054, "step_time": 0.5031643142700195} +{"epoch": 0, "iter": 17508, "iter_tflops": 39.538753820702766, "iter_time": 0.5217942276000976, "loss": 0.5231432318687439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.8060444327909, "step_time": 0.48196682929992674} 
+{"epoch": 0, "iter": 17509, "iter_tflops": 16.571374122261076, "iter_time": 1.244983871459961, "loss": 0.5896928906440735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.596993364146712, "step_time": 1.1724215087890624} +{"epoch": 0, "iter": 17510, "iter_tflops": 20.26125602616434, "iter_time": 1.0182534332275393, "loss": 0.5448184013366699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.11303957837791, "step_time": 0.7900686340332032} +{"epoch": 0, "iter": 17511, "iter_tflops": 45.37710742080239, "iter_time": 0.4546586303710938, "loss": 0.5422120094299316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.02635726042697, "step_time": 0.42081636619567875} +{"epoch": 0, "iter": 17512, "iter_tflops": 44.66678446445939, "iter_time": 0.46188893508911133, "loss": 0.6520463824272156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.30749231071841, "step_time": 0.42707854461669925} +{"epoch": 0, "iter": 17513, "iter_tflops": 40.19137296079537, "iter_time": 0.5133214416503906, "loss": 0.052508316934108734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.16987533545965, "step_time": 0.4670851650238037} +{"epoch": 0, "iter": 17514, "iter_tflops": 12.76846260244034, "iter_time": 1.6157852478027341, "loss": 0.024870535358786583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.84782719172454, "step_time": 1.4898433685302737} +{"epoch": 0, "iter": 17515, "iter_tflops": 7.954944241236035, "iter_time": 2.5934931640625, "loss": 0.0391017347574234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.544897352308888, "step_time": 2.1614788246154784} +{"epoch": 0, "iter": 17516, "iter_tflops": 32.16025638689823, "iter_time": 0.6415089874267579, "loss": 0.06192459911108017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.5399915770719, "step_time": 0.4966561794281006} +{"epoch": 0, "iter": 17517, "iter_tflops": 19.87739507332134, "iter_time": 0.782894859313965, "loss": 0.3162144422531128, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 20.994552918316113, "step_time": 
0.7412356185913086} +{"epoch": 0, "iter": 17518, "iter_tflops": 12.570704277806803, "iter_time": 1.2379505615234374, "loss": 0.43362799286842346, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 14.628809780855045, "step_time": 1.0637851371765137} +{"epoch": 0, "iter": 17519, "iter_tflops": 22.808160845452452, "iter_time": 0.6822957153320313, "loss": 0.34295445680618286, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 24.651493476144484, "step_time": 0.6312765769958496} +{"epoch": 0, "iter": 17520, "iter_tflops": 23.41862588088693, "iter_time": 0.6645099716186523, "loss": 0.2913675010204315, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 25.21667893451843, "step_time": 0.6171276741027832} +{"epoch": 0, "iter": 17521, "iter_tflops": 23.183922651052143, "iter_time": 0.8898879547119141, "loss": 0.05168928951025009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.82641557164992, "step_time": 0.8310137825012208} +{"epoch": 0, "iter": 17522, "iter_tflops": 22.976959918753305, "iter_time": 0.8979035339355469, "loss": 0.08236776292324066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.131402335072057, "step_time": 0.7333830451965332} +{"epoch": 0, "iter": 17523, "iter_tflops": 39.44397654804614, "iter_time": 0.5230480117797852, "loss": 0.04213375970721245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.955938031333616, "step_time": 0.4802850189208985} +{"epoch": 0, "iter": 17524, "iter_tflops": 47.102434686510506, "iter_time": 0.43800482177734373, "loss": 0.027583541348576546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.610551227870786, "step_time": 0.3997456531524659} +{"epoch": 0, "iter": 17525, "iter_tflops": 16.40347780735202, "iter_time": 1.257726791381836, "loss": 0.7377089262008667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.316301320627932, "step_time": 1.1914261093139649} +{"epoch": 0, "iter": 17526, "iter_tflops": 20.359846460843375, "iter_time": 1.0133226470947265, "loss": 0.7263649106025696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
27.21453751673414, "step_time": 0.7580909099578859} +{"epoch": 0, "iter": 17527, "iter_tflops": 37.63874821707119, "iter_time": 0.5481344223022461, "loss": 0.7056929469108582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18196447682396, "step_time": 0.5009740009307861} +{"epoch": 0, "iter": 17528, "iter_tflops": 39.846345484698446, "iter_time": 0.5177662658691407, "loss": 0.635280966758728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.4975111491677, "step_time": 0.4743051490783691} +{"epoch": 0, "iter": 17529, "iter_tflops": 35.38104124692213, "iter_time": 0.583111541748047, "loss": 0.09319044649600983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.91158115800992, "step_time": 0.5302044506072998} +{"epoch": 0, "iter": 17530, "iter_tflops": 9.298607604777553, "iter_time": 2.2187293395996095, "loss": 0.12900763750076294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.09159167379776, "step_time": 1.8600660858154296} +{"epoch": 0, "iter": 17531, "iter_tflops": 12.950120979604582, "iter_time": 1.5931197509765624, "loss": 0.07890027016401291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.238763008133574, "step_time": 1.19678503036499} +{"epoch": 0, "iter": 17532, "iter_tflops": 21.275343846394325, "iter_time": 0.9697184524536132, "loss": 0.12949106097221375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.228778250851214, "step_time": 0.8515119209289552} +{"epoch": 0, "iter": 17533, "iter_tflops": 16.603165081689713, "iter_time": 1.0583997802734375, "loss": 0.3429115116596222, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 17.589788672038104, "step_time": 0.9990333938598633} +{"epoch": 0, "iter": 17534, "iter_tflops": 8.805632818727974, "iter_time": 1.995630142211914, "loss": 0.21822628378868103, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 10.130974188822684, "step_time": 1.734560363769531} +{"epoch": 0, "iter": 17535, "iter_tflops": 8.180153122113966, "iter_time": 2.14822216796875, "loss": 0.27877768874168396, "lr": 3e-05, "seqlen": 7008.0, 
"step_tflops": 9.86801357595509, "step_time": 1.780782539367676} +{"epoch": 0, "iter": 17536, "iter_tflops": 30.115840294166002, "iter_time": 0.5835064239501954, "loss": 0.3227519989013672, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 32.00416562031793, "step_time": 0.5490780944824218} +{"epoch": 0, "iter": 17537, "iter_tflops": 14.335702223874767, "iter_time": 1.165593063354492, "loss": 0.35580652952194214, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 14.869531803902076, "step_time": 1.1237472229003906} +{"epoch": 0, "iter": 17538, "iter_tflops": 12.790751639414252, "iter_time": 1.3063810119628907, "loss": 0.20030944049358368, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 15.42041547735005, "step_time": 1.0836021308898927} +{"epoch": 0, "iter": 17539, "iter_tflops": 29.71828023294304, "iter_time": 0.5622665557861327, "loss": 0.3363970220088959, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 31.767753859032, "step_time": 0.5259923362731933} +{"epoch": 0, "iter": 17540, "iter_tflops": 31.80690716066324, "iter_time": 0.5253448562622071, "loss": 0.3556004464626312, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 33.796420457945175, "step_time": 0.4944190788269043} +{"epoch": 0, "iter": 17541, "iter_tflops": 8.029791997192216, "iter_time": 0.7814027328491211, "loss": 0.004898469429463148, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 8.558385113471557, "step_time": 0.733140811920166} +{"epoch": 0, "iter": 17542, "iter_tflops": 3.894332697427938, "iter_time": 1.6111878204345702, "loss": 0.0071485163643956184, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 4.677962388827024, "step_time": 1.3412894096374512} +{"epoch": 0, "iter": 17543, "iter_tflops": 12.93705424069955, "iter_time": 0.4850023269653321, "loss": 0.004388832021504641, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 14.395164743351929, "step_time": 0.4358756237030029} +{"epoch": 0, "iter": 17544, "iter_tflops": 13.24020171769383, "iter_time": 0.473897720336914, "loss": 0.001350137172266841, "lr": 3e-05, "seqlen": 
2544.0, "step_tflops": 14.69648688255125, "step_time": 0.4269388637542724} +{"epoch": 0, "iter": 17545, "iter_tflops": 30.759484203293873, "iter_time": 0.6707229995727539, "loss": 0.6735453009605408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.23348823394132, "step_time": 0.6207922973632812} +{"epoch": 0, "iter": 17546, "iter_tflops": 8.69021236836192, "iter_time": 2.3740609130859376, "loss": 0.5387741327285767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.626499015456432, "step_time": 1.9414760665893553} +{"epoch": 0, "iter": 17547, "iter_tflops": 13.529622468542273, "iter_time": 1.5248831634521485, "loss": 0.6485326290130615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.270075801470426, "step_time": 1.194615110397339} +{"epoch": 0, "iter": 17548, "iter_tflops": 36.31933139929374, "iter_time": 0.5680471725463867, "loss": 0.6339214444160461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.604091270599895, "step_time": 0.5209333896636963} +{"epoch": 0, "iter": 17549, "iter_tflops": 13.85721654118049, "iter_time": 1.1141605224609374, "loss": 0.3420124351978302, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 14.70105164733313, "step_time": 1.050208106994629} +{"epoch": 0, "iter": 17550, "iter_tflops": 20.22623562086307, "iter_time": 0.763323631286621, "loss": 0.2750803828239441, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.069035330958652, "step_time": 0.6414533615112304} +{"epoch": 0, "iter": 17551, "iter_tflops": 24.180338731604788, "iter_time": 0.6385007171630859, "loss": 0.2907112240791321, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.961263833141295, "step_time": 0.5947000007629394} +{"epoch": 0, "iter": 17552, "iter_tflops": 24.751006625379905, "iter_time": 0.6237792205810547, "loss": 0.30098193883895874, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 26.538706851413043, "step_time": 0.5817602081298828} +{"epoch": 0, "iter": 17553, "iter_tflops": 18.71251869299008, "iter_time": 1.1025289459228516, "loss": 0.11194586008787155, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.208999525584176, "step_time": 1.0208864364624024} +{"epoch": 0, "iter": 17554, "iter_tflops": 22.239921365450343, "iter_time": 0.9276603622436523, "loss": 0.13137276470661163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.41543430812031, "step_time": 0.7810242023468018} +{"epoch": 0, "iter": 17555, "iter_tflops": 50.6315662317531, "iter_time": 0.4074749221801758, "loss": 0.038619961589574814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.98178115738733, "step_time": 0.3752350883483887} +{"epoch": 0, "iter": 17556, "iter_tflops": 50.30014465858823, "iter_time": 0.410159725189209, "loss": 0.057945869863033295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.253956001552275, "step_time": 0.3802689247131348} +{"epoch": 0, "iter": 17557, "iter_tflops": 39.80529476742995, "iter_time": 0.5183002319335938, "loss": 0.08980263769626617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.38019095278349, "step_time": 0.475587890625} +{"epoch": 0, "iter": 17558, "iter_tflops": 8.98832502567537, "iter_time": 2.295321258544922, "loss": 0.06590043008327484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.181555137929665, "step_time": 1.8451005477905271} +{"epoch": 0, "iter": 17559, "iter_tflops": 13.664979888046094, "iter_time": 1.5097785491943359, "loss": 0.07410292327404022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.39297270839998, "step_time": 1.2585327796936034} +{"epoch": 0, "iter": 17560, "iter_tflops": 22.78833049561892, "iter_time": 0.9053358917236328, "loss": 0.06010614335536957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.589468678715313, "step_time": 0.7216326313018798} +{"epoch": 0, "iter": 17561, "iter_tflops": 22.005533552777454, "iter_time": 0.6681762390136718, "loss": 0.4225333631038666, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 23.45843079317702, "step_time": 0.6267927627563477} +{"epoch": 0, "iter": 17562, "iter_tflops": 13.613496975540809, "iter_time": 1.0800733032226564, "loss": 0.3606230616569519, "lr": 
3e-05, "seqlen": 5888.0, "step_tflops": 15.06985229787129, "step_time": 0.9756946754455567} +{"epoch": 0, "iter": 17563, "iter_tflops": 21.429365110817834, "iter_time": 0.6861414031982422, "loss": 0.25166067481040955, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 23.123785864968422, "step_time": 0.6358636398315429} +{"epoch": 0, "iter": 17564, "iter_tflops": 21.98861286107392, "iter_time": 0.6686904144287109, "loss": 0.3720034658908844, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 23.68147456287079, "step_time": 0.6208893203735351} +{"epoch": 0, "iter": 17565, "iter_tflops": 34.1471179182774, "iter_time": 0.6041825714111329, "loss": 0.10097186267375946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.97873018259378, "step_time": 0.543227575302124} +{"epoch": 0, "iter": 17566, "iter_tflops": 40.149157371669034, "iter_time": 0.5138611831665039, "loss": 0.12138392776250839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.240653618306354, "step_time": 0.46633790016174315} +{"epoch": 0, "iter": 17567, "iter_tflops": 42.83515236577178, "iter_time": 0.4816393165588379, "loss": 0.08007849007844925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81642516683576, "step_time": 0.4406806678771973} +{"epoch": 0, "iter": 17568, "iter_tflops": 44.52601505863453, "iter_time": 0.46334920120239254, "loss": 0.13777893781661987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.914123178252574, "step_time": 0.4217819347381592} +{"epoch": 0, "iter": 17569, "iter_tflops": 25.492625395160548, "iter_time": 0.8092965393066405, "loss": 0.24274607002735138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.453639800799948, "step_time": 0.7514884605407716} +{"epoch": 0, "iter": 17570, "iter_tflops": 13.662445679614217, "iter_time": 1.51005859375, "loss": 0.25224563479423523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.983753574946748, "step_time": 1.3768975448608398} +{"epoch": 0, "iter": 17571, "iter_tflops": 14.01685716652858, "iter_time": 1.4718772735595702, "loss": 
0.1918794959783554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.62250847776002, "step_time": 1.241154037475586} +{"epoch": 0, "iter": 17572, "iter_tflops": 45.64858517460454, "iter_time": 0.45195471954345706, "loss": 0.13715746998786926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.52084981840994, "step_time": 0.41661428642272946} +{"epoch": 0, "iter": 17573, "iter_tflops": 21.75220321049964, "iter_time": 0.7719546356201171, "loss": 0.42133548855781555, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 22.974505450300576, "step_time": 0.7308846817016601} +{"epoch": 0, "iter": 17574, "iter_tflops": 14.962799942817224, "iter_time": 1.122230743408203, "loss": 0.4139039218425751, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 18.087587000460825, "step_time": 0.9283556785583496} +{"epoch": 0, "iter": 17575, "iter_tflops": 29.358114758697024, "iter_time": 0.5719615936279296, "loss": 0.2678615152835846, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 31.299925377407764, "step_time": 0.5364777679443359} +{"epoch": 0, "iter": 17576, "iter_tflops": 29.946476276664765, "iter_time": 0.5607242050170899, "loss": 0.19555380940437317, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 31.835445083583668, "step_time": 0.5274534111022949} +{"epoch": 0, "iter": 17577, "iter_tflops": 27.174784089832272, "iter_time": 0.7591999053955077, "loss": 0.731223464012146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.904234427735094, "step_time": 0.7137740859985351} +{"epoch": 0, "iter": 17578, "iter_tflops": 16.706547636873925, "iter_time": 1.2349106445312499, "loss": 0.64754319190979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.936215180128873, "step_time": 1.0348550777435301} +{"epoch": 0, "iter": 17579, "iter_tflops": 41.067727233586616, "iter_time": 0.5023675498962402, "loss": 0.47262805700302124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.16854299510981, "step_time": 0.4670992546081543} +{"epoch": 0, "iter": 17580, "iter_tflops": 46.56954991460518, "iter_time": 
0.4430168113708496, "loss": 0.633647620677948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.11165405599566, "step_time": 0.4117025051116943} +{"epoch": 0, "iter": 17581, "iter_tflops": 34.2241396530455, "iter_time": 0.6028228530883789, "loss": 0.08395730704069138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.7279062936161, "step_time": 0.5617280044555664} +{"epoch": 0, "iter": 17582, "iter_tflops": 45.00336579396041, "iter_time": 0.45843445587158205, "loss": 0.09268016368150711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62829666872532, "step_time": 0.41571230316162106} +{"epoch": 0, "iter": 17583, "iter_tflops": 48.0722947919071, "iter_time": 0.42916806030273436, "loss": 0.06989230215549469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.53937796748345, "step_time": 0.39267867851257326} +{"epoch": 0, "iter": 17584, "iter_tflops": 55.02283056198058, "iter_time": 0.3749551467895508, "loss": 0.0765533372759819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.97583893044326, "step_time": 0.34399007797241216} +{"epoch": 0, "iter": 17585, "iter_tflops": 27.03590177619046, "iter_time": 0.7630998840332031, "loss": 0.39635956287384033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.52381735644576, "step_time": 0.7232935638427733} +{"epoch": 0, "iter": 17586, "iter_tflops": 21.57484259097278, "iter_time": 0.9562569656372071, "loss": 0.6384801864624023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.282471818848723, "step_time": 0.704554370880127} +{"epoch": 0, "iter": 17587, "iter_tflops": 43.096029060853915, "iter_time": 0.4787237701416015, "loss": 0.655249834060669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35845035403495, "step_time": 0.4450341491699219} +{"epoch": 0, "iter": 17588, "iter_tflops": 46.28337096551785, "iter_time": 0.44575606918334965, "loss": 0.6036502122879028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.87601748065835, "step_time": 0.41364757156372073} +{"epoch": 0, "iter": 17589, "iter_tflops": 34.76501135950274, 
"iter_time": 0.5934441757202148, "loss": 0.45108601450920105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.17303670369839, "step_time": 0.5550015640258789} +{"epoch": 0, "iter": 17590, "iter_tflops": 19.133189513787784, "iter_time": 1.0782882537841798, "loss": 0.4074866473674774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.298658503685203, "step_time": 0.8855056400299073} +{"epoch": 0, "iter": 17591, "iter_tflops": 41.531675859463576, "iter_time": 0.4967556228637695, "loss": 0.3628772795200348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.566249205628935, "step_time": 0.45277137947082524} +{"epoch": 0, "iter": 17592, "iter_tflops": 39.4776324971544, "iter_time": 0.5226020965576171, "loss": 0.2695032060146332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.20698993366324, "step_time": 0.477494348526001} +{"epoch": 0, "iter": 17593, "iter_tflops": 24.39809342202852, "iter_time": 0.8167123107910156, "loss": 0.003076134016737342, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 26.12730818833388, "step_time": 0.7626588668823242} +{"epoch": 0, "iter": 17594, "iter_tflops": 24.616803485316396, "iter_time": 0.8094561614990233, "loss": 0.007509662304073572, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 30.391533695137852, "step_time": 0.6556504669189452} +{"epoch": 0, "iter": 17595, "iter_tflops": 52.81893916718564, "iter_time": 0.37725527191162106, "loss": 0.014383367262780666, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 57.97771124079504, "step_time": 0.34368764877319335} +{"epoch": 0, "iter": 17596, "iter_tflops": 55.048611179407004, "iter_time": 0.36197504043579104, "loss": 0.002973114140331745, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 60.59227847221777, "step_time": 0.3288574676513672} +{"epoch": 0, "iter": 17597, "iter_tflops": 21.364217638032915, "iter_time": 0.7494936447143554, "loss": 0.2909620404243469, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 22.67026194682877, "step_time": 0.706314968109131} +{"epoch": 0, "iter": 17598, "iter_tflops": 
12.6817359407516, "iter_time": 1.2626304016113281, "loss": 0.2682434618473053, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 16.30218030417366, "step_time": 0.9822210922241211} +{"epoch": 0, "iter": 17599, "iter_tflops": 24.432152501277432, "iter_time": 0.6553800506591797, "loss": 0.34248244762420654, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 26.28869934437795, "step_time": 0.6090961418151856} +{"epoch": 0, "iter": 17600, "iter_tflops": 26.54076806783973, "iter_time": 0.6033113021850586, "loss": 0.4706553816795349, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 28.45036466075482, "step_time": 0.5628168754577637} +{"epoch": 0, "iter": 17601, "iter_tflops": 19.016944724836772, "iter_time": 1.0848795013427737, "loss": 0.13237570226192474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.340358844264813, "step_time": 1.014293487548828} +{"epoch": 0, "iter": 17602, "iter_tflops": 21.354423615753642, "iter_time": 0.9661273880004883, "loss": 0.0883001759648323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.672528327985308, "step_time": 0.8036253089904786} +{"epoch": 0, "iter": 17603, "iter_tflops": 55.08370426035014, "iter_time": 0.37454077911376954, "loss": 0.09119793027639389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.09724411493344, "step_time": 0.34329516792297354} +{"epoch": 0, "iter": 17604, "iter_tflops": 52.136112587297106, "iter_time": 0.39571599197387697, "loss": 0.129331573843956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.27502932568783, "step_time": 0.36661186599731443} +{"epoch": 0, "iter": 17605, "iter_tflops": 19.785822903728345, "iter_time": 1.0427210235595703, "loss": 0.6389831900596619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.63397139123992, "step_time": 0.9998605270385742} +{"epoch": 0, "iter": 17606, "iter_tflops": 19.718942861850177, "iter_time": 1.046257583618164, "loss": 0.6966681480407715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.516281543864075, "step_time": 0.8415262107849121} +{"epoch": 0, "iter": 17607, 
"iter_tflops": 44.76029423766126, "iter_time": 0.4609239921569825, "loss": 0.8048562407493591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.31646910531124, "step_time": 0.42699919700622563} +{"epoch": 0, "iter": 17608, "iter_tflops": 41.93798979967264, "iter_time": 0.4919428329467773, "loss": 0.4867974817752838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.03499518498726, "step_time": 0.4581124839782715} +{"epoch": 0, "iter": 17609, "iter_tflops": 32.811202588884875, "iter_time": 0.6287819976806641, "loss": 0.6557393670082092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.09284040304767, "step_time": 0.5879003601074219} +{"epoch": 0, "iter": 17610, "iter_tflops": 9.969754368227477, "iter_time": 2.069368286132812, "loss": 0.6688425540924072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.08498161153424, "step_time": 1.7071679687499999} +{"epoch": 0, "iter": 17611, "iter_tflops": 12.890133970691421, "iter_time": 1.6005336761474611, "loss": 0.5710296630859375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.409792937467232, "step_time": 1.4317411499023438} +{"epoch": 0, "iter": 17612, "iter_tflops": 37.275781221395235, "iter_time": 0.5534717941284178, "loss": 0.4459889233112335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.88188570512945, "step_time": 0.5046512203216552} +{"epoch": 0, "iter": 17613, "iter_tflops": 14.017542400965917, "iter_time": 1.083913070678711, "loss": 0.2161509394645691, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 14.861961547212752, "step_time": 1.022327865600586} +{"epoch": 0, "iter": 17614, "iter_tflops": 6.398483854509753, "iter_time": 2.3745933837890623, "loss": 0.4620724320411682, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 7.861410570401516, "step_time": 1.9327062606811523} +{"epoch": 0, "iter": 17615, "iter_tflops": 9.358886628345195, "iter_time": 1.6234620666503907, "loss": 0.28010207414627075, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 11.665514805453999, "step_time": 1.3024540863037108} +{"epoch": 0, "iter": 
17616, "iter_tflops": 25.856644418898764, "iter_time": 0.5876167526245117, "loss": 0.25566139817237854, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 27.61031277442669, "step_time": 0.5502942886352539} +{"epoch": 0, "iter": 17617, "iter_tflops": 18.401664623241686, "iter_time": 0.8301187744140625, "loss": 0.4857299327850342, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 19.379757248235606, "step_time": 0.7882228393554689} +{"epoch": 0, "iter": 17618, "iter_tflops": 10.630635196478549, "iter_time": 1.4369383392333983, "loss": 0.5008753538131714, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 13.992679852296499, "step_time": 1.0916827545166017} +{"epoch": 0, "iter": 17619, "iter_tflops": 26.973847052157637, "iter_time": 0.5663102951049804, "loss": 0.24930696189403534, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.80370047689059, "step_time": 0.5303334999084472} +{"epoch": 0, "iter": 17620, "iter_tflops": 27.11253083662786, "iter_time": 0.5634135513305665, "loss": 0.37149715423583984, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.74267036227061, "step_time": 0.5314595718383788} +{"epoch": 0, "iter": 17621, "iter_tflops": 21.099797257685104, "iter_time": 0.9777863388061523, "loss": 0.14424417912960052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.145566077525682, "step_time": 0.9316128311157226} +{"epoch": 0, "iter": 17622, "iter_tflops": 18.05058095804295, "iter_time": 1.1429600830078124, "loss": 0.10229495167732239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.868919582275947, "step_time": 0.9433979320526124} +{"epoch": 0, "iter": 17623, "iter_tflops": 43.62629611317022, "iter_time": 0.4729049987792969, "loss": 0.12078451365232468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.99906351515326, "step_time": 0.42982283401489263} +{"epoch": 0, "iter": 17624, "iter_tflops": 39.94561818607807, "iter_time": 0.5164795150756836, "loss": 0.11018802970647812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.5824583205507, "step_time": 0.4733806743621826} 
+{"epoch": 0, "iter": 17625, "iter_tflops": 18.80392791949484, "iter_time": 1.097169357299805, "loss": 0.6772633790969849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.05683486073853, "step_time": 1.0286315689086913} +{"epoch": 0, "iter": 17626, "iter_tflops": 36.81843866714557, "iter_time": 0.5603467788696288, "loss": 0.6684168577194214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74072142004927, "step_time": 0.45104434013366695} +{"epoch": 0, "iter": 17627, "iter_tflops": 42.58731092869735, "iter_time": 0.48444226837158205, "loss": 0.5912321209907532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71637108718081, "step_time": 0.45128458404541016} +{"epoch": 0, "iter": 17628, "iter_tflops": 45.57699207147772, "iter_time": 0.4526646575927734, "loss": 0.7211340069770813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.2026291832853, "step_time": 0.4193087615966796} +{"epoch": 0, "iter": 17629, "iter_tflops": 30.088153880321773, "iter_time": 0.685688247680664, "loss": 0.03268227353692055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.85880873275276, "step_time": 0.6475789375305177} +{"epoch": 0, "iter": 17630, "iter_tflops": 10.725692380799435, "iter_time": 1.9235209045410155, "loss": 0.03299282491207123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.029310304276603, "step_time": 1.3727239036560057} +{"epoch": 0, "iter": 17631, "iter_tflops": 42.38538417904134, "iter_time": 0.4867501831054687, "loss": 0.007779933512210846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.95058075187846, "step_time": 0.43942147636413575} +{"epoch": 0, "iter": 17632, "iter_tflops": 39.134143748895724, "iter_time": 0.5271890869140626, "loss": 0.024870319291949272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.325281339738325, "step_time": 0.4761906414031982} +{"epoch": 0, "iter": 17633, "iter_tflops": 17.728201427028573, "iter_time": 1.1637443084716796, "loss": 0.44942212104797363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.869302264349436, "step_time": 
1.0933681182861328} +{"epoch": 0, "iter": 17634, "iter_tflops": 15.887421945643739, "iter_time": 1.298580322265625, "loss": 0.4906412363052368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.906507865092788, "step_time": 0.9868263816833497} +{"epoch": 0, "iter": 17635, "iter_tflops": 37.40403831688268, "iter_time": 0.551573959350586, "loss": 0.3962230086326599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.68125012767199, "step_time": 0.5071401062011719} +{"epoch": 0, "iter": 17636, "iter_tflops": 36.55111495189837, "iter_time": 0.5644449844360352, "loss": 0.5866862535476685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.88708882339448, "step_time": 0.5172373847961426} +{"epoch": 0, "iter": 17637, "iter_tflops": 18.47243920489939, "iter_time": 1.1168581085205078, "loss": 0.49812543392181396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.762771415205787, "step_time": 1.0439372634887696} +{"epoch": 0, "iter": 17638, "iter_tflops": 16.78825760443012, "iter_time": 1.2289002227783206, "loss": 0.5680332779884338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.29630807763772, "step_time": 0.9687638549804687} +{"epoch": 0, "iter": 17639, "iter_tflops": 43.29212743499577, "iter_time": 0.4765553169250488, "loss": 0.3749546706676483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.830927509561185, "step_time": 0.422500545501709} +{"epoch": 0, "iter": 17640, "iter_tflops": 45.32811613242769, "iter_time": 0.4551500320434571, "loss": 0.4639081358909607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.76358104713385, "step_time": 0.42308405303955077} +{"epoch": 0, "iter": 17641, "iter_tflops": 25.48726753893162, "iter_time": 0.8094666671752929, "loss": 0.4935063123703003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.849801972105443, "step_time": 0.7683890380859375} +{"epoch": 0, "iter": 17642, "iter_tflops": 15.810466275568437, "iter_time": 1.3049010162353516, "loss": 0.6130388975143433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.34116955345383, 
"step_time": 1.124851577758789} +{"epoch": 0, "iter": 17643, "iter_tflops": 33.51640207567386, "iter_time": 0.6155521545410155, "loss": 0.7057317495346069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.356429762630384, "step_time": 0.5674675331115723} +{"epoch": 0, "iter": 17644, "iter_tflops": 35.76095201919092, "iter_time": 0.5769167861938476, "loss": 0.574895441532135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.11039710711115, "step_time": 0.5275091800689697} +{"epoch": 0, "iter": 17645, "iter_tflops": 18.445168260413, "iter_time": 1.1185093688964844, "loss": 0.4426945745944977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.824460659776484, "step_time": 1.0406887664794922} +{"epoch": 0, "iter": 17646, "iter_tflops": 20.158348013585897, "iter_time": 1.0234515991210937, "loss": 0.38402384519577026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.282045180272707, "step_time": 0.8496439800262452} +{"epoch": 0, "iter": 17647, "iter_tflops": 40.567814792893586, "iter_time": 0.5085581665039063, "loss": 0.3863089382648468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.94603971441619, "step_time": 0.46946422576904295} +{"epoch": 0, "iter": 17648, "iter_tflops": 49.89475460438633, "iter_time": 0.4134922332763672, "loss": 0.4715281128883362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.242880813998156, "step_time": 0.3803465671539307} +{"epoch": 0, "iter": 17649, "iter_tflops": 30.561067233833477, "iter_time": 0.6750776519775391, "loss": 0.02011730521917343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.380694543358835, "step_time": 0.6371417846679688} +{"epoch": 0, "iter": 17650, "iter_tflops": 11.741999265671344, "iter_time": 1.7570341339111328, "loss": 0.022129589691758156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.914628102131154, "step_time": 1.4826909751892088} +{"epoch": 0, "iter": 17651, "iter_tflops": 52.31984534823857, "iter_time": 0.39432634735107425, "loss": 0.030753368511795998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
57.5412363575606, "step_time": 0.3585444946289062} +{"epoch": 0, "iter": 17652, "iter_tflops": 56.31904023161753, "iter_time": 0.36632537460327147, "loss": 0.03739430755376816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.453593273480706, "step_time": 0.33571826171874997} +{"epoch": 0, "iter": 17653, "iter_tflops": 31.879519470872484, "iter_time": 0.647158233642578, "loss": 0.06664766371250153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.109923949886046, "step_time": 0.604841381072998} +{"epoch": 0, "iter": 17654, "iter_tflops": 15.242456836146765, "iter_time": 1.353528091430664, "loss": 0.08600828051567078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.713693654399325, "step_time": 1.0465361728668212} +{"epoch": 0, "iter": 17655, "iter_tflops": 43.50810856607966, "iter_time": 0.47418962097167977, "loss": 0.07294461131095886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98476716002391, "step_time": 0.42995089340209963} +{"epoch": 0, "iter": 17656, "iter_tflops": 40.34635444799833, "iter_time": 0.5113496322631836, "loss": 0.07323089241981506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.261869848057145, "step_time": 0.46611436843872067} +{"epoch": 0, "iter": 17657, "iter_tflops": 21.69255769824854, "iter_time": 0.9510678176879883, "loss": 0.05546734109520912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.052529664295207, "step_time": 0.8949600677490234} +{"epoch": 0, "iter": 17658, "iter_tflops": 16.32247907441906, "iter_time": 1.2639681396484375, "loss": 0.06288197636604309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.42849792517983, "step_time": 0.9627876663208007} +{"epoch": 0, "iter": 17659, "iter_tflops": 51.31116650678747, "iter_time": 0.4020780448913574, "loss": 0.05230652540922165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.04114518606281, "step_time": 0.3681418972015381} +{"epoch": 0, "iter": 17660, "iter_tflops": 51.42466367669819, "iter_time": 0.4011906356811523, "loss": 0.10219118744134903, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 56.048180669114664, "step_time": 0.36809568595886233} +{"epoch": 0, "iter": 17661, "iter_tflops": 38.00397158337706, "iter_time": 0.5428667755126954, "loss": 0.0753118023276329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.16349631933849, "step_time": 0.5011987648010254} +{"epoch": 0, "iter": 17662, "iter_tflops": 26.205851616712604, "iter_time": 0.7872704849243163, "loss": 0.12036867439746857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.32198738657195, "step_time": 0.7036048831939696} +{"epoch": 0, "iter": 17663, "iter_tflops": 46.47434717705697, "iter_time": 0.44392433166503903, "loss": 0.08126311749219894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.276945308779176, "step_time": 0.4103489856719971} +{"epoch": 0, "iter": 17664, "iter_tflops": 50.82966358172641, "iter_time": 0.40588687896728515, "loss": 0.10984685271978378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.26954251540883, "step_time": 0.37328142356872557} +{"epoch": 0, "iter": 17665, "iter_tflops": 31.27693017921376, "iter_time": 0.6596265487670898, "loss": 0.5848709940910339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.406288211053024, "step_time": 0.61758113861084} +{"epoch": 0, "iter": 17666, "iter_tflops": 12.42945532184942, "iter_time": 1.659854995727539, "loss": 0.7174208760261536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.673234688214936, "step_time": 1.2373779830932619} +{"epoch": 0, "iter": 17667, "iter_tflops": 35.92199179689744, "iter_time": 0.5743304443359375, "loss": 0.7259460687637329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.093940903710404, "step_time": 0.5277312297821045} +{"epoch": 0, "iter": 17668, "iter_tflops": 35.169581810631335, "iter_time": 0.5866175384521485, "loss": 0.7078203558921814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.339809676156165, "step_time": 0.5381115264892578} +{"epoch": 0, "iter": 17669, "iter_tflops": 18.734721522639955, "iter_time": 1.1012223205566405, "loss": 0.40800413489341736, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.28308224618545, "step_time": 1.0171577110290526} +{"epoch": 0, "iter": 17670, "iter_tflops": 19.128192591555198, "iter_time": 1.078569938659668, "loss": 0.5037153363227844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.03553395763488, "step_time": 0.8956203727722168} +{"epoch": 0, "iter": 17671, "iter_tflops": 34.03417453049213, "iter_time": 0.6061875686645509, "loss": 0.4747851490974426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.18751690051759, "step_time": 0.5547854557037353} +{"epoch": 0, "iter": 17672, "iter_tflops": 44.04353497720137, "iter_time": 0.4684250144958496, "loss": 0.5765892863273621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.00547046619614, "step_time": 0.4297654685974121} +{"epoch": 0, "iter": 17673, "iter_tflops": 24.801557803770397, "iter_time": 0.8318466796875, "loss": 0.3168109059333801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.77587766848722, "step_time": 0.7705104484558105} +{"epoch": 0, "iter": 17674, "iter_tflops": 12.963559131514348, "iter_time": 1.591468307495117, "loss": 0.4101111590862274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.935712088668332, "step_time": 1.2946452217102051} +{"epoch": 0, "iter": 17675, "iter_tflops": 39.44624641921739, "iter_time": 0.5230179138183594, "loss": 0.3770386874675751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.1814982379856, "step_time": 0.4777762317657471} +{"epoch": 0, "iter": 17676, "iter_tflops": 39.27806345994617, "iter_time": 0.5252574005126953, "loss": 0.33940619230270386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94132256699382, "step_time": 0.48044848823547365} +{"epoch": 0, "iter": 17677, "iter_tflops": 26.682338284365702, "iter_time": 0.7732116012573241, "loss": 0.6582123041152954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.49283210577785, "step_time": 0.7240801277160644} +{"epoch": 0, "iter": 17678, "iter_tflops": 18.433736484679212, "iter_time": 1.1192030181884765, "loss": 0.6033457517623901, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 23.8712290261736, "step_time": 0.8642660789489747} +{"epoch": 0, "iter": 17679, "iter_tflops": 44.9605832974384, "iter_time": 0.4588706817626953, "loss": 0.6312738060951233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71819288309679, "step_time": 0.4234782180786133} +{"epoch": 0, "iter": 17680, "iter_tflops": 44.83215237875314, "iter_time": 0.46018521118164063, "loss": 0.5249892473220825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24188496996351, "step_time": 0.42765935707092284} +{"epoch": 0, "iter": 17681, "iter_tflops": 32.881107900898115, "iter_time": 0.6274452056884765, "loss": 0.5506313443183899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.11691657544103, "step_time": 0.5874972953796387} +{"epoch": 0, "iter": 17682, "iter_tflops": 11.383731935317943, "iter_time": 1.812331283569336, "loss": 0.5545072555541992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.935444761685881, "step_time": 1.4804761428833006} +{"epoch": 0, "iter": 17683, "iter_tflops": 9.675789921278168, "iter_time": 2.1322386779785156, "loss": 0.40655243396759033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.654791911728388, "step_time": 1.7701811981201172} +{"epoch": 0, "iter": 17684, "iter_tflops": 20.95608372193851, "iter_time": 0.9844918441772461, "loss": 0.467237651348114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.01205558759023, "step_time": 0.7365076599121094} +{"epoch": 0, "iter": 17685, "iter_tflops": 16.360702127444302, "iter_time": 0.971195655822754, "loss": 0.369060754776001, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 17.11742984357724, "step_time": 0.9282610168457031} +{"epoch": 0, "iter": 17686, "iter_tflops": 11.477684290082472, "iter_time": 1.3843770599365235, "loss": 0.2614145576953888, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 13.76402150611404, "step_time": 1.154418628692627} +{"epoch": 0, "iter": 17687, "iter_tflops": 22.826784941219625, "iter_time": 0.6960876388549806, "loss": 0.39045611023902893, "lr": 
3e-05, "seqlen": 6352.0, "step_tflops": 24.616671686163382, "step_time": 0.6454748649597168} +{"epoch": 0, "iter": 17688, "iter_tflops": 25.027001546870366, "iter_time": 0.634891990661621, "loss": 0.4601479470729828, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 26.885540309532818, "step_time": 0.5910032920837403} +{"epoch": 0, "iter": 17689, "iter_tflops": 18.819572664053553, "iter_time": 1.0962572784423827, "loss": 0.02684231474995613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.47665266297965, "step_time": 1.0075422897338868} +{"epoch": 0, "iter": 17690, "iter_tflops": 20.195776168064373, "iter_time": 1.0215548706054687, "loss": 0.02098129130899906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.46237995773378, "step_time": 0.7512492923736572} +{"epoch": 0, "iter": 17691, "iter_tflops": 53.07030159161125, "iter_time": 0.38875025939941404, "loss": 0.015746064484119415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.349710747833086, "step_time": 0.35357662010192875} +{"epoch": 0, "iter": 17692, "iter_tflops": 57.186517368599574, "iter_time": 0.3607684898376465, "loss": 0.03340833634138107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.33290688228827, "step_time": 0.3309823741912842} +{"epoch": 0, "iter": 17693, "iter_tflops": 36.789442710221444, "iter_time": 0.5607884216308594, "loss": 0.4682745933532715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.47118069073965, "step_time": 0.5226875190734864} +{"epoch": 0, "iter": 17694, "iter_tflops": 8.306411333315175, "iter_time": 2.483755340576172, "loss": 0.5479757189750671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.921721505286605, "step_time": 1.8889964828491213} +{"epoch": 0, "iter": 17695, "iter_tflops": 13.839033929588036, "iter_time": 1.4907900085449217, "loss": 0.4744325876235962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.970998325116781, "step_time": 1.291784839630127} +{"epoch": 0, "iter": 17696, "iter_tflops": 25.3257695445785, "iter_time": 0.8146284942626952, "loss": 
0.6999028921127319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.93634797804981, "step_time": 0.5741010055541992} +{"epoch": 0, "iter": 17697, "iter_tflops": 18.669607222253447, "iter_time": 0.9038139801025391, "loss": 0.22916580736637115, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 20.307859318966113, "step_time": 0.8309025459289551} +{"epoch": 0, "iter": 17698, "iter_tflops": 29.123534180313307, "iter_time": 0.5793888854980468, "loss": 0.4142925441265106, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 31.114829715276176, "step_time": 0.5423089942932129} +{"epoch": 0, "iter": 17699, "iter_tflops": 31.800701967042134, "iter_time": 0.530612564086914, "loss": 0.3166621923446655, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 33.77233371770633, "step_time": 0.4996353569030762} +{"epoch": 0, "iter": 17700, "iter_tflops": 31.866218172456918, "iter_time": 0.5295216369628906, "loss": 0.3975800573825836, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 33.82725608951966, "step_time": 0.4988241424560547} +{"epoch": 0, "iter": 17701, "iter_tflops": 27.143135326361648, "iter_time": 0.7600851287841797, "loss": 0.18173934519290924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.692273158068087, "step_time": 0.7190470199584962} +{"epoch": 0, "iter": 17702, "iter_tflops": 40.52656791220769, "iter_time": 0.5090757637023926, "loss": 0.19270704686641693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.471747238628424, "step_time": 0.4537123546600342} +{"epoch": 0, "iter": 17703, "iter_tflops": 51.14980314745378, "iter_time": 0.4033464889526367, "loss": 0.1204306110739708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.88194657308359, "step_time": 0.369190673828125} +{"epoch": 0, "iter": 17704, "iter_tflops": 52.179159879760626, "iter_time": 0.39538953018188483, "loss": 0.14961056411266327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.77663460630098, "step_time": 0.36337295532226566} +{"epoch": 0, "iter": 17705, "iter_tflops": 42.75374343559016, "iter_time": 
0.4825564231872559, "loss": 0.0044522155076265335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.71245678863909, "step_time": 0.44166149520874026} +{"epoch": 0, "iter": 17706, "iter_tflops": 36.73463436623319, "iter_time": 0.5616251220703125, "loss": 0.014235474169254303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.83055680354088, "step_time": 0.5052856273651123} +{"epoch": 0, "iter": 17707, "iter_tflops": 45.16300476391795, "iter_time": 0.4568140144348145, "loss": 0.006637664046138525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.05171855903664, "step_time": 0.4121955070495606} +{"epoch": 0, "iter": 17708, "iter_tflops": 49.43865217669452, "iter_time": 0.41730695724487304, "loss": 0.00637006014585495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.482576001447725, "step_time": 0.37867323875427245} +{"epoch": 0, "iter": 17709, "iter_tflops": 26.498849636825334, "iter_time": 0.7785656280517578, "loss": 0.5669838786125183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.540937517433353, "step_time": 0.7228596992492675} +{"epoch": 0, "iter": 17710, "iter_tflops": 7.6762699965931045, "iter_time": 2.6876456298828124, "loss": 0.6558293700218201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.624620911532064, "step_time": 2.143574661254883} +{"epoch": 0, "iter": 17711, "iter_tflops": 11.137060457105004, "iter_time": 1.8524720764160156, "loss": 0.5819938778877258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.326766653814545, "step_time": 1.440038356781006} +{"epoch": 0, "iter": 17712, "iter_tflops": 34.526249371651424, "iter_time": 0.5975480651855468, "loss": 0.49758169054985046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.39413580912967, "step_time": 0.5517200241088868} +{"epoch": 0, "iter": 17713, "iter_tflops": 14.04101819268308, "iter_time": 1.1783628082275392, "loss": 0.3220032751560211, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 14.848769412783708, "step_time": 1.114261604309082} +{"epoch": 0, "iter": 17714, "iter_tflops": 
13.064565673385209, "iter_time": 1.2664342651367189, "loss": 0.299263596534729, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 16.230389812934572, "step_time": 1.0194095039367677} +{"epoch": 0, "iter": 17715, "iter_tflops": 28.217754598992904, "iter_time": 0.5863476333618164, "loss": 0.34691762924194336, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 29.952020620304406, "step_time": 0.5523972434997558} +{"epoch": 0, "iter": 17716, "iter_tflops": 28.41846568565907, "iter_time": 0.5822064361572266, "loss": 0.327362060546875, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 30.131528909553673, "step_time": 0.5491063423156738} +{"epoch": 0, "iter": 17717, "iter_tflops": 13.270772881638742, "iter_time": 0.9362568893432617, "loss": 0.054252348840236664, "lr": 3e-05, "seqlen": 4992.0, "step_tflops": 14.177439897996212, "step_time": 0.8763819580078125} +{"epoch": 0, "iter": 17718, "iter_tflops": 14.551283643385567, "iter_time": 0.8538664245605468, "loss": 0.04531479626893997, "lr": 3e-05, "seqlen": 4992.0, "step_tflops": 17.835125327803436, "step_time": 0.6966506996154784} +{"epoch": 0, "iter": 17719, "iter_tflops": 30.748572301095805, "iter_time": 0.4040790061950684, "loss": 0.05717900022864342, "lr": 3e-05, "seqlen": 4992.0, "step_tflops": 33.587332247044806, "step_time": 0.3699267463684082} +{"epoch": 0, "iter": 17720, "iter_tflops": 32.4870760491344, "iter_time": 0.38245524215698246, "loss": 0.14008283615112305, "lr": 3e-05, "seqlen": 4992.0, "step_tflops": 35.55477029150285, "step_time": 0.3494566955566406} +{"epoch": 0, "iter": 17721, "iter_tflops": 31.56384943659262, "iter_time": 0.6536304626464844, "loss": 0.6033934950828552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.563560033559796, "step_time": 0.6146872825622559} +{"epoch": 0, "iter": 17722, "iter_tflops": 21.327479519538347, "iter_time": 0.9673479461669922, "loss": 0.5364029407501221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.727685183694927, "step_time": 0.7181606655120849} +{"epoch": 0, "iter": 
17723, "iter_tflops": 47.491919402048524, "iter_time": 0.4344127120971679, "loss": 0.41610947251319885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.329757789516435, "step_time": 0.4019324150085449} +{"epoch": 0, "iter": 17724, "iter_tflops": 46.21195363294949, "iter_time": 0.44644495391845707, "loss": 0.45853176712989807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.886682815485734, "step_time": 0.4135591373443604} +{"epoch": 0, "iter": 17725, "iter_tflops": 28.07313187034109, "iter_time": 0.7349053039550781, "loss": 0.3954174518585205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.756078119377843, "step_time": 0.6933404808044433} +{"epoch": 0, "iter": 17726, "iter_tflops": 19.073936357604, "iter_time": 1.0816379547119142, "loss": 0.5369018316268921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.811701497909365, "step_time": 0.8664266815185547} +{"epoch": 0, "iter": 17727, "iter_tflops": 42.43942750785321, "iter_time": 0.48613034439086916, "loss": 0.520979642868042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.4870478948473, "step_time": 0.44380304718017577} +{"epoch": 0, "iter": 17728, "iter_tflops": 36.19610095514212, "iter_time": 0.5699811019897462, "loss": 0.47538459300994873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.53287657185772, "step_time": 0.5218718013763428} +{"epoch": 0, "iter": 17729, "iter_tflops": 19.561474892646395, "iter_time": 1.0546798553466799, "loss": 0.1446283459663391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.916204296485386, "step_time": 0.9863689041137695} +{"epoch": 0, "iter": 17730, "iter_tflops": 37.847134019174355, "iter_time": 0.5451164016723632, "loss": 0.09845859557390213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.20129560146516, "step_time": 0.48887346267700205} +{"epoch": 0, "iter": 17731, "iter_tflops": 49.62783125049245, "iter_time": 0.41571620178222657, "loss": 0.13471625745296478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.72808832779922, "step_time": 0.38399083518981936} 
+{"epoch": 0, "iter": 17732, "iter_tflops": 49.71268139765024, "iter_time": 0.4150066528320312, "loss": 0.10413151234388351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.6401870907458, "step_time": 0.3846200885772705} +{"epoch": 0, "iter": 17733, "iter_tflops": 30.90120625975071, "iter_time": 0.6676468658447267, "loss": 0.0017333587165921926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.81704860387029, "step_time": 0.6286699867248535} +{"epoch": 0, "iter": 17734, "iter_tflops": 16.120507748558843, "iter_time": 1.27980419921875, "loss": 0.0018056847620755434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.697469995428694, "step_time": 1.0473981437683106} +{"epoch": 0, "iter": 17735, "iter_tflops": 42.698373957882445, "iter_time": 0.4831821823120117, "loss": 0.00038584001595154405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.2199203174214, "step_time": 0.4369150428771973} +{"epoch": 0, "iter": 17736, "iter_tflops": 50.532938704238354, "iter_time": 0.4082702102661133, "loss": 0.008274400606751442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.082505851214236, "step_time": 0.36787039375305175} +{"epoch": 0, "iter": 17737, "iter_tflops": 24.75027913087449, "iter_time": 0.587480972290039, "loss": 0.03122595138847828, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 27.73996764234713, "step_time": 0.524164924621582} +{"epoch": 0, "iter": 17738, "iter_tflops": 27.17066124698195, "iter_time": 0.5351477432250976, "loss": 0.05199650675058365, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 30.297918276565074, "step_time": 0.4799114551544189} +{"epoch": 0, "iter": 17739, "iter_tflops": 30.661233539678907, "iter_time": 0.47422482299804686, "loss": 0.02264919877052307, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 33.933662282140375, "step_time": 0.4284924488067627} +{"epoch": 0, "iter": 17740, "iter_tflops": 28.649198902823557, "iter_time": 0.5075296554565429, "loss": 0.0437128022313118, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 31.402281269879655, "step_time": 
0.4630338134765625} +{"epoch": 0, "iter": 17741, "iter_tflops": 21.047637844162452, "iter_time": 0.9802094497680665, "loss": 0.09627888351678848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.68949337872841, "step_time": 0.9092796020507814} +{"epoch": 0, "iter": 17742, "iter_tflops": 43.919885970293294, "iter_time": 0.4697437858581543, "loss": 0.18134957551956177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.783264519323346, "step_time": 0.43176400184631347} +{"epoch": 0, "iter": 17743, "iter_tflops": 47.34356288603631, "iter_time": 0.43577399444580084, "loss": 0.14218339323997498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.930469482987334, "step_time": 0.4050835132598878} +{"epoch": 0, "iter": 17744, "iter_tflops": 49.60636501397941, "iter_time": 0.4158960952758789, "loss": 0.13698062300682068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.54606497361587, "step_time": 0.38529616546630857} +{"epoch": 0, "iter": 17745, "iter_tflops": 25.13204118154086, "iter_time": 0.40911259078979495, "loss": 0.026215460151433945, "lr": 3e-05, "seqlen": 4144.0, "step_tflops": 27.664488020189168, "step_time": 0.37166183853149415} +{"epoch": 0, "iter": 17746, "iter_tflops": 19.42904795635212, "iter_time": 0.529199089050293, "loss": 0.04460820555686951, "lr": 3e-05, "seqlen": 4144.0, "step_tflops": 21.816714393907432, "step_time": 0.47128244400024416} +{"epoch": 0, "iter": 17747, "iter_tflops": 21.726855360533634, "iter_time": 0.47323159790039065, "loss": 0.008972376585006714, "lr": 3e-05, "seqlen": 4144.0, "step_tflops": 23.963741745842306, "step_time": 0.4290579738616943} +{"epoch": 0, "iter": 17748, "iter_tflops": 22.951628168808522, "iter_time": 0.4479784355163574, "loss": 0.020401621237397194, "lr": 3e-05, "seqlen": 4144.0, "step_tflops": 25.29697650129911, "step_time": 0.40644519233703613} +{"epoch": 0, "iter": 17749, "iter_tflops": 16.316884460728055, "iter_time": 1.2644015197753906, "loss": 0.009927600622177124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
17.397545254954817, "step_time": 1.1858623275756837} +{"epoch": 0, "iter": 17750, "iter_tflops": 15.78134557723628, "iter_time": 1.3073088989257813, "loss": 0.005315630696713924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.707638314407962, "step_time": 0.9504070968627931} +{"epoch": 0, "iter": 17751, "iter_tflops": 42.998061870541825, "iter_time": 0.4798144989013672, "loss": 0.000526138988789171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.54018004622344, "step_time": 0.433971715927124} +{"epoch": 0, "iter": 17752, "iter_tflops": 43.23278873520035, "iter_time": 0.4772094078063965, "loss": 0.0034738853573799133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.95908664125443, "step_time": 0.43018111801147463} +{"epoch": 0, "iter": 17753, "iter_tflops": 34.00156300641826, "iter_time": 0.6067689743041992, "loss": 0.059212323278188705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.93859130465494, "step_time": 0.5585241012573241} +{"epoch": 0, "iter": 17754, "iter_tflops": 11.606631690642063, "iter_time": 1.7775263366699219, "loss": 0.05165741220116615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.12139614974493, "step_time": 1.2797336730957034} +{"epoch": 0, "iter": 17755, "iter_tflops": 15.474191287831951, "iter_time": 1.333258270263672, "loss": 0.05693589523434639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.188349422258064, "step_time": 1.2002952117919923} +{"epoch": 0, "iter": 17756, "iter_tflops": 14.284470369057338, "iter_time": 1.444302307128906, "loss": 0.04468236491084099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.3335600897669, "step_time": 1.1253184547424318} +{"epoch": 0, "iter": 17757, "iter_tflops": 17.77782053928359, "iter_time": 0.96995613861084, "loss": 0.324531614780426, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 19.100526952387817, "step_time": 0.9027869338989258} +{"epoch": 0, "iter": 17758, "iter_tflops": 10.6183298055974, "iter_time": 1.62395654296875, "loss": 0.2904309630393982, "lr": 3e-05, "seqlen": 6880.0, 
"step_tflops": 14.02077853727184, "step_time": 1.2298679504394532} +{"epoch": 0, "iter": 17759, "iter_tflops": 25.86158397728914, "iter_time": 0.6667691421508788, "loss": 0.4057668447494507, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 27.817179271465545, "step_time": 0.6198941307067871} +{"epoch": 0, "iter": 17760, "iter_tflops": 27.219389310730865, "iter_time": 0.6335081939697266, "loss": 0.36445578932762146, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 29.243094806603207, "step_time": 0.5896676216125488} +{"epoch": 0, "iter": 17761, "iter_tflops": 15.455847638345585, "iter_time": 1.3348406372070312, "loss": 0.5528866648674011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.44608097205034, "step_time": 1.254468681335449} +{"epoch": 0, "iter": 17762, "iter_tflops": 17.30696141372883, "iter_time": 1.192069076538086, "loss": 0.47577548027038574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.57622840094002, "step_time": 1.0538850021362305} +{"epoch": 0, "iter": 17763, "iter_tflops": 35.68150322103779, "iter_time": 0.5782013549804688, "loss": 0.5073513984680176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.773485785273344, "step_time": 0.5320928230285644} +{"epoch": 0, "iter": 17764, "iter_tflops": 39.34002726146959, "iter_time": 0.5244300765991211, "loss": 0.8000229001045227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.74190757567108, "step_time": 0.4826900501251221} +{"epoch": 0, "iter": 17765, "iter_tflops": 18.509190591870535, "iter_time": 1.1146405029296873, "loss": 0.6472892165184021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.774198403757122, "step_time": 1.0433339996337891} +{"epoch": 0, "iter": 17766, "iter_tflops": 15.6542247692127, "iter_time": 1.3179249572753906, "loss": 0.5165402293205261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.790000721896668, "step_time": 1.0979825820922853} +{"epoch": 0, "iter": 17767, "iter_tflops": 40.955659196025884, "iter_time": 0.5037421913146972, "loss": 0.6234469413757324, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 43.95338592018512, "step_time": 0.4693857612609863} +{"epoch": 0, "iter": 17768, "iter_tflops": 40.37529574305707, "iter_time": 0.5109830932617188, "loss": 0.39626187086105347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.216605859110224, "step_time": 0.47738810348510735} +{"epoch": 0, "iter": 17769, "iter_tflops": 36.82825625636534, "iter_time": 0.5601974029541016, "loss": 0.15011438727378845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.82592509630226, "step_time": 0.5180317459106445} +{"epoch": 0, "iter": 17770, "iter_tflops": 13.61478488015013, "iter_time": 1.5153448028564451, "loss": 0.055208154022693634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.670008435356614, "step_time": 1.3165974731445313} +{"epoch": 0, "iter": 17771, "iter_tflops": 24.63741182049863, "iter_time": 0.8373888320922851, "loss": 0.06558037549257278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.52288543002084, "step_time": 0.5976062908172607} +{"epoch": 0, "iter": 17772, "iter_tflops": 44.030801003556704, "iter_time": 0.4685604858398437, "loss": 0.07070046663284302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.47781122837166, "step_time": 0.42557807350158694} +{"epoch": 0, "iter": 17773, "iter_tflops": 23.13866746429821, "iter_time": 0.7594554138183593, "loss": 0.32040712237358093, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 25.251474505892066, "step_time": 0.6959112930297852} +{"epoch": 0, "iter": 17774, "iter_tflops": 25.86583573941177, "iter_time": 0.6793821182250976, "loss": 0.4295184910297394, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 27.822932217429777, "step_time": 0.6315936126708984} +{"epoch": 0, "iter": 17775, "iter_tflops": 26.183829041970306, "iter_time": 0.6711312637329101, "loss": 0.23100675642490387, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 28.259728861783, "step_time": 0.6218313827514649} +{"epoch": 0, "iter": 17776, "iter_tflops": 26.922503506956687, "iter_time": 0.6527173919677735, "loss": 0.2989984452724457, "lr": 
3e-05, "seqlen": 7008.0, "step_tflops": 28.989951792087435, "step_time": 0.6061681785583496} +{"epoch": 0, "iter": 17777, "iter_tflops": 32.63680952682077, "iter_time": 0.632141860961914, "loss": 0.1384069323539734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.144615296151756, "step_time": 0.5707930030822754} +{"epoch": 0, "iter": 17778, "iter_tflops": 38.303094152596685, "iter_time": 0.5386273345947266, "loss": 0.12052004039287567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.78835213958709, "step_time": 0.4821661148071289} +{"epoch": 0, "iter": 17779, "iter_tflops": 41.96256311073557, "iter_time": 0.49165475082397464, "loss": 0.0846293717622757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.95193069580333, "step_time": 0.44897120094299314} +{"epoch": 0, "iter": 17780, "iter_tflops": 42.91100043779022, "iter_time": 0.4807879867553711, "loss": 0.12844139337539673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.75934682256303, "step_time": 0.441218599319458} +{"epoch": 0, "iter": 17781, "iter_tflops": 35.146510739366086, "iter_time": 0.5870026092529297, "loss": 0.19062356650829315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.016644463953156, "step_time": 0.528776725769043} +{"epoch": 0, "iter": 17782, "iter_tflops": 44.451240316329205, "iter_time": 0.4641286354064941, "loss": 0.19320926070213318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.144714983247425, "step_time": 0.4198028926849365} +{"epoch": 0, "iter": 17783, "iter_tflops": 49.33904950143381, "iter_time": 0.41814939117431643, "loss": 0.281133234500885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.64950231125903, "step_time": 0.38455330657958986} +{"epoch": 0, "iter": 17784, "iter_tflops": 52.78755135977791, "iter_time": 0.39083255386352533, "loss": 0.2627943754196167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.236110485299825, "step_time": 0.3604558963775635} +{"epoch": 0, "iter": 17785, "iter_tflops": 22.365246821179248, "iter_time": 0.922462142944336, "loss": 
0.028076890856027603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.646506802764574, "step_time": 0.8724795455932618} +{"epoch": 0, "iter": 17786, "iter_tflops": 16.282746110413875, "iter_time": 1.267052459716797, "loss": 0.04114043340086937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.113699701321888, "step_time": 0.9771425094604492} +{"epoch": 0, "iter": 17787, "iter_tflops": 52.054654883965554, "iter_time": 0.3963352279663086, "loss": 0.02138356864452362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.98214345344376, "step_time": 0.3620624332427978} +{"epoch": 0, "iter": 17788, "iter_tflops": 54.822118919259864, "iter_time": 0.3763279113769531, "loss": 0.035466670989990234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.89543551881597, "step_time": 0.3444518489837646} +{"epoch": 0, "iter": 17789, "iter_tflops": 36.064297435982866, "iter_time": 0.5720642013549805, "loss": 0.05375397577881813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.66720242615585, "step_time": 0.5335553703308107} +{"epoch": 0, "iter": 17790, "iter_tflops": 25.95161565442027, "iter_time": 0.7949830093383788, "loss": 0.051880430430173874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.912323494054576, "step_time": 0.6464929924011231} +{"epoch": 0, "iter": 17791, "iter_tflops": 41.4493964804951, "iter_time": 0.49774171066284184, "loss": 0.10971417278051376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.562736740609004, "step_time": 0.4528062839508057} +{"epoch": 0, "iter": 17792, "iter_tflops": 44.03499620166049, "iter_time": 0.4685158462524414, "loss": 0.07153037190437317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.23400202897507, "step_time": 0.4277292499542237} +{"epoch": 0, "iter": 17793, "iter_tflops": 18.70798379840881, "iter_time": 1.1027962036132812, "loss": 0.2517731189727783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.15699130014927, "step_time": 1.0235204849243165} +{"epoch": 0, "iter": 17794, "iter_tflops": 15.87582228770064, "iter_time": 
1.2995291290283204, "loss": 0.20487745106220245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.63459229938466, "step_time": 1.0507523250579833} +{"epoch": 0, "iter": 17795, "iter_tflops": 36.01139332238258, "iter_time": 0.5729046173095703, "loss": 0.22087766230106354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7794573095113, "step_time": 0.5186368770599366} +{"epoch": 0, "iter": 17796, "iter_tflops": 40.38131716517263, "iter_time": 0.5109068984985351, "loss": 0.30668479204177856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.164143614421825, "step_time": 0.46714578437805176} +{"epoch": 0, "iter": 17797, "iter_tflops": 21.938704896826643, "iter_time": 0.9403970565795898, "loss": 0.43681105971336365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.608664960818928, "step_time": 0.8738780250549316} +{"epoch": 0, "iter": 17798, "iter_tflops": 16.4385683017543, "iter_time": 1.2550419921875, "loss": 0.5612732172012329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.00512185817734, "step_time": 1.03129056930542} +{"epoch": 0, "iter": 17799, "iter_tflops": 40.242870049522246, "iter_time": 0.5126645660400391, "loss": 0.7784674763679504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.28795789855373, "step_time": 0.4766012191772461} +{"epoch": 0, "iter": 17800, "iter_tflops": 38.85058229656705, "iter_time": 0.5310369186401367, "loss": 0.39826011657714844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.752142384323776, "step_time": 0.49413257217407225} +{"epoch": 0, "iter": 17801, "iter_tflops": 36.571876573177086, "iter_time": 0.49626666259765617, "loss": 0.03741178289055824, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 39.85986585397966, "step_time": 0.4553302612304687} +{"epoch": 0, "iter": 17802, "iter_tflops": 30.93340370885764, "iter_time": 0.5867250595092773, "loss": 0.02815718576312065, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 35.15776794881422, "step_time": 0.5162273998260498} +{"epoch": 0, "iter": 17803, "iter_tflops": 37.76992903558646, 
"iter_time": 0.48052521133422854, "loss": 0.04014205560088158, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 41.73078059435999, "step_time": 0.43491645431518555} +{"epoch": 0, "iter": 17804, "iter_tflops": 36.158470943375555, "iter_time": 0.501940559387207, "loss": 0.03364982828497887, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 39.83426321134579, "step_time": 0.45562291526794435} +{"epoch": 0, "iter": 17805, "iter_tflops": 19.84087239717261, "iter_time": 1.039827941894531, "loss": 0.13715150952339172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.13290124110056, "step_time": 0.976254669189453} +{"epoch": 0, "iter": 17806, "iter_tflops": 15.162447050681225, "iter_time": 1.3606704406738281, "loss": 0.12932522594928741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.058446160772732, "step_time": 1.14246227645874} +{"epoch": 0, "iter": 17807, "iter_tflops": 50.07425866691397, "iter_time": 0.4120099639892579, "loss": 0.12581199407577515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.9250980804164, "step_time": 0.37562233352661134} +{"epoch": 0, "iter": 17808, "iter_tflops": 50.61616820752742, "iter_time": 0.4075988807678223, "loss": 0.179228276014328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.66255614274064, "step_time": 0.37742643165588374} +{"epoch": 0, "iter": 17809, "iter_tflops": 38.14809712772965, "iter_time": 0.5408157958984375, "loss": 0.5170255899429321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.21625456862146, "step_time": 0.5005572128295899} +{"epoch": 0, "iter": 17810, "iter_tflops": 34.11117497198034, "iter_time": 0.6048191986083985, "loss": 0.6093106269836426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.21530330855177, "step_time": 0.5543712310791017} +{"epoch": 0, "iter": 17811, "iter_tflops": 34.1601812899951, "iter_time": 0.6039515228271485, "loss": 0.6528331637382507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.25837044512109, "step_time": 0.5537304306030274} +{"epoch": 0, "iter": 17812, "iter_tflops": 
33.909451135029634, "iter_time": 0.6084172058105469, "loss": 0.483969122171402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.65904996992915, "step_time": 0.5627830924987793} +{"epoch": 0, "iter": 17813, "iter_tflops": 27.05695090461763, "iter_time": 0.7625062255859376, "loss": 0.5123372673988342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.214150030225206, "step_time": 0.7062020797729492} +{"epoch": 0, "iter": 17814, "iter_tflops": 46.884677597060545, "iter_time": 0.4400391464233399, "loss": 0.43176090717315674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.967808283505434, "step_time": 0.40478675079345705} +{"epoch": 0, "iter": 17815, "iter_tflops": 44.040389255785286, "iter_time": 0.46845847320556644, "loss": 0.5863041877746582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.57011650294414, "step_time": 0.4336986122131347} +{"epoch": 0, "iter": 17816, "iter_tflops": 42.21090638280575, "iter_time": 0.48876215362548825, "loss": 0.36315008997917175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38778146371824, "step_time": 0.45455170631408687} +{"epoch": 0, "iter": 17817, "iter_tflops": 34.52253537620188, "iter_time": 0.5976123504638672, "loss": 0.14178988337516785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.903245900516794, "step_time": 0.5590590476989746} +{"epoch": 0, "iter": 17818, "iter_tflops": 8.989836537533057, "iter_time": 2.294935333251953, "loss": 0.16470858454704285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.680655568611627, "step_time": 1.9316317596435546} +{"epoch": 0, "iter": 17819, "iter_tflops": 14.656634387513106, "iter_time": 1.4076283111572268, "loss": 0.12431780993938446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.31361490571224, "step_time": 1.1916109733581544} +{"epoch": 0, "iter": 17820, "iter_tflops": 33.532819853657124, "iter_time": 0.6152507781982421, "loss": 0.11868714541196823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.31040222274843, "step_time": 0.40208403396606446} +{"epoch": 0, "iter": 
17821, "iter_tflops": 15.576095537528735, "iter_time": 0.9990893020629882, "loss": 0.2582784593105316, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 16.24800237694202, "step_time": 0.9577737655639649} +{"epoch": 0, "iter": 17822, "iter_tflops": 10.006080980617359, "iter_time": 1.555245300292969, "loss": 0.26243898272514343, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 12.526849634458884, "step_time": 1.2422844429016113} +{"epoch": 0, "iter": 17823, "iter_tflops": 27.635894418774733, "iter_time": 0.5631050033569336, "loss": 0.41402167081832886, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 30.058914887880075, "step_time": 0.5177136459350586} +{"epoch": 0, "iter": 17824, "iter_tflops": 25.99261702664637, "iter_time": 0.5987050247192383, "loss": 0.3263711631298065, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 27.513532813611487, "step_time": 0.5656093139648437} +{"epoch": 0, "iter": 17825, "iter_tflops": 42.14404124440147, "iter_time": 0.48953761672973634, "loss": 0.526039183139801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.90535900139288, "step_time": 0.4494266891479492} +{"epoch": 0, "iter": 17826, "iter_tflops": 48.60448672102505, "iter_time": 0.4244689102172851, "loss": 0.652885913848877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.28712854814811, "step_time": 0.38716842269897467} +{"epoch": 0, "iter": 17827, "iter_tflops": 45.3081344282759, "iter_time": 0.4553507614135743, "loss": 0.5236912965774536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.98466093536465, "step_time": 0.4211745700836181} +{"epoch": 0, "iter": 17828, "iter_tflops": 43.6196456316887, "iter_time": 0.4729771003723144, "loss": 0.46095532178878784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.87552019195085, "step_time": 0.4401251106262207} +{"epoch": 0, "iter": 17829, "iter_tflops": 27.82410506030573, "iter_time": 0.7414827346801758, "loss": 0.07839374989271164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.341135120542273, "step_time": 0.7031457176208495} +{"epoch": 0, 
"iter": 17830, "iter_tflops": 16.02605988185075, "iter_time": 1.2873465881347657, "loss": 0.16082070767879486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.958217875156897, "step_time": 1.0337142143249511} +{"epoch": 0, "iter": 17831, "iter_tflops": 48.2463359122721, "iter_time": 0.4276199035644532, "loss": 0.12664586305618286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37471476555663, "step_time": 0.3939132385253906} +{"epoch": 0, "iter": 17832, "iter_tflops": 46.33513286203631, "iter_time": 0.4452581062316895, "loss": 0.06375471502542496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.17263645376487, "step_time": 0.41120210075378416} +{"epoch": 0, "iter": 17833, "iter_tflops": 28.866446872122882, "iter_time": 0.7147084503173828, "loss": 0.21833379566669464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.687632358881103, "step_time": 0.6722934265136717} +{"epoch": 0, "iter": 17834, "iter_tflops": 24.845143296673385, "iter_time": 0.8303873825073244, "loss": 0.2615275979042053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.730919512337515, "step_time": 0.6713464431762695} +{"epoch": 0, "iter": 17835, "iter_tflops": 40.100073597432015, "iter_time": 0.5144901657104493, "loss": 0.22967831790447235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10561715661314, "step_time": 0.4677656688690186} +{"epoch": 0, "iter": 17836, "iter_tflops": 40.8613049847103, "iter_time": 0.5049053993225097, "loss": 0.2779134213924408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.783620526624816, "step_time": 0.4606839122772216} +{"epoch": 0, "iter": 17837, "iter_tflops": 17.83151421608952, "iter_time": 1.1570017700195312, "loss": 0.1359427273273468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.199902033432146, "step_time": 1.0745416030883792} +{"epoch": 0, "iter": 17838, "iter_tflops": 17.766167052072166, "iter_time": 1.1612574310302735, "loss": 0.11665159463882446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.91284590931896, "step_time": 0.862761947631836} 
+{"epoch": 0, "iter": 17839, "iter_tflops": 49.6159751378663, "iter_time": 0.4158155403137207, "loss": 0.10836085677146912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.82168648959088, "step_time": 0.38332305908203124} +{"epoch": 0, "iter": 17840, "iter_tflops": 50.76463048200554, "iter_time": 0.4064068489074707, "loss": 0.09098851680755615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.4135574194178, "step_time": 0.3723112983703613} +{"epoch": 0, "iter": 17841, "iter_tflops": 25.52094004793649, "iter_time": 0.808398651123047, "loss": 0.05307920277118683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.879731656634796, "step_time": 0.767533462524414} +{"epoch": 0, "iter": 17842, "iter_tflops": 28.42324718436218, "iter_time": 0.7258527984619141, "loss": 0.07403755187988281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.745247046002028, "step_time": 0.6498955097198487} +{"epoch": 0, "iter": 17843, "iter_tflops": 51.093392798249546, "iter_time": 0.4037918090820312, "loss": 0.09384183585643768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.79600252554112, "step_time": 0.36975934791564935} +{"epoch": 0, "iter": 17844, "iter_tflops": 53.89309981428988, "iter_time": 0.3828151206970215, "loss": 0.062309835106134415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.875362132152276, "step_time": 0.3504198150634765} +{"epoch": 0, "iter": 17845, "iter_tflops": 33.863949114475474, "iter_time": 0.4873731651306152, "loss": 0.04196798801422119, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 36.902376708988704, "step_time": 0.4472443656921387} +{"epoch": 0, "iter": 17846, "iter_tflops": 14.573077498071232, "iter_time": 1.132525375366211, "loss": 0.06857989728450775, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 17.721476108207153, "step_time": 0.9313208427429198} +{"epoch": 0, "iter": 17847, "iter_tflops": 32.81912646138913, "iter_time": 0.5028890724182129, "loss": 0.03547695651650429, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 36.344265552173454, "step_time": 
0.4541123561859131} +{"epoch": 0, "iter": 17848, "iter_tflops": 33.92310255055383, "iter_time": 0.48652330780029296, "loss": 0.044917747378349304, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 37.56747825638703, "step_time": 0.4393262691497803} +{"epoch": 0, "iter": 17849, "iter_tflops": 20.481635084316114, "iter_time": 0.985021224975586, "loss": 0.0759659856557846, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 22.101818215426015, "step_time": 0.9128138275146485} +{"epoch": 0, "iter": 17850, "iter_tflops": 26.11740907657344, "iter_time": 0.7724673309326172, "loss": 0.08158989995718002, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 32.56904213467941, "step_time": 0.6194485301971435} +{"epoch": 0, "iter": 17851, "iter_tflops": 53.71129684720149, "iter_time": 0.3756164245605469, "loss": 0.09824886173009872, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 58.64466465456334, "step_time": 0.34401842689514167} +{"epoch": 0, "iter": 17852, "iter_tflops": 47.83137833148447, "iter_time": 0.4217910079956055, "loss": 0.052553966641426086, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 51.78915799909081, "step_time": 0.38955731391906734} +{"epoch": 0, "iter": 17853, "iter_tflops": 25.371596677788098, "iter_time": 0.8131570816040038, "loss": 0.19788336753845215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.697450343302698, "step_time": 0.77277392578125} +{"epoch": 0, "iter": 17854, "iter_tflops": 16.18986435642555, "iter_time": 1.2743215789794924, "loss": 0.11673848330974579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.65246158735431, "step_time": 1.0497969131469727} +{"epoch": 0, "iter": 17855, "iter_tflops": 48.94574603820823, "iter_time": 0.42150942993164064, "loss": 0.1933622807264328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.390166473941, "step_time": 0.3864212245941162} +{"epoch": 0, "iter": 17856, "iter_tflops": 51.960979297869955, "iter_time": 0.3970497436523438, "loss": 0.1103285402059555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.373798486641945, 
"step_time": 0.36596954727172853} +{"epoch": 0, "iter": 17857, "iter_tflops": 48.595837715341744, "iter_time": 0.42454445648193356, "loss": 0.03639133274555206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.545764384277334, "step_time": 0.38529832839965816} +{"epoch": 0, "iter": 17858, "iter_tflops": 12.245460896046856, "iter_time": 1.684795181274414, "loss": 0.03711456060409546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.99937564636829, "step_time": 1.3754634857177734} +{"epoch": 0, "iter": 17859, "iter_tflops": 13.355247273260028, "iter_time": 1.5447930755615233, "loss": 0.04402673989534378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.624167998677452, "step_time": 1.3204602966308594} +{"epoch": 0, "iter": 17860, "iter_tflops": 32.34916869263692, "iter_time": 0.6377627105712891, "loss": 0.02993810921907425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.040691632278666, "step_time": 0.5284510250091552} +{"epoch": 0, "iter": 17861, "iter_tflops": 21.826334241978834, "iter_time": 0.6680519485473633, "loss": 0.34454208612442017, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 23.24122974358368, "step_time": 0.6273818244934083} +{"epoch": 0, "iter": 17862, "iter_tflops": 23.8821540081221, "iter_time": 0.610544807434082, "loss": 0.39914751052856445, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.415644719604085, "step_time": 0.531854175567627} +{"epoch": 0, "iter": 17863, "iter_tflops": 25.519266330627477, "iter_time": 0.5713771286010741, "loss": 0.3640982210636139, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.203659985309244, "step_time": 0.5359986534118653} +{"epoch": 0, "iter": 17864, "iter_tflops": 26.21236918351803, "iter_time": 0.5562688751220703, "loss": 0.39300239086151123, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.893972360375933, "step_time": 0.5227339057922363} +{"epoch": 0, "iter": 17865, "iter_tflops": 28.35019562753245, "iter_time": 0.72772314453125, "loss": 0.0034950771369040012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
30.078275269660082, "step_time": 0.6859134483337402} +{"epoch": 0, "iter": 17866, "iter_tflops": 29.455544133553662, "iter_time": 0.7004146118164062, "loss": 0.0032539265230298042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.26500939782537, "step_time": 0.5254320278167725} +{"epoch": 0, "iter": 17867, "iter_tflops": 58.9713837122265, "iter_time": 0.34984923553466796, "loss": 0.0004037363105453551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.72782204190976, "step_time": 0.31873609924316404} +{"epoch": 0, "iter": 17868, "iter_tflops": 56.96609931413588, "iter_time": 0.3621644058227539, "loss": 0.0047250911593437195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.64649080522608, "step_time": 0.32932560539245603} +{"epoch": 0, "iter": 17869, "iter_tflops": 30.12598460525073, "iter_time": 0.6848271942138671, "loss": 0.20004823803901672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.94591949484741, "step_time": 0.6458131065368652} +{"epoch": 0, "iter": 17870, "iter_tflops": 15.479351696859244, "iter_time": 1.3328137969970701, "loss": 0.18505671620368958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.25492298878912, "step_time": 1.1301660118103027} +{"epoch": 0, "iter": 17871, "iter_tflops": 47.731604567385176, "iter_time": 0.4322313003540039, "loss": 0.1923155039548874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00998828524713, "step_time": 0.39667560386657713} +{"epoch": 0, "iter": 17872, "iter_tflops": 51.89194954225448, "iter_time": 0.3975779228210449, "loss": 0.22328267991542816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.38773456207165, "step_time": 0.36587909889221193} +{"epoch": 0, "iter": 17873, "iter_tflops": 24.920720666068874, "iter_time": 0.8278690567016602, "loss": 0.098297618329525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.235560675085917, "step_time": 0.7863789825439453} +{"epoch": 0, "iter": 17874, "iter_tflops": 13.593406839396737, "iter_time": 1.5177279510498045, "loss": 0.16689789295196533, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 17.701653726667328, "step_time": 1.165489610671997} +{"epoch": 0, "iter": 17875, "iter_tflops": 39.1075188377788, "iter_time": 0.5275480041503906, "loss": 0.17667460441589355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90852334240758, "step_time": 0.4808157424926758} +{"epoch": 0, "iter": 17876, "iter_tflops": 40.28729425167115, "iter_time": 0.5120992584228516, "loss": 0.14492450654506683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.202669155932874, "step_time": 0.46673863601684573} +{"epoch": 0, "iter": 17877, "iter_tflops": 18.010485194122623, "iter_time": 1.1455045928955079, "loss": 0.546871542930603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.08028642882427, "step_time": 1.0812779769897458} +{"epoch": 0, "iter": 17878, "iter_tflops": 15.350208932021228, "iter_time": 1.344026885986328, "loss": 0.5163816809654236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.378798794221705, "step_time": 1.122548526763916} +{"epoch": 0, "iter": 17879, "iter_tflops": 34.13810353387786, "iter_time": 0.6043421096801758, "loss": 0.5841884613037109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.91871153669195, "step_time": 0.5588248519897461} +{"epoch": 0, "iter": 17880, "iter_tflops": 38.469715150490636, "iter_time": 0.5362944183349609, "loss": 0.7020168304443359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.18518836582522, "step_time": 0.4890601253509521} +{"epoch": 0, "iter": 17881, "iter_tflops": 18.366027303094366, "iter_time": 1.123329132080078, "loss": 0.3423726558685303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.96749600970053, "step_time": 1.03323388671875} +{"epoch": 0, "iter": 17882, "iter_tflops": 19.849216236575117, "iter_time": 1.0393908386230468, "loss": 0.24294394254684448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.05919869140939, "step_time": 0.8575137424468994} +{"epoch": 0, "iter": 17883, "iter_tflops": 47.93787050283264, "iter_time": 0.43037150573730476, "loss": 0.31782084703445435, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 52.033090306077675, "step_time": 0.3964994850158691} +{"epoch": 0, "iter": 17884, "iter_tflops": 47.49836353784281, "iter_time": 0.4343537750244141, "loss": 0.2725539207458496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.46299703289833, "step_time": 0.40089179992675783} +{"epoch": 0, "iter": 17885, "iter_tflops": 16.267058371324648, "iter_time": 0.9365356445312499, "loss": 0.0061008813790977, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 17.066480014526707, "step_time": 0.8926667938232421} +{"epoch": 0, "iter": 17886, "iter_tflops": 9.54724446208387, "iter_time": 1.595714874267578, "loss": 0.0021586583461612463, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 13.882119413975623, "step_time": 1.0974318504333496} +{"epoch": 0, "iter": 17887, "iter_tflops": 31.22405804428424, "iter_time": 0.48791479873657223, "loss": 0.003688015742227435, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 34.74971057915185, "step_time": 0.43841170883178715} +{"epoch": 0, "iter": 17888, "iter_tflops": 36.90928821979561, "iter_time": 0.4127600593566894, "loss": 0.005054456181824207, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 40.927412977255244, "step_time": 0.3722365741729736} +{"epoch": 0, "iter": 17889, "iter_tflops": 23.693403347684896, "iter_time": 0.8707526397705078, "loss": 0.0015017448458820581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.36947097368405, "step_time": 0.8132252159118653} +{"epoch": 0, "iter": 17890, "iter_tflops": 23.534469835554845, "iter_time": 0.8766330261230468, "loss": 0.011290673166513443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.066914633475125, "step_time": 0.7097792720794679} +{"epoch": 0, "iter": 17891, "iter_tflops": 44.46244504567961, "iter_time": 0.4640116729736329, "loss": 0.009899677708745003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.14883629508525, "step_time": 0.4197676906585693} +{"epoch": 0, "iter": 17892, "iter_tflops": 43.32059584273716, "iter_time": 0.47624214553833005, "loss": 
0.0024893705267459154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.91322425965525, "step_time": 0.4305928859710694} +{"epoch": 0, "iter": 17893, "iter_tflops": 27.506218070390464, "iter_time": 0.7500519866943358, "loss": 0.02659621275961399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.800349239535944, "step_time": 0.6698331031799316} +{"epoch": 0, "iter": 17894, "iter_tflops": 43.401476290042126, "iter_time": 0.4753546485900879, "loss": 0.04043252393603325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.13572888984382, "step_time": 0.4286024951934814} +{"epoch": 0, "iter": 17895, "iter_tflops": 48.71791201808458, "iter_time": 0.42348065948486335, "loss": 0.017129220068454742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.51146318482618, "step_time": 0.3855453071594238} +{"epoch": 0, "iter": 17896, "iter_tflops": 53.991968917577275, "iter_time": 0.38211411666870115, "loss": 0.036396801471710205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.21490566663454, "step_time": 0.34841047668457037} +{"epoch": 0, "iter": 17897, "iter_tflops": 32.068559702296895, "iter_time": 0.6433433151245117, "loss": 0.015965642407536507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.10987145844558, "step_time": 0.6048423118591308} +{"epoch": 0, "iter": 17898, "iter_tflops": 20.264810067572324, "iter_time": 1.018074851989746, "loss": 0.014063039794564247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.941762992742166, "step_time": 0.827170618057251} +{"epoch": 0, "iter": 17899, "iter_tflops": 41.897272144278666, "iter_time": 0.4924209251403808, "loss": 0.054522912949323654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.43180142025662, "step_time": 0.4443311023712158} +{"epoch": 0, "iter": 17900, "iter_tflops": 46.026487124000674, "iter_time": 0.4482439308166504, "loss": 0.03216862305998802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.48563247509441, "step_time": 0.4086527690887451} +{"epoch": 0, "iter": 17901, "iter_tflops": 16.7591474075963, "iter_time": 
1.2310347900390626, "loss": 0.12610043585300446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.887886869575784, "step_time": 1.1533555450439452} +{"epoch": 0, "iter": 17902, "iter_tflops": 28.074187215223425, "iter_time": 0.7348776779174805, "loss": 0.08315147459506989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.284253207227618, "step_time": 0.6594721431732178} +{"epoch": 0, "iter": 17903, "iter_tflops": 51.41198785260766, "iter_time": 0.40128955078125, "loss": 0.08009645342826843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.992791114349465, "step_time": 0.3684598159790039} +{"epoch": 0, "iter": 17904, "iter_tflops": 56.45464484255384, "iter_time": 0.365445457458496, "loss": 0.10484693199396133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.342858458829134, "step_time": 0.3363242931365967} +{"epoch": 0, "iter": 17905, "iter_tflops": 28.14431843229354, "iter_time": 0.7330464782714845, "loss": 0.6456719040870667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.832920500336186, "step_time": 0.6915546035766601} +{"epoch": 0, "iter": 17906, "iter_tflops": 16.288611870672565, "iter_time": 1.266596176147461, "loss": 0.44226983189582825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.00413402626852, "step_time": 0.9822396621704101} +{"epoch": 0, "iter": 17907, "iter_tflops": 43.37391513430068, "iter_time": 0.47565670394897464, "loss": 0.5786296129226685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.82657671276263, "step_time": 0.440585132598877} +{"epoch": 0, "iter": 17908, "iter_tflops": 42.540197475639076, "iter_time": 0.4849787902832031, "loss": 0.5324090123176575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.8012185747814, "step_time": 0.4504485721588135} +{"epoch": 0, "iter": 17909, "iter_tflops": 31.837275489139383, "iter_time": 0.6480169296264648, "loss": 0.4772282540798187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.79983182522637, "step_time": 0.6103904190063476} +{"epoch": 0, "iter": 17910, "iter_tflops": 14.08323371694107, 
"iter_time": 1.4649400787353515, "loss": 0.43215474486351013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.92564265327232, "step_time": 1.2189252681732174} +{"epoch": 0, "iter": 17911, "iter_tflops": 44.274268112861385, "iter_time": 0.46598384094238277, "loss": 0.5641475319862366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86039447626815, "step_time": 0.4310681877136231} +{"epoch": 0, "iter": 17912, "iter_tflops": 41.24717909168343, "iter_time": 0.5001819267272949, "loss": 0.628721296787262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.51717075725887, "step_time": 0.46344125556945803} +{"epoch": 0, "iter": 17913, "iter_tflops": 26.311639643324188, "iter_time": 0.7841052017211914, "loss": 0.6013382077217102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.83428047256536, "step_time": 0.741211669921875} +{"epoch": 0, "iter": 17914, "iter_tflops": 12.935436360194808, "iter_time": 1.5949282989501952, "loss": 0.6432071924209595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.089244853551605, "step_time": 1.2822909774780273} +{"epoch": 0, "iter": 17915, "iter_tflops": 34.25224955055864, "iter_time": 0.6023281326293947, "loss": 0.700080931186676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.47753486593475, "step_time": 0.5504922771453857} +{"epoch": 0, "iter": 17916, "iter_tflops": 36.937912659442425, "iter_time": 0.5585343627929688, "loss": 0.45364823937416077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.007885241772236, "step_time": 0.5156756820678711} +{"epoch": 0, "iter": 17917, "iter_tflops": 19.809181155565202, "iter_time": 1.0414914855957031, "loss": 0.4991253912448883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.381491160648793, "step_time": 0.9649043350219726} +{"epoch": 0, "iter": 17918, "iter_tflops": 26.21860836576221, "iter_time": 0.7868874359130859, "loss": 0.6015909910202026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.06516645112502, "step_time": 0.6434113960266112} +{"epoch": 0, "iter": 17919, "iter_tflops": 
39.0067053490503, "iter_time": 0.5289114608764648, "loss": 0.598888635635376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7625742115883, "step_time": 0.48245677185058594} +{"epoch": 0, "iter": 17920, "iter_tflops": 35.85459223319216, "iter_time": 0.5754100723266602, "loss": 0.541806161403656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.2521639960143, "step_time": 0.5256039772033692} +{"epoch": 0, "iter": 17921, "iter_tflops": 33.80579521042992, "iter_time": 0.6102827453613281, "loss": 0.11506697535514832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.044114870341254, "step_time": 0.5569330940246583} +{"epoch": 0, "iter": 17922, "iter_tflops": 10.876734105036826, "iter_time": 1.8968095855712892, "loss": 0.08773627132177353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.40774438687747, "step_time": 1.662759391784668} +{"epoch": 0, "iter": 17923, "iter_tflops": 18.78325367099786, "iter_time": 1.098376983642578, "loss": 0.07092788070440292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.445119071831886, "step_time": 0.9191795082092284} +{"epoch": 0, "iter": 17924, "iter_tflops": 35.2560658362342, "iter_time": 0.5851785507202149, "loss": 0.10223545134067535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.60418966015618, "step_time": 0.5344262809753417} +{"epoch": 0, "iter": 17925, "iter_tflops": 18.7419427284557, "iter_time": 0.9025173263549805, "loss": 0.21792051196098328, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 19.93552297838579, "step_time": 0.8484817810058594} +{"epoch": 0, "iter": 17926, "iter_tflops": 9.277681587439025, "iter_time": 1.8231847991943362, "loss": 0.4447755813598633, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 10.759254767552852, "step_time": 1.5721282196044921} +{"epoch": 0, "iter": 17927, "iter_tflops": 24.501594980203134, "iter_time": 0.6903602828979493, "loss": 0.2840796709060669, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 26.477001539847908, "step_time": 0.6388536109924317} +{"epoch": 0, "iter": 17928, 
"iter_tflops": 24.13646674516505, "iter_time": 0.7008038177490233, "loss": 0.2217649668455124, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 25.91656741256754, "step_time": 0.6526685333251953} +{"epoch": 0, "iter": 17929, "iter_tflops": 12.17561990790272, "iter_time": 1.2814818115234374, "loss": 0.413621187210083, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.160746961944389, "step_time": 1.1855585021972657} +{"epoch": 0, "iter": 17930, "iter_tflops": 17.491297019836175, "iter_time": 0.8920342178344727, "loss": 0.432485431432724, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 21.07880113244944, "step_time": 0.740214557647705} +{"epoch": 0, "iter": 17931, "iter_tflops": 26.314951828742988, "iter_time": 0.5929266204833984, "loss": 0.5131267309188843, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 27.97520932627012, "step_time": 0.5577379341125488} +{"epoch": 0, "iter": 17932, "iter_tflops": 27.919155294400152, "iter_time": 0.5588577194213867, "loss": 0.5623453259468079, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.734960293184344, "step_time": 0.5247303276062012} +{"epoch": 0, "iter": 17933, "iter_tflops": 35.599288442547696, "iter_time": 0.5795366821289062, "loss": 0.2549709975719452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.573233966985796, "step_time": 0.534855167388916} +{"epoch": 0, "iter": 17934, "iter_tflops": 13.95558283584271, "iter_time": 1.4783397979736328, "loss": 0.2794891595840454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.281696023759643, "step_time": 1.267134178161621} +{"epoch": 0, "iter": 17935, "iter_tflops": 34.04083066990494, "iter_time": 0.6060690383911134, "loss": 0.34737253189086914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.965918631078104, "step_time": 0.5434108867645264} +{"epoch": 0, "iter": 17936, "iter_tflops": 42.27745921160864, "iter_time": 0.48799274826049804, "loss": 0.21141327917575836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.20047899878085, "step_time": 0.44655583572387697} +{"epoch": 0, "iter": 
17937, "iter_tflops": 18.79857912605359, "iter_time": 1.0974815368652344, "loss": 0.686429500579834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.1165904377307, "step_time": 1.0255760574340822} +{"epoch": 0, "iter": 17938, "iter_tflops": 24.01372446400694, "iter_time": 0.8591375961303711, "loss": 0.6125615835189819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.962653716859847, "step_time": 0.7651729583740234} +{"epoch": 0, "iter": 17939, "iter_tflops": 35.00786813353449, "iter_time": 0.5893273315429688, "loss": 0.583474338054657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.94048206841608, "step_time": 0.5437752075195312} +{"epoch": 0, "iter": 17940, "iter_tflops": 34.402986143415006, "iter_time": 0.5996890335083007, "loss": 0.5584561228752136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.29424258902616, "step_time": 0.5531978149414063} +{"epoch": 0, "iter": 17941, "iter_tflops": 27.24254945198788, "iter_time": 0.7573114089965821, "loss": 0.5677216649055481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.29824891058103, "step_time": 0.7041749687194825} +{"epoch": 0, "iter": 17942, "iter_tflops": 7.8494115160875495, "iter_time": 2.62836181640625, "loss": 0.6362367272377014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.880346667868931, "step_time": 2.3232306442260744} +{"epoch": 0, "iter": 17943, "iter_tflops": 12.350965789583954, "iter_time": 1.6704032592773437, "loss": 0.5792802572250366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.048569516296658, "step_time": 1.3709670867919923} +{"epoch": 0, "iter": 17944, "iter_tflops": 39.12577385792933, "iter_time": 0.5273018646240235, "loss": 0.6100603342056274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.14716165439341, "step_time": 0.4895013732910157} +{"epoch": 0, "iter": 17945, "iter_tflops": 23.199612642170994, "iter_time": 0.665492301940918, "loss": 0.31015080213546753, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.810483211525437, "step_time": 0.6222838745117187} +{"epoch": 0, "iter": 
17946, "iter_tflops": 14.43458636326139, "iter_time": 1.069595153808594, "loss": 0.40532970428466797, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 16.04134877053294, "step_time": 0.9624604415893555} +{"epoch": 0, "iter": 17947, "iter_tflops": 23.703732346392496, "iter_time": 0.6513389282226563, "loss": 0.2041749656200409, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.47191230197554, "step_time": 0.6061250305175782} +{"epoch": 0, "iter": 17948, "iter_tflops": 23.875537627105395, "iter_time": 0.6466519775390625, "loss": 0.26702603697776794, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.526487259963492, "step_time": 0.6048291511535645} +{"epoch": 0, "iter": 17949, "iter_tflops": 19.433774220518504, "iter_time": 1.0616102294921876, "loss": 0.42396610975265503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.43283428279683, "step_time": 1.0097029724121094} +{"epoch": 0, "iter": 17950, "iter_tflops": 17.285158115728127, "iter_time": 1.193572738647461, "loss": 0.5188382863998413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.87413165470182, "step_time": 0.9883569698333742} +{"epoch": 0, "iter": 17951, "iter_tflops": 39.66621036423114, "iter_time": 0.5201175842285156, "loss": 0.48856261372566223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.629237067020235, "step_time": 0.4728731212615967} +{"epoch": 0, "iter": 17952, "iter_tflops": 38.46623753100388, "iter_time": 0.536342903137207, "loss": 0.43554461002349854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96260071561659, "step_time": 0.4916543102264404} +{"epoch": 0, "iter": 17953, "iter_tflops": 19.8397093142629, "iter_time": 1.039888900756836, "loss": 0.4648682177066803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.421276841212464, "step_time": 0.9631122207641601} +{"epoch": 0, "iter": 17954, "iter_tflops": 16.006695144688276, "iter_time": 1.2889040069580078, "loss": 0.5559861063957214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.175352227316356, "step_time": 1.0759173164367677} +{"epoch": 0, 
"iter": 17955, "iter_tflops": 44.05017614494204, "iter_time": 0.46835439300537113, "loss": 0.4495331346988678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.465242194327615, "step_time": 0.43465686798095704} +{"epoch": 0, "iter": 17956, "iter_tflops": 48.4065939724563, "iter_time": 0.42620419692993167, "loss": 0.4890974760055542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37684002922308, "step_time": 0.39389725494384764} +{"epoch": 0, "iter": 17957, "iter_tflops": 32.056757242693095, "iter_time": 0.6435801773071289, "loss": 0.06705483049154282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.18125107820218, "step_time": 0.6035792388916016} +{"epoch": 0, "iter": 17958, "iter_tflops": 12.711066237378711, "iter_time": 1.6230812683105469, "loss": 0.022596828639507294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.056144744096239, "step_time": 1.3702773094177245} +{"epoch": 0, "iter": 17959, "iter_tflops": 49.31637884226811, "iter_time": 0.4183416137695313, "loss": 0.021129611879587173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.95426973700379, "step_time": 0.38238110923767094} +{"epoch": 0, "iter": 17960, "iter_tflops": 53.47369556185267, "iter_time": 0.3858176116943359, "loss": 0.05878767371177673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.59800375883041, "step_time": 0.35207843589782717} +{"epoch": 0, "iter": 17961, "iter_tflops": 17.35651805327558, "iter_time": 1.0456822662353515, "loss": 0.06898687034845352, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 18.05172272502077, "step_time": 1.0054111404418946} +{"epoch": 0, "iter": 17962, "iter_tflops": 14.107102433554447, "iter_time": 1.2865436553955076, "loss": 0.0348246693611145, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 16.57470632160473, "step_time": 1.095006015777588} +{"epoch": 0, "iter": 17963, "iter_tflops": 46.2184543758928, "iter_time": 0.39268736648559566, "loss": 0.06764800101518631, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 50.640904208844276, "step_time": 
0.358394136428833} +{"epoch": 0, "iter": 17964, "iter_tflops": 44.01552110727687, "iter_time": 0.4123409805297852, "loss": 0.1005525067448616, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 47.89685537630099, "step_time": 0.3789268207550049} +{"epoch": 0, "iter": 17965, "iter_tflops": 23.125344287321113, "iter_time": 0.8921421127319336, "loss": 0.33669787645339966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.271918289590754, "step_time": 0.8499984741210938} +{"epoch": 0, "iter": 17966, "iter_tflops": 18.360298134743886, "iter_time": 1.123679656982422, "loss": 0.20103682577610016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.245315644554154, "step_time": 0.9274354133605958} +{"epoch": 0, "iter": 17967, "iter_tflops": 48.67574136776253, "iter_time": 0.4238475456237793, "loss": 0.16536691784858704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.022699889921036, "step_time": 0.3890992641448974} +{"epoch": 0, "iter": 17968, "iter_tflops": 48.74732123702041, "iter_time": 0.42322517395019527, "loss": 0.17687508463859558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.962621865743415, "step_time": 0.3895406379699707} +{"epoch": 0, "iter": 17969, "iter_tflops": 35.494689406440045, "iter_time": 0.581244514465332, "loss": 0.2316436767578125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.27231674011228, "step_time": 0.5390604820251464} +{"epoch": 0, "iter": 17970, "iter_tflops": 13.151258978534521, "iter_time": 1.5687542572021484, "loss": 0.2027411162853241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.883340051609105, "step_time": 1.298914047241211} +{"epoch": 0, "iter": 17971, "iter_tflops": 46.353367350290355, "iter_time": 0.445082950592041, "loss": 0.2647244334220886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.83677804865136, "step_time": 0.4058300762176514} +{"epoch": 0, "iter": 17972, "iter_tflops": 51.19653835839008, "iter_time": 0.40297829055786133, "loss": 0.18836131691932678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.6139007869953, 
"step_time": 0.370970085144043} +{"epoch": 0, "iter": 17973, "iter_tflops": 30.254570541844185, "iter_time": 0.6819165878295897, "loss": 0.637614905834198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.05688987038959, "step_time": 0.6435775146484376} +{"epoch": 0, "iter": 17974, "iter_tflops": 14.855920087584108, "iter_time": 1.388745590209961, "loss": 0.6848458647727966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.292092304399613, "step_time": 1.1278695278167725} +{"epoch": 0, "iter": 17975, "iter_tflops": 34.17209170783109, "iter_time": 0.6037410202026368, "loss": 0.5348444581031799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.154293861863486, "step_time": 0.5552815399169923} +{"epoch": 0, "iter": 17976, "iter_tflops": 37.34616294715412, "iter_time": 0.5524287338256836, "loss": 0.7454960346221924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56395801286589, "step_time": 0.5086065196990968} +{"epoch": 0, "iter": 17977, "iter_tflops": 16.9653275064132, "iter_time": 1.216073989868164, "loss": 0.6480389833450317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.040553588878762, "step_time": 1.1435953674316406} +{"epoch": 0, "iter": 17978, "iter_tflops": 20.79400516145202, "iter_time": 0.9921654510498047, "loss": 0.6405937075614929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.077490728101093, "step_time": 0.8226936950683593} +{"epoch": 0, "iter": 17979, "iter_tflops": 37.06981474051152, "iter_time": 0.5565469818115235, "loss": 0.6288670301437378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.49544449706723, "step_time": 0.5094670219421387} +{"epoch": 0, "iter": 17980, "iter_tflops": 33.942810205135416, "iter_time": 0.6078192520141601, "loss": 0.6095959544181824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.68947635272507, "step_time": 0.5623163795471192} +{"epoch": 0, "iter": 17981, "iter_tflops": 19.32333417119163, "iter_time": 1.0397766876220702, "loss": 0.058511584997177124, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 
20.6673445990296, "step_time": 0.9721593551635741} +{"epoch": 0, "iter": 17982, "iter_tflops": 19.799170383828084, "iter_time": 1.0147875900268555, "loss": 0.025819925591349602, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 24.79126750051683, "step_time": 0.8104447422027587} +{"epoch": 0, "iter": 17983, "iter_tflops": 40.94418042448791, "iter_time": 0.490715705871582, "loss": 0.016605598852038383, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 45.23674521535343, "step_time": 0.4441511497497559} +{"epoch": 0, "iter": 17984, "iter_tflops": 46.29680986904743, "iter_time": 0.4339813575744629, "loss": 0.04184446111321449, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 50.89703032807314, "step_time": 0.3947568702697754} +{"epoch": 0, "iter": 17985, "iter_tflops": 19.028033914872076, "iter_time": 1.0842472534179688, "loss": 0.33747598528862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.342802358859235, "step_time": 1.0141716537475587} +{"epoch": 0, "iter": 17986, "iter_tflops": 24.550321192791568, "iter_time": 0.8403594131469727, "loss": 0.23049700260162354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.657227816841683, "step_time": 0.6956514492034912} +{"epoch": 0, "iter": 17987, "iter_tflops": 50.83873134581093, "iter_time": 0.40581448364257805, "loss": 0.3246523141860962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.053551243938394, "step_time": 0.37474591636657717} +{"epoch": 0, "iter": 17988, "iter_tflops": 43.7744288510689, "iter_time": 0.4713046875, "loss": 0.22183816134929657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.130919624643354, "step_time": 0.43774010086059567} +{"epoch": 0, "iter": 17989, "iter_tflops": 31.30072687229168, "iter_time": 0.6591250610351562, "loss": 0.22579997777938843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.231373916384314, "step_time": 0.6208317947387696} +{"epoch": 0, "iter": 17990, "iter_tflops": 14.840722395198547, "iter_time": 1.390167739868164, "loss": 0.2367510199546814, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.79830755829554, "step_time": 0.9919602088928222} +{"epoch": 0, "iter": 17991, "iter_tflops": 47.16536506116125, "iter_time": 0.4374204139709473, "loss": 0.1385616958141327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.08051307896256, "step_time": 0.4038936233520508} +{"epoch": 0, "iter": 17992, "iter_tflops": 52.11093988814506, "iter_time": 0.39590714645385744, "loss": 0.2550150454044342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.539769362151496, "step_time": 0.3648952541351318} +{"epoch": 0, "iter": 17993, "iter_tflops": 21.415148643124848, "iter_time": 0.6675449676513672, "loss": 0.02405441552400589, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 22.765605769020706, "step_time": 0.6279461593627931} +{"epoch": 0, "iter": 17994, "iter_tflops": 11.158317986846717, "iter_time": 1.2811585693359375, "loss": 0.01844451203942299, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 12.58268419895591, "step_time": 1.1361307716369629} +{"epoch": 0, "iter": 17995, "iter_tflops": 28.803351967801042, "iter_time": 0.4963163566589356, "loss": 0.0483410470187664, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 31.79976518074402, "step_time": 0.4495496940612793} +{"epoch": 0, "iter": 17996, "iter_tflops": 34.59577973590505, "iter_time": 0.41321730041503907, "loss": 0.02174893021583557, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 38.12663930025571, "step_time": 0.37494977188110357} +{"epoch": 0, "iter": 17997, "iter_tflops": 28.853936389335704, "iter_time": 0.7150183334350586, "loss": 0.32476621866226196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.25691544460932, "step_time": 0.6600489273071288} +{"epoch": 0, "iter": 17998, "iter_tflops": 12.909360187028062, "iter_time": 1.5981499633789062, "loss": 0.2961370646953583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.731057741249588, "step_time": 1.400516777038574} +{"epoch": 0, "iter": 17999, "iter_tflops": 12.771726932668034, "iter_time": 1.6153722686767575, "loss": 0.22001899778842926, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 16.884151972436463, "step_time": 1.2219206237792968} +{"epoch": 0, "iter": 18000, "iter_tflops": 3.823309969041822, "iter_time": 5.3961341552734385, "loss": 0.30981793999671936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 3.995218123569637, "step_time": 5.163946716308594} +{"epoch": 0, "iter": 18001, "iter_tflops": 5.981154222924204, "iter_time": 2.807437072753906, "loss": 0.4812195301055908, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 6.1480267169705725, "step_time": 2.7312363586425783} +{"epoch": 0, "iter": 18002, "iter_tflops": 14.77780083529376, "iter_time": 1.1362796325683595, "loss": 0.2708521783351898, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 16.91685926459122, "step_time": 0.9926023406982423} +{"epoch": 0, "iter": 18003, "iter_tflops": 15.889697834499087, "iter_time": 1.056767364501953, "loss": 0.3789438009262085, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 18.355237309706038, "step_time": 0.9148186874389648} +{"epoch": 0, "iter": 18004, "iter_tflops": 24.015021759087926, "iter_time": 0.6992171096801758, "loss": 0.4004548490047455, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 28.066202181204925, "step_time": 0.5982895011901855} +{"epoch": 0, "iter": 18005, "iter_tflops": 8.631692057742669, "iter_time": 2.3901563415527343, "loss": 0.5625030994415283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.169635264374794, "step_time": 2.2499361114501957} +{"epoch": 0, "iter": 18006, "iter_tflops": 26.141764604590737, "iter_time": 0.7892004928588867, "loss": 0.6048599481582642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.361414798890678, "step_time": 0.6795168685913086} +{"epoch": 0, "iter": 18007, "iter_tflops": 24.71984401086351, "iter_time": 0.8345964279174805, "loss": 0.5688018202781677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.049797590282015, "step_time": 0.7355166625976562} +{"epoch": 0, "iter": 18008, "iter_tflops": 29.610939020469534, "iter_time": 0.6967389144897461, "loss": 0.6424160599708557, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 34.79234286265838, "step_time": 0.5929779891967772} +{"epoch": 0, "iter": 18009, "iter_tflops": 20.911194907235025, "iter_time": 0.9866051940917969, "loss": 0.39199408888816833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.412851601793243, "step_time": 0.9205028381347656} +{"epoch": 0, "iter": 18010, "iter_tflops": 20.117431206284554, "iter_time": 1.0255331954956053, "loss": 0.41668376326560974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.85330647419515, "step_time": 0.8649154586791992} +{"epoch": 0, "iter": 18011, "iter_tflops": 20.527218162455213, "iter_time": 1.0050603713989257, "loss": 0.4729433059692383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.2746545662687, "step_time": 0.8864188919067383} +{"epoch": 0, "iter": 18012, "iter_tflops": 29.93161976558869, "iter_time": 0.6892742080688475, "loss": 0.3376278877258301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.18102316824304, "step_time": 0.6035832633972168} +{"epoch": 0, "iter": 18013, "iter_tflops": 9.53694665259625, "iter_time": 2.163280792236328, "loss": 0.46876299381256104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.840614216554636, "step_time": 2.0965249786376954} +{"epoch": 0, "iter": 18014, "iter_tflops": 35.77360421660312, "iter_time": 0.576712745666504, "loss": 0.47169598937034607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.11073279871363, "step_time": 0.5413460197448731} +{"epoch": 0, "iter": 18015, "iter_tflops": 34.821816681254475, "iter_time": 0.5924760818481445, "loss": 0.6184148788452148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.20359707902032, "step_time": 0.5545456657409668} +{"epoch": 0, "iter": 18016, "iter_tflops": 35.295443225776545, "iter_time": 0.5845256958007813, "loss": 0.47395503520965576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.6848333447946, "step_time": 0.5474641036987306} +{"epoch": 0, "iter": 18017, "iter_tflops": 9.70533506765914, "iter_time": 2.1257476806640625, "loss": 0.43592241406440735, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.897920683690652, "step_time": 2.0843866271972655} +{"epoch": 0, "iter": 18018, "iter_tflops": 28.629898652523494, "iter_time": 0.7206135711669921, "loss": 0.38944771885871887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.12260157869782, "step_time": 0.62287056350708} +{"epoch": 0, "iter": 18019, "iter_tflops": 35.89275593244094, "iter_time": 0.5747982559204101, "loss": 0.3620363175868988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.15712442467142, "step_time": 0.5268796882629394} +{"epoch": 0, "iter": 18020, "iter_tflops": 27.05770028729104, "iter_time": 0.762485107421875, "loss": 0.3817526698112488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.01355666815915, "step_time": 0.6444486541748047} +{"epoch": 0, "iter": 18021, "iter_tflops": 6.815423644602531, "iter_time": 2.445708953857422, "loss": 0.0046188849955797195, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 7.307719536706761, "step_time": 2.280949966430664} +{"epoch": 0, "iter": 18022, "iter_tflops": 21.4159966631693, "iter_time": 0.7783220596313476, "loss": 0.005311720538884401, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 24.79874516626015, "step_time": 0.6721526641845703} +{"epoch": 0, "iter": 18023, "iter_tflops": 34.44397479009265, "iter_time": 0.4839320297241211, "loss": 0.0018044263124465942, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 40.325601874687926, "step_time": 0.4133488864898682} +{"epoch": 0, "iter": 18024, "iter_tflops": 30.73136419090664, "iter_time": 0.5423951416015625, "loss": 0.018597856163978577, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 32.74821945006893, "step_time": 0.5089908065795898} +{"epoch": 0, "iter": 18025, "iter_tflops": 9.772146462759942, "iter_time": 2.111214111328125, "loss": 0.40062206983566284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.039665819637872, "step_time": 2.054958190917969} +{"epoch": 0, "iter": 18026, "iter_tflops": 23.463018150317833, "iter_time": 0.8793026275634764, "loss": 
0.451395183801651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.098216603182202, "step_time": 0.7613450660705566} +{"epoch": 0, "iter": 18027, "iter_tflops": 25.724628179994088, "iter_time": 0.8019977340698243, "loss": 0.34287840127944946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.08366733185156, "step_time": 0.6857905082702637} +{"epoch": 0, "iter": 18028, "iter_tflops": 30.377771576580503, "iter_time": 0.6791509857177734, "loss": 0.2788611352443695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.66624522630406, "step_time": 0.6315722351074219} +{"epoch": 0, "iter": 18029, "iter_tflops": 8.327610762677155, "iter_time": 2.4774324951171875, "loss": 0.4618002474308014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.646959800422469, "step_time": 2.3859360961914065} +{"epoch": 0, "iter": 18030, "iter_tflops": 28.378994686949532, "iter_time": 0.726984649658203, "loss": 0.49193882942199707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.39668044943929, "step_time": 0.6177588081359863} +{"epoch": 0, "iter": 18031, "iter_tflops": 30.571752379014246, "iter_time": 0.6748417053222657, "loss": 0.5413870811462402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.31455545764867, "step_time": 0.5842093505859376} +{"epoch": 0, "iter": 18032, "iter_tflops": 44.03245898281455, "iter_time": 0.46854284286499026, "loss": 0.46716323494911194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.57823637654711, "step_time": 0.4336245956420898} +{"epoch": 0, "iter": 18033, "iter_tflops": 8.861389447511932, "iter_time": 2.328200744628906, "loss": 0.018118994310498238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.621024018927129, "step_time": 2.1443760528564453} +{"epoch": 0, "iter": 18034, "iter_tflops": 22.497388439107443, "iter_time": 0.9170439300537109, "loss": 0.024528156965970993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.178352568274786, "step_time": 0.7880974731445313} +{"epoch": 0, "iter": 18035, "iter_tflops": 28.00592121207315, "iter_time": 
0.7366689834594726, "loss": 0.018676668405532837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.812908354308245, "step_time": 0.6485132789611816} +{"epoch": 0, "iter": 18036, "iter_tflops": 44.53753589545775, "iter_time": 0.46322934341430666, "loss": 0.025435417890548706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.60698742722395, "step_time": 0.3997732582092285} +{"epoch": 0, "iter": 18037, "iter_tflops": 9.349580551264266, "iter_time": 2.206633056640625, "loss": 0.5278772115707397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.52610395820984, "step_time": 2.165743057250977} +{"epoch": 0, "iter": 18038, "iter_tflops": 25.875774563286726, "iter_time": 0.7973130798339843, "loss": 0.48036882281303406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.927379844856237, "step_time": 0.7132029800415041} +{"epoch": 0, "iter": 18039, "iter_tflops": 25.251233286119806, "iter_time": 0.8170331039428711, "loss": 0.5289686322212219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.021035512757535, "step_time": 0.6872212486267091} +{"epoch": 0, "iter": 18040, "iter_tflops": 30.059171503372216, "iter_time": 0.6863493728637694, "loss": 0.35419103503227234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.100895469425296, "step_time": 0.6232790145874023} +{"epoch": 0, "iter": 18041, "iter_tflops": 13.914225544621695, "iter_time": 1.482733871459961, "loss": 0.027002932503819466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.32743657517808, "step_time": 1.4399710235595706} +{"epoch": 0, "iter": 18042, "iter_tflops": 19.16451181484899, "iter_time": 1.0765259094238282, "loss": 0.027134399861097336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.634383790115884, "step_time": 0.7205006980895996} +{"epoch": 0, "iter": 18043, "iter_tflops": 24.85454956935766, "iter_time": 0.8300731201171875, "loss": 0.02243456430733204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.660715434453127, "step_time": 0.7458626136779785} +{"epoch": 0, "iter": 18044, "iter_tflops": 
25.555366502569974, "iter_time": 0.8073096313476562, "loss": 0.02858855202794075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.154613486167168, "step_time": 0.684177017211914} +{"epoch": 0, "iter": 18045, "iter_tflops": 26.363695084804764, "iter_time": 0.7825569763183594, "loss": 0.48811525106430054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.117560896943452, "step_time": 0.6630048408508301} +{"epoch": 0, "iter": 18046, "iter_tflops": 32.10091403961127, "iter_time": 0.6426948928833007, "loss": 0.4499099552631378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.69374334622733, "step_time": 0.594663230895996} +{"epoch": 0, "iter": 18047, "iter_tflops": 28.774327389147295, "iter_time": 0.7169965515136719, "loss": 0.5182804465293884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.00651209744203, "step_time": 0.6445904960632325} +{"epoch": 0, "iter": 18048, "iter_tflops": 28.167089416204927, "iter_time": 0.7324538650512696, "loss": 0.6207596063613892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.27551134606838, "step_time": 0.6814449234008789} +{"epoch": 0, "iter": 18049, "iter_tflops": 8.716222810563666, "iter_time": 2.366976379394531, "loss": 0.2970837652683258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.069578286680633, "step_time": 2.274757751464844} +{"epoch": 0, "iter": 18050, "iter_tflops": 27.468907494431225, "iter_time": 0.7510707702636719, "loss": 0.2796372175216675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.9416798044179, "step_time": 0.6667735443115235} +{"epoch": 0, "iter": 18051, "iter_tflops": 27.565828575966247, "iter_time": 0.7484300155639649, "loss": 0.22194525599479675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.82876246212687, "step_time": 0.6481902503967285} +{"epoch": 0, "iter": 18052, "iter_tflops": 29.798949870832832, "iter_time": 0.6923429718017577, "loss": 0.31500810384750366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.184163624598064, "step_time": 0.6035278129577637} +{"epoch": 0, "iter": 18053, 
"iter_tflops": 9.249709285978424, "iter_time": 2.2304585876464844, "loss": 0.386476069688797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.614160818454385, "step_time": 2.1459068450927736} +{"epoch": 0, "iter": 18054, "iter_tflops": 18.627886275062377, "iter_time": 1.1075380859375, "loss": 0.2736727297306061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.823894064912523, "step_time": 0.9453442840576172} +{"epoch": 0, "iter": 18055, "iter_tflops": 24.64311355124206, "iter_time": 0.8371950836181641, "loss": 0.2895984351634979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.478012871186344, "step_time": 0.7244569206237792} +{"epoch": 0, "iter": 18056, "iter_tflops": 37.61298901369586, "iter_time": 0.5485098114013671, "loss": 0.5002314448356628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.236895632325776, "step_time": 0.5003066596984863} +{"epoch": 0, "iter": 18057, "iter_tflops": 15.812579907970077, "iter_time": 1.3047265930175782, "loss": 0.38472673296928406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.803432440041973, "step_time": 1.2277904281616212} +{"epoch": 0, "iter": 18058, "iter_tflops": 17.78539409029717, "iter_time": 1.1600020446777342, "loss": 0.41607826948165894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.459820594377984, "step_time": 0.9613823852539063} +{"epoch": 0, "iter": 18059, "iter_tflops": 47.11164732125932, "iter_time": 0.4379191703796387, "loss": 0.36406221985816956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.108167108316, "step_time": 0.40367508125305174} +{"epoch": 0, "iter": 18060, "iter_tflops": 46.69073277118265, "iter_time": 0.44186698913574224, "loss": 0.43501824140548706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.67910040714201, "step_time": 0.40709273338317875} +{"epoch": 0, "iter": 18061, "iter_tflops": 38.624593363055894, "iter_time": 0.5341439666748047, "loss": 0.7992864847183228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59051704132328, "step_time": 0.49605282592773436} +{"epoch": 0, 
"iter": 18062, "iter_tflops": 7.9386718646305585, "iter_time": 2.5988092041015625, "loss": 0.5659885406494141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.52249208488978, "step_time": 2.166564521789551} +{"epoch": 0, "iter": 18063, "iter_tflops": 10.133918881681527, "iter_time": 2.0358455352783205, "loss": 0.538314938545227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.46109879733535, "step_time": 1.655639991760254} +{"epoch": 0, "iter": 18064, "iter_tflops": 39.78275548120716, "iter_time": 0.518593879699707, "loss": 0.5525152683258057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.68814437169751, "step_time": 0.4832979698181152} +{"epoch": 0, "iter": 18065, "iter_tflops": 22.655048908085075, "iter_time": 0.746629508972168, "loss": 0.4005989730358124, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 23.962419959510665, "step_time": 0.7058939819335938} +{"epoch": 0, "iter": 18066, "iter_tflops": 9.662715830523652, "iter_time": 1.750535598754883, "loss": 0.35278621315956116, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 12.152116516108482, "step_time": 1.391932674407959} +{"epoch": 0, "iter": 18067, "iter_tflops": 26.311384099681835, "iter_time": 0.642874885559082, "loss": 0.5238797068595886, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 28.27699901851872, "step_time": 0.5981868171691895} +{"epoch": 0, "iter": 18068, "iter_tflops": 24.232052175540794, "iter_time": 0.6980394363403319, "loss": 0.37031862139701843, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 26.012018777391237, "step_time": 0.6502735595703125} +{"epoch": 0, "iter": 18069, "iter_tflops": 36.946316498464164, "iter_time": 0.5584073181152344, "loss": 0.011592335999011993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.447645882923, "step_time": 0.49776273345947264} +{"epoch": 0, "iter": 18070, "iter_tflops": 41.297277355169605, "iter_time": 0.49957514953613286, "loss": 0.01009521633386612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.81478056942457, "step_time": 0.4503152313232422} 
+{"epoch": 0, "iter": 18071, "iter_tflops": 48.07225505352983, "iter_time": 0.4291684150695801, "loss": 0.01422986201941967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.305137317459995, "step_time": 0.3870376205444336} +{"epoch": 0, "iter": 18072, "iter_tflops": 43.56133951089875, "iter_time": 0.4736101722717285, "loss": 0.0041340552270412445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.405905318201185, "step_time": 0.42621026039123533} +{"epoch": 0, "iter": 18073, "iter_tflops": 19.203779404143933, "iter_time": 1.0743246459960938, "loss": 0.5855807065963745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.70643744169455, "step_time": 0.996361328125} +{"epoch": 0, "iter": 18074, "iter_tflops": 18.999351155160884, "iter_time": 1.0858841094970701, "loss": 0.6559482216835022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.746619639233582, "step_time": 0.9069960212707521} +{"epoch": 0, "iter": 18075, "iter_tflops": 35.697997516119564, "iter_time": 0.577934196472168, "loss": 0.5789458155632019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.88454185119117, "step_time": 0.5305731410980223} +{"epoch": 0, "iter": 18076, "iter_tflops": 37.85894535146167, "iter_time": 0.5449463348388672, "loss": 0.48675239086151123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10882790200713, "step_time": 0.5018652820587157} +{"epoch": 0, "iter": 18077, "iter_tflops": 21.070463275064473, "iter_time": 0.9791475982666015, "loss": 0.2978297173976898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.67057528290736, "step_time": 0.9100383758544922} +{"epoch": 0, "iter": 18078, "iter_tflops": 17.08143236840524, "iter_time": 1.2078081665039064, "loss": 0.32061654329299927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.387352946886182, "step_time": 1.011955478668213} +{"epoch": 0, "iter": 18079, "iter_tflops": 37.530200928312716, "iter_time": 0.5497197723388672, "loss": 0.3054400384426117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.11970081944665, "step_time": 
0.5017325782775879} +{"epoch": 0, "iter": 18080, "iter_tflops": 38.33072417006088, "iter_time": 0.5382390747070313, "loss": 0.32994168996810913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.18915095636749, "step_time": 0.48901419067382806} +{"epoch": 0, "iter": 18081, "iter_tflops": 18.63006386651557, "iter_time": 1.1074086303710937, "loss": 0.5392780900001526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.953200011141817, "step_time": 1.0339741744995117} +{"epoch": 0, "iter": 18082, "iter_tflops": 20.569101221982372, "iter_time": 1.0030138549804688, "loss": 0.6629548668861389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.430895636625664, "step_time": 0.844467342376709} +{"epoch": 0, "iter": 18083, "iter_tflops": 41.58307676337702, "iter_time": 0.4961415824890137, "loss": 0.4866550862789154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.70144022864099, "step_time": 0.4615308456420898} +{"epoch": 0, "iter": 18084, "iter_tflops": 44.061586638788526, "iter_time": 0.46823310470581053, "loss": 0.5270426869392395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.26048436412649, "step_time": 0.4365400352478027} +{"epoch": 0, "iter": 18085, "iter_tflops": 24.426372656172084, "iter_time": 0.8446237106323242, "loss": 0.7228022217750549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.671736846281533, "step_time": 0.8036500854492187} +{"epoch": 0, "iter": 18086, "iter_tflops": 14.260122468920612, "iter_time": 1.4467683258056643, "loss": 0.7408010959625244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.937280270528085, "step_time": 1.2180877437591553} +{"epoch": 0, "iter": 18087, "iter_tflops": 37.68680178982286, "iter_time": 0.5474355087280274, "loss": 0.5209288001060486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.08959735676887, "step_time": 0.5021001625061035} +{"epoch": 0, "iter": 18088, "iter_tflops": 36.94865382066143, "iter_time": 0.5583719940185546, "loss": 0.7665607929229736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.48159687296811, 
"step_time": 0.5096412963867187} +{"epoch": 0, "iter": 18089, "iter_tflops": 24.047881293547448, "iter_time": 0.8579173049926758, "loss": 0.41120973229408264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.993476871465646, "step_time": 0.7937027282714842} +{"epoch": 0, "iter": 18090, "iter_tflops": 44.350770960868516, "iter_time": 0.4651800422668457, "loss": 0.5385758280754089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.20099370234728, "step_time": 0.4280221614837646} +{"epoch": 0, "iter": 18091, "iter_tflops": 45.40970260676691, "iter_time": 0.45433227539062504, "loss": 0.40601664781570435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.2769142172861, "step_time": 0.41867665290832523} +{"epoch": 0, "iter": 18092, "iter_tflops": 44.313536721649406, "iter_time": 0.4655709075927734, "loss": 0.45458951592445374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.712654144475465, "step_time": 0.4324029731750489} +{"epoch": 0, "iter": 18093, "iter_tflops": 44.95619158286101, "iter_time": 0.4589155082702637, "loss": 0.041738517582416534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.912679477526304, "step_time": 0.4217943840026856} +{"epoch": 0, "iter": 18094, "iter_tflops": 43.152432248658506, "iter_time": 0.4780980453491211, "loss": 0.08504147082567215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.33112060707359, "step_time": 0.4268697528839111} +{"epoch": 0, "iter": 18095, "iter_tflops": 44.319606188364936, "iter_time": 0.46550714874267574, "loss": 0.6659189462661743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.580237162682394, "step_time": 0.43360636138916014} +{"epoch": 0, "iter": 18096, "iter_tflops": 43.99775853298727, "iter_time": 0.4689123764038086, "loss": 0.5312852263450623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32174987209476, "step_time": 0.43597486495971677} +{"epoch": 0, "iter": 18097, "iter_tflops": 33.0870279404116, "iter_time": 0.6235402450561524, "loss": 0.1592126041650772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
35.362364055026596, "step_time": 0.5834195213317871} +{"epoch": 0, "iter": 18098, "iter_tflops": 10.733145053481763, "iter_time": 1.922185287475586, "loss": 0.09863019734621048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.032914185198168, "step_time": 1.5829992599487304} +{"epoch": 0, "iter": 18099, "iter_tflops": 16.858245614473773, "iter_time": 1.223798370361328, "loss": 0.07421768456697464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.483656963497765, "step_time": 1.0588922576904296} +{"epoch": 0, "iter": 18100, "iter_tflops": 45.12740925482474, "iter_time": 0.4571743392944336, "loss": 0.16242116689682007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.94982806384639, "step_time": 0.42147427940368654} +{"epoch": 0, "iter": 18101, "iter_tflops": 16.409064790134508, "iter_time": 0.9035290451049806, "loss": 0.3361564874649048, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 17.17590626654107, "step_time": 0.8631897735595704} +{"epoch": 0, "iter": 18102, "iter_tflops": 13.035811309190203, "iter_time": 1.1373336334228514, "loss": 0.35353732109069824, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.38022498773565, "step_time": 0.9639694252014162} +{"epoch": 0, "iter": 18103, "iter_tflops": 22.965651149540097, "iter_time": 0.6455757141113281, "loss": 0.30045661330223083, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 24.719040552502634, "step_time": 0.5997832565307618} +{"epoch": 0, "iter": 18104, "iter_tflops": 24.555906313823453, "iter_time": 0.6037678451538087, "loss": 0.18852710723876953, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 26.289822763135316, "step_time": 0.5639469985961914} +{"epoch": 0, "iter": 18105, "iter_tflops": 15.972576792851058, "iter_time": 1.2916571807861328, "loss": 0.05169432610273361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.955025456229702, "step_time": 1.2168128890991212} +{"epoch": 0, "iter": 18106, "iter_tflops": 15.78885025625888, "iter_time": 1.306687515258789, "loss": 0.04593750834465027, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 20.550552472004654, "step_time": 1.0039191665649414} +{"epoch": 0, "iter": 18107, "iter_tflops": 40.067101471367515, "iter_time": 0.5149135513305664, "loss": 0.06179787591099739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.515649201968344, "step_time": 0.46345709609985347} +{"epoch": 0, "iter": 18108, "iter_tflops": 42.66504886469304, "iter_time": 0.4835595893859863, "loss": 0.08070172369480133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00469451933951, "step_time": 0.43891559600830077} +{"epoch": 0, "iter": 18109, "iter_tflops": 32.349702353686695, "iter_time": 0.6377521896362305, "loss": 0.42646104097366333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.72727522244723, "step_time": 0.5774605922698974} +{"epoch": 0, "iter": 18110, "iter_tflops": 47.89451809910863, "iter_time": 0.43076106262207026, "loss": 0.3664427697658539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14285295906067, "step_time": 0.3956648387908936} +{"epoch": 0, "iter": 18111, "iter_tflops": 46.0551197797413, "iter_time": 0.44796525573730467, "loss": 0.374563068151474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96486591563654, "step_time": 0.412912015914917} +{"epoch": 0, "iter": 18112, "iter_tflops": 48.763258769536165, "iter_time": 0.4230868492126465, "loss": 0.33077725768089294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.68033439610441, "step_time": 0.39162799072265625} +{"epoch": 0, "iter": 18113, "iter_tflops": 21.455130137576088, "iter_time": 0.9615925598144531, "loss": 0.7108705043792725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.377485047554252, "step_time": 0.921957649230957} +{"epoch": 0, "iter": 18114, "iter_tflops": 16.522935473814268, "iter_time": 1.2486336669921876, "loss": 0.4495340883731842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.71060929884046, "step_time": 0.9502770385742187} +{"epoch": 0, "iter": 18115, "iter_tflops": 44.2618799908014, "iter_time": 0.46611426162719727, "loss": 0.7508392930030823, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 47.551550770313575, "step_time": 0.4338679428100586} +{"epoch": 0, "iter": 18116, "iter_tflops": 44.23063451127281, "iter_time": 0.46644353485107426, "loss": 0.6671668887138367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.302862781111386, "step_time": 0.4361489410400391} +{"epoch": 0, "iter": 18117, "iter_tflops": 28.3387773804578, "iter_time": 0.7031433639526368, "loss": 0.05738052725791931, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 30.068485797843504, "step_time": 0.6626946029663086} +{"epoch": 0, "iter": 18118, "iter_tflops": 12.714100358683833, "iter_time": 1.567253890991211, "loss": 0.0771133080124855, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 15.392793697813996, "step_time": 1.2945163593292235} +{"epoch": 0, "iter": 18119, "iter_tflops": 39.48063300315899, "iter_time": 0.5047088088989257, "loss": 0.08504147827625275, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 43.64321658804458, "step_time": 0.45657091331481936} +{"epoch": 0, "iter": 18120, "iter_tflops": 42.87454451337974, "iter_time": 0.4647565002441406, "loss": 0.10137961804866791, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 47.34295929276414, "step_time": 0.4208909530639648} +{"epoch": 0, "iter": 18121, "iter_tflops": 16.92951274200752, "iter_time": 1.2186466217041017, "loss": 0.8579860925674438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.977170868363906, "step_time": 1.1476273803710937} +{"epoch": 0, "iter": 18122, "iter_tflops": 18.988110563144307, "iter_time": 1.0865269317626953, "loss": 0.4838149845600128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.192885396063478, "step_time": 0.8527752342224122} +{"epoch": 0, "iter": 18123, "iter_tflops": 36.01735482520115, "iter_time": 0.5728097915649414, "loss": 0.5013186931610107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.10400397867983, "step_time": 0.527595422744751} +{"epoch": 0, "iter": 18124, "iter_tflops": 32.79785871370436, "iter_time": 0.6290378189086914, "loss": 
0.5849242806434631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.32324701625519, "step_time": 0.584065601348877} +{"epoch": 0, "iter": 18125, "iter_tflops": 13.083884340863557, "iter_time": 1.0988431701660155, "loss": 0.05230601131916046, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 14.231245508459887, "step_time": 1.0102514877319337} +{"epoch": 0, "iter": 18126, "iter_tflops": 15.290313075833774, "iter_time": 0.940277473449707, "loss": 0.040125541388988495, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 18.1473666651918, "step_time": 0.7922437019348144} +{"epoch": 0, "iter": 18127, "iter_tflops": 39.785770176835655, "iter_time": 0.36136379623413084, "loss": 0.02985529415309429, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 43.54419852837648, "step_time": 0.3301734199523926} +{"epoch": 0, "iter": 18128, "iter_tflops": 37.40528403985382, "iter_time": 0.3843611221313476, "loss": 0.01945878006517887, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 40.79408141958474, "step_time": 0.35243193244934085} +{"epoch": 0, "iter": 18129, "iter_tflops": 30.390276569582756, "iter_time": 0.6788715286254884, "loss": 0.08846110850572586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.52101008540846, "step_time": 0.6343927650451661} +{"epoch": 0, "iter": 18130, "iter_tflops": 19.142760799512185, "iter_time": 1.0777491149902343, "loss": 0.10559699684381485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.785092256357654, "step_time": 0.9470280532836913} +{"epoch": 0, "iter": 18131, "iter_tflops": 49.977558797399354, "iter_time": 0.41280714797973633, "loss": 0.09813793748617172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.487574281478, "step_time": 0.37863850212097167} +{"epoch": 0, "iter": 18132, "iter_tflops": 50.70199179188915, "iter_time": 0.406908935546875, "loss": 0.13451823592185974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.88587391421048, "step_time": 0.37589077186584474} +{"epoch": 0, "iter": 18133, "iter_tflops": 29.504233888001426, "iter_time": 
0.6992587432861328, "loss": 0.3968682289123535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.354932938547133, "step_time": 0.6579855728149414} +{"epoch": 0, "iter": 18134, "iter_tflops": 22.29898298988919, "iter_time": 0.9252033386230468, "loss": 0.3711210787296295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.261894324777366, "step_time": 0.8166882991790771} +{"epoch": 0, "iter": 18135, "iter_tflops": 44.53574761158493, "iter_time": 0.4632479438781738, "loss": 0.3288213610649109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98377286449477, "step_time": 0.4299598026275635} +{"epoch": 0, "iter": 18136, "iter_tflops": 43.463484932405805, "iter_time": 0.47467646789550777, "loss": 0.3156944215297699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.685211328525455, "step_time": 0.44191924858093257} +{"epoch": 0, "iter": 18137, "iter_tflops": 27.037927031812927, "iter_time": 0.7630427246093751, "loss": 0.5383919477462769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.627493030811795, "step_time": 0.7206741256713868} +{"epoch": 0, "iter": 18138, "iter_tflops": 15.615734662945863, "iter_time": 1.3211734161376953, "loss": 0.4736720621585846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.906583302053924, "step_time": 1.0363955078125} +{"epoch": 0, "iter": 18139, "iter_tflops": 35.32154333392206, "iter_time": 0.5840937728881836, "loss": 0.6524468660354614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.636292058633636, "step_time": 0.5339822330474853} +{"epoch": 0, "iter": 18140, "iter_tflops": 39.94949180420295, "iter_time": 0.5164294357299805, "loss": 0.5297269821166992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.226091671391146, "step_time": 0.47728334236145015} +{"epoch": 0, "iter": 18141, "iter_tflops": 32.34589126219365, "iter_time": 0.6378273315429688, "loss": 0.663196861743927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.61616885184762, "step_time": 0.5792620086669921} +{"epoch": 0, "iter": 18142, "iter_tflops": 36.36802801746805, 
"iter_time": 0.5672865600585938, "loss": 0.5719606280326843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.91053979097802, "step_time": 0.5169334621429443} +{"epoch": 0, "iter": 18143, "iter_tflops": 41.19491496231013, "iter_time": 0.5008165092468262, "loss": 0.42311960458755493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.788933498806294, "step_time": 0.46062926483154293} +{"epoch": 0, "iter": 18144, "iter_tflops": 36.86219452741944, "iter_time": 0.559681640625, "loss": 0.6189101934432983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17722253964279, "step_time": 0.513502233505249} +{"epoch": 0, "iter": 18145, "iter_tflops": 23.043255332130673, "iter_time": 0.8953202667236329, "loss": 0.6047128438949585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.738815100985622, "step_time": 0.833956413269043} +{"epoch": 0, "iter": 18146, "iter_tflops": 26.500852623339753, "iter_time": 0.7785067825317383, "loss": 0.6595606207847595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.79050683180822, "step_time": 0.6291788539886474} +{"epoch": 0, "iter": 18147, "iter_tflops": 35.454824132582814, "iter_time": 0.581898063659668, "loss": 0.7519652843475342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.662477846768695, "step_time": 0.5336205711364747} +{"epoch": 0, "iter": 18148, "iter_tflops": 37.209953988981155, "iter_time": 0.554450927734375, "loss": 0.5793851017951965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.32182019791633, "step_time": 0.5116607685089112} +{"epoch": 0, "iter": 18149, "iter_tflops": 20.811720033909992, "iter_time": 0.9913209228515626, "loss": 0.23387545347213745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.67828040608861, "step_time": 0.9097291831970215} +{"epoch": 0, "iter": 18150, "iter_tflops": 20.627323144178064, "iter_time": 1.0001827850341798, "loss": 0.24941492080688477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.05051507631708, "step_time": 0.8950382862091064} +{"epoch": 0, "iter": 18151, "iter_tflops": 
48.38234799124076, "iter_time": 0.426417781829834, "loss": 0.1685742884874344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.578374386874636, "step_time": 0.3923874359130859} +{"epoch": 0, "iter": 18152, "iter_tflops": 50.591728783809735, "iter_time": 0.4077957801818848, "loss": 0.21887809038162231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.72274281816839, "step_time": 0.3770113201141358} +{"epoch": 0, "iter": 18153, "iter_tflops": 27.49060607446351, "iter_time": 0.7504779434204101, "loss": 0.35087382793426514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.02912204277104, "step_time": 0.7107033233642578} +{"epoch": 0, "iter": 18154, "iter_tflops": 20.251267296679735, "iter_time": 1.0187556762695313, "loss": 0.409177303314209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.869878268507644, "step_time": 0.9021077098846435} +{"epoch": 0, "iter": 18155, "iter_tflops": 48.71858127208183, "iter_time": 0.42347484207153324, "loss": 0.4767424166202545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.83972737808398, "step_time": 0.39044663047790523} +{"epoch": 0, "iter": 18156, "iter_tflops": 42.158515095258984, "iter_time": 0.4893695487976074, "loss": 0.662670373916626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17232960875191, "step_time": 0.4567197151184082} +{"epoch": 0, "iter": 18157, "iter_tflops": 35.32770920502851, "iter_time": 0.583991828918457, "loss": 0.23863019049167633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.94875480779589, "step_time": 0.543656665802002} +{"epoch": 0, "iter": 18158, "iter_tflops": 23.533795990372347, "iter_time": 0.8766581268310547, "loss": 0.18422797322273254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.530268771793594, "step_time": 0.5497187786102294} +{"epoch": 0, "iter": 18159, "iter_tflops": 45.798537157295726, "iter_time": 0.4504749450683594, "loss": 0.20850564539432526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.448589626653934, "step_time": 0.4172230930328369} +{"epoch": 0, "iter": 18160, 
"iter_tflops": 46.51962954349874, "iter_time": 0.4434922142028809, "loss": 0.17199493944644928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.17937402791927, "step_time": 0.41114688873291017} +{"epoch": 0, "iter": 18161, "iter_tflops": 30.41722444966243, "iter_time": 0.6782700881958009, "loss": 0.038064662367105484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.60208171681452, "step_time": 0.6328152198791505} +{"epoch": 0, "iter": 18162, "iter_tflops": 22.605518386776243, "iter_time": 0.9126573944091797, "loss": 0.07814805209636688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.79583396413209, "step_time": 0.7997839317321777} +{"epoch": 0, "iter": 18163, "iter_tflops": 49.63727612197607, "iter_time": 0.41563710021972655, "loss": 0.04839153215289116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.48171926119721, "step_time": 0.3786791934967041} +{"epoch": 0, "iter": 18164, "iter_tflops": 53.99197646370683, "iter_time": 0.38211406326293945, "loss": 0.060746390372514725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.04516983145734, "step_time": 0.34941204452514646} +{"epoch": 0, "iter": 18165, "iter_tflops": 46.86752060081182, "iter_time": 0.4402002334594727, "loss": 0.013224012218415737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.32480818121051, "step_time": 0.40197117614746086} +{"epoch": 0, "iter": 18166, "iter_tflops": 9.883441330743052, "iter_time": 2.0874402770996094, "loss": 0.02197856269776821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.462991608624614, "step_time": 1.655388542175293} +{"epoch": 0, "iter": 18167, "iter_tflops": 10.840023014462016, "iter_time": 1.903233367919922, "loss": 0.06320973485708237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.234562673887227, "step_time": 1.5588798828125001} +{"epoch": 0, "iter": 18168, "iter_tflops": 22.36069382584785, "iter_time": 0.9226499710083007, "loss": 0.02884778566658497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.774151304050662, "step_time": 0.7428163433074951} +{"epoch": 
0, "iter": 18169, "iter_tflops": 14.995293536260645, "iter_time": 1.070557388305664, "loss": 0.33408358693122864, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 16.04773425222667, "step_time": 1.0003482131958008} +{"epoch": 0, "iter": 18170, "iter_tflops": 14.348789206678216, "iter_time": 1.1187928161621095, "loss": 0.24644134938716888, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 18.297996300207604, "step_time": 0.8773267860412598} +{"epoch": 0, "iter": 18171, "iter_tflops": 27.29753712218584, "iter_time": 0.5880868377685546, "loss": 0.22179381549358368, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.259315507750777, "step_time": 0.548656795501709} +{"epoch": 0, "iter": 18172, "iter_tflops": 28.65726270104242, "iter_time": 0.5601833801269531, "loss": 0.4459993839263916, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.559491729609306, "step_time": 0.5253137855529785} +{"epoch": 0, "iter": 18173, "iter_tflops": 3.136884465168205, "iter_time": 0.6499796524047851, "loss": 0.202445387840271, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 3.3335651058690647, "step_time": 0.6116307945251465} +{"epoch": 0, "iter": 18174, "iter_tflops": 1.62652340765488, "iter_time": 1.253539337158203, "loss": 0.20633462071418762, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 2.1100503850093144, "step_time": 0.9662854919433593} +{"epoch": 0, "iter": 18175, "iter_tflops": 4.861911458537565, "iter_time": 0.41936408996582025, "loss": 0.31189438700675964, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 5.280895961715203, "step_time": 0.3860918846130372} +{"epoch": 0, "iter": 18176, "iter_tflops": 5.011833715574246, "iter_time": 0.4068193778991699, "loss": 0.29648807644844055, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 5.442482708053205, "step_time": 0.3746288566589356} +{"epoch": 0, "iter": 18177, "iter_tflops": 44.05205876531451, "iter_time": 0.46833437728881844, "loss": 0.4681790769100189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.21940826706042, "step_time": 0.42785870361328127} 
+{"epoch": 0, "iter": 18178, "iter_tflops": 34.48547582326271, "iter_time": 0.5982545700073242, "loss": 0.4823165535926819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.044625620175715, "step_time": 0.5422866744995116} +{"epoch": 0, "iter": 18179, "iter_tflops": 36.83156841014812, "iter_time": 0.5601470260620117, "loss": 0.4554807245731354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.98469977897914, "step_time": 0.5159747009277343} +{"epoch": 0, "iter": 18180, "iter_tflops": 35.8521407730397, "iter_time": 0.5754494171142577, "loss": 0.46422526240348816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.96703808000144, "step_time": 0.5294498767852783} +{"epoch": 0, "iter": 18181, "iter_tflops": 34.469507595926146, "iter_time": 0.5985317153930664, "loss": 0.6509600877761841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.847374241778276, "step_time": 0.5451129417419434} +{"epoch": 0, "iter": 18182, "iter_tflops": 35.48359648608084, "iter_time": 0.5814262237548827, "loss": 0.565146803855896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.63590592104047, "step_time": 0.53398756980896} +{"epoch": 0, "iter": 18183, "iter_tflops": 36.92267367881755, "iter_time": 0.5587648849487304, "loss": 0.6362510323524475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25611507573707, "step_time": 0.5124958896636963} +{"epoch": 0, "iter": 18184, "iter_tflops": 36.796314517821045, "iter_time": 0.5606836929321288, "loss": 0.4648820161819458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.702843542853806, "step_time": 0.5196376800537109} +{"epoch": 0, "iter": 18185, "iter_tflops": 35.04174840082935, "iter_time": 0.5887575378417969, "loss": 0.44681254029273987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.348657209757256, "step_time": 0.537987377166748} +{"epoch": 0, "iter": 18186, "iter_tflops": 38.33079425946686, "iter_time": 0.5382380905151367, "loss": 0.558796763420105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37176297316175, "step_time": 
0.48690665817260737} +{"epoch": 0, "iter": 18187, "iter_tflops": 36.81556142433166, "iter_time": 0.5603905715942382, "loss": 0.26062270998954773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.40818612486726, "step_time": 0.5105671768188477} +{"epoch": 0, "iter": 18188, "iter_tflops": 37.11711496923496, "iter_time": 0.5558377456665039, "loss": 0.2548438608646393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.16836223547638, "step_time": 0.5136155014038085} +{"epoch": 0, "iter": 18189, "iter_tflops": 18.08564301817968, "iter_time": 1.1407442626953126, "loss": 0.48002177476882935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.456760757884105, "step_time": 1.060356025695801} +{"epoch": 0, "iter": 18190, "iter_tflops": 26.20981372915476, "iter_time": 0.7871514739990234, "loss": 0.7306035757064819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.59524808497308, "step_time": 0.6971083145141601} +{"epoch": 0, "iter": 18191, "iter_tflops": 46.95333762489428, "iter_time": 0.43939567565917975, "loss": 0.5822775363922119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.504034180364506, "step_time": 0.40850387191772464} +{"epoch": 0, "iter": 18192, "iter_tflops": 45.563199088250244, "iter_time": 0.4528016891479492, "loss": 0.6404832601547241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17286043894742, "step_time": 0.4195626068115234} +{"epoch": 0, "iter": 18193, "iter_tflops": 31.828586572377922, "iter_time": 0.6481938323974609, "loss": 0.3429844081401825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.07483765658733, "step_time": 0.6054641761779785} +{"epoch": 0, "iter": 18194, "iter_tflops": 13.050912689149044, "iter_time": 1.580816146850586, "loss": 0.22174523770809174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.827924226817299, "step_time": 1.3034617309570313} +{"epoch": 0, "iter": 18195, "iter_tflops": 28.362995890109485, "iter_time": 0.7273947219848633, "loss": 0.2302454710006714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.803421048269403, 
"step_time": 0.6487067375183104} +{"epoch": 0, "iter": 18196, "iter_tflops": 42.90367756011197, "iter_time": 0.4808700485229492, "loss": 0.23187443614006042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.881334638154016, "step_time": 0.44007052421569826} +{"epoch": 0, "iter": 18197, "iter_tflops": 15.381839148468291, "iter_time": 1.0010673828125, "loss": 0.3066521883010864, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 16.35653141769255, "step_time": 0.9414133758544923} +{"epoch": 0, "iter": 18198, "iter_tflops": 21.477051061184593, "iter_time": 0.7169633026123046, "loss": 0.21860475838184357, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 23.531976856616012, "step_time": 0.654354606628418} +{"epoch": 0, "iter": 18199, "iter_tflops": 24.914149801749105, "iter_time": 0.6180526962280274, "loss": 0.40520331263542175, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.706831111385984, "step_time": 0.576566249847412} +{"epoch": 0, "iter": 18200, "iter_tflops": 23.068669152344913, "iter_time": 0.6674965667724609, "loss": 0.4109481871128082, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.83110842746345, "step_time": 0.6201196174621582} +{"epoch": 0, "iter": 18201, "iter_tflops": 17.248679912766576, "iter_time": 1.196096954345703, "loss": 0.4010425806045532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.361376382803943, "step_time": 1.123613670349121} +{"epoch": 0, "iter": 18202, "iter_tflops": 17.84634575382999, "iter_time": 1.1560402221679689, "loss": 0.38892731070518494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.379352666538797, "step_time": 0.9650008506774903} +{"epoch": 0, "iter": 18203, "iter_tflops": 38.06268861758735, "iter_time": 0.5420293273925781, "loss": 0.4145716428756714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.511767050058005, "step_time": 0.49699386405944823} +{"epoch": 0, "iter": 18204, "iter_tflops": 37.05623583552798, "iter_time": 0.5567509231567382, "loss": 0.35616928339004517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.18144331932375, "step_time": 0.513448293685913} +{"epoch": 0, "iter": 18205, "iter_tflops": 26.146099451446567, "iter_time": 0.7890696487426757, "loss": 0.5251733064651489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.09481500316581, "step_time": 0.7343381156921387} +{"epoch": 0, "iter": 18206, "iter_tflops": 13.210131700320751, "iter_time": 1.561762893676758, "loss": 0.5398589968681335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.002067509556198, "step_time": 1.3752166824340821} +{"epoch": 0, "iter": 18207, "iter_tflops": 7.6321710836852645, "iter_time": 2.7031749267578125, "loss": 0.36958131194114685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.017365421440008, "step_time": 2.287929183959961} +{"epoch": 0, "iter": 18208, "iter_tflops": 29.80279169381452, "iter_time": 0.6922537231445312, "loss": 0.6289223432540894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.38709577936619, "step_time": 0.5518239135742188} +{"epoch": 0, "iter": 18209, "iter_tflops": 19.972582762435305, "iter_time": 0.7812127075195313, "loss": 0.2717207074165344, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 21.146739882716755, "step_time": 0.7378364486694335} +{"epoch": 0, "iter": 18210, "iter_tflops": 15.093155200999789, "iter_time": 1.0337689666748047, "loss": 0.2910168766975403, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.61854094505895, "step_time": 0.9388811874389649} +{"epoch": 0, "iter": 18211, "iter_tflops": 28.3372063286342, "iter_time": 0.5506130447387695, "loss": 0.4933904707431793, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.20777515599332, "step_time": 0.5165172004699707} +{"epoch": 0, "iter": 18212, "iter_tflops": 26.25656383830554, "iter_time": 0.5942451400756836, "loss": 0.3136070668697357, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 27.854412662178984, "step_time": 0.5601566848754883} +{"epoch": 0, "iter": 18213, "iter_tflops": 45.47791117986543, "iter_time": 0.4536508598327637, "loss": 0.38145944476127625, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 49.737502258064964, "step_time": 0.4147995491027832} +{"epoch": 0, "iter": 18214, "iter_tflops": 45.52745194861289, "iter_time": 0.4531572189331054, "loss": 0.4388258755207062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.04763677066787, "step_time": 0.40415374374389645} +{"epoch": 0, "iter": 18215, "iter_tflops": 47.17006737652631, "iter_time": 0.4373768081665038, "loss": 0.4966960847377777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.78576930770763, "step_time": 0.40623768806457516} +{"epoch": 0, "iter": 18216, "iter_tflops": 45.6148014002989, "iter_time": 0.45228945159912104, "loss": 0.32661908864974976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27106832236825, "step_time": 0.41872632789611813} +{"epoch": 0, "iter": 18217, "iter_tflops": 37.35093034287062, "iter_time": 0.5523582229614257, "loss": 0.10174082219600677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.39014031115499, "step_time": 0.5107952919006348} +{"epoch": 0, "iter": 18218, "iter_tflops": 39.85346059480977, "iter_time": 0.517673828125, "loss": 0.11294128000736237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.89509126474373, "step_time": 0.47000912666320793} +{"epoch": 0, "iter": 18219, "iter_tflops": 47.29318316311417, "iter_time": 0.4362382087707519, "loss": 0.11405418813228607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.829937966024325, "step_time": 0.39805360221862796} +{"epoch": 0, "iter": 18220, "iter_tflops": 44.66956281391218, "iter_time": 0.4618602066040039, "loss": 0.08033691346645355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95190112259312, "step_time": 0.42145643043518066} +{"epoch": 0, "iter": 18221, "iter_tflops": 17.28126076220404, "iter_time": 1.1938419189453127, "loss": 0.05601126700639725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.51020252102512, "step_time": 1.1145795669555663} +{"epoch": 0, "iter": 18222, "iter_tflops": 22.711203168443454, "iter_time": 0.9084104156494142, "loss": 0.0731348842382431, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 30.64760023183589, "step_time": 0.6731715812683106} +{"epoch": 0, "iter": 18223, "iter_tflops": 50.56013166255357, "iter_time": 0.4080506286621094, "loss": 0.07960103452205658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.992370887978495, "step_time": 0.3751628303527832} +{"epoch": 0, "iter": 18224, "iter_tflops": 53.09297909255986, "iter_time": 0.3885842132568359, "loss": 0.1004396453499794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.00863087444773, "step_time": 0.3556555843353272} +{"epoch": 0, "iter": 18225, "iter_tflops": 25.807498485663654, "iter_time": 0.5760692672729492, "loss": 0.004105161409825087, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 27.897709690950908, "step_time": 0.532907787322998} +{"epoch": 0, "iter": 18226, "iter_tflops": 6.950998537848896, "iter_time": 2.138815979003906, "loss": 0.0016861313488334417, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 8.732627251779524, "step_time": 1.7024552078247068} +{"epoch": 0, "iter": 18227, "iter_tflops": 11.682696924170326, "iter_time": 1.2725577697753907, "loss": 0.0016336439875885844, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 13.968547723386651, "step_time": 1.0643129863739014} +{"epoch": 0, "iter": 18228, "iter_tflops": 34.343013688829075, "iter_time": 0.43289464569091796, "loss": 0.0004417699237819761, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 37.97448264714439, "step_time": 0.3914972820281982} +{"epoch": 0, "iter": 18229, "iter_tflops": 21.458709986602276, "iter_time": 0.7576540145874023, "loss": 0.43824103474617004, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 23.28163877058179, "step_time": 0.6983304710388184} +{"epoch": 0, "iter": 18230, "iter_tflops": 24.859762670976576, "iter_time": 0.6539997177124023, "loss": 0.3112955689430237, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.781922719084953, "step_time": 0.6070616340637207} +{"epoch": 0, "iter": 18231, "iter_tflops": 25.743869032397615, "iter_time": 0.631539794921875, "loss": 
0.34226295351982117, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 27.694731471961642, "step_time": 0.5870530929565431} +{"epoch": 0, "iter": 18232, "iter_tflops": 24.30220264302291, "iter_time": 0.6690042877197265, "loss": 0.34149834513664246, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.074386511763382, "step_time": 0.6235344314575195} +{"epoch": 0, "iter": 18233, "iter_tflops": 15.117266989584756, "iter_time": 1.0348276062011719, "loss": 0.038849711418151855, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 16.102918673270967, "step_time": 0.9714863204956055} +{"epoch": 0, "iter": 18234, "iter_tflops": 35.753318232320005, "iter_time": 0.43754722595214846, "loss": 0.037730034440755844, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 39.52639246396437, "step_time": 0.3957802429199219} +{"epoch": 0, "iter": 18235, "iter_tflops": 40.41461130526504, "iter_time": 0.38708191680908205, "loss": 0.12459995597600937, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 43.974900265518656, "step_time": 0.35574305152893065} +{"epoch": 0, "iter": 18236, "iter_tflops": 41.34481877771311, "iter_time": 0.3783730506896973, "loss": 0.06615189462900162, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 44.907083130978634, "step_time": 0.3483585243225098} +{"epoch": 0, "iter": 18237, "iter_tflops": 39.54532865257983, "iter_time": 0.5217074737548828, "loss": 0.25629955530166626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.46241134831789, "step_time": 0.48586721420288087} +{"epoch": 0, "iter": 18238, "iter_tflops": 12.756819660524696, "iter_time": 1.6172599487304689, "loss": 0.1502864956855774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.92443984596451, "step_time": 1.3823697052001953} +{"epoch": 0, "iter": 18239, "iter_tflops": 44.62095712411303, "iter_time": 0.4623633117675781, "loss": 0.19907473027706146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24408161230672, "step_time": 0.42763988494873045} +{"epoch": 0, "iter": 18240, "iter_tflops": 48.63443393297975, "iter_time": 
0.42420753860473637, "loss": 0.18495503067970276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.425033145019874, "step_time": 0.39353515434265135} +{"epoch": 0, "iter": 18241, "iter_tflops": 18.976719461903436, "iter_time": 1.0871791381835938, "loss": 0.08892987668514252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.76312643551774, "step_time": 1.0439185104370117} +{"epoch": 0, "iter": 18242, "iter_tflops": 17.129407311182394, "iter_time": 1.2044254150390625, "loss": 0.11851949989795685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.626004990114097, "step_time": 1.0002467041015626} +{"epoch": 0, "iter": 18243, "iter_tflops": 48.20038434273874, "iter_time": 0.4280275726318359, "loss": 0.1253979355096817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.655835205662136, "step_time": 0.3918102035522461} +{"epoch": 0, "iter": 18244, "iter_tflops": 51.48252231548504, "iter_time": 0.40073975753784186, "loss": 0.08815296739339828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.219442012299574, "step_time": 0.3669743556976318} +{"epoch": 0, "iter": 18245, "iter_tflops": 22.63301389767633, "iter_time": 0.5704898376464844, "loss": 0.006152280140668154, "lr": 3e-05, "seqlen": 5184.0, "step_tflops": 24.242644019285123, "step_time": 0.5326112289428712} +{"epoch": 0, "iter": 18246, "iter_tflops": 7.7191732543349305, "iter_time": 1.6727056121826172, "loss": 0.004471743479371071, "lr": 3e-05, "seqlen": 5184.0, "step_tflops": 9.669915104320586, "step_time": 1.33526554107666} +{"epoch": 0, "iter": 18247, "iter_tflops": 28.54984146890482, "iter_time": 0.45225835800170905, "loss": 0.0013898491160944104, "lr": 3e-05, "seqlen": 5184.0, "step_tflops": 31.756568999202898, "step_time": 0.4065900325775146} +{"epoch": 0, "iter": 18248, "iter_tflops": 29.607137350701727, "iter_time": 0.43610783004760734, "loss": 0.002366993110626936, "lr": 3e-05, "seqlen": 5184.0, "step_tflops": 32.71887045811815, "step_time": 0.39463172912597655} +{"epoch": 0, "iter": 18249, "iter_tflops": 
36.54504114801234, "iter_time": 0.5645387954711913, "loss": 0.16317187249660492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.224264014456494, "step_time": 0.5129017028808593} +{"epoch": 0, "iter": 18250, "iter_tflops": 35.008427856409654, "iter_time": 0.5893179092407226, "loss": 0.22193023562431335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.73141754414593, "step_time": 0.5065154800415039} +{"epoch": 0, "iter": 18251, "iter_tflops": 39.53707967231584, "iter_time": 0.5218163223266602, "loss": 0.26119497418403625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.11631865840471, "step_time": 0.47849849319458004} +{"epoch": 0, "iter": 18252, "iter_tflops": 39.72638308984717, "iter_time": 0.5193297729492187, "loss": 0.17301729321479797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59200125757471, "step_time": 0.47327704429626466} +{"epoch": 0, "iter": 18253, "iter_tflops": 19.895518814748666, "iter_time": 1.0369718780517578, "loss": 0.30178871750831604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.453927035934324, "step_time": 0.961646484375} +{"epoch": 0, "iter": 18254, "iter_tflops": 19.318337376860292, "iter_time": 1.0679538879394532, "loss": 0.3163313567638397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.389763566086057, "step_time": 0.8820565223693848} +{"epoch": 0, "iter": 18255, "iter_tflops": 42.787968257266606, "iter_time": 0.48217044067382814, "loss": 0.35861125588417053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06982248002652, "step_time": 0.4478222923278808} +{"epoch": 0, "iter": 18256, "iter_tflops": 48.66610223830589, "iter_time": 0.4239314956665039, "loss": 0.37206563353538513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76791321861143, "step_time": 0.390978006362915} +{"epoch": 0, "iter": 18257, "iter_tflops": 40.164282901075566, "iter_time": 0.513667667388916, "loss": 0.06766384094953537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.25001923963676, "step_time": 0.47701929092407225} +{"epoch": 0, "iter": 18258, 
"iter_tflops": 28.557617108945767, "iter_time": 0.7224375000000001, "loss": 0.0824415385723114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.57503700004497, "step_time": 0.6333406009674072} +{"epoch": 0, "iter": 18259, "iter_tflops": 41.45394535830009, "iter_time": 0.49768709182739257, "loss": 0.053269706666469574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.90353165802511, "step_time": 0.44944458007812493} +{"epoch": 0, "iter": 18260, "iter_tflops": 42.824426489812936, "iter_time": 0.4817599487304688, "loss": 0.06432201713323593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.23366857901748, "step_time": 0.43678787040710443} +{"epoch": 0, "iter": 18261, "iter_tflops": 26.957857808473218, "iter_time": 0.6977381668090821, "loss": 0.10193976759910583, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 29.110014474362078, "step_time": 0.646153106689453} +{"epoch": 0, "iter": 18262, "iter_tflops": 9.414447443923574, "iter_time": 1.9979426727294922, "loss": 0.08616094291210175, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 10.066838905811583, "step_time": 1.8684640197753906} +{"epoch": 0, "iter": 18263, "iter_tflops": 13.997165060963058, "iter_time": 1.3438097076416013, "loss": 0.07450823485851288, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 17.06865116318483, "step_time": 1.1019925422668457} +{"epoch": 0, "iter": 18264, "iter_tflops": 38.25126421401617, "iter_time": 0.491736068725586, "loss": 0.1496475636959076, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 42.08923669249522, "step_time": 0.4468963508605957} +{"epoch": 0, "iter": 18265, "iter_tflops": 19.514856819963438, "iter_time": 0.7827660446166992, "loss": 0.47404026985168457, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 20.882476332300183, "step_time": 0.7315017166137695} +{"epoch": 0, "iter": 18266, "iter_tflops": 8.037740846609505, "iter_time": 1.900480194091797, "loss": 0.4238618314266205, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 10.384611564886564, "step_time": 1.4709810943603514} +{"epoch": 0, 
"iter": 18267, "iter_tflops": 9.315470168465794, "iter_time": 1.639806365966797, "loss": 0.2454613745212555, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 10.994692176276862, "step_time": 1.389358341217041} +{"epoch": 0, "iter": 18268, "iter_tflops": 22.785537778838254, "iter_time": 0.6704062652587891, "loss": 0.3438093662261963, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.508254195207975, "step_time": 0.6232825546264648} +{"epoch": 0, "iter": 18269, "iter_tflops": 13.92232531447311, "iter_time": 1.0795836181640626, "loss": 0.3373189866542816, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 14.933938636332476, "step_time": 1.006453468322754} +{"epoch": 0, "iter": 18270, "iter_tflops": 11.919284505347097, "iter_time": 1.2610081024169921, "loss": 0.3808533549308777, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 14.403001491648094, "step_time": 1.0435543136596679} +{"epoch": 0, "iter": 18271, "iter_tflops": 27.03177065914345, "iter_time": 0.556024040222168, "loss": 0.45326003432273865, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.88168681901534, "step_time": 0.5204098510742188} +{"epoch": 0, "iter": 18272, "iter_tflops": 27.46915322997951, "iter_time": 0.5471706466674805, "loss": 0.4238761067390442, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.134749701999826, "step_time": 0.5158895988464356} +{"epoch": 0, "iter": 18273, "iter_tflops": 20.101299314863894, "iter_time": 1.026356216430664, "loss": 0.05362622067332268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.011437527684446, "step_time": 0.9818982391357421} +{"epoch": 0, "iter": 18274, "iter_tflops": 16.09704117887237, "iter_time": 1.281669921875, "loss": 0.1000719889998436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.304286206978823, "step_time": 0.9684010677337647} +{"epoch": 0, "iter": 18275, "iter_tflops": 41.42339891719547, "iter_time": 0.4980540962219238, "loss": 0.08415932208299637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.80523770130665, "step_time": 0.45040904808044435} +{"epoch": 
0, "iter": 18276, "iter_tflops": 44.753477977137635, "iter_time": 0.46099419403076175, "loss": 0.06485400348901749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.995797757299066, "step_time": 0.42107883644104} +{"epoch": 0, "iter": 18277, "iter_tflops": 21.2568999398144, "iter_time": 0.970559844970703, "loss": 0.4921545386314392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.615561779501373, "step_time": 0.9122520904541015} +{"epoch": 0, "iter": 18278, "iter_tflops": 23.093452432774182, "iter_time": 0.893374153137207, "loss": 0.43831440806388855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.296968227753048, "step_time": 0.7290920124053956} +{"epoch": 0, "iter": 18279, "iter_tflops": 43.38424985784784, "iter_time": 0.47554339599609374, "loss": 0.4468305706977844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.04404771210544, "step_time": 0.4385484352111816} +{"epoch": 0, "iter": 18280, "iter_tflops": 49.68643595913853, "iter_time": 0.41522586822509766, "loss": 0.4075699746608734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.00013614188246, "step_time": 0.3820563240051269} +{"epoch": 0, "iter": 18281, "iter_tflops": 1.4644780120645462, "iter_time": 1.2043192749023437, "loss": 0.12117195129394531, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 1.519722988010246, "step_time": 1.1605398559570312} +{"epoch": 0, "iter": 18282, "iter_tflops": 1.5956757289229984, "iter_time": 1.1052991943359376, "loss": 0.1354401409626007, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 1.973830058330605, "step_time": 0.893541513442993} +{"epoch": 0, "iter": 18283, "iter_tflops": 3.139176100944138, "iter_time": 0.5618350296020508, "loss": 0.13959276676177979, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 3.42450634781323, "step_time": 0.5150228729248046} +{"epoch": 0, "iter": 18284, "iter_tflops": 3.7267651763994336, "iter_time": 0.4732520065307617, "loss": 0.2508903741836548, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 4.07435850563102, "step_time": 0.43287773895263676} 
+{"epoch": 0, "iter": 18285, "iter_tflops": 15.156867560211694, "iter_time": 1.3611713256835938, "loss": 0.12857520580291748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.098364190004435, "step_time": 1.2815645904541015} +{"epoch": 0, "iter": 18286, "iter_tflops": 22.220569287336666, "iter_time": 0.9284682693481445, "loss": 0.10324525088071823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.313402602315232, "step_time": 0.6588582458496094} +{"epoch": 0, "iter": 18287, "iter_tflops": 46.71427445823554, "iter_time": 0.44164430999755855, "loss": 0.14902976155281067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.60311932635473, "step_time": 0.40770398712158207} +{"epoch": 0, "iter": 18288, "iter_tflops": 47.617013767897994, "iter_time": 0.43327146911621095, "loss": 0.10838861763477325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.5916098546993, "step_time": 0.3998924160003663} +{"epoch": 0, "iter": 18289, "iter_tflops": 25.971589100033288, "iter_time": 0.7943716278076172, "loss": 0.6035864353179932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.425378454469143, "step_time": 0.7522628555297853} +{"epoch": 0, "iter": 18290, "iter_tflops": 11.311365181670553, "iter_time": 1.8239260406494138, "loss": 0.5839333534240723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.125112214984188, "step_time": 1.4605967864990235} +{"epoch": 0, "iter": 18291, "iter_tflops": 35.55150517293472, "iter_time": 0.5803156127929687, "loss": 0.6184825301170349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.705183324151825, "step_time": 0.5330317993164062} +{"epoch": 0, "iter": 18292, "iter_tflops": 35.504904917780124, "iter_time": 0.581077278137207, "loss": 0.3434886932373047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21376950625458, "step_time": 0.5398863754272462} +{"epoch": 0, "iter": 18293, "iter_tflops": 16.00657140472597, "iter_time": 1.0285370178222655, "loss": 0.0011792131699621677, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 17.080593827888496, "step_time": 
0.9638629302978514} +{"epoch": 0, "iter": 18294, "iter_tflops": 15.450417632336801, "iter_time": 1.065560272216797, "loss": 0.0051466538570821285, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 18.959739468402624, "step_time": 0.8683321437835694} +{"epoch": 0, "iter": 18295, "iter_tflops": 46.120785543371824, "iter_time": 0.35696163940429687, "loss": 0.002755137160420418, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 50.61306354864025, "step_time": 0.325278694152832} +{"epoch": 0, "iter": 18296, "iter_tflops": 44.541193913042605, "iter_time": 0.36962078857421876, "loss": 0.0028382183518260717, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 48.86495235360934, "step_time": 0.3369153232574463} +{"epoch": 0, "iter": 18297, "iter_tflops": 23.06142635863905, "iter_time": 0.5933917236328125, "loss": 0.06299377232789993, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 24.747650608919784, "step_time": 0.5529599456787109} +{"epoch": 0, "iter": 18298, "iter_tflops": 16.088925172863703, "iter_time": 0.8505515060424804, "loss": 0.08873425424098969, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 18.405766052116487, "step_time": 0.7434876384735107} +{"epoch": 0, "iter": 18299, "iter_tflops": 35.30895865211618, "iter_time": 0.3875633850097656, "loss": 0.0766250491142273, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 38.52538988785006, "step_time": 0.3552062568664551} +{"epoch": 0, "iter": 18300, "iter_tflops": 33.297066602170666, "iter_time": 0.4109809341430664, "loss": 0.06351719796657562, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 36.10406083146869, "step_time": 0.37902826499938963} +{"epoch": 0, "iter": 18301, "iter_tflops": 37.11531307113904, "iter_time": 0.5558647308349609, "loss": 0.5605388879776001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.86956706095358, "step_time": 0.5174646987915039} +{"epoch": 0, "iter": 18302, "iter_tflops": 47.784649494835016, "iter_time": 0.4317514877319336, "loss": 0.5221083760261536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
52.20504154235137, "step_time": 0.3951935081481934} +{"epoch": 0, "iter": 18303, "iter_tflops": 44.58638592656591, "iter_time": 0.46272181701660153, "loss": 0.4278969466686249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.21805622545539, "step_time": 0.4278707008361816} +{"epoch": 0, "iter": 18304, "iter_tflops": 43.33457846529558, "iter_time": 0.4760884780883789, "loss": 0.5119491815567017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.499992437085865, "step_time": 0.4436795024871826} +{"epoch": 0, "iter": 18305, "iter_tflops": 23.704857211752483, "iter_time": 0.46268043899536127, "loss": 0.017067935317754745, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 26.007292955284637, "step_time": 0.421719160079956} +{"epoch": 0, "iter": 18306, "iter_tflops": 5.284931100729053, "iter_time": 2.075291717529297, "loss": 0.0051983012817800045, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 5.959157184939882, "step_time": 1.8404907608032226} +{"epoch": 0, "iter": 18307, "iter_tflops": 4.55998751833533, "iter_time": 2.4052201232910155, "loss": 0.003034804482012987, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 5.502892920644721, "step_time": 1.9930923423767088} +{"epoch": 0, "iter": 18308, "iter_tflops": 27.770886862930826, "iter_time": 0.3949378280639648, "loss": 0.007356565911322832, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 30.49398807740843, "step_time": 0.3596700344085694} +{"epoch": 0, "iter": 18309, "iter_tflops": 22.652897152980056, "iter_time": 0.7340118484497071, "loss": 0.39352527260780334, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 23.90374428244135, "step_time": 0.6956021080017089} +{"epoch": 0, "iter": 18310, "iter_tflops": 11.584186747176554, "iter_time": 1.4353614349365236, "loss": 0.37893208861351013, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 15.396727630882948, "step_time": 1.079936939239502} +{"epoch": 0, "iter": 18311, "iter_tflops": 25.987135552504995, "iter_time": 0.6398356170654297, "loss": 0.35037875175476074, "lr": 3e-05, "seqlen": 
6640.0, "step_tflops": 27.88950018779306, "step_time": 0.5961919288635255} +{"epoch": 0, "iter": 18312, "iter_tflops": 25.043414769462977, "iter_time": 0.6639467926025391, "loss": 0.4480111002922058, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 26.893120480317027, "step_time": 0.6182806091308594} +{"epoch": 0, "iter": 18313, "iter_tflops": 17.94702709917452, "iter_time": 1.149554931640625, "loss": 0.45726364850997925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.229142437128555, "step_time": 1.0729076232910157} +{"epoch": 0, "iter": 18314, "iter_tflops": 16.202608345598392, "iter_time": 1.2733192749023436, "loss": 0.4161558151245117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.553554588746294, "step_time": 0.875922716140747} +{"epoch": 0, "iter": 18315, "iter_tflops": 43.973173491797816, "iter_time": 0.46917454147338866, "loss": 0.36837342381477356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.264027627167216, "step_time": 0.4365073089599609} +{"epoch": 0, "iter": 18316, "iter_tflops": 46.276720475055974, "iter_time": 0.44582012939453125, "loss": 0.3870784640312195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89043637754319, "step_time": 0.41352802276611333} +{"epoch": 0, "iter": 18317, "iter_tflops": 25.84186610596926, "iter_time": 0.7983592758178711, "loss": 0.4751100540161133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.220576603690827, "step_time": 0.7579227218627929} +{"epoch": 0, "iter": 18318, "iter_tflops": 15.617764976950971, "iter_time": 1.321001663208008, "loss": 0.5580964088439941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.441522416703403, "step_time": 1.118730495452881} +{"epoch": 0, "iter": 18319, "iter_tflops": 46.190832589778516, "iter_time": 0.44664909362792965, "loss": 0.4380284547805786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.02266382165765, "step_time": 0.41243492317199704} +{"epoch": 0, "iter": 18320, "iter_tflops": 40.989529344455455, "iter_time": 0.503325942993164, "loss": 0.6062666177749634, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 43.929320710312254, "step_time": 0.4696428985595703} +{"epoch": 0, "iter": 18321, "iter_tflops": 33.07322744277861, "iter_time": 0.6238004302978516, "loss": 0.04170208051800728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.35327561903228, "step_time": 0.5835695037841797} +{"epoch": 0, "iter": 18322, "iter_tflops": 13.600996342077798, "iter_time": 1.5168810424804686, "loss": 0.020076211541891098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.06498728563606, "step_time": 1.2842271919250487} +{"epoch": 0, "iter": 18323, "iter_tflops": 42.33315492659297, "iter_time": 0.4873507194519043, "loss": 0.047399070113897324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.82033098856878, "step_time": 0.44064390563964845} +{"epoch": 0, "iter": 18324, "iter_tflops": 44.93987975384854, "iter_time": 0.45908208084106444, "loss": 0.026600725948810577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.465413674223846, "step_time": 0.4170811882019043} +{"epoch": 0, "iter": 18325, "iter_tflops": 18.66899945978226, "iter_time": 1.0185903396606444, "loss": 0.15259142220020294, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 20.153090203501907, "step_time": 0.9435804786682128} +{"epoch": 0, "iter": 18326, "iter_tflops": 18.799916105189915, "iter_time": 1.011497200012207, "loss": 0.21192729473114014, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 26.32770540030686, "step_time": 0.7222833213806152} +{"epoch": 0, "iter": 18327, "iter_tflops": 43.694869321855585, "iter_time": 0.43520126724243163, "loss": 0.1550104022026062, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 47.353399131037236, "step_time": 0.4015775604248047} +{"epoch": 0, "iter": 18328, "iter_tflops": 46.30956253295869, "iter_time": 0.4106292839050293, "loss": 0.2170506715774536, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 50.07475118485554, "step_time": 0.37975350952148434} +{"epoch": 0, "iter": 18329, "iter_tflops": 32.035456816654474, "iter_time": 0.6440080947875977, "loss": 
0.07243048399686813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.186762455812605, "step_time": 0.60348193359375} +{"epoch": 0, "iter": 18330, "iter_tflops": 16.00466589457521, "iter_time": 1.2890674285888672, "loss": 0.16178883612155914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.284058022550777, "step_time": 1.069852283477783} +{"epoch": 0, "iter": 18331, "iter_tflops": 45.4498975556835, "iter_time": 0.4539304733276367, "loss": 0.11804524809122086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.32845144510899, "step_time": 0.41823922920227047} +{"epoch": 0, "iter": 18332, "iter_tflops": 51.09268663414294, "iter_time": 0.4037973899841309, "loss": 0.14260812103748322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.351236559806594, "step_time": 0.37273048973083495} +{"epoch": 0, "iter": 18333, "iter_tflops": 29.452660616841015, "iter_time": 0.700483184814453, "loss": 0.5922441482543945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.294207248800113, "step_time": 0.6592623786926268} +{"epoch": 0, "iter": 18334, "iter_tflops": 14.681182510286868, "iter_time": 1.405274642944336, "loss": 0.4538975656032562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.961906462115877, "step_time": 1.2163192596435546} +{"epoch": 0, "iter": 18335, "iter_tflops": 42.50019421498635, "iter_time": 0.4854352760314941, "loss": 0.635850191116333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74059743460367, "step_time": 0.45104556274414065} +{"epoch": 0, "iter": 18336, "iter_tflops": 47.8150771393104, "iter_time": 0.4314767379760742, "loss": 0.6782945990562439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.650742978534545, "step_time": 0.3994345932006836} +{"epoch": 0, "iter": 18337, "iter_tflops": 38.75025451378285, "iter_time": 0.5324118194580079, "loss": 0.6198747158050537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.962148321974446, "step_time": 0.491659610748291} +{"epoch": 0, "iter": 18338, "iter_tflops": 28.58370038853788, "iter_time": 0.7217782592773438, 
"loss": 0.7112685441970825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.93981664264366, "step_time": 0.5904751510620118} +{"epoch": 0, "iter": 18339, "iter_tflops": 42.06989026892946, "iter_time": 0.4904004592895507, "loss": 0.4717710614204407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.203625007009926, "step_time": 0.45640351867675777} +{"epoch": 0, "iter": 18340, "iter_tflops": 37.66899400994497, "iter_time": 0.5476943054199219, "loss": 0.5824357867240906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.02179089063802, "step_time": 0.515496509552002} +{"epoch": 0, "iter": 18341, "iter_tflops": 47.47861623057808, "iter_time": 0.4345344314575196, "loss": 0.03644320368766785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.39738002628805, "step_time": 0.3937428455352783} +{"epoch": 0, "iter": 18342, "iter_tflops": 15.689707730787859, "iter_time": 1.3149444122314455, "loss": 0.05028744414448738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.322629476968935, "step_time": 1.1909908676147463} +{"epoch": 0, "iter": 18343, "iter_tflops": 10.859237192356048, "iter_time": 1.8998658142089844, "loss": 0.030131777748465538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.898692569877836, "step_time": 1.484390953063965} +{"epoch": 0, "iter": 18344, "iter_tflops": 19.397735712391004, "iter_time": 1.0635825653076174, "loss": 0.03254089877009392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.87310022185553, "step_time": 0.9432176189422607} +{"epoch": 0, "iter": 18345, "iter_tflops": 15.491076230978416, "iter_time": 1.0601153106689454, "loss": 0.41826555132865906, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 16.700940059948348, "step_time": 0.983317527770996} +{"epoch": 0, "iter": 18346, "iter_tflops": 12.67165207219735, "iter_time": 1.2959894256591797, "loss": 0.39928194880485535, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 14.805590435207895, "step_time": 1.1091977157592772} +{"epoch": 0, "iter": 18347, "iter_tflops": 29.014988688206337, "iter_time": 
0.5659946060180664, "loss": 0.5065591931343079, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 30.92945029101269, "step_time": 0.5309608459472657} +{"epoch": 0, "iter": 18348, "iter_tflops": 30.25508764110706, "iter_time": 0.5427955551147461, "loss": 0.3359677493572235, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 32.138090399145774, "step_time": 0.5109926223754883} +{"epoch": 0, "iter": 18349, "iter_tflops": 26.506521472008128, "iter_time": 0.7783402862548829, "loss": 0.3800102472305298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.954723743488444, "step_time": 0.7380181503295898} +{"epoch": 0, "iter": 18350, "iter_tflops": 19.480547315653432, "iter_time": 1.0590612869262694, "loss": 0.4328039288520813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.463782654438152, "step_time": 0.700218765258789} +{"epoch": 0, "iter": 18351, "iter_tflops": 38.3937635001971, "iter_time": 0.5373553314208984, "loss": 0.45598265528678894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.118771330342895, "step_time": 0.4898313236236573} +{"epoch": 0, "iter": 18352, "iter_tflops": 41.36736463055253, "iter_time": 0.4987287368774414, "loss": 0.4848187267780304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.11890203724658, "step_time": 0.45726054000854494} +{"epoch": 0, "iter": 18353, "iter_tflops": 18.598471252471462, "iter_time": 1.1092897491455078, "loss": 0.006634692661464214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.090943395733618, "step_time": 1.0268852539062499} +{"epoch": 0, "iter": 18354, "iter_tflops": 19.712369871361194, "iter_time": 1.0466064529418946, "loss": 0.0026265291962772608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.03188636171946, "step_time": 0.6869729480743408} +{"epoch": 0, "iter": 18355, "iter_tflops": 56.70103682710383, "iter_time": 0.36385742950439454, "loss": 0.024080438539385796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.20335599297892, "step_time": 0.33167171096801756} +{"epoch": 0, "iter": 18356, "iter_tflops": 
59.085792322363204, "iter_time": 0.34917181777954104, "loss": 0.01031667459756136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.83985552018895, "step_time": 0.3181853713989258} +{"epoch": 0, "iter": 18357, "iter_tflops": 33.27302731886959, "iter_time": 0.6200545959472655, "loss": 0.21536748111248016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.65661120534096, "step_time": 0.5786049995422364} +{"epoch": 0, "iter": 18358, "iter_tflops": 13.131537253726357, "iter_time": 1.5711103057861326, "loss": 0.18566618859767914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.80117950356279, "step_time": 1.3938817176818847} +{"epoch": 0, "iter": 18359, "iter_tflops": 37.40636923314119, "iter_time": 0.5515395889282226, "loss": 0.2690550982952118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.11397836163433, "step_time": 0.5018024120330811} +{"epoch": 0, "iter": 18360, "iter_tflops": 40.38058752907685, "iter_time": 0.5109161300659179, "loss": 0.3360675275325775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.33622856282597, "step_time": 0.46533262252807617} +{"epoch": 0, "iter": 18361, "iter_tflops": 16.251055616969108, "iter_time": 1.2695232849121094, "loss": 0.2931927442550659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.36608062342212, "step_time": 1.1880109252929687} +{"epoch": 0, "iter": 18362, "iter_tflops": 21.676554813857024, "iter_time": 0.9517699508666991, "loss": 0.2801443338394165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.44641262081555, "step_time": 0.8439313297271729} +{"epoch": 0, "iter": 18363, "iter_tflops": 43.82272192868809, "iter_time": 0.4707853050231934, "loss": 0.21136297285556793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.34094771867413, "step_time": 0.43579806709289554} +{"epoch": 0, "iter": 18364, "iter_tflops": 49.53650118122868, "iter_time": 0.41648265457153316, "loss": 0.19332681596279144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.51081063584661, "step_time": 0.3855500087738037} +{"epoch": 0, "iter": 18365, 
"iter_tflops": 30.018053051659404, "iter_time": 0.6872895278930663, "loss": 0.3620859384536743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.002477307478514, "step_time": 0.6446717643737793} +{"epoch": 0, "iter": 18366, "iter_tflops": 8.374129182121393, "iter_time": 2.4636703186035156, "loss": 0.4295143187046051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.931631362156349, "step_time": 1.887284049987793} +{"epoch": 0, "iter": 18367, "iter_tflops": 11.374736567944131, "iter_time": 1.8137645111083984, "loss": 0.348076730966568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.492563677381535, "step_time": 1.423564109802246} +{"epoch": 0, "iter": 18368, "iter_tflops": 36.85010298088522, "iter_time": 0.5598652877807617, "loss": 0.30396875739097595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.63537171645805, "step_time": 0.5077126808166503} +{"epoch": 0, "iter": 18369, "iter_tflops": 14.222257381827156, "iter_time": 1.134509796142578, "loss": 0.4119863510131836, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 15.11006419047054, "step_time": 1.0678505477905276} +{"epoch": 0, "iter": 18370, "iter_tflops": 12.468118188028667, "iter_time": 1.2941239471435546, "loss": 0.3584952652454376, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 14.842667439535116, "step_time": 1.0870883140563965} +{"epoch": 0, "iter": 18371, "iter_tflops": 28.973907392770098, "iter_time": 0.556890380859375, "loss": 0.2620200216770172, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 30.84590179488979, "step_time": 0.5230934867858886} +{"epoch": 0, "iter": 18372, "iter_tflops": 29.986703939673788, "iter_time": 0.5380814895629883, "loss": 0.34831446409225464, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.777707128221458, "step_time": 0.5077550201416016} +{"epoch": 0, "iter": 18373, "iter_tflops": 39.607926923116686, "iter_time": 0.520882942199707, "loss": 0.09914874285459518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.905407973746414, "step_time": 0.48085065460205073} +{"epoch": 0, 
"iter": 18374, "iter_tflops": 24.247459719335406, "iter_time": 0.8508558731079101, "loss": 0.048251062631607056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.882908017387177, "step_time": 0.6903977851867676} +{"epoch": 0, "iter": 18375, "iter_tflops": 50.33457851790406, "iter_time": 0.409879135131836, "loss": 0.0899893045425415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.40581877454334, "step_time": 0.3792074813842773} +{"epoch": 0, "iter": 18376, "iter_tflops": 52.49379193601342, "iter_time": 0.39301968383789065, "loss": 0.1297127604484558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.171962209723866, "step_time": 0.36086033630371095} +{"epoch": 0, "iter": 18377, "iter_tflops": 31.877323272088713, "iter_time": 0.6472028198242188, "loss": 0.6075235605239868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.842147891164025, "step_time": 0.6096271896362305} +{"epoch": 0, "iter": 18378, "iter_tflops": 16.322541144493776, "iter_time": 1.263963333129883, "loss": 0.44809216260910034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.04638547203306, "step_time": 1.0832025604248046} +{"epoch": 0, "iter": 18379, "iter_tflops": 47.166951591853916, "iter_time": 0.43740570068359375, "loss": 0.6129561066627502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.13723343608456, "step_time": 0.40344563293457036} +{"epoch": 0, "iter": 18380, "iter_tflops": 43.517635775409644, "iter_time": 0.4740858078002929, "loss": 0.5259250402450562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90457593870452, "step_time": 0.43985246849060056} +{"epoch": 0, "iter": 18381, "iter_tflops": 41.00108973351258, "iter_time": 0.5031840286254883, "loss": 0.012995482422411442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35186317101109, "step_time": 0.465168586730957} +{"epoch": 0, "iter": 18382, "iter_tflops": 25.723195190805733, "iter_time": 0.8020424118041992, "loss": 0.028080599382519722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.022619446436757, "step_time": 
0.7108625583648682} +{"epoch": 0, "iter": 18383, "iter_tflops": 54.93883179230074, "iter_time": 0.3755284347534179, "loss": 0.054322730749845505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.053225233839235, "step_time": 0.343546802520752} +{"epoch": 0, "iter": 18384, "iter_tflops": 57.80716696179075, "iter_time": 0.3568950805664063, "loss": 0.023242643103003502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.51240742268964, "step_time": 0.3248356399536133} +{"epoch": 0, "iter": 18385, "iter_tflops": 39.551174462170266, "iter_time": 0.5216303634643554, "loss": 0.4607418179512024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.001357899082606, "step_time": 0.4797777214050293} +{"epoch": 0, "iter": 18386, "iter_tflops": 30.969661002455613, "iter_time": 0.6661711120605469, "loss": 0.46718817949295044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.395987696239224, "step_time": 0.599811050415039} +{"epoch": 0, "iter": 18387, "iter_tflops": 37.215134806620235, "iter_time": 0.5543737411499023, "loss": 0.39101824164390564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.50280273074714, "step_time": 0.5093744659423828} +{"epoch": 0, "iter": 18388, "iter_tflops": 36.936724964190226, "iter_time": 0.5585523223876953, "loss": 0.3534610867500305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.09683556797258, "step_time": 0.5145317134857178} +{"epoch": 0, "iter": 18389, "iter_tflops": 15.036230287313796, "iter_time": 1.3720921478271486, "loss": 0.5796499252319336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.9968989640054, "step_time": 1.2896933059692384} +{"epoch": 0, "iter": 18390, "iter_tflops": 26.41540366616663, "iter_time": 0.7810251083374025, "loss": 0.4492276906967163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.488971089751985, "step_time": 0.6160563564300536} +{"epoch": 0, "iter": 18391, "iter_tflops": 49.62847700925213, "iter_time": 0.4157107925415039, "loss": 0.5105103254318237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.82872377906654, 
"step_time": 0.38327294540405277} +{"epoch": 0, "iter": 18392, "iter_tflops": 44.41313370333065, "iter_time": 0.46452685928344734, "loss": 0.7198034524917603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.91497038716028, "step_time": 0.4305771942138672} +{"epoch": 0, "iter": 18393, "iter_tflops": 44.43532382801185, "iter_time": 0.4642948837280274, "loss": 0.138591468334198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.29125361374658, "step_time": 0.4272221565246582} +{"epoch": 0, "iter": 18394, "iter_tflops": 11.009711895604203, "iter_time": 1.8738994903564452, "loss": 0.1305706650018692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.97890284780115, "step_time": 1.4758735885620118} +{"epoch": 0, "iter": 18395, "iter_tflops": 15.98612580627416, "iter_time": 1.2905624389648438, "loss": 0.08503582328557968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.53253960713212, "step_time": 1.0047998886108398} +{"epoch": 0, "iter": 18396, "iter_tflops": 11.723500508826405, "iter_time": 1.7598065948486328, "loss": 0.14286379516124725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.807644989197122, "step_time": 1.494179023742676} +{"epoch": 0, "iter": 18397, "iter_tflops": 19.361235810581707, "iter_time": 0.7699793395996094, "loss": 0.3629685938358307, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 20.749955943525624, "step_time": 0.7184473838806154} +{"epoch": 0, "iter": 18398, "iter_tflops": 21.058060810489962, "iter_time": 0.7079356307983398, "loss": 0.3361966609954834, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 22.64874146720025, "step_time": 0.6582154502868653} +{"epoch": 0, "iter": 18399, "iter_tflops": 23.086582622834648, "iter_time": 0.6457322769165039, "loss": 0.23693454265594482, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 24.77594285919911, "step_time": 0.6017026939392089} +{"epoch": 0, "iter": 18400, "iter_tflops": 22.66359907065884, "iter_time": 0.6577839431762695, "loss": 0.41905808448791504, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 
24.22072021095922, "step_time": 0.6154958000183106} +{"epoch": 0, "iter": 18401, "iter_tflops": 23.754369779454283, "iter_time": 0.868517822265625, "loss": 0.06411488354206085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.479035118090245, "step_time": 0.8097282104492186} +{"epoch": 0, "iter": 18402, "iter_tflops": 10.782656705220651, "iter_time": 1.9133590240478517, "loss": 0.04954669997096062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.922596084661372, "step_time": 1.730419563293457} +{"epoch": 0, "iter": 18403, "iter_tflops": 13.474825154778463, "iter_time": 1.5310843200683595, "loss": 0.03713482245802879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.13476179785052, "step_time": 1.2786735725402834} +{"epoch": 0, "iter": 18404, "iter_tflops": 39.89557192311464, "iter_time": 0.5171274032592774, "loss": 0.06064744293689728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77079326235438, "step_time": 0.4713438339233399} +{"epoch": 0, "iter": 18405, "iter_tflops": 13.377887373548484, "iter_time": 1.1296287689208984, "loss": 0.3298947513103485, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 14.197747551214203, "step_time": 1.0643974609375} +{"epoch": 0, "iter": 18406, "iter_tflops": 9.674806796449449, "iter_time": 1.5619998168945313, "loss": 0.27447450160980225, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 11.648858642606031, "step_time": 1.2972984657287596} +{"epoch": 0, "iter": 18407, "iter_tflops": 26.99942561964961, "iter_time": 0.5597173309326172, "loss": 0.4787933826446533, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.791113992619078, "step_time": 0.52488578414917} +{"epoch": 0, "iter": 18408, "iter_tflops": 25.87302004864508, "iter_time": 0.5840851364135743, "loss": 0.362464040517807, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.60338145499999, "step_time": 0.5474708404541015} +{"epoch": 0, "iter": 18409, "iter_tflops": 2.279978399536842, "iter_time": 0.7218585433959961, "loss": 0.1976957470178604, "lr": 3e-05, "seqlen": 672.0, 
"step_tflops": 2.423422273258115, "step_time": 0.6791312866210937} +{"epoch": 0, "iter": 18410, "iter_tflops": 1.1189884576570892, "iter_time": 1.4708122100830077, "loss": 0.05781159549951553, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 1.3042521952945754, "step_time": 1.2618892974853515} +{"epoch": 0, "iter": 18411, "iter_tflops": 3.0405219710578786, "iter_time": 0.5412958374023438, "loss": 0.05276135355234146, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 3.3457117116772883, "step_time": 0.4919198150634766} +{"epoch": 0, "iter": 18412, "iter_tflops": 3.294325514551512, "iter_time": 0.4995929756164551, "loss": 0.19884063303470612, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 3.5991305691192768, "step_time": 0.45728318405151364} +{"epoch": 0, "iter": 18413, "iter_tflops": 20.99528967231572, "iter_time": 0.982653434753418, "loss": 0.4373932480812073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.331913968258203, "step_time": 0.9238390197753905} +{"epoch": 0, "iter": 18414, "iter_tflops": 18.41255484264532, "iter_time": 1.1204905395507814, "loss": 0.45265817642211914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.572219390014418, "step_time": 0.8396105041503906} +{"epoch": 0, "iter": 18415, "iter_tflops": 37.37824173201928, "iter_time": 0.5519546279907227, "loss": 0.5613361597061157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.71808143656519, "step_time": 0.50668137550354} +{"epoch": 0, "iter": 18416, "iter_tflops": 36.94613931827846, "iter_time": 0.5584099960327149, "loss": 0.45444798469543457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.32682464135916, "step_time": 0.5115972728729248} +{"epoch": 0, "iter": 18417, "iter_tflops": 21.38210284136997, "iter_time": 0.9648767318725586, "loss": 0.5151981711387634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.970976613321394, "step_time": 0.8981374130249024} +{"epoch": 0, "iter": 18418, "iter_tflops": 17.813185200456832, "iter_time": 1.1581922760009764, "loss": 0.4301987886428833, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 21.23053975685075, "step_time": 0.971764907836914} +{"epoch": 0, "iter": 18419, "iter_tflops": 44.655214738711564, "iter_time": 0.4620086059570313, "loss": 0.545516312122345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.11510979359118, "step_time": 0.4287861671447754} +{"epoch": 0, "iter": 18420, "iter_tflops": 45.30042177028258, "iter_time": 0.45542828750610354, "loss": 0.5277827382087708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.76124144374114, "step_time": 0.4231043529510498} +{"epoch": 0, "iter": 18421, "iter_tflops": 43.67840894194449, "iter_time": 0.4723407745361328, "loss": 0.3128136694431305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.686617296601604, "step_time": 0.43263906478881836} +{"epoch": 0, "iter": 18422, "iter_tflops": 45.051227537866, "iter_time": 0.45794742202758787, "loss": 0.23701195418834686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.55604133051272, "step_time": 0.4163184337615967} +{"epoch": 0, "iter": 18423, "iter_tflops": 50.60481582601175, "iter_time": 0.40769031906127934, "loss": 0.34142738580703735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.774241756228044, "step_time": 0.376656852722168} +{"epoch": 0, "iter": 18424, "iter_tflops": 50.73193668781197, "iter_time": 0.40666875457763674, "loss": 0.3522798418998718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.69985031116619, "step_time": 0.37716910362243655} +{"epoch": 0, "iter": 18425, "iter_tflops": 21.520926626072722, "iter_time": 0.6661587448120116, "loss": 0.001385347917675972, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 22.900129865017927, "step_time": 0.6260380859375001} +{"epoch": 0, "iter": 18426, "iter_tflops": 10.223153311329447, "iter_time": 1.4023416290283204, "loss": 0.0022483342327177525, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 14.245800220603376, "step_time": 1.0063564872741697} +{"epoch": 0, "iter": 18427, "iter_tflops": 37.63377838452991, "iter_time": 0.3809437713623047, "loss": 0.01230789814144373, "lr": 
3e-05, "seqlen": 5744.0, "step_tflops": 41.5758451006235, "step_time": 0.3448241024017334} +{"epoch": 0, "iter": 18428, "iter_tflops": 42.85827462844607, "iter_time": 0.33450608062744136, "loss": 0.0037196569610387087, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 47.26114213015287, "step_time": 0.3033433561325073} +{"epoch": 0, "iter": 18429, "iter_tflops": 29.848899789983754, "iter_time": 0.6911843872070313, "loss": 0.0027981859166175127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.695116690481775, "step_time": 0.6509234123229981} +{"epoch": 0, "iter": 18430, "iter_tflops": 16.8734943353038, "iter_time": 1.2226924133300783, "loss": 0.001736941048875451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.10649473391761, "step_time": 1.0260910110473633} +{"epoch": 0, "iter": 18431, "iter_tflops": 55.48482646372413, "iter_time": 0.3718330726623535, "loss": 0.013646852225065231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.00886229895948, "step_time": 0.3381655178070068} +{"epoch": 0, "iter": 18432, "iter_tflops": 54.041786949532344, "iter_time": 0.3817618675231934, "loss": 0.009293554350733757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.44139334762778, "step_time": 0.34708293914794924} +{"epoch": 0, "iter": 18433, "iter_tflops": 33.15915037019818, "iter_time": 0.6221840209960937, "loss": 0.10837779939174652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.41394608753432, "step_time": 0.5825697441101074} +{"epoch": 0, "iter": 18434, "iter_tflops": 16.49445000161089, "iter_time": 1.250790023803711, "loss": 0.09403084963560104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.354271313858824, "step_time": 1.0659710807800293} +{"epoch": 0, "iter": 18435, "iter_tflops": 39.60864688868308, "iter_time": 0.5208734741210937, "loss": 0.1175675243139267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41545994911354, "step_time": 0.4752015419006348} +{"epoch": 0, "iter": 18436, "iter_tflops": 46.149853374641324, "iter_time": 0.4470457000732422, "loss": 
0.11269357055425644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30058207158818, "step_time": 0.4101561584472656} +{"epoch": 0, "iter": 18437, "iter_tflops": 23.028063273432533, "iter_time": 0.8959109268188477, "loss": 0.5593151450157166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.639742486807926, "step_time": 0.8373096237182617} +{"epoch": 0, "iter": 18438, "iter_tflops": 7.596080164329964, "iter_time": 2.7160184020996097, "loss": 0.4941311776638031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.967975317818118, "step_time": 2.300529693603516} +{"epoch": 0, "iter": 18439, "iter_tflops": 13.538673156124883, "iter_time": 1.52386376953125, "loss": 0.47938790917396545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.329222367585267, "step_time": 1.1905377559661865} +{"epoch": 0, "iter": 18440, "iter_tflops": 34.675987983084944, "iter_time": 0.5949677200317384, "loss": 0.5217454433441162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82634830768545, "step_time": 0.5454159450531005} +{"epoch": 0, "iter": 18441, "iter_tflops": 18.466111778002066, "iter_time": 0.7984572372436524, "loss": 0.2113286554813385, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 20.209471937605464, "step_time": 0.7295787162780761} +{"epoch": 0, "iter": 18442, "iter_tflops": 21.91229513454864, "iter_time": 0.6728825302124023, "loss": 0.23423129320144653, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.59319304989172, "step_time": 0.6249429893493652} +{"epoch": 0, "iter": 18443, "iter_tflops": 22.48207787793655, "iter_time": 0.655829086303711, "loss": 0.40771955251693726, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.26193087190868, "step_time": 0.6077175254821777} +{"epoch": 0, "iter": 18444, "iter_tflops": 20.200272562655076, "iter_time": 0.7299109725952149, "loss": 0.2464190274477005, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 21.72198712389557, "step_time": 0.6787777061462401} +{"epoch": 0, "iter": 18445, "iter_tflops": 26.46175117234233, "iter_time": 0.7796571502685548, 
"loss": 0.0271599180996418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.507932895353164, "step_time": 0.723696578979492} +{"epoch": 0, "iter": 18446, "iter_tflops": 10.828938941197418, "iter_time": 1.905181442260742, "loss": 0.02296588011085987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.359226947208292, "step_time": 1.6692867279052732} +{"epoch": 0, "iter": 18447, "iter_tflops": 33.45703943933543, "iter_time": 0.6166443252563476, "loss": 0.030560551211237907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.53757256066185, "step_time": 0.5496118183135987} +{"epoch": 0, "iter": 18448, "iter_tflops": 49.18535736638408, "iter_time": 0.4194560050964356, "loss": 0.013778118416666985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.42644680461365, "step_time": 0.3861588172912597} +{"epoch": 0, "iter": 18449, "iter_tflops": 22.78464391939892, "iter_time": 0.6901858520507813, "loss": 0.4436398148536682, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.19342631845376, "step_time": 0.6499963531494141} +{"epoch": 0, "iter": 18450, "iter_tflops": 8.88425100585862, "iter_time": 1.7700579223632813, "loss": 0.3029533326625824, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 11.127597162661125, "step_time": 1.4132106552124024} +{"epoch": 0, "iter": 18451, "iter_tflops": 25.24832556281537, "iter_time": 0.622838882446289, "loss": 0.28261062502861023, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 27.10892611427552, "step_time": 0.5800908088684081} +{"epoch": 0, "iter": 18452, "iter_tflops": 25.261542915327485, "iter_time": 0.6225130004882813, "loss": 0.2993530035018921, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 27.0915406891706, "step_time": 0.5804630699157713} +{"epoch": 0, "iter": 18453, "iter_tflops": 26.939008528939464, "iter_time": 0.7658445739746093, "loss": 0.09176982194185257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.29980552270219, "step_time": 0.7041375579833984} +{"epoch": 0, "iter": 18454, "iter_tflops": 10.219331969128527, "iter_time": 
2.0188299560546876, "loss": 0.12085224688053131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.33726032063313, "step_time": 1.6722589111328123} +{"epoch": 0, "iter": 18455, "iter_tflops": 9.836722708377978, "iter_time": 2.0973543853759766, "loss": 0.12421821057796478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.876288573879725, "step_time": 1.7371667404174804} +{"epoch": 0, "iter": 18456, "iter_tflops": 42.259413684287516, "iter_time": 0.4882011299133301, "loss": 0.10749085992574692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.02932771131472, "step_time": 0.44821626853942875} +{"epoch": 0, "iter": 18457, "iter_tflops": 19.49539712670393, "iter_time": 0.8087336044311523, "loss": 0.24560822546482086, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 20.56016486595697, "step_time": 0.7668509902954102} +{"epoch": 0, "iter": 18458, "iter_tflops": 8.8052805567678, "iter_time": 1.7905826721191405, "loss": 0.3810325562953949, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 11.157621323787989, "step_time": 1.4130774230957033} +{"epoch": 0, "iter": 18459, "iter_tflops": 7.837210293257927, "iter_time": 2.011759567260742, "loss": 0.5213241577148438, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 9.636247089499909, "step_time": 1.636174606323242} +{"epoch": 0, "iter": 18460, "iter_tflops": 20.843342423406547, "iter_time": 0.7564325561523437, "loss": 0.26777151226997375, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 25.55291037754266, "step_time": 0.617017105102539} +{"epoch": 0, "iter": 18461, "iter_tflops": 26.677146745393237, "iter_time": 0.6402213668823242, "loss": 0.19923987984657288, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 28.56842798684522, "step_time": 0.5978375625610352} +{"epoch": 0, "iter": 18462, "iter_tflops": 28.67324849121432, "iter_time": 0.5956520538330079, "loss": 0.3969906270503998, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 30.486027843015304, "step_time": 0.5602330169677734} +{"epoch": 0, "iter": 18463, "iter_tflops": 29.07722038466011, 
"iter_time": 0.5873766174316407, "loss": 0.4282773435115814, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 31.140442951878395, "step_time": 0.5484597434997559} +{"epoch": 0, "iter": 18464, "iter_tflops": 29.73424931978507, "iter_time": 0.5743975296020508, "loss": 0.3554314374923706, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 31.695909931286277, "step_time": 0.5388480529785156} +{"epoch": 0, "iter": 18465, "iter_tflops": 31.14491440950998, "iter_time": 0.6624225463867187, "loss": 0.597373902797699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.47168750509622, "step_time": 0.6163744659423828} +{"epoch": 0, "iter": 18466, "iter_tflops": 14.811475001935378, "iter_time": 1.392912826538086, "loss": 0.515049159526825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.504308652564607, "step_time": 1.1786294403076174} +{"epoch": 0, "iter": 18467, "iter_tflops": 30.237983324502647, "iter_time": 0.682290657043457, "loss": 0.4167555272579193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.27319906488351, "step_time": 0.6200513954162598} +{"epoch": 0, "iter": 18468, "iter_tflops": 35.092152288376866, "iter_time": 0.5879118881225586, "loss": 0.4318434000015259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.053496199891775, "step_time": 0.5421602630615234} +{"epoch": 0, "iter": 18469, "iter_tflops": 17.241417998428084, "iter_time": 1.1966007385253907, "loss": 0.07259813696146011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.356854797535302, "step_time": 1.1238904342651368} +{"epoch": 0, "iter": 18470, "iter_tflops": 18.481567321552795, "iter_time": 1.1163064880371094, "loss": 0.05967569723725319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.22956987593415, "step_time": 0.9718093032836914} +{"epoch": 0, "iter": 18471, "iter_tflops": 49.86854468800499, "iter_time": 0.4137095565795899, "loss": 0.06081797927618027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.2835906879776, "step_time": 0.3800613269805908} +{"epoch": 0, "iter": 18472, "iter_tflops": 
52.297681756252594, "iter_time": 0.3944934616088867, "loss": 0.0715353861451149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.94160774568455, "step_time": 0.3623201789855957} +{"epoch": 0, "iter": 18473, "iter_tflops": 30.469627131358255, "iter_time": 0.6771035766601562, "loss": 0.401822566986084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.470581399091756, "step_time": 0.6353780136108399} +{"epoch": 0, "iter": 18474, "iter_tflops": 20.963576160727083, "iter_time": 0.9841399841308592, "loss": 0.42297250032424927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.50025770759117, "step_time": 0.7785242595672607} +{"epoch": 0, "iter": 18475, "iter_tflops": 50.3165863249348, "iter_time": 0.41002569961547847, "loss": 0.43904268741607666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.54561339065435, "step_time": 0.37823561286926266} +{"epoch": 0, "iter": 18476, "iter_tflops": 47.61610277800316, "iter_time": 0.43327975845336913, "loss": 0.4474528133869171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.476600019738434, "step_time": 0.40078586196899413} +{"epoch": 0, "iter": 18477, "iter_tflops": 17.270490537159482, "iter_time": 0.5836746292114258, "loss": 0.0018883366137742996, "lr": 3e-05, "seqlen": 4064.0, "step_tflops": 18.48134829536658, "step_time": 0.5454335365295411} +{"epoch": 0, "iter": 18478, "iter_tflops": 6.2878257294347595, "iter_time": 1.603153076171875, "loss": 0.005953288171440363, "lr": 3e-05, "seqlen": 4064.0, "step_tflops": 7.410708417046488, "step_time": 1.3602406940460203} +{"epoch": 0, "iter": 18479, "iter_tflops": 27.007746837074276, "iter_time": 0.3732391014099121, "loss": 0.001422915724106133, "lr": 3e-05, "seqlen": 4064.0, "step_tflops": 29.756750548177205, "step_time": 0.3387583312988281} +{"epoch": 0, "iter": 18480, "iter_tflops": 26.020395875175307, "iter_time": 0.3874017601013184, "loss": 0.013189089484512806, "lr": 3e-05, "seqlen": 4064.0, "step_tflops": 28.731369740751955, "step_time": 0.35084812355041506} +{"epoch": 0, "iter": 
18481, "iter_tflops": 37.46881588005932, "iter_time": 0.5506203765869141, "loss": 0.22542400658130646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.52664443993985, "step_time": 0.5090748023986816} +{"epoch": 0, "iter": 18482, "iter_tflops": 45.053742404443035, "iter_time": 0.4579218597412109, "loss": 0.23406819999217987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.997991015649234, "step_time": 0.4210599880218506} +{"epoch": 0, "iter": 18483, "iter_tflops": 44.47593029253742, "iter_time": 0.4638709831237793, "loss": 0.2076977789402008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.956120464763224, "step_time": 0.43020772552490233} +{"epoch": 0, "iter": 18484, "iter_tflops": 48.343005110383416, "iter_time": 0.4267648124694824, "loss": 0.21272331476211548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.54155923746377, "step_time": 0.39266237640380863} +{"epoch": 0, "iter": 18485, "iter_tflops": 37.25406083748574, "iter_time": 0.5537944869995116, "loss": 0.6347871422767639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85058864090846, "step_time": 0.5177111358642578} +{"epoch": 0, "iter": 18486, "iter_tflops": 14.993680726074706, "iter_time": 1.3759859161376953, "loss": 0.774785041809082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.86220303965534, "step_time": 1.0937796325683593} +{"epoch": 0, "iter": 18487, "iter_tflops": 36.14419764371565, "iter_time": 0.5707995986938477, "loss": 0.8370118737220764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.53905588777982, "step_time": 0.5217902412414551} +{"epoch": 0, "iter": 18488, "iter_tflops": 37.24882761665469, "iter_time": 0.5538722915649414, "loss": 0.6488223075866699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.41523859901362, "step_time": 0.5104780826568603} +{"epoch": 0, "iter": 18489, "iter_tflops": 22.872994561122944, "iter_time": 0.5627287139892578, "loss": 0.018737131729722023, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 25.502880970619398, "step_time": 0.5046994819641113} +{"epoch": 
0, "iter": 18490, "iter_tflops": 23.824436396955225, "iter_time": 0.5402558364868165, "loss": 0.04949905723333359, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 26.494026521259116, "step_time": 0.48581859779357917} +{"epoch": 0, "iter": 18491, "iter_tflops": 26.70061700919345, "iter_time": 0.4820596771240234, "loss": 0.022303113713860512, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 29.547919577194097, "step_time": 0.43560734558105474} +{"epoch": 0, "iter": 18492, "iter_tflops": 29.208238566585976, "iter_time": 0.4406732978820801, "loss": 0.0245401319116354, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 32.069533876220824, "step_time": 0.4013557186126709} +{"epoch": 0, "iter": 18493, "iter_tflops": 3.0917264229111687, "iter_time": 0.8377289276123046, "loss": 0.009644192643463612, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 3.3105565157865007, "step_time": 0.7823544616699218} +{"epoch": 0, "iter": 18494, "iter_tflops": 1.0336242490320389, "iter_time": 2.505773895263672, "loss": 0.020531008020043373, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 1.1083754607567546, "step_time": 2.3367791442871098} +{"epoch": 0, "iter": 18495, "iter_tflops": 2.1262162139918823, "iter_time": 1.2181398315429686, "loss": 0.008536385372281075, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 2.5394141259726037, "step_time": 1.0199315795898436} +{"epoch": 0, "iter": 18496, "iter_tflops": 2.7605579180592934, "iter_time": 0.9382265243530273, "loss": 0.010587392374873161, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 3.715611739289022, "step_time": 0.697066551208496} +{"epoch": 0, "iter": 18497, "iter_tflops": 17.368081200071497, "iter_time": 0.8489366455078124, "loss": 0.31285783648490906, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 18.340761535280418, "step_time": 0.8039143066406249} +{"epoch": 0, "iter": 18498, "iter_tflops": 7.051303681794517, "iter_time": 2.0910176696777345, "loss": 0.2698628902435303, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 8.564036874309387, "step_time": 
1.7216647720336915} +{"epoch": 0, "iter": 18499, "iter_tflops": 10.43232594682915, "iter_time": 1.4133377990722655, "loss": 0.21651673316955566, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 12.108102855610385, "step_time": 1.2177300415039063} +{"epoch": 0, "iter": 18500, "iter_tflops": 13.467980245512123, "iter_time": 1.0947744445800782, "loss": 0.355314165353775, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 16.51719634779727, "step_time": 0.892669692993164} +{"epoch": 0, "iter": 18501, "iter_tflops": 20.537589233462214, "iter_time": 0.8436213073730469, "loss": 0.3410351574420929, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 22.123710419212493, "step_time": 0.7831393356323243} +{"epoch": 0, "iter": 18502, "iter_tflops": 10.498710257700356, "iter_time": 1.650292984008789, "loss": 0.2743881940841675, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 11.888477640542858, "step_time": 1.4573731307983397} +{"epoch": 0, "iter": 18503, "iter_tflops": 8.69135392366849, "iter_time": 1.9934693756103514, "loss": 0.39048442244529724, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 10.107079706382851, "step_time": 1.7142387695312498} +{"epoch": 0, "iter": 18504, "iter_tflops": 18.711775627996797, "iter_time": 0.925938201904297, "loss": 0.2494458109140396, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 23.24674874860879, "step_time": 0.745306282043457} +{"epoch": 0, "iter": 18505, "iter_tflops": 14.698682274599111, "iter_time": 1.094948760986328, "loss": 0.2609049379825592, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 15.897638109973489, "step_time": 1.0123707580566406} +{"epoch": 0, "iter": 18506, "iter_tflops": 13.069014208818952, "iter_time": 1.2314856872558593, "loss": 0.4060797095298767, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 15.568352401334764, "step_time": 1.0337833786010742} +{"epoch": 0, "iter": 18507, "iter_tflops": 25.43622419637678, "iter_time": 0.6327316436767578, "loss": 0.2990495264530182, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 27.380713240022047, 
"step_time": 0.5877971038818359} +{"epoch": 0, "iter": 18508, "iter_tflops": 22.100732605732492, "iter_time": 0.7282249069213866, "loss": 0.2790222465991974, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 23.745913096299834, "step_time": 0.6777715339660645} +{"epoch": 0, "iter": 18509, "iter_tflops": 20.00816223377782, "iter_time": 1.0311338577270508, "loss": 0.054228171706199646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.560014271840817, "step_time": 0.9569146499633789} +{"epoch": 0, "iter": 18510, "iter_tflops": 33.740159825164305, "iter_time": 0.6114699401855469, "loss": 0.0433516763150692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44315527163963, "step_time": 0.5101257152557372} +{"epoch": 0, "iter": 18511, "iter_tflops": 41.58890129588265, "iter_time": 0.4960720977783203, "loss": 0.1417897343635559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83928133346807, "step_time": 0.45007454109191897} +{"epoch": 0, "iter": 18512, "iter_tflops": 43.60138720639773, "iter_time": 0.47317516326904296, "loss": 0.06600493937730789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.847023156438254, "step_time": 0.4311886539459228} +{"epoch": 0, "iter": 18513, "iter_tflops": 26.514645948192868, "iter_time": 0.778101791381836, "loss": 0.162720188498497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.31530560370988, "step_time": 0.7286198425292968} +{"epoch": 0, "iter": 18514, "iter_tflops": 11.296195579214219, "iter_time": 1.8263753814697266, "loss": 0.28936678171157837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.073780496443126, "step_time": 1.4659240646362302} +{"epoch": 0, "iter": 18515, "iter_tflops": 11.50808672935781, "iter_time": 1.7927474822998049, "loss": 0.21224066615104675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.101619367014125, "step_time": 1.4630300941467287} +{"epoch": 0, "iter": 18516, "iter_tflops": 37.19479928052087, "iter_time": 0.5546768341064454, "loss": 0.16052421927452087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.63273275615442, "step_time": 0.5077456550598145} +{"epoch": 0, "iter": 18517, "iter_tflops": 26.7410094544028, "iter_time": 0.6095238494873048, "loss": 0.32180488109588623, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 28.836894453847457, "step_time": 0.565223243713379} +{"epoch": 0, "iter": 18518, "iter_tflops": 23.81125064054108, "iter_time": 0.6845202407836914, "loss": 0.49791574478149414, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 25.705626870116973, "step_time": 0.634074520111084} +{"epoch": 0, "iter": 18519, "iter_tflops": 25.501251054995606, "iter_time": 0.6391562118530273, "loss": 0.33539292216300964, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.4019414626986, "step_time": 0.5948221969604492} +{"epoch": 0, "iter": 18520, "iter_tflops": 25.986804287714182, "iter_time": 0.6272138290405274, "loss": 0.4138050079345703, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.888001483290797, "step_time": 0.5844550399780273} +{"epoch": 0, "iter": 18521, "iter_tflops": 23.663724400729212, "iter_time": 0.8718447341918946, "loss": 0.0857611894607544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.81670764099672, "step_time": 0.7991372795104981} +{"epoch": 0, "iter": 18522, "iter_tflops": 20.651249244107408, "iter_time": 0.9990239944458008, "loss": 0.02790186181664467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.375158705812307, "step_time": 0.8826076335906983} +{"epoch": 0, "iter": 18523, "iter_tflops": 42.02181389322492, "iter_time": 0.4909615173339843, "loss": 0.02189556695520878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35891865993018, "step_time": 0.4450296535491943} +{"epoch": 0, "iter": 18524, "iter_tflops": 44.9504617061013, "iter_time": 0.458974006652832, "loss": 0.018078641965985298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.581248697698285, "step_time": 0.41610677528381346} +{"epoch": 0, "iter": 18525, "iter_tflops": 17.99697471944533, "iter_time": 1.146364532470703, "loss": 0.24613253772258759, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 19.091660039069495, "step_time": 1.080633819580078} +{"epoch": 0, "iter": 18526, "iter_tflops": 13.93359787699053, "iter_time": 1.480672378540039, "loss": 0.17539146542549133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.15450841538029, "step_time": 1.1364170837402343} +{"epoch": 0, "iter": 18527, "iter_tflops": 45.16742000541552, "iter_time": 0.456769359588623, "loss": 0.3460124135017395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.93936930512938, "step_time": 0.4215643520355225} +{"epoch": 0, "iter": 18528, "iter_tflops": 48.52266728022699, "iter_time": 0.4251846542358398, "loss": 0.22148311138153076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.34462175086299, "step_time": 0.3941396999359131} +{"epoch": 0, "iter": 18529, "iter_tflops": 38.94205272340249, "iter_time": 0.5297895736694336, "loss": 0.5301896333694458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45587299219323, "step_time": 0.4859420394897461} +{"epoch": 0, "iter": 18530, "iter_tflops": 38.67457714479772, "iter_time": 0.5334536285400391, "loss": 0.3908713757991791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.3877185181693, "step_time": 0.4867233772277832} +{"epoch": 0, "iter": 18531, "iter_tflops": 37.47000740808959, "iter_time": 0.5506028671264649, "loss": 0.5695408582687378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14362819269525, "step_time": 0.5014407920837401} +{"epoch": 0, "iter": 18532, "iter_tflops": 38.19967323907716, "iter_time": 0.5400856018066406, "loss": 0.49372801184654236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6972866522019, "step_time": 0.49478263854980464} +{"epoch": 0, "iter": 18533, "iter_tflops": 33.115179746357455, "iter_time": 0.6230101623535156, "loss": 0.1140982061624527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.886757803393785, "step_time": 0.5593089427947998} +{"epoch": 0, "iter": 18534, "iter_tflops": 47.62526457195128, "iter_time": 0.43319640731811526, "loss": 0.09681463241577148, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 51.71473435161292, "step_time": 0.398940336227417} +{"epoch": 0, "iter": 18535, "iter_tflops": 52.08805740941295, "iter_time": 0.39608106994628905, "loss": 0.07177464663982391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.72849169689041, "step_time": 0.3636813335418701} +{"epoch": 0, "iter": 18536, "iter_tflops": 53.012425617856906, "iter_time": 0.3891746749877929, "loss": 0.0954737439751625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.395900721628756, "step_time": 0.3594523868560791} +{"epoch": 0, "iter": 18537, "iter_tflops": 37.19355459441834, "iter_time": 0.5546953964233398, "loss": 0.5656126141548157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.05579142961085, "step_time": 0.5150589408874512} +{"epoch": 0, "iter": 18538, "iter_tflops": 26.81117247371205, "iter_time": 0.7694961318969726, "loss": 0.6432370543479919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.064945804546184, "step_time": 0.5281229267120362} +{"epoch": 0, "iter": 18539, "iter_tflops": 39.67132022618772, "iter_time": 0.5200505905151367, "loss": 0.6066358089447021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.98961692861966, "step_time": 0.4799087543487549} +{"epoch": 0, "iter": 18540, "iter_tflops": 39.46101645352808, "iter_time": 0.522822151184082, "loss": 0.536381721496582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.825754086534786, "step_time": 0.48174501419067384} +{"epoch": 0, "iter": 18541, "iter_tflops": 17.14389595047913, "iter_time": 1.2034075317382813, "loss": 0.4131876230239868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.220684432387902, "step_time": 1.1322897109985353} +{"epoch": 0, "iter": 18542, "iter_tflops": 23.159826299176697, "iter_time": 0.8908138275146484, "loss": 0.4248235821723938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.795634646310596, "step_time": 0.648865598678589} +{"epoch": 0, "iter": 18543, "iter_tflops": 46.23024776952735, "iter_time": 0.4462682876586914, "loss": 0.5410001873970032, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 50.11239256429468, "step_time": 0.4116964378356934} +{"epoch": 0, "iter": 18544, "iter_tflops": 44.050271222920806, "iter_time": 0.46835338211059574, "loss": 0.4094582796096802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.454417561638635, "step_time": 0.4347560157775879} +{"epoch": 0, "iter": 18545, "iter_tflops": 34.674934621728994, "iter_time": 0.5949857940673828, "loss": 0.17114299535751343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.10709874517092, "step_time": 0.5559877815246582} +{"epoch": 0, "iter": 18546, "iter_tflops": 12.806691674692537, "iter_time": 1.6109619903564454, "loss": 0.21217511594295502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.92301742736232, "step_time": 1.3825014686584471} +{"epoch": 0, "iter": 18547, "iter_tflops": 48.43200013499978, "iter_time": 0.4259806213378906, "loss": 0.14233842492103577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.649587882941056, "step_time": 0.39185669517517097} +{"epoch": 0, "iter": 18548, "iter_tflops": 45.98015028185199, "iter_time": 0.44869565200805667, "loss": 0.1144958958029747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42983072357855, "step_time": 0.4173814315795899} +{"epoch": 0, "iter": 18549, "iter_tflops": 29.613771564291113, "iter_time": 0.6966722717285156, "loss": 0.5219048857688904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.399444877405667, "step_time": 0.6570528106689453} +{"epoch": 0, "iter": 18550, "iter_tflops": 9.443131833433862, "iter_time": 2.184772369384766, "loss": 0.6084660291671753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.319904012184036, "step_time": 1.8225502166748049} +{"epoch": 0, "iter": 18551, "iter_tflops": 10.201397605133064, "iter_time": 2.0223791198730465, "loss": 0.6234642267227173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.04039529540256, "step_time": 1.7134897155761717} +{"epoch": 0, "iter": 18552, "iter_tflops": 25.409350672658945, "iter_time": 0.8119488677978516, "loss": 0.5110028386116028, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.97695729370807, "step_time": 0.5734529838562011} +{"epoch": 0, "iter": 18553, "iter_tflops": 16.61367561990031, "iter_time": 0.8874857635498046, "loss": 0.28386935591697693, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 17.70959544929219, "step_time": 0.8325656356811525} +{"epoch": 0, "iter": 18554, "iter_tflops": 7.36145516216935, "iter_time": 2.0029192962646487, "loss": 0.2599484622478485, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 9.205441162945657, "step_time": 1.6017049407958983} +{"epoch": 0, "iter": 18555, "iter_tflops": 10.615332621330435, "iter_time": 1.3889720764160156, "loss": 0.3768061399459839, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 13.149067318283832, "step_time": 1.1213267250061036} +{"epoch": 0, "iter": 18556, "iter_tflops": 10.932594087772408, "iter_time": 1.3486644134521484, "loss": 0.29315024614334106, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 13.005718146100081, "step_time": 1.1336860008239746} +{"epoch": 0, "iter": 18557, "iter_tflops": 25.329737696956126, "iter_time": 0.669411148071289, "loss": 0.24468767642974854, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 27.121994779865076, "step_time": 0.6251755790710449} +{"epoch": 0, "iter": 18558, "iter_tflops": 29.542037710621116, "iter_time": 0.5739620590209961, "loss": 0.41493499279022217, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 31.518226418688446, "step_time": 0.5379747123718261} +{"epoch": 0, "iter": 18559, "iter_tflops": 30.695064648459155, "iter_time": 0.5524017944335938, "loss": 0.3625078499317169, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 32.721574827126716, "step_time": 0.518190486907959} +{"epoch": 0, "iter": 18560, "iter_tflops": 28.803779982673, "iter_time": 0.5886730422973633, "loss": 0.47078320384025574, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 30.55166013118098, "step_time": 0.5549946784973144} +{"epoch": 0, "iter": 18561, "iter_tflops": 26.67441042518405, "iter_time": 0.7734414062499999, "loss": 
0.16824834048748016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.270457596489464, "step_time": 0.729775718688965} +{"epoch": 0, "iter": 18562, "iter_tflops": 12.820616620117676, "iter_time": 1.6092122650146483, "loss": 0.10744410753250122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.839246181836856, "step_time": 1.2251791610717773} +{"epoch": 0, "iter": 18563, "iter_tflops": 11.56169626742534, "iter_time": 1.7844348297119141, "loss": 0.1916203647851944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.42120135107631, "step_time": 1.5372016983032226} +{"epoch": 0, "iter": 18564, "iter_tflops": 27.811364002266327, "iter_time": 0.7418224258422851, "loss": 0.17025870084762573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.510867783221855, "step_time": 0.5978143939971925} +{"epoch": 0, "iter": 18565, "iter_tflops": 12.806160804926106, "iter_time": 1.243964202880859, "loss": 0.21721075475215912, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 13.695949115602792, "step_time": 1.1631472549438477} +{"epoch": 0, "iter": 18566, "iter_tflops": 13.201608921094074, "iter_time": 1.2067018280029298, "loss": 0.33424022793769836, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 15.038976839978181, "step_time": 1.0592745628356934} +{"epoch": 0, "iter": 18567, "iter_tflops": 26.68875894534674, "iter_time": 0.5968957061767578, "loss": 0.3722447454929352, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.358694431516664, "step_time": 0.5617467918395996} +{"epoch": 0, "iter": 18568, "iter_tflops": 28.584921875489034, "iter_time": 0.5573010025024414, "loss": 0.25927838683128357, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.35985071658037, "step_time": 0.5247194976806642} +{"epoch": 0, "iter": 18569, "iter_tflops": 36.247270703980625, "iter_time": 0.5691764678955078, "loss": 0.3734612464904785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.13125193233182, "step_time": 0.5272280464172363} +{"epoch": 0, "iter": 18570, "iter_tflops": 12.247453725623892, "iter_time": 
1.6845210418701173, "loss": 0.3576061427593231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.040692948125542, "step_time": 1.2861722106933593} +{"epoch": 0, "iter": 18571, "iter_tflops": 14.492205235106933, "iter_time": 1.4235993194580079, "loss": 0.32456710934638977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.724305629360032, "step_time": 1.2335994071960448} +{"epoch": 0, "iter": 18572, "iter_tflops": 25.699609800951812, "iter_time": 0.8027784729003906, "loss": 0.40200382471084595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.02178156943458, "step_time": 0.49096189498901366} +{"epoch": 0, "iter": 18573, "iter_tflops": 22.652298193909445, "iter_time": 0.6725445632934571, "loss": 0.26053518056869507, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.061275985446798, "step_time": 0.6331617660522462} +{"epoch": 0, "iter": 18574, "iter_tflops": 24.527216915027452, "iter_time": 0.621133659362793, "loss": 0.3018935024738312, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 27.055805261315733, "step_time": 0.5630835914611816} +{"epoch": 0, "iter": 18575, "iter_tflops": 27.477463907966495, "iter_time": 0.554442726135254, "loss": 0.427634596824646, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.28545516111917, "step_time": 0.5202131881713867} +{"epoch": 0, "iter": 18576, "iter_tflops": 26.485527788386747, "iter_time": 0.5752077178955077, "loss": 0.37220609188079834, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.216498856048453, "step_time": 0.5399209899902343} +{"epoch": 0, "iter": 18577, "iter_tflops": 24.196811764644153, "iter_time": 0.8526368560791016, "loss": 0.6120280623435974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.500878625926518, "step_time": 0.8090346145629883} +{"epoch": 0, "iter": 18578, "iter_tflops": 16.030431374825554, "iter_time": 1.2869955291748048, "loss": 0.5529038906097412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.732779110442078, "step_time": 1.04552396774292} +{"epoch": 0, "iter": 18579, "iter_tflops": 38.1794250665887, 
"iter_time": 0.5403720321655273, "loss": 0.6051792502403259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.72402967832577, "step_time": 0.4944655075073242} +{"epoch": 0, "iter": 18580, "iter_tflops": 41.02279214945376, "iter_time": 0.5029178276062012, "loss": 0.6238963603973389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62233604330482, "step_time": 0.4623490238189697} +{"epoch": 0, "iter": 18581, "iter_tflops": 15.964198657607724, "iter_time": 1.2923350524902344, "loss": 0.562664806842804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.030301928095515, "step_time": 1.211434394836426} +{"epoch": 0, "iter": 18582, "iter_tflops": 19.276015036388188, "iter_time": 1.070298683166504, "loss": 0.5647165775299072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.2781799620019, "step_time": 0.886284646987915} +{"epoch": 0, "iter": 18583, "iter_tflops": 46.54411409319792, "iter_time": 0.4432589149475098, "loss": 0.6469675302505493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.487047279135766, "step_time": 0.4086413173675537} +{"epoch": 0, "iter": 18584, "iter_tflops": 45.321772633668054, "iter_time": 0.455213737487793, "loss": 0.4991270899772644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.49125965962811, "step_time": 0.4254600448608398} +{"epoch": 0, "iter": 18585, "iter_tflops": 33.37373529168468, "iter_time": 0.545058120727539, "loss": 0.07298990339040756, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 36.27274949728471, "step_time": 0.5014956321716308} +{"epoch": 0, "iter": 18586, "iter_tflops": 33.10161181277687, "iter_time": 0.5495389633178711, "loss": 0.13716717064380646, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 36.35957500034561, "step_time": 0.5002980766296387} +{"epoch": 0, "iter": 18587, "iter_tflops": 35.04393412051455, "iter_time": 0.5190805740356444, "loss": 0.11566707491874695, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 38.46834418400584, "step_time": 0.4728725872039795} +{"epoch": 0, "iter": 18588, "iter_tflops": 36.36836939794122, 
"iter_time": 0.5001770973205566, "loss": 0.12670505046844482, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 39.69268369776286, "step_time": 0.4582866096496582} +{"epoch": 0, "iter": 18589, "iter_tflops": 22.586779432982055, "iter_time": 0.8108424987792968, "loss": 0.025388386100530624, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 24.800610492999866, "step_time": 0.7384624938964843} +{"epoch": 0, "iter": 18590, "iter_tflops": 34.603725688460464, "iter_time": 0.5292586364746094, "loss": 0.020173445343971252, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 38.28801943408, "step_time": 0.478330322265625} +{"epoch": 0, "iter": 18591, "iter_tflops": 39.45086831611736, "iter_time": 0.46423111724853516, "loss": 0.018975479528307915, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 43.52855118238479, "step_time": 0.42074271202087404} +{"epoch": 0, "iter": 18592, "iter_tflops": 41.621120687709066, "iter_time": 0.44002468872070316, "loss": 0.04277876392006874, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 46.05217411210977, "step_time": 0.39768634223937993} +{"epoch": 0, "iter": 18593, "iter_tflops": 26.7334626096048, "iter_time": 0.7717329330444336, "loss": 0.06432262063026428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.757699894860252, "step_time": 0.7174111137390137} +{"epoch": 0, "iter": 18594, "iter_tflops": 9.02712241586443, "iter_time": 2.285456268310547, "loss": 0.13055723905563354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.858016117288802, "step_time": 1.739843605041504} +{"epoch": 0, "iter": 18595, "iter_tflops": 13.66891790185403, "iter_time": 1.5093435821533203, "loss": 0.1575821489095688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.40176898075191, "step_time": 1.1855744972229003} +{"epoch": 0, "iter": 18596, "iter_tflops": 38.40669856122087, "iter_time": 0.5371743545532227, "loss": 0.10376974195241928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.119900048545176, "step_time": 0.4898181972503662} +{"epoch": 0, "iter": 18597, "iter_tflops": 
12.850455456450467, "iter_time": 1.2301144866943359, "loss": 0.3615628182888031, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 13.694088000385104, "step_time": 1.1543325424194335} +{"epoch": 0, "iter": 18598, "iter_tflops": 13.457547617953331, "iter_time": 1.1746219940185547, "loss": 0.344619482755661, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 17.96257533148845, "step_time": 0.880025894165039} +{"epoch": 0, "iter": 18599, "iter_tflops": 25.310994583612864, "iter_time": 0.624532211303711, "loss": 0.360675573348999, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 27.189298401755345, "step_time": 0.5813879852294922} +{"epoch": 0, "iter": 18600, "iter_tflops": 22.911459055665787, "iter_time": 0.6899399719238282, "loss": 0.5491341352462769, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 24.685016130822117, "step_time": 0.640369499206543} +{"epoch": 0, "iter": 18601, "iter_tflops": 16.909085337206626, "iter_time": 1.2201188354492187, "loss": 0.4750972390174866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.02215670517043, "step_time": 1.1447627410888672} +{"epoch": 0, "iter": 18602, "iter_tflops": 14.19367155267521, "iter_time": 1.453541702270508, "loss": 0.39315247535705566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.924713402480297, "step_time": 1.2189921932220458} +{"epoch": 0, "iter": 18603, "iter_tflops": 38.0999810308959, "iter_time": 0.5414987869262695, "loss": 0.36900731921195984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.74318267720053, "step_time": 0.4942386322021485} +{"epoch": 0, "iter": 18604, "iter_tflops": 39.84938360071864, "iter_time": 0.5177267913818359, "loss": 0.37574446201324463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.48986678924592, "step_time": 0.4743885192871094} +{"epoch": 0, "iter": 18605, "iter_tflops": 19.014311554202294, "iter_time": 1.0850297393798827, "loss": 0.1176677793264389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.132603897062165, "step_time": 1.0247603149414064} +{"epoch": 0, "iter": 18606, 
"iter_tflops": 18.426683415622517, "iter_time": 1.119631408691406, "loss": 0.09065908193588257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.420961564279015, "step_time": 0.92016987991333} +{"epoch": 0, "iter": 18607, "iter_tflops": 50.16151958450958, "iter_time": 0.41129323196411127, "loss": 0.11734829843044281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.612508513688816, "step_time": 0.37777231025695795} +{"epoch": 0, "iter": 18608, "iter_tflops": 53.0238271656753, "iter_time": 0.389090991973877, "loss": 0.14717216789722443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.637094603817125, "step_time": 0.3579481868743897} +{"epoch": 0, "iter": 18609, "iter_tflops": 37.8849550826465, "iter_time": 0.5445722045898437, "loss": 0.617524266242981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87606364963185, "step_time": 0.5047230987548827} +{"epoch": 0, "iter": 18610, "iter_tflops": 34.93420106348758, "iter_time": 0.590570068359375, "loss": 0.5433622598648071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.75862045799797, "step_time": 0.5322968997955322} +{"epoch": 0, "iter": 18611, "iter_tflops": 36.90001676452779, "iter_time": 0.5591079711914062, "loss": 0.6026029586791992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.12379881973655, "step_time": 0.5141859474182129} +{"epoch": 0, "iter": 18612, "iter_tflops": 37.624410684321504, "iter_time": 0.5483432998657227, "loss": 0.6181319952011108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.58052997045321, "step_time": 0.5083988189697266} +{"epoch": 0, "iter": 18613, "iter_tflops": 35.39731307436389, "iter_time": 0.5828434906005859, "loss": 0.02888011932373047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35547688900791, "step_time": 0.5242242031097412} +{"epoch": 0, "iter": 18614, "iter_tflops": 36.12717076701038, "iter_time": 0.5710686187744142, "loss": 0.04292306303977966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.55272107579089, "step_time": 0.5087474517822266} +{"epoch": 0, "iter": 
18615, "iter_tflops": 39.305166834269656, "iter_time": 0.5248952026367187, "loss": 0.05481744930148125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.028688646296374, "step_time": 0.479472978591919} +{"epoch": 0, "iter": 18616, "iter_tflops": 50.231391957740556, "iter_time": 0.41072111892700197, "loss": 0.01647130772471428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.40058188015582, "step_time": 0.3723984985351562} +{"epoch": 0, "iter": 18617, "iter_tflops": 24.829658599388956, "iter_time": 0.8309052429199218, "loss": 0.14854878187179565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.598256456390597, "step_time": 0.7756558609008789} +{"epoch": 0, "iter": 18618, "iter_tflops": 36.84842030746616, "iter_time": 0.5598908538818359, "loss": 0.38858723640441895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45765740169009, "step_time": 0.5099428596496582} +{"epoch": 0, "iter": 18619, "iter_tflops": 40.377247820758406, "iter_time": 0.5109583892822266, "loss": 0.13709259033203125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.15619521212289, "step_time": 0.4672298736572265} +{"epoch": 0, "iter": 18620, "iter_tflops": 37.62827390342067, "iter_time": 0.5482870025634766, "loss": 0.30984410643577576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.12302158490679, "step_time": 0.5016920623779297} +{"epoch": 0, "iter": 18621, "iter_tflops": 20.665824399524677, "iter_time": 0.9983194046020508, "loss": 0.06022542342543602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.201922737570218, "step_time": 0.9292480545043946} +{"epoch": 0, "iter": 18622, "iter_tflops": 22.38210157190932, "iter_time": 0.9217674865722656, "loss": 0.042177844792604446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.988419948764847, "step_time": 0.7371296253204346} +{"epoch": 0, "iter": 18623, "iter_tflops": 50.345407849676754, "iter_time": 0.40979096984863284, "loss": 0.042698394507169724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.80078678614933, "step_time": 0.37647440338134763} 
+{"epoch": 0, "iter": 18624, "iter_tflops": 50.85738494733246, "iter_time": 0.4056656379699707, "loss": 0.07684438675642014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.03448501997626, "step_time": 0.37487574386596684} +{"epoch": 0, "iter": 18625, "iter_tflops": 17.197543885611168, "iter_time": 1.1996534881591796, "loss": 0.6709545850753784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.903062700338147, "step_time": 1.152377883911133} +{"epoch": 0, "iter": 18626, "iter_tflops": 16.076557403946218, "iter_time": 1.283302947998047, "loss": 0.5070281028747559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.762044929544288, "step_time": 0.9480310134887695} +{"epoch": 0, "iter": 18627, "iter_tflops": 48.18705960886816, "iter_time": 0.4281459312438965, "loss": 0.5837141871452332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.19688458306024, "step_time": 0.3952552661895752} +{"epoch": 0, "iter": 18628, "iter_tflops": 44.96000508632742, "iter_time": 0.45887658309936524, "loss": 0.7287104725837708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.38052738401189, "step_time": 0.4264338283538819} +{"epoch": 0, "iter": 18629, "iter_tflops": 23.94655542162207, "iter_time": 0.8615474395751954, "loss": 0.0032820121850818396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.275612685571563, "step_time": 0.8162450408935547} +{"epoch": 0, "iter": 18630, "iter_tflops": 24.23997189907354, "iter_time": 0.8511187057495118, "loss": 0.010270439088344574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.57966321915707, "step_time": 0.7480545845031739} +{"epoch": 0, "iter": 18631, "iter_tflops": 56.46145208999919, "iter_time": 0.3654013977050781, "loss": 0.011078094132244587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.2053152521466, "step_time": 0.3316612644195556} +{"epoch": 0, "iter": 18632, "iter_tflops": 54.03466631093792, "iter_time": 0.3818121757507324, "loss": 0.004968541208654642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.1809986602844, "step_time": 
0.3486100940704346} +{"epoch": 0, "iter": 18633, "iter_tflops": 33.362255319972384, "iter_time": 0.6183962478637696, "loss": 0.003209208371117711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.83601854985041, "step_time": 0.5757083053588867} +{"epoch": 0, "iter": 18634, "iter_tflops": 19.228417482196416, "iter_time": 1.0729480743408202, "loss": 0.0013121350202709436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.9569568586349, "step_time": 0.9396153411865235} +{"epoch": 0, "iter": 18635, "iter_tflops": 48.20678113129373, "iter_time": 0.42797077560424807, "loss": 0.002990440232679248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.45139511812514, "step_time": 0.38597857856750484} +{"epoch": 0, "iter": 18636, "iter_tflops": 46.134506876296946, "iter_time": 0.44719440841674807, "loss": 0.00495280884206295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.91020827702075, "step_time": 0.40524472808837886} +{"epoch": 0, "iter": 18637, "iter_tflops": 18.730766166685196, "iter_time": 1.101454864501953, "loss": 0.059403419494628906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.962414902089883, "step_time": 1.0334968795776367} +{"epoch": 0, "iter": 18638, "iter_tflops": 16.758525483558557, "iter_time": 1.2310804748535156, "loss": 0.0769507959485054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.891020511787, "step_time": 1.037206386566162} +{"epoch": 0, "iter": 18639, "iter_tflops": 39.37778205486773, "iter_time": 0.5239272613525391, "loss": 0.07239339500665665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.29251469897497, "step_time": 0.4765510540008545} +{"epoch": 0, "iter": 18640, "iter_tflops": 44.37900857174552, "iter_time": 0.46488405609130856, "loss": 0.0552804060280323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.752807501787366, "step_time": 0.423177547454834} +{"epoch": 0, "iter": 18641, "iter_tflops": 1.4236728296208108, "iter_time": 1.349281051635742, "loss": 0.24631625413894653, "lr": 3e-05, "seqlen": 784.0, "step_tflops": 
1.5245834678191943, "step_time": 1.259973503112793} +{"epoch": 0, "iter": 18642, "iter_tflops": 1.3841958046957261, "iter_time": 1.3877623138427733, "loss": 0.0017298406455665827, "lr": 3e-05, "seqlen": 784.0, "step_tflops": 1.799486113349207, "step_time": 1.0674907455444336} +{"epoch": 0, "iter": 18643, "iter_tflops": 3.9585364067714326, "iter_time": 0.4852638893127441, "loss": 0.19496570527553558, "lr": 3e-05, "seqlen": 784.0, "step_tflops": 4.266781411953952, "step_time": 0.4502069797515869} +{"epoch": 0, "iter": 18644, "iter_tflops": 4.491521258587824, "iter_time": 0.42768021392822264, "loss": 0.7488933205604553, "lr": 3e-05, "seqlen": 784.0, "step_tflops": 4.8616035584685635, "step_time": 0.3951236972808838} +{"epoch": 0, "iter": 18645, "iter_tflops": 25.32503072480747, "iter_time": 0.8146522598266602, "loss": 0.5119251608848572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.63583836534574, "step_time": 0.7745614471435546} +{"epoch": 0, "iter": 18646, "iter_tflops": 17.290134595169665, "iter_time": 1.1932292022705078, "loss": 0.6129194498062134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.62639284315167, "step_time": 1.0002278957366944} +{"epoch": 0, "iter": 18647, "iter_tflops": 45.763228461839155, "iter_time": 0.450822509765625, "loss": 0.423260897397995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.413873761139556, "step_time": 0.4175162143707275} +{"epoch": 0, "iter": 18648, "iter_tflops": 47.745253070155584, "iter_time": 0.4321077423095703, "loss": 0.5652351379394531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.68788704832556, "step_time": 0.3991475505828857} +{"epoch": 0, "iter": 18649, "iter_tflops": 41.6345883889794, "iter_time": 0.49552774047851567, "loss": 0.0961066484451294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.44384583086144, "step_time": 0.4539909229278565} +{"epoch": 0, "iter": 18650, "iter_tflops": 40.22960455959735, "iter_time": 0.5128336143493651, "loss": 0.1230856403708458, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.92028282272684, "step_time": 0.4592823600769043} +{"epoch": 0, "iter": 18651, "iter_tflops": 38.48288271907729, "iter_time": 0.5361109161376953, "loss": 0.11055188626050949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.66468031785207, "step_time": 0.4835637664794923} +{"epoch": 0, "iter": 18652, "iter_tflops": 39.90334761267361, "iter_time": 0.5170266342163086, "loss": 0.0893421471118927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.060618159151865, "step_time": 0.4682433967590332} +{"epoch": 0, "iter": 18653, "iter_tflops": 30.326919004441773, "iter_time": 0.680289794921875, "loss": 0.5131543874740601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.640630213896856, "step_time": 0.6132790431976318} +{"epoch": 0, "iter": 18654, "iter_tflops": 43.146809726564605, "iter_time": 0.4781603469848633, "loss": 0.5109458565711975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.97472105432874, "step_time": 0.4391956577301026} +{"epoch": 0, "iter": 18655, "iter_tflops": 44.811333376632874, "iter_time": 0.4603990097045898, "loss": 0.48422935605049133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42570972528749, "step_time": 0.42603595542907713} +{"epoch": 0, "iter": 18656, "iter_tflops": 36.168819138784485, "iter_time": 0.5704110336303712, "loss": 0.6372854113578796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.61863367089552, "step_time": 0.5207421760559082} +{"epoch": 0, "iter": 18657, "iter_tflops": 21.742869923545616, "iter_time": 0.9488670806884766, "loss": 0.19864143431186676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.679442738434062, "step_time": 0.8712660064697265} +{"epoch": 0, "iter": 18658, "iter_tflops": 16.67258180927562, "iter_time": 1.2374264373779298, "loss": 0.2389262467622757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.292484352227884, "step_time": 1.0166864318847657} +{"epoch": 0, "iter": 18659, "iter_tflops": 45.35954908681348, "iter_time": 0.4548346252441406, "loss": 0.19960300624370575, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 49.45269423643324, "step_time": 0.4171884632110596} +{"epoch": 0, "iter": 18660, "iter_tflops": 47.7084721977259, "iter_time": 0.43244087600708003, "loss": 0.1565920114517212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.73229877922374, "step_time": 0.39880488586425783} +{"epoch": 0, "iter": 18661, "iter_tflops": 26.771531206294952, "iter_time": 0.7706355438232422, "loss": 0.050475895404815674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.22706666695333, "step_time": 0.7308975372314452} +{"epoch": 0, "iter": 18662, "iter_tflops": 19.29242621484246, "iter_time": 1.0693882293701171, "loss": 0.039268672466278076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.684877782825666, "step_time": 0.9514046478271485} +{"epoch": 0, "iter": 18663, "iter_tflops": 51.877434486071465, "iter_time": 0.39768916320800785, "loss": 0.022407986223697662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.7918311963058, "step_time": 0.36327572250366214} +{"epoch": 0, "iter": 18664, "iter_tflops": 53.82839804217452, "iter_time": 0.3832752647399902, "loss": 0.038959018886089325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.858155208784076, "step_time": 0.35052225875854487} +{"epoch": 0, "iter": 18665, "iter_tflops": 35.74044245183544, "iter_time": 0.5772478485107423, "loss": 0.04561687260866165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.269789189164186, "step_time": 0.5390960845947266} +{"epoch": 0, "iter": 18666, "iter_tflops": 15.924489938083138, "iter_time": 1.2955575714111327, "loss": 0.027058500796556473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.479419993256997, "step_time": 1.0074061431884767} +{"epoch": 0, "iter": 18667, "iter_tflops": 45.36764114485798, "iter_time": 0.45475349807739257, "loss": 0.029430439695715904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.22536079504301, "step_time": 0.4107704391479492} +{"epoch": 0, "iter": 18668, "iter_tflops": 46.683692299385484, "iter_time": 0.44193362808227543, "loss": 
0.01608693227171898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.3685627744648, "step_time": 0.4016287860870361} +{"epoch": 0, "iter": 18669, "iter_tflops": 34.20505901540008, "iter_time": 0.6031591262817383, "loss": 0.021964073181152344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.04192589075822, "step_time": 0.5423251590728759} +{"epoch": 0, "iter": 18670, "iter_tflops": 40.48138264793553, "iter_time": 0.5096439933776855, "loss": 0.0320228636264801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1011842950463, "step_time": 0.45744017219543454} +{"epoch": 0, "iter": 18671, "iter_tflops": 43.6338269297016, "iter_time": 0.4728233795166016, "loss": 0.026676861569285393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.36549751657737, "step_time": 0.4265663452148437} +{"epoch": 0, "iter": 18672, "iter_tflops": 40.83136305726814, "iter_time": 0.5052756500244141, "loss": 0.01112281158566475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.15019409811138, "step_time": 0.4569436283111572} +{"epoch": 0, "iter": 18673, "iter_tflops": 18.72031872464067, "iter_time": 1.102069564819336, "loss": 0.5368233919143677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.833190714081724, "step_time": 1.040230682373047} +{"epoch": 0, "iter": 18674, "iter_tflops": 9.932804730599706, "iter_time": 2.0770662536621094, "loss": 0.6273305416107178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.472427019593479, "step_time": 1.6541362380981444} +{"epoch": 0, "iter": 18675, "iter_tflops": 10.608840665198029, "iter_time": 1.9447076416015625, "loss": 0.5241749286651611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.760590130623392, "step_time": 1.4992884254455565} +{"epoch": 0, "iter": 18676, "iter_tflops": 44.479914065401175, "iter_time": 0.46382943725585934, "loss": 0.5278317928314209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.11203569037419, "step_time": 0.4288135643005371} +{"epoch": 0, "iter": 18677, "iter_tflops": 30.528841318955525, "iter_time": 0.5634865798950195, 
"loss": 0.3381336033344269, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 32.827612989585106, "step_time": 0.5240281219482422} +{"epoch": 0, "iter": 18678, "iter_tflops": 30.979758332759438, "iter_time": 0.555284912109375, "loss": 0.2633451521396637, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 33.0677432493892, "step_time": 0.5202227516174317} +{"epoch": 0, "iter": 18679, "iter_tflops": 30.584770915861746, "iter_time": 0.5624561462402344, "loss": 0.5244163870811462, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 32.653678113651566, "step_time": 0.5268194389343261} +{"epoch": 0, "iter": 18680, "iter_tflops": 30.360927688296503, "iter_time": 0.5666029891967774, "loss": 0.42026978731155396, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 32.26156277579767, "step_time": 0.5332225379943848} +{"epoch": 0, "iter": 18681, "iter_tflops": 36.64256617875782, "iter_time": 0.5630362625122071, "loss": 0.01722031645476818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.72081939919713, "step_time": 0.5194025154113769} +{"epoch": 0, "iter": 18682, "iter_tflops": 53.7867854550568, "iter_time": 0.3835717887878418, "loss": 0.001545865903608501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.71725524753983, "step_time": 0.345479600906372} +{"epoch": 0, "iter": 18683, "iter_tflops": 54.870806335319045, "iter_time": 0.37599399185180665, "loss": 0.008160039782524109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.126560507007824, "step_time": 0.3431277847290039} +{"epoch": 0, "iter": 18684, "iter_tflops": 56.95571953855759, "iter_time": 0.36223040771484377, "loss": 0.005858419928699732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.59888907871643, "step_time": 0.32957603263854984} +{"epoch": 0, "iter": 18685, "iter_tflops": 26.83848264244684, "iter_time": 0.7687131118774414, "loss": 0.14128413796424866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.31747192678633, "step_time": 0.7285641021728515} +{"epoch": 0, "iter": 18686, "iter_tflops": 18.640781108235807, "iter_time": 
1.106771942138672, "loss": 0.10159912705421448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.84551176085427, "step_time": 0.9030698776245117} +{"epoch": 0, "iter": 18687, "iter_tflops": 47.90007370778681, "iter_time": 0.43071110153198244, "loss": 0.08480781316757202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.91795966485037, "step_time": 0.39737874221801756} +{"epoch": 0, "iter": 18688, "iter_tflops": 52.02306207380124, "iter_time": 0.3965759162902832, "loss": 0.05675279721617699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.673667907291225, "step_time": 0.3640331439971924} +{"epoch": 0, "iter": 18689, "iter_tflops": 31.298493771543892, "iter_time": 0.6591720886230468, "loss": 0.5019093155860901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.24750662712276, "step_time": 0.6205305480957031} +{"epoch": 0, "iter": 18690, "iter_tflops": 21.54354666881024, "iter_time": 0.9576461029052733, "loss": 0.7048938274383545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.089957174075998, "step_time": 0.8564188537597657} +{"epoch": 0, "iter": 18691, "iter_tflops": 42.64403503956911, "iter_time": 0.48379787445068356, "loss": 0.7720400094985962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.310665785016226, "step_time": 0.4454933471679688} +{"epoch": 0, "iter": 18692, "iter_tflops": 40.73505694768868, "iter_time": 0.5064702262878418, "loss": 0.6556617021560669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.69956888253566, "step_time": 0.472112060546875} +{"epoch": 0, "iter": 18693, "iter_tflops": 27.203408811236386, "iter_time": 0.7584010391235352, "loss": 0.6274375319480896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.68570490692905, "step_time": 0.7192116622924806} +{"epoch": 0, "iter": 18694, "iter_tflops": 14.290254632857478, "iter_time": 1.4437176971435546, "loss": 0.5476598739624023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.031480050047954, "step_time": 1.0299335575103759} +{"epoch": 0, "iter": 18695, "iter_tflops": 47.27112359762826, 
"iter_time": 0.43644178390502936, "loss": 0.5694575905799866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.392948713228975, "step_time": 0.40143821334838864} +{"epoch": 0, "iter": 18696, "iter_tflops": 41.88090070590337, "iter_time": 0.4926134147644043, "loss": 0.7761813402175903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.01707179020991, "step_time": 0.45829487991333007} +{"epoch": 0, "iter": 18697, "iter_tflops": 29.06177052747108, "iter_time": 0.7099049072265626, "loss": 0.22767968475818634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.817119992909685, "step_time": 0.6694685783386232} +{"epoch": 0, "iter": 18698, "iter_tflops": 18.153114201236644, "iter_time": 1.136504364013672, "loss": 0.35985010862350464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.384562497457747, "step_time": 1.0120940055847167} +{"epoch": 0, "iter": 18699, "iter_tflops": 46.95953856211342, "iter_time": 0.4393376541137695, "loss": 0.2373930811882019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.82532077264624, "step_time": 0.4059215602874756} +{"epoch": 0, "iter": 18700, "iter_tflops": 43.46650650630175, "iter_time": 0.47464347076416014, "loss": 0.23808813095092773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.68889436199327, "step_time": 0.44188438796997065} +{"epoch": 0, "iter": 18701, "iter_tflops": 26.160842362985647, "iter_time": 0.788624969482422, "loss": 0.7225505709648132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.578924445306257, "step_time": 0.7480746231079102} +{"epoch": 0, "iter": 18702, "iter_tflops": 12.695102098083323, "iter_time": 1.625122299194336, "loss": 0.8156962394714355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.51845163773482, "step_time": 1.3294556694030761} +{"epoch": 0, "iter": 18703, "iter_tflops": 36.89352797331384, "iter_time": 0.5592063064575196, "loss": 0.5698543787002563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.22036026079046, "step_time": 0.5129514846801758} +{"epoch": 0, "iter": 18704, "iter_tflops": 
39.13129638373364, "iter_time": 0.5272274475097656, "loss": 0.42625850439071655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.222367703632784, "step_time": 0.4886294784545899} +{"epoch": 0, "iter": 18705, "iter_tflops": 13.82920221529589, "iter_time": 1.491849868774414, "loss": 0.6876755356788635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.728657746008032, "step_time": 1.400744987487793} +{"epoch": 0, "iter": 18706, "iter_tflops": 19.149267845936436, "iter_time": 1.0773828887939454, "loss": 0.5846447348594666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.335846699479553, "step_time": 0.8143044815063477} +{"epoch": 0, "iter": 18707, "iter_tflops": 46.52851751681853, "iter_time": 0.4434074974060058, "loss": 0.6191270351409912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.138116956334805, "step_time": 0.41148520851135256} +{"epoch": 0, "iter": 18708, "iter_tflops": 43.38494381733359, "iter_time": 0.4755357894897461, "loss": 0.6017538905143738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.595865214801776, "step_time": 0.4427666149139404} +{"epoch": 0, "iter": 18709, "iter_tflops": 35.67320555126928, "iter_time": 0.5783358459472656, "loss": 0.18780989944934845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.31415169251824, "step_time": 0.5384718856811523} +{"epoch": 0, "iter": 18710, "iter_tflops": 21.619828374712892, "iter_time": 0.9542672195434571, "loss": 0.1854105293750763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.288869834254058, "step_time": 0.7847843456268311} +{"epoch": 0, "iter": 18711, "iter_tflops": 48.30686234921664, "iter_time": 0.42708411407470703, "loss": 0.2787475883960724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.47156373676774, "step_time": 0.3931861763000488} +{"epoch": 0, "iter": 18712, "iter_tflops": 47.9254744573779, "iter_time": 0.4304828224182129, "loss": 0.2783963084220886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.94048389654919, "step_time": 0.3972064170837402} +{"epoch": 0, "iter": 18713, 
"iter_tflops": 29.75258599122279, "iter_time": 0.693421859741211, "loss": 0.5628964304924011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.529752759291604, "step_time": 0.6543373069763183} +{"epoch": 0, "iter": 18714, "iter_tflops": 15.401157756018943, "iter_time": 1.3395806884765626, "loss": 0.5332390666007996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.11124441290832, "step_time": 1.139131748199463} +{"epoch": 0, "iter": 18715, "iter_tflops": 36.46404828939639, "iter_time": 0.5657927322387695, "loss": 0.5454577207565308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.684409011383565, "step_time": 0.5198790664672852} +{"epoch": 0, "iter": 18716, "iter_tflops": 32.91743573764084, "iter_time": 0.6267527542114257, "loss": 0.44476377964019775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.598105135619114, "step_time": 0.5795559463500977} +{"epoch": 0, "iter": 18717, "iter_tflops": 15.446621172572147, "iter_time": 1.335637954711914, "loss": 0.38156184554100037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.564561216995475, "step_time": 1.2454959259033203} +{"epoch": 0, "iter": 18718, "iter_tflops": 20.35620920734694, "iter_time": 1.0135037078857423, "loss": 0.4914799630641937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.12570422091937, "step_time": 0.8921282272338866} +{"epoch": 0, "iter": 18719, "iter_tflops": 41.94277504090666, "iter_time": 0.4918867073059082, "loss": 0.32074230909347534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08120352183895, "step_time": 0.4576429176330567} +{"epoch": 0, "iter": 18720, "iter_tflops": 48.95262578199961, "iter_time": 0.4214501914978027, "loss": 0.47147679328918457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.9364519098936, "step_time": 0.3897332134246826} +{"epoch": 0, "iter": 18721, "iter_tflops": 29.949273499306827, "iter_time": 0.6888679122924806, "loss": 0.04569859057664871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.980702015535773, "step_time": 0.6451107139587403} +{"epoch": 0, "iter": 
18722, "iter_tflops": 18.1670659865273, "iter_time": 1.1356315612792969, "loss": 0.02080434188246727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.325879416242447, "step_time": 0.9240887279510498} +{"epoch": 0, "iter": 18723, "iter_tflops": 45.7980834010641, "iter_time": 0.4504794082641601, "loss": 0.03225087746977806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5313862978993, "step_time": 0.40828275299072264} +{"epoch": 0, "iter": 18724, "iter_tflops": 40.65379017235753, "iter_time": 0.5074826583862304, "loss": 0.039428241550922394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.77834922581545, "step_time": 0.46073814392089846} +{"epoch": 0, "iter": 18725, "iter_tflops": 22.341176310575513, "iter_time": 0.9234560089111328, "loss": 0.10368508100509644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.276013318414652, "step_time": 0.8498550910949708} +{"epoch": 0, "iter": 18726, "iter_tflops": 20.27591933760056, "iter_time": 1.0175170440673829, "loss": 0.13018372654914856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.01999885223298, "step_time": 0.763549015045166} +{"epoch": 0, "iter": 18727, "iter_tflops": 51.04050676024013, "iter_time": 0.40421020126342777, "loss": 0.06350260972976685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.79791713917995, "step_time": 0.36974666023254393} +{"epoch": 0, "iter": 18728, "iter_tflops": 49.583595374987404, "iter_time": 0.4160870819091797, "loss": 0.14465779066085815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.49358989076366, "step_time": 0.38567412567138676} +{"epoch": 0, "iter": 18729, "iter_tflops": 31.86698408470242, "iter_time": 0.6474128036499023, "loss": 0.080569326877594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.77265184781104, "step_time": 0.6108816566467286} +{"epoch": 0, "iter": 18730, "iter_tflops": 7.975408305916809, "iter_time": 2.5868385314941404, "loss": 0.050892267376184464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.876125875048531, "step_time": 2.088986488342285} +{"epoch": 
0, "iter": 18731, "iter_tflops": 11.220309908356342, "iter_time": 1.8387276000976562, "loss": 0.031161194667220116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.048628695773436, "step_time": 1.4685485649108887} +{"epoch": 0, "iter": 18732, "iter_tflops": 30.493935880717554, "iter_time": 0.6765638122558595, "loss": 0.05818095803260803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.05432191922581, "step_time": 0.5567796802520751} +{"epoch": 0, "iter": 18733, "iter_tflops": 11.93162103789885, "iter_time": 1.2973991088867187, "loss": 0.3764301538467407, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 12.659489338907827, "step_time": 1.2228040237426756} +{"epoch": 0, "iter": 18734, "iter_tflops": 21.240366785545305, "iter_time": 0.7288044815063477, "loss": 0.3429417908191681, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 26.23145697727689, "step_time": 0.5901339950561524} +{"epoch": 0, "iter": 18735, "iter_tflops": 27.469036344734096, "iter_time": 0.5635463256835938, "loss": 0.21832674741744995, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 29.355244536116906, "step_time": 0.5273359069824219} +{"epoch": 0, "iter": 18736, "iter_tflops": 27.37424341401955, "iter_time": 0.5654978027343751, "loss": 0.3194998800754547, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 29.110479754605024, "step_time": 0.531769817352295} +{"epoch": 0, "iter": 18737, "iter_tflops": 35.72898062244101, "iter_time": 0.5774330291748047, "loss": 0.004533558152616024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.69622424733833, "step_time": 0.5331552085876464} +{"epoch": 0, "iter": 18738, "iter_tflops": 16.915264480797255, "iter_time": 1.2196731262207032, "loss": 0.012786268256604671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.852074372892684, "step_time": 0.9441251735687257} +{"epoch": 0, "iter": 18739, "iter_tflops": 56.33000883357679, "iter_time": 0.3662540435791015, "loss": 0.0011941769625991583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.81721802613673, "step_time": 
0.3337434806823731} +{"epoch": 0, "iter": 18740, "iter_tflops": 58.07218839697707, "iter_time": 0.35526633453369144, "loss": 0.0034342645667493343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.69120398391471, "step_time": 0.3239237480163574} +{"epoch": 0, "iter": 18741, "iter_tflops": 33.1996473844706, "iter_time": 0.6214250793457031, "loss": 0.17403598129749298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.55192396354601, "step_time": 0.5803087768554687} +{"epoch": 0, "iter": 18742, "iter_tflops": 19.615118601117878, "iter_time": 1.0517955017089844, "loss": 0.2556597888469696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.988457514684256, "step_time": 0.8600425224304199} +{"epoch": 0, "iter": 18743, "iter_tflops": 41.51947140587298, "iter_time": 0.4969016418457032, "loss": 0.15827345848083496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.377536501985645, "step_time": 0.45465433120727533} +{"epoch": 0, "iter": 18744, "iter_tflops": 38.20120473880733, "iter_time": 0.5400639495849608, "loss": 0.142266646027565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60886673171408, "step_time": 0.4958340644836426} +{"epoch": 0, "iter": 18745, "iter_tflops": 16.7688700857155, "iter_time": 1.2303210296630858, "loss": 0.30282995104789734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.827793492912843, "step_time": 1.1572432403564452} +{"epoch": 0, "iter": 18746, "iter_tflops": 19.208871981728063, "iter_time": 1.074039825439453, "loss": 0.3751828968524933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.14463827298541, "step_time": 0.8913983993530273} +{"epoch": 0, "iter": 18747, "iter_tflops": 46.77751321290496, "iter_time": 0.441047248840332, "loss": 0.4295378029346466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.54801919399026, "step_time": 0.4081484069824219} +{"epoch": 0, "iter": 18748, "iter_tflops": 51.639500187647116, "iter_time": 0.399521556854248, "loss": 0.5128039121627808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.026646826198416, 
"step_time": 0.3682371635437011} +{"epoch": 0, "iter": 18749, "iter_tflops": 51.665022365144296, "iter_time": 0.39932419586181644, "loss": 0.0047720191068947315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.26358315874135, "step_time": 0.3602829647064209} +{"epoch": 0, "iter": 18750, "iter_tflops": 47.09398921103442, "iter_time": 0.4380833702087402, "loss": 0.0302500668913126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.761476908722535, "step_time": 0.39858007812499996} +{"epoch": 0, "iter": 18751, "iter_tflops": 54.56454491469661, "iter_time": 0.3781043815612793, "loss": 0.001499632140621543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.09178867755019, "step_time": 0.3433263339996338} +{"epoch": 0, "iter": 18752, "iter_tflops": 53.99276666294847, "iter_time": 0.38210847091674804, "loss": 0.01008270587772131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.273349160367324, "step_time": 0.3480669441223145} +{"epoch": 0, "iter": 18753, "iter_tflops": 29.361992639819036, "iter_time": 0.7026462326049804, "loss": 0.38459691405296326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.17203415398652, "step_time": 0.6618462371826173} +{"epoch": 0, "iter": 18754, "iter_tflops": 15.105513802119125, "iter_time": 1.365798858642578, "loss": 0.40662264823913574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.55824309765174, "step_time": 1.175008991241455} +{"epoch": 0, "iter": 18755, "iter_tflops": 48.49856193902508, "iter_time": 0.42539598464965817, "loss": 0.25915858149528503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.660815928471166, "step_time": 0.3917731456756592} +{"epoch": 0, "iter": 18756, "iter_tflops": 49.44643563477403, "iter_time": 0.4172412681579589, "loss": 0.31675300002098083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21388897544299, "step_time": 0.3877012920379639} +{"epoch": 0, "iter": 18757, "iter_tflops": 30.072159255851986, "iter_time": 0.686052947998047, "loss": 0.13419818878173828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
31.750630907678897, "step_time": 0.6497853088378907} +{"epoch": 0, "iter": 18758, "iter_tflops": 13.367814975469845, "iter_time": 1.5433407440185547, "loss": 0.11610742658376694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.68467427162387, "step_time": 1.2365295944213868} +{"epoch": 0, "iter": 18759, "iter_tflops": 42.310127387888805, "iter_time": 0.4876159629821777, "loss": 0.12514105439186096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.534145280315855, "step_time": 0.4530906066894531} +{"epoch": 0, "iter": 18760, "iter_tflops": 46.19534376828319, "iter_time": 0.4466054763793946, "loss": 0.1174902394413948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.292641520313786, "step_time": 0.4102209167480469} +{"epoch": 0, "iter": 18761, "iter_tflops": 33.78530591307228, "iter_time": 0.6106528549194337, "loss": 0.3954834043979645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.12499388822207, "step_time": 0.5711030311584473} +{"epoch": 0, "iter": 18762, "iter_tflops": 10.78677093171567, "iter_time": 1.9126292419433595, "loss": 0.3182709217071533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.445173436943119, "step_time": 1.6577586174011232} +{"epoch": 0, "iter": 18763, "iter_tflops": 36.56931139269505, "iter_time": 0.5641641235351563, "loss": 0.37634992599487305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.798411367019014, "step_time": 0.5183898754119873} +{"epoch": 0, "iter": 18764, "iter_tflops": 40.90841561627053, "iter_time": 0.5043239440917968, "loss": 0.39595091342926025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.430807267360805, "step_time": 0.4643420810699463} +{"epoch": 0, "iter": 18765, "iter_tflops": 19.55854901211035, "iter_time": 1.054837631225586, "loss": 0.009509860537946224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.051957593083433, "step_time": 0.9800083160400391} +{"epoch": 0, "iter": 18766, "iter_tflops": 39.80732279213074, "iter_time": 0.5182738265991211, "loss": 0.0025534951128065586, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 43.97252762191139, "step_time": 0.46918143272399904} +{"epoch": 0, "iter": 18767, "iter_tflops": 45.82279400700132, "iter_time": 0.4502364807128906, "loss": 0.003691589692607522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.57512670577765, "step_time": 0.4079296455383301} +{"epoch": 0, "iter": 18768, "iter_tflops": 44.98850013285461, "iter_time": 0.45858593750000004, "loss": 0.0007551913731731474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63970350978727, "step_time": 0.4156167755126953} +{"epoch": 0, "iter": 18769, "iter_tflops": 28.712638926791286, "iter_time": 0.7185370025634766, "loss": 0.1682911217212677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.150660191798192, "step_time": 0.6623003616333007} +{"epoch": 0, "iter": 18770, "iter_tflops": 8.53837391336811, "iter_time": 2.416278991699219, "loss": 0.1902126520872116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.222152667719987, "step_time": 2.018272880554199} +{"epoch": 0, "iter": 18771, "iter_tflops": 13.043605777727423, "iter_time": 1.5817017059326173, "loss": 0.1508684903383255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.182594062162764, "step_time": 1.2006972541809082} +{"epoch": 0, "iter": 18772, "iter_tflops": 32.11777868141255, "iter_time": 0.6423574218750001, "loss": 0.2097570151090622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.254639050477, "step_time": 0.500091480255127} +{"epoch": 0, "iter": 18773, "iter_tflops": 22.46257441952276, "iter_time": 0.7676638412475586, "loss": 0.40198689699172974, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 23.6898341733863, "step_time": 0.7278947601318361} +{"epoch": 0, "iter": 18774, "iter_tflops": 10.720987753784613, "iter_time": 1.608406478881836, "loss": 0.30338597297668457, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 14.279160869569155, "step_time": 1.207613410949707} +{"epoch": 0, "iter": 18775, "iter_tflops": 26.495002845868182, "iter_time": 0.6508286209106445, "loss": 0.3263980448246002, "lr": 3e-05, "seqlen": 
6880.0, "step_tflops": 28.552353831744274, "step_time": 0.6039329109191894} +{"epoch": 0, "iter": 18776, "iter_tflops": 27.579963434636152, "iter_time": 0.6252258529663086, "loss": 0.34382230043411255, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 29.605120063144742, "step_time": 0.5824568901062013} +{"epoch": 0, "iter": 18777, "iter_tflops": 20.05713521613978, "iter_time": 1.0286161651611327, "loss": 0.508901834487915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.670431003758697, "step_time": 0.9520389099121094} +{"epoch": 0, "iter": 18778, "iter_tflops": 19.311068696106577, "iter_time": 1.0683558654785155, "loss": 0.5024040341377258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.743600638245635, "step_time": 0.8689117469787597} +{"epoch": 0, "iter": 18779, "iter_tflops": 47.50828341609235, "iter_time": 0.4342630805969238, "loss": 0.5964553952217102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.248337395568306, "step_time": 0.40257098197937013} +{"epoch": 0, "iter": 18780, "iter_tflops": 46.88518646927836, "iter_time": 0.44003437042236326, "loss": 0.4516102969646454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.29391503911923, "step_time": 0.4102105293273926} +{"epoch": 0, "iter": 18781, "iter_tflops": 25.446370340236065, "iter_time": 0.7440524368286132, "loss": 0.03962899371981621, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 27.066690968138456, "step_time": 0.6995104751586914} +{"epoch": 0, "iter": 18782, "iter_tflops": 12.512613327971508, "iter_time": 1.5131478424072264, "loss": 0.025695841759443283, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 15.280016973174678, "step_time": 1.23909769821167} +{"epoch": 0, "iter": 18783, "iter_tflops": 44.9736944951194, "iter_time": 0.42098907089233395, "loss": 0.035282645374536514, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 49.504450027672014, "step_time": 0.3824592304229736} +{"epoch": 0, "iter": 18784, "iter_tflops": 50.27232941285114, "iter_time": 0.37661739730834964, "loss": 0.024990804493427277, 
"lr": 3e-05, "seqlen": 7536.0, "step_tflops": 55.02910363240615, "step_time": 0.34406218910217284} +{"epoch": 0, "iter": 18785, "iter_tflops": 23.45485111628951, "iter_time": 0.8796088027954102, "loss": 0.22539030015468597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.636903182348036, "step_time": 0.8374061203002929} +{"epoch": 0, "iter": 18786, "iter_tflops": 14.276089818416132, "iter_time": 1.445150161743164, "loss": 0.2494668811559677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.510240088823643, "step_time": 1.114577304840088} +{"epoch": 0, "iter": 18787, "iter_tflops": 38.0093838953514, "iter_time": 0.5427894744873047, "loss": 0.26012516021728516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.372017682280585, "step_time": 0.4986726455688477} +{"epoch": 0, "iter": 18788, "iter_tflops": 43.58480783623034, "iter_time": 0.4733551559448242, "loss": 0.2569839358329773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.501543080452926, "step_time": 0.43432470130920414} +{"epoch": 0, "iter": 18789, "iter_tflops": 16.934325965778235, "iter_time": 1.2183002471923827, "loss": 0.556837260723114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.31143500604906, "step_time": 1.1266781387329101} +{"epoch": 0, "iter": 18790, "iter_tflops": 20.594894846205307, "iter_time": 1.0017576522827147, "loss": 0.5141649842262268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.75735780463141, "step_time": 0.8684085865020752} +{"epoch": 0, "iter": 18791, "iter_tflops": 41.390201958428335, "iter_time": 0.49845355987548823, "loss": 0.4982578754425049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.71630438690019, "step_time": 0.4613774280548096} +{"epoch": 0, "iter": 18792, "iter_tflops": 46.413054095927414, "iter_time": 0.4445105781555176, "loss": 0.5568908452987671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.22693643588797, "step_time": 0.4107575531005859} +{"epoch": 0, "iter": 18793, "iter_tflops": 40.98049271864305, "iter_time": 0.5034369316101075, "loss": 
0.2863534092903137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.663591504958106, "step_time": 0.4619219551086426} +{"epoch": 0, "iter": 18794, "iter_tflops": 44.86824862807859, "iter_time": 0.4598149948120117, "loss": 0.28232741355895996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30610722925999, "step_time": 0.41011111068725586} +{"epoch": 0, "iter": 18795, "iter_tflops": 46.348342657417824, "iter_time": 0.4451312026977539, "loss": 0.3058544993400574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40820985905185, "step_time": 0.409280424118042} +{"epoch": 0, "iter": 18796, "iter_tflops": 46.77812697967259, "iter_time": 0.44104146194458016, "loss": 0.35393548011779785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.46965780998104, "step_time": 0.4087821159362792} +{"epoch": 0, "iter": 18797, "iter_tflops": 44.98517383102818, "iter_time": 0.4586198463439941, "loss": 0.46509313583374023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23808962109408, "step_time": 0.4190067825317383} +{"epoch": 0, "iter": 18798, "iter_tflops": 36.05947005212856, "iter_time": 0.5721407852172851, "loss": 0.481586754322052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.677717410333884, "step_time": 0.5199667434692383} +{"epoch": 0, "iter": 18799, "iter_tflops": 41.28164393036099, "iter_time": 0.49976433944702153, "loss": 0.5029095411300659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.184635279303386, "step_time": 0.4565953311920166} +{"epoch": 0, "iter": 18800, "iter_tflops": 39.152666829497775, "iter_time": 0.5269396743774414, "loss": 0.303148478269577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.709093943561015, "step_time": 0.48306090354919434} +{"epoch": 0, "iter": 18801, "iter_tflops": 21.483647078090346, "iter_time": 0.960316162109375, "loss": 0.0396389476954937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.22514211627285, "step_time": 0.8883086013793946} +{"epoch": 0, "iter": 18802, "iter_tflops": 16.051643181999257, "iter_time": 
1.2852947998046873, "loss": 0.02342892810702324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.026509673734147, "step_time": 1.0301891765594484} +{"epoch": 0, "iter": 18803, "iter_tflops": 42.248503198477806, "iter_time": 0.48832720565795895, "loss": 0.01956200785934925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.746534880894465, "step_time": 0.4413395252227783} +{"epoch": 0, "iter": 18804, "iter_tflops": 44.0218916381473, "iter_time": 0.46865531539917, "loss": 0.029190361499786377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.956487868006015, "step_time": 0.4302044296264648} +{"epoch": 0, "iter": 18805, "iter_tflops": 21.25090002426968, "iter_time": 0.9708338699340819, "loss": 0.17854958772659302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.188956394466736, "step_time": 0.8896947822570801} +{"epoch": 0, "iter": 18806, "iter_tflops": 44.96487007347792, "iter_time": 0.45882693481445314, "loss": 0.2920670211315155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.93819423650297, "step_time": 0.4215744743347168} +{"epoch": 0, "iter": 18807, "iter_tflops": 51.66198573105937, "iter_time": 0.3993476676940918, "loss": 0.2577859163284302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.98077857420025, "step_time": 0.3685388813018799} +{"epoch": 0, "iter": 18808, "iter_tflops": 50.72523427739626, "iter_time": 0.40672248840332037, "loss": 0.16441284120082855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.81440561616838, "step_time": 0.37638086700439455} +{"epoch": 0, "iter": 18809, "iter_tflops": 32.93387506575392, "iter_time": 0.6264399032592773, "loss": 0.0660441592335701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.245313791235176, "step_time": 0.5853570671081544} +{"epoch": 0, "iter": 18810, "iter_tflops": 40.88785841446591, "iter_time": 0.5045775032043458, "loss": 0.08696168661117554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.07694713523008, "step_time": 0.4576861305236816} +{"epoch": 0, "iter": 18811, "iter_tflops": 48.32963294469186, 
"iter_time": 0.4268828926086426, "loss": 0.10324452817440033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.443071004155435, "step_time": 0.3933997974395752} +{"epoch": 0, "iter": 18812, "iter_tflops": 44.1785143209728, "iter_time": 0.4669938278198242, "loss": 0.08566957712173462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.006683199091526, "step_time": 0.4297546119689941} +{"epoch": 0, "iter": 18813, "iter_tflops": 28.862249937386206, "iter_time": 0.7148123779296875, "loss": 0.13730068504810333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.7161321235908, "step_time": 0.6716696434020996} +{"epoch": 0, "iter": 18814, "iter_tflops": 12.265298701786135, "iter_time": 1.6820702056884766, "loss": 0.14707323908805847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.183021569250904, "step_time": 1.200667381286621} +{"epoch": 0, "iter": 18815, "iter_tflops": 46.970192831403864, "iter_time": 0.43923799896240234, "loss": 0.15392152965068817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.43105363898401, "step_time": 0.3861255226135254} +{"epoch": 0, "iter": 18816, "iter_tflops": 50.065792914234144, "iter_time": 0.41207963180541995, "loss": 0.1155022606253624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.39254196017609, "step_time": 0.3793000431060791} +{"epoch": 0, "iter": 18817, "iter_tflops": 30.0218601690185, "iter_time": 0.5197158126831055, "loss": 0.0036198939196765423, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 32.559343270275896, "step_time": 0.47921222877502445} +{"epoch": 0, "iter": 18818, "iter_tflops": 6.4218855675981805, "iter_time": 2.4296346130371096, "loss": 0.002846846589818597, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 7.812832413729836, "step_time": 1.9970779647827146} +{"epoch": 0, "iter": 18819, "iter_tflops": 10.992911255052798, "iter_time": 1.419354263305664, "loss": 0.0059281522408127785, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 12.08120777155455, "step_time": 1.291496326446533} +{"epoch": 0, "iter": 18820, "iter_tflops": 
25.197190584272864, "iter_time": 0.6192291717529297, "loss": 0.008366327732801437, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 38.45636139275341, "step_time": 0.40572833442687983} +{"epoch": 0, "iter": 18821, "iter_tflops": 17.0664111865124, "iter_time": 0.8711208572387695, "loss": 0.30811992287635803, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 17.93230674271804, "step_time": 0.82905712890625} +{"epoch": 0, "iter": 18822, "iter_tflops": 7.775870563895042, "iter_time": 1.9119282684326173, "loss": 0.24103067815303802, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 9.571559825659909, "step_time": 1.5532376136779786} +{"epoch": 0, "iter": 18823, "iter_tflops": 9.696608139327886, "iter_time": 1.5332069244384765, "loss": 0.3172023296356201, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 11.030999748264325, "step_time": 1.3477388343811034} +{"epoch": 0, "iter": 18824, "iter_tflops": 25.57457295250979, "iter_time": 0.581315933227539, "loss": 0.34435853362083435, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 27.271445325087296, "step_time": 0.5451455383300782} +{"epoch": 0, "iter": 18825, "iter_tflops": 21.362628041470884, "iter_time": 0.7783450088500976, "loss": 0.34856513142585754, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 22.5077279659624, "step_time": 0.7387460403442383} +{"epoch": 0, "iter": 18826, "iter_tflops": 10.338525297670651, "iter_time": 1.6083043212890624, "loss": 0.31359151005744934, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 13.685117148967615, "step_time": 1.2150056686401367} +{"epoch": 0, "iter": 18827, "iter_tflops": 30.16886533904308, "iter_time": 0.5511475067138671, "loss": 0.28802362084388733, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 32.14895296549058, "step_time": 0.5172017555236816} +{"epoch": 0, "iter": 18828, "iter_tflops": 30.87795328446722, "iter_time": 0.5384908370971679, "loss": 0.17761768400669098, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 32.77159258163922, "step_time": 0.5073752479553223} +{"epoch": 0, "iter": 18829, 
"iter_tflops": 29.92792119999844, "iter_time": 0.689359390258789, "loss": 0.0008651692769490182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.937051803234986, "step_time": 0.6459924240112305} +{"epoch": 0, "iter": 18830, "iter_tflops": 15.537049215217614, "iter_time": 1.3278643341064456, "loss": 0.006828800309449434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.8119903352147, "step_time": 1.1582699699401857} +{"epoch": 0, "iter": 18831, "iter_tflops": 40.03778979201982, "iter_time": 0.5152905197143555, "loss": 0.003937472589313984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.31015772894987, "step_time": 0.4656064109802246} +{"epoch": 0, "iter": 18832, "iter_tflops": 42.09920092281506, "iter_time": 0.4900590286254883, "loss": 0.01134982518851757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14619819235003, "step_time": 0.44708111000061035} +{"epoch": 0, "iter": 18833, "iter_tflops": 29.02265939480204, "iter_time": 0.7108615798950195, "loss": 0.4359581470489502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.274607152996538, "step_time": 0.6596755447387695} +{"epoch": 0, "iter": 18834, "iter_tflops": 10.05804732983365, "iter_time": 2.051202667236328, "loss": 0.4529830813407898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.61631870482499, "step_time": 1.6352704772949218} +{"epoch": 0, "iter": 18835, "iter_tflops": 20.572532767246532, "iter_time": 1.002846549987793, "loss": 0.5063794851303101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.906068962139198, "step_time": 0.7963807067871093} +{"epoch": 0, "iter": 18836, "iter_tflops": 50.01549600658478, "iter_time": 0.41249402999877927, "loss": 0.47177648544311523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.242223633001956, "step_time": 0.38035117530822754} +{"epoch": 0, "iter": 18837, "iter_tflops": 18.999865589281974, "iter_time": 0.7846240539550781, "loss": 0.37289226055145264, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 20.066630559192113, "step_time": 0.7429125442504882} +{"epoch": 0, 
"iter": 18838, "iter_tflops": 9.716259882560179, "iter_time": 1.5343096771240234, "loss": 0.32617074251174927, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 10.675518223845735, "step_time": 1.3964428939819338} +{"epoch": 0, "iter": 18839, "iter_tflops": 25.128265327145076, "iter_time": 0.5932662429809571, "loss": 0.2872108519077301, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 26.75074223330248, "step_time": 0.5572836608886719} +{"epoch": 0, "iter": 18840, "iter_tflops": 26.92680571414536, "iter_time": 0.5536398086547851, "loss": 0.35595276951789856, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 28.557458533282578, "step_time": 0.5220265502929688} +{"epoch": 0, "iter": 18841, "iter_tflops": 40.38735486028656, "iter_time": 0.5108305206298828, "loss": 0.44974464178085327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.609437800305, "step_time": 0.46248270606994624} +{"epoch": 0, "iter": 18842, "iter_tflops": 37.893063174659545, "iter_time": 0.5444556808471679, "loss": 0.6024201512336731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59552114762931, "step_time": 0.49599314880371087} +{"epoch": 0, "iter": 18843, "iter_tflops": 42.389245760894596, "iter_time": 0.48670584106445314, "loss": 0.4254762530326843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.286412301781844, "step_time": 0.4457267799377441} +{"epoch": 0, "iter": 18844, "iter_tflops": 38.823044646689475, "iter_time": 0.531413589477539, "loss": 0.45411545038223267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.5869645149996, "step_time": 0.4844462089538575} +{"epoch": 0, "iter": 18845, "iter_tflops": 19.529297937240706, "iter_time": 1.0564175720214843, "loss": 0.14465878903865814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.112398927484335, "step_time": 0.9772027130126952} +{"epoch": 0, "iter": 18846, "iter_tflops": 17.347131622923534, "iter_time": 1.189308639526367, "loss": 0.07569326460361481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.539356008818462, "step_time": 0.8764510593414305} 
+{"epoch": 0, "iter": 18847, "iter_tflops": 50.6163197965103, "iter_time": 0.4075976600646972, "loss": 0.0974850058555603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.31765302652594, "step_time": 0.3729567756652832} +{"epoch": 0, "iter": 18848, "iter_tflops": 53.39072595440975, "iter_time": 0.3864171752929687, "loss": 0.08362150192260742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.00829769358998, "step_time": 0.35565762710571286} +{"epoch": 0, "iter": 18849, "iter_tflops": 25.203096128284095, "iter_time": 0.8185936126708984, "loss": 0.0013544992543756962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.58377023314265, "step_time": 0.7760785369873047} +{"epoch": 0, "iter": 18850, "iter_tflops": 17.512645237214972, "iter_time": 1.178068374633789, "loss": 0.0027938007842749357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.77733464110546, "step_time": 1.043168550491333} +{"epoch": 0, "iter": 18851, "iter_tflops": 55.01855019514001, "iter_time": 0.374984317779541, "loss": 0.0134167755022645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.6215746273469, "step_time": 0.3403259258270264} +{"epoch": 0, "iter": 18852, "iter_tflops": 55.10890704055259, "iter_time": 0.37436949157714844, "loss": 0.0017102418933063745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.41977332556618, "step_time": 0.341462610244751} +{"epoch": 0, "iter": 18853, "iter_tflops": 30.585724884628185, "iter_time": 0.6745334167480468, "loss": 0.6095013618469238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.48581922006444, "step_time": 0.6350799827575684} +{"epoch": 0, "iter": 18854, "iter_tflops": 7.9706890177342355, "iter_time": 2.5883701477050782, "loss": 0.48883479833602905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.931115686987694, "step_time": 2.310024215698242} +{"epoch": 0, "iter": 18855, "iter_tflops": 14.977428824813922, "iter_time": 1.3774789886474608, "loss": 0.5846583247184753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.553427127667103, "step_time": 
1.1753313674926757} +{"epoch": 0, "iter": 18856, "iter_tflops": 33.80011784051515, "iter_time": 0.61038525390625, "loss": 0.5212553143501282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.53019856377194, "step_time": 0.5647681732177734} +{"epoch": 0, "iter": 18857, "iter_tflops": 15.09917322822588, "iter_time": 1.0252266311645508, "loss": 0.35669034719467163, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 15.969689649892395, "step_time": 0.9693409729003907} +{"epoch": 0, "iter": 18858, "iter_tflops": 7.848467987130558, "iter_time": 1.972368942260742, "loss": 0.28682997822761536, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 9.426911817755974, "step_time": 1.6421151275634764} +{"epoch": 0, "iter": 18859, "iter_tflops": 11.595721926934315, "iter_time": 1.3349815216064453, "loss": 0.37920746207237244, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 12.974281308643828, "step_time": 1.193135414123535} +{"epoch": 0, "iter": 18860, "iter_tflops": 14.658550175705848, "iter_time": 1.0560440368652344, "loss": 0.2818297743797302, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 17.468755629319407, "step_time": 0.8861578254699706} +{"epoch": 0, "iter": 18861, "iter_tflops": 17.6511153574189, "iter_time": 0.9257371368408204, "loss": 0.38595712184906006, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 18.51981151990994, "step_time": 0.8823142166137695} +{"epoch": 0, "iter": 18862, "iter_tflops": 15.547936363112274, "iter_time": 1.0509621734619141, "loss": 0.2539953887462616, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 18.567994396380996, "step_time": 0.8800246620178223} +{"epoch": 0, "iter": 18863, "iter_tflops": 30.063796208221625, "iter_time": 0.5435206146240235, "loss": 0.2859959006309509, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.938184267053554, "step_time": 0.5116224784851074} +{"epoch": 0, "iter": 18864, "iter_tflops": 28.23312892223266, "iter_time": 0.5787630920410157, "loss": 0.2172858864068985, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 30.06712640963443, 
"step_time": 0.5434604148864746} +{"epoch": 0, "iter": 18865, "iter_tflops": 32.43713518984885, "iter_time": 0.6360331573486327, "loss": 0.171834334731102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.72288107191867, "step_time": 0.5941642189025879} +{"epoch": 0, "iter": 18866, "iter_tflops": 13.90254924250306, "iter_time": 1.4839791717529296, "loss": 0.19782553613185883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.310438139289182, "step_time": 1.1918296546936034} +{"epoch": 0, "iter": 18867, "iter_tflops": 12.682087194858212, "iter_time": 1.626790069580078, "loss": 0.1275121122598648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.293307439025096, "step_time": 1.4434093437194824} +{"epoch": 0, "iter": 18868, "iter_tflops": 24.184921299931524, "iter_time": 0.8530560531616211, "loss": 0.12389764189720154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.029433125925156, "step_time": 0.6870290699005128} +{"epoch": 0, "iter": 18869, "iter_tflops": 12.619778803965772, "iter_time": 1.2590904388427735, "loss": 0.36100977659225464, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 13.324736024929816, "step_time": 1.1924771194458006} +{"epoch": 0, "iter": 18870, "iter_tflops": 13.002982987196457, "iter_time": 1.2219844360351564, "loss": 0.26577985286712646, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 16.568947410116454, "step_time": 0.9589892730712891} +{"epoch": 0, "iter": 18871, "iter_tflops": 28.441449748264297, "iter_time": 0.5586720428466796, "loss": 0.344111829996109, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.332737815216216, "step_time": 0.5238380699157714} +{"epoch": 0, "iter": 18872, "iter_tflops": 27.115473546500176, "iter_time": 0.5859917144775391, "loss": 0.29686257243156433, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 28.754464531673698, "step_time": 0.552590461730957} +{"epoch": 0, "iter": 18873, "iter_tflops": 24.60344569562179, "iter_time": 0.8385448837280274, "loss": 0.7459431290626526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.976689888038063, "step_time": 0.7942156448364257} +{"epoch": 0, "iter": 18874, "iter_tflops": 12.709198275252428, "iter_time": 1.6233198242187499, "loss": 0.6275973916053772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.341579006572097, "step_time": 1.262490821838379} +{"epoch": 0, "iter": 18875, "iter_tflops": 46.158495807506256, "iter_time": 0.44696199798583985, "loss": 0.6636039614677429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96692889825495, "step_time": 0.4128949680328369} +{"epoch": 0, "iter": 18876, "iter_tflops": 45.81138024725471, "iter_time": 0.4503486557006836, "loss": 0.6019903421401978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.186964830680026, "step_time": 0.4194422969818115} +{"epoch": 0, "iter": 18877, "iter_tflops": 24.235075258547003, "iter_time": 0.8512906723022461, "loss": 0.18140387535095215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.41083964460511, "step_time": 0.8119012908935548} +{"epoch": 0, "iter": 18878, "iter_tflops": 13.743221295018268, "iter_time": 1.5011832427978513, "loss": 0.13234752416610718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.471854792916737, "step_time": 1.1808187370300294} +{"epoch": 0, "iter": 18879, "iter_tflops": 38.63730408488066, "iter_time": 0.5339682464599609, "loss": 0.13434988260269165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.28985184378401, "step_time": 0.48784974670410153} +{"epoch": 0, "iter": 18880, "iter_tflops": 43.51857247763249, "iter_time": 0.4740756034851074, "loss": 0.1493796557188034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.64114105925633, "step_time": 0.4330520439147949} +{"epoch": 0, "iter": 18881, "iter_tflops": 19.079329401502417, "iter_time": 1.0813322143554687, "loss": 0.0221632719039917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.18642629807337, "step_time": 1.0220280303955078} +{"epoch": 0, "iter": 18882, "iter_tflops": 9.206378095068372, "iter_time": 2.2409565734863284, "loss": 0.06252370774745941, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 10.346138082787002, "step_time": 1.99408642578125} +{"epoch": 0, "iter": 18883, "iter_tflops": 11.669476707510178, "iter_time": 1.7679536132812503, "loss": 0.036218903958797455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.127152845998802, "step_time": 1.4603858070373537} +{"epoch": 0, "iter": 18884, "iter_tflops": 50.33793336956612, "iter_time": 0.40985181808471677, "loss": 0.03287079185247421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.18424965296767, "step_time": 0.3738583679199219} +{"epoch": 0, "iter": 18885, "iter_tflops": 17.97938361035449, "iter_time": 0.8064523239135741, "loss": 0.34285590052604675, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 19.12773341623283, "step_time": 0.7580362701416015} +{"epoch": 0, "iter": 18886, "iter_tflops": 6.985966614010719, "iter_time": 2.075520324707031, "loss": 0.34974855184555054, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 7.834269276091145, "step_time": 1.8507808685302733} +{"epoch": 0, "iter": 18887, "iter_tflops": 6.28190374179299, "iter_time": 2.308140380859375, "loss": 0.3446698486804962, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 7.329916082259789, "step_time": 1.9781284713745115} +{"epoch": 0, "iter": 18888, "iter_tflops": 21.05950463291879, "iter_time": 0.6885022201538086, "loss": 0.40598130226135254, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 22.67194167240755, "step_time": 0.6395356826782226} +{"epoch": 0, "iter": 18889, "iter_tflops": 14.903336954387196, "iter_time": 1.2344086303710935, "loss": 0.4255087077617645, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 16.02565929305641, "step_time": 1.1479594955444337} +{"epoch": 0, "iter": 18890, "iter_tflops": 19.637384928091738, "iter_time": 0.9368257446289062, "loss": 0.3918733596801758, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 23.185013561855776, "step_time": 0.7934784126281739} +{"epoch": 0, "iter": 18891, "iter_tflops": 32.6672303127455, "iter_time": 0.5631578674316406, "loss": 0.293692409992218, "lr": 3e-05, "seqlen": 
7328.0, "step_tflops": 34.83256033365422, "step_time": 0.5281497421264648} +{"epoch": 0, "iter": 18892, "iter_tflops": 32.57894294767745, "iter_time": 0.5646839981079101, "loss": 0.33790847659111023, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 34.62748250530541, "step_time": 0.5312776565551758} +{"epoch": 0, "iter": 18893, "iter_tflops": 25.95352704925451, "iter_time": 0.794924461364746, "loss": 0.06962849944829941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.58210568778998, "step_time": 0.7479883422851563} +{"epoch": 0, "iter": 18894, "iter_tflops": 28.04714694084099, "iter_time": 0.7355861740112304, "loss": 0.02677321992814541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.174716013694564, "step_time": 0.5703180503845214} +{"epoch": 0, "iter": 18895, "iter_tflops": 47.841826238999325, "iter_time": 0.43123549270629885, "loss": 0.06146060302853584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.67567808390101, "step_time": 0.39924185371398924} +{"epoch": 0, "iter": 18896, "iter_tflops": 50.35881836729728, "iter_time": 0.40968184280395503, "loss": 0.05298611521720886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.73767571773782, "step_time": 0.37690846824645996} +{"epoch": 0, "iter": 18897, "iter_tflops": 44.21259445389794, "iter_time": 0.4666338577270508, "loss": 0.08895207941532135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.07824753091231, "step_time": 0.42911492347717284} +{"epoch": 0, "iter": 18898, "iter_tflops": 14.118031051630698, "iter_time": 1.4613293762207034, "loss": 0.05307905375957489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.245364641304251, "step_time": 1.3532699279785154} +{"epoch": 0, "iter": 18899, "iter_tflops": 10.297749134627017, "iter_time": 2.0034566040039063, "loss": 0.07070453464984894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.025509966597573, "step_time": 1.7156106948852539} +{"epoch": 0, "iter": 18900, "iter_tflops": 14.926277066014958, "iter_time": 1.3821995544433594, "loss": 0.06591274589300156, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.876207879261834, "step_time": 1.2224958152770997} +{"epoch": 0, "iter": 18901, "iter_tflops": 14.254537136507603, "iter_time": 1.0515567932128906, "loss": 0.35843563079833984, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 15.509525220186388, "step_time": 0.9664677124023437} +{"epoch": 0, "iter": 18902, "iter_tflops": 12.766874675123528, "iter_time": 1.1740896453857421, "loss": 0.22137531638145447, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 16.472046974182625, "step_time": 0.909993480682373} +{"epoch": 0, "iter": 18903, "iter_tflops": 27.767954303991164, "iter_time": 0.5398112945556641, "loss": 0.2833128571510315, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.533740150067988, "step_time": 0.5075366439819337} +{"epoch": 0, "iter": 18904, "iter_tflops": 26.37511343473521, "iter_time": 0.5683181381225586, "loss": 0.3192111849784851, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.090004906511734, "step_time": 0.5336223831176757} +{"epoch": 0, "iter": 18905, "iter_tflops": 20.528081918649615, "iter_time": 1.005018081665039, "loss": 0.07024764269590378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.39757200047657, "step_time": 0.964179183959961} +{"epoch": 0, "iter": 18906, "iter_tflops": 17.206827460388315, "iter_time": 1.1990062408447266, "loss": 0.08711743354797363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.60036808702702, "step_time": 0.9551269416809082} +{"epoch": 0, "iter": 18907, "iter_tflops": 43.9766543295581, "iter_time": 0.46913740539550774, "loss": 0.12252236902713776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.69807205555651, "step_time": 0.4236531887054444} +{"epoch": 0, "iter": 18908, "iter_tflops": 40.132278684533915, "iter_time": 0.5140773010253906, "loss": 0.12072554230690002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.140383001151754, "step_time": 0.46739724731445315} +{"epoch": 0, "iter": 18909, "iter_tflops": 22.382926356825646, "iter_time": 0.9217335205078125, "loss": 
0.14798973500728607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.38470754801861, "step_time": 0.8460668830871583} +{"epoch": 0, "iter": 18910, "iter_tflops": 20.46904229239272, "iter_time": 1.007916893005371, "loss": 0.23848815262317657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.705253211829927, "step_time": 0.8350893363952636} +{"epoch": 0, "iter": 18911, "iter_tflops": 39.058302514302824, "iter_time": 0.5282127532958985, "loss": 0.15906858444213867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.883598601998955, "step_time": 0.4810952014923095} +{"epoch": 0, "iter": 18912, "iter_tflops": 37.2137996492144, "iter_time": 0.5543936309814453, "loss": 0.13715527951717377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.515973889026085, "step_time": 0.5092088756561279} +{"epoch": 0, "iter": 18913, "iter_tflops": 18.339759209847973, "iter_time": 1.1249380798339843, "loss": 0.19706326723098755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.374301997806747, "step_time": 1.0648689956665038} +{"epoch": 0, "iter": 18914, "iter_tflops": 15.83922611037849, "iter_time": 1.3025316619873049, "loss": 0.20784461498260498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.89879130006057, "step_time": 1.0916620635986327} +{"epoch": 0, "iter": 18915, "iter_tflops": 34.54295503618859, "iter_time": 0.5972590789794923, "loss": 0.24560633301734924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.741814203766424, "step_time": 0.5466375675201416} +{"epoch": 0, "iter": 18916, "iter_tflops": 42.68083167119498, "iter_time": 0.48338077545166025, "loss": 0.20336206257343292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.54285076186246, "step_time": 0.4432709465026855} +{"epoch": 0, "iter": 18917, "iter_tflops": 15.162927752752353, "iter_time": 1.3606273040771484, "loss": 0.4858928918838501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.011866373785892, "step_time": 1.2884877395629883} +{"epoch": 0, "iter": 18918, "iter_tflops": 21.004802947645995, "iter_time": 
0.9822083816528321, "loss": 0.43771904706954956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.566200207710594, "step_time": 0.7765918102264404} +{"epoch": 0, "iter": 18919, "iter_tflops": 45.39876006308337, "iter_time": 0.4544417839050293, "loss": 0.718428373336792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.91312582786524, "step_time": 0.4217905349731445} +{"epoch": 0, "iter": 18920, "iter_tflops": 48.15782574195489, "iter_time": 0.42840583419799805, "loss": 0.6839881539344788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.87168367161211, "step_time": 0.3977332534790039} +{"epoch": 0, "iter": 18921, "iter_tflops": 23.972146714346415, "iter_time": 0.8606277008056641, "loss": 0.2997916042804718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.53197105438837, "step_time": 0.8080493850708008} +{"epoch": 0, "iter": 18922, "iter_tflops": 12.46342994666258, "iter_time": 1.6553303222656248, "loss": 0.48402780294418335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.37965301257193, "step_time": 1.3414537696838378} +{"epoch": 0, "iter": 18923, "iter_tflops": 36.22891224655344, "iter_time": 0.5694648895263672, "loss": 0.22569260001182556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.552650647643716, "step_time": 0.5216108951568603} +{"epoch": 0, "iter": 18924, "iter_tflops": 39.67769237638462, "iter_time": 0.5199670715332031, "loss": 0.3124189078807831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.951806561873326, "step_time": 0.4803312168121338} +{"epoch": 0, "iter": 18925, "iter_tflops": 14.148277471713872, "iter_time": 1.458205322265625, "loss": 0.03645619377493858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.221138104402007, "step_time": 1.355423843383789} +{"epoch": 0, "iter": 18926, "iter_tflops": 22.761578240682876, "iter_time": 0.9063999557495117, "loss": 0.07800861448049545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.135475168730455, "step_time": 0.7332768821716309} +{"epoch": 0, "iter": 18927, "iter_tflops": 39.7980422125541, 
"iter_time": 0.5183946838378907, "loss": 0.05210275202989578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.85734997453306, "step_time": 0.4704135913848877} +{"epoch": 0, "iter": 18928, "iter_tflops": 40.78275196781965, "iter_time": 0.5058779144287109, "loss": 0.046735286712646484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.879189950635336, "step_time": 0.45970289421081545} +{"epoch": 0, "iter": 18929, "iter_tflops": 17.551616401620112, "iter_time": 1.175452621459961, "loss": 0.12414667755365372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.60397272339914, "step_time": 1.1089617156982423} +{"epoch": 0, "iter": 18930, "iter_tflops": 17.089002445060153, "iter_time": 1.207273132324219, "loss": 0.1653904914855957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.87062691430627, "step_time": 0.9885229415893555} +{"epoch": 0, "iter": 18931, "iter_tflops": 35.65206507888836, "iter_time": 0.5786787796020507, "loss": 0.22385334968566895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.64380644138296, "step_time": 0.5338783988952637} +{"epoch": 0, "iter": 18932, "iter_tflops": 38.788525956716434, "iter_time": 0.5318865051269531, "loss": 0.1422940045595169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.59651055909066, "step_time": 0.48433764266967777} +{"epoch": 0, "iter": 18933, "iter_tflops": 20.840061586487106, "iter_time": 0.9899727706909179, "loss": 0.0028532727155834436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.47765764301739, "step_time": 0.917848907470703} +{"epoch": 0, "iter": 18934, "iter_tflops": 18.521560766918544, "iter_time": 1.1138960571289063, "loss": 0.006014919374138117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.912909671524506, "step_time": 0.8281286201477052} +{"epoch": 0, "iter": 18935, "iter_tflops": 46.85786891328408, "iter_time": 0.4402909049987792, "loss": 0.3866763114929199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.85576830846504, "step_time": 0.4056785335540772} +{"epoch": 0, "iter": 18936, "iter_tflops": 
52.22430549707057, "iter_time": 0.39504773330688475, "loss": 0.3436482846736908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.537344266993045, "step_time": 0.3649109058380127} +{"epoch": 0, "iter": 18937, "iter_tflops": 33.34205578752428, "iter_time": 0.6187708892822266, "loss": 0.17964647710323334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.51931285828943, "step_time": 0.5808415718078612} +{"epoch": 0, "iter": 18938, "iter_tflops": 8.211390076398358, "iter_time": 2.5124970703125, "loss": 0.2879706025123596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.340910681344957, "step_time": 2.2086811676025393} +{"epoch": 0, "iter": 18939, "iter_tflops": 16.788852551637223, "iter_time": 1.2288566741943359, "loss": 0.2146211713552475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.705872990134633, "step_time": 1.0469515113830568} +{"epoch": 0, "iter": 18940, "iter_tflops": 28.042599732040845, "iter_time": 0.735705451965332, "loss": 0.31525760889053345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.946767298381424, "step_time": 0.5903577098846435} +{"epoch": 0, "iter": 18941, "iter_tflops": 14.569149062982042, "iter_time": 1.1103106536865235, "loss": 0.4492979347705841, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 15.510961726404952, "step_time": 1.0428935165405275} +{"epoch": 0, "iter": 18942, "iter_tflops": 23.478559428862603, "iter_time": 0.6889810028076172, "loss": 0.30649590492248535, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 29.52412972311031, "step_time": 0.5479003639221192} +{"epoch": 0, "iter": 18943, "iter_tflops": 29.62012857671475, "iter_time": 0.5461246185302734, "loss": 0.4745884835720062, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 31.480265136350578, "step_time": 0.5138546752929688} +{"epoch": 0, "iter": 18944, "iter_tflops": 28.593767740013007, "iter_time": 0.565727523803711, "loss": 0.3817217946052551, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 30.491832449741533, "step_time": 0.5305119476318358} +{"epoch": 0, "iter": 18945, 
"iter_tflops": 26.924703248602345, "iter_time": 0.7662514724731446, "loss": 0.2498619556427002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.42333710990742, "step_time": 0.7258505020141602} +{"epoch": 0, "iter": 18946, "iter_tflops": 12.264604343414367, "iter_time": 1.6821654357910154, "loss": 0.20141732692718506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.42154727786601, "step_time": 1.430574203491211} +{"epoch": 0, "iter": 18947, "iter_tflops": 9.415580156793062, "iter_time": 2.1911654052734377, "loss": 0.3679564297199249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.274489518703007, "step_time": 1.829891586303711} +{"epoch": 0, "iter": 18948, "iter_tflops": 22.697336819074806, "iter_time": 0.9089653854370117, "loss": 0.23800697922706604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.82757679210907, "step_time": 0.7413902282714845} +{"epoch": 0, "iter": 18949, "iter_tflops": 13.954294139488757, "iter_time": 1.179805374145508, "loss": 0.39447149634361267, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 15.177149941211871, "step_time": 1.0847459030151367} +{"epoch": 0, "iter": 18950, "iter_tflops": 13.1555405404325, "iter_time": 1.2514385986328125, "loss": 0.2503563165664673, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 15.562815296598963, "step_time": 1.057864589691162} +{"epoch": 0, "iter": 18951, "iter_tflops": 24.681775569270705, "iter_time": 0.6670245895385742, "loss": 0.5223713517189026, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 26.596325323184114, "step_time": 0.6190084915161133} +{"epoch": 0, "iter": 18952, "iter_tflops": 25.18502350646955, "iter_time": 0.6536960830688477, "loss": 0.4053603708744049, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 27.05146064458479, "step_time": 0.6085937995910644} +{"epoch": 0, "iter": 18953, "iter_tflops": 18.248717779299135, "iter_time": 1.130550308227539, "loss": 0.26422542333602905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.512161020443422, "step_time": 1.0573453903198242} +{"epoch": 0, "iter": 
18954, "iter_tflops": 18.40170639517719, "iter_time": 1.1211511077880858, "loss": 0.28812941908836365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.867908315971274, "step_time": 0.9434415588378906} +{"epoch": 0, "iter": 18955, "iter_tflops": 51.77214395857972, "iter_time": 0.3984979553222657, "loss": 0.3194204270839691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.40602227781134, "step_time": 0.3657604751586914} +{"epoch": 0, "iter": 18956, "iter_tflops": 43.974982672029284, "iter_time": 0.4691552391052246, "loss": 0.24484707415103912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.44661565244014, "step_time": 0.4348275051116943} +{"epoch": 0, "iter": 18957, "iter_tflops": 28.970484318128932, "iter_time": 0.7121418228149413, "loss": 0.2241557091474533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.69942036630792, "step_time": 0.6720352783203124} +{"epoch": 0, "iter": 18958, "iter_tflops": 14.105649646560996, "iter_time": 1.462612075805664, "loss": 0.264051616191864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.455003017179674, "step_time": 1.1819587478637696} +{"epoch": 0, "iter": 18959, "iter_tflops": 47.485758858526125, "iter_time": 0.43446907043457034, "loss": 0.29711538553237915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.57555703635717, "step_time": 0.4000168819427491} +{"epoch": 0, "iter": 18960, "iter_tflops": 51.534237313444514, "iter_time": 0.4003376121520996, "loss": 0.49841174483299255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.77763904214675, "step_time": 0.36988108253479} +{"epoch": 0, "iter": 18961, "iter_tflops": 26.341517335506104, "iter_time": 0.783215835571289, "loss": 0.42764514684677124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.74594631104657, "step_time": 0.7435714492797852} +{"epoch": 0, "iter": 18962, "iter_tflops": 14.066629299493815, "iter_time": 1.4666693115234375, "loss": 0.5268150568008423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.86861581645902, "step_time": 1.2230460243225096} +{"epoch": 0, 
"iter": 18963, "iter_tflops": 36.25501466735764, "iter_time": 0.5690548934936523, "loss": 0.764807939529419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.875805929420984, "step_time": 0.517383737564087} +{"epoch": 0, "iter": 18964, "iter_tflops": 36.61172083296952, "iter_time": 0.5635106201171876, "loss": 0.6961966753005981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.96667203593596, "step_time": 0.5162074413299561} +{"epoch": 0, "iter": 18965, "iter_tflops": 20.4257834683531, "iter_time": 1.010051513671875, "loss": 0.047071393579244614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.018050082095844, "step_time": 0.9370082015991211} +{"epoch": 0, "iter": 18966, "iter_tflops": 15.034949862338733, "iter_time": 1.3722089996337892, "loss": 0.08620557188987732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.322002641889906, "step_time": 1.1260283012390135} +{"epoch": 0, "iter": 18967, "iter_tflops": 43.06889688284369, "iter_time": 0.4790253524780273, "loss": 0.05981883406639099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.46017660444401, "step_time": 0.43470326042175295} +{"epoch": 0, "iter": 18968, "iter_tflops": 44.211880631912486, "iter_time": 0.4666413917541504, "loss": 0.0451742485165596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.482248661942606, "step_time": 0.42553912162780766} +{"epoch": 0, "iter": 18969, "iter_tflops": 20.651329203617347, "iter_time": 0.9990201263427735, "loss": 0.46812984347343445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.46604957109287, "step_time": 0.918323154449463} +{"epoch": 0, "iter": 18970, "iter_tflops": 39.768102164753365, "iter_time": 0.5187849655151368, "loss": 0.6104376912117004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.993522581731554, "step_time": 0.47986515808105473} +{"epoch": 0, "iter": 18971, "iter_tflops": 45.47190035622294, "iter_time": 0.4537108268737793, "loss": 0.605614423751831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.7671479289614, "step_time": 0.423053108215332} 
+{"epoch": 0, "iter": 18972, "iter_tflops": 42.36388195573409, "iter_time": 0.48699723815917967, "loss": 0.4404975175857544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.271293501853684, "step_time": 0.45572131729125975} +{"epoch": 0, "iter": 18973, "iter_tflops": 28.43392900035143, "iter_time": 0.7255801162719727, "loss": 0.006817459594458342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.078062323889547, "step_time": 0.6859183044433594} +{"epoch": 0, "iter": 18974, "iter_tflops": 12.75733723008868, "iter_time": 1.6171943359375, "loss": 0.004507655277848244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.18722107068676, "step_time": 1.2745296688079835} +{"epoch": 0, "iter": 18975, "iter_tflops": 41.39176650452596, "iter_time": 0.49843471908569337, "loss": 0.002988448366522789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.72559454959218, "step_time": 0.4511935539245605} +{"epoch": 0, "iter": 18976, "iter_tflops": 44.75338428330035, "iter_time": 0.46099515914917, "loss": 0.0022600251249969006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.919998173958284, "step_time": 0.4217312812805176} +{"epoch": 0, "iter": 18977, "iter_tflops": 18.271497551098104, "iter_time": 1.1291408081054688, "loss": 0.45656004548072815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.753823282349682, "step_time": 1.0444101486206054} +{"epoch": 0, "iter": 18978, "iter_tflops": 22.173519869818207, "iter_time": 0.930438362121582, "loss": 0.5385647416114807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.023404323134162, "step_time": 0.7634527931213377} +{"epoch": 0, "iter": 18979, "iter_tflops": 45.44402205336856, "iter_time": 0.4539891624450683, "loss": 0.5850740075111389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95520801249062, "step_time": 0.4214279613494873} +{"epoch": 0, "iter": 18980, "iter_tflops": 44.16002671788726, "iter_time": 0.46718933486938474, "loss": 0.47744220495224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.45416835879173, "step_time": 
0.4347582988739013} +{"epoch": 0, "iter": 18981, "iter_tflops": 39.11391537867737, "iter_time": 0.5274617309570312, "loss": 0.35890084505081177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.71741046459529, "step_time": 0.48296685791015614} +{"epoch": 0, "iter": 18982, "iter_tflops": 47.87009711247039, "iter_time": 0.4309808158874512, "loss": 0.42194661498069763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37556966890459, "step_time": 0.3939068088531494} +{"epoch": 0, "iter": 18983, "iter_tflops": 49.71682631336592, "iter_time": 0.414972053527832, "loss": 0.3677520453929901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.80754703289551, "step_time": 0.38342378807067873} +{"epoch": 0, "iter": 18984, "iter_tflops": 51.565904720793135, "iter_time": 0.40009175872802727, "loss": 0.35873061418533325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.66085367308138, "step_time": 0.3706571521759033} +{"epoch": 0, "iter": 18985, "iter_tflops": 31.05577392541408, "iter_time": 0.6643239212036132, "loss": 0.4664855897426605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.0033290345065, "step_time": 0.6251215896606446} +{"epoch": 0, "iter": 18986, "iter_tflops": 7.62105229283319, "iter_time": 2.7071187438964843, "loss": 0.5302928686141968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.71022981021615, "step_time": 2.3686049575805663} +{"epoch": 0, "iter": 18987, "iter_tflops": 15.644224017982184, "iter_time": 1.3187674560546876, "loss": 0.49407652020454407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.808726605224578, "step_time": 1.0415153846740723} +{"epoch": 0, "iter": 18988, "iter_tflops": 29.206902838231276, "iter_time": 0.7063773117065429, "loss": 0.5923062562942505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.06874548917653, "step_time": 0.5883042926788329} +{"epoch": 0, "iter": 18989, "iter_tflops": 13.153363207222455, "iter_time": 1.2859774169921876, "loss": 0.3313479423522949, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 13.907996440442007, 
"step_time": 1.2162016372680664} +{"epoch": 0, "iter": 18990, "iter_tflops": 12.75342536809226, "iter_time": 1.3263047027587893, "loss": 0.27652138471603394, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 15.968158374214784, "step_time": 1.0592910995483398} +{"epoch": 0, "iter": 18991, "iter_tflops": 30.20672621857285, "iter_time": 0.5599722366333009, "loss": 0.2647143304347992, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 32.197951857217895, "step_time": 0.525341739654541} +{"epoch": 0, "iter": 18992, "iter_tflops": 29.618652355871028, "iter_time": 0.5710904006958007, "loss": 0.26523107290267944, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 31.50879974627414, "step_time": 0.5368318748474121} +{"epoch": 0, "iter": 18993, "iter_tflops": 22.07568351343793, "iter_time": 0.9345619354248048, "loss": 0.5709301829338074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.194324954176917, "step_time": 0.8894888534545898} +{"epoch": 0, "iter": 18994, "iter_tflops": 12.513859714248325, "iter_time": 1.6486594848632812, "loss": 0.6766680479049683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.531164491004661, "step_time": 1.419782531738281} +{"epoch": 0, "iter": 18995, "iter_tflops": 37.39517779536053, "iter_time": 0.5517046508789062, "loss": 0.5278281569480896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.91652894619083, "step_time": 0.5042239418029785} +{"epoch": 0, "iter": 18996, "iter_tflops": 39.114551866353935, "iter_time": 0.5274531478881835, "loss": 0.4978386461734772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.630886242484166, "step_time": 0.483947093963623} +{"epoch": 0, "iter": 18997, "iter_tflops": 23.26739893062708, "iter_time": 0.8724318542480469, "loss": 0.009648141451179981, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 24.908727576507932, "step_time": 0.8149440765380859} +{"epoch": 0, "iter": 18998, "iter_tflops": 10.269427320430713, "iter_time": 1.976665237426758, "loss": 0.00827654916793108, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 
12.179549130470377, "step_time": 1.6666643218994142} +{"epoch": 0, "iter": 18999, "iter_tflops": 14.186606125346822, "iter_time": 1.4308721771240231, "loss": 0.004517211578786373, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 18.17380008971481, "step_time": 1.1169496688842775} +{"epoch": 0, "iter": 19000, "iter_tflops": 38.809752522777075, "iter_time": 0.5230443038940429, "loss": 0.005629333201795816, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 49.6918964849697, "step_time": 0.408501615524292} +{"epoch": 0, "iter": 19001, "iter_tflops": 25.453137570305106, "iter_time": 0.6629379959106445, "loss": 0.3595416247844696, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.174012614698235, "step_time": 0.6209554786682129} +{"epoch": 0, "iter": 19002, "iter_tflops": 13.54269198615495, "iter_time": 1.2459747314453125, "loss": 0.25066518783569336, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 16.2162476126324, "step_time": 1.0405521926879882} +{"epoch": 0, "iter": 19003, "iter_tflops": 23.988847510847624, "iter_time": 0.7034040298461914, "loss": 0.4027211368083954, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 25.965700467148036, "step_time": 0.6498516006469727} +{"epoch": 0, "iter": 19004, "iter_tflops": 25.010302713957287, "iter_time": 0.6746760406494141, "loss": 0.25677576661109924, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.705459869027333, "step_time": 0.6318502693176269} +{"epoch": 0, "iter": 19005, "iter_tflops": 17.926794708022776, "iter_time": 0.9848444061279298, "loss": 0.06689035147428513, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 19.321978440431675, "step_time": 0.9137316627502442} +{"epoch": 0, "iter": 19006, "iter_tflops": 22.204050830750358, "iter_time": 0.7951298446655274, "loss": 0.06528186798095703, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 27.408218525178683, "step_time": 0.6441536312103271} +{"epoch": 0, "iter": 19007, "iter_tflops": 40.43924835088249, "iter_time": 0.4365833740234375, "loss": 0.06925477832555771, "lr": 3e-05, "seqlen": 
7040.0, "step_tflops": 44.43933648611487, "step_time": 0.39728548812866205} +{"epoch": 0, "iter": 19008, "iter_tflops": 41.601320224040784, "iter_time": 0.4243880577087403, "loss": 0.05736827477812767, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 45.3655218646184, "step_time": 0.38917448234558105} +{"epoch": 0, "iter": 19009, "iter_tflops": 39.15860269209252, "iter_time": 0.42461920928955077, "loss": 0.0141353290528059, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 43.15949922770551, "step_time": 0.3852569007873535} +{"epoch": 0, "iter": 19010, "iter_tflops": 31.50191197938675, "iter_time": 0.5278249435424804, "loss": 0.028606640174984932, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 35.62942696595885, "step_time": 0.4666787071228027} +{"epoch": 0, "iter": 19011, "iter_tflops": 31.72966472260624, "iter_time": 0.524036262512207, "loss": 0.03677232563495636, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 34.92958062744486, "step_time": 0.47602904510498045} +{"epoch": 0, "iter": 19012, "iter_tflops": 33.7155077715822, "iter_time": 0.49317053222656254, "loss": 0.028881024569272995, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 36.76531780797842, "step_time": 0.4522603340148926} +{"epoch": 0, "iter": 19013, "iter_tflops": 31.156861125860797, "iter_time": 0.6621685485839843, "loss": 0.7168226838111877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.240866989687355, "step_time": 0.60252836227417} +{"epoch": 0, "iter": 19014, "iter_tflops": 35.23495945848082, "iter_time": 0.5855290832519531, "loss": 0.6355568766593933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.19884548167931, "step_time": 0.5400973052978516} +{"epoch": 0, "iter": 19015, "iter_tflops": 35.675684845372864, "iter_time": 0.578295654296875, "loss": 0.6617481708526611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.74071967976851, "step_time": 0.5325428562164307} +{"epoch": 0, "iter": 19016, "iter_tflops": 31.782264031173604, "iter_time": 0.6491385726928711, "loss": 0.5850565433502197, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 34.307968243921366, "step_time": 0.6013499069213868} +{"epoch": 0, "iter": 19017, "iter_tflops": 15.92580499549519, "iter_time": 1.2954505920410158, "loss": 0.5040063261985779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.023172217466396, "step_time": 1.2119417724609374} +{"epoch": 0, "iter": 19018, "iter_tflops": 19.977188398503106, "iter_time": 1.0327325897216797, "loss": 0.6162616610527039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.017223429172656, "step_time": 0.8590124320983887} +{"epoch": 0, "iter": 19019, "iter_tflops": 44.77621458433024, "iter_time": 0.46076010894775393, "loss": 0.5706639289855957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.934055554022464, "step_time": 0.4304057579040528} +{"epoch": 0, "iter": 19020, "iter_tflops": 49.20825154956808, "iter_time": 0.4192608528137207, "loss": 0.5632994174957275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21359760156137, "step_time": 0.38770341491699223} +{"epoch": 0, "iter": 19021, "iter_tflops": 24.503270187369537, "iter_time": 0.6635146102905274, "loss": 0.006198428571224213, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 25.938032684871608, "step_time": 0.6268122940063476} +{"epoch": 0, "iter": 19022, "iter_tflops": 9.023995029398163, "iter_time": 1.801671844482422, "loss": 0.00521259568631649, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 11.409740762390452, "step_time": 1.424947166442871} +{"epoch": 0, "iter": 19023, "iter_tflops": 31.85917541495644, "iter_time": 0.5103169670104981, "loss": 0.006535402499139309, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 35.39909258544864, "step_time": 0.45928515625} +{"epoch": 0, "iter": 19024, "iter_tflops": 38.87489501119747, "iter_time": 0.41822049331665045, "loss": 0.002411382971331477, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 42.968814160876526, "step_time": 0.3783738994598389} +{"epoch": 0, "iter": 19025, "iter_tflops": 21.001227486691587, "iter_time": 0.982375602722168, "loss": 0.6026800274848938, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.274611711693947, "step_time": 0.9262156295776367} +{"epoch": 0, "iter": 19026, "iter_tflops": 9.780466330736992, "iter_time": 2.1094181823730467, "loss": 0.5864385962486267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.117314989531035, "step_time": 1.8557622528076174} +{"epoch": 0, "iter": 19027, "iter_tflops": 10.424428073884734, "iter_time": 1.9791103515625, "loss": 0.4944477081298828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.708866833256765, "step_time": 1.6233621597290038} +{"epoch": 0, "iter": 19028, "iter_tflops": 37.82162792890576, "iter_time": 0.5454840164184571, "loss": 0.6169657707214355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34355833607517, "step_time": 0.4990159130096435} +{"epoch": 0, "iter": 19029, "iter_tflops": 12.423556628154328, "iter_time": 1.3582142791748046, "loss": 0.30628859996795654, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 13.206031751862785, "step_time": 1.2777382583618162} +{"epoch": 0, "iter": 19030, "iter_tflops": 11.160805408949104, "iter_time": 1.511884796142578, "loss": 0.40731164813041687, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 13.333279864438003, "step_time": 1.2655439758300784} +{"epoch": 0, "iter": 19031, "iter_tflops": 27.69064915917574, "iter_time": 0.6093700408935547, "loss": 0.3007217049598694, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 29.738301982379518, "step_time": 0.5674114151000975} +{"epoch": 0, "iter": 19032, "iter_tflops": 26.6850796725963, "iter_time": 0.6323328323364258, "loss": 0.45729923248291016, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 28.60622670261604, "step_time": 0.5898664016723633} +{"epoch": 0, "iter": 19033, "iter_tflops": 21.381036564895233, "iter_time": 0.9649248504638672, "loss": 0.11202823370695114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.28288380512842, "step_time": 0.8861055908203126} +{"epoch": 0, "iter": 19034, "iter_tflops": 19.49778448919686, "iter_time": 1.058125015258789, "loss": 
0.12458238005638123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.726912321100304, "step_time": 0.8695228958129884} +{"epoch": 0, "iter": 19035, "iter_tflops": 52.86771745571202, "iter_time": 0.3902399139404297, "loss": 0.10552318394184113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.71968476147151, "step_time": 0.35743600463867187} +{"epoch": 0, "iter": 19036, "iter_tflops": 50.827525880158966, "iter_time": 0.40590394973754884, "loss": 0.14063596725463867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.988592845822225, "step_time": 0.375188606262207} +{"epoch": 0, "iter": 19037, "iter_tflops": 45.06151437468504, "iter_time": 0.41100646591186524, "loss": 0.006729355081915855, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 49.728596107914704, "step_time": 0.3724330711364746} +{"epoch": 0, "iter": 19038, "iter_tflops": 37.01833615749623, "iter_time": 0.5003081092834473, "loss": 0.002611181465908885, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 41.00378038425308, "step_time": 0.4516796646118164} +{"epoch": 0, "iter": 19039, "iter_tflops": 37.83196910495091, "iter_time": 0.48954823684692383, "loss": 0.009308109991252422, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 41.747385559585936, "step_time": 0.44363433837890626} +{"epoch": 0, "iter": 19040, "iter_tflops": 43.39085096630866, "iter_time": 0.4268313102722168, "loss": 0.002556930761784315, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 47.91247859097064, "step_time": 0.3865501079559326} +{"epoch": 0, "iter": 19041, "iter_tflops": 18.99174721848193, "iter_time": 0.8215587158203125, "loss": 0.06635785102844238, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 20.236052838750123, "step_time": 0.7710414466857911} +{"epoch": 0, "iter": 19042, "iter_tflops": 25.628273666050163, "iter_time": 0.6088133621215821, "loss": 0.05236997455358505, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 32.10381324812428, "step_time": 0.48601190567016606} +{"epoch": 0, "iter": 19043, "iter_tflops": 28.57727699420049, "iter_time": 
0.5459874801635742, "loss": 0.07702937722206116, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 31.419083101918634, "step_time": 0.4966037807464599} +{"epoch": 0, "iter": 19044, "iter_tflops": 37.734467998085435, "iter_time": 0.41349027252197273, "loss": 0.096906878054142, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 41.45912145899118, "step_time": 0.37634264564514164} +{"epoch": 0, "iter": 19045, "iter_tflops": 23.947340272679867, "iter_time": 0.8615192031860351, "loss": 0.588697075843811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.39179416322352, "step_time": 0.812510269165039} +{"epoch": 0, "iter": 19046, "iter_tflops": 19.101668801314855, "iter_time": 1.0800675964355468, "loss": 0.6818466186523438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.071827810060228, "step_time": 0.9347251930236816} +{"epoch": 0, "iter": 19047, "iter_tflops": 39.721403150280544, "iter_time": 0.5193948822021484, "loss": 0.6273200511932373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.828188058079526, "step_time": 0.4817176361083984} +{"epoch": 0, "iter": 19048, "iter_tflops": 44.277025394152645, "iter_time": 0.4659548225402832, "loss": 0.5795854330062866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.423577731599806, "step_time": 0.4350387401580811} +{"epoch": 0, "iter": 19049, "iter_tflops": 25.105455185897732, "iter_time": 0.8217773132324218, "loss": 0.34223824739456177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.507212612379675, "step_time": 0.7783199920654298} +{"epoch": 0, "iter": 19050, "iter_tflops": 15.63708619629669, "iter_time": 1.319369430541992, "loss": 0.38859042525291443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.493278733549257, "step_time": 1.0583695945739746} +{"epoch": 0, "iter": 19051, "iter_tflops": 33.66777292212615, "iter_time": 0.6127846221923828, "loss": 0.328508585691452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.80746632882366, "step_time": 0.5605138187408447} +{"epoch": 0, "iter": 19052, "iter_tflops": 38.540479153490885, 
"iter_time": 0.5353097305297851, "loss": 0.42788660526275635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.914933961139965, "step_time": 0.49221343231201176} +{"epoch": 0, "iter": 19053, "iter_tflops": 20.631835375989706, "iter_time": 0.9999640426635742, "loss": 0.074200339615345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.23818240568092, "step_time": 0.9277329025268555} +{"epoch": 0, "iter": 19054, "iter_tflops": 23.25591607578273, "iter_time": 0.8871331253051757, "loss": 0.06627004593610764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.67596961585608, "step_time": 0.7194558296203613} +{"epoch": 0, "iter": 19055, "iter_tflops": 41.43231549441423, "iter_time": 0.4979469108581543, "loss": 0.07710718363523483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.573942051904645, "step_time": 0.4526949520111084} +{"epoch": 0, "iter": 19056, "iter_tflops": 45.623465493780415, "iter_time": 0.4522035598754883, "loss": 0.06546813994646072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89635215110097, "step_time": 0.41347899436950686} +{"epoch": 0, "iter": 19057, "iter_tflops": 17.627692547391565, "iter_time": 1.170379699707031, "loss": 0.5260982513427734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.94406704720053, "step_time": 1.0890530242919922} +{"epoch": 0, "iter": 19058, "iter_tflops": 13.957463577656554, "iter_time": 1.4781405944824217, "loss": 0.5342786312103271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.592214273574946, "step_time": 1.2434201469421386} +{"epoch": 0, "iter": 19059, "iter_tflops": 41.1592180067708, "iter_time": 0.5012508621215821, "loss": 0.44010627269744873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1257269313271, "step_time": 0.46755249023437495} +{"epoch": 0, "iter": 19060, "iter_tflops": 45.14359074023057, "iter_time": 0.45701046752929686, "loss": 0.4948686361312866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.35681487992066, "step_time": 0.426642936706543} +{"epoch": 0, "iter": 19061, "iter_tflops": 
35.54214431970821, "iter_time": 0.5804684524536133, "loss": 0.09200557321310043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.12188725089422, "step_time": 0.5411876220703126} +{"epoch": 0, "iter": 19062, "iter_tflops": 20.899735927250525, "iter_time": 0.9871461334228515, "loss": 0.07931997627019882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.567988458882148, "step_time": 0.7483713779449462} +{"epoch": 0, "iter": 19063, "iter_tflops": 42.661694481778, "iter_time": 0.48359761047363287, "loss": 0.09837580472230911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.895104366640396, "step_time": 0.43994130706787116} +{"epoch": 0, "iter": 19064, "iter_tflops": 46.09518360295523, "iter_time": 0.4475759048461913, "loss": 0.14721573889255524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.305053710894, "step_time": 0.41011969947814936} +{"epoch": 0, "iter": 19065, "iter_tflops": 21.39166671943017, "iter_time": 0.9644453506469728, "loss": 0.4678317904472351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.24832217348186, "step_time": 0.8874229011535643} +{"epoch": 0, "iter": 19066, "iter_tflops": 22.24879469506645, "iter_time": 0.9272903900146484, "loss": 0.540352463722229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.897418411093053, "step_time": 0.8286438846588134} +{"epoch": 0, "iter": 19067, "iter_tflops": 39.01110078482722, "iter_time": 0.5288518676757812, "loss": 0.40870702266693115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80535531734639, "step_time": 0.481974588394165} +{"epoch": 0, "iter": 19068, "iter_tflops": 37.18546948859759, "iter_time": 0.5548160018920898, "loss": 0.3976147770881653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87470311745031, "step_time": 0.5047398986816406} +{"epoch": 0, "iter": 19069, "iter_tflops": 19.281125518719154, "iter_time": 1.0700149993896484, "loss": 0.14370736479759216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.844325383237383, "step_time": 0.9897702674865722} +{"epoch": 0, "iter": 19070, 
"iter_tflops": 15.072957296646248, "iter_time": 1.3687488861083985, "loss": 0.12318708002567291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.91706299528693, "step_time": 1.035850191116333} +{"epoch": 0, "iter": 19071, "iter_tflops": 50.843419393492134, "iter_time": 0.4057770652770996, "loss": 0.10717812925577164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.153246582304945, "step_time": 0.3740685234069824} +{"epoch": 0, "iter": 19072, "iter_tflops": 54.651713359239544, "iter_time": 0.37750131225585937, "loss": 0.162846177816391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.248901527582596, "step_time": 0.3482105655670166} +{"epoch": 0, "iter": 19073, "iter_tflops": 42.295197658409926, "iter_time": 0.4877880859375, "loss": 0.11659464240074158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.13259828311669, "step_time": 0.44721290969848626} +{"epoch": 0, "iter": 19074, "iter_tflops": 34.030119828846544, "iter_time": 0.6062597961425781, "loss": 0.09588228911161423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.18325780561362, "step_time": 0.5403177909851073} +{"epoch": 0, "iter": 19075, "iter_tflops": 41.16372189758577, "iter_time": 0.5011960182189942, "loss": 0.10063034296035767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.57941770741239, "step_time": 0.452640567779541} +{"epoch": 0, "iter": 19076, "iter_tflops": 43.35047142532908, "iter_time": 0.4759139366149902, "loss": 0.11854717135429382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.518341039497166, "step_time": 0.43417116546630863} +{"epoch": 0, "iter": 19077, "iter_tflops": 20.212978083993697, "iter_time": 1.0206854934692384, "loss": 0.615388035774231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.688076666265324, "step_time": 0.9512643203735351} +{"epoch": 0, "iter": 19078, "iter_tflops": 16.48549569739544, "iter_time": 1.2514694061279297, "loss": 0.7908794283866882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.72705598648596, "step_time": 0.90777677154541} +{"epoch": 0, 
"iter": 19079, "iter_tflops": 49.64580446312531, "iter_time": 0.4155657005310059, "loss": 0.5550581812858582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.82948027534742, "step_time": 0.38326755905151366} +{"epoch": 0, "iter": 19080, "iter_tflops": 47.324962339120134, "iter_time": 0.4359452705383301, "loss": 0.4476715326309204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24668099613239, "step_time": 0.4025839939117432} +{"epoch": 0, "iter": 19081, "iter_tflops": 19.353584195565276, "iter_time": 1.0660089263916017, "loss": 0.5395030379295349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.364595298895143, "step_time": 1.0130863494873048} +{"epoch": 0, "iter": 19082, "iter_tflops": 15.032147833574335, "iter_time": 1.3724647827148437, "loss": 0.671862006187439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.51522601985881, "step_time": 1.0571793270111083} +{"epoch": 0, "iter": 19083, "iter_tflops": 36.24482015061567, "iter_time": 0.5692149505615234, "loss": 0.7171136140823364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.527096980181966, "step_time": 0.5219481086730957} +{"epoch": 0, "iter": 19084, "iter_tflops": 39.92968311943677, "iter_time": 0.5166856307983398, "loss": 0.447089821100235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37237281132992, "step_time": 0.47567361831665034} +{"epoch": 0, "iter": 19085, "iter_tflops": 17.732359050028844, "iter_time": 1.1634714508056643, "loss": 0.5141764879226685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.017325613457334, "step_time": 1.0848577728271485} +{"epoch": 0, "iter": 19086, "iter_tflops": 17.35620224884925, "iter_time": 1.1886870880126954, "loss": 0.4448471963405609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.653254420030557, "step_time": 0.8368507118225098} +{"epoch": 0, "iter": 19087, "iter_tflops": 42.706585640438924, "iter_time": 0.48308927536010743, "loss": 0.36784976720809937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.62172778994951, "step_time": 0.44252099800109856} 
+{"epoch": 0, "iter": 19088, "iter_tflops": 47.65152456038357, "iter_time": 0.43295767974853516, "loss": 0.48759448528289795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.59669227868701, "step_time": 0.39985302543640133} +{"epoch": 0, "iter": 19089, "iter_tflops": 30.880328204841906, "iter_time": 0.6680982589721679, "loss": 0.0746811032295227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.80844047771449, "step_time": 0.6288349342346191} +{"epoch": 0, "iter": 19090, "iter_tflops": 16.006466804675643, "iter_time": 1.2889223937988281, "loss": 0.25920370221138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.03417195717009, "step_time": 1.1440000438690185} +{"epoch": 0, "iter": 19091, "iter_tflops": 41.049876864223855, "iter_time": 0.5025860023498535, "loss": 0.15792012214660645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.20745245358216, "step_time": 0.4563648777008057} +{"epoch": 0, "iter": 19092, "iter_tflops": 42.76215055664262, "iter_time": 0.48246155166625976, "loss": 0.20560522377490997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.89087016824955, "step_time": 0.4399810333251953} +{"epoch": 0, "iter": 19093, "iter_tflops": 24.232856997403463, "iter_time": 0.8513685989379882, "loss": 0.040713120251894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.931307859302787, "step_time": 0.7956055908203126} +{"epoch": 0, "iter": 19094, "iter_tflops": 11.624913502055362, "iter_time": 1.774730926513672, "loss": 0.05056045576930046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.111563072026113, "step_time": 1.5735037384033201} +{"epoch": 0, "iter": 19095, "iter_tflops": 10.801376556900083, "iter_time": 1.9100429840087887, "loss": 0.04311719536781311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.195645416170336, "step_time": 1.5634774093627928} +{"epoch": 0, "iter": 19096, "iter_tflops": 40.098246334956514, "iter_time": 0.5145136108398438, "loss": 0.06651861220598221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27293943395875, "step_time": 
0.46599782562255865} +{"epoch": 0, "iter": 19097, "iter_tflops": 14.652356525249385, "iter_time": 1.1852752685546875, "loss": 0.2535373866558075, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 15.559325156733918, "step_time": 1.1161843872070312} +{"epoch": 0, "iter": 19098, "iter_tflops": 12.14288582587212, "iter_time": 1.4302263946533205, "loss": 0.43104755878448486, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 14.295866129102775, "step_time": 1.2148320121765137} +{"epoch": 0, "iter": 19099, "iter_tflops": 26.80248032766679, "iter_time": 0.6479652481079101, "loss": 0.42787617444992065, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 28.761451514807717, "step_time": 0.6038316879272461} +{"epoch": 0, "iter": 19100, "iter_tflops": 27.158601989307243, "iter_time": 0.6394686965942383, "loss": 0.4570867419242859, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 29.157013290800123, "step_time": 0.5956397399902343} +{"epoch": 0, "iter": 19101, "iter_tflops": 18.704622689387307, "iter_time": 1.1029943695068358, "loss": 0.6559515595436096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.23212039333719, "step_time": 1.0197197875976562} +{"epoch": 0, "iter": 19102, "iter_tflops": 15.56144241595168, "iter_time": 1.3257828521728516, "loss": 0.7719813585281372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.843584227283262, "step_time": 1.0396858386993408} +{"epoch": 0, "iter": 19103, "iter_tflops": 42.34345342603608, "iter_time": 0.48723218917846683, "loss": 0.4653349220752716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.65600020992416, "step_time": 0.45188131713867186} +{"epoch": 0, "iter": 19104, "iter_tflops": 42.55245470039059, "iter_time": 0.4848390922546387, "loss": 0.5272536277770996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.654030416438424, "step_time": 0.4519008140563965} +{"epoch": 0, "iter": 19105, "iter_tflops": 45.188643008479204, "iter_time": 0.4565548362731934, "loss": 0.007046693004667759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
49.51391415776391, "step_time": 0.416672643661499} +{"epoch": 0, "iter": 19106, "iter_tflops": 15.279467311738744, "iter_time": 1.350249526977539, "loss": 0.004462752491235733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.77860894446794, "step_time": 1.2296069107055665} +{"epoch": 0, "iter": 19107, "iter_tflops": 12.55046992514676, "iter_time": 1.6438502807617186, "loss": 0.024050235748291016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.919956694065334, "step_time": 1.3827850799560548} +{"epoch": 0, "iter": 19108, "iter_tflops": 15.397452156828676, "iter_time": 1.3399030761718747, "loss": 0.00499295350164175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.940713059011273, "step_time": 1.1499595050811768} +{"epoch": 0, "iter": 19109, "iter_tflops": 20.050329608714584, "iter_time": 0.7822664260864257, "loss": 0.3334919512271881, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 21.916848839895714, "step_time": 0.7156457481384277} +{"epoch": 0, "iter": 19110, "iter_tflops": 23.249595549758155, "iter_time": 0.6746224746704101, "loss": 0.20001403987407684, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.062369678076493, "step_time": 0.6258266830444336} +{"epoch": 0, "iter": 19111, "iter_tflops": 22.741588525193634, "iter_time": 0.6896923522949219, "loss": 0.27977022528648376, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 24.609335369384496, "step_time": 0.6373475532531738} +{"epoch": 0, "iter": 19112, "iter_tflops": 23.81971276744156, "iter_time": 0.658475601196289, "loss": 0.2996283769607544, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.612992138994887, "step_time": 0.6123727989196777} +{"epoch": 0, "iter": 19113, "iter_tflops": 26.11525376396452, "iter_time": 0.7900016479492187, "loss": 0.22288919985294342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.104027172729612, "step_time": 0.7340974082946777} +{"epoch": 0, "iter": 19114, "iter_tflops": 8.239441274596977, "iter_time": 2.5039432678222657, "loss": 0.20967817306518555, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 10.971566126629897, "step_time": 1.8804146347045898} +{"epoch": 0, "iter": 19115, "iter_tflops": 14.118228297659588, "iter_time": 1.4613089599609375, "loss": 0.1466619074344635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.228619778506925, "step_time": 1.1317967987060547} +{"epoch": 0, "iter": 19116, "iter_tflops": 47.06810114465977, "iter_time": 0.43832432174682623, "loss": 0.1098126694560051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.003001282805734, "step_time": 0.40450744056701654} +{"epoch": 0, "iter": 19117, "iter_tflops": 20.014104304938584, "iter_time": 0.7857278366088867, "loss": 0.2578011751174927, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 21.233113424018807, "step_time": 0.7406186065673828} +{"epoch": 0, "iter": 19118, "iter_tflops": 7.323775024347292, "iter_time": 2.1472039794921876, "loss": 0.3606360852718353, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 9.2537080975445, "step_time": 1.6993878250122072} +{"epoch": 0, "iter": 19119, "iter_tflops": 7.4708293455519685, "iter_time": 2.1049388427734375, "loss": 0.4433024823665619, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 8.897852209363148, "step_time": 1.767352222442627} +{"epoch": 0, "iter": 19120, "iter_tflops": 25.64548711858871, "iter_time": 0.6131932220458985, "loss": 0.47470349073410034, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 27.30100852398348, "step_time": 0.576009449005127} +{"epoch": 0, "iter": 19121, "iter_tflops": 19.199636987469642, "iter_time": 0.780715560913086, "loss": 0.3182946741580963, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 20.3631192902172, "step_time": 0.7361080169677734} +{"epoch": 0, "iter": 19122, "iter_tflops": 10.730728920927293, "iter_time": 1.3968720550537108, "loss": 0.3030036389827728, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 13.61492906150347, "step_time": 1.1009572868347166} +{"epoch": 0, "iter": 19123, "iter_tflops": 26.906461783277887, "iter_time": 0.5570950012207031, "loss": 0.29574084281921387, "lr": 3e-05, 
"seqlen": 6000.0, "step_tflops": 28.70418303096379, "step_time": 0.5222045631408692} +{"epoch": 0, "iter": 19124, "iter_tflops": 27.384757749672705, "iter_time": 0.5473649063110352, "loss": 0.3380899727344513, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.08779819294091, "step_time": 0.5153176345825196} +{"epoch": 0, "iter": 19125, "iter_tflops": 27.276267527217325, "iter_time": 0.7563752441406251, "loss": 0.4958603084087372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.996973431512306, "step_time": 0.7114912719726563} +{"epoch": 0, "iter": 19126, "iter_tflops": 11.971334941914787, "iter_time": 1.7233745117187502, "loss": 0.45084714889526367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.8963594748711, "step_time": 1.3849755401611328} +{"epoch": 0, "iter": 19127, "iter_tflops": 37.38307210188871, "iter_time": 0.5518833084106446, "loss": 0.6132764220237732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.69664486782283, "step_time": 0.5069482650756836} +{"epoch": 0, "iter": 19128, "iter_tflops": 38.570336977439986, "iter_time": 0.5348953399658203, "loss": 0.5002646446228027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.551905296005, "step_time": 0.4965137786865234} +{"epoch": 0, "iter": 19129, "iter_tflops": 17.889688469765463, "iter_time": 1.1532393951416016, "loss": 0.637502133846283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.26657101509645, "step_time": 1.0708233184814453} +{"epoch": 0, "iter": 19130, "iter_tflops": 27.162406628419493, "iter_time": 0.7595458602905274, "loss": 0.6688697338104248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.873012364589805, "step_time": 0.647290355682373} +{"epoch": 0, "iter": 19131, "iter_tflops": 41.73199448718745, "iter_time": 0.4943711357116699, "loss": 0.7189986109733582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.421666266953636, "step_time": 0.4542126083374023} +{"epoch": 0, "iter": 19132, "iter_tflops": 32.52064381758403, "iter_time": 0.6343999099731445, "loss": 0.7581601142883301, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 35.558810300584206, "step_time": 0.5801963939666748} +{"epoch": 0, "iter": 19133, "iter_tflops": 12.924094000880363, "iter_time": 1.1345285339355469, "loss": 0.0180285032838583, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 13.728652438328103, "step_time": 1.0680402526855468} +{"epoch": 0, "iter": 19134, "iter_tflops": 17.364020417080326, "iter_time": 0.8444330902099609, "loss": 0.014549720101058483, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 19.626673862178485, "step_time": 0.7470829505920411} +{"epoch": 0, "iter": 19135, "iter_tflops": 31.349736854801304, "iter_time": 0.46771535873413084, "loss": 0.04633870720863342, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 34.57176491020696, "step_time": 0.42412510490417477} +{"epoch": 0, "iter": 19136, "iter_tflops": 33.815733821773435, "iter_time": 0.4336074295043945, "loss": 0.02458977699279785, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 37.347879476798745, "step_time": 0.3925993556976318} +{"epoch": 0, "iter": 19137, "iter_tflops": 17.765745226265654, "iter_time": 1.1612850036621092, "loss": 0.49828365445137024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.684645459112982, "step_time": 1.1041736679077148} +{"epoch": 0, "iter": 19138, "iter_tflops": 14.517723014178719, "iter_time": 1.4210970611572264, "loss": 0.6059659123420715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.571716412148323, "step_time": 1.1108878173828125} +{"epoch": 0, "iter": 19139, "iter_tflops": 37.321165147003164, "iter_time": 0.5527987518310546, "loss": 0.6388097405433655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.83776104093204, "step_time": 0.5051964893341064} +{"epoch": 0, "iter": 19140, "iter_tflops": 43.721293282445245, "iter_time": 0.47187747573852534, "loss": 0.625019907951355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.67461013041698, "step_time": 0.4327480278015136} +{"epoch": 0, "iter": 19141, "iter_tflops": 16.326229939652617, "iter_time": 1.263677749633789, "loss": 
0.04240317642688751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.657890780467483, "step_time": 1.1683781356811525} +{"epoch": 0, "iter": 19142, "iter_tflops": 18.851496794636066, "iter_time": 1.0944008178710936, "loss": 0.0172038022428751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.208479154158912, "step_time": 0.787191556930542} +{"epoch": 0, "iter": 19143, "iter_tflops": 51.86246552468657, "iter_time": 0.39780394744873043, "loss": 0.0450376532971859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.80456398226612, "step_time": 0.3631942939758301} +{"epoch": 0, "iter": 19144, "iter_tflops": 54.16132694116821, "iter_time": 0.3809192771911621, "loss": 0.039293382316827774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.06366699079546, "step_time": 0.3493026180267334} +{"epoch": 0, "iter": 19145, "iter_tflops": 22.541065102032846, "iter_time": 0.9152670211791993, "loss": 0.22164086997509003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.67158873085603, "step_time": 0.8715550842285157} +{"epoch": 0, "iter": 19146, "iter_tflops": 16.49448340437965, "iter_time": 1.2507874908447267, "loss": 0.2797742486000061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.123751893284275, "step_time": 0.9325314083099365} +{"epoch": 0, "iter": 19147, "iter_tflops": 36.6671123488102, "iter_time": 0.5626593475341797, "loss": 0.243289053440094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.30582904102518, "step_time": 0.5118637676239014} +{"epoch": 0, "iter": 19148, "iter_tflops": 39.85193412033035, "iter_time": 0.5176936569213868, "loss": 0.2458680272102356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.751941784979174, "step_time": 0.4715469226837158} +{"epoch": 0, "iter": 19149, "iter_tflops": 19.486802570406287, "iter_time": 1.0587213287353516, "loss": 0.5148845911026001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.862618887182972, "step_time": 0.9889023818969725} +{"epoch": 0, "iter": 19150, "iter_tflops": 20.453886779007497, "iter_time": 
1.0086637191772463, "loss": 0.39054253697395325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.3742307075426, "step_time": 0.8130726699829101} +{"epoch": 0, "iter": 19151, "iter_tflops": 47.3935815796585, "iter_time": 0.43531408309936526, "loss": 0.49911442399024963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.44624674850896, "step_time": 0.4010223255157471} +{"epoch": 0, "iter": 19152, "iter_tflops": 51.239343089565395, "iter_time": 0.40264164733886715, "loss": 0.4511755406856537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.78883338993492, "step_time": 0.36980686378479} +{"epoch": 0, "iter": 19153, "iter_tflops": 30.320095134049293, "iter_time": 0.680442901611328, "loss": 0.5339472889900208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.23892371902322, "step_time": 0.639943618774414} +{"epoch": 0, "iter": 19154, "iter_tflops": 18.450519474196483, "iter_time": 1.1181849670410156, "loss": 0.8260475993156433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.940923825924447, "step_time": 0.9403019523620606} +{"epoch": 0, "iter": 19155, "iter_tflops": 35.02356664798372, "iter_time": 0.5890631790161133, "loss": 0.44471022486686707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.8031688095187, "step_time": 0.545750373840332} +{"epoch": 0, "iter": 19156, "iter_tflops": 32.363799080732896, "iter_time": 0.6374744033813476, "loss": 0.4542594850063324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.86879549483425, "step_time": 0.5916778373718262} +{"epoch": 0, "iter": 19157, "iter_tflops": 18.247975464510223, "iter_time": 1.1305962982177733, "loss": 0.28045761585235596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.34840588659876, "step_time": 1.0662942276000977} +{"epoch": 0, "iter": 19158, "iter_tflops": 19.698486669245742, "iter_time": 1.0473440856933594, "loss": 0.2684285640716553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.504887632323555, "step_time": 0.9167383480072021} +{"epoch": 0, "iter": 19159, "iter_tflops": 51.30782719125466, 
"iter_time": 0.4021042137145996, "loss": 0.2890433371067047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.87894942212635, "step_time": 0.36921047592163087} +{"epoch": 0, "iter": 19160, "iter_tflops": 50.970623108170095, "iter_time": 0.4047643966674805, "loss": 0.38402634859085083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.28280963464757, "step_time": 0.37319184112548826} +{"epoch": 0, "iter": 19161, "iter_tflops": 31.58340790810062, "iter_time": 0.6532256927490233, "loss": 0.35342419147491455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.70694266879744, "step_time": 0.612072525024414} +{"epoch": 0, "iter": 19162, "iter_tflops": 17.37387100116566, "iter_time": 1.1874782257080079, "loss": 0.4035753309726715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.608997978467173, "step_time": 1.0521238021850585} +{"epoch": 0, "iter": 19163, "iter_tflops": 37.39821514162374, "iter_time": 0.5516598434448243, "loss": 0.3455032408237457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.53492283306393, "step_time": 0.5089708347320556} +{"epoch": 0, "iter": 19164, "iter_tflops": 35.216560654564454, "iter_time": 0.5858349914550782, "loss": 0.4919945001602173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.65394131654853, "step_time": 0.5337384185791015} +{"epoch": 0, "iter": 19165, "iter_tflops": 21.0025853920647, "iter_time": 0.9823120880126954, "loss": 0.011227237060666084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.580022767089506, "step_time": 0.9136878967285156} +{"epoch": 0, "iter": 19166, "iter_tflops": 36.14688731087987, "iter_time": 0.5707571258544922, "loss": 0.0007249498157761991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.903396892984865, "step_time": 0.43986352539062495} +{"epoch": 0, "iter": 19167, "iter_tflops": 52.81449977157888, "iter_time": 0.39063313293457036, "loss": 0.0024797553196549416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.903202961681984, "step_time": 0.35630314826965337} +{"epoch": 0, "iter": 19168, "iter_tflops": 
54.72221099568577, "iter_time": 0.3770149841308594, "loss": 0.0036112030502408743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.19134278279946, "step_time": 0.34275848579406737} +{"epoch": 0, "iter": 19169, "iter_tflops": 26.964591382444784, "iter_time": 0.7651179733276366, "loss": 0.028355132788419724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.395036290397766, "step_time": 0.7265739440917969} +{"epoch": 0, "iter": 19170, "iter_tflops": 17.772807319264317, "iter_time": 1.1608235626220702, "loss": 0.006856468506157398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.880976693633215, "step_time": 0.9428780899047852} +{"epoch": 0, "iter": 19171, "iter_tflops": 41.722467916303586, "iter_time": 0.494484016418457, "loss": 0.004484761971980333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.094076325301245, "step_time": 0.4475866565704346} +{"epoch": 0, "iter": 19172, "iter_tflops": 46.25234548935668, "iter_time": 0.44605507659912114, "loss": 0.01867438107728958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24114999997813, "step_time": 0.4026274490356445} +{"epoch": 0, "iter": 19173, "iter_tflops": 17.59277993409285, "iter_time": 1.1727023010253907, "loss": 0.18002048134803772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.534540352049618, "step_time": 1.1131160049438478} +{"epoch": 0, "iter": 19174, "iter_tflops": 25.68548863567029, "iter_time": 0.8032198181152344, "loss": 0.24219901859760284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.307252470264253, "step_time": 0.7039586372375488} +{"epoch": 0, "iter": 19175, "iter_tflops": 45.63995429017687, "iter_time": 0.45204018783569333, "loss": 0.23664870858192444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.348605627826096, "step_time": 0.4180684185028076} +{"epoch": 0, "iter": 19176, "iter_tflops": 53.26297508734292, "iter_time": 0.3873439941406249, "loss": 0.19699028134346008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.967465216210215, "step_time": 0.35590815353393546} +{"epoch": 0, 
"iter": 19177, "iter_tflops": 40.80994826822524, "iter_time": 0.5055407905578613, "loss": 0.08278503268957138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.36373983643087, "step_time": 0.4650440559387207} +{"epoch": 0, "iter": 19178, "iter_tflops": 46.61135613637003, "iter_time": 0.44261946487426757, "loss": 0.062304116785526276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.334935903609725, "step_time": 0.4018918724060059} +{"epoch": 0, "iter": 19179, "iter_tflops": 53.80893331406081, "iter_time": 0.38341390991210933, "loss": 0.06397734582424164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.974543331074074, "step_time": 0.3498304920196533} +{"epoch": 0, "iter": 19180, "iter_tflops": 55.207235597569834, "iter_time": 0.3737027091979981, "loss": 0.026901714503765106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.42618537892783, "step_time": 0.34142637634277345} +{"epoch": 0, "iter": 19181, "iter_tflops": 21.126369318449303, "iter_time": 0.9765565109252929, "loss": 0.5284628868103027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.025736773140974, "step_time": 0.9366811981201171} +{"epoch": 0, "iter": 19182, "iter_tflops": 16.19207386631085, "iter_time": 1.2741476898193358, "loss": 0.5809802412986755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.063556533318845, "step_time": 1.0822268905639647} +{"epoch": 0, "iter": 19183, "iter_tflops": 41.257376408687875, "iter_time": 0.5000583000183105, "loss": 0.6073577404022217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.53208988878563, "step_time": 0.46328599357604977} +{"epoch": 0, "iter": 19184, "iter_tflops": 43.898684641283204, "iter_time": 0.4699706535339356, "loss": 0.5440949201583862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.422069735104806, "step_time": 0.43505257415771487} +{"epoch": 0, "iter": 19185, "iter_tflops": 8.959938999115058, "iter_time": 0.7670357208251953, "loss": 0.0007860968471504748, "lr": 3e-05, "seqlen": 2784.0, "step_tflops": 9.482500163924575, "step_time": 
0.7247659530639649} +{"epoch": 0, "iter": 19186, "iter_tflops": 6.639494492606648, "iter_time": 1.03510791015625, "loss": 0.00812776293605566, "lr": 3e-05, "seqlen": 2784.0, "step_tflops": 8.189512145456755, "step_time": 0.839194465637207} +{"epoch": 0, "iter": 19187, "iter_tflops": 18.30817847936436, "iter_time": 0.3753837814331055, "loss": 0.0009790081530809402, "lr": 3e-05, "seqlen": 2784.0, "step_tflops": 20.107487907563097, "step_time": 0.3417927341461181} +{"epoch": 0, "iter": 19188, "iter_tflops": 19.424992814476628, "iter_time": 0.353801586151123, "loss": 0.00411303760483861, "lr": 3e-05, "seqlen": 2784.0, "step_tflops": 21.371597812230483, "step_time": 0.3215760154724121} +{"epoch": 0, "iter": 19189, "iter_tflops": 26.573820471865947, "iter_time": 0.6581849212646484, "loss": 0.0245477557182312, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 28.167910971941556, "step_time": 0.620936637878418} +{"epoch": 0, "iter": 19190, "iter_tflops": 10.748462306493389, "iter_time": 1.627254898071289, "loss": 0.01859113574028015, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 14.445750371960083, "step_time": 1.2107704677581785} +{"epoch": 0, "iter": 19191, "iter_tflops": 38.032762046883285, "iter_time": 0.45987950897216795, "loss": 0.03445329889655113, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 41.98952308585393, "step_time": 0.416544095993042} +{"epoch": 0, "iter": 19192, "iter_tflops": 39.801166406390934, "iter_time": 0.4394466171264648, "loss": 0.0397016815841198, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 43.934333443484874, "step_time": 0.39810523033142087} +{"epoch": 0, "iter": 19193, "iter_tflops": 16.55853075423059, "iter_time": 1.2459495239257812, "loss": 0.7097761631011963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.64919996323383, "step_time": 1.168953468322754} +{"epoch": 0, "iter": 19194, "iter_tflops": 22.186583373637927, "iter_time": 0.9298905181884766, "loss": 0.4887908697128296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.356790163279754, 
"step_time": 0.6796203880310059} +{"epoch": 0, "iter": 19195, "iter_tflops": 42.805487955099224, "iter_time": 0.4819730949401856, "loss": 0.757796049118042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.1817134999568, "step_time": 0.44673728942871094} +{"epoch": 0, "iter": 19196, "iter_tflops": 42.91431650280074, "iter_time": 0.4807508354187012, "loss": 0.5008360743522644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.08284901676718, "step_time": 0.44769570350646976} +{"epoch": 0, "iter": 19197, "iter_tflops": 26.104264203377, "iter_time": 0.7903342285156251, "loss": 0.3211299777030945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.45189610166152, "step_time": 0.7515361938476561} +{"epoch": 0, "iter": 19198, "iter_tflops": 11.277084238435531, "iter_time": 1.8294705505371094, "loss": 0.3783716857433319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.994675732363522, "step_time": 1.5876574325561523} +{"epoch": 0, "iter": 19199, "iter_tflops": 11.011600604706613, "iter_time": 1.8735780792236327, "loss": 0.36439552903175354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.27221028621187, "step_time": 1.5544580039978027} +{"epoch": 0, "iter": 19200, "iter_tflops": 28.298473490146534, "iter_time": 0.7290532302856445, "loss": 0.5686118602752686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.56429191985549, "step_time": 0.6536212997436524} +{"epoch": 0, "iter": 19201, "iter_tflops": 23.209934262640623, "iter_time": 0.6793032073974611, "loss": 0.2751859426498413, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 24.611368276521358, "step_time": 0.6406219520568848} +{"epoch": 0, "iter": 19202, "iter_tflops": 11.252806584736458, "iter_time": 1.4011244812011718, "loss": 0.3263495862483978, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 13.361535847330558, "step_time": 1.1799977912902833} +{"epoch": 0, "iter": 19203, "iter_tflops": 27.089155974706753, "iter_time": 0.5820256195068358, "loss": 0.3264272212982178, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 
28.82787155865191, "step_time": 0.546921501159668} +{"epoch": 0, "iter": 19204, "iter_tflops": 28.505511496523326, "iter_time": 0.5531064682006837, "loss": 0.3496793210506439, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.336577921442224, "step_time": 0.5197218627929687} +{"epoch": 0, "iter": 19205, "iter_tflops": 23.373521171699764, "iter_time": 0.6622910766601563, "loss": 0.03780027851462364, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.906354181644495, "step_time": 0.6215311317443848} +{"epoch": 0, "iter": 19206, "iter_tflops": 9.154037806542279, "iter_time": 1.691065170288086, "loss": 0.05649464204907417, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 10.421372507576102, "step_time": 1.4854160995483396} +{"epoch": 0, "iter": 19207, "iter_tflops": 11.018947341757759, "iter_time": 1.404859649658203, "loss": 0.06618747115135193, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 12.396682404025007, "step_time": 1.2487271995544436} +{"epoch": 0, "iter": 19208, "iter_tflops": 38.70122448763389, "iter_time": 0.3999892692565918, "loss": 0.0400841124355793, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 42.31310696359471, "step_time": 0.36584584808349613} +{"epoch": 0, "iter": 19209, "iter_tflops": 15.254791687597994, "iter_time": 0.9960016326904297, "loss": 0.17362025380134583, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 15.926007505533615, "step_time": 0.9540242538452148} +{"epoch": 0, "iter": 19210, "iter_tflops": 11.325327602889434, "iter_time": 1.3415768585205077, "loss": 0.5408535599708557, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 13.755273520816557, "step_time": 1.1045798110961915} +{"epoch": 0, "iter": 19211, "iter_tflops": 27.961488581861474, "iter_time": 0.5433829956054688, "loss": 0.3319703936576843, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.777147652003016, "step_time": 0.5102502632141114} +{"epoch": 0, "iter": 19212, "iter_tflops": 28.183116600025556, "iter_time": 0.5391099090576171, "loss": 0.3966245949268341, "lr": 3e-05, "seqlen": 6080.0, 
"step_tflops": 29.940998563765202, "step_time": 0.5074579391479492} +{"epoch": 0, "iter": 19213, "iter_tflops": 28.037686251088164, "iter_time": 0.7358343811035156, "loss": 0.623830258846283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.606503052034512, "step_time": 0.6968433074951171} +{"epoch": 0, "iter": 19214, "iter_tflops": 12.857510478263695, "iter_time": 1.6045947265624998, "loss": 0.7192220091819763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.590414741311497, "step_time": 1.414016933441162} +{"epoch": 0, "iter": 19215, "iter_tflops": 42.78004435260767, "iter_time": 0.482259750366211, "loss": 0.5103605389595032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.25749359920792, "step_time": 0.44600543403625487} +{"epoch": 0, "iter": 19216, "iter_tflops": 42.06221600384746, "iter_time": 0.490489933013916, "loss": 0.6320473551750183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.154506741621354, "step_time": 0.4568999862670898} +{"epoch": 0, "iter": 19217, "iter_tflops": 26.800151556286295, "iter_time": 0.7698125686645507, "loss": 0.5490671396255493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.31812669244647, "step_time": 0.7285472564697266} +{"epoch": 0, "iter": 19218, "iter_tflops": 12.435741198061086, "iter_time": 1.659015991210938, "loss": 0.4880051910877228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.534051129601306, "step_time": 1.3281206130981444} +{"epoch": 0, "iter": 19219, "iter_tflops": 36.17751743066824, "iter_time": 0.5702738876342773, "loss": 0.4575963616371155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.281623803569175, "step_time": 0.5252097930908203} +{"epoch": 0, "iter": 19220, "iter_tflops": 39.14407595997197, "iter_time": 0.527055320739746, "loss": 0.5862531661987305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.9667582261712, "step_time": 0.48016407012939455} +{"epoch": 0, "iter": 19221, "iter_tflops": 25.00240660449368, "iter_time": 0.825164306640625, "loss": 0.2017570436000824, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 26.974271377551066, "step_time": 0.7648434028625488} +{"epoch": 0, "iter": 19222, "iter_tflops": 10.334796132891181, "iter_time": 1.9962748413085936, "loss": 0.23886118829250336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.73273129454031, "step_time": 1.7584220581054686} +{"epoch": 0, "iter": 19223, "iter_tflops": 10.618880448026355, "iter_time": 1.9428689880371093, "loss": 0.2932494878768921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.070244344713396, "step_time": 1.5784780273437502} +{"epoch": 0, "iter": 19224, "iter_tflops": 39.887435656139026, "iter_time": 0.5172328872680664, "loss": 0.19691850244998932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.88858077459525, "step_time": 0.40541695594787597} +{"epoch": 0, "iter": 19225, "iter_tflops": 20.760565231072313, "iter_time": 0.7436774215698242, "loss": 0.34122908115386963, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 22.008023878503256, "step_time": 0.7015243034362793} +{"epoch": 0, "iter": 19226, "iter_tflops": 11.64797358585623, "iter_time": 1.325480651855469, "loss": 0.23102816939353943, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.799783385948091, "step_time": 1.2062050704956055} +{"epoch": 0, "iter": 19227, "iter_tflops": 28.063455515334095, "iter_time": 0.5501519088745118, "loss": 0.4434117376804352, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.906215079902882, "step_time": 0.5162526779174804} +{"epoch": 0, "iter": 19228, "iter_tflops": 27.82471774098945, "iter_time": 0.5548722457885742, "loss": 0.3065083622932434, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.608295976451313, "step_time": 0.5214472198486328} +{"epoch": 0, "iter": 19229, "iter_tflops": 43.31460263265082, "iter_time": 0.47630804061889653, "loss": 0.15147121250629425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08709677192396, "step_time": 0.43814749526977537} +{"epoch": 0, "iter": 19230, "iter_tflops": 38.963030526918644, "iter_time": 0.5295043334960937, "loss": 0.21553492546081543, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.8750922636889, "step_time": 0.45974487113952633} +{"epoch": 0, "iter": 19231, "iter_tflops": 40.82251681607606, "iter_time": 0.5053851432800294, "loss": 0.26584890484809875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.58314307505272, "step_time": 0.46275547409057616} +{"epoch": 0, "iter": 19232, "iter_tflops": 41.65257825052179, "iter_time": 0.495313720703125, "loss": 0.26174405217170715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.776502361293176, "step_time": 0.45069178390502934} +{"epoch": 0, "iter": 19233, "iter_tflops": 19.07175626632356, "iter_time": 1.0817615966796874, "loss": 0.1205798014998436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.351094699048833, "step_time": 1.013758415222168} +{"epoch": 0, "iter": 19234, "iter_tflops": 25.42602680435728, "iter_time": 0.8114163360595702, "loss": 0.0964723601937294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.0475359438981, "step_time": 0.6437653598785401} +{"epoch": 0, "iter": 19235, "iter_tflops": 47.355309318611894, "iter_time": 0.43566590118408205, "loss": 0.11515074223279953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.054609004359364, "step_time": 0.4040985507965088} +{"epoch": 0, "iter": 19236, "iter_tflops": 53.743102466890114, "iter_time": 0.3838835601806641, "loss": 0.13199546933174133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.46071566810235, "step_time": 0.35290525054931643} +{"epoch": 0, "iter": 19237, "iter_tflops": 31.226655732762023, "iter_time": 0.6606885375976562, "loss": 0.5635550618171692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.31709801525832, "step_time": 0.6192344093322754} +{"epoch": 0, "iter": 19238, "iter_tflops": 13.823217236581215, "iter_time": 1.4924957885742185, "loss": 0.6359579563140869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.324994848369098, "step_time": 1.1908282623291016} +{"epoch": 0, "iter": 19239, "iter_tflops": 35.50241620938889, "iter_time": 0.5811180114746093, "loss": 
0.5601517558097839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.704753844057635, "step_time": 0.5330377140045166} +{"epoch": 0, "iter": 19240, "iter_tflops": 35.40095166628806, "iter_time": 0.5827835845947265, "loss": 0.6465191841125488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.36231154377773, "step_time": 0.5377958908081054} +{"epoch": 0, "iter": 19241, "iter_tflops": 22.023986839889346, "iter_time": 0.9367556228637696, "loss": 0.5584033131599426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.7129825951581, "step_time": 0.8700336799621582} +{"epoch": 0, "iter": 19242, "iter_tflops": 16.81958919669527, "iter_time": 1.2266110229492186, "loss": 0.6140458583831787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.31982640914523, "step_time": 1.0153183937072754} +{"epoch": 0, "iter": 19243, "iter_tflops": 35.888503994964275, "iter_time": 0.5748663558959961, "loss": 0.564911425113678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.009362693901714, "step_time": 0.528875431060791} +{"epoch": 0, "iter": 19244, "iter_tflops": 36.07606930000463, "iter_time": 0.5718775329589845, "loss": 0.6589962840080261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.537520596643716, "step_time": 0.5218105030059814} +{"epoch": 0, "iter": 19245, "iter_tflops": 15.5576478373463, "iter_time": 1.3261062164306643, "loss": 0.2010362148284912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.494362370291306, "step_time": 1.2507966690063475} +{"epoch": 0, "iter": 19246, "iter_tflops": 18.930953180294676, "iter_time": 1.0898074340820312, "loss": 0.17436474561691284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.394842601215284, "step_time": 0.9643021869659424} +{"epoch": 0, "iter": 19247, "iter_tflops": 49.95764589483687, "iter_time": 0.4129716911315918, "loss": 0.20384033024311066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.01442825802721, "step_time": 0.3819552326202393} +{"epoch": 0, "iter": 19248, "iter_tflops": 53.90232768867347, "iter_time": 0.38274958419799804, 
"loss": 0.17261604964733124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.581202211490314, "step_time": 0.35217941474914555} +{"epoch": 0, "iter": 19249, "iter_tflops": 25.51534108609089, "iter_time": 0.8085760421752929, "loss": 0.04873846843838692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.75597488055846, "step_time": 0.7710836029052734} +{"epoch": 0, "iter": 19250, "iter_tflops": 17.19552623439207, "iter_time": 1.1997942504882813, "loss": 0.06864821910858154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.921636609168104, "step_time": 0.9861127929687501} +{"epoch": 0, "iter": 19251, "iter_tflops": 41.5133180167021, "iter_time": 0.49697529602050783, "loss": 0.0957280769944191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.891785392338484, "step_time": 0.44955961799621585} +{"epoch": 0, "iter": 19252, "iter_tflops": 44.288478655177954, "iter_time": 0.4658343238830567, "loss": 0.05765359848737717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.673544885441785, "step_time": 0.4238666725158691} +{"epoch": 0, "iter": 19253, "iter_tflops": 24.11742162471047, "iter_time": 0.8554435806274414, "loss": 0.3330245912075043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.02923472275949, "step_time": 0.7926123733520508} +{"epoch": 0, "iter": 19254, "iter_tflops": 10.295776526477066, "iter_time": 2.003840454101562, "loss": 0.4358052909374237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.2770486170816, "step_time": 1.6804603576660155} +{"epoch": 0, "iter": 19255, "iter_tflops": 10.34196147486531, "iter_time": 1.9948917388916017, "loss": 0.3616427779197693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.986337103153316, "step_time": 1.5886768798828124} +{"epoch": 0, "iter": 19256, "iter_tflops": 37.821954848153624, "iter_time": 0.5454793014526368, "loss": 0.40586140751838684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.316006073395876, "step_time": 0.499348690032959} +{"epoch": 0, "iter": 19257, "iter_tflops": 11.12335785139426, "iter_time": 
1.384317367553711, "loss": 0.2816668152809143, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 11.807984578388746, "step_time": 1.3040546722412107} +{"epoch": 0, "iter": 19258, "iter_tflops": 11.517871505052844, "iter_time": 1.3369013061523436, "loss": 0.24577920138835907, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 16.907901854706385, "step_time": 0.9107136764526368} +{"epoch": 0, "iter": 19259, "iter_tflops": 26.200253425522174, "iter_time": 0.5877140655517579, "loss": 0.2938174307346344, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 27.863684991380037, "step_time": 0.5526281776428222} +{"epoch": 0, "iter": 19260, "iter_tflops": 27.72302564059675, "iter_time": 0.5554320678710938, "loss": 0.24964775145053864, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 29.43297238169692, "step_time": 0.5231635208129883} +{"epoch": 0, "iter": 19261, "iter_tflops": 26.118672114243434, "iter_time": 0.7898982543945312, "loss": 0.1334870606660843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.673861280134638, "step_time": 0.7455083084106445} +{"epoch": 0, "iter": 19262, "iter_tflops": 21.57755023207947, "iter_time": 0.9561369705200194, "loss": 0.1130378469824791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.677878468796013, "step_time": 0.6951673965454102} +{"epoch": 0, "iter": 19263, "iter_tflops": 48.953442849582046, "iter_time": 0.42144315719604497, "loss": 0.11219941824674606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.9193688937459, "step_time": 0.38985902404785155} +{"epoch": 0, "iter": 19264, "iter_tflops": 51.39653503073852, "iter_time": 0.40141020202636724, "loss": 0.16035377979278564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.642557273458955, "step_time": 0.37077903175354004} +{"epoch": 0, "iter": 19265, "iter_tflops": 40.77823051264962, "iter_time": 0.5059340057373047, "loss": 0.07470647245645523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.22202557341138, "step_time": 0.4665343399047852} +{"epoch": 0, "iter": 19266, "iter_tflops": 
46.669157415919464, "iter_time": 0.44207126617431647, "loss": 0.08918497711420059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.773461033140904, "step_time": 0.406336166381836} +{"epoch": 0, "iter": 19267, "iter_tflops": 48.68436099322107, "iter_time": 0.4237725028991699, "loss": 0.09617315232753754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.362619125617975, "step_time": 0.38662070655822756} +{"epoch": 0, "iter": 19268, "iter_tflops": 45.37047615972725, "iter_time": 0.4547250823974609, "loss": 0.07833851128816605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.850882072027225, "step_time": 0.4223279628753662} +{"epoch": 0, "iter": 19269, "iter_tflops": 25.931968830774046, "iter_time": 0.7955853118896483, "loss": 0.6154007911682129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.348614188399022, "step_time": 0.7543743667602539} +{"epoch": 0, "iter": 19270, "iter_tflops": 15.813367001830308, "iter_time": 1.3046616516113283, "loss": 0.6665517687797546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.066261166894833, "step_time": 1.082073371887207} +{"epoch": 0, "iter": 19271, "iter_tflops": 38.70992832157353, "iter_time": 0.5329664611816406, "loss": 0.4546829164028168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.008705778781284, "step_time": 0.4911147136688232} +{"epoch": 0, "iter": 19272, "iter_tflops": 43.73969367822606, "iter_time": 0.4716789665222168, "loss": 0.6465093493461609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.520664770194294, "step_time": 0.43414993476867675} +{"epoch": 0, "iter": 19273, "iter_tflops": 14.837051323153002, "iter_time": 1.3905117034912111, "loss": 0.48144078254699707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.626338662416494, "step_time": 1.3202768707275392} +{"epoch": 0, "iter": 19274, "iter_tflops": 16.69977454997981, "iter_time": 1.2354114990234375, "loss": 0.31307125091552734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.353173589426227, "step_time": 1.01365486907959} +{"epoch": 0, "iter": 19275, 
"iter_tflops": 36.3636065035344, "iter_time": 0.5673555374145507, "loss": 0.4628203213214874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.4516992050642, "step_time": 0.5229456253051759} +{"epoch": 0, "iter": 19276, "iter_tflops": 40.338526415855995, "iter_time": 0.5114488639831543, "loss": 0.30399787425994873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78290780146236, "step_time": 0.471213415145874} +{"epoch": 0, "iter": 19277, "iter_tflops": 17.2408037349244, "iter_time": 1.1966433715820313, "loss": 0.30056264996528625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.16012070829955, "step_time": 1.1360658798217773} +{"epoch": 0, "iter": 19278, "iter_tflops": 8.330262926119993, "iter_time": 2.4766437377929686, "loss": 0.3869553208351135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.706094203125977, "step_time": 1.9270420303344726} +{"epoch": 0, "iter": 19279, "iter_tflops": 16.475344884229752, "iter_time": 1.252240463256836, "loss": 0.3650016486644745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.612915330887294, "step_time": 1.108428913116455} +{"epoch": 0, "iter": 19280, "iter_tflops": 40.118116121713115, "iter_time": 0.5142587814331054, "loss": 0.4072889983654022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.012171573800444, "step_time": 0.4687588176727295} +{"epoch": 0, "iter": 19281, "iter_tflops": 12.188653431765692, "iter_time": 1.2197333221435547, "loss": 0.47709745168685913, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 13.035571285333251, "step_time": 1.1404875488281248} +{"epoch": 0, "iter": 19282, "iter_tflops": 13.94423104314879, "iter_time": 1.0661689910888672, "loss": 0.27621254324913025, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 15.666173276799285, "step_time": 0.9489813804626465} +{"epoch": 0, "iter": 19283, "iter_tflops": 21.133027124239284, "iter_time": 0.7034915847778321, "loss": 0.3837997317314148, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 22.739626718723265, "step_time": 0.6537885131835938} +{"epoch": 0, "iter": 
19284, "iter_tflops": 22.609377391740942, "iter_time": 0.6575548934936524, "loss": 0.3409130573272705, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.3334503666761, "step_time": 0.6109658317565918} +{"epoch": 0, "iter": 19285, "iter_tflops": 20.67903937245854, "iter_time": 0.9976814270019531, "loss": 0.24462412297725677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.30846022102522, "step_time": 0.924810287475586} +{"epoch": 0, "iter": 19286, "iter_tflops": 33.372745852816955, "iter_time": 0.6182018585205078, "loss": 0.20933321118354797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80547101536598, "step_time": 0.4819732856750488} +{"epoch": 0, "iter": 19287, "iter_tflops": 50.37765821911597, "iter_time": 0.4095286331176758, "loss": 0.23440417647361755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.96774998332354, "step_time": 0.37533087158203127} +{"epoch": 0, "iter": 19288, "iter_tflops": 53.19506042742133, "iter_time": 0.3878385200500489, "loss": 0.20987993478775024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.83430382564826, "step_time": 0.3567276191711426} +{"epoch": 0, "iter": 19289, "iter_tflops": 23.0462844040985, "iter_time": 0.8952025909423829, "loss": 0.4122309982776642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.173783935916656, "step_time": 0.853449073791504} +{"epoch": 0, "iter": 19290, "iter_tflops": 14.833458063994032, "iter_time": 1.3908485412597658, "loss": 0.6643713116645813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.91252070948351, "step_time": 1.0908695793151855} +{"epoch": 0, "iter": 19291, "iter_tflops": 43.43115699998571, "iter_time": 0.4750297927856446, "loss": 0.6048135757446289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.870543940852414, "step_time": 0.440171838760376} +{"epoch": 0, "iter": 19292, "iter_tflops": 45.61988917466352, "iter_time": 0.4522390098571777, "loss": 0.4939896762371063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.93134599576432, "step_time": 0.42163347625732417} +{"epoch": 0, 
"iter": 19293, "iter_tflops": 16.515085251507944, "iter_time": 1.2492271881103516, "loss": 0.0638667419552803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.083304724761977, "step_time": 1.2076757888793945} +{"epoch": 0, "iter": 19294, "iter_tflops": 16.02241660840274, "iter_time": 1.2876393127441406, "loss": 0.050040941685438156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.11727162103381, "step_time": 1.138752784729004} +{"epoch": 0, "iter": 19295, "iter_tflops": 42.34330125867621, "iter_time": 0.4872339401245117, "loss": 0.10914574563503265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.678579827334275, "step_time": 0.4419820308685302} +{"epoch": 0, "iter": 19296, "iter_tflops": 40.813766807075936, "iter_time": 0.5054934921264648, "loss": 0.033062856644392014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.60854056706344, "step_time": 0.46249200820922853} +{"epoch": 0, "iter": 19297, "iter_tflops": 1.422885711321423, "iter_time": 1.10146630859375, "loss": 0.4054883122444153, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.5289405610143019, "step_time": 1.0250631790161133} +{"epoch": 0, "iter": 19298, "iter_tflops": 2.121548078702307, "iter_time": 0.7387344589233398, "loss": 0.6233952045440674, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 2.417995960227611, "step_time": 0.648165132522583} +{"epoch": 0, "iter": 19299, "iter_tflops": 3.8035151503428843, "iter_time": 0.412055850982666, "loss": 0.5380188226699829, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 4.126548361930713, "step_time": 0.3797994194030762} +{"epoch": 0, "iter": 19300, "iter_tflops": 3.8162199181238514, "iter_time": 0.4106840553283691, "loss": 0.5392580032348633, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 4.138427109248913, "step_time": 0.3787092609405518} +{"epoch": 0, "iter": 19301, "iter_tflops": 36.384122483103404, "iter_time": 0.5670356216430664, "loss": 0.4061625897884369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.205554593080485, "step_time": 0.5262288398742676} 
+{"epoch": 0, "iter": 19302, "iter_tflops": 28.534935332715712, "iter_time": 0.7230117492675782, "loss": 0.40925079584121704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27230609094032, "step_time": 0.4998774108886719} +{"epoch": 0, "iter": 19303, "iter_tflops": 47.525034149567944, "iter_time": 0.4341100196838379, "loss": 0.3638162314891815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.435717715007904, "step_time": 0.40110441589355467} +{"epoch": 0, "iter": 19304, "iter_tflops": 47.351393331684754, "iter_time": 0.4357019309997558, "loss": 0.4400792717933655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.18270345250363, "step_time": 0.40308721733093267} +{"epoch": 0, "iter": 19305, "iter_tflops": 24.72763392191153, "iter_time": 0.7089895782470703, "loss": 0.06259430944919586, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 26.22238062046687, "step_time": 0.6685752525329589} +{"epoch": 0, "iter": 19306, "iter_tflops": 12.803646148516048, "iter_time": 1.3692689208984374, "loss": 0.053703270852565765, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 16.212599645521763, "step_time": 1.081358642578125} +{"epoch": 0, "iter": 19307, "iter_tflops": 43.44543943966732, "iter_time": 0.4035322227478027, "loss": 0.06872338056564331, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 47.43159120476869, "step_time": 0.36961936759948727} +{"epoch": 0, "iter": 19308, "iter_tflops": 41.33002652094156, "iter_time": 0.42418638992309576, "loss": 0.05793359875679016, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 44.70229037983499, "step_time": 0.39218649864196775} +{"epoch": 0, "iter": 19309, "iter_tflops": 38.09706639741054, "iter_time": 0.5415402145385741, "loss": 0.37065771222114563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.131090143200055, "step_time": 0.5015936470031738} +{"epoch": 0, "iter": 19310, "iter_tflops": 13.08151132165258, "iter_time": 1.5771184997558596, "loss": 0.39783480763435364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.41523597126513, "step_time": 
1.1203274040222169} +{"epoch": 0, "iter": 19311, "iter_tflops": 48.2079060829895, "iter_time": 0.42796078872680665, "loss": 0.2539415955543518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.38326708459169, "step_time": 0.39384892654418946} +{"epoch": 0, "iter": 19312, "iter_tflops": 50.42781901025223, "iter_time": 0.4091212730407715, "loss": 0.3039587736129761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.45088769444792, "step_time": 0.37889361190795895} +{"epoch": 0, "iter": 19313, "iter_tflops": 24.631699995804553, "iter_time": 0.6783776626586915, "loss": 0.3098057806491852, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 26.22607107351638, "step_time": 0.6371368026733399} +{"epoch": 0, "iter": 19314, "iter_tflops": 9.955497745402981, "iter_time": 1.6784288940429688, "loss": 0.3194078207015991, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 13.224943169285726, "step_time": 1.2634908790588377} +{"epoch": 0, "iter": 19315, "iter_tflops": 25.49027864338845, "iter_time": 0.6555281448364257, "loss": 0.2875053584575653, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 27.478742324325424, "step_time": 0.608091697692871} +{"epoch": 0, "iter": 19316, "iter_tflops": 24.23443450070767, "iter_time": 0.6894980392456055, "loss": 0.30955687165260315, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 25.8844944650273, "step_time": 0.6455445785522461} +{"epoch": 0, "iter": 19317, "iter_tflops": 22.0090081764413, "iter_time": 0.9373931503295898, "loss": 0.5387811064720154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.729376034778724, "step_time": 0.8694326171875001} +{"epoch": 0, "iter": 19318, "iter_tflops": 23.580059808058202, "iter_time": 0.8749381332397461, "loss": 0.6318172812461853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.248701035507715, "step_time": 0.7859853134155274} +{"epoch": 0, "iter": 19319, "iter_tflops": 36.67537748753844, "iter_time": 0.5625325469970702, "loss": 0.564142107963562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.78766563907163, 
"step_time": 0.5185298805236817} +{"epoch": 0, "iter": 19320, "iter_tflops": 36.18185604279136, "iter_time": 0.5702055053710937, "loss": 0.4806595742702484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14383542681956, "step_time": 0.5270585594177246} +{"epoch": 0, "iter": 19321, "iter_tflops": 16.059910215651005, "iter_time": 1.2846331787109375, "loss": 0.19505898654460907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.183202492333148, "step_time": 1.2006547393798828} +{"epoch": 0, "iter": 19322, "iter_tflops": 16.5929592778284, "iter_time": 1.2433643188476564, "loss": 0.23394544422626495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.47781812263092, "step_time": 1.00748494720459} +{"epoch": 0, "iter": 19323, "iter_tflops": 48.58644931690132, "iter_time": 0.42462649154663085, "loss": 0.2611946165561676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.83848789404248, "step_time": 0.3904557895660401} +{"epoch": 0, "iter": 19324, "iter_tflops": 54.4711944222916, "iter_time": 0.3787523612976074, "loss": 0.21084290742874146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.317038137632295, "step_time": 0.34781058120727537} +{"epoch": 0, "iter": 19325, "iter_tflops": 36.0646480703891, "iter_time": 0.5720586395263672, "loss": 0.26941412687301636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.74666880680152, "step_time": 0.5324610900878907} +{"epoch": 0, "iter": 19326, "iter_tflops": 12.94396877355815, "iter_time": 1.593876953125, "loss": 0.19736194610595703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.826575141925055, "step_time": 1.391494213104248} +{"epoch": 0, "iter": 19327, "iter_tflops": 10.882759270192814, "iter_time": 1.8957594299316407, "loss": 0.2959121763706207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.762950982834429, "step_time": 1.6164830169677735} +{"epoch": 0, "iter": 19328, "iter_tflops": 27.459182843193297, "iter_time": 0.7513367614746094, "loss": 0.28637033700942993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
34.1513721008194, "step_time": 0.6041073093414306} +{"epoch": 0, "iter": 19329, "iter_tflops": 15.499050804677005, "iter_time": 1.0463400268554688, "loss": 0.46181032061576843, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 16.21273929600207, "step_time": 1.000279899597168} +{"epoch": 0, "iter": 19330, "iter_tflops": 9.26637759459212, "iter_time": 1.7501204833984376, "loss": 0.28492382168769836, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 11.931234620135903, "step_time": 1.359228759765625} +{"epoch": 0, "iter": 19331, "iter_tflops": 24.53111228488156, "iter_time": 0.6610901718139648, "loss": 0.3715641498565674, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 26.407715986817575, "step_time": 0.6141113166809082} +{"epoch": 0, "iter": 19332, "iter_tflops": 24.634102244360896, "iter_time": 0.6583262939453124, "loss": 0.2999255657196045, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 26.515538305390084, "step_time": 0.6116141052246094} +{"epoch": 0, "iter": 19333, "iter_tflops": 18.671627154305977, "iter_time": 1.0805081481933594, "loss": 0.2318328469991684, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 20.17540341633989, "step_time": 0.9999723358154297} +{"epoch": 0, "iter": 19334, "iter_tflops": 17.26811503789189, "iter_time": 1.1683293304443358, "loss": 0.19475580751895905, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 20.226113769027666, "step_time": 0.9974652328491211} +{"epoch": 0, "iter": 19335, "iter_tflops": 48.360666382364506, "iter_time": 0.4171746749877929, "loss": 0.23122625052928925, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 52.76656873376223, "step_time": 0.3823414287567139} +{"epoch": 0, "iter": 19336, "iter_tflops": 48.882842837436286, "iter_time": 0.4127183303833008, "loss": 0.1800759732723236, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 53.25416479576787, "step_time": 0.3788407039642334} +{"epoch": 0, "iter": 19337, "iter_tflops": 21.310499321891438, "iter_time": 0.9681187286376954, "loss": 0.19987936317920685, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 22.305050153984034, "step_time": 0.9249516754150391} +{"epoch": 0, "iter": 19338, "iter_tflops": 14.608869808873905, "iter_time": 1.4122306365966797, "loss": 0.09844634681940079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.493637828860635, "step_time": 1.1155778923034667} +{"epoch": 0, "iter": 19339, "iter_tflops": 38.95328311259486, "iter_time": 0.529636833190918, "loss": 0.12061790376901627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7826843127436, "step_time": 0.48222999191284177} +{"epoch": 0, "iter": 19340, "iter_tflops": 39.47020041280131, "iter_time": 0.5227005004882812, "loss": 0.1314835250377655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.02912102248243, "step_time": 0.47946816062927244} +{"epoch": 0, "iter": 19341, "iter_tflops": 20.278039767953615, "iter_time": 1.01741064453125, "loss": 0.5207459926605225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.649278543796104, "step_time": 0.9529691009521484} +{"epoch": 0, "iter": 19342, "iter_tflops": 19.705122341528345, "iter_time": 1.0469913940429687, "loss": 0.4834475517272949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.051268875675238, "step_time": 0.8577964687347412} +{"epoch": 0, "iter": 19343, "iter_tflops": 41.2024696560681, "iter_time": 0.5007246818542481, "loss": 0.35112860798835754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.378679919187704, "step_time": 0.4648874988555909} +{"epoch": 0, "iter": 19344, "iter_tflops": 41.70263675914263, "iter_time": 0.4947191619873047, "loss": 0.5557056069374084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.74276053964768, "step_time": 0.4611046180725098} +{"epoch": 0, "iter": 19345, "iter_tflops": 31.396722675933276, "iter_time": 0.6571097793579102, "loss": 0.14328616857528687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.48423330089938, "step_time": 0.616143524169922} +{"epoch": 0, "iter": 19346, "iter_tflops": 13.591910805472779, "iter_time": 1.5178950042724608, "loss": 0.10662582516670227, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 15.942296427327582, "step_time": 1.2941105194091798} +{"epoch": 0, "iter": 19347, "iter_tflops": 17.092985774954638, "iter_time": 1.2069917907714844, "loss": 0.11236376315355301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.75520329118963, "step_time": 1.1000197219848633} +{"epoch": 0, "iter": 19348, "iter_tflops": 25.90293653536653, "iter_time": 0.7964770126342773, "loss": 0.08564989268779755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.2941632705986, "step_time": 0.6388489875793457} +{"epoch": 0, "iter": 19349, "iter_tflops": 11.89470496297473, "iter_time": 1.291108612060547, "loss": 0.31182584166526794, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 12.617125162688275, "step_time": 1.2171834564208983} +{"epoch": 0, "iter": 19350, "iter_tflops": 15.724639576931827, "iter_time": 0.9766427993774414, "loss": 0.432657390832901, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 19.06776083760802, "step_time": 0.8054095153808594} +{"epoch": 0, "iter": 19351, "iter_tflops": 22.358159289292846, "iter_time": 0.6868792648315429, "loss": 0.44833657145500183, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.98909552039409, "step_time": 0.6401807022094727} +{"epoch": 0, "iter": 19352, "iter_tflops": 25.382501662618193, "iter_time": 0.6050371322631837, "loss": 0.47269415855407715, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.15976978115379, "step_time": 0.5654449996948243} +{"epoch": 0, "iter": 19353, "iter_tflops": 17.772954734662044, "iter_time": 1.1608139343261719, "loss": 0.4215071499347687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.219062785388612, "step_time": 1.0734703216552735} +{"epoch": 0, "iter": 19354, "iter_tflops": 15.669551339979163, "iter_time": 1.3166358795166015, "loss": 0.4557040333747864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.906991062735298, "step_time": 1.0363742790222168} +{"epoch": 0, "iter": 19355, "iter_tflops": 33.45750472020194, "iter_time": 0.6166357498168945, "loss": 0.47721824049949646, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.60141813863753, "step_time": 0.5636692390441894} +{"epoch": 0, "iter": 19356, "iter_tflops": 39.66531201188194, "iter_time": 0.5201293640136718, "loss": 0.3609482944011688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.06521962316084, "step_time": 0.479066255569458} +{"epoch": 0, "iter": 19357, "iter_tflops": 32.061671660792285, "iter_time": 0.6434815292358398, "loss": 0.4977608621120453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.306016719670126, "step_time": 0.5843506412506103} +{"epoch": 0, "iter": 19358, "iter_tflops": 35.91922814216883, "iter_time": 0.5743746337890624, "loss": 0.5948648452758789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.36630789083064, "step_time": 0.5240799713134766} +{"epoch": 0, "iter": 19359, "iter_tflops": 35.1751090632559, "iter_time": 0.586525360107422, "loss": 0.45444685220718384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.20216967907279, "step_time": 0.5400503082275391} +{"epoch": 0, "iter": 19360, "iter_tflops": 35.93732893059902, "iter_time": 0.5740853347778321, "loss": 0.7057229280471802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.17686470053007, "step_time": 0.5266142063140868} +{"epoch": 0, "iter": 19361, "iter_tflops": 17.84895540274851, "iter_time": 1.1558712005615235, "loss": 0.3015747666358948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.084016776474684, "step_time": 1.081066619873047} +{"epoch": 0, "iter": 19362, "iter_tflops": 22.30382288070501, "iter_time": 0.9250025711059571, "loss": 0.3439437747001648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.17543848785031, "step_time": 0.6036818962097168} +{"epoch": 0, "iter": 19363, "iter_tflops": 48.79123847678606, "iter_time": 0.4228442268371582, "loss": 0.31232908368110657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.97097556892579, "step_time": 0.38947920608520503} +{"epoch": 0, "iter": 19364, "iter_tflops": 49.74253153347668, "iter_time": 0.4147576103210449, "loss": 0.2773931324481964, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.15185104404075, "step_time": 0.380985933303833} +{"epoch": 0, "iter": 19365, "iter_tflops": 36.3834067840119, "iter_time": 0.5670467758178711, "loss": 0.17652660608291626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.203767019706206, "step_time": 0.5262528343200683} +{"epoch": 0, "iter": 19366, "iter_tflops": 37.91783682126947, "iter_time": 0.5440999603271485, "loss": 0.18374568223953247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.9546122504498, "step_time": 0.49174792480468754} +{"epoch": 0, "iter": 19367, "iter_tflops": 39.52392265829196, "iter_time": 0.5219900283813477, "loss": 0.23177577555179596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37280568725253, "step_time": 0.4756688709259033} +{"epoch": 0, "iter": 19368, "iter_tflops": 37.74967576915959, "iter_time": 0.5465237274169922, "loss": 0.19269418716430664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.395841246629885, "step_time": 0.4983856563568116} +{"epoch": 0, "iter": 19369, "iter_tflops": 31.21478191482417, "iter_time": 0.6609398574829102, "loss": 0.4563828408718109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.415406172701346, "step_time": 0.5994726142883301} +{"epoch": 0, "iter": 19370, "iter_tflops": 38.889203833732694, "iter_time": 0.530509536743164, "loss": 0.5565561652183533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.845971529644835, "step_time": 0.4815176963806153} +{"epoch": 0, "iter": 19371, "iter_tflops": 41.77842526597897, "iter_time": 0.49382171249389656, "loss": 0.43072310090065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.63275911717851, "step_time": 0.4521114635467529} +{"epoch": 0, "iter": 19372, "iter_tflops": 40.577617122209276, "iter_time": 0.5084353141784669, "loss": 0.4333869516849518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.320280091036864, "step_time": 0.4655000705718994} +{"epoch": 0, "iter": 19373, "iter_tflops": 22.505796555090704, "iter_time": 0.9167013244628905, "loss": 
0.6012663245201111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.627651475226287, "step_time": 0.837720703125} +{"epoch": 0, "iter": 19374, "iter_tflops": 30.144278040050626, "iter_time": 0.6844115982055664, "loss": 0.3362470865249634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.83884451601311, "step_time": 0.6282527236938477} +{"epoch": 0, "iter": 19375, "iter_tflops": 38.620672895192875, "iter_time": 0.5341981887817383, "loss": 0.36910316348075867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.02771610390508, "step_time": 0.4908925685882568} +{"epoch": 0, "iter": 19376, "iter_tflops": 35.0186904990647, "iter_time": 0.5891452026367188, "loss": 0.29691407084465027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.055956565553316, "step_time": 0.5421252117156983} +{"epoch": 0, "iter": 19377, "iter_tflops": 22.12038552654525, "iter_time": 0.932673324584961, "loss": 0.0036442012060433626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.08411800678119, "step_time": 0.856626491546631} +{"epoch": 0, "iter": 19378, "iter_tflops": 49.581637560294155, "iter_time": 0.41610351181030275, "loss": 0.004212233237922192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.3763123931198, "step_time": 0.379413251876831} +{"epoch": 0, "iter": 19379, "iter_tflops": 54.37397831326627, "iter_time": 0.3794295387268066, "loss": 0.0036760871298611164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.289820529438, "step_time": 0.34797024726867676} +{"epoch": 0, "iter": 19380, "iter_tflops": 61.49626711959577, "iter_time": 0.3354852981567383, "loss": 0.003619843628257513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.66024362167221, "step_time": 0.30492195129394534} +{"epoch": 0, "iter": 19381, "iter_tflops": 27.804472660323018, "iter_time": 0.7420062866210938, "loss": 0.16384391486644745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.410709855343097, "step_time": 0.7014823379516602} +{"epoch": 0, "iter": 19382, "iter_tflops": 13.940291957340072, "iter_time": 
1.4799613647460939, "loss": 0.12806953489780426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.508428770314822, "step_time": 1.0575476760864257} +{"epoch": 0, "iter": 19383, "iter_tflops": 41.801062971756046, "iter_time": 0.49355427932739254, "loss": 0.2746570110321045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.76787765321809, "step_time": 0.45077671432495114} +{"epoch": 0, "iter": 19384, "iter_tflops": 38.82142665627368, "iter_time": 0.5314357376098632, "loss": 0.333476722240448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.815705734308615, "step_time": 0.48185807418823245} +{"epoch": 0, "iter": 19385, "iter_tflops": 29.73632573064892, "iter_time": 0.6938010330200195, "loss": 0.5424032211303711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.77648065970039, "step_time": 0.6294481010437012} +{"epoch": 0, "iter": 19386, "iter_tflops": 35.30016542928958, "iter_time": 0.5844475021362304, "loss": 0.5894197225570679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.54544730199157, "step_time": 0.5352407341003418} +{"epoch": 0, "iter": 19387, "iter_tflops": 37.07683389112954, "iter_time": 0.556441619873047, "loss": 0.7118064761161804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.079975142411485, "step_time": 0.5147481613159179} +{"epoch": 0, "iter": 19388, "iter_tflops": 32.85601059925185, "iter_time": 0.6279244842529297, "loss": 0.432050496339798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.45989574027654, "step_time": 0.5818148384094238} +{"epoch": 0, "iter": 19389, "iter_tflops": 16.384247982378245, "iter_time": 1.2592029571533203, "loss": 0.21606993675231934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.46816330516907, "step_time": 1.1810682754516604} +{"epoch": 0, "iter": 19390, "iter_tflops": 23.24978282685581, "iter_time": 0.8873671493530273, "loss": 0.23124682903289795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.181365810162387, "step_time": 0.7320828113555908} +{"epoch": 0, "iter": 19391, "iter_tflops": 47.24093885509525, 
"iter_time": 0.4367206497192383, "loss": 0.17193171381950378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.27772070143324, "step_time": 0.40234029960632317} +{"epoch": 0, "iter": 19392, "iter_tflops": 51.98280040367344, "iter_time": 0.39688307189941396, "loss": 0.16936586797237396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.40485190638063, "step_time": 0.36576806449890137} +{"epoch": 0, "iter": 19393, "iter_tflops": 24.160324712148046, "iter_time": 0.7648619766235351, "loss": 0.02314099296927452, "lr": 3e-05, "seqlen": 7360.0, "step_tflops": 25.421351642834384, "step_time": 0.7269209747314452} +{"epoch": 0, "iter": 19394, "iter_tflops": 10.617350605134712, "iter_time": 1.740482574462891, "loss": 0.028992632403969765, "lr": 3e-05, "seqlen": 7360.0, "step_tflops": 13.326864847033917, "step_time": 1.386621229171753} +{"epoch": 0, "iter": 19395, "iter_tflops": 37.47624519344971, "iter_time": 0.49309405517578125, "loss": 0.03345802426338196, "lr": 3e-05, "seqlen": 7360.0, "step_tflops": 41.472399555931716, "step_time": 0.44558101081848145} +{"epoch": 0, "iter": 19396, "iter_tflops": 39.26318961930841, "iter_time": 0.47065238189697267, "loss": 0.028980417177081108, "lr": 3e-05, "seqlen": 7360.0, "step_tflops": 42.931972417611256, "step_time": 0.4304324417114258} +{"epoch": 0, "iter": 19397, "iter_tflops": 18.756627648060494, "iter_time": 1.0999361877441407, "loss": 0.019649935886263847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.845890501188357, "step_time": 1.0395650177001952} +{"epoch": 0, "iter": 19398, "iter_tflops": 12.145167630493226, "iter_time": 1.6987080078124999, "loss": 0.051685627549886703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.492295018766344, "step_time": 1.4235904998779296} +{"epoch": 0, "iter": 19399, "iter_tflops": 11.521079648259565, "iter_time": 1.7907257080078125, "loss": 0.015152639709413052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.158413114158416, "step_time": 1.567901336669922} +{"epoch": 0, "iter": 19400, 
"iter_tflops": 35.18314639560573, "iter_time": 0.5863913726806642, "loss": 0.018959814682602882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.4164467884688, "step_time": 0.45426480865478513} +{"epoch": 0, "iter": 19401, "iter_tflops": 18.679217940503477, "iter_time": 0.8243523635864256, "loss": 0.246230348944664, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 19.64732271981069, "step_time": 0.7837331161499024} +{"epoch": 0, "iter": 19402, "iter_tflops": 9.468191889421362, "iter_time": 1.6263144683837891, "loss": 0.3775271475315094, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 12.072422423613169, "step_time": 1.275490280151367} +{"epoch": 0, "iter": 19403, "iter_tflops": 26.696771030925383, "iter_time": 0.5767835159301758, "loss": 0.4426838159561157, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.304103765260862, "step_time": 0.5440291481018067} +{"epoch": 0, "iter": 19404, "iter_tflops": 27.06824316909511, "iter_time": 0.5688680038452149, "loss": 0.3392336368560791, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.899343733708083, "step_time": 0.5328237762451172} +{"epoch": 0, "iter": 19405, "iter_tflops": 25.249282240340378, "iter_time": 0.8170962371826174, "loss": 0.6380946636199951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.61167815234935, "step_time": 0.7752646560668945} +{"epoch": 0, "iter": 19406, "iter_tflops": 13.845825299376873, "iter_time": 1.4900587768554687, "loss": 0.5426859855651855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.52593422853068, "step_time": 1.2484070930480957} +{"epoch": 0, "iter": 19407, "iter_tflops": 31.80877274654417, "iter_time": 0.6485975952148437, "loss": 0.5235282182693481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.68058723974712, "step_time": 0.5948888168334961} +{"epoch": 0, "iter": 19408, "iter_tflops": 37.21081829715888, "iter_time": 0.5544380493164063, "loss": 0.5422077775001526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.678206691440764, "step_time": 0.5071780490875244} +{"epoch": 0, "iter": 
19409, "iter_tflops": 17.11158861640407, "iter_time": 1.2056796112060546, "loss": 0.5747590661048889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.230592066645375, "step_time": 1.1316743545532228} +{"epoch": 0, "iter": 19410, "iter_tflops": 22.067930771753783, "iter_time": 0.9348902587890625, "loss": 0.591775119304657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.539515326565258, "step_time": 0.8407294616699219} +{"epoch": 0, "iter": 19411, "iter_tflops": 42.37073855723795, "iter_time": 0.48691843032836907, "loss": 0.5597977638244629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88891561588716, "step_time": 0.44958773231506344} +{"epoch": 0, "iter": 19412, "iter_tflops": 44.781312777757456, "iter_time": 0.4607076530456543, "loss": 0.6968005299568176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.03282553552251, "step_time": 0.4295207138061523} +{"epoch": 0, "iter": 19413, "iter_tflops": 25.447537846395058, "iter_time": 0.810730438232422, "loss": 0.41288840770721436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.784508105439617, "step_time": 0.770262176513672} +{"epoch": 0, "iter": 19414, "iter_tflops": 16.270638196029992, "iter_time": 1.267995346069336, "loss": 0.45634695887565613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.391354571179896, "step_time": 1.063932559967041} +{"epoch": 0, "iter": 19415, "iter_tflops": 49.315113876470456, "iter_time": 0.41835234451293946, "loss": 0.5153765082359314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.45028471772334, "step_time": 0.38598659706115723} +{"epoch": 0, "iter": 19416, "iter_tflops": 44.6904190275282, "iter_time": 0.46164466476440424, "loss": 0.3936205208301544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.7579237903253, "step_time": 0.4319930992126465} +{"epoch": 0, "iter": 19417, "iter_tflops": 29.797321221127422, "iter_time": 0.6923808135986327, "loss": 0.44893741607666016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.725037968665955, "step_time": 0.650309497833252} +{"epoch": 
0, "iter": 19418, "iter_tflops": 10.506252895554228, "iter_time": 1.9636966400146485, "loss": 0.3811109662055969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.609617710861283, "step_time": 1.7770691528320315} +{"epoch": 0, "iter": 19419, "iter_tflops": 14.733696054634894, "iter_time": 1.4002659912109374, "loss": 0.5679478049278259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.06987656902808, "step_time": 1.2838364639282227} +{"epoch": 0, "iter": 19420, "iter_tflops": 13.999235370612281, "iter_time": 1.4737300262451172, "loss": 0.6014590859413147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.70384627602233, "step_time": 1.2351103553771972} +{"epoch": 0, "iter": 19421, "iter_tflops": 12.999428770379609, "iter_time": 1.3264972229003906, "loss": 0.5775077939033508, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 13.936754307940555, "step_time": 1.2372827835083007} +{"epoch": 0, "iter": 19422, "iter_tflops": 14.389466292524157, "iter_time": 1.198356201171875, "loss": 0.29466384649276733, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 17.17760647869955, "step_time": 1.003848014831543} +{"epoch": 0, "iter": 19423, "iter_tflops": 30.020205880395473, "iter_time": 0.5744033279418945, "loss": 0.23782841861248016, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 32.176187627277564, "step_time": 0.5359151420593262} +{"epoch": 0, "iter": 19424, "iter_tflops": 31.694501252337375, "iter_time": 0.5440598678588867, "loss": 0.3875943422317505, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 33.59896761605821, "step_time": 0.5132213096618652} +{"epoch": 0, "iter": 19425, "iter_tflops": 34.95414565139581, "iter_time": 0.5902330932617188, "loss": 0.3448057770729065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.35854770207776, "step_time": 0.5522455978393555} +{"epoch": 0, "iter": 19426, "iter_tflops": 10.951271082699792, "iter_time": 1.883899444580078, "loss": 0.26532670855522156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.6224181465915, "step_time": 1.5144956855773928} 
+{"epoch": 0, "iter": 19427, "iter_tflops": 12.875797492244647, "iter_time": 1.6023157806396484, "loss": 0.33965209126472473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.868250645468624, "step_time": 1.3001492080688475} +{"epoch": 0, "iter": 19428, "iter_tflops": 15.172013087930074, "iter_time": 1.359812530517578, "loss": 0.3166644275188446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.33960706168735, "step_time": 1.1249474124908447} +{"epoch": 0, "iter": 19429, "iter_tflops": 14.726153023089697, "iter_time": 1.1821287231445312, "loss": 0.2531954348087311, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 15.822773933932519, "step_time": 1.1001995315551758} +{"epoch": 0, "iter": 19430, "iter_tflops": 13.894780599636901, "iter_time": 1.2528595428466798, "loss": 0.37978753447532654, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 16.169754337855245, "step_time": 1.0765907821655272} +{"epoch": 0, "iter": 19431, "iter_tflops": 30.108802167071985, "iter_time": 0.5781767196655274, "loss": 0.36939868330955505, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 32.12607778269377, "step_time": 0.5418715782165527} +{"epoch": 0, "iter": 19432, "iter_tflops": 31.041294245864226, "iter_time": 0.5608080749511718, "loss": 0.38047611713409424, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 32.93895030069696, "step_time": 0.5284991874694823} +{"epoch": 0, "iter": 19433, "iter_tflops": 29.01166925442503, "iter_time": 0.7111308670043945, "loss": 0.4801374077796936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.89387481772907, "step_time": 0.6678053054809571} +{"epoch": 0, "iter": 19434, "iter_tflops": 13.901979892632403, "iter_time": 1.4840399475097656, "loss": 0.4378477931022644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.850410137045525, "step_time": 1.2243674392700195} +{"epoch": 0, "iter": 19435, "iter_tflops": 16.215030543340834, "iter_time": 1.2723437957763672, "loss": 0.4053656756877899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.639249346366782, "step_time": 
1.1068628959655762} +{"epoch": 0, "iter": 19436, "iter_tflops": 29.948531181758007, "iter_time": 0.6888849868774414, "loss": 0.6634005904197693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.62772306471245, "step_time": 0.6135144348144531} +{"epoch": 0, "iter": 19437, "iter_tflops": 13.804943521653112, "iter_time": 1.0473580017089843, "loss": 0.20142215490341187, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 14.707608129136846, "step_time": 0.9830774612426758} +{"epoch": 0, "iter": 19438, "iter_tflops": 13.709206580460657, "iter_time": 1.0546721267700196, "loss": 0.3054391145706177, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 16.133105866826273, "step_time": 0.896214168548584} +{"epoch": 0, "iter": 19439, "iter_tflops": 21.516492779045585, "iter_time": 0.6719830322265626, "loss": 0.23480583727359772, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 23.243830473585753, "step_time": 0.6220454101562499} +{"epoch": 0, "iter": 19440, "iter_tflops": 21.991844646311396, "iter_time": 0.6574581756591797, "loss": 0.40249770879745483, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 23.66203352219727, "step_time": 0.6110513725280762} +{"epoch": 0, "iter": 19441, "iter_tflops": 26.352833447096003, "iter_time": 0.7828795166015625, "loss": 0.4407320022583008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.337927036410747, "step_time": 0.7280382041931153} +{"epoch": 0, "iter": 19442, "iter_tflops": 11.273940222846353, "iter_time": 1.829980743408203, "loss": 0.45480769872665405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.48932234202119, "step_time": 1.4238825683593752} +{"epoch": 0, "iter": 19443, "iter_tflops": 11.619167592392701, "iter_time": 1.7756085662841796, "loss": 0.5224292874336243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.993334603931494, "step_time": 1.474351474761963} +{"epoch": 0, "iter": 19444, "iter_tflops": 30.922327198549592, "iter_time": 0.6671908416748047, "loss": 0.33959704637527466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
39.17146832453743, "step_time": 0.5266867542266847} +{"epoch": 0, "iter": 19445, "iter_tflops": 14.584303033476615, "iter_time": 1.0417911224365235, "loss": 0.38299301266670227, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 15.550848574176188, "step_time": 0.9770397644042969} +{"epoch": 0, "iter": 19446, "iter_tflops": 16.090336901656034, "iter_time": 0.9442808761596679, "loss": 0.2526906728744507, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 19.201727668095153, "step_time": 0.7912724151611328} +{"epoch": 0, "iter": 19447, "iter_tflops": 26.98462621266415, "iter_time": 0.563053840637207, "loss": 0.31863558292388916, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.707133132953675, "step_time": 0.5292690620422363} +{"epoch": 0, "iter": 19448, "iter_tflops": 26.760414539391494, "iter_time": 0.5677713775634766, "loss": 0.3452589511871338, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.45077215998975, "step_time": 0.5340381393432617} +{"epoch": 0, "iter": 19449, "iter_tflops": 33.35170268595317, "iter_time": 0.618591911315918, "loss": 0.559350848197937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.738737964916005, "step_time": 0.5772753791809082} +{"epoch": 0, "iter": 19450, "iter_tflops": 15.567545877591899, "iter_time": 1.3252630615234375, "loss": 0.45110049843788147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.89524963640228, "step_time": 1.1528810119628905} +{"epoch": 0, "iter": 19451, "iter_tflops": 40.13353246394217, "iter_time": 0.5140612411499024, "loss": 0.6384274363517761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.81287353201281, "step_time": 0.4141719207763672} +{"epoch": 0, "iter": 19452, "iter_tflops": 45.23925121524161, "iter_time": 0.45604409790039063, "loss": 0.5793882012367249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.564305240994095, "step_time": 0.4248201103210449} +{"epoch": 0, "iter": 19453, "iter_tflops": 28.97789008034222, "iter_time": 0.7119598236083985, "loss": 0.4704044759273529, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 30.70974404801636, "step_time": 0.6718093605041503} +{"epoch": 0, "iter": 19454, "iter_tflops": 17.707292945081957, "iter_time": 1.165118438720703, "loss": 0.5058339238166809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.896663055248432, "step_time": 0.9872912940979004} +{"epoch": 0, "iter": 19455, "iter_tflops": 46.587777179699714, "iter_time": 0.4428434829711914, "loss": 0.5634631514549255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.428044234764194, "step_time": 0.40911944580078125} +{"epoch": 0, "iter": 19456, "iter_tflops": 46.64564427199824, "iter_time": 0.44229410552978515, "loss": 0.41248953342437744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30881644569387, "step_time": 0.41008902549743653} +{"epoch": 0, "iter": 19457, "iter_tflops": 41.45190367252654, "iter_time": 0.4977116050720215, "loss": 0.033660534769296646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.142087480450236, "step_time": 0.45702568626403806} +{"epoch": 0, "iter": 19458, "iter_tflops": 13.436879058139569, "iter_time": 1.535408142089844, "loss": 0.018356159329414368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.450966576971705, "step_time": 1.1181578712463378} +{"epoch": 0, "iter": 19459, "iter_tflops": 12.096578355083897, "iter_time": 1.7055313415527344, "loss": 0.029536813497543335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.991319875485289, "step_time": 1.290143257141113} +{"epoch": 0, "iter": 19460, "iter_tflops": 12.715800491814893, "iter_time": 1.6224769744873047, "loss": 0.01993742398917675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.099243826228456, "step_time": 1.2814945678710936} +{"epoch": 0, "iter": 19461, "iter_tflops": 27.426093359949817, "iter_time": 0.6062655258178711, "loss": 0.2216293066740036, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 29.52569768657321, "step_time": 0.5631533279418945} +{"epoch": 0, "iter": 19462, "iter_tflops": 29.452892650058853, "iter_time": 0.5645453948974609, "loss": 0.18482086062431335, "lr": 
3e-05, "seqlen": 6640.0, "step_tflops": 31.44043452485667, "step_time": 0.5288570327758789} +{"epoch": 0, "iter": 19463, "iter_tflops": 28.76063086586738, "iter_time": 0.57813387298584, "loss": 0.2637026011943817, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 30.604767601251798, "step_time": 0.5432975387573242} +{"epoch": 0, "iter": 19464, "iter_tflops": 28.323443751610956, "iter_time": 0.5870576705932616, "loss": 0.21738982200622559, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 30.021897482525063, "step_time": 0.5538455696105956} +{"epoch": 0, "iter": 19465, "iter_tflops": 27.436914688972635, "iter_time": 0.7519465560913086, "loss": 0.09397423267364502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.181989830971144, "step_time": 0.706980354309082} +{"epoch": 0, "iter": 19466, "iter_tflops": 10.583350116305898, "iter_time": 1.9493915710449217, "loss": 0.05580281838774681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.897342850674283, "step_time": 1.4845351181030273} +{"epoch": 0, "iter": 19467, "iter_tflops": 37.79856371317108, "iter_time": 0.5458168640136718, "loss": 0.10343912988901138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.483260433912456, "step_time": 0.4973353900909424} +{"epoch": 0, "iter": 19468, "iter_tflops": 41.84793065903961, "iter_time": 0.49300152206420905, "loss": 0.07655034214258194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.076245621145134, "step_time": 0.44775986480712887} +{"epoch": 0, "iter": 19469, "iter_tflops": 14.67486400382447, "iter_time": 1.1274662322998046, "loss": 0.04709186032414436, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 15.709707003323768, "step_time": 1.0531968307495116} +{"epoch": 0, "iter": 19470, "iter_tflops": 10.833313734739233, "iter_time": 1.5272717132568359, "loss": 0.054721783846616745, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 13.724436368804728, "step_time": 1.2055441246032716} +{"epoch": 0, "iter": 19471, "iter_tflops": 31.9109761739055, "iter_time": 0.5184866027832031, "loss": 
0.058133769780397415, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 35.21707866598329, "step_time": 0.4698122119903565} +{"epoch": 0, "iter": 19472, "iter_tflops": 37.46552660622343, "iter_time": 0.4416170043945313, "loss": 0.04387139156460762, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 41.03370879258498, "step_time": 0.4032151641845703} +{"epoch": 0, "iter": 19473, "iter_tflops": 1.3508115127594564, "iter_time": 1.1602363891601561, "loss": 0.011838803999125957, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.4415978949597736, "step_time": 1.087169090270996} +{"epoch": 0, "iter": 19474, "iter_tflops": 1.2239217198187673, "iter_time": 1.2805236206054686, "loss": 0.037597883492708206, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.5116347377619859, "step_time": 1.0367985286712647} +{"epoch": 0, "iter": 19475, "iter_tflops": 2.858013627854816, "iter_time": 0.5483741073608398, "loss": 0.1375855803489685, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.118395497829718, "step_time": 0.502585599899292} +{"epoch": 0, "iter": 19476, "iter_tflops": 3.0264560452172207, "iter_time": 0.5178534393310548, "loss": 0.010660387575626373, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.301904581438766, "step_time": 0.47465353202819827} +{"epoch": 0, "iter": 19477, "iter_tflops": 20.65324494732888, "iter_time": 0.998927459716797, "loss": 0.6069676876068115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.19403136208166, "step_time": 0.9295784606933593} +{"epoch": 0, "iter": 19478, "iter_tflops": 18.886028352003088, "iter_time": 1.0923997955322267, "loss": 0.5583431124687195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.080663551488513, "step_time": 0.8938691673278809} +{"epoch": 0, "iter": 19479, "iter_tflops": 41.52096126687997, "iter_time": 0.49688381195068365, "loss": 0.5558685660362244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.70111786714249, "step_time": 0.46153417396545404} +{"epoch": 0, "iter": 19480, "iter_tflops": 45.59418950590914, "iter_time": 
0.4524939193725586, "loss": 0.5237225294113159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.74359734472571, "step_time": 0.42325750732421874} +{"epoch": 0, "iter": 19481, "iter_tflops": 32.009080203591786, "iter_time": 0.6445387802124024, "loss": 0.1523265838623047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.11830572104856, "step_time": 0.6046927909851074} +{"epoch": 0, "iter": 19482, "iter_tflops": 15.719478650844207, "iter_time": 1.3124540557861328, "loss": 0.19696205854415894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.292904002068926, "step_time": 1.0166654071807861} +{"epoch": 0, "iter": 19483, "iter_tflops": 39.03020325375587, "iter_time": 0.5285930328369141, "loss": 0.2475125789642334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.83655866343579, "step_time": 0.48162350463867193} +{"epoch": 0, "iter": 19484, "iter_tflops": 39.10807650030323, "iter_time": 0.5275404815673828, "loss": 0.25901731848716736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.149249690893754, "step_time": 0.4781333084106445} +{"epoch": 0, "iter": 19485, "iter_tflops": 22.348278544379355, "iter_time": 0.9231625366210937, "loss": 0.44175606966018677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.9264736651618, "step_time": 0.8622705459594726} +{"epoch": 0, "iter": 19486, "iter_tflops": 27.345179632741768, "iter_time": 0.7544691162109375, "loss": 0.4571129083633423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.89234361178481, "step_time": 0.5912785263061523} +{"epoch": 0, "iter": 19487, "iter_tflops": 48.31642658816987, "iter_time": 0.4269995727539062, "loss": 0.5748746991157532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.28982266584954, "step_time": 0.3945527534484864} +{"epoch": 0, "iter": 19488, "iter_tflops": 47.80164556985879, "iter_time": 0.43159797668457034, "loss": 0.3265768587589264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.50602892424448, "step_time": 0.4005568656921387} +{"epoch": 0, "iter": 19489, "iter_tflops": 26.460846195812604, 
"iter_time": 0.7796838150024413, "loss": 0.21021203696727753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.882482404150466, "step_time": 0.7399302978515626} +{"epoch": 0, "iter": 19490, "iter_tflops": 14.741478524750221, "iter_time": 1.3995267486572267, "loss": 0.1832701712846756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.965089899729378, "step_time": 1.0878458061218261} +{"epoch": 0, "iter": 19491, "iter_tflops": 47.50354555760704, "iter_time": 0.43430639266967774, "loss": 0.17802166938781738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.68604984323353, "step_time": 0.391585506439209} +{"epoch": 0, "iter": 19492, "iter_tflops": 51.08831493299847, "iter_time": 0.4038319435119629, "loss": 0.15031050145626068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.52060472601567, "step_time": 0.3715934581756592} +{"epoch": 0, "iter": 19493, "iter_tflops": 26.872006253194048, "iter_time": 0.7677541198730469, "loss": 0.7190519571304321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.56431299196774, "step_time": 0.7222681503295898} +{"epoch": 0, "iter": 19494, "iter_tflops": 13.810256558374064, "iter_time": 1.4938964691162109, "loss": 0.47568240761756897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.20320056605289, "step_time": 1.2732727355957032} +{"epoch": 0, "iter": 19495, "iter_tflops": 36.08290923696152, "iter_time": 0.5717691268920898, "loss": 0.486982524394989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.2530769216792, "step_time": 0.5255917530059815} +{"epoch": 0, "iter": 19496, "iter_tflops": 35.908332741579024, "iter_time": 0.5745489120483398, "loss": 0.6465290188789368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.85045280263198, "step_time": 0.531038688659668} +{"epoch": 0, "iter": 19497, "iter_tflops": 19.405026348610697, "iter_time": 1.0631829681396485, "loss": 0.4879050850868225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.210902325323207, "step_time": 0.9726645851135254} +{"epoch": 0, "iter": 19498, "iter_tflops": 
20.861709046147663, "iter_time": 0.9889455108642579, "loss": 0.6379994750022888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.79377965003849, "step_time": 0.7998476295471192} +{"epoch": 0, "iter": 19499, "iter_tflops": 38.2455483826957, "iter_time": 0.5394377746582031, "loss": 0.5898401737213135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.79626931084679, "step_time": 0.4936108856201172} +{"epoch": 0, "iter": 19500, "iter_tflops": 35.09164043235913, "iter_time": 0.5879204635620117, "loss": 0.6044361591339111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.44164797480638, "step_time": 0.5366859798431397} +{"epoch": 0, "iter": 19501, "iter_tflops": 19.07974103254198, "iter_time": 1.0726098937988282, "loss": 0.20041793584823608, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 20.668454893507633, "step_time": 0.9901620178222655} +{"epoch": 0, "iter": 19502, "iter_tflops": 17.06254226373928, "iter_time": 1.1994179229736328, "loss": 0.25911861658096313, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 19.331441642352882, "step_time": 1.0586442222595216} +{"epoch": 0, "iter": 19503, "iter_tflops": 48.71179043835954, "iter_time": 0.4201266021728516, "loss": 0.15879859030246735, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 52.97148472951131, "step_time": 0.38634218215942384} +{"epoch": 0, "iter": 19504, "iter_tflops": 51.95561694512772, "iter_time": 0.39389617919921877, "loss": 0.20965665578842163, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 56.22669455970838, "step_time": 0.36397513961791994} +{"epoch": 0, "iter": 19505, "iter_tflops": 28.807389707559572, "iter_time": 0.7161736526489257, "loss": 0.4042704105377197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.4832894170129, "step_time": 0.676800106048584} +{"epoch": 0, "iter": 19506, "iter_tflops": 20.90581860264366, "iter_time": 0.9868589172363281, "loss": 0.32792890071868896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.96907837375027, "step_time": 0.7376393756866455} +{"epoch": 0, "iter": 19507, 
"iter_tflops": 45.60619251498384, "iter_time": 0.452374828338623, "loss": 0.23763376474380493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23468568978418, "step_time": 0.41903575134277343} +{"epoch": 0, "iter": 19508, "iter_tflops": 45.891000548752125, "iter_time": 0.4495673065185547, "loss": 0.22205421328544617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.36785944899912, "step_time": 0.4179053688049317} +{"epoch": 0, "iter": 19509, "iter_tflops": 24.179186227959814, "iter_time": 0.6182425231933593, "loss": 0.011749990284442902, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 25.871368760041097, "step_time": 0.577804801940918} +{"epoch": 0, "iter": 19510, "iter_tflops": 12.54797426929194, "iter_time": 1.191315887451172, "loss": 0.00543025229126215, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 14.926800925937917, "step_time": 1.0014604721069336} +{"epoch": 0, "iter": 19511, "iter_tflops": 41.00416892060229, "iter_time": 0.36456295776367187, "loss": 0.013489087112247944, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 45.24146950742782, "step_time": 0.3304181156158447} +{"epoch": 0, "iter": 19512, "iter_tflops": 42.73115321899225, "iter_time": 0.34982910537719725, "loss": 0.0037247263826429844, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 47.140607274284164, "step_time": 0.3171066722869873} +{"epoch": 0, "iter": 19513, "iter_tflops": 32.81014641507755, "iter_time": 0.6288022384643555, "loss": 0.10118480771780014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.90095468400334, "step_time": 0.5911326408386232} +{"epoch": 0, "iter": 19514, "iter_tflops": 14.778544852588379, "iter_time": 1.396016571044922, "loss": 0.10285423696041107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.011078198678433, "step_time": 1.2128034019470213} +{"epoch": 0, "iter": 19515, "iter_tflops": 40.1674940956446, "iter_time": 0.5136266021728515, "loss": 0.13370446860790253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.07348934815884, "step_time": 0.46810665130615237} +{"epoch": 
0, "iter": 19516, "iter_tflops": 38.23884544810364, "iter_time": 0.5395323333740234, "loss": 0.07498107850551605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6736191024974, "step_time": 0.49506363868713377} +{"epoch": 0, "iter": 19517, "iter_tflops": 29.937853245789096, "iter_time": 0.6891306915283203, "loss": 0.06518908590078354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.264067621154496, "step_time": 0.6394449005126953} +{"epoch": 0, "iter": 19518, "iter_tflops": 10.025477784087148, "iter_time": 2.0578663635253904, "loss": 0.017134562134742737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.073389886203126, "step_time": 1.7088070297241211} +{"epoch": 0, "iter": 19519, "iter_tflops": 9.961141555868899, "iter_time": 2.07115754699707, "loss": 0.0460829883813858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.404145417362203, "step_time": 1.663241828918457} +{"epoch": 0, "iter": 19520, "iter_tflops": 24.88050228505902, "iter_time": 0.829207275390625, "loss": 0.06152995675802231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.285037717466004, "step_time": 0.6017521018981934} +{"epoch": 0, "iter": 19521, "iter_tflops": 14.33960615945343, "iter_time": 1.0310765228271483, "loss": 0.38055261969566345, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 15.255724616014328, "step_time": 0.9691595535278319} +{"epoch": 0, "iter": 19522, "iter_tflops": 5.8300989394471925, "iter_time": 2.5360172119140625, "loss": 0.30081379413604736, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 6.6649885215873885, "step_time": 2.21834309387207} +{"epoch": 0, "iter": 19523, "iter_tflops": 9.866500756464957, "iter_time": 1.498528366088867, "loss": 0.3810693621635437, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 11.513912470053445, "step_time": 1.284118782043457} +{"epoch": 0, "iter": 19524, "iter_tflops": 26.622830978400245, "iter_time": 0.5553590927124025, "loss": 0.25961947441101074, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 28.36249383631126, "step_time": 0.5212951774597168} 
+{"epoch": 0, "iter": 19525, "iter_tflops": 18.704257552966062, "iter_time": 0.8451301193237304, "loss": 0.22769863903522491, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 19.74795620262431, "step_time": 0.8004641723632813} +{"epoch": 0, "iter": 19526, "iter_tflops": 10.238520015465403, "iter_time": 1.5439273834228515, "loss": 0.28475260734558105, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 12.724449651360548, "step_time": 1.242295883178711} +{"epoch": 0, "iter": 19527, "iter_tflops": 23.674656787482256, "iter_time": 0.6676984405517579, "loss": 0.4651554226875305, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.48846264380888, "step_time": 0.6201837921142578} +{"epoch": 0, "iter": 19528, "iter_tflops": 21.63467786776311, "iter_time": 0.7306571197509766, "loss": 0.3238289952278137, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 23.235994359904247, "step_time": 0.6803036346435546} +{"epoch": 0, "iter": 19529, "iter_tflops": 18.837480250532785, "iter_time": 1.0446073913574219, "loss": 0.060152892023324966, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 20.36603838184897, "step_time": 0.9662051467895507} +{"epoch": 0, "iter": 19530, "iter_tflops": 20.239435672485293, "iter_time": 0.9722490005493164, "loss": 0.06227380409836769, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 24.496246549180512, "step_time": 0.8032973976135254} +{"epoch": 0, "iter": 19531, "iter_tflops": 45.07618312813679, "iter_time": 0.4365447502136231, "loss": 0.0837244838476181, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 48.81196012247145, "step_time": 0.40313421249389647} +{"epoch": 0, "iter": 19532, "iter_tflops": 49.58748019546111, "iter_time": 0.39682942199707033, "loss": 0.08345861732959747, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 54.10687136425911, "step_time": 0.3636834030151367} +{"epoch": 0, "iter": 19533, "iter_tflops": 29.093113377360286, "iter_time": 0.709140106201172, "loss": 0.2571340799331665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.804099565897804, "step_time": 
0.6697515525817871} +{"epoch": 0, "iter": 19534, "iter_tflops": 12.519012428629479, "iter_time": 1.6479809112548827, "loss": 0.18552783131599426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.838168334358098, "step_time": 1.2252575874328615} +{"epoch": 0, "iter": 19535, "iter_tflops": 45.9503036282381, "iter_time": 0.4489870986938477, "loss": 0.2218772917985916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.94467716101648, "step_time": 0.4130789241790771} +{"epoch": 0, "iter": 19536, "iter_tflops": 47.71374226107159, "iter_time": 0.43239311218261717, "loss": 0.29005783796310425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.540425095243215, "step_time": 0.40028954887390134} +{"epoch": 0, "iter": 19537, "iter_tflops": 31.795437532652294, "iter_time": 0.6488696212768554, "loss": 0.28852972388267517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.7878136315044, "step_time": 0.6106075325012207} +{"epoch": 0, "iter": 19538, "iter_tflops": 26.054951027579104, "iter_time": 0.7918300628662109, "loss": 0.31361162662506104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.03822184339966, "step_time": 0.7104806079864502} +{"epoch": 0, "iter": 19539, "iter_tflops": 45.10435869033795, "iter_time": 0.4574079780578613, "loss": 0.3191797733306885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.54862058642593, "step_time": 0.42495735740661617} +{"epoch": 0, "iter": 19540, "iter_tflops": 49.08865138998996, "iter_time": 0.42028234481811516, "loss": 0.28452068567276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.323605407388335, "step_time": 0.3869035739898682} +{"epoch": 0, "iter": 19541, "iter_tflops": 33.445216790472614, "iter_time": 0.6168623046875, "loss": 0.4351922869682312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.76153560969563, "step_time": 0.5769073715209961} +{"epoch": 0, "iter": 19542, "iter_tflops": 9.778565547792946, "iter_time": 2.109828216552735, "loss": 0.4704265594482422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.870728889172156, 
"step_time": 1.7379803466796877} +{"epoch": 0, "iter": 19543, "iter_tflops": 14.696414771973538, "iter_time": 1.4038181304931638, "loss": 0.4369969069957733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.44740703592476, "step_time": 1.1183736267089843} +{"epoch": 0, "iter": 19544, "iter_tflops": 25.75593827469281, "iter_time": 0.8010227890014647, "loss": 0.29058343172073364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.901874305740435, "step_time": 0.7138323726654053} +{"epoch": 0, "iter": 19545, "iter_tflops": 19.420517411653506, "iter_time": 0.8413932876586915, "loss": 0.28026703000068665, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 20.458114997466204, "step_time": 0.798719383239746} +{"epoch": 0, "iter": 19546, "iter_tflops": 9.944684063350534, "iter_time": 1.6431183624267578, "loss": 0.40358835458755493, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 13.03827940117474, "step_time": 1.2532553176879884} +{"epoch": 0, "iter": 19547, "iter_tflops": 27.475260111984152, "iter_time": 0.5947275085449218, "loss": 0.28276416659355164, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 29.321283259367117, "step_time": 0.5572843742370606} +{"epoch": 0, "iter": 19548, "iter_tflops": 30.160925448468987, "iter_time": 0.541770278930664, "loss": 0.33768537640571594, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 32.0600124819578, "step_time": 0.509678310394287} +{"epoch": 0, "iter": 19549, "iter_tflops": 25.76569664189099, "iter_time": 0.8007194137573241, "loss": 0.5013419985771179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.625718052199133, "step_time": 0.7468075027465821} +{"epoch": 0, "iter": 19550, "iter_tflops": 13.052504811919308, "iter_time": 1.5806233215332028, "loss": 0.4461580514907837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.032251493207117, "step_time": 1.3724553184509278} +{"epoch": 0, "iter": 19551, "iter_tflops": 44.2058948315625, "iter_time": 0.46670457839965823, "loss": 0.6120296120643616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
47.81827111076158, "step_time": 0.43144791793823234} +{"epoch": 0, "iter": 19552, "iter_tflops": 43.52723266709314, "iter_time": 0.47398128128051764, "loss": 0.5238555669784546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.97022077442419, "step_time": 0.43923773765563967} +{"epoch": 0, "iter": 19553, "iter_tflops": 27.33680662827704, "iter_time": 0.7547002029418945, "loss": 0.4664464592933655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.030875520217286, "step_time": 0.7106603965759277} +{"epoch": 0, "iter": 19554, "iter_tflops": 12.71180848476019, "iter_time": 1.6229864959716798, "loss": 0.42852047085762024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.27431911870498, "step_time": 1.4453294296264647} +{"epoch": 0, "iter": 19555, "iter_tflops": 37.161281719657175, "iter_time": 0.5551771240234374, "loss": 0.4786524474620819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.6283008129859, "step_time": 0.5078010425567627} +{"epoch": 0, "iter": 19556, "iter_tflops": 36.21648845260159, "iter_time": 0.5696602401733398, "loss": 0.6022091507911682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56641468631408, "step_time": 0.5214294414520264} +{"epoch": 0, "iter": 19557, "iter_tflops": 16.28923891826899, "iter_time": 0.7602746582031251, "loss": 0.028252674266695976, "lr": 3e-05, "seqlen": 4976.0, "step_tflops": 17.55280002464094, "step_time": 0.7055452995300293} +{"epoch": 0, "iter": 19558, "iter_tflops": 4.62178076372434, "iter_time": 2.6795506286621094, "loss": 0.028743043541908264, "lr": 3e-05, "seqlen": 4976.0, "step_tflops": 6.476172290052508, "step_time": 1.9122863006591797} +{"epoch": 0, "iter": 19559, "iter_tflops": 7.573537936227039, "iter_time": 1.6352061157226563, "loss": 0.022078093141317368, "lr": 3e-05, "seqlen": 4976.0, "step_tflops": 9.698171502226675, "step_time": 1.2769722156524657} +{"epoch": 0, "iter": 19560, "iter_tflops": 23.701776444351914, "iter_time": 0.5225049514770507, "loss": 0.028657514601945877, "lr": 3e-05, "seqlen": 4976.0, 
"step_tflops": 26.475072661093844, "step_time": 0.4677719192504883} +{"epoch": 0, "iter": 19561, "iter_tflops": 21.626943375281076, "iter_time": 0.7271318283081054, "loss": 0.2729577422142029, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 23.534476988996158, "step_time": 0.6681958084106445} +{"epoch": 0, "iter": 19562, "iter_tflops": 23.335375722544345, "iter_time": 0.6738969650268555, "loss": 0.24391275644302368, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.182932922922472, "step_time": 0.6244562110900879} +{"epoch": 0, "iter": 19563, "iter_tflops": 23.846635367611068, "iter_time": 0.659448959350586, "loss": 0.3203428387641907, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.514611677901215, "step_time": 0.6163385543823241} +{"epoch": 0, "iter": 19564, "iter_tflops": 23.997641334326662, "iter_time": 0.6552993545532226, "loss": 0.23783129453659058, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.82689307119015, "step_time": 0.6088862037658691} +{"epoch": 0, "iter": 19565, "iter_tflops": 19.556553463876337, "iter_time": 1.0549452667236328, "loss": 0.1126178652048111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.835589145812822, "step_time": 0.9901852722167969} +{"epoch": 0, "iter": 19566, "iter_tflops": 19.460101024104688, "iter_time": 1.0601740188598634, "loss": 0.12622930109500885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.991201783827933, "step_time": 0.7937722034454346} +{"epoch": 0, "iter": 19567, "iter_tflops": 46.668141790922135, "iter_time": 0.44208088684082036, "loss": 0.09539733827114105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.68818816123769, "step_time": 0.40701974678039554} +{"epoch": 0, "iter": 19568, "iter_tflops": 48.469555400853544, "iter_time": 0.42565056228637693, "loss": 0.14621156454086304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.42790501223277, "step_time": 0.3935135974884033} +{"epoch": 0, "iter": 19569, "iter_tflops": 27.43766828492326, "iter_time": 0.7519259033203124, "loss": 0.1865347921848297, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 28.99805864416247, "step_time": 0.7114646453857423} +{"epoch": 0, "iter": 19570, "iter_tflops": 9.88010432836948, "iter_time": 2.0881453094482425, "loss": 0.1942032277584076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.315946252045018, "step_time": 1.5493524169921875} +{"epoch": 0, "iter": 19571, "iter_tflops": 17.174654465224304, "iter_time": 1.2012523193359375, "loss": 0.19786156713962555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.584749766962457, "step_time": 1.0534264545440672} +{"epoch": 0, "iter": 19572, "iter_tflops": 40.92872960117657, "iter_time": 0.5040736351013184, "loss": 0.1975381076335907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.876775337888496, "step_time": 0.4597276287078858} +{"epoch": 0, "iter": 19573, "iter_tflops": 14.546430837759788, "iter_time": 1.1064091339111326, "loss": 0.28043368458747864, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 15.410947323937991, "step_time": 1.04434228515625} +{"epoch": 0, "iter": 19574, "iter_tflops": 13.600176331503734, "iter_time": 1.183389358520508, "loss": 0.3436594307422638, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 18.141179175793432, "step_time": 0.8871696701049804} +{"epoch": 0, "iter": 19575, "iter_tflops": 28.96466511349402, "iter_time": 0.5556530303955078, "loss": 0.5165168046951294, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 30.85440126357697, "step_time": 0.5216210098266602} +{"epoch": 0, "iter": 19576, "iter_tflops": 28.221265744368697, "iter_time": 0.570289939880371, "loss": 0.3041174113750458, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 30.047005251831067, "step_time": 0.5356375389099122} +{"epoch": 0, "iter": 19577, "iter_tflops": 29.37162082373933, "iter_time": 0.7024159011840819, "loss": 0.0626283586025238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.256689818492056, "step_time": 0.6600536918640136} +{"epoch": 0, "iter": 19578, "iter_tflops": 18.87598044675983, "iter_time": 1.0929812927246094, "loss": 
0.058193523436784744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.3032048022208, "step_time": 0.8853328838348389} +{"epoch": 0, "iter": 19579, "iter_tflops": 41.88078589796232, "iter_time": 0.4926147651672363, "loss": 0.10475856065750122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.23620877611334, "step_time": 0.4462107524871826} +{"epoch": 0, "iter": 19580, "iter_tflops": 43.575874388272005, "iter_time": 0.4734521980285645, "loss": 0.055223215371370316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.992985991109464, "step_time": 0.42987726402282717} +{"epoch": 0, "iter": 19581, "iter_tflops": 31.051986317827815, "iter_time": 0.6644049530029298, "loss": 0.5778477787971497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.152157416818696, "step_time": 0.604093418121338} +{"epoch": 0, "iter": 19582, "iter_tflops": 33.62208067910225, "iter_time": 0.6136173934936523, "loss": 0.6224741339683533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.387644847602886, "step_time": 0.5669807319641114} +{"epoch": 0, "iter": 19583, "iter_tflops": 34.97370411873966, "iter_time": 0.5899030151367188, "loss": 0.5698160529136658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.01964037234306, "step_time": 0.5426430473327637} +{"epoch": 0, "iter": 19584, "iter_tflops": 33.384942659128164, "iter_time": 0.6179760055541992, "loss": 0.5425665378570557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.15833918129522, "step_time": 0.570576358795166} +{"epoch": 0, "iter": 19585, "iter_tflops": 19.460499171383983, "iter_time": 1.0601523284912109, "loss": 0.6407468318939209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.95046372696326, "step_time": 0.9847559356689455} +{"epoch": 0, "iter": 19586, "iter_tflops": 14.685814080649084, "iter_time": 1.4048314514160154, "loss": 0.5203863978385925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.88174470769589, "step_time": 1.0926476249694823} +{"epoch": 0, "iter": 19587, "iter_tflops": 34.438615421213804, "iter_time": 
0.5990686111450196, "loss": 0.34489336609840393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.423896106200786, "step_time": 0.5512812843322754} +{"epoch": 0, "iter": 19588, "iter_tflops": 35.4841324122609, "iter_time": 0.5814174423217773, "loss": 0.4760552942752838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.53544336746031, "step_time": 0.5353796844482422} +{"epoch": 0, "iter": 19589, "iter_tflops": 25.956492086528637, "iter_time": 0.7948336563110352, "loss": 0.2081829011440277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.643773188075922, "step_time": 0.7202645187377932} +{"epoch": 0, "iter": 19590, "iter_tflops": 46.35411385780327, "iter_time": 0.44507578277587895, "loss": 0.2642589807510376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.70072390228924, "step_time": 0.4069191112518311} +{"epoch": 0, "iter": 19591, "iter_tflops": 47.16840658820586, "iter_time": 0.43739220809936524, "loss": 0.274776428937912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.3601312592496, "step_time": 0.40169471931457523} +{"epoch": 0, "iter": 19592, "iter_tflops": 47.860364405227756, "iter_time": 0.43106845855712894, "loss": 0.3892359137535095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.008115243212366, "step_time": 0.39668988990783693} +{"epoch": 0, "iter": 19593, "iter_tflops": 29.37346043258937, "iter_time": 0.7023719100952148, "loss": 0.395418643951416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.076355515256168, "step_time": 0.6638839454650879} +{"epoch": 0, "iter": 19594, "iter_tflops": 14.315279880816595, "iter_time": 1.4411938629150391, "loss": 0.3234790563583374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.154745568873356, "step_time": 1.1364022388458253} +{"epoch": 0, "iter": 19595, "iter_tflops": 39.669791959578085, "iter_time": 0.5200706253051758, "loss": 0.3552878499031067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.49106144774527, "step_time": 0.47437548828125} +{"epoch": 0, "iter": 19596, "iter_tflops": 39.94136302923865, 
"iter_time": 0.5165345382690429, "loss": 0.4200451672077179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66921459150236, "step_time": 0.47244022369384764} +{"epoch": 0, "iter": 19597, "iter_tflops": 29.98942633728907, "iter_time": 0.6879455871582031, "loss": 0.13851147890090942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.77363720389913, "step_time": 0.629502712249756} +{"epoch": 0, "iter": 19598, "iter_tflops": 37.58331883011267, "iter_time": 0.5489428329467774, "loss": 0.15692998468875885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.916251571501284, "step_time": 0.49219795989990234} +{"epoch": 0, "iter": 19599, "iter_tflops": 41.444536403571966, "iter_time": 0.49780007934570314, "loss": 0.12349769473075867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.360540890720806, "step_time": 0.45482468032836915} +{"epoch": 0, "iter": 19600, "iter_tflops": 42.2397425763982, "iter_time": 0.48842848587036136, "loss": 0.16260270774364471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.343228512609, "step_time": 0.44518032455444334} +{"epoch": 0, "iter": 19601, "iter_tflops": 13.900586712054022, "iter_time": 1.0900920867919923, "loss": 0.4078379273414612, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 14.660589332640825, "step_time": 1.033581886291504} +{"epoch": 0, "iter": 19602, "iter_tflops": 6.625894908060016, "iter_time": 2.2869242248535153, "loss": 0.24139325320720673, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 8.246577717925542, "step_time": 1.8374797515869143} +{"epoch": 0, "iter": 19603, "iter_tflops": 10.094085742438196, "iter_time": 1.5011681060791013, "loss": 0.3231191635131836, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 11.134058822637906, "step_time": 1.3609519958496092} +{"epoch": 0, "iter": 19604, "iter_tflops": 22.561131950675282, "iter_time": 0.6716382675170898, "loss": 0.3158896863460541, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.328020778818683, "step_time": 0.6228587074279784} +{"epoch": 0, "iter": 19605, "iter_tflops": 
17.372835381485217, "iter_time": 0.928765510559082, "loss": 0.3715659976005554, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 18.44186132981555, "step_time": 0.8749274291992187} +{"epoch": 0, "iter": 19606, "iter_tflops": 8.516437068231035, "iter_time": 1.8946057128906248, "loss": 0.2944095730781555, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 10.214211031924389, "step_time": 1.5796903228759767} +{"epoch": 0, "iter": 19607, "iter_tflops": 13.46760903807259, "iter_time": 1.1980812835693362, "loss": 0.23078583180904388, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 14.816680273479653, "step_time": 1.0889949722290038} +{"epoch": 0, "iter": 19608, "iter_tflops": 21.797864993663243, "iter_time": 0.7402234268188476, "loss": 0.4032798111438751, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 23.626909072622073, "step_time": 0.6829200668334962} +{"epoch": 0, "iter": 19609, "iter_tflops": 23.643828471302268, "iter_time": 0.6391539535522461, "loss": 0.3504948616027832, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 25.31702131078628, "step_time": 0.596912498474121} +{"epoch": 0, "iter": 19610, "iter_tflops": 26.808140014769254, "iter_time": 0.5637111129760741, "loss": 0.45105889439582825, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.606742295723677, "step_time": 0.5282686958312989} +{"epoch": 0, "iter": 19611, "iter_tflops": 27.830528959366756, "iter_time": 0.5430024871826172, "loss": 0.4388886094093323, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.679772313826398, "step_time": 0.5091698913574219} +{"epoch": 0, "iter": 19612, "iter_tflops": 27.234255289115602, "iter_time": 0.5548911209106445, "loss": 0.2555718719959259, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.942369568554895, "step_time": 0.5221426811218262} +{"epoch": 0, "iter": 19613, "iter_tflops": 40.34477764348353, "iter_time": 0.5113696174621583, "loss": 0.4475715458393097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27192159519032, "step_time": 0.4660085391998291} +{"epoch": 0, "iter": 19614, 
"iter_tflops": 44.962405114758646, "iter_time": 0.45885208892822266, "loss": 0.5834789276123047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.549792710683505, "step_time": 0.42494709777832035} +{"epoch": 0, "iter": 19615, "iter_tflops": 41.30935151349782, "iter_time": 0.4994291305541992, "loss": 0.6332014203071594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.17753528147383, "step_time": 0.4670041770935059} +{"epoch": 0, "iter": 19616, "iter_tflops": 43.951405285031555, "iter_time": 0.4694069137573242, "loss": 0.6718416810035706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.283813001038716, "step_time": 0.4363246574401855} +{"epoch": 0, "iter": 19617, "iter_tflops": 38.00716379196166, "iter_time": 0.4310057907104492, "loss": 0.007974425330758095, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 41.97669945528349, "step_time": 0.3902476348876953} +{"epoch": 0, "iter": 19618, "iter_tflops": 31.666291776563657, "iter_time": 0.517310577392578, "loss": 0.005791043862700462, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 35.20420658913434, "step_time": 0.46532245063781746} +{"epoch": 0, "iter": 19619, "iter_tflops": 36.13358478895409, "iter_time": 0.45335406875610357, "loss": 0.009524921886622906, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 40.18182018954937, "step_time": 0.4076795825958252} +{"epoch": 0, "iter": 19620, "iter_tflops": 40.009840677306485, "iter_time": 0.4094319648742676, "loss": 0.005378612317144871, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 44.14510582208175, "step_time": 0.37107868194580074} +{"epoch": 0, "iter": 19621, "iter_tflops": 40.3273019999676, "iter_time": 0.5115912170410156, "loss": 0.020838776603341103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.638662398464945, "step_time": 0.46217992210388187} +{"epoch": 0, "iter": 19622, "iter_tflops": 37.067519988783836, "iter_time": 0.5565814361572266, "loss": 0.003917437978088856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.471762879575415, "step_time": 0.48576023483276365} 
+{"epoch": 0, "iter": 19623, "iter_tflops": 51.0437867993978, "iter_time": 0.40418422698974615, "loss": 0.01114803459495306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.83828353693924, "step_time": 0.3694793643951416} +{"epoch": 0, "iter": 19624, "iter_tflops": 55.82828961999621, "iter_time": 0.36954550552368165, "loss": 0.002609992166981101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.17843067115096, "step_time": 0.3372282238006592} +{"epoch": 0, "iter": 19625, "iter_tflops": 25.21479956516568, "iter_time": 0.5977115936279297, "loss": 0.30256810784339905, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 26.986216056110624, "step_time": 0.558476890563965} +{"epoch": 0, "iter": 19626, "iter_tflops": 26.77863273912361, "iter_time": 0.5628061065673828, "loss": 0.2613804042339325, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 28.510895174075106, "step_time": 0.5286111831665039} +{"epoch": 0, "iter": 19627, "iter_tflops": 27.645496069252527, "iter_time": 0.5451585311889648, "loss": 0.25024452805519104, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 29.441838118356063, "step_time": 0.5118966407775879} +{"epoch": 0, "iter": 19628, "iter_tflops": 27.122665504247532, "iter_time": 0.5556672897338867, "loss": 0.22865040600299835, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 28.833612067030288, "step_time": 0.5226947631835938} +{"epoch": 0, "iter": 19629, "iter_tflops": 28.165905326319432, "iter_time": 0.7324846572875976, "loss": 0.006118210963904858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.886007360960203, "step_time": 0.690326187133789} +{"epoch": 0, "iter": 19630, "iter_tflops": 29.33828175747559, "iter_time": 0.7032141036987304, "loss": 0.0010764842154458165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.371376053849595, "step_time": 0.5520560302734375} +{"epoch": 0, "iter": 19631, "iter_tflops": 57.09941134809533, "iter_time": 0.36131884765625, "loss": 0.0041878679767251015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.8125373566118, "step_time": 
0.3284550247192383} +{"epoch": 0, "iter": 19632, "iter_tflops": 63.54985430421289, "iter_time": 0.324644229888916, "loss": 0.0014666190836578608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 70.01769849326905, "step_time": 0.2946554079055786} +{"epoch": 0, "iter": 19633, "iter_tflops": 25.970338222211623, "iter_time": 0.7944098892211914, "loss": 0.2081410437822342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.428807591020675, "step_time": 0.7521688079833984} +{"epoch": 0, "iter": 19634, "iter_tflops": 18.56966593003712, "iter_time": 1.1110104827880858, "loss": 0.11962589621543884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.80746595916387, "step_time": 0.9045763149261475} +{"epoch": 0, "iter": 19635, "iter_tflops": 52.7972631645827, "iter_time": 0.39076066207885746, "loss": 0.12107829749584198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.67474878856214, "step_time": 0.35771449279785156} +{"epoch": 0, "iter": 19636, "iter_tflops": 57.8817356010368, "iter_time": 0.3564352951049805, "loss": 0.13952697813510895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.01500832733489, "step_time": 0.32739967918395996} +{"epoch": 0, "iter": 19637, "iter_tflops": 17.815562618301804, "iter_time": 1.1580377197265626, "loss": 0.48696020245552063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.492684808418485, "step_time": 1.115635383605957} +{"epoch": 0, "iter": 19638, "iter_tflops": 18.251387311300206, "iter_time": 1.1303849487304687, "loss": 0.619526743888855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.284908535916813, "step_time": 0.9257876682281494} +{"epoch": 0, "iter": 19639, "iter_tflops": 40.632628657968695, "iter_time": 0.507746955871582, "loss": 0.5326218008995056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.147700272625364, "step_time": 0.4673197784423828} +{"epoch": 0, "iter": 19640, "iter_tflops": 37.85830030941038, "iter_time": 0.5449556198120117, "loss": 0.5427886843681335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36350997889611, 
"step_time": 0.4987752132415771} +{"epoch": 0, "iter": 19641, "iter_tflops": 28.338091110037713, "iter_time": 0.7280339889526367, "loss": 0.429709255695343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.645559369831634, "step_time": 0.6732164115905761} +{"epoch": 0, "iter": 19642, "iter_tflops": 9.075965862253005, "iter_time": 2.273156799316406, "loss": 0.5048912167549133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.730938823788076, "step_time": 1.7586907424926759} +{"epoch": 0, "iter": 19643, "iter_tflops": 19.63730635539951, "iter_time": 1.0506071014404297, "loss": 0.5755634307861328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.644798329502052, "step_time": 0.8725425872802735} +{"epoch": 0, "iter": 19644, "iter_tflops": 36.41066139843926, "iter_time": 0.5666223220825195, "loss": 0.7250868082046509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35528171953214, "step_time": 0.5242268028259277} +{"epoch": 0, "iter": 19645, "iter_tflops": 16.94686022202421, "iter_time": 0.9763114471435548, "loss": 0.2301989048719406, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 18.12040142112428, "step_time": 0.9130820693969726} +{"epoch": 0, "iter": 19646, "iter_tflops": 8.78862224708467, "iter_time": 1.8825946960449218, "loss": 0.21971455216407776, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 9.520564681598644, "step_time": 1.7378605346679687} +{"epoch": 0, "iter": 19647, "iter_tflops": 10.047244216100067, "iter_time": 1.6467613677978516, "loss": 0.42061349749565125, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 11.633421135628781, "step_time": 1.4222311248779294} +{"epoch": 0, "iter": 19648, "iter_tflops": 16.58181817778862, "iter_time": 0.9978045501708984, "loss": 0.31579533219337463, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 21.523053506320565, "step_time": 0.7687298469543457} +{"epoch": 0, "iter": 19649, "iter_tflops": 21.403345371062176, "iter_time": 0.7385542373657226, "loss": 0.46743184328079224, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 
22.754997864227146, "step_time": 0.6946839332580567} +{"epoch": 0, "iter": 19650, "iter_tflops": 26.60298476000208, "iter_time": 0.5942014236450195, "loss": 0.2539844512939453, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 28.28660852289217, "step_time": 0.5588344535827637} +{"epoch": 0, "iter": 19651, "iter_tflops": 29.272427979338033, "iter_time": 0.5400143585205078, "loss": 0.4039473235607147, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 31.136507223942015, "step_time": 0.507684799194336} +{"epoch": 0, "iter": 19652, "iter_tflops": 25.235537696608255, "iter_time": 0.6263996276855468, "loss": 0.3098629117012024, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 26.85790097554454, "step_time": 0.5885616836547852} +{"epoch": 0, "iter": 19653, "iter_tflops": 35.84234398463042, "iter_time": 0.575606704711914, "loss": 0.5664503574371338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.83646221828704, "step_time": 0.5312299919128418} +{"epoch": 0, "iter": 19654, "iter_tflops": 27.757488031533892, "iter_time": 0.7432622680664063, "loss": 0.5593129992485046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.3853749037987, "step_time": 0.5999961776733399} +{"epoch": 0, "iter": 19655, "iter_tflops": 37.948975552324406, "iter_time": 0.5436535034179687, "loss": 0.5405250191688538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.307546628231954, "step_time": 0.49945095252990723} +{"epoch": 0, "iter": 19656, "iter_tflops": 35.31540913199486, "iter_time": 0.5841952285766602, "loss": 0.5083139538764954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.338690738401404, "step_time": 0.5381272315979003} +{"epoch": 0, "iter": 19657, "iter_tflops": 16.2951466040346, "iter_time": 1.2660882415771484, "loss": 0.5563365817070007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.361092882929437, "step_time": 1.1883522338867187} +{"epoch": 0, "iter": 19658, "iter_tflops": 17.513418082605135, "iter_time": 1.1780163879394534, "loss": 0.4141552746295929, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 22.373902511330172, "step_time": 0.9221052742004394} +{"epoch": 0, "iter": 19659, "iter_tflops": 38.18766509916623, "iter_time": 0.5402554321289064, "loss": 0.4488123953342438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.77194129662114, "step_time": 0.49389836502075196} +{"epoch": 0, "iter": 19660, "iter_tflops": 40.55628816559713, "iter_time": 0.5087027053833009, "loss": 0.5334707498550415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.434618676648256, "step_time": 0.46430225181579593} +{"epoch": 0, "iter": 19661, "iter_tflops": 27.805010427568725, "iter_time": 0.7419919357299805, "loss": 0.001970283454284072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.127105959844634, "step_time": 0.6848017044067383} +{"epoch": 0, "iter": 19662, "iter_tflops": 12.13442049099749, "iter_time": 1.7002125091552736, "loss": 0.07169029861688614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.965928432285583, "step_time": 1.4772446823120118} +{"epoch": 0, "iter": 19663, "iter_tflops": 12.189286054114998, "iter_time": 1.6925596313476563, "loss": 0.06897564232349396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.82080486158762, "step_time": 1.3920359725952147} +{"epoch": 0, "iter": 19664, "iter_tflops": 41.19886018038779, "iter_time": 0.5007685508728027, "loss": 0.03232337906956673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.75266029543614, "step_time": 0.44128170204162603} +{"epoch": 0, "iter": 19665, "iter_tflops": 21.094870966865173, "iter_time": 0.7144475097656251, "loss": 0.45087599754333496, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 22.353878731261567, "step_time": 0.6742086334228515} +{"epoch": 0, "iter": 19666, "iter_tflops": 10.330007095807982, "iter_time": 1.458970733642578, "loss": 0.27065199613571167, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 13.12789850034357, "step_time": 1.1480267028808595} +{"epoch": 0, "iter": 19667, "iter_tflops": 22.43425246845571, "iter_time": 0.6717931900024414, "loss": 0.4642806053161621, "lr": 3e-05, 
"seqlen": 6032.0, "step_tflops": 24.23722680107018, "step_time": 0.621819408416748} +{"epoch": 0, "iter": 19668, "iter_tflops": 24.771424347365492, "iter_time": 0.6084098281860352, "loss": 0.32252559065818787, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 26.605701376186783, "step_time": 0.566464225769043} +{"epoch": 0, "iter": 19669, "iter_tflops": 28.45703756606165, "iter_time": 0.5670052719116211, "loss": 0.03901570290327072, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.85754645746385, "step_time": 0.5064825172424317} +{"epoch": 0, "iter": 19670, "iter_tflops": 38.10127684170941, "iter_time": 0.4234842414855957, "loss": 0.050061896443367004, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 41.68439483871409, "step_time": 0.3870822734832764} +{"epoch": 0, "iter": 19671, "iter_tflops": 40.92543216483697, "iter_time": 0.3942607192993164, "loss": 0.038868196308612823, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 44.62235011724457, "step_time": 0.3615966053009033} +{"epoch": 0, "iter": 19672, "iter_tflops": 41.6112819980311, "iter_time": 0.387762393951416, "loss": 0.013349364511668682, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 45.19000731912696, "step_time": 0.35705438613891605} +{"epoch": 0, "iter": 19673, "iter_tflops": 30.17307907833655, "iter_time": 0.6837583084106446, "loss": 0.06987148523330688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.990260584972248, "step_time": 0.6449179573059082} +{"epoch": 0, "iter": 19674, "iter_tflops": 18.49779853928499, "iter_time": 1.1153269653320312, "loss": 0.11061441153287888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.451715872147425, "step_time": 0.9189094333648682} +{"epoch": 0, "iter": 19675, "iter_tflops": 46.619985029580626, "iter_time": 0.442537540435791, "loss": 0.10705835372209549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.72388577764926, "step_time": 0.40673330116271966} +{"epoch": 0, "iter": 19676, "iter_tflops": 49.2789986847543, "iter_time": 0.4186589431762695, "loss": 0.11681575328111649, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.5068424747729, "step_time": 0.38557860183715825} +{"epoch": 0, "iter": 19677, "iter_tflops": 32.40259530709735, "iter_time": 0.6367111434936524, "loss": 0.5184192657470703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.470471178100404, "step_time": 0.5985149841308594} +{"epoch": 0, "iter": 19678, "iter_tflops": 8.165288202605808, "iter_time": 2.526682830810547, "loss": 0.6134341359138489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.307624065487449, "step_time": 2.2165800170898438} +{"epoch": 0, "iter": 19679, "iter_tflops": 11.261384579449972, "iter_time": 1.8320210418701173, "loss": 0.5264499187469482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.60291040410263, "step_time": 1.5166676025390626} +{"epoch": 0, "iter": 19680, "iter_tflops": 36.44870052859908, "iter_time": 0.5660309753417969, "loss": 0.39567506313323975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.75035359604355, "step_time": 0.5190166034698487} +{"epoch": 0, "iter": 19681, "iter_tflops": 15.951358370895194, "iter_time": 1.0501082000732422, "loss": 0.42455142736434937, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 16.896170808046424, "step_time": 0.9913874816894532} +{"epoch": 0, "iter": 19682, "iter_tflops": 21.041692842726906, "iter_time": 0.796069610595703, "loss": 0.2902543842792511, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 23.521025503481066, "step_time": 0.7121565437316895} +{"epoch": 0, "iter": 19683, "iter_tflops": 28.677160311987105, "iter_time": 0.5841112594604493, "loss": 0.39558419585227966, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 30.522229421815073, "step_time": 0.5488017272949219} +{"epoch": 0, "iter": 19684, "iter_tflops": 30.176647481854662, "iter_time": 0.5550865859985352, "loss": 0.3091024160385132, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 32.14820125316006, "step_time": 0.5210447731018066} +{"epoch": 0, "iter": 19685, "iter_tflops": 29.251009214842497, "iter_time": 0.7053121948242187, "loss": 
0.5896480679512024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.034318275735256, "step_time": 0.6647832031249999} +{"epoch": 0, "iter": 19686, "iter_tflops": 35.628253124642306, "iter_time": 0.5790655364990235, "loss": 0.4581170678138733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.82610724250085, "step_time": 0.5313716716766357} +{"epoch": 0, "iter": 19687, "iter_tflops": 37.412258095475224, "iter_time": 0.5514527740478516, "loss": 0.5676983594894409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.71140738674013, "step_time": 0.5067644386291504} +{"epoch": 0, "iter": 19688, "iter_tflops": 40.2887474151733, "iter_time": 0.5120807876586914, "loss": 0.6537528038024902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7016880877138, "step_time": 0.47208916664123535} +{"epoch": 0, "iter": 19689, "iter_tflops": 19.956865357791543, "iter_time": 1.0337842712402343, "loss": 0.5268914103507996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.28030467139409, "step_time": 0.9694923934936523} +{"epoch": 0, "iter": 19690, "iter_tflops": 25.93309216168894, "iter_time": 0.7955508499145507, "loss": 0.49172243475914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.346483832354735, "step_time": 0.658162925720215} +{"epoch": 0, "iter": 19691, "iter_tflops": 40.962683648483534, "iter_time": 0.5036558074951172, "loss": 0.6049485802650452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.94860948212636, "step_time": 0.4694367752075195} +{"epoch": 0, "iter": 19692, "iter_tflops": 46.26112844699976, "iter_time": 0.4459703903198242, "loss": 0.5649669766426086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.950730136500795, "step_time": 0.4130288677215576} +{"epoch": 0, "iter": 19693, "iter_tflops": 2.76123141862237, "iter_time": 0.6672050323486327, "loss": 0.27793556451797485, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 2.943016778232401, "step_time": 0.6259928627014161} +{"epoch": 0, "iter": 19694, "iter_tflops": 1.7163544957726329, "iter_time": 1.0733840255737306, 
"loss": 0.34189122915267944, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 2.3028302099299767, "step_time": 0.8000188159942627} +{"epoch": 0, "iter": 19695, "iter_tflops": 4.0902478996832725, "iter_time": 0.45041463088989253, "loss": 0.3896220922470093, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.43435470524006, "step_time": 0.41546236610412596} +{"epoch": 0, "iter": 19696, "iter_tflops": 4.384615964102459, "iter_time": 0.42017533874511714, "loss": 0.2402961254119873, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.7481016706511365, "step_time": 0.38800927734374996} +{"epoch": 0, "iter": 19697, "iter_tflops": 49.585158283014316, "iter_time": 0.41607396697998056, "loss": 0.0016226436709985137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.983602287203865, "step_time": 0.3752226600646973} +{"epoch": 0, "iter": 19698, "iter_tflops": 49.854455996955586, "iter_time": 0.4138264694213867, "loss": 0.006090945098549128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.11591455860392, "step_time": 0.3743218936920166} +{"epoch": 0, "iter": 19699, "iter_tflops": 58.242763617234466, "iter_time": 0.35422586822509766, "loss": 0.012945170514285564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.12908162303067, "step_time": 0.32171197509765626} +{"epoch": 0, "iter": 19700, "iter_tflops": 56.88585030497295, "iter_time": 0.36267531204223635, "loss": 0.0011298403842374682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.34522107223189, "step_time": 0.33091699981689454} +{"epoch": 0, "iter": 19701, "iter_tflops": 42.6257636444364, "iter_time": 0.4840052528381347, "loss": 0.15589724481105804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.52797332702309, "step_time": 0.4434126834869385} +{"epoch": 0, "iter": 19702, "iter_tflops": 37.03867538843772, "iter_time": 0.5570148849487304, "loss": 0.12948724627494812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3025525829715, "step_time": 0.49951134300231936} +{"epoch": 0, "iter": 19703, "iter_tflops": 43.027868593243035, 
"iter_time": 0.4794821166992188, "loss": 0.10279601812362671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.05269203903374, "step_time": 0.43846786689758305} +{"epoch": 0, "iter": 19704, "iter_tflops": 41.7132233410352, "iter_time": 0.4945936050415039, "loss": 0.14064958691596985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.69541655544185, "step_time": 0.4514915294647217} +{"epoch": 0, "iter": 19705, "iter_tflops": 29.241321750011554, "iter_time": 0.7055458602905272, "loss": 0.19014491140842438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.685665157287136, "step_time": 0.6511175765991211} +{"epoch": 0, "iter": 19706, "iter_tflops": 13.086804386238061, "iter_time": 1.5764806213378906, "loss": 0.2494031935930252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.814552783859362, "step_time": 1.3045638275146485} +{"epoch": 0, "iter": 19707, "iter_tflops": 10.780403284940544, "iter_time": 1.9137589721679686, "loss": 0.2275363951921463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.966882021574003, "step_time": 1.5910604782104492} +{"epoch": 0, "iter": 19708, "iter_tflops": 23.240129427218786, "iter_time": 0.8877357406616212, "loss": 0.21343953907489777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.62261844422133, "step_time": 0.7468913040161131} +{"epoch": 0, "iter": 19709, "iter_tflops": 19.178097196361463, "iter_time": 0.7858536682128906, "loss": 0.26605814695358276, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 20.3051702930269, "step_time": 0.7422335205078125} +{"epoch": 0, "iter": 19710, "iter_tflops": 11.790428228841227, "iter_time": 1.2782553558349607, "loss": 0.2916926443576813, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 12.86460764241673, "step_time": 1.1715225563049316} +{"epoch": 0, "iter": 19711, "iter_tflops": 22.383213523276755, "iter_time": 0.6733250350952148, "loss": 0.46235334873199463, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.130339255053602, "step_time": 0.6245738143920898} +{"epoch": 0, "iter": 19712, "iter_tflops": 
23.260281361311105, "iter_time": 0.6479361877441406, "loss": 0.29965850710868835, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.863249908422954, "step_time": 0.6061628341674805} +{"epoch": 0, "iter": 19713, "iter_tflops": 13.70125236366476, "iter_time": 1.5057815856933592, "loss": 0.2884313762187958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.816476292574961, "step_time": 1.3924426498413087} +{"epoch": 0, "iter": 19714, "iter_tflops": 21.274128520841384, "iter_time": 0.9697738494873047, "loss": 0.2736441493034363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.462602035807837, "step_time": 0.7002468242645264} +{"epoch": 0, "iter": 19715, "iter_tflops": 46.77726682062522, "iter_time": 0.44104957199096684, "loss": 0.31850528717041016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.700378838159324, "step_time": 0.4069218807220459} +{"epoch": 0, "iter": 19716, "iter_tflops": 51.927344630444765, "iter_time": 0.3973069229125976, "loss": 0.286944180727005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.13119245659302, "step_time": 0.36755131340026853} +{"epoch": 0, "iter": 19717, "iter_tflops": 19.357294978504786, "iter_time": 1.0658045730590822, "loss": 0.7567611932754517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.16805694457992, "step_time": 1.0229589080810548} +{"epoch": 0, "iter": 19718, "iter_tflops": 15.613653309459679, "iter_time": 1.3213495330810545, "loss": 0.7112036347389221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.54132184363174, "step_time": 1.0043702964782717} +{"epoch": 0, "iter": 19719, "iter_tflops": 38.94722620005897, "iter_time": 0.5297192001342773, "loss": 0.6278457641601562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37307858706855, "step_time": 0.4868915405273438} +{"epoch": 0, "iter": 19720, "iter_tflops": 37.17815101964301, "iter_time": 0.5549252166748047, "loss": 0.43922141194343567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25687333211888, "step_time": 0.5124862365722657} +{"epoch": 0, "iter": 19721, 
"iter_tflops": 18.281922288119787, "iter_time": 1.1284969482421874, "loss": 0.30714139342308044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.625465675112366, "step_time": 1.051240966796875} +{"epoch": 0, "iter": 19722, "iter_tflops": 17.998167280176443, "iter_time": 1.1462885742187499, "loss": 0.2180546075105667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.431086847201367, "step_time": 0.880500919342041} +{"epoch": 0, "iter": 19723, "iter_tflops": 44.49659103238474, "iter_time": 0.46365559768676756, "loss": 0.18919247388839722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12466868902566, "step_time": 0.42870099830627445} +{"epoch": 0, "iter": 19724, "iter_tflops": 48.04501768178708, "iter_time": 0.4294117164611816, "loss": 0.23302441835403442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.94262918997819, "step_time": 0.39719001197814935} +{"epoch": 0, "iter": 19725, "iter_tflops": 26.07943465188318, "iter_time": 0.791086685180664, "loss": 0.5777003169059753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.546179173016228, "step_time": 0.7489638900756835} +{"epoch": 0, "iter": 19726, "iter_tflops": 8.981807422090203, "iter_time": 2.296986846923828, "loss": 0.4600481390953064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.72364312717041, "step_time": 1.7597851867675778} +{"epoch": 0, "iter": 19727, "iter_tflops": 12.451014709944277, "iter_time": 1.6569808959960937, "loss": 0.5648800134658813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.924803761174212, "step_time": 1.3823359985351562} +{"epoch": 0, "iter": 19728, "iter_tflops": 22.134058061377836, "iter_time": 0.9320971984863281, "loss": 0.5128059387207031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.545469618259673, "step_time": 0.6754223709106445} +{"epoch": 0, "iter": 19729, "iter_tflops": 19.95405509231527, "iter_time": 0.7901442947387695, "loss": 0.5070706009864807, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 21.680515897786485, "step_time": 0.7272235984802244} +{"epoch": 0, 
"iter": 19730, "iter_tflops": 23.52197541486839, "iter_time": 0.6702916107177734, "loss": 0.37534427642822266, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 25.20837562990069, "step_time": 0.6254501686096191} +{"epoch": 0, "iter": 19731, "iter_tflops": 23.08880272960392, "iter_time": 0.6828670578002929, "loss": 0.32033777236938477, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 24.777246160300024, "step_time": 0.6363331375122071} +{"epoch": 0, "iter": 19732, "iter_tflops": 25.321018310603193, "iter_time": 0.6226678009033204, "loss": 0.2708873748779297, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 27.13803803464222, "step_time": 0.5809772529602051} +{"epoch": 0, "iter": 19733, "iter_tflops": 28.93965554671716, "iter_time": 0.7129004516601563, "loss": 0.4124843180179596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.37729199392133, "step_time": 0.6575167007446289} +{"epoch": 0, "iter": 19734, "iter_tflops": 10.317351772112154, "iter_time": 1.9996501007080076, "loss": 0.4884161353111267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.755594681794118, "step_time": 1.7550021133422853} +{"epoch": 0, "iter": 19735, "iter_tflops": 10.148799727013293, "iter_time": 2.032860443115234, "loss": 0.3695544898509979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.881929549520375, "step_time": 1.4861834182739257} +{"epoch": 0, "iter": 19736, "iter_tflops": 40.08014979375712, "iter_time": 0.5147459182739258, "loss": 0.4044566750526428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.93697596952304, "step_time": 0.469561071395874} +{"epoch": 0, "iter": 19737, "iter_tflops": 17.95301022454835, "iter_time": 0.9925797576904297, "loss": 0.2642972767353058, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 19.3851697738925, "step_time": 0.9192488250732422} +{"epoch": 0, "iter": 19738, "iter_tflops": 16.886691094043194, "iter_time": 1.0552567367553711, "loss": 0.30804771184921265, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 21.51268244870904, "step_time": 0.8283390312194824} 
+{"epoch": 0, "iter": 19739, "iter_tflops": 31.562319722876683, "iter_time": 0.5645907745361328, "loss": 0.3575008511543274, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 33.682838013264, "step_time": 0.5290467071533204} +{"epoch": 0, "iter": 19740, "iter_tflops": 32.25529337398417, "iter_time": 0.5524610900878906, "loss": 0.2898518443107605, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 34.28185001487672, "step_time": 0.5198025932312011} +{"epoch": 0, "iter": 19741, "iter_tflops": 20.869923989402526, "iter_time": 0.9885562362670898, "loss": 0.25861409306526184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.87695303803975, "step_time": 0.9430515060424806} +{"epoch": 0, "iter": 19742, "iter_tflops": 14.736759580888018, "iter_time": 1.3999748992919925, "loss": 0.29202455282211304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.55593018605324, "step_time": 1.054978889465332} +{"epoch": 0, "iter": 19743, "iter_tflops": 47.57935478688393, "iter_time": 0.43361440277099605, "loss": 0.3223327398300171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.623349557652055, "step_time": 0.39964654922485343} +{"epoch": 0, "iter": 19744, "iter_tflops": 46.46850647430949, "iter_time": 0.44398012924194336, "loss": 0.1800910234451294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.17023718126002, "step_time": 0.41122176551818845} +{"epoch": 0, "iter": 19745, "iter_tflops": 33.4700096428766, "iter_time": 0.6164053649902342, "loss": 0.5900177955627441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.77088937114784, "step_time": 0.5767565155029296} +{"epoch": 0, "iter": 19746, "iter_tflops": 17.961093722325803, "iter_time": 1.1486546325683593, "loss": 0.6245375871658325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.281115757868527, "step_time": 1.0172563362121583} +{"epoch": 0, "iter": 19747, "iter_tflops": 35.80048575868488, "iter_time": 0.5762797088623047, "loss": 0.5286630988121033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.98738022805868, "step_time": 
0.5291736297607422} +{"epoch": 0, "iter": 19748, "iter_tflops": 36.45446026045339, "iter_time": 0.5659415435791016, "loss": 0.5783911943435669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.64563251316705, "step_time": 0.5203875484466552} +{"epoch": 0, "iter": 19749, "iter_tflops": 17.858666594612732, "iter_time": 1.1552426605224608, "loss": 0.3515481948852539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.09970454873522, "step_time": 1.0801786727905274} +{"epoch": 0, "iter": 19750, "iter_tflops": 16.58564743753595, "iter_time": 1.2439124603271483, "loss": 0.6849097013473511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.22622558492871, "step_time": 1.07307039642334} +{"epoch": 0, "iter": 19751, "iter_tflops": 46.17394539342283, "iter_time": 0.44681244659423824, "loss": 0.4884163439273834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.69418531222373, "step_time": 0.4151611175537109} +{"epoch": 0, "iter": 19752, "iter_tflops": 46.506474710330956, "iter_time": 0.4436176605224609, "loss": 0.4461389482021332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89574428414216, "step_time": 0.4134840316772461} +{"epoch": 0, "iter": 19753, "iter_tflops": 28.643773491484815, "iter_time": 0.7202645111083983, "loss": 0.4539274573326111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.217692383456242, "step_time": 0.682748809814453} +{"epoch": 0, "iter": 19754, "iter_tflops": 12.663352999160669, "iter_time": 1.6291967468261719, "loss": 0.6192883253097534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.39317127584299, "step_time": 1.1216713638305664} +{"epoch": 0, "iter": 19755, "iter_tflops": 37.702853233867906, "iter_time": 0.5472024459838867, "loss": 0.4937763810157776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.2233192159743, "step_time": 0.5004714298248291} +{"epoch": 0, "iter": 19756, "iter_tflops": 40.02023342979255, "iter_time": 0.5155165710449219, "loss": 0.6200209259986877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59056161041546, 
"step_time": 0.4732926750183105} +{"epoch": 0, "iter": 19757, "iter_tflops": 16.84973260010223, "iter_time": 1.2244166717529297, "loss": 0.5056647658348083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.140877588735272, "step_time": 1.1372709732055664} +{"epoch": 0, "iter": 19758, "iter_tflops": 23.338364439846668, "iter_time": 0.8839991149902343, "loss": 0.45740213990211487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.214035079282922, "step_time": 0.7312351264953613} +{"epoch": 0, "iter": 19759, "iter_tflops": 41.43232914291157, "iter_time": 0.49794674682617185, "loss": 0.4617569148540497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.277160810443014, "step_time": 0.4556622619628906} +{"epoch": 0, "iter": 19760, "iter_tflops": 33.94840988855124, "iter_time": 0.607718994140625, "loss": 0.4987559914588928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.89845539987774, "step_time": 0.5591316299438477} +{"epoch": 0, "iter": 19761, "iter_tflops": 15.950621475963572, "iter_time": 1.2934350891113282, "loss": 0.33970531821250916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.211502366261907, "step_time": 1.1986805725097656} +{"epoch": 0, "iter": 19762, "iter_tflops": 16.63516560013212, "iter_time": 1.240209686279297, "loss": 0.4021879732608795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.825145493348998, "step_time": 0.9452900791168213} +{"epoch": 0, "iter": 19763, "iter_tflops": 36.02537279791806, "iter_time": 0.5726823043823241, "loss": 0.4033345878124237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.36632408041975, "step_time": 0.5240797557830811} +{"epoch": 0, "iter": 19764, "iter_tflops": 41.17561180317957, "iter_time": 0.5010512924194336, "loss": 0.4359138607978821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.06151940054059, "step_time": 0.45784282875061033} +{"epoch": 0, "iter": 19765, "iter_tflops": 18.931378079632744, "iter_time": 1.089782974243164, "loss": 0.1268264204263687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
20.24559542123611, "step_time": 1.0190410842895508} +{"epoch": 0, "iter": 19766, "iter_tflops": 18.67694800333107, "iter_time": 1.1046287384033202, "loss": 0.1807166188955307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.196599875166708, "step_time": 0.8188046646118163} +{"epoch": 0, "iter": 19767, "iter_tflops": 50.57130464301949, "iter_time": 0.4079604759216308, "loss": 0.13674040138721466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.122218213784656, "step_time": 0.37427908706665036} +{"epoch": 0, "iter": 19768, "iter_tflops": 49.92545847466764, "iter_time": 0.41323793792724606, "loss": 0.11966469138860703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.969123520951264, "step_time": 0.3822758674621582} +{"epoch": 0, "iter": 19769, "iter_tflops": 26.644059647204436, "iter_time": 0.7743224487304687, "loss": 0.6745601296424866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.02802161486217, "step_time": 0.7360881118774414} +{"epoch": 0, "iter": 19770, "iter_tflops": 13.075879452696286, "iter_time": 1.5777977752685548, "loss": 0.4237859845161438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.866126449709137, "step_time": 1.3877921447753907} +{"epoch": 0, "iter": 19771, "iter_tflops": 35.85316894791291, "iter_time": 0.5754329147338868, "loss": 0.3668598234653473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.116058389763005, "step_time": 0.5142851600646973} +{"epoch": 0, "iter": 19772, "iter_tflops": 43.67281710239555, "iter_time": 0.472401252746582, "loss": 0.5059265494346619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.038819396338205, "step_time": 0.43859717941284176} +{"epoch": 0, "iter": 19773, "iter_tflops": 30.018227660681873, "iter_time": 0.687285530090332, "loss": 0.5180827379226685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.921303124436996, "step_time": 0.6463111305236817} +{"epoch": 0, "iter": 19774, "iter_tflops": 11.130526830435713, "iter_time": 1.8535594787597658, "loss": 0.4659315049648285, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 12.835310680180855, "step_time": 1.6073700141906737} +{"epoch": 0, "iter": 19775, "iter_tflops": 18.697889871698486, "iter_time": 1.1033915405273438, "loss": 0.4859751760959625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.083381604504353, "step_time": 0.978547649383545} +{"epoch": 0, "iter": 19776, "iter_tflops": 41.934327695889436, "iter_time": 0.4919857940673828, "loss": 0.4596472680568695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.07916427869911, "step_time": 0.4576636199951172} +{"epoch": 0, "iter": 19777, "iter_tflops": 21.568714492157785, "iter_time": 0.7139163284301757, "loss": 0.2770853042602539, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 22.894451336851205, "step_time": 0.6725759544372558} +{"epoch": 0, "iter": 19778, "iter_tflops": 10.256946665983083, "iter_time": 1.5012515869140626, "loss": 0.38895687460899353, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 12.326777950029673, "step_time": 1.2491713180541992} +{"epoch": 0, "iter": 19779, "iter_tflops": 26.964483526425205, "iter_time": 0.5710570144653321, "loss": 0.424076110124588, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.781491869203258, "step_time": 0.5350055351257325} +{"epoch": 0, "iter": 19780, "iter_tflops": 26.699476289274426, "iter_time": 0.5767250747680663, "loss": 0.2532913088798523, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.419931535114106, "step_time": 0.5418119125366212} +{"epoch": 0, "iter": 19781, "iter_tflops": 21.37069968900577, "iter_time": 0.9653915786743166, "loss": 0.23756758868694305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.3521168036014, "step_time": 0.9230040130615236} +{"epoch": 0, "iter": 19782, "iter_tflops": 15.823239882036217, "iter_time": 1.3038476104736327, "loss": 0.25102493166923523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.808485981616318, "step_time": 1.1584978942871094} +{"epoch": 0, "iter": 19783, "iter_tflops": 37.54490669029851, "iter_time": 0.5495044555664063, "loss": 0.39898785948753357, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 41.232717103461745, "step_time": 0.5003573608398438} +{"epoch": 0, "iter": 19784, "iter_tflops": 41.175741586733395, "iter_time": 0.5010497131347657, "loss": 0.2374788075685501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.07820371518696, "step_time": 0.45767337226867677} +{"epoch": 0, "iter": 19785, "iter_tflops": 17.308481823765934, "iter_time": 0.7506808471679688, "loss": 0.0023550037294626236, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 18.727727704472024, "step_time": 0.6937919006347656} +{"epoch": 0, "iter": 19786, "iter_tflops": 6.798900563893228, "iter_time": 1.9110657196044922, "loss": 0.002416113391518593, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 7.742041694976535, "step_time": 1.6782583084106446} +{"epoch": 0, "iter": 19787, "iter_tflops": 6.927186046854238, "iter_time": 1.8756744384765625, "loss": 0.010486499406397343, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 8.891458807217923, "step_time": 1.4613064155578614} +{"epoch": 0, "iter": 19788, "iter_tflops": 26.12633775338261, "iter_time": 0.49731982803344726, "loss": 0.007112073712050915, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 28.962975108410667, "step_time": 0.44861226272583} +{"epoch": 0, "iter": 19789, "iter_tflops": 16.375123932719394, "iter_time": 1.0103992919921876, "loss": 0.3368408977985382, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 17.492815081127752, "step_time": 0.9458405380249024} +{"epoch": 0, "iter": 19790, "iter_tflops": 16.35107594071772, "iter_time": 1.0118853149414062, "loss": 0.32744431495666504, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 19.71035798469042, "step_time": 0.8394273529052734} +{"epoch": 0, "iter": 19791, "iter_tflops": 25.00964576383914, "iter_time": 0.661561294555664, "loss": 0.31850871443748474, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 26.95690432065003, "step_time": 0.6137727622985839} +{"epoch": 0, "iter": 19792, "iter_tflops": 24.525802300712044, "iter_time": 0.6746125335693359, "loss": 
0.4294489026069641, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 26.2610066502359, "step_time": 0.630037296295166} +{"epoch": 0, "iter": 19793, "iter_tflops": 21.16292353297245, "iter_time": 0.974869728088379, "loss": 0.6191313862800598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.08447522840206, "step_time": 0.8937215728759766} +{"epoch": 0, "iter": 19794, "iter_tflops": 41.54826466258033, "iter_time": 0.4965572853088379, "loss": 0.6713217496871948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.946572499509244, "step_time": 0.45901372146606445} +{"epoch": 0, "iter": 19795, "iter_tflops": 45.83136563339095, "iter_time": 0.45015227508544925, "loss": 0.46831661462783813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.26806286382743, "step_time": 0.4187518711090088} +{"epoch": 0, "iter": 19796, "iter_tflops": 42.977256695442136, "iter_time": 0.48004677581787103, "loss": 0.3915630578994751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.89589852071375, "step_time": 0.4495193290710449} +{"epoch": 0, "iter": 19797, "iter_tflops": 27.64830601297674, "iter_time": 0.746197380065918, "loss": 0.5004215240478516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.188075947640087, "step_time": 0.7068329391479493} +{"epoch": 0, "iter": 19798, "iter_tflops": 15.507503439617011, "iter_time": 1.330394256591797, "loss": 0.5649798512458801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.195826809502922, "step_time": 1.0215523090362548} +{"epoch": 0, "iter": 19799, "iter_tflops": 42.98832203583536, "iter_time": 0.47992321014404304, "loss": 0.6789295077323914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.281853417696894, "step_time": 0.4457706851959229} +{"epoch": 0, "iter": 19800, "iter_tflops": 44.56359599010162, "iter_time": 0.4629584541320801, "loss": 0.5166318416595459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.05454746379967, "step_time": 0.4293265590667724} +{"epoch": 0, "iter": 19801, "iter_tflops": 27.91094298215255, "iter_time": 0.7391757965087891, 
"loss": 0.5521695017814636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.51564410679292, "step_time": 0.6989884223937989} +{"epoch": 0, "iter": 19802, "iter_tflops": 41.35636368168495, "iter_time": 0.49886140060424805, "loss": 0.39196500182151794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.23849557043545, "step_time": 0.44618868446350096} +{"epoch": 0, "iter": 19803, "iter_tflops": 46.585304428324456, "iter_time": 0.4428669891357422, "loss": 0.5034584999084473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.987787096847136, "step_time": 0.41272268104553217} +{"epoch": 0, "iter": 19804, "iter_tflops": 46.40075762914306, "iter_time": 0.44462837600708005, "loss": 0.4060797095298767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.79810715522499, "step_time": 0.4142947330474854} +{"epoch": 0, "iter": 19805, "iter_tflops": 25.63344948151282, "iter_time": 0.8048504562377929, "loss": 0.3639516830444336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.970397980737836, "step_time": 0.7649532470703125} +{"epoch": 0, "iter": 19806, "iter_tflops": 13.248466315755095, "iter_time": 1.557243911743164, "loss": 0.23866985738277435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.681425191826326, "step_time": 1.3156389331817628} +{"epoch": 0, "iter": 19807, "iter_tflops": 38.91421505757621, "iter_time": 0.5301685638427734, "loss": 0.3073098659515381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.610123754252946, "step_time": 0.4841829051971435} +{"epoch": 0, "iter": 19808, "iter_tflops": 36.60489549511681, "iter_time": 0.5636156921386718, "loss": 0.30923059582710266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.11310978833196, "step_time": 0.5143229637145996} +{"epoch": 0, "iter": 19809, "iter_tflops": 14.030616518652936, "iter_time": 0.8653256759643554, "loss": 0.0406297966837883, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 15.18877624369543, "step_time": 0.79934370803833} +{"epoch": 0, "iter": 19810, "iter_tflops": 13.81723651490797, "iter_time": 
0.8786889266967772, "loss": 0.02655896544456482, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 16.85072099237695, "step_time": 0.7205064239501954} +{"epoch": 0, "iter": 19811, "iter_tflops": 33.12261649609423, "iter_time": 0.3665487213134765, "loss": 0.021244917064905167, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 36.32450788750556, "step_time": 0.3342386016845703} +{"epoch": 0, "iter": 19812, "iter_tflops": 33.27276553884897, "iter_time": 0.3648946075439453, "loss": 0.020793979987502098, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 36.279698950253014, "step_time": 0.3346514186859131} +{"epoch": 0, "iter": 19813, "iter_tflops": 45.61244316340966, "iter_time": 0.45231283569335945, "loss": 0.047191113233566284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.12796666888999, "step_time": 0.4115685291290283} +{"epoch": 0, "iter": 19814, "iter_tflops": 45.67142604968566, "iter_time": 0.4517286911010743, "loss": 0.04999048635363579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.66489971828367, "step_time": 0.39932514381408696} +{"epoch": 0, "iter": 19815, "iter_tflops": 52.35142577607176, "iter_time": 0.39408847427368165, "loss": 0.08269155770540237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.29035334917321, "step_time": 0.36011461448669435} +{"epoch": 0, "iter": 19816, "iter_tflops": 52.60355568589466, "iter_time": 0.39219960021972655, "loss": 0.08068589121103287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.752072794230955, "step_time": 0.3635302200317383} +{"epoch": 0, "iter": 19817, "iter_tflops": 26.38930553163499, "iter_time": 0.7817975158691407, "loss": 0.0948462262749672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.894730480524508, "step_time": 0.7396054077148437} +{"epoch": 0, "iter": 19818, "iter_tflops": 14.618525888412265, "iter_time": 1.411297805786133, "loss": 0.09537270665168762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.879716239917386, "step_time": 1.222241725921631} +{"epoch": 0, "iter": 19819, "iter_tflops": 
40.38223377019691, "iter_time": 0.5108953018188476, "loss": 0.12473895400762558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.53777154447405, "step_time": 0.46322689247131354} +{"epoch": 0, "iter": 19820, "iter_tflops": 42.77862382949383, "iter_time": 0.48227576446533205, "loss": 0.07948184013366699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.871518430197796, "step_time": 0.4401626873016357} +{"epoch": 0, "iter": 19821, "iter_tflops": 16.758716999669577, "iter_time": 1.23106640625, "loss": 0.06012586131691933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.000374101985116, "step_time": 1.1461480407714844} +{"epoch": 0, "iter": 19822, "iter_tflops": 12.900424293916608, "iter_time": 1.5992569732666015, "loss": 0.06093405932188034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.14243762690303, "step_time": 1.2035099067687987} +{"epoch": 0, "iter": 19823, "iter_tflops": 41.53278895850472, "iter_time": 0.4967423095703125, "loss": 0.12710882723331451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79530152895883, "step_time": 0.45050677299499514} +{"epoch": 0, "iter": 19824, "iter_tflops": 44.17009159809522, "iter_time": 0.467082878112793, "loss": 0.11291655898094177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.43120753709771, "step_time": 0.42598759269714354} +{"epoch": 0, "iter": 19825, "iter_tflops": 22.550018501868156, "iter_time": 0.9149036178588867, "loss": 0.7867835760116577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.433932714258887, "step_time": 0.8443623771667481} +{"epoch": 0, "iter": 19826, "iter_tflops": 20.48714750934124, "iter_time": 1.0070261611938478, "loss": 0.7046896815299988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.954079461926252, "step_time": 0.7380351600646973} +{"epoch": 0, "iter": 19827, "iter_tflops": 45.91787534296034, "iter_time": 0.44930418395996097, "loss": 0.7445594072341919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45195062665369, "step_time": 0.41719473648071287} +{"epoch": 0, "iter": 19828, 
"iter_tflops": 43.974565045669365, "iter_time": 0.4691596946716308, "loss": 0.7702496647834778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.50506917217301, "step_time": 0.43429246330261234} +{"epoch": 0, "iter": 19829, "iter_tflops": 20.89927411311443, "iter_time": 0.6918287200927735, "loss": 0.027183834463357925, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 22.365382010058322, "step_time": 0.6464775810241699} +{"epoch": 0, "iter": 19830, "iter_tflops": 10.497972402982763, "iter_time": 1.3772867279052734, "loss": 0.02325213886797428, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 13.156718380821356, "step_time": 1.098960823059082} +{"epoch": 0, "iter": 19831, "iter_tflops": 37.897263407460045, "iter_time": 0.3815240669250488, "loss": 0.013147247955203056, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 41.65962953883511, "step_time": 0.34706785011291497} +{"epoch": 0, "iter": 19832, "iter_tflops": 35.28053030399837, "iter_time": 0.40982144927978514, "loss": 0.03379877656698227, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 38.187281803543925, "step_time": 0.37862653160095217} +{"epoch": 0, "iter": 19833, "iter_tflops": 34.35240183591628, "iter_time": 0.6005720825195312, "loss": 0.6016650795936584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.7615580812106, "step_time": 0.561213794708252} +{"epoch": 0, "iter": 19834, "iter_tflops": 19.39356196026879, "iter_time": 1.0638114624023438, "loss": 0.3466230034828186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.42092044542258, "step_time": 0.8808831214904784} +{"epoch": 0, "iter": 19835, "iter_tflops": 42.453905046307334, "iter_time": 0.48596456527709964, "loss": 0.6666199564933777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.63366529643081, "step_time": 0.45210248565673833} +{"epoch": 0, "iter": 19836, "iter_tflops": 44.357379599546995, "iter_time": 0.46511073684692383, "loss": 0.5319467782974243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.748173194366856, "step_time": 0.4320813159942627} +{"epoch": 
0, "iter": 19837, "iter_tflops": 26.897858983075658, "iter_time": 0.7670161972045899, "loss": 0.2315165102481842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.29953222238648, "step_time": 0.7290259552001953} +{"epoch": 0, "iter": 19838, "iter_tflops": 11.486876442732628, "iter_time": 1.7960577545166014, "loss": 0.23805026710033417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.501564504109876, "step_time": 1.4226805324554443} +{"epoch": 0, "iter": 19839, "iter_tflops": 38.395036389040534, "iter_time": 0.537337516784668, "loss": 0.273136705160141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.07175224421358, "step_time": 0.49037875556945804} +{"epoch": 0, "iter": 19840, "iter_tflops": 39.072658635533294, "iter_time": 0.5280186767578126, "loss": 0.2845231294631958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80706357830712, "step_time": 0.48195535469055173} +{"epoch": 0, "iter": 19841, "iter_tflops": 15.879277411735691, "iter_time": 1.2992463684082032, "loss": 0.19081448018550873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.077456146425817, "step_time": 1.208089385986328} +{"epoch": 0, "iter": 19842, "iter_tflops": 23.305952444819393, "iter_time": 0.8852285079956056, "loss": 0.17439351975917816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.46663763501299, "step_time": 0.7511328392028809} +{"epoch": 0, "iter": 19843, "iter_tflops": 44.78875916747743, "iter_time": 0.46063105773925783, "loss": 0.23419713973999023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24036729711415, "step_time": 0.42767281150817876} +{"epoch": 0, "iter": 19844, "iter_tflops": 50.8297400168438, "iter_time": 0.40588626861572263, "loss": 0.17471525073051453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.02191224487638, "step_time": 0.37496140480041507} +{"epoch": 0, "iter": 19845, "iter_tflops": 29.51224940833099, "iter_time": 0.6990688247680664, "loss": 0.027054863050580025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.33938869934231, "step_time": 
0.6583119316101074} +{"epoch": 0, "iter": 19846, "iter_tflops": 14.535463883833346, "iter_time": 1.419362579345703, "loss": 0.05510351434350014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.902152069742275, "step_time": 1.0914679679870605} +{"epoch": 0, "iter": 19847, "iter_tflops": 42.63392389354031, "iter_time": 0.48391261291503906, "loss": 0.037403564900159836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.21199254542283, "step_time": 0.4369884090423584} +{"epoch": 0, "iter": 19848, "iter_tflops": 42.26426892847877, "iter_time": 0.48814504623413085, "loss": 0.02639465220272541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.6092783425637, "step_time": 0.442639196395874} +{"epoch": 0, "iter": 19849, "iter_tflops": 20.15295319473751, "iter_time": 1.023725570678711, "loss": 0.6266154050827026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.422707430970352, "step_time": 0.9630479049682616} +{"epoch": 0, "iter": 19850, "iter_tflops": 15.980033130867215, "iter_time": 1.2910544891357423, "loss": 0.5747878551483154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.137720615319, "step_time": 1.078032955169678} +{"epoch": 0, "iter": 19851, "iter_tflops": 34.453757394466145, "iter_time": 0.5988053283691407, "loss": 0.5333322882652283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.45846082550189, "step_time": 0.5507725906372071} +{"epoch": 0, "iter": 19852, "iter_tflops": 39.5582487736872, "iter_time": 0.5215370788574218, "loss": 0.5344390869140625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.800532299773735, "step_time": 0.4820289001464843} +{"epoch": 0, "iter": 19853, "iter_tflops": 14.623616993700717, "iter_time": 1.41080647277832, "loss": 0.5848162174224854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.68647564267992, "step_time": 1.315215347290039} +{"epoch": 0, "iter": 19854, "iter_tflops": 18.845129572953557, "iter_time": 1.0947705841064452, "loss": 0.45789068937301636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.839354892768327, 
"step_time": 0.8305808906555177} +{"epoch": 0, "iter": 19855, "iter_tflops": 43.34386829227936, "iter_time": 0.4759864387512207, "loss": 0.38245972990989685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.513371619296436, "step_time": 0.44355188179016114} +{"epoch": 0, "iter": 19856, "iter_tflops": 47.46936035782838, "iter_time": 0.43461915969848636, "loss": 0.8385160565376282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.18227453951352, "step_time": 0.40309059524536134} +{"epoch": 0, "iter": 19857, "iter_tflops": 30.72953904466328, "iter_time": 0.6713766021728516, "loss": 0.027650978416204453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.597594772129945, "step_time": 0.6329023246765136} +{"epoch": 0, "iter": 19858, "iter_tflops": 14.824425556309818, "iter_time": 1.3916959838867187, "loss": 0.03182538226246834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.559358266894314, "step_time": 1.174934368133545} +{"epoch": 0, "iter": 19859, "iter_tflops": 41.44681336064149, "iter_time": 0.4977727317810059, "loss": 0.04305775463581085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.00543700910339, "step_time": 0.44844902801513664} +{"epoch": 0, "iter": 19860, "iter_tflops": 38.081446625902224, "iter_time": 0.5417623367309571, "loss": 0.03171167150139809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.281046301323606, "step_time": 0.48795134735107426} +{"epoch": 0, "iter": 19861, "iter_tflops": 11.12551208151957, "iter_time": 1.1678694610595703, "loss": 0.0005410572630353272, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 11.84861976974347, "step_time": 1.0965957260131836} +{"epoch": 0, "iter": 19862, "iter_tflops": 10.777294104098763, "iter_time": 1.205603713989258, "loss": 0.01005987823009491, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 12.990519289930244, "step_time": 1.0002021865844726} +{"epoch": 0, "iter": 19863, "iter_tflops": 33.76933056658518, "iter_time": 0.3847617225646973, "loss": 0.004128086846321821, "lr": 3e-05, "seqlen": 5216.0, 
"step_tflops": 37.05815584717761, "step_time": 0.3506150131225586} +{"epoch": 0, "iter": 19864, "iter_tflops": 36.23945473233874, "iter_time": 0.3585359077453613, "loss": 0.003014747053384781, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 39.876570482774234, "step_time": 0.3258340835571289} +{"epoch": 0, "iter": 19865, "iter_tflops": 30.829323858907063, "iter_time": 0.6692035675048829, "loss": 0.7111760377883911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.80177805482758, "step_time": 0.6289626579284668} +{"epoch": 0, "iter": 19866, "iter_tflops": 14.829327682354412, "iter_time": 1.3912359313964844, "loss": 0.5808101296424866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.532824046537602, "step_time": 1.176712516784668} +{"epoch": 0, "iter": 19867, "iter_tflops": 38.01624711969452, "iter_time": 0.5426914825439453, "loss": 0.5412400960922241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.54220309089414, "step_time": 0.4966297397613525} +{"epoch": 0, "iter": 19868, "iter_tflops": 37.6658228089945, "iter_time": 0.5477404174804688, "loss": 0.47224417328834534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.788647139716275, "step_time": 0.5058048000335693} +{"epoch": 0, "iter": 19869, "iter_tflops": 16.83183610090987, "iter_time": 1.2257185363769532, "loss": 0.37861281633377075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.046129115650523, "step_time": 1.143242042541504} +{"epoch": 0, "iter": 19870, "iter_tflops": 14.430714343128178, "iter_time": 1.4296654357910157, "loss": 0.3795667886734009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.067409897148522, "step_time": 1.1418954696655272} +{"epoch": 0, "iter": 19871, "iter_tflops": 44.607962177069666, "iter_time": 0.46249800491333004, "loss": 0.32628801465034485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41227075497183, "step_time": 0.42615422058105473} +{"epoch": 0, "iter": 19872, "iter_tflops": 48.05417065752792, "iter_time": 0.4293299255371094, "loss": 0.3254181444644928, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 51.97486858049901, "step_time": 0.396943639755249} +{"epoch": 0, "iter": 19873, "iter_tflops": 3.887662927711211, "iter_time": 0.4738856048583984, "loss": 0.9303154349327087, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.241246667396456, "step_time": 0.4343787670135497} +{"epoch": 0, "iter": 19874, "iter_tflops": 4.041847971030362, "iter_time": 0.4558082122802734, "loss": 1.1040617227554321, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.464530684471204, "step_time": 0.4126542358398438} +{"epoch": 0, "iter": 19875, "iter_tflops": 4.312951889469709, "iter_time": 0.4271569786071777, "loss": 0.9934332370758057, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.6584178010964825, "step_time": 0.39547923278808594} +{"epoch": 0, "iter": 19876, "iter_tflops": 4.297600306764104, "iter_time": 0.4286828384399414, "loss": 0.8588314056396484, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.652114175096444, "step_time": 0.39601510810852053} +{"epoch": 0, "iter": 19877, "iter_tflops": 30.80306742457961, "iter_time": 0.6697739944458008, "loss": 0.5835003852844238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.598345916210576, "step_time": 0.6328877410888671} +{"epoch": 0, "iter": 19878, "iter_tflops": 33.27783851564144, "iter_time": 0.6199649505615235, "loss": 0.5336078405380249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.874989180048765, "step_time": 0.5047363662719726} +{"epoch": 0, "iter": 19879, "iter_tflops": 45.70243917234453, "iter_time": 0.4514221534729004, "loss": 0.6402240991592407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.25620596668389, "step_time": 0.4188526725769043} +{"epoch": 0, "iter": 19880, "iter_tflops": 43.622317403046694, "iter_time": 0.47294813156127924, "loss": 0.5207222700119019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.94386163153271, "step_time": 0.43948437118530265} +{"epoch": 0, "iter": 19881, "iter_tflops": 24.213196436291643, "iter_time": 0.8520598907470703, "loss": 0.06453166157007217, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 25.409869262069808, "step_time": 0.8119322967529297} +{"epoch": 0, "iter": 19882, "iter_tflops": 12.918423132610569, "iter_time": 1.5970287780761718, "loss": 0.1313784420490265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.12154377760307, "step_time": 1.2797219543457032} +{"epoch": 0, "iter": 19883, "iter_tflops": 48.894982436251425, "iter_time": 0.42194704818725587, "loss": 0.07469604164361954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.413448974201636, "step_time": 0.38625278663635254} +{"epoch": 0, "iter": 19884, "iter_tflops": 45.51668626125695, "iter_time": 0.45326440048217775, "loss": 0.10841717571020126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.21682321695405, "step_time": 0.41918783378601077} +{"epoch": 0, "iter": 19885, "iter_tflops": 21.790217472074527, "iter_time": 0.946805305480957, "loss": 0.5449594259262085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.91985522893774, "step_time": 0.900140655517578} +{"epoch": 0, "iter": 19886, "iter_tflops": 13.318533531117689, "iter_time": 1.5490514373779296, "loss": 0.5524711012840271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.615130414585133, "step_time": 1.3212245407104493} +{"epoch": 0, "iter": 19887, "iter_tflops": 37.54165784202467, "iter_time": 0.5495520095825195, "loss": 0.6695979833602905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.04536420745537, "step_time": 0.5026412582397461} +{"epoch": 0, "iter": 19888, "iter_tflops": 38.82103095747581, "iter_time": 0.5314411544799805, "loss": 0.5395312905311584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29552908784229, "step_time": 0.4877842636108398} +{"epoch": 0, "iter": 19889, "iter_tflops": 21.293795473880337, "iter_time": 0.9688781661987306, "loss": 0.1042528972029686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.816647829368858, "step_time": 0.9042122955322266} +{"epoch": 0, "iter": 19890, "iter_tflops": 16.759882184952296, "iter_time": 1.2309808197021483, "loss": 
0.17091459035873413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.65170122084186, "step_time": 1.049837532043457} +{"epoch": 0, "iter": 19891, "iter_tflops": 40.19952076125568, "iter_time": 0.5132173995971681, "loss": 0.14556849002838135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.23977550092156, "step_time": 0.46634715652465825} +{"epoch": 0, "iter": 19892, "iter_tflops": 40.42586883962089, "iter_time": 0.5103438491821289, "loss": 0.15042494237422943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.296127190103256, "step_time": 0.4657538890838623} +{"epoch": 0, "iter": 19893, "iter_tflops": 17.805473348387707, "iter_time": 1.158693908691406, "loss": 0.03817369043827057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.05371452957621, "step_time": 1.0827859039306642} +{"epoch": 0, "iter": 19894, "iter_tflops": 21.832073769490467, "iter_time": 0.9449900970458983, "loss": 0.02927926741540432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.461952900397133, "step_time": 0.7002622528076172} +{"epoch": 0, "iter": 19895, "iter_tflops": 42.508593786337606, "iter_time": 0.48533935546875007, "loss": 0.01506047509610653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.95261714224441, "step_time": 0.4394024181365966} +{"epoch": 0, "iter": 19896, "iter_tflops": 46.36789618620535, "iter_time": 0.444943489074707, "loss": 0.06069033592939377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.99221368401438, "step_time": 0.4045930156707764} +{"epoch": 0, "iter": 19897, "iter_tflops": 19.112567963926267, "iter_time": 1.079451675415039, "loss": 0.40584638714790344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.698854258673837, "step_time": 0.9967263526916503} +{"epoch": 0, "iter": 19898, "iter_tflops": 14.814783605756661, "iter_time": 1.392601745605469, "loss": 0.2649419903755188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.537823212179838, "step_time": 1.1763770942687988} +{"epoch": 0, "iter": 19899, "iter_tflops": 37.4355931844472, "iter_time": 
0.5511090316772461, "loss": 0.2814384400844574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.92238064935339, "step_time": 0.504151840209961} +{"epoch": 0, "iter": 19900, "iter_tflops": 39.324796347778374, "iter_time": 0.5246331939697266, "loss": 0.3642216920852661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.13015611520987, "step_time": 0.47834497642517093} +{"epoch": 0, "iter": 19901, "iter_tflops": 13.937462264245262, "iter_time": 1.0403268127441405, "loss": 0.11733779311180115, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 14.878712985954158, "step_time": 0.9745141067504882} +{"epoch": 0, "iter": 19902, "iter_tflops": 25.344057220262144, "iter_time": 0.5721071243286133, "loss": 0.09075921028852463, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 32.54646510300387, "step_time": 0.4455020122528076} +{"epoch": 0, "iter": 19903, "iter_tflops": 41.30998357090597, "iter_time": 0.35099301528930665, "loss": 0.10077893733978271, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 45.04679720895366, "step_time": 0.3218767280578613} +{"epoch": 0, "iter": 19904, "iter_tflops": 39.142761668437764, "iter_time": 0.37042648696899416, "loss": 0.07462188601493835, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 42.51686850979286, "step_time": 0.3410297183990479} +{"epoch": 0, "iter": 19905, "iter_tflops": 33.80737123421826, "iter_time": 0.610254295349121, "loss": 0.4021540880203247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.05927362738219, "step_time": 0.5721439018249512} +{"epoch": 0, "iter": 19906, "iter_tflops": 15.039732756526663, "iter_time": 1.3717726135253907, "loss": 0.33726638555526733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.22439082091337, "step_time": 1.07317280960083} +{"epoch": 0, "iter": 19907, "iter_tflops": 48.22374218963165, "iter_time": 0.42782025146484376, "loss": 0.33449587225914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40446359563677, "step_time": 0.3936896228790283} +{"epoch": 0, "iter": 19908, "iter_tflops": 47.3470079464522, 
"iter_time": 0.4357422866821289, "loss": 0.4857662618160248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.86798136589069, "step_time": 0.405581132888794} +{"epoch": 0, "iter": 19909, "iter_tflops": 39.45521395751334, "iter_time": 0.5228990402221679, "loss": 0.6913090348243713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.70079549426922, "step_time": 0.48315478134155276} +{"epoch": 0, "iter": 19910, "iter_tflops": 44.002764047187746, "iter_time": 0.46885903549194335, "loss": 0.5018202066421509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.44576132637643, "step_time": 0.43483533477783204} +{"epoch": 0, "iter": 19911, "iter_tflops": 45.69434924793941, "iter_time": 0.4515020751953125, "loss": 0.4129921793937683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13846868179707, "step_time": 0.4198562564849853} +{"epoch": 0, "iter": 19912, "iter_tflops": 44.06451674635842, "iter_time": 0.46820196914672857, "loss": 0.5505997538566589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43390154789314, "step_time": 0.43494405555725096} +{"epoch": 0, "iter": 19913, "iter_tflops": 31.734740558507525, "iter_time": 0.6501106719970703, "loss": 0.26744702458381653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.87493603410795, "step_time": 0.6090371208190918} +{"epoch": 0, "iter": 19914, "iter_tflops": 12.911915908259132, "iter_time": 1.5978336334228516, "loss": 0.29339584708213806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.971346259427511, "step_time": 1.378038631439209} +{"epoch": 0, "iter": 19915, "iter_tflops": 38.379500380764966, "iter_time": 0.5375550308227539, "loss": 0.22867585718631744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27282100250268, "step_time": 0.41871143341064454} +{"epoch": 0, "iter": 19916, "iter_tflops": 46.12261950294273, "iter_time": 0.44730966567993163, "loss": 0.2994595766067505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.67547827096725, "step_time": 0.41531746101379396} +{"epoch": 0, "iter": 19917, "iter_tflops": 
45.0165303444732, "iter_time": 0.4583003921508789, "loss": 0.12834793329238892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.32351117194222, "step_time": 0.41828112030029296} +{"epoch": 0, "iter": 19918, "iter_tflops": 35.9611762380896, "iter_time": 0.5737046356201171, "loss": 0.1154431700706482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.09405044562456, "step_time": 0.5145674552917481} +{"epoch": 0, "iter": 19919, "iter_tflops": 38.42623298995212, "iter_time": 0.5369012756347656, "loss": 0.1063120886683464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.939554729521575, "step_time": 0.4919244766235351} +{"epoch": 0, "iter": 19920, "iter_tflops": 32.577230413175215, "iter_time": 0.6332979583740235, "loss": 0.19457624852657318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.89656893258674, "step_time": 0.5747371997833252} +{"epoch": 0, "iter": 19921, "iter_tflops": 16.827502189914302, "iter_time": 1.0271770858764648, "loss": 0.012518809176981449, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 17.727113381851503, "step_time": 0.9750501556396485} +{"epoch": 0, "iter": 19922, "iter_tflops": 15.538402022877044, "iter_time": 1.1123939666748046, "loss": 0.010733179748058319, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 20.38189846766865, "step_time": 0.8480478248596192} +{"epoch": 0, "iter": 19923, "iter_tflops": 48.31852298471651, "iter_time": 0.357726676940918, "loss": 0.007162751164287329, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 53.09740233622885, "step_time": 0.3255305137634277} +{"epoch": 0, "iter": 19924, "iter_tflops": 52.458438823948, "iter_time": 0.3294955978393555, "loss": 0.005954448599368334, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 57.734673901293455, "step_time": 0.29938377571105956} +{"epoch": 0, "iter": 19925, "iter_tflops": 30.93604411781118, "iter_time": 0.6668950119018554, "loss": 0.1647484451532364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.08175169342076, "step_time": 0.6236396942138672} +{"epoch": 0, "iter": 19926, 
"iter_tflops": 11.782076173142697, "iter_time": 1.7510575561523438, "loss": 0.18991748988628387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.915232351649845, "step_time": 1.2963111724853515} +{"epoch": 0, "iter": 19927, "iter_tflops": 11.46302491696469, "iter_time": 1.799794876098633, "loss": 0.1600276529788971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.316589698701131, "step_time": 1.5492775535583496} +{"epoch": 0, "iter": 19928, "iter_tflops": 19.042851256831856, "iter_time": 1.0834035949707033, "loss": 0.21716012060642242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.71327504227492, "step_time": 0.8348182697296143} +{"epoch": 0, "iter": 19929, "iter_tflops": 22.390794657247664, "iter_time": 0.6931862106323242, "loss": 0.28073251247406006, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 23.867626193212324, "step_time": 0.6502946701049804} +{"epoch": 0, "iter": 19930, "iter_tflops": 14.09851655618538, "iter_time": 1.100895263671875, "loss": 0.24850621819496155, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 16.78924694936478, "step_time": 0.9244601707458495} +{"epoch": 0, "iter": 19931, "iter_tflops": 27.788872395937332, "iter_time": 0.5585325622558593, "loss": 0.24238920211791992, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 29.693239901619155, "step_time": 0.5227112350463867} +{"epoch": 0, "iter": 19932, "iter_tflops": 29.191260270528485, "iter_time": 0.5316998977661133, "loss": 0.26332831382751465, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 30.97710369423419, "step_time": 0.5010471687316894} +{"epoch": 0, "iter": 19933, "iter_tflops": 29.239393689728395, "iter_time": 0.7055923843383789, "loss": 0.1517505794763565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.26487405503195, "step_time": 0.6598809089660644} +{"epoch": 0, "iter": 19934, "iter_tflops": 34.155822990024035, "iter_time": 0.6040285873413086, "loss": 0.12379664182662964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.908860879187785, "step_time": 0.48081196022033695} +{"epoch": 
0, "iter": 19935, "iter_tflops": 49.38516950969806, "iter_time": 0.4177588882446289, "loss": 0.10983021557331085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.42507409205267, "step_time": 0.38616873931884765} +{"epoch": 0, "iter": 19936, "iter_tflops": 53.41032017156812, "iter_time": 0.3862754135131837, "loss": 0.13206031918525696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.793377409986306, "step_time": 0.35698023605346685} +{"epoch": 0, "iter": 19937, "iter_tflops": 25.840046679078483, "iter_time": 0.7984154891967773, "loss": 0.1851709932088852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.288931027556625, "step_time": 0.7560242462158203} +{"epoch": 0, "iter": 19938, "iter_tflops": 35.25926167346668, "iter_time": 0.5851255111694336, "loss": 0.3081189692020416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.74957120775727, "step_time": 0.47157247352600096} +{"epoch": 0, "iter": 19939, "iter_tflops": 49.22512277637275, "iter_time": 0.41911715698242186, "loss": 0.25158628821372986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.49485289262614, "step_time": 0.3856650199890137} +{"epoch": 0, "iter": 19940, "iter_tflops": 45.93691900277066, "iter_time": 0.449117919921875, "loss": 0.23847247660160065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.020681358727735, "step_time": 0.41245126914978025} +{"epoch": 0, "iter": 19941, "iter_tflops": 34.2757124705349, "iter_time": 0.5487712707519531, "loss": 0.04451078549027443, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 37.23573518264914, "step_time": 0.5051471710205078} +{"epoch": 0, "iter": 19942, "iter_tflops": 45.1529132255631, "iter_time": 0.41657392501831053, "loss": 0.019919998943805695, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 49.80352190357212, "step_time": 0.37767462158203124} +{"epoch": 0, "iter": 19943, "iter_tflops": 47.99720539612129, "iter_time": 0.39188794708251956, "loss": 0.014286772347986698, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 52.527972820368866, "step_time": 
0.3580858974456787} +{"epoch": 0, "iter": 19944, "iter_tflops": 53.99814825704659, "iter_time": 0.3483365058898926, "loss": 0.036959897726774216, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 59.100629768397866, "step_time": 0.3182627048492431} +{"epoch": 0, "iter": 19945, "iter_tflops": 31.195250743665387, "iter_time": 0.6613536682128907, "loss": 0.5334571599960327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.096489730392236, "step_time": 0.6233619842529295} +{"epoch": 0, "iter": 19946, "iter_tflops": 16.331448856102266, "iter_time": 1.2632739257812498, "loss": 0.6103195548057556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.995149251345072, "step_time": 1.0861243171691894} +{"epoch": 0, "iter": 19947, "iter_tflops": 36.83020645744354, "iter_time": 0.560167739868164, "loss": 0.6380890011787415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.126975706773, "step_time": 0.5141452388763428} +{"epoch": 0, "iter": 19948, "iter_tflops": 37.44585833661156, "iter_time": 0.5509579544067382, "loss": 0.6026726961135864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.89789520931046, "step_time": 0.5044536743164062} +{"epoch": 0, "iter": 19949, "iter_tflops": 12.684827060889935, "iter_time": 1.626438690185547, "loss": 0.0193608570843935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.580500247953106, "step_time": 1.5191703643798828} +{"epoch": 0, "iter": 19950, "iter_tflops": 25.116605530221904, "iter_time": 0.8214124908447266, "loss": 0.042965762317180634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.37254527299488, "step_time": 0.5672161064147949} +{"epoch": 0, "iter": 19951, "iter_tflops": 50.23559489561619, "iter_time": 0.4106867561340332, "loss": 0.03716336935758591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.76922846233509, "step_time": 0.37669132995605464} +{"epoch": 0, "iter": 19952, "iter_tflops": 57.52748342010337, "iter_time": 0.35863021087646485, "loss": 0.020407235249876976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.832197203301696, 
"step_time": 0.3283522529602051} +{"epoch": 0, "iter": 19953, "iter_tflops": 35.45698816962041, "iter_time": 0.581862548828125, "loss": 0.021298259496688843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.971869699165936, "step_time": 0.5433257217407227} +{"epoch": 0, "iter": 19954, "iter_tflops": 10.864705164198352, "iter_time": 1.8989096527099607, "loss": 0.031690388917922974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.658565687577864, "step_time": 1.5104875564575195} +{"epoch": 0, "iter": 19955, "iter_tflops": 17.230073500184286, "iter_time": 1.1973885955810546, "loss": 0.018885402008891106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.39375225616473, "step_time": 1.011637939453125} +{"epoch": 0, "iter": 19956, "iter_tflops": 14.544665458663935, "iter_time": 1.418464630126953, "loss": 0.02980063483119011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.850021990017893, "step_time": 1.094486442565918} +{"epoch": 0, "iter": 19957, "iter_tflops": 17.795479903656798, "iter_time": 1.0106252365112305, "loss": 0.4788687825202942, "lr": 3e-05, "seqlen": 7168.0, "step_tflops": 19.16026865855334, "step_time": 0.9386382522583008} +{"epoch": 0, "iter": 19958, "iter_tflops": 12.226799135591067, "iter_time": 1.4709132690429687, "loss": 0.5024307370185852, "lr": 3e-05, "seqlen": 7168.0, "step_tflops": 16.35442397015517, "step_time": 1.0996756057739259} +{"epoch": 0, "iter": 19959, "iter_tflops": 31.504982153360746, "iter_time": 0.570848159790039, "loss": 0.28794172406196594, "lr": 3e-05, "seqlen": 7168.0, "step_tflops": 33.605609112078234, "step_time": 0.535165454864502} +{"epoch": 0, "iter": 19960, "iter_tflops": 31.99572008254525, "iter_time": 0.5620927124023437, "loss": 0.26896193623542786, "lr": 3e-05, "seqlen": 7168.0, "step_tflops": 33.75920256379621, "step_time": 0.5327306251525878} +{"epoch": 0, "iter": 19961, "iter_tflops": 25.888546492163677, "iter_time": 0.7969197311401368, "loss": 0.48744648694992065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
27.673054157477306, "step_time": 0.7455300521850586} +{"epoch": 0, "iter": 19962, "iter_tflops": 12.132178830570542, "iter_time": 1.7005266571044921, "loss": 0.5456838607788086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.92387201488811, "step_time": 1.2956078453063964} +{"epoch": 0, "iter": 19963, "iter_tflops": 41.424284119991164, "iter_time": 0.49804345321655275, "loss": 0.6380365490913391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.60730138853762, "step_time": 0.46250485610961917} +{"epoch": 0, "iter": 19964, "iter_tflops": 42.510764619024314, "iter_time": 0.48531457138061523, "loss": 0.5622295141220093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.62619477217081, "step_time": 0.4521765098571778} +{"epoch": 0, "iter": 19965, "iter_tflops": 17.335913867230634, "iter_time": 0.6769776077270507, "loss": 0.06796079874038696, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 18.80476998238556, "step_time": 0.6240983276367188} +{"epoch": 0, "iter": 19966, "iter_tflops": 7.803336616401424, "iter_time": 1.5039752960205077, "loss": 0.05924591049551964, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 9.14310590039723, "step_time": 1.2835928649902344} +{"epoch": 0, "iter": 19967, "iter_tflops": 30.13909395863258, "iter_time": 0.3893954315185547, "loss": 0.08361228555440903, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 33.32092931961727, "step_time": 0.35221183013916013} +{"epoch": 0, "iter": 19968, "iter_tflops": 28.1317742392694, "iter_time": 0.4171804237365723, "loss": 0.049496158957481384, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 30.621584636291885, "step_time": 0.38325990104675295} +{"epoch": 0, "iter": 19969, "iter_tflops": 21.282287639269864, "iter_time": 0.9694020614624023, "loss": 0.594120979309082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.18281776261673, "step_time": 0.930048370361328} +{"epoch": 0, "iter": 19970, "iter_tflops": 14.466675307795207, "iter_time": 1.426111602783203, "loss": 0.5081669688224792, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 18.80114150829491, "step_time": 1.0973319625854492} +{"epoch": 0, "iter": 19971, "iter_tflops": 44.98285181084646, "iter_time": 0.45864352035522454, "loss": 0.4783226251602173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.8029476362576, "step_time": 0.42274277496337886} +{"epoch": 0, "iter": 19972, "iter_tflops": 50.168051506460046, "iter_time": 0.4112396812438965, "loss": 0.6324905157089233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.10831709456099, "step_time": 0.38129246330261235} +{"epoch": 0, "iter": 19973, "iter_tflops": 37.707997127187305, "iter_time": 0.547127799987793, "loss": 0.055873770266771317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.79124593639319, "step_time": 0.5057725753784179} +{"epoch": 0, "iter": 19974, "iter_tflops": 15.534308935016924, "iter_time": 1.328098571777344, "loss": 0.040586285293102264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.248212511027123, "step_time": 1.1305816116333007} +{"epoch": 0, "iter": 19975, "iter_tflops": 54.36161942207371, "iter_time": 0.37951580047607425, "loss": 0.02248406782746315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.8889432820009, "step_time": 0.3444891891479492} +{"epoch": 0, "iter": 19976, "iter_tflops": 51.89311663448972, "iter_time": 0.39756898117065426, "loss": 0.012577734887599945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.7283741783188, "step_time": 0.36368208694458004} +{"epoch": 0, "iter": 19977, "iter_tflops": 26.137077763194835, "iter_time": 0.789342010498047, "loss": 0.6176149249076843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.524760663008504, "step_time": 0.7495466995239258} +{"epoch": 0, "iter": 19978, "iter_tflops": 12.332841376830565, "iter_time": 1.6728580932617187, "loss": 0.5565966963768005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.152704850312581, "step_time": 1.457749153137207} +{"epoch": 0, "iter": 19979, "iter_tflops": 42.76718660903845, "iter_time": 0.4824047393798829, "loss": 0.5495014190673828, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 46.03515080664667, "step_time": 0.4481595726013184} +{"epoch": 0, "iter": 19980, "iter_tflops": 43.16325177263571, "iter_time": 0.4779782028198242, "loss": 0.6358031630516052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.56032879307972, "step_time": 0.443104549407959} +{"epoch": 0, "iter": 19981, "iter_tflops": 32.15195377897102, "iter_time": 0.6416746444702148, "loss": 0.6343600153923035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.259959395608895, "step_time": 0.602192584991455} +{"epoch": 0, "iter": 19982, "iter_tflops": 11.880635687783352, "iter_time": 1.7365311126708984, "loss": 0.5642105340957642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.59062375042903, "step_time": 1.3233013534545899} +{"epoch": 0, "iter": 19983, "iter_tflops": 12.024675928706055, "iter_time": 1.7157296905517576, "loss": 0.5380499362945557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.540049262062782, "step_time": 1.327607986450195} +{"epoch": 0, "iter": 19984, "iter_tflops": 16.314198628683815, "iter_time": 1.2646096801757811, "loss": 0.6118127107620239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.554642876724483, "step_time": 1.0037193851470947} +{"epoch": 0, "iter": 19985, "iter_tflops": 15.452775505327724, "iter_time": 0.988532272338867, "loss": 0.2581934928894043, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 16.475689370945933, "step_time": 0.9271580047607422} +{"epoch": 0, "iter": 19986, "iter_tflops": 5.280534918565446, "iter_time": 2.8928067932128902, "loss": 0.2587588131427765, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 6.355922983951005, "step_time": 2.403359405517578} +{"epoch": 0, "iter": 19987, "iter_tflops": 8.494244746173804, "iter_time": 1.7983432006835938, "loss": 0.22498470544815063, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 11.310572783972228, "step_time": 1.3505564727783201} +{"epoch": 0, "iter": 19988, "iter_tflops": 20.819563417229123, "iter_time": 0.7337121810913086, "loss": 0.3441081941127777, "lr": 
3e-05, "seqlen": 6112.0, "step_tflops": 22.503682842183384, "step_time": 0.6788029937744141} +{"epoch": 0, "iter": 19989, "iter_tflops": 23.129812411492182, "iter_time": 0.6958250961303711, "loss": 0.372239351272583, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 25.200099409769205, "step_time": 0.6386603355407715} +{"epoch": 0, "iter": 19990, "iter_tflops": 22.7344043902954, "iter_time": 0.7079272308349609, "loss": 0.35986611247062683, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 24.506431786287102, "step_time": 0.6567379570007323} +{"epoch": 0, "iter": 19991, "iter_tflops": 23.914949748752736, "iter_time": 0.6729808807373047, "loss": 0.31942373514175415, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 25.790339360207337, "step_time": 0.6240438995361328} +{"epoch": 0, "iter": 19992, "iter_tflops": 25.997109274782005, "iter_time": 0.6190805206298828, "loss": 0.28870895504951477, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 27.87597384886203, "step_time": 0.5773539619445801} +{"epoch": 0, "iter": 19993, "iter_tflops": 14.99599223773133, "iter_time": 1.3757738189697266, "loss": 0.7119280099868774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.021002779383675, "step_time": 1.287752944946289} +{"epoch": 0, "iter": 19994, "iter_tflops": 15.453416219292112, "iter_time": 1.3350506591796876, "loss": 0.39640703797340393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.68731283583905, "step_time": 0.9972824249267577} +{"epoch": 0, "iter": 19995, "iter_tflops": 34.05395692210135, "iter_time": 0.6058354263305663, "loss": 0.6326956152915955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.79843109300189, "step_time": 0.5606514434814454} +{"epoch": 0, "iter": 19996, "iter_tflops": 36.28217552817214, "iter_time": 0.5686288986206055, "loss": 0.6470681428909302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.54083629841636, "step_time": 0.5217667465209961} +{"epoch": 0, "iter": 19997, "iter_tflops": 18.357913199250643, "iter_time": 1.123825637817383, "loss": 
0.2613266706466675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.659692006987328, "step_time": 1.0494108200073242} +{"epoch": 0, "iter": 19998, "iter_tflops": 17.140159156824616, "iter_time": 1.203669891357422, "loss": 0.20995798707008362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.650188380380996, "step_time": 0.9990753173828124} +{"epoch": 0, "iter": 19999, "iter_tflops": 38.31235437394437, "iter_time": 0.5384971466064453, "loss": 0.25015807151794434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.98807290494473, "step_time": 0.4913560466766358} +{"epoch": 0, "iter": 20000, "iter_tflops": 45.20976049472752, "iter_time": 0.45634157943725584, "loss": 0.19556263089179993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.60829292942143, "step_time": 0.41587993240356447} +{"epoch": 0, "iter": 20001, "iter_tflops": 23.574029389646185, "iter_time": 0.8751619491577147, "loss": 0.6116479635238647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.161952779756167, "step_time": 0.81993212890625} +{"epoch": 0, "iter": 20002, "iter_tflops": 10.131207808886504, "iter_time": 2.0363903198242186, "loss": 0.6462240219116211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.644725564357724, "step_time": 1.7717114410400392} +{"epoch": 0, "iter": 20003, "iter_tflops": 15.277398327234325, "iter_time": 1.3504323883056641, "loss": 0.54725182056427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.88216107227033, "step_time": 1.0926235313415529} +{"epoch": 0, "iter": 20004, "iter_tflops": 34.463205436271956, "iter_time": 0.5986411666870117, "loss": 0.5508767366409302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.462625567141835, "step_time": 0.5507113609313965} +{"epoch": 0, "iter": 20005, "iter_tflops": 13.33466336616568, "iter_time": 1.1547541198730467, "loss": 0.24571970105171204, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 14.453959007694841, "step_time": 1.0653314743041993} +{"epoch": 0, "iter": 20006, "iter_tflops": 13.992051966528502, "iter_time": 
1.1005003051757813, "loss": 0.189467653632164, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 16.572445681972276, "step_time": 0.9291481628417968} +{"epoch": 0, "iter": 20007, "iter_tflops": 24.69879792907769, "iter_time": 0.6234415740966797, "loss": 0.24563097953796387, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.563073862642455, "step_time": 0.579686580657959} +{"epoch": 0, "iter": 20008, "iter_tflops": 25.04014739838944, "iter_time": 0.6149427642822266, "loss": 0.24017241597175598, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.785626479222657, "step_time": 0.5748701629638673} +{"epoch": 0, "iter": 20009, "iter_tflops": 19.9574091412677, "iter_time": 1.0337561035156249, "loss": 0.006949737202376127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.95256936649764, "step_time": 0.9398031349182129} +{"epoch": 0, "iter": 20010, "iter_tflops": 20.87778965360086, "iter_time": 0.9881837997436523, "loss": 0.006034062709659338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.624213314284294, "step_time": 0.8051405620574952} +{"epoch": 0, "iter": 20011, "iter_tflops": 52.84862151681958, "iter_time": 0.39038092041015615, "loss": 0.010517418384552002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.188153350754696, "step_time": 0.35455831336975097} +{"epoch": 0, "iter": 20012, "iter_tflops": 57.079373996783765, "iter_time": 0.36144568634033203, "loss": 0.010620099492371082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.90364086696009, "step_time": 0.32797932243347167} +{"epoch": 0, "iter": 20013, "iter_tflops": 32.02427601507795, "iter_time": 0.6442329406738281, "loss": 0.06141958385705948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.98794311473144, "step_time": 0.6070121231079101} +{"epoch": 0, "iter": 20014, "iter_tflops": 15.855881686516932, "iter_time": 1.3011634368896485, "loss": 0.07152166962623596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.469354805478186, "step_time": 1.1809877204895018} +{"epoch": 0, "iter": 20015, "iter_tflops": 
34.441352881919904, "iter_time": 0.59902099609375, "loss": 0.08933757245540619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.116888524808076, "step_time": 0.5412585945129395} +{"epoch": 0, "iter": 20016, "iter_tflops": 43.238519418677754, "iter_time": 0.47714616012573247, "loss": 0.108695849776268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.61840358723735, "step_time": 0.43325882339477545} +{"epoch": 0, "iter": 20017, "iter_tflops": 20.172617856855968, "iter_time": 1.0227276229858397, "loss": 0.19706223905086517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.565293460714074, "step_time": 0.9566803970336915} +{"epoch": 0, "iter": 20018, "iter_tflops": 13.58793045603612, "iter_time": 1.5183396453857423, "loss": 0.17536689341068268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.36394116386201, "step_time": 1.2607655639648438} +{"epoch": 0, "iter": 20019, "iter_tflops": 39.92539363015613, "iter_time": 0.5167411422729492, "loss": 0.21616455912590027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78041035770764, "step_time": 0.4712402954101562} +{"epoch": 0, "iter": 20020, "iter_tflops": 40.08003098315238, "iter_time": 0.514747444152832, "loss": 0.229893758893013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.749671716715135, "step_time": 0.4715713901519775} +{"epoch": 0, "iter": 20021, "iter_tflops": 21.056080237035157, "iter_time": 0.9798164367675781, "loss": 0.33513033390045166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.67890643431586, "step_time": 0.9097040710449218} +{"epoch": 0, "iter": 20022, "iter_tflops": 15.766843160762857, "iter_time": 1.3085113677978517, "loss": 0.2594253718852997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.69806583221715, "step_time": 1.1033811569213867} +{"epoch": 0, "iter": 20023, "iter_tflops": 46.83985405602309, "iter_time": 0.4404602432250977, "loss": 0.1979580968618393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.78710488461008, "step_time": 0.40622700500488285} +{"epoch": 0, "iter": 20024, 
"iter_tflops": 48.252958437371035, "iter_time": 0.42756121444702144, "loss": 0.225859597325325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.30530874279455, "step_time": 0.39443593788146974} +{"epoch": 0, "iter": 20025, "iter_tflops": 22.73192547101056, "iter_time": 0.9075823135375978, "loss": 0.42361047863960266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.163976923447994, "step_time": 0.8537954483032227} +{"epoch": 0, "iter": 20026, "iter_tflops": 18.630713084396298, "iter_time": 1.1073700408935545, "loss": 0.49376657605171204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.3047478268238, "step_time": 0.8852742652893067} +{"epoch": 0, "iter": 20027, "iter_tflops": 45.41540944369766, "iter_time": 0.4542751846313477, "loss": 0.6588372588157654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.83196119216608, "step_time": 0.42249160194396973} +{"epoch": 0, "iter": 20028, "iter_tflops": 42.92814537604043, "iter_time": 0.4805959663391114, "loss": 0.5082264542579651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.350826465012105, "step_time": 0.44510734939575197} +{"epoch": 0, "iter": 20029, "iter_tflops": 24.189020453251803, "iter_time": 0.8529114913940431, "loss": 0.3436944782733917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.576967133923645, "step_time": 0.8066278305053711} +{"epoch": 0, "iter": 20030, "iter_tflops": 11.862230179322163, "iter_time": 1.7392255249023436, "loss": 0.3028988838195801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.883243702650383, "step_time": 1.486042739868164} +{"epoch": 0, "iter": 20031, "iter_tflops": 48.293194936473405, "iter_time": 0.42720498275756835, "loss": 0.25188955664634705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.45695594234795, "step_time": 0.3932956676483155} +{"epoch": 0, "iter": 20032, "iter_tflops": 50.43985370692414, "iter_time": 0.40902365875244145, "loss": 0.2579326331615448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.297919872410496, "step_time": 0.3799610290527344} +{"epoch": 0, 
"iter": 20033, "iter_tflops": 34.11912600106878, "iter_time": 0.6046782531738281, "loss": 0.4836498498916626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.60746436222839, "step_time": 0.5635761413574218} +{"epoch": 0, "iter": 20034, "iter_tflops": 14.6300396869645, "iter_time": 1.4101871185302732, "loss": 0.49254006147384644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.12161872994064, "step_time": 1.0789407424926758} +{"epoch": 0, "iter": 20035, "iter_tflops": 43.92881974349407, "iter_time": 0.46964825439453123, "loss": 0.411765456199646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19125463971808, "step_time": 0.43718044090270997} +{"epoch": 0, "iter": 20036, "iter_tflops": 42.57713314269264, "iter_time": 0.4845580711364746, "loss": 0.4798089861869812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.75748362960439, "step_time": 0.45087911033630373} +{"epoch": 0, "iter": 20037, "iter_tflops": 21.67255873663382, "iter_time": 0.9519454421997071, "loss": 0.38140615820884705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.65546563089897, "step_time": 0.9106453094482422} +{"epoch": 0, "iter": 20038, "iter_tflops": 11.005207487842343, "iter_time": 1.8746664733886718, "loss": 0.4499645531177521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.740949841898896, "step_time": 1.5014313964843748} +{"epoch": 0, "iter": 20039, "iter_tflops": 48.259747282275384, "iter_time": 0.4275010681152344, "loss": 0.5167486071586609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.21352341187466, "step_time": 0.3951293106079101} +{"epoch": 0, "iter": 20040, "iter_tflops": 48.671929848310796, "iter_time": 0.4238807373046875, "loss": 0.39636704325675964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.29817710521802, "step_time": 0.39448972511291513} +{"epoch": 0, "iter": 20041, "iter_tflops": 21.207383511452058, "iter_time": 0.9728259735107421, "loss": 0.00554208317771554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.06504897812013, "step_time": 0.9350123596191405} 
+{"epoch": 0, "iter": 20042, "iter_tflops": 15.608334512518418, "iter_time": 1.3217998046875, "loss": 0.008064155466854572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.78232831152551, "step_time": 1.160202035903931} +{"epoch": 0, "iter": 20043, "iter_tflops": 44.47530559516585, "iter_time": 0.46387749862670896, "loss": 0.005267454776912928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.31493378263573, "step_time": 0.4183538722991943} +{"epoch": 0, "iter": 20044, "iter_tflops": 44.97394422729262, "iter_time": 0.4587343597412109, "loss": 0.0027462646830826998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.75435918697695, "step_time": 0.41465901374816894} +{"epoch": 0, "iter": 20045, "iter_tflops": 22.221642058340215, "iter_time": 0.9284234466552734, "loss": 0.17454907298088074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.92904866350696, "step_time": 0.8621777572631835} +{"epoch": 0, "iter": 20046, "iter_tflops": 20.210932717789063, "iter_time": 1.0207887878417967, "loss": 0.18620170652866364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.22720794488122, "step_time": 0.8882295951843261} +{"epoch": 0, "iter": 20047, "iter_tflops": 37.95903879711939, "iter_time": 0.5435093765258789, "loss": 0.08938588947057724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.46582677372578, "step_time": 0.49754448699951165} +{"epoch": 0, "iter": 20048, "iter_tflops": 42.95277074480555, "iter_time": 0.48032043457031254, "loss": 0.12070339918136597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.708696013378066, "step_time": 0.4416970558166504} +{"epoch": 0, "iter": 20049, "iter_tflops": 26.964607784021972, "iter_time": 0.7651175079345703, "loss": 0.14345543086528778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.169996655461592, "step_time": 0.6838281669616698} +{"epoch": 0, "iter": 20050, "iter_tflops": 43.81132832837067, "iter_time": 0.4709077377319336, "loss": 0.1342412829399109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.05107487867608, "step_time": 
0.42935758590698236} +{"epoch": 0, "iter": 20051, "iter_tflops": 53.131889790052846, "iter_time": 0.3882996368408203, "loss": 0.09884492307901382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.65693695564943, "step_time": 0.35782500076293944} +{"epoch": 0, "iter": 20052, "iter_tflops": 55.3894542898437, "iter_time": 0.3724733123779297, "loss": 0.09912955015897751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.23435924209944, "step_time": 0.3425137042999268} +{"epoch": 0, "iter": 20053, "iter_tflops": 36.65821333990281, "iter_time": 0.5627959365844726, "loss": 0.13552992045879364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.46849721286656, "step_time": 0.5227230567932128} +{"epoch": 0, "iter": 20054, "iter_tflops": 10.556721102141314, "iter_time": 1.954308853149414, "loss": 0.09517720341682434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.809054972885312, "step_time": 1.610664764404297} +{"epoch": 0, "iter": 20055, "iter_tflops": 13.062558877389165, "iter_time": 1.57940673828125, "loss": 0.1698037087917328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.485293243744115, "step_time": 1.3323024101257326} +{"epoch": 0, "iter": 20056, "iter_tflops": 46.506356736526335, "iter_time": 0.4436187858581543, "loss": 0.1040530800819397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.496061800893415, "step_time": 0.40856836700439453} +{"epoch": 0, "iter": 20057, "iter_tflops": 21.667723789761553, "iter_time": 0.7484532012939453, "loss": 0.2524906098842621, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 22.92061254026541, "step_time": 0.7075411796569824} +{"epoch": 0, "iter": 20058, "iter_tflops": 13.030946709820542, "iter_time": 1.2445202636718748, "loss": 0.3292848467826843, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 15.633095890801652, "step_time": 1.0373682441711425} +{"epoch": 0, "iter": 20059, "iter_tflops": 24.133014407812095, "iter_time": 0.6719955062866212, "loss": 0.3360631465911865, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 26.01490983836901, 
"step_time": 0.6233839492797851} +{"epoch": 0, "iter": 20060, "iter_tflops": 24.14629764383728, "iter_time": 0.6716258316040039, "loss": 0.2329728752374649, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 25.96549406108302, "step_time": 0.6245703315734863} +{"epoch": 0, "iter": 20061, "iter_tflops": 16.716355064821375, "iter_time": 1.2341861267089844, "loss": 0.011301174759864807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.623288333879387, "step_time": 1.1706721878051756} +{"epoch": 0, "iter": 20062, "iter_tflops": 14.843520645310951, "iter_time": 1.3899056701660157, "loss": 0.03382691368460655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.017274300100222, "step_time": 0.9816255531311036} +{"epoch": 0, "iter": 20063, "iter_tflops": 41.604977025816666, "iter_time": 0.4958804206848144, "loss": 0.037582289427518845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97805450461973, "step_time": 0.4487161045074463} +{"epoch": 0, "iter": 20064, "iter_tflops": 41.96168698492348, "iter_time": 0.49166501617431646, "loss": 0.01659248396754265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.51610459054577, "step_time": 0.443525821685791} +{"epoch": 0, "iter": 20065, "iter_tflops": 34.11328438366306, "iter_time": 0.6047817993164063, "loss": 0.45385921001434326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.86056454624787, "step_time": 0.5449230289459228} +{"epoch": 0, "iter": 20066, "iter_tflops": 36.58191908878483, "iter_time": 0.5639696884155273, "loss": 0.38481566309928894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.89450324741328, "step_time": 0.5044955158233643} +{"epoch": 0, "iter": 20067, "iter_tflops": 39.47818866190175, "iter_time": 0.5225947341918945, "loss": 0.4122928977012634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.128365738938825, "step_time": 0.4783648338317871} +{"epoch": 0, "iter": 20068, "iter_tflops": 39.95157055435321, "iter_time": 0.5164025650024414, "loss": 0.3440293073654175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.29195017956633, "step_time": 0.47655726814270016} +{"epoch": 0, "iter": 20069, "iter_tflops": 19.243525119258347, "iter_time": 1.072105728149414, "loss": 0.4502125382423401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.85190283507622, "step_time": 0.9894105911254882} +{"epoch": 0, "iter": 20070, "iter_tflops": 16.91597343538302, "iter_time": 1.2196220092773438, "loss": 0.3283899128437042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.80580399980839, "step_time": 0.9916028003692627} +{"epoch": 0, "iter": 20071, "iter_tflops": 36.26136052527607, "iter_time": 0.5689553070068359, "loss": 0.3186139166355133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.51652019942654, "step_time": 0.5220878105163573} +{"epoch": 0, "iter": 20072, "iter_tflops": 38.77792918710407, "iter_time": 0.5320318527221679, "loss": 0.3342517018318176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.584801820409126, "step_time": 0.48447081184387203} +{"epoch": 0, "iter": 20073, "iter_tflops": 21.068405828210583, "iter_time": 0.9792432174682617, "loss": 0.06339948624372482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.86072666469833, "step_time": 0.9024688415527344} +{"epoch": 0, "iter": 20074, "iter_tflops": 18.814249787350892, "iter_time": 1.0965674285888671, "loss": 0.08923390507698059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.34525208808811, "step_time": 0.9665425090789795} +{"epoch": 0, "iter": 20075, "iter_tflops": 40.961347436574066, "iter_time": 0.5036722373962402, "loss": 0.06333624571561813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08102258650659, "step_time": 0.4576447544097901} +{"epoch": 0, "iter": 20076, "iter_tflops": 41.50365723087527, "iter_time": 0.4970909767150879, "loss": 0.0843210443854332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.6086512604983, "step_time": 0.4523504409790039} +{"epoch": 0, "iter": 20077, "iter_tflops": 31.89637645944453, "iter_time": 0.6468162155151368, "loss": 0.5315426588058472, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 34.66383997625661, "step_time": 0.59517622756958} +{"epoch": 0, "iter": 20078, "iter_tflops": 32.85375204475385, "iter_time": 0.6279676513671876, "loss": 0.45849573612213135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.6461696047064, "step_time": 0.5787744865417479} +{"epoch": 0, "iter": 20079, "iter_tflops": 37.021100152991885, "iter_time": 0.5572793197631836, "loss": 0.541994035243988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.29652341077351, "step_time": 0.5119819717407226} +{"epoch": 0, "iter": 20080, "iter_tflops": 39.50477258107759, "iter_time": 0.5222430648803711, "loss": 0.5996869206428528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.819071668068744, "step_time": 0.4818201961517333} +{"epoch": 0, "iter": 20081, "iter_tflops": 21.139279386756687, "iter_time": 0.9759601135253907, "loss": 0.055261459201574326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.652964621241836, "step_time": 0.910745849609375} +{"epoch": 0, "iter": 20082, "iter_tflops": 17.65859958440989, "iter_time": 1.1683312377929687, "loss": 0.07730920612812042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.50637173106728, "step_time": 0.8776809005737304} +{"epoch": 0, "iter": 20083, "iter_tflops": 50.356736026981196, "iter_time": 0.40969878387451175, "loss": 0.11043864488601685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.718849208751884, "step_time": 0.37703814697265625} +{"epoch": 0, "iter": 20084, "iter_tflops": 50.069320156424396, "iter_time": 0.41205060195922855, "loss": 0.1483510583639145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.3309869104532, "step_time": 0.37972977638244626} +{"epoch": 0, "iter": 20085, "iter_tflops": 28.232184107906463, "iter_time": 0.7307650527954103, "loss": 0.303700715303421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.838750916830875, "step_time": 0.6914194755554199} +{"epoch": 0, "iter": 20086, "iter_tflops": 12.41255156156202, "iter_time": 1.6621154327392578, "loss": 0.25529471039772034, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 15.330797965606816, "step_time": 1.3457286148071288} +{"epoch": 0, "iter": 20087, "iter_tflops": 46.404490478891816, "iter_time": 0.4445926094055176, "loss": 0.3026227653026581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.8424730159228, "step_time": 0.40578461837768554} +{"epoch": 0, "iter": 20088, "iter_tflops": 44.03407263019435, "iter_time": 0.46852567291259767, "loss": 0.2677857577800751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.51234247683192, "step_time": 0.4342259807586669} +{"epoch": 0, "iter": 20089, "iter_tflops": 23.746834473768104, "iter_time": 0.8687934188842773, "loss": 0.4453204870223999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.988774318503246, "step_time": 0.8256144638061523} +{"epoch": 0, "iter": 20090, "iter_tflops": 18.209356119543468, "iter_time": 1.1329941253662108, "loss": 0.3016129732131958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.326096691817494, "step_time": 0.9240797348022459} +{"epoch": 0, "iter": 20091, "iter_tflops": 46.28251425070509, "iter_time": 0.44576432037353514, "loss": 0.3987114727497101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.199699189116764, "step_time": 0.41098042106628413} +{"epoch": 0, "iter": 20092, "iter_tflops": 46.80405664134769, "iter_time": 0.4407971229553223, "loss": 0.35216379165649414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.83213350775644, "step_time": 0.4058671569824219} +{"epoch": 0, "iter": 20093, "iter_tflops": 33.350624591455336, "iter_time": 0.6186119079589844, "loss": 0.15131674706935883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.54534971550398, "step_time": 0.5804161071777344} +{"epoch": 0, "iter": 20094, "iter_tflops": 17.389584793762214, "iter_time": 1.1864051818847654, "loss": 0.13197675347328186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.40975413298731, "step_time": 0.9206300697326661} +{"epoch": 0, "iter": 20095, "iter_tflops": 39.57168402655243, "iter_time": 0.5213600082397462, "loss": 
0.15682220458984375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.47589737469698, "step_time": 0.4745409469604492} +{"epoch": 0, "iter": 20096, "iter_tflops": 41.31408146806344, "iter_time": 0.4993719520568848, "loss": 0.14173521101474762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.01731141689255, "step_time": 0.45829244041442874} +{"epoch": 0, "iter": 20097, "iter_tflops": 14.003604927212438, "iter_time": 1.3933719787597654, "loss": 0.02579035423696041, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 15.041032400270558, "step_time": 1.297266716003418} +{"epoch": 0, "iter": 20098, "iter_tflops": 20.622345612168637, "iter_time": 0.9461693191528321, "loss": 0.01732347533106804, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 28.751694042062606, "step_time": 0.6786462974548338} +{"epoch": 0, "iter": 20099, "iter_tflops": 52.38154148533583, "iter_time": 0.37250203323364256, "loss": 0.04212932661175728, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 57.060207236834344, "step_time": 0.3419586372375488} +{"epoch": 0, "iter": 20100, "iter_tflops": 51.393094697369015, "iter_time": 0.379666389465332, "loss": 0.021159835159778595, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 56.078922488084686, "step_time": 0.347942325592041} +{"epoch": 0, "iter": 20101, "iter_tflops": 29.682504297663492, "iter_time": 0.6950590591430663, "loss": 0.08581109344959259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.64567555783182, "step_time": 0.651940372467041} +{"epoch": 0, "iter": 20102, "iter_tflops": 14.751701206996913, "iter_time": 1.398556900024414, "loss": 0.060548730194568634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.593236962977755, "step_time": 1.1096020317077637} +{"epoch": 0, "iter": 20103, "iter_tflops": 13.76705718502804, "iter_time": 1.4985841369628907, "loss": 0.10628224164247513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.167874132442144, "step_time": 1.2017267456054688} +{"epoch": 0, "iter": 20104, "iter_tflops": 37.969888423824614, "iter_time": 
0.5433540725708008, "loss": 0.11718286573886871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65909597050577, "step_time": 0.4952362270355224} +{"epoch": 0, "iter": 20105, "iter_tflops": 11.366811897224967, "iter_time": 1.3330843963623047, "loss": 0.38139575719833374, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 12.089447773694458, "step_time": 1.25340047454834} +{"epoch": 0, "iter": 20106, "iter_tflops": 15.454086078210697, "iter_time": 0.980512176513672, "loss": 0.4158889949321747, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 17.23046376216271, "step_time": 0.8794261016845704} +{"epoch": 0, "iter": 20107, "iter_tflops": 22.15852441993678, "iter_time": 0.6838415451049804, "loss": 0.3586427569389343, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 23.85040078960538, "step_time": 0.6353318634033203} +{"epoch": 0, "iter": 20108, "iter_tflops": 22.499357542137314, "iter_time": 0.6734823226928711, "loss": 0.3983866572380066, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.221337588149275, "step_time": 0.6256020965576172} +{"epoch": 0, "iter": 20109, "iter_tflops": 19.096557109483076, "iter_time": 1.080356704711914, "loss": 0.6653558015823364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.636002023290064, "step_time": 0.9997621383666992} +{"epoch": 0, "iter": 20110, "iter_tflops": 21.184247852072275, "iter_time": 0.9738884124755859, "loss": 0.3832252025604248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.74596288095148, "step_time": 0.8688253078460695} +{"epoch": 0, "iter": 20111, "iter_tflops": 45.171659162774645, "iter_time": 0.4567264938354492, "loss": 0.4304836094379425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.133190993437, "step_time": 0.4199013557434082} +{"epoch": 0, "iter": 20112, "iter_tflops": 45.804238575459905, "iter_time": 0.45041887283325194, "loss": 0.5511826276779175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.60211680316032, "step_time": 0.41593171501159665} +{"epoch": 0, "iter": 20113, "iter_tflops": 31.37735534424962, 
"iter_time": 0.6575153732299804, "loss": 0.5711668729782104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.30518084044935, "step_time": 0.6194559822082519} +{"epoch": 0, "iter": 20114, "iter_tflops": 13.401416705124571, "iter_time": 1.5394710845947264, "loss": 0.520379364490509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.216035387558946, "step_time": 1.2722649536132813} +{"epoch": 0, "iter": 20115, "iter_tflops": 35.34792464972668, "iter_time": 0.5836578445434571, "loss": 0.7987117171287537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.51996695192891, "step_time": 0.5220422763824462} +{"epoch": 0, "iter": 20116, "iter_tflops": 37.07432783569946, "iter_time": 0.5564792327880859, "loss": 0.3805640935897827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.84813547437781, "step_time": 0.5177430076599121} +{"epoch": 0, "iter": 20117, "iter_tflops": 17.337454861222945, "iter_time": 1.1899724426269531, "loss": 0.012913512997329235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.23979356302574, "step_time": 1.1311034545898437} +{"epoch": 0, "iter": 20118, "iter_tflops": 16.593017923982785, "iter_time": 1.2433599243164062, "loss": 0.013236673548817635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.01541873970789, "step_time": 0.8590769844055175} +{"epoch": 0, "iter": 20119, "iter_tflops": 53.99002585941718, "iter_time": 0.3821278686523437, "loss": 0.00644415570423007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.42971684004115, "step_time": 0.3471511325836181} +{"epoch": 0, "iter": 20120, "iter_tflops": 59.16325894905211, "iter_time": 0.3487146224975586, "loss": 0.009402476251125336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.7517644942586, "step_time": 0.3186182441711426} +{"epoch": 0, "iter": 20121, "iter_tflops": 29.461054799026556, "iter_time": 0.7002835998535156, "loss": 0.42177024483680725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.212405064414117, "step_time": 0.6609901885986328} +{"epoch": 0, "iter": 20122, "iter_tflops": 
17.05323498517096, "iter_time": 1.2098052673339843, "loss": 0.4818531572818756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.756025632890438, "step_time": 0.9939809226989748} +{"epoch": 0, "iter": 20123, "iter_tflops": 46.39807899481296, "iter_time": 0.44465404510498047, "loss": 0.346145361661911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.190356983701214, "step_time": 0.4110569190979004} +{"epoch": 0, "iter": 20124, "iter_tflops": 44.1201808648576, "iter_time": 0.4676112632751465, "loss": 0.28357627987861633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73925102352064, "step_time": 0.43216206932067874} +{"epoch": 0, "iter": 20125, "iter_tflops": 45.76247298485053, "iter_time": 0.45082995223999023, "loss": 0.05365486070513725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34174671247395, "step_time": 0.40982077217102053} +{"epoch": 0, "iter": 20126, "iter_tflops": 47.75804313862269, "iter_time": 0.43199201965332024, "loss": 0.027018491178750992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.90854016033879, "step_time": 0.3899388160705567} +{"epoch": 0, "iter": 20127, "iter_tflops": 48.6838666580982, "iter_time": 0.42377680587768557, "loss": 0.050418347120285034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.90223894288818, "step_time": 0.38998526191711425} +{"epoch": 0, "iter": 20128, "iter_tflops": 50.40007395854346, "iter_time": 0.409346492767334, "loss": 0.062312446534633636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.87210319651058, "step_time": 0.37598510551452635} +{"epoch": 0, "iter": 20129, "iter_tflops": 30.39735929626864, "iter_time": 0.6787133483886718, "loss": 0.2807573080062866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.91111169604739, "step_time": 0.6268731880187989} +{"epoch": 0, "iter": 20130, "iter_tflops": 10.83383272699987, "iter_time": 1.9043208465576171, "loss": 0.28121066093444824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.327967197941067, "step_time": 1.673519500732422} +{"epoch": 0, "iter": 20131, 
"iter_tflops": 16.759717233564896, "iter_time": 1.2309929351806639, "loss": 0.28498610854148865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.079890765281824, "step_time": 1.0274504852294921} +{"epoch": 0, "iter": 20132, "iter_tflops": 47.64143020978979, "iter_time": 0.4330494155883789, "loss": 0.2890692353248596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.744426590952706, "step_time": 0.3987114143371582} +{"epoch": 0, "iter": 20133, "iter_tflops": 16.641803205841757, "iter_time": 0.944948013305664, "loss": 0.48314470052719116, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 17.380892394948848, "step_time": 0.9047659072875975} +{"epoch": 0, "iter": 20134, "iter_tflops": 8.812595974689547, "iter_time": 1.784450225830078, "loss": 0.2918245792388916, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 11.519493603418514, "step_time": 1.365132827758789} +{"epoch": 0, "iter": 20135, "iter_tflops": 23.666112567078613, "iter_time": 0.6644791717529297, "loss": 0.24836577475070953, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.5138731200759, "step_time": 0.6163563957214355} +{"epoch": 0, "iter": 20136, "iter_tflops": 24.22727316645929, "iter_time": 0.6490882720947266, "loss": 0.3136107325553894, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.069533018144153, "step_time": 0.6032190475463868} +{"epoch": 0, "iter": 20137, "iter_tflops": 17.142716752473454, "iter_time": 1.2034903106689452, "loss": 0.06083669140934944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.218300892006994, "step_time": 1.1324378509521484} +{"epoch": 0, "iter": 20138, "iter_tflops": 12.030890812056674, "iter_time": 1.7148433837890626, "loss": 0.0508582629263401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.662857512537565, "step_time": 1.5100130767822266} +{"epoch": 0, "iter": 20139, "iter_tflops": 12.947824376332841, "iter_time": 1.5934023284912109, "loss": 0.0967191606760025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.368566281735234, "step_time": 1.260409320831299} +{"epoch": 0, 
"iter": 20140, "iter_tflops": 47.86451202438731, "iter_time": 0.4310311050415038, "loss": 0.06306286156177521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.06547219671698, "step_time": 0.39625288391113284} +{"epoch": 0, "iter": 20141, "iter_tflops": 18.453997879390464, "iter_time": 0.83219669342041, "loss": 0.3034612536430359, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 19.527588700483157, "step_time": 0.7864440536499023} +{"epoch": 0, "iter": 20142, "iter_tflops": 12.122785694306316, "iter_time": 1.2668174133300782, "loss": 0.3409891128540039, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 14.152423094916731, "step_time": 1.08513969039917} +{"epoch": 0, "iter": 20143, "iter_tflops": 28.48098105500016, "iter_time": 0.5392144317626953, "loss": 0.38463476300239563, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.332161666030515, "step_time": 0.5063060188293457} +{"epoch": 0, "iter": 20144, "iter_tflops": 26.146362492996943, "iter_time": 0.5873610916137695, "loss": 0.3396257758140564, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.70866746252994, "step_time": 0.5542437591552734} +{"epoch": 0, "iter": 20145, "iter_tflops": 33.10332834508992, "iter_time": 0.6232332077026367, "loss": 0.43742141127586365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.34896916071139, "step_time": 0.5836405982971191} +{"epoch": 0, "iter": 20146, "iter_tflops": 12.618963093087707, "iter_time": 1.634927795410156, "loss": 0.5260835886001587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.080310996262604, "step_time": 1.2830033893585204} +{"epoch": 0, "iter": 20147, "iter_tflops": 40.70839911709499, "iter_time": 0.5068018875122071, "loss": 0.47673431038856506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78601631454124, "step_time": 0.4711799621582032} +{"epoch": 0, "iter": 20148, "iter_tflops": 48.97726233734335, "iter_time": 0.4212381935119629, "loss": 0.5279019474983215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.945135813540354, "step_time": 0.3896692905426026} 
+{"epoch": 0, "iter": 20149, "iter_tflops": 27.34104570371596, "iter_time": 0.7545831909179688, "loss": 0.5031959414482117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.972725050398708, "step_time": 0.7120867462158205} +{"epoch": 0, "iter": 20150, "iter_tflops": 17.76401377934645, "iter_time": 1.1613981933593749, "loss": 0.5280418395996094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.158076066823345, "step_time": 0.8908811531066895} +{"epoch": 0, "iter": 20151, "iter_tflops": 44.00734922830742, "iter_time": 0.4688101844787598, "loss": 0.5411185026168823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.44238990034995, "step_time": 0.43486623573303224} +{"epoch": 0, "iter": 20152, "iter_tflops": 42.958270126888806, "iter_time": 0.48025894546508796, "loss": 0.4602566957473755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.05894117886129, "step_time": 0.44792808914184573} +{"epoch": 0, "iter": 20153, "iter_tflops": 19.527581636074668, "iter_time": 1.0565104217529295, "loss": 0.5659441947937012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.27320566352741, "step_time": 1.0176532440185548} +{"epoch": 0, "iter": 20154, "iter_tflops": 24.249524301947428, "iter_time": 0.850783432006836, "loss": 0.5788252949714661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.275707606242246, "step_time": 0.6814405059814453} +{"epoch": 0, "iter": 20155, "iter_tflops": 46.58005481519579, "iter_time": 0.44291690063476563, "loss": 0.5535541772842407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.76734666096849, "step_time": 0.4063851051330566} +{"epoch": 0, "iter": 20156, "iter_tflops": 45.31267642094255, "iter_time": 0.45530511856079103, "loss": 0.5103248357772827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.096474331287624, "step_time": 0.4202153778076172} +{"epoch": 0, "iter": 20157, "iter_tflops": 34.03095150571448, "iter_time": 0.6062449798583985, "loss": 0.15725381672382355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.353121627833765, "step_time": 
0.567519172668457} +{"epoch": 0, "iter": 20158, "iter_tflops": 13.183942010526561, "iter_time": 1.5648653106689454, "loss": 0.05797433480620384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.695365997831772, "step_time": 1.3144703674316407} +{"epoch": 0, "iter": 20159, "iter_tflops": 13.222435919721775, "iter_time": 1.560309585571289, "loss": 0.08508165180683136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.321511499104588, "step_time": 1.3465442695617675} +{"epoch": 0, "iter": 20160, "iter_tflops": 41.52629874111438, "iter_time": 0.49681994628906245, "loss": 0.11599048227071762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.70345896634258, "step_time": 0.45141208076477046} +{"epoch": 0, "iter": 20161, "iter_tflops": 13.486394166711404, "iter_time": 1.1387295837402345, "loss": 0.3744792342185974, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 14.31488748395357, "step_time": 1.0728240814208985} +{"epoch": 0, "iter": 20162, "iter_tflops": 14.895058831947674, "iter_time": 1.0310369491577147, "loss": 0.38550227880477905, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 17.651377422965705, "step_time": 0.8700372581481933} +{"epoch": 0, "iter": 20163, "iter_tflops": 28.205371743668632, "iter_time": 0.5444833755493164, "loss": 0.2615661025047302, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.014901029709968, "step_time": 0.5116577262878418} +{"epoch": 0, "iter": 20164, "iter_tflops": 27.909276157153027, "iter_time": 0.5502599182128907, "loss": 0.3801131844520569, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.6427969492264, "step_time": 0.5180805320739745} +{"epoch": 0, "iter": 20165, "iter_tflops": 29.336522934944664, "iter_time": 0.7032562637329101, "loss": 0.6091424226760864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.147456427986594, "step_time": 0.6623684844970702} +{"epoch": 0, "iter": 20166, "iter_tflops": 18.391895399686955, "iter_time": 1.1217491760253904, "loss": 0.4662562310695648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
23.284080249466665, "step_time": 0.8860600585937499} +{"epoch": 0, "iter": 20167, "iter_tflops": 39.616065624521475, "iter_time": 0.5207759323120117, "loss": 0.5045861601829529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.49334139520058, "step_time": 0.48551356124877937} +{"epoch": 0, "iter": 20168, "iter_tflops": 50.017695931906744, "iter_time": 0.412475887298584, "loss": 0.595018208026886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.85430830939485, "step_time": 0.38309086418151855} +{"epoch": 0, "iter": 20169, "iter_tflops": 23.388915610285444, "iter_time": 0.8820885009765624, "loss": 0.04182462766766548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.615738810892505, "step_time": 0.8381261138916015} +{"epoch": 0, "iter": 20170, "iter_tflops": 16.347994922804183, "iter_time": 1.2619953460693358, "loss": 0.05622248724102974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.81956342212686, "step_time": 1.0409459114074706} +{"epoch": 0, "iter": 20171, "iter_tflops": 48.08899114837651, "iter_time": 0.4290190544128418, "loss": 0.06647518277168274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.46998996384242, "step_time": 0.3931979694366455} +{"epoch": 0, "iter": 20172, "iter_tflops": 49.787835780955284, "iter_time": 0.41438020324707026, "loss": 0.0517948716878891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.08692415855307, "step_time": 0.3814432754516601} +{"epoch": 0, "iter": 20173, "iter_tflops": 23.302182239846022, "iter_time": 0.8444604415893554, "loss": 0.011148949153721333, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 24.473382870558325, "step_time": 0.8040478591918946} +{"epoch": 0, "iter": 20174, "iter_tflops": 13.705849395936983, "iter_time": 1.4357206573486327, "loss": 0.0017994549125432968, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 19.291958934196828, "step_time": 1.0199986000061037} +{"epoch": 0, "iter": 20175, "iter_tflops": 40.015229312606344, "iter_time": 0.4917570495605469, "loss": 0.0027309146244078875, "lr": 3e-05, "seqlen": 
7824.0, "step_tflops": 44.45955123193392, "step_time": 0.4425994091033935} +{"epoch": 0, "iter": 20176, "iter_tflops": 47.56132778869657, "iter_time": 0.41373468780517575, "loss": 0.004469882231205702, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 52.559124277144875, "step_time": 0.37439305496215824} +{"epoch": 0, "iter": 20177, "iter_tflops": 31.38114736367929, "iter_time": 0.6574359207153321, "loss": 0.6149100661277771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.59173806607862, "step_time": 0.5964167938232422} +{"epoch": 0, "iter": 20178, "iter_tflops": 39.53378728578824, "iter_time": 0.5218597793579102, "loss": 0.47139522433280945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.72865633422182, "step_time": 0.48283974456787104} +{"epoch": 0, "iter": 20179, "iter_tflops": 43.13785224677985, "iter_time": 0.4782596359252929, "loss": 0.5348105430603027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44991361427015, "step_time": 0.444157844543457} +{"epoch": 0, "iter": 20180, "iter_tflops": 44.75315875310312, "iter_time": 0.46099748229980475, "loss": 0.5570029616355896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0662339530651, "step_time": 0.4292221755981446} +{"epoch": 0, "iter": 20181, "iter_tflops": 18.40291888391537, "iter_time": 1.1210772399902345, "loss": 0.12084183096885681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.09535869816159, "step_time": 1.0804245071411134} +{"epoch": 0, "iter": 20182, "iter_tflops": 17.802261793162128, "iter_time": 1.1589029388427734, "loss": 0.18433928489685059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.69628406575564, "step_time": 0.9509044704437255} +{"epoch": 0, "iter": 20183, "iter_tflops": 41.67577373336444, "iter_time": 0.49503804397583007, "loss": 0.14979419112205505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.480099483588056, "step_time": 0.4536290321350098} +{"epoch": 0, "iter": 20184, "iter_tflops": 42.675930091484865, "iter_time": 0.4834362945556641, "loss": 0.23195073008537292, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 46.45282246842007, "step_time": 0.4441300315856933} +{"epoch": 0, "iter": 20185, "iter_tflops": 23.452046441395012, "iter_time": 0.8797139968872071, "loss": 0.5545626878738403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.109040926408284, "step_time": 0.8216599578857422} +{"epoch": 0, "iter": 20186, "iter_tflops": 8.317207351723923, "iter_time": 2.4805313415527346, "loss": 0.6884351968765259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.733411828821255, "step_time": 1.9221375122070312} +{"epoch": 0, "iter": 20187, "iter_tflops": 10.819614995557677, "iter_time": 1.9068232574462893, "loss": 0.4477440118789673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.41460949767173, "step_time": 1.5379570693969726} +{"epoch": 0, "iter": 20188, "iter_tflops": 40.287636675017716, "iter_time": 0.5120949058532716, "loss": 0.6161082983016968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.755447687616524, "step_time": 0.4715091400146485} +{"epoch": 0, "iter": 20189, "iter_tflops": 14.58553091369174, "iter_time": 1.0950148620605469, "loss": 0.29111942648887634, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.739452282546088, "step_time": 1.0147350006103515} +{"epoch": 0, "iter": 20190, "iter_tflops": 11.517200042360496, "iter_time": 1.3867409667968749, "loss": 0.34192559123039246, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 14.5803335777514, "step_time": 1.0954051933288573} +{"epoch": 0, "iter": 20191, "iter_tflops": 23.394376051250546, "iter_time": 0.6827013931274415, "loss": 0.22769296169281006, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 25.272152431075487, "step_time": 0.6319751815795898} +{"epoch": 0, "iter": 20192, "iter_tflops": 24.27931032586803, "iter_time": 0.6578182373046875, "loss": 0.2595967948436737, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 26.06285162650923, "step_time": 0.6128022117614746} +{"epoch": 0, "iter": 20193, "iter_tflops": 14.227091061031164, "iter_time": 0.9446984405517579, "loss": 
0.0017328509129583836, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 15.114992184237108, "step_time": 0.8892039489746094} +{"epoch": 0, "iter": 20194, "iter_tflops": 11.452212517416214, "iter_time": 1.1735994873046875, "loss": 0.0031259015668183565, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 13.935169447321377, "step_time": 0.96448850440979} +{"epoch": 0, "iter": 20195, "iter_tflops": 35.28484784813694, "iter_time": 0.3809088478088379, "loss": 0.009237796068191528, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 38.69692284499812, "step_time": 0.3473224678039551} +{"epoch": 0, "iter": 20196, "iter_tflops": 36.711794143664186, "iter_time": 0.3661033477783204, "loss": 0.007975884713232517, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 40.24881172738241, "step_time": 0.3339306221008301} +{"epoch": 0, "iter": 20197, "iter_tflops": 35.23228397703693, "iter_time": 0.5855735473632813, "loss": 0.5749794840812683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.71620560502027, "step_time": 0.5470087242126466} +{"epoch": 0, "iter": 20198, "iter_tflops": 11.215458429935575, "iter_time": 1.8395229797363282, "loss": 0.5977739095687866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.571789282056207, "step_time": 1.4158243103027346} +{"epoch": 0, "iter": 20199, "iter_tflops": 10.998880394982391, "iter_time": 1.875744873046875, "loss": 0.7762467861175537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.625374192387502, "step_time": 1.6340975875854493} +{"epoch": 0, "iter": 20200, "iter_tflops": 16.156790445482624, "iter_time": 1.2769301910400392, "loss": 0.45622310042381287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.125982190668108, "step_time": 1.078694589614868} +{"epoch": 0, "iter": 20201, "iter_tflops": 10.976554387131616, "iter_time": 1.414012954711914, "loss": 0.31129804253578186, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 11.703129003024506, "step_time": 1.32622566986084} +{"epoch": 0, "iter": 20202, "iter_tflops": 20.07122513514659, "iter_time": 
0.7732956008911133, "loss": 0.2830665409564972, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 23.975978032685347, "step_time": 0.6473558692932129} +{"epoch": 0, "iter": 20203, "iter_tflops": 29.370876126742694, "iter_time": 0.5284483184814452, "loss": 0.3676241934299469, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 31.241462489874, "step_time": 0.49680741119384764} +{"epoch": 0, "iter": 20204, "iter_tflops": 28.754456077310426, "iter_time": 0.5397768630981445, "loss": 0.2212616503238678, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 30.56419660189935, "step_time": 0.5078160667419434} +{"epoch": 0, "iter": 20205, "iter_tflops": 44.314169228168595, "iter_time": 0.46556426239013676, "loss": 0.6119900345802307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.630589306004666, "step_time": 0.4242410755157471} +{"epoch": 0, "iter": 20206, "iter_tflops": 43.695292608806135, "iter_time": 0.4721582641601562, "loss": 0.4872245490550995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22716993108565, "step_time": 0.41909972763061526} +{"epoch": 0, "iter": 20207, "iter_tflops": 46.149956944934296, "iter_time": 0.44704469680786135, "loss": 0.6024965643882751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.47803260468817, "step_time": 0.41697481536865233} +{"epoch": 0, "iter": 20208, "iter_tflops": 41.233556765407776, "iter_time": 0.5003471717834472, "loss": 0.5762934684753418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4879269346053, "step_time": 0.4637458953857422} +{"epoch": 0, "iter": 20209, "iter_tflops": 29.880951711266388, "iter_time": 0.6904429855346679, "loss": 0.4901955723762512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.73439774586333, "step_time": 0.6501176948547364} +{"epoch": 0, "iter": 20210, "iter_tflops": 15.135053443313618, "iter_time": 1.3631331787109373, "loss": 0.5257638096809387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.14324925043539, "step_time": 1.1371223106384276} +{"epoch": 0, "iter": 20211, "iter_tflops": 38.04673835844593, 
"iter_time": 0.5422565612792969, "loss": 0.5812152028083801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3493802924469, "step_time": 0.4989456520080567} +{"epoch": 0, "iter": 20212, "iter_tflops": 34.49835445312787, "iter_time": 0.5980312347412109, "loss": 0.4760182201862335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.52057849398497, "step_time": 0.5498607521057128} +{"epoch": 0, "iter": 20213, "iter_tflops": 15.04190537139503, "iter_time": 1.3715744781494141, "loss": 0.4759959578514099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.994594904591429, "step_time": 1.2898790893554686} +{"epoch": 0, "iter": 20214, "iter_tflops": 22.913874634749746, "iter_time": 0.9003755950927734, "loss": 0.49429047107696533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.707141981459788, "step_time": 0.6506765422821045} +{"epoch": 0, "iter": 20215, "iter_tflops": 38.4570728711881, "iter_time": 0.5364707183837891, "loss": 0.6412323713302612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.625031587321075, "step_time": 0.4956415100097656} +{"epoch": 0, "iter": 20216, "iter_tflops": 39.164667938395034, "iter_time": 0.5267782058715821, "loss": 0.48120081424713135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.468365120375644, "step_time": 0.4857990989685058} +{"epoch": 0, "iter": 20217, "iter_tflops": 21.660740179045597, "iter_time": 0.95246484375, "loss": 0.5129873156547546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.599742295827685, "step_time": 0.874208423614502} +{"epoch": 0, "iter": 20218, "iter_tflops": 20.082545323950107, "iter_time": 1.0273146743774413, "loss": 0.5862132906913757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.332996270279178, "step_time": 0.8478649024963378} +{"epoch": 0, "iter": 20219, "iter_tflops": 38.17972693526847, "iter_time": 0.5403677597045898, "loss": 0.5864244699478149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.471062472111214, "step_time": 0.4974816722869873} +{"epoch": 0, "iter": 20220, "iter_tflops": 
37.55809132649554, "iter_time": 0.5493115539550781, "loss": 0.37439316511154175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.03082001994567, "step_time": 0.5028194293975831} +{"epoch": 0, "iter": 20221, "iter_tflops": 22.533198403406555, "iter_time": 0.915586555480957, "loss": 0.2945150136947632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.351801141475878, "step_time": 0.8472101669311525} +{"epoch": 0, "iter": 20222, "iter_tflops": 38.166430912385074, "iter_time": 0.5405560073852539, "loss": 0.22760219871997833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.940423101327724, "step_time": 0.49191429138183596} +{"epoch": 0, "iter": 20223, "iter_tflops": 39.61172719805554, "iter_time": 0.5208329696655274, "loss": 0.2562257945537567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.311580115117806, "step_time": 0.4763412799835205} +{"epoch": 0, "iter": 20224, "iter_tflops": 41.632596813110055, "iter_time": 0.49555144500732423, "loss": 0.24912850558757782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27988008705259, "step_time": 0.45563489723205564} +{"epoch": 0, "iter": 20225, "iter_tflops": 27.81209025276966, "iter_time": 0.7418030548095703, "loss": 0.4063241183757782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.319675798102736, "step_time": 0.6804523124694825} +{"epoch": 0, "iter": 20226, "iter_tflops": 35.562447913725244, "iter_time": 0.5801370468139648, "loss": 0.34080860018730164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.976967381102114, "step_time": 0.5293150005340576} +{"epoch": 0, "iter": 20227, "iter_tflops": 38.85305735950312, "iter_time": 0.5310030899047852, "loss": 0.4209959805011749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.5300940330429, "step_time": 0.48509400177001955} +{"epoch": 0, "iter": 20228, "iter_tflops": 36.44663578222331, "iter_time": 0.5660630416870117, "loss": 0.41542065143585205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.60560882341257, "step_time": 0.5209134292602539} +{"epoch": 0, "iter": 20229, 
"iter_tflops": 25.17121742423671, "iter_time": 0.819630340576172, "loss": 0.6585206389427185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.50708404252158, "step_time": 0.7500283737182617} +{"epoch": 0, "iter": 20230, "iter_tflops": 38.57740319354214, "iter_time": 0.5347973632812499, "loss": 0.5191676616668701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.064696296215054, "step_time": 0.49046101188659663} +{"epoch": 0, "iter": 20231, "iter_tflops": 40.45737140037256, "iter_time": 0.5099464645385742, "loss": 0.5785076022148132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.83083436022614, "step_time": 0.470698169708252} +{"epoch": 0, "iter": 20232, "iter_tflops": 40.65739462679703, "iter_time": 0.5074376678466797, "loss": 0.6096101403236389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.132225453634305, "step_time": 0.46748364257812497} +{"epoch": 0, "iter": 20233, "iter_tflops": 17.426435427153034, "iter_time": 1.1838963623046874, "loss": 0.5093262791633606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.74798531518215, "step_time": 1.1004432296752928} +{"epoch": 0, "iter": 20234, "iter_tflops": 26.286905342506742, "iter_time": 0.7848429946899413, "loss": 0.4216887354850769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.33412109617957, "step_time": 0.6801282768249511} +{"epoch": 0, "iter": 20235, "iter_tflops": 41.37202148008435, "iter_time": 0.49867259979248046, "loss": 0.396435022354126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.10189552837563, "step_time": 0.45743295860290534} +{"epoch": 0, "iter": 20236, "iter_tflops": 43.45925857763696, "iter_time": 0.47472262954711913, "loss": 0.4947499930858612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.716683844980636, "step_time": 0.44162153244018554} +{"epoch": 0, "iter": 20237, "iter_tflops": 27.25281225978855, "iter_time": 0.757026222229004, "loss": 0.37786057591438293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.885046590203604, "step_time": 0.7142482337951661} +{"epoch": 0, "iter": 
20238, "iter_tflops": 13.090304764017175, "iter_time": 1.576059066772461, "loss": 0.3465275168418884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.64558265941077, "step_time": 1.2394335441589355} +{"epoch": 0, "iter": 20239, "iter_tflops": 37.51965210336917, "iter_time": 0.5498743286132813, "loss": 0.2990339696407318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.231372175231655, "step_time": 0.5003736820220948} +{"epoch": 0, "iter": 20240, "iter_tflops": 38.373638002918575, "iter_time": 0.5376371536254883, "loss": 0.29121866822242737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.624689438801205, "step_time": 0.4956455841064453} +{"epoch": 0, "iter": 20241, "iter_tflops": 33.64638338358381, "iter_time": 0.6131741790771483, "loss": 0.033660512417554855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.65719062937555, "step_time": 0.5478659763336181} +{"epoch": 0, "iter": 20242, "iter_tflops": 37.35104177892345, "iter_time": 0.552356575012207, "loss": 0.05183311924338341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.681829536910456, "step_time": 0.494966121673584} +{"epoch": 0, "iter": 20243, "iter_tflops": 39.01546512523064, "iter_time": 0.528792709350586, "loss": 0.043686941266059875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19009462262802, "step_time": 0.47768113708496096} +{"epoch": 0, "iter": 20244, "iter_tflops": 42.25413963430791, "iter_time": 0.48826206588745114, "loss": 0.029654119163751602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.27807315027771, "step_time": 0.44580709838867183} +{"epoch": 0, "iter": 20245, "iter_tflops": 21.337815025979243, "iter_time": 0.9668793869018555, "loss": 0.5029655694961548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.645871500106278, "step_time": 0.9110311126708984} +{"epoch": 0, "iter": 20246, "iter_tflops": 17.26856420124311, "iter_time": 1.1947196807861329, "loss": 0.37710919976234436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.725033284035728, "step_time": 0.9078575706481933} 
+{"epoch": 0, "iter": 20247, "iter_tflops": 34.39533932549096, "iter_time": 0.5998223571777344, "loss": 0.4512975811958313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.244462763577715, "step_time": 0.5539372024536132} +{"epoch": 0, "iter": 20248, "iter_tflops": 32.098691804152395, "iter_time": 0.642739387512207, "loss": 0.4520932137966156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.99403688285221, "step_time": 0.5895602607727052} +{"epoch": 0, "iter": 20249, "iter_tflops": 14.25769454355987, "iter_time": 1.4470146942138673, "loss": 0.569448709487915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.210504337009512, "step_time": 1.3563714294433593} +{"epoch": 0, "iter": 20250, "iter_tflops": 31.90518073108125, "iter_time": 0.6466377258300782, "loss": 0.5103013515472412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.60403668492777, "step_time": 0.5636289157867431} +{"epoch": 0, "iter": 20251, "iter_tflops": 48.07226488129444, "iter_time": 0.429168327331543, "loss": 0.572251558303833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.15961329622893, "step_time": 0.39553770065307614} +{"epoch": 0, "iter": 20252, "iter_tflops": 49.55326865462681, "iter_time": 0.41634172821044924, "loss": 0.5931167602539062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.383957107117745, "step_time": 0.3864661712646484} +{"epoch": 0, "iter": 20253, "iter_tflops": 25.181782088562237, "iter_time": 0.8192864761352538, "loss": 0.016076721251010895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.51693006688371, "step_time": 0.7780347671508789} +{"epoch": 0, "iter": 20254, "iter_tflops": 14.870540342836, "iter_time": 1.3873802185058595, "loss": 0.0033310900907963514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.64720646569794, "step_time": 1.1690855178833006} +{"epoch": 0, "iter": 20255, "iter_tflops": 53.09188092762438, "iter_time": 0.38859225082397464, "loss": 0.005534985568374395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.51183161814479, "step_time": 
0.35259695243835454} +{"epoch": 0, "iter": 20256, "iter_tflops": 55.85289855425687, "iter_time": 0.369382682800293, "loss": 0.0020412507001310587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.182313983556156, "step_time": 0.33720681953430176} +{"epoch": 0, "iter": 20257, "iter_tflops": 24.193491775792346, "iter_time": 0.8527538604736328, "loss": 0.3346271812915802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.47106760190356, "step_time": 0.8099814987182619} +{"epoch": 0, "iter": 20258, "iter_tflops": 17.29758543691204, "iter_time": 1.1927152252197266, "loss": 0.5700517892837524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.391766480660873, "step_time": 1.0117364540100098} +{"epoch": 0, "iter": 20259, "iter_tflops": 48.11468602168455, "iter_time": 0.42878994369506834, "loss": 0.4347902834415436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.32099051524567, "step_time": 0.3943177165985107} +{"epoch": 0, "iter": 20260, "iter_tflops": 48.96792905777608, "iter_time": 0.4213184814453125, "loss": 0.45708033442497253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.84250287684984, "step_time": 0.3904261226654053} +{"epoch": 0, "iter": 20261, "iter_tflops": 33.2426776335036, "iter_time": 0.6206206893920898, "loss": 0.5967397689819336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.53673112525148, "step_time": 0.5805568733215332} +{"epoch": 0, "iter": 20262, "iter_tflops": 34.36791621697428, "iter_time": 0.6003009719848633, "loss": 0.5478507876396179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.55964145789104, "step_time": 0.5492888832092285} +{"epoch": 0, "iter": 20263, "iter_tflops": 40.50403578377873, "iter_time": 0.5093589591979981, "loss": 0.5527551770210266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.790145493719535, "step_time": 0.4711355323791504} +{"epoch": 0, "iter": 20264, "iter_tflops": 37.11060512003794, "iter_time": 0.5559352493286134, "loss": 0.592504620552063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.51838718564921, 
"step_time": 0.5091785469055176} +{"epoch": 0, "iter": 20265, "iter_tflops": 13.103322540519247, "iter_time": 1.3756636505126953, "loss": 0.0495755597949028, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 14.078525864773964, "step_time": 1.2803730087280274} +{"epoch": 0, "iter": 20266, "iter_tflops": 18.000505006955155, "iter_time": 1.0014032669067383, "loss": 0.04743051901459694, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 21.72365811234076, "step_time": 0.8297757415771485} +{"epoch": 0, "iter": 20267, "iter_tflops": 44.93776199413594, "iter_time": 0.40112733078002927, "loss": 0.07572554796934128, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 49.03177546575142, "step_time": 0.3676343421936035} +{"epoch": 0, "iter": 20268, "iter_tflops": 44.28165122155275, "iter_time": 0.40707073974609376, "loss": 0.061257004737854004, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 48.276975225821246, "step_time": 0.3733822269439697} +{"epoch": 0, "iter": 20269, "iter_tflops": 26.493134493141966, "iter_time": 0.7787335815429688, "loss": 0.0568840391933918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.946070107941207, "step_time": 0.7382466812133789} +{"epoch": 0, "iter": 20270, "iter_tflops": 19.170843384106924, "iter_time": 1.0761703643798826, "loss": 0.09748251736164093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.578785847290707, "step_time": 0.6974962940216064} +{"epoch": 0, "iter": 20271, "iter_tflops": 55.10701639706578, "iter_time": 0.37438233566284185, "loss": 0.05547862499952316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.80732474896667, "step_time": 0.3449593105316162} +{"epoch": 0, "iter": 20272, "iter_tflops": 50.203392353851875, "iter_time": 0.41095018768310543, "loss": 0.06026213616132736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.639449537862205, "step_time": 0.3775860424041748} +{"epoch": 0, "iter": 20273, "iter_tflops": 24.75143646528633, "iter_time": 0.8335311584472657, "loss": 0.05005751922726631, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 26.00357713373462, "step_time": 0.7933944396972656} +{"epoch": 0, "iter": 20274, "iter_tflops": 13.51815942944315, "iter_time": 1.5261762237548828, "loss": 0.018638191744685173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.538304359835333, "step_time": 1.2474733238220213} +{"epoch": 0, "iter": 20275, "iter_tflops": 43.95837491135845, "iter_time": 0.46933248901367186, "loss": 0.07701170444488525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.54333768659774, "step_time": 0.425003604888916} +{"epoch": 0, "iter": 20276, "iter_tflops": 44.67447253119952, "iter_time": 0.46180944824218745, "loss": 0.04768074303865433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78020148308897, "step_time": 0.4229398994445801} +{"epoch": 0, "iter": 20277, "iter_tflops": 18.962265696784964, "iter_time": 1.088007827758789, "loss": 0.21758586168289185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.23726235305793, "step_time": 1.0194606933593748} +{"epoch": 0, "iter": 20278, "iter_tflops": 16.185621718019654, "iter_time": 1.2746556091308594, "loss": 0.4324205219745636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.428348961815274, "step_time": 0.9198667964935302} +{"epoch": 0, "iter": 20279, "iter_tflops": 53.80717899380363, "iter_time": 0.3834264106750488, "loss": 0.38665276765823364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.60625287062291, "step_time": 0.35202887916564946} +{"epoch": 0, "iter": 20280, "iter_tflops": 51.71073440206295, "iter_time": 0.39897119522094726, "loss": 0.4391554892063141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.98776355217818, "step_time": 0.3684929027557373} +{"epoch": 0, "iter": 20281, "iter_tflops": 33.95006317318665, "iter_time": 0.6076893997192382, "loss": 0.15199755132198334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.28223053714911, "step_time": 0.5686280364990235} +{"epoch": 0, "iter": 20282, "iter_tflops": 18.713721980607737, "iter_time": 1.1024580535888673, "loss": 0.20702126622200012, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.86320663228029, "step_time": 0.9888745231628419} +{"epoch": 0, "iter": 20283, "iter_tflops": 49.80394509852767, "iter_time": 0.4142461700439453, "loss": 0.1555052101612091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.344612386995706, "step_time": 0.37963456916809085} +{"epoch": 0, "iter": 20284, "iter_tflops": 48.783145073192756, "iter_time": 0.42291437911987306, "loss": 0.1673443764448166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.97676359927843, "step_time": 0.38943665313720705} +{"epoch": 0, "iter": 20285, "iter_tflops": 25.25915279329422, "iter_time": 0.8167769393920898, "loss": 0.04690416157245636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.55810482037355, "step_time": 0.7768285293579101} +{"epoch": 0, "iter": 20286, "iter_tflops": 13.241902165976756, "iter_time": 1.558015853881836, "loss": 0.019118934869766235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.194296300920804, "step_time": 1.1998800735473631} +{"epoch": 0, "iter": 20287, "iter_tflops": 40.312635489042215, "iter_time": 0.5117773437500001, "loss": 0.02021227777004242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.59539877464668, "step_time": 0.4626282997131348} +{"epoch": 0, "iter": 20288, "iter_tflops": 46.06061341201915, "iter_time": 0.4479118270874024, "loss": 0.034523140639066696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.4853994312025, "step_time": 0.40865465545654295} +{"epoch": 0, "iter": 20289, "iter_tflops": 19.698075560163556, "iter_time": 0.6369430236816406, "loss": 0.012943399138748646, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 22.05168008328938, "step_time": 0.5689612655639648} +{"epoch": 0, "iter": 20290, "iter_tflops": 26.72772617745961, "iter_time": 0.4694208450317383, "loss": 0.022909170016646385, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 29.62270579087883, "step_time": 0.42354509735107415} +{"epoch": 0, "iter": 20291, "iter_tflops": 25.43958815936125, "iter_time": 0.49319005203247074, "loss": 
0.050729457288980484, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 28.105678331826276, "step_time": 0.4464062976837158} +{"epoch": 0, "iter": 20292, "iter_tflops": 29.03796545681285, "iter_time": 0.4320740661621094, "loss": 0.01402103342115879, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 31.948116205647853, "step_time": 0.39271648216247557} +{"epoch": 0, "iter": 20293, "iter_tflops": 25.912162278517396, "iter_time": 0.7961934356689452, "loss": 0.02656368352472782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.49778538949795, "step_time": 0.7239542732238771} +{"epoch": 0, "iter": 20294, "iter_tflops": 49.72849611153145, "iter_time": 0.4148746719360351, "loss": 0.058708444237709045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.515067760109346, "step_time": 0.3784475440979004} +{"epoch": 0, "iter": 20295, "iter_tflops": 52.950874909706485, "iter_time": 0.38962705612182624, "loss": 0.012543625198304653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.96539602613822, "step_time": 0.35592085838317866} +{"epoch": 0, "iter": 20296, "iter_tflops": 55.53388318019276, "iter_time": 0.37150460815429687, "loss": 0.01708618365228176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.43179657789602, "step_time": 0.34139467430114745} +{"epoch": 0, "iter": 20297, "iter_tflops": 39.22165435876541, "iter_time": 0.5260128326416015, "loss": 0.12843957543373108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.292555339475555, "step_time": 0.4878185615539551} +{"epoch": 0, "iter": 20298, "iter_tflops": 24.35129808766264, "iter_time": 0.847227668762207, "loss": 0.16528956592082977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.916249613576305, "step_time": 0.7134775009155274} +{"epoch": 0, "iter": 20299, "iter_tflops": 51.3611138341379, "iter_time": 0.4016870346069336, "loss": 0.18509092926979065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.02495327140672, "step_time": 0.36824829483032223} +{"epoch": 0, "iter": 20300, "iter_tflops": 53.765787873519976, "iter_time": 
0.38372158813476565, "loss": 0.24525023996829987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.29738442405985, "step_time": 0.3538939819335938} +{"epoch": 0, "iter": 20301, "iter_tflops": 44.50869112972931, "iter_time": 0.4635295486450195, "loss": 0.024584218859672546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04999331813043, "step_time": 0.4206135845184326} +{"epoch": 0, "iter": 20302, "iter_tflops": 48.51124189791348, "iter_time": 0.4252847938537598, "loss": 0.03971341997385025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.35120128524999, "step_time": 0.3795885467529297} +{"epoch": 0, "iter": 20303, "iter_tflops": 50.92247722601086, "iter_time": 0.40514709091186524, "loss": 0.02307291328907013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.60721774825987, "step_time": 0.37101466941833505} +{"epoch": 0, "iter": 20304, "iter_tflops": 53.183745164459225, "iter_time": 0.38792103576660153, "loss": 0.02682318724691868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.70698733653709, "step_time": 0.35751465225219725} +{"epoch": 0, "iter": 20305, "iter_tflops": 40.80959228956448, "iter_time": 0.5055452003479005, "loss": 0.054113153368234634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.265125545432056, "step_time": 0.46608008575439447} +{"epoch": 0, "iter": 20306, "iter_tflops": 36.3773939677551, "iter_time": 0.5671405029296875, "loss": 0.03807467967271805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.195502013608774, "step_time": 0.5132687110900879} +{"epoch": 0, "iter": 20307, "iter_tflops": 39.381312897984714, "iter_time": 0.5238802871704101, "loss": 0.078892283141613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.4401571619177, "step_time": 0.4749313735961914} +{"epoch": 0, "iter": 20308, "iter_tflops": 44.54570382618627, "iter_time": 0.46314440536499024, "loss": 0.050752200186252594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.110422774275776, "step_time": 0.42009602737426754} +{"epoch": 0, "iter": 20309, "iter_tflops": 
22.705664014177675, "iter_time": 0.9086320266723633, "loss": 0.037355728447437286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.41519768207911, "step_time": 0.8450102996826171} +{"epoch": 0, "iter": 20310, "iter_tflops": 23.609166759706085, "iter_time": 0.8738594512939452, "loss": 0.06699618697166443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.60909331725221, "step_time": 0.7472571907043456} +{"epoch": 0, "iter": 20311, "iter_tflops": 43.149829773803816, "iter_time": 0.47812688064575193, "loss": 0.018964480608701706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.42243378283247, "step_time": 0.4350492343902588} +{"epoch": 0, "iter": 20312, "iter_tflops": 43.29250898093294, "iter_time": 0.47655111694335933, "loss": 0.025065645575523376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.68350415949745, "step_time": 0.4326673107147217} +{"epoch": 0, "iter": 20313, "iter_tflops": 29.735632515294736, "iter_time": 0.6938172073364257, "loss": 0.23089294135570526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.987876934192258, "step_time": 0.6449660148620606} +{"epoch": 0, "iter": 20314, "iter_tflops": 7.12806630168117, "iter_time": 2.894346466064453, "loss": 0.2736191153526306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.32555372790807, "step_time": 2.2123183364868164} +{"epoch": 0, "iter": 20315, "iter_tflops": 14.088223701786953, "iter_time": 1.4644212036132813, "loss": 0.17435815930366516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.826878302580733, "step_time": 1.3035478706359864} +{"epoch": 0, "iter": 20316, "iter_tflops": 47.15501635953903, "iter_time": 0.4375164108276367, "loss": 0.1903158575296402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.44755807264358, "step_time": 0.40101210403442383} +{"epoch": 0, "iter": 20317, "iter_tflops": 15.347007141376114, "iter_time": 1.072740180969238, "loss": 0.3748442530632019, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 15.981397211330522, "step_time": 1.0301571884155274} +{"epoch": 0, "iter": 20318, 
"iter_tflops": 7.764965026021673, "iter_time": 2.1202093200683594, "loss": 0.523456871509552, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 9.367089375688433, "step_time": 1.7575738372802734} +{"epoch": 0, "iter": 20319, "iter_tflops": 11.676172512154611, "iter_time": 1.4099955444335937, "loss": 0.3150363266468048, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 13.575237895161733, "step_time": 1.2127486343383789} +{"epoch": 0, "iter": 20320, "iter_tflops": 20.277956430924938, "iter_time": 0.8118841400146484, "loss": 0.2523064613342285, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 24.532296026987705, "step_time": 0.6710888862609864} +{"epoch": 0, "iter": 20321, "iter_tflops": 14.195011387541257, "iter_time": 1.1626887512207031, "loss": 0.29736196994781494, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 14.811695152617219, "step_time": 1.1142802963256835} +{"epoch": 0, "iter": 20322, "iter_tflops": 15.553062616643215, "iter_time": 1.0611659240722655, "loss": 0.3703227639198303, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 18.81241740015563, "step_time": 0.8773130912780761} +{"epoch": 0, "iter": 20323, "iter_tflops": 23.856491176969456, "iter_time": 0.6918192596435547, "loss": 0.3277345895767212, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 25.827941214478745, "step_time": 0.639012607574463} +{"epoch": 0, "iter": 20324, "iter_tflops": 23.824285691890328, "iter_time": 0.6927544555664062, "loss": 0.15600542724132538, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 25.563064376482437, "step_time": 0.6456338653564453} +{"epoch": 0, "iter": 20325, "iter_tflops": 30.191242690528924, "iter_time": 0.6833469467163086, "loss": 0.4785429835319519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.15401572180033, "step_time": 0.6222803802490234} +{"epoch": 0, "iter": 20326, "iter_tflops": 37.51540415102171, "iter_time": 0.5499365921020507, "loss": 0.4363350570201874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.901283094101466, "step_time": 0.5044118900299072} +{"epoch": 0, 
"iter": 20327, "iter_tflops": 34.14697088066443, "iter_time": 0.604185173034668, "loss": 0.5956379771232605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.8232485264514, "step_time": 0.5602735862731933} +{"epoch": 0, "iter": 20328, "iter_tflops": 35.81184080496474, "iter_time": 0.5760969848632812, "loss": 0.5136494636535645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.883745789646774, "step_time": 0.5305840034484863} +{"epoch": 0, "iter": 20329, "iter_tflops": 26.759259041615934, "iter_time": 0.770988967895508, "loss": 0.023870522156357765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.59385158468416, "step_time": 0.7215220184326173} +{"epoch": 0, "iter": 20330, "iter_tflops": 16.37077672404914, "iter_time": 1.2602391357421876, "loss": 0.02414928562939167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.585322150347647, "step_time": 1.0533956680297851} +{"epoch": 0, "iter": 20331, "iter_tflops": 41.16563314583435, "iter_time": 0.5011727485656738, "loss": 0.04460221156477928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.568020038803255, "step_time": 0.4527537841796876} +{"epoch": 0, "iter": 20332, "iter_tflops": 42.057367168152346, "iter_time": 0.4905464820861816, "loss": 0.019526800140738487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.168900796667224, "step_time": 0.4468612670898437} +{"epoch": 0, "iter": 20333, "iter_tflops": 28.200135888329797, "iter_time": 0.7315955352783203, "loss": 0.4385758340358734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.516427655318278, "step_time": 0.6760651588439942} +{"epoch": 0, "iter": 20334, "iter_tflops": 8.62734350254184, "iter_time": 2.391361083984375, "loss": 0.48993420600891113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.015951201777295, "step_time": 2.059823684692383} +{"epoch": 0, "iter": 20335, "iter_tflops": 11.013989318350866, "iter_time": 1.8731717376708987, "loss": 0.47895348072052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.535623782844711, "step_time": 1.52420707321167} 
+{"epoch": 0, "iter": 20336, "iter_tflops": 35.16924699233571, "iter_time": 0.5866231231689453, "loss": 0.46210527420043945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.145796351919884, "step_time": 0.5408484153747558} +{"epoch": 0, "iter": 20337, "iter_tflops": 13.769830632254552, "iter_time": 1.153931610107422, "loss": 0.3859248161315918, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 14.640837284322505, "step_time": 1.0852823867797852} +{"epoch": 0, "iter": 20338, "iter_tflops": 19.726640227841745, "iter_time": 0.8054814529418945, "loss": 0.3439420759677887, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 23.980182708140326, "step_time": 0.6626072463989258} +{"epoch": 0, "iter": 20339, "iter_tflops": 24.54317576092262, "iter_time": 0.6474077758789063, "loss": 0.24451404809951782, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 26.42004466445212, "step_time": 0.6014161987304687} +{"epoch": 0, "iter": 20340, "iter_tflops": 23.913972515884737, "iter_time": 0.6644417953491212, "loss": 0.3801999092102051, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 25.751092164356155, "step_time": 0.6170395698547364} +{"epoch": 0, "iter": 20341, "iter_tflops": 19.97807113188226, "iter_time": 1.0326869583129883, "loss": 0.007367040496319532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.443378782737923, "step_time": 0.9621195297241212} +{"epoch": 0, "iter": 20342, "iter_tflops": 19.654188197720227, "iter_time": 1.0497046890258788, "loss": 0.002589636016637087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.259003678785973, "step_time": 0.8167817611694336} +{"epoch": 0, "iter": 20343, "iter_tflops": 45.836917505155114, "iter_time": 0.4500977516174316, "loss": 0.00804804265499115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.691337075563446, "step_time": 0.4069944629669189} +{"epoch": 0, "iter": 20344, "iter_tflops": 47.62106941649282, "iter_time": 0.4332345695495605, "loss": 0.003968645352870226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.273873594801074, "step_time": 
0.3946731338500976} +{"epoch": 0, "iter": 20345, "iter_tflops": 16.906530805665177, "iter_time": 1.220303192138672, "loss": 0.13581889867782593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.38285162616077, "step_time": 1.122301040649414} +{"epoch": 0, "iter": 20346, "iter_tflops": 23.752889792958857, "iter_time": 0.8685719375610351, "loss": 0.10477815568447113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.888356260389045, "step_time": 0.7672872714996338} +{"epoch": 0, "iter": 20347, "iter_tflops": 48.495132511613164, "iter_time": 0.42542606735229493, "loss": 0.18648773431777954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.36045890922952, "step_time": 0.394020486831665} +{"epoch": 0, "iter": 20348, "iter_tflops": 51.78094728759196, "iter_time": 0.39843020629882814, "loss": 0.1459100991487503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.92096627871713, "step_time": 0.3689330654144287} +{"epoch": 0, "iter": 20349, "iter_tflops": 33.74386233493769, "iter_time": 0.6114028472900391, "loss": 0.16298578679561615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.1614107931839, "step_time": 0.5705278930664062} +{"epoch": 0, "iter": 20350, "iter_tflops": 17.090276009972218, "iter_time": 1.2071831665039063, "loss": 0.1319435089826584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.632200285491678, "step_time": 0.9115814304351807} +{"epoch": 0, "iter": 20351, "iter_tflops": 40.40683271064806, "iter_time": 0.5105842781066895, "loss": 0.11243609338998795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.214121206743734, "step_time": 0.4666177444458008} +{"epoch": 0, "iter": 20352, "iter_tflops": 39.482864523415046, "iter_time": 0.522532844543457, "loss": 0.12098505347967148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.92441136974414, "step_time": 0.480637773513794} +{"epoch": 0, "iter": 20353, "iter_tflops": 17.94300680340839, "iter_time": 1.1498125, "loss": 0.6068264245986938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.479379646991795, "step_time": 
1.0591247711181642} +{"epoch": 0, "iter": 20354, "iter_tflops": 13.767777317404779, "iter_time": 1.4985057525634764, "loss": 0.4908835291862488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.298897933147657, "step_time": 1.2657968406677245} +{"epoch": 0, "iter": 20355, "iter_tflops": 33.564603820306495, "iter_time": 0.6146681671142578, "loss": 0.5566840171813965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.31555035238447, "step_time": 0.568106315612793} +{"epoch": 0, "iter": 20356, "iter_tflops": 36.14907770200799, "iter_time": 0.570722541809082, "loss": 0.5596400499343872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.214272698815975, "step_time": 0.5261118488311767} +{"epoch": 0, "iter": 20357, "iter_tflops": 27.271330390504506, "iter_time": 0.7565121765136719, "loss": 0.020846176892518997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.56857172728765, "step_time": 0.6977372360229492} +{"epoch": 0, "iter": 20358, "iter_tflops": 10.023672822801224, "iter_time": 2.0582369232177737, "loss": 0.022910237312316895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.43139853752625, "step_time": 1.6595955352783205} +{"epoch": 0, "iter": 20359, "iter_tflops": 9.016729254439062, "iter_time": 2.288090606689453, "loss": 0.03231228142976761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.9183206557223, "step_time": 1.8895848693847657} +{"epoch": 0, "iter": 20360, "iter_tflops": 31.648816508608153, "iter_time": 0.6518756713867188, "loss": 0.030952055007219315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.499710867788515, "step_time": 0.5358765830993653} +{"epoch": 0, "iter": 20361, "iter_tflops": 13.659361039898142, "iter_time": 1.2654197082519532, "loss": 0.31159085035324097, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 14.73657875175077, "step_time": 1.1729197769165038} +{"epoch": 0, "iter": 20362, "iter_tflops": 18.924782096657, "iter_time": 0.913343391418457, "loss": 0.35405856370925903, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 22.76812141064083, 
"step_time": 0.7591678009033204} +{"epoch": 0, "iter": 20363, "iter_tflops": 31.083199176989584, "iter_time": 0.5560825500488281, "loss": 0.3921146094799042, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 33.07049388633733, "step_time": 0.52266605758667} +{"epoch": 0, "iter": 20364, "iter_tflops": 32.2104510260491, "iter_time": 0.5366216278076172, "loss": 0.2865552008152008, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 34.17272382399816, "step_time": 0.5058076362609863} +{"epoch": 0, "iter": 20365, "iter_tflops": 20.820274756075968, "iter_time": 0.9909136047363281, "loss": 0.5449767708778381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.778552840665462, "step_time": 0.9473124160766602} +{"epoch": 0, "iter": 20366, "iter_tflops": 15.56237433321151, "iter_time": 1.3257034606933593, "loss": 0.5077589154243469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.87215955858094, "step_time": 1.093202579498291} +{"epoch": 0, "iter": 20367, "iter_tflops": 35.01289495033103, "iter_time": 0.5892427215576171, "loss": 0.5369067192077637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.17468891385299, "step_time": 0.5404390735626221} +{"epoch": 0, "iter": 20368, "iter_tflops": 37.705518588010015, "iter_time": 0.5471637649536133, "loss": 0.5402727723121643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.279879589529656, "step_time": 0.4997856998443604} +{"epoch": 0, "iter": 20369, "iter_tflops": 26.90080769572614, "iter_time": 0.7669321212768555, "loss": 0.47714000940322876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.36885769593171, "step_time": 0.7024819869995117} +{"epoch": 0, "iter": 20370, "iter_tflops": 14.383252377829077, "iter_time": 1.434383056640625, "loss": 0.5625298619270325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.974773079067468, "step_time": 1.2153973083496092} +{"epoch": 0, "iter": 20371, "iter_tflops": 34.462879101113984, "iter_time": 0.5986468353271485, "loss": 0.45478299260139465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
37.30531038295952, "step_time": 0.55303369140625} +{"epoch": 0, "iter": 20372, "iter_tflops": 37.91214279154189, "iter_time": 0.5441816787719727, "loss": 0.5206700563430786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.088896686312964, "step_time": 0.5021087245941162} +{"epoch": 0, "iter": 20373, "iter_tflops": 18.37416780953713, "iter_time": 1.1228314514160156, "loss": 0.20689067244529724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.848565569195895, "step_time": 1.0394249114990235} +{"epoch": 0, "iter": 20374, "iter_tflops": 17.40851306522103, "iter_time": 1.1851152038574218, "loss": 0.2308596670627594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.873777852775532, "step_time": 0.9883737220764159} +{"epoch": 0, "iter": 20375, "iter_tflops": 48.493141004902654, "iter_time": 0.42544353866577145, "loss": 0.23386047780513763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37109359187014, "step_time": 0.3939404754638671} +{"epoch": 0, "iter": 20376, "iter_tflops": 48.074673659395465, "iter_time": 0.42914682388305664, "loss": 0.20753873884677887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.72198696603221, "step_time": 0.3988843955993653} +{"epoch": 0, "iter": 20377, "iter_tflops": 25.126052755478145, "iter_time": 0.8211036453247071, "loss": 0.34848153591156006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.572690764648037, "step_time": 0.7764021224975586} +{"epoch": 0, "iter": 20378, "iter_tflops": 12.893636887560273, "iter_time": 1.600098846435547, "loss": 0.522122859954834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.138080633524265, "step_time": 1.2784106101989745} +{"epoch": 0, "iter": 20379, "iter_tflops": 47.61591161289648, "iter_time": 0.43328149795532217, "loss": 0.3063935935497284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.58749445263361, "step_time": 0.39231938552856443} +{"epoch": 0, "iter": 20380, "iter_tflops": 48.48935585377998, "iter_time": 0.42547674942016606, "loss": 0.3605596423149109, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 52.43553647866871, "step_time": 0.39345632553100585} +{"epoch": 0, "iter": 20381, "iter_tflops": 34.04728493195129, "iter_time": 0.6059541473388673, "loss": 0.48427096009254456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.85462426345208, "step_time": 0.5597966041564941} +{"epoch": 0, "iter": 20382, "iter_tflops": 45.387914209213875, "iter_time": 0.4545503768920899, "loss": 0.3965860903263092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69871394597253, "step_time": 0.4069352436065674} +{"epoch": 0, "iter": 20383, "iter_tflops": 50.45410109672637, "iter_time": 0.40890815734863273, "loss": 0.3471202254295349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.33509927404249, "step_time": 0.3797010364532471} +{"epoch": 0, "iter": 20384, "iter_tflops": 49.93076876567951, "iter_time": 0.4131939888000489, "loss": 0.3884180188179016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.72287673198434, "step_time": 0.3840280857086181} +{"epoch": 0, "iter": 20385, "iter_tflops": 24.01153566320352, "iter_time": 0.8592159118652344, "loss": 0.24150918424129486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.262887364576894, "step_time": 0.8166561965942384} +{"epoch": 0, "iter": 20386, "iter_tflops": 17.85293106509483, "iter_time": 1.155613800048828, "loss": 0.2536606788635254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.97324437964845, "step_time": 0.9389188575744629} +{"epoch": 0, "iter": 20387, "iter_tflops": 36.79249758684626, "iter_time": 0.5607418594360352, "loss": 0.29839858412742615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.29013089902245, "step_time": 0.5120632038116455} +{"epoch": 0, "iter": 20388, "iter_tflops": 40.309286581383176, "iter_time": 0.5118198623657226, "loss": 0.23906295001506805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.228000190764504, "step_time": 0.4664713172912598} +{"epoch": 0, "iter": 20389, "iter_tflops": 16.099937213700915, "iter_time": 1.2814393768310548, "loss": 0.4510791599750519, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 16.98975467060056, "step_time": 1.2143255691528319} +{"epoch": 0, "iter": 20390, "iter_tflops": 15.847342602673622, "iter_time": 1.3018645477294921, "loss": 0.35379883646965027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.972415890606072, "step_time": 0.9837251758575439} +{"epoch": 0, "iter": 20391, "iter_tflops": 38.76301752900975, "iter_time": 0.5322365188598632, "loss": 0.37432339787483215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30864406850773, "step_time": 0.4876330585479737} +{"epoch": 0, "iter": 20392, "iter_tflops": 39.754410496853566, "iter_time": 0.518963638305664, "loss": 0.4016580879688263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.591500400656294, "step_time": 0.47328248214721685} +{"epoch": 0, "iter": 20393, "iter_tflops": 34.373405379376436, "iter_time": 0.600205108642578, "loss": 0.07450897246599197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.193552244048064, "step_time": 0.5401721572875976} +{"epoch": 0, "iter": 20394, "iter_tflops": 39.12537136445813, "iter_time": 0.5273072891235352, "loss": 0.12150471657514572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.94051890864502, "step_time": 0.46952321052551277} +{"epoch": 0, "iter": 20395, "iter_tflops": 41.48450792690509, "iter_time": 0.49732043457031244, "loss": 0.09921643137931824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.612252745458996, "step_time": 0.45231472396850586} +{"epoch": 0, "iter": 20396, "iter_tflops": 41.01966301960354, "iter_time": 0.5029561920166016, "loss": 0.11642736941576004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92533494976847, "step_time": 0.4592307109832764} +{"epoch": 0, "iter": 20397, "iter_tflops": 20.061159864348785, "iter_time": 1.0284098052978516, "loss": 0.002754207467660308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.39155875627691, "step_time": 0.9644502182006836} +{"epoch": 0, "iter": 20398, "iter_tflops": 21.37873936846922, "iter_time": 0.9650285339355468, "loss": 0.006753656547516584, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.00461164878801, "step_time": 0.7933628768920898} +{"epoch": 0, "iter": 20399, "iter_tflops": 44.666012741563684, "iter_time": 0.4618969154357911, "loss": 0.035522107034921646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89377762183476, "step_time": 0.4135003299713135} +{"epoch": 0, "iter": 20400, "iter_tflops": 46.13869610332004, "iter_time": 0.44715380477905275, "loss": 0.0012746425345540047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.22579556494269, "step_time": 0.4027481327056885} +{"epoch": 0, "iter": 20401, "iter_tflops": 22.00970967259634, "iter_time": 0.9373632736206055, "loss": 0.28860533237457275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.849843849679335, "step_time": 0.865041030883789} +{"epoch": 0, "iter": 20402, "iter_tflops": 22.365957151305995, "iter_time": 0.9224328460693361, "loss": 0.4121290445327759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.30591538258697, "step_time": 0.755553997039795} +{"epoch": 0, "iter": 20403, "iter_tflops": 43.85292275894909, "iter_time": 0.4704610824584961, "loss": 0.40789711475372314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.029471431659154, "step_time": 0.43868435859680177} +{"epoch": 0, "iter": 20404, "iter_tflops": 50.92718888464062, "iter_time": 0.4051096076965332, "loss": 0.41650158166885376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.96372451695374, "step_time": 0.37535836029052727} +{"epoch": 0, "iter": 20405, "iter_tflops": 26.881154777671853, "iter_time": 0.7674928283691407, "loss": 0.11103194952011108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.470918379936784, "step_time": 0.7246374435424804} +{"epoch": 0, "iter": 20406, "iter_tflops": 14.130893350109627, "iter_time": 1.4599992370605468, "loss": 0.24398860335350037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.904789570141844, "step_time": 1.220428886413574} +{"epoch": 0, "iter": 20407, "iter_tflops": 39.28767387370012, "iter_time": 0.5251289138793945, "loss": 
0.2463711053133011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.151735894920414, "step_time": 0.47810576057434084} +{"epoch": 0, "iter": 20408, "iter_tflops": 40.51005379576136, "iter_time": 0.509283290863037, "loss": 0.22910235822200775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27237043874153, "step_time": 0.4660038146972656} +{"epoch": 0, "iter": 20409, "iter_tflops": 21.96007370357043, "iter_time": 0.9394819793701172, "loss": 0.12206130474805832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.80970251008872, "step_time": 0.8664994239807129} +{"epoch": 0, "iter": 20410, "iter_tflops": 25.249164833547884, "iter_time": 0.8171000366210938, "loss": 0.10424535721540451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.895952863671525, "step_time": 0.5912173709869385} +{"epoch": 0, "iter": 20411, "iter_tflops": 47.980109163452724, "iter_time": 0.42999263381958014, "loss": 0.12245353311300278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.90626872183521, "step_time": 0.3974682445526123} +{"epoch": 0, "iter": 20412, "iter_tflops": 47.521784442352775, "iter_time": 0.434139705657959, "loss": 0.1167190670967102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.41720188010294, "step_time": 0.40124885749816896} +{"epoch": 0, "iter": 20413, "iter_tflops": 41.52147257458998, "iter_time": 0.4968776931762695, "loss": 0.10360150784254074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.978809613203865, "step_time": 0.4586847381591797} +{"epoch": 0, "iter": 20414, "iter_tflops": 38.31900111402834, "iter_time": 0.5384037399291992, "loss": 0.18900786340236664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04786519883317, "step_time": 0.4122272434234619} +{"epoch": 0, "iter": 20415, "iter_tflops": 51.699683428982084, "iter_time": 0.3990564765930176, "loss": 0.16174203157424927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.19129091320811, "step_time": 0.3671582050323486} +{"epoch": 0, "iter": 20416, "iter_tflops": 50.411623637227095, "iter_time": 
0.4092527084350586, "loss": 0.08485958725214005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.30907726593653, "step_time": 0.3798829689025879} +{"epoch": 0, "iter": 20417, "iter_tflops": 31.478516894311966, "iter_time": 0.6554023361206055, "loss": 0.3137718439102173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.42711067809275, "step_time": 0.6171964340209961} +{"epoch": 0, "iter": 20418, "iter_tflops": 36.55807840170965, "iter_time": 0.5643374710083008, "loss": 0.5189388990402222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.005884443587036, "step_time": 0.5031251926422119} +{"epoch": 0, "iter": 20419, "iter_tflops": 38.699172854604136, "iter_time": 0.5331145858764648, "loss": 0.36022382974624634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.46058563932141, "step_time": 0.4858881053924561} +{"epoch": 0, "iter": 20420, "iter_tflops": 41.36541910200923, "iter_time": 0.4987521934509278, "loss": 0.3528752028942108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.13369261992133, "step_time": 0.4571106929779053} +{"epoch": 0, "iter": 20421, "iter_tflops": 20.05408280472455, "iter_time": 0.7698763275146484, "loss": 0.23453305661678314, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 21.895408848375524, "step_time": 0.7051324653625488} +{"epoch": 0, "iter": 20422, "iter_tflops": 23.167185851268368, "iter_time": 0.6664237823486329, "loss": 0.3904555141925812, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.002597742372576, "step_time": 0.6175023803710937} +{"epoch": 0, "iter": 20423, "iter_tflops": 23.372246585809663, "iter_time": 0.660576789855957, "loss": 0.3163834810256958, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.196574320505306, "step_time": 0.6127485198974609} +{"epoch": 0, "iter": 20424, "iter_tflops": 23.137163520460017, "iter_time": 0.6672885208129883, "loss": 0.4492930769920349, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.8720534730293, "step_time": 0.6207434234619141} +{"epoch": 0, "iter": 20425, "iter_tflops": 19.865011128980864, 
"iter_time": 1.0385644073486329, "loss": 0.3645573556423187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.187726370786372, "step_time": 0.9737285232543946} +{"epoch": 0, "iter": 20426, "iter_tflops": 24.758545001541233, "iter_time": 0.8332918395996094, "loss": 0.4975989758968353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.87557993143872, "step_time": 0.6682010040283203} +{"epoch": 0, "iter": 20427, "iter_tflops": 47.553874614291, "iter_time": 0.4338467407226562, "loss": 0.42035406827926636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.43942032966416, "step_time": 0.40107554435729986} +{"epoch": 0, "iter": 20428, "iter_tflops": 46.808471253839336, "iter_time": 0.44075555038452147, "loss": 0.43407168984413147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30052756998131, "step_time": 0.41015660285949707} +{"epoch": 0, "iter": 20429, "iter_tflops": 30.302758888167325, "iter_time": 0.6808321838378906, "loss": 0.4764884114265442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.17963982483208, "step_time": 0.641122573852539} +{"epoch": 0, "iter": 20430, "iter_tflops": 18.331628030858155, "iter_time": 1.1254370574951171, "loss": 0.34306585788726807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.305448626813828, "step_time": 1.0160373153686524} +{"epoch": 0, "iter": 20431, "iter_tflops": 43.608507865093955, "iter_time": 0.47309790039062505, "loss": 0.40908151865005493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.34896591083872, "step_time": 0.43572426795959474} +{"epoch": 0, "iter": 20432, "iter_tflops": 50.628677825820525, "iter_time": 0.4074981689453125, "loss": 0.3192809820175171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.68706221822413, "step_time": 0.37725730133056634} +{"epoch": 0, "iter": 20433, "iter_tflops": 25.700768050147882, "iter_time": 0.8027422943115234, "loss": 0.11141707003116608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.961084054348913, "step_time": 0.7652175064086915} +{"epoch": 0, "iter": 20434, "iter_tflops": 
26.18047417551469, "iter_time": 0.7880336074829102, "loss": 0.12387140095233917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.20716808037459, "step_time": 0.6405745906829835} +{"epoch": 0, "iter": 20435, "iter_tflops": 38.56356042051316, "iter_time": 0.5349893341064453, "loss": 0.13754300773143768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.59210529614736, "step_time": 0.48438773727416995} +{"epoch": 0, "iter": 20436, "iter_tflops": 43.51814981859026, "iter_time": 0.47408020782470706, "loss": 0.08908550441265106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59840157368957, "step_time": 0.4334408893585205} +{"epoch": 0, "iter": 20437, "iter_tflops": 18.926742062455084, "iter_time": 1.0900499114990234, "loss": 0.053986795246601105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.025598148262183, "step_time": 1.030236068725586} +{"epoch": 0, "iter": 20438, "iter_tflops": 15.639603981280978, "iter_time": 1.319157028198242, "loss": 0.11887319386005402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.883306694868562, "step_time": 0.9879227371215821} +{"epoch": 0, "iter": 20439, "iter_tflops": 40.70891144453486, "iter_time": 0.5067955093383789, "loss": 0.08848025649785995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.46839870198204, "step_time": 0.46394954872131344} +{"epoch": 0, "iter": 20440, "iter_tflops": 42.923665123369375, "iter_time": 0.4806461296081543, "loss": 0.07176479697227478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.679206713267014, "step_time": 0.441976095199585} +{"epoch": 0, "iter": 20441, "iter_tflops": 20.50141535454266, "iter_time": 0.9982295684814453, "loss": 0.055653322488069534, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 22.0845480320174, "step_time": 0.9266713981628417} +{"epoch": 0, "iter": 20442, "iter_tflops": 29.735442405342795, "iter_time": 0.6882399368286134, "loss": 0.04985442012548447, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 33.34982228119988, "step_time": 0.6136500167846679} +{"epoch": 0, "iter": 20443, 
"iter_tflops": 50.99696215322467, "iter_time": 0.40130074691772466, "loss": 0.03922906890511513, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 55.22511251346941, "step_time": 0.37057632064819335} +{"epoch": 0, "iter": 20444, "iter_tflops": 50.59740035524384, "iter_time": 0.4044697723388672, "loss": 0.050384946167469025, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 55.045146645913256, "step_time": 0.37178789138793944} +{"epoch": 0, "iter": 20445, "iter_tflops": 40.04579478169341, "iter_time": 0.515187515258789, "loss": 0.2866179049015045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.2957355828673, "step_time": 0.4765156021118164} +{"epoch": 0, "iter": 20446, "iter_tflops": 41.71394240967066, "iter_time": 0.4945850791931152, "loss": 0.3306942880153656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.614806978799216, "step_time": 0.4522893962860108} +{"epoch": 0, "iter": 20447, "iter_tflops": 42.00729343879604, "iter_time": 0.49113122558593747, "loss": 0.2725641429424286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.89736983000992, "step_time": 0.44950491905212403} +{"epoch": 0, "iter": 20448, "iter_tflops": 46.578173363288755, "iter_time": 0.44293479156494137, "loss": 0.3029366731643677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.188373083344494, "step_time": 0.4110731678009033} +{"epoch": 0, "iter": 20449, "iter_tflops": 19.357076462146928, "iter_time": 1.0658166046142579, "loss": 0.15570218861103058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.180686159644562, "step_time": 1.0223187332153318} +{"epoch": 0, "iter": 20450, "iter_tflops": 21.39737932112755, "iter_time": 0.9641878662109374, "loss": 0.10554429888725281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.906531768410336, "step_time": 0.7963664798736572} +{"epoch": 0, "iter": 20451, "iter_tflops": 48.91126262353544, "iter_time": 0.4218066024780273, "loss": 0.1459352821111679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15066240750422, "step_time": 0.38816249084472654} +{"epoch": 0, 
"iter": 20452, "iter_tflops": 49.87452587051569, "iter_time": 0.4136599426269531, "loss": 0.13193587958812714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.627501161995944, "step_time": 0.3847110729217529} +{"epoch": 0, "iter": 20453, "iter_tflops": 29.480210216031093, "iter_time": 0.6998285751342774, "loss": 0.1955445408821106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.291518471226812, "step_time": 0.6593190269470214} +{"epoch": 0, "iter": 20454, "iter_tflops": 22.066442243241415, "iter_time": 0.9349533233642577, "loss": 0.27565643191337585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.179396405875327, "step_time": 0.8193641014099121} +{"epoch": 0, "iter": 20455, "iter_tflops": 44.41512225077301, "iter_time": 0.46450606155395513, "loss": 0.23646578192710876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.46970287537741, "step_time": 0.42564926719665525} +{"epoch": 0, "iter": 20456, "iter_tflops": 42.62465300341816, "iter_time": 0.4840178642272949, "loss": 0.4444277882575989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.49608591246148, "step_time": 0.4437167797088623} +{"epoch": 0, "iter": 20457, "iter_tflops": 14.535399660104442, "iter_time": 1.4193688507080078, "loss": 0.26220703125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.337869282975111, "step_time": 1.345108184814453} +{"epoch": 0, "iter": 20458, "iter_tflops": 18.866333623057788, "iter_time": 1.0935401611328124, "loss": 0.2951698303222656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.385851475263205, "step_time": 0.8822040767669678} +{"epoch": 0, "iter": 20459, "iter_tflops": 46.665064177507624, "iter_time": 0.44211004257202147, "loss": 0.3060459792613983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.3729827091722, "step_time": 0.4095666446685791} +{"epoch": 0, "iter": 20460, "iter_tflops": 46.511718550604726, "iter_time": 0.4435676460266113, "loss": 0.36919882893562317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.31940669125741, "step_time": 0.41000271797180177} 
+{"epoch": 0, "iter": 20461, "iter_tflops": 28.229806838354946, "iter_time": 0.7308265914916993, "loss": 0.5044435858726501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.863408583601583, "step_time": 0.6908485832214355} +{"epoch": 0, "iter": 20462, "iter_tflops": 16.965208085023196, "iter_time": 1.2160825500488284, "loss": 0.5241033434867859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.49136674464336, "step_time": 1.006818811416626} +{"epoch": 0, "iter": 20463, "iter_tflops": 40.674587074684574, "iter_time": 0.5072231826782226, "loss": 0.5562772750854492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27190510575396, "step_time": 0.4660087127685547} +{"epoch": 0, "iter": 20464, "iter_tflops": 36.30519308814386, "iter_time": 0.5682683868408203, "loss": 0.5185977816581726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71633083617989, "step_time": 0.5194612159729004} +{"epoch": 0, "iter": 20465, "iter_tflops": 25.965952637210616, "iter_time": 0.597743911743164, "loss": 0.02291947975754738, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 29.09512817666814, "step_time": 0.5334566669464111} +{"epoch": 0, "iter": 20466, "iter_tflops": 30.521416936173196, "iter_time": 0.5085278358459473, "loss": 0.05064797401428223, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 34.256791891583255, "step_time": 0.4530777473449707} +{"epoch": 0, "iter": 20467, "iter_tflops": 33.28144472107081, "iter_time": 0.46635565948486324, "loss": 0.03004436194896698, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 36.71156495838787, "step_time": 0.42278203392028807} +{"epoch": 0, "iter": 20468, "iter_tflops": 33.947661430570435, "iter_time": 0.4572035140991211, "loss": 0.047191232442855835, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 37.43808296304184, "step_time": 0.4145775871276855} +{"epoch": 0, "iter": 20469, "iter_tflops": 22.93765564616884, "iter_time": 0.8903977584838867, "loss": 0.0416073240339756, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 24.85727270455268, "step_time": 
0.8216362838745118} +{"epoch": 0, "iter": 20470, "iter_tflops": 37.20157375907502, "iter_time": 0.5489992790222167, "loss": 0.05971142649650574, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 45.11712380361874, "step_time": 0.45268038940429683} +{"epoch": 0, "iter": 20471, "iter_tflops": 44.18559495086519, "iter_time": 0.46222388076782234, "loss": 0.034380488097667694, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 48.849959702624425, "step_time": 0.41808913040161133} +{"epoch": 0, "iter": 20472, "iter_tflops": 42.48848107016399, "iter_time": 0.48068645095825197, "loss": 0.045665234327316284, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 46.75952100310706, "step_time": 0.43678029060363766} +{"epoch": 0, "iter": 20473, "iter_tflops": 20.274746951004673, "iter_time": 1.0175758819580079, "loss": 0.45288577675819397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.423191974138998, "step_time": 0.9630261230468751} +{"epoch": 0, "iter": 20474, "iter_tflops": 8.114165759562558, "iter_time": 2.5426019287109374, "loss": 0.5124123096466064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.569279744258434, "step_time": 1.9519867019653319} +{"epoch": 0, "iter": 20475, "iter_tflops": 14.860628609915695, "iter_time": 1.3883055725097655, "loss": 0.3865590989589691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.91162341769571, "step_time": 1.1518271141052245} +{"epoch": 0, "iter": 20476, "iter_tflops": 43.367837959834816, "iter_time": 0.4757233581542969, "loss": 0.5379979014396667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.76536499801926, "step_time": 0.4411618194580078} +{"epoch": 0, "iter": 20477, "iter_tflops": 26.158596091185405, "iter_time": 0.6246624603271484, "loss": 0.3459699749946594, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 28.034415295437945, "step_time": 0.5828654823303223} +{"epoch": 0, "iter": 20478, "iter_tflops": 29.37886080706276, "iter_time": 0.5561921920776367, "loss": 0.2694774568080902, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 
31.34759843301506, "step_time": 0.5212613983154297} +{"epoch": 0, "iter": 20479, "iter_tflops": 30.42691651996372, "iter_time": 0.5370341415405273, "loss": 0.38349053263664246, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 32.434641762778924, "step_time": 0.5037913818359375} +{"epoch": 0, "iter": 20480, "iter_tflops": 28.21737904092094, "iter_time": 0.5790861358642578, "loss": 0.30858251452445984, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 30.109044090697164, "step_time": 0.5427038116455078} +{"epoch": 0, "iter": 20481, "iter_tflops": 24.336831360602073, "iter_time": 0.8477312927246093, "loss": 0.08089963346719742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.64966230427554, "step_time": 0.8043417205810547} +{"epoch": 0, "iter": 20482, "iter_tflops": 16.724438801909223, "iter_time": 1.233589584350586, "loss": 0.07190325856208801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.66895348314024, "step_time": 1.1051017684936524} +{"epoch": 0, "iter": 20483, "iter_tflops": 48.29069565248425, "iter_time": 0.4272270927429199, "loss": 0.08046036958694458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.70735012149599, "step_time": 0.3914272575378418} +{"epoch": 0, "iter": 20484, "iter_tflops": 49.936673129336306, "iter_time": 0.41314513397216795, "loss": 0.10397734493017197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.85903750160268, "step_time": 0.3830572261810303} +{"epoch": 0, "iter": 20485, "iter_tflops": 24.626819604220465, "iter_time": 0.8377490005493164, "loss": 0.3468094766139984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.876126163224328, "step_time": 0.7973022460937501} +{"epoch": 0, "iter": 20486, "iter_tflops": 21.4798678531987, "iter_time": 0.9604851226806641, "loss": 0.4067239761352539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.28877469827474, "step_time": 0.7847871856689452} +{"epoch": 0, "iter": 20487, "iter_tflops": 43.66123377600162, "iter_time": 0.4725265808105469, "loss": 0.3272545039653778, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 47.88074407890202, "step_time": 0.4308849811553954} +{"epoch": 0, "iter": 20488, "iter_tflops": 36.79500924259246, "iter_time": 0.5607035827636718, "loss": 0.3373340368270874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.318235320748705, "step_time": 0.5117062625885009} +{"epoch": 0, "iter": 20489, "iter_tflops": 16.115942926458434, "iter_time": 1.2801667022705077, "loss": 0.28440403938293457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.45927075460451, "step_time": 1.1816698303222657} +{"epoch": 0, "iter": 20490, "iter_tflops": 17.008964497528222, "iter_time": 1.2129541168212892, "loss": 0.3718826174736023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.25509599241289, "step_time": 0.9706422176361084} +{"epoch": 0, "iter": 20491, "iter_tflops": 46.767869439131715, "iter_time": 0.4411381950378418, "loss": 0.342165470123291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.79127087646019, "step_time": 0.4061936855316162} +{"epoch": 0, "iter": 20492, "iter_tflops": 46.95637143570518, "iter_time": 0.43936728668212893, "loss": 0.36940228939056396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.816796140946316, "step_time": 0.4059896545410156} +{"epoch": 0, "iter": 20493, "iter_tflops": 42.76390609134901, "iter_time": 0.48244174575805665, "loss": 0.44436606764793396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.70027338023365, "step_time": 0.44177671813964847} +{"epoch": 0, "iter": 20494, "iter_tflops": 45.62976592040891, "iter_time": 0.4521411209106445, "loss": 0.39141443371772766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.330654114118836, "step_time": 0.4182205543518066} +{"epoch": 0, "iter": 20495, "iter_tflops": 46.64469725524132, "iter_time": 0.44230308532714846, "loss": 0.4518214166164398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30814958497111, "step_time": 0.41009446144104} +{"epoch": 0, "iter": 20496, "iter_tflops": 47.1853144913676, "iter_time": 0.4372354774475097, "loss": 0.4899970293045044, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.86976887789104, "step_time": 0.40556688117980955} +{"epoch": 0, "iter": 20497, "iter_tflops": 34.30815932811509, "iter_time": 0.6013465576171875, "loss": 0.04377346485853195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.65122336863048, "step_time": 0.5629032707214355} +{"epoch": 0, "iter": 20498, "iter_tflops": 10.313562917575988, "iter_time": 2.0003847045898437, "loss": 0.07011023908853531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.908554872682563, "step_time": 1.73245987701416} +{"epoch": 0, "iter": 20499, "iter_tflops": 11.183620797260154, "iter_time": 1.844759750366211, "loss": 0.08330479264259338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.157183877463495, "step_time": 1.5680478210449218} +{"epoch": 0, "iter": 20500, "iter_tflops": 20.074854237840675, "iter_time": 1.0277082595825195, "loss": 0.03758545592427254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.961468538467347, "step_time": 0.8610112304687499} +{"epoch": 0, "iter": 20501, "iter_tflops": 16.877545092347642, "iter_time": 0.8929721679687499, "loss": 0.23853275179862976, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 18.517645189730615, "step_time": 0.8138819961547852} +{"epoch": 0, "iter": 20502, "iter_tflops": 23.400796425166536, "iter_time": 0.6440455169677735, "loss": 0.3275013267993927, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 25.173128585894343, "step_time": 0.5987010307312012} +{"epoch": 0, "iter": 20503, "iter_tflops": 23.179480436588186, "iter_time": 0.6501948165893554, "loss": 0.39079734683036804, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.954727886983044, "step_time": 0.603940788269043} +{"epoch": 0, "iter": 20504, "iter_tflops": 23.76902021668431, "iter_time": 0.6340681228637695, "loss": 0.5051578283309937, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 25.498030264546536, "step_time": 0.5910722465515137} +{"epoch": 0, "iter": 20505, "iter_tflops": 20.36191855365367, "iter_time": 1.0132195281982423, "loss": 0.17736415565013885, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 21.980984861615937, "step_time": 0.9385882225036621} +{"epoch": 0, "iter": 20506, "iter_tflops": 31.23737410756161, "iter_time": 0.6604618377685547, "loss": 0.17867547273635864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.69946127566792, "step_time": 0.5196819515228271} +{"epoch": 0, "iter": 20507, "iter_tflops": 46.282733674408775, "iter_time": 0.44576220703124997, "loss": 0.11484094709157944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24354708142316, "step_time": 0.4106217555999756} +{"epoch": 0, "iter": 20508, "iter_tflops": 48.87722001099593, "iter_time": 0.4221003875732422, "loss": 0.13698157668113708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.96419265253767, "step_time": 0.38952908515930174} +{"epoch": 0, "iter": 20509, "iter_tflops": 26.610211147620877, "iter_time": 0.7753073959350586, "loss": 0.5491082668304443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.159014584811683, "step_time": 0.7326639022827148} +{"epoch": 0, "iter": 20510, "iter_tflops": 9.736864818607256, "iter_time": 2.1188641204833987, "loss": 0.49442553520202637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.88467691622509, "step_time": 1.7359406280517578} +{"epoch": 0, "iter": 20511, "iter_tflops": 10.21787048648506, "iter_time": 2.019118713378906, "loss": 0.3443228304386139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.252214305138592, "step_time": 1.683866523742676} +{"epoch": 0, "iter": 20512, "iter_tflops": 30.422458761526862, "iter_time": 0.6781533889770507, "loss": 0.5529099106788635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.814639661720854, "step_time": 0.5760519638061523} +{"epoch": 0, "iter": 20513, "iter_tflops": 12.090698404366444, "iter_time": 1.2701793975830078, "loss": 0.3544847071170807, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 12.959750217165444, "step_time": 1.1850040130615236} +{"epoch": 0, "iter": 20514, "iter_tflops": 18.139454264227286, "iter_time": 0.8466272354125977, "loss": 
0.41828009486198425, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 19.717564380451694, "step_time": 0.7788667869567871} +{"epoch": 0, "iter": 20515, "iter_tflops": 22.443705772157188, "iter_time": 0.6842611541748047, "loss": 0.3008895814418793, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.21134363940926, "step_time": 0.6343041610717773} +{"epoch": 0, "iter": 20516, "iter_tflops": 23.542545131993425, "iter_time": 0.6523235244750977, "loss": 0.3575163185596466, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 25.161956648609884, "step_time": 0.6103402938842774} +{"epoch": 0, "iter": 20517, "iter_tflops": 15.306923612425283, "iter_time": 1.3478275604248047, "loss": 0.12122668325901031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.386889205833207, "step_time": 1.2590000000000001} +{"epoch": 0, "iter": 20518, "iter_tflops": 15.421486222170289, "iter_time": 1.3378148651123045, "loss": 0.09642305225133896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.728242832239257, "step_time": 0.9495058422088622} +{"epoch": 0, "iter": 20519, "iter_tflops": 53.27362982338101, "iter_time": 0.38726652526855465, "loss": 0.06673267483711243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.88100525404316, "step_time": 0.3564397926330567} +{"epoch": 0, "iter": 20520, "iter_tflops": 51.18990013939334, "iter_time": 0.40303054809570316, "loss": 0.09389655292034149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.585789968253934, "step_time": 0.3711576919555664} +{"epoch": 0, "iter": 20521, "iter_tflops": 24.427621326172133, "iter_time": 0.8445805358886719, "loss": 0.2524009346961975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.593753133658016, "step_time": 0.8060987930297852} +{"epoch": 0, "iter": 20522, "iter_tflops": 15.085478928382406, "iter_time": 1.367612762451172, "loss": 0.2630118131637573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.10807307921407, "step_time": 1.2059273662567138} +{"epoch": 0, "iter": 20523, "iter_tflops": 39.00375948158847, "iter_time": 
0.5289514083862304, "loss": 0.18910713493824005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.75782336666698, "step_time": 0.4825103778839111} +{"epoch": 0, "iter": 20524, "iter_tflops": 40.803367054415666, "iter_time": 0.505622329711914, "loss": 0.19216875731945038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.67886381594868, "step_time": 0.4617640590667724} +{"epoch": 0, "iter": 20525, "iter_tflops": 18.776205737044027, "iter_time": 1.098789276123047, "loss": 0.33224374055862427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.152692315560834, "step_time": 1.0237388229370117} +{"epoch": 0, "iter": 20526, "iter_tflops": 16.3744229069868, "iter_time": 1.2599585113525393, "loss": 0.3432762324810028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.379826622691084, "step_time": 1.122485752105713} +{"epoch": 0, "iter": 20527, "iter_tflops": 46.83942486504395, "iter_time": 0.4404642791748047, "loss": 0.24433322250843048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.668603413368146, "step_time": 0.4071770706176758} +{"epoch": 0, "iter": 20528, "iter_tflops": 49.36254791674563, "iter_time": 0.4179503364562988, "loss": 0.4560433030128479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.59877643714451, "step_time": 0.3849172477722168} +{"epoch": 0, "iter": 20529, "iter_tflops": 20.92597749581571, "iter_time": 0.9859082336425782, "loss": 0.49160853028297424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.869845070392266, "step_time": 0.943358009338379} +{"epoch": 0, "iter": 20530, "iter_tflops": 14.90326435835113, "iter_time": 1.3843338623046875, "loss": 0.637147068977356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.863025387894545, "step_time": 0.988883113861084} +{"epoch": 0, "iter": 20531, "iter_tflops": 38.760359471513375, "iter_time": 0.5322730178833008, "loss": 0.40423300862312317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.788561184994634, "step_time": 0.4937019348144531} +{"epoch": 0, "iter": 20532, "iter_tflops": 32.46588423068456, 
"iter_time": 0.6354699401855469, "loss": 0.5629103779792786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.28061996834266, "step_time": 0.5847712860107421} +{"epoch": 0, "iter": 20533, "iter_tflops": 24.82154675131635, "iter_time": 0.8311767883300781, "loss": 0.19488689303398132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.542838457295076, "step_time": 0.7772753295898437} +{"epoch": 0, "iter": 20534, "iter_tflops": 7.617489181100346, "iter_time": 2.7083850097656255, "loss": 0.103547602891922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.643308891768047, "step_time": 2.386943908691406} +{"epoch": 0, "iter": 20535, "iter_tflops": 15.372416017026518, "iter_time": 1.3420852966308594, "loss": 0.09987262636423111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.960173374490505, "step_time": 1.2164435501098632} +{"epoch": 0, "iter": 20536, "iter_tflops": 35.32087574750529, "iter_time": 0.5841048126220704, "loss": 0.10782783478498459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.362814250358404, "step_time": 0.4650537586212158} +{"epoch": 0, "iter": 20537, "iter_tflops": 15.678338263069902, "iter_time": 1.0945983581542968, "loss": 0.25500059127807617, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 16.390354243563237, "step_time": 1.0470477371215818} +{"epoch": 0, "iter": 20538, "iter_tflops": 15.736883985199631, "iter_time": 1.0905261383056641, "loss": 0.34068530797958374, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 18.757522488073494, "step_time": 0.9149120483398437} +{"epoch": 0, "iter": 20539, "iter_tflops": 31.23973285555338, "iter_time": 0.5493479537963868, "loss": 0.34789857268333435, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 33.22751353374985, "step_time": 0.5164841270446777} +{"epoch": 0, "iter": 20540, "iter_tflops": 31.99658596146147, "iter_time": 0.5363535766601563, "loss": 0.44836288690567017, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 33.973255583485134, "step_time": 0.5051468582153321} +{"epoch": 0, "iter": 20541, "iter_tflops": 
40.23813459933957, "iter_time": 0.5127248992919923, "loss": 0.5488377213478088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.779915262205066, "step_time": 0.4712456245422363} +{"epoch": 0, "iter": 20542, "iter_tflops": 32.87324179428395, "iter_time": 0.6275953445434571, "loss": 0.5645978450775146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.457399913976175, "step_time": 0.5658959102630615} +{"epoch": 0, "iter": 20543, "iter_tflops": 34.861828514229586, "iter_time": 0.5917960815429688, "loss": 0.3394874930381775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.02783830988322, "step_time": 0.5425260658264159} +{"epoch": 0, "iter": 20544, "iter_tflops": 39.92689744045802, "iter_time": 0.5167216796874999, "loss": 0.5676581263542175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.35163967291593, "step_time": 0.4759011116027832} +{"epoch": 0, "iter": 20545, "iter_tflops": 17.87729407333721, "iter_time": 1.1540389404296876, "loss": 0.32042643427848816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.00724523341935, "step_time": 1.0854331207275392} +{"epoch": 0, "iter": 20546, "iter_tflops": 27.203694517355714, "iter_time": 0.7583930740356445, "loss": 0.2348760962486267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.73139623315632, "step_time": 0.6116288032531737} +{"epoch": 0, "iter": 20547, "iter_tflops": 45.679801080897285, "iter_time": 0.45164587020874025, "loss": 0.25153371691703796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27233102954996, "step_time": 0.4187155971527099} +{"epoch": 0, "iter": 20548, "iter_tflops": 47.713268282325835, "iter_time": 0.43239740753173833, "loss": 0.4771927297115326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.81060742904888, "step_time": 0.3982021160125732} +{"epoch": 0, "iter": 20549, "iter_tflops": 44.733200743926965, "iter_time": 0.4612031593322754, "loss": 0.0037726007867604494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30837665488026, "step_time": 0.41840950584411624} +{"epoch": 0, "iter": 20550, 
"iter_tflops": 50.382769920546075, "iter_time": 0.4094870834350586, "loss": 0.003471352392807603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.920238392942125, "step_time": 0.3561983528137207} +{"epoch": 0, "iter": 20551, "iter_tflops": 56.17102726275714, "iter_time": 0.36729065704345704, "loss": 0.002801354043185711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.76050865385404, "step_time": 0.33404992866516114} +{"epoch": 0, "iter": 20552, "iter_tflops": 42.19522224044552, "iter_time": 0.48894382858276364, "loss": 0.011215650476515293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.69338826037289, "step_time": 0.4418418598175049} +{"epoch": 0, "iter": 20553, "iter_tflops": 14.171415670901997, "iter_time": 0.5837292404174805, "loss": 0.03019433096051216, "lr": 3e-05, "seqlen": 3344.0, "step_tflops": 15.784424625874337, "step_time": 0.5240780010223389} +{"epoch": 0, "iter": 20554, "iter_tflops": 19.598856543089845, "iter_time": 0.4220792007446289, "loss": 0.03533949330449104, "lr": 3e-05, "seqlen": 3344.0, "step_tflops": 21.70745489428267, "step_time": 0.3810796680450439} +{"epoch": 0, "iter": 20555, "iter_tflops": 21.67231988762213, "iter_time": 0.3816974716186523, "loss": 0.027981923893094063, "lr": 3e-05, "seqlen": 3344.0, "step_tflops": 23.761721001354463, "step_time": 0.3481342830657959} +{"epoch": 0, "iter": 20556, "iter_tflops": 21.862440880538085, "iter_time": 0.37837813949584953, "loss": 0.02291928417980671, "lr": 3e-05, "seqlen": 3344.0, "step_tflops": 24.08159029309432, "step_time": 0.3435101089477539} +{"epoch": 0, "iter": 20557, "iter_tflops": 33.720725089734955, "iter_time": 0.6118223571777344, "loss": 0.01981603540480137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.93536803413664, "step_time": 0.5741166610717774} +{"epoch": 0, "iter": 20558, "iter_tflops": 11.679178023122274, "iter_time": 1.7664850616455077, "loss": 0.06280679255723953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.686129682296208, "step_time": 1.4048012619018555} 
+{"epoch": 0, "iter": 20559, "iter_tflops": 15.84267423224951, "iter_time": 1.3022481689453125, "loss": 0.038143306970596313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.23681496263357, "step_time": 1.1312881965637207} +{"epoch": 0, "iter": 20560, "iter_tflops": 15.371550574433625, "iter_time": 1.3421608581542968, "loss": 0.024791881442070007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.268411058341037, "step_time": 1.129331579208374} +{"epoch": 0, "iter": 20561, "iter_tflops": 16.532167370792507, "iter_time": 0.9363608627319335, "loss": 0.36389175057411194, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 17.37507266018978, "step_time": 0.8909358139038086} +{"epoch": 0, "iter": 20562, "iter_tflops": 10.137577906395757, "iter_time": 1.5269993133544923, "loss": 0.2889375686645508, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 11.78728098303377, "step_time": 1.3132862892150878} +{"epoch": 0, "iter": 20563, "iter_tflops": 23.445987598647623, "iter_time": 0.6602440795898439, "loss": 0.2377173751592636, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.28081156222265, "step_time": 0.6123250617980956} +{"epoch": 0, "iter": 20564, "iter_tflops": 23.321712943693406, "iter_time": 0.6637623291015625, "loss": 0.38932034373283386, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.100769355435602, "step_time": 0.6167171325683595} +{"epoch": 0, "iter": 20565, "iter_tflops": 17.742152114342794, "iter_time": 1.1628292541503906, "loss": 0.4665614366531372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.039120763615415, "step_time": 1.0836158752441407} +{"epoch": 0, "iter": 20566, "iter_tflops": 13.718885089029405, "iter_time": 1.503846221923828, "loss": 0.5706664323806763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.11430358806817, "step_time": 1.2802969360351564} +{"epoch": 0, "iter": 20567, "iter_tflops": 41.44577886887122, "iter_time": 0.49778515624999997, "loss": 0.43443408608436584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.3728361789097, "step_time": 
0.4649487228393554} +{"epoch": 0, "iter": 20568, "iter_tflops": 42.318589410784064, "iter_time": 0.4875184593200684, "loss": 0.45820537209510803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42022052980309, "step_time": 0.4542270660400391} +{"epoch": 0, "iter": 20569, "iter_tflops": 42.607866060730274, "iter_time": 0.4842085609436035, "loss": 0.11633019149303436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.19774234275556, "step_time": 0.4465822887420655} +{"epoch": 0, "iter": 20570, "iter_tflops": 42.40298226887231, "iter_time": 0.48654817199707034, "loss": 0.10876196622848511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.08945214513753, "step_time": 0.4476315631866456} +{"epoch": 0, "iter": 20571, "iter_tflops": 47.74447962902589, "iter_time": 0.4321147422790528, "loss": 0.12993508577346802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.02749336904298, "step_time": 0.3965421390533447} +{"epoch": 0, "iter": 20572, "iter_tflops": 53.69422622005042, "iter_time": 0.3842329978942871, "loss": 0.08041480928659439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.12982485170406, "step_time": 0.354914083480835} +{"epoch": 0, "iter": 20573, "iter_tflops": 19.755060743258625, "iter_time": 1.0443447265625, "loss": 0.27296072244644165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.654605868067566, "step_time": 0.9988616409301758} +{"epoch": 0, "iter": 20574, "iter_tflops": 15.649446853231362, "iter_time": 1.3183273315429687, "loss": 0.31088605523109436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.36970786967567, "step_time": 1.123104061126709} +{"epoch": 0, "iter": 20575, "iter_tflops": 50.05382207383575, "iter_time": 0.41217818450927735, "loss": 0.21829621493816376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.32273649595168, "step_time": 0.3797874488830567} +{"epoch": 0, "iter": 20576, "iter_tflops": 48.72607114934201, "iter_time": 0.42340974807739257, "loss": 0.31953051686286926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.80537427499394, 
"step_time": 0.39070063972473146} +{"epoch": 0, "iter": 20577, "iter_tflops": 31.777073415639457, "iter_time": 0.6492446060180663, "loss": 0.343461275100708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.968696875982744, "step_time": 0.6073560485839843} +{"epoch": 0, "iter": 20578, "iter_tflops": 12.258903365460819, "iter_time": 1.6829477233886716, "loss": 0.25053098797798157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.946013120003283, "step_time": 1.3803743743896484} +{"epoch": 0, "iter": 20579, "iter_tflops": 14.640433476505171, "iter_time": 1.4091859741210937, "loss": 0.1987127810716629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.21572407595628, "step_time": 1.1983866271972656} +{"epoch": 0, "iter": 20580, "iter_tflops": 23.884155505924216, "iter_time": 0.863798324584961, "loss": 0.3561917841434479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.91273208651559, "step_time": 0.6268423252105713} +{"epoch": 0, "iter": 20581, "iter_tflops": 11.364903481643362, "iter_time": 1.340502365112305, "loss": 0.41751009225845337, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 12.071468946971779, "step_time": 1.2620402755737306} +{"epoch": 0, "iter": 20582, "iter_tflops": 12.066743810320272, "iter_time": 1.2625344696044922, "loss": 0.3563508689403534, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 15.312297393110455, "step_time": 0.9949310417175293} +{"epoch": 0, "iter": 20583, "iter_tflops": 26.362437664608297, "iter_time": 0.5778934478759765, "loss": 0.24843500554561615, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.04894246507366, "step_time": 0.5431463241577149} +{"epoch": 0, "iter": 20584, "iter_tflops": 27.503075873861988, "iter_time": 0.5539264068603516, "loss": 0.43834513425827026, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.223905722567896, "step_time": 0.5213088264465332} +{"epoch": 0, "iter": 20585, "iter_tflops": 25.558912577850943, "iter_time": 0.807197624206543, "loss": 0.5239309668540955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
27.011483397713704, "step_time": 0.7637897262573242} +{"epoch": 0, "iter": 20586, "iter_tflops": 17.342999610546777, "iter_time": 1.1895919952392577, "loss": 0.7111625671386719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.658284958111228, "step_time": 0.9105319995880128} +{"epoch": 0, "iter": 20587, "iter_tflops": 43.3339156290195, "iter_time": 0.47609576034545903, "loss": 0.46300825476646423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.65893391803109, "step_time": 0.4421681289672852} +{"epoch": 0, "iter": 20588, "iter_tflops": 43.026614011932196, "iter_time": 0.47949609756469724, "loss": 0.4518253803253174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.30290339600729, "step_time": 0.4455680313110351} +{"epoch": 0, "iter": 20589, "iter_tflops": 42.10096307636968, "iter_time": 0.49003851699829104, "loss": 0.12618030607700348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.82867058810009, "step_time": 0.45017874717712403} +{"epoch": 0, "iter": 20590, "iter_tflops": 45.3721882251013, "iter_time": 0.45470792388916015, "loss": 0.14868295192718506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64145699868328, "step_time": 0.4156020946502686} +{"epoch": 0, "iter": 20591, "iter_tflops": 45.9872284356359, "iter_time": 0.44862659072875977, "loss": 0.1995142698287964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64948471599087, "step_time": 0.4155348968505859} +{"epoch": 0, "iter": 20592, "iter_tflops": 53.6868328750055, "iter_time": 0.3842859115600586, "loss": 0.14146758615970612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.24116831995371, "step_time": 0.3542355709075928} +{"epoch": 0, "iter": 20593, "iter_tflops": 37.68205276768601, "iter_time": 0.5475045013427735, "loss": 0.2034538835287094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.89457514121102, "step_time": 0.50449462890625} +{"epoch": 0, "iter": 20594, "iter_tflops": 37.82083604599756, "iter_time": 0.5454954376220703, "loss": 0.22460688650608063, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 41.44647969408308, "step_time": 0.4977767391204834} +{"epoch": 0, "iter": 20595, "iter_tflops": 42.14898892296514, "iter_time": 0.4894801521301269, "loss": 0.230157271027565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.93101775003706, "step_time": 0.44917562294006347} +{"epoch": 0, "iter": 20596, "iter_tflops": 39.52436227824459, "iter_time": 0.5219842224121094, "loss": 0.280119925737381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.124182296297406, "step_time": 0.47841123962402343} +{"epoch": 0, "iter": 20597, "iter_tflops": 28.491734695242577, "iter_time": 0.7241080169677734, "loss": 0.049520306289196014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.042815512750384, "step_time": 0.6646012344360352} +{"epoch": 0, "iter": 20598, "iter_tflops": 10.773364955984842, "iter_time": 1.915009246826172, "loss": 0.03848671168088913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.13353960147892, "step_time": 1.5708707733154295} +{"epoch": 0, "iter": 20599, "iter_tflops": 12.147283351053352, "iter_time": 1.698412139892578, "loss": 0.08190181106328964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.08866270494002, "step_time": 1.46437557220459} +{"epoch": 0, "iter": 20600, "iter_tflops": 26.909233283526266, "iter_time": 0.7666919860839844, "loss": 0.051442697644233704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.683922374838815, "step_time": 0.5948316135406495} +{"epoch": 0, "iter": 20601, "iter_tflops": 15.372257062243985, "iter_time": 1.2209159088134767, "loss": 0.2718796730041504, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 16.804330877876914, "step_time": 1.1168688201904298} +{"epoch": 0, "iter": 20602, "iter_tflops": 12.309799366299446, "iter_time": 1.5246579284667967, "loss": 0.37380239367485046, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 14.978018584634631, "step_time": 1.2530518035888674} +{"epoch": 0, "iter": 20603, "iter_tflops": 32.709064031574215, "iter_time": 0.5737930374145508, "loss": 0.461263507604599, "lr": 3e-05, 
"seqlen": 7472.0, "step_tflops": 35.022714490928564, "step_time": 0.535887451171875} +{"epoch": 0, "iter": 20604, "iter_tflops": 33.11883676214786, "iter_time": 0.5666936111450196, "loss": 0.31569719314575195, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 35.22595527540357, "step_time": 0.5327955780029298} +{"epoch": 0, "iter": 20605, "iter_tflops": 41.249758780663946, "iter_time": 0.5001506462097167, "loss": 0.5930072069168091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.051169370121244, "step_time": 0.45794801330566404} +{"epoch": 0, "iter": 20606, "iter_tflops": 34.94565260060385, "iter_time": 0.5903765411376953, "loss": 0.5377667546272278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.974437773787606, "step_time": 0.5432889785766601} +{"epoch": 0, "iter": 20607, "iter_tflops": 37.72576286477089, "iter_time": 0.5468701477050781, "loss": 0.5179134011268616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87490623383994, "step_time": 0.5047373905181886} +{"epoch": 0, "iter": 20608, "iter_tflops": 35.685224963686025, "iter_time": 0.5781410522460937, "loss": 0.47805163264274597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.70705896614396, "step_time": 0.5330059700012207} +{"epoch": 0, "iter": 20609, "iter_tflops": 19.338087863121345, "iter_time": 1.0668631591796873, "loss": 0.3232403099536896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.897164304614353, "step_time": 0.9872676124572752} +{"epoch": 0, "iter": 20610, "iter_tflops": 28.17458147324905, "iter_time": 0.7322590942382813, "loss": 0.38664478063583374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.20420874800923, "step_time": 0.6031741199493408} +{"epoch": 0, "iter": 20611, "iter_tflops": 40.630163426918244, "iter_time": 0.5077777633666992, "loss": 0.38904091715812683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.585767311543016, "step_time": 0.46272823715209965} +{"epoch": 0, "iter": 20612, "iter_tflops": 39.70086636714428, "iter_time": 0.5196635589599609, "loss": 0.3101798892021179, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.469067138701604, "step_time": 0.47461551094055177} +{"epoch": 0, "iter": 20613, "iter_tflops": 18.524808431986962, "iter_time": 1.1137007751464842, "loss": 0.5648087859153748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.846024936590542, "step_time": 1.0395579757690427} +{"epoch": 0, "iter": 20614, "iter_tflops": 23.088793910920167, "iter_time": 0.8935544052124024, "loss": 0.626246988773346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.73750932940384, "step_time": 0.5615811710357665} +{"epoch": 0, "iter": 20615, "iter_tflops": 44.79079967087643, "iter_time": 0.4606100730895996, "loss": 0.4722217917442322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41983645112586, "step_time": 0.4260876331329346} +{"epoch": 0, "iter": 20616, "iter_tflops": 49.31970502713749, "iter_time": 0.4183134002685546, "loss": 0.6064819097518921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.49004856536415, "step_time": 0.38569965934753414} +{"epoch": 0, "iter": 20617, "iter_tflops": 31.810117173543745, "iter_time": 0.6485701828002931, "loss": 0.3126366436481476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.870770272520204, "step_time": 0.6091120262145996} +{"epoch": 0, "iter": 20618, "iter_tflops": 18.507540220408004, "iter_time": 1.1147398986816408, "loss": 0.4126301109790802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.075976103615194, "step_time": 0.8569161815643309} +{"epoch": 0, "iter": 20619, "iter_tflops": 36.738561087201894, "iter_time": 0.5615650939941406, "loss": 0.39129048585891724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.27721712769464, "step_time": 0.5122273826599121} +{"epoch": 0, "iter": 20620, "iter_tflops": 38.6593452569389, "iter_time": 0.5336638107299805, "loss": 0.4422205090522766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.36061722911817, "step_time": 0.4870347709655762} +{"epoch": 0, "iter": 20621, "iter_tflops": 19.44715003107547, "iter_time": 1.0608800506591796, "loss": 
0.3872360587120056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.82245975036118, "step_time": 0.9908096237182616} +{"epoch": 0, "iter": 20622, "iter_tflops": 15.252166109498525, "iter_time": 1.352666458129883, "loss": 0.44204914569854736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.861158112821727, "step_time": 1.0387658863067626} +{"epoch": 0, "iter": 20623, "iter_tflops": 37.344245396312296, "iter_time": 0.5524570999145508, "loss": 0.5234965085983276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.24309849392819, "step_time": 0.5002314147949218} +{"epoch": 0, "iter": 20624, "iter_tflops": 38.33621473986315, "iter_time": 0.5381619873046876, "loss": 0.4928080141544342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.92069944305473, "step_time": 0.4921457366943359} +{"epoch": 0, "iter": 20625, "iter_tflops": 19.61476916295893, "iter_time": 1.0518142395019532, "loss": 0.5621997714042664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.008867805647146, "step_time": 0.9820183410644532} +{"epoch": 0, "iter": 20626, "iter_tflops": 19.693959328341943, "iter_time": 1.0475848541259765, "loss": 0.5897906422615051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.265857894723815, "step_time": 0.9265797710418702} +{"epoch": 0, "iter": 20627, "iter_tflops": 36.908216978885335, "iter_time": 0.5589837493896485, "loss": 0.43160349130630493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.08615539926522, "step_time": 0.5146688003540039} +{"epoch": 0, "iter": 20628, "iter_tflops": 34.98953000486144, "iter_time": 0.5896361999511719, "loss": 0.5774322152137756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.15647802710182, "step_time": 0.5406970081329345} +{"epoch": 0, "iter": 20629, "iter_tflops": 15.604162256969888, "iter_time": 1.3221532287597657, "loss": 0.3116242587566376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.873349987019672, "step_time": 1.2227028732299805} +{"epoch": 0, "iter": 20630, "iter_tflops": 19.028101664626135, "iter_time": 1.084243392944336, 
"loss": 0.27211734652519226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.372138726701056, "step_time": 0.8827216777801514} +{"epoch": 0, "iter": 20631, "iter_tflops": 47.31953270436688, "iter_time": 0.43599529266357423, "loss": 0.29025986790657043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.30541838349672, "step_time": 0.40212309265136714} +{"epoch": 0, "iter": 20632, "iter_tflops": 50.98683409330989, "iter_time": 0.40463570404052734, "loss": 0.30385899543762207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.31029460456413, "step_time": 0.37300639343261716} +{"epoch": 0, "iter": 20633, "iter_tflops": 35.98506239876928, "iter_time": 0.5733238220214844, "loss": 0.07900205254554749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.61236900871538, "step_time": 0.5343130722045898} +{"epoch": 0, "iter": 20634, "iter_tflops": 16.356183681270533, "iter_time": 1.261363525390625, "loss": 0.025367870926856995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.481161587167318, "step_time": 1.059027893066406} +{"epoch": 0, "iter": 20635, "iter_tflops": 39.127821551395606, "iter_time": 0.5272742691040039, "loss": 0.05607248470187187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.105270475503104, "step_time": 0.47862113571166987} +{"epoch": 0, "iter": 20636, "iter_tflops": 44.26413940293071, "iter_time": 0.4660904693603516, "loss": 0.07395368069410324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.57375379290719, "step_time": 0.4247374744415284} +{"epoch": 0, "iter": 20637, "iter_tflops": 2.1410073724495047, "iter_time": 0.7503658981323241, "loss": 0.11448459327220917, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.318965406488619, "step_time": 0.6927826156616211} +{"epoch": 0, "iter": 20638, "iter_tflops": 0.6225243617166046, "iter_time": 2.5806844177246098, "loss": 0.2407848984003067, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 0.7604679873020491, "step_time": 2.1125661392211916} +{"epoch": 0, "iter": 20639, "iter_tflops": 1.1966438081039061, "iter_time": 
1.3425372772216795, "loss": 0.2570442855358124, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.3514598099736634, "step_time": 1.1887433929443358} +{"epoch": 0, "iter": 20640, "iter_tflops": 2.9904735522093167, "iter_time": 0.5372189025878906, "loss": 0.11529669910669327, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.2759368399720104, "step_time": 0.4904059505462646} +{"epoch": 0, "iter": 20641, "iter_tflops": 16.751243470519857, "iter_time": 0.9558899536132813, "loss": 0.3835708498954773, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 17.755481534563177, "step_time": 0.9018254623413087} +{"epoch": 0, "iter": 20642, "iter_tflops": 7.328124879508343, "iter_time": 2.185053558349609, "loss": 0.3825375437736511, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 8.512069099585272, "step_time": 1.8811343231201172} +{"epoch": 0, "iter": 20643, "iter_tflops": 7.211130364719512, "iter_time": 2.2205042114257814, "loss": 0.38831135630607605, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 9.110730590146419, "step_time": 1.7575259399414063} +{"epoch": 0, "iter": 20644, "iter_tflops": 24.113585669285193, "iter_time": 0.6640383377075196, "loss": 0.2646750211715698, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 25.92374597147485, "step_time": 0.617670970916748} +{"epoch": 0, "iter": 20645, "iter_tflops": 13.140178791686068, "iter_time": 1.1905290985107422, "loss": 0.35085350275039673, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 13.880652037229533, "step_time": 1.1270194778442382} +{"epoch": 0, "iter": 20646, "iter_tflops": 13.633444576060176, "iter_time": 1.1474550781250001, "loss": 0.3590083718299866, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 16.685205759628584, "step_time": 0.9375829963684081} +{"epoch": 0, "iter": 20647, "iter_tflops": 25.385530533861186, "iter_time": 0.6162473220825195, "loss": 0.4748750627040863, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 27.277479905413543, "step_time": 0.573504783630371} +{"epoch": 0, "iter": 20648, "iter_tflops": 22.9582268119553, 
"iter_time": 0.6814012832641602, "loss": 0.28076377511024475, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 24.63651540019535, "step_time": 0.6349828681945802} +{"epoch": 0, "iter": 20649, "iter_tflops": 17.713370830384196, "iter_time": 1.1647186584472657, "loss": 0.3361489772796631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.756778305085728, "step_time": 1.0999273529052735} +{"epoch": 0, "iter": 20650, "iter_tflops": 12.525138611250384, "iter_time": 1.6471748657226564, "loss": 0.34512054920196533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.446757744294818, "step_time": 1.2544170608520506} +{"epoch": 0, "iter": 20651, "iter_tflops": 18.385054799556364, "iter_time": 1.1221665496826172, "loss": 0.3975914418697357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.7509578074186, "step_time": 0.9485142536163331} +{"epoch": 0, "iter": 20652, "iter_tflops": 42.08811522560354, "iter_time": 0.4901881065368653, "loss": 0.48338791728019714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97090825230692, "step_time": 0.4487858581542969} +{"epoch": 0, "iter": 20653, "iter_tflops": 15.821393282626138, "iter_time": 1.0509501037597657, "loss": 0.33480188250541687, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 16.933257436184352, "step_time": 0.9819430770874024} +{"epoch": 0, "iter": 20654, "iter_tflops": 17.314226606940455, "iter_time": 0.9603371429443359, "loss": 0.35891562700271606, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 20.57267807885067, "step_time": 0.8082319107055664} +{"epoch": 0, "iter": 20655, "iter_tflops": 29.04852486842439, "iter_time": 0.5724041061401368, "loss": 0.21776722371578217, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 30.92841386047779, "step_time": 0.537612274169922} +{"epoch": 0, "iter": 20656, "iter_tflops": 29.83378863997785, "iter_time": 0.5573376922607421, "loss": 0.30971407890319824, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 31.762101042885956, "step_time": 0.5235011024475098} +{"epoch": 0, "iter": 20657, "iter_tflops": 
31.91549085790031, "iter_time": 0.6464288330078126, "loss": 0.03655838593840599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.095762929146346, "step_time": 0.6050925903320313} +{"epoch": 0, "iter": 20658, "iter_tflops": 9.667205974640474, "iter_time": 2.1341319885253904, "loss": 0.03435151278972626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.27856540863361, "step_time": 1.8292302932739255} +{"epoch": 0, "iter": 20659, "iter_tflops": 11.458258841274096, "iter_time": 1.800543502807617, "loss": 0.04027172923088074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.603125954554137, "step_time": 1.516643569946289} +{"epoch": 0, "iter": 20660, "iter_tflops": 39.18619467561286, "iter_time": 0.5264888229370117, "loss": 0.01683807000517845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.43682411397289, "step_time": 0.36556085205078126} +{"epoch": 0, "iter": 20661, "iter_tflops": 16.870787602191665, "iter_time": 1.0294170150756834, "loss": 0.4302331805229187, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 17.679801329604036, "step_time": 0.9823117065429688} +{"epoch": 0, "iter": 20662, "iter_tflops": 12.748203163335214, "iter_time": 1.3623155822753907, "loss": 0.19323718547821045, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 16.53295738834179, "step_time": 1.0504518585205078} +{"epoch": 0, "iter": 20663, "iter_tflops": 28.412152469108243, "iter_time": 0.6112551956176758, "loss": 0.20574569702148438, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 30.24213848039253, "step_time": 0.5742674522399903} +{"epoch": 0, "iter": 20664, "iter_tflops": 31.938373816391096, "iter_time": 0.5437683181762696, "loss": 0.1679023951292038, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 33.99231332485926, "step_time": 0.5109118537902833} +{"epoch": 0, "iter": 20665, "iter_tflops": 27.63821214197848, "iter_time": 0.7464699020385742, "loss": 0.20177362859249115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.133797642091416, "step_time": 0.7081498184204101} +{"epoch": 0, "iter": 20666, 
"iter_tflops": 10.880782363145176, "iter_time": 1.8961038665771484, "loss": 0.13947764039039612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.664743297520447, "step_time": 1.5098046894073487} +{"epoch": 0, "iter": 20667, "iter_tflops": 41.14285071777464, "iter_time": 0.5014502677917481, "loss": 0.11490378528833389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.09839430305277, "step_time": 0.45746847152709963} +{"epoch": 0, "iter": 20668, "iter_tflops": 43.222148487380245, "iter_time": 0.4773268852233886, "loss": 0.171518936753273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.278178716476546, "step_time": 0.4363766555786132} +{"epoch": 0, "iter": 20669, "iter_tflops": 19.668144905002727, "iter_time": 1.0489598083496094, "loss": 0.6183794140815735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.163808159346324, "step_time": 0.9748289794921875} +{"epoch": 0, "iter": 20670, "iter_tflops": 14.18007326095905, "iter_time": 1.4549356079101563, "loss": 0.6660065650939941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.834979009909407, "step_time": 1.2254897079467775} +{"epoch": 0, "iter": 20671, "iter_tflops": 33.72440861454922, "iter_time": 0.6117555313110351, "loss": 0.4443623125553131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.48048181523126, "step_time": 0.5655378570556641} +{"epoch": 0, "iter": 20672, "iter_tflops": 39.91230565661883, "iter_time": 0.5169105911254883, "loss": 0.4112909138202667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.33550921331889, "step_time": 0.4760782527923584} +{"epoch": 0, "iter": 20673, "iter_tflops": 18.9136749159041, "iter_time": 1.0908030090332033, "loss": 0.6084709167480469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.244575521255587, "step_time": 1.0190924224853517} +{"epoch": 0, "iter": 20674, "iter_tflops": 22.267589037762406, "iter_time": 0.9265077362060548, "loss": 0.3472171425819397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.30254984451429, "step_time": 0.7556471328735352} +{"epoch": 0, 
"iter": 20675, "iter_tflops": 43.649046435866545, "iter_time": 0.47265851593017577, "loss": 0.40325745940208435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.97594714351465, "step_time": 0.43918419456481933} +{"epoch": 0, "iter": 20676, "iter_tflops": 44.245697361492084, "iter_time": 0.46628474044799806, "loss": 0.6542221903800964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.78016175633376, "step_time": 0.43179203987121584} +{"epoch": 0, "iter": 20677, "iter_tflops": 30.921635926142876, "iter_time": 0.6672057571411133, "loss": 0.2699843943119049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.77124797818896, "step_time": 0.6295486068725585} +{"epoch": 0, "iter": 20678, "iter_tflops": 11.982429049623486, "iter_time": 1.7217789001464843, "loss": 0.338382363319397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.18818855001237, "step_time": 1.3583643264770509} +{"epoch": 0, "iter": 20679, "iter_tflops": 47.01089391039416, "iter_time": 0.43885771560668946, "loss": 0.24118392169475555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.03765988927348, "step_time": 0.4042327480316162} +{"epoch": 0, "iter": 20680, "iter_tflops": 44.250005296706014, "iter_time": 0.46623934555053703, "loss": 0.23157870769500732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69479108443174, "step_time": 0.432564920425415} +{"epoch": 0, "iter": 20681, "iter_tflops": 38.32381645966873, "iter_time": 0.5383360900878906, "loss": 0.26218125224113464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.372642113882016, "step_time": 0.49866511917114265} +{"epoch": 0, "iter": 20682, "iter_tflops": 37.48700908154891, "iter_time": 0.5503531494140625, "loss": 0.1176227480173111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.168211105529736, "step_time": 0.5011413650512696} +{"epoch": 0, "iter": 20683, "iter_tflops": 39.58147633787376, "iter_time": 0.5212310256958008, "loss": 0.18674063682556152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39923948277635, "step_time": 
0.47537914848327634} +{"epoch": 0, "iter": 20684, "iter_tflops": 41.524680329577215, "iter_time": 0.49683930969238277, "loss": 0.14159934222698212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1780027857871, "step_time": 0.4566623630523682} +{"epoch": 0, "iter": 20685, "iter_tflops": 18.218773941470474, "iter_time": 1.1324084472656248, "loss": 0.35701775550842285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.281141603635774, "step_time": 1.0700141067504882} +{"epoch": 0, "iter": 20686, "iter_tflops": 15.68219639516456, "iter_time": 1.315574234008789, "loss": 0.39796268939971924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.502773854797255, "step_time": 1.0578543167114258} +{"epoch": 0, "iter": 20687, "iter_tflops": 49.2786870697907, "iter_time": 0.41866159057617186, "loss": 0.3696553111076355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.5459968503395, "step_time": 0.3852966556549073} +{"epoch": 0, "iter": 20688, "iter_tflops": 49.06606197071156, "iter_time": 0.4204758377075195, "loss": 0.39502882957458496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.957636006486254, "step_time": 0.3895773124694824} +{"epoch": 0, "iter": 20689, "iter_tflops": 42.7796579122527, "iter_time": 0.4822641067504883, "loss": 0.26108551025390625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53339497834576, "step_time": 0.44336102104187014} +{"epoch": 0, "iter": 20690, "iter_tflops": 37.73263354979945, "iter_time": 0.5467705688476563, "loss": 0.28249311447143555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.77181159904698, "step_time": 0.4938998985290527} +{"epoch": 0, "iter": 20691, "iter_tflops": 38.93443466820399, "iter_time": 0.5298932342529298, "loss": 0.24687661230564117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.58720110186649, "step_time": 0.48444351768493654} +{"epoch": 0, "iter": 20692, "iter_tflops": 41.00317988841688, "iter_time": 0.5031583786010742, "loss": 0.21901024878025055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83594691093079, 
"step_time": 0.46014626502990724} +{"epoch": 0, "iter": 20693, "iter_tflops": 34.20028809653902, "iter_time": 0.5923270874023437, "loss": 0.04496723413467407, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 37.92265618044571, "step_time": 0.5341861324310303} +{"epoch": 0, "iter": 20694, "iter_tflops": 46.67947146950265, "iter_time": 0.43397571563720705, "loss": 0.04760279878973961, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 50.94391820419018, "step_time": 0.397648193359375} +{"epoch": 0, "iter": 20695, "iter_tflops": 50.20961791320537, "iter_time": 0.4034636764526367, "loss": 0.1313980370759964, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 54.64957245719094, "step_time": 0.3706846389770508} +{"epoch": 0, "iter": 20696, "iter_tflops": 50.234161344427385, "iter_time": 0.4032665519714355, "loss": 0.046097282320261, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 54.676602482174495, "step_time": 0.370501386642456} +{"epoch": 0, "iter": 20697, "iter_tflops": 19.601071499292026, "iter_time": 1.052549270629883, "loss": 0.5073500275611877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.363265578831545, "step_time": 1.0131525039672853} +{"epoch": 0, "iter": 20698, "iter_tflops": 14.761211396820565, "iter_time": 1.3976558532714844, "loss": 0.5065019130706787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.174039908586153, "step_time": 1.1351957855224608} +{"epoch": 0, "iter": 20699, "iter_tflops": 35.6057420127945, "iter_time": 0.5794316406250001, "loss": 0.4793049097061157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.67495050406844, "step_time": 0.5334484786987305} +{"epoch": 0, "iter": 20700, "iter_tflops": 38.948220221365794, "iter_time": 0.5297056808471681, "loss": 0.5091179609298706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.271436266466864, "step_time": 0.48806227874755864} +{"epoch": 0, "iter": 20701, "iter_tflops": 29.69761734822446, "iter_time": 0.6947053451538087, "loss": 0.484729528427124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
32.17797680446242, "step_time": 0.6411557083129883} +{"epoch": 0, "iter": 20702, "iter_tflops": 8.593374502254166, "iter_time": 2.4008139648437496, "loss": 0.416413813829422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.838770778864252, "step_time": 2.0969177932739256} +{"epoch": 0, "iter": 20703, "iter_tflops": 12.833449298518484, "iter_time": 1.6076031494140626, "loss": 0.497411847114563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.858166986889794, "step_time": 1.3885355796813965} +{"epoch": 0, "iter": 20704, "iter_tflops": 30.963585405771962, "iter_time": 0.6663018264770507, "loss": 0.5291562676429749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.776126920314205, "step_time": 0.5186803016662598} +{"epoch": 0, "iter": 20705, "iter_tflops": 12.835334459073213, "iter_time": 1.2539060821533203, "loss": 0.4173908233642578, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 13.681589924206083, "step_time": 1.1763474884033203} +{"epoch": 0, "iter": 20706, "iter_tflops": 11.267470933919016, "iter_time": 1.4283865509033205, "loss": 0.338699072599411, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 14.19980322159025, "step_time": 1.133417392730713} +{"epoch": 0, "iter": 20707, "iter_tflops": 26.390470246598895, "iter_time": 0.6098528671264648, "loss": 0.23683437705039978, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 28.078427732981094, "step_time": 0.5731910667419433} +{"epoch": 0, "iter": 20708, "iter_tflops": 28.619188979498492, "iter_time": 0.5623605880737305, "loss": 0.42628946900367737, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 30.36196130333575, "step_time": 0.530081169128418} +{"epoch": 0, "iter": 20709, "iter_tflops": 25.211789872472227, "iter_time": 0.760809783935547, "loss": 0.012349343858659267, "lr": 3e-05, "seqlen": 7632.0, "step_tflops": 26.796065358922615, "step_time": 0.7158280944824218} +{"epoch": 0, "iter": 20710, "iter_tflops": 16.682727599609535, "iter_time": 1.1497745971679687, "loss": 0.012113041244447231, "lr": 3e-05, "seqlen": 7632.0, 
"step_tflops": 18.947355381733303, "step_time": 1.0123511180877685} +{"epoch": 0, "iter": 20711, "iter_tflops": 44.76399007193059, "iter_time": 0.4285001487731933, "loss": 0.015871258452534676, "lr": 3e-05, "seqlen": 7632.0, "step_tflops": 49.64170964850494, "step_time": 0.38639636993408205} +{"epoch": 0, "iter": 20712, "iter_tflops": 46.4376535852599, "iter_time": 0.4130565376281738, "loss": 0.006123267114162445, "lr": 3e-05, "seqlen": 7632.0, "step_tflops": 51.386850465682656, "step_time": 0.37327402305603025} +{"epoch": 0, "iter": 20713, "iter_tflops": 20.61398245151408, "iter_time": 1.0008300704956055, "loss": 0.4679190218448639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.290193979593827, "step_time": 0.9255681457519531} +{"epoch": 0, "iter": 20714, "iter_tflops": 19.284116674850015, "iter_time": 1.0698490295410157, "loss": 0.3969149887561798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.921485045174226, "step_time": 0.8624503650665283} +{"epoch": 0, "iter": 20715, "iter_tflops": 45.73162932515086, "iter_time": 0.4511340141296387, "loss": 0.5666870474815369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.615876592109025, "step_time": 0.4158163661956787} +{"epoch": 0, "iter": 20716, "iter_tflops": 48.16267743270156, "iter_time": 0.42836267852783205, "loss": 0.4523066282272339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.10125906963836, "step_time": 0.39598070907592775} +{"epoch": 0, "iter": 20717, "iter_tflops": 20.99510449573138, "iter_time": 0.9826621017456055, "loss": 0.24855268001556396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.142793431001905, "step_time": 0.9317294845581054} +{"epoch": 0, "iter": 20718, "iter_tflops": 13.872271296409133, "iter_time": 1.4872181396484374, "loss": 0.2787434756755829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.22123073771988, "step_time": 1.2718574714660644} +{"epoch": 0, "iter": 20719, "iter_tflops": 45.88734517852414, "iter_time": 0.44960311889648436, "loss": 0.1537870168685913, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 50.97008029355225, "step_time": 0.40476870727539066} +{"epoch": 0, "iter": 20720, "iter_tflops": 49.09924894380568, "iter_time": 0.4201916313171387, "loss": 0.15422722697257996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36154663276793, "step_time": 0.3866284770965576} +{"epoch": 0, "iter": 20721, "iter_tflops": 15.898096655191525, "iter_time": 1.0717122192382813, "loss": 0.032728541642427444, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 16.51454710711547, "step_time": 1.0317076416015625} +{"epoch": 0, "iter": 20722, "iter_tflops": 12.062033654627232, "iter_time": 1.4125465850830077, "loss": 0.05038955807685852, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 15.148967399497623, "step_time": 1.1247092952728273} +{"epoch": 0, "iter": 20723, "iter_tflops": 33.81140088157645, "iter_time": 0.5039183235168457, "loss": 0.02959585189819336, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 37.21328884751413, "step_time": 0.45785215377807614} +{"epoch": 0, "iter": 20724, "iter_tflops": 35.86271067432982, "iter_time": 0.4750947189331055, "loss": 0.01533990353345871, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 39.26192047145369, "step_time": 0.43396207427978517} +{"epoch": 0, "iter": 20725, "iter_tflops": 17.853050581561316, "iter_time": 1.1556060638427734, "loss": 0.4138593375682831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.91883554629392, "step_time": 1.0905054626464845} +{"epoch": 0, "iter": 20726, "iter_tflops": 41.84535849687102, "iter_time": 0.4930318260192871, "loss": 0.27931487560272217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.85142171991894, "step_time": 0.4499553718566894} +{"epoch": 0, "iter": 20727, "iter_tflops": 46.946280497054424, "iter_time": 0.439461727142334, "loss": 0.4357436001300812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.95809904432404, "step_time": 0.40486387634277343} +{"epoch": 0, "iter": 20728, "iter_tflops": 49.02612016160003, "iter_time": 0.4208184013366699, "loss": 0.3528013527393341, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.72836483768856, "step_time": 0.39127125549316405} +{"epoch": 0, "iter": 20729, "iter_tflops": 23.430135605229918, "iter_time": 0.8805366668701172, "loss": 0.5051480531692505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.605446193823134, "step_time": 0.8384767074584962} +{"epoch": 0, "iter": 20730, "iter_tflops": 16.379509775271384, "iter_time": 1.2595672149658204, "loss": 0.38229528069496155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.92790161629063, "step_time": 0.9858175888061524} +{"epoch": 0, "iter": 20731, "iter_tflops": 39.01604606026413, "iter_time": 0.5287848358154297, "loss": 0.3034742772579193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6202573679303, "step_time": 0.4840677833557129} +{"epoch": 0, "iter": 20732, "iter_tflops": 41.77721086094913, "iter_time": 0.4938360671997071, "loss": 0.5694506168365479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.28871719535119, "step_time": 0.4555459899902344} +{"epoch": 0, "iter": 20733, "iter_tflops": 20.322103486640668, "iter_time": 1.0152046279907228, "loss": 0.313431978225708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.902674330189434, "step_time": 0.9419440383911133} +{"epoch": 0, "iter": 20734, "iter_tflops": 43.957834695235256, "iter_time": 0.4693382568359375, "loss": 0.19831548631191254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.92855577175975, "step_time": 0.4304551467895507} +{"epoch": 0, "iter": 20735, "iter_tflops": 48.789714211923574, "iter_time": 0.4228574371337891, "loss": 0.26094403862953186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.064874223692776, "step_time": 0.38879001998901364} +{"epoch": 0, "iter": 20736, "iter_tflops": 52.188001967619165, "iter_time": 0.39532254028320307, "loss": 0.30749762058258057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.57634980312723, "step_time": 0.3646593246459961} +{"epoch": 0, "iter": 20737, "iter_tflops": 48.342390642492035, "iter_time": 0.42677023696899413, "loss": 
0.12560337781906128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.48175432963378, "step_time": 0.3857594757080079} +{"epoch": 0, "iter": 20738, "iter_tflops": 46.64489840256801, "iter_time": 0.44230117797851554, "loss": 0.1706758737564087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5414077969227, "step_time": 0.40820179748535157} +{"epoch": 0, "iter": 20739, "iter_tflops": 50.06956675751388, "iter_time": 0.41204857254028315, "loss": 0.25076332688331604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.585031002596175, "step_time": 0.37796247673034666} +{"epoch": 0, "iter": 20740, "iter_tflops": 45.894966902558366, "iter_time": 0.44952845382690426, "loss": 0.14875660836696625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.734052252811075, "step_time": 0.41482832336425784} +{"epoch": 0, "iter": 20741, "iter_tflops": 29.660154702210445, "iter_time": 0.6955828018188476, "loss": 0.09721813350915909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.40680378209101, "step_time": 0.6568988571166992} +{"epoch": 0, "iter": 20742, "iter_tflops": 15.23416797531567, "iter_time": 1.3542645416259766, "loss": 0.13697916269302368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.65375342136281, "step_time": 1.168651958465576} +{"epoch": 0, "iter": 20743, "iter_tflops": 37.65299129976757, "iter_time": 0.5479270782470704, "loss": 0.053829967975616455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22218787743028, "step_time": 0.419142147064209} +{"epoch": 0, "iter": 20744, "iter_tflops": 54.10527848275605, "iter_time": 0.38131387710571285, "loss": 0.17493872344493866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.652327633597444, "step_time": 0.3517523403167725} +{"epoch": 0, "iter": 20745, "iter_tflops": 51.74248253136609, "iter_time": 0.3987263946533203, "loss": 0.001792165101505816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.990806536904394, "step_time": 0.36200739669799803} +{"epoch": 0, "iter": 20746, "iter_tflops": 48.004333211925875, "iter_time": 
0.42977565002441404, "loss": 0.003491244511678815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.79985121159655, "step_time": 0.3907415084838867} +{"epoch": 0, "iter": 20747, "iter_tflops": 52.81009300547043, "iter_time": 0.39066572952270506, "loss": 0.010037511587142944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.80975010709425, "step_time": 0.3568791332244873} +{"epoch": 0, "iter": 20748, "iter_tflops": 58.90015573127168, "iter_time": 0.35027230834960943, "loss": 0.0012044324539601803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.96662748144247, "step_time": 0.31756448364257817} +{"epoch": 0, "iter": 20749, "iter_tflops": 38.3825695301756, "iter_time": 0.5375120468139648, "loss": 0.5814811587333679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.49910348722094, "step_time": 0.497145523071289} +{"epoch": 0, "iter": 20750, "iter_tflops": 34.14543978123952, "iter_time": 0.6042122650146484, "loss": 0.5354276299476624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.12714960050148, "step_time": 0.555687515258789} +{"epoch": 0, "iter": 20751, "iter_tflops": 35.89480795030766, "iter_time": 0.5747653961181641, "loss": 0.6056567430496216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.10689671782807, "step_time": 0.527556396484375} +{"epoch": 0, "iter": 20752, "iter_tflops": 35.34881366998535, "iter_time": 0.5836431655883789, "loss": 0.6847892999649048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.529889684229865, "step_time": 0.5354568538665772} +{"epoch": 0, "iter": 20753, "iter_tflops": 20.47128064193802, "iter_time": 1.0078066864013673, "loss": 0.4086250066757202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.81142892426623, "step_time": 0.9458845443725585} +{"epoch": 0, "iter": 20754, "iter_tflops": 20.90607558579148, "iter_time": 0.9868467864990234, "loss": 0.42943134903907776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.746696867162395, "step_time": 0.8013103046417236} +{"epoch": 0, "iter": 20755, "iter_tflops": 48.961343314709964, 
"iter_time": 0.4213751525878906, "loss": 0.4732086658477783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.52351715687244, "step_time": 0.38545847892761226} +{"epoch": 0, "iter": 20756, "iter_tflops": 48.395351798332506, "iter_time": 0.42630320358276363, "loss": 0.41362491250038147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.16837227159837, "step_time": 0.39547129058837893} +{"epoch": 0, "iter": 20757, "iter_tflops": 24.220439068234874, "iter_time": 0.8518050994873047, "loss": 0.418194055557251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.469137850876905, "step_time": 0.8100428695678711} +{"epoch": 0, "iter": 20758, "iter_tflops": 16.682321394375617, "iter_time": 1.2367039947509766, "loss": 0.6544889807701111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.663496344540803, "step_time": 1.1054248962402342} +{"epoch": 0, "iter": 20759, "iter_tflops": 46.46099801579189, "iter_time": 0.44405187988281253, "loss": 0.538865864276886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.32630363018851, "step_time": 0.4099465293884277} +{"epoch": 0, "iter": 20760, "iter_tflops": 43.96598508043312, "iter_time": 0.4692512512207031, "loss": 0.4776786267757416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.12823282402587, "step_time": 0.43776505661010745} +{"epoch": 0, "iter": 20761, "iter_tflops": 1.5937466833129206, "iter_time": 0.8601995697021484, "loss": 0.8227344155311584, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 1.670575935594168, "step_time": 0.8206392669677735} +{"epoch": 0, "iter": 20762, "iter_tflops": 1.0780457977345044, "iter_time": 1.2716901397705078, "loss": 0.5780503749847412, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 1.4437425385303122, "step_time": 0.9495738849639892} +{"epoch": 0, "iter": 20763, "iter_tflops": 2.5948913270797207, "iter_time": 0.5283227844238281, "loss": 0.6830384135246277, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 2.84439523972506, "step_time": 0.4819795055389405} +{"epoch": 0, "iter": 20764, "iter_tflops": 
2.6583311228254876, "iter_time": 0.5157146148681641, "loss": 0.8440386056900024, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 2.917916457629011, "step_time": 0.46983531951904295} +{"epoch": 0, "iter": 20765, "iter_tflops": 32.262355062646506, "iter_time": 0.6394788436889649, "loss": 0.20794397592544556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.746473819433525, "step_time": 0.5771504516601562} +{"epoch": 0, "iter": 20766, "iter_tflops": 38.47819759691, "iter_time": 0.5361761932373047, "loss": 0.24558398127555847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.244407526593015, "step_time": 0.4883745498657226} +{"epoch": 0, "iter": 20767, "iter_tflops": 43.41435447452621, "iter_time": 0.4752136421203613, "loss": 0.24519944190979004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.38387082175865, "step_time": 0.43540329551696777} +{"epoch": 0, "iter": 20768, "iter_tflops": 36.55396583080099, "iter_time": 0.5644009628295898, "loss": 0.16710279881954193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.00993929072305, "step_time": 0.5156492080688476} +{"epoch": 0, "iter": 20769, "iter_tflops": 16.394468310586102, "iter_time": 1.2584179687500001, "loss": 0.3844093978404999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.382136959226273, "step_time": 1.1869135284423828} +{"epoch": 0, "iter": 20770, "iter_tflops": 16.60162181304461, "iter_time": 1.2427155456542969, "loss": 0.42483246326446533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.359763607363387, "step_time": 1.0133267707824707} +{"epoch": 0, "iter": 20771, "iter_tflops": 38.23669996714714, "iter_time": 0.5395626068115236, "loss": 0.6349926590919495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.56478542540082, "step_time": 0.49635991859436035} +{"epoch": 0, "iter": 20772, "iter_tflops": 34.10626219889915, "iter_time": 0.6049063186645507, "loss": 0.44793540239334106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.632902165330556, "step_time": 0.5631847953796386} +{"epoch": 0, "iter": 20773, 
"iter_tflops": 21.62978192245559, "iter_time": 0.9538280868530273, "loss": 0.07016413658857346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.53053575969792, "step_time": 0.8767795906066895} +{"epoch": 0, "iter": 20774, "iter_tflops": 19.93550217710035, "iter_time": 1.03489208984375, "loss": 0.04657716676592827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.577030034781366, "step_time": 0.7762753582000732} +{"epoch": 0, "iter": 20775, "iter_tflops": 50.441766505812055, "iter_time": 0.4090081481933594, "loss": 0.08742471039295197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.66505734072165, "step_time": 0.3774091625213623} +{"epoch": 0, "iter": 20776, "iter_tflops": 50.45479301383172, "iter_time": 0.40890254974365237, "loss": 0.046982552856206894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.543629922590156, "step_time": 0.37143941688537596} +{"epoch": 0, "iter": 20777, "iter_tflops": 24.827931948957385, "iter_time": 0.8309630279541016, "loss": 0.41199207305908203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.10406714531221, "step_time": 0.7903401947021484} +{"epoch": 0, "iter": 20778, "iter_tflops": 9.827617312192736, "iter_time": 2.099297607421875, "loss": 0.4962943196296692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.597646152702083, "step_time": 1.6376943168640137} +{"epoch": 0, "iter": 20779, "iter_tflops": 12.761012700226404, "iter_time": 1.616728546142578, "loss": 0.3352065980434418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.576281362738053, "step_time": 1.4153879852294922} +{"epoch": 0, "iter": 20780, "iter_tflops": 28.148187560994483, "iter_time": 0.7329457168579101, "loss": 0.508120059967041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.672895889021184, "step_time": 0.5950207786560059} +{"epoch": 0, "iter": 20781, "iter_tflops": 13.46658321211007, "iter_time": 1.158633575439453, "loss": 0.3805641531944275, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 14.206425508522212, "step_time": 1.0982942504882813} +{"epoch": 0, 
"iter": 20782, "iter_tflops": 7.0834415966287025, "iter_time": 2.2027195739746097, "loss": 0.42884770035743713, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 10.517601628857845, "step_time": 1.4834974746704102} +{"epoch": 0, "iter": 20783, "iter_tflops": 9.153224681832253, "iter_time": 1.7046271667480466, "loss": 0.44507476687431335, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 11.278861140276609, "step_time": 1.3833697624206545} +{"epoch": 0, "iter": 20784, "iter_tflops": 22.43042004852722, "iter_time": 0.6956104888916017, "loss": 0.307769775390625, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.12759943229073, "step_time": 0.6466799774169922} +{"epoch": 0, "iter": 20785, "iter_tflops": 16.621783794839295, "iter_time": 1.0968649749755859, "loss": 0.31910640001296997, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 17.77452790679052, "step_time": 1.0257292098999022} +{"epoch": 0, "iter": 20786, "iter_tflops": 25.359328685703602, "iter_time": 0.7189406585693361, "loss": 0.4071507453918457, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 31.010276088092994, "step_time": 0.5879293823242188} +{"epoch": 0, "iter": 20787, "iter_tflops": 33.98257702090272, "iter_time": 0.5365058822631836, "loss": 0.47619569301605225, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 36.19776382346067, "step_time": 0.5036734466552735} +{"epoch": 0, "iter": 20788, "iter_tflops": 29.426697464836025, "iter_time": 0.6195684204101563, "loss": 0.2260565608739853, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 31.279596089897233, "step_time": 0.5828672599792482} +{"epoch": 0, "iter": 20789, "iter_tflops": 28.985789281171726, "iter_time": 0.7117658004760743, "loss": 0.012559094466269016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.021162815131913, "step_time": 0.6650651245117188} +{"epoch": 0, "iter": 20790, "iter_tflops": 29.1639615217224, "iter_time": 0.7074173889160156, "loss": 0.026735862717032433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.92146607536815, "step_time": 0.626676025390625} 
+{"epoch": 0, "iter": 20791, "iter_tflops": 41.687184996170224, "iter_time": 0.49490253448486327, "loss": 0.022605858743190765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.13052222788094, "step_time": 0.44723303604125975} +{"epoch": 0, "iter": 20792, "iter_tflops": 42.559396262426134, "iter_time": 0.4847600135803222, "loss": 0.00696158641949296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.99103062354425, "step_time": 0.4390432224273682} +{"epoch": 0, "iter": 20793, "iter_tflops": 25.821156137838493, "iter_time": 0.7989996032714843, "loss": 0.46493497490882874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.579494027590805, "step_time": 0.7480591735839843} +{"epoch": 0, "iter": 20794, "iter_tflops": 13.750426528361471, "iter_time": 1.5003966217041016, "loss": 0.5589310526847839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.367160696753203, "step_time": 1.1879370422363282} +{"epoch": 0, "iter": 20795, "iter_tflops": 44.42118345842341, "iter_time": 0.4644426803588867, "loss": 0.4289470911026001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.10957074199913, "step_time": 0.4288355350494385} +{"epoch": 0, "iter": 20796, "iter_tflops": 43.9247460658122, "iter_time": 0.4696918106079101, "loss": 0.6164453625679016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.11659342350884, "step_time": 0.43787319946289066} +{"epoch": 0, "iter": 20797, "iter_tflops": 15.36684084610104, "iter_time": 0.9940603561401367, "loss": 0.3830868601799011, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 15.99869634843312, "step_time": 0.9548007507324218} +{"epoch": 0, "iter": 20798, "iter_tflops": 10.634016764266331, "iter_time": 1.4364813995361327, "loss": 0.24211783707141876, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 13.248179491653387, "step_time": 1.1530314254760743} +{"epoch": 0, "iter": 20799, "iter_tflops": 27.68588461883783, "iter_time": 0.5517456817626953, "loss": 0.34628912806510925, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.50775866442316, "step_time": 
0.5176796874999999} +{"epoch": 0, "iter": 20800, "iter_tflops": 26.702851367172137, "iter_time": 0.5720575332641601, "loss": 0.420489102602005, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.469765998357385, "step_time": 0.5365540161132814} +{"epoch": 0, "iter": 20801, "iter_tflops": 28.235616617635692, "iter_time": 0.7306762161254884, "loss": 0.0032803500071167946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.866922158496678, "step_time": 0.6907673110961915} +{"epoch": 0, "iter": 20802, "iter_tflops": 16.262004197260797, "iter_time": 1.2686685638427735, "loss": 0.01167983002960682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.408015228578268, "step_time": 0.9637088394165041} +{"epoch": 0, "iter": 20803, "iter_tflops": 42.83885949087903, "iter_time": 0.48159763717651366, "loss": 0.0045014433562755585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31246544583546, "step_time": 0.4360604190826416} +{"epoch": 0, "iter": 20804, "iter_tflops": 45.116065630807014, "iter_time": 0.4572892875671386, "loss": 0.002081655664369464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.78960410550106, "step_time": 0.41436548614501956} +{"epoch": 0, "iter": 20805, "iter_tflops": 19.12616389960838, "iter_time": 1.0786843414306642, "loss": 0.03240862488746643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.714740305383657, "step_time": 0.9959619674682617} +{"epoch": 0, "iter": 20806, "iter_tflops": 23.893893732679558, "iter_time": 0.8634462738037111, "loss": 0.036584630608558655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.486351683677462, "step_time": 0.6996828136444092} +{"epoch": 0, "iter": 20807, "iter_tflops": 46.006256884621514, "iter_time": 0.44844103622436526, "loss": 0.029709896072745323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.688072483618825, "step_time": 0.4070206756591797} +{"epoch": 0, "iter": 20808, "iter_tflops": 47.458802460941925, "iter_time": 0.43471584701538085, "loss": 0.04905583709478378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
52.50167449324809, "step_time": 0.3929606761932373} +{"epoch": 0, "iter": 20809, "iter_tflops": 15.414137837450191, "iter_time": 0.9671479339599608, "loss": 0.0718829482793808, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 16.82394965195139, "step_time": 0.8861029586791991} +{"epoch": 0, "iter": 20810, "iter_tflops": 16.13954649712794, "iter_time": 0.923678466796875, "loss": 0.07633904367685318, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 19.99451854298854, "step_time": 0.7455919246673584} +{"epoch": 0, "iter": 20811, "iter_tflops": 32.823608603005205, "iter_time": 0.45417771530151363, "loss": 0.05136838182806969, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 35.698111471857786, "step_time": 0.4176061687469482} +{"epoch": 0, "iter": 20812, "iter_tflops": 37.79935120737294, "iter_time": 0.39439173126220706, "loss": 0.055695705115795135, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 41.373209301433356, "step_time": 0.3603237895965576} +{"epoch": 0, "iter": 20813, "iter_tflops": 33.41374801580871, "iter_time": 0.6174432601928712, "loss": 0.024050354957580566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.695133696489265, "step_time": 0.5779805641174317} +{"epoch": 0, "iter": 20814, "iter_tflops": 20.850000701345426, "iter_time": 0.9895008544921875, "loss": 0.03577946126461029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.866358570072748, "step_time": 0.8644424514770509} +{"epoch": 0, "iter": 20815, "iter_tflops": 40.978478462331545, "iter_time": 0.5034616775512696, "loss": 0.036405183374881744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.28779261916268, "step_time": 0.4555552902221679} +{"epoch": 0, "iter": 20816, "iter_tflops": 42.52275781172577, "iter_time": 0.48517769241333, "loss": 0.030595552176237106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17880808622433, "step_time": 0.43729577636718747} +{"epoch": 0, "iter": 20817, "iter_tflops": 17.752992067330915, "iter_time": 1.1621192321777343, "loss": 0.6040491461753845, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 19.2427788135775, "step_time": 1.0721473083496094} +{"epoch": 0, "iter": 20818, "iter_tflops": 16.478791970199875, "iter_time": 1.251978515625, "loss": 0.4963664412498474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.447540375795445, "step_time": 1.060858757019043} +{"epoch": 0, "iter": 20819, "iter_tflops": 33.920038484962525, "iter_time": 0.6082273025512696, "loss": 0.35238322615623474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.72068976476062, "step_time": 0.5618383979797363} +{"epoch": 0, "iter": 20820, "iter_tflops": 36.2481579209748, "iter_time": 0.5691625366210937, "loss": 0.6398329138755798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.38630501452579, "step_time": 0.523813886642456} +{"epoch": 0, "iter": 20821, "iter_tflops": 23.177243312059282, "iter_time": 0.8525849838256836, "loss": 0.047013137489557266, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 24.803877314526343, "step_time": 0.796672607421875} +{"epoch": 0, "iter": 20822, "iter_tflops": 22.936136226103276, "iter_time": 0.86154744720459, "loss": 0.10762327164411545, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 28.092247217950245, "step_time": 0.7034171905517578} +{"epoch": 0, "iter": 20823, "iter_tflops": 47.71039138113461, "iter_time": 0.41417747879028327, "loss": 0.05354325473308563, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 52.15246092854639, "step_time": 0.37890004158020024} +{"epoch": 0, "iter": 20824, "iter_tflops": 51.72827953427341, "iter_time": 0.3820070915222168, "loss": 0.05853786692023277, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 56.24837473466389, "step_time": 0.35130916595458983} +{"epoch": 0, "iter": 20825, "iter_tflops": 39.31632985223359, "iter_time": 0.5247461700439453, "loss": 0.3634782135486603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.28809930341944, "step_time": 0.48786996459960935} +{"epoch": 0, "iter": 20826, "iter_tflops": 28.501744059572694, "iter_time": 0.7238537216186522, "loss": 0.2958599030971527, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 35.89561879310627, "step_time": 0.5747524127960205} +{"epoch": 0, "iter": 20827, "iter_tflops": 37.9771736672686, "iter_time": 0.5432498397827149, "loss": 0.3835020363330841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.45831901083336, "step_time": 0.49763458824157714} +{"epoch": 0, "iter": 20828, "iter_tflops": 41.15049935592132, "iter_time": 0.5013570632934571, "loss": 0.35882043838500977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.67352581564382, "step_time": 0.46181923484802245} +{"epoch": 0, "iter": 20829, "iter_tflops": 16.41639949828172, "iter_time": 1.0478907012939453, "loss": 0.04279008507728577, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 17.66462629553935, "step_time": 0.9738441162109375} +{"epoch": 0, "iter": 20830, "iter_tflops": 17.431506718944714, "iter_time": 0.9868677825927735, "loss": 0.05605364218354225, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 23.661591689800613, "step_time": 0.7270260009765626} +{"epoch": 0, "iter": 20831, "iter_tflops": 41.15685670540685, "iter_time": 0.41797634124755856, "loss": 0.04895542189478874, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 44.743365258214446, "step_time": 0.3844724750518799} +{"epoch": 0, "iter": 20832, "iter_tflops": 44.14449707367455, "iter_time": 0.3896882629394531, "loss": 0.050345875322818756, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 47.97580821409466, "step_time": 0.358568058013916} +{"epoch": 0, "iter": 20833, "iter_tflops": 29.858654493636543, "iter_time": 0.69095858001709, "loss": 0.4969642162322998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.794139017553448, "step_time": 0.6488961219787598} +{"epoch": 0, "iter": 20834, "iter_tflops": 14.387144099018581, "iter_time": 1.4339950561523436, "loss": 0.4768178462982178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.84887901052163, "step_time": 1.224478702545166} +{"epoch": 0, "iter": 20835, "iter_tflops": 37.97936106566973, "iter_time": 0.5432185516357422, "loss": 0.6053658127784729, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.55508598563312, "step_time": 0.49647577476501475} +{"epoch": 0, "iter": 20836, "iter_tflops": 36.833307742609726, "iter_time": 0.5601205749511718, "loss": 0.5069262981414795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.84709792427437, "step_time": 0.5177564888000488} +{"epoch": 0, "iter": 20837, "iter_tflops": 28.72574484583562, "iter_time": 0.7182091751098633, "loss": 0.04329640790820122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.20933831925748, "step_time": 0.6405314292907714} +{"epoch": 0, "iter": 20838, "iter_tflops": 38.62195094536811, "iter_time": 0.5341805114746093, "loss": 0.041071999818086624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.603832483157426, "step_time": 0.48425440406799314} +{"epoch": 0, "iter": 20839, "iter_tflops": 40.678302609414814, "iter_time": 0.5071768531799317, "loss": 0.06127281114459038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.70537600008153, "step_time": 0.461490213394165} +{"epoch": 0, "iter": 20840, "iter_tflops": 45.838759751241945, "iter_time": 0.45007966232299806, "loss": 0.04917507246136665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.45656431395541, "step_time": 0.4088881950378418} +{"epoch": 0, "iter": 20841, "iter_tflops": 38.76956088151468, "iter_time": 0.5321466903686524, "loss": 0.0021227439865469933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.5598761013582, "step_time": 0.4736260833740234} +{"epoch": 0, "iter": 20842, "iter_tflops": 39.953863214504736, "iter_time": 0.516372932434082, "loss": 0.007406636141240597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32187912561991, "step_time": 0.46548327636718745} +{"epoch": 0, "iter": 20843, "iter_tflops": 41.1725226318401, "iter_time": 0.5010888862609862, "loss": 0.0016980712534859776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79753114920036, "step_time": 0.45048484039306635} +{"epoch": 0, "iter": 20844, "iter_tflops": 44.08464061692696, "iter_time": 0.4679882431030274, "loss": 
0.016541777178645134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.61877497750359, "step_time": 0.42434416580200196} +{"epoch": 0, "iter": 20845, "iter_tflops": 20.310708325943313, "iter_time": 1.015774200439453, "loss": 0.16529981791973114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.63600343388667, "step_time": 0.9535538101196288} +{"epoch": 0, "iter": 20846, "iter_tflops": 24.053541897339812, "iter_time": 0.8577154083251952, "loss": 0.2442386895418167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.635656991538745, "step_time": 0.6961577911376953} +{"epoch": 0, "iter": 20847, "iter_tflops": 46.17008084500215, "iter_time": 0.44684984588623045, "loss": 0.21652401983737946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.998107173530244, "step_time": 0.4126374912261963} +{"epoch": 0, "iter": 20848, "iter_tflops": 49.32306269821093, "iter_time": 0.4182849235534668, "loss": 0.2294093668460846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.94624528030417, "step_time": 0.3896611251831055} +{"epoch": 0, "iter": 20849, "iter_tflops": 34.347297618985316, "iter_time": 0.6006613311767579, "loss": 0.3569791913032532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.65711213756503, "step_time": 0.5628128433227539} +{"epoch": 0, "iter": 20850, "iter_tflops": 15.633104433725268, "iter_time": 1.3197054748535157, "loss": 0.4280064105987549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.694718398917292, "step_time": 1.1035787258148195} +{"epoch": 0, "iter": 20851, "iter_tflops": 35.573198038600495, "iter_time": 0.5799617309570312, "loss": 0.3165980875492096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.54318707099616, "step_time": 0.5352721214294434} +{"epoch": 0, "iter": 20852, "iter_tflops": 37.37521176170902, "iter_time": 0.5519993743896484, "loss": 0.5521293878555298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.32230705812859, "step_time": 0.5116545906066895} +{"epoch": 0, "iter": 20853, "iter_tflops": 23.490733484745235, "iter_time": 
0.8782651901245118, "loss": 0.11724410206079483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.069644266507343, "step_time": 0.8229511871337891} +{"epoch": 0, "iter": 20854, "iter_tflops": 6.956894985528694, "iter_time": 2.9655605773925786, "loss": 0.11904491484165192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.425769401893424, "step_time": 2.448570869445801} +{"epoch": 0, "iter": 20855, "iter_tflops": 14.865427229744776, "iter_time": 1.387857421875, "loss": 0.16567738354206085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.04974967712248, "step_time": 1.143012722015381} +{"epoch": 0, "iter": 20856, "iter_tflops": 41.52906047099677, "iter_time": 0.4967869071960449, "loss": 0.19744274020195007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.72169065713982, "step_time": 0.45123207855224606} +{"epoch": 0, "iter": 20857, "iter_tflops": 12.068185097772687, "iter_time": 1.3982095794677734, "loss": 0.33515429496765137, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 12.987075357382798, "step_time": 1.2992803649902342} +{"epoch": 0, "iter": 20858, "iter_tflops": 12.70753116729526, "iter_time": 1.3278623352050782, "loss": 0.25009915232658386, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 17.88303666250689, "step_time": 0.9435674896240234} +{"epoch": 0, "iter": 20859, "iter_tflops": 25.68383348282329, "iter_time": 0.6569833908081055, "loss": 0.23797760903835297, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.753115721035954, "step_time": 0.6079984741210938} +{"epoch": 0, "iter": 20860, "iter_tflops": 25.509038528446546, "iter_time": 0.661485221862793, "loss": 0.25648435950279236, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.401841309368084, "step_time": 0.6157926330566407} +{"epoch": 0, "iter": 20861, "iter_tflops": 17.34853232562749, "iter_time": 1.1892126159667968, "loss": 0.4658740162849426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.640594531148423, "step_time": 1.1067830200195312} +{"epoch": 0, "iter": 20862, "iter_tflops": 20.62838134510708, 
"iter_time": 1.000131477355957, "loss": 0.5542107224464417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.779635076651743, "step_time": 0.8325826206207274} +{"epoch": 0, "iter": 20863, "iter_tflops": 43.2540915275421, "iter_time": 0.47697438049316404, "loss": 0.7133958339691162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.50039144164942, "step_time": 0.4436756954193115} +{"epoch": 0, "iter": 20864, "iter_tflops": 48.681838148608634, "iter_time": 0.42379446411132815, "loss": 0.41161322593688965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.39099670664887, "step_time": 0.39379081916809083} +{"epoch": 0, "iter": 20865, "iter_tflops": 25.96322584943704, "iter_time": 0.7946275100708008, "loss": 0.01608693040907383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.521175849785983, "step_time": 0.7496443328857421} +{"epoch": 0, "iter": 20866, "iter_tflops": 14.844412559774733, "iter_time": 1.3898221588134767, "loss": 0.007717664819210768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.587626882047967, "step_time": 1.1730458946228026} +{"epoch": 0, "iter": 20867, "iter_tflops": 51.739234342182094, "iter_time": 0.3987514266967774, "loss": 0.008329588919878006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.71356508315467, "step_time": 0.3637770519256592} +{"epoch": 0, "iter": 20868, "iter_tflops": 55.700066493440744, "iter_time": 0.3703962097167969, "loss": 0.005357824265956879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.06090571925442, "step_time": 0.33787729263305666} +{"epoch": 0, "iter": 20869, "iter_tflops": 31.34176331581213, "iter_time": 0.6582620544433594, "loss": 0.23671124875545502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.25830674089584, "step_time": 0.6203290405273438} +{"epoch": 0, "iter": 20870, "iter_tflops": 12.508446584849553, "iter_time": 1.6493729553222658, "loss": 0.36997270584106445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.724428872077038, "step_time": 1.2335903167724611} +{"epoch": 0, "iter": 20871, "iter_tflops": 
37.05291920011656, "iter_time": 0.5568007583618164, "loss": 0.21003732085227966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54274431174986, "step_time": 0.5088726444244385} +{"epoch": 0, "iter": 20872, "iter_tflops": 41.212486281140926, "iter_time": 0.5006029815673828, "loss": 0.17955821752548218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.16195878820783, "step_time": 0.45682459449768065} +{"epoch": 0, "iter": 20873, "iter_tflops": 26.04534815156297, "iter_time": 0.7921220092773437, "loss": 0.31763991713523865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.040504044953707, "step_time": 0.7357604370117188} +{"epoch": 0, "iter": 20874, "iter_tflops": 8.34480498286464, "iter_time": 2.4723278198242187, "loss": 0.28206977248191833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.779303258377126, "step_time": 2.109669059753418} +{"epoch": 0, "iter": 20875, "iter_tflops": 11.01722070852264, "iter_time": 1.872622329711914, "loss": 0.3121975064277649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.687120009519662, "step_time": 1.5073363494873047} +{"epoch": 0, "iter": 20876, "iter_tflops": 38.67525694061909, "iter_time": 0.5334442520141601, "loss": 0.2800033688545227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.504309573439514, "step_time": 0.48538827514648436} +{"epoch": 0, "iter": 20877, "iter_tflops": 9.481926466170432, "iter_time": 1.5937738494873048, "loss": 0.3557548224925995, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 10.095792973151296, "step_time": 1.49686572265625} +{"epoch": 0, "iter": 20878, "iter_tflops": 16.282846488311854, "iter_time": 0.92809610748291, "loss": 0.21688319742679596, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 21.791179195986707, "step_time": 0.6934937438964844} +{"epoch": 0, "iter": 20879, "iter_tflops": 26.204392776630293, "iter_time": 0.576698974609375, "loss": 0.28142207860946655, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.98142240229676, "step_time": 0.540074275970459} +{"epoch": 0, "iter": 20880, 
"iter_tflops": 27.52662670029849, "iter_time": 0.5489973983764649, "loss": 0.46188461780548096, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.293176558318613, "step_time": 0.5158896446228027} +{"epoch": 0, "iter": 20881, "iter_tflops": 32.21100784427325, "iter_time": 0.6404982299804687, "loss": 0.08765090256929398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.55002181048605, "step_time": 0.5971369171142578} +{"epoch": 0, "iter": 20882, "iter_tflops": 15.4763645859198, "iter_time": 1.333071044921875, "loss": 0.05775241181254387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.61865600007152, "step_time": 1.1080871524810791} +{"epoch": 0, "iter": 20883, "iter_tflops": 37.83919911768189, "iter_time": 0.5452307128906251, "loss": 0.04396839439868927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.833576053539495, "step_time": 0.49317068862915037} +{"epoch": 0, "iter": 20884, "iter_tflops": 41.099321801611254, "iter_time": 0.5019813613891602, "loss": 0.059118662029504776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.142599922718624, "step_time": 0.45702049827575686} +{"epoch": 0, "iter": 20885, "iter_tflops": 18.178945298525647, "iter_time": 1.134889465332031, "loss": 0.020672185346484184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.434350348487943, "step_time": 1.0615787582397462} +{"epoch": 0, "iter": 20886, "iter_tflops": 36.26572997177428, "iter_time": 0.5688867568969727, "loss": 0.04288306459784508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.86173272502892, "step_time": 0.4402546024322509} +{"epoch": 0, "iter": 20887, "iter_tflops": 54.36726010279224, "iter_time": 0.3794764251708984, "loss": 0.025951329618692398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.21099354353055, "step_time": 0.3484334964752197} +{"epoch": 0, "iter": 20888, "iter_tflops": 52.02854218685806, "iter_time": 0.3965341453552246, "loss": 0.017711466178297997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.173156442830596, "step_time": 0.36727673530578614} +{"epoch": 
0, "iter": 20889, "iter_tflops": 28.069758271818124, "iter_time": 0.7349936294555663, "loss": 0.4917997419834137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.539839657913728, "step_time": 0.6984158935546875} +{"epoch": 0, "iter": 20890, "iter_tflops": 12.059744860928463, "iter_time": 1.7107404632568362, "loss": 0.5644205808639526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.205003353458425, "step_time": 1.4523821640014647} +{"epoch": 0, "iter": 20891, "iter_tflops": 42.550553783833315, "iter_time": 0.4848607521057129, "loss": 0.5421968102455139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.921238855174884, "step_time": 0.4492712745666504} +{"epoch": 0, "iter": 20892, "iter_tflops": 45.710721269498784, "iter_time": 0.4513403625488281, "loss": 0.5767735838890076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.29884418972917, "step_time": 0.4184904098510742} +{"epoch": 0, "iter": 20893, "iter_tflops": 24.34420747124849, "iter_time": 0.6594308853149413, "loss": 0.20162098109722137, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 25.947622778387416, "step_time": 0.618681812286377} +{"epoch": 0, "iter": 20894, "iter_tflops": 12.556929286030073, "iter_time": 1.2784433135986328, "loss": 0.3335500657558441, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 15.068993233049463, "step_time": 1.0653214874267578} +{"epoch": 0, "iter": 20895, "iter_tflops": 22.862962726881605, "iter_time": 0.702154067993164, "loss": 0.3163725733757019, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 24.549258416759606, "step_time": 0.6539229011535644} +{"epoch": 0, "iter": 20896, "iter_tflops": 25.303792108875932, "iter_time": 0.6344235763549805, "loss": 0.5007486939430237, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 27.147335194136822, "step_time": 0.5913406295776367} +{"epoch": 0, "iter": 20897, "iter_tflops": 20.770963299451452, "iter_time": 0.9932660903930663, "loss": 0.49380481243133545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.639719317456834, "step_time": 
0.911278678894043} +{"epoch": 0, "iter": 20898, "iter_tflops": 17.552899696488936, "iter_time": 1.175366683959961, "loss": 0.3708626925945282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.170546400105007, "step_time": 0.9745187072753907} +{"epoch": 0, "iter": 20899, "iter_tflops": 40.75068342957964, "iter_time": 0.5062760124206542, "loss": 0.6390202045440674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.690814222422226, "step_time": 0.4722066612243652} +{"epoch": 0, "iter": 20900, "iter_tflops": 42.319975240536294, "iter_time": 0.48750249481201174, "loss": 0.534793496131897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.80043527442099, "step_time": 0.4504562759399414} +{"epoch": 0, "iter": 20901, "iter_tflops": 40.58647753479828, "iter_time": 0.508324317932129, "loss": 0.03830699995160103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.19990306604279, "step_time": 0.46676784515380854} +{"epoch": 0, "iter": 20902, "iter_tflops": 12.104187160939697, "iter_time": 1.704459228515625, "loss": 0.061446238309144974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.769342606965479, "step_time": 1.4983354034423826} +{"epoch": 0, "iter": 20903, "iter_tflops": 14.91251904618726, "iter_time": 1.3834747467041015, "loss": 0.04531760886311531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.645384857503526, "step_time": 1.2394482727050782} +{"epoch": 0, "iter": 20904, "iter_tflops": 41.07154925310052, "iter_time": 0.5023208007812501, "loss": 0.062447477132081985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37769679082716, "step_time": 0.45465272521972655} +{"epoch": 0, "iter": 20905, "iter_tflops": 20.033986580935355, "iter_time": 0.7747329788208008, "loss": 0.4308985471725464, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 21.878262954334627, "step_time": 0.7094251556396485} +{"epoch": 0, "iter": 20906, "iter_tflops": 23.30591099947702, "iter_time": 0.6659679641723633, "loss": 0.27916404604911804, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 25.087687958100865, 
"step_time": 0.6186696090698243} +{"epoch": 0, "iter": 20907, "iter_tflops": 22.552832608750183, "iter_time": 0.688205795288086, "loss": 0.3632597029209137, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 24.315354851227312, "step_time": 0.6383205261230469} +{"epoch": 0, "iter": 20908, "iter_tflops": 23.74426416139399, "iter_time": 0.6536732406616211, "loss": 0.36021438241004944, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 25.517283836835965, "step_time": 0.6082540054321289} +{"epoch": 0, "iter": 20909, "iter_tflops": 18.42502864202668, "iter_time": 1.1197319641113281, "loss": 0.09009405225515366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.73698671955807, "step_time": 1.0453010787963866} +{"epoch": 0, "iter": 20910, "iter_tflops": 25.23998455225873, "iter_time": 0.8173972320556642, "loss": 0.052318960428237915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.32328531379327, "step_time": 0.6586503715515136} +{"epoch": 0, "iter": 20911, "iter_tflops": 52.89374524570487, "iter_time": 0.39004788589477535, "loss": 0.07260852307081223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.887818515418516, "step_time": 0.35639784049987794} +{"epoch": 0, "iter": 20912, "iter_tflops": 57.80571746145603, "iter_time": 0.3569040298461914, "loss": 0.0897214412689209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.027050724267916, "step_time": 0.3273371238708496} +{"epoch": 0, "iter": 20913, "iter_tflops": 31.10803121463562, "iter_time": 0.6632079467773437, "loss": 0.387826144695282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.13251565753797, "step_time": 0.6226841850280762} +{"epoch": 0, "iter": 20914, "iter_tflops": 11.993925364823445, "iter_time": 1.7201285552978516, "loss": 0.19765307009220123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.998845567454056, "step_time": 1.3755120964050291} +{"epoch": 0, "iter": 20915, "iter_tflops": 40.48985550658836, "iter_time": 0.5095373458862305, "loss": 0.17807865142822266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.70377818585395, "step_time": 0.47206658935546875} +{"epoch": 0, "iter": 20916, "iter_tflops": 43.91258803821787, "iter_time": 0.46982185363769535, "loss": 0.2654019892215729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.28337666906079, "step_time": 0.43632868385314943} +{"epoch": 0, "iter": 20917, "iter_tflops": 26.34498978983646, "iter_time": 0.7831126022338868, "loss": 0.40908825397491455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.80598566621387, "step_time": 0.7419659118652343} +{"epoch": 0, "iter": 20918, "iter_tflops": 14.980708918459918, "iter_time": 1.3771773834228518, "loss": 0.4759666323661804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.022394347133176, "step_time": 1.1447476463317872} +{"epoch": 0, "iter": 20919, "iter_tflops": 35.65138494024097, "iter_time": 0.5786898193359374, "loss": 0.6795387268066406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.80349344342809, "step_time": 0.5316813430786133} +{"epoch": 0, "iter": 20920, "iter_tflops": 38.25489769143537, "iter_time": 0.5393059387207031, "loss": 0.3264322876930237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34261574334401, "step_time": 0.49902729034423826} +{"epoch": 0, "iter": 20921, "iter_tflops": 17.212932308626435, "iter_time": 1.198580993652344, "loss": 0.06637520343065262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.513147716155185, "step_time": 1.1144022521972659} +{"epoch": 0, "iter": 20922, "iter_tflops": 18.59622072810354, "iter_time": 1.1094239959716798, "loss": 0.06435813754796982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.195337589858998, "step_time": 0.9733788585662843} +{"epoch": 0, "iter": 20923, "iter_tflops": 39.9251183466915, "iter_time": 0.5167447052001953, "loss": 0.059691086411476135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.998805684134666, "step_time": 0.4689012165069581} +{"epoch": 0, "iter": 20924, "iter_tflops": 44.48667907891056, "iter_time": 0.46375890350341803, "loss": 0.06818515062332153, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 48.86216849091309, "step_time": 0.42223041152954105} +{"epoch": 0, "iter": 20925, "iter_tflops": 27.655845335186722, "iter_time": 0.7459939575195312, "loss": 0.4770018458366394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.817617676557756, "step_time": 0.6919095191955567} +{"epoch": 0, "iter": 20926, "iter_tflops": 9.94746651189747, "iter_time": 2.074004821777344, "loss": 0.4777938425540924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.036734343270748, "step_time": 1.8693114166259766} +{"epoch": 0, "iter": 20927, "iter_tflops": 13.558752526380832, "iter_time": 1.5216070556640626, "loss": 0.5051136016845703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.741286098797092, "step_time": 1.31063582611084} +{"epoch": 0, "iter": 20928, "iter_tflops": 25.205268633729187, "iter_time": 0.8185230560302734, "loss": 0.45419785380363464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.39614126945387, "step_time": 0.5107194118499756} +{"epoch": 0, "iter": 20929, "iter_tflops": 14.724394431305473, "iter_time": 1.0568794860839843, "loss": 0.3291199505329132, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 15.55758351401915, "step_time": 1.0002781219482422} +{"epoch": 0, "iter": 20930, "iter_tflops": 7.032750347897951, "iter_time": 2.2127773132324218, "loss": 0.31949469447135925, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 9.086143054086866, "step_time": 1.7127080574035647} +{"epoch": 0, "iter": 20931, "iter_tflops": 9.087707457531828, "iter_time": 1.7124132232666016, "loss": 0.2363557368516922, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 10.693836973534316, "step_time": 1.4552223358154295} +{"epoch": 0, "iter": 20932, "iter_tflops": 22.34493095779893, "iter_time": 0.696440299987793, "loss": 0.34944355487823486, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 23.965896286569123, "step_time": 0.6493356323242188} +{"epoch": 0, "iter": 20933, "iter_tflops": 15.370578754854439, "iter_time": 0.9858392333984374, "loss": 0.1506327986717224, "lr": 3e-05, "seqlen": 
6064.0, "step_tflops": 16.313544129668504, "step_time": 0.9288551559448242} +{"epoch": 0, "iter": 20934, "iter_tflops": 8.862268432706877, "iter_time": 1.7098240356445313, "loss": 0.18669791519641876, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 10.632620683895992, "step_time": 1.425134971618652} +{"epoch": 0, "iter": 20935, "iter_tflops": 7.016574091592332, "iter_time": 2.1595894775390625, "loss": 0.2870004177093506, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 8.449754758524215, "step_time": 1.7932969665527345} +{"epoch": 0, "iter": 20936, "iter_tflops": 14.520407556951703, "iter_time": 1.0435602111816404, "loss": 0.3796755075454712, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 17.924356317615157, "step_time": 0.845381519317627} +{"epoch": 0, "iter": 20937, "iter_tflops": 14.393426535486698, "iter_time": 1.1694769592285155, "loss": 0.43150120973587036, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 15.67722534910597, "step_time": 1.0737091751098633} +{"epoch": 0, "iter": 20938, "iter_tflops": 10.724466099718775, "iter_time": 1.5695681762695313, "loss": 0.3928626775741577, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 14.549143534247145, "step_time": 1.1569602470397948} +{"epoch": 0, "iter": 20939, "iter_tflops": 29.545837205720126, "iter_time": 0.5697175064086915, "loss": 0.34970811009407043, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 31.507118043724507, "step_time": 0.5342532653808594} +{"epoch": 0, "iter": 20940, "iter_tflops": 31.505573936115173, "iter_time": 0.5342794494628906, "loss": 0.2361629158258438, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 33.382807447227854, "step_time": 0.5042350234985351} +{"epoch": 0, "iter": 20941, "iter_tflops": 28.26617570059543, "iter_time": 0.7298862686157227, "loss": 0.09418431669473648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.952425110043592, "step_time": 0.6887954292297364} +{"epoch": 0, "iter": 20942, "iter_tflops": 12.993992930517862, "iter_time": 1.5877408599853515, "loss": 0.11971593648195267, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 16.39316237045279, "step_time": 1.2585182189941406} +{"epoch": 0, "iter": 20943, "iter_tflops": 51.58173752674981, "iter_time": 0.39996895217895506, "loss": 0.056797854602336884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.01608409454737, "step_time": 0.36830660057067877} +{"epoch": 0, "iter": 20944, "iter_tflops": 49.633371295768974, "iter_time": 0.41566979980468743, "loss": 0.14151982963085175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84720535563054, "step_time": 0.3831413974761963} +{"epoch": 0, "iter": 20945, "iter_tflops": 20.595451211337476, "iter_time": 1.0017305908203125, "loss": 0.39137712121009827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.44483086320082, "step_time": 0.9620543823242187} +{"epoch": 0, "iter": 20946, "iter_tflops": 15.527157987023454, "iter_time": 1.328710220336914, "loss": 0.4425883889198303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.275354676366273, "step_time": 1.0175453815460205} +{"epoch": 0, "iter": 20947, "iter_tflops": 46.962765654226835, "iter_time": 0.4393074645996094, "loss": 0.39395445585250854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.815222187002995, "step_time": 0.4060022296905517} +{"epoch": 0, "iter": 20948, "iter_tflops": 47.321727099007916, "iter_time": 0.4359750747680664, "loss": 0.4569113254547119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.9928728406545, "step_time": 0.4045877857208252} +{"epoch": 0, "iter": 20949, "iter_tflops": 26.507397275021237, "iter_time": 0.464141975402832, "loss": 0.017596716061234474, "lr": 3e-05, "seqlen": 4944.0, "step_tflops": 29.363367971680095, "step_time": 0.4189981117248535} +{"epoch": 0, "iter": 20950, "iter_tflops": 4.52654236057442, "iter_time": 2.7180118408203127, "loss": 0.003371282946318388, "lr": 3e-05, "seqlen": 4944.0, "step_tflops": 5.582170482160232, "step_time": 2.2040164794921875} +{"epoch": 0, "iter": 20951, "iter_tflops": 8.649491899536464, "iter_time": 1.4224183197021485, "loss": 
0.0016247967723757029, "lr": 3e-05, "seqlen": 4944.0, "step_tflops": 9.490474214505404, "step_time": 1.2963731269836427} +{"epoch": 0, "iter": 20952, "iter_tflops": 20.30184209749079, "iter_time": 0.6060137634277344, "loss": 0.003848363645374775, "lr": 3e-05, "seqlen": 4944.0, "step_tflops": 30.84707945165299, "step_time": 0.3988447513580322} +{"epoch": 0, "iter": 20953, "iter_tflops": 19.337971612407433, "iter_time": 0.7983858871459962, "loss": 0.4026240110397339, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 20.500248489748774, "step_time": 0.7531208038330077} +{"epoch": 0, "iter": 20954, "iter_tflops": 16.95881823277597, "iter_time": 0.9103914794921875, "loss": 0.37870317697525024, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 20.090351830693226, "step_time": 0.7684864730834962} +{"epoch": 0, "iter": 20955, "iter_tflops": 28.234461346770814, "iter_time": 0.5468198394775391, "loss": 0.40008482336997986, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 30.093369060079187, "step_time": 0.5130420455932617} +{"epoch": 0, "iter": 20956, "iter_tflops": 27.38257524711557, "iter_time": 0.563831687927246, "loss": 0.309479683637619, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.188671874458244, "step_time": 0.5289436836242676} +{"epoch": 0, "iter": 20957, "iter_tflops": 14.98848966369049, "iter_time": 0.9864390335083006, "loss": 0.03096805326640606, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 15.652804852753162, "step_time": 0.9445739212036133} +{"epoch": 0, "iter": 20958, "iter_tflops": 11.48642052867274, "iter_time": 1.2871922302246095, "loss": 0.05428382381796837, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 13.310447953185058, "step_time": 1.1107989234924316} +{"epoch": 0, "iter": 20959, "iter_tflops": 37.836389866066774, "iter_time": 0.3907674942016601, "loss": 0.029719924554228783, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 41.76554347527598, "step_time": 0.3540054798126221} +{"epoch": 0, "iter": 20960, "iter_tflops": 38.468886673010765, "iter_time": 
0.38434258270263677, "loss": 0.015481623820960522, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 42.01802882400532, "step_time": 0.35187826919555665} +{"epoch": 0, "iter": 20961, "iter_tflops": 32.925634908300616, "iter_time": 0.6265966796875, "loss": 0.07774985581636429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.97628731439423, "step_time": 0.589859447479248} +{"epoch": 0, "iter": 20962, "iter_tflops": 8.57826301738656, "iter_time": 2.405043243408203, "loss": 0.09631448239088058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.612747238997569, "step_time": 1.9439917907714843} +{"epoch": 0, "iter": 20963, "iter_tflops": 15.918295340571577, "iter_time": 1.296061737060547, "loss": 0.1531345248222351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.66520840326287, "step_time": 1.105323501586914} +{"epoch": 0, "iter": 20964, "iter_tflops": 21.50520549365313, "iter_time": 0.9593534698486328, "loss": 0.13036832213401794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.98521631798207, "step_time": 0.793955041885376} +{"epoch": 0, "iter": 20965, "iter_tflops": 11.934084634843902, "iter_time": 1.3314337310791016, "loss": 0.3390974998474121, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 12.63164722420584, "step_time": 1.2579074249267577} +{"epoch": 0, "iter": 20966, "iter_tflops": 11.383712439817259, "iter_time": 1.3958050079345703, "loss": 0.25402510166168213, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 14.344368802555955, "step_time": 1.10771293258667} +{"epoch": 0, "iter": 20967, "iter_tflops": 24.452972661262955, "iter_time": 0.6497959594726563, "loss": 0.3074745535850525, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 26.307830232792462, "step_time": 0.603981502532959} +{"epoch": 0, "iter": 20968, "iter_tflops": 23.054411553353596, "iter_time": 0.6892148513793945, "loss": 0.27251702547073364, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 24.86845679391685, "step_time": 0.638939640045166} +{"epoch": 0, "iter": 20969, "iter_tflops": 19.376919886853422, 
"iter_time": 1.0647251281738281, "loss": 0.2767481207847595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.557652351100902, "step_time": 1.0035724487304687} +{"epoch": 0, "iter": 20970, "iter_tflops": 7.413920627981873, "iter_time": 2.7827507934570312, "loss": 0.23431146144866943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.281677021304576, "step_time": 2.491173400878906} +{"epoch": 0, "iter": 20971, "iter_tflops": 17.685181720481175, "iter_time": 1.1665751495361327, "loss": 0.3998364806175232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.470002122611486, "step_time": 0.960926477432251} +{"epoch": 0, "iter": 20972, "iter_tflops": 42.09747365081402, "iter_time": 0.49007913589477536, "loss": 0.27485620975494385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.98067919138599, "step_time": 0.44869049072265627} +{"epoch": 0, "iter": 20973, "iter_tflops": 17.41678309559683, "iter_time": 0.8512517242431639, "loss": 0.4313395321369171, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 18.695712115140854, "step_time": 0.7930196266174315} +{"epoch": 0, "iter": 20974, "iter_tflops": 5.817400849619279, "iter_time": 2.548572296142578, "loss": 0.5580878257751465, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 6.7766572653319, "step_time": 2.1878141479492186} +{"epoch": 0, "iter": 20975, "iter_tflops": 8.79872304903099, "iter_time": 1.6850248107910157, "loss": 0.330781489610672, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 12.213316706919523, "step_time": 1.2139263229370119} +{"epoch": 0, "iter": 20976, "iter_tflops": 21.11271681928777, "iter_time": 0.7022339553833008, "loss": 0.37397637963294983, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 24.98350025778919, "step_time": 0.593434326171875} +{"epoch": 0, "iter": 20977, "iter_tflops": 18.800955290124886, "iter_time": 0.7712116470336914, "loss": 0.22983136773109436, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 19.862151401965303, "step_time": 0.7300073089599609} +{"epoch": 0, "iter": 20978, "iter_tflops": 
5.837019055839429, "iter_time": 2.484061737060547, "loss": 0.1751922369003296, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 7.246279166948807, "step_time": 2.00096012878418} +{"epoch": 0, "iter": 20979, "iter_tflops": 8.419164674695391, "iter_time": 1.722203598022461, "loss": 0.3256317377090454, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 9.454779496181759, "step_time": 1.5335646591186525} +{"epoch": 0, "iter": 20980, "iter_tflops": 21.64803283532406, "iter_time": 0.6697844467163085, "loss": 0.23356975615024567, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.303626896791783, "step_time": 0.6221999588012695} +{"epoch": 0, "iter": 20981, "iter_tflops": 13.254227433589737, "iter_time": 1.1247544708251953, "loss": 0.3915999233722687, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 14.30855065649266, "step_time": 1.0418771209716797} +{"epoch": 0, "iter": 20982, "iter_tflops": 11.996698401902554, "iter_time": 1.242654525756836, "loss": 0.3999343812465668, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 14.300070151980176, "step_time": 1.0424949951171874} +{"epoch": 0, "iter": 20983, "iter_tflops": 23.37827051374391, "iter_time": 0.6376755523681641, "loss": 0.20639266073703766, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 25.16765259728623, "step_time": 0.5923377838134765} +{"epoch": 0, "iter": 20984, "iter_tflops": 22.54574736482539, "iter_time": 0.6612223281860351, "loss": 0.2700310945510864, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 24.240602042495055, "step_time": 0.6149909782409667} +{"epoch": 0, "iter": 20985, "iter_tflops": 21.03933841078005, "iter_time": 0.9805961151123047, "loss": 0.24165722727775574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.854332620064163, "step_time": 0.9027213287353515} +{"epoch": 0, "iter": 20986, "iter_tflops": 16.658382863620346, "iter_time": 1.238481170654297, "loss": 0.23181283473968506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.570331637980125, "step_time": 0.9564569454193117} +{"epoch": 0, "iter": 20987, 
"iter_tflops": 36.41102174186186, "iter_time": 0.566616714477539, "loss": 0.24402181804180145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.507208903794485, "step_time": 0.522210859298706} +{"epoch": 0, "iter": 20988, "iter_tflops": 37.00414416019987, "iter_time": 0.5575346755981445, "loss": 0.32631799578666687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.6427599720517, "step_time": 0.5076203861236573} +{"epoch": 0, "iter": 20989, "iter_tflops": 35.99858725234096, "iter_time": 0.5731084213256836, "loss": 0.04688483849167824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14612835805109, "step_time": 0.5138999538421631} +{"epoch": 0, "iter": 20990, "iter_tflops": 40.34173062763015, "iter_time": 0.5114082412719727, "loss": 0.07416751980781555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.60948508220483, "step_time": 0.46248221588134764} +{"epoch": 0, "iter": 20991, "iter_tflops": 42.78723402534847, "iter_time": 0.48217871475219726, "loss": 0.05984155461192131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8826180373971, "step_time": 0.44005847740173337} +{"epoch": 0, "iter": 20992, "iter_tflops": 47.45387627223691, "iter_time": 0.43476097488403315, "loss": 0.05155060067772865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.11757004066235, "step_time": 0.3958567810058593} +{"epoch": 0, "iter": 20993, "iter_tflops": 28.459115208162746, "iter_time": 0.7249379806518554, "loss": 0.21740669012069702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.008153972887772, "step_time": 0.6653441390991212} +{"epoch": 0, "iter": 20994, "iter_tflops": 9.476488678333777, "iter_time": 2.177082061767578, "loss": 0.1394590139389038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.07120674644658, "step_time": 1.8634909439086913} +{"epoch": 0, "iter": 20995, "iter_tflops": 30.838292271507285, "iter_time": 0.6690089492797852, "loss": 0.23127871751785278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.26930740533662, "step_time": 0.6020283184051515} +{"epoch": 0, 
"iter": 20996, "iter_tflops": 42.94194736403677, "iter_time": 0.4804414978027344, "loss": 0.17293354868888855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.92786696538843, "step_time": 0.4396341629028321} +{"epoch": 0, "iter": 20997, "iter_tflops": 12.014034929699344, "iter_time": 1.2816890869140627, "loss": 0.19307832419872284, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 12.810255911210218, "step_time": 1.202025749206543} +{"epoch": 0, "iter": 20998, "iter_tflops": 15.536716089927715, "iter_time": 0.9910882949829102, "loss": 0.21486184000968933, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 20.201609189877264, "step_time": 0.7622292518615722} +{"epoch": 0, "iter": 20999, "iter_tflops": 29.30157397412301, "iter_time": 0.5255095672607422, "loss": 0.4057566225528717, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 31.197478878118048, "step_time": 0.49357377624511717} +{"epoch": 0, "iter": 21000, "iter_tflops": 3.1130095629415266, "iter_time": 4.946421508789062, "loss": 0.3181251883506775, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 3.133337201011343, "step_time": 4.914331420898438} +{"epoch": 0, "iter": 21001, "iter_tflops": 29.254412595120304, "iter_time": 0.7052301406860351, "loss": 0.012776047922670841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.32558255502367, "step_time": 0.6586020698547363} +{"epoch": 0, "iter": 21002, "iter_tflops": 18.500409814498955, "iter_time": 1.1151695404052735, "loss": 0.015134860761463642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.03899425753337, "step_time": 0.9361177406311036} +{"epoch": 0, "iter": 21003, "iter_tflops": 23.34164708626802, "iter_time": 0.8838747940063476, "loss": 0.021149208769202232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.854747320857815, "step_time": 0.740667049407959} +{"epoch": 0, "iter": 21004, "iter_tflops": 23.105028789206525, "iter_time": 0.8929265441894532, "loss": 0.028979478403925896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.31457351335883, "step_time": 
0.7553145027160644} +{"epoch": 0, "iter": 21005, "iter_tflops": 8.240541229175967, "iter_time": 2.5036090393066406, "loss": 0.12824216485023499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.914188298742925, "step_time": 2.314410781860351} +{"epoch": 0, "iter": 21006, "iter_tflops": 28.111141026020924, "iter_time": 0.733911636352539, "loss": 0.1965414136648178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.90612882466457, "step_time": 0.608476821899414} +{"epoch": 0, "iter": 21007, "iter_tflops": 25.774058877549905, "iter_time": 0.8004596252441405, "loss": 0.17182831466197968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.73952911087964, "step_time": 0.7437434654235838} +{"epoch": 0, "iter": 21008, "iter_tflops": 21.88968935732734, "iter_time": 0.9425027999877928, "loss": 0.14167477190494537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.848784143320913, "step_time": 0.7981456069946289} +{"epoch": 0, "iter": 21009, "iter_tflops": 17.182345188698896, "iter_time": 1.2007146453857422, "loss": 0.1508590131998062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.100572283287185, "step_time": 1.0801296005249024} +{"epoch": 0, "iter": 21010, "iter_tflops": 30.334276432732807, "iter_time": 0.6801247940063477, "loss": 0.1352948099374771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.18522435190661, "step_time": 0.5701524276733398} +{"epoch": 0, "iter": 21011, "iter_tflops": 35.956209563011235, "iter_time": 0.5737838821411133, "loss": 0.09664464741945267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.80520496224337, "step_time": 0.5055995559692383} +{"epoch": 0, "iter": 21012, "iter_tflops": 38.28103776919919, "iter_time": 0.5389376754760742, "loss": 0.15676100552082062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.93216116540096, "step_time": 0.5040313758850097} +{"epoch": 0, "iter": 21013, "iter_tflops": 21.15834158696211, "iter_time": 0.9750808410644531, "loss": 0.41741034388542175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.113282170760957, 
"step_time": 0.9329729232788087} +{"epoch": 0, "iter": 21014, "iter_tflops": 30.98891169228051, "iter_time": 0.6657572784423829, "loss": 0.5040056705474854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.57713223627397, "step_time": 0.5640434951782226} +{"epoch": 0, "iter": 21015, "iter_tflops": 37.59905324579176, "iter_time": 0.5487131118774414, "loss": 0.3826178014278412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65196234171328, "step_time": 0.49532104492187495} +{"epoch": 0, "iter": 21016, "iter_tflops": 34.979230600415285, "iter_time": 0.589809814453125, "loss": 0.4223123788833618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.37975302613913, "step_time": 0.5519323120117188} +{"epoch": 0, "iter": 21017, "iter_tflops": 10.971537592674617, "iter_time": 1.8804195251464846, "loss": 0.09122084826231003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.71279879338222, "step_time": 1.761414489746094} +{"epoch": 0, "iter": 21018, "iter_tflops": 37.61026767594974, "iter_time": 0.5485494995117188, "loss": 0.11915989965200424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.28321863039576, "step_time": 0.5121510696411132} +{"epoch": 0, "iter": 21019, "iter_tflops": 39.56525267612861, "iter_time": 0.5214447555541992, "loss": 0.06503842771053314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.385127075295124, "step_time": 0.4867531356811523} +{"epoch": 0, "iter": 21020, "iter_tflops": 39.50444535612022, "iter_time": 0.5222473907470704, "loss": 0.06672977656126022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.286837233661366, "step_time": 0.4878845252990723} +{"epoch": 0, "iter": 21021, "iter_tflops": 4.149362977631003, "iter_time": 2.128943359375, "loss": 0.018598735332489014, "lr": 3e-05, "seqlen": 3568.0, "step_tflops": 4.2727084573530165, "step_time": 2.067484558105469} +{"epoch": 0, "iter": 21022, "iter_tflops": 12.167295334017576, "iter_time": 0.7260248489379884, "loss": 0.03598664328455925, "lr": 3e-05, "seqlen": 3568.0, "step_tflops": 
14.974857273870366, "step_time": 0.5899060401916504} +{"epoch": 0, "iter": 21023, "iter_tflops": 17.36001185392502, "iter_time": 0.5088567237854004, "loss": 0.016035646200180054, "lr": 3e-05, "seqlen": 3568.0, "step_tflops": 19.335311369572178, "step_time": 0.45687181282043454} +{"epoch": 0, "iter": 21024, "iter_tflops": 14.927008995056744, "iter_time": 0.5917969741821288, "loss": 0.055987849831581116, "lr": 3e-05, "seqlen": 3568.0, "step_tflops": 16.044634866547927, "step_time": 0.5505739974975586} +{"epoch": 0, "iter": 21025, "iter_tflops": 9.084244844038164, "iter_time": 2.2710851440429685, "loss": 0.22135323286056519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.513907501725456, "step_time": 2.168519454956055} +{"epoch": 0, "iter": 21026, "iter_tflops": 31.09894673521445, "iter_time": 0.6634016799926759, "loss": 0.2553507685661316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.50922978153737, "step_time": 0.5978427696228027} +{"epoch": 0, "iter": 21027, "iter_tflops": 29.79423712522223, "iter_time": 0.6924524841308594, "loss": 0.3145044147968292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.279948500462545, "step_time": 0.6018414382934569} +{"epoch": 0, "iter": 21028, "iter_tflops": 42.89159408368332, "iter_time": 0.48100551986694334, "loss": 0.2334994673728943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.47852599827947, "step_time": 0.41697065734863276} +{"epoch": 0, "iter": 21029, "iter_tflops": 10.33800671194538, "iter_time": 1.9956548767089843, "loss": 0.19069267809391022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.547693381200599, "step_time": 1.9559815368652345} +{"epoch": 0, "iter": 21030, "iter_tflops": 20.606753951634012, "iter_time": 1.0011811447143555, "loss": 0.17568475008010864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.89576002099094, "step_time": 0.8633788375854492} +{"epoch": 0, "iter": 21031, "iter_tflops": 20.360358312555693, "iter_time": 1.0132971725463868, "loss": 0.2729993760585785, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 23.083608769429762, "step_time": 0.8937551193237305} +{"epoch": 0, "iter": 21032, "iter_tflops": 30.861040621629005, "iter_time": 0.6685158081054686, "loss": 0.210188090801239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.29123892477215, "step_time": 0.568486888885498} +{"epoch": 0, "iter": 21033, "iter_tflops": 8.593718164149845, "iter_time": 2.4007179565429686, "loss": 0.26088038086891174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.972449237951613, "step_time": 2.299382583618164} +{"epoch": 0, "iter": 21034, "iter_tflops": 25.035101833527467, "iter_time": 0.8240866622924805, "loss": 0.16888189315795898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.46159756925573, "step_time": 0.7002706985473632} +{"epoch": 0, "iter": 21035, "iter_tflops": 27.933473071301144, "iter_time": 0.7385796051025391, "loss": 0.11016170680522919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.779641247492496, "step_time": 0.6491921463012695} +{"epoch": 0, "iter": 21036, "iter_tflops": 41.10742542925602, "iter_time": 0.5018824043273926, "loss": 0.18684130907058716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.61186962678258, "step_time": 0.4333182811737061} +{"epoch": 0, "iter": 21037, "iter_tflops": 10.073213768676139, "iter_time": 2.0481143341064456, "loss": 0.41987577080726624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.293362843003697, "step_time": 2.004310333251953} +{"epoch": 0, "iter": 21038, "iter_tflops": 18.886167640811166, "iter_time": 1.0923917388916018, "loss": 0.4251525402069092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.158390933738026, "step_time": 0.9310736312866212} +{"epoch": 0, "iter": 21039, "iter_tflops": 22.127757804051658, "iter_time": 0.9323625869750976, "loss": 0.4388262927532196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.315549916283935, "step_time": 0.8484732437133788} +{"epoch": 0, "iter": 21040, "iter_tflops": 28.846846481298172, "iter_time": 0.7151940689086914, "loss": 0.5038574934005737, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 34.44108179205073, "step_time": 0.5990257110595703} +{"epoch": 0, "iter": 21041, "iter_tflops": 8.80448563964434, "iter_time": 2.343248016357422, "loss": 0.14742712676525116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.074952759694606, "step_time": 2.2734105682373045} +{"epoch": 0, "iter": 21042, "iter_tflops": 22.187226330688155, "iter_time": 0.9298635711669921, "loss": 0.0982922613620758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.615783191200872, "step_time": 0.8054055328369142} +{"epoch": 0, "iter": 21043, "iter_tflops": 24.16929850825454, "iter_time": 0.8536074600219726, "loss": 0.11427100002765656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.3816469521595, "step_time": 0.7534643020629883} +{"epoch": 0, "iter": 21044, "iter_tflops": 28.381096001238486, "iter_time": 0.7269308242797852, "loss": 0.1623329222202301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.65561966026995, "step_time": 0.5786210899353028} +{"epoch": 0, "iter": 21045, "iter_tflops": 20.956282827170476, "iter_time": 0.9844824905395508, "loss": 0.13771632313728333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.474776938974017, "step_time": 0.917966552734375} +{"epoch": 0, "iter": 21046, "iter_tflops": 26.141107552112327, "iter_time": 0.7892203292846679, "loss": 0.09607511758804321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.253397568792536, "step_time": 0.7302163734436035} +{"epoch": 0, "iter": 21047, "iter_tflops": 26.237535510882132, "iter_time": 0.7863197937011719, "loss": 0.10668052732944489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.758576575278454, "step_time": 0.7173892440795898} +{"epoch": 0, "iter": 21048, "iter_tflops": 22.493414998838553, "iter_time": 0.9172059249877929, "loss": 0.15052494406700134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.503642974240652, "step_time": 0.7784248199462891} +{"epoch": 0, "iter": 21049, "iter_tflops": 13.876174482334099, "iter_time": 1.4867998046875, "loss": 0.051743220537900925, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.942382332264295, "step_time": 1.294103546142578} +{"epoch": 0, "iter": 21050, "iter_tflops": 20.460241316515024, "iter_time": 1.0083504486083983, "loss": 0.06036955863237381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.073318543428528, "step_time": 0.8570107803344725} +{"epoch": 0, "iter": 21051, "iter_tflops": 26.25567676328754, "iter_time": 0.7857764892578125, "loss": 0.0455472469329834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.442955125097168, "step_time": 0.6776968078613281} +{"epoch": 0, "iter": 21052, "iter_tflops": 29.97353913632364, "iter_time": 0.6883102264404297, "loss": 0.09604428708553314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.35248204455666, "step_time": 0.5835826034545897} +{"epoch": 0, "iter": 21053, "iter_tflops": 4.483285225335784, "iter_time": 4.6017802734375, "loss": 0.2654898464679718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.564761810664514, "step_time": 4.519642944335938} +{"epoch": 0, "iter": 21054, "iter_tflops": 19.64691129381098, "iter_time": 1.0500934829711914, "loss": 0.3547094762325287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.513206517824116, "step_time": 0.8416317749023436} +{"epoch": 0, "iter": 21055, "iter_tflops": 21.430059908334616, "iter_time": 0.9627174911499023, "loss": 0.31018850207328796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.537691307694235, "step_time": 0.8765130462646483} +{"epoch": 0, "iter": 21056, "iter_tflops": 20.42285076675635, "iter_time": 1.0101965560913087, "loss": 0.31228572130203247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.432023750373016, "step_time": 0.8804657135009767} +{"epoch": 0, "iter": 21057, "iter_tflops": 5.854361442401712, "iter_time": 2.7911315612792964, "loss": 0.39119893312454224, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 6.750429021783109, "step_time": 2.4206302947998046} +{"epoch": 0, "iter": 21058, "iter_tflops": 26.788756700102518, "iter_time": 0.6099683227539063, "loss": 
0.33338913321495056, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 28.774452133183516, "step_time": 0.5678750343322754} +{"epoch": 0, "iter": 21059, "iter_tflops": 30.848209585544062, "iter_time": 0.5296998825073242, "loss": 0.3348865807056427, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 32.845045765065876, "step_time": 0.49749642944335937} +{"epoch": 0, "iter": 21060, "iter_tflops": 29.42836664358949, "iter_time": 0.5552565383911132, "loss": 0.27758917212486267, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.3339756779797, "step_time": 0.5214880218505858} +{"epoch": 0, "iter": 21061, "iter_tflops": 28.84635905119007, "iter_time": 0.7152061538696289, "loss": 0.5486629009246826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.579366205548965, "step_time": 0.6746736793518064} +{"epoch": 0, "iter": 21062, "iter_tflops": 14.774027187737273, "iter_time": 1.3964434509277344, "loss": 0.5345903635025024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.265547066686924, "step_time": 1.1295086555480955} +{"epoch": 0, "iter": 21063, "iter_tflops": 39.83424635504662, "iter_time": 0.5179235305786134, "loss": 0.49093911051750183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98354936121714, "step_time": 0.42996180534362793} +{"epoch": 0, "iter": 21064, "iter_tflops": 45.50745650883606, "iter_time": 0.45335633087158206, "loss": 0.5601078271865845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.124947836742244, "step_time": 0.4199718151092529} +{"epoch": 0, "iter": 21065, "iter_tflops": 20.960092539591308, "iter_time": 0.9843035507202148, "loss": 0.13175299763679504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.967050939378282, "step_time": 0.9391835784912108} +{"epoch": 0, "iter": 21066, "iter_tflops": 17.921642327236793, "iter_time": 1.1511831970214845, "loss": 0.09195508062839508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.68635949481756, "step_time": 0.9513396434783935} +{"epoch": 0, "iter": 21067, "iter_tflops": 43.8120200468344, "iter_time": 
0.47090030288696294, "loss": 0.07240710407495499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.99056835802884, "step_time": 0.4298989200592041} +{"epoch": 0, "iter": 21068, "iter_tflops": 42.90038083453233, "iter_time": 0.4809070014953613, "loss": 0.13650399446487427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.025609555523, "step_time": 0.4387203845977783} +{"epoch": 0, "iter": 21069, "iter_tflops": 31.756536739535566, "iter_time": 0.6496644668579101, "loss": 0.03254547342658043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.38632086482568, "step_time": 0.5830245418548584} +{"epoch": 0, "iter": 21070, "iter_tflops": 40.56518642519489, "iter_time": 0.5085911178588868, "loss": 0.05451719090342522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.854663339740846, "step_time": 0.44992356300354003} +{"epoch": 0, "iter": 21071, "iter_tflops": 42.628665492433484, "iter_time": 0.48397230529785157, "loss": 0.054235491901636124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90447017403248, "step_time": 0.43985346031188965} +{"epoch": 0, "iter": 21072, "iter_tflops": 43.44516188850299, "iter_time": 0.47487666320800775, "loss": 0.03777674585580826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.85728444141915, "step_time": 0.43109620094299317} +{"epoch": 0, "iter": 21073, "iter_tflops": 17.151187103673028, "iter_time": 1.2028959503173828, "loss": 0.4925057590007782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.25511997923518, "step_time": 1.1301538162231446} +{"epoch": 0, "iter": 21074, "iter_tflops": 23.14323150365525, "iter_time": 0.8914525833129884, "loss": 0.4365135729312897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.05457473712617, "step_time": 0.7353914184570313} +{"epoch": 0, "iter": 21075, "iter_tflops": 40.47800928141305, "iter_time": 0.5096864662170411, "loss": 0.5109463334083557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.990990031307426, "step_time": 0.46898452377319333} +{"epoch": 0, "iter": 21076, "iter_tflops": 
46.132199662869084, "iter_time": 0.44721677398681636, "loss": 0.4794993996620178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.785648008399285, "step_time": 0.41439841270446776} +{"epoch": 0, "iter": 21077, "iter_tflops": 32.37624390468681, "iter_time": 0.6372293701171875, "loss": 0.5340174436569214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.394161864468764, "step_time": 0.5998428916931152} +{"epoch": 0, "iter": 21078, "iter_tflops": 9.369677159558883, "iter_time": 2.201900146484375, "loss": 0.5145992636680603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.198511754443023, "step_time": 1.8423067245483398} +{"epoch": 0, "iter": 21079, "iter_tflops": 10.699562368321184, "iter_time": 1.9282184448242188, "loss": 0.6475458145141602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.798812796899274, "step_time": 1.6119536895751954} +{"epoch": 0, "iter": 21080, "iter_tflops": 26.365727304575906, "iter_time": 0.7824966583251954, "loss": 0.6305057406425476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.79461545765834, "step_time": 0.6488863983154297} +{"epoch": 0, "iter": 21081, "iter_tflops": 17.91991252735853, "iter_time": 0.888977867126465, "loss": 0.28281155228614807, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 18.874306544149746, "step_time": 0.844026008605957} +{"epoch": 0, "iter": 21082, "iter_tflops": 11.988604680548178, "iter_time": 1.328795639038086, "loss": 0.2723601162433624, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 14.585843397834244, "step_time": 1.0921826858520507} +{"epoch": 0, "iter": 21083, "iter_tflops": 26.176475832904284, "iter_time": 0.6085771713256836, "loss": 0.39542099833488464, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 27.79717372743907, "step_time": 0.5730944366455079} +{"epoch": 0, "iter": 21084, "iter_tflops": 29.466728149610248, "iter_time": 0.5406234970092774, "loss": 0.19064782559871674, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 31.264939257553404, "step_time": 0.5095293960571289} +{"epoch": 0, "iter": 21085, 
"iter_tflops": 33.23550029141541, "iter_time": 0.6207547149658202, "loss": 0.4861603081226349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.408528287306524, "step_time": 0.5826588821411133} +{"epoch": 0, "iter": 21086, "iter_tflops": 12.823949637470081, "iter_time": 1.6087940216064454, "loss": 0.5376695394515991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.729143298393199, "step_time": 1.3116476287841796} +{"epoch": 0, "iter": 21087, "iter_tflops": 31.15519050340441, "iter_time": 0.6622040557861328, "loss": 0.6368705034255981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.90962273576772, "step_time": 0.5909858627319335} +{"epoch": 0, "iter": 21088, "iter_tflops": 38.342294022106486, "iter_time": 0.53807666015625, "loss": 0.4480355381965637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05298066132597, "step_time": 0.49059765052795407} +{"epoch": 0, "iter": 21089, "iter_tflops": 22.97174096771781, "iter_time": 0.8981075286865234, "loss": 0.11581265926361084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.584251773945102, "step_time": 0.8391995697021485} +{"epoch": 0, "iter": 21090, "iter_tflops": 33.2921853663331, "iter_time": 0.619697784423828, "loss": 0.12664757668972015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.95594925287366, "step_time": 0.5582617664337158} +{"epoch": 0, "iter": 21091, "iter_tflops": 52.615681377385165, "iter_time": 0.3921092147827148, "loss": 0.16430634260177612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.29158321225025, "step_time": 0.36010688400268553} +{"epoch": 0, "iter": 21092, "iter_tflops": 47.9323299129684, "iter_time": 0.43042125320434577, "loss": 0.12462779879570007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.115720629425084, "step_time": 0.39587082862854006} +{"epoch": 0, "iter": 21093, "iter_tflops": 36.82204588113886, "iter_time": 0.5602918853759766, "loss": 0.02429218590259552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.764899542819386, "step_time": 0.5188267478942872} +{"epoch": 0, 
"iter": 21094, "iter_tflops": 23.7835512195518, "iter_time": 0.8674521865844727, "loss": 0.021388117223978043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.04739989532803, "step_time": 0.7102561187744141} +{"epoch": 0, "iter": 21095, "iter_tflops": 55.02796093874055, "iter_time": 0.3749201889038086, "loss": 0.040164101868867874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.20476944210163, "step_time": 0.3426820449829102} +{"epoch": 0, "iter": 21096, "iter_tflops": 56.9660537121201, "iter_time": 0.3621646957397461, "loss": 0.018918147310614586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.840389215332756, "step_time": 0.3336184291839599} +{"epoch": 0, "iter": 21097, "iter_tflops": 42.5833675782557, "iter_time": 0.4844871292114258, "loss": 0.010501893237233162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.57298511451982, "step_time": 0.4429841346740722} +{"epoch": 0, "iter": 21098, "iter_tflops": 9.749983585948595, "iter_time": 2.116013153076172, "loss": 0.0048229629173874855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.070781277492516, "step_time": 1.863562561035156} +{"epoch": 0, "iter": 21099, "iter_tflops": 12.002741438204792, "iter_time": 1.7188651123046876, "loss": 0.011619949713349342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.16439206691548, "step_time": 1.3604959182739256} +{"epoch": 0, "iter": 21100, "iter_tflops": 44.48552569801526, "iter_time": 0.4637709274291992, "loss": 0.005249973386526108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.235132781906934, "step_time": 0.41903194618225104} +{"epoch": 0, "iter": 21101, "iter_tflops": 12.261131860785381, "iter_time": 1.2892391662597655, "loss": 0.3065234124660492, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 13.004112059396219, "step_time": 1.2155794525146484} +{"epoch": 0, "iter": 21102, "iter_tflops": 13.70363965727372, "iter_time": 1.1535279541015626, "loss": 0.3108811676502228, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 16.430138486456475, "step_time": 
0.9621057929992676} +{"epoch": 0, "iter": 21103, "iter_tflops": 27.914424012671805, "iter_time": 0.5662854232788086, "loss": 0.22756120562553406, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 29.68593054745353, "step_time": 0.5324923667907715} +{"epoch": 0, "iter": 21104, "iter_tflops": 28.365684434506957, "iter_time": 0.5572765731811524, "loss": 0.2820407450199127, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.21134159300385, "step_time": 0.5232316932678223} +{"epoch": 0, "iter": 21105, "iter_tflops": 43.57786400768761, "iter_time": 0.43163029479980475, "loss": 0.04058609530329704, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 48.44723651734303, "step_time": 0.3882476615905762} +{"epoch": 0, "iter": 21106, "iter_tflops": 48.03262236364393, "iter_time": 0.3915989875793457, "loss": 0.01904595084488392, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 52.82741263412196, "step_time": 0.3560561714172363} +{"epoch": 0, "iter": 21107, "iter_tflops": 46.29892425654895, "iter_time": 0.4062627067565918, "loss": 0.04486136883497238, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 50.912444643023605, "step_time": 0.3694484996795655} +{"epoch": 0, "iter": 21108, "iter_tflops": 49.45654677729862, "iter_time": 0.3803242950439453, "loss": 0.018457407131791115, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 53.982416595141274, "step_time": 0.3484380187988281} +{"epoch": 0, "iter": 21109, "iter_tflops": 28.921798810656497, "iter_time": 0.7133406066894531, "loss": 0.3326603174209595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.78157520716478, "step_time": 0.6702416419982911} +{"epoch": 0, "iter": 21110, "iter_tflops": 12.536048474803806, "iter_time": 1.6457413635253906, "loss": 0.32406410574913025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.843504960765292, "step_time": 1.3899071388244628} +{"epoch": 0, "iter": 21111, "iter_tflops": 47.18971485414937, "iter_time": 0.43719470596313476, "loss": 0.25720420479774475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
51.81541662034666, "step_time": 0.3981651573181152} +{"epoch": 0, "iter": 21112, "iter_tflops": 46.04152547908101, "iter_time": 0.44809752273559567, "loss": 0.2712664008140564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.4005084658459, "step_time": 0.4176291732788086} +{"epoch": 0, "iter": 21113, "iter_tflops": 19.003096669626412, "iter_time": 0.7780432586669922, "loss": 0.17734543979167938, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 20.08646376326339, "step_time": 0.7360793533325195} +{"epoch": 0, "iter": 21114, "iter_tflops": 10.559196910740864, "iter_time": 1.4002230834960938, "loss": 0.3493999242782593, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 12.355941789676375, "step_time": 1.1966090087890624} +{"epoch": 0, "iter": 21115, "iter_tflops": 21.42288185686542, "iter_time": 0.6901607055664063, "loss": 0.24088144302368164, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 23.001199570186582, "step_time": 0.6428026161193847} +{"epoch": 0, "iter": 21116, "iter_tflops": 21.45063110026371, "iter_time": 0.6892678909301757, "loss": 0.1314292550086975, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 23.112706646050142, "step_time": 0.6397014198303222} +{"epoch": 0, "iter": 21117, "iter_tflops": 31.232923036367385, "iter_time": 0.6605559616088867, "loss": 0.08951801061630249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.96269550669131, "step_time": 0.6074633712768553} +{"epoch": 0, "iter": 21118, "iter_tflops": 8.71108686762556, "iter_time": 2.3683719177246094, "loss": 0.06301698833703995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.06213431160277, "step_time": 1.865019256591797} +{"epoch": 0, "iter": 21119, "iter_tflops": 11.558115116095587, "iter_time": 1.7849877166748047, "loss": 0.06069884076714516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.439006772161676, "step_time": 1.535165050506592} +{"epoch": 0, "iter": 21120, "iter_tflops": 48.51398513522197, "iter_time": 0.4252607460021973, "loss": 0.08886626362800598, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 53.20744650911176, "step_time": 0.38774823570251465} +{"epoch": 0, "iter": 21121, "iter_tflops": 21.737054837734643, "iter_time": 0.7762713088989258, "loss": 0.29675406217575073, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 22.93156007592999, "step_time": 0.7358353271484377} +{"epoch": 0, "iter": 21122, "iter_tflops": 10.261891040369196, "iter_time": 1.6443218841552736, "loss": 0.2761891782283783, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 12.016373807409085, "step_time": 1.4042382736206056} +{"epoch": 0, "iter": 21123, "iter_tflops": 25.124063281265357, "iter_time": 0.6716211395263672, "loss": 0.28194352984428406, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.074926847891557, "step_time": 0.6232279815673828} +{"epoch": 0, "iter": 21124, "iter_tflops": 25.2313125290029, "iter_time": 0.6687663192749024, "loss": 0.4831561744213104, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.004867788369687, "step_time": 0.6248448295593263} +{"epoch": 0, "iter": 21125, "iter_tflops": 19.360391180106646, "iter_time": 1.0656341247558596, "loss": 0.21978875994682312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.609282784414074, "step_time": 1.0010582962036132} +{"epoch": 0, "iter": 21126, "iter_tflops": 16.557224499492325, "iter_time": 1.246047821044922, "loss": 0.20016570389270782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.250164122846467, "step_time": 0.8170676994323731} +{"epoch": 0, "iter": 21127, "iter_tflops": 50.85656047497005, "iter_time": 0.4056722145080567, "loss": 0.1685217022895813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.451143784443985, "step_time": 0.3720589351654052} +{"epoch": 0, "iter": 21128, "iter_tflops": 45.26979820860134, "iter_time": 0.45573637008666995, "loss": 0.20736922323703766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.232756227445655, "step_time": 0.419052173614502} +{"epoch": 0, "iter": 21129, "iter_tflops": 11.167976429822245, "iter_time": 1.0907599487304687, "loss": 0.026494165882468224, "lr": 3e-05, 
"seqlen": 4896.0, "step_tflops": 11.647939125419354, "step_time": 1.045814308166504} +{"epoch": 0, "iter": 21130, "iter_tflops": 11.30780387813848, "iter_time": 1.0772720794677735, "loss": 0.043900974094867706, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 14.72648942554058, "step_time": 0.8271884117126465} +{"epoch": 0, "iter": 21131, "iter_tflops": 28.785411193687853, "iter_time": 0.42318594360351564, "loss": 0.039228878915309906, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 31.73563583474895, "step_time": 0.3838455123901367} +{"epoch": 0, "iter": 21132, "iter_tflops": 25.224540677015362, "iter_time": 0.48292579650878914, "loss": 0.02195168100297451, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 27.890392094528288, "step_time": 0.43676622962951656} +{"epoch": 0, "iter": 21133, "iter_tflops": 19.19540814121196, "iter_time": 1.0747931671142577, "loss": 0.06864053010940552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.778075184138693, "step_time": 0.9929261169433594} +{"epoch": 0, "iter": 21134, "iter_tflops": 15.793276833095545, "iter_time": 1.3063212738037109, "loss": 0.04190618544816971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.92074180451273, "step_time": 0.9861549701690673} +{"epoch": 0, "iter": 21135, "iter_tflops": 42.3962056268331, "iter_time": 0.4866259422302246, "loss": 0.024555735290050507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90119862016403, "step_time": 0.4398841419219971} +{"epoch": 0, "iter": 21136, "iter_tflops": 42.257961819040304, "iter_time": 0.488217903137207, "loss": 0.0388878732919693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.2377132518148, "step_time": 0.4461962337493896} +{"epoch": 0, "iter": 21137, "iter_tflops": 36.271528834114164, "iter_time": 0.5687958068847656, "loss": 0.04436279833316803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.19366305698161, "step_time": 0.513292194366455} +{"epoch": 0, "iter": 21138, "iter_tflops": 40.76802156745405, "iter_time": 0.5060606994628907, "loss": 
0.07174143195152283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50806765019462, "step_time": 0.4533502426147461} +{"epoch": 0, "iter": 21139, "iter_tflops": 40.62944857807523, "iter_time": 0.5077866973876952, "loss": 0.07114949077367783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.733241073596915, "step_time": 0.4612027435302734} +{"epoch": 0, "iter": 21140, "iter_tflops": 43.17556395887005, "iter_time": 0.4778418998718262, "loss": 0.08744834363460541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73197570005334, "step_time": 0.4322279396057129} +{"epoch": 0, "iter": 21141, "iter_tflops": 36.84767819237637, "iter_time": 0.5599021301269531, "loss": 0.22161385416984558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.6782938895803, "step_time": 0.5071769618988037} +{"epoch": 0, "iter": 21142, "iter_tflops": 38.61935521860108, "iter_time": 0.5342164154052734, "loss": 0.32311904430389404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29808725613195, "step_time": 0.48775476264953616} +{"epoch": 0, "iter": 21143, "iter_tflops": 43.78772716868534, "iter_time": 0.47116155242919916, "loss": 0.23635685443878174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.918713942891216, "step_time": 0.4305435562133789} +{"epoch": 0, "iter": 21144, "iter_tflops": 39.43819976818869, "iter_time": 0.5231246261596679, "loss": 0.17394807934761047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.77855192605626, "step_time": 0.48227657508850097} +{"epoch": 0, "iter": 21145, "iter_tflops": 16.19086705328553, "iter_time": 1.2742426605224608, "loss": 0.5624610185623169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.50013914552104, "step_time": 1.178910255432129} +{"epoch": 0, "iter": 21146, "iter_tflops": 19.36394358687351, "iter_time": 1.0654386291503906, "loss": 0.46329930424690247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.73125851895852, "step_time": 0.8693636493682861} +{"epoch": 0, "iter": 21147, "iter_tflops": 48.13534181263562, "iter_time": 
0.4286059417724609, "loss": 0.5352903008460999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.305712927155795, "step_time": 0.3944328899383545} +{"epoch": 0, "iter": 21148, "iter_tflops": 47.38958700806586, "iter_time": 0.43535077667236327, "loss": 0.5584248900413513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.50300138671589, "step_time": 0.4005804119110108} +{"epoch": 0, "iter": 21149, "iter_tflops": 39.22210093359796, "iter_time": 0.5260068435668945, "loss": 0.11616408079862595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.48148191097443, "step_time": 0.48564910125732424} +{"epoch": 0, "iter": 21150, "iter_tflops": 35.84550207516702, "iter_time": 0.5755559921264649, "loss": 0.18630509078502655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.974105767629126, "step_time": 0.5293538646697997} +{"epoch": 0, "iter": 21151, "iter_tflops": 42.19033742670154, "iter_time": 0.4890004386901856, "loss": 0.20476175844669342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.712027342126405, "step_time": 0.4513274669647217} +{"epoch": 0, "iter": 21152, "iter_tflops": 43.21110470067596, "iter_time": 0.47744887924194335, "loss": 0.22667652368545532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20861366911263, "step_time": 0.4370196857452393} +{"epoch": 0, "iter": 21153, "iter_tflops": 20.49637758443639, "iter_time": 1.00657266998291, "loss": 0.34212836623191833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.09522787150078, "step_time": 0.9337352676391601} +{"epoch": 0, "iter": 21154, "iter_tflops": 45.180146281380416, "iter_time": 0.45664069747924807, "loss": 0.4537941813468933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.02628104190354, "step_time": 0.42081702041625973} +{"epoch": 0, "iter": 21155, "iter_tflops": 48.76050529438943, "iter_time": 0.4231107406616211, "loss": 0.4752679169178009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.92628509768697, "step_time": 0.38980807876586915} +{"epoch": 0, "iter": 21156, "iter_tflops": 47.802630852934776, 
"iter_time": 0.43158908081054687, "loss": 0.47514280676841736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.82269626273438, "step_time": 0.3981092262268066} +{"epoch": 0, "iter": 21157, "iter_tflops": 37.954515509799045, "iter_time": 0.5435741500854491, "loss": 0.4266081750392914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.088030909781345, "step_time": 0.5021193046569824} +{"epoch": 0, "iter": 21158, "iter_tflops": 43.283469086217785, "iter_time": 0.4766506462097168, "loss": 0.45766985416412354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.58785403101497, "step_time": 0.44284275245666505} +{"epoch": 0, "iter": 21159, "iter_tflops": 46.18560405524058, "iter_time": 0.44669965744018547, "loss": 0.4241563081741333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63788089656402, "step_time": 0.41563203620910644} +{"epoch": 0, "iter": 21160, "iter_tflops": 43.5547891412202, "iter_time": 0.47368140029907224, "loss": 0.3609839975833893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.54062147379587, "step_time": 0.44329217910766605} +{"epoch": 0, "iter": 21161, "iter_tflops": 19.394643552259012, "iter_time": 1.0637521362304687, "loss": 0.1802324652671814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.198416497946926, "step_time": 1.0214213333129882} +{"epoch": 0, "iter": 21162, "iter_tflops": 15.710188938570584, "iter_time": 1.3132301330566405, "loss": 0.12820588052272797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.921776390995692, "step_time": 1.0356051139831544} +{"epoch": 0, "iter": 21163, "iter_tflops": 43.92304303727424, "iter_time": 0.4697100219726562, "loss": 0.07369042187929153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.58899090596506, "step_time": 0.43352660179138186} +{"epoch": 0, "iter": 21164, "iter_tflops": 49.700181830817584, "iter_time": 0.415111026763916, "loss": 0.117889903485775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.59466111490263, "step_time": 0.38494680404663084} +{"epoch": 0, "iter": 21165, "iter_tflops": 
23.12866955082607, "iter_time": 0.8920138473510744, "loss": 0.2569568455219269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.25324078420257, "step_time": 0.8506530609130859} +{"epoch": 0, "iter": 21166, "iter_tflops": 17.760600657174564, "iter_time": 1.1616213836669922, "loss": 0.2419125735759735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.694450723199687, "step_time": 0.9509848289489745} +{"epoch": 0, "iter": 21167, "iter_tflops": 42.718600514638815, "iter_time": 0.4829534034729004, "loss": 0.36267411708831787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.56369548235835, "step_time": 0.4430725116729736} +{"epoch": 0, "iter": 21168, "iter_tflops": 38.35322625045413, "iter_time": 0.5379232864379884, "loss": 0.262742817401886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.614387572715515, "step_time": 0.49576828384399413} +{"epoch": 0, "iter": 21169, "iter_tflops": 15.901696591238533, "iter_time": 1.297414611816406, "loss": 0.22050057351589203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.000537815166513, "step_time": 1.2135553436279296} +{"epoch": 0, "iter": 21170, "iter_tflops": 17.977450769184287, "iter_time": 1.1476095123291015, "loss": 0.20138207077980042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.61423501888033, "step_time": 0.9545141658782959} +{"epoch": 0, "iter": 21171, "iter_tflops": 47.801299969894295, "iter_time": 0.43160109710693356, "loss": 0.26809749007225037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.78055117260882, "step_time": 0.39843325424194337} +{"epoch": 0, "iter": 21172, "iter_tflops": 52.197321601855215, "iter_time": 0.3952519569396973, "loss": 0.23886488378047943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.539713505194115, "step_time": 0.3648956146240235} +{"epoch": 0, "iter": 21173, "iter_tflops": 37.548319811454, "iter_time": 0.5494545059204102, "loss": 0.44213148951530457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.2654164397144, "step_time": 0.5123775024414063} +{"epoch": 0, "iter": 21174, 
"iter_tflops": 11.104337295745959, "iter_time": 1.8579310913085938, "loss": 0.3191172182559967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.097215384678229, "step_time": 1.3665495910644534} +{"epoch": 0, "iter": 21175, "iter_tflops": 13.502592623081693, "iter_time": 1.5279357147216799, "loss": 0.3699723482131958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.948828782747563, "step_time": 1.2935804748535156} +{"epoch": 0, "iter": 21176, "iter_tflops": 14.1581071876586, "iter_time": 1.4571929168701172, "loss": 0.33865949511528015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.772460871702183, "step_time": 1.230057632446289} +{"epoch": 0, "iter": 21177, "iter_tflops": 17.263873703694962, "iter_time": 0.86588916015625, "loss": 0.40640905499458313, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 18.732755663827305, "step_time": 0.7979926376342773} +{"epoch": 0, "iter": 21178, "iter_tflops": 21.844102110348143, "iter_time": 0.6843312225341797, "loss": 0.25619807839393616, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.558198433481962, "step_time": 0.6345392303466797} +{"epoch": 0, "iter": 21179, "iter_tflops": 22.86255654178651, "iter_time": 0.653846435546875, "loss": 0.19934265315532684, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.537675492925203, "step_time": 0.6092101554870606} +{"epoch": 0, "iter": 21180, "iter_tflops": 22.838176728116416, "iter_time": 0.6545444183349609, "loss": 0.25863227248191833, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.547280548999392, "step_time": 0.6089717788696288} +{"epoch": 0, "iter": 21181, "iter_tflops": 21.16487838944536, "iter_time": 0.9747796859741211, "loss": 0.035100314766168594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.83896997730566, "step_time": 0.9033285446166992} +{"epoch": 0, "iter": 21182, "iter_tflops": 14.97617299203705, "iter_time": 1.3775944976806642, "loss": 0.028489621356129646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.323143906130333, "step_time": 1.1259581661224365} +{"epoch": 0, 
"iter": 21183, "iter_tflops": 41.130914658491236, "iter_time": 0.5015957870483398, "loss": 0.040449678897857666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.45401952472754, "step_time": 0.45388930892944335} +{"epoch": 0, "iter": 21184, "iter_tflops": 40.50881248751076, "iter_time": 0.5092988967895508, "loss": 0.01960245706140995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87054989683359, "step_time": 0.4597914123535156} +{"epoch": 0, "iter": 21185, "iter_tflops": 19.755353427237218, "iter_time": 1.0443292541503908, "loss": 0.10743596404790878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.306141657007274, "step_time": 0.9683167343139647} +{"epoch": 0, "iter": 21186, "iter_tflops": 16.565350064246974, "iter_time": 1.245436614990234, "loss": 0.15185001492500305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.737110571960073, "step_time": 1.0452945194244385} +{"epoch": 0, "iter": 21187, "iter_tflops": 49.00489811025432, "iter_time": 0.4210006408691407, "loss": 0.12157627940177917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.38043818497353, "step_time": 0.3864916477203369} +{"epoch": 0, "iter": 21188, "iter_tflops": 43.539166999892075, "iter_time": 0.4738513603210449, "loss": 0.09764978289604187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1465324971178, "step_time": 0.4375951404571533} +{"epoch": 0, "iter": 21189, "iter_tflops": 33.359339778474606, "iter_time": 0.618450294494629, "loss": 0.2937186062335968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.590453967987195, "step_time": 0.5796805381774902} +{"epoch": 0, "iter": 21190, "iter_tflops": 15.969318034988186, "iter_time": 1.2919207611083983, "loss": 0.2907255291938782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.360899723604856, "step_time": 1.1883654556274414} +{"epoch": 0, "iter": 21191, "iter_tflops": 35.831628110218396, "iter_time": 0.5757788467407227, "loss": 0.32728928327560425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.99553565982532, "step_time": 
0.5290629596710206} +{"epoch": 0, "iter": 21192, "iter_tflops": 37.87212078393428, "iter_time": 0.5447567520141602, "loss": 0.3353826105594635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.46431559385797, "step_time": 0.4975626201629638} +{"epoch": 0, "iter": 21193, "iter_tflops": 20.787360309000086, "iter_time": 0.9924826049804688, "loss": 0.1613365262746811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.374473248177686, "step_time": 0.9220817527770997} +{"epoch": 0, "iter": 21194, "iter_tflops": 45.804396461987324, "iter_time": 0.45041732025146486, "loss": 0.12983936071395874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.832747388699744, "step_time": 0.4140067443847656} +{"epoch": 0, "iter": 21195, "iter_tflops": 48.63763726902843, "iter_time": 0.42417959976196284, "loss": 0.13521689176559448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.874899566910194, "step_time": 0.3901869068145752} +{"epoch": 0, "iter": 21196, "iter_tflops": 48.29573545037213, "iter_time": 0.42718251037597654, "loss": 0.16020360589027405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.4003712197599, "step_time": 0.39372036933898924} +{"epoch": 0, "iter": 21197, "iter_tflops": 43.991898198661, "iter_time": 0.4689748420715332, "loss": 0.6519480347633362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90042901026043, "step_time": 0.43070790672302245} +{"epoch": 0, "iter": 21198, "iter_tflops": 35.95038012054257, "iter_time": 0.573876922607422, "loss": 0.7023768424987793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.48679478681062, "step_time": 0.5224808349609376} +{"epoch": 0, "iter": 21199, "iter_tflops": 34.505475196875686, "iter_time": 0.5979078216552735, "loss": 0.4076865017414093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.600297772455555, "step_time": 0.5486949501037597} +{"epoch": 0, "iter": 21200, "iter_tflops": 38.1743581587283, "iter_time": 0.5404437561035156, "loss": 0.49079784750938416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.456669189998024, 
"step_time": 0.49765439224243163} +{"epoch": 0, "iter": 21201, "iter_tflops": 21.675860493479846, "iter_time": 0.951800437927246, "loss": 0.18068814277648926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.5141411068678, "step_time": 0.8773909034729005} +{"epoch": 0, "iter": 21202, "iter_tflops": 25.3689227352484, "iter_time": 0.8132427902221682, "loss": 0.2250976860523224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.246815409533856, "step_time": 0.6602622776031494} +{"epoch": 0, "iter": 21203, "iter_tflops": 50.573884779413504, "iter_time": 0.4079396629333497, "loss": 0.3117144703865051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.00624260440648, "step_time": 0.37506822013854985} +{"epoch": 0, "iter": 21204, "iter_tflops": 46.830289993001855, "iter_time": 0.44055019760131836, "loss": 0.2718278765678406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.60531229739797, "step_time": 0.40768631935119626} +{"epoch": 0, "iter": 21205, "iter_tflops": 26.2385148932976, "iter_time": 0.7862904434204102, "loss": 0.22655540704727173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.717458541452043, "step_time": 0.7443356857299804} +{"epoch": 0, "iter": 21206, "iter_tflops": 45.48627812274067, "iter_time": 0.45356741333007816, "loss": 0.36049985885620117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.38527358718957, "step_time": 0.4094667358398437} +{"epoch": 0, "iter": 21207, "iter_tflops": 51.923640984676666, "iter_time": 0.39733526229858396, "loss": 0.25744765996932983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.33413012898708, "step_time": 0.3662272491455078} +{"epoch": 0, "iter": 21208, "iter_tflops": 53.19581386813934, "iter_time": 0.3878330268859863, "loss": 0.2116089165210724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.49753402225353, "step_time": 0.3588170146942139} +{"epoch": 0, "iter": 21209, "iter_tflops": 23.702387848807376, "iter_time": 0.8704225769042968, "loss": 0.4665879011154175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
24.87817162720867, "step_time": 0.8292849578857422} +{"epoch": 0, "iter": 21210, "iter_tflops": 19.725448725155715, "iter_time": 1.0459125061035155, "loss": 0.4625947177410126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.346912805683182, "step_time": 0.9232189559936523} +{"epoch": 0, "iter": 21211, "iter_tflops": 52.21577626523833, "iter_time": 0.39511226272583005, "loss": 0.057395827025175095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.84568535164656, "step_time": 0.3629315643310547} +{"epoch": 0, "iter": 21212, "iter_tflops": 54.54022467179729, "iter_time": 0.37827298355102545, "loss": 0.015924163162708282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.55154960074604, "step_time": 0.34644091796875004} +{"epoch": 0, "iter": 21213, "iter_tflops": 27.813970270725292, "iter_time": 0.7417529144287109, "loss": 0.5672013759613037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.395399067787842, "step_time": 0.7018477096557617} +{"epoch": 0, "iter": 21214, "iter_tflops": 12.87476490559897, "iter_time": 1.6024442901611329, "loss": 0.4306761622428894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.883163024607946, "step_time": 1.3862035560607908} +{"epoch": 0, "iter": 21215, "iter_tflops": 37.3137422002655, "iter_time": 0.5529087219238281, "loss": 0.5195013880729675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.69717144598306, "step_time": 0.5069417057037354} +{"epoch": 0, "iter": 21216, "iter_tflops": 38.046078336718494, "iter_time": 0.542265968322754, "loss": 0.5292738080024719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.461468542230016, "step_time": 0.4975967864990235} +{"epoch": 0, "iter": 21217, "iter_tflops": 35.79774929778304, "iter_time": 0.5763237609863281, "loss": 0.36174243688583374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.558203840706376, "step_time": 0.5350636558532715} +{"epoch": 0, "iter": 21218, "iter_tflops": 16.005156960013494, "iter_time": 1.2890278778076172, "loss": 0.3837582468986511, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 18.638194740106236, "step_time": 1.106925525665283} +{"epoch": 0, "iter": 21219, "iter_tflops": 45.26057512306749, "iter_time": 0.45582923889160154, "loss": 0.377366840839386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.00511369000059, "step_time": 0.42099878883361813} +{"epoch": 0, "iter": 21220, "iter_tflops": 49.7625887556177, "iter_time": 0.4145904388427735, "loss": 0.3100142776966095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.72777047956434, "step_time": 0.383993106842041} +{"epoch": 0, "iter": 21221, "iter_tflops": 28.598626220827093, "iter_time": 0.7214015579223632, "loss": 0.15539635717868805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.294859781138705, "step_time": 0.6810097045898438} +{"epoch": 0, "iter": 21222, "iter_tflops": 12.690127636428587, "iter_time": 1.625759338378906, "loss": 0.07106459140777588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.174087013390295, "step_time": 1.2755646419525146} +{"epoch": 0, "iter": 21223, "iter_tflops": 38.364545167431274, "iter_time": 0.5377645797729492, "loss": 0.11209604144096375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.35523463159225, "step_time": 0.48709666442871097} +{"epoch": 0, "iter": 21224, "iter_tflops": 43.90626631342699, "iter_time": 0.46988949966430665, "loss": 0.11596643179655075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.333106171143996, "step_time": 0.42685221672058105} +{"epoch": 0, "iter": 21225, "iter_tflops": 23.535128766472052, "iter_time": 0.8766084823608398, "loss": 0.6154020428657532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.179543526894268, "step_time": 0.8193593139648437} +{"epoch": 0, "iter": 21226, "iter_tflops": 32.89719238649864, "iter_time": 0.627138427734375, "loss": 0.5409537553787231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.660900062840184, "step_time": 0.5073939208984376} +{"epoch": 0, "iter": 21227, "iter_tflops": 39.320742746786124, "iter_time": 0.5246872787475585, "loss": 0.5904019474983215, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 42.8991663433146, "step_time": 0.48092061614990234} +{"epoch": 0, "iter": 21228, "iter_tflops": 40.424183989706606, "iter_time": 0.510365119934082, "loss": 0.4637325704097748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27490167359779, "step_time": 0.4659771728515625} +{"epoch": 0, "iter": 21229, "iter_tflops": 16.560304524668613, "iter_time": 1.2458160705566406, "loss": 0.6169747710227966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.613489373101412, "step_time": 1.1713234710693359} +{"epoch": 0, "iter": 21230, "iter_tflops": 23.184980530917205, "iter_time": 0.8898473510742188, "loss": 0.6089754700660706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.414655161231778, "step_time": 0.6567346801757813} +{"epoch": 0, "iter": 21231, "iter_tflops": 43.22701292799638, "iter_time": 0.4772731704711914, "loss": 0.44370824098587036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.18007583729961, "step_time": 0.44675313186645504} +{"epoch": 0, "iter": 21232, "iter_tflops": 44.37079535597607, "iter_time": 0.46497010803222655, "loss": 0.3843494653701782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.49229807700665, "step_time": 0.4344092483520508} +{"epoch": 0, "iter": 21233, "iter_tflops": 24.502099604881483, "iter_time": 0.8420132904052735, "loss": 0.3664970397949219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.82810626058042, "step_time": 0.7987845993041992} +{"epoch": 0, "iter": 21234, "iter_tflops": 16.99879650404556, "iter_time": 1.2136796569824218, "loss": 0.4396982192993164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.821471083440727, "step_time": 0.9908566703796388} +{"epoch": 0, "iter": 21235, "iter_tflops": 45.53903370597754, "iter_time": 0.4530419692993164, "loss": 0.5500727295875549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.18061409163054, "step_time": 0.4194964599609375} +{"epoch": 0, "iter": 21236, "iter_tflops": 51.3550702162762, "iter_time": 0.40173430633544926, "loss": 0.5552869439125061, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 55.50342596162218, "step_time": 0.37170846939086916} +{"epoch": 0, "iter": 21237, "iter_tflops": 32.872602805969024, "iter_time": 0.6276075439453125, "loss": 0.22736330330371857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.07828872622195, "step_time": 0.5881442413330078} +{"epoch": 0, "iter": 21238, "iter_tflops": 16.568690762895244, "iter_time": 1.245185501098633, "loss": 0.22413307428359985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.445087080311495, "step_time": 1.0090978546142577} +{"epoch": 0, "iter": 21239, "iter_tflops": 41.81045416107725, "iter_time": 0.49344342041015626, "loss": 0.23007555305957794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.82198453962407, "step_time": 0.4502444343566895} +{"epoch": 0, "iter": 21240, "iter_tflops": 38.15729816127745, "iter_time": 0.5406853866577148, "loss": 0.2662777006626129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62535628121642, "step_time": 0.4956376438140869} +{"epoch": 0, "iter": 21241, "iter_tflops": 26.3038204851081, "iter_time": 0.7843382873535156, "loss": 0.4567984938621521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.068856964237547, "step_time": 0.7097318458557129} +{"epoch": 0, "iter": 21242, "iter_tflops": 37.31330352825146, "iter_time": 0.5529152221679687, "loss": 0.47572842240333557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.73307180758657, "step_time": 0.506494909286499} +{"epoch": 0, "iter": 21243, "iter_tflops": 43.423338957025784, "iter_time": 0.4751153182983399, "loss": 0.48695826530456543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.51341271027886, "step_time": 0.4342161998748779} +{"epoch": 0, "iter": 21244, "iter_tflops": 40.285850795679984, "iter_time": 0.5121176071166993, "loss": 0.48091036081314087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.9158262519452, "step_time": 0.4697872104644776} +{"epoch": 0, "iter": 21245, "iter_tflops": 25.32191985336918, "iter_time": 0.814752342224121, "loss": 
0.3696761727333069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.235370221517027, "step_time": 0.7575110359191896} +{"epoch": 0, "iter": 21246, "iter_tflops": 14.138864428249748, "iter_time": 1.4591761322021484, "loss": 0.37863266468048096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.340729122190808, "step_time": 1.2625564842224122} +{"epoch": 0, "iter": 21247, "iter_tflops": 38.78669442814778, "iter_time": 0.53191162109375, "loss": 0.3831389546394348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.50214658116124, "step_time": 0.485412977218628} +{"epoch": 0, "iter": 21248, "iter_tflops": 39.37783595611021, "iter_time": 0.5239265441894531, "loss": 0.2575761377811432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90795041013596, "step_time": 0.4808221626281738} +{"epoch": 0, "iter": 21249, "iter_tflops": 11.476939345195172, "iter_time": 1.2811407470703124, "loss": 0.3395659625530243, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 12.15072327854864, "step_time": 1.2100987167358397} +{"epoch": 0, "iter": 21250, "iter_tflops": 13.970652384995764, "iter_time": 1.0524615631103516, "loss": 0.314342200756073, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 18.887829501413677, "step_time": 0.7784682006835937} +{"epoch": 0, "iter": 21251, "iter_tflops": 21.929802923025647, "iter_time": 0.6704836654663086, "loss": 0.16578792035579681, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 23.558204133560817, "step_time": 0.6241381797790527} +{"epoch": 0, "iter": 21252, "iter_tflops": 20.911410026875245, "iter_time": 0.7031364517211914, "loss": 0.32180511951446533, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 22.3869024616603, "step_time": 0.6567936172485351} +{"epoch": 0, "iter": 21253, "iter_tflops": 20.597055380126402, "iter_time": 1.001652572631836, "loss": 0.004224652890115976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.412907424078984, "step_time": 0.920500545501709} +{"epoch": 0, "iter": 21254, "iter_tflops": 19.792547689384175, "iter_time": 1.042366744995117, 
"loss": 0.007796561345458031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.090340947015893, "step_time": 0.790755994796753} +{"epoch": 0, "iter": 21255, "iter_tflops": 50.58378641534277, "iter_time": 0.40785980987548826, "loss": 0.0018804211867973208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.88814455318022, "step_time": 0.369149730682373} +{"epoch": 0, "iter": 21256, "iter_tflops": 46.83845738870841, "iter_time": 0.4404733772277832, "loss": 0.003611356718465686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.936899075483254, "step_time": 0.3972338333129883} +{"epoch": 0, "iter": 21257, "iter_tflops": 14.922446910810336, "iter_time": 0.7349849395751953, "loss": 0.02623606286942959, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 16.53852383850164, "step_time": 0.6631652164459229} +{"epoch": 0, "iter": 21258, "iter_tflops": 26.30517448956706, "iter_time": 0.41694358444213864, "loss": 0.025388076901435852, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 28.873183977249838, "step_time": 0.37986021041870116} +{"epoch": 0, "iter": 21259, "iter_tflops": 28.176863845874838, "iter_time": 0.3892474975585938, "loss": 0.031309958547353745, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 30.840363669254444, "step_time": 0.35563049316406253} +{"epoch": 0, "iter": 21260, "iter_tflops": 30.871940164565345, "iter_time": 0.3552667465209961, "loss": 0.027342654764652252, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 33.70124496792881, "step_time": 0.32544120407104493} +{"epoch": 0, "iter": 21261, "iter_tflops": 45.791023414470196, "iter_time": 0.4505488624572754, "loss": 0.2987070679664612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.92733384031828, "step_time": 0.4132224159240722} +{"epoch": 0, "iter": 21262, "iter_tflops": 36.598773094908054, "iter_time": 0.5637099761962892, "loss": 0.29075467586517334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.56985075937477, "step_time": 0.4962994365692139} +{"epoch": 0, "iter": 21263, "iter_tflops": 36.92929578192493, 
"iter_time": 0.5586646881103515, "loss": 0.229645773768425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.3514674951701, "step_time": 0.5112848377227783} +{"epoch": 0, "iter": 21264, "iter_tflops": 40.144811065267724, "iter_time": 0.5139168167114258, "loss": 0.3397979438304901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95682611297075, "step_time": 0.4693490257263183} +{"epoch": 0, "iter": 21265, "iter_tflops": 17.454722361117657, "iter_time": 1.1819777526855468, "loss": 0.34895074367523193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.58808292087209, "step_time": 1.1099096984863281} +{"epoch": 0, "iter": 21266, "iter_tflops": 17.25566826006092, "iter_time": 1.1956125488281248, "loss": 0.5307185649871826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.408101579420208, "step_time": 0.9637049522399901} +{"epoch": 0, "iter": 21267, "iter_tflops": 41.06194924183872, "iter_time": 0.5024382400512696, "loss": 0.23601417243480682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.02731696540632, "step_time": 0.4685975646972656} +{"epoch": 0, "iter": 21268, "iter_tflops": 49.07834834191335, "iter_time": 0.4203705749511719, "loss": 0.4161088764667511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.88758796676526, "step_time": 0.3900932960510254} +{"epoch": 0, "iter": 21269, "iter_tflops": 28.984621104768635, "iter_time": 0.7117944869995116, "loss": 0.0029651704244315624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.85345328873714, "step_time": 0.6686802062988282} +{"epoch": 0, "iter": 21270, "iter_tflops": 13.472458035664264, "iter_time": 1.5313533325195312, "loss": 0.005519131198525429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.02092158936127, "step_time": 1.2121020240783693} +{"epoch": 0, "iter": 21271, "iter_tflops": 44.971195944035664, "iter_time": 0.458762393951416, "loss": 0.011357145383954048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.959679206397084, "step_time": 0.4129548835754394} +{"epoch": 0, "iter": 21272, "iter_tflops": 
42.69897805206464, "iter_time": 0.48317534637451176, "loss": 0.000726022815797478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.100097737620096, "step_time": 0.43802655410766606} +{"epoch": 0, "iter": 21273, "iter_tflops": 20.60458478873193, "iter_time": 1.0012865447998047, "loss": 0.21325302124023438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.971151722761366, "step_time": 0.9390082855224607} +{"epoch": 0, "iter": 21274, "iter_tflops": 18.564778635775046, "iter_time": 1.1113029632568359, "loss": 0.2318790704011917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.969044489006706, "step_time": 0.8982129631042479} +{"epoch": 0, "iter": 21275, "iter_tflops": 39.63510650835768, "iter_time": 0.5205257492065429, "loss": 0.2840234935283661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.60512883068394, "step_time": 0.47313456153869626} +{"epoch": 0, "iter": 21276, "iter_tflops": 37.059093939435364, "iter_time": 0.5567079849243164, "loss": 0.2577643096446991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.49133241445956, "step_time": 0.5095187606811523} +{"epoch": 0, "iter": 21277, "iter_tflops": 18.02444990835129, "iter_time": 1.1446170959472655, "loss": 0.5216246843338013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.2847722571854, "step_time": 1.069812660217285} +{"epoch": 0, "iter": 21278, "iter_tflops": 17.718906958575666, "iter_time": 1.164354751586914, "loss": 0.8373720645904541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.03945934020136, "step_time": 0.9805904788970948} +{"epoch": 0, "iter": 21279, "iter_tflops": 44.2681864174151, "iter_time": 0.4660478591918945, "loss": 0.6053910851478577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.45001890189018, "step_time": 0.4347963180541992} +{"epoch": 0, "iter": 21280, "iter_tflops": 41.40224241169825, "iter_time": 0.49830860137939453, "loss": 0.507164478302002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.40393588828224, "step_time": 0.46462308120727536} +{"epoch": 0, "iter": 21281, 
"iter_tflops": 23.17221881968452, "iter_time": 0.6327729568481445, "loss": 0.02118026837706566, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.792724477013, "step_time": 0.591413558959961} +{"epoch": 0, "iter": 21282, "iter_tflops": 36.09726841279374, "iter_time": 0.40620119094848633, "loss": 0.029547663405537605, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 40.111479017013764, "step_time": 0.3655500564575196} +{"epoch": 0, "iter": 21283, "iter_tflops": 38.61009963863927, "iter_time": 0.3797647132873535, "loss": 0.024698719382286072, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 42.29878054193418, "step_time": 0.3466471900939941} +{"epoch": 0, "iter": 21284, "iter_tflops": 41.652893442614776, "iter_time": 0.35202244567871094, "loss": 0.021165138110518456, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 45.463959645320706, "step_time": 0.3225137786865234} +{"epoch": 0, "iter": 21285, "iter_tflops": 44.59364739031811, "iter_time": 0.46264646911621093, "loss": 0.38532912731170654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62605297760288, "step_time": 0.42428065299987794} +{"epoch": 0, "iter": 21286, "iter_tflops": 47.04221410952923, "iter_time": 0.43856552886962896, "loss": 0.3525606691837311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.36277300534294, "step_time": 0.40167405891418456} +{"epoch": 0, "iter": 21287, "iter_tflops": 47.63940832792979, "iter_time": 0.43306779479980473, "loss": 0.5974040627479553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.62247049941963, "step_time": 0.3996533546447754} +{"epoch": 0, "iter": 21288, "iter_tflops": 49.671777968528914, "iter_time": 0.41534840011596674, "loss": 0.4004509449005127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.7568872056663, "step_time": 0.38378512191772457} +{"epoch": 0, "iter": 21289, "iter_tflops": 28.941778761172507, "iter_time": 0.7128481521606445, "loss": 0.43816378712654114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.670780785259584, "step_time": 0.6726628074645996} +{"epoch": 
0, "iter": 21290, "iter_tflops": 13.777751118949093, "iter_time": 1.497420974731445, "loss": 0.5383285284042358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.29653265376034, "step_time": 1.0691606559753417} +{"epoch": 0, "iter": 21291, "iter_tflops": 39.392698257090416, "iter_time": 0.5237288742065429, "loss": 0.6609020233154297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.155582045233686, "step_time": 0.47806315040588376} +{"epoch": 0, "iter": 21292, "iter_tflops": 37.67834754474129, "iter_time": 0.5475583419799804, "loss": 0.3739302456378937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.896319686911006, "step_time": 0.504473108291626} +{"epoch": 0, "iter": 21293, "iter_tflops": 21.39811196135775, "iter_time": 0.9641548538208007, "loss": 0.23622384667396545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.669305176703993, "step_time": 0.9100893630981446} +{"epoch": 0, "iter": 21294, "iter_tflops": 18.96024186857809, "iter_time": 1.0881239624023435, "loss": 0.17768409848213196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.875808193929686, "step_time": 0.9018738632202148} +{"epoch": 0, "iter": 21295, "iter_tflops": 45.5268559994304, "iter_time": 0.45316315078735353, "loss": 0.13491889834403992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35140791129181, "step_time": 0.4180446796417236} +{"epoch": 0, "iter": 21296, "iter_tflops": 52.70477883864362, "iter_time": 0.3914463539123535, "loss": 0.18055185675621033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.1588225661033, "step_time": 0.3609432907104492} +{"epoch": 0, "iter": 21297, "iter_tflops": 2.2445301378796327, "iter_time": 0.663265022277832, "loss": 0.2246856540441513, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 2.390074065733034, "step_time": 0.6228753967285157} +{"epoch": 0, "iter": 21298, "iter_tflops": 1.3507343354521457, "iter_time": 1.102154800415039, "loss": 1.0205014944076538, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 1.784274825458802, "step_time": 0.8343548374176026} 
+{"epoch": 0, "iter": 21299, "iter_tflops": 3.457885645398748, "iter_time": 0.4305285034179688, "loss": 1.2067841291427612, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 3.743755586747855, "step_time": 0.39765371894836427} +{"epoch": 0, "iter": 21300, "iter_tflops": 3.8946386181006427, "iter_time": 0.38224813079833986, "loss": 1.0990699529647827, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 4.196689715300916, "step_time": 0.3547363357543945} +{"epoch": 0, "iter": 21301, "iter_tflops": 30.751528815807593, "iter_time": 0.6708965148925783, "loss": 0.4255717098712921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.52244336113, "step_time": 0.6343648071289062} +{"epoch": 0, "iter": 21302, "iter_tflops": 14.781879968566273, "iter_time": 1.3957015991210937, "loss": 0.5369480848312378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.347623111822404, "step_time": 1.18927494430542} +{"epoch": 0, "iter": 21303, "iter_tflops": 35.572110520502505, "iter_time": 0.5799794616699219, "loss": 0.39234602451324463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.456173773375156, "step_time": 0.5228863201141357} +{"epoch": 0, "iter": 21304, "iter_tflops": 43.28130798427161, "iter_time": 0.47667444610595705, "loss": 0.43911129236221313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20068680931321, "step_time": 0.4370930786132813} +{"epoch": 0, "iter": 21305, "iter_tflops": 19.33928554002083, "iter_time": 1.066797088623047, "loss": 0.0030265890527516603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.643263195104076, "step_time": 0.9994104766845703} +{"epoch": 0, "iter": 21306, "iter_tflops": 22.234667274742517, "iter_time": 0.9278795700073242, "loss": 0.0012852775398641825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.09518875562744, "step_time": 0.7614301452636718} +{"epoch": 0, "iter": 21307, "iter_tflops": 56.99212717168245, "iter_time": 0.36199900817871095, "loss": 0.021782904863357544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.962786602966425, "step_time": 
0.32767122650146485} +{"epoch": 0, "iter": 21308, "iter_tflops": 58.01444800140951, "iter_time": 0.3556199226379394, "loss": 0.015558833256363869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.55300307154851, "step_time": 0.3246281452178955} +{"epoch": 0, "iter": 21309, "iter_tflops": 37.51068469602545, "iter_time": 0.5500057830810546, "loss": 0.39768853783607483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.479341412244274, "step_time": 0.509669692993164} +{"epoch": 0, "iter": 21310, "iter_tflops": 23.595111273054627, "iter_time": 0.8743800048828125, "loss": 0.39435914158821106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.160367302125362, "step_time": 0.7326287078857422} +{"epoch": 0, "iter": 21311, "iter_tflops": 44.98502229001143, "iter_time": 0.4586213912963867, "loss": 0.3724052906036377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.4593531680801, "step_time": 0.4257401752471924} +{"epoch": 0, "iter": 21312, "iter_tflops": 49.03529202436907, "iter_time": 0.420739688873291, "loss": 0.392845094203949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.63733436592215, "step_time": 0.3919479160308838} +{"epoch": 0, "iter": 21313, "iter_tflops": 27.77051314649286, "iter_time": 0.7429136581420899, "loss": 0.4566415846347809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.37843388980683, "step_time": 0.7022530059814452} +{"epoch": 0, "iter": 21314, "iter_tflops": 13.825149547803521, "iter_time": 1.4922871856689452, "loss": 0.2744608223438263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.208123192887403, "step_time": 1.198915958404541} +{"epoch": 0, "iter": 21315, "iter_tflops": 31.797713320757296, "iter_time": 0.6488231811523438, "loss": 0.3515574634075165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.194137106432684, "step_time": 0.5263821334838867} +{"epoch": 0, "iter": 21316, "iter_tflops": 47.90887106237473, "iter_time": 0.4306320114135742, "loss": 0.4093788266181946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.9155101724545, 
"step_time": 0.3973974914550781} +{"epoch": 0, "iter": 21317, "iter_tflops": 25.60011966749247, "iter_time": 0.805898323059082, "loss": 0.6202378273010254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.92931507027701, "step_time": 0.766120246887207} +{"epoch": 0, "iter": 21318, "iter_tflops": 14.114819576849154, "iter_time": 1.4616618652343751, "loss": 0.5388419032096863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.757154853462303, "step_time": 1.2311811695098878} +{"epoch": 0, "iter": 21319, "iter_tflops": 34.7792827567065, "iter_time": 0.5932006607055664, "loss": 0.3456108868122101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.62269188798636, "step_time": 0.548368350982666} +{"epoch": 0, "iter": 21320, "iter_tflops": 34.14120852105967, "iter_time": 0.6042871475219725, "loss": 0.542616605758667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.94441606460428, "step_time": 0.5584360427856445} +{"epoch": 0, "iter": 21321, "iter_tflops": 20.069455005568617, "iter_time": 1.0279847412109375, "loss": 0.4644272029399872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.464527925228925, "step_time": 0.9611715469360351} +{"epoch": 0, "iter": 21322, "iter_tflops": 18.950126016980725, "iter_time": 1.0887048187255859, "loss": 0.534881591796875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.98268004518896, "step_time": 0.8602497081756592} +{"epoch": 0, "iter": 21323, "iter_tflops": 41.09967254589673, "iter_time": 0.5019770774841309, "loss": 0.513494074344635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87890207523318, "step_time": 0.4597058429718017} +{"epoch": 0, "iter": 21324, "iter_tflops": 41.22032514345319, "iter_time": 0.5005077819824219, "loss": 0.5590211749076843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.98320013600623, "step_time": 0.4586399688720703} +{"epoch": 0, "iter": 21325, "iter_tflops": 27.499974332778013, "iter_time": 0.750222282409668, "loss": 0.10818243771791458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.587870172692046, 
"step_time": 0.6972821426391602} +{"epoch": 0, "iter": 21326, "iter_tflops": 7.407958283242408, "iter_time": 2.784990509033203, "loss": 0.15455612540245056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.850039219218367, "step_time": 2.3311866760253905} +{"epoch": 0, "iter": 21327, "iter_tflops": 13.996162174042848, "iter_time": 1.4740536193847655, "loss": 0.1996196061372757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.154924877196585, "step_time": 1.0770647048950195} +{"epoch": 0, "iter": 21328, "iter_tflops": 42.56301929757, "iter_time": 0.48471875, "loss": 0.1393330991268158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8076991024137, "step_time": 0.44076282119750976} +{"epoch": 0, "iter": 21329, "iter_tflops": 14.67377728505258, "iter_time": 1.0298675079345703, "loss": 0.37130650877952576, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 15.666946099423926, "step_time": 0.9645815048217773} +{"epoch": 0, "iter": 21330, "iter_tflops": 12.009289263827638, "iter_time": 1.2583630981445313, "loss": 0.34752675890922546, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 16.807366931758214, "step_time": 0.8991322975158691} +{"epoch": 0, "iter": 21331, "iter_tflops": 25.821625179946835, "iter_time": 0.585247688293457, "loss": 0.3342108726501465, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.623803824844735, "step_time": 0.5470660934448243} +{"epoch": 0, "iter": 21332, "iter_tflops": 27.258286053398034, "iter_time": 0.5544019317626953, "loss": 0.24925357103347778, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.04658039379761, "step_time": 0.5202693824768068} +{"epoch": 0, "iter": 21333, "iter_tflops": 30.763963389637095, "iter_time": 0.6706253433227539, "loss": 0.5062628984451294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.921355455335224, "step_time": 0.6266781311035157} +{"epoch": 0, "iter": 21334, "iter_tflops": 18.69320906416458, "iter_time": 1.1036678314208985, "loss": 0.5244460105895996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.41195282397442, 
"step_time": 0.881220531463623} +{"epoch": 0, "iter": 21335, "iter_tflops": 43.43701156507774, "iter_time": 0.4749657669067383, "loss": 0.5382236242294312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.76150270755684, "step_time": 0.4411982574462891} +{"epoch": 0, "iter": 21336, "iter_tflops": 49.00939748188391, "iter_time": 0.4209619903564453, "loss": 0.5547075867652893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.066113157082874, "step_time": 0.38878094291687015} +{"epoch": 0, "iter": 21337, "iter_tflops": 48.717125613278576, "iter_time": 0.4234874954223633, "loss": 0.05349947139620781, "lr": 2.9999925978027876e-05, "seqlen": 8192.0, "step_tflops": 53.81067463152425, "step_time": 0.383401502609253} +{"epoch": 0, "iter": 21338, "iter_tflops": 47.28129309338557, "iter_time": 0.4363479118347168, "loss": 0.055339183658361435, "lr": 2.9999703912842056e-05, "seqlen": 8192.0, "step_tflops": 52.10781471511068, "step_time": 0.3959308910369873} +{"epoch": 0, "iter": 21339, "iter_tflops": 51.54966742143727, "iter_time": 0.40021778106689454, "loss": 0.052093058824539185, "lr": 2.999933380663425e-05, "seqlen": 8192.0, "step_tflops": 56.30340889715399, "step_time": 0.36642707633972166} +{"epoch": 0, "iter": 21340, "iter_tflops": 51.926884948827094, "iter_time": 0.39731044006347654, "loss": 0.041315145790576935, "lr": 2.9998815663057245e-05, "seqlen": 8192.0, "step_tflops": 56.14375950615774, "step_time": 0.36746904182434076} +{"epoch": 0, "iter": 21341, "iter_tflops": 42.769834114684684, "iter_time": 0.4823748779296875, "loss": 0.3140667676925659, "lr": 2.999814948722491e-05, "seqlen": 8192.0, "step_tflops": 46.44276630668715, "step_time": 0.4442261981964111} +{"epoch": 0, "iter": 21342, "iter_tflops": 37.27739318277839, "iter_time": 0.5534478607177734, "loss": 0.42731034755706787, "lr": 2.9997335285712135e-05, "seqlen": 8192.0, "step_tflops": 40.804174389683155, "step_time": 0.5056123256683349} +{"epoch": 0, "iter": 21343, "iter_tflops": 37.12622087247643, 
"iter_time": 0.5557014160156251, "loss": 0.3465264141559601, "lr": 2.9996373066554763e-05, "seqlen": 8192.0, "step_tflops": 40.64389328215704, "step_time": 0.5076062316894531} +{"epoch": 0, "iter": 21344, "iter_tflops": 43.88006624173158, "iter_time": 0.4701700630187989, "loss": 0.32967084646224976, "lr": 2.99952628392495e-05, "seqlen": 8192.0, "step_tflops": 47.79121090725831, "step_time": 0.43169221115112305} +{"epoch": 0, "iter": 21345, "iter_tflops": 22.630595318039713, "iter_time": 0.9116460800170898, "loss": 0.2240704894065857, "lr": 2.999400461475385e-05, "seqlen": 8192.0, "step_tflops": 24.0167386895179, "step_time": 0.8590297698974609} +{"epoch": 0, "iter": 21346, "iter_tflops": 9.876510211356514, "iter_time": 2.0889051971435544, "loss": 0.22961242496967316, "lr": 2.9992598405485974e-05, "seqlen": 8192.0, "step_tflops": 11.273082448798522, "step_time": 1.830119987487793} +{"epoch": 0, "iter": 21347, "iter_tflops": 9.118925447614483, "iter_time": 2.2624478759765623, "loss": 0.20027901232242584, "lr": 2.9991044225324593e-05, "seqlen": 8192.0, "step_tflops": 10.772281479551985, "step_time": 1.915201858520508} +{"epoch": 0, "iter": 21348, "iter_tflops": 37.53331338653241, "iter_time": 0.549674186706543, "loss": 0.18182487785816193, "lr": 2.9989342089608838e-05, "seqlen": 8192.0, "step_tflops": 41.11197736993859, "step_time": 0.5018268356323242} +{"epoch": 0, "iter": 21349, "iter_tflops": 17.73091034923473, "iter_time": 0.8799793777465819, "loss": 0.3913627564907074, "lr": 2.99874920151381e-05, "seqlen": 6240.0, "step_tflops": 18.933643415510417, "step_time": 0.8240799255371093} +{"epoch": 0, "iter": 21350, "iter_tflops": 8.162443633711906, "iter_time": 1.911539749145508, "loss": 0.4236263036727905, "lr": 2.9985494020171873e-05, "seqlen": 6240.0, "step_tflops": 9.418270103315393, "step_time": 1.6566561889648437} +{"epoch": 0, "iter": 21351, "iter_tflops": 11.05041107940117, "iter_time": 1.411968780517578, "loss": 0.23500001430511475, "lr": 
2.9983348124429553e-05, "seqlen": 6240.0, "step_tflops": 12.718400561912258, "step_time": 1.2267922668457032} +{"epoch": 0, "iter": 21352, "iter_tflops": 26.432871786675506, "iter_time": 0.590281509399414, "loss": 0.29477229714393616, "lr": 2.9981054349090264e-05, "seqlen": 6240.0, "step_tflops": 28.237677157508156, "step_time": 0.5525537872314453} +{"epoch": 0, "iter": 21353, "iter_tflops": 22.933275911696146, "iter_time": 0.7232482604980469, "loss": 0.34065717458724976, "lr": 2.9978612716792647e-05, "seqlen": 6624.0, "step_tflops": 24.289070616216403, "step_time": 0.6828771743774414} +{"epoch": 0, "iter": 21354, "iter_tflops": 9.130667601627069, "iter_time": 1.8165650787353513, "loss": 0.48628732562065125, "lr": 2.997602325163463e-05, "seqlen": 6624.0, "step_tflops": 11.245004477044231, "step_time": 1.4750062522888188} +{"epoch": 0, "iter": 21355, "iter_tflops": 25.90880395617827, "iter_time": 0.6401859359741211, "loss": 0.3260089159011841, "lr": 2.997328597917318e-05, "seqlen": 6624.0, "step_tflops": 27.85626208568982, "step_time": 0.5954299201965331} +{"epoch": 0, "iter": 21356, "iter_tflops": 25.373713882343736, "iter_time": 0.653686408996582, "loss": 0.36610811948776245, "lr": 2.9970400926424075e-05, "seqlen": 6624.0, "step_tflops": 27.31386768811457, "step_time": 0.6072538719177245} +{"epoch": 0, "iter": 21357, "iter_tflops": 20.80502624061978, "iter_time": 0.9916398696899413, "loss": 0.3198246657848358, "lr": 2.9967368121861623e-05, "seqlen": 8192.0, "step_tflops": 22.288590809901525, "step_time": 0.9256347198486328} +{"epoch": 0, "iter": 21358, "iter_tflops": 8.248503154011988, "iter_time": 2.5011924133300782, "loss": 0.2108415961265564, "lr": 2.9964187595418376e-05, "seqlen": 8192.0, "step_tflops": 9.549340311773793, "step_time": 2.1604731674194335} +{"epoch": 0, "iter": 21359, "iter_tflops": 14.760902631694817, "iter_time": 1.3976850891113282, "loss": 0.32092827558517456, "lr": 2.9960859378484856e-05, "seqlen": 8192.0, "step_tflops": 18.53036586021971, 
"step_time": 1.113366765975952} +{"epoch": 0, "iter": 21360, "iter_tflops": 36.82572401180238, "iter_time": 0.5602359237670899, "loss": 0.2986007034778595, "lr": 2.9957383503909207e-05, "seqlen": 8192.0, "step_tflops": 40.357334247532435, "step_time": 0.5112105121612549} +{"epoch": 0, "iter": 21361, "iter_tflops": 12.981998056822867, "iter_time": 1.3187784271240237, "loss": 0.17248886823654175, "lr": 2.995376000599692e-05, "seqlen": 6832.0, "step_tflops": 13.616990116134062, "step_time": 1.2572807083129882} +{"epoch": 0, "iter": 21362, "iter_tflops": 12.601483326959, "iter_time": 1.358600296020508, "loss": 0.20808365941047668, "lr": 2.9949988920510447e-05, "seqlen": 6832.0, "step_tflops": 16.11560213243712, "step_time": 1.0623480796813964} +{"epoch": 0, "iter": 21363, "iter_tflops": 30.874232340580036, "iter_time": 0.5545199890136718, "loss": 0.45929059386253357, "lr": 2.9946070284668884e-05, "seqlen": 6832.0, "step_tflops": 32.861136039848695, "step_time": 0.5209916954040528} +{"epoch": 0, "iter": 21364, "iter_tflops": 30.247674870022554, "iter_time": 0.566006446838379, "loss": 0.3233933448791504, "lr": 2.9942004137147587e-05, "seqlen": 6832.0, "step_tflops": 32.183755767198825, "step_time": 0.5319571495056152} +{"epoch": 0, "iter": 21365, "iter_tflops": 28.894762906755354, "iter_time": 0.714008056640625, "loss": 0.30305221676826477, "lr": 2.9937790518077784e-05, "seqlen": 8192.0, "step_tflops": 30.65273484775364, "step_time": 0.6730588188171387} +{"epoch": 0, "iter": 21366, "iter_tflops": 10.899763413081232, "iter_time": 1.8928019561767577, "loss": 0.46618348360061646, "lr": 2.9933429469046202e-05, "seqlen": 8192.0, "step_tflops": 14.573391590110012, "step_time": 1.415668643951416} +{"epoch": 0, "iter": 21367, "iter_tflops": 40.3708406729967, "iter_time": 0.5110394821166991, "loss": 0.39276495575904846, "lr": 2.9928921033094626e-05, "seqlen": 8192.0, "step_tflops": 44.35307637657296, "step_time": 0.4651558628082275} +{"epoch": 0, "iter": 21368, "iter_tflops": 
39.90427326757545, "iter_time": 0.5170146408081056, "loss": 0.3878069221973419, "lr": 2.9924265254719506e-05, "seqlen": 8192.0, "step_tflops": 43.45237264813727, "step_time": 0.47479785919189454} +{"epoch": 0, "iter": 21369, "iter_tflops": 17.269481083068044, "iter_time": 1.19465625, "loss": 0.18795378506183624, "lr": 2.9919462179871485e-05, "seqlen": 8192.0, "step_tflops": 18.563894924206743, "step_time": 1.1113558654785156} +{"epoch": 0, "iter": 21370, "iter_tflops": 18.252767339604784, "iter_time": 1.1302994842529297, "loss": 0.22954684495925903, "lr": 2.9914511855954986e-05, "seqlen": 8192.0, "step_tflops": 24.635048997028203, "step_time": 0.8374691486358642} +{"epoch": 0, "iter": 21371, "iter_tflops": 44.98996941546395, "iter_time": 0.4585709609985351, "loss": 0.22857053577899933, "lr": 2.99094143318277e-05, "seqlen": 8192.0, "step_tflops": 48.9343189249058, "step_time": 0.4216078605651856} +{"epoch": 0, "iter": 21372, "iter_tflops": 48.460684254663185, "iter_time": 0.42572848129272456, "loss": 0.18287688493728638, "lr": 2.9904169657800128e-05, "seqlen": 8192.0, "step_tflops": 52.45174409585124, "step_time": 0.39333474731445317} +{"epoch": 0, "iter": 21373, "iter_tflops": 36.731814612120544, "iter_time": 0.5616682357788086, "loss": 0.11936838924884796, "lr": 2.989877788563509e-05, "seqlen": 8192.0, "step_tflops": 39.758316387650744, "step_time": 0.5189126548767089} +{"epoch": 0, "iter": 21374, "iter_tflops": 12.112677702180688, "iter_time": 1.7032644653320315, "loss": 0.15079636871814728, "lr": 2.9893239068547206e-05, "seqlen": 8192.0, "step_tflops": 15.007950464513183, "step_time": 1.3746776123046875} +{"epoch": 0, "iter": 21375, "iter_tflops": 14.47143813230076, "iter_time": 1.4256422424316406, "loss": 0.1554490029811859, "lr": 2.988755326120236e-05, "seqlen": 8192.0, "step_tflops": 17.2659706279184, "step_time": 1.194899143218994} +{"epoch": 0, "iter": 21376, "iter_tflops": 22.35964419427951, "iter_time": 0.9226932830810548, "loss": 0.08984676748514175, 
"lr": 2.988172051971717e-05, "seqlen": 8192.0, "step_tflops": 26.98628308609912, "step_time": 0.7645029678344726} +{"epoch": 0, "iter": 21377, "iter_tflops": 13.299246695422514, "iter_time": 1.1516787109375, "loss": 0.30249443650245667, "lr": 2.9875740901658446e-05, "seqlen": 6128.0, "step_tflops": 14.142988538694297, "step_time": 1.082971908569336} +{"epoch": 0, "iter": 21378, "iter_tflops": 19.458349843923124, "iter_time": 0.7871407089233399, "loss": 0.17970362305641174, "lr": 2.98696144660426e-05, "seqlen": 6128.0, "step_tflops": 21.29336505384817, "step_time": 0.7193066596984863} +{"epoch": 0, "iter": 21379, "iter_tflops": 27.595915184134878, "iter_time": 0.5550263214111328, "loss": 0.3204342722892761, "lr": 2.986334127333508e-05, "seqlen": 6128.0, "step_tflops": 29.464805787399957, "step_time": 0.5198221702575684} +{"epoch": 0, "iter": 21380, "iter_tflops": 25.948527880686004, "iter_time": 0.5902631301879883, "loss": 0.47025036811828613, "lr": 2.9856921385449766e-05, "seqlen": 6128.0, "step_tflops": 27.787032074541663, "step_time": 0.5512088966369628} +{"epoch": 0, "iter": 21381, "iter_tflops": 32.095972727829235, "iter_time": 0.6427938385009765, "loss": 0.0159139521420002, "lr": 2.9850354865748363e-05, "seqlen": 8192.0, "step_tflops": 34.423618870890415, "step_time": 0.5993295936584472} +{"epoch": 0, "iter": 21382, "iter_tflops": 12.939376409002223, "iter_time": 1.5944426422119138, "loss": 0.01606401801109314, "lr": 2.984364177903976e-05, "seqlen": 8192.0, "step_tflops": 14.907831831627924, "step_time": 1.3839097290039064} +{"epoch": 0, "iter": 21383, "iter_tflops": 42.097116810349085, "iter_time": 0.49008329010009766, "loss": 0.03499941900372505, "lr": 2.9836782191579425e-05, "seqlen": 8192.0, "step_tflops": 46.51644625953894, "step_time": 0.4435225639343262} +{"epoch": 0, "iter": 21384, "iter_tflops": 47.42146556543198, "iter_time": 0.4350581169128418, "loss": 0.0504155158996582, "lr": 2.982977617106871e-05, "seqlen": 8192.0, "step_tflops": 
52.285955422662916, "step_time": 0.3945819358825684} +{"epoch": 0, "iter": 21385, "iter_tflops": 32.02782505777012, "iter_time": 0.6441615524291993, "loss": 0.5212656259536743, "lr": 2.982262378665421e-05, "seqlen": 8192.0, "step_tflops": 35.07631853614975, "step_time": 0.5881772766113281} +{"epoch": 0, "iter": 21386, "iter_tflops": 35.043522155622355, "iter_time": 0.5887277374267579, "loss": 0.5293945074081421, "lr": 2.981532510892707e-05, "seqlen": 8192.0, "step_tflops": 38.38330529467682, "step_time": 0.5375017433166503} +{"epoch": 0, "iter": 21387, "iter_tflops": 36.92101260095367, "iter_time": 0.5587900238037109, "loss": 0.5367233157157898, "lr": 2.9807880209922288e-05, "seqlen": 8192.0, "step_tflops": 40.385716042524614, "step_time": 0.5108512496948242} +{"epoch": 0, "iter": 21388, "iter_tflops": 37.907427148227534, "iter_time": 0.5442493743896484, "loss": 0.4953613877296448, "lr": 2.980028916311802e-05, "seqlen": 8192.0, "step_tflops": 41.36705597252776, "step_time": 0.498732458114624} +{"epoch": 0, "iter": 21389, "iter_tflops": 17.24409695394906, "iter_time": 1.1964148406982422, "loss": 0.4537629783153534, "lr": 2.9792552043434835e-05, "seqlen": 8192.0, "step_tflops": 18.489063581639275, "step_time": 1.115853889465332} +{"epoch": 0, "iter": 21390, "iter_tflops": 15.19076838689998, "iter_time": 1.3581336364746095, "loss": 0.4074175953865051, "lr": 2.9784668927234972e-05, "seqlen": 8192.0, "step_tflops": 19.577983313989233, "step_time": 1.0537905349731445} +{"epoch": 0, "iter": 21391, "iter_tflops": 35.58310340959679, "iter_time": 0.5798002853393555, "loss": 0.5755835771560669, "lr": 2.977663989232161e-05, "seqlen": 8192.0, "step_tflops": 38.75270792390188, "step_time": 0.5323781127929688} +{"epoch": 0, "iter": 21392, "iter_tflops": 37.81556031801406, "iter_time": 0.5455715408325195, "loss": 0.32765820622444153, "lr": 2.9768465017938083e-05, "seqlen": 8192.0, "step_tflops": 41.24529973976141, "step_time": 0.5002047176361084} +{"epoch": 0, "iter": 21393, 
"iter_tflops": 15.940296062786906, "iter_time": 1.294272918701172, "loss": 0.5488532781600952, "lr": 2.9760144384767095e-05, "seqlen": 8192.0, "step_tflops": 16.849706982232163, "step_time": 1.2244185333251953} +{"epoch": 0, "iter": 21394, "iter_tflops": 17.09983271958356, "iter_time": 1.206508499145508, "loss": 0.4670586585998535, "lr": 2.9751678074929946e-05, "seqlen": 8192.0, "step_tflops": 22.489179992331138, "step_time": 0.9173786468505859} +{"epoch": 0, "iter": 21395, "iter_tflops": 42.606531799302424, "iter_time": 0.4842237243652343, "loss": 0.5220075249671936, "lr": 2.9743066171985686e-05, "seqlen": 8192.0, "step_tflops": 46.36483995922235, "step_time": 0.44497281837463376} +{"epoch": 0, "iter": 21396, "iter_tflops": 38.91178874926756, "iter_time": 0.5302016220092773, "loss": 0.5075334310531616, "lr": 2.9734308760930333e-05, "seqlen": 8192.0, "step_tflops": 42.5196231591157, "step_time": 0.4852134609222412} +{"epoch": 0, "iter": 21397, "iter_tflops": 26.378365404252044, "iter_time": 0.7821217575073243, "loss": 0.5288676619529724, "lr": 2.9725405928195985e-05, "seqlen": 8192.0, "step_tflops": 28.48344898063724, "step_time": 0.7243186569213867} +{"epoch": 0, "iter": 21398, "iter_tflops": 10.215428896025266, "iter_time": 2.019601303100586, "loss": 0.6031374931335449, "lr": 2.971635776165001e-05, "seqlen": 8192.0, "step_tflops": 13.593650789196804, "step_time": 1.5177007141113281} +{"epoch": 0, "iter": 21399, "iter_tflops": 11.001840350815064, "iter_time": 1.8752402191162107, "loss": 0.5867651700973511, "lr": 2.9707164350594168e-05, "seqlen": 8192.0, "step_tflops": 13.399371454045282, "step_time": 1.5397060661315918} +{"epoch": 0, "iter": 21400, "iter_tflops": 32.580907007193574, "iter_time": 0.6332264938354493, "loss": 0.5869750380516052, "lr": 2.9697825785763704e-05, "seqlen": 8192.0, "step_tflops": 40.76395472149547, "step_time": 0.5061111869812011} +{"epoch": 0, "iter": 21401, "iter_tflops": 23.981748693221466, "iter_time": 0.6284436645507813, "loss": 
0.32133808732032776, "lr": 2.9688342159326487e-05, "seqlen": 6032.0, "step_tflops": 25.747447060654263, "step_time": 0.5853465003967285} +{"epoch": 0, "iter": 21402, "iter_tflops": 21.467156373835625, "iter_time": 0.7020574951171876, "loss": 0.33815696835517883, "lr": 2.967871356488208e-05, "seqlen": 6032.0, "step_tflops": 23.466065748977275, "step_time": 0.6422541465759277} +{"epoch": 0, "iter": 21403, "iter_tflops": 21.94459445083542, "iter_time": 0.6867831649780274, "loss": 0.3738936483860016, "lr": 2.966894009746083e-05, "seqlen": 6032.0, "step_tflops": 23.52934211770052, "step_time": 0.6405269622802734} +{"epoch": 0, "iter": 21404, "iter_tflops": 23.93387476521028, "iter_time": 0.6297007141113281, "loss": 0.3855034410953522, "lr": 2.9659021853522903e-05, "seqlen": 6032.0, "step_tflops": 25.68498860336949, "step_time": 0.5867698936462402} +{"epoch": 0, "iter": 21405, "iter_tflops": 18.354668468515083, "iter_time": 1.1240243072509766, "loss": 0.46300867199897766, "lr": 2.964895893095737e-05, "seqlen": 8192.0, "step_tflops": 19.67927580033149, "step_time": 1.0483665008544922} +{"epoch": 0, "iter": 21406, "iter_tflops": 28.31262232527612, "iter_time": 0.7286888961791992, "loss": 0.4650062024593353, "lr": 2.9638751429081213e-05, "seqlen": 8192.0, "step_tflops": 36.524455822090786, "step_time": 0.5648569717407227} +{"epoch": 0, "iter": 21407, "iter_tflops": 48.78971157106219, "iter_time": 0.42285746002197266, "loss": 0.41551706194877625, "lr": 2.9628399448638352e-05, "seqlen": 8192.0, "step_tflops": 52.88849719243583, "step_time": 0.3900865898132324} +{"epoch": 0, "iter": 21408, "iter_tflops": 48.105211318499016, "iter_time": 0.42887439727783205, "loss": 0.4949432909488678, "lr": 2.961790309179866e-05, "seqlen": 8192.0, "step_tflops": 51.95147887059263, "step_time": 0.39712235260009765} +{"epoch": 0, "iter": 21409, "iter_tflops": 17.875858422926818, "iter_time": 0.6338979568481445, "loss": 0.008473759517073631, "lr": 2.960726246215693e-05, "seqlen": 4560.0, 
"step_tflops": 19.10411030700196, "step_time": 0.5931430435180665} +{"epoch": 0, "iter": 21410, "iter_tflops": 11.694192999585102, "iter_time": 0.9689826507568359, "loss": 0.001381607144139707, "lr": 2.95964776647319e-05, "seqlen": 4560.0, "step_tflops": 14.173791073156233, "step_time": 0.7994664287567139} +{"epoch": 0, "iter": 21411, "iter_tflops": 22.54052491271934, "iter_time": 0.5027154502868652, "loss": 0.0062210438773036, "lr": 2.958554880596515e-05, "seqlen": 4560.0, "step_tflops": 24.955026048304017, "step_time": 0.4540756683349609} +{"epoch": 0, "iter": 21412, "iter_tflops": 25.023278283124867, "iter_time": 0.4528371543884277, "loss": 0.011440816335380077, "lr": 2.9574475993720112e-05, "seqlen": 4560.0, "step_tflops": 27.680624501300333, "step_time": 0.40936468505859375} +{"epoch": 0, "iter": 21413, "iter_tflops": 17.841933209406882, "iter_time": 1.1563261260986328, "loss": 0.09641353040933609, "lr": 2.9563259337280967e-05, "seqlen": 8192.0, "step_tflops": 19.21722153548483, "step_time": 1.0735731735229492} +{"epoch": 0, "iter": 21414, "iter_tflops": 24.298627783729884, "iter_time": 0.8490641403198241, "loss": 0.13073253631591797, "lr": 2.9551898947351588e-05, "seqlen": 8192.0, "step_tflops": 30.045935067284596, "step_time": 0.6866517372131347} +{"epoch": 0, "iter": 21415, "iter_tflops": 36.31410242771305, "iter_time": 0.5681289672851564, "loss": 0.06538079679012299, "lr": 2.954039493605444e-05, "seqlen": 8192.0, "step_tflops": 39.866187478764544, "step_time": 0.5175085659027099} +{"epoch": 0, "iter": 21416, "iter_tflops": 38.26783952261288, "iter_time": 0.539123550415039, "loss": 0.08807458728551865, "lr": 2.9528747416929467e-05, "seqlen": 8192.0, "step_tflops": 41.92626711051268, "step_time": 0.4920803813934326} +{"epoch": 0, "iter": 21417, "iter_tflops": 23.265527551200137, "iter_time": 0.886766632080078, "loss": 0.258590966463089, "lr": 2.9516956504932984e-05, "seqlen": 8192.0, "step_tflops": 25.206359373610773, "step_time": 0.8184876365661621} 
+{"epoch": 0, "iter": 21418, "iter_tflops": 24.922648597494312, "iter_time": 0.8278050155639649, "loss": 0.2700275778770447, "lr": 2.9505022316436527e-05, "seqlen": 8192.0, "step_tflops": 29.99745179237119, "step_time": 0.6877615356445314} +{"epoch": 0, "iter": 21419, "iter_tflops": 49.541998162374455, "iter_time": 0.41643644332885743, "loss": 0.266110360622406, "lr": 2.949294496922572e-05, "seqlen": 8192.0, "step_tflops": 53.848792861642146, "step_time": 0.3831301021575928} +{"epoch": 0, "iter": 21420, "iter_tflops": 49.76802199598335, "iter_time": 0.41454517745971686, "loss": 0.26457804441452026, "lr": 2.948072458249911e-05, "seqlen": 8192.0, "step_tflops": 53.879540358809024, "step_time": 0.3829114608764648} +{"epoch": 0, "iter": 21421, "iter_tflops": 33.71856976616815, "iter_time": 0.6118614654541016, "loss": 0.3161172866821289, "lr": 2.9468361276866973e-05, "seqlen": 8192.0, "step_tflops": 36.03065960660313, "step_time": 0.5725982742309571} +{"epoch": 0, "iter": 21422, "iter_tflops": 7.292899836231134, "iter_time": 2.828928680419922, "loss": 0.41000640392303467, "lr": 2.945585517435015e-05, "seqlen": 8192.0, "step_tflops": 9.22033482029306, "step_time": 2.2375644607543945} +{"epoch": 0, "iter": 21423, "iter_tflops": 10.291504324205118, "iter_time": 2.004672286987305, "loss": 0.3757716715335846, "lr": 2.9443206398378815e-05, "seqlen": 8192.0, "step_tflops": 13.17335310481217, "step_time": 1.5661231689453126} +{"epoch": 0, "iter": 21424, "iter_tflops": 35.52996199044819, "iter_time": 0.58066748046875, "loss": 0.4176684319972992, "lr": 2.9430415073791287e-05, "seqlen": 8192.0, "step_tflops": 38.83508557665363, "step_time": 0.5312488231658935} +{"epoch": 0, "iter": 21425, "iter_tflops": 16.78835486184035, "iter_time": 0.9928633728027344, "loss": 0.24358944594860077, "lr": 2.9417481326832778e-05, "seqlen": 6656.0, "step_tflops": 17.845041586604566, "step_time": 0.9340713806152342} +{"epoch": 0, "iter": 21426, "iter_tflops": 6.447628403648924, "iter_time": 
2.585220733642578, "loss": 0.3417949378490448, "lr": 2.9404405285154146e-05, "seqlen": 6656.0, "step_tflops": 7.193221762969917, "step_time": 2.3172568817138672} +{"epoch": 0, "iter": 21427, "iter_tflops": 9.757569521080262, "iter_time": 1.7082678833007812, "loss": 0.3167937994003296, "lr": 2.9391187077810644e-05, "seqlen": 6656.0, "step_tflops": 12.735405524531998, "step_time": 1.3088348541259767} +{"epoch": 0, "iter": 21428, "iter_tflops": 28.973010138503195, "iter_time": 0.5753127670288085, "loss": 0.19752337038516998, "lr": 2.937782683526064e-05, "seqlen": 6656.0, "step_tflops": 30.80322528194412, "step_time": 0.541129783630371} +{"epoch": 0, "iter": 21429, "iter_tflops": 18.988430396996144, "iter_time": 0.8044670867919921, "loss": 0.2375042885541916, "lr": 2.9364324689364332e-05, "seqlen": 6112.0, "step_tflops": 20.108600701601926, "step_time": 0.759653419494629} +{"epoch": 0, "iter": 21430, "iter_tflops": 18.887012246290404, "iter_time": 0.8087868576049804, "loss": 0.5808430910110474, "lr": 2.935068077338246e-05, "seqlen": 6112.0, "step_tflops": 20.590295864924553, "step_time": 0.7418818740844728} +{"epoch": 0, "iter": 21431, "iter_tflops": 26.484360332821698, "iter_time": 0.5767769012451172, "loss": 0.27536290884017944, "lr": 2.933689522197495e-05, "seqlen": 6112.0, "step_tflops": 28.18258634496401, "step_time": 0.542021484375} +{"epoch": 0, "iter": 21432, "iter_tflops": 27.460758783764756, "iter_time": 0.5562689437866211, "loss": 0.34450867772102356, "lr": 2.9322968171199646e-05, "seqlen": 6112.0, "step_tflops": 29.221167054642567, "step_time": 0.5227569198608398} +{"epoch": 0, "iter": 21433, "iter_tflops": 26.044183218567753, "iter_time": 0.7921574401855469, "loss": 0.5072170495986938, "lr": 2.9308899758510903e-05, "seqlen": 8192.0, "step_tflops": 27.509939758614802, "step_time": 0.7499505157470704} +{"epoch": 0, "iter": 21434, "iter_tflops": 27.710026999721602, "iter_time": 0.7445353088378905, "loss": 0.6433106064796448, "lr": 2.9294690122758285e-05, 
"seqlen": 8192.0, "step_tflops": 33.71442408693891, "step_time": 0.6119367027282714} +{"epoch": 0, "iter": 21435, "iter_tflops": 33.33878618040457, "iter_time": 0.6188315734863281, "loss": 0.4231567680835724, "lr": 2.928033940418515e-05, "seqlen": 8192.0, "step_tflops": 36.23434005014698, "step_time": 0.5693795852661133} +{"epoch": 0, "iter": 21436, "iter_tflops": 35.887770511409016, "iter_time": 0.5748781051635742, "loss": 0.509395956993103, "lr": 2.9265847744427305e-05, "seqlen": 8192.0, "step_tflops": 39.07050522201195, "step_time": 0.528047779083252} +{"epoch": 0, "iter": 21437, "iter_tflops": 18.217050019167843, "iter_time": 1.132515609741211, "loss": 0.5270273685455322, "lr": 2.9251215286511574e-05, "seqlen": 8192.0, "step_tflops": 19.56785473330963, "step_time": 1.0543359909057617} +{"epoch": 0, "iter": 21438, "iter_tflops": 20.023502454949824, "iter_time": 1.030343894958496, "loss": 0.5504913926124573, "lr": 2.923644217485442e-05, "seqlen": 8192.0, "step_tflops": 25.025760906158826, "step_time": 0.8243942546844483} +{"epoch": 0, "iter": 21439, "iter_tflops": 47.25330653802165, "iter_time": 0.43660634613037114, "loss": 0.5851752161979675, "lr": 2.922152855526048e-05, "seqlen": 8192.0, "step_tflops": 50.68358830445861, "step_time": 0.40705668640136716} +{"epoch": 0, "iter": 21440, "iter_tflops": 47.61135764194072, "iter_time": 0.433322940826416, "loss": 0.6026676297187805, "lr": 2.9206474574921165e-05, "seqlen": 8192.0, "step_tflops": 51.06459950505644, "step_time": 0.40401949119567865} +{"epoch": 0, "iter": 21441, "iter_tflops": 33.1938303135422, "iter_time": 0.6215339813232422, "loss": 0.01172480545938015, "lr": 2.919128038241318e-05, "seqlen": 8192.0, "step_tflops": 35.51387723210099, "step_time": 0.5809304733276367} +{"epoch": 0, "iter": 21442, "iter_tflops": 12.883942730431302, "iter_time": 1.6013027954101564, "loss": 0.0032843807712197304, "lr": 2.9175946127697073e-05, "seqlen": 8192.0, "step_tflops": 14.665829710713174, "step_time": 1.4067457427978518} 
+{"epoch": 0, "iter": 21443, "iter_tflops": 42.46013644529753, "iter_time": 0.48589324569702147, "loss": 0.002956092357635498, "lr": 2.916047196211575e-05, "seqlen": 8192.0, "step_tflops": 47.13638986565724, "step_time": 0.43768930053710936} +{"epoch": 0, "iter": 21444, "iter_tflops": 43.27943524998358, "iter_time": 0.47669507217407225, "loss": 0.005509658716619015, "lr": 2.914485803839297e-05, "seqlen": 8192.0, "step_tflops": 47.72868183603839, "step_time": 0.4322577686309814} +{"epoch": 0, "iter": 21445, "iter_tflops": 36.29648396783617, "iter_time": 0.5684047393798828, "loss": 0.024078255519270897, "lr": 2.912910451063186e-05, "seqlen": 8192.0, "step_tflops": 40.75565162673454, "step_time": 0.5062142963409424} +{"epoch": 0, "iter": 21446, "iter_tflops": 51.053249200633005, "iter_time": 0.40410931396484373, "loss": 0.006177798844873905, "lr": 2.9113211534313385e-05, "seqlen": 8192.0, "step_tflops": 56.01253245314712, "step_time": 0.3683299541473389} +{"epoch": 0, "iter": 21447, "iter_tflops": 49.367298863482226, "iter_time": 0.41791011428833014, "loss": 0.0035355982836335897, "lr": 2.9097179266294794e-05, "seqlen": 8192.0, "step_tflops": 54.24290774352597, "step_time": 0.380346378326416} +{"epoch": 0, "iter": 21448, "iter_tflops": 52.871596292538136, "iter_time": 0.3902112846374512, "loss": 0.00672521535307169, "lr": 2.9081007864808114e-05, "seqlen": 8192.0, "step_tflops": 57.76303213945685, "step_time": 0.3571677722930908} +{"epoch": 0, "iter": 21449, "iter_tflops": 42.02719404842814, "iter_time": 0.490898666381836, "loss": 0.0629381537437439, "lr": 2.9064697489458532e-05, "seqlen": 8192.0, "step_tflops": 45.686633648183225, "step_time": 0.4515783252716065} +{"epoch": 0, "iter": 21450, "iter_tflops": 37.66770269375543, "iter_time": 0.5477130813598633, "loss": 0.04505385085940361, "lr": 2.9048248301222885e-05, "seqlen": 8192.0, "step_tflops": 41.65029659210973, "step_time": 0.49534085464477534} +{"epoch": 0, "iter": 21451, "iter_tflops": 45.22024345945658, 
"iter_time": 0.4562357902526855, "loss": 0.037130456417798996, "lr": 2.9031660462448015e-05, "seqlen": 8192.0, "step_tflops": 49.76878407247212, "step_time": 0.4145388298034668} +{"epoch": 0, "iter": 21452, "iter_tflops": 38.97256882097856, "iter_time": 0.529374740600586, "loss": 0.04112136736512184, "lr": 2.9014934136849183e-05, "seqlen": 8192.0, "step_tflops": 42.929181759297364, "step_time": 0.48058436393737786} +{"epoch": 0, "iter": 21453, "iter_tflops": 18.652837120841394, "iter_time": 1.106056594848633, "loss": 0.6327441930770874, "lr": 2.899806948950848e-05, "seqlen": 8192.0, "step_tflops": 19.775897444598616, "step_time": 1.0432443618774414} +{"epoch": 0, "iter": 21454, "iter_tflops": 17.718420502332794, "iter_time": 1.16438671875, "loss": 0.4166201055049896, "lr": 2.8981066686873162e-05, "seqlen": 8192.0, "step_tflops": 21.28197170462755, "step_time": 0.969416452407837} +{"epoch": 0, "iter": 21455, "iter_tflops": 43.5327946438486, "iter_time": 0.47392072296142573, "loss": 0.4282698333263397, "lr": 2.8963925896754036e-05, "seqlen": 8192.0, "step_tflops": 46.8364319025941, "step_time": 0.4404924259185791} +{"epoch": 0, "iter": 21456, "iter_tflops": 43.79988886206242, "iter_time": 0.4710307273864746, "loss": 0.6922010183334351, "lr": 2.894664728832377e-05, "seqlen": 8192.0, "step_tflops": 46.858429576122894, "step_time": 0.44028563690185546} +{"epoch": 0, "iter": 21457, "iter_tflops": 25.61756146533928, "iter_time": 0.8053496246337891, "loss": 0.29032304883003235, "lr": 2.892923103211526e-05, "seqlen": 8192.0, "step_tflops": 27.05533667826922, "step_time": 0.7625517196655273} +{"epoch": 0, "iter": 21458, "iter_tflops": 16.746601742062076, "iter_time": 1.231957015991211, "loss": 0.35554900765419006, "lr": 2.8911677300019924e-05, "seqlen": 8192.0, "step_tflops": 18.40708466554173, "step_time": 1.1208235244750977} +{"epoch": 0, "iter": 21459, "iter_tflops": 46.34899526171203, "iter_time": 0.4451249351501464, "loss": 0.2541140019893646, "lr": 2.8893986265286e-05, 
"seqlen": 8192.0, "step_tflops": 50.62174348971671, "step_time": 0.40755398941040044} +{"epoch": 0, "iter": 21460, "iter_tflops": 50.05852817610675, "iter_time": 0.4121394348144531, "loss": 0.24388240277767181, "lr": 2.887615810251687e-05, "seqlen": 8192.0, "step_tflops": 54.1112567017913, "step_time": 0.3812717494964599} +{"epoch": 0, "iter": 21461, "iter_tflops": 25.190384568754283, "iter_time": 0.8190066909790039, "loss": 0.3943783938884735, "lr": 2.8858192987669303e-05, "seqlen": 8192.0, "step_tflops": 26.5098652631013, "step_time": 0.7782421112060547} +{"epoch": 0, "iter": 21462, "iter_tflops": 13.817815822786423, "iter_time": 1.4930792083740234, "loss": 0.44197019934654236, "lr": 2.8840091098051725e-05, "seqlen": 8192.0, "step_tflops": 17.52385562053252, "step_time": 1.177314739227295} +{"epoch": 0, "iter": 21463, "iter_tflops": 43.36393371829388, "iter_time": 0.4757661895751953, "loss": 0.317066490650177, "lr": 2.8821852612322477e-05, "seqlen": 8192.0, "step_tflops": 46.93153177784993, "step_time": 0.43959983253479007} +{"epoch": 0, "iter": 21464, "iter_tflops": 45.129203946891316, "iter_time": 0.4571561584472656, "loss": 0.45989081263542175, "lr": 2.8803477710488058e-05, "seqlen": 8192.0, "step_tflops": 48.936560482264944, "step_time": 0.42158854866027834} +{"epoch": 0, "iter": 21465, "iter_tflops": 26.434609911855386, "iter_time": 0.780457649230957, "loss": 0.16278976202011108, "lr": 2.878496657390132e-05, "seqlen": 8192.0, "step_tflops": 27.999042130303824, "step_time": 0.7368499755859376} +{"epoch": 0, "iter": 21466, "iter_tflops": 16.263203641704564, "iter_time": 1.2685749969482423, "loss": 0.14194338023662567, "lr": 2.8766319385259717e-05, "seqlen": 8192.0, "step_tflops": 18.904832638354765, "step_time": 1.091313205718994} +{"epoch": 0, "iter": 21467, "iter_tflops": 47.68697259300236, "iter_time": 0.4326358413696289, "loss": 0.15534116327762604, "lr": 2.874753632860347e-05, "seqlen": 8192.0, "step_tflops": 51.68293061260069, "step_time": 
0.39918582916259765} +{"epoch": 0, "iter": 21468, "iter_tflops": 49.344450521416434, "iter_time": 0.41810362243652344, "loss": 0.16979217529296875, "lr": 2.8728617589313762e-05, "seqlen": 8192.0, "step_tflops": 53.34649863495713, "step_time": 0.3867375373840332} +{"epoch": 0, "iter": 21469, "iter_tflops": 30.983891757269703, "iter_time": 0.6658651428222656, "loss": 0.09474831819534302, "lr": 2.870956335411092e-05, "seqlen": 8192.0, "step_tflops": 33.02625185704175, "step_time": 0.6246877059936523} +{"epoch": 0, "iter": 21470, "iter_tflops": 46.03623820887042, "iter_time": 0.44814898681640625, "loss": 0.117091104388237, "lr": 2.8690373811052545e-05, "seqlen": 8192.0, "step_tflops": 54.13528575067765, "step_time": 0.3811025142669678} +{"epoch": 0, "iter": 21471, "iter_tflops": 49.21503869017048, "iter_time": 0.4192030334472656, "loss": 0.05249838903546333, "lr": 2.8671049149531677e-05, "seqlen": 8192.0, "step_tflops": 53.70327625795041, "step_time": 0.3841682472229004} +{"epoch": 0, "iter": 21472, "iter_tflops": 51.39865833922178, "iter_time": 0.4013936195373535, "loss": 0.058912184089422226, "lr": 2.8651589560274936e-05, "seqlen": 8192.0, "step_tflops": 55.83232658531717, "step_time": 0.3695187854766846} +{"epoch": 0, "iter": 21473, "iter_tflops": 25.25259436327742, "iter_time": 0.8169890670776367, "loss": 0.5704858303070068, "lr": 2.8631995235340606e-05, "seqlen": 8192.0, "step_tflops": 26.65142127179383, "step_time": 0.7741085662841797} +{"epoch": 0, "iter": 21474, "iter_tflops": 14.991667963363907, "iter_time": 1.376170654296875, "loss": 0.5562602281570435, "lr": 2.8612266368116755e-05, "seqlen": 8192.0, "step_tflops": 17.938501354831917, "step_time": 1.150101287841797} +{"epoch": 0, "iter": 21475, "iter_tflops": 36.3339742852734, "iter_time": 0.5678182449340821, "loss": 0.44390591979026794, "lr": 2.859240315331935e-05, "seqlen": 8192.0, "step_tflops": 39.80513597965128, "step_time": 0.5183022994995117} +{"epoch": 0, "iter": 21476, "iter_tflops": 
37.72905734391716, "iter_time": 0.546822395324707, "loss": 0.5822710990905762, "lr": 2.8572405786990293e-05, "seqlen": 8192.0, "step_tflops": 41.12327048414206, "step_time": 0.5016890258789063} +{"epoch": 0, "iter": 21477, "iter_tflops": 18.917046738212527, "iter_time": 1.090608581542969, "loss": 0.4567992389202118, "lr": 2.8552274466495525e-05, "seqlen": 8192.0, "step_tflops": 20.336822457200647, "step_time": 1.0144698638916017} +{"epoch": 0, "iter": 21478, "iter_tflops": 32.270430970081804, "iter_time": 0.6393188095092772, "loss": 0.557766318321228, "lr": 2.853200939052304e-05, "seqlen": 8192.0, "step_tflops": 36.339577312825135, "step_time": 0.5677306957244873} +{"epoch": 0, "iter": 21479, "iter_tflops": 44.02329416081463, "iter_time": 0.4686403846740723, "loss": 0.4509188234806061, "lr": 2.851161075908097e-05, "seqlen": 8192.0, "step_tflops": 47.17247257975885, "step_time": 0.43735450744628906} +{"epoch": 0, "iter": 21480, "iter_tflops": 44.54753804228083, "iter_time": 0.46312533569335945, "loss": 0.4987579584121704, "lr": 2.8491078773495568e-05, "seqlen": 8192.0, "step_tflops": 47.695499192105146, "step_time": 0.43255849838256843} +{"epoch": 0, "iter": 21481, "iter_tflops": 51.30007347795094, "iter_time": 0.40216498947143553, "loss": 0.0012175798183307052, "lr": 2.8470413636409233e-05, "seqlen": 8192.0, "step_tflops": 56.77888684168608, "step_time": 0.36335854148864744} +{"epoch": 0, "iter": 21482, "iter_tflops": 12.44206995883902, "iter_time": 1.6581721191406251, "loss": 0.003655338892713189, "lr": 2.844961555177854e-05, "seqlen": 8192.0, "step_tflops": 13.47348406557594, "step_time": 1.5312367172241212} +{"epoch": 0, "iter": 21483, "iter_tflops": 12.030148454627241, "iter_time": 1.7149492034912108, "loss": 0.006936305668205023, "lr": 2.8428684724872176e-05, "seqlen": 8192.0, "step_tflops": 14.02288639593607, "step_time": 1.471244430541992} +{"epoch": 0, "iter": 21484, "iter_tflops": 18.955908750219535, "iter_time": 1.0883726959228517, "loss": 
0.009350775741040707, "lr": 2.8407621362268957e-05, "seqlen": 8192.0, "step_tflops": 26.329111005109944, "step_time": 0.7835848884582519} +{"epoch": 0, "iter": 21485, "iter_tflops": 12.37082134008876, "iter_time": 1.201772003173828, "loss": 0.5520724058151245, "lr": 2.8386425671855768e-05, "seqlen": 5952.0, "step_tflops": 13.17130955451466, "step_time": 1.1287341384887697} +{"epoch": 0, "iter": 21486, "iter_tflops": 10.446844863715343, "iter_time": 1.4231001739501952, "loss": 0.288360059261322, "lr": 2.8365097862825516e-05, "seqlen": 5952.0, "step_tflops": 14.408860966953865, "step_time": 1.0317891731262208} +{"epoch": 0, "iter": 21487, "iter_tflops": 22.429846564942856, "iter_time": 0.6628180313110351, "loss": 0.30455145239830017, "lr": 2.8343638145675072e-05, "seqlen": 5952.0, "step_tflops": 24.175720500348298, "step_time": 0.6149519615173339} +{"epoch": 0, "iter": 21488, "iter_tflops": 23.646364158571906, "iter_time": 0.628718505859375, "loss": 0.31882765889167786, "lr": 2.832204673220317e-05, "seqlen": 5952.0, "step_tflops": 25.388795891855185, "step_time": 0.5855695877075195} +{"epoch": 0, "iter": 21489, "iter_tflops": 32.55286154174325, "iter_time": 0.6337720413208008, "loss": 0.4135688543319702, "lr": 2.830032383550835e-05, "seqlen": 8192.0, "step_tflops": 36.06335210380131, "step_time": 0.5720791969299316} +{"epoch": 0, "iter": 21490, "iter_tflops": 38.716513627937005, "iter_time": 0.5328758087158203, "loss": 0.39003440737724304, "lr": 2.827846966998683e-05, "seqlen": 8192.0, "step_tflops": 42.331575892973234, "step_time": 0.4873688983917236} +{"epoch": 0, "iter": 21491, "iter_tflops": 38.767399341302486, "iter_time": 0.5321763610839844, "loss": 0.32898443937301636, "lr": 2.8256484451330406e-05, "seqlen": 8192.0, "step_tflops": 41.90187960757126, "step_time": 0.4923667793273926} +{"epoch": 0, "iter": 21492, "iter_tflops": 38.48525802334031, "iter_time": 0.5360778274536132, "loss": 0.4252311587333679, "lr": 2.8234368396524303e-05, "seqlen": 8192.0, 
"step_tflops": 42.03900478561413, "step_time": 0.49076074981689455} +{"epoch": 0, "iter": 21493, "iter_tflops": 18.235169089652462, "iter_time": 1.1313903045654297, "loss": 0.3290417194366455, "lr": 2.8212121723845054e-05, "seqlen": 8192.0, "step_tflops": 19.810347410221382, "step_time": 1.0414301719665529} +{"epoch": 0, "iter": 21494, "iter_tflops": 19.86246322106056, "iter_time": 1.0386976318359376, "loss": 0.3417435884475708, "lr": 2.818974465285834e-05, "seqlen": 8192.0, "step_tflops": 23.508308458926166, "step_time": 0.8776085929870606} +{"epoch": 0, "iter": 21495, "iter_tflops": 47.43309136032553, "iter_time": 0.4349514846801758, "loss": 0.5684006810188293, "lr": 2.8167237404416827e-05, "seqlen": 8192.0, "step_tflops": 51.532632597895144, "step_time": 0.40035007858276367} +{"epoch": 0, "iter": 21496, "iter_tflops": 48.79943451851366, "iter_time": 0.42277320861816403, "loss": 0.5559224486351013, "lr": 2.8144600200657953e-05, "seqlen": 8192.0, "step_tflops": 53.056631413806016, "step_time": 0.3888504219055176} +{"epoch": 0, "iter": 21497, "iter_tflops": 26.18337670389625, "iter_time": 0.7879462509155274, "loss": 0.5235893726348877, "lr": 2.8121833265001792e-05, "seqlen": 8192.0, "step_tflops": 27.641420639868237, "step_time": 0.7463832550048829} +{"epoch": 0, "iter": 21498, "iter_tflops": 14.522929767278205, "iter_time": 1.4205875701904298, "loss": 0.502682089805603, "lr": 2.8098936822148805e-05, "seqlen": 8192.0, "step_tflops": 17.439724569655922, "step_time": 1.1829942283630372} +{"epoch": 0, "iter": 21499, "iter_tflops": 37.45315394475319, "iter_time": 0.5508506317138672, "loss": 0.5091109871864319, "lr": 2.8075911098077637e-05, "seqlen": 8192.0, "step_tflops": 41.47513455361201, "step_time": 0.49743282890319823} +{"epoch": 0, "iter": 21500, "iter_tflops": 43.182484483668006, "iter_time": 0.47776531982421877, "loss": 0.5529205203056335, "lr": 2.8052756320042885e-05, "seqlen": 8192.0, "step_tflops": 46.467864473902736, "step_time": 0.4439862632751465} 
+{"epoch": 0, "iter": 21501, "iter_tflops": 27.597049994978708, "iter_time": 0.5372337493896485, "loss": 0.061569441109895706, "lr": 2.8029472716572872e-05, "seqlen": 5936.0, "step_tflops": 29.828374022155835, "step_time": 0.49704575347900387} +{"epoch": 0, "iter": 21502, "iter_tflops": 7.246267823763453, "iter_time": 2.0460279693603516, "loss": 0.041549839079380035, "lr": 2.8006060517467352e-05, "seqlen": 5936.0, "step_tflops": 8.57262636371437, "step_time": 1.7294660949707032} +{"epoch": 0, "iter": 21503, "iter_tflops": 8.92029847930548, "iter_time": 1.6620594787597653, "loss": 0.03470485284924507, "lr": 2.7982519953795284e-05, "seqlen": 5936.0, "step_tflops": 10.327595309950759, "step_time": 1.4355778083801272} +{"epoch": 0, "iter": 21504, "iter_tflops": 13.943474251911868, "iter_time": 1.0632978820800782, "loss": 0.016713347285985947, "lr": 2.7958851257892527e-05, "seqlen": 5936.0, "step_tflops": 17.715012254166492, "step_time": 0.836921049118042} +{"epoch": 0, "iter": 21505, "iter_tflops": 22.11737261734482, "iter_time": 0.7833637466430664, "loss": 0.27294614911079407, "lr": 2.7935054663359562e-05, "seqlen": 6912.0, "step_tflops": 23.439074880313285, "step_time": 0.7391907730102539} +{"epoch": 0, "iter": 21506, "iter_tflops": 22.75312897643053, "iter_time": 0.7614753952026367, "loss": 0.34577471017837524, "lr": 2.7911130405059155e-05, "seqlen": 6912.0, "step_tflops": 31.00553934896931, "step_time": 0.5588016929626465} +{"epoch": 0, "iter": 21507, "iter_tflops": 32.0345635029029, "iter_time": 0.540851692199707, "loss": 0.31699901819229126, "lr": 2.788707871911409e-05, "seqlen": 6912.0, "step_tflops": 34.053096509528274, "step_time": 0.5087921409606934} +{"epoch": 0, "iter": 21508, "iter_tflops": 30.481200481926525, "iter_time": 0.5684142227172851, "loss": 0.36840102076530457, "lr": 2.7862899842904784e-05, "seqlen": 6912.0, "step_tflops": 32.380153018594314, "step_time": 0.5350792465209961} +{"epoch": 0, "iter": 21509, "iter_tflops": 27.841272020266796, 
"iter_time": 0.7410255355834962, "loss": 0.10630469769239426, "lr": 2.7838594015066997e-05, "seqlen": 8192.0, "step_tflops": 29.456708864792162, "step_time": 0.7003869171142577} +{"epoch": 0, "iter": 21510, "iter_tflops": 14.514964590082139, "iter_time": 1.4213671264648438, "loss": 0.09586051106452942, "lr": 2.7814161475489428e-05, "seqlen": 8192.0, "step_tflops": 18.56529161323839, "step_time": 1.1112722568511961} +{"epoch": 0, "iter": 21511, "iter_tflops": 38.695259935131084, "iter_time": 0.5331684951782226, "loss": 0.17241640388965607, "lr": 2.7789602465311384e-05, "seqlen": 8192.0, "step_tflops": 42.980401464199375, "step_time": 0.4800116519927978} +{"epoch": 0, "iter": 21512, "iter_tflops": 39.20601870674989, "iter_time": 0.5262226104736328, "loss": 0.15139815211296082, "lr": 2.776491722692038e-05, "seqlen": 8192.0, "step_tflops": 42.99722794362756, "step_time": 0.4798238048553467} +{"epoch": 0, "iter": 21513, "iter_tflops": 18.646165183038438, "iter_time": 1.1064523620605469, "loss": 0.38882625102996826, "lr": 2.7740106003949747e-05, "seqlen": 8192.0, "step_tflops": 19.956073301511232, "step_time": 1.0338253021240236} +{"epoch": 0, "iter": 21514, "iter_tflops": 20.9986331931464, "iter_time": 0.982496971130371, "loss": 0.38639217615127563, "lr": 2.7715169041276248e-05, "seqlen": 8192.0, "step_tflops": 25.60469954029664, "step_time": 0.8057541732788086} +{"epoch": 0, "iter": 21515, "iter_tflops": 47.29316703441351, "iter_time": 0.43623835754394535, "loss": 0.5124911069869995, "lr": 2.7690106585017634e-05, "seqlen": 8192.0, "step_tflops": 51.32919000475257, "step_time": 0.401936861038208} +{"epoch": 0, "iter": 21516, "iter_tflops": 43.56588544377597, "iter_time": 0.47356075286865235, "loss": 0.37032800912857056, "lr": 2.7664918882530227e-05, "seqlen": 8192.0, "step_tflops": 46.92241936609199, "step_time": 0.43968520355224605} +{"epoch": 0, "iter": 21517, "iter_tflops": 42.4964439574549, "iter_time": 0.4854781150817871, "loss": 0.004568958654999733, "lr": 
2.7639606182406484e-05, "seqlen": 8192.0, "step_tflops": 46.384343961744825, "step_time": 0.4447857131958008} +{"epoch": 0, "iter": 21518, "iter_tflops": 24.548224463322978, "iter_time": 0.8404311904907226, "loss": 0.0007448758115060627, "lr": 2.7614168734472538e-05, "seqlen": 8192.0, "step_tflops": 27.43566168723761, "step_time": 0.7519808979034424} +{"epoch": 0, "iter": 21519, "iter_tflops": 52.45230977109006, "iter_time": 0.3933305053710937, "loss": 0.007387130055576563, "lr": 2.7588606789785723e-05, "seqlen": 8192.0, "step_tflops": 58.1832198921961, "step_time": 0.3545883769989014} +{"epoch": 0, "iter": 21520, "iter_tflops": 50.39812627932666, "iter_time": 0.4093623123168945, "loss": 0.0021840552799403667, "lr": 2.7562920600632128e-05, "seqlen": 8192.0, "step_tflops": 55.14747115909039, "step_time": 0.3741076984405517} +{"epoch": 0, "iter": 21521, "iter_tflops": 31.499342676901854, "iter_time": 0.6549690170288086, "loss": 0.23351509869098663, "lr": 2.7537110420524057e-05, "seqlen": 8192.0, "step_tflops": 33.64793536752278, "step_time": 0.613145896911621} +{"epoch": 0, "iter": 21522, "iter_tflops": 9.696295733841499, "iter_time": 2.127729400634766, "loss": 0.33447644114494324, "lr": 2.751117650419757e-05, "seqlen": 8192.0, "step_tflops": 12.440960008962834, "step_time": 1.6583200569152832} +{"epoch": 0, "iter": 21523, "iter_tflops": 9.044191690566576, "iter_time": 2.2811428833007814, "loss": 0.2187362164258957, "lr": 2.7485119107609953e-05, "seqlen": 8192.0, "step_tflops": 11.223084366107136, "step_time": 1.8382730484008787} +{"epoch": 0, "iter": 21524, "iter_tflops": 33.24106841687989, "iter_time": 0.6206507339477539, "loss": 0.30244749784469604, "lr": 2.745893848793719e-05, "seqlen": 8192.0, "step_tflops": 36.867504886843825, "step_time": 0.5596010246276856} +{"epoch": 0, "iter": 21525, "iter_tflops": 31.433014243444614, "iter_time": 0.6089152221679688, "loss": 0.14466093480587006, "lr": 2.7432634903571426e-05, "seqlen": 7616.0, "step_tflops": 
33.96090365133825, "step_time": 0.5635904464721679} +{"epoch": 0, "iter": 21526, "iter_tflops": 34.280786196900635, "iter_time": 0.5583314437866211, "loss": 0.4650064706802368, "lr": 2.7406208614118427e-05, "seqlen": 7616.0, "step_tflops": 36.61584580816908, "step_time": 0.5227256240844727} +{"epoch": 0, "iter": 21527, "iter_tflops": 34.730916538235896, "iter_time": 0.5510951843261719, "loss": 0.24897538125514984, "lr": 2.7379659880394996e-05, "seqlen": 7616.0, "step_tflops": 37.21014855430558, "step_time": 0.5143768997192383} +{"epoch": 0, "iter": 21528, "iter_tflops": 32.58272293172154, "iter_time": 0.5874291381835938, "loss": 0.326015442609787, "lr": 2.7352988964426412e-05, "seqlen": 7616.0, "step_tflops": 34.492605607364894, "step_time": 0.5549027252197266} +{"epoch": 0, "iter": 21529, "iter_tflops": 34.613523185579204, "iter_time": 0.59604141998291, "loss": 0.49156248569488525, "lr": 2.7326196129443855e-05, "seqlen": 8192.0, "step_tflops": 37.09428018519735, "step_time": 0.5561799125671387} +{"epoch": 0, "iter": 21530, "iter_tflops": 13.021330743712111, "iter_time": 1.584407455444336, "loss": 0.35829344391822815, "lr": 2.7299281639881783e-05, "seqlen": 8192.0, "step_tflops": 15.111551230978742, "step_time": 1.365253189086914} +{"epoch": 0, "iter": 21531, "iter_tflops": 35.79004689506932, "iter_time": 0.5764477920532227, "loss": 0.3207607567310333, "lr": 2.727224576137535e-05, "seqlen": 8192.0, "step_tflops": 39.23944656867017, "step_time": 0.5257743244171142} +{"epoch": 0, "iter": 21532, "iter_tflops": 37.30642411153695, "iter_time": 0.5530171813964844, "loss": 0.38891324400901794, "lr": 2.724508876075776e-05, "seqlen": 8192.0, "step_tflops": 40.65834077209523, "step_time": 0.5074258594512939} +{"epoch": 0, "iter": 21533, "iter_tflops": 31.801501415090772, "iter_time": 0.6487458953857421, "loss": 0.05893903970718384, "lr": 2.721781090605764e-05, "seqlen": 8192.0, "step_tflops": 34.62940053950946, "step_time": 0.595768138885498} +{"epoch": 0, "iter": 21534, 
"iter_tflops": 8.188111948423721, "iter_time": 2.5196398925781254, "loss": 0.05059922859072685, "lr": 2.719041246649641e-05, "seqlen": 8192.0, "step_tflops": 9.776410377423709, "step_time": 2.1102933197021483} +{"epoch": 0, "iter": 21535, "iter_tflops": 12.286216202151042, "iter_time": 1.6792064514160157, "loss": 0.08012419193983078, "lr": 2.7162893712485604e-05, "seqlen": 8192.0, "step_tflops": 14.585368003898656, "step_time": 1.414506202697754} +{"epoch": 0, "iter": 21536, "iter_tflops": 47.264823168737514, "iter_time": 0.43649996185302736, "loss": 0.04578859731554985, "lr": 2.7135254915624213e-05, "seqlen": 8192.0, "step_tflops": 53.03595289372833, "step_time": 0.3890020332336426} +{"epoch": 0, "iter": 21537, "iter_tflops": 22.94950051735583, "iter_time": 0.7639222793579101, "loss": 0.39973685145378113, "lr": 2.7107496348696004e-05, "seqlen": 6992.0, "step_tflops": 24.32423485937034, "step_time": 0.7207476348876953} +{"epoch": 0, "iter": 21538, "iter_tflops": 11.691671441850952, "iter_time": 1.49949772644043, "loss": 0.25488197803497314, "lr": 2.707961828566683e-05, "seqlen": 6992.0, "step_tflops": 15.100352677194577, "step_time": 1.161008296966553} +{"epoch": 0, "iter": 21539, "iter_tflops": 27.133226050097473, "iter_time": 0.6461315994262696, "loss": 0.2557345926761627, "lr": 2.7051621001681917e-05, "seqlen": 6992.0, "step_tflops": 29.07156002030296, "step_time": 0.6030510482788086} +{"epoch": 0, "iter": 21540, "iter_tflops": 27.423396780208517, "iter_time": 0.6392947921752931, "loss": 0.18439050018787384, "lr": 2.702350477306315e-05, "seqlen": 6992.0, "step_tflops": 29.436421933588452, "step_time": 0.5955762825012206} +{"epoch": 0, "iter": 21541, "iter_tflops": 35.27308233809918, "iter_time": 0.5848962478637695, "loss": 0.05135029926896095, "lr": 2.699526987730636e-05, "seqlen": 8192.0, "step_tflops": 39.07009201054179, "step_time": 0.5280533638000489} +{"epoch": 0, "iter": 21542, "iter_tflops": 47.16973331706438, "iter_time": 0.4373799057006836, "loss": 
0.05817190185189247, "lr": 2.6966916593078566e-05, "seqlen": 8192.0, "step_tflops": 52.431208480583706, "step_time": 0.39348880386352536} +{"epoch": 0, "iter": 21543, "iter_tflops": 50.48407519836791, "iter_time": 0.4086653747558594, "loss": 0.03673960641026497, "lr": 2.6938445200215237e-05, "seqlen": 8192.0, "step_tflops": 55.13324205066123, "step_time": 0.3742042503356933} +{"epoch": 0, "iter": 21544, "iter_tflops": 51.66607414140634, "iter_time": 0.39931606674194337, "loss": 0.08010735362768173, "lr": 2.6909855979717533e-05, "seqlen": 8192.0, "step_tflops": 56.49293617421452, "step_time": 0.36519775581359865} +{"epoch": 0, "iter": 21545, "iter_tflops": 22.67539111988363, "iter_time": 0.9098451004028321, "loss": 0.0998055562376976, "lr": 2.6881149213749506e-05, "seqlen": 8192.0, "step_tflops": 23.86973012344433, "step_time": 0.8643203506469725} +{"epoch": 0, "iter": 21546, "iter_tflops": 13.822417247409742, "iter_time": 1.4925821685791016, "loss": 0.11166676878929138, "lr": 2.6852325185635357e-05, "seqlen": 8192.0, "step_tflops": 17.654273503512215, "step_time": 1.1686175308227535} +{"epoch": 0, "iter": 21547, "iter_tflops": 46.04612789367938, "iter_time": 0.44805273437500004, "loss": 0.12799063324928284, "lr": 2.6823384179856602e-05, "seqlen": 8192.0, "step_tflops": 50.667095585958066, "step_time": 0.40718918800354} +{"epoch": 0, "iter": 21548, "iter_tflops": 46.97660346414218, "iter_time": 0.4391780586242676, "loss": 0.09494443982839584, "lr": 2.6794326482049282e-05, "seqlen": 8192.0, "step_tflops": 51.040186438285716, "step_time": 0.40421273803710944} +{"epoch": 0, "iter": 21549, "iter_tflops": 24.048106272177513, "iter_time": 0.8579092788696289, "loss": 0.44070038199424744, "lr": 2.676515237900115e-05, "seqlen": 8192.0, "step_tflops": 25.31282023658436, "step_time": 0.815045234680176} +{"epoch": 0, "iter": 21550, "iter_tflops": 15.340504662102868, "iter_time": 1.3448771057128905, "loss": 0.5519198179244995, "lr": 2.673586215864882e-05, "seqlen": 8192.0, 
"step_tflops": 18.607471565563078, "step_time": 1.1087531929016115} +{"epoch": 0, "iter": 21551, "iter_tflops": 43.34422747654539, "iter_time": 0.47598249435424805, "loss": 0.693774938583374, "lr": 2.6706456110074946e-05, "seqlen": 8192.0, "step_tflops": 46.870639600684804, "step_time": 0.44017094039917} +{"epoch": 0, "iter": 21552, "iter_tflops": 45.057436963993375, "iter_time": 0.4578843116760254, "loss": 0.4935113489627838, "lr": 2.6676934523505354e-05, "seqlen": 8192.0, "step_tflops": 48.373999167871055, "step_time": 0.4264913768768311} +{"epoch": 0, "iter": 21553, "iter_tflops": 23.790283507774873, "iter_time": 0.8672067108154297, "loss": 0.49329352378845215, "lr": 2.664729769030618e-05, "seqlen": 8192.0, "step_tflops": 24.90326022805789, "step_time": 0.8284495010375976} +{"epoch": 0, "iter": 21554, "iter_tflops": 12.024521507763367, "iter_time": 1.7157517242431641, "loss": 0.3761679232120514, "lr": 2.6617545902981005e-05, "seqlen": 8192.0, "step_tflops": 16.168407983055435, "step_time": 1.276012674331665} +{"epoch": 0, "iter": 21555, "iter_tflops": 35.979632898377815, "iter_time": 0.5734103393554687, "loss": 0.5000733733177185, "lr": 2.6587679455167966e-05, "seqlen": 8192.0, "step_tflops": 38.962455660641645, "step_time": 0.5295121459960938} +{"epoch": 0, "iter": 21556, "iter_tflops": 38.59040558621559, "iter_time": 0.534617172241211, "loss": 0.5064855813980103, "lr": 2.655769864163684e-05, "seqlen": 8192.0, "step_tflops": 41.84867769438824, "step_time": 0.4929927215576172} +{"epoch": 0, "iter": 21557, "iter_tflops": 15.72728003221295, "iter_time": 1.3118030242919922, "loss": 0.48284807801246643, "lr": 2.652760375828615e-05, "seqlen": 8192.0, "step_tflops": 16.76413308813257, "step_time": 1.2306686782836913} +{"epoch": 0, "iter": 21558, "iter_tflops": 17.303284295445884, "iter_time": 1.1923224029541017, "loss": 0.3246093690395355, "lr": 2.649739510214025e-05, "seqlen": 8192.0, "step_tflops": 20.53618977826481, "step_time": 1.0046212921142579} +{"epoch": 0, 
"iter": 21559, "iter_tflops": 49.06579889219222, "iter_time": 0.4204780921936035, "loss": 0.46592602133750916, "lr": 2.6467072971346375e-05, "seqlen": 8192.0, "step_tflops": 53.33573399534523, "step_time": 0.3868155918121338} +{"epoch": 0, "iter": 21560, "iter_tflops": 49.79517528648422, "iter_time": 0.4143191261291504, "loss": 0.5037370324134827, "lr": 2.643663766517172e-05, "seqlen": 8192.0, "step_tflops": 54.013162989628015, "step_time": 0.3819641799926758} +{"epoch": 0, "iter": 21561, "iter_tflops": 9.96686141084708, "iter_time": 1.039691635131836, "loss": 0.027048226445913315, "lr": 2.6406089484000465e-05, "seqlen": 4176.0, "step_tflops": 10.360107558496525, "step_time": 1.0002273025512696} +{"epoch": 0, "iter": 21562, "iter_tflops": 6.654312550644226, "iter_time": 1.5572551422119139, "loss": 0.01821499690413475, "lr": 2.637542872933083e-05, "seqlen": 4176.0, "step_tflops": 8.642756919089289, "step_time": 1.1989764995574952} +{"epoch": 0, "iter": 21563, "iter_tflops": 21.727032531885, "iter_time": 0.47693869018554685, "loss": 0.03455110266804695, "lr": 2.634465570377208e-05, "seqlen": 4176.0, "step_tflops": 23.97407583028325, "step_time": 0.43223615837097173} +{"epoch": 0, "iter": 21564, "iter_tflops": 23.49530047417914, "iter_time": 0.44104404830932614, "loss": 0.03231634944677353, "lr": 2.6313770711041557e-05, "seqlen": 4176.0, "step_tflops": 25.8399933780802, "step_time": 0.40102419090271} +{"epoch": 0, "iter": 21565, "iter_tflops": 33.99071749077142, "iter_time": 0.6069625778198243, "loss": 0.4319125711917877, "lr": 2.6282774055961672e-05, "seqlen": 8192.0, "step_tflops": 37.28213481192068, "step_time": 0.5533774719238281} +{"epoch": 0, "iter": 21566, "iter_tflops": 34.6470138814852, "iter_time": 0.5954652709960938, "loss": 0.37514859437942505, "lr": 2.6251666044456895e-05, "seqlen": 8192.0, "step_tflops": 38.16644801548563, "step_time": 0.5405557651519776} +{"epoch": 0, "iter": 21567, "iter_tflops": 40.32220935445703, "iter_time": 0.5116558303833008, 
"loss": 0.48068904876708984, "lr": 2.6220446983550738e-05, "seqlen": 8192.0, "step_tflops": 44.395804735307095, "step_time": 0.4647081775665283} +{"epoch": 0, "iter": 21568, "iter_tflops": 39.236246972596184, "iter_time": 0.5258171997070312, "loss": 0.3988380432128906, "lr": 2.6189117181362733e-05, "seqlen": 8192.0, "step_tflops": 43.11282300577909, "step_time": 0.47853729057312017} +{"epoch": 0, "iter": 21569, "iter_tflops": 20.46268244259206, "iter_time": 1.0082301559448241, "loss": 0.5827349424362183, "lr": 2.6157676947105377e-05, "seqlen": 8192.0, "step_tflops": 22.021341031242663, "step_time": 0.9368681716918944} +{"epoch": 0, "iter": 21570, "iter_tflops": 12.13231948161998, "iter_time": 1.7005069427490234, "loss": 0.4760739207267761, "lr": 2.612612659108109e-05, "seqlen": 8192.0, "step_tflops": 15.171758399895305, "step_time": 1.3598353576660156} +{"epoch": 0, "iter": 21571, "iter_tflops": 36.219754058481904, "iter_time": 0.5696088790893554, "loss": 0.5077038407325745, "lr": 2.6094466424679148e-05, "seqlen": 8192.0, "step_tflops": 39.41015953399778, "step_time": 0.5234968280792236} +{"epoch": 0, "iter": 21572, "iter_tflops": 37.090832856385134, "iter_time": 0.5562316055297851, "loss": 0.5977743864059448, "lr": 2.606269676037261e-05, "seqlen": 8192.0, "step_tflops": 40.40626984623975, "step_time": 0.5105913906097412} +{"epoch": 0, "iter": 21573, "iter_tflops": 25.298491239948735, "iter_time": 0.8155068740844728, "loss": 0.604506254196167, "lr": 2.603081791171524e-05, "seqlen": 8192.0, "step_tflops": 27.45140005255249, "step_time": 0.751549774169922} +{"epoch": 0, "iter": 21574, "iter_tflops": 36.65878583223778, "iter_time": 0.5627871475219727, "loss": 0.5057833790779114, "lr": 2.5998830193338402e-05, "seqlen": 8192.0, "step_tflops": 40.0556611934538, "step_time": 0.5150606155395507} +{"epoch": 0, "iter": 21575, "iter_tflops": 39.56627791609614, "iter_time": 0.5214312438964843, "loss": 0.5214551091194153, "lr": 2.5966733920947963e-05, "seqlen": 8192.0, 
"step_tflops": 42.886146105898746, "step_time": 0.4810666236877442} +{"epoch": 0, "iter": 21576, "iter_tflops": 38.68124725685868, "iter_time": 0.5333616409301758, "loss": 0.5992715358734131, "lr": 2.5934529411321174e-05, "seqlen": 8192.0, "step_tflops": 41.870825138500486, "step_time": 0.4927319545745849} +{"epoch": 0, "iter": 21577, "iter_tflops": 18.640079279115696, "iter_time": 1.1068136138916018, "loss": 0.06474851816892624, "lr": 2.5902216982303544e-05, "seqlen": 8192.0, "step_tflops": 19.97749936088383, "step_time": 1.0327165145874024} +{"epoch": 0, "iter": 21578, "iter_tflops": 14.967152120569873, "iter_time": 1.3784247894287107, "loss": 0.06697557866573334, "lr": 2.5869796952805702e-05, "seqlen": 8192.0, "step_tflops": 17.674775062953266, "step_time": 1.1672620124816895} +{"epoch": 0, "iter": 21579, "iter_tflops": 47.52109213091, "iter_time": 0.4341460304260254, "loss": 0.07499132305383682, "lr": 2.583726964280025e-05, "seqlen": 8192.0, "step_tflops": 51.61099867685097, "step_time": 0.39974218749999996} +{"epoch": 0, "iter": 21580, "iter_tflops": 50.122061346054906, "iter_time": 0.4116170196533203, "loss": 0.07958213239908218, "lr": 2.5804635373318604e-05, "seqlen": 8192.0, "step_tflops": 54.74909550598096, "step_time": 0.3768298511505127} +{"epoch": 0, "iter": 21581, "iter_tflops": 30.4885091962514, "iter_time": 0.6766842346191406, "loss": 0.23457194864749908, "lr": 2.5771894466447834e-05, "seqlen": 8192.0, "step_tflops": 32.37085224611108, "step_time": 0.637335506439209} +{"epoch": 0, "iter": 21582, "iter_tflops": 15.727267043591748, "iter_time": 1.3118041076660156, "loss": 0.2753058671951294, "lr": 2.5739047245327468e-05, "seqlen": 8192.0, "step_tflops": 18.65222920245255, "step_time": 1.1060926437377931} +{"epoch": 0, "iter": 21583, "iter_tflops": 47.872583558918826, "iter_time": 0.4309584312438965, "loss": 0.27110326290130615, "lr": 2.570609403414632e-05, "seqlen": 8192.0, "step_tflops": 52.04945153701313, "step_time": 0.396374849319458} +{"epoch": 0, 
"iter": 21584, "iter_tflops": 47.714198990175106, "iter_time": 0.43238897323608394, "loss": 0.2215852290391922, "lr": 2.5673035158139285e-05, "seqlen": 8192.0, "step_tflops": 51.46497107966051, "step_time": 0.4008764228820801} +{"epoch": 0, "iter": 21585, "iter_tflops": 37.17607231258781, "iter_time": 0.5549562454223633, "loss": 0.029349826276302338, "lr": 2.5639870943584108e-05, "seqlen": 8192.0, "step_tflops": 40.01593305078517, "step_time": 0.5155719718933105} +{"epoch": 0, "iter": 21586, "iter_tflops": 10.824112657420436, "iter_time": 1.9060309295654299, "loss": 0.02151821367442608, "lr": 2.5606601717798212e-05, "seqlen": 8192.0, "step_tflops": 13.724785896319993, "step_time": 1.503199661254883} +{"epoch": 0, "iter": 21587, "iter_tflops": 10.459702105831429, "iter_time": 1.9724360504150389, "loss": 0.022730372846126556, "lr": 2.557322780913542e-05, "seqlen": 8192.0, "step_tflops": 13.265553709646941, "step_time": 1.555238021850586} +{"epoch": 0, "iter": 21588, "iter_tflops": 25.46210407667955, "iter_time": 0.8102666397094728, "loss": 0.027285359799861908, "lr": 2.553974954698274e-05, "seqlen": 8192.0, "step_tflops": 32.756687709371704, "step_time": 0.6298284397125244} +{"epoch": 0, "iter": 21589, "iter_tflops": 12.568599166901157, "iter_time": 1.208869598388672, "loss": 0.19342581927776337, "lr": 2.5506167261757097e-05, "seqlen": 6080.0, "step_tflops": 13.388805950052706, "step_time": 1.1348134765625} +{"epoch": 0, "iter": 21590, "iter_tflops": 12.324942509953052, "iter_time": 1.2327682189941407, "loss": 0.30445805191993713, "lr": 2.5472481284902092e-05, "seqlen": 6080.0, "step_tflops": 14.42687765629138, "step_time": 1.0531590957641603} +{"epoch": 0, "iter": 21591, "iter_tflops": 26.728198731265838, "iter_time": 0.5684557189941406, "loss": 0.491450697183609, "lr": 2.5438691948884715e-05, "seqlen": 6080.0, "step_tflops": 28.537957781583884, "step_time": 0.5324066123962402} +{"epoch": 0, "iter": 21592, "iter_tflops": 28.515130304808153, "iter_time": 
0.5328328247070313, "loss": 0.3676018714904785, "lr": 2.5404799587192074e-05, "seqlen": 6080.0, "step_tflops": 30.297656545724237, "step_time": 0.5014842453002929} +{"epoch": 0, "iter": 21593, "iter_tflops": 22.135464404026674, "iter_time": 0.9320379791259765, "loss": 0.35452762246131897, "lr": 2.5370804534328097e-05, "seqlen": 8192.0, "step_tflops": 23.173579917075635, "step_time": 0.8902851257324219} +{"epoch": 0, "iter": 21594, "iter_tflops": 22.86861179318208, "iter_time": 0.9021576690673829, "loss": 0.3921336531639099, "lr": 2.5336707125810227e-05, "seqlen": 8192.0, "step_tflops": 25.523984013669818, "step_time": 0.8083022422790527} +{"epoch": 0, "iter": 21595, "iter_tflops": 37.340343855409714, "iter_time": 0.5525148239135742, "loss": 0.3343683183193207, "lr": 2.5302507698166127e-05, "seqlen": 8192.0, "step_tflops": 40.85031865040667, "step_time": 0.5050411891937255} +{"epoch": 0, "iter": 21596, "iter_tflops": 42.04920282152557, "iter_time": 0.49064172744750983, "loss": 0.38752028346061707, "lr": 2.5268206588930332e-05, "seqlen": 8192.0, "step_tflops": 46.01357111501157, "step_time": 0.4483697528839111} +{"epoch": 0, "iter": 21597, "iter_tflops": 21.037196860320353, "iter_time": 0.9806959381103515, "loss": 0.35297077894210815, "lr": 2.523380413664095e-05, "seqlen": 8192.0, "step_tflops": 22.67878717912224, "step_time": 0.9097088546752929} +{"epoch": 0, "iter": 21598, "iter_tflops": 14.142660509497707, "iter_time": 1.458784469604492, "loss": 0.35274145007133484, "lr": 2.519930068083629e-05, "seqlen": 8192.0, "step_tflops": 16.690499619713766, "step_time": 1.23609801864624} +{"epoch": 0, "iter": 21599, "iter_tflops": 38.47721263981367, "iter_time": 0.5361899185180664, "loss": 0.3826945126056671, "lr": 2.5164696562051537e-05, "seqlen": 8192.0, "step_tflops": 41.762807330110064, "step_time": 0.49400638580322265} +{"epoch": 0, "iter": 21600, "iter_tflops": 43.56412169866641, "iter_time": 0.47357992553710937, "loss": 0.3246011435985565, "lr": 
2.5129992121815365e-05, "seqlen": 8192.0, "step_tflops": 47.492191521933776, "step_time": 0.43441022300720217} +{"epoch": 0, "iter": 21601, "iter_tflops": 18.62290519115797, "iter_time": 1.1078343200683594, "loss": 0.3163596987724304, "lr": 2.50951877026466e-05, "seqlen": 8192.0, "step_tflops": 19.997023018525347, "step_time": 1.0317082443237304} +{"epoch": 0, "iter": 21602, "iter_tflops": 17.85804270696822, "iter_time": 1.1552830200195312, "loss": 0.45641574263572693, "lr": 2.5060283648050798e-05, "seqlen": 8192.0, "step_tflops": 20.073421122190677, "step_time": 1.0277816314697268} +{"epoch": 0, "iter": 21603, "iter_tflops": 47.43224147315118, "iter_time": 0.4349592781066894, "loss": 0.30087074637413025, "lr": 2.5025280302516897e-05, "seqlen": 8192.0, "step_tflops": 51.352021879531335, "step_time": 0.40175815391540526} +{"epoch": 0, "iter": 21604, "iter_tflops": 46.27829490785846, "iter_time": 0.4458049621582032, "loss": 0.30783215165138245, "lr": 2.4990178011513776e-05, "seqlen": 8192.0, "step_tflops": 49.93726677792837, "step_time": 0.4131402225494385} +{"epoch": 0, "iter": 21605, "iter_tflops": 29.35308000066991, "iter_time": 0.7028595809936523, "loss": 0.6473624110221863, "lr": 2.495497712148688e-05, "seqlen": 8192.0, "step_tflops": 31.2757396846438, "step_time": 0.6596516571044921} +{"epoch": 0, "iter": 21606, "iter_tflops": 9.932008623744942, "iter_time": 2.0772327423095707, "loss": 0.4714977443218231, "lr": 2.491967797985478e-05, "seqlen": 8192.0, "step_tflops": 12.634153118774796, "step_time": 1.6329621238708496} +{"epoch": 0, "iter": 21607, "iter_tflops": 13.623807547531353, "iter_time": 1.5143412322998047, "loss": 0.40752077102661133, "lr": 2.4884280935005755e-05, "seqlen": 8192.0, "step_tflops": 15.086954583079464, "step_time": 1.3674789962768554} +{"epoch": 0, "iter": 21608, "iter_tflops": 21.47541762956843, "iter_time": 0.9606841583251954, "loss": 0.5421141982078552, "lr": 2.4848786336294347e-05, "seqlen": 8192.0, "step_tflops": 25.680883111099295, 
"step_time": 0.8033638648986816} +{"epoch": 0, "iter": 21609, "iter_tflops": 22.67311300785654, "iter_time": 0.7387892532348633, "loss": 0.27698060870170593, "lr": 2.4813194534037917e-05, "seqlen": 6688.0, "step_tflops": 24.014816919728226, "step_time": 0.6975132179260255} +{"epoch": 0, "iter": 21610, "iter_tflops": 9.43713043951589, "iter_time": 1.7749730529785155, "loss": 0.25968340039253235, "lr": 2.4777505879513183e-05, "seqlen": 6688.0, "step_tflops": 10.87457886456341, "step_time": 1.540349510192871} +{"epoch": 0, "iter": 21611, "iter_tflops": 14.410569542951645, "iter_time": 1.1623865509033202, "loss": 0.3533361554145813, "lr": 2.4741720724952754e-05, "seqlen": 6688.0, "step_tflops": 16.0520668231945, "step_time": 1.0435199661254881} +{"epoch": 0, "iter": 21612, "iter_tflops": 31.335584563651842, "iter_time": 0.5345568771362305, "loss": 0.29114338755607605, "lr": 2.4705839423541668e-05, "seqlen": 6688.0, "step_tflops": 33.25160764280445, "step_time": 0.5037546577453613} +{"epoch": 0, "iter": 21613, "iter_tflops": 20.948998956730517, "iter_time": 0.731130844116211, "loss": 0.39547550678253174, "lr": 2.466986232941387e-05, "seqlen": 6128.0, "step_tflops": 22.311709920020284, "step_time": 0.6864762649536132} +{"epoch": 0, "iter": 21614, "iter_tflops": 14.069790138023913, "iter_time": 1.0886060943603517, "loss": 0.24272915720939636, "lr": 2.4633789797648766e-05, "seqlen": 6128.0, "step_tflops": 18.33749491335039, "step_time": 0.8352536354064941} +{"epoch": 0, "iter": 21615, "iter_tflops": 26.994074428615015, "iter_time": 0.5674007949829102, "loss": 0.27477121353149414, "lr": 2.4597622184267677e-05, "seqlen": 6128.0, "step_tflops": 28.911938070455047, "step_time": 0.5297624549865723} +{"epoch": 0, "iter": 21616, "iter_tflops": 28.624186097353338, "iter_time": 0.5350880279541015, "loss": 0.25688666105270386, "lr": 2.4561359846230346e-05, "seqlen": 6128.0, "step_tflops": 30.36524860895768, "step_time": 0.5044075050354004} +{"epoch": 0, "iter": 21617, "iter_tflops": 
22.65786087758327, "iter_time": 0.910549041748047, "loss": 0.24079088866710663, "lr": 2.4525003141431413e-05, "seqlen": 8192.0, "step_tflops": 23.759903261410614, "step_time": 0.8683155517578126} +{"epoch": 0, "iter": 21618, "iter_tflops": 12.661297114791733, "iter_time": 1.6294612884521487, "loss": 0.1865386664867401, "lr": 2.448855242869687e-05, "seqlen": 8192.0, "step_tflops": 15.120210817280459, "step_time": 1.3644712867736817} +{"epoch": 0, "iter": 21619, "iter_tflops": 46.15912810594864, "iter_time": 0.44695587539672854, "loss": 0.3062816560268402, "lr": 2.445200806778054e-05, "seqlen": 8192.0, "step_tflops": 50.09547339693441, "step_time": 0.41183548355102534} +{"epoch": 0, "iter": 21620, "iter_tflops": 53.598559182470055, "iter_time": 0.38491880798339845, "loss": 0.279619038105011, "lr": 2.4415370419360508e-05, "seqlen": 8192.0, "step_tflops": 57.98560032412553, "step_time": 0.35579684257507327} +{"epoch": 0, "iter": 21621, "iter_tflops": 32.525685075780764, "iter_time": 0.6343015823364258, "loss": 0.01712704636156559, "lr": 2.4378639845035583e-05, "seqlen": 8192.0, "step_tflops": 34.73119502285966, "step_time": 0.5940219879150391} +{"epoch": 0, "iter": 21622, "iter_tflops": 17.6530524394512, "iter_time": 1.1686983642578126, "loss": 0.033255383372306824, "lr": 2.4341816707321688e-05, "seqlen": 8192.0, "step_tflops": 20.720078304439376, "step_time": 0.9957053833007812} +{"epoch": 0, "iter": 21623, "iter_tflops": 42.13550739005632, "iter_time": 0.4896367645263672, "loss": 0.02599259279668331, "lr": 2.4304901369648344e-05, "seqlen": 8192.0, "step_tflops": 46.621599183325024, "step_time": 0.4425222187042236} +{"epoch": 0, "iter": 21624, "iter_tflops": 45.5780905856157, "iter_time": 0.4526537475585938, "loss": 0.011777114123106003, "lr": 2.4267894196355016e-05, "seqlen": 8192.0, "step_tflops": 50.41522421641172, "step_time": 0.40922348022460936} +{"epoch": 0, "iter": 21625, "iter_tflops": 26.609782232492456, "iter_time": 0.7753198928833007, "loss": 
0.3963366150856018, "lr": 2.4230795552687573e-05, "seqlen": 8192.0, "step_tflops": 28.69236205401017, "step_time": 0.719044792175293} +{"epoch": 0, "iter": 21626, "iter_tflops": 10.7374653605497, "iter_time": 1.921411880493164, "loss": 0.47748833894729614, "lr": 2.419360580479465e-05, "seqlen": 8192.0, "step_tflops": 12.205831672571435, "step_time": 1.690265281677246} +{"epoch": 0, "iter": 21627, "iter_tflops": 10.463487529886113, "iter_time": 1.9717224731445313, "loss": 0.5224204063415527, "lr": 2.4156325319724037e-05, "seqlen": 8192.0, "step_tflops": 13.788847043618512, "step_time": 1.4962159957885741} +{"epoch": 0, "iter": 21628, "iter_tflops": 21.098115636585835, "iter_time": 0.9778642730712892, "loss": 0.4920481741428375, "lr": 2.4118954465419083e-05, "seqlen": 8192.0, "step_tflops": 32.68223824180257, "step_time": 0.6312631759643555} +{"epoch": 0, "iter": 21629, "iter_tflops": 22.558304525149314, "iter_time": 0.7425492553710938, "loss": 0.2132965326309204, "lr": 2.408149361071502e-05, "seqlen": 6688.0, "step_tflops": 24.48428681610516, "step_time": 0.6841388664245606} +{"epoch": 0, "iter": 21630, "iter_tflops": 24.515715608577988, "iter_time": 0.6832618103027344, "loss": 0.3174915611743927, "lr": 2.4043943125335373e-05, "seqlen": 6688.0, "step_tflops": 26.450334790986368, "step_time": 0.6332869644165039} +{"epoch": 0, "iter": 21631, "iter_tflops": 25.60391161104184, "iter_time": 0.6542223892211915, "loss": 0.3321128189563751, "lr": 2.4006303379888262e-05, "seqlen": 6688.0, "step_tflops": 27.576686515337254, "step_time": 0.6074207725524904} +{"epoch": 0, "iter": 21632, "iter_tflops": 25.507811235004233, "iter_time": 0.6566871643066406, "loss": 0.28105947375297546, "lr": 2.3968574745862785e-05, "seqlen": 6688.0, "step_tflops": 27.41496629429351, "step_time": 0.6110039329528808} +{"epoch": 0, "iter": 21633, "iter_tflops": 29.356490284951693, "iter_time": 0.702777931213379, "loss": 0.5531280636787415, "lr": 2.3930757595625326e-05, "seqlen": 8192.0, "step_tflops": 
32.27273463052839, "step_time": 0.6392731742858886} +{"epoch": 0, "iter": 21634, "iter_tflops": 37.22385486424938, "iter_time": 0.5542438735961914, "loss": 0.5129497051239014, "lr": 2.389285230241589e-05, "seqlen": 8192.0, "step_tflops": 40.61829796527368, "step_time": 0.5079260959625244} +{"epoch": 0, "iter": 21635, "iter_tflops": 41.96167103202414, "iter_time": 0.4916652030944824, "loss": 0.5305861830711365, "lr": 2.385485924034442e-05, "seqlen": 8192.0, "step_tflops": 45.66470328935051, "step_time": 0.45179519462585443} +{"epoch": 0, "iter": 21636, "iter_tflops": 38.345855298233914, "iter_time": 0.5380266876220703, "loss": 0.5978650450706482, "lr": 2.3816778784387097e-05, "seqlen": 8192.0, "step_tflops": 41.58679784867841, "step_time": 0.49609718894958504} +{"epoch": 0, "iter": 21637, "iter_tflops": 18.514665730080363, "iter_time": 1.1143108825683594, "loss": 0.5395561456680298, "lr": 2.3778611310382653e-05, "seqlen": 8192.0, "step_tflops": 19.74038312171893, "step_time": 1.0451212310791016} +{"epoch": 0, "iter": 21638, "iter_tflops": 20.221933226620006, "iter_time": 1.0202334899902343, "loss": 0.5280308723449707, "lr": 2.374035719502864e-05, "seqlen": 8192.0, "step_tflops": 26.94733239268609, "step_time": 0.7656080093383788} +{"epoch": 0, "iter": 21639, "iter_tflops": 39.15256479150732, "iter_time": 0.5269410476684571, "loss": 0.5579200983047485, "lr": 2.3702016815877742e-05, "seqlen": 8192.0, "step_tflops": 42.83465840230564, "step_time": 0.4816448707580567} +{"epoch": 0, "iter": 21640, "iter_tflops": 39.54254372377474, "iter_time": 0.5217442169189453, "loss": 0.48527318239212036, "lr": 2.3663590551334014e-05, "seqlen": 8192.0, "step_tflops": 43.32699646800907, "step_time": 0.4761717910766602} +{"epoch": 0, "iter": 21641, "iter_tflops": 18.2680912637683, "iter_time": 1.1293513488769529, "loss": 0.6047074198722839, "lr": 2.362507878064918e-05, "seqlen": 8192.0, "step_tflops": 19.35286741716792, "step_time": 1.0660484085083006} +{"epoch": 0, "iter": 21642, 
"iter_tflops": 24.051492580693118, "iter_time": 0.8577884902954102, "loss": 0.5362603068351746, "lr": 2.3586481883918867e-05, "seqlen": 8192.0, "step_tflops": 33.13016210439011, "step_time": 0.6227284202575684} +{"epoch": 0, "iter": 21643, "iter_tflops": 37.50127483069488, "iter_time": 0.5501437911987304, "loss": 0.5545198321342468, "lr": 2.3547800242078857e-05, "seqlen": 8192.0, "step_tflops": 41.11020656490324, "step_time": 0.50184845161438} +{"epoch": 0, "iter": 21644, "iter_tflops": 39.14212118250608, "iter_time": 0.5270816421508789, "loss": 0.39962664246559143, "lr": 2.350903423690135e-05, "seqlen": 8192.0, "step_tflops": 42.728485013219014, "step_time": 0.4828416805267333} +{"epoch": 0, "iter": 21645, "iter_tflops": 15.974581681989504, "iter_time": 1.2914950714111328, "loss": 0.6215506196022034, "lr": 2.3470184250991157e-05, "seqlen": 8192.0, "step_tflops": 16.828215765376584, "step_time": 1.2259822311401367} +{"epoch": 0, "iter": 21646, "iter_tflops": 25.734156379856678, "iter_time": 0.8017007904052734, "loss": 0.44934988021850586, "lr": 2.3431250667781958e-05, "seqlen": 8192.0, "step_tflops": 30.66137290760424, "step_time": 0.6728692016601563} +{"epoch": 0, "iter": 21647, "iter_tflops": 46.85778893568147, "iter_time": 0.44029165649414065, "loss": 0.4421781897544861, "lr": 2.3392233871532504e-05, "seqlen": 8192.0, "step_tflops": 50.34381820636164, "step_time": 0.4098039093017578} +{"epoch": 0, "iter": 21648, "iter_tflops": 47.61937126803597, "iter_time": 0.4332500190734863, "loss": 0.5630472302436829, "lr": 2.335313424732282e-05, "seqlen": 8192.0, "step_tflops": 51.173057674879374, "step_time": 0.4031631965637207} +{"epoch": 0, "iter": 21649, "iter_tflops": 41.33230833840543, "iter_time": 0.49915173721313477, "loss": 0.3985731601715088, "lr": 2.3313952181050412e-05, "seqlen": 8192.0, "step_tflops": 44.97609402355337, "step_time": 0.4587124328613281} +{"epoch": 0, "iter": 21650, "iter_tflops": 36.02964474925186, "iter_time": 0.5726144027709961, "loss": 
0.47965842485427856, "lr": 2.3274688059426456e-05, "seqlen": 8192.0, "step_tflops": 39.29391622759267, "step_time": 0.5250454902648926} +{"epoch": 0, "iter": 21651, "iter_tflops": 37.068133283668914, "iter_time": 0.5565722274780273, "loss": 0.435243159532547, "lr": 2.3235342269971978e-05, "seqlen": 8192.0, "step_tflops": 40.36050349716107, "step_time": 0.5111703701019287} +{"epoch": 0, "iter": 21652, "iter_tflops": 41.294964457248156, "iter_time": 0.49960313034057613, "loss": 0.5723587870597839, "lr": 2.3195915201014038e-05, "seqlen": 8192.0, "step_tflops": 45.02247403350861, "step_time": 0.45823988914489744} +{"epoch": 0, "iter": 21653, "iter_tflops": 35.60272446186125, "iter_time": 0.5794807510375977, "loss": 0.10430368036031723, "lr": 2.3156407241681888e-05, "seqlen": 8192.0, "step_tflops": 39.63780658982805, "step_time": 0.520490291595459} +{"epoch": 0, "iter": 21654, "iter_tflops": 39.49013759653024, "iter_time": 0.5224366073608399, "loss": 0.08498497307300568, "lr": 2.311681878190314e-05, "seqlen": 8192.0, "step_tflops": 43.90157975834801, "step_time": 0.469939661026001} +{"epoch": 0, "iter": 21655, "iter_tflops": 41.54624942097115, "iter_time": 0.49658137130737307, "loss": 0.10444997251033783, "lr": 2.30771502123999e-05, "seqlen": 8192.0, "step_tflops": 45.398044771288134, "step_time": 0.4544489440917968} +{"epoch": 0, "iter": 21656, "iter_tflops": 40.04248294494817, "iter_time": 0.5152301254272461, "loss": 0.06844168156385422, "lr": 2.303740192468495e-05, "seqlen": 8192.0, "step_tflops": 43.97495120675326, "step_time": 0.46915557479858394} +{"epoch": 0, "iter": 21657, "iter_tflops": 19.12574823647431, "iter_time": 0.9726734771728517, "loss": 0.02454458363354206, "lr": 2.299757431105783e-05, "seqlen": 7408.0, "step_tflops": 20.540615480639197, "step_time": 0.9056743240356445} +{"epoch": 0, "iter": 21658, "iter_tflops": 16.091675255006606, "iter_time": 1.1560703124999998, "loss": 0.02446754276752472, "lr": 2.295766776460101e-05, "seqlen": 7408.0, 
"step_tflops": 21.475886987689577, "step_time": 0.8662323493957519} +{"epoch": 0, "iter": 21659, "iter_tflops": 49.40909412723059, "iter_time": 0.3765118217468262, "loss": 0.023855555802583694, "lr": 2.2917682679175998e-05, "seqlen": 7408.0, "step_tflops": 54.140583713095936, "step_time": 0.3436074523925781} +{"epoch": 0, "iter": 21660, "iter_tflops": 48.36162197714085, "iter_time": 0.3846667518615723, "loss": 0.0736299678683281, "lr": 2.2877619449419437e-05, "seqlen": 7408.0, "step_tflops": 53.43319121740071, "step_time": 0.34815641021728516} +{"epoch": 0, "iter": 21661, "iter_tflops": 27.099550113353406, "iter_time": 0.7613076019287109, "loss": 0.4973577857017517, "lr": 2.2837478470739234e-05, "seqlen": 8192.0, "step_tflops": 28.68349252792828, "step_time": 0.7192671356201172} +{"epoch": 0, "iter": 21662, "iter_tflops": 13.996633202709528, "iter_time": 1.4740040130615235, "loss": 0.43819379806518555, "lr": 2.279726013931064e-05, "seqlen": 8192.0, "step_tflops": 16.802620598674086, "step_time": 1.227849750518799} +{"epoch": 0, "iter": 21663, "iter_tflops": 36.36041757760051, "iter_time": 0.5674052963256836, "loss": 0.4044916331768036, "lr": 2.2756964852072357e-05, "seqlen": 8192.0, "step_tflops": 39.56400173535313, "step_time": 0.5214612426757812} +{"epoch": 0, "iter": 21664, "iter_tflops": 36.2694612474508, "iter_time": 0.5688282318115234, "loss": 0.45773038268089294, "lr": 2.27165930067226e-05, "seqlen": 8192.0, "step_tflops": 39.54150280121685, "step_time": 0.5217579517364501} +{"epoch": 0, "iter": 21665, "iter_tflops": 33.43371499761417, "iter_time": 0.6170745162963867, "loss": 0.0725448876619339, "lr": 2.2676145001715175e-05, "seqlen": 8192.0, "step_tflops": 37.133337761584635, "step_time": 0.5555949115753174} +{"epoch": 0, "iter": 21666, "iter_tflops": 41.68102227315442, "iter_time": 0.49497570800781243, "loss": 0.09642542153596878, "lr": 2.263562123625557e-05, "seqlen": 8192.0, "step_tflops": 45.46749151740723, "step_time": 0.4537548217773438} +{"epoch": 0, 
"iter": 21667, "iter_tflops": 49.96837502645121, "iter_time": 0.4128830184936523, "loss": 0.10342822968959808, "lr": 2.2595022110296988e-05, "seqlen": 8192.0, "step_tflops": 54.24139429113732, "step_time": 0.38035699081420904} +{"epoch": 0, "iter": 21668, "iter_tflops": 50.21750505609616, "iter_time": 0.4108346977233887, "loss": 0.11679158359766006, "lr": 2.2554348024536413e-05, "seqlen": 8192.0, "step_tflops": 54.19243547578949, "step_time": 0.3807006149291992} +{"epoch": 0, "iter": 21669, "iter_tflops": 28.02449330214115, "iter_time": 0.7361807861328125, "loss": 0.4861803352832794, "lr": 2.251359938041065e-05, "seqlen": 8192.0, "step_tflops": 29.828310681168595, "step_time": 0.6916614799499512} +{"epoch": 0, "iter": 21670, "iter_tflops": 17.881606118906358, "iter_time": 1.1537606506347657, "loss": 0.7933398485183716, "lr": 2.247277658009236e-05, "seqlen": 8192.0, "step_tflops": 19.81982126789093, "step_time": 1.0409323692321777} +{"epoch": 0, "iter": 21671, "iter_tflops": 42.22717553363602, "iter_time": 0.48857384490966793, "loss": 0.5691443681716919, "lr": 2.2431880026486114e-05, "seqlen": 8192.0, "step_tflops": 45.470724378380005, "step_time": 0.4537225608825684} +{"epoch": 0, "iter": 21672, "iter_tflops": 45.07208716539452, "iter_time": 0.4577354812622071, "loss": 0.56690514087677, "lr": 2.2390910123224375e-05, "seqlen": 8192.0, "step_tflops": 48.58580092883171, "step_time": 0.42463215827941897} +{"epoch": 0, "iter": 21673, "iter_tflops": 44.82435608198238, "iter_time": 0.46026525115966793, "loss": 0.1902862787246704, "lr": 2.234986727466355e-05, "seqlen": 8192.0, "step_tflops": 48.86902961537164, "step_time": 0.42217113113403315} +{"epoch": 0, "iter": 21674, "iter_tflops": 47.335090800348716, "iter_time": 0.4358519897460938, "loss": 0.21861326694488525, "lr": 2.2308751885879984e-05, "seqlen": 8192.0, "step_tflops": 51.515822484218575, "step_time": 0.40048071670532226} +{"epoch": 0, "iter": 21675, "iter_tflops": 49.67061787378616, "iter_time": 
0.4153581008911133, "loss": 0.1205611377954483, "lr": 2.226756436266597e-05, "seqlen": 8192.0, "step_tflops": 53.703324518066566, "step_time": 0.38416790199279777} +{"epoch": 0, "iter": 21676, "iter_tflops": 46.92562344358187, "iter_time": 0.4396551818847656, "loss": 0.16388830542564392, "lr": 2.222630511152573e-05, "seqlen": 8192.0, "step_tflops": 50.87259298181608, "step_time": 0.4055443668365479} +{"epoch": 0, "iter": 21677, "iter_tflops": 28.070166815271797, "iter_time": 0.690702163696289, "loss": 0.06639061868190765, "lr": 2.2184974539671417e-05, "seqlen": 7712.0, "step_tflops": 29.644688451859604, "step_time": 0.6540168228149413} +{"epoch": 0, "iter": 21678, "iter_tflops": 13.33240601543914, "iter_time": 1.4542105102539062, "loss": 0.06327972561120987, "lr": 2.2143573055019088e-05, "seqlen": 7712.0, "step_tflops": 19.55843307859859, "step_time": 0.9912923431396483} +{"epoch": 0, "iter": 21679, "iter_tflops": 47.06272150912407, "iter_time": 0.4119635314941406, "loss": 0.06611133366823196, "lr": 2.2102101066184684e-05, "seqlen": 7712.0, "step_tflops": 51.595461112017276, "step_time": 0.3757719097137451} +{"epoch": 0, "iter": 21680, "iter_tflops": 47.02511883803976, "iter_time": 0.41229294967651364, "loss": 0.05401773378252983, "lr": 2.2060558982479992e-05, "seqlen": 7712.0, "step_tflops": 51.432719422885896, "step_time": 0.3769609146118164} +{"epoch": 0, "iter": 21681, "iter_tflops": 25.5964095092956, "iter_time": 0.8060151367187501, "loss": 0.44446462392807007, "lr": 2.20189472139086e-05, "seqlen": 8192.0, "step_tflops": 27.024978144797505, "step_time": 0.7634083328247071} +{"epoch": 0, "iter": 21682, "iter_tflops": 14.430851729125475, "iter_time": 1.4296518249511718, "loss": 0.4844631254673004, "lr": 2.1977266171161876e-05, "seqlen": 8192.0, "step_tflops": 16.612915541561524, "step_time": 1.2418707275390626} +{"epoch": 0, "iter": 21683, "iter_tflops": 36.10056285086391, "iter_time": 0.5714895248413085, "loss": 0.4055119454860687, "lr": 2.193551626561488e-05, 
"seqlen": 8192.0, "step_tflops": 41.66623493224335, "step_time": 0.49515137481689453} +{"epoch": 0, "iter": 21684, "iter_tflops": 42.120674047646865, "iter_time": 0.4898091964721679, "loss": 0.3705251216888428, "lr": 2.1893697909322322e-05, "seqlen": 8192.0, "step_tflops": 45.9112645749848, "step_time": 0.4493688793182373} +{"epoch": 0, "iter": 21685, "iter_tflops": 18.724803329861246, "iter_time": 1.1018056182861329, "loss": 0.14406460523605347, "lr": 2.1851811515014494e-05, "seqlen": 8192.0, "step_tflops": 19.79967933695582, "step_time": 1.0419912948608399} +{"epoch": 0, "iter": 21686, "iter_tflops": 16.832408156476408, "iter_time": 1.2256768798828126, "loss": 0.16106778383255005, "lr": 2.1809857496093203e-05, "seqlen": 8192.0, "step_tflops": 21.88597402538308, "step_time": 0.9426627979278566} +{"epoch": 0, "iter": 21687, "iter_tflops": 45.7826749894188, "iter_time": 0.4506310195922852, "loss": 0.11468860507011414, "lr": 2.1767836266627676e-05, "seqlen": 8192.0, "step_tflops": 49.610676981099516, "step_time": 0.4158599472045899} +{"epoch": 0, "iter": 21688, "iter_tflops": 50.08244669660724, "iter_time": 0.4119426040649414, "loss": 0.17638589441776276, "lr": 2.1725748241350487e-05, "seqlen": 8192.0, "step_tflops": 54.318467794099, "step_time": 0.3798172950744629} +{"epoch": 0, "iter": 21689, "iter_tflops": 29.443210054475184, "iter_time": 0.7007080230712891, "loss": 0.0009514663834124804, "lr": 2.1683593835653447e-05, "seqlen": 8192.0, "step_tflops": 31.144170823786677, "step_time": 0.662438362121582} +{"epoch": 0, "iter": 21690, "iter_tflops": 16.79822801370259, "iter_time": 1.228170822143555, "loss": 0.01879153959453106, "lr": 2.1641373465583527e-05, "seqlen": 8192.0, "step_tflops": 20.651725028081827, "step_time": 0.9990009784698487} +{"epoch": 0, "iter": 21691, "iter_tflops": 54.59801668920612, "iter_time": 0.3778725814819336, "loss": 0.006978081073611975, "lr": 2.1599087547838727e-05, "seqlen": 8192.0, "step_tflops": 59.906930464814785, "step_time": 
0.34438575553894046} +{"epoch": 0, "iter": 21692, "iter_tflops": 56.44963033635524, "iter_time": 0.3654779205322266, "loss": 0.008462941274046898, "lr": 2.1556736499763993e-05, "seqlen": 8192.0, "step_tflops": 61.60705556598635, "step_time": 0.33488199234008786} +{"epoch": 0, "iter": 21693, "iter_tflops": 37.594645619459286, "iter_time": 0.3997993354797363, "loss": 0.019288187846541405, "lr": 2.151432073934707e-05, "seqlen": 6016.0, "step_tflops": 41.500804789546535, "step_time": 0.36216922569274906} +{"epoch": 0, "iter": 21694, "iter_tflops": 35.25640088000719, "iter_time": 0.42631448364257807, "loss": 0.032419104129076004, "lr": 2.1471840685214387e-05, "seqlen": 6016.0, "step_tflops": 38.63798232472463, "step_time": 0.3890036029815674} +{"epoch": 0, "iter": 21695, "iter_tflops": 38.673490082461846, "iter_time": 0.3886464424133301, "loss": 0.04640493169426918, "lr": 2.1429296756626926e-05, "seqlen": 6016.0, "step_tflops": 42.37100451498134, "step_time": 0.3547311305999756} +{"epoch": 0, "iter": 21696, "iter_tflops": 42.506807554470875, "iter_time": 0.3535978164672851, "loss": 0.03136146068572998, "lr": 2.138668937347609e-05, "seqlen": 6016.0, "step_tflops": 46.50304860374652, "step_time": 0.32321137619018553} +{"epoch": 0, "iter": 21697, "iter_tflops": 27.482617487663333, "iter_time": 0.7506960906982422, "loss": 0.49857574701309204, "lr": 2.1344018956279547e-05, "seqlen": 8192.0, "step_tflops": 29.028614410622424, "step_time": 0.7107157516479493} +{"epoch": 0, "iter": 21698, "iter_tflops": 11.886295410529323, "iter_time": 1.7357042541503904, "loss": 0.509628415107727, "lr": 2.1301285926177098e-05, "seqlen": 8192.0, "step_tflops": 13.244253593332369, "step_time": 1.5577392387390137} +{"epoch": 0, "iter": 21699, "iter_tflops": 43.49196552890779, "iter_time": 0.47436562728881837, "loss": 0.43080276250839233, "lr": 2.1258490704926483e-05, "seqlen": 8192.0, "step_tflops": 46.970380478601754, "step_time": 0.43923624420166013} +{"epoch": 0, "iter": 21700, "iter_tflops": 
45.13976486870425, "iter_time": 0.45704920196533205, "loss": 0.5587766766548157, "lr": 2.1215633714899264e-05, "seqlen": 8192.0, "step_tflops": 48.53529277146203, "step_time": 0.42507405090332034} +{"epoch": 0, "iter": 21701, "iter_tflops": 37.62372545020804, "iter_time": 0.5483532867431641, "loss": 0.007355010602623224, "lr": 2.1172715379076635e-05, "seqlen": 8192.0, "step_tflops": 40.70304312594988, "step_time": 0.5068685760498047} +{"epoch": 0, "iter": 21702, "iter_tflops": 19.1532687254542, "iter_time": 1.0771578369140624, "loss": 0.0008475929498672485, "lr": 2.1129736121045237e-05, "seqlen": 8192.0, "step_tflops": 22.937859730135592, "step_time": 0.8994341125488281} +{"epoch": 0, "iter": 21703, "iter_tflops": 37.5897343311209, "iter_time": 0.5488491439819336, "loss": 0.0025423055049031973, "lr": 2.1086696364993e-05, "seqlen": 8192.0, "step_tflops": 41.315890554144225, "step_time": 0.49935008621215815} +{"epoch": 0, "iter": 21704, "iter_tflops": 43.984291576905434, "iter_time": 0.4690559463500977, "loss": 0.0022515852469950914, "lr": 2.1043596535704943e-05, "seqlen": 8192.0, "step_tflops": 48.50742107319528, "step_time": 0.4253182926177979} +{"epoch": 0, "iter": 21705, "iter_tflops": 30.037004646205293, "iter_time": 0.6868558883666992, "loss": 0.08309372514486313, "lr": 2.100043705855897e-05, "seqlen": 8192.0, "step_tflops": 32.59024725340351, "step_time": 0.6330450134277343} +{"epoch": 0, "iter": 21706, "iter_tflops": 10.293037254039092, "iter_time": 2.0043737335205076, "loss": 0.1388518512248993, "lr": 2.0957218359521707e-05, "seqlen": 8192.0, "step_tflops": 11.319930264286254, "step_time": 1.8225459899902345} +{"epoch": 0, "iter": 21707, "iter_tflops": 12.289041213287623, "iter_time": 1.6788204345703126, "loss": 0.11665243655443192, "lr": 2.0913940865144266e-05, "seqlen": 8192.0, "step_tflops": 15.003623828667514, "step_time": 1.375074031829834} +{"epoch": 0, "iter": 21708, "iter_tflops": 38.639925966611194, "iter_time": 0.5339320144653321, "loss": 
0.09452594071626663, "lr": 2.087060500255804e-05, "seqlen": 8192.0, "step_tflops": 42.39158368200859, "step_time": 0.4866789989471436} +{"epoch": 0, "iter": 21709, "iter_tflops": 12.07772104585342, "iter_time": 1.3835056304931643, "loss": 0.38357090950012207, "lr": 2.0827211199470495e-05, "seqlen": 6672.0, "step_tflops": 12.754361591826571, "step_time": 1.3101083068847656} +{"epoch": 0, "iter": 21710, "iter_tflops": 18.956757464912126, "iter_time": 0.8814585037231445, "loss": 0.42352771759033203, "lr": 2.078375988416095e-05, "seqlen": 6672.0, "step_tflops": 23.048361901619852, "step_time": 0.7249797248840333} +{"epoch": 0, "iter": 21711, "iter_tflops": 28.043568493515174, "iter_time": 0.595844108581543, "loss": 0.27523839473724365, "lr": 2.074025148547635e-05, "seqlen": 6672.0, "step_tflops": 29.81919810675076, "step_time": 0.5603636627197266} +{"epoch": 0, "iter": 21712, "iter_tflops": 28.625922735599868, "iter_time": 0.5837224960327148, "loss": 0.30141958594322205, "lr": 2.069668643282702e-05, "seqlen": 6672.0, "step_tflops": 30.500349169719403, "step_time": 0.5478493041992187} +{"epoch": 0, "iter": 21713, "iter_tflops": 27.611709629532356, "iter_time": 0.642388313293457, "loss": 0.20971663296222687, "lr": 2.065306515618244e-05, "seqlen": 7072.0, "step_tflops": 29.50039827391757, "step_time": 0.6012610206604005} +{"epoch": 0, "iter": 21714, "iter_tflops": 10.956578120507944, "iter_time": 1.618884963989258, "loss": 0.21946370601654053, "lr": 2.0609388086067004e-05, "seqlen": 7072.0, "step_tflops": 13.938914001886355, "step_time": 1.2725123043060302} +{"epoch": 0, "iter": 21715, "iter_tflops": 33.06485504488643, "iter_time": 0.5364438934326173, "loss": 0.19824080169200897, "lr": 2.0565655653555764e-05, "seqlen": 7072.0, "step_tflops": 36.28432901619446, "step_time": 0.48884573745727533} +{"epoch": 0, "iter": 21716, "iter_tflops": 33.141028798836714, "iter_time": 0.5352108917236329, "loss": 0.11195050925016403, "lr": 2.052186829027017e-05, "seqlen": 7072.0, 
"step_tflops": 36.379010184941286, "step_time": 0.4875734519958496} +{"epoch": 0, "iter": 21717, "iter_tflops": 15.46753827399973, "iter_time": 1.3338317413330079, "loss": 0.4287037253379822, "lr": 2.047802642837382e-05, "seqlen": 8192.0, "step_tflops": 16.67650118700918, "step_time": 1.237135612487793} +{"epoch": 0, "iter": 21718, "iter_tflops": 23.803675920118913, "iter_time": 0.8667188034057617, "loss": 0.4435316026210785, "lr": 2.0434130500568188e-05, "seqlen": 8192.0, "step_tflops": 28.483924823342303, "step_time": 0.7243065567016602} +{"epoch": 0, "iter": 21719, "iter_tflops": 46.35532564285091, "iter_time": 0.4450641479492187, "loss": 0.45900097489356995, "lr": 2.039018094008836e-05, "seqlen": 8192.0, "step_tflops": 50.14022657753693, "step_time": 0.4114678955078125} +{"epoch": 0, "iter": 21720, "iter_tflops": 43.81485600144891, "iter_time": 0.4708698234558105, "loss": 0.5190407633781433, "lr": 2.034617818069876e-05, "seqlen": 8192.0, "step_tflops": 47.168343236198716, "step_time": 0.43739279556274413} +{"epoch": 0, "iter": 21721, "iter_tflops": 28.9853540001814, "iter_time": 0.7117764892578126, "loss": 0.043050769716501236, "lr": 2.030212265668886e-05, "seqlen": 8192.0, "step_tflops": 30.58071263650149, "step_time": 0.6746439743041991} +{"epoch": 0, "iter": 21722, "iter_tflops": 15.005826450230213, "iter_time": 1.3748721923828124, "loss": 0.016933035105466843, "lr": 2.0258014802868886e-05, "seqlen": 8192.0, "step_tflops": 18.686651811661292, "step_time": 1.1040551147460937} +{"epoch": 0, "iter": 21723, "iter_tflops": 43.03104662274117, "iter_time": 0.4794467048645019, "loss": 0.01734570600092411, "lr": 2.0213855054565554e-05, "seqlen": 8192.0, "step_tflops": 48.71986365799639, "step_time": 0.423463695526123} +{"epoch": 0, "iter": 21724, "iter_tflops": 53.56363596239645, "iter_time": 0.3851697731018066, "loss": 0.013481670059263706, "lr": 2.0169643847617754e-05, "seqlen": 8192.0, "step_tflops": 58.51223581145256, "step_time": 0.3525945167541504} +{"epoch": 
0, "iter": 21725, "iter_tflops": 25.3092217794412, "iter_time": 0.815161117553711, "loss": 0.2490440011024475, "lr": 2.0125381618372253e-05, "seqlen": 8192.0, "step_tflops": 26.689235597260918, "step_time": 0.7730117797851562} +{"epoch": 0, "iter": 21726, "iter_tflops": 17.422549544378715, "iter_time": 1.184160415649414, "loss": 0.22466963529586792, "lr": 2.0081068803679374e-05, "seqlen": 8192.0, "step_tflops": 20.318616115360467, "step_time": 1.0153788719177246} +{"epoch": 0, "iter": 21727, "iter_tflops": 48.78413382956963, "iter_time": 0.4229058074951172, "loss": 0.2689320147037506, "lr": 2.003670584088871e-05, "seqlen": 8192.0, "step_tflops": 53.02436937717249, "step_time": 0.3890870132446289} +{"epoch": 0, "iter": 21728, "iter_tflops": 44.73882095889761, "iter_time": 0.46114522171020506, "loss": 0.2712453305721283, "lr": 1.9992293167844802e-05, "seqlen": 8192.0, "step_tflops": 48.1050497943053, "step_time": 0.42887583732604984} +{"epoch": 0, "iter": 21729, "iter_tflops": 29.19419454566551, "iter_time": 0.7066847991943359, "loss": 0.23621824383735657, "lr": 1.9947831222882792e-05, "seqlen": 8192.0, "step_tflops": 30.921488482268195, "step_time": 0.6672089385986327} +{"epoch": 0, "iter": 21730, "iter_tflops": 14.992029678848741, "iter_time": 1.376137451171875, "loss": 0.2624080777168274, "lr": 1.9903320444824137e-05, "seqlen": 8192.0, "step_tflops": 18.862034633194405, "step_time": 1.0937893981933593} +{"epoch": 0, "iter": 21731, "iter_tflops": 35.845007918665175, "iter_time": 0.5755639266967774, "loss": 0.12968195974826813, "lr": 1.985876127297224e-05, "seqlen": 8192.0, "step_tflops": 39.39620494344473, "step_time": 0.5236822566986085} +{"epoch": 0, "iter": 21732, "iter_tflops": 40.97191945135147, "iter_time": 0.5035422744750976, "loss": 0.1760932058095932, "lr": 1.981415414710814e-05, "seqlen": 8192.0, "step_tflops": 44.81453394166094, "step_time": 0.46036612892150874} +{"epoch": 0, "iter": 21733, "iter_tflops": 25.268575822561573, "iter_time": 
0.8164723510742188, "loss": 0.03645367920398712, "lr": 1.976949950748616e-05, "seqlen": 8192.0, "step_tflops": 26.875784794064412, "step_time": 0.7676461791992187} +{"epoch": 0, "iter": 21734, "iter_tflops": 11.728093281382643, "iter_time": 1.7591174468994142, "loss": 0.030545327812433243, "lr": 1.972479779482957e-05, "seqlen": 8192.0, "step_tflops": 15.169945186539072, "step_time": 1.3599978942871094} +{"epoch": 0, "iter": 21735, "iter_tflops": 15.719701252062324, "iter_time": 1.3124354705810546, "loss": 0.04872637242078781, "lr": 1.9680049450326224e-05, "seqlen": 8192.0, "step_tflops": 18.8863222326893, "step_time": 1.092382797241211} +{"epoch": 0, "iter": 21736, "iter_tflops": 23.46025528136956, "iter_time": 0.8794061813354492, "loss": 0.053002454340457916, "lr": 1.963525491562421e-05, "seqlen": 8192.0, "step_tflops": 26.931496918471307, "step_time": 0.7660581798553467} +{"epoch": 0, "iter": 21737, "iter_tflops": 18.029269699070273, "iter_time": 0.824598388671875, "loss": 0.2116127461194992, "lr": 1.9590414632827513e-05, "seqlen": 5952.0, "step_tflops": 18.995647511432207, "step_time": 0.782648063659668} +{"epoch": 0, "iter": 21738, "iter_tflops": 10.273552355457907, "iter_time": 1.4471047821044922, "loss": 0.3931446075439453, "lr": 1.9545529044491612e-05, "seqlen": 5952.0, "step_tflops": 11.609624339308652, "step_time": 1.28056742477417} +{"epoch": 0, "iter": 21739, "iter_tflops": 25.323477679402707, "iter_time": 0.5870799789428711, "loss": 0.38743647933006287, "lr": 1.9500598593619143e-05, "seqlen": 5952.0, "step_tflops": 27.013019283062874, "step_time": 0.5503607940673828} +{"epoch": 0, "iter": 21740, "iter_tflops": 24.629742453842034, "iter_time": 0.6036160049438476, "loss": 0.34881770610809326, "lr": 1.9455623723655524e-05, "seqlen": 5952.0, "step_tflops": 26.083208101552753, "step_time": 0.5699799919128418} +{"epoch": 0, "iter": 21741, "iter_tflops": 31.268103647283045, "iter_time": 0.6598127517700195, "loss": 0.04169798642396927, "lr": 
1.941060487848456e-05, "seqlen": 8192.0, "step_tflops": 33.51444496022671, "step_time": 0.6155881004333495} +{"epoch": 0, "iter": 21742, "iter_tflops": 16.950079020477283, "iter_time": 1.2171679840087892, "loss": 0.0464448481798172, "lr": 1.9365542502424075e-05, "seqlen": 8192.0, "step_tflops": 20.807945889385056, "step_time": 0.9915007286071776} +{"epoch": 0, "iter": 21743, "iter_tflops": 51.125997022493735, "iter_time": 0.40353430175781246, "loss": 0.06233390048146248, "lr": 1.932043704022154e-05, "seqlen": 8192.0, "step_tflops": 55.74788017542967, "step_time": 0.3700785293579102} +{"epoch": 0, "iter": 21744, "iter_tflops": 54.690897935270684, "iter_time": 0.377230842590332, "loss": 0.04930006340146065, "lr": 1.927528893704964e-05, "seqlen": 8192.0, "step_tflops": 59.63503676685293, "step_time": 0.3459559116363526} +{"epoch": 0, "iter": 21745, "iter_tflops": 36.921032260788806, "iter_time": 0.5587897262573241, "loss": 0.6899735331535339, "lr": 1.9230098638501942e-05, "seqlen": 8192.0, "step_tflops": 39.879527956763624, "step_time": 0.51733544921875} +{"epoch": 0, "iter": 21746, "iter_tflops": 41.161278578187705, "iter_time": 0.5012257690429688, "loss": 0.460168719291687, "lr": 1.918486659058844e-05, "seqlen": 8192.0, "step_tflops": 44.62742374246068, "step_time": 0.46229631423950185} +{"epoch": 0, "iter": 21747, "iter_tflops": 45.41637813918882, "iter_time": 0.454265495300293, "loss": 0.6594781279563904, "lr": 1.913959323973119e-05, "seqlen": 8192.0, "step_tflops": 48.85659050119919, "step_time": 0.4222786178588867} +{"epoch": 0, "iter": 21748, "iter_tflops": 43.55573305596738, "iter_time": 0.47367113494873053, "loss": 0.5908020734786987, "lr": 1.909427903275988e-05, "seqlen": 8192.0, "step_tflops": 46.77949031159969, "step_time": 0.4410286083221436} +{"epoch": 0, "iter": 21749, "iter_tflops": 28.74467465262091, "iter_time": 0.7177361984252929, "loss": 0.39407384395599365, "lr": 1.904892441690745e-05, "seqlen": 8192.0, "step_tflops": 30.442301226383446, 
"step_time": 0.6777113647460937} +{"epoch": 0, "iter": 21750, "iter_tflops": 15.820739082374194, "iter_time": 1.3040537109375, "loss": 0.4708213210105896, "lr": 1.9003529839805637e-05, "seqlen": 8192.0, "step_tflops": 18.657527465622614, "step_time": 1.1057785415649413} +{"epoch": 0, "iter": 21751, "iter_tflops": 39.32821645517592, "iter_time": 0.5245875701904297, "loss": 0.4956071078777313, "lr": 1.8958095749480597e-05, "seqlen": 8192.0, "step_tflops": 43.39553589265145, "step_time": 0.4754197196960449} +{"epoch": 0, "iter": 21752, "iter_tflops": 45.43606342871205, "iter_time": 0.4540686836242676, "loss": 0.4745573103427887, "lr": 1.8912622594348456e-05, "seqlen": 8192.0, "step_tflops": 49.25354725509634, "step_time": 0.41887528228759763} +{"epoch": 0, "iter": 21753, "iter_tflops": 28.561168127180103, "iter_time": 0.7223476791381835, "loss": 0.11309892684221268, "lr": 1.88671108232109e-05, "seqlen": 8192.0, "step_tflops": 30.30238502375472, "step_time": 0.6808405838012694} +{"epoch": 0, "iter": 21754, "iter_tflops": 19.291714096920927, "iter_time": 1.069427703857422, "loss": 0.08294045925140381, "lr": 1.8821560885250735e-05, "seqlen": 8192.0, "step_tflops": 23.48752857572209, "step_time": 0.87838503074646} +{"epoch": 0, "iter": 21755, "iter_tflops": 40.81333838310793, "iter_time": 0.5054987983703614, "loss": 0.058305248618125916, "lr": 1.8775973230027457e-05, "seqlen": 8192.0, "step_tflops": 44.940038459395566, "step_time": 0.45908045959472665} +{"epoch": 0, "iter": 21756, "iter_tflops": 46.444788597868865, "iter_time": 0.4442068557739258, "loss": 0.1059492975473404, "lr": 1.8730348307472828e-05, "seqlen": 8192.0, "step_tflops": 50.82716834025384, "step_time": 0.4059068050384522} +{"epoch": 0, "iter": 21757, "iter_tflops": 15.926967673646608, "iter_time": 0.5672463760375976, "loss": 0.0012937409337610006, "lr": 1.8684686567886398e-05, "seqlen": 3648.0, "step_tflops": 17.925012514138963, "step_time": 0.5040172042846679} +{"epoch": 0, "iter": 21758, "iter_tflops": 
18.289483724306987, "iter_time": 0.4939731941223145, "loss": 0.014234298840165138, "lr": 1.863898846193111e-05, "seqlen": 3648.0, "step_tflops": 20.30400037801869, "step_time": 0.44496229934692383} +{"epoch": 0, "iter": 21759, "iter_tflops": 21.22335126549238, "iter_time": 0.42568746948242187, "loss": 0.009234238415956497, "lr": 1.859325444062883e-05, "seqlen": 3648.0, "step_tflops": 23.536797946725287, "step_time": 0.38384638023376466} +{"epoch": 0, "iter": 21760, "iter_tflops": 22.788347461356803, "iter_time": 0.3964532623291016, "loss": 0.006394963711500168, "lr": 1.8547484955355872e-05, "seqlen": 3648.0, "step_tflops": 25.176992534581913, "step_time": 0.35884010696411134} +{"epoch": 0, "iter": 21761, "iter_tflops": 17.768103732847365, "iter_time": 0.6878682708740234, "loss": 0.011298947036266327, "lr": 1.8501680457838582e-05, "seqlen": 4912.0, "step_tflops": 19.151602679380225, "step_time": 0.6381771278381347} +{"epoch": 0, "iter": 21762, "iter_tflops": 25.16155662900808, "iter_time": 0.4857455749511718, "loss": 0.00262078526429832, "lr": 1.845584140014886e-05, "seqlen": 4912.0, "step_tflops": 28.079149402738793, "step_time": 0.43527368354797363} +{"epoch": 0, "iter": 21763, "iter_tflops": 23.189904371273176, "iter_time": 0.5270446395874023, "loss": 0.0014076391234993935, "lr": 1.8409968234699698e-05, "seqlen": 4912.0, "step_tflops": 25.715511024337474, "step_time": 0.4752818164825439} +{"epoch": 0, "iter": 21764, "iter_tflops": 26.789529524157558, "iter_time": 0.45622730255126953, "loss": 0.0022277410607784986, "lr": 1.836406141424072e-05, "seqlen": 4912.0, "step_tflops": 29.685311771682773, "step_time": 0.411722635269165} +{"epoch": 0, "iter": 21765, "iter_tflops": 31.43706478378684, "iter_time": 0.6562665328979492, "loss": 0.004483937751501799, "lr": 1.8318121391853712e-05, "seqlen": 8192.0, "step_tflops": 34.379612232194, "step_time": 0.6000967483520507} +{"epoch": 0, "iter": 21766, "iter_tflops": 12.257643189548565, "iter_time": 1.6831207427978516, "loss": 
0.0023040836676955223, "lr": 1.8272148620948143e-05, "seqlen": 8192.0, "step_tflops": 14.090370680847323, "step_time": 1.4641980667114258} +{"epoch": 0, "iter": 21767, "iter_tflops": 41.24654774622573, "iter_time": 0.500189582824707, "loss": 0.006484858226031065, "lr": 1.8226143555256703e-05, "seqlen": 8192.0, "step_tflops": 46.904471597784514, "step_time": 0.43985344696044926} +{"epoch": 0, "iter": 21768, "iter_tflops": 48.11293237023909, "iter_time": 0.42880557250976564, "loss": 0.010394028387963772, "lr": 1.8180106648830824e-05, "seqlen": 8192.0, "step_tflops": 53.176724129373476, "step_time": 0.38797225379943845} +{"epoch": 0, "iter": 21769, "iter_tflops": 13.324331415596086, "iter_time": 1.1587195739746092, "loss": 0.17352820932865143, "lr": 1.8134038356036174e-05, "seqlen": 6176.0, "step_tflops": 14.193001181979705, "step_time": 1.0878011932373046} +{"epoch": 0, "iter": 21770, "iter_tflops": 18.959629244650284, "iter_time": 0.8143178024291993, "loss": 0.4640788435935974, "lr": 1.808793913154822e-05, "seqlen": 6176.0, "step_tflops": 22.86014266688495, "step_time": 0.675374771118164} +{"epoch": 0, "iter": 21771, "iter_tflops": 28.073022442691386, "iter_time": 0.5499644241333008, "loss": 0.38075751066207886, "lr": 1.8041809430347688e-05, "seqlen": 6176.0, "step_tflops": 29.93831570487294, "step_time": 0.5156991386413574} +{"epoch": 0, "iter": 21772, "iter_tflops": 26.425173089461165, "iter_time": 0.5842596969604492, "loss": 0.2406502366065979, "lr": 1.7995649707716104e-05, "seqlen": 6176.0, "step_tflops": 28.035675281350255, "step_time": 0.5506970481872558} +{"epoch": 0, "iter": 21773, "iter_tflops": 26.202571633051296, "iter_time": 0.7873690338134766, "loss": 0.21877726912498474, "lr": 1.79494604192313e-05, "seqlen": 8192.0, "step_tflops": 27.706183986888323, "step_time": 0.7446385803222656} +{"epoch": 0, "iter": 21774, "iter_tflops": 13.151164191800397, "iter_time": 1.5687655639648437, "loss": 0.19008535146713257, "lr": 1.790324202076291e-05, "seqlen": 8192.0, 
"step_tflops": 16.37150291670569, "step_time": 1.2601832351684572} +{"epoch": 0, "iter": 21775, "iter_tflops": 42.20284303059615, "iter_time": 0.48885553741455073, "loss": 0.21863459050655365, "lr": 1.7856994968467852e-05, "seqlen": 8192.0, "step_tflops": 45.447085068498566, "step_time": 0.45395856475830076} +{"epoch": 0, "iter": 21776, "iter_tflops": 45.17938879786315, "iter_time": 0.4566483535766601, "loss": 0.1629285216331482, "lr": 1.781071971878587e-05, "seqlen": 8192.0, "step_tflops": 48.877840640849854, "step_time": 0.422095027923584} +{"epoch": 0, "iter": 21777, "iter_tflops": 23.30770852791675, "iter_time": 0.8851618118286132, "loss": 0.3400243818759918, "lr": 1.7764416728435e-05, "seqlen": 8192.0, "step_tflops": 24.427539239686112, "step_time": 0.8445833740234374} +{"epoch": 0, "iter": 21778, "iter_tflops": 16.525691086649633, "iter_time": 1.2484254608154297, "loss": 0.39220842719078064, "lr": 1.771808645440706e-05, "seqlen": 8192.0, "step_tflops": 19.419230317378858, "step_time": 1.06240531539917} +{"epoch": 0, "iter": 21779, "iter_tflops": 34.570428487480704, "iter_time": 0.5967844314575196, "loss": 0.36059772968292236, "lr": 1.767172935396315e-05, "seqlen": 8192.0, "step_tflops": 37.366688633898555, "step_time": 0.5521252822875977} +{"epoch": 0, "iter": 21780, "iter_tflops": 34.4007654810306, "iter_time": 0.5997277450561525, "loss": 0.44959917664527893, "lr": 1.7625345884629144e-05, "seqlen": 8192.0, "step_tflops": 37.36206820681688, "step_time": 0.5521935615539552} +{"epoch": 0, "iter": 21781, "iter_tflops": 24.887715039686626, "iter_time": 0.6681005020141602, "loss": 0.023285269737243652, "lr": 1.757893650419114e-05, "seqlen": 6640.0, "step_tflops": 27.44776607532742, "step_time": 0.6057868194580077} +{"epoch": 0, "iter": 21782, "iter_tflops": 42.843720570227575, "iter_time": 0.38809642791748045, "loss": 0.00953179132193327, "lr": 1.7532501670691006e-05, "seqlen": 6640.0, "step_tflops": 47.33122073500331, "step_time": 0.3513007831573486} +{"epoch": 
0, "iter": 21783, "iter_tflops": 39.995499322548426, "iter_time": 0.4157341499328613, "loss": 0.002931124297901988, "lr": 1.74860418424218e-05, "seqlen": 6640.0, "step_tflops": 43.51135589415655, "step_time": 0.3821415023803711} +{"epoch": 0, "iter": 21784, "iter_tflops": 47.321931974348125, "iter_time": 0.35136973953247075, "loss": 0.006766212172806263, "lr": 1.7439557477923257e-05, "seqlen": 6640.0, "step_tflops": 51.84579032208258, "step_time": 0.3207106075286865} +{"epoch": 0, "iter": 21785, "iter_tflops": 20.476735569039263, "iter_time": 0.7939877777099609, "loss": 0.02212083525955677, "lr": 1.7393049035977296e-05, "seqlen": 6496.0, "step_tflops": 21.60152044917879, "step_time": 0.7526450653076172} +{"epoch": 0, "iter": 21786, "iter_tflops": 10.832133111562046, "iter_time": 1.5009303894042967, "loss": 0.013961263000965118, "lr": 1.7346516975603466e-05, "seqlen": 6496.0, "step_tflops": 13.503887115971935, "step_time": 1.2039702072143557} +{"epoch": 0, "iter": 21787, "iter_tflops": 11.595647241269631, "iter_time": 1.402101791381836, "loss": 0.044958487153053284, "lr": 1.729996175605441e-05, "seqlen": 6496.0, "step_tflops": 12.698275510983231, "step_time": 1.2803532066345216} +{"epoch": 0, "iter": 21788, "iter_tflops": 32.50530000295581, "iter_time": 0.5001731338500977, "loss": 0.027688872069120407, "lr": 1.7253383836811357e-05, "seqlen": 6496.0, "step_tflops": 43.913284422992525, "step_time": 0.3702359771728515} +{"epoch": 0, "iter": 21789, "iter_tflops": 23.68386651410563, "iter_time": 0.7194004592895509, "loss": 0.41592690348625183, "lr": 1.720678367757956e-05, "seqlen": 6800.0, "step_tflops": 25.21741310031554, "step_time": 0.6756515579223632} +{"epoch": 0, "iter": 21790, "iter_tflops": 9.458378033889462, "iter_time": 1.8013854370117186, "loss": 0.31127500534057617, "lr": 1.7160161738283782e-05, "seqlen": 6800.0, "step_tflops": 11.70852211810262, "step_time": 1.4551951370239258} +{"epoch": 0, "iter": 21791, "iter_tflops": 25.429406174212872, "iter_time": 
0.6700189666748047, "loss": 0.1816328465938568, "lr": 1.711351847906374e-05, "seqlen": 6800.0, "step_tflops": 27.381883752328832, "step_time": 0.6222429618835449} +{"epoch": 0, "iter": 21792, "iter_tflops": 27.69061364911421, "iter_time": 0.6153054122924805, "loss": 0.2958318889141083, "lr": 1.706685436026957e-05, "seqlen": 6800.0, "step_tflops": 29.622016279472874, "step_time": 0.5751865196228028} +{"epoch": 0, "iter": 21793, "iter_tflops": 18.233756803857446, "iter_time": 1.1314779357910156, "loss": 0.33482882380485535, "lr": 1.702016984245729e-05, "seqlen": 8192.0, "step_tflops": 19.700120196184457, "step_time": 1.0472572402954101} +{"epoch": 0, "iter": 21794, "iter_tflops": 14.505730980771988, "iter_time": 1.4222718963623047, "loss": 0.28378528356552124, "lr": 1.697346538638424e-05, "seqlen": 8192.0, "step_tflops": 16.60197080204937, "step_time": 1.2426894226074219} +{"epoch": 0, "iter": 21795, "iter_tflops": 34.9549688863656, "iter_time": 0.5902191925048829, "loss": 0.39755576848983765, "lr": 1.6926741453004544e-05, "seqlen": 8192.0, "step_tflops": 38.00138269081676, "step_time": 0.5429037590026855} +{"epoch": 0, "iter": 21796, "iter_tflops": 41.51167831309872, "iter_time": 0.49699492645263665, "loss": 0.36879101395606995, "lr": 1.6879998503464565e-05, "seqlen": 8192.0, "step_tflops": 45.14603961576406, "step_time": 0.45698567771911625} +{"epoch": 0, "iter": 21797, "iter_tflops": 19.90833609531737, "iter_time": 1.0363042602539063, "loss": 0.21647979319095612, "lr": 1.683323699909834e-05, "seqlen": 8192.0, "step_tflops": 20.9981055412775, "step_time": 0.9825216598510742} +{"epoch": 0, "iter": 21798, "iter_tflops": 10.027127381688064, "iter_time": 2.057527816772461, "loss": 0.19086167216300964, "lr": 1.6786457401423047e-05, "seqlen": 8192.0, "step_tflops": 11.682951446708412, "step_time": 1.7659145126342772} +{"epoch": 0, "iter": 21799, "iter_tflops": 13.269267315439471, "iter_time": 1.5548027648925782, "loss": 0.20070859789848328, "lr": 1.673966017213442e-05, 
"seqlen": 8192.0, "step_tflops": 17.496591356175877, "step_time": 1.1791493034362792} +{"epoch": 0, "iter": 21800, "iter_tflops": 39.19829248924332, "iter_time": 0.5263263320922852, "loss": 0.16522066295146942, "lr": 1.6692845773102225e-05, "seqlen": 8192.0, "step_tflops": 44.09041120625408, "step_time": 0.4679269924163818} +{"epoch": 0, "iter": 21801, "iter_tflops": 20.27990652838991, "iter_time": 0.7774484939575195, "loss": 0.29758065938949585, "lr": 1.6646014666365676e-05, "seqlen": 6304.0, "step_tflops": 21.41658455167734, "step_time": 0.736185676574707} +{"epoch": 0, "iter": 21802, "iter_tflops": 9.509110275576047, "iter_time": 1.658050262451172, "loss": 0.3602340817451477, "lr": 1.65991673141289e-05, "seqlen": 6304.0, "step_tflops": 11.07283972072916, "step_time": 1.423896957397461} +{"epoch": 0, "iter": 21803, "iter_tflops": 28.617934856887494, "iter_time": 0.5509336318969728, "loss": 0.43557363748550415, "lr": 1.6552304178756337e-05, "seqlen": 6304.0, "step_tflops": 30.56431308857552, "step_time": 0.515849407196045} +{"epoch": 0, "iter": 21804, "iter_tflops": 28.310065717406324, "iter_time": 0.5569249801635743, "loss": 0.36840084195137024, "lr": 1.650542572276822e-05, "seqlen": 6304.0, "step_tflops": 30.04582633984743, "step_time": 0.5247511787414552} +{"epoch": 0, "iter": 21805, "iter_tflops": 35.65139716088612, "iter_time": 0.5786896209716798, "loss": 0.1418629139661789, "lr": 1.6458532408835996e-05, "seqlen": 8192.0, "step_tflops": 38.19992065592811, "step_time": 0.540082103729248} +{"epoch": 0, "iter": 21806, "iter_tflops": 11.69698735972277, "iter_time": 1.7637954864501952, "loss": 0.16314519941806793, "lr": 1.641162469977772e-05, "seqlen": 8192.0, "step_tflops": 14.69595688421018, "step_time": 1.4038618698120118} +{"epoch": 0, "iter": 21807, "iter_tflops": 17.08113522042774, "iter_time": 1.2078291778564454, "loss": 0.15664143860340118, "lr": 1.6364703058553552e-05, "seqlen": 8192.0, "step_tflops": 20.13235306212296, "step_time": 1.0247730827331545} 
+{"epoch": 0, "iter": 21808, "iter_tflops": 18.42980041998393, "iter_time": 1.1194420471191406, "loss": 0.10463845729827881, "lr": 1.631776794826115e-05, "seqlen": 8192.0, "step_tflops": 22.075169095538463, "step_time": 0.934583713531494} +{"epoch": 0, "iter": 21809, "iter_tflops": 19.701964007603394, "iter_time": 0.7877889785766601, "loss": 0.3986832797527313, "lr": 1.6270819832131102e-05, "seqlen": 6208.0, "step_tflops": 20.828874955854978, "step_time": 0.7451669921874999} +{"epoch": 0, "iter": 21810, "iter_tflops": 10.358018134320265, "iter_time": 1.4984517211914063, "loss": 0.31091269850730896, "lr": 1.6223859173522367e-05, "seqlen": 6208.0, "step_tflops": 12.767881869183336, "step_time": 1.215627639770508} +{"epoch": 0, "iter": 21811, "iter_tflops": 10.442078098313832, "iter_time": 1.4863890075683595, "loss": 0.24359141290187836, "lr": 1.6176886435917676e-05, "seqlen": 6208.0, "step_tflops": 11.931424303911998, "step_time": 1.3008497314453127} +{"epoch": 0, "iter": 21812, "iter_tflops": 19.01695610488185, "iter_time": 0.8161658477783204, "loss": 0.36254414916038513, "lr": 1.612990208291899e-05, "seqlen": 6208.0, "step_tflops": 22.626906688335477, "step_time": 0.6859528045654296} +{"epoch": 0, "iter": 21813, "iter_tflops": 12.570783507822565, "iter_time": 1.2998627319335938, "loss": 0.222386434674263, "lr": 1.6082906578242913e-05, "seqlen": 6528.0, "step_tflops": 13.30045391522703, "step_time": 1.228551528930664} +{"epoch": 0, "iter": 21814, "iter_tflops": 14.047167859870155, "iter_time": 1.1632446594238282, "loss": 0.29037144780158997, "lr": 1.603590038571609e-05, "seqlen": 6528.0, "step_tflops": 17.935633912665693, "step_time": 0.9110518798828124} +{"epoch": 0, "iter": 21815, "iter_tflops": 28.723624459461206, "iter_time": 0.5688799133300781, "loss": 0.23245026171207428, "lr": 1.598888396927067e-05, "seqlen": 6528.0, "step_tflops": 30.72623701380654, "step_time": 0.5318026084899902} +{"epoch": 0, "iter": 21816, "iter_tflops": 28.400008412000886, "iter_time": 
0.5753622589111328, "loss": 0.430728018283844, "lr": 1.5941857792939702e-05, "seqlen": 6528.0, "step_tflops": 30.211261800064786, "step_time": 0.5408676109313966} +{"epoch": 0, "iter": 21817, "iter_tflops": 36.92466414437029, "iter_time": 0.558734764099121, "loss": 0.03538302704691887, "lr": 1.5894822320852563e-05, "seqlen": 8192.0, "step_tflops": 39.695593015949655, "step_time": 0.519732593536377} +{"epoch": 0, "iter": 21818, "iter_tflops": 35.43753248685492, "iter_time": 0.5821819992065429, "loss": 0.01727728359401226, "lr": 1.5847778017230372e-05, "seqlen": 8192.0, "step_tflops": 40.82269568772509, "step_time": 0.5053829288482666} +{"epoch": 0, "iter": 21819, "iter_tflops": 44.596906479988036, "iter_time": 0.4626126594543457, "loss": 0.03886101767420769, "lr": 1.5800725346381406e-05, "seqlen": 8192.0, "step_tflops": 49.017993747021414, "step_time": 0.4208881664276123} +{"epoch": 0, "iter": 21820, "iter_tflops": 42.804485145987464, "iter_time": 0.4819843864440918, "loss": 0.023204073309898376, "lr": 1.5753664772696546e-05, "seqlen": 8192.0, "step_tflops": 47.32846371264444, "step_time": 0.4359130191802979} +{"epoch": 0, "iter": 21821, "iter_tflops": 23.206148662321663, "iter_time": 0.8890356521606445, "loss": 0.04797320067882538, "lr": 1.570659676064464e-05, "seqlen": 8192.0, "step_tflops": 25.185291149529736, "step_time": 0.8191723251342773} +{"epoch": 0, "iter": 21822, "iter_tflops": 28.697560684526504, "iter_time": 0.718914535522461, "loss": 0.05896677076816559, "lr": 1.5659521774767974e-05, "seqlen": 8192.0, "step_tflops": 36.5444681283769, "step_time": 0.5645476474761962} +{"epoch": 0, "iter": 21823, "iter_tflops": 54.17355747626088, "iter_time": 0.38083327865600586, "loss": 0.06909094750881195, "lr": 1.561244027967765e-05, "seqlen": 8192.0, "step_tflops": 59.31256220055764, "step_time": 0.34783682823181156} +{"epoch": 0, "iter": 21824, "iter_tflops": 55.35111901331685, "iter_time": 0.37273128128051763, "loss": 0.04936220124363899, "lr": 
1.556535274004902e-05, "seqlen": 8192.0, "step_tflops": 60.285481901594046, "step_time": 0.3422232494354248} +{"epoch": 0, "iter": 21825, "iter_tflops": 42.0281764444713, "iter_time": 0.490887191772461, "loss": 0.13724258542060852, "lr": 1.5518259620617088e-05, "seqlen": 8192.0, "step_tflops": 45.70595911213281, "step_time": 0.45138738822937013} +{"epoch": 0, "iter": 21826, "iter_tflops": 36.187741430925136, "iter_time": 0.5701127700805664, "loss": 0.1328023076057434, "lr": 1.5471161386171925e-05, "seqlen": 8192.0, "step_tflops": 39.45016220135728, "step_time": 0.5229659996032715} +{"epoch": 0, "iter": 21827, "iter_tflops": 42.082855027190995, "iter_time": 0.4902493782043457, "loss": 0.23142953217029572, "lr": 1.5424058501554102e-05, "seqlen": 8192.0, "step_tflops": 46.212315922631944, "step_time": 0.44644145393371576} +{"epoch": 0, "iter": 21828, "iter_tflops": 39.750661680489614, "iter_time": 0.519012580871582, "loss": 0.129298597574234, "lr": 1.5376951431650065e-05, "seqlen": 8192.0, "step_tflops": 43.540709290174966, "step_time": 0.4738345756530762} +{"epoch": 0, "iter": 21829, "iter_tflops": 27.473626978233675, "iter_time": 0.750941749572754, "loss": 0.5832899808883667, "lr": 1.5329840641387577e-05, "seqlen": 8192.0, "step_tflops": 29.429690620810593, "step_time": 0.7010299148559571} +{"epoch": 0, "iter": 21830, "iter_tflops": 10.333045970871659, "iter_time": 1.9966129608154297, "loss": 0.5613319873809814, "lr": 1.5282726595731126e-05, "seqlen": 8192.0, "step_tflops": 11.920744242456827, "step_time": 1.7306883773803712} +{"epoch": 0, "iter": 21831, "iter_tflops": 11.998246657484621, "iter_time": 1.719509033203125, "loss": 0.6377318501472473, "lr": 1.5235609759677311e-05, "seqlen": 8192.0, "step_tflops": 14.935587918180733, "step_time": 1.381337890625} +{"epoch": 0, "iter": 21832, "iter_tflops": 39.978026512149164, "iter_time": 0.5160608291625977, "loss": 0.49376657605171204, "lr": 1.518849059825029e-05, "seqlen": 8192.0, "step_tflops": 43.22883223577955, 
"step_time": 0.47725308418273926} +{"epoch": 0, "iter": 21833, "iter_tflops": 21.801474745475886, "iter_time": 0.7363411178588867, "loss": 0.3988458812236786, "lr": 1.5141369576497164e-05, "seqlen": 6416.0, "step_tflops": 23.085581582342726, "step_time": 0.695383056640625} +{"epoch": 0, "iter": 21834, "iter_tflops": 7.073115753645091, "iter_time": 2.269625274658203, "loss": 0.3819912374019623, "lr": 1.5094247159483387e-05, "seqlen": 6416.0, "step_tflops": 9.00962179606682, "step_time": 1.781797576904297} +{"epoch": 0, "iter": 21835, "iter_tflops": 8.557732312568131, "iter_time": 1.875885070800781, "loss": 0.19728179275989532, "lr": 1.5047123812288194e-05, "seqlen": 6416.0, "step_tflops": 10.768731304417875, "step_time": 1.4907347793579102} +{"epoch": 0, "iter": 21836, "iter_tflops": 17.819409399576298, "iter_time": 0.9008896942138671, "loss": 0.35902756452560425, "lr": 1.5e-05, "seqlen": 6416.0, "step_tflops": 21.153639102362277, "step_time": 0.7588917541503906} +{"epoch": 0, "iter": 21837, "iter_tflops": 19.430336789539375, "iter_time": 0.7483307266235352, "loss": 0.2756272554397583, "lr": 1.4952876187711806e-05, "seqlen": 5824.0, "step_tflops": 21.182613982978868, "step_time": 0.6864269943237304} +{"epoch": 0, "iter": 21838, "iter_tflops": 21.67935971862678, "iter_time": 0.6706986846923828, "loss": 0.2427031248807907, "lr": 1.4905752840516615e-05, "seqlen": 5824.0, "step_tflops": 23.296118883833053, "step_time": 0.6241519508361816} +{"epoch": 0, "iter": 21839, "iter_tflops": 20.275046351914586, "iter_time": 0.7171533813476563, "loss": 0.34091272950172424, "lr": 1.485863042350284e-05, "seqlen": 5824.0, "step_tflops": 21.86120636434909, "step_time": 0.6651196556091309} +{"epoch": 0, "iter": 21840, "iter_tflops": 22.045589350418847, "iter_time": 0.6595567855834961, "loss": 0.40822646021842957, "lr": 1.481150940174971e-05, "seqlen": 5824.0, "step_tflops": 23.698977793540248, "step_time": 0.6135419921875} +{"epoch": 0, "iter": 21841, "iter_tflops": 34.29376041813607, 
"iter_time": 0.6015990447998047, "loss": 0.07634348422288895, "lr": 1.4764390240322693e-05, "seqlen": 8192.0, "step_tflops": 38.22500422079992, "step_time": 0.5397276973724365} +{"epoch": 0, "iter": 21842, "iter_tflops": 36.40758675798594, "iter_time": 0.5666701736450196, "loss": 0.11552523076534271, "lr": 1.471727340426888e-05, "seqlen": 8192.0, "step_tflops": 41.123723576037115, "step_time": 0.5016834983825684} +{"epoch": 0, "iter": 21843, "iter_tflops": 38.70037302682751, "iter_time": 0.5330980529785155, "loss": 0.14304794371128082, "lr": 1.4670159358612425e-05, "seqlen": 8192.0, "step_tflops": 42.496674196849604, "step_time": 0.4854754848480225} +{"epoch": 0, "iter": 21844, "iter_tflops": 42.06459012984931, "iter_time": 0.4904622497558594, "loss": 0.15077024698257446, "lr": 1.462304856834994e-05, "seqlen": 8192.0, "step_tflops": 46.302821932080434, "step_time": 0.44556881523132325} +{"epoch": 0, "iter": 21845, "iter_tflops": 34.445536014924784, "iter_time": 0.5989482498168945, "loss": 0.010912200435996056, "lr": 1.4575941498445902e-05, "seqlen": 8192.0, "step_tflops": 38.50241442974413, "step_time": 0.535838954925537} +{"epoch": 0, "iter": 21846, "iter_tflops": 39.86517470022596, "iter_time": 0.5175217132568359, "loss": 0.011223861947655678, "lr": 1.4528838613828076e-05, "seqlen": 8192.0, "step_tflops": 44.19555088728285, "step_time": 0.4668138103485107} +{"epoch": 0, "iter": 21847, "iter_tflops": 46.11661204957033, "iter_time": 0.44736793518066403, "loss": 0.004130383487790823, "lr": 1.4481740379382916e-05, "seqlen": 8192.0, "step_tflops": 51.014755837225245, "step_time": 0.4044142360687256} +{"epoch": 0, "iter": 21848, "iter_tflops": 42.59511058502929, "iter_time": 0.48435356140136715, "loss": 0.002198226749897003, "lr": 1.443464725995098e-05, "seqlen": 8192.0, "step_tflops": 46.97480530766076, "step_time": 0.4391948699951172} +{"epoch": 0, "iter": 21849, "iter_tflops": 21.836595640671543, "iter_time": 0.9447944107055664, "loss": 0.10593120753765106, "lr": 
1.438755972032235e-05, "seqlen": 8192.0, "step_tflops": 23.49085143285911, "step_time": 0.8782607803344727} +{"epoch": 0, "iter": 21850, "iter_tflops": 18.749815744375212, "iter_time": 1.1003358001708985, "loss": 0.10600265860557556, "lr": 1.434047822523203e-05, "seqlen": 8192.0, "step_tflops": 23.017626098692535, "step_time": 0.8963171710968019} +{"epoch": 0, "iter": 21851, "iter_tflops": 42.8649812443677, "iter_time": 0.48130415344238275, "loss": 0.07913665473461151, "lr": 1.4293403239355362e-05, "seqlen": 8192.0, "step_tflops": 47.202953208661256, "step_time": 0.43707209205627445} +{"epoch": 0, "iter": 21852, "iter_tflops": 40.4290162229564, "iter_time": 0.5103041191101074, "loss": 0.11681731045246124, "lr": 1.4246335227303458e-05, "seqlen": 8192.0, "step_tflops": 44.33495467593354, "step_time": 0.4653459930419922} +{"epoch": 0, "iter": 21853, "iter_tflops": 18.388668157015182, "iter_time": 1.121946044921875, "loss": 0.06121065467596054, "lr": 1.4199274653618593e-05, "seqlen": 8192.0, "step_tflops": 19.494937500487314, "step_time": 1.058279541015625} +{"epoch": 0, "iter": 21854, "iter_tflops": 13.935776766005826, "iter_time": 1.4804408721923827, "loss": 0.037561219185590744, "lr": 1.4152221982769634e-05, "seqlen": 8192.0, "step_tflops": 21.408715207379775, "step_time": 0.9636773300170899} +{"epoch": 0, "iter": 21855, "iter_tflops": 44.50085605738766, "iter_time": 0.4636111602783203, "loss": 0.03531915694475174, "lr": 1.4105177679147441e-05, "seqlen": 8192.0, "step_tflops": 48.78585137751271, "step_time": 0.4228909187316895} +{"epoch": 0, "iter": 21856, "iter_tflops": 45.18651777204097, "iter_time": 0.4565763092041016, "loss": 0.057741276919841766, "lr": 1.40581422070603e-05, "seqlen": 8192.0, "step_tflops": 49.68597059095896, "step_time": 0.41522975730895995} +{"epoch": 0, "iter": 21857, "iter_tflops": 20.106075068818765, "iter_time": 0.8351562423706055, "loss": 0.07446666061878204, "lr": 1.4011116030729333e-05, "seqlen": 6704.0, "step_tflops": 
21.971790305561772, "step_time": 0.7642396850585937} +{"epoch": 0, "iter": 21858, "iter_tflops": 36.0938612868537, "iter_time": 0.46522354507446284, "loss": 0.05488862469792366, "lr": 1.3964099614283912e-05, "seqlen": 6704.0, "step_tflops": 39.403633330014884, "step_time": 0.4261463394165039} +{"epoch": 0, "iter": 21859, "iter_tflops": 40.86339861342038, "iter_time": 0.4109230918884277, "loss": 0.07291784137487411, "lr": 1.391709342175709e-05, "seqlen": 6704.0, "step_tflops": 44.52274115055811, "step_time": 0.3771491527557373} +{"epoch": 0, "iter": 21860, "iter_tflops": 45.10066138897264, "iter_time": 0.3723163604736328, "loss": 0.08929871767759323, "lr": 1.3870097917081011e-05, "seqlen": 6704.0, "step_tflops": 48.95470785868823, "step_time": 0.3430050926208496} +{"epoch": 0, "iter": 21861, "iter_tflops": 28.87022860923642, "iter_time": 0.7146148300170898, "loss": 0.07363191246986389, "lr": 1.3823113564082329e-05, "seqlen": 8192.0, "step_tflops": 30.60352764248367, "step_time": 0.674141025543213} +{"epoch": 0, "iter": 21862, "iter_tflops": 11.506979903803188, "iter_time": 1.792919921875, "loss": 0.10171244293451309, "lr": 1.3776140826477637e-05, "seqlen": 8192.0, "step_tflops": 13.284017389927646, "step_time": 1.5530763702392578} +{"epoch": 0, "iter": 21863, "iter_tflops": 12.086017685908518, "iter_time": 1.7070216217041014, "loss": 0.07077000290155411, "lr": 1.3729180167868899e-05, "seqlen": 8192.0, "step_tflops": 13.502012312456504, "step_time": 1.5280013847351075} +{"epoch": 0, "iter": 21864, "iter_tflops": 39.30919665310548, "iter_time": 0.5248413925170898, "loss": 0.07019136846065521, "lr": 1.3682232051738853e-05, "seqlen": 8192.0, "step_tflops": 43.25985519631828, "step_time": 0.476910831451416} +{"epoch": 0, "iter": 21865, "iter_tflops": 11.74803167792506, "iter_time": 1.2724345245361328, "loss": 0.3344871699810028, "lr": 1.363529694144645e-05, "seqlen": 5984.0, "step_tflops": 12.432710731850348, "step_time": 1.202360565185547} +{"epoch": 0, "iter": 21866, 
"iter_tflops": 12.603590701574332, "iter_time": 1.1860589141845703, "loss": 0.35649698972702026, "lr": 1.3588375300222285e-05, "seqlen": 5984.0, "step_tflops": 16.384369342997076, "step_time": 0.9123696365356445} +{"epoch": 0, "iter": 21867, "iter_tflops": 26.88892531877778, "iter_time": 0.5559389572143555, "loss": 0.3008691370487213, "lr": 1.354146759116401e-05, "seqlen": 5984.0, "step_tflops": 28.659778778156515, "step_time": 0.5215881538391114} +{"epoch": 0, "iter": 21868, "iter_tflops": 26.630258739571783, "iter_time": 0.5613389358520507, "loss": 0.19283975660800934, "lr": 1.3494574277231775e-05, "seqlen": 5984.0, "step_tflops": 28.337117539723256, "step_time": 0.527527229309082} +{"epoch": 0, "iter": 21869, "iter_tflops": 26.006886270920187, "iter_time": 0.793293487548828, "loss": 0.5891191363334656, "lr": 1.3447695821243663e-05, "seqlen": 8192.0, "step_tflops": 27.511293807501946, "step_time": 0.7499136047363282} +{"epoch": 0, "iter": 21870, "iter_tflops": 18.662675337668635, "iter_time": 1.1054735260009765, "loss": 0.6477681994438171, "lr": 1.3400832685871102e-05, "seqlen": 8192.0, "step_tflops": 20.8414317388432, "step_time": 0.9899076881408692} +{"epoch": 0, "iter": 21871, "iter_tflops": 43.62330154458232, "iter_time": 0.4729374618530273, "loss": 0.6574023962020874, "lr": 1.3353985333634321e-05, "seqlen": 8192.0, "step_tflops": 47.0575139905181, "step_time": 0.43842293739318844} +{"epoch": 0, "iter": 21872, "iter_tflops": 41.5479279242539, "iter_time": 0.4965613098144531, "loss": 0.4297284483909607, "lr": 1.3307154226897774e-05, "seqlen": 8192.0, "step_tflops": 44.47814796362723, "step_time": 0.46384785461425776} +{"epoch": 0, "iter": 21873, "iter_tflops": 28.061083365526407, "iter_time": 0.7352208480834961, "loss": 0.3693050444126129, "lr": 1.326033982786558e-05, "seqlen": 8192.0, "step_tflops": 29.69485287241305, "step_time": 0.69477001953125} +{"epoch": 0, "iter": 21874, "iter_tflops": 18.785566399673453, "iter_time": 1.0982417602539063, "loss": 
0.26747429370880127, "lr": 1.3213542598576953e-05, "seqlen": 8192.0, "step_tflops": 20.93932227666949, "step_time": 0.9852799072265626} +{"epoch": 0, "iter": 21875, "iter_tflops": 36.56116071846841, "iter_time": 0.564289894104004, "loss": 0.27766138315200806, "lr": 1.3166763000901658e-05, "seqlen": 8192.0, "step_tflops": 39.637919306983314, "step_time": 0.5204888114929199} +{"epoch": 0, "iter": 21876, "iter_tflops": 41.52140786350422, "iter_time": 0.49687846755981446, "loss": 0.39239609241485596, "lr": 1.3120001496535434e-05, "seqlen": 8192.0, "step_tflops": 45.40209864361208, "step_time": 0.4544083671569824} +{"epoch": 0, "iter": 21877, "iter_tflops": 18.70720982444598, "iter_time": 0.7206307373046875, "loss": 0.002688322216272354, "lr": 1.3073258546995455e-05, "seqlen": 5408.0, "step_tflops": 20.24385726695841, "step_time": 0.6659299278259279} +{"epoch": 0, "iter": 21878, "iter_tflops": 6.007714136314661, "iter_time": 2.2439467163085935, "loss": 0.003596974303945899, "lr": 1.3026534613615764e-05, "seqlen": 5408.0, "step_tflops": 7.165868459066086, "step_time": 1.881277961730957} +{"epoch": 0, "iter": 21879, "iter_tflops": 7.632755743935603, "iter_time": 1.7662022552490235, "loss": 0.0011703548952937126, "lr": 1.2979830157542712e-05, "seqlen": 5408.0, "step_tflops": 9.731663391361876, "step_time": 1.3852709312438964} +{"epoch": 0, "iter": 21880, "iter_tflops": 24.394836355834446, "iter_time": 0.5526165542602539, "loss": 0.017635175958275795, "lr": 1.293314563973043e-05, "seqlen": 5408.0, "step_tflops": 32.211040455819, "step_time": 0.41852079963684086} +{"epoch": 0, "iter": 21881, "iter_tflops": 23.80248222990065, "iter_time": 0.6555129547119141, "loss": 0.3492794334888458, "lr": 1.2886481520936261e-05, "seqlen": 6240.0, "step_tflops": 25.40441138371961, "step_time": 0.6141781921386719} +{"epoch": 0, "iter": 21882, "iter_tflops": 22.045114964295717, "iter_time": 0.7077683868408202, "loss": 0.38091692328453064, "lr": 1.2839838261716217e-05, "seqlen": 6240.0, 
"step_tflops": 23.765395973867673, "step_time": 0.6565358924865723} +{"epoch": 0, "iter": 21883, "iter_tflops": 23.863923378080912, "iter_time": 0.6538252410888671, "loss": 0.2975143790245056, "lr": 1.2793216322420439e-05, "seqlen": 6240.0, "step_tflops": 25.66545265990668, "step_time": 0.6079314346313476} +{"epoch": 0, "iter": 21884, "iter_tflops": 23.11540741111411, "iter_time": 0.6749972076416015, "loss": 0.3046535849571228, "lr": 1.2746616163188644e-05, "seqlen": 6240.0, "step_tflops": 24.66783242065681, "step_time": 0.6325174903869628} +{"epoch": 0, "iter": 21885, "iter_tflops": 21.74651457010263, "iter_time": 0.9487080535888672, "loss": 0.5230762958526611, "lr": 1.270003824394559e-05, "seqlen": 8192.0, "step_tflops": 23.458878975086165, "step_time": 0.8794577751159668} +{"epoch": 0, "iter": 21886, "iter_tflops": 16.203473001999797, "iter_time": 1.2732513275146484, "loss": 0.4583415389060974, "lr": 1.2653483024396535e-05, "seqlen": 8192.0, "step_tflops": 19.6324563086528, "step_time": 1.0508666458129883} +{"epoch": 0, "iter": 21887, "iter_tflops": 38.73488473526569, "iter_time": 0.5326230773925781, "loss": 0.5350431799888611, "lr": 1.2606950964022701e-05, "seqlen": 8192.0, "step_tflops": 42.41102051821055, "step_time": 0.486455955505371} +{"epoch": 0, "iter": 21888, "iter_tflops": 34.0348183519772, "iter_time": 0.6061761016845703, "loss": 0.4853678047657013, "lr": 1.2560442522076747e-05, "seqlen": 8192.0, "step_tflops": 37.318375273324, "step_time": 0.5528400783538818} +{"epoch": 0, "iter": 21889, "iter_tflops": 20.644250534286325, "iter_time": 0.999362678527832, "loss": 0.2018946409225464, "lr": 1.2513958157578204e-05, "seqlen": 8192.0, "step_tflops": 22.28290275986887, "step_time": 0.9258710021972658} +{"epoch": 0, "iter": 21890, "iter_tflops": 20.596356643386905, "iter_time": 1.001686553955078, "loss": 0.18861573934555054, "lr": 1.2467498329308992e-05, "seqlen": 8192.0, "step_tflops": 25.30990211241227, "step_time": 0.8151392059326172} +{"epoch": 0, "iter": 
21891, "iter_tflops": 46.7423248418673, "iter_time": 0.44137927627563467, "loss": 0.17833971977233887, "lr": 1.2421063495808856e-05, "seqlen": 8192.0, "step_tflops": 50.40751099720245, "step_time": 0.4092860984802246} +{"epoch": 0, "iter": 21892, "iter_tflops": 50.26285126655687, "iter_time": 0.4104640502929688, "loss": 0.17823854088783264, "lr": 1.2374654115370859e-05, "seqlen": 8192.0, "step_tflops": 54.86248689387392, "step_time": 0.3760510082244873} +{"epoch": 0, "iter": 21893, "iter_tflops": 41.99754913582682, "iter_time": 0.4912451782226562, "loss": 0.1120375394821167, "lr": 1.2328270646036847e-05, "seqlen": 8192.0, "step_tflops": 46.01040895594153, "step_time": 0.4484005680084229} +{"epoch": 0, "iter": 21894, "iter_tflops": 32.99257994254742, "iter_time": 0.6253252563476562, "loss": 0.1348736584186554, "lr": 1.2281913545592938e-05, "seqlen": 8192.0, "step_tflops": 36.98994755681895, "step_time": 0.5577486553192139} +{"epoch": 0, "iter": 21895, "iter_tflops": 41.43929454111392, "iter_time": 0.49786304855346675, "loss": 0.09286775439977646, "lr": 1.2235583271564999e-05, "seqlen": 8192.0, "step_tflops": 45.53186514080552, "step_time": 0.45311329650878907} +{"epoch": 0, "iter": 21896, "iter_tflops": 38.91394682168046, "iter_time": 0.5301722183227539, "loss": 0.09254580736160278, "lr": 1.2189280281214128e-05, "seqlen": 8192.0, "step_tflops": 42.74912717912757, "step_time": 0.4826085319519043} +{"epoch": 0, "iter": 21897, "iter_tflops": 28.611177787906733, "iter_time": 0.7210850830078125, "loss": 0.011483017355203629, "lr": 1.2143005031532152e-05, "seqlen": 8192.0, "step_tflops": 32.02149397834931, "step_time": 0.6442889118194581} +{"epoch": 0, "iter": 21898, "iter_tflops": 40.98392613598546, "iter_time": 0.5033947563171386, "loss": 0.011568955145776272, "lr": 1.2096757979237096e-05, "seqlen": 8192.0, "step_tflops": 45.940124139471195, "step_time": 0.4490865859985351} +{"epoch": 0, "iter": 21899, "iter_tflops": 46.34835616212875, "iter_time": 0.4451310729980469, 
"loss": 0.002005228539928794, "lr": 1.2050539580768702e-05, "seqlen": 8192.0, "step_tflops": 51.47983369089894, "step_time": 0.4007606868743897} +{"epoch": 0, "iter": 21900, "iter_tflops": 47.18309609419294, "iter_time": 0.43725603485107417, "loss": 0.008738900534808636, "lr": 1.2004350292283897e-05, "seqlen": 8192.0, "step_tflops": 52.27671174385496, "step_time": 0.39465170669555666} +{"epoch": 0, "iter": 21901, "iter_tflops": 34.88834338490345, "iter_time": 0.5913463211059571, "loss": 0.032347336411476135, "lr": 1.1958190569652318e-05, "seqlen": 8192.0, "step_tflops": 38.89896172711491, "step_time": 0.5303764572143554} +{"epoch": 0, "iter": 21902, "iter_tflops": 34.13508873701708, "iter_time": 0.6043954849243164, "loss": 0.012737476266920567, "lr": 1.1912060868451784e-05, "seqlen": 8192.0, "step_tflops": 37.68921273236925, "step_time": 0.547400489807129} +{"epoch": 0, "iter": 21903, "iter_tflops": 40.99249666167762, "iter_time": 0.5032895088195801, "loss": 0.021479366347193718, "lr": 1.1865961643963828e-05, "seqlen": 8192.0, "step_tflops": 45.358989670714365, "step_time": 0.45484023475646973} +{"epoch": 0, "iter": 21904, "iter_tflops": 43.093464974468745, "iter_time": 0.478752254486084, "loss": 0.03394018113613129, "lr": 1.1819893351169185e-05, "seqlen": 8192.0, "step_tflops": 47.34329992650263, "step_time": 0.43577641487121577} +{"epoch": 0, "iter": 21905, "iter_tflops": 22.326186829012954, "iter_time": 0.9240760040283204, "loss": 0.16285021603107452, "lr": 1.1773856444743304e-05, "seqlen": 8192.0, "step_tflops": 23.9229568571881, "step_time": 0.862397304534912} +{"epoch": 0, "iter": 21906, "iter_tflops": 16.844051120349548, "iter_time": 1.2248296661376954, "loss": 0.12403317540884018, "lr": 1.1727851379051866e-05, "seqlen": 8192.0, "step_tflops": 19.866907126831897, "step_time": 1.0384652919769286} +{"epoch": 0, "iter": 21907, "iter_tflops": 36.36758635569842, "iter_time": 0.5672934494018554, "loss": 0.16506603360176086, "lr": 1.1681878608146297e-05, "seqlen": 
8192.0, "step_tflops": 39.62343938607985, "step_time": 0.5206790180206299} +{"epoch": 0, "iter": 21908, "iter_tflops": 43.15381366063275, "iter_time": 0.47808274078369145, "loss": 0.2645638883113861, "lr": 1.1635938585759284e-05, "seqlen": 8192.0, "step_tflops": 47.037826074078225, "step_time": 0.4386064414978027} +{"epoch": 0, "iter": 21909, "iter_tflops": 18.73845105301833, "iter_time": 1.101003143310547, "loss": 0.20077037811279297, "lr": 1.1590031765300306e-05, "seqlen": 8192.0, "step_tflops": 20.149981043896997, "step_time": 1.0238765716552736} +{"epoch": 0, "iter": 21910, "iter_tflops": 29.996410609431674, "iter_time": 0.6877854080200196, "loss": 0.23473717272281647, "lr": 1.1544158599851146e-05, "seqlen": 8192.0, "step_tflops": 34.76035606130671, "step_time": 0.5935236530303956} +{"epoch": 0, "iter": 21911, "iter_tflops": 51.022528229266605, "iter_time": 0.4043526306152344, "loss": 0.24298107624053955, "lr": 1.1498319542161424e-05, "seqlen": 8192.0, "step_tflops": 55.342542865717164, "step_time": 0.3727890415191651} +{"epoch": 0, "iter": 21912, "iter_tflops": 50.4702597241743, "iter_time": 0.4087772407531738, "loss": 0.1376885026693344, "lr": 1.1452515044644134e-05, "seqlen": 8192.0, "step_tflops": 54.52485563580105, "step_time": 0.3783796081542969} +{"epoch": 0, "iter": 21913, "iter_tflops": 33.67150903487299, "iter_time": 0.6127166290283202, "loss": 0.11332716792821884, "lr": 1.1406745559371177e-05, "seqlen": 8192.0, "step_tflops": 36.2555444977187, "step_time": 0.5690465774536133} +{"epoch": 0, "iter": 21914, "iter_tflops": 16.333065197840074, "iter_time": 1.2631489105224607, "loss": 0.18461991846561432, "lr": 1.1361011538068891e-05, "seqlen": 8192.0, "step_tflops": 17.852195850080697, "step_time": 1.1556613922119139} +{"epoch": 0, "iter": 21915, "iter_tflops": 39.331598256560426, "iter_time": 0.524542465209961, "loss": 0.09988412261009216, "lr": 1.1315313432113608e-05, "seqlen": 8192.0, "step_tflops": 43.189360493570724, "step_time": 0.4776892566680908} 
+{"epoch": 0, "iter": 21916, "iter_tflops": 41.66530148233016, "iter_time": 0.49516246795654295, "loss": 0.12459709495306015, "lr": 1.1269651692527181e-05, "seqlen": 8192.0, "step_tflops": 45.54994519797515, "step_time": 0.452933443069458} +{"epoch": 0, "iter": 21917, "iter_tflops": 20.325271460516, "iter_time": 1.0150463943481445, "loss": 0.33883771300315857, "lr": 1.1224026769972545e-05, "seqlen": 8192.0, "step_tflops": 21.974363935761957, "step_time": 0.938871021270752} +{"epoch": 0, "iter": 21918, "iter_tflops": 17.373930832308268, "iter_time": 1.187474136352539, "loss": 0.3163556456565857, "lr": 1.117843911474927e-05, "seqlen": 8192.0, "step_tflops": 19.66918480662384, "step_time": 1.0489043502807618} +{"epoch": 0, "iter": 21919, "iter_tflops": 40.01826123461693, "iter_time": 0.515541976928711, "loss": 0.40696805715560913, "lr": 1.1132889176789103e-05, "seqlen": 8192.0, "step_tflops": 43.70679705928211, "step_time": 0.4720339832305908} +{"epoch": 0, "iter": 21920, "iter_tflops": 37.370171165676744, "iter_time": 0.5520738296508789, "loss": 0.35366252064704895, "lr": 1.108737740565155e-05, "seqlen": 8192.0, "step_tflops": 40.88626233765871, "step_time": 0.5045972003936767} +{"epoch": 0, "iter": 21921, "iter_tflops": 16.48261804245822, "iter_time": 1.2516878967285154, "loss": 0.5683019757270813, "lr": 1.104190425051941e-05, "seqlen": 8192.0, "step_tflops": 17.565711138929764, "step_time": 1.1745094375610352} +{"epoch": 0, "iter": 21922, "iter_tflops": 25.948872345954822, "iter_time": 0.795067054748535, "loss": 0.402161568403244, "lr": 1.099647016019437e-05, "seqlen": 8192.0, "step_tflops": 28.803542037259632, "step_time": 0.7162693214416503} +{"epoch": 0, "iter": 21923, "iter_tflops": 41.601055087327325, "iter_time": 0.4959271697998047, "loss": 0.544461190700531, "lr": 1.0951075583092559e-05, "seqlen": 8192.0, "step_tflops": 44.68374142906736, "step_time": 0.46171365356445315} +{"epoch": 0, "iter": 21924, "iter_tflops": 45.969450002735144, "iter_time": 
0.4488000946044923, "loss": 0.406828373670578, "lr": 1.0905720967240124e-05, "seqlen": 8192.0, "step_tflops": 49.13682599277131, "step_time": 0.41987029266357423} +{"epoch": 0, "iter": 21925, "iter_tflops": 29.548310788323, "iter_time": 0.6463553924560547, "loss": 0.04076632484793663, "lr": 1.0860406760268818e-05, "seqlen": 7600.0, "step_tflops": 31.343787959412765, "step_time": 0.609329990386963} +{"epoch": 0, "iter": 21926, "iter_tflops": 13.749643746064283, "iter_time": 1.3890330810546876, "loss": 0.07237663120031357, "lr": 1.0815133409411564e-05, "seqlen": 7600.0, "step_tflops": 15.593851795362934, "step_time": 1.2247589797973633} +{"epoch": 0, "iter": 21927, "iter_tflops": 46.340462287181595, "iter_time": 0.41213896179199216, "loss": 0.05763305723667145, "lr": 1.076990136149806e-05, "seqlen": 7600.0, "step_tflops": 51.135106385491284, "step_time": 0.3734950675964355} +{"epoch": 0, "iter": 21928, "iter_tflops": 54.55013651430078, "iter_time": 0.350112964630127, "loss": 0.07569918036460876, "lr": 1.0724711062950358e-05, "seqlen": 7600.0, "step_tflops": 59.85893131139024, "step_time": 0.3190619945526123} +{"epoch": 0, "iter": 21929, "iter_tflops": 19.541339138246553, "iter_time": 1.0557666168212891, "loss": 0.19324621558189392, "lr": 1.0679562959778467e-05, "seqlen": 8192.0, "step_tflops": 20.345675528478537, "step_time": 1.0140284347534179} +{"epoch": 0, "iter": 21930, "iter_tflops": 19.026829763572415, "iter_time": 1.0843158721923827, "loss": 0.24139654636383057, "lr": 1.0634457497575926e-05, "seqlen": 8192.0, "step_tflops": 23.613269758000158, "step_time": 0.8737076110839843} +{"epoch": 0, "iter": 21931, "iter_tflops": 48.30602745980405, "iter_time": 0.42709149551391606, "loss": 0.15871331095695496, "lr": 1.0589395121515443e-05, "seqlen": 8192.0, "step_tflops": 52.46179122547123, "step_time": 0.39325941848754886} +{"epoch": 0, "iter": 21932, "iter_tflops": 48.14898473855061, "iter_time": 0.4284844970703125, "loss": 0.16242268681526184, "lr": 
1.0544376276344478e-05, "seqlen": 8192.0, "step_tflops": 52.02315765309518, "step_time": 0.3965751876831055} +{"epoch": 0, "iter": 21933, "iter_tflops": 20.621639670222866, "iter_time": 1.0004584426879883, "loss": 0.40103879570961, "lr": 1.0499401406380859e-05, "seqlen": 8192.0, "step_tflops": 21.521720708323866, "step_time": 0.9586172866821289} +{"epoch": 0, "iter": 21934, "iter_tflops": 17.11958341838238, "iter_time": 1.2051165618896484, "loss": 0.42489221692085266, "lr": 1.0454470955508394e-05, "seqlen": 8192.0, "step_tflops": 19.967221406656236, "step_time": 1.0332480964660644} +{"epoch": 0, "iter": 21935, "iter_tflops": 48.100131634572115, "iter_time": 0.4289196891784668, "loss": 0.4900290369987488, "lr": 1.0409585367172491e-05, "seqlen": 8192.0, "step_tflops": 52.20784072333339, "step_time": 0.39517231941223147} +{"epoch": 0, "iter": 21936, "iter_tflops": 49.16283573833507, "iter_time": 0.41964815902709957, "loss": 0.4876580238342285, "lr": 1.036474508437579e-05, "seqlen": 8192.0, "step_tflops": 53.164483260004125, "step_time": 0.38806158256530765} +{"epoch": 0, "iter": 21937, "iter_tflops": 25.896367178533573, "iter_time": 0.7966790618896484, "loss": 0.4056212902069092, "lr": 1.0319950549673779e-05, "seqlen": 8192.0, "step_tflops": 27.257308859980323, "step_time": 0.7569013366699218} +{"epoch": 0, "iter": 21938, "iter_tflops": 12.524744940532006, "iter_time": 1.6472266387939454, "loss": 0.3445678949356079, "lr": 1.0275202205170431e-05, "seqlen": 8192.0, "step_tflops": 15.035284117829441, "step_time": 1.3721784934997558} +{"epoch": 0, "iter": 21939, "iter_tflops": 34.60213505628198, "iter_time": 0.5962375869750978, "loss": 0.5655716061592102, "lr": 1.023050049251384e-05, "seqlen": 8192.0, "step_tflops": 37.500899085539956, "step_time": 0.5501493034362792} +{"epoch": 0, "iter": 21940, "iter_tflops": 39.241436847140164, "iter_time": 0.5257476577758788, "loss": 0.568034827709198, "lr": 1.018584585289186e-05, "seqlen": 8192.0, "step_tflops": 42.693243723120005, 
"step_time": 0.48324024391174314} +{"epoch": 0, "iter": 21941, "iter_tflops": 16.665944077856526, "iter_time": 1.2379192810058592, "loss": 0.011627674102783203, "lr": 1.0141238727027761e-05, "seqlen": 8192.0, "step_tflops": 17.56601853937419, "step_time": 1.174488883972168} +{"epoch": 0, "iter": 21942, "iter_tflops": 14.75496447897066, "iter_time": 1.3982475891113282, "loss": 0.012999066151678562, "lr": 1.0096679555175865e-05, "seqlen": 8192.0, "step_tflops": 19.463970661093192, "step_time": 1.0599632453918457} +{"epoch": 0, "iter": 21943, "iter_tflops": 58.87395085627715, "iter_time": 0.35042821502685545, "loss": 0.018951667472720146, "lr": 1.0052168777117207e-05, "seqlen": 8192.0, "step_tflops": 64.61924314473066, "step_time": 0.31927166748046876} +{"epoch": 0, "iter": 21944, "iter_tflops": 53.46370003700613, "iter_time": 0.38588974380493163, "loss": 0.015886353328824043, "lr": 1.0007706832155202e-05, "seqlen": 8192.0, "step_tflops": 58.56268794243512, "step_time": 0.3522907543182373} +{"epoch": 0, "iter": 21945, "iter_tflops": 23.27531785996765, "iter_time": 0.8863936309814453, "loss": 0.6349038481712341, "lr": 9.963294159111292e-06, "seqlen": 8192.0, "step_tflops": 24.35671896203948, "step_time": 0.8470391082763673} +{"epoch": 0, "iter": 21946, "iter_tflops": 12.69929175247623, "iter_time": 1.6245861511230468, "loss": 0.5675196051597595, "lr": 9.91893119632063e-06, "seqlen": 8192.0, "step_tflops": 14.976002674530509, "step_time": 1.377610164642334} +{"epoch": 0, "iter": 21947, "iter_tflops": 38.66773681968486, "iter_time": 0.533547996520996, "loss": 0.5592139959335327, "lr": 9.874618381627751e-06, "seqlen": 8192.0, "step_tflops": 42.1813214319978, "step_time": 0.48910495948791505} +{"epoch": 0, "iter": 21948, "iter_tflops": 36.499165151670304, "iter_time": 0.5652483673095704, "loss": 0.5405242443084717, "lr": 9.830356152382247e-06, "seqlen": 8192.0, "step_tflops": 39.7030991561523, "step_time": 0.519634334564209} +{"epoch": 0, "iter": 21949, "iter_tflops": 
21.080448573428857, "iter_time": 0.9786837997436523, "loss": 0.0744035542011261, "lr": 9.786144945434447e-06, "seqlen": 8192.0, "step_tflops": 22.676829443021372, "step_time": 0.9097873916625976} +{"epoch": 0, "iter": 21950, "iter_tflops": 24.486135435892788, "iter_time": 0.842562255859375, "loss": 0.07453416287899017, "lr": 9.741985197131118e-06, "seqlen": 8192.0, "step_tflops": 29.530063321851173, "step_time": 0.698647113800049} +{"epoch": 0, "iter": 21951, "iter_tflops": 52.89968405369018, "iter_time": 0.39000409698486327, "loss": 0.044207703322172165, "lr": 9.697877343311145e-06, "seqlen": 8192.0, "step_tflops": 57.575630018646535, "step_time": 0.35833031272888183} +{"epoch": 0, "iter": 21952, "iter_tflops": 52.976717414559474, "iter_time": 0.38943699264526366, "loss": 0.05500892177224159, "lr": 9.65382181930124e-06, "seqlen": 8192.0, "step_tflops": 58.01177994923002, "step_time": 0.35563627815246585} +{"epoch": 0, "iter": 21953, "iter_tflops": 30.962466504894934, "iter_time": 0.6663259048461914, "loss": 0.13080283999443054, "lr": 9.609819059911643e-06, "seqlen": 8192.0, "step_tflops": 32.885384494495206, "step_time": 0.6273636093139648} +{"epoch": 0, "iter": 21954, "iter_tflops": 13.856146978619487, "iter_time": 1.4889488067626953, "loss": 0.11272380501031876, "lr": 9.565869499431816e-06, "seqlen": 8192.0, "step_tflops": 16.181278181942826, "step_time": 1.2749977645874024} +{"epoch": 0, "iter": 21955, "iter_tflops": 47.700308655480335, "iter_time": 0.43251488494873047, "loss": 0.13013367354869843, "lr": 9.521973571626184e-06, "seqlen": 8192.0, "step_tflops": 51.90575212482765, "step_time": 0.39747220039367676} +{"epoch": 0, "iter": 21956, "iter_tflops": 46.21731412563316, "iter_time": 0.44639317321777344, "loss": 0.16500407457351685, "lr": 9.478131709729831e-06, "seqlen": 8192.0, "step_tflops": 49.662343905362086, "step_time": 0.41542730140686035} +{"epoch": 0, "iter": 21957, "iter_tflops": 22.77319873210242, "iter_time": 0.9059374465942382, "loss": 
0.579219400882721, "lr": 9.434344346444237e-06, "seqlen": 8192.0, "step_tflops": 23.9996776613672, "step_time": 0.8596404418945311} +{"epoch": 0, "iter": 21958, "iter_tflops": 16.9135791004963, "iter_time": 1.219794662475586, "loss": 0.44216808676719666, "lr": 9.390611913932996e-06, "seqlen": 8192.0, "step_tflops": 20.577384404882544, "step_time": 1.0026101036071777} +{"epoch": 0, "iter": 21959, "iter_tflops": 39.75407503226, "iter_time": 0.518968017578125, "loss": 0.6404975652694702, "lr": 9.346934843817563e-06, "seqlen": 8192.0, "step_tflops": 43.24785663488497, "step_time": 0.4770431442260742} +{"epoch": 0, "iter": 21960, "iter_tflops": 37.763733982920755, "iter_time": 0.5463202743530273, "loss": 0.3777996301651001, "lr": 9.303313567172985e-06, "seqlen": 8192.0, "step_tflops": 41.040128308158735, "step_time": 0.5027053852081298} +{"epoch": 0, "iter": 21961, "iter_tflops": 19.734168390166538, "iter_time": 1.0454503631591796, "loss": 0.07201152294874191, "lr": 9.259748514523654e-06, "seqlen": 8192.0, "step_tflops": 21.284103450836145, "step_time": 0.9693193588256834} +{"epoch": 0, "iter": 21962, "iter_tflops": 28.83465306218943, "iter_time": 0.7154965057373046, "loss": 0.07922054082155228, "lr": 9.216240115839053e-06, "seqlen": 8192.0, "step_tflops": 32.0239216091592, "step_time": 0.6442400703430177} +{"epoch": 0, "iter": 21963, "iter_tflops": 47.45669944040262, "iter_time": 0.4347351112365722, "loss": 0.08917006850242615, "lr": 9.17278880052951e-06, "seqlen": 8192.0, "step_tflops": 51.64827325806381, "step_time": 0.39945369338989256} +{"epoch": 0, "iter": 21964, "iter_tflops": 48.27612337381198, "iter_time": 0.42735605239868163, "loss": 0.1110634058713913, "lr": 9.129394997441965e-06, "seqlen": 8192.0, "step_tflops": 52.42135750227578, "step_time": 0.3935627479553223} +{"epoch": 0, "iter": 21965, "iter_tflops": 28.647512583736567, "iter_time": 0.7201705017089844, "loss": 0.16173295676708221, "lr": 9.086059134855735e-06, "seqlen": 8192.0, "step_tflops": 
30.513598509209263, "step_time": 0.676127841949463} +{"epoch": 0, "iter": 21966, "iter_tflops": 26.727291576686564, "iter_time": 0.7719111175537109, "loss": 0.11398756504058838, "lr": 9.042781640478292e-06, "seqlen": 8192.0, "step_tflops": 30.129151111391895, "step_time": 0.684755220413208} +{"epoch": 0, "iter": 21967, "iter_tflops": 38.55691158899415, "iter_time": 0.5350815887451171, "loss": 0.14447413384914398, "lr": 8.999562941441031e-06, "seqlen": 8192.0, "step_tflops": 42.41275499043625, "step_time": 0.48643606185913085} +{"epoch": 0, "iter": 21968, "iter_tflops": 43.069511849852816, "iter_time": 0.47901851272583, "loss": 0.09892947971820831, "lr": 8.956403464295062e-06, "seqlen": 8192.0, "step_tflops": 47.04532714478158, "step_time": 0.43853650856018067} +{"epoch": 0, "iter": 21969, "iter_tflops": 22.69659499762993, "iter_time": 0.9089950942993164, "loss": 0.2268979400396347, "lr": 8.913303635007002e-06, "seqlen": 8192.0, "step_tflops": 24.313426092226532, "step_time": 0.8485473594665527} +{"epoch": 0, "iter": 21970, "iter_tflops": 13.883695613860873, "iter_time": 1.4859943695068358, "loss": 0.2168872356414795, "lr": 8.870263878954767e-06, "seqlen": 8192.0, "step_tflops": 15.584211543819404, "step_time": 1.323845832824707} +{"epoch": 0, "iter": 21971, "iter_tflops": 35.93585799941265, "iter_time": 0.5741088333129882, "loss": 0.26575884222984314, "lr": 8.827284620923369e-06, "seqlen": 8192.0, "step_tflops": 39.55919234146789, "step_time": 0.5215246391296386} +{"epoch": 0, "iter": 21972, "iter_tflops": 39.19570564003825, "iter_time": 0.526361068725586, "loss": 0.2895338833332062, "lr": 8.784366285100738e-06, "seqlen": 8192.0, "step_tflops": 42.86513633496187, "step_time": 0.481302412033081} +{"epoch": 0, "iter": 21973, "iter_tflops": 22.32270352823797, "iter_time": 0.9242201995849609, "loss": 0.5160138010978699, "lr": 8.741509295073525e-06, "seqlen": 8192.0, "step_tflops": 24.18112841965806, "step_time": 0.8531898574829102} +{"epoch": 0, "iter": 21974, 
"iter_tflops": 34.627558268865, "iter_time": 0.5957998352050782, "loss": 0.5593913793563843, "lr": 8.698714073822906e-06, "seqlen": 8192.0, "step_tflops": 42.61524544789781, "step_time": 0.4841247138977051} +{"epoch": 0, "iter": 21975, "iter_tflops": 44.27791133473302, "iter_time": 0.46594549942016605, "loss": 0.4805890917778015, "lr": 8.655981043720454e-06, "seqlen": 8192.0, "step_tflops": 48.036028825443296, "step_time": 0.4294920711517334} +{"epoch": 0, "iter": 21976, "iter_tflops": 47.521198606930255, "iter_time": 0.43414505767822265, "loss": 0.6758046746253967, "lr": 8.61331062652391e-06, "seqlen": 8192.0, "step_tflops": 51.441921492251616, "step_time": 0.4010560436248779} +{"epoch": 0, "iter": 21977, "iter_tflops": 47.80660662077158, "iter_time": 0.4315531883239746, "loss": 0.10533114522695541, "lr": 8.570703243373076e-06, "seqlen": 8192.0, "step_tflops": 52.46779656236474, "step_time": 0.3932144069671631} +{"epoch": 0, "iter": 21978, "iter_tflops": 40.018795425646466, "iter_time": 0.5155350952148439, "loss": 0.0935608297586441, "lr": 8.528159314785614e-06, "seqlen": 8192.0, "step_tflops": 44.069766928560206, "step_time": 0.4681461906433106} +{"epoch": 0, "iter": 21979, "iter_tflops": 36.302028044162526, "iter_time": 0.5683179321289062, "loss": 0.11959435045719147, "lr": 8.48567926065293e-06, "seqlen": 8192.0, "step_tflops": 40.0828129697895, "step_time": 0.5147117176055909} +{"epoch": 0, "iter": 21980, "iter_tflops": 35.30852234203139, "iter_time": 0.5843091735839844, "loss": 0.12743811309337616, "lr": 8.44326350023601e-06, "seqlen": 8192.0, "step_tflops": 38.58393109619063, "step_time": 0.5347068824768066} +{"epoch": 0, "iter": 21981, "iter_tflops": 29.66927500569181, "iter_time": 0.6953689804077148, "loss": 0.16430149972438812, "lr": 8.400912452161272e-06, "seqlen": 8192.0, "step_tflops": 32.23679089993532, "step_time": 0.6399859580993653} +{"epoch": 0, "iter": 21982, "iter_tflops": 30.315976387634855, "iter_time": 0.6805353469848633, "loss": 
0.17062553763389587, "lr": 8.35862653441648e-06, "seqlen": 8192.0, "step_tflops": 38.49211434013117, "step_time": 0.5359823398590088} +{"epoch": 0, "iter": 21983, "iter_tflops": 37.07504301818982, "iter_time": 0.5564684982299805, "loss": 0.10054146498441696, "lr": 8.316406164346556e-06, "seqlen": 8192.0, "step_tflops": 40.746546211458195, "step_time": 0.5063274173736573} +{"epoch": 0, "iter": 21984, "iter_tflops": 39.01501254829801, "iter_time": 0.5287988433837891, "loss": 0.17109398543834686, "lr": 8.274251758649519e-06, "seqlen": 8192.0, "step_tflops": 42.522728223178355, "step_time": 0.48517803001403803} +{"epoch": 0, "iter": 21985, "iter_tflops": 18.006370448410394, "iter_time": 0.9143070373535156, "loss": 0.22262157499790192, "lr": 8.232163733372323e-06, "seqlen": 6576.0, "step_tflops": 19.38539185441625, "step_time": 0.8492658462524415} +{"epoch": 0, "iter": 21986, "iter_tflops": 14.005510800941785, "iter_time": 1.175490951538086, "loss": 0.18410734832286835, "lr": 8.190142503906798e-06, "seqlen": 6576.0, "step_tflops": 15.695608535411894, "step_time": 1.0489144897460938} +{"epoch": 0, "iter": 21987, "iter_tflops": 28.713568152322132, "iter_time": 0.5733648681640624, "loss": 0.2584644854068756, "lr": 8.148188484985505e-06, "seqlen": 6576.0, "step_tflops": 30.655704626897133, "step_time": 0.5370403785705566} +{"epoch": 0, "iter": 21988, "iter_tflops": 31.506923614025006, "iter_time": 0.5225312194824219, "loss": 0.3176363408565521, "lr": 8.106302090677683e-06, "seqlen": 6576.0, "step_tflops": 33.42815899620122, "step_time": 0.49249948883056643} +{"epoch": 0, "iter": 21989, "iter_tflops": 2.026799155312036, "iter_time": 0.7732688598632813, "loss": 0.11509347707033157, "lr": 8.064483734385127e-06, "seqlen": 640.0, "step_tflops": 2.142088276310046, "step_time": 0.7316508331298828} +{"epoch": 0, "iter": 21990, "iter_tflops": 0.8751040685660666, "iter_time": 1.7909420471191404, "loss": 0.14114761352539062, "lr": 8.022733828838124e-06, "seqlen": 640.0, "step_tflops": 
1.0824647631143802, "step_time": 1.4478629932403566} +{"epoch": 0, "iter": 21991, "iter_tflops": 2.9786901178355443, "iter_time": 0.5261576766967774, "loss": 0.13419076800346375, "lr": 7.981052786091401e-06, "seqlen": 640.0, "step_tflops": 3.2603513497636647, "step_time": 0.4807029991149902} +{"epoch": 0, "iter": 21992, "iter_tflops": 3.136831320445862, "iter_time": 0.49963179779052735, "loss": 0.18833696842193604, "lr": 7.939441017520012e-06, "seqlen": 640.0, "step_tflops": 3.4310191779654216, "step_time": 0.4567915802001953} +{"epoch": 0, "iter": 21993, "iter_tflops": 15.107612958572224, "iter_time": 1.3656090850830078, "loss": 0.3693310022354126, "lr": 7.897898933815319e-06, "seqlen": 8192.0, "step_tflops": 16.059145882457464, "step_time": 1.284694320678711} +{"epoch": 0, "iter": 21994, "iter_tflops": 19.197548314242123, "iter_time": 1.0746733474731447, "loss": 0.4229007363319397, "lr": 7.856426944980911e-06, "seqlen": 8192.0, "step_tflops": 23.260024546243102, "step_time": 0.8869764289855957} +{"epoch": 0, "iter": 21995, "iter_tflops": 37.66644451958155, "iter_time": 0.5477313766479492, "loss": 0.41250765323638916, "lr": 7.815025460328585e-06, "seqlen": 8192.0, "step_tflops": 41.121385991646356, "step_time": 0.5017120170593261} +{"epoch": 0, "iter": 21996, "iter_tflops": 36.743759743850084, "iter_time": 0.5614856414794922, "loss": 0.5126540660858154, "lr": 7.773694888474268e-06, "seqlen": 8192.0, "step_tflops": 40.093280031302875, "step_time": 0.5145773429870606} +{"epoch": 0, "iter": 21997, "iter_tflops": 22.07521585981137, "iter_time": 0.9345817337036132, "loss": 0.2676601707935333, "lr": 7.73243563733403e-06, "seqlen": 8192.0, "step_tflops": 23.458866866342728, "step_time": 0.8794582290649414} +{"epoch": 0, "iter": 21998, "iter_tflops": 7.93541663757555, "iter_time": 2.5998752746582032, "loss": 0.31889116764068604, "lr": 7.691248114120012e-06, "seqlen": 8192.0, "step_tflops": 11.124135448235915, "step_time": 1.8546244430541994} +{"epoch": 0, "iter": 21999, 
"iter_tflops": 11.467717925662095, "iter_time": 1.7990583343505862, "loss": 0.35100290179252625, "lr": 7.650132725336451e-06, "seqlen": 8192.0, "step_tflops": 13.928072210970486, "step_time": 1.4812598037719729} +{"epoch": 0, "iter": 22000, "iter_tflops": 43.2423910820488, "iter_time": 0.4771034393310547, "loss": 0.3379177153110504, "lr": 7.609089876775629e-06, "seqlen": 8192.0, "step_tflops": 46.56787480685328, "step_time": 0.44303274726867675} +{"epoch": 0, "iter": 22001, "iter_tflops": 22.447025790006546, "iter_time": 0.6950958938598634, "loss": 0.31219735741615295, "lr": 7.568119973513886e-06, "seqlen": 6240.0, "step_tflops": 23.840786336056023, "step_time": 0.6544597663879395} +{"epoch": 0, "iter": 22002, "iter_tflops": 10.268696523040187, "iter_time": 1.519456283569336, "loss": 0.36670440435409546, "lr": 7.527223419907638e-06, "seqlen": 6240.0, "step_tflops": 13.0133240578969, "step_time": 1.1989892349243163} +{"epoch": 0, "iter": 22003, "iter_tflops": 24.12150428841386, "iter_time": 0.6468433837890626, "loss": 0.32685452699661255, "lr": 7.486400619589349e-06, "seqlen": 6240.0, "step_tflops": 25.95984781569034, "step_time": 0.6010372467041015} +{"epoch": 0, "iter": 22004, "iter_tflops": 25.720453705479333, "iter_time": 0.6066314239501953, "loss": 0.3156568706035614, "lr": 7.4456519754635884e-06, "seqlen": 6240.0, "step_tflops": 27.511598982960617, "step_time": 0.5671366271972655} +{"epoch": 0, "iter": 22005, "iter_tflops": 24.65848195263797, "iter_time": 0.8366733016967773, "loss": 0.46018627285957336, "lr": 7.404977889703009e-06, "seqlen": 8192.0, "step_tflops": 26.53865521903892, "step_time": 0.7773978500366211} +{"epoch": 0, "iter": 22006, "iter_tflops": 12.140929678086318, "iter_time": 1.6993009643554686, "loss": 0.3727281987667084, "lr": 7.3643787637444305e-06, "seqlen": 8192.0, "step_tflops": 14.003468350754178, "step_time": 1.4732845458984374} +{"epoch": 0, "iter": 22007, "iter_tflops": 13.031889594072343, "iter_time": 1.5831237182617188, "loss": 
0.42853793501853943, "lr": 7.323854998284823e-06, "seqlen": 8192.0, "step_tflops": 15.979131914408997, "step_time": 1.2911273040771483} +{"epoch": 0, "iter": 22008, "iter_tflops": 21.23099697668085, "iter_time": 0.9717439804077149, "loss": 0.3210197389125824, "lr": 7.283406993277402e-06, "seqlen": 8192.0, "step_tflops": 24.050076137717866, "step_time": 0.8578390102386475} +{"epoch": 0, "iter": 22009, "iter_tflops": 14.207986290262477, "iter_time": 1.1327646026611329, "loss": 0.23788048326969147, "lr": 7.243035147927644e-06, "seqlen": 6432.0, "step_tflops": 15.27112691599732, "step_time": 1.0539041442871093} +{"epoch": 0, "iter": 22010, "iter_tflops": 14.318101660888798, "iter_time": 1.1240529174804688, "loss": 0.1646794229745865, "lr": 7.202739860689357e-06, "seqlen": 6432.0, "step_tflops": 16.06029772972426, "step_time": 1.0021174087524416} +{"epoch": 0, "iter": 22011, "iter_tflops": 23.693480888496996, "iter_time": 0.6792714004516601, "loss": 0.3661437928676605, "lr": 7.1625215292607685e-06, "seqlen": 6432.0, "step_tflops": 25.582854550234792, "step_time": 0.6291050872802734} +{"epoch": 0, "iter": 22012, "iter_tflops": 25.413873591512598, "iter_time": 0.6332881088256836, "loss": 0.3282555937767029, "lr": 7.122380550580563e-06, "seqlen": 6432.0, "step_tflops": 27.282008088646645, "step_time": 0.589923728942871} +{"epoch": 0, "iter": 22013, "iter_tflops": 20.70844842525547, "iter_time": 0.9962645721435547, "loss": 0.46958863735198975, "lr": 7.082317320824005e-06, "seqlen": 8192.0, "step_tflops": 22.300196305763695, "step_time": 0.9251529998779298} +{"epoch": 0, "iter": 22014, "iter_tflops": 15.276051129838315, "iter_time": 1.3505514831542969, "loss": 0.6084160208702087, "lr": 7.042332235398986e-06, "seqlen": 8192.0, "step_tflops": 17.725233786057103, "step_time": 1.1639391479492187} +{"epoch": 0, "iter": 22015, "iter_tflops": 44.04213439244535, "iter_time": 0.4684399108886719, "loss": 0.4996015727519989, "lr": 7.002425688942171e-06, "seqlen": 8192.0, "step_tflops": 
47.47377635620412, "step_time": 0.4345787315368652} +{"epoch": 0, "iter": 22016, "iter_tflops": 45.79093734469103, "iter_time": 0.4505497093200684, "loss": 0.5440701246261597, "lr": 6.962598075315047e-06, "seqlen": 8192.0, "step_tflops": 49.22338043191829, "step_time": 0.4191319923400879} +{"epoch": 0, "iter": 22017, "iter_tflops": 33.58631037443713, "iter_time": 0.6142709121704102, "loss": 0.3961847424507141, "lr": 6.922849787600097e-06, "seqlen": 8192.0, "step_tflops": 35.85381806417378, "step_time": 0.5754224967956543} +{"epoch": 0, "iter": 22018, "iter_tflops": 12.094000039059877, "iter_time": 1.7058949432373047, "loss": 0.28477880358695984, "lr": 6.883181218096863e-06, "seqlen": 8192.0, "step_tflops": 14.635416757823323, "step_time": 1.4096690139770507} +{"epoch": 0, "iter": 22019, "iter_tflops": 12.167165717185174, "iter_time": 1.6956367645263675, "loss": 0.31446373462677, "lr": 6.843592758318111e-06, "seqlen": 8192.0, "step_tflops": 14.218048533413363, "step_time": 1.4510495910644532} +{"epoch": 0, "iter": 22020, "iter_tflops": 27.411697431398906, "iter_time": 0.7526383056640625, "loss": 0.36394554376602173, "lr": 6.804084798985965e-06, "seqlen": 8192.0, "step_tflops": 34.031065746688306, "step_time": 0.6062429447174073} +{"epoch": 0, "iter": 22021, "iter_tflops": 13.005600209500326, "iter_time": 1.15568017578125, "loss": 0.37900257110595703, "lr": 6.764657730028022e-06, "seqlen": 6016.0, "step_tflops": 13.638259956094483, "step_time": 1.1020697937011719} +{"epoch": 0, "iter": 22022, "iter_tflops": 9.99546015798136, "iter_time": 1.5037140960693358, "loss": 0.35065868496894836, "lr": 6.7253119405735475e-06, "seqlen": 6016.0, "step_tflops": 11.679058240728347, "step_time": 1.2869457473754884} +{"epoch": 0, "iter": 22023, "iter_tflops": 22.639564517152788, "iter_time": 0.6638959121704101, "loss": 0.3446441888809204, "lr": 6.686047818949586e-06, "seqlen": 6016.0, "step_tflops": 24.385472240679956, "step_time": 0.6163634719848632} +{"epoch": 0, "iter": 22024, 
"iter_tflops": 22.730482093679452, "iter_time": 0.6612404556274414, "loss": 0.3877168297767639, "lr": 6.646865752677186e-06, "seqlen": 6016.0, "step_tflops": 24.395448079821854, "step_time": 0.6161114273071289} +{"epoch": 0, "iter": 22025, "iter_tflops": 17.307446494953464, "iter_time": 1.1297801055908203, "loss": 0.04015748202800751, "lr": 6.6077661284674966e-06, "seqlen": 7776.0, "step_tflops": 18.64303001501385, "step_time": 1.0488428497314455} +{"epoch": 0, "iter": 22026, "iter_tflops": 17.719240423914844, "iter_time": 1.1035240936279296, "loss": 0.0767064243555069, "lr": 6.568749332218045e-06, "seqlen": 7776.0, "step_tflops": 22.20466918982027, "step_time": 0.8806079730987548} +{"epoch": 0, "iter": 22027, "iter_tflops": 41.23038406172482, "iter_time": 0.4742524032592773, "loss": 0.07583495229482651, "lr": 6.529815749008846e-06, "seqlen": 7776.0, "step_tflops": 45.41280512892453, "step_time": 0.43057478332519533} +{"epoch": 0, "iter": 22028, "iter_tflops": 38.584464956986544, "iter_time": 0.5067741317749023, "loss": 0.03967074677348137, "lr": 6.490965763098655e-06, "seqlen": 7776.0, "step_tflops": 42.65655212327906, "step_time": 0.45839637184143067} +{"epoch": 0, "iter": 22029, "iter_tflops": 18.08570228794916, "iter_time": 1.1407405242919921, "loss": 0.017879342660307884, "lr": 6.452199757921144e-06, "seqlen": 8192.0, "step_tflops": 19.12931691167659, "step_time": 1.0785065460205079} +{"epoch": 0, "iter": 22030, "iter_tflops": 25.684849683741987, "iter_time": 0.8032397994995116, "loss": 0.02958770841360092, "lr": 6.41351811608114e-06, "seqlen": 8192.0, "step_tflops": 36.84894879397866, "step_time": 0.5598828239440918} +{"epoch": 0, "iter": 22031, "iter_tflops": 51.52967140389012, "iter_time": 0.4003730850219726, "loss": 0.06581619381904602, "lr": 6.374921219350826e-06, "seqlen": 8192.0, "step_tflops": 56.27297292411442, "step_time": 0.3666252632141113} +{"epoch": 0, "iter": 22032, "iter_tflops": 56.62149619431432, "iter_time": 0.36436856842041015, "loss": 
0.018413323909044266, "lr": 6.336409448665989e-06, "seqlen": 8192.0, "step_tflops": 61.85766112298224, "step_time": 0.33352527618408206} +{"epoch": 0, "iter": 22033, "iter_tflops": 44.442696856677166, "iter_time": 0.46421785736083987, "loss": 0.03931251913309097, "lr": 6.297983184122264e-06, "seqlen": 8192.0, "step_tflops": 48.6467905768978, "step_time": 0.4240997867584228} +{"epoch": 0, "iter": 22034, "iter_tflops": 43.53602492152054, "iter_time": 0.4738855590820313, "loss": 0.10472854226827621, "lr": 6.259642804971361e-06, "seqlen": 8192.0, "step_tflops": 48.86125095055048, "step_time": 0.4222383403778076} +{"epoch": 0, "iter": 22035, "iter_tflops": 49.65061009356205, "iter_time": 0.4155254783630371, "loss": 0.06913486868143082, "lr": 6.221388689617353e-06, "seqlen": 8192.0, "step_tflops": 54.05167974925082, "step_time": 0.3816919956207275} +{"epoch": 0, "iter": 22036, "iter_tflops": 56.30362723809209, "iter_time": 0.3664256553649902, "loss": 0.08035504817962646, "lr": 6.1832212156129045e-06, "seqlen": 8192.0, "step_tflops": 61.58778660509394, "step_time": 0.3349867668151855} +{"epoch": 0, "iter": 22037, "iter_tflops": 41.094893506289054, "iter_time": 0.5020354537963867, "loss": 0.10431041568517685, "lr": 6.145140759655586e-06, "seqlen": 8192.0, "step_tflops": 44.578269210166994, "step_time": 0.46280606842041017} +{"epoch": 0, "iter": 22038, "iter_tflops": 44.71570859559435, "iter_time": 0.46138357543945313, "loss": 0.07493740320205688, "lr": 6.1071476975841114e-06, "seqlen": 8192.0, "step_tflops": 50.33372921779592, "step_time": 0.4098860511779785} +{"epoch": 0, "iter": 22039, "iter_tflops": 50.13163859830292, "iter_time": 0.41153838348388677, "loss": 0.09542457014322281, "lr": 6.0692424043746794e-06, "seqlen": 8192.0, "step_tflops": 54.57005544572644, "step_time": 0.3780662002563477} +{"epoch": 0, "iter": 22040, "iter_tflops": 45.23612836964761, "iter_time": 0.45607558059692377, "loss": 0.09112739562988281, "lr": 6.031425254137223e-06, "seqlen": 8192.0, 
"step_tflops": 48.895351325533376, "step_time": 0.42194386482238766} +{"epoch": 0, "iter": 22041, "iter_tflops": 26.45016490144278, "iter_time": 0.7799986724853516, "loss": 0.1287337988615036, "lr": 5.993696620111741e-06, "seqlen": 8192.0, "step_tflops": 27.794698700139566, "step_time": 0.7422672119140623} +{"epoch": 0, "iter": 22042, "iter_tflops": 13.770297322287533, "iter_time": 1.4982315216064452, "loss": 0.13565650582313538, "lr": 5.956056874664633e-06, "seqlen": 8192.0, "step_tflops": 16.39808125305228, "step_time": 1.2581407051086426} +{"epoch": 0, "iter": 22043, "iter_tflops": 39.02401652422334, "iter_time": 0.5286768341064454, "loss": 0.10388396680355072, "lr": 5.91850638928498e-06, "seqlen": 8192.0, "step_tflops": 42.8168777086964, "step_time": 0.4818448848724365} +{"epoch": 0, "iter": 22044, "iter_tflops": 38.44840894651635, "iter_time": 0.5365916061401367, "loss": 0.10427714884281158, "lr": 5.881045534580923e-06, "seqlen": 8192.0, "step_tflops": 42.21085861286565, "step_time": 0.4887627067565918} +{"epoch": 0, "iter": 22045, "iter_tflops": 18.20449999442315, "iter_time": 1.133296356201172, "loss": 0.06206448748707771, "lr": 5.8436746802759634e-06, "seqlen": 8192.0, "step_tflops": 19.544092343666595, "step_time": 1.055617889404297} +{"epoch": 0, "iter": 22046, "iter_tflops": 15.433901992606405, "iter_time": 1.3367386627197266, "loss": 0.07127706706523895, "lr": 5.806394195205357e-06, "seqlen": 8192.0, "step_tflops": 22.484747141118948, "step_time": 0.9175595073699951} +{"epoch": 0, "iter": 22047, "iter_tflops": 38.859147557668365, "iter_time": 0.5309198684692382, "loss": 0.09410720318555832, "lr": 5.7692044473124276e-06, "seqlen": 8192.0, "step_tflops": 42.57442112801065, "step_time": 0.4845889377593994} +{"epoch": 0, "iter": 22048, "iter_tflops": 44.209105077389886, "iter_time": 0.46667068862915045, "loss": 0.0994674563407898, "lr": 5.732105803644987e-06, "seqlen": 8192.0, "step_tflops": 48.50904348504925, "step_time": 0.42530406761169437} +{"epoch": 0, 
"iter": 22049, "iter_tflops": 31.435741106960656, "iter_time": 0.6562941665649413, "loss": 0.26305779814720154, "lr": 5.695098630351665e-06, "seqlen": 8192.0, "step_tflops": 34.10212579421946, "step_time": 0.6049796905517578} +{"epoch": 0, "iter": 22050, "iter_tflops": 13.485626838844057, "iter_time": 1.529857955932617, "loss": 0.24295273423194885, "lr": 5.658183292678315e-06, "seqlen": 8192.0, "step_tflops": 15.903386390117177, "step_time": 1.2972767562866212} +{"epoch": 0, "iter": 22051, "iter_tflops": 14.693524160156771, "iter_time": 1.4040942993164063, "loss": 0.2586199641227722, "lr": 5.621360154964428e-06, "seqlen": 8192.0, "step_tflops": 17.011828972816563, "step_time": 1.2127498779296875} +{"epoch": 0, "iter": 22052, "iter_tflops": 38.28798536127792, "iter_time": 0.5388398818969726, "loss": 0.2220839262008667, "lr": 5.584629580639495e-06, "seqlen": 8192.0, "step_tflops": 41.98670872686191, "step_time": 0.4913720111846923} +{"epoch": 0, "iter": 22053, "iter_tflops": 14.962172798386261, "iter_time": 1.1277674865722658, "loss": 0.28243541717529297, "lr": 5.547991932219466e-06, "seqlen": 6736.0, "step_tflops": 16.078723602835627, "step_time": 1.0494522094726564} +{"epoch": 0, "iter": 22054, "iter_tflops": 11.78437744193033, "iter_time": 1.4318831939697267, "loss": 0.31782588362693787, "lr": 5.511447571303134e-06, "seqlen": 6736.0, "step_tflops": 13.870903303405294, "step_time": 1.2164926567077634} +{"epoch": 0, "iter": 22055, "iter_tflops": 27.67633788947549, "iter_time": 0.6096851425170898, "loss": 0.20276670157909393, "lr": 5.474996858568593e-06, "seqlen": 6736.0, "step_tflops": 29.426849039659366, "step_time": 0.5734168815612792} +{"epoch": 0, "iter": 22056, "iter_tflops": 29.798184627846258, "iter_time": 0.5662711410522461, "loss": 0.20631009340286255, "lr": 5.438640153769654e-06, "seqlen": 6736.0, "step_tflops": 31.684870342191076, "step_time": 0.5325523452758789} +{"epoch": 0, "iter": 22057, "iter_tflops": 26.06868253767699, "iter_time": 
0.7914129714965821, "loss": 0.012433520518243313, "lr": 5.402377815732326e-06, "seqlen": 8192.0, "step_tflops": 27.58103412712578, "step_time": 0.7480174026489258} +{"epoch": 0, "iter": 22058, "iter_tflops": 13.478997766533508, "iter_time": 1.5306103515625002, "loss": 0.019552771002054214, "lr": 5.366210202351234e-06, "seqlen": 8192.0, "step_tflops": 15.96873401761915, "step_time": 1.2919680099487305} +{"epoch": 0, "iter": 22059, "iter_tflops": 47.194945469459846, "iter_time": 0.43714625167846677, "loss": 0.028714967891573906, "lr": 5.330137670586132e-06, "seqlen": 8192.0, "step_tflops": 52.217397340972454, "step_time": 0.3950999965667724} +{"epoch": 0, "iter": 22060, "iter_tflops": 52.04767056962016, "iter_time": 0.3963884124755859, "loss": 0.023784149438142776, "lr": 5.29416057645834e-06, "seqlen": 8192.0, "step_tflops": 56.904843094715716, "step_time": 0.36255426406860347} +{"epoch": 0, "iter": 22061, "iter_tflops": 20.141192711559473, "iter_time": 1.02432332611084, "loss": 0.5136192440986633, "lr": 5.258279275047247e-06, "seqlen": 8192.0, "step_tflops": 21.522719009301124, "step_time": 0.9585728225708008} +{"epoch": 0, "iter": 22062, "iter_tflops": 23.80746365229271, "iter_time": 0.8665809097290038, "loss": 0.4507008492946625, "lr": 5.222494120486821e-06, "seqlen": 8192.0, "step_tflops": 26.371829830536228, "step_time": 0.7823155860900878} +{"epoch": 0, "iter": 22063, "iter_tflops": 37.42942394585425, "iter_time": 0.5511998672485352, "loss": 0.4937784671783447, "lr": 5.186805465962085e-06, "seqlen": 8192.0, "step_tflops": 40.499623226012545, "step_time": 0.5094144554138184} +{"epoch": 0, "iter": 22064, "iter_tflops": 37.951941073997055, "iter_time": 0.5436110229492187, "loss": 0.7627134919166565, "lr": 5.1512136637056556e-06, "seqlen": 8192.0, "step_tflops": 41.267463048059234, "step_time": 0.4999360752105712} +{"epoch": 0, "iter": 22065, "iter_tflops": 17.860247619661266, "iter_time": 1.1551403961181639, "loss": 0.3993086814880371, "lr": 5.115719064994246e-06, 
"seqlen": 8192.0, "step_tflops": 18.777282669468473, "step_time": 1.0987262573242187} +{"epoch": 0, "iter": 22066, "iter_tflops": 16.112089138200602, "iter_time": 1.280472900390625, "loss": 0.45211857557296753, "lr": 5.080322020145225e-06, "seqlen": 8192.0, "step_tflops": 19.624648469567973, "step_time": 1.0512847423553464} +{"epoch": 0, "iter": 22067, "iter_tflops": 38.09833471809641, "iter_time": 0.5415221862792969, "loss": 0.29155638813972473, "lr": 5.045022878513122e-06, "seqlen": 8192.0, "step_tflops": 41.64571302024813, "step_time": 0.4953953723907471} +{"epoch": 0, "iter": 22068, "iter_tflops": 39.31353936065624, "iter_time": 0.5247834167480468, "loss": 0.25338655710220337, "lr": 5.009821988486227e-06, "seqlen": 8192.0, "step_tflops": 42.699759489426675, "step_time": 0.48316650390625} +{"epoch": 0, "iter": 22069, "iter_tflops": 33.82544832051989, "iter_time": 0.6099281616210938, "loss": 0.05794364959001541, "lr": 4.9747196974831075e-06, "seqlen": 8192.0, "step_tflops": 37.74032418824678, "step_time": 0.5466591491699219} +{"epoch": 0, "iter": 22070, "iter_tflops": 37.779042433777505, "iter_time": 0.5460988998413085, "loss": 0.04750529304146767, "lr": 4.9397163519492e-06, "seqlen": 8192.0, "step_tflops": 42.118328191318156, "step_time": 0.48983647727966306} +{"epoch": 0, "iter": 22071, "iter_tflops": 41.97151236791092, "iter_time": 0.491549919128418, "loss": 0.04735775664448738, "lr": 4.904812297353403e-06, "seqlen": 8192.0, "step_tflops": 46.27507111816929, "step_time": 0.4458360195159913} +{"epoch": 0, "iter": 22072, "iter_tflops": 39.67829203548163, "iter_time": 0.5199592132568359, "loss": 0.10159167647361755, "lr": 4.870007878184633e-06, "seqlen": 8192.0, "step_tflops": 43.24080572164768, "step_time": 0.47712093162536623} +{"epoch": 0, "iter": 22073, "iter_tflops": 17.26785948204939, "iter_time": 0.8917293701171876, "loss": 0.061369795352220535, "lr": 4.835303437948468e-06, "seqlen": 6160.0, "step_tflops": 18.432619092399303, "step_time": 
0.835380874633789} +{"epoch": 0, "iter": 22074, "iter_tflops": 7.061409019354079, "iter_time": 2.18062109375, "loss": 0.04125990346074104, "lr": 4.800699319163711e-06, "seqlen": 6160.0, "step_tflops": 8.256099593623016, "step_time": 1.865076515197754} +{"epoch": 0, "iter": 22075, "iter_tflops": 11.660275791871522, "iter_time": 1.3205740356445312, "loss": 0.06578555703163147, "lr": 4.766195863359055e-06, "seqlen": 6160.0, "step_tflops": 15.015097369791937, "step_time": 1.025518321990967} +{"epoch": 0, "iter": 22076, "iter_tflops": 18.459820321380548, "iter_time": 0.8341499099731445, "loss": 0.11894821375608444, "lr": 4.731793411069669e-06, "seqlen": 6160.0, "step_tflops": 23.95045201909105, "step_time": 0.6429213714599609} +{"epoch": 0, "iter": 22077, "iter_tflops": 22.800800389211233, "iter_time": 0.6592011718749999, "loss": 0.2337363064289093, "lr": 4.697492301833878e-06, "seqlen": 6016.0, "step_tflops": 24.302966897779108, "step_time": 0.6184559440612794} +{"epoch": 0, "iter": 22078, "iter_tflops": 11.911077332490027, "iter_time": 1.261876983642578, "loss": 0.3639778792858124, "lr": 4.663292874189776e-06, "seqlen": 6016.0, "step_tflops": 14.10359423393065, "step_time": 1.065708080291748} +{"epoch": 0, "iter": 22079, "iter_tflops": 26.90919840327254, "iter_time": 0.5585567474365234, "loss": 0.2163238525390625, "lr": 4.629195465671905e-06, "seqlen": 6016.0, "step_tflops": 28.68106552574265, "step_time": 0.5240500679016113} +{"epoch": 0, "iter": 22080, "iter_tflops": 27.12892404343521, "iter_time": 0.5540328216552735, "loss": 0.2878550887107849, "lr": 4.595200412807928e-06, "seqlen": 6016.0, "step_tflops": 28.95300588809189, "step_time": 0.5191279411315919} +{"epoch": 0, "iter": 22081, "iter_tflops": 43.170774152099405, "iter_time": 0.47789491653442384, "loss": 0.24719318747520447, "lr": 4.561308051115286e-06, "seqlen": 8192.0, "step_tflops": 47.234028708553474, "step_time": 0.43678454017639157} +{"epoch": 0, "iter": 22082, "iter_tflops": 46.423179679507356, 
"iter_time": 0.44441362380981453, "loss": 0.19662721455097198, "lr": 4.5275187150979124e-06, "seqlen": 8192.0, "step_tflops": 50.50224893518285, "step_time": 0.4085183124542236} +{"epoch": 0, "iter": 22083, "iter_tflops": 47.023442741773174, "iter_time": 0.43874060058593756, "loss": 0.2801719605922699, "lr": 4.493832738242905e-06, "seqlen": 8192.0, "step_tflops": 50.9366356864098, "step_time": 0.4050344753265381} +{"epoch": 0, "iter": 22084, "iter_tflops": 47.15712479865613, "iter_time": 0.43749684906005865, "loss": 0.2141115665435791, "lr": 4.460250453017264e-06, "seqlen": 8192.0, "step_tflops": 50.69179343454567, "step_time": 0.4069907989501953} +{"epoch": 0, "iter": 22085, "iter_tflops": 28.566030831899383, "iter_time": 0.7222247161865234, "loss": 0.45736128091812134, "lr": 4.426772190864578e-06, "seqlen": 8192.0, "step_tflops": 30.33294753144477, "step_time": 0.6801545906066895} +{"epoch": 0, "iter": 22086, "iter_tflops": 18.48415759062752, "iter_time": 1.1161500549316405, "loss": 0.39566531777381897, "lr": 4.393398282201788e-06, "seqlen": 8192.0, "step_tflops": 20.436644463638075, "step_time": 1.0095147247314453} +{"epoch": 0, "iter": 22087, "iter_tflops": 41.010675921476725, "iter_time": 0.5030664100646971, "loss": 0.30488577485084534, "lr": 4.360129056415895e-06, "seqlen": 8192.0, "step_tflops": 45.92920103984123, "step_time": 0.4491933898925782} +{"epoch": 0, "iter": 22088, "iter_tflops": 46.49435173305685, "iter_time": 0.4437333297729492, "loss": 0.3139418959617615, "lr": 4.32696484186072e-06, "seqlen": 8192.0, "step_tflops": 50.34793475496483, "step_time": 0.4097704029083252} +{"epoch": 0, "iter": 22089, "iter_tflops": 32.067581599605674, "iter_time": 0.6433629379272462, "loss": 0.20412497222423553, "lr": 4.293905965853682e-06, "seqlen": 8192.0, "step_tflops": 34.29688834896212, "step_time": 0.6015441780090333} +{"epoch": 0, "iter": 22090, "iter_tflops": 19.32595405765328, "iter_time": 1.0675329895019532, "loss": 0.23250088095664978, "lr": 
4.260952754672532e-06, "seqlen": 8192.0, "step_tflops": 21.607462764984184, "step_time": 0.9548133316040038} +{"epoch": 0, "iter": 22091, "iter_tflops": 38.40134265825669, "iter_time": 0.5372492752075195, "loss": 0.23392049968242645, "lr": 4.22810553355217e-06, "seqlen": 8192.0, "step_tflops": 41.729352682301254, "step_time": 0.49440243339538575} +{"epoch": 0, "iter": 22092, "iter_tflops": 38.86292334476075, "iter_time": 0.5308682861328125, "loss": 0.24739255011081696, "lr": 4.195364626681396e-06, "seqlen": 8192.0, "step_tflops": 42.56748605559375, "step_time": 0.4846678867340088} +{"epoch": 0, "iter": 22093, "iter_tflops": 21.199506906467693, "iter_time": 0.9731874237060547, "loss": 0.5575512051582336, "lr": 4.162730357199753e-06, "seqlen": 8192.0, "step_tflops": 23.090945971203425, "step_time": 0.8934711265563965} +{"epoch": 0, "iter": 22094, "iter_tflops": 13.046991553145745, "iter_time": 1.581291244506836, "loss": 0.4961814284324646, "lr": 4.1302030471943e-06, "seqlen": 8192.0, "step_tflops": 15.27508566947118, "step_time": 1.3506368446350097} +{"epoch": 0, "iter": 22095, "iter_tflops": 44.1417311165581, "iter_time": 0.46738297271728513, "loss": 0.4330843389034271, "lr": 4.097783017696459e-06, "seqlen": 8192.0, "step_tflops": 47.84340824084946, "step_time": 0.4312212333679199} +{"epoch": 0, "iter": 22096, "iter_tflops": 47.41594725246138, "iter_time": 0.43510874938964844, "loss": 0.384082168340683, "lr": 4.06547058867883e-06, "seqlen": 8192.0, "step_tflops": 50.95901780111966, "step_time": 0.4048565769195557} +{"epoch": 0, "iter": 22097, "iter_tflops": 33.938135771655276, "iter_time": 0.6079029693603516, "loss": 0.3177569508552551, "lr": 4.033266079052039e-06, "seqlen": 8192.0, "step_tflops": 36.34022474897146, "step_time": 0.5677205810546875} +{"epoch": 0, "iter": 22098, "iter_tflops": 16.439268241146483, "iter_time": 1.254988555908203, "loss": 0.2172161191701889, "lr": 4.001169806661603e-06, "seqlen": 8192.0, "step_tflops": 20.072021696044725, "step_time": 
1.0278532886505125} +{"epoch": 0, "iter": 22099, "iter_tflops": 38.22019895246894, "iter_time": 0.5397955551147461, "loss": 0.2050890475511551, "lr": 3.969182088284761e-06, "seqlen": 8192.0, "step_tflops": 42.0383184156379, "step_time": 0.490768762588501} +{"epoch": 0, "iter": 22100, "iter_tflops": 39.569208623954054, "iter_time": 0.5213926239013671, "loss": 0.19664053618907928, "lr": 3.937303239627393e-06, "seqlen": 8192.0, "step_tflops": 43.05468602334524, "step_time": 0.4791834621429443} +{"epoch": 0, "iter": 22101, "iter_tflops": 25.52081889691389, "iter_time": 0.8084024887084962, "loss": 0.04049227014183998, "lr": 3.905533575320853e-06, "seqlen": 8192.0, "step_tflops": 27.8534417362091, "step_time": 0.7407017669677735} +{"epoch": 0, "iter": 22102, "iter_tflops": 48.52054161503572, "iter_time": 0.4252032814025879, "loss": 0.050619132816791534, "lr": 3.873873408918913e-06, "seqlen": 8192.0, "step_tflops": 53.24688380511162, "step_time": 0.38746105003356934} +{"epoch": 0, "iter": 22103, "iter_tflops": 48.44051672360579, "iter_time": 0.42590572738647453, "loss": 0.04005526751279831, "lr": 3.842323052894623e-06, "seqlen": 8192.0, "step_tflops": 53.041422985337405, "step_time": 0.3889619159698487} +{"epoch": 0, "iter": 22104, "iter_tflops": 54.46565008444592, "iter_time": 0.3787909164428711, "loss": 0.05814797431230545, "lr": 3.8108828186372686e-06, "seqlen": 8192.0, "step_tflops": 58.94946347634998, "step_time": 0.3499793262481689} +{"epoch": 0, "iter": 22105, "iter_tflops": 42.33275696787122, "iter_time": 0.4873553009033203, "loss": 0.5821571350097656, "lr": 3.779553016449266e-06, "seqlen": 8192.0, "step_tflops": 46.0812511430018, "step_time": 0.44771122741699215} +{"epoch": 0, "iter": 22106, "iter_tflops": 41.60758502016962, "iter_time": 0.4958493385314941, "loss": 0.5016841888427734, "lr": 3.7483339555431063e-06, "seqlen": 8192.0, "step_tflops": 44.88216444485071, "step_time": 0.45967242813110354} +{"epoch": 0, "iter": 22107, "iter_tflops": 42.38299363314541, 
"iter_time": 0.48677763748168945, "loss": 0.5396092534065247, "lr": 3.717225944038331e-06, "seqlen": 8192.0, "step_tflops": 45.50070824092966, "step_time": 0.45342356872558603} +{"epoch": 0, "iter": 22108, "iter_tflops": 45.45432819636717, "iter_time": 0.4538862266540527, "loss": 0.5113757252693176, "lr": 3.686229288958442e-06, "seqlen": 8192.0, "step_tflops": 48.98487212221258, "step_time": 0.42117275428771966} +{"epoch": 0, "iter": 22109, "iter_tflops": 33.412113937884314, "iter_time": 0.6174734573364258, "loss": 0.5175461769104004, "lr": 3.655344296227923e-06, "seqlen": 8192.0, "step_tflops": 35.587908524775294, "step_time": 0.5797220001220703} +{"epoch": 0, "iter": 22110, "iter_tflops": 25.81610958785735, "iter_time": 0.7991557922363282, "loss": 0.36656278371810913, "lr": 3.624571270669172e-06, "seqlen": 8192.0, "step_tflops": 28.68127045143172, "step_time": 0.7193228607177734} +{"epoch": 0, "iter": 22111, "iter_tflops": 42.70467396328794, "iter_time": 0.4831109008789063, "loss": 0.480394572019577, "lr": 3.5939105159995363e-06, "seqlen": 8192.0, "step_tflops": 46.02106820189425, "step_time": 0.44829671096801754} +{"epoch": 0, "iter": 22112, "iter_tflops": 40.54600460808282, "iter_time": 0.5088317260742188, "loss": 0.35118812322616577, "lr": 3.56336233482828e-06, "seqlen": 8192.0, "step_tflops": 43.38173731162009, "step_time": 0.47557093811035156} +{"epoch": 0, "iter": 22113, "iter_tflops": 33.07192944588354, "iter_time": 0.6238249130249024, "loss": 0.4217457175254822, "lr": 3.532927028653627e-06, "seqlen": 8192.0, "step_tflops": 35.353651389206426, "step_time": 0.5835633010864258} +{"epoch": 0, "iter": 22114, "iter_tflops": 8.609875568495747, "iter_time": 2.3962127380371094, "loss": 0.5422706604003906, "lr": 3.502604897859754e-06, "seqlen": 8192.0, "step_tflops": 11.030367673736544, "step_time": 1.8703903732299807} +{"epoch": 0, "iter": 22115, "iter_tflops": 15.061386355133463, "iter_time": 1.3698004302978517, "loss": 0.410373717546463, "lr": 
3.4723962417138512e-06, "seqlen": 8192.0, "step_tflops": 19.267125671836798, "step_time": 1.0707924919128418} +{"epoch": 0, "iter": 22116, "iter_tflops": 36.40837350668584, "iter_time": 0.5666579284667969, "loss": 0.397474080324173, "lr": 3.442301358363163e-06, "seqlen": 8192.0, "step_tflops": 39.84014899402615, "step_time": 0.5178467960357666} +{"epoch": 0, "iter": 22117, "iter_tflops": 15.179751492432727, "iter_time": 1.1630693359375, "loss": 0.3798743188381195, "lr": 3.412320544832033e-06, "seqlen": 7040.0, "step_tflops": 16.31982269700254, "step_time": 1.0818195648193358} +{"epoch": 0, "iter": 22118, "iter_tflops": 24.260950609713984, "iter_time": 0.7277168884277344, "loss": 0.3009268045425415, "lr": 3.3824540970189943e-06, "seqlen": 7040.0, "step_tflops": 28.905692095529538, "step_time": 0.6107829360961915} +{"epoch": 0, "iter": 22119, "iter_tflops": 32.531846048953646, "iter_time": 0.5427021713256837, "loss": 0.43362945318222046, "lr": 3.35270230969382e-06, "seqlen": 7040.0, "step_tflops": 34.643772408120135, "step_time": 0.5096183891296386} +{"epoch": 0, "iter": 22120, "iter_tflops": 27.852004476165174, "iter_time": 0.6338898696899413, "loss": 0.303824245929718, "lr": 3.32306547649465e-06, "seqlen": 7040.0, "step_tflops": 29.879094739580495, "step_time": 0.5908848190307617} +{"epoch": 0, "iter": 22121, "iter_tflops": 40.01018732992555, "iter_time": 0.5156460113525392, "loss": 0.3478209674358368, "lr": 3.293543889925053e-06, "seqlen": 8192.0, "step_tflops": 43.56338304562198, "step_time": 0.47358795547485344} +{"epoch": 0, "iter": 22122, "iter_tflops": 44.71828523446448, "iter_time": 0.461356990814209, "loss": 0.4343723654747009, "lr": 3.2641378413511783e-06, "seqlen": 8192.0, "step_tflops": 48.546676322919524, "step_time": 0.4249743766784668} +{"epoch": 0, "iter": 22123, "iter_tflops": 50.623599031037976, "iter_time": 0.40753905105590815, "loss": 0.5036220550537109, "lr": 3.2348476209988465e-06, "seqlen": 8192.0, "step_tflops": 54.68181110052659, 
"step_time": 0.37729352951049805} +{"epoch": 0, "iter": 22124, "iter_tflops": 50.805053877375094, "iter_time": 0.4060834884643555, "loss": 0.40567299723625183, "lr": 3.205673517950716e-06, "seqlen": 8192.0, "step_tflops": 55.170435192101934, "step_time": 0.37395198059082024} +{"epoch": 0, "iter": 22125, "iter_tflops": 35.02045148894161, "iter_time": 0.589115577697754, "loss": 0.2054896354675293, "lr": 3.176615820143401e-06, "seqlen": 8192.0, "step_tflops": 37.5767614009367, "step_time": 0.5490386276245117} +{"epoch": 0, "iter": 22126, "iter_tflops": 25.60709993743954, "iter_time": 0.8056786422729492, "loss": 0.1919478178024292, "lr": 3.1476748143646437e-06, "seqlen": 8192.0, "step_tflops": 31.327206357362893, "step_time": 0.6585679321289063} +{"epoch": 0, "iter": 22127, "iter_tflops": 45.18560000656349, "iter_time": 0.45658558273315425, "loss": 0.1461445689201355, "lr": 3.118850786250495e-06, "seqlen": 8192.0, "step_tflops": 48.566208177019675, "step_time": 0.4248034648895264} +{"epoch": 0, "iter": 22128, "iter_tflops": 45.33731546015003, "iter_time": 0.4550576782226562, "loss": 0.20017914474010468, "lr": 3.0901440202824693e-06, "seqlen": 8192.0, "step_tflops": 49.269123664439775, "step_time": 0.41874285507202147} +{"epoch": 0, "iter": 22129, "iter_tflops": 22.971563973292437, "iter_time": 0.8981144485473631, "loss": 0.44724053144454956, "lr": 3.0615547997847626e-06, "seqlen": 8192.0, "step_tflops": 24.171184950616183, "step_time": 0.8535408401489258} +{"epoch": 0, "iter": 22130, "iter_tflops": 13.861730466193057, "iter_time": 1.4883490600585938, "loss": 0.3856678903102875, "lr": 3.0330834069214337e-06, "seqlen": 8192.0, "step_tflops": 17.170149729247715, "step_time": 1.2015674781799315} +{"epoch": 0, "iter": 22131, "iter_tflops": 42.5893271481467, "iter_time": 0.48441933441162105, "loss": 0.520271360874176, "lr": 3.0047301226936412e-06, "seqlen": 8192.0, "step_tflops": 46.97697967857759, "step_time": 0.43917454147338864} +{"epoch": 0, "iter": 22132, "iter_tflops": 
47.67446346212927, "iter_time": 0.4327493591308593, "loss": 0.4561571478843689, "lr": 2.976495226936849e-06, "seqlen": 8192.0, "step_tflops": 51.49527609625101, "step_time": 0.4006405067443848} +{"epoch": 0, "iter": 22133, "iter_tflops": 41.27304468518815, "iter_time": 0.499868465423584, "loss": 0.09368631988763809, "lr": 2.9483789983180857e-06, "seqlen": 8192.0, "step_tflops": 45.04074269594385, "step_time": 0.45805402565002434} +{"epoch": 0, "iter": 22134, "iter_tflops": 11.018181624299853, "iter_time": 1.8724590148925782, "loss": 0.09663617610931396, "lr": 2.920381714333172e-06, "seqlen": 8192.0, "step_tflops": 14.08884063342063, "step_time": 1.464357078552246} +{"epoch": 0, "iter": 22135, "iter_tflops": 15.551883055515372, "iter_time": 1.3265977783203127, "loss": 0.14093585312366486, "lr": 2.8925036513039955e-06, "seqlen": 8192.0, "step_tflops": 18.3550867675223, "step_time": 1.123998691558838} +{"epoch": 0, "iter": 22136, "iter_tflops": 16.318062473666313, "iter_time": 1.264310241699219, "loss": 0.1030503511428833, "lr": 2.86474508437579e-06, "seqlen": 8192.0, "step_tflops": 20.4694696251661, "step_time": 1.0078958511352538} +{"epoch": 0, "iter": 22137, "iter_tflops": 19.985992754566666, "iter_time": 0.7806885375976562, "loss": 0.23610685765743256, "lr": 2.837106287514397e-06, "seqlen": 6240.0, "step_tflops": 21.755688354131678, "step_time": 0.7171841773986816} +{"epoch": 0, "iter": 22138, "iter_tflops": 24.258139816588884, "iter_time": 0.6431999969482423, "loss": 0.30706170201301575, "lr": 2.809587533503591e-06, "seqlen": 6240.0, "step_tflops": 26.105159481709002, "step_time": 0.597691635131836} +{"epoch": 0, "iter": 22139, "iter_tflops": 22.426870123835904, "iter_time": 0.6957205963134765, "loss": 0.34000176191329956, "lr": 2.7821890939423576e-06, "seqlen": 6240.0, "step_tflops": 24.256145648645525, "step_time": 0.6432528762817383} +{"epoch": 0, "iter": 22140, "iter_tflops": 23.760163840734403, "iter_time": 0.6566804656982422, "loss": 0.33936285972595215, 
"lr": 2.754911239242241e-06, "seqlen": 6240.0, "step_tflops": 25.51404389268506, "step_time": 0.6115391006469726} +{"epoch": 0, "iter": 22141, "iter_tflops": 11.877033370669329, "iter_time": 0.8690845413208008, "loss": 0.008038648404181004, "lr": 2.7277542386246455e-06, "seqlen": 4160.0, "step_tflops": 12.763151745150203, "step_time": 0.8087458572387696} +{"epoch": 0, "iter": 22142, "iter_tflops": 4.288322039910175, "iter_time": 2.4070361328125003, "loss": 0.013413653708994389, "lr": 2.7007183601182144e-06, "seqlen": 4160.0, "step_tflops": 5.20460335106978, "step_time": 1.9832723846435547} +{"epoch": 0, "iter": 22143, "iter_tflops": 6.169169310175606, "iter_time": 1.6731824951171874, "loss": 0.004494288004934788, "lr": 2.673803870556148e-06, "seqlen": 4160.0, "step_tflops": 7.200344891628166, "step_time": 1.4335627326965334} +{"epoch": 0, "iter": 22144, "iter_tflops": 20.85756249350629, "iter_time": 0.49488745880126955, "loss": 0.004513614811003208, "lr": 2.6470110355735884e-06, "seqlen": 4160.0, "step_tflops": 23.23474384417261, "step_time": 0.44425478363037113} +{"epoch": 0, "iter": 22145, "iter_tflops": 14.14070635045587, "iter_time": 1.086038818359375, "loss": 0.31318360567092896, "lr": 2.6203401196050065e-06, "seqlen": 6144.0, "step_tflops": 15.154682144376459, "step_time": 1.0133736801147462} +{"epoch": 0, "iter": 22146, "iter_tflops": 20.210941478112975, "iter_time": 0.7598535690307617, "loss": 0.2685845196247101, "lr": 2.5937913858815708e-06, "seqlen": 6144.0, "step_tflops": 22.31835181781549, "step_time": 0.6881043968200684} +{"epoch": 0, "iter": 22147, "iter_tflops": 23.233181854677756, "iter_time": 0.6610095901489258, "loss": 0.3157278001308441, "lr": 2.5673650964285718e-06, "seqlen": 6144.0, "step_tflops": 25.052196876671406, "step_time": 0.6130143432617188} +{"epoch": 0, "iter": 22148, "iter_tflops": 24.04064280253824, "iter_time": 0.6388080444335937, "loss": 0.32841604948043823, "lr": 2.541061512062808e-06, "seqlen": 6144.0, "step_tflops": 
25.846439710027035, "step_time": 0.5941768455505372} +{"epoch": 0, "iter": 22149, "iter_tflops": 33.492566119873366, "iter_time": 0.5665360412597655, "loss": 0.10071250796318054, "lr": 2.5148808923900515e-06, "seqlen": 7552.0, "step_tflops": 37.27307750713327, "step_time": 0.5090737628936768} +{"epoch": 0, "iter": 22150, "iter_tflops": 34.02642537509196, "iter_time": 0.5576473464965821, "loss": 0.0886346697807312, "lr": 2.4888234958024333e-06, "seqlen": 7552.0, "step_tflops": 38.51327230083235, "step_time": 0.49268069648742674} +{"epoch": 0, "iter": 22151, "iter_tflops": 42.13817409670559, "iter_time": 0.4502982444763184, "loss": 0.09674374759197235, "lr": 2.4628895794759493e-06, "seqlen": 7552.0, "step_tflops": 46.26488415039869, "step_time": 0.41013278579711915} +{"epoch": 0, "iter": 22152, "iter_tflops": 39.12627360508275, "iter_time": 0.48496174240112305, "loss": 0.08305132389068604, "lr": 2.437079399367875e-06, "seqlen": 7552.0, "step_tflops": 43.003329382650385, "step_time": 0.44123899459838867} +{"epoch": 0, "iter": 22153, "iter_tflops": 2.703846831135461, "iter_time": 0.6813653335571289, "loss": 0.00764064583927393, "lr": 2.4113932102142765e-06, "seqlen": 752.0, "step_tflops": 2.9842072348031734, "step_time": 0.6173523998260498} +{"epoch": 0, "iter": 22154, "iter_tflops": 3.0760691440646215, "iter_time": 0.5989161529541016, "loss": 0.20823100209236145, "lr": 2.3858312655274643e-06, "seqlen": 752.0, "step_tflops": 3.3843486826317593, "step_time": 0.5443610191345214} +{"epoch": 0, "iter": 22155, "iter_tflops": 3.627421311966291, "iter_time": 0.5078835182189941, "loss": 0.33377617597579956, "lr": 2.360393817593519e-06, "seqlen": 752.0, "step_tflops": 3.981965255992847, "step_time": 0.4626628761291504} +{"epoch": 0, "iter": 22156, "iter_tflops": 3.57315096172219, "iter_time": 0.5155974426269532, "loss": 0.5138541460037231, "lr": 2.335081117469777e-06, "seqlen": 752.0, "step_tflops": 3.9047140560879736, "step_time": 0.4718162384033203} +{"epoch": 0, "iter": 
22157, "iter_tflops": 22.893623945880712, "iter_time": 0.9011720275878907, "loss": 0.4960174560546875, "lr": 2.3098934149823686e-06, "seqlen": 8192.0, "step_tflops": 24.87848302012392, "step_time": 0.8292745780944825} +{"epoch": 0, "iter": 22158, "iter_tflops": 22.319400373005905, "iter_time": 0.9243569793701173, "loss": 0.443986177444458, "lr": 2.284830958723755e-06, "seqlen": 8192.0, "step_tflops": 26.733099614289372, "step_time": 0.7717434120178223} +{"epoch": 0, "iter": 22159, "iter_tflops": 41.770039468850904, "iter_time": 0.49392085266113284, "loss": 0.35513222217559814, "lr": 2.2598939960502528e-06, "seqlen": 8192.0, "step_tflops": 45.02877282233418, "step_time": 0.4581757888793946} +{"epoch": 0, "iter": 22160, "iter_tflops": 43.17993767941519, "iter_time": 0.47779349899291995, "loss": 0.4505140781402588, "lr": 2.235082773079624e-06, "seqlen": 8192.0, "step_tflops": 46.30562735004663, "step_time": 0.445541820526123} +{"epoch": 0, "iter": 22161, "iter_tflops": 36.060829937459005, "iter_time": 0.5721192092895508, "loss": 0.031531646847724915, "lr": 2.2103975346886172e-06, "seqlen": 8192.0, "step_tflops": 38.67777167379055, "step_time": 0.5334095687866212} +{"epoch": 0, "iter": 22162, "iter_tflops": 27.346017519492403, "iter_time": 0.7544459991455079, "loss": 0.03950831666588783, "lr": 2.1858385245105744e-06, "seqlen": 8192.0, "step_tflops": 31.162090786536766, "step_time": 0.6620574226379394} +{"epoch": 0, "iter": 22163, "iter_tflops": 53.20935589430314, "iter_time": 0.38773432159423826, "loss": 0.016219040378928185, "lr": 2.1614059849330054e-06, "seqlen": 8192.0, "step_tflops": 58.63723650338435, "step_time": 0.35184286880493165} +{"epoch": 0, "iter": 22164, "iter_tflops": 50.72849628290364, "iter_time": 0.4066963348388672, "loss": 0.03384095057845116, "lr": 2.1371001570952187e-06, "seqlen": 8192.0, "step_tflops": 55.127597810113244, "step_time": 0.3742425632476807} +{"epoch": 0, "iter": 22165, "iter_tflops": 30.3088707229085, "iter_time": 0.6806948928833007, 
"loss": 0.4676190912723541, "lr": 2.1129212808859184e-06, "seqlen": 8192.0, "step_tflops": 32.250640904356466, "step_time": 0.6397111167907714} +{"epoch": 0, "iter": 22166, "iter_tflops": 28.37295666696076, "iter_time": 0.7271393585205077, "loss": 0.4270329475402832, "lr": 2.088869594940847e-06, "seqlen": 8192.0, "step_tflops": 34.73020142060658, "step_time": 0.5940389823913573} +{"epoch": 0, "iter": 22167, "iter_tflops": 37.236283727984194, "iter_time": 0.5540588760375976, "loss": 0.3523358702659607, "lr": 2.0649453366404438e-06, "seqlen": 8192.0, "step_tflops": 40.55828105226013, "step_time": 0.5086777095794678} +{"epoch": 0, "iter": 22168, "iter_tflops": 36.20186787679476, "iter_time": 0.5698903045654297, "loss": 0.49719518423080444, "lr": 2.041148742107471e-06, "seqlen": 8192.0, "step_tflops": 39.695853488995006, "step_time": 0.5197291831970214} +{"epoch": 0, "iter": 22169, "iter_tflops": 21.803093102972838, "iter_time": 0.94624617767334, "loss": 0.1989043802022934, "lr": 2.017480046204718e-06, "seqlen": 8192.0, "step_tflops": 23.378410025464074, "step_time": 0.8824848861694335} +{"epoch": 0, "iter": 22170, "iter_tflops": 21.254428700937527, "iter_time": 0.9706726913452148, "loss": 0.2621697187423706, "lr": 1.9939394825326494e-06, "seqlen": 8192.0, "step_tflops": 28.630573402445243, "step_time": 0.7205965881347656} +{"epoch": 0, "iter": 22171, "iter_tflops": 41.2607622597748, "iter_time": 0.5000172653198243, "loss": 0.29950207471847534, "lr": 1.9705272834271337e-06, "seqlen": 8192.0, "step_tflops": 45.170901963815766, "step_time": 0.4567341499328613} +{"epoch": 0, "iter": 22172, "iter_tflops": 42.28990309950919, "iter_time": 0.48784915542602536, "loss": 0.20454838871955872, "lr": 1.9472436799571145e-06, "seqlen": 8192.0, "step_tflops": 46.100001872047955, "step_time": 0.4475291252136231} +{"epoch": 0, "iter": 22173, "iter_tflops": 35.01166372267785, "iter_time": 0.589263442993164, "loss": 0.10992120206356049, "lr": 1.9240889019223683e-06, "seqlen": 8192.0, 
"step_tflops": 39.03367119215261, "step_time": 0.528546070098877} +{"epoch": 0, "iter": 22174, "iter_tflops": 35.94864001954082, "iter_time": 0.5739047012329102, "loss": 0.07417213916778564, "lr": 1.901063177851201e-06, "seqlen": 8192.0, "step_tflops": 40.5167598748914, "step_time": 0.5091989974975586} +{"epoch": 0, "iter": 22175, "iter_tflops": 41.671562945595404, "iter_time": 0.49508806610107414, "loss": 0.0938768982887268, "lr": 1.8781667349982096e-06, "seqlen": 8192.0, "step_tflops": 45.526597887375765, "step_time": 0.4531657199859619} +{"epoch": 0, "iter": 22176, "iter_tflops": 45.10970157507438, "iter_time": 0.45735380172729484, "loss": 0.058916348963975906, "lr": 1.8553997993420495e-06, "seqlen": 8192.0, "step_tflops": 49.263800149249185, "step_time": 0.4187881050109863} +{"epoch": 0, "iter": 22177, "iter_tflops": 19.97595940226155, "iter_time": 1.032796127319336, "loss": 0.12492009252309799, "lr": 1.8327625955831763e-06, "seqlen": 8192.0, "step_tflops": 21.51836762267754, "step_time": 0.9587666625976563} +{"epoch": 0, "iter": 22178, "iter_tflops": 36.40292823588698, "iter_time": 0.566742691040039, "loss": 0.17155975103378296, "lr": 1.8102553471416584e-06, "seqlen": 8192.0, "step_tflops": 45.46167719454222, "step_time": 0.4538128547668457} +{"epoch": 0, "iter": 22179, "iter_tflops": 53.79331762407856, "iter_time": 0.3835252113342285, "loss": 0.1873379796743393, "lr": 1.787878276154946e-06, "seqlen": 8192.0, "step_tflops": 58.54774240707722, "step_time": 0.35238068389892574} +{"epoch": 0, "iter": 22180, "iter_tflops": 51.06373768536761, "iter_time": 0.40402630996704103, "loss": 0.17386320233345032, "lr": 1.7656316034757024e-06, "seqlen": 8192.0, "step_tflops": 55.475502936733314, "step_time": 0.37189556503295895} +{"epoch": 0, "iter": 22181, "iter_tflops": 32.59710810897571, "iter_time": 0.6329117736816406, "loss": 0.5217831134796143, "lr": 1.7435155486695982e-06, "seqlen": 8192.0, "step_tflops": 34.74421412128856, "step_time": 0.5937994003295898} +{"epoch": 
0, "iter": 22182, "iter_tflops": 20.77825256069516, "iter_time": 0.9929176406860352, "loss": 0.5497341752052307, "lr": 1.721530330013173e-06, "seqlen": 8192.0, "step_tflops": 23.178902417302325, "step_time": 0.8900806922912597} +{"epoch": 0, "iter": 22183, "iter_tflops": 43.73502546079978, "iter_time": 0.47172931289672854, "loss": 0.5870184302330017, "lr": 1.699676164491652e-06, "seqlen": 8192.0, "step_tflops": 47.20645117837047, "step_time": 0.4370397052764892} +{"epoch": 0, "iter": 22184, "iter_tflops": 44.819636965365476, "iter_time": 0.4603137130737305, "loss": 0.5012447834014893, "lr": 1.677953267796833e-06, "seqlen": 8192.0, "step_tflops": 48.4766265001736, "step_time": 0.4255884742736817} +{"epoch": 0, "iter": 22185, "iter_tflops": 39.74593386229062, "iter_time": 0.5190743179321289, "loss": 0.3986457586288452, "lr": 1.6563618543249331e-06, "seqlen": 8192.0, "step_tflops": 43.409080372386406, "step_time": 0.4752713794708252} +{"epoch": 0, "iter": 22186, "iter_tflops": 37.0318745419693, "iter_time": 0.5571171798706055, "loss": 0.3136623203754425, "lr": 1.6349021371744833e-06, "seqlen": 8192.0, "step_tflops": 41.26273211321903, "step_time": 0.4999933948516846} +{"epoch": 0, "iter": 22187, "iter_tflops": 38.484870243244906, "iter_time": 0.5360832290649414, "loss": 0.4496181309223175, "lr": 1.6135743281442333e-06, "seqlen": 8192.0, "step_tflops": 41.862075266552644, "step_time": 0.49283494377136233} +{"epoch": 0, "iter": 22188, "iter_tflops": 37.54164376994381, "iter_time": 0.5495522155761718, "loss": 0.36170414090156555, "lr": 1.5923786377310433e-06, "seqlen": 8192.0, "step_tflops": 40.8161484046267, "step_time": 0.505463996887207} +{"epoch": 0, "iter": 22189, "iter_tflops": 20.580896485045454, "iter_time": 1.002439010620117, "loss": 0.12930360436439514, "lr": 1.5713152751278265e-06, "seqlen": 8192.0, "step_tflops": 21.711970886867714, "step_time": 0.9502174453735351} +{"epoch": 0, "iter": 22190, "iter_tflops": 24.884072595330597, "iter_time": 
0.8290883026123046, "loss": 0.14020729064941406, "lr": 1.5503844482214618e-06, "seqlen": 8192.0, "step_tflops": 28.016137763195548, "step_time": 0.7364003448486329} +{"epoch": 0, "iter": 22191, "iter_tflops": 38.088116483324306, "iter_time": 0.5416674652099609, "loss": 0.10055533051490784, "lr": 1.529586363590767e-06, "seqlen": 8192.0, "step_tflops": 41.912212916141094, "step_time": 0.49224538803100587} +{"epoch": 0, "iter": 22192, "iter_tflops": 38.53275877524024, "iter_time": 0.5354169845581055, "loss": 0.1509246528148651, "lr": 1.508921226504434e-06, "seqlen": 8192.0, "step_tflops": 42.364846969756336, "step_time": 0.48698614501953125} +{"epoch": 0, "iter": 22193, "iter_tflops": 23.03546851526808, "iter_time": 0.8956229171752929, "loss": 0.18825413286685944, "lr": 1.4883892409190292e-06, "seqlen": 8192.0, "step_tflops": 24.676941654372076, "step_time": 0.8360474243164061} +{"epoch": 0, "iter": 22194, "iter_tflops": 17.98527933118903, "iter_time": 1.1471099853515625, "loss": 0.18933215737342834, "lr": 1.467990609476959e-06, "seqlen": 8192.0, "step_tflops": 22.381162600859323, "step_time": 0.9218061580657959} +{"epoch": 0, "iter": 22195, "iter_tflops": 47.05664414025715, "iter_time": 0.4384310417175292, "loss": 0.17456746101379395, "lr": 1.4477255335044775e-06, "seqlen": 8192.0, "step_tflops": 51.017370835162666, "step_time": 0.4043935070037842} +{"epoch": 0, "iter": 22196, "iter_tflops": 48.49830099673665, "iter_time": 0.42539827346801756, "loss": 0.1727989763021469, "lr": 1.4275942130097097e-06, "seqlen": 8192.0, "step_tflops": 52.33089672015217, "step_time": 0.39424307250976565} +{"epoch": 0, "iter": 22197, "iter_tflops": 30.391975467130855, "iter_time": 0.6788335800170899, "loss": 0.2129313051700592, "lr": 1.4075968466806533e-06, "seqlen": 8192.0, "step_tflops": 32.344763275513806, "step_time": 0.6378495750427247} +{"epoch": 0, "iter": 22198, "iter_tflops": 8.224137116207901, "iter_time": 2.508602813720703, "loss": 0.23096036911010742, "lr": 
1.3877336318832468e-06, "seqlen": 8192.0, "step_tflops": 9.539209016478717, "step_time": 2.162767738342285} +{"epoch": 0, "iter": 22199, "iter_tflops": 12.006860526012227, "iter_time": 1.7182754364013673, "loss": 0.3765299916267395, "lr": 1.368004764659398e-06, "seqlen": 8192.0, "step_tflops": 14.822256566994406, "step_time": 1.3918996353149413} +{"epoch": 0, "iter": 22200, "iter_tflops": 34.907237505253796, "iter_time": 0.5910262451171875, "loss": 0.22982759773731232, "lr": 1.3484104397250652e-06, "seqlen": 8192.0, "step_tflops": 43.651187167136136, "step_time": 0.47263533592224116} +{"epoch": 0, "iter": 22201, "iter_tflops": 14.36339922107238, "iter_time": 1.1090971832275391, "loss": 0.283785343170166, "lr": 1.3289508504683206e-06, "seqlen": 6368.0, "step_tflops": 15.013071682266386, "step_time": 1.0611023483276367} +{"epoch": 0, "iter": 22202, "iter_tflops": 13.252509709344517, "iter_time": 1.2020670776367188, "loss": 0.3609052896499634, "lr": 1.3096261889474587e-06, "seqlen": 6368.0, "step_tflops": 16.95879715909855, "step_time": 0.9393594055175782} +{"epoch": 0, "iter": 22203, "iter_tflops": 24.39952387865568, "iter_time": 0.6528982162475586, "loss": 0.25873246788978577, "lr": 1.2904366458890837e-06, "seqlen": 6368.0, "step_tflops": 26.247519007206144, "step_time": 0.6069299583435059} +{"epoch": 0, "iter": 22204, "iter_tflops": 23.49624195656029, "iter_time": 0.6779980239868164, "loss": 0.2887028455734253, "lr": 1.271382410686237e-06, "seqlen": 6368.0, "step_tflops": 25.3009776277692, "step_time": 0.629635971069336} +{"epoch": 0, "iter": 22205, "iter_tflops": 22.992477655629198, "iter_time": 0.5934008712768555, "loss": 0.050492655485868454, "lr": 1.2524636713965316e-06, "seqlen": 5472.0, "step_tflops": 25.6437870219528, "step_time": 0.5320491962432861} +{"epoch": 0, "iter": 22206, "iter_tflops": 28.09873356702332, "iter_time": 0.485564811706543, "loss": 0.0476682186126709, "lr": 1.233680614740283e-06, "seqlen": 5472.0, "step_tflops": 31.37670664057537, 
"step_time": 0.4348371047973633} +{"epoch": 0, "iter": 22207, "iter_tflops": 28.30990393258386, "iter_time": 0.4819428672790528, "loss": 0.030185146257281303, "lr": 1.2150334260986818e-06, "seqlen": 5472.0, "step_tflops": 31.239003236160958, "step_time": 0.43675389289855954} +{"epoch": 0, "iter": 22208, "iter_tflops": 26.51608331089111, "iter_time": 0.5145464401245118, "loss": 0.0591689758002758, "lr": 1.1965222895119443e-06, "seqlen": 5472.0, "step_tflops": 29.33501126399145, "step_time": 0.4651014499664306} +{"epoch": 0, "iter": 22209, "iter_tflops": 33.24740647716629, "iter_time": 0.6205324172973633, "loss": 0.26261284947395325, "lr": 1.1781473876775246e-06, "seqlen": 8192.0, "step_tflops": 36.957968436939844, "step_time": 0.5582312660217285} +{"epoch": 0, "iter": 22210, "iter_tflops": 36.96029485698012, "iter_time": 0.5581961288452149, "loss": 0.4097942113876343, "lr": 1.1599089019482768e-06, "seqlen": 8192.0, "step_tflops": 40.49374444393508, "step_time": 0.509488410949707} +{"epoch": 0, "iter": 22211, "iter_tflops": 42.53685834597009, "iter_time": 0.48501686096191404, "loss": 0.38703998923301697, "lr": 1.1418070123306989e-06, "seqlen": 8192.0, "step_tflops": 46.30907487401632, "step_time": 0.4455086517333985} +{"epoch": 0, "iter": 22212, "iter_tflops": 40.54791577180045, "iter_time": 0.5088077430725098, "loss": 0.39106085896492004, "lr": 1.123841897483131e-06, "seqlen": 8192.0, "step_tflops": 44.21662499879821, "step_time": 0.4665913219451904} +{"epoch": 0, "iter": 22213, "iter_tflops": 22.23913232539007, "iter_time": 0.9276932754516601, "loss": 0.5529699325561523, "lr": 1.1060137347140008e-06, "seqlen": 8192.0, "step_tflops": 23.660650726672678, "step_time": 0.8719579925537109} +{"epoch": 0, "iter": 22214, "iter_tflops": 7.684783060908402, "iter_time": 2.6846683044433592, "loss": 0.5120484828948975, "lr": 1.088322699980081e-06, "seqlen": 8192.0, "step_tflops": 9.824313003606942, "step_time": 2.1000036849975583} +{"epoch": 0, "iter": 22215, "iter_tflops": 
12.169173776897427, "iter_time": 1.6953569641113282, "loss": 0.4092845618724823, "lr": 1.0707689678847398e-06, "seqlen": 8192.0, "step_tflops": 15.125271335927842, "step_time": 1.3640147705078125} +{"epoch": 0, "iter": 22216, "iter_tflops": 35.05850132978006, "iter_time": 0.5884761962890626, "loss": 0.33692824840545654, "lr": 1.0533527116762298e-06, "seqlen": 8192.0, "step_tflops": 38.00802265957051, "step_time": 0.5428089141845702} +{"epoch": 0, "iter": 22217, "iter_tflops": 12.717894684301363, "iter_time": 1.3170931701660153, "loss": 0.4895271360874176, "lr": 1.0360741032459636e-06, "seqlen": 6688.0, "step_tflops": 13.513202292403223, "step_time": 1.2395768127441404} +{"epoch": 0, "iter": 22218, "iter_tflops": 12.120258653377595, "iter_time": 1.3820375213623048, "loss": 0.36103618144989014, "lr": 1.0189333131268357e-06, "seqlen": 6688.0, "step_tflops": 14.140659468567604, "step_time": 1.1845736236572266} +{"epoch": 0, "iter": 22219, "iter_tflops": 22.935260595356247, "iter_time": 0.7303449707031251, "loss": 0.3895983397960663, "lr": 1.0019305104915205e-06, "seqlen": 6688.0, "step_tflops": 24.815041312916247, "step_time": 0.6750201225280762} +{"epoch": 0, "iter": 22220, "iter_tflops": 25.927278761608196, "iter_time": 0.6460628738403321, "loss": 0.29691585898399353, "lr": 9.850658631508198e-07, "seqlen": 6688.0, "step_tflops": 27.878331088407887, "step_time": 0.6008484573364258} +{"epoch": 0, "iter": 22221, "iter_tflops": 16.874946799682508, "iter_time": 1.222587173461914, "loss": 0.48273807764053345, "lr": 9.683395375519911e-07, "seqlen": 8192.0, "step_tflops": 18.197275154039882, "step_time": 1.133746307373047} +{"epoch": 0, "iter": 22222, "iter_tflops": 25.605614305244412, "iter_time": 0.8057253875732422, "loss": 0.43965238332748413, "lr": 9.517516987771157e-07, "seqlen": 8192.0, "step_tflops": 32.65514649549413, "step_time": 0.6317868919372558} +{"epoch": 0, "iter": 22223, "iter_tflops": 42.52336263071818, "iter_time": 0.48517079162597654, "loss": 
0.5033233761787415, "lr": 9.353025105414669e-07, "seqlen": 8192.0, "step_tflops": 45.70850434296536, "step_time": 0.45136225318908685} +{"epoch": 0, "iter": 22224, "iter_tflops": 46.44502152862508, "iter_time": 0.4442046279907227, "loss": 0.44218909740448, "lr": 9.189921351918889e-07, "seqlen": 8192.0, "step_tflops": 50.18994523986923, "step_time": 0.4110602912902832} +{"epoch": 0, "iter": 22225, "iter_tflops": 26.788254912907924, "iter_time": 0.770154441833496, "loss": 0.561758816242218, "lr": 9.028207337052052e-07, "seqlen": 8192.0, "step_tflops": 28.395679739151376, "step_time": 0.7265574798583985} +{"epoch": 0, "iter": 22226, "iter_tflops": 23.034960883510603, "iter_time": 0.8956426544189453, "loss": 0.553843080997467, "lr": 8.867884656866181e-07, "seqlen": 8192.0, "step_tflops": 28.91416044109466, "step_time": 0.713529052734375} +{"epoch": 0, "iter": 22227, "iter_tflops": 43.457644530299255, "iter_time": 0.47474026107788087, "loss": 0.3507198095321655, "lr": 8.708954893681421e-07, "seqlen": 8192.0, "step_tflops": 46.860547305686104, "step_time": 0.44026573944091796} +{"epoch": 0, "iter": 22228, "iter_tflops": 45.158215961063, "iter_time": 0.45686245727539065, "loss": 0.513821542263031, "lr": 8.551419616070321e-07, "seqlen": 8192.0, "step_tflops": 49.001106792154125, "step_time": 0.42103321456909176} +{"epoch": 0, "iter": 22229, "iter_tflops": 41.56831729130908, "iter_time": 0.4963177452087402, "loss": 0.25443652272224426, "lr": 8.395280378842551e-07, "seqlen": 8192.0, "step_tflops": 45.38631883879432, "step_time": 0.4545663547515869} +{"epoch": 0, "iter": 22230, "iter_tflops": 36.29644206968201, "iter_time": 0.5684053955078125, "loss": 0.28871405124664307, "lr": 8.240538723029295e-07, "seqlen": 8192.0, "step_tflops": 40.04053019644844, "step_time": 0.5152552528381348} +{"epoch": 0, "iter": 22231, "iter_tflops": 37.1745242945262, "iter_time": 0.5549793548583984, "loss": 0.2549925744533539, "lr": 8.087196175868206e-07, "seqlen": 8192.0, "step_tflops": 
40.57609403380501, "step_time": 0.5084543991088867} +{"epoch": 0, "iter": 22232, "iter_tflops": 34.55683136850293, "iter_time": 0.5970192489624024, "loss": 0.23132053017616272, "lr": 7.935254250788365e-07, "seqlen": 8192.0, "step_tflops": 37.296921470996885, "step_time": 0.5531580810546874} +{"epoch": 0, "iter": 22233, "iter_tflops": 21.379907682181745, "iter_time": 0.9649757995605469, "loss": 0.05745808780193329, "lr": 7.784714447395197e-07, "seqlen": 8192.0, "step_tflops": 23.100908873976103, "step_time": 0.8930857925415039} +{"epoch": 0, "iter": 22234, "iter_tflops": 24.325344484880116, "iter_time": 0.8481316070556641, "loss": 0.04702257364988327, "lr": 7.635578251455827e-07, "seqlen": 8192.0, "step_tflops": 30.021456127608253, "step_time": 0.6872116203308105} +{"epoch": 0, "iter": 22235, "iter_tflops": 48.59460638641928, "iter_time": 0.4245552139282226, "loss": 0.03363839164376259, "lr": 7.487847134884229e-07, "seqlen": 8192.0, "step_tflops": 52.99690002127876, "step_time": 0.3892886848449707} +{"epoch": 0, "iter": 22236, "iter_tflops": 52.85587411676783, "iter_time": 0.39032735443115235, "loss": 0.06487151980400085, "lr": 7.341522555726971e-07, "seqlen": 8192.0, "step_tflops": 57.48824995652165, "step_time": 0.35887496185302736} +{"epoch": 0, "iter": 22237, "iter_tflops": 24.59159548323644, "iter_time": 0.6694706420898437, "loss": 0.004610605537891388, "lr": 7.196605958148505e-07, "seqlen": 6576.0, "step_tflops": 26.103645746910196, "step_time": 0.6306916427612306} +{"epoch": 0, "iter": 22238, "iter_tflops": 12.544323982006203, "iter_time": 1.3124143829345702, "loss": 0.008012514561414719, "lr": 7.053098772417188e-07, "seqlen": 6576.0, "step_tflops": 14.7459591967338, "step_time": 1.1164652633666992} +{"epoch": 0, "iter": 22239, "iter_tflops": 34.906171467925944, "iter_time": 0.4716458587646485, "loss": 0.03455518186092377, "lr": 6.911002414890983e-07, "seqlen": 6576.0, "step_tflops": 38.65232211688877, "step_time": 0.4259343376159668} +{"epoch": 0, "iter": 
22240, "iter_tflops": 35.94527275293218, "iter_time": 0.45801158142089843, "loss": 0.003181551117449999, "lr": 6.770318288003557e-07, "seqlen": 6576.0, "step_tflops": 39.8491558616363, "step_time": 0.41314178085327147} +{"epoch": 0, "iter": 22241, "iter_tflops": 13.386187165286847, "iter_time": 1.5412225494384766, "loss": 0.4897307753562927, "lr": 6.631047780250482e-07, "seqlen": 8192.0, "step_tflops": 13.981617633044609, "step_time": 1.4755870208740234} +{"epoch": 0, "iter": 22242, "iter_tflops": 17.791242915149326, "iter_time": 1.1596206970214844, "loss": 0.5325692892074585, "lr": 6.493192266175418e-07, "seqlen": 8192.0, "step_tflops": 22.966969219859543, "step_time": 0.8982941246032715} +{"epoch": 0, "iter": 22243, "iter_tflops": 45.07680061080807, "iter_time": 0.45768761825561527, "loss": 0.48131516575813293, "lr": 6.356753106356666e-07, "seqlen": 8192.0, "step_tflops": 48.6035489124106, "step_time": 0.4244771003723144} +{"epoch": 0, "iter": 22244, "iter_tflops": 39.67858721414068, "iter_time": 0.5199553451538085, "loss": 0.3532560467720032, "lr": 6.221731647393608e-07, "seqlen": 8192.0, "step_tflops": 42.47926769037258, "step_time": 0.48567441558837887} +{"epoch": 0, "iter": 22245, "iter_tflops": 31.237265133712317, "iter_time": 0.6604641418457031, "loss": 0.34565311670303345, "lr": 6.088129221893584e-07, "seqlen": 8192.0, "step_tflops": 33.435414027388, "step_time": 0.6170431594848633} +{"epoch": 0, "iter": 22246, "iter_tflops": 13.11644059900819, "iter_time": 1.5729186096191405, "loss": 0.41430988907814026, "lr": 5.95594714845854e-07, "seqlen": 8192.0, "step_tflops": 18.04442979269817, "step_time": 1.1433497066497802} +{"epoch": 0, "iter": 22247, "iter_tflops": 41.039937871875615, "iter_time": 0.5027077178955077, "loss": 0.34170278906822205, "lr": 5.825186731672217e-07, "seqlen": 8192.0, "step_tflops": 44.88398140245091, "step_time": 0.45965382003784183} +{"epoch": 0, "iter": 22248, "iter_tflops": 42.28095639354592, "iter_time": 0.4879523849487305, "loss": 
0.411504328250885, "lr": 5.69584926208711e-07, "seqlen": 8192.0, "step_tflops": 46.203378404326784, "step_time": 0.44652781295776367} +{"epoch": 0, "iter": 22249, "iter_tflops": 24.86014564930366, "iter_time": 0.8298862686157227, "loss": 0.4397263526916504, "lr": 5.567936016211856e-07, "seqlen": 8192.0, "step_tflops": 26.943830492296183, "step_time": 0.7657075157165526} +{"epoch": 0, "iter": 22250, "iter_tflops": 36.38732144412744, "iter_time": 0.5669857711791992, "loss": 0.4686845541000366, "lr": 5.441448256498544e-07, "seqlen": 8192.0, "step_tflops": 39.852284600040285, "step_time": 0.5176891040802001} +{"epoch": 0, "iter": 22251, "iter_tflops": 38.81587534935066, "iter_time": 0.5315117416381836, "loss": 0.38006266951560974, "lr": 5.316387231330288e-07, "seqlen": 8192.0, "step_tflops": 42.406930353030745, "step_time": 0.48650287437438966} +{"epoch": 0, "iter": 22252, "iter_tflops": 38.40076952332496, "iter_time": 0.5372572937011719, "loss": 0.3930605947971344, "lr": 5.192754175008918e-07, "seqlen": 8192.0, "step_tflops": 41.96388225210673, "step_time": 0.4916392955780029} +{"epoch": 0, "iter": 22253, "iter_tflops": 32.15655365809509, "iter_time": 0.6415828552246094, "loss": 0.5226633548736572, "lr": 5.070550307742783e-07, "seqlen": 8192.0, "step_tflops": 35.50629066286684, "step_time": 0.5810545997619628} +{"epoch": 0, "iter": 22254, "iter_tflops": 36.7559239768119, "iter_time": 0.5612998199462891, "loss": 0.37945008277893066, "lr": 4.949776835634751e-07, "seqlen": 8192.0, "step_tflops": 40.023125742004595, "step_time": 0.5154793167114259} +{"epoch": 0, "iter": 22255, "iter_tflops": 38.54369716559807, "iter_time": 0.5352650375366211, "loss": 0.4131380021572113, "lr": 4.830434950670182e-07, "seqlen": 8192.0, "step_tflops": 41.69880537715244, "step_time": 0.4947646179199218} +{"epoch": 0, "iter": 22256, "iter_tflops": 35.147944256572785, "iter_time": 0.5869786682128906, "loss": 0.4431537091732025, "lr": 4.7125258307053385e-07, "seqlen": 8192.0, "step_tflops": 
38.319588372600386, "step_time": 0.5383954887390137} +{"epoch": 0, "iter": 22257, "iter_tflops": 34.519527652005216, "iter_time": 0.5976644210815429, "loss": 0.100074402987957, "lr": 4.5960506394555956e-07, "seqlen": 8192.0, "step_tflops": 38.30647477582867, "step_time": 0.5385797996520996} +{"epoch": 0, "iter": 22258, "iter_tflops": 39.2236176572913, "iter_time": 0.5259865036010742, "loss": 0.12110115587711334, "lr": 4.4810105264841304e-07, "seqlen": 8192.0, "step_tflops": 43.635134950725075, "step_time": 0.4728092060089111} +{"epoch": 0, "iter": 22259, "iter_tflops": 42.761515935211115, "iter_time": 0.4824687118530273, "loss": 0.11406619101762772, "lr": 4.367406627190351e-07, "seqlen": 8192.0, "step_tflops": 46.85946953768617, "step_time": 0.4402758655548096} +{"epoch": 0, "iter": 22260, "iter_tflops": 41.69279703006818, "iter_time": 0.4948359184265137, "loss": 0.10384981334209442, "lr": 4.2552400627989043e-07, "seqlen": 8192.0, "step_tflops": 45.64343783924958, "step_time": 0.45200568771362304} +{"epoch": 0, "iter": 22261, "iter_tflops": 20.585560647285288, "iter_time": 1.0022118835449219, "loss": 0.06491705030202866, "lr": 4.1445119403485165e-07, "seqlen": 8192.0, "step_tflops": 22.21764457246041, "step_time": 0.9285904922485352} +{"epoch": 0, "iter": 22262, "iter_tflops": 18.62652156154818, "iter_time": 1.1076192321777343, "loss": 0.07778582721948624, "lr": 4.0352233526810054e-07, "seqlen": 8192.0, "step_tflops": 21.747241626011178, "step_time": 0.9486763362884523} +{"epoch": 0, "iter": 22263, "iter_tflops": 41.84446071058302, "iter_time": 0.4930424041748047, "loss": 0.062340639531612396, "lr": 3.92737537843067e-07, "seqlen": 8192.0, "step_tflops": 45.984529104653205, "step_time": 0.448652925491333} +{"epoch": 0, "iter": 22264, "iter_tflops": 40.822636063667915, "iter_time": 0.5053836669921875, "loss": 0.05577007681131363, "lr": 3.8209690820134145e-07, "seqlen": 8192.0, "step_tflops": 44.81294557696987, "step_time": 0.46038244628906244} +{"epoch": 0, "iter": 
22265, "iter_tflops": 22.012568606452188, "iter_time": 0.9372415313720703, "loss": 0.33973369002342224, "lr": 3.716005513616494e-07, "seqlen": 8192.0, "step_tflops": 23.61559422729014, "step_time": 0.8736216125488282} +{"epoch": 0, "iter": 22266, "iter_tflops": 15.059602109686319, "iter_time": 1.3699627227783204, "loss": 0.36073601245880127, "lr": 3.612485709187885e-07, "seqlen": 8192.0, "step_tflops": 20.77122679009015, "step_time": 0.993253490447998} +{"epoch": 0, "iter": 22267, "iter_tflops": 40.16168717547423, "iter_time": 0.5137008666992188, "loss": 0.2781315743923187, "lr": 3.5104106904263134e-07, "seqlen": 8192.0, "step_tflops": 44.09878450785308, "step_time": 0.4678381443023682} +{"epoch": 0, "iter": 22268, "iter_tflops": 37.55983578761153, "iter_time": 0.5492860412597655, "loss": 0.4352719187736511, "lr": 3.409781464770978e-07, "seqlen": 8192.0, "step_tflops": 40.95469590634501, "step_time": 0.5037540397644042} +{"epoch": 0, "iter": 22269, "iter_tflops": 31.480956437797893, "iter_time": 0.6553515472412109, "loss": 0.5029865503311157, "lr": 3.310599025391725e-07, "seqlen": 8192.0, "step_tflops": 35.02712642277128, "step_time": 0.5890033130645752} +{"epoch": 0, "iter": 22270, "iter_tflops": 39.00183219338171, "iter_time": 0.5289775466918946, "loss": 0.4763965308666229, "lr": 3.21286435117919e-07, "seqlen": 8192.0, "step_tflops": 43.208418681682105, "step_time": 0.4774785594940185} +{"epoch": 0, "iter": 22271, "iter_tflops": 43.206940400363834, "iter_time": 0.4774948959350586, "loss": 0.472515344619751, "lr": 3.1165784067351213e-07, "seqlen": 8192.0, "step_tflops": 47.35235143443981, "step_time": 0.43569311523437504} +{"epoch": 0, "iter": 22272, "iter_tflops": 38.37800139355623, "iter_time": 0.5375760269165039, "loss": 0.33697575330734253, "lr": 3.021742142362971e-07, "seqlen": 8192.0, "step_tflops": 41.84253511644688, "step_time": 0.49306509399414056} +{"epoch": 0, "iter": 22273, "iter_tflops": 29.857959189585895, "iter_time": 0.6909746704101563, "loss": 
0.49021172523498535, "lr": 2.928356494058337e-07, "seqlen": 8192.0, "step_tflops": 32.29144317612219, "step_time": 0.6389028015136718} +{"epoch": 0, "iter": 22274, "iter_tflops": 9.46662562845777, "iter_time": 2.179350311279297, "loss": 0.29965680837631226, "lr": 2.8364223834999034e-07, "seqlen": 8192.0, "step_tflops": 11.575998988022521, "step_time": 1.7822300720214845} +{"epoch": 0, "iter": 22275, "iter_tflops": 15.806778987981705, "iter_time": 1.3052054138183593, "loss": 0.33923983573913574, "lr": 2.74594071804018e-07, "seqlen": 8192.0, "step_tflops": 19.97953207129131, "step_time": 1.0326114463806153} +{"epoch": 0, "iter": 22276, "iter_tflops": 41.33441091854086, "iter_time": 0.4991263465881348, "loss": 0.42951786518096924, "lr": 2.6569123906967083e-07, "seqlen": 8192.0, "step_tflops": 44.54062043578909, "step_time": 0.46319726371765135} +{"epoch": 0, "iter": 22277, "iter_tflops": 20.385670755136616, "iter_time": 0.7734149627685547, "loss": 0.27756479382514954, "lr": 2.569338280143124e-07, "seqlen": 6304.0, "step_tflops": 21.53673144070895, "step_time": 0.7320787200927734} +{"epoch": 0, "iter": 22278, "iter_tflops": 13.65978700025125, "iter_time": 1.1542334289550782, "loss": 0.2562709450721741, "lr": 2.483219250700558e-07, "seqlen": 6304.0, "step_tflops": 16.181074494683042, "step_time": 0.9743841667175293} +{"epoch": 0, "iter": 22279, "iter_tflops": 24.0278624195036, "iter_time": 0.6561791687011719, "loss": 0.2733994722366333, "lr": 2.398556152329046e-07, "seqlen": 6304.0, "step_tflops": 25.91859566638245, "step_time": 0.6083116149902345} +{"epoch": 0, "iter": 22280, "iter_tflops": 25.791301837670122, "iter_time": 0.6113139572143554, "loss": 0.29031315445899963, "lr": 2.3153498206192004e-07, "seqlen": 6304.0, "step_tflops": 27.479010097338463, "step_time": 0.5737682228088379} +{"epoch": 0, "iter": 22281, "iter_tflops": 23.542723165076225, "iter_time": 0.8763257064819336, "loss": 0.3401775658130646, "lr": 2.23360107678392e-07, "seqlen": 8192.0, "step_tflops": 
24.873011948128127, "step_time": 0.8294569854736329} +{"epoch": 0, "iter": 22282, "iter_tflops": 23.59536409507993, "iter_time": 0.8743706359863281, "loss": 0.3053867220878601, "lr": 2.1533107276502929e-07, "seqlen": 8192.0, "step_tflops": 26.607103593296618, "step_time": 0.7753979473114013} +{"epoch": 0, "iter": 22283, "iter_tflops": 45.09619933327574, "iter_time": 0.457490737915039, "loss": 0.45070064067840576, "lr": 2.074479565651688e-07, "seqlen": 8192.0, "step_tflops": 48.93531603281755, "step_time": 0.42159926986694335} +{"epoch": 0, "iter": 22284, "iter_tflops": 47.48678161325934, "iter_time": 0.4344597129821778, "loss": 0.2118781954050064, "lr": 1.9971083688197943e-07, "seqlen": 8192.0, "step_tflops": 51.23824866482897, "step_time": 0.4026502475738526} +{"epoch": 0, "iter": 22285, "iter_tflops": 28.30868474725027, "iter_time": 0.7287902526855469, "loss": 0.47668734192848206, "lr": 1.9211979007771274e-07, "seqlen": 8192.0, "step_tflops": 30.170348578301247, "step_time": 0.6838201904296874} +{"epoch": 0, "iter": 22286, "iter_tflops": 18.914809748406217, "iter_time": 1.090737564086914, "loss": 0.5235428214073181, "lr": 1.8467489107293513e-07, "seqlen": 8192.0, "step_tflops": 21.244713763748134, "step_time": 0.9711165676116944} +{"epoch": 0, "iter": 22287, "iter_tflops": 38.3410849697701, "iter_time": 0.5380936279296875, "loss": 0.4685501456260681, "lr": 1.7737621334579346e-07, "seqlen": 8192.0, "step_tflops": 41.988331081955266, "step_time": 0.49135302543640136} +{"epoch": 0, "iter": 22288, "iter_tflops": 37.27664447951845, "iter_time": 0.5534589767456055, "loss": 0.4348700940608978, "lr": 1.7022382893129074e-07, "seqlen": 8192.0, "step_tflops": 40.780341056284506, "step_time": 0.5059078216552735} +{"epoch": 0, "iter": 22289, "iter_tflops": 22.110057510098002, "iter_time": 0.9331089935302733, "loss": 0.5046842694282532, "lr": 1.632178084205732e-07, "seqlen": 8192.0, "step_tflops": 23.648087510293248, "step_time": 0.8724212265014649} +{"epoch": 0, "iter": 
22290, "iter_tflops": 15.256175469220574, "iter_time": 1.3523109741210937, "loss": 0.4791506826877594, "lr": 1.56358220960236e-07, "seqlen": 8192.0, "step_tflops": 19.811387381411336, "step_time": 1.0413755035400392} +{"epoch": 0, "iter": 22291, "iter_tflops": 34.96166871356167, "iter_time": 0.590106086730957, "loss": 0.44178348779678345, "lr": 1.4964513425163694e-07, "seqlen": 8192.0, "step_tflops": 38.17793762447694, "step_time": 0.5403930854797363} +{"epoch": 0, "iter": 22292, "iter_tflops": 34.355347770205725, "iter_time": 0.6005205841064454, "loss": 0.49768197536468506, "lr": 1.430786145502322e-07, "seqlen": 8192.0, "step_tflops": 37.20152141113703, "step_time": 0.5545766067504883} +{"epoch": 0, "iter": 22293, "iter_tflops": 21.611827981715546, "iter_time": 0.954620475769043, "loss": 0.05097365379333496, "lr": 1.3665872666491996e-07, "seqlen": 8192.0, "step_tflops": 23.234240959931597, "step_time": 0.8879607276916504} +{"epoch": 0, "iter": 22294, "iter_tflops": 20.437346615994663, "iter_time": 1.0094800415039062, "loss": 0.0926612839102745, "lr": 1.3038553395740284e-07, "seqlen": 8192.0, "step_tflops": 25.157944357001, "step_time": 0.8200627689361571} +{"epoch": 0, "iter": 22295, "iter_tflops": 48.152700214093244, "iter_time": 0.4284514350891113, "loss": 0.06549856066703796, "lr": 1.2425909834155813e-07, "seqlen": 8192.0, "step_tflops": 52.378172337939525, "step_time": 0.3938872356414795} +{"epoch": 0, "iter": 22296, "iter_tflops": 51.225605612269206, "iter_time": 0.4027496261596679, "loss": 0.10944509506225586, "lr": 1.1827948028283353e-07, "seqlen": 8192.0, "step_tflops": 55.73202955162943, "step_time": 0.3701837825775146} +{"epoch": 0, "iter": 22297, "iter_tflops": 25.82362022759679, "iter_time": 0.7989233627319335, "loss": 0.325982004404068, "lr": 1.1244673879764411e-07, "seqlen": 8192.0, "step_tflops": 27.195481765650687, "step_time": 0.7586221008300782} +{"epoch": 0, "iter": 22298, "iter_tflops": 13.947337184898268, "iter_time": 1.4792137908935548, 
"loss": 0.2726595997810364, "lr": 1.0676093145279619e-07, "seqlen": 8192.0, "step_tflops": 18.57562816078434, "step_time": 1.1106538810729982} +{"epoch": 0, "iter": 22299, "iter_tflops": 45.51983532148589, "iter_time": 0.4532330436706543, "loss": 0.2714175581932068, "lr": 1.0122211436490946e-07, "seqlen": 8192.0, "step_tflops": 49.929058849433716, "step_time": 0.4132081394195557} +{"epoch": 0, "iter": 22300, "iter_tflops": 51.12454521508946, "iter_time": 0.40354576110839846, "loss": 0.3948642611503601, "lr": 9.583034219987408e-08, "seqlen": 8192.0, "step_tflops": 55.39871117691748, "step_time": 0.3724110736846924} +{"epoch": 0, "iter": 22301, "iter_tflops": 25.616387407859317, "iter_time": 0.8053865356445312, "loss": 0.5248860716819763, "lr": 9.058566817230606e-08, "seqlen": 8192.0, "step_tflops": 27.092427994141204, "step_time": 0.7615077362060546} +{"epoch": 0, "iter": 22302, "iter_tflops": 13.769954293877168, "iter_time": 1.498268844604492, "loss": 0.5771357417106628, "lr": 8.548814404501615e-08, "seqlen": 8192.0, "step_tflops": 16.413505011241547, "step_time": 1.2569584312438964} +{"epoch": 0, "iter": 22303, "iter_tflops": 34.346069569566446, "iter_time": 0.6006828079223633, "loss": 0.6247313022613525, "lr": 8.053782012851507e-08, "seqlen": 8192.0, "step_tflops": 37.69192212792362, "step_time": 0.547361141204834} +{"epoch": 0, "iter": 22304, "iter_tflops": 35.226045367123874, "iter_time": 0.5856772537231445, "loss": 0.4196898937225342, "lr": 7.573474528049739e-08, "seqlen": 8192.0, "step_tflops": 38.312191532510724, "step_time": 0.5384994354248047} +{"epoch": 0, "iter": 22305, "iter_tflops": 17.889691783605144, "iter_time": 1.1532391815185545, "loss": 0.6047741174697876, "lr": 7.107896690537518e-08, "seqlen": 8192.0, "step_tflops": 19.135400734696795, "step_time": 1.0781636505126952} +{"epoch": 0, "iter": 22306, "iter_tflops": 22.415252861514166, "iter_time": 0.9204042282104492, "loss": 0.6444460153579712, "lr": 6.657053095380006e-08, "seqlen": 8192.0, 
"step_tflops": 29.67604954673998, "step_time": 0.6952102394104005} +{"epoch": 0, "iter": 22307, "iter_tflops": 36.64820755125336, "iter_time": 0.5629495925903321, "loss": 0.5557860136032104, "lr": 6.22094819222152e-08, "seqlen": 8192.0, "step_tflops": 40.08650154287427, "step_time": 0.5146643562316894} +{"epoch": 0, "iter": 22308, "iter_tflops": 39.903084409484144, "iter_time": 0.517030044555664, "loss": 0.4454355239868164, "lr": 5.799586285241243e-08, "seqlen": 8192.0, "step_tflops": 43.47688627957017, "step_time": 0.4745301532745362} +{"epoch": 0, "iter": 22309, "iter_tflops": 26.061119110979362, "iter_time": 0.7916426544189454, "loss": 0.4922816753387451, "lr": 5.3929715331114125e-08, "seqlen": 8192.0, "step_tflops": 28.04275851338501, "step_time": 0.7357012863159179} +{"epoch": 0, "iter": 22310, "iter_tflops": 9.941094854410386, "iter_time": 2.0753341369628906, "loss": 0.4311877191066742, "lr": 5.001107948955197e-08, "seqlen": 8192.0, "step_tflops": 11.387154420508896, "step_time": 1.8117865753173827} +{"epoch": 0, "iter": 22311, "iter_tflops": 13.209325860883633, "iter_time": 1.5618581695556641, "loss": 0.3911023736000061, "lr": 4.623999400308054e-08, "seqlen": 8192.0, "step_tflops": 15.834203696327089, "step_time": 1.3029448089599611} +{"epoch": 0, "iter": 22312, "iter_tflops": 35.16033685632838, "iter_time": 0.5867717819213867, "loss": 0.40086448192596436, "lr": 4.261649609079099e-08, "seqlen": 8192.0, "step_tflops": 38.22150979010895, "step_time": 0.539777042388916} +{"epoch": 0, "iter": 22313, "iter_tflops": 20.3400643641449, "iter_time": 0.7791757431030274, "loss": 0.30053165555000305, "lr": 3.9140621515144634e-08, "seqlen": 6336.0, "step_tflops": 21.87563031888826, "step_time": 0.7244812850952148} +{"epoch": 0, "iter": 22314, "iter_tflops": 6.301160277910968, "iter_time": 2.5151692810058592, "loss": 0.4416821002960205, "lr": 3.5812404581621605e-08, "seqlen": 6336.0, "step_tflops": 7.000238180842086, "step_time": 2.263992218017578} +{"epoch": 0, "iter": 
22315, "iter_tflops": 9.155933688577484, "iter_time": 1.7309523315429687, "loss": 0.1644938737154007, "lr": 3.263187813837776e-08, "seqlen": 6336.0, "step_tflops": 11.141794440525695, "step_time": 1.4224355735778809} +{"epoch": 0, "iter": 22316, "iter_tflops": 24.52031405672245, "iter_time": 0.6463410186767578, "loss": 0.30280908942222595, "lr": 2.9599073575926615e-08, "seqlen": 6336.0, "step_tflops": 26.336743235626656, "step_time": 0.6017632713317871} +{"epoch": 0, "iter": 22317, "iter_tflops": 12.800267740395174, "iter_time": 1.1518822021484376, "loss": 0.21145987510681152, "lr": 2.671402082682295e-08, "seqlen": 5904.0, "step_tflops": 13.671123620808702, "step_time": 1.0785068588256836} +{"epoch": 0, "iter": 22318, "iter_tflops": 13.192163485512513, "iter_time": 1.1176635742187502, "loss": 0.3140164315700531, "lr": 2.397674836537467e-08, "seqlen": 5904.0, "step_tflops": 16.125670467126277, "step_time": 0.9143434143066407} +{"epoch": 0, "iter": 22319, "iter_tflops": 21.411558132663874, "iter_time": 0.6886187591552734, "loss": 0.2514936923980713, "lr": 2.1387283207354723e-08, "seqlen": 5904.0, "step_tflops": 23.015407266867584, "step_time": 0.6406317481994628} +{"epoch": 0, "iter": 22320, "iter_tflops": 22.42752858123122, "iter_time": 0.6574242248535156, "loss": 0.39398548007011414, "lr": 1.894565090973799e-08, "seqlen": 5904.0, "step_tflops": 24.137566185462322, "step_time": 0.6108486862182617} +{"epoch": 0, "iter": 22321, "iter_tflops": 21.639204363238242, "iter_time": 0.9534127578735352, "loss": 0.003409767057746649, "lr": 1.6651875570451447e-08, "seqlen": 8192.0, "step_tflops": 23.374770507511048, "step_time": 0.8826222915649414} +{"epoch": 0, "iter": 22322, "iter_tflops": 23.98530178051229, "iter_time": 0.8601556777954102, "loss": 0.011936402879655361, "lr": 1.4505979828129401e-08, "seqlen": 8192.0, "step_tflops": 27.260272952431993, "step_time": 0.7568190364837646} +{"epoch": 0, "iter": 22323, "iter_tflops": 41.28339691979805, "iter_time": 
0.4997431182861328, "loss": 0.008946510963141918, "lr": 1.2507984861898635e-08, "seqlen": 8192.0, "step_tflops": 45.71997916957668, "step_time": 0.4512489700317383} +{"epoch": 0, "iter": 22324, "iter_tflops": 38.867651233458005, "iter_time": 0.5308037109375, "loss": 0.0073067983612418175, "lr": 1.0657910391161929e-08, "seqlen": 8192.0, "step_tflops": 43.03508359900533, "step_time": 0.4794017295837402} +{"epoch": 0, "iter": 22325, "iter_tflops": 20.290809732744115, "iter_time": 1.0167703399658203, "loss": 0.18393367528915405, "lr": 8.955774675406536e-09, "seqlen": 8192.0, "step_tflops": 21.57704680179863, "step_time": 0.9561592788696288} +{"epoch": 0, "iter": 22326, "iter_tflops": 10.163688812552088, "iter_time": 2.0298824462890623, "loss": 0.24233022332191467, "lr": 7.401594514026e-09, "seqlen": 8192.0, "step_tflops": 11.822273050473349, "step_time": 1.7451037902832032} +{"epoch": 0, "iter": 22327, "iter_tflops": 12.881038748709459, "iter_time": 1.6016638031005863, "loss": 0.2083960771560669, "lr": 5.995385246151952e-09, "seqlen": 8192.0, "step_tflops": 16.993180444666553, "step_time": 1.2140807647705079} +{"epoch": 0, "iter": 22328, "iter_tflops": 24.716298302963704, "iter_time": 0.8347161560058594, "loss": 0.3010471761226654, "lr": 4.737160750500902e-09, "seqlen": 8192.0, "step_tflops": 35.12609216704944, "step_time": 0.5873438301086426} +{"epoch": 0, "iter": 22329, "iter_tflops": 25.215289305773346, "iter_time": 0.6285267868041993, "loss": 0.38699114322662354, "lr": 3.6269334452393442e-09, "seqlen": 6336.0, "step_tflops": 27.066258584556895, "step_time": 0.5855439796447754} +{"epoch": 0, "iter": 22330, "iter_tflops": 24.422859544229084, "iter_time": 0.6489201126098633, "loss": 0.18734952807426453, "lr": 2.664714287865522e-09, "seqlen": 6336.0, "step_tflops": 26.30045246434311, "step_time": 0.6025936164855956} +{"epoch": 0, "iter": 22331, "iter_tflops": 24.418862620142455, "iter_time": 0.6490263290405274, "loss": 0.26941928267478943, "lr": 1.8505127750911844e-09, 
"seqlen": 6336.0, "step_tflops": 26.30245487447943, "step_time": 0.6025477409362794} +{"epoch": 0, "iter": 22332, "iter_tflops": 24.813201487393947, "iter_time": 0.6387118072509766, "loss": 0.17381365597248077, "lr": 1.184336942758324e-09, "seqlen": 6336.0, "step_tflops": 26.628580322801223, "step_time": 0.5951682205200195} +{"epoch": 0, "iter": 1, "iter_tflops": 14.455527765373674, "iter_time": 1.4272113647460936, "loss": 0.14039325714111328, "lr": 6.000000000000001e-08, "seqlen": 8192.0, "step_tflops": 26.668488596399946, "step_time": 0.7736131515502929} +{"epoch": 0, "iter": 2, "iter_tflops": 53.319257045307246, "iter_time": 0.38693512725830076, "loss": 0.19863784313201904, "lr": 1.2000000000000002e-07, "seqlen": 8192.0, "step_tflops": 58.520812862691066, "step_time": 0.35254283905029293} +{"epoch": 0, "iter": 3, "iter_tflops": 61.15010972480007, "iter_time": 0.33738440704345707, "loss": 0.25300225615501404, "lr": 1.8e-07, "seqlen": 8192.0, "step_tflops": 66.95728330975871, "step_time": 0.30812321662902836} +{"epoch": 0, "iter": 4, "iter_tflops": 65.55408443824746, "iter_time": 0.3147186584472656, "loss": 0.0966501235961914, "lr": 2.4000000000000003e-07, "seqlen": 8192.0, "step_tflops": 71.85517059982249, "step_time": 0.2871205139160156} +{"epoch": 0, "iter": 5, "iter_tflops": 32.57566731731372, "iter_time": 0.6333283462524415, "loss": 2.303663730621338, "lr": 3.0000000000000004e-07, "seqlen": 8192.0, "step_tflops": 34.56218734214356, "step_time": 0.5969267311096191} +{"epoch": 0, "iter": 6, "iter_tflops": 20.27116845327411, "iter_time": 1.0177555160522462, "loss": 2.158521890640259, "lr": 3.6e-07, "seqlen": 8192.0, "step_tflops": 22.913224160994673, "step_time": 0.9004011554718018} +{"epoch": 0, "iter": 7, "iter_tflops": 44.24384339749733, "iter_time": 0.46630427932739255, "loss": 2.0365874767303467, "lr": 4.2e-07, "seqlen": 8192.0, "step_tflops": 47.500952529002305, "step_time": 0.4343301010131836} +{"epoch": 0, "iter": 8, "iter_tflops": 53.17099452104654, 
"iter_time": 0.38801406097412106, "loss": 1.892120599746704, "lr": 4.800000000000001e-07, "seqlen": 8192.0, "step_tflops": 57.34053533772819, "step_time": 0.35979945755004883} +{"epoch": 0, "iter": 9, "iter_tflops": 44.953693573069565, "iter_time": 0.45894100952148437, "loss": 1.6478855609893799, "lr": 5.4e-07, "seqlen": 8192.0, "step_tflops": 49.2949360389317, "step_time": 0.41852358818054197} +{"epoch": 0, "iter": 10, "iter_tflops": 50.639484732311274, "iter_time": 0.407411205291748, "loss": 1.5576761960983276, "lr": 6.000000000000001e-07, "seqlen": 8192.0, "step_tflops": 55.060706411161405, "step_time": 0.3746972179412842} +{"epoch": 0, "iter": 11, "iter_tflops": 53.11496080328434, "iter_time": 0.388423397064209, "loss": 1.7640050649642944, "lr": 6.599999999999999e-07, "seqlen": 8192.0, "step_tflops": 57.57316152418502, "step_time": 0.35834567642211923} +{"epoch": 0, "iter": 12, "iter_tflops": 55.032331246977975, "iter_time": 0.3748904151916504, "loss": 1.8201686143875122, "lr": 7.2e-07, "seqlen": 8192.0, "step_tflops": 59.7045542835329, "step_time": 0.3455530948638916} +{"epoch": 0, "iter": 13, "iter_tflops": 42.02368026831408, "iter_time": 0.4909397125244141, "loss": 2.8867990970611572, "lr": 7.799999999999999e-07, "seqlen": 8192.0, "step_tflops": 45.26884484967236, "step_time": 0.4557459678649902} +{"epoch": 0, "iter": 14, "iter_tflops": 44.86885649447329, "iter_time": 0.459808765411377, "loss": 2.6016571521759033, "lr": 8.4e-07, "seqlen": 8192.0, "step_tflops": 48.86139728676866, "step_time": 0.4222370758056641} +{"epoch": 0, "iter": 15, "iter_tflops": 50.78678296623732, "iter_time": 0.4062295799255371, "loss": 2.6777000427246094, "lr": 9e-07, "seqlen": 8192.0, "step_tflops": 54.68462950441491, "step_time": 0.3772740840911865} +{"epoch": 0, "iter": 16, "iter_tflops": 49.35460933431429, "iter_time": 0.418017562866211, "loss": 1.9911431074142456, "lr": 9.600000000000001e-07, "seqlen": 8192.0, "step_tflops": 53.108661433579286, "step_time": 0.38846946907043456} 
+{"epoch": 0, "iter": 17, "iter_tflops": 57.02267919847003, "iter_time": 0.3618050537109375, "loss": 0.177252858877182, "lr": 1.0200000000000002e-06, "seqlen": 8192.0, "step_tflops": 63.12358126825902, "step_time": 0.32683654975891113} +{"epoch": 0, "iter": 18, "iter_tflops": 50.94346006329458, "iter_time": 0.4049802169799805, "loss": 0.18340305984020233, "lr": 1.08e-06, "seqlen": 8192.0, "step_tflops": 58.06171332898888, "step_time": 0.35533042907714846} +{"epoch": 0, "iter": 19, "iter_tflops": 60.739108640336354, "iter_time": 0.3396673736572266, "loss": 0.10045892745256424, "lr": 1.14e-06, "seqlen": 8192.0, "step_tflops": 66.60353302785589, "step_time": 0.30975974655151367} +{"epoch": 0, "iter": 20, "iter_tflops": 56.76700121639035, "iter_time": 0.36343461990356446, "loss": 0.1525794267654419, "lr": 1.2000000000000002e-06, "seqlen": 8192.0, "step_tflops": 61.958355191597434, "step_time": 0.33298323440551764} +{"epoch": 0, "iter": 21, "iter_tflops": 42.923933230812885, "iter_time": 0.48064312744140625, "loss": 2.656681776046753, "lr": 1.26e-06, "seqlen": 8192.0, "step_tflops": 46.46565114582349, "step_time": 0.4440074119567871} +{"epoch": 0, "iter": 22, "iter_tflops": 10.826635447328707, "iter_time": 1.9055867919921872, "loss": 2.876267910003662, "lr": 1.3199999999999999e-06, "seqlen": 8192.0, "step_tflops": 14.967976684059378, "step_time": 1.3783488540649416} +{"epoch": 0, "iter": 23, "iter_tflops": 12.622853976133223, "iter_time": 1.6344238433837892, "loss": 2.5151779651641846, "lr": 1.38e-06, "seqlen": 8192.0, "step_tflops": 14.718609644597654, "step_time": 1.4017012481689453} +{"epoch": 0, "iter": 24, "iter_tflops": 15.907499837561227, "iter_time": 1.2969412994384764, "loss": 2.754101514816284, "lr": 1.44e-06, "seqlen": 8192.0, "step_tflops": 19.29622490452342, "step_time": 1.0691777076721192} +{"epoch": 0, "iter": 25, "iter_tflops": 21.03169377325769, "iter_time": 0.7068810958862305, "loss": 2.1338322162628174, "lr": 1.5e-06, "seqlen": 5952.0, "step_tflops": 
22.959353889197402, "step_time": 0.6475315818786621} +{"epoch": 0, "iter": 26, "iter_tflops": 24.19747415504968, "iter_time": 0.6143991165161132, "loss": 1.9283133745193481, "lr": 1.5599999999999999e-06, "seqlen": 5952.0, "step_tflops": 26.018510977476748, "step_time": 0.5713972930908203} +{"epoch": 0, "iter": 27, "iter_tflops": 23.840205249988966, "iter_time": 0.6236064910888671, "loss": 2.2188634872436523, "lr": 1.62e-06, "seqlen": 5952.0, "step_tflops": 25.47090355529125, "step_time": 0.5836819534301758} +{"epoch": 0, "iter": 28, "iter_tflops": 21.095620441563923, "iter_time": 0.7047390136718752, "loss": 2.048845052719116, "lr": 1.68e-06, "seqlen": 5952.0, "step_tflops": 22.581420165474174, "step_time": 0.6583689880371094} +{"epoch": 0, "iter": 29, "iter_tflops": 33.82390064904302, "iter_time": 0.6099560699462889, "loss": 0.8687458634376526, "lr": 1.74e-06, "seqlen": 8192.0, "step_tflops": 38.38617330209727, "step_time": 0.5374615840911865} +{"epoch": 0, "iter": 30, "iter_tflops": 40.03828537875033, "iter_time": 0.5152841415405273, "loss": 0.8299131989479065, "lr": 1.8e-06, "seqlen": 8192.0, "step_tflops": 44.425860090904706, "step_time": 0.4643937892913818} +{"epoch": 0, "iter": 31, "iter_tflops": 47.66920543282461, "iter_time": 0.43279709243774417, "loss": 0.5046553015708923, "lr": 1.86e-06, "seqlen": 8192.0, "step_tflops": 52.38739154934692, "step_time": 0.39381791877746586} +{"epoch": 0, "iter": 32, "iter_tflops": 44.12372893403793, "iter_time": 0.4675736618041992, "loss": 0.7341625690460205, "lr": 1.9200000000000003e-06, "seqlen": 8192.0, "step_tflops": 48.37098101529561, "step_time": 0.42651798820495607} +{"epoch": 0, "iter": 33, "iter_tflops": 29.801459850389755, "iter_time": 0.6922846603393554, "loss": 2.3219828605651855, "lr": 1.98e-06, "seqlen": 8192.0, "step_tflops": 31.87794200281559, "step_time": 0.6471902580261231} +{"epoch": 0, "iter": 34, "iter_tflops": 13.986085760694488, "iter_time": 1.4751156158447265, "loss": 2.362283706665039, "lr": 
2.0400000000000004e-06, "seqlen": 8192.0, "step_tflops": 17.748149468962723, "step_time": 1.1624363174438477} +{"epoch": 0, "iter": 35, "iter_tflops": 44.284794169004286, "iter_time": 0.4658730812072754, "loss": 1.9251540899276733, "lr": 2.1000000000000002e-06, "seqlen": 8192.0, "step_tflops": 47.68512153579395, "step_time": 0.4326526355743408} +{"epoch": 0, "iter": 36, "iter_tflops": 45.009226409231594, "iter_time": 0.4583747634887695, "loss": 2.616335868835449, "lr": 2.16e-06, "seqlen": 8192.0, "step_tflops": 48.53456560631571, "step_time": 0.4250804195404053} +{"epoch": 0, "iter": 37, "iter_tflops": 30.930252696562807, "iter_time": 0.6670198822021485, "loss": 2.211092472076416, "lr": 2.22e-06, "seqlen": 8192.0, "step_tflops": 32.74944450242389, "step_time": 0.6299677391052246} +{"epoch": 0, "iter": 38, "iter_tflops": 13.26040353964128, "iter_time": 1.5558420562744142, "loss": 2.3830511569976807, "lr": 2.28e-06, "seqlen": 8192.0, "step_tflops": 15.725112555739617, "step_time": 1.3119838371276855} +{"epoch": 0, "iter": 39, "iter_tflops": 46.486856897261326, "iter_time": 0.4438048706054688, "loss": 2.2319018840789795, "lr": 2.34e-06, "seqlen": 8192.0, "step_tflops": 50.34754198182307, "step_time": 0.4097735996246338} +{"epoch": 0, "iter": 40, "iter_tflops": 45.80595017002463, "iter_time": 0.450402042388916, "loss": 1.8445093631744385, "lr": 2.4000000000000003e-06, "seqlen": 8192.0, "step_tflops": 49.20498022275293, "step_time": 0.41928872680664064} +{"epoch": 0, "iter": 41, "iter_tflops": 21.912309624170806, "iter_time": 0.9415298461914062, "loss": 2.76606822013855, "lr": 2.46e-06, "seqlen": 8192.0, "step_tflops": 22.831994712217657, "step_time": 0.9036045150756836} +{"epoch": 0, "iter": 42, "iter_tflops": 16.72606828158675, "iter_time": 1.2334694061279297, "loss": 2.7377257347106934, "lr": 2.52e-06, "seqlen": 8192.0, "step_tflops": 20.221316186645506, "step_time": 1.0202646217346192} +{"epoch": 0, "iter": 43, "iter_tflops": 41.477686054607524, "iter_time": 
0.49740222930908207, "loss": 2.5618526935577393, "lr": 2.58e-06, "seqlen": 8192.0, "step_tflops": 44.42823390466396, "step_time": 0.46436897659301757} +{"epoch": 0, "iter": 44, "iter_tflops": 43.94658785393599, "iter_time": 0.46945837020874026, "loss": 2.2452611923217773, "lr": 2.6399999999999997e-06, "seqlen": 8192.0, "step_tflops": 47.10488940938975, "step_time": 0.43798199653625486} +{"epoch": 0, "iter": 45, "iter_tflops": 26.384749882727895, "iter_time": 0.781932502746582, "loss": 1.657957911491394, "lr": 2.7e-06, "seqlen": 8192.0, "step_tflops": 27.847303643134673, "step_time": 0.7408650321960449} +{"epoch": 0, "iter": 46, "iter_tflops": 14.029178754034856, "iter_time": 1.4705845489501952, "loss": 1.4809768199920654, "lr": 2.76e-06, "seqlen": 8192.0, "step_tflops": 15.863971753260476, "step_time": 1.3004998893737794} +{"epoch": 0, "iter": 47, "iter_tflops": 40.345472578021614, "iter_time": 0.5113608093261719, "loss": 1.4918513298034668, "lr": 2.82e-06, "seqlen": 8192.0, "step_tflops": 44.29218461187585, "step_time": 0.4657953472137451} +{"epoch": 0, "iter": 48, "iter_tflops": 40.415400630565735, "iter_time": 0.5104760360717774, "loss": 1.3990833759307861, "lr": 2.88e-06, "seqlen": 8192.0, "step_tflops": 43.6246326868885, "step_time": 0.4729230308532715} +{"epoch": 0, "iter": 49, "iter_tflops": 16.467198051947328, "iter_time": 1.2528599853515625, "loss": 0.7109791040420532, "lr": 2.9400000000000002e-06, "seqlen": 8192.0, "step_tflops": 17.387921080636485, "step_time": 1.1865186996459962} +{"epoch": 0, "iter": 50, "iter_tflops": 16.434310389110244, "iter_time": 1.2553671569824219, "loss": 0.6258338689804077, "lr": 3e-06, "seqlen": 8192.0, "step_tflops": 21.922864186769374, "step_time": 0.9410765552520752} +{"epoch": 0, "iter": 51, "iter_tflops": 53.512889053108495, "iter_time": 0.3855350341796875, "loss": 0.6360726356506348, "lr": 3.06e-06, "seqlen": 8192.0, "step_tflops": 57.98878980056765, "step_time": 0.35577727317810054} +{"epoch": 0, "iter": 52, 
"iter_tflops": 57.3036194184254, "iter_time": 0.36003124618530274, "loss": 0.7896813154220581, "lr": 3.1199999999999998e-06, "seqlen": 8192.0, "step_tflops": 61.80350084542515, "step_time": 0.33381755447387695} +{"epoch": 0, "iter": 53, "iter_tflops": 40.63835088071086, "iter_time": 0.5076754608154297, "loss": 1.8694210052490234, "lr": 3.18e-06, "seqlen": 8192.0, "step_tflops": 44.01318395111431, "step_time": 0.4687480354309082} +{"epoch": 0, "iter": 54, "iter_tflops": 17.644485425920422, "iter_time": 1.1692658081054688, "loss": 2.023561477661133, "lr": 3.24e-06, "seqlen": 8192.0, "step_tflops": 20.905516938349283, "step_time": 0.9868731575012207} +{"epoch": 0, "iter": 55, "iter_tflops": 47.172875802390664, "iter_time": 0.43735076904296877, "loss": 2.2315995693206787, "lr": 3.3e-06, "seqlen": 8192.0, "step_tflops": 51.11416386076905, "step_time": 0.40362772178649897} +{"epoch": 0, "iter": 56, "iter_tflops": 48.10752540536877, "iter_time": 0.4288537673950195, "loss": 1.635510802268982, "lr": 3.36e-06, "seqlen": 8192.0, "step_tflops": 52.03797168393309, "step_time": 0.3964622917175293} +{"epoch": 0, "iter": 57, "iter_tflops": 41.68343323518744, "iter_time": 0.49494707870483395, "loss": 1.9895552396774292, "lr": 3.4200000000000003e-06, "seqlen": 8192.0, "step_tflops": 45.34418832351196, "step_time": 0.45498870468139646} +{"epoch": 0, "iter": 58, "iter_tflops": 38.20872425181733, "iter_time": 0.5399576644897461, "loss": 1.9709049463272095, "lr": 3.48e-06, "seqlen": 8192.0, "step_tflops": 41.87010778232254, "step_time": 0.49274039649963375} +{"epoch": 0, "iter": 59, "iter_tflops": 36.6318032274734, "iter_time": 0.5632016906738281, "loss": 2.1886942386627197, "lr": 3.54e-06, "seqlen": 8192.0, "step_tflops": 39.83870174588898, "step_time": 0.517865608215332} +{"epoch": 0, "iter": 60, "iter_tflops": 40.03816622340417, "iter_time": 0.5152856750488282, "loss": 2.140314817428589, "lr": 3.6e-06, "seqlen": 8192.0, "step_tflops": 43.586292943685216, "step_time": 
0.4733390274047851} +{"epoch": 0, "iter": 61, "iter_tflops": 32.51267870237197, "iter_time": 0.6345553283691405, "loss": 1.6275314092636108, "lr": 3.66e-06, "seqlen": 8192.0, "step_tflops": 35.183555866065696, "step_time": 0.5863845481872558} +{"epoch": 0, "iter": 62, "iter_tflops": 39.015292874563436, "iter_time": 0.5287950439453125, "loss": 1.4193670749664307, "lr": 3.72e-06, "seqlen": 8192.0, "step_tflops": 42.51760116645155, "step_time": 0.48523653602600103} +{"epoch": 0, "iter": 63, "iter_tflops": 43.020725474937805, "iter_time": 0.4795617294311524, "loss": 1.2222882509231567, "lr": 3.7800000000000002e-06, "seqlen": 8192.0, "step_tflops": 46.345392463937515, "step_time": 0.445159538269043} +{"epoch": 0, "iter": 64, "iter_tflops": 41.92478718371204, "iter_time": 0.49209775161743163, "loss": 1.4542878866195679, "lr": 3.8400000000000005e-06, "seqlen": 8192.0, "step_tflops": 45.48465058291758, "step_time": 0.4535836429595947} +{"epoch": 0, "iter": 65, "iter_tflops": 34.57429292575015, "iter_time": 0.5967177276611328, "loss": 1.2989850044250488, "lr": 3.9e-06, "seqlen": 8192.0, "step_tflops": 38.186269089993445, "step_time": 0.5402751827239991} +{"epoch": 0, "iter": 66, "iter_tflops": 40.842851353992785, "iter_time": 0.5051335258483887, "loss": 1.202908992767334, "lr": 3.96e-06, "seqlen": 8192.0, "step_tflops": 44.56691477373308, "step_time": 0.462923978805542} +{"epoch": 0, "iter": 67, "iter_tflops": 45.97404270793349, "iter_time": 0.4487552604675293, "loss": 1.8937926292419434, "lr": 4.0200000000000005e-06, "seqlen": 8192.0, "step_tflops": 50.02604755718142, "step_time": 0.41240702629089354} +{"epoch": 0, "iter": 68, "iter_tflops": 42.15828078270479, "iter_time": 0.48937226867675787, "loss": 1.7249547243118286, "lr": 4.080000000000001e-06, "seqlen": 8192.0, "step_tflops": 45.21742907802175, "step_time": 0.4562641868591308} +{"epoch": 0, "iter": 69, "iter_tflops": 30.037588531530766, "iter_time": 0.6868425369262695, "loss": 1.430472731590271, "lr": 4.14e-06, 
"seqlen": 8192.0, "step_tflops": 32.17354305129725, "step_time": 0.6412440643310546} +{"epoch": 0, "iter": 70, "iter_tflops": 13.264282513351993, "iter_time": 1.5553870697021486, "loss": 1.1409351825714111, "lr": 4.2000000000000004e-06, "seqlen": 8192.0, "step_tflops": 15.358974345634525, "step_time": 1.3432598457336427} +{"epoch": 0, "iter": 71, "iter_tflops": 43.18597230351942, "iter_time": 0.47772673416137695, "loss": 0.9895938038825989, "lr": 4.26e-06, "seqlen": 8192.0, "step_tflops": 47.35699449101996, "step_time": 0.43565039825439444} +{"epoch": 0, "iter": 72, "iter_tflops": 46.72165108913147, "iter_time": 0.44157458114624026, "loss": 0.9529436826705933, "lr": 4.32e-06, "seqlen": 8192.0, "step_tflops": 50.57447286603518, "step_time": 0.4079349193572998} +{"epoch": 0, "iter": 73, "iter_tflops": 18.97214294127485, "iter_time": 1.087441390991211, "loss": 0.5327914953231812, "lr": 4.3799999999999996e-06, "seqlen": 8192.0, "step_tflops": 20.54636936347136, "step_time": 1.0041235580444337} +{"epoch": 0, "iter": 74, "iter_tflops": 21.360289789685496, "iter_time": 0.9658620605468751, "loss": 0.5980557799339294, "lr": 4.44e-06, "seqlen": 8192.0, "step_tflops": 26.613005718917368, "step_time": 0.7752259826660156} +{"epoch": 0, "iter": 75, "iter_tflops": 58.34345422240438, "iter_time": 0.35361453628540035, "loss": 0.7556465864181519, "lr": 4.5e-06, "seqlen": 8192.0, "step_tflops": 63.52511786017099, "step_time": 0.3247706451416016} +{"epoch": 0, "iter": 76, "iter_tflops": 53.021012834686815, "iter_time": 0.38911164474487303, "loss": 0.736591637134552, "lr": 4.56e-06, "seqlen": 8192.0, "step_tflops": 57.187451311088836, "step_time": 0.3607625980377197} +{"epoch": 0, "iter": 77, "iter_tflops": 50.09494024596017, "iter_time": 0.41183986663818356, "loss": 0.36448052525520325, "lr": 4.62e-06, "seqlen": 8192.0, "step_tflops": 55.13832501967134, "step_time": 0.37416975402832037} +{"epoch": 0, "iter": 78, "iter_tflops": 49.470996706010304, "iter_time": 0.41703411865234374, 
"loss": 0.271384596824646, "lr": 4.68e-06, "seqlen": 8192.0, "step_tflops": 54.57471100219471, "step_time": 0.37803394889831543} +{"epoch": 0, "iter": 79, "iter_tflops": 54.610722084281605, "iter_time": 0.37778466796875, "loss": 0.3685609698295593, "lr": 4.74e-06, "seqlen": 8192.0, "step_tflops": 59.327154662966485, "step_time": 0.3477512722015381} +{"epoch": 0, "iter": 80, "iter_tflops": 58.03542448182431, "iter_time": 0.3554913864135742, "loss": 0.4336148500442505, "lr": 4.800000000000001e-06, "seqlen": 8192.0, "step_tflops": 62.97548624077076, "step_time": 0.3276051483154297} +{"epoch": 0, "iter": 81, "iter_tflops": 38.28260506619303, "iter_time": 0.5389156112670899, "loss": 0.43602344393730164, "lr": 4.86e-06, "seqlen": 8192.0, "step_tflops": 41.164121678441866, "step_time": 0.5011911506652832} +{"epoch": 0, "iter": 82, "iter_tflops": 12.853992944435191, "iter_time": 1.6050338287353514, "loss": 0.31771111488342285, "lr": 4.92e-06, "seqlen": 8192.0, "step_tflops": 16.2303882239886, "step_time": 1.271139865875244} +{"epoch": 0, "iter": 83, "iter_tflops": 46.550646938755804, "iter_time": 0.44319670867919914, "loss": 0.3566915690898895, "lr": 4.980000000000001e-06, "seqlen": 8192.0, "step_tflops": 50.53848810914663, "step_time": 0.40822537994384767} +{"epoch": 0, "iter": 84, "iter_tflops": 54.992358586273674, "iter_time": 0.375162914276123, "loss": 0.2852489948272705, "lr": 5.04e-06, "seqlen": 8192.0, "step_tflops": 59.47183856035915, "step_time": 0.34690525817871093} +{"epoch": 0, "iter": 85, "iter_tflops": 45.80845417002413, "iter_time": 0.45037742233276373, "loss": 0.5484495759010315, "lr": 5.1e-06, "seqlen": 8192.0, "step_tflops": 50.04056329750246, "step_time": 0.412287395477295} +{"epoch": 0, "iter": 86, "iter_tflops": 47.47192702043663, "iter_time": 0.43459566116333004, "loss": 0.5187171697616577, "lr": 5.16e-06, "seqlen": 8192.0, "step_tflops": 51.86950443714733, "step_time": 0.39774996376037597} +{"epoch": 0, "iter": 87, "iter_tflops": 46.897741063900504, 
"iter_time": 0.43991657257080075, "loss": 0.46911975741386414, "lr": 5.22e-06, "seqlen": 8192.0, "step_tflops": 50.6997001306832, "step_time": 0.4069273281097412} +{"epoch": 0, "iter": 88, "iter_tflops": 55.60214657258985, "iter_time": 0.37104850769042974, "loss": 0.6201434135437012, "lr": 5.279999999999999e-06, "seqlen": 8192.0, "step_tflops": 60.05765426473401, "step_time": 0.34352146720886223} +{"epoch": 0, "iter": 89, "iter_tflops": 26.815230408558968, "iter_time": 0.7693796844482422, "loss": 0.017181022092700005, "lr": 5.34e-06, "seqlen": 8192.0, "step_tflops": 28.303726468926868, "step_time": 0.7289179229736329} +{"epoch": 0, "iter": 90, "iter_tflops": 16.223076873467615, "iter_time": 1.2717127380371094, "loss": 0.026854131370782852, "lr": 5.4e-06, "seqlen": 8192.0, "step_tflops": 21.636086310569535, "step_time": 0.9535501575469971} +{"epoch": 0, "iter": 91, "iter_tflops": 45.3787933339376, "iter_time": 0.4546417388916016, "loss": 0.007477348204702139, "lr": 5.46e-06, "seqlen": 8192.0, "step_tflops": 50.21328416536038, "step_time": 0.4108692321777344} +{"epoch": 0, "iter": 92, "iter_tflops": 50.80522139488607, "iter_time": 0.4060821495056153, "loss": 0.03425372764468193, "lr": 5.52e-06, "seqlen": 8192.0, "step_tflops": 56.14201865128336, "step_time": 0.36748043632507327} +{"epoch": 0, "iter": 93, "iter_tflops": 17.171859978329273, "iter_time": 1.001789695739746, "loss": 0.36476558446884155, "lr": 5.58e-06, "seqlen": 6864.0, "step_tflops": 18.20236820543338, "step_time": 0.9450744094848633} +{"epoch": 0, "iter": 94, "iter_tflops": 13.76645154386515, "iter_time": 1.2496025085449218, "loss": 0.3335002362728119, "lr": 5.64e-06, "seqlen": 6864.0, "step_tflops": 16.35360405896949, "step_time": 1.0519144477844238} +{"epoch": 0, "iter": 95, "iter_tflops": 41.660636775383615, "iter_time": 0.4129219741821289, "loss": 0.2736526131629944, "lr": 5.7000000000000005e-06, "seqlen": 6864.0, "step_tflops": 45.28112287469614, "step_time": 0.37990648841857905} +{"epoch": 0, 
"iter": 96, "iter_tflops": 45.48515550461451, "iter_time": 0.37820234298706057, "loss": 0.28431475162506104, "lr": 5.76e-06, "seqlen": 6864.0, "step_tflops": 49.392161749140634, "step_time": 0.3482858772277832} +{"epoch": 0, "iter": 97, "iter_tflops": 47.3752068952038, "iter_time": 0.4354829216003418, "loss": 0.040049921721220016, "lr": 5.82e-06, "seqlen": 8192.0, "step_tflops": 51.70043785515053, "step_time": 0.39905065345764157} +{"epoch": 0, "iter": 98, "iter_tflops": 16.123567397550854, "iter_time": 1.279561340332031, "loss": 0.1076601892709732, "lr": 5.8800000000000005e-06, "seqlen": 8192.0, "step_tflops": 19.410867256365496, "step_time": 1.0628630466461182} +{"epoch": 0, "iter": 99, "iter_tflops": 44.95029732297617, "iter_time": 0.4589756851196289, "loss": 0.041644614189863205, "lr": 5.940000000000001e-06, "seqlen": 8192.0, "step_tflops": 49.921890645234605, "step_time": 0.41326747131347663} +{"epoch": 0, "iter": 100, "iter_tflops": 51.89472296587949, "iter_time": 0.3975566749572754, "loss": 0.04860464110970497, "lr": 6e-06, "seqlen": 8192.0, "step_tflops": 57.44687042864255, "step_time": 0.35913346290588377} +{"epoch": 0, "iter": 101, "iter_tflops": 30.691312439015043, "iter_time": 0.6722128143310547, "loss": 0.339015930891037, "lr": 6.0600000000000004e-06, "seqlen": 8192.0, "step_tflops": 32.97727191456055, "step_time": 0.6256155319213867} +{"epoch": 0, "iter": 102, "iter_tflops": 22.60329100788024, "iter_time": 0.9127473297119141, "loss": 0.36341220140457153, "lr": 6.12e-06, "seqlen": 8192.0, "step_tflops": 27.718535756912534, "step_time": 0.7443067588806153} +{"epoch": 0, "iter": 103, "iter_tflops": 53.44366579742659, "iter_time": 0.3860344009399414, "loss": 0.34665584564208984, "lr": 6.18e-06, "seqlen": 8192.0, "step_tflops": 58.006913991822586, "step_time": 0.35566611099243167} +{"epoch": 0, "iter": 104, "iter_tflops": 61.73994011535229, "iter_time": 0.3341612167358398, "loss": 0.3674561381340027, "lr": 6.2399999999999995e-06, "seqlen": 8192.0, 
"step_tflops": 67.0132076746262, "step_time": 0.3078660793304443} +{"epoch": 0, "iter": 105, "iter_tflops": 25.484690788091193, "iter_time": 0.5881748962402344, "loss": 0.5728678703308105, "lr": 6.3e-06, "seqlen": 6000.0, "step_tflops": 27.271884663886496, "step_time": 0.5496303443908691} +{"epoch": 0, "iter": 106, "iter_tflops": 9.843685952071183, "iter_time": 1.5227482299804684, "loss": 0.549075722694397, "lr": 6.36e-06, "seqlen": 6000.0, "step_tflops": 14.220412110843354, "step_time": 1.0540802364349364} +{"epoch": 0, "iter": 107, "iter_tflops": 31.236293055630874, "iter_time": 0.47987305450439455, "loss": 0.7594627737998962, "lr": 6.42e-06, "seqlen": 6000.0, "step_tflops": 34.152361055593815, "step_time": 0.43889953422546385} +{"epoch": 0, "iter": 108, "iter_tflops": 29.201746621342544, "iter_time": 0.5133068084716796, "loss": 0.7779650688171387, "lr": 6.48e-06, "seqlen": 6000.0, "step_tflops": 32.18806600895085, "step_time": 0.46568362808227537} +{"epoch": 0, "iter": 109, "iter_tflops": 24.66867573610848, "iter_time": 0.7373977279663086, "loss": 0.6491127014160156, "lr": 6.54e-06, "seqlen": 7248.0, "step_tflops": 26.323282230610847, "step_time": 0.6910470085144043} +{"epoch": 0, "iter": 110, "iter_tflops": 10.17527815424047, "iter_time": 1.7877275848388672, "loss": 0.6282387375831604, "lr": 6.6e-06, "seqlen": 7248.0, "step_tflops": 12.840761603467076, "step_time": 1.4166313495635985} +{"epoch": 0, "iter": 111, "iter_tflops": 43.70949371692278, "iter_time": 0.4161710395812988, "loss": 0.47668221592903137, "lr": 6.660000000000001e-06, "seqlen": 7248.0, "step_tflops": 47.474903450323346, "step_time": 0.38316297912597663} +{"epoch": 0, "iter": 112, "iter_tflops": 46.96321954951334, "iter_time": 0.3873376998901367, "loss": 0.6224085092544556, "lr": 6.72e-06, "seqlen": 7248.0, "step_tflops": 50.573205392259716, "step_time": 0.35968899536132815} +{"epoch": 0, "iter": 113, "iter_tflops": 45.92805511385584, "iter_time": 0.4492045974731445, "loss": 0.8361319899559021, 
"lr": 6.78e-06, "seqlen": 8192.0, "step_tflops": 49.87568976032907, "step_time": 0.4136502895355224} +{"epoch": 0, "iter": 114, "iter_tflops": 45.90378334742753, "iter_time": 0.4494421157836914, "loss": 1.0933212041854858, "lr": 6.840000000000001e-06, "seqlen": 8192.0, "step_tflops": 51.04184059518383, "step_time": 0.4041996383666992} +{"epoch": 0, "iter": 115, "iter_tflops": 48.60084488907445, "iter_time": 0.424500717163086, "loss": 1.010616660118103, "lr": 6.900000000000001e-06, "seqlen": 8192.0, "step_tflops": 52.59930912628042, "step_time": 0.3922312641143799} +{"epoch": 0, "iter": 116, "iter_tflops": 46.74986712876237, "iter_time": 0.44130806732177735, "loss": 0.9893502593040466, "lr": 6.96e-06, "seqlen": 8192.0, "step_tflops": 50.55557770099831, "step_time": 0.40808738517761234} +{"epoch": 0, "iter": 117, "iter_tflops": 42.61298570071694, "iter_time": 0.48415038681030276, "loss": 1.0092202425003052, "lr": 7.0200000000000006e-06, "seqlen": 8192.0, "step_tflops": 46.18243832007501, "step_time": 0.44673027801513665} +{"epoch": 0, "iter": 118, "iter_tflops": 9.119675703672865, "iter_time": 2.2622617492675783, "loss": 0.8984869718551636, "lr": 7.08e-06, "seqlen": 8192.0, "step_tflops": 11.285823688182292, "step_time": 1.8280538558959958} +{"epoch": 0, "iter": 119, "iter_tflops": 12.86954920939058, "iter_time": 1.603093719482422, "loss": 1.1289927959442139, "lr": 7.14e-06, "seqlen": 8192.0, "step_tflops": 16.135972682002652, "step_time": 1.2785776176452637} +{"epoch": 0, "iter": 120, "iter_tflops": 16.914931821697525, "iter_time": 1.2196971130371095, "loss": 1.0090773105621338, "lr": 7.2e-06, "seqlen": 8192.0, "step_tflops": 22.63990009643262, "step_time": 0.9112714023590087} +{"epoch": 0, "iter": 121, "iter_tflops": 14.57623988427403, "iter_time": 0.9919374389648438, "loss": 1.888622760772705, "lr": 7.26e-06, "seqlen": 5792.0, "step_tflops": 15.55799334395845, "step_time": 0.9293433761596681} +{"epoch": 0, "iter": 122, "iter_tflops": 9.18988379212326, "iter_time": 
1.573329803466797, "loss": 1.7583286762237549, "lr": 7.32e-06, "seqlen": 5792.0, "step_tflops": 10.774012886790722, "step_time": 1.3419993286132814} +{"epoch": 0, "iter": 123, "iter_tflops": 26.68571945274745, "iter_time": 0.541814811706543, "loss": 1.7467515468597412, "lr": 7.3800000000000005e-06, "seqlen": 5792.0, "step_tflops": 28.498718988057977, "step_time": 0.5073462448120117} +{"epoch": 0, "iter": 124, "iter_tflops": 27.95985049201487, "iter_time": 0.5171242980957031, "loss": 2.003465175628662, "lr": 7.44e-06, "seqlen": 5792.0, "step_tflops": 29.647953303131963, "step_time": 0.48768014144897465} +{"epoch": 0, "iter": 125, "iter_tflops": 40.851772754676574, "iter_time": 0.5050232124328614, "loss": 0.7038463950157166, "lr": 7.5e-06, "seqlen": 8192.0, "step_tflops": 45.62689302107003, "step_time": 0.45216958999633794} +{"epoch": 0, "iter": 126, "iter_tflops": 10.47713111543622, "iter_time": 1.9691548461914063, "loss": 0.6864296197891235, "lr": 7.5600000000000005e-06, "seqlen": 8192.0, "step_tflops": 12.364415784141654, "step_time": 1.668586196899414} +{"epoch": 0, "iter": 127, "iter_tflops": 11.598073215718825, "iter_time": 1.7788380126953125, "loss": 0.6263814568519592, "lr": 7.62e-06, "seqlen": 8192.0, "step_tflops": 13.51542886499431, "step_time": 1.5264845619201661} +{"epoch": 0, "iter": 128, "iter_tflops": 18.908348394526467, "iter_time": 1.0911102905273438, "loss": 0.9397559762001038, "lr": 7.680000000000001e-06, "seqlen": 8192.0, "step_tflops": 23.674986364308797, "step_time": 0.8714300060272218} +{"epoch": 0, "iter": 129, "iter_tflops": 21.063533909374954, "iter_time": 0.7155104217529297, "loss": 1.4708449840545654, "lr": 7.74e-06, "seqlen": 6032.0, "step_tflops": 22.63907745882928, "step_time": 0.6657152023315429} +{"epoch": 0, "iter": 130, "iter_tflops": 5.984657775410108, "iter_time": 2.518302398681641, "loss": 1.5921251773834229, "lr": 7.8e-06, "seqlen": 6032.0, "step_tflops": 7.063763693085006, "step_time": 2.1335903472900393} +{"epoch": 0, "iter": 
131, "iter_tflops": 7.448079311480974, "iter_time": 2.0234985961914065, "loss": 1.473779559135437, "lr": 7.860000000000001e-06, "seqlen": 6032.0, "step_tflops": 8.985257751933851, "step_time": 1.67732283782959} +{"epoch": 0, "iter": 132, "iter_tflops": 27.54949100547539, "iter_time": 0.5470583114624024, "loss": 1.6875251531600952, "lr": 7.92e-06, "seqlen": 6032.0, "step_tflops": 29.34727861030269, "step_time": 0.5135460166931152} +{"epoch": 0, "iter": 133, "iter_tflops": 18.680506072474405, "iter_time": 0.8002246322631836, "loss": 1.4046069383621216, "lr": 7.98e-06, "seqlen": 5984.0, "step_tflops": 19.654133012829288, "step_time": 0.7605830841064453} +{"epoch": 0, "iter": 134, "iter_tflops": 7.017262263884982, "iter_time": 2.1302611389160155, "loss": 1.3133084774017334, "lr": 8.040000000000001e-06, "seqlen": 5984.0, "step_tflops": 8.131040644455718, "step_time": 1.8384609985351563} +{"epoch": 0, "iter": 135, "iter_tflops": 9.334987150490639, "iter_time": 1.601352081298828, "loss": 1.4561543464660645, "lr": 8.1e-06, "seqlen": 5984.0, "step_tflops": 11.019374115917591, "step_time": 1.3565744247436524} +{"epoch": 0, "iter": 136, "iter_tflops": 16.063140023818672, "iter_time": 0.9306151275634766, "loss": 1.9326118230819702, "lr": 8.160000000000001e-06, "seqlen": 5984.0, "step_tflops": 21.274931449604765, "step_time": 0.7026392135620116} +{"epoch": 0, "iter": 137, "iter_tflops": 20.773373400902933, "iter_time": 0.6724721221923827, "loss": 1.5162853002548218, "lr": 8.220000000000001e-06, "seqlen": 5600.0, "step_tflops": 22.169094287625615, "step_time": 0.630134651184082} +{"epoch": 0, "iter": 138, "iter_tflops": 12.331669150672427, "iter_time": 1.132816192626953, "loss": 1.1627510786056519, "lr": 8.28e-06, "seqlen": 5600.0, "step_tflops": 14.516310321792195, "step_time": 0.9623323135375976} +{"epoch": 0, "iter": 139, "iter_tflops": 21.87879845426675, "iter_time": 0.6384955062866211, "loss": 1.0142537355422974, "lr": 8.340000000000001e-06, "seqlen": 5600.0, "step_tflops": 
23.587022509543417, "step_time": 0.5922542572021484} +{"epoch": 0, "iter": 140, "iter_tflops": 20.63268104769274, "iter_time": 0.677057647705078, "loss": 0.998977780342102, "lr": 8.400000000000001e-06, "seqlen": 5600.0, "step_tflops": 22.04971381347814, "step_time": 0.6335462951660156} +{"epoch": 0, "iter": 141, "iter_tflops": 21.808129533603704, "iter_time": 0.9460276489257813, "loss": 1.2105873823165894, "lr": 8.459999999999999e-06, "seqlen": 8192.0, "step_tflops": 24.039015954147942, "step_time": 0.8582336959838868} +{"epoch": 0, "iter": 142, "iter_tflops": 23.560573830816285, "iter_time": 0.8756617584228515, "loss": 1.2517457008361816, "lr": 8.52e-06, "seqlen": 8192.0, "step_tflops": 28.258452656494445, "step_time": 0.7300857467651367} +{"epoch": 0, "iter": 143, "iter_tflops": 42.79459440976975, "iter_time": 0.48209578323364255, "loss": 1.604549527168274, "lr": 8.58e-06, "seqlen": 8192.0, "step_tflops": 46.03345926357248, "step_time": 0.4481760406494141} +{"epoch": 0, "iter": 144, "iter_tflops": 44.97624288622618, "iter_time": 0.45871091461181634, "loss": 1.531215786933899, "lr": 8.64e-06, "seqlen": 8192.0, "step_tflops": 48.55331792289739, "step_time": 0.42491624450683596} +{"epoch": 0, "iter": 145, "iter_tflops": 25.621536295803704, "iter_time": 0.8052246856689452, "loss": 1.5463712215423584, "lr": 8.7e-06, "seqlen": 8192.0, "step_tflops": 26.967201635313177, "step_time": 0.7650439147949217} +{"epoch": 0, "iter": 146, "iter_tflops": 16.43093899239021, "iter_time": 1.2556247406005858, "loss": 1.4296495914459229, "lr": 8.759999999999999e-06, "seqlen": 8192.0, "step_tflops": 19.174218886543823, "step_time": 1.0759809112548828} +{"epoch": 0, "iter": 147, "iter_tflops": 40.82286377612104, "iter_time": 0.5053808479309082, "loss": 1.3284320831298828, "lr": 8.82e-06, "seqlen": 8192.0, "step_tflops": 43.862920336076584, "step_time": 0.4703538513183594} +{"epoch": 0, "iter": 148, "iter_tflops": 42.810543709360495, "iter_time": 0.4819161758422852, "loss": 
1.4234822988510132, "lr": 8.88e-06, "seqlen": 8192.0, "step_tflops": 46.02614556419982, "step_time": 0.448247257232666} +{"epoch": 0, "iter": 149, "iter_tflops": 28.726016883122462, "iter_time": 0.42265005874633793, "loss": 0.016766170039772987, "lr": 8.939999999999999e-06, "seqlen": 4880.0, "step_tflops": 31.80663220708342, "step_time": 0.3817145004272461} +{"epoch": 0, "iter": 150, "iter_tflops": 31.042865561192205, "iter_time": 0.3911060562133789, "loss": 0.051529236137866974, "lr": 9e-06, "seqlen": 4880.0, "step_tflops": 34.068719924828414, "step_time": 0.35636950111389165} +{"epoch": 0, "iter": 151, "iter_tflops": 34.60878799653633, "iter_time": 0.3508083763122559, "loss": 0.032757312059402466, "lr": 9.06e-06, "seqlen": 4880.0, "step_tflops": 38.107566192809266, "step_time": 0.3185995311737061} +{"epoch": 0, "iter": 152, "iter_tflops": 35.21628154203342, "iter_time": 0.3447568054199219, "loss": 0.032004740089178085, "lr": 9.12e-06, "seqlen": 4880.0, "step_tflops": 38.781981538091635, "step_time": 0.3130591125488281} +{"epoch": 0, "iter": 153, "iter_tflops": 40.40432658268696, "iter_time": 0.5106159477233887, "loss": 1.3272353410720825, "lr": 9.18e-06, "seqlen": 8192.0, "step_tflops": 44.45929932353935, "step_time": 0.4640445041656494} +{"epoch": 0, "iter": 154, "iter_tflops": 39.946168732251785, "iter_time": 0.5164723968505859, "loss": 1.2855440378189087, "lr": 9.24e-06, "seqlen": 8192.0, "step_tflops": 43.99951013653623, "step_time": 0.4688937091827393} +{"epoch": 0, "iter": 155, "iter_tflops": 48.29365247661794, "iter_time": 0.4272009353637695, "loss": 1.2535490989685059, "lr": 9.3e-06, "seqlen": 8192.0, "step_tflops": 52.56031869162799, "step_time": 0.39252223014831544} +{"epoch": 0, "iter": 156, "iter_tflops": 45.529326894997695, "iter_time": 0.45313855743408205, "loss": 1.3580127954483032, "lr": 9.36e-06, "seqlen": 8192.0, "step_tflops": 49.109560770025325, "step_time": 0.42010340118408207} +{"epoch": 0, "iter": 157, "iter_tflops": 26.165599507596916, 
"iter_time": 0.7884815902709961, "loss": 0.5071438550949097, "lr": 9.42e-06, "seqlen": 8192.0, "step_tflops": 27.73906600558964, "step_time": 0.7437558822631837} +{"epoch": 0, "iter": 158, "iter_tflops": 16.602098415010097, "iter_time": 1.242679870605469, "loss": 0.5796887874603271, "lr": 9.48e-06, "seqlen": 8192.0, "step_tflops": 18.668611231043617, "step_time": 1.10512202835083} +{"epoch": 0, "iter": 159, "iter_tflops": 41.56364585544461, "iter_time": 0.4963735275268555, "loss": 0.625769853591919, "lr": 9.54e-06, "seqlen": 8192.0, "step_tflops": 45.68702653518517, "step_time": 0.45157444190978996} +{"epoch": 0, "iter": 160, "iter_tflops": 43.76948967076537, "iter_time": 0.47135787200927737, "loss": 0.5935603380203247, "lr": 9.600000000000001e-06, "seqlen": 8192.0, "step_tflops": 48.01499500842207, "step_time": 0.4296802177429199} +{"epoch": 0, "iter": 161, "iter_tflops": 22.615287152060898, "iter_time": 0.9122631683349609, "loss": 0.2490057498216629, "lr": 9.66e-06, "seqlen": 8192.0, "step_tflops": 23.8650307715211, "step_time": 0.8644905471801758} +{"epoch": 0, "iter": 162, "iter_tflops": 18.860196303883452, "iter_time": 1.093896011352539, "loss": 0.3102376163005829, "lr": 9.72e-06, "seqlen": 8192.0, "step_tflops": 22.741970546554477, "step_time": 0.9071814365386962} +{"epoch": 0, "iter": 163, "iter_tflops": 43.600039556355576, "iter_time": 0.4731897888183594, "loss": 0.27336275577545166, "lr": 9.780000000000001e-06, "seqlen": 8192.0, "step_tflops": 48.1230853743768, "step_time": 0.4287151031494141} +{"epoch": 0, "iter": 164, "iter_tflops": 41.211096353396165, "iter_time": 0.5006198654174805, "loss": 0.24339637160301208, "lr": 9.84e-06, "seqlen": 8192.0, "step_tflops": 44.97140759740186, "step_time": 0.4587602348327637} +{"epoch": 0, "iter": 165, "iter_tflops": 25.082418956068427, "iter_time": 0.8225320510864258, "loss": 1.2621097564697266, "lr": 9.9e-06, "seqlen": 8192.0, "step_tflops": 26.658323369531015, "step_time": 0.7739081420898437} +{"epoch": 0, "iter": 
166, "iter_tflops": 16.02928996227348, "iter_time": 1.2870871734619143, "loss": 1.4170351028442383, "lr": 9.960000000000001e-06, "seqlen": 8192.0, "step_tflops": 18.873316351985817, "step_time": 1.0931355743408202} +{"epoch": 0, "iter": 167, "iter_tflops": 44.32364010401013, "iter_time": 0.4654647827148437, "loss": 1.1922229528427124, "lr": 1.002e-05, "seqlen": 8192.0, "step_tflops": 48.37110712465304, "step_time": 0.4265168762207031} +{"epoch": 0, "iter": 168, "iter_tflops": 45.058481669957224, "iter_time": 0.45787369537353506, "loss": 1.4661996364593506, "lr": 1.008e-05, "seqlen": 8192.0, "step_tflops": 48.80904152775023, "step_time": 0.4226899948120117} +{"epoch": 0, "iter": 169, "iter_tflops": 25.76530114736325, "iter_time": 0.800731704711914, "loss": 0.943618655204773, "lr": 1.0140000000000001e-05, "seqlen": 8192.0, "step_tflops": 27.25894178229486, "step_time": 0.7568559951782227} +{"epoch": 0, "iter": 170, "iter_tflops": 19.09679581182124, "iter_time": 1.0803432006835938, "loss": 0.8966913223266602, "lr": 1.02e-05, "seqlen": 8192.0, "step_tflops": 25.226918345279238, "step_time": 0.8178206005096436} +{"epoch": 0, "iter": 171, "iter_tflops": 41.61805709778698, "iter_time": 0.4957245712280273, "loss": 0.8642070889472961, "lr": 1.0260000000000002e-05, "seqlen": 8192.0, "step_tflops": 45.43045825380515, "step_time": 0.4541247062683106} +{"epoch": 0, "iter": 172, "iter_tflops": 38.4828230253297, "iter_time": 0.5361117477416991, "loss": 0.8393124938011169, "lr": 1.032e-05, "seqlen": 8192.0, "step_tflops": 41.674746085219034, "step_time": 0.49505025100708} +{"epoch": 0, "iter": 173, "iter_tflops": 35.46573859291717, "iter_time": 0.5817189865112304, "loss": 0.20916251838207245, "lr": 1.0379999999999999e-05, "seqlen": 8192.0, "step_tflops": 39.442111795505056, "step_time": 0.5230727405548096} +{"epoch": 0, "iter": 174, "iter_tflops": 43.649526245797944, "iter_time": 0.47265332031249996, "loss": 0.2284487634897232, "lr": 1.044e-05, "seqlen": 8192.0, "step_tflops": 
48.16781602426834, "step_time": 0.4283169803619385} +{"epoch": 0, "iter": 175, "iter_tflops": 43.44129779753891, "iter_time": 0.4749189033508301, "loss": 0.3024371564388275, "lr": 1.05e-05, "seqlen": 8192.0, "step_tflops": 47.93271394444333, "step_time": 0.4304178047180176} +{"epoch": 0, "iter": 176, "iter_tflops": 49.11705582329119, "iter_time": 0.4200392951965332, "loss": 0.26949402689933777, "lr": 1.0559999999999999e-05, "seqlen": 8192.0, "step_tflops": 53.80799430754371, "step_time": 0.38342060089111324} +{"epoch": 0, "iter": 177, "iter_tflops": 24.809388457136837, "iter_time": 0.8315841217041015, "loss": 1.4070398807525635, "lr": 1.062e-05, "seqlen": 8192.0, "step_tflops": 26.33221004966101, "step_time": 0.7834926681518555} +{"epoch": 0, "iter": 178, "iter_tflops": 14.61568874094709, "iter_time": 1.4115717620849608, "loss": 1.473947286605835, "lr": 1.068e-05, "seqlen": 8192.0, "step_tflops": 17.579723902659612, "step_time": 1.1735732383728028} +{"epoch": 0, "iter": 179, "iter_tflops": 38.540829603982374, "iter_time": 0.5353048629760742, "loss": 1.3512235879898071, "lr": 1.074e-05, "seqlen": 8192.0, "step_tflops": 41.881120431895944, "step_time": 0.4926108303070068} +{"epoch": 0, "iter": 180, "iter_tflops": 40.91624663555449, "iter_time": 0.5042274208068847, "loss": 1.3573906421661377, "lr": 1.08e-05, "seqlen": 8192.0, "step_tflops": 44.23770492487341, "step_time": 0.4663689842224121} +{"epoch": 0, "iter": 181, "iter_tflops": 19.757147244136107, "iter_time": 1.0442344360351563, "loss": 1.0650379657745361, "lr": 1.086e-05, "seqlen": 8192.0, "step_tflops": 21.402197261408407, "step_time": 0.9639708137512206} +{"epoch": 0, "iter": 182, "iter_tflops": 16.523177373559932, "iter_time": 1.2486153869628904, "loss": 0.9560708999633789, "lr": 1.092e-05, "seqlen": 8192.0, "step_tflops": 19.89056929372675, "step_time": 1.0372299156188964} +{"epoch": 0, "iter": 183, "iter_tflops": 46.624776577562656, "iter_time": 0.4424920616149902, "loss": 0.9745517373085022, "lr": 
1.098e-05, "seqlen": 8192.0, "step_tflops": 50.757474742171794, "step_time": 0.40646414375305173} +{"epoch": 0, "iter": 184, "iter_tflops": 46.492823316158, "iter_time": 0.44374791717529294, "loss": 1.1533089876174927, "lr": 1.104e-05, "seqlen": 8192.0, "step_tflops": 50.04329955329576, "step_time": 0.41226485252380374} +{"epoch": 0, "iter": 185, "iter_tflops": 30.49397852070658, "iter_time": 0.6765628662109375, "loss": 0.9068030714988708, "lr": 1.11e-05, "seqlen": 8192.0, "step_tflops": 32.36721147511176, "step_time": 0.6374071960449219} +{"epoch": 0, "iter": 186, "iter_tflops": 12.083642896227373, "iter_time": 1.7073571014404298, "loss": 0.9107381105422974, "lr": 1.116e-05, "seqlen": 8192.0, "step_tflops": 14.624121158587903, "step_time": 1.4107578353881838} +{"epoch": 0, "iter": 187, "iter_tflops": 37.77584845578831, "iter_time": 0.5461450729370118, "loss": 0.7233052253723145, "lr": 1.1220000000000001e-05, "seqlen": 8192.0, "step_tflops": 41.629706258257166, "step_time": 0.4955858535766602} +{"epoch": 0, "iter": 188, "iter_tflops": 39.139288518171966, "iter_time": 0.5271197891235352, "loss": 0.9600797891616821, "lr": 1.128e-05, "seqlen": 8192.0, "step_tflops": 42.78420613501767, "step_time": 0.4822128391265869} +{"epoch": 0, "iter": 189, "iter_tflops": 25.79541527481838, "iter_time": 0.7997969131469727, "loss": 0.4837993085384369, "lr": 1.134e-05, "seqlen": 8192.0, "step_tflops": 27.799189295752523, "step_time": 0.7421473083496094} +{"epoch": 0, "iter": 190, "iter_tflops": 7.809017970931191, "iter_time": 2.641957489013672, "loss": 0.6589922904968262, "lr": 1.1400000000000001e-05, "seqlen": 8192.0, "step_tflops": 8.573688607666407, "step_time": 2.4063264312744144} +{"epoch": 0, "iter": 191, "iter_tflops": 11.43679109140465, "iter_time": 1.8039232635498046, "loss": 0.4614115059375763, "lr": 1.146e-05, "seqlen": 8192.0, "step_tflops": 15.264250662138874, "step_time": 1.3515955657958985} +{"epoch": 0, "iter": 192, "iter_tflops": 45.865041276523485, "iter_time": 
0.4498217582702636, "loss": 0.49152976274490356, "lr": 1.152e-05, "seqlen": 8192.0, "step_tflops": 49.548588751082704, "step_time": 0.41638105201721193} +{"epoch": 0, "iter": 193, "iter_tflops": 24.817397942855152, "iter_time": 0.618814109802246, "loss": 1.0524660348892212, "lr": 1.1580000000000001e-05, "seqlen": 6144.0, "step_tflops": 26.58597582570768, "step_time": 0.5776487617492676} +{"epoch": 0, "iter": 194, "iter_tflops": 12.507608588496929, "iter_time": 1.2278411102294924, "loss": 1.0460803508758545, "lr": 1.164e-05, "seqlen": 6144.0, "step_tflops": 14.779413376731048, "step_time": 1.0391045722961425} +{"epoch": 0, "iter": 195, "iter_tflops": 23.430102691849754, "iter_time": 0.6554540634155273, "loss": 0.9729478359222412, "lr": 1.1700000000000001e-05, "seqlen": 6144.0, "step_tflops": 25.414983203423592, "step_time": 0.6042638664245605} +{"epoch": 0, "iter": 196, "iter_tflops": 22.796131316398753, "iter_time": 0.6736825561523437, "loss": 1.018281102180481, "lr": 1.1760000000000001e-05, "seqlen": 6144.0, "step_tflops": 24.45949228479371, "step_time": 0.6278689613342285} +{"epoch": 0, "iter": 197, "iter_tflops": 22.394282218639255, "iter_time": 0.9212661209106445, "loss": 0.9962446093559265, "lr": 1.182e-05, "seqlen": 8192.0, "step_tflops": 23.909388598377152, "step_time": 0.8628867034912111} +{"epoch": 0, "iter": 198, "iter_tflops": 24.138378206137396, "iter_time": 0.8547008972167969, "loss": 1.124510645866394, "lr": 1.1880000000000001e-05, "seqlen": 8192.0, "step_tflops": 27.18219391898782, "step_time": 0.7589929485321045} +{"epoch": 0, "iter": 199, "iter_tflops": 44.114124153296416, "iter_time": 0.467675464630127, "loss": 1.1367504596710205, "lr": 1.1940000000000001e-05, "seqlen": 8192.0, "step_tflops": 47.45439341154061, "step_time": 0.4347562370300293} +{"epoch": 0, "iter": 200, "iter_tflops": 44.081809154762105, "iter_time": 0.46801830291748037, "loss": 1.2129052877426147, "lr": 1.2e-05, "seqlen": 8192.0, "step_tflops": 47.50630641159278, "step_time": 
0.4342811527252197} +{"epoch": 0, "iter": 201, "iter_tflops": 39.97634982648703, "iter_time": 0.5160824737548828, "loss": 0.205076202750206, "lr": 1.2060000000000001e-05, "seqlen": 8192.0, "step_tflops": 43.39447913339242, "step_time": 0.47543129730224615} +{"epoch": 0, "iter": 202, "iter_tflops": 50.385628927121694, "iter_time": 0.40946384811401365, "loss": 0.16341552138328552, "lr": 1.2120000000000001e-05, "seqlen": 8192.0, "step_tflops": 56.30465009268579, "step_time": 0.3664189987182617} +{"epoch": 0, "iter": 203, "iter_tflops": 50.93589187339534, "iter_time": 0.40504039001464837, "loss": 0.23840691149234772, "lr": 1.2180000000000002e-05, "seqlen": 8192.0, "step_tflops": 55.38830643700669, "step_time": 0.37248103141784666} +{"epoch": 0, "iter": 204, "iter_tflops": 51.852435335890114, "iter_time": 0.39788089752197264, "loss": 0.2545650601387024, "lr": 1.224e-05, "seqlen": 8192.0, "step_tflops": 56.3867413141384, "step_time": 0.36588554382324223} +{"epoch": 0, "iter": 205, "iter_tflops": 33.5527949675845, "iter_time": 0.6148844985961914, "loss": 1.5548263788223267, "lr": 1.2299999999999999e-05, "seqlen": 8192.0, "step_tflops": 35.77281887076899, "step_time": 0.5767254066467286} +{"epoch": 0, "iter": 206, "iter_tflops": 15.706087946013044, "iter_time": 1.3135730285644533, "loss": 1.4607678651809692, "lr": 1.236e-05, "seqlen": 8192.0, "step_tflops": 20.220699033122926, "step_time": 1.0202957611083983} +{"epoch": 0, "iter": 207, "iter_tflops": 36.97642696342768, "iter_time": 0.5579525985717774, "loss": 1.2424415349960327, "lr": 1.242e-05, "seqlen": 8192.0, "step_tflops": 40.23048211966338, "step_time": 0.5128224277496338} +{"epoch": 0, "iter": 208, "iter_tflops": 39.683211670898956, "iter_time": 0.5198947525024414, "loss": 1.3333094120025635, "lr": 1.2479999999999999e-05, "seqlen": 8192.0, "step_tflops": 42.4443332003823, "step_time": 0.4860741577148438} +{"epoch": 0, "iter": 209, "iter_tflops": 9.906313364348765, "iter_time": 1.0013051147460936, "loss": 
0.14748996496200562, "lr": 1.254e-05, "seqlen": 4000.0, "step_tflops": 10.52291253803268, "step_time": 0.9426327743530274} +{"epoch": 0, "iter": 210, "iter_tflops": 7.281338740192761, "iter_time": 1.362282760620117, "loss": 0.09562236815690994, "lr": 1.26e-05, "seqlen": 4000.0, "step_tflops": 9.462474771543695, "step_time": 1.0482714595794678} +{"epoch": 0, "iter": 211, "iter_tflops": 21.77830538331196, "iter_time": 0.45546437454223637, "loss": 0.12725526094436646, "lr": 1.2659999999999999e-05, "seqlen": 4000.0, "step_tflops": 24.050393607089827, "step_time": 0.412435754776001} +{"epoch": 0, "iter": 212, "iter_tflops": 23.671536580773246, "iter_time": 0.4190366859436035, "loss": 0.09237241744995117, "lr": 1.272e-05, "seqlen": 4000.0, "step_tflops": 26.00110970982233, "step_time": 0.38149303436279297} +{"epoch": 0, "iter": 213, "iter_tflops": 19.466720394936967, "iter_time": 1.0598135223388672, "loss": 1.2751749753952026, "lr": 1.278e-05, "seqlen": 8192.0, "step_tflops": 20.472182008348828, "step_time": 1.0077623138427734} +{"epoch": 0, "iter": 214, "iter_tflops": 14.165751762773514, "iter_time": 1.4564065399169923, "loss": 1.3223717212677002, "lr": 1.284e-05, "seqlen": 8192.0, "step_tflops": 17.040421833457486, "step_time": 1.210714952468872} +{"epoch": 0, "iter": 215, "iter_tflops": 40.098576335870334, "iter_time": 0.5145093765258789, "loss": 1.3537883758544922, "lr": 1.29e-05, "seqlen": 8192.0, "step_tflops": 43.849718168366856, "step_time": 0.47049546432495115} +{"epoch": 0, "iter": 216, "iter_tflops": 38.05510061622278, "iter_time": 0.5421374053955078, "loss": 1.0796077251434326, "lr": 1.296e-05, "seqlen": 8192.0, "step_tflops": 41.27146941685383, "step_time": 0.49988754463195795} +{"epoch": 0, "iter": 217, "iter_tflops": 23.322752939012727, "iter_time": 0.8845908355712891, "loss": 1.4653565883636475, "lr": 1.302e-05, "seqlen": 8192.0, "step_tflops": 24.70738148054878, "step_time": 0.8350174026489257} +{"epoch": 0, "iter": 218, "iter_tflops": 
13.612197442929608, "iter_time": 1.515632843017578, "loss": 1.2978672981262207, "lr": 1.308e-05, "seqlen": 8192.0, "step_tflops": 17.067030166522475, "step_time": 1.2088273887634275} +{"epoch": 0, "iter": 219, "iter_tflops": 47.166012905628904, "iter_time": 0.43741440582275387, "loss": 1.4609891176223755, "lr": 1.314e-05, "seqlen": 8192.0, "step_tflops": 51.04301094945835, "step_time": 0.4041903705596924} +{"epoch": 0, "iter": 220, "iter_tflops": 45.14394909566519, "iter_time": 0.4570068397521972, "loss": 1.5547784566879272, "lr": 1.32e-05, "seqlen": 8192.0, "step_tflops": 48.544309556248606, "step_time": 0.4249950962066651} +{"epoch": 0, "iter": 221, "iter_tflops": 42.35606789093782, "iter_time": 0.4870870819091797, "loss": 0.19794310629367828, "lr": 1.326e-05, "seqlen": 8192.0, "step_tflops": 45.70517829414361, "step_time": 0.45139509963989255} +{"epoch": 0, "iter": 222, "iter_tflops": 20.165616579161146, "iter_time": 1.0230827026367189, "loss": 0.21468229591846466, "lr": 1.3320000000000001e-05, "seqlen": 8192.0, "step_tflops": 24.54918558701937, "step_time": 0.840398286819458} +{"epoch": 0, "iter": 223, "iter_tflops": 45.707689057946624, "iter_time": 0.45137030410766604, "loss": 0.17540059983730316, "lr": 1.338e-05, "seqlen": 8192.0, "step_tflops": 50.043375725455114, "step_time": 0.41226422500610355} +{"epoch": 0, "iter": 224, "iter_tflops": 41.6483535463197, "iter_time": 0.49536396408081057, "loss": 0.14954955875873566, "lr": 1.344e-05, "seqlen": 8192.0, "step_tflops": 45.786288700372076, "step_time": 0.450595453262329} +{"epoch": 0, "iter": 225, "iter_tflops": 21.90452941800963, "iter_time": 0.9418642654418945, "loss": 1.209694504737854, "lr": 1.3500000000000001e-05, "seqlen": 8192.0, "step_tflops": 23.456787807460806, "step_time": 0.8795361785888671} +{"epoch": 0, "iter": 226, "iter_tflops": 9.935736052208448, "iter_time": 2.0764534606933593, "loss": 1.2348915338516235, "lr": 1.356e-05, "seqlen": 8192.0, "step_tflops": 12.51378101562854, "step_time": 
1.6486698532104491} +{"epoch": 0, "iter": 227, "iter_tflops": 15.191194391281734, "iter_time": 1.358095550537109, "loss": 1.3379225730895996, "lr": 1.362e-05, "seqlen": 8192.0, "step_tflops": 21.396485557824924, "step_time": 0.9642281417846679} +{"epoch": 0, "iter": 228, "iter_tflops": 45.58785124986571, "iter_time": 0.4525568313598633, "loss": 1.2395906448364258, "lr": 1.3680000000000001e-05, "seqlen": 8192.0, "step_tflops": 49.24400772787985, "step_time": 0.4189564266204834} +{"epoch": 0, "iter": 229, "iter_tflops": 25.40398717050487, "iter_time": 0.6029155654907227, "loss": 0.8663157224655151, "lr": 1.374e-05, "seqlen": 6128.0, "step_tflops": 27.205434435136702, "step_time": 0.5629926376342773} +{"epoch": 0, "iter": 230, "iter_tflops": 21.202134177852322, "iter_time": 0.7224017715454102, "loss": 0.7124512791633606, "lr": 1.3800000000000002e-05, "seqlen": 6128.0, "step_tflops": 23.336606141200313, "step_time": 0.6563276252746582} +{"epoch": 0, "iter": 231, "iter_tflops": 24.370491981544447, "iter_time": 0.6284837951660156, "loss": 0.9250337481498718, "lr": 1.3860000000000001e-05, "seqlen": 6128.0, "step_tflops": 26.201972553703555, "step_time": 0.5845536727905274} +{"epoch": 0, "iter": 232, "iter_tflops": 23.989555847290827, "iter_time": 0.6384636459350587, "loss": 0.7177670001983643, "lr": 1.392e-05, "seqlen": 6128.0, "step_tflops": 25.835601475622767, "step_time": 0.5928431472778322} +{"epoch": 0, "iter": 233, "iter_tflops": 25.63100429984718, "iter_time": 0.8049272384643553, "loss": 1.3769655227661133, "lr": 1.3980000000000002e-05, "seqlen": 8192.0, "step_tflops": 27.579057345735002, "step_time": 0.748071018218994} +{"epoch": 0, "iter": 234, "iter_tflops": 8.248336092118636, "iter_time": 2.5012430725097654, "loss": 1.2816873788833618, "lr": 1.4040000000000001e-05, "seqlen": 8192.0, "step_tflops": 9.846450515053823, "step_time": 2.095282302856446} +{"epoch": 0, "iter": 235, "iter_tflops": 11.531452325076836, "iter_time": 1.7891149291992186, "loss": 
1.1181061267852783, "lr": 1.4099999999999999e-05, "seqlen": 8192.0, "step_tflops": 14.406012783790134, "step_time": 1.4321168403625486} +{"epoch": 0, "iter": 236, "iter_tflops": 46.870080270382815, "iter_time": 0.4401761932373047, "loss": 1.0505281686782837, "lr": 1.416e-05, "seqlen": 8192.0, "step_tflops": 50.88604814594458, "step_time": 0.40543713378906254} +{"epoch": 0, "iter": 237, "iter_tflops": 24.130077934313007, "iter_time": 0.5772371826171875, "loss": 0.886495053768158, "lr": 1.422e-05, "seqlen": 5584.0, "step_tflops": 25.96223704103511, "step_time": 0.53650146484375} +{"epoch": 0, "iter": 238, "iter_tflops": 23.53200807596001, "iter_time": 0.5919077606201172, "loss": 0.8341838717460632, "lr": 1.428e-05, "seqlen": 5584.0, "step_tflops": 24.944409552634305, "step_time": 0.558392780303955} +{"epoch": 0, "iter": 239, "iter_tflops": 24.999776682153406, "iter_time": 0.5571561050415039, "loss": 0.7421590089797974, "lr": 1.434e-05, "seqlen": 5584.0, "step_tflops": 26.46023525410262, "step_time": 0.5264041709899903} +{"epoch": 0, "iter": 240, "iter_tflops": 25.18944194754307, "iter_time": 0.5529609680175781, "loss": 0.7783908843994141, "lr": 1.44e-05, "seqlen": 5584.0, "step_tflops": 26.7374871821437, "step_time": 0.520945671081543} +{"epoch": 0, "iter": 241, "iter_tflops": 35.64927089471908, "iter_time": 0.578724136352539, "loss": 0.8110135197639465, "lr": 1.446e-05, "seqlen": 8192.0, "step_tflops": 38.21409837955439, "step_time": 0.5398817291259767} +{"epoch": 0, "iter": 242, "iter_tflops": 19.90004617188554, "iter_time": 1.0367359619140624, "loss": 0.7531841993331909, "lr": 1.452e-05, "seqlen": 8192.0, "step_tflops": 22.408872913628176, "step_time": 0.9206662731170655} +{"epoch": 0, "iter": 243, "iter_tflops": 47.05320108531646, "iter_time": 0.43846312332153314, "loss": 0.9010996222496033, "lr": 1.458e-05, "seqlen": 8192.0, "step_tflops": 50.800592161533096, "step_time": 0.4061191539764404} +{"epoch": 0, "iter": 244, "iter_tflops": 51.10976409222442, 
"iter_time": 0.403662467956543, "loss": 0.8071631789207458, "lr": 1.464e-05, "seqlen": 8192.0, "step_tflops": 55.139670536079656, "step_time": 0.37416062355041496} +{"epoch": 0, "iter": 245, "iter_tflops": 34.1409711137966, "iter_time": 0.3805734100341797, "loss": 0.020747283473610878, "lr": 1.47e-05, "seqlen": 5216.0, "step_tflops": 37.765556207643066, "step_time": 0.3440475158691406} +{"epoch": 0, "iter": 246, "iter_tflops": 26.26935637836838, "iter_time": 0.4946122627258301, "loss": 0.023791303858160973, "lr": 1.4760000000000001e-05, "seqlen": 5216.0, "step_tflops": 31.71146573943319, "step_time": 0.40973021888732913} +{"epoch": 0, "iter": 247, "iter_tflops": 37.64016840280278, "iter_time": 0.3451936149597168, "loss": 0.03346379101276398, "lr": 1.482e-05, "seqlen": 5216.0, "step_tflops": 41.32545236879826, "step_time": 0.31441024971008297} +{"epoch": 0, "iter": 248, "iter_tflops": 37.3988328159197, "iter_time": 0.34742115783691413, "loss": 0.022663259878754616, "lr": 1.488e-05, "seqlen": 5216.0, "step_tflops": 40.83478033936356, "step_time": 0.31818821334838865} +{"epoch": 0, "iter": 249, "iter_tflops": 37.912822891221, "iter_time": 0.5441719169616699, "loss": 1.3723891973495483, "lr": 1.4940000000000001e-05, "seqlen": 8192.0, "step_tflops": 40.6340802805161, "step_time": 0.5077288169860841} +{"epoch": 0, "iter": 250, "iter_tflops": 12.806990208463569, "iter_time": 1.6109244384765624, "loss": 1.2302803993225098, "lr": 1.5e-05, "seqlen": 8192.0, "step_tflops": 16.334572286227818, "step_time": 1.2630323677062987} +{"epoch": 0, "iter": 251, "iter_tflops": 46.44186001205538, "iter_time": 0.44423486709594723, "loss": 1.4404901266098022, "lr": 1.506e-05, "seqlen": 8192.0, "step_tflops": 50.894290707325894, "step_time": 0.4053714714050293} +{"epoch": 0, "iter": 252, "iter_tflops": 49.84460397766082, "iter_time": 0.41390826416015625, "loss": 1.3222739696502686, "lr": 1.5120000000000001e-05, "seqlen": 8192.0, "step_tflops": 53.754066381205305, "step_time": 
0.38380526161193845} +{"epoch": 0, "iter": 253, "iter_tflops": 48.367337106621505, "iter_time": 0.4265501213073731, "loss": 0.5225178003311157, "lr": 1.518e-05, "seqlen": 8192.0, "step_tflops": 52.99574013552004, "step_time": 0.3892972049713135} +{"epoch": 0, "iter": 254, "iter_tflops": 43.97117105303109, "iter_time": 0.4691959075927734, "loss": 0.4177249073982239, "lr": 1.524e-05, "seqlen": 8192.0, "step_tflops": 47.80649928452413, "step_time": 0.43155415725708} +{"epoch": 0, "iter": 255, "iter_tflops": 50.67377480615342, "iter_time": 0.40713551712036133, "loss": 0.47233477234840393, "lr": 1.53e-05, "seqlen": 8192.0, "step_tflops": 54.83428836989388, "step_time": 0.3762443923950195} +{"epoch": 0, "iter": 256, "iter_tflops": 49.977060943007466, "iter_time": 0.41281126022338865, "loss": 0.33595728874206543, "lr": 1.5360000000000002e-05, "seqlen": 8192.0, "step_tflops": 54.14961643358826, "step_time": 0.38100165557861326} +{"epoch": 0, "iter": 257, "iter_tflops": 45.585224918176166, "iter_time": 0.45258290481567387, "loss": 0.2246416211128235, "lr": 1.542e-05, "seqlen": 8192.0, "step_tflops": 49.74706534027131, "step_time": 0.41471981048583983} +{"epoch": 0, "iter": 258, "iter_tflops": 9.003696881327716, "iter_time": 2.2914024963378905, "loss": 0.17975591123104095, "lr": 1.548e-05, "seqlen": 8192.0, "step_tflops": 10.69328032319522, "step_time": 1.9293512268066408} +{"epoch": 0, "iter": 259, "iter_tflops": 12.738580643615174, "iter_time": 1.6195755310058595, "loss": 0.18339164555072784, "lr": 1.554e-05, "seqlen": 8192.0, "step_tflops": 14.735248013014576, "step_time": 1.4001185111999512} +{"epoch": 0, "iter": 260, "iter_tflops": 27.663251235431783, "iter_time": 0.7457942428588867, "loss": 0.28510788083076477, "lr": 1.56e-05, "seqlen": 8192.0, "step_tflops": 39.75963620570394, "step_time": 0.518895429611206} +{"epoch": 0, "iter": 261, "iter_tflops": 14.633684266054807, "iter_time": 1.0215199966430664, "loss": 0.8183011412620544, "lr": 1.5660000000000003e-05, "seqlen": 
5984.0, "step_tflops": 15.66387700224095, "step_time": 0.9543359603881836} +{"epoch": 0, "iter": 262, "iter_tflops": 13.432263246704514, "iter_time": 1.1128877410888671, "loss": 0.7458704710006714, "lr": 1.5720000000000002e-05, "seqlen": 5984.0, "step_tflops": 16.406899413838936, "step_time": 0.9111167640686035} +{"epoch": 0, "iter": 263, "iter_tflops": 27.11570724710116, "iter_time": 0.5512893676757813, "loss": 0.7235580682754517, "lr": 1.578e-05, "seqlen": 5984.0, "step_tflops": 28.91255299805003, "step_time": 0.5170280570983886} +{"epoch": 0, "iter": 264, "iter_tflops": 27.692901971533345, "iter_time": 0.5397990112304688, "loss": 0.7478819489479065, "lr": 1.584e-05, "seqlen": 5984.0, "step_tflops": 29.45787771163496, "step_time": 0.5074568252563476} +{"epoch": 0, "iter": 265, "iter_tflops": 41.656969374082074, "iter_time": 0.49526150894165044, "loss": 0.2911660671234131, "lr": 1.59e-05, "seqlen": 8192.0, "step_tflops": 45.707445308632494, "step_time": 0.45137271118164063} +{"epoch": 0, "iter": 266, "iter_tflops": 16.92509377173193, "iter_time": 1.218964797973633, "loss": 0.2851850390434265, "lr": 1.596e-05, "seqlen": 8192.0, "step_tflops": 20.641901011195674, "step_time": 0.9994764289855956} +{"epoch": 0, "iter": 267, "iter_tflops": 42.94718068918375, "iter_time": 0.4803829536437988, "loss": 0.35079559683799744, "lr": 1.6020000000000002e-05, "seqlen": 8192.0, "step_tflops": 54.01867061683506, "step_time": 0.38192523574829096} +{"epoch": 0, "iter": 268, "iter_tflops": 52.83695707471121, "iter_time": 0.3904671020507812, "loss": 0.29420387744903564, "lr": 1.6080000000000002e-05, "seqlen": 8192.0, "step_tflops": 57.30822751331571, "step_time": 0.36000229644775394} +{"epoch": 0, "iter": 269, "iter_tflops": 32.017552931465296, "iter_time": 0.6443682174682617, "loss": 0.6026943922042847, "lr": 1.614e-05, "seqlen": 8192.0, "step_tflops": 34.58142869317474, "step_time": 0.596594596862793} +{"epoch": 0, "iter": 270, "iter_tflops": 16.211036707148452, "iter_time": 
1.272657257080078, "loss": 0.5819262862205505, "lr": 1.62e-05, "seqlen": 8192.0, "step_tflops": 18.599622369084475, "step_time": 1.1092210960388185} +{"epoch": 0, "iter": 271, "iter_tflops": 38.94153062860522, "iter_time": 0.5297966766357421, "loss": 0.5579386353492737, "lr": 1.626e-05, "seqlen": 8192.0, "step_tflops": 43.21153781603792, "step_time": 0.4774440937042236} +{"epoch": 0, "iter": 272, "iter_tflops": 37.04896103576233, "iter_time": 0.5568602447509766, "loss": 0.486864298582077, "lr": 1.6320000000000003e-05, "seqlen": 8192.0, "step_tflops": 40.36555430319859, "step_time": 0.511106409072876} +{"epoch": 0, "iter": 273, "iter_tflops": 20.107411358857757, "iter_time": 1.026044235229492, "loss": 0.7096089124679565, "lr": 1.6380000000000002e-05, "seqlen": 8192.0, "step_tflops": 21.69596564216687, "step_time": 0.950918426513672} +{"epoch": 0, "iter": 274, "iter_tflops": 21.34295512022723, "iter_time": 0.9666465301513673, "loss": 0.536216676235199, "lr": 1.6440000000000002e-05, "seqlen": 8192.0, "step_tflops": 28.513288585508665, "step_time": 0.7235606460571289} +{"epoch": 0, "iter": 275, "iter_tflops": 49.617759953485255, "iter_time": 0.41580058288574223, "loss": 0.8336670994758606, "lr": 1.65e-05, "seqlen": 8192.0, "step_tflops": 53.75832003819748, "step_time": 0.3837748928070069} +{"epoch": 0, "iter": 276, "iter_tflops": 52.090827251392895, "iter_time": 0.39606000900268556, "loss": 0.6739900708198547, "lr": 1.656e-05, "seqlen": 8192.0, "step_tflops": 56.41998135467839, "step_time": 0.3656699810028076} +{"epoch": 0, "iter": 277, "iter_tflops": 39.24291577144078, "iter_time": 0.5257278442382812, "loss": 0.2565365433692932, "lr": 1.6620000000000004e-05, "seqlen": 8192.0, "step_tflops": 42.27723117617424, "step_time": 0.48799538040161133} +{"epoch": 0, "iter": 278, "iter_tflops": 14.855686837214273, "iter_time": 1.3887673950195314, "loss": 0.285427987575531, "lr": 1.6680000000000003e-05, "seqlen": 8192.0, "step_tflops": 18.89092837160637, "step_time": 
1.0921164436340332} +{"epoch": 0, "iter": 279, "iter_tflops": 41.41792324005401, "iter_time": 0.4981199417114257, "loss": 0.3763643801212311, "lr": 1.6740000000000002e-05, "seqlen": 8192.0, "step_tflops": 45.585659289092916, "step_time": 0.45257859230041503} +{"epoch": 0, "iter": 280, "iter_tflops": 49.73800815880957, "iter_time": 0.41479533004760744, "loss": 0.31031933426856995, "lr": 1.6800000000000002e-05, "seqlen": 8192.0, "step_tflops": 54.28744058103175, "step_time": 0.38003437423706055} +{"epoch": 0, "iter": 281, "iter_tflops": 31.1878453124472, "iter_time": 0.6615107040405274, "loss": 0.2924528419971466, "lr": 1.686e-05, "seqlen": 8192.0, "step_tflops": 33.82954728784607, "step_time": 0.6098542594909668} +{"epoch": 0, "iter": 282, "iter_tflops": 12.598648001021228, "iter_time": 1.6375640869140626, "loss": 0.28760474920272827, "lr": 1.6919999999999997e-05, "seqlen": 8192.0, "step_tflops": 13.777221004345893, "step_time": 1.4974785919189455} +{"epoch": 0, "iter": 283, "iter_tflops": 14.846450529388722, "iter_time": 1.389631378173828, "loss": 0.27303850650787354, "lr": 1.698e-05, "seqlen": 8192.0, "step_tflops": 20.50418891720622, "step_time": 1.0061892032623292} +{"epoch": 0, "iter": 284, "iter_tflops": 39.279350586612196, "iter_time": 0.5252401885986329, "loss": 0.3199981153011322, "lr": 1.704e-05, "seqlen": 8192.0, "step_tflops": 43.10236692881876, "step_time": 0.47865337753295895} +{"epoch": 0, "iter": 285, "iter_tflops": 22.71604382763474, "iter_time": 0.6922701416015624, "loss": 0.6290707588195801, "lr": 1.71e-05, "seqlen": 6288.0, "step_tflops": 24.548602739069686, "step_time": 0.6405920143127442} +{"epoch": 0, "iter": 286, "iter_tflops": 24.791698527365927, "iter_time": 0.6343106689453124, "loss": 0.5357489585876465, "lr": 1.716e-05, "seqlen": 6288.0, "step_tflops": 26.655779359505452, "step_time": 0.589952320098877} +{"epoch": 0, "iter": 287, "iter_tflops": 27.09120116989317, "iter_time": 0.5804703445434571, "loss": 0.4836196005344391, "lr": 
1.7219999999999998e-05, "seqlen": 6288.0, "step_tflops": 28.985105713383064, "step_time": 0.5425420570373536} +{"epoch": 0, "iter": 288, "iter_tflops": 25.159056652193446, "iter_time": 0.625048828125, "loss": 0.45776426792144775, "lr": 1.728e-05, "seqlen": 6288.0, "step_tflops": 26.998851527443765, "step_time": 0.5824558448791504} +{"epoch": 0, "iter": 289, "iter_tflops": 22.191285356507606, "iter_time": 0.929693489074707, "loss": 0.6594906449317932, "lr": 1.734e-05, "seqlen": 8192.0, "step_tflops": 23.926870295668778, "step_time": 0.8622562522888183} +{"epoch": 0, "iter": 290, "iter_tflops": 16.172515982798547, "iter_time": 1.2756885528564452, "loss": 0.6553142666816711, "lr": 1.74e-05, "seqlen": 8192.0, "step_tflops": 19.619266213227235, "step_time": 1.0515731468200684} +{"epoch": 0, "iter": 291, "iter_tflops": 40.039342394786594, "iter_time": 0.5152705383300782, "loss": 0.7544489502906799, "lr": 1.746e-05, "seqlen": 8192.0, "step_tflops": 42.95463169079651, "step_time": 0.48029962539672855} +{"epoch": 0, "iter": 292, "iter_tflops": 50.28060590787915, "iter_time": 0.4103191108703613, "loss": 0.8351047039031982, "lr": 1.7519999999999998e-05, "seqlen": 8192.0, "step_tflops": 54.64366366530641, "step_time": 0.3775569229125977} +{"epoch": 0, "iter": 293, "iter_tflops": 40.099964182951275, "iter_time": 0.514491569519043, "loss": 1.0363465547561646, "lr": 1.758e-05, "seqlen": 8192.0, "step_tflops": 43.385087901975915, "step_time": 0.4755342102050782} +{"epoch": 0, "iter": 294, "iter_tflops": 14.480321987914994, "iter_time": 1.4247675933837889, "loss": 1.2692655324935913, "lr": 1.764e-05, "seqlen": 8192.0, "step_tflops": 18.425344633113912, "step_time": 1.119712760925293} +{"epoch": 0, "iter": 295, "iter_tflops": 42.55257824240935, "iter_time": 0.4848376846313476, "loss": 0.991767168045044, "lr": 1.77e-05, "seqlen": 8192.0, "step_tflops": 46.417230324042265, "step_time": 0.4444705848693848} +{"epoch": 0, "iter": 296, "iter_tflops": 45.09797838180147, "iter_time": 
0.4574726905822754, "loss": 1.0150283575057983, "lr": 1.776e-05, "seqlen": 8192.0, "step_tflops": 49.04337948037138, "step_time": 0.4206703071594238} +{"epoch": 0, "iter": 297, "iter_tflops": 31.662735502472362, "iter_time": 0.6267110748291016, "loss": 0.19226248562335968, "lr": 1.782e-05, "seqlen": 7888.0, "step_tflops": 34.36755819882611, "step_time": 0.57738716506958} +{"epoch": 0, "iter": 298, "iter_tflops": 46.09219271212737, "iter_time": 0.43051514434814453, "loss": 0.16880249977111816, "lr": 1.7879999999999998e-05, "seqlen": 7888.0, "step_tflops": 50.649048137380234, "step_time": 0.3917820320129394} +{"epoch": 0, "iter": 299, "iter_tflops": 52.20305925054289, "iter_time": 0.3801192359924317, "loss": 0.2603312134742737, "lr": 1.794e-05, "seqlen": 7888.0, "step_tflops": 56.86145403535328, "step_time": 0.3489778327941895} +{"epoch": 0, "iter": 300, "iter_tflops": 51.7952491484088, "iter_time": 0.38311210632324216, "loss": 0.14210590720176697, "lr": 1.8e-05, "seqlen": 7888.0, "step_tflops": 55.99728772610707, "step_time": 0.35436335945129394} +{"epoch": 0, "iter": 301, "iter_tflops": 41.1475917683199, "iter_time": 0.5013924903869629, "loss": 0.4336937665939331, "lr": 1.806e-05, "seqlen": 8192.0, "step_tflops": 44.796415817206295, "step_time": 0.4605523262023925} +{"epoch": 0, "iter": 302, "iter_tflops": 40.791465607188556, "iter_time": 0.5057698516845703, "loss": 0.3508971929550171, "lr": 1.812e-05, "seqlen": 8192.0, "step_tflops": 44.815959945328494, "step_time": 0.46035148048400876} +{"epoch": 0, "iter": 303, "iter_tflops": 52.56990694392545, "iter_time": 0.3924506378173829, "loss": 0.46814849972724915, "lr": 1.818e-05, "seqlen": 8192.0, "step_tflops": 56.954545138000526, "step_time": 0.36223787689208986} +{"epoch": 0, "iter": 304, "iter_tflops": 49.7711368560898, "iter_time": 0.4145192337036133, "loss": 0.40198394656181335, "lr": 1.824e-05, "seqlen": 8192.0, "step_tflops": 53.941710825466465, "step_time": 0.38247013664245605} +{"epoch": 0, "iter": 305, 
"iter_tflops": 25.864760706625454, "iter_time": 0.7976525955200195, "loss": 0.4103003442287445, "lr": 1.83e-05, "seqlen": 8192.0, "step_tflops": 27.12076514583627, "step_time": 0.7607120742797852} +{"epoch": 0, "iter": 306, "iter_tflops": 12.011968942822802, "iter_time": 1.717544692993164, "loss": 0.33307576179504395, "lr": 1.836e-05, "seqlen": 8192.0, "step_tflops": 14.472006946613773, "step_time": 1.425586208343506} +{"epoch": 0, "iter": 307, "iter_tflops": 48.57829105837244, "iter_time": 0.4246978034973145, "loss": 0.4066064953804016, "lr": 1.842e-05, "seqlen": 8192.0, "step_tflops": 52.834157899889576, "step_time": 0.3904877891540527} +{"epoch": 0, "iter": 308, "iter_tflops": 53.741952144161075, "iter_time": 0.3838917770385742, "loss": 0.3153511583805084, "lr": 1.848e-05, "seqlen": 8192.0, "step_tflops": 57.81962725937078, "step_time": 0.3568181686401367} +{"epoch": 0, "iter": 309, "iter_tflops": 31.367867660255115, "iter_time": 0.6577142486572266, "loss": 0.9149670004844666, "lr": 1.854e-05, "seqlen": 8192.0, "step_tflops": 33.378213804557284, "step_time": 0.6181005859375001} +{"epoch": 0, "iter": 310, "iter_tflops": 12.963930529345358, "iter_time": 1.5914227142333985, "loss": 1.0596660375595093, "lr": 1.86e-05, "seqlen": 8192.0, "step_tflops": 16.454611764043598, "step_time": 1.2538183097839357} +{"epoch": 0, "iter": 311, "iter_tflops": 41.41841425032092, "iter_time": 0.4981140365600586, "loss": 1.0023382902145386, "lr": 1.866e-05, "seqlen": 8192.0, "step_tflops": 45.170878761557944, "step_time": 0.45673438453674314} +{"epoch": 0, "iter": 312, "iter_tflops": 42.246487768626864, "iter_time": 0.4883505020141602, "loss": 0.9668651819229126, "lr": 1.872e-05, "seqlen": 8192.0, "step_tflops": 46.04891640646738, "step_time": 0.44802560234069827} +{"epoch": 0, "iter": 313, "iter_tflops": 15.441601773459801, "iter_time": 1.3360721130371092, "loss": 0.5118486285209656, "lr": 1.878e-05, "seqlen": 8192.0, "step_tflops": 16.711066769682507, "step_time": 1.234576690673828} 
+{"epoch": 0, "iter": 314, "iter_tflops": 16.17286128591101, "iter_time": 1.2756613159179686, "loss": 0.49852505326271057, "lr": 1.884e-05, "seqlen": 8192.0, "step_tflops": 20.60263430601485, "step_time": 1.001381338119507} +{"epoch": 0, "iter": 315, "iter_tflops": 38.637706536343245, "iter_time": 0.5339626846313477, "loss": 0.414790540933609, "lr": 1.8900000000000002e-05, "seqlen": 8192.0, "step_tflops": 42.41656201409744, "step_time": 0.4863924026489257} +{"epoch": 0, "iter": 316, "iter_tflops": 46.59038665445685, "iter_time": 0.44281867980957035, "loss": 0.5071983933448792, "lr": 1.896e-05, "seqlen": 8192.0, "step_tflops": 50.6852479282842, "step_time": 0.4070433578491211} +{"epoch": 0, "iter": 317, "iter_tflops": 32.48541862179323, "iter_time": 0.6350878143310548, "loss": 0.029107335954904556, "lr": 1.902e-05, "seqlen": 8192.0, "step_tflops": 35.18744528063848, "step_time": 0.5863197326660157} +{"epoch": 0, "iter": 318, "iter_tflops": 21.33580217022078, "iter_time": 0.9669706039428712, "loss": 0.007018902339041233, "lr": 1.908e-05, "seqlen": 8192.0, "step_tflops": 26.471662773309188, "step_time": 0.7793652286529541} +{"epoch": 0, "iter": 319, "iter_tflops": 58.09610718364001, "iter_time": 0.3551200675964356, "loss": 0.005693414714187384, "lr": 1.914e-05, "seqlen": 8192.0, "step_tflops": 63.85109779071448, "step_time": 0.32311258888244637} +{"epoch": 0, "iter": 320, "iter_tflops": 59.78895248709039, "iter_time": 0.3450653114318848, "loss": 0.004028920084238052, "lr": 1.9200000000000003e-05, "seqlen": 8192.0, "step_tflops": 65.10039599582142, "step_time": 0.3169119510650635} +{"epoch": 0, "iter": 321, "iter_tflops": 44.708398089042994, "iter_time": 0.46145901870727546, "loss": 1.4451289176940918, "lr": 1.9260000000000002e-05, "seqlen": 8192.0, "step_tflops": 48.44579484160128, "step_time": 0.42585932540893556} +{"epoch": 0, "iter": 322, "iter_tflops": 40.3686987798873, "iter_time": 0.5110665969848633, "loss": 1.1420717239379883, "lr": 1.932e-05, "seqlen": 8192.0, 
"step_tflops": 43.85776626660853, "step_time": 0.4704091262817382} +{"epoch": 0, "iter": 323, "iter_tflops": 44.01790959155243, "iter_time": 0.46869771194458004, "loss": 1.6257719993591309, "lr": 1.938e-05, "seqlen": 8192.0, "step_tflops": 47.530209041667945, "step_time": 0.4340627555847168} +{"epoch": 0, "iter": 324, "iter_tflops": 47.4689199697557, "iter_time": 0.4346231918334961, "loss": 1.5048267841339111, "lr": 1.944e-05, "seqlen": 8192.0, "step_tflops": 51.320689593663495, "step_time": 0.4020034351348877} +{"epoch": 0, "iter": 325, "iter_tflops": 36.77653153822886, "iter_time": 0.5609852981567384, "loss": 0.6047444939613342, "lr": 1.95e-05, "seqlen": 8192.0, "step_tflops": 39.383317737541006, "step_time": 0.5238536186218262} +{"epoch": 0, "iter": 326, "iter_tflops": 18.544375337976753, "iter_time": 1.112525665283203, "loss": 0.6439633369445801, "lr": 1.9560000000000002e-05, "seqlen": 8192.0, "step_tflops": 22.24205684643658, "step_time": 0.9275712966918945} +{"epoch": 0, "iter": 327, "iter_tflops": 36.0373694728059, "iter_time": 0.5724916610717773, "loss": 0.6579887866973877, "lr": 1.9620000000000002e-05, "seqlen": 8192.0, "step_tflops": 39.48953164504433, "step_time": 0.5224446239471435} +{"epoch": 0, "iter": 328, "iter_tflops": 42.59110273299755, "iter_time": 0.48439913940429685, "loss": 0.7880885601043701, "lr": 1.968e-05, "seqlen": 8192.0, "step_tflops": 46.493883284980335, "step_time": 0.44373780059814455} +{"epoch": 0, "iter": 329, "iter_tflops": 19.960303828791943, "iter_time": 1.033606185913086, "loss": 1.4239540100097656, "lr": 1.974e-05, "seqlen": 8192.0, "step_tflops": 21.19948862497489, "step_time": 0.973188262939453} +{"epoch": 0, "iter": 330, "iter_tflops": 13.636694986303366, "iter_time": 1.5129100952148438, "loss": 1.2921051979064941, "lr": 1.98e-05, "seqlen": 8192.0, "step_tflops": 17.53978237356447, "step_time": 1.1762456951141358} +{"epoch": 0, "iter": 331, "iter_tflops": 41.358160975383484, "iter_time": 0.4988397216796875, "loss": 
1.1398863792419434, "lr": 1.9860000000000003e-05, "seqlen": 8192.0, "step_tflops": 45.04655395696753, "step_time": 0.4579949340820313} +{"epoch": 0, "iter": 332, "iter_tflops": 36.38157996981119, "iter_time": 0.5670752487182616, "loss": 1.2478491067886353, "lr": 1.9920000000000002e-05, "seqlen": 8192.0, "step_tflops": 39.38125497259793, "step_time": 0.5238810577392577} +{"epoch": 0, "iter": 333, "iter_tflops": 28.511393709093934, "iter_time": 0.7236087341308594, "loss": 1.0987603664398193, "lr": 1.9980000000000002e-05, "seqlen": 8192.0, "step_tflops": 30.680708218519683, "step_time": 0.6724451522827148} +{"epoch": 0, "iter": 334, "iter_tflops": 44.33207243631609, "iter_time": 0.46537624740600586, "loss": 1.0673834085464478, "lr": 2.004e-05, "seqlen": 8192.0, "step_tflops": 47.88977752911494, "step_time": 0.43080370330810547} +{"epoch": 0, "iter": 335, "iter_tflops": 47.21728148454148, "iter_time": 0.4369394607543945, "loss": 1.4033602476119995, "lr": 2.01e-05, "seqlen": 8192.0, "step_tflops": 50.96122276052636, "step_time": 0.4048390598297119} +{"epoch": 0, "iter": 336, "iter_tflops": 49.977047088203356, "iter_time": 0.4128113746643066, "loss": 1.217431902885437, "lr": 2.016e-05, "seqlen": 8192.0, "step_tflops": 53.619492241539206, "step_time": 0.3847685356140137} +{"epoch": 0, "iter": 337, "iter_tflops": 25.812866066688894, "iter_time": 0.7992562103271483, "loss": 1.5042182207107544, "lr": 2.0220000000000003e-05, "seqlen": 8192.0, "step_tflops": 27.116532371279668, "step_time": 0.7608308181762695} +{"epoch": 0, "iter": 338, "iter_tflops": 22.490624156305685, "iter_time": 0.9173197402954101, "loss": 1.1869813203811646, "lr": 2.0280000000000002e-05, "seqlen": 8192.0, "step_tflops": 27.2900154005615, "step_time": 0.7559942054748535} +{"epoch": 0, "iter": 339, "iter_tflops": 47.738799924794534, "iter_time": 0.4321661529541016, "loss": 1.1149892807006836, "lr": 2.0340000000000002e-05, "seqlen": 8192.0, "step_tflops": 51.12099796135499, "step_time": 0.40357376289367675} 
+{"epoch": 0, "iter": 340, "iter_tflops": 45.29671305468459, "iter_time": 0.455465576171875, "loss": 1.259817123413086, "lr": 2.04e-05, "seqlen": 8192.0, "step_tflops": 48.54407709699893, "step_time": 0.42499713134765627} +{"epoch": 0, "iter": 341, "iter_tflops": 34.24589516697448, "iter_time": 0.6024398956298828, "loss": 0.28503331542015076, "lr": 2.046e-05, "seqlen": 8192.0, "step_tflops": 36.52739505724324, "step_time": 0.5648115196228027} +{"epoch": 0, "iter": 342, "iter_tflops": 12.210387699962807, "iter_time": 1.6896345977783205, "loss": 0.24231646955013275, "lr": 2.0520000000000003e-05, "seqlen": 8192.0, "step_tflops": 14.891632099938066, "step_time": 1.3854152030944824} +{"epoch": 0, "iter": 343, "iter_tflops": 41.008182017989235, "iter_time": 0.5030970039367675, "loss": 0.2944745719432831, "lr": 2.0580000000000003e-05, "seqlen": 8192.0, "step_tflops": 45.015759977694465, "step_time": 0.458308235168457} +{"epoch": 0, "iter": 344, "iter_tflops": 45.63956259749476, "iter_time": 0.45204406738281244, "loss": 0.22935426235198975, "lr": 2.064e-05, "seqlen": 8192.0, "step_tflops": 49.90703240244192, "step_time": 0.41339050865173343} +{"epoch": 0, "iter": 345, "iter_tflops": 22.08303197067568, "iter_time": 0.9342509460449219, "loss": 0.3062193691730499, "lr": 2.07e-05, "seqlen": 8192.0, "step_tflops": 23.39792531212762, "step_time": 0.8817488403320313} +{"epoch": 0, "iter": 346, "iter_tflops": 16.024550829349224, "iter_time": 1.287467819213867, "loss": 0.21499793231487274, "lr": 2.0759999999999998e-05, "seqlen": 8192.0, "step_tflops": 21.09076381267471, "step_time": 0.9782051372528076} +{"epoch": 0, "iter": 347, "iter_tflops": 49.33636608707736, "iter_time": 0.4181721343994141, "loss": 0.3325015604496002, "lr": 2.082e-05, "seqlen": 8192.0, "step_tflops": 53.64294915399322, "step_time": 0.38460028457641604} +{"epoch": 0, "iter": 348, "iter_tflops": 57.187018047474545, "iter_time": 0.36076533126831056, "loss": 0.2118881642818451, "lr": 2.088e-05, "seqlen": 8192.0, 
"step_tflops": 61.924549423128944, "step_time": 0.3331650161743164} +{"epoch": 0, "iter": 349, "iter_tflops": 28.147360737229338, "iter_time": 0.7329672470092773, "loss": 1.0224055051803589, "lr": 2.094e-05, "seqlen": 8192.0, "step_tflops": 29.730616495900048, "step_time": 0.6939342651367187} +{"epoch": 0, "iter": 350, "iter_tflops": 15.444281038268109, "iter_time": 1.33584033203125, "loss": 1.2349789142608643, "lr": 2.1e-05, "seqlen": 8192.0, "step_tflops": 18.746027953502605, "step_time": 1.100558132171631} +{"epoch": 0, "iter": 351, "iter_tflops": 46.87895720276217, "iter_time": 0.4400928421020508, "loss": 1.0482258796691895, "lr": 2.1059999999999998e-05, "seqlen": 8192.0, "step_tflops": 50.63545974997532, "step_time": 0.4074435901641846} +{"epoch": 0, "iter": 352, "iter_tflops": 45.27561168716297, "iter_time": 0.4556778526306152, "loss": 1.2188000679016113, "lr": 2.1119999999999998e-05, "seqlen": 8192.0, "step_tflops": 48.40773606825519, "step_time": 0.42619414138793943} +{"epoch": 0, "iter": 353, "iter_tflops": 36.75099058848766, "iter_time": 0.5613751678466797, "loss": 0.7759236693382263, "lr": 2.118e-05, "seqlen": 8192.0, "step_tflops": 39.625310872457774, "step_time": 0.5206544265747071} +{"epoch": 0, "iter": 354, "iter_tflops": 14.431862335891871, "iter_time": 1.429551712036133, "loss": 0.6498499512672424, "lr": 2.124e-05, "seqlen": 8192.0, "step_tflops": 17.202811037495202, "step_time": 1.1992861785888673} +{"epoch": 0, "iter": 355, "iter_tflops": 44.20781824907056, "iter_time": 0.4666842727661133, "loss": 0.7866199612617493, "lr": 2.13e-05, "seqlen": 8192.0, "step_tflops": 48.81780956143858, "step_time": 0.42261407661437983} +{"epoch": 0, "iter": 356, "iter_tflops": 48.27603503416206, "iter_time": 0.42735683441162103, "loss": 0.6227957606315613, "lr": 2.136e-05, "seqlen": 8192.0, "step_tflops": 52.30082241237227, "step_time": 0.39446977233886715} +{"epoch": 0, "iter": 357, "iter_tflops": 45.560370642555945, "iter_time": 0.45282979965209963, "loss": 
0.5227723121643066, "lr": 2.1419999999999998e-05, "seqlen": 8192.0, "step_tflops": 49.524113850915484, "step_time": 0.4165868282318116} +{"epoch": 0, "iter": 358, "iter_tflops": 8.572657883974983, "iter_time": 2.406615753173828, "loss": 0.6491134166717529, "lr": 2.148e-05, "seqlen": 8192.0, "step_tflops": 11.307035586791804, "step_time": 1.8246244430541996} +{"epoch": 0, "iter": 359, "iter_tflops": 10.736653130416656, "iter_time": 1.9215572357177735, "loss": 0.5308497548103333, "lr": 2.154e-05, "seqlen": 8192.0, "step_tflops": 13.537183181229155, "step_time": 1.524031494140625} +{"epoch": 0, "iter": 360, "iter_tflops": 25.730441792417412, "iter_time": 0.8018165283203125, "loss": 0.6293942332267761, "lr": 2.16e-05, "seqlen": 8192.0, "step_tflops": 31.665187373784903, "step_time": 0.651538652420044} +{"epoch": 0, "iter": 361, "iter_tflops": 19.688201201942125, "iter_time": 0.7800283966064453, "loss": 0.5235227346420288, "lr": 2.166e-05, "seqlen": 6144.0, "step_tflops": 21.26828517311251, "step_time": 0.7220777740478515} +{"epoch": 0, "iter": 362, "iter_tflops": 24.111176111692767, "iter_time": 0.6369393157958985, "loss": 0.513312041759491, "lr": 2.172e-05, "seqlen": 6144.0, "step_tflops": 25.952207857828764, "step_time": 0.5917552795410157} +{"epoch": 0, "iter": 363, "iter_tflops": 23.417351373538175, "iter_time": 0.6558109741210938, "loss": 0.523526132106781, "lr": 2.178e-05, "seqlen": 6144.0, "step_tflops": 25.244259502957874, "step_time": 0.6083504257202149} +{"epoch": 0, "iter": 364, "iter_tflops": 23.99024457370084, "iter_time": 0.6401500396728516, "loss": 0.7531177997589111, "lr": 2.184e-05, "seqlen": 6144.0, "step_tflops": 25.718445774734032, "step_time": 0.5971339073181152} +{"epoch": 0, "iter": 365, "iter_tflops": 16.04714050720821, "iter_time": 1.2856554412841799, "loss": 0.982761561870575, "lr": 2.19e-05, "seqlen": 8192.0, "step_tflops": 17.00404267865865, "step_time": 1.213305206298828} +{"epoch": 0, "iter": 366, "iter_tflops": 17.247420252816752, 
"iter_time": 1.1961843109130859, "loss": 1.186174988746643, "lr": 2.196e-05, "seqlen": 8192.0, "step_tflops": 22.688034889210044, "step_time": 0.9093380546569825} +{"epoch": 0, "iter": 367, "iter_tflops": 41.376296350270856, "iter_time": 0.49862107849121096, "loss": 0.9875729084014893, "lr": 2.202e-05, "seqlen": 8192.0, "step_tflops": 44.72666903714459, "step_time": 0.4612705116271973} +{"epoch": 0, "iter": 368, "iter_tflops": 41.82477608100989, "iter_time": 0.4932744522094727, "loss": 1.0213961601257324, "lr": 2.208e-05, "seqlen": 8192.0, "step_tflops": 45.19442300893213, "step_time": 0.456496446609497} +{"epoch": 0, "iter": 369, "iter_tflops": 33.580358282753174, "iter_time": 0.5367948837280274, "loss": 0.2154073417186737, "lr": 2.214e-05, "seqlen": 7184.0, "step_tflops": 37.024056825502115, "step_time": 0.4868662719726563} +{"epoch": 0, "iter": 370, "iter_tflops": 41.819334167193475, "iter_time": 0.4310390129089355, "loss": 0.19834020733833313, "lr": 2.22e-05, "seqlen": 7184.0, "step_tflops": 45.80661092716422, "step_time": 0.39351884269714354} +{"epoch": 0, "iter": 371, "iter_tflops": 41.72169640333837, "iter_time": 0.43204773712158207, "loss": 0.2576339840888977, "lr": 2.226e-05, "seqlen": 7184.0, "step_tflops": 45.44567154836633, "step_time": 0.3966442546844482} +{"epoch": 0, "iter": 372, "iter_tflops": 43.970806400099654, "iter_time": 0.40994846343994135, "loss": 0.18353460729122162, "lr": 2.232e-05, "seqlen": 7184.0, "step_tflops": 47.67833032678243, "step_time": 0.3780703811645508} +{"epoch": 0, "iter": 373, "iter_tflops": 45.391107953268964, "iter_time": 0.4545183944702148, "loss": 0.9442480206489563, "lr": 2.238e-05, "seqlen": 8192.0, "step_tflops": 49.415677026093384, "step_time": 0.4175009784698486} +{"epoch": 0, "iter": 374, "iter_tflops": 42.98973499173137, "iter_time": 0.4799074363708496, "loss": 0.7205535769462585, "lr": 2.2440000000000002e-05, "seqlen": 8192.0, "step_tflops": 46.76359733061408, "step_time": 0.44117849540710447} +{"epoch": 0, 
"iter": 375, "iter_tflops": 50.58511446500369, "iter_time": 0.4078491020202637, "loss": 0.9729657769203186, "lr": 2.25e-05, "seqlen": 8192.0, "step_tflops": 54.680854927290696, "step_time": 0.37730012702941895} +{"epoch": 0, "iter": 376, "iter_tflops": 45.452824989706826, "iter_time": 0.45390123748779293, "loss": 0.860843300819397, "lr": 2.256e-05, "seqlen": 8192.0, "step_tflops": 48.9798660198817, "step_time": 0.4212158012390137} +{"epoch": 0, "iter": 377, "iter_tflops": 40.775982150782184, "iter_time": 0.5059619026184082, "loss": 1.0317611694335938, "lr": 2.262e-05, "seqlen": 8192.0, "step_tflops": 44.15113346667035, "step_time": 0.4672834396362304} +{"epoch": 0, "iter": 378, "iter_tflops": 41.14843829821044, "iter_time": 0.5013821754455566, "loss": 1.0553710460662842, "lr": 2.268e-05, "seqlen": 8192.0, "step_tflops": 44.199480795461184, "step_time": 0.46677230453491214} +{"epoch": 0, "iter": 379, "iter_tflops": 44.76027830853345, "iter_time": 0.4609241561889649, "loss": 1.2050749063491821, "lr": 2.274e-05, "seqlen": 8192.0, "step_tflops": 48.16377459154467, "step_time": 0.42835292053222657} +{"epoch": 0, "iter": 380, "iter_tflops": 44.37738411052523, "iter_time": 0.46490107345581055, "loss": 0.925665557384491, "lr": 2.2800000000000002e-05, "seqlen": 8192.0, "step_tflops": 47.7400287200434, "step_time": 0.432155029296875} +{"epoch": 0, "iter": 381, "iter_tflops": 33.20542694351993, "iter_time": 0.6213169174194336, "loss": 1.2240668535232544, "lr": 2.286e-05, "seqlen": 8192.0, "step_tflops": 35.365662446879114, "step_time": 0.5833651084899902} +{"epoch": 0, "iter": 382, "iter_tflops": 9.592460724908797, "iter_time": 2.1507613220214843, "loss": 0.8008397817611694, "lr": 2.292e-05, "seqlen": 8192.0, "step_tflops": 12.339263999896891, "step_time": 1.6719873657226563} +{"epoch": 0, "iter": 383, "iter_tflops": 9.796731804111683, "iter_time": 2.1059159240722654, "loss": 0.9344398975372314, "lr": 2.298e-05, "seqlen": 8192.0, "step_tflops": 11.7115089007424, "step_time": 
1.7616084899902344} +{"epoch": 0, "iter": 384, "iter_tflops": 24.152877389330737, "iter_time": 0.8541878128051759, "loss": 0.9990040063858032, "lr": 2.304e-05, "seqlen": 8192.0, "step_tflops": 30.081449823904332, "step_time": 0.6858410625457765} +{"epoch": 0, "iter": 385, "iter_tflops": 17.894469476633283, "iter_time": 0.92690380859375, "loss": 0.44981327652931213, "lr": 2.3100000000000002e-05, "seqlen": 6624.0, "step_tflops": 19.002808350772476, "step_time": 0.872842140197754} +{"epoch": 0, "iter": 386, "iter_tflops": 22.797019862963484, "iter_time": 0.7275710601806641, "loss": 0.5439385175704956, "lr": 2.3160000000000002e-05, "seqlen": 6624.0, "step_tflops": 24.912110108359933, "step_time": 0.6657987556457519} +{"epoch": 0, "iter": 387, "iter_tflops": 28.344208640005558, "iter_time": 0.5851795730590821, "loss": 0.6648163199424744, "lr": 2.322e-05, "seqlen": 6624.0, "step_tflops": 30.35342682217195, "step_time": 0.5464441299438476} +{"epoch": 0, "iter": 388, "iter_tflops": 30.074827200751855, "iter_time": 0.5515061416625977, "loss": 0.46196165680885315, "lr": 2.328e-05, "seqlen": 6624.0, "step_tflops": 31.944816085330498, "step_time": 0.5192220191955567} +{"epoch": 0, "iter": 389, "iter_tflops": 36.935475292560845, "iter_time": 0.5585712203979492, "loss": 0.9874288439750671, "lr": 2.334e-05, "seqlen": 8192.0, "step_tflops": 39.4778293142864, "step_time": 0.5225994911193849} +{"epoch": 0, "iter": 390, "iter_tflops": 32.48681227026489, "iter_time": 0.6350605697631837, "loss": 1.2185848951339722, "lr": 2.3400000000000003e-05, "seqlen": 8192.0, "step_tflops": 39.74159556554491, "step_time": 0.5191309814453124} +{"epoch": 0, "iter": 391, "iter_tflops": 39.14751740929515, "iter_time": 0.5270089874267578, "loss": 1.0750616788864136, "lr": 2.3460000000000002e-05, "seqlen": 8192.0, "step_tflops": 42.78881372149759, "step_time": 0.4821609134674073} +{"epoch": 0, "iter": 392, "iter_tflops": 36.684861125786014, "iter_time": 0.5623871231079102, "loss": 0.9271309971809387, 
"lr": 2.3520000000000002e-05, "seqlen": 8192.0, "step_tflops": 39.80380552260915, "step_time": 0.5183196239471435} +{"epoch": 0, "iter": 393, "iter_tflops": 26.69699805294734, "iter_time": 0.7727870178222657, "loss": 1.1403743028640747, "lr": 2.358e-05, "seqlen": 8192.0, "step_tflops": 28.497848607841604, "step_time": 0.7239526672363281} +{"epoch": 0, "iter": 394, "iter_tflops": 11.622322509039954, "iter_time": 1.7751265716552735, "loss": 1.0655168294906616, "lr": 2.364e-05, "seqlen": 8192.0, "step_tflops": 14.166253645044195, "step_time": 1.4563549423217774} +{"epoch": 0, "iter": 395, "iter_tflops": 11.086224415576236, "iter_time": 1.8609666137695313, "loss": 1.1095210313796997, "lr": 2.37e-05, "seqlen": 8192.0, "step_tflops": 13.607813402363616, "step_time": 1.51612113571167} +{"epoch": 0, "iter": 396, "iter_tflops": 27.28528100728758, "iter_time": 0.7561253814697266, "loss": 1.2023180723190308, "lr": 2.3760000000000003e-05, "seqlen": 8192.0, "step_tflops": 31.957597788385055, "step_time": 0.64557710647583} +{"epoch": 0, "iter": 397, "iter_tflops": 15.83299785846969, "iter_time": 0.9286664962768555, "loss": 0.6678003072738647, "lr": 2.3820000000000002e-05, "seqlen": 5888.0, "step_tflops": 16.698584206544123, "step_time": 0.880528221130371} +{"epoch": 0, "iter": 398, "iter_tflops": 10.947167703648685, "iter_time": 1.343139617919922, "loss": 0.48667261004447937, "lr": 2.3880000000000002e-05, "seqlen": 5888.0, "step_tflops": 12.852617882871193, "step_time": 1.144013988494873} +{"epoch": 0, "iter": 399, "iter_tflops": 24.374389745790925, "iter_time": 0.6032386779785156, "loss": 0.5780973434448242, "lr": 2.394e-05, "seqlen": 5888.0, "step_tflops": 26.159542803269566, "step_time": 0.5620730743408202} +{"epoch": 0, "iter": 400, "iter_tflops": 23.995893961784798, "iter_time": 0.6127537765502928, "loss": 0.72230064868927, "lr": 2.4e-05, "seqlen": 5888.0, "step_tflops": 25.66195177418449, "step_time": 0.5729717979431153} +{"epoch": 0, "iter": 401, "iter_tflops": 
28.005408419342245, "iter_time": 0.736682472229004, "loss": 0.22301694750785828, "lr": 2.4060000000000003e-05, "seqlen": 8192.0, "step_tflops": 30.23419564880876, "step_time": 0.6823761329650879} +{"epoch": 0, "iter": 402, "iter_tflops": 41.15047180282367, "iter_time": 0.5013573989868165, "loss": 0.10567642748355865, "lr": 2.4120000000000003e-05, "seqlen": 8192.0, "step_tflops": 45.578306069601204, "step_time": 0.4526516075134278} +{"epoch": 0, "iter": 403, "iter_tflops": 40.568815049600985, "iter_time": 0.5085456275939941, "loss": 0.19320963323116302, "lr": 2.4180000000000002e-05, "seqlen": 8192.0, "step_tflops": 44.76217914101396, "step_time": 0.4609045829772949} +{"epoch": 0, "iter": 404, "iter_tflops": 43.90915227065227, "iter_time": 0.46985861587524413, "loss": 0.20077930390834808, "lr": 2.4240000000000002e-05, "seqlen": 8192.0, "step_tflops": 47.97529354500182, "step_time": 0.430035795211792} +{"epoch": 0, "iter": 405, "iter_tflops": 21.16398472909001, "iter_time": 0.9748208465576172, "loss": 1.1457762718200684, "lr": 2.43e-05, "seqlen": 8192.0, "step_tflops": 22.55120512782306, "step_time": 0.9148554763793945} +{"epoch": 0, "iter": 406, "iter_tflops": 15.14542076769893, "iter_time": 1.3622000885009764, "loss": 1.1143778562545776, "lr": 2.4360000000000004e-05, "seqlen": 8192.0, "step_tflops": 17.101420370084455, "step_time": 1.2063964900970459} +{"epoch": 0, "iter": 407, "iter_tflops": 43.934719317024104, "iter_time": 0.46958518981933595, "loss": 1.1129316091537476, "lr": 2.442e-05, "seqlen": 8192.0, "step_tflops": 47.15639681489257, "step_time": 0.43750360298156743} +{"epoch": 0, "iter": 408, "iter_tflops": 46.18855050739894, "iter_time": 0.44667116165161136, "loss": 1.139170527458191, "lr": 2.448e-05, "seqlen": 8192.0, "step_tflops": 49.29503600977176, "step_time": 0.41852273941040036} +{"epoch": 0, "iter": 409, "iter_tflops": 36.340315584777386, "iter_time": 0.5677191619873048, "loss": 0.9586703777313232, "lr": 2.454e-05, "seqlen": 8192.0, "step_tflops": 
38.960838659510785, "step_time": 0.529534122467041} +{"epoch": 0, "iter": 410, "iter_tflops": 11.659585272200223, "iter_time": 1.7694534606933594, "loss": 0.7441532611846924, "lr": 2.4599999999999998e-05, "seqlen": 8192.0, "step_tflops": 16.23009272109731, "step_time": 1.2711630096435547} +{"epoch": 0, "iter": 411, "iter_tflops": 10.911798733359758, "iter_time": 1.8907142639160157, "loss": 0.8449580669403076, "lr": 2.4659999999999998e-05, "seqlen": 8192.0, "step_tflops": 12.613517449040778, "step_time": 1.635633644104004} +{"epoch": 0, "iter": 412, "iter_tflops": 19.67871699443049, "iter_time": 1.048396270751953, "loss": 0.7802624106407166, "lr": 2.472e-05, "seqlen": 8192.0, "step_tflops": 26.605633951662917, "step_time": 0.7754407787322999} +{"epoch": 0, "iter": 413, "iter_tflops": 17.493861736835623, "iter_time": 0.8475010757446289, "loss": 0.5163106322288513, "lr": 2.478e-05, "seqlen": 5936.0, "step_tflops": 18.373488136175745, "step_time": 0.8069271621704102} +{"epoch": 0, "iter": 414, "iter_tflops": 8.594565569924601, "iter_time": 1.725051315307617, "loss": 0.7632628679275513, "lr": 2.484e-05, "seqlen": 5936.0, "step_tflops": 10.533697330153073, "step_time": 1.4074893341064454} +{"epoch": 0, "iter": 415, "iter_tflops": 13.787956351835536, "iter_time": 1.0752910919189453, "loss": 0.4809132516384125, "lr": 2.49e-05, "seqlen": 5936.0, "step_tflops": 16.17156148125236, "step_time": 0.9167987060546875} +{"epoch": 0, "iter": 416, "iter_tflops": 22.06045875180365, "iter_time": 0.6720652008056641, "loss": 0.4551543891429901, "lr": 2.4959999999999998e-05, "seqlen": 5936.0, "step_tflops": 23.561382233639964, "step_time": 0.6292528381347656} +{"epoch": 0, "iter": 417, "iter_tflops": 14.21900060810915, "iter_time": 1.0398221130371095, "loss": 0.5870473384857178, "lr": 2.502e-05, "seqlen": 5920.0, "step_tflops": 15.150855102429055, "step_time": 0.9758677749633791} +{"epoch": 0, "iter": 418, "iter_tflops": 12.211683136081765, "iter_time": 1.2107447509765623, "loss": 
0.4146134853363037, "lr": 2.508e-05, "seqlen": 5920.0, "step_tflops": 16.090007601484963, "step_time": 0.9189076614379883} +{"epoch": 0, "iter": 419, "iter_tflops": 26.185899096161208, "iter_time": 0.5646256866455077, "loss": 0.45380234718322754, "lr": 2.514e-05, "seqlen": 5920.0, "step_tflops": 27.96320926262968, "step_time": 0.5287387123107911} +{"epoch": 0, "iter": 420, "iter_tflops": 25.816963720765592, "iter_time": 0.5726944274902344, "loss": 0.46986913681030273, "lr": 2.52e-05, "seqlen": 5920.0, "step_tflops": 27.41618348049843, "step_time": 0.5392884559631348} +{"epoch": 0, "iter": 421, "iter_tflops": 39.78108343015323, "iter_time": 0.5186156768798829, "loss": 1.1855525970458984, "lr": 2.526e-05, "seqlen": 8192.0, "step_tflops": 43.17372345552128, "step_time": 0.4778622703552246} +{"epoch": 0, "iter": 422, "iter_tflops": 47.13292773788149, "iter_time": 0.43772145080566405, "loss": 1.0259474515914917, "lr": 2.5319999999999998e-05, "seqlen": 8192.0, "step_tflops": 51.020468371292665, "step_time": 0.40436895561218256} +{"epoch": 0, "iter": 423, "iter_tflops": 49.18855674212521, "iter_time": 0.4194287223815918, "loss": 1.380772352218628, "lr": 2.538e-05, "seqlen": 8192.0, "step_tflops": 53.28439722997091, "step_time": 0.387188268661499} +{"epoch": 0, "iter": 424, "iter_tflops": 43.61914783319267, "iter_time": 0.47298249816894533, "loss": 1.4340070486068726, "lr": 2.544e-05, "seqlen": 8192.0, "step_tflops": 47.28139705079915, "step_time": 0.4363469524383545} +{"epoch": 0, "iter": 425, "iter_tflops": 47.59960367138622, "iter_time": 0.43342994308471683, "loss": 0.01091308705508709, "lr": 2.55e-05, "seqlen": 8192.0, "step_tflops": 51.99568831370872, "step_time": 0.39678469848632814} +{"epoch": 0, "iter": 426, "iter_tflops": 14.1755475238774, "iter_time": 1.4554001159667969, "loss": 0.044086895883083344, "lr": 2.556e-05, "seqlen": 8192.0, "step_tflops": 20.46674092106642, "step_time": 1.0080302276611328} +{"epoch": 0, "iter": 427, "iter_tflops": 37.83317033046168, 
"iter_time": 0.5453175964355469, "loss": 0.05165230855345726, "lr": 2.562e-05, "seqlen": 8192.0, "step_tflops": 43.18625473296682, "step_time": 0.47772360992431645} +{"epoch": 0, "iter": 428, "iter_tflops": 51.185451233355415, "iter_time": 0.4030655784606933, "loss": 0.02556353434920311, "lr": 2.568e-05, "seqlen": 8192.0, "step_tflops": 56.66692487780539, "step_time": 0.3640764617919922} +{"epoch": 0, "iter": 429, "iter_tflops": 23.515964185667258, "iter_time": 0.877322883605957, "loss": 0.33217108249664307, "lr": 2.574e-05, "seqlen": 8192.0, "step_tflops": 24.840376542224224, "step_time": 0.8305467300415039} +{"epoch": 0, "iter": 430, "iter_tflops": 15.508297809113484, "iter_time": 1.3303261108398436, "loss": 0.3181961178779602, "lr": 2.58e-05, "seqlen": 8192.0, "step_tflops": 21.65799275696462, "step_time": 0.9525856685638429} +{"epoch": 0, "iter": 431, "iter_tflops": 40.63442984549943, "iter_time": 0.5077244491577149, "loss": 0.3285542130470276, "lr": 2.586e-05, "seqlen": 8192.0, "step_tflops": 44.49601590786646, "step_time": 0.4636615905761719} +{"epoch": 0, "iter": 432, "iter_tflops": 44.23332232627705, "iter_time": 0.4664151916503906, "loss": 0.3684915006160736, "lr": 2.592e-05, "seqlen": 8192.0, "step_tflops": 48.11530241965282, "step_time": 0.42878445053100583} +{"epoch": 0, "iter": 433, "iter_tflops": 20.854580011760365, "iter_time": 0.989283576965332, "loss": 0.4451562464237213, "lr": 2.5980000000000002e-05, "seqlen": 8192.0, "step_tflops": 22.16637762905022, "step_time": 0.9307381591796875} +{"epoch": 0, "iter": 434, "iter_tflops": 23.958286256630565, "iter_time": 0.8611255950927734, "loss": 0.47812119126319885, "lr": 2.604e-05, "seqlen": 8192.0, "step_tflops": 29.303516853519216, "step_time": 0.7040483779907226} +{"epoch": 0, "iter": 435, "iter_tflops": 43.31974397998379, "iter_time": 0.4762515106201172, "loss": 0.4845077693462372, "lr": 2.61e-05, "seqlen": 8192.0, "step_tflops": 47.36682096988972, "step_time": 0.43556002044677733} +{"epoch": 0, "iter": 
436, "iter_tflops": 46.89834660271284, "iter_time": 0.4399108924865722, "loss": 0.6194814443588257, "lr": 2.616e-05, "seqlen": 8192.0, "step_tflops": 50.8975814383573, "step_time": 0.4053452625274658} +{"epoch": 0, "iter": 437, "iter_tflops": 28.925085169285786, "iter_time": 0.7132595596313478, "loss": 0.4842802882194519, "lr": 2.622e-05, "seqlen": 8192.0, "step_tflops": 30.98722583105817, "step_time": 0.66579349899292} +{"epoch": 0, "iter": 438, "iter_tflops": 19.94961136440234, "iter_time": 1.034160171508789, "loss": 0.6207271218299866, "lr": 2.628e-05, "seqlen": 8192.0, "step_tflops": 22.532075674523092, "step_time": 0.9156321773529053} +{"epoch": 0, "iter": 439, "iter_tflops": 42.120668798987545, "iter_time": 0.48980925750732424, "loss": 0.5245529413223267, "lr": 2.6340000000000002e-05, "seqlen": 8192.0, "step_tflops": 45.81940724623248, "step_time": 0.450269760131836} +{"epoch": 0, "iter": 440, "iter_tflops": 41.96250092445685, "iter_time": 0.4916554794311524, "loss": 0.5759292840957642, "lr": 2.64e-05, "seqlen": 8192.0, "step_tflops": 45.41802844907359, "step_time": 0.4542489891052246} +{"epoch": 0, "iter": 441, "iter_tflops": 15.994260955739861, "iter_time": 1.289906021118164, "loss": 1.1353482007980347, "lr": 2.646e-05, "seqlen": 8192.0, "step_tflops": 16.929089754423064, "step_time": 1.2186770706176757} +{"epoch": 0, "iter": 442, "iter_tflops": 19.997429242920724, "iter_time": 1.031687286376953, "loss": 1.145229458808899, "lr": 2.652e-05, "seqlen": 8192.0, "step_tflops": 27.17190476740543, "step_time": 0.7592803554534913} +{"epoch": 0, "iter": 443, "iter_tflops": 38.836270628767785, "iter_time": 0.5312326126098633, "loss": 1.2709718942642212, "lr": 2.658e-05, "seqlen": 8192.0, "step_tflops": 42.04998975420682, "step_time": 0.49063254547119134} +{"epoch": 0, "iter": 444, "iter_tflops": 36.68253317260339, "iter_time": 0.5624228134155274, "loss": 1.0138986110687256, "lr": 2.6640000000000002e-05, "seqlen": 8192.0, "step_tflops": 39.627844403154775, 
"step_time": 0.5206211395263671} +{"epoch": 0, "iter": 445, "iter_tflops": 37.952310198836344, "iter_time": 0.5436057357788086, "loss": 1.3762438297271729, "lr": 2.6700000000000002e-05, "seqlen": 8192.0, "step_tflops": 41.678581076815085, "step_time": 0.4950046997070313} +{"epoch": 0, "iter": 446, "iter_tflops": 38.202150789953286, "iter_time": 0.5400505752563477, "loss": 1.3224918842315674, "lr": 2.676e-05, "seqlen": 8192.0, "step_tflops": 42.41613204731051, "step_time": 0.4863973331451416} +{"epoch": 0, "iter": 447, "iter_tflops": 34.12894960957644, "iter_time": 0.6045042037963867, "loss": 1.0154646635055542, "lr": 2.682e-05, "seqlen": 8192.0, "step_tflops": 37.46465663946609, "step_time": 0.550681505203247} +{"epoch": 0, "iter": 448, "iter_tflops": 39.38235328988716, "iter_time": 0.5238664474487305, "loss": 1.0277906656265259, "lr": 2.688e-05, "seqlen": 8192.0, "step_tflops": 42.77218613085609, "step_time": 0.48234835243225094} +{"epoch": 0, "iter": 449, "iter_tflops": 30.06629026223011, "iter_time": 0.6861868667602539, "loss": 1.0595285892486572, "lr": 2.6940000000000003e-05, "seqlen": 8192.0, "step_tflops": 32.12859505112508, "step_time": 0.6421411666870117} +{"epoch": 0, "iter": 450, "iter_tflops": 13.93178457058642, "iter_time": 1.4808650970458985, "loss": 1.112343668937683, "lr": 2.7000000000000002e-05, "seqlen": 8192.0, "step_tflops": 17.308426317428353, "step_time": 1.1919681854248048} +{"epoch": 0, "iter": 451, "iter_tflops": 24.31275838150327, "iter_time": 0.8485706634521484, "loss": 1.0202504396438599, "lr": 2.7060000000000002e-05, "seqlen": 8192.0, "step_tflops": 29.608547588568523, "step_time": 0.6967951889038085} +{"epoch": 0, "iter": 452, "iter_tflops": 45.33163811049903, "iter_time": 0.4551146697998047, "loss": 1.0721513032913208, "lr": 2.712e-05, "seqlen": 8192.0, "step_tflops": 48.781656073798985, "step_time": 0.42292728805541996} +{"epoch": 0, "iter": 453, "iter_tflops": 19.62617447949306, "iter_time": 0.7346273803710938, "loss": 
0.49907156825065613, "lr": 2.718e-05, "seqlen": 5776.0, "step_tflops": 20.781353834531426, "step_time": 0.6937914276123047} +{"epoch": 0, "iter": 454, "iter_tflops": 7.387697021277808, "iter_time": 1.9516129455566404, "loss": 0.37356603145599365, "lr": 2.724e-05, "seqlen": 5776.0, "step_tflops": 8.812326489683198, "step_time": 1.6361088256835936} +{"epoch": 0, "iter": 455, "iter_tflops": 22.688430772673165, "iter_time": 0.6354747619628905, "loss": 0.8958395719528198, "lr": 2.7300000000000003e-05, "seqlen": 5776.0, "step_tflops": 24.4099051452317, "step_time": 0.5906587944030761} +{"epoch": 0, "iter": 456, "iter_tflops": 22.66300551637802, "iter_time": 0.6361876907348633, "loss": 0.5355672240257263, "lr": 2.7360000000000002e-05, "seqlen": 5776.0, "step_tflops": 24.253386982543237, "step_time": 0.5944705848693848} +{"epoch": 0, "iter": 457, "iter_tflops": 35.71026235758887, "iter_time": 0.5777357025146485, "loss": 1.2527486085891724, "lr": 2.7420000000000002e-05, "seqlen": 8192.0, "step_tflops": 39.483835197464565, "step_time": 0.522519998550415} +{"epoch": 0, "iter": 458, "iter_tflops": 39.52343163574121, "iter_time": 0.5219965133666992, "loss": 1.0747907161712646, "lr": 2.748e-05, "seqlen": 8192.0, "step_tflops": 43.50760788508998, "step_time": 0.4741950778961182} +{"epoch": 0, "iter": 459, "iter_tflops": 40.77733550394953, "iter_time": 0.5059451103210449, "loss": 1.0230013132095337, "lr": 2.754e-05, "seqlen": 8192.0, "step_tflops": 44.14912848280155, "step_time": 0.46730466079711913} +{"epoch": 0, "iter": 460, "iter_tflops": 36.6063746309318, "iter_time": 0.5635929183959961, "loss": 1.0087727308273315, "lr": 2.7600000000000003e-05, "seqlen": 8192.0, "step_tflops": 39.80157428083705, "step_time": 0.5183486804962159} +{"epoch": 0, "iter": 461, "iter_tflops": 2.6115660823305324, "iter_time": 0.5850846176147461, "loss": 0.363437294960022, "lr": 2.7660000000000003e-05, "seqlen": 624.0, "step_tflops": 2.8881500224466596, "step_time": 0.5290539379119873} +{"epoch": 0, 
"iter": 462, "iter_tflops": 2.723054933693488, "iter_time": 0.561129753112793, "loss": 0.4684091806411743, "lr": 2.7720000000000002e-05, "seqlen": 624.0, "step_tflops": 3.0278559194670995, "step_time": 0.5046432800292969} +{"epoch": 0, "iter": 463, "iter_tflops": 3.220227793199145, "iter_time": 0.4744966011047363, "loss": 0.48724451661109924, "lr": 2.778e-05, "seqlen": 624.0, "step_tflops": 3.518454070371622, "step_time": 0.4342779846191406} +{"epoch": 0, "iter": 464, "iter_tflops": 3.342198310673825, "iter_time": 0.45718027496337893, "loss": 0.7984528541564941, "lr": 2.784e-05, "seqlen": 624.0, "step_tflops": 3.6342585565877443, "step_time": 0.4204398555755615} +{"epoch": 0, "iter": 465, "iter_tflops": 15.865785420374106, "iter_time": 1.0816661682128905, "loss": 0.39443856477737427, "lr": 2.79e-05, "seqlen": 6848.0, "step_tflops": 16.779202667098055, "step_time": 1.022783004760742} +{"epoch": 0, "iter": 466, "iter_tflops": 14.642197942748329, "iter_time": 1.1720565032958985, "loss": 0.5305396318435669, "lr": 2.7960000000000003e-05, "seqlen": 6848.0, "step_tflops": 17.779897011104072, "step_time": 0.9652183761596679} +{"epoch": 0, "iter": 467, "iter_tflops": 35.95500058099381, "iter_time": 0.4773044929504394, "loss": 0.507685124874115, "lr": 2.8020000000000003e-05, "seqlen": 6848.0, "step_tflops": 39.395550078024364, "step_time": 0.4356198425292969} +{"epoch": 0, "iter": 468, "iter_tflops": 38.648081551960935, "iter_time": 0.44404489517211915, "loss": 0.37315934896469116, "lr": 2.8080000000000002e-05, "seqlen": 6848.0, "step_tflops": 42.175552206641434, "step_time": 0.4069059543609619} +{"epoch": 0, "iter": 469, "iter_tflops": 29.331694418706736, "iter_time": 0.7033720321655272, "loss": 0.19543947279453278, "lr": 2.8139999999999998e-05, "seqlen": 8192.0, "step_tflops": 31.948676618937355, "step_time": 0.6457573738098145} +{"epoch": 0, "iter": 470, "iter_tflops": 16.92430990614479, "iter_time": 1.2190212554931639, "loss": 0.3101702034473419, "lr": 
2.8199999999999998e-05, "seqlen": 8192.0, "step_tflops": 18.936361467523458, "step_time": 1.089496181488037} +{"epoch": 0, "iter": 471, "iter_tflops": 51.89213774726432, "iter_time": 0.39757648086547853, "loss": 0.20789006352424622, "lr": 2.826e-05, "seqlen": 8192.0, "step_tflops": 56.68727850623354, "step_time": 0.3639457397460938} +{"epoch": 0, "iter": 472, "iter_tflops": 52.38418137751863, "iter_time": 0.39384205245971676, "loss": 0.27009317278862, "lr": 2.832e-05, "seqlen": 8192.0, "step_tflops": 56.7867300803575, "step_time": 0.36330835533142086} +{"epoch": 0, "iter": 473, "iter_tflops": 29.673290224306548, "iter_time": 0.6952748870849609, "loss": 1.1624534130096436, "lr": 2.838e-05, "seqlen": 8192.0, "step_tflops": 31.60978496135832, "step_time": 0.6526806030273438} +{"epoch": 0, "iter": 474, "iter_tflops": 15.5223664263753, "iter_time": 1.329120376586914, "loss": 1.2611138820648193, "lr": 2.844e-05, "seqlen": 8192.0, "step_tflops": 18.6632040763622, "step_time": 1.1054422073364258} +{"epoch": 0, "iter": 475, "iter_tflops": 45.02631847105383, "iter_time": 0.4582007637023926, "loss": 1.1857244968414307, "lr": 2.8499999999999998e-05, "seqlen": 8192.0, "step_tflops": 48.51049674307584, "step_time": 0.42529132652282714} +{"epoch": 0, "iter": 476, "iter_tflops": 48.70713583070213, "iter_time": 0.42357435226440426, "loss": 1.174234390258789, "lr": 2.856e-05, "seqlen": 8192.0, "step_tflops": 52.20984233816621, "step_time": 0.39515716934204104} +{"epoch": 0, "iter": 477, "iter_tflops": 44.228510174771046, "iter_time": 0.4664659385681152, "loss": 0.3454110622406006, "lr": 2.862e-05, "seqlen": 8192.0, "step_tflops": 48.32472078942226, "step_time": 0.4269262847900391} +{"epoch": 0, "iter": 478, "iter_tflops": 10.803484062023106, "iter_time": 1.909670379638672, "loss": 0.33894482254981995, "lr": 2.868e-05, "seqlen": 8192.0, "step_tflops": 11.840649088367071, "step_time": 1.7423954849243164} +{"epoch": 0, "iter": 479, "iter_tflops": 14.28610147002857, "iter_time": 
1.4441374053955078, "loss": 0.3787243366241455, "lr": 2.874e-05, "seqlen": 8192.0, "step_tflops": 16.912297991823095, "step_time": 1.2198870620727538} +{"epoch": 0, "iter": 480, "iter_tflops": 17.003148205705315, "iter_time": 1.2133690338134766, "loss": 0.3052578568458557, "lr": 2.88e-05, "seqlen": 8192.0, "step_tflops": 20.287531564750232, "step_time": 1.0169346351623536} +{"epoch": 0, "iter": 481, "iter_tflops": 23.3088576655775, "iter_time": 0.7645074157714844, "loss": 0.8220904469490051, "lr": 2.8859999999999998e-05, "seqlen": 7104.0, "step_tflops": 24.80412415711761, "step_time": 0.7184206314086914} +{"epoch": 0, "iter": 482, "iter_tflops": 11.166307424805305, "iter_time": 1.5958538360595702, "loss": 0.6604374051094055, "lr": 2.892e-05, "seqlen": 7104.0, "step_tflops": 12.835280467456217, "step_time": 1.388344772338867} +{"epoch": 0, "iter": 483, "iter_tflops": 12.756476561397607, "iter_time": 1.396921356201172, "loss": 0.3900327682495117, "lr": 2.898e-05, "seqlen": 7104.0, "step_tflops": 15.220238985347958, "step_time": 1.1707959747314454} +{"epoch": 0, "iter": 484, "iter_tflops": 28.419748016814953, "iter_time": 0.6270215530395509, "loss": 0.43961742520332336, "lr": 2.904e-05, "seqlen": 7104.0, "step_tflops": 30.24123434478329, "step_time": 0.5892548675537108} +{"epoch": 0, "iter": 485, "iter_tflops": 13.308530359802187, "iter_time": 1.1631693420410159, "loss": 0.315384179353714, "lr": 2.91e-05, "seqlen": 6192.0, "step_tflops": 14.009115745636134, "step_time": 1.105000114440918} +{"epoch": 0, "iter": 486, "iter_tflops": 11.486912263224855, "iter_time": 1.3476271209716797, "loss": 0.5143297910690308, "lr": 2.916e-05, "seqlen": 6192.0, "step_tflops": 13.73608699167218, "step_time": 1.126963924407959} +{"epoch": 0, "iter": 487, "iter_tflops": 26.71586525379784, "iter_time": 0.5794337692260743, "loss": 0.45416098833084106, "lr": 2.922e-05, "seqlen": 6192.0, "step_tflops": 28.375965627157324, "step_time": 0.5455347213745118} +{"epoch": 0, "iter": 488, 
"iter_tflops": 28.74171416528209, "iter_time": 0.5385925979614258, "loss": 0.5248470306396484, "lr": 2.928e-05, "seqlen": 6192.0, "step_tflops": 30.57344548894396, "step_time": 0.5063241729736327} +{"epoch": 0, "iter": 489, "iter_tflops": 27.168114773099532, "iter_time": 0.7593862762451171, "loss": 1.1200305223464966, "lr": 2.934e-05, "seqlen": 8192.0, "step_tflops": 28.806200577325853, "step_time": 0.7162032165527343} +{"epoch": 0, "iter": 490, "iter_tflops": 13.878799148667733, "iter_time": 1.4865186309814453, "loss": 1.1986539363861084, "lr": 2.94e-05, "seqlen": 8192.0, "step_tflops": 16.70484066715049, "step_time": 1.235036832809448} +{"epoch": 0, "iter": 491, "iter_tflops": 43.05066061830184, "iter_time": 0.47922826766967774, "loss": 1.2179025411605835, "lr": 2.946e-05, "seqlen": 8192.0, "step_tflops": 46.89194734833376, "step_time": 0.43997092628479006} +{"epoch": 0, "iter": 492, "iter_tflops": 49.41171963253059, "iter_time": 0.41753441619873044, "loss": 1.0749528408050537, "lr": 2.9520000000000002e-05, "seqlen": 8192.0, "step_tflops": 53.27089332993874, "step_time": 0.3872864189147949} +{"epoch": 0, "iter": 493, "iter_tflops": 29.4809288559939, "iter_time": 0.6998115158081055, "loss": 0.3446482717990875, "lr": 2.958e-05, "seqlen": 8192.0, "step_tflops": 31.31039058875075, "step_time": 0.6589216270446777} +{"epoch": 0, "iter": 494, "iter_tflops": 20.093126238473456, "iter_time": 1.0267736968994141, "loss": 0.36411574482917786, "lr": 2.964e-05, "seqlen": 8192.0, "step_tflops": 27.407906089620774, "step_time": 0.7527424182891845} +{"epoch": 0, "iter": 495, "iter_tflops": 46.933299538277296, "iter_time": 0.43958327484130855, "loss": 0.33372944593429565, "lr": 2.97e-05, "seqlen": 8192.0, "step_tflops": 50.77583921582812, "step_time": 0.40631713485717774} +{"epoch": 0, "iter": 496, "iter_tflops": 50.09316362197844, "iter_time": 0.4118544731140137, "loss": 0.33651304244995117, "lr": 2.976e-05, "seqlen": 8192.0, "step_tflops": 54.09655482455052, "step_time": 
0.3813753681182862} +{"epoch": 0, "iter": 497, "iter_tflops": 39.57159369062692, "iter_time": 0.5213611984252928, "loss": 1.3020069599151611, "lr": 2.982e-05, "seqlen": 8192.0, "step_tflops": 42.668556604938416, "step_time": 0.4835198364257812} +{"epoch": 0, "iter": 498, "iter_tflops": 25.019823054369507, "iter_time": 0.8245899047851563, "loss": 1.1371878385543823, "lr": 2.9880000000000002e-05, "seqlen": 8192.0, "step_tflops": 27.762514848381404, "step_time": 0.7431276893615721} +{"epoch": 0, "iter": 499, "iter_tflops": 43.61674695053474, "iter_time": 0.4730085334777832, "loss": 1.2816355228424072, "lr": 2.994e-05, "seqlen": 8192.0, "step_tflops": 46.98996562560076, "step_time": 0.43905317306518554} +{"epoch": 0, "iter": 500, "iter_tflops": 44.02378438398846, "iter_time": 0.4686351661682129, "loss": 1.0622025728225708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.30253221644911, "step_time": 0.4361519889831542} +{"epoch": 0, "iter": 501, "iter_tflops": 38.1863584692704, "iter_time": 0.5402739181518554, "loss": 0.4723750352859497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.12078756774383, "step_time": 0.5017193183898927} +{"epoch": 0, "iter": 502, "iter_tflops": 10.392324540265568, "iter_time": 1.985224136352539, "loss": 0.5093558430671692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.531736998299955, "step_time": 1.524644878387451} +{"epoch": 0, "iter": 503, "iter_tflops": 15.595816338812487, "iter_time": 1.3228607635498049, "loss": 0.5132389664649963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.63086124986829, "step_time": 1.1701693534851074} +{"epoch": 0, "iter": 504, "iter_tflops": 40.63052849179472, "iter_time": 0.5077732009887695, "loss": 0.4227224588394165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35161839196645, "step_time": 0.4651711540222168} +{"epoch": 0, "iter": 505, "iter_tflops": 14.582560546301332, "iter_time": 0.9999015655517578, "loss": 0.28340139985084534, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 15.472017991281502, 
"step_time": 0.9424190902709961} +{"epoch": 0, "iter": 506, "iter_tflops": 5.87360204402016, "iter_time": 2.4824843444824216, "loss": 0.42759162187576294, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 7.1945510479227845, "step_time": 2.0266900634765626} +{"epoch": 0, "iter": 507, "iter_tflops": 9.826213547350182, "iter_time": 1.4839006958007812, "loss": 0.4707750082015991, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 11.523770482061606, "step_time": 1.2653085327148437} +{"epoch": 0, "iter": 508, "iter_tflops": 22.843282882391925, "iter_time": 0.638311279296875, "loss": 0.7768471240997314, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 24.55790167357039, "step_time": 0.5937447471618652} +{"epoch": 0, "iter": 509, "iter_tflops": 18.10145708551568, "iter_time": 0.8167978515625001, "loss": 0.5736711621284485, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 19.333615493919357, "step_time": 0.7647421798706054} +{"epoch": 0, "iter": 510, "iter_tflops": 25.214377542613647, "iter_time": 0.5863809738159179, "loss": 0.25780290365219116, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.28986591799434, "step_time": 0.5417846794128418} +{"epoch": 0, "iter": 511, "iter_tflops": 24.964785963925188, "iter_time": 0.5922434616088867, "loss": 0.5487132668495178, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 26.588618307835453, "step_time": 0.5560736961364747} +{"epoch": 0, "iter": 512, "iter_tflops": 25.31510453617571, "iter_time": 0.5840478057861328, "loss": 0.40323150157928467, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 26.76361030333483, "step_time": 0.5524378471374513} +{"epoch": 0, "iter": 513, "iter_tflops": 30.4337661070213, "iter_time": 0.6779014282226563, "loss": 1.1911814212799072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.466005063698375, "step_time": 0.6354675750732423} +{"epoch": 0, "iter": 514, "iter_tflops": 19.60991246516505, "iter_time": 1.052074737548828, "loss": 0.9788625836372375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.77843479811823, "step_time": 
0.9473175506591798} +{"epoch": 0, "iter": 515, "iter_tflops": 34.55118193451289, "iter_time": 0.5971168670654297, "loss": 1.2547580003738403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.33405468547294, "step_time": 0.5526078987121582} +{"epoch": 0, "iter": 516, "iter_tflops": 42.07169644310353, "iter_time": 0.4903794059753418, "loss": 0.996567964553833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.28049934764652, "step_time": 0.45562866592407225} +{"epoch": 0, "iter": 517, "iter_tflops": 18.840235580389564, "iter_time": 1.0576419372558594, "loss": 0.1595958173274994, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 19.91944524812378, "step_time": 1.0003402709960936} +{"epoch": 0, "iter": 518, "iter_tflops": 19.38092391734125, "iter_time": 1.0281358795166018, "loss": 0.2371494174003601, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 22.03100001282074, "step_time": 0.9044629497528076} +{"epoch": 0, "iter": 519, "iter_tflops": 52.974549405788935, "iter_time": 0.37614710235595694, "loss": 0.14765483140945435, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 57.48531461043124, "step_time": 0.3466315422058105} +{"epoch": 0, "iter": 520, "iter_tflops": 55.227032114760846, "iter_time": 0.3608056144714355, "loss": 0.16921380162239075, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 60.357302446317526, "step_time": 0.33013773727416995} +{"epoch": 0, "iter": 521, "iter_tflops": 23.521919303132318, "iter_time": 0.8771007690429687, "loss": 0.7658287882804871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.575399581604707, "step_time": 0.839501853942871} +{"epoch": 0, "iter": 522, "iter_tflops": 15.624968407009934, "iter_time": 1.3203926544189455, "loss": 0.7911285161972046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.856866597631843, "step_time": 1.0389903869628907} +{"epoch": 0, "iter": 523, "iter_tflops": 36.422320531655956, "iter_time": 0.5664409408569335, "loss": 0.7131878137588501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.46507627891936, "step_time": 
0.522768367767334} +{"epoch": 0, "iter": 524, "iter_tflops": 40.64373872920643, "iter_time": 0.5076081619262695, "loss": 0.7208675742149353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32502232306112, "step_time": 0.4654502677917481} +{"epoch": 0, "iter": 525, "iter_tflops": 18.87472274011385, "iter_time": 1.0930541229248045, "loss": 1.078709363937378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.918762357289282, "step_time": 1.035761817932129} +{"epoch": 0, "iter": 526, "iter_tflops": 23.591109459766727, "iter_time": 0.8745283279418946, "loss": 1.0807874202728271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.594792838088768, "step_time": 0.7214982681274413} +{"epoch": 0, "iter": 527, "iter_tflops": 39.42108990791117, "iter_time": 0.523351676940918, "loss": 1.2299394607543945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.83223438066737, "step_time": 0.4816721286773682} +{"epoch": 0, "iter": 528, "iter_tflops": 38.315821053648115, "iter_time": 0.5384484252929688, "loss": 0.9896005988121033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.74971267105414, "step_time": 0.49416132926940914} +{"epoch": 0, "iter": 529, "iter_tflops": 18.071202457827976, "iter_time": 1.1416558227539062, "loss": 0.23374491930007935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.38767888349465, "step_time": 1.0641342697143554} +{"epoch": 0, "iter": 530, "iter_tflops": 17.125741905551017, "iter_time": 1.2046831970214844, "loss": 0.2286098450422287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.47833391444752, "step_time": 0.9605537185668944} +{"epoch": 0, "iter": 531, "iter_tflops": 52.51959505032632, "iter_time": 0.39282659149169924, "loss": 0.2040565013885498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.841666598901654, "step_time": 0.36295722389221186} +{"epoch": 0, "iter": 532, "iter_tflops": 57.618802958253504, "iter_time": 0.3580618209838867, "loss": 0.20590198040008545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.58349897756901, "step_time": 
0.32965707969665525} +{"epoch": 0, "iter": 533, "iter_tflops": 46.042961258425954, "iter_time": 0.44808354949951174, "loss": 0.00885777361690998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20007008869247, "step_time": 0.41097738456726074} +{"epoch": 0, "iter": 534, "iter_tflops": 36.8046194785129, "iter_time": 0.5605571746826171, "loss": 0.01816042885184288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.97224869961944, "step_time": 0.42128131866455076} +{"epoch": 0, "iter": 535, "iter_tflops": 56.84593868982079, "iter_time": 0.3629299468994141, "loss": 0.01906309649348259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.34430582854321, "step_time": 0.3309218578338623} +{"epoch": 0, "iter": 536, "iter_tflops": 61.59356125762744, "iter_time": 0.3349553604125976, "loss": 0.01501740887761116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.19848483668684, "step_time": 0.3070172424316406} +{"epoch": 0, "iter": 537, "iter_tflops": 46.88215453440934, "iter_time": 0.4400628280639649, "loss": 0.19068245589733124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.17821439017403, "step_time": 0.4031225738525391} +{"epoch": 0, "iter": 538, "iter_tflops": 48.09411124766491, "iter_time": 0.4289733810424805, "loss": 0.22517485916614532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.060935704529285, "step_time": 0.3962874126434326} +{"epoch": 0, "iter": 539, "iter_tflops": 51.78378323143396, "iter_time": 0.3984083862304687, "loss": 0.2581854462623596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.31558992674629, "step_time": 0.3663478183746338} +{"epoch": 0, "iter": 540, "iter_tflops": 52.47598495804385, "iter_time": 0.39315304946899415, "loss": 0.26558202505111694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.01833391497326, "step_time": 0.36183262634277347} +{"epoch": 0, "iter": 541, "iter_tflops": 46.40925137065679, "iter_time": 0.44454700088500987, "loss": 0.3619622588157654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.908224086180674, "step_time": 
0.4052605228424072} +{"epoch": 0, "iter": 542, "iter_tflops": 46.25996431557003, "iter_time": 0.44598161315917967, "loss": 0.41435155272483826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.70269646647639, "step_time": 0.4069032802581787} +{"epoch": 0, "iter": 543, "iter_tflops": 51.62059323874616, "iter_time": 0.39966788864135744, "loss": 0.4445009231567383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.76221361433108, "step_time": 0.3699834022521973} +{"epoch": 0, "iter": 544, "iter_tflops": 46.60954887709631, "iter_time": 0.4426366271972656, "loss": 0.5418584942817688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96631100116449, "step_time": 0.41290007400512696} +{"epoch": 0, "iter": 545, "iter_tflops": 42.79311502539406, "iter_time": 0.4704821434020996, "loss": 0.011909861117601395, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 46.68470586669294, "step_time": 0.4312632179260254} +{"epoch": 0, "iter": 546, "iter_tflops": 26.65309077660499, "iter_time": 0.7553869323730468, "loss": 0.007171686738729477, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 37.706463476312045, "step_time": 0.5339508037567139} +{"epoch": 0, "iter": 547, "iter_tflops": 58.69887854190929, "iter_time": 0.3429945678710937, "loss": 0.015189768746495247, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 64.69538023045854, "step_time": 0.3112030010223389} +{"epoch": 0, "iter": 548, "iter_tflops": 57.418608012745835, "iter_time": 0.35064236450195313, "loss": 0.013806900009512901, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 62.96883204059744, "step_time": 0.3197359046936035} +{"epoch": 0, "iter": 549, "iter_tflops": 24.29835529970973, "iter_time": 0.8490736618041992, "loss": 0.8500806093215942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.40441534358057, "step_time": 0.8121066055297852} +{"epoch": 0, "iter": 550, "iter_tflops": 14.530253137843236, "iter_time": 1.41987158203125, "loss": 1.020143985748291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.46710716157586, "step_time": 
1.0080121898651124} +{"epoch": 0, "iter": 551, "iter_tflops": 49.52596848284919, "iter_time": 0.41657122802734375, "loss": 0.8642250299453735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.65155399405179, "step_time": 0.38453860092163084} +{"epoch": 0, "iter": 552, "iter_tflops": 51.9057782782381, "iter_time": 0.3974720001220703, "loss": 0.9269462823867798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.028979549246685, "step_time": 0.3682218322753907} +{"epoch": 0, "iter": 553, "iter_tflops": 25.783642699291526, "iter_time": 0.4394829025268554, "loss": 0.015444758348166943, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 28.255337226179172, "step_time": 0.4010382194519043} +{"epoch": 0, "iter": 554, "iter_tflops": 28.30915112383195, "iter_time": 0.40027587127685543, "loss": 0.006455957889556885, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 31.955920841016386, "step_time": 0.3545968894958496} +{"epoch": 0, "iter": 555, "iter_tflops": 29.69965923436167, "iter_time": 0.3815353584289551, "loss": 0.022585038095712662, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 32.517644843706805, "step_time": 0.3484714279174804} +{"epoch": 0, "iter": 556, "iter_tflops": 32.76919978473865, "iter_time": 0.3457963638305664, "loss": 0.022077269852161407, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 35.829635460125836, "step_time": 0.31625971031188965} +{"epoch": 0, "iter": 557, "iter_tflops": 35.37489839551279, "iter_time": 0.5832127990722655, "loss": 0.961166501045227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.882739007603405, "step_time": 0.544604061126709} +{"epoch": 0, "iter": 558, "iter_tflops": 46.811945440167726, "iter_time": 0.44072283935546874, "loss": 0.8491876721382141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.40241766533922, "step_time": 0.4013642635345459} +{"epoch": 0, "iter": 559, "iter_tflops": 48.793799526606946, "iter_time": 0.42282203292846676, "loss": 0.8181353211402893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.67028366783964, "step_time": 
0.39170272254943844} +{"epoch": 0, "iter": 560, "iter_tflops": 49.177602665298664, "iter_time": 0.4195221481323242, "loss": 1.0648019313812256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.323139599573246, "step_time": 0.38690695381164547} +{"epoch": 0, "iter": 561, "iter_tflops": 48.42825401015504, "iter_time": 0.4260135726928711, "loss": 0.005368877202272415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.096625200843924, "step_time": 0.3885575294494629} +{"epoch": 0, "iter": 562, "iter_tflops": 48.861937612705994, "iter_time": 0.4222324066162109, "loss": 0.008334910497069359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.888583443631546, "step_time": 0.382847204208374} +{"epoch": 0, "iter": 563, "iter_tflops": 63.497583266513224, "iter_time": 0.3249114761352539, "loss": 0.012863815762102604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 70.01485025340207, "step_time": 0.2946673946380615} +{"epoch": 0, "iter": 564, "iter_tflops": 62.297785997998055, "iter_time": 0.3311689682006836, "loss": 0.013619267381727695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.83365493738026, "step_time": 0.3041424427032471} +{"epoch": 0, "iter": 565, "iter_tflops": 10.866386635855285, "iter_time": 1.8986158142089844, "loss": 1.3193618059158325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.092225443543596, "step_time": 1.8599598083496096} +{"epoch": 0, "iter": 566, "iter_tflops": 29.572566524955192, "iter_time": 0.6976429824829101, "loss": 1.1259324550628662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.71774004425525, "step_time": 0.611876522064209} +{"epoch": 0, "iter": 567, "iter_tflops": 38.117250925312376, "iter_time": 0.5412534484863282, "loss": 1.0911831855773926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.49424846108234, "step_time": 0.49720369148254395} +{"epoch": 0, "iter": 568, "iter_tflops": 36.04111058538598, "iter_time": 0.5724322357177735, "loss": 0.9158269762992859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.75384477220885, "step_time": 
0.5323624954223632} +{"epoch": 0, "iter": 569, "iter_tflops": 11.101043919881675, "iter_time": 1.7837940368652345, "loss": 0.26946723461151123, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 11.434756322243096, "step_time": 1.7317357177734376} +{"epoch": 0, "iter": 570, "iter_tflops": 38.6968529857988, "iter_time": 0.5117205772399902, "loss": 0.22921214997768402, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 45.29766721710719, "step_time": 0.4371522235870362} +{"epoch": 0, "iter": 571, "iter_tflops": 42.92416976377215, "iter_time": 0.4613246116638184, "loss": 0.28225356340408325, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 46.842050620346825, "step_time": 0.4227393054962158} +{"epoch": 0, "iter": 572, "iter_tflops": 38.38503151702343, "iter_time": 0.5158775482177733, "loss": 0.2345852553844452, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 41.90053497937177, "step_time": 0.4725948238372803} +{"epoch": 0, "iter": 573, "iter_tflops": 11.284314712158595, "iter_time": 1.828298309326172, "loss": 0.9091084599494934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.6940637451743, "step_time": 1.7642364501953125} +{"epoch": 0, "iter": 574, "iter_tflops": 39.10528891222015, "iter_time": 0.5275780868530273, "loss": 0.8504753112792969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.17421527504138, "step_time": 0.4778568267822266} +{"epoch": 0, "iter": 575, "iter_tflops": 44.037839582196554, "iter_time": 0.468485595703125, "loss": 1.103979229927063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.84936369343446, "step_time": 0.4311675624847412} +{"epoch": 0, "iter": 576, "iter_tflops": 37.02344086430306, "iter_time": 0.5572440872192383, "loss": 0.8505343794822693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.381666766686756, "step_time": 0.5109024753570556} +{"epoch": 0, "iter": 577, "iter_tflops": 24.229830618617452, "iter_time": 0.8514749374389647, "loss": 0.4646243751049042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.94418620647754, "step_time": 
0.7952106628417969} +{"epoch": 0, "iter": 578, "iter_tflops": 26.394195856373905, "iter_time": 0.7816526641845702, "loss": 0.5830449461936951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.522389872585656, "step_time": 0.6343658504486084} +{"epoch": 0, "iter": 579, "iter_tflops": 42.40704590237809, "iter_time": 0.48650154876708984, "loss": 0.3747984766960144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.23961612293099, "step_time": 0.44617787170410156} +{"epoch": 0, "iter": 580, "iter_tflops": 37.51538437356453, "iter_time": 0.5499368820190431, "loss": 0.4661244750022888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.25446014979358, "step_time": 0.5000936489105224} +{"epoch": 0, "iter": 581, "iter_tflops": 34.1632773465989, "iter_time": 0.6038967895507812, "loss": 0.32945716381073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.11992548755376, "step_time": 0.5412154731750488} +{"epoch": 0, "iter": 582, "iter_tflops": 39.15093792427966, "iter_time": 0.5269629440307617, "loss": 0.3783126771450043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88044146378442, "step_time": 0.4811306228637695} +{"epoch": 0, "iter": 583, "iter_tflops": 40.35150106365396, "iter_time": 0.5112844123840332, "loss": 0.27802470326423645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.74545657638342, "step_time": 0.471616828918457} +{"epoch": 0, "iter": 584, "iter_tflops": 43.22090880184343, "iter_time": 0.47734057617187503, "loss": 0.33502262830734253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17977547472499, "step_time": 0.4372868099212646} +{"epoch": 0, "iter": 585, "iter_tflops": 2.418051556560047, "iter_time": 0.8757404403686524, "loss": 0.33670011162757874, "lr": 3e-05, "seqlen": 864.0, "step_tflops": 2.585410304368445, "step_time": 0.8190520210266112} +{"epoch": 0, "iter": 586, "iter_tflops": 0.879309053138024, "iter_time": 2.4082380676269532, "loss": 0.25816217064857483, "lr": 3e-05, "seqlen": 864.0, "step_tflops": 1.0433021675791414, "step_time": 
2.0296953277587892} +{"epoch": 0, "iter": 587, "iter_tflops": 1.4012295774402277, "iter_time": 1.5112338256835938, "loss": 0.019608188420534134, "lr": 3e-05, "seqlen": 864.0, "step_tflops": 1.7152077139903121, "step_time": 1.234594223022461} +{"epoch": 0, "iter": 588, "iter_tflops": 3.866508865724759, "iter_time": 0.5476737823486328, "loss": 0.09564827382564545, "lr": 3e-05, "seqlen": 864.0, "step_tflops": 4.2139964227279325, "step_time": 0.5025124187469483} +{"epoch": 0, "iter": 589, "iter_tflops": 8.968888195121574, "iter_time": 1.6986141052246095, "loss": 0.46863511204719543, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 9.318368792887998, "step_time": 1.634908462524414} +{"epoch": 0, "iter": 590, "iter_tflops": 19.801053144358143, "iter_time": 0.7693873596191406, "loss": 0.5081607699394226, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 21.969966414892024, "step_time": 0.6934321022033691} +{"epoch": 0, "iter": 591, "iter_tflops": 19.14627774481765, "iter_time": 0.7956993103027343, "loss": 0.354603111743927, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 20.35748546301186, "step_time": 0.748357650756836} +{"epoch": 0, "iter": 592, "iter_tflops": 24.46599425357443, "iter_time": 0.6226879577636719, "loss": 0.4103126525878906, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 26.289702981085558, "step_time": 0.5794922828674317} +{"epoch": 0, "iter": 593, "iter_tflops": 22.136519550115686, "iter_time": 0.9319935531616211, "loss": 1.1765856742858887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.38263397386573, "step_time": 0.882325469970703} +{"epoch": 0, "iter": 594, "iter_tflops": 14.416939595350254, "iter_time": 1.4310314178466796, "loss": 0.9578369855880737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.241850410025755, "step_time": 1.130975917816162} +{"epoch": 0, "iter": 595, "iter_tflops": 36.42117704345218, "iter_time": 0.566458724975586, "loss": 1.0195029973983765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.488589234967435, "step_time": 
0.5224570922851562} +{"epoch": 0, "iter": 596, "iter_tflops": 38.73121200383983, "iter_time": 0.532673583984375, "loss": 1.1318190097808838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.182726909528206, "step_time": 0.4890886631011963} +{"epoch": 0, "iter": 597, "iter_tflops": 33.618820707570436, "iter_time": 0.6136768951416016, "loss": 0.975911021232605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.8493635598064, "step_time": 0.559876522064209} +{"epoch": 0, "iter": 598, "iter_tflops": 34.93699305491781, "iter_time": 0.5905228729248047, "loss": 1.050826072692871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.80327133389718, "step_time": 0.545748893737793} +{"epoch": 0, "iter": 599, "iter_tflops": 34.05859596969134, "iter_time": 0.6057529067993164, "loss": 0.9739422798156738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.94975847236794, "step_time": 0.5583553009033203} +{"epoch": 0, "iter": 600, "iter_tflops": 38.890156862836854, "iter_time": 0.5304965362548828, "loss": 1.15399968624115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.362044303016084, "step_time": 0.4870183639526367} +{"epoch": 0, "iter": 601, "iter_tflops": 10.192830048919001, "iter_time": 2.0240790252685548, "loss": 1.0304932594299316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.483011162843027, "step_time": 1.9680503234863282} +{"epoch": 0, "iter": 602, "iter_tflops": 24.404455571178623, "iter_time": 0.8453822479248048, "loss": 1.0492056608200073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.63927254139401, "step_time": 0.6960728702545166} +{"epoch": 0, "iter": 603, "iter_tflops": 47.40832234521932, "iter_time": 0.43517873001098634, "loss": 1.0380825996398926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.07289254073553, "step_time": 0.4039538879394531} +{"epoch": 0, "iter": 604, "iter_tflops": 45.014460230873766, "iter_time": 0.4583214683532715, "loss": 1.2012784481048584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.39587060626374, "step_time": 
0.42629863357543946} +{"epoch": 0, "iter": 605, "iter_tflops": 20.03671648811527, "iter_time": 0.7644196243286132, "loss": 0.4301764667034149, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 21.160581850539252, "step_time": 0.7238203277587891} +{"epoch": 0, "iter": 606, "iter_tflops": 12.23076014839482, "iter_time": 1.2522900543212891, "loss": 0.4774059057235718, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 13.589446974458182, "step_time": 1.1270848121643067} +{"epoch": 0, "iter": 607, "iter_tflops": 28.44864389248253, "iter_time": 0.5383897857666016, "loss": 0.26489344239234924, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 30.30453864488347, "step_time": 0.5054179992675781} +{"epoch": 0, "iter": 608, "iter_tflops": 28.768218214816887, "iter_time": 0.5324090347290039, "loss": 0.5609877109527588, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 30.52460017259873, "step_time": 0.5017742805480957} +{"epoch": 0, "iter": 609, "iter_tflops": 12.393002572330193, "iter_time": 1.664737289428711, "loss": 0.26660075783729553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.708012952509923, "step_time": 1.6234712371826172} +{"epoch": 0, "iter": 610, "iter_tflops": 40.361488887974375, "iter_time": 0.5111578903198242, "loss": 0.21875715255737305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.308708191216176, "step_time": 0.46562164306640624} +{"epoch": 0, "iter": 611, "iter_tflops": 43.03674517684865, "iter_time": 0.4793832206726074, "loss": 0.18554316461086273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.97910793345391, "step_time": 0.4391546459197998} +{"epoch": 0, "iter": 612, "iter_tflops": 45.197115736838285, "iter_time": 0.45646924972534175, "loss": 0.29871684312820435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.37392486095881, "step_time": 0.41785403060913084} +{"epoch": 0, "iter": 613, "iter_tflops": 23.76055191118563, "iter_time": 0.8682918472290039, "loss": 0.9351066946983337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.407975510418954, "step_time": 
0.8119928131103517} +{"epoch": 0, "iter": 614, "iter_tflops": 14.381852650683138, "iter_time": 1.4345226593017577, "loss": 0.9143012166023254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.888409779895106, "step_time": 1.221612560272217} +{"epoch": 0, "iter": 615, "iter_tflops": 49.4960881634964, "iter_time": 0.4168227081298828, "loss": 0.8989169597625732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.660125125460915, "step_time": 0.3844771785736084} +{"epoch": 0, "iter": 616, "iter_tflops": 47.18074331449096, "iter_time": 0.4372778396606446, "loss": 0.6767606735229492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.89516884856021, "step_time": 0.4053644771575927} +{"epoch": 0, "iter": 617, "iter_tflops": 29.304474287467823, "iter_time": 0.7040253753662109, "loss": 0.005753070116043091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.053341170261987, "step_time": 0.6643759651184082} +{"epoch": 0, "iter": 618, "iter_tflops": 14.326401679116778, "iter_time": 1.4400750427246094, "loss": 0.0050440384075045586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.232591205296302, "step_time": 1.1972136554718018} +{"epoch": 0, "iter": 619, "iter_tflops": 56.59962300796628, "iter_time": 0.36450938034057623, "loss": 0.016928456723690033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.2671232944913, "step_time": 0.3313320484161377} +{"epoch": 0, "iter": 620, "iter_tflops": 61.72842219328406, "iter_time": 0.33422356796264646, "loss": 0.0060550374910235405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.51828018664428, "step_time": 0.30556307792663573} +{"epoch": 0, "iter": 621, "iter_tflops": 30.7621561609234, "iter_time": 0.6706647415161134, "loss": 0.13648869097232819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.69022540874432, "step_time": 0.6311089401245117} +{"epoch": 0, "iter": 622, "iter_tflops": 14.80775789291835, "iter_time": 1.3932624816894532, "loss": 0.2070920467376709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.465298050893523, "step_time": 
1.1172900352478028} +{"epoch": 0, "iter": 623, "iter_tflops": 35.319399027125144, "iter_time": 0.5841292343139649, "loss": 0.09572872519493103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.582312088663265, "step_time": 0.5212200202941895} +{"epoch": 0, "iter": 624, "iter_tflops": 47.37878356910844, "iter_time": 0.43545004653930663, "loss": 0.13340531289577484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.844517400209945, "step_time": 0.39794166374206547} +{"epoch": 0, "iter": 625, "iter_tflops": 21.376103018561775, "iter_time": 0.9651475524902344, "loss": 1.1675233840942383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.70038405353319, "step_time": 0.9088433685302735} +{"epoch": 0, "iter": 626, "iter_tflops": 25.602378372137586, "iter_time": 0.8058272247314453, "loss": 1.2159935235977173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.812084892315138, "step_time": 0.7160569458007813} +{"epoch": 0, "iter": 627, "iter_tflops": 46.704063451373784, "iter_time": 0.4417408676147461, "loss": 1.109412670135498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.47747627656482, "step_time": 0.4087187995910645} +{"epoch": 0, "iter": 628, "iter_tflops": 49.386300524806316, "iter_time": 0.41774932098388673, "loss": 1.1755374670028687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.62044833733982, "step_time": 0.38476167488098145} +{"epoch": 0, "iter": 629, "iter_tflops": 42.26326602025055, "iter_time": 0.4646204795837402, "loss": 0.11954721808433533, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 46.28544029274643, "step_time": 0.42424526596069334} +{"epoch": 0, "iter": 630, "iter_tflops": 46.73548682241641, "iter_time": 0.4201599311828613, "loss": 0.07105371356010437, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 53.17551328921186, "step_time": 0.3692748355865479} +{"epoch": 0, "iter": 631, "iter_tflops": 46.105293463634446, "iter_time": 0.42590291595458984, "loss": 0.12717588245868683, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 50.27292948894597, "step_time": 
0.39059547805786127} +{"epoch": 0, "iter": 632, "iter_tflops": 51.35992610747422, "iter_time": 0.382328800201416, "loss": 0.15332375466823578, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 55.9572744327128, "step_time": 0.3509173583984375} +{"epoch": 0, "iter": 633, "iter_tflops": 41.55942607774405, "iter_time": 0.4964239273071289, "loss": 0.6155823469161987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.53737976700208, "step_time": 0.4530584239959717} +{"epoch": 0, "iter": 634, "iter_tflops": 40.033659589424076, "iter_time": 0.5153436813354492, "loss": 0.5937594175338745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.269278912080765, "step_time": 0.4660363578796386} +{"epoch": 0, "iter": 635, "iter_tflops": 39.941022632256626, "iter_time": 0.5165389404296876, "loss": 0.4911433458328247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65692112660415, "step_time": 0.4725732593536377} +{"epoch": 0, "iter": 636, "iter_tflops": 47.25507694570396, "iter_time": 0.43658998870849614, "loss": 0.5272795557975769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.35219619279984, "step_time": 0.40175679016113286} +{"epoch": 0, "iter": 637, "iter_tflops": 30.05041003358087, "iter_time": 0.6865494842529296, "loss": 0.46279802918434143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.447072152493384, "step_time": 0.6358383712768555} +{"epoch": 0, "iter": 638, "iter_tflops": 7.94434294491966, "iter_time": 2.596954040527344, "loss": 0.4195637106895447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.278328708191859, "step_time": 2.0072420425415043} +{"epoch": 0, "iter": 639, "iter_tflops": 13.625010054715695, "iter_time": 1.5142075805664061, "loss": 0.48398202657699585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.977702206719089, "step_time": 1.291242835998535} +{"epoch": 0, "iter": 640, "iter_tflops": 34.115054476887074, "iter_time": 0.6047504196166992, "loss": 0.34419047832489014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.292484202768236, "step_time": 
0.47655138969421396} +{"epoch": 0, "iter": 641, "iter_tflops": 12.051455403991262, "iter_time": 1.2132922058105469, "loss": 0.5630276799201965, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 12.923668801879591, "step_time": 1.1314075851440428} +{"epoch": 0, "iter": 642, "iter_tflops": 10.836853106843273, "iter_time": 1.3492788696289062, "loss": 0.5530914664268494, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 13.866505261607967, "step_time": 1.0544788780212402} +{"epoch": 0, "iter": 643, "iter_tflops": 27.08216221092802, "iter_time": 0.5399102478027343, "loss": 0.42876601219177246, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 28.809239866682624, "step_time": 0.5075433082580566} +{"epoch": 0, "iter": 644, "iter_tflops": 28.62616609440358, "iter_time": 0.5107892150878907, "loss": 0.3685451149940491, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 30.33453471430917, "step_time": 0.48202278518676756} +{"epoch": 0, "iter": 645, "iter_tflops": 36.43449619063041, "iter_time": 0.5662516479492189, "loss": 0.861283540725708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.18981051333662, "step_time": 0.5264402465820313} +{"epoch": 0, "iter": 646, "iter_tflops": 33.5027869680022, "iter_time": 0.6158023071289063, "loss": 0.8512598872184753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.445597614600636, "step_time": 0.47487190055847167} +{"epoch": 0, "iter": 647, "iter_tflops": 48.8292530762876, "iter_time": 0.42251503372192384, "loss": 0.8581055402755737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.30079039086499, "step_time": 0.38706918525695805} +{"epoch": 0, "iter": 648, "iter_tflops": 46.92419763613436, "iter_time": 0.43966854095458985, "loss": 0.9306585788726807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.56491360249109, "step_time": 0.40801203918457035} +{"epoch": 0, "iter": 649, "iter_tflops": 26.9599497314337, "iter_time": 0.7652497024536132, "loss": 0.26470014452934265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.53189898643523, "step_time": 
0.7230886917114259} +{"epoch": 0, "iter": 650, "iter_tflops": 13.35083847065204, "iter_time": 1.5453032073974609, "loss": 0.2490725964307785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.98023252603274, "step_time": 1.2150065364837648} +{"epoch": 0, "iter": 651, "iter_tflops": 37.53664988987367, "iter_time": 0.5496253280639649, "loss": 0.15279383957386017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.02641892186699, "step_time": 0.5028733692169189} +{"epoch": 0, "iter": 652, "iter_tflops": 42.0956888536783, "iter_time": 0.4900999145507813, "loss": 0.19482220709323883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.697508267554426, "step_time": 0.4514708633422851} +{"epoch": 0, "iter": 653, "iter_tflops": 22.564950688642845, "iter_time": 0.9142981872558593, "loss": 0.8963837623596191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.994333202596106, "step_time": 0.859831916809082} +{"epoch": 0, "iter": 654, "iter_tflops": 9.447261460974875, "iter_time": 2.183817352294922, "loss": 1.1405079364776611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.153657368301218, "step_time": 1.8497155532836915} +{"epoch": 0, "iter": 655, "iter_tflops": 12.375276471257123, "iter_time": 1.6671218261718748, "loss": 0.968603789806366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.465320503523397, "step_time": 1.2530028495788577} +{"epoch": 0, "iter": 656, "iter_tflops": 44.66597695964802, "iter_time": 0.4618972854614258, "loss": 0.9982810616493225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.21531518841862, "step_time": 0.4278950252532959} +{"epoch": 0, "iter": 657, "iter_tflops": 19.54011989258513, "iter_time": 0.6836685562133789, "loss": 0.4610082507133484, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 20.7899324512627, "step_time": 0.6425689735412597} +{"epoch": 0, "iter": 658, "iter_tflops": 12.441538846984457, "iter_time": 1.0737390060424805, "loss": 0.3930245041847229, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 14.368838887583992, "step_time": 
0.9297178192138672} +{"epoch": 0, "iter": 659, "iter_tflops": 21.02766133282253, "iter_time": 0.6353043899536133, "loss": 0.3359444737434387, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 22.661189232659282, "step_time": 0.5895085830688476} +{"epoch": 0, "iter": 660, "iter_tflops": 22.847194944099083, "iter_time": 0.5847092208862305, "loss": 0.3195766806602478, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 24.42710964738054, "step_time": 0.5468909645080566} +{"epoch": 0, "iter": 661, "iter_tflops": 17.93386020082832, "iter_time": 1.15039892578125, "loss": 0.36544543504714966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.415960595391645, "step_time": 1.062584228515625} +{"epoch": 0, "iter": 662, "iter_tflops": 14.71270738551215, "iter_time": 1.4022635650634765, "loss": 0.2870815396308899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.07171503021291, "step_time": 1.1416234416961668} +{"epoch": 0, "iter": 663, "iter_tflops": 38.36103701447775, "iter_time": 0.5378137588500976, "loss": 0.44216489791870117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.059048139158214, "step_time": 0.490526876449585} +{"epoch": 0, "iter": 664, "iter_tflops": 42.848990685599084, "iter_time": 0.4814837684631348, "loss": 0.26678526401519775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.45451743991355, "step_time": 0.44411382675170896} +{"epoch": 0, "iter": 665, "iter_tflops": 32.0111540080312, "iter_time": 0.6444970245361328, "loss": 0.612817108631134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.282144660335106, "step_time": 0.5847460155487061} +{"epoch": 0, "iter": 666, "iter_tflops": 45.45329714251177, "iter_time": 0.45389652252197266, "loss": 0.5693396925926208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.338942600361655, "step_time": 0.41815029716491703} +{"epoch": 0, "iter": 667, "iter_tflops": 51.193134959743155, "iter_time": 0.4030050811767578, "loss": 0.65523761510849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.29162193099351, "step_time": 
0.37313236236572267} +{"epoch": 0, "iter": 668, "iter_tflops": 49.891887516624465, "iter_time": 0.41351599502563474, "loss": 0.6234486699104309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.76607490297291, "step_time": 0.3837195396423339} +{"epoch": 0, "iter": 669, "iter_tflops": 42.35813393171749, "iter_time": 0.4870633239746094, "loss": 0.5835962295532227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.23736202743167, "step_time": 0.44619962310791017} +{"epoch": 0, "iter": 670, "iter_tflops": 35.485789648720164, "iter_time": 0.5813902893066406, "loss": 0.35875871777534485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.52403270678427, "step_time": 0.5219885749816895} +{"epoch": 0, "iter": 671, "iter_tflops": 47.12350640841809, "iter_time": 0.4378089637756348, "loss": 0.31774061918258667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.80102543783165, "step_time": 0.4061156902313232} +{"epoch": 0, "iter": 672, "iter_tflops": 48.76805600180459, "iter_time": 0.42304523086547857, "loss": 0.4473671615123749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.92004671058236, "step_time": 0.3898540306091308} +{"epoch": 0, "iter": 673, "iter_tflops": 36.37823911702489, "iter_time": 0.5671273269653321, "loss": 0.031957417726516724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.17595588393717, "step_time": 0.5266264228820801} +{"epoch": 0, "iter": 674, "iter_tflops": 51.88029245533001, "iter_time": 0.39766725540161135, "loss": 0.05452658608555794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.08618451560972, "step_time": 0.36140256500244144} +{"epoch": 0, "iter": 675, "iter_tflops": 56.14215939600053, "iter_time": 0.3674795150756836, "loss": 0.047914665192365646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.42493752481296, "step_time": 0.3358748798370362} +{"epoch": 0, "iter": 676, "iter_tflops": 52.93148004217819, "iter_time": 0.3897698211669922, "loss": 0.0661892518401146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.61258158869163, "step_time": 
0.35810048675537115} +{"epoch": 0, "iter": 677, "iter_tflops": 46.84291095084265, "iter_time": 0.4404314994812012, "loss": 0.11209134757518768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.509575850851526, "step_time": 0.4005292835235596} +{"epoch": 0, "iter": 678, "iter_tflops": 44.502303913341386, "iter_time": 0.463596076965332, "loss": 0.14258624613285065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.94130041949401, "step_time": 0.42154771804809565} +{"epoch": 0, "iter": 679, "iter_tflops": 49.95526713963367, "iter_time": 0.4129913558959961, "loss": 0.15515147149562836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.20369324399373, "step_time": 0.380621545791626} +{"epoch": 0, "iter": 680, "iter_tflops": 52.538412570250976, "iter_time": 0.3926858940124512, "loss": 0.10904519259929657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.112455535955064, "step_time": 0.3612363243103027} +{"epoch": 0, "iter": 681, "iter_tflops": 23.3835290929901, "iter_time": 0.8822916946411132, "loss": 0.3431559205055237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.560938038366107, "step_time": 0.8399961547851563} +{"epoch": 0, "iter": 682, "iter_tflops": 13.382553935686506, "iter_time": 1.5416409759521483, "loss": 0.3655403256416321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.257782175072858, "step_time": 1.0713120193481447} +{"epoch": 0, "iter": 683, "iter_tflops": 42.341000979471126, "iter_time": 0.4872604103088379, "loss": 0.3561614155769348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.292671906962866, "step_time": 0.4456665096282959} +{"epoch": 0, "iter": 684, "iter_tflops": 40.1603821321499, "iter_time": 0.5137175598144531, "loss": 0.43617182970046997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87145175522206, "step_time": 0.47026238441467283} +{"epoch": 0, "iter": 685, "iter_tflops": 20.326578499304883, "iter_time": 1.0149811248779297, "loss": 0.07911583036184311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.60659780560318, "step_time": 
0.9548515548706055} +{"epoch": 0, "iter": 686, "iter_tflops": 22.716196386068567, "iter_time": 0.9082107391357421, "loss": 0.04500700160861015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.73132220332121, "step_time": 0.7439635715484619} +{"epoch": 0, "iter": 687, "iter_tflops": 46.522793653713585, "iter_time": 0.44346205139160155, "loss": 0.06774639338254929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.376615396562535, "step_time": 0.40156583595275874} +{"epoch": 0, "iter": 688, "iter_tflops": 48.123823171052926, "iter_time": 0.42870853042602536, "loss": 0.07040761411190033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.917360141322845, "step_time": 0.3898738231658936} +{"epoch": 0, "iter": 689, "iter_tflops": 14.337320475160098, "iter_time": 1.4389783325195311, "loss": 0.6700447797775269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.088915639796733, "step_time": 1.36730126953125} +{"epoch": 0, "iter": 690, "iter_tflops": 19.553749379981102, "iter_time": 1.0550965499877931, "loss": 0.7435401678085327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.31838157884275, "step_time": 0.728540699005127} +{"epoch": 0, "iter": 691, "iter_tflops": 37.049098087726506, "iter_time": 0.5568581848144531, "loss": 0.8129079341888428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.5440966639526, "step_time": 0.508855670928955} +{"epoch": 0, "iter": 692, "iter_tflops": 40.536001829253564, "iter_time": 0.5089572868347167, "loss": 0.6468371152877808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.20568833519856, "step_time": 0.46670675849914556} +{"epoch": 0, "iter": 693, "iter_tflops": 23.813329323831354, "iter_time": 0.8663674545288087, "loss": 0.39416083693504333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.756929136612445, "step_time": 0.8009919738769531} +{"epoch": 0, "iter": 694, "iter_tflops": 17.44894021886782, "iter_time": 1.1823694305419923, "loss": 0.4650627374649048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.489452360446034, "step_time": 
0.9173675365447997} +{"epoch": 0, "iter": 695, "iter_tflops": 40.79877485574516, "iter_time": 0.50567924118042, "loss": 0.3615325689315796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.61054646764897, "step_time": 0.46247121238708494} +{"epoch": 0, "iter": 696, "iter_tflops": 43.4457035373234, "iter_time": 0.4748707427978515, "loss": 0.3202468156814575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32225916685417, "step_time": 0.43597017288208006} +{"epoch": 0, "iter": 697, "iter_tflops": 18.410025210189406, "iter_time": 1.120644500732422, "loss": 0.11399683356285095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.507404249038952, "step_time": 1.0576032180786132} +{"epoch": 0, "iter": 698, "iter_tflops": 27.93539262286513, "iter_time": 0.7385288543701172, "loss": 0.17939049005508423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.6643817530047, "step_time": 0.6515552291870117} +{"epoch": 0, "iter": 699, "iter_tflops": 53.12062120923974, "iter_time": 0.3883820075988769, "loss": 0.16397148370742798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.60990864154525, "step_time": 0.3581171016693115} +{"epoch": 0, "iter": 700, "iter_tflops": 54.406718009690394, "iter_time": 0.37920121383666994, "loss": 0.11766412109136581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.35012623999153, "step_time": 0.34761667442321775} +{"epoch": 0, "iter": 701, "iter_tflops": 33.73385597650938, "iter_time": 0.6115842056274414, "loss": 0.7247940301895142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.33914964891333, "step_time": 0.5677373771667481} +{"epoch": 0, "iter": 702, "iter_tflops": 10.89527273623397, "iter_time": 1.8935821075439452, "loss": 0.8296911716461182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.009737411336147, "step_time": 1.4726252822875976} +{"epoch": 0, "iter": 703, "iter_tflops": 23.462704844542937, "iter_time": 0.8793143692016602, "loss": 0.753939688205719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.136971203108143, "step_time": 
0.7332378940582276} +{"epoch": 0, "iter": 704, "iter_tflops": 46.69847413852623, "iter_time": 0.4417937393188477, "loss": 0.6407961249351501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.450791689046234, "step_time": 0.40893498039245607} +{"epoch": 0, "iter": 705, "iter_tflops": 22.99370882671121, "iter_time": 0.664336814880371, "loss": 0.2784039378166199, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.529396284071872, "step_time": 0.6227453422546386} +{"epoch": 0, "iter": 706, "iter_tflops": 8.41678031725721, "iter_time": 1.8148943786621095, "loss": 0.7111649513244629, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 9.75298043417834, "step_time": 1.5662460708618164} +{"epoch": 0, "iter": 707, "iter_tflops": 22.02937954997385, "iter_time": 0.6934179534912108, "loss": 0.3331347107887268, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.80913634881458, "step_time": 0.6415842666625976} +{"epoch": 0, "iter": 708, "iter_tflops": 23.725152079483532, "iter_time": 0.6438554000854493, "loss": 0.26438677310943604, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 25.35875995380742, "step_time": 0.6023783226013183} +{"epoch": 0, "iter": 709, "iter_tflops": 18.13656331597402, "iter_time": 1.13754150390625, "loss": 0.5987810492515564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.443466094322165, "step_time": 1.0610810546875} +{"epoch": 0, "iter": 710, "iter_tflops": 22.34532602513493, "iter_time": 0.9232845153808594, "loss": 0.7070704102516174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.15413486779431, "step_time": 0.8201869640350341} +{"epoch": 0, "iter": 711, "iter_tflops": 48.729265494059504, "iter_time": 0.4233819923400879, "loss": 0.6340017318725586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.83411686680158, "step_time": 0.3904880924224854} +{"epoch": 0, "iter": 712, "iter_tflops": 43.90296803196039, "iter_time": 0.4699248008728027, "loss": 0.6669037342071533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.96983059496698, "step_time": 0.4392413864135742} 
+{"epoch": 0, "iter": 713, "iter_tflops": 32.769083657525194, "iter_time": 0.6295901870727538, "loss": 0.8133872747421265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.03068079495127, "step_time": 0.5889435501098632} +{"epoch": 0, "iter": 714, "iter_tflops": 10.007073847980639, "iter_time": 2.0616509704589845, "loss": 0.7319921255111694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.082048485620156, "step_time": 1.5770537414550783} +{"epoch": 0, "iter": 715, "iter_tflops": 13.66283707907502, "iter_time": 1.5100153350830077, "loss": 0.8188021183013916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.184771616660708, "step_time": 1.200545108795166} +{"epoch": 0, "iter": 716, "iter_tflops": 27.460939041182385, "iter_time": 0.7512887115478516, "loss": 0.7293439507484436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.83297429428938, "step_time": 0.609792486190796} +{"epoch": 0, "iter": 717, "iter_tflops": 21.61256200445232, "iter_time": 0.7769423217773439, "loss": 0.4755142033100128, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 22.965772088831145, "step_time": 0.7311626205444336} +{"epoch": 0, "iter": 718, "iter_tflops": 11.048680666642278, "iter_time": 1.5197935943603513, "loss": 0.3658170998096466, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 14.917361797040572, "step_time": 1.1256490478515624} +{"epoch": 0, "iter": 719, "iter_tflops": 25.736446950700568, "iter_time": 0.6524488067626952, "loss": 0.39612820744514465, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 27.79898414546778, "step_time": 0.6040405654907226} +{"epoch": 0, "iter": 720, "iter_tflops": 26.76733029509583, "iter_time": 0.6273212127685547, "loss": 0.4269494414329529, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 28.593480217732846, "step_time": 0.5872567443847656} +{"epoch": 0, "iter": 721, "iter_tflops": 13.860072636039217, "iter_time": 0.5968417282104492, "loss": 0.08460035920143127, "lr": 3e-05, "seqlen": 3344.0, "step_tflops": 15.407259123320037, "step_time": 0.5369072875976562} 
+{"epoch": 0, "iter": 722, "iter_tflops": 4.729066206966085, "iter_time": 1.749239562988281, "loss": 0.07885430008172989, "lr": 3e-05, "seqlen": 3344.0, "step_tflops": 6.0494091927732425, "step_time": 1.367450843811035} +{"epoch": 0, "iter": 723, "iter_tflops": 6.344889327385455, "iter_time": 1.3037689514160156, "loss": 0.06932701170444489, "lr": 3e-05, "seqlen": 3344.0, "step_tflops": 7.173523293608269, "step_time": 1.1531669120788575} +{"epoch": 0, "iter": 724, "iter_tflops": 11.428980386280825, "iter_time": 0.7237976989746093, "loss": 0.0922270342707634, "lr": 3e-05, "seqlen": 3344.0, "step_tflops": 14.583649848196467, "step_time": 0.5672290401458739} +{"epoch": 0, "iter": 725, "iter_tflops": 27.840086914363432, "iter_time": 0.5692685089111328, "loss": 0.37027621269226074, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 29.90350675914163, "step_time": 0.5299874992370606} +{"epoch": 0, "iter": 726, "iter_tflops": 29.19496306752059, "iter_time": 0.5428499679565431, "loss": 0.5670149922370911, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 31.111024041275282, "step_time": 0.5094170074462891} +{"epoch": 0, "iter": 727, "iter_tflops": 28.316903984323886, "iter_time": 0.5596828231811523, "loss": 0.3359454572200775, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.166850451728628, "step_time": 0.5253609352111817} +{"epoch": 0, "iter": 728, "iter_tflops": 29.292508109537135, "iter_time": 0.5410422592163086, "loss": 0.3955504894256592, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.978054240932185, "step_time": 0.5116036224365234} +{"epoch": 0, "iter": 729, "iter_tflops": 26.031018745494187, "iter_time": 0.7925580520629883, "loss": 0.8913794159889221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.421816977166845, "step_time": 0.7523605575561523} +{"epoch": 0, "iter": 730, "iter_tflops": 14.328463269352108, "iter_time": 1.4398678436279293, "loss": 0.8681170344352722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.26805855988926, "step_time": 1.1293533706665038} 
+{"epoch": 0, "iter": 731, "iter_tflops": 39.63426649348792, "iter_time": 0.5205367813110351, "loss": 1.111641526222229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.42129895280557, "step_time": 0.4751376399993897} +{"epoch": 0, "iter": 732, "iter_tflops": 43.821151428821885, "iter_time": 0.4708021774291992, "loss": 0.852899432182312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.84874202549208, "step_time": 0.4311731643676757} +{"epoch": 0, "iter": 733, "iter_tflops": 17.691146749392146, "iter_time": 1.1661818084716795, "loss": 0.6193632483482361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.750872488898946, "step_time": 1.1002737884521485} +{"epoch": 0, "iter": 734, "iter_tflops": 22.36701903099614, "iter_time": 0.9223890533447264, "loss": 0.4825401306152344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.956859320223746, "step_time": 0.7653374328613282} +{"epoch": 0, "iter": 735, "iter_tflops": 47.917965439682185, "iter_time": 0.43055028152465824, "loss": 0.6915460824966431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.992209577672845, "step_time": 0.3968112468719482} +{"epoch": 0, "iter": 736, "iter_tflops": 50.3592516432647, "iter_time": 0.40967831802368165, "loss": 0.5398069024085999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.60672172248563, "step_time": 0.3778123435974121} +{"epoch": 0, "iter": 737, "iter_tflops": 32.72411031659004, "iter_time": 0.6304554443359375, "loss": 0.5265164971351624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.77987970278189, "step_time": 0.5931904792785645} +{"epoch": 0, "iter": 738, "iter_tflops": 14.527700677053177, "iter_time": 1.4201210479736326, "loss": 0.5888873934745789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.223612126390297, "step_time": 1.1978377914428713} +{"epoch": 0, "iter": 739, "iter_tflops": 31.69853239439282, "iter_time": 0.6508532714843751, "loss": 0.6310972571372986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.4113007106662, "step_time": 0.5105278263092041} +{"epoch": 0, 
"iter": 740, "iter_tflops": 50.62432830169887, "iter_time": 0.4075331802368164, "loss": 0.6753288507461548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.14634624661856, "step_time": 0.3741153297424316} +{"epoch": 0, "iter": 741, "iter_tflops": 33.53010641565394, "iter_time": 0.6153005676269531, "loss": 0.19401845335960388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.727665711748834, "step_time": 0.5774542808532714} +{"epoch": 0, "iter": 742, "iter_tflops": 12.96770351132717, "iter_time": 1.5909596862792967, "loss": 0.1703266203403473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.902831968567941, "step_time": 1.2973219833374023} +{"epoch": 0, "iter": 743, "iter_tflops": 44.740125932946604, "iter_time": 0.46113177108764647, "loss": 0.12142314016819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.330885393402724, "step_time": 0.41821859359741215} +{"epoch": 0, "iter": 744, "iter_tflops": 51.629365799990424, "iter_time": 0.39959997940063474, "loss": 0.15845777094364166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.02037311908887, "step_time": 0.36827840232849124} +{"epoch": 0, "iter": 745, "iter_tflops": 32.772830297343454, "iter_time": 0.6295182113647461, "loss": 0.7612389326095581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.86005782513122, "step_time": 0.5918261413574218} +{"epoch": 0, "iter": 746, "iter_tflops": 13.659983069695112, "iter_time": 1.510330825805664, "loss": 0.8144273161888123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.436283320665932, "step_time": 1.2552164688110352} +{"epoch": 0, "iter": 747, "iter_tflops": 38.5001304650938, "iter_time": 0.5358707427978516, "loss": 0.7528918385505676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.925624230605706, "step_time": 0.492087926864624} +{"epoch": 0, "iter": 748, "iter_tflops": 44.15560830450317, "iter_time": 0.467236083984375, "loss": 0.8072118163108826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.78928038381368, "step_time": 0.4317096500396729} +{"epoch": 0, "iter": 
749, "iter_tflops": 20.23974372300958, "iter_time": 1.0193357086181642, "loss": 0.5724032521247864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.21365259870034, "step_time": 0.9725384826660157} +{"epoch": 0, "iter": 750, "iter_tflops": 13.15504094529688, "iter_time": 1.5683032531738281, "loss": 0.5199466347694397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.79700976191524, "step_time": 1.1592449398040772} +{"epoch": 0, "iter": 751, "iter_tflops": 42.072297990145735, "iter_time": 0.4903723945617676, "loss": 0.6494859457015991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91646021849138, "step_time": 0.44931803131103515} +{"epoch": 0, "iter": 752, "iter_tflops": 40.54813890970572, "iter_time": 0.5088049430847168, "loss": 0.635182797908783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.50917280897004, "step_time": 0.4635245323181152} +{"epoch": 0, "iter": 753, "iter_tflops": 13.357756766941362, "iter_time": 1.021410743713379, "loss": 0.12876854836940765, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 14.085380649058585, "step_time": 0.968646614074707} +{"epoch": 0, "iter": 754, "iter_tflops": 11.486825856497651, "iter_time": 1.1877742767333985, "loss": 0.1243368461728096, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 13.691481488550757, "step_time": 0.9965142402648925} +{"epoch": 0, "iter": 755, "iter_tflops": 28.302227468494614, "iter_time": 0.482073585510254, "loss": 0.07603255659341812, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 31.249887105851734, "step_time": 0.4366017780303955} +{"epoch": 0, "iter": 756, "iter_tflops": 28.86628949409895, "iter_time": 0.472653621673584, "loss": 0.1743939369916916, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 31.816533542822143, "step_time": 0.42882598304748537} +{"epoch": 0, "iter": 757, "iter_tflops": 24.95564876916943, "iter_time": 0.8267103652954101, "loss": 1.0803025960922241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.561465813785833, "step_time": 0.7767302322387696} +{"epoch": 0, "iter": 758, 
"iter_tflops": 19.519352378398867, "iter_time": 1.056955841064453, "loss": 1.0660983324050903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.818725513049664, "step_time": 0.8661711769104005} +{"epoch": 0, "iter": 759, "iter_tflops": 45.603140316466465, "iter_time": 0.45240510559082037, "loss": 1.24075186252594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17396476033137, "step_time": 0.4195531845092773} +{"epoch": 0, "iter": 760, "iter_tflops": 44.36878274819152, "iter_time": 0.46499119949340817, "loss": 1.0126632452011108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.72721628746751, "step_time": 0.43227104187011717} +{"epoch": 0, "iter": 761, "iter_tflops": 37.58911516732197, "iter_time": 0.5400292053222657, "loss": 0.05371517315506935, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 40.577540718955916, "step_time": 0.5002575225830078} +{"epoch": 0, "iter": 762, "iter_tflops": 13.605704411610404, "iter_time": 1.4919639129638673, "loss": 0.04692729189991951, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 15.760538157868785, "step_time": 1.2879775924682617} +{"epoch": 0, "iter": 763, "iter_tflops": 36.776528894052085, "iter_time": 0.551961280822754, "loss": 0.09067646414041519, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 42.172622770032994, "step_time": 0.48133643722534186} +{"epoch": 0, "iter": 764, "iter_tflops": 46.67834171455593, "iter_time": 0.43487448883056634, "loss": 0.07036205381155014, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 51.36904818752545, "step_time": 0.3951644172668457} +{"epoch": 0, "iter": 765, "iter_tflops": 27.86216182172479, "iter_time": 0.7404699478149415, "loss": 0.399370402097702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.822457538746207, "step_time": 0.6917972297668457} +{"epoch": 0, "iter": 766, "iter_tflops": 13.70605628678862, "iter_time": 1.5052538146972656, "loss": 0.5960094928741455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.056723653932902, "step_time": 1.2848881225585937} +{"epoch": 0, "iter": 767, 
"iter_tflops": 49.23173889472467, "iter_time": 0.4190608329772949, "loss": 0.5922554135322571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.660160264138455, "step_time": 0.38447692680358886} +{"epoch": 0, "iter": 768, "iter_tflops": 49.520087381006796, "iter_time": 0.4166207008361817, "loss": 0.5604545474052429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.579516404816104, "step_time": 0.38505561256408694} +{"epoch": 0, "iter": 769, "iter_tflops": 37.85363673167412, "iter_time": 0.5450227584838867, "loss": 0.25380778312683105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.5047404625561, "step_time": 0.50935009765625} +{"epoch": 0, "iter": 770, "iter_tflops": 37.61679337354518, "iter_time": 0.5484543380737305, "loss": 0.2519339919090271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.21784801015603, "step_time": 0.4886817893981934} +{"epoch": 0, "iter": 771, "iter_tflops": 44.93570750707387, "iter_time": 0.45912470626831053, "loss": 0.21540789306163788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.357951714494654, "step_time": 0.41798925590515135} +{"epoch": 0, "iter": 772, "iter_tflops": 44.13013432507154, "iter_time": 0.4675057945251465, "loss": 0.15236902236938477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.34603792403597, "step_time": 0.4267380409240722} +{"epoch": 0, "iter": 773, "iter_tflops": 19.773410948398197, "iter_time": 1.043375549316406, "loss": 1.1255300045013428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.304464918723163, "step_time": 0.9683929443359374} +{"epoch": 0, "iter": 774, "iter_tflops": 24.724485944982888, "iter_time": 0.8344397354125976, "loss": 1.1667526960372925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.606043175570957, "step_time": 0.6968541316986083} +{"epoch": 0, "iter": 775, "iter_tflops": 46.24015932085939, "iter_time": 0.4461726303100586, "loss": 1.2565478086471558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.97470342773444, "step_time": 0.41283073425292965} +{"epoch": 0, "iter": 776, 
"iter_tflops": 45.82149499073864, "iter_time": 0.45024924468994143, "loss": 0.9205522537231445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.390868404742, "step_time": 0.4177106857299805} +{"epoch": 0, "iter": 777, "iter_tflops": 27.706828389381574, "iter_time": 0.7446212615966797, "loss": 1.1693693399429321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.330426297843694, "step_time": 0.7034024429321288} +{"epoch": 0, "iter": 778, "iter_tflops": 27.843848346769626, "iter_time": 0.7409569702148437, "loss": 1.0680371522903442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.436352004563204, "step_time": 0.5991079864501954} +{"epoch": 0, "iter": 779, "iter_tflops": 38.74875585406962, "iter_time": 0.5324324111938477, "loss": 1.1952729225158691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.46426438762309, "step_time": 0.4858460121154785} +{"epoch": 0, "iter": 780, "iter_tflops": 38.025594258989635, "iter_time": 0.5425580825805664, "loss": 1.1128581762313843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.30357189346282, "step_time": 0.4994990158081055} +{"epoch": 0, "iter": 781, "iter_tflops": 41.80606067527073, "iter_time": 0.49349527740478516, "loss": 1.0249303579330444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.21396377070494, "step_time": 0.4464255352020263} +{"epoch": 0, "iter": 782, "iter_tflops": 35.22014795894217, "iter_time": 0.5857753219604492, "loss": 0.6526306867599487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.87704888020081, "step_time": 0.5173676109313964} +{"epoch": 0, "iter": 783, "iter_tflops": 40.5645493148758, "iter_time": 0.5085991058349609, "loss": 0.928467869758606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.39170811984008, "step_time": 0.4647510623931885} +{"epoch": 0, "iter": 784, "iter_tflops": 42.97494678326999, "iter_time": 0.4800725784301757, "loss": 0.8243647813796997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.91407249909507, "step_time": 0.43976343154907227} +{"epoch": 0, "iter": 785, "iter_tflops": 
34.49144649647347, "iter_time": 0.598151008605957, "loss": 0.931201696395874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.205441691334705, "step_time": 0.540004056930542} +{"epoch": 0, "iter": 786, "iter_tflops": 38.93626111499645, "iter_time": 0.5298683776855468, "loss": 0.8940626382827759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.46704741770406, "step_time": 0.48581417274475097} +{"epoch": 0, "iter": 787, "iter_tflops": 39.516733573673655, "iter_time": 0.5220849914550781, "loss": 0.7625452876091003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14967657531789, "step_time": 0.47812857818603516} +{"epoch": 0, "iter": 788, "iter_tflops": 43.73706905390189, "iter_time": 0.47170727157592784, "loss": 0.8251112699508667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.399134774866866, "step_time": 0.4352630825042725} +{"epoch": 0, "iter": 789, "iter_tflops": 28.290934054193645, "iter_time": 0.7292475204467773, "loss": 0.009630298241972923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.697398732728125, "step_time": 0.6720795364379883} +{"epoch": 0, "iter": 790, "iter_tflops": 16.053545599578843, "iter_time": 1.2851424865722658, "loss": 0.013033926486968994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.577938030154236, "step_time": 1.1105157890319826} +{"epoch": 0, "iter": 791, "iter_tflops": 26.69766489665081, "iter_time": 0.7727677154541015, "loss": 0.01176944375038147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.241380706322154, "step_time": 0.6206449031829835} +{"epoch": 0, "iter": 792, "iter_tflops": 46.82149957885365, "iter_time": 0.44063290786743164, "loss": 0.00963027123361826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.730386555759175, "step_time": 0.3988196277618408} +{"epoch": 0, "iter": 793, "iter_tflops": 13.516451274101321, "iter_time": 1.1876876525878906, "loss": 0.2893027663230896, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 14.360697989786942, "step_time": 1.1178650436401365} +{"epoch": 0, "iter": 794, "iter_tflops": 
11.1406409648706, "iter_time": 1.4409693603515623, "loss": 0.34697866439819336, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 14.330711834860818, "step_time": 1.1202041091918944} +{"epoch": 0, "iter": 795, "iter_tflops": 26.933265832480817, "iter_time": 0.5960406875610351, "loss": 0.3670915365219116, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 28.842597212612368, "step_time": 0.556583797454834} +{"epoch": 0, "iter": 796, "iter_tflops": 23.602606873906776, "iter_time": 0.6801503906249999, "loss": 0.3273037075996399, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 25.272424641989595, "step_time": 0.635211006164551} +{"epoch": 0, "iter": 797, "iter_tflops": 19.854809130867046, "iter_time": 1.0390980529785159, "loss": 0.18346534669399261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.561702509251724, "step_time": 0.9568397254943848} +{"epoch": 0, "iter": 798, "iter_tflops": 27.266572097293796, "iter_time": 0.7566441955566406, "loss": 0.19246290624141693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36702322435563, "step_time": 0.49873285293579106} +{"epoch": 0, "iter": 799, "iter_tflops": 50.41838348797877, "iter_time": 0.40919783782958985, "loss": 0.11488183587789536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.86304036883644, "step_time": 0.37604721450805667} +{"epoch": 0, "iter": 800, "iter_tflops": 59.00777503055765, "iter_time": 0.3496334762573242, "loss": 0.09256552904844284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.1318876590428, "step_time": 0.3216978988647461} +{"epoch": 0, "iter": 801, "iter_tflops": 36.62036816344308, "iter_time": 0.563377555847168, "loss": 0.14903408288955688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.34696583343025, "step_time": 0.5243375968933105} +{"epoch": 0, "iter": 802, "iter_tflops": 10.054740717812502, "iter_time": 2.051877227783203, "loss": 0.28670042753219604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.952559224978732, "step_time": 1.4786601638793946} +{"epoch": 0, "iter": 803, "iter_tflops": 
14.353216409829386, "iter_time": 1.4373846893310547, "loss": 0.2950038015842438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.899871772960303, "step_time": 1.1525833129882814} +{"epoch": 0, "iter": 804, "iter_tflops": 19.145361081217796, "iter_time": 1.0776027374267578, "loss": 0.3994649350643158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.552218277318058, "step_time": 0.7487997264862061} +{"epoch": 0, "iter": 805, "iter_tflops": 23.521413640773233, "iter_time": 0.6650860977172851, "loss": 0.34746402502059937, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 25.00834443551318, "step_time": 0.6255418167114257} +{"epoch": 0, "iter": 806, "iter_tflops": 15.552411169968762, "iter_time": 1.0058739471435547, "loss": 0.2171313464641571, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 17.13852200453242, "step_time": 0.9127837982177734} +{"epoch": 0, "iter": 807, "iter_tflops": 29.135304302034527, "iter_time": 0.5369350204467773, "loss": 0.30044814944267273, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 30.986896157185203, "step_time": 0.5048509902954101} +{"epoch": 0, "iter": 808, "iter_tflops": 28.459519013325277, "iter_time": 0.5496848068237304, "loss": 0.33214908838272095, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 30.311498415722035, "step_time": 0.5161000289916993} +{"epoch": 0, "iter": 809, "iter_tflops": 32.9674139353395, "iter_time": 0.6258026046752929, "loss": 0.8385404348373413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.159981641970425, "step_time": 0.5867777099609375} +{"epoch": 0, "iter": 810, "iter_tflops": 19.957424754123643, "iter_time": 1.0337552947998048, "loss": 0.8348217606544495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.10362356616906, "step_time": 0.9333806037902832} +{"epoch": 0, "iter": 811, "iter_tflops": 48.66303876979563, "iter_time": 0.42395818328857426, "loss": 0.9824939966201782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.78097475726438, "step_time": 0.3908812522888183} +{"epoch": 0, "iter": 812, "iter_tflops": 
49.40627408903887, "iter_time": 0.417580436706543, "loss": 0.7103802561759949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.573243018483424, "step_time": 0.3851007022857666} +{"epoch": 0, "iter": 813, "iter_tflops": 32.424533880751355, "iter_time": 0.6362803421020508, "loss": 1.1455693244934082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.452629481957274, "step_time": 0.5988249320983887} +{"epoch": 0, "iter": 814, "iter_tflops": 18.017186218077107, "iter_time": 1.1450785522460938, "loss": 1.1692606210708618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.427296365232916, "step_time": 0.9628416557312012} +{"epoch": 0, "iter": 815, "iter_tflops": 43.5738124792361, "iter_time": 0.47347460174560546, "loss": 0.9456325173377991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.75118779449286, "step_time": 0.4412956008911133} +{"epoch": 0, "iter": 816, "iter_tflops": 46.56124312466211, "iter_time": 0.44309584808349606, "loss": 0.9291584491729736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.151704427532806, "step_time": 0.4113737258911132} +{"epoch": 0, "iter": 817, "iter_tflops": 40.38427155636531, "iter_time": 0.5108695220947266, "loss": 0.15073803067207336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.75999917277734, "step_time": 0.47146009826660157} +{"epoch": 0, "iter": 818, "iter_tflops": 44.30929772379577, "iter_time": 0.4656154479980469, "loss": 0.1125154048204422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1388396915873, "step_time": 0.4198530864715576} +{"epoch": 0, "iter": 819, "iter_tflops": 52.8040912911899, "iter_time": 0.39071013259887694, "loss": 0.10675446689128876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.67716695251101, "step_time": 0.3576994953155518} +{"epoch": 0, "iter": 820, "iter_tflops": 54.95262430145719, "iter_time": 0.3754341812133789, "loss": 0.10521635413169861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.85885270908159, "step_time": 0.3446623611450195} +{"epoch": 0, "iter": 821, "iter_tflops": 
24.127846173178277, "iter_time": 0.8550739822387696, "loss": 1.099657654762268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.275504247438455, "step_time": 0.8162485427856445} +{"epoch": 0, "iter": 822, "iter_tflops": 15.056040587019703, "iter_time": 1.3702867889404295, "loss": 1.0775004625320435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.38938776001751, "step_time": 1.121902141571045} +{"epoch": 0, "iter": 823, "iter_tflops": 37.330338475073034, "iter_time": 0.5526629104614258, "loss": 0.9723065495491028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.874807069540495, "step_time": 0.5047386150360107} +{"epoch": 0, "iter": 824, "iter_tflops": 41.086055696121534, "iter_time": 0.5021434440612793, "loss": 1.0986193418502808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.652035967310276, "step_time": 0.4620414962768555} +{"epoch": 0, "iter": 825, "iter_tflops": 10.5471321188002, "iter_time": 1.1050469970703125, "loss": 0.05880257859826088, "lr": 3e-05, "seqlen": 4688.0, "step_tflops": 11.192313550828496, "step_time": 1.0413465118408205} +{"epoch": 0, "iter": 826, "iter_tflops": 10.714230828650322, "iter_time": 1.087812728881836, "loss": 0.06814532727003098, "lr": 3e-05, "seqlen": 4688.0, "step_tflops": 13.092506412759901, "step_time": 0.8902097358703612} +{"epoch": 0, "iter": 827, "iter_tflops": 32.560126542247524, "iter_time": 0.35795550918579105, "loss": 0.044490501284599304, "lr": 3e-05, "seqlen": 4688.0, "step_tflops": 35.63667120634701, "step_time": 0.3270529003143311} +{"epoch": 0, "iter": 828, "iter_tflops": 32.793221635664594, "iter_time": 0.3554111518859863, "loss": 0.04494353383779526, "lr": 3e-05, "seqlen": 4688.0, "step_tflops": 35.60739098040913, "step_time": 0.32732183837890627} +{"epoch": 0, "iter": 829, "iter_tflops": 29.734908270569626, "iter_time": 0.6938341064453125, "loss": 0.698172926902771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.520282925514103, "step_time": 0.6545338935852052} +{"epoch": 0, "iter": 830, "iter_tflops": 
12.547905866093133, "iter_time": 1.6441861877441406, "loss": 0.6266994476318359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.028549742851688, "step_time": 1.2115590476989746} +{"epoch": 0, "iter": 831, "iter_tflops": 41.07401156553804, "iter_time": 0.5022906875610351, "loss": 0.7122700810432434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.985848668668496, "step_time": 0.45861296653747563} +{"epoch": 0, "iter": 832, "iter_tflops": 43.07462570799415, "iter_time": 0.47896164321899415, "loss": 0.6443567872047424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00862140042809, "step_time": 0.4388789310455322} +{"epoch": 0, "iter": 833, "iter_tflops": 19.25894811634465, "iter_time": 1.0712471618652344, "loss": 1.0524482727050781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.60900241805926, "step_time": 1.0010719146728515} +{"epoch": 0, "iter": 834, "iter_tflops": 24.093392631927998, "iter_time": 0.8562967376708985, "loss": 0.845808744430542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.1459949290673, "step_time": 0.7600050601959228} +{"epoch": 0, "iter": 835, "iter_tflops": 44.023183432091734, "iter_time": 0.4686415634155273, "loss": 0.832960307598114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.36719080747507, "step_time": 0.43555661964416503} +{"epoch": 0, "iter": 836, "iter_tflops": 42.792366051915316, "iter_time": 0.4821208877563477, "loss": 0.843730628490448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.07765392764468, "step_time": 0.4477461795806884} +{"epoch": 0, "iter": 837, "iter_tflops": 42.37712022040945, "iter_time": 0.48684510421752936, "loss": 0.9498440027236938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.259817715136904, "step_time": 0.4459830265045166} +{"epoch": 0, "iter": 838, "iter_tflops": 10.208488395142124, "iter_time": 2.020974380493164, "loss": 1.0492711067199707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.648984921022343, "step_time": 1.7710636291503907} +{"epoch": 0, "iter": 839, "iter_tflops": 
11.49216840897348, "iter_time": 1.795230697631836, "loss": 0.8871620297431946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.020784657915417, "step_time": 1.3735030479431152} +{"epoch": 0, "iter": 840, "iter_tflops": 43.82636046687789, "iter_time": 0.4707462196350098, "loss": 0.7645742893218994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70653867401089, "step_time": 0.43245840263366697} +{"epoch": 0, "iter": 841, "iter_tflops": 19.471565585625132, "iter_time": 0.7320829238891602, "loss": 0.4092767536640167, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 20.720325193921777, "step_time": 0.6879622077941896} +{"epoch": 0, "iter": 842, "iter_tflops": 8.056347642413053, "iter_time": 1.769387481689453, "loss": 0.5116755962371826, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 9.477899026938376, "step_time": 1.5040042762756347} +{"epoch": 0, "iter": 843, "iter_tflops": 21.841839560491376, "iter_time": 0.6526373672485352, "loss": 0.4565630257129669, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 23.56208394340586, "step_time": 0.6049889602661134} +{"epoch": 0, "iter": 844, "iter_tflops": 22.889754191456923, "iter_time": 0.6227590103149414, "loss": 0.3006483316421509, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 24.617306899540765, "step_time": 0.5790560569763185} +{"epoch": 0, "iter": 845, "iter_tflops": 18.182318663911424, "iter_time": 1.1346789093017577, "loss": 0.026315152645111084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.497807263960564, "step_time": 1.0581237792968747} +{"epoch": 0, "iter": 846, "iter_tflops": 16.325751894778467, "iter_time": 1.2637147521972656, "loss": 0.028064744547009468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.011547656275567, "step_time": 0.8592154827117919} +{"epoch": 0, "iter": 847, "iter_tflops": 56.11233437987408, "iter_time": 0.3676748390197754, "loss": 0.009912488982081413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.348751503073224, "step_time": 0.3362919864654541} +{"epoch": 0, "iter": 848, "iter_tflops": 
65.6492302014086, "iter_time": 0.31426253509521485, "loss": 0.0062131318263709545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 71.52032310799551, "step_time": 0.2884647693634033} +{"epoch": 0, "iter": 849, "iter_tflops": 45.38931217879539, "iter_time": 0.454536376953125, "loss": 0.24404729902744293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.810241320584964, "step_time": 0.4141938076019287} +{"epoch": 0, "iter": 850, "iter_tflops": 49.83704093866384, "iter_time": 0.413971076965332, "loss": 0.22131194174289703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.44640863069893, "step_time": 0.3789247817993164} +{"epoch": 0, "iter": 851, "iter_tflops": 54.79828536746595, "iter_time": 0.3764915885925293, "loss": 0.22547170519828796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.44303547439325, "step_time": 0.34707335090637204} +{"epoch": 0, "iter": 852, "iter_tflops": 49.09390458439893, "iter_time": 0.42023737335205075, "loss": 0.23068241775035858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.18350615784895, "step_time": 0.38792277908325196} +{"epoch": 0, "iter": 853, "iter_tflops": 42.15852955502507, "iter_time": 0.4893693809509277, "loss": 0.32365188002586365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.351781266328814, "step_time": 0.45491252899169926} +{"epoch": 0, "iter": 854, "iter_tflops": 12.839336783862114, "iter_time": 1.606865982055664, "loss": 0.38324451446533203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.97810543888843, "step_time": 1.1475677223205567} +{"epoch": 0, "iter": 855, "iter_tflops": 34.310398958709996, "iter_time": 0.6013073043823242, "loss": 0.3983413577079773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.65618682117621, "step_time": 0.5478805809020996} +{"epoch": 0, "iter": 856, "iter_tflops": 40.29350289169896, "iter_time": 0.5120203514099121, "loss": 0.43960314989089966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.94257889510676, "step_time": 0.4695011997222901} +{"epoch": 0, "iter": 857, "iter_tflops": 
18.78031125873591, "iter_time": 1.098549072265625, "loss": 1.1759759187698364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.070774635755523, "step_time": 1.0279171524047852} +{"epoch": 0, "iter": 858, "iter_tflops": 17.344851763595923, "iter_time": 1.1894649658203125, "loss": 1.1329981088638306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.845605786486882, "step_time": 0.9897094726562501} +{"epoch": 0, "iter": 859, "iter_tflops": 35.65395851097918, "iter_time": 0.5786480484008789, "loss": 0.8121854662895203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.018017118238355, "step_time": 0.5287581233978271} +{"epoch": 0, "iter": 860, "iter_tflops": 38.17653814942377, "iter_time": 0.5404128952026367, "loss": 1.175324559211731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.529619813061096, "step_time": 0.49678021621704116} +{"epoch": 0, "iter": 861, "iter_tflops": 18.91413026134028, "iter_time": 1.0907767486572266, "loss": 1.2098779678344727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.899969581190312, "step_time": 1.0367399520874023} +{"epoch": 0, "iter": 862, "iter_tflops": 17.482887194563762, "iter_time": 1.1800735931396484, "loss": 1.0809162855148315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.471897626814727, "step_time": 0.9180841712951661} +{"epoch": 0, "iter": 863, "iter_tflops": 41.998407191457204, "iter_time": 0.4912351417541504, "loss": 1.1151752471923828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.244808169072996, "step_time": 0.45598808670043944} +{"epoch": 0, "iter": 864, "iter_tflops": 43.54253774674706, "iter_time": 0.4738146781921387, "loss": 0.9952307343482971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.424536945408086, "step_time": 0.44440063095092774} +{"epoch": 0, "iter": 865, "iter_tflops": 30.891876193923743, "iter_time": 0.6678485107421875, "loss": 0.23524807393550873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.85602656751528, "step_time": 0.6279241790771484} +{"epoch": 0, "iter": 866, "iter_tflops": 
15.02463149648469, "iter_time": 1.373151382446289, "loss": 0.229109987616539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.137297779714324, "step_time": 1.0780567741394043} +{"epoch": 0, "iter": 867, "iter_tflops": 52.22157690872655, "iter_time": 0.3950683746337891, "loss": 0.2798176109790802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.85789356056531, "step_time": 0.36285363769531254} +{"epoch": 0, "iter": 868, "iter_tflops": 55.535150271404845, "iter_time": 0.3714961318969726, "loss": 0.2699711322784424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.28674997483907, "step_time": 0.34221605110168457} +{"epoch": 0, "iter": 869, "iter_tflops": 27.64323696231816, "iter_time": 0.7463342132568359, "loss": 0.9437853693962097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.618425927714632, "step_time": 0.6965627937316894} +{"epoch": 0, "iter": 870, "iter_tflops": 14.689005975084797, "iter_time": 1.4045261840820313, "loss": 0.9795582294464111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.753730323463014, "step_time": 1.1620709075927735} +{"epoch": 0, "iter": 871, "iter_tflops": 40.898784074603164, "iter_time": 0.5044427108764649, "loss": 1.0219558477401733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.572460219060304, "step_time": 0.4628663845062256} +{"epoch": 0, "iter": 872, "iter_tflops": 38.56726467988539, "iter_time": 0.5349379501342774, "loss": 0.8765082359313965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.142204717669905, "step_time": 0.48955895042419434} +{"epoch": 0, "iter": 873, "iter_tflops": 24.19169426170987, "iter_time": 0.8528172225952149, "loss": 0.16971921920776367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.92237735587745, "step_time": 0.7958796844482423} +{"epoch": 0, "iter": 874, "iter_tflops": 9.690736879242042, "iter_time": 2.1289499206542963, "loss": 0.1771564930677414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.345062868922744, "step_time": 1.8185085220336912} +{"epoch": 0, "iter": 875, "iter_tflops": 
14.214289435053502, "iter_time": 1.4514333343505859, "loss": 0.22313185036182404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.876460991339805, "step_time": 1.092953468322754} +{"epoch": 0, "iter": 876, "iter_tflops": 23.464458976008068, "iter_time": 0.8792486343383789, "loss": 0.13332800567150116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.300479418093985, "step_time": 0.784437925338745} +{"epoch": 0, "iter": 877, "iter_tflops": 14.319738978671507, "iter_time": 1.2386705932617188, "loss": 0.4535294771194458, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 15.264872710869785, "step_time": 1.1619775619506836} +{"epoch": 0, "iter": 878, "iter_tflops": 11.020145918868561, "iter_time": 1.6095467071533205, "loss": 0.3745197653770447, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 14.793038957283285, "step_time": 1.1990396041870117} +{"epoch": 0, "iter": 879, "iter_tflops": 29.99791049228392, "iter_time": 0.5912891693115235, "loss": 0.4337797462940216, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 32.03727945742622, "step_time": 0.5536499938964844} +{"epoch": 0, "iter": 880, "iter_tflops": 32.80107501737713, "iter_time": 0.5407578735351563, "loss": 0.48564237356185913, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 34.90871278822594, "step_time": 0.5081092414855957} +{"epoch": 0, "iter": 881, "iter_tflops": 13.04255120597627, "iter_time": 0.8471166381835937, "loss": 0.010691184550523758, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 13.7450456943005, "step_time": 0.803821418762207} +{"epoch": 0, "iter": 882, "iter_tflops": 6.978313175972859, "iter_time": 1.5832711791992187, "loss": 0.00404744828119874, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 8.33885781622223, "step_time": 1.3249490966796873} +{"epoch": 0, "iter": 883, "iter_tflops": 24.878476941822274, "iter_time": 0.44410122680664066, "loss": 0.0436265654861927, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 27.52638501680738, "step_time": 0.4013807888031005} +{"epoch": 0, "iter": 884, "iter_tflops": 
28.6608064572102, "iter_time": 0.3854937629699707, "loss": 0.010020939633250237, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 31.635507957544228, "step_time": 0.34924560546875005} +{"epoch": 0, "iter": 885, "iter_tflops": 13.22344714826619, "iter_time": 1.2202030334472658, "loss": 0.1847774237394333, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 14.076154997525698, "step_time": 1.1462853546142577} +{"epoch": 0, "iter": 886, "iter_tflops": 11.512062976151347, "iter_time": 1.4015985107421876, "loss": 0.1557590514421463, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 14.596086649181553, "step_time": 1.1054531745910645} +{"epoch": 0, "iter": 887, "iter_tflops": 35.610128321630086, "iter_time": 0.4531095809936524, "loss": 0.20312157273292542, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 39.276801574086704, "step_time": 0.4108096809387207} +{"epoch": 0, "iter": 888, "iter_tflops": 40.64112891120915, "iter_time": 0.3970187530517578, "loss": 0.2668847441673279, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 44.194896407895776, "step_time": 0.36509397315979003} +{"epoch": 0, "iter": 889, "iter_tflops": 29.120039927451803, "iter_time": 0.7084843826293945, "loss": 0.7299372553825378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.881263081930854, "step_time": 0.6680780334472656} +{"epoch": 0, "iter": 890, "iter_tflops": 22.20160957775087, "iter_time": 0.9292611618041992, "loss": 0.7617627382278442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.55373118698317, "step_time": 0.7487586116790771} +{"epoch": 0, "iter": 891, "iter_tflops": 40.57408615515012, "iter_time": 0.5084795608520508, "loss": 0.5280004739761353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84274492269676, "step_time": 0.47057029724121097} +{"epoch": 0, "iter": 892, "iter_tflops": 40.668241795229, "iter_time": 0.5073023223876953, "loss": 0.825849711894989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96932876309924, "step_time": 0.4692155666351318} +{"epoch": 0, "iter": 893, "iter_tflops": 
19.318570393934635, "iter_time": 0.8076599426269532, "loss": 0.15550795197486877, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 20.67561175376592, "step_time": 0.7546492767333984} +{"epoch": 0, "iter": 894, "iter_tflops": 13.09983541533656, "iter_time": 1.1910711059570314, "loss": 0.09811248630285263, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 15.525591074125591, "step_time": 1.0049752941131591} +{"epoch": 0, "iter": 895, "iter_tflops": 36.3363762802985, "iter_time": 0.4293998756408691, "loss": 0.09927710145711899, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 39.491298213876604, "step_time": 0.3950955314636231} +{"epoch": 0, "iter": 896, "iter_tflops": 42.87442223659245, "iter_time": 0.36391943359375, "loss": 0.14541956782341003, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 46.79419190859508, "step_time": 0.3334353008270264} +{"epoch": 0, "iter": 897, "iter_tflops": 29.79594685809815, "iter_time": 0.6924127502441408, "loss": 0.6233466863632202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.54632219888476, "step_time": 0.6539936218261718} +{"epoch": 0, "iter": 898, "iter_tflops": 9.273460583265877, "iter_time": 2.2247459106445313, "loss": 0.6141119599342346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.59009163847789, "step_time": 1.780063018798828} +{"epoch": 0, "iter": 899, "iter_tflops": 15.00349425834654, "iter_time": 1.3750859069824217, "loss": 0.5774694681167603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.383345444531834, "step_time": 1.1222708930969238} +{"epoch": 0, "iter": 900, "iter_tflops": 29.658362606579256, "iter_time": 0.6956248321533204, "loss": 0.7021958827972412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.22042136814888, "step_time": 0.5695983848571777} +{"epoch": 0, "iter": 901, "iter_tflops": 14.899817965559615, "iter_time": 1.1242185821533202, "loss": 0.3730710446834564, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 15.757816269429275, "step_time": 1.0630059356689454} +{"epoch": 0, "iter": 902, "iter_tflops": 
7.678225679592805, "iter_time": 2.1815785217285155, "loss": 0.448623925447464, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 9.418335851329168, "step_time": 1.7785150680541992} +{"epoch": 0, "iter": 903, "iter_tflops": 9.019331514051766, "iter_time": 1.8571944274902343, "loss": 0.24990405142307281, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 10.976625176068652, "step_time": 1.526029354095459} +{"epoch": 0, "iter": 904, "iter_tflops": 26.616194547866073, "iter_time": 0.6293406143188477, "loss": 0.4032862186431885, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 28.616138842463407, "step_time": 0.5853568267822266} +{"epoch": 0, "iter": 905, "iter_tflops": 18.41310814494629, "iter_time": 0.8696166458129883, "loss": 0.22838717699050903, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 19.750794919441, "step_time": 0.8107190322875977} +{"epoch": 0, "iter": 906, "iter_tflops": 10.398949107165416, "iter_time": 1.5398041839599608, "loss": 0.2819339632987976, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 11.451124830564636, "step_time": 1.398320739746094} +{"epoch": 0, "iter": 907, "iter_tflops": 8.958209855330232, "iter_time": 1.787449234008789, "loss": 0.46735870838165283, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 10.840043644289477, "step_time": 1.4771476821899414} +{"epoch": 0, "iter": 908, "iter_tflops": 17.746061882693624, "iter_time": 0.9023041534423828, "loss": 0.3212573230266571, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 27.468537344075667, "step_time": 0.5829340362548828} +{"epoch": 0, "iter": 909, "iter_tflops": 24.25586332900105, "iter_time": 0.6196569519042968, "loss": 0.40253642201423645, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 25.95073232375047, "step_time": 0.5791865196228027} +{"epoch": 0, "iter": 910, "iter_tflops": 27.019238530180985, "iter_time": 0.5562819366455077, "loss": 0.2572570741176605, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.857538872238177, "step_time": 0.520845329284668} +{"epoch": 0, "iter": 911, "iter_tflops": 
26.840139563334287, "iter_time": 0.559993896484375, "loss": 0.38873976469039917, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.546175773963803, "step_time": 0.5265263710021972} +{"epoch": 0, "iter": 912, "iter_tflops": 25.654677067760076, "iter_time": 0.5858703384399414, "loss": 0.43098264932632446, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.161996150266994, "step_time": 0.5533582382202148} +{"epoch": 0, "iter": 913, "iter_tflops": 29.015301718055667, "iter_time": 0.7110418395996094, "loss": 1.0574733018875122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.794816290280725, "step_time": 0.6699534530639649} +{"epoch": 0, "iter": 914, "iter_tflops": 15.40652884771371, "iter_time": 1.3391136779785155, "loss": 1.0425065755844116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.85816193908645, "step_time": 0.9891136894226075} +{"epoch": 0, "iter": 915, "iter_tflops": 44.384667621541865, "iter_time": 0.4648247833251953, "loss": 1.0370187759399414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.760371604073605, "step_time": 0.43197095870971675} +{"epoch": 0, "iter": 916, "iter_tflops": 43.16188077906255, "iter_time": 0.4779933853149414, "loss": 0.8946157693862915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.00396208944088, "step_time": 0.4484634056091309} +{"epoch": 0, "iter": 917, "iter_tflops": 30.55089209427266, "iter_time": 0.675302490234375, "loss": 1.0995736122131348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.414797610675684, "step_time": 0.6364714584350587} +{"epoch": 0, "iter": 918, "iter_tflops": 12.180493037250471, "iter_time": 1.693781478881836, "loss": 1.031752109527588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.54197339411284, "step_time": 1.2471966323852541} +{"epoch": 0, "iter": 919, "iter_tflops": 40.643471471443014, "iter_time": 0.5076114997863769, "loss": 0.9980649352073669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.189536831796396, "step_time": 0.4668773422241211} +{"epoch": 0, "iter": 920, "iter_tflops": 
41.65864464222848, "iter_time": 0.49524159240722654, "loss": 1.198708176612854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38880078620061, "step_time": 0.45454149818420414} +{"epoch": 0, "iter": 921, "iter_tflops": 19.800146592748472, "iter_time": 1.0419667053222657, "loss": 1.0806881189346313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.002741990643266, "step_time": 0.9823047637939453} +{"epoch": 0, "iter": 922, "iter_tflops": 11.532766405009903, "iter_time": 1.7889110717773438, "loss": 0.9454750418663025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.411831516121326, "step_time": 1.4315386276245117} +{"epoch": 0, "iter": 923, "iter_tflops": 12.274839426522268, "iter_time": 1.6807628021240235, "loss": 0.9110398888587952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.886093912333772, "step_time": 1.3859306297302247} +{"epoch": 0, "iter": 924, "iter_tflops": 30.548593182146128, "iter_time": 0.6753533096313477, "loss": 1.0083653926849365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.74625083011773, "step_time": 0.6113595733642578} +{"epoch": 0, "iter": 925, "iter_tflops": 12.786392676854712, "iter_time": 1.1307894592285155, "loss": 0.4258769452571869, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 13.857816992771252, "step_time": 1.0433618850708009} +{"epoch": 0, "iter": 926, "iter_tflops": 13.653500690621367, "iter_time": 1.0589751586914065, "loss": 0.4008251130580902, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 18.303910546423346, "step_time": 0.7899250831604003} +{"epoch": 0, "iter": 927, "iter_tflops": 25.160229115812843, "iter_time": 0.5746655960083008, "loss": 0.44586485624313354, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 26.791757910499093, "step_time": 0.5396703758239746} +{"epoch": 0, "iter": 928, "iter_tflops": 26.378682722333043, "iter_time": 0.5481213073730469, "loss": 0.4803503155708313, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 28.034444999488453, "step_time": 0.5157483253479004} +{"epoch": 0, "iter": 929, "iter_tflops": 
28.72051617256873, "iter_time": 0.7183399276733399, "loss": 0.7186952233314514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.325039141273493, "step_time": 0.6803319664001466} +{"epoch": 0, "iter": 930, "iter_tflops": 13.204833985333863, "iter_time": 1.5623894653320312, "loss": 0.6672487854957581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.706053053331182, "step_time": 1.3135759468078614} +{"epoch": 0, "iter": 931, "iter_tflops": 48.27957490617807, "iter_time": 0.4273255004882813, "loss": 0.8708587288856506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44240662197789, "step_time": 0.39340478134155277} +{"epoch": 0, "iter": 932, "iter_tflops": 52.735690395104726, "iter_time": 0.3912169036865235, "loss": 0.9633270502090454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.027264467158744, "step_time": 0.3617759628295898} +{"epoch": 0, "iter": 933, "iter_tflops": 33.374831652264476, "iter_time": 0.6181632232666017, "loss": 0.11536992341279984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.57204173394313, "step_time": 0.5799805831909179} +{"epoch": 0, "iter": 934, "iter_tflops": 17.479352779628606, "iter_time": 1.180312210083008, "loss": 0.1061047911643982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.61459909425038, "step_time": 1.0008001327514648} +{"epoch": 0, "iter": 935, "iter_tflops": 43.379147466443435, "iter_time": 0.47559933090209966, "loss": 0.11288676410913467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.79680361215864, "step_time": 0.43164169883728026} +{"epoch": 0, "iter": 936, "iter_tflops": 46.219601816227964, "iter_time": 0.44637107849121094, "loss": 0.09239089488983154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.655798161857774, "step_time": 0.4072800006866455} +{"epoch": 0, "iter": 937, "iter_tflops": 18.926176164907393, "iter_time": 1.090082504272461, "loss": 0.925432026386261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.23139473351135, "step_time": 1.019756362915039} +{"epoch": 0, "iter": 938, "iter_tflops": 
15.725983654087718, "iter_time": 1.311911163330078, "loss": 1.033864974975586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.83966667378889, "step_time": 1.1564730377197265} +{"epoch": 0, "iter": 939, "iter_tflops": 42.98658663621467, "iter_time": 0.47994258499145515, "loss": 1.0281723737716675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14661103257368, "step_time": 0.4470771102905273} +{"epoch": 0, "iter": 940, "iter_tflops": 41.88307147280776, "iter_time": 0.49258788299560546, "loss": 0.9617648124694824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87813250149242, "step_time": 0.45971372604370114} +{"epoch": 0, "iter": 941, "iter_tflops": 33.670242889422575, "iter_time": 0.6127396697998047, "loss": 0.39935922622680664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.90033627323454, "step_time": 0.574676887512207} +{"epoch": 0, "iter": 942, "iter_tflops": 14.445002159420302, "iter_time": 1.4282513275146484, "loss": 0.5195086002349854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.6031007745401, "step_time": 1.172014736175537} +{"epoch": 0, "iter": 943, "iter_tflops": 41.418301012424216, "iter_time": 0.4981153984069824, "loss": 0.292264461517334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.43204069967261, "step_time": 0.4541088886260986} +{"epoch": 0, "iter": 944, "iter_tflops": 40.822736515668325, "iter_time": 0.5053824234008789, "loss": 0.3870542645454407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.15121150001352, "step_time": 0.46728261375427244} +{"epoch": 0, "iter": 945, "iter_tflops": 21.015786853561035, "iter_time": 0.9816950302124023, "loss": 0.07657594978809357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.345868345238674, "step_time": 0.9232621078491211} +{"epoch": 0, "iter": 946, "iter_tflops": 16.58629586548025, "iter_time": 1.2438638305664063, "loss": 0.14094850420951843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.036105986230016, "step_time": 1.0296957664489748} +{"epoch": 0, "iter": 947, "iter_tflops": 
44.86598072081572, "iter_time": 0.45983823776245114, "loss": 0.14588530361652374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.374981209856706, "step_time": 0.4178450908660889} +{"epoch": 0, "iter": 948, "iter_tflops": 47.14237954528501, "iter_time": 0.4376336898803711, "loss": 0.1228150874376297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.67458172748587, "step_time": 0.39925032424926754} +{"epoch": 0, "iter": 949, "iter_tflops": 23.982014904050438, "iter_time": 0.860273567199707, "loss": 0.169067844748497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.579229497432024, "step_time": 0.8065564880371094} +{"epoch": 0, "iter": 950, "iter_tflops": 46.141942450887406, "iter_time": 0.44712234497070313, "loss": 0.19259704649448395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.666355352204945, "step_time": 0.4071951370239258} +{"epoch": 0, "iter": 951, "iter_tflops": 46.9056683936375, "iter_time": 0.43984222412109375, "loss": 0.2330973893404007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.92557355926493, "step_time": 0.40512245750427245} +{"epoch": 0, "iter": 952, "iter_tflops": 46.72451778425944, "iter_time": 0.4415474891662598, "loss": 0.14006991684436798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.3641147864842, "step_time": 0.4096387596130371} +{"epoch": 0, "iter": 953, "iter_tflops": 23.965826599956912, "iter_time": 0.8608546600341797, "loss": 1.012002944946289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.145765405074517, "step_time": 0.8204599533081054} +{"epoch": 0, "iter": 954, "iter_tflops": 13.134045068217965, "iter_time": 1.570810317993164, "loss": 0.9474181532859802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.99021333375297, "step_time": 1.2142927875518799} +{"epoch": 0, "iter": 955, "iter_tflops": 50.187962536394494, "iter_time": 0.41107653045654297, "loss": 1.0737782716751099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.589333991454104, "step_time": 0.37793268394470214} +{"epoch": 0, "iter": 956, "iter_tflops": 
50.19421251928336, "iter_time": 0.41102534484863285, "loss": 0.9351035356521606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.12052884264969, "step_time": 0.381206428527832} +{"epoch": 0, "iter": 957, "iter_tflops": 37.00126769195937, "iter_time": 0.5575780181884765, "loss": 0.09136492013931274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.278854051055944, "step_time": 0.5122065658569337} +{"epoch": 0, "iter": 958, "iter_tflops": 15.108660138731798, "iter_time": 1.3655144348144532, "loss": 0.07374989241361618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.142203009793217, "step_time": 1.203526378631592} +{"epoch": 0, "iter": 959, "iter_tflops": 14.390627434989664, "iter_time": 1.4336479492187497, "loss": 0.09065280854701996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.603332306391035, "step_time": 1.1719993209838866} +{"epoch": 0, "iter": 960, "iter_tflops": 44.63953173636738, "iter_time": 0.46217092132568366, "loss": 0.04756869748234749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.18768550622143, "step_time": 0.41943615150451663} +{"epoch": 0, "iter": 961, "iter_tflops": 20.295499888294696, "iter_time": 0.688306007385254, "loss": 0.4866921603679657, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 21.90735994411795, "step_time": 0.6376630744934082} +{"epoch": 0, "iter": 962, "iter_tflops": 21.834754487169675, "iter_time": 0.6397834472656251, "loss": 0.19061432778835297, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 23.52502532433738, "step_time": 0.5938150672912598} +{"epoch": 0, "iter": 963, "iter_tflops": 20.812183086835955, "iter_time": 0.6712181243896483, "loss": 0.3189038932323456, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 22.319012923789014, "step_time": 0.625901985168457} +{"epoch": 0, "iter": 964, "iter_tflops": 22.741488455644266, "iter_time": 0.6142744140625, "loss": 0.3865228295326233, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 24.41285383278798, "step_time": 0.5722196426391603} +{"epoch": 0, "iter": 965, "iter_tflops": 
28.151321845737577, "iter_time": 0.7328641128540039, "loss": 0.39972496032714844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.631174768882804, "step_time": 0.6735325584411621} +{"epoch": 0, "iter": 966, "iter_tflops": 38.675449433931995, "iter_time": 0.5334415969848633, "loss": 0.634086549282074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.48766527130773, "step_time": 0.4744125347137451} +{"epoch": 0, "iter": 967, "iter_tflops": 40.35694381041879, "iter_time": 0.5112154579162598, "loss": 0.5042170882225037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.041103109706874, "step_time": 0.4684508800506592} +{"epoch": 0, "iter": 968, "iter_tflops": 43.63344039994536, "iter_time": 0.47282756805419923, "loss": 0.5313689112663269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69480054816162, "step_time": 0.4325648345947266} +{"epoch": 0, "iter": 969, "iter_tflops": 19.746876033010597, "iter_time": 1.0447775878906251, "loss": 0.07697826623916626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.0477299130226, "step_time": 0.9802051620483399} +{"epoch": 0, "iter": 970, "iter_tflops": 16.57768691499783, "iter_time": 1.2445097808837893, "loss": 0.0886426717042923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.420647259911146, "step_time": 0.8808933963775636} +{"epoch": 0, "iter": 971, "iter_tflops": 44.54449675297821, "iter_time": 0.4631569557189942, "loss": 0.11095976829528809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.917527393435094, "step_time": 0.4217525825500488} +{"epoch": 0, "iter": 972, "iter_tflops": 48.98852270366747, "iter_time": 0.42114136886596676, "loss": 0.14207102358341217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.671125512755346, "step_time": 0.38439837646484376} +{"epoch": 0, "iter": 973, "iter_tflops": 35.50112467975664, "iter_time": 0.5811391525268554, "loss": 0.2884710133075714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.216826737643835, "step_time": 0.5260775852203369} +{"epoch": 0, "iter": 974, "iter_tflops": 
38.709943283120225, "iter_time": 0.5329662551879883, "loss": 0.2862192690372467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.03021483951574, "step_time": 0.47945597267150875} +{"epoch": 0, "iter": 975, "iter_tflops": 38.926155579945615, "iter_time": 0.5300059356689453, "loss": 0.2626616358757019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.64727482753572, "step_time": 0.4837611217498779} +{"epoch": 0, "iter": 976, "iter_tflops": 40.20984991857754, "iter_time": 0.513085563659668, "loss": 0.28884071111679077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.48067473097301, "step_time": 0.4744888076782227} +{"epoch": 0, "iter": 977, "iter_tflops": 16.247608600502172, "iter_time": 1.0764961395263672, "loss": 0.05426211655139923, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 17.153232677421993, "step_time": 1.0196613235473633} +{"epoch": 0, "iter": 978, "iter_tflops": 12.455890967878773, "iter_time": 1.4041940460205078, "loss": 0.05814245715737343, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 15.028623082245181, "step_time": 1.1638117370605467} +{"epoch": 0, "iter": 979, "iter_tflops": 37.583801201629946, "iter_time": 0.46537304306030275, "loss": 0.07399936020374298, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 41.678726975824404, "step_time": 0.4196502437591553} +{"epoch": 0, "iter": 980, "iter_tflops": 39.34523715589954, "iter_time": 0.44453888702392574, "loss": 0.05084780603647232, "lr": 3e-05, "seqlen": 6976.0, "step_tflops": 43.42152736861155, "step_time": 0.4028068332672119} +{"epoch": 0, "iter": 981, "iter_tflops": 28.514235967834324, "iter_time": 0.7235366058349609, "loss": 0.9475242495536804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.653930334547443, "step_time": 0.673032569885254} +{"epoch": 0, "iter": 982, "iter_tflops": 14.95404587533384, "iter_time": 1.3796328887939453, "loss": 0.981185257434845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.589959511784645, "step_time": 1.1097976570129395} +{"epoch": 0, "iter": 983, "iter_tflops": 
33.49564001991679, "iter_time": 0.6159337005615234, "loss": 0.9718639254570007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.69101908544353, "step_time": 0.5622927360534667} +{"epoch": 0, "iter": 984, "iter_tflops": 41.52849827405919, "iter_time": 0.4967936325073242, "loss": 1.088502049446106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.12325971726306, "step_time": 0.457216381072998} +{"epoch": 0, "iter": 985, "iter_tflops": 29.42112960021268, "iter_time": 0.701233901977539, "loss": 0.09413647651672363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.772552149872386, "step_time": 0.6493369941711425} +{"epoch": 0, "iter": 986, "iter_tflops": 8.353117038359166, "iter_time": 2.4698676452636716, "loss": 0.10175903886556625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.609390224669482, "step_time": 2.146972183227539} +{"epoch": 0, "iter": 987, "iter_tflops": 15.65817105877834, "iter_time": 1.3175928039550782, "loss": 0.18192698061466217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.452734648414562, "step_time": 1.1821123695373534} +{"epoch": 0, "iter": 988, "iter_tflops": 21.676548558519926, "iter_time": 0.9517702255249024, "loss": 0.15900450944900513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.17426244290051, "step_time": 0.6837314929962158} +{"epoch": 0, "iter": 989, "iter_tflops": 14.210987422409204, "iter_time": 1.1556077423095703, "loss": 0.459723562002182, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 15.230862238442429, "step_time": 1.0782270126342772} +{"epoch": 0, "iter": 990, "iter_tflops": 10.171643250186763, "iter_time": 1.6145205535888674, "loss": 0.37568604946136475, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 13.909504824349387, "step_time": 1.1806550483703613} +{"epoch": 0, "iter": 991, "iter_tflops": 26.76744676709919, "iter_time": 0.6135186233520509, "loss": 0.35865429043769836, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 28.453084336630752, "step_time": 0.5771721229553223} +{"epoch": 0, "iter": 992, "iter_tflops": 
30.167391620598536, "iter_time": 0.5443734512329101, "loss": 0.3970790505409241, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 32.07045578255584, "step_time": 0.5120702743530273} +{"epoch": 0, "iter": 993, "iter_tflops": 38.670219054244235, "iter_time": 0.5335137481689453, "loss": 0.31659606099128723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75459156105374, "step_time": 0.49410358810424804} +{"epoch": 0, "iter": 994, "iter_tflops": 48.39570041178486, "iter_time": 0.4263001327514649, "loss": 0.2750660479068756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.03903694727564, "step_time": 0.38897941398620606} +{"epoch": 0, "iter": 995, "iter_tflops": 47.97236472408019, "iter_time": 0.43006204986572266, "loss": 0.23555195331573486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.141056797220116, "step_time": 0.3956784687042237} +{"epoch": 0, "iter": 996, "iter_tflops": 46.154753585741595, "iter_time": 0.44699823760986324, "loss": 0.2428588569164276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26146885144568, "step_time": 0.4104753398895264} +{"epoch": 0, "iter": 997, "iter_tflops": 19.108467944695615, "iter_time": 1.079683288574219, "loss": 0.040026791393756866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.937012973578625, "step_time": 1.0348136672973633} +{"epoch": 0, "iter": 998, "iter_tflops": 13.993458465674387, "iter_time": 1.4743384246826172, "loss": 0.04026646912097931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.365588042207612, "step_time": 1.0653481559753417} +{"epoch": 0, "iter": 999, "iter_tflops": 43.57216007355521, "iter_time": 0.4734925575256348, "loss": 0.1276008039712906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.991652579062446, "step_time": 0.4298892078399658} +{"epoch": 0, "iter": 1000, "iter_tflops": 51.40947445084506, "iter_time": 0.4013091697692871, "loss": 0.07435376942157745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.2917071284917, "step_time": 0.36650324821472163} +{"epoch": 0, "iter": 1001, "iter_tflops": 
2.4054989074851147, "iter_time": 0.8476042404174805, "loss": 0.3534965515136719, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 2.5600113787803958, "step_time": 0.7964460983276367} +{"epoch": 0, "iter": 1002, "iter_tflops": 2.06277344372803, "iter_time": 0.988431900024414, "loss": 0.33062413334846497, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 2.5260572627524147, "step_time": 0.8071515655517579} +{"epoch": 0, "iter": 1003, "iter_tflops": 4.084686792105503, "iter_time": 0.4991597099304199, "loss": 0.5313044786453247, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 4.4677977987480855, "step_time": 0.4563570613861084} +{"epoch": 0, "iter": 1004, "iter_tflops": 4.627937220232348, "iter_time": 0.44056584548950195, "loss": 0.5922998189926147, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 5.036964563013647, "step_time": 0.4047896404266357} +{"epoch": 0, "iter": 1005, "iter_tflops": 8.68441295285366, "iter_time": 2.3756463012695312, "loss": 0.5873260498046875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.853080520571115, "step_time": 2.0938724155426027} +{"epoch": 0, "iter": 1006, "iter_tflops": 36.6791646826957, "iter_time": 0.562474464416504, "loss": 0.5662714838981628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.102237926682854, "step_time": 0.5144623985290527} +{"epoch": 0, "iter": 1007, "iter_tflops": 40.422038245398966, "iter_time": 0.5103922119140625, "loss": 0.5916463136672974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.913940638155054, "step_time": 0.46980738258361815} +{"epoch": 0, "iter": 1008, "iter_tflops": 41.49612064058264, "iter_time": 0.49718125915527345, "loss": 0.45476236939430237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.904233969803876, "step_time": 0.45944650840759277} +{"epoch": 0, "iter": 1009, "iter_tflops": 18.465167863938678, "iter_time": 1.1172979125976563, "loss": 0.41772422194480896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.374683593311396, "step_time": 1.0648480224609373} +{"epoch": 0, "iter": 1010, "iter_tflops": 
16.35817520407801, "iter_time": 1.2612099609375003, "loss": 0.2753344476222992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.793261332847983, "step_time": 1.0423291625976563} +{"epoch": 0, "iter": 1011, "iter_tflops": 46.5776615062989, "iter_time": 0.4429396591186523, "loss": 0.5409807562828064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.36814177653826, "step_time": 0.40960600852966306} +{"epoch": 0, "iter": 1012, "iter_tflops": 47.26083540389439, "iter_time": 0.4365367927551269, "loss": 0.4174359142780304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.83618695197659, "step_time": 0.4058347949981689} +{"epoch": 0, "iter": 1013, "iter_tflops": 30.810928388764196, "iter_time": 0.6696031112670898, "loss": 0.9462823271751404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.78161884126439, "step_time": 0.6293494415283204} +{"epoch": 0, "iter": 1014, "iter_tflops": 11.885374368340173, "iter_time": 1.7358387603759764, "loss": 0.8594239950180054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.207434489452886, "step_time": 1.4521336364746096} +{"epoch": 0, "iter": 1015, "iter_tflops": 12.42843413154034, "iter_time": 1.6599913787841796, "loss": 0.9911372661590576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.711950748909096, "step_time": 1.313082878112793} +{"epoch": 0, "iter": 1016, "iter_tflops": 16.430586175139013, "iter_time": 1.2556517028808594, "loss": 0.9989652633666992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.35876846457261, "step_time": 1.0657234497070311} +{"epoch": 0, "iter": 1017, "iter_tflops": 12.168362155435394, "iter_time": 1.3428506469726562, "loss": 0.24161337316036224, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 13.05587629940735, "step_time": 1.2515661621093748} +{"epoch": 0, "iter": 1018, "iter_tflops": 14.701852151013473, "iter_time": 1.1114445190429687, "loss": 0.33529794216156006, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 17.61662363889647, "step_time": 0.9275496444702149} +{"epoch": 0, "iter": 1019, "iter_tflops": 
29.3484198805879, "iter_time": 0.5567690887451171, "loss": 0.31351640820503235, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.348651681086256, "step_time": 0.5212438850402832} +{"epoch": 0, "iter": 1020, "iter_tflops": 29.457489973977097, "iter_time": 0.5547075805664062, "loss": 0.30152803659439087, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 31.321826365961474, "step_time": 0.5216902999877929} +{"epoch": 0, "iter": 1021, "iter_tflops": 35.470980591121936, "iter_time": 0.5816330184936523, "loss": 0.16177983582019806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.06844886230016, "step_time": 0.5419473114013672} +{"epoch": 0, "iter": 1022, "iter_tflops": 17.88455491296067, "iter_time": 1.1535704193115235, "loss": 0.21542058885097504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.841391453596312, "step_time": 0.9445869579315185} +{"epoch": 0, "iter": 1023, "iter_tflops": 39.59809716772919, "iter_time": 0.5210122451782228, "loss": 0.15269353985786438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.74070965511327, "step_time": 0.47166801071166986} +{"epoch": 0, "iter": 1024, "iter_tflops": 46.917113447929275, "iter_time": 0.4397349281311035, "loss": 0.2428121119737625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.489064101067314, "step_time": 0.4006888427734376} +{"epoch": 0, "iter": 1025, "iter_tflops": 22.846881023692404, "iter_time": 0.903015754699707, "loss": 0.7969738245010376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.71612006157641, "step_time": 0.8347221755981445} +{"epoch": 0, "iter": 1026, "iter_tflops": 22.12639915026518, "iter_time": 0.9324198379516602, "loss": 0.706110417842865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.917881856451764, "step_time": 0.7664456520080566} +{"epoch": 0, "iter": 1027, "iter_tflops": 49.241500748099945, "iter_time": 0.41897775650024416, "loss": 0.7986994981765747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.6029569406658, "step_time": 0.38488722801208497} +{"epoch": 0, "iter": 1028, 
"iter_tflops": 45.15983474109708, "iter_time": 0.4568460807800293, "loss": 0.5580850839614868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6466426791375, "step_time": 0.4241010761260986} +{"epoch": 0, "iter": 1029, "iter_tflops": 45.78180686905872, "iter_time": 0.45063956451416015, "loss": 1.2848153114318848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.825385787337474, "step_time": 0.4140679130554199} +{"epoch": 0, "iter": 1030, "iter_tflops": 45.50948834923809, "iter_time": 0.45333609008789066, "loss": 0.9837836623191833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.05030303618241, "step_time": 0.4122071647644043} +{"epoch": 0, "iter": 1031, "iter_tflops": 47.62679537548755, "iter_time": 0.43318248367309575, "loss": 0.9318081736564636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19841155952903, "step_time": 0.40296354675292967} +{"epoch": 0, "iter": 1032, "iter_tflops": 38.6079962628053, "iter_time": 0.5343735885620118, "loss": 0.9995017647743225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.07377067478179, "step_time": 0.4903552303314209} +{"epoch": 0, "iter": 1033, "iter_tflops": 22.09763667028812, "iter_time": 0.9336334838867187, "loss": 0.6643528938293457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.168607732451143, "step_time": 0.890476188659668} +{"epoch": 0, "iter": 1034, "iter_tflops": 16.467091757566674, "iter_time": 1.2528680725097656, "loss": 0.7004671096801758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.667887425517016, "step_time": 0.9521506690979004} +{"epoch": 0, "iter": 1035, "iter_tflops": 39.85125755188366, "iter_time": 0.5177024459838867, "loss": 0.7127973437309265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.42136257475297, "step_time": 0.47513694381713867} +{"epoch": 0, "iter": 1036, "iter_tflops": 40.79156006020468, "iter_time": 0.5057686805725098, "loss": 0.6742587089538574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.33204500032951, "step_time": 0.4653765354156494} +{"epoch": 0, "iter": 1037, 
"iter_tflops": 28.893274193941465, "iter_time": 0.7140448455810547, "loss": 0.4960530400276184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.474655514882173, "step_time": 0.6554827423095704} +{"epoch": 0, "iter": 1038, "iter_tflops": 37.78156232097122, "iter_time": 0.5460624771118163, "loss": 0.5844188928604126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.28863480482324, "step_time": 0.48786378669738767} +{"epoch": 0, "iter": 1039, "iter_tflops": 42.56387448680853, "iter_time": 0.4847090110778809, "loss": 0.4787681996822357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.346680843649914, "step_time": 0.44514716339111327} +{"epoch": 0, "iter": 1040, "iter_tflops": 42.56356262180875, "iter_time": 0.4847125625610351, "loss": 0.5486803650856018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.610848169475986, "step_time": 0.44262428855895997} +{"epoch": 0, "iter": 1041, "iter_tflops": 12.872972892326798, "iter_time": 1.151720489501953, "loss": 0.10476909577846527, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 13.813650581824371, "step_time": 1.073290985107422} +{"epoch": 0, "iter": 1042, "iter_tflops": 14.463984462469817, "iter_time": 1.0250333633422852, "loss": 0.1487162709236145, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 18.033544149283344, "step_time": 0.8221382617950439} +{"epoch": 0, "iter": 1043, "iter_tflops": 38.68921550900699, "iter_time": 0.383209285736084, "loss": 0.045772697776556015, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 42.507242815125394, "step_time": 0.3487891864776611} +{"epoch": 0, "iter": 1044, "iter_tflops": 39.130844433161926, "iter_time": 0.37888440322875977, "loss": 0.056980255991220474, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 43.098644525582735, "step_time": 0.3440030841827392} +{"epoch": 0, "iter": 1045, "iter_tflops": 12.742184523547536, "iter_time": 0.6397702789306641, "loss": 0.015681548044085503, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 13.56330035013439, "step_time": 0.6010388870239258} +{"epoch": 0, 
"iter": 1046, "iter_tflops": 5.457210412930194, "iter_time": 1.4938164978027346, "loss": 0.0062732994556427, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 6.284292944958202, "step_time": 1.297213706970215} +{"epoch": 0, "iter": 1047, "iter_tflops": 23.079755766034868, "iter_time": 0.35321305084228516, "loss": 0.01042803842574358, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 25.452040882500288, "step_time": 0.32029144477844235} +{"epoch": 0, "iter": 1048, "iter_tflops": 22.669964507003396, "iter_time": 0.35959786987304687, "loss": 0.010919243097305298, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 24.91144206617675, "step_time": 0.3272420330047608} +{"epoch": 0, "iter": 1049, "iter_tflops": 29.164966162337166, "iter_time": 0.7073930206298829, "loss": 0.10372356325387955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.828743582751645, "step_time": 0.6692161636352539} +{"epoch": 0, "iter": 1050, "iter_tflops": 13.560092625249846, "iter_time": 1.5214566802978515, "loss": 0.17892296612262726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.987464740941434, "step_time": 1.2144892616271972} +{"epoch": 0, "iter": 1051, "iter_tflops": 45.774244763131854, "iter_time": 0.4507140121459961, "loss": 0.18315865099430084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63311211941289, "step_time": 0.41567197036743164} +{"epoch": 0, "iter": 1052, "iter_tflops": 47.811152795593905, "iter_time": 0.4315121536254882, "loss": 0.13950714468955994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.86899130766992, "step_time": 0.39775389862060545} +{"epoch": 0, "iter": 1053, "iter_tflops": 24.178536510199446, "iter_time": 0.6775144424438476, "loss": 0.07204998284578323, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 25.745536525254387, "step_time": 0.6362775802612305} +{"epoch": 0, "iter": 1054, "iter_tflops": 11.668806255571258, "iter_time": 1.4038546295166014, "loss": 0.05387942120432854, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 16.116195456049013, "step_time": 1.016450050354004} 
+{"epoch": 0, "iter": 1055, "iter_tflops": 32.83752823168464, "iter_time": 0.49885934066772464, "loss": 0.046927839517593384, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 36.19461864147461, "step_time": 0.45258959197998044} +{"epoch": 0, "iter": 1056, "iter_tflops": 39.25236567894889, "iter_time": 0.41733300399780265, "loss": 0.045988328754901886, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 43.16895500757569, "step_time": 0.3794696369171143} +{"epoch": 0, "iter": 1057, "iter_tflops": 18.753867592187817, "iter_time": 1.1000980682373047, "loss": 1.062910795211792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.963298540996597, "step_time": 1.0334511337280274} +{"epoch": 0, "iter": 1058, "iter_tflops": 18.943546560151866, "iter_time": 1.0890829467773437, "loss": 0.8205592036247253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.670147382129773, "step_time": 0.8362776756286621} +{"epoch": 0, "iter": 1059, "iter_tflops": 40.99393029916658, "iter_time": 0.5032719078063965, "loss": 1.3550891876220703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.41715201478287, "step_time": 0.46448483467102053} +{"epoch": 0, "iter": 1060, "iter_tflops": 41.13298428469345, "iter_time": 0.5015705490112304, "loss": 0.9645048379898071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.21789589065209, "step_time": 0.4665779113769531} +{"epoch": 0, "iter": 1061, "iter_tflops": 23.507072001948845, "iter_time": 0.8776547546386719, "loss": 0.10664203017950058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.185768262204107, "step_time": 0.8191568069458008} +{"epoch": 0, "iter": 1062, "iter_tflops": 24.269664309134544, "iter_time": 0.8500774154663087, "loss": 0.11199581623077393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.34517863423273, "step_time": 0.6798804435729979} +{"epoch": 0, "iter": 1063, "iter_tflops": 52.323133440861554, "iter_time": 0.3943015670776367, "loss": 0.07532817870378494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.93050546509665, "step_time": 
0.3623908367156983} +{"epoch": 0, "iter": 1064, "iter_tflops": 53.7913342663035, "iter_time": 0.3835393524169922, "loss": 0.1025775671005249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.39366476199492, "step_time": 0.35331047630310064} +{"epoch": 0, "iter": 1065, "iter_tflops": 29.756311742914342, "iter_time": 0.6933350372314453, "loss": 0.3385767340660095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.39637272846781, "step_time": 0.6571171035766601} +{"epoch": 0, "iter": 1066, "iter_tflops": 17.817436317581826, "iter_time": 1.1579159393310547, "loss": 0.22658869624137878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.28666051820763, "step_time": 0.9692029190063477} +{"epoch": 0, "iter": 1067, "iter_tflops": 49.848915192755314, "iter_time": 0.41387246704101566, "loss": 0.2302447259426117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.323078610375774, "step_time": 0.3797850570678711} +{"epoch": 0, "iter": 1068, "iter_tflops": 53.03273581155491, "iter_time": 0.38902563095092774, "loss": 0.3082164227962494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.312726418592625, "step_time": 0.35997403717041015} +{"epoch": 0, "iter": 1069, "iter_tflops": 48.043613091272555, "iter_time": 0.42942427062988286, "loss": 0.2831004559993744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.329381499894424, "step_time": 0.394254487991333} +{"epoch": 0, "iter": 1070, "iter_tflops": 17.352639137004275, "iter_time": 1.1889311676025391, "loss": 0.29732057452201843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.26159021366913, "step_time": 1.0711002197265624} +{"epoch": 0, "iter": 1071, "iter_tflops": 14.806407608075727, "iter_time": 1.3933895416259765, "loss": 0.3656979203224182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.036557888678075, "step_time": 1.2865038528442383} +{"epoch": 0, "iter": 1072, "iter_tflops": 30.41054832063436, "iter_time": 0.6784189910888673, "loss": 0.2561066448688507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.998499007715914, 
"step_time": 0.5429449596405029} +{"epoch": 0, "iter": 1073, "iter_tflops": 17.32254771702068, "iter_time": 0.8252582092285156, "loss": 0.3414034843444824, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 18.214896281769263, "step_time": 0.7848287734985351} +{"epoch": 0, "iter": 1074, "iter_tflops": 8.37442108616535, "iter_time": 1.7070522918701172, "loss": 0.3063393235206604, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 10.269431290226025, "step_time": 1.392051254272461} +{"epoch": 0, "iter": 1075, "iter_tflops": 26.637437125772685, "iter_time": 0.5366723022460937, "loss": 0.29972878098487854, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 28.36199912575146, "step_time": 0.5040397415161133} +{"epoch": 0, "iter": 1076, "iter_tflops": 26.95280729798236, "iter_time": 0.5303927917480469, "loss": 0.4061368405818939, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 28.571677288237513, "step_time": 0.5003407592773438} +{"epoch": 0, "iter": 1077, "iter_tflops": 31.522103718649692, "iter_time": 0.6544960861206055, "loss": 0.3617870509624481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.697738599607725, "step_time": 0.6122397041320801} +{"epoch": 0, "iter": 1078, "iter_tflops": 15.221774193849715, "iter_time": 1.355367202758789, "loss": 0.38600626587867737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.83155243062336, "step_time": 1.1569992904663084} +{"epoch": 0, "iter": 1079, "iter_tflops": 40.69534953435297, "iter_time": 0.5069644012451171, "loss": 0.5333133339881897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.786082424686114, "step_time": 0.4606585884094238} +{"epoch": 0, "iter": 1080, "iter_tflops": 40.81898808061295, "iter_time": 0.5054288330078125, "loss": 0.3430061936378479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7056659033412, "step_time": 0.46148722076416016} +{"epoch": 0, "iter": 1081, "iter_tflops": 20.382664238992582, "iter_time": 1.012188262939453, "loss": 0.9237521290779114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.646583716032154, 
"step_time": 0.9530877380371092} +{"epoch": 0, "iter": 1082, "iter_tflops": 14.92183068685546, "iter_time": 1.3826114196777344, "loss": 0.9630272388458252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.30225660084244, "step_time": 1.192393222808838} +{"epoch": 0, "iter": 1083, "iter_tflops": 39.91685571534639, "iter_time": 0.5168516693115235, "loss": 0.9684129357337952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.68614002329528, "step_time": 0.4722571849822998} +{"epoch": 0, "iter": 1084, "iter_tflops": 41.05222221754029, "iter_time": 0.5025572891235351, "loss": 1.1108083724975586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.24318104662663, "step_time": 0.46631126022338865} +{"epoch": 0, "iter": 1085, "iter_tflops": 17.5261911323636, "iter_time": 1.1771578521728514, "loss": 1.2275128364562988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.646182283149063, "step_time": 1.1064513473510742} +{"epoch": 0, "iter": 1086, "iter_tflops": 21.062443915472304, "iter_time": 0.9795204010009765, "loss": 1.0154346227645874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.18866399933405, "step_time": 0.7068186988830566} +{"epoch": 0, "iter": 1087, "iter_tflops": 46.778692614198036, "iter_time": 0.44103612899780276, "loss": 1.2246640920639038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.56394092725461, "step_time": 0.4080198879241943} +{"epoch": 0, "iter": 1088, "iter_tflops": 42.55042322341335, "iter_time": 0.48486223983764654, "loss": 0.9787795543670654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.554181452185546, "step_time": 0.4528913230895996} +{"epoch": 0, "iter": 1089, "iter_tflops": 29.002420836741912, "iter_time": 0.7113576354980468, "loss": 0.17340527474880219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.709533227174322, "step_time": 0.6718139724731446} +{"epoch": 0, "iter": 1090, "iter_tflops": 11.327319619173716, "iter_time": 1.8213570556640624, "loss": 0.1977260708808899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.550749474888386, 
"step_time": 1.522505714416504} +{"epoch": 0, "iter": 1091, "iter_tflops": 11.612559289228692, "iter_time": 1.7766190032958986, "loss": 0.147931769490242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.34158588749628, "step_time": 1.4385503578186036} +{"epoch": 0, "iter": 1092, "iter_tflops": 23.426456626746415, "iter_time": 0.880674949645996, "loss": 0.16398011147975922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.858801674237288, "step_time": 0.7148977890014648} +{"epoch": 0, "iter": 1093, "iter_tflops": 19.435109782776518, "iter_time": 0.8386514511108398, "loss": 0.3478315472602844, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 20.4677443871319, "step_time": 0.796339973449707} +{"epoch": 0, "iter": 1094, "iter_tflops": 11.114268622263786, "iter_time": 1.4665187225341798, "loss": 0.22683864831924438, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 13.80086835471025, "step_time": 1.1810331497192381} +{"epoch": 0, "iter": 1095, "iter_tflops": 24.65131339174737, "iter_time": 0.6611932907104492, "loss": 0.3065784275531769, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 26.6024388076467, "step_time": 0.6126988258361817} +{"epoch": 0, "iter": 1096, "iter_tflops": 25.879006378120458, "iter_time": 0.6298264617919922, "loss": 0.3178616762161255, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.735564783664273, "step_time": 0.5876672477722169} +{"epoch": 0, "iter": 1097, "iter_tflops": 15.848910663705686, "iter_time": 0.90456396484375, "loss": 0.0026840928476303816, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 16.78630982987518, "step_time": 0.8540503311157226} +{"epoch": 0, "iter": 1098, "iter_tflops": 12.508416274998236, "iter_time": 1.1461365814208984, "loss": 0.004457760136574507, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 14.218647250385954, "step_time": 1.008278299331665} +{"epoch": 0, "iter": 1099, "iter_tflops": 38.55399708744688, "iter_time": 0.37185128784179683, "loss": 0.0022140603978186846, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 
42.190826679461345, "step_time": 0.3397978801727295} +{"epoch": 0, "iter": 1100, "iter_tflops": 44.39718773704145, "iter_time": 0.3229112968444824, "loss": 0.01168442890048027, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 48.47814960064225, "step_time": 0.2957281494140625} +{"epoch": 0, "iter": 1101, "iter_tflops": 42.49451765772391, "iter_time": 0.4855001220703125, "loss": 0.6292281150817871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.98166785092716, "step_time": 0.44868084335327146} +{"epoch": 0, "iter": 1102, "iter_tflops": 38.379739510556966, "iter_time": 0.5375516815185547, "loss": 0.6613233685493469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.70822818735813, "step_time": 0.4830706958770752} +{"epoch": 0, "iter": 1103, "iter_tflops": 43.1010743374128, "iter_time": 0.4786677322387695, "loss": 0.5523889064788818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.064488285315015, "step_time": 0.4383579692840576} +{"epoch": 0, "iter": 1104, "iter_tflops": 41.95036022465513, "iter_time": 0.4917977676391601, "loss": 0.6696305274963379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.8052553527278, "step_time": 0.45040887451171874} +{"epoch": 0, "iter": 1105, "iter_tflops": 27.031223925299713, "iter_time": 0.7632319412231445, "loss": 0.014447780326008797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.11982089175322, "step_time": 0.7084897117614747} +{"epoch": 0, "iter": 1106, "iter_tflops": 11.230578141741576, "iter_time": 1.8370464324951175, "loss": 0.03422342240810394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.922001030868575, "step_time": 1.596586585998535} +{"epoch": 0, "iter": 1107, "iter_tflops": 14.74241238857822, "iter_time": 1.3994380950927734, "loss": 0.004809677600860596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.283016999605643, "step_time": 1.1284293785095214} +{"epoch": 0, "iter": 1108, "iter_tflops": 34.18660989024043, "iter_time": 0.6034846267700196, "loss": 0.004249356687068939, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.853638548660335, "step_time": 0.45996476936340336} +{"epoch": 0, "iter": 1109, "iter_tflops": 19.33144042506515, "iter_time": 0.7373894729614259, "loss": 0.41785964369773865, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 20.418612435196742, "step_time": 0.6981277847290039} +{"epoch": 0, "iter": 1110, "iter_tflops": 9.25886828747552, "iter_time": 1.5395834808349609, "loss": 0.33614662289619446, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 11.769878288408357, "step_time": 1.2111255798339844} +{"epoch": 0, "iter": 1111, "iter_tflops": 23.171201062843053, "iter_time": 0.6151947250366211, "loss": 0.3721845746040344, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 24.834360080195726, "step_time": 0.5739950866699219} +{"epoch": 0, "iter": 1112, "iter_tflops": 26.509705448907425, "iter_time": 0.5377200698852538, "loss": 0.26402539014816284, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 28.092416136794647, "step_time": 0.5074252281188966} +{"epoch": 0, "iter": 1113, "iter_tflops": 32.794713653966554, "iter_time": 0.62909814453125, "loss": 0.8797511458396912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.990961837803226, "step_time": 0.5896120719909669} +{"epoch": 0, "iter": 1114, "iter_tflops": 15.194397698974951, "iter_time": 1.3578092346191406, "loss": 1.0746859312057495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.412037389763814, "step_time": 1.120522029876709} +{"epoch": 0, "iter": 1115, "iter_tflops": 47.46670773702999, "iter_time": 0.4346434478759766, "loss": 1.119901418685913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.35152161921207, "step_time": 0.40176206779479984} +{"epoch": 0, "iter": 1116, "iter_tflops": 47.57486555807541, "iter_time": 0.43365531921386724, "loss": 1.1064162254333496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.255076757656234, "step_time": 0.4025180492401123} +{"epoch": 0, "iter": 1117, "iter_tflops": 43.173979875793094, "iter_time": 0.47785943222045896, "loss": 0.1994045227766037, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 46.93708862673936, "step_time": 0.4395477886199951} +{"epoch": 0, "iter": 1118, "iter_tflops": 38.82137872613076, "iter_time": 0.5314363937377931, "loss": 0.2019626498222351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.794141507127556, "step_time": 0.48210088539123536} +{"epoch": 0, "iter": 1119, "iter_tflops": 48.73674460303265, "iter_time": 0.42331702041625974, "loss": 0.1971626728773117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.368895678933164, "step_time": 0.3865752372741699} +{"epoch": 0, "iter": 1120, "iter_tflops": 44.126682433984755, "iter_time": 0.46754236602783206, "loss": 0.2601584494113922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41845407384718, "step_time": 0.4260997982025147} +{"epoch": 0, "iter": 1121, "iter_tflops": 26.34814122298746, "iter_time": 0.7830189361572266, "loss": 0.23197932541370392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.14544812432547, "step_time": 0.7330170555114746} +{"epoch": 0, "iter": 1122, "iter_tflops": 24.19843918632794, "iter_time": 0.8525795135498048, "loss": 0.31999003887176514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.162959476983957, "step_time": 0.7595304012298584} +{"epoch": 0, "iter": 1123, "iter_tflops": 49.35557950356048, "iter_time": 0.41800934600830075, "loss": 0.309353232383728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.33078413526724, "step_time": 0.38685149383544926} +{"epoch": 0, "iter": 1124, "iter_tflops": 54.82768882679495, "iter_time": 0.376289680480957, "loss": 0.22372189164161682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.76457935750671, "step_time": 0.3452060356140137} +{"epoch": 0, "iter": 1125, "iter_tflops": 31.90283307740318, "iter_time": 0.6466853103637695, "loss": 0.15505006909370422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.83650085597752, "step_time": 0.6097289314270019} +{"epoch": 0, "iter": 1126, "iter_tflops": 12.80879844871815, "iter_time": 1.6106970214843752, "loss": 0.3022134006023407, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 16.27986573018419, "step_time": 1.2672766380310059} +{"epoch": 0, "iter": 1127, "iter_tflops": 48.61312089043073, "iter_time": 0.4243935203552246, "loss": 0.12140331417322159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.146407747772585, "step_time": 0.38819356536865235} +{"epoch": 0, "iter": 1128, "iter_tflops": 53.179817256071956, "iter_time": 0.38794968795776363, "loss": 0.18570086359977722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.609809841456475, "step_time": 0.3581177158355713} +{"epoch": 0, "iter": 1129, "iter_tflops": 23.28915146694394, "iter_time": 0.8858671188354493, "loss": 0.10016036778688431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.382609660407283, "step_time": 0.8461396789550782} +{"epoch": 0, "iter": 1130, "iter_tflops": 16.81565696869949, "iter_time": 1.2268978576660157, "loss": 0.07397003471851349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.893246335414226, "step_time": 0.9423496723175049} +{"epoch": 0, "iter": 1131, "iter_tflops": 47.99145348924237, "iter_time": 0.42989099121093755, "loss": 0.09515731036663055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.50763594789567, "step_time": 0.39291606140136714} +{"epoch": 0, "iter": 1132, "iter_tflops": 48.396768802553105, "iter_time": 0.42629072189331046, "loss": 0.1986175775527954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.13567071229453, "step_time": 0.3882720069885255} +{"epoch": 0, "iter": 1133, "iter_tflops": 28.724874895443936, "iter_time": 0.7182309265136718, "loss": 0.302772581577301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.78594657062541, "step_time": 0.6701464729309082} +{"epoch": 0, "iter": 1134, "iter_tflops": 10.827146440948628, "iter_time": 1.905496856689453, "loss": 0.36383381485939026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.236714462322645, "step_time": 1.449147102355957} +{"epoch": 0, "iter": 1135, "iter_tflops": 15.078396809054228, "iter_time": 1.3682551116943358, "loss": 0.450668066740036, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 17.31012887054978, "step_time": 1.1918509483337403} +{"epoch": 0, "iter": 1136, "iter_tflops": 29.93074680182169, "iter_time": 0.6892943115234375, "loss": 0.3954935073852539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.46517822694263, "step_time": 0.5363576736450195} +{"epoch": 0, "iter": 1137, "iter_tflops": 11.824063113236367, "iter_time": 1.1573398590087889, "loss": 0.2552267014980316, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 12.679185167052834, "step_time": 1.0792854080200196} +{"epoch": 0, "iter": 1138, "iter_tflops": 17.68271390813677, "iter_time": 0.7738890991210937, "loss": 0.21731850504875183, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 20.96183832159072, "step_time": 0.652827262878418} +{"epoch": 0, "iter": 1139, "iter_tflops": 21.668867525723556, "iter_time": 0.6315262908935548, "loss": 0.42463192343711853, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 23.17970007894041, "step_time": 0.5903639602661133} +{"epoch": 0, "iter": 1140, "iter_tflops": 21.271343502582827, "iter_time": 0.6433284072875977, "loss": 0.2812664210796356, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 22.86165002508165, "step_time": 0.5985770721435547} +{"epoch": 0, "iter": 1141, "iter_tflops": 20.047745593985, "iter_time": 1.0290979309082031, "loss": 0.3148926794528961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.392659083843323, "step_time": 0.9644006118774415} +{"epoch": 0, "iter": 1142, "iter_tflops": 22.348576092560744, "iter_time": 0.9231502456665039, "loss": 0.2943946123123169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.56984074100317, "step_time": 0.8068526401519777} +{"epoch": 0, "iter": 1143, "iter_tflops": 49.36915506311488, "iter_time": 0.41789440155029295, "loss": 0.26443570852279663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.7420576150627, "step_time": 0.3838910236358642} +{"epoch": 0, "iter": 1144, "iter_tflops": 52.40748936909592, "iter_time": 0.3936668930053711, "loss": 0.2872944474220276, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 56.82895987180096, "step_time": 0.36303837966918945} +{"epoch": 0, "iter": 1145, "iter_tflops": 35.60435247885755, "iter_time": 0.5794542541503905, "loss": 0.30122801661491394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.0536306100864, "step_time": 0.5421583480834962} +{"epoch": 0, "iter": 1146, "iter_tflops": 12.665296724313222, "iter_time": 1.6289467163085936, "loss": 0.2863382399082184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.7856968323681, "step_time": 1.3953413047790526} +{"epoch": 0, "iter": 1147, "iter_tflops": 47.3996243425381, "iter_time": 0.4352585868835449, "loss": 0.27220645546913147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.48108253226096, "step_time": 0.40075096511840824} +{"epoch": 0, "iter": 1148, "iter_tflops": 52.36240431071424, "iter_time": 0.39400584793090826, "loss": 0.23279398679733276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.79761768479076, "step_time": 0.3632387123107911} +{"epoch": 0, "iter": 1149, "iter_tflops": 46.86874516189147, "iter_time": 0.4401887321472168, "loss": 0.3702218234539032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.83123006983302, "step_time": 0.4058743705749512} +{"epoch": 0, "iter": 1150, "iter_tflops": 10.422333931923026, "iter_time": 1.979508010864258, "loss": 0.43412673473358154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.300955793550305, "step_time": 1.825606071472168} +{"epoch": 0, "iter": 1151, "iter_tflops": 12.154732315468978, "iter_time": 1.6973712768554687, "loss": 0.4650871753692627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.21989091691825, "step_time": 1.4508615875244142} +{"epoch": 0, "iter": 1152, "iter_tflops": 26.8635593911747, "iter_time": 0.7679955291748047, "loss": 0.3657532036304474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.78351563704998, "step_time": 0.6106852149963379} +{"epoch": 0, "iter": 1153, "iter_tflops": 10.790448642525194, "iter_time": 1.4346089782714844, "loss": 0.3637051284313202, "lr": 3e-05, 
"seqlen": 6192.0, "step_tflops": 11.411466651125684, "step_time": 1.3565368041992185} +{"epoch": 0, "iter": 1154, "iter_tflops": 12.640969247553036, "iter_time": 1.2245955352783202, "loss": 0.261807382106781, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 14.300008601447534, "step_time": 1.0825220413208008} +{"epoch": 0, "iter": 1155, "iter_tflops": 26.60882320133647, "iter_time": 0.5817647171020507, "loss": 0.2906283736228943, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.282504725130394, "step_time": 0.5473374671936035} +{"epoch": 0, "iter": 1156, "iter_tflops": 30.21179577883622, "iter_time": 0.5123851165771484, "loss": 0.23976702988147736, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 31.98325797677175, "step_time": 0.48400555419921876} +{"epoch": 0, "iter": 1157, "iter_tflops": 33.56024121896246, "iter_time": 0.6147480697631837, "loss": 0.936271071434021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.93013302100748, "step_time": 0.5742003097534181} +{"epoch": 0, "iter": 1158, "iter_tflops": 16.54879598333372, "iter_time": 1.2466824493408204, "loss": 1.0723001956939697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.71100021866742, "step_time": 0.9961418228149412} +{"epoch": 0, "iter": 1159, "iter_tflops": 49.31454729356565, "iter_time": 0.4183571510314942, "loss": 1.0584441423416138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.383069495147176, "step_time": 0.38647259712219234} +{"epoch": 0, "iter": 1160, "iter_tflops": 45.959707659392635, "iter_time": 0.4488952293395996, "loss": 0.9939104318618774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.451845044469465, "step_time": 0.41719562721252446} +{"epoch": 0, "iter": 1161, "iter_tflops": 30.68994458329122, "iter_time": 0.6722427749633789, "loss": 0.968221127986908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.500951817709726, "step_time": 0.6347842864990234} +{"epoch": 0, "iter": 1162, "iter_tflops": 13.915339373829369, "iter_time": 1.4826151885986327, "loss": 1.0581369400024414, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 17.23094463146286, "step_time": 1.1973280601501466} +{"epoch": 0, "iter": 1163, "iter_tflops": 47.79149765914219, "iter_time": 0.4316896209716797, "loss": 1.0030186176300049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.77018790454217, "step_time": 0.39851301193237304} +{"epoch": 0, "iter": 1164, "iter_tflops": 47.43329686869847, "iter_time": 0.43494960021972656, "loss": 1.1450070142745972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.120746209857934, "step_time": 0.40357575035095217} +{"epoch": 0, "iter": 1165, "iter_tflops": 34.360874832973515, "iter_time": 0.6004239883422852, "loss": 1.0731769800186157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.71434588157024, "step_time": 0.5619354782104492} +{"epoch": 0, "iter": 1166, "iter_tflops": 10.171978994055713, "iter_time": 2.0282280883789063, "loss": 1.2321470975875854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.013138483037437, "step_time": 1.585404899597168} +{"epoch": 0, "iter": 1167, "iter_tflops": 15.603919685577285, "iter_time": 1.3221737823486328, "loss": 0.8455244898796082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.316531967311583, "step_time": 1.1914102401733397} +{"epoch": 0, "iter": 1168, "iter_tflops": 26.7732350029287, "iter_time": 0.7705865020751953, "loss": 0.9630340933799744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.285387937190826, "step_time": 0.6198243370056152} +{"epoch": 0, "iter": 1169, "iter_tflops": 21.24521159806754, "iter_time": 0.7267126312255859, "loss": 0.3372006118297577, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 22.519090688545, "step_time": 0.6856033325195313} +{"epoch": 0, "iter": 1170, "iter_tflops": 10.134770282851749, "iter_time": 1.5233856506347656, "loss": 0.32793766260147095, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.719246674342148, "step_time": 1.2138426132202147} +{"epoch": 0, "iter": 1171, "iter_tflops": 24.91296406948375, "iter_time": 0.6197240753173828, "loss": 0.36460503935813904, "lr": 3e-05, 
"seqlen": 6176.0, "step_tflops": 26.759537109312085, "step_time": 0.5769592933654785} +{"epoch": 0, "iter": 1172, "iter_tflops": 25.059327110136312, "iter_time": 0.616104476928711, "loss": 0.35331788659095764, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 26.686696141880955, "step_time": 0.5785340957641602} +{"epoch": 0, "iter": 1173, "iter_tflops": 19.45926683024249, "iter_time": 1.060219467163086, "loss": 0.8966606855392456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.64984398187975, "step_time": 0.9990919799804687} +{"epoch": 0, "iter": 1174, "iter_tflops": 19.35474652822667, "iter_time": 1.0659449081420898, "loss": 0.901648223400116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.635655697209547, "step_time": 0.774566759109497} +{"epoch": 0, "iter": 1175, "iter_tflops": 46.20610566168763, "iter_time": 0.44650145721435547, "loss": 1.0427653789520264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.40646771386084, "step_time": 0.417578800201416} +{"epoch": 0, "iter": 1176, "iter_tflops": 43.057636111632185, "iter_time": 0.4791506309509277, "loss": 0.8738406300544739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74020169148462, "step_time": 0.4510494651794434} +{"epoch": 0, "iter": 1177, "iter_tflops": 43.223122255142684, "iter_time": 0.47731613159179687, "loss": 0.43006631731987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.28452776783728, "step_time": 0.43631806182861327} +{"epoch": 0, "iter": 1178, "iter_tflops": 49.8932512869055, "iter_time": 0.4135046920776366, "loss": 0.42812392115592957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.94668426682589, "step_time": 0.37547476768493654} +{"epoch": 0, "iter": 1179, "iter_tflops": 51.690236309564206, "iter_time": 0.3991294097900391, "loss": 0.40756756067276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.06412826159172, "step_time": 0.36799098014831544} +{"epoch": 0, "iter": 1180, "iter_tflops": 52.09684556143527, "iter_time": 0.39601425552368164, "loss": 0.32430049777030945, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 56.444538371806416, "step_time": 0.3655108909606934} +{"epoch": 0, "iter": 1181, "iter_tflops": 23.176558627201416, "iter_time": 0.6080174331665038, "loss": 0.06421729177236557, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 24.68382256468117, "step_time": 0.5708901710510255} +{"epoch": 0, "iter": 1182, "iter_tflops": 6.761345366664602, "iter_time": 2.0841638641357423, "loss": 0.10764434933662415, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 8.041027749097719, "step_time": 1.752481414794922} +{"epoch": 0, "iter": 1183, "iter_tflops": 10.616629704997214, "iter_time": 1.3273281707763673, "loss": 0.03431660681962967, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 11.708622287848739, "step_time": 1.2035362777709961} +{"epoch": 0, "iter": 1184, "iter_tflops": 9.126423482123503, "iter_time": 1.5440606842041016, "loss": 0.05178314074873924, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 11.654597287158996, "step_time": 1.2091152820587159} +{"epoch": 0, "iter": 1185, "iter_tflops": 11.213651151444317, "iter_time": 1.4425525817871094, "loss": 0.35750263929367065, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 12.080615448615335, "step_time": 1.3390279235839841} +{"epoch": 0, "iter": 1186, "iter_tflops": 12.74436428670499, "iter_time": 1.269289001464844, "loss": 0.42734959721565247, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 17.713718664591457, "step_time": 0.9132064094543457} +{"epoch": 0, "iter": 1187, "iter_tflops": 28.038357768234278, "iter_time": 0.576933982849121, "loss": 0.3322937488555908, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 29.686964598715008, "step_time": 0.5448950958251952} +{"epoch": 0, "iter": 1188, "iter_tflops": 30.628722745324563, "iter_time": 0.5281409072875977, "loss": 0.32349252700805664, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 32.42908585280042, "step_time": 0.4988201484680176} +{"epoch": 0, "iter": 1189, "iter_tflops": 28.26878487268859, "iter_time": 0.7298189010620117, "loss": 0.4826151430606842, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 30.049509090083095, "step_time": 0.686570068359375} +{"epoch": 0, "iter": 1190, "iter_tflops": 16.276654729798924, "iter_time": 1.267526641845703, "loss": 0.4358857274055481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.23126682060849, "step_time": 1.07278910446167} +{"epoch": 0, "iter": 1191, "iter_tflops": 42.357500297912004, "iter_time": 0.48707061004638674, "loss": 0.4344463646411896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.73513553122591, "step_time": 0.44144717407226564} +{"epoch": 0, "iter": 1192, "iter_tflops": 45.21535367725259, "iter_time": 0.4562851295471192, "loss": 0.4759819805622101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.208152154280185, "step_time": 0.41926169967651367} +{"epoch": 0, "iter": 1193, "iter_tflops": 19.20200912068605, "iter_time": 1.0744236907958984, "loss": 0.9115219116210938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.52958988736518, "step_time": 1.0049442596435547} +{"epoch": 0, "iter": 1194, "iter_tflops": 16.938979582596392, "iter_time": 1.2179655456542968, "loss": 0.7959582209587097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.467060310622493, "step_time": 0.9182818412780762} +{"epoch": 0, "iter": 1195, "iter_tflops": 49.225220448085146, "iter_time": 0.419116325378418, "loss": 1.078449010848999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.29439802738727, "step_time": 0.3871156120300293} +{"epoch": 0, "iter": 1196, "iter_tflops": 46.85004295755792, "iter_time": 0.4403644523620606, "loss": 0.8474735617637634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.11706254874993, "step_time": 0.4116580753326416} +{"epoch": 0, "iter": 1197, "iter_tflops": 47.631430325900524, "iter_time": 0.4331403312683106, "loss": 0.3848714828491211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.04303576699081, "step_time": 0.3964237136840821} +{"epoch": 0, "iter": 1198, "iter_tflops": 29.353542646491395, "iter_time": 0.702848503112793, "loss": 0.2999776005744934, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 36.995245063780736, "step_time": 0.5576687889099121} +{"epoch": 0, "iter": 1199, "iter_tflops": 41.29171390985733, "iter_time": 0.49964245986938477, "loss": 0.29633164405822754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2786056046257, "step_time": 0.4556477222442627} +{"epoch": 0, "iter": 1200, "iter_tflops": 44.22166434904824, "iter_time": 0.4665381507873536, "loss": 0.23153260350227356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08851909188624, "step_time": 0.429023265838623} +{"epoch": 0, "iter": 1201, "iter_tflops": 15.700172909847547, "iter_time": 1.314067916870117, "loss": 0.23178745806217194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.490948190290176, "step_time": 1.2510556259155274} +{"epoch": 0, "iter": 1202, "iter_tflops": 25.81470433980998, "iter_time": 0.7991992950439453, "loss": 0.15806227922439575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.01010295762766, "step_time": 0.6445181865692139} +{"epoch": 0, "iter": 1203, "iter_tflops": 53.8387335380604, "iter_time": 0.38320168685913086, "loss": 0.21540933847427368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.42680472620904, "step_time": 0.35311007690429685} +{"epoch": 0, "iter": 1204, "iter_tflops": 51.08373264123463, "iter_time": 0.40386816787719726, "loss": 0.141493022441864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.71606510624472, "step_time": 0.37028985214233395} +{"epoch": 0, "iter": 1205, "iter_tflops": 28.987304943973857, "iter_time": 0.7117285842895508, "loss": 0.7970738410949707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.75934354997294, "step_time": 0.6707260665893554} +{"epoch": 0, "iter": 1206, "iter_tflops": 19.041949604899884, "iter_time": 1.0834548950195313, "loss": 0.9650155305862427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.870860049000612, "step_time": 0.9020689849853515} +{"epoch": 0, "iter": 1207, "iter_tflops": 36.292068629342104, "iter_time": 0.568473892211914, "loss": 0.8567197322845459, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 39.659948490801405, "step_time": 0.5201997051239013} +{"epoch": 0, "iter": 1208, "iter_tflops": 37.88284222938701, "iter_time": 0.5446025772094726, "loss": 0.9281084537506104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.31918200111557, "step_time": 0.4993103084564209} +{"epoch": 0, "iter": 1209, "iter_tflops": 18.325236267227986, "iter_time": 1.125829605102539, "loss": 0.6464203596115112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.639421400251532, "step_time": 1.0504939575195313} +{"epoch": 0, "iter": 1210, "iter_tflops": 15.395130752165448, "iter_time": 1.3401051177978518, "loss": 0.4262438714504242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.946414186054525, "step_time": 1.0343259353637695} +{"epoch": 0, "iter": 1211, "iter_tflops": 38.93775861526328, "iter_time": 0.5298479995727539, "loss": 0.5393080711364746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.676618245210776, "step_time": 0.48342849922180176} +{"epoch": 0, "iter": 1212, "iter_tflops": 39.62852269875616, "iter_time": 0.5206122283935547, "loss": 0.5712288022041321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41146907472086, "step_time": 0.4752452278137207} +{"epoch": 0, "iter": 1213, "iter_tflops": 18.755079065369376, "iter_time": 1.1000270080566406, "loss": 0.8789495825767517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.047274158925312, "step_time": 1.0291221313476562} +{"epoch": 0, "iter": 1214, "iter_tflops": 19.389086375774465, "iter_time": 1.0640570220947265, "loss": 0.8125579953193665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.683394046706244, "step_time": 0.8711206455230713} +{"epoch": 0, "iter": 1215, "iter_tflops": 44.86539154166101, "iter_time": 0.4598442764282226, "loss": 0.7258238792419434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.70961501522301, "step_time": 0.4235527935028076} +{"epoch": 0, "iter": 1216, "iter_tflops": 49.03283759029849, "iter_time": 0.4207607498168946, "loss": 0.7825930714607239, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 53.052356926721465, "step_time": 0.38888175201416014} +{"epoch": 0, "iter": 1217, "iter_tflops": 28.489128627711114, "iter_time": 0.7241742553710938, "loss": 0.3000134825706482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.022129235878793, "step_time": 0.6871962127685547} +{"epoch": 0, "iter": 1218, "iter_tflops": 11.614196693164931, "iter_time": 1.7763685302734376, "loss": 0.2915618419647217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.792555099666611, "step_time": 1.306380973815918} +{"epoch": 0, "iter": 1219, "iter_tflops": 39.49601959369007, "iter_time": 0.5223588027954101, "loss": 0.20090270042419434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.20011848920742, "step_time": 0.4775702991485596} +{"epoch": 0, "iter": 1220, "iter_tflops": 45.5831329070627, "iter_time": 0.45260367584228517, "loss": 0.23160889744758606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50243818035919, "step_time": 0.41676923942565913} +{"epoch": 0, "iter": 1221, "iter_tflops": 34.82741861212787, "iter_time": 0.5923807830810547, "loss": 0.956519365310669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.49891198638124, "step_time": 0.5358877029418945} +{"epoch": 0, "iter": 1222, "iter_tflops": 42.746391689684835, "iter_time": 0.4826394157409668, "loss": 0.8211737871170044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8450877356068, "step_time": 0.44041103363037104} +{"epoch": 0, "iter": 1223, "iter_tflops": 49.519860672064205, "iter_time": 0.41662260818481445, "loss": 0.8090185523033142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.83238993034047, "step_time": 0.38324684333801273} +{"epoch": 0, "iter": 1224, "iter_tflops": 50.00762670605243, "iter_time": 0.4125589408874512, "loss": 0.9354656934738159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.17137831797816, "step_time": 0.3808485984802246} +{"epoch": 0, "iter": 1225, "iter_tflops": 40.94064372979467, "iter_time": 0.503926944732666, "loss": 1.0850865840911865, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 44.5219735449394, "step_time": 0.46339126205444336} +{"epoch": 0, "iter": 1226, "iter_tflops": 43.3238023393752, "iter_time": 0.4762068977355957, "loss": 0.9176682829856873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.26849804266929, "step_time": 0.4364660263061524} +{"epoch": 0, "iter": 1227, "iter_tflops": 47.02432751526515, "iter_time": 0.43873234558105473, "loss": 1.0611532926559448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.85339793407492, "step_time": 0.40569744300842286} +{"epoch": 0, "iter": 1228, "iter_tflops": 48.699240883206805, "iter_time": 0.42364302062988274, "loss": 0.9679526090621948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37082303828311, "step_time": 0.3939425106048584} +{"epoch": 0, "iter": 1229, "iter_tflops": 25.562866086325307, "iter_time": 0.807072784423828, "loss": 1.080630898475647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.88972233855893, "step_time": 0.7672482910156249} +{"epoch": 0, "iter": 1230, "iter_tflops": 16.913133742223515, "iter_time": 1.2198267822265625, "loss": 0.9840913414955139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.624991500773806, "step_time": 1.1077102241516112} +{"epoch": 0, "iter": 1231, "iter_tflops": 40.15681099383812, "iter_time": 0.5137632446289063, "loss": 0.8396658301353455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.8732387885643, "step_time": 0.4812114524841309} +{"epoch": 0, "iter": 1232, "iter_tflops": 43.69711394771142, "iter_time": 0.47213858413696297, "loss": 0.9444419741630554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00442223796285, "step_time": 0.4389181385040284} +{"epoch": 0, "iter": 1233, "iter_tflops": 40.27917593197711, "iter_time": 0.5122024726867676, "loss": 1.1028989553451538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.473538094729435, "step_time": 0.47456669998168943} +{"epoch": 0, "iter": 1234, "iter_tflops": 30.153193190336587, "iter_time": 0.684209243774414, "loss": 1.0213152170181274, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 33.11499178467098, "step_time": 0.6230136985778809} +{"epoch": 0, "iter": 1235, "iter_tflops": 44.0406116041862, "iter_time": 0.4684561080932617, "loss": 1.0290793180465698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.381966424603156, "step_time": 0.43542079544067386} +{"epoch": 0, "iter": 1236, "iter_tflops": 44.33789653853327, "iter_time": 0.46531511688232424, "loss": 1.098140835762024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.50643243419897, "step_time": 0.4342800006866455} +{"epoch": 0, "iter": 1237, "iter_tflops": 35.94540065925094, "iter_time": 0.5739564208984376, "loss": 0.45437324047088623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.486286664332034, "step_time": 0.5360634994506837} +{"epoch": 0, "iter": 1238, "iter_tflops": 18.01639251973429, "iter_time": 1.1451289978027346, "loss": 0.514220654964447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.149614291579383, "step_time": 0.9754832038879395} +{"epoch": 0, "iter": 1239, "iter_tflops": 46.99653357576831, "iter_time": 0.43899181365966794, "loss": 0.4593493640422821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.316447504051, "step_time": 0.4020366668701172} +{"epoch": 0, "iter": 1240, "iter_tflops": 50.416183423775486, "iter_time": 0.40921569442749023, "loss": 0.5661141872406006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.54592008431843, "step_time": 0.37823348617553715} +{"epoch": 0, "iter": 1241, "iter_tflops": 29.3077097021218, "iter_time": 0.703947654724121, "loss": 0.0233455877751112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.950700608362993, "step_time": 0.6665792083740234} +{"epoch": 0, "iter": 1242, "iter_tflops": 14.906853884743992, "iter_time": 1.384000518798828, "loss": 0.04253333806991577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.32659031528476, "step_time": 1.2636498565673828} +{"epoch": 0, "iter": 1243, "iter_tflops": 44.92243271566584, "iter_time": 0.4592603797912598, "loss": 0.05512991547584534, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 50.58970190610577, "step_time": 0.40781211853027344} +{"epoch": 0, "iter": 1244, "iter_tflops": 56.96213401170458, "iter_time": 0.36218961715698245, "loss": 0.07753461599349976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.06457510217724, "step_time": 0.3324133529663086} +{"epoch": 0, "iter": 1245, "iter_tflops": 3.744802568279924, "iter_time": 0.4394949684143066, "loss": 0.8209608793258667, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 4.092304088867953, "step_time": 0.40217487525939943} +{"epoch": 0, "iter": 1246, "iter_tflops": 3.8265289126533837, "iter_time": 0.43010831069946287, "loss": 0.8463615775108337, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 4.206462386698683, "step_time": 0.39126033592224124} +{"epoch": 0, "iter": 1247, "iter_tflops": 3.8664132464302443, "iter_time": 0.42567148971557617, "loss": 0.9312488436698914, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 4.173546939399275, "step_time": 0.3943460826873779} +{"epoch": 0, "iter": 1248, "iter_tflops": 4.165664811272803, "iter_time": 0.3950922508239746, "loss": 0.957212507724762, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 4.488753400986575, "step_time": 0.366654556274414} +{"epoch": 0, "iter": 1249, "iter_tflops": 30.98069859139961, "iter_time": 0.6031387939453124, "loss": 0.44022226333618164, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 33.1809725056502, "step_time": 0.5631438674926759} +{"epoch": 0, "iter": 1250, "iter_tflops": 20.215380588470037, "iter_time": 0.9243289337158203, "loss": 0.6051777601242065, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 25.111391471638868, "step_time": 0.7441109428405761} +{"epoch": 0, "iter": 1251, "iter_tflops": 36.84039702229022, "iter_time": 0.5072057495117187, "loss": 0.40689966082572937, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 39.93461772139098, "step_time": 0.4679063491821289} +{"epoch": 0, "iter": 1252, "iter_tflops": 37.01523374092022, "iter_time": 0.5048100280761718, "loss": 0.4946981370449066, "lr": 
3e-05, "seqlen": 7440.0, "step_tflops": 40.396717420772845, "step_time": 0.46255394935607913} +{"epoch": 0, "iter": 1253, "iter_tflops": 30.02308887148373, "iter_time": 0.6871742477416991, "loss": 1.168241024017334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.385065152245886, "step_time": 0.6370557975769042} +{"epoch": 0, "iter": 1254, "iter_tflops": 10.914405298387397, "iter_time": 1.8902627258300781, "loss": 1.0636627674102783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.085402264420802, "step_time": 1.576649543762207} +{"epoch": 0, "iter": 1255, "iter_tflops": 29.591422989050276, "iter_time": 0.6971984252929688, "loss": 1.0449674129486084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.29010499160856, "step_time": 0.5685046520233155} +{"epoch": 0, "iter": 1256, "iter_tflops": 39.716551040294405, "iter_time": 0.5194583358764648, "loss": 0.730512261390686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80548253438313, "step_time": 0.4819731559753418} +{"epoch": 0, "iter": 1257, "iter_tflops": 14.94179865027584, "iter_time": 1.0196014785766603, "loss": 0.4682251513004303, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 16.05459501836756, "step_time": 0.9489295730590821} +{"epoch": 0, "iter": 1258, "iter_tflops": 26.319681622719255, "iter_time": 0.5788322296142578, "loss": 0.3150578439235687, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.151256352113133, "step_time": 0.5411722946166992} +{"epoch": 0, "iter": 1259, "iter_tflops": 27.348431038996427, "iter_time": 0.5570586471557618, "loss": 0.4634782671928406, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.158492736598657, "step_time": 0.5224783096313477} +{"epoch": 0, "iter": 1260, "iter_tflops": 28.838358438342844, "iter_time": 0.528278335571289, "loss": 0.4238317608833313, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 30.547104882439438, "step_time": 0.4987274589538575} +{"epoch": 0, "iter": 1261, "iter_tflops": 33.80096704293613, "iter_time": 0.6103699188232422, "loss": 1.2094602584838867, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 36.02452212857389, "step_time": 0.5726958274841308} +{"epoch": 0, "iter": 1262, "iter_tflops": 23.547948080566872, "iter_time": 0.8761312637329102, "loss": 1.2199054956436157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.251937609127694, "step_time": 0.7302541084289551} +{"epoch": 0, "iter": 1263, "iter_tflops": 45.032009961108805, "iter_time": 0.4581428527832031, "loss": 1.2810442447662354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.75050079116835, "step_time": 0.42319757080078124} +{"epoch": 0, "iter": 1264, "iter_tflops": 46.50166826228641, "iter_time": 0.44366351318359376, "loss": 1.1048201322555542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.8958810011297, "step_time": 0.41348289871215815} +{"epoch": 0, "iter": 1265, "iter_tflops": 29.581262190126928, "iter_time": 0.6974379043579102, "loss": 1.011013150215149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.30275882159988, "step_time": 0.659082275390625} +{"epoch": 0, "iter": 1266, "iter_tflops": 17.568368899450167, "iter_time": 1.174331756591797, "loss": 0.8615200519561768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.809571875123666, "step_time": 1.0414709434509277} +{"epoch": 0, "iter": 1267, "iter_tflops": 37.28326926077083, "iter_time": 0.5533606338500977, "loss": 0.9466931819915771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45314238199415, "step_time": 0.5099997749328613} +{"epoch": 0, "iter": 1268, "iter_tflops": 40.31242304853047, "iter_time": 0.5117800407409667, "loss": 0.953479528427124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.46120226345799, "step_time": 0.47470139884948725} +{"epoch": 0, "iter": 1269, "iter_tflops": 19.731193235004433, "iter_time": 1.0056861114501954, "loss": 0.1601809859275818, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 20.74438132516761, "step_time": 0.956566825866699} +{"epoch": 0, "iter": 1270, "iter_tflops": 13.261943132242653, "iter_time": 1.4962654266357425, "loss": 0.2860526740550995, "lr": 
3e-05, "seqlen": 7888.0, "step_tflops": 17.798928859531195, "step_time": 1.1148641109466553} +{"epoch": 0, "iter": 1271, "iter_tflops": 48.972878763313865, "iter_time": 0.40519135284423824, "loss": 0.23028028011322021, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 53.35187044791451, "step_time": 0.37193423271179193} +{"epoch": 0, "iter": 1272, "iter_tflops": 52.67055586722449, "iter_time": 0.3767453498840332, "loss": 0.2483183890581131, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 57.33013952715023, "step_time": 0.3461248683929443} +{"epoch": 0, "iter": 1273, "iter_tflops": 26.25607292438646, "iter_time": 0.7857646331787109, "loss": 0.9066203236579895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.48548432889875, "step_time": 0.750617790222168} +{"epoch": 0, "iter": 1274, "iter_tflops": 13.419872911939423, "iter_time": 1.5373538665771485, "loss": 0.8946757912635803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.11214949846911, "step_time": 1.2804681034088137} +{"epoch": 0, "iter": 1275, "iter_tflops": 38.19681994767224, "iter_time": 0.5401259460449218, "loss": 1.1527284383773804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.35272118598388, "step_time": 0.49890534210205073} +{"epoch": 0, "iter": 1276, "iter_tflops": 37.02648808461408, "iter_time": 0.557198226928711, "loss": 1.0044935941696167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25380692197121, "step_time": 0.5125252761840821} +{"epoch": 0, "iter": 1277, "iter_tflops": 17.554830005026826, "iter_time": 1.1752374420166014, "loss": 0.3171774446964264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.454409106388848, "step_time": 1.1179492874145507} +{"epoch": 0, "iter": 1278, "iter_tflops": 32.81938596009828, "iter_time": 0.6286252136230469, "loss": 0.2693622410297394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.804592419739976, "step_time": 0.5762136116027833} +{"epoch": 0, "iter": 1279, "iter_tflops": 48.07290113078011, "iter_time": 0.42916264724731445, "loss": 0.25576356053352356, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 51.95722867474328, "step_time": 0.397078405380249} +{"epoch": 0, "iter": 1280, "iter_tflops": 47.58522521050665, "iter_time": 0.43356090927124025, "loss": 0.3123922049999237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.57030691069157, "step_time": 0.4000576057434082} +{"epoch": 0, "iter": 1281, "iter_tflops": 25.027817468739343, "iter_time": 0.43822333908081057, "loss": 0.0166022926568985, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 27.345554809296015, "step_time": 0.40108068084716797} +{"epoch": 0, "iter": 1282, "iter_tflops": 6.16467571625828, "iter_time": 1.7791323089599609, "loss": 0.012563753873109818, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 6.8905849135537895, "step_time": 1.5917043151855468} +{"epoch": 0, "iter": 1283, "iter_tflops": 8.002896826848175, "iter_time": 1.3704754638671874, "loss": 0.006394213531166315, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 9.047700655302542, "step_time": 1.2122166900634765} +{"epoch": 0, "iter": 1284, "iter_tflops": 14.332051054697224, "iter_time": 0.7652619781494141, "loss": 0.016538463532924652, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 16.32374275892325, "step_time": 0.6718908710479736} +{"epoch": 0, "iter": 1285, "iter_tflops": 20.539778239735604, "iter_time": 0.7456974029541015, "loss": 0.30403125286102295, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 21.737256723557486, "step_time": 0.7046178588867187} +{"epoch": 0, "iter": 1286, "iter_tflops": 9.887034685194486, "iter_time": 1.5491459045410156, "loss": 0.3636585474014282, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 11.503687186716634, "step_time": 1.3314391326904298} +{"epoch": 0, "iter": 1287, "iter_tflops": 23.708530373274563, "iter_time": 0.6460315780639648, "loss": 0.4436681866645813, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 25.49227861416525, "step_time": 0.600827392578125} +{"epoch": 0, "iter": 1288, "iter_tflops": 25.418754267685664, "iter_time": 0.6025652999877931, "loss": 0.2118680775165558, 
"lr": 3e-05, "seqlen": 6128.0, "step_tflops": 27.197108075738093, "step_time": 0.5631649971008301} +{"epoch": 0, "iter": 1289, "iter_tflops": 16.79657442887262, "iter_time": 1.228291732788086, "loss": 0.39503613114356995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.909568006444918, "step_time": 1.1519593048095702} +{"epoch": 0, "iter": 1290, "iter_tflops": 18.315216839723536, "iter_time": 1.1264454956054688, "loss": 0.39879968762397766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.28280629092744, "step_time": 0.6812807674407959} +{"epoch": 0, "iter": 1291, "iter_tflops": 44.53135526300205, "iter_time": 0.4632936363220215, "loss": 0.4048130214214325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.45444521471931, "step_time": 0.4257832984924317} +{"epoch": 0, "iter": 1292, "iter_tflops": 47.45444587557796, "iter_time": 0.4347557563781739, "loss": 0.31471091508865356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.50645297958646, "step_time": 0.4005535678863525} +{"epoch": 0, "iter": 1293, "iter_tflops": 31.981586884326067, "iter_time": 0.6450928649902344, "loss": 0.09850706160068512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.064359653400814, "step_time": 0.6056504135131835} +{"epoch": 0, "iter": 1294, "iter_tflops": 19.61853680829197, "iter_time": 1.0516122436523436, "loss": 0.08018647134304047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.704781690833077, "step_time": 0.8703346767425536} +{"epoch": 0, "iter": 1295, "iter_tflops": 39.14242770136954, "iter_time": 0.5270775146484374, "loss": 0.10846132785081863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19401453881428, "step_time": 0.47763778686523434} +{"epoch": 0, "iter": 1296, "iter_tflops": 45.33119621850034, "iter_time": 0.45511910629272456, "loss": 0.07056525349617004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.92695958553963, "step_time": 0.413225513458252} +{"epoch": 0, "iter": 1297, "iter_tflops": 17.661918240081608, "iter_time": 1.1681117095947264, "loss": 
0.9791232347488403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.93343817082574, "step_time": 1.0896643981933594} +{"epoch": 0, "iter": 1298, "iter_tflops": 19.99560306343645, "iter_time": 1.031781509399414, "loss": 0.8174026608467102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.77592997402114, "step_time": 0.8677302436828613} +{"epoch": 0, "iter": 1299, "iter_tflops": 44.63995545645532, "iter_time": 0.4621665344238281, "loss": 0.8653156161308289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.07818192501558, "step_time": 0.42911550903320317} +{"epoch": 0, "iter": 1300, "iter_tflops": 49.31322891453364, "iter_time": 0.418368335723877, "loss": 0.8646315336227417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.433942816579616, "step_time": 0.38610464477539064} +{"epoch": 0, "iter": 1301, "iter_tflops": 33.267455849259576, "iter_time": 0.6201584396362305, "loss": 0.7188634872436523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.381583801356356, "step_time": 0.5831026000976562} +{"epoch": 0, "iter": 1302, "iter_tflops": 12.294499491661352, "iter_time": 1.6780751037597657, "loss": 0.5135241150856018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.203382318454306, "step_time": 1.2732584533691407} +{"epoch": 0, "iter": 1303, "iter_tflops": 40.24891074310616, "iter_time": 0.5125876235961915, "loss": 0.7838735580444336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95440863360859, "step_time": 0.4693748397827149} +{"epoch": 0, "iter": 1304, "iter_tflops": 43.243142401275605, "iter_time": 0.47709514999389646, "loss": 0.7768266797065735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.97990370669268, "step_time": 0.43914720726013184} +{"epoch": 0, "iter": 1305, "iter_tflops": 33.23236263367924, "iter_time": 0.6208133239746093, "loss": 0.20387902855873108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.46333067169241, "step_time": 0.5658038673400878} +{"epoch": 0, "iter": 1306, "iter_tflops": 10.432247817336858, "iter_time": 1.9776268615722656, "loss": 
0.1278875470161438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.327570313112377, "step_time": 1.6735733795166015} +{"epoch": 0, "iter": 1307, "iter_tflops": 11.565927540746141, "iter_time": 1.783782012939453, "loss": 0.2110070437192917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.379069038905795, "step_time": 1.341504707336426} +{"epoch": 0, "iter": 1308, "iter_tflops": 16.48156823695708, "iter_time": 1.251767623901367, "loss": 0.17325158417224884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.63540903518308, "step_time": 1.1070909938812257} +{"epoch": 0, "iter": 1309, "iter_tflops": 11.661250875496956, "iter_time": 1.225904052734375, "loss": 0.37328076362609863, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 12.375315771445763, "step_time": 1.1551684799194337} +{"epoch": 0, "iter": 1310, "iter_tflops": 11.108188128544771, "iter_time": 1.2869402770996092, "loss": 0.2811633050441742, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 13.003972176776859, "step_time": 1.0993236923217773} +{"epoch": 0, "iter": 1311, "iter_tflops": 27.285911422700423, "iter_time": 0.5239178009033204, "loss": 0.25314486026763916, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 28.988593400199427, "step_time": 0.4931448211669922} +{"epoch": 0, "iter": 1312, "iter_tflops": 26.423416082861248, "iter_time": 0.5410191726684571, "loss": 0.47727349400520325, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 28.014524948173094, "step_time": 0.5102915267944336} +{"epoch": 0, "iter": 1313, "iter_tflops": 34.53002192695757, "iter_time": 0.597482780456543, "loss": 0.33602574467658997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.91396942633284, "step_time": 0.5588966407775879} +{"epoch": 0, "iter": 1314, "iter_tflops": 9.459019915838676, "iter_time": 2.1811026611328126, "loss": 0.6305087804794312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.33287067659946, "step_time": 1.8204649200439453} +{"epoch": 0, "iter": 1315, "iter_tflops": 14.599134102753949, "iter_time": 1.413172409057617, "loss": 
0.32124781608581543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.449334185721113, "step_time": 1.1823427352905274} +{"epoch": 0, "iter": 1316, "iter_tflops": 23.805449972807967, "iter_time": 0.8666542129516601, "loss": 0.35909000039100647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.885495575707004, "step_time": 0.6903380088806153} +{"epoch": 0, "iter": 1317, "iter_tflops": 15.839185973818342, "iter_time": 1.0419967346191406, "loss": 0.3443746566772461, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 16.907647423231538, "step_time": 0.9761488189697265} +{"epoch": 0, "iter": 1318, "iter_tflops": 14.614185589958275, "iter_time": 1.1293397064208985, "loss": 0.35827380418777466, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 17.259463141070313, "step_time": 0.9562510681152343} +{"epoch": 0, "iter": 1319, "iter_tflops": 30.497197526096606, "iter_time": 0.5411769409179688, "loss": 0.4028453528881073, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 32.417385954323265, "step_time": 0.5091212501525879} +{"epoch": 0, "iter": 1320, "iter_tflops": 28.21127367850995, "iter_time": 0.5850278244018554, "loss": 0.40148311853408813, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 29.958342089836474, "step_time": 0.5509109954833984} +{"epoch": 0, "iter": 1321, "iter_tflops": 37.445823076481496, "iter_time": 0.5509584732055663, "loss": 0.179616779088974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.491672251899956, "step_time": 0.5095144844055176} +{"epoch": 0, "iter": 1322, "iter_tflops": 11.535125811837494, "iter_time": 1.7885451660156249, "loss": 0.20544461905956268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.734614470265651, "step_time": 1.502123962402344} +{"epoch": 0, "iter": 1323, "iter_tflops": 16.089589577594612, "iter_time": 1.2822635040283201, "loss": 0.23280911147594452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.37305184753145, "step_time": 1.0126658325195312} +{"epoch": 0, "iter": 1324, "iter_tflops": 16.016740020856997, "iter_time": 1.288095672607422, 
"loss": 0.190301775932312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.169920815990107, "step_time": 1.1354531326293944} +{"epoch": 0, "iter": 1325, "iter_tflops": 19.849645208999526, "iter_time": 0.804617561340332, "loss": 0.21685560047626495, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 20.937534108619932, "step_time": 0.7628106079101563} +{"epoch": 0, "iter": 1326, "iter_tflops": 13.477356055566826, "iter_time": 1.1850523986816406, "loss": 0.3179692327976227, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.108559965891393, "step_time": 1.0571075706481934} +{"epoch": 0, "iter": 1327, "iter_tflops": 24.227875488916023, "iter_time": 0.6592147598266601, "loss": 0.3695485293865204, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 25.883152526535014, "step_time": 0.6170567169189454} +{"epoch": 0, "iter": 1328, "iter_tflops": 25.391539573040106, "iter_time": 0.6290037307739258, "loss": 0.31712865829467773, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 27.246637941543618, "step_time": 0.5861777572631836} +{"epoch": 0, "iter": 1329, "iter_tflops": 23.626165692053615, "iter_time": 0.8732307128906251, "loss": 1.0981063842773438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.591973924724687, "step_time": 0.8061548347473144} +{"epoch": 0, "iter": 1330, "iter_tflops": 24.22134003741633, "iter_time": 0.8517734146118164, "loss": 1.0492703914642334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.152866012014755, "step_time": 0.7076866302490235} +{"epoch": 0, "iter": 1331, "iter_tflops": 35.3378024914354, "iter_time": 0.5838250274658204, "loss": 1.0670908689498901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.57541352940241, "step_time": 0.5348249473571778} +{"epoch": 0, "iter": 1332, "iter_tflops": 39.542504982722676, "iter_time": 0.5217447280883789, "loss": 1.0856069326400757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.086294226550656, "step_time": 0.47883193206787106} +{"epoch": 0, "iter": 1333, "iter_tflops": 15.713971004971247, "iter_time": 1.3129140625, 
"loss": 0.11533326655626297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.642392223688024, "step_time": 1.2396711502075195} +{"epoch": 0, "iter": 1334, "iter_tflops": 19.641718087528382, "iter_time": 1.050371124267578, "loss": 0.09304840862751007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.18563627854083, "step_time": 0.6834738655090332} +{"epoch": 0, "iter": 1335, "iter_tflops": 53.26977992256687, "iter_time": 0.38729451370239254, "loss": 0.141636461019516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.81880452378645, "step_time": 0.3568232460021973} +{"epoch": 0, "iter": 1336, "iter_tflops": 49.746905643257406, "iter_time": 0.41472114181518555, "loss": 0.15044282376766205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.0487171735466, "step_time": 0.3817129173278809} +{"epoch": 0, "iter": 1337, "iter_tflops": 24.512046624557446, "iter_time": 0.841671600341797, "loss": 0.05490192398428917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.716497300992266, "step_time": 0.8022513046264649} +{"epoch": 0, "iter": 1338, "iter_tflops": 13.082874948657787, "iter_time": 1.576954116821289, "loss": 0.07497478276491165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.748810913336726, "step_time": 1.3100096015930174} +{"epoch": 0, "iter": 1339, "iter_tflops": 54.844042169908114, "iter_time": 0.37617747879028324, "loss": 0.05505940318107605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.04023400044215, "step_time": 0.3436211376190186} +{"epoch": 0, "iter": 1340, "iter_tflops": 60.78234967428666, "iter_time": 0.3394257316589355, "loss": 0.04174576327204704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.30730750908677, "step_time": 0.3111435871124268} +{"epoch": 0, "iter": 1341, "iter_tflops": 18.5008755502229, "iter_time": 0.6255999450683594, "loss": 0.0030011585913598537, "lr": 3e-05, "seqlen": 4656.0, "step_tflops": 19.649398396811495, "step_time": 0.5890331344604491} +{"epoch": 0, "iter": 1342, "iter_tflops": 8.41998656264686, "iter_time": 
1.3746039428710937, "loss": 0.031980205327272415, "lr": 3e-05, "seqlen": 4656.0, "step_tflops": 10.084786318472267, "step_time": 1.1476838836669923} +{"epoch": 0, "iter": 1343, "iter_tflops": 26.19741496478252, "iter_time": 0.441804916381836, "loss": 0.0023228584323078394, "lr": 3e-05, "seqlen": 4656.0, "step_tflops": 28.906560769972977, "step_time": 0.40039860916137693} +{"epoch": 0, "iter": 1344, "iter_tflops": 29.67433872313943, "iter_time": 0.3900389099121094, "loss": 0.0014625220792368054, "lr": 3e-05, "seqlen": 4656.0, "step_tflops": 32.70244753437101, "step_time": 0.3539229507446288} +{"epoch": 0, "iter": 1345, "iter_tflops": 18.27639712464117, "iter_time": 0.9930514755249024, "loss": 0.0567927360534668, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 19.64531603973552, "step_time": 0.9238539657592773} +{"epoch": 0, "iter": 1346, "iter_tflops": 13.366359125289357, "iter_time": 1.3578419494628906, "loss": 0.058048687875270844, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 16.15623110495005, "step_time": 1.1233686256408693} +{"epoch": 0, "iter": 1347, "iter_tflops": 37.9634541418746, "iter_time": 0.47807565307617184, "loss": 0.07129945605993271, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 41.90937472447257, "step_time": 0.4330630855560303} +{"epoch": 0, "iter": 1348, "iter_tflops": 41.10793825139299, "iter_time": 0.44150604248046876, "loss": 0.02199646830558777, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 45.04452363992836, "step_time": 0.40292141342163085} +{"epoch": 0, "iter": 1349, "iter_tflops": 22.51250886980838, "iter_time": 0.9164280014038086, "loss": 0.04506882652640343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.89547897171554, "step_time": 0.8633889923095703} +{"epoch": 0, "iter": 1350, "iter_tflops": 10.955342803478004, "iter_time": 1.8831992645263673, "loss": 0.09864354878664017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.328622490374268, "step_time": 1.673430549621582} +{"epoch": 0, "iter": 1351, "iter_tflops": 11.884237550884627, 
"iter_time": 1.7360048065185547, "loss": 0.05358021333813667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.803975349989708, "step_time": 1.3054369583129883} +{"epoch": 0, "iter": 1352, "iter_tflops": 29.767788859970672, "iter_time": 0.6930677185058594, "loss": 0.05271896719932556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.237752227518754, "step_time": 0.4884515018463135} +{"epoch": 0, "iter": 1353, "iter_tflops": 15.955900777903112, "iter_time": 0.9932679443359375, "loss": 0.25001847743988037, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 16.970740773386428, "step_time": 0.933871124267578} +{"epoch": 0, "iter": 1354, "iter_tflops": 28.04645749903296, "iter_time": 0.565079734802246, "loss": 0.3974000811576843, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 29.904617637650407, "step_time": 0.5299678115844727} +{"epoch": 0, "iter": 1355, "iter_tflops": 28.91890366280608, "iter_time": 0.5480320053100585, "loss": 0.3243615925312042, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.755552378741733, "step_time": 0.5153048324584961} +{"epoch": 0, "iter": 1356, "iter_tflops": 27.943215374547055, "iter_time": 0.5671675415039064, "loss": 0.5654097199440002, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 29.793158687441068, "step_time": 0.531950469970703} +{"epoch": 0, "iter": 1357, "iter_tflops": 46.29572734535745, "iter_time": 0.4456370964050293, "loss": 0.12863288819789886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.70285142621354, "step_time": 0.4069020366668701} +{"epoch": 0, "iter": 1358, "iter_tflops": 45.68491648710562, "iter_time": 0.4515952987670898, "loss": 0.18788263201713562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.761464832494426, "step_time": 0.4064321937561035} +{"epoch": 0, "iter": 1359, "iter_tflops": 52.537739390638706, "iter_time": 0.3926909255981445, "loss": 0.20072738826274872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.085432832658746, "step_time": 0.36140732383728025} +{"epoch": 0, "iter": 1360, "iter_tflops": 
49.49082191866351, "iter_time": 0.4168670616149902, "loss": 0.18272362649440765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.154684219423395, "step_time": 0.3809660015106201} +{"epoch": 0, "iter": 1361, "iter_tflops": 40.44379900862115, "iter_time": 0.5101175956726075, "loss": 1.0025219917297363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.265996155216314, "step_time": 0.4660709190368652} +{"epoch": 0, "iter": 1362, "iter_tflops": 46.75846613049108, "iter_time": 0.44122690963745115, "loss": 1.0719709396362305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.51209659045182, "step_time": 0.40843866920471195} +{"epoch": 0, "iter": 1363, "iter_tflops": 48.864230599352666, "iter_time": 0.42221259307861325, "loss": 1.0580899715423584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.62135208412149, "step_time": 0.39206695938110353} +{"epoch": 0, "iter": 1364, "iter_tflops": 49.38085066309433, "iter_time": 0.41779542541503906, "loss": 1.1152909994125366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.65732722288693, "step_time": 0.38449722671508785} +{"epoch": 0, "iter": 1365, "iter_tflops": 20.08299291460707, "iter_time": 1.0272917785644533, "loss": 0.8621596097946167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.96128703494667, "step_time": 0.9842474594116211} +{"epoch": 0, "iter": 1366, "iter_tflops": 18.927857530772624, "iter_time": 1.0899856719970704, "loss": 0.7725144624710083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.698277089032374, "step_time": 0.835325210571289} +{"epoch": 0, "iter": 1367, "iter_tflops": 46.957984300970715, "iter_time": 0.4393521957397461, "loss": 0.8478370904922485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.75075081928437, "step_time": 0.4065179958343506} +{"epoch": 0, "iter": 1368, "iter_tflops": 48.85490768769326, "iter_time": 0.42229316329956057, "loss": 0.7987025380134583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.847705140978114, "step_time": 0.3903876895904541} +{"epoch": 0, "iter": 1369, 
"iter_tflops": 27.13206254666657, "iter_time": 0.7603953247070313, "loss": 0.9179184436798096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.554163759095516, "step_time": 0.7225248718261719} +{"epoch": 0, "iter": 1370, "iter_tflops": 15.41146099828398, "iter_time": 1.338685119628906, "loss": 1.2020701169967651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.597900897814885, "step_time": 1.0527195549011232} +{"epoch": 0, "iter": 1371, "iter_tflops": 41.49675232636869, "iter_time": 0.49717369079589846, "loss": 0.9066473841667175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.189080615289356, "step_time": 0.4565504150390625} +{"epoch": 0, "iter": 1372, "iter_tflops": 41.599046240327546, "iter_time": 0.4959511184692383, "loss": 1.1555709838867188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.211116330777735, "step_time": 0.4563278942108154} +{"epoch": 0, "iter": 1373, "iter_tflops": 37.92733829724428, "iter_time": 0.5439636535644532, "loss": 0.06174810230731964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.14053252567466, "step_time": 0.48957837677001953} +{"epoch": 0, "iter": 1374, "iter_tflops": 37.61443615988485, "iter_time": 0.5484887084960937, "loss": 0.11343572288751602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.49575241946534, "step_time": 0.48548601531982427} +{"epoch": 0, "iter": 1375, "iter_tflops": 46.19165198534931, "iter_time": 0.44664117050170904, "loss": 0.16018082201480865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.808163631083104, "step_time": 0.4060586338043213} +{"epoch": 0, "iter": 1376, "iter_tflops": 43.24503827587537, "iter_time": 0.47707423400878907, "loss": 0.09013226628303528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.41475025615058, "step_time": 0.43511973381042474} +{"epoch": 0, "iter": 1377, "iter_tflops": 19.739568673992395, "iter_time": 1.0451643524169922, "loss": 0.7573525309562683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.241125534822125, "step_time": 0.9712806167602538} +{"epoch": 0, "iter": 
1378, "iter_tflops": 15.67181728364854, "iter_time": 1.3164455108642579, "loss": 0.9103508591651917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.04218910098078, "step_time": 1.0293832378387453} +{"epoch": 0, "iter": 1379, "iter_tflops": 37.67145620389621, "iter_time": 0.5476585083007812, "loss": 0.9704706072807312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.30265998435151, "step_time": 0.49951004409790045} +{"epoch": 0, "iter": 1380, "iter_tflops": 41.580446905321125, "iter_time": 0.4961729621887207, "loss": 0.7543672919273376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.12485264882853, "step_time": 0.45720024108886714} +{"epoch": 0, "iter": 1381, "iter_tflops": 31.55501526345305, "iter_time": 0.6538134536743164, "loss": 0.5626239776611328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.04768671202196, "step_time": 0.5886577816009521} +{"epoch": 0, "iter": 1382, "iter_tflops": 45.90887733927415, "iter_time": 0.4493922462463379, "loss": 0.6082712411880493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99394503402524, "step_time": 0.41267184448242183} +{"epoch": 0, "iter": 1383, "iter_tflops": 49.786802712197776, "iter_time": 0.41438880157470703, "loss": 0.6658762693405151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.950122249490825, "step_time": 0.3824105052947998} +{"epoch": 0, "iter": 1384, "iter_tflops": 49.04644298735191, "iter_time": 0.42064403152465824, "loss": 0.6247612237930298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.18360840220816, "step_time": 0.38792203330993646} +{"epoch": 0, "iter": 1385, "iter_tflops": 22.89613728220879, "iter_time": 0.9010731048583984, "loss": 0.7628074884414673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.882080110629673, "step_time": 0.863873390197754} +{"epoch": 0, "iter": 1386, "iter_tflops": 18.121271911591368, "iter_time": 1.1385014038085937, "loss": 0.7699106931686401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.041221096131917, "step_time": 0.8581549758911132} +{"epoch": 0, "iter": 
1387, "iter_tflops": 40.105078758943385, "iter_time": 0.5144259567260743, "loss": 0.6286059021949768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.68029343296113, "step_time": 0.47232039642333984} +{"epoch": 0, "iter": 1388, "iter_tflops": 43.67073719342116, "iter_time": 0.47242375183105473, "loss": 0.5347486734390259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.085659905394294, "step_time": 0.4381608657836914} +{"epoch": 0, "iter": 1389, "iter_tflops": 21.308370722535287, "iter_time": 0.9682154388427735, "loss": 0.28121352195739746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.527030901229, "step_time": 0.9158372268676759} +{"epoch": 0, "iter": 1390, "iter_tflops": 13.289417246473219, "iter_time": 1.5524453125000002, "loss": 0.20539286732673645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.36780562652076, "step_time": 1.3424879264831544} +{"epoch": 0, "iter": 1391, "iter_tflops": 45.401290250467035, "iter_time": 0.4544164581298828, "loss": 0.23365911841392517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.523905924927476, "step_time": 0.4165885772705078} +{"epoch": 0, "iter": 1392, "iter_tflops": 52.964895737534775, "iter_time": 0.38952391433715816, "loss": 0.33427175879478455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.3263795871808, "step_time": 0.35988830375671393} +{"epoch": 0, "iter": 1393, "iter_tflops": 33.79130259865979, "iter_time": 0.6105444869995117, "loss": 0.2571336627006531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.07068567892329, "step_time": 0.5719628868103027} +{"epoch": 0, "iter": 1394, "iter_tflops": 13.657765258498321, "iter_time": 1.5105760803222656, "loss": 0.2778157591819763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.174464430246104, "step_time": 1.2755348777770996} +{"epoch": 0, "iter": 1395, "iter_tflops": 36.5002642747726, "iter_time": 0.5652313461303711, "loss": 0.18842697143554688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.417245582822815, "step_time": 0.4644838562011719} +{"epoch": 0, 
"iter": 1396, "iter_tflops": 44.72613936294737, "iter_time": 0.4612759742736816, "loss": 0.23173841834068298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.79646518797746, "step_time": 0.4227989349365234} +{"epoch": 0, "iter": 1397, "iter_tflops": 21.76349757707852, "iter_time": 0.9479677352905275, "loss": 0.9006536602973938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.70220647985377, "step_time": 0.8704292373657226} +{"epoch": 0, "iter": 1398, "iter_tflops": 46.153067420760735, "iter_time": 0.44701456832885733, "loss": 0.8393787741661072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.03720653176849, "step_time": 0.4123150539398194} +{"epoch": 0, "iter": 1399, "iter_tflops": 53.255835840515765, "iter_time": 0.3873959197998047, "loss": 0.8660128116607666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.442366302261924, "step_time": 0.3591616230010986} +{"epoch": 0, "iter": 1400, "iter_tflops": 48.64942553935055, "iter_time": 0.42407681655883794, "loss": 1.1457635164260864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.31206105979014, "step_time": 0.394385025024414} +{"epoch": 0, "iter": 1401, "iter_tflops": 32.10116897621177, "iter_time": 0.6426897888183594, "loss": 0.9750587940216064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.243424365391704, "step_time": 0.6024833641052246} +{"epoch": 0, "iter": 1402, "iter_tflops": 16.04931389684918, "iter_time": 1.2854813385009765, "loss": 1.0662494897842407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.861436682985772, "step_time": 1.093824073791504} +{"epoch": 0, "iter": 1403, "iter_tflops": 45.1547529232183, "iter_time": 0.4568974952697754, "loss": 1.5577771663665771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42533011098618, "step_time": 0.42603929519653316} +{"epoch": 0, "iter": 1404, "iter_tflops": 43.50225826551465, "iter_time": 0.4742533912658692, "loss": 1.1984025239944458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.855557183504565, "step_time": 0.44031262779235836} +{"epoch": 0, 
"iter": 1405, "iter_tflops": 26.07493255326286, "iter_time": 0.791223274230957, "loss": 1.0203533172607422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.502303829224903, "step_time": 0.7501587371826172} +{"epoch": 0, "iter": 1406, "iter_tflops": 13.20284336115524, "iter_time": 1.562625030517578, "loss": 1.1455367803573608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.53228162581777, "step_time": 1.2479277801513673} +{"epoch": 0, "iter": 1407, "iter_tflops": 40.18443705823724, "iter_time": 0.513410041809082, "loss": 1.0033353567123413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.21005069055797, "step_time": 0.47746052551269524} +{"epoch": 0, "iter": 1408, "iter_tflops": 43.35162612060496, "iter_time": 0.4759012603759766, "loss": 1.04609215259552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.29045760518964, "step_time": 0.4456878280639648} +{"epoch": 0, "iter": 1409, "iter_tflops": 31.559313685447567, "iter_time": 0.6537244033813476, "loss": 1.074642539024353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.521054751001195, "step_time": 0.6154667167663573} +{"epoch": 0, "iter": 1410, "iter_tflops": 16.826762015221497, "iter_time": 1.226088150024414, "loss": 0.9880999326705933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.293960117034228, "step_time": 1.0693032112121583} +{"epoch": 0, "iter": 1411, "iter_tflops": 45.24327526265716, "iter_time": 0.4560035362243652, "loss": 1.0515341758728027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.69868397839777, "step_time": 0.4236478652954102} +{"epoch": 0, "iter": 1412, "iter_tflops": 48.02546579480098, "iter_time": 0.4295865364074707, "loss": 0.9574434757232666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.720738033072394, "step_time": 0.3988940277099609} +{"epoch": 0, "iter": 1413, "iter_tflops": 36.20371400944606, "iter_time": 0.5698612442016602, "loss": 0.7891819477081299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.83219777688369, "step_time": 0.5312883300781249} +{"epoch": 0, "iter": 
1414, "iter_tflops": 42.73809106854123, "iter_time": 0.482733154296875, "loss": 0.763325572013855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.03048669021582, "step_time": 0.4386748886108398} +{"epoch": 0, "iter": 1415, "iter_tflops": 47.09426232602511, "iter_time": 0.4380808296203614, "loss": 0.8430590629577637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74734214662154, "step_time": 0.40654530143737794} +{"epoch": 0, "iter": 1416, "iter_tflops": 41.05440017430086, "iter_time": 0.5025306282043457, "loss": 0.8987220525741577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32207472243764, "step_time": 0.46548122215271} +{"epoch": 0, "iter": 1417, "iter_tflops": 30.82270944328341, "iter_time": 0.6693471755981445, "loss": 0.9005211591720581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.75302181960381, "step_time": 0.6298989334106445} +{"epoch": 0, "iter": 1418, "iter_tflops": 16.53185915316728, "iter_time": 1.2479596710205079, "loss": 0.962653636932373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.32931131812224, "step_time": 1.06734757232666} +{"epoch": 0, "iter": 1419, "iter_tflops": 46.678541352557126, "iter_time": 0.4419823951721191, "loss": 0.7776969075202942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5678760605021, "step_time": 0.4079881362915039} +{"epoch": 0, "iter": 1420, "iter_tflops": 47.00492411238905, "iter_time": 0.4389134521484375, "loss": 0.7776415944099426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.48965865385974, "step_time": 0.4086201820373535} +{"epoch": 0, "iter": 1421, "iter_tflops": 41.79479224885941, "iter_time": 0.49362833023071284, "loss": 1.1148278713226318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.398925837174815, "step_time": 0.4544401245117188} +{"epoch": 0, "iter": 1422, "iter_tflops": 33.65202136615674, "iter_time": 0.6130714492797852, "loss": 0.8436712026596069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.39325016648849, "step_time": 0.5517330913543701} +{"epoch": 0, "iter": 1423, 
"iter_tflops": 36.441452103973425, "iter_time": 0.5661435623168946, "loss": 0.9783804416656494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.64745741289293, "step_time": 0.5203635959625244} +{"epoch": 0, "iter": 1424, "iter_tflops": 41.25142505373747, "iter_time": 0.5001304435729981, "loss": 1.1323177814483643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.76729044329392, "step_time": 0.46085195922851563} +{"epoch": 0, "iter": 1425, "iter_tflops": 29.4042958629898, "iter_time": 0.7016353530883789, "loss": 0.9228204488754272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.345145608801598, "step_time": 0.6581910247802734} +{"epoch": 0, "iter": 1426, "iter_tflops": 20.148759520692085, "iter_time": 1.0239386444091798, "loss": 1.0222108364105225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.087178667245556, "step_time": 0.6636528110504151} +{"epoch": 0, "iter": 1427, "iter_tflops": 46.681337888759366, "iter_time": 0.44195591735839845, "loss": 1.0914808511734009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.32069771882989, "step_time": 0.4099921989440918} +{"epoch": 0, "iter": 1428, "iter_tflops": 45.72154300343347, "iter_time": 0.4512335357666016, "loss": 0.9997925758361816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.20746535276849, "step_time": 0.41926755142211913} +{"epoch": 0, "iter": 1429, "iter_tflops": 29.480487914809597, "iter_time": 0.5821302337646485, "loss": 0.2954693138599396, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 31.619654117829654, "step_time": 0.5427473449707031} +{"epoch": 0, "iter": 1430, "iter_tflops": 29.902495704972655, "iter_time": 0.5739147491455078, "loss": 0.47708818316459656, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 31.765187334140506, "step_time": 0.5402607307434082} +{"epoch": 0, "iter": 1431, "iter_tflops": 30.929251501275804, "iter_time": 0.554862548828125, "loss": 0.2506844997406006, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 33.03335744313975, "step_time": 0.5195198020935059} +{"epoch": 0, "iter": 
1432, "iter_tflops": 31.69243147981846, "iter_time": 0.5415009994506836, "loss": 0.24655947089195251, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 33.712353845620896, "step_time": 0.5090562171936035} +{"epoch": 0, "iter": 1433, "iter_tflops": 28.843538803665133, "iter_time": 0.7152760848999024, "loss": 0.22258195281028748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.45682315638735, "step_time": 0.6773882293701171} +{"epoch": 0, "iter": 1434, "iter_tflops": 12.14197940259936, "iter_time": 1.6991540527343751, "loss": 0.11963503062725067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.385375539355767, "step_time": 1.54131600189209} +{"epoch": 0, "iter": 1435, "iter_tflops": 39.50064255713081, "iter_time": 0.5222976684570313, "loss": 0.14259377121925354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.695445293240255, "step_time": 0.4721566143035889} +{"epoch": 0, "iter": 1436, "iter_tflops": 41.1070614300255, "iter_time": 0.5018868484497071, "loss": 0.17880606651306152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.487653205426916, "step_time": 0.4637487487792969} +{"epoch": 0, "iter": 1437, "iter_tflops": 17.186842290304675, "iter_time": 1.2004004669189454, "loss": 0.3986385762691498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.457165239170592, "step_time": 1.1177823486328125} +{"epoch": 0, "iter": 1438, "iter_tflops": 15.497741798390862, "iter_time": 1.3312322387695312, "loss": 0.6203638911247253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.65033136896667, "step_time": 1.106205198287964} +{"epoch": 0, "iter": 1439, "iter_tflops": 41.851914184181965, "iter_time": 0.4929545974731445, "loss": 0.4834613800048828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.879622067780375, "step_time": 0.44967880249023434} +{"epoch": 0, "iter": 1440, "iter_tflops": 38.33584843555105, "iter_time": 0.5381671295166015, "loss": 0.5568016171455383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.019805341646894, "step_time": 0.4909849853515625} +{"epoch": 0, 
"iter": 1441, "iter_tflops": 25.264884654692835, "iter_time": 0.816591636657715, "loss": 0.8452271223068237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.020509537151497, "step_time": 0.7635345840454102} +{"epoch": 0, "iter": 1442, "iter_tflops": 12.483596222541408, "iter_time": 1.652656265258789, "loss": 0.6820082664489746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.513856699745972, "step_time": 1.4214756240844726} +{"epoch": 0, "iter": 1443, "iter_tflops": 48.21012348299513, "iter_time": 0.42794110488891596, "loss": 0.9800146818161011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.562772961332975, "step_time": 0.39250390243530275} +{"epoch": 0, "iter": 1444, "iter_tflops": 49.86180734391073, "iter_time": 0.4137654571533203, "loss": 0.8631830811500549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.7446558011399, "step_time": 0.38387246513366696} +{"epoch": 0, "iter": 1445, "iter_tflops": 34.022993999025644, "iter_time": 0.6063867721557616, "loss": 0.43114450573921204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.34846261930342, "step_time": 0.5675919151306152} +{"epoch": 0, "iter": 1446, "iter_tflops": 11.777930092598957, "iter_time": 1.7516739654541016, "loss": 0.6025099754333496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.593906756893139, "step_time": 1.4136785888671877} +{"epoch": 0, "iter": 1447, "iter_tflops": 15.024243998525641, "iter_time": 1.373186798095703, "loss": 0.42227280139923096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.13423071596397, "step_time": 1.2040863609313963} +{"epoch": 0, "iter": 1448, "iter_tflops": 14.72307263427907, "iter_time": 1.4012763519287108, "loss": 0.46111178398132324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.546126351582178, "step_time": 1.175820411682129} +{"epoch": 0, "iter": 1449, "iter_tflops": 19.55025740359303, "iter_time": 0.7520911026000977, "loss": 0.511874258518219, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 21.32751378152652, "step_time": 0.6894181289672853} +{"epoch": 0, 
"iter": 1450, "iter_tflops": 22.10585639294075, "iter_time": 0.6651438598632813, "loss": 0.4586111307144165, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 23.686961078047368, "step_time": 0.6207455062866212} +{"epoch": 0, "iter": 1451, "iter_tflops": 22.80560570926517, "iter_time": 0.644735107421875, "loss": 0.37700220942497253, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.542719769881252, "step_time": 0.5991012725830078} +{"epoch": 0, "iter": 1452, "iter_tflops": 22.882623850153216, "iter_time": 0.6425650634765625, "loss": 0.2996348738670349, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.585584097555042, "step_time": 0.5980567550659179} +{"epoch": 0, "iter": 1453, "iter_tflops": 30.447433959900145, "iter_time": 0.596089744567871, "loss": 0.0966637060046196, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 34.141756270548804, "step_time": 0.5315896167755128} +{"epoch": 0, "iter": 1454, "iter_tflops": 34.507779526954714, "iter_time": 0.525951057434082, "loss": 0.0771026685833931, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 38.697431914164405, "step_time": 0.46900794792175293} +{"epoch": 0, "iter": 1455, "iter_tflops": 36.205144456691485, "iter_time": 0.5012934875488282, "loss": 0.11675433814525604, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 40.02067028127095, "step_time": 0.45350072860717766} +{"epoch": 0, "iter": 1456, "iter_tflops": 34.75257441929955, "iter_time": 0.5222462921142579, "loss": 0.1804218888282776, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 38.21933330247441, "step_time": 0.47487492752075194} +{"epoch": 0, "iter": 1457, "iter_tflops": 22.04471363091963, "iter_time": 0.9358748703002929, "loss": 0.0058823698200285435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.690608892087223, "step_time": 0.8708553504943848} +{"epoch": 0, "iter": 1458, "iter_tflops": 24.52531818020262, "iter_time": 0.8412161407470702, "loss": 0.007641846314072609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.316385254984468, "step_time": 0.6805261688232421} 
+{"epoch": 0, "iter": 1459, "iter_tflops": 61.78979892831599, "iter_time": 0.3338915786743164, "loss": 0.010151121765375137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 68.06849491430329, "step_time": 0.30309313488006595} +{"epoch": 0, "iter": 1460, "iter_tflops": 61.24576908494317, "iter_time": 0.33685744857788086, "loss": 0.009824756532907486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.85051965810254, "step_time": 0.3086153049468994} +{"epoch": 0, "iter": 1461, "iter_tflops": 45.43056472626475, "iter_time": 0.45412364196777344, "loss": 0.045770078897476196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.73956663564127, "step_time": 0.4147823333740234} +{"epoch": 0, "iter": 1462, "iter_tflops": 11.866212238595073, "iter_time": 1.7386418762207032, "loss": 0.03412579372525215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.650820718090662, "step_time": 1.5113445510864256} +{"epoch": 0, "iter": 1463, "iter_tflops": 10.390227226704225, "iter_time": 1.9856248626708988, "loss": 0.07016605138778687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.969878066995998, "step_time": 1.7235842666625976} +{"epoch": 0, "iter": 1464, "iter_tflops": 21.61931726590204, "iter_time": 0.9542897796630859, "loss": 0.036595456302165985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.3386898843146, "step_time": 0.8476665592193603} +{"epoch": 0, "iter": 1465, "iter_tflops": 21.6598549811295, "iter_time": 0.7676641845703126, "loss": 0.3398280739784241, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 22.884406709051493, "step_time": 0.7265862350463868} +{"epoch": 0, "iter": 1466, "iter_tflops": 11.251601320352645, "iter_time": 1.4777891998291017, "loss": 0.17687413096427917, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 15.181496676375813, "step_time": 1.0952474098205567} +{"epoch": 0, "iter": 1467, "iter_tflops": 25.285289618918164, "iter_time": 0.6575955886840821, "loss": 0.336005836725235, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 27.260051314882976, "step_time": 
0.6099583129882813} +{"epoch": 0, "iter": 1468, "iter_tflops": 26.777657980859033, "iter_time": 0.6209465713500978, "loss": 0.22436273097991943, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 28.502768386655067, "step_time": 0.5833642082214356} +{"epoch": 0, "iter": 1469, "iter_tflops": 33.19607270492423, "iter_time": 0.6214919967651367, "loss": 0.8962935209274292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.47953028671984, "step_time": 0.5655526084899902} +{"epoch": 0, "iter": 1470, "iter_tflops": 40.82782940201211, "iter_time": 0.5053193817138673, "loss": 1.018524408340454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7493332086334, "step_time": 0.47157503890991215} +{"epoch": 0, "iter": 1471, "iter_tflops": 46.49993106775306, "iter_time": 0.4436800880432129, "loss": 1.1095575094223022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04274250801784, "step_time": 0.41226944160461426} +{"epoch": 0, "iter": 1472, "iter_tflops": 49.8354699186751, "iter_time": 0.41398412704467774, "loss": 1.2364712953567505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.65688484506563, "step_time": 0.38450039672851555} +{"epoch": 0, "iter": 1473, "iter_tflops": 27.33634070591407, "iter_time": 0.7547130661010741, "loss": 0.24369874596595764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.77571874272637, "step_time": 0.7169618835449219} +{"epoch": 0, "iter": 1474, "iter_tflops": 13.027324011019871, "iter_time": 1.58367854309082, "loss": 0.3124570846557617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.981285730189866, "step_time": 1.3771243591308595} +{"epoch": 0, "iter": 1475, "iter_tflops": 33.436023818623326, "iter_time": 0.6170319061279297, "loss": 0.19329431653022766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.16199903536849, "step_time": 0.48932911109924315} +{"epoch": 0, "iter": 1476, "iter_tflops": 40.60310341400853, "iter_time": 0.5081161727905273, "loss": 0.3039422631263733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55823042959211, "step_time": 
0.4630142021179199} +{"epoch": 0, "iter": 1477, "iter_tflops": 34.83577485143002, "iter_time": 0.5922386856079103, "loss": 0.3531339168548584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.70506276996922, "step_time": 0.5471703796386719} +{"epoch": 0, "iter": 1478, "iter_tflops": 38.8974448289471, "iter_time": 0.5303971405029296, "loss": 0.36624935269355774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.50305560900759, "step_time": 0.4854025955200195} +{"epoch": 0, "iter": 1479, "iter_tflops": 42.81540982727617, "iter_time": 0.4818614044189453, "loss": 0.3017853796482086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.476235427256974, "step_time": 0.44390629577636714} +{"epoch": 0, "iter": 1480, "iter_tflops": 40.27804412182321, "iter_time": 0.5122168655395507, "loss": 0.33899569511413574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.93504981423333, "step_time": 0.4695816574096679} +{"epoch": 0, "iter": 1481, "iter_tflops": 13.523618437498374, "iter_time": 0.9637785644531249, "loss": 0.05977785587310791, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 14.363094314190258, "step_time": 0.907448860168457} +{"epoch": 0, "iter": 1482, "iter_tflops": 19.385859390993687, "iter_time": 0.6723340606689454, "loss": 0.0527694970369339, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 30.443617647878735, "step_time": 0.42812827682495114} +{"epoch": 0, "iter": 1483, "iter_tflops": 38.627149969560996, "iter_time": 0.3374251937866211, "loss": 0.04214020073413849, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 42.19433289943753, "step_time": 0.3088986759185791} +{"epoch": 0, "iter": 1484, "iter_tflops": 36.54813384524709, "iter_time": 0.356619400024414, "loss": 0.04399874806404114, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 39.672615495727136, "step_time": 0.3285332565307617} +{"epoch": 0, "iter": 1485, "iter_tflops": 43.579139505957194, "iter_time": 0.47341672515869143, "loss": 0.5534089207649231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.27148181947597, 
"step_time": 0.4364384765625} +{"epoch": 0, "iter": 1486, "iter_tflops": 46.948806798760465, "iter_time": 0.43943807983398436, "loss": 0.45231908559799194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.745400408332124, "step_time": 0.39870391082763673} +{"epoch": 0, "iter": 1487, "iter_tflops": 46.811750953137874, "iter_time": 0.4407246704101563, "loss": 0.4707159996032715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.63192789817932, "step_time": 0.4074720115661621} +{"epoch": 0, "iter": 1488, "iter_tflops": 49.017542148744134, "iter_time": 0.4208920440673828, "loss": 0.49253666400909424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.99029867200261, "step_time": 0.3893371810913086} +{"epoch": 0, "iter": 1489, "iter_tflops": 35.70701442097906, "iter_time": 0.5777882537841796, "loss": 0.5754873156547546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21120378684714, "step_time": 0.5399226264953614} +{"epoch": 0, "iter": 1490, "iter_tflops": 14.340054067960866, "iter_time": 1.4387040252685546, "loss": 0.4342368245124817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.824284328753205, "step_time": 1.1574710731506346} +{"epoch": 0, "iter": 1491, "iter_tflops": 41.8613330990474, "iter_time": 0.4928436813354492, "loss": 0.5226204991340637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.30010603731385, "step_time": 0.4455949516296387} +{"epoch": 0, "iter": 1492, "iter_tflops": 43.333823618252886, "iter_time": 0.47609677124023436, "loss": 0.4890702962875366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.92451540210681, "step_time": 0.43966556358337405} +{"epoch": 0, "iter": 1493, "iter_tflops": 37.977339540151675, "iter_time": 0.5432474670410157, "loss": 1.1001890897750854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.5103334449845, "step_time": 0.49701102828979493} +{"epoch": 0, "iter": 1494, "iter_tflops": 36.655027687040764, "iter_time": 0.5628448486328125, "loss": 0.9464007019996643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.13268459076448, 
"step_time": 0.5140721015930176} +{"epoch": 0, "iter": 1495, "iter_tflops": 39.889988695474706, "iter_time": 0.5171997833251953, "loss": 0.9302618503570557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.47372243155716, "step_time": 0.47456468772888183} +{"epoch": 0, "iter": 1496, "iter_tflops": 41.092263213409, "iter_time": 0.5020675888061523, "loss": 1.0195915699005127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.283779048481456, "step_time": 0.4658837604522705} +{"epoch": 0, "iter": 1497, "iter_tflops": 19.418194436511072, "iter_time": 1.0624619903564454, "loss": 1.0115684270858765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.37193143235591, "step_time": 1.0127215270996093} +{"epoch": 0, "iter": 1498, "iter_tflops": 19.074853151024524, "iter_time": 1.0815859680175781, "loss": 0.8629145622253418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.70248651822269, "step_time": 0.9087592010498046} +{"epoch": 0, "iter": 1499, "iter_tflops": 39.71596714016163, "iter_time": 0.5194659729003906, "loss": 1.1186697483062744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.40631005499591, "step_time": 0.4865099906921387} +{"epoch": 0, "iter": 1500, "iter_tflops": 47.006094990120445, "iter_time": 0.4389025192260742, "loss": 1.09824800491333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49251660533254, "step_time": 0.40859705352783204} +{"epoch": 0, "iter": 1501, "iter_tflops": 28.09971918412053, "iter_time": 0.7342099533081055, "loss": 0.9594500660896301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.57285920879129, "step_time": 0.6976360778808594} +{"epoch": 0, "iter": 1502, "iter_tflops": 16.40864248648964, "iter_time": 1.2573309173583984, "loss": 0.9140841364860535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.214898722173487, "step_time": 0.9724813575744629} +{"epoch": 0, "iter": 1503, "iter_tflops": 44.310791228138264, "iter_time": 0.4655997543334961, "loss": 0.909492015838623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.52427012356847, 
"step_time": 0.4341169986724853} +{"epoch": 0, "iter": 1504, "iter_tflops": 45.64043033871163, "iter_time": 0.452035472869873, "loss": 1.16736900806427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.69687346802545, "step_time": 0.4236636161804199} +{"epoch": 0, "iter": 1505, "iter_tflops": 21.489133125056696, "iter_time": 0.9600709991455078, "loss": 0.32824042439460754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.39630962745441, "step_time": 0.9211827239990235} +{"epoch": 0, "iter": 1506, "iter_tflops": 13.946925431463592, "iter_time": 1.4792574615478515, "loss": 0.3463454246520996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.526124227675517, "step_time": 1.1771623458862304} +{"epoch": 0, "iter": 1507, "iter_tflops": 44.888743140089844, "iter_time": 0.4596050605773926, "loss": 0.47803494334220886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42216118167629, "step_time": 0.4260671768188476} +{"epoch": 0, "iter": 1508, "iter_tflops": 50.113184261936546, "iter_time": 0.4116899337768554, "loss": 0.3883955478668213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.36785582526126, "step_time": 0.3794722671508789} +{"epoch": 0, "iter": 1509, "iter_tflops": 47.07430907859197, "iter_time": 0.4382665176391602, "loss": 0.6093682646751404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.690913882999034, "step_time": 0.39912417793273924} +{"epoch": 0, "iter": 1510, "iter_tflops": 45.769266591163586, "iter_time": 0.4507630348205566, "loss": 0.7457941770553589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.789666902124374, "step_time": 0.41436496353149416} +{"epoch": 0, "iter": 1511, "iter_tflops": 47.57246057396017, "iter_time": 0.43367724227905274, "loss": 0.7897939682006836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.4798118851038, "step_time": 0.4007608566284179} +{"epoch": 0, "iter": 1512, "iter_tflops": 51.382242555471954, "iter_time": 0.401521858215332, "loss": 0.8179835677146912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.844066754227576, 
"step_time": 0.36944110107421874} +{"epoch": 0, "iter": 1513, "iter_tflops": 25.94926776894999, "iter_time": 0.7950549392700195, "loss": 1.009963035583496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.400143242071675, "step_time": 0.752955680847168} +{"epoch": 0, "iter": 1514, "iter_tflops": 22.2499723480636, "iter_time": 0.927241310119629, "loss": 0.8729179501533508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.814465281022187, "step_time": 0.7694016380310059} +{"epoch": 0, "iter": 1515, "iter_tflops": 42.59805120723449, "iter_time": 0.484320125579834, "loss": 0.8575042486190796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61110334722918, "step_time": 0.45232612228393554} +{"epoch": 0, "iter": 1516, "iter_tflops": 50.87712335201556, "iter_time": 0.4055082550048829, "loss": 1.0888259410858154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.78911896763249, "step_time": 0.3765545768737793} +{"epoch": 0, "iter": 1517, "iter_tflops": 23.17712467838565, "iter_time": 0.7777394638061522, "loss": 0.06980231404304504, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 24.37704679622135, "step_time": 0.7394564514160157} +{"epoch": 0, "iter": 1518, "iter_tflops": 14.051191854401527, "iter_time": 1.282863739013672, "loss": 0.06764672696590424, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 16.92104483748392, "step_time": 1.065286729812622} +{"epoch": 0, "iter": 1519, "iter_tflops": 39.23033847652678, "iter_time": 0.45948531723022457, "loss": 0.021709268912672997, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 43.2269683297878, "step_time": 0.41700274658203124} +{"epoch": 0, "iter": 1520, "iter_tflops": 41.63015754212564, "iter_time": 0.43299774932861324, "loss": 0.11155913025140762, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 45.95553185301938, "step_time": 0.3922436275482178} +{"epoch": 0, "iter": 1521, "iter_tflops": 25.25462540470143, "iter_time": 0.8169233627319336, "loss": 0.04523409903049469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.87254754382083, 
"step_time": 0.767738655090332} +{"epoch": 0, "iter": 1522, "iter_tflops": 38.714218880406335, "iter_time": 0.5329073944091797, "loss": 0.04588515684008598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.00448561588473, "step_time": 0.4688406925201416} +{"epoch": 0, "iter": 1523, "iter_tflops": 58.417131403301845, "iter_time": 0.3531685485839844, "loss": 0.10075340420007706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.94316203983447, "step_time": 0.3226473770141602} +{"epoch": 0, "iter": 1524, "iter_tflops": 52.224299445551964, "iter_time": 0.39504777908325195, "loss": 0.06472202390432358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.89408282740256, "step_time": 0.3626228332519531} +{"epoch": 0, "iter": 1525, "iter_tflops": 32.95135749385897, "iter_time": 0.6261075439453125, "loss": 0.8360165357589722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.14283177669314, "step_time": 0.5870640602111816} +{"epoch": 0, "iter": 1526, "iter_tflops": 12.92429793259594, "iter_time": 1.5963028411865234, "loss": 0.8522027730941772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.28895439778818, "step_time": 1.2665695419311525} +{"epoch": 0, "iter": 1527, "iter_tflops": 35.71965489654569, "iter_time": 0.5775837860107422, "loss": 1.060901403427124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.751267382435785, "step_time": 0.5323979034423829} +{"epoch": 0, "iter": 1528, "iter_tflops": 45.33343920397535, "iter_time": 0.4550965881347656, "loss": 1.0342693328857422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.171707437283374, "step_time": 0.41957244491577145} +{"epoch": 0, "iter": 1529, "iter_tflops": 20.7546265446503, "iter_time": 0.9940479278564454, "loss": 1.1532976627349854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.962781333696224, "step_time": 0.9393661575317382} +{"epoch": 0, "iter": 1530, "iter_tflops": 12.978462806851939, "iter_time": 1.5896407623291018, "loss": 0.9009199738502502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.36578214156506, 
"step_time": 1.2606237411499024} +{"epoch": 0, "iter": 1531, "iter_tflops": 38.9340579638469, "iter_time": 0.5298983612060546, "loss": 0.9225150346755981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.220922997195096, "step_time": 0.4886461982727051} +{"epoch": 0, "iter": 1532, "iter_tflops": 38.45448449408359, "iter_time": 0.5365068283081055, "loss": 0.8389502763748169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.54090681721773, "step_time": 0.4966452369689942} +{"epoch": 0, "iter": 1533, "iter_tflops": 28.680022966021838, "iter_time": 0.719354148864746, "loss": 0.15959277749061584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.87678244256766, "step_time": 0.6472138004302979} +{"epoch": 0, "iter": 1534, "iter_tflops": 40.34390848231394, "iter_time": 0.5113806343078613, "loss": 0.15395145118236542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.01280408975247, "step_time": 0.45833833122253415} +{"epoch": 0, "iter": 1535, "iter_tflops": 47.259338358833595, "iter_time": 0.43655062103271486, "loss": 0.22628678381443024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.77611326481361, "step_time": 0.3984674053192138} +{"epoch": 0, "iter": 1536, "iter_tflops": 45.82396691191295, "iter_time": 0.4502249565124512, "loss": 0.15124055743217468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.036111010173784, "step_time": 0.41232408142089844} +{"epoch": 0, "iter": 1537, "iter_tflops": 22.949529101529485, "iter_time": 0.8989767684936524, "loss": 0.23373344540596008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.59494329137362, "step_time": 0.8388347663879394} +{"epoch": 0, "iter": 1538, "iter_tflops": 23.213914212808582, "iter_time": 0.888738250732422, "loss": 0.34251779317855835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.558664632346584, "step_time": 0.7224110012054443} +{"epoch": 0, "iter": 1539, "iter_tflops": 48.71277978029548, "iter_time": 0.42352527618408203, "loss": 0.19697818160057068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
53.05204233960973, "step_time": 0.3888840579986572} +{"epoch": 0, "iter": 1540, "iter_tflops": 51.01444281578059, "iter_time": 0.4044167175292969, "loss": 0.23840515315532684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.44110495186093, "step_time": 0.37212630462646484} +{"epoch": 0, "iter": 1541, "iter_tflops": 49.28345691646796, "iter_time": 0.4186210708618164, "loss": 0.015200858935713768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.06792472408235, "step_time": 0.38157731437683107} +{"epoch": 0, "iter": 1542, "iter_tflops": 10.971482750918364, "iter_time": 1.8804289245605468, "loss": 0.010490434244275093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.155976952108205, "step_time": 1.5681916732788088} +{"epoch": 0, "iter": 1543, "iter_tflops": 13.065079548627969, "iter_time": 1.5791020202636719, "loss": 0.005099339876323938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.01866525001574, "step_time": 1.287940860748291} +{"epoch": 0, "iter": 1544, "iter_tflops": 16.851921535785994, "iter_time": 1.2242576293945313, "loss": 0.009716307744383812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.03858593494083, "step_time": 1.0295683326721192} +{"epoch": 0, "iter": 1545, "iter_tflops": 21.222145418402334, "iter_time": 0.6736159057617187, "loss": 0.45973673462867737, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 22.591130887633145, "step_time": 0.6327958869934082} +{"epoch": 0, "iter": 1546, "iter_tflops": 22.727974869790184, "iter_time": 0.628985855102539, "loss": 0.37936726212501526, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 24.539594448731037, "step_time": 0.582551383972168} +{"epoch": 0, "iter": 1547, "iter_tflops": 26.20998015785267, "iter_time": 0.5454248580932617, "loss": 0.30117982625961304, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 27.950311459865812, "step_time": 0.5114638786315918} +{"epoch": 0, "iter": 1548, "iter_tflops": 26.57897508123999, "iter_time": 0.5378527450561523, "loss": 0.35312917828559875, "lr": 3e-05, "seqlen": 5728.0, 
"step_tflops": 28.26480831973653, "step_time": 0.5057729225158691} +{"epoch": 0, "iter": 1549, "iter_tflops": 25.772994479151873, "iter_time": 0.8004926834106445, "loss": 1.1636894941329956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.192728967910817, "step_time": 0.7586988983154297} +{"epoch": 0, "iter": 1550, "iter_tflops": 17.725332544245713, "iter_time": 1.163932662963867, "loss": 0.8663269281387329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.677564229295307, "step_time": 0.9517256317138673} +{"epoch": 0, "iter": 1551, "iter_tflops": 47.28156383830045, "iter_time": 0.4363454132080078, "loss": 1.0376466512680054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.320290992916874, "step_time": 0.40200655746459957} +{"epoch": 0, "iter": 1552, "iter_tflops": 47.40210286915848, "iter_time": 0.43523582839965824, "loss": 0.7525251507759094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.0495511322263, "step_time": 0.40413858795166013} +{"epoch": 0, "iter": 1553, "iter_tflops": 30.467754766201033, "iter_time": 0.6771451873779297, "loss": 1.018100619316101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.29548752297046, "step_time": 0.6388227920532227} +{"epoch": 0, "iter": 1554, "iter_tflops": 13.897672899089889, "iter_time": 1.4844998626708985, "loss": 1.1028724908828735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.128998363013558, "step_time": 1.204454170227051} +{"epoch": 0, "iter": 1555, "iter_tflops": 34.972823463131874, "iter_time": 0.5899178695678711, "loss": 1.002774715423584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.17715204092615, "step_time": 0.5404042053222656} +{"epoch": 0, "iter": 1556, "iter_tflops": 34.155583986634376, "iter_time": 0.6040328140258789, "loss": 1.1634747982025146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.799486717588266, "step_time": 0.5606353607177734} +{"epoch": 0, "iter": 1557, "iter_tflops": 28.38208883527578, "iter_time": 0.7269053955078124, "loss": 0.05545908212661743, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 30.6119233062319, "step_time": 0.6739561347961427} +{"epoch": 0, "iter": 1558, "iter_tflops": 8.94382049414406, "iter_time": 2.306742797851562, "loss": 0.04929462820291519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.544222159781336, "step_time": 1.956625457763672} +{"epoch": 0, "iter": 1559, "iter_tflops": 14.61339204935887, "iter_time": 1.4117936096191406, "loss": 0.0611100047826767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.081391582776597, "step_time": 1.207811050415039} +{"epoch": 0, "iter": 1560, "iter_tflops": 43.26090748509275, "iter_time": 0.47689923095703124, "loss": 0.04511766508221626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.49173131794273, "step_time": 0.4344144325256348} +{"epoch": 0, "iter": 1561, "iter_tflops": 11.334222965266683, "iter_time": 1.3188906860351564, "loss": 0.2285631000995636, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 11.99113384090085, "step_time": 1.2466378326416014} +{"epoch": 0, "iter": 1562, "iter_tflops": 15.333038989108946, "iter_time": 0.9749274826049805, "loss": 0.23343248665332794, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 18.09878825150711, "step_time": 0.8259448585510254} +{"epoch": 0, "iter": 1563, "iter_tflops": 27.082875464558057, "iter_time": 0.5519576797485353, "loss": 0.23563189804553986, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.858060377318015, "step_time": 0.5180043601989747} +{"epoch": 0, "iter": 1564, "iter_tflops": 26.389692863147445, "iter_time": 0.5664560470581055, "loss": 0.26977112889289856, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 27.94245010935984, "step_time": 0.5349781799316407} +{"epoch": 0, "iter": 1565, "iter_tflops": 30.826901324371995, "iter_time": 0.6692561569213867, "loss": 0.08290299773216248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.91188356723746, "step_time": 0.6268584861755371} +{"epoch": 0, "iter": 1566, "iter_tflops": 13.735332189231029, "iter_time": 1.5020454711914062, "loss": 0.14197474718093872, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 15.848089693245264, "step_time": 1.3018031768798826} +{"epoch": 0, "iter": 1567, "iter_tflops": 14.902734603535382, "iter_time": 1.3843830718994141, "loss": 0.12122529000043869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.682290495154465, "step_time": 1.1667658958435059} +{"epoch": 0, "iter": 1568, "iter_tflops": 27.291891190644005, "iter_time": 0.7559422454833984, "loss": 0.13271944224834442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.762576582116964, "step_time": 0.5934857406616211} +{"epoch": 0, "iter": 1569, "iter_tflops": 16.350422036261197, "iter_time": 0.9042721481323241, "loss": 0.3091094195842743, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 17.12421883957203, "step_time": 0.8634105529785157} +{"epoch": 0, "iter": 1570, "iter_tflops": 8.765176047002672, "iter_time": 1.6868150939941404, "loss": 0.36437755823135376, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 10.970716943664453, "step_time": 1.347699638366699} +{"epoch": 0, "iter": 1571, "iter_tflops": 9.374419580739627, "iter_time": 1.577188980102539, "loss": 0.381419837474823, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 11.482999277428608, "step_time": 1.2875757369995118} +{"epoch": 0, "iter": 1572, "iter_tflops": 23.49477916188527, "iter_time": 0.629298583984375, "loss": 0.3979615569114685, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 25.22237762205443, "step_time": 0.5861949844360352} +{"epoch": 0, "iter": 1573, "iter_tflops": 10.476916273873705, "iter_time": 1.286732666015625, "loss": 0.3432799279689789, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 11.091606582695247, "step_time": 1.2154226989746093} +{"epoch": 0, "iter": 1574, "iter_tflops": 11.496200121110599, "iter_time": 1.172647506713867, "loss": 0.2647237181663513, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 14.908674829446502, "step_time": 0.9042380065917968} +{"epoch": 0, "iter": 1575, "iter_tflops": 25.44330629645526, "iter_time": 0.5298442840576172, "loss": 0.37036630511283875, "lr": 3e-05, "seqlen": 
5408.0, "step_tflops": 27.052914332820773, "step_time": 0.4983193397521972} +{"epoch": 0, "iter": 1576, "iter_tflops": 25.31155656069297, "iter_time": 0.5326021881103515, "loss": 0.19693122804164886, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 26.881976828659948, "step_time": 0.5014880599975586} +{"epoch": 0, "iter": 1577, "iter_tflops": 27.02380252117677, "iter_time": 0.7634415435791015, "loss": 0.14081813395023346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.69947551455512, "step_time": 0.7188665695190429} +{"epoch": 0, "iter": 1578, "iter_tflops": 17.60143336360547, "iter_time": 1.1721257629394533, "loss": 0.16371789574623108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.354420756159197, "step_time": 0.8137079410552979} +{"epoch": 0, "iter": 1579, "iter_tflops": 53.8513340847569, "iter_time": 0.38311202239990233, "loss": 0.1814889758825302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.59553379504067, "step_time": 0.3520932769775391} +{"epoch": 0, "iter": 1580, "iter_tflops": 47.72790745751644, "iter_time": 0.43226478195190426, "loss": 0.1629776656627655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.53593887284587, "step_time": 0.40032439422607413} +{"epoch": 0, "iter": 1581, "iter_tflops": 34.952044364073764, "iter_time": 0.5902685775756836, "loss": 0.00382256624288857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.39266575064062, "step_time": 0.5517417144775391} +{"epoch": 0, "iter": 1582, "iter_tflops": 11.219007696350198, "iter_time": 1.8389410247802735, "loss": 0.01897738128900528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.00153618781666, "step_time": 1.4734878540039065} +{"epoch": 0, "iter": 1583, "iter_tflops": 16.212625024058777, "iter_time": 1.2725325775146485, "loss": 0.015924330800771713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.443674703455496, "step_time": 1.1185999450683595} +{"epoch": 0, "iter": 1584, "iter_tflops": 26.79419315287422, "iter_time": 0.769983757019043, "loss": 0.007600902579724789, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 32.706396194534115, "step_time": 0.630796905517578} +{"epoch": 0, "iter": 1585, "iter_tflops": 21.138110179759483, "iter_time": 0.7265245513916015, "loss": 0.46459585428237915, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 22.410229410103405, "step_time": 0.685283302307129} +{"epoch": 0, "iter": 1586, "iter_tflops": 19.215978510222794, "iter_time": 0.7991971893310547, "loss": 0.3870704472064972, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.107670499324197, "step_time": 0.6645999221801757} +{"epoch": 0, "iter": 1587, "iter_tflops": 27.938289517185517, "iter_time": 0.5496884841918945, "loss": 0.3567793369293213, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.77054960749176, "step_time": 0.5158573226928711} +{"epoch": 0, "iter": 1588, "iter_tflops": 27.35699526450634, "iter_time": 0.5613685226440429, "loss": 0.43955954909324646, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.102335786294724, "step_time": 0.5277018356323242} +{"epoch": 0, "iter": 1589, "iter_tflops": 33.96227066607038, "iter_time": 0.6074709701538086, "loss": 0.7891072034835815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.524959516424886, "step_time": 0.5648491821289062} +{"epoch": 0, "iter": 1590, "iter_tflops": 20.49855261392676, "iter_time": 1.0064658660888672, "loss": 1.1316249370574951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.794354814069017, "step_time": 0.8320883388519287} +{"epoch": 0, "iter": 1591, "iter_tflops": 49.07593101072365, "iter_time": 0.4203912811279297, "loss": 0.9734009504318237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.08054465238826, "step_time": 0.38867524147033694} +{"epoch": 0, "iter": 1592, "iter_tflops": 49.38110856423422, "iter_time": 0.41779324340820306, "loss": 1.113845705986023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.37398640833619, "step_time": 0.386538366317749} +{"epoch": 0, "iter": 1593, "iter_tflops": 24.570921065174478, "iter_time": 0.8396548690795897, "loss": 0.17818158864974976, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 25.75139855562935, "step_time": 0.8011640014648438} +{"epoch": 0, "iter": 1594, "iter_tflops": 29.215505964897613, "iter_time": 0.706169303894043, "loss": 0.17837482690811157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.0995796261545, "step_time": 0.5561004657745361} +{"epoch": 0, "iter": 1595, "iter_tflops": 48.52550498307585, "iter_time": 0.4251597900390625, "loss": 0.16364113986492157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.3412309124382, "step_time": 0.3941652336120606} +{"epoch": 0, "iter": 1596, "iter_tflops": 48.53306931316457, "iter_time": 0.42509352493286134, "loss": 0.2165839970111847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.676826043039824, "step_time": 0.39165407371520994} +{"epoch": 0, "iter": 1597, "iter_tflops": 39.27162909172632, "iter_time": 0.5253434600830077, "loss": 0.01888134703040123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56948481756241, "step_time": 0.4846451301574707} +{"epoch": 0, "iter": 1598, "iter_tflops": 50.99781369250728, "iter_time": 0.4045485877990722, "loss": 0.021961065009236336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.472113887369886, "step_time": 0.36533241081237794} +{"epoch": 0, "iter": 1599, "iter_tflops": 59.1650621177354, "iter_time": 0.3487039947509766, "loss": 0.009400904178619385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.07032322523969, "step_time": 0.3170584144592285} +{"epoch": 0, "iter": 1600, "iter_tflops": 55.56768104053409, "iter_time": 0.37127864837646485, "loss": 0.017165500670671463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.60881520208488, "step_time": 0.3403975715637207} +{"epoch": 0, "iter": 1601, "iter_tflops": 26.40443905768903, "iter_time": 0.7813494338989259, "loss": 0.7324042916297913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.641610229265176, "step_time": 0.7463781356811522} +{"epoch": 0, "iter": 1602, "iter_tflops": 12.974512990897665, "iter_time": 1.5901246948242185, "loss": 0.7360182404518127, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 15.553752316449046, "step_time": 1.326438346862793} +{"epoch": 0, "iter": 1603, "iter_tflops": 38.44287034591485, "iter_time": 0.536668914794922, "loss": 0.7971367835998535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.088967814655724, "step_time": 0.4901781768798828} +{"epoch": 0, "iter": 1604, "iter_tflops": 38.76045558638685, "iter_time": 0.5322716979980469, "loss": 0.7376840710639954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.2202924729929, "step_time": 0.4886534957885742} +{"epoch": 0, "iter": 1605, "iter_tflops": 37.902392861088295, "iter_time": 0.544321662902832, "loss": 0.13320037722587585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08057460829648, "step_time": 0.4902759456634521} +{"epoch": 0, "iter": 1606, "iter_tflops": 40.26225401227404, "iter_time": 0.5124177474975586, "loss": 0.16438844799995422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.291297555947594, "step_time": 0.4658046760559082} +{"epoch": 0, "iter": 1607, "iter_tflops": 41.55331639256501, "iter_time": 0.49649691772460935, "loss": 0.14181271195411682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61281996482808, "step_time": 0.4523090991973877} +{"epoch": 0, "iter": 1608, "iter_tflops": 37.634404641909065, "iter_time": 0.5481976852416992, "loss": 0.11144337803125381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.16217736268956, "step_time": 0.5012148246765137} +{"epoch": 0, "iter": 1609, "iter_tflops": 36.75769614380153, "iter_time": 0.5612727584838867, "loss": 0.15129324793815613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.79418276749818, "step_time": 0.5057361640930176} +{"epoch": 0, "iter": 1610, "iter_tflops": 41.82570957408235, "iter_time": 0.4932634429931641, "loss": 0.11474670469760895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.210043167099116, "step_time": 0.4464634113311768} +{"epoch": 0, "iter": 1611, "iter_tflops": 39.6541529115709, "iter_time": 0.5202757339477538, "loss": 0.12261785566806793, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 43.48637271992322, "step_time": 0.47442663574218746} +{"epoch": 0, "iter": 1612, "iter_tflops": 43.12112354936943, "iter_time": 0.4784451751708984, "loss": 0.1663755178451538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.12542622076928, "step_time": 0.43779112815856935} +{"epoch": 0, "iter": 1613, "iter_tflops": 19.150881397064296, "iter_time": 1.0772921142578127, "loss": 1.0640592575073242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.705120888770864, "step_time": 0.9964246826171876} +{"epoch": 0, "iter": 1614, "iter_tflops": 18.77751137053662, "iter_time": 1.098712875366211, "loss": 1.1416428089141846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.83177292427065, "step_time": 0.9036132926940916} +{"epoch": 0, "iter": 1615, "iter_tflops": 39.62839842025612, "iter_time": 0.5206138610839844, "loss": 0.9277834892272949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.18651613005717, "step_time": 0.4777207183837891} +{"epoch": 0, "iter": 1616, "iter_tflops": 39.675838787535206, "iter_time": 0.5199913635253905, "loss": 1.0313317775726318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.09822252914863, "step_time": 0.478699405670166} +{"epoch": 0, "iter": 1617, "iter_tflops": 26.503374640740304, "iter_time": 0.7784327011108398, "loss": 0.18296806514263153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.026269062764737, "step_time": 0.7107731781005859} +{"epoch": 0, "iter": 1618, "iter_tflops": 49.181109621526566, "iter_time": 0.4194922332763672, "loss": 0.26265546679496765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.762924962551345, "step_time": 0.383742021560669} +{"epoch": 0, "iter": 1619, "iter_tflops": 54.47302852066978, "iter_time": 0.37873960876464846, "loss": 0.18879850208759308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.26552686638844, "step_time": 0.34811288452148437} +{"epoch": 0, "iter": 1620, "iter_tflops": 51.80788889487672, "iter_time": 0.3982230110168457, "loss": 0.11881498992443085, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.279116225842486, "step_time": 0.36658524322509767} +{"epoch": 0, "iter": 1621, "iter_tflops": 41.758056937469256, "iter_time": 0.49406258392333985, "loss": 0.25088173151016235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2098024441151, "step_time": 0.45634115600585934} +{"epoch": 0, "iter": 1622, "iter_tflops": 19.306282919325888, "iter_time": 1.0686206970214844, "loss": 0.2693449556827545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.617375635699872, "step_time": 0.9543754920959473} +{"epoch": 0, "iter": 1623, "iter_tflops": 50.24496958846369, "iter_time": 0.4106101303100586, "loss": 0.3227267563343048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.567411804511615, "step_time": 0.3780845165252686} +{"epoch": 0, "iter": 1624, "iter_tflops": 46.81696538538014, "iter_time": 0.44067558288574216, "loss": 0.2682611644268036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.858776663762725, "step_time": 0.40565453720092765} +{"epoch": 0, "iter": 1625, "iter_tflops": 39.92528369315661, "iter_time": 0.5167425651550294, "loss": 0.30821412801742554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.17927611799053, "step_time": 0.4778008193969726} +{"epoch": 0, "iter": 1626, "iter_tflops": 14.214052736242722, "iter_time": 1.4514575042724611, "loss": 0.5221889019012451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.34585069992956, "step_time": 1.1893964653015139} +{"epoch": 0, "iter": 1627, "iter_tflops": 42.128759191742304, "iter_time": 0.4897151947021484, "loss": 0.2887633442878723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.507317510931074, "step_time": 0.45335771560668947} +{"epoch": 0, "iter": 1628, "iter_tflops": 51.282062394993766, "iter_time": 0.4023062362670899, "loss": 0.42497146129608154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.86929588579365, "step_time": 0.3692742710113526} +{"epoch": 0, "iter": 1629, "iter_tflops": 36.66020769827854, "iter_time": 0.5627653198242187, "loss": 
0.6609542369842529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.364880393927265, "step_time": 0.5240989761352539} +{"epoch": 0, "iter": 1630, "iter_tflops": 11.488941215572664, "iter_time": 1.7957349700927734, "loss": 0.7648812532424927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.945442629972618, "step_time": 1.4794147491455076} +{"epoch": 0, "iter": 1631, "iter_tflops": 44.603422777987475, "iter_time": 0.4625450744628906, "loss": 0.8457709550857544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41914135127307, "step_time": 0.42609375} +{"epoch": 0, "iter": 1632, "iter_tflops": 46.698885024405605, "iter_time": 0.441789852142334, "loss": 0.8686158657073975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04483600678979, "step_time": 0.4122521953582764} +{"epoch": 0, "iter": 1633, "iter_tflops": 43.78764137463873, "iter_time": 0.47116247558593755, "loss": 0.22963550686836243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98900386147454, "step_time": 0.429912935256958} +{"epoch": 0, "iter": 1634, "iter_tflops": 49.49361456017415, "iter_time": 0.41684354019165043, "loss": 0.05421111732721329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.01108706371602, "step_time": 0.3819788608551025} +{"epoch": 0, "iter": 1635, "iter_tflops": 51.692399765168844, "iter_time": 0.39911270523071296, "loss": 0.09011660516262054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.165270912709886, "step_time": 0.36732830047607423} +{"epoch": 0, "iter": 1636, "iter_tflops": 53.35307008600834, "iter_time": 0.3866899032592774, "loss": 0.1114121824502945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.794689181968046, "step_time": 0.35697213363647456} +{"epoch": 0, "iter": 1637, "iter_tflops": 22.552863604503862, "iter_time": 0.9147882003784179, "loss": 1.1263433694839478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.714056347286178, "step_time": 0.8699942855834961} +{"epoch": 0, "iter": 1638, "iter_tflops": 16.193550246473624, "iter_time": 1.274031524658203, "loss": 
0.9080237150192261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.53774323711521, "step_time": 1.0045453033447267} +{"epoch": 0, "iter": 1639, "iter_tflops": 46.02547381860096, "iter_time": 0.4482537994384766, "loss": 0.9904206395149231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.90551222732741, "step_time": 0.41340310096740723} +{"epoch": 0, "iter": 1640, "iter_tflops": 44.91466168719409, "iter_time": 0.45933983993530275, "loss": 0.9633359909057617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.82445987837286, "step_time": 0.43139208602905277} +{"epoch": 0, "iter": 1641, "iter_tflops": 25.123420279374947, "iter_time": 0.8211896820068361, "loss": 0.9015388488769531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.34579727755347, "step_time": 0.7830886001586914} +{"epoch": 0, "iter": 1642, "iter_tflops": 14.516538878311431, "iter_time": 1.4212129821777342, "loss": 0.9183894395828247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.767260154386005, "step_time": 1.0437002067565917} +{"epoch": 0, "iter": 1643, "iter_tflops": 40.69732931490249, "iter_time": 0.5069397392272948, "loss": 0.8621050119400024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.28648130471676, "step_time": 0.4658553333282471} +{"epoch": 0, "iter": 1644, "iter_tflops": 38.55088112693117, "iter_time": 0.5351652908325195, "loss": 0.9814185500144958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.691311524238344, "step_time": 0.4948535499572754} +{"epoch": 0, "iter": 1645, "iter_tflops": 30.467657618219942, "iter_time": 0.6771473464965821, "loss": 0.9430595636367798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.486913608604986, "step_time": 0.6160942077636719} +{"epoch": 0, "iter": 1646, "iter_tflops": 37.03387477033868, "iter_time": 0.5570870895385742, "loss": 0.9861649870872498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.23206810921552, "step_time": 0.5128022117614747} +{"epoch": 0, "iter": 1647, "iter_tflops": 37.027943688261026, "iter_time": 0.5571763229370117, "loss": 
1.1006823778152466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.0748167825706, "step_time": 0.5148144187927246} +{"epoch": 0, "iter": 1648, "iter_tflops": 44.65501121299466, "iter_time": 0.46201071166992197, "loss": 1.1831529140472412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.58407386870063, "step_time": 0.43357139968872066} +{"epoch": 0, "iter": 1649, "iter_tflops": 31.425390159658402, "iter_time": 0.6565103378295899, "loss": 0.34877151250839233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.575666616344556, "step_time": 0.6144656410217285} +{"epoch": 0, "iter": 1650, "iter_tflops": 18.138105117388815, "iter_time": 1.137444808959961, "loss": 0.3178336024284363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.516950680537633, "step_time": 0.9588297996520997} +{"epoch": 0, "iter": 1651, "iter_tflops": 51.75653780185247, "iter_time": 0.3986181144714356, "loss": 0.2523888349533081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.33042275478454, "step_time": 0.36625135231018063} +{"epoch": 0, "iter": 1652, "iter_tflops": 51.49127300915212, "iter_time": 0.40067165374755864, "loss": 0.39804989099502563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.74578943249557, "step_time": 0.3700924091339111} +{"epoch": 0, "iter": 1653, "iter_tflops": 36.81757444452594, "iter_time": 0.5603599319458008, "loss": 1.0064122676849365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.67990771208727, "step_time": 0.5199380416870117} +{"epoch": 0, "iter": 1654, "iter_tflops": 20.925269056445423, "iter_time": 0.9859416122436523, "loss": 1.1248693466186523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.456310166391333, "step_time": 0.8795540885925293} +{"epoch": 0, "iter": 1655, "iter_tflops": 38.10014099974955, "iter_time": 0.5414965133666992, "loss": 0.9496693015098572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.12130657597774, "step_time": 0.5017129859924316} +{"epoch": 0, "iter": 1656, "iter_tflops": 40.55269887847543, "iter_time": 0.508747730255127, "loss": 
1.2487479448318481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.76011424670693, "step_time": 0.47145885848999025} +{"epoch": 0, "iter": 1657, "iter_tflops": 20.934776665568936, "iter_time": 0.9854938430786133, "loss": 0.8497275710105896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.67428207752318, "step_time": 0.9098896026611327} +{"epoch": 0, "iter": 1658, "iter_tflops": 19.332240600820466, "iter_time": 1.0671858444213869, "loss": 0.8218997716903687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.62894694518755, "step_time": 0.8049918537139893} +{"epoch": 0, "iter": 1659, "iter_tflops": 44.179534906680466, "iter_time": 0.466983039855957, "loss": 1.0062141418457031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.89858785813792, "step_time": 0.4307244625091553} +{"epoch": 0, "iter": 1660, "iter_tflops": 42.657543208385675, "iter_time": 0.4836446723937988, "loss": 0.919898509979248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.736810883078135, "step_time": 0.4510829048156738} +{"epoch": 0, "iter": 1661, "iter_tflops": 21.369617662141035, "iter_time": 0.9654404602050782, "loss": 0.15212656557559967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.35349685231054, "step_time": 0.9229470291137695} +{"epoch": 0, "iter": 1662, "iter_tflops": 14.61840134306679, "iter_time": 1.4113098297119142, "loss": 0.21686041355133057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.74910220140043, "step_time": 1.1623739204406738} +{"epoch": 0, "iter": 1663, "iter_tflops": 41.14601870523961, "iter_time": 0.5014116592407227, "loss": 0.19185477495193481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.008412468466034, "step_time": 0.45838305282592773} +{"epoch": 0, "iter": 1664, "iter_tflops": 39.32111953805275, "iter_time": 0.5246822509765625, "loss": 0.12287997454404831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87341637038723, "step_time": 0.4812094593048095} +{"epoch": 0, "iter": 1665, "iter_tflops": 24.337731810258692, "iter_time": 0.8476999282836915, 
"loss": 0.38630545139312744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.124622056303636, "step_time": 0.7897183532714844} +{"epoch": 0, "iter": 1666, "iter_tflops": 9.013759177800324, "iter_time": 2.2888445434570315, "loss": 0.6151866316795349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.195158333343088, "step_time": 2.02361678314209} +{"epoch": 0, "iter": 1667, "iter_tflops": 12.369364775896663, "iter_time": 1.6679185943603516, "loss": 0.6032046675682068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.604047766409806, "step_time": 1.5165408020019533} +{"epoch": 0, "iter": 1668, "iter_tflops": 40.85988770841091, "iter_time": 0.5049229125976562, "loss": 0.47667068243026733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.89481114402288, "step_time": 0.45954294013977054} +{"epoch": 0, "iter": 1669, "iter_tflops": 13.678047219641975, "iter_time": 1.1616748046875, "loss": 0.27345335483551025, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 14.539408320298222, "step_time": 1.092853469848633} +{"epoch": 0, "iter": 1670, "iter_tflops": 11.541121412375484, "iter_time": 1.3767676696777342, "loss": 0.25951048731803894, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 13.421976301176395, "step_time": 1.1838377952575683} +{"epoch": 0, "iter": 1671, "iter_tflops": 22.818179380103423, "iter_time": 0.6963501586914064, "loss": 0.3813236355781555, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 24.5055597394638, "step_time": 0.6484015464782715} +{"epoch": 0, "iter": 1672, "iter_tflops": 25.303092430877413, "iter_time": 0.6279644622802736, "loss": 0.3234645426273346, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 27.1730370236942, "step_time": 0.5847503471374511} +{"epoch": 0, "iter": 1673, "iter_tflops": 21.191209851560096, "iter_time": 0.9735684585571289, "loss": 0.6431958675384521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.60631436359875, "step_time": 0.9126252593994141} +{"epoch": 0, "iter": 1674, "iter_tflops": 23.679780314286813, "iter_time": 0.8712535858154297, 
"loss": 0.6185286641120911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.517776083668902, "step_time": 0.7780099449157716} +{"epoch": 0, "iter": 1675, "iter_tflops": 47.19966769057979, "iter_time": 0.4371025161743164, "loss": 0.4407927393913269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.60998657162899, "step_time": 0.39975002670288085} +{"epoch": 0, "iter": 1676, "iter_tflops": 47.30626716258818, "iter_time": 0.4361175537109375, "loss": 0.5884746313095093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.08829949002824, "step_time": 0.40383206558227536} +{"epoch": 0, "iter": 1677, "iter_tflops": 42.82497176095762, "iter_time": 0.48175381469726564, "loss": 0.7281729578971863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.68545655048156, "step_time": 0.4419169273376464} +{"epoch": 0, "iter": 1678, "iter_tflops": 47.34973923427857, "iter_time": 0.4357171516418457, "loss": 0.6048046946525574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.44011384949676, "step_time": 0.4010701370239258} +{"epoch": 0, "iter": 1679, "iter_tflops": 51.303863413558844, "iter_time": 0.4021352806091309, "loss": 0.8166955709457397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.53924105545098, "step_time": 0.3714687690734863} +{"epoch": 0, "iter": 1680, "iter_tflops": 46.5221173375089, "iter_time": 0.44346849822998047, "loss": 0.566950798034668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.481626728751365, "step_time": 0.4086851959228515} +{"epoch": 0, "iter": 1681, "iter_tflops": 29.17290129163851, "iter_time": 0.7072006072998046, "loss": 0.6717652082443237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.951487238552705, "step_time": 0.6665622673034668} +{"epoch": 0, "iter": 1682, "iter_tflops": 19.999929209742263, "iter_time": 1.0315583267211914, "loss": 0.543655276298523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.338124888048345, "step_time": 0.847686237335205} +{"epoch": 0, "iter": 1683, "iter_tflops": 37.538188090539784, "iter_time": 0.5496028060913086, 
"loss": 0.6736454963684082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.99523212514412, "step_time": 0.5032559261322022} +{"epoch": 0, "iter": 1684, "iter_tflops": 38.48141562090134, "iter_time": 0.5361313552856446, "loss": 0.668638288974762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59169374888527, "step_time": 0.4960387916564941} +{"epoch": 0, "iter": 1685, "iter_tflops": 30.425911532375192, "iter_time": 0.6780764312744141, "loss": 0.7296680808067322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.516703046845805, "step_time": 0.6155466270446778} +{"epoch": 0, "iter": 1686, "iter_tflops": 41.07134838834332, "iter_time": 0.502323257446289, "loss": 0.9149150848388672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.442915482684974, "step_time": 0.45400021743774416} +{"epoch": 0, "iter": 1687, "iter_tflops": 41.94775658539728, "iter_time": 0.49182829284667967, "loss": 0.7999609112739563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.877661535513106, "step_time": 0.44969801902770995} +{"epoch": 0, "iter": 1688, "iter_tflops": 43.39108496886468, "iter_time": 0.47546848678588866, "loss": 0.8466961979866028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.089582081612576, "step_time": 0.4381243705749512} +{"epoch": 0, "iter": 1689, "iter_tflops": 27.978734311595503, "iter_time": 0.7373848037719727, "loss": 0.7446374893188477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.513719019585114, "step_time": 0.676125171661377} +{"epoch": 0, "iter": 1690, "iter_tflops": 22.130061414858712, "iter_time": 0.9322655334472657, "loss": 0.849975049495697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.01222911711403, "step_time": 0.7637686405181885} +{"epoch": 0, "iter": 1691, "iter_tflops": 45.783017208411366, "iter_time": 0.45062765121459963, "loss": 0.7936657071113586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.414452561436846, "step_time": 0.41751132392883294} +{"epoch": 0, "iter": 1692, "iter_tflops": 50.56059346130394, "iter_time": 
0.4080469017028809, "loss": 0.9456121921539307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.598354287577486, "step_time": 0.37787024497985844} +{"epoch": 0, "iter": 1693, "iter_tflops": 21.81417799587257, "iter_time": 0.7735268325805664, "loss": 0.25059637427330017, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 22.98397041784428, "step_time": 0.7341574020385742} +{"epoch": 0, "iter": 1694, "iter_tflops": 17.756856943242376, "iter_time": 0.9502724533081055, "loss": 0.3241688907146454, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 22.03609555562351, "step_time": 0.7657369232177734} +{"epoch": 0, "iter": 1695, "iter_tflops": 30.494170186922293, "iter_time": 0.5533468170166016, "loss": 0.3707049787044525, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 32.43549745383706, "step_time": 0.5202279396057129} +{"epoch": 0, "iter": 1696, "iter_tflops": 30.563663827454615, "iter_time": 0.5520886535644531, "loss": 0.3363450765609741, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 32.271460027466276, "step_time": 0.5228722839355469} +{"epoch": 0, "iter": 1697, "iter_tflops": 9.69810601978179, "iter_time": 0.7580687561035157, "loss": 0.011932307854294777, "lr": 3e-05, "seqlen": 2976.0, "step_tflops": 10.239193944539446, "step_time": 0.7180087814331054} +{"epoch": 0, "iter": 1698, "iter_tflops": 5.906708179678076, "iter_time": 1.244657928466797, "loss": 0.001451522228308022, "lr": 3e-05, "seqlen": 2976.0, "step_tflops": 7.193345743806069, "step_time": 1.0220322265625} +{"epoch": 0, "iter": 1699, "iter_tflops": 19.065395489910607, "iter_time": 0.3856112594604493, "loss": 0.018910730257630348, "lr": 3e-05, "seqlen": 2976.0, "step_tflops": 21.064511775059064, "step_time": 0.349015027999878} +{"epoch": 0, "iter": 1700, "iter_tflops": 20.901049229055534, "iter_time": 0.3517445983886719, "loss": 0.003185862209647894, "lr": 3e-05, "seqlen": 2976.0, "step_tflops": 22.816340884249787, "step_time": 0.3222178001403809} +{"epoch": 0, "iter": 1701, "iter_tflops": 32.68054696147538, 
"iter_time": 0.6312958450317383, "loss": 0.26937904953956604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.77184222080684, "step_time": 0.5933275947570801} +{"epoch": 0, "iter": 1702, "iter_tflops": 18.66198782896825, "iter_time": 1.1055142517089844, "loss": 0.2566303014755249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.129277913449943, "step_time": 0.9764220809936524} +{"epoch": 0, "iter": 1703, "iter_tflops": 44.03860445327438, "iter_time": 0.4684774589538574, "loss": 0.29568126797676086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0578676706806, "step_time": 0.4292968978881836} +{"epoch": 0, "iter": 1704, "iter_tflops": 44.74810806473433, "iter_time": 0.46104951477050776, "loss": 0.25971153378486633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.96861362274686, "step_time": 0.4213125915527343} +{"epoch": 0, "iter": 1705, "iter_tflops": 14.80417775491896, "iter_time": 1.3655806579589846, "loss": 0.030423276126384735, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 15.730200857857323, "step_time": 1.2851901245117188} +{"epoch": 0, "iter": 1706, "iter_tflops": 15.727150329709005, "iter_time": 1.2854394073486328, "loss": 0.05673103407025337, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 20.441421391526475, "step_time": 0.9889869403839111} +{"epoch": 0, "iter": 1707, "iter_tflops": 41.42131599218021, "iter_time": 0.48806510162353517, "loss": 0.03605777025222778, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 45.78927242989976, "step_time": 0.44150731658935544} +{"epoch": 0, "iter": 1708, "iter_tflops": 44.718116265385945, "iter_time": 0.45208296966552736, "loss": 0.048496317118406296, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 49.161076111232255, "step_time": 0.41122571754455567} +{"epoch": 0, "iter": 1709, "iter_tflops": 18.74504393598197, "iter_time": 1.1006159057617189, "loss": 0.3359827697277069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.027963651657597, "step_time": 1.030114387512207} +{"epoch": 0, "iter": 1710, "iter_tflops": 
14.73741735247368, "iter_time": 1.399912414550781, "loss": 0.29389718174934387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.05190311230485, "step_time": 1.0828888530731202} +{"epoch": 0, "iter": 1711, "iter_tflops": 45.96343980262165, "iter_time": 0.44885877990722656, "loss": 0.36255180835723877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.84777690254728, "step_time": 0.4138819179534912} +{"epoch": 0, "iter": 1712, "iter_tflops": 47.79128227817138, "iter_time": 0.4316915664672852, "loss": 0.33670443296432495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.69728586893401, "step_time": 0.3990749835968017} +{"epoch": 0, "iter": 1713, "iter_tflops": 34.421873793694495, "iter_time": 0.599359977722168, "loss": 0.28708016872406006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.7816143711262, "step_time": 0.5609077758789062} +{"epoch": 0, "iter": 1714, "iter_tflops": 45.49075565361431, "iter_time": 0.4535227699279785, "loss": 0.20162545144557953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.237436002042124, "step_time": 0.4106717052459717} +{"epoch": 0, "iter": 1715, "iter_tflops": 51.387051400864785, "iter_time": 0.40148428344726567, "loss": 0.28043878078460693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.90348747749769, "step_time": 0.36904841613769535} +{"epoch": 0, "iter": 1716, "iter_tflops": 51.84415534898089, "iter_time": 0.3979444427490235, "loss": 0.33880168199539185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.1275804782959, "step_time": 0.36757496643066406} +{"epoch": 0, "iter": 1717, "iter_tflops": 32.66886839858445, "iter_time": 0.6315215225219726, "loss": 0.09773743897676468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.956606660914915, "step_time": 0.5901915397644044} +{"epoch": 0, "iter": 1718, "iter_tflops": 11.332804563975586, "iter_time": 1.820475540161133, "loss": 0.0936933159828186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.97821963348297, "step_time": 1.5896705474853514} +{"epoch": 0, "iter": 1719, 
"iter_tflops": 10.982478526682973, "iter_time": 1.8785462188720705, "loss": 0.12519368529319763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.296451099343104, "step_time": 1.55162406539917} +{"epoch": 0, "iter": 1720, "iter_tflops": 22.645222357143354, "iter_time": 0.9110572280883789, "loss": 0.14548815786838531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.73540830179489, "step_time": 0.743853967666626} +{"epoch": 0, "iter": 1721, "iter_tflops": 17.624191904282146, "iter_time": 1.0017539291381836, "loss": 0.18613873422145844, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 18.342189723486253, "step_time": 0.9625406646728516} +{"epoch": 0, "iter": 1722, "iter_tflops": 10.202758985271425, "iter_time": 1.7304244384765624, "loss": 0.41245293617248535, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 11.93453528085434, "step_time": 1.479328945159912} +{"epoch": 0, "iter": 1723, "iter_tflops": 30.706529101165536, "iter_time": 0.5749625244140624, "loss": 0.3002760112285614, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 32.794925605479044, "step_time": 0.5383486366271972} +{"epoch": 0, "iter": 1724, "iter_tflops": 32.30386766273474, "iter_time": 0.5465321884155274, "loss": 0.2409285306930542, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 34.36330818094326, "step_time": 0.5137777595520019} +{"epoch": 0, "iter": 1725, "iter_tflops": 49.34263894569135, "iter_time": 0.41811897277832033, "loss": 0.08993584662675858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.53774725807162, "step_time": 0.3782901668548584} +{"epoch": 0, "iter": 1726, "iter_tflops": 37.41643871367944, "iter_time": 0.5513911590576173, "loss": 0.10654287785291672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27801568686398, "step_time": 0.4998082675933838} +{"epoch": 0, "iter": 1727, "iter_tflops": 45.06606258846448, "iter_time": 0.4577966728210449, "loss": 0.08412092179059982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.73894542888477, "step_time": 0.4147875137329101} +{"epoch": 0, "iter": 
1728, "iter_tflops": 43.847360801783594, "iter_time": 0.47052075958251954, "loss": 0.11709924787282944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.03417595479644, "step_time": 0.4295086383819581} +{"epoch": 0, "iter": 1729, "iter_tflops": 17.891196393632256, "iter_time": 1.1531421966552735, "loss": 0.1769404113292694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.43446313390059, "step_time": 1.061572597503662} +{"epoch": 0, "iter": 1730, "iter_tflops": 22.84622107796403, "iter_time": 0.9030418395996094, "loss": 0.2486359030008316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.49082666818719, "step_time": 0.6551461391448974} +{"epoch": 0, "iter": 1731, "iter_tflops": 51.29432248821963, "iter_time": 0.40221007919311524, "loss": 0.20037148892879486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.441816227624784, "step_time": 0.3721215305328369} +{"epoch": 0, "iter": 1732, "iter_tflops": 50.831938102543546, "iter_time": 0.40586871719360357, "loss": 0.16253887116909027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.72571357323808, "step_time": 0.3769908542633057} +{"epoch": 0, "iter": 1733, "iter_tflops": 19.131251392570046, "iter_time": 1.0783974914550782, "loss": 1.1358790397644043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.918336728519996, "step_time": 1.035783950805664} +{"epoch": 0, "iter": 1734, "iter_tflops": 13.124067069295606, "iter_time": 1.5720045776367189, "loss": 0.9328632354736328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.709092084302732, "step_time": 1.2347225933074952} +{"epoch": 0, "iter": 1735, "iter_tflops": 35.73337521753062, "iter_time": 0.5773620147705077, "loss": 0.857074499130249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.90270607918658, "step_time": 0.5303254089355469} +{"epoch": 0, "iter": 1736, "iter_tflops": 37.69809806358292, "iter_time": 0.5472714691162109, "loss": 0.9591957330703735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.50611712920088, "step_time": 0.5093327865600586} +{"epoch": 0, "iter": 
1737, "iter_tflops": 15.881599385249, "iter_time": 1.2990564117431642, "loss": 0.014924182556569576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.813796296605588, "step_time": 1.2270336303710938} +{"epoch": 0, "iter": 1738, "iter_tflops": 26.32729986002171, "iter_time": 0.7836387939453126, "loss": 0.026336025446653366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.209876517203696, "step_time": 0.6030741882324219} +{"epoch": 0, "iter": 1739, "iter_tflops": 46.19617318874727, "iter_time": 0.4465974578857422, "loss": 0.009770111180841923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.76183335973371, "step_time": 0.4064292430877685} +{"epoch": 0, "iter": 1740, "iter_tflops": 44.43803329236496, "iter_time": 0.4642665748596192, "loss": 0.008553760126233101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.20244498623771, "step_time": 0.4193103313446045} +{"epoch": 0, "iter": 1741, "iter_tflops": 22.72065306282641, "iter_time": 0.9080325927734375, "loss": 0.31307452917099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.0003903789572, "step_time": 0.8596149139404297} +{"epoch": 0, "iter": 1742, "iter_tflops": 34.2871449943325, "iter_time": 0.6017151184082031, "loss": 0.3193124830722809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.674258965419746, "step_time": 0.5334580173492431} +{"epoch": 0, "iter": 1743, "iter_tflops": 42.76332315128244, "iter_time": 0.4824483222961426, "loss": 0.304821640253067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.42892369650884, "step_time": 0.44435864257812496} +{"epoch": 0, "iter": 1744, "iter_tflops": 40.89639591657714, "iter_time": 0.50447216796875, "loss": 0.35665184259414673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.56452978983925, "step_time": 0.4629487533569336} +{"epoch": 0, "iter": 1745, "iter_tflops": 31.47821202376409, "iter_time": 0.6554086837768555, "loss": 1.217057704925537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.63714026167112, "step_time": 0.5956350135803223} +{"epoch": 0, "iter": 1746, 
"iter_tflops": 45.727649395868966, "iter_time": 0.4511732788085937, "loss": 1.037817120552063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50666276247756, "step_time": 0.41673367500305175} +{"epoch": 0, "iter": 1747, "iter_tflops": 45.919872649141666, "iter_time": 0.44928464126586914, "loss": 1.1730049848556519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.26349164323641, "step_time": 0.4187907276153565} +{"epoch": 0, "iter": 1748, "iter_tflops": 44.13075188417943, "iter_time": 0.4674992523193359, "loss": 1.049476981163025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.48507989546179, "step_time": 0.4344752826690674} +{"epoch": 0, "iter": 1749, "iter_tflops": 28.5977554846049, "iter_time": 0.7214235229492187, "loss": 0.15728871524333954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.23622181132937, "step_time": 0.6823304061889649} +{"epoch": 0, "iter": 1750, "iter_tflops": 13.304138343749074, "iter_time": 1.5507275238037113, "loss": 0.07132632285356522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.135392153161686, "step_time": 1.2786236190795899} +{"epoch": 0, "iter": 1751, "iter_tflops": 40.59530829935651, "iter_time": 0.5082137413024903, "loss": 0.10186196863651276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.66774194706545, "step_time": 0.45176513290405274} +{"epoch": 0, "iter": 1752, "iter_tflops": 45.039955558992126, "iter_time": 0.45806203079223634, "loss": 0.14458198845386505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.362866225024156, "step_time": 0.4179476413726807} +{"epoch": 0, "iter": 1753, "iter_tflops": 21.21408828840012, "iter_time": 0.9725185089111328, "loss": 0.6329132914543152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.25821032709726, "step_time": 0.8870456161499023} +{"epoch": 0, "iter": 1754, "iter_tflops": 24.410188133026235, "iter_time": 0.8451837158203125, "loss": 0.9003653526306152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.14861599501478, "step_time": 0.6223817462921143} +{"epoch": 0, "iter": 
1755, "iter_tflops": 39.682247328566284, "iter_time": 0.5199073867797852, "loss": 0.6506698727607727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.2220754306602, "step_time": 0.4773276920318604} +{"epoch": 0, "iter": 1756, "iter_tflops": 40.317065999921496, "iter_time": 0.5117211036682129, "loss": 0.7885779738426208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.591039081386874, "step_time": 0.47328749084472654} +{"epoch": 0, "iter": 1757, "iter_tflops": 18.18675637790139, "iter_time": 1.1344020385742186, "loss": 0.034181464463472366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.455614695885835, "step_time": 1.060418487548828} +{"epoch": 0, "iter": 1758, "iter_tflops": 21.172254672569984, "iter_time": 0.9744400787353515, "loss": 0.048202939331531525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.670340143317436, "step_time": 0.8036938114166261} +{"epoch": 0, "iter": 1759, "iter_tflops": 47.87900298821802, "iter_time": 0.4309006500244141, "loss": 0.03992154076695442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.72045492512278, "step_time": 0.3913299598693848} +{"epoch": 0, "iter": 1760, "iter_tflops": 49.45626699523397, "iter_time": 0.41715832519531243, "loss": 0.0560850165784359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.2860573247018, "step_time": 0.38004405784606937} +{"epoch": 0, "iter": 1761, "iter_tflops": 35.2140551541079, "iter_time": 0.572920249938965, "loss": 0.2688256800174713, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 38.97094702571028, "step_time": 0.5176893768310546} +{"epoch": 0, "iter": 1762, "iter_tflops": 39.51392545527316, "iter_time": 0.5105755767822265, "loss": 0.29627686738967896, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 43.8839355319035, "step_time": 0.4597319049835205} +{"epoch": 0, "iter": 1763, "iter_tflops": 41.32010846490576, "iter_time": 0.48825731658935545, "loss": 0.28571051359176636, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 45.24921065018747, "step_time": 0.44586071205139155} +{"epoch": 0, 
"iter": 1764, "iter_tflops": 36.30364400720272, "iter_time": 0.5557250747680664, "loss": 0.18618130683898926, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 39.68661849149168, "step_time": 0.5083538494110108} +{"epoch": 0, "iter": 1765, "iter_tflops": 37.90485803356336, "iter_time": 0.5442862625122071, "loss": 0.009331216104328632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.691334021373606, "step_time": 0.49485328292846686} +{"epoch": 0, "iter": 1766, "iter_tflops": 37.949561909127546, "iter_time": 0.5436451034545899, "loss": 0.0018478920683264732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.77883465667793, "step_time": 0.4318040332794189} +{"epoch": 0, "iter": 1767, "iter_tflops": 59.895837163797204, "iter_time": 0.34444953918457033, "loss": 0.014374828897416592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.58511036055077, "step_time": 0.31456977653503415} +{"epoch": 0, "iter": 1768, "iter_tflops": 58.22609116642841, "iter_time": 0.3543272972106934, "loss": 0.0038315930869430304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.56348254018351, "step_time": 0.32457462501525874} +{"epoch": 0, "iter": 1769, "iter_tflops": 44.290500305983635, "iter_time": 0.465813060760498, "loss": 1.0543874502182007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24265330942612, "step_time": 0.4276525459289551} +{"epoch": 0, "iter": 1770, "iter_tflops": 48.331939735066356, "iter_time": 0.42686251831054683, "loss": 0.8557901382446289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.49653070151148, "step_time": 0.3929991798400879} +{"epoch": 0, "iter": 1771, "iter_tflops": 46.58152839102167, "iter_time": 0.44290288925170895, "loss": 1.0219439268112183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.91536714968863, "step_time": 0.41332148170471195} +{"epoch": 0, "iter": 1772, "iter_tflops": 46.69986811535402, "iter_time": 0.4417805519104004, "loss": 0.9447750449180603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.07955243362843, "step_time": 0.4119664115905762} 
+{"epoch": 0, "iter": 1773, "iter_tflops": 43.36909443502516, "iter_time": 0.4757095756530762, "loss": 0.10649773478507996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.39185829561101, "step_time": 0.43532991218566897} +{"epoch": 0, "iter": 1774, "iter_tflops": 12.297089080395494, "iter_time": 1.6777217254638672, "loss": 0.12760838866233826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.70519331314701, "step_time": 1.402980094909668} +{"epoch": 0, "iter": 1775, "iter_tflops": 9.004497377812463, "iter_time": 2.2911987915039065, "loss": 0.0978332906961441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.433374064554634, "step_time": 1.9774133834838865} +{"epoch": 0, "iter": 1776, "iter_tflops": 19.544596348711192, "iter_time": 1.0555906677246094, "loss": 0.11184864491224289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.69007881452939, "step_time": 0.7729873580932618} +{"epoch": 0, "iter": 1777, "iter_tflops": 16.849644494282956, "iter_time": 0.8484199600219726, "loss": 0.4106906056404114, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 18.025402168881687, "step_time": 0.7930793762207031} +{"epoch": 0, "iter": 1778, "iter_tflops": 22.28592375364164, "iter_time": 0.6414620666503906, "loss": 0.1846376359462738, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 24.03930687899644, "step_time": 0.5946749954223634} +{"epoch": 0, "iter": 1779, "iter_tflops": 20.500714581018972, "iter_time": 0.69732080078125, "loss": 0.37068817019462585, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 22.077201616596298, "step_time": 0.6475265731811524} +{"epoch": 0, "iter": 1780, "iter_tflops": 22.6201542204906, "iter_time": 0.6319839630126953, "loss": 0.4076663553714752, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 24.281642498101863, "step_time": 0.5887400207519531} +{"epoch": 0, "iter": 1781, "iter_tflops": 22.206179160104636, "iter_time": 0.9290699386596679, "loss": 0.9337031245231628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.859001931182835, "step_time": 0.8647089920043944} 
+{"epoch": 0, "iter": 1782, "iter_tflops": 8.903240716402776, "iter_time": 2.3172566223144533, "loss": 1.1204229593276978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.569886916983366, "step_time": 1.9518745727539062} +{"epoch": 0, "iter": 1783, "iter_tflops": 12.136972819675659, "iter_time": 1.699854965209961, "loss": 1.0087928771972656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.569243376129485, "step_time": 1.520430648803711} +{"epoch": 0, "iter": 1784, "iter_tflops": 46.43013820030666, "iter_time": 0.44434701919555664, "loss": 1.0331480503082275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.80091902677849, "step_time": 0.4061165409088135} +{"epoch": 0, "iter": 1785, "iter_tflops": 22.934984250151285, "iter_time": 0.6517816162109374, "loss": 0.3318060338497162, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.477563827757443, "step_time": 0.6107062454223633} +{"epoch": 0, "iter": 1786, "iter_tflops": 14.732187923319358, "iter_time": 1.0146898193359375, "loss": 0.42637622356414795, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 17.454286494236847, "step_time": 0.8564429779052735} +{"epoch": 0, "iter": 1787, "iter_tflops": 21.78073117936143, "iter_time": 0.6863222808837891, "loss": 0.2800474464893341, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.288658538557637, "step_time": 0.6418833045959472} +{"epoch": 0, "iter": 1788, "iter_tflops": 23.945766794001674, "iter_time": 0.6242690505981445, "loss": 0.3748069405555725, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 25.640722541407147, "step_time": 0.5830023345947266} +{"epoch": 0, "iter": 1789, "iter_tflops": 21.488331400820833, "iter_time": 0.960106819152832, "loss": 0.41318461298942566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.87992157183888, "step_time": 0.9017117233276367} +{"epoch": 0, "iter": 1790, "iter_tflops": 13.004493840033682, "iter_time": 1.5864587860107422, "loss": 0.5669222474098206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.681524017838992, "step_time": 
1.3156306419372559} +{"epoch": 0, "iter": 1791, "iter_tflops": 40.693287888765155, "iter_time": 0.5069900856018067, "loss": 0.4958311915397644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.662951935096636, "step_time": 0.4619285697937012} +{"epoch": 0, "iter": 1792, "iter_tflops": 43.54794198587938, "iter_time": 0.4737558784484863, "loss": 0.6000012159347534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.333219320091914, "step_time": 0.4358692226409912} +{"epoch": 0, "iter": 1793, "iter_tflops": 18.331819410242733, "iter_time": 1.125425308227539, "loss": 0.9065881967544556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.866010728813176, "step_time": 1.038512149810791} +{"epoch": 0, "iter": 1794, "iter_tflops": 14.187014509198791, "iter_time": 1.4542237548828125, "loss": 1.1616615056991577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.041420510886713, "step_time": 1.21064400100708} +{"epoch": 0, "iter": 1795, "iter_tflops": 35.16390448423217, "iter_time": 0.5867122497558593, "loss": 1.0123311281204224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.24075903850393, "step_time": 0.539505334854126} +{"epoch": 0, "iter": 1796, "iter_tflops": 36.627200265325676, "iter_time": 0.5632724685668945, "loss": 1.0332428216934204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.83896718178252, "step_time": 0.5178621578216553} +{"epoch": 0, "iter": 1797, "iter_tflops": 24.718788755377137, "iter_time": 0.8346320571899413, "loss": 0.6228204369544983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.72282830467855, "step_time": 0.7720400428771973} +{"epoch": 0, "iter": 1798, "iter_tflops": 15.929114513972952, "iter_time": 1.2951814422607422, "loss": 0.6327173709869385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.7799634579705, "step_time": 0.9928358898162841} +{"epoch": 0, "iter": 1799, "iter_tflops": 38.419821381069404, "iter_time": 0.5369908752441407, "loss": 0.5644004344940186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.737813787725194, "step_time": 
0.4943022079467773} +{"epoch": 0, "iter": 1800, "iter_tflops": 40.52622111158378, "iter_time": 0.5090801200866699, "loss": 0.5894204378128052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55131060318644, "step_time": 0.46308611869812005} +{"epoch": 0, "iter": 1801, "iter_tflops": 19.955153193093903, "iter_time": 1.0338729705810545, "loss": 0.9495476484298706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.03076435608485, "step_time": 0.9809958953857422} +{"epoch": 0, "iter": 1802, "iter_tflops": 19.58554244583746, "iter_time": 1.053383819580078, "loss": 0.9282003045082092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.29245014259654, "step_time": 0.8492800598144531} +{"epoch": 0, "iter": 1803, "iter_tflops": 45.484145072748085, "iter_time": 0.45358868408203123, "loss": 1.0858360528945923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.945300199718595, "step_time": 0.4215132694244385} +{"epoch": 0, "iter": 1804, "iter_tflops": 48.62985272361315, "iter_time": 0.42424750137329104, "loss": 0.9716066122055054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.329168844423855, "step_time": 0.39425609016418456} +{"epoch": 0, "iter": 1805, "iter_tflops": 35.67290436969012, "iter_time": 0.5783407287597655, "loss": 0.2267979234457016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.18902062669485, "step_time": 0.5402362556457518} +{"epoch": 0, "iter": 1806, "iter_tflops": 16.480878753265404, "iter_time": 1.2518199920654298, "loss": 0.12703068554401398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.4166301221508, "step_time": 1.0625475883483886} +{"epoch": 0, "iter": 1807, "iter_tflops": 41.48812816553325, "iter_time": 0.4972770385742188, "loss": 0.21597489714622498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59319514558411, "step_time": 0.45250378799438484} +{"epoch": 0, "iter": 1808, "iter_tflops": 49.78976361840154, "iter_time": 0.414364158630371, "loss": 0.1387939304113388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.77623584678816, "step_time": 
0.37664314079284666} +{"epoch": 0, "iter": 1809, "iter_tflops": 21.815248999730667, "iter_time": 0.9457189102172853, "loss": 0.3291962742805481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.018553495997665, "step_time": 0.8962810592651369} +{"epoch": 0, "iter": 1810, "iter_tflops": 10.775999322879748, "iter_time": 1.9145410919189454, "loss": 0.49614107608795166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.364782026722326, "step_time": 1.6685367736816408} +{"epoch": 0, "iter": 1811, "iter_tflops": 14.672467235365735, "iter_time": 1.4061093597412109, "loss": 0.29487502574920654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.10088547335225, "step_time": 1.139783660888672} +{"epoch": 0, "iter": 1812, "iter_tflops": 24.440968596604716, "iter_time": 0.8441193084716796, "loss": 0.2578887939453125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.270073994161805, "step_time": 0.7048527965545655} +{"epoch": 0, "iter": 1813, "iter_tflops": 19.91395353227849, "iter_time": 0.9093214721679688, "loss": 0.4000936448574066, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 20.792634378818782, "step_time": 0.8708942413330079} +{"epoch": 0, "iter": 1814, "iter_tflops": 13.409601743568652, "iter_time": 1.3503895111083983, "loss": 0.31008148193359375, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 16.119266738594163, "step_time": 1.1233876724243164} +{"epoch": 0, "iter": 1815, "iter_tflops": 28.06575220363612, "iter_time": 0.6452057800292968, "loss": 0.4061489403247833, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 30.203457686791634, "step_time": 0.5995401496887207} +{"epoch": 0, "iter": 1816, "iter_tflops": 29.57269222804262, "iter_time": 0.6123279342651367, "loss": 0.30916792154312134, "lr": 3e-05, "seqlen": 7216.0, "step_tflops": 31.67000458818735, "step_time": 0.5717771682739258} +{"epoch": 0, "iter": 1817, "iter_tflops": 20.590148064108533, "iter_time": 1.0019885940551758, "loss": 0.7962642908096313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.24420170353272, 
"step_time": 0.9274818572998047} +{"epoch": 0, "iter": 1818, "iter_tflops": 17.186636713347227, "iter_time": 1.200414825439453, "loss": 0.7400228977203369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.76120019398756, "step_time": 0.9937331809997559} +{"epoch": 0, "iter": 1819, "iter_tflops": 44.79030037764166, "iter_time": 0.46061520767211916, "loss": 0.6677826046943665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.38808878921552, "step_time": 0.4263671913146972} +{"epoch": 0, "iter": 1820, "iter_tflops": 51.46978023867891, "iter_time": 0.40083896636962896, "loss": 0.8363685011863708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.56998626345128, "step_time": 0.37126324653625487} +{"epoch": 0, "iter": 1821, "iter_tflops": 41.00622100294232, "iter_time": 0.5031210632324219, "loss": 1.0736088752746582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64899513767451, "step_time": 0.4620729637145996} +{"epoch": 0, "iter": 1822, "iter_tflops": 44.21370155607148, "iter_time": 0.46662217330932615, "loss": 0.9253427386283875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.58252613443741, "step_time": 0.43358550262451173} +{"epoch": 0, "iter": 1823, "iter_tflops": 42.02151024745807, "iter_time": 0.4909650650024414, "loss": 1.046061635017395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08703406194049, "step_time": 0.4575837364196777} +{"epoch": 0, "iter": 1824, "iter_tflops": 43.99540383302261, "iter_time": 0.4689374732971191, "loss": 0.8111551403999329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19196352250154, "step_time": 0.4371738739013672} +{"epoch": 0, "iter": 1825, "iter_tflops": 43.92875837227632, "iter_time": 0.46964891052246094, "loss": 0.7818143963813782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.464316591189174, "step_time": 0.4346653442382813} +{"epoch": 0, "iter": 1826, "iter_tflops": 45.93344200940214, "iter_time": 0.44915191650390623, "loss": 0.5685727596282959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.83031095595368, 
"step_time": 0.4140269870758056} +{"epoch": 0, "iter": 1827, "iter_tflops": 43.57319250524476, "iter_time": 0.4734813385009765, "loss": 0.6313914060592651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17323994303775, "step_time": 0.4373473930358886} +{"epoch": 0, "iter": 1828, "iter_tflops": 48.042421537091656, "iter_time": 0.4294349212646485, "loss": 0.6809616088867188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99701429977898, "step_time": 0.396774580001831} +{"epoch": 0, "iter": 1829, "iter_tflops": 41.94541026814257, "iter_time": 0.4918558044433594, "loss": 1.094415545463562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26501552506613, "step_time": 0.45578452301025396} +{"epoch": 0, "iter": 1830, "iter_tflops": 36.135360362140524, "iter_time": 0.570939193725586, "loss": 1.0750328302383423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14527816462514, "step_time": 0.513910837173462} +{"epoch": 0, "iter": 1831, "iter_tflops": 40.72503734934835, "iter_time": 0.5065948333740234, "loss": 1.014057993888855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.07985070003091, "step_time": 0.4680390968322754} +{"epoch": 0, "iter": 1832, "iter_tflops": 36.04456035943375, "iter_time": 0.5723774490356446, "loss": 1.0938881635665894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.13548022231072, "step_time": 0.5271710834503174} +{"epoch": 0, "iter": 1833, "iter_tflops": 21.83036046455345, "iter_time": 0.9450642623901367, "loss": 0.8809695839881897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.442005113538603, "step_time": 0.8800908203125001} +{"epoch": 0, "iter": 1834, "iter_tflops": 14.909175855018601, "iter_time": 1.3837849731445313, "loss": 1.127345323562622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.989073417175437, "step_time": 1.1468680477142335} +{"epoch": 0, "iter": 1835, "iter_tflops": 35.34015732846174, "iter_time": 0.5837861251831055, "loss": 0.8720114231109619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.40771455149308, 
"step_time": 0.5371601448059081} +{"epoch": 0, "iter": 1836, "iter_tflops": 35.71846027178721, "iter_time": 0.5776031036376953, "loss": 1.0362640619277954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.680637241859294, "step_time": 0.5333700523376464} +{"epoch": 0, "iter": 1837, "iter_tflops": 22.121940157999102, "iter_time": 0.932607780456543, "loss": 1.1154241561889648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.76489148946299, "step_time": 0.8681332931518555} +{"epoch": 0, "iter": 1838, "iter_tflops": 18.933770117816838, "iter_time": 1.0896452941894532, "loss": 0.8989056944847107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.177575859002257, "step_time": 0.8901316356658936} +{"epoch": 0, "iter": 1839, "iter_tflops": 38.50888736326687, "iter_time": 0.5357488861083984, "loss": 0.7231149077415466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01931041596638, "step_time": 0.4909907684326172} +{"epoch": 0, "iter": 1840, "iter_tflops": 40.101890916557466, "iter_time": 0.5144668502807618, "loss": 0.8248458504676819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66589489054026, "step_time": 0.47247614097595214} +{"epoch": 0, "iter": 1841, "iter_tflops": 19.50113716949438, "iter_time": 1.0579430999755859, "loss": 0.8940157294273376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.898066178016496, "step_time": 0.9872250061035157} +{"epoch": 0, "iter": 1842, "iter_tflops": 22.043111616078836, "iter_time": 0.9359428863525391, "loss": 0.9904674887657166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.69113615449955, "step_time": 0.835566795349121} +{"epoch": 0, "iter": 1843, "iter_tflops": 40.86477280231159, "iter_time": 0.5048625526428223, "loss": 1.0364964008331299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78125137236973, "step_time": 0.4712312431335449} +{"epoch": 0, "iter": 1844, "iter_tflops": 43.422131630683964, "iter_time": 0.47512852859497073, "loss": 1.0215988159179688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53951411849403, 
"step_time": 0.4433027267456055} +{"epoch": 0, "iter": 1845, "iter_tflops": 45.34379712902736, "iter_time": 0.45499263000488277, "loss": 0.05049929767847061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.375908003357154, "step_time": 0.41783724784851073} +{"epoch": 0, "iter": 1846, "iter_tflops": 9.225450461663698, "iter_time": 2.236323699951172, "loss": 0.0532771572470665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.377902497373707, "step_time": 1.8132598266601563} +{"epoch": 0, "iter": 1847, "iter_tflops": 16.421247719827953, "iter_time": 1.256365768432617, "loss": 0.037379778921604156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.492687084775202, "step_time": 1.1156352462768555} +{"epoch": 0, "iter": 1848, "iter_tflops": 44.54474916895676, "iter_time": 0.46315433120727545, "loss": 0.05294232442975044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.19561858243786, "step_time": 0.41936851501464845} +{"epoch": 0, "iter": 1849, "iter_tflops": 13.751502313390905, "iter_time": 1.1793094940185547, "loss": 0.4037465453147888, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 14.704463045790598, "step_time": 1.1028812942504882} +{"epoch": 0, "iter": 1850, "iter_tflops": 15.77428041318288, "iter_time": 1.0280834884643555, "loss": 0.33678847551345825, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 19.136429535753308, "step_time": 0.8474557495117188} +{"epoch": 0, "iter": 1851, "iter_tflops": 23.473568174026013, "iter_time": 0.6908739700317382, "loss": 0.37154272198677063, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 25.32449212670537, "step_time": 0.6403791694641113} +{"epoch": 0, "iter": 1852, "iter_tflops": 24.169170836141934, "iter_time": 0.6709902191162109, "loss": 0.4179350733757019, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 25.863596989073102, "step_time": 0.6270310058593751} +{"epoch": 0, "iter": 1853, "iter_tflops": 35.179374863308645, "iter_time": 0.5864542388916015, "loss": 0.45407572388648987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
38.82026173652236, "step_time": 0.5314516849517822} +{"epoch": 0, "iter": 1854, "iter_tflops": 31.463485603613428, "iter_time": 0.655715446472168, "loss": 0.3324540853500366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.769026045667985, "step_time": 0.5933756523132324} +{"epoch": 0, "iter": 1855, "iter_tflops": 40.016617294942414, "iter_time": 0.5155631561279297, "loss": 0.4083267152309418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.86614891580762, "step_time": 0.4703192329406738} +{"epoch": 0, "iter": 1856, "iter_tflops": 37.43512676745615, "iter_time": 0.5511158981323243, "loss": 0.31503191590309143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.05104685885084, "step_time": 0.5025716781616211} +{"epoch": 0, "iter": 1857, "iter_tflops": 28.33604693282226, "iter_time": 0.7280865097045899, "loss": 0.02209131233394146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.511705244002684, "step_time": 0.6761697959899902} +{"epoch": 0, "iter": 1858, "iter_tflops": 8.331937431270466, "iter_time": 2.47614599609375, "loss": 0.09389693289995193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.899137513917118, "step_time": 1.7338310012817382} +{"epoch": 0, "iter": 1859, "iter_tflops": 13.392932323281519, "iter_time": 1.540446334838867, "loss": 0.026135709136724472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.337492821569786, "step_time": 1.1899698371887206} +{"epoch": 0, "iter": 1860, "iter_tflops": 45.07693548838219, "iter_time": 0.4576862487792969, "loss": 0.04347331076860428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.82024130330681, "step_time": 0.41411067008972163} +{"epoch": 0, "iter": 1861, "iter_tflops": 13.949452897096915, "iter_time": 1.1067934875488281, "loss": 0.38890135288238525, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 14.582696233487283, "step_time": 1.0587317581176756} +{"epoch": 0, "iter": 1862, "iter_tflops": 8.164967480819966, "iter_time": 1.8909032592773438, "loss": 0.3976469337940216, "lr": 3e-05, "seqlen": 6176.0, 
"step_tflops": 9.301225331368087, "step_time": 1.6599064178466796} +{"epoch": 0, "iter": 1863, "iter_tflops": 8.524790596935993, "iter_time": 1.8110900726318357, "loss": 0.2048870176076889, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 9.50410722937368, "step_time": 1.6244727935791017} +{"epoch": 0, "iter": 1864, "iter_tflops": 18.324145018205275, "iter_time": 0.8425584716796876, "loss": 0.23887276649475098, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 22.2613475543638, "step_time": 0.6935412864685059} +{"epoch": 0, "iter": 1865, "iter_tflops": 12.122007242984008, "iter_time": 1.155770263671875, "loss": 0.32379642128944397, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 12.781926260894586, "step_time": 1.0960989151000977} +{"epoch": 0, "iter": 1866, "iter_tflops": 7.324514519202802, "iter_time": 1.9127896423339845, "loss": 0.23326940834522247, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 8.33160416079849, "step_time": 1.68157958984375} +{"epoch": 0, "iter": 1867, "iter_tflops": 8.37202666725506, "iter_time": 1.67346044921875, "loss": 0.10275008529424667, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 9.729707528810245, "step_time": 1.4399462127685547} +{"epoch": 0, "iter": 1868, "iter_tflops": 16.691158788071753, "iter_time": 0.8393818359374999, "loss": 0.2915394604206085, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 18.35946373927317, "step_time": 0.7631081008911133} +{"epoch": 0, "iter": 1869, "iter_tflops": 16.773623484113124, "iter_time": 0.8571276779174805, "loss": 0.3264441192150116, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 17.944595231227183, "step_time": 0.8011959457397461} +{"epoch": 0, "iter": 1870, "iter_tflops": 5.4617237763955675, "iter_time": 2.632344207763672, "loss": 0.25495484471321106, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 7.730560587754432, "step_time": 1.8597793502807616} +{"epoch": 0, "iter": 1871, "iter_tflops": 9.363233912403864, "iter_time": 1.5354883880615233, "loss": 0.22590313851833344, "lr": 3e-05, "seqlen": 5760.0, 
"step_tflops": 11.592841183730497, "step_time": 1.2401737174987792} +{"epoch": 0, "iter": 1872, "iter_tflops": 21.09141962070723, "iter_time": 0.6816580963134766, "loss": 0.42456379532814026, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 22.595514771157543, "step_time": 0.6362827796936036} +{"epoch": 0, "iter": 1873, "iter_tflops": 11.18264340673516, "iter_time": 1.4722235717773438, "loss": 0.2983451187610626, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 11.848636652184155, "step_time": 1.3894721984863279} +{"epoch": 0, "iter": 1874, "iter_tflops": 15.503690161251795, "iter_time": 1.0618988800048828, "loss": 0.37903693318367004, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 18.322191364127722, "step_time": 0.898547061920166} +{"epoch": 0, "iter": 1875, "iter_tflops": 30.46977723095997, "iter_time": 0.5403174133300781, "loss": 0.26117268204689026, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 32.53094730021883, "step_time": 0.5060827484130859} +{"epoch": 0, "iter": 1876, "iter_tflops": 30.289912749691492, "iter_time": 0.5435258712768555, "loss": 0.34527209401130676, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 32.23381605987859, "step_time": 0.5107478179931642} +{"epoch": 0, "iter": 1877, "iter_tflops": 37.33852795225582, "iter_time": 0.5525416946411132, "loss": 0.6624634861946106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.50759856101299, "step_time": 0.5093141593933106} +{"epoch": 0, "iter": 1878, "iter_tflops": 15.861558124574646, "iter_time": 1.3006977844238279, "loss": 0.7077426314353943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.553959813179645, "step_time": 0.9147437381744384} +{"epoch": 0, "iter": 1879, "iter_tflops": 39.129769238660785, "iter_time": 0.5272480239868164, "loss": 0.7917909622192383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.853928706635145, "step_time": 0.48142828750610356} +{"epoch": 0, "iter": 1880, "iter_tflops": 40.28673006113235, "iter_time": 0.5121064300537109, "loss": 0.6705551743507385, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 43.90841844726504, "step_time": 0.4698664684295654} +{"epoch": 0, "iter": 1881, "iter_tflops": 19.039234921206926, "iter_time": 0.6739074325561524, "loss": 0.03528763726353645, "lr": 3e-05, "seqlen": 5152.0, "step_tflops": 20.693678318974705, "step_time": 0.6200290603637696} +{"epoch": 0, "iter": 1882, "iter_tflops": 6.054808596082562, "iter_time": 2.119089599609375, "loss": 0.058910246938467026, "lr": 3e-05, "seqlen": 5152.0, "step_tflops": 7.061997570839302, "step_time": 1.8168629760742188} +{"epoch": 0, "iter": 1883, "iter_tflops": 7.561866373661499, "iter_time": 1.696761260986328, "loss": 0.08200816810131073, "lr": 3e-05, "seqlen": 5152.0, "step_tflops": 9.830112581086787, "step_time": 1.3052426223754883} +{"epoch": 0, "iter": 1884, "iter_tflops": 11.239818948772601, "iter_time": 1.141538131713867, "loss": 0.05915694311261177, "lr": 3e-05, "seqlen": 5152.0, "step_tflops": 16.318260869541607, "step_time": 0.7862775344848633} +{"epoch": 0, "iter": 1885, "iter_tflops": 17.81180938456626, "iter_time": 0.8507232055664063, "loss": 0.4440723955631256, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 19.045390953748267, "step_time": 0.7956213455200196} +{"epoch": 0, "iter": 1886, "iter_tflops": 7.251177705491807, "iter_time": 2.089718414306641, "loss": 0.24770453572273254, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 8.493554533742277, "step_time": 1.7840492477416992} +{"epoch": 0, "iter": 1887, "iter_tflops": 8.934906722408186, "iter_time": 1.6959236450195314, "loss": 0.280582070350647, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 11.265215318089655, "step_time": 1.3451069641113282} +{"epoch": 0, "iter": 1888, "iter_tflops": 27.853869435951303, "iter_time": 0.5440148849487304, "loss": 0.4357464909553528, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 29.6444084668357, "step_time": 0.511156078338623} +{"epoch": 0, "iter": 1889, "iter_tflops": 24.331936755895615, "iter_time": 0.649661865234375, "loss": 0.27823397517204285, "lr": 3e-05, "seqlen": 
6320.0, "step_tflops": 25.837545124194833, "step_time": 0.6118046951293945} +{"epoch": 0, "iter": 1890, "iter_tflops": 12.452550770885157, "iter_time": 1.2694211578369141, "loss": 0.20616112649440765, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 13.816112217469355, "step_time": 1.1441374511718752} +{"epoch": 0, "iter": 1891, "iter_tflops": 24.103346314724707, "iter_time": 0.6558231048583985, "loss": 0.35241150856018066, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.985888397262837, "step_time": 0.608312141418457} +{"epoch": 0, "iter": 1892, "iter_tflops": 23.919502386641927, "iter_time": 0.6608637237548829, "loss": 0.2845383584499359, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.6422940492138, "step_time": 0.6164632301330566} +{"epoch": 0, "iter": 1893, "iter_tflops": 36.63650215098918, "iter_time": 0.5631294555664063, "loss": 0.054555464535951614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.85460674264876, "step_time": 0.5049881801605225} +{"epoch": 0, "iter": 1894, "iter_tflops": 39.90297253445968, "iter_time": 0.517031494140625, "loss": 0.05438271164894104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.15798275016351, "step_time": 0.4568648166656494} +{"epoch": 0, "iter": 1895, "iter_tflops": 47.1628960114216, "iter_time": 0.4374433135986328, "loss": 0.03872285038232803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.12080714012375, "step_time": 0.39583219528198244} +{"epoch": 0, "iter": 1896, "iter_tflops": 43.799257826930415, "iter_time": 0.47103751373291014, "loss": 0.03403371945023537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.307091247711234, "step_time": 0.4270820903778076} +{"epoch": 0, "iter": 1897, "iter_tflops": 21.960659726435576, "iter_time": 0.9394569091796875, "loss": 0.9440619349479675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.461176708180524, "step_time": 0.8793716430664062} +{"epoch": 0, "iter": 1898, "iter_tflops": 13.761174183899296, "iter_time": 1.499224792480469, "loss": 0.9277477264404297, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 16.43951709175743, "step_time": 1.2549695587158203} +{"epoch": 0, "iter": 1899, "iter_tflops": 39.424947528628685, "iter_time": 0.5233004684448243, "loss": 0.8839645385742188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.102500898346634, "step_time": 0.47865188980102535} +{"epoch": 0, "iter": 1900, "iter_tflops": 40.58230126701413, "iter_time": 0.5083766288757324, "loss": 1.0980819463729858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.12055303077719, "step_time": 0.46760731887817386} +{"epoch": 0, "iter": 1901, "iter_tflops": 38.99906438989344, "iter_time": 0.5226320266723633, "loss": 0.15618230402469635, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 43.3748897070458, "step_time": 0.4699069023132324} +{"epoch": 0, "iter": 1902, "iter_tflops": 38.16998755969568, "iter_time": 0.5339839324951172, "loss": 0.11781016737222672, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 41.97787914263146, "step_time": 0.48554525566101076} +{"epoch": 0, "iter": 1903, "iter_tflops": 41.172212189140474, "iter_time": 0.49504651260375976, "loss": 0.06723739951848984, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 45.17757290869514, "step_time": 0.45115659713745115} +{"epoch": 0, "iter": 1904, "iter_tflops": 40.99789696829416, "iter_time": 0.49715135574340813, "loss": 0.07214280217885971, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 45.060701899396065, "step_time": 0.45232673263549805} +{"epoch": 0, "iter": 1905, "iter_tflops": 3.6758141689911077, "iter_time": 0.7046136016845702, "loss": 0.027037475258111954, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 3.9589064285455033, "step_time": 0.6542283096313476} +{"epoch": 0, "iter": 1906, "iter_tflops": 1.2283355991865903, "iter_time": 2.108567611694336, "loss": 0.20777527987957, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 1.3911148498423083, "step_time": 1.8618366851806638} +{"epoch": 0, "iter": 1907, "iter_tflops": 2.153354923236445, "iter_time": 1.2027876281738283, "loss": 0.0829169899225235, "lr": 
3e-05, "seqlen": 1056.0, "step_tflops": 2.53686801585303, "step_time": 1.0209552268981934} +{"epoch": 0, "iter": 1908, "iter_tflops": 6.003086254375969, "iter_time": 0.4314495162963867, "loss": 0.284233033657074, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 6.499555379066879, "step_time": 0.39849320602416993} +{"epoch": 0, "iter": 1909, "iter_tflops": 23.95685944337794, "iter_time": 0.6154563217163086, "loss": 0.2574678957462311, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 25.59951882150358, "step_time": 0.5759639739990234} +{"epoch": 0, "iter": 1910, "iter_tflops": 22.448836110730504, "iter_time": 0.6568002243041992, "loss": 0.2873148024082184, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.17629997436746, "step_time": 0.6098700218200683} +{"epoch": 0, "iter": 1911, "iter_tflops": 24.819588141604928, "iter_time": 0.5940630645751953, "loss": 0.34858381748199463, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 26.570043256373577, "step_time": 0.5549257278442382} +{"epoch": 0, "iter": 1912, "iter_tflops": 21.978567463709414, "iter_time": 0.6708535766601562, "loss": 0.339124470949173, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.669441495115183, "step_time": 0.6229298057556152} +{"epoch": 0, "iter": 1913, "iter_tflops": 21.749184833951183, "iter_time": 0.9485915756225586, "loss": 0.04211655259132385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.101881035981634, "step_time": 0.893048210144043} +{"epoch": 0, "iter": 1914, "iter_tflops": 8.638150213170888, "iter_time": 2.388369384765625, "loss": 0.06574802100658417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.68657568862108, "step_time": 1.7653668670654294} +{"epoch": 0, "iter": 1915, "iter_tflops": 12.617351459802535, "iter_time": 1.6351366271972654, "loss": 0.07659487426280975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.005013135456911, "step_time": 1.3749467144012453} +{"epoch": 0, "iter": 1916, "iter_tflops": 40.42579782894355, "iter_time": 0.5103447456359864, "loss": 0.03703496605157852, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 44.70700201979549, "step_time": 0.4614734287261963} +{"epoch": 0, "iter": 1917, "iter_tflops": 19.615353326394562, "iter_time": 0.7558398971557616, "loss": 0.13975264132022858, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 21.18650735348254, "step_time": 0.699788143157959} +{"epoch": 0, "iter": 1918, "iter_tflops": 21.456866051380764, "iter_time": 0.6909707412719727, "loss": 0.34838536381721497, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 23.052870721169327, "step_time": 0.6431332054138184} +{"epoch": 0, "iter": 1919, "iter_tflops": 24.358207215466656, "iter_time": 0.6086682205200196, "loss": 0.21194328367710114, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 26.11282102077984, "step_time": 0.5677696266174317} +{"epoch": 0, "iter": 1920, "iter_tflops": 22.906372580500573, "iter_time": 0.6472463760375977, "loss": 0.2010434865951538, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 24.63521791567236, "step_time": 0.6018240509033204} +{"epoch": 0, "iter": 1921, "iter_tflops": 27.886241048893382, "iter_time": 0.73983056640625, "loss": 0.15249046683311462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.34908954103125, "step_time": 0.6797928314208984} +{"epoch": 0, "iter": 1922, "iter_tflops": 11.466012461745434, "iter_time": 1.799325927734375, "loss": 0.133957639336586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.194235913549551, "step_time": 1.3578236923217775} +{"epoch": 0, "iter": 1923, "iter_tflops": 28.380733796452958, "iter_time": 0.7269401016235351, "loss": 0.13156647980213165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.33019875126633, "step_time": 0.5839506778717042} +{"epoch": 0, "iter": 1924, "iter_tflops": 46.5413203562542, "iter_time": 0.4432855224609375, "loss": 0.06811615824699402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.99011733302029, "step_time": 0.4046096496582031} +{"epoch": 0, "iter": 1925, "iter_tflops": 15.098228849432916, "iter_time": 0.9387370452880859, "loss": 0.17657582461833954, "lr": 
3e-05, "seqlen": 5680.0, "step_tflops": 16.2446902496804, "step_time": 0.8724861183166505} +{"epoch": 0, "iter": 1926, "iter_tflops": 9.302182485792025, "iter_time": 1.5236496124267576, "loss": 0.32819998264312744, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 12.391842725257456, "step_time": 1.143757797241211} +{"epoch": 0, "iter": 1927, "iter_tflops": 26.191640403246897, "iter_time": 0.5411370391845703, "loss": 0.279411643743515, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 27.886970368086065, "step_time": 0.50823974609375} +{"epoch": 0, "iter": 1928, "iter_tflops": 24.33295102944798, "iter_time": 0.58247216796875, "loss": 0.3302159905433655, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 25.93134789481156, "step_time": 0.5465688400268554} +{"epoch": 0, "iter": 1929, "iter_tflops": 27.905678928242587, "iter_time": 0.7318810348510743, "loss": 0.10030698031187057, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 29.468045854691606, "step_time": 0.6930774192810059} +{"epoch": 0, "iter": 1930, "iter_tflops": 11.73262542597689, "iter_time": 1.7407559204101561, "loss": 0.10774682462215424, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 14.78731674999489, "step_time": 1.3811591053009031} +{"epoch": 0, "iter": 1931, "iter_tflops": 41.946433671818134, "iter_time": 0.4868980598449707, "loss": 0.10372013598680496, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 46.33816640898199, "step_time": 0.4407519493103027} +{"epoch": 0, "iter": 1932, "iter_tflops": 44.053584226024505, "iter_time": 0.4636089782714844, "loss": 0.16571073234081268, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 48.43266421381978, "step_time": 0.4216913833618164} +{"epoch": 0, "iter": 1933, "iter_tflops": 22.45151385319997, "iter_time": 0.9189177017211915, "loss": 0.7461532950401306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.368617480909396, "step_time": 0.8466255226135254} +{"epoch": 0, "iter": 1934, "iter_tflops": 28.56551633330967, "iter_time": 0.7222377243041992, "loss": 1.035122036933899, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 34.963670121248164, "step_time": 0.59007230758667} +{"epoch": 0, "iter": 1935, "iter_tflops": 49.077586331380225, "iter_time": 0.4203771018981934, "loss": 0.8140521049499512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.43490576923206, "step_time": 0.38609768676757805} +{"epoch": 0, "iter": 1936, "iter_tflops": 49.83482748682796, "iter_time": 0.4139894638061523, "loss": 0.8028548359870911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.64628034174852, "step_time": 0.3845764026641846} +{"epoch": 0, "iter": 1937, "iter_tflops": 19.848767350194628, "iter_time": 1.0394143447875976, "loss": 0.7035864591598511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.68599016862373, "step_time": 0.99734619140625} +{"epoch": 0, "iter": 1938, "iter_tflops": 14.122940990877352, "iter_time": 1.4608213348388674, "loss": 0.7915038466453552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.73041871951815, "step_time": 1.0456490459442138} +{"epoch": 0, "iter": 1939, "iter_tflops": 48.03256915427639, "iter_time": 0.429523006439209, "loss": 0.6796966195106506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.828542757063396, "step_time": 0.3980643177032471} +{"epoch": 0, "iter": 1940, "iter_tflops": 46.819950388509554, "iter_time": 0.44064748764038086, "loss": 0.7061597108840942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.345316929870194, "step_time": 0.4097917098999023} +{"epoch": 0, "iter": 1941, "iter_tflops": 36.00247849419623, "iter_time": 0.5730464782714845, "loss": 0.4497440457344055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.55226696034427, "step_time": 0.535146053314209} +{"epoch": 0, "iter": 1942, "iter_tflops": 16.67317927735699, "iter_time": 1.2373820953369141, "loss": 0.3702084720134735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.540727490760425, "step_time": 1.0557996635437013} +{"epoch": 0, "iter": 1943, "iter_tflops": 37.322238613315015, "iter_time": 0.5527828521728516, "loss": 0.6105743646621704, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 41.00108693600604, "step_time": 0.5031840629577637} +{"epoch": 0, "iter": 1944, "iter_tflops": 40.80527330974747, "iter_time": 0.5055987091064453, "loss": 0.6055996417999268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62523884456005, "step_time": 0.46231894874572754} +{"epoch": 0, "iter": 1945, "iter_tflops": 26.380400658495724, "iter_time": 0.7820614166259765, "loss": 1.041092872619629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.962688325051264, "step_time": 0.7378079414367676} +{"epoch": 0, "iter": 1946, "iter_tflops": 23.887586320413885, "iter_time": 0.8636742630004883, "loss": 0.969152569770813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.853835408569637, "step_time": 0.7682736263275146} +{"epoch": 0, "iter": 1947, "iter_tflops": 47.08962144197592, "iter_time": 0.43812400436401366, "loss": 1.055524468421936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.80789299507875, "step_time": 0.4060607967376709} +{"epoch": 0, "iter": 1948, "iter_tflops": 43.9094217791087, "iter_time": 0.46985573196411135, "loss": 0.9388422966003418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17176099092103, "step_time": 0.43736110496520997} +{"epoch": 0, "iter": 1949, "iter_tflops": 49.029008612600286, "iter_time": 0.42079360961914064, "loss": 0.4229896366596222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.66290761229004, "step_time": 0.3844572429656982} +{"epoch": 0, "iter": 1950, "iter_tflops": 48.23804148849963, "iter_time": 0.4276934318542481, "loss": 0.4290260076522827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.99923448747868, "step_time": 0.38927153778076173} +{"epoch": 0, "iter": 1951, "iter_tflops": 48.852412588314685, "iter_time": 0.42231473159790045, "loss": 0.42800140380859375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.00854297630991, "step_time": 0.3892031803131104} +{"epoch": 0, "iter": 1952, "iter_tflops": 51.87730858939104, "iter_time": 0.39769012832641604, "loss": 0.6102730631828308, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 56.20576655950637, "step_time": 0.36706364440917966} +{"epoch": 0, "iter": 1953, "iter_tflops": 23.65083708827313, "iter_time": 0.8723198013305664, "loss": 0.4639391601085663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.887334025986426, "step_time": 0.8289796524047851} +{"epoch": 0, "iter": 1954, "iter_tflops": 25.3297440197548, "iter_time": 0.8145006713867188, "loss": 0.30383870005607605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.385503442605454, "step_time": 0.7268179531097412} +{"epoch": 0, "iter": 1955, "iter_tflops": 55.1085263184241, "iter_time": 0.3743720779418945, "loss": 0.30158668756484985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.955935606616265, "step_time": 0.3441042709350586} +{"epoch": 0, "iter": 1956, "iter_tflops": 53.09298586827949, "iter_time": 0.3885841636657715, "loss": 0.4748520255088806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.2756154870169, "step_time": 0.36020727729797364} +{"epoch": 0, "iter": 1957, "iter_tflops": 41.065896783411496, "iter_time": 0.5023899421691895, "loss": 0.9298700094223022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.42089267237986, "step_time": 0.4644457206726074} +{"epoch": 0, "iter": 1958, "iter_tflops": 42.14485504850493, "iter_time": 0.4895281639099121, "loss": 1.0178719758987427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.18676317019673, "step_time": 0.4565738296508789} +{"epoch": 0, "iter": 1959, "iter_tflops": 44.78521460115357, "iter_time": 0.46066751480102536, "loss": 1.117785096168518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.06688734445522, "step_time": 0.4292163410186768} +{"epoch": 0, "iter": 1960, "iter_tflops": 47.602976739452146, "iter_time": 0.43339923095703126, "loss": 0.8399420976638794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.0577332368296, "step_time": 0.404073823928833} +{"epoch": 0, "iter": 1961, "iter_tflops": 37.95924713938403, "iter_time": 0.5435063934326172, "loss": 1.1247804164886475, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 40.76194018948721, "step_time": 0.5061361999511719} +{"epoch": 0, "iter": 1962, "iter_tflops": 20.366458824702526, "iter_time": 1.01299365234375, "loss": 1.186092495918274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.576013450448027, "step_time": 0.839480884552002} +{"epoch": 0, "iter": 1963, "iter_tflops": 37.23150300514665, "iter_time": 0.5541300201416015, "loss": 0.9813944697380066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.34727924896657, "step_time": 0.511337911605835} +{"epoch": 0, "iter": 1964, "iter_tflops": 39.05260317494623, "iter_time": 0.5282898406982421, "loss": 1.1038845777511597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22309404958998, "step_time": 0.4886210727691651} +{"epoch": 0, "iter": 1965, "iter_tflops": 19.014316902174617, "iter_time": 1.0850294342041016, "loss": 0.9257489442825317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.164861848874317, "step_time": 1.023120994567871} +{"epoch": 0, "iter": 1966, "iter_tflops": 18.643636769039933, "iter_time": 1.1066024169921875, "loss": 1.045908808708191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.314896008433358, "step_time": 0.7286303825378418} +{"epoch": 0, "iter": 1967, "iter_tflops": 47.20315610970152, "iter_time": 0.43707021331787105, "loss": 1.1099636554718018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.11421265204106, "step_time": 0.4036273365020752} +{"epoch": 0, "iter": 1968, "iter_tflops": 39.77922016888164, "iter_time": 0.5186399688720703, "loss": 1.0423331260681152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.31593737829048, "step_time": 0.476293363571167} +{"epoch": 0, "iter": 1969, "iter_tflops": 33.468134364861974, "iter_time": 0.6164399032592773, "loss": 0.4261407256126404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.804321055012835, "step_time": 0.5605617198944092} +{"epoch": 0, "iter": 1970, "iter_tflops": 37.73736425898543, "iter_time": 0.5467020263671875, "loss": 0.7875824570655823, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 41.147198413010315, "step_time": 0.5013972835540772} +{"epoch": 0, "iter": 1971, "iter_tflops": 45.43524962691131, "iter_time": 0.4540768165588378, "loss": 0.729912281036377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.59497348789958, "step_time": 0.41599162292480474} +{"epoch": 0, "iter": 1972, "iter_tflops": 44.584092763065286, "iter_time": 0.4627456169128418, "loss": 0.6703218221664429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.406199927248984, "step_time": 0.4262076663970948} +{"epoch": 0, "iter": 1973, "iter_tflops": 19.04784487523855, "iter_time": 1.0831195678710939, "loss": 0.7604402899742126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.153516278227045, "step_time": 1.0236969680786134} +{"epoch": 0, "iter": 1974, "iter_tflops": 14.759947088703582, "iter_time": 1.3977755737304687, "loss": 0.5560610294342041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.721499326767344, "step_time": 1.164184425354004} +{"epoch": 0, "iter": 1975, "iter_tflops": 39.25896559308352, "iter_time": 0.5255129165649415, "loss": 1.0137311220169067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.61982057472687, "step_time": 0.4840727443695068} +{"epoch": 0, "iter": 1976, "iter_tflops": 39.51152662968243, "iter_time": 0.522153793334961, "loss": 0.955346405506134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.48861696836534, "step_time": 0.4855675468444824} +{"epoch": 0, "iter": 1977, "iter_tflops": 34.62780259291952, "iter_time": 0.5957956314086914, "loss": 1.0507817268371582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92047974895754, "step_time": 0.5440620384216308} +{"epoch": 0, "iter": 1978, "iter_tflops": 35.26091130521694, "iter_time": 0.5850981369018555, "loss": 0.919395923614502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.13871361002022, "step_time": 0.5409488563537598} +{"epoch": 0, "iter": 1979, "iter_tflops": 35.61097483433698, "iter_time": 0.5793464965820312, "loss": 0.7924964427947998, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 38.64187673612587, "step_time": 0.5339050598144531} +{"epoch": 0, "iter": 1980, "iter_tflops": 39.10897863511912, "iter_time": 0.5275283126831054, "loss": 0.9432628750801086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.143270819599095, "step_time": 0.48954656600952146} +{"epoch": 0, "iter": 1981, "iter_tflops": 26.54108648926852, "iter_time": 0.7773266372680664, "loss": 1.059759497642517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.38209687831939, "step_time": 0.72690518951416} +{"epoch": 0, "iter": 1982, "iter_tflops": 8.276429350232634, "iter_time": 2.4927529296874997, "loss": 1.0317652225494385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.265464628250653, "step_time": 2.226665832519531} +{"epoch": 0, "iter": 1983, "iter_tflops": 14.33563464319202, "iter_time": 1.4391475524902344, "loss": 1.1274727582931519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.866482269232403, "step_time": 1.223200735092163} +{"epoch": 0, "iter": 1984, "iter_tflops": 36.342176353630286, "iter_time": 0.5676900939941405, "loss": 0.8482134938240051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.429962357044545, "step_time": 0.5232339134216308} +{"epoch": 0, "iter": 1985, "iter_tflops": 11.548291361827477, "iter_time": 1.3830074615478516, "loss": 0.37919655442237854, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 12.013854038547242, "step_time": 1.3294129486083983} +{"epoch": 0, "iter": 1986, "iter_tflops": 12.668927783328796, "iter_time": 1.2606728363037112, "loss": 0.23560041189193726, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.099453686916975, "step_time": 1.0577450981140137} +{"epoch": 0, "iter": 1987, "iter_tflops": 25.656352308662772, "iter_time": 0.6225114517211914, "loss": 0.42597928643226624, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 27.517137652028016, "step_time": 0.5804154968261719} +{"epoch": 0, "iter": 1988, "iter_tflops": 23.549419291855678, "iter_time": 0.6782066650390626, "loss": 0.3794853687286377, "lr": 3e-05, 
"seqlen": 6384.0, "step_tflops": 25.237208019103253, "step_time": 0.6328502388000489} +{"epoch": 0, "iter": 1989, "iter_tflops": 21.488371016034094, "iter_time": 0.9601050491333007, "loss": 0.6729849576950073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.768657197057472, "step_time": 0.906118148803711} +{"epoch": 0, "iter": 1990, "iter_tflops": 18.19360343553656, "iter_time": 1.1339751129150388, "loss": 0.5208081007003784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.788370145969775, "step_time": 0.8672764625549316} +{"epoch": 0, "iter": 1991, "iter_tflops": 45.38509986626859, "iter_time": 0.45457856369018557, "loss": 0.5086835026741028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.00078380909414, "step_time": 0.42103598976135254} +{"epoch": 0, "iter": 1992, "iter_tflops": 54.167965053648985, "iter_time": 0.3808725967407227, "loss": 0.6306808590888977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.46335502535439, "step_time": 0.3528893184661865} +{"epoch": 0, "iter": 1993, "iter_tflops": 53.10355392572026, "iter_time": 0.3885068321228028, "loss": 0.061764005571603775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.63144922727397, "step_time": 0.35187759780883787} +{"epoch": 0, "iter": 1994, "iter_tflops": 39.1051203915349, "iter_time": 0.5275803604125977, "loss": 0.05695468559861183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.53179068214687, "step_time": 0.4632891063690185} +{"epoch": 0, "iter": 1995, "iter_tflops": 40.64148287862474, "iter_time": 0.5076363372802735, "loss": 0.03189320117235184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.53279795384452, "step_time": 0.46327862739562997} +{"epoch": 0, "iter": 1996, "iter_tflops": 48.89072414622953, "iter_time": 0.4219837989807129, "loss": 0.038247816264629364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.86987143634065, "step_time": 0.38298018836975095} +{"epoch": 0, "iter": 1997, "iter_tflops": 19.144164533203494, "iter_time": 1.0776700897216798, "loss": 0.395749568939209, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 20.62947238919057, "step_time": 1.000078582763672} +{"epoch": 0, "iter": 1998, "iter_tflops": 20.092851826897725, "iter_time": 1.0267877197265625, "loss": 0.2439306229352951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.7091108771801, "step_time": 0.8349589595794679} +{"epoch": 0, "iter": 1999, "iter_tflops": 48.59868529331292, "iter_time": 0.42451958084106456, "loss": 0.31363722681999207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.831272061428095, "step_time": 0.39050911903381347} +{"epoch": 0, "iter": 2000, "iter_tflops": 49.85232692888531, "iter_time": 0.4138441429138184, "loss": 0.30146026611328125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.29619620966915, "step_time": 0.3799730911254883} +{"epoch": 0, "iter": 2001, "iter_tflops": 43.09633881847508, "iter_time": 0.47872032928466796, "loss": 0.3982393145561218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79475133456667, "step_time": 0.44088477706909174} +{"epoch": 0, "iter": 2002, "iter_tflops": 10.626013591326167, "iter_time": 1.9415647583007813, "loss": 0.25444892048835754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.3134249408175, "step_time": 1.6754959411621095} +{"epoch": 0, "iter": 2003, "iter_tflops": 11.250389938188459, "iter_time": 1.8338114166259765, "loss": 0.4410589635372162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.8605782060749, "step_time": 1.4884727897644043} +{"epoch": 0, "iter": 2004, "iter_tflops": 21.380078240972498, "iter_time": 0.9649681015014648, "loss": 0.4464367628097534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.581377068604272, "step_time": 0.8064887771606444} +{"epoch": 0, "iter": 2005, "iter_tflops": 13.758694219060885, "iter_time": 1.2085082092285155, "loss": 0.3427943289279938, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 14.423761837426763, "step_time": 1.1527849044799805} +{"epoch": 0, "iter": 2006, "iter_tflops": 9.411237780880013, "iter_time": 1.7667702484130858, "loss": 0.20212507247924805, 
"lr": 3e-05, "seqlen": 6640.0, "step_tflops": 11.514671219430092, "step_time": 1.4440268936157226} +{"epoch": 0, "iter": 2007, "iter_tflops": 10.67210938830728, "iter_time": 1.5580326538085938, "loss": 0.28123244643211365, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 11.856340352271125, "step_time": 1.4024137649536133} +{"epoch": 0, "iter": 2008, "iter_tflops": 23.14644108125837, "iter_time": 0.7183607559204102, "loss": 0.36628758907318115, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 26.978402489224376, "step_time": 0.6163261489868164} +{"epoch": 0, "iter": 2009, "iter_tflops": 15.212072467687998, "iter_time": 0.9451136245727539, "loss": 0.29397520422935486, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 15.937121173758024, "step_time": 0.902116310119629} +{"epoch": 0, "iter": 2010, "iter_tflops": 12.68533390074199, "iter_time": 1.1333668518066407, "loss": 0.3354598581790924, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 14.063786624382356, "step_time": 1.0222806510925293} +{"epoch": 0, "iter": 2011, "iter_tflops": 26.07956925317141, "iter_time": 0.5512796936035156, "loss": 0.1517370343208313, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 27.85308387218001, "step_time": 0.516177562713623} +{"epoch": 0, "iter": 2012, "iter_tflops": 27.001856459118866, "iter_time": 0.5324499435424804, "loss": 0.37780722975730896, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 28.65409321898446, "step_time": 0.501748104095459} +{"epoch": 0, "iter": 2013, "iter_tflops": 35.544707749103836, "iter_time": 0.5804265899658203, "loss": 0.2695796489715576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.307629006939514, "step_time": 0.5385635719299318} +{"epoch": 0, "iter": 2014, "iter_tflops": 13.270316614812883, "iter_time": 1.5546798248291016, "loss": 0.1618800312280655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.3269733332526, "step_time": 1.3460644226074217} +{"epoch": 0, "iter": 2015, "iter_tflops": 52.53798487718729, "iter_time": 0.39268909072875974, "loss": 
0.13995203375816345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.30248435846225, "step_time": 0.36003837776184083} +{"epoch": 0, "iter": 2016, "iter_tflops": 52.43337035695908, "iter_time": 0.39347257995605467, "loss": 0.19660162925720215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.956114215194816, "step_time": 0.36222789764404295} +{"epoch": 0, "iter": 2017, "iter_tflops": 36.224293025566304, "iter_time": 0.5695375061035156, "loss": 0.07313477247953415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.82117195908012, "step_time": 0.531439224243164} +{"epoch": 0, "iter": 2018, "iter_tflops": 11.459640111465225, "iter_time": 1.8003264770507812, "loss": 0.004658385645598173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.270448990248829, "step_time": 1.4457214012145998} +{"epoch": 0, "iter": 2019, "iter_tflops": 49.74404362564447, "iter_time": 0.41474500274658194, "loss": 0.005918483715504408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.762522855576165, "step_time": 0.37673745536804204} +{"epoch": 0, "iter": 2020, "iter_tflops": 59.89818943231556, "iter_time": 0.3444360122680664, "loss": 0.007241083774715662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.6937071219915, "step_time": 0.31404976844787597} +{"epoch": 0, "iter": 2021, "iter_tflops": 32.56514240189616, "iter_time": 0.6335330352783204, "loss": 0.5074195861816406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.495351160744086, "step_time": 0.5980833015441894} +{"epoch": 0, "iter": 2022, "iter_tflops": 11.044420899033009, "iter_time": 1.8680104370117188, "loss": 0.474933385848999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.849905244020562, "step_time": 1.4896198310852053} +{"epoch": 0, "iter": 2023, "iter_tflops": 45.03315998102282, "iter_time": 0.4581311531066895, "loss": 0.4749360680580139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.697272479089925, "step_time": 0.42366014480590825} +{"epoch": 0, "iter": 2024, "iter_tflops": 51.9891251162171, "iter_time": 
0.39683478927612303, "loss": 0.40707385540008545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.149667386452414, "step_time": 0.36743037796020506} +{"epoch": 0, "iter": 2025, "iter_tflops": 36.77084759297315, "iter_time": 0.5610720138549805, "loss": 0.05143710970878601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.49927020925644, "step_time": 0.5223158149719238} +{"epoch": 0, "iter": 2026, "iter_tflops": 23.96182121025027, "iter_time": 0.8609985580444336, "loss": 0.05042107775807381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.072815994740942, "step_time": 0.6860379657745361} +{"epoch": 0, "iter": 2027, "iter_tflops": 47.28191395120755, "iter_time": 0.43634218215942383, "loss": 0.04302871227264404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.20748580274051, "step_time": 0.40289213943481444} +{"epoch": 0, "iter": 2028, "iter_tflops": 57.53901414262986, "iter_time": 0.35855834197998043, "loss": 0.021453477442264557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.90434506529997, "step_time": 0.3279756507873535} +{"epoch": 0, "iter": 2029, "iter_tflops": 37.22312316907766, "iter_time": 0.554254768371582, "loss": 0.26790183782577515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.00901938220549, "step_time": 0.5156610641479492} +{"epoch": 0, "iter": 2030, "iter_tflops": 13.724437469445864, "iter_time": 1.5032378234863282, "loss": 0.23274728655815125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.10821512317758, "step_time": 1.2807808532714844} +{"epoch": 0, "iter": 2031, "iter_tflops": 29.802547978918355, "iter_time": 0.6922593841552734, "loss": 0.34545814990997314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.48625927701166, "step_time": 0.5360638809204101} +{"epoch": 0, "iter": 2032, "iter_tflops": 49.44025883969928, "iter_time": 0.4172933959960938, "loss": 0.1943950206041336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21350702237051, "step_time": 0.3877040748596192} +{"epoch": 0, "iter": 2033, "iter_tflops": 34.464398390999534, 
"iter_time": 0.5986204452514648, "loss": 0.8280671238899231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.87721413690135, "step_time": 0.5594536895751953} +{"epoch": 0, "iter": 2034, "iter_tflops": 35.205661495369526, "iter_time": 0.586016357421875, "loss": 0.7111486792564392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.252186735062764, "step_time": 0.46621636199951166} +{"epoch": 0, "iter": 2035, "iter_tflops": 45.36546364950457, "iter_time": 0.4547753257751465, "loss": 0.6872566938400269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.85380396974558, "step_time": 0.4223027038574219} +{"epoch": 0, "iter": 2036, "iter_tflops": 45.8600087273326, "iter_time": 0.4498711204528808, "loss": 0.6282714009284973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.05618199286338, "step_time": 0.42056052207946776} +{"epoch": 0, "iter": 2037, "iter_tflops": 34.759635461226104, "iter_time": 0.5935359573364257, "loss": 0.08080365508794785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.10654627975284, "step_time": 0.5559960594177246} +{"epoch": 0, "iter": 2038, "iter_tflops": 16.203463875344358, "iter_time": 1.2732520446777345, "loss": 0.12250129878520966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.398763963612822, "step_time": 0.9641254768371582} +{"epoch": 0, "iter": 2039, "iter_tflops": 39.49151651903872, "iter_time": 0.5224183654785157, "loss": 0.0842684954404831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.352551519661766, "step_time": 0.47589110183715827} +{"epoch": 0, "iter": 2040, "iter_tflops": 40.3452983154446, "iter_time": 0.5113630180358887, "loss": 0.13849975168704987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.81506649431801, "step_time": 0.470867561340332} +{"epoch": 0, "iter": 2041, "iter_tflops": 22.66480308125334, "iter_time": 0.9102701416015626, "loss": 0.01711130142211914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.592602864226183, "step_time": 0.8389145965576171} +{"epoch": 0, "iter": 2042, "iter_tflops": 27.48861918060966, 
"iter_time": 0.7505321884155274, "loss": 0.008120911195874214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.027894018210898, "step_time": 0.6649208450317383} +{"epoch": 0, "iter": 2043, "iter_tflops": 52.87284198245988, "iter_time": 0.3902020912170411, "loss": 0.014236512593925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.74774365906885, "step_time": 0.3572623310089112} +{"epoch": 0, "iter": 2044, "iter_tflops": 62.04020902381067, "iter_time": 0.3325439071655274, "loss": 0.004378091543912888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 68.0218210091352, "step_time": 0.3033011054992676} +{"epoch": 0, "iter": 2045, "iter_tflops": 23.66582580808841, "iter_time": 0.871767318725586, "loss": 1.0414011478424072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.791276745518793, "step_time": 0.832191650390625} +{"epoch": 0, "iter": 2046, "iter_tflops": 12.723974036455242, "iter_time": 1.6214347381591798, "loss": 0.688237190246582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.35033640024299, "step_time": 1.189088962554932} +{"epoch": 0, "iter": 2047, "iter_tflops": 12.641637938134071, "iter_time": 1.6319952850341795, "loss": 0.7720578908920288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.364465013943525, "step_time": 1.4362590942382814} +{"epoch": 0, "iter": 2048, "iter_tflops": 19.20058871175015, "iter_time": 1.0745031738281252, "loss": 0.6581771373748779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.747380520814758, "step_time": 0.9069656829833984} +{"epoch": 0, "iter": 2049, "iter_tflops": 20.274241106124258, "iter_time": 0.7413502807617188, "loss": 0.40055474638938904, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 22.145557634984225, "step_time": 0.6787056159973145} +{"epoch": 0, "iter": 2050, "iter_tflops": 21.844510007167425, "iter_time": 0.6880591201782227, "loss": 0.36926981806755066, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 23.33600073988217, "step_time": 0.6440826988220215} +{"epoch": 0, "iter": 2051, "iter_tflops": 22.477294248412974, 
"iter_time": 0.6686887741088866, "loss": 0.3611176609992981, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.232499549825814, "step_time": 0.6202543945312501} +{"epoch": 0, "iter": 2052, "iter_tflops": 22.841497906271055, "iter_time": 0.6580266494750976, "loss": 0.3156314790248871, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.585868940497758, "step_time": 0.6113395614624024} +{"epoch": 0, "iter": 2053, "iter_tflops": 21.513786242459393, "iter_time": 0.9589708328247071, "loss": 0.5884355902671814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.135608173022664, "step_time": 0.891746322631836} +{"epoch": 0, "iter": 2054, "iter_tflops": 30.42605771137435, "iter_time": 0.6780731735229493, "loss": 0.5353894829750061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.4617268869234, "step_time": 0.6165579433441162} +{"epoch": 0, "iter": 2055, "iter_tflops": 46.73192389771897, "iter_time": 0.44147751235961913, "loss": 0.7449979186058044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.524067241169426, "step_time": 0.40834189796447756} +{"epoch": 0, "iter": 2056, "iter_tflops": 51.199403224210506, "iter_time": 0.40295574188232425, "loss": 0.6661609411239624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.049133019127815, "step_time": 0.37477599334716793} +{"epoch": 0, "iter": 2057, "iter_tflops": 39.86078930022974, "iter_time": 0.4646313858032226, "loss": 0.004301536828279495, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 43.312768092307685, "step_time": 0.427600788116455} +{"epoch": 0, "iter": 2058, "iter_tflops": 47.02884140511424, "iter_time": 0.39381309890747074, "loss": 0.003553560934960842, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 53.560158137820885, "step_time": 0.3457901248931885} +{"epoch": 0, "iter": 2059, "iter_tflops": 53.08413827193664, "iter_time": 0.3488909187316894, "loss": 0.010270815342664719, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 57.823454972578624, "step_time": 0.32029517745971675} +{"epoch": 0, "iter": 2060, "iter_tflops": 
55.10991728693197, "iter_time": 0.3360660781860352, "loss": 0.01954452320933342, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 60.294358313263814, "step_time": 0.3071692657470703} +{"epoch": 0, "iter": 2061, "iter_tflops": 37.943229043118876, "iter_time": 0.54373583984375, "loss": 0.5449199676513672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.7738951131931, "step_time": 0.5059878005981445} +{"epoch": 0, "iter": 2062, "iter_tflops": 10.041880012449106, "iter_time": 2.0545050811767576, "loss": 0.610266387462616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.722428426044756, "step_time": 1.621631721496582} +{"epoch": 0, "iter": 2063, "iter_tflops": 16.670885036059683, "iter_time": 1.2375523834228515, "loss": 0.5855490565299988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.19554004581165, "step_time": 1.0215668144226075} +{"epoch": 0, "iter": 2064, "iter_tflops": 16.19873008067941, "iter_time": 1.2736241302490234, "loss": 0.6069445610046387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.11444585176978, "step_time": 0.9771079788208007} +{"epoch": 0, "iter": 2065, "iter_tflops": 20.33400597527139, "iter_time": 0.8217561798095704, "loss": 0.3619542419910431, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 21.449662916989727, "step_time": 0.7790143432617187} +{"epoch": 0, "iter": 2066, "iter_tflops": 10.06770170281515, "iter_time": 1.6597229003906249, "loss": 0.27153709530830383, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 12.483473153885061, "step_time": 1.3385373497009279} +{"epoch": 0, "iter": 2067, "iter_tflops": 24.73757379673946, "iter_time": 0.6754742889404297, "loss": 0.1800116002559662, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 26.580570904521664, "step_time": 0.6286394348144531} +{"epoch": 0, "iter": 2068, "iter_tflops": 26.31147148075975, "iter_time": 0.6350688171386719, "loss": 0.3182331919670105, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 28.291206865790386, "step_time": 0.5906285705566406} +{"epoch": 0, "iter": 2069, "iter_tflops": 
22.081053668922863, "iter_time": 0.9343346481323241, "loss": 0.8177550435066223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.717479964798233, "step_time": 0.8698687019348145} +{"epoch": 0, "iter": 2070, "iter_tflops": 19.50627725920972, "iter_time": 1.057664321899414, "loss": 0.8605721592903137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.846109753790078, "step_time": 0.8651764888763427} +{"epoch": 0, "iter": 2071, "iter_tflops": 47.41331098916256, "iter_time": 0.43513294219970705, "loss": 1.0466887950897217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.44829365711529, "step_time": 0.4010063705444336} +{"epoch": 0, "iter": 2072, "iter_tflops": 47.029231988345416, "iter_time": 0.43868659210205085, "loss": 0.927571713924408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.90307563947686, "step_time": 0.4053015117645264} +{"epoch": 0, "iter": 2073, "iter_tflops": 37.56476695759171, "iter_time": 0.5492139358520508, "loss": 0.75906902551651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.75749276369512, "step_time": 0.5061914291381836} +{"epoch": 0, "iter": 2074, "iter_tflops": 38.528494068414744, "iter_time": 0.5354762496948242, "loss": 0.9917805194854736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.09959777956641, "step_time": 0.49005440902709957} +{"epoch": 0, "iter": 2075, "iter_tflops": 42.16497828465915, "iter_time": 0.4892945365905762, "loss": 0.9445934891700745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.963978289016026, "step_time": 0.44885352134704587} +{"epoch": 0, "iter": 2076, "iter_tflops": 38.88335636737335, "iter_time": 0.5305893173217773, "loss": 0.9006488919258118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.47594228864414, "step_time": 0.48571243858337404} +{"epoch": 0, "iter": 2077, "iter_tflops": 36.29970895438308, "iter_time": 0.5683542404174805, "loss": 0.6522340178489685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06227731201918, "step_time": 0.5149755554199218} +{"epoch": 0, "iter": 2078, "iter_tflops": 
40.853994609924776, "iter_time": 0.5049957466125489, "loss": 0.6221900582313538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7522507300543, "step_time": 0.4610068359375} +{"epoch": 0, "iter": 2079, "iter_tflops": 39.83512479567423, "iter_time": 0.517912109375, "loss": 0.5113037824630737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.679875211490334, "step_time": 0.4723249187469482} +{"epoch": 0, "iter": 2080, "iter_tflops": 41.88725082458123, "iter_time": 0.49253873443603513, "loss": 0.5091495513916016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.641931920936486, "step_time": 0.452020601272583} +{"epoch": 0, "iter": 2081, "iter_tflops": 20.735361981534002, "iter_time": 0.9949714660644531, "loss": 0.25572818517684937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.323727303139204, "step_time": 0.9241778144836426} +{"epoch": 0, "iter": 2082, "iter_tflops": 19.996315455558516, "iter_time": 1.0317447509765625, "loss": 0.3051814138889313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.92373121972372, "step_time": 0.8623693904876709} +{"epoch": 0, "iter": 2083, "iter_tflops": 38.589566315568774, "iter_time": 0.5346287994384765, "loss": 0.22771869599819183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.09431718249156, "step_time": 0.4901158847808838} +{"epoch": 0, "iter": 2084, "iter_tflops": 44.783119724728415, "iter_time": 0.4606890640258789, "loss": 0.30900025367736816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.09589671645787, "step_time": 0.4202203216552734} +{"epoch": 0, "iter": 2085, "iter_tflops": 27.090375025614584, "iter_time": 0.7615654449462891, "loss": 0.48249271512031555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.172303646847134, "step_time": 0.7072150955200195} +{"epoch": 0, "iter": 2086, "iter_tflops": 9.25367865716633, "iter_time": 2.2295018310546872, "loss": 0.34790006279945374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.601729995137708, "step_time": 1.7782773361206055} +{"epoch": 0, "iter": 2087, "iter_tflops": 
10.945971477547364, "iter_time": 1.884811553955078, "loss": 0.31803348660469055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.008696310700383, "step_time": 1.374609298706055} +{"epoch": 0, "iter": 2088, "iter_tflops": 31.668503077087863, "iter_time": 0.6514704360961914, "loss": 0.33335477113723755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.913392083789674, "step_time": 0.4397698097229004} +{"epoch": 0, "iter": 2089, "iter_tflops": 18.765000284605975, "iter_time": 0.8336671981811523, "loss": 0.2351435124874115, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 19.732468585603886, "step_time": 0.7927931137084961} +{"epoch": 0, "iter": 2090, "iter_tflops": 21.5157255220328, "iter_time": 0.7270851821899413, "loss": 0.26972663402557373, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 25.901509099330394, "step_time": 0.6039711875915528} +{"epoch": 0, "iter": 2091, "iter_tflops": 24.044998039998138, "iter_time": 0.6506037216186523, "loss": 0.34533560276031494, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 25.958940891232203, "step_time": 0.6026349563598633} +{"epoch": 0, "iter": 2092, "iter_tflops": 22.87807878988496, "iter_time": 0.6837884140014648, "loss": 0.24820661544799805, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 24.510011619672024, "step_time": 0.6382602119445802} +{"epoch": 0, "iter": 2093, "iter_tflops": 22.044431666611196, "iter_time": 0.9358868408203125, "loss": 0.028960473835468292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.673156214136448, "step_time": 0.8714973754882812} +{"epoch": 0, "iter": 2094, "iter_tflops": 15.11316198212553, "iter_time": 1.365107681274414, "loss": 0.03931138291954994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.150643565496164, "step_time": 1.2029340724945068} +{"epoch": 0, "iter": 2095, "iter_tflops": 44.04374946641979, "iter_time": 0.46842273330688483, "loss": 0.0675133466720581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.73424399257232, "step_time": 0.4233387413024902} +{"epoch": 0, "iter": 2096, 
"iter_tflops": 44.75178747474112, "iter_time": 0.4610116081237793, "loss": 0.03059283271431923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27315768219103, "step_time": 0.4187085723876953} +{"epoch": 0, "iter": 2097, "iter_tflops": 32.74393656159022, "iter_time": 0.6300737075805664, "loss": 0.9341499209403992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.5172832543669, "step_time": 0.5808747634887695} +{"epoch": 0, "iter": 2098, "iter_tflops": 36.831599513006886, "iter_time": 0.5601465530395509, "loss": 0.9486710429191589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.10380683233104, "step_time": 0.5144422721862792} +{"epoch": 0, "iter": 2099, "iter_tflops": 35.071952146216496, "iter_time": 0.588250503540039, "loss": 0.7503706812858582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21206906778869, "step_time": 0.539910400390625} +{"epoch": 0, "iter": 2100, "iter_tflops": 40.24624806220769, "iter_time": 0.5126215362548828, "loss": 0.9762635827064514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.855823756408356, "step_time": 0.47042996215820315} +{"epoch": 0, "iter": 2101, "iter_tflops": 22.84447652644838, "iter_time": 0.9031108016967774, "loss": 0.6258305907249451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.203941691736688, "step_time": 0.8523856887817383} +{"epoch": 0, "iter": 2102, "iter_tflops": 19.573764088584987, "iter_time": 1.0540176849365235, "loss": 0.8127812147140503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.428716030884004, "step_time": 0.844542688369751} +{"epoch": 0, "iter": 2103, "iter_tflops": 50.714689394396196, "iter_time": 0.40680705642700193, "loss": 0.6756016612052917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.921189844904184, "step_time": 0.3756490631103515} +{"epoch": 0, "iter": 2104, "iter_tflops": 48.75837456514298, "iter_time": 0.42312923049926754, "loss": 0.5952030420303345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.78690192076868, "step_time": 0.3908373622894287} +{"epoch": 0, "iter": 2105, 
"iter_tflops": 38.6686336829936, "iter_time": 0.5335356216430664, "loss": 0.08784637600183487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.51269060606738, "step_time": 0.49698280715942383} +{"epoch": 0, "iter": 2106, "iter_tflops": 44.84226351161587, "iter_time": 0.4600814476013184, "loss": 0.12217288464307785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.78226143062069, "step_time": 0.4144266033172607} +{"epoch": 0, "iter": 2107, "iter_tflops": 54.8879814230836, "iter_time": 0.37587633895874023, "loss": 0.06400132924318314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.95349373029667, "step_time": 0.3441182861328125} +{"epoch": 0, "iter": 2108, "iter_tflops": 48.231641993350564, "iter_time": 0.4277501792907715, "loss": 0.0670490637421608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.32817976603895, "step_time": 0.39426354217529297} +{"epoch": 0, "iter": 2109, "iter_tflops": 25.643977853025383, "iter_time": 0.6419967803955078, "loss": 0.006626409478485584, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 27.27633766786704, "step_time": 0.6035763092041017} +{"epoch": 0, "iter": 2110, "iter_tflops": 9.778775083919188, "iter_time": 1.6835801086425781, "loss": 0.0033481582067906857, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 12.673194656422119, "step_time": 1.2990687561035155} +{"epoch": 0, "iter": 2111, "iter_tflops": 8.513654024391721, "iter_time": 1.933758544921875, "loss": 0.006692322436720133, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 9.448130068319966, "step_time": 1.7424983673095704} +{"epoch": 0, "iter": 2112, "iter_tflops": 24.384279993958128, "iter_time": 0.6751624908447265, "loss": 0.0054277339950203896, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 30.032806383042132, "step_time": 0.5481789150238037} +{"epoch": 0, "iter": 2113, "iter_tflops": 14.124095908074427, "iter_time": 1.012140869140625, "loss": 0.3436810076236725, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 15.003818903438393, "step_time": 0.9527957382202148} +{"epoch": 0, 
"iter": 2114, "iter_tflops": 21.25975274753036, "iter_time": 0.67242431640625, "loss": 0.4403945207595825, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 22.834403202171142, "step_time": 0.6260542297363281} +{"epoch": 0, "iter": 2115, "iter_tflops": 23.377672359034015, "iter_time": 0.6115054779052734, "loss": 0.4313999116420746, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 25.05542171177899, "step_time": 0.5705581359863282} +{"epoch": 0, "iter": 2116, "iter_tflops": 23.210118756389424, "iter_time": 0.6159199295043944, "loss": 0.32860177755355835, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 24.88150499253685, "step_time": 0.5745462226867676} +{"epoch": 0, "iter": 2117, "iter_tflops": 33.74676925234234, "iter_time": 0.6113501815795899, "loss": 0.939916729927063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.82557782530877, "step_time": 0.5602381477355957} +{"epoch": 0, "iter": 2118, "iter_tflops": 36.59633174107936, "iter_time": 0.5637475814819336, "loss": 0.9451339840888977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.47353727149464, "step_time": 0.5097427825927735} +{"epoch": 0, "iter": 2119, "iter_tflops": 34.957157741375745, "iter_time": 0.5901822357177734, "loss": 0.8207418918609619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.857071504172964, "step_time": 0.5449733085632325} +{"epoch": 0, "iter": 2120, "iter_tflops": 36.369964999750316, "iter_time": 0.56725634765625, "loss": 0.7806830406188965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.16590311365353, "step_time": 0.5267615928649901} +{"epoch": 0, "iter": 2121, "iter_tflops": 21.859217749949476, "iter_time": 0.9438166427612303, "loss": 0.09855089336633682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.239612535560433, "step_time": 0.887755485534668} +{"epoch": 0, "iter": 2122, "iter_tflops": 17.7108243382379, "iter_time": 1.1648861236572265, "loss": 0.1976136565208435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.922265352225946, "step_time": 1.0355796966552735} +{"epoch": 0, 
"iter": 2123, "iter_tflops": 42.7094560066708, "iter_time": 0.48305680847167976, "loss": 0.13158468902111053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.15555845872742, "step_time": 0.43751138114929194} +{"epoch": 0, "iter": 2124, "iter_tflops": 50.02607231339442, "iter_time": 0.41240682220458985, "loss": 0.15088018774986267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.69165231202766, "step_time": 0.3772256393432617} +{"epoch": 0, "iter": 2125, "iter_tflops": 37.24677280696661, "iter_time": 0.5539028472900391, "loss": 0.6635732054710388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.219503455870694, "step_time": 0.5005177593231201} +{"epoch": 0, "iter": 2126, "iter_tflops": 34.30122725051771, "iter_time": 0.6014680862426758, "loss": 0.3623393177986145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.20201114597073, "step_time": 0.5400525493621826} +{"epoch": 0, "iter": 2127, "iter_tflops": 38.209219863135026, "iter_time": 0.5399506607055664, "loss": 0.3861303925514221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.64878669062664, "step_time": 0.49535881233215334} +{"epoch": 0, "iter": 2128, "iter_tflops": 46.67266693787687, "iter_time": 0.44203802490234373, "loss": 0.5206729173660278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.844953986143175, "step_time": 0.4057648181915283} +{"epoch": 0, "iter": 2129, "iter_tflops": 20.591928752586405, "iter_time": 1.0019019470214845, "loss": 0.8012779355049133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.89195447502968, "step_time": 0.9424052810668946} +{"epoch": 0, "iter": 2130, "iter_tflops": 37.96935954997822, "iter_time": 0.5433616409301758, "loss": 0.9247267246246338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.43029244219219, "step_time": 0.49797122573852537} +{"epoch": 0, "iter": 2131, "iter_tflops": 46.994492557590085, "iter_time": 0.43901087951660156, "loss": 1.0848766565322876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.82546501947121, "step_time": 0.40592040824890135} +{"epoch": 
0, "iter": 2132, "iter_tflops": 46.486431353890474, "iter_time": 0.44380893325805665, "loss": 1.030212163925171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.92607420956505, "step_time": 0.41323284149169925} +{"epoch": 0, "iter": 2133, "iter_tflops": 34.62006704276221, "iter_time": 0.5959287567138671, "loss": 0.8993456959724426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.972684546485375, "step_time": 0.5580090751647949} +{"epoch": 0, "iter": 2134, "iter_tflops": 11.830068348328256, "iter_time": 1.743953872680664, "loss": 0.9423214197158813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.508844287931698, "step_time": 1.4219667053222655} +{"epoch": 0, "iter": 2135, "iter_tflops": 40.484930833916756, "iter_time": 0.5095993270874024, "loss": 1.0226908922195435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96545200405431, "step_time": 0.46925694084167485} +{"epoch": 0, "iter": 2136, "iter_tflops": 47.192892946833, "iter_time": 0.43716526412963863, "loss": 0.9217323064804077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.610766547845884, "step_time": 0.4076423835754394} +{"epoch": 0, "iter": 2137, "iter_tflops": 40.57381859505861, "iter_time": 0.5084829139709472, "loss": 0.3773747980594635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.88428190393143, "step_time": 0.47012489700317384} +{"epoch": 0, "iter": 2138, "iter_tflops": 13.846713224676446, "iter_time": 1.4899632263183593, "loss": 0.3906659185886383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.58336265549997, "step_time": 1.1733303756713866} +{"epoch": 0, "iter": 2139, "iter_tflops": 38.75649804797949, "iter_time": 0.5323260498046876, "loss": 0.41149353981018066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.24251242097117, "step_time": 0.48839645957946776} +{"epoch": 0, "iter": 2140, "iter_tflops": 41.220419393788625, "iter_time": 0.5005066375732422, "loss": 0.37270793318748474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.78271979582655, "step_time": 0.4606931781768799} 
+{"epoch": 0, "iter": 2141, "iter_tflops": 23.312112152831407, "iter_time": 0.8849946060180663, "loss": 0.5079179406166077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.252204913274255, "step_time": 0.817001667022705} +{"epoch": 0, "iter": 2142, "iter_tflops": 43.258923716054554, "iter_time": 0.4769211006164551, "loss": 0.6014013290405273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.89514950193078, "step_time": 0.4399408836364746} +{"epoch": 0, "iter": 2143, "iter_tflops": 54.0703963958596, "iter_time": 0.381559871673584, "loss": 0.6398342251777649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.809415053642695, "step_time": 0.35081276512146} +{"epoch": 0, "iter": 2144, "iter_tflops": 49.85489396508637, "iter_time": 0.4138228340148926, "loss": 0.6035416722297668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.32380046025649, "step_time": 0.3869021587371826} +{"epoch": 0, "iter": 2145, "iter_tflops": 41.61873605808519, "iter_time": 0.49571648406982427, "loss": 0.5579584240913391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22634450498579, "step_time": 0.4561742439270019} +{"epoch": 0, "iter": 2146, "iter_tflops": 17.519810338460516, "iter_time": 1.1775865783691404, "loss": 0.5335077047348022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.589515707794863, "step_time": 1.053170166015625} +{"epoch": 0, "iter": 2147, "iter_tflops": 46.55622135797812, "iter_time": 0.4431436424255371, "loss": 0.448652058839798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.35735002048177, "step_time": 0.40969378852844235} +{"epoch": 0, "iter": 2148, "iter_tflops": 48.693829360393046, "iter_time": 0.42369010162353515, "loss": 0.42926058173179626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.437156738797185, "step_time": 0.3934441680908203} +{"epoch": 0, "iter": 2149, "iter_tflops": 30.315401340998818, "iter_time": 0.6805482559204101, "loss": 0.7423272132873535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.15518495042052, "step_time": 0.641610164642334} 
+{"epoch": 0, "iter": 2150, "iter_tflops": 23.555956216625628, "iter_time": 0.8758334121704101, "loss": 0.6611841320991516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.173607544853304, "step_time": 0.7071834869384765} +{"epoch": 0, "iter": 2151, "iter_tflops": 48.17546256373101, "iter_time": 0.42824899673461914, "loss": 0.804531991481781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.096290036348435, "step_time": 0.39601847839355464} +{"epoch": 0, "iter": 2152, "iter_tflops": 46.402428100711475, "iter_time": 0.44461236953735356, "loss": 0.725842297077179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.05877698759414, "step_time": 0.4121373863220215} +{"epoch": 0, "iter": 2153, "iter_tflops": 38.30533173773408, "iter_time": 0.5385958709716796, "loss": 0.8993158340454102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.38976926621402, "step_time": 0.49845877075195316} +{"epoch": 0, "iter": 2154, "iter_tflops": 43.2791076160701, "iter_time": 0.4766986808776855, "loss": 0.5728521347045898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.45117230427548, "step_time": 0.44414580917358404} +{"epoch": 0, "iter": 2155, "iter_tflops": 43.26381028194342, "iter_time": 0.47686723327636726, "loss": 1.1559463739395142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.38431889944147, "step_time": 0.44478595352172856} +{"epoch": 0, "iter": 2156, "iter_tflops": 47.60559642353656, "iter_time": 0.4333753814697266, "loss": 0.9811210036277771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.047660861917215, "step_time": 0.4041535530090332} +{"epoch": 0, "iter": 2157, "iter_tflops": 37.68364034295005, "iter_time": 0.38910129928588866, "loss": 0.07236730307340622, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 41.58230467358462, "step_time": 0.35262002754211424} +{"epoch": 0, "iter": 2158, "iter_tflops": 35.37968969316961, "iter_time": 0.4144398536682129, "loss": 0.07864148914813995, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 38.87688022325524, "step_time": 
0.37715869522094725} +{"epoch": 0, "iter": 2159, "iter_tflops": 42.298034392004794, "iter_time": 0.34665330505371095, "loss": 0.027053343132138252, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 46.20527693672998, "step_time": 0.3173393688201904} +{"epoch": 0, "iter": 2160, "iter_tflops": 40.73726982415089, "iter_time": 0.35993461227416995, "loss": 0.0492994524538517, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 44.51764164464581, "step_time": 0.32936950111389157} +{"epoch": 0, "iter": 2161, "iter_tflops": 25.80185792589944, "iter_time": 0.7995972061157227, "loss": 0.38623955845832825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.132887151578917, "step_time": 0.7603722152709961} +{"epoch": 0, "iter": 2162, "iter_tflops": 15.454549691088856, "iter_time": 1.3349527435302735, "loss": 0.3367224931716919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.614474160548156, "step_time": 1.1083360900878905} +{"epoch": 0, "iter": 2163, "iter_tflops": 37.29613031719126, "iter_time": 0.5531698150634765, "loss": 0.3098146319389343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.53956324334587, "step_time": 0.5089125747680664} +{"epoch": 0, "iter": 2164, "iter_tflops": 42.28881848873114, "iter_time": 0.4878616676330566, "loss": 0.36739426851272583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.09920890816289, "step_time": 0.44753682327270505} +{"epoch": 0, "iter": 2165, "iter_tflops": 22.320037049760707, "iter_time": 0.924330612182617, "loss": 0.3531889319419861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.68879017762007, "step_time": 0.8709222106933594} +{"epoch": 0, "iter": 2166, "iter_tflops": 15.830780575961864, "iter_time": 1.303226547241211, "loss": 0.33210399746894836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.7788036884003, "step_time": 1.098637264251709} +{"epoch": 0, "iter": 2167, "iter_tflops": 51.146659898484295, "iter_time": 0.40337127685546875, "loss": 0.3423882722854614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.86006483780197, 
"step_time": 0.36933529472351073} +{"epoch": 0, "iter": 2168, "iter_tflops": 49.058908219091805, "iter_time": 0.4205371513366699, "loss": 0.38189518451690674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.67947772476477, "step_time": 0.3916343593597412} +{"epoch": 0, "iter": 2169, "iter_tflops": 41.95078259000716, "iter_time": 0.49179281616210935, "loss": 0.1340833157300949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.763359152982716, "step_time": 0.45082122230529786} +{"epoch": 0, "iter": 2170, "iter_tflops": 50.963784969036375, "iter_time": 0.4048187065124512, "loss": 0.12181135267019272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.01286601676731, "step_time": 0.3683277606964111} +{"epoch": 0, "iter": 2171, "iter_tflops": 51.90422904547976, "iter_time": 0.39748386383056644, "loss": 0.10924559086561203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.58110537815099, "step_time": 0.36462867546081534} +{"epoch": 0, "iter": 2172, "iter_tflops": 58.22714682399179, "iter_time": 0.3543208732604981, "loss": 0.06723099201917648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.348805017099494, "step_time": 0.3256745491027832} +{"epoch": 0, "iter": 2173, "iter_tflops": 46.78655019878178, "iter_time": 0.4409620590209961, "loss": 0.9280686378479004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.72559133616841, "step_time": 0.4067196254730225} +{"epoch": 0, "iter": 2174, "iter_tflops": 12.891326472074466, "iter_time": 1.6003856201171875, "loss": 0.809287428855896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.870438200630021, "step_time": 1.4874146881103516} +{"epoch": 0, "iter": 2175, "iter_tflops": 16.041339621463656, "iter_time": 1.2861203613281251, "loss": 0.8249478340148926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.832124933966305, "step_time": 1.0955265846252442} +{"epoch": 0, "iter": 2176, "iter_tflops": 31.517080008552416, "iter_time": 0.6546004104614258, "loss": 0.706670343875885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
39.81761859963823, "step_time": 0.5181398143768311} +{"epoch": 0, "iter": 2177, "iter_tflops": 15.621449627716228, "iter_time": 0.972624038696289, "loss": 0.13039979338645935, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 16.80388277841153, "step_time": 0.9041837310791017} +{"epoch": 0, "iter": 2178, "iter_tflops": 6.1680356469327755, "iter_time": 2.463312194824219, "loss": 0.22510796785354614, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 7.990241440419454, "step_time": 1.901544219970703} +{"epoch": 0, "iter": 2179, "iter_tflops": 9.364249574847621, "iter_time": 1.6225323028564451, "loss": 0.4018918573856354, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 11.774069782845894, "step_time": 1.2904456748962403} +{"epoch": 0, "iter": 2180, "iter_tflops": 23.82201735933671, "iter_time": 0.637804817199707, "loss": 0.2990882098674774, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.57809873164979, "step_time": 0.5940159034729005} +{"epoch": 0, "iter": 2181, "iter_tflops": 10.587845323503565, "iter_time": 1.4543334350585937, "loss": 0.4284326732158661, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 11.20676691675346, "step_time": 1.3740142517089842} +{"epoch": 0, "iter": 2182, "iter_tflops": 14.21837810381665, "iter_time": 1.082982696533203, "loss": 0.24398162961006165, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 16.885102261229484, "step_time": 0.9119433937072754} +{"epoch": 0, "iter": 2183, "iter_tflops": 27.844550677495185, "iter_time": 0.5530079345703125, "loss": 0.42748361825942993, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 29.537608011855244, "step_time": 0.5213102378845216} +{"epoch": 0, "iter": 2184, "iter_tflops": 26.774598168084758, "iter_time": 0.5751069488525391, "loss": 0.4744575321674347, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.413974778234792, "step_time": 0.5419254989624024} +{"epoch": 0, "iter": 2185, "iter_tflops": 24.19228736401697, "iter_time": 0.4450131225585937, "loss": 0.012121099978685379, "lr": 3e-05, "seqlen": 4336.0, "step_tflops": 
26.737981223376867, "step_time": 0.4026439113616943} +{"epoch": 0, "iter": 2186, "iter_tflops": 29.268158250939845, "iter_time": 0.3678361053466797, "loss": 0.007130592130124569, "lr": 3e-05, "seqlen": 4336.0, "step_tflops": 32.368256989008394, "step_time": 0.33260627365112305} +{"epoch": 0, "iter": 2187, "iter_tflops": 31.119569419296784, "iter_time": 0.34595225906372074, "loss": 0.004972339607775211, "lr": 3e-05, "seqlen": 4336.0, "step_tflops": 34.12128776355572, "step_time": 0.3155181427001953} +{"epoch": 0, "iter": 2188, "iter_tflops": 32.36793735880012, "iter_time": 0.33260955810546877, "loss": 0.003865211270749569, "lr": 3e-05, "seqlen": 4336.0, "step_tflops": 35.45218641716281, "step_time": 0.3036733818054199} +{"epoch": 0, "iter": 2189, "iter_tflops": 2.5833581446573075, "iter_time": 0.5914732131958007, "loss": 1.0563313961029053, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 2.753369745242261, "step_time": 0.5549516716003418} +{"epoch": 0, "iter": 2190, "iter_tflops": 0.8386497888090862, "iter_time": 1.8219609222412112, "loss": 1.202194333076477, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 1.0403035392182414, "step_time": 1.4687897186279295} +{"epoch": 0, "iter": 2191, "iter_tflops": 0.7743101290056754, "iter_time": 1.9733529052734375, "loss": 1.146106243133545, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 0.9321780033034244, "step_time": 1.63915811920166} +{"epoch": 0, "iter": 2192, "iter_tflops": 2.5368221292741686, "iter_time": 0.6023233261108397, "loss": 1.3946762084960938, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.2613772938430032, "step_time": 0.4685097751617432} +{"epoch": 0, "iter": 2193, "iter_tflops": 17.912558658907475, "iter_time": 0.7480581665039062, "loss": 0.3855331838130951, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 19.001032523747792, "step_time": 0.7052056655883788} +{"epoch": 0, "iter": 2194, "iter_tflops": 7.153033733601648, "iter_time": 1.8732801055908206, "loss": 0.4276517927646637, "lr": 3e-05, "seqlen": 5376.0, 
"step_tflops": 9.045765372412482, "step_time": 1.4813158683776857} +{"epoch": 0, "iter": 2195, "iter_tflops": 20.596442240749834, "iter_time": 0.6505801162719727, "loss": 0.2945467233657837, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 22.17868183757333, "step_time": 0.604167366027832} +{"epoch": 0, "iter": 2196, "iter_tflops": 21.540155878612072, "iter_time": 0.6220770111083984, "loss": 0.3294578492641449, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 22.97621875847083, "step_time": 0.5831958656311036} +{"epoch": 0, "iter": 2197, "iter_tflops": 25.84034742985472, "iter_time": 0.7984061965942383, "loss": 0.3925086259841919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.865119022174678, "step_time": 0.7403913650512696} +{"epoch": 0, "iter": 2198, "iter_tflops": 9.831577682493968, "iter_time": 2.0984519653320315, "loss": 0.2719659209251404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.43639176252222, "step_time": 1.8039862518310545} +{"epoch": 0, "iter": 2199, "iter_tflops": 15.572675482158783, "iter_time": 1.3248265228271485, "loss": 0.2452109456062317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.523903967959658, "step_time": 1.113755153656006} +{"epoch": 0, "iter": 2200, "iter_tflops": 27.01538547370657, "iter_time": 0.7636794052124024, "loss": 0.20180779695510864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.482891338660394, "step_time": 0.616168218612671} +{"epoch": 0, "iter": 2201, "iter_tflops": 19.421797514815452, "iter_time": 0.8054746322631836, "loss": 0.45668354630470276, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 20.43341837231622, "step_time": 0.7655970687866211} +{"epoch": 0, "iter": 2202, "iter_tflops": 12.174943062265545, "iter_time": 1.2849148559570311, "loss": 0.2831602692604065, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 14.48464915350845, "step_time": 1.0800237579345704} +{"epoch": 0, "iter": 2203, "iter_tflops": 27.10409435411338, "iter_time": 0.5771735076904296, "loss": 0.2465643435716629, "lr": 3e-05, "seqlen": 6256.0, 
"step_tflops": 28.805944823426035, "step_time": 0.5430741920471192} +{"epoch": 0, "iter": 2204, "iter_tflops": 28.32567358348004, "iter_time": 0.5522821960449219, "loss": 0.3921644687652588, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 30.136931566535292, "step_time": 0.5190895156860351} +{"epoch": 0, "iter": 2205, "iter_tflops": 25.84414124391233, "iter_time": 0.7982889938354493, "loss": 0.5320727825164795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.212732111904863, "step_time": 0.7581412048339844} +{"epoch": 0, "iter": 2206, "iter_tflops": 13.648750754102707, "iter_time": 1.511573760986328, "loss": 0.4538843035697937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.68875422096621, "step_time": 1.31502433013916} +{"epoch": 0, "iter": 2207, "iter_tflops": 47.19339658904603, "iter_time": 0.43716059875488283, "loss": 0.34239181876182556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.22925183365725, "step_time": 0.4027209606170654} +{"epoch": 0, "iter": 2208, "iter_tflops": 44.853874578602685, "iter_time": 0.45996234893798826, "loss": 0.41487160325050354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.636818462488755, "step_time": 0.42418674087524416} +{"epoch": 0, "iter": 2209, "iter_tflops": 37.72935002718538, "iter_time": 0.5468181533813476, "loss": 1.1709691286087036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.66780816432625, "step_time": 0.507307731628418} +{"epoch": 0, "iter": 2210, "iter_tflops": 43.24182095432433, "iter_time": 0.4771097297668457, "loss": 0.8725086450576782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.488568139752694, "step_time": 0.4437885341644287} +{"epoch": 0, "iter": 2211, "iter_tflops": 42.57093538192461, "iter_time": 0.4846286163330078, "loss": 0.9897791743278503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42705483265683, "step_time": 0.4541587295532227} +{"epoch": 0, "iter": 2212, "iter_tflops": 43.60585993472543, "iter_time": 0.4731266288757324, "loss": 0.9427824020385742, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 46.84838839500456, "step_time": 0.44038000488281254} +{"epoch": 0, "iter": 2213, "iter_tflops": 47.16250529760638, "iter_time": 0.43744693756103514, "loss": 0.06759178638458252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.36900701314096, "step_time": 0.4016253128051758} +{"epoch": 0, "iter": 2214, "iter_tflops": 10.936621832255653, "iter_time": 1.886422866821289, "loss": 0.08541025221347809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.739530128640823, "step_time": 1.5015865402221678} +{"epoch": 0, "iter": 2215, "iter_tflops": 11.998319271490589, "iter_time": 1.7194986267089845, "loss": 0.08663606643676758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.903608431691216, "step_time": 1.384301902770996} +{"epoch": 0, "iter": 2216, "iter_tflops": 20.691048487817167, "iter_time": 0.9971023712158202, "loss": 0.10643613338470459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.877409989532293, "step_time": 0.7675997619628907} +{"epoch": 0, "iter": 2217, "iter_tflops": 21.973250172477073, "iter_time": 0.7828879318237304, "loss": 0.5692073702812195, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 23.198028185788733, "step_time": 0.7415540771484375} +{"epoch": 0, "iter": 2218, "iter_tflops": 8.848847618440118, "iter_time": 1.9440488891601562, "loss": 0.3295479714870453, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 10.24578469704472, "step_time": 1.6789921798706056} +{"epoch": 0, "iter": 2219, "iter_tflops": 30.096671460157864, "iter_time": 0.5715779037475586, "loss": 0.2547629177570343, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 32.08918541326385, "step_time": 0.5360869140625} +{"epoch": 0, "iter": 2220, "iter_tflops": 32.46845125964181, "iter_time": 0.529824851989746, "loss": 0.2449401319026947, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 34.44339304006572, "step_time": 0.49944534683227537} +{"epoch": 0, "iter": 2221, "iter_tflops": 44.1973424746553, "iter_time": 0.46679488754272463, "loss": 0.052665479481220245, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 48.26368704009352, "step_time": 0.4274661712646484} +{"epoch": 0, "iter": 2222, "iter_tflops": 48.75634687466079, "iter_time": 0.42314682769775386, "loss": 0.07877552509307861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.90999251461922, "step_time": 0.37572566604614255} +{"epoch": 0, "iter": 2223, "iter_tflops": 52.40205048898224, "iter_time": 0.39370775222778326, "loss": 0.038385238498449326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.1289093077881, "step_time": 0.3611322841644287} +{"epoch": 0, "iter": 2224, "iter_tflops": 53.824757343492784, "iter_time": 0.38330118942260744, "loss": 0.030047180131077766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.793485278395146, "step_time": 0.35090781593322756} +{"epoch": 0, "iter": 2225, "iter_tflops": 23.665093058581345, "iter_time": 0.8717943115234377, "loss": 0.07427550852298737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.870525778245845, "step_time": 0.8295399017333983} +{"epoch": 0, "iter": 2226, "iter_tflops": 17.066419595270357, "iter_time": 1.2088706359863282, "loss": 0.1276688277721405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.059481341255513, "step_time": 1.0824582862854004} +{"epoch": 0, "iter": 2227, "iter_tflops": 42.16019380891584, "iter_time": 0.48935006332397457, "loss": 0.11933164298534393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.51041097950037, "step_time": 0.44358011627197264} +{"epoch": 0, "iter": 2228, "iter_tflops": 48.38198528713225, "iter_time": 0.4264209785461426, "loss": 0.08900975435972214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.04761327627719, "step_time": 0.3889165267944336} +{"epoch": 0, "iter": 2229, "iter_tflops": 18.539836909480822, "iter_time": 1.1127980041503909, "loss": 0.5299246907234192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.73371993996394, "step_time": 1.04547412109375} +{"epoch": 0, "iter": 2230, "iter_tflops": 18.43598228963478, "iter_time": 1.1190666809082033, "loss": 0.5739347338676453, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 22.260848644610853, "step_time": 0.9267882747650147} +{"epoch": 0, "iter": 2231, "iter_tflops": 40.40470993793908, "iter_time": 0.5106111030578614, "loss": 0.5781323313713074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96418825545223, "step_time": 0.4692704296112061} +{"epoch": 0, "iter": 2232, "iter_tflops": 43.99767620908323, "iter_time": 0.4689132537841797, "loss": 0.4794100224971771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.97961795972199, "step_time": 0.4299970359802246} +{"epoch": 0, "iter": 2233, "iter_tflops": 34.35274353838989, "iter_time": 0.6005661087036133, "loss": 0.6937275528907776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.87033353822137, "step_time": 0.5447824611663818} +{"epoch": 0, "iter": 2234, "iter_tflops": 40.41621005274194, "iter_time": 0.5104658126831054, "loss": 0.7042933702468872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.16559939771057, "step_time": 0.4671303863525391} +{"epoch": 0, "iter": 2235, "iter_tflops": 41.62825055761449, "iter_time": 0.4956031837463379, "loss": 0.5273960828781128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38479879908859, "step_time": 0.454581579208374} +{"epoch": 0, "iter": 2236, "iter_tflops": 45.10278225073486, "iter_time": 0.4574239654541016, "loss": 0.6024323105812073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.18876745387715, "step_time": 0.4194269256591797} +{"epoch": 0, "iter": 2237, "iter_tflops": 20.319907716374747, "iter_time": 1.0153143310546875, "loss": 0.12917402386665344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.33874565399761, "step_time": 0.9668372192382813} +{"epoch": 0, "iter": 2238, "iter_tflops": 11.468928113947205, "iter_time": 1.7988684997558595, "loss": 0.23807482421398163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.994081029956064, "step_time": 1.474272834777832} +{"epoch": 0, "iter": 2239, "iter_tflops": 14.838428047294032, "iter_time": 1.3903826904296874, "loss": 0.15274639427661896, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 18.406771117181197, "step_time": 1.1208426170349122} +{"epoch": 0, "iter": 2240, "iter_tflops": 18.244678385919446, "iter_time": 1.1308006134033202, "loss": 0.17385464906692505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.622564667795196, "step_time": 0.9119696998596192} +{"epoch": 0, "iter": 2241, "iter_tflops": 17.475742637239772, "iter_time": 0.906884765625, "loss": 0.39264920353889465, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 18.67460326987965, "step_time": 0.8486651382446289} +{"epoch": 0, "iter": 2242, "iter_tflops": 14.315464053201396, "iter_time": 1.107088439941406, "loss": 0.43703532218933105, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 20.673616205295776, "step_time": 0.7666043815612793} +{"epoch": 0, "iter": 2243, "iter_tflops": 30.385773515448726, "iter_time": 0.5215758209228516, "loss": 0.2664998471736908, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 32.29882970720381, "step_time": 0.4906829414367675} +{"epoch": 0, "iter": 2244, "iter_tflops": 29.15148029590924, "iter_time": 0.5436596908569337, "loss": 0.21609148383140564, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.982895252619258, "step_time": 0.5115236854553222} +{"epoch": 0, "iter": 2245, "iter_tflops": 41.797445414956705, "iter_time": 0.49359699630737297, "loss": 0.3667955994606018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.58926213951981, "step_time": 0.4525428256988525} +{"epoch": 0, "iter": 2246, "iter_tflops": 15.521293543550616, "iter_time": 1.3292122497558594, "loss": 0.5086947083473206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.54818327902815, "step_time": 1.004034917831421} +{"epoch": 0, "iter": 2247, "iter_tflops": 43.49257235107814, "iter_time": 0.47435900878906245, "loss": 0.6065043210983276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.58518208651832, "step_time": 0.4335613021850586} +{"epoch": 0, "iter": 2248, "iter_tflops": 41.08025319995626, "iter_time": 0.5022143707275392, "loss": 0.46751806139945984, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 44.80324256636164, "step_time": 0.46048215103149415} +{"epoch": 0, "iter": 2249, "iter_tflops": 26.298163829342634, "iter_time": 0.784506996154785, "loss": 0.643351674079895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.052554378164746, "step_time": 0.7354443817138672} +{"epoch": 0, "iter": 2250, "iter_tflops": 10.331176332315533, "iter_time": 1.9969742889404298, "loss": 0.385605126619339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.017766320690393, "step_time": 1.7167161483764646} +{"epoch": 0, "iter": 2251, "iter_tflops": 14.855810561830681, "iter_time": 1.3887558288574218, "loss": 0.6844921708106995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.23259143884261, "step_time": 1.072715217590332} +{"epoch": 0, "iter": 2252, "iter_tflops": 39.478799020317446, "iter_time": 0.522586654663086, "loss": 0.5165199637413025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.306700958547275, "step_time": 0.4763949470520019} +{"epoch": 0, "iter": 2253, "iter_tflops": 14.209110357931506, "iter_time": 1.0118252716064453, "loss": 0.22304439544677734, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 15.096343681855942, "step_time": 0.9523588790893556} +{"epoch": 0, "iter": 2254, "iter_tflops": 9.720734832008954, "iter_time": 1.4790175018310545, "loss": 0.23022708296775818, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 11.512182418607289, "step_time": 1.248862850189209} +{"epoch": 0, "iter": 2255, "iter_tflops": 23.044610794702965, "iter_time": 0.623882827758789, "loss": 0.2998862564563751, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 24.796632939740324, "step_time": 0.5798019828796386} +{"epoch": 0, "iter": 2256, "iter_tflops": 23.515148004559254, "iter_time": 0.6113989562988282, "loss": 0.31548836827278137, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 25.061556790058166, "step_time": 0.5736729393005371} +{"epoch": 0, "iter": 2257, "iter_tflops": 23.580287633167462, "iter_time": 0.8749296798706057, "loss": 0.585669755935669, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 25.222357981405583, "step_time": 0.8179684677124024} +{"epoch": 0, "iter": 2258, "iter_tflops": 7.628161412492803, "iter_time": 2.704595825195312, "loss": 0.699370801448822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.406087947986203, "step_time": 2.1933766326904296} +{"epoch": 0, "iter": 2259, "iter_tflops": 13.418377675201999, "iter_time": 1.5375251770019531, "loss": 0.5737553238868713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.309463512986737, "step_time": 1.3476039505004882} +{"epoch": 0, "iter": 2260, "iter_tflops": 47.70714150249422, "iter_time": 0.432452938079834, "loss": 0.6758589148521423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.688339791239635, "step_time": 0.3991440544128418} +{"epoch": 0, "iter": 2261, "iter_tflops": 16.99531403398215, "iter_time": 0.9373410568237304, "loss": 0.4612475037574768, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 17.804029366491015, "step_time": 0.894764060974121} +{"epoch": 0, "iter": 2262, "iter_tflops": 10.526484217491088, "iter_time": 1.5133643188476564, "loss": 0.38391920924186707, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 13.201916964778775, "step_time": 1.206673671722412} +{"epoch": 0, "iter": 2263, "iter_tflops": 26.786783882964077, "iter_time": 0.5947113952636719, "loss": 0.2830732464790344, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.520211716333552, "step_time": 0.5585654754638671} +{"epoch": 0, "iter": 2264, "iter_tflops": 30.066153296211624, "iter_time": 0.5298451538085938, "loss": 0.3115442991256714, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 31.890522471531103, "step_time": 0.4995341682434081} +{"epoch": 0, "iter": 2265, "iter_tflops": 29.686069078898278, "iter_time": 0.6377885131835939, "loss": 0.08390967547893524, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 31.706404437732385, "step_time": 0.5971485633850098} +{"epoch": 0, "iter": 2266, "iter_tflops": 15.096370139728073, "iter_time": 1.2541712799072264, "loss": 0.05388573929667473, "lr": 
3e-05, "seqlen": 7536.0, "step_tflops": 18.04771726779508, "step_time": 1.0490763778686523} +{"epoch": 0, "iter": 2267, "iter_tflops": 39.046034917423526, "iter_time": 0.4849002952575684, "loss": 0.13956783711910248, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 42.95047480273001, "step_time": 0.4408201293945312} +{"epoch": 0, "iter": 2268, "iter_tflops": 39.33978508638068, "iter_time": 0.48127954483032226, "loss": 0.10464886575937271, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 43.11948874320428, "step_time": 0.4390922622680664} +{"epoch": 0, "iter": 2269, "iter_tflops": 33.85596313143717, "iter_time": 0.6093784255981445, "loss": 0.781354546546936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.84985943314391, "step_time": 0.5598689880371093} +{"epoch": 0, "iter": 2270, "iter_tflops": 43.49922157308655, "iter_time": 0.47428649902343756, "loss": 1.0277386903762817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.03776491330851, "step_time": 0.43860701179504397} +{"epoch": 0, "iter": 2271, "iter_tflops": 43.75605232634551, "iter_time": 0.4715026245117188, "loss": 0.9536848068237305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43931252157556, "step_time": 0.4348944454193116} +{"epoch": 0, "iter": 2272, "iter_tflops": 45.218976490766124, "iter_time": 0.45624857330322266, "loss": 1.0675697326660156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.96011067959671, "step_time": 0.42138576126098637} +{"epoch": 0, "iter": 2273, "iter_tflops": 26.036810987875363, "iter_time": 0.792381736755371, "loss": 0.14867441356182098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.46288260943005, "step_time": 0.7512355422973632} +{"epoch": 0, "iter": 2274, "iter_tflops": 14.741116744590094, "iter_time": 1.3995610961914065, "loss": 0.15251639485359192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.390210348548813, "step_time": 1.1218519592285157} +{"epoch": 0, "iter": 2275, "iter_tflops": 43.934530157791826, "iter_time": 0.4695872116088868, "loss": 0.1383906602859497, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.490519612961684, "step_time": 0.3717949237823486} +{"epoch": 0, "iter": 2276, "iter_tflops": 50.0536131504522, "iter_time": 0.41217990493774415, "loss": 0.17827321588993073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.10909365036325, "step_time": 0.38128699111938474} +{"epoch": 0, "iter": 2277, "iter_tflops": 25.246912390555433, "iter_time": 0.8171729354858399, "loss": 0.17287038266658783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.472114004177566, "step_time": 0.7793519439697266} +{"epoch": 0, "iter": 2278, "iter_tflops": 11.493804571962027, "iter_time": 1.794975143432617, "loss": 0.22637856006622314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.34251103940769, "step_time": 1.4384575653076173} +{"epoch": 0, "iter": 2279, "iter_tflops": 49.090115077461626, "iter_time": 0.4202698135375977, "loss": 0.12336014956235886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.31277669195318, "step_time": 0.38698216056823737} +{"epoch": 0, "iter": 2280, "iter_tflops": 49.23165374543456, "iter_time": 0.41906155776977544, "loss": 0.16045348346233368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.95683433225092, "step_time": 0.38958320999145507} +{"epoch": 0, "iter": 2281, "iter_tflops": 38.38689789764709, "iter_time": 0.5374514389038085, "loss": 0.7187249660491943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.195959561920624, "step_time": 0.5008038101196289} +{"epoch": 0, "iter": 2282, "iter_tflops": 17.7300593519663, "iter_time": 1.1636223602294924, "loss": 0.7146660685539246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.002304172979706, "step_time": 0.9823252410888671} +{"epoch": 0, "iter": 2283, "iter_tflops": 39.07027010812966, "iter_time": 0.5280509567260743, "loss": 0.60294508934021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.71556596745061, "step_time": 0.4829877128601074} +{"epoch": 0, "iter": 2284, "iter_tflops": 37.85495407593641, "iter_time": 0.545003791809082, "loss": 0.5788720846176147, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10090628885308, "step_time": 0.5019620094299316} +{"epoch": 0, "iter": 2285, "iter_tflops": 20.554950085009537, "iter_time": 1.0037043838500976, "loss": 0.7288281321525574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.995904370003505, "step_time": 0.9379515914916992} +{"epoch": 0, "iter": 2286, "iter_tflops": 15.156587215321435, "iter_time": 1.3611965026855468, "loss": 0.9659262895584106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.716170284783704, "step_time": 0.9958932189941407} +{"epoch": 0, "iter": 2287, "iter_tflops": 38.21886308722784, "iter_time": 0.539814422607422, "loss": 0.6947394013404846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.67604414170493, "step_time": 0.49503483200073245} +{"epoch": 0, "iter": 2288, "iter_tflops": 41.149240091810924, "iter_time": 0.5013724060058593, "loss": 0.6696404218673706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.88861031779766, "step_time": 0.4596064205169678} +{"epoch": 0, "iter": 2289, "iter_tflops": 26.139149478912177, "iter_time": 0.7892794494628906, "loss": 1.001237154006958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.7988865149619, "step_time": 0.7421553916931153} +{"epoch": 0, "iter": 2290, "iter_tflops": 10.33549513121867, "iter_time": 1.9961398315429688, "loss": 1.21271550655365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.971421072410298, "step_time": 1.5905037231445314} +{"epoch": 0, "iter": 2291, "iter_tflops": 14.839014311354275, "iter_time": 1.3903277587890623, "loss": 1.0377163887023926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.357394288866573, "step_time": 1.188605453491211} +{"epoch": 0, "iter": 2292, "iter_tflops": 22.687724521557858, "iter_time": 0.9093504943847657, "loss": 0.7280607223510742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.205063198578742, "step_time": 0.7583549194335938} +{"epoch": 0, "iter": 2293, "iter_tflops": 19.78339353735296, "iter_time": 0.7721408996582032, "loss": 0.26714879274368286, 
"lr": 3e-05, "seqlen": 6112.0, "step_tflops": 20.88960625191277, "step_time": 0.7312520446777344} +{"epoch": 0, "iter": 2294, "iter_tflops": 10.461433071460949, "iter_time": 1.460179229736328, "loss": 0.3389665186405182, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 11.769213012948445, "step_time": 1.2979259757995607} +{"epoch": 0, "iter": 2295, "iter_tflops": 23.80052138971196, "iter_time": 0.6418164978027343, "loss": 0.2133042961359024, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 25.644265929123772, "step_time": 0.5956718482971192} +{"epoch": 0, "iter": 2296, "iter_tflops": 23.3515449694918, "iter_time": 0.6541566009521484, "loss": 0.2522275447845459, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 25.122185475444716, "step_time": 0.6080508918762207} +{"epoch": 0, "iter": 2297, "iter_tflops": 24.168520641767373, "iter_time": 0.8536349334716797, "loss": 0.8767887949943542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.72345383158891, "step_time": 0.8020343475341798} +{"epoch": 0, "iter": 2298, "iter_tflops": 21.798457697763432, "iter_time": 0.946447395324707, "loss": 1.050403356552124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.579593286217662, "step_time": 0.776200496673584} +{"epoch": 0, "iter": 2299, "iter_tflops": 38.27801084307673, "iter_time": 0.5389802932739258, "loss": 0.8449809551239014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.282681588853286, "step_time": 0.49975177764892575} +{"epoch": 0, "iter": 2300, "iter_tflops": 39.37537499875642, "iter_time": 0.5239592895507813, "loss": 0.8674215078353882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79528825827915, "step_time": 0.48208796691894534} +{"epoch": 0, "iter": 2301, "iter_tflops": 16.621825414423977, "iter_time": 0.7743608474731445, "loss": 0.05845389515161514, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 17.829039787933226, "step_time": 0.7219284362792969} +{"epoch": 0, "iter": 2302, "iter_tflops": 10.972334715664088, "iter_time": 1.173067642211914, "loss": 0.061667799949645996, 
"lr": 3e-05, "seqlen": 5168.0, "step_tflops": 13.713622580350462, "step_time": 0.9385770053863526} +{"epoch": 0, "iter": 2303, "iter_tflops": 32.08171900251816, "iter_time": 0.40120327758789065, "loss": 0.04097367450594902, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 35.33247262851356, "step_time": 0.3642906894683838} +{"epoch": 0, "iter": 2304, "iter_tflops": 32.803958885379465, "iter_time": 0.392370044708252, "loss": 0.03260277956724167, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 35.823790809146644, "step_time": 0.3592944946289063} +{"epoch": 0, "iter": 2305, "iter_tflops": 38.96291544002659, "iter_time": 0.5295058975219727, "loss": 0.8085368871688843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.066994626771645, "step_time": 0.49043421554565436} +{"epoch": 0, "iter": 2306, "iter_tflops": 44.11909895587846, "iter_time": 0.467622730255127, "loss": 0.9242908954620361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.58903319936335, "step_time": 0.433526216506958} +{"epoch": 0, "iter": 2307, "iter_tflops": 45.37612479073979, "iter_time": 0.4546684761047364, "loss": 0.9427960515022278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.84929186636605, "step_time": 0.4223417110443115} +{"epoch": 0, "iter": 2308, "iter_tflops": 46.3216465269248, "iter_time": 0.4453877410888672, "loss": 1.0491396188735962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9627797944157, "step_time": 0.41292925643920897} +{"epoch": 0, "iter": 2309, "iter_tflops": 40.72196574177429, "iter_time": 0.5066330451965332, "loss": 0.8363321423530579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.02759401945301, "step_time": 0.46859461593627927} +{"epoch": 0, "iter": 2310, "iter_tflops": 45.57322295735697, "iter_time": 0.45270209503173825, "loss": 1.0466694831848145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13035787038149, "step_time": 0.4199255695343017} +{"epoch": 0, "iter": 2311, "iter_tflops": 46.35233205289837, "iter_time": 0.4450928916931153, "loss": 0.9045872092247009, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.71017147764833, "step_time": 0.41502760696411134} +{"epoch": 0, "iter": 2312, "iter_tflops": 45.47291351985, "iter_time": 0.4537007179260253, "loss": 1.0585283041000366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.100646174640126, "step_time": 0.4201796741485596} +{"epoch": 0, "iter": 2313, "iter_tflops": 41.96904354052292, "iter_time": 0.49157883453369144, "loss": 0.3011811673641205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.713653614079995, "step_time": 0.4513114109039306} +{"epoch": 0, "iter": 2314, "iter_tflops": 48.443591725345456, "iter_time": 0.42587869262695316, "loss": 0.3614464998245239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.49363271425128, "step_time": 0.39302087593078616} +{"epoch": 0, "iter": 2315, "iter_tflops": 47.652161057488755, "iter_time": 0.43295189666748046, "loss": 0.39831334352493286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.212108024212064, "step_time": 0.4028557758331299} +{"epoch": 0, "iter": 2316, "iter_tflops": 51.05448827457002, "iter_time": 0.4040995063781738, "loss": 0.328978031873703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.03182586834784, "step_time": 0.37489385795593255} +{"epoch": 0, "iter": 2317, "iter_tflops": 41.75573082592112, "iter_time": 0.4940901069641114, "loss": 0.9298903346061707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.381251610639424, "step_time": 0.45461711120605464} +{"epoch": 0, "iter": 2318, "iter_tflops": 43.31713932685023, "iter_time": 0.4762801475524903, "loss": 0.8850569128990173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.4608273669327, "step_time": 0.4346972999572754} +{"epoch": 0, "iter": 2319, "iter_tflops": 46.52135380414726, "iter_time": 0.44347577667236326, "loss": 0.7976676821708679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.93441900704154, "step_time": 0.4131637840270996} +{"epoch": 0, "iter": 2320, "iter_tflops": 49.83290029066219, "iter_time": 0.41400547409057614, "loss": 0.8455923199653625, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.0040165738785, "step_time": 0.38202887153625487} +{"epoch": 0, "iter": 2321, "iter_tflops": 45.48159416787944, "iter_time": 0.45361412429809567, "loss": 1.0565358400344849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.362634645770626, "step_time": 0.41794960212707516} +{"epoch": 0, "iter": 2322, "iter_tflops": 44.08434883015816, "iter_time": 0.46799134063720704, "loss": 0.9171546697616577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.9193716090822, "step_time": 0.43053764724731447} +{"epoch": 0, "iter": 2323, "iter_tflops": 46.46956932740914, "iter_time": 0.4439699745178222, "loss": 1.0707175731658936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.954481577224904, "step_time": 0.4129978504180909} +{"epoch": 0, "iter": 2324, "iter_tflops": 47.14270130047201, "iter_time": 0.437630702972412, "loss": 0.9582018256187439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.6829763040411, "step_time": 0.4070616016387939} +{"epoch": 0, "iter": 2325, "iter_tflops": 40.22157856938583, "iter_time": 0.5129359474182129, "loss": 0.20937801897525787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.5290673491915, "step_time": 0.47396130371093753} +{"epoch": 0, "iter": 2326, "iter_tflops": 22.99548643596434, "iter_time": 0.8971801300048828, "loss": 0.2399338334798813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.272906824166043, "step_time": 0.7297124996185304} +{"epoch": 0, "iter": 2327, "iter_tflops": 53.245890659946745, "iter_time": 0.3874682769775391, "loss": 0.2641282379627228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.73503912630481, "step_time": 0.3573409461975098} +{"epoch": 0, "iter": 2328, "iter_tflops": 54.84002812863891, "iter_time": 0.3762050132751465, "loss": 0.15998013317584991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.394365532111685, "step_time": 0.34735775566101074} +{"epoch": 0, "iter": 2329, "iter_tflops": 45.94219480035542, "iter_time": 0.4490663452148438, "loss": 0.15365028381347656, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.1720453410783, "step_time": 0.4112069454193115} +{"epoch": 0, "iter": 2330, "iter_tflops": 45.26363846025512, "iter_time": 0.45579838943481443, "loss": 0.12083043158054352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.580832796470865, "step_time": 0.41611026573181153} +{"epoch": 0, "iter": 2331, "iter_tflops": 52.312870149492674, "iter_time": 0.39437892532348634, "loss": 0.15550395846366882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.130122895001875, "step_time": 0.36112461280822755} +{"epoch": 0, "iter": 2332, "iter_tflops": 53.64904569348027, "iter_time": 0.38455657958984374, "loss": 0.19706209003925323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.07007306637794, "step_time": 0.355279275894165} +{"epoch": 0, "iter": 2333, "iter_tflops": 31.119574326716144, "iter_time": 0.6629619445800781, "loss": 0.03625454753637314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.81650080616085, "step_time": 0.6286804809570312} +{"epoch": 0, "iter": 2334, "iter_tflops": 31.389258457215103, "iter_time": 0.6572660369873047, "loss": 0.04804683104157448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.289344092019014, "step_time": 0.525106590270996} +{"epoch": 0, "iter": 2335, "iter_tflops": 45.491141352466904, "iter_time": 0.45351892471313476, "loss": 0.07519181072711945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.761805120617026, "step_time": 0.41459696769714355} +{"epoch": 0, "iter": 2336, "iter_tflops": 50.3894980912901, "iter_time": 0.4094324073791504, "loss": 0.04650740325450897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.51817536759171, "step_time": 0.3716097183227539} +{"epoch": 0, "iter": 2337, "iter_tflops": 38.116392888129575, "iter_time": 0.5412656326293945, "loss": 0.17088748514652252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.226683953318975, "step_time": 0.48857953262329107} +{"epoch": 0, "iter": 2338, "iter_tflops": 38.81057572858986, "iter_time": 0.5315843200683593, "loss": 
0.11760310083627701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24924606544174, "step_time": 0.47702781867980953} +{"epoch": 0, "iter": 2339, "iter_tflops": 45.41716113468077, "iter_time": 0.45425766372680665, "loss": 0.08310102671384811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9497154388474, "step_time": 0.4130372581481933} +{"epoch": 0, "iter": 2340, "iter_tflops": 42.957548463828694, "iter_time": 0.4802670135498047, "loss": 0.10986465960741043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17392524217874, "step_time": 0.43734103965759275} +{"epoch": 0, "iter": 2341, "iter_tflops": 21.825677346552382, "iter_time": 0.9452670440673828, "loss": 0.1771005541086197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.089040336035758, "step_time": 0.8935448684692383} +{"epoch": 0, "iter": 2342, "iter_tflops": 44.12036442753423, "iter_time": 0.467609317779541, "loss": 0.22113123536109924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.21863228458424, "step_time": 0.4191724262237549} +{"epoch": 0, "iter": 2343, "iter_tflops": 56.52608534243162, "iter_time": 0.3649835891723633, "loss": 0.1625354140996933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.59174520802594, "step_time": 0.33496523666381833} +{"epoch": 0, "iter": 2344, "iter_tflops": 56.78254259680402, "iter_time": 0.363335147857666, "loss": 0.1916404366493225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.27205277420177, "step_time": 0.33671294784545897} +{"epoch": 0, "iter": 2345, "iter_tflops": 32.8174821157839, "iter_time": 0.6286616821289063, "loss": 0.40549519658088684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.905174068420635, "step_time": 0.5910611839294434} +{"epoch": 0, "iter": 2346, "iter_tflops": 35.61216884584278, "iter_time": 0.5793270721435546, "loss": 0.31146058440208435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.29043100892001, "step_time": 0.5250920639038086} +{"epoch": 0, "iter": 2347, "iter_tflops": 44.85223785785405, "iter_time": 0.459979133605957, "loss": 
0.3095257878303528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.27871143810294, "step_time": 0.4273331432342529} +{"epoch": 0, "iter": 2348, "iter_tflops": 46.2237725366187, "iter_time": 0.4463308029174805, "loss": 0.4736720025539398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.97449839676277, "step_time": 0.4128324279785156} +{"epoch": 0, "iter": 2349, "iter_tflops": 40.304110780460526, "iter_time": 0.5118855895996094, "loss": 0.3405779004096985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.32289690711405, "step_time": 0.47621685028076177} +{"epoch": 0, "iter": 2350, "iter_tflops": 13.560678245269745, "iter_time": 1.5213909759521487, "loss": 0.1599080115556717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.096478059544364, "step_time": 1.2817147598266603} +{"epoch": 0, "iter": 2351, "iter_tflops": 41.521843631212334, "iter_time": 0.49687325286865236, "loss": 0.11314664036035538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.5993885986979, "step_time": 0.4524423274993896} +{"epoch": 0, "iter": 2352, "iter_tflops": 39.55221806341202, "iter_time": 0.5216166000366211, "loss": 0.1664469689130783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.34267840665965, "step_time": 0.47599950599670415} +{"epoch": 0, "iter": 2353, "iter_tflops": 21.547337004730938, "iter_time": 0.9574776458740234, "loss": 0.08900804817676544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.829223106187694, "step_time": 0.9037142181396485} +{"epoch": 0, "iter": 2354, "iter_tflops": 15.872836181068026, "iter_time": 1.2997736053466795, "loss": 0.07755941152572632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.488802379167975, "step_time": 1.0069448242187502} +{"epoch": 0, "iter": 2355, "iter_tflops": 37.164441057227755, "iter_time": 0.5551299285888672, "loss": 0.11196509003639221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.335317279952044, "step_time": 0.49911540222167966} +{"epoch": 0, "iter": 2356, "iter_tflops": 48.658185126720596, "iter_time": 0.42400047302246097, 
"loss": 0.041475437581539154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.48451808382597, "step_time": 0.3857395420074463} +{"epoch": 0, "iter": 2357, "iter_tflops": 20.374900034442373, "iter_time": 1.0125739746093751, "loss": 0.22533661127090454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.763015558231327, "step_time": 0.9479887313842774} +{"epoch": 0, "iter": 2358, "iter_tflops": 20.394937062131604, "iter_time": 1.0115791702270507, "loss": 0.310844361782074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.059071138818755, "step_time": 0.8947061824798583} +{"epoch": 0, "iter": 2359, "iter_tflops": 46.935764158251395, "iter_time": 0.43956019210815434, "loss": 0.41189032793045044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.38762632561112, "step_time": 0.40147979164123526} +{"epoch": 0, "iter": 2360, "iter_tflops": 43.70578671891242, "iter_time": 0.4720448951721191, "loss": 0.3077085614204407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.67157314630994, "step_time": 0.43277559661865234} +{"epoch": 0, "iter": 2361, "iter_tflops": 35.73734105702014, "iter_time": 0.551780502319336, "loss": 0.0031121186912059784, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 40.115308924005994, "step_time": 0.49156216239929196} +{"epoch": 0, "iter": 2362, "iter_tflops": 42.99353230171266, "iter_time": 0.4586542892456054, "loss": 0.020056936889886856, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 47.88844973933521, "step_time": 0.41177294540405274} +{"epoch": 0, "iter": 2363, "iter_tflops": 48.56817184231486, "iter_time": 0.40601009368896485, "loss": 0.007173048332333565, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 53.60244431982544, "step_time": 0.3678781490325928} +{"epoch": 0, "iter": 2364, "iter_tflops": 51.051272410470816, "iter_time": 0.38626202774047846, "loss": 0.003613977460190654, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 56.2937533206774, "step_time": 0.3502905178070069} +{"epoch": 0, "iter": 2365, "iter_tflops": 13.913100583435073, "iter_time": 
1.482853759765625, "loss": 0.6533065438270569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.870814867067175, "step_time": 1.387354606628418} +{"epoch": 0, "iter": 2366, "iter_tflops": 19.0359231742386, "iter_time": 1.0837978973388671, "loss": 0.41352227330207825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.424264443745408, "step_time": 0.8114725818634033} +{"epoch": 0, "iter": 2367, "iter_tflops": 43.92169647540048, "iter_time": 0.469724422454834, "loss": 0.5510511994361877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.1541564236414, "step_time": 0.4284384784698486} +{"epoch": 0, "iter": 2368, "iter_tflops": 40.21880196233622, "iter_time": 0.5129713592529297, "loss": 0.46023911237716675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.135253205835994, "step_time": 0.46745157241821295} +{"epoch": 0, "iter": 2369, "iter_tflops": 21.471515989934193, "iter_time": 0.9608587265014649, "loss": 0.4022194743156433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.82415157049557, "step_time": 0.9039150238037109} +{"epoch": 0, "iter": 2370, "iter_tflops": 9.965163804344769, "iter_time": 2.070321563720703, "loss": 0.3611484467983246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.565742333639918, "step_time": 1.7838105773925783} +{"epoch": 0, "iter": 2371, "iter_tflops": 13.911522336326913, "iter_time": 1.4830219879150393, "loss": 0.29925045371055603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.208736443997502, "step_time": 1.198873233795166} +{"epoch": 0, "iter": 2372, "iter_tflops": 17.9352927872279, "iter_time": 1.1503070373535158, "loss": 0.2879495620727539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.56562734326595, "step_time": 0.7766085567474366} +{"epoch": 0, "iter": 2373, "iter_tflops": 18.546914818188434, "iter_time": 0.7707791213989258, "loss": 0.3076237738132477, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 20.05519885735095, "step_time": 0.7128114166259766} +{"epoch": 0, "iter": 2374, "iter_tflops": 22.089903897800376, "iter_time": 
0.6471542282104492, "loss": 0.3875787854194641, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 23.86100819326831, "step_time": 0.5991186370849609} +{"epoch": 0, "iter": 2375, "iter_tflops": 23.56902635334526, "iter_time": 0.6065407409667969, "loss": 0.39861026406288147, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 25.287302530330535, "step_time": 0.5653262023925782} +{"epoch": 0, "iter": 2376, "iter_tflops": 23.089493457447556, "iter_time": 0.6191376495361328, "loss": 0.32505327463150024, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 24.596614195776688, "step_time": 0.5812009162902831} +{"epoch": 0, "iter": 2377, "iter_tflops": 22.38161491804018, "iter_time": 0.9217875289916991, "loss": 0.332511842250824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.781752194028957, "step_time": 0.8675178070068359} +{"epoch": 0, "iter": 2378, "iter_tflops": 9.843837299544221, "iter_time": 2.095838531494141, "loss": 0.1921931952238083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.829357914680058, "step_time": 1.7440586090087888} +{"epoch": 0, "iter": 2379, "iter_tflops": 13.794646418873015, "iter_time": 1.4955869750976563, "loss": 0.272544801235199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.293506962498974, "step_time": 1.192996513366699} +{"epoch": 0, "iter": 2380, "iter_tflops": 37.150324728385044, "iter_time": 0.5553408660888671, "loss": 0.2576274275779724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.713570937479716, "step_time": 0.5067375087738037} +{"epoch": 0, "iter": 2381, "iter_tflops": 13.061085869675221, "iter_time": 1.1570283355712891, "loss": 0.2596259117126465, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 14.026126029241741, "step_time": 1.0774212646484376} +{"epoch": 0, "iter": 2382, "iter_tflops": 14.90504449699312, "iter_time": 1.0138880462646485, "loss": 0.1755841076374054, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 17.7710112206481, "step_time": 0.8503762817382812} +{"epoch": 0, "iter": 2383, "iter_tflops": 24.76230601039207, "iter_time": 
0.6102842941284179, "loss": 0.39569976925849915, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 26.479822333762606, "step_time": 0.5707004470825195} +{"epoch": 0, "iter": 2384, "iter_tflops": 26.391705267983543, "iter_time": 0.5726059112548827, "loss": 0.3336832821369171, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.98632652967636, "step_time": 0.5399796371459961} +{"epoch": 0, "iter": 2385, "iter_tflops": 39.29891091089988, "iter_time": 0.524978759765625, "loss": 0.7111943364143372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52552895333973, "step_time": 0.4851460762023926} +{"epoch": 0, "iter": 2386, "iter_tflops": 46.02634180378691, "iter_time": 0.44824534606933597, "loss": 0.8129498362541199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.66973016191116, "step_time": 0.4153655242919922} +{"epoch": 0, "iter": 2387, "iter_tflops": 41.98759649194025, "iter_time": 0.49136162185668947, "loss": 0.8123618364334106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.14297503029811, "step_time": 0.45701670074462897} +{"epoch": 0, "iter": 2388, "iter_tflops": 44.93489099635555, "iter_time": 0.4591330490112304, "loss": 0.657808244228363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.179916708807355, "step_time": 0.4282094058990479} +{"epoch": 0, "iter": 2389, "iter_tflops": 26.527571418882268, "iter_time": 0.7777226638793946, "loss": 1.0434428453445435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.874040075132573, "step_time": 0.7401544036865235} +{"epoch": 0, "iter": 2390, "iter_tflops": 12.185805983113932, "iter_time": 1.6930429992675782, "loss": 1.1602376699447632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.242912895830937, "step_time": 1.3534875946044922} +{"epoch": 0, "iter": 2391, "iter_tflops": 35.58472167204368, "iter_time": 0.5797739181518554, "loss": 0.9613970518112183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.03108757444084, "step_time": 0.5285810565948487} +{"epoch": 0, "iter": 2392, "iter_tflops": 37.07163712618444, "iter_time": 
0.5565196228027344, "loss": 0.7537413239479065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.298334408220555, "step_time": 0.511958963394165} +{"epoch": 0, "iter": 2393, "iter_tflops": 7.419995178996727, "iter_time": 1.1472776489257812, "loss": 0.0048897890374064445, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 7.805193662219186, "step_time": 1.0906577072143555} +{"epoch": 0, "iter": 2394, "iter_tflops": 13.61679958318036, "iter_time": 0.6251685333251953, "loss": 0.00837558601051569, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 15.271873131051688, "step_time": 0.5574165363311768} +{"epoch": 0, "iter": 2395, "iter_tflops": 22.162641363100622, "iter_time": 0.38410559844970704, "loss": 0.001565320068039, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 24.54561481212265, "step_time": 0.34681529426574703} +{"epoch": 0, "iter": 2396, "iter_tflops": 25.32633868017057, "iter_time": 0.33612417221069335, "loss": 0.0070267487317323685, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 27.792439692204734, "step_time": 0.30629893302917477} +{"epoch": 0, "iter": 2397, "iter_tflops": 41.41195306799649, "iter_time": 0.49819175338745114, "loss": 0.0063520390540361404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.58826209278615, "step_time": 0.4627023468017578} +{"epoch": 0, "iter": 2398, "iter_tflops": 32.04304361239084, "iter_time": 0.6438556137084961, "loss": 0.00666528707370162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.0966271058828, "step_time": 0.5276949710845947} +{"epoch": 0, "iter": 2399, "iter_tflops": 48.60255435939771, "iter_time": 0.42448578643798823, "loss": 0.04847091808915138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.63225282889631, "step_time": 0.38467698860168453} +{"epoch": 0, "iter": 2400, "iter_tflops": 46.705311349346346, "iter_time": 0.44172906494140624, "loss": 0.004976533353328705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14775721659698, "step_time": 0.395627628326416} +{"epoch": 0, "iter": 2401, "iter_tflops": 21.126037571606016, 
"iter_time": 0.9765718460083009, "loss": 0.014915256761014462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.469865565893954, "step_time": 0.9181671981811524} +{"epoch": 0, "iter": 2402, "iter_tflops": 16.514302596073097, "iter_time": 1.2492863922119142, "loss": 0.03638067469000816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.865697462856286, "step_time": 1.0385285263061523} +{"epoch": 0, "iter": 2403, "iter_tflops": 56.329755379511234, "iter_time": 0.3662556915283203, "loss": 0.005961041897535324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.995111398301496, "step_time": 0.33278581237792965} +{"epoch": 0, "iter": 2404, "iter_tflops": 54.03798720632998, "iter_time": 0.38178871154785154, "loss": 0.004851287230849266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.8585520304363, "step_time": 0.35051989555358887} +{"epoch": 0, "iter": 2405, "iter_tflops": 23.218841071066894, "iter_time": 0.8885496673583985, "loss": 0.13536755740642548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.344248749570777, "step_time": 0.8474729995727539} +{"epoch": 0, "iter": 2406, "iter_tflops": 14.929203283688706, "iter_time": 1.3819286346435549, "loss": 0.11075370013713837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.419209747894172, "step_time": 1.0624064407348632} +{"epoch": 0, "iter": 2407, "iter_tflops": 43.89069489519326, "iter_time": 0.47005620574951174, "loss": 0.12571412324905396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.471691154135605, "step_time": 0.4256318073272705} +{"epoch": 0, "iter": 2408, "iter_tflops": 46.07604660052118, "iter_time": 0.4477617988586426, "loss": 0.1596713662147522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.567008231435864, "step_time": 0.407995138168335} +{"epoch": 0, "iter": 2409, "iter_tflops": 33.91796098692057, "iter_time": 0.6082645568847657, "loss": 0.1095781996846199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.718107620652574, "step_time": 0.5469811401367188} +{"epoch": 0, "iter": 2410, "iter_tflops": 
48.10122508999432, "iter_time": 0.4289099388122558, "loss": 0.09614533185958862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.679652186869326, "step_time": 0.3916330623626709} +{"epoch": 0, "iter": 2411, "iter_tflops": 51.99428316721848, "iter_time": 0.39679542160034176, "loss": 0.04336588457226753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.511258250037436, "step_time": 0.36507935142517095} +{"epoch": 0, "iter": 2412, "iter_tflops": 55.13854875254612, "iter_time": 0.3741682357788086, "loss": 0.134070485830307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.762039128669, "step_time": 0.3452207088470459} +{"epoch": 0, "iter": 2413, "iter_tflops": 44.34950242067561, "iter_time": 0.4651933479309083, "loss": 0.6581259369850159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.44399896347025, "step_time": 0.42587511253356936} +{"epoch": 0, "iter": 2414, "iter_tflops": 46.312430111003046, "iter_time": 0.44547637557983405, "loss": 0.523165225982666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.073982806871385, "step_time": 0.40394526481628423} +{"epoch": 0, "iter": 2415, "iter_tflops": 54.35918033501574, "iter_time": 0.3795328292846679, "loss": 0.46868565678596497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.01447300255511, "step_time": 0.3495937938690185} +{"epoch": 0, "iter": 2416, "iter_tflops": 45.90220819121133, "iter_time": 0.4494575386047363, "loss": 0.486653596162796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.595609068360496, "step_time": 0.415986291885376} +{"epoch": 0, "iter": 2417, "iter_tflops": 31.318556365542815, "iter_time": 0.6587498245239258, "loss": 0.6315734386444092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.32529586725445, "step_time": 0.6190820808410644} +{"epoch": 0, "iter": 2418, "iter_tflops": 14.484914726416722, "iter_time": 1.4243158416748047, "loss": 0.44474858045578003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.137135940423047, "step_time": 1.2038822345733642} +{"epoch": 0, "iter": 2419, "iter_tflops": 
40.33113631366613, "iter_time": 0.5115425796508789, "loss": 0.44450613856315613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.12874208976759, "step_time": 0.467520544052124} +{"epoch": 0, "iter": 2420, "iter_tflops": 39.19382466453375, "iter_time": 0.5263863296508788, "loss": 0.6311537623405457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.40454502315478, "step_time": 0.48653024101257325} +{"epoch": 0, "iter": 2421, "iter_tflops": 22.195316537418922, "iter_time": 0.9295246353149413, "loss": 0.2960944175720215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.560049155380817, "step_time": 0.8756812591552734} +{"epoch": 0, "iter": 2422, "iter_tflops": 27.823928418847164, "iter_time": 0.7414874420166016, "loss": 0.31267106533050537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.054017593253, "step_time": 0.5885514678955078} +{"epoch": 0, "iter": 2423, "iter_tflops": 52.878665973827225, "iter_time": 0.39015911483764654, "loss": 0.3412949740886688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.42401724567978, "step_time": 0.35927638816833496} +{"epoch": 0, "iter": 2424, "iter_tflops": 54.85441533136366, "iter_time": 0.37610634231567386, "loss": 0.3524480164051056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.25698497832304, "step_time": 0.34816306495666505} +{"epoch": 0, "iter": 2425, "iter_tflops": 42.23556020732812, "iter_time": 0.48847685241699224, "loss": 0.25062885880470276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74992123551773, "step_time": 0.4509536399841309} +{"epoch": 0, "iter": 2426, "iter_tflops": 16.803975730030217, "iter_time": 1.2277507324218748, "loss": 0.2367689311504364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.993570039322847, "step_time": 1.0862146224975588} +{"epoch": 0, "iter": 2427, "iter_tflops": 40.463980122721445, "iter_time": 0.5098631782531738, "loss": 0.1858593076467514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.40745353695893, "step_time": 0.46458627700805666} +{"epoch": 0, "iter": 2428, 
"iter_tflops": 42.67476497844633, "iter_time": 0.48344949340820315, "loss": 0.2439531534910202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.800460105318486, "step_time": 0.4408309974670411} +{"epoch": 0, "iter": 2429, "iter_tflops": 23.268222518658924, "iter_time": 0.8866639251708984, "loss": 0.13538271188735962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.854698744138343, "step_time": 0.8300681381225585} +{"epoch": 0, "iter": 2430, "iter_tflops": 20.201075325610386, "iter_time": 1.0212868957519532, "loss": 0.2968904376029968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.63404646727529, "step_time": 0.8375032310485839} +{"epoch": 0, "iter": 2431, "iter_tflops": 44.84153107168005, "iter_time": 0.46008896255493165, "loss": 0.2822442948818207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.213868172077675, "step_time": 0.4279078674316406} +{"epoch": 0, "iter": 2432, "iter_tflops": 55.855294706381265, "iter_time": 0.3693668365478516, "loss": 0.2786957621574402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.694259919925095, "step_time": 0.339918363571167} +{"epoch": 0, "iter": 2433, "iter_tflops": 43.41465558182369, "iter_time": 0.47521034622192376, "loss": 0.7616903781890869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.14660996994642, "step_time": 0.4375944213867188} +{"epoch": 0, "iter": 2434, "iter_tflops": 44.1666637508949, "iter_time": 0.4671191291809082, "loss": 0.7257115244865417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.19462471041244, "step_time": 0.4193769874572754} +{"epoch": 0, "iter": 2435, "iter_tflops": 45.074157323556854, "iter_time": 0.4577144584655762, "loss": 0.8634777665138245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.76097371053009, "step_time": 0.4231066761016845} +{"epoch": 0, "iter": 2436, "iter_tflops": 52.08964773729888, "iter_time": 0.39606897735595703, "loss": 0.808097779750824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.45180897387558, "step_time": 0.3654638156890869} +{"epoch": 0, "iter": 2437, 
"iter_tflops": 32.542718699888724, "iter_time": 0.6339695739746094, "loss": 0.46000906825065613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.73281457141019, "step_time": 0.5939942893981933} +{"epoch": 0, "iter": 2438, "iter_tflops": 35.606604666024325, "iter_time": 0.5794176025390625, "loss": 0.3894333243370056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.23856146426287, "step_time": 0.5257861843109131} +{"epoch": 0, "iter": 2439, "iter_tflops": 38.67170611459174, "iter_time": 0.5334932327270508, "loss": 0.42835932970046997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.32804399587936, "step_time": 0.4874095649719238} +{"epoch": 0, "iter": 2440, "iter_tflops": 42.93917418810931, "iter_time": 0.480472526550293, "loss": 0.4205690026283264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.73647697219386, "step_time": 0.4414345035552979} +{"epoch": 0, "iter": 2441, "iter_tflops": 16.82422810822343, "iter_time": 1.2262728118896487, "loss": 0.30072590708732605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.751153221014704, "step_time": 1.162239616394043} +{"epoch": 0, "iter": 2442, "iter_tflops": 17.36875896992755, "iter_time": 1.1878277282714842, "loss": 0.2598837912082672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.589054467807756, "step_time": 0.8062468090057373} +{"epoch": 0, "iter": 2443, "iter_tflops": 45.45036506420665, "iter_time": 0.45392580413818356, "loss": 0.3941098153591156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.87316956242386, "step_time": 0.4136711921691894} +{"epoch": 0, "iter": 2444, "iter_tflops": 41.68985376163283, "iter_time": 0.4948708534240722, "loss": 0.36335402727127075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.759728927448904, "step_time": 0.45085698699951166} +{"epoch": 0, "iter": 2445, "iter_tflops": 21.237003503040743, "iter_time": 0.971469139099121, "loss": 0.19569025933742523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.820745157081905, "step_time": 0.9040499496459962} +{"epoch": 0, "iter": 2446, 
"iter_tflops": 22.514022199517992, "iter_time": 0.9163664016723633, "loss": 0.3128257095813751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.267935526451353, "step_time": 0.6598162994384766} +{"epoch": 0, "iter": 2447, "iter_tflops": 45.96631304789495, "iter_time": 0.44883072280883796, "loss": 0.2106945961713791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.773896633682725, "step_time": 0.4144962501525879} +{"epoch": 0, "iter": 2448, "iter_tflops": 55.31754043193827, "iter_time": 0.37295753479003907, "loss": 0.26730138063430786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.88929609560419, "step_time": 0.3444871597290039} +{"epoch": 0, "iter": 2449, "iter_tflops": 36.71942400383591, "iter_time": 0.561857765197754, "loss": 1.004041075706482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.47824946635547, "step_time": 0.5225939292907715} +{"epoch": 0, "iter": 2450, "iter_tflops": 25.95100946680835, "iter_time": 0.795001579284668, "loss": 1.1413531303405762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.29886381966843, "step_time": 0.6591642951965331} +{"epoch": 0, "iter": 2451, "iter_tflops": 41.89448167117622, "iter_time": 0.4924537239074707, "loss": 1.0523936748504639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1451406401313, "step_time": 0.45699477767944335} +{"epoch": 0, "iter": 2452, "iter_tflops": 46.25557303646837, "iter_time": 0.4460239524841308, "loss": 0.926153302192688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.69179322796276, "step_time": 0.4151811027526855} +{"epoch": 0, "iter": 2453, "iter_tflops": 36.52271692540848, "iter_time": 0.5648838653564453, "loss": 0.0878228098154068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.33538230761087, "step_time": 0.5244920043945313} +{"epoch": 0, "iter": 2454, "iter_tflops": 36.28030629570596, "iter_time": 0.5686581954956055, "loss": 0.02840390056371689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.08739194979925, "step_time": 0.44765157318115234} +{"epoch": 0, "iter": 2455, 
"iter_tflops": 55.27182080818594, "iter_time": 0.37326603698730465, "loss": 0.06331963837146759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.12018047071518, "step_time": 0.34316419792175296} +{"epoch": 0, "iter": 2456, "iter_tflops": 55.57627028741801, "iter_time": 0.37122126770019526, "loss": 0.042373549193143845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.27415090747748, "step_time": 0.34228758430480954} +{"epoch": 0, "iter": 2457, "iter_tflops": 35.36026033355185, "iter_time": 0.5834542312622071, "loss": 0.6331719756126404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.97003050786766, "step_time": 0.5433520393371583} +{"epoch": 0, "iter": 2458, "iter_tflops": 16.17824469765871, "iter_time": 1.275236831665039, "loss": 0.8197781443595886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.492980995782673, "step_time": 0.959899118423462} +{"epoch": 0, "iter": 2459, "iter_tflops": 50.00196027175387, "iter_time": 0.41260569381713863, "loss": 0.7343315482139587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.93716801490229, "step_time": 0.3825023498535156} +{"epoch": 0, "iter": 2460, "iter_tflops": 51.84759417000723, "iter_time": 0.39791804885864257, "loss": 0.7610967755317688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.773499836437324, "step_time": 0.36990853309631344} +{"epoch": 0, "iter": 2461, "iter_tflops": 45.573724497007774, "iter_time": 0.45269711303710936, "loss": 0.2716175615787506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.80803208343452, "step_time": 0.41421217918396} +{"epoch": 0, "iter": 2462, "iter_tflops": 48.40626729791004, "iter_time": 0.42620707321166995, "loss": 0.35875335335731506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.12430607744403, "step_time": 0.38835506820678706} +{"epoch": 0, "iter": 2463, "iter_tflops": 50.36241000015095, "iter_time": 0.4096526260375977, "loss": 0.18979725241661072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.81224709128349, "step_time": 0.37639568901062015} +{"epoch": 0, "iter": 
2464, "iter_tflops": 54.22887754628961, "iter_time": 0.38044478225708006, "loss": 0.27678775787353516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.527890015614176, "step_time": 0.3525002098083496} +{"epoch": 0, "iter": 2465, "iter_tflops": 37.59286503381958, "iter_time": 0.5488034362792968, "loss": 0.002682119607925415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.509273379935124, "step_time": 0.5092931022644043} +{"epoch": 0, "iter": 2466, "iter_tflops": 16.182005864541363, "iter_time": 1.2749404296874998, "loss": 0.004570780787616968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.734783176085024, "step_time": 1.101218696594238} +{"epoch": 0, "iter": 2467, "iter_tflops": 42.22668790965411, "iter_time": 0.4885794868469238, "loss": 0.0032503888942301273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.77228693542386, "step_time": 0.4410965309143066} +{"epoch": 0, "iter": 2468, "iter_tflops": 49.59850520201379, "iter_time": 0.4159620018005371, "loss": 0.01898404024541378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.849785639710326, "step_time": 0.3761380882263183} +{"epoch": 0, "iter": 2469, "iter_tflops": 26.739841412593954, "iter_time": 0.7715488357543945, "loss": 0.1321055144071579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.148283915042498, "step_time": 0.7077978782653809} +{"epoch": 0, "iter": 2470, "iter_tflops": 40.071569321607925, "iter_time": 0.5148561401367188, "loss": 0.14182983338832855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.54210683095682, "step_time": 0.46318180656433106} +{"epoch": 0, "iter": 2471, "iter_tflops": 44.02384602100811, "iter_time": 0.4686345100402832, "loss": 0.1443132758140564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.59885080457911, "step_time": 0.4245181350708008} +{"epoch": 0, "iter": 2472, "iter_tflops": 43.20991121845623, "iter_time": 0.4774620666503906, "loss": 0.11466676741838455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.35768493216538, "step_time": 0.43564404678344726} +{"epoch": 
0, "iter": 2473, "iter_tflops": 18.82302133529412, "iter_time": 1.0960564270019533, "loss": 0.5716673731803894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.36276400770977, "step_time": 1.0131774597167968} +{"epoch": 0, "iter": 2474, "iter_tflops": 23.58172315598211, "iter_time": 0.8748764190673828, "loss": 0.6712045669555664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.75752679770211, "step_time": 0.7174154319763183} +{"epoch": 0, "iter": 2475, "iter_tflops": 51.94896833149072, "iter_time": 0.3971415443420411, "loss": 0.7063148021697998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.06438514274196, "step_time": 0.3679892940521241} +{"epoch": 0, "iter": 2476, "iter_tflops": 51.14028026201153, "iter_time": 0.4034215965270996, "loss": 0.7491838335990906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.468798217914994, "step_time": 0.371940517425537} +{"epoch": 0, "iter": 2477, "iter_tflops": 28.596489246279333, "iter_time": 0.721455467224121, "loss": 0.907207190990448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.139318890379712, "step_time": 0.6845242118835448} +{"epoch": 0, "iter": 2478, "iter_tflops": 13.49526175708974, "iter_time": 1.5287657165527344, "loss": 0.7710498571395874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.95092249377928, "step_time": 1.2171074180603028} +{"epoch": 0, "iter": 2479, "iter_tflops": 43.84978394078712, "iter_time": 0.4704947586059571, "loss": 0.8325680494308472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.440084013521464, "step_time": 0.43488737297058105} +{"epoch": 0, "iter": 2480, "iter_tflops": 45.639046513173795, "iter_time": 0.4520491790771484, "loss": 0.8739939332008362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22338446394358, "step_time": 0.41913195800781244} +{"epoch": 0, "iter": 2481, "iter_tflops": 15.400487040374346, "iter_time": 0.3893224411010742, "loss": 0.003305210266262293, "lr": 3e-05, "seqlen": 2432.0, "step_tflops": 17.118095974686355, "step_time": 0.35025830078125} +{"epoch": 0, 
"iter": 2482, "iter_tflops": 15.53151426397619, "iter_time": 0.3860380325317383, "loss": 0.04063086211681366, "lr": 3e-05, "seqlen": 2432.0, "step_tflops": 17.18907293588777, "step_time": 0.34881201744079593} +{"epoch": 0, "iter": 2483, "iter_tflops": 16.850813309395896, "iter_time": 0.35581399536132813, "loss": 0.0012241844087839127, "lr": 3e-05, "seqlen": 2432.0, "step_tflops": 18.540883073811614, "step_time": 0.32338023948669437} +{"epoch": 0, "iter": 2484, "iter_tflops": 18.321825127553847, "iter_time": 0.32724661254882814, "loss": 0.00369460042566061, "lr": 3e-05, "seqlen": 2432.0, "step_tflops": 20.081538242820375, "step_time": 0.29857051467895507} +{"epoch": 0, "iter": 2485, "iter_tflops": 23.64011478561838, "iter_time": 0.8727154541015626, "loss": 1.0376518964767456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.807836906058572, "step_time": 0.8316361312866211} +{"epoch": 0, "iter": 2486, "iter_tflops": 20.842441900875738, "iter_time": 0.9898597106933594, "loss": 1.2471023797988892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.503281239572146, "step_time": 0.877796308517456} +{"epoch": 0, "iter": 2487, "iter_tflops": 44.077604314174806, "iter_time": 0.46806295013427734, "loss": 0.8976457715034485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.21203025606529, "step_time": 0.4369880599975586} +{"epoch": 0, "iter": 2488, "iter_tflops": 44.31932290475849, "iter_time": 0.465510124206543, "loss": 1.01790189743042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.648899189547905, "step_time": 0.4329815349578857} +{"epoch": 0, "iter": 2489, "iter_tflops": 46.301677305024434, "iter_time": 0.4455798301696777, "loss": 0.4240756928920746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.63540949804141, "step_time": 0.40744399452209473} +{"epoch": 0, "iter": 2490, "iter_tflops": 42.1902090674684, "iter_time": 0.48900192642211915, "loss": 0.39092522859573364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20990371720377, "step_time": 0.4370077438354492} 
+{"epoch": 0, "iter": 2491, "iter_tflops": 44.00397416147786, "iter_time": 0.4688461418151855, "loss": 0.22053323686122894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.27805493518888, "step_time": 0.4363777980804444} +{"epoch": 0, "iter": 2492, "iter_tflops": 49.35047100447355, "iter_time": 0.41805261611938477, "loss": 0.41940224170684814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.372854202605495, "step_time": 0.3865465660095214} +{"epoch": 0, "iter": 2493, "iter_tflops": 47.34372120193184, "iter_time": 0.43577253723144527, "loss": 0.26252391934394836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.882602748728175, "step_time": 0.3976495475769043} +{"epoch": 0, "iter": 2494, "iter_tflops": 39.0980155691711, "iter_time": 0.5276762313842773, "loss": 0.3663923740386963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.00878359214485, "step_time": 0.47969488525390624} +{"epoch": 0, "iter": 2495, "iter_tflops": 38.95051418087995, "iter_time": 0.5296744842529296, "loss": 0.31985706090927124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.40478873077947, "step_time": 0.4865274448394776} +{"epoch": 0, "iter": 2496, "iter_tflops": 38.832699099233274, "iter_time": 0.5312814712524414, "loss": 0.4063251316547394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.58572109385802, "step_time": 0.48446035385131836} +{"epoch": 0, "iter": 2497, "iter_tflops": 19.489087296154025, "iter_time": 1.0585972137451172, "loss": 0.9856258630752563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.726439051408974, "step_time": 0.9953998107910156} +{"epoch": 0, "iter": 2498, "iter_tflops": 25.85951979051954, "iter_time": 0.7978142547607422, "loss": 0.9926132559776306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.55172410907775, "step_time": 0.6538816528320313} +{"epoch": 0, "iter": 2499, "iter_tflops": 37.07860155019217, "iter_time": 0.5564150924682617, "loss": 0.9374845623970032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.53875479818854, "step_time": 
0.5089227237701416} +{"epoch": 0, "iter": 2500, "iter_tflops": 40.915123313072236, "iter_time": 0.5042412643432618, "loss": 1.0517587661743164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.78035652315039, "step_time": 0.4607174911499023} +{"epoch": 0, "iter": 2501, "iter_tflops": 31.599000069078976, "iter_time": 0.6529033660888671, "loss": 1.02494478225708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.3588008300746, "step_time": 0.6004602317810058} +{"epoch": 0, "iter": 2502, "iter_tflops": 35.404169190177626, "iter_time": 0.5827306213378906, "loss": 1.001764178276062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.55434287402053, "step_time": 0.5351172389984131} +{"epoch": 0, "iter": 2503, "iter_tflops": 38.10242311781828, "iter_time": 0.5414640808105469, "loss": 0.9285767078399658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53813075036204, "step_time": 0.49667842864990236} +{"epoch": 0, "iter": 2504, "iter_tflops": 35.16007353071075, "iter_time": 0.5867761764526367, "loss": 0.9775968790054321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.1053874341753, "step_time": 0.5414219589233399} +{"epoch": 0, "iter": 2505, "iter_tflops": 18.005494047166984, "iter_time": 1.1458221282958987, "loss": 1.4097307920455933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.34106103012339, "step_time": 1.0666991577148437} +{"epoch": 0, "iter": 2506, "iter_tflops": 16.449638600393556, "iter_time": 1.2541973724365234, "loss": 1.4330435991287231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.831146478746536, "step_time": 1.0955835075378417} +{"epoch": 0, "iter": 2507, "iter_tflops": 37.42082222230211, "iter_time": 0.5513265686035157, "loss": 1.0720491409301758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.67289901748481, "step_time": 0.507244234085083} +{"epoch": 0, "iter": 2508, "iter_tflops": 40.79580259000874, "iter_time": 0.5057160835266113, "loss": 1.062358021736145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.19905130868089, "step_time": 
0.46677684020996096} +{"epoch": 0, "iter": 2509, "iter_tflops": 24.209024932933527, "iter_time": 0.8522067108154296, "loss": 0.5155255794525146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.465188964199886, "step_time": 0.7795558738708497} +{"epoch": 0, "iter": 2510, "iter_tflops": 38.65496241785403, "iter_time": 0.5337243194580078, "loss": 0.5004523396492004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.15064486798154, "step_time": 0.4894609222412109} +{"epoch": 0, "iter": 2511, "iter_tflops": 39.56510737538397, "iter_time": 0.5214466705322265, "loss": 0.370530366897583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.287322819014555, "step_time": 0.476608211517334} +{"epoch": 0, "iter": 2512, "iter_tflops": 42.61332045051921, "iter_time": 0.4841465835571289, "loss": 0.45474866032600403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.20061634329649, "step_time": 0.4465545082092285} +{"epoch": 0, "iter": 2513, "iter_tflops": 39.392413628957726, "iter_time": 0.5237326583862305, "loss": 0.47599709033966064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.53804873251269, "step_time": 0.4738635311126709} +{"epoch": 0, "iter": 2514, "iter_tflops": 38.58130111970798, "iter_time": 0.5347433319091797, "loss": 0.5954205393791199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.151465169038936, "step_time": 0.4894513969421387} +{"epoch": 0, "iter": 2515, "iter_tflops": 42.22996269062124, "iter_time": 0.4885415992736816, "loss": 0.5642136335372925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.102159673533954, "step_time": 0.4475081787109375} +{"epoch": 0, "iter": 2516, "iter_tflops": 47.49428580253231, "iter_time": 0.43439106750488277, "loss": 0.4917885363101959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.55746359737069, "step_time": 0.40015726280212405} +{"epoch": 0, "iter": 2517, "iter_tflops": 32.78512826503372, "iter_time": 0.6292820739746094, "loss": 0.1120930090546608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.722893099479805, "step_time": 
0.5775314292907715} +{"epoch": 0, "iter": 2518, "iter_tflops": 8.604336381203277, "iter_time": 2.397755340576172, "loss": 0.1712726652622223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.231208256457181, "step_time": 1.8369433670043944} +{"epoch": 0, "iter": 2519, "iter_tflops": 12.108019008732187, "iter_time": 1.7039198150634767, "loss": 0.21126651763916016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.121360366820637, "step_time": 1.3643675575256347} +{"epoch": 0, "iter": 2520, "iter_tflops": 41.742372390881, "iter_time": 0.49424822616577146, "loss": 0.15129032731056213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.864879276573774, "step_time": 0.4498233470916748} +{"epoch": 0, "iter": 2521, "iter_tflops": 19.16619922152224, "iter_time": 0.8932589492797851, "loss": 0.33044326305389404, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 20.412182609710324, "step_time": 0.838733383178711} +{"epoch": 0, "iter": 2522, "iter_tflops": 5.529444377864031, "iter_time": 3.096220489501953, "loss": 0.2606257200241089, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 6.863804912330371, "step_time": 2.494298599243164} +{"epoch": 0, "iter": 2523, "iter_tflops": 13.44645469333972, "iter_time": 1.2732262420654297, "loss": 0.2458261251449585, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 15.808952037022308, "step_time": 1.082954704284668} +{"epoch": 0, "iter": 2524, "iter_tflops": 29.36087682693434, "iter_time": 0.5831017608642577, "loss": 0.29247209429740906, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 31.11221633771728, "step_time": 0.5502783470153809} +{"epoch": 0, "iter": 2525, "iter_tflops": 18.970953529977095, "iter_time": 0.841885627746582, "loss": 0.33246877789497375, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 20.02869591503897, "step_time": 0.7974245147705078} +{"epoch": 0, "iter": 2526, "iter_tflops": 8.852209486079792, "iter_time": 1.8042244873046873, "loss": 0.23172584176063538, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 11.71099705787308, "step_time": 
1.3637927703857422} +{"epoch": 0, "iter": 2527, "iter_tflops": 23.685322415613527, "iter_time": 0.6743152084350587, "loss": 0.2757337689399719, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 25.419333035037628, "step_time": 0.6283159790039062} +{"epoch": 0, "iter": 2528, "iter_tflops": 24.53765255037257, "iter_time": 0.650892463684082, "loss": 0.41432490944862366, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 26.364641139010615, "step_time": 0.6057876167297364} +{"epoch": 0, "iter": 2529, "iter_tflops": 20.99297324408952, "iter_time": 0.982761863708496, "loss": 0.02837647870182991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.447207031221417, "step_time": 0.919094009399414} +{"epoch": 0, "iter": 2530, "iter_tflops": 16.679891090166986, "iter_time": 1.2368841857910156, "loss": 0.003459103172644973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.34868391265241, "step_time": 1.0138785190582276} +{"epoch": 0, "iter": 2531, "iter_tflops": 45.33767803800901, "iter_time": 0.45505403900146485, "loss": 0.015682443976402283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.28196530072267, "step_time": 0.41030801773071296} +{"epoch": 0, "iter": 2532, "iter_tflops": 44.76045390001061, "iter_time": 0.4609223480224609, "loss": 0.014213168993592262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.31029968548286, "step_time": 0.4183931884765625} +{"epoch": 0, "iter": 2533, "iter_tflops": 18.309721894229423, "iter_time": 1.1267835540771485, "loss": 0.016189010813832283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.174847982561637, "step_time": 1.0759456100463867} +{"epoch": 0, "iter": 2534, "iter_tflops": 15.803404656358857, "iter_time": 1.305484100341797, "loss": 0.012523137032985687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.346457624206298, "step_time": 1.066401607513428} +{"epoch": 0, "iter": 2535, "iter_tflops": 48.958988463848, "iter_time": 0.42139542007446285, "loss": 0.0008222051546908915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.972547859061635, 
"step_time": 0.3822516136169433} +{"epoch": 0, "iter": 2536, "iter_tflops": 49.71294780309095, "iter_time": 0.4150044288635254, "loss": 0.007178617175668478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.10451136792892, "step_time": 0.37439935493469234} +{"epoch": 0, "iter": 2537, "iter_tflops": 23.505133536845324, "iter_time": 0.8777271347045899, "loss": 0.767288327217102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.098664854089996, "step_time": 0.821999641418457} +{"epoch": 0, "iter": 2538, "iter_tflops": 8.657672378457914, "iter_time": 2.3829838562011716, "loss": 0.9701436161994934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.293007194137347, "step_time": 2.2200664520263675} +{"epoch": 0, "iter": 2539, "iter_tflops": 11.989456320090458, "iter_time": 1.7207697296142577, "loss": 0.7818763256072998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.893882821561139, "step_time": 1.484904815673828} +{"epoch": 0, "iter": 2540, "iter_tflops": 35.63148862603418, "iter_time": 0.579012954711914, "loss": 1.041419506072998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.022682725476535, "step_time": 0.5286949043273925} +{"epoch": 0, "iter": 2541, "iter_tflops": 12.722452613918678, "iter_time": 1.2167523803710936, "loss": 0.42948952317237854, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 13.429097475932165, "step_time": 1.1527263488769532} +{"epoch": 0, "iter": 2542, "iter_tflops": 13.772275089707566, "iter_time": 1.1240027084350586, "loss": 0.3009909391403198, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 16.640482158173075, "step_time": 0.930265983581543} +{"epoch": 0, "iter": 2543, "iter_tflops": 25.697750706196086, "iter_time": 0.6023902511596679, "loss": 0.3689156472682953, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 27.535457748562294, "step_time": 0.5621869316101075} +{"epoch": 0, "iter": 2544, "iter_tflops": 26.848342691264428, "iter_time": 0.5765746765136719, "loss": 0.23353412747383118, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 
28.643355803026598, "step_time": 0.5404420700073241} +{"epoch": 0, "iter": 2545, "iter_tflops": 20.208948176196735, "iter_time": 1.020889030456543, "loss": 0.29616981744766235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.725530013268312, "step_time": 0.9496244049072265} +{"epoch": 0, "iter": 2546, "iter_tflops": 15.796535923429962, "iter_time": 1.3060517578125002, "loss": 0.19520966708660126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.715086517541906, "step_time": 1.1023776721954346} +{"epoch": 0, "iter": 2547, "iter_tflops": 47.548456287907214, "iter_time": 0.43389617919921875, "loss": 0.1906610131263733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.76677073994114, "step_time": 0.39853931808471676} +{"epoch": 0, "iter": 2548, "iter_tflops": 49.345793082085564, "iter_time": 0.4180922470092774, "loss": 0.23872330784797668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.14791868506785, "step_time": 0.38818252944946285} +{"epoch": 0, "iter": 2549, "iter_tflops": 42.879142090753, "iter_time": 0.4811452026367187, "loss": 1.0514386892318726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.63967617443941, "step_time": 0.4423507022857666} +{"epoch": 0, "iter": 2550, "iter_tflops": 42.39412291027536, "iter_time": 0.48664984893798824, "loss": 0.810306966304779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.522847257396705, "step_time": 0.4532030563354492} +{"epoch": 0, "iter": 2551, "iter_tflops": 43.11728388216077, "iter_time": 0.4784877815246582, "loss": 0.8739266991615295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.13221461592282, "step_time": 0.44721662902832027} +{"epoch": 0, "iter": 2552, "iter_tflops": 44.62090190285256, "iter_time": 0.462363883972168, "loss": 0.9826661944389343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.37016683623202, "step_time": 0.42652516746520996} +{"epoch": 0, "iter": 2553, "iter_tflops": 24.72121507370271, "iter_time": 0.8345501403808593, "loss": 0.9040294289588928, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 25.974953723653673, "step_time": 0.7942687301635742} +{"epoch": 0, "iter": 2554, "iter_tflops": 17.452949174996363, "iter_time": 1.1820978393554686, "loss": 0.8198334574699402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.652439883550215, "step_time": 0.9107669467926026} +{"epoch": 0, "iter": 2555, "iter_tflops": 35.095516610491124, "iter_time": 0.5878555297851562, "loss": 0.642861545085907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.14447106314055, "step_time": 0.5408672065734863} +{"epoch": 0, "iter": 2556, "iter_tflops": 35.926501780630225, "iter_time": 0.5742583465576173, "loss": 1.1373172998428345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.9531888446169, "step_time": 0.5296381149291992} +{"epoch": 0, "iter": 2557, "iter_tflops": 2.1644032336466728, "iter_time": 0.6334033279418945, "loss": 1.1747844219207764, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 2.340343478613258, "step_time": 0.5857859001159669} +{"epoch": 0, "iter": 2558, "iter_tflops": 0.6549131067865932, "iter_time": 2.0933161926269532, "loss": 0.759535551071167, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 0.8654456416607363, "step_time": 1.5840858688354493} +{"epoch": 0, "iter": 2559, "iter_tflops": 0.8546994487933707, "iter_time": 1.6040026855468752, "loss": 1.0708860158920288, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 1.0696655589268431, "step_time": 1.2816531295776368} +{"epoch": 0, "iter": 2560, "iter_tflops": 2.6660189104420753, "iter_time": 0.5142274894714356, "loss": 0.9386685490608215, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 2.965103511850849, "step_time": 0.4623582973480225} +{"epoch": 0, "iter": 2561, "iter_tflops": 13.59268498546675, "iter_time": 1.0847305450439453, "loss": 0.21690452098846436, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 14.325564082802021, "step_time": 1.0292369995117188} +{"epoch": 0, "iter": 2562, "iter_tflops": 9.994029962903374, "iter_time": 1.475320831298828, "loss": 0.23493826389312744, "lr": 3e-05, "seqlen": 5904.0, 
"step_tflops": 13.80091807362281, "step_time": 1.0683637504577637} +{"epoch": 0, "iter": 2563, "iter_tflops": 22.750730283795974, "iter_time": 0.648084716796875, "loss": 0.23412340879440308, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.468038221470017, "step_time": 0.6025983963012695} +{"epoch": 0, "iter": 2564, "iter_tflops": 24.774222365206228, "iter_time": 0.5951508941650392, "loss": 0.35303592681884766, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 26.47299483503716, "step_time": 0.5569600524902344} +{"epoch": 0, "iter": 2565, "iter_tflops": 16.21950614023188, "iter_time": 1.271992706298828, "loss": 0.7932475805282593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.983538807825763, "step_time": 1.214770004272461} +{"epoch": 0, "iter": 2566, "iter_tflops": 10.91743869730041, "iter_time": 1.889737518310547, "loss": 0.9980058670043945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.76064109234077, "step_time": 1.754249053955078} +{"epoch": 0, "iter": 2567, "iter_tflops": 14.720094603291992, "iter_time": 1.4015598449707032, "loss": 0.7182315587997437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.8066904090014, "step_time": 1.2275524215698241} +{"epoch": 0, "iter": 2568, "iter_tflops": 27.929076872602835, "iter_time": 0.7386958618164061, "loss": 0.9636880159378052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.42209166636024, "step_time": 0.6565792541503905} +{"epoch": 0, "iter": 2569, "iter_tflops": 21.11023119827736, "iter_time": 0.7662768478393553, "loss": 0.3579859137535095, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 22.250595543059504, "step_time": 0.7270044250488281} +{"epoch": 0, "iter": 2570, "iter_tflops": 11.511608401094241, "iter_time": 1.4052147064208984, "loss": 0.4046018421649933, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 13.808510371332325, "step_time": 1.171471866607666} +{"epoch": 0, "iter": 2571, "iter_tflops": 10.894683805071736, "iter_time": 1.4847866821289064, "loss": 0.250774085521698, "lr": 3e-05, "seqlen": 6464.0, 
"step_tflops": 13.700272160019171, "step_time": 1.1807270126342773} +{"epoch": 0, "iter": 2572, "iter_tflops": 14.239217096235695, "iter_time": 1.1360372772216798, "loss": 0.2408469170331955, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 15.542797742830059, "step_time": 1.0407573776245118} +{"epoch": 0, "iter": 2573, "iter_tflops": 10.419564427518148, "iter_time": 1.3954823303222657, "loss": 0.29796522855758667, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 11.09019244716617, "step_time": 1.3110970001220703} +{"epoch": 0, "iter": 2574, "iter_tflops": 13.443783790833516, "iter_time": 1.0815644073486328, "loss": 0.2249133139848709, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 17.8274376599622, "step_time": 0.8156145782470703} +{"epoch": 0, "iter": 2575, "iter_tflops": 22.297648071204684, "iter_time": 0.6521009750366211, "loss": 0.3050013780593872, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.030194413702954, "step_time": 0.6050853271484375} +{"epoch": 0, "iter": 2576, "iter_tflops": 23.184474328463963, "iter_time": 0.6271575469970703, "loss": 0.26639243960380554, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.88827027542062, "step_time": 0.5842237281799316} +{"epoch": 0, "iter": 2577, "iter_tflops": 20.221841435664658, "iter_time": 1.020238121032715, "loss": 0.223310649394989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.583251617863464, "step_time": 0.9558843994140624} +{"epoch": 0, "iter": 2578, "iter_tflops": 26.809512485545824, "iter_time": 0.7695437774658204, "loss": 0.22810839116573334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.288792609950324, "step_time": 0.6811461181640626} +{"epoch": 0, "iter": 2579, "iter_tflops": 51.33131530393019, "iter_time": 0.4019202194213867, "loss": 0.16317322850227356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.49330413378067, "step_time": 0.37177626800537106} +{"epoch": 0, "iter": 2580, "iter_tflops": 53.78747925552385, "iter_time": 0.3835668411254882, "loss": 0.2646735906600952, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 58.12712243706263, "step_time": 0.35493058395385746} +{"epoch": 0, "iter": 2581, "iter_tflops": 31.307469770082882, "iter_time": 0.6589831008911133, "loss": 0.36551618576049805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.25438244427079, "step_time": 0.6204022445678712} +{"epoch": 0, "iter": 2582, "iter_tflops": 16.114207946283887, "iter_time": 1.2803045349121094, "loss": 0.2933250665664673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.942162305842572, "step_time": 1.1498666191101075} +{"epoch": 0, "iter": 2583, "iter_tflops": 45.566794166077145, "iter_time": 0.4527659645080567, "loss": 0.49968406558036804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.33139138064329, "step_time": 0.4182143039703369} +{"epoch": 0, "iter": 2584, "iter_tflops": 49.882547487263324, "iter_time": 0.41359342193603515, "loss": 0.3701494336128235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.062906128353454, "step_time": 0.381612735748291} +{"epoch": 0, "iter": 2585, "iter_tflops": 32.4230347750525, "iter_time": 0.6363097610473634, "loss": 0.6186621189117432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.59848904651483, "step_time": 0.5963004188537597} +{"epoch": 0, "iter": 2586, "iter_tflops": 29.217463391463045, "iter_time": 0.7061219940185547, "loss": 0.7162697315216064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.87125657605777, "step_time": 0.6276332473754882} +{"epoch": 0, "iter": 2587, "iter_tflops": 49.72823319733924, "iter_time": 0.41487686538696283, "loss": 0.6310043334960938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.674553687969684, "step_time": 0.38437382507324225} +{"epoch": 0, "iter": 2588, "iter_tflops": 48.65443019391585, "iter_time": 0.4240331954956055, "loss": 0.6414738893508911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.654677897394386, "step_time": 0.3918188152313232} +{"epoch": 0, "iter": 2589, "iter_tflops": 42.69614617124047, "iter_time": 0.48320739364624027, "loss": 0.8338819146156311, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 46.38856873925877, "step_time": 0.44474520492553715} +{"epoch": 0, "iter": 2590, "iter_tflops": 43.13174265966373, "iter_time": 0.47832738113403317, "loss": 0.9459182620048523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.55809883243084, "step_time": 0.44312577247619633} +{"epoch": 0, "iter": 2591, "iter_tflops": 48.25459056529641, "iter_time": 0.4275467529296874, "loss": 1.0460741519927979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.0879204551176, "step_time": 0.39608211135864263} +{"epoch": 0, "iter": 2592, "iter_tflops": 45.83048906323708, "iter_time": 0.4501608848571777, "loss": 0.939821183681488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.185173074978806, "step_time": 0.4194575767517089} +{"epoch": 0, "iter": 2593, "iter_tflops": 23.037959675661867, "iter_time": 0.6293749923706056, "loss": 0.13791923224925995, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.682377004005648, "step_time": 0.5874440574645996} +{"epoch": 0, "iter": 2594, "iter_tflops": 16.557065446944872, "iter_time": 0.8757298049926758, "loss": 0.14359018206596375, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 20.726268614083843, "step_time": 0.699571928024292} +{"epoch": 0, "iter": 2595, "iter_tflops": 38.71256794326303, "iter_time": 0.374542854309082, "loss": 0.137346088886261, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 42.272761571383114, "step_time": 0.3429990177154541} +{"epoch": 0, "iter": 2596, "iter_tflops": 37.893072552721904, "iter_time": 0.38264291381835935, "loss": 0.10901962220668793, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 41.19138432757255, "step_time": 0.3520036029815674} +{"epoch": 0, "iter": 2597, "iter_tflops": 31.755508961692495, "iter_time": 0.6496854934692383, "loss": 0.8380995988845825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.74419961779194, "step_time": 0.6113967361450195} +{"epoch": 0, "iter": 2598, "iter_tflops": 18.70413908102879, "iter_time": 1.103022888183594, "loss": 0.6307503581047058, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.864480099697317, "step_time": 0.988814167022705} +{"epoch": 0, "iter": 2599, "iter_tflops": 36.5094953824017, "iter_time": 0.5650884323120117, "loss": 1.1078499555587769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.59240993563288, "step_time": 0.521087085723877} +{"epoch": 0, "iter": 2600, "iter_tflops": 37.937415615585714, "iter_time": 0.5438191604614259, "loss": 0.7197268605232239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.80078625866699, "step_time": 0.5056543121337891} +{"epoch": 0, "iter": 2601, "iter_tflops": 18.46087656793446, "iter_time": 1.117557632446289, "loss": 0.9828894734382629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.750014030609083, "step_time": 1.0446115875244142} +{"epoch": 0, "iter": 2602, "iter_tflops": 31.765069565491395, "iter_time": 0.6494899520874023, "loss": 0.9397076368331909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.53877145319956, "step_time": 0.5217939949035644} +{"epoch": 0, "iter": 2603, "iter_tflops": 48.612766078923784, "iter_time": 0.42439661788940425, "loss": 0.8838119506835938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.62975237177432, "step_time": 0.39200438117980957} +{"epoch": 0, "iter": 2604, "iter_tflops": 50.75701958159216, "iter_time": 0.4064677886962891, "loss": 0.9430269002914429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.590135160575606, "step_time": 0.3779271373748779} +{"epoch": 0, "iter": 2605, "iter_tflops": 25.981047290298278, "iter_time": 0.7940824432373046, "loss": 0.5679486393928528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.344274053811887, "step_time": 0.7544941024780273} +{"epoch": 0, "iter": 2606, "iter_tflops": 16.474134218455763, "iter_time": 1.252332489013672, "loss": 0.4936273694038391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.04059342837952, "step_time": 0.936049819946289} +{"epoch": 0, "iter": 2607, "iter_tflops": 42.06717622669485, "iter_time": 0.4904320983886719, "loss": 0.5142037868499756, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 45.71290576189358, "step_time": 0.45131879425048826} +{"epoch": 0, "iter": 2608, "iter_tflops": 38.97528808436669, "iter_time": 0.5293378067016601, "loss": 0.4872495234012604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.454589390879164, "step_time": 0.48595673179626464} +{"epoch": 0, "iter": 2609, "iter_tflops": 23.914845440601994, "iter_time": 0.8626898117065429, "loss": 0.783096194267273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.58005787272949, "step_time": 0.8065303688049316} +{"epoch": 0, "iter": 2610, "iter_tflops": 11.570880666590105, "iter_time": 1.7830184326171876, "loss": 0.8152990937232971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.570981331312286, "step_time": 1.415902816772461} +{"epoch": 0, "iter": 2611, "iter_tflops": 13.787535093497127, "iter_time": 1.496358367919922, "loss": 0.7247239947319031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.214391422591383, "step_time": 1.2723939476013184} +{"epoch": 0, "iter": 2612, "iter_tflops": 31.351870220167825, "iter_time": 0.6580498504638672, "loss": 0.7915589809417725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.821623951363996, "step_time": 0.5314330368041993} +{"epoch": 0, "iter": 2613, "iter_tflops": 13.626816768046979, "iter_time": 1.1600311126708984, "loss": 0.2655480206012726, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 14.602203167447522, "step_time": 1.082544273376465} +{"epoch": 0, "iter": 2614, "iter_tflops": 18.382241687078242, "iter_time": 0.8599349136352539, "loss": 0.3375326097011566, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 21.704531970995557, "step_time": 0.7283055648803711} +{"epoch": 0, "iter": 2615, "iter_tflops": 28.71106535913223, "iter_time": 0.5505727920532226, "loss": 0.1372789442539215, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.552322334985075, "step_time": 0.5173921394348144} +{"epoch": 0, "iter": 2616, "iter_tflops": 30.23732003624679, "iter_time": 0.5227821578979492, "loss": 0.14014177024364471, "lr": 
3e-05, "seqlen": 6320.0, "step_tflops": 32.12229323561744, "step_time": 0.4921046981811523} +{"epoch": 0, "iter": 2617, "iter_tflops": 27.869240913651765, "iter_time": 0.7402818603515625, "loss": 0.2935752272605896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.578948831663116, "step_time": 0.6974924507141114} +{"epoch": 0, "iter": 2618, "iter_tflops": 15.324755286756355, "iter_time": 1.346259246826172, "loss": 0.20600050687789917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.347276412757765, "step_time": 1.1244771728515626} +{"epoch": 0, "iter": 2619, "iter_tflops": 37.54652428744198, "iter_time": 0.5494807815551758, "loss": 0.23467180132865906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.20079729166356, "step_time": 0.5007450065612793} +{"epoch": 0, "iter": 2620, "iter_tflops": 42.755812626051316, "iter_time": 0.4825330696105957, "loss": 0.20546473562717438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.70690560173483, "step_time": 0.44171398735046385} +{"epoch": 0, "iter": 2621, "iter_tflops": 35.09400766389975, "iter_time": 0.5878808059692382, "loss": 0.32114505767822266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.71271067549326, "step_time": 0.5329281558990479} +{"epoch": 0, "iter": 2622, "iter_tflops": 41.061395567928386, "iter_time": 0.5024450149536133, "loss": 0.3325272500514984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.96201412551745, "step_time": 0.45885607910156256} +{"epoch": 0, "iter": 2623, "iter_tflops": 46.35584533926006, "iter_time": 0.4450591583251953, "loss": 0.3806951344013214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.522591129990374, "step_time": 0.40835382843017576} +{"epoch": 0, "iter": 2624, "iter_tflops": 39.790823917508035, "iter_time": 0.5184887237548829, "loss": 0.3748795688152313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.256351973070466, "step_time": 0.47694945526123045} +{"epoch": 0, "iter": 2625, "iter_tflops": 30.96735218636941, "iter_time": 0.6662207794189453, "loss": 1.1005038022994995, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.32622734191043, "step_time": 0.6190647773742676} +{"epoch": 0, "iter": 2626, "iter_tflops": 13.535591769934129, "iter_time": 1.524210678100586, "loss": 1.0794914960861206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.692886144831498, "step_time": 1.3146780853271485} +{"epoch": 0, "iter": 2627, "iter_tflops": 16.726638550335817, "iter_time": 1.2334273529052735, "loss": 0.9460611343383789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.78624692058941, "step_time": 0.8673538780212403} +{"epoch": 0, "iter": 2628, "iter_tflops": 35.509866594628534, "iter_time": 0.5809960861206054, "loss": 0.9511734247207642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.67658368990465, "step_time": 0.533425952911377} +{"epoch": 0, "iter": 2629, "iter_tflops": 14.529304216024581, "iter_time": 1.076704360961914, "loss": 0.3771783709526062, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 15.518566730319522, "step_time": 1.008067657470703} +{"epoch": 0, "iter": 2630, "iter_tflops": 12.172743218897534, "iter_time": 1.2851470642089844, "loss": 0.2861576974391937, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 15.586520198181407, "step_time": 1.0036727256774902} +{"epoch": 0, "iter": 2631, "iter_tflops": 28.84576762005061, "iter_time": 0.5423244552612305, "loss": 0.5359787940979004, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 30.736272864005098, "step_time": 0.5089675407409668} +{"epoch": 0, "iter": 2632, "iter_tflops": 28.490464624705815, "iter_time": 0.5490877532958984, "loss": 0.2564854919910431, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 30.34743822767137, "step_time": 0.5154888229370118} +{"epoch": 0, "iter": 2633, "iter_tflops": 48.33980935661309, "iter_time": 0.42679302597045904, "loss": 0.1827392876148224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.02710113769735, "step_time": 0.3890669689178467} +{"epoch": 0, "iter": 2634, "iter_tflops": 48.37921194909284, "iter_time": 0.4264454231262207, "loss": 0.21220150589942932, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.48258361101036, "step_time": 0.38575349426269534} +{"epoch": 0, "iter": 2635, "iter_tflops": 49.946927827500154, "iter_time": 0.4130603103637695, "loss": 0.11982898414134979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.78525108019485, "step_time": 0.3835827312469482} +{"epoch": 0, "iter": 2636, "iter_tflops": 55.297530206555244, "iter_time": 0.3730924949645996, "loss": 0.15967626869678497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.9103178901417, "step_time": 0.3443662834167481} +{"epoch": 0, "iter": 2637, "iter_tflops": 21.696329722860842, "iter_time": 0.8974263229370116, "loss": 0.008516203612089157, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 22.83999219484789, "step_time": 0.8524896697998048} +{"epoch": 0, "iter": 2638, "iter_tflops": 11.624338156358704, "iter_time": 1.675007827758789, "loss": 0.021432116627693176, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 14.913403496438203, "step_time": 1.305594488143921} +{"epoch": 0, "iter": 2639, "iter_tflops": 40.279200803247036, "iter_time": 0.4833973121643066, "loss": 0.022425614297389984, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 44.61549028354484, "step_time": 0.43641473579406737} +{"epoch": 0, "iter": 2640, "iter_tflops": 42.040429512030485, "iter_time": 0.4631460151672363, "loss": 0.01624213717877865, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 46.653975541374855, "step_time": 0.4173461570739746} +{"epoch": 0, "iter": 2641, "iter_tflops": 40.151541921928086, "iter_time": 0.513830665588379, "loss": 0.17301705479621887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35757241744885, "step_time": 0.46510871505737306} +{"epoch": 0, "iter": 2642, "iter_tflops": 39.008873400143585, "iter_time": 0.528882064819336, "loss": 0.12989619374275208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.008105520518015, "step_time": 0.46880212783813474} +{"epoch": 0, "iter": 2643, "iter_tflops": 41.82222519864055, "iter_time": 0.4933045387268067, "loss": 
0.2483701854944229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.651459843016816, "step_time": 0.45192625999450686} +{"epoch": 0, "iter": 2644, "iter_tflops": 41.19076407318105, "iter_time": 0.5008669776916505, "loss": 0.14066077768802643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1478348477856, "step_time": 0.4569675064086914} +{"epoch": 0, "iter": 2645, "iter_tflops": 20.56472304424055, "iter_time": 1.003227394104004, "loss": 0.8510228395462036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.899455994541228, "step_time": 0.9420824661254883} +{"epoch": 0, "iter": 2646, "iter_tflops": 22.96049543669183, "iter_time": 0.8985474014282226, "loss": 1.0656489133834839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.63840912475075, "step_time": 0.8046947612762451} +{"epoch": 0, "iter": 2647, "iter_tflops": 39.55895304540927, "iter_time": 0.5215277938842773, "loss": 0.8600791096687317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.61403529493545, "step_time": 0.48413846206665045} +{"epoch": 0, "iter": 2648, "iter_tflops": 42.60133585729748, "iter_time": 0.48428278350830084, "loss": 0.8674708008766174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60733397375375, "step_time": 0.45236350631713873} +{"epoch": 0, "iter": 2649, "iter_tflops": 25.670099931817752, "iter_time": 0.8037013320922852, "loss": 0.749204695224762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.117922481505268, "step_time": 0.7607918167114258} +{"epoch": 0, "iter": 2650, "iter_tflops": 21.203617053921487, "iter_time": 0.9729987792968751, "loss": 0.9241018891334534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.70972742888785, "step_time": 0.8024625530242919} +{"epoch": 0, "iter": 2651, "iter_tflops": 40.87459067061375, "iter_time": 0.5047412872314453, "loss": 0.9698529839515686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.427517482148474, "step_time": 0.46437646484375} +{"epoch": 0, "iter": 2652, "iter_tflops": 45.325925335461434, "iter_time": 0.45517203140258794, "loss": 
1.0305896997451782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27278755923819, "step_time": 0.41871171760559084} +{"epoch": 0, "iter": 2653, "iter_tflops": 23.827840915896875, "iter_time": 0.8658398208618164, "loss": 0.4906214475631714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.501163597354182, "step_time": 0.8090255737304687} +{"epoch": 0, "iter": 2654, "iter_tflops": 8.235839854335532, "iter_time": 2.5050382080078126, "loss": 0.3825939893722534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.357541558763844, "step_time": 2.204755744934082} +{"epoch": 0, "iter": 2655, "iter_tflops": 11.455079679643926, "iter_time": 1.801043212890625, "loss": 0.5325635075569153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.39795473029678, "step_time": 1.5398688774108886} +{"epoch": 0, "iter": 2656, "iter_tflops": 37.363273863145004, "iter_time": 0.5521757431030273, "loss": 0.5970269441604614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.838638505520926, "step_time": 0.5051856346130372} +{"epoch": 0, "iter": 2657, "iter_tflops": 14.068650482784287, "iter_time": 1.0480323333740236, "loss": 0.2959224283695221, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 15.037542027839551, "step_time": 0.9805060272216797} +{"epoch": 0, "iter": 2658, "iter_tflops": 10.405322180298338, "iter_time": 1.4170056762695311, "loss": 0.4683714509010315, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 12.629972604177196, "step_time": 1.1674135055541992} +{"epoch": 0, "iter": 2659, "iter_tflops": 25.12580988475428, "iter_time": 0.5868228988647461, "loss": 0.45011162757873535, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 26.705693513826557, "step_time": 0.5521070098876953} +{"epoch": 0, "iter": 2660, "iter_tflops": 26.860021653821462, "iter_time": 0.548934799194336, "loss": 0.3060259521007538, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 28.60695309478261, "step_time": 0.5154131774902344} +{"epoch": 0, "iter": 2661, "iter_tflops": 27.552900666318177, "iter_time": 0.7487811813354491, "loss": 
0.13705961406230927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.143454342197685, "step_time": 0.7079151725769044} +{"epoch": 0, "iter": 2662, "iter_tflops": 17.320574493338206, "iter_time": 1.1911321716308594, "loss": 0.10291415452957153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.19846189121277, "step_time": 0.8893302326202392} +{"epoch": 0, "iter": 2663, "iter_tflops": 42.77543831612024, "iter_time": 0.4823116798400878, "loss": 0.16562992334365845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.068411645841394, "step_time": 0.43832143020629877} +{"epoch": 0, "iter": 2664, "iter_tflops": 51.5981304133303, "iter_time": 0.3998418807983398, "loss": 0.19923365116119385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.21412709622907, "step_time": 0.36700905227661135} +{"epoch": 0, "iter": 2665, "iter_tflops": 26.979744478088335, "iter_time": 0.6345641632080078, "loss": 0.02632177248597145, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 28.696466323392073, "step_time": 0.5966023406982423} +{"epoch": 0, "iter": 2666, "iter_tflops": 9.643726362985724, "iter_time": 1.7752866821289064, "loss": 0.011993683874607086, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 12.473415856664928, "step_time": 1.3725493621826172} +{"epoch": 0, "iter": 2667, "iter_tflops": 15.116236894066105, "iter_time": 1.1325820770263673, "loss": 0.01144794374704361, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 16.989616212758087, "step_time": 1.0076966285705566} +{"epoch": 0, "iter": 2668, "iter_tflops": 14.302948459558491, "iter_time": 1.1969824981689454, "loss": 0.006074432749301195, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 16.888729816717778, "step_time": 1.0137161979675293} +{"epoch": 0, "iter": 2669, "iter_tflops": 14.7583096438067, "iter_time": 0.9658830184936524, "loss": 0.2648554742336273, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 15.533419300440073, "step_time": 0.9176859512329102} +{"epoch": 0, "iter": 2670, "iter_tflops": 11.841639924978185, "iter_time": 
1.2037860260009767, "loss": 0.24067316949367523, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 14.45177763947805, "step_time": 0.9863700523376465} +{"epoch": 0, "iter": 2671, "iter_tflops": 21.401681016646872, "iter_time": 0.6660598602294923, "loss": 0.42896056175231934, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 22.94630284166322, "step_time": 0.6212242889404297} +{"epoch": 0, "iter": 2672, "iter_tflops": 22.390526319409243, "iter_time": 0.6366442871093749, "loss": 0.26233115792274475, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 24.053138297855163, "step_time": 0.5926378707885742} +{"epoch": 0, "iter": 2673, "iter_tflops": 20.236592054361843, "iter_time": 1.0194944610595702, "loss": 0.5856976509094238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.549279725411953, "step_time": 0.9573913269042967} +{"epoch": 0, "iter": 2674, "iter_tflops": 18.966652007382336, "iter_time": 1.0877562103271483, "loss": 0.6556221842765808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.58824611663077, "step_time": 0.9133552646636963} +{"epoch": 0, "iter": 2675, "iter_tflops": 45.13701549470146, "iter_time": 0.4570770416259766, "loss": 0.6219276189804077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.950474463624644, "step_time": 0.421468713760376} +{"epoch": 0, "iter": 2676, "iter_tflops": 47.49904016926363, "iter_time": 0.4343475875854493, "loss": 0.7679082155227661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.38379642200308, "step_time": 0.40150971603393554} +{"epoch": 0, "iter": 2677, "iter_tflops": 42.68877951277305, "iter_time": 0.4832907791137695, "loss": 0.166842520236969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.59391685363395, "step_time": 0.4427851295471192} +{"epoch": 0, "iter": 2678, "iter_tflops": 14.44381242055018, "iter_time": 1.4283689727783204, "loss": 0.24061591923236847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.855475784478067, "step_time": 1.301196746826172} +{"epoch": 0, "iter": 2679, "iter_tflops": 13.669566166177573, "iter_time": 
1.509272003173828, "loss": 0.3163098394870758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.774588574055599, "step_time": 1.307868881225586} +{"epoch": 0, "iter": 2680, "iter_tflops": 19.622905935106093, "iter_time": 1.0513780975341798, "loss": 0.28927358984947205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.43843378547993, "step_time": 0.8802249202728272} +{"epoch": 0, "iter": 2681, "iter_tflops": 14.138407411808387, "iter_time": 1.144137451171875, "loss": 0.349672794342041, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 15.373222654448867, "step_time": 1.052237503051758} +{"epoch": 0, "iter": 2682, "iter_tflops": 12.919644637965817, "iter_time": 1.252068603515625, "loss": 0.3992612659931183, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 15.222741447921832, "step_time": 1.062639175415039} +{"epoch": 0, "iter": 2683, "iter_tflops": 29.83653728385005, "iter_time": 0.5421634979248047, "loss": 0.2802535891532898, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 31.829804823873665, "step_time": 0.5082117691040039} +{"epoch": 0, "iter": 2684, "iter_tflops": 31.511711833484032, "iter_time": 0.5133418807983398, "loss": 0.35999566316604614, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 33.390830633466074, "step_time": 0.48445280075073244} +{"epoch": 0, "iter": 2685, "iter_tflops": 35.73528390759096, "iter_time": 0.5773311767578124, "loss": 0.41010379791259766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.54298325668049, "step_time": 0.5352749519348144} +{"epoch": 0, "iter": 2686, "iter_tflops": 8.801238570669504, "iter_time": 2.344112518310547, "loss": 0.512963056564331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.881859225566448, "step_time": 1.8959162292480467} +{"epoch": 0, "iter": 2687, "iter_tflops": 15.068618587786608, "iter_time": 1.3691429901123047, "loss": 0.46395328640937805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.743894822655008, "step_time": 1.100683380126953} +{"epoch": 0, "iter": 2688, "iter_tflops": 21.13329282576421, "iter_time": 
0.9762365798950197, "loss": 0.45008012652397156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.45153004611498, "step_time": 0.7005100746154784} +{"epoch": 0, "iter": 2689, "iter_tflops": 23.997196664401713, "iter_time": 0.6450784530639648, "loss": 0.33296599984169006, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.691683965243485, "step_time": 0.6025324974060059} +{"epoch": 0, "iter": 2690, "iter_tflops": 22.777551139450658, "iter_time": 0.6796197891235352, "loss": 0.18049117922782898, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.782044885464234, "step_time": 0.624648796081543} +{"epoch": 0, "iter": 2691, "iter_tflops": 23.630286028047465, "iter_time": 0.6550946731567383, "loss": 0.2625342607498169, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.558551574209968, "step_time": 0.6056710395812989} +{"epoch": 0, "iter": 2692, "iter_tflops": 25.010647069484044, "iter_time": 0.6189393844604493, "loss": 0.332888126373291, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 26.62666863763546, "step_time": 0.581374813079834} +{"epoch": 0, "iter": 2693, "iter_tflops": 32.53096426888507, "iter_time": 0.6341986465454101, "loss": 0.12028252333402634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.06096637369091, "step_time": 0.5884348220825195} +{"epoch": 0, "iter": 2694, "iter_tflops": 28.460498410295486, "iter_time": 0.7249027481079102, "loss": 0.10443869233131409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.677444750888554, "step_time": 0.5782671279907227} +{"epoch": 0, "iter": 2695, "iter_tflops": 53.65399759066409, "iter_time": 0.3845210876464844, "loss": 0.09054487943649292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.34877496905033, "step_time": 0.35358229064941404} +{"epoch": 0, "iter": 2696, "iter_tflops": 48.94427190078036, "iter_time": 0.42152212524414057, "loss": 0.08969239890575409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.67005043644911, "step_time": 0.3844060764312744} +{"epoch": 0, "iter": 2697, "iter_tflops": 27.252836429615023, 
"iter_time": 0.7570255508422852, "loss": 1.1072874069213867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.82160395818473, "step_time": 0.7158204498291015} +{"epoch": 0, "iter": 2698, "iter_tflops": 9.584034798954825, "iter_time": 2.152652191162109, "loss": 1.1010836362838745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.686936009039744, "step_time": 1.7653124389648438} +{"epoch": 0, "iter": 2699, "iter_tflops": 12.592611328480977, "iter_time": 1.6383491058349609, "loss": 0.9174198508262634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.904004353354084, "step_time": 1.3842651290893555} +{"epoch": 0, "iter": 2700, "iter_tflops": 18.91319392983647, "iter_time": 1.0908307495117189, "loss": 0.9220761060714722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.876609814454138, "step_time": 0.7972873439788819} +{"epoch": 0, "iter": 2701, "iter_tflops": 13.812087497638663, "iter_time": 1.0084484481811522, "loss": 0.22987917065620422, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 14.693950141280705, "step_time": 0.9479260559082031} +{"epoch": 0, "iter": 2702, "iter_tflops": 10.84257831740825, "iter_time": 1.2846370849609376, "loss": 0.32325536012649536, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 13.118642006836499, "step_time": 1.0617545776367188} +{"epoch": 0, "iter": 2703, "iter_tflops": 21.76454126414303, "iter_time": 0.6399757308959961, "loss": 0.2093358188867569, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 23.429406000934797, "step_time": 0.594499843597412} +{"epoch": 0, "iter": 2704, "iter_tflops": 21.425463352247164, "iter_time": 0.6501039428710937, "loss": 0.33869513869285583, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 23.046206029111357, "step_time": 0.604384868621826} +{"epoch": 0, "iter": 2705, "iter_tflops": 21.951797109121188, "iter_time": 0.939836196899414, "loss": 0.4385276734828949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.623679713409825, "step_time": 0.8733226051330566} +{"epoch": 0, "iter": 2706, "iter_tflops": 20.2589144748012, 
"iter_time": 1.0183711242675781, "loss": 0.3138463497161865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.09706623860998, "step_time": 0.8561661949157715} +{"epoch": 0, "iter": 2707, "iter_tflops": 51.06622703746294, "iter_time": 0.40400661468505855, "loss": 0.42316511273384094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.42174213303452, "step_time": 0.37225631523132324} +{"epoch": 0, "iter": 2708, "iter_tflops": 47.20565780656936, "iter_time": 0.43704705047607423, "loss": 0.42051962018013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.56611679325693, "step_time": 0.4080023307800293} +{"epoch": 0, "iter": 2709, "iter_tflops": 24.679844253781486, "iter_time": 0.8359490966796874, "loss": 0.7246301770210266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.962444384310988, "step_time": 0.7946514282226563} +{"epoch": 0, "iter": 2710, "iter_tflops": 20.61039662884913, "iter_time": 1.0010041961669922, "loss": 1.091308355331421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.714618457695323, "step_time": 0.8023099212646484} +{"epoch": 0, "iter": 2711, "iter_tflops": 41.10508844765574, "iter_time": 0.5019109382629395, "loss": 0.8143121600151062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.746417967988236, "step_time": 0.4610669288635254} +{"epoch": 0, "iter": 2712, "iter_tflops": 43.413963110982685, "iter_time": 0.47521792602539065, "loss": 1.0309892892837524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.06534122317741, "step_time": 0.438350025177002} +{"epoch": 0, "iter": 2713, "iter_tflops": 25.252459003573428, "iter_time": 0.8169934463500977, "loss": 0.1655777245759964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.406619626379545, "step_time": 0.7527777519226075} +{"epoch": 0, "iter": 2714, "iter_tflops": 47.70844147558835, "iter_time": 0.43244115447998044, "loss": 0.17805208265781403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.994951587603836, "step_time": 0.38930299758911135} +{"epoch": 0, "iter": 2715, "iter_tflops": 
50.263169379934986, "iter_time": 0.4104614524841309, "loss": 0.1553531140089035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.60366243399223, "step_time": 0.377833511352539} +{"epoch": 0, "iter": 2716, "iter_tflops": 53.329939045654136, "iter_time": 0.3868576240539551, "loss": 0.10563959181308746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.85943704705491, "step_time": 0.3565726623535157} +{"epoch": 0, "iter": 2717, "iter_tflops": 50.98630391138247, "iter_time": 0.40463991165161134, "loss": 0.03454815596342087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.0052185466924, "step_time": 0.3683780555725098} +{"epoch": 0, "iter": 2718, "iter_tflops": 47.696078396491195, "iter_time": 0.4325532455444336, "loss": 0.08764030784368515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.69729242061838, "step_time": 0.3915019645690918} +{"epoch": 0, "iter": 2719, "iter_tflops": 51.02980440411558, "iter_time": 0.40429497528076164, "loss": 0.05294648930430412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.53394590612015, "step_time": 0.37150418853759765} +{"epoch": 0, "iter": 2720, "iter_tflops": 50.49346602778246, "iter_time": 0.4085893707275391, "loss": 0.03703305870294571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.425665701639176, "step_time": 0.3722299633026123} +{"epoch": 0, "iter": 2721, "iter_tflops": 37.49737680836532, "iter_time": 0.5502009811401368, "loss": 0.19286556541919708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.29471722944055, "step_time": 0.5120049209594726} +{"epoch": 0, "iter": 2722, "iter_tflops": 9.516593609377074, "iter_time": 2.1679073791503907, "loss": 0.2026185691356659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.941404689668174, "step_time": 1.7276940231323241} +{"epoch": 0, "iter": 2723, "iter_tflops": 10.085025758163875, "iter_time": 2.0457154998779297, "loss": 0.23934708535671234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.402171521054937, "step_time": 1.663506546020508} +{"epoch": 0, "iter": 2724, 
"iter_tflops": 29.543106276656705, "iter_time": 0.698338668823242, "loss": 0.23198357224464417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.23290646218164, "step_time": 0.5541091327667237} +{"epoch": 0, "iter": 2725, "iter_tflops": 17.032058789285518, "iter_time": 0.8824719619750977, "loss": 0.36114999651908875, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 17.884012436239402, "step_time": 0.8404330062866211} +{"epoch": 0, "iter": 2726, "iter_tflops": 10.087525400836403, "iter_time": 1.489990234375, "loss": 0.23959331214427948, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 12.424683298871097, "step_time": 1.2097140808105467} +{"epoch": 0, "iter": 2727, "iter_tflops": 21.943538717715445, "iter_time": 0.684953987121582, "loss": 0.28689253330230713, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 23.61363869905591, "step_time": 0.636509880065918} +{"epoch": 0, "iter": 2728, "iter_tflops": 24.018224494650084, "iter_time": 0.6257879028320312, "loss": 0.15579473972320557, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 25.776586774911614, "step_time": 0.583099479675293} +{"epoch": 0, "iter": 2729, "iter_tflops": 36.33413636685138, "iter_time": 0.5678157119750977, "loss": 1.0632460117340088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.768952395038845, "step_time": 0.5187738742828369} +{"epoch": 0, "iter": 2730, "iter_tflops": 37.12378713225285, "iter_time": 0.5557378463745117, "loss": 0.9875529408454895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.32431459760507, "step_time": 0.5116291179656982} +{"epoch": 0, "iter": 2731, "iter_tflops": 36.6290250374304, "iter_time": 0.5632444076538086, "loss": 0.8725512623786926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.744023510643586, "step_time": 0.5190992679595947} +{"epoch": 0, "iter": 2732, "iter_tflops": 33.87389195170232, "iter_time": 0.6090558929443359, "loss": 0.9254521727561951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.66331620899363, "step_time": 0.5627176055908204} +{"epoch": 0, "iter": 2733, 
"iter_tflops": 38.9123218056099, "iter_time": 0.5301943588256837, "loss": 0.05312546715140343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.23170532980627, "step_time": 0.4772213668823242} +{"epoch": 0, "iter": 2734, "iter_tflops": 41.81446934891409, "iter_time": 0.49339603805542, "loss": 0.03332412987947464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.23231423582518, "step_time": 0.4462483406066894} +{"epoch": 0, "iter": 2735, "iter_tflops": 51.422797352234966, "iter_time": 0.4012051963806152, "loss": 0.051154617220163345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.593902551355626, "step_time": 0.3645462245941161} +{"epoch": 0, "iter": 2736, "iter_tflops": 51.09351926305542, "iter_time": 0.4037908096313477, "loss": 0.06532423198223114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.14553252657268, "step_time": 0.3674574375152588} +{"epoch": 0, "iter": 2737, "iter_tflops": 21.43368350589207, "iter_time": 0.9625547332763671, "loss": 0.5029271841049194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.663452896271234, "step_time": 0.9103243713378907} +{"epoch": 0, "iter": 2738, "iter_tflops": 23.091702851250584, "iter_time": 0.8934418411254882, "loss": 0.7138888835906982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.217513821573956, "step_time": 0.7311449775695802} +{"epoch": 0, "iter": 2739, "iter_tflops": 41.270797173094856, "iter_time": 0.49989568710327154, "loss": 0.5831567645072937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.3301703631555, "step_time": 0.4551294059753418} +{"epoch": 0, "iter": 2740, "iter_tflops": 39.10234283702511, "iter_time": 0.5276178359985351, "loss": 0.6688584685325623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79365543050867, "step_time": 0.48210636138916013} +{"epoch": 0, "iter": 2741, "iter_tflops": 19.3445570812905, "iter_time": 1.066506378173828, "loss": 0.4222870469093323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.937784643562686, "step_time": 0.9853522644042969} +{"epoch": 0, "iter": 2742, 
"iter_tflops": 19.781809558626833, "iter_time": 1.0429325714111328, "loss": 0.5125869512557983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.126586362948874, "step_time": 0.8551186313629149} +{"epoch": 0, "iter": 2743, "iter_tflops": 47.2728151796478, "iter_time": 0.4364261665344238, "loss": 0.47437411546707153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.168739743507906, "step_time": 0.4031972179412842} +{"epoch": 0, "iter": 2744, "iter_tflops": 47.5898947866877, "iter_time": 0.433518367767334, "loss": 0.409912109375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.27464702561444, "step_time": 0.4023644180297851} +{"epoch": 0, "iter": 2745, "iter_tflops": 35.125636125240945, "iter_time": 0.5873514556884766, "loss": 0.362942636013031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.50823436792809, "step_time": 0.5500417137145996} +{"epoch": 0, "iter": 2746, "iter_tflops": 13.490432684434353, "iter_time": 1.5293129577636717, "loss": 0.3781389594078064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.244806193011733, "step_time": 1.1963656349182128} +{"epoch": 0, "iter": 2747, "iter_tflops": 46.68295730127497, "iter_time": 0.4419405860900879, "loss": 0.4070413410663605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49540151764033, "step_time": 0.4085737094879151} +{"epoch": 0, "iter": 2748, "iter_tflops": 48.80891466627762, "iter_time": 0.42269109344482425, "loss": 0.4485297203063965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.954658629811746, "step_time": 0.3895992164611817} +{"epoch": 0, "iter": 2749, "iter_tflops": 25.63211531274128, "iter_time": 0.8048923492431641, "loss": 0.3839391767978668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.969452769072632, "step_time": 0.7649800567626952} +{"epoch": 0, "iter": 2750, "iter_tflops": 13.085879399200183, "iter_time": 1.576592056274414, "loss": 0.37411999702453613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.907034723937247, "step_time": 1.2969792213439941} +{"epoch": 0, "iter": 2751, 
"iter_tflops": 40.47090596322056, "iter_time": 0.5097759246826172, "loss": 0.2653528153896332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.399773073033245, "step_time": 0.46466664314270023} +{"epoch": 0, "iter": 2752, "iter_tflops": 41.950749399201634, "iter_time": 0.49179320526123044, "loss": 0.3296656012535095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61234564613934, "step_time": 0.45231380271911625} +{"epoch": 0, "iter": 2753, "iter_tflops": 20.320512998530816, "iter_time": 1.0152840881347656, "loss": 0.7554168105125427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.6930689695032, "step_time": 0.9510454025268554} +{"epoch": 0, "iter": 2754, "iter_tflops": 25.60532408516732, "iter_time": 0.805734519958496, "loss": 0.654957115650177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.269232587661737, "step_time": 0.7298073425292968} +{"epoch": 0, "iter": 2755, "iter_tflops": 42.363006578704706, "iter_time": 0.4870073013305664, "loss": 0.5481866598129272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.47581256420099, "step_time": 0.45367179489135745} +{"epoch": 0, "iter": 2756, "iter_tflops": 46.26965704127367, "iter_time": 0.44588818740844716, "loss": 0.9519103765487671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.503949984056234, "step_time": 0.41675651168823247} +{"epoch": 0, "iter": 2757, "iter_tflops": 33.8044809130474, "iter_time": 0.6103064727783203, "loss": 0.94044429063797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.94259685483377, "step_time": 0.5740011940002442} +{"epoch": 0, "iter": 2758, "iter_tflops": 12.351290617051328, "iter_time": 1.670359329223633, "loss": 0.866862416267395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.13304595980238, "step_time": 1.2788095664978025} +{"epoch": 0, "iter": 2759, "iter_tflops": 43.40567922055865, "iter_time": 0.47530862045288086, "loss": 0.8023988008499146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81775202747527, "step_time": 0.4406681785583496} +{"epoch": 0, "iter": 2760, 
"iter_tflops": 51.211700924885015, "iter_time": 0.40285897827148437, "loss": 0.8801732659339905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.14554385021937, "step_time": 0.3741207733154297} +{"epoch": 0, "iter": 2761, "iter_tflops": 38.62266254211495, "iter_time": 0.5341706695556641, "loss": 0.24063876271247864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.771477679894254, "step_time": 0.49390384674072263} +{"epoch": 0, "iter": 2762, "iter_tflops": 8.598609639867211, "iter_time": 2.399352264404297, "loss": 0.19426752626895905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.616361135639767, "step_time": 1.9433300399780276} +{"epoch": 0, "iter": 2763, "iter_tflops": 9.375337430148631, "iter_time": 2.200570770263672, "loss": 0.11217831075191498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.976775615939832, "step_time": 1.722591636657715} +{"epoch": 0, "iter": 2764, "iter_tflops": 39.09443077955222, "iter_time": 0.5277246170043945, "loss": 0.2674531042575836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.08713950709963, "step_time": 0.47882253837585453} +{"epoch": 0, "iter": 2765, "iter_tflops": 15.163073489727068, "iter_time": 1.0506053161621094, "loss": 0.2821284532546997, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 16.20171336480737, "step_time": 0.983254379272461} +{"epoch": 0, "iter": 2766, "iter_tflops": 19.04152063242522, "iter_time": 0.8366141510009766, "loss": 0.3062998354434967, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 26.376557918686878, "step_time": 0.6039607467651367} +{"epoch": 0, "iter": 2767, "iter_tflops": 26.951287323844404, "iter_time": 0.5910814361572265, "loss": 0.30063924193382263, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.609336326525383, "step_time": 0.5568254165649413} +{"epoch": 0, "iter": 2768, "iter_tflops": 28.52152496654617, "iter_time": 0.5585397567749024, "loss": 0.2988179624080658, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.20653361683651, "step_time": 0.5273827781677246} +{"epoch": 0, "iter": 2769, 
"iter_tflops": 33.04782278970971, "iter_time": 0.6242799606323243, "loss": 0.8111503720283508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.33604268774154, "step_time": 0.5838541030883789} +{"epoch": 0, "iter": 2770, "iter_tflops": 12.98408923280278, "iter_time": 1.588951919555664, "loss": 0.7360895276069641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.79967194468726, "step_time": 1.3940237045288084} +{"epoch": 0, "iter": 2771, "iter_tflops": 36.029272713219356, "iter_time": 0.572620315551758, "loss": 0.8996450304985046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.24132608890986, "step_time": 0.5257491416931153} +{"epoch": 0, "iter": 2772, "iter_tflops": 39.812399326461964, "iter_time": 0.5182077407836914, "loss": 1.0267233848571777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.95069148618696, "step_time": 0.4803436870574951} +{"epoch": 0, "iter": 2773, "iter_tflops": 22.20351941852469, "iter_time": 0.9086327667236329, "loss": 0.08796223998069763, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 23.394945771214477, "step_time": 0.8623591384887695} +{"epoch": 0, "iter": 2774, "iter_tflops": 11.811229883473281, "iter_time": 1.7081070709228514, "loss": 0.09205909818410873, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 14.5642699600167, "step_time": 1.3852287368774414} +{"epoch": 0, "iter": 2775, "iter_tflops": 15.731769878339025, "iter_time": 1.2824269256591794, "loss": 0.11110144108533859, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 20.471467269987418, "step_time": 0.9855104675292968} +{"epoch": 0, "iter": 2776, "iter_tflops": 27.214517207132854, "iter_time": 0.7413265914916992, "loss": 0.12786519527435303, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 34.64423002884547, "step_time": 0.5823435897827148} +{"epoch": 0, "iter": 2777, "iter_tflops": 21.5865212784603, "iter_time": 0.6735831985473634, "loss": 0.28272897005081177, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 22.954305618376075, "step_time": 0.633446216583252} +{"epoch": 0, "iter": 2778, 
"iter_tflops": 9.61296423127537, "iter_time": 1.5125738220214846, "loss": 0.23610670864582062, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 10.766958859051725, "step_time": 1.3504572868347167} +{"epoch": 0, "iter": 2779, "iter_tflops": 22.866487954720032, "iter_time": 0.6358789367675782, "loss": 0.2155102789402008, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.50355639169585, "step_time": 0.5933962326049805} +{"epoch": 0, "iter": 2780, "iter_tflops": 24.202596126012228, "iter_time": 0.6007751388549805, "loss": 0.39290720224380493, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 25.888529209856202, "step_time": 0.5616509895324707} +{"epoch": 0, "iter": 2781, "iter_tflops": 17.919272375521352, "iter_time": 1.1513354492187498, "loss": 0.17639145255088806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.28295428444272, "step_time": 1.0699135208129884} +{"epoch": 0, "iter": 2782, "iter_tflops": 14.6589227603456, "iter_time": 1.4074085693359373, "loss": 0.18410524725914001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.3708855486773, "step_time": 1.1876823120117188} +{"epoch": 0, "iter": 2783, "iter_tflops": 42.4577111284464, "iter_time": 0.48592100143432615, "loss": 0.2256203442811966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.50211727521659, "step_time": 0.44365922927856444} +{"epoch": 0, "iter": 2784, "iter_tflops": 43.99702407147761, "iter_time": 0.4689202041625977, "loss": 0.15838736295700073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.836794619766046, "step_time": 0.43128085136413574} +{"epoch": 0, "iter": 2785, "iter_tflops": 18.634272649729002, "iter_time": 1.1071585083007813, "loss": 0.7738173007965088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.022026405900768, "step_time": 1.0304198532104494} +{"epoch": 0, "iter": 2786, "iter_tflops": 15.682500523086222, "iter_time": 1.3155487213134767, "loss": 0.6674915552139282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.804487118478125, "step_time": 1.0971367301940917} +{"epoch": 0, "iter": 
2787, "iter_tflops": 36.46472586050517, "iter_time": 0.5657822189331054, "loss": 0.6856226325035095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.816193946545134, "step_time": 0.5181583538055419} +{"epoch": 0, "iter": 2788, "iter_tflops": 36.01575981350678, "iter_time": 0.5728351593017579, "loss": 0.6859143972396851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.19846366059363, "step_time": 0.5263240337371826} +{"epoch": 0, "iter": 2789, "iter_tflops": 16.03264643736724, "iter_time": 1.2868177185058594, "loss": 0.11107916384935379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.117395913740044, "step_time": 1.2052705688476562} +{"epoch": 0, "iter": 2790, "iter_tflops": 16.346456256122885, "iter_time": 1.2621141357421874, "loss": 0.09251727908849716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.92279452360966, "step_time": 0.9000252342224121} +{"epoch": 0, "iter": 2791, "iter_tflops": 48.33407266471958, "iter_time": 0.4268436813354492, "loss": 0.142933189868927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.440884428999865, "step_time": 0.39341620063781746} +{"epoch": 0, "iter": 2792, "iter_tflops": 54.06516682676858, "iter_time": 0.38159677886962895, "loss": 0.12011148780584335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.602739522968875, "step_time": 0.3520499839782715} +{"epoch": 0, "iter": 2793, "iter_tflops": 26.607012281106385, "iter_time": 0.44260914611816404, "loss": 0.005586102604866028, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 29.315864907576575, "step_time": 0.40171105384826655} +{"epoch": 0, "iter": 2794, "iter_tflops": 24.130070831686233, "iter_time": 0.4880427856445313, "loss": 0.021778138354420662, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 27.226238423603718, "step_time": 0.4325425643920898} +{"epoch": 0, "iter": 2795, "iter_tflops": 27.027901333096032, "iter_time": 0.4357166633605957, "loss": 0.005817849189043045, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 29.965038813348137, "step_time": 0.39300823402404783} 
+{"epoch": 0, "iter": 2796, "iter_tflops": 25.87104842426479, "iter_time": 0.45520022201538085, "loss": 0.015261968597769737, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 28.57619625370738, "step_time": 0.4121089763641358} +{"epoch": 0, "iter": 2797, "iter_tflops": 17.332072289020747, "iter_time": 1.190341995239258, "loss": 0.90183025598526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.40755303459666, "step_time": 1.1207950057983398} +{"epoch": 0, "iter": 2798, "iter_tflops": 17.387035961594776, "iter_time": 1.1865791015625, "loss": 0.9765499830245972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.836850252623854, "step_time": 0.990125343322754} +{"epoch": 0, "iter": 2799, "iter_tflops": 43.74984495418949, "iter_time": 0.47156952285766607, "loss": 0.9908919334411621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1482587425253, "step_time": 0.4375791187286377} +{"epoch": 0, "iter": 2800, "iter_tflops": 43.278026736402445, "iter_time": 0.4767105865478515, "loss": 1.038620948791504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.506398327223046, "step_time": 0.44361838912963864} +{"epoch": 0, "iter": 2801, "iter_tflops": 48.75977905834481, "iter_time": 0.42311704254150384, "loss": 0.28127193450927734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.53321209408994, "step_time": 0.385388671875} +{"epoch": 0, "iter": 2802, "iter_tflops": 47.976070864971646, "iter_time": 0.4300288276672363, "loss": 0.24099329113960266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.320866758870245, "step_time": 0.3943186492919922} +{"epoch": 0, "iter": 2803, "iter_tflops": 54.40594301280913, "iter_time": 0.37920661544799805, "loss": 0.21290245652198792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.18262351671862, "step_time": 0.3486005229949951} +{"epoch": 0, "iter": 2804, "iter_tflops": 49.49198495260337, "iter_time": 0.41685726547241214, "loss": 0.23364107310771942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.94162286159964, "step_time": 0.382470760345459} 
+{"epoch": 0, "iter": 2805, "iter_tflops": 39.93109211650493, "iter_time": 0.4133215293884277, "loss": 0.060611698776483536, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 43.725907628944306, "step_time": 0.37745082855224604} +{"epoch": 0, "iter": 2806, "iter_tflops": 40.95808424733164, "iter_time": 0.40295781326293945, "loss": 0.07686919718980789, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 44.95957770457932, "step_time": 0.36709375190734866} +{"epoch": 0, "iter": 2807, "iter_tflops": 44.397556466470824, "iter_time": 0.3717407302856445, "loss": 0.08778078109025955, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 48.631604153637404, "step_time": 0.3393756046295166} +{"epoch": 0, "iter": 2808, "iter_tflops": 47.83281689249189, "iter_time": 0.3450430297851563, "loss": 0.045396555215120316, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 52.25433224735885, "step_time": 0.3158471145629883} +{"epoch": 0, "iter": 2809, "iter_tflops": 23.305320745489286, "iter_time": 0.8852525024414062, "loss": 0.4549855887889862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.366249010610346, "step_time": 0.8467078170776366} +{"epoch": 0, "iter": 2810, "iter_tflops": 12.799909817817115, "iter_time": 1.6118155364990232, "loss": 0.7499099969863892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.2532138453526, "step_time": 1.2693547077178957} +{"epoch": 0, "iter": 2811, "iter_tflops": 46.99059437070589, "iter_time": 0.43904729843139645, "loss": 0.6780006289482117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.258200068948014, "step_time": 0.4024935226440429} +{"epoch": 0, "iter": 2812, "iter_tflops": 51.9840100601918, "iter_time": 0.39687383651733404, "loss": 0.5091877579689026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.97373885669388, "step_time": 0.3685852317810059} +{"epoch": 0, "iter": 2813, "iter_tflops": 21.268214438857346, "iter_time": 0.4285667762756347, "loss": 0.02071249857544899, "lr": 3e-05, "seqlen": 3680.0, "step_tflops": 23.253801199401522, "step_time": 
0.39197247886657716} +{"epoch": 0, "iter": 2814, "iter_tflops": 3.6406772789049677, "iter_time": 2.503613861083984, "loss": 0.00871362816542387, "lr": 3e-05, "seqlen": 3680.0, "step_tflops": 4.302563145771521, "step_time": 2.118469802856445} +{"epoch": 0, "iter": 2815, "iter_tflops": 5.024564574370555, "iter_time": 1.8140577087402343, "loss": 0.004737562499940395, "lr": 3e-05, "seqlen": 3680.0, "step_tflops": 5.863682631358919, "step_time": 1.5544582939147948} +{"epoch": 0, "iter": 2816, "iter_tflops": 17.420566251799208, "iter_time": 0.5232235260009767, "loss": 0.01588217169046402, "lr": 3e-05, "seqlen": 3680.0, "step_tflops": 19.564492842368114, "step_time": 0.4658873691558838} +{"epoch": 0, "iter": 2817, "iter_tflops": 25.958285145769413, "iter_time": 0.6611177597045897, "loss": 0.2488536238670349, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 27.831013840586923, "step_time": 0.6166316261291503} +{"epoch": 0, "iter": 2818, "iter_tflops": 31.007944127653072, "iter_time": 0.5534544067382813, "loss": 0.3879221975803375, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 33.06045901369471, "step_time": 0.5190939216613769} +{"epoch": 0, "iter": 2819, "iter_tflops": 30.197000964138965, "iter_time": 0.5683174743652344, "loss": 0.3322163224220276, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 31.99761164649425, "step_time": 0.53633638381958} +{"epoch": 0, "iter": 2820, "iter_tflops": 30.686487682025362, "iter_time": 0.5592521209716796, "loss": 0.1992415189743042, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 32.642915913483975, "step_time": 0.525733772277832} +{"epoch": 0, "iter": 2821, "iter_tflops": 40.39500453601655, "iter_time": 0.5107337837219238, "loss": 0.42037367820739746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35211595458396, "step_time": 0.46516593551635743} +{"epoch": 0, "iter": 2822, "iter_tflops": 17.41927623908973, "iter_time": 1.1843829345703125, "loss": 0.4177468419075012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.74885854653086, "step_time": 
1.0446727066040038} +{"epoch": 0, "iter": 2823, "iter_tflops": 39.33747260138665, "iter_time": 0.5244641342163086, "loss": 0.5401628613471985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.047434120522055, "step_time": 0.47926418685913086} +{"epoch": 0, "iter": 2824, "iter_tflops": 40.414733787698346, "iter_time": 0.5104844589233398, "loss": 0.2736656367778778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73226807998123, "step_time": 0.4717590560913086} +{"epoch": 0, "iter": 2825, "iter_tflops": 20.013036044608747, "iter_time": 1.030882743835449, "loss": 0.9264954924583435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.025835966922074, "step_time": 0.9812258377075196} +{"epoch": 0, "iter": 2826, "iter_tflops": 10.412331502193542, "iter_time": 1.9814095916748045, "loss": 0.961953341960907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.160408615865002, "step_time": 1.5676635971069335} +{"epoch": 0, "iter": 2827, "iter_tflops": 16.306547813712697, "iter_time": 1.2652030181884766, "loss": 1.094765305519104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.553249442118968, "step_time": 1.0037874336242676} +{"epoch": 0, "iter": 2828, "iter_tflops": 37.50298593152769, "iter_time": 0.5501186904907227, "loss": 0.9729037880897522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.76381784253393, "step_time": 0.5061128864288329} +{"epoch": 0, "iter": 2829, "iter_tflops": 16.59669790832165, "iter_time": 0.8613505401611328, "loss": 0.23294377326965332, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 17.638310063934295, "step_time": 0.810484375} +{"epoch": 0, "iter": 2830, "iter_tflops": 5.911110232776107, "iter_time": 2.4184246520996093, "loss": 0.2310488075017929, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 7.302323914921419, "step_time": 1.957674690246582} +{"epoch": 0, "iter": 2831, "iter_tflops": 8.429378112511014, "iter_time": 1.6959228210449218, "loss": 0.13968278467655182, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 9.762977049291614, "step_time": 
1.464263885498047} +{"epoch": 0, "iter": 2832, "iter_tflops": 21.495216286910328, "iter_time": 0.6650584259033203, "loss": 0.22105245292186737, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 23.241525243711227, "step_time": 0.615087631225586} +{"epoch": 0, "iter": 2833, "iter_tflops": 18.171782621114097, "iter_time": 0.8406201858520508, "loss": 0.41823288798332214, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 19.322465734096973, "step_time": 0.7905599365234375} +{"epoch": 0, "iter": 2834, "iter_tflops": 7.426703318677703, "iter_time": 2.0568436126708987, "loss": 0.34233343601226807, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 8.796908600506407, "step_time": 1.7364699325561523} +{"epoch": 0, "iter": 2835, "iter_tflops": 7.820436954259698, "iter_time": 1.9532882080078122, "loss": 0.3507070243358612, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 8.785674837823702, "step_time": 1.7386902618408202} +{"epoch": 0, "iter": 2836, "iter_tflops": 22.4676538652321, "iter_time": 0.6798915176391602, "loss": 0.2631174921989441, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.116592186023393, "step_time": 0.6334048843383788} +{"epoch": 0, "iter": 2837, "iter_tflops": 17.945077011581855, "iter_time": 0.8421277008056641, "loss": 0.2509123980998993, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 19.311635966275425, "step_time": 0.7825357971191406} +{"epoch": 0, "iter": 2838, "iter_tflops": 7.6592250743249926, "iter_time": 1.9730516204833985, "loss": 0.4569726288318634, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 8.473192854791602, "step_time": 1.7835126266479493} +{"epoch": 0, "iter": 2839, "iter_tflops": 8.152324537844741, "iter_time": 1.8537101135253908, "loss": 0.28850245475769043, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 10.620627502088228, "step_time": 1.4228958168029784} +{"epoch": 0, "iter": 2840, "iter_tflops": 22.823201950837213, "iter_time": 0.6621352462768554, "loss": 0.39465293288230896, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.396823574612927, 
"step_time": 0.6194268035888671} +{"epoch": 0, "iter": 2841, "iter_tflops": 11.72123837217853, "iter_time": 1.2962621307373048, "loss": 0.347854346036911, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 12.50512535258995, "step_time": 1.2150056076049804} +{"epoch": 0, "iter": 2842, "iter_tflops": 9.677666768907768, "iter_time": 1.5699855957031248, "loss": 0.41325488686561584, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 15.046341342021572, "step_time": 1.0098001289367675} +{"epoch": 0, "iter": 2843, "iter_tflops": 27.27187733204572, "iter_time": 0.5571232681274414, "loss": 0.35178035497665405, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.050539729535974, "step_time": 0.5230125694274903} +{"epoch": 0, "iter": 2844, "iter_tflops": 28.339479773625822, "iter_time": 0.536135368347168, "loss": 0.28820207715034485, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 30.097668736268457, "step_time": 0.504816421508789} +{"epoch": 0, "iter": 2845, "iter_tflops": 48.9634807385294, "iter_time": 0.42135675811767576, "loss": 0.11230068653821945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.148693690597526, "step_time": 0.38100814819335943} +{"epoch": 0, "iter": 2846, "iter_tflops": 49.911866401039305, "iter_time": 0.41335047149658205, "loss": 0.10163562744855881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.522997433392725, "step_time": 0.3783925037384033} +{"epoch": 0, "iter": 2847, "iter_tflops": 52.835186070523584, "iter_time": 0.39048019027709957, "loss": 0.07102890312671661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.615504593468955, "step_time": 0.3580823192596435} +{"epoch": 0, "iter": 2848, "iter_tflops": 54.55585607386857, "iter_time": 0.3781646003723144, "loss": 0.1450739949941635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.35994165246098, "step_time": 0.3475591945648193} +{"epoch": 0, "iter": 2849, "iter_tflops": 48.92673125553897, "iter_time": 0.4216732444763183, "loss": 0.3562660217285156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
53.65281754682257, "step_time": 0.38452954483032226} +{"epoch": 0, "iter": 2850, "iter_tflops": 46.48980714595623, "iter_time": 0.44377670669555663, "loss": 0.42147308588027954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.55027212170228, "step_time": 0.40813021659851073} +{"epoch": 0, "iter": 2851, "iter_tflops": 50.418854451688084, "iter_time": 0.40919401550292966, "loss": 0.3524976074695587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.71289844139091, "step_time": 0.3770791549682617} +{"epoch": 0, "iter": 2852, "iter_tflops": 48.8872536183456, "iter_time": 0.4220137557983399, "loss": 0.3197716474533081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.017382300409125, "step_time": 0.3891382904052734} +{"epoch": 0, "iter": 2853, "iter_tflops": 42.80147090338474, "iter_time": 0.4820183296203613, "loss": 1.1638998985290527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.85062068187026, "step_time": 0.4403590221405029} +{"epoch": 0, "iter": 2854, "iter_tflops": 43.2416491237499, "iter_time": 0.4771116256713867, "loss": 0.8954321146011353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.447885690999755, "step_time": 0.4441772365570068} +{"epoch": 0, "iter": 2855, "iter_tflops": 43.07148514478696, "iter_time": 0.478996566772461, "loss": 0.7621138095855713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.03002797201156, "step_time": 0.4482094497680664} +{"epoch": 0, "iter": 2856, "iter_tflops": 41.29705346415437, "iter_time": 0.4995778579711914, "loss": 0.7090239524841309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.16258731457735, "step_time": 0.4671622467041015} +{"epoch": 0, "iter": 2857, "iter_tflops": 26.76972220474999, "iter_time": 0.40970816421508793, "loss": 0.017154010012745857, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 29.660236914401697, "step_time": 0.3697803821563721} +{"epoch": 0, "iter": 2858, "iter_tflops": 25.698792670437903, "iter_time": 0.42678167343139645, "loss": 0.0017916916403919458, "lr": 3e-05, "seqlen": 4416.0, 
"step_tflops": 29.529276512690977, "step_time": 0.37142033386230466} +{"epoch": 0, "iter": 2859, "iter_tflops": 30.745986040308168, "iter_time": 0.35672213363647465, "loss": 0.005368524696677923, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 33.76378849561736, "step_time": 0.32483836174011227} +{"epoch": 0, "iter": 2860, "iter_tflops": 34.464025672234946, "iter_time": 0.31823832321166995, "loss": 0.008000570349395275, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 37.92492156253059, "step_time": 0.2891970052719116} +{"epoch": 0, "iter": 2861, "iter_tflops": 42.060075241305384, "iter_time": 0.3865489463806152, "loss": 0.007957011461257935, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 46.23234614922023, "step_time": 0.35166456222534176} +{"epoch": 0, "iter": 2862, "iter_tflops": 24.038141930724596, "iter_time": 0.6763533477783203, "loss": 0.00597202917560935, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 29.723961237306845, "step_time": 0.5469754734039307} +{"epoch": 0, "iter": 2863, "iter_tflops": 35.23773324652975, "iter_time": 0.46138829803466797, "loss": 0.00498443515971303, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 39.04542413305043, "step_time": 0.4163939342498779} +{"epoch": 0, "iter": 2864, "iter_tflops": 38.26928838813787, "iter_time": 0.4248387794494629, "loss": 0.012868634425103664, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 42.19043544504391, "step_time": 0.38535458564758307} +{"epoch": 0, "iter": 2865, "iter_tflops": 36.115771668837056, "iter_time": 0.5712488632202148, "loss": 0.7727307081222534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.437136437205, "step_time": 0.5231387310028076} +{"epoch": 0, "iter": 2866, "iter_tflops": 43.71093338988688, "iter_time": 0.47198931503295904, "loss": 0.8406428694725037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.25732069864162, "step_time": 0.43656925964355475} +{"epoch": 0, "iter": 2867, "iter_tflops": 43.23101799166656, "iter_time": 0.4772289543151856, "loss": 0.8707781434059143, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 46.09710442469831, "step_time": 0.4475572547912598} +{"epoch": 0, "iter": 2868, "iter_tflops": 44.01986039623616, "iter_time": 0.46867694091796874, "loss": 0.865778386592865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.442287314625, "step_time": 0.4348671760559082} +{"epoch": 0, "iter": 2869, "iter_tflops": 36.46343852032599, "iter_time": 0.4222930717468262, "loss": 0.11687839776277542, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 40.199228452185224, "step_time": 0.3830485820770264} +{"epoch": 0, "iter": 2870, "iter_tflops": 37.87321635586133, "iter_time": 0.4065737991333008, "loss": 0.11072321981191635, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 41.64008872794686, "step_time": 0.36979405975341795} +{"epoch": 0, "iter": 2871, "iter_tflops": 40.119556000481, "iter_time": 0.383809268951416, "loss": 0.08435814082622528, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 43.82198730760584, "step_time": 0.3513819980621338} +{"epoch": 0, "iter": 2872, "iter_tflops": 42.95182458426794, "iter_time": 0.358500659942627, "loss": 0.1410038322210312, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 46.817249504241055, "step_time": 0.32890136909484863} +{"epoch": 0, "iter": 2873, "iter_tflops": 32.29225798019595, "iter_time": 0.6388866806030273, "loss": 0.8127099275588989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.41819733193287, "step_time": 0.599423999786377} +{"epoch": 0, "iter": 2874, "iter_tflops": 11.772265755744101, "iter_time": 1.752516799926758, "loss": 0.5220952033996582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.396837728076797, "step_time": 1.5399972686767578} +{"epoch": 0, "iter": 2875, "iter_tflops": 10.8592843764492, "iter_time": 1.8998575592041014, "loss": 0.548622190952301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.500272941179018, "step_time": 1.6504514427185057} +{"epoch": 0, "iter": 2876, "iter_tflops": 23.315443906438194, "iter_time": 0.8848681411743164, "loss": 0.6383880972862244, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 37.10192441817096, "step_time": 0.5560653209686279} +{"epoch": 0, "iter": 2877, "iter_tflops": 21.98330688296022, "iter_time": 0.6892920913696289, "loss": 0.27516359090805054, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 23.967141074021864, "step_time": 0.6322372589111328} +{"epoch": 0, "iter": 2878, "iter_tflops": 24.25384052956752, "iter_time": 0.6247637176513672, "loss": 0.2852400243282318, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 26.079514473499493, "step_time": 0.5810276718139648} +{"epoch": 0, "iter": 2879, "iter_tflops": 23.598017375793543, "iter_time": 0.6421268081665039, "loss": 0.281155526638031, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.431231196180207, "step_time": 0.5958390083312989} +{"epoch": 0, "iter": 2880, "iter_tflops": 24.892988109285785, "iter_time": 0.6087224044799805, "loss": 0.24258366227149963, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 26.573855660271327, "step_time": 0.5702190818786622} +{"epoch": 0, "iter": 2881, "iter_tflops": 19.638885678918673, "iter_time": 1.0505226135253907, "loss": 0.8641887307167053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.09411424238515, "step_time": 0.9780497665405273} +{"epoch": 0, "iter": 2882, "iter_tflops": 19.112901223499485, "iter_time": 1.0794328536987305, "loss": 0.9118221998214722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.841916056032623, "step_time": 0.9032120361328125} +{"epoch": 0, "iter": 2883, "iter_tflops": 33.76298271956872, "iter_time": 0.6110566024780274, "loss": 1.141448736190796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.83356562128402, "step_time": 0.5601166534423828} +{"epoch": 0, "iter": 2884, "iter_tflops": 39.808980054436404, "iter_time": 0.5182522506713867, "loss": 1.1949235200881958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.49912466062995, "step_time": 0.47428755569458003} +{"epoch": 0, "iter": 2885, "iter_tflops": 31.101292028291073, "iter_time": 0.6633516540527344, "loss": 0.6114004254341125, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 33.688491500768365, "step_time": 0.6124077568054199} +{"epoch": 0, "iter": 2886, "iter_tflops": 12.417062031962871, "iter_time": 1.661511672973633, "loss": 0.6113349795341492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.195044872877949, "step_time": 1.4534010772705077} +{"epoch": 0, "iter": 2887, "iter_tflops": 39.8255145207042, "iter_time": 0.5180370864868165, "loss": 0.6085333824157715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.31964036336049, "step_time": 0.45523515510559087} +{"epoch": 0, "iter": 2888, "iter_tflops": 48.514552185051265, "iter_time": 0.42525577545166016, "loss": 0.6910075545310974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.459428045987025, "step_time": 0.39327713394165037} +{"epoch": 0, "iter": 2889, "iter_tflops": 29.489053595331786, "iter_time": 0.6996187057495117, "loss": 0.005972875747829676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.27897369301304, "step_time": 0.6595834541320802} +{"epoch": 0, "iter": 2890, "iter_tflops": 15.248370200861892, "iter_time": 1.353003189086914, "loss": 0.014359044842422009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.677187028277327, "step_time": 1.0484777870178221} +{"epoch": 0, "iter": 2891, "iter_tflops": 50.03823636117977, "iter_time": 0.41230656814575195, "loss": 0.022507986053824425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.455116722409294, "step_time": 0.37203227996826177} +{"epoch": 0, "iter": 2892, "iter_tflops": 45.82362446872173, "iter_time": 0.4502283210754394, "loss": 0.02812148630619049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.12787885584206, "step_time": 0.4115692501068115} +{"epoch": 0, "iter": 2893, "iter_tflops": 23.189301905180027, "iter_time": 0.889681526184082, "loss": 0.4079503118991852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.179117408186524, "step_time": 0.8193731803894043} +{"epoch": 0, "iter": 2894, "iter_tflops": 18.352585168688403, "iter_time": 1.1241519012451173, "loss": 0.2844667136669159, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.730922850300562, "step_time": 0.8342225494384765} +{"epoch": 0, "iter": 2895, "iter_tflops": 43.33119609454997, "iter_time": 0.4761256408691406, "loss": 0.3991183042526245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.14572141093346, "step_time": 0.43760266876220705} +{"epoch": 0, "iter": 2896, "iter_tflops": 41.449015599374924, "iter_time": 0.4977462844848633, "loss": 0.23831558227539062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.943124478166226, "step_time": 0.4590489368438721} +{"epoch": 0, "iter": 2897, "iter_tflops": 17.6986885083347, "iter_time": 1.1656848754882811, "loss": 0.9102758169174194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.827162694127765, "step_time": 1.095815330505371} +{"epoch": 0, "iter": 2898, "iter_tflops": 14.105028961243395, "iter_time": 1.4626764373779297, "loss": 0.8418840169906616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.197060123606743, "step_time": 1.1337597045898438} +{"epoch": 0, "iter": 2899, "iter_tflops": 49.32446572392658, "iter_time": 0.41827302551269535, "loss": 1.2216137647628784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.50519910686178, "step_time": 0.38559044456481933} +{"epoch": 0, "iter": 2900, "iter_tflops": 46.46512259721898, "iter_time": 0.44401246261596683, "loss": 0.7475628852844238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.94517644599208, "step_time": 0.41307479476928705} +{"epoch": 0, "iter": 2901, "iter_tflops": 2.7116630906742496, "iter_time": 0.5924552078247071, "loss": 0.7063362002372742, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.902156993380824, "step_time": 0.5535671997070313} +{"epoch": 0, "iter": 2902, "iter_tflops": 1.0407165178354034, "iter_time": 1.5436854248046876, "loss": 0.7822802066802979, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.2939776119204687, "step_time": 1.2415507850646972} +{"epoch": 0, "iter": 2903, "iter_tflops": 3.753108629532164, "iter_time": 0.428055534362793, "loss": 0.5833399891853333, 
"lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.064531591260797, "step_time": 0.3952580718994141} +{"epoch": 0, "iter": 2904, "iter_tflops": 3.675332082779472, "iter_time": 0.43711394882202154, "loss": 0.7210914492607117, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.978330552372291, "step_time": 0.4038223819732666} +{"epoch": 0, "iter": 2905, "iter_tflops": 28.69959643732395, "iter_time": 0.7188635406494142, "loss": 0.9100914001464844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.389058357700787, "step_time": 0.6788987426757813} +{"epoch": 0, "iter": 2906, "iter_tflops": 8.631333450382312, "iter_time": 2.390255645751953, "loss": 0.8706722259521484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.99358795422902, "step_time": 2.064433074951172} +{"epoch": 0, "iter": 2907, "iter_tflops": 13.85360922598687, "iter_time": 1.4892215576171877, "loss": 0.8519224524497986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.772994850504524, "step_time": 1.3080010299682618} +{"epoch": 0, "iter": 2908, "iter_tflops": 22.405936805241563, "iter_time": 0.9207869186401367, "loss": 0.8630707263946533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.376720238366328, "step_time": 0.7535998954772949} +{"epoch": 0, "iter": 2909, "iter_tflops": 22.79475591832306, "iter_time": 0.6701351547241211, "loss": 0.19717289507389069, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.251344116390126, "step_time": 0.6298853874206543} +{"epoch": 0, "iter": 2910, "iter_tflops": 9.477040456745488, "iter_time": 1.6118499603271486, "loss": 0.2719546854496002, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 11.430993120601453, "step_time": 1.3363289718627929} +{"epoch": 0, "iter": 2911, "iter_tflops": 26.008621105226936, "iter_time": 0.5873270721435546, "loss": 0.3614460825920105, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 27.67031742737372, "step_time": 0.5520560913085937} +{"epoch": 0, "iter": 2912, "iter_tflops": 27.9920744043941, "iter_time": 0.5457104415893554, "loss": 0.23505018651485443, "lr": 
3e-05, "seqlen": 6112.0, "step_tflops": 29.758249937684926, "step_time": 0.5133220977783204} +{"epoch": 0, "iter": 2913, "iter_tflops": 37.59661984736996, "iter_time": 0.5487486267089844, "loss": 0.7770223021507263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.29024300454358, "step_time": 0.5120617790222167} +{"epoch": 0, "iter": 2914, "iter_tflops": 21.139128842751, "iter_time": 0.9759670639038087, "loss": 0.629094123840332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.480669813849207, "step_time": 0.8786416091918945} +{"epoch": 0, "iter": 2915, "iter_tflops": 43.10268536544398, "iter_time": 0.4786498413085938, "loss": 0.7533090710639954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.50698300431351, "step_time": 0.4436128120422363} +{"epoch": 0, "iter": 2916, "iter_tflops": 42.72815098341144, "iter_time": 0.4828454551696777, "loss": 0.7011898756027222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.22188479539853, "step_time": 0.4463490314483643} +{"epoch": 0, "iter": 2917, "iter_tflops": 23.563691574550546, "iter_time": 0.8755458984375001, "loss": 0.9230229258537292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.225474438927353, "step_time": 0.8178674125671387} +{"epoch": 0, "iter": 2918, "iter_tflops": 27.053207049639987, "iter_time": 0.7626117477416993, "loss": 0.7738781571388245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.64600751805945, "step_time": 0.5338479919433594} +{"epoch": 0, "iter": 2919, "iter_tflops": 43.54328796315873, "iter_time": 0.47380651473999025, "loss": 0.6970469951629639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.68957190836734, "step_time": 0.4418779754638672} +{"epoch": 0, "iter": 2920, "iter_tflops": 43.34270914729932, "iter_time": 0.4759991683959961, "loss": 0.6849551796913147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.686924712145476, "step_time": 0.4419030303955078} +{"epoch": 0, "iter": 2921, "iter_tflops": 13.713635731143754, "iter_time": 0.8056625061035156, "loss": 0.005453546065837145, "lr": 
3e-05, "seqlen": 4448.0, "step_tflops": 14.492344820487014, "step_time": 0.7623722915649414} +{"epoch": 0, "iter": 2922, "iter_tflops": 11.498507012804875, "iter_time": 0.9608692779541016, "loss": 0.006800740025937557, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 15.34175339954684, "step_time": 0.7201629333496093} +{"epoch": 0, "iter": 2923, "iter_tflops": 30.74081650099825, "iter_time": 0.35941017150878907, "loss": 0.006460594478994608, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 33.76238171084349, "step_time": 0.32724474906921386} +{"epoch": 0, "iter": 2924, "iter_tflops": 30.146903380927714, "iter_time": 0.366490779876709, "loss": 0.003946101292967796, "lr": 3e-05, "seqlen": 4448.0, "step_tflops": 33.12162386362396, "step_time": 0.333575496673584} +{"epoch": 0, "iter": 2925, "iter_tflops": 26.57527400739384, "iter_time": 0.7763266525268554, "loss": 0.49884286522865295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.046348440345263, "step_time": 0.7356071166992187} +{"epoch": 0, "iter": 2926, "iter_tflops": 13.856285854233581, "iter_time": 1.4889338836669919, "loss": 0.5774850845336914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.898364641883699, "step_time": 1.2976865215301514} +{"epoch": 0, "iter": 2927, "iter_tflops": 47.30022335732668, "iter_time": 0.43617327880859375, "loss": 0.49153390526771545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.60848375479681, "step_time": 0.3997616672515869} +{"epoch": 0, "iter": 2928, "iter_tflops": 47.75683070575527, "iter_time": 0.432002986907959, "loss": 0.4428902566432953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.741403386398495, "step_time": 0.39873471069335936} +{"epoch": 0, "iter": 2929, "iter_tflops": 31.284050439674132, "iter_time": 0.659476417541504, "loss": 0.09610585123300552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.213783281649, "step_time": 0.6211605987548828} +{"epoch": 0, "iter": 2930, "iter_tflops": 20.603351953116253, "iter_time": 1.0013464584350587, "loss": 0.18321767449378967, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.992906181848102, "step_time": 0.8254779720306397} +{"epoch": 0, "iter": 2931, "iter_tflops": 40.68653824966682, "iter_time": 0.5070741920471191, "loss": 0.1413843184709549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.73922454526445, "step_time": 0.4611410617828369} +{"epoch": 0, "iter": 2932, "iter_tflops": 41.68980266466067, "iter_time": 0.4948714599609375, "loss": 0.19694849848747253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.833639162885255, "step_time": 0.4501299457550049} +{"epoch": 0, "iter": 2933, "iter_tflops": 17.52419897732337, "iter_time": 1.1772916717529296, "loss": 0.042808905243873596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.334563782669328, "step_time": 1.125256851196289} +{"epoch": 0, "iter": 2934, "iter_tflops": 19.922133844759866, "iter_time": 1.0355865325927736, "loss": 0.03898772969841957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.0851970114541, "step_time": 0.8224409599304199} +{"epoch": 0, "iter": 2935, "iter_tflops": 54.99078569090312, "iter_time": 0.3751736450195313, "loss": 0.030104171484708786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.09116774195849, "step_time": 0.34332988166809086} +{"epoch": 0, "iter": 2936, "iter_tflops": 55.93395391409046, "iter_time": 0.3688474006652832, "loss": 0.09716160595417023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.04749763629157, "step_time": 0.3379515018463135} +{"epoch": 0, "iter": 2937, "iter_tflops": 28.67746536711636, "iter_time": 0.7194183044433593, "loss": 1.146738886833191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.275452536304492, "step_time": 0.6814462471008302} +{"epoch": 0, "iter": 2938, "iter_tflops": 10.844282208359466, "iter_time": 1.902485855102539, "loss": 0.8177820444107056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.619767473996115, "step_time": 1.634823585510254} +{"epoch": 0, "iter": 2939, "iter_tflops": 11.950206907464741, "iter_time": 1.7264214477539062, "loss": 0.9490982294082642, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.009553367334984, "step_time": 1.374530807495117} +{"epoch": 0, "iter": 2940, "iter_tflops": 18.618888449022318, "iter_time": 1.1080733184814453, "loss": 0.8348636627197266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.013720488115148, "step_time": 0.9371924896240235} +{"epoch": 0, "iter": 2941, "iter_tflops": 18.081107525594916, "iter_time": 0.9014537963867187, "loss": 0.2770218551158905, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 19.512754673712067, "step_time": 0.8353143005371094} +{"epoch": 0, "iter": 2942, "iter_tflops": 6.319500075555828, "iter_time": 2.579204498291016, "loss": 0.371514230966568, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 7.757537755806933, "step_time": 2.1010897445678713} +{"epoch": 0, "iter": 2943, "iter_tflops": 11.499573946714868, "iter_time": 1.4173814697265625, "loss": 0.2915172576904297, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 14.454230083812371, "step_time": 1.1276479568481446} +{"epoch": 0, "iter": 2944, "iter_tflops": 27.177315332890892, "iter_time": 0.599738525390625, "loss": 0.2534981667995453, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 28.873217992618166, "step_time": 0.5645121726989746} +{"epoch": 0, "iter": 2945, "iter_tflops": 24.818203115693652, "iter_time": 0.615498519897461, "loss": 0.21933795511722565, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 26.634215288606164, "step_time": 0.5735317192077637} +{"epoch": 0, "iter": 2946, "iter_tflops": 26.747136391721448, "iter_time": 0.5711103820800781, "loss": 0.3998997211456299, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.43956984554074, "step_time": 0.5371237106323242} +{"epoch": 0, "iter": 2947, "iter_tflops": 26.21993710902518, "iter_time": 0.5825935897827148, "loss": 0.24744068086147308, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 27.861160482638493, "step_time": 0.5482746238708496} +{"epoch": 0, "iter": 2948, "iter_tflops": 26.075342052160345, "iter_time": 0.585824234008789, "loss": 0.3235630989074707, 
"lr": 3e-05, "seqlen": 6112.0, "step_tflops": 27.725414522301964, "step_time": 0.5509590225219727} +{"epoch": 0, "iter": 2949, "iter_tflops": 28.04346839848977, "iter_time": 0.7356826629638671, "loss": 0.6961610913276672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.775217441703962, "step_time": 0.6928948059082031} +{"epoch": 0, "iter": 2950, "iter_tflops": 15.321309277276622, "iter_time": 1.346562042236328, "loss": 0.9734313488006592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.29966425004431, "step_time": 1.1274028434753416} +{"epoch": 0, "iter": 2951, "iter_tflops": 40.91911078338461, "iter_time": 0.5041921272277832, "loss": 0.7206240296363831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.94515441964334, "step_time": 0.45902820396423344} +{"epoch": 0, "iter": 2952, "iter_tflops": 38.68439640220741, "iter_time": 0.5333182220458985, "loss": 0.9457762837409973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.119962702214075, "step_time": 0.4898174686431884} +{"epoch": 0, "iter": 2953, "iter_tflops": 16.553939959072604, "iter_time": 0.7554685287475585, "loss": 0.0336826927959919, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 18.3518466663863, "step_time": 0.6814562530517577} +{"epoch": 0, "iter": 2954, "iter_tflops": 28.70766543764843, "iter_time": 0.43563210296630855, "loss": 0.03586504980921745, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 31.803214741250653, "step_time": 0.39323007965087886} +{"epoch": 0, "iter": 2955, "iter_tflops": 33.06683721985611, "iter_time": 0.37820310974121096, "loss": 0.0729357898235321, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 36.232256924305574, "step_time": 0.3451615142822265} +{"epoch": 0, "iter": 2956, "iter_tflops": 35.03397049004616, "iter_time": 0.3569672660827637, "loss": 0.0504457913339138, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 38.13179688431005, "step_time": 0.32796725273132327} +{"epoch": 0, "iter": 2957, "iter_tflops": 42.02146127274535, "iter_time": 0.4909656372070313, "loss": 0.2821628153324127, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79287321751829, "step_time": 0.4505306625366211} +{"epoch": 0, "iter": 2958, "iter_tflops": 45.61350106986557, "iter_time": 0.4523023452758789, "loss": 0.23183953762054443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.760854999560145, "step_time": 0.40643707656860356} +{"epoch": 0, "iter": 2959, "iter_tflops": 47.46616408428053, "iter_time": 0.43464842605590814, "loss": 0.3419008255004883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.44771835289684, "step_time": 0.40101085472106934} +{"epoch": 0, "iter": 2960, "iter_tflops": 50.730805061916755, "iter_time": 0.4066778259277344, "loss": 0.3374044895172119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.8971221474014, "step_time": 0.3758137531280517} +{"epoch": 0, "iter": 2961, "iter_tflops": 42.748325687709034, "iter_time": 0.4826175804138183, "loss": 0.23031555116176605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.54870175623326, "step_time": 0.44321522903442384} +{"epoch": 0, "iter": 2962, "iter_tflops": 12.809811985361875, "iter_time": 1.610569580078125, "loss": 0.3074718713760376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.77486672712987, "step_time": 1.4977345275878906} +{"epoch": 0, "iter": 2963, "iter_tflops": 17.124717677609098, "iter_time": 1.2047552490234374, "loss": 0.27577584981918335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.913338712535374, "step_time": 1.0908223991394044} +{"epoch": 0, "iter": 2964, "iter_tflops": 27.979582813738574, "iter_time": 0.7373624420166015, "loss": 0.2896149754524231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.698719963931566, "step_time": 0.5779225006103516} +{"epoch": 0, "iter": 2965, "iter_tflops": 14.921488220719196, "iter_time": 1.0621249389648437, "loss": 0.4392889738082886, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 15.969704266048113, "step_time": 0.992409408569336} +{"epoch": 0, "iter": 2966, "iter_tflops": 20.586137836296814, "iter_time": 0.7698619766235352, "loss": 
0.37384456396102905, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.08778985679888, "step_time": 0.6317210426330566} +{"epoch": 0, "iter": 2967, "iter_tflops": 28.317598039816346, "iter_time": 0.5596691055297851, "loss": 0.4466608166694641, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.09282074349592, "step_time": 0.5266533470153809} +{"epoch": 0, "iter": 2968, "iter_tflops": 28.886777864437903, "iter_time": 0.5486414871215821, "loss": 0.37079572677612305, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.73881151785002, "step_time": 0.5155854759216308} +{"epoch": 0, "iter": 2969, "iter_tflops": 43.54498305664293, "iter_time": 0.4737880706787109, "loss": 0.32899510860443115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.625130788592166, "step_time": 0.43319762420654295} +{"epoch": 0, "iter": 2970, "iter_tflops": 10.498825317018529, "iter_time": 1.965085891723633, "loss": 0.3746780455112457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.604735203201134, "step_time": 1.5164641723632815} +{"epoch": 0, "iter": 2971, "iter_tflops": 14.860777733838201, "iter_time": 1.3882916412353519, "loss": 0.3206253945827484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.77615404534979, "step_time": 1.2297868423461915} +{"epoch": 0, "iter": 2972, "iter_tflops": 15.401490028140902, "iter_time": 1.339551788330078, "loss": 0.30327627062797546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.21464233936508, "step_time": 1.132665309906006} +{"epoch": 0, "iter": 2973, "iter_tflops": 23.38382736981355, "iter_time": 0.6585003051757812, "loss": 0.416755735874176, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.2901524797703, "step_time": 0.5857043800354004} +{"epoch": 0, "iter": 2974, "iter_tflops": 27.331518829839734, "iter_time": 0.5633882827758789, "loss": 0.16321447491645813, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 29.174694429652327, "step_time": 0.5277949867248535} +{"epoch": 0, "iter": 2975, "iter_tflops": 26.899057181653042, "iter_time": 0.5724459915161134, "loss": 
0.3611406087875366, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.54987609006086, "step_time": 0.539345859527588} +{"epoch": 0, "iter": 2976, "iter_tflops": 27.38031508508795, "iter_time": 0.5623842315673828, "loss": 0.3474445641040802, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 29.093005701253308, "step_time": 0.5292769546508789} +{"epoch": 0, "iter": 2977, "iter_tflops": 18.090373224950074, "iter_time": 0.48831268692016605, "loss": 0.03569404035806656, "lr": 3e-05, "seqlen": 3568.0, "step_tflops": 19.71657521349224, "step_time": 0.4480371799468994} +{"epoch": 0, "iter": 2978, "iter_tflops": 21.222693124477203, "iter_time": 0.4162411766052246, "loss": 0.03938133642077446, "lr": 3e-05, "seqlen": 3568.0, "step_tflops": 23.28858464140996, "step_time": 0.3793171157836914} +{"epoch": 0, "iter": 2979, "iter_tflops": 24.213138134423296, "iter_time": 0.3648332862854004, "loss": 0.03840326517820358, "lr": 3e-05, "seqlen": 3568.0, "step_tflops": 26.62287276663199, "step_time": 0.3318108768463135} +{"epoch": 0, "iter": 2980, "iter_tflops": 24.61639130772876, "iter_time": 0.3588567733764649, "loss": 0.06768067926168442, "lr": 3e-05, "seqlen": 3568.0, "step_tflops": 26.935367662380607, "step_time": 0.3279613208770752} +{"epoch": 0, "iter": 2981, "iter_tflops": 27.402899042006634, "iter_time": 0.7528799591064455, "loss": 0.6010298132896423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.97894125715975, "step_time": 0.7119339981079101} +{"epoch": 0, "iter": 2982, "iter_tflops": 14.162906778187262, "iter_time": 1.4566990966796876, "loss": 0.6488509774208069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.982224096628737, "step_time": 1.2148640480041502} +{"epoch": 0, "iter": 2983, "iter_tflops": 49.45144962397837, "iter_time": 0.41719896316528315, "loss": 0.7380179762840271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.61290708448223, "step_time": 0.3848157958984375} +{"epoch": 0, "iter": 2984, "iter_tflops": 48.47360552070487, "iter_time": 0.4256149978637695, 
"loss": 0.8536525368690491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.65843042690386, "step_time": 0.3917908935546875} +{"epoch": 0, "iter": 2985, "iter_tflops": 34.55190109160345, "iter_time": 0.5971044387817384, "loss": 0.249342143535614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.85362273070554, "step_time": 0.5598118171691894} +{"epoch": 0, "iter": 2986, "iter_tflops": 10.883918967158653, "iter_time": 1.8955574340820314, "loss": 0.28291553258895874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.30605304051104, "step_time": 1.5505043792724607} +{"epoch": 0, "iter": 2987, "iter_tflops": 16.909394504374056, "iter_time": 1.2200965270996096, "loss": 0.226721853017807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.87578939525168, "step_time": 1.0929923553466796} +{"epoch": 0, "iter": 2988, "iter_tflops": 24.912497808299566, "iter_time": 0.8281423110961915, "loss": 0.3144818842411041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.304331419765735, "step_time": 0.72890234375} +{"epoch": 0, "iter": 2989, "iter_tflops": 27.598128419476435, "iter_time": 0.5757434921264648, "loss": 0.5561826825141907, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 29.750993171367764, "step_time": 0.5340810890197754} +{"epoch": 0, "iter": 2990, "iter_tflops": 28.943403309613462, "iter_time": 0.5489832229614258, "loss": 0.18813011050224304, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.86108598077923, "step_time": 0.5148698539733887} +{"epoch": 0, "iter": 2991, "iter_tflops": 29.59329720989306, "iter_time": 0.5369270858764648, "loss": 0.21387295424938202, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 31.539000659076052, "step_time": 0.5038029899597168} +{"epoch": 0, "iter": 2992, "iter_tflops": 26.414118739565694, "iter_time": 0.6015511245727538, "loss": 0.2331359088420868, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 28.01004812428268, "step_time": 0.5672765274047852} +{"epoch": 0, "iter": 2993, "iter_tflops": 27.53309109837978, "iter_time": 0.443906379699707, 
"loss": 0.011347048915922642, "lr": 3e-05, "seqlen": 4912.0, "step_tflops": 30.314158290921274, "step_time": 0.40318173027038573} +{"epoch": 0, "iter": 2994, "iter_tflops": 31.876486631940555, "iter_time": 0.38342101287841795, "loss": 0.008833293803036213, "lr": 3e-05, "seqlen": 4912.0, "step_tflops": 35.08405393014412, "step_time": 0.348366662979126} +{"epoch": 0, "iter": 2995, "iter_tflops": 32.689046120906504, "iter_time": 0.3738902244567871, "loss": 0.004992767237126827, "lr": 3e-05, "seqlen": 4912.0, "step_tflops": 35.75294636436648, "step_time": 0.3418491630554199} +{"epoch": 0, "iter": 2996, "iter_tflops": 35.55631667947373, "iter_time": 0.34373962020874027, "loss": 0.01802813448011875, "lr": 3e-05, "seqlen": 4912.0, "step_tflops": 38.92374969110424, "step_time": 0.3140014743804932} +{"epoch": 0, "iter": 2997, "iter_tflops": 30.972261403987982, "iter_time": 0.6661151809692383, "loss": 1.0924152135849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.99993121018365, "step_time": 0.6251859550476074} +{"epoch": 0, "iter": 2998, "iter_tflops": 11.4777184656841, "iter_time": 1.7974908142089845, "loss": 0.9897560477256775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.909515580894285, "step_time": 1.4832359466552734} +{"epoch": 0, "iter": 2999, "iter_tflops": 12.79752045820383, "iter_time": 1.6121164703369142, "loss": 1.0294727087020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.95587032561233, "step_time": 1.2167522583007813} +{"epoch": 0, "iter": 3000, "iter_tflops": 3.8175186364185385, "iter_time": 5.4043203124999994, "loss": 0.9072354435920715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 3.9436340306567987, "step_time": 5.231492919921875} +{"epoch": 0, "iter": 3001, "iter_tflops": 13.443840226548728, "iter_time": 1.1028148498535157, "loss": 0.17294706404209137, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 14.268480103262064, "step_time": 1.0390782012939452} +{"epoch": 0, "iter": 3002, "iter_tflops": 16.06425654613304, "iter_time": 
0.9229226760864259, "loss": 0.3630561828613281, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 17.272628166747847, "step_time": 0.8583561515808105} +{"epoch": 0, "iter": 3003, "iter_tflops": 15.539315061018453, "iter_time": 0.9541003952026368, "loss": 0.2684328556060791, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 18.240565704280076, "step_time": 0.8128073921203613} +{"epoch": 0, "iter": 3004, "iter_tflops": 14.65177621073007, "iter_time": 1.0118955154418945, "loss": 0.4322541356086731, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 16.800984185087135, "step_time": 0.8824522705078124} +{"epoch": 0, "iter": 3005, "iter_tflops": 9.047604067912028, "iter_time": 2.280282531738281, "loss": 0.4803653657436371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.746256289648413, "step_time": 2.1168223876953123} +{"epoch": 0, "iter": 3006, "iter_tflops": 36.25394484609891, "iter_time": 0.5690716857910156, "loss": 0.5328236222267151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.215141002603794, "step_time": 0.47740428543090824} +{"epoch": 0, "iter": 3007, "iter_tflops": 35.220799815883076, "iter_time": 0.5857644805908203, "loss": 0.47139057517051697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.582287486690824, "step_time": 0.5489578971862792} +{"epoch": 0, "iter": 3008, "iter_tflops": 35.33475769385655, "iter_time": 0.5838753356933594, "loss": 0.4423280954360962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.83813674066089, "step_time": 0.545246021270752} +{"epoch": 0, "iter": 3009, "iter_tflops": 19.850373440114602, "iter_time": 1.0393302459716798, "loss": 0.20424190163612366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.835699368371593, "step_time": 0.9448331909179688} +{"epoch": 0, "iter": 3010, "iter_tflops": 21.688736630571576, "iter_time": 0.9512353744506835, "loss": 0.2876811921596527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.64912348714601, "step_time": 0.8369909591674805} +{"epoch": 0, "iter": 3011, "iter_tflops": 24.919341622462426, 
"iter_time": 0.8279148712158203, "loss": 0.22006800770759583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.554183205276587, "step_time": 0.6980769309997559} +{"epoch": 0, "iter": 3012, "iter_tflops": 23.097321700371122, "iter_time": 0.8932244949340821, "loss": 0.28455182909965515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.624311106911776, "step_time": 0.8051374893188477} +{"epoch": 0, "iter": 3013, "iter_tflops": 4.657306829386717, "iter_time": 4.429833435058594, "loss": 0.8888589143753052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.8009646939765505, "step_time": 4.2972808227539065} +{"epoch": 0, "iter": 3014, "iter_tflops": 26.842880583097937, "iter_time": 0.7685871658325194, "loss": 0.852658748626709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.747095726170787, "step_time": 0.6709932441711426} +{"epoch": 0, "iter": 3015, "iter_tflops": 26.47602795161672, "iter_time": 0.77923673248291, "loss": 0.7653124928474426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.59063772772407, "step_time": 0.6744250869750976} +{"epoch": 0, "iter": 3016, "iter_tflops": 37.200553135383004, "iter_time": 0.5545910415649414, "loss": 0.8682220578193665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18032466194096, "step_time": 0.5009939498901367} +{"epoch": 0, "iter": 3017, "iter_tflops": 7.45026800882611, "iter_time": 2.0229041442871094, "loss": 0.30699872970581055, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 7.876742680682774, "step_time": 1.9133769683837893} +{"epoch": 0, "iter": 3018, "iter_tflops": 16.897009106770877, "iter_time": 0.8919435348510741, "loss": 0.39347216486930847, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 19.60199903009432, "step_time": 0.7688592376708984} +{"epoch": 0, "iter": 3019, "iter_tflops": 20.10286162869692, "iter_time": 0.7497031173706055, "loss": 0.37380099296569824, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 22.242571454303203, "step_time": 0.6775825386047364} +{"epoch": 0, "iter": 3020, "iter_tflops": 22.813608316873573, 
"iter_time": 0.6606222839355469, "loss": 0.3007252812385559, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.031670545217462, "step_time": 0.6271381759643555} +{"epoch": 0, "iter": 3021, "iter_tflops": 8.469673677993146, "iter_time": 2.435878204345703, "loss": 0.5683953166007996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.87417939844679, "step_time": 2.32484521484375} +{"epoch": 0, "iter": 3022, "iter_tflops": 23.56560828983868, "iter_time": 0.8754746856689453, "loss": 0.6772792339324951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.4308175687403, "step_time": 0.7521136932373047} +{"epoch": 0, "iter": 3023, "iter_tflops": 35.21863928479512, "iter_time": 0.5858004150390624, "loss": 0.6016859412193298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.459494304232955, "step_time": 0.4747200546264649} +{"epoch": 0, "iter": 3024, "iter_tflops": 34.85716129369294, "iter_time": 0.5918753204345703, "loss": 0.878832221031189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.966302067510924, "step_time": 0.5581054191589355} +{"epoch": 0, "iter": 3025, "iter_tflops": 11.032594706499594, "iter_time": 1.8700128173828123, "loss": 0.22516073286533356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.3976880714978, "step_time": 1.8101121368408202} +{"epoch": 0, "iter": 3026, "iter_tflops": 22.330460794071456, "iter_time": 0.9238991394042968, "loss": 0.26643723249435425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.128448054032265, "step_time": 0.7896027145385744} +{"epoch": 0, "iter": 3027, "iter_tflops": 26.498811984721033, "iter_time": 0.7785667343139648, "loss": 0.1801256686449051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.568922711643026, "step_time": 0.6535254211425782} +{"epoch": 0, "iter": 3028, "iter_tflops": 34.992267454657274, "iter_time": 0.5895900726318358, "loss": 0.31664368510246277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.39324264860424, "step_time": 0.5373626213073731} +{"epoch": 0, "iter": 3029, "iter_tflops": 29.914985235559882, 
"iter_time": 0.6896574859619141, "loss": 0.882639467716217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.27541208859398, "step_time": 0.6392201423645019} +{"epoch": 0, "iter": 3030, "iter_tflops": 11.160365622834743, "iter_time": 1.848603729248047, "loss": 0.7443696856498718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.685646355498482, "step_time": 1.4048474960327149} +{"epoch": 0, "iter": 3031, "iter_tflops": 12.244802714846706, "iter_time": 1.6848857421875, "loss": 0.9216161370277405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.678422489300882, "step_time": 1.5082947998046874} +{"epoch": 0, "iter": 3032, "iter_tflops": 28.526879067146417, "iter_time": 0.7232159347534179, "loss": 0.9494801759719849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.75996740273613, "step_time": 0.6707124633789063} +{"epoch": 0, "iter": 3033, "iter_tflops": 9.95056649832885, "iter_time": 1.5968380126953126, "loss": 0.3508913516998291, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 10.537489782467349, "step_time": 1.5078963928222655} +{"epoch": 0, "iter": 3034, "iter_tflops": 19.990762351245532, "iter_time": 0.7948392639160156, "loss": 0.3105849325656891, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 23.109868010816502, "step_time": 0.6875609512329102} +{"epoch": 0, "iter": 3035, "iter_tflops": 26.052237632551666, "iter_time": 0.6099070281982422, "loss": 0.2313072383403778, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 27.67215646119814, "step_time": 0.5742032737731934} +{"epoch": 0, "iter": 3036, "iter_tflops": 18.605522159412946, "iter_time": 0.8540175704956054, "loss": 0.30659228563308716, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 19.82008099373175, "step_time": 0.801684051513672} +{"epoch": 0, "iter": 3037, "iter_tflops": 9.236767162448816, "iter_time": 2.2335838012695315, "loss": 0.09303565323352814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.870065182412759, "step_time": 2.0902692260742186} +{"epoch": 0, "iter": 3038, "iter_tflops": 35.78326733615553, 
"iter_time": 0.5765570068359376, "loss": 0.11199741065502167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.50013619240235, "step_time": 0.46361865997314455} +{"epoch": 0, "iter": 3039, "iter_tflops": 36.60659217556178, "iter_time": 0.5635895690917969, "loss": 0.09918400645256042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.21358050411125, "step_time": 0.52612113571167} +{"epoch": 0, "iter": 3040, "iter_tflops": 32.554744604013734, "iter_time": 0.6337353820800782, "loss": 0.184576153755188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.20680006207693, "step_time": 0.5859974060058594} +{"epoch": 0, "iter": 3041, "iter_tflops": 7.431077145293664, "iter_time": 2.776326110839844, "loss": 0.6904178857803345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.7487075205452784, "step_time": 2.6625206146240235} +{"epoch": 0, "iter": 3042, "iter_tflops": 24.353236735382584, "iter_time": 0.8471602249145507, "loss": 0.6657238006591797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.628048241422643, "step_time": 0.6963365707397461} +{"epoch": 0, "iter": 3043, "iter_tflops": 37.226447797198404, "iter_time": 0.5542052688598633, "loss": 0.5217040777206421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.86742145419832, "step_time": 0.5048298320770265} +{"epoch": 0, "iter": 3044, "iter_tflops": 26.836610198748424, "iter_time": 0.7687667465209961, "loss": 0.714331328868866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.528805960991754, "step_time": 0.6543569564819336} +{"epoch": 0, "iter": 3045, "iter_tflops": 12.169023070022929, "iter_time": 1.695377960205078, "loss": 0.0032001726794987917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.396592893281754, "step_time": 1.5400254135131834} +{"epoch": 0, "iter": 3046, "iter_tflops": 21.87657677003901, "iter_time": 0.943067726135254, "loss": 0.008071601390838623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.703154978463566, "step_time": 0.8026677474975585} +{"epoch": 0, "iter": 3047, "iter_tflops": 46.50437166708905, 
"iter_time": 0.4436377220153809, "loss": 0.0015519752632826567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.57967764504334, "step_time": 0.36463787651062013} +{"epoch": 0, "iter": 3048, "iter_tflops": 42.163062194625674, "iter_time": 0.4893167724609375, "loss": 0.012729568406939507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.368118760483455, "step_time": 0.4547487106323242} +{"epoch": 0, "iter": 3049, "iter_tflops": 13.949879603001035, "iter_time": 1.4789441986083984, "loss": 0.049974191933870316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.078591029853252, "step_time": 1.3682374877929688} +{"epoch": 0, "iter": 3050, "iter_tflops": 20.457247795899455, "iter_time": 1.0084980010986326, "loss": 0.10050695389509201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.039915871647004, "step_time": 0.8582015686035156} +{"epoch": 0, "iter": 3051, "iter_tflops": 20.831071446386705, "iter_time": 0.9904000167846679, "loss": 0.08305294811725616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.88399560392099, "step_time": 0.8638041076660156} +{"epoch": 0, "iter": 3052, "iter_tflops": 25.58434893385844, "iter_time": 0.8063950958251953, "loss": 0.06958462297916412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.08182477463562, "step_time": 0.7094153709411621} +{"epoch": 0, "iter": 3053, "iter_tflops": 8.288332602307685, "iter_time": 2.4891729736328125, "loss": 0.17008599638938904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.029306743804462, "step_time": 2.284903381347656} +{"epoch": 0, "iter": 3054, "iter_tflops": 24.827218014762067, "iter_time": 0.8309869232177735, "loss": 0.0917864441871643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.500267333067193, "step_time": 0.723891227722168} +{"epoch": 0, "iter": 3055, "iter_tflops": 29.75449196660012, "iter_time": 0.6933774414062499, "loss": 0.1658296138048172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.99367506712835, "step_time": 0.6253045005798339} +{"epoch": 0, "iter": 3056, "iter_tflops": 
32.6859938891608, "iter_time": 0.6311906433105469, "loss": 0.10387703031301498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.885308309636294, "step_time": 0.5305626831054687} +{"epoch": 0, "iter": 3057, "iter_tflops": 13.640374376450259, "iter_time": 1.5125019989013673, "loss": 0.2928197979927063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.584682437400952, "step_time": 1.4145726928710936} +{"epoch": 0, "iter": 3058, "iter_tflops": 40.49824912353312, "iter_time": 0.5094317398071289, "loss": 0.5156516432762146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.73778769255438, "step_time": 0.4611558723449707} +{"epoch": 0, "iter": 3059, "iter_tflops": 49.12494761363585, "iter_time": 0.4199718170166015, "loss": 0.25068673491477966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.25630309529853, "step_time": 0.38739252090454107} +{"epoch": 0, "iter": 3060, "iter_tflops": 47.26394501794711, "iter_time": 0.43650807189941404, "loss": 0.3500591516494751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.08513243065298, "step_time": 0.40385710144042974} +{"epoch": 0, "iter": 3061, "iter_tflops": 41.33364201151228, "iter_time": 0.49913563156127927, "loss": 0.4254322052001953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6529981781126, "step_time": 0.4620315399169922} +{"epoch": 0, "iter": 3062, "iter_tflops": 21.022236672826704, "iter_time": 0.9813938369750976, "loss": 0.2758430242538452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.771445817367255, "step_time": 0.8005407867431641} +{"epoch": 0, "iter": 3063, "iter_tflops": 41.02986922188037, "iter_time": 0.5028310813903808, "loss": 0.3601629137992859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.12695570910449, "step_time": 0.45717893409729} +{"epoch": 0, "iter": 3064, "iter_tflops": 41.91874343293363, "iter_time": 0.49216870117187494, "loss": 0.2801835238933563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71039519763389, "step_time": 0.45134358215332027} +{"epoch": 0, "iter": 3065, "iter_tflops": 
36.22615794641991, "iter_time": 0.569508186340332, "loss": 0.22916626930236816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.122842413735725, "step_time": 0.5141982040405274} +{"epoch": 0, "iter": 3066, "iter_tflops": 41.03473215614158, "iter_time": 0.5027714920043945, "loss": 0.21640193462371826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.25746656467757, "step_time": 0.45586054801940923} +{"epoch": 0, "iter": 3067, "iter_tflops": 40.52536354953711, "iter_time": 0.509090892791748, "loss": 0.2696170210838318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.29795088806316, "step_time": 0.4657347145080566} +{"epoch": 0, "iter": 3068, "iter_tflops": 42.508572403218125, "iter_time": 0.48533959960937495, "loss": 0.2969508171081543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.56654884052193, "step_time": 0.44304536247253423} +{"epoch": 0, "iter": 3069, "iter_tflops": 21.327499199903666, "iter_time": 0.9673470535278321, "loss": 0.0823577418923378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.68213288684807, "step_time": 0.9095746688842773} +{"epoch": 0, "iter": 3070, "iter_tflops": 15.748150970769336, "iter_time": 1.3100644989013672, "loss": 0.06358107924461365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.885617620264053, "step_time": 1.0924235534667968} +{"epoch": 0, "iter": 3071, "iter_tflops": 40.508009667687766, "iter_time": 0.5093089904785156, "loss": 0.07828180491924286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.609718733531416, "step_time": 0.462479793548584} +{"epoch": 0, "iter": 3072, "iter_tflops": 46.5603408182511, "iter_time": 0.443104434967041, "loss": 0.08895532041788101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.112687364998926, "step_time": 0.4036393814086915} +{"epoch": 0, "iter": 3073, "iter_tflops": 20.035747251444953, "iter_time": 1.0297142028808595, "loss": 0.007313137408345938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.65116176876453, "step_time": 0.9528862113952636} +{"epoch": 0, "iter": 3074, 
"iter_tflops": 18.492722621305553, "iter_time": 1.1156331024169923, "loss": 0.014821864664554596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.83027617345279, "step_time": 0.8308845767974854} +{"epoch": 0, "iter": 3075, "iter_tflops": 49.074706403599244, "iter_time": 0.4204017715454102, "loss": 0.004678539000451565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.06091662155423, "step_time": 0.38162677955627444} +{"epoch": 0, "iter": 3076, "iter_tflops": 62.4190053554648, "iter_time": 0.33052582931518554, "loss": 0.007832852192223072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 68.8425297316762, "step_time": 0.2996852903366089} +{"epoch": 0, "iter": 3077, "iter_tflops": 24.544838983565242, "iter_time": 0.8405471115112304, "loss": 0.11776512861251831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.78798644609564, "step_time": 0.8000273132324219} +{"epoch": 0, "iter": 3078, "iter_tflops": 12.276294966483258, "iter_time": 1.680563522338867, "loss": 0.12913787364959717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.627431419444557, "step_time": 1.4104385738372804} +{"epoch": 0, "iter": 3079, "iter_tflops": 44.187646053011974, "iter_time": 0.4668973197937012, "loss": 0.03808855637907982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.56525461887636, "step_time": 0.42481180572509775} +{"epoch": 0, "iter": 3080, "iter_tflops": 48.905523501578934, "iter_time": 0.4218561019897461, "loss": 0.10000213980674744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.411047019101666, "step_time": 0.3862701568603515} +{"epoch": 0, "iter": 3081, "iter_tflops": 16.592287116615616, "iter_time": 1.2434146881103514, "loss": 0.9454250931739807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.494830083325148, "step_time": 1.179268013000488} +{"epoch": 0, "iter": 3082, "iter_tflops": 19.293643846319473, "iter_time": 1.0693207397460938, "loss": 0.9834054112434387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.184058701307546, "step_time": 0.7879257278442383} +{"epoch": 0, 
"iter": 3083, "iter_tflops": 37.174013768144185, "iter_time": 0.5549869766235351, "loss": 1.0520269870758057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44194361371314, "step_time": 0.510140998840332} +{"epoch": 0, "iter": 3084, "iter_tflops": 36.03282441059269, "iter_time": 0.5725638732910157, "loss": 0.8430565595626831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.124171292434355, "step_time": 0.5273234634399413} +{"epoch": 0, "iter": 3085, "iter_tflops": 22.976417574616043, "iter_time": 0.8979247283935547, "loss": 0.9393353462219238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.92496060215388, "step_time": 0.8277282295227051} +{"epoch": 0, "iter": 3086, "iter_tflops": 24.14278146377447, "iter_time": 0.8545450134277344, "loss": 1.0047632455825806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.580015756358414, "step_time": 0.6974672927856445} +{"epoch": 0, "iter": 3087, "iter_tflops": 42.78756644073466, "iter_time": 0.4821749687194824, "loss": 0.8035109639167786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.904346346872174, "step_time": 0.4494366035461426} +{"epoch": 0, "iter": 3088, "iter_tflops": 40.927879078557666, "iter_time": 0.5040841102600098, "loss": 0.7778169512748718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.723033015923036, "step_time": 0.4718586997985841} +{"epoch": 0, "iter": 3089, "iter_tflops": 27.837520328989665, "iter_time": 0.7411254043579101, "loss": 0.37673982977867126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.4593462144529, "step_time": 0.7003242149353027} +{"epoch": 0, "iter": 3090, "iter_tflops": 8.2378670789014, "iter_time": 2.5044217529296873, "loss": 0.3423909544944763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.664832172986028, "step_time": 1.9344977188110355} +{"epoch": 0, "iter": 3091, "iter_tflops": 11.447336278034042, "iter_time": 1.8022615051269533, "loss": 0.3311957120895386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.336631236224758, "step_time": 1.54694938659668} +{"epoch": 0, 
"iter": 3092, "iter_tflops": 35.1822185419733, "iter_time": 0.5864068374633788, "loss": 0.24673959612846375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.39108975450386, "step_time": 0.5373927555084228} +{"epoch": 0, "iter": 3093, "iter_tflops": 15.249925728554416, "iter_time": 1.1334366455078126, "loss": 0.1728370189666748, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 16.337365292300273, "step_time": 1.0579934005737304} +{"epoch": 0, "iter": 3094, "iter_tflops": 12.710427435592122, "iter_time": 1.3598932647705078, "loss": 0.2856408655643463, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 15.855992756408531, "step_time": 1.0901130523681641} +{"epoch": 0, "iter": 3095, "iter_tflops": 24.367548171598727, "iter_time": 0.7093378677368165, "loss": 0.3310730457305908, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 26.213568864497343, "step_time": 0.6593846397399902} +{"epoch": 0, "iter": 3096, "iter_tflops": 25.429854136063526, "iter_time": 0.6797060089111329, "loss": 0.442326158285141, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 27.321849592283417, "step_time": 0.6326374282836914} +{"epoch": 0, "iter": 3097, "iter_tflops": 8.757137340571527, "iter_time": 0.8440884323120118, "loss": 0.004984394181519747, "lr": 3e-05, "seqlen": 2992.0, "step_tflops": 9.502930741908605, "step_time": 0.7778440704345703} +{"epoch": 0, "iter": 3098, "iter_tflops": 5.235197767542752, "iter_time": 1.4119425201416014, "loss": 0.010302776470780373, "lr": 3e-05, "seqlen": 2992.0, "step_tflops": 6.275158867070656, "step_time": 1.1779460067749024} +{"epoch": 0, "iter": 3099, "iter_tflops": 15.834039074955971, "iter_time": 0.4668296127319336, "loss": 0.005489047151058912, "lr": 3e-05, "seqlen": 2992.0, "step_tflops": 17.55375582738588, "step_time": 0.42109497261047363} +{"epoch": 0, "iter": 3100, "iter_tflops": 17.680952871031735, "iter_time": 0.4180656089782715, "loss": 0.0038573367055505514, "lr": 3e-05, "seqlen": 2992.0, "step_tflops": 19.5181477879719, "step_time": 0.3787141284942627} 
+{"epoch": 0, "iter": 3101, "iter_tflops": 22.482761776705697, "iter_time": 0.9176405334472656, "loss": 1.0097519159317017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.13664143867645, "step_time": 0.8547623977661132} +{"epoch": 0, "iter": 3102, "iter_tflops": 28.09112876206955, "iter_time": 0.7344344787597656, "loss": 0.8900644183158875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.700063987469434, "step_time": 0.5945549125671388} +{"epoch": 0, "iter": 3103, "iter_tflops": 36.729663287689704, "iter_time": 0.5617011337280273, "loss": 0.9466201066970825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.108723987820234, "step_time": 0.5143792037963867} +{"epoch": 0, "iter": 3104, "iter_tflops": 38.28511073179104, "iter_time": 0.5388803405761718, "loss": 0.7430343627929688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.67458905220078, "step_time": 0.49505211639404295} +{"epoch": 0, "iter": 3105, "iter_tflops": 21.953514736240553, "iter_time": 0.9397626647949219, "loss": 0.3364202678203583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.5523582406006, "step_time": 0.8759672088623047} +{"epoch": 0, "iter": 3106, "iter_tflops": 29.361044833134986, "iter_time": 0.7026689147949219, "loss": 0.4621362090110779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.720667949154375, "step_time": 0.5618387317657471} +{"epoch": 0, "iter": 3107, "iter_tflops": 39.44612788462561, "iter_time": 0.5230194854736329, "loss": 0.43135881423950195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.279680112817545, "step_time": 0.4766923751831055} +{"epoch": 0, "iter": 3108, "iter_tflops": 37.53130155956023, "iter_time": 0.5497036514282228, "loss": 0.2669234275817871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.891446216908186, "step_time": 0.5045332317352295} +{"epoch": 0, "iter": 3109, "iter_tflops": 31.350501005998662, "iter_time": 0.6580785903930664, "loss": 0.2879076302051544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.23218503913685, "step_time": 0.6026811752319335} 
+{"epoch": 0, "iter": 3110, "iter_tflops": 10.304079897115821, "iter_time": 2.002225692749023, "loss": 0.44235849380493164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.094468628932994, "step_time": 1.7058288497924805} +{"epoch": 0, "iter": 3111, "iter_tflops": 9.97053051001881, "iter_time": 2.0692071990966796, "loss": 0.34058913588523865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.49129177812017, "step_time": 1.6516381072998048} +{"epoch": 0, "iter": 3112, "iter_tflops": 40.95038832912149, "iter_time": 0.5038070297241211, "loss": 0.31960320472717285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.74183858555851, "step_time": 0.3911712989807129} +{"epoch": 0, "iter": 3113, "iter_tflops": 22.199319198250386, "iter_time": 0.6954791488647462, "loss": 0.2700954079627991, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.56206486003544, "step_time": 0.6552551193237304} +{"epoch": 0, "iter": 3114, "iter_tflops": 7.58754992089028, "iter_time": 2.0348022460937503, "loss": 0.2411603331565857, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 10.401405355995104, "step_time": 1.484334384918213} +{"epoch": 0, "iter": 3115, "iter_tflops": 22.436223363384027, "iter_time": 0.6881355819702148, "loss": 0.16670601069927216, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.18511104603899, "step_time": 0.6383747253417968} +{"epoch": 0, "iter": 3116, "iter_tflops": 23.44047708628038, "iter_time": 0.6586539840698242, "loss": 0.20686359703540802, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.21026204998319, "step_time": 0.6124158325195312} +{"epoch": 0, "iter": 3117, "iter_tflops": 17.96943509724971, "iter_time": 1.1481214294433593, "loss": 0.28563860058784485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.240897969261578, "step_time": 1.072252113342285} +{"epoch": 0, "iter": 3118, "iter_tflops": 22.389449152115994, "iter_time": 0.921464988708496, "loss": 0.14212319254875183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.728730491356654, "step_time": 0.6939782886505128} 
+{"epoch": 0, "iter": 3119, "iter_tflops": 39.530752602672415, "iter_time": 0.5218998413085937, "loss": 0.2270555943250656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14200633690158, "step_time": 0.47821358489990234} +{"epoch": 0, "iter": 3120, "iter_tflops": 40.784758407948104, "iter_time": 0.5058530273437499, "loss": 0.1875838339328766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83778075927905, "step_time": 0.4601274452209472} +{"epoch": 0, "iter": 3121, "iter_tflops": 29.18707317961005, "iter_time": 0.7068572235107422, "loss": 0.026639562100172043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.429080012383256, "step_time": 0.6564332618713379} +{"epoch": 0, "iter": 3122, "iter_tflops": 36.38501396437314, "iter_time": 0.5670217285156249, "loss": 0.005828910041600466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.0272586058809, "step_time": 0.5028630771636963} +{"epoch": 0, "iter": 3123, "iter_tflops": 45.07611271016601, "iter_time": 0.4576946029663086, "loss": 0.0068417564034461975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.828642115205575, "step_time": 0.4140408535003662} +{"epoch": 0, "iter": 3124, "iter_tflops": 46.19043966894596, "iter_time": 0.4466528930664062, "loss": 0.004181934054940939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.21627880107465, "step_time": 0.4028229694366455} +{"epoch": 0, "iter": 3125, "iter_tflops": 12.636120500169342, "iter_time": 1.2704312438964842, "loss": 0.2533276081085205, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 13.478069071324333, "step_time": 1.1910698928833008} +{"epoch": 0, "iter": 3126, "iter_tflops": 16.642755187197523, "iter_time": 0.9645832138061524, "loss": 0.43027156591415405, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 21.370708086499558, "step_time": 0.7511834526062012} +{"epoch": 0, "iter": 3127, "iter_tflops": 27.842506275614447, "iter_time": 0.5765760498046876, "loss": 0.20050658285617828, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.660615074059873, "step_time": 
0.541233627319336} +{"epoch": 0, "iter": 3128, "iter_tflops": 27.990964028340333, "iter_time": 0.5735180206298829, "loss": 0.22239406406879425, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.69162235277718, "step_time": 0.5406684112548827} +{"epoch": 0, "iter": 3129, "iter_tflops": 48.1389861972868, "iter_time": 0.42857349395751954, "loss": 0.2987692952156067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.9151136466942, "step_time": 0.38989037513732916} +{"epoch": 0, "iter": 3130, "iter_tflops": 47.669434840735136, "iter_time": 0.43279500961303713, "loss": 0.4629395306110382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.42312297616628, "step_time": 0.39354949378967286} +{"epoch": 0, "iter": 3131, "iter_tflops": 52.793833272169, "iter_time": 0.3907860488891602, "loss": 0.27953001856803894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.46561327289354, "step_time": 0.3590163288116455} +{"epoch": 0, "iter": 3132, "iter_tflops": 52.72590767396141, "iter_time": 0.39128948974609373, "loss": 0.23553529381752014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.31620310834395, "step_time": 0.35995220184326177} +{"epoch": 0, "iter": 3133, "iter_tflops": 34.064863864707085, "iter_time": 0.6056414489746094, "loss": 0.7528259754180908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.34370003054285, "step_time": 0.5676662940979004} +{"epoch": 0, "iter": 3134, "iter_tflops": 11.222263374126062, "iter_time": 1.8384075317382813, "loss": 0.9932466745376587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.449129889458911, "step_time": 1.3354210662841797} +{"epoch": 0, "iter": 3135, "iter_tflops": 16.536107097796684, "iter_time": 1.2476390838623046, "loss": 0.976795494556427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.920327029727126, "step_time": 1.0904194984436035} +{"epoch": 0, "iter": 3136, "iter_tflops": 26.49387996485765, "iter_time": 0.778711669921875, "loss": 0.9962713122367859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.54214472426511, "step_time": 
0.6983613986968994} +{"epoch": 0, "iter": 3137, "iter_tflops": 17.635033353992597, "iter_time": 0.8476650314331055, "loss": 0.44177576899528503, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 18.601612898860775, "step_time": 0.8036185455322264} +{"epoch": 0, "iter": 3138, "iter_tflops": 7.945105452932082, "iter_time": 1.8814855499267578, "loss": 0.4118470251560211, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 9.52120746456658, "step_time": 1.5700320739746094} +{"epoch": 0, "iter": 3139, "iter_tflops": 25.55873354405379, "iter_time": 0.584872528076172, "loss": 0.32513368129730225, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 27.228829663479935, "step_time": 0.5489990310668945} +{"epoch": 0, "iter": 3140, "iter_tflops": 25.83702334815418, "iter_time": 0.5785728836059569, "loss": 0.33915868401527405, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 27.585921364637848, "step_time": 0.5418923988342286} +{"epoch": 0, "iter": 3141, "iter_tflops": 33.21517117782138, "iter_time": 0.6211346435546874, "loss": 0.7765581607818604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.99797002215457, "step_time": 0.5731182479858398} +{"epoch": 0, "iter": 3142, "iter_tflops": 15.809074999951742, "iter_time": 1.305015853881836, "loss": 0.8301911950111389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.531797548922967, "step_time": 1.1767814140319823} +{"epoch": 0, "iter": 3143, "iter_tflops": 39.23811664960734, "iter_time": 0.5257921447753906, "loss": 0.8401234745979309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.07133480313397, "step_time": 0.4903836212158203} +{"epoch": 0, "iter": 3144, "iter_tflops": 44.34341458317001, "iter_time": 0.46525721359252936, "loss": 1.073987364768982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.764123943517596, "step_time": 0.4319370231628418} +{"epoch": 0, "iter": 3145, "iter_tflops": 27.32184603107883, "iter_time": 0.6356479644775391, "loss": 0.005293251946568489, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 29.241472759467904, "step_time": 
0.5939193267822265} +{"epoch": 0, "iter": 3146, "iter_tflops": 11.74276763436661, "iter_time": 1.4789593353271484, "loss": 0.008488425984978676, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 15.903705188999094, "step_time": 1.0920144462585448} +{"epoch": 0, "iter": 3147, "iter_tflops": 46.62309537431446, "iter_time": 0.37249941635131845, "loss": 0.011531678028404713, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 51.33705423206299, "step_time": 0.3382951374053955} +{"epoch": 0, "iter": 3148, "iter_tflops": 45.39166661881081, "iter_time": 0.3826049385070801, "loss": 0.01881309226155281, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 49.92285444119649, "step_time": 0.34787826156616214} +{"epoch": 0, "iter": 3149, "iter_tflops": 24.09355878474085, "iter_time": 0.8562908325195313, "loss": 0.6015805602073669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.38527415657467, "step_time": 0.8127189559936523} +{"epoch": 0, "iter": 3150, "iter_tflops": 13.89862034869076, "iter_time": 1.4843986663818358, "loss": 0.35490357875823975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.089953557705808, "step_time": 1.0807304191589355} +{"epoch": 0, "iter": 3151, "iter_tflops": 39.51607700047198, "iter_time": 0.5220936660766602, "loss": 0.5238168239593506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14338880225283, "step_time": 0.4781982612609863} +{"epoch": 0, "iter": 3152, "iter_tflops": 39.797321788022806, "iter_time": 0.5184040679931642, "loss": 0.45717939734458923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.51731835630871, "step_time": 0.4740892658233643} +{"epoch": 0, "iter": 3153, "iter_tflops": 30.279956466176728, "iter_time": 0.6813448867797852, "loss": 1.0148892402648926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.01834538945976, "step_time": 0.6248372917175293} +{"epoch": 0, "iter": 3154, "iter_tflops": 42.29636826922141, "iter_time": 0.487774585723877, "loss": 1.069492220878601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97350183630488, "step_time": 
0.44876054000854493} +{"epoch": 0, "iter": 3155, "iter_tflops": 41.46903674724057, "iter_time": 0.497505973815918, "loss": 0.847848117351532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.31585483888636, "step_time": 0.4552731838226318} +{"epoch": 0, "iter": 3156, "iter_tflops": 41.09643455721909, "iter_time": 0.5020166282653808, "loss": 0.9301307201385498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6908695649743, "step_time": 0.46164001083374023} +{"epoch": 0, "iter": 3157, "iter_tflops": 40.007100378472416, "iter_time": 0.5156857986450195, "loss": 0.9008941650390625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36899636265647, "step_time": 0.475710651397705} +{"epoch": 0, "iter": 3158, "iter_tflops": 43.56929472639776, "iter_time": 0.47352369689941404, "loss": 0.756281852722168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.985770203470416, "step_time": 0.43909237670898443} +{"epoch": 0, "iter": 3159, "iter_tflops": 43.12303727838919, "iter_time": 0.47842394256591797, "loss": 0.9574441909790039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.24761871126645, "step_time": 0.44610066604614257} +{"epoch": 0, "iter": 3160, "iter_tflops": 45.28973283157345, "iter_time": 0.4555357742309571, "loss": 1.1678965091705322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62669981722533, "step_time": 0.42427500915527344} +{"epoch": 0, "iter": 3161, "iter_tflops": 26.94616969035731, "iter_time": 0.7656410446166992, "loss": 0.07529637962579727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.527667545260886, "step_time": 0.7231959457397461} +{"epoch": 0, "iter": 3162, "iter_tflops": 11.41629105964015, "iter_time": 1.8071625366210937, "loss": 0.15106377005577087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.33273212302072, "step_time": 1.5474017868041994} +{"epoch": 0, "iter": 3163, "iter_tflops": 15.407394019825073, "iter_time": 1.3390384826660156, "loss": 0.1006617620587349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.02479730324593, "step_time": 
1.1445950355529786} +{"epoch": 0, "iter": 3164, "iter_tflops": 29.146669847910797, "iter_time": 0.7078370742797852, "loss": 0.10197074711322784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.75183577265374, "step_time": 0.577063892364502} +{"epoch": 0, "iter": 3165, "iter_tflops": 19.238202780403057, "iter_time": 0.8152892379760742, "loss": 0.3060142397880554, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 20.22929055795101, "step_time": 0.7753460083007813} +{"epoch": 0, "iter": 3166, "iter_tflops": 6.328549596585548, "iter_time": 2.4784035339355466, "loss": 0.22832636535167694, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 7.407548284723509, "step_time": 2.117394187927246} +{"epoch": 0, "iter": 3167, "iter_tflops": 8.790633307345853, "iter_time": 1.7842513885498046, "loss": 0.37881988286972046, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 10.82756923784244, "step_time": 1.448589183807373} +{"epoch": 0, "iter": 3168, "iter_tflops": 27.655689966643322, "iter_time": 0.5671418685913087, "loss": 0.2885518968105316, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.54803123927599, "step_time": 0.5308204650878906} +{"epoch": 0, "iter": 3169, "iter_tflops": 14.467780449184358, "iter_time": 1.0078342819213868, "loss": 0.47782739996910095, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 15.066626562058842, "step_time": 0.9677763671874999} +{"epoch": 0, "iter": 3170, "iter_tflops": 10.045232529187471, "iter_time": 1.4515467987060546, "loss": 0.3577312231063843, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 12.537046829121492, "step_time": 1.1630430450439453} +{"epoch": 0, "iter": 3171, "iter_tflops": 26.60320469886565, "iter_time": 0.5480965652465821, "loss": 0.24835805594921112, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 28.320909089023136, "step_time": 0.5148537101745606} +{"epoch": 0, "iter": 3172, "iter_tflops": 26.566694780760226, "iter_time": 0.5488498001098633, "loss": 0.3774310052394867, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 28.27145753457098, "step_time": 
0.5157542762756347} +{"epoch": 0, "iter": 3173, "iter_tflops": 29.953141575745136, "iter_time": 0.688778953552246, "loss": 0.7643225789070129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.02895855035771, "step_time": 0.6441387557983398} +{"epoch": 0, "iter": 3174, "iter_tflops": 12.412261333940132, "iter_time": 1.662154296875, "loss": 0.9193038940429688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.0187202780797, "step_time": 1.3736918411254884} +{"epoch": 0, "iter": 3175, "iter_tflops": 16.417074522002462, "iter_time": 1.2566851348876953, "loss": 0.8745675683021545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.26639201624765, "step_time": 1.070833267211914} +{"epoch": 0, "iter": 3176, "iter_tflops": 20.002769810963876, "iter_time": 1.0314118347167969, "loss": 0.8682535290718079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.202140921942135, "step_time": 0.8891892166137694} +{"epoch": 0, "iter": 3177, "iter_tflops": 19.266985092836205, "iter_time": 0.814071304321289, "loss": 0.3584630787372589, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 20.317006652968, "step_time": 0.7719985504150391} +{"epoch": 0, "iter": 3178, "iter_tflops": 13.652339687594154, "iter_time": 1.1488653259277344, "loss": 0.3418627083301544, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.277090685441394, "step_time": 0.9636058425903321} +{"epoch": 0, "iter": 3179, "iter_tflops": 28.59731758657058, "iter_time": 0.5484675140380859, "loss": 0.41587281227111816, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 30.459769994405935, "step_time": 0.5149316520690919} +{"epoch": 0, "iter": 3180, "iter_tflops": 27.40239544112335, "iter_time": 0.5723842544555664, "loss": 0.3369119167327881, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.160132660603058, "step_time": 0.5378816299438477} +{"epoch": 0, "iter": 3181, "iter_tflops": 38.334857709849175, "iter_time": 0.538181037902832, "loss": 0.34753578901290894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.52231033040093, "step_time": 
0.49686766815185546} +{"epoch": 0, "iter": 3182, "iter_tflops": 16.05665481771069, "iter_time": 1.2848936309814454, "loss": 0.26236575841903687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.728023896089194, "step_time": 1.163755962371826} +{"epoch": 0, "iter": 3183, "iter_tflops": 38.01657954991117, "iter_time": 0.5426867370605469, "loss": 0.26399269700050354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30899308521048, "step_time": 0.4876290359497071} +{"epoch": 0, "iter": 3184, "iter_tflops": 42.64754706006965, "iter_time": 0.4837580337524415, "loss": 0.48983633518218994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.729601159890514, "step_time": 0.44149945640563965} +{"epoch": 0, "iter": 3185, "iter_tflops": 30.50481122055431, "iter_time": 0.676322608947754, "loss": 0.6725792288780212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.56443759328552, "step_time": 0.6146712112426758} +{"epoch": 0, "iter": 3186, "iter_tflops": 40.8522291410523, "iter_time": 0.5050175704956055, "loss": 0.7483909726142883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.88873023447843, "step_time": 0.4700772476196289} +{"epoch": 0, "iter": 3187, "iter_tflops": 42.21467132357287, "iter_time": 0.48871856307983397, "loss": 0.9113307595252991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37756201106043, "step_time": 0.45465407562255855} +{"epoch": 0, "iter": 3188, "iter_tflops": 44.16171535857748, "iter_time": 0.46717147064208986, "loss": 0.9317625761032104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59673059328216, "step_time": 0.43345610618591307} +{"epoch": 0, "iter": 3189, "iter_tflops": 36.913657763251734, "iter_time": 0.5589013595581055, "loss": 0.018314730376005173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66782505704952, "step_time": 0.5200964126586914} +{"epoch": 0, "iter": 3190, "iter_tflops": 14.134292256874701, "iter_time": 1.4596481475830079, "loss": 0.05627687647938728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.88278066455925, 
"step_time": 1.222019874572754} +{"epoch": 0, "iter": 3191, "iter_tflops": 41.18344542388377, "iter_time": 0.5009559860229492, "loss": 0.028585882857441902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27100663738133, "step_time": 0.4557242050170899} +{"epoch": 0, "iter": 3192, "iter_tflops": 46.373855168674154, "iter_time": 0.4448863143920898, "loss": 0.05866629630327225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.02396389869391, "step_time": 0.4043412532806397} +{"epoch": 0, "iter": 3193, "iter_tflops": 17.647505323416492, "iter_time": 1.1690657196044922, "loss": 0.12246010452508926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.796040044125814, "step_time": 1.0976297912597655} +{"epoch": 0, "iter": 3194, "iter_tflops": 16.63769159916509, "iter_time": 1.2400213928222659, "loss": 0.20715048909187317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.193573662471763, "step_time": 0.9295976314544677} +{"epoch": 0, "iter": 3195, "iter_tflops": 42.66179745789411, "iter_time": 0.48359644317626954, "loss": 0.19277599453926086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.92555585623546, "step_time": 0.43965581512451174} +{"epoch": 0, "iter": 3196, "iter_tflops": 42.42743067649095, "iter_time": 0.4862678031921387, "loss": 0.19814692437648773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.442970302632915, "step_time": 0.44422424697875973} +{"epoch": 0, "iter": 3197, "iter_tflops": 23.150385578649228, "iter_time": 0.8911771011352541, "loss": 0.8601746559143066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.578449238506302, "step_time": 0.839397689819336} +{"epoch": 0, "iter": 3198, "iter_tflops": 12.65257524762498, "iter_time": 1.6305845336914062, "loss": 0.84317547082901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.942309585572946, "step_time": 1.2941094512939453} +{"epoch": 0, "iter": 3199, "iter_tflops": 14.846360705456314, "iter_time": 1.3896397857666019, "loss": 0.8411169052124023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
17.483049450787558, "step_time": 1.1800626411437989} +{"epoch": 0, "iter": 3200, "iter_tflops": 24.231393653862128, "iter_time": 0.8514200134277343, "loss": 0.7090693116188049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14942284862506, "step_time": 0.5269833374023438} +{"epoch": 0, "iter": 3201, "iter_tflops": 18.324845401537434, "iter_time": 0.8001570053100587, "loss": 0.33872491121292114, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 19.786597869771633, "step_time": 0.7410446968078613} +{"epoch": 0, "iter": 3202, "iter_tflops": 22.5940993792568, "iter_time": 0.648963836669922, "loss": 0.2513773739337921, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.325348281707647, "step_time": 0.6027767105102539} +{"epoch": 0, "iter": 3203, "iter_tflops": 21.61995774353639, "iter_time": 0.6782045364379884, "loss": 0.27521607279777527, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 23.241854864871367, "step_time": 0.6308770751953126} +{"epoch": 0, "iter": 3204, "iter_tflops": 23.860951978245687, "iter_time": 0.614508316040039, "loss": 0.3985687494277954, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 25.563481277170737, "step_time": 0.5735820274353027} +{"epoch": 0, "iter": 3205, "iter_tflops": 26.64008563025498, "iter_time": 0.7744379577636719, "loss": 0.7207098603248596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.973243922923754, "step_time": 0.7120739936828613} +{"epoch": 0, "iter": 3206, "iter_tflops": 44.28086085885362, "iter_time": 0.4659144630432129, "loss": 1.0151728391647339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.408594408648455, "step_time": 0.4261865844726562} +{"epoch": 0, "iter": 3207, "iter_tflops": 47.92337191602835, "iter_time": 0.430501708984375, "loss": 1.106951355934143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.6467166712435, "step_time": 0.39946573257446283} +{"epoch": 0, "iter": 3208, "iter_tflops": 43.30822855719217, "iter_time": 0.47637814331054684, "loss": 0.8359443545341492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
46.49868932308898, "step_time": 0.4436919364929199} +{"epoch": 0, "iter": 3209, "iter_tflops": 43.79964056051528, "iter_time": 0.4710333976745606, "loss": 0.008913530968129635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.807290161135555, "step_time": 0.4315470180511474} +{"epoch": 0, "iter": 3210, "iter_tflops": 51.64220477658916, "iter_time": 0.39950063323974605, "loss": 0.012340269051492214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.646313826506024, "step_time": 0.3642089328765869} +{"epoch": 0, "iter": 3211, "iter_tflops": 58.57941700656935, "iter_time": 0.3521901473999024, "loss": 0.0033939178101718426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.24003955048022, "step_time": 0.3211563014984131} +{"epoch": 0, "iter": 3212, "iter_tflops": 59.47339571350192, "iter_time": 0.34689617538452144, "loss": 0.011136976070702076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.95231025728698, "step_time": 0.31763448333740235} +{"epoch": 0, "iter": 3213, "iter_tflops": 45.39069004397772, "iter_time": 0.45452257919311523, "loss": 0.10416159778833389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.675755456558214, "step_time": 0.41531514358520505} +{"epoch": 0, "iter": 3214, "iter_tflops": 44.787636432050874, "iter_time": 0.46064260482788083, "loss": 0.11583871394395828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.8257789274562, "step_time": 0.42254509735107426} +{"epoch": 0, "iter": 3215, "iter_tflops": 49.936656530432884, "iter_time": 0.41314527130126955, "loss": 0.20142365992069244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.105026791553016, "step_time": 0.3813156509399414} +{"epoch": 0, "iter": 3216, "iter_tflops": 53.72224303032241, "iter_time": 0.3840326156616211, "loss": 0.16920387744903564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.58352786820982, "step_time": 0.352165433883667} +{"epoch": 0, "iter": 3217, "iter_tflops": 44.230446774354704, "iter_time": 0.4664455146789551, "loss": 0.8339841961860657, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 48.084579662997115, "step_time": 0.42905841445922854} +{"epoch": 0, "iter": 3218, "iter_tflops": 43.33666224350839, "iter_time": 0.4760655860900879, "loss": 0.7020914554595947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.668840682762095, "step_time": 0.4420742664337159} +{"epoch": 0, "iter": 3219, "iter_tflops": 48.03696342104142, "iter_time": 0.4294837150573731, "loss": 0.7431726455688477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.72764575898741, "step_time": 0.39884075927734375} +{"epoch": 0, "iter": 3220, "iter_tflops": 44.43913710770479, "iter_time": 0.46425504302978515, "loss": 0.9089385867118835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.957030265837815, "step_time": 0.43019956398010256} +{"epoch": 0, "iter": 3221, "iter_tflops": 43.77357534396675, "iter_time": 0.47131387710571293, "loss": 0.5406249761581421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.9249183340207, "step_time": 0.43048781776428213} +{"epoch": 0, "iter": 3222, "iter_tflops": 46.29692458643762, "iter_time": 0.44562557220458987, "loss": 0.399803102016449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.87208143088979, "step_time": 0.39773020362854006} +{"epoch": 0, "iter": 3223, "iter_tflops": 46.69086175950326, "iter_time": 0.4418657684326172, "loss": 0.3761782944202423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.3948836123377, "step_time": 0.40938865280151365} +{"epoch": 0, "iter": 3224, "iter_tflops": 47.507051081358746, "iter_time": 0.4342743453979493, "loss": 0.5409224629402161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.629368757203956, "step_time": 0.3995999565124512} +{"epoch": 0, "iter": 3225, "iter_tflops": 27.811379733954876, "iter_time": 0.7418220062255859, "loss": 0.6844953894615173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.34629333746699, "step_time": 0.7030221252441406} +{"epoch": 0, "iter": 3226, "iter_tflops": 10.795234626570178, "iter_time": 1.9111296997070313, "loss": 0.714815080165863, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 12.39641731814297, "step_time": 1.6642787170410156} +{"epoch": 0, "iter": 3227, "iter_tflops": 10.968773307271784, "iter_time": 1.8808934173583984, "loss": 0.5494273900985718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.709149653917338, "step_time": 1.6233260345458984} +{"epoch": 0, "iter": 3228, "iter_tflops": 32.38447816058714, "iter_time": 0.6370673446655273, "loss": 0.44720304012298584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34418917821937, "step_time": 0.4990082988739014} +{"epoch": 0, "iter": 3229, "iter_tflops": 20.58454339891098, "iter_time": 0.7858460159301758, "loss": 0.24646596610546112, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 21.681874226913983, "step_time": 0.7460739440917968} +{"epoch": 0, "iter": 3230, "iter_tflops": 7.704422755138264, "iter_time": 2.0996097869873047, "loss": 0.18618111312389374, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 10.474152062196609, "step_time": 1.5444000930786133} +{"epoch": 0, "iter": 3231, "iter_tflops": 10.086715416435116, "iter_time": 1.603721405029297, "loss": 0.2806413173675537, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 11.650233581077758, "step_time": 1.3884941711425782} +{"epoch": 0, "iter": 3232, "iter_tflops": 16.575094594791832, "iter_time": 0.9759390106201172, "loss": 0.26898032426834106, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 21.80151677095047, "step_time": 0.7419796333312988} +{"epoch": 0, "iter": 3233, "iter_tflops": 13.102250775275037, "iter_time": 1.1970996398925782, "loss": 0.46157553791999817, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 13.830571136212793, "step_time": 1.134060157775879} +{"epoch": 0, "iter": 3234, "iter_tflops": 22.75310837514945, "iter_time": 0.6893431625366211, "loss": 0.18882793188095093, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.243146141344006, "step_time": 0.6213448829650878} +{"epoch": 0, "iter": 3235, "iter_tflops": 25.329064477524643, "iter_time": 0.6192372283935547, "loss": 0.2828781008720398, "lr": 3e-05, 
"seqlen": 6272.0, "step_tflops": 27.21414140041644, "step_time": 0.5763437271118165} +{"epoch": 0, "iter": 3236, "iter_tflops": 24.59736019729162, "iter_time": 0.6376578445434571, "loss": 0.26947975158691406, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 26.463719644899943, "step_time": 0.5926868896484375} +{"epoch": 0, "iter": 3237, "iter_tflops": 20.805804926049532, "iter_time": 0.8762868118286133, "loss": 0.10912638902664185, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 22.156752501018868, "step_time": 0.8228576126098633} +{"epoch": 0, "iter": 3238, "iter_tflops": 12.043902096001677, "iter_time": 1.5137828521728516, "loss": 0.060698963701725006, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 14.420929759620686, "step_time": 1.2642633152008056} +{"epoch": 0, "iter": 3239, "iter_tflops": 39.5835223182709, "iter_time": 0.4605919685363769, "loss": 0.05935719609260559, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 43.48613397078851, "step_time": 0.41925668716430664} +{"epoch": 0, "iter": 3240, "iter_tflops": 42.78045299323481, "iter_time": 0.42617249679565433, "loss": 0.09210498631000519, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 46.73711277408287, "step_time": 0.39009368324279786} +{"epoch": 0, "iter": 3241, "iter_tflops": 23.117118508814468, "iter_time": 0.8924595642089844, "loss": 0.06241772696375847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.78346762716027, "step_time": 0.8324538688659668} +{"epoch": 0, "iter": 3242, "iter_tflops": 11.928465136689038, "iter_time": 1.729568161010742, "loss": 0.033884551376104355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.69038673645834, "step_time": 1.506976676940918} +{"epoch": 0, "iter": 3243, "iter_tflops": 11.093172456881906, "iter_time": 1.8598010253906252, "loss": 0.036765437573194504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.252397928520818, "step_time": 1.5567819213867187} +{"epoch": 0, "iter": 3244, "iter_tflops": 38.471092144177376, "iter_time": 0.5362752227783203, "loss": 0.025497883558273315, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.243127706562724, "step_time": 0.4770953121185303} +{"epoch": 0, "iter": 3245, "iter_tflops": 20.041520306517356, "iter_time": 0.7479200744628907, "loss": 0.2552895247936249, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 21.28682122996342, "step_time": 0.7041659812927247} +{"epoch": 0, "iter": 3246, "iter_tflops": 17.6940045885215, "iter_time": 0.8471488342285156, "loss": 0.30347567796707153, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 19.74511402193404, "step_time": 0.7591475715637207} +{"epoch": 0, "iter": 3247, "iter_tflops": 24.64649135795538, "iter_time": 0.6081780624389649, "loss": 0.24098153412342072, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 26.1363915619835, "step_time": 0.5735089836120606} +{"epoch": 0, "iter": 3248, "iter_tflops": 26.901325742979544, "iter_time": 0.5572013626098633, "loss": 0.25745612382888794, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.63626277445193, "step_time": 0.5234431419372558} +{"epoch": 0, "iter": 3249, "iter_tflops": 28.62210374789, "iter_time": 0.7208098220825195, "loss": 0.0783795565366745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.248789507677547, "step_time": 0.6820469131469727} +{"epoch": 0, "iter": 3250, "iter_tflops": 14.799722730339544, "iter_time": 1.3940189208984375, "loss": 0.1492520570755005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.483760425339653, "step_time": 1.1161740379333496} +{"epoch": 0, "iter": 3251, "iter_tflops": 37.513441079859746, "iter_time": 0.5499653701782227, "loss": 0.1104150041937828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.73530252562647, "step_time": 0.4414455966949463} +{"epoch": 0, "iter": 3252, "iter_tflops": 43.19507915140125, "iter_time": 0.47762601470947263, "loss": 0.07479831576347351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.35359710891026, "step_time": 0.43568165397644043} +{"epoch": 0, "iter": 3253, "iter_tflops": 23.098292968572974, "iter_time": 0.8931869354248047, "loss": 0.8924185037612915, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.692741010925552, "step_time": 0.8355124893188477} +{"epoch": 0, "iter": 3254, "iter_tflops": 7.781763266412933, "iter_time": 2.6512106323242186, "loss": 0.9009072184562683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.864421388137263, "step_time": 2.3274044189453127} +{"epoch": 0, "iter": 3255, "iter_tflops": 12.65540116531806, "iter_time": 1.630220428466797, "loss": 0.9265493750572205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.120701316029367, "step_time": 1.3644270248413086} +{"epoch": 0, "iter": 3256, "iter_tflops": 34.776245783792845, "iter_time": 0.5932524642944336, "loss": 0.838438093662262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.813659962118514, "step_time": 0.5455989589691161} +{"epoch": 0, "iter": 3257, "iter_tflops": 9.032218851007904, "iter_time": 1.8227360839843751, "loss": 0.30530208349227905, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 9.607929053836159, "step_time": 1.7135171508789064} +{"epoch": 0, "iter": 3258, "iter_tflops": 16.45742176500957, "iter_time": 1.0003602905273437, "loss": 0.3424963355064392, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 27.692326271951995, "step_time": 0.594509506225586} +{"epoch": 0, "iter": 3259, "iter_tflops": 29.930377363898526, "iter_time": 0.550054916381836, "loss": 0.2697180211544037, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.851465658763676, "step_time": 0.5168789215087891} +{"epoch": 0, "iter": 3260, "iter_tflops": 27.994039555917123, "iter_time": 0.5881020202636718, "loss": 0.3745580315589905, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 29.69321503015361, "step_time": 0.5544482536315918} +{"epoch": 0, "iter": 3261, "iter_tflops": 28.66974393740044, "iter_time": 0.719612060546875, "loss": 1.16410493850708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.21260609647792, "step_time": 0.6828637504577637} +{"epoch": 0, "iter": 3262, "iter_tflops": 10.402464059716962, "iter_time": 1.9832890930175782, "loss": 0.8043547868728638, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 12.310566740140878, "step_time": 1.6758849487304688} +{"epoch": 0, "iter": 3263, "iter_tflops": 45.81130302598673, "iter_time": 0.45034941482543944, "loss": 0.9934640526771545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.567086756587194, "step_time": 0.41622566223144536} +{"epoch": 0, "iter": 3264, "iter_tflops": 50.7363932530485, "iter_time": 0.40663303375244136, "loss": 0.922406792640686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.838300180828426, "step_time": 0.3762168674468994} +{"epoch": 0, "iter": 3265, "iter_tflops": 19.333128626020383, "iter_time": 1.0671368255615234, "loss": 0.09136226773262024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.141664073529412, "step_time": 1.0242993545532226} +{"epoch": 0, "iter": 3266, "iter_tflops": 38.56934344683116, "iter_time": 0.5349091186523438, "loss": 0.1450873464345932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.32435033903044, "step_time": 0.41827400398254394} +{"epoch": 0, "iter": 3267, "iter_tflops": 46.30683620484106, "iter_time": 0.4455301895141601, "loss": 0.24831512570381165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.19250174732547, "step_time": 0.4110393543243408} +{"epoch": 0, "iter": 3268, "iter_tflops": 55.95197823714944, "iter_time": 0.3687285804748535, "loss": 0.18646307289600372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.87079347129034, "step_time": 0.33893255424499513} +{"epoch": 0, "iter": 3269, "iter_tflops": 18.082715482915006, "iter_time": 0.77028133392334, "loss": 0.003785719396546483, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 18.99496026913922, "step_time": 0.7332880935668945} +{"epoch": 0, "iter": 3270, "iter_tflops": 28.417413844707916, "iter_time": 0.4901493949890136, "loss": 0.004643033724278212, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 31.663063783603977, "step_time": 0.4399062042236328} +{"epoch": 0, "iter": 3271, "iter_tflops": 30.442465262464616, "iter_time": 0.4575443572998047, "loss": 
0.01491919532418251, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 33.80895777833937, "step_time": 0.41198484420776366} +{"epoch": 0, "iter": 3272, "iter_tflops": 31.3793640465019, "iter_time": 0.44388338088989254, "loss": 0.01119445264339447, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 34.68439818018742, "step_time": 0.4015862731933594} +{"epoch": 0, "iter": 3273, "iter_tflops": 20.59695089604576, "iter_time": 1.0016576538085937, "loss": 0.6897807121276855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.15465630375918, "step_time": 0.931230583190918} +{"epoch": 0, "iter": 3274, "iter_tflops": 15.266657390693144, "iter_time": 1.3513824920654296, "loss": 0.9095608592033386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.440797344479126, "step_time": 1.118774482727051} +{"epoch": 0, "iter": 3275, "iter_tflops": 40.59064582759922, "iter_time": 0.508272117614746, "loss": 0.6119093298912048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.707061889142615, "step_time": 0.46147281074523927} +{"epoch": 0, "iter": 3276, "iter_tflops": 42.533665584422394, "iter_time": 0.4850532684326172, "loss": 0.636894166469574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.336681894214905, "step_time": 0.445243221282959} +{"epoch": 0, "iter": 3277, "iter_tflops": 19.51462956061374, "iter_time": 1.057211639404297, "loss": 1.008316993713379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.04839243939094, "step_time": 0.9801743087768555} +{"epoch": 0, "iter": 3278, "iter_tflops": 13.659055042022038, "iter_time": 1.5104334411621094, "loss": 1.0186866521835327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.361245919883384, "step_time": 1.1883417587280274} +{"epoch": 0, "iter": 3279, "iter_tflops": 41.857233749831906, "iter_time": 0.4928919486999511, "loss": 0.8264299035072327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.120973674855904, "step_time": 0.45723954582214354} +{"epoch": 0, "iter": 3280, "iter_tflops": 41.775151084224326, "iter_time": 0.4938604164123535, "loss": 
0.7219610810279846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.90472480763146, "step_time": 0.45944148635864257} +{"epoch": 0, "iter": 3281, "iter_tflops": 30.137272106373906, "iter_time": 0.6845707015991211, "loss": 1.0893409252166748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.91134190648878, "step_time": 0.6465128784179687} +{"epoch": 0, "iter": 3282, "iter_tflops": 9.8000700394315, "iter_time": 2.1051985778808593, "loss": 1.0313363075256348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.858325205200003, "step_time": 1.7397982559204104} +{"epoch": 0, "iter": 3283, "iter_tflops": 21.0423625955731, "iter_time": 0.9804551849365235, "loss": 0.9697380065917969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.323165713163352, "step_time": 0.8845751800537109} +{"epoch": 0, "iter": 3284, "iter_tflops": 42.40930381311284, "iter_time": 0.4864756469726563, "loss": 0.9256124496459961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.679275019566305, "step_time": 0.4516510715484619} +{"epoch": 0, "iter": 3285, "iter_tflops": 17.49681608852278, "iter_time": 0.9597011260986329, "loss": 0.2423526793718338, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 18.313026074795943, "step_time": 0.9169273300170899} +{"epoch": 0, "iter": 3286, "iter_tflops": 18.92678536584483, "iter_time": 0.8871931381225585, "loss": 0.3207853436470032, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 22.524710467689275, "step_time": 0.7454796867370607} +{"epoch": 0, "iter": 3287, "iter_tflops": 31.286455472968875, "iter_time": 0.536708740234375, "loss": 0.4175834655761719, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 33.29509157553205, "step_time": 0.5043300170898438} +{"epoch": 0, "iter": 3288, "iter_tflops": 28.85689503665814, "iter_time": 0.5818960800170899, "loss": 0.17845895886421204, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 30.787179870186787, "step_time": 0.5454125442504882} +{"epoch": 0, "iter": 3289, "iter_tflops": 36.76785930072717, "iter_time": 0.5611176147460938, "loss": 
0.08661676943302155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.54976736531402, "step_time": 0.5216489219665528} +{"epoch": 0, "iter": 3290, "iter_tflops": 12.062083148505671, "iter_time": 1.7104088287353518, "loss": 0.08299875259399414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.912262798570321, "step_time": 1.383498519897461} +{"epoch": 0, "iter": 3291, "iter_tflops": 12.849428778340002, "iter_time": 1.6056039428710938, "loss": 0.026295550167560577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.133596864428844, "step_time": 1.363264377593994} +{"epoch": 0, "iter": 3292, "iter_tflops": 43.76939225875107, "iter_time": 0.47135892105102545, "loss": 0.03896123915910721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.36084250394967, "step_time": 0.4266074047088623} +{"epoch": 0, "iter": 3293, "iter_tflops": 10.367959240598545, "iter_time": 1.3120284729003906, "loss": 0.3826240003108978, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 11.012346842975122, "step_time": 1.235255111694336} +{"epoch": 0, "iter": 3294, "iter_tflops": 16.478258654983616, "iter_time": 0.8255154876708984, "loss": 0.3790387213230133, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 19.903844627323455, "step_time": 0.6834387016296387} +{"epoch": 0, "iter": 3295, "iter_tflops": 22.82492747052393, "iter_time": 0.5959737548828125, "loss": 0.21871434152126312, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 24.420838623557366, "step_time": 0.5570266418457032} +{"epoch": 0, "iter": 3296, "iter_tflops": 22.1688415674915, "iter_time": 0.613611572265625, "loss": 0.40133756399154663, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 23.647795079644236, "step_time": 0.5752357749938964} +{"epoch": 0, "iter": 3297, "iter_tflops": 44.94529243244134, "iter_time": 0.4590267944335938, "loss": 0.06974557042121887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.75898968684185, "step_time": 0.41462042617797856} +{"epoch": 0, "iter": 3298, "iter_tflops": 42.151090329872716, "iter_time": 0.4894557495117187, 
"loss": 0.04586311802268028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.63683797721146, "step_time": 0.4423776226043702} +{"epoch": 0, "iter": 3299, "iter_tflops": 47.93332866249977, "iter_time": 0.4304122848510742, "loss": 0.023046215996146202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.85894113669777, "step_time": 0.3903047065734863} +{"epoch": 0, "iter": 3300, "iter_tflops": 47.942755747475985, "iter_time": 0.4303276519775391, "loss": 0.006423529703170061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.93879245373795, "step_time": 0.38971598243713373} +{"epoch": 0, "iter": 3301, "iter_tflops": 19.416952389752648, "iter_time": 1.06252995300293, "loss": 1.0983843803405762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.730856327632047, "step_time": 0.9951877136230468} +{"epoch": 0, "iter": 3302, "iter_tflops": 16.51165543067799, "iter_time": 1.2494866790771486, "loss": 0.8197407722473145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.76738202327861, "step_time": 1.0993058853149413} +{"epoch": 0, "iter": 3303, "iter_tflops": 44.32249516155249, "iter_time": 0.4654768066406251, "loss": 0.9473100304603577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90554807206557, "step_time": 0.4306618824005127} +{"epoch": 0, "iter": 3304, "iter_tflops": 40.69934052818918, "iter_time": 0.5069146881103516, "loss": 0.7237861752510071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.44876033711874, "step_time": 0.4748373336791992} +{"epoch": 0, "iter": 3305, "iter_tflops": 29.24693537696266, "iter_time": 0.7054104385375977, "loss": 0.21707333624362946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.94015891188926, "step_time": 0.6668063201904297} +{"epoch": 0, "iter": 3306, "iter_tflops": 17.386785322808556, "iter_time": 1.186596206665039, "loss": 0.10215987265110016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.740299440016162, "step_time": 0.9072481021881103} +{"epoch": 0, "iter": 3307, "iter_tflops": 48.55795578624092, "iter_time": 0.42487565994262694, 
"loss": 0.13896414637565613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.779260813356416, "step_time": 0.39089394569396974} +{"epoch": 0, "iter": 3308, "iter_tflops": 52.933674045706454, "iter_time": 0.3897536659240723, "loss": 0.1497938483953476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.1914879624158, "step_time": 0.36073713493347165} +{"epoch": 0, "iter": 3309, "iter_tflops": 36.251325727448894, "iter_time": 0.5691128005981445, "loss": 0.9200820922851562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.794151254965136, "step_time": 0.5318093795776367} +{"epoch": 0, "iter": 3310, "iter_tflops": 35.933486137738306, "iter_time": 0.574146728515625, "loss": 0.743042528629303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.850093190529925, "step_time": 0.4600011291503906} +{"epoch": 0, "iter": 3311, "iter_tflops": 43.972786646885744, "iter_time": 0.4691786689758301, "loss": 0.8771987557411194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.556726420864564, "step_time": 0.4338207244873047} +{"epoch": 0, "iter": 3312, "iter_tflops": 44.710785375437794, "iter_time": 0.46143437957763667, "loss": 0.9714175462722778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.96007651154789, "step_time": 0.43017223930358883} +{"epoch": 0, "iter": 3313, "iter_tflops": 29.735492241213915, "iter_time": 0.6938204803466798, "loss": 0.13540837168693542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.51135543130426, "step_time": 0.6547193298339844} +{"epoch": 0, "iter": 3314, "iter_tflops": 17.027791809387345, "iter_time": 1.2116129760742187, "loss": 0.07735582441091537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.637875394662803, "step_time": 0.9113529052734374} +{"epoch": 0, "iter": 3315, "iter_tflops": 44.728660236101454, "iter_time": 0.4612499771118164, "loss": 0.14290770888328552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.283971137901766, "step_time": 0.4186167030334472} +{"epoch": 0, "iter": 3316, "iter_tflops": 42.99191765294941, "iter_time": 
0.47988307189941404, "loss": 0.06531617045402527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.16106958275647, "step_time": 0.4374602546691895} +{"epoch": 0, "iter": 3317, "iter_tflops": 24.082303357816482, "iter_time": 0.8566910400390625, "loss": 0.5433515906333923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.75016377092099, "step_time": 0.8012024192810059} +{"epoch": 0, "iter": 3318, "iter_tflops": 28.217140616497026, "iter_time": 0.7311546478271485, "loss": 0.4676534831523895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.287645723605653, "step_time": 0.6594006366729737} +{"epoch": 0, "iter": 3319, "iter_tflops": 36.79675613938185, "iter_time": 0.5606769638061523, "loss": 0.4120277464389801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.22528928674112, "step_time": 0.5128886299133301} +{"epoch": 0, "iter": 3320, "iter_tflops": 37.12731679687049, "iter_time": 0.5556850128173828, "loss": 0.3397844731807709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.934626076913155, "step_time": 0.5166216773986817} +{"epoch": 0, "iter": 3321, "iter_tflops": 20.115155097836634, "iter_time": 1.025649238586426, "loss": 0.16286243498325348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.496802133394215, "step_time": 0.9597284927368164} +{"epoch": 0, "iter": 3322, "iter_tflops": 16.78076683853364, "iter_time": 1.2294487915039063, "loss": 0.14810243248939514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.909227256921383, "step_time": 1.0910595779418943} +{"epoch": 0, "iter": 3323, "iter_tflops": 38.06187964376728, "iter_time": 0.5420408477783204, "loss": 0.1006244644522667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57932613572335, "step_time": 0.496186336517334} +{"epoch": 0, "iter": 3324, "iter_tflops": 42.691719265965844, "iter_time": 0.4832574996948242, "loss": 0.11575689166784286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.453965802492434, "step_time": 0.44411910057067877} +{"epoch": 0, "iter": 3325, "iter_tflops": 32.150331078585964, 
"iter_time": 0.64170703125, "loss": 0.19354884326457977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.92305830552669, "step_time": 0.5907584991455078} +{"epoch": 0, "iter": 3326, "iter_tflops": 25.100778152361762, "iter_time": 0.8219304351806641, "loss": 0.19787120819091797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.42757903592686, "step_time": 0.5512270374298095} +{"epoch": 0, "iter": 3327, "iter_tflops": 43.56347568291984, "iter_time": 0.4735869483947754, "loss": 0.22467711567878723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.66047318787935, "step_time": 0.4328763885498047} +{"epoch": 0, "iter": 3328, "iter_tflops": 38.428615504652655, "iter_time": 0.5368679885864258, "loss": 0.24407324194908142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.99225155748639, "step_time": 0.4913071517944336} +{"epoch": 0, "iter": 3329, "iter_tflops": 25.51982132038564, "iter_time": 0.7322018814086915, "loss": 0.23659977316856384, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 27.81931502849426, "step_time": 0.6716794128417969} +{"epoch": 0, "iter": 3330, "iter_tflops": 28.537842470049146, "iter_time": 0.6547678298950195, "loss": 0.4171103537082672, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 30.74177060010382, "step_time": 0.6078264465332032} +{"epoch": 0, "iter": 3331, "iter_tflops": 29.443784277796016, "iter_time": 0.6346215896606445, "loss": 0.19563592970371246, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 31.602581760176026, "step_time": 0.5912700843811035} +{"epoch": 0, "iter": 3332, "iter_tflops": 30.22682099933459, "iter_time": 0.6181814880371094, "loss": 0.300326406955719, "lr": 3e-05, "seqlen": 7440.0, "step_tflops": 32.482733868050765, "step_time": 0.5752490310668945} +{"epoch": 0, "iter": 3333, "iter_tflops": 35.87552862138123, "iter_time": 0.5750742721557618, "loss": 0.7373083829879761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.658265663103464, "step_time": 0.520221778869629} +{"epoch": 0, "iter": 3334, "iter_tflops": 36.48764238587052, 
"iter_time": 0.5654268722534179, "loss": 0.721765398979187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.9693169032168, "step_time": 0.516173282623291} +{"epoch": 0, "iter": 3335, "iter_tflops": 38.72272965822918, "iter_time": 0.5327902679443359, "loss": 0.71480792760849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.20800233169288, "step_time": 0.48879578208923335} +{"epoch": 0, "iter": 3336, "iter_tflops": 40.68518693778816, "iter_time": 0.507091033935547, "loss": 0.8232461810112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.412979973998425, "step_time": 0.46452846717834473} +{"epoch": 0, "iter": 3337, "iter_tflops": 23.603485486969817, "iter_time": 0.8740697860717773, "loss": 0.6781080365180969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.60517534083571, "step_time": 0.8057392005920411} +{"epoch": 0, "iter": 3338, "iter_tflops": 37.698694562197325, "iter_time": 0.547262809753418, "loss": 0.8429061770439148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.35888683156707, "step_time": 0.4988309669494629} +{"epoch": 0, "iter": 3339, "iter_tflops": 45.11595535840647, "iter_time": 0.4572904052734375, "loss": 0.9332610964775085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.86486237838625, "step_time": 0.42220713424682615} +{"epoch": 0, "iter": 3340, "iter_tflops": 45.03883669782229, "iter_time": 0.4580734100341797, "loss": 0.8699394464492798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.4141988500207, "step_time": 0.4261372489929199} +{"epoch": 0, "iter": 3341, "iter_tflops": 41.6968803059458, "iter_time": 0.4947874603271484, "loss": 0.1695619374513626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42467819703663, "step_time": 0.4541824913024903} +{"epoch": 0, "iter": 3342, "iter_tflops": 35.92304688844361, "iter_time": 0.5743135757446288, "loss": 0.21704962849617004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.68039054837008, "step_time": 0.5199317150115967} +{"epoch": 0, "iter": 3343, "iter_tflops": 45.442973902771115, "iter_time": 
0.4539996337890625, "loss": 0.16033530235290527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.457896060593264, "step_time": 0.4171445846557617} +{"epoch": 0, "iter": 3344, "iter_tflops": 39.5857895964197, "iter_time": 0.5211742324829102, "loss": 0.15762917697429657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.54774001276088, "step_time": 0.4737580757141114} +{"epoch": 0, "iter": 3345, "iter_tflops": 33.74890417425728, "iter_time": 0.6113115081787109, "loss": 0.7423996925354004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.26546645389061, "step_time": 0.5536249904632569} +{"epoch": 0, "iter": 3346, "iter_tflops": 37.43043784954151, "iter_time": 0.5511849365234375, "loss": 0.9521471858024597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.83124362417505, "step_time": 0.49319818687438965} +{"epoch": 0, "iter": 3347, "iter_tflops": 36.76741087382121, "iter_time": 0.5611244583129884, "loss": 1.1177172660827637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.05050790345127, "step_time": 0.5151268882751464} +{"epoch": 0, "iter": 3348, "iter_tflops": 41.66145449230171, "iter_time": 0.4952081909179687, "loss": 1.0390022993087769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.18600074572738, "step_time": 0.4565815334320068} +{"epoch": 0, "iter": 3349, "iter_tflops": 21.099198823370447, "iter_time": 0.9778140716552733, "loss": 0.6326216459274292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.672177039739584, "step_time": 0.9099740829467773} +{"epoch": 0, "iter": 3350, "iter_tflops": 18.096250506696034, "iter_time": 1.1400755920410155, "loss": 0.5779770612716675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.538821131665774, "step_time": 0.9578562068939209} +{"epoch": 0, "iter": 3351, "iter_tflops": 41.455910027337, "iter_time": 0.49766350555419925, "loss": 0.5029041171073914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1770866869785, "step_time": 0.4566716232299804} +{"epoch": 0, "iter": 3352, "iter_tflops": 44.783725286675754, "iter_time": 
0.46068283462524406, "loss": 0.5360723733901978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78976967007864, "step_time": 0.4228569564819336} +{"epoch": 0, "iter": 3353, "iter_tflops": 33.190637374443696, "iter_time": 0.6215937728881835, "loss": 0.38368403911590576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.250790431814075, "step_time": 0.5691212043762208} +{"epoch": 0, "iter": 3354, "iter_tflops": 9.78054090017476, "iter_time": 2.109402099609375, "loss": 0.31965935230255127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.969332469339223, "step_time": 1.880797538757324} +{"epoch": 0, "iter": 3355, "iter_tflops": 14.231375859102299, "iter_time": 1.4496907196044921, "loss": 0.3122970163822174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.735050562037557, "step_time": 1.1012029800415037} +{"epoch": 0, "iter": 3356, "iter_tflops": 26.844588676947833, "iter_time": 0.7685382614135743, "loss": 0.2907589375972748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.90849260599439, "step_time": 0.5910049953460693} +{"epoch": 0, "iter": 3357, "iter_tflops": 23.201116866566995, "iter_time": 0.6760320968627931, "loss": 0.3668178915977478, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 24.704180488230225, "step_time": 0.6349006271362305} +{"epoch": 0, "iter": 3358, "iter_tflops": 6.977792550751757, "iter_time": 2.247802520751953, "loss": 0.31184014678001404, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 8.828548264702627, "step_time": 1.7765887680053714} +{"epoch": 0, "iter": 3359, "iter_tflops": 12.385688806704245, "iter_time": 1.266356674194336, "loss": 0.3212042450904846, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 13.75554628876661, "step_time": 1.1402454948425294} +{"epoch": 0, "iter": 3360, "iter_tflops": 28.008172885282857, "iter_time": 0.5600043869018555, "loss": 0.4859771430492401, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.825070888369737, "step_time": 0.5258897705078125} +{"epoch": 0, "iter": 3361, "iter_tflops": 19.793358990373186, "iter_time": 
0.7779506988525391, "loss": 0.3421952426433563, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 21.02369089422495, "step_time": 0.7324240798950195} +{"epoch": 0, "iter": 3362, "iter_tflops": 10.994454987008373, "iter_time": 1.4005475921630857, "loss": 0.2474101036787033, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 13.051507331280062, "step_time": 1.179806827545166} +{"epoch": 0, "iter": 3363, "iter_tflops": 28.492580897696303, "iter_time": 0.540430419921875, "loss": 0.32826077938079834, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 30.334050481040343, "step_time": 0.507622859954834} +{"epoch": 0, "iter": 3364, "iter_tflops": 26.87994285425897, "iter_time": 0.5728530578613281, "loss": 0.35057365894317627, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.69022406258034, "step_time": 0.5367074661254883} +{"epoch": 0, "iter": 3365, "iter_tflops": 30.26514944081306, "iter_time": 0.6816782302856444, "loss": 0.6690022349357605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.49483773442025, "step_time": 0.6349037246704101} +{"epoch": 0, "iter": 3366, "iter_tflops": 8.537652311940118, "iter_time": 2.4164832153320317, "loss": 0.801477313041687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.706963721411151, "step_time": 1.762292427062988} +{"epoch": 0, "iter": 3367, "iter_tflops": 11.47700022978796, "iter_time": 1.797603302001953, "loss": 1.0490822792053223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.404453666881675, "step_time": 1.4322718505859373} +{"epoch": 0, "iter": 3368, "iter_tflops": 36.70630631715783, "iter_time": 0.5620585556030273, "loss": 0.948780357837677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.096941546702816, "step_time": 0.5145303535461425} +{"epoch": 0, "iter": 3369, "iter_tflops": 18.497929357853753, "iter_time": 1.030244171142578, "loss": 0.24195604026317596, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 19.503631548218817, "step_time": 0.9771197662353516} +{"epoch": 0, "iter": 3370, "iter_tflops": 11.722599327252356, "iter_time": 
1.6256960906982423, "loss": 0.25819653272628784, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 12.942723753615082, "step_time": 1.4724399795532226} +{"epoch": 0, "iter": 3371, "iter_tflops": 9.372815334461935, "iter_time": 2.0332614288330078, "loss": 0.24940438568592072, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 11.353938424591458, "step_time": 1.6784822311401366} +{"epoch": 0, "iter": 3372, "iter_tflops": 32.93392291656791, "iter_time": 0.578655143737793, "loss": 0.31999868154525757, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 35.13901132672829, "step_time": 0.5423426322937012} +{"epoch": 0, "iter": 3373, "iter_tflops": 29.288990818501485, "iter_time": 0.6041952514648438, "loss": 0.2144027203321457, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 31.55798119307789, "step_time": 0.5607541580200196} +{"epoch": 0, "iter": 3374, "iter_tflops": 30.33988621024134, "iter_time": 0.5832674865722656, "loss": 0.41519472002983093, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 32.730187595727635, "step_time": 0.5406711807250977} +{"epoch": 0, "iter": 3375, "iter_tflops": 30.06222892073137, "iter_time": 0.5886545944213867, "loss": 0.40045881271362305, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 32.19276174203999, "step_time": 0.5496971435546875} +{"epoch": 0, "iter": 3376, "iter_tflops": 33.125493670560935, "iter_time": 0.5342190322875977, "loss": 0.2708788812160492, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 35.1581249166246, "step_time": 0.5033337020874024} +{"epoch": 0, "iter": 3377, "iter_tflops": 24.416978422616307, "iter_time": 0.5138453903198242, "loss": 0.005446110386401415, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 26.537260044236834, "step_time": 0.472790023803711} +{"epoch": 0, "iter": 3378, "iter_tflops": 7.47683383467478, "iter_time": 1.6780567932128907, "loss": 0.00516679510474205, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 10.410580121675924, "step_time": 1.2051731662750245} +{"epoch": 0, "iter": 3379, "iter_tflops": 33.21625459033338, 
"iter_time": 0.3777232551574707, "loss": 0.005348334088921547, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 36.639930930694845, "step_time": 0.3424283695220947} +{"epoch": 0, "iter": 3380, "iter_tflops": 35.81228164867334, "iter_time": 0.3503421516418457, "loss": 0.006344981491565704, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 39.400550359568626, "step_time": 0.3184359531402588} +{"epoch": 0, "iter": 3381, "iter_tflops": 21.504642842057283, "iter_time": 0.9593785705566406, "loss": 0.8056635856628418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.50580873012505, "step_time": 0.9167008285522462} +{"epoch": 0, "iter": 3382, "iter_tflops": 14.400703806736828, "iter_time": 1.4326448059082033, "loss": 0.79937744140625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.677764219064393, "step_time": 1.0484470329284667} +{"epoch": 0, "iter": 3383, "iter_tflops": 39.50611154742765, "iter_time": 0.5222253646850585, "loss": 0.8688047528266907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.08775671157247, "step_time": 0.4788156795501709} +{"epoch": 0, "iter": 3384, "iter_tflops": 34.11067067963561, "iter_time": 0.6048281402587891, "loss": 0.9648983478546143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.14207717151654, "step_time": 0.5554641819000244} +{"epoch": 0, "iter": 3385, "iter_tflops": 36.469065272756154, "iter_time": 0.5657148971557617, "loss": 0.8468400835990906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.268397669764006, "step_time": 0.5123395690917969} +{"epoch": 0, "iter": 3386, "iter_tflops": 38.65960612550095, "iter_time": 0.5336602096557618, "loss": 0.7579680681228638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.78978431161974, "step_time": 0.48214997673034665} +{"epoch": 0, "iter": 3387, "iter_tflops": 41.57945496041613, "iter_time": 0.4961847991943359, "loss": 0.8402795791625977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42500325757609, "step_time": 0.4541792411804199} +{"epoch": 0, "iter": 3388, "iter_tflops": 37.23970199247286, 
"iter_time": 0.5540080184936524, "loss": 0.8414157032966614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.51533544023581, "step_time": 0.5092168998718262} +{"epoch": 0, "iter": 3389, "iter_tflops": 25.883891281072, "iter_time": 0.7970630569458008, "loss": 0.8138136863708496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.771648823025103, "step_time": 0.7428832778930664} +{"epoch": 0, "iter": 3390, "iter_tflops": 9.498340074774228, "iter_time": 2.1720735778808593, "loss": 0.7125030755996704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.162423137301708, "step_time": 1.848262985229492} +{"epoch": 0, "iter": 3391, "iter_tflops": 13.303990286696935, "iter_time": 1.5507447814941406, "loss": 0.692339301109314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.843359247042109, "step_time": 1.3021918640136718} +{"epoch": 0, "iter": 3392, "iter_tflops": 44.83829672459953, "iter_time": 0.4601221504211426, "loss": 0.7422987818717957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.68047801570997, "step_time": 0.4238063049316406} +{"epoch": 0, "iter": 3393, "iter_tflops": 21.27446029323257, "iter_time": 0.7931506500244141, "loss": 0.30342531204223633, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 22.51211553112911, "step_time": 0.7495453720092774} +{"epoch": 0, "iter": 3394, "iter_tflops": 14.501926467926092, "iter_time": 1.163559341430664, "loss": 0.28178513050079346, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 15.900165001170224, "step_time": 1.0612375411987305} +{"epoch": 0, "iter": 3395, "iter_tflops": 25.08983046649417, "iter_time": 0.6725375061035157, "loss": 0.18184182047843933, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.098567013221878, "step_time": 0.6226842918395996} +{"epoch": 0, "iter": 3396, "iter_tflops": 24.67447358255519, "iter_time": 0.683858642578125, "loss": 0.48063814640045166, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.461493316580626, "step_time": 0.6376757278442383} +{"epoch": 0, "iter": 3397, "iter_tflops": 19.321873261734336, 
"iter_time": 1.0677584533691407, "loss": 0.042905617505311966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.66828497505919, "step_time": 0.9982005538940429} +{"epoch": 0, "iter": 3398, "iter_tflops": 16.970807584561914, "iter_time": 1.2156813049316404, "loss": 0.02542818710207939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.916514568253117, "step_time": 1.0906392631530761} +{"epoch": 0, "iter": 3399, "iter_tflops": 35.44973701301679, "iter_time": 0.5819815673828124, "loss": 0.05849049612879753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.639247479964524, "step_time": 0.46217386436462404} +{"epoch": 0, "iter": 3400, "iter_tflops": 42.366060600350536, "iter_time": 0.48697219467163083, "loss": 0.07136974483728409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.73830741719545, "step_time": 0.44141721534729006} +{"epoch": 0, "iter": 3401, "iter_tflops": 19.97784250811109, "iter_time": 1.0326987762451172, "loss": 0.6973514556884766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.290633048810868, "step_time": 0.9690220794677734} +{"epoch": 0, "iter": 3402, "iter_tflops": 22.75431815105151, "iter_time": 0.9066891555786133, "loss": 0.8594192266464233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.07572382283316, "step_time": 0.7619775428771973} +{"epoch": 0, "iter": 3403, "iter_tflops": 38.92905498990229, "iter_time": 0.5299664611816407, "loss": 0.7339656352996826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.44899847374031, "step_time": 0.486020736694336} +{"epoch": 0, "iter": 3404, "iter_tflops": 36.775571255207495, "iter_time": 0.5609999465942382, "loss": 0.9777041077613831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.23859264617095, "step_time": 0.5127190628051758} +{"epoch": 0, "iter": 3405, "iter_tflops": 20.92125833211664, "iter_time": 0.9861306228637695, "loss": 0.8140201568603516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.26880334079642, "step_time": 0.9264572143554688} +{"epoch": 0, "iter": 3406, "iter_tflops": 
15.0563073326105, "iter_time": 1.3702625122070313, "loss": 0.8073896765708923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.860010700213117, "step_time": 1.0388259010314942} +{"epoch": 0, "iter": 3407, "iter_tflops": 36.93801003971375, "iter_time": 0.5585328903198242, "loss": 0.8720466494560242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.221146783761476, "step_time": 0.5129414539337158} +{"epoch": 0, "iter": 3408, "iter_tflops": 42.1848858093621, "iter_time": 0.48906363296508787, "loss": 0.7157208323478699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91980071503235, "step_time": 0.44928534507751455} +{"epoch": 0, "iter": 3409, "iter_tflops": 29.827157996595997, "iter_time": 0.6916882095336914, "loss": 0.05475664511322975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.34837984305312, "step_time": 0.637778263092041} +{"epoch": 0, "iter": 3410, "iter_tflops": 9.416309854135974, "iter_time": 2.1909956054687503, "loss": 0.05424993112683296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.253252602738298, "step_time": 1.6837238388061526} +{"epoch": 0, "iter": 3411, "iter_tflops": 14.407592580354725, "iter_time": 1.4319598083496095, "loss": 0.07977021485567093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.134526402753135, "step_time": 1.2040655822753907} +{"epoch": 0, "iter": 3412, "iter_tflops": 21.66321049167183, "iter_time": 0.9523562316894532, "loss": 0.03665713965892792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.424594777375585, "step_time": 0.7011513214111328} +{"epoch": 0, "iter": 3413, "iter_tflops": 15.114103608115636, "iter_time": 0.9350542526245117, "loss": 0.37126654386520386, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 16.223939822710072, "step_time": 0.8710896987915039} +{"epoch": 0, "iter": 3414, "iter_tflops": 13.664740331415564, "iter_time": 1.0342316436767578, "loss": 0.2874335050582886, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 16.165519990141206, "step_time": 0.8742376899719237} +{"epoch": 0, "iter": 3415, 
"iter_tflops": 25.517633001413355, "iter_time": 0.5538329849243164, "loss": 0.32093966007232666, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 27.202017144258747, "step_time": 0.5195389289855957} +{"epoch": 0, "iter": 3416, "iter_tflops": 23.963527904628105, "iter_time": 0.5897506790161132, "loss": 0.13089756667613983, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 25.242043989557416, "step_time": 0.559879653930664} +{"epoch": 0, "iter": 3417, "iter_tflops": 20.927604901969676, "iter_time": 0.9858315658569338, "loss": 0.9029126763343811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.860447122427725, "step_time": 0.9437635650634766} +{"epoch": 0, "iter": 3418, "iter_tflops": 12.729224935400834, "iter_time": 1.6207658843994142, "loss": 0.9071154594421387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.15281839071453, "step_time": 1.3615350608825683} +{"epoch": 0, "iter": 3419, "iter_tflops": 42.55983265526618, "iter_time": 0.4847550430297851, "loss": 0.680589497089386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.73315567047697, "step_time": 0.45111895751953124} +{"epoch": 0, "iter": 3420, "iter_tflops": 48.410330247056244, "iter_time": 0.42617130279541016, "loss": 1.0587868690490723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.277948256598826, "step_time": 0.3946423721313476} +{"epoch": 0, "iter": 3421, "iter_tflops": 30.12346194648722, "iter_time": 0.6848845443725586, "loss": 0.5857982635498047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.88178494673723, "step_time": 0.6471122474670411} +{"epoch": 0, "iter": 3422, "iter_tflops": 14.567032848539231, "iter_time": 1.4162866058349608, "loss": 0.6418293714523315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.299610288764196, "step_time": 1.1925756225585935} +{"epoch": 0, "iter": 3423, "iter_tflops": 44.239679603133084, "iter_time": 0.4663481674194336, "loss": 0.558668851852417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.841486617412066, "step_time": 0.4312385540008545} +{"epoch": 0, "iter": 
3424, "iter_tflops": 51.04193067611302, "iter_time": 0.4041989250183105, "loss": 0.6119580864906311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.31375152353893, "step_time": 0.37298308181762696} +{"epoch": 0, "iter": 3425, "iter_tflops": 32.73851509499715, "iter_time": 0.6301780471801758, "loss": 0.24829255044460297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.84503600366091, "step_time": 0.5920812797546386} +{"epoch": 0, "iter": 3426, "iter_tflops": 23.145486141093325, "iter_time": 0.8913657455444336, "loss": 0.4318182170391083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.833590449340363, "step_time": 0.7412300453186036} +{"epoch": 0, "iter": 3427, "iter_tflops": 37.844984069954414, "iter_time": 0.5451473693847656, "loss": 0.298606276512146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.328161139766785, "step_time": 0.49920182609558106} +{"epoch": 0, "iter": 3428, "iter_tflops": 38.546409656289626, "iter_time": 0.5352273712158203, "loss": 0.23113299906253815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.94482307922493, "step_time": 0.49186268997192384} +{"epoch": 0, "iter": 3429, "iter_tflops": 28.485564292212874, "iter_time": 0.7242648696899415, "loss": 0.0028399694710969925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.678417920172695, "step_time": 0.6724953536987305} +{"epoch": 0, "iter": 3430, "iter_tflops": 12.103124567499124, "iter_time": 1.7046088714599608, "loss": 0.006776707712560892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.927151181638116, "step_time": 1.2188166389465334} +{"epoch": 0, "iter": 3431, "iter_tflops": 15.86573587842037, "iter_time": 1.3003552856445313, "loss": 0.0032546238508075476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.53463086279831, "step_time": 1.0046975593566894} +{"epoch": 0, "iter": 3432, "iter_tflops": 39.63803057222664, "iter_time": 0.5204873504638672, "loss": 0.007286111358553171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.43270692810772, "step_time": 0.4090816211700439} 
+{"epoch": 0, "iter": 3433, "iter_tflops": 18.829849150190437, "iter_time": 0.7440450210571289, "loss": 0.2625282108783722, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 19.924253856369745, "step_time": 0.7031759185791017} +{"epoch": 0, "iter": 3434, "iter_tflops": 10.926267231710161, "iter_time": 1.2822545166015624, "loss": 0.24722807109355927, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 13.113141433989993, "step_time": 1.0684133605957031} +{"epoch": 0, "iter": 3435, "iter_tflops": 21.099156435442058, "iter_time": 0.6640196990966796, "loss": 0.34422117471694946, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 22.78866861263027, "step_time": 0.6147904357910157} +{"epoch": 0, "iter": 3436, "iter_tflops": 22.201428681383177, "iter_time": 0.6310519790649414, "loss": 0.3072531223297119, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 23.83088484374838, "step_time": 0.5879032859802247} +{"epoch": 0, "iter": 3437, "iter_tflops": 31.55269050842363, "iter_time": 0.6538616256713867, "loss": 0.29052138328552246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.87603952573209, "step_time": 0.5915549411773681} +{"epoch": 0, "iter": 3438, "iter_tflops": 39.78768583594122, "iter_time": 0.5185296173095703, "loss": 0.5225265026092529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.399831393351405, "step_time": 0.4646660327911377} +{"epoch": 0, "iter": 3439, "iter_tflops": 44.147559727034476, "iter_time": 0.4673212661743164, "loss": 0.354633092880249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.46886603994452, "step_time": 0.42565661621093753} +{"epoch": 0, "iter": 3440, "iter_tflops": 44.12779351887561, "iter_time": 0.4675305938720703, "loss": 0.37949177622795105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41811531892653, "step_time": 0.4261027793884278} +{"epoch": 0, "iter": 3441, "iter_tflops": 28.082792358273153, "iter_time": 0.7346524963378905, "loss": 0.2924654483795166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.446051154147952, "step_time": 
0.6560789909362793} +{"epoch": 0, "iter": 3442, "iter_tflops": 34.890940321743656, "iter_time": 0.5913023071289063, "loss": 0.23054219782352448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.43042388471551, "step_time": 0.5368427257537842} +{"epoch": 0, "iter": 3443, "iter_tflops": 41.57437737854396, "iter_time": 0.4962453994750976, "loss": 0.21359270811080933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38506787412051, "step_time": 0.4545788841247559} +{"epoch": 0, "iter": 3444, "iter_tflops": 41.062044016347066, "iter_time": 0.5024370803833008, "loss": 0.2156376987695694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.43869473516461, "step_time": 0.4642596645355224} +{"epoch": 0, "iter": 3445, "iter_tflops": 27.258060474879755, "iter_time": 0.667348487854004, "loss": 0.2037854641675949, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 29.654143901402033, "step_time": 0.6134260864257813} +{"epoch": 0, "iter": 3446, "iter_tflops": 10.651615087019026, "iter_time": 1.7077809600830076, "loss": 0.20940984785556793, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 11.770140618438472, "step_time": 1.5454892196655272} +{"epoch": 0, "iter": 3447, "iter_tflops": 20.61025885353164, "iter_time": 0.8826005325317383, "loss": 0.3006666600704193, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 25.898494068737055, "step_time": 0.7023815898895264} +{"epoch": 0, "iter": 3448, "iter_tflops": 32.31429674117948, "iter_time": 0.562928092956543, "loss": 0.15920381247997284, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 35.358719664274886, "step_time": 0.5144593925476074} +{"epoch": 0, "iter": 3449, "iter_tflops": 10.435857684736678, "iter_time": 1.3815754852294921, "loss": 0.3860965967178345, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 11.09901661840407, "step_time": 1.2990272598266601} +{"epoch": 0, "iter": 3450, "iter_tflops": 11.115188684137413, "iter_time": 1.2971372375488284, "loss": 0.3080844581127167, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 14.387506356627071, 
"step_time": 1.0021142501831055} +{"epoch": 0, "iter": 3451, "iter_tflops": 25.4425297515355, "iter_time": 0.5666859893798829, "loss": 0.24218997359275818, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 27.159605606118802, "step_time": 0.5308591499328613} +{"epoch": 0, "iter": 3452, "iter_tflops": 26.535301292361474, "iter_time": 0.5433488388061524, "loss": 0.2665211260318756, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 28.16935780739125, "step_time": 0.5118300971984863} +{"epoch": 0, "iter": 3453, "iter_tflops": 34.78764723723682, "iter_time": 0.5930580291748048, "loss": 0.9387564659118652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.257492889979694, "step_time": 0.5537434730529784} +{"epoch": 0, "iter": 3454, "iter_tflops": 20.135338830625386, "iter_time": 1.0246211242675782, "loss": 0.9944069385528564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.1873594667329, "step_time": 0.8529700622558593} +{"epoch": 0, "iter": 3455, "iter_tflops": 45.05020005490831, "iter_time": 0.4579578666687011, "loss": 1.07118821144104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.84028427335655, "step_time": 0.4224196033477783} +{"epoch": 0, "iter": 3456, "iter_tflops": 46.53155592695706, "iter_time": 0.4433785438537598, "loss": 1.0162051916122437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.10475269074842, "step_time": 0.41175921249389646} +{"epoch": 0, "iter": 3457, "iter_tflops": 39.52910744554137, "iter_time": 0.5219215621948242, "loss": 0.6963604688644409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67449290340834, "step_time": 0.4834525756835938} +{"epoch": 0, "iter": 3458, "iter_tflops": 43.41733401746295, "iter_time": 0.4751810302734375, "loss": 0.761493444442749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.85656717980094, "step_time": 0.4403031368255615} +{"epoch": 0, "iter": 3459, "iter_tflops": 42.247042182424046, "iter_time": 0.4883440933227539, "loss": 0.8693479299545288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27580878039789, 
"step_time": 0.45567586898803714} +{"epoch": 0, "iter": 3460, "iter_tflops": 47.50846328472047, "iter_time": 0.4342614364624023, "loss": 0.9462235569953918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.27190424213366, "step_time": 0.4023859424591064} +{"epoch": 0, "iter": 3461, "iter_tflops": 26.826523209553905, "iter_time": 0.7690558090209961, "loss": 0.9423297643661499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.287841401071425, "step_time": 0.7293272476196289} +{"epoch": 0, "iter": 3462, "iter_tflops": 12.248132829010409, "iter_time": 1.6844276428222655, "loss": 0.9721505641937256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.42483954663122, "step_time": 1.1840047912597658} +{"epoch": 0, "iter": 3463, "iter_tflops": 14.93410914585838, "iter_time": 1.3814746704101564, "loss": 1.0076801776885986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.26699150312279, "step_time": 1.2682796020507812} +{"epoch": 0, "iter": 3464, "iter_tflops": 13.146105765500828, "iter_time": 1.5693692016601561, "loss": 0.8282268047332764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.658797593749988, "step_time": 1.3175400848388672} +{"epoch": 0, "iter": 3465, "iter_tflops": 17.43890832300022, "iter_time": 0.9558248901367188, "loss": 0.16616205871105194, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 18.58783596508485, "step_time": 0.896744659423828} +{"epoch": 0, "iter": 3466, "iter_tflops": 6.457637482935083, "iter_time": 2.581213745117188, "loss": 0.31257370114326477, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 7.385976799459063, "step_time": 2.2567824249267576} +{"epoch": 0, "iter": 3467, "iter_tflops": 10.920937617265055, "iter_time": 1.5262922668457033, "loss": 0.1898919641971588, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 12.776274587121048, "step_time": 1.304648120880127} +{"epoch": 0, "iter": 3468, "iter_tflops": 28.05112265393152, "iter_time": 0.5942201614379882, "loss": 0.3335151672363281, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 29.799421170651208, 
"step_time": 0.5593579330444336} +{"epoch": 0, "iter": 3469, "iter_tflops": 18.057956214055636, "iter_time": 0.9731326217651368, "loss": 0.35576504468917847, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 18.900586042388255, "step_time": 0.9297482223510741} +{"epoch": 0, "iter": 3470, "iter_tflops": 11.906979409915404, "iter_time": 1.4758391418457029, "loss": 0.3264836072921753, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 13.446852333462255, "step_time": 1.3068326950073241} +{"epoch": 0, "iter": 3471, "iter_tflops": 31.675061057246868, "iter_time": 0.5547830276489258, "loss": 0.2936779260635376, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 33.837120835868234, "step_time": 0.5193345603942872} +{"epoch": 0, "iter": 3472, "iter_tflops": 32.21420161446902, "iter_time": 0.5454981155395507, "loss": 0.2734822630882263, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 34.24390894016703, "step_time": 0.5131653137207031} +{"epoch": 0, "iter": 3473, "iter_tflops": 24.346942300390968, "iter_time": 0.8473792419433593, "loss": 0.5248178243637085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.719654023629854, "step_time": 0.8021528396606445} +{"epoch": 0, "iter": 3474, "iter_tflops": 13.43249867387489, "iter_time": 1.5359088439941404, "loss": 0.663982093334198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.219853279613698, "step_time": 1.1980992622375488} +{"epoch": 0, "iter": 3475, "iter_tflops": 41.88225088154479, "iter_time": 0.49259753417968755, "loss": 0.581379234790802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.736238255100766, "step_time": 0.4510885524749756} +{"epoch": 0, "iter": 3476, "iter_tflops": 40.320435739350344, "iter_time": 0.511678337097168, "loss": 0.6883317828178406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1486509609155, "step_time": 0.4673097152709961} +{"epoch": 0, "iter": 3477, "iter_tflops": 30.96113911441747, "iter_time": 0.5184990844726561, "loss": 0.05754230171442032, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 34.44002567748941, 
"step_time": 0.4661239929199219} +{"epoch": 0, "iter": 3478, "iter_tflops": 35.047060658944844, "iter_time": 0.45805046081542966, "loss": 0.02858206257224083, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 38.39928332737487, "step_time": 0.4180630702972412} +{"epoch": 0, "iter": 3479, "iter_tflops": 36.56219832208241, "iter_time": 0.43906884765625004, "loss": 0.05981412157416344, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 39.59180180342134, "step_time": 0.40547086906433105} +{"epoch": 0, "iter": 3480, "iter_tflops": 41.24700910223337, "iter_time": 0.3891996688842773, "loss": 0.05624333396553993, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 45.111097089689096, "step_time": 0.3558619346618653} +{"epoch": 0, "iter": 3481, "iter_tflops": 46.25219557502435, "iter_time": 0.44605652236938476, "loss": 0.20164993405342102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.919872186411446, "step_time": 0.405167818069458} +{"epoch": 0, "iter": 3482, "iter_tflops": 50.2432509206869, "iter_time": 0.4106241760253907, "loss": 0.14498214423656464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.94683443374751, "step_time": 0.3754737415313721} +{"epoch": 0, "iter": 3483, "iter_tflops": 48.50340336267771, "iter_time": 0.4253535232543945, "loss": 0.12286274880170822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.72801732531604, "step_time": 0.39127383422851564} +{"epoch": 0, "iter": 3484, "iter_tflops": 55.63873716370107, "iter_time": 0.37080448913574215, "loss": 0.15931935608386993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.39273292852392, "step_time": 0.34161549758911136} +{"epoch": 0, "iter": 3485, "iter_tflops": 25.248588424472278, "iter_time": 0.8171186904907227, "loss": 0.7661846280097961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.577132362053767, "step_time": 0.7762723693847655} +{"epoch": 0, "iter": 3486, "iter_tflops": 13.325141719455386, "iter_time": 1.548283233642578, "loss": 0.7210424542427063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
16.777458589472317, "step_time": 1.229691219329834} +{"epoch": 0, "iter": 3487, "iter_tflops": 38.13750633623823, "iter_time": 0.5409659805297852, "loss": 0.9301450252532959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.54798761112323, "step_time": 0.49656059646606443} +{"epoch": 0, "iter": 3488, "iter_tflops": 35.01728745749444, "iter_time": 0.5891688079833984, "loss": 0.9421846270561218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.97216856125311, "step_time": 0.5433214454650879} +{"epoch": 0, "iter": 3489, "iter_tflops": 28.375143730573456, "iter_time": 0.7270833129882813, "loss": 0.22665682435035706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.980926115756372, "step_time": 0.6659288825988771} +{"epoch": 0, "iter": 3490, "iter_tflops": 37.454677528147926, "iter_time": 0.5508282241821288, "loss": 0.2531760334968567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.23882736027843, "step_time": 0.5002832241058349} +{"epoch": 0, "iter": 3491, "iter_tflops": 41.260994256713495, "iter_time": 0.5000144538879395, "loss": 0.17829802632331848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.174367667889314, "step_time": 0.4566991100311279} +{"epoch": 0, "iter": 3492, "iter_tflops": 41.55297510273245, "iter_time": 0.49650099563598626, "loss": 0.3997075855731964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38488887126702, "step_time": 0.45458067703247074} +{"epoch": 0, "iter": 3493, "iter_tflops": 22.79133613389262, "iter_time": 0.9052164993286133, "loss": 0.8725283145904541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.83999605212005, "step_time": 0.8305594520568848} +{"epoch": 0, "iter": 3494, "iter_tflops": 34.3569047368746, "iter_time": 0.6004933700561523, "loss": 0.8127036094665527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.857244925631726, "step_time": 0.45992778968811043} +{"epoch": 0, "iter": 3495, "iter_tflops": 50.058883092314055, "iter_time": 0.41213651275634766, "loss": 0.7336246371269226, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 54.23993423235487, "step_time": 0.3803672294616699} +{"epoch": 0, "iter": 3496, "iter_tflops": 50.869829883260984, "iter_time": 0.40556639480590817, "loss": 0.841534435749054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.72678899409885, "step_time": 0.3769834461212158} +{"epoch": 0, "iter": 3497, "iter_tflops": 25.420925552023423, "iter_time": 0.692891471862793, "loss": 0.21704848110675812, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 26.88860304212872, "step_time": 0.6550709419250488} +{"epoch": 0, "iter": 3498, "iter_tflops": 18.560760068734773, "iter_time": 0.9489882125854493, "loss": 0.14812467992305756, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 22.76844584083452, "step_time": 0.773611982345581} +{"epoch": 0, "iter": 3499, "iter_tflops": 41.81739340275363, "iter_time": 0.42121091461181637, "loss": 0.1591023951768875, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 45.34845033461764, "step_time": 0.38841332817077634} +{"epoch": 0, "iter": 3500, "iter_tflops": 47.267179566492786, "iter_time": 0.3726463623046875, "loss": 0.21972578763961792, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 51.207985381607116, "step_time": 0.34396866798400877} +{"epoch": 0, "iter": 3501, "iter_tflops": 23.653536201367352, "iter_time": 0.8722202606201171, "loss": 0.9712561368942261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.77822670513855, "step_time": 0.8326299438476563} +{"epoch": 0, "iter": 3502, "iter_tflops": 14.300475532510974, "iter_time": 1.4426858367919924, "loss": 1.0281480550765991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.06048596839297, "step_time": 1.2092910804748533} +{"epoch": 0, "iter": 3503, "iter_tflops": 36.28257958029712, "iter_time": 0.5686225662231446, "loss": 0.8652710318565369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.49766185352965, "step_time": 0.5223370838165283} +{"epoch": 0, "iter": 3504, "iter_tflops": 39.23021899015904, "iter_time": 0.5258979949951171, "loss": 0.6836674213409424, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 42.32015340157844, "step_time": 0.4875004425048828} +{"epoch": 0, "iter": 3505, "iter_tflops": 24.17869194123023, "iter_time": 0.8532758331298829, "loss": 0.9891734719276428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.852415581583134, "step_time": 0.798033493041992} +{"epoch": 0, "iter": 3506, "iter_tflops": 10.923996932191644, "iter_time": 1.8886030120849608, "loss": 1.0590827465057373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.052906899598696, "step_time": 1.2851936187744142} +{"epoch": 0, "iter": 3507, "iter_tflops": 13.698985878408411, "iter_time": 1.5060307159423827, "loss": 0.7826272249221802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.565731678564063, "step_time": 1.2454079246520995} +{"epoch": 0, "iter": 3508, "iter_tflops": 19.611039942309763, "iter_time": 1.0520142517089843, "loss": 0.9854365587234497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.1207955333441, "step_time": 0.8553239250183106} +{"epoch": 0, "iter": 3509, "iter_tflops": 17.018617401872305, "iter_time": 0.8759672546386719, "loss": 0.2854483425617218, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 17.920541742180298, "step_time": 0.8318806304931641} +{"epoch": 0, "iter": 3510, "iter_tflops": 10.06208098981443, "iter_time": 1.481577377319336, "loss": 0.17885924875736237, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 11.684917056199362, "step_time": 1.2758115005493165} +{"epoch": 0, "iter": 3511, "iter_tflops": 26.621706691421185, "iter_time": 0.5599848175048828, "loss": 0.26511383056640625, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 28.33713906722248, "step_time": 0.5260852737426758} +{"epoch": 0, "iter": 3512, "iter_tflops": 27.792911087282125, "iter_time": 0.536386833190918, "loss": 0.4953061640262604, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 29.521935639910286, "step_time": 0.5049720230102539} +{"epoch": 0, "iter": 3513, "iter_tflops": 34.44441324737947, "iter_time": 0.5989677734375, "loss": 0.42125633358955383, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 37.01771429906548, "step_time": 0.5573302917480468} +{"epoch": 0, "iter": 3514, "iter_tflops": 19.989880927781755, "iter_time": 1.0320768585205078, "loss": 0.42643165588378906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.51625854751382, "step_time": 0.9162753868103026} +{"epoch": 0, "iter": 3515, "iter_tflops": 33.21782408751847, "iter_time": 0.6210850372314453, "loss": 0.41201716661453247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.85743319936531, "step_time": 0.5753644828796386} +{"epoch": 0, "iter": 3516, "iter_tflops": 39.54271025334065, "iter_time": 0.5217420196533203, "loss": 0.46619123220443726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41833768819093, "step_time": 0.47517004585266115} +{"epoch": 0, "iter": 3517, "iter_tflops": 19.838707338161015, "iter_time": 1.039941421508789, "loss": 0.5604335069656372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.391755222953286, "step_time": 0.9644413604736328} +{"epoch": 0, "iter": 3518, "iter_tflops": 21.127959372205744, "iter_time": 0.9764830169677734, "loss": 0.5510494709014893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.38191302423673, "step_time": 0.8128265781402587} +{"epoch": 0, "iter": 3519, "iter_tflops": 50.00693728907396, "iter_time": 0.41256462860107423, "loss": 0.8036040663719177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.00392651932003, "step_time": 0.38202950859069823} +{"epoch": 0, "iter": 3520, "iter_tflops": 43.64120610310329, "iter_time": 0.4727434310913086, "loss": 0.5544530749320984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20722314999234, "step_time": 0.4370325584411621} +{"epoch": 0, "iter": 3521, "iter_tflops": 27.972387955221393, "iter_time": 0.7375521011352539, "loss": 0.15381716191768646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.98515155192601, "step_time": 0.6880436630249024} +{"epoch": 0, "iter": 3522, "iter_tflops": 12.864189148435397, "iter_time": 1.603761672973633, "loss": 0.11132882535457611, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 17.199901966067785, "step_time": 1.1994890174865722} +{"epoch": 0, "iter": 3523, "iter_tflops": 13.75373285156181, "iter_time": 1.5000359344482423, "loss": 0.22560156881809235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.603677237692015, "step_time": 1.1719763565063475} +{"epoch": 0, "iter": 3524, "iter_tflops": 13.320787939459446, "iter_time": 1.5487892761230468, "loss": 0.22875292599201202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.741309854488296, "step_time": 1.3106338481903077} +{"epoch": 0, "iter": 3525, "iter_tflops": 23.911383723273307, "iter_time": 0.5927413864135742, "loss": 0.2205217480659485, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 25.886759821993067, "step_time": 0.5475102653503419} +{"epoch": 0, "iter": 3526, "iter_tflops": 26.050225717638202, "iter_time": 0.5440746231079101, "loss": 0.2985956072807312, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 27.800012283548238, "step_time": 0.5098295135498047} +{"epoch": 0, "iter": 3527, "iter_tflops": 25.427946412217914, "iter_time": 0.5573893585205079, "loss": 0.30518224835395813, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 26.98058143630111, "step_time": 0.5253136138916015} +{"epoch": 0, "iter": 3528, "iter_tflops": 25.559748653732957, "iter_time": 0.554515106201172, "loss": 0.4846331775188446, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 27.187673418711487, "step_time": 0.5213122329711914} +{"epoch": 0, "iter": 3529, "iter_tflops": 35.2153164374968, "iter_time": 0.5858556900024414, "loss": 0.08835667371749878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.890156486783624, "step_time": 0.5444974479675293} +{"epoch": 0, "iter": 3530, "iter_tflops": 19.619535741682494, "iter_time": 1.0515587005615235, "loss": 0.11578360199928284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.581867057195705, "step_time": 0.8748710803985597} +{"epoch": 0, "iter": 3531, "iter_tflops": 52.06320422769218, "iter_time": 0.3962701454162597, "loss": 0.07472117245197296, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 56.76472877317839, "step_time": 0.36344916915893555} +{"epoch": 0, "iter": 3532, "iter_tflops": 54.552496536283066, "iter_time": 0.3781878890991211, "loss": 0.1048198789358139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.01609645544176, "step_time": 0.34958417701721195} +{"epoch": 0, "iter": 3533, "iter_tflops": 23.986949173782325, "iter_time": 0.8600966033935546, "loss": 0.8611506819725037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.150053148798538, "step_time": 0.8203200759887695} +{"epoch": 0, "iter": 3534, "iter_tflops": 19.49610830019006, "iter_time": 1.0582159881591797, "loss": 0.9220188856124878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.10962355903452, "step_time": 0.9331273078918456} +{"epoch": 0, "iter": 3535, "iter_tflops": 39.27193422025663, "iter_time": 0.5253393783569337, "loss": 0.8210115432739258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.881825576462795, "step_time": 0.4811150932312013} +{"epoch": 0, "iter": 3536, "iter_tflops": 37.187894446672765, "iter_time": 0.5547798233032226, "loss": 0.7276607751846313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.18413460392245, "step_time": 0.5134139060974121} +{"epoch": 0, "iter": 3537, "iter_tflops": 27.03976737966898, "iter_time": 0.7629907913208007, "loss": 0.0564279705286026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.144576775109837, "step_time": 0.707887908935547} +{"epoch": 0, "iter": 3538, "iter_tflops": 12.810895962620586, "iter_time": 1.6104333038330076, "loss": 0.041844598948955536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.833744987040658, "step_time": 1.4913599700927733} +{"epoch": 0, "iter": 3539, "iter_tflops": 12.794449838634247, "iter_time": 1.6125033721923827, "loss": 0.03840441629290581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.168684388924984, "step_time": 1.3601109352111815} +{"epoch": 0, "iter": 3540, "iter_tflops": 48.87408662307084, "iter_time": 0.4221274490356445, "loss": 0.04218509793281555, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 53.290278902401646, "step_time": 0.38714553451538086} +{"epoch": 0, "iter": 3541, "iter_tflops": 16.787707527640247, "iter_time": 0.8734220199584961, "loss": 0.26071882247924805, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 17.62777545750225, "step_time": 0.8317982864379883} +{"epoch": 0, "iter": 3542, "iter_tflops": 10.139914579548314, "iter_time": 1.4460430908203126, "loss": 0.19791090488433838, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 11.95829100608921, "step_time": 1.2261579360961914} +{"epoch": 0, "iter": 3543, "iter_tflops": 26.373484662781312, "iter_time": 0.5559657211303711, "loss": 0.359375923871994, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 28.167233170573613, "step_time": 0.5205606575012207} +{"epoch": 0, "iter": 3544, "iter_tflops": 28.145085149699234, "iter_time": 0.5209702987670899, "loss": 0.22036175429821014, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 29.864935630666995, "step_time": 0.49096886062622064} +{"epoch": 0, "iter": 3545, "iter_tflops": 30.41377022941497, "iter_time": 0.6783471221923828, "loss": 0.6736275553703308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.295266324579416, "step_time": 0.6388271675109863} +{"epoch": 0, "iter": 3546, "iter_tflops": 15.356193656960334, "iter_time": 1.3435030822753904, "loss": 0.689818799495697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.02010382843458, "step_time": 1.144893154144287} +{"epoch": 0, "iter": 3547, "iter_tflops": 40.41554378720259, "iter_time": 0.5104742279052734, "loss": 0.8756300806999207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44037335104504, "step_time": 0.46424212837219236} +{"epoch": 0, "iter": 3548, "iter_tflops": 44.82289499611656, "iter_time": 0.4602802543640137, "loss": 0.8638805150985718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.63233672873193, "step_time": 0.4242258319854736} +{"epoch": 0, "iter": 3549, "iter_tflops": 17.509133472305187, "iter_time": 1.178304656982422, "loss": 0.9537951350212097, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.64277316708703, "step_time": 1.1066536788940429} +{"epoch": 0, "iter": 3550, "iter_tflops": 15.634822148386174, "iter_time": 1.3195604858398438, "loss": 0.9348227977752686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.21859008587535, "step_time": 1.0204021854400636} +{"epoch": 0, "iter": 3551, "iter_tflops": 42.40728697858332, "iter_time": 0.48649878311157224, "loss": 0.9602336883544922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.40936098944138, "step_time": 0.45433569335937507} +{"epoch": 0, "iter": 3552, "iter_tflops": 45.27445834711473, "iter_time": 0.45568946075439454, "loss": 0.9771748185157776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.55874163853098, "step_time": 0.42486878395080563} +{"epoch": 0, "iter": 3553, "iter_tflops": 38.32591585942902, "iter_time": 0.5199151229858399, "loss": 0.0014282211195677519, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 41.52612895864005, "step_time": 0.47984783935546876} +{"epoch": 0, "iter": 3554, "iter_tflops": 39.07989145666594, "iter_time": 0.5098843040466309, "loss": 0.0033895934466272593, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 44.27215490435676, "step_time": 0.45008478355407716} +{"epoch": 0, "iter": 3555, "iter_tflops": 43.520271528098355, "iter_time": 0.4578607292175293, "loss": 0.0037861133459955454, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 48.25970841824335, "step_time": 0.4128956413269043} +{"epoch": 0, "iter": 3556, "iter_tflops": 47.05146411437211, "iter_time": 0.42349847412109376, "loss": 0.003685023169964552, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 52.10849055200313, "step_time": 0.3823987808227539} +{"epoch": 0, "iter": 3557, "iter_tflops": 20.198711000197648, "iter_time": 1.0214064407348633, "loss": 0.8535419702529907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.290273661759223, "step_time": 0.9690384368896484} +{"epoch": 0, "iter": 3558, "iter_tflops": 22.45771224719756, "iter_time": 0.9186640777587891, "loss": 
0.7959504723548889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.846882527359917, "step_time": 0.7408762359619141} +{"epoch": 0, "iter": 3559, "iter_tflops": 40.379317964826434, "iter_time": 0.5109321937561034, "loss": 0.629850447177887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.11552806109012, "step_time": 0.46766058158874513} +{"epoch": 0, "iter": 3560, "iter_tflops": 43.70800949406383, "iter_time": 0.4720208892822265, "loss": 0.767579197883606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.37557416626284, "step_time": 0.43547954559326174} +{"epoch": 0, "iter": 3561, "iter_tflops": 34.367857687006264, "iter_time": 0.6003019943237304, "loss": 0.2147783637046814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.891140818856364, "step_time": 0.5444833030700684} +{"epoch": 0, "iter": 3562, "iter_tflops": 9.887120891791694, "iter_time": 2.0866634216308593, "loss": 0.15638497471809387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.164168578951314, "step_time": 1.8479740219116212} +{"epoch": 0, "iter": 3563, "iter_tflops": 20.795389653984067, "iter_time": 0.992099395751953, "loss": 0.16632047295570374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.71824503471284, "step_time": 0.8698406429290771} +{"epoch": 0, "iter": 3564, "iter_tflops": 44.47968286928694, "iter_time": 0.46383184814453127, "loss": 0.2143096625804901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.3553208796201, "step_time": 0.4266561183929444} +{"epoch": 0, "iter": 3565, "iter_tflops": 16.485464997732286, "iter_time": 0.9514259796142578, "loss": 0.24002157151699066, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 17.82981409755961, "step_time": 0.879689468383789} +{"epoch": 0, "iter": 3566, "iter_tflops": 18.07482008193192, "iter_time": 0.8677651901245119, "loss": 0.32634860277175903, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 19.804580706862687, "step_time": 0.7919733276367187} +{"epoch": 0, "iter": 3567, "iter_tflops": 27.936493057034028, "iter_time": 0.5614412536621094, "loss": 
0.36349940299987793, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.7458232620148, "step_time": 0.5272908248901367} +{"epoch": 0, "iter": 3568, "iter_tflops": 29.50653675089401, "iter_time": 0.5315669479370118, "loss": 0.2722906768321991, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 31.358728664640932, "step_time": 0.5001701393127442} +{"epoch": 0, "iter": 3569, "iter_tflops": 32.02053817660385, "iter_time": 0.6443081436157228, "loss": 0.7125057578086853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.22586170604903, "step_time": 0.6027925224304199} +{"epoch": 0, "iter": 3570, "iter_tflops": 19.766384754146447, "iter_time": 1.0437464294433594, "loss": 0.9083192944526672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.099994031605238, "step_time": 0.856062183380127} +{"epoch": 0, "iter": 3571, "iter_tflops": 48.9632901270015, "iter_time": 0.4213583984375, "loss": 0.7965500950813293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.043201339717235, "step_time": 0.38894887542724604} +{"epoch": 0, "iter": 3572, "iter_tflops": 50.86129294649989, "iter_time": 0.4056344680786133, "loss": 0.7573665380477905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.77292566387693, "step_time": 0.3766659030914307} +{"epoch": 0, "iter": 3573, "iter_tflops": 28.592224115387673, "iter_time": 0.7215630874633789, "loss": 0.4257837235927582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.18702408061565, "step_time": 0.6834424438476563} +{"epoch": 0, "iter": 3574, "iter_tflops": 13.95693928162582, "iter_time": 1.4781961212158206, "loss": 0.4290701448917389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.014257871527743, "step_time": 1.2125767498016358} +{"epoch": 0, "iter": 3575, "iter_tflops": 39.67710845392525, "iter_time": 0.519974723815918, "loss": 0.5598616600036621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.44676907535374, "step_time": 0.47485909652709957} +{"epoch": 0, "iter": 3576, "iter_tflops": 38.669104248755815, "iter_time": 0.5335291290283203, "loss": 
0.4726286828517914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6049757700247, "step_time": 0.48424140930175785} +{"epoch": 0, "iter": 3577, "iter_tflops": 19.12365781802163, "iter_time": 1.078825698852539, "loss": 0.008391164243221283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.724568394031486, "step_time": 0.9954896583557128} +{"epoch": 0, "iter": 3578, "iter_tflops": 19.48738451510007, "iter_time": 1.058689712524414, "loss": 0.004739817697554827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.346726546690025, "step_time": 0.8473867511749269} +{"epoch": 0, "iter": 3579, "iter_tflops": 45.85004450553197, "iter_time": 0.4499688873291016, "loss": 0.009789196774363518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.89175224255975, "step_time": 0.40539169120788576} +{"epoch": 0, "iter": 3580, "iter_tflops": 44.98611340812106, "iter_time": 0.4586102676391602, "loss": 0.03306257352232933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.624620011524485, "step_time": 0.41574310302734374} +{"epoch": 0, "iter": 3581, "iter_tflops": 19.883601642489083, "iter_time": 1.0375933837890625, "loss": 0.1482958048582077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.23383057859339, "step_time": 0.9716143035888671} +{"epoch": 0, "iter": 3582, "iter_tflops": 14.840818992595807, "iter_time": 1.39015869140625, "loss": 0.08513805270195007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.27902147869117, "step_time": 1.0701317768096925} +{"epoch": 0, "iter": 3583, "iter_tflops": 39.09093819210186, "iter_time": 0.5277717666625976, "loss": 0.125333771109581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87518564149894, "step_time": 0.4811896018981933} +{"epoch": 0, "iter": 3584, "iter_tflops": 42.577103981208914, "iter_time": 0.4845584030151367, "loss": 0.16673614084720612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.75806470600249, "step_time": 0.441230697631836} +{"epoch": 0, "iter": 3585, "iter_tflops": 24.036764056187554, "iter_time": 0.6968763427734375, 
"loss": 0.014233838766813278, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 26.14783025937651, "step_time": 0.6406134681701661} +{"epoch": 0, "iter": 3586, "iter_tflops": 7.619747187756955, "iter_time": 2.198321258544922, "loss": 0.01823616772890091, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 9.384081794025969, "step_time": 1.7850070571899412} +{"epoch": 0, "iter": 3587, "iter_tflops": 10.465795655499315, "iter_time": 1.6005139770507812, "loss": 0.0046270014718174934, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 12.09468761516892, "step_time": 1.3849594764709472} +{"epoch": 0, "iter": 3588, "iter_tflops": 21.219110685979864, "iter_time": 0.7894134902954101, "loss": 0.0020883698016405106, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 31.299926703513133, "step_time": 0.5351658611297607} +{"epoch": 0, "iter": 3589, "iter_tflops": 11.56398937946402, "iter_time": 1.3244961395263672, "loss": 0.15553225576877594, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 12.241075634630187, "step_time": 1.2512347564697266} +{"epoch": 0, "iter": 3590, "iter_tflops": 14.363800845740583, "iter_time": 1.0663235626220704, "loss": 0.3131990432739258, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 16.9712866996049, "step_time": 0.9024925193786622} +{"epoch": 0, "iter": 3591, "iter_tflops": 21.799310012546258, "iter_time": 0.7026121139526368, "loss": 0.2287577986717224, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 23.495550241882302, "step_time": 0.6518876609802247} +{"epoch": 0, "iter": 3592, "iter_tflops": 23.829530811452106, "iter_time": 0.642751190185547, "loss": 0.23731288313865662, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 25.640980163247207, "step_time": 0.5973429718017578} +{"epoch": 0, "iter": 3593, "iter_tflops": 15.976057552128077, "iter_time": 1.291375762939453, "loss": 0.4301700294017792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.24286182190532, "step_time": 1.1965005416870118} +{"epoch": 0, "iter": 3594, "iter_tflops": 14.885512894790951, "iter_time": 
1.3859847259521485, "loss": 0.4122532904148102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.383026078033744, "step_time": 0.921729413986206} +{"epoch": 0, "iter": 3595, "iter_tflops": 41.81297580680056, "iter_time": 0.49341366195678715, "loss": 0.42235785722732544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83135980760357, "step_time": 0.45015233230590823} +{"epoch": 0, "iter": 3596, "iter_tflops": 42.092386051130944, "iter_time": 0.490138370513916, "loss": 0.47586941719055176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.958413366342405, "step_time": 0.44890787124633785} +{"epoch": 0, "iter": 3597, "iter_tflops": 32.85028538057355, "iter_time": 0.628033920288086, "loss": 0.187427818775177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.17576543556812, "step_time": 0.5703015060424805} +{"epoch": 0, "iter": 3598, "iter_tflops": 39.08746826333838, "iter_time": 0.527818618774414, "loss": 0.15236937999725342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.61091786320245, "step_time": 0.473071756362915} +{"epoch": 0, "iter": 3599, "iter_tflops": 39.96758335248599, "iter_time": 0.516195671081543, "loss": 0.1498085856437683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.808502943838356, "step_time": 0.47093810844421385} +{"epoch": 0, "iter": 3600, "iter_tflops": 41.475811244131926, "iter_time": 0.4974247131347656, "loss": 0.22937564551830292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.21108477241497, "step_time": 0.4563282127380371} +{"epoch": 0, "iter": 3601, "iter_tflops": 23.166797325461065, "iter_time": 0.8905457763671876, "loss": 1.237452507019043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.186021838292834, "step_time": 0.8191485595703125} +{"epoch": 0, "iter": 3602, "iter_tflops": 29.92734157038311, "iter_time": 0.6893727416992188, "loss": 0.9941253662109375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.16683004433733, "step_time": 0.6038339958190918} +{"epoch": 0, "iter": 3603, "iter_tflops": 42.72531151898514, "iter_time": 
0.4828775444030762, "loss": 1.0259414911270142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79674816885946, "step_time": 0.4504925422668457} +{"epoch": 0, "iter": 3604, "iter_tflops": 48.91366066273624, "iter_time": 0.4217859230041504, "loss": 0.9648994207382202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.49086775947208, "step_time": 0.3930415782928467} +{"epoch": 0, "iter": 3605, "iter_tflops": 32.41995108089609, "iter_time": 0.6363702850341797, "loss": 0.5079811215400696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.642646534187655, "step_time": 0.595540340423584} +{"epoch": 0, "iter": 3606, "iter_tflops": 12.973278038308282, "iter_time": 1.5902760620117187, "loss": 0.816618800163269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.126724070194284, "step_time": 1.3638837738037108} +{"epoch": 0, "iter": 3607, "iter_tflops": 37.47940989805832, "iter_time": 0.5504647369384765, "loss": 0.5370798110961914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.89894351155629, "step_time": 0.5044407444000245} +{"epoch": 0, "iter": 3608, "iter_tflops": 41.709693668785995, "iter_time": 0.4946354598999024, "loss": 0.6314409971237183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.5274992802939, "step_time": 0.45315674781799314} +{"epoch": 0, "iter": 3609, "iter_tflops": 15.91753880553343, "iter_time": 1.296123336791992, "loss": 0.9495490789413452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.33781201741588, "step_time": 1.1899479293823243} +{"epoch": 0, "iter": 3610, "iter_tflops": 22.193824800652592, "iter_time": 0.9295871124267578, "loss": 0.9641414880752563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.41305087905322, "step_time": 0.7014265060424805} +{"epoch": 0, "iter": 3611, "iter_tflops": 47.271114094700465, "iter_time": 0.4364418716430664, "loss": 1.0424368381500244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.047550283270056, "step_time": 0.4041544284820557} +{"epoch": 0, "iter": 3612, "iter_tflops": 48.17069324785191, "iter_time": 
0.42829139709472663, "loss": 0.9632659554481506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.93924334382811, "step_time": 0.39721590423583986} +{"epoch": 0, "iter": 3613, "iter_tflops": 29.020223135051616, "iter_time": 0.7109212570190431, "loss": 0.026045339182019234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.80580591753685, "step_time": 0.6697144546508789} +{"epoch": 0, "iter": 3614, "iter_tflops": 13.162724940453339, "iter_time": 1.5673877258300781, "loss": 0.057448819279670715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.24338359619648, "step_time": 1.270122901916504} +{"epoch": 0, "iter": 3615, "iter_tflops": 42.84175784692235, "iter_time": 0.481565055847168, "loss": 0.0163824912160635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.61852748004893, "step_time": 0.43325769615173343} +{"epoch": 0, "iter": 3616, "iter_tflops": 45.727832272605355, "iter_time": 0.45117147445678707, "loss": 0.022871583700180054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.096402146121285, "step_time": 0.4118278484344483} +{"epoch": 0, "iter": 3617, "iter_tflops": 19.23942923038016, "iter_time": 1.072333969116211, "loss": 0.07963794469833374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.587836623249604, "step_time": 1.002101089477539} +{"epoch": 0, "iter": 3618, "iter_tflops": 17.192684391251834, "iter_time": 1.1999925689697266, "loss": 0.07160332053899765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.735187876192523, "step_time": 0.8016686573028564} +{"epoch": 0, "iter": 3619, "iter_tflops": 51.96062784895462, "iter_time": 0.39705242919921874, "loss": 0.05143972486257553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.38322363426007, "step_time": 0.36590837097167966} +{"epoch": 0, "iter": 3620, "iter_tflops": 48.142231732236304, "iter_time": 0.4285446014404296, "loss": 0.09465563297271729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.52262214721993, "step_time": 0.39280395126342776} +{"epoch": 0, "iter": 3621, "iter_tflops": 26.654471731600918, 
"iter_time": 0.7740199737548829, "loss": 0.24227316677570343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.28258987932813, "step_time": 0.7294626693725587} +{"epoch": 0, "iter": 3622, "iter_tflops": 16.249436131755456, "iter_time": 1.2696498107910157, "loss": 0.19329334795475006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.614645418452792, "step_time": 1.0518208751678466} +{"epoch": 0, "iter": 3623, "iter_tflops": 47.635173759209934, "iter_time": 0.4331062927246094, "loss": 0.25521352887153625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99743498017907, "step_time": 0.396771369934082} +{"epoch": 0, "iter": 3624, "iter_tflops": 48.76626407455512, "iter_time": 0.423060775756836, "loss": 0.2255469560623169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.06161614925772, "step_time": 0.3888138923645019} +{"epoch": 0, "iter": 3625, "iter_tflops": 29.076899796008004, "iter_time": 0.7095355300903321, "loss": 0.7540076375007629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.998566624667014, "step_time": 0.665549919128418} +{"epoch": 0, "iter": 3626, "iter_tflops": 45.728880459031416, "iter_time": 0.45116113281250003, "loss": 0.5110741853713989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.542139981934305, "step_time": 0.416435251235962} +{"epoch": 0, "iter": 3627, "iter_tflops": 51.4292051268142, "iter_time": 0.4011552085876465, "loss": 0.4699922800064087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.871631108244394, "step_time": 0.3692588367462158} +{"epoch": 0, "iter": 3628, "iter_tflops": 47.41968266856273, "iter_time": 0.4350744743347168, "loss": 0.7037069201469421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.372728820178686, "step_time": 0.4015962162017822} +{"epoch": 0, "iter": 3629, "iter_tflops": 30.04013987620873, "iter_time": 0.6867842025756836, "loss": 0.21743157505989075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.87662734615533, "step_time": 0.6472169494628905} +{"epoch": 0, "iter": 3630, "iter_tflops": 16.633358978378, 
"iter_time": 1.2403443908691405, "loss": 0.2731102705001831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.755212364291776, "step_time": 1.1000191898345946} +{"epoch": 0, "iter": 3631, "iter_tflops": 40.33984847892894, "iter_time": 0.5114321022033691, "loss": 0.24592186510562897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.138895918586456, "step_time": 0.4674129943847657} +{"epoch": 0, "iter": 3632, "iter_tflops": 37.777109727578285, "iter_time": 0.546126838684082, "loss": 0.16370254755020142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33967778455438, "step_time": 0.49906275558471674} +{"epoch": 0, "iter": 3633, "iter_tflops": 18.693378346164426, "iter_time": 1.1036578369140626, "loss": 0.860990047454834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.032344771636634, "step_time": 1.0298890991210938} +{"epoch": 0, "iter": 3634, "iter_tflops": 20.407178010110933, "iter_time": 1.0109723892211915, "loss": 0.8730131387710571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.44276098184357, "step_time": 0.8440574092864991} +{"epoch": 0, "iter": 3635, "iter_tflops": 44.66142685251087, "iter_time": 0.4619443435668945, "loss": 0.7354487776756287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.085732266078764, "step_time": 0.4290481300354004} +{"epoch": 0, "iter": 3636, "iter_tflops": 42.574536921613465, "iter_time": 0.4845876197814941, "loss": 0.7513061165809631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.861659935100846, "step_time": 0.4498549232482909} +{"epoch": 0, "iter": 3637, "iter_tflops": 19.7903256384514, "iter_time": 0.5256499404907227, "loss": 0.008453556336462498, "lr": 3e-05, "seqlen": 4192.0, "step_tflops": 21.646472718848127, "step_time": 0.48057637977600093} +{"epoch": 0, "iter": 3638, "iter_tflops": 26.58395480177152, "iter_time": 0.39131813049316405, "loss": 0.0038344149943441153, "lr": 3e-05, "seqlen": 4192.0, "step_tflops": 29.058366254068122, "step_time": 0.35799615859985345} +{"epoch": 0, "iter": 3639, "iter_tflops": 
27.5424411065669, "iter_time": 0.3777001266479492, "loss": 0.02287997491657734, "lr": 3e-05, "seqlen": 4192.0, "step_tflops": 30.050138670458796, "step_time": 0.3461808815002442} +{"epoch": 0, "iter": 3640, "iter_tflops": 28.72892836658097, "iter_time": 0.3621013412475586, "loss": 0.013340309262275696, "lr": 3e-05, "seqlen": 4192.0, "step_tflops": 31.242391882472425, "step_time": 0.332970136642456} +{"epoch": 0, "iter": 3641, "iter_tflops": 25.590044336139762, "iter_time": 0.8062156219482421, "loss": 0.198616623878479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.915305922442105, "step_time": 0.7665190048217773} +{"epoch": 0, "iter": 3642, "iter_tflops": 17.26320756140295, "iter_time": 1.1950903930664063, "loss": 0.27306267619132996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.660291541285638, "step_time": 0.8366119060516358} +{"epoch": 0, "iter": 3643, "iter_tflops": 39.6416651929442, "iter_time": 0.5204396286010742, "loss": 0.11980970948934555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.541474517021435, "step_time": 0.47382624816894536} +{"epoch": 0, "iter": 3644, "iter_tflops": 47.190529719460486, "iter_time": 0.43718715667724606, "loss": 0.1986084282398224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.789149320141796, "step_time": 0.3983671054840088} +{"epoch": 0, "iter": 3645, "iter_tflops": 18.777584389154676, "iter_time": 1.0987086029052735, "loss": 0.7789568305015564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.120917448992657, "step_time": 1.0253555068969726} +{"epoch": 0, "iter": 3646, "iter_tflops": 27.49633921619386, "iter_time": 0.7503214645385742, "loss": 0.7762183547019958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.620868555222607, "step_time": 0.6737592525482178} +{"epoch": 0, "iter": 3647, "iter_tflops": 37.7219874749928, "iter_time": 0.5469248809814453, "loss": 0.5136541724205017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.91846606749668, "step_time": 0.5042000713348388} +{"epoch": 0, "iter": 3648, 
"iter_tflops": 36.797788633870894, "iter_time": 0.5606612319946289, "loss": 0.5651422142982483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.87069484242148, "step_time": 0.5174500617980957} +{"epoch": 0, "iter": 3649, "iter_tflops": 21.42910992102034, "iter_time": 0.9627601699829101, "loss": 0.6032060384750366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.761248519272357, "step_time": 0.9064130859374999} +{"epoch": 0, "iter": 3650, "iter_tflops": 15.301889286291116, "iter_time": 1.3482709960937502, "loss": 0.5861291289329529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.377115767255376, "step_time": 1.1226513328552246} +{"epoch": 0, "iter": 3651, "iter_tflops": 38.00341291885054, "iter_time": 0.5428747558593751, "loss": 0.5775793790817261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.58092307486821, "step_time": 0.49616728019714357} +{"epoch": 0, "iter": 3652, "iter_tflops": 44.08388241056209, "iter_time": 0.4679962921142578, "loss": 0.4522330164909363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.008366894371605, "step_time": 0.4297395401000977} +{"epoch": 0, "iter": 3653, "iter_tflops": 17.55090078191941, "iter_time": 1.1755005493164061, "loss": 1.02967369556427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.84868641539177, "step_time": 1.0945639953613282} +{"epoch": 0, "iter": 3654, "iter_tflops": 20.603730909164188, "iter_time": 1.00132804107666, "loss": 0.8879261016845703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.33291461237485, "step_time": 0.6801553287506104} +{"epoch": 0, "iter": 3655, "iter_tflops": 40.44274381694696, "iter_time": 0.5101309051513672, "loss": 0.8251210451126099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.023556113506125, "step_time": 0.46863759613037104} +{"epoch": 0, "iter": 3656, "iter_tflops": 41.45566979580242, "iter_time": 0.49766638946533204, "loss": 0.8649293780326843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.04010634506978, "step_time": 0.4580604972839356} +{"epoch": 0, "iter": 3657, 
"iter_tflops": 21.32182300446324, "iter_time": 0.9676045761108398, "loss": 1.2282456159591675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.921762085390366, "step_time": 0.9000657730102539} +{"epoch": 0, "iter": 3658, "iter_tflops": 16.817054520984833, "iter_time": 1.2267958984375, "loss": 0.9152603149414062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.162310522614632, "step_time": 0.8538543319702148} +{"epoch": 0, "iter": 3659, "iter_tflops": 38.19415427520959, "iter_time": 0.5401636428833008, "loss": 0.9273300766944885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.495723299699655, "step_time": 0.497186019897461} +{"epoch": 0, "iter": 3660, "iter_tflops": 36.517740896115136, "iter_time": 0.564960838317871, "loss": 0.6378332376480103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.50913378957562, "step_time": 0.522185417175293} +{"epoch": 0, "iter": 3661, "iter_tflops": 26.447005829497314, "iter_time": 0.7800918426513671, "loss": 0.1545889973640442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.42618544493153, "step_time": 0.7257777709960938} +{"epoch": 0, "iter": 3662, "iter_tflops": 16.211501059864176, "iter_time": 1.272620803833008, "loss": 0.1250937283039093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.03290689085667, "step_time": 0.8957225246429444} +{"epoch": 0, "iter": 3663, "iter_tflops": 50.34079666117125, "iter_time": 0.4098285064697266, "loss": 0.11909491568803787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.87182901061621, "step_time": 0.3759869842529297} +{"epoch": 0, "iter": 3664, "iter_tflops": 49.58447182785538, "iter_time": 0.4160797271728515, "loss": 0.16861523687839508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.87941126671815, "step_time": 0.38291237831115726} +{"epoch": 0, "iter": 3665, "iter_tflops": 30.025533736238827, "iter_time": 0.687118293762207, "loss": 0.8979544043540955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.901094673749938, "step_time": 0.6467205505371094} +{"epoch": 0, "iter": 3666, 
"iter_tflops": 17.016375378150677, "iter_time": 1.2124258575439453, "loss": 1.0629034042358398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.298280885378986, "step_time": 1.0163960990905763} +{"epoch": 0, "iter": 3667, "iter_tflops": 35.168216505538034, "iter_time": 0.5866403121948244, "loss": 1.035792350769043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.151010155715, "step_time": 0.5407745018005371} +{"epoch": 0, "iter": 3668, "iter_tflops": 36.39378864685666, "iter_time": 0.5668850173950196, "loss": 0.840506911277771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.4606299224603, "step_time": 0.5228272724151611} +{"epoch": 0, "iter": 3669, "iter_tflops": 22.869202052931648, "iter_time": 0.9021343841552735, "loss": 0.5659825801849365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.886969388528676, "step_time": 0.8289917984008789} +{"epoch": 0, "iter": 3670, "iter_tflops": 19.47668756826863, "iter_time": 1.0592711639404297, "loss": 0.5390650033950806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.324271524116625, "step_time": 0.9241552848815918} +{"epoch": 0, "iter": 3671, "iter_tflops": 46.10663389425757, "iter_time": 0.44746475219726556, "loss": 0.5630896091461182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.43053218258871, "step_time": 0.4090992622375489} +{"epoch": 0, "iter": 3672, "iter_tflops": 48.230737442361644, "iter_time": 0.4277582015991211, "loss": 0.5929709672927856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.22169641401765, "step_time": 0.39506747055053715} +{"epoch": 0, "iter": 3673, "iter_tflops": 29.858032048282855, "iter_time": 0.6909729843139648, "loss": 1.0743381977081299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.70230804302252, "step_time": 0.6507757568359375} +{"epoch": 0, "iter": 3674, "iter_tflops": 14.507055463417885, "iter_time": 1.422142044067383, "loss": 1.0830755233764648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.862383689813772, "step_time": 1.223498046875} +{"epoch": 0, "iter": 3675, 
"iter_tflops": 14.215469908739072, "iter_time": 1.451312805175781, "loss": 0.9924925565719604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.486627319641595, "step_time": 1.2513835067749024} +{"epoch": 0, "iter": 3676, "iter_tflops": 39.808446761904584, "iter_time": 0.5182591934204102, "loss": 1.0827562808990479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.50940801038661, "step_time": 0.47417545890808105} +{"epoch": 0, "iter": 3677, "iter_tflops": 15.754187295206686, "iter_time": 0.9100027313232422, "loss": 0.24252894520759583, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 16.845436602315488, "step_time": 0.8510526504516602} +{"epoch": 0, "iter": 3678, "iter_tflops": 5.118157416264191, "iter_time": 2.801077087402344, "loss": 0.27479249238967896, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 6.237304257495599, "step_time": 2.298485511779785} +{"epoch": 0, "iter": 3679, "iter_tflops": 7.780939169392705, "iter_time": 1.8424965362548829, "loss": 0.30584484338760376, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 9.530582289398927, "step_time": 1.504247383117676} +{"epoch": 0, "iter": 3680, "iter_tflops": 20.663703029427147, "iter_time": 0.6937940139770509, "loss": 0.31350332498550415, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 22.10579012883566, "step_time": 0.648533863067627} +{"epoch": 0, "iter": 3681, "iter_tflops": 17.860738984952164, "iter_time": 0.752505859375, "loss": 0.3098262846469879, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 19.378216786477452, "step_time": 0.6935783042907715} +{"epoch": 0, "iter": 3682, "iter_tflops": 20.33622851704192, "iter_time": 0.66090478515625, "loss": 0.33591586351394653, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 21.94956999567845, "step_time": 0.6123268356323243} +{"epoch": 0, "iter": 3683, "iter_tflops": 21.562125067425594, "iter_time": 0.6233295974731445, "loss": 0.1681978851556778, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 23.16659381617763, "step_time": 0.580159122467041} +{"epoch": 0, "iter": 3684, 
"iter_tflops": 21.00399570692399, "iter_time": 0.6398930435180664, "loss": 0.22660154104232788, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 22.448114442058312, "step_time": 0.5987278251647948} +{"epoch": 0, "iter": 3685, "iter_tflops": 22.653245857632097, "iter_time": 0.9107345428466797, "loss": 0.8150728940963745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.58504143260626, "step_time": 0.8391726150512696} +{"epoch": 0, "iter": 3686, "iter_tflops": 42.239920722345786, "iter_time": 0.4884264259338379, "loss": 0.7648400664329529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.778975234284246, "step_time": 0.44103346443176267} +{"epoch": 0, "iter": 3687, "iter_tflops": 51.00110796538777, "iter_time": 0.40452245712280277, "loss": 0.6728676557540894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.1311901300083, "step_time": 0.37421817779541017} +{"epoch": 0, "iter": 3688, "iter_tflops": 47.325694915547935, "iter_time": 0.43593852233886726, "loss": 0.7083042860031128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.189489759264134, "step_time": 0.4030337791442871} +{"epoch": 0, "iter": 3689, "iter_tflops": 21.626611720164874, "iter_time": 0.5445377731323241, "loss": 0.008569283410906792, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 23.360037026491664, "step_time": 0.5041304931640624} +{"epoch": 0, "iter": 3690, "iter_tflops": 12.512853148420385, "iter_time": 0.9411528167724609, "loss": 0.01464215200394392, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 15.442332476248767, "step_time": 0.7626119308471679} +{"epoch": 0, "iter": 3691, "iter_tflops": 30.46901709571534, "iter_time": 0.3865076103210449, "loss": 0.007223134394735098, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 33.50041502722829, "step_time": 0.35153316688537595} +{"epoch": 0, "iter": 3692, "iter_tflops": 35.131107741561394, "iter_time": 0.3352159309387207, "loss": 0.006223347503691912, "lr": 3e-05, "seqlen": 4736.0, "step_tflops": 38.730810126830036, "step_time": 0.30406043529510496} +{"epoch": 0, 
"iter": 3693, "iter_tflops": 20.080862171600415, "iter_time": 0.8218959884643554, "loss": 0.1616133451461792, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 21.096902744442314, "step_time": 0.7823129425048828} +{"epoch": 0, "iter": 3694, "iter_tflops": 20.422651246953937, "iter_time": 0.8081409149169921, "loss": 0.14885279536247253, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 23.303961586830276, "step_time": 0.7082220764160156} +{"epoch": 0, "iter": 3695, "iter_tflops": 40.07504785248045, "iter_time": 0.4118368148803711, "loss": 0.07477611303329468, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 43.693052107680934, "step_time": 0.3777346572875977} +{"epoch": 0, "iter": 3696, "iter_tflops": 46.23485898249329, "iter_time": 0.3569683227539062, "loss": 0.089789479970932, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 50.21945141896936, "step_time": 0.3286451683044434} +{"epoch": 0, "iter": 3697, "iter_tflops": 45.87190583609489, "iter_time": 0.44975444412231447, "loss": 0.2692808508872986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.93685640962848, "step_time": 0.4131436176300049} +{"epoch": 0, "iter": 3698, "iter_tflops": 10.382756312601085, "iter_time": 1.9870536193847654, "loss": 0.2115621566772461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.238753759027587, "step_time": 1.835710075378418} +{"epoch": 0, "iter": 3699, "iter_tflops": 10.331001325200178, "iter_time": 1.9970081176757812, "loss": 0.25836366415023804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.2069931288768, "step_time": 1.4521787490844726} +{"epoch": 0, "iter": 3700, "iter_tflops": 22.541116773451126, "iter_time": 0.9152649230957032, "loss": 0.3135267198085785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.880778523830497, "step_time": 0.7399755172729493} +{"epoch": 0, "iter": 3701, "iter_tflops": 16.487648882272758, "iter_time": 0.8794168167114258, "loss": 0.3304603397846222, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 17.429498069136397, "step_time": 0.8318951950073242} +{"epoch": 
0, "iter": 3702, "iter_tflops": 9.642216871873236, "iter_time": 1.5037533264160154, "loss": 0.18529918789863586, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 11.483409618408352, "step_time": 1.2626490020751953} +{"epoch": 0, "iter": 3703, "iter_tflops": 22.418803624693183, "iter_time": 0.6467568893432618, "loss": 0.2661636173725128, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.126004886028433, "step_time": 0.6009911613464355} +{"epoch": 0, "iter": 3704, "iter_tflops": 23.55766686131129, "iter_time": 0.6154903106689453, "loss": 0.3612963557243347, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 25.106451470207148, "step_time": 0.5775215072631836} +{"epoch": 0, "iter": 3705, "iter_tflops": 37.760534683509675, "iter_time": 0.5463665618896485, "loss": 0.044312212616205215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.15920293102758, "step_time": 0.4893615646362305} +{"epoch": 0, "iter": 3706, "iter_tflops": 42.666281775881586, "iter_time": 0.4835456161499023, "loss": 0.07853139191865921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.24244113105194, "step_time": 0.43670676231384276} +{"epoch": 0, "iter": 3707, "iter_tflops": 46.99039880467881, "iter_time": 0.43904912567138676, "loss": 0.04640547186136246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.96355540380414, "step_time": 0.39703005981445316} +{"epoch": 0, "iter": 3708, "iter_tflops": 48.53616478858697, "iter_time": 0.4250664138793946, "loss": 0.02300483174622059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.37115479902931, "step_time": 0.38655887413024903} +{"epoch": 0, "iter": 3709, "iter_tflops": 21.771315147705604, "iter_time": 0.947627342224121, "loss": 0.8437384963035583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.07325925108609, "step_time": 0.8941560134887695} +{"epoch": 0, "iter": 3710, "iter_tflops": 12.81018008740064, "iter_time": 1.6105233001708985, "loss": 0.7097666263580322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.06545853247117, "step_time": 1.2841895217895507} 
+{"epoch": 0, "iter": 3711, "iter_tflops": 40.49325479932169, "iter_time": 0.509494571685791, "loss": 0.8092008829116821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.411985491287886, "step_time": 0.4752395744323731} +{"epoch": 0, "iter": 3712, "iter_tflops": 42.26867138193016, "iter_time": 0.4880942039489746, "loss": 0.8538194894790649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.415686127898624, "step_time": 0.4542724170684814} +{"epoch": 0, "iter": 3713, "iter_tflops": 45.972767537459966, "iter_time": 0.448767707824707, "loss": 0.24643607437610626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.44409513202886, "step_time": 0.4089892673492432} +{"epoch": 0, "iter": 3714, "iter_tflops": 45.32546152300462, "iter_time": 0.45517668914794923, "loss": 0.16639567911624908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.36770217734017, "step_time": 0.41790670013427733} +{"epoch": 0, "iter": 3715, "iter_tflops": 50.24164671903157, "iter_time": 0.41063728713989256, "loss": 0.29863637685775757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.62479555282479, "step_time": 0.37768733596801757} +{"epoch": 0, "iter": 3716, "iter_tflops": 45.928822308487064, "iter_time": 0.449197093963623, "loss": 0.27013224363327026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.80132942246275, "step_time": 0.4142679271697998} +{"epoch": 0, "iter": 3717, "iter_tflops": 25.179092908264625, "iter_time": 0.8193739776611328, "loss": 0.751717746257782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.527550860470242, "step_time": 0.7777232666015625} +{"epoch": 0, "iter": 3718, "iter_tflops": 13.178219177949776, "iter_time": 1.5655448760986328, "loss": 0.9072622656822205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.18966372002758, "step_time": 1.358232406616211} +{"epoch": 0, "iter": 3719, "iter_tflops": 37.38399873722487, "iter_time": 0.55186962890625, "loss": 0.9708771705627441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.78793454737583, "step_time": 0.5058136367797851} 
+{"epoch": 0, "iter": 3720, "iter_tflops": 41.078440041496755, "iter_time": 0.5022365379333495, "loss": 0.8165402412414551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.627499785989045, "step_time": 0.4622955265045166} +{"epoch": 0, "iter": 3721, "iter_tflops": 34.44589141966888, "iter_time": 0.5989420700073242, "loss": 0.00918163824826479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.57046323541048, "step_time": 0.5348935890197754} +{"epoch": 0, "iter": 3722, "iter_tflops": 48.809160901474236, "iter_time": 0.42268896102905273, "loss": 0.003143172711133957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.78222644906878, "step_time": 0.38360430335998535} +{"epoch": 0, "iter": 3723, "iter_tflops": 63.02848596995387, "iter_time": 0.3273296699523926, "loss": 0.005159354768693447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 69.3102815130914, "step_time": 0.29766281509399417} +{"epoch": 0, "iter": 3724, "iter_tflops": 58.52185263775872, "iter_time": 0.35253657531738286, "loss": 0.0014327785465866327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.35610562055884, "step_time": 0.3205770969390869} +{"epoch": 0, "iter": 3725, "iter_tflops": 30.11438695736016, "iter_time": 0.6850909347534179, "loss": 0.38352835178375244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.008822370386994, "step_time": 0.6445439720153808} +{"epoch": 0, "iter": 3726, "iter_tflops": 13.432518824523667, "iter_time": 1.5359065399169922, "loss": 0.4508519172668457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.90172169379774, "step_time": 1.2206504096984865} +{"epoch": 0, "iter": 3727, "iter_tflops": 48.35914112984888, "iter_time": 0.42662241363525394, "loss": 0.49436044692993164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.86680274302381, "step_time": 0.39024666595458984} +{"epoch": 0, "iter": 3728, "iter_tflops": 49.54331881753954, "iter_time": 0.4164253425598145, "loss": 0.45987144112586975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.92520527489962, "step_time": 
0.3825872039794922} +{"epoch": 0, "iter": 3729, "iter_tflops": 33.560976779110426, "iter_time": 0.6147345962524414, "loss": 0.15444223582744598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.80207005458953, "step_time": 0.576254207611084} +{"epoch": 0, "iter": 3730, "iter_tflops": 14.687266263683439, "iter_time": 1.4046925506591796, "loss": 0.16188164055347443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.19474510948592, "step_time": 1.1998487548828125} +{"epoch": 0, "iter": 3731, "iter_tflops": 46.656623461296896, "iter_time": 0.44219002532958984, "loss": 0.17378845810890198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.21078515690411, "step_time": 0.4028661823272705} +{"epoch": 0, "iter": 3732, "iter_tflops": 50.61725019378064, "iter_time": 0.4075901679992675, "loss": 0.1358470767736435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.22021571799077, "step_time": 0.37361486625671386} +{"epoch": 0, "iter": 3733, "iter_tflops": 31.606193489190595, "iter_time": 0.652754768371582, "loss": 0.9072133302688599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.727000264475706, "step_time": 0.6117085227966308} +{"epoch": 0, "iter": 3734, "iter_tflops": 15.31474159878896, "iter_time": 1.3471395111083984, "loss": 0.9000842571258545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.369082298256778, "step_time": 1.1231423091888426} +{"epoch": 0, "iter": 3735, "iter_tflops": 42.05304360654967, "iter_time": 0.49059691619873047, "loss": 0.8909972906112671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.169559196290216, "step_time": 0.44685489463806155} +{"epoch": 0, "iter": 3736, "iter_tflops": 38.7674093453047, "iter_time": 0.5321762237548828, "loss": 0.7767359018325806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.33925115781968, "step_time": 0.48728054809570315} +{"epoch": 0, "iter": 3737, "iter_tflops": 22.535679421251007, "iter_time": 0.9154857559204101, "loss": 0.897873044013977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.267595860277208, 
"step_time": 0.850149871826172} +{"epoch": 0, "iter": 3738, "iter_tflops": 37.73397934322051, "iter_time": 0.5467510681152344, "loss": 0.679074764251709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.269927020791755, "step_time": 0.49990622711181637} +{"epoch": 0, "iter": 3739, "iter_tflops": 44.85310158514555, "iter_time": 0.45997027587890627, "loss": 0.6298961639404297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.18013346087661, "step_time": 0.4282074794769287} +{"epoch": 0, "iter": 3740, "iter_tflops": 45.9769778618727, "iter_time": 0.44872661209106446, "loss": 0.7317951917648315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.79074638709322, "step_time": 0.41435597991943357} +{"epoch": 0, "iter": 3741, "iter_tflops": 21.629035927516455, "iter_time": 0.9538609848022461, "loss": 0.7737026810646057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.636221255990915, "step_time": 0.9114195022583008} +{"epoch": 0, "iter": 3742, "iter_tflops": 16.814233302161643, "iter_time": 1.2270017395019532, "loss": 0.8038478493690491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.289577545863793, "step_time": 1.016832088470459} +{"epoch": 0, "iter": 3743, "iter_tflops": 38.04498316870809, "iter_time": 0.5422815780639649, "loss": 1.0867998600006104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36168628156313, "step_time": 0.49879720497131347} +{"epoch": 0, "iter": 3744, "iter_tflops": 34.42934173066622, "iter_time": 0.5992299728393554, "loss": 0.9716874957084656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.97477292646946, "step_time": 0.5579775581359864} +{"epoch": 0, "iter": 3745, "iter_tflops": 20.127392880687506, "iter_time": 1.0250256271362304, "loss": 0.8008406162261963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.744723043218595, "step_time": 0.9487862167358398} +{"epoch": 0, "iter": 3746, "iter_tflops": 17.607460107161383, "iter_time": 1.1717245635986329, "loss": 0.7460957169532776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.41368077759088, 
"step_time": 0.9634538650512695} +{"epoch": 0, "iter": 3747, "iter_tflops": 35.06418058789968, "iter_time": 0.5883808822631836, "loss": 0.8364035487174988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.19817518096103, "step_time": 0.540106782913208} +{"epoch": 0, "iter": 3748, "iter_tflops": 33.88310993714244, "iter_time": 0.6088901977539063, "loss": 0.7961873412132263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.72123703330723, "step_time": 0.5618300247192383} +{"epoch": 0, "iter": 3749, "iter_tflops": 24.608790859608295, "iter_time": 0.8383627471923828, "loss": 0.010347736068069935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.31323061028035, "step_time": 0.7840577926635743} +{"epoch": 0, "iter": 3750, "iter_tflops": 24.512517234683983, "iter_time": 0.8416554412841797, "loss": 0.02277899906039238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.952933150535557, "step_time": 0.7380654258728027} +{"epoch": 0, "iter": 3751, "iter_tflops": 51.65234371241479, "iter_time": 0.3994222145080566, "loss": 0.04517444968223572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.38450009452754, "step_time": 0.36590008735656737} +{"epoch": 0, "iter": 3752, "iter_tflops": 49.539821277291665, "iter_time": 0.4164547424316406, "loss": 0.041242558509111404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.07475811662622, "step_time": 0.3815290946960449} +{"epoch": 0, "iter": 3753, "iter_tflops": 32.02942631014159, "iter_time": 0.6441293487548828, "loss": 0.6608558297157288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.09029804141798, "step_time": 0.6051895904541016} +{"epoch": 0, "iter": 3754, "iter_tflops": 13.196361425028119, "iter_time": 1.563392578125, "loss": 0.40706318616867065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.460014737797493, "step_time": 1.2534067459106446} +{"epoch": 0, "iter": 3755, "iter_tflops": 45.05432490575211, "iter_time": 0.4579159393310547, "loss": 0.5346338152885437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.8643661368744, 
"step_time": 0.42221142196655276} +{"epoch": 0, "iter": 3756, "iter_tflops": 49.579858069512774, "iter_time": 0.4161184463500977, "loss": 0.6232755184173584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.618280225813606, "step_time": 0.3847772331237793} +{"epoch": 0, "iter": 3757, "iter_tflops": 27.418812161451992, "iter_time": 0.7524430084228516, "loss": 0.6814377903938293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.990868991755466, "step_time": 0.7116410865783691} +{"epoch": 0, "iter": 3758, "iter_tflops": 12.399448453916634, "iter_time": 1.6638718719482424, "loss": 0.7155857682228088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.997386573991024, "step_time": 1.4739246788024902} +{"epoch": 0, "iter": 3759, "iter_tflops": 34.7389905837629, "iter_time": 0.5938886871337891, "loss": 0.8414387106895447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.62009894105839, "step_time": 0.5484061470031738} +{"epoch": 0, "iter": 3760, "iter_tflops": 35.9304700709453, "iter_time": 0.574194923400879, "loss": 0.8438544869422913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.90258351313471, "step_time": 0.5303270797729492} +{"epoch": 0, "iter": 3761, "iter_tflops": 26.59487176085402, "iter_time": 0.7757545776367187, "loss": 0.8585696816444397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.14461384027093, "step_time": 0.7078870086669922} +{"epoch": 0, "iter": 3762, "iter_tflops": 26.54049412890897, "iter_time": 0.7773439865112305, "loss": 0.7926762104034424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.969244217345132, "step_time": 0.6884088687896729} +{"epoch": 0, "iter": 3763, "iter_tflops": 45.88853657523599, "iter_time": 0.4495914459228516, "loss": 0.848056972026825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.93626000435393, "step_time": 0.41314855194091804} +{"epoch": 0, "iter": 3764, "iter_tflops": 49.312296828922065, "iter_time": 0.41837624359130854, "loss": 0.6438644528388977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.993778805544075, 
"step_time": 0.3893116130828858} +{"epoch": 0, "iter": 3765, "iter_tflops": 32.029060598975036, "iter_time": 0.6441367034912109, "loss": 0.6325621604919434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.99563827313368, "step_time": 0.6068747215270995} +{"epoch": 0, "iter": 3766, "iter_tflops": 18.822593686966254, "iter_time": 1.096081329345703, "loss": 0.5602235794067383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.279995298913402, "step_time": 0.9259918251037598} +{"epoch": 0, "iter": 3767, "iter_tflops": 38.61638484683726, "iter_time": 0.5342575073242187, "loss": 0.8220417499542236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.47082117090633, "step_time": 0.4857710056304932} +{"epoch": 0, "iter": 3768, "iter_tflops": 42.745590638683225, "iter_time": 0.48264846038818354, "loss": 0.5119158625602722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.606353677147375, "step_time": 0.44266697311401365} +{"epoch": 0, "iter": 3769, "iter_tflops": 37.9552228935178, "iter_time": 0.5391911163330078, "loss": 0.08153087645769119, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 41.928388899876566, "step_time": 0.48809695625305183} +{"epoch": 0, "iter": 3770, "iter_tflops": 40.42674106867594, "iter_time": 0.5062272758483887, "loss": 0.13106505572795868, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 44.67940535659405, "step_time": 0.4580436744689941} +{"epoch": 0, "iter": 3771, "iter_tflops": 40.70521955650852, "iter_time": 0.5027639999389648, "loss": 0.11754985898733139, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 44.634520441243666, "step_time": 0.45850428771972646} +{"epoch": 0, "iter": 3772, "iter_tflops": 38.536671233315595, "iter_time": 0.5310557022094726, "loss": 0.07948316633701324, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 42.47648552132558, "step_time": 0.4817987823486328} +{"epoch": 0, "iter": 3773, "iter_tflops": 28.12925429157999, "iter_time": 0.7334390487670899, "loss": 0.6067371964454651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
30.269800069366784, "step_time": 0.6815734977722168} +{"epoch": 0, "iter": 3774, "iter_tflops": 19.163740796401274, "iter_time": 1.076569221496582, "loss": 0.673505961894989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.379278471224858, "step_time": 0.8129109554290772} +{"epoch": 0, "iter": 3775, "iter_tflops": 47.32093709349138, "iter_time": 0.43598235321044926, "loss": 0.5584390759468079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.40391754346152, "step_time": 0.4013525524139404} +{"epoch": 0, "iter": 3776, "iter_tflops": 51.14637161736625, "iter_time": 0.4033735504150391, "loss": 0.5835431814193726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.29632764491968, "step_time": 0.3731006088256836} +{"epoch": 0, "iter": 3777, "iter_tflops": 36.80907021119349, "iter_time": 0.51100248336792, "loss": 0.36077427864074707, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 39.77178029179671, "step_time": 0.47293649291992185} +{"epoch": 0, "iter": 3778, "iter_tflops": 12.865197346704786, "iter_time": 1.4620472412109373, "loss": 0.32375067472457886, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 15.077034591483443, "step_time": 1.2475613937377932} +{"epoch": 0, "iter": 3779, "iter_tflops": 33.23885416671617, "iter_time": 0.5658897323608398, "loss": 0.4078475832939148, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 36.77381086040624, "step_time": 0.5114924411773681} +{"epoch": 0, "iter": 3780, "iter_tflops": 37.29912250650101, "iter_time": 0.5042887077331543, "loss": 0.32069993019104004, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 40.58185811386978, "step_time": 0.46349593544006346} +{"epoch": 0, "iter": 3781, "iter_tflops": 30.909685867106866, "iter_time": 0.6674637069702148, "loss": 0.8268738985061646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.149614193557674, "step_time": 0.60413840675354} +{"epoch": 0, "iter": 3782, "iter_tflops": 37.1266653512223, "iter_time": 0.5556947631835937, "loss": 1.1413865089416504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.53901232420237, "step_time": 0.508919490814209} +{"epoch": 0, "iter": 3783, "iter_tflops": 38.55619251770302, "iter_time": 0.535091567993164, "loss": 0.9656957983970642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.79755589009733, "step_time": 0.49359569168090817} +{"epoch": 0, "iter": 3784, "iter_tflops": 34.00552537945494, "iter_time": 0.6066982727050781, "loss": 0.9539473056793213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.104321690925765, "step_time": 0.5560293941497804} +{"epoch": 0, "iter": 3785, "iter_tflops": 19.120636731325476, "iter_time": 1.0789961547851563, "loss": 0.11531499773263931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.351044462922, "step_time": 1.0137609176635742} +{"epoch": 0, "iter": 3786, "iter_tflops": 14.429660316069116, "iter_time": 1.4297698669433592, "loss": 0.10126317292451859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.650407411639414, "step_time": 0.8723356494903565} +{"epoch": 0, "iter": 3787, "iter_tflops": 50.702117277195, "iter_time": 0.40690792846679685, "loss": 0.0770777016878128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.417365692175366, "step_time": 0.3722857131958008} +{"epoch": 0, "iter": 3788, "iter_tflops": 52.414046368078594, "iter_time": 0.3936176452636719, "loss": 0.08794550597667694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.82967734635556, "step_time": 0.3630337963104248} +{"epoch": 0, "iter": 3789, "iter_tflops": 25.117862541104124, "iter_time": 0.8213713836669922, "loss": 0.7946954369544983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.39218577690473, "step_time": 0.7817121963500978} +{"epoch": 0, "iter": 3790, "iter_tflops": 13.77533869711504, "iter_time": 1.4976832122802732, "loss": 0.8041358590126038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.354518646550407, "step_time": 1.261491943359375} +{"epoch": 0, "iter": 3791, "iter_tflops": 37.15333365806146, "iter_time": 0.5552958908081055, "loss": 0.9153802990913391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.495789255051875, "step_time": 0.5094626846313476} +{"epoch": 0, "iter": 3792, "iter_tflops": 36.156126155141116, "iter_time": 0.5706112823486327, "loss": 0.7274318337440491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.65364338406836, "step_time": 0.520282419204712} +{"epoch": 0, "iter": 3793, "iter_tflops": 20.852383286860942, "iter_time": 0.9893877944946289, "loss": 0.7645583152770996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.227913678208456, "step_time": 0.928161491394043} +{"epoch": 0, "iter": 3794, "iter_tflops": 14.753132483776604, "iter_time": 1.3984212188720702, "loss": 0.8997319936752319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.022345995476094, "step_time": 1.0845714569091798} +{"epoch": 0, "iter": 3795, "iter_tflops": 36.380182571002855, "iter_time": 0.5670970306396484, "loss": 0.9244920611381531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.460523537770904, "step_time": 0.5228286819458008} +{"epoch": 0, "iter": 3796, "iter_tflops": 40.25274574245905, "iter_time": 0.5125387878417969, "loss": 0.9149919748306274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.79360326548303, "step_time": 0.47109833335876466} +{"epoch": 0, "iter": 3797, "iter_tflops": 14.263086667407197, "iter_time": 1.356422088623047, "loss": 0.026931189000606537, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 15.387698158256455, "step_time": 1.257287841796875} +{"epoch": 0, "iter": 3798, "iter_tflops": 18.46600155148657, "iter_time": 1.047696533203125, "loss": 0.006026729941368103, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 24.38141951356842, "step_time": 0.793504487991333} +{"epoch": 0, "iter": 3799, "iter_tflops": 51.50825879198991, "iter_time": 0.3756051216125488, "loss": 0.011323053389787674, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 56.70224025275897, "step_time": 0.34119931983947754} +{"epoch": 0, "iter": 3800, "iter_tflops": 58.279411661331196, "iter_time": 0.33196570205688475, "loss": 0.0014027617871761322, "lr": 3e-05, "seqlen": 7696.0, 
"step_tflops": 63.720919270316465, "step_time": 0.3036171798706055} +{"epoch": 0, "iter": 3801, "iter_tflops": 24.430625806351667, "iter_time": 0.8444766693115234, "loss": 0.9530619978904724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.703701022224823, "step_time": 0.8026506958007813} +{"epoch": 0, "iter": 3802, "iter_tflops": 14.3561583416484, "iter_time": 1.4370901336669923, "loss": 0.8394431471824646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.788206859400983, "step_time": 1.1598186187744142} +{"epoch": 0, "iter": 3803, "iter_tflops": 42.468076662717586, "iter_time": 0.48580239868164066, "loss": 0.9557919502258301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.54451574206429, "step_time": 0.45298743820190435} +{"epoch": 0, "iter": 3804, "iter_tflops": 44.75609045260309, "iter_time": 0.46096728515625, "loss": 0.9974439740180969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.3497032617072, "step_time": 0.4267056903839111} +{"epoch": 0, "iter": 3805, "iter_tflops": 29.00271976308566, "iter_time": 0.7113503036499024, "loss": 0.36807528138160706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.821949896658445, "step_time": 0.669363670349121} +{"epoch": 0, "iter": 3806, "iter_tflops": 11.565706915435193, "iter_time": 1.7838160400390626, "loss": 0.3101655840873718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.75524681890925, "step_time": 1.3094744720458984} +{"epoch": 0, "iter": 3807, "iter_tflops": 15.18298998841408, "iter_time": 1.3588294219970702, "loss": 0.31709548830986023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.99858596048876, "step_time": 1.0859278450012209} +{"epoch": 0, "iter": 3808, "iter_tflops": 16.512988184267755, "iter_time": 1.2493858337402344, "loss": 0.2931184768676758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.031202435061292, "step_time": 1.0840667362213134} +{"epoch": 0, "iter": 3809, "iter_tflops": 24.31155810587793, "iter_time": 0.6031186218261719, "loss": 0.28062960505485535, "lr": 3e-05, "seqlen": 5872.0, 
"step_tflops": 26.13065347251802, "step_time": 0.5611322898864746} +{"epoch": 0, "iter": 3810, "iter_tflops": 25.89140449612498, "iter_time": 0.5663174209594727, "loss": 0.2878364622592926, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.794178282090456, "step_time": 0.5275476493835449} +{"epoch": 0, "iter": 3811, "iter_tflops": 26.082481613888117, "iter_time": 0.5621686477661133, "loss": 0.43680334091186523, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.849044504711337, "step_time": 0.5265083122253418} +{"epoch": 0, "iter": 3812, "iter_tflops": 26.424124210743635, "iter_time": 0.554900260925293, "loss": 0.3984259068965912, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 28.113211461401665, "step_time": 0.5215609550476074} +{"epoch": 0, "iter": 3813, "iter_tflops": 29.457987929225794, "iter_time": 0.7003565063476563, "loss": 0.28753769397735596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.203773336740262, "step_time": 0.6611730346679687} +{"epoch": 0, "iter": 3814, "iter_tflops": 16.33594080019783, "iter_time": 1.2629265594482422, "loss": 0.3329034149646759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.812618071141454, "step_time": 0.9912781486511231} +{"epoch": 0, "iter": 3815, "iter_tflops": 47.109246675133726, "iter_time": 0.43794148635864266, "loss": 0.23292583227157593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.23804988211952, "step_time": 0.40265180969238284} +{"epoch": 0, "iter": 3816, "iter_tflops": 48.72933705990446, "iter_time": 0.4233813705444336, "loss": 0.29027730226516724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76218282841182, "step_time": 0.39102046966552734} +{"epoch": 0, "iter": 3817, "iter_tflops": 26.742941504668533, "iter_time": 0.7714593963623047, "loss": 0.006523552350699902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.279237617687198, "step_time": 0.7295491409301759} +{"epoch": 0, "iter": 3818, "iter_tflops": 16.821403435828774, "iter_time": 1.226478729248047, "loss": 0.005856109783053398, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 18.47176482045503, "step_time": 1.1168988838195801} +{"epoch": 0, "iter": 3819, "iter_tflops": 48.502999692544094, "iter_time": 0.4253570632934571, "loss": 0.005312090273946524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.57065209282294, "step_time": 0.385119327545166} +{"epoch": 0, "iter": 3820, "iter_tflops": 53.3624745971904, "iter_time": 0.38662175369262697, "loss": 0.003118229331448674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.681726483295456, "step_time": 0.35157611656188964} +{"epoch": 0, "iter": 3821, "iter_tflops": 24.23160796542628, "iter_time": 0.851412483215332, "loss": 0.6674065589904785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.605130123871234, "step_time": 0.8057406234741211} +{"epoch": 0, "iter": 3822, "iter_tflops": 19.49700061988457, "iter_time": 1.0581675567626951, "loss": 0.8034911155700684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.56536397284633, "step_time": 0.9566772689819336} +{"epoch": 0, "iter": 3823, "iter_tflops": 46.53397012461424, "iter_time": 0.4433555412292481, "loss": 0.6187376379966736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.42684360915415, "step_time": 0.40912918663024905} +{"epoch": 0, "iter": 3824, "iter_tflops": 49.65604352702037, "iter_time": 0.4154800109863281, "loss": 0.8270795345306396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.51264919694517, "step_time": 0.38553676223754885} +{"epoch": 0, "iter": 3825, "iter_tflops": 34.96544163166065, "iter_time": 0.5900424118041993, "loss": 0.08817911148071289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.60123886811361, "step_time": 0.5486812171936034} +{"epoch": 0, "iter": 3826, "iter_tflops": 19.12487128112636, "iter_time": 1.0787572479248049, "loss": 0.10954327881336212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.485826718201306, "step_time": 0.9602187423706053} +{"epoch": 0, "iter": 3827, "iter_tflops": 54.095843017666645, "iter_time": 0.38138038635253907, "loss": 0.09875431656837463, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 59.297368375098955, "step_time": 0.3479259548187256} +{"epoch": 0, "iter": 3828, "iter_tflops": 53.462490297623596, "iter_time": 0.3858984756469726, "loss": 0.07154880464076996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.72725340126183, "step_time": 0.3573891410827636} +{"epoch": 0, "iter": 3829, "iter_tflops": 44.82352615272076, "iter_time": 0.4602737731933594, "loss": 0.043033335357904434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78637309069459, "step_time": 0.422886396408081} +{"epoch": 0, "iter": 3830, "iter_tflops": 13.780971149821317, "iter_time": 1.4970710906982423, "loss": 0.046367038041353226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.500064625173078, "step_time": 1.1789152755737304} +{"epoch": 0, "iter": 3831, "iter_tflops": 39.651775325485396, "iter_time": 0.5203069305419922, "loss": 0.03465452417731285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.30562973560366, "step_time": 0.465653995513916} +{"epoch": 0, "iter": 3832, "iter_tflops": 50.44493192799206, "iter_time": 0.40898248291015626, "loss": 0.03901555389165878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.05400770443351, "step_time": 0.3747428092956543} +{"epoch": 0, "iter": 3833, "iter_tflops": 21.280759180541445, "iter_time": 0.9694716873168945, "loss": 0.07333657145500183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.301140311963888, "step_time": 0.9251138381958007} +{"epoch": 0, "iter": 3834, "iter_tflops": 16.22144291418106, "iter_time": 1.2718408355712891, "loss": 0.04379238188266754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.083247778748117, "step_time": 1.0272787418365479} +{"epoch": 0, "iter": 3835, "iter_tflops": 51.031466555480876, "iter_time": 0.4042818069458008, "loss": 0.08185268193483353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.721755863644965, "step_time": 0.3702520351409912} +{"epoch": 0, "iter": 3836, "iter_tflops": 52.755507565632186, "iter_time": 0.3910699462890625, "loss": 
0.12032071501016617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.354478810923844, "step_time": 0.359711986541748} +{"epoch": 0, "iter": 3837, "iter_tflops": 28.32088219986295, "iter_time": 0.7284763717651368, "loss": 0.5132426619529724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.009353497091876, "step_time": 0.68748876953125} +{"epoch": 0, "iter": 3838, "iter_tflops": 13.43630194528544, "iter_time": 1.535474090576172, "loss": 0.3261214792728424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.582605049257467, "step_time": 1.4147742080688475} +{"epoch": 0, "iter": 3839, "iter_tflops": 11.782999552375106, "iter_time": 1.7509203338623047, "loss": 0.5250340104103088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.575153379747237, "step_time": 1.5197687225341796} +{"epoch": 0, "iter": 3840, "iter_tflops": 18.84600611445642, "iter_time": 1.094719665527344, "loss": 0.4070582687854767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.429955463348723, "step_time": 0.962722183227539} +{"epoch": 0, "iter": 3841, "iter_tflops": 19.35822859217455, "iter_time": 0.8271596374511717, "loss": 0.4156254827976227, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 20.424521656488174, "step_time": 0.783976516723633} +{"epoch": 0, "iter": 3842, "iter_tflops": 16.50285335178539, "iter_time": 0.9702773818969727, "loss": 0.2597024142742157, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 19.692916359408297, "step_time": 0.8131017799377441} +{"epoch": 0, "iter": 3843, "iter_tflops": 27.588410974280944, "iter_time": 0.5804011459350586, "loss": 0.36437422037124634, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 29.518495597584785, "step_time": 0.5424512672424316} +{"epoch": 0, "iter": 3844, "iter_tflops": 29.813956007009093, "iter_time": 0.5370755004882812, "loss": 0.1467956304550171, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 31.71572155458978, "step_time": 0.5048709144592285} +{"epoch": 0, "iter": 3845, "iter_tflops": 41.044489206476875, "iter_time": 0.5026519737243653, "loss": 
0.025822939351201057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83411358425168, "step_time": 0.46016508102416986} +{"epoch": 0, "iter": 3846, "iter_tflops": 10.801314946820487, "iter_time": 1.9100538787841799, "loss": 0.03447664901614189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.224769346153666, "step_time": 1.5600342788696289} +{"epoch": 0, "iter": 3847, "iter_tflops": 10.861805527658143, "iter_time": 1.8994165802001954, "loss": 0.05470116436481476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.28201750062255, "step_time": 1.553310218811035} +{"epoch": 0, "iter": 3848, "iter_tflops": 18.19115881215472, "iter_time": 1.1341275024414061, "loss": 0.09919802844524384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.917288771655983, "step_time": 0.9413159503936767} +{"epoch": 0, "iter": 3849, "iter_tflops": 21.144070986813126, "iter_time": 0.7359940490722656, "loss": 0.2877015173435211, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 23.049555368195993, "step_time": 0.6751501350402832} +{"epoch": 0, "iter": 3850, "iter_tflops": 21.971368351610213, "iter_time": 0.7082813491821289, "loss": 0.2815288305282593, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 23.63890370795761, "step_time": 0.658317771911621} +{"epoch": 0, "iter": 3851, "iter_tflops": 24.15679005139121, "iter_time": 0.6442043991088868, "loss": 0.16755688190460205, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 26.009928095165115, "step_time": 0.5983065528869629} +{"epoch": 0, "iter": 3852, "iter_tflops": 23.68292241559126, "iter_time": 0.6570941772460936, "loss": 0.4337080419063568, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 25.49276120282075, "step_time": 0.6104442863464354} +{"epoch": 0, "iter": 3853, "iter_tflops": 22.477990783730004, "iter_time": 0.9178353042602538, "loss": 0.8259142637252808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.072934722711157, "step_time": 0.8570244445800782} +{"epoch": 0, "iter": 3854, "iter_tflops": 18.6026867593722, "iter_time": 1.109038375854492, 
"loss": 0.8755587339401245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.70575028351739, "step_time": 0.9086285743713379} +{"epoch": 0, "iter": 3855, "iter_tflops": 43.51908759349731, "iter_time": 0.47406999206542966, "loss": 0.9567086696624756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8373599497847, "step_time": 0.4404836978912353} +{"epoch": 0, "iter": 3856, "iter_tflops": 48.49265271938416, "iter_time": 0.4254478225708007, "loss": 0.9751830697059631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.1865360513087, "step_time": 0.39533364486694333} +{"epoch": 0, "iter": 3857, "iter_tflops": 29.129336851516925, "iter_time": 0.7082582626342774, "loss": 0.008195754140615463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.338361414653875, "step_time": 0.658333511352539} +{"epoch": 0, "iter": 3858, "iter_tflops": 14.900102157577868, "iter_time": 1.3846276550292969, "loss": 0.0075815217569470406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.886882916587766, "step_time": 1.153420280456543} +{"epoch": 0, "iter": 3859, "iter_tflops": 43.79646361332524, "iter_time": 0.4710675659179688, "loss": 0.0074128564447164536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.89488938596953, "step_time": 0.4219478511810303} +{"epoch": 0, "iter": 3860, "iter_tflops": 44.344348267904984, "iter_time": 0.4652474174499512, "loss": 0.004119647201150656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27143976612664, "step_time": 0.41872317123413083} +{"epoch": 0, "iter": 3861, "iter_tflops": 23.87701453080597, "iter_time": 0.8640566635131836, "loss": 0.009359135292470455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.502604303565576, "step_time": 0.8089798698425293} +{"epoch": 0, "iter": 3862, "iter_tflops": 10.355689272377505, "iter_time": 1.992247253417969, "loss": 0.015567352995276451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.37963055619302, "step_time": 1.6665354766845706} +{"epoch": 0, "iter": 3863, "iter_tflops": 10.28222663609332, "iter_time": 
2.006481109619141, "loss": 0.015374850481748581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.73545498850521, "step_time": 1.6199730224609374} +{"epoch": 0, "iter": 3864, "iter_tflops": 40.3954525838934, "iter_time": 0.5107281188964844, "loss": 0.0035127708688378334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.724640548982315, "step_time": 0.4512029676437378} +{"epoch": 0, "iter": 3865, "iter_tflops": 20.294128237816313, "iter_time": 0.7365973510742188, "loss": 0.16777534782886505, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 22.099649223406182, "step_time": 0.6764180259704591} +{"epoch": 0, "iter": 3866, "iter_tflops": 22.978332794631907, "iter_time": 0.6505520324707031, "loss": 0.25577816367149353, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.7345976978278, "step_time": 0.6043599853515624} +{"epoch": 0, "iter": 3867, "iter_tflops": 22.19256280712824, "iter_time": 0.6735860671997069, "loss": 0.39587071537971497, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.927200039397665, "step_time": 0.6247534637451173} +{"epoch": 0, "iter": 3868, "iter_tflops": 23.746355514259207, "iter_time": 0.6295113830566407, "loss": 0.32748809456825256, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 25.499623034564483, "step_time": 0.5862283172607423} +{"epoch": 0, "iter": 3869, "iter_tflops": 25.763346684152662, "iter_time": 0.8007924499511718, "loss": 0.7105395793914795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.81761918053831, "step_time": 0.7416556167602539} +{"epoch": 0, "iter": 3870, "iter_tflops": 9.407619854885665, "iter_time": 2.193019470214844, "loss": 0.725579559803009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.25089149683557, "step_time": 2.0126145629882815} +{"epoch": 0, "iter": 3871, "iter_tflops": 21.726114232672497, "iter_time": 0.9495988693237304, "loss": 0.6820395588874817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.28765023892771, "step_time": 0.8494479007720948} +{"epoch": 0, "iter": 3872, "iter_tflops": 48.469033708783115, 
"iter_time": 0.42565514373779295, "loss": 0.7637765407562256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.551581319139274, "step_time": 0.3925874919891358} +{"epoch": 0, "iter": 3873, "iter_tflops": 16.52020494982442, "iter_time": 0.9197099838256836, "loss": 0.2199375480413437, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 17.349554492370455, "step_time": 0.8757456817626953} +{"epoch": 0, "iter": 3874, "iter_tflops": 9.304875863891885, "iter_time": 1.6328855590820313, "loss": 0.2937582731246948, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 11.182192390811531, "step_time": 1.3587494201660155} +{"epoch": 0, "iter": 3875, "iter_tflops": 23.844331072963342, "iter_time": 0.6372079544067383, "loss": 0.24750091135501862, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.664171057297143, "step_time": 0.592023696899414} +{"epoch": 0, "iter": 3876, "iter_tflops": 24.5657146508521, "iter_time": 0.6184960479736329, "loss": 0.3085542917251587, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 26.2371887496725, "step_time": 0.5790939559936523} +{"epoch": 0, "iter": 3877, "iter_tflops": 18.517704026564413, "iter_time": 1.1141280517578125, "loss": 0.005080982577055693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.86747386200436, "step_time": 1.0384356689453125} +{"epoch": 0, "iter": 3878, "iter_tflops": 25.132981952869553, "iter_time": 0.8208772659301757, "loss": 0.008454517461359501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.076676386650426, "step_time": 0.6237353858947754} +{"epoch": 0, "iter": 3879, "iter_tflops": 39.269412917252176, "iter_time": 0.5253731079101562, "loss": 0.5929006338119507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94555654076413, "step_time": 0.4804011211395264} +{"epoch": 0, "iter": 3880, "iter_tflops": 43.52200272771657, "iter_time": 0.4740382385253906, "loss": 0.5538685321807861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.516938685819845, "step_time": 0.43418397903442385} +{"epoch": 0, "iter": 3881, "iter_tflops": 
26.805737143473845, "iter_time": 0.7696521606445312, "loss": 0.15644659101963043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.990127278955477, "step_time": 0.6879295082092285} +{"epoch": 0, "iter": 3882, "iter_tflops": 35.96543058530071, "iter_time": 0.5736367721557618, "loss": 0.257769376039505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.359235797815906, "step_time": 0.5111864261627196} +{"epoch": 0, "iter": 3883, "iter_tflops": 46.1079396940402, "iter_time": 0.44745207977294926, "loss": 0.18988071382045746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.0240167057017, "step_time": 0.41242376899719235} +{"epoch": 0, "iter": 3884, "iter_tflops": 49.19373021517711, "iter_time": 0.4193846130371094, "loss": 0.15992191433906555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.07210583977581, "step_time": 0.3887370433807373} +{"epoch": 0, "iter": 3885, "iter_tflops": 29.352675038232714, "iter_time": 0.7028692779541016, "loss": 0.6992751955986023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.31509984575376, "step_time": 0.6588225364685059} +{"epoch": 0, "iter": 3886, "iter_tflops": 46.86738169941683, "iter_time": 0.4402015380859375, "loss": 0.6564818024635315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.8811976174395, "step_time": 0.4054757843017578} +{"epoch": 0, "iter": 3887, "iter_tflops": 49.54449294364202, "iter_time": 0.4164154739379883, "loss": 0.6115849614143372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.572337426320104, "step_time": 0.3851072120666504} +{"epoch": 0, "iter": 3888, "iter_tflops": 47.07038698807151, "iter_time": 0.438303035736084, "loss": 0.8048589825630188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.79365287456224, "step_time": 0.40617463684082034} +{"epoch": 0, "iter": 3889, "iter_tflops": 24.57184223979037, "iter_time": 0.39385432815551763, "loss": 0.011008962988853455, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 27.314323901164567, "step_time": 0.35430957221984866} +{"epoch": 0, "iter": 3890, "iter_tflops": 
19.345050913486524, "iter_time": 0.5002688522338867, "loss": 0.004855496343225241, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 21.518493811668236, "step_time": 0.4497399539947509} +{"epoch": 0, "iter": 3891, "iter_tflops": 22.144855084621515, "iter_time": 0.4370191802978516, "loss": 0.013967642560601234, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 24.51396597753695, "step_time": 0.39478419876098636} +{"epoch": 0, "iter": 3892, "iter_tflops": 25.857390444876152, "iter_time": 0.3742731285095215, "loss": 0.004594530910253525, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 28.62688058726934, "step_time": 0.338064302444458} +{"epoch": 0, "iter": 3893, "iter_tflops": 19.001803396937966, "iter_time": 1.0857439727783202, "loss": 0.4441675841808319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.233166591558152, "step_time": 1.0196670608520508} +{"epoch": 0, "iter": 3894, "iter_tflops": 13.065114519210766, "iter_time": 1.5790977935791017, "loss": 0.30020591616630554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.578387661442243, "step_time": 1.1736624488830565} +{"epoch": 0, "iter": 3895, "iter_tflops": 37.380240283081136, "iter_time": 0.5519251174926758, "loss": 0.37817493081092834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.50598212800288, "step_time": 0.5093344841003418} +{"epoch": 0, "iter": 3896, "iter_tflops": 45.54759193597892, "iter_time": 0.452956844329834, "loss": 0.3768007755279541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.736426224024136, "step_time": 0.41480852317810063} +{"epoch": 0, "iter": 3897, "iter_tflops": 20.063497008337134, "iter_time": 1.028290008544922, "loss": 0.778369665145874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.290961938825113, "step_time": 0.969007110595703} +{"epoch": 0, "iter": 3898, "iter_tflops": 19.376613872325436, "iter_time": 1.064741943359375, "loss": 0.713687539100647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.934579661749584, "step_time": 0.7955052204132081} +{"epoch": 0, "iter": 3899, 
"iter_tflops": 45.53156384826149, "iter_time": 0.4531162948608398, "loss": 0.847908616065979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.273545991811666, "step_time": 0.41870527267456065} +{"epoch": 0, "iter": 3900, "iter_tflops": 48.43210726239717, "iter_time": 0.4259796791076661, "loss": 0.6617474555969238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.4856573221851, "step_time": 0.39308059692382813} +{"epoch": 0, "iter": 3901, "iter_tflops": 30.789188748518946, "iter_time": 0.6700759048461914, "loss": 0.90657639503479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.718618414984434, "step_time": 0.6305612678527833} +{"epoch": 0, "iter": 3902, "iter_tflops": 11.864073254184055, "iter_time": 1.7389553375244142, "loss": 0.7999879717826843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.16700121278464, "step_time": 1.360261875152588} +{"epoch": 0, "iter": 3903, "iter_tflops": 37.7987893182584, "iter_time": 0.5458136062622071, "loss": 0.9228689670562744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.327737480978385, "step_time": 0.4992069435119629} +{"epoch": 0, "iter": 3904, "iter_tflops": 35.23554804547083, "iter_time": 0.5855193023681641, "loss": 0.9091157913208008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.14547753165163, "step_time": 0.5408529357910157} +{"epoch": 0, "iter": 3905, "iter_tflops": 18.955867823510502, "iter_time": 1.0883750457763672, "loss": 0.16457760334014893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.49317235881848, "step_time": 1.0067301025390625} +{"epoch": 0, "iter": 3906, "iter_tflops": 22.2086246153538, "iter_time": 0.9289676361083985, "loss": 0.17710810899734497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.024082367056497, "step_time": 0.8244495525360108} +{"epoch": 0, "iter": 3907, "iter_tflops": 43.72461063063997, "iter_time": 0.4718416748046875, "loss": 0.10213376581668854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.956682628026556, "step_time": 0.4302026824951172} +{"epoch": 0, "iter": 3908, 
"iter_tflops": 41.74504113855257, "iter_time": 0.49421662902832036, "loss": 0.10140664130449295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42093957226442, "step_time": 0.4542198753356934} +{"epoch": 0, "iter": 3909, "iter_tflops": 19.312941491567614, "iter_time": 1.0682522659301756, "loss": 0.046010177582502365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.518218730099495, "step_time": 1.0055011978149415} +{"epoch": 0, "iter": 3910, "iter_tflops": 12.360086291017824, "iter_time": 1.6691706695556643, "loss": 0.08133229613304138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.00425347593641, "step_time": 1.2891006469726565} +{"epoch": 0, "iter": 3911, "iter_tflops": 41.88363228272475, "iter_time": 0.4925812873840332, "loss": 0.028648577630519867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.280946461552766, "step_time": 0.44577942085266115} +{"epoch": 0, "iter": 3912, "iter_tflops": 49.63779000859461, "iter_time": 0.41563279724121094, "loss": 0.041020769625902176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.51141737402183, "step_time": 0.3784728870391846} +{"epoch": 0, "iter": 3913, "iter_tflops": 38.52880093327687, "iter_time": 0.5354719848632813, "loss": 0.22031140327453613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56951832451731, "step_time": 0.4846447486877441} +{"epoch": 0, "iter": 3914, "iter_tflops": 48.349921328022795, "iter_time": 0.42670376586914066, "loss": 0.1951177716255188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.627204529472806, "step_time": 0.3920233592987061} +{"epoch": 0, "iter": 3915, "iter_tflops": 52.88640674415437, "iter_time": 0.39010200881958007, "loss": 0.1470843106508255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.45301211501899, "step_time": 0.3590950717926025} +{"epoch": 0, "iter": 3916, "iter_tflops": 49.86368850450528, "iter_time": 0.41374984741210935, "loss": 0.15092431008815765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.21220749903281, "step_time": 0.380561767578125} +{"epoch": 0, 
"iter": 3917, "iter_tflops": 23.14117553526356, "iter_time": 0.8915317840576172, "loss": 0.8750644326210022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.229591806154744, "step_time": 0.8514833297729493} +{"epoch": 0, "iter": 3918, "iter_tflops": 10.913672582647735, "iter_time": 1.8903896331787111, "loss": 0.8841454982757568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.274559034581207, "step_time": 1.4453051376342774} +{"epoch": 0, "iter": 3919, "iter_tflops": 10.286773502494757, "iter_time": 2.005594223022461, "loss": 0.9703100323677063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.759051527772764, "step_time": 1.6169770507812502} +{"epoch": 0, "iter": 3920, "iter_tflops": 20.57442515269429, "iter_time": 1.0027543106079102, "loss": 0.722020149230957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.03817395547849, "step_time": 0.792340259552002} +{"epoch": 0, "iter": 3921, "iter_tflops": 23.856604042017675, "iter_time": 0.7073031845092773, "loss": 0.3489941358566284, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 25.957509792058104, "step_time": 0.650056655883789} +{"epoch": 0, "iter": 3922, "iter_tflops": 25.730045726565873, "iter_time": 0.6558034210205078, "loss": 0.25939083099365234, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.77368749956417, "step_time": 0.6075481338500978} +{"epoch": 0, "iter": 3923, "iter_tflops": 26.09976113855365, "iter_time": 0.6465136566162109, "loss": 0.3114241361618042, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 28.17179945351305, "step_time": 0.5989625205993653} +{"epoch": 0, "iter": 3924, "iter_tflops": 24.750369799376042, "iter_time": 0.6817616119384765, "loss": 0.3171108365058899, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.563768268461526, "step_time": 0.6352205696105957} +{"epoch": 0, "iter": 3925, "iter_tflops": 24.856077724747095, "iter_time": 0.830022087097168, "loss": 0.7938949465751648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.644538499153885, "step_time": 0.7743085327148437} +{"epoch": 0, 
"iter": 3926, "iter_tflops": 28.871782456557035, "iter_time": 0.7145763702392578, "loss": 0.9079506993293762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.71706086485598, "step_time": 0.5776257343292236} +{"epoch": 0, "iter": 3927, "iter_tflops": 45.154788361525256, "iter_time": 0.4568971366882324, "loss": 1.1503995656967163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.94081037455327, "step_time": 0.42155193901062016} +{"epoch": 0, "iter": 3928, "iter_tflops": 46.49229854555979, "iter_time": 0.44375292587280274, "loss": 1.009973406791687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.905026859201755, "step_time": 0.4134071216583252} +{"epoch": 0, "iter": 3929, "iter_tflops": 31.074313393772567, "iter_time": 0.6639275741577149, "loss": 0.4111665189266205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.09714798468122, "step_time": 0.6233495864868164} +{"epoch": 0, "iter": 3930, "iter_tflops": 8.378161681427112, "iter_time": 2.4624845275878906, "loss": 0.4446232318878174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.530765211747283, "step_time": 1.9591257705688476} +{"epoch": 0, "iter": 3931, "iter_tflops": 13.250511496871516, "iter_time": 1.5570035552978516, "loss": 0.47609180212020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.808834980686585, "step_time": 1.3050356674194337} +{"epoch": 0, "iter": 3932, "iter_tflops": 31.969641600580093, "iter_time": 0.6453339004516602, "loss": 0.4131816029548645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.99961758381655, "step_time": 0.5894662551879882} +{"epoch": 0, "iter": 3933, "iter_tflops": 16.30994082729392, "iter_time": 0.9415948333740235, "loss": 0.31978702545166016, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 17.321586707465507, "step_time": 0.8866021499633789} +{"epoch": 0, "iter": 3934, "iter_tflops": 10.925870657579868, "iter_time": 1.4055956268310548, "loss": 0.2778968811035156, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 11.9796709616687, "step_time": 1.2819514045715334} +{"epoch": 
0, "iter": 3935, "iter_tflops": 21.880759407759022, "iter_time": 0.7018657684326172, "loss": 0.19968770444393158, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.670780461642348, "step_time": 0.6487895927429199} +{"epoch": 0, "iter": 3936, "iter_tflops": 21.827009818197702, "iter_time": 0.7035941314697265, "loss": 0.2779825031757355, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.48857747405389, "step_time": 0.6538223114013673} +{"epoch": 0, "iter": 3937, "iter_tflops": 17.61868037991253, "iter_time": 1.1709783630371093, "loss": 0.7664527297019958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.742740913721786, "step_time": 1.1007511444091795} +{"epoch": 0, "iter": 3938, "iter_tflops": 22.169789044877543, "iter_time": 0.9305949401855468, "loss": 0.8800654411315918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.296131638464793, "step_time": 0.7558248100280761} +{"epoch": 0, "iter": 3939, "iter_tflops": 46.292462493135844, "iter_time": 0.44566852569580084, "loss": 0.931464672088623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20235152119019, "step_time": 0.41095870780944826} +{"epoch": 0, "iter": 3940, "iter_tflops": 44.13689780916653, "iter_time": 0.467434154510498, "loss": 0.9715531468391418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.265119335134784, "step_time": 0.43649722671508784} +{"epoch": 0, "iter": 3941, "iter_tflops": 23.69157974148515, "iter_time": 0.8708196640014648, "loss": 0.8858786821365356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.831976527533744, "step_time": 0.8308276824951172} +{"epoch": 0, "iter": 3942, "iter_tflops": 12.969798031814328, "iter_time": 1.5907027587890628, "loss": 0.8271015882492065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.222346708931692, "step_time": 1.27176997756958} +{"epoch": 0, "iter": 3943, "iter_tflops": 33.38000567552205, "iter_time": 0.6180674057006836, "loss": 1.1804912090301514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.875380015962534, "step_time": 0.5594815158843994} 
+{"epoch": 0, "iter": 3944, "iter_tflops": 37.27226899529323, "iter_time": 0.5535239486694336, "loss": 0.9640275239944458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.35200730724799, "step_time": 0.511277997970581} +{"epoch": 0, "iter": 3945, "iter_tflops": 35.91391483671078, "iter_time": 0.5744596099853516, "loss": 0.3014565110206604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.76010403070052, "step_time": 0.5188893241882324} +{"epoch": 0, "iter": 3946, "iter_tflops": 40.64590563220282, "iter_time": 0.5075811004638673, "loss": 0.20915864408016205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.29670489807661, "step_time": 0.4554656581878662} +{"epoch": 0, "iter": 3947, "iter_tflops": 40.44262950026051, "iter_time": 0.5101323471069337, "loss": 0.20085565745830536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95424966874062, "step_time": 0.46937653732299806} +{"epoch": 0, "iter": 3948, "iter_tflops": 40.20203142980446, "iter_time": 0.5131853485107422, "loss": 0.24866877496242523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.446912872893336, "step_time": 0.4748575248718261} +{"epoch": 0, "iter": 3949, "iter_tflops": 21.664029484052108, "iter_time": 0.9523202285766601, "loss": 0.01011254545301199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.202324871759508, "step_time": 0.8891821670532227} +{"epoch": 0, "iter": 3950, "iter_tflops": 22.878884378042642, "iter_time": 0.9017526016235352, "loss": 0.00805130135267973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.18356727599176, "step_time": 0.7320256271362304} +{"epoch": 0, "iter": 3951, "iter_tflops": 55.75734549993327, "iter_time": 0.37001570510864257, "loss": 0.002242947230115533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.405538282203906, "step_time": 0.33598098945617677} +{"epoch": 0, "iter": 3952, "iter_tflops": 60.70113028258457, "iter_time": 0.3398798904418945, "loss": 0.0068573905155062675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.16525906898919, "step_time": 
0.31181157302856444} +{"epoch": 0, "iter": 3953, "iter_tflops": 51.52037416079437, "iter_time": 0.4004453353881836, "loss": 0.07924026250839233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.592295037866805, "step_time": 0.3645565795898437} +{"epoch": 0, "iter": 3954, "iter_tflops": 50.72903681908275, "iter_time": 0.4066920013427735, "loss": 0.02885635569691658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.63679682783518, "step_time": 0.3708174209594726} +{"epoch": 0, "iter": 3955, "iter_tflops": 51.55440936987575, "iter_time": 0.4001809692382813, "loss": 0.029168732464313507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.22214967217088, "step_time": 0.36695668220520017} +{"epoch": 0, "iter": 3956, "iter_tflops": 54.66915052808221, "iter_time": 0.3773809051513672, "loss": 0.07059437036514282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.655827442443886, "step_time": 0.34583534240722663} +{"epoch": 0, "iter": 3957, "iter_tflops": 28.187826529344505, "iter_time": 0.7319150161743164, "loss": 0.8187218904495239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.756636890115615, "step_time": 0.6933274612426757} +{"epoch": 0, "iter": 3958, "iter_tflops": 12.615313426337671, "iter_time": 1.635400787353516, "loss": 0.8037998080253601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.414697176000816, "step_time": 1.4312540359497068} +{"epoch": 0, "iter": 3959, "iter_tflops": 10.209735718684989, "iter_time": 2.020727478027344, "loss": 0.7190279364585876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.49004169799411, "step_time": 1.6518034133911135} +{"epoch": 0, "iter": 3960, "iter_tflops": 28.296372548453597, "iter_time": 0.7291073608398438, "loss": 0.6052294969558716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.132138863005764, "step_time": 0.5872427406311036} +{"epoch": 0, "iter": 3961, "iter_tflops": 24.661743814961557, "iter_time": 0.6708939056396483, "loss": 0.19210267066955566, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 26.290120849884296, 
"step_time": 0.6293395805358886} +{"epoch": 0, "iter": 3962, "iter_tflops": 16.559592981537914, "iter_time": 0.9991437377929686, "loss": 0.36968931555747986, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 19.707976109596885, "step_time": 0.8395288047790528} +{"epoch": 0, "iter": 3963, "iter_tflops": 25.683704307896573, "iter_time": 0.6441988830566406, "loss": 0.1830766350030899, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 27.64625829339301, "step_time": 0.5984684600830079} +{"epoch": 0, "iter": 3964, "iter_tflops": 25.749298871718654, "iter_time": 0.6425578308105468, "loss": 0.2730655372142792, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 27.68004319053623, "step_time": 0.5977379989624024} +{"epoch": 0, "iter": 3965, "iter_tflops": 26.74516143552278, "iter_time": 0.771395362854004, "loss": 0.24183711409568787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.11639906323479, "step_time": 0.7085729751586913} +{"epoch": 0, "iter": 3966, "iter_tflops": 47.99347001544044, "iter_time": 0.42987292861938475, "loss": 0.32571038603782654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.277571788617784, "step_time": 0.39464521408081055} +{"epoch": 0, "iter": 3967, "iter_tflops": 48.666523517285995, "iter_time": 0.42392782592773437, "loss": 0.2651657462120056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.41241991679892, "step_time": 0.39362985992431637} +{"epoch": 0, "iter": 3968, "iter_tflops": 45.43999390631661, "iter_time": 0.4540294075012207, "loss": 0.16795605421066284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.09459668712066, "step_time": 0.42023144912719723} +{"epoch": 0, "iter": 3969, "iter_tflops": 32.90528537705443, "iter_time": 0.6269841842651368, "loss": 0.16285240650177002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.195181340880566, "step_time": 0.5861908569335937} +{"epoch": 0, "iter": 3970, "iter_tflops": 15.652142565527319, "iter_time": 1.3181002807617188, "loss": 0.10840518027544022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
20.61026919272635, "step_time": 1.0010103855133057} +{"epoch": 0, "iter": 3971, "iter_tflops": 53.50667519547306, "iter_time": 0.3855798072814942, "loss": 0.1430206298828125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.31857956573151, "step_time": 0.35376536369323736} +{"epoch": 0, "iter": 3972, "iter_tflops": 54.18522894068024, "iter_time": 0.3807512474060058, "loss": 0.13264580070972443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.77956969362372, "step_time": 0.3509908905029297} +{"epoch": 0, "iter": 3973, "iter_tflops": 43.30304417607467, "iter_time": 0.4764351768493652, "loss": 0.05753743648529053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.18253957227866, "step_time": 0.43726119232177735} +{"epoch": 0, "iter": 3974, "iter_tflops": 37.906015818967845, "iter_time": 0.5442696380615234, "loss": 0.024602707475423813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.07899553562215, "step_time": 0.49029434394836424} +{"epoch": 0, "iter": 3975, "iter_tflops": 45.62879118052617, "iter_time": 0.4521507797241211, "loss": 0.038142357021570206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.25613749119173, "step_time": 0.4105188846588135} +{"epoch": 0, "iter": 3976, "iter_tflops": 43.04951847325048, "iter_time": 0.479240982055664, "loss": 0.05834195017814636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.534687757380794, "step_time": 0.43402185821533207} +{"epoch": 0, "iter": 3977, "iter_tflops": 12.341811647720904, "iter_time": 1.6716422271728513, "loss": 0.15043216943740845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.974416750762385, "step_time": 1.590136489868164} +{"epoch": 0, "iter": 3978, "iter_tflops": 25.919397002227615, "iter_time": 0.7959711990356446, "loss": 0.13665513694286346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.95966968745333, "step_time": 0.6259496440887451} +{"epoch": 0, "iter": 3979, "iter_tflops": 43.86635492066043, "iter_time": 0.470317024230957, "loss": 0.13063201308250427, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 47.988499701411044, "step_time": 0.42991745185852054} +{"epoch": 0, "iter": 3980, "iter_tflops": 45.12144253465901, "iter_time": 0.45723479461669925, "loss": 0.15783405303955078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11918188232669, "step_time": 0.42002111434936523} +{"epoch": 0, "iter": 3981, "iter_tflops": 28.901384628131602, "iter_time": 0.713844467163086, "loss": 0.2537890076637268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.6858055335561, "step_time": 0.6723334503173828} +{"epoch": 0, "iter": 3982, "iter_tflops": 10.379027392674471, "iter_time": 1.9877675170898437, "loss": 0.3058715760707855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.457311800132297, "step_time": 1.6561433029174806} +{"epoch": 0, "iter": 3983, "iter_tflops": 13.622045013630693, "iter_time": 1.5145371704101562, "loss": 0.21045957505702972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.86101698856358, "step_time": 1.3007421607971192} +{"epoch": 0, "iter": 3984, "iter_tflops": 48.62295979629821, "iter_time": 0.4243076438903809, "loss": 0.16803213953971863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.16260191624688, "step_time": 0.3880753154754639} +{"epoch": 0, "iter": 3985, "iter_tflops": 16.23375857323273, "iter_time": 0.915801887512207, "loss": 0.21242041885852814, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 17.05962048072947, "step_time": 0.8714676132202148} +{"epoch": 0, "iter": 3986, "iter_tflops": 12.23453482584879, "iter_time": 1.2151591339111327, "loss": 0.30316177010536194, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 14.793013399419836, "step_time": 1.0049951515197755} +{"epoch": 0, "iter": 3987, "iter_tflops": 22.205724038935884, "iter_time": 0.6695078582763673, "loss": 0.46132007241249084, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 23.853739928448704, "step_time": 0.6232526550292969} +{"epoch": 0, "iter": 3988, "iter_tflops": 22.438001552302428, "iter_time": 0.6625771331787109, "loss": 0.31505322456359863, "lr": 3e-05, "seqlen": 
5952.0, "step_tflops": 24.058634207886655, "step_time": 0.6179447517395019} +{"epoch": 0, "iter": 3989, "iter_tflops": 26.635701937919205, "iter_time": 0.774565414428711, "loss": 0.721997857093811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.630430932382676, "step_time": 0.7206001739501953} +{"epoch": 0, "iter": 3990, "iter_tflops": 24.83170335159467, "iter_time": 0.8308368225097655, "loss": 0.8995248675346375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.67061580860464, "step_time": 0.6726664257049562} +{"epoch": 0, "iter": 3991, "iter_tflops": 45.66412591105955, "iter_time": 0.45180090713500975, "loss": 0.840460479259491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.937829777322534, "step_time": 0.41313556480407715} +{"epoch": 0, "iter": 3992, "iter_tflops": 35.600948953981295, "iter_time": 0.579509651184082, "loss": 0.6413218975067139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.788928921778506, "step_time": 0.5318809795379639} +{"epoch": 0, "iter": 3993, "iter_tflops": 19.887061713017708, "iter_time": 1.037412857055664, "loss": 0.8104466199874878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.40859503827287, "step_time": 0.9636827392578123} +{"epoch": 0, "iter": 3994, "iter_tflops": 12.143678557233125, "iter_time": 1.698916305541992, "loss": 0.9646405577659607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.204391700405122, "step_time": 1.356916732788086} +{"epoch": 0, "iter": 3995, "iter_tflops": 35.31223907659416, "iter_time": 0.584247673034668, "loss": 0.7682114243507385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.17312976685323, "step_time": 0.5404611473083496} +{"epoch": 0, "iter": 3996, "iter_tflops": 38.44119863941489, "iter_time": 0.5366922531127929, "loss": 0.976185142993927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.450535671309794, "step_time": 0.4977280311584472} +{"epoch": 0, "iter": 3997, "iter_tflops": 18.89358905871623, "iter_time": 1.091962646484375, "loss": 0.9333253502845764, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.383508036492263, "step_time": 1.0121463623046876} +{"epoch": 0, "iter": 3998, "iter_tflops": 16.30857565774069, "iter_time": 1.2650457000732422, "loss": 0.8042329549789429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.66856518642493, "step_time": 0.9101190719604494} +{"epoch": 0, "iter": 3999, "iter_tflops": 39.02843501423965, "iter_time": 0.5286169815063476, "loss": 0.6520770192146301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76358164464546, "step_time": 0.49399722671508794} +{"epoch": 0, "iter": 4000, "iter_tflops": 44.18336865935356, "iter_time": 0.4669425201416016, "loss": 0.8204001784324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.421120035532816, "step_time": 0.4350612869262695} +{"epoch": 0, "iter": 4001, "iter_tflops": 33.49639806103401, "iter_time": 0.6159197616577149, "loss": 0.1297139674425125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.67673493124848, "step_time": 0.5782786331176758} +{"epoch": 0, "iter": 4002, "iter_tflops": 13.638176546434309, "iter_time": 1.5127457427978517, "loss": 0.14672912657260895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.660428814605313, "step_time": 1.317402847290039} +{"epoch": 0, "iter": 4003, "iter_tflops": 41.30932406278987, "iter_time": 0.49942946243286135, "loss": 0.046325817704200745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.53455280072122, "step_time": 0.4530865516662597} +{"epoch": 0, "iter": 4004, "iter_tflops": 46.311464057465585, "iter_time": 0.445485668182373, "loss": 0.09080910682678223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.85960575165994, "step_time": 0.40564792442321784} +{"epoch": 0, "iter": 4005, "iter_tflops": 20.829188673773434, "iter_time": 0.9904895401000976, "loss": 0.4386875629425049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.3508645785836, "step_time": 0.9230557250976563} +{"epoch": 0, "iter": 4006, "iter_tflops": 16.753933523732897, "iter_time": 1.2314178924560548, "loss": 0.5064175128936768, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 23.101069513861272, "step_time": 0.8930795822143555} +{"epoch": 0, "iter": 4007, "iter_tflops": 48.23355959880194, "iter_time": 0.4277331733703614, "loss": 0.5646878480911255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33063670870169, "step_time": 0.3942450313568115} +{"epoch": 0, "iter": 4008, "iter_tflops": 48.794503003994166, "iter_time": 0.42281593704223636, "loss": 0.7358881235122681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.699494268727385, "step_time": 0.39148560714721675} +{"epoch": 0, "iter": 4009, "iter_tflops": 3.1432683727935133, "iter_time": 0.6987245788574219, "loss": 0.2546464502811432, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 3.3290977634566064, "step_time": 0.6597219505310058} +{"epoch": 0, "iter": 4010, "iter_tflops": 1.3869838075650547, "iter_time": 1.5834927978515625, "loss": 0.20934614539146423, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 1.5946350459289638, "step_time": 1.3772924880981445} +{"epoch": 0, "iter": 4011, "iter_tflops": 1.1626633478288515, "iter_time": 1.8890067138671873, "loss": 0.2985460162162781, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 1.3559285357398276, "step_time": 1.6197600479125975} +{"epoch": 0, "iter": 4012, "iter_tflops": 2.816508555473431, "iter_time": 0.7797877502441407, "loss": 0.18682710826396942, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 3.510396431741381, "step_time": 0.6256498126983643} +{"epoch": 0, "iter": 4013, "iter_tflops": 15.844381651370949, "iter_time": 1.1272752380371096, "loss": 0.32186317443847656, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 16.978042396479665, "step_time": 1.052004623413086} +{"epoch": 0, "iter": 4014, "iter_tflops": 16.74301807380888, "iter_time": 1.0667717742919924, "loss": 0.26357436180114746, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 20.022096494614793, "step_time": 0.8920633811950683} +{"epoch": 0, "iter": 4015, "iter_tflops": 30.506806258925362, "iter_time": 0.5854752197265625, "loss": 0.38733789324760437, "lr": 3e-05, "seqlen": 
7120.0, "step_tflops": 32.78328826622259, "step_time": 0.5448196334838867} +{"epoch": 0, "iter": 4016, "iter_tflops": 34.67130229856968, "iter_time": 0.5151516647338866, "loss": 0.27036967873573303, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 36.75793824842811, "step_time": 0.4859080772399902} +{"epoch": 0, "iter": 4017, "iter_tflops": 30.539066446405236, "iter_time": 0.6755639877319335, "loss": 0.062370505183935165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.55457392414751, "step_time": 0.6337387046813965} +{"epoch": 0, "iter": 4018, "iter_tflops": 12.082693284834036, "iter_time": 1.7074912872314454, "loss": 0.08811452239751816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.983682775682233, "step_time": 1.3769040508270263} +{"epoch": 0, "iter": 4019, "iter_tflops": 49.578544555452964, "iter_time": 0.4161294708251953, "loss": 0.12054654210805893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.27650941158143, "step_time": 0.3801109123229981} +{"epoch": 0, "iter": 4020, "iter_tflops": 53.1107134218353, "iter_time": 0.388454460144043, "loss": 0.17855128645896912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.945176934484365, "step_time": 0.356045051574707} +{"epoch": 0, "iter": 4021, "iter_tflops": 24.77848508285833, "iter_time": 0.8326212615966796, "loss": 0.09107465296983719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.042375069702803, "step_time": 0.7922124404907226} +{"epoch": 0, "iter": 4022, "iter_tflops": 12.090142030630846, "iter_time": 1.7064393005371095, "loss": 0.04263710603117943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.313914829123537, "step_time": 1.2646316795349122} +{"epoch": 0, "iter": 4023, "iter_tflops": 43.75658865629914, "iter_time": 0.47149684524536134, "loss": 0.06576704233884811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.13001825832706, "step_time": 0.42865334892272944} +{"epoch": 0, "iter": 4024, "iter_tflops": 43.558250361989785, "iter_time": 0.4736437606811524, "loss": 0.12261547148227692, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 47.48634715600903, "step_time": 0.4344636878967285} +{"epoch": 0, "iter": 4025, "iter_tflops": 19.683036464190735, "iter_time": 1.0481661987304687, "loss": 0.2271570861339569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.82266065369797, "step_time": 0.9908000640869139} +{"epoch": 0, "iter": 4026, "iter_tflops": 7.541719292622876, "iter_time": 2.7355955200195314, "loss": 0.21411550045013428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.719100208392307, "step_time": 2.366195251464844} +{"epoch": 0, "iter": 4027, "iter_tflops": 12.500968091207977, "iter_time": 1.6503596649169923, "loss": 0.14829112589359283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.94229322295456, "step_time": 1.2177273311614991} +{"epoch": 0, "iter": 4028, "iter_tflops": 44.2550433867758, "iter_time": 0.4661862678527832, "loss": 0.23994548618793488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.671208835064945, "step_time": 0.4327789039611817} +{"epoch": 0, "iter": 4029, "iter_tflops": 21.348624544426595, "iter_time": 0.7538801345825195, "loss": 0.27419787645339966, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 22.5976978406472, "step_time": 0.7122098922729492} +{"epoch": 0, "iter": 4030, "iter_tflops": 10.98313059867655, "iter_time": 1.4653657989501954, "loss": 0.2412067949771881, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 13.134315430403792, "step_time": 1.2253629837036133} +{"epoch": 0, "iter": 4031, "iter_tflops": 24.766239346107593, "iter_time": 0.6498485183715821, "loss": 0.46028074622154236, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 26.70644060472139, "step_time": 0.6026375503540038} +{"epoch": 0, "iter": 4032, "iter_tflops": 25.63877862377501, "iter_time": 0.6277328643798828, "loss": 0.24848820269107819, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 27.599088842234462, "step_time": 0.5831462059020996} +{"epoch": 0, "iter": 4033, "iter_tflops": 23.845529546224647, "iter_time": 0.8651975402832031, "loss": 0.06675621122121811, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 25.874199413819156, "step_time": 0.7973616180419922} +{"epoch": 0, "iter": 4034, "iter_tflops": 38.29760655650532, "iter_time": 0.5387045135498046, "loss": 0.10059282183647156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.4673298590442, "step_time": 0.4858109416961669} +{"epoch": 0, "iter": 4035, "iter_tflops": 42.9422965082021, "iter_time": 0.48043759155273436, "loss": 0.10567525029182434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.18282956301376, "step_time": 0.4372585048675537} +{"epoch": 0, "iter": 4036, "iter_tflops": 44.42265131107997, "iter_time": 0.464427333831787, "loss": 0.10085627436637878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.61654080479683, "step_time": 0.4243636665344238} +{"epoch": 0, "iter": 4037, "iter_tflops": 16.980240580074152, "iter_time": 1.1198935852050782, "loss": 0.1836002618074417, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 17.827280536342784, "step_time": 1.066683303833008} +{"epoch": 0, "iter": 4038, "iter_tflops": 9.098514918538147, "iter_time": 2.0900182800292972, "loss": 0.32009410858154297, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 10.892546244688187, "step_time": 1.7457867126464843} +{"epoch": 0, "iter": 4039, "iter_tflops": 13.472232759004763, "iter_time": 1.4115004425048827, "loss": 0.26748406887054443, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 18.489161872049298, "step_time": 1.0284978103637696} +{"epoch": 0, "iter": 4040, "iter_tflops": 20.253858310774284, "iter_time": 0.9388859252929688, "loss": 0.2719394564628601, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 31.02444663683337, "step_time": 0.6129380073547364} +{"epoch": 0, "iter": 4041, "iter_tflops": 17.054070541512537, "iter_time": 0.8334685440063476, "loss": 0.1730075478553772, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 18.21571144037758, "step_time": 0.7803171119689942} +{"epoch": 0, "iter": 4042, "iter_tflops": 6.140149631034463, "iter_time": 2.314932403564453, "loss": 0.19690117239952087, "lr": 
3e-05, "seqlen": 5696.0, "step_tflops": 7.071076410209925, "step_time": 2.0101651458740233} +{"epoch": 0, "iter": 4043, "iter_tflops": 8.289415857658566, "iter_time": 1.7147205047607423, "loss": 0.258256196975708, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 10.531080460105498, "step_time": 1.3497220344543457} +{"epoch": 0, "iter": 4044, "iter_tflops": 24.68787324626063, "iter_time": 0.5757495269775391, "loss": 0.23614934086799622, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 26.330782289418522, "step_time": 0.5398256378173828} +{"epoch": 0, "iter": 4045, "iter_tflops": 19.563497542330502, "iter_time": 0.716142677307129, "loss": 0.206821009516716, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 20.75396597514673, "step_time": 0.6750640106201172} +{"epoch": 0, "iter": 4046, "iter_tflops": 10.162428263051005, "iter_time": 1.3786326599121095, "loss": 0.2592790126800537, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 12.657955233010444, "step_time": 1.1068340225219728} +{"epoch": 0, "iter": 4047, "iter_tflops": 25.12238499931417, "iter_time": 0.5576801528930664, "loss": 0.39687955379486084, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 26.835183405705358, "step_time": 0.5220853271484376} +{"epoch": 0, "iter": 4048, "iter_tflops": 25.991193532588582, "iter_time": 0.5390385589599609, "loss": 0.2454107701778412, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 27.61157522682969, "step_time": 0.5074051513671876} +{"epoch": 0, "iter": 4049, "iter_tflops": 17.206182758490797, "iter_time": 0.7126887512207031, "loss": 0.0030739265494048595, "lr": 3e-05, "seqlen": 4928.0, "step_tflops": 18.361953875653576, "step_time": 0.6678294143676756} +{"epoch": 0, "iter": 4050, "iter_tflops": 9.393916547846178, "iter_time": 1.305382354736328, "loss": 0.007985345087945461, "lr": 3e-05, "seqlen": 4928.0, "step_tflops": 11.222263218569822, "step_time": 1.0927076530456543} +{"epoch": 0, "iter": 4051, "iter_tflops": 31.32021515920466, "iter_time": 0.3915251808166504, "loss": 0.0053475103341042995, 
"lr": 3e-05, "seqlen": 4928.0, "step_tflops": 34.53088558230201, "step_time": 0.35512129783630375} +{"epoch": 0, "iter": 4052, "iter_tflops": 32.353515159121855, "iter_time": 0.37902072906494144, "loss": 0.020147647708654404, "lr": 3e-05, "seqlen": 4928.0, "step_tflops": 35.6937257223905, "step_time": 0.3435520572662354} +{"epoch": 0, "iter": 4053, "iter_tflops": 46.39263032770414, "iter_time": 0.4447062683105468, "loss": 0.5796058773994446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.806420060248676, "step_time": 0.40607256889343263} +{"epoch": 0, "iter": 4054, "iter_tflops": 44.53210583925242, "iter_time": 0.46328582763671866, "loss": 0.6564128994941711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.56593843816036, "step_time": 0.4248058242797851} +{"epoch": 0, "iter": 4055, "iter_tflops": 47.44927123164211, "iter_time": 0.4348031692504883, "loss": 0.6911560297012329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.166639744954864, "step_time": 0.40321376609802245} +{"epoch": 0, "iter": 4056, "iter_tflops": 46.42966666076478, "iter_time": 0.4443515319824219, "loss": 0.6553186774253845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.430694888163316, "step_time": 0.4090979423522949} +{"epoch": 0, "iter": 4057, "iter_tflops": 26.686536395867858, "iter_time": 0.7730899658203125, "loss": 0.14393632113933563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.257957150488895, "step_time": 0.7300985488891601} +{"epoch": 0, "iter": 4058, "iter_tflops": 19.501637695716965, "iter_time": 1.0579159469604493, "loss": 0.08775955438613892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.672966009685577, "step_time": 0.8036116085052489} +{"epoch": 0, "iter": 4059, "iter_tflops": 51.244534051957764, "iter_time": 0.4026008605957031, "loss": 0.11412525922060013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.92238958399069, "step_time": 0.3689236755371094} +{"epoch": 0, "iter": 4060, "iter_tflops": 50.097281276190316, "iter_time": 0.41182062149047854, "loss": 
0.1270882785320282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.61603897864983, "step_time": 0.37774789047241214} +{"epoch": 0, "iter": 4061, "iter_tflops": 5.4867424608574025, "iter_time": 0.6158378753662109, "loss": 0.08672817051410675, "lr": 3e-05, "seqlen": 1376.0, "step_tflops": 5.8956912005481525, "step_time": 0.5731208953857422} +{"epoch": 0, "iter": 4062, "iter_tflops": 6.264638547553588, "iter_time": 0.5393677215576171, "loss": 0.09434729814529419, "lr": 3e-05, "seqlen": 1376.0, "step_tflops": 6.828795552476861, "step_time": 0.49480816841125486} +{"epoch": 0, "iter": 4063, "iter_tflops": 6.817392996757898, "iter_time": 0.49563576889038086, "loss": 0.26244494318962097, "lr": 3e-05, "seqlen": 1376.0, "step_tflops": 7.457577029814868, "step_time": 0.45308869171142574} +{"epoch": 0, "iter": 4064, "iter_tflops": 6.911302558352456, "iter_time": 0.4889011573791504, "loss": 0.2885916531085968, "lr": 3e-05, "seqlen": 1376.0, "step_tflops": 7.503523940070703, "step_time": 0.45031425857543944} +{"epoch": 0, "iter": 4065, "iter_tflops": 21.5026213016735, "iter_time": 0.9594687652587891, "loss": 0.26720157265663147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.920464067420415, "step_time": 0.9001167449951172} +{"epoch": 0, "iter": 4066, "iter_tflops": 10.731587524086896, "iter_time": 1.9224642639160154, "loss": 0.2934545874595642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.27449693442046, "step_time": 1.680809700012207} +{"epoch": 0, "iter": 4067, "iter_tflops": 16.48378252862305, "iter_time": 1.2515994720458983, "loss": 0.2687164545059204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.83078918530739, "step_time": 0.9904134368896484} +{"epoch": 0, "iter": 4068, "iter_tflops": 39.04920489511969, "iter_time": 0.5283358154296875, "loss": 0.26171788573265076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.650976243295595, "step_time": 0.48371913909912106} +{"epoch": 0, "iter": 4069, "iter_tflops": 12.634184684994931, "iter_time": 1.1831868438720703, 
"loss": 0.33965760469436646, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 13.452688184183186, "step_time": 1.1111980667114258} +{"epoch": 0, "iter": 4070, "iter_tflops": 9.091888982058613, "iter_time": 1.6441688995361328, "loss": 0.34492602944374084, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 12.197147816854892, "step_time": 1.2255816955566408} +{"epoch": 0, "iter": 4071, "iter_tflops": 22.33358002757485, "iter_time": 0.6693329544067383, "loss": 0.3679288327693939, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.249526636079008, "step_time": 0.6164491920471191} +{"epoch": 0, "iter": 4072, "iter_tflops": 24.593504937977908, "iter_time": 0.6078271942138672, "loss": 0.18621087074279785, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 26.35623432158164, "step_time": 0.5671751480102539} +{"epoch": 0, "iter": 4073, "iter_tflops": 30.76949803581277, "iter_time": 0.6705047149658203, "loss": 0.6908479332923889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.61127386929694, "step_time": 0.6138146858215332} +{"epoch": 0, "iter": 4074, "iter_tflops": 42.708703219090076, "iter_time": 0.4830653228759766, "loss": 0.716625988483429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.22301343160142, "step_time": 0.4463381328582764} +{"epoch": 0, "iter": 4075, "iter_tflops": 49.903272198644395, "iter_time": 0.4134216575622559, "loss": 0.7268979549407959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.0191678097733, "step_time": 0.38192172050476075} +{"epoch": 0, "iter": 4076, "iter_tflops": 50.2999725024029, "iter_time": 0.41016112899780277, "loss": 0.7010325789451599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.220070231153635, "step_time": 0.3805065803527832} +{"epoch": 0, "iter": 4077, "iter_tflops": 40.38257268338104, "iter_time": 0.5108910140991212, "loss": 0.8258751630783081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.76003210156308, "step_time": 0.47145974349975583} +{"epoch": 0, "iter": 4078, "iter_tflops": 41.8477082050322, "iter_time": 0.49300414276123045, 
"loss": 0.8480455279350281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79704677633149, "step_time": 0.45048960494995116} +{"epoch": 0, "iter": 4079, "iter_tflops": 43.21287933764225, "iter_time": 0.4774292716979981, "loss": 0.7717933654785156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.896276692495235, "step_time": 0.43993030929565424} +{"epoch": 0, "iter": 4080, "iter_tflops": 47.298305202330646, "iter_time": 0.43619096755981446, "loss": 0.745313286781311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19166019727454, "step_time": 0.40301669120788575} +{"epoch": 0, "iter": 4081, "iter_tflops": 29.086850600745315, "iter_time": 0.7092927932739257, "loss": 0.7496179342269897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.811970009516006, "step_time": 0.6695804748535156} +{"epoch": 0, "iter": 4082, "iter_tflops": 16.44573660578283, "iter_time": 1.2544949493408204, "loss": 0.7796241044998169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.796451629378137, "step_time": 0.866981927871704} +{"epoch": 0, "iter": 4083, "iter_tflops": 39.27622876563358, "iter_time": 0.5252819366455077, "loss": 0.7814927697181702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.885822699123416, "step_time": 0.48107025146484383} +{"epoch": 0, "iter": 4084, "iter_tflops": 42.12662818176759, "iter_time": 0.4897399673461914, "loss": 0.6576001644134521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.971908605619795, "step_time": 0.4487760925292969} +{"epoch": 0, "iter": 4085, "iter_tflops": 19.64572445316164, "iter_time": 1.0501569213867188, "loss": 0.21076107025146484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.903038416614752, "step_time": 0.9869901733398438} +{"epoch": 0, "iter": 4086, "iter_tflops": 15.102048947211026, "iter_time": 1.3661122131347654, "loss": 0.2551300823688507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.456754307545722, "step_time": 1.1818401718139648} +{"epoch": 0, "iter": 4087, "iter_tflops": 47.016087409141214, "iter_time": 
0.43880923843383784, "loss": 0.3247855305671692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.440058807557016, "step_time": 0.4010705661773682} +{"epoch": 0, "iter": 4088, "iter_tflops": 47.657186874842374, "iter_time": 0.4329062385559082, "loss": 0.21495988965034485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.22642414124068, "step_time": 0.4027431907653809} +{"epoch": 0, "iter": 4089, "iter_tflops": 26.065843685330417, "iter_time": 0.7295394973754883, "loss": 0.06423097848892212, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 27.560243151872502, "step_time": 0.6899816665649413} +{"epoch": 0, "iter": 4090, "iter_tflops": 14.623853074951581, "iter_time": 1.3003455657958982, "loss": 0.0918693020939827, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 17.628396217052444, "step_time": 1.0787176704406738} +{"epoch": 0, "iter": 4091, "iter_tflops": 43.21406927330559, "iter_time": 0.4400433197021484, "loss": 0.15123985707759857, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 47.91878040551461, "step_time": 0.39683945083618166} +{"epoch": 0, "iter": 4092, "iter_tflops": 53.5242341988113, "iter_time": 0.3552794876098633, "loss": 0.12315327674150467, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 58.21904187269211, "step_time": 0.32662960243225103} +{"epoch": 0, "iter": 4093, "iter_tflops": 20.319833356498105, "iter_time": 1.0153180465698242, "loss": 0.6736992001533508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.230077556455917, "step_time": 0.9717860641479492} +{"epoch": 0, "iter": 4094, "iter_tflops": 19.505254652206993, "iter_time": 1.0577197723388672, "loss": 0.5920827984809875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.96587218261332, "step_time": 0.712255214691162} +{"epoch": 0, "iter": 4095, "iter_tflops": 47.034272467667364, "iter_time": 0.43863957977294926, "loss": 0.7945175170898438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.803724276141935, "step_time": 0.4060941162109375} +{"epoch": 0, "iter": 4096, "iter_tflops": 45.959942780196926, 
"iter_time": 0.44889293289184573, "loss": 0.5779693722724915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.453626436803376, "step_time": 0.4171805992126465} +{"epoch": 0, "iter": 4097, "iter_tflops": 16.033781210858592, "iter_time": 0.8382496032714843, "loss": 0.024654235690832138, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 16.822220270138825, "step_time": 0.7989617614746094} +{"epoch": 0, "iter": 4098, "iter_tflops": 11.92594371980426, "iter_time": 1.1269808959960939, "loss": 0.032302942126989365, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 14.438204484732694, "step_time": 0.9308851909637451} +{"epoch": 0, "iter": 4099, "iter_tflops": 35.957859207884326, "iter_time": 0.3737795028686523, "loss": 0.05024484917521477, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 39.473870279171976, "step_time": 0.34048626708984375} +{"epoch": 0, "iter": 4100, "iter_tflops": 38.02679244120555, "iter_time": 0.35344318771362304, "loss": 0.029627202078700066, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 41.41265822184154, "step_time": 0.32454595565795896} +{"epoch": 0, "iter": 4101, "iter_tflops": 46.31572237199512, "iter_time": 0.445444709777832, "loss": 0.019552793353796005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.88720466339928, "step_time": 0.4054279193878174} +{"epoch": 0, "iter": 4102, "iter_tflops": 13.278699904190747, "iter_time": 1.5536983032226561, "loss": 0.002772621577605605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.875446632908933, "step_time": 1.2995598793029786} +{"epoch": 0, "iter": 4103, "iter_tflops": 53.56111837097072, "iter_time": 0.38518787765502926, "loss": 0.002740205265581608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.95943218207125, "step_time": 0.3499201526641846} +{"epoch": 0, "iter": 4104, "iter_tflops": 52.48150489339666, "iter_time": 0.39311169815063474, "loss": 0.009388069622218609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.43568707179482, "step_time": 0.35920339012145996} +{"epoch": 0, "iter": 4105, "iter_tflops": 
21.837184344873453, "iter_time": 0.6770667419433594, "loss": 0.2640221416950226, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 23.228038405705725, "step_time": 0.6365251770019531} +{"epoch": 0, "iter": 4106, "iter_tflops": 24.412974645579844, "iter_time": 0.6056300582885741, "loss": 0.4283548593521118, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 26.855580920255697, "step_time": 0.5505459480285645} +{"epoch": 0, "iter": 4107, "iter_tflops": 26.895137867152883, "iter_time": 0.549736213684082, "loss": 0.2641897201538086, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 28.69012280255603, "step_time": 0.5153422088623048} +{"epoch": 0, "iter": 4108, "iter_tflops": 27.839408310363513, "iter_time": 0.5310899963378907, "loss": 0.23543797433376312, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 29.547647804994124, "step_time": 0.5003860664367676} +{"epoch": 0, "iter": 4109, "iter_tflops": 28.959256391372385, "iter_time": 0.7124179306030274, "loss": 0.16728876531124115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.672028647191343, "step_time": 0.672635440826416} +{"epoch": 0, "iter": 4110, "iter_tflops": 10.225964785135764, "iter_time": 2.017520492553711, "loss": 0.1451500505208969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.68929510602645, "step_time": 1.7649561691284181} +{"epoch": 0, "iter": 4111, "iter_tflops": 14.59935085378081, "iter_time": 1.4131514282226563, "loss": 0.15055157244205475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.44878395390828, "step_time": 1.1182901573181152} +{"epoch": 0, "iter": 4112, "iter_tflops": 39.36138916475399, "iter_time": 0.5241454620361328, "loss": 0.1301169991493225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.1107321749462, "step_time": 0.4785604991912842} +{"epoch": 0, "iter": 4113, "iter_tflops": 21.622995858023874, "iter_time": 0.8355443801879883, "loss": 0.27492186427116394, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 23.100559397083998, "step_time": 0.782101089477539} +{"epoch": 0, "iter": 4114, "iter_tflops": 
16.106502445964946, "iter_time": 1.1217191772460937, "loss": 0.41487014293670654, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 19.01936522348291, "step_time": 0.9499251136779785} +{"epoch": 0, "iter": 4115, "iter_tflops": 32.4922665830655, "iter_time": 0.556039161682129, "loss": 0.35259485244750977, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 34.65055947177673, "step_time": 0.5214049339294434} +{"epoch": 0, "iter": 4116, "iter_tflops": 33.222992992039565, "iter_time": 0.5438093032836914, "loss": 0.24337902665138245, "lr": 3e-05, "seqlen": 7200.0, "step_tflops": 35.259758253757795, "step_time": 0.5123963851928711} +{"epoch": 0, "iter": 4117, "iter_tflops": 38.775965213025586, "iter_time": 0.5320587997436523, "loss": 0.32463380694389343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.92111666218845, "step_time": 0.49214083862304686} +{"epoch": 0, "iter": 4118, "iter_tflops": 44.8191626569822, "iter_time": 0.46031858444213863, "loss": 0.4416767656803131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.423390507050144, "step_time": 0.42605636024475096} +{"epoch": 0, "iter": 4119, "iter_tflops": 46.36054856537259, "iter_time": 0.44501400756835935, "loss": 0.43039339780807495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.09238740352632, "step_time": 0.4118608551025391} +{"epoch": 0, "iter": 4120, "iter_tflops": 47.80577667902515, "iter_time": 0.43156068038940426, "loss": 0.5843280553817749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.061204819774055, "step_time": 0.39628536415100096} +{"epoch": 0, "iter": 4121, "iter_tflops": 15.356796246463304, "iter_time": 0.9335510635375976, "loss": 0.007261800579726696, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 16.054359546766474, "step_time": 0.8929881896972656} +{"epoch": 0, "iter": 4122, "iter_tflops": 9.6495087610422, "iter_time": 1.4857081146240234, "loss": 0.006006553303450346, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 15.628971459302928, "step_time": 0.9172934703826905} +{"epoch": 0, "iter": 4123, 
"iter_tflops": 41.74493867662011, "iter_time": 0.3434273452758789, "loss": 0.006903046742081642, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 45.97371124250611, "step_time": 0.3118380718231201} +{"epoch": 0, "iter": 4124, "iter_tflops": 42.14480666324149, "iter_time": 0.34016892242431634, "loss": 0.01178768277168274, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 46.515119769395945, "step_time": 0.3082084608078003} +{"epoch": 0, "iter": 4125, "iter_tflops": 47.96463894890519, "iter_time": 0.43013132095336915, "loss": 0.02658458612859249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.662970884891465, "step_time": 0.39175711441040034} +{"epoch": 0, "iter": 4126, "iter_tflops": 38.56224388859376, "iter_time": 0.5350075988769531, "loss": 0.02040954679250717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.29337536816173, "step_time": 0.4765415802001954} +{"epoch": 0, "iter": 4127, "iter_tflops": 42.69356979280688, "iter_time": 0.48323655319213865, "loss": 0.026006370782852173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.114433403396035, "step_time": 0.437893274307251} +{"epoch": 0, "iter": 4128, "iter_tflops": 42.589874162758555, "iter_time": 0.4844131126403809, "loss": 0.060421887785196304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.83798847116268, "step_time": 0.44047778701782225} +{"epoch": 0, "iter": 4129, "iter_tflops": 21.381743064722652, "iter_time": 0.9648929672241209, "loss": 0.6901048421859741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.431187213825524, "step_time": 0.9197504043579101} +{"epoch": 0, "iter": 4130, "iter_tflops": 15.188110836606835, "iter_time": 1.3583712768554688, "loss": 0.7482144832611084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.238450461544033, "step_time": 1.1311867504119875} +{"epoch": 0, "iter": 4131, "iter_tflops": 47.64799516502715, "iter_time": 0.43298974990844724, "loss": 0.8643802404403687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.52767889011006, "step_time": 0.4003885669708252} +{"epoch": 0, 
"iter": 4132, "iter_tflops": 46.70436231162206, "iter_time": 0.4417380409240723, "loss": 1.0328021049499512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.654640277889506, "step_time": 0.40728931045532224} +{"epoch": 0, "iter": 4133, "iter_tflops": 30.955540408193027, "iter_time": 0.6664749908447265, "loss": 0.30325791239738464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.92027314166051, "step_time": 0.6266987342834474} +{"epoch": 0, "iter": 4134, "iter_tflops": 15.921303639451745, "iter_time": 1.295816848754883, "loss": 0.27373841404914856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.065520067127075, "step_time": 1.142014923095703} +{"epoch": 0, "iter": 4135, "iter_tflops": 39.52072545390478, "iter_time": 0.5220322570800782, "loss": 0.29704585671424866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.2043037359056, "step_time": 0.4775240364074707} +{"epoch": 0, "iter": 4136, "iter_tflops": 39.205482120719225, "iter_time": 0.5262298126220704, "loss": 0.2583394944667816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.436473947997385, "step_time": 0.4861641788482667} +{"epoch": 0, "iter": 4137, "iter_tflops": 14.041337705383121, "iter_time": 1.4693111114501953, "loss": 0.3369268774986267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.884639958284916, "step_time": 1.3860660095214845} +{"epoch": 0, "iter": 4138, "iter_tflops": 15.566815861935392, "iter_time": 1.3253252105712892, "loss": 0.2928760349750519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.21797498826695, "step_time": 1.0204332294464111} +{"epoch": 0, "iter": 4139, "iter_tflops": 41.42147381104611, "iter_time": 0.4980772438049317, "loss": 0.24150450527668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.14286217708594, "step_time": 0.45701784324646} +{"epoch": 0, "iter": 4140, "iter_tflops": 39.36610158218851, "iter_time": 0.5240827178955079, "loss": 0.33906853199005127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.09865767770671, "step_time": 0.4786945724487305} +{"epoch": 0, 
"iter": 4141, "iter_tflops": 19.75498338838978, "iter_time": 1.0443488159179688, "loss": 0.0366608090698719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.848432437615696, "step_time": 0.9895752868652343} +{"epoch": 0, "iter": 4142, "iter_tflops": 26.47267195800173, "iter_time": 0.7793355178833007, "loss": 0.03388943895697594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.18840559490662, "step_time": 0.6216355724334717} +{"epoch": 0, "iter": 4143, "iter_tflops": 47.76634162721854, "iter_time": 0.43191696929931644, "loss": 0.05044754967093468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.496365857961486, "step_time": 0.3930004138946533} +{"epoch": 0, "iter": 4144, "iter_tflops": 41.16996356293852, "iter_time": 0.5011200332641601, "loss": 0.041969750076532364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.54685372661569, "step_time": 0.45296418571472163} +{"epoch": 0, "iter": 4145, "iter_tflops": 14.435486571336554, "iter_time": 1.4176951293945315, "loss": 0.08511879295110703, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 15.329905727177733, "step_time": 1.3349800949096682} +{"epoch": 0, "iter": 4146, "iter_tflops": 22.985603581648707, "iter_time": 0.8903450775146483, "loss": 0.2173158973455429, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 27.60997692587416, "step_time": 0.7412218799591065} +{"epoch": 0, "iter": 4147, "iter_tflops": 47.61633769219495, "iter_time": 0.4297919578552246, "loss": 0.17418105900287628, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 51.952531717780666, "step_time": 0.39391957092285157} +{"epoch": 0, "iter": 4148, "iter_tflops": 52.929001899966195, "iter_time": 0.38665227508544925, "loss": 0.12192776799201965, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 57.323886038581, "step_time": 0.3570085773468018} +{"epoch": 0, "iter": 4149, "iter_tflops": 28.887846106740774, "iter_time": 0.7141790161132813, "loss": 0.2017083764076233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.569810074964845, "step_time": 0.6748845825195312} 
+{"epoch": 0, "iter": 4150, "iter_tflops": 15.22483920389436, "iter_time": 1.3550943450927733, "loss": 0.26228228211402893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.660221272470938, "step_time": 1.1682239532470704} +{"epoch": 0, "iter": 4151, "iter_tflops": 46.386768370348264, "iter_time": 0.4447624664306641, "loss": 0.2581179738044739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.77237974633814, "step_time": 0.406344820022583} +{"epoch": 0, "iter": 4152, "iter_tflops": 49.17519209067082, "iter_time": 0.4195427131652832, "loss": 0.23070551455020905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26488373873856, "step_time": 0.387330114364624} +{"epoch": 0, "iter": 4153, "iter_tflops": 46.87433021547069, "iter_time": 0.4401362838745117, "loss": 0.15176281332969666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.52775032042836, "step_time": 0.40038801193237306} +{"epoch": 0, "iter": 4154, "iter_tflops": 45.98480377402882, "iter_time": 0.44865024566650397, "loss": 0.142830953001976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24844485867528, "step_time": 0.41058173179626467} +{"epoch": 0, "iter": 4155, "iter_tflops": 49.501547634587645, "iter_time": 0.4167767372131348, "loss": 0.15875552594661713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.70347516484341, "step_time": 0.3841668243408204} +{"epoch": 0, "iter": 4156, "iter_tflops": 50.95000523717221, "iter_time": 0.40492819213867187, "loss": 0.1160956397652626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.17269434592019, "step_time": 0.3739366683959961} +{"epoch": 0, "iter": 4157, "iter_tflops": 46.65785071200068, "iter_time": 0.44217839431762695, "loss": 0.1864149123430252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.42149780153377, "step_time": 0.40121533584594726} +{"epoch": 0, "iter": 4158, "iter_tflops": 43.1595750882995, "iter_time": 0.47801892089843745, "loss": 0.35290756821632385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.82725298884783, "step_time": 0.4225323410034179} 
+{"epoch": 0, "iter": 4159, "iter_tflops": 49.827272922887495, "iter_time": 0.4140522308349609, "loss": 0.2327774167060852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.169673531016436, "step_time": 0.38086058425903324} +{"epoch": 0, "iter": 4160, "iter_tflops": 47.26938338604365, "iter_time": 0.4364578514099121, "loss": 0.19686806201934814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.049978546421784, "step_time": 0.40413520431518557} +{"epoch": 0, "iter": 4161, "iter_tflops": 26.602302531861277, "iter_time": 0.7755378875732423, "loss": 0.13937315344810486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.301220434852407, "step_time": 0.7289824676513672} +{"epoch": 0, "iter": 4162, "iter_tflops": 15.918711774737988, "iter_time": 1.2960278320312502, "loss": 0.1273091435432434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.833432125707088, "step_time": 1.0954505462646484} +{"epoch": 0, "iter": 4163, "iter_tflops": 46.84833361016398, "iter_time": 0.4403805198669434, "loss": 0.15661677718162537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.386693513655246, "step_time": 0.40148707962036134} +{"epoch": 0, "iter": 4164, "iter_tflops": 51.72445533173186, "iter_time": 0.39886536026000974, "loss": 0.09575841575860977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.00348627259057, "step_time": 0.3683894500732422} +{"epoch": 0, "iter": 4165, "iter_tflops": 32.775647791469076, "iter_time": 0.629464096069336, "loss": 0.6749213933944702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.85650552936678, "step_time": 0.5918864555358887} +{"epoch": 0, "iter": 4166, "iter_tflops": 18.053767993250073, "iter_time": 1.142758316040039, "loss": 0.613057017326355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.647916228838202, "step_time": 0.9530290718078612} +{"epoch": 0, "iter": 4167, "iter_tflops": 47.584079729935986, "iter_time": 0.43357134628295907, "loss": 0.5551071166992188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.9926556716202, "step_time": 
0.3968078422546386} +{"epoch": 0, "iter": 4168, "iter_tflops": 50.1910570568781, "iter_time": 0.41105118560791015, "loss": 0.5505371689796448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.061692627974644, "step_time": 0.381621301651001} +{"epoch": 0, "iter": 4169, "iter_tflops": 18.180607989483374, "iter_time": 1.134785675048828, "loss": 0.82475346326828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.91091182828957, "step_time": 1.0909623870849607} +{"epoch": 0, "iter": 4170, "iter_tflops": 16.786021822146814, "iter_time": 1.2290639038085938, "loss": 0.9415023922920227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.63084395109948, "step_time": 1.0000120964050294} +{"epoch": 0, "iter": 4171, "iter_tflops": 38.59723565466184, "iter_time": 0.5345225677490235, "loss": 0.921351432800293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.139062879457725, "step_time": 0.48959545135498045} +{"epoch": 0, "iter": 4172, "iter_tflops": 39.50447998284338, "iter_time": 0.5222469329833984, "loss": 0.8358632326126099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84600496411072, "step_time": 0.4815173206329346} +{"epoch": 0, "iter": 4173, "iter_tflops": 25.227280009608897, "iter_time": 0.8178088760375977, "loss": 0.01974351517856121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.296314178790436, "step_time": 0.7558197555541992} +{"epoch": 0, "iter": 4174, "iter_tflops": 24.071971688763856, "iter_time": 0.8570587310791016, "loss": 0.018862256780266762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.848229402673113, "step_time": 0.6911999111175537} +{"epoch": 0, "iter": 4175, "iter_tflops": 57.399128910264665, "iter_time": 0.3594321708679199, "loss": 0.02865242213010788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.30119396945663, "step_time": 0.32591950035095213} +{"epoch": 0, "iter": 4176, "iter_tflops": 59.47687458747148, "iter_time": 0.3468758850097656, "loss": 0.016652993857860565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.35404238610079, 
"step_time": 0.31568198013305665} +{"epoch": 0, "iter": 4177, "iter_tflops": 32.041790286864256, "iter_time": 0.6438807983398438, "loss": 0.2981478273868561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.156598911267935, "step_time": 0.6040148658752441} +{"epoch": 0, "iter": 4178, "iter_tflops": 12.274500555922334, "iter_time": 1.6808092041015628, "loss": 0.3894866108894348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.512981276948075, "step_time": 1.2493863563537597} +{"epoch": 0, "iter": 4179, "iter_tflops": 42.18374340581862, "iter_time": 0.48907687759399415, "loss": 0.2628198564052582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.31771220724335, "step_time": 0.4552545242309571} +{"epoch": 0, "iter": 4180, "iter_tflops": 49.695475736176164, "iter_time": 0.4151503372192383, "loss": 0.31983545422554016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.98793985514653, "step_time": 0.38214263343811034} +{"epoch": 0, "iter": 4181, "iter_tflops": 39.97812048635727, "iter_time": 0.5160596160888672, "loss": 0.9428555965423584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.196101380661354, "step_time": 0.47761471176147463} +{"epoch": 0, "iter": 4182, "iter_tflops": 43.06556203020098, "iter_time": 0.4790624465942382, "loss": 0.9127367734909058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35943565519452, "step_time": 0.4450246906280517} +{"epoch": 0, "iter": 4183, "iter_tflops": 47.058431370386145, "iter_time": 0.43841439056396486, "loss": 0.8218596577644348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52113351059616, "step_time": 0.4083656101226807} +{"epoch": 0, "iter": 4184, "iter_tflops": 43.127000743539796, "iter_time": 0.47837997436523433, "loss": 0.8267685174942017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.322406692920815, "step_time": 0.4453804321289062} +{"epoch": 0, "iter": 4185, "iter_tflops": 30.508490605432268, "iter_time": 0.6762410430908203, "loss": 0.03369515761733055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
32.394326224277286, "step_time": 0.6368736724853516} +{"epoch": 0, "iter": 4186, "iter_tflops": 19.26024438193048, "iter_time": 1.071175064086914, "loss": 0.017072394490242004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.981977843550045, "step_time": 0.860274896621704} +{"epoch": 0, "iter": 4187, "iter_tflops": 55.91664650084009, "iter_time": 0.36896156692504883, "loss": 0.008165478706359863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.51387484560735, "step_time": 0.33538926887512205} +{"epoch": 0, "iter": 4188, "iter_tflops": 57.498485486733706, "iter_time": 0.35881107711791993, "loss": 0.003601670265197754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.92276700618428, "step_time": 0.3278796291351318} +{"epoch": 0, "iter": 4189, "iter_tflops": 43.64589411444067, "iter_time": 0.4726926536560059, "loss": 0.3461760878562927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.63471896128577, "step_time": 0.4331104278564453} +{"epoch": 0, "iter": 4190, "iter_tflops": 39.50548302993708, "iter_time": 0.5222336730957031, "loss": 0.3867725431919098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.29210993463953, "step_time": 0.4765555095672607} +{"epoch": 0, "iter": 4191, "iter_tflops": 43.505676844921716, "iter_time": 0.47421612548828124, "loss": 0.3208833932876587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73810527892563, "step_time": 0.43217244148254397} +{"epoch": 0, "iter": 4192, "iter_tflops": 44.43624130670042, "iter_time": 0.4642852973937988, "loss": 0.4022907018661499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32665184905962, "step_time": 0.42690922546386717} +{"epoch": 0, "iter": 4193, "iter_tflops": 14.670094727888186, "iter_time": 1.4063367614746092, "loss": 0.7765867710113525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.841947141791247, "step_time": 1.3023079376220703} +{"epoch": 0, "iter": 4194, "iter_tflops": 17.0701861542376, "iter_time": 1.2086038970947266, "loss": 0.8103330135345459, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.2601399041767, "step_time": 1.01830952835083} +{"epoch": 0, "iter": 4195, "iter_tflops": 47.97656795440079, "iter_time": 0.4300243721008301, "loss": 0.9143788814544678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.875795140168684, "step_time": 0.3977017307281494} +{"epoch": 0, "iter": 4196, "iter_tflops": 49.632234401665585, "iter_time": 0.4156793212890625, "loss": 1.0662580728530884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.68343590239828, "step_time": 0.3843102283477784} +{"epoch": 0, "iter": 4197, "iter_tflops": 33.17978792883151, "iter_time": 0.6217970275878907, "loss": 0.557527482509613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.32137770370122, "step_time": 0.5840965118408203} +{"epoch": 0, "iter": 4198, "iter_tflops": 15.539913342032541, "iter_time": 1.327619598388672, "loss": 0.5459704995155334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.067416392217282, "step_time": 1.0280891723632812} +{"epoch": 0, "iter": 4199, "iter_tflops": 37.871976514500545, "iter_time": 0.5447588272094727, "loss": 0.5748713612556458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.47320670307862, "step_time": 0.49745595169067386} +{"epoch": 0, "iter": 4200, "iter_tflops": 39.79796782604946, "iter_time": 0.5183956527709961, "loss": 0.5706990361213684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.52589099825924, "step_time": 0.47399589157104494} +{"epoch": 0, "iter": 4201, "iter_tflops": 25.51073560230459, "iter_time": 0.8087220153808594, "loss": 0.057976361364126205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.253813008861286, "step_time": 0.7569984245300292} +{"epoch": 0, "iter": 4202, "iter_tflops": 24.04144210281279, "iter_time": 0.8581470870971679, "loss": 0.026443183422088623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.783111585104667, "step_time": 0.6927111511230469} +{"epoch": 0, "iter": 4203, "iter_tflops": 44.96825431809753, "iter_time": 0.4587924041748047, "loss": 0.0852595716714859, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 49.703258970573486, "step_time": 0.41508532714843754} +{"epoch": 0, "iter": 4204, "iter_tflops": 44.93706506382364, "iter_time": 0.4591108360290527, "loss": 0.0189790278673172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.619582910346764, "step_time": 0.41578530693054205} +{"epoch": 0, "iter": 4205, "iter_tflops": 20.368561260503103, "iter_time": 1.0128890914916993, "loss": 0.28429120779037476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.6603848446567, "step_time": 0.9524804687499999} +{"epoch": 0, "iter": 4206, "iter_tflops": 18.35272168234736, "iter_time": 1.124143539428711, "loss": 0.31982681155204773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.18597517457202, "step_time": 0.929916009902954} +{"epoch": 0, "iter": 4207, "iter_tflops": 36.46118044614264, "iter_time": 0.5658372344970704, "loss": 0.3805846571922302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.515575201538724, "step_time": 0.5221002960205079} +{"epoch": 0, "iter": 4208, "iter_tflops": 36.06712242521794, "iter_time": 0.5720193939208984, "loss": 0.2753541171550751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.42889903902211, "step_time": 0.5232480239868165} +{"epoch": 0, "iter": 4209, "iter_tflops": 15.109830728094286, "iter_time": 1.365408645629883, "loss": 0.3866698145866394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.014201113057844, "step_time": 1.2882998886108399} +{"epoch": 0, "iter": 4210, "iter_tflops": 16.836921634230674, "iter_time": 1.2253483123779296, "loss": 0.37598124146461487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.163349405224288, "step_time": 0.853817621231079} +{"epoch": 0, "iter": 4211, "iter_tflops": 46.4339124561353, "iter_time": 0.4443109016418457, "loss": 0.37025144696235657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.309374048423905, "step_time": 0.41008448028564454} +{"epoch": 0, "iter": 4212, "iter_tflops": 47.58583439752751, "iter_time": 0.4335553588867188, "loss": 0.3386904299259186, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 51.44465851838443, "step_time": 0.40103470611572267} +{"epoch": 0, "iter": 4213, "iter_tflops": 34.6464225973052, "iter_time": 0.5954754333496093, "loss": 0.9105141758918762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.25813892465813, "step_time": 0.5537338714599609} +{"epoch": 0, "iter": 4214, "iter_tflops": 16.568887101557248, "iter_time": 1.2451707458496093, "loss": 0.9377588033676147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.33181962936282, "step_time": 1.0672090835571288} +{"epoch": 0, "iter": 4215, "iter_tflops": 46.63093203894099, "iter_time": 0.44243365097045895, "loss": 0.8605491518974304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5133465711057, "step_time": 0.4084285621643066} +{"epoch": 0, "iter": 4216, "iter_tflops": 51.744201830988494, "iter_time": 0.39871314620971676, "loss": 0.9697360396385193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.08992517991414, "step_time": 0.36782173347473146} +{"epoch": 0, "iter": 4217, "iter_tflops": 35.73710259441552, "iter_time": 0.5773017959594726, "loss": 0.8436315655708313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.14543852025481, "step_time": 0.5408534889221192} +{"epoch": 0, "iter": 4218, "iter_tflops": 18.21616253382942, "iter_time": 1.132570785522461, "loss": 1.0125770568847656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.06678819615667, "step_time": 1.0281213569641114} +{"epoch": 0, "iter": 4219, "iter_tflops": 45.03953471363904, "iter_time": 0.45806631088256833, "loss": 1.075020670890808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.74300757561123, "step_time": 0.4232626285552979} +{"epoch": 0, "iter": 4220, "iter_tflops": 43.53859077219742, "iter_time": 0.4738576316833496, "loss": 0.935901403427124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.51157895018408, "step_time": 0.443568977355957} +{"epoch": 0, "iter": 4221, "iter_tflops": 37.23167114271744, "iter_time": 0.5541275177001953, "loss": 0.27144289016723633, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 40.087426800716095, "step_time": 0.5146524772644042} +{"epoch": 0, "iter": 4222, "iter_tflops": 9.458431794219644, "iter_time": 2.1812382812499997, "loss": 0.210078164935112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.805952896912537, "step_time": 1.9092340774536134} +{"epoch": 0, "iter": 4223, "iter_tflops": 16.707905225975466, "iter_time": 1.2348103027343749, "loss": 0.23298758268356323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.66620937525065, "step_time": 1.1052642288208008} +{"epoch": 0, "iter": 4224, "iter_tflops": 17.10110011271516, "iter_time": 1.2064190826416015, "loss": 0.19548951089382172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.164903041957665, "step_time": 0.9308000793457031} +{"epoch": 0, "iter": 4225, "iter_tflops": 15.147102714791451, "iter_time": 1.073358917236328, "loss": 0.36415451765060425, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 15.787626615981704, "step_time": 1.029811393737793} +{"epoch": 0, "iter": 4226, "iter_tflops": 8.773946358624709, "iter_time": 1.8530176849365234, "loss": 0.29737043380737305, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 11.61650797955162, "step_time": 1.3995839195251465} +{"epoch": 0, "iter": 4227, "iter_tflops": 23.50728414976789, "iter_time": 0.6916272277832031, "loss": 0.3019356429576874, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 25.287450460614153, "step_time": 0.642938591003418} +{"epoch": 0, "iter": 4228, "iter_tflops": 24.86059096049187, "iter_time": 0.653977928161621, "loss": 0.18917053937911987, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.76281931672584, "step_time": 0.6074949569702148} +{"epoch": 0, "iter": 4229, "iter_tflops": 22.480760736901992, "iter_time": 0.9177222137451172, "loss": 0.14567799866199493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.196489380978146, "step_time": 0.8526482162475586} +{"epoch": 0, "iter": 4230, "iter_tflops": 8.70460127502048, "iter_time": 2.3701365356445314, "loss": 0.1929044872522354, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 10.232467328385647, "step_time": 2.016238395690918} +{"epoch": 0, "iter": 4231, "iter_tflops": 11.124052300190652, "iter_time": 1.8546383056640625, "loss": 0.20581433176994324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.886521743028368, "step_time": 1.2217491455078127} +{"epoch": 0, "iter": 4232, "iter_tflops": 45.365628419455376, "iter_time": 0.45477367401123037, "loss": 0.24941381812095642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.46792731191381, "step_time": 0.37877507972717284} +{"epoch": 0, "iter": 4233, "iter_tflops": 17.313140379098463, "iter_time": 0.8327735366821288, "loss": 0.1912832111120224, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 18.26319455366156, "step_time": 0.7894525299072266} +{"epoch": 0, "iter": 4234, "iter_tflops": 7.987893129202474, "iter_time": 1.8049722137451172, "loss": 0.3078213632106781, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 9.733914222623707, "step_time": 1.481205280303955} +{"epoch": 0, "iter": 4235, "iter_tflops": 21.029437080680164, "iter_time": 0.6856068038940429, "loss": 0.40018609166145325, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 22.608630208546636, "step_time": 0.6377177658081055} +{"epoch": 0, "iter": 4236, "iter_tflops": 21.925825738470962, "iter_time": 0.657577293395996, "loss": 0.31024622917175293, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 23.613192907702242, "step_time": 0.6105876998901367} +{"epoch": 0, "iter": 4237, "iter_tflops": 24.85621846430873, "iter_time": 0.8300173873901368, "loss": 0.03644968569278717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.73835363425474, "step_time": 0.771591766357422} +{"epoch": 0, "iter": 4238, "iter_tflops": 8.628660923945661, "iter_time": 2.3909959716796876, "loss": 0.054330311715602875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.708836543289156, "step_time": 2.1249810333251955} +{"epoch": 0, "iter": 4239, "iter_tflops": 13.719202330406622, "iter_time": 1.5038114471435546, "loss": 0.07009311765432358, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 17.022913902626026, "step_time": 1.211960163116455} +{"epoch": 0, "iter": 4240, "iter_tflops": 36.91808301545365, "iter_time": 0.5588343658447266, "loss": 0.02336297743022442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.37745390612829, "step_time": 0.38651325607299797} +{"epoch": 0, "iter": 4241, "iter_tflops": 14.773043072534888, "iter_time": 1.0340162963867188, "loss": 0.16200703382492065, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 15.54623210504673, "step_time": 0.9825896835327148} +{"epoch": 0, "iter": 4242, "iter_tflops": 12.65120608264715, "iter_time": 1.2074396057128907, "loss": 0.3422884941101074, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 16.29781971293113, "step_time": 0.9372767372131348} +{"epoch": 0, "iter": 4243, "iter_tflops": 23.020529994610122, "iter_time": 0.6635627975463867, "loss": 0.27700644731521606, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.812684324404184, "step_time": 0.6156354179382324} +{"epoch": 0, "iter": 4244, "iter_tflops": 24.093265248163835, "iter_time": 0.6340181427001954, "loss": 0.23567339777946472, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 25.893891104836264, "step_time": 0.5899293861389161} +{"epoch": 0, "iter": 4245, "iter_tflops": 20.24115523387777, "iter_time": 1.0192646255493163, "loss": 0.008129318244755268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.016411787878617, "step_time": 0.9370779266357422} +{"epoch": 0, "iter": 4246, "iter_tflops": 18.499266958677033, "iter_time": 1.1152384338378905, "loss": 0.0020630445796996355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.3829073970904, "step_time": 0.8461293468475342} +{"epoch": 0, "iter": 4247, "iter_tflops": 53.53011083678729, "iter_time": 0.3854109992980957, "loss": 0.0068770889192819595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.51585702488532, "step_time": 0.35257269668579105} +{"epoch": 0, "iter": 4248, "iter_tflops": 58.88165729774018, "iter_time": 0.3503823509216309, "loss": 0.01206756941974163, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 64.98665487142212, "step_time": 0.31746661758422856} +{"epoch": 0, "iter": 4249, "iter_tflops": 46.15871366289515, "iter_time": 0.44695988845825196, "loss": 0.40784379839897156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.450769569741624, "step_time": 0.4089351596832276} +{"epoch": 0, "iter": 4250, "iter_tflops": 47.61752650326582, "iter_time": 0.4332668037414551, "loss": 0.658942461013794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.8848455526201, "step_time": 0.39763235855102536} +{"epoch": 0, "iter": 4251, "iter_tflops": 49.15430899871142, "iter_time": 0.41972095489501954, "loss": 0.5176464319229126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.207994838056884, "step_time": 0.38774423980712897} +{"epoch": 0, "iter": 4252, "iter_tflops": 42.70023584987467, "iter_time": 0.4831611137390137, "loss": 0.4783191680908203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.70376968331133, "step_time": 0.45140901184082033} +{"epoch": 0, "iter": 4253, "iter_tflops": 31.20462023484248, "iter_time": 0.6611550903320312, "loss": 0.3409608006477356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.140738872082366, "step_time": 0.6225296783447265} +{"epoch": 0, "iter": 4254, "iter_tflops": 11.770091956898241, "iter_time": 1.7528404693603512, "loss": 0.3662949204444885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.853044800026591, "step_time": 1.4892822341918945} +{"epoch": 0, "iter": 4255, "iter_tflops": 46.42994448129683, "iter_time": 0.44434887313842775, "loss": 0.2166403830051422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.461993843542615, "step_time": 0.40884420013427736} +{"epoch": 0, "iter": 4256, "iter_tflops": 50.014052468571975, "iter_time": 0.41250593566894533, "loss": 0.3644702136516571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.34416843315318, "step_time": 0.3796376705169678} +{"epoch": 0, "iter": 4257, "iter_tflops": 40.390566532601895, "iter_time": 0.5107899017333983, "loss": 0.10600683093070984, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 43.88624830311413, "step_time": 0.47010383224487307} +{"epoch": 0, "iter": 4258, "iter_tflops": 8.313758681347002, "iter_time": 2.4815603027343753, "loss": 0.09813281893730164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.567678096532699, "step_time": 1.9522825469970704} +{"epoch": 0, "iter": 4259, "iter_tflops": 10.826463710896812, "iter_time": 1.9056170196533202, "loss": 0.06854203343391418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.427944218559723, "step_time": 1.6600568161010743} +{"epoch": 0, "iter": 4260, "iter_tflops": 22.280859664442882, "iter_time": 0.9259559020996093, "loss": 0.09045998752117157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.130862480806485, "step_time": 0.7604289588928222} +{"epoch": 0, "iter": 4261, "iter_tflops": 13.19372098267426, "iter_time": 1.080423080444336, "loss": 0.2134496420621872, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 14.291899612611665, "step_time": 0.9974041976928711} +{"epoch": 0, "iter": 4262, "iter_tflops": 10.095722641555724, "iter_time": 1.4119643707275389, "loss": 0.46326684951782227, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 12.601926932532207, "step_time": 1.1311603965759276} +{"epoch": 0, "iter": 4263, "iter_tflops": 21.756509418132268, "iter_time": 0.6551970443725585, "loss": 0.2190156877040863, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 23.250979640780965, "step_time": 0.6130838737487793} +{"epoch": 0, "iter": 4264, "iter_tflops": 22.08079149152741, "iter_time": 0.6455747146606445, "loss": 0.3668728172779083, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 23.729300356426076, "step_time": 0.6007257041931152} +{"epoch": 0, "iter": 4265, "iter_tflops": 2.5671669174131995, "iter_time": 0.5952036590576171, "loss": 0.3404015898704529, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 2.8376530468094834, "step_time": 0.5384686279296875} +{"epoch": 0, "iter": 4266, "iter_tflops": 2.706071472837556, "iter_time": 0.5646514358520508, "loss": 0.43929263949394226, 
"lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.0143189170171145, "step_time": 0.5069095821380616} +{"epoch": 0, "iter": 4267, "iter_tflops": 2.7814570460849852, "iter_time": 0.5493477401733399, "loss": 0.44548991322517395, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.041449098793901, "step_time": 0.5023878726959229} +{"epoch": 0, "iter": 4268, "iter_tflops": 3.037889695875129, "iter_time": 0.502976505279541, "loss": 0.3608004152774811, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.3332642346083694, "step_time": 0.4584056453704834} +{"epoch": 0, "iter": 4269, "iter_tflops": 25.04923579823724, "iter_time": 0.8236216735839843, "loss": 0.2699921131134033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.49743929679096, "step_time": 0.750291446685791} +{"epoch": 0, "iter": 4270, "iter_tflops": 43.142132293481694, "iter_time": 0.47821218872070315, "loss": 0.31327494978904724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.8638164686399, "step_time": 0.43103736877441406} +{"epoch": 0, "iter": 4271, "iter_tflops": 48.721833888900726, "iter_time": 0.42344657135009767, "loss": 0.1700870245695114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.83240953392989, "step_time": 0.39050071144104004} +{"epoch": 0, "iter": 4272, "iter_tflops": 49.29068933762406, "iter_time": 0.41855964660644535, "loss": 0.30335819721221924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.14829316846684, "step_time": 0.3881797943115235} +{"epoch": 0, "iter": 4273, "iter_tflops": 29.155933489273156, "iter_time": 0.7076121749877929, "loss": 0.7773148417472839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.902991722730807, "step_time": 0.6676082916259766} +{"epoch": 0, "iter": 4274, "iter_tflops": 13.84228975835706, "iter_time": 1.4904393615722658, "loss": 0.6991274952888489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.091894802647662, "step_time": 1.2820798149108887} +{"epoch": 0, "iter": 4275, "iter_tflops": 45.318849903938485, "iter_time": 0.4552430953979492, "loss": 0.890705943107605, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.651450855461974, "step_time": 0.4155184421539307} +{"epoch": 0, "iter": 4276, "iter_tflops": 44.32491261422075, "iter_time": 0.46545141983032223, "loss": 0.6771684288978577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.336274455955945, "step_time": 0.4358410911560059} +{"epoch": 0, "iter": 4277, "iter_tflops": 23.042233514199104, "iter_time": 0.8953599700927733, "loss": 0.020334871485829353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.128434549462327, "step_time": 0.8550531311035157} +{"epoch": 0, "iter": 4278, "iter_tflops": 12.971717130744327, "iter_time": 1.5904674224853514, "loss": 0.07273750752210617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.315844972529199, "step_time": 1.3470424613952638} +{"epoch": 0, "iter": 4279, "iter_tflops": 41.55415607242464, "iter_time": 0.49648688507080074, "loss": 0.030775118619203568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74112394217439, "step_time": 0.4510403709411621} +{"epoch": 0, "iter": 4280, "iter_tflops": 46.91158492819914, "iter_time": 0.439786750793457, "loss": 0.03480016812682152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.786588989584835, "step_time": 0.3983868007659912} +{"epoch": 0, "iter": 4281, "iter_tflops": 29.363397725182498, "iter_time": 0.7026126098632812, "loss": 0.2853144407272339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.45177020707428, "step_time": 0.6357463207244873} +{"epoch": 0, "iter": 4282, "iter_tflops": 39.06265823762454, "iter_time": 0.5281538543701172, "loss": 0.2892901301383972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80424376736436, "step_time": 0.48198710441589354} +{"epoch": 0, "iter": 4283, "iter_tflops": 39.56148677916453, "iter_time": 0.5214943923950196, "loss": 0.3332703709602356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36605213761095, "step_time": 0.4757429485321045} +{"epoch": 0, "iter": 4284, "iter_tflops": 35.282910565987855, "iter_time": 0.5847333221435548, "loss": 
0.08018086105585098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.71727292223401, "step_time": 0.5328653583526611} +{"epoch": 0, "iter": 4285, "iter_tflops": 18.67493844615036, "iter_time": 1.1047476043701172, "loss": 0.07891003042459488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.05861778554829, "step_time": 1.0285401382446289} +{"epoch": 0, "iter": 4286, "iter_tflops": 27.289802099304243, "iter_time": 0.7560001144409179, "loss": 0.10103604942560196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.04720444755039, "step_time": 0.6059555797576903} +{"epoch": 0, "iter": 4287, "iter_tflops": 49.20097929241746, "iter_time": 0.4193228225708008, "loss": 0.08574891090393066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.37773812132818, "step_time": 0.3865111980438232} +{"epoch": 0, "iter": 4288, "iter_tflops": 53.64870509786734, "iter_time": 0.38455902099609374, "loss": 0.10534120351076126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.46044962855191, "step_time": 0.35290685653686527} +{"epoch": 0, "iter": 4289, "iter_tflops": 24.480183875238954, "iter_time": 0.8427670974731446, "loss": 0.48793014883995056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.669261450420628, "step_time": 0.8037275848388672} +{"epoch": 0, "iter": 4290, "iter_tflops": 13.383600562489995, "iter_time": 1.5415204162597655, "loss": 0.4613117575645447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.227333022181377, "step_time": 1.1975790729522704} +{"epoch": 0, "iter": 4291, "iter_tflops": 41.58477934704864, "iter_time": 0.49612126922607425, "loss": 0.5280947089195251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42695619866785, "step_time": 0.45415971565246577} +{"epoch": 0, "iter": 4292, "iter_tflops": 41.34248838205653, "iter_time": 0.49902882766723633, "loss": 0.3346765637397766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.09204538224013, "step_time": 0.45753288269042974} +{"epoch": 0, "iter": 4293, "iter_tflops": 17.84477095720673, "iter_time": 1.156142242431641, 
"loss": 0.41390693187713623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.99952202257263, "step_time": 1.0858743438720704} +{"epoch": 0, "iter": 4294, "iter_tflops": 31.649096541092458, "iter_time": 0.6518699035644532, "loss": 0.34589093923568726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.621610163804995, "step_time": 0.42431942176818843} +{"epoch": 0, "iter": 4295, "iter_tflops": 46.619504000027234, "iter_time": 0.4425421066284179, "loss": 0.559223473072052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49039184806139, "step_time": 0.4086142482757569} +{"epoch": 0, "iter": 4296, "iter_tflops": 50.88317659032529, "iter_time": 0.4054600143432617, "loss": 0.4709795415401459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.27457918612929, "step_time": 0.37324740982055665} +{"epoch": 0, "iter": 4297, "iter_tflops": 2.041473111335283, "iter_time": 0.9024402465820313, "loss": 0.005194429773837328, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 2.13590647929498, "step_time": 0.862541275024414} +{"epoch": 0, "iter": 4298, "iter_tflops": 1.504269500136836, "iter_time": 1.224719039916992, "loss": 0.30171555280685425, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 1.779714107449922, "step_time": 1.0351704750061035} +{"epoch": 0, "iter": 4299, "iter_tflops": 2.2808705530863485, "iter_time": 0.8077211990356445, "loss": 0.38808825612068176, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 2.8001243172742774, "step_time": 0.6579377517700197} +{"epoch": 0, "iter": 4300, "iter_tflops": 3.7173088910228453, "iter_time": 0.49560247802734375, "loss": 0.781044602394104, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.051812837363532, "step_time": 0.4546872158050537} +{"epoch": 0, "iter": 4301, "iter_tflops": 16.713477075362885, "iter_time": 0.970311393737793, "loss": 0.2715526521205902, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 17.997953234868728, "step_time": 0.9010623054504395} +{"epoch": 0, "iter": 4302, "iter_tflops": 17.700492835898462, "iter_time": 0.9162048416137695, 
"loss": 0.25032946467399597, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 19.582955562628225, "step_time": 0.8281322593688966} +{"epoch": 0, "iter": 4303, "iter_tflops": 29.80419550998449, "iter_time": 0.5441273269653321, "loss": 0.3097549080848694, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 31.808113106604058, "step_time": 0.5098471946716309} +{"epoch": 0, "iter": 4304, "iter_tflops": 29.71654652194371, "iter_time": 0.5457322311401368, "loss": 0.4983183741569519, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 31.57137184237705, "step_time": 0.513670337677002} +{"epoch": 0, "iter": 4305, "iter_tflops": 25.938634887758973, "iter_time": 0.7953808517456055, "loss": 0.8892542123794556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.328721840666503, "step_time": 0.754923469543457} +{"epoch": 0, "iter": 4306, "iter_tflops": 29.248593944048896, "iter_time": 0.7053704376220704, "loss": 0.9477673768997192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.18208440919042, "step_time": 0.6410738735198975} +{"epoch": 0, "iter": 4307, "iter_tflops": 36.49834442429529, "iter_time": 0.5652610778808593, "loss": 0.9099726676940918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.48276277474904, "step_time": 0.5225341911315917} +{"epoch": 0, "iter": 4308, "iter_tflops": 37.51488005545211, "iter_time": 0.5499442749023438, "loss": 0.7833384275436401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54740384070985, "step_time": 0.508814167022705} +{"epoch": 0, "iter": 4309, "iter_tflops": 11.10773390113154, "iter_time": 1.0966756591796876, "loss": 0.009135294705629349, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 12.020347862526211, "step_time": 1.0134133834838868} +{"epoch": 0, "iter": 4310, "iter_tflops": 11.219225047368523, "iter_time": 1.0857774353027345, "loss": 0.004146162420511246, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 13.791587544787271, "step_time": 0.8832617244720459} +{"epoch": 0, "iter": 4311, "iter_tflops": 27.226995859863287, "iter_time": 0.4474082069396973, 
"loss": 0.008716966956853867, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 30.609339000237767, "step_time": 0.39796943664550777} +{"epoch": 0, "iter": 4312, "iter_tflops": 25.597198025987346, "iter_time": 0.4758951110839844, "loss": 0.005037855822592974, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 28.37407269788359, "step_time": 0.4293208637237549} +{"epoch": 0, "iter": 4313, "iter_tflops": 21.17782994083412, "iter_time": 0.9741835479736327, "loss": 0.035028669983148575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.784813240556463, "step_time": 0.9054756469726563} +{"epoch": 0, "iter": 4314, "iter_tflops": 29.356630192994352, "iter_time": 0.7027745819091797, "loss": 0.027002880349755287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.18770419682641, "step_time": 0.6216487102508546} +{"epoch": 0, "iter": 4315, "iter_tflops": 50.25764057275323, "iter_time": 0.41050660705566405, "loss": 0.07369379699230194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.710246200103626, "step_time": 0.37709743499755866} +{"epoch": 0, "iter": 4316, "iter_tflops": 53.579891421522575, "iter_time": 0.38505291748046877, "loss": 0.04064711183309555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.492883847847, "step_time": 0.3527111701965332} +{"epoch": 0, "iter": 4317, "iter_tflops": 44.438320286380865, "iter_time": 0.46426357650756833, "loss": 0.5988353490829468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.57134163652637, "step_time": 0.4247585678100586} +{"epoch": 0, "iter": 4318, "iter_tflops": 45.3247196707251, "iter_time": 0.455184139251709, "loss": 0.546989381313324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.83317188910427, "step_time": 0.41400321769714354} +{"epoch": 0, "iter": 4319, "iter_tflops": 48.83147642065833, "iter_time": 0.42249579620361327, "loss": 0.5470848679542542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.711917771964764, "step_time": 0.3913933391571045} +{"epoch": 0, "iter": 4320, "iter_tflops": 45.213823899975694, "iter_time": 
0.45630056762695315, "loss": 0.5734931826591492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.06405868325964, "step_time": 0.4204930057525635} +{"epoch": 0, "iter": 4321, "iter_tflops": 28.49428359144642, "iter_time": 0.7240432434082031, "loss": 0.8190970420837402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.011862408808888, "step_time": 0.6874312973022462} +{"epoch": 0, "iter": 4322, "iter_tflops": 13.986765905782955, "iter_time": 1.4750438842773437, "loss": 1.0428107976913452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.351591561609318, "step_time": 1.2617177619934083} +{"epoch": 0, "iter": 4323, "iter_tflops": 36.475439550283085, "iter_time": 0.5656160354614258, "loss": 0.9325780272483826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.72149519255953, "step_time": 0.5193936786651612} +{"epoch": 0, "iter": 4324, "iter_tflops": 35.533321605593436, "iter_time": 0.5806125793457032, "loss": 0.9858283400535583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.59364915419443, "step_time": 0.5345722408294677} +{"epoch": 0, "iter": 4325, "iter_tflops": 15.953946588693551, "iter_time": 1.2931655120849608, "loss": 0.017828112468123436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.20196403197446, "step_time": 1.1993452301025391} +{"epoch": 0, "iter": 4326, "iter_tflops": 16.19917296190807, "iter_time": 1.2735893096923827, "loss": 0.05478254705667496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.734856738640076, "step_time": 0.9949957103729248} +{"epoch": 0, "iter": 4327, "iter_tflops": 48.06915865317971, "iter_time": 0.4291960601806641, "loss": 0.03470303490757942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.106842965767434, "step_time": 0.38848277091979977} +{"epoch": 0, "iter": 4328, "iter_tflops": 50.522815785951195, "iter_time": 0.40835201263427734, "loss": 0.027541695162653923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.16578576717702, "step_time": 0.37398349761962896} +{"epoch": 0, "iter": 4329, "iter_tflops": 36.89316204307138, 
"iter_time": 0.5592118530273438, "loss": 0.7771451473236084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.63525842356238, "step_time": 0.5205237541198731} +{"epoch": 0, "iter": 4330, "iter_tflops": 12.452196954205569, "iter_time": 1.6568235778808593, "loss": 0.7573065161705017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.34413427766421, "step_time": 1.262293441772461} +{"epoch": 0, "iter": 4331, "iter_tflops": 37.15417542893992, "iter_time": 0.5552833099365234, "loss": 0.6622998714447021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.52910517315604, "step_time": 0.509043893814087} +{"epoch": 0, "iter": 4332, "iter_tflops": 44.269613374794076, "iter_time": 0.46603283691406255, "loss": 0.9378232955932617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.03124228642305, "step_time": 0.4295348720550537} +{"epoch": 0, "iter": 4333, "iter_tflops": 18.06209145223074, "iter_time": 1.1422317047119142, "loss": 0.9058917760848999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.38606757312431, "step_time": 1.0642227172851562} +{"epoch": 0, "iter": 4334, "iter_tflops": 16.095103340043853, "iter_time": 1.2818242340087889, "loss": 0.8173123002052307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.94192910013746, "step_time": 0.9851572608947754} +{"epoch": 0, "iter": 4335, "iter_tflops": 38.65969124015793, "iter_time": 0.5336590347290039, "loss": 0.8276017904281616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.040302757636866, "step_time": 0.49074559783935545} +{"epoch": 0, "iter": 4336, "iter_tflops": 33.478878117337565, "iter_time": 0.6162420806884766, "loss": 0.7488876581192017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.48390927945645, "step_time": 0.5654847278594971} +{"epoch": 0, "iter": 4337, "iter_tflops": 34.7439757405894, "iter_time": 0.5938034744262696, "loss": 0.6031097173690796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.44589648259487, "step_time": 0.536626672744751} +{"epoch": 0, "iter": 4338, "iter_tflops": 38.985189273543185, 
"iter_time": 0.529203369140625, "loss": 0.5058757662773132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.972121200897575, "step_time": 0.48010414505004884} +{"epoch": 0, "iter": 4339, "iter_tflops": 45.8266216026523, "iter_time": 0.4501988754272461, "loss": 0.6463276743888855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.850254329664665, "step_time": 0.41386134910583494} +{"epoch": 0, "iter": 4340, "iter_tflops": 46.69950640726767, "iter_time": 0.44178397369384764, "loss": 0.6196150183677673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.689412428605955, "step_time": 0.40700991630554195} +{"epoch": 0, "iter": 4341, "iter_tflops": 27.114301477031027, "iter_time": 0.7608934173583984, "loss": 0.18140488862991333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.604901686539712, "step_time": 0.7212432937622069} +{"epoch": 0, "iter": 4342, "iter_tflops": 14.778593635705413, "iter_time": 1.396011962890625, "loss": 0.1834712028503418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.61292615652756, "step_time": 1.108428268432617} +{"epoch": 0, "iter": 4343, "iter_tflops": 40.288022921133305, "iter_time": 0.5120899963378907, "loss": 0.1053154245018959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25683129015258, "step_time": 0.4661674346923828} +{"epoch": 0, "iter": 4344, "iter_tflops": 42.94234253837404, "iter_time": 0.4804370765686035, "loss": 0.13371336460113525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.96380698967389, "step_time": 0.43929772377014165} +{"epoch": 0, "iter": 4345, "iter_tflops": 20.253107977297443, "iter_time": 0.7280068130493165, "loss": 0.015654349699616432, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 22.476299229090177, "step_time": 0.6559976997375488} +{"epoch": 0, "iter": 4346, "iter_tflops": 37.304855461967165, "iter_time": 0.39524079132080076, "loss": 0.0063775451853871346, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 41.86156375892589, "step_time": 0.35221810340881354} +{"epoch": 0, "iter": 4347, "iter_tflops": 
38.25774998468889, "iter_time": 0.3853964385986328, "loss": 0.015300731174647808, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 42.65298659512314, "step_time": 0.34568272399902344} +{"epoch": 0, "iter": 4348, "iter_tflops": 40.18451807468322, "iter_time": 0.36691744232177737, "loss": 0.01380976289510727, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 44.06388006991964, "step_time": 0.33461421394348145} +{"epoch": 0, "iter": 4349, "iter_tflops": 34.788947795423496, "iter_time": 0.5930358581542969, "loss": 0.3608953356742859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.15003330408895, "step_time": 0.5553452224731446} +{"epoch": 0, "iter": 4350, "iter_tflops": 23.35170665623267, "iter_time": 0.8834940338134765, "loss": 0.3469788134098053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.987939045923213, "step_time": 0.7644560585021972} +{"epoch": 0, "iter": 4351, "iter_tflops": 42.43641650925475, "iter_time": 0.48616483688354495, "loss": 0.31769150495529175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.51279258186503, "step_time": 0.4435574035644531} +{"epoch": 0, "iter": 4352, "iter_tflops": 40.01264361682411, "iter_time": 0.515614356994629, "loss": 0.2620634138584137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.833228495611166, "step_time": 0.47067246055603024} +{"epoch": 0, "iter": 4353, "iter_tflops": 22.027933597575878, "iter_time": 0.9365877838134764, "loss": 0.24133670330047607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.355402132210955, "step_time": 0.8833542404174803} +{"epoch": 0, "iter": 4354, "iter_tflops": 16.69413065700634, "iter_time": 1.2358291625976563, "loss": 0.1815331131219864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.010532769467503, "step_time": 0.9373282203674316} +{"epoch": 0, "iter": 4355, "iter_tflops": 39.892336673947305, "iter_time": 0.5171693420410157, "loss": 0.1462818682193756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.63756831432948, "step_time": 0.4727828407287598} +{"epoch": 0, "iter": 4356, 
"iter_tflops": 41.53352478481065, "iter_time": 0.49673350906372066, "loss": 0.24535439908504486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50156515923792, "step_time": 0.4534150295257568} +{"epoch": 0, "iter": 4357, "iter_tflops": 23.408916136874907, "iter_time": 0.881334846496582, "loss": 0.44589778780937195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.870359715780804, "step_time": 0.8295454406738281} +{"epoch": 0, "iter": 4358, "iter_tflops": 14.787068368228777, "iter_time": 1.395211883544922, "loss": 0.5159348249435425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.549613005764346, "step_time": 1.1122115325927735} +{"epoch": 0, "iter": 4359, "iter_tflops": 48.644795564372984, "iter_time": 0.4241171798706055, "loss": 0.5770117044448853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.63649036277544, "step_time": 0.3919542007446289} +{"epoch": 0, "iter": 4360, "iter_tflops": 49.81648452990663, "iter_time": 0.4141418991088867, "loss": 0.5934709310531616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.96209093349079, "step_time": 0.3823256874084473} +{"epoch": 0, "iter": 4361, "iter_tflops": 43.54237648865101, "iter_time": 0.4738164329528809, "loss": 0.7558689117431641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32438631558458, "step_time": 0.43595057678222654} +{"epoch": 0, "iter": 4362, "iter_tflops": 38.79505788806296, "iter_time": 0.5317969512939453, "loss": 0.778049647808075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6104942734509, "step_time": 0.49581467056274414} +{"epoch": 0, "iter": 4363, "iter_tflops": 36.08050588947798, "iter_time": 0.5718072128295899, "loss": 0.8683899641036987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.08650582181086, "step_time": 0.5278316154479981} +{"epoch": 0, "iter": 4364, "iter_tflops": 45.72116652945084, "iter_time": 0.45123725128173825, "loss": 0.8639752864837646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.329535094985076, "step_time": 0.41823004150390625} +{"epoch": 0, "iter": 4365, 
"iter_tflops": 22.82409319929452, "iter_time": 0.9039173355102539, "loss": 0.01954714208841324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.062992439276574, "step_time": 0.8573785476684569} +{"epoch": 0, "iter": 4366, "iter_tflops": 15.70879773359514, "iter_time": 1.3133464355468751, "loss": 0.02139081433415413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.95886794409921, "step_time": 1.0336805458068847} +{"epoch": 0, "iter": 4367, "iter_tflops": 54.17714022855853, "iter_time": 0.3808080940246582, "loss": 0.02093987725675106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.3666276061263, "step_time": 0.34752005195617675} +{"epoch": 0, "iter": 4368, "iter_tflops": 60.45684513337644, "iter_time": 0.34125322723388674, "loss": 0.10055975615978241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.95598962070223, "step_time": 0.31280090904235835} +{"epoch": 0, "iter": 4369, "iter_tflops": 40.22676072008482, "iter_time": 0.5128698692321778, "loss": 0.8178157806396484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.222282511697884, "step_time": 0.47732540512084964} +{"epoch": 0, "iter": 4370, "iter_tflops": 9.957432223790155, "iter_time": 2.0719290924072267, "loss": 0.7356218099594116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.83997526152422, "step_time": 1.6067860794067381} +{"epoch": 0, "iter": 4371, "iter_tflops": 11.397904543032539, "iter_time": 1.8100777587890624, "loss": 0.6078296303749084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.930005749301293, "step_time": 1.48105419921875} +{"epoch": 0, "iter": 4372, "iter_tflops": 15.902471072883202, "iter_time": 1.2973514251708984, "loss": 0.7095030546188354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.660835257907518, "step_time": 1.1055825328826905} +{"epoch": 0, "iter": 4373, "iter_tflops": 21.28648300861763, "iter_time": 0.7041771697998047, "loss": 0.2858527600765228, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 23.14423972800146, "step_time": 0.6476538238525391} +{"epoch": 0, "iter": 4374, 
"iter_tflops": 22.694385823684584, "iter_time": 0.6604917831420898, "loss": 0.3084513247013092, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.455339782813265, "step_time": 0.6129317970275879} +{"epoch": 0, "iter": 4375, "iter_tflops": 21.705485020127114, "iter_time": 0.690583755493164, "loss": 0.25097811222076416, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 23.307244932595577, "step_time": 0.6431242904663086} +{"epoch": 0, "iter": 4376, "iter_tflops": 22.976209919377585, "iter_time": 0.6523902511596679, "loss": 0.40372154116630554, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.73985153688096, "step_time": 0.6058829956054688} +{"epoch": 0, "iter": 4377, "iter_tflops": 19.6180494773112, "iter_time": 1.0516383666992186, "loss": 0.99000483751297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.735227311286405, "step_time": 0.9949779281616211} +{"epoch": 0, "iter": 4378, "iter_tflops": 31.233704055816478, "iter_time": 0.6605394439697265, "loss": 0.8910876512527466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.19529139314803, "step_time": 0.5861890239715577} +{"epoch": 0, "iter": 4379, "iter_tflops": 41.89859611004274, "iter_time": 0.49240536499023435, "loss": 0.8766540288925171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.75782411710384, "step_time": 0.45087575531005863} +{"epoch": 0, "iter": 4380, "iter_tflops": 40.376325714825946, "iter_time": 0.5109700584411621, "loss": 0.932258129119873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.992848450941615, "step_time": 0.46896471214294433} +{"epoch": 0, "iter": 4381, "iter_tflops": 22.012027292380832, "iter_time": 0.9372645797729492, "loss": 0.8614957928657532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.49400974017685, "step_time": 0.8781427154541015} +{"epoch": 0, "iter": 4382, "iter_tflops": 8.902314865569355, "iter_time": 2.3174976196289063, "loss": 0.755554735660553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.20968523874389, "step_time": 1.8404703674316407} +{"epoch": 0, "iter": 4383, 
"iter_tflops": 11.704571549312366, "iter_time": 1.7626526031494139, "loss": 0.8497287034988403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.42737184796167, "step_time": 1.337304481506348} +{"epoch": 0, "iter": 4384, "iter_tflops": 40.191899238662295, "iter_time": 0.5133147201538086, "loss": 0.8072865605354309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.70249676971152, "step_time": 0.4720804309844971} +{"epoch": 0, "iter": 4385, "iter_tflops": 10.417312202785496, "iter_time": 1.5056378631591796, "loss": 0.28748270869255066, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 10.986275467100318, "step_time": 1.4276630630493163} +{"epoch": 0, "iter": 4386, "iter_tflops": 12.876002240956458, "iter_time": 1.21813427734375, "loss": 0.23426270484924316, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 18.543981611738417, "step_time": 0.8458107872009277} +{"epoch": 0, "iter": 4387, "iter_tflops": 29.749538023045417, "iter_time": 0.527224983215332, "loss": 0.2075541913509369, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 31.60933921929807, "step_time": 0.49620460510253905} +{"epoch": 0, "iter": 4388, "iter_tflops": 29.26470797205914, "iter_time": 0.5359595489501953, "loss": 0.16229955852031708, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 31.08594644367578, "step_time": 0.504559181213379} +{"epoch": 0, "iter": 4389, "iter_tflops": 28.448329168099164, "iter_time": 0.7252128372192383, "loss": 0.07944556325674057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.16633670708238, "step_time": 0.6839111328124999} +{"epoch": 0, "iter": 4390, "iter_tflops": 17.8588305345151, "iter_time": 1.1552320556640625, "loss": 0.1051211804151535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.1873455486851, "step_time": 0.9737460250854494} +{"epoch": 0, "iter": 4391, "iter_tflops": 55.343689955205164, "iter_time": 0.37278131484985344, "loss": 0.08177173882722855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.14236119347036, "step_time": 0.34303763771057133} +{"epoch": 0, "iter": 4392, 
"iter_tflops": 52.216936794993266, "iter_time": 0.39510348129272466, "loss": 0.07735893130302429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.47769267664782, "step_time": 0.3652963237762451} +{"epoch": 0, "iter": 4393, "iter_tflops": 25.745512475497456, "iter_time": 0.80134716796875, "loss": 0.03566378355026245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.062089395162623, "step_time": 0.762361442565918} +{"epoch": 0, "iter": 4394, "iter_tflops": 14.207097551133256, "iter_time": 1.4521680755615232, "loss": 0.06621811538934708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.912329720179528, "step_time": 1.1517816963195802} +{"epoch": 0, "iter": 4395, "iter_tflops": 43.81631990941694, "iter_time": 0.47085409164428715, "loss": 0.034277863800525665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.22675231647097, "step_time": 0.42779354858398444} +{"epoch": 0, "iter": 4396, "iter_tflops": 39.99202348756973, "iter_time": 0.5158802108764649, "loss": 0.03629462793469429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.971834937265484, "step_time": 0.4691888236999512} +{"epoch": 0, "iter": 4397, "iter_tflops": 25.298192556404345, "iter_time": 0.815516502380371, "loss": 0.03135428577661514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.882757971087635, "step_time": 0.7399229850769042} +{"epoch": 0, "iter": 4398, "iter_tflops": 45.04694003942951, "iter_time": 0.4579910087585449, "loss": 0.053265996277332306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.08101868693549, "step_time": 0.4038896255493164} +{"epoch": 0, "iter": 4399, "iter_tflops": 56.91410766708211, "iter_time": 0.36249524688720697, "loss": 0.05062137171626091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.15866355180381, "step_time": 0.33191018486022944} +{"epoch": 0, "iter": 4400, "iter_tflops": 52.633724681135675, "iter_time": 0.391974796295166, "loss": 0.05636069178581238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.06795809451974, "step_time": 0.36151799011230473} +{"epoch": 0, 
"iter": 4401, "iter_tflops": 22.509093286222797, "iter_time": 0.9165670623779297, "loss": 0.20478731393814087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.51190453824073, "step_time": 0.877474365234375} +{"epoch": 0, "iter": 4402, "iter_tflops": 12.8041901666427, "iter_time": 1.6112767181396486, "loss": 0.21993079781532288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.27273511103428, "step_time": 1.350844715118408} +{"epoch": 0, "iter": 4403, "iter_tflops": 39.42223067808986, "iter_time": 0.5233365325927735, "loss": 0.24979430437088013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.276754581457986, "step_time": 0.4767245998382569} +{"epoch": 0, "iter": 4404, "iter_tflops": 40.157894553689765, "iter_time": 0.513749382019043, "loss": 0.2358006238937378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.133462867046305, "step_time": 0.4674705352783203} +{"epoch": 0, "iter": 4405, "iter_tflops": 10.697672844945219, "iter_time": 0.8445308456420898, "loss": 0.0033947837073355913, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 11.596184106773043, "step_time": 0.7790937614440918} +{"epoch": 0, "iter": 4406, "iter_tflops": 5.758284745089164, "iter_time": 1.5689593505859376, "loss": 0.018022984266281128, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 7.981963112495449, "step_time": 1.1318662548065186} +{"epoch": 0, "iter": 4407, "iter_tflops": 19.29906549923363, "iter_time": 0.46813223648071295, "loss": 0.0024453462101519108, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 21.432174124941433, "step_time": 0.4215398139953613} +{"epoch": 0, "iter": 4408, "iter_tflops": 19.24757678874818, "iter_time": 0.46938452529907226, "loss": 0.0039660269394516945, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 21.340886826267194, "step_time": 0.4233429832458496} +{"epoch": 0, "iter": 4409, "iter_tflops": 28.581447828364396, "iter_time": 0.7218351440429687, "loss": 0.6925376653671265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.719227177791073, "step_time": 0.6716019706726075} 
+{"epoch": 0, "iter": 4410, "iter_tflops": 9.043721528535746, "iter_time": 2.281261474609375, "loss": 1.0297349691390991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.572445719156304, "step_time": 1.7827772979736327} +{"epoch": 0, "iter": 4411, "iter_tflops": 15.73904152092424, "iter_time": 1.310822738647461, "loss": 0.6792724132537842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.90608968851853, "step_time": 1.0364212074279786} +{"epoch": 0, "iter": 4412, "iter_tflops": 23.706954773673633, "iter_time": 0.8702548980712891, "loss": 0.652832567691803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.817892838179006, "step_time": 0.7159126319885254} +{"epoch": 0, "iter": 4413, "iter_tflops": 19.327107793499273, "iter_time": 0.8009514236450195, "loss": 0.19400405883789062, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 20.453756756832405, "step_time": 0.7568328247070312} +{"epoch": 0, "iter": 4414, "iter_tflops": 7.009498631651423, "iter_time": 2.2084424743652344, "loss": 0.2691819369792938, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 8.141975187626354, "step_time": 1.9012677078247067} +{"epoch": 0, "iter": 4415, "iter_tflops": 8.715671838829202, "iter_time": 1.7761194763183592, "loss": 0.25100427865982056, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 10.356446857173912, "step_time": 1.494728328704834} +{"epoch": 0, "iter": 4416, "iter_tflops": 22.054639836814463, "iter_time": 0.7018964996337891, "loss": 0.24227400124073029, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 23.786392056208605, "step_time": 0.6507953987121583} +{"epoch": 0, "iter": 4417, "iter_tflops": 12.823283930278901, "iter_time": 1.1338997192382811, "loss": 0.17960023880004883, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 13.594941564997791, "step_time": 1.0695388412475586} +{"epoch": 0, "iter": 4418, "iter_tflops": 13.12252562693487, "iter_time": 1.1080426483154298, "loss": 0.22284391522407532, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 17.195827518375005, "step_time": 0.845572452545166} 
+{"epoch": 0, "iter": 4419, "iter_tflops": 22.67375218004373, "iter_time": 0.6412841567993165, "loss": 0.20941588282585144, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.440756378146204, "step_time": 0.5949209518432617} +{"epoch": 0, "iter": 4420, "iter_tflops": 22.440048563656035, "iter_time": 0.6479628601074219, "loss": 0.23569200932979584, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.157787143627182, "step_time": 0.6018894844055177} +{"epoch": 0, "iter": 4421, "iter_tflops": 23.093812163184758, "iter_time": 0.893360237121582, "loss": 0.6362082958221436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.165950711279304, "step_time": 0.8198018722534179} +{"epoch": 0, "iter": 4422, "iter_tflops": 19.148035828189176, "iter_time": 1.0774522094726562, "loss": 0.4247392416000366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.505531951445004, "step_time": 0.8777122573852539} +{"epoch": 0, "iter": 4423, "iter_tflops": 38.739472744334535, "iter_time": 0.5325599975585937, "loss": 0.3556533455848694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.28469631800438, "step_time": 0.4879092273712158} +{"epoch": 0, "iter": 4424, "iter_tflops": 40.0313785038664, "iter_time": 0.5153730468750001, "loss": 0.4655401408672333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.88786799059909, "step_time": 0.470086483001709} +{"epoch": 0, "iter": 4425, "iter_tflops": 23.11139837327863, "iter_time": 0.8926804504394531, "loss": 0.1887919008731842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.564690345834695, "step_time": 0.8398678436279298} +{"epoch": 0, "iter": 4426, "iter_tflops": 7.417040927939358, "iter_time": 2.781580108642578, "loss": 0.16948246955871582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.79825793445866, "step_time": 2.3449066467285156} +{"epoch": 0, "iter": 4427, "iter_tflops": 15.434739936079016, "iter_time": 1.3366660919189453, "loss": 0.10717544704675674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.016413632661276, "step_time": 1.030708791732788} 
+{"epoch": 0, "iter": 4428, "iter_tflops": 46.200256802342494, "iter_time": 0.4465579833984375, "loss": 0.22670608758926392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.98746090919214, "step_time": 0.4127253742218017} +{"epoch": 0, "iter": 4429, "iter_tflops": 19.52515960983842, "iter_time": 0.7196356658935547, "loss": 0.1688084751367569, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 20.618308317388475, "step_time": 0.6814817695617675} +{"epoch": 0, "iter": 4430, "iter_tflops": 10.20071811408623, "iter_time": 1.377452163696289, "loss": 0.14855991303920746, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 13.150108026141684, "step_time": 1.06850842666626} +{"epoch": 0, "iter": 4431, "iter_tflops": 25.45155555535153, "iter_time": 0.5520684661865234, "loss": 0.24111489951610565, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 27.15051160275186, "step_time": 0.5175225219726562} +{"epoch": 0, "iter": 4432, "iter_tflops": 26.177433460770732, "iter_time": 0.5367600784301758, "loss": 0.2851898670196533, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 27.847673618523515, "step_time": 0.5045664291381835} +{"epoch": 0, "iter": 4433, "iter_tflops": 44.67027341264059, "iter_time": 0.4618528594970703, "loss": 0.31399646401405334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.09641282535827, "step_time": 0.4202159042358398} +{"epoch": 0, "iter": 4434, "iter_tflops": 42.9100934526927, "iter_time": 0.48079814910888674, "loss": 0.27681902050971985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.93585662217733, "step_time": 0.43955932617187504} +{"epoch": 0, "iter": 4435, "iter_tflops": 43.70819847393465, "iter_time": 0.4720188484191895, "loss": 0.23771297931671143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.691765100808375, "step_time": 0.4418572196960449} +{"epoch": 0, "iter": 4436, "iter_tflops": 49.84480243154623, "iter_time": 0.4139066162109375, "loss": 0.2687256932258606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.9219732234508, "step_time": 0.38261013603210453} 
+{"epoch": 0, "iter": 4437, "iter_tflops": 26.047748583251266, "iter_time": 0.7920490112304688, "loss": 0.3985084593296051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.43404071428851, "step_time": 0.7520253295898437} +{"epoch": 0, "iter": 4438, "iter_tflops": 12.716589578096668, "iter_time": 1.6223762969970703, "loss": 0.3363988399505615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.715622098846625, "step_time": 1.3127761268615725} +{"epoch": 0, "iter": 4439, "iter_tflops": 34.0606752873442, "iter_time": 0.6057159271240234, "loss": 0.30728965997695923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.58405737059991, "step_time": 0.4961298828125} +{"epoch": 0, "iter": 4440, "iter_tflops": 45.51994566142387, "iter_time": 0.4532319450378418, "loss": 0.41682183742523193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.813820966823364, "step_time": 0.41416404342651364} +{"epoch": 0, "iter": 4441, "iter_tflops": 17.971093839887796, "iter_time": 1.1480154571533203, "loss": 0.22323165833950043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.023588523677795, "step_time": 1.0845006179809569} +{"epoch": 0, "iter": 4442, "iter_tflops": 18.82632814256294, "iter_time": 1.0958639068603515, "loss": 0.1522938311100006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.748058237399427, "step_time": 0.8336449394226075} +{"epoch": 0, "iter": 4443, "iter_tflops": 46.67198343783062, "iter_time": 0.44204449844360355, "loss": 0.14829325675964355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.485196079190096, "step_time": 0.40865630149841303} +{"epoch": 0, "iter": 4444, "iter_tflops": 49.09840516098531, "iter_time": 0.4201988525390625, "loss": 0.10683149099349976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.244112055495144, "step_time": 0.38748122024536125} +{"epoch": 0, "iter": 4445, "iter_tflops": 22.793517309658203, "iter_time": 0.6307555236816407, "loss": 0.07296714931726456, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 24.341277155445642, "step_time": 
0.590648422241211} +{"epoch": 0, "iter": 4446, "iter_tflops": 11.197839266741097, "iter_time": 1.283920639038086, "loss": 0.05219680070877075, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 13.528192864575319, "step_time": 1.062753694534302} +{"epoch": 0, "iter": 4447, "iter_tflops": 30.00835441856927, "iter_time": 0.4791044769287109, "loss": 0.042719900608062744, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 33.12344265938916, "step_time": 0.4340471820831299} +{"epoch": 0, "iter": 4448, "iter_tflops": 32.543673464505204, "iter_time": 0.4417797813415527, "loss": 0.05159040167927742, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 35.91295782888869, "step_time": 0.40033285522460943} +{"epoch": 0, "iter": 4449, "iter_tflops": 20.589541975905142, "iter_time": 1.0020180892944337, "loss": 1.0673632621765137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.182623601676184, "step_time": 0.9300565109252931} +{"epoch": 0, "iter": 4450, "iter_tflops": 20.326576971399863, "iter_time": 1.014981201171875, "loss": 0.7893117070198059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.568198991529574, "step_time": 0.8397479000091553} +{"epoch": 0, "iter": 4451, "iter_tflops": 44.356610163668186, "iter_time": 0.4651188049316406, "loss": 0.7512205243110657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.794583310244796, "step_time": 0.43166175079345703} +{"epoch": 0, "iter": 4452, "iter_tflops": 46.082776571142716, "iter_time": 0.4476964073181153, "loss": 0.7945806384086609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.36125828084005, "step_time": 0.4179612560272218} +{"epoch": 0, "iter": 4453, "iter_tflops": 23.210098620492914, "iter_time": 0.8888843536376954, "loss": 0.8952581286430359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.309607115809886, "step_time": 0.8486806640625} +{"epoch": 0, "iter": 4454, "iter_tflops": 12.157108570129985, "iter_time": 1.6970395050048828, "loss": 0.7472105622291565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.121671827831342, 
"step_time": 1.2049695682525634} +{"epoch": 0, "iter": 4455, "iter_tflops": 34.8884752706265, "iter_time": 0.5913440856933594, "loss": 0.7885795831680298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.058914861181194, "step_time": 0.5420830726623534} +{"epoch": 0, "iter": 4456, "iter_tflops": 40.205700880829276, "iter_time": 0.5131385116577148, "loss": 0.8229638338088989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.44572343061065, "step_time": 0.47487052536010743} +{"epoch": 0, "iter": 4457, "iter_tflops": 18.001739199887194, "iter_time": 1.1460611267089844, "loss": 0.16855716705322266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.152683367994623, "step_time": 1.077190757751465} +{"epoch": 0, "iter": 4458, "iter_tflops": 29.069464596411535, "iter_time": 0.7097170104980469, "loss": 0.18472743034362793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.70527931540832, "step_time": 0.630818447113037} +{"epoch": 0, "iter": 4459, "iter_tflops": 52.33104406956032, "iter_time": 0.3942419624328613, "loss": 0.20823241770267487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.227249745422895, "step_time": 0.3605117073059082} +{"epoch": 0, "iter": 4460, "iter_tflops": 48.00771232302515, "iter_time": 0.4297453994750976, "loss": 0.13946501910686493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.956139805068034, "step_time": 0.397086727142334} +{"epoch": 0, "iter": 4461, "iter_tflops": 29.59064163947479, "iter_time": 0.6972168350219726, "loss": 0.550625205039978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.50682575959452, "step_time": 0.6548134574890138} +{"epoch": 0, "iter": 4462, "iter_tflops": 14.629666101695312, "iter_time": 1.410223129272461, "loss": 0.6537951827049255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.442912041042316, "step_time": 1.1186462020874024} +{"epoch": 0, "iter": 4463, "iter_tflops": 34.70557580970288, "iter_time": 0.5944604873657227, "loss": 0.560892641544342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.04308719760345, 
"step_time": 0.49071309661865237} +{"epoch": 0, "iter": 4464, "iter_tflops": 51.64032114960489, "iter_time": 0.3995152053833008, "loss": 0.5687340497970581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.075587043123065, "step_time": 0.3679157829284668} +{"epoch": 0, "iter": 4465, "iter_tflops": 25.178198517919768, "iter_time": 0.8194030838012695, "loss": 0.9035488367080688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.466779774467287, "step_time": 0.7795090179443359} +{"epoch": 0, "iter": 4466, "iter_tflops": 14.557400923866954, "iter_time": 1.4172236938476563, "loss": 0.7559605240821838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.6057358997398, "step_time": 1.052298858642578} +{"epoch": 0, "iter": 4467, "iter_tflops": 42.60085129845959, "iter_time": 0.48428829193115236, "loss": 0.8881136775016785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.42623543279448, "step_time": 0.44438437271118164} +{"epoch": 0, "iter": 4468, "iter_tflops": 41.49875544512167, "iter_time": 0.49714969253540037, "loss": 0.7903274297714233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.09455766413486, "step_time": 0.4575073928833008} +{"epoch": 0, "iter": 4469, "iter_tflops": 18.92236253777932, "iter_time": 1.0903022003173828, "loss": 0.3545345366001129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.235354102455176, "step_time": 1.0195568313598633} +{"epoch": 0, "iter": 4470, "iter_tflops": 15.249437673676875, "iter_time": 1.352908477783203, "loss": 0.3479163646697998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.115449718340283, "step_time": 0.9770615253448485} +{"epoch": 0, "iter": 4471, "iter_tflops": 40.15009506226753, "iter_time": 0.5138491821289063, "loss": 0.3728129267692566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.12658126539198, "step_time": 0.46754343795776365} +{"epoch": 0, "iter": 4472, "iter_tflops": 40.7337162167578, "iter_time": 0.5064868965148925, "loss": 0.35854804515838623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.5797606914831, 
"step_time": 0.46279058456420896} +{"epoch": 0, "iter": 4473, "iter_tflops": 18.856419736687474, "iter_time": 1.0941150970458984, "loss": 0.04909633472561836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.807094821667874, "step_time": 1.0416011886596679} +{"epoch": 0, "iter": 4474, "iter_tflops": 8.241523622738733, "iter_time": 2.5033106079101564, "loss": 0.06554006785154343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.575965195907848, "step_time": 2.1544662170410156} +{"epoch": 0, "iter": 4475, "iter_tflops": 12.074484842961677, "iter_time": 1.7086520690917968, "loss": 0.01637747697532177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.303118152048654, "step_time": 1.442419288635254} +{"epoch": 0, "iter": 4476, "iter_tflops": 41.63923378946245, "iter_time": 0.4954724578857422, "loss": 0.02778458781540394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.117512772871414, "step_time": 0.4473591976165772} +{"epoch": 0, "iter": 4477, "iter_tflops": 15.013569830697403, "iter_time": 0.9875110855102539, "loss": 0.29462486505508423, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.920612386399986, "step_time": 0.9312497711181639} +{"epoch": 0, "iter": 4478, "iter_tflops": 15.450296357775494, "iter_time": 0.9595975570678711, "loss": 0.28559401631355286, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 16.932948263505327, "step_time": 0.8755750274658203} +{"epoch": 0, "iter": 4479, "iter_tflops": 28.47426141239988, "iter_time": 0.5206830978393555, "loss": 0.29925090074539185, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 30.213129542571856, "step_time": 0.49071601867675774} +{"epoch": 0, "iter": 4480, "iter_tflops": 26.930943038233707, "iter_time": 0.550521629333496, "loss": 0.2323877513408661, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.4673665817365, "step_time": 0.5208092079162598} +{"epoch": 0, "iter": 4481, "iter_tflops": 30.361437297521732, "iter_time": 0.6795163650512696, "loss": 0.10013855993747711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
32.268503065075464, "step_time": 0.6393570060729981} +{"epoch": 0, "iter": 4482, "iter_tflops": 15.782414921472302, "iter_time": 1.3072203216552734, "loss": 0.1190594956278801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.386046199869735, "step_time": 1.1221060409545898} +{"epoch": 0, "iter": 4483, "iter_tflops": 33.43133581938252, "iter_time": 0.6171184310913086, "loss": 0.0709284096956253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.21370179665357, "step_time": 0.4774201850891113} +{"epoch": 0, "iter": 4484, "iter_tflops": 50.255170588155245, "iter_time": 0.41052678298950196, "loss": 0.07808482646942139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.974354790588926, "step_time": 0.37528577804565433} +{"epoch": 0, "iter": 4485, "iter_tflops": 20.286076978725756, "iter_time": 1.017007553100586, "loss": 0.17781345546245575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.12852196489283, "step_time": 0.9764570159912109} +{"epoch": 0, "iter": 4486, "iter_tflops": 13.310725699534043, "iter_time": 1.5499600830078126, "loss": 0.13941672444343567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.59508437102032, "step_time": 1.243205099105835} +{"epoch": 0, "iter": 4487, "iter_tflops": 41.716632300081095, "iter_time": 0.49455318832397466, "loss": 0.13640503585338593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.807178469253664, "step_time": 0.45038996505737305} +{"epoch": 0, "iter": 4488, "iter_tflops": 42.65339139724319, "iter_time": 0.4836917495727539, "loss": 0.1961023062467575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.93599654074921, "step_time": 0.43955801582336423} +{"epoch": 0, "iter": 4489, "iter_tflops": 13.44912774823236, "iter_time": 1.534009780883789, "loss": 0.4140665829181671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.149378300357302, "step_time": 1.4580918731689454} +{"epoch": 0, "iter": 4490, "iter_tflops": 21.74204548385759, "iter_time": 0.9489030609130861, "loss": 0.38537120819091797, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 30.087619749046656, "step_time": 0.6857004203796386} +{"epoch": 0, "iter": 4491, "iter_tflops": 43.51552475402633, "iter_time": 0.47410880661010735, "loss": 0.4149293601512909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.60515684514039, "step_time": 0.44267834091186525} +{"epoch": 0, "iter": 4492, "iter_tflops": 53.13916500714635, "iter_time": 0.3882464752197266, "loss": 0.6065096259117126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.47427404980348, "step_time": 0.35896222877502443} +{"epoch": 0, "iter": 4493, "iter_tflops": 36.59591670865133, "iter_time": 0.5637539749145507, "loss": 0.25044572353363037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.194453529941825, "step_time": 0.5263778839111328} +{"epoch": 0, "iter": 4494, "iter_tflops": 9.549285918341882, "iter_time": 2.1604854736328125, "loss": 0.2783254384994507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.515953416637576, "step_time": 1.791522834777832} +{"epoch": 0, "iter": 4495, "iter_tflops": 10.627715296825892, "iter_time": 1.9412538757324218, "loss": 0.3607640862464905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.3670614092815, "step_time": 1.543427749633789} +{"epoch": 0, "iter": 4496, "iter_tflops": 18.578938662647445, "iter_time": 1.1104559783935546, "loss": 0.34680280089378357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.900182681216492, "step_time": 0.900913927078247} +{"epoch": 0, "iter": 4497, "iter_tflops": 22.45187980330856, "iter_time": 0.7351001434326172, "loss": 0.4016166031360626, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 24.441489843594482, "step_time": 0.6752608032226562} +{"epoch": 0, "iter": 4498, "iter_tflops": 25.35460861769234, "iter_time": 0.6509420166015625, "loss": 0.37845316529273987, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 27.309444391469096, "step_time": 0.6043469734191894} +{"epoch": 0, "iter": 4499, "iter_tflops": 26.401298885562703, "iter_time": 0.6251351547241211, "loss": 0.22606822848320007, "lr": 3e-05, "seqlen": 
6592.0, "step_tflops": 28.41236902747785, "step_time": 0.580887149810791} +{"epoch": 0, "iter": 4500, "iter_tflops": 26.91834627391516, "iter_time": 0.6131275634765625, "loss": 0.3965306878089905, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 28.829843977276443, "step_time": 0.5724755249023438} +{"epoch": 0, "iter": 4501, "iter_tflops": 20.930921875766785, "iter_time": 0.9856753387451171, "loss": 0.8978270888328552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.220379577422065, "step_time": 0.9284761962890626} +{"epoch": 0, "iter": 4502, "iter_tflops": 13.192921763771762, "iter_time": 1.5638001861572266, "loss": 1.0610054731369019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.826163200379003, "step_time": 1.226131784439087} +{"epoch": 0, "iter": 4503, "iter_tflops": 36.47278685605447, "iter_time": 0.5656571731567382, "loss": 0.9511658549308777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.77949593107837, "step_time": 0.5186363735198976} +{"epoch": 0, "iter": 4504, "iter_tflops": 38.15467084125753, "iter_time": 0.5407226181030274, "loss": 1.1198865175247192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.652631341430656, "step_time": 0.49531308937072754} +{"epoch": 0, "iter": 4505, "iter_tflops": 25.35550512658036, "iter_time": 0.8136731414794921, "loss": 0.20438209176063538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.47307571604304, "step_time": 0.7509568176269531} +{"epoch": 0, "iter": 4506, "iter_tflops": 22.252314111539217, "iter_time": 0.9271437301635741, "loss": 0.25205743312835693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.14786442538066, "step_time": 0.8203914718627929} +{"epoch": 0, "iter": 4507, "iter_tflops": 50.551730477983625, "iter_time": 0.4081184425354004, "loss": 0.18639525771141052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.861090316649424, "step_time": 0.37606058120727537} +{"epoch": 0, "iter": 4508, "iter_tflops": 51.65203342373492, "iter_time": 0.39942461395263673, "loss": 0.2431710660457611, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 55.86973423014275, "step_time": 0.3692713737487793} +{"epoch": 0, "iter": 4509, "iter_tflops": 20.878841342681312, "iter_time": 0.6788339691162109, "loss": 0.3260951042175293, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 22.179445648661392, "step_time": 0.6390270957946778} +{"epoch": 0, "iter": 4510, "iter_tflops": 21.29256957940814, "iter_time": 0.6656437911987304, "loss": 0.21189898252487183, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 26.97174795546223, "step_time": 0.5254856586456299} +{"epoch": 0, "iter": 4511, "iter_tflops": 34.80987294373228, "iter_time": 0.40716226577758796, "loss": 0.3907950520515442, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 38.1847047209044, "step_time": 0.3711765441894531} +{"epoch": 0, "iter": 4512, "iter_tflops": 35.46402863210223, "iter_time": 0.3996519088745117, "loss": 0.24015265703201294, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 38.44794067399845, "step_time": 0.3686352634429932} +{"epoch": 0, "iter": 4513, "iter_tflops": 39.04804614136171, "iter_time": 0.5283514938354492, "loss": 0.16392138600349426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.246756723502834, "step_time": 0.48834739303588864} +{"epoch": 0, "iter": 4514, "iter_tflops": 14.927668751071804, "iter_time": 1.3820706939697265, "loss": 0.20341156423091888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.315203080685496, "step_time": 1.0681271858215333} +{"epoch": 0, "iter": 4515, "iter_tflops": 43.19131736419458, "iter_time": 0.4776676139831543, "loss": 0.26804953813552856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.37942110215668, "step_time": 0.4354441871643066} +{"epoch": 0, "iter": 4516, "iter_tflops": 41.284215641759836, "iter_time": 0.4997332077026367, "loss": 0.20752456784248352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7969671940462, "step_time": 0.4605466575622559} +{"epoch": 0, "iter": 4517, "iter_tflops": 19.731003752418182, "iter_time": 1.0456180419921877, "loss": 0.005310273729264736, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 21.023783956737176, "step_time": 0.9813216094970703} +{"epoch": 0, "iter": 4518, "iter_tflops": 20.066843516335464, "iter_time": 1.028118522644043, "loss": 0.006827273406088352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.684948205150327, "step_time": 0.8357762527465822} +{"epoch": 0, "iter": 4519, "iter_tflops": 55.85486552969294, "iter_time": 0.36936967468261717, "loss": 0.004981059115380049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.56316583141547, "step_time": 0.3351207370758057} +{"epoch": 0, "iter": 4520, "iter_tflops": 67.34114314996845, "iter_time": 0.30636684417724613, "loss": 0.007677776273339987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 73.90329164532949, "step_time": 0.27916339111328126} +{"epoch": 0, "iter": 4521, "iter_tflops": 28.518512449067103, "iter_time": 0.723428108215332, "loss": 0.26143723726272583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.03688753822448, "step_time": 0.6868585662841796} +{"epoch": 0, "iter": 4522, "iter_tflops": 12.871372709040083, "iter_time": 1.6028666076660154, "loss": 0.2414693981409073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.203982630365537, "step_time": 1.2732112827301025} +{"epoch": 0, "iter": 4523, "iter_tflops": 39.77958707242224, "iter_time": 0.5186351852416992, "loss": 0.15795515477657318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.340192553736735, "step_time": 0.47602680778503426} +{"epoch": 0, "iter": 4524, "iter_tflops": 39.6696179564902, "iter_time": 0.5200729064941406, "loss": 0.21764510869979858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.289696580433045, "step_time": 0.47658207702636723} +{"epoch": 0, "iter": 4525, "iter_tflops": 20.255985793009618, "iter_time": 1.0185183639526367, "loss": 0.13121014833450317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.85116716231869, "step_time": 0.9441643714904786} +{"epoch": 0, "iter": 4526, "iter_tflops": 17.443831183152056, "iter_time": 1.1827157287597656, "loss": 
0.14948828518390656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.870965346687957, "step_time": 0.9885069122314454} +{"epoch": 0, "iter": 4527, "iter_tflops": 37.71636999514689, "iter_time": 0.5470063400268554, "loss": 0.15326178073883057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.39322773077117, "step_time": 0.49841712379455566} +{"epoch": 0, "iter": 4528, "iter_tflops": 41.42267809182704, "iter_time": 0.49806276321411136, "loss": 0.38191473484039307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.598582968267685, "step_time": 0.4524503211975097} +{"epoch": 0, "iter": 4529, "iter_tflops": 30.177667943085485, "iter_time": 0.6836543350219727, "loss": 0.10831323266029358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.58914254142587, "step_time": 0.6142191181182861} +{"epoch": 0, "iter": 4530, "iter_tflops": 39.33414357149146, "iter_time": 0.5245085220336914, "loss": 0.12489468604326248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.15775845251547, "step_time": 0.4672133331298828} +{"epoch": 0, "iter": 4531, "iter_tflops": 40.415332676820924, "iter_time": 0.5104768943786622, "loss": 0.14455923438072205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62888886882643, "step_time": 0.4622811374664307} +{"epoch": 0, "iter": 4532, "iter_tflops": 38.31559140607341, "iter_time": 0.5384516525268555, "loss": 0.1268567144870758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.63451707483881, "step_time": 0.4955285892486572} +{"epoch": 0, "iter": 4533, "iter_tflops": 20.214631569379787, "iter_time": 1.0206020050048827, "loss": 0.3156874477863312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.842634303632423, "step_time": 0.9445332107543944} +{"epoch": 0, "iter": 4534, "iter_tflops": 17.561887789561883, "iter_time": 1.17476513671875, "loss": 0.20735077559947968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.027920999125612, "step_time": 0.9811285438537598} +{"epoch": 0, "iter": 4535, "iter_tflops": 42.38522407014048, "iter_time": 0.48675202178955074, 
"loss": 0.1716310679912567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.45388919276189, "step_time": 0.44411983299255375} +{"epoch": 0, "iter": 4536, "iter_tflops": 39.21220073684443, "iter_time": 0.5261396484375, "loss": 0.1983301341533661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.86215023137519, "step_time": 0.48133594322204587} +{"epoch": 0, "iter": 4537, "iter_tflops": 20.409206606513226, "iter_time": 1.0108719024658201, "loss": 0.9815497398376465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.03445434425124, "step_time": 0.9363106155395508} +{"epoch": 0, "iter": 4538, "iter_tflops": 43.33878372933152, "iter_time": 0.4760422821044922, "loss": 0.9228302240371704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32060420598429, "step_time": 0.43598542022705083} +{"epoch": 0, "iter": 4539, "iter_tflops": 43.24102750261543, "iter_time": 0.4771184844970703, "loss": 1.1086759567260742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.39866572820569, "step_time": 0.4446484222412109} +{"epoch": 0, "iter": 4540, "iter_tflops": 44.85752638433368, "iter_time": 0.45992490386962886, "loss": 0.9424894452095032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.694415309475616, "step_time": 0.4236850032806397} +{"epoch": 0, "iter": 4541, "iter_tflops": 38.78621654559325, "iter_time": 0.5319181747436523, "loss": 1.1059777736663818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.224130952980346, "step_time": 0.48860907363891604} +{"epoch": 0, "iter": 4542, "iter_tflops": 42.890733498207055, "iter_time": 0.48101517105102537, "loss": 0.8064705729484558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.152777149708186, "step_time": 0.4470173797607422} +{"epoch": 0, "iter": 4543, "iter_tflops": 46.466376522508824, "iter_time": 0.44400048065185543, "loss": 0.8524615168571472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.787694613751455, "step_time": 0.41438137817382814} +{"epoch": 0, "iter": 4544, "iter_tflops": 40.20892497965799, "iter_time": 
0.5130973663330078, "loss": 0.9030923843383789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.77390409314641, "step_time": 0.48232897949218745} +{"epoch": 0, "iter": 4545, "iter_tflops": 28.530876980533286, "iter_time": 0.7231145935058594, "loss": 0.6680254340171814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.008897255294695, "step_time": 0.6874992218017578} +{"epoch": 0, "iter": 4546, "iter_tflops": 14.0393520741676, "iter_time": 1.4695189208984374, "loss": 0.9410406351089478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.48530965232997, "step_time": 1.1799101028442383} +{"epoch": 0, "iter": 4547, "iter_tflops": 35.097928552800894, "iter_time": 0.5878151321411133, "loss": 0.7751643061637878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.062404133469585, "step_time": 0.5420333786010741} +{"epoch": 0, "iter": 4548, "iter_tflops": 39.63747221873951, "iter_time": 0.5204946823120117, "loss": 0.7268829941749573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.904027103307705, "step_time": 0.4808661308288575} +{"epoch": 0, "iter": 4549, "iter_tflops": 18.458139107483092, "iter_time": 1.117723373413086, "loss": 0.6719450950622559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.80702329718302, "step_time": 1.0416049499511717} +{"epoch": 0, "iter": 4550, "iter_tflops": 17.317363344349122, "iter_time": 1.1913530426025392, "loss": 0.5726669430732727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.489397746018824, "step_time": 0.917369764328003} +{"epoch": 0, "iter": 4551, "iter_tflops": 36.53317697440872, "iter_time": 0.5647221298217774, "loss": 0.48154595494270325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.61416904189914, "step_time": 0.5208008651733399} +{"epoch": 0, "iter": 4552, "iter_tflops": 39.908081152181346, "iter_time": 0.5169653091430664, "loss": 0.7570824027061462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.71564734845514, "step_time": 0.47193841934204106} +{"epoch": 0, "iter": 4553, "iter_tflops": 17.977112546830792, "iter_time": 
1.147631103515625, "loss": 0.8791589140892029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.81750715814236, "step_time": 1.0963776092529296} +{"epoch": 0, "iter": 4554, "iter_tflops": 17.321070414189975, "iter_time": 1.1910980682373047, "loss": 0.777498185634613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.18701915482388, "step_time": 0.7588582401275635} +{"epoch": 0, "iter": 4555, "iter_tflops": 44.02789724172041, "iter_time": 0.46859138870239264, "loss": 0.8037242889404297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17199571617579, "step_time": 0.43735892868042003} +{"epoch": 0, "iter": 4556, "iter_tflops": 44.379829769219235, "iter_time": 0.4648754539489746, "loss": 0.7656123638153076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.74835551638592, "step_time": 0.4320796661376953} +{"epoch": 0, "iter": 4557, "iter_tflops": 37.101285952817555, "iter_time": 0.5560748901367187, "loss": 0.8510550856590271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06078791614722, "step_time": 0.5149947013854981} +{"epoch": 0, "iter": 4558, "iter_tflops": 22.033055959859915, "iter_time": 0.9363700408935547, "loss": 0.6865047812461853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.232033413280192, "step_time": 0.7864847221374512} +{"epoch": 0, "iter": 4559, "iter_tflops": 39.24112422011893, "iter_time": 0.5257518463134766, "loss": 0.8795236349105835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.77612799112301, "step_time": 0.4823039035797119} +{"epoch": 0, "iter": 4560, "iter_tflops": 37.199201113720484, "iter_time": 0.5546111984252929, "loss": 1.1081620454788208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54137092850465, "step_time": 0.5088898830413818} +{"epoch": 0, "iter": 4561, "iter_tflops": 17.232163836154143, "iter_time": 1.1972433471679687, "loss": 0.8355018496513367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.13054897717653, "step_time": 1.1379188537597655} +{"epoch": 0, "iter": 4562, "iter_tflops": 16.294051030512914, "iter_time": 
1.266173370361328, "loss": 0.7386883497238159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.95876867237575, "step_time": 0.9843657245635986} +{"epoch": 0, "iter": 4563, "iter_tflops": 47.87642687257319, "iter_time": 0.4309238357543945, "loss": 0.9035148024559021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.98772182363487, "step_time": 0.39684550094604487} +{"epoch": 0, "iter": 4564, "iter_tflops": 49.06708359678677, "iter_time": 0.4204670829772949, "loss": 0.8507091403007507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.30942294790926, "step_time": 0.3870065059661866} +{"epoch": 0, "iter": 4565, "iter_tflops": 3.808058266118791, "iter_time": 0.46314918899536134, "loss": 0.06435798108577728, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 4.174645227912416, "step_time": 0.4224787979125977} +{"epoch": 0, "iter": 4566, "iter_tflops": 3.8151454523723896, "iter_time": 0.46228882217407224, "loss": 0.08878690749406815, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 4.160695007207548, "step_time": 0.4238953094482422} +{"epoch": 0, "iter": 4567, "iter_tflops": 4.227873465776307, "iter_time": 0.4171598587036133, "loss": 0.14108765125274658, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 4.579145167338566, "step_time": 0.38515902709960936} +{"epoch": 0, "iter": 4568, "iter_tflops": 4.285459220919053, "iter_time": 0.41155428314208986, "loss": 0.0810607448220253, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 4.627649418581086, "step_time": 0.3811220207214356} +{"epoch": 0, "iter": 4569, "iter_tflops": 26.04625754399299, "iter_time": 0.7920943527221679, "loss": 0.9616352915763855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.48750988662927, "step_time": 0.7505624771118165} +{"epoch": 0, "iter": 4570, "iter_tflops": 21.358996076791158, "iter_time": 0.9659205627441407, "loss": 0.8633551597595215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.73787266568548, "step_time": 0.6937649421691895} +{"epoch": 0, "iter": 4571, "iter_tflops": 34.93948347617169, "iter_time": 
0.5904807815551758, "loss": 0.815065860748291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.16486543758425, "step_time": 0.5405781803131103} +{"epoch": 0, "iter": 4572, "iter_tflops": 38.99322135362671, "iter_time": 0.5290943603515624, "loss": 0.7804853320121765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.199554508898366, "step_time": 0.48889363288879395} +{"epoch": 0, "iter": 4573, "iter_tflops": 20.123320651064002, "iter_time": 1.0252330551147462, "loss": 1.0552161931991577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.682122452604112, "step_time": 0.9515255508422852} +{"epoch": 0, "iter": 4574, "iter_tflops": 20.69760433442405, "iter_time": 0.9967865447998047, "loss": 0.7862794399261475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.337550669694963, "step_time": 0.8840299396514892} +{"epoch": 0, "iter": 4575, "iter_tflops": 36.290566568005175, "iter_time": 0.5684974212646485, "loss": 0.7735434174537659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.41517332973714, "step_time": 0.5234302368164062} +{"epoch": 0, "iter": 4576, "iter_tflops": 34.55014452920595, "iter_time": 0.5971347961425781, "loss": 0.7172297239303589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.448644096738676, "step_time": 0.5509169692993164} +{"epoch": 0, "iter": 4577, "iter_tflops": 22.76900975549847, "iter_time": 0.906104118347168, "loss": 0.216551274061203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.06740914682673, "step_time": 0.8572212066650391} +{"epoch": 0, "iter": 4578, "iter_tflops": 15.328217791134787, "iter_time": 1.3459551391601565, "loss": 0.2513153851032257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.610639315937377, "step_time": 1.1715130348205567} +{"epoch": 0, "iter": 4579, "iter_tflops": 47.88584133848861, "iter_time": 0.4308391151428222, "loss": 0.23106619715690613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.84413099705503, "step_time": 0.3979446296691894} +{"epoch": 0, "iter": 4580, "iter_tflops": 53.513706592256284, "iter_time": 
0.3855291442871094, "loss": 0.12594877183437347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.10013207683118, "step_time": 0.35509546661376956} +{"epoch": 0, "iter": 4581, "iter_tflops": 28.939276783395584, "iter_time": 0.6556744995117187, "loss": 0.2270200550556183, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 30.91230548693988, "step_time": 0.6138249969482422} +{"epoch": 0, "iter": 4582, "iter_tflops": 19.269342477420913, "iter_time": 0.9847116394042968, "loss": 0.1990041434764862, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 21.78482899721547, "step_time": 0.8710073337554932} +{"epoch": 0, "iter": 4583, "iter_tflops": 44.37762629359428, "iter_time": 0.4275746002197266, "loss": 0.22121472656726837, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 48.39572295238094, "step_time": 0.3920748500823975} +{"epoch": 0, "iter": 4584, "iter_tflops": 42.188408628335175, "iter_time": 0.4497620658874512, "loss": 0.21434292197227478, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 45.93141568301714, "step_time": 0.41311040687561035} +{"epoch": 0, "iter": 4585, "iter_tflops": 35.187812039269154, "iter_time": 0.5863136215209962, "loss": 0.5700142979621887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.724246881828535, "step_time": 0.5468921241760254} +{"epoch": 0, "iter": 4586, "iter_tflops": 10.506476262470764, "iter_time": 1.9636548919677737, "loss": 0.6295329928398132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.245320414588955, "step_time": 1.5576137733459472} +{"epoch": 0, "iter": 4587, "iter_tflops": 14.618339545339209, "iter_time": 1.4113157958984375, "loss": 0.6886627674102783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.585995167299615, "step_time": 1.1731547355651855} +{"epoch": 0, "iter": 4588, "iter_tflops": 34.94859999215248, "iter_time": 0.5903267517089843, "loss": 0.34174755215644836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.061125346841855, "step_time": 0.5420515899658203} +{"epoch": 0, "iter": 4589, "iter_tflops": 17.10829141050464, 
"iter_time": 1.0079151535034179, "loss": 0.2573835849761963, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 18.14861363490601, "step_time": 0.9501390304565429} +{"epoch": 0, "iter": 4590, "iter_tflops": 10.163476493769073, "iter_time": 1.6966346282958984, "loss": 0.2421169877052307, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 11.849566352464818, "step_time": 1.4552183303833008} +{"epoch": 0, "iter": 4591, "iter_tflops": 31.15877801800055, "iter_time": 0.5534140701293945, "loss": 0.15307007730007172, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 33.15464940002771, "step_time": 0.5200991859436035} +{"epoch": 0, "iter": 4592, "iter_tflops": 30.409886688932897, "iter_time": 0.5670427627563477, "loss": 0.3398195803165436, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 32.326528473528924, "step_time": 0.5334227638244629} +{"epoch": 0, "iter": 4593, "iter_tflops": 43.28592453171521, "iter_time": 0.47662360763549805, "loss": 0.7557989358901978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.23372612512108, "step_time": 0.4367873382568359} +{"epoch": 0, "iter": 4594, "iter_tflops": 9.425472950290448, "iter_time": 2.1888656005859373, "loss": 0.6458340883255005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.069134060089004, "step_time": 2.0489441680908205} +{"epoch": 0, "iter": 4595, "iter_tflops": 13.231122104962065, "iter_time": 1.5592852478027344, "loss": 0.70673006772995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.27014620177994, "step_time": 1.3510737380981446} +{"epoch": 0, "iter": 4596, "iter_tflops": 13.715594383972206, "iter_time": 1.5042070312500002, "loss": 0.7934330701828003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.149915946713396, "step_time": 1.2029851093292236} +{"epoch": 0, "iter": 4597, "iter_tflops": 19.996968862572185, "iter_time": 0.7904963760375976, "loss": 0.24125681817531586, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 21.81417301936018, "step_time": 0.7246450004577637} +{"epoch": 0, "iter": 4598, "iter_tflops": 24.29767361107356, 
"iter_time": 0.650577980041504, "loss": 0.30508509278297424, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 26.29702818026468, "step_time": 0.60111474609375} +{"epoch": 0, "iter": 4599, "iter_tflops": 24.80622465813747, "iter_time": 0.6372405166625977, "loss": 0.2085915207862854, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 26.680559578454854, "step_time": 0.5924737586975097} +{"epoch": 0, "iter": 4600, "iter_tflops": 22.515969971512746, "iter_time": 0.7020586471557617, "loss": 0.20759128034114838, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 24.165218067087046, "step_time": 0.6541439590454101} +{"epoch": 0, "iter": 4601, "iter_tflops": 29.81999482659622, "iter_time": 0.6918543624877929, "loss": 0.7622451186180115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.16469690601495, "step_time": 0.6220799655914306} +{"epoch": 0, "iter": 4602, "iter_tflops": 39.18763877264092, "iter_time": 0.5264694213867187, "loss": 0.7386944890022278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.76992188571888, "step_time": 0.4823738880157471} +{"epoch": 0, "iter": 4603, "iter_tflops": 40.456438670734435, "iter_time": 0.5099582214355468, "loss": 0.6924288868904114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.2824251434053, "step_time": 0.46589800453186037} +{"epoch": 0, "iter": 4604, "iter_tflops": 37.972758034521185, "iter_time": 0.5433130111694336, "loss": 0.5715793967247009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.07133513258605, "step_time": 0.5023234195709229} +{"epoch": 0, "iter": 4605, "iter_tflops": 17.46211617038225, "iter_time": 1.1814772796630861, "loss": 0.4410964548587799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.748317810303877, "step_time": 1.100423713684082} +{"epoch": 0, "iter": 4606, "iter_tflops": 17.580725959726454, "iter_time": 1.17350634765625, "loss": 0.5299110412597656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.04284565440124, "step_time": 1.0293495178222656} +{"epoch": 0, "iter": 4607, "iter_tflops": 50.7469414505133, 
"iter_time": 0.40654851150512694, "loss": 0.5083653330802917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.73313910271024, "step_time": 0.3769397087097168} +{"epoch": 0, "iter": 4608, "iter_tflops": 43.868043237134934, "iter_time": 0.4702989234924317, "loss": 0.3091878890991211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.013774958971986, "step_time": 0.4388308219909668} +{"epoch": 0, "iter": 4609, "iter_tflops": 16.329533255423136, "iter_time": 1.263422119140625, "loss": 0.8733065724372864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.87192232494022, "step_time": 1.2228063354492187} +{"epoch": 0, "iter": 4610, "iter_tflops": 13.917426457776685, "iter_time": 1.482392852783203, "loss": 0.643246591091156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.770381741005362, "step_time": 1.1609820098876953} +{"epoch": 0, "iter": 4611, "iter_tflops": 46.85696562882145, "iter_time": 0.4402993927001953, "loss": 0.7182535529136658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5608071110723, "step_time": 0.4080451774597168} +{"epoch": 0, "iter": 4612, "iter_tflops": 47.82503206930494, "iter_time": 0.4313869247436523, "loss": 0.7120361924171448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.200670003276876, "step_time": 0.4029457721710205} +{"epoch": 0, "iter": 4613, "iter_tflops": 28.059470264778128, "iter_time": 0.7352631149291993, "loss": 0.48883771896362305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.736139671090477, "step_time": 0.6938053741455079} +{"epoch": 0, "iter": 4614, "iter_tflops": 12.916014249891349, "iter_time": 1.597326629638672, "loss": 0.4131893515586853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.295502374825066, "step_time": 1.1928588752746583} +{"epoch": 0, "iter": 4615, "iter_tflops": 35.42793959489343, "iter_time": 0.5823396377563477, "loss": 0.41996580362319946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.67253831550736, "step_time": 0.5334817523956299} +{"epoch": 0, "iter": 4616, "iter_tflops": 37.289238205320714, 
"iter_time": 0.5532720565795899, "loss": 0.4715636074542999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.84408669140452, "step_time": 0.5051182479858398} +{"epoch": 0, "iter": 4617, "iter_tflops": 19.2123987538762, "iter_time": 1.0738426666259766, "loss": 0.06315536797046661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.298635064919697, "step_time": 1.0163783645629882} +{"epoch": 0, "iter": 4618, "iter_tflops": 17.647777586426617, "iter_time": 1.1690476837158204, "loss": 0.11242154240608215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.761628096567684, "step_time": 0.8682525215148925} +{"epoch": 0, "iter": 4619, "iter_tflops": 41.23845804304188, "iter_time": 0.5002877044677734, "loss": 0.10147019475698471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.3455322743581, "step_time": 0.4549752197265625} +{"epoch": 0, "iter": 4620, "iter_tflops": 44.6714120411233, "iter_time": 0.4618410873413086, "loss": 0.10048536211252213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.65487074910777, "step_time": 0.42402935600280756} +{"epoch": 0, "iter": 4621, "iter_tflops": 21.10490286247556, "iter_time": 0.9775497970581054, "loss": 1.0784358978271484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.713813688063716, "step_time": 0.9083060111999512} +{"epoch": 0, "iter": 4622, "iter_tflops": 19.660268743603663, "iter_time": 1.0493800354003908, "loss": 1.1935391426086426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.388909541397176, "step_time": 0.8820887298583984} +{"epoch": 0, "iter": 4623, "iter_tflops": 43.218280102438186, "iter_time": 0.47736960983276366, "loss": 0.9472849369049072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.39762641616683, "step_time": 0.44465838241577144} +{"epoch": 0, "iter": 4624, "iter_tflops": 42.295717958634626, "iter_time": 0.48778208541870116, "loss": 0.8082727193832397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.28413298766791, "step_time": 0.45559210586547855} +{"epoch": 0, "iter": 4625, "iter_tflops": 
34.145156083723805, "iter_time": 0.60421728515625, "loss": 0.08574084937572479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.56805901313206, "step_time": 0.5641834449768066} +{"epoch": 0, "iter": 4626, "iter_tflops": 14.035716729609126, "iter_time": 1.4698995361328124, "loss": 0.025637280195951462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.957551818527746, "step_time": 1.2928733520507814} +{"epoch": 0, "iter": 4627, "iter_tflops": 36.09547954891408, "iter_time": 0.5715700073242188, "loss": 0.03870058059692383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45356826992453, "step_time": 0.5099944057464599} +{"epoch": 0, "iter": 4628, "iter_tflops": 43.58239036239726, "iter_time": 0.47338141250610355, "loss": 0.06345333904027939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.647764284301005, "step_time": 0.43299184799194335} +{"epoch": 0, "iter": 4629, "iter_tflops": 16.86044517019542, "iter_time": 0.8527139587402344, "loss": 0.04958099499344826, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 18.01832962635952, "step_time": 0.7979173011779784} +{"epoch": 0, "iter": 4630, "iter_tflops": 25.122631454377153, "iter_time": 0.5722783050537109, "loss": 0.05887189134955406, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 28.33003224511016, "step_time": 0.5074874897003173} +{"epoch": 0, "iter": 4631, "iter_tflops": 31.389780164499644, "iter_time": 0.4580196762084961, "loss": 0.051948223263025284, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 34.57691402100681, "step_time": 0.4158016223907471} +{"epoch": 0, "iter": 4632, "iter_tflops": 31.98530102572201, "iter_time": 0.44949200057983396, "loss": 0.048768747597932816, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 35.12308428027572, "step_time": 0.4093358325958252} +{"epoch": 0, "iter": 4633, "iter_tflops": 34.67950869306291, "iter_time": 0.5949073181152343, "loss": 0.5671612620353699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.25926497316434, "step_time": 0.5392443771362305} +{"epoch": 0, "iter": 4634, 
"iter_tflops": 47.18614402543036, "iter_time": 0.43722779083251956, "loss": 0.7615910172462463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45814850406097, "step_time": 0.40092957305908206} +{"epoch": 0, "iter": 4635, "iter_tflops": 49.82244221473103, "iter_time": 0.41409237670898436, "loss": 0.5378767848014832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.93667636455814, "step_time": 0.38250583648681635} +{"epoch": 0, "iter": 4636, "iter_tflops": 46.39984083183162, "iter_time": 0.4446371612548828, "loss": 0.4523150324821472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.45373514144062, "step_time": 0.40891112327575685} +{"epoch": 0, "iter": 4637, "iter_tflops": 32.77051088569209, "iter_time": 0.6295627670288086, "loss": 0.43228834867477417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.09158032213395, "step_time": 0.5879214706420898} +{"epoch": 0, "iter": 4638, "iter_tflops": 31.354790786091243, "iter_time": 0.657988555908203, "loss": 0.5058812499046326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.725319335700334, "step_time": 0.5327546386718751} +{"epoch": 0, "iter": 4639, "iter_tflops": 38.04471607717834, "iter_time": 0.542285385131836, "loss": 0.34365904331207275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.290183870539934, "step_time": 0.4996609745025634} +{"epoch": 0, "iter": 4640, "iter_tflops": 43.135643045451246, "iter_time": 0.47828413009643556, "loss": 0.5064902305603027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.105686167291054, "step_time": 0.437974588394165} +{"epoch": 0, "iter": 4641, "iter_tflops": 18.66958285763392, "iter_time": 1.1050645141601563, "loss": 0.5479736328125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.070878914643597, "step_time": 1.0279118118286132} +{"epoch": 0, "iter": 4642, "iter_tflops": 20.556243390016963, "iter_time": 1.0036412353515622, "loss": 0.52159184217453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.095001963462835, "step_time": 0.8221196212768555} +{"epoch": 0, "iter": 4643, 
"iter_tflops": 46.96765851731987, "iter_time": 0.4392616996765137, "loss": 0.5366535186767578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.834946267082614, "step_time": 0.40584469985961913} +{"epoch": 0, "iter": 4644, "iter_tflops": 46.21099374083009, "iter_time": 0.4464542274475098, "loss": 0.5742665529251099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.094094837480235, "step_time": 0.4118468170166016} +{"epoch": 0, "iter": 4645, "iter_tflops": 33.18702024005443, "iter_time": 0.621661521911621, "loss": 0.43945983052253723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.481235763346305, "step_time": 0.5814649085998534} +{"epoch": 0, "iter": 4646, "iter_tflops": 26.94131320376347, "iter_time": 0.7657790603637695, "loss": 0.2774532735347748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.22210880289779, "step_time": 0.6826490383148193} +{"epoch": 0, "iter": 4647, "iter_tflops": 34.09125601094612, "iter_time": 0.6051725845336915, "loss": 0.3109949827194214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.04328087005171, "step_time": 0.5569456329345702} +{"epoch": 0, "iter": 4648, "iter_tflops": 40.85658028963207, "iter_time": 0.5049637870788574, "loss": 0.3112010061740875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83905694009826, "step_time": 0.46011434936523443} +{"epoch": 0, "iter": 4649, "iter_tflops": 19.484468119513565, "iter_time": 1.058848175048828, "loss": 0.28174155950546265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.700687474053186, "step_time": 0.9966380844116212} +{"epoch": 0, "iter": 4650, "iter_tflops": 16.864729052292617, "iter_time": 1.2233278961181642, "loss": 0.27805203199386597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.66491124135023, "step_time": 0.9522814693450926} +{"epoch": 0, "iter": 4651, "iter_tflops": 49.633898766730475, "iter_time": 0.41566538238525397, "loss": 0.2904379963874817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.996999689442525, "step_time": 0.38207851600646975} +{"epoch": 0, "iter": 
4652, "iter_tflops": 46.61983392816702, "iter_time": 0.44253897476196297, "loss": 0.23794744908809662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.707552955642434, "step_time": 0.4068643093109131} +{"epoch": 0, "iter": 4653, "iter_tflops": 26.534353009366544, "iter_time": 0.7775238952636718, "loss": 0.8070218563079834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.904054326022028, "step_time": 0.7393582763671875} +{"epoch": 0, "iter": 4654, "iter_tflops": 21.24468501438442, "iter_time": 0.9711178817749024, "loss": 0.9194548726081848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.981200131179357, "step_time": 0.8603027954101563} +{"epoch": 0, "iter": 4655, "iter_tflops": 34.2377787458138, "iter_time": 0.6025827102661133, "loss": 0.919252872467041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.164523035551156, "step_time": 0.5551287040710449} +{"epoch": 0, "iter": 4656, "iter_tflops": 37.120347820331524, "iter_time": 0.5557893371582031, "loss": 1.0976005792617798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.405991816026926, "step_time": 0.5105949039459228} +{"epoch": 0, "iter": 4657, "iter_tflops": 18.58753070499532, "iter_time": 1.1099426727294923, "loss": 0.9142501950263977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.549052443209796, "step_time": 1.0553500518798828} +{"epoch": 0, "iter": 4658, "iter_tflops": 15.205539550973903, "iter_time": 1.3568143005371094, "loss": 0.6523863077163696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.94160612074831, "step_time": 0.9851724548339844} +{"epoch": 0, "iter": 4659, "iter_tflops": 41.06680824612596, "iter_time": 0.5023787918090821, "loss": 0.7448228001594543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.943984052895495, "step_time": 0.4590401573181152} +{"epoch": 0, "iter": 4660, "iter_tflops": 40.91584020175403, "iter_time": 0.5042324295043945, "loss": 0.6006280183792114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6395648967798, "step_time": 0.46217057800292966} +{"epoch": 0, "iter": 
4661, "iter_tflops": 20.458338301817253, "iter_time": 1.0084442443847657, "loss": 0.3977726995944977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.080982178280657, "step_time": 0.9343376731872557} +{"epoch": 0, "iter": 4662, "iter_tflops": 15.295554184752074, "iter_time": 1.3488294219970702, "loss": 0.39244213700294495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.431129567853883, "step_time": 1.183577543258667} +{"epoch": 0, "iter": 4663, "iter_tflops": 37.91512860805067, "iter_time": 0.5441388244628906, "loss": 0.41293472051620483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53446956405267, "step_time": 0.49672220993041993} +{"epoch": 0, "iter": 4664, "iter_tflops": 43.24814229520855, "iter_time": 0.4770399932861329, "loss": 0.26936963200569153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.02073404546633, "step_time": 0.43876587486267093} +{"epoch": 0, "iter": 4665, "iter_tflops": 13.429487008983918, "iter_time": 0.961459243774414, "loss": 0.0009414373198524117, "lr": 3e-05, "seqlen": 5184.0, "step_tflops": 14.130101302006011, "step_time": 0.913787109375} +{"epoch": 0, "iter": 4666, "iter_tflops": 11.290273286102224, "iter_time": 1.1436308135986328, "loss": 0.005929259117692709, "lr": 3e-05, "seqlen": 5184.0, "step_tflops": 16.260206446850916, "step_time": 0.7940799808502198} +{"epoch": 0, "iter": 4667, "iter_tflops": 31.02577674255043, "iter_time": 0.41616699981689453, "loss": 0.001223062863573432, "lr": 3e-05, "seqlen": 5184.0, "step_tflops": 34.34054918120632, "step_time": 0.37599586296081544} +{"epoch": 0, "iter": 4668, "iter_tflops": 28.17680369627526, "iter_time": 0.45824588775634767, "loss": 0.0031353996600955725, "lr": 3e-05, "seqlen": 5184.0, "step_tflops": 31.11567093543978, "step_time": 0.41496467971801754} +{"epoch": 0, "iter": 4669, "iter_tflops": 17.970989457852617, "iter_time": 1.1480221252441407, "loss": 0.2439945489168167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.06048534861993, "step_time": 1.082401268005371} +{"epoch": 
0, "iter": 4670, "iter_tflops": 25.24943642682276, "iter_time": 0.8170912475585936, "loss": 0.3117170035839081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.513266361199513, "step_time": 0.6546796283721924} +{"epoch": 0, "iter": 4671, "iter_tflops": 52.18656576160829, "iter_time": 0.39533341979980474, "loss": 0.22881478071212769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.505055015885375, "step_time": 0.3651194305419922} +{"epoch": 0, "iter": 4672, "iter_tflops": 54.93597680199584, "iter_time": 0.3755479507446289, "loss": 0.38199037313461304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.719690776637286, "step_time": 0.34546551132202147} +{"epoch": 0, "iter": 4673, "iter_tflops": 34.77123079618276, "iter_time": 0.5933380279541016, "loss": 0.20533832907676697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.0786846754966, "step_time": 0.5564138450622559} +{"epoch": 0, "iter": 4674, "iter_tflops": 15.529325321201544, "iter_time": 1.3285247802734375, "loss": 0.26751431822776794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.278476777372415, "step_time": 1.1287096710205078} +{"epoch": 0, "iter": 4675, "iter_tflops": 47.448448238795905, "iter_time": 0.4348107109069825, "loss": 0.3015778362751007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00029193761385, "step_time": 0.39674957084655765} +{"epoch": 0, "iter": 4676, "iter_tflops": 53.24216427135312, "iter_time": 0.38749539566040037, "loss": 0.2883441746234894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.83211859370306, "step_time": 0.35674109840393065} +{"epoch": 0, "iter": 4677, "iter_tflops": 31.00370186509567, "iter_time": 0.6654396820068359, "loss": 0.8139042258262634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.95981822753811, "step_time": 0.6259468231201172} +{"epoch": 0, "iter": 4678, "iter_tflops": 12.796354936358508, "iter_time": 1.6122633056640623, "loss": 0.5794900059700012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.982421836745008, "step_time": 1.290861530303955} 
+{"epoch": 0, "iter": 4679, "iter_tflops": 26.082435069896494, "iter_time": 0.7909956817626952, "loss": 0.8595051765441895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.285528413892038, "step_time": 0.6594452629089356} +{"epoch": 0, "iter": 4680, "iter_tflops": 44.67494525821574, "iter_time": 0.4618045616149902, "loss": 0.7864400148391724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.090226064623074, "step_time": 0.4290080375671387} +{"epoch": 0, "iter": 4681, "iter_tflops": 19.955177893150317, "iter_time": 0.7327389907836914, "loss": 0.32752299308776855, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 21.205399526069577, "step_time": 0.6895383834838867} +{"epoch": 0, "iter": 4682, "iter_tflops": 14.20706802205256, "iter_time": 1.0292015838623045, "loss": 0.30152031779289246, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 17.426496072055382, "step_time": 0.839063507080078} +{"epoch": 0, "iter": 4683, "iter_tflops": 26.543599252451166, "iter_time": 0.5508648910522461, "loss": 0.3319845199584961, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 28.263098877862248, "step_time": 0.5173508033752441} +{"epoch": 0, "iter": 4684, "iter_tflops": 26.68325746025237, "iter_time": 0.5479817047119141, "loss": 0.2872362732887268, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 28.410651146016804, "step_time": 0.5146639137268066} +{"epoch": 0, "iter": 4685, "iter_tflops": 28.791503978430754, "iter_time": 0.7165688018798829, "loss": 0.3780243694782257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.643750225002417, "step_time": 0.6732561569213867} +{"epoch": 0, "iter": 4686, "iter_tflops": 42.59385125732136, "iter_time": 0.4843678817749023, "loss": 0.3894350528717041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.225956559374815, "step_time": 0.4463097152709961} +{"epoch": 0, "iter": 4687, "iter_tflops": 45.274064564872845, "iter_time": 0.45569342422485354, "loss": 0.31103572249412537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13794856318427, "step_time": 
0.41986070060729985} +{"epoch": 0, "iter": 4688, "iter_tflops": 48.296237460978524, "iter_time": 0.4271780700683594, "loss": 0.38008788228034973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.74562309674866, "step_time": 0.39114323234558107} +{"epoch": 0, "iter": 4689, "iter_tflops": 29.168148987554655, "iter_time": 0.6987814025878906, "loss": 0.008299081586301327, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 30.865377538082605, "step_time": 0.6603567390441895} +{"epoch": 0, "iter": 4690, "iter_tflops": 13.718368690900238, "iter_time": 1.4857568359375, "loss": 0.0031458011362701654, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 17.56758440036458, "step_time": 1.160214153289795} +{"epoch": 0, "iter": 4691, "iter_tflops": 41.52697496676105, "iter_time": 0.4908173561096192, "loss": 0.004493851214647293, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 46.39568224528307, "step_time": 0.43931157112121577} +{"epoch": 0, "iter": 4692, "iter_tflops": 44.78885307674947, "iter_time": 0.45507215881347657, "loss": 0.012799086049199104, "lr": 3e-05, "seqlen": 8096.0, "step_tflops": 49.570569268097856, "step_time": 0.4111746215820312} +{"epoch": 0, "iter": 4693, "iter_tflops": 11.017825128923835, "iter_time": 0.9514981079101563, "loss": 0.0013822403270751238, "lr": 3e-05, "seqlen": 4224.0, "step_tflops": 11.83772374099133, "step_time": 0.8855959129333496} +{"epoch": 0, "iter": 4694, "iter_tflops": 16.319745566940156, "iter_time": 0.6423776473999023, "loss": 0.004546718206256628, "lr": 3e-05, "seqlen": 4224.0, "step_tflops": 18.684143585825172, "step_time": 0.5610875186920166} +{"epoch": 0, "iter": 4695, "iter_tflops": 21.363093091481648, "iter_time": 0.4907266807556152, "loss": 0.004357630852609873, "lr": 3e-05, "seqlen": 4224.0, "step_tflops": 23.956404367505904, "step_time": 0.4376048927307129} +{"epoch": 0, "iter": 4696, "iter_tflops": 24.547482020367735, "iter_time": 0.4270678253173828, "loss": 0.014137711375951767, "lr": 3e-05, "seqlen": 4224.0, "step_tflops": 
27.11015485048784, "step_time": 0.3866978931427002} +{"epoch": 0, "iter": 4697, "iter_tflops": 17.770706390006467, "iter_time": 1.1609608001708986, "loss": 0.1272629052400589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.880162412941974, "step_time": 1.0927391967773437} +{"epoch": 0, "iter": 4698, "iter_tflops": 16.03073072320292, "iter_time": 1.2869714965820311, "loss": 0.17291124165058136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.880370593880876, "step_time": 1.0377620182037353} +{"epoch": 0, "iter": 4699, "iter_tflops": 44.182128084946065, "iter_time": 0.4669556312561035, "loss": 0.1903000921010971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.54041642974408, "step_time": 0.42502918243408205} +{"epoch": 0, "iter": 4700, "iter_tflops": 38.27196059331242, "iter_time": 0.5390654983520508, "loss": 0.1552831530570984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05654185877235, "step_time": 0.4905561084747314} +{"epoch": 0, "iter": 4701, "iter_tflops": 29.92592173829827, "iter_time": 0.6894054489135742, "loss": 0.7994667291641235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.78208917173119, "step_time": 0.6293404121398924} +{"epoch": 0, "iter": 4702, "iter_tflops": 38.68553533378072, "iter_time": 0.5333025207519532, "loss": 0.9615366458892822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25038299855918, "step_time": 0.48830547904968263} +{"epoch": 0, "iter": 4703, "iter_tflops": 40.204238758331584, "iter_time": 0.5131571731567383, "loss": 0.9685457944869995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.515221195561, "step_time": 0.47411211395263675} +{"epoch": 0, "iter": 4704, "iter_tflops": 37.05630438829696, "iter_time": 0.5567498931884766, "loss": 0.8484327793121338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.36436250388234, "step_time": 0.5111215000152588} +{"epoch": 0, "iter": 4705, "iter_tflops": 20.541012195005866, "iter_time": 1.0043854370117187, "loss": 0.4136465787887573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.387956600161385, "step_time": 0.9215264205932616} +{"epoch": 0, "iter": 4706, "iter_tflops": 22.705835218944213, "iter_time": 0.9086251754760741, "loss": 0.29476407170295715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.59469051615807, "step_time": 0.7476472148895263} +{"epoch": 0, "iter": 4707, "iter_tflops": 43.84511281195585, "iter_time": 0.4705448837280274, "loss": 0.3496096432209015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.15938411184602, "step_time": 0.42839197158813475} +{"epoch": 0, "iter": 4708, "iter_tflops": 42.273087969418526, "iter_time": 0.4880432090759277, "loss": 0.3751143217086792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.51856239863424, "step_time": 0.4435023880004883} +{"epoch": 0, "iter": 4709, "iter_tflops": 22.241901894109194, "iter_time": 0.9275777587890625, "loss": 0.9084571599960327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.983080083484207, "step_time": 0.8602353591918945} +{"epoch": 0, "iter": 4710, "iter_tflops": 24.211574616330577, "iter_time": 0.8521169662475586, "loss": 0.9501730799674988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.269934339627426, "step_time": 0.7565509052276611} +{"epoch": 0, "iter": 4711, "iter_tflops": 40.97471472156388, "iter_time": 0.5035079231262207, "loss": 0.7273582816123962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.97320924490593, "step_time": 0.46917416000366213} +{"epoch": 0, "iter": 4712, "iter_tflops": 42.22501774274345, "iter_time": 0.4885988121032715, "loss": 0.8767399787902832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.13121382500241, "step_time": 0.457135799407959} +{"epoch": 0, "iter": 4713, "iter_tflops": 26.404561524198975, "iter_time": 0.7813458099365234, "loss": 1.1643891334533691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.808375304341624, "step_time": 0.7419021530151367} +{"epoch": 0, "iter": 4714, "iter_tflops": 14.588615310051793, "iter_time": 1.4141913452148436, "loss": 0.8359408378601074, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.096027838035734, "step_time": 1.0266254444122314} +{"epoch": 0, "iter": 4715, "iter_tflops": 40.74290705672891, "iter_time": 0.5063726425170899, "loss": 0.8613989949226379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.0519193662372, "step_time": 0.468335859298706} +{"epoch": 0, "iter": 4716, "iter_tflops": 44.97235071311952, "iter_time": 0.45875061416625973, "loss": 0.7530693411827087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.34986383958613, "step_time": 0.426704273223877} +{"epoch": 0, "iter": 4717, "iter_tflops": 29.7388372720181, "iter_time": 0.6937424392700196, "loss": 0.6046574115753174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.48707454953109, "step_time": 0.6552242088317871} +{"epoch": 0, "iter": 4718, "iter_tflops": 13.90386151210015, "iter_time": 1.483839111328125, "loss": 0.6664963960647583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.454700725705226, "step_time": 1.2538115310668945} +{"epoch": 0, "iter": 4719, "iter_tflops": 38.681190265980455, "iter_time": 0.5333624267578124, "loss": 0.6328138709068298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.304953515491704, "step_time": 0.48767559814453126} +{"epoch": 0, "iter": 4720, "iter_tflops": 39.03239251005408, "iter_time": 0.5285633850097656, "loss": 0.8415591716766357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7145896326701, "step_time": 0.4829987525939941} +{"epoch": 0, "iter": 4721, "iter_tflops": 17.877299982712103, "iter_time": 1.1540385589599609, "loss": 0.15049704909324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.0190705672566, "step_time": 1.0847582397460938} +{"epoch": 0, "iter": 4722, "iter_tflops": 28.35421938621914, "iter_time": 0.7276198730468749, "loss": 0.1075214222073555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.362706144399624, "step_time": 0.5834138774871828} +{"epoch": 0, "iter": 4723, "iter_tflops": 48.86748988908193, "iter_time": 0.4221844329833984, "loss": 0.12738317251205444, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 53.303421734697686, "step_time": 0.3870500774383545} +{"epoch": 0, "iter": 4724, "iter_tflops": 55.67584994006284, "iter_time": 0.370557315826416, "loss": 0.13072232902050018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.84916526913657, "step_time": 0.3390530242919922} +{"epoch": 0, "iter": 4725, "iter_tflops": 18.745750053108193, "iter_time": 1.100574447631836, "loss": 0.7139938473701477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.45473973687696, "step_time": 1.060466178894043} +{"epoch": 0, "iter": 4726, "iter_tflops": 13.629078378596905, "iter_time": 1.5137555847167967, "loss": 0.7583964467048645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.20929168360046, "step_time": 1.1329981346130371} +{"epoch": 0, "iter": 4727, "iter_tflops": 44.89661664012037, "iter_time": 0.4595244598388672, "loss": 0.8118160963058472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.732078453440124, "step_time": 0.42335755348205567} +{"epoch": 0, "iter": 4728, "iter_tflops": 49.20361039711708, "iter_time": 0.4193003997802734, "loss": 0.6952333450317383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.291205965072166, "step_time": 0.3871387996673584} +{"epoch": 0, "iter": 4729, "iter_tflops": 26.349779742425707, "iter_time": 0.7829702453613281, "loss": 0.1349976509809494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.84062163555532, "step_time": 0.7410428466796875} +{"epoch": 0, "iter": 4730, "iter_tflops": 17.089285395082197, "iter_time": 1.2072531433105471, "loss": 0.1259537637233734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.571878339600968, "step_time": 1.0028784523010255} +{"epoch": 0, "iter": 4731, "iter_tflops": 39.62042473857871, "iter_time": 0.520718635559082, "loss": 0.23216883838176727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65608629132294, "step_time": 0.47258229637145993} +{"epoch": 0, "iter": 4732, "iter_tflops": 42.59667159723498, "iter_time": 0.4843358116149903, "loss": 0.19853584468364716, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 46.777479227608595, "step_time": 0.44104756927490235} +{"epoch": 0, "iter": 4733, "iter_tflops": 22.043131920586777, "iter_time": 0.935942024230957, "loss": 0.1200343668460846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.558678458710204, "step_time": 0.8757322082519532} +{"epoch": 0, "iter": 4734, "iter_tflops": 18.309375392891944, "iter_time": 1.1268048782348632, "loss": 0.11570536345243454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.914490449161182, "step_time": 0.8627026176452637} +{"epoch": 0, "iter": 4735, "iter_tflops": 55.290950962791655, "iter_time": 0.373136890411377, "loss": 0.07013768702745438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.23291526877936, "step_time": 0.342521915435791} +{"epoch": 0, "iter": 4736, "iter_tflops": 58.32528426351606, "iter_time": 0.3537246971130371, "loss": 0.10811859369277954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.56765437505583, "step_time": 0.32455332374572754} +{"epoch": 0, "iter": 4737, "iter_tflops": 29.42027079532431, "iter_time": 0.7012543716430665, "loss": 0.007933901622891426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.276048242296078, "step_time": 0.6596451492309571} +{"epoch": 0, "iter": 4738, "iter_tflops": 53.30106380948723, "iter_time": 0.38706719970703124, "loss": 0.0026723667979240417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.429087569835296, "step_time": 0.34140997886657715} +{"epoch": 0, "iter": 4739, "iter_tflops": 54.97754461866015, "iter_time": 0.37526400375366215, "loss": 0.013346520252525806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.39210508363582, "step_time": 0.34161904907226565} +{"epoch": 0, "iter": 4740, "iter_tflops": 58.10675448612443, "iter_time": 0.35505499649047856, "loss": 0.008356484584510326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.01335567297372, "step_time": 0.3222935791015625} +{"epoch": 0, "iter": 4741, "iter_tflops": 25.591689463163526, "iter_time": 0.8061637954711915, "loss": 0.16009335219860077, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 26.854535112380237, "step_time": 0.7682536087036133} +{"epoch": 0, "iter": 4742, "iter_tflops": 13.333284529151918, "iter_time": 1.5473376770019531, "loss": 0.16860389709472656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.769402402051615, "step_time": 1.3082990074157714} +{"epoch": 0, "iter": 4743, "iter_tflops": 44.00106212333321, "iter_time": 0.4688771705627442, "loss": 0.13292205333709717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.27550909674339, "step_time": 0.4273614902496338} +{"epoch": 0, "iter": 4744, "iter_tflops": 44.87310829730744, "iter_time": 0.4597651977539063, "loss": 0.08854398876428604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.15209077977648, "step_time": 0.419739896774292} +{"epoch": 0, "iter": 4745, "iter_tflops": 20.28209271733789, "iter_time": 1.0172073364257812, "loss": 0.006206497550010681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.875726055628675, "step_time": 0.9431044006347656} +{"epoch": 0, "iter": 4746, "iter_tflops": 32.550651915686, "iter_time": 0.6338150634765625, "loss": 0.0058064707554876804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22354252754367, "step_time": 0.4886158828735352} +{"epoch": 0, "iter": 4747, "iter_tflops": 53.094472398735235, "iter_time": 0.38857328414917, "loss": 0.00432120356708765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.25282757633865, "step_time": 0.3541646709442139} +{"epoch": 0, "iter": 4748, "iter_tflops": 57.54972765639711, "iter_time": 0.3584915924072266, "loss": 0.009857296012341976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.23267943795797, "step_time": 0.32627264404296874} +{"epoch": 0, "iter": 4749, "iter_tflops": 21.265360612577393, "iter_time": 0.6991137847900392, "loss": 0.004952581133693457, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 22.62492281561771, "step_time": 0.6571030921936035} +{"epoch": 0, "iter": 4750, "iter_tflops": 13.977455892693019, "iter_time": 1.0636346740722655, "loss": 0.013476244173943996, "lr": 
3e-05, "seqlen": 5952.0, "step_tflops": 17.183287382017888, "step_time": 0.865195722579956} +{"epoch": 0, "iter": 4751, "iter_tflops": 42.24673195019922, "iter_time": 0.3519066696166992, "loss": 0.003408007323741913, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 46.32800685013005, "step_time": 0.32090538215637204} +{"epoch": 0, "iter": 4752, "iter_tflops": 39.28192306093555, "iter_time": 0.37846687698364256, "loss": 0.0007481981301680207, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 43.241077948916185, "step_time": 0.3438144340515137} +{"epoch": 0, "iter": 4753, "iter_tflops": 28.540094839603476, "iter_time": 0.7228810424804688, "loss": 0.22735366225242615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.28486619272209, "step_time": 0.6812344284057616} +{"epoch": 0, "iter": 4754, "iter_tflops": 14.24889712452945, "iter_time": 1.4479080963134767, "loss": 0.16967327892780304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.020611975001515, "step_time": 1.1448608703613283} +{"epoch": 0, "iter": 4755, "iter_tflops": 49.253417175341596, "iter_time": 0.41887638854980463, "loss": 0.3006155788898468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.6525386446308, "step_time": 0.38453154373168946} +{"epoch": 0, "iter": 4756, "iter_tflops": 51.42211970022008, "iter_time": 0.4012104835510254, "loss": 0.25571051239967346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.31912468391409, "step_time": 0.3663248252868652} +{"epoch": 0, "iter": 4757, "iter_tflops": 31.83083600975948, "iter_time": 0.6481480255126953, "loss": 0.44995322823524475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.01608531258857, "step_time": 0.6065099296569825} +{"epoch": 0, "iter": 4758, "iter_tflops": 12.698020386609846, "iter_time": 1.6247488098144531, "loss": 0.41662371158599854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.896743170242136, "step_time": 1.2210100669860842} +{"epoch": 0, "iter": 4759, "iter_tflops": 47.2202699302805, "iter_time": 0.43691180801391605, "loss": 
0.38212311267852783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.49422390798011, "step_time": 0.4006486930847168} +{"epoch": 0, "iter": 4760, "iter_tflops": 52.95336656702813, "iter_time": 0.38960872268676755, "loss": 0.48707908391952515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.29048413151156, "step_time": 0.3601137924194336} +{"epoch": 0, "iter": 4761, "iter_tflops": 42.11208866219802, "iter_time": 0.4899090538024902, "loss": 0.45678767561912537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.85662190685277, "step_time": 0.4499043464660644} +{"epoch": 0, "iter": 4762, "iter_tflops": 46.29944648547248, "iter_time": 0.44560129928588865, "loss": 0.5633158683776855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20982844019882, "step_time": 0.41089751052856444} +{"epoch": 0, "iter": 4763, "iter_tflops": 44.53408378569137, "iter_time": 0.46326525115966793, "loss": 0.49907153844833374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.303922458490995, "step_time": 0.427110107421875} +{"epoch": 0, "iter": 4764, "iter_tflops": 48.74089616543671, "iter_time": 0.4232809638977051, "loss": 0.48355838656425476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.56937015460209, "step_time": 0.39245464515686035} +{"epoch": 0, "iter": 4765, "iter_tflops": 25.639066613370076, "iter_time": 0.8046741256713867, "loss": 0.14151456952095032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.997702929435246, "step_time": 0.7641795883178711} +{"epoch": 0, "iter": 4766, "iter_tflops": 15.842475978858728, "iter_time": 1.3022644653320312, "loss": 0.20824038982391357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.978431685982645, "step_time": 1.1475468978881835} +{"epoch": 0, "iter": 4767, "iter_tflops": 39.8523781308583, "iter_time": 0.5176878890991211, "loss": 0.17215771973133087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.55913648420133, "step_time": 0.40805866050720213} +{"epoch": 0, "iter": 4768, "iter_tflops": 55.369666942159554, "iter_time": 0.37260642242431635, 
"loss": 0.1676168590784073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.19081826060213, "step_time": 0.3427614727020264} +{"epoch": 0, "iter": 4769, "iter_tflops": 39.80276691098419, "iter_time": 0.5183331489562988, "loss": 0.30023062229156494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.864519886212086, "step_time": 0.4813093338012695} +{"epoch": 0, "iter": 4770, "iter_tflops": 12.896584563682712, "iter_time": 1.599733123779297, "loss": 0.2537819445133209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.453058124813412, "step_time": 1.3350815963745117} +{"epoch": 0, "iter": 4771, "iter_tflops": 17.231051737998754, "iter_time": 1.1973206176757811, "loss": 0.15481191873550415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.145788531552668, "step_time": 1.0240896492004394} +{"epoch": 0, "iter": 4772, "iter_tflops": 15.311094454009453, "iter_time": 1.347460403442383, "loss": 0.2756233811378479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.19869612353166, "step_time": 1.1336577835083008} +{"epoch": 0, "iter": 4773, "iter_tflops": 14.43140157329976, "iter_time": 1.1464869537353515, "loss": 0.30497121810913086, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 15.283933159338476, "step_time": 1.0825363769531249} +{"epoch": 0, "iter": 4774, "iter_tflops": 15.202697366648577, "iter_time": 1.0883209228515627, "loss": 0.2476663440465927, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 18.210160562249953, "step_time": 0.9085814247131347} +{"epoch": 0, "iter": 4775, "iter_tflops": 25.93498108204393, "iter_time": 0.6379574203491211, "loss": 0.3140609860420227, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 27.915170577086766, "step_time": 0.592703296661377} +{"epoch": 0, "iter": 4776, "iter_tflops": 26.244806140553276, "iter_time": 0.6304262084960938, "loss": 0.26013055443763733, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 28.2071356912746, "step_time": 0.5865683708190919} +{"epoch": 0, "iter": 4777, "iter_tflops": 36.53952134930149, "iter_time": 0.5646240768432618, 
"loss": 0.1270037740468979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.62255243343777, "step_time": 0.5078729000091553} +{"epoch": 0, "iter": 4778, "iter_tflops": 40.34570884315962, "iter_time": 0.5113578147888184, "loss": 0.1597977727651596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.04849756853334, "step_time": 0.4579751739501953} +{"epoch": 0, "iter": 4779, "iter_tflops": 41.95633693243113, "iter_time": 0.49172771072387694, "loss": 0.1343545913696289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.6586248741011, "step_time": 0.45185534095764157} +{"epoch": 0, "iter": 4780, "iter_tflops": 40.386680196529646, "iter_time": 0.510839054107666, "loss": 0.1269231140613556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13078303266227, "step_time": 0.4674989223480225} +{"epoch": 0, "iter": 4781, "iter_tflops": 21.495743695545503, "iter_time": 0.959775749206543, "loss": 0.409729540348053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.798316751354932, "step_time": 0.9049393310546875} +{"epoch": 0, "iter": 4782, "iter_tflops": 20.586256300900146, "iter_time": 1.0021780166625975, "loss": 0.35889771580696106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.99722900680537, "step_time": 0.8253352203369141} +{"epoch": 0, "iter": 4783, "iter_tflops": 43.50766686019678, "iter_time": 0.4741944351196289, "loss": 0.3298644721508026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47298522456418, "step_time": 0.43458597373962404} +{"epoch": 0, "iter": 4784, "iter_tflops": 43.18032449022316, "iter_time": 0.4777892189025879, "loss": 0.36128681898117065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.77848444453622, "step_time": 0.44103809165954594} +{"epoch": 0, "iter": 4785, "iter_tflops": 32.80254818824709, "iter_time": 0.6289478912353516, "loss": 0.9065696001052856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.83243265872815, "step_time": 0.5757659187316895} +{"epoch": 0, "iter": 4786, "iter_tflops": 36.40714413487856, "iter_time": 0.5666770629882812, 
"loss": 0.9816703796386719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.691811443146236, "step_time": 0.5197821102142335} +{"epoch": 0, "iter": 4787, "iter_tflops": 40.2931117374262, "iter_time": 0.5120253219604493, "loss": 1.1687740087509155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.641905137887946, "step_time": 0.47273585891723635} +{"epoch": 0, "iter": 4788, "iter_tflops": 41.39501920027733, "iter_time": 0.4983955535888672, "loss": 0.8534728288650513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.9163040861571, "step_time": 0.4593230438232422} +{"epoch": 0, "iter": 4789, "iter_tflops": 18.46882161327434, "iter_time": 1.1170768737792969, "loss": 0.726381778717041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.5878346356844, "step_time": 1.0532605514526368} +{"epoch": 0, "iter": 4790, "iter_tflops": 16.699624186400218, "iter_time": 1.2354226226806642, "loss": 0.5951389074325562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.42336984033682, "step_time": 1.0101708812713623} +{"epoch": 0, "iter": 4791, "iter_tflops": 44.699740724676076, "iter_time": 0.46154839324951175, "loss": 0.6007327437400818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32471431251411, "step_time": 0.42692634201049806} +{"epoch": 0, "iter": 4792, "iter_tflops": 42.61142651416835, "iter_time": 0.48416810226440427, "loss": 0.5404286980628967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.865466799249134, "step_time": 0.4498175849914551} +{"epoch": 0, "iter": 4793, "iter_tflops": 32.80730879603015, "iter_time": 0.5394002075195312, "loss": 0.0035398828331381083, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 35.22415777716949, "step_time": 0.5023901290893555} +{"epoch": 0, "iter": 4794, "iter_tflops": 19.00275590449414, "iter_time": 0.9312475128173828, "loss": 0.00601548096165061, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 24.258212873327444, "step_time": 0.7294959964752197} +{"epoch": 0, "iter": 4795, "iter_tflops": 49.0693715179615, "iter_time": 0.36063777923583984, 
"loss": 0.0083695650100708, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 54.62343191096093, "step_time": 0.32396846103668214} +{"epoch": 0, "iter": 4796, "iter_tflops": 51.38202724693096, "iter_time": 0.34440581893920896, "loss": 0.0011230381205677986, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 56.43322447415855, "step_time": 0.3135789127349854} +{"epoch": 0, "iter": 4797, "iter_tflops": 31.67981608698499, "iter_time": 0.6512377929687501, "loss": 0.8386980891227722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.60323120546954, "step_time": 0.6139615974426269} +{"epoch": 0, "iter": 4798, "iter_tflops": 19.95531871210083, "iter_time": 1.0338643951416016, "loss": 1.1712419986724854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.343214250448302, "step_time": 0.8475090141296385} +{"epoch": 0, "iter": 4799, "iter_tflops": 39.467404742077704, "iter_time": 0.5227375259399414, "loss": 0.7804021239280701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.50313410487652, "step_time": 0.48540169906616215} +{"epoch": 0, "iter": 4800, "iter_tflops": 39.97543500760945, "iter_time": 0.5160942840576173, "loss": 1.146344542503357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.390870697190806, "step_time": 0.4754708347320557} +{"epoch": 0, "iter": 4801, "iter_tflops": 21.112550904194958, "iter_time": 0.9771956787109375, "loss": 0.35146424174308777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.668575067846405, "step_time": 0.9101186752319336} +{"epoch": 0, "iter": 4802, "iter_tflops": 21.144316989083293, "iter_time": 0.9757275924682618, "loss": 0.3819761276245117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.872680388149483, "step_time": 0.8294680423736572} +{"epoch": 0, "iter": 4803, "iter_tflops": 44.28803837034587, "iter_time": 0.4658389549255371, "loss": 0.35928967595100403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.09224469915992, "step_time": 0.4289900302886963} +{"epoch": 0, "iter": 4804, "iter_tflops": 46.32560672574752, "iter_time": 
0.44534966659545894, "loss": 0.30339062213897705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.960569466845136, "step_time": 0.41294752502441406} +{"epoch": 0, "iter": 4805, "iter_tflops": 36.56508999239352, "iter_time": 0.5642292556762696, "loss": 0.3118135333061218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35155654448408, "step_time": 0.5242764282226563} +{"epoch": 0, "iter": 4806, "iter_tflops": 19.978024344022277, "iter_time": 1.0326893768310548, "loss": 0.3708828091621399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.4231479923663, "step_time": 0.9200801563262939} +{"epoch": 0, "iter": 4807, "iter_tflops": 48.70518915792806, "iter_time": 0.42359128189086914, "loss": 0.3286757171154022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.70545784661266, "step_time": 0.3914413108825684} +{"epoch": 0, "iter": 4808, "iter_tflops": 51.1397357621113, "iter_time": 0.4034258918762207, "loss": 0.2702395021915436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.5327584143191, "step_time": 0.3715121326446534} +{"epoch": 0, "iter": 4809, "iter_tflops": 24.487029450676562, "iter_time": 0.8425314941406249, "loss": 0.10171268880367279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.75107706617098, "step_time": 0.8011740036010743} +{"epoch": 0, "iter": 4810, "iter_tflops": 16.85934437584014, "iter_time": 1.2237186126708983, "loss": 0.09463273733854294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.45181690628582, "step_time": 0.9189052982330324} +{"epoch": 0, "iter": 4811, "iter_tflops": 51.82590031099935, "iter_time": 0.39808461380004884, "loss": 0.10023009777069092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.52212080586232, "step_time": 0.3650091896057129} +{"epoch": 0, "iter": 4812, "iter_tflops": 49.758580869953, "iter_time": 0.41462383270263664, "loss": 0.07619711756706238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.19474747857716, "step_time": 0.38068437385559084} +{"epoch": 0, "iter": 4813, "iter_tflops": 48.87894809569278, "iter_time": 
0.42208546447753903, "loss": 0.11156123131513596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.00520267975896, "step_time": 0.3820204811096191} +{"epoch": 0, "iter": 4814, "iter_tflops": 50.28386100046173, "iter_time": 0.41029254913330077, "loss": 0.0934901311993599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.786684658125274, "step_time": 0.3765713081359863} +{"epoch": 0, "iter": 4815, "iter_tflops": 49.57793418142858, "iter_time": 0.416134593963623, "loss": 0.09585382044315338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.92081281177438, "step_time": 0.3826183700561523} +{"epoch": 0, "iter": 4816, "iter_tflops": 53.535410691473004, "iter_time": 0.38537284469604494, "loss": 0.11572984606027603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.30033648114065, "step_time": 0.3538760623931885} +{"epoch": 0, "iter": 4817, "iter_tflops": 40.96682282879182, "iter_time": 0.5036049194335936, "loss": 0.3480137586593628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.53138129624423, "step_time": 0.4632933654785156} +{"epoch": 0, "iter": 4818, "iter_tflops": 37.30583327127457, "iter_time": 0.5530259399414063, "loss": 0.44709134101867676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.09841828242477, "step_time": 0.4678420295715332} +{"epoch": 0, "iter": 4819, "iter_tflops": 49.0347380770068, "iter_time": 0.42074444198608396, "loss": 0.3107786774635315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.0888746273359, "step_time": 0.38861425590515136} +{"epoch": 0, "iter": 4820, "iter_tflops": 46.33375779569795, "iter_time": 0.4452713203430176, "loss": 0.28464123606681824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.648451455547324, "step_time": 0.4155435447692871} +{"epoch": 0, "iter": 4821, "iter_tflops": 39.45562499304709, "iter_time": 0.5228935928344727, "loss": 0.04203697293996811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.70003593227065, "step_time": 0.4831633758544922} +{"epoch": 0, "iter": 4822, "iter_tflops": 40.54674114626326, "iter_time": 
0.5088224830627441, "loss": 0.040223848074674606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.76528857821376, "step_time": 0.46087256813049315} +{"epoch": 0, "iter": 4823, "iter_tflops": 46.04629294070948, "iter_time": 0.4480511283874512, "loss": 0.04498973488807678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.61865509215235, "step_time": 0.4075788555145264} +{"epoch": 0, "iter": 4824, "iter_tflops": 47.70285407141304, "iter_time": 0.43249180603027343, "loss": 0.04987665265798569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.60721189343844, "step_time": 0.3921723423004151} +{"epoch": 0, "iter": 4825, "iter_tflops": 20.18339666245021, "iter_time": 1.0221814422607423, "loss": 0.010273083113133907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.721368424717312, "step_time": 0.9498063430786132} +{"epoch": 0, "iter": 4826, "iter_tflops": 30.91258929529211, "iter_time": 0.6674010162353516, "loss": 0.0030092340894043446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.9098656415015, "step_time": 0.4493825721740723} +{"epoch": 0, "iter": 4827, "iter_tflops": 59.31247471168012, "iter_time": 0.3478373413085937, "loss": 0.004310401622205973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.29665363933039, "step_time": 0.3159594306945801} +{"epoch": 0, "iter": 4828, "iter_tflops": 55.449915487140345, "iter_time": 0.3720671768188477, "loss": 0.015913886949419975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.94220320127342, "step_time": 0.33853540611267086} +{"epoch": 0, "iter": 4829, "iter_tflops": 15.808349976922331, "iter_time": 0.447370735168457, "loss": 0.002372510964050889, "lr": 3e-05, "seqlen": 2864.0, "step_tflops": 17.374146679106843, "step_time": 0.4070526905059815} +{"epoch": 0, "iter": 4830, "iter_tflops": 3.591021322228079, "iter_time": 1.9694099578857422, "loss": 0.006482122931629419, "lr": 3e-05, "seqlen": 2864.0, "step_tflops": 4.420799095251136, "step_time": 1.5997544784545898} +{"epoch": 0, "iter": 4831, "iter_tflops": 5.43881218834525, 
"iter_time": 1.3003194274902343, "loss": 0.004577848128974438, "lr": 3e-05, "seqlen": 2864.0, "step_tflops": 6.995401260659325, "step_time": 1.010977481842041} +{"epoch": 0, "iter": 4832, "iter_tflops": 15.139476045522747, "iter_time": 0.46713592529296877, "loss": 0.004924003966152668, "lr": 3e-05, "seqlen": 2864.0, "step_tflops": 16.77584724727312, "step_time": 0.42156995391845703} +{"epoch": 0, "iter": 4833, "iter_tflops": 13.135985343426299, "iter_time": 1.1038011474609375, "loss": 0.23735490441322327, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 13.948580500238524, "step_time": 1.0394975814819336} +{"epoch": 0, "iter": 4834, "iter_tflops": 17.48424436492275, "iter_time": 0.8292903823852539, "loss": 0.27114659547805786, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 19.298081534125092, "step_time": 0.751344928741455} +{"epoch": 0, "iter": 4835, "iter_tflops": 21.95486607635492, "iter_time": 0.6604237823486329, "loss": 0.2186698168516159, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.697952751330238, "step_time": 0.6118467636108399} +{"epoch": 0, "iter": 4836, "iter_tflops": 23.494556738022016, "iter_time": 0.6171436157226563, "loss": 0.46886447072029114, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 25.15293863005438, "step_time": 0.5764541435241699} +{"epoch": 0, "iter": 4837, "iter_tflops": 18.935378316541673, "iter_time": 1.0895527496337891, "loss": 0.9400332570075989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.322396414046988, "step_time": 1.0151899948120118} +{"epoch": 0, "iter": 4838, "iter_tflops": 20.511169550400957, "iter_time": 1.00584676361084, "loss": 0.7795407772064209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.9372486052388, "step_time": 0.8994580764770509} +{"epoch": 0, "iter": 4839, "iter_tflops": 36.48176191715081, "iter_time": 0.5655180130004882, "loss": 0.9646852612495422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71332245089524, "step_time": 0.519500566482544} +{"epoch": 0, "iter": 4840, "iter_tflops": 
40.530645238508626, "iter_time": 0.5090245513916016, "loss": 0.8123497366905212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10727196215426, "step_time": 0.4677481193542481} +{"epoch": 0, "iter": 4841, "iter_tflops": 21.927451175597945, "iter_time": 0.9408796920776368, "loss": 0.10874054580926895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.578903684048495, "step_time": 0.8749810333251954} +{"epoch": 0, "iter": 4842, "iter_tflops": 15.680640834367082, "iter_time": 1.3157047424316406, "loss": 0.0941992849111557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.62146415017384, "step_time": 1.0514553527832031} +{"epoch": 0, "iter": 4843, "iter_tflops": 49.91056287250023, "iter_time": 0.4133612670898438, "loss": 0.0618487149477005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.52219823368213, "step_time": 0.3783980503082276} +{"epoch": 0, "iter": 4844, "iter_tflops": 53.55654742035827, "iter_time": 0.3852207527160644, "loss": 0.08053203672170639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.51140527463853, "step_time": 0.35259952163696284} +{"epoch": 0, "iter": 4845, "iter_tflops": 35.83653667940002, "iter_time": 0.5756999816894531, "loss": 0.027933787554502487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.61844740332738, "step_time": 0.5342289733886719} +{"epoch": 0, "iter": 4846, "iter_tflops": 11.567658603747116, "iter_time": 1.7835150756835936, "loss": 0.03500020503997803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.992229785159545, "step_time": 1.376119083404541} +{"epoch": 0, "iter": 4847, "iter_tflops": 13.462319990552214, "iter_time": 1.5325065460205078, "loss": 0.04153568297624588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.844051658031825, "step_time": 1.3898559494018554} +{"epoch": 0, "iter": 4848, "iter_tflops": 16.372155623328617, "iter_time": 1.2601329956054688, "loss": 0.08470365405082703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.048797782094255, "step_time": 0.7920171089172363} +{"epoch": 0, "iter": 4849, 
"iter_tflops": 15.14130117307684, "iter_time": 1.022374122619629, "loss": 0.2061467468738556, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 15.945117894642065, "step_time": 0.9708347473144531} +{"epoch": 0, "iter": 4850, "iter_tflops": 7.260426874492649, "iter_time": 2.132116302490234, "loss": 0.2349301129579544, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 8.971198799838401, "step_time": 1.7255302047729493} +{"epoch": 0, "iter": 4851, "iter_tflops": 8.663698623117126, "iter_time": 1.7867743530273439, "loss": 0.22711330652236938, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 10.385922049060849, "step_time": 1.490486297607422} +{"epoch": 0, "iter": 4852, "iter_tflops": 26.922440967001826, "iter_time": 0.574987777709961, "loss": 0.29103317856788635, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.672122914500775, "step_time": 0.5398998374938965} +{"epoch": 0, "iter": 4853, "iter_tflops": 21.608092372791393, "iter_time": 0.7126153106689453, "loss": 0.1802685260772705, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 23.074519929651277, "step_time": 0.6673273162841797} +{"epoch": 0, "iter": 4854, "iter_tflops": 14.319048944920421, "iter_time": 1.0753687286376954, "loss": 0.21536920964717865, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 17.198776408829456, "step_time": 0.8953112182617187} +{"epoch": 0, "iter": 4855, "iter_tflops": 23.79785895120077, "iter_time": 0.647043815612793, "loss": 0.34728169441223145, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.651107009128687, "step_time": 0.6002960205078125} +{"epoch": 0, "iter": 4856, "iter_tflops": 24.565961051591856, "iter_time": 0.626812744140625, "loss": 0.19101577997207642, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.25569458108857, "step_time": 0.5864730567932128} +{"epoch": 0, "iter": 4857, "iter_tflops": 16.57691193197153, "iter_time": 1.2445679626464845, "loss": 0.7430329322814941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.538846320373967, "step_time": 1.1763084716796874} +{"epoch": 0, "iter": 4858, 
"iter_tflops": 22.730488748459734, "iter_time": 0.9076396789550781, "loss": 0.940709114074707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.939243651640005, "step_time": 0.6668260459899902} +{"epoch": 0, "iter": 4859, "iter_tflops": 35.065710160680034, "iter_time": 0.5883552169799805, "loss": 0.9603305459022522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.95685453809286, "step_time": 0.5435406532287598} +{"epoch": 0, "iter": 4860, "iter_tflops": 39.034358881124916, "iter_time": 0.5285367584228515, "loss": 0.8276103734970093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45407400588167, "step_time": 0.4859626312255859} +{"epoch": 0, "iter": 4861, "iter_tflops": 35.19160141022581, "iter_time": 0.58625048828125, "loss": 0.04391956701874733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.27630192763387, "step_time": 0.5252809581756592} +{"epoch": 0, "iter": 4862, "iter_tflops": 41.61127287502374, "iter_time": 0.49580539321899414, "loss": 0.029129326343536377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.044788351461065, "step_time": 0.44806576919555663} +{"epoch": 0, "iter": 4863, "iter_tflops": 42.44157762631415, "iter_time": 0.4861057167053222, "loss": 0.03626423329114914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.5044246507259, "step_time": 0.4436372165679932} +{"epoch": 0, "iter": 4864, "iter_tflops": 45.065227815299394, "iter_time": 0.4578051528930664, "loss": 0.041247230023145676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63272085478673, "step_time": 0.4156752471923828} +{"epoch": 0, "iter": 4865, "iter_tflops": 23.832688205706997, "iter_time": 0.8656637191772462, "loss": 1.155138611793518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.493144745678574, "step_time": 0.8092800521850586} +{"epoch": 0, "iter": 4866, "iter_tflops": 14.928248408002336, "iter_time": 1.3820170288085938, "loss": 1.0794752836227417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.69415060671312, "step_time": 0.9090930023193359} +{"epoch": 0, "iter": 
4867, "iter_tflops": 38.399713276647326, "iter_time": 0.5372720718383788, "loss": 1.0190941095352173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.700829336896724, "step_time": 0.49474060440063483} +{"epoch": 0, "iter": 4868, "iter_tflops": 35.998809134513145, "iter_time": 0.5731048889160157, "loss": 0.8093083500862122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.97058339735835, "step_time": 0.5294017105102539} +{"epoch": 0, "iter": 4869, "iter_tflops": 26.697525726518535, "iter_time": 0.772771743774414, "loss": 0.01521703414618969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.399881632863575, "step_time": 0.7017406997680664} +{"epoch": 0, "iter": 4870, "iter_tflops": 27.665472618565587, "iter_time": 0.7457343597412108, "loss": 0.026336848735809326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.30886159953423, "step_time": 0.658953805923462} +{"epoch": 0, "iter": 4871, "iter_tflops": 42.915037054564635, "iter_time": 0.4807427635192871, "loss": 0.033723700791597366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32042698029636, "step_time": 0.4269642219543457} +{"epoch": 0, "iter": 4872, "iter_tflops": 42.454548566198966, "iter_time": 0.4859571990966797, "loss": 0.060083430260419846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.84571504421784, "step_time": 0.4404051361083985} +{"epoch": 0, "iter": 4873, "iter_tflops": 18.9283490674513, "iter_time": 1.0899573669433593, "loss": 0.45728248357772827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.26828727119368, "step_time": 1.0179001922607425} +{"epoch": 0, "iter": 4874, "iter_tflops": 13.30293238339963, "iter_time": 1.550868103027344, "loss": 0.4615599513053894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.443789743529535, "step_time": 1.1185929679870605} +{"epoch": 0, "iter": 4875, "iter_tflops": 40.798038673374755, "iter_time": 0.5056883659362793, "loss": 0.34792792797088623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52613199707883, "step_time": 0.46334798431396484} +{"epoch": 0, 
"iter": 4876, "iter_tflops": 39.11489812434957, "iter_time": 0.5274484786987305, "loss": 0.3375006318092346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.991851694853814, "step_time": 0.47988380813598636} +{"epoch": 0, "iter": 4877, "iter_tflops": 18.29572327221781, "iter_time": 1.1276456909179686, "loss": 0.7401468753814697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.214234847392472, "step_time": 1.0737400512695314} +{"epoch": 0, "iter": 4878, "iter_tflops": 19.51491403695246, "iter_time": 1.057196228027344, "loss": 0.9722169637680054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.537007290794218, "step_time": 0.8765385189056397} +{"epoch": 0, "iter": 4879, "iter_tflops": 36.70815044106783, "iter_time": 0.5620303192138671, "loss": 0.7854709625244141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.050811167194816, "step_time": 0.5151229877471923} +{"epoch": 0, "iter": 4880, "iter_tflops": 35.3017917004979, "iter_time": 0.5844205780029297, "loss": 0.8203432559967041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.391738091025644, "step_time": 0.5373836803436279} +{"epoch": 0, "iter": 4881, "iter_tflops": 24.859458885112403, "iter_time": 0.8299091949462891, "loss": 0.0418001264333725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.298762108228484, "step_time": 0.7557519798278808} +{"epoch": 0, "iter": 4882, "iter_tflops": 39.15634622125995, "iter_time": 0.5268901596069336, "loss": 0.07696805149316788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.015614826580425, "step_time": 0.4796187057495117} +{"epoch": 0, "iter": 4883, "iter_tflops": 43.627176263730156, "iter_time": 0.4728954582214355, "loss": 0.08328264951705933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.09645784304226, "step_time": 0.4289524517059326} +{"epoch": 0, "iter": 4884, "iter_tflops": 44.65212923727031, "iter_time": 0.46204053115844734, "loss": 0.03710108995437622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.10982743960264, "step_time": 0.42010111999511723} +{"epoch": 
0, "iter": 4885, "iter_tflops": 27.891053259405844, "iter_time": 0.7397029190063477, "loss": 0.7309927940368652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.536246204652542, "step_time": 0.6756263809204102} +{"epoch": 0, "iter": 4886, "iter_tflops": 34.97240237435221, "iter_time": 0.5899249725341797, "loss": 0.7965408563613892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.95404060053264, "step_time": 0.5435809516906739} +{"epoch": 0, "iter": 4887, "iter_tflops": 38.27423517182772, "iter_time": 0.5390334625244141, "loss": 0.8570602536201477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59570589781684, "step_time": 0.49599094581604} +{"epoch": 0, "iter": 4888, "iter_tflops": 36.54546638686874, "iter_time": 0.5645322265624999, "loss": 0.9981293678283691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.801846252218084, "step_time": 0.5183451385498047} +{"epoch": 0, "iter": 4889, "iter_tflops": 21.31607589690183, "iter_time": 0.9678654556274413, "loss": 0.780591607093811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.674557379133258, "step_time": 0.9098785552978516} +{"epoch": 0, "iter": 4890, "iter_tflops": 13.73546460708822, "iter_time": 1.502030990600586, "loss": 0.8049297332763672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.789316919738269, "step_time": 1.3066488952636717} +{"epoch": 0, "iter": 4891, "iter_tflops": 19.64486813285495, "iter_time": 1.050202697753906, "loss": 0.8416869640350342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.059504797313803, "step_time": 0.8575028324127199} +{"epoch": 0, "iter": 4892, "iter_tflops": 40.41314499035936, "iter_time": 0.5105045280456544, "loss": 0.9129824638366699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.34035770111351, "step_time": 0.4760249938964844} +{"epoch": 0, "iter": 4893, "iter_tflops": 22.236500354780706, "iter_time": 0.6502245330810547, "loss": 0.1452336460351944, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 23.60436278418408, "step_time": 0.6125443077087402} +{"epoch": 0, 
"iter": 4894, "iter_tflops": 10.871657750056825, "iter_time": 1.3299460296630858, "loss": 0.28234246373176575, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 13.003700353959001, "step_time": 1.1118925895690919} +{"epoch": 0, "iter": 4895, "iter_tflops": 21.885605399269945, "iter_time": 0.6606496734619139, "loss": 0.23268502950668335, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 23.571432321308745, "step_time": 0.6134000625610352} +{"epoch": 0, "iter": 4896, "iter_tflops": 23.91562671870134, "iter_time": 0.6045719909667968, "loss": 0.23482294380664825, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 25.65054614815032, "step_time": 0.5636807098388671} +{"epoch": 0, "iter": 4897, "iter_tflops": 14.47741409311246, "iter_time": 1.015621612548828, "loss": 0.00932220183312893, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 15.786581378788078, "step_time": 0.9313970069885253} +{"epoch": 0, "iter": 4898, "iter_tflops": 14.705766958539146, "iter_time": 0.9998509216308593, "loss": 0.007823855616152287, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 17.78574132518715, "step_time": 0.8267057514190674} +{"epoch": 0, "iter": 4899, "iter_tflops": 38.023865760568945, "iter_time": 0.3866933135986328, "loss": 0.04970640689134598, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 41.9015066281084, "step_time": 0.35090801811218264} +{"epoch": 0, "iter": 4900, "iter_tflops": 38.70438978710569, "iter_time": 0.3798942375183106, "loss": 0.011517458595335484, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 42.57216432367906, "step_time": 0.34538001251220696} +{"epoch": 0, "iter": 4901, "iter_tflops": 31.756631092577216, "iter_time": 0.6496625366210937, "loss": 0.6822303533554077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.21087173214459, "step_time": 0.6030566444396972} +{"epoch": 0, "iter": 4902, "iter_tflops": 37.86452320527225, "iter_time": 0.5448660583496094, "loss": 0.6462098360061646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.86583835220955, "step_time": 0.49279064559936525} 
+{"epoch": 0, "iter": 4903, "iter_tflops": 38.11810040405769, "iter_time": 0.5412413864135742, "loss": 0.7518054246902466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3636267136552, "step_time": 0.49877380561828616} +{"epoch": 0, "iter": 4904, "iter_tflops": 37.57544298432884, "iter_time": 0.5490578918457032, "loss": 0.7268790006637573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.975873450374415, "step_time": 0.5034936847686768} +{"epoch": 0, "iter": 4905, "iter_tflops": 2.4305099529928773, "iter_time": 0.5802053451538085, "loss": 1.6554080247879028, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.6825654810133868, "step_time": 0.5256888885498047} +{"epoch": 0, "iter": 4906, "iter_tflops": 2.649362690136908, "iter_time": 0.5322770156860351, "loss": 1.3629554510116577, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.913773641067716, "step_time": 0.483975435256958} +{"epoch": 0, "iter": 4907, "iter_tflops": 2.9691947730999395, "iter_time": 0.4749418525695801, "loss": 1.320055365562439, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.2483937646477155, "step_time": 0.43412066650390624} +{"epoch": 0, "iter": 4908, "iter_tflops": 3.0649099261219703, "iter_time": 0.46010972595214844, "loss": 1.4650769233703613, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.3416728354812903, "step_time": 0.4220026721954346} +{"epoch": 0, "iter": 4909, "iter_tflops": 19.033000429447373, "iter_time": 0.9774133148193359, "loss": 0.0071860793977975845, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 20.442647485406667, "step_time": 0.9100146179199219} +{"epoch": 0, "iter": 4910, "iter_tflops": 34.746903463667785, "iter_time": 0.5353889465332031, "loss": 0.003528052242472768, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 39.1892025259903, "step_time": 0.474699836730957} +{"epoch": 0, "iter": 4911, "iter_tflops": 47.44769396141223, "iter_time": 0.39207612609863285, "loss": 0.020522015169262886, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 52.222601619921406, "step_time": 
0.3562271404266358} +{"epoch": 0, "iter": 4912, "iter_tflops": 53.66312206492213, "iter_time": 0.3466646614074707, "loss": 0.015691407024860382, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 58.732001020851094, "step_time": 0.3167456874847412} +{"epoch": 0, "iter": 4913, "iter_tflops": 25.290073764324553, "iter_time": 0.815778305053711, "loss": 0.08574934303760529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.510197401557022, "step_time": 0.7782323608398436} +{"epoch": 0, "iter": 4914, "iter_tflops": 16.996517116201698, "iter_time": 1.2138424224853515, "loss": 0.05776580423116684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.200706132077304, "step_time": 1.0213055610656738} +{"epoch": 0, "iter": 4915, "iter_tflops": 33.37998095306864, "iter_time": 0.6180678634643554, "loss": 0.1973346620798111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.273254831966014, "step_time": 0.48804128265380864} +{"epoch": 0, "iter": 4916, "iter_tflops": 41.53402364330842, "iter_time": 0.4967275428771973, "loss": 0.09905309975147247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.894292944021785, "step_time": 0.44953505516052245} +{"epoch": 0, "iter": 4917, "iter_tflops": 31.647053825303267, "iter_time": 0.651911979675293, "loss": 0.7935012578964233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.99884127903817, "step_time": 0.5894793300628662} +{"epoch": 0, "iter": 4918, "iter_tflops": 38.1825210642696, "iter_time": 0.5403282165527344, "loss": 0.6781272888183594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.71480065938677, "step_time": 0.49457490348815913} +{"epoch": 0, "iter": 4919, "iter_tflops": 43.10566763085117, "iter_time": 0.47861672592163085, "loss": 0.0652303621172905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.33950692033515, "step_time": 0.43581133079528805} +{"epoch": 0, "iter": 4920, "iter_tflops": 45.14528873971109, "iter_time": 0.456993278503418, "loss": 0.029837142676115036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.68870563835032, 
"step_time": 0.41520690155029294} +{"epoch": 0, "iter": 4921, "iter_tflops": 23.802121483947683, "iter_time": 0.8667754058837891, "loss": 0.7471415996551514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.430163624957114, "step_time": 0.8112843399047851} +{"epoch": 0, "iter": 4922, "iter_tflops": 23.896192271208935, "iter_time": 0.8633632202148438, "loss": 0.9786633253097534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.530629518880833, "step_time": 0.6986337184906005} +{"epoch": 0, "iter": 4923, "iter_tflops": 42.278733941947664, "iter_time": 0.4879780349731445, "loss": 0.8103876113891602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42245745600982, "step_time": 0.4542046966552735} +{"epoch": 0, "iter": 4924, "iter_tflops": 42.970271712726706, "iter_time": 0.4801248092651367, "loss": 0.9162518382072449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.11491884006018, "step_time": 0.4473843612670898} +{"epoch": 0, "iter": 4925, "iter_tflops": 31.867445247648323, "iter_time": 0.6474034347534179, "loss": 0.7249453663825989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.95332141225997, "step_time": 0.6076310844421386} +{"epoch": 0, "iter": 4926, "iter_tflops": 17.030610502750623, "iter_time": 1.2114124450683592, "loss": 0.7030802965164185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.848160876753692, "step_time": 0.9895881767272949} +{"epoch": 0, "iter": 4927, "iter_tflops": 36.72229022833875, "iter_time": 0.5618139114379883, "loss": 0.7193383574485779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.37244769405721, "step_time": 0.5110191402435302} +{"epoch": 0, "iter": 4928, "iter_tflops": 39.08062287969128, "iter_time": 0.5279110717773438, "loss": 0.6446243524551392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52863086693617, "step_time": 0.4851106910705566} +{"epoch": 0, "iter": 4929, "iter_tflops": 20.841700554305028, "iter_time": 0.9898949203491211, "loss": 0.5751675963401794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.442971709983667, "step_time": 0.9192674560546876} +{"epoch": 0, "iter": 4930, "iter_tflops": 27.024536294931394, "iter_time": 0.7634208145141601, "loss": 0.4899095296859741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.61582511208179, "step_time": 0.6738702430725099} +{"epoch": 0, "iter": 4931, "iter_tflops": 36.3502751148602, "iter_time": 0.5675636138916016, "loss": 0.3988422155380249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.69930522528978, "step_time": 0.5196839942932129} +{"epoch": 0, "iter": 4932, "iter_tflops": 40.402362227555955, "iter_time": 0.5106407737731933, "loss": 0.4385819137096405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.00345878070171, "step_time": 0.46885163307189937} +{"epoch": 0, "iter": 4933, "iter_tflops": 11.7911836796426, "iter_time": 0.878831398010254, "loss": 0.03971550241112709, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 12.557677015560731, "step_time": 0.8251894378662109} +{"epoch": 0, "iter": 4934, "iter_tflops": 4.517460332605735, "iter_time": 2.2938690490722657, "loss": 0.04906259849667549, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 5.406329233737302, "step_time": 1.9167279663085939} +{"epoch": 0, "iter": 4935, "iter_tflops": 6.816461630062042, "iter_time": 1.5202113647460935, "loss": 0.05297699570655823, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 8.588739398687535, "step_time": 1.2065172729492188} +{"epoch": 0, "iter": 4936, "iter_tflops": 22.83191837809838, "iter_time": 0.45385859680175783, "loss": 0.059487901628017426, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 25.140613484592773, "step_time": 0.4121801738739013} +{"epoch": 0, "iter": 4937, "iter_tflops": 22.298993399126225, "iter_time": 0.6997103042602539, "loss": 0.30625030398368835, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 23.690689895558005, "step_time": 0.6586062088012696} +{"epoch": 0, "iter": 4938, "iter_tflops": 22.6564285580424, "iter_time": 0.6886714477539062, "loss": 0.35958439111709595, "lr": 3e-05, "seqlen": 6240.0, 
"step_tflops": 24.422949914750035, "step_time": 0.6388595771789551} +{"epoch": 0, "iter": 4939, "iter_tflops": 22.294129950815133, "iter_time": 0.6998629455566407, "loss": 0.2554667294025421, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.10887715753268, "step_time": 0.6471821708679198} +{"epoch": 0, "iter": 4940, "iter_tflops": 24.257417320812603, "iter_time": 0.6432191543579101, "loss": 0.31079113483428955, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.913483464474492, "step_time": 0.6021126213073731} +{"epoch": 0, "iter": 4941, "iter_tflops": 22.501354296188797, "iter_time": 0.9168823013305663, "loss": 0.8227752447128296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.129928547047236, "step_time": 0.8550001907348633} +{"epoch": 0, "iter": 4942, "iter_tflops": 22.520102872202234, "iter_time": 0.9161189727783202, "loss": 0.7524769902229309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.296833917213068, "step_time": 0.6809653301239014} +{"epoch": 0, "iter": 4943, "iter_tflops": 39.07510900017455, "iter_time": 0.5279855651855468, "loss": 0.6866917610168457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90290242500011, "step_time": 0.4808787364959716} +{"epoch": 0, "iter": 4944, "iter_tflops": 40.66286732106226, "iter_time": 0.5073693733215332, "loss": 0.7037769556045532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.48742394153477, "step_time": 0.4637511386871338} +{"epoch": 0, "iter": 4945, "iter_tflops": 38.358405504607425, "iter_time": 0.5378506546020507, "loss": 0.05773530527949333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6420148138102, "step_time": 0.4838207950592041} +{"epoch": 0, "iter": 4946, "iter_tflops": 40.05100054253177, "iter_time": 0.5151205520629883, "loss": 0.09728526324033737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10014716437723, "step_time": 0.4678236885070801} +{"epoch": 0, "iter": 4947, "iter_tflops": 45.18338030836859, "iter_time": 0.4566080131530762, "loss": 0.06341668218374252, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 49.75400972003817, "step_time": 0.41466192626953124} +{"epoch": 0, "iter": 4948, "iter_tflops": 45.33917591849044, "iter_time": 0.455039005279541, "loss": 0.06718873232603073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45677872366569, "step_time": 0.4171540088653564} +{"epoch": 0, "iter": 4949, "iter_tflops": 18.704345818014904, "iter_time": 0.950507164001465, "loss": 0.15457653999328613, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 19.759308548378378, "step_time": 0.8997589492797852} +{"epoch": 0, "iter": 4950, "iter_tflops": 12.302652151037684, "iter_time": 1.4451042327880859, "loss": 0.17681358754634857, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 16.170609300505685, "step_time": 1.0994400005340577} +{"epoch": 0, "iter": 4951, "iter_tflops": 34.27971729195953, "iter_time": 0.5186336441040039, "loss": 0.21958957612514496, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 37.645480237054485, "step_time": 0.4722642555236816} +{"epoch": 0, "iter": 4952, "iter_tflops": 35.12221946393773, "iter_time": 0.5061928024291992, "loss": 0.22849754989147186, "lr": 3e-05, "seqlen": 7088.0, "step_tflops": 38.665493367765514, "step_time": 0.4598057117462158} +{"epoch": 0, "iter": 4953, "iter_tflops": 13.652944072036671, "iter_time": 1.0470690155029296, "loss": 0.004954131320118904, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 14.572382490215743, "step_time": 0.9810046310424805} +{"epoch": 0, "iter": 4954, "iter_tflops": 14.051987484742751, "iter_time": 1.017334716796875, "loss": 0.0309591181576252, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 17.0289795278523, "step_time": 0.839485107421875} +{"epoch": 0, "iter": 4955, "iter_tflops": 31.63615357596946, "iter_time": 0.45187461471557616, "loss": 0.008063607849180698, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 35.00991391597521, "step_time": 0.40832933044433595} +{"epoch": 0, "iter": 4956, "iter_tflops": 32.17558472034037, "iter_time": 0.44429883193969727, "loss": 0.01721012406051159, "lr": 3e-05, 
"seqlen": 5728.0, "step_tflops": 35.637782424975306, "step_time": 0.40113536071777345} +{"epoch": 0, "iter": 4957, "iter_tflops": 29.13350354059673, "iter_time": 0.708156967163086, "loss": 0.04265255853533745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.71601291398845, "step_time": 0.6504945487976075} +{"epoch": 0, "iter": 4958, "iter_tflops": 44.061383821700495, "iter_time": 0.46823526000976556, "loss": 0.03044111281633377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.302491472991115, "step_time": 0.42712276077270506} +{"epoch": 0, "iter": 4959, "iter_tflops": 54.055021375549316, "iter_time": 0.381668399810791, "loss": 0.0483829565346241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.01118612204682, "step_time": 0.3496132659912109} +{"epoch": 0, "iter": 4960, "iter_tflops": 56.10799808392462, "iter_time": 0.36770325469970705, "loss": 0.03282979503273964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.21434605450077, "step_time": 0.33703036689758303} +{"epoch": 0, "iter": 4961, "iter_tflops": 19.49383485663663, "iter_time": 1.0583394012451173, "loss": 0.8710837960243225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.209847432315765, "step_time": 1.020843605041504} +{"epoch": 0, "iter": 4962, "iter_tflops": 18.08991893771309, "iter_time": 1.1404746246337891, "loss": 1.0570789575576782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.587509600891373, "step_time": 0.9556958580017089} +{"epoch": 0, "iter": 4963, "iter_tflops": 34.65124002016158, "iter_time": 0.5953926467895507, "loss": 0.7994751334190369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.7366625289011, "step_time": 0.5467121925354004} +{"epoch": 0, "iter": 4964, "iter_tflops": 38.619029260575324, "iter_time": 0.5342209243774414, "loss": 0.7658935785293579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.87707378944798, "step_time": 0.49265843200683596} +{"epoch": 0, "iter": 4965, "iter_tflops": 22.239052583149135, "iter_time": 0.9276966018676758, "loss": 0.02478947676718235, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 23.86106668496257, "step_time": 0.8646341667175292} +{"epoch": 0, "iter": 4966, "iter_tflops": 17.90154874635076, "iter_time": 1.152475341796875, "loss": 0.017049849033355713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.39241134432345, "step_time": 1.0117044601440428} +{"epoch": 0, "iter": 4967, "iter_tflops": 42.488740139983, "iter_time": 0.4855661392211914, "loss": 0.0451531708240509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00114101543387, "step_time": 0.43894878005981447} +{"epoch": 0, "iter": 4968, "iter_tflops": 49.06532526894046, "iter_time": 0.4204821510314941, "loss": 0.061151258647441864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.17192824589105, "step_time": 0.3808447322845459} +{"epoch": 0, "iter": 4969, "iter_tflops": 28.603212969715354, "iter_time": 0.7154832229614257, "loss": 0.29705870151519775, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 30.696596457810323, "step_time": 0.6666901664733886} +{"epoch": 0, "iter": 4970, "iter_tflops": 16.76558713411857, "iter_time": 1.2206622314453126, "loss": 0.44827800989151, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 18.4814926753016, "step_time": 1.1073304176330567} +{"epoch": 0, "iter": 4971, "iter_tflops": 39.705156767661926, "iter_time": 0.515427230834961, "loss": 0.3705432116985321, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 43.632013065639356, "step_time": 0.4690390739440918} +{"epoch": 0, "iter": 4972, "iter_tflops": 40.321787001520164, "iter_time": 0.5075449409484863, "loss": 0.3072209358215332, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 44.18082141296633, "step_time": 0.46321273231506344} +{"epoch": 0, "iter": 4973, "iter_tflops": 29.374934583085185, "iter_time": 0.7023366622924806, "loss": 0.7414330244064331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.845787820724315, "step_time": 0.6478437156677246} +{"epoch": 0, "iter": 4974, "iter_tflops": 10.470766165360898, "iter_time": 1.970351852416992, "loss": 0.6254298090934753, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 11.78842662303157, "step_time": 1.7501142578124997} +{"epoch": 0, "iter": 4975, "iter_tflops": 9.372505465720248, "iter_time": 2.2012356872558594, "loss": 0.6878995895385742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.886869958718218, "step_time": 1.8950436248779297} +{"epoch": 0, "iter": 4976, "iter_tflops": 29.56454471612876, "iter_time": 0.6978322753906249, "loss": 0.630195677280426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.287278644780706, "step_time": 0.4766086978912353} +{"epoch": 0, "iter": 4977, "iter_tflops": 20.956533737265016, "iter_time": 0.6938322067260742, "loss": 0.19498157501220703, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 22.364482992355025, "step_time": 0.6501522102355957} +{"epoch": 0, "iter": 4978, "iter_tflops": 7.655879686199489, "iter_time": 1.8992354431152345, "loss": 0.17302682995796204, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 8.253234359206699, "step_time": 1.7617721023559572} +{"epoch": 0, "iter": 4979, "iter_tflops": 8.066628827223255, "iter_time": 1.8025272216796873, "loss": 0.24039751291275024, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 10.038841497135929, "step_time": 1.448405979156494} +{"epoch": 0, "iter": 4980, "iter_tflops": 23.377504237995513, "iter_time": 0.6219790573120118, "loss": 0.18109938502311707, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.975781138130802, "step_time": 0.5821767082214355} +{"epoch": 0, "iter": 4981, "iter_tflops": 17.548268012542273, "iter_time": 0.8704886016845703, "loss": 0.39525264501571655, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 18.46466967744082, "step_time": 0.827286247253418} +{"epoch": 0, "iter": 4982, "iter_tflops": 12.482735419974382, "iter_time": 1.223735565185547, "loss": 0.14402428269386292, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 13.933145757782833, "step_time": 1.0963473396301269} +{"epoch": 0, "iter": 4983, "iter_tflops": 25.917843160735092, "iter_time": 0.5893842010498047, "loss": 0.35473519563674927, 
"lr": 3e-05, "seqlen": 6112.0, "step_tflops": 27.749570362096538, "step_time": 0.5504794158935546} +{"epoch": 0, "iter": 4984, "iter_tflops": 27.52745136550635, "iter_time": 0.5549212341308595, "loss": 0.2914287745952606, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.334996936106226, "step_time": 0.520728443145752} +{"epoch": 0, "iter": 4985, "iter_tflops": 32.407921615130356, "iter_time": 0.6366064987182617, "loss": 0.8886520266532898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.845545630010605, "step_time": 0.5920726203918457} +{"epoch": 0, "iter": 4986, "iter_tflops": 11.413568013704914, "iter_time": 1.8075936889648436, "loss": 0.9836282730102539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.703430295164477, "step_time": 1.505542266845703} +{"epoch": 0, "iter": 4987, "iter_tflops": 12.091134225550016, "iter_time": 1.706299270629883, "loss": 0.7252413034439087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.204698167544384, "step_time": 1.4524133682250975} +{"epoch": 0, "iter": 4988, "iter_tflops": 26.43766392600825, "iter_time": 0.7803674926757813, "loss": 0.8367114067077637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.25587546319855, "step_time": 0.5537675132751465} +{"epoch": 0, "iter": 4989, "iter_tflops": 14.651799743233548, "iter_time": 1.0230453338623047, "loss": 0.33919772505760193, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 15.663536174650867, "step_time": 0.9569649658203125} +{"epoch": 0, "iter": 4990, "iter_tflops": 10.202515464878706, "iter_time": 1.469192123413086, "loss": 0.17833629250526428, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 12.875674260754213, "step_time": 1.1641685752868653} +{"epoch": 0, "iter": 4991, "iter_tflops": 22.485660188262408, "iter_time": 0.6666228713989257, "loss": 0.29602640867233276, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.253563057660966, "step_time": 0.618031063079834} +{"epoch": 0, "iter": 4992, "iter_tflops": 25.115147237352822, "iter_time": 0.5968292846679687, "loss": 
0.3106745481491089, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 26.889176305182602, "step_time": 0.557453125} +{"epoch": 0, "iter": 4993, "iter_tflops": 19.378682574305152, "iter_time": 1.0646282806396483, "loss": 0.11501166224479675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.90769747299219, "step_time": 0.986770233154297} +{"epoch": 0, "iter": 4994, "iter_tflops": 15.41629277544494, "iter_time": 1.3382655487060546, "loss": 0.2190457284450531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.977014548385494, "step_time": 1.1476373596191407} +{"epoch": 0, "iter": 4995, "iter_tflops": 49.68357176655284, "iter_time": 0.4152498054504394, "loss": 0.1318695992231369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.109557320833765, "step_time": 0.38128372383117676} +{"epoch": 0, "iter": 4996, "iter_tflops": 54.79331985352757, "iter_time": 0.376525707244873, "loss": 0.11351552605628967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.79816084370111, "step_time": 0.3450121746063232} +{"epoch": 0, "iter": 4997, "iter_tflops": 42.82781465253267, "iter_time": 0.4817218360900879, "loss": 0.9071624875068665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.48274603469442, "step_time": 0.4438441200256348} +{"epoch": 0, "iter": 4998, "iter_tflops": 46.31799125455958, "iter_time": 0.44542288970947264, "loss": 0.8554799556732178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20156492545259, "step_time": 0.41096514701843256} +{"epoch": 0, "iter": 4999, "iter_tflops": 44.986575914486, "iter_time": 0.45860555267333986, "loss": 0.7799596786499023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.55378628987337, "step_time": 0.424912145614624} +{"epoch": 0, "iter": 5000, "iter_tflops": 42.19908250411195, "iter_time": 0.4888991012573243, "loss": 0.7945634722709656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.514839934662696, "step_time": 0.45328278732299804} +{"epoch": 0, "iter": 5001, "iter_tflops": 37.56504352964324, "iter_time": 0.5492098922729491, "loss": 
0.9083054065704346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.64245409147496, "step_time": 0.5076242065429687} +{"epoch": 0, "iter": 5002, "iter_tflops": 33.99225354663456, "iter_time": 0.6069351501464844, "loss": 0.8122969269752502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.20163221334713, "step_time": 0.5545749549865724} +{"epoch": 0, "iter": 5003, "iter_tflops": 37.60536639031537, "iter_time": 0.5486209945678711, "loss": 0.7751649022102356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.62206836457542, "step_time": 0.5078789520263672} +{"epoch": 0, "iter": 5004, "iter_tflops": 35.711998793702506, "iter_time": 0.5777076110839844, "loss": 0.8712170720100403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.629331588167055, "step_time": 0.5340784492492676} +{"epoch": 0, "iter": 5005, "iter_tflops": 20.779285428182423, "iter_time": 0.9928682861328125, "loss": 0.2986599802970886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.061930704128635, "step_time": 0.935144515991211} +{"epoch": 0, "iter": 5006, "iter_tflops": 12.565323013850783, "iter_time": 1.6419071350097654, "loss": 0.2305312156677246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.423993202807782, "step_time": 1.3375974197387694} +{"epoch": 0, "iter": 5007, "iter_tflops": 9.366491725930018, "iter_time": 2.2026489868164063, "loss": 0.2072431892156601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.360959834871062, "step_time": 1.8159639511108399} +{"epoch": 0, "iter": 5008, "iter_tflops": 23.172648375890155, "iter_time": 0.890320915222168, "loss": 0.3128686547279358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.769753955745877, "step_time": 0.6493941860198974} +{"epoch": 0, "iter": 5009, "iter_tflops": 17.8716873647967, "iter_time": 0.7839357986450195, "loss": 0.3353273570537567, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 19.469359937499583, "step_time": 0.7196053466796875} +{"epoch": 0, "iter": 5010, "iter_tflops": 21.906229258440092, "iter_time": 0.6395557785034179, "loss": 
0.2084253877401352, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 23.565762574002544, "step_time": 0.5945173835754394} +{"epoch": 0, "iter": 5011, "iter_tflops": 22.648032775546756, "iter_time": 0.6186080551147461, "loss": 0.2276097983121872, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 24.328666660880756, "step_time": 0.5758743667602538} +{"epoch": 0, "iter": 5012, "iter_tflops": 21.632364464753426, "iter_time": 0.6476525268554687, "loss": 0.20845331251621246, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 23.26162176983282, "step_time": 0.6022905731201172} +{"epoch": 0, "iter": 5013, "iter_tflops": 14.620166987335924, "iter_time": 1.411139389038086, "loss": 0.5110706686973572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.532540431584884, "step_time": 1.3282497863769531} +{"epoch": 0, "iter": 5014, "iter_tflops": 23.92375804684733, "iter_time": 0.862368423461914, "loss": 0.4340585172176361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21767220401755, "step_time": 0.5398312435150147} +{"epoch": 0, "iter": 5015, "iter_tflops": 38.22969584790624, "iter_time": 0.5396614608764649, "loss": 0.4774229824542999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62391241733869, "step_time": 0.4956548366546631} +{"epoch": 0, "iter": 5016, "iter_tflops": 40.12283050731905, "iter_time": 0.514198356628418, "loss": 0.36055055260658264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.994652629751236, "step_time": 0.4689454803466797} +{"epoch": 0, "iter": 5017, "iter_tflops": 18.397938956864028, "iter_time": 1.1213806915283204, "loss": 0.8916107416152954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.554653771449377, "step_time": 1.055047752380371} +{"epoch": 0, "iter": 5018, "iter_tflops": 13.87612592104742, "iter_time": 1.4868050079345705, "loss": 0.7207547426223755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.860330678798075, "step_time": 1.0388091640472412} +{"epoch": 0, "iter": 5019, "iter_tflops": 35.166329951845185, "iter_time": 0.5866717834472656, "loss": 
0.7153649926185608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.205274494638914, "step_time": 0.540006420135498} +{"epoch": 0, "iter": 5020, "iter_tflops": 38.732153976394244, "iter_time": 0.5326606292724609, "loss": 0.8854557275772095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.02198808339555, "step_time": 0.4909594821929932} +{"epoch": 0, "iter": 5021, "iter_tflops": 14.517903923150397, "iter_time": 0.836281379699707, "loss": 0.0033955860417336226, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 15.533121470446142, "step_time": 0.7816234970092775} +{"epoch": 0, "iter": 5022, "iter_tflops": 14.817349552701831, "iter_time": 0.8193808670043945, "loss": 0.003913936670869589, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 16.668615709451778, "step_time": 0.7283779850006102} +{"epoch": 0, "iter": 5023, "iter_tflops": 34.04349253574414, "iter_time": 0.3566335830688476, "loss": 0.001530005014501512, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 37.433259133832664, "step_time": 0.3243386497497559} +{"epoch": 0, "iter": 5024, "iter_tflops": 34.833754761759366, "iter_time": 0.34854275131225587, "loss": 0.0025462761987000704, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 38.066741927380164, "step_time": 0.3189412097930908} +{"epoch": 0, "iter": 5025, "iter_tflops": 34.60873218915177, "iter_time": 0.5961239318847656, "loss": 0.4294545352458954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.0866265128967, "step_time": 0.556294692993164} +{"epoch": 0, "iter": 5026, "iter_tflops": 17.003283557391242, "iter_time": 1.2133593749999998, "loss": 0.580085039138794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.24989637934525, "step_time": 1.018824645996094} +{"epoch": 0, "iter": 5027, "iter_tflops": 48.15570919632537, "iter_time": 0.4284246635437011, "loss": 0.7579901218414307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.348015055396694, "step_time": 0.3941141510009766} +{"epoch": 0, "iter": 5028, "iter_tflops": 52.30512284032018, "iter_time": 0.39443733978271484, 
"loss": 0.5195839405059814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.70945230563994, "step_time": 0.36380343437194823} +{"epoch": 0, "iter": 5029, "iter_tflops": 44.98208521518299, "iter_time": 0.45865133666992186, "loss": 0.716793954372406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.050333187560675, "step_time": 0.42061067008972164} +{"epoch": 0, "iter": 5030, "iter_tflops": 46.3448686333601, "iter_time": 0.4451645698547363, "loss": 0.5457347631454468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.57963735998102, "step_time": 0.4078932666778564} +{"epoch": 0, "iter": 5031, "iter_tflops": 47.85180374277172, "iter_time": 0.43114557647705076, "loss": 0.6423490047454834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.96472796606306, "step_time": 0.3970211009979248} +{"epoch": 0, "iter": 5032, "iter_tflops": 48.92233024627113, "iter_time": 0.42171117782592776, "loss": 0.6372353434562683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.90248758882646, "step_time": 0.38998342895507815} +{"epoch": 0, "iter": 5033, "iter_tflops": 41.38574179820834, "iter_time": 0.49850727844238285, "loss": 0.5900903344154358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.811718407947254, "step_time": 0.4603950538635254} +{"epoch": 0, "iter": 5034, "iter_tflops": 14.489199212138134, "iter_time": 1.4238946685791016, "loss": 0.7199156880378723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.89631516747654, "step_time": 1.2210409965515137} +{"epoch": 0, "iter": 5035, "iter_tflops": 44.85824818485456, "iter_time": 0.4599175033569336, "loss": 0.6930773854255676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.73320829502436, "step_time": 0.42334773826599126} +{"epoch": 0, "iter": 5036, "iter_tflops": 48.43481292845236, "iter_time": 0.4259558830261231, "loss": 0.6330427527427673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.509805406160375, "step_time": 0.3928998279571533} +{"epoch": 0, "iter": 5037, "iter_tflops": 35.83525729227347, "iter_time": 0.5757205352783202, 
"loss": 0.20284542441368103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.504377929526306, "step_time": 0.5358116302490235} +{"epoch": 0, "iter": 5038, "iter_tflops": 15.736470191460148, "iter_time": 1.311036926269531, "loss": 0.20679163932800293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.027090121265058, "step_time": 1.0843010349273683} +{"epoch": 0, "iter": 5039, "iter_tflops": 38.79989119681982, "iter_time": 0.5317307052612305, "loss": 0.1676492691040039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.462413348632964, "step_time": 0.4858671913146973} +{"epoch": 0, "iter": 5040, "iter_tflops": 44.3693120001157, "iter_time": 0.464985652923584, "loss": 0.18211589753627777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.285674796817034, "step_time": 0.42727151679992675} +{"epoch": 0, "iter": 5041, "iter_tflops": 20.837683582003947, "iter_time": 0.9900857467651367, "loss": 0.08776166290044785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.20630699105655, "step_time": 0.9290645904541016} +{"epoch": 0, "iter": 5042, "iter_tflops": 17.265288087254437, "iter_time": 1.1949463806152343, "loss": 0.0954677015542984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.53382405212391, "step_time": 0.9155611343383787} +{"epoch": 0, "iter": 5043, "iter_tflops": 42.80424190410137, "iter_time": 0.48198712539672856, "loss": 0.10351595282554626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.18309630000967, "step_time": 0.4372560329437256} +{"epoch": 0, "iter": 5044, "iter_tflops": 40.39109351522074, "iter_time": 0.5107832374572754, "loss": 0.09553176164627075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.401208890910546, "step_time": 0.4646516170501709} +{"epoch": 0, "iter": 5045, "iter_tflops": 25.0499975980796, "iter_time": 0.8235966262817384, "loss": 0.36240994930267334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.0879164326073, "step_time": 0.7616345672607422} +{"epoch": 0, "iter": 5046, "iter_tflops": 36.20304561885844, "iter_time": 
0.5698717651367187, "loss": 0.43896400928497314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.45120177370008, "step_time": 0.5229522190093994} +{"epoch": 0, "iter": 5047, "iter_tflops": 36.27744586675759, "iter_time": 0.5687030334472656, "loss": 0.4266982674598694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.623670173339036, "step_time": 0.5206759853363037} +{"epoch": 0, "iter": 5048, "iter_tflops": 42.88456108525186, "iter_time": 0.4810844039916992, "loss": 0.45281264185905457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.84264114911552, "step_time": 0.44043403625488275} +{"epoch": 0, "iter": 5049, "iter_tflops": 19.572018147939964, "iter_time": 1.0541117095947266, "loss": 0.2801167964935303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.178206854913523, "step_time": 0.9741662101745606} +{"epoch": 0, "iter": 5050, "iter_tflops": 28.296996431905775, "iter_time": 0.7290912857055665, "loss": 0.2658008933067322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.93470841398915, "step_time": 0.6460398273468018} +{"epoch": 0, "iter": 5051, "iter_tflops": 50.33133324980501, "iter_time": 0.4099055633544922, "loss": 0.2500130832195282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.90460430141561, "step_time": 0.37576253890991207} +{"epoch": 0, "iter": 5052, "iter_tflops": 53.08995573053437, "iter_time": 0.3886063423156738, "loss": 0.20742881298065186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.631182790079514, "step_time": 0.35798490524291987} +{"epoch": 0, "iter": 5053, "iter_tflops": 34.162081841450444, "iter_time": 0.6039179229736328, "loss": 1.0084129571914673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.38236828714611, "step_time": 0.5670629615783691} +{"epoch": 0, "iter": 5054, "iter_tflops": 9.484436001833235, "iter_time": 2.1752578125, "loss": 0.7589625716209412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.131227326468311, "step_time": 1.8534428329467774} +{"epoch": 0, "iter": 5055, "iter_tflops": 14.773035886687781, "iter_time": 
1.3965371551513672, "loss": 1.0562841892242432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.84240545719908, "step_time": 1.1562955207824708} +{"epoch": 0, "iter": 5056, "iter_tflops": 15.596797176637793, "iter_time": 1.3227775726318358, "loss": 0.8652500510215759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.571982570614303, "step_time": 1.1740902557373047} +{"epoch": 0, "iter": 5057, "iter_tflops": 16.61451866466416, "iter_time": 0.8948141708374023, "loss": 0.256191611289978, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 17.808871368315568, "step_time": 0.8348034210205078} +{"epoch": 0, "iter": 5058, "iter_tflops": 12.940060701442649, "iter_time": 1.1489054870605468, "loss": 0.287628710269928, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 15.356661481675332, "step_time": 0.9681079940795898} +{"epoch": 0, "iter": 5059, "iter_tflops": 22.02055000918488, "iter_time": 0.6751378479003907, "loss": 0.4031483232975006, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 23.680136360295666, "step_time": 0.6278218383789063} +{"epoch": 0, "iter": 5060, "iter_tflops": 24.580305743283937, "iter_time": 0.6048300170898437, "loss": 0.1856495887041092, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 26.27465684679132, "step_time": 0.5658268661499023} +{"epoch": 0, "iter": 5061, "iter_tflops": 16.150682571537057, "iter_time": 1.277413101196289, "loss": 0.7844897508621216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.397432207099463, "step_time": 1.18587003326416} +{"epoch": 0, "iter": 5062, "iter_tflops": 16.823775510751084, "iter_time": 1.2263058013916015, "loss": 1.0976572036743164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.24519489097115, "step_time": 1.0190612449645995} +{"epoch": 0, "iter": 5063, "iter_tflops": 43.85159649301214, "iter_time": 0.47047531127929687, "loss": 0.9800312519073486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.39133525105551, "step_time": 0.435334716796875} +{"epoch": 0, "iter": 5064, "iter_tflops": 44.37030611059095, "iter_time": 
0.46497523498535154, "loss": 0.9360742568969727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.81547810621618, "step_time": 0.4314731197357178} +{"epoch": 0, "iter": 5065, "iter_tflops": 44.48325387358664, "iter_time": 0.46379461288452156, "loss": 0.7781158089637756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.72222475003852, "step_time": 0.4234431743621826} +{"epoch": 0, "iter": 5066, "iter_tflops": 44.18000660066106, "iter_time": 0.4669780540466309, "loss": 0.9864968657493591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86512690064095, "step_time": 0.4310255680084229} +{"epoch": 0, "iter": 5067, "iter_tflops": 45.52712120447113, "iter_time": 0.45316051101684574, "loss": 0.738945484161377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.282675724614016, "step_time": 0.4186277065277099} +{"epoch": 0, "iter": 5068, "iter_tflops": 51.4425245573769, "iter_time": 0.4010513420104981, "loss": 0.9716207981109619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.45868617123334, "step_time": 0.3720083351135254} +{"epoch": 0, "iter": 5069, "iter_tflops": 37.061167203959414, "iter_time": 0.5566768417358398, "loss": 0.924228310585022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66249881478344, "step_time": 0.5201662559509277} +{"epoch": 0, "iter": 5070, "iter_tflops": 15.089560765881936, "iter_time": 1.3672428131103516, "loss": 0.8369764089584351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.116867661381285, "step_time": 1.0792088890075684} +{"epoch": 0, "iter": 5071, "iter_tflops": 43.9943405604699, "iter_time": 0.46894880676269535, "loss": 0.7262222766876221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.28955386242978, "step_time": 0.43627168846130376} +{"epoch": 0, "iter": 5072, "iter_tflops": 46.76663962918437, "iter_time": 0.44114979553222655, "loss": 1.1703308820724487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04058274356461, "step_time": 0.41228723526000977} +{"epoch": 0, "iter": 5073, "iter_tflops": 33.62665548631389, "iter_time": 
0.6135339126586914, "loss": 0.6398807764053345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.84977876107048, "step_time": 0.5754873313903809} +{"epoch": 0, "iter": 5074, "iter_tflops": 18.112984825046777, "iter_time": 1.1390222930908203, "loss": 0.6012635231018066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.492460877326366, "step_time": 0.959922348022461} +{"epoch": 0, "iter": 5075, "iter_tflops": 37.464198970459925, "iter_time": 0.550688232421875, "loss": 0.9085932970046997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.943696187343114, "step_time": 0.5038893756866455} +{"epoch": 0, "iter": 5076, "iter_tflops": 38.272567807276936, "iter_time": 0.5390569458007812, "loss": 0.8197566270828247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76623310735476, "step_time": 0.4939658660888671} +{"epoch": 0, "iter": 5077, "iter_tflops": 24.146766296249826, "iter_time": 0.8544039916992187, "loss": 0.1918957531452179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.81659118297907, "step_time": 0.799140884399414} +{"epoch": 0, "iter": 5078, "iter_tflops": 12.810558770326725, "iter_time": 1.6104756927490234, "loss": 0.20377615094184875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.2145038007221, "step_time": 1.4514114456176757} +{"epoch": 0, "iter": 5079, "iter_tflops": 20.877605576766758, "iter_time": 0.9881925125122071, "loss": 0.21999743580818176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.469787261849984, "step_time": 0.8790490207672118} +{"epoch": 0, "iter": 5080, "iter_tflops": 45.16319710653162, "iter_time": 0.45681206893920906, "loss": 0.27103090286254883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.85158565193526, "step_time": 0.42232188034057616} +{"epoch": 0, "iter": 5081, "iter_tflops": 21.921990321610753, "iter_time": 0.79597452545166, "loss": 0.28095629811286926, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 23.20399974011177, "step_time": 0.7519973297119142} +{"epoch": 0, "iter": 5082, "iter_tflops": 31.561026018421902, "iter_time": 
0.5528763809204101, "loss": 0.2697191536426544, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 33.69343545534494, "step_time": 0.5178856239318848} +{"epoch": 0, "iter": 5083, "iter_tflops": 31.953283583562555, "iter_time": 0.5460892868041992, "loss": 0.20271244645118713, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 34.06449667699392, "step_time": 0.5122443466186524} +{"epoch": 0, "iter": 5084, "iter_tflops": 31.593413942062572, "iter_time": 0.5523096008300781, "loss": 0.2343287467956543, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 33.59306943474492, "step_time": 0.5194329109191894} +{"epoch": 0, "iter": 5085, "iter_tflops": 2.6092619525399905, "iter_time": 0.6157062606811524, "loss": 0.07122335582971573, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.7859652728659836, "step_time": 0.5766543235778808} +{"epoch": 0, "iter": 5086, "iter_tflops": 1.4144415642017651, "iter_time": 1.1358114471435548, "loss": 0.161795973777771, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.561033140111396, "step_time": 1.029151065826416} +{"epoch": 0, "iter": 5087, "iter_tflops": 3.3895237129671347, "iter_time": 0.4739718780517578, "loss": 0.13732028007507324, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.667880242533357, "step_time": 0.438002010345459} +{"epoch": 0, "iter": 5088, "iter_tflops": 3.6127717510655586, "iter_time": 0.44468320465087896, "loss": 0.1062854528427124, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.9135757582279913, "step_time": 0.41050410652160646} +{"epoch": 0, "iter": 5089, "iter_tflops": 36.84633462838776, "iter_time": 0.5599225463867188, "loss": 0.2308940440416336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.554717955304845, "step_time": 0.5215836334228516} +{"epoch": 0, "iter": 5090, "iter_tflops": 16.983943719930373, "iter_time": 1.2147410430908203, "loss": 0.21330919861793518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.814511144384497, "step_time": 1.0412113304138182} +{"epoch": 0, "iter": 5091, "iter_tflops": 37.47902030602812, "iter_time": 
0.550470458984375, "loss": 0.2689908444881439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87367119380268, "step_time": 0.5047526416778565} +{"epoch": 0, "iter": 5092, "iter_tflops": 40.82480143763101, "iter_time": 0.5053568611145018, "loss": 0.21087083220481873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.82080569366513, "step_time": 0.46030171012878424} +{"epoch": 0, "iter": 5093, "iter_tflops": 17.5085754842504, "iter_time": 1.1783422088623046, "loss": 0.3489692807197571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.52424601057872, "step_time": 1.1137345886230468} +{"epoch": 0, "iter": 5094, "iter_tflops": 15.883850383577148, "iter_time": 1.2988723144531251, "loss": 0.48607438802719116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.879043366384273, "step_time": 1.0378313045501708} +{"epoch": 0, "iter": 5095, "iter_tflops": 36.325506070128796, "iter_time": 0.5679506149291992, "loss": 0.30696213245391846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.63491480058359, "step_time": 0.5205282669067384} +{"epoch": 0, "iter": 5096, "iter_tflops": 39.0906545174182, "iter_time": 0.5277755966186524, "loss": 0.41675734519958496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84630129419561, "step_time": 0.48151399040222176} +{"epoch": 0, "iter": 5097, "iter_tflops": 19.549697754840263, "iter_time": 1.0553152160644532, "loss": 0.5260158777236938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.870091984792857, "step_time": 0.9885482788085939} +{"epoch": 0, "iter": 5098, "iter_tflops": 19.122818276081475, "iter_time": 1.0788730621337892, "loss": 0.5116271376609802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.895712807846493, "step_time": 0.8287006549835205} +{"epoch": 0, "iter": 5099, "iter_tflops": 47.36942883181293, "iter_time": 0.43553604125976564, "loss": 0.5810383558273315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.56525918191455, "step_time": 0.4000967674255371} +{"epoch": 0, "iter": 5100, "iter_tflops": 51.34679815033247, "iter_time": 
0.4017990264892578, "loss": 0.5775185227394104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.65183453561999, "step_time": 0.3707172222137451} +{"epoch": 0, "iter": 5101, "iter_tflops": 36.04339290618344, "iter_time": 0.5723959884643555, "loss": 0.8999040126800537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.768667389731505, "step_time": 0.5321589546203613} +{"epoch": 0, "iter": 5102, "iter_tflops": 15.603220826797052, "iter_time": 1.3222330017089845, "loss": 0.9108887314796448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.605701593622747, "step_time": 1.1088586692810058} +{"epoch": 0, "iter": 5103, "iter_tflops": 44.38593234585018, "iter_time": 0.46481153869628905, "loss": 0.9009143710136414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.84790257715277, "step_time": 0.4311807289123535} +{"epoch": 0, "iter": 5104, "iter_tflops": 45.36896935982605, "iter_time": 0.4547401847839355, "loss": 0.9218021631240845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71329729919659, "step_time": 0.4235207767486573} +{"epoch": 0, "iter": 5105, "iter_tflops": 38.19780554126636, "iter_time": 0.47190576171875004, "loss": 0.08927366137504578, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 41.517630511254964, "step_time": 0.4341713218688964} +{"epoch": 0, "iter": 5106, "iter_tflops": 18.78735040137577, "iter_time": 0.9594628372192383, "loss": 0.09545832127332687, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 22.828805780936218, "step_time": 0.7896061096191406} +{"epoch": 0, "iter": 5107, "iter_tflops": 44.27283408995548, "iter_time": 0.40715180969238285, "loss": 0.07982085645198822, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 48.31092666381755, "step_time": 0.3731198253631592} +{"epoch": 0, "iter": 5108, "iter_tflops": 46.320938587727525, "iter_time": 0.3891493797302246, "loss": 0.11281514167785645, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 50.43685917392952, "step_time": 0.35739268493652343} +{"epoch": 0, "iter": 5109, "iter_tflops": 40.13929430446887, 
"iter_time": 0.5139874496459961, "loss": 0.0019315780373290181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.462703932616634, "step_time": 0.4746849975585937} +{"epoch": 0, "iter": 5110, "iter_tflops": 30.100054976812636, "iter_time": 0.6854171371459961, "loss": 0.00539831118658185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.64492813621648, "step_time": 0.6132007007598878} +{"epoch": 0, "iter": 5111, "iter_tflops": 39.18408352121239, "iter_time": 0.5265171890258789, "loss": 0.011164805851876736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.64222208799002, "step_time": 0.4727324256896973} +{"epoch": 0, "iter": 5112, "iter_tflops": 46.403446924471915, "iter_time": 0.4446026077270508, "loss": 0.010634482838213444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.474534200988025, "step_time": 0.4008019466400146} +{"epoch": 0, "iter": 5113, "iter_tflops": 21.6632799101116, "iter_time": 0.9523531799316406, "loss": 0.8012754917144775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.83439889630529, "step_time": 0.9035093765258788} +{"epoch": 0, "iter": 5114, "iter_tflops": 22.479711763176393, "iter_time": 0.9177650375366212, "loss": 0.8339884877204895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.526259193411438, "step_time": 0.6758474197387694} +{"epoch": 0, "iter": 5115, "iter_tflops": 44.07093332679254, "iter_time": 0.4681338005065918, "loss": 0.8174034953117371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.38368753633408, "step_time": 0.43540497970581055} +{"epoch": 0, "iter": 5116, "iter_tflops": 47.974435819841254, "iter_time": 0.43004348373413087, "loss": 0.7127227187156677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.47941301381817, "step_time": 0.4007639617919922} +{"epoch": 0, "iter": 5117, "iter_tflops": 30.186477840508363, "iter_time": 0.6834548110961913, "loss": 0.2532854974269867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.13906671525508, "step_time": 0.6419319419860839} +{"epoch": 0, "iter": 5118, "iter_tflops": 
41.524306352892815, "iter_time": 0.4968437843322754, "loss": 0.2789447009563446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.75046849016048, "step_time": 0.46102519607543946} +{"epoch": 0, "iter": 5119, "iter_tflops": 48.60557439720753, "iter_time": 0.42445941162109374, "loss": 0.3451635539531708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.716159411828144, "step_time": 0.39136184692382814} +{"epoch": 0, "iter": 5120, "iter_tflops": 47.04345354538204, "iter_time": 0.4385539741516114, "loss": 0.29744529724121094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.987252526632275, "step_time": 0.4046323833465576} +{"epoch": 0, "iter": 5121, "iter_tflops": 38.65568794090355, "iter_time": 0.5337143020629883, "loss": 0.2863071858882904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.587735939058035, "step_time": 0.4960859985351562} +{"epoch": 0, "iter": 5122, "iter_tflops": 39.467463497206595, "iter_time": 0.5227367477416993, "loss": 0.3031022250652313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.5810296635122, "step_time": 0.4733961925506592} +{"epoch": 0, "iter": 5123, "iter_tflops": 41.75205954670201, "iter_time": 0.4941335525512695, "loss": 0.33085155487060547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88961881420293, "step_time": 0.44958084297180173} +{"epoch": 0, "iter": 5124, "iter_tflops": 43.089432830166984, "iter_time": 0.4787970542907714, "loss": 0.4004122018814087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.054863596261704, "step_time": 0.4384476318359375} +{"epoch": 0, "iter": 5125, "iter_tflops": 21.87280330197824, "iter_time": 0.9432304229736328, "loss": 0.2469330132007599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.662205684398394, "step_time": 0.8719006919860839} +{"epoch": 0, "iter": 5126, "iter_tflops": 44.52930129947031, "iter_time": 0.46331500625610356, "loss": 0.20783458650112152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.713797339434606, "step_time": 0.37707295989990236} +{"epoch": 0, "iter": 5127, 
"iter_tflops": 50.07179091242727, "iter_time": 0.4120302696228027, "loss": 0.21845650672912598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.53760591835433, "step_time": 0.37829114723205565} +{"epoch": 0, "iter": 5128, "iter_tflops": 50.195510873889695, "iter_time": 0.4110147132873535, "loss": 0.2790620028972626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.13558053199962, "step_time": 0.38110043907165525} +{"epoch": 0, "iter": 5129, "iter_tflops": 35.55213196031794, "iter_time": 0.5803053817749023, "loss": 0.8001695275306702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.12837989431974, "step_time": 0.5410954666137695} +{"epoch": 0, "iter": 5130, "iter_tflops": 9.327157408611313, "iter_time": 2.2119379577636717, "loss": 0.738083004951477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.920295638081969, "step_time": 1.7307535095214843} +{"epoch": 0, "iter": 5131, "iter_tflops": 9.483817973684298, "iter_time": 2.1753995666503907, "loss": 0.8796417117118835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.965063889212045, "step_time": 1.7242777557373046} +{"epoch": 0, "iter": 5132, "iter_tflops": 31.812384584660695, "iter_time": 0.6485239562988282, "loss": 0.8278935551643372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30950055870131, "step_time": 0.4183999691009521} +{"epoch": 0, "iter": 5133, "iter_tflops": 19.36964283379351, "iter_time": 0.77386328125, "loss": 0.21229813992977142, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 20.402027147112847, "step_time": 0.7347042160034181} +{"epoch": 0, "iter": 5134, "iter_tflops": 9.154462146679732, "iter_time": 1.6373933410644534, "loss": 0.14507687091827393, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 12.046350958342876, "step_time": 1.244315013885498} +{"epoch": 0, "iter": 5135, "iter_tflops": 27.415592939698186, "iter_time": 0.5467492675781249, "loss": 0.21077784895896912, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.280083508400676, "step_time": 0.5119334907531738} +{"epoch": 0, "iter": 5136, 
"iter_tflops": 27.54655063517559, "iter_time": 0.5441499938964844, "loss": 0.19800794124603271, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.291571148603687, "step_time": 0.5117327194213867} +{"epoch": 0, "iter": 5137, "iter_tflops": 38.9728912256877, "iter_time": 0.529370361328125, "loss": 0.6039301753044128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.42130470191304, "step_time": 0.4863380241394043} +{"epoch": 0, "iter": 5138, "iter_tflops": 45.87401237552913, "iter_time": 0.4497337913513183, "loss": 0.5469699501991272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9909235206095, "step_time": 0.4126967868804931} +{"epoch": 0, "iter": 5139, "iter_tflops": 46.48146206356102, "iter_time": 0.44385638046264647, "loss": 0.4899453818798065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20680058479684, "step_time": 0.410922290802002} +{"epoch": 0, "iter": 5140, "iter_tflops": 47.51463932221876, "iter_time": 0.4342049903869629, "loss": 0.5429419279098511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.62036216281504, "step_time": 0.39966967773437495} +{"epoch": 0, "iter": 5141, "iter_tflops": 41.68668534356327, "iter_time": 0.49490846633911134, "loss": 0.34379348158836365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.18208935972517, "step_time": 0.4566210594177246} +{"epoch": 0, "iter": 5142, "iter_tflops": 14.49412153444717, "iter_time": 1.4234111022949216, "loss": 0.33232593536376953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.123063326744447, "step_time": 1.2048716468811036} +{"epoch": 0, "iter": 5143, "iter_tflops": 48.149249651363014, "iter_time": 0.4284821395874023, "loss": 0.4171293079853058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.75325369493194, "step_time": 0.3910866546630859} +{"epoch": 0, "iter": 5144, "iter_tflops": 45.66074213164756, "iter_time": 0.4518343887329101, "loss": 0.40823835134506226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.46518882285494, "step_time": 0.41708308410644535} +{"epoch": 0, "iter": 5145, 
"iter_tflops": 44.50139615449691, "iter_time": 0.4636055335998535, "loss": 0.655180811882019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6645049915842, "step_time": 0.42394540977478035} +{"epoch": 0, "iter": 5146, "iter_tflops": 41.2912690881373, "iter_time": 0.49964784240722654, "loss": 0.8228358030319214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92025334801277, "step_time": 0.45928266143798824} +{"epoch": 0, "iter": 5147, "iter_tflops": 42.405130358393286, "iter_time": 0.4865235252380371, "loss": 0.6580787897109985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.68062115886575, "step_time": 0.45163776206970213} +{"epoch": 0, "iter": 5148, "iter_tflops": 50.10003046791647, "iter_time": 0.41179802322387693, "loss": 0.9251151084899902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.26041868448992, "step_time": 0.3802236328125} +{"epoch": 0, "iter": 5149, "iter_tflops": 32.29041942711674, "iter_time": 0.6389230575561523, "loss": 0.13159923255443573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.371639394654295, "step_time": 0.6002359466552734} +{"epoch": 0, "iter": 5150, "iter_tflops": 13.413102726034275, "iter_time": 1.538129837036133, "loss": 0.19291257858276367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.162704271949405, "step_time": 1.2020887374877929} +{"epoch": 0, "iter": 5151, "iter_tflops": 41.113044957982325, "iter_time": 0.5018138046264649, "loss": 0.1199817955493927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.044737882334104, "step_time": 0.4580133991241455} +{"epoch": 0, "iter": 5152, "iter_tflops": 44.23641534561161, "iter_time": 0.46638257980346676, "loss": 0.18811674416065216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32546931580186, "step_time": 0.42691967201232917} +{"epoch": 0, "iter": 5153, "iter_tflops": 18.836626862382005, "iter_time": 1.0952647552490236, "loss": 0.03156764805316925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.134518752272133, "step_time": 1.0246628570556642} +{"epoch": 0, "iter": 
5154, "iter_tflops": 17.777572106680232, "iter_time": 1.160512435913086, "loss": 0.040956057608127594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.195329153095212, "step_time": 0.9295241069793703} +{"epoch": 0, "iter": 5155, "iter_tflops": 51.11368730537149, "iter_time": 0.40363148498535156, "loss": 0.01761162281036377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.710061895450515, "step_time": 0.3703297538757324} +{"epoch": 0, "iter": 5156, "iter_tflops": 48.16984074899628, "iter_time": 0.4282989768981933, "loss": 0.0678229108452797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.519853117769514, "step_time": 0.3928246612548828} +{"epoch": 0, "iter": 5157, "iter_tflops": 22.979411914890644, "iter_time": 0.8978077239990234, "loss": 0.737720251083374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.153986711259915, "step_time": 0.8541485824584961} +{"epoch": 0, "iter": 5158, "iter_tflops": 12.171382126346186, "iter_time": 1.695049362182617, "loss": 0.7317238450050354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.793058275263855, "step_time": 1.495759178161621} +{"epoch": 0, "iter": 5159, "iter_tflops": 36.99199575976219, "iter_time": 0.5577177734375, "loss": 0.8792691230773926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.2953347835315, "step_time": 0.5119970741271973} +{"epoch": 0, "iter": 5160, "iter_tflops": 37.49626464777024, "iter_time": 0.5502173004150391, "loss": 0.9954111576080322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.40959729838237, "step_time": 0.5105493469238281} +{"epoch": 0, "iter": 5161, "iter_tflops": 25.00594215586136, "iter_time": 0.8250476379394531, "loss": 0.5263658165931702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.91823139840961, "step_time": 0.7664356994628905} +{"epoch": 0, "iter": 5162, "iter_tflops": 8.00882207815006, "iter_time": 2.576045928955078, "loss": 0.5602604746818542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.355027163555867, "step_time": 2.205348327636719} +{"epoch": 0, "iter": 5163, 
"iter_tflops": 12.41162338225473, "iter_time": 1.6622397308349608, "loss": 0.6768059730529785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.113775563216928, "step_time": 1.3650522613525389} +{"epoch": 0, "iter": 5164, "iter_tflops": 40.3497567806961, "iter_time": 0.5113065147399902, "loss": 0.5437804460525513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.01876907078324, "step_time": 0.46868856048583984} +{"epoch": 0, "iter": 5165, "iter_tflops": 14.018536170821594, "iter_time": 1.2388651428222657, "loss": 0.21176795661449432, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 14.931994884590022, "step_time": 1.1630780715942381} +{"epoch": 0, "iter": 5166, "iter_tflops": 13.814992767983966, "iter_time": 1.2571179809570312, "loss": 0.32310017943382263, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 16.487386534452114, "step_time": 1.0533552894592284} +{"epoch": 0, "iter": 5167, "iter_tflops": 25.582751721376404, "iter_time": 0.6788587875366211, "loss": 0.32500946521759033, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 27.343481886667043, "step_time": 0.6351450004577637} +{"epoch": 0, "iter": 5168, "iter_tflops": 25.80268592086117, "iter_time": 0.6730724029541015, "loss": 0.28044769167900085, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 27.73961336855173, "step_time": 0.6260749053955079} +{"epoch": 0, "iter": 5169, "iter_tflops": 17.60700955747714, "iter_time": 1.1717545471191406, "loss": 0.0045365323312580585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.75244105362199, "step_time": 1.1001817550659179} +{"epoch": 0, "iter": 5170, "iter_tflops": 16.836588275580866, "iter_time": 1.2253725738525392, "loss": 0.007948212325572968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.501726229230485, "step_time": 1.0579111442565916} +{"epoch": 0, "iter": 5171, "iter_tflops": 47.28741847375691, "iter_time": 0.43629138946533197, "loss": 0.0018098565051332116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.38163594756199, "step_time": 0.3938611907958985} +{"epoch": 0, 
"iter": 5172, "iter_tflops": 46.560737251545206, "iter_time": 0.4431006622314454, "loss": 0.009735407307744026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.29972580324608, "step_time": 0.4021677150726318} +{"epoch": 0, "iter": 5173, "iter_tflops": 15.856449389228265, "iter_time": 1.3011168518066405, "loss": 0.1509552001953125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.004142117873645, "step_time": 1.2132981109619139} +{"epoch": 0, "iter": 5174, "iter_tflops": 17.257301803839947, "iter_time": 1.1954993743896485, "loss": 0.13113698363304138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.09253964352842, "step_time": 0.9781227798461913} +{"epoch": 0, "iter": 5175, "iter_tflops": 43.814725731158596, "iter_time": 0.47087122344970705, "loss": 0.14486441016197205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.53565767906991, "step_time": 0.4340130023956299} +{"epoch": 0, "iter": 5176, "iter_tflops": 52.95809182044852, "iter_time": 0.3895739593505859, "loss": 0.11975645273923874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.720921422259195, "step_time": 0.35742834663391115} +{"epoch": 0, "iter": 5177, "iter_tflops": 27.76286757331403, "iter_time": 0.7431182479858399, "loss": 0.16298887133598328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.24636759109466, "step_time": 0.7054241333007812} +{"epoch": 0, "iter": 5178, "iter_tflops": 15.186505051309094, "iter_time": 1.3585149078369143, "loss": 0.21078070998191833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.18447056215549, "step_time": 1.20056614112854} +{"epoch": 0, "iter": 5179, "iter_tflops": 38.75090476493081, "iter_time": 0.5324028854370118, "loss": 0.20385277271270752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.43765938517216, "step_time": 0.48615059852600095} +{"epoch": 0, "iter": 5180, "iter_tflops": 38.062817735134225, "iter_time": 0.542027488708496, "loss": 0.2106674611568451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.767038353009724, "step_time": 0.4939563426971435} 
+{"epoch": 0, "iter": 5181, "iter_tflops": 15.051803600995001, "iter_time": 1.3706725158691406, "loss": 0.8957726359367371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.043911130050436, "step_time": 1.2859142227172853} +{"epoch": 0, "iter": 5182, "iter_tflops": 22.170893818469796, "iter_time": 0.9305485687255859, "loss": 0.688072144985199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.51111383838403, "step_time": 0.7499185104370116} +{"epoch": 0, "iter": 5183, "iter_tflops": 44.791275975837344, "iter_time": 0.46060517501831055, "loss": 0.6248407363891602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.313103144701785, "step_time": 0.42702894592285157} +{"epoch": 0, "iter": 5184, "iter_tflops": 50.27587104526568, "iter_time": 0.4103577537536622, "loss": 0.8071821331977844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.2914376261662, "step_time": 0.38000639533996583} +{"epoch": 0, "iter": 5185, "iter_tflops": 35.090740166677406, "iter_time": 0.5879355468749999, "loss": 0.016860006377100945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.60581509431914, "step_time": 0.5486144485473634} +{"epoch": 0, "iter": 5186, "iter_tflops": 10.82402574539416, "iter_time": 1.9060462341308595, "loss": 0.02432071603834629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.953250279202305, "step_time": 1.592734878540039} +{"epoch": 0, "iter": 5187, "iter_tflops": 13.58065984333597, "iter_time": 1.5191525115966795, "loss": 0.06040671095252037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.017526870281777, "step_time": 1.2123438186645505} +{"epoch": 0, "iter": 5188, "iter_tflops": 23.47547470837142, "iter_time": 0.878836051940918, "loss": 0.041667498648166656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.29474457020475, "step_time": 0.7291493110656738} +{"epoch": 0, "iter": 5189, "iter_tflops": 26.38222529596035, "iter_time": 0.5883123931884766, "loss": 0.45835816860198975, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 28.40952986929642, "step_time": 
0.5463304100036621} +{"epoch": 0, "iter": 5190, "iter_tflops": 25.797962513783787, "iter_time": 0.6016362762451172, "loss": 0.2943215072154999, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 27.38404565822529, "step_time": 0.5667895202636719} +{"epoch": 0, "iter": 5191, "iter_tflops": 28.993095695251604, "iter_time": 0.5353340072631836, "loss": 0.33244818449020386, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 30.85009539639421, "step_time": 0.5031099548339844} +{"epoch": 0, "iter": 5192, "iter_tflops": 27.867682519027216, "iter_time": 0.5569530258178711, "loss": 0.27695950865745544, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 29.6917436741323, "step_time": 0.5227375755310059} +{"epoch": 0, "iter": 5193, "iter_tflops": 39.2040531910023, "iter_time": 0.5262489929199218, "loss": 0.7371054291725159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.532259034688025, "step_time": 0.4850693092346191} +{"epoch": 0, "iter": 5194, "iter_tflops": 39.66860132013283, "iter_time": 0.5200862350463867, "loss": 0.8090277910232544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14546421530454, "step_time": 0.4781752586364746} +{"epoch": 0, "iter": 5195, "iter_tflops": 50.381410234358555, "iter_time": 0.4094981346130372, "loss": 0.8893802762031555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.32655347118963, "step_time": 0.3797607650756836} +{"epoch": 0, "iter": 5196, "iter_tflops": 41.5064098975865, "iter_time": 0.49705801010131834, "loss": 0.7806066870689392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.67662110257306, "step_time": 0.4617872390747071} +{"epoch": 0, "iter": 5197, "iter_tflops": 31.579262603279183, "iter_time": 0.6533114395141602, "loss": 0.16423135995864868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.72012126861612, "step_time": 0.6118333129882814} +{"epoch": 0, "iter": 5198, "iter_tflops": 11.302754752554943, "iter_time": 1.8253155059814452, "loss": 0.25226473808288574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.251351006270523, "step_time": 
1.556904914855957} +{"epoch": 0, "iter": 5199, "iter_tflops": 21.12111969795693, "iter_time": 0.9767992324829102, "loss": 0.30131614208221436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.964530716984306, "step_time": 0.8609012107849121} +{"epoch": 0, "iter": 5200, "iter_tflops": 52.91526766967755, "iter_time": 0.3898892402648925, "loss": 0.23187696933746338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.412123307711006, "step_time": 0.35935081863403323} +{"epoch": 0, "iter": 5201, "iter_tflops": 20.309911685401012, "iter_time": 0.7239605407714843, "loss": 0.16629767417907715, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 21.58111792461788, "step_time": 0.6813166351318358} +{"epoch": 0, "iter": 5202, "iter_tflops": 11.875585728664241, "iter_time": 1.2381346893310545, "loss": 0.2180251181125641, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 13.251751460915374, "step_time": 1.1095570793151854} +{"epoch": 0, "iter": 5203, "iter_tflops": 21.159651678521715, "iter_time": 0.6948873672485352, "loss": 0.3809736967086792, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 22.779902722395818, "step_time": 0.645462574005127} +{"epoch": 0, "iter": 5204, "iter_tflops": 20.923963065066655, "iter_time": 0.702714614868164, "loss": 0.2025156468153, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 22.599586477477747, "step_time": 0.6506125526428223} +{"epoch": 0, "iter": 5205, "iter_tflops": 15.111884805294654, "iter_time": 1.3652230529785156, "loss": 0.866005539894104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.925101953902383, "step_time": 1.295507781982422} +{"epoch": 0, "iter": 5206, "iter_tflops": 22.256609663116144, "iter_time": 0.9269647903442384, "loss": 0.7974449396133423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.013283582675793, "step_time": 0.7637388267517089} +{"epoch": 0, "iter": 5207, "iter_tflops": 36.45998781576549, "iter_time": 0.5658557434082031, "loss": 0.8701149821281433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.81202258579284, "step_time": 
0.5182126445770263} +{"epoch": 0, "iter": 5208, "iter_tflops": 35.336013597733604, "iter_time": 0.5838545837402342, "loss": 0.8663121461868286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.27487387918766, "step_time": 0.5390244674682617} +{"epoch": 0, "iter": 5209, "iter_tflops": 19.64061332816894, "iter_time": 1.050430206298828, "loss": 0.6690961718559265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.988206550776827, "step_time": 0.9829850616455078} +{"epoch": 0, "iter": 5210, "iter_tflops": 14.608696654869588, "iter_time": 1.4122473754882812, "loss": 0.7819010019302368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.23048970429121, "step_time": 1.1973596725463866} +{"epoch": 0, "iter": 5211, "iter_tflops": 33.92174858060233, "iter_time": 0.6081966400146485, "loss": 0.8804136514663696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.023053470841525, "step_time": 0.5572499179840087} +{"epoch": 0, "iter": 5212, "iter_tflops": 38.03143848865963, "iter_time": 0.5424747085571289, "loss": 0.7992331385612488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18051906858476, "step_time": 0.500991584777832} +{"epoch": 0, "iter": 5213, "iter_tflops": 18.687211519028363, "iter_time": 1.1018012390136718, "loss": 0.14946714043617249, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 20.080974321843275, "step_time": 1.0253283767700196} +{"epoch": 0, "iter": 5214, "iter_tflops": 14.786132888288758, "iter_time": 1.392493423461914, "loss": 0.1311064064502716, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 19.250096862790297, "step_time": 1.069583854675293} +{"epoch": 0, "iter": 5215, "iter_tflops": 42.311974184880874, "iter_time": 0.48661385345458985, "loss": 0.21154984831809998, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 46.67193306514029, "step_time": 0.44115577507019044} +{"epoch": 0, "iter": 5216, "iter_tflops": 39.45532920202121, "iter_time": 0.5218456726074219, "loss": 0.11472804099321365, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 43.073536732792334, "step_time": 
0.47801026725769047} +{"epoch": 0, "iter": 5217, "iter_tflops": 20.17379095331381, "iter_time": 1.0226681518554688, "loss": 0.20633521676063538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.762762296710655, "step_time": 0.9479997634887695} +{"epoch": 0, "iter": 5218, "iter_tflops": 15.50183991318926, "iter_time": 1.330880310058594, "loss": 0.12353149056434631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.08580280552792, "step_time": 1.1407341842651366} +{"epoch": 0, "iter": 5219, "iter_tflops": 48.89920123505181, "iter_time": 0.42191064453125005, "loss": 0.14064446091651917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.22783650442123, "step_time": 0.3875997009277344} +{"epoch": 0, "iter": 5220, "iter_tflops": 50.939110981984115, "iter_time": 0.40501479339599605, "loss": 0.1958664506673813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.50796128018531, "step_time": 0.3716780986785889} +{"epoch": 0, "iter": 5221, "iter_tflops": 30.750275176201246, "iter_time": 0.6709238662719728, "loss": 0.02970242314040661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.656296629317964, "step_time": 0.6317646408081055} +{"epoch": 0, "iter": 5222, "iter_tflops": 13.345921038682853, "iter_time": 1.5458725891113283, "loss": 0.06777570396661758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.39755633482455, "step_time": 1.258180980682373} +{"epoch": 0, "iter": 5223, "iter_tflops": 41.837833241825074, "iter_time": 0.49312050628662113, "loss": 0.019934197887778282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.32611861330312, "step_time": 0.4453447456359863} +{"epoch": 0, "iter": 5224, "iter_tflops": 41.80485112594033, "iter_time": 0.49350955581665035, "loss": 0.06680257618427277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.999525964965684, "step_time": 0.44850665473937995} +{"epoch": 0, "iter": 5225, "iter_tflops": 27.468135998681806, "iter_time": 0.7510918655395508, "loss": 0.4003256559371948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.376649825349503, 
"step_time": 0.702295654296875} +{"epoch": 0, "iter": 5226, "iter_tflops": 8.612231674856668, "iter_time": 2.395557189941406, "loss": 0.4591664969921112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.407314532502696, "step_time": 1.9823647537231444} +{"epoch": 0, "iter": 5227, "iter_tflops": 11.174365066825912, "iter_time": 1.8462877655029297, "loss": 0.4047200381755829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.859129689355095, "step_time": 1.6043926773071289} +{"epoch": 0, "iter": 5228, "iter_tflops": 34.46568279835229, "iter_time": 0.5985981369018555, "loss": 0.3560962378978729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.84759564372904, "step_time": 0.5310777454376221} +{"epoch": 0, "iter": 5229, "iter_tflops": 19.49721219750231, "iter_time": 0.7813773498535156, "loss": 0.22087594866752625, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 20.984301307666204, "step_time": 0.7260036811828613} +{"epoch": 0, "iter": 5230, "iter_tflops": 22.060612113499854, "iter_time": 0.6905828323364258, "loss": 0.42115354537963867, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 23.682193794554955, "step_time": 0.6432968215942383} +{"epoch": 0, "iter": 5231, "iter_tflops": 25.03616745388011, "iter_time": 0.6085068740844726, "loss": 0.30410194396972656, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 26.881120664115123, "step_time": 0.5667427406311035} +{"epoch": 0, "iter": 5232, "iter_tflops": 22.26167782387445, "iter_time": 0.6843455429077148, "loss": 0.23960347473621368, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.028989496411143, "step_time": 0.6340125122070313} +{"epoch": 0, "iter": 5233, "iter_tflops": 29.902114014687573, "iter_time": 0.689954345703125, "loss": 0.24918590486049652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.25174406441092, "step_time": 0.6396892356872558} +{"epoch": 0, "iter": 5234, "iter_tflops": 12.12227712184465, "iter_time": 1.7019156799316406, "loss": 0.38449037075042725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
16.772746128654322, "step_time": 1.2300367126464844} +{"epoch": 0, "iter": 5235, "iter_tflops": 8.320715824873377, "iter_time": 2.479485412597656, "loss": 0.22515881061553955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.266873610602088, "step_time": 2.009481590270996} +{"epoch": 0, "iter": 5236, "iter_tflops": 34.730456339399126, "iter_time": 0.5940346221923829, "loss": 0.286540150642395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.725376040757716, "step_time": 0.5327538585662842} +{"epoch": 0, "iter": 5237, "iter_tflops": 20.49319077192215, "iter_time": 0.8073615188598632, "loss": 0.3452715277671814, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 22.104724300148273, "step_time": 0.7485012435913085} +{"epoch": 0, "iter": 5238, "iter_tflops": 27.121013264932444, "iter_time": 0.6100588302612304, "loss": 0.23702839016914368, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 28.879309085961257, "step_time": 0.5729158401489258} +{"epoch": 0, "iter": 5239, "iter_tflops": 28.54677642253792, "iter_time": 0.5795895614624024, "loss": 0.20917631685733795, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 30.223239671793294, "step_time": 0.5474401092529297} +{"epoch": 0, "iter": 5240, "iter_tflops": 31.93198879831288, "iter_time": 0.5181454162597656, "loss": 0.3275355100631714, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 33.852785869671706, "step_time": 0.4887459983825684} +{"epoch": 0, "iter": 5241, "iter_tflops": 42.569018741289646, "iter_time": 0.48465043640136724, "loss": 0.10787024348974228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.47637980803151, "step_time": 0.4439049167633057} +{"epoch": 0, "iter": 5242, "iter_tflops": 39.056903472535524, "iter_time": 0.5282316741943358, "loss": 0.0764329731464386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.1609907132798, "step_time": 0.47800324249267573} +{"epoch": 0, "iter": 5243, "iter_tflops": 43.47418109632469, "iter_time": 0.4745596809387207, "loss": 0.0899064913392067, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 47.92926043122008, "step_time": 0.43044881820678715} +{"epoch": 0, "iter": 5244, "iter_tflops": 42.53699384152849, "iter_time": 0.48501531600952147, "loss": 0.15182210505008698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44756716735824, "step_time": 0.4441802825927735} +{"epoch": 0, "iter": 5245, "iter_tflops": 28.791584600417128, "iter_time": 0.7165667953491212, "loss": 0.5674572587013245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.098107351469313, "step_time": 0.6634195861816407} +{"epoch": 0, "iter": 5246, "iter_tflops": 7.666714457665411, "iter_time": 2.6909954223632813, "loss": 0.771275520324707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.862610270462456, "step_time": 2.3278800354003906} +{"epoch": 0, "iter": 5247, "iter_tflops": 13.546064845923883, "iter_time": 1.5230322418212892, "loss": 1.2005250453948975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.30124780695283, "step_time": 1.2656143722534179} +{"epoch": 0, "iter": 5248, "iter_tflops": 33.875252814773674, "iter_time": 0.6090314254760743, "loss": 0.9026390314102173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.12202216940242, "step_time": 0.5557642688751221} +{"epoch": 0, "iter": 5249, "iter_tflops": 15.16912184061509, "iter_time": 1.0231963500976562, "loss": 0.3542032241821289, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 16.02947351576776, "step_time": 0.968278221130371} +{"epoch": 0, "iter": 5250, "iter_tflops": 6.089172977204636, "iter_time": 2.548948791503906, "loss": 0.3083992898464203, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 7.600191698664175, "step_time": 2.0421840286254884} +{"epoch": 0, "iter": 5251, "iter_tflops": 8.5626795363617, "iter_time": 1.8126323699951172, "loss": 0.3171212673187256, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 11.498336552546414, "step_time": 1.3498465652465819} +{"epoch": 0, "iter": 5252, "iter_tflops": 22.71042855689436, "iter_time": 0.6834300842285156, "loss": 0.14228342473506927, "lr": 3e-05, "seqlen": 6208.0, 
"step_tflops": 24.361489454194412, "step_time": 0.6371117057800294} +{"epoch": 0, "iter": 5253, "iter_tflops": 12.582064188372424, "iter_time": 1.2433385314941408, "loss": 0.32768648862838745, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 13.36907231006563, "step_time": 1.1701459045410154} +{"epoch": 0, "iter": 5254, "iter_tflops": 11.718180206212129, "iter_time": 1.3349995422363279, "loss": 0.15187349915504456, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 14.74692454580369, "step_time": 1.0608154373168945} +{"epoch": 0, "iter": 5255, "iter_tflops": 21.63947198683771, "iter_time": 0.7229273071289063, "loss": 0.3023452162742615, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 23.242877843211772, "step_time": 0.6730562934875488} +{"epoch": 0, "iter": 5256, "iter_tflops": 24.261315283987, "iter_time": 0.644802848815918, "loss": 0.21477873623371124, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 26.099198788500022, "step_time": 0.5993963775634765} +{"epoch": 0, "iter": 5257, "iter_tflops": 23.841552701459666, "iter_time": 0.8653418579101562, "loss": 0.7649930119514465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.632037808388763, "step_time": 0.8048947830200195} +{"epoch": 0, "iter": 5258, "iter_tflops": 7.318298974009784, "iter_time": 2.8191105041503906, "loss": 0.6773333549499512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.189104199707629, "step_time": 2.5193345947265624} +{"epoch": 0, "iter": 5259, "iter_tflops": 19.797208307645175, "iter_time": 1.042121353149414, "loss": 0.6306687593460083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.868078961394588, "step_time": 0.7975502758026124} +{"epoch": 0, "iter": 5260, "iter_tflops": 46.46917204942964, "iter_time": 0.44397377014160155, "loss": 0.899684488773346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30943558905454, "step_time": 0.4100839786529541} +{"epoch": 0, "iter": 5261, "iter_tflops": 18.96035745918858, "iter_time": 0.7970338363647461, "loss": 0.28011348843574524, "lr": 3e-05, "seqlen": 6048.0, 
"step_tflops": 20.08033214518892, "step_time": 0.7525795059204101} +{"epoch": 0, "iter": 5262, "iter_tflops": 9.858059688183525, "iter_time": 1.532963577270508, "loss": 0.2501678168773651, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 12.674254876693073, "step_time": 1.1923420028686524} +{"epoch": 0, "iter": 5263, "iter_tflops": 22.03052765682907, "iter_time": 0.6859593505859376, "loss": 0.2242298126220703, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 23.644293876895492, "step_time": 0.6391413726806641} +{"epoch": 0, "iter": 5264, "iter_tflops": 24.472228575351973, "iter_time": 0.617518196105957, "loss": 0.28926604986190796, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 26.234758513739138, "step_time": 0.576031467437744} +{"epoch": 0, "iter": 5265, "iter_tflops": 25.876968556310032, "iter_time": 0.7972762908935548, "loss": 0.7421251535415649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.82481751786803, "step_time": 0.7414637489318847} +{"epoch": 0, "iter": 5266, "iter_tflops": 8.420150311543457, "iter_time": 2.450204895019531, "loss": 0.7709400057792664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.774848806752926, "step_time": 2.1106304473876953} +{"epoch": 0, "iter": 5267, "iter_tflops": 13.54187719767335, "iter_time": 1.5235032196044924, "loss": 0.9409330487251282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.13442669844543, "step_time": 1.3631896286010743} +{"epoch": 0, "iter": 5268, "iter_tflops": 33.68021070022012, "iter_time": 0.6125583267211915, "loss": 0.8928118348121643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.79010070225556, "step_time": 0.5057867755889893} +{"epoch": 0, "iter": 5269, "iter_tflops": 19.074889291412546, "iter_time": 0.7259449615478516, "loss": 0.17594429850578308, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 20.282989890345764, "step_time": 0.6827060432434082} +{"epoch": 0, "iter": 5270, "iter_tflops": 20.902273339966197, "iter_time": 0.6624791259765624, "loss": 0.27357035875320435, "lr": 3e-05, "seqlen": 5552.0, 
"step_tflops": 22.626005817810903, "step_time": 0.6120090255737305} +{"epoch": 0, "iter": 5271, "iter_tflops": 21.06927126914977, "iter_time": 0.6572282257080078, "loss": 0.15680073201656342, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 22.686774493805192, "step_time": 0.6103697013854981} +{"epoch": 0, "iter": 5272, "iter_tflops": 20.708097566858918, "iter_time": 0.6686910629272461, "loss": 0.2700277268886566, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 22.309208084511138, "step_time": 0.6206997451782227} +{"epoch": 0, "iter": 5273, "iter_tflops": 19.070812333538438, "iter_time": 1.0818151397705078, "loss": 0.004248473327606916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.995378473787724, "step_time": 1.031793098449707} +{"epoch": 0, "iter": 5274, "iter_tflops": 19.661669203798464, "iter_time": 1.049305290222168, "loss": 0.029920196160674095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.314762716849977, "step_time": 0.8485007133483886} +{"epoch": 0, "iter": 5275, "iter_tflops": 54.284090590165135, "iter_time": 0.3800578269958496, "loss": 0.006218206137418747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.75770278046551, "step_time": 0.34524575996398926} +{"epoch": 0, "iter": 5276, "iter_tflops": 55.405306727293855, "iter_time": 0.3723667411804199, "loss": 0.006444843485951424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.6008459621791, "step_time": 0.34044233512878425} +{"epoch": 0, "iter": 5277, "iter_tflops": 29.548525770928087, "iter_time": 0.6982105865478516, "loss": 0.23509040474891663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.504868536688114, "step_time": 0.6548541374206543} +{"epoch": 0, "iter": 5278, "iter_tflops": 17.73221160962846, "iter_time": 1.1634811248779298, "loss": 0.1477370262145996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.414840520180597, "step_time": 0.8811118526458741} +{"epoch": 0, "iter": 5279, "iter_tflops": 37.660594956266486, "iter_time": 0.5478164520263673, "loss": 0.20404326915740967, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 41.126796828228635, "step_time": 0.5016460094451904} +{"epoch": 0, "iter": 5280, "iter_tflops": 40.49984431631589, "iter_time": 0.5094116744995117, "loss": 0.14317278563976288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7237763255318, "step_time": 0.4613003463745117} +{"epoch": 0, "iter": 5281, "iter_tflops": 21.76347866023868, "iter_time": 0.9479685592651367, "loss": 0.049795351922512054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.422170273070893, "step_time": 0.8808361167907715} +{"epoch": 0, "iter": 5282, "iter_tflops": 21.264952367741767, "iter_time": 0.9701923217773438, "loss": 0.02969290316104889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.218031265563255, "step_time": 0.7869047565460205} +{"epoch": 0, "iter": 5283, "iter_tflops": 51.10361964389871, "iter_time": 0.40371100234985346, "loss": 0.05378730595111847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.86395290857527, "step_time": 0.3693095893859863} +{"epoch": 0, "iter": 5284, "iter_tflops": 60.442993393706885, "iter_time": 0.34133143234252933, "loss": 0.06585104763507843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.78359934158459, "step_time": 0.3136206245422364} +{"epoch": 0, "iter": 5285, "iter_tflops": 22.96169212171217, "iter_time": 0.8985005722045899, "loss": 0.7842668890953064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.127481062770617, "step_time": 0.8550869216918946} +{"epoch": 0, "iter": 5286, "iter_tflops": 16.001853458356784, "iter_time": 1.289293991088867, "loss": 0.9296522736549377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.025343156959305, "step_time": 0.981248836517334} +{"epoch": 0, "iter": 5287, "iter_tflops": 37.014865107303415, "iter_time": 0.5573731918334961, "loss": 0.7929859161376953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.38417702000752, "step_time": 0.5108707180023194} +{"epoch": 0, "iter": 5288, "iter_tflops": 37.332111319998766, "iter_time": 0.5526366653442383, "loss": 0.7328682541847229, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 40.3874845479009, "step_time": 0.5108288803100586} +{"epoch": 0, "iter": 5289, "iter_tflops": 19.184110680839787, "iter_time": 1.0754261093139648, "loss": 0.3281678259372711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.329944114741572, "step_time": 1.0148130950927734} +{"epoch": 0, "iter": 5290, "iter_tflops": 15.998783220533122, "iter_time": 1.2895414123535156, "loss": 0.3072341978549957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.703376264906957, "step_time": 1.1030678749084473} +{"epoch": 0, "iter": 5291, "iter_tflops": 45.1379693365855, "iter_time": 0.4570673828125, "loss": 0.3438901901245117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.01413910244827, "step_time": 0.42092126655578616} +{"epoch": 0, "iter": 5292, "iter_tflops": 49.42936088866282, "iter_time": 0.417385398864746, "loss": 0.3805031180381775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.31493356884497, "step_time": 0.3869665050506592} +{"epoch": 0, "iter": 5293, "iter_tflops": 44.03382632711178, "iter_time": 0.4685282936096192, "loss": 0.2549920380115509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.06172456305602, "step_time": 0.4292624473571777} +{"epoch": 0, "iter": 5294, "iter_tflops": 27.479680594303414, "iter_time": 0.7507763214111328, "loss": 0.1319248527288437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.41252048137086, "step_time": 0.6174659442901611} +{"epoch": 0, "iter": 5295, "iter_tflops": 48.1911785942356, "iter_time": 0.4281093368530273, "loss": 0.1889541745185852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.4474324948586, "step_time": 0.39336708259582515} +{"epoch": 0, "iter": 5296, "iter_tflops": 49.92081053497672, "iter_time": 0.4132764129638672, "loss": 0.18979412317276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.36833869796135, "step_time": 0.37946889686584473} +{"epoch": 0, "iter": 5297, "iter_tflops": 28.912614703442788, "iter_time": 0.7135671997070313, "loss": 0.554897129535675, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 30.562060251143638, "step_time": 0.6750557174682617} +{"epoch": 0, "iter": 5298, "iter_tflops": 11.96106082474847, "iter_time": 1.724854827880859, "loss": 0.5391130447387695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.85166484261541, "step_time": 1.3015095710754396} +{"epoch": 0, "iter": 5299, "iter_tflops": 41.05303273075369, "iter_time": 0.5025473670959472, "loss": 0.6430009603500366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.80633599573112, "step_time": 0.46045035934448236} +{"epoch": 0, "iter": 5300, "iter_tflops": 36.77529618337387, "iter_time": 0.5610041427612305, "loss": 0.8278897404670715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.20825955218501, "step_time": 0.5131058578491211} +{"epoch": 0, "iter": 5301, "iter_tflops": 28.167779790070643, "iter_time": 0.7324359130859375, "loss": 0.1891837865114212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.691464662737513, "step_time": 0.6722094802856445} +{"epoch": 0, "iter": 5302, "iter_tflops": 44.0035831941984, "iter_time": 0.4688503074645996, "loss": 0.09159423410892487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.37611265991902, "step_time": 0.41783551597595214} +{"epoch": 0, "iter": 5303, "iter_tflops": 46.6035990475017, "iter_time": 0.44269313812255856, "loss": 0.15968935191631317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96919332091869, "step_time": 0.4047757511138916} +{"epoch": 0, "iter": 5304, "iter_tflops": 51.78574229641746, "iter_time": 0.3983933143615722, "loss": 0.12635083496570587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.5375531964438, "step_time": 0.3649095573425293} +{"epoch": 0, "iter": 5305, "iter_tflops": 37.660577647906486, "iter_time": 0.5478167037963867, "loss": 0.16063831746578217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.801480677326325, "step_time": 0.5056457061767579} +{"epoch": 0, "iter": 5306, "iter_tflops": 11.34876566569319, "iter_time": 1.8179151916503908, "loss": 0.12001398950815201, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 13.383600430011997, "step_time": 1.5415204315185547} +{"epoch": 0, "iter": 5307, "iter_tflops": 13.03878313980216, "iter_time": 1.5822867279052737, "loss": 0.16537439823150635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.995186208008876, "step_time": 1.2898314056396485} +{"epoch": 0, "iter": 5308, "iter_tflops": 18.995208027409568, "iter_time": 1.0861209564208985, "loss": 0.14454296231269836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.934865454407653, "step_time": 0.7954964542388916} +{"epoch": 0, "iter": 5309, "iter_tflops": 22.63290391667252, "iter_time": 0.7183469619750977, "loss": 0.3865073025226593, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 24.034036670121203, "step_time": 0.6764688758850099} +{"epoch": 0, "iter": 5310, "iter_tflops": 11.411023848784222, "iter_time": 1.4247869415283205, "loss": 0.2941909730434418, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 13.224489217968706, "step_time": 1.2294068603515624} +{"epoch": 0, "iter": 5311, "iter_tflops": 23.43533549811116, "iter_time": 0.6937505874633789, "loss": 0.24329087138175964, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 25.34404772070463, "step_time": 0.6415028076171874} +{"epoch": 0, "iter": 5312, "iter_tflops": 25.217359984861087, "iter_time": 0.6447256088256836, "loss": 0.3026892840862274, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 27.149903718292023, "step_time": 0.5988337173461914} +{"epoch": 0, "iter": 5313, "iter_tflops": 19.682130186663684, "iter_time": 1.0482144622802734, "loss": 0.19070209562778473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.029018331123336, "step_time": 0.9810773468017577} +{"epoch": 0, "iter": 5314, "iter_tflops": 14.701459895219294, "iter_time": 1.4033363800048828, "loss": 0.2973599433898926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.576953796500188, "step_time": 1.1737581920623779} +{"epoch": 0, "iter": 5315, "iter_tflops": 40.96717907320523, "iter_time": 0.5036005401611328, "loss": 0.33804816007614136, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 45.0618633132942, "step_time": 0.45783933448791503} +{"epoch": 0, "iter": 5316, "iter_tflops": 44.748941497884324, "iter_time": 0.4610409278869629, "loss": 0.24357694387435913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1233172061524, "step_time": 0.4199857559204102} +{"epoch": 0, "iter": 5317, "iter_tflops": 20.035203792346643, "iter_time": 1.0297421340942383, "loss": 0.8150674104690552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.34963904143595, "step_time": 0.9663439025878906} +{"epoch": 0, "iter": 5318, "iter_tflops": 15.329740189766486, "iter_time": 1.3458214721679687, "loss": 0.933742880821228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.252401738675452, "step_time": 1.1303221244812012} +{"epoch": 0, "iter": 5319, "iter_tflops": 34.41471874620074, "iter_time": 0.5994845886230469, "loss": 0.7997076511383057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.351039457332234, "step_time": 0.5523566093444824} +{"epoch": 0, "iter": 5320, "iter_tflops": 41.19196501541383, "iter_time": 0.5008523750305176, "loss": 0.9790419340133667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.80222302347215, "step_time": 0.4604926300048829} +{"epoch": 0, "iter": 5321, "iter_tflops": 28.28832641458904, "iter_time": 0.7293147430419923, "loss": 0.4189096689224243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.195279893105166, "step_time": 0.6613530502319336} +{"epoch": 0, "iter": 5322, "iter_tflops": 44.5304279861004, "iter_time": 0.4633032836914062, "loss": 0.3942108154296875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13692934121445, "step_time": 0.41986940956115715} +{"epoch": 0, "iter": 5323, "iter_tflops": 50.51742695359981, "iter_time": 0.40839557266235355, "loss": 0.535907506942749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.76352292189984, "step_time": 0.37673057556152345} +{"epoch": 0, "iter": 5324, "iter_tflops": 48.12424496283324, "iter_time": 0.42870477294921877, "loss": 0.3711055517196655, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 51.97934237566582, "step_time": 0.3969094753265381} +{"epoch": 0, "iter": 5325, "iter_tflops": 48.15363485340746, "iter_time": 0.4284431190490723, "loss": 0.3581518828868866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.810692994095625, "step_time": 0.3906612911224365} +{"epoch": 0, "iter": 5326, "iter_tflops": 37.672857993036295, "iter_time": 0.5476381301879883, "loss": 0.3402785360813141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.729040852580376, "step_time": 0.4944061279296875} +{"epoch": 0, "iter": 5327, "iter_tflops": 40.225858639091726, "iter_time": 0.5128813705444336, "loss": 0.31859493255615234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.208826456985186, "step_time": 0.46667362976074217} +{"epoch": 0, "iter": 5328, "iter_tflops": 40.5357235299341, "iter_time": 0.5089607810974122, "loss": 0.3101115822792053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.462089569379565, "step_time": 0.4640153827667236} +{"epoch": 0, "iter": 5329, "iter_tflops": 18.577971916767293, "iter_time": 1.1105137634277344, "loss": 0.7751843333244324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.86117317429146, "step_time": 1.0387650985717773} +{"epoch": 0, "iter": 5330, "iter_tflops": 22.268507179476526, "iter_time": 0.9264695358276366, "loss": 0.8689807057380676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.198790728255027, "step_time": 0.7585298080444337} +{"epoch": 0, "iter": 5331, "iter_tflops": 46.21332779720122, "iter_time": 0.44643167877197265, "loss": 0.9784230589866638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.77513897702971, "step_time": 0.4144859046936035} +{"epoch": 0, "iter": 5332, "iter_tflops": 48.07618391587772, "iter_time": 0.4291333427429199, "loss": 1.0017120838165283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.038756044039346, "step_time": 0.3964563159942627} +{"epoch": 0, "iter": 5333, "iter_tflops": 28.72438181472624, "iter_time": 0.7182432556152344, "loss": 0.2743271291255951, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.403308870486335, "step_time": 0.6785805320739746} +{"epoch": 0, "iter": 5334, "iter_tflops": 37.47667409436807, "iter_time": 0.5505049209594726, "loss": 0.3825814723968506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.54941458748129, "step_time": 0.48487373352050783} +{"epoch": 0, "iter": 5335, "iter_tflops": 48.045266086468175, "iter_time": 0.429409496307373, "loss": 0.20619776844978333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.289650776227006, "step_time": 0.39455405044555664} +{"epoch": 0, "iter": 5336, "iter_tflops": 50.65431244574219, "iter_time": 0.40729194641113287, "loss": 0.25691381096839905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.796669700096224, "step_time": 0.3765026893615723} +{"epoch": 0, "iter": 5337, "iter_tflops": 21.028757083678276, "iter_time": 0.7108647003173829, "loss": 0.24270185828208923, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 22.381527537228582, "step_time": 0.667899055480957} +{"epoch": 0, "iter": 5338, "iter_tflops": 10.506300454981956, "iter_time": 1.422822540283203, "loss": 0.33791661262512207, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 12.270089258666703, "step_time": 1.2182960357666013} +{"epoch": 0, "iter": 5339, "iter_tflops": 22.1440088168739, "iter_time": 0.675063003540039, "loss": 0.3512137234210968, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.916531195146444, "step_time": 0.6250321578979492} +{"epoch": 0, "iter": 5340, "iter_tflops": 23.2915259292941, "iter_time": 0.6418042831420899, "loss": 0.3589218854904175, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 25.048663514603952, "step_time": 0.5967823829650879} +{"epoch": 0, "iter": 5341, "iter_tflops": 21.922601015217875, "iter_time": 0.9410878524780272, "loss": 0.7454793453216553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.308345178417287, "step_time": 0.8851376342773437} +{"epoch": 0, "iter": 5342, "iter_tflops": 9.85024961691041, "iter_time": 2.0944741821289066, "loss": 
0.8474489450454712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.825071248920711, "step_time": 1.7446908416748046} +{"epoch": 0, "iter": 5343, "iter_tflops": 13.600021866191884, "iter_time": 1.516989730834961, "loss": 0.6553078889846802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.635421593459954, "step_time": 1.319509895324707} +{"epoch": 0, "iter": 5344, "iter_tflops": 25.172200360125608, "iter_time": 0.8195983352661134, "loss": 0.8776580095291138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.061340696598506, "step_time": 0.68629984664917} +{"epoch": 0, "iter": 5345, "iter_tflops": 20.183893981067072, "iter_time": 0.7143282241821289, "loss": 0.2408595085144043, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 21.412937515289148, "step_time": 0.6733277549743651} +{"epoch": 0, "iter": 5346, "iter_tflops": 9.467526212281822, "iter_time": 1.5228819885253908, "loss": 0.20413252711296082, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 11.64978695169492, "step_time": 1.2376127738952638} +{"epoch": 0, "iter": 5347, "iter_tflops": 23.362270565982936, "iter_time": 0.6171457138061524, "loss": 0.23695090413093567, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 24.993533310117567, "step_time": 0.5768662223815918} +{"epoch": 0, "iter": 5348, "iter_tflops": 24.748276127573398, "iter_time": 0.5825830078125, "loss": 0.2637310028076172, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 26.22954482019693, "step_time": 0.5496826286315918} +{"epoch": 0, "iter": 5349, "iter_tflops": 18.303753490752772, "iter_time": 1.1271509704589844, "loss": 0.6416805386543274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.046652972299448, "step_time": 1.0831873474121094} +{"epoch": 0, "iter": 5350, "iter_tflops": 21.990984896711794, "iter_time": 0.9381614151000975, "loss": 0.5250293612480164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.988115757801744, "step_time": 0.8256362228393554} +{"epoch": 0, "iter": 5351, "iter_tflops": 44.80194986660432, "iter_time": 0.4604954376220703, "loss": 
0.45066073536872864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.039601860549425, "step_time": 0.4294601268768311} +{"epoch": 0, "iter": 5352, "iter_tflops": 48.434813362215955, "iter_time": 0.42595587921142575, "loss": 0.7593503594398499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.3144276796804, "step_time": 0.39436718368530277} +{"epoch": 0, "iter": 5353, "iter_tflops": 5.508591561196488, "iter_time": 1.1173473968505858, "loss": 0.00570487417280674, "lr": 3e-05, "seqlen": 2496.0, "step_tflops": 5.719682683170498, "step_time": 1.0761104736328126} +{"epoch": 0, "iter": 5354, "iter_tflops": 4.650880029749183, "iter_time": 1.3234076995849609, "loss": 0.0051614404655992985, "lr": 3e-05, "seqlen": 2496.0, "step_tflops": 6.039745520731112, "step_time": 1.019084400177002} +{"epoch": 0, "iter": 5355, "iter_tflops": 16.716676587051765, "iter_time": 0.3681958198547363, "loss": 0.0068724388256669044, "lr": 3e-05, "seqlen": 2496.0, "step_tflops": 18.540434624262097, "step_time": 0.3319776782989502} +{"epoch": 0, "iter": 5356, "iter_tflops": 18.55751598758974, "iter_time": 0.3316721076965332, "loss": 0.001806833315640688, "lr": 3e-05, "seqlen": 2496.0, "step_tflops": 20.35767499623635, "step_time": 0.3023434867858886} +{"epoch": 0, "iter": 5357, "iter_tflops": 21.32535492654529, "iter_time": 0.9674443206787109, "loss": 0.39406299591064453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.2516783628136, "step_time": 0.9271702194213867} +{"epoch": 0, "iter": 5358, "iter_tflops": 13.882014709993934, "iter_time": 1.486174301147461, "loss": 0.26650741696357727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.45135583848095, "step_time": 1.2540664558410646} +{"epoch": 0, "iter": 5359, "iter_tflops": 37.57052519291548, "iter_time": 0.5491297607421876, "loss": 0.28555604815483093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.576145250156614, "step_time": 0.49622429847717286} +{"epoch": 0, "iter": 5360, "iter_tflops": 42.980429302111276, "iter_time": 
0.48001134109497073, "loss": 0.24734070897102356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.07866540132065, "step_time": 0.43822596359252924} +{"epoch": 0, "iter": 5361, "iter_tflops": 23.76545018280214, "iter_time": 0.8681128845214845, "loss": 0.03816996142268181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.281102902687373, "step_time": 0.8160677795410156} +{"epoch": 0, "iter": 5362, "iter_tflops": 32.537171820833734, "iter_time": 0.6340776519775392, "loss": 0.015594521537423134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.92379235511079, "step_time": 0.504134449005127} +{"epoch": 0, "iter": 5363, "iter_tflops": 55.24238156322609, "iter_time": 0.37346495437622074, "loss": 0.03976612165570259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.63521093002128, "step_time": 0.34024938964843754} +{"epoch": 0, "iter": 5364, "iter_tflops": 55.98037209322768, "iter_time": 0.3685415573120117, "loss": 0.03830898925662041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.71376571376128, "step_time": 0.33980915641784676} +{"epoch": 0, "iter": 5365, "iter_tflops": 38.164249921334545, "iter_time": 0.5405868988037109, "loss": 0.6769644021987915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.124756750451226, "step_time": 0.5016708946228027} +{"epoch": 0, "iter": 5366, "iter_tflops": 12.40466520778625, "iter_time": 1.6631721343994141, "loss": 0.5590870976448059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.1078014765019, "step_time": 1.2808137435913085} +{"epoch": 0, "iter": 5367, "iter_tflops": 14.25006488068814, "iter_time": 1.4477894439697265, "loss": 0.4503374695777893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.831885410559154, "step_time": 1.3031356010437012} +{"epoch": 0, "iter": 5368, "iter_tflops": 45.0229393492615, "iter_time": 0.4582351531982422, "loss": 0.5940324068069458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.805041303924796, "step_time": 0.4227246398925781} +{"epoch": 0, "iter": 5369, "iter_tflops": 22.23872015007995, 
"iter_time": 0.7218635864257813, "loss": 0.21622315049171448, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 23.784489023909998, "step_time": 0.6749492187499999} +{"epoch": 0, "iter": 5370, "iter_tflops": 9.811178156154746, "iter_time": 1.636227783203125, "loss": 0.26373597979545593, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 11.358077574230897, "step_time": 1.4133837509155276} +{"epoch": 0, "iter": 5371, "iter_tflops": 24.781478096836818, "iter_time": 0.6477951889038086, "loss": 0.281588077545166, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.772289226880716, "step_time": 0.5996245651245118} +{"epoch": 0, "iter": 5372, "iter_tflops": 24.755304904401402, "iter_time": 0.6484800872802734, "loss": 0.3317452073097229, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.581993430921333, "step_time": 0.6039171714782714} +{"epoch": 0, "iter": 5373, "iter_tflops": 19.497997336902877, "iter_time": 1.0581134643554688, "loss": 0.8710794448852539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.78899898865299, "step_time": 0.9924043731689454} +{"epoch": 0, "iter": 5374, "iter_tflops": 22.861714859246806, "iter_time": 0.9024298324584962, "loss": 0.9789812564849854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.597334768891514, "step_time": 0.7475755786895752} +{"epoch": 0, "iter": 5375, "iter_tflops": 43.18682408609822, "iter_time": 0.47771731185913086, "loss": 0.6428513526916504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.57034811426315, "step_time": 0.44300921821594247} +{"epoch": 0, "iter": 5376, "iter_tflops": 45.098493583762306, "iter_time": 0.45746746444702147, "loss": 0.8065339922904968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.53521349873169, "step_time": 0.4250747451782227} +{"epoch": 0, "iter": 5377, "iter_tflops": 41.92459576125964, "iter_time": 0.49209999847412106, "loss": 0.7105658054351807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.496154501890224, "step_time": 0.45346895217895505} +{"epoch": 0, "iter": 5378, "iter_tflops": 
39.03771231067038, "iter_time": 0.5284913558959962, "loss": 0.7853553295135498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.84929911082183, "step_time": 0.4929854011535645} +{"epoch": 0, "iter": 5379, "iter_tflops": 42.16795318773585, "iter_time": 0.4892600173950195, "loss": 0.6835366487503052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.496301660257124, "step_time": 0.4534674854278565} +{"epoch": 0, "iter": 5380, "iter_tflops": 45.56826153544445, "iter_time": 0.45275138473510745, "loss": 0.8000347018241882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.058814321336, "step_time": 0.42053795623779294} +{"epoch": 0, "iter": 5381, "iter_tflops": 37.98166021243305, "iter_time": 0.5431856689453126, "loss": 0.09407126158475876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.05268932601544, "step_time": 0.5025515708923339} +{"epoch": 0, "iter": 5382, "iter_tflops": 40.463249615507145, "iter_time": 0.5098723831176758, "loss": 0.08066781610250473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4592457808526, "step_time": 0.46404506301879883} +{"epoch": 0, "iter": 5383, "iter_tflops": 40.218719414768614, "iter_time": 0.512972412109375, "loss": 0.09375765919685364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.312151588179034, "step_time": 0.46558546066284173} +{"epoch": 0, "iter": 5384, "iter_tflops": 47.18091824192607, "iter_time": 0.43727621841430664, "loss": 0.10969240963459015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.71210151961541, "step_time": 0.39896064758300787} +{"epoch": 0, "iter": 5385, "iter_tflops": 14.920905075481693, "iter_time": 1.382697189331055, "loss": 0.017724549397826195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.568375271358851, "step_time": 1.3251924591064452} +{"epoch": 0, "iter": 5386, "iter_tflops": 19.62028678794743, "iter_time": 1.0515184478759765, "loss": 0.04160156846046448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.407291940698382, "step_time": 0.8452840061187745} +{"epoch": 0, "iter": 5387, 
"iter_tflops": 42.56440878816568, "iter_time": 0.4847029266357422, "loss": 0.03998122364282608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.17231581853535, "step_time": 0.4373559608459473} +{"epoch": 0, "iter": 5388, "iter_tflops": 45.556667978364445, "iter_time": 0.45286660385131833, "loss": 0.040657613426446915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.22676944499306, "step_time": 0.41075891876220705} +{"epoch": 0, "iter": 5389, "iter_tflops": 32.141366939204815, "iter_time": 0.6418860015869141, "loss": 0.2989841103553772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.68433801287703, "step_time": 0.5781554222106933} +{"epoch": 0, "iter": 5390, "iter_tflops": 40.18887198524566, "iter_time": 0.5133533859252929, "loss": 0.38490375876426697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.03765670556516, "step_time": 0.4684875411987305} +{"epoch": 0, "iter": 5391, "iter_tflops": 42.27283486953418, "iter_time": 0.4880461311340332, "loss": 0.24530355632305145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.1735434959729, "step_time": 0.44681633567810064} +{"epoch": 0, "iter": 5392, "iter_tflops": 44.10838962414934, "iter_time": 0.4677362670898437, "loss": 0.28575366735458374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0989466861952, "step_time": 0.4289302558898926} +{"epoch": 0, "iter": 5393, "iter_tflops": 20.414825355805753, "iter_time": 1.0105936813354492, "loss": 0.8866730332374573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.977756244393483, "step_time": 0.9387261047363282} +{"epoch": 0, "iter": 5394, "iter_tflops": 17.772499179920874, "iter_time": 1.1608436889648437, "loss": 0.9194309115409851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.215538691555757, "step_time": 0.9724520225524902} +{"epoch": 0, "iter": 5395, "iter_tflops": 36.1119431862344, "iter_time": 0.5713094253540039, "loss": 0.8957461714744568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.46476900566389, "step_time": 0.5227724380493164} +{"epoch": 0, "iter": 
5396, "iter_tflops": 41.19426513449032, "iter_time": 0.5008244094848633, "loss": 0.920629620552063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.67684678467894, "step_time": 0.46178490638732905} +{"epoch": 0, "iter": 5397, "iter_tflops": 33.83344363458357, "iter_time": 0.6097840270996093, "loss": 0.623662531375885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.38031481952891, "step_time": 0.5519240169525146} +{"epoch": 0, "iter": 5398, "iter_tflops": 36.807737999296904, "iter_time": 0.5605096817016602, "loss": 0.5363771319389343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.18933578010589, "step_time": 0.5133474617004394} +{"epoch": 0, "iter": 5399, "iter_tflops": 43.394143139996714, "iter_time": 0.4754349784851074, "loss": 0.5968411564826965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.27797558325192, "step_time": 0.43637853050231934} +{"epoch": 0, "iter": 5400, "iter_tflops": 39.32134767466623, "iter_time": 0.5246792068481445, "loss": 0.4958784282207489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90566751379251, "step_time": 0.4808477458953858} +{"epoch": 0, "iter": 5401, "iter_tflops": 15.999094829552167, "iter_time": 1.2895162963867186, "loss": 0.006366212386637926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.931219533022766, "step_time": 1.2185237731933594} +{"epoch": 0, "iter": 5402, "iter_tflops": 29.156939463120867, "iter_time": 0.707587760925293, "loss": 0.005320632364600897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.85385777131249, "step_time": 0.4704510517120361} +{"epoch": 0, "iter": 5403, "iter_tflops": 50.710142496825874, "iter_time": 0.4068435325622558, "loss": 0.001409587450325489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.600051884749305, "step_time": 0.37106248664855956} +{"epoch": 0, "iter": 5404, "iter_tflops": 52.915423505608494, "iter_time": 0.38988809204101565, "loss": 0.002364776097238064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.422866131887695, "step_time": 0.3592835903167725} +{"epoch": 0, 
"iter": 5405, "iter_tflops": 29.039065114688402, "iter_time": 0.710459976196289, "loss": 0.17878645658493042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.723890651564073, "step_time": 0.6715000305175781} +{"epoch": 0, "iter": 5406, "iter_tflops": 13.612812379030883, "iter_time": 1.5155643768310547, "loss": 0.13808172941207886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.232292536084405, "step_time": 1.197234405517578} +{"epoch": 0, "iter": 5407, "iter_tflops": 46.39623609565635, "iter_time": 0.44467170715332033, "loss": 0.13317689299583435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.48930467315313, "step_time": 0.408623046875} +{"epoch": 0, "iter": 5408, "iter_tflops": 48.59518187209923, "iter_time": 0.4245501861572265, "loss": 0.14120519161224365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.430595227398236, "step_time": 0.3934934062957764} +{"epoch": 0, "iter": 5409, "iter_tflops": 27.938328442535123, "iter_time": 0.7384512481689454, "loss": 0.7990831136703491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.57078370424368, "step_time": 0.697685043334961} +{"epoch": 0, "iter": 5410, "iter_tflops": 9.599618879729142, "iter_time": 2.1491575622558594, "loss": 1.0404407978057861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.647795100856753, "step_time": 1.6312008018493653} +{"epoch": 0, "iter": 5411, "iter_tflops": 10.754099835766995, "iter_time": 1.9184398345947264, "loss": 0.6820964813232422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.115724728507404, "step_time": 1.7028361053466798} +{"epoch": 0, "iter": 5412, "iter_tflops": 32.86072071218122, "iter_time": 0.6278344802856445, "loss": 0.8922221660614014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.344094169071816, "step_time": 0.5113782806396484} +{"epoch": 0, "iter": 5413, "iter_tflops": 14.087545083461709, "iter_time": 1.0640218200683593, "loss": 0.2971371114253998, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.929607421618767, "step_time": 1.0040086746215822} +{"epoch": 0, 
"iter": 5414, "iter_tflops": 8.709374673080786, "iter_time": 1.7210713653564456, "loss": 0.19342225790023804, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 9.532732033935472, "step_time": 1.5724196701049806} +{"epoch": 0, "iter": 5415, "iter_tflops": 8.01883671106939, "iter_time": 1.869280532836914, "loss": 0.2924579083919525, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 10.963865817060107, "step_time": 1.3671688079833986} +{"epoch": 0, "iter": 5416, "iter_tflops": 24.583173163983894, "iter_time": 0.6097445297241212, "loss": 0.27678927779197693, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 26.118298523843542, "step_time": 0.5739062728881836} +{"epoch": 0, "iter": 5417, "iter_tflops": 20.069914342080658, "iter_time": 0.7978282852172851, "loss": 0.251293420791626, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 21.207328411711774, "step_time": 0.7550383071899414} +{"epoch": 0, "iter": 5418, "iter_tflops": 14.85777308918814, "iter_time": 1.077708297729492, "loss": 0.2867918014526367, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 16.522681357194543, "step_time": 0.9691130027770996} +{"epoch": 0, "iter": 5419, "iter_tflops": 28.48439420906207, "iter_time": 0.5621444931030274, "loss": 0.22845183312892914, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.399028690697733, "step_time": 0.5267387161254883} +{"epoch": 0, "iter": 5420, "iter_tflops": 27.01320491807402, "iter_time": 0.5927599258422851, "loss": 0.2455112636089325, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 28.59707922662915, "step_time": 0.5599293975830079} +{"epoch": 0, "iter": 5421, "iter_tflops": 29.14566960872559, "iter_time": 0.7078613662719727, "loss": 0.7832762598991394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.01777891831059, "step_time": 0.6651376800537109} +{"epoch": 0, "iter": 5422, "iter_tflops": 16.961363444503206, "iter_time": 1.2163582000732422, "loss": 1.1478731632232666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.015158314159923, "step_time": 0.9371312808990477} +{"epoch": 0, 
"iter": 5423, "iter_tflops": 40.80592970374212, "iter_time": 0.505590576171875, "loss": 1.0382486581802368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96169831862624, "step_time": 0.46929700851440426} +{"epoch": 0, "iter": 5424, "iter_tflops": 44.14005625227969, "iter_time": 0.46740070724487304, "loss": 0.8112050294876099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.62008384669607, "step_time": 0.43324353599548343} +{"epoch": 0, "iter": 5425, "iter_tflops": 34.51991455037387, "iter_time": 0.5976577224731445, "loss": 0.4161028563976288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.7889344475998, "step_time": 0.560796169281006} +{"epoch": 0, "iter": 5426, "iter_tflops": 9.908306817813594, "iter_time": 2.082201721191406, "loss": 0.518913745880127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.829699417470351, "step_time": 1.4917962341308593} +{"epoch": 0, "iter": 5427, "iter_tflops": 11.368215093857083, "iter_time": 1.8148049926757812, "loss": 0.5229718089103699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.274736641350172, "step_time": 1.554162170410156} +{"epoch": 0, "iter": 5428, "iter_tflops": 26.92660033932491, "iter_time": 0.7661974868774415, "loss": 0.3918488621711731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.57466362827693, "step_time": 0.6333478603363037} +{"epoch": 0, "iter": 5429, "iter_tflops": 17.436979952182476, "iter_time": 0.8151658935546875, "loss": 0.3013933002948761, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 18.410771222992054, "step_time": 0.7720497512817383} +{"epoch": 0, "iter": 5430, "iter_tflops": 9.944888328412329, "iter_time": 1.4292801361083987, "loss": 0.11431333422660828, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 11.736286182052353, "step_time": 1.2111183319091796} +{"epoch": 0, "iter": 5431, "iter_tflops": 22.21473203133669, "iter_time": 0.6398470764160157, "loss": 0.32111379504203796, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 23.970799413022192, "step_time": 0.592972770690918} +{"epoch": 0, 
"iter": 5432, "iter_tflops": 22.543987654947724, "iter_time": 0.6305020904541017, "loss": 0.23720525205135345, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 24.120005588334497, "step_time": 0.5893046455383301} +{"epoch": 0, "iter": 5433, "iter_tflops": 19.093592842785647, "iter_time": 1.080524429321289, "loss": 0.4151114225387573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.418824475720026, "step_time": 1.010395751953125} +{"epoch": 0, "iter": 5434, "iter_tflops": 17.613006393469156, "iter_time": 1.1713555908203126, "loss": 0.39031773805618286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.758198600916042, "step_time": 0.9481986026763916} +{"epoch": 0, "iter": 5435, "iter_tflops": 42.035300516249094, "iter_time": 0.4908039970397949, "loss": 0.4203439950942993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.957382944049925, "step_time": 0.4489179363250732} +{"epoch": 0, "iter": 5436, "iter_tflops": 41.318870589306606, "iter_time": 0.4993140716552735, "loss": 0.47102564573287964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.45574382279048, "step_time": 0.45387209129333494} +{"epoch": 0, "iter": 5437, "iter_tflops": 26.387795218814006, "iter_time": 0.7818422622680664, "loss": 0.056902751326560974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.720695840397536, "step_time": 0.7183354339599609} +{"epoch": 0, "iter": 5438, "iter_tflops": 13.774895636642164, "iter_time": 1.4977313842773436, "loss": 0.05935656651854515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.557940586387268, "step_time": 1.1750292358398438} +{"epoch": 0, "iter": 5439, "iter_tflops": 44.242008770391465, "iter_time": 0.46632361602783207, "loss": 0.04821847751736641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80767295633396, "step_time": 0.4227018470764161} +{"epoch": 0, "iter": 5440, "iter_tflops": 44.764622564694086, "iter_time": 0.4608794250488281, "loss": 0.062581866979599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.829415753156866, "step_time": 0.42251362609863274} 
+{"epoch": 0, "iter": 5441, "iter_tflops": 33.386074016060796, "iter_time": 0.5140311889648437, "loss": 0.056008558720350266, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 37.13838731817302, "step_time": 0.4620955448150635} +{"epoch": 0, "iter": 5442, "iter_tflops": 39.37396932599689, "iter_time": 0.4358586044311523, "loss": 0.028986427932977676, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 43.37014449250691, "step_time": 0.3956980895996094} +{"epoch": 0, "iter": 5443, "iter_tflops": 46.24244325932278, "iter_time": 0.3711197357177734, "loss": 0.02971980907022953, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 50.61326083252889, "step_time": 0.33907088851928713} +{"epoch": 0, "iter": 5444, "iter_tflops": 49.88012422266968, "iter_time": 0.3440545425415039, "loss": 0.0356413759291172, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 54.583633546869265, "step_time": 0.31440712547302246} +{"epoch": 0, "iter": 5445, "iter_tflops": 31.046454383932016, "iter_time": 0.6645233383178711, "loss": 0.7089903354644775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.833688580462386, "step_time": 0.6283513793945312} +{"epoch": 0, "iter": 5446, "iter_tflops": 8.558987379037477, "iter_time": 2.410459625244141, "loss": 0.6676440834999084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.41349700311096, "step_time": 1.9811878280639648} +{"epoch": 0, "iter": 5447, "iter_tflops": 13.677257853335673, "iter_time": 1.5084232330322267, "loss": 0.7677401900291443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.393089427824805, "step_time": 1.1861661262512206} +{"epoch": 0, "iter": 5448, "iter_tflops": 36.412512707936145, "iter_time": 0.5665935134887695, "loss": 0.9130392670631409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.97983647989083, "step_time": 0.5160374660491944} +{"epoch": 0, "iter": 5449, "iter_tflops": 19.788267351113934, "iter_time": 0.8174683074951172, "loss": 0.29897648096084595, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 20.83029243015851, "step_time": 
0.7765748596191406} +{"epoch": 0, "iter": 5450, "iter_tflops": 9.711701650025988, "iter_time": 1.6656485137939452, "loss": 0.27363064885139465, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 11.425366454304312, "step_time": 1.4158216705322266} +{"epoch": 0, "iter": 5451, "iter_tflops": 23.555956501665, "iter_time": 0.6867172393798828, "loss": 0.3122311234474182, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 25.33936244781395, "step_time": 0.6383854942321777} +{"epoch": 0, "iter": 5452, "iter_tflops": 26.726813999570343, "iter_time": 0.6052454071044922, "loss": 0.38712191581726074, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 28.624471543584825, "step_time": 0.5651207008361816} +{"epoch": 0, "iter": 5453, "iter_tflops": 21.509589369617462, "iter_time": 0.9591579437255859, "loss": 0.0034502900671213865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.08777110577821, "step_time": 0.8935939903259278} +{"epoch": 0, "iter": 5454, "iter_tflops": 21.70291625021968, "iter_time": 0.950613883972168, "loss": 0.007367570418864489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.567703323371134, "step_time": 0.6977577285766602} +{"epoch": 0, "iter": 5455, "iter_tflops": 52.59404389438132, "iter_time": 0.3922705307006836, "loss": 0.0029220900032669306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.570281728286076, "step_time": 0.3583636016845703} +{"epoch": 0, "iter": 5456, "iter_tflops": 60.222112989576075, "iter_time": 0.34258335494995124, "loss": 0.004869480617344379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.93288184057018, "step_time": 0.31291053771972654} +{"epoch": 0, "iter": 5457, "iter_tflops": 40.85423378236253, "iter_time": 0.504992790222168, "loss": 0.6850801110267639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52629182616545, "step_time": 0.4633463211059571} +{"epoch": 0, "iter": 5458, "iter_tflops": 34.78574356726105, "iter_time": 0.5930904846191406, "loss": 0.9189345836639404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.49061928144445, 
"step_time": 0.5360031585693359} +{"epoch": 0, "iter": 5459, "iter_tflops": 38.949733789173976, "iter_time": 0.5296850967407226, "loss": 0.7163374423980713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.48655520154328, "step_time": 0.4855911102294922} +{"epoch": 0, "iter": 5460, "iter_tflops": 40.19401344948319, "iter_time": 0.5132877197265625, "loss": 0.8523157238960266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.44547267182224, "step_time": 0.4748732662200928} +{"epoch": 0, "iter": 5461, "iter_tflops": 17.343924191498242, "iter_time": 1.189528579711914, "loss": 0.414360374212265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.41527152426935, "step_time": 1.120325241088867} +{"epoch": 0, "iter": 5462, "iter_tflops": 15.136608713099115, "iter_time": 1.3629931182861328, "loss": 0.4126993417739868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.990000159742326, "step_time": 1.1468089675903321} +{"epoch": 0, "iter": 5463, "iter_tflops": 39.5838373886224, "iter_time": 0.5211999359130859, "loss": 0.3230486512184143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24195527354123, "step_time": 0.47710824775695804} +{"epoch": 0, "iter": 5464, "iter_tflops": 39.28362678051735, "iter_time": 0.5251830139160156, "loss": 0.48792484402656555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99577450291819, "step_time": 0.4798400249481201} +{"epoch": 0, "iter": 5465, "iter_tflops": 24.294094383608616, "iter_time": 0.8492225799560548, "loss": 0.0026851836591959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.405964169029694, "step_time": 0.7813043060302733} +{"epoch": 0, "iter": 5466, "iter_tflops": 20.810477021341544, "iter_time": 0.9913801345825195, "loss": 0.00567571772262454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.01398142089846, "step_time": 0.7930771217346191} +{"epoch": 0, "iter": 5467, "iter_tflops": 46.14784231285412, "iter_time": 0.44706518173217774, "loss": 0.0035324699711054564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.29436554286948, 
"step_time": 0.4022097415924072} +{"epoch": 0, "iter": 5468, "iter_tflops": 48.261469016850306, "iter_time": 0.4274858169555664, "loss": 0.0031331086065620184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.24759362910821, "step_time": 0.38745588493347166} +{"epoch": 0, "iter": 5469, "iter_tflops": 23.873349430550697, "iter_time": 0.8641893157958985, "loss": 0.19845141470432281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.569540390906255, "step_time": 0.8068621177673341} +{"epoch": 0, "iter": 5470, "iter_tflops": 9.337930052958136, "iter_time": 2.2093861694335937, "loss": 0.21269665658473969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.466771001405464, "step_time": 1.971103935241699} +{"epoch": 0, "iter": 5471, "iter_tflops": 14.898831677068328, "iter_time": 1.3847457275390622, "loss": 0.1809360533952713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.792527332638432, "step_time": 1.0978349609374998} +{"epoch": 0, "iter": 5472, "iter_tflops": 27.668853914984144, "iter_time": 0.7456432266235351, "loss": 0.2640489339828491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.211654199766205, "step_time": 0.6610060901641845} +{"epoch": 0, "iter": 5473, "iter_tflops": 15.392601827530184, "iter_time": 1.2810808868408203, "loss": 0.3376699984073639, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 16.27132785256033, "step_time": 1.211896667480469} +{"epoch": 0, "iter": 5474, "iter_tflops": 14.95142025286492, "iter_time": 1.3188825988769532, "loss": 0.32832956314086914, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 18.93886992792745, "step_time": 1.0412008781433106} +{"epoch": 0, "iter": 5475, "iter_tflops": 37.21718834901329, "iter_time": 0.5298403472900391, "loss": 0.2997318506240845, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 39.47846927115253, "step_time": 0.49949170684814453} +{"epoch": 0, "iter": 5476, "iter_tflops": 35.97523542642422, "iter_time": 0.5481317291259766, "loss": 0.23883402347564697, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 
38.25315733118185, "step_time": 0.5154912528991699} +{"epoch": 0, "iter": 5477, "iter_tflops": 25.565135140176174, "iter_time": 0.8070011520385741, "loss": 0.753762423992157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.93628942852224, "step_time": 0.7659218826293944} +{"epoch": 0, "iter": 5478, "iter_tflops": 17.842484628694912, "iter_time": 1.1562903900146484, "loss": 0.8560481071472168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.724543204600902, "step_time": 0.9078771495819091} +{"epoch": 0, "iter": 5479, "iter_tflops": 37.19143733583874, "iter_time": 0.5547269744873047, "loss": 0.8901084661483765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.643077463550895, "step_time": 0.5076164207458496} +{"epoch": 0, "iter": 5480, "iter_tflops": 34.953754831683256, "iter_time": 0.5902396926879883, "loss": 0.877528727054596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.85892308998935, "step_time": 0.5449466552734376} +{"epoch": 0, "iter": 5481, "iter_tflops": 26.933185929554824, "iter_time": 0.766010139465332, "loss": 0.4741613268852234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.01182192472492, "step_time": 0.7111271247863771} +{"epoch": 0, "iter": 5482, "iter_tflops": 18.812594825751926, "iter_time": 1.0966638946533203, "loss": 0.6349310278892517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.964635185293915, "step_time": 0.8983854236602784} +{"epoch": 0, "iter": 5483, "iter_tflops": 36.18962583757667, "iter_time": 0.5700830841064454, "loss": 0.6479963064193726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.4982363233693, "step_time": 0.5223294868469237} +{"epoch": 0, "iter": 5484, "iter_tflops": 36.910785772387676, "iter_time": 0.5589448471069336, "loss": 0.583231508731842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.4764059629787, "step_time": 0.5097066555023193} +{"epoch": 0, "iter": 5485, "iter_tflops": 16.974178094135095, "iter_time": 1.215439910888672, "loss": 0.7940469980239868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
17.972447452678335, "step_time": 1.1479289932250976} +{"epoch": 0, "iter": 5486, "iter_tflops": 18.66819810933175, "iter_time": 1.105146484375, "loss": 0.9403935074806213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.558936093587413, "step_time": 0.8757226314544678} +{"epoch": 0, "iter": 5487, "iter_tflops": 48.944016327692296, "iter_time": 0.4215243263244629, "loss": 1.0213134288787842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.16724123705331, "step_time": 0.38804145240783694} +{"epoch": 0, "iter": 5488, "iter_tflops": 44.98736586442041, "iter_time": 0.45859749984741205, "loss": 0.785201370716095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.195262636676446, "step_time": 0.42807305908203125} +{"epoch": 0, "iter": 5489, "iter_tflops": 36.19193572627305, "iter_time": 0.5700466995239257, "loss": 0.2931731045246124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.83913351332188, "step_time": 0.5311934547424316} +{"epoch": 0, "iter": 5490, "iter_tflops": 19.391975120420003, "iter_time": 1.0638985137939454, "loss": 0.33748388290405273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.233328788565174, "step_time": 0.8879955902099609} +{"epoch": 0, "iter": 5491, "iter_tflops": 48.928003818794686, "iter_time": 0.4216622772216797, "loss": 0.4980621337890625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.24180759488889, "step_time": 0.3874979915618897} +{"epoch": 0, "iter": 5492, "iter_tflops": 53.639137724926044, "iter_time": 0.384627613067627, "loss": 0.24921418726444244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.13852045334797, "step_time": 0.35486100006103516} +{"epoch": 0, "iter": 5493, "iter_tflops": 27.661766172768377, "iter_time": 0.7458342819213867, "loss": 0.8660593628883362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.343525420306094, "step_time": 0.7030884399414062} +{"epoch": 0, "iter": 5494, "iter_tflops": 15.446446648154385, "iter_time": 1.3356530456542968, "loss": 0.7205955982208252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
18.001200301797944, "step_time": 1.1460954360961915} +{"epoch": 0, "iter": 5495, "iter_tflops": 44.00164600370278, "iter_time": 0.46887094879150387, "loss": 0.7749443054199219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43185439562172, "step_time": 0.4349628276824951} +{"epoch": 0, "iter": 5496, "iter_tflops": 44.3197431100729, "iter_time": 0.4655057106018066, "loss": 0.8057745099067688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76955989172413, "step_time": 0.43188787078857427} +{"epoch": 0, "iter": 5497, "iter_tflops": 24.41175402984342, "iter_time": 0.8451295013427734, "loss": 0.17820607125759125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.74781872940291, "step_time": 0.8012753906250001} +{"epoch": 0, "iter": 5498, "iter_tflops": 14.766826535277557, "iter_time": 1.3971243896484375, "loss": 0.17236123979091644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.16227341832471, "step_time": 1.2021189155578613} +{"epoch": 0, "iter": 5499, "iter_tflops": 37.00123073268298, "iter_time": 0.5575785751342773, "loss": 0.18908247351646423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.70757320009946, "step_time": 0.5068121700286865} +{"epoch": 0, "iter": 5500, "iter_tflops": 41.66417003532958, "iter_time": 0.49517591476440426, "loss": 0.1515616774559021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.5564266045913, "step_time": 0.45286900329589835} +{"epoch": 0, "iter": 5501, "iter_tflops": 19.92993600361309, "iter_time": 1.0351811218261717, "loss": 0.5225124359130859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.232718186720426, "step_time": 0.9716652069091798} +{"epoch": 0, "iter": 5502, "iter_tflops": 15.300252427816865, "iter_time": 1.348415237426758, "loss": 0.6543552279472351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.694730785982237, "step_time": 1.0475438194274902} +{"epoch": 0, "iter": 5503, "iter_tflops": 48.274381203230384, "iter_time": 0.42737147521972657, "loss": 0.5555219054222107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
52.56597670409509, "step_time": 0.39247998046875} +{"epoch": 0, "iter": 5504, "iter_tflops": 47.91821423055365, "iter_time": 0.43054804611206055, "loss": 0.5591113567352295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.93659109425287, "step_time": 0.39723618888854983} +{"epoch": 0, "iter": 5505, "iter_tflops": 26.696702332923802, "iter_time": 0.7727955780029296, "loss": 0.18002274632453918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.132002429546844, "step_time": 0.7333674011230469} +{"epoch": 0, "iter": 5506, "iter_tflops": 10.737608532473427, "iter_time": 1.921386260986328, "loss": 0.21190249919891357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.417755912469655, "step_time": 1.661418830871582} +{"epoch": 0, "iter": 5507, "iter_tflops": 13.685958186535142, "iter_time": 1.5074643096923825, "loss": 0.21121490001678467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.859064730895179, "step_time": 1.3009022827148438} +{"epoch": 0, "iter": 5508, "iter_tflops": 29.4197375484933, "iter_time": 0.7012670822143554, "loss": 0.26633238792419434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.61117384980484, "step_time": 0.5635190391540528} +{"epoch": 0, "iter": 5509, "iter_tflops": 16.175215250918363, "iter_time": 0.8863160858154296, "loss": 0.2548176646232605, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 17.306594278444276, "step_time": 0.8283751983642578} +{"epoch": 0, "iter": 5510, "iter_tflops": 25.1802727299387, "iter_time": 0.5693486175537109, "loss": 0.25870993733406067, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 26.926927167052703, "step_time": 0.5324169883728027} +{"epoch": 0, "iter": 5511, "iter_tflops": 25.47691182284036, "iter_time": 0.5627194366455078, "loss": 0.17419680953025818, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 27.223119245458044, "step_time": 0.5266242027282715} +{"epoch": 0, "iter": 5512, "iter_tflops": 25.681511156568646, "iter_time": 0.5582363662719726, "loss": 0.3238753080368042, "lr": 3e-05, "seqlen": 5744.0, 
"step_tflops": 27.39489094236079, "step_time": 0.5233221588134767} +{"epoch": 0, "iter": 5513, "iter_tflops": 24.557954732613005, "iter_time": 0.588759048461914, "loss": 0.04491520673036575, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 26.34739536829083, "step_time": 0.5487721977233887} +{"epoch": 0, "iter": 5514, "iter_tflops": 11.374914324289861, "iter_time": 1.2711056671142578, "loss": 0.04298948124051094, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 12.77793305766888, "step_time": 1.1315380973815918} +{"epoch": 0, "iter": 5515, "iter_tflops": 37.44826314964413, "iter_time": 0.3860984954833985, "loss": 0.016979360952973366, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 41.161678742978076, "step_time": 0.35126648139953615} +{"epoch": 0, "iter": 5516, "iter_tflops": 34.21780435024401, "iter_time": 0.42254955673217776, "loss": 0.04882253333926201, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 37.105857370512375, "step_time": 0.3896613388061524} +{"epoch": 0, "iter": 5517, "iter_tflops": 31.258979650275958, "iter_time": 0.6600053405761719, "loss": 0.4151095151901245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.516088439969266, "step_time": 0.6155579147338868} +{"epoch": 0, "iter": 5518, "iter_tflops": 13.318825966719091, "iter_time": 1.5490174255371094, "loss": 0.44321736693382263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.86647444974977, "step_time": 1.3877596588134766} +{"epoch": 0, "iter": 5519, "iter_tflops": 46.26345886435871, "iter_time": 0.4459479255676269, "loss": 0.45927178859710693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.046875017090365, "step_time": 0.40415977478027343} +{"epoch": 0, "iter": 5520, "iter_tflops": 46.72595530070355, "iter_time": 0.4415339050292968, "loss": 0.36183178424835205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.111669610645826, "step_time": 0.41170237731933595} +{"epoch": 0, "iter": 5521, "iter_tflops": 45.20304883963145, "iter_time": 0.45640933609008794, "loss": 0.20113791525363922, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 49.57628720097416, "step_time": 0.4161484184265137} +{"epoch": 0, "iter": 5522, "iter_tflops": 45.82059744691558, "iter_time": 0.4502580642700196, "loss": 0.13652099668979645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.1368333284003, "step_time": 0.4034487895965576} +{"epoch": 0, "iter": 5523, "iter_tflops": 48.99454585480588, "iter_time": 0.4210895957946777, "loss": 0.20495964586734772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.016736029082274, "step_time": 0.38914303398132327} +{"epoch": 0, "iter": 5524, "iter_tflops": 51.45011337694456, "iter_time": 0.4009921875, "loss": 0.21874479949474335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.06525315110311, "step_time": 0.36798359680175785} +{"epoch": 0, "iter": 5525, "iter_tflops": 40.80714402672487, "iter_time": 0.5055755310058594, "loss": 0.6872284412384033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14930579921869, "step_time": 0.4673027839660645} +{"epoch": 0, "iter": 5526, "iter_tflops": 43.45275143799792, "iter_time": 0.4747937202453614, "loss": 0.9932504892349243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.07782488911804, "step_time": 0.4382337875366211} +{"epoch": 0, "iter": 5527, "iter_tflops": 44.29469797881502, "iter_time": 0.4657689170837402, "loss": 0.9615576863288879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.92573012131978, "step_time": 0.430480525970459} +{"epoch": 0, "iter": 5528, "iter_tflops": 43.33260564084979, "iter_time": 0.4761101531982422, "loss": 0.8319970369338989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.363243535479796, "step_time": 0.44498814010620114} +{"epoch": 0, "iter": 5529, "iter_tflops": 20.523581148685054, "iter_time": 1.005238479614258, "loss": 0.08552957326173782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.397437226155944, "step_time": 0.9641852569580077} +{"epoch": 0, "iter": 5530, "iter_tflops": 15.723815757753176, "iter_time": 1.312092041015625, "loss": 0.16813403367996216, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.422426033730066, "step_time": 1.010217565536499} +{"epoch": 0, "iter": 5531, "iter_tflops": 41.7083404430379, "iter_time": 0.49465150833129884, "loss": 0.18080542981624603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.85510149992951, "step_time": 0.4499192638397218} +{"epoch": 0, "iter": 5532, "iter_tflops": 42.45373742107637, "iter_time": 0.4859664840698242, "loss": 0.17291049659252167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.56487916058086, "step_time": 0.4430612487792968} +{"epoch": 0, "iter": 5533, "iter_tflops": 16.81694361092589, "iter_time": 0.8864721145629884, "loss": 0.07317247241735458, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 18.040137115060798, "step_time": 0.8263657569885254} +{"epoch": 0, "iter": 5534, "iter_tflops": 11.11270195418347, "iter_time": 1.3415055694580078, "loss": 0.10525049269199371, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 13.27873914225052, "step_time": 1.1226782455444335} +{"epoch": 0, "iter": 5535, "iter_tflops": 33.71091754673208, "iter_time": 0.44222325134277346, "loss": 0.0978885143995285, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 36.619597197401106, "step_time": 0.4070976390838623} +{"epoch": 0, "iter": 5536, "iter_tflops": 38.07556335423004, "iter_time": 0.39153068923950196, "loss": 0.10686942934989929, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 41.603258956936244, "step_time": 0.35833134078979495} +{"epoch": 0, "iter": 5537, "iter_tflops": 33.00302573318004, "iter_time": 0.6251273345947266, "loss": 0.0958658903837204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.26635416165698, "step_time": 0.5850078353881836} +{"epoch": 0, "iter": 5538, "iter_tflops": 11.533948742700172, "iter_time": 1.788727691650391, "loss": 0.16814231872558594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.089094027776653, "step_time": 1.4643307418823241} +{"epoch": 0, "iter": 5539, "iter_tflops": 12.810098285119338, "iter_time": 1.6105335845947266, "loss": 0.1576278805732727, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 15.20579964964607, "step_time": 1.3567910919189454} +{"epoch": 0, "iter": 5540, "iter_tflops": 15.36365354559018, "iter_time": 1.3428507385253905, "loss": 0.14889943599700928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.27280767931438, "step_time": 1.129059850692749} +{"epoch": 0, "iter": 5541, "iter_tflops": 16.61560318012747, "iter_time": 0.8431987304687499, "loss": 0.28992992639541626, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 18.036707421544648, "step_time": 0.7767634735107423} +{"epoch": 0, "iter": 5542, "iter_tflops": 13.792671256766951, "iter_time": 1.0157753524780273, "loss": 0.31503087282180786, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 16.887184093588125, "step_time": 0.8296383476257324} +{"epoch": 0, "iter": 5543, "iter_tflops": 24.503388950294195, "iter_time": 0.5717680740356446, "loss": 0.1772991418838501, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 26.13577046209147, "step_time": 0.5360567245483399} +{"epoch": 0, "iter": 5544, "iter_tflops": 24.426791607910705, "iter_time": 0.573561019897461, "loss": 0.11995605379343033, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 25.932567932302167, "step_time": 0.5402571601867676} +{"epoch": 0, "iter": 5545, "iter_tflops": 43.94320710445203, "iter_time": 0.4694944877624511, "loss": 0.8388730883598328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.84252390669466, "step_time": 0.43122920417785643} +{"epoch": 0, "iter": 5546, "iter_tflops": 43.069192188289875, "iter_time": 0.47902206802368164, "loss": 0.6318361163139343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.31490332826436, "step_time": 0.4454525871276855} +{"epoch": 0, "iter": 5547, "iter_tflops": 43.916339938607386, "iter_time": 0.4697817153930664, "loss": 0.6868429780006409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.45181927695551, "step_time": 0.434779821395874} +{"epoch": 0, "iter": 5548, "iter_tflops": 46.396131815026585, "iter_time": 0.44467270660400393, "loss": 0.9537057876586914, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04088161292089, "step_time": 0.4122847728729248} +{"epoch": 0, "iter": 5549, "iter_tflops": 24.91243808396846, "iter_time": 0.5918489608764648, "loss": 0.11240893602371216, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 26.611784132331834, "step_time": 0.5540553207397461} +{"epoch": 0, "iter": 5550, "iter_tflops": 9.327304586659542, "iter_time": 1.5807782897949216, "loss": 0.09597968310117722, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 11.423719203286293, "step_time": 1.2906830368041993} +{"epoch": 0, "iter": 5551, "iter_tflops": 35.71155950084453, "iter_time": 0.4128747329711914, "loss": 0.12188281863927841, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 40.458666861168815, "step_time": 0.3644312019348145} +{"epoch": 0, "iter": 5552, "iter_tflops": 36.38279172157403, "iter_time": 0.40525753784179686, "loss": 0.10928482562303543, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 39.78607064439275, "step_time": 0.37059202766418453} +{"epoch": 0, "iter": 5553, "iter_tflops": 29.37383246613662, "iter_time": 0.7023630142211914, "loss": 0.5156259536743164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.243030244524004, "step_time": 0.660342269897461} +{"epoch": 0, "iter": 5554, "iter_tflops": 22.768112948014075, "iter_time": 0.9061398086547852, "loss": 0.461327463388443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.899832695470486, "step_time": 0.6676765441894531} +{"epoch": 0, "iter": 5555, "iter_tflops": 48.580855547798485, "iter_time": 0.42467538452148434, "loss": 0.498534619808197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76554400191477, "step_time": 0.3909955615997315} +{"epoch": 0, "iter": 5556, "iter_tflops": 50.78586397135251, "iter_time": 0.40623693084716794, "loss": 0.6278591752052307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.88983561883383, "step_time": 0.3758636417388916} +{"epoch": 0, "iter": 5557, "iter_tflops": 31.14468698975028, "iter_time": 0.6624273834228517, "loss": 
0.6643316745758057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.1961584865875, "step_time": 0.6214903907775879} +{"epoch": 0, "iter": 5558, "iter_tflops": 24.089828197147916, "iter_time": 0.856423439025879, "loss": 0.6132151484489441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.301462868642954, "step_time": 0.7040977306365968} +{"epoch": 0, "iter": 5559, "iter_tflops": 40.32434120579773, "iter_time": 0.5116287803649902, "loss": 0.5685025453567505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10949798925906, "step_time": 0.46772451400756837} +{"epoch": 0, "iter": 5560, "iter_tflops": 43.77950239584964, "iter_time": 0.47125006866455077, "loss": 0.5044599771499634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.542263516112214, "step_time": 0.43395269775390627} +{"epoch": 0, "iter": 5561, "iter_tflops": 34.492740400185106, "iter_time": 0.5981285705566407, "loss": 0.2190314382314682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.00552054798264, "step_time": 0.5428446502685548} +{"epoch": 0, "iter": 5562, "iter_tflops": 37.392345693432304, "iter_time": 0.551746437072754, "loss": 0.1929558515548706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18248926398419, "step_time": 0.5009676170349121} +{"epoch": 0, "iter": 5563, "iter_tflops": 36.899706092279246, "iter_time": 0.559112678527832, "loss": 0.2106926590204239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.43629062336807, "step_time": 0.5102123165130614} +{"epoch": 0, "iter": 5564, "iter_tflops": 42.552232393524115, "iter_time": 0.48484162521362306, "loss": 0.1701378971338272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.50321125575121, "step_time": 0.44364879226684567} +{"epoch": 0, "iter": 5565, "iter_tflops": 21.001325837182204, "iter_time": 0.9823710021972656, "loss": 0.7359204292297363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.50481622709752, "step_time": 0.9167412567138671} +{"epoch": 0, "iter": 5566, "iter_tflops": 20.284887287769035, "iter_time": 1.0170671997070313, "loss": 
0.5733527541160583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.273649141456296, "step_time": 0.8499378643035889} +{"epoch": 0, "iter": 5567, "iter_tflops": 47.73541895736915, "iter_time": 0.43219676208496094, "loss": 0.6825569272041321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.845863519720425, "step_time": 0.3979313316345215} +{"epoch": 0, "iter": 5568, "iter_tflops": 47.372918323065484, "iter_time": 0.4355039596557618, "loss": 0.5774073004722595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.97438541944413, "step_time": 0.40473452186584474} +{"epoch": 0, "iter": 5569, "iter_tflops": 29.561016123927676, "iter_time": 0.6979155731201172, "loss": 0.3503206670284271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.505909336412355, "step_time": 0.6548325042724609} +{"epoch": 0, "iter": 5570, "iter_tflops": 11.921659579977476, "iter_time": 1.7305554962158203, "loss": 0.26522162556648254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.514525282233684, "step_time": 1.249269546508789} +{"epoch": 0, "iter": 5571, "iter_tflops": 11.190726877251574, "iter_time": 1.8435883331298828, "loss": 0.3474849760532379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.366644965636727, "step_time": 1.4360411605834962} +{"epoch": 0, "iter": 5572, "iter_tflops": 17.340674355934006, "iter_time": 1.189751510620117, "loss": 0.3105680048465729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.320687337846344, "step_time": 0.9676561164855958} +{"epoch": 0, "iter": 5573, "iter_tflops": 17.94073716840579, "iter_time": 0.8241150360107423, "loss": 0.19791197776794434, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 19.312865917777497, "step_time": 0.7655638122558596} +{"epoch": 0, "iter": 5574, "iter_tflops": 8.368412623126014, "iter_time": 1.7667904205322265, "loss": 0.35562455654144287, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 9.446279509748749, "step_time": 1.5651909561157225} +{"epoch": 0, "iter": 5575, "iter_tflops": 6.872809102468525, "iter_time": 2.1512646484375, 
"loss": 0.26502954959869385, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 8.90161495517444, "step_time": 1.660960548400879} +{"epoch": 0, "iter": 5576, "iter_tflops": 14.892825879825141, "iter_time": 0.992775405883789, "loss": 0.27360478043556213, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 22.31043860671018, "step_time": 0.6627046432495117} +{"epoch": 0, "iter": 5577, "iter_tflops": 13.441586924989775, "iter_time": 1.0938868103027344, "loss": 0.19962020218372345, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 14.559020342889093, "step_time": 1.0099288482666016} +{"epoch": 0, "iter": 5578, "iter_tflops": 19.138975012608665, "iter_time": 0.7682529830932617, "loss": 0.4288489818572998, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 20.761082280423413, "step_time": 0.7082277526855469} +{"epoch": 0, "iter": 5579, "iter_tflops": 22.34310763894716, "iter_time": 0.6580810012817383, "loss": 0.19406946003437042, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.079072511170185, "step_time": 0.6106370849609375} +{"epoch": 0, "iter": 5580, "iter_tflops": 23.55829614123721, "iter_time": 0.6241357421875, "loss": 0.11603157222270966, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 25.2942198371041, "step_time": 0.5813017654418945} +{"epoch": 0, "iter": 5581, "iter_tflops": 21.358238784204914, "iter_time": 0.9659548110961914, "loss": 1.0424368381500244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.68172936361047, "step_time": 0.909590850830078} +{"epoch": 0, "iter": 5582, "iter_tflops": 11.433341512209465, "iter_time": 1.804467529296875, "loss": 0.8360582590103149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.498737181190773, "step_time": 1.528372116088867} +{"epoch": 0, "iter": 5583, "iter_tflops": 10.225671286746088, "iter_time": 2.017578399658203, "loss": 0.6820975542068481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.236817303719498, "step_time": 1.6859852523803711} +{"epoch": 0, "iter": 5584, "iter_tflops": 34.741212281037804, "iter_time": 0.5938507080078125, 
"loss": 0.8077052235603333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.7133144142093, "step_time": 0.5470506591796875} +{"epoch": 0, "iter": 5585, "iter_tflops": 9.54613569188635, "iter_time": 1.6859496307373047, "loss": 0.38099542260169983, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 10.261960911100056, "step_time": 1.568345863342285} +{"epoch": 0, "iter": 5586, "iter_tflops": 18.585100947420784, "iter_time": 0.8659788284301757, "loss": 0.18770135939121246, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 24.38510294977808, "step_time": 0.6600055770874024} +{"epoch": 0, "iter": 5587, "iter_tflops": 27.884921903719704, "iter_time": 0.5771686935424805, "loss": 0.2999453544616699, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 29.589457208815794, "step_time": 0.5439202156066895} +{"epoch": 0, "iter": 5588, "iter_tflops": 28.69300005891235, "iter_time": 0.560913948059082, "loss": 0.3715589642524719, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 30.546459524539376, "step_time": 0.5268795204162599} +{"epoch": 0, "iter": 5589, "iter_tflops": 19.212628348752137, "iter_time": 1.0738298339843753, "loss": 0.3872556686401367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.012070837401218, "step_time": 1.0309324645996094} +{"epoch": 0, "iter": 5590, "iter_tflops": 17.02160755294039, "iter_time": 1.2120531768798828, "loss": 0.3214297890663147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.14511601253515, "step_time": 1.0776165313720703} +{"epoch": 0, "iter": 5591, "iter_tflops": 46.996795760220074, "iter_time": 0.43898936462402344, "loss": 0.44804298877716064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.169305436488635, "step_time": 0.40319276046752933} +{"epoch": 0, "iter": 5592, "iter_tflops": 47.8697484035003, "iter_time": 0.4309839553833008, "loss": 0.3726431727409363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.03999584689001, "step_time": 0.396446870803833} +{"epoch": 0, "iter": 5593, "iter_tflops": 22.6398283057467, "iter_time": 0.9112742919921875, 
"loss": 0.8090881109237671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.755926749855337, "step_time": 0.8684608993530274} +{"epoch": 0, "iter": 5594, "iter_tflops": 14.569628822887987, "iter_time": 1.4160342559814452, "loss": 0.6906169056892395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.537821736279, "step_time": 1.112918972015381} +{"epoch": 0, "iter": 5595, "iter_tflops": 38.93660478420713, "iter_time": 0.5298637008666991, "loss": 0.9208734035491943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.38591434079924, "step_time": 0.48674409484863274} +{"epoch": 0, "iter": 5596, "iter_tflops": 37.607845901459534, "iter_time": 0.5485848236083984, "loss": 0.6616190075874329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.70243874296775, "step_time": 0.5068761024475097} +{"epoch": 0, "iter": 5597, "iter_tflops": 11.327922155354571, "iter_time": 0.9112126617431642, "loss": 0.011650596745312214, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 12.289633297900934, "step_time": 0.8399067611694336} +{"epoch": 0, "iter": 5598, "iter_tflops": 8.269177139225084, "iter_time": 1.248267623901367, "loss": 0.024070480838418007, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 10.904611875953819, "step_time": 0.9465853729248046} +{"epoch": 0, "iter": 5599, "iter_tflops": 28.16013463625714, "iter_time": 0.36655173110961914, "loss": 0.005234431941062212, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 31.066448598189773, "step_time": 0.33226025390625} +{"epoch": 0, "iter": 5600, "iter_tflops": 31.793977395158006, "iter_time": 0.3246572761535645, "loss": 0.0026445083785802126, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 34.995504887897695, "step_time": 0.2949563417434692} +{"epoch": 0, "iter": 5601, "iter_tflops": 37.99911880045726, "iter_time": 0.5429361038208008, "loss": 0.16010327637195587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.720366800332194, "step_time": 0.5066529388427734} +{"epoch": 0, "iter": 5602, "iter_tflops": 12.733409924701816, "iter_time": 
1.6202332000732425, "loss": 0.13391509652137756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.189587919310387, "step_time": 1.2743433380126954} +{"epoch": 0, "iter": 5603, "iter_tflops": 41.72242607350479, "iter_time": 0.49448451232910157, "loss": 0.13423514366149902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.00980499306817, "step_time": 0.44840645408630364} +{"epoch": 0, "iter": 5604, "iter_tflops": 40.306020835687136, "iter_time": 0.5118613319396973, "loss": 0.14508214592933655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.3237263766584, "step_time": 0.46546387672424316} +{"epoch": 0, "iter": 5605, "iter_tflops": 34.14451196127466, "iter_time": 0.6042286834716797, "loss": 0.14934629201889038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.21634687653742, "step_time": 0.5543556861877441} +{"epoch": 0, "iter": 5606, "iter_tflops": 9.557782575558976, "iter_time": 2.1585648498535157, "loss": 0.13170844316482544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.70987112759719, "step_time": 1.6232338867187501} +{"epoch": 0, "iter": 5607, "iter_tflops": 10.290862017744697, "iter_time": 2.004797409057617, "loss": 0.14496755599975586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.689582814237587, "step_time": 1.6258291397094728} +{"epoch": 0, "iter": 5608, "iter_tflops": 43.67945982019245, "iter_time": 0.4723294105529785, "loss": 0.12144097685813904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63237286675172, "step_time": 0.4156781616210937} +{"epoch": 0, "iter": 5609, "iter_tflops": 20.448861172141115, "iter_time": 0.7570140151977539, "loss": 0.48913130164146423, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 21.616701050483318, "step_time": 0.7161164169311525} +{"epoch": 0, "iter": 5610, "iter_tflops": 9.142224792946251, "iter_time": 1.693250259399414, "loss": 0.3540063500404358, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 12.224474511282517, "step_time": 1.2663181953430176} +{"epoch": 0, "iter": 5611, "iter_tflops": 24.259861482883174, 
"iter_time": 0.6380941009521485, "loss": 0.34835025668144226, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 26.109285000363137, "step_time": 0.5928953819274903} +{"epoch": 0, "iter": 5612, "iter_tflops": 25.825766628675048, "iter_time": 0.5994042587280274, "loss": 0.33153387904167175, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 27.57379228671238, "step_time": 0.5614053497314454} +{"epoch": 0, "iter": 5613, "iter_tflops": 31.787950332051096, "iter_time": 0.6490224533081055, "loss": 0.44616466760635376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.106491689098476, "step_time": 0.5876717529296875} +{"epoch": 0, "iter": 5614, "iter_tflops": 38.96405230377283, "iter_time": 0.5294904479980469, "loss": 0.25892239809036255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.525864336796964, "step_time": 0.4851422500610352} +{"epoch": 0, "iter": 5615, "iter_tflops": 38.383178079767596, "iter_time": 0.5375035247802734, "loss": 0.4203921854496002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.881007568662696, "step_time": 0.49261215782165535} +{"epoch": 0, "iter": 5616, "iter_tflops": 42.80272797183903, "iter_time": 0.48200417327880857, "loss": 0.3555176258087158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.375496066159634, "step_time": 0.44487057304382327} +{"epoch": 0, "iter": 5617, "iter_tflops": 21.522967399875707, "iter_time": 0.9585617599487305, "loss": 0.1353587508201599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.858810819542523, "step_time": 0.9025444793701172} +{"epoch": 0, "iter": 5618, "iter_tflops": 15.678039647166452, "iter_time": 1.315923034667969, "loss": 0.18832926452159882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.59933113853193, "step_time": 1.1092384643554687} +{"epoch": 0, "iter": 5619, "iter_tflops": 45.509303768230204, "iter_time": 0.4533379287719726, "loss": 0.1469075083732605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.65587552361705, "step_time": 0.4154814167022705} +{"epoch": 0, "iter": 5620, "iter_tflops": 
52.882558827496545, "iter_time": 0.3901303939819336, "loss": 0.13031981885433197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.83964839838697, "step_time": 0.3566946563720703} +{"epoch": 0, "iter": 5621, "iter_tflops": 22.562354661890744, "iter_time": 0.8923446502685547, "loss": 0.004659649450331926, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 23.76602224616027, "step_time": 0.8471504516601562} +{"epoch": 0, "iter": 5622, "iter_tflops": 16.21065415187154, "iter_time": 1.2419854431152344, "loss": 0.0011222498724237084, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 20.32432980604972, "step_time": 0.990605676651001} +{"epoch": 0, "iter": 5623, "iter_tflops": 53.167101441399694, "iter_time": 0.3786814765930176, "loss": 0.005082837771624327, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 58.81378406580515, "step_time": 0.3423244533538818} +{"epoch": 0, "iter": 5624, "iter_tflops": 56.351823686022634, "iter_time": 0.35728030014038087, "loss": 0.002381963888183236, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 61.98672156488646, "step_time": 0.32480176353454593} +{"epoch": 0, "iter": 5625, "iter_tflops": 43.01322689756365, "iter_time": 0.47964533233642587, "loss": 0.41240665316581726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81048946035949, "step_time": 0.4407365474700928} +{"epoch": 0, "iter": 5626, "iter_tflops": 32.69047714538547, "iter_time": 0.6311040802001954, "loss": 0.4619332551956177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.84060506845661, "step_time": 0.5178408679962159} +{"epoch": 0, "iter": 5627, "iter_tflops": 40.53436459698907, "iter_time": 0.5089778442382813, "loss": 0.38850638270378113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.354738163078494, "step_time": 0.46513843536376953} +{"epoch": 0, "iter": 5628, "iter_tflops": 42.7883802370645, "iter_time": 0.48216579818725586, "loss": 0.35948413610458374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.4828347248031, "step_time": 0.4438432731628418} +{"epoch": 0, "iter": 5629, 
"iter_tflops": 21.26825252895353, "iter_time": 0.9700417785644532, "loss": 0.570740818977356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.184690111216753, "step_time": 0.889858497619629} +{"epoch": 0, "iter": 5630, "iter_tflops": 22.442111204041193, "iter_time": 0.9193027038574219, "loss": 0.6671306490898132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.623020953243582, "step_time": 0.7468804206848144} +{"epoch": 0, "iter": 5631, "iter_tflops": 39.68451966419996, "iter_time": 0.5198776168823243, "loss": 0.6242144703865051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.58361504648249, "step_time": 0.4733681106567383} +{"epoch": 0, "iter": 5632, "iter_tflops": 38.1452526118668, "iter_time": 0.5408561248779298, "loss": 0.5906485319137573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.72001767907891, "step_time": 0.4945130577087402} +{"epoch": 0, "iter": 5633, "iter_tflops": 28.44222391803747, "iter_time": 0.7253685073852539, "loss": 0.028029756620526314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.179422371006517, "step_time": 0.6616894073486329} +{"epoch": 0, "iter": 5634, "iter_tflops": 42.10831544041161, "iter_time": 0.48995295333862304, "loss": 0.0032577719539403915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.611335648760274, "step_time": 0.4426196594238281} +{"epoch": 0, "iter": 5635, "iter_tflops": 44.29048688573397, "iter_time": 0.46581320190429687, "loss": 0.016409633681178093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.02460962704515, "step_time": 0.4208313674926758} +{"epoch": 0, "iter": 5636, "iter_tflops": 44.53990570285192, "iter_time": 0.4632046966552735, "loss": 0.018314894288778305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.21961100126432, "step_time": 0.41916409111022945} +{"epoch": 0, "iter": 5637, "iter_tflops": 23.390776277626024, "iter_time": 0.8820183334350585, "loss": 0.0933983251452446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.89241395052129, "step_time": 0.8288104782104492} +{"epoch": 0, "iter": 
5638, "iter_tflops": 9.437378628478669, "iter_time": 2.186104248046875, "loss": 0.032624758780002594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.543310232810448, "step_time": 1.9567946929931643} +{"epoch": 0, "iter": 5639, "iter_tflops": 11.731558850733895, "iter_time": 1.7585977935791015, "loss": 0.04923282191157341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.749555536509018, "step_time": 1.398760353088379} +{"epoch": 0, "iter": 5640, "iter_tflops": 41.743386945197614, "iter_time": 0.49423621368408205, "loss": 0.06409726291894913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.061819707661996, "step_time": 0.44790009689331056} +{"epoch": 0, "iter": 5641, "iter_tflops": 21.227374348003927, "iter_time": 0.7736391143798828, "loss": 0.3088560700416565, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 22.89362167554376, "step_time": 0.7173319854736327} +{"epoch": 0, "iter": 5642, "iter_tflops": 28.58075039901735, "iter_time": 0.5745939788818359, "loss": 0.2572663128376007, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 30.524171811606838, "step_time": 0.5380105705261231} +{"epoch": 0, "iter": 5643, "iter_tflops": 29.19492187340146, "iter_time": 0.5625062866210937, "loss": 0.1790887713432312, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 31.097858577583494, "step_time": 0.528085464477539} +{"epoch": 0, "iter": 5644, "iter_tflops": 29.072248277726867, "iter_time": 0.5648798446655274, "loss": 0.2211707979440689, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 30.971143610086727, "step_time": 0.5302460670471191} +{"epoch": 0, "iter": 5645, "iter_tflops": 26.921240058545038, "iter_time": 0.7663500442504884, "loss": 0.062394823879003525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.505643881795965, "step_time": 0.7237546920776368} +{"epoch": 0, "iter": 5646, "iter_tflops": 27.943716231696804, "iter_time": 0.7383088684082032, "loss": 0.09188257902860641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.230799600478214, "step_time": 0.6606008739471435} +{"epoch": 
0, "iter": 5647, "iter_tflops": 46.020312803044156, "iter_time": 0.44830406951904295, "loss": 0.08988767862319946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.2554783306057, "step_time": 0.41052426910400386} +{"epoch": 0, "iter": 5648, "iter_tflops": 50.06943743069975, "iter_time": 0.41204963684082035, "loss": 0.13859395682811737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.69539408498152, "step_time": 0.3771998329162597} +{"epoch": 0, "iter": 5649, "iter_tflops": 41.69766407267, "iter_time": 0.4947781600952149, "loss": 0.5322653651237488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.609400705618086, "step_time": 0.45234300804138183} +{"epoch": 0, "iter": 5650, "iter_tflops": 38.50345632336944, "iter_time": 0.5358244552612305, "loss": 0.454596608877182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.09810821264519, "step_time": 0.4900717487335205} +{"epoch": 0, "iter": 5651, "iter_tflops": 41.58078161349145, "iter_time": 0.49616896820068357, "loss": 0.4299662709236145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.23909814678505, "step_time": 0.4560456409454346} +{"epoch": 0, "iter": 5652, "iter_tflops": 40.55104114216695, "iter_time": 0.5087685279846191, "loss": 0.5341809988021851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.230756957115666, "step_time": 0.46644224357604985} +{"epoch": 0, "iter": 5653, "iter_tflops": 30.93590821554473, "iter_time": 0.6668979415893554, "loss": 0.1909501850605011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.51302482722575, "step_time": 0.5977770309448242} +{"epoch": 0, "iter": 5654, "iter_tflops": 39.62422508980897, "iter_time": 0.5206686935424805, "loss": 0.14327101409435272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.63761215010575, "step_time": 0.47278236579895017} +{"epoch": 0, "iter": 5655, "iter_tflops": 39.97819968508956, "iter_time": 0.51605859375, "loss": 0.2418680638074875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.40042760011479, "step_time": 0.4753661346435546} +{"epoch": 0, 
"iter": 5656, "iter_tflops": 47.078883423039755, "iter_time": 0.438223934173584, "loss": 0.22174520790576935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.780425497844924, "step_time": 0.3984342212677002} +{"epoch": 0, "iter": 5657, "iter_tflops": 31.204469359748927, "iter_time": 0.6611582870483399, "loss": 0.8105812668800354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.2962352283527, "step_time": 0.6196224098205566} +{"epoch": 0, "iter": 5658, "iter_tflops": 12.206292549451726, "iter_time": 1.690201461791992, "loss": 0.8882136940956116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.279976807286193, "step_time": 1.44475679397583} +{"epoch": 0, "iter": 5659, "iter_tflops": 39.65410522912658, "iter_time": 0.5202763595581055, "loss": 0.9088789820671082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.496647416963, "step_time": 0.47431456756591794} +{"epoch": 0, "iter": 5660, "iter_tflops": 42.21936105873442, "iter_time": 0.48866427612304686, "loss": 0.6144164204597473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.7598361745356, "step_time": 0.4508559303283692} +{"epoch": 0, "iter": 5661, "iter_tflops": 24.968315816049223, "iter_time": 0.8262909545898438, "loss": 0.11213929951190948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.779465328166733, "step_time": 0.7704072227478027} +{"epoch": 0, "iter": 5662, "iter_tflops": 10.63496789238961, "iter_time": 1.9399300231933592, "loss": 0.10610868781805038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.440235118062475, "step_time": 1.658416687011719} +{"epoch": 0, "iter": 5663, "iter_tflops": 14.782768046241081, "iter_time": 1.3956177520751956, "loss": 0.10689570009708405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.805188812068177, "step_time": 1.1587124252319336} +{"epoch": 0, "iter": 5664, "iter_tflops": 50.9671958841654, "iter_time": 0.4047916145324707, "loss": 0.10041610896587372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.59005277592848, "step_time": 0.37112923049926755} +{"epoch": 0, 
"iter": 5665, "iter_tflops": 15.4711210025886, "iter_time": 0.9213812408447266, "loss": 0.14961130917072296, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 16.212774462315043, "step_time": 0.8792326507568361} +{"epoch": 0, "iter": 5666, "iter_tflops": 5.483086869530237, "iter_time": 2.599776550292969, "loss": 0.3404999077320099, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 6.498543174193533, "step_time": 2.193537887573242} +{"epoch": 0, "iter": 5667, "iter_tflops": 8.698625298768777, "iter_time": 1.6387417755126952, "loss": 0.38252466917037964, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 10.229119189244935, "step_time": 1.3935511360168458} +{"epoch": 0, "iter": 5668, "iter_tflops": 21.549017973216277, "iter_time": 0.6615058135986328, "loss": 0.25536370277404785, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 23.199379451561576, "step_time": 0.6144474983215332} +{"epoch": 0, "iter": 5669, "iter_tflops": 13.954890325830641, "iter_time": 1.0917090454101563, "loss": 0.18962378799915314, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 14.765241038697326, "step_time": 1.0317935180664064} +{"epoch": 0, "iter": 5670, "iter_tflops": 10.301803984193677, "iter_time": 1.4788361358642579, "loss": 0.29701417684555054, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 13.021160158167133, "step_time": 1.1699940567016602} +{"epoch": 0, "iter": 5671, "iter_tflops": 22.64611411712138, "iter_time": 0.6727282180786134, "loss": 0.23297317326068878, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.255539289372688, "step_time": 0.6280907554626465} +{"epoch": 0, "iter": 5672, "iter_tflops": 23.029109864648763, "iter_time": 0.6615401153564453, "loss": 0.2094242125749588, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.758180497263176, "step_time": 0.615339241027832} +{"epoch": 0, "iter": 5673, "iter_tflops": 17.447169783244252, "iter_time": 1.1824894104003907, "loss": 0.21225330233573914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.554092187455808, "step_time": 1.1119430313110352} 
+{"epoch": 0, "iter": 5674, "iter_tflops": 17.914149361171457, "iter_time": 1.1516647033691407, "loss": 0.18352364003658295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.89634081803156, "step_time": 0.942216495513916} +{"epoch": 0, "iter": 5675, "iter_tflops": 48.03131245650863, "iter_time": 0.42953424453735345, "loss": 0.27346697449684143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.354496615602784, "step_time": 0.39406535911560064} +{"epoch": 0, "iter": 5676, "iter_tflops": 50.74949191904643, "iter_time": 0.40652807998657225, "loss": 0.2988007962703705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.01453015631663, "step_time": 0.37501171875000006} +{"epoch": 0, "iter": 5677, "iter_tflops": 32.59742639328637, "iter_time": 0.6329055938720703, "loss": 0.568633496761322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.951987441722636, "step_time": 0.5902695388793945} +{"epoch": 0, "iter": 5678, "iter_tflops": 21.430751139027077, "iter_time": 0.9626864395141601, "loss": 0.49742329120635986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.15921542573931, "step_time": 0.7886740169525147} +{"epoch": 0, "iter": 5679, "iter_tflops": 44.583738830489025, "iter_time": 0.46274929046630864, "loss": 0.4357187747955322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.99935063674521, "step_time": 0.4298202629089355} +{"epoch": 0, "iter": 5680, "iter_tflops": 41.933176388736435, "iter_time": 0.49199930191040037, "loss": 0.43506646156311035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.07712597145646, "step_time": 0.4576843147277832} +{"epoch": 0, "iter": 5681, "iter_tflops": 35.9025826855616, "iter_time": 0.5746409301757811, "loss": 0.47383594512939453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.77663467674388, "step_time": 0.5320496139526367} +{"epoch": 0, "iter": 5682, "iter_tflops": 21.638220506450402, "iter_time": 0.9534561080932618, "loss": 0.319978266954422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.262473742681525, "step_time": 
0.7050358657836915} +{"epoch": 0, "iter": 5683, "iter_tflops": 46.351157765404274, "iter_time": 0.44510416793823243, "loss": 0.3801288604736328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.271084038334145, "step_time": 0.41039682960510254} +{"epoch": 0, "iter": 5684, "iter_tflops": 46.94619695728017, "iter_time": 0.43946250915527346, "loss": 0.5163161158561707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.868412443344475, "step_time": 0.40557769584655756} +{"epoch": 0, "iter": 5685, "iter_tflops": 47.0158037559649, "iter_time": 0.43881188583374026, "loss": 0.07342628389596939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.47191033825497, "step_time": 0.40082237815856936} +{"epoch": 0, "iter": 5686, "iter_tflops": 49.53219075701125, "iter_time": 0.4165188980102539, "loss": 0.07386629283428192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.74737189820641, "step_time": 0.37684171485900875} +{"epoch": 0, "iter": 5687, "iter_tflops": 52.14892528435928, "iter_time": 0.39561876678466795, "loss": 0.05388346686959267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.99916137868191, "step_time": 0.36195433425903323} +{"epoch": 0, "iter": 5688, "iter_tflops": 50.94618533501195, "iter_time": 0.404958553314209, "loss": 0.08090104907751083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.57810759575059, "step_time": 0.3712089958190917} +{"epoch": 0, "iter": 5689, "iter_tflops": 23.559159149086796, "iter_time": 0.875714340209961, "loss": 0.878264844417572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.751363062530462, "step_time": 0.8335336303710937} +{"epoch": 0, "iter": 5690, "iter_tflops": 14.478715853112474, "iter_time": 1.4249256439208982, "loss": 0.6563665866851807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.829103761275977, "step_time": 1.3033646011352538} +{"epoch": 0, "iter": 5691, "iter_tflops": 33.39905119112809, "iter_time": 0.6177149581909178, "loss": 1.0273425579071045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6213481800611, 
"step_time": 0.4956853733062744} +{"epoch": 0, "iter": 5692, "iter_tflops": 49.205416702826525, "iter_time": 0.4192850074768067, "loss": 0.9027515053749084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.93908600788119, "step_time": 0.3897138214111328} +{"epoch": 0, "iter": 5693, "iter_tflops": 34.08247943051614, "iter_time": 0.6053284225463867, "loss": 0.16438572108745575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.402338302355425, "step_time": 0.5515990295410156} +{"epoch": 0, "iter": 5694, "iter_tflops": 13.230848008747481, "iter_time": 1.5593175506591799, "loss": 0.19167158007621765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.228179250867225, "step_time": 1.2713128929138182} +{"epoch": 0, "iter": 5695, "iter_tflops": 41.75439911073283, "iter_time": 0.49410586547851565, "loss": 0.1755155324935913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.78308968353171, "step_time": 0.45062693786621094} +{"epoch": 0, "iter": 5696, "iter_tflops": 45.86247741639322, "iter_time": 0.44984690475463873, "loss": 0.15787169337272644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.03397380484026, "step_time": 0.4123416938781738} +{"epoch": 0, "iter": 5697, "iter_tflops": 24.490680466876874, "iter_time": 0.8424058914184571, "loss": 0.8493984341621399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.886352986785276, "step_time": 0.7969872589111326} +{"epoch": 0, "iter": 5698, "iter_tflops": 13.903019280749943, "iter_time": 1.4839290008544925, "loss": 0.9395182728767395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.870057726326706, "step_time": 1.1545062599182128} +{"epoch": 0, "iter": 5699, "iter_tflops": 37.621676701979, "iter_time": 0.5483831481933594, "loss": 0.8690696954727173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.81844756291236, "step_time": 0.5054355258941651} +{"epoch": 0, "iter": 5700, "iter_tflops": 39.766554736584105, "iter_time": 0.5188051528930664, "loss": 0.8759663701057434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.0156226955527, "step_time": 0.47961861801147465} +{"epoch": 0, "iter": 5701, "iter_tflops": 19.011533154608195, "iter_time": 1.0851883087158203, "loss": 0.020976684987545013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.231599378238, "step_time": 1.0197460479736329} +{"epoch": 0, "iter": 5702, "iter_tflops": 13.873353080093533, "iter_time": 1.4871021728515623, "loss": 0.04555123299360275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.292498647262533, "step_time": 1.1930660762786867} +{"epoch": 0, "iter": 5703, "iter_tflops": 41.56741090656855, "iter_time": 0.4963285675048828, "loss": 0.044403064996004105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.892904390903134, "step_time": 0.449548656463623} +{"epoch": 0, "iter": 5704, "iter_tflops": 41.076680703792356, "iter_time": 0.5022580490112305, "loss": 0.04796653613448143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.44376125218037, "step_time": 0.45399176788330076} +{"epoch": 0, "iter": 5705, "iter_tflops": 23.494759899178945, "iter_time": 0.8781146774291994, "loss": 0.5835497379302979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.20814540701749, "step_time": 0.8184296455383301} +{"epoch": 0, "iter": 5706, "iter_tflops": 37.72430503480261, "iter_time": 0.5468912811279297, "loss": 0.6403223872184753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.07294727981966, "step_time": 0.42916223526000974} +{"epoch": 0, "iter": 5707, "iter_tflops": 47.08501672593744, "iter_time": 0.4381668510437012, "loss": 0.4461006820201874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.13353406879952, "step_time": 0.4034748210906982} +{"epoch": 0, "iter": 5708, "iter_tflops": 45.96624116355071, "iter_time": 0.44883142471313475, "loss": 0.6625236868858337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.741467862599094, "step_time": 0.4147664794921875} +{"epoch": 0, "iter": 5709, "iter_tflops": 41.062890771172405, "iter_time": 0.5024267196655273, "loss": 0.5303001403808594, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.62390319243336, "step_time": 0.46233278656005855} +{"epoch": 0, "iter": 5710, "iter_tflops": 47.43932646147173, "iter_time": 0.43489431762695313, "loss": 0.33863067626953125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.41693595988287, "step_time": 0.40125093269348144} +{"epoch": 0, "iter": 5711, "iter_tflops": 47.15838551164553, "iter_time": 0.4374851531982422, "loss": 0.47599276900291443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.10534817903059, "step_time": 0.4036973476409912} +{"epoch": 0, "iter": 5712, "iter_tflops": 48.53085912314039, "iter_time": 0.42511288452148444, "loss": 0.42456141114234924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.65006403692717, "step_time": 0.39185315132141113} +{"epoch": 0, "iter": 5713, "iter_tflops": 23.558234609106574, "iter_time": 0.5584325561523438, "loss": 0.004363197833299637, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 25.29387393497862, "step_time": 0.5201134948730468} +{"epoch": 0, "iter": 5714, "iter_tflops": 8.753231069571928, "iter_time": 1.502951889038086, "loss": 0.01075935922563076, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 11.30364704189993, "step_time": 1.1638442993164064} +{"epoch": 0, "iter": 5715, "iter_tflops": 26.599854523633052, "iter_time": 0.49457733535766607, "loss": 0.006740653421729803, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 29.55794741240702, "step_time": 0.44508114814758293} +{"epoch": 0, "iter": 5716, "iter_tflops": 30.72527428022546, "iter_time": 0.42817144775390625, "loss": 0.0007960607763379812, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 34.07759603202654, "step_time": 0.3860508575439453} +{"epoch": 0, "iter": 5717, "iter_tflops": 24.88071632827148, "iter_time": 0.8292001419067382, "loss": 0.9574924111366272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.60241741933461, "step_time": 0.7755345382690428} +{"epoch": 0, "iter": 5718, "iter_tflops": 9.554598693310036, "iter_time": 2.159284149169922, "loss": 0.9241847991943359, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 13.247360438596695, "step_time": 1.557373908996582} +{"epoch": 0, "iter": 5719, "iter_tflops": 16.258285714171624, "iter_time": 1.268958724975586, "loss": 0.6556456685066223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.650994249886015, "step_time": 1.049875301361084} +{"epoch": 0, "iter": 5720, "iter_tflops": 37.2252783704654, "iter_time": 0.5542226791381836, "loss": 0.8089728355407715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.69022596664418, "step_time": 0.5070282363891602} +{"epoch": 0, "iter": 5721, "iter_tflops": 12.259036030614416, "iter_time": 1.1960771942138673, "loss": 0.21398957073688507, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 13.026545925073867, "step_time": 1.125605628967285} +{"epoch": 0, "iter": 5722, "iter_tflops": 13.047410496610473, "iter_time": 1.1238056335449218, "loss": 0.3120504915714264, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 15.2333903381983, "step_time": 0.9625403861999511} +{"epoch": 0, "iter": 5723, "iter_tflops": 26.034281960162474, "iter_time": 0.5632094421386719, "loss": 0.3599889278411865, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.78575134665314, "step_time": 0.5277076454162597} +{"epoch": 0, "iter": 5724, "iter_tflops": 26.49647194670765, "iter_time": 0.553385124206543, "loss": 0.2906741201877594, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 28.117261526092136, "step_time": 0.5214858283996582} +{"epoch": 0, "iter": 5725, "iter_tflops": 33.743770120199954, "iter_time": 0.6114045181274415, "loss": 0.6240500807762146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.1720490575172, "step_time": 0.5703600997924805} +{"epoch": 0, "iter": 5726, "iter_tflops": 15.09127076000356, "iter_time": 1.3670878906250001, "loss": 0.7711092829704285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.591434521513197, "step_time": 1.2434785842895506} +{"epoch": 0, "iter": 5727, "iter_tflops": 46.88008849140381, "iter_time": 0.44008222198486335, "loss": 0.6626497507095337, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.84671119773528, "step_time": 0.40575079536437986} +{"epoch": 0, "iter": 5728, "iter_tflops": 47.583841095091834, "iter_time": 0.4335735206604004, "loss": 0.8645427227020264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.451421716600784, "step_time": 0.400981990814209} +{"epoch": 0, "iter": 5729, "iter_tflops": 25.953384070964553, "iter_time": 0.794928840637207, "loss": 0.6923069953918457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.38583255181713, "step_time": 0.7533491439819336} +{"epoch": 0, "iter": 5730, "iter_tflops": 14.25898073275278, "iter_time": 1.4468841705322266, "loss": 0.9586752653121948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.829324520797565, "step_time": 1.3033464241027832} +{"epoch": 0, "iter": 5731, "iter_tflops": 36.11500909074683, "iter_time": 0.5712609252929687, "loss": 0.7293569445610046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.31908500816729, "step_time": 0.524709400177002} +{"epoch": 0, "iter": 5732, "iter_tflops": 38.01868543264575, "iter_time": 0.5426566772460937, "loss": 1.0551100969314575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.354323691203604, "step_time": 0.4988860092163086} +{"epoch": 0, "iter": 5733, "iter_tflops": 22.229034302644806, "iter_time": 0.9281147003173829, "loss": 0.5632399320602417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.777908659056138, "step_time": 0.8676580352783203} +{"epoch": 0, "iter": 5734, "iter_tflops": 17.103869566167432, "iter_time": 1.2062237396240236, "loss": 0.816074550151825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.96920209613257, "step_time": 0.9838759441375732} +{"epoch": 0, "iter": 5735, "iter_tflops": 42.55478571435635, "iter_time": 0.4848125343322754, "loss": 0.7944926023483276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.556964394123064, "step_time": 0.4431365699768066} +{"epoch": 0, "iter": 5736, "iter_tflops": 37.88934607052805, "iter_time": 0.5445090942382812, "loss": 0.8985157608985901, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 41.43615329820578, "step_time": 0.49790079116821284} +{"epoch": 0, "iter": 5737, "iter_tflops": 20.25990637282083, "iter_time": 1.0183212661743164, "loss": 0.7098953723907471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.739822989872316, "step_time": 0.9490000686645508} +{"epoch": 0, "iter": 5738, "iter_tflops": 13.73648287268592, "iter_time": 1.5019196472167968, "loss": 0.5118555426597595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.50040655532395, "step_time": 1.1151697368621827} +{"epoch": 0, "iter": 5739, "iter_tflops": 36.77930213576457, "iter_time": 0.5609430389404296, "loss": 0.7260634899139404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.83017769103464, "step_time": 0.5179764366149904} +{"epoch": 0, "iter": 5740, "iter_tflops": 34.41929320032663, "iter_time": 0.5994049148559571, "loss": 0.6248894333839417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.289977644309914, "step_time": 0.553261085510254} +{"epoch": 0, "iter": 5741, "iter_tflops": 38.75279511508404, "iter_time": 0.5323769149780273, "loss": 0.0019490520935505629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.51274666931154, "step_time": 0.47413907623291013} +{"epoch": 0, "iter": 5742, "iter_tflops": 41.41330552191196, "iter_time": 0.4981754837036133, "loss": 0.01338423602283001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91609592577262, "step_time": 0.4493215961456299} +{"epoch": 0, "iter": 5743, "iter_tflops": 43.04798234432026, "iter_time": 0.4792580833435059, "loss": 0.005546936299651861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.39801672081807, "step_time": 0.43527334976196286} +{"epoch": 0, "iter": 5744, "iter_tflops": 45.33340538465707, "iter_time": 0.4550969276428223, "loss": 0.009906831197440624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89339374375487, "step_time": 0.413503511428833} +{"epoch": 0, "iter": 5745, "iter_tflops": 20.569036604929803, "iter_time": 1.0030170059204102, "loss": 0.04345124214887619, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 22.321140727277204, "step_time": 0.9242849082946778} +{"epoch": 0, "iter": 5746, "iter_tflops": 38.771324085341796, "iter_time": 0.5321224899291992, "loss": 0.03121708519756794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24264607126647, "step_time": 0.47710062599182135} +{"epoch": 0, "iter": 5747, "iter_tflops": 40.65138042816434, "iter_time": 0.5075127410888671, "loss": 0.02803293988108635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.774657662636024, "step_time": 0.46077613067626955} +{"epoch": 0, "iter": 5748, "iter_tflops": 42.88493072022633, "iter_time": 0.4810802574157715, "loss": 0.08312872052192688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.16742094897768, "step_time": 0.4374013481140137} +{"epoch": 0, "iter": 5749, "iter_tflops": 32.707160075396644, "iter_time": 0.6307821731567385, "loss": 0.9687392711639404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.78806935704095, "step_time": 0.5764796447753906} +{"epoch": 0, "iter": 5750, "iter_tflops": 36.70487589093476, "iter_time": 0.5620804595947266, "loss": 0.762462854385376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.08334992534351, "step_time": 0.5147048225402833} +{"epoch": 0, "iter": 5751, "iter_tflops": 33.16470229535815, "iter_time": 0.6220798645019531, "loss": 0.7855458855628967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.01032128625415, "step_time": 0.5729216728210449} +{"epoch": 0, "iter": 5752, "iter_tflops": 34.60781489675332, "iter_time": 0.5961397323608398, "loss": 0.8362365365028381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.529057913224065, "step_time": 0.5497365150451661} +{"epoch": 0, "iter": 5753, "iter_tflops": 20.587536620760847, "iter_time": 1.0021156921386718, "loss": 0.782734751701355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.20385694130535, "step_time": 0.9291671066284181} +{"epoch": 0, "iter": 5754, "iter_tflops": 17.564328198231298, "iter_time": 1.1746019134521484, "loss": 0.6001392602920532, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 22.76449763061895, "step_time": 0.9062837162017822} +{"epoch": 0, "iter": 5755, "iter_tflops": 41.404104237436826, "iter_time": 0.4982861938476562, "loss": 0.5661561489105225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.39642155240965, "step_time": 0.4647017211914063} +{"epoch": 0, "iter": 5756, "iter_tflops": 53.03383673157044, "iter_time": 0.38901755523681647, "loss": 0.8692647218704224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.24159194515805, "step_time": 0.3604213790893555} +{"epoch": 0, "iter": 5757, "iter_tflops": 15.510171404303494, "iter_time": 0.6239599914550782, "loss": 0.00466779014095664, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 16.458805526785923, "step_time": 0.5879968872070312} +{"epoch": 0, "iter": 5758, "iter_tflops": 5.335706953634706, "iter_time": 1.8137664794921875, "loss": 0.0050731683149933815, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 7.498451150543766, "step_time": 1.290630054473877} +{"epoch": 0, "iter": 5759, "iter_tflops": 26.32083954756275, "iter_time": 0.36768304443359373, "loss": 0.0031718993559479713, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 29.00446610827688, "step_time": 0.3336633186340332} +{"epoch": 0, "iter": 5760, "iter_tflops": 25.953425052480206, "iter_time": 0.3728882179260254, "loss": 0.0027758292853832245, "lr": 3e-05, "seqlen": 3904.0, "step_tflops": 28.442178430440347, "step_time": 0.34025967597961426} +{"epoch": 0, "iter": 5761, "iter_tflops": 44.527510010780006, "iter_time": 0.4633336448669434, "loss": 0.8088231086730957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6269577709113, "step_time": 0.42427275848388674} +{"epoch": 0, "iter": 5762, "iter_tflops": 21.903704738033543, "iter_time": 0.9418997268676758, "loss": 0.6998829245567322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.38474544191152, "step_time": 0.7819326343536377} +{"epoch": 0, "iter": 5763, "iter_tflops": 36.56400129921142, "iter_time": 0.5642460556030273, "loss": 0.6926203370094299, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.992597345604906, "step_time": 0.5158728084564209} +{"epoch": 0, "iter": 5764, "iter_tflops": 39.94319726119181, "iter_time": 0.5165108184814453, "loss": 0.6427358388900757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24502669196756, "step_time": 0.47707436180114743} +{"epoch": 0, "iter": 5765, "iter_tflops": 30.59787025252053, "iter_time": 0.6742656707763672, "loss": 0.23782680928707123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.00682445087991, "step_time": 0.6250553894042968} +{"epoch": 0, "iter": 5766, "iter_tflops": 7.499921201536671, "iter_time": 2.750841369628906, "loss": 0.2949424982070923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.969176720687136, "step_time": 2.3002215423583987} +{"epoch": 0, "iter": 5767, "iter_tflops": 12.664824320258703, "iter_time": 1.6290074768066407, "loss": 0.3808632791042328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.422593445155854, "step_time": 1.2562628173828128} +{"epoch": 0, "iter": 5768, "iter_tflops": 38.218568701662875, "iter_time": 0.5398185806274414, "loss": 0.2819437086582184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.932793880032115, "step_time": 0.4920037899017333} +{"epoch": 0, "iter": 5769, "iter_tflops": 9.814696300921744, "iter_time": 1.5980830383300781, "loss": 0.17042334377765656, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 10.328092233087062, "step_time": 1.5186444244384765} +{"epoch": 0, "iter": 5770, "iter_tflops": 11.853702325000091, "iter_time": 1.32318994140625, "loss": 0.3618328869342804, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.046491244990207, "step_time": 0.9774535408020019} +{"epoch": 0, "iter": 5771, "iter_tflops": 25.248563329537593, "iter_time": 0.6212115707397461, "loss": 0.07623983919620514, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 27.086998833124152, "step_time": 0.5790490036010743} +{"epoch": 0, "iter": 5772, "iter_tflops": 23.770411336603715, "iter_time": 0.6598413238525391, "loss": 
0.2075508087873459, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.58244807398186, "step_time": 0.6131039390563965} +{"epoch": 0, "iter": 5773, "iter_tflops": 27.16636717164508, "iter_time": 0.7594351272583009, "loss": 0.12744443118572235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.838762111420674, "step_time": 0.6914192161560059} +{"epoch": 0, "iter": 5774, "iter_tflops": 44.96996495370001, "iter_time": 0.45877495193481443, "loss": 0.24034325778484344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.76264761665303, "step_time": 0.4064227237701416} +{"epoch": 0, "iter": 5775, "iter_tflops": 52.86516357438783, "iter_time": 0.39025876617431643, "loss": 0.21022845804691315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.16734488168046, "step_time": 0.360889482498169} +{"epoch": 0, "iter": 5776, "iter_tflops": 46.513389420675594, "iter_time": 0.44355171203613286, "loss": 0.18922056257724762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.83580611524021, "step_time": 0.40583783531188966} +{"epoch": 0, "iter": 5777, "iter_tflops": 3.7139768171119316, "iter_time": 0.6973734054565429, "loss": 0.04813661053776741, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 3.9598665751717705, "step_time": 0.6540696792602538} +{"epoch": 0, "iter": 5778, "iter_tflops": 2.60655350438343, "iter_time": 0.9936602706909179, "loss": 0.06847993284463882, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 3.255638198708647, "step_time": 0.7955517482757568} +{"epoch": 0, "iter": 5779, "iter_tflops": 5.606490342381857, "iter_time": 0.46196969985961917, "loss": 0.05164044350385666, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 6.087870864455657, "step_time": 0.42544080162048337} +{"epoch": 0, "iter": 5780, "iter_tflops": 5.783970876394398, "iter_time": 0.44779420852661134, "loss": 0.06901641190052032, "lr": 3e-05, "seqlen": 1056.0, "step_tflops": 6.259704098362012, "step_time": 0.41376215553283696} +{"epoch": 0, "iter": 5781, "iter_tflops": 28.07006679621739, "iter_time": 0.41809752655029303, 
"loss": 0.06053458899259567, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 30.987676246423007, "step_time": 0.37873202896118163} +{"epoch": 0, "iter": 5782, "iter_tflops": 29.177991124916964, "iter_time": 0.40222184753417967, "loss": 0.06278758496046066, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 31.925027307036384, "step_time": 0.36761207389831546} +{"epoch": 0, "iter": 5783, "iter_tflops": 31.84144801988045, "iter_time": 0.3685770034790039, "loss": 0.0787954330444336, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 34.78118587751008, "step_time": 0.33742453575134274} +{"epoch": 0, "iter": 5784, "iter_tflops": 28.39660856591803, "iter_time": 0.4132896881103515, "loss": 0.05247628316283226, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 30.891571424411538, "step_time": 0.37991027832031243} +{"epoch": 0, "iter": 5785, "iter_tflops": 34.36508866113041, "iter_time": 0.6003503646850585, "loss": 0.5986170768737793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.73748849200625, "step_time": 0.5615814895629883} +{"epoch": 0, "iter": 5786, "iter_tflops": 12.109614854902752, "iter_time": 1.7036952667236327, "loss": 0.7594684362411499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.525222682732117, "step_time": 1.5253792114257814} +{"epoch": 0, "iter": 5787, "iter_tflops": 14.664383512911757, "iter_time": 1.4068844757080077, "loss": 0.8504546880722046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.29798676210814, "step_time": 1.1926875534057617} +{"epoch": 0, "iter": 5788, "iter_tflops": 33.26912738073414, "iter_time": 0.6201272811889648, "loss": 0.7391382455825806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.51095919130522, "step_time": 0.4970035362243652} +{"epoch": 0, "iter": 5789, "iter_tflops": 11.724775130757427, "iter_time": 1.2784428863525392, "loss": 0.3390936255455017, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 12.641192582376238, "step_time": 1.1857627563476563} +{"epoch": 0, "iter": 5790, "iter_tflops": 13.05677129687764, "iter_time": 
1.1480215911865235, "loss": 0.20875683426856995, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 16.810331910814426, "step_time": 0.8916811065673829} +{"epoch": 0, "iter": 5791, "iter_tflops": 27.244933825621025, "iter_time": 0.5501740417480468, "loss": 0.28087541460990906, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.022199124833882, "step_time": 0.5164824104309081} +{"epoch": 0, "iter": 5792, "iter_tflops": 27.458874752161893, "iter_time": 0.5458874588012695, "loss": 0.31850552558898926, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.16023325578516, "step_time": 0.5140375671386719} +{"epoch": 0, "iter": 5793, "iter_tflops": 27.250186793014613, "iter_time": 0.7570991592407227, "loss": 0.8321244716644287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.763736508805273, "step_time": 0.7172605514526367} +{"epoch": 0, "iter": 5794, "iter_tflops": 12.987259921783224, "iter_time": 1.5885639953613282, "loss": 0.7598916292190552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.609537603368622, "step_time": 1.242123291015625} +{"epoch": 0, "iter": 5795, "iter_tflops": 42.315290607093424, "iter_time": 0.4875564651489257, "loss": 0.7697455883026123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.4965969366788, "step_time": 0.453464542388916} +{"epoch": 0, "iter": 5796, "iter_tflops": 47.152002457786345, "iter_time": 0.43754437637329097, "loss": 0.6099657416343689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.504168827538095, "step_time": 0.4085027828216553} +{"epoch": 0, "iter": 5797, "iter_tflops": 26.444427549357457, "iter_time": 0.7801679000854492, "loss": 0.005008717067539692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.909671732077296, "step_time": 0.7392094650268555} +{"epoch": 0, "iter": 5798, "iter_tflops": 15.43716971125998, "iter_time": 1.3364557037353515, "loss": 0.004666971508413553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.43365763381668, "step_time": 1.1192078056335448} +{"epoch": 0, "iter": 5799, "iter_tflops": 44.31553052187178, 
"iter_time": 0.4655499610900879, "loss": 0.0061670639552176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.173420194032964, "step_time": 0.4195578308105468} +{"epoch": 0, "iter": 5800, "iter_tflops": 52.10603627769298, "iter_time": 0.3959444046020508, "loss": 0.011402154341340065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.78970627808794, "step_time": 0.3570029134750366} +{"epoch": 0, "iter": 5801, "iter_tflops": 16.96475319220463, "iter_time": 1.2161151580810545, "loss": 0.8816227316856384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.006775627690555, "step_time": 1.1457405776977538} +{"epoch": 0, "iter": 5802, "iter_tflops": 17.639400019548287, "iter_time": 1.1696029052734376, "loss": 0.771540105342865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.015129164079777, "step_time": 0.8964144134521485} +{"epoch": 0, "iter": 5803, "iter_tflops": 37.27383698168331, "iter_time": 0.5535006637573242, "loss": 0.9721986651420593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.70399124388102, "step_time": 0.5068567695617676} +{"epoch": 0, "iter": 5804, "iter_tflops": 38.46919907690145, "iter_time": 0.536301612854004, "loss": 0.6952853202819824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76748896867375, "step_time": 0.4939510135650635} +{"epoch": 0, "iter": 5805, "iter_tflops": 28.440448192530837, "iter_time": 0.6613653259277344, "loss": 0.060180213302373886, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 31.060765110730042, "step_time": 0.605571891784668} +{"epoch": 0, "iter": 5806, "iter_tflops": 7.968429319963557, "iter_time": 2.360506134033203, "loss": 0.028673455119132996, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 11.362912979205177, "step_time": 1.6553436889648439} +{"epoch": 0, "iter": 5807, "iter_tflops": 17.756282542683735, "iter_time": 1.059316680908203, "loss": 0.059403445571660995, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 19.979218152701094, "step_time": 0.9414545726776122} +{"epoch": 0, "iter": 5808, "iter_tflops": 
47.68988806264643, "iter_time": 0.39441330337524416, "loss": 0.02862532064318657, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 52.22324343850788, "step_time": 0.3601753749847412} +{"epoch": 0, "iter": 5809, "iter_tflops": 25.517615670823645, "iter_time": 0.5810130081176759, "loss": 0.1835373491048813, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 27.455722604318016, "step_time": 0.5399991416931152} +{"epoch": 0, "iter": 5810, "iter_tflops": 25.72333775727504, "iter_time": 0.5763663635253906, "loss": 0.204196035861969, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 27.552901214713934, "step_time": 0.5380945739746094} +{"epoch": 0, "iter": 5811, "iter_tflops": 27.52638220667936, "iter_time": 0.5386129760742188, "loss": 0.13339640200138092, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 29.332630902826505, "step_time": 0.5054461936950683} +{"epoch": 0, "iter": 5812, "iter_tflops": 26.59691822418443, "iter_time": 0.5574355087280274, "loss": 0.24134445190429688, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.392647342383324, "step_time": 0.5221797904968262} +{"epoch": 0, "iter": 5813, "iter_tflops": 22.79861698554104, "iter_time": 0.9049274139404296, "loss": 0.529407799243927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.069524806289486, "step_time": 0.8571458587646484} +{"epoch": 0, "iter": 5814, "iter_tflops": 17.77100138711873, "iter_time": 1.1609415283203124, "loss": 0.5162344574928284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.924998404787647, "step_time": 0.9859543647766112} +{"epoch": 0, "iter": 5815, "iter_tflops": 38.28094455917494, "iter_time": 0.5389389877319335, "loss": 0.5406180620193481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95467473881016, "step_time": 0.49174719238281256} +{"epoch": 0, "iter": 5816, "iter_tflops": 39.772011039050405, "iter_time": 0.5187339782714844, "loss": 0.5856855511665344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.4343345381734, "step_time": 0.47499504089355465} +{"epoch": 0, "iter": 5817, "iter_tflops": 
16.13286515627441, "iter_time": 1.2788238983154299, "loss": 0.11440365016460419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.40315460880053, "step_time": 1.1854801025390624} +{"epoch": 0, "iter": 5818, "iter_tflops": 16.401978422047073, "iter_time": 1.257841766357422, "loss": 0.17537395656108856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.879914926320566, "step_time": 1.0377858047485353} +{"epoch": 0, "iter": 5819, "iter_tflops": 47.008972181701886, "iter_time": 0.43887565612792967, "loss": 0.13261570036411285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.09555364651162, "step_time": 0.4037747325897217} +{"epoch": 0, "iter": 5820, "iter_tflops": 49.2942807344992, "iter_time": 0.4185291519165039, "loss": 0.13669453561306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.2996775718706, "step_time": 0.3870772666931152} +{"epoch": 0, "iter": 5821, "iter_tflops": 34.47912913688893, "iter_time": 0.5983646926879883, "loss": 0.30939045548439026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.900249394498836, "step_time": 0.5591044464111328} +{"epoch": 0, "iter": 5822, "iter_tflops": 8.035440872590998, "iter_time": 2.5675123291015627, "loss": 0.4762457609176636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.792698204545689, "step_time": 2.1067833480834963} +{"epoch": 0, "iter": 5823, "iter_tflops": 11.14076772639773, "iter_time": 1.8518556365966794, "loss": 0.5311340093612671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.011878635887154, "step_time": 1.5855584030151366} +{"epoch": 0, "iter": 5824, "iter_tflops": 25.41346749930982, "iter_time": 0.8118173370361328, "loss": 0.3920386731624603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85904637958537, "step_time": 0.517601282119751} +{"epoch": 0, "iter": 5825, "iter_tflops": 15.266422962479046, "iter_time": 1.073028549194336, "loss": 0.4244091808795929, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 16.105773643244586, "step_time": 1.017107780456543} +{"epoch": 0, "iter": 5826, "iter_tflops": 
11.035765145763186, "iter_time": 1.4843834991455076, "loss": 0.24736201763153076, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 13.876490198170663, "step_time": 1.180507999420166} +{"epoch": 0, "iter": 5827, "iter_tflops": 25.858652622504525, "iter_time": 0.6334942474365235, "loss": 0.25030797719955444, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 27.750532408806922, "step_time": 0.5903060684204102} +{"epoch": 0, "iter": 5828, "iter_tflops": 23.42653263885976, "iter_time": 0.6992630081176758, "loss": 0.41588330268859863, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 25.14244176699474, "step_time": 0.6515400466918945} +{"epoch": 0, "iter": 5829, "iter_tflops": 21.84745337720112, "iter_time": 0.9443248672485351, "loss": 0.8452492952346802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.221720657390275, "step_time": 0.8884394836425782} +{"epoch": 0, "iter": 5830, "iter_tflops": 19.18720291336602, "iter_time": 1.0752527923583983, "loss": 0.6888665556907654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.458966137620546, "step_time": 0.9614206657409669} +{"epoch": 0, "iter": 5831, "iter_tflops": 41.99708245438376, "iter_time": 0.4912506370544434, "loss": 0.6865533590316772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.02891828487915, "step_time": 0.45817430877685544} +{"epoch": 0, "iter": 5832, "iter_tflops": 43.40068950663399, "iter_time": 0.4753632659912109, "loss": 0.8067210912704468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66800104859806, "step_time": 0.44208222007751463} +{"epoch": 0, "iter": 5833, "iter_tflops": 26.301440417944224, "iter_time": 0.7844092636108398, "loss": 0.2429177612066269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.878380737647692, "step_time": 0.7400391616821289} +{"epoch": 0, "iter": 5834, "iter_tflops": 18.557179732796104, "iter_time": 1.1117580261230469, "loss": 0.30795103311538696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.63471151987429, "step_time": 0.8374806213378905} +{"epoch": 0, "iter": 5835, 
"iter_tflops": 45.2802426945829, "iter_time": 0.4556312484741211, "loss": 0.28137579560279846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.07149020837774, "step_time": 0.42042932510375974} +{"epoch": 0, "iter": 5836, "iter_tflops": 53.12325462211683, "iter_time": 0.38836275482177735, "loss": 0.2728113830089569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.724103718824615, "step_time": 0.35740864181518556} +{"epoch": 0, "iter": 5837, "iter_tflops": 44.50770619223943, "iter_time": 0.4635398063659668, "loss": 0.2161688208580017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.77270507293473, "step_time": 0.42300490570068366} +{"epoch": 0, "iter": 5838, "iter_tflops": 45.93412979745309, "iter_time": 0.4491451911926269, "loss": 0.24774889647960663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.65291624144105, "step_time": 0.4073031730651855} +{"epoch": 0, "iter": 5839, "iter_tflops": 52.240605811766265, "iter_time": 0.39492446899414063, "loss": 0.30174827575683594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.856166415289216, "step_time": 0.3628646602630615} +{"epoch": 0, "iter": 5840, "iter_tflops": 49.41274893179242, "iter_time": 0.4175257186889648, "loss": 0.25426724553108215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.57288242303368, "step_time": 0.3851032943725586} +{"epoch": 0, "iter": 5841, "iter_tflops": 23.007834086917033, "iter_time": 0.8966986389160155, "loss": 0.9236493110656738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.18554209781502, "step_time": 0.8530341567993165} +{"epoch": 0, "iter": 5842, "iter_tflops": 19.030077866745337, "iter_time": 1.0841307983398436, "loss": 0.8149824142456055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.88452087241046, "step_time": 0.9015304985046386} +{"epoch": 0, "iter": 5843, "iter_tflops": 43.340452865473175, "iter_time": 0.4760239486694336, "loss": 0.7382772564888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.684285472631196, "step_time": 0.4419280128479004} +{"epoch": 0, "iter": 
5844, "iter_tflops": 43.52065717874801, "iter_time": 0.47405289459228517, "loss": 0.8645061254501343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.98419910313914, "step_time": 0.4391070594787598} +{"epoch": 0, "iter": 5845, "iter_tflops": 23.57634470054137, "iter_time": 0.8750760040283203, "loss": 0.9109328389167786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.825919954176054, "step_time": 0.831030372619629} +{"epoch": 0, "iter": 5846, "iter_tflops": 20.668536625544213, "iter_time": 0.9981884002685546, "loss": 0.7754129767417908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.45415588965444, "step_time": 0.8105196495056152} +{"epoch": 0, "iter": 5847, "iter_tflops": 49.03510529914071, "iter_time": 0.42074129104614255, "loss": 0.8694500923156738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.88506888192027, "step_time": 0.3901118774414063} +{"epoch": 0, "iter": 5848, "iter_tflops": 49.575787311335965, "iter_time": 0.4161526145935058, "loss": 0.7688021659851074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36843539909645, "step_time": 0.38657857131958007} +{"epoch": 0, "iter": 5849, "iter_tflops": 17.950035401191453, "iter_time": 1.1493622741699216, "loss": 0.09707048535346985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.698101516017935, "step_time": 1.103379051208496} +{"epoch": 0, "iter": 5850, "iter_tflops": 16.700540229299754, "iter_time": 1.2353548583984375, "loss": 0.10869377106428146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.598608803778156, "step_time": 1.0015770339965822} +{"epoch": 0, "iter": 5851, "iter_tflops": 43.02789666383903, "iter_time": 0.47948180389404293, "loss": 0.13047626614570618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.282008404042834, "step_time": 0.4363413105010986} +{"epoch": 0, "iter": 5852, "iter_tflops": 43.96730826653785, "iter_time": 0.4692371292114258, "loss": 0.08154613524675369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.57219662792576, "step_time": 0.424751091003418} +{"epoch": 0, 
"iter": 5853, "iter_tflops": 22.24842840871025, "iter_time": 0.9273056564331054, "loss": 0.011651677079498768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.585045824752054, "step_time": 0.8747531661987304} +{"epoch": 0, "iter": 5854, "iter_tflops": 17.65814180148262, "iter_time": 1.1683615264892577, "loss": 0.008785083889961243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.798044070564806, "step_time": 0.8669239139556886} +{"epoch": 0, "iter": 5855, "iter_tflops": 54.15791982427738, "iter_time": 0.3809432411193848, "loss": 0.007303193211555481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.442744739153916, "step_time": 0.34707504844665527} +{"epoch": 0, "iter": 5856, "iter_tflops": 58.70922912481205, "iter_time": 0.351411418914795, "loss": 0.004070006776601076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.15581966045048, "step_time": 0.32157789611816406} +{"epoch": 0, "iter": 5857, "iter_tflops": 32.474636036254545, "iter_time": 0.635298683166504, "loss": 0.10024779289960861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.4985784723004, "step_time": 0.5980273513793946} +{"epoch": 0, "iter": 5858, "iter_tflops": 11.827615726282184, "iter_time": 1.7443155059814452, "loss": 0.07490397989749908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.606661929349487, "step_time": 1.412444103240967} +{"epoch": 0, "iter": 5859, "iter_tflops": 41.64097912772312, "iter_time": 0.4954516906738281, "loss": 0.16395114362239838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.93601224954571, "step_time": 0.4491267852783203} +{"epoch": 0, "iter": 5860, "iter_tflops": 44.59555910139508, "iter_time": 0.4626266365051269, "loss": 0.0813864916563034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80950823387795, "step_time": 0.42268595314025875} +{"epoch": 0, "iter": 5861, "iter_tflops": 23.31442273209735, "iter_time": 0.8849068984985351, "loss": 0.15702639520168304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.016703854888497, "step_time": 0.8246927185058595} 
+{"epoch": 0, "iter": 5862, "iter_tflops": 9.45701578521679, "iter_time": 2.1815648803710936, "loss": 0.2240789234638214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.847293022575323, "step_time": 1.9019577941894532} +{"epoch": 0, "iter": 5863, "iter_tflops": 10.694852892157499, "iter_time": 1.9290675354003908, "loss": 0.20713016390800476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.317564250236583, "step_time": 1.6749328918457032} +{"epoch": 0, "iter": 5864, "iter_tflops": 45.5123235024982, "iter_time": 0.4533078498840332, "loss": 0.2602505385875702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.79988354641039, "step_time": 0.4142799549102783} +{"epoch": 0, "iter": 5865, "iter_tflops": 19.96645511921282, "iter_time": 0.7098539352416993, "loss": 0.346418559551239, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 21.438468524485994, "step_time": 0.66111376953125} +{"epoch": 0, "iter": 5866, "iter_tflops": 21.716280318271235, "iter_time": 0.6526562805175782, "loss": 0.1925434023141861, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 23.393141142105886, "step_time": 0.6058727493286133} +{"epoch": 0, "iter": 5867, "iter_tflops": 22.15475719100811, "iter_time": 0.6397392044067383, "loss": 0.2910292148590088, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 23.809850500824368, "step_time": 0.5952690353393555} +{"epoch": 0, "iter": 5868, "iter_tflops": 22.326430108209358, "iter_time": 0.6348201065063477, "loss": 0.33853834867477417, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 23.91963301927839, "step_time": 0.5925369644165039} +{"epoch": 0, "iter": 5869, "iter_tflops": 30.38235874298025, "iter_time": 0.6790484466552735, "loss": 0.376871794462204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.4892882618932, "step_time": 0.616050521850586} +{"epoch": 0, "iter": 5870, "iter_tflops": 37.34329855572077, "iter_time": 0.5524711074829101, "loss": 0.3668142557144165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.568223166765534, "step_time": 0.5085530471801757} 
+{"epoch": 0, "iter": 5871, "iter_tflops": 37.653685464884404, "iter_time": 0.547916976928711, "loss": 0.5626105070114136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.08806446633157, "step_time": 0.5021188945770264} +{"epoch": 0, "iter": 5872, "iter_tflops": 38.00562898023369, "iter_time": 0.5428431015014649, "loss": 0.4539913237094879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.43591821625187, "step_time": 0.49790361595153804} +{"epoch": 0, "iter": 5873, "iter_tflops": 21.184112598495116, "iter_time": 0.973894630432129, "loss": 0.5490525960922241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.471931334208872, "step_time": 0.9180827941894533} +{"epoch": 0, "iter": 5874, "iter_tflops": 8.717298520984317, "iter_time": 2.3666842956542964, "loss": 0.633392870426178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.239058631064562, "step_time": 2.014940460205078} +{"epoch": 0, "iter": 5875, "iter_tflops": 17.696936983698738, "iter_time": 1.1658002471923827, "loss": 0.6746832132339478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.00896065114805, "step_time": 1.0310927124023437} +{"epoch": 0, "iter": 5876, "iter_tflops": 36.42646045149801, "iter_time": 0.5663765640258789, "loss": 0.7053104043006897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66109076620196, "step_time": 0.5201847229003906} +{"epoch": 0, "iter": 5877, "iter_tflops": 10.24258937540903, "iter_time": 1.3877380828857422, "loss": 0.32771503925323486, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 10.980395315188597, "step_time": 1.2944917678833008} +{"epoch": 0, "iter": 5878, "iter_tflops": 13.014623911481488, "iter_time": 1.0921584396362307, "loss": 0.286878377199173, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 17.648400409056485, "step_time": 0.8054005470275879} +{"epoch": 0, "iter": 5879, "iter_tflops": 25.938830269925017, "iter_time": 0.5479827423095702, "loss": 0.4115554392337799, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 27.698825880937772, "step_time": 0.5131636772155762} 
+{"epoch": 0, "iter": 5880, "iter_tflops": 25.433409849517496, "iter_time": 0.5588724212646484, "loss": 0.2419227659702301, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 27.074520895919882, "step_time": 0.5249965972900391} +{"epoch": 0, "iter": 5881, "iter_tflops": 40.291615935075356, "iter_time": 0.5120443305969238, "loss": 0.3286283016204834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.82913327807355, "step_time": 0.470716438293457} +{"epoch": 0, "iter": 5882, "iter_tflops": 38.81927716146438, "iter_time": 0.5314651641845703, "loss": 0.3902474343776703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60114038815727, "step_time": 0.4842850055694581} +{"epoch": 0, "iter": 5883, "iter_tflops": 41.36820978542603, "iter_time": 0.49871854782104486, "loss": 0.3626886308193207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.53852545252478, "step_time": 0.45304702568054195} +{"epoch": 0, "iter": 5884, "iter_tflops": 39.15171732986125, "iter_time": 0.5269524536132812, "loss": 0.41429978609085083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.779340170115816, "step_time": 0.4822676887512207} +{"epoch": 0, "iter": 5885, "iter_tflops": 16.74089548110625, "iter_time": 1.232376937866211, "loss": 0.8468362092971802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.006725507258583, "step_time": 1.145743766784668} +{"epoch": 0, "iter": 5886, "iter_tflops": 14.971685070137317, "iter_time": 1.3780074462890626, "loss": 0.9690195918083191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.09085703815382, "step_time": 1.14041548538208} +{"epoch": 0, "iter": 5887, "iter_tflops": 37.41605974841329, "iter_time": 0.551396743774414, "loss": 0.9953939914703369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.84540708907695, "step_time": 0.5051019191741943} +{"epoch": 0, "iter": 5888, "iter_tflops": 33.27593761956726, "iter_time": 0.6200003662109375, "loss": 0.6511837244033813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.99926129379884, "step_time": 0.5730976905822753} 
+{"epoch": 0, "iter": 5889, "iter_tflops": 17.649684123535653, "iter_time": 1.1689214019775391, "loss": 0.22164522111415863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.901986450472755, "step_time": 1.0914775314331053} +{"epoch": 0, "iter": 5890, "iter_tflops": 16.261335308033107, "iter_time": 1.2687207489013672, "loss": 0.13618025183677673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.98373629137866, "step_time": 0.9831944713592529} +{"epoch": 0, "iter": 5891, "iter_tflops": 40.375482623116355, "iter_time": 0.5109807281494141, "loss": 0.20770688354969025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1954690855815, "step_time": 0.46681467437744145} +{"epoch": 0, "iter": 5892, "iter_tflops": 40.22633555514656, "iter_time": 0.5128752899169923, "loss": 0.23082268238067627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.6505598810036, "step_time": 0.4726421279907227} +{"epoch": 0, "iter": 5893, "iter_tflops": 18.885179737123078, "iter_time": 1.0924488830566406, "loss": 0.9675326347351074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.192383204701404, "step_time": 1.0217265243530274} +{"epoch": 0, "iter": 5894, "iter_tflops": 22.24264173317651, "iter_time": 0.927546905517578, "loss": 0.7510557174682617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.92466223192785, "step_time": 0.7662526397705078} +{"epoch": 0, "iter": 5895, "iter_tflops": 44.019320459250444, "iter_time": 0.46868268966674803, "loss": 0.5994347929954529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1236230175177, "step_time": 0.43780788040161134} +{"epoch": 0, "iter": 5896, "iter_tflops": 42.2829665186419, "iter_time": 0.48792918777465816, "loss": 0.6427252292633057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.32703381350985, "step_time": 0.4551609001159668} +{"epoch": 0, "iter": 5897, "iter_tflops": 22.71662346416911, "iter_time": 0.9081936645507813, "loss": 0.7961010336875916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.909430878430946, "step_time": 0.8628851776123047} 
+{"epoch": 0, "iter": 5898, "iter_tflops": 24.10756786523187, "iter_time": 0.8557932357788086, "loss": 0.7084888219833374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.078233415896104, "step_time": 0.7619069232940675} +{"epoch": 0, "iter": 5899, "iter_tflops": 49.22396732254808, "iter_time": 0.4191269950866699, "loss": 0.7720146775245667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.09377082104404, "step_time": 0.3885784187316895} +{"epoch": 0, "iter": 5900, "iter_tflops": 51.10183931942383, "iter_time": 0.4037250671386719, "loss": 0.5983483791351318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.06372799038672, "step_time": 0.37467665672302247} +{"epoch": 0, "iter": 5901, "iter_tflops": 37.80048040620184, "iter_time": 0.44096113204956056, "loss": 0.0029043201357126236, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 41.11705551756504, "step_time": 0.4053924198150634} +{"epoch": 0, "iter": 5902, "iter_tflops": 8.365959160685339, "iter_time": 1.992424575805664, "loss": 0.009917610324919224, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 9.376983963020798, "step_time": 1.7776016998291015} +{"epoch": 0, "iter": 5903, "iter_tflops": 8.858669370818438, "iter_time": 1.881607940673828, "loss": 0.0017065273132175207, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 11.48331005583476, "step_time": 1.4515451164245607} +{"epoch": 0, "iter": 5904, "iter_tflops": 17.19777451575493, "iter_time": 0.969226722717285, "loss": 0.014269300736486912, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 19.45463685353336, "step_time": 0.8567902221679686} +{"epoch": 0, "iter": 5905, "iter_tflops": 20.203854453423688, "iter_time": 0.7439330139160156, "loss": 0.20962218940258026, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 21.50900845678274, "step_time": 0.6987915954589844} +{"epoch": 0, "iter": 5906, "iter_tflops": 11.060139189831753, "iter_time": 1.3589624938964842, "loss": 0.2690923511981964, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 12.874926607516258, "step_time": 
1.16740970993042} +{"epoch": 0, "iter": 5907, "iter_tflops": 26.42772820713522, "iter_time": 0.5687327423095704, "loss": 0.38409438729286194, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.276991707289827, "step_time": 0.5315386619567871} +{"epoch": 0, "iter": 5908, "iter_tflops": 27.326407224786337, "iter_time": 0.550028923034668, "loss": 0.3383205235004425, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.10998958454678, "step_time": 0.5163283996582031} +{"epoch": 0, "iter": 5909, "iter_tflops": 29.184627508493275, "iter_time": 0.7069164581298827, "loss": 0.08904290199279785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.95016623277376, "step_time": 0.6665907173156739} +{"epoch": 0, "iter": 5910, "iter_tflops": 20.974308906724055, "iter_time": 0.9836363906860351, "loss": 0.08728359639644623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.786217505906063, "step_time": 0.8000821952819824} +{"epoch": 0, "iter": 5911, "iter_tflops": 52.571631080118486, "iter_time": 0.39243776702880856, "loss": 0.06164206191897392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.44614948908633, "step_time": 0.35913796997070313} +{"epoch": 0, "iter": 5912, "iter_tflops": 52.10170728781298, "iter_time": 0.3959773025512696, "loss": 0.08967499434947968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.819351400128376, "step_time": 0.36309977149963374} +{"epoch": 0, "iter": 5913, "iter_tflops": 39.12477301745901, "iter_time": 0.5273153533935546, "loss": 0.8644393086433411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.16965271019511, "step_time": 0.4892402992248535} +{"epoch": 0, "iter": 5914, "iter_tflops": 12.466557281020464, "iter_time": 1.654915069580078, "loss": 0.8433839678764343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.14855188905847, "step_time": 1.2030807991027832} +{"epoch": 0, "iter": 5915, "iter_tflops": 40.16519534715294, "iter_time": 0.5136559982299804, "loss": 0.8521084785461426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.78496420208909, 
"step_time": 0.47119128417968753} +{"epoch": 0, "iter": 5916, "iter_tflops": 39.291672131481334, "iter_time": 0.5250754776000977, "loss": 0.8246563076972961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84291903003216, "step_time": 0.4815520038604737} +{"epoch": 0, "iter": 5917, "iter_tflops": 20.348965854313953, "iter_time": 1.013864471435547, "loss": 0.6790448427200317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.942842910442263, "step_time": 0.9402197151184083} +{"epoch": 0, "iter": 5918, "iter_tflops": 21.54851454630301, "iter_time": 0.9574253234863281, "loss": 0.6695411801338196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.92025469988315, "step_time": 0.7959448604583741} +{"epoch": 0, "iter": 5919, "iter_tflops": 49.591463648914576, "iter_time": 0.41602106475830075, "loss": 0.5422145128250122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.55442185995683, "step_time": 0.3852360420227051} +{"epoch": 0, "iter": 5920, "iter_tflops": 50.51882938341577, "iter_time": 0.4083842353820801, "loss": 0.6172909140586853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.47864131072988, "step_time": 0.3787005882263183} +{"epoch": 0, "iter": 5921, "iter_tflops": 39.77489587942852, "iter_time": 0.5082677993774415, "loss": 0.06056278944015503, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 43.35323450912966, "step_time": 0.4663158130645752} +{"epoch": 0, "iter": 5922, "iter_tflops": 43.595821492470776, "iter_time": 0.463721019744873, "loss": 0.04778735712170601, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 47.56158077701567, "step_time": 0.425055233001709} +{"epoch": 0, "iter": 5923, "iter_tflops": 53.75342803953512, "iter_time": 0.37609320068359375, "loss": 0.029604917392134666, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 59.06385525912578, "step_time": 0.3422786865234375} +{"epoch": 0, "iter": 5924, "iter_tflops": 60.05056897923451, "iter_time": 0.33665457534790044, "loss": 0.03642978519201279, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 65.4365819481515, 
"step_time": 0.30894490814208986} +{"epoch": 0, "iter": 5925, "iter_tflops": 44.1833145158685, "iter_time": 0.46694309234619147, "loss": 0.6390058994293213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.8037002605917, "step_time": 0.43157942581176756} +{"epoch": 0, "iter": 5926, "iter_tflops": 35.6635975426374, "iter_time": 0.5784916534423828, "loss": 0.7809842824935913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.764641490384264, "step_time": 0.5322142219543456} +{"epoch": 0, "iter": 5927, "iter_tflops": 40.266537047419405, "iter_time": 0.5123632431030274, "loss": 0.8200393319129944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.69449654886807, "step_time": 0.4721668663024902} +{"epoch": 0, "iter": 5928, "iter_tflops": 44.530906469359046, "iter_time": 0.4632983055114746, "loss": 0.9381367564201355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08496571047148, "step_time": 0.42905496978759766} +{"epoch": 0, "iter": 5929, "iter_tflops": 40.49736060105811, "iter_time": 0.5094429168701172, "loss": 0.4509217441082001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96178372405916, "step_time": 0.4692960968017578} +{"epoch": 0, "iter": 5930, "iter_tflops": 27.58988311586706, "iter_time": 0.7477774887084961, "loss": 0.5889412760734558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.70087350723881, "step_time": 0.5196634654998779} +{"epoch": 0, "iter": 5931, "iter_tflops": 48.904063725590724, "iter_time": 0.42186869430541996, "loss": 0.5307272672653198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.77810837421924, "step_time": 0.3909024810791016} +{"epoch": 0, "iter": 5932, "iter_tflops": 45.97537501032379, "iter_time": 0.4487422561645509, "loss": 0.557127058506012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.690727160381016, "step_time": 0.4151900100708008} +{"epoch": 0, "iter": 5933, "iter_tflops": 33.10760984273101, "iter_time": 0.6231526107788086, "loss": 0.4669833779335022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.51547474405071, 
"step_time": 0.5809043426513671} +{"epoch": 0, "iter": 5934, "iter_tflops": 11.147114414595801, "iter_time": 1.8508012695312501, "loss": 0.499393492937088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.253701990744567, "step_time": 1.5566287460327146} +{"epoch": 0, "iter": 5935, "iter_tflops": 11.369607149482317, "iter_time": 1.8145827941894535, "loss": 0.3960703909397125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.142426523045552, "step_time": 1.4588086051940916} +{"epoch": 0, "iter": 5936, "iter_tflops": 33.856151333143515, "iter_time": 0.6093750381469727, "loss": 0.59429931640625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.23840430616436, "step_time": 0.4771474304199218} +{"epoch": 0, "iter": 5937, "iter_tflops": 19.61612131455943, "iter_time": 0.8016691284179688, "loss": 0.2694092094898224, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 20.716065623846184, "step_time": 0.7591035461425781} +{"epoch": 0, "iter": 5938, "iter_tflops": 12.129267411331918, "iter_time": 1.2965036010742186, "loss": 0.21281377971172333, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 14.178533267898878, "step_time": 1.1091160545349121} +{"epoch": 0, "iter": 5939, "iter_tflops": 28.29489997574718, "iter_time": 0.5557764434814452, "loss": 0.3768586814403534, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 30.11494543154709, "step_time": 0.5221871948242188} +{"epoch": 0, "iter": 5940, "iter_tflops": 28.109106838480738, "iter_time": 0.559449966430664, "loss": 0.3013840317726135, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 29.926826379618294, "step_time": 0.5254696464538574} +{"epoch": 0, "iter": 5941, "iter_tflops": 41.15032589726254, "iter_time": 0.5013591766357423, "loss": 0.09317447245121002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.77798182164662, "step_time": 0.4607419242858887} +{"epoch": 0, "iter": 5942, "iter_tflops": 44.91171178819065, "iter_time": 0.4593700103759766, "loss": 0.07465329021215439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11015832795381, 
"step_time": 0.42009828948974615} +{"epoch": 0, "iter": 5943, "iter_tflops": 45.30496677085073, "iter_time": 0.4553825988769531, "loss": 0.1472778618335724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.850793083199015, "step_time": 0.41385687637329105} +{"epoch": 0, "iter": 5944, "iter_tflops": 52.53203413335914, "iter_time": 0.3927335739135743, "loss": 0.09288973361253738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.344304818701126, "step_time": 0.3597758064270019} +{"epoch": 0, "iter": 5945, "iter_tflops": 30.459300062498674, "iter_time": 0.6773331451416016, "loss": 0.3388502597808838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.278658683446594, "step_time": 0.6391558494567872} +{"epoch": 0, "iter": 5946, "iter_tflops": 7.972068832468101, "iter_time": 2.587922149658203, "loss": 0.2990087866783142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.28051936311189, "step_time": 2.0068143234252926} +{"epoch": 0, "iter": 5947, "iter_tflops": 12.443951037737442, "iter_time": 1.6579214630126955, "loss": 0.2736442983150482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.589687059739617, "step_time": 1.414087459564209} +{"epoch": 0, "iter": 5948, "iter_tflops": 39.51057003615013, "iter_time": 0.5221664352416993, "loss": 0.30940762162208557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41031275734911, "step_time": 0.4752578868865967} +{"epoch": 0, "iter": 5949, "iter_tflops": 12.925710363913456, "iter_time": 1.2102828216552735, "loss": 0.19509173929691315, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 13.629123922351694, "step_time": 1.1478188400268556} +{"epoch": 0, "iter": 5950, "iter_tflops": 13.159902153824028, "iter_time": 1.1887447967529299, "loss": 0.20776063203811646, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 14.641532683379877, "step_time": 1.0684513397216797} +{"epoch": 0, "iter": 5951, "iter_tflops": 28.446271934793675, "iter_time": 0.549940788269043, "loss": 0.4231676459312439, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 
30.27567134437122, "step_time": 0.5167107620239259} +{"epoch": 0, "iter": 5952, "iter_tflops": 27.920552728005678, "iter_time": 0.5602956848144531, "loss": 0.16447174549102783, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.69763222657372, "step_time": 0.5267680969238282} +{"epoch": 0, "iter": 5953, "iter_tflops": 19.888264289766273, "iter_time": 1.037350128173828, "loss": 0.29913145303726196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.682759855971664, "step_time": 0.9975019607543943} +{"epoch": 0, "iter": 5954, "iter_tflops": 12.729743863687187, "iter_time": 1.6206998138427733, "loss": 0.25697752833366394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.204857600391666, "step_time": 1.1991435203552248} +{"epoch": 0, "iter": 5955, "iter_tflops": 49.81856740067388, "iter_time": 0.414124584197998, "loss": 0.3143434226512909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.032566599192286, "step_time": 0.381827013015747} +{"epoch": 0, "iter": 5956, "iter_tflops": 47.758443360412116, "iter_time": 0.4319883995056153, "loss": 0.29429560899734497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.75794746383944, "step_time": 0.3986072578430176} +{"epoch": 0, "iter": 5957, "iter_tflops": 29.69338360826829, "iter_time": 0.6948043975830078, "loss": 0.48207300901412964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.509293007117357, "step_time": 0.6547621841430664} +{"epoch": 0, "iter": 5958, "iter_tflops": 17.574429897643764, "iter_time": 1.1739267578124999, "loss": 0.32153069972991943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.327523341611556, "step_time": 1.0674463119506836} +{"epoch": 0, "iter": 5959, "iter_tflops": 48.15807788742617, "iter_time": 0.4284035911560059, "loss": 0.3385551869869232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51064611624532, "step_time": 0.3928935375213623} +{"epoch": 0, "iter": 5960, "iter_tflops": 49.1273938740535, "iter_time": 0.4199509048461914, "loss": 0.5054206848144531, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 53.35583818975518, "step_time": 0.38666984176635744} +{"epoch": 0, "iter": 5961, "iter_tflops": 30.216742552108897, "iter_time": 0.6827702713012695, "loss": 0.7841815948486328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.002900170285855, "step_time": 0.6446632461547852} +{"epoch": 0, "iter": 5962, "iter_tflops": 13.724661207341166, "iter_time": 1.503213317871094, "loss": 0.8287115097045898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.721316122054226, "step_time": 1.3123006591796875} +{"epoch": 0, "iter": 5963, "iter_tflops": 15.11925979777944, "iter_time": 1.364557113647461, "loss": 0.8454737663269043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.92610371931198, "step_time": 1.1508966941833498} +{"epoch": 0, "iter": 5964, "iter_tflops": 41.52884745300926, "iter_time": 0.49678945541381836, "loss": 0.9116387963294983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.68634318437125, "step_time": 0.4616867713928222} +{"epoch": 0, "iter": 5965, "iter_tflops": 22.268262073166238, "iter_time": 0.8205886993408202, "loss": 0.2646923363208771, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 23.455751649366338, "step_time": 0.779044921875} +{"epoch": 0, "iter": 5966, "iter_tflops": 25.016644519465874, "iter_time": 0.7304370574951171, "loss": 0.25370901823043823, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 27.738002474558794, "step_time": 0.6587743377685547} +{"epoch": 0, "iter": 5967, "iter_tflops": 33.3893867455373, "iter_time": 0.547272232055664, "loss": 0.11612541973590851, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 35.47345921087315, "step_time": 0.5151198844909668} +{"epoch": 0, "iter": 5968, "iter_tflops": 33.56430117643454, "iter_time": 0.5444202194213867, "loss": 0.22672978043556213, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 35.82501212011111, "step_time": 0.5100649833679198} +{"epoch": 0, "iter": 5969, "iter_tflops": 35.32108151091161, "iter_time": 0.5841014099121093, "loss": 0.0029904362745583057, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 38.04955856324286, "step_time": 0.5422163696289063} +{"epoch": 0, "iter": 5970, "iter_tflops": 27.077881536469803, "iter_time": 0.7619168243408204, "loss": 0.00635959068313241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.87668647970884, "step_time": 0.6681770572662353} +{"epoch": 0, "iter": 5971, "iter_tflops": 53.87336449195165, "iter_time": 0.3829553565979004, "loss": 0.0037484411150217056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.96105614338687, "step_time": 0.34991051483154295} +{"epoch": 0, "iter": 5972, "iter_tflops": 58.30804403175419, "iter_time": 0.3538292846679687, "loss": 0.0076564010232687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.00128564414275, "step_time": 0.3223543605804443} +{"epoch": 0, "iter": 5973, "iter_tflops": 22.8046848713406, "iter_time": 0.9046866302490233, "loss": 0.46470844745635986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.877133859936244, "step_time": 0.8640523452758788} +{"epoch": 0, "iter": 5974, "iter_tflops": 14.353956046506571, "iter_time": 1.4373106231689452, "loss": 0.405807763338089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.155796284181836, "step_time": 1.2025727729797364} +{"epoch": 0, "iter": 5975, "iter_tflops": 42.048740549088954, "iter_time": 0.4906471214294434, "loss": 0.5562554597854614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.87197295133274, "step_time": 0.4497537860870361} +{"epoch": 0, "iter": 5976, "iter_tflops": 42.20125641501195, "iter_time": 0.48887391662597657, "loss": 0.5056775808334351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.82795158750165, "step_time": 0.45018581008911135} +{"epoch": 0, "iter": 5977, "iter_tflops": 18.908611502577322, "iter_time": 1.0910951080322266, "loss": 0.06852634251117706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.2730670503409, "step_time": 1.0176602020263672} +{"epoch": 0, "iter": 5978, "iter_tflops": 20.46072989765072, "iter_time": 1.0083263702392578, "loss": 0.09385746717453003, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 25.477020689017724, "step_time": 0.8097922344207763} +{"epoch": 0, "iter": 5979, "iter_tflops": 50.69385986243508, "iter_time": 0.40697420883178714, "loss": 0.103549063205719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.74395624689341, "step_time": 0.37010457992553714} +{"epoch": 0, "iter": 5980, "iter_tflops": 49.588828185574705, "iter_time": 0.4160431747436523, "loss": 0.06985471397638321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.01600298096498, "step_time": 0.3819440975189209} +{"epoch": 0, "iter": 5981, "iter_tflops": 21.529833128358863, "iter_time": 0.9582560806274414, "loss": 0.05717242881655693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.51575507504684, "step_time": 0.9162958755493165} +{"epoch": 0, "iter": 5982, "iter_tflops": 13.956939137554285, "iter_time": 1.4781961364746092, "loss": 0.06249939277768135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.327018723417066, "step_time": 0.9673688468933105} +{"epoch": 0, "iter": 5983, "iter_tflops": 51.90848093939614, "iter_time": 0.3974513053894043, "loss": 0.04280107095837593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.78374747468043, "step_time": 0.3633274383544922} +{"epoch": 0, "iter": 5984, "iter_tflops": 59.151904618636166, "iter_time": 0.3487815589904785, "loss": 0.06674735248088837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.49451108699323, "step_time": 0.3198891372680664} +{"epoch": 0, "iter": 5985, "iter_tflops": 35.32508191713535, "iter_time": 0.5840352630615234, "loss": 0.17018738389015198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.90070302185141, "step_time": 0.5443459320068359} +{"epoch": 0, "iter": 5986, "iter_tflops": 14.596716228255383, "iter_time": 1.413406494140625, "loss": 0.13864880800247192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.927528349123527, "step_time": 1.2187894821166991} +{"epoch": 0, "iter": 5987, "iter_tflops": 35.461124051424314, "iter_time": 0.5817946853637695, "loss": 0.14811000227928162, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 44.116018541288796, "step_time": 0.4676553821563721} +{"epoch": 0, "iter": 5988, "iter_tflops": 47.00963412451599, "iter_time": 0.4388694763183594, "loss": 0.15612035989761353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.15539959950066, "step_time": 0.4033023624420166} +{"epoch": 0, "iter": 5989, "iter_tflops": 40.96596048229642, "iter_time": 0.503615520477295, "loss": 1.0898548364639282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.485332497926684, "step_time": 0.4637729415893555} +{"epoch": 0, "iter": 5990, "iter_tflops": 40.06883268220503, "iter_time": 0.5148913040161134, "loss": 0.6498734354972839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.746905942401405, "step_time": 0.4716012039184571} +{"epoch": 0, "iter": 5991, "iter_tflops": 38.16645569168535, "iter_time": 0.5405556564331054, "loss": 0.9489245414733887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.58936606698015, "step_time": 0.508288143157959} +{"epoch": 0, "iter": 5992, "iter_tflops": 46.90233321965901, "iter_time": 0.4398735008239746, "loss": 0.8080107569694519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69118765007178, "step_time": 0.40699566268920895} +{"epoch": 0, "iter": 5993, "iter_tflops": 42.577707665586814, "iter_time": 0.4845515327453613, "loss": 1.1457613706588745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.397362117505494, "step_time": 0.44466091537475577} +{"epoch": 0, "iter": 5994, "iter_tflops": 39.248783592230865, "iter_time": 0.5256492462158203, "loss": 0.674468457698822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.878097243896455, "step_time": 0.4811569271087646} +{"epoch": 0, "iter": 5995, "iter_tflops": 44.58990386775746, "iter_time": 0.4626853103637696, "loss": 1.2164881229400635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.39609060497202, "step_time": 0.4262966957092285} +{"epoch": 0, "iter": 5996, "iter_tflops": 48.19212675185602, "iter_time": 0.4281009140014648, "loss": 0.9690003395080566, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 52.00907600439691, "step_time": 0.3966825618743896} +{"epoch": 0, "iter": 5997, "iter_tflops": 24.208057481323387, "iter_time": 0.8522407684326171, "loss": 0.18146389722824097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.335526007186356, "step_time": 0.8143147888183594} +{"epoch": 0, "iter": 5998, "iter_tflops": 14.42904050794155, "iter_time": 1.4298312835693359, "loss": 0.16462738811969757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.40530194438737, "step_time": 1.0110653381347656} +{"epoch": 0, "iter": 5999, "iter_tflops": 40.037719841770794, "iter_time": 0.5152914199829102, "loss": 0.1248699203133583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.11107663332855, "step_time": 0.46770777511596684} +{"epoch": 0, "iter": 6000, "iter_tflops": 4.316035432497419, "iter_time": 4.780102905273438, "loss": 0.1479666829109192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.349865451889168, "step_time": 4.742926818847655} +{"epoch": 0, "iter": 6001, "iter_tflops": 9.013394800461668, "iter_time": 2.2889370727539062, "loss": 0.24206368625164032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.307580880442988, "step_time": 2.216590301513672} +{"epoch": 0, "iter": 6002, "iter_tflops": 19.256177299461356, "iter_time": 1.0714013061523437, "loss": 0.40453025698661804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.28355645326059, "step_time": 0.9258438415527344} +{"epoch": 0, "iter": 6003, "iter_tflops": 23.356156376473166, "iter_time": 0.8833257141113281, "loss": 0.23061810433864594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.486839603511736, "step_time": 0.7505807800292968} +{"epoch": 0, "iter": 6004, "iter_tflops": 25.225609156522907, "iter_time": 0.8178630447387696, "loss": 0.24664337933063507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.699714773292868, "step_time": 0.6946562843322753} +{"epoch": 0, "iter": 6005, "iter_tflops": 10.498884665973458, "iter_time": 1.9650747833251954, "loss": 0.648706316947937, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 10.818872183564922, "step_time": 1.9069541778564452} +{"epoch": 0, "iter": 6006, "iter_tflops": 6.812368690888903, "iter_time": 3.0284757690429682, "loss": 0.6191710233688354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.555515851236217, "step_time": 2.1590768966674805} +{"epoch": 0, "iter": 6007, "iter_tflops": 20.374600525575044, "iter_time": 1.0125888595581054, "loss": 0.7333884835243225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.51562840138926, "step_time": 0.8773354110717774} +{"epoch": 0, "iter": 6008, "iter_tflops": 20.65241936426446, "iter_time": 0.9989673919677734, "loss": 0.7128118872642517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.47811233441757, "step_time": 0.8787373199462891} +{"epoch": 0, "iter": 6009, "iter_tflops": 4.578412262731684, "iter_time": 3.202584777832031, "loss": 0.13962393999099731, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 5.273840080102352, "step_time": 2.7802802505493163} +{"epoch": 0, "iter": 6010, "iter_tflops": 16.91573072742892, "iter_time": 0.8668117065429688, "loss": 0.2830577790737152, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 19.18724642323304, "step_time": 0.7641926879882813} +{"epoch": 0, "iter": 6011, "iter_tflops": 16.411487066427895, "iter_time": 0.8934445343017579, "loss": 0.31959062814712524, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 18.852967388123226, "step_time": 0.7777424697875978} +{"epoch": 0, "iter": 6012, "iter_tflops": 19.499727941718753, "iter_time": 0.7519465637207031, "loss": 0.20525871217250824, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 22.176071597844455, "step_time": 0.6611970634460449} +{"epoch": 0, "iter": 6013, "iter_tflops": 23.3528170114459, "iter_time": 0.8834520263671876, "loss": 0.0018549824599176645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.552246184522886, "step_time": 0.8402935256958007} +{"epoch": 0, "iter": 6014, "iter_tflops": 19.054461279418426, "iter_time": 1.0827434692382814, "loss": 0.002701100194826722, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.083403291133905, "step_time": 0.934235237121582} +{"epoch": 0, "iter": 6015, "iter_tflops": 28.041645627799692, "iter_time": 0.7357304840087889, "loss": 0.009031614288687706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.847258865837443, "step_time": 0.6688144836425781} +{"epoch": 0, "iter": 6016, "iter_tflops": 27.767525237947467, "iter_time": 0.7429935989379883, "loss": 0.0131392115727067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.864509486260857, "step_time": 0.6908231163024903} +{"epoch": 0, "iter": 6017, "iter_tflops": 20.690530797896315, "iter_time": 0.9971273193359375, "loss": 0.11196240037679672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.79357848660551, "step_time": 0.867086618423462} +{"epoch": 0, "iter": 6018, "iter_tflops": 31.017669692301734, "iter_time": 0.665140022277832, "loss": 0.05811304226517677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.80970234873284, "step_time": 0.6288107490539552} +{"epoch": 0, "iter": 6019, "iter_tflops": 21.856101555994375, "iter_time": 0.9439512100219727, "loss": 0.07707730680704117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.5640876414617, "step_time": 0.8398884506225587} +{"epoch": 0, "iter": 6020, "iter_tflops": 23.343898836670128, "iter_time": 0.883789535522461, "loss": 0.05976669862866402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.566963043127505, "step_time": 0.7765695114135742} +{"epoch": 0, "iter": 6021, "iter_tflops": 22.69475036993332, "iter_time": 0.9090689773559569, "loss": 0.9062767624855042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.46446453090382, "step_time": 0.7795772132873535} +{"epoch": 0, "iter": 6022, "iter_tflops": 30.572342722459254, "iter_time": 0.6748286743164063, "loss": 0.7536319494247437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.92880994217322, "step_time": 0.626536262512207} +{"epoch": 0, "iter": 6023, "iter_tflops": 26.983274679869023, "iter_time": 0.7645882034301759, "loss": 
0.8962713479995728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.932701142710382, "step_time": 0.7130718078613282} +{"epoch": 0, "iter": 6024, "iter_tflops": 22.03772487601162, "iter_time": 0.9361716613769532, "loss": 0.7823282480239868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.525630656922512, "step_time": 0.8082500991821289} +{"epoch": 0, "iter": 6025, "iter_tflops": 11.208138901681913, "iter_time": 1.8407242889404296, "loss": 0.10042431950569153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.06161916290404, "step_time": 1.7104746246337892} +{"epoch": 0, "iter": 6026, "iter_tflops": 35.78066843657136, "iter_time": 0.5765988845825195, "loss": 0.17021115124225616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.68790527904683, "step_time": 0.5332698516845703} +{"epoch": 0, "iter": 6027, "iter_tflops": 36.837954627779084, "iter_time": 0.5600499191284181, "loss": 0.18633319437503815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.374404770947315, "step_time": 0.5109943695068359} +{"epoch": 0, "iter": 6028, "iter_tflops": 36.65755837494373, "iter_time": 0.5628059921264649, "loss": 0.11888749897480011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.207143652468375, "step_time": 0.5262075119018554} +{"epoch": 0, "iter": 6029, "iter_tflops": 21.485517728294685, "iter_time": 0.960232551574707, "loss": 0.8116323351860046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.992332095373737, "step_time": 0.8973032150268554} +{"epoch": 0, "iter": 6030, "iter_tflops": 26.1240153308211, "iter_time": 0.7897366943359375, "loss": 0.8870782852172852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.23251784796675, "step_time": 0.64007080078125} +{"epoch": 0, "iter": 6031, "iter_tflops": 31.56329349477488, "iter_time": 0.653641975402832, "loss": 0.9125537872314453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.89658133880575, "step_time": 0.608648208618164} +{"epoch": 0, "iter": 6032, "iter_tflops": 28.47822835628789, "iter_time": 0.7244514389038087, "loss": 
0.8550518155097961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.33442934990357, "step_time": 0.6380534286499023} +{"epoch": 0, "iter": 6033, "iter_tflops": 11.369048737645894, "iter_time": 1.8146719207763673, "loss": 0.07364942133426666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.097987701782149, "step_time": 1.7053326568603515} +{"epoch": 0, "iter": 6034, "iter_tflops": 34.241178511208695, "iter_time": 0.6025228805541992, "loss": 0.06838447600603104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.005514001501254, "step_time": 0.5031297378540038} +{"epoch": 0, "iter": 6035, "iter_tflops": 37.69065051529735, "iter_time": 0.5473796081542969, "loss": 0.11365261673927307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.24752904030161, "step_time": 0.5126052207946777} +{"epoch": 0, "iter": 6036, "iter_tflops": 37.81311044784562, "iter_time": 0.5456068878173828, "loss": 0.11855445057153702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.637859870211074, "step_time": 0.5076815948486327} +{"epoch": 0, "iter": 6037, "iter_tflops": 9.024624219751534, "iter_time": 2.2860889282226564, "loss": 0.10470645129680634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.223231998114626, "step_time": 2.236861602783203} +{"epoch": 0, "iter": 6038, "iter_tflops": 21.05798605064735, "iter_time": 0.9797277603149415, "loss": 0.06942193955183029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.935522059056336, "step_time": 0.8619445800781249} +{"epoch": 0, "iter": 6039, "iter_tflops": 36.974806546716295, "iter_time": 0.5579770507812499, "loss": 0.12112272530794144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27841942364154, "step_time": 0.4998033790588379} +{"epoch": 0, "iter": 6040, "iter_tflops": 29.09678724211737, "iter_time": 0.7090505676269532, "loss": 0.09906353801488876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.437146832029786, "step_time": 0.656264820098877} +{"epoch": 0, "iter": 6041, "iter_tflops": 6.796195207586141, "iter_time": 2.4828380432128903, 
"loss": 0.04251876845955849, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 7.249698256866038, "step_time": 2.32752473449707} +{"epoch": 0, "iter": 6042, "iter_tflops": 16.23964524581259, "iter_time": 1.039052993774414, "loss": 0.03958992660045624, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 18.73220359793435, "step_time": 0.9007937545776367} +{"epoch": 0, "iter": 6043, "iter_tflops": 22.991560005674803, "iter_time": 0.733915054321289, "loss": 0.0712493434548378, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 27.31647129965408, "step_time": 0.6177171211242676} +{"epoch": 0, "iter": 6044, "iter_tflops": 29.4785582721526, "iter_time": 0.5724110336303712, "loss": 0.04679454118013382, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 32.042401506559685, "step_time": 0.5266100921630859} +{"epoch": 0, "iter": 6045, "iter_tflops": 4.125549710300876, "iter_time": 5.000810791015625, "loss": 0.23584191501140594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.237251820456608, "step_time": 4.868979797363281} +{"epoch": 0, "iter": 6046, "iter_tflops": 21.754264289996122, "iter_time": 0.9483700866699218, "loss": 0.29597166180610657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.642040405886164, "step_time": 0.6960078735351563} +{"epoch": 0, "iter": 6047, "iter_tflops": 25.431868144570213, "iter_time": 0.8112299652099609, "loss": 0.17894333600997925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.699215396989597, "step_time": 0.7188730850219727} +{"epoch": 0, "iter": 6048, "iter_tflops": 24.67925277799702, "iter_time": 0.8359691314697266, "loss": 0.20470470190048218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.183333168956835, "step_time": 0.7320317077636718} +{"epoch": 0, "iter": 6049, "iter_tflops": 6.356804590493726, "iter_time": 2.47382763671875, "loss": 0.43447163701057434, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 7.217256463619619, "step_time": 2.178894287109375} +{"epoch": 0, "iter": 6050, "iter_tflops": 15.453050958325866, "iter_time": 1.0176397476196288, 
"loss": 0.2748754024505615, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 17.99261533896109, "step_time": 0.8740051727294922} +{"epoch": 0, "iter": 6051, "iter_tflops": 18.891742898254126, "iter_time": 0.8324080505371094, "loss": 0.29311466217041016, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 21.7728377511807, "step_time": 0.7222594985961913} +{"epoch": 0, "iter": 6052, "iter_tflops": 27.978174361398775, "iter_time": 0.5620680847167968, "loss": 0.4305437505245209, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 29.766807605104173, "step_time": 0.5282944374084473} +{"epoch": 0, "iter": 6053, "iter_tflops": 36.8112323722503, "iter_time": 0.5604564743041992, "loss": 0.0745060071349144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71225035626539, "step_time": 0.519514591217041} +{"epoch": 0, "iter": 6054, "iter_tflops": 30.377510859621598, "iter_time": 0.6791568145751954, "loss": 0.0837148129940033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.205827439951406, "step_time": 0.6031455764770507} +{"epoch": 0, "iter": 6055, "iter_tflops": 41.177668373113804, "iter_time": 0.5010262680053711, "loss": 0.08910040557384491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.364684147845, "step_time": 0.45478314018249516} +{"epoch": 0, "iter": 6056, "iter_tflops": 45.6358019966533, "iter_time": 0.4520813179016113, "loss": 0.07998570054769516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.10962948015173, "step_time": 0.41171913909912106} +{"epoch": 0, "iter": 6057, "iter_tflops": 11.678834329694945, "iter_time": 1.3745654602050785, "loss": 0.2164578139781952, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 12.318542058168028, "step_time": 1.303183624267578} +{"epoch": 0, "iter": 6058, "iter_tflops": 15.385451741543875, "iter_time": 1.0434092254638672, "loss": 0.23897956311702728, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 18.27436459388551, "step_time": 0.878461311340332} +{"epoch": 0, "iter": 6059, "iter_tflops": 28.835277972244864, "iter_time": 0.5567250747680664, 
"loss": 0.372097373008728, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.752727088886896, "step_time": 0.5220129661560059} +{"epoch": 0, "iter": 6060, "iter_tflops": 28.684476958793987, "iter_time": 0.5596519088745117, "loss": 0.40623170137405396, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.536378983170906, "step_time": 0.5257113914489746} +{"epoch": 0, "iter": 6061, "iter_tflops": 31.195310841952704, "iter_time": 0.6613523941040038, "loss": 0.08698812127113342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.2618373541982, "step_time": 0.6202631950378418} +{"epoch": 0, "iter": 6062, "iter_tflops": 18.44919295972623, "iter_time": 1.118265365600586, "loss": 0.09247025102376938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.904413865035874, "step_time": 0.9418692340850829} +{"epoch": 0, "iter": 6063, "iter_tflops": 41.50379386818038, "iter_time": 0.49708934020996093, "loss": 0.12906597554683685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59654124264589, "step_time": 0.4524705810546875} +{"epoch": 0, "iter": 6064, "iter_tflops": 40.259741810274825, "iter_time": 0.512449722290039, "loss": 0.05509312450885773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.801138393648486, "step_time": 0.4710172901153565} +{"epoch": 0, "iter": 6065, "iter_tflops": 24.205890098259367, "iter_time": 0.8523170776367187, "loss": 0.7688820362091064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.233731455303666, "step_time": 0.7864338150024414} +{"epoch": 0, "iter": 6066, "iter_tflops": 41.211259647773765, "iter_time": 0.5006178817749023, "loss": 0.6737642288208008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.67401014669282, "step_time": 0.4618142280578613} +{"epoch": 0, "iter": 6067, "iter_tflops": 45.97271868924707, "iter_time": 0.4487681846618652, "loss": 0.6314294338226318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.302763297583084, "step_time": 0.4184571437835693} +{"epoch": 0, "iter": 6068, "iter_tflops": 52.318674166299296, "iter_time": 
0.3943351745605469, "loss": 0.6547502875328064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.23715610877866, "step_time": 0.36685876274108886} +{"epoch": 0, "iter": 6069, "iter_tflops": 21.64105076470572, "iter_time": 0.953331413269043, "loss": 0.23522572219371796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.493332861324877, "step_time": 0.9172092742919921} +{"epoch": 0, "iter": 6070, "iter_tflops": 15.053307618041906, "iter_time": 1.3705355682373046, "loss": 0.2043292075395584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.14316038790146, "step_time": 1.1371278800964355} +{"epoch": 0, "iter": 6071, "iter_tflops": 42.7404012582642, "iter_time": 0.48270706176757805, "loss": 0.3184853196144104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.7465150823814, "step_time": 0.44133971214294426} +{"epoch": 0, "iter": 6072, "iter_tflops": 46.26085976564648, "iter_time": 0.44597298049926754, "loss": 0.40978729724884033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.747143345325334, "step_time": 0.4065468940734864} +{"epoch": 0, "iter": 6073, "iter_tflops": 40.51081664184656, "iter_time": 0.5092737007141114, "loss": 0.11931340396404266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.82332740519825, "step_time": 0.46027581405639645} +{"epoch": 0, "iter": 6074, "iter_tflops": 12.65757042944914, "iter_time": 1.6299410400390622, "loss": 0.19307196140289307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.81449256071564, "step_time": 1.392629104614258} +{"epoch": 0, "iter": 6075, "iter_tflops": 31.338394463952415, "iter_time": 0.6583328170776367, "loss": 0.14310744404792786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.550677218786575, "step_time": 0.5494200115203858} +{"epoch": 0, "iter": 6076, "iter_tflops": 39.93724089944901, "iter_time": 0.5165878524780274, "loss": 0.14539086818695068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.652048768125525, "step_time": 0.4726260070800781} +{"epoch": 0, "iter": 6077, "iter_tflops": 17.495075014184916, 
"iter_time": 0.8054696350097656, "loss": 0.22262009978294373, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 18.655156472477668, "step_time": 0.7553810501098633} +{"epoch": 0, "iter": 6078, "iter_tflops": 25.081348937866778, "iter_time": 0.5618418579101563, "loss": 0.2521582841873169, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 26.775519447906596, "step_time": 0.5262923736572266} +{"epoch": 0, "iter": 6079, "iter_tflops": 26.592911600951073, "iter_time": 0.5299063110351563, "loss": 0.1284266710281372, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 28.282374732503598, "step_time": 0.498252067565918} +{"epoch": 0, "iter": 6080, "iter_tflops": 25.39088528858856, "iter_time": 0.5549925308227539, "loss": 0.5161073207855225, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 27.064480477338083, "step_time": 0.5206732749938965} +{"epoch": 0, "iter": 6081, "iter_tflops": 25.275687104436585, "iter_time": 0.8162426376342774, "loss": 0.02248309925198555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.67678268273306, "step_time": 0.7733726272583008} +{"epoch": 0, "iter": 6082, "iter_tflops": 15.396452925548932, "iter_time": 1.3399900360107422, "loss": 0.001972566358745098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.582000613995287, "step_time": 1.1102729969024658} +{"epoch": 0, "iter": 6083, "iter_tflops": 43.50614545858245, "iter_time": 0.4742110176086426, "loss": 0.0027500493451952934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.170413297114635, "step_time": 0.4282938861846924} +{"epoch": 0, "iter": 6084, "iter_tflops": 50.33502777318062, "iter_time": 0.4098754768371582, "loss": 0.007479298859834671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.75517041512299, "step_time": 0.37003013992309575} +{"epoch": 0, "iter": 6085, "iter_tflops": 19.149083426411224, "iter_time": 1.0773932647705078, "loss": 0.8494625091552734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.21498971160461, "step_time": 1.0205839233398437} +{"epoch": 0, "iter": 6086, "iter_tflops": 
16.875715146417228, "iter_time": 1.222531509399414, "loss": 0.9932926893234253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.443686659939466, "step_time": 0.9192381725311278} +{"epoch": 0, "iter": 6087, "iter_tflops": 37.65778229430244, "iter_time": 0.5478573684692383, "loss": 1.0129196643829346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.921927958019765, "step_time": 0.5041574172973633} +{"epoch": 0, "iter": 6088, "iter_tflops": 39.94383683358964, "iter_time": 0.5165025482177734, "loss": 0.6375569105148315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.115789316146866, "step_time": 0.47850436782836914} +{"epoch": 0, "iter": 6089, "iter_tflops": 19.81819085135766, "iter_time": 1.0410180053710938, "loss": 0.03445069491863251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.648462746023327, "step_time": 0.953005012512207} +{"epoch": 0, "iter": 6090, "iter_tflops": 17.354291315897242, "iter_time": 1.1888179779052734, "loss": 0.02857864834368229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.766935540155078, "step_time": 0.9061866703033447} +{"epoch": 0, "iter": 6091, "iter_tflops": 51.50407772197948, "iter_time": 0.4005720405578614, "loss": 0.03389521688222885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.1480421655273, "step_time": 0.3674410133361817} +{"epoch": 0, "iter": 6092, "iter_tflops": 54.831889514581135, "iter_time": 0.37626085281372074, "loss": 0.05170983448624611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.91410526554414, "step_time": 0.3443445148468018} +{"epoch": 0, "iter": 6093, "iter_tflops": 41.76520325099235, "iter_time": 0.4939780464172363, "loss": 0.7288482785224915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.634853375633135, "step_time": 0.45209071540832524} +{"epoch": 0, "iter": 6094, "iter_tflops": 42.05780510056888, "iter_time": 0.490541374206543, "loss": 0.7556790709495544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.08612462154218, "step_time": 0.4476638832092285} +{"epoch": 0, "iter": 6095, 
"iter_tflops": 49.67609766525838, "iter_time": 0.4153122825622559, "loss": 0.7370675206184387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.8345159106567, "step_time": 0.38323170852661137} +{"epoch": 0, "iter": 6096, "iter_tflops": 45.43473128242555, "iter_time": 0.45408199691772466, "loss": 0.6876299977302551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.98609690873288, "step_time": 0.421162223815918} +{"epoch": 0, "iter": 6097, "iter_tflops": 35.75306217533716, "iter_time": 0.5770440979003906, "loss": 0.8272121548652649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.35425546144951, "step_time": 0.5379088516235352} +{"epoch": 0, "iter": 6098, "iter_tflops": 33.65793312266906, "iter_time": 0.612963768005371, "loss": 0.8805961608886719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80466855243915, "step_time": 0.4935117111206054} +{"epoch": 0, "iter": 6099, "iter_tflops": 48.547637649481715, "iter_time": 0.42496596145629884, "loss": 0.816410481929779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.53106339340217, "step_time": 0.39274083137512206} +{"epoch": 0, "iter": 6100, "iter_tflops": 52.88418304440728, "iter_time": 0.39011841201782227, "loss": 0.7348564267158508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.319157775810496, "step_time": 0.3599336471557617} +{"epoch": 0, "iter": 6101, "iter_tflops": 36.27106178216411, "iter_time": 0.5688031311035157, "loss": 0.12627801299095154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.90513881131814, "step_time": 0.5302922477722168} +{"epoch": 0, "iter": 6102, "iter_tflops": 18.891019497165626, "iter_time": 1.0921111755371096, "loss": 0.19479945302009583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.66329847484194, "step_time": 0.9103305740356444} +{"epoch": 0, "iter": 6103, "iter_tflops": 39.84462052954335, "iter_time": 0.5177886810302735, "loss": 0.22058726847171783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.81563107244718, "step_time": 0.47086149406433103} +{"epoch": 0, "iter": 6104, 
"iter_tflops": 38.63592068741836, "iter_time": 0.5339873657226563, "loss": 0.3119578957557678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.8032708677892, "step_time": 0.49352821159362786} +{"epoch": 0, "iter": 6105, "iter_tflops": 19.734879846240414, "iter_time": 1.0454126739501952, "loss": 0.17478173971176147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.081075526422936, "step_time": 0.9786546936035156} +{"epoch": 0, "iter": 6106, "iter_tflops": 22.69352821209506, "iter_time": 0.9091179351806641, "loss": 0.20465689897537231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.41670430218786, "step_time": 0.6566918449401855} +{"epoch": 0, "iter": 6107, "iter_tflops": 50.06230880208963, "iter_time": 0.41210831069946285, "loss": 0.1947477012872696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.80173660771035, "step_time": 0.37646787834167483} +{"epoch": 0, "iter": 6108, "iter_tflops": 48.931219825824584, "iter_time": 0.42163456344604494, "loss": 0.23556260764598846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.9017587342478, "step_time": 0.38998880195617674} +{"epoch": 0, "iter": 6109, "iter_tflops": 31.40064152671359, "iter_time": 0.6570277709960938, "loss": 0.08367148786783218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.28729278144077, "step_time": 0.6197888679504394} +{"epoch": 0, "iter": 6110, "iter_tflops": 14.153863041218035, "iter_time": 1.457629867553711, "loss": 0.03899325057864189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.777499966413437, "step_time": 1.2296881866455078} +{"epoch": 0, "iter": 6111, "iter_tflops": 39.599150785150535, "iter_time": 0.5209983825683594, "loss": 0.04542985185980797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73746184319989, "step_time": 0.4717030353546143} +{"epoch": 0, "iter": 6112, "iter_tflops": 40.386803848012185, "iter_time": 0.5108374900817871, "loss": 0.028030600398778915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.12709737408054, "step_time": 0.4675379695892334} +{"epoch": 0, "iter": 
6113, "iter_tflops": 20.25170393642683, "iter_time": 1.0187337112426758, "loss": 0.46345800161361694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.258576886746944, "step_time": 0.970483283996582} +{"epoch": 0, "iter": 6114, "iter_tflops": 12.773334804959342, "iter_time": 1.615168930053711, "loss": 0.3042658567428589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.33783958563915, "step_time": 1.1899460372924806} +{"epoch": 0, "iter": 6115, "iter_tflops": 40.16820531120251, "iter_time": 0.5136175079345704, "loss": 0.4251784086227417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.88147166916967, "step_time": 0.47015500450134273} +{"epoch": 0, "iter": 6116, "iter_tflops": 35.68590827786489, "iter_time": 0.578129981994629, "loss": 0.4930489957332611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.0010796969838, "step_time": 0.5289877529144287} +{"epoch": 0, "iter": 6117, "iter_tflops": 35.33700407018927, "iter_time": 0.5838382186889649, "loss": 0.5467509627342224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.76059503713748, "step_time": 0.5322697830200195} +{"epoch": 0, "iter": 6118, "iter_tflops": 45.40594319281808, "iter_time": 0.4543698921203614, "loss": 0.4950122535228729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.869217878784156, "step_time": 0.4137039718627929} +{"epoch": 0, "iter": 6119, "iter_tflops": 51.85859261242131, "iter_time": 0.3978336563110352, "loss": 0.7968464493751526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.66188149595971, "step_time": 0.3641088676452637} +{"epoch": 0, "iter": 6120, "iter_tflops": 48.98391136985713, "iter_time": 0.4211810150146485, "loss": 0.5794697999954224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.67316707853577, "step_time": 0.39168128013610837} +{"epoch": 0, "iter": 6121, "iter_tflops": 45.39068662748953, "iter_time": 0.4371686897277832, "loss": 0.0817001685500145, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 49.768641135532796, "step_time": 0.3987126541137695} +{"epoch": 0, "iter": 6122, 
"iter_tflops": 48.42929073277888, "iter_time": 0.4097393684387207, "loss": 0.04506632313132286, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 53.01116448853526, "step_time": 0.37432467651367185} +{"epoch": 0, "iter": 6123, "iter_tflops": 48.81332190072109, "iter_time": 0.4065158081054687, "loss": 0.06336479634046555, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 52.81807194944203, "step_time": 0.3756931343078614} +{"epoch": 0, "iter": 6124, "iter_tflops": 48.17217207913785, "iter_time": 0.41192634963989255, "loss": 0.08947156369686127, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 52.37941020922005, "step_time": 0.37883945083618165} +{"epoch": 0, "iter": 6125, "iter_tflops": 39.74097705426266, "iter_time": 0.5191390609741211, "loss": 0.6218761801719666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.16233271505955, "step_time": 0.47798838043212893} +{"epoch": 0, "iter": 6126, "iter_tflops": 45.508132747672576, "iter_time": 0.45334959411621095, "loss": 0.48914459347724915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.39055063732917, "step_time": 0.4177133731842041} +{"epoch": 0, "iter": 6127, "iter_tflops": 49.20074027779458, "iter_time": 0.41932485961914057, "loss": 0.41500037908554077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.24897086745976, "step_time": 0.3874458637237549} +{"epoch": 0, "iter": 6128, "iter_tflops": 47.398921461951026, "iter_time": 0.43526504135131844, "loss": 0.37247705459594727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.85694401389223, "step_time": 0.40566915512084956} +{"epoch": 0, "iter": 6129, "iter_tflops": 28.31837401670212, "iter_time": 0.7285408935546875, "loss": 0.25783634185791016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.83276712940592, "step_time": 0.6915581588745117} +{"epoch": 0, "iter": 6130, "iter_tflops": 12.48743416491345, "iter_time": 1.6521483306884768, "loss": 0.27964380383491516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.106977503096672, "step_time": 1.1394001846313477} +{"epoch": 0, 
"iter": 6131, "iter_tflops": 46.99401724387358, "iter_time": 0.4390153198242187, "loss": 0.2337430715560913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.895909317349236, "step_time": 0.4053585796356201} +{"epoch": 0, "iter": 6132, "iter_tflops": 54.84719408793294, "iter_time": 0.3761558609008789, "loss": 0.23064377903938293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.48602140868657, "step_time": 0.3468225479125977} +{"epoch": 0, "iter": 6133, "iter_tflops": 40.6121201240762, "iter_time": 0.508003360748291, "loss": 0.3833860158920288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35290560355572, "step_time": 0.46515765380859375} +{"epoch": 0, "iter": 6134, "iter_tflops": 46.74755169850642, "iter_time": 0.44132992553710937, "loss": 0.4382495582103729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.818233387149675, "step_time": 0.40597817230224614} +{"epoch": 0, "iter": 6135, "iter_tflops": 49.21324287342239, "iter_time": 0.41921833038330075, "loss": 0.5413342714309692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.18788549732506, "step_time": 0.38789083862304685} +{"epoch": 0, "iter": 6136, "iter_tflops": 48.385349814525284, "iter_time": 0.4263913269042968, "loss": 0.3860277533531189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.0795109350992, "step_time": 0.39614606857299806} +{"epoch": 0, "iter": 6137, "iter_tflops": 30.848958213004593, "iter_time": 0.6687776412963867, "loss": 0.3808076083660126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.85165324062626, "step_time": 0.6280077705383301} +{"epoch": 0, "iter": 6138, "iter_tflops": 34.54793616699977, "iter_time": 0.597172966003418, "loss": 0.5224338173866272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08185632097004, "step_time": 0.49026101303100583} +{"epoch": 0, "iter": 6139, "iter_tflops": 49.05417639758391, "iter_time": 0.42057771682739253, "loss": 0.37734851241111755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.51542991316885, "step_time": 0.38551672935485837} +{"epoch": 0, 
"iter": 6140, "iter_tflops": 48.69062870077313, "iter_time": 0.42371795272827145, "loss": 0.34484371542930603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.62549185233538, "step_time": 0.3920361175537109} +{"epoch": 0, "iter": 6141, "iter_tflops": 43.70134643739128, "iter_time": 0.47209285736083983, "loss": 0.16749687492847443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.62381039973675, "step_time": 0.43320963478088376} +{"epoch": 0, "iter": 6142, "iter_tflops": 45.25786782213837, "iter_time": 0.4558565063476563, "loss": 0.2126602977514267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.709945309430395, "step_time": 0.4150294952392578} +{"epoch": 0, "iter": 6143, "iter_tflops": 48.501960968823546, "iter_time": 0.4253661727905273, "loss": 0.3587018549442291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.60641925838328, "step_time": 0.39217825126647954} +{"epoch": 0, "iter": 6144, "iter_tflops": 50.93314803648595, "iter_time": 0.4050622100830078, "loss": 0.20871339738368988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.59509974322997, "step_time": 0.37109553909301757} +{"epoch": 0, "iter": 6145, "iter_tflops": 26.313037041741982, "iter_time": 0.7840635604858398, "loss": 0.28989678621292114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.68883613512001, "step_time": 0.7451051177978517} +{"epoch": 0, "iter": 6146, "iter_tflops": 13.458364397046683, "iter_time": 1.532956970214844, "loss": 0.3655644655227661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.052608114291445, "step_time": 1.2098497409820559} +{"epoch": 0, "iter": 6147, "iter_tflops": 41.82740620577249, "iter_time": 0.49324343490600586, "loss": 0.24885773658752441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71313121617624, "step_time": 0.4513165683746338} +{"epoch": 0, "iter": 6148, "iter_tflops": 44.34576086803169, "iter_time": 0.4652325973510742, "loss": 0.24435441195964813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.03518215290744, "step_time": 0.429499641418457} 
+{"epoch": 0, "iter": 6149, "iter_tflops": 31.1475916838921, "iter_time": 0.6623656082153321, "loss": 0.47336187958717346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.36017866830922, "step_time": 0.6004361534118652} +{"epoch": 0, "iter": 6150, "iter_tflops": 37.618285817807774, "iter_time": 0.5484325790405273, "loss": 0.5958629846572876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.92466763682143, "step_time": 0.5041236667633057} +{"epoch": 0, "iter": 6151, "iter_tflops": 39.29259417531349, "iter_time": 0.5250631561279298, "loss": 0.42665791511535645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.63402219847377, "step_time": 0.48391149711608883} +{"epoch": 0, "iter": 6152, "iter_tflops": 40.394430385488675, "iter_time": 0.5107410430908202, "loss": 0.5107817053794861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.839812250835315, "step_time": 0.4706017761230469} +{"epoch": 0, "iter": 6153, "iter_tflops": 32.227872398433966, "iter_time": 0.6401630630493165, "loss": 0.45434579253196716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.236994574656485, "step_time": 0.5854952659606933} +{"epoch": 0, "iter": 6154, "iter_tflops": 38.778972420713956, "iter_time": 0.5320175399780274, "loss": 0.4385531544685364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.71751792740598, "step_time": 0.4829656429290772} +{"epoch": 0, "iter": 6155, "iter_tflops": 38.72883674999679, "iter_time": 0.5327062530517579, "loss": 0.4467245638370514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08544713658189, "step_time": 0.49021918296813966} +{"epoch": 0, "iter": 6156, "iter_tflops": 40.93192572647079, "iter_time": 0.5040342750549316, "loss": 0.4862428307533264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.51900115845251, "step_time": 0.4634222011566162} +{"epoch": 0, "iter": 6157, "iter_tflops": 41.7530142944049, "iter_time": 0.4941222534179687, "loss": 0.008418415673077106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.61843849664369, "step_time": 0.4425522212982177} 
+{"epoch": 0, "iter": 6158, "iter_tflops": 43.98351034862963, "iter_time": 0.4690642776489258, "loss": 0.006536480039358139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.66724128486315, "step_time": 0.423921573638916} +{"epoch": 0, "iter": 6159, "iter_tflops": 42.71595631143259, "iter_time": 0.48298329925537115, "loss": 0.004396731965243816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1134666405025, "step_time": 0.4379022598266601} +{"epoch": 0, "iter": 6160, "iter_tflops": 49.94496796735651, "iter_time": 0.4130765190124512, "loss": 0.025649873539805412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.17162130921043, "step_time": 0.373943941116333} +{"epoch": 0, "iter": 6161, "iter_tflops": 22.339142040312854, "iter_time": 0.8919869537353516, "loss": 0.08576836436986923, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 23.84093358165274, "step_time": 0.8357987823486328} +{"epoch": 0, "iter": 6162, "iter_tflops": 8.851227136964747, "iter_time": 2.2512384948730473, "loss": 0.0738888531923294, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 10.354152011889427, "step_time": 1.9244669418334963} +{"epoch": 0, "iter": 6163, "iter_tflops": 13.927254521848653, "iter_time": 1.4307359161376951, "loss": 0.07006809860467911, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 16.257037735377075, "step_time": 1.22569828414917} +{"epoch": 0, "iter": 6164, "iter_tflops": 32.724816581357835, "iter_time": 0.6089025192260742, "loss": 0.06521967053413391, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 42.44560614654403, "step_time": 0.4694531440734863} +{"epoch": 0, "iter": 6165, "iter_tflops": 18.08264955371391, "iter_time": 0.810874168395996, "loss": 0.3660300672054291, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 19.134437166350626, "step_time": 0.766301788330078} +{"epoch": 0, "iter": 6166, "iter_tflops": 10.308622324463018, "iter_time": 1.4223775939941408, "loss": 0.21349355578422546, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 13.954026130418193, "step_time": 
1.0507901649475098} +{"epoch": 0, "iter": 6167, "iter_tflops": 26.170051020035434, "iter_time": 0.5602875366210938, "loss": 0.14779764413833618, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.954044989979867, "step_time": 0.5245306510925293} +{"epoch": 0, "iter": 6168, "iter_tflops": 25.222889017437847, "iter_time": 0.581327278137207, "loss": 0.18722011148929596, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 26.702021418395752, "step_time": 0.5491252212524413} +{"epoch": 0, "iter": 6169, "iter_tflops": 25.19803490121478, "iter_time": 0.8187580337524415, "loss": 0.615666925907135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.454009991648842, "step_time": 0.7798852996826172} +{"epoch": 0, "iter": 6170, "iter_tflops": 16.402759222338386, "iter_time": 1.2577818908691407, "loss": 0.8099230527877808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.427467865530495, "step_time": 1.0099682273864747} +{"epoch": 0, "iter": 6171, "iter_tflops": 38.499653588395596, "iter_time": 0.5358773803710937, "loss": 0.807473361492157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.403934768943685, "step_time": 0.48653724288940425} +{"epoch": 0, "iter": 6172, "iter_tflops": 38.13878057843537, "iter_time": 0.5409479064941407, "loss": 0.8550149202346802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.688010011497184, "step_time": 0.49489274024963376} +{"epoch": 0, "iter": 6173, "iter_tflops": 17.200393024754312, "iter_time": 1.1994547729492187, "loss": 0.6465088725090027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.48610885692286, "step_time": 1.1160322418212891} +{"epoch": 0, "iter": 6174, "iter_tflops": 14.60577922472512, "iter_time": 1.41252946472168, "loss": 0.7203987836837769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.745895505809045, "step_time": 1.2320089721679688} +{"epoch": 0, "iter": 6175, "iter_tflops": 38.74601703231116, "iter_time": 0.5324700469970703, "loss": 0.8084518909454346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.75962705897159, "step_time": 
0.48249002456665036} +{"epoch": 0, "iter": 6176, "iter_tflops": 41.97870293515588, "iter_time": 0.49146572113037107, "loss": 0.6468068361282349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61589250398244, "step_time": 0.45227863311767574} +{"epoch": 0, "iter": 6177, "iter_tflops": 22.371990301563393, "iter_time": 0.9221840896606446, "loss": 0.5114611983299255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.05204728868746, "step_time": 0.8577687072753907} +{"epoch": 0, "iter": 6178, "iter_tflops": 26.638108512927538, "iter_time": 0.7744954376220703, "loss": 0.3914378583431244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.366009984032114, "step_time": 0.6794140396118165} +{"epoch": 0, "iter": 6179, "iter_tflops": 36.16733016441402, "iter_time": 0.5704345169067382, "loss": 0.2960030734539032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.47126610368597, "step_time": 0.5226863880157471} +{"epoch": 0, "iter": 6180, "iter_tflops": 41.92923199103827, "iter_time": 0.4920455856323242, "loss": 0.4051879644393921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91687344141349, "step_time": 0.44931398773193354} +{"epoch": 0, "iter": 6181, "iter_tflops": 21.199029769833167, "iter_time": 0.9732093276977539, "loss": 0.30883094668388367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.57727712176376, "step_time": 0.9137990112304688} +{"epoch": 0, "iter": 6182, "iter_tflops": 22.052518943556446, "iter_time": 0.9355436248779296, "loss": 0.020995087921619415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.24314673701496, "step_time": 0.7572948055267334} +{"epoch": 0, "iter": 6183, "iter_tflops": 56.25073626752037, "iter_time": 0.3667701950073242, "loss": 0.0378226712346077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.814577677404976, "step_time": 0.3337577362060547} +{"epoch": 0, "iter": 6184, "iter_tflops": 58.16635974050325, "iter_time": 0.35469115829467773, "loss": 0.04742996767163277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.58371066259991, 
"step_time": 0.32447136688232425} +{"epoch": 0, "iter": 6185, "iter_tflops": 39.3423206719832, "iter_time": 0.5243995056152344, "loss": 0.23801547288894653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.725884984774034, "step_time": 0.4828710632324219} +{"epoch": 0, "iter": 6186, "iter_tflops": 46.58759699144816, "iter_time": 0.4428451957702637, "loss": 0.13080117106437683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.79507783934131, "step_time": 0.4061632423400878} +{"epoch": 0, "iter": 6187, "iter_tflops": 47.17748413912168, "iter_time": 0.437308048248291, "loss": 0.23536817729473114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.32270436316213, "step_time": 0.4019876537322998} +{"epoch": 0, "iter": 6188, "iter_tflops": 49.74123500480769, "iter_time": 0.4147684211730957, "loss": 0.1976689100265503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84446590818813, "step_time": 0.38316089057922365} +{"epoch": 0, "iter": 6189, "iter_tflops": 40.93475210189048, "iter_time": 0.5039994735717773, "loss": 0.9226711988449097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.44397949933994, "step_time": 0.464204460144043} +{"epoch": 0, "iter": 6190, "iter_tflops": 43.513184977445924, "iter_time": 0.4741343002319336, "loss": 0.7630362510681152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.886594664075, "step_time": 0.4400211544036865} +{"epoch": 0, "iter": 6191, "iter_tflops": 44.93691310001147, "iter_time": 0.45911238861083986, "loss": 0.9119452834129333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.177370125362415, "step_time": 0.4282320404052734} +{"epoch": 0, "iter": 6192, "iter_tflops": 44.882911249510336, "iter_time": 0.4596647796630859, "loss": 0.9160068035125732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.05052971110453, "step_time": 0.4293624572753907} +{"epoch": 0, "iter": 6193, "iter_tflops": 33.04321111115409, "iter_time": 0.6243670883178711, "loss": 0.1066393256187439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.17502784828628, 
"step_time": 0.5865267143249512} +{"epoch": 0, "iter": 6194, "iter_tflops": 11.85880764126193, "iter_time": 1.7397274780273437, "loss": 0.05198846012353897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.335497736152913, "step_time": 1.262960813522339} +{"epoch": 0, "iter": 6195, "iter_tflops": 40.23734007747757, "iter_time": 0.5127350234985351, "loss": 0.07467827200889587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25366208860724, "step_time": 0.4662008190155029} +{"epoch": 0, "iter": 6196, "iter_tflops": 41.421860531092904, "iter_time": 0.4980725936889649, "loss": 0.05415579304099083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.25362269321264, "step_time": 0.4558992691040039} +{"epoch": 0, "iter": 6197, "iter_tflops": 33.69582029201466, "iter_time": 0.6122745590209961, "loss": 0.4094976782798767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.411550029790455, "step_time": 0.5514632110595703} +{"epoch": 0, "iter": 6198, "iter_tflops": 37.585977758184924, "iter_time": 0.5489039993286132, "loss": 0.38753542304039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.97466543067941, "step_time": 0.49151299476623533} +{"epoch": 0, "iter": 6199, "iter_tflops": 42.295517180108064, "iter_time": 0.48778440093994135, "loss": 0.39291828870773315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14920833514325, "step_time": 0.4470519485473633} +{"epoch": 0, "iter": 6200, "iter_tflops": 39.33698848949425, "iter_time": 0.524470588684082, "loss": 0.32448458671569824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.9679178316921, "step_time": 0.4801511116027832} +{"epoch": 0, "iter": 6201, "iter_tflops": 18.738284070001924, "iter_time": 1.101012954711914, "loss": 0.25658923387527466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.011300160305943, "step_time": 1.0309721679687498} +{"epoch": 0, "iter": 6202, "iter_tflops": 15.148182367147736, "iter_time": 1.3619517517089843, "loss": 0.1981213241815567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.330753015021205, 
"step_time": 0.967199493408203} +{"epoch": 0, "iter": 6203, "iter_tflops": 42.44285494512245, "iter_time": 0.4860910873413086, "loss": 0.26577329635620117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.58624963684209, "step_time": 0.44285800361633304} +{"epoch": 0, "iter": 6204, "iter_tflops": 49.086761869578815, "iter_time": 0.42029852294921877, "loss": 0.25472742319107056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.25653803682801, "step_time": 0.38739081192016606} +{"epoch": 0, "iter": 6205, "iter_tflops": 27.879612921730903, "iter_time": 0.7400064544677735, "loss": 0.5974369049072266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.542151905216546, "step_time": 0.6983612289428711} +{"epoch": 0, "iter": 6206, "iter_tflops": 10.971947765855836, "iter_time": 1.8803492279052734, "loss": 0.7195322513580322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.756031695484051, "step_time": 1.499785255432129} +{"epoch": 0, "iter": 6207, "iter_tflops": 16.830762711098973, "iter_time": 1.2257967071533202, "loss": 0.6921083927154541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.017264125586447, "step_time": 1.0306649990081789} +{"epoch": 0, "iter": 6208, "iter_tflops": 40.0866658517612, "iter_time": 0.5146622467041017, "loss": 0.6315199136734009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.71280143315577, "step_time": 0.471969144821167} +{"epoch": 0, "iter": 6209, "iter_tflops": 18.676391363701104, "iter_time": 0.9232889709472656, "loss": 0.2707093358039856, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 19.913851242481364, "step_time": 0.8659151840209961} +{"epoch": 0, "iter": 6210, "iter_tflops": 7.046626570322714, "iter_time": 2.447086700439453, "loss": 0.33250895142555237, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 8.496079192216671, "step_time": 2.0296075134277345} +{"epoch": 0, "iter": 6211, "iter_tflops": 11.94033842389266, "iter_time": 1.444155563354492, "loss": 0.26913952827453613, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 13.900696124763138, 
"step_time": 1.2404922752380372} +{"epoch": 0, "iter": 6212, "iter_tflops": 25.002833399539288, "iter_time": 0.6896700820922852, "loss": 0.2572465240955353, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 26.90968235929834, "step_time": 0.6407993202209472} +{"epoch": 0, "iter": 6213, "iter_tflops": 19.987534787703783, "iter_time": 0.7397226028442383, "loss": 0.34103405475616455, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 21.81578547988985, "step_time": 0.6777308692932129} +{"epoch": 0, "iter": 6214, "iter_tflops": 21.55852667650171, "iter_time": 0.6858182601928711, "loss": 0.3341493010520935, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 23.28298002029498, "step_time": 0.6350231475830078} +{"epoch": 0, "iter": 6215, "iter_tflops": 23.62570418267455, "iter_time": 0.6258112411499024, "loss": 0.25781527161598206, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 25.40924551752516, "step_time": 0.5818839149475097} +{"epoch": 0, "iter": 6216, "iter_tflops": 23.424137361264197, "iter_time": 0.6311964035034179, "loss": 0.33266744017601013, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 25.151214255769485, "step_time": 0.5878535766601563} +{"epoch": 0, "iter": 6217, "iter_tflops": 19.274016688762266, "iter_time": 1.070409652709961, "loss": 0.15187445282936096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.853209014590817, "step_time": 0.989348617553711} +{"epoch": 0, "iter": 6218, "iter_tflops": 27.702047466451145, "iter_time": 0.7447497711181641, "loss": 0.2354281097650528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.86939383153741, "step_time": 0.5916676845550537} +{"epoch": 0, "iter": 6219, "iter_tflops": 42.94806093517339, "iter_time": 0.4803731079101562, "loss": 0.12039897590875626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.27456866070313, "step_time": 0.43640997886657723} +{"epoch": 0, "iter": 6220, "iter_tflops": 39.182720306502105, "iter_time": 0.5265355072021485, "loss": 0.1248989999294281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
42.740117368421686, "step_time": 0.4827102680206299} +{"epoch": 0, "iter": 6221, "iter_tflops": 26.45343341563216, "iter_time": 0.7799022979736329, "loss": 0.19911792874336243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.888279698473152, "step_time": 0.7141682968139649} +{"epoch": 0, "iter": 6222, "iter_tflops": 40.9146091868741, "iter_time": 0.50424760055542, "loss": 0.2690718472003937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.271427823435886, "step_time": 0.4660137367248536} +{"epoch": 0, "iter": 6223, "iter_tflops": 46.24851289248036, "iter_time": 0.44609204101562494, "loss": 0.16479286551475525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.00848908201759, "step_time": 0.41255182647705074} +{"epoch": 0, "iter": 6224, "iter_tflops": 48.391203034236625, "iter_time": 0.42633975219726555, "loss": 0.17882901430130005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.93061274266053, "step_time": 0.3972819194793701} +{"epoch": 0, "iter": 6225, "iter_tflops": 23.409623988373124, "iter_time": 0.8813081970214844, "loss": 1.0317788124084473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.48170291179455, "step_time": 0.8427148056030273} +{"epoch": 0, "iter": 6226, "iter_tflops": 14.152628928203999, "iter_time": 1.4577569732666016, "loss": 0.7802037596702576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.423347367547887, "step_time": 1.1198341484069825} +{"epoch": 0, "iter": 6227, "iter_tflops": 36.4803651783456, "iter_time": 0.565539665222168, "loss": 0.7487430572509766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.41193883812006, "step_time": 0.5234731941223145} +{"epoch": 0, "iter": 6228, "iter_tflops": 35.433596646396346, "iter_time": 0.5822466659545898, "loss": 0.7444369196891785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.20613329934726, "step_time": 0.5399942817687988} +{"epoch": 0, "iter": 6229, "iter_tflops": 17.16748721399826, "iter_time": 1.2017538299560546, "loss": 0.0011783100198954344, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 18.24320825825888, "step_time": 1.1308917388916016} +{"epoch": 0, "iter": 6230, "iter_tflops": 14.130146693048895, "iter_time": 1.4600763854980467, "loss": 0.01495860330760479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.46595478222857, "step_time": 1.1172502994537354} +{"epoch": 0, "iter": 6231, "iter_tflops": 47.65139230883105, "iter_time": 0.43295888137817384, "loss": 0.00812496617436409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.653773361191895, "step_time": 0.39182554626464844} +{"epoch": 0, "iter": 6232, "iter_tflops": 43.914834038043516, "iter_time": 0.46979782485961913, "loss": 0.007876384072005749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78622060230516, "step_time": 0.4228877182006836} +{"epoch": 0, "iter": 6233, "iter_tflops": 22.6547544437981, "iter_time": 0.9106738967895508, "loss": 0.7557385563850403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.07393834007379, "step_time": 0.8569887161254883} +{"epoch": 0, "iter": 6234, "iter_tflops": 42.359531642316455, "iter_time": 0.48704725265502924, "loss": 1.1575433015823364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.813076270447326, "step_time": 0.4503319835662842} +{"epoch": 0, "iter": 6235, "iter_tflops": 44.540309559906795, "iter_time": 0.463200496673584, "loss": 0.9689492583274841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.06980785158136, "step_time": 0.429190263748169} +{"epoch": 0, "iter": 6236, "iter_tflops": 43.487633625347485, "iter_time": 0.4744128799438477, "loss": 0.7323917150497437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.84012402848067, "step_time": 0.44045770454406735} +{"epoch": 0, "iter": 6237, "iter_tflops": 40.46873254498806, "iter_time": 0.5098033027648926, "loss": 0.8663712739944458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.16502702701998, "step_time": 0.4671364402770996} +{"epoch": 0, "iter": 6238, "iter_tflops": 34.99383423293003, "iter_time": 0.5895636749267579, "loss": 0.9657332301139832, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 38.55810762668186, "step_time": 0.5350649909973144} +{"epoch": 0, "iter": 6239, "iter_tflops": 34.032319021602014, "iter_time": 0.6062206192016601, "loss": 0.7649608850479126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.05592608173413, "step_time": 0.5567555770874023} +{"epoch": 0, "iter": 6240, "iter_tflops": 36.90767999056174, "iter_time": 0.5589918823242187, "loss": 0.801197350025177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.88087039171114, "step_time": 0.5173180351257325} +{"epoch": 0, "iter": 6241, "iter_tflops": 17.90933979491589, "iter_time": 1.1519739837646483, "loss": 0.2054450809955597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.40939575501658, "step_time": 1.0629436264038086} +{"epoch": 0, "iter": 6242, "iter_tflops": 16.441982618935413, "iter_time": 1.2547813720703125, "loss": 0.2483508437871933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.416587526855206, "step_time": 0.9633231010437012} +{"epoch": 0, "iter": 6243, "iter_tflops": 53.280898244344634, "iter_time": 0.38721369552612306, "loss": 0.22118811309337616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.03444706869861, "step_time": 0.3554973735809326} +{"epoch": 0, "iter": 6244, "iter_tflops": 51.46207152107067, "iter_time": 0.40089900970458986, "loss": 0.22753027081489563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.65497632539085, "step_time": 0.3706962947845459} +{"epoch": 0, "iter": 6245, "iter_tflops": 27.356226162193995, "iter_time": 0.7541644592285157, "loss": 0.10732528567314148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.01935363077501, "step_time": 0.7109425582885741} +{"epoch": 0, "iter": 6246, "iter_tflops": 28.424945110558774, "iter_time": 0.725809440612793, "loss": 0.17437872290611267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.1259235853916, "step_time": 0.5141587200164794} +{"epoch": 0, "iter": 6247, "iter_tflops": 39.47565463716098, "iter_time": 0.5226282806396485, "loss": 0.24880409240722656, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 43.33012858048511, "step_time": 0.4761373710632324} +{"epoch": 0, "iter": 6248, "iter_tflops": 41.277678431723984, "iter_time": 0.4998123512268066, "loss": 0.14939801394939423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.99404057060569, "step_time": 0.45852946853637694} +{"epoch": 0, "iter": 6249, "iter_tflops": 21.648333977954675, "iter_time": 0.9530106811523437, "loss": 0.01657811552286148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.58017988861404, "step_time": 0.87493367767334} +{"epoch": 0, "iter": 6250, "iter_tflops": 18.492313896222626, "iter_time": 1.1156577606201172, "loss": 0.00789625383913517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.648076550442024, "step_time": 0.9109424133300781} +{"epoch": 0, "iter": 6251, "iter_tflops": 59.014194493731246, "iter_time": 0.349595443725586, "loss": 0.004533919505774975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.33940748528073, "step_time": 0.31575268745422364} +{"epoch": 0, "iter": 6252, "iter_tflops": 52.08756979520453, "iter_time": 0.3960847778320313, "loss": 0.003984423354268074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.894401536218496, "step_time": 0.3626208019256592} +{"epoch": 0, "iter": 6253, "iter_tflops": 48.55027996843044, "iter_time": 0.42494283294677737, "loss": 0.05178398638963699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.23400226810188, "step_time": 0.3875548076629639} +{"epoch": 0, "iter": 6254, "iter_tflops": 36.298758305794685, "iter_time": 0.5683691253662109, "loss": 0.10482848435640335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.339416795657314, "step_time": 0.43581216049194343} +{"epoch": 0, "iter": 6255, "iter_tflops": 49.90036914070619, "iter_time": 0.4134457092285157, "loss": 0.020000919699668884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.49871008602482, "step_time": 0.3785611343383789} +{"epoch": 0, "iter": 6256, "iter_tflops": 55.197982023080336, "iter_time": 0.37376535797119137, "loss": 0.07352642714977264, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 60.2486896915505, "step_time": 0.3424322357177734} +{"epoch": 0, "iter": 6257, "iter_tflops": 30.85776375157443, "iter_time": 0.668586799621582, "loss": 0.3791724741458893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.36195505821847, "step_time": 0.6184018135070801} +{"epoch": 0, "iter": 6258, "iter_tflops": 41.013354251367424, "iter_time": 0.5030335578918457, "loss": 0.7022026181221008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.459954966363206, "step_time": 0.4009154987335205} +{"epoch": 0, "iter": 6259, "iter_tflops": 53.71940476396203, "iter_time": 0.38405290603637693, "loss": 0.6219211220741272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.46765882943882, "step_time": 0.35286334228515626} +{"epoch": 0, "iter": 6260, "iter_tflops": 52.18502640976857, "iter_time": 0.3953450813293457, "loss": 0.43652620911598206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.44923115866255, "step_time": 0.36548050498962403} +{"epoch": 0, "iter": 6261, "iter_tflops": 30.585033012949733, "iter_time": 0.6745486755371094, "loss": 0.5955145359039307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.427920013789155, "step_time": 0.6362139015197754} +{"epoch": 0, "iter": 6262, "iter_tflops": 13.048959634599592, "iter_time": 1.581052749633789, "loss": 0.5999959111213684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.623304705702825, "step_time": 1.4108366012573241} +{"epoch": 0, "iter": 6263, "iter_tflops": 34.74762234087028, "iter_time": 0.5937411575317383, "loss": 0.5309973955154419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.764332296785064, "step_time": 0.5463116188049316} +{"epoch": 0, "iter": 6264, "iter_tflops": 37.96006028222653, "iter_time": 0.5434947509765625, "loss": 0.5087260007858276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57845347334028, "step_time": 0.49619675064086916} +{"epoch": 0, "iter": 6265, "iter_tflops": 20.065209354308703, "iter_time": 1.0282022552490233, "loss": 0.03995293378829956, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 21.869076942006416, "step_time": 0.943391143798828} +{"epoch": 0, "iter": 6266, "iter_tflops": 48.143625809703835, "iter_time": 0.42853219223022465, "loss": 0.03005407191812992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.510040039676156, "step_time": 0.3855555610656738} +{"epoch": 0, "iter": 6267, "iter_tflops": 56.03681146669675, "iter_time": 0.3681703681945801, "loss": 0.019653301686048508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.37773601899029, "step_time": 0.3361331787109375} +{"epoch": 0, "iter": 6268, "iter_tflops": 60.20788331794891, "iter_time": 0.3426643218994141, "loss": 0.04028226435184479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.63847597207729, "step_time": 0.3143140239715576} +{"epoch": 0, "iter": 6269, "iter_tflops": 35.76137008371142, "iter_time": 0.576910041809082, "loss": 0.06768912076950073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.49533729544309, "step_time": 0.5359374656677245} +{"epoch": 0, "iter": 6270, "iter_tflops": 9.16806934380911, "iter_time": 2.2503204040527347, "loss": 0.10140480101108551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.326433358761014, "step_time": 1.6737277450561525} +{"epoch": 0, "iter": 6271, "iter_tflops": 14.402572662821969, "iter_time": 1.4324589080810548, "loss": 0.1214035376906395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.53730664163987, "step_time": 1.176411745071411} +{"epoch": 0, "iter": 6272, "iter_tflops": 39.68291467641789, "iter_time": 0.5198986434936523, "loss": 0.09815904498100281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.64929884557642, "step_time": 0.47265578269958486} +{"epoch": 0, "iter": 6273, "iter_tflops": 13.425603747882082, "iter_time": 1.1530263214111327, "loss": 0.30311962962150574, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 14.148807630746125, "step_time": 1.094090393066406} +{"epoch": 0, "iter": 6274, "iter_tflops": 6.455294388746257, "iter_time": 2.3980431518554686, "loss": 0.30238544940948486, 
"lr": 3e-05, "seqlen": 6192.0, "step_tflops": 8.40767145280535, "step_time": 1.8411845169067385} +{"epoch": 0, "iter": 6275, "iter_tflops": 9.495522387048018, "iter_time": 1.6302499084472657, "loss": 0.13042916357517242, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 10.885404991662249, "step_time": 1.4220944938659668} +{"epoch": 0, "iter": 6276, "iter_tflops": 23.063196606171097, "iter_time": 0.6712024688720704, "loss": 0.2354525327682495, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.769960169036946, "step_time": 0.6249535484313965} +{"epoch": 0, "iter": 6277, "iter_tflops": 13.501797348009124, "iter_time": 1.1101822204589844, "loss": 0.3837217092514038, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.457727940530905, "step_time": 1.036778076171875} +{"epoch": 0, "iter": 6278, "iter_tflops": 10.831213192584631, "iter_time": 1.3839128723144531, "loss": 0.16626489162445068, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 12.602478025249162, "step_time": 1.1894053955078125} +{"epoch": 0, "iter": 6279, "iter_tflops": 22.825181644597276, "iter_time": 0.6567069473266602, "loss": 0.20011819899082184, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.56269022926239, "step_time": 0.6102529983520508} +{"epoch": 0, "iter": 6280, "iter_tflops": 26.414665969417324, "iter_time": 0.5674671554565429, "loss": 0.21498972177505493, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.16805721460741, "step_time": 0.532143741607666} +{"epoch": 0, "iter": 6281, "iter_tflops": 30.714299636047766, "iter_time": 0.6717097167968751, "loss": 0.13827946782112122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.56650564006488, "step_time": 0.6335065155029297} +{"epoch": 0, "iter": 6282, "iter_tflops": 14.693670108871292, "iter_time": 1.404080352783203, "loss": 0.1247522160410881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.90316444851083, "step_time": 1.2972948608398438} +{"epoch": 0, "iter": 6283, "iter_tflops": 36.831987801503786, "iter_time": 0.5601406478881836, "loss": 
0.2112676501274109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.27023716637771, "step_time": 0.4458825969696045} +{"epoch": 0, "iter": 6284, "iter_tflops": 52.41385994583635, "iter_time": 0.39361904525756836, "loss": 0.24638652801513672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.036733161427364, "step_time": 0.3617159042358398} +{"epoch": 0, "iter": 6285, "iter_tflops": 38.25648558011206, "iter_time": 0.5392835540771483, "loss": 0.7705004811286926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.67002244596622, "step_time": 0.4951063690185547} +{"epoch": 0, "iter": 6286, "iter_tflops": 35.091599903472556, "iter_time": 0.5879211425781251, "loss": 0.8036311268806458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.119115153627774, "step_time": 0.5412269783020018} +{"epoch": 0, "iter": 6287, "iter_tflops": 37.88043616708729, "iter_time": 0.5446371688842774, "loss": 0.5802302956581116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.82386802142619, "step_time": 0.5053684158325196} +{"epoch": 0, "iter": 6288, "iter_tflops": 36.3620373960209, "iter_time": 0.5673800201416016, "loss": 1.0112367868423462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.45924854966635, "step_time": 0.5228455753326416} +{"epoch": 0, "iter": 6289, "iter_tflops": 22.459522085941877, "iter_time": 0.9185900497436523, "loss": 0.775629997253418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.951361187948933, "step_time": 0.8613745727539064} +{"epoch": 0, "iter": 6290, "iter_tflops": 7.879376530106384, "iter_time": 2.618366241455078, "loss": 0.7133352160453796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.474213242935381, "step_time": 2.1776049346923827} +{"epoch": 0, "iter": 6291, "iter_tflops": 15.905123509926751, "iter_time": 1.2971350708007814, "loss": 0.8424360752105713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.76533187687755, "step_time": 1.043802028656006} +{"epoch": 0, "iter": 6292, "iter_tflops": 39.81376040000345, "iter_time": 0.5181900253295898, "loss": 
0.827562689781189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.498317088096535, "step_time": 0.42539813232421875} +{"epoch": 0, "iter": 6293, "iter_tflops": 18.177451164728158, "iter_time": 0.7707491760253906, "loss": 0.2654401957988739, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 19.24118347708983, "step_time": 0.7281389694213868} +{"epoch": 0, "iter": 6294, "iter_tflops": 5.89278478035345, "iter_time": 2.3775270996093747, "loss": 0.25544726848602295, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 7.2668522194840754, "step_time": 1.927967582702637} +{"epoch": 0, "iter": 6295, "iter_tflops": 8.096227882637553, "iter_time": 1.730467041015625, "loss": 0.36740389466285706, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 8.998525514087094, "step_time": 1.5569501342773437} +{"epoch": 0, "iter": 6296, "iter_tflops": 24.435164099201437, "iter_time": 0.5733644943237305, "loss": 0.22318440675735474, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 25.941657071883053, "step_time": 0.54006787109375} +{"epoch": 0, "iter": 6297, "iter_tflops": 24.984422806972965, "iter_time": 0.6146772384643555, "loss": 0.5093508958816528, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 26.75331838572544, "step_time": 0.5740355567932129} +{"epoch": 0, "iter": 6298, "iter_tflops": 27.556367420394142, "iter_time": 0.5573069839477538, "loss": 0.29446011781692505, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.38408115865009, "step_time": 0.5226420364379882} +{"epoch": 0, "iter": 6299, "iter_tflops": 28.946557735033117, "iter_time": 0.5305417022705078, "loss": 0.2580615282058716, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.800030597434684, "step_time": 0.4986149597167968} +{"epoch": 0, "iter": 6300, "iter_tflops": 26.834828476367935, "iter_time": 0.5722919387817382, "loss": 0.25942954421043396, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 28.602679176321256, "step_time": 0.5369201927185059} +{"epoch": 0, "iter": 6301, "iter_tflops": 31.636228316230323, "iter_time": 0.6521350555419922, 
"loss": 0.1983712613582611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.655836712030606, "step_time": 0.6130019493103027} +{"epoch": 0, "iter": 6302, "iter_tflops": 20.775135101991516, "iter_time": 0.993066635131836, "loss": 0.16802871227264404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.951821676577577, "step_time": 0.7380947742462158} +{"epoch": 0, "iter": 6303, "iter_tflops": 44.20404167251012, "iter_time": 0.4667241439819336, "loss": 0.23803593218326569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.17302930082328, "step_time": 0.4282706279754639} +{"epoch": 0, "iter": 6304, "iter_tflops": 48.26973057274039, "iter_time": 0.4274126510620117, "loss": 0.18399588763713837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.16716634799049, "step_time": 0.39548043251037596} +{"epoch": 0, "iter": 6305, "iter_tflops": 42.147879338506904, "iter_time": 0.4894930381774902, "loss": 0.7259188294410706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.625506552162854, "step_time": 0.45218333053588866} +{"epoch": 0, "iter": 6306, "iter_tflops": 33.29952130266776, "iter_time": 0.619561264038086, "loss": 0.605712354183197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.15763913805953, "step_time": 0.5552315483093262} +{"epoch": 0, "iter": 6307, "iter_tflops": 40.32732184630247, "iter_time": 0.5115909652709961, "loss": 0.7518940567970276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.16875148925579, "step_time": 0.4670970497131348} +{"epoch": 0, "iter": 6308, "iter_tflops": 38.32684357296049, "iter_time": 0.5382935714721679, "loss": 0.6714392304420471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76923959150328, "step_time": 0.493930311203003} +{"epoch": 0, "iter": 6309, "iter_tflops": 1.6359199080289073, "iter_time": 0.9580301971435548, "loss": 0.3328090012073517, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.775813900120163, "step_time": 0.8825590744018554} +{"epoch": 0, "iter": 6310, "iter_tflops": 1.2319963224369808, "iter_time": 1.2721309661865234, 
"loss": 0.3481963872909546, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.7669960305133434, "step_time": 0.8869633235931397} +{"epoch": 0, "iter": 6311, "iter_tflops": 2.920846301552483, "iter_time": 0.5365775909423828, "loss": 0.3893664479255676, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.1976727459309853, "step_time": 0.49012541198730464} +{"epoch": 0, "iter": 6312, "iter_tflops": 2.965731335549104, "iter_time": 0.5284567260742188, "loss": 0.4053466320037842, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.2441412126829032, "step_time": 0.4831049480438233} +{"epoch": 0, "iter": 6313, "iter_tflops": 19.308435067212802, "iter_time": 1.0685015869140624, "loss": 0.06584180891513824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.783204680330496, "step_time": 0.992681053161621} +{"epoch": 0, "iter": 6314, "iter_tflops": 16.248579060192363, "iter_time": 1.2697167816162112, "loss": 0.12511757016181946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.931053373179896, "step_time": 1.0898016662597656} +{"epoch": 0, "iter": 6315, "iter_tflops": 39.12911357112351, "iter_time": 0.5272568588256836, "loss": 0.08000655472278595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14854053060233, "step_time": 0.4781411666870117} +{"epoch": 0, "iter": 6316, "iter_tflops": 47.04934308794097, "iter_time": 0.43849907684326167, "loss": 0.12754510343074799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.853588719545506, "step_time": 0.3978720474243164} +{"epoch": 0, "iter": 6317, "iter_tflops": 18.02120067548343, "iter_time": 1.1448234710693361, "loss": 0.7475284934043884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.49747478580873, "step_time": 1.0581418228149413} +{"epoch": 0, "iter": 6318, "iter_tflops": 15.97867284678212, "iter_time": 1.2911643981933594, "loss": 0.7043160796165466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.362288922200396, "step_time": 1.1235578308105467} +{"epoch": 0, "iter": 6319, "iter_tflops": 36.8528228997416, "iter_time": 0.5598239669799805, 
"loss": 0.6266177892684937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.91373402584786, "step_time": 0.5042583866119384} +{"epoch": 0, "iter": 6320, "iter_tflops": 36.88427372861573, "iter_time": 0.5593466110229492, "loss": 0.7204523682594299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.47309354739364, "step_time": 0.5097483711242675} +{"epoch": 0, "iter": 6321, "iter_tflops": 32.87017335861525, "iter_time": 0.6276539306640625, "loss": 0.8904334306716919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.277685800331625, "step_time": 0.5686992721557618} +{"epoch": 0, "iter": 6322, "iter_tflops": 41.589516300577934, "iter_time": 0.4960647621154785, "loss": 0.8967524766921997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.771647179752016, "step_time": 0.4608071136474609} +{"epoch": 0, "iter": 6323, "iter_tflops": 44.13234393404494, "iter_time": 0.4674823875427246, "loss": 0.838590145111084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.60477030361238, "step_time": 0.4333829021453857} +{"epoch": 0, "iter": 6324, "iter_tflops": 41.18259086758511, "iter_time": 0.500966381072998, "loss": 0.6398174166679382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.112502834646726, "step_time": 0.4676926536560059} +{"epoch": 0, "iter": 6325, "iter_tflops": 26.360405794258128, "iter_time": 0.7826546249389648, "loss": 0.029026338830590248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.87872391023495, "step_time": 0.7400300521850586} +{"epoch": 0, "iter": 6326, "iter_tflops": 17.696990027032893, "iter_time": 1.1657967529296875, "loss": 0.020706642419099808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.778108170710738, "step_time": 0.9473317584991454} +{"epoch": 0, "iter": 6327, "iter_tflops": 44.48825556044806, "iter_time": 0.46374246978759764, "loss": 0.04913695901632309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.21799445997154, "step_time": 0.4191778583526611} +{"epoch": 0, "iter": 6328, "iter_tflops": 47.4417146738603, "iter_time": 0.43487242507934576, 
"loss": 0.01797371357679367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.152727523758124, "step_time": 0.39558992385864256} +{"epoch": 0, "iter": 6329, "iter_tflops": 18.089181983225913, "iter_time": 1.1405210876464844, "loss": 0.2619934678077698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.41740426556521, "step_time": 1.0625052261352539} +{"epoch": 0, "iter": 6330, "iter_tflops": 22.768571120009923, "iter_time": 0.9061215744018555, "loss": 0.22777362167835236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.115041887788024, "step_time": 0.7338098087310791} +{"epoch": 0, "iter": 6331, "iter_tflops": 40.48845478691466, "iter_time": 0.5095549736022948, "loss": 0.268012136220932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.3633861186169, "step_time": 0.4650477638244629} +{"epoch": 0, "iter": 6332, "iter_tflops": 38.38853382139553, "iter_time": 0.5374285354614258, "loss": 0.31986865401268005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95732464441919, "step_time": 0.4917161350250244} +{"epoch": 0, "iter": 6333, "iter_tflops": 18.05087905467003, "iter_time": 1.1429412078857422, "loss": 0.7424530982971191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.19000579960892, "step_time": 1.0750957412719728} +{"epoch": 0, "iter": 6334, "iter_tflops": 17.834230560313205, "iter_time": 1.1568255462646484, "loss": 0.7904110550880432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.2925151815744, "step_time": 0.8857391891479492} +{"epoch": 0, "iter": 6335, "iter_tflops": 34.615166118298035, "iter_time": 0.5960131301879883, "loss": 0.9140018224716187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.545064899016126, "step_time": 0.5495021400451661} +{"epoch": 0, "iter": 6336, "iter_tflops": 39.31586741033067, "iter_time": 0.5247523422241211, "loss": 0.8677187561988831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.662139873881145, "step_time": 0.48359256172180176} +{"epoch": 0, "iter": 6337, "iter_tflops": 19.933088376355162, "iter_time": 1.0350174102783203, 
"loss": 0.3446671664714813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.044580206753942, "step_time": 0.9803518676757814} +{"epoch": 0, "iter": 6338, "iter_tflops": 12.037063042802162, "iter_time": 1.713964065551758, "loss": 0.3668798804283142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.8153939072384, "step_time": 1.3044944458007812} +{"epoch": 0, "iter": 6339, "iter_tflops": 16.03484024331297, "iter_time": 1.2866416625976562, "loss": 0.3928716778755188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.270484282600005, "step_time": 1.017789867401123} +{"epoch": 0, "iter": 6340, "iter_tflops": 34.281855883397355, "iter_time": 0.6018079528808594, "loss": 0.42709314823150635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60852986593988, "step_time": 0.48420101737976073} +{"epoch": 0, "iter": 6341, "iter_tflops": 23.432521348230612, "iter_time": 0.667609130859375, "loss": 0.34801268577575684, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 24.91495992390145, "step_time": 0.6278864288330078} +{"epoch": 0, "iter": 6342, "iter_tflops": 13.050831650348698, "iter_time": 1.1986795654296876, "loss": 0.19805164635181427, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 14.530688600608304, "step_time": 1.076601779937744} +{"epoch": 0, "iter": 6343, "iter_tflops": 23.57397484620557, "iter_time": 0.6636032028198242, "loss": 0.4422527551651001, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 25.466386613535523, "step_time": 0.6142907295227051} +{"epoch": 0, "iter": 6344, "iter_tflops": 23.73348920179637, "iter_time": 0.6591430816650391, "loss": 0.4024617671966553, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 25.568578598546008, "step_time": 0.6118355445861817} +{"epoch": 0, "iter": 6345, "iter_tflops": 31.868718021477875, "iter_time": 0.6473775787353515, "loss": 0.17274126410484314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.127424771273944, "step_time": 0.587321548461914} +{"epoch": 0, "iter": 6346, "iter_tflops": 36.52373557812898, "iter_time": 0.5648681106567383, 
"loss": 0.0653526559472084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.52070301259473, "step_time": 0.5091494464874267} +{"epoch": 0, "iter": 6347, "iter_tflops": 39.601821321496956, "iter_time": 0.520963249206543, "loss": 0.11825036257505417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.463622902638136, "step_time": 0.47467496109008783} +{"epoch": 0, "iter": 6348, "iter_tflops": 43.17197064260502, "iter_time": 0.4778816719055176, "loss": 0.10307147353887558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1295225897285, "step_time": 0.43775307655334467} +{"epoch": 0, "iter": 6349, "iter_tflops": 19.150281131877765, "iter_time": 1.0773258819580078, "loss": 0.7588333487510681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.495801703853008, "step_time": 1.0066009521484376} +{"epoch": 0, "iter": 6350, "iter_tflops": 33.7234846088089, "iter_time": 0.6117722930908204, "loss": 0.6478577256202698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.1247627561798, "step_time": 0.5557232418060302} +{"epoch": 0, "iter": 6351, "iter_tflops": 46.81037783965413, "iter_time": 0.44073759841918947, "loss": 0.6489603519439697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.60635120180426, "step_time": 0.4076779499053955} +{"epoch": 0, "iter": 6352, "iter_tflops": 50.888505838301256, "iter_time": 0.4054175529479981, "loss": 0.7543727159500122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.1474267352831, "step_time": 0.3741079998016357} +{"epoch": 0, "iter": 6353, "iter_tflops": 26.008329780672486, "iter_time": 0.7932494583129882, "loss": 0.011925268918275833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.699017243087003, "step_time": 0.7448312454223633} +{"epoch": 0, "iter": 6354, "iter_tflops": 13.348770111669808, "iter_time": 1.5455426483154298, "loss": 0.0438525453209877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.18813290298332, "step_time": 1.2744578781127929} +{"epoch": 0, "iter": 6355, "iter_tflops": 38.69548252834204, "iter_time": 0.533165428161621, 
"loss": 0.052401501685380936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.158752794042584, "step_time": 0.4780280284881592} +{"epoch": 0, "iter": 6356, "iter_tflops": 41.52872723274447, "iter_time": 0.4967908935546875, "loss": 0.04479699581861496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.72674334634239, "step_time": 0.45118221855163576} +{"epoch": 0, "iter": 6357, "iter_tflops": 15.506196624522005, "iter_time": 1.330506378173828, "loss": 0.17875756323337555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.218093791430867, "step_time": 1.2721034774780273} +{"epoch": 0, "iter": 6358, "iter_tflops": 15.199713860602698, "iter_time": 1.3573343353271485, "loss": 0.07336220145225525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.678017566319237, "step_time": 0.9097397270202637} +{"epoch": 0, "iter": 6359, "iter_tflops": 40.62806229475368, "iter_time": 0.5078040237426757, "loss": 0.12212024629116058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55294580435056, "step_time": 0.46306912231445313} +{"epoch": 0, "iter": 6360, "iter_tflops": 41.880998974217285, "iter_time": 0.49261225891113275, "loss": 0.17696700990200043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.00250722022347, "step_time": 0.4484775886535644} +{"epoch": 0, "iter": 6361, "iter_tflops": 21.13970657324441, "iter_time": 0.9759403915405274, "loss": 0.06494130939245224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.51679391595956, "step_time": 0.9162536010742188} +{"epoch": 0, "iter": 6362, "iter_tflops": 15.810806275043635, "iter_time": 1.3048729553222655, "loss": 0.0941530242562294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.822824139709233, "step_time": 1.0407746829986573} +{"epoch": 0, "iter": 6363, "iter_tflops": 53.74212196586676, "iter_time": 0.3838905639648438, "loss": 0.11840397864580154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.76874370587686, "step_time": 0.3510555477142334} +{"epoch": 0, "iter": 6364, "iter_tflops": 50.68528735398404, "iter_time": 
0.40704304122924806, "loss": 0.11548648029565811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.80778169824737, "step_time": 0.3764263553619384} +{"epoch": 0, "iter": 6365, "iter_tflops": 39.04327426678421, "iter_time": 0.5284160690307618, "loss": 0.8989720940589905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.023288759868876, "step_time": 0.4909442863464355} +{"epoch": 0, "iter": 6366, "iter_tflops": 17.499430548373926, "iter_time": 1.178957992553711, "loss": 0.7870144844055176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.006971687927457, "step_time": 0.9821069793701173} +{"epoch": 0, "iter": 6367, "iter_tflops": 39.461633767264644, "iter_time": 0.5228139724731445, "loss": 0.8963563442230225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.014660649499895, "step_time": 0.4796293449401855} +{"epoch": 0, "iter": 6368, "iter_tflops": 42.17577429626302, "iter_time": 0.48916928863525394, "loss": 0.8290573954582214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.94857908193296, "step_time": 0.44900395011901856} +{"epoch": 0, "iter": 6369, "iter_tflops": 32.19613803747975, "iter_time": 0.6407940444946288, "loss": 0.15805977582931519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.81973490787854, "step_time": 0.575970022201538} +{"epoch": 0, "iter": 6370, "iter_tflops": 36.7080756958853, "iter_time": 0.5620314636230469, "loss": 0.10361222177743912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.393627229769045, "step_time": 0.49841231346130377} +{"epoch": 0, "iter": 6371, "iter_tflops": 40.21404346891316, "iter_time": 0.5130320587158204, "loss": 0.11804298311471939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.015344251030754, "step_time": 0.46872502899169916} +{"epoch": 0, "iter": 6372, "iter_tflops": 42.261092517528866, "iter_time": 0.4881817359924317, "loss": 0.11303827911615372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.13025997983695, "step_time": 0.44723557853698725} +{"epoch": 0, "iter": 6373, "iter_tflops": 16.005910857147647, 
"iter_time": 1.2889671630859376, "loss": 0.901482880115509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.760799033137125, "step_time": 1.2309134826660157} +{"epoch": 0, "iter": 6374, "iter_tflops": 19.594481287213437, "iter_time": 1.0529032745361329, "loss": 0.8175715804100037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.24442154106096, "step_time": 0.8509624977111816} +{"epoch": 0, "iter": 6375, "iter_tflops": 42.52243752206885, "iter_time": 0.4851813468933105, "loss": 0.7167180776596069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79892188757166, "step_time": 0.4504711608886719} +{"epoch": 0, "iter": 6376, "iter_tflops": 47.20763891672012, "iter_time": 0.43702870941162103, "loss": 0.9064246416091919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.39389225253928, "step_time": 0.40143084335327145} +{"epoch": 0, "iter": 6377, "iter_tflops": 39.581005320223824, "iter_time": 0.5212372283935547, "loss": 0.3321276307106018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.97955387682063, "step_time": 0.4800211181640625} +{"epoch": 0, "iter": 6378, "iter_tflops": 34.43029386252232, "iter_time": 0.5992134017944336, "loss": 0.4168606400489807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.9317634583672, "step_time": 0.5439001941680908} +{"epoch": 0, "iter": 6379, "iter_tflops": 45.77274782751903, "iter_time": 0.45072875213623054, "loss": 0.4672762155532837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.84154996247003, "step_time": 0.4139336261749268} +{"epoch": 0, "iter": 6380, "iter_tflops": 50.303172554555125, "iter_time": 0.41013503646850585, "loss": 0.42230403423309326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.46039862373509, "step_time": 0.37882744216918945} +{"epoch": 0, "iter": 6381, "iter_tflops": 46.602597116169854, "iter_time": 0.44270265579223633, "loss": 0.7516945600509644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.970617343706344, "step_time": 0.4047644424438476} +{"epoch": 0, "iter": 6382, "iter_tflops": 43.10107090251367, 
"iter_time": 0.4786677703857422, "loss": 0.9656537175178528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.87652942385903, "step_time": 0.44011563491821293} +{"epoch": 0, "iter": 6383, "iter_tflops": 47.38721192971739, "iter_time": 0.4353725967407226, "loss": 0.9181238412857056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.22675796495186, "step_time": 0.40274056625366217} +{"epoch": 0, "iter": 6384, "iter_tflops": 44.75498750188689, "iter_time": 0.460978645324707, "loss": 0.9149786233901978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98266367013108, "step_time": 0.4299697418212891} +{"epoch": 0, "iter": 6385, "iter_tflops": 28.220504971517233, "iter_time": 0.7310674819946289, "loss": 0.14821933209896088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.8902532616084, "step_time": 0.690228126525879} +{"epoch": 0, "iter": 6386, "iter_tflops": 13.155982392537114, "iter_time": 1.5681910247802733, "loss": 0.12072716653347015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.660694066312523, "step_time": 1.2383093662261964} +{"epoch": 0, "iter": 6387, "iter_tflops": 38.26480764425988, "iter_time": 0.5391662673950195, "loss": 0.18584300577640533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.800001836550834, "step_time": 0.4935668087005616} +{"epoch": 0, "iter": 6388, "iter_tflops": 38.27241180399102, "iter_time": 0.5390591430664062, "loss": 0.1396191120147705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.06555987890427, "step_time": 0.4904509429931641} +{"epoch": 0, "iter": 6389, "iter_tflops": 33.643511732618, "iter_time": 0.6132265167236328, "loss": 0.8114739656448364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.113393556939116, "step_time": 0.5558934803009034} +{"epoch": 0, "iter": 6390, "iter_tflops": 37.38317132667779, "iter_time": 0.5518818435668945, "loss": 0.7319375276565552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.55941330594207, "step_time": 0.5086635093688965} +{"epoch": 0, "iter": 6391, "iter_tflops": 39.88160590206043, 
"iter_time": 0.5173084945678712, "loss": 0.7126721143722534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.53571371747217, "step_time": 0.4738889465332031} +{"epoch": 0, "iter": 6392, "iter_tflops": 43.483974194316026, "iter_time": 0.4744528045654297, "loss": 0.7414596080780029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.508065363723375, "step_time": 0.4342650737762451} +{"epoch": 0, "iter": 6393, "iter_tflops": 22.90497827311287, "iter_time": 0.9007253036499024, "loss": 0.8088535070419312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.44519987195585, "step_time": 0.8439731979370118} +{"epoch": 0, "iter": 6394, "iter_tflops": 11.458061627371274, "iter_time": 1.8005744934082035, "loss": 0.791615903377533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.565521532320759, "step_time": 1.6418811950683594} +{"epoch": 0, "iter": 6395, "iter_tflops": 10.751631508316775, "iter_time": 1.9188802642822267, "loss": 0.9617389440536499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.392756137596137, "step_time": 1.664770393371582} +{"epoch": 0, "iter": 6396, "iter_tflops": 27.435709912290275, "iter_time": 0.7519795761108399, "loss": 0.7981778383255005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.57716879696595, "step_time": 0.6332991561889648} +{"epoch": 0, "iter": 6397, "iter_tflops": 10.962153718479659, "iter_time": 1.3264107055664063, "loss": 0.2224506288766861, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 11.630567708140246, "step_time": 1.2501812820434572} +{"epoch": 0, "iter": 6398, "iter_tflops": 11.072737985733635, "iter_time": 1.3131637420654296, "loss": 0.3349616527557373, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 13.319803784024973, "step_time": 1.091631549835205} +{"epoch": 0, "iter": 6399, "iter_tflops": 20.48814666701252, "iter_time": 0.7096941604614257, "loss": 0.295373797416687, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 22.192353788151827, "step_time": 0.6551949462890625} +{"epoch": 0, "iter": 6400, "iter_tflops": 22.834534481427585, 
"iter_time": 0.6367687530517577, "loss": 0.41471466422080994, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.484212550599487, "step_time": 0.593865047454834} +{"epoch": 0, "iter": 6401, "iter_tflops": 21.70212183383978, "iter_time": 0.950648681640625, "loss": 0.1302395462989807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.27812891395751, "step_time": 0.8862865905761719} +{"epoch": 0, "iter": 6402, "iter_tflops": 20.96251124148773, "iter_time": 0.9841899795532226, "loss": 0.23621118068695068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.739014374912607, "step_time": 0.8690796165466309} +{"epoch": 0, "iter": 6403, "iter_tflops": 39.18118460346882, "iter_time": 0.5265561447143555, "loss": 0.17536579072475433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.06223403707323, "step_time": 0.47909947013854975} +{"epoch": 0, "iter": 6404, "iter_tflops": 42.48784657780983, "iter_time": 0.48557635116577147, "loss": 0.06838789582252502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.347696823665345, "step_time": 0.4451374053955078} +{"epoch": 0, "iter": 6405, "iter_tflops": 17.305430087922783, "iter_time": 1.1921745605468748, "loss": 0.5101788640022278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.58603354988798, "step_time": 1.110032081604004} +{"epoch": 0, "iter": 6406, "iter_tflops": 16.020167357331513, "iter_time": 1.2878200988769533, "loss": 0.5301066637039185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.225825142256948, "step_time": 1.0730927467346192} +{"epoch": 0, "iter": 6407, "iter_tflops": 35.27091032766339, "iter_time": 0.5849322662353516, "loss": 0.5292986631393433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.10404589100333, "step_time": 0.5414410209655761} +{"epoch": 0, "iter": 6408, "iter_tflops": 40.862178676022296, "iter_time": 0.5048946037292481, "loss": 0.5637080073356628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.2915155533141, "step_time": 0.46580238342285163} +{"epoch": 0, "iter": 6409, "iter_tflops": 20.35799625262899, 
"iter_time": 1.0134147415161132, "loss": 0.8438276052474976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.441855491497506, "step_time": 0.9621878814697267} +{"epoch": 0, "iter": 6410, "iter_tflops": 17.154046583302186, "iter_time": 1.2026954345703125, "loss": 1.0964722633361816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.045082435646467, "step_time": 0.9803284721374511} +{"epoch": 0, "iter": 6411, "iter_tflops": 35.742341031178135, "iter_time": 0.5772171859741211, "loss": 0.8428522348403931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.85519147527748, "step_time": 0.5309739246368409} +{"epoch": 0, "iter": 6412, "iter_tflops": 35.887250899319625, "iter_time": 0.5748864288330079, "loss": 0.8366366624832153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.869276710889615, "step_time": 0.5307815132141114} +{"epoch": 0, "iter": 6413, "iter_tflops": 22.643241970966113, "iter_time": 0.9111369094848634, "loss": 0.2748587131500244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.866360361812777, "step_time": 0.8296788597106933} +{"epoch": 0, "iter": 6414, "iter_tflops": 35.879011986636264, "iter_time": 0.5750184402465821, "loss": 0.4690478444099426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.87256712404488, "step_time": 0.5174257640838623} +{"epoch": 0, "iter": 6415, "iter_tflops": 38.64001154676399, "iter_time": 0.5339308319091797, "loss": 0.31616923213005066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52764148926687, "step_time": 0.485121976852417} +{"epoch": 0, "iter": 6416, "iter_tflops": 42.20868634791216, "iter_time": 0.4887878608703613, "loss": 0.36870282888412476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.09127036013361, "step_time": 0.44761390495300296} +{"epoch": 0, "iter": 6417, "iter_tflops": 18.97621955063269, "iter_time": 1.087207778930664, "loss": 0.1693565547466278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.220585782775796, "step_time": 1.0203014755249025} +{"epoch": 0, "iter": 6418, "iter_tflops": 
20.839610610386007, "iter_time": 0.9899941940307618, "loss": 0.2731447219848633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.8363462782238, "step_time": 0.830681505203247} +{"epoch": 0, "iter": 6419, "iter_tflops": 37.07760560075915, "iter_time": 0.5564300384521484, "loss": 0.35110247135162354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.738045837428366, "step_time": 0.5064330673217773} +{"epoch": 0, "iter": 6420, "iter_tflops": 41.60600731735939, "iter_time": 0.4958681411743164, "loss": 0.21046441793441772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.6138080642706, "step_time": 0.4522993011474609} +{"epoch": 0, "iter": 6421, "iter_tflops": 17.038580900065135, "iter_time": 1.2108457641601562, "loss": 0.04125531390309334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.140661576874603, "step_time": 1.1372845153808593} +{"epoch": 0, "iter": 6422, "iter_tflops": 29.135447796560612, "iter_time": 0.7081097106933594, "loss": 0.026366975158452988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.491611627897875, "step_time": 0.46370748901367187} +{"epoch": 0, "iter": 6423, "iter_tflops": 56.636629411870565, "iter_time": 0.3642712097167969, "loss": 0.025303808972239494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.74614233556297, "step_time": 0.33412765121459964} +{"epoch": 0, "iter": 6424, "iter_tflops": 52.232626653168154, "iter_time": 0.39498479843139644, "loss": 0.03953170031309128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.09558599028321, "step_time": 0.3613430557250976} +{"epoch": 0, "iter": 6425, "iter_tflops": 31.585200400617882, "iter_time": 0.6531886215209961, "loss": 0.03605164960026741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.7298811374183, "step_time": 0.6116562767028808} +{"epoch": 0, "iter": 6426, "iter_tflops": 13.67238892622922, "iter_time": 1.5089604034423827, "loss": 0.0194996390491724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.28093381294765, "step_time": 1.2671935005187989} +{"epoch": 0, "iter": 6427, 
"iter_tflops": 53.301785585090165, "iter_time": 0.3870619583129883, "loss": 0.06683473289012909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.329187792458306, "step_time": 0.35370102500915523} +{"epoch": 0, "iter": 6428, "iter_tflops": 51.824581796173604, "iter_time": 0.39809474182128907, "loss": 0.028423571959137917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.447950587595585, "step_time": 0.36548879623413083} +{"epoch": 0, "iter": 6429, "iter_tflops": 32.04831275252678, "iter_time": 0.643749755859375, "loss": 0.7190684676170349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.24183365707858, "step_time": 0.6025113525390625} +{"epoch": 0, "iter": 6430, "iter_tflops": 10.120462547165534, "iter_time": 2.0385524291992185, "loss": 0.7593069076538086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.523184250107898, "step_time": 1.6474319229125975} +{"epoch": 0, "iter": 6431, "iter_tflops": 13.056373788001668, "iter_time": 1.5801549377441408, "loss": 0.6009913682937622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.487941753521127, "step_time": 1.424018253326416} +{"epoch": 0, "iter": 6432, "iter_tflops": 43.16896093374594, "iter_time": 0.4779149894714356, "loss": 0.6021096110343933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.633284785482005, "step_time": 0.4424113292694092} +{"epoch": 0, "iter": 6433, "iter_tflops": 19.519519524546528, "iter_time": 0.7804843750000001, "loss": 0.38954782485961914, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 20.619071473382913, "step_time": 0.7388635330200194} +{"epoch": 0, "iter": 6434, "iter_tflops": 8.980573065709446, "iter_time": 1.6964039916992188, "loss": 0.31223079562187195, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 10.897567486788184, "step_time": 1.3979890480041504} +{"epoch": 0, "iter": 6435, "iter_tflops": 21.814047494398928, "iter_time": 0.6983885040283203, "loss": 0.28080320358276367, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 23.46380778626475, "step_time": 0.6492842140197754} +{"epoch": 0, 
"iter": 6436, "iter_tflops": 21.882964630517048, "iter_time": 0.6961890335083009, "loss": 0.11493317037820816, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 23.49705453985426, "step_time": 0.648365520477295} +{"epoch": 0, "iter": 6437, "iter_tflops": 14.040197783218153, "iter_time": 1.4694304046630857, "loss": 0.22948813438415527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.010567333073775, "step_time": 1.374437957763672} +{"epoch": 0, "iter": 6438, "iter_tflops": 19.17518097012408, "iter_time": 1.0759269256591797, "loss": 0.30769261717796326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.692955723800786, "step_time": 0.9510503673553468} +{"epoch": 0, "iter": 6439, "iter_tflops": 50.25114039723046, "iter_time": 0.41055970764160155, "loss": 0.2790992856025696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.6032274683831, "step_time": 0.37783652114868166} +{"epoch": 0, "iter": 6440, "iter_tflops": 50.98904049916841, "iter_time": 0.40461819458007814, "loss": 0.22501897811889648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.551015473913935, "step_time": 0.3713900337219238} +{"epoch": 0, "iter": 6441, "iter_tflops": 31.957723742867923, "iter_time": 0.6455745620727539, "loss": 0.8450351357460022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.93522794650497, "step_time": 0.6079550590515137} +{"epoch": 0, "iter": 6442, "iter_tflops": 12.978346949550872, "iter_time": 1.5896549530029298, "loss": 0.9695540070533752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.25500662738798, "step_time": 1.1956583938598633} +{"epoch": 0, "iter": 6443, "iter_tflops": 11.460840726420033, "iter_time": 1.8001378784179687, "loss": 0.7604036927223206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.216725684933577, "step_time": 1.5609837112426757} +{"epoch": 0, "iter": 6444, "iter_tflops": 21.676198960419484, "iter_time": 0.9517855758666993, "loss": 1.0460050106048584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.256746604154895, "step_time": 0.7857444725036621} 
+{"epoch": 0, "iter": 6445, "iter_tflops": 15.98496684654805, "iter_time": 0.9428344879150391, "loss": 0.3790195882320404, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 16.777469402479532, "step_time": 0.8982986450195313} +{"epoch": 0, "iter": 6446, "iter_tflops": 11.95752462829016, "iter_time": 1.260392807006836, "loss": 0.19605375826358795, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 15.113572471303867, "step_time": 0.9971949424743651} +{"epoch": 0, "iter": 6447, "iter_tflops": 22.481773671361182, "iter_time": 0.670373176574707, "loss": 0.3078485131263733, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.151848093755238, "step_time": 0.6240175895690918} +{"epoch": 0, "iter": 6448, "iter_tflops": 22.42465670247539, "iter_time": 0.6720806579589844, "loss": 0.2457922250032425, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.069633099626017, "step_time": 0.6261490554809569} +{"epoch": 0, "iter": 6449, "iter_tflops": 22.625610692999153, "iter_time": 0.9118469238281249, "loss": 0.5844332575798035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.548551496560098, "step_time": 0.8404199943542482} +{"epoch": 0, "iter": 6450, "iter_tflops": 15.615677671698625, "iter_time": 1.321178237915039, "loss": 0.39356064796447754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.98079689784616, "step_time": 0.9385962486267089} +{"epoch": 0, "iter": 6451, "iter_tflops": 48.33450635752883, "iter_time": 0.42683985137939456, "loss": 0.3606838285923004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.4785212343232, "step_time": 0.39313404846191413} +{"epoch": 0, "iter": 6452, "iter_tflops": 48.735965056085526, "iter_time": 0.42332379150390625, "loss": 0.43209022283554077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.50951557390247, "step_time": 0.3929019966125488} +{"epoch": 0, "iter": 6453, "iter_tflops": 43.87521816355987, "iter_time": 0.47022201538085934, "loss": 0.7010953426361084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76221564326177, "step_time": 
0.4319542808532714} +{"epoch": 0, "iter": 6454, "iter_tflops": 37.399127010681134, "iter_time": 0.5516463928222656, "loss": 0.6038108468055725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.42476068569298, "step_time": 0.4980377235412598} +{"epoch": 0, "iter": 6455, "iter_tflops": 34.31963960098647, "iter_time": 0.6011454010009766, "loss": 0.6825379133224487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.200526267984884, "step_time": 0.5545914421081543} +{"epoch": 0, "iter": 6456, "iter_tflops": 38.218189518131766, "iter_time": 0.5398239364624022, "loss": 0.8800190091133118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01697942707974, "step_time": 0.49101800727844247} +{"epoch": 0, "iter": 6457, "iter_tflops": 28.26925416108458, "iter_time": 0.7298067855834961, "loss": 0.0312050748616457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.631604153447036, "step_time": 0.6735231170654297} +{"epoch": 0, "iter": 6458, "iter_tflops": 10.963698028082964, "iter_time": 1.881764114379883, "loss": 0.03547177463769913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.530467492099204, "step_time": 1.6464743652343752} +{"epoch": 0, "iter": 6459, "iter_tflops": 23.738261144855443, "iter_time": 0.8691071929931641, "loss": 0.07375240325927734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.691863564025915, "step_time": 0.7450236587524414} +{"epoch": 0, "iter": 6460, "iter_tflops": 50.49520279957052, "iter_time": 0.4085753173828125, "loss": 0.03435724973678589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.75504511149567, "step_time": 0.37003097152709963} +{"epoch": 0, "iter": 6461, "iter_tflops": 12.690463247310033, "iter_time": 1.145767318725586, "loss": 0.11655468493700027, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 13.32391110046866, "step_time": 1.091295036315918} +{"epoch": 0, "iter": 6462, "iter_tflops": 9.43728782623475, "iter_time": 1.5407305908203126, "loss": 0.1636427491903305, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 12.144309958142914, 
"step_time": 1.197294708251953} +{"epoch": 0, "iter": 6463, "iter_tflops": 25.485817173749375, "iter_time": 0.5705258712768555, "loss": 0.31171584129333496, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 27.251995628178094, "step_time": 0.533550579071045} +{"epoch": 0, "iter": 6464, "iter_tflops": 26.334355342226406, "iter_time": 0.5521425476074219, "loss": 0.09301821887493134, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 28.071761547570325, "step_time": 0.5179695625305176} +{"epoch": 0, "iter": 6465, "iter_tflops": 40.734556234187075, "iter_time": 0.5064764518737793, "loss": 0.7287734746932983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.28608439470351, "step_time": 0.46585950851440433} +{"epoch": 0, "iter": 6466, "iter_tflops": 43.07224734534822, "iter_time": 0.47898809051513674, "loss": 0.7812684774398804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.64604919977754, "step_time": 0.4422902660369873} +{"epoch": 0, "iter": 6467, "iter_tflops": 45.77551475577176, "iter_time": 0.4507015075683593, "loss": 0.7532353401184082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17446060335496, "step_time": 0.4195489540100098} +{"epoch": 0, "iter": 6468, "iter_tflops": 41.7620881882247, "iter_time": 0.49401489257812503, "loss": 0.754448413848877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.78635687160639, "step_time": 0.4606557655334473} +{"epoch": 0, "iter": 6469, "iter_tflops": 43.1104450623872, "iter_time": 0.4785636863708496, "loss": 0.6135019659996033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.937847332581306, "step_time": 0.43954068374633787} +{"epoch": 0, "iter": 6470, "iter_tflops": 48.91227250513, "iter_time": 0.42179789352416996, "loss": 0.5642920136451721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.192835808960055, "step_time": 0.3878547401428222} +{"epoch": 0, "iter": 6471, "iter_tflops": 52.5434193425808, "iter_time": 0.39264847564697264, "loss": 0.5332216620445251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.97036582565158, 
"step_time": 0.3621372833251953} +{"epoch": 0, "iter": 6472, "iter_tflops": 49.257999088117366, "iter_time": 0.41883742523193357, "loss": 0.5801395177841187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.39193192109948, "step_time": 0.386408447265625} +{"epoch": 0, "iter": 6473, "iter_tflops": 28.89496884406077, "iter_time": 0.7140029678344726, "loss": 0.07973852008581161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.88809105815848, "step_time": 0.6679303512573243} +{"epoch": 0, "iter": 6474, "iter_tflops": 14.295862196407041, "iter_time": 1.443151397705078, "loss": 0.07736215740442276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.88896685910629, "step_time": 1.2215722656249999} +{"epoch": 0, "iter": 6475, "iter_tflops": 52.56107392536031, "iter_time": 0.3925165901184082, "loss": 0.07640055567026138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.438484180334775, "step_time": 0.35918589782714844} +{"epoch": 0, "iter": 6476, "iter_tflops": 54.33935011663477, "iter_time": 0.3796713333129883, "loss": 0.09055468440055847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.36191157498775, "step_time": 0.3475476608276368} +{"epoch": 0, "iter": 6477, "iter_tflops": 31.167799979951837, "iter_time": 0.661936149597168, "loss": 0.590148389339447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.27181388861186, "step_time": 0.6200772094726562} +{"epoch": 0, "iter": 6478, "iter_tflops": 24.255332458813637, "iter_time": 0.8505797042846679, "loss": 0.8463376760482788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.00316853263368, "step_time": 0.7640249137878418} +{"epoch": 0, "iter": 6479, "iter_tflops": 41.55185325966101, "iter_time": 0.4965144004821777, "loss": 0.6317355632781982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.51153227211265, "step_time": 0.46349996185302733} +{"epoch": 0, "iter": 6480, "iter_tflops": 45.11362019404701, "iter_time": 0.4573140754699707, "loss": 0.8161535263061523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.21388622432336, 
"step_time": 0.4279077072143555} +{"epoch": 0, "iter": 6481, "iter_tflops": 24.909897726842647, "iter_time": 0.8282287521362304, "loss": 0.8213234543800354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.251345009418483, "step_time": 0.7859061508178711} +{"epoch": 0, "iter": 6482, "iter_tflops": 28.04558168994828, "iter_time": 0.735627227783203, "loss": 0.7623059153556824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.569189668963105, "step_time": 0.6145841979980469} +{"epoch": 0, "iter": 6483, "iter_tflops": 43.592487719945396, "iter_time": 0.47327176284790046, "loss": 0.9396616220474243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.10030631841958, "step_time": 0.4380246143341064} +{"epoch": 0, "iter": 6484, "iter_tflops": 45.27092099558834, "iter_time": 0.4557250671386719, "loss": 0.8553964495658875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.56012571596565, "step_time": 0.42485667419433587} +{"epoch": 0, "iter": 6485, "iter_tflops": 20.68015064821008, "iter_time": 0.9976278152465821, "loss": 0.009936020709574223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.66090553149331, "step_time": 0.9524575729370117} +{"epoch": 0, "iter": 6486, "iter_tflops": 20.874745871727406, "iter_time": 0.9883278884887696, "loss": 0.10052831470966339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.492629420586425, "step_time": 0.8092964115142822} +{"epoch": 0, "iter": 6487, "iter_tflops": 52.19405836903882, "iter_time": 0.39527666854858406, "loss": 0.04032163694500923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.473178636721734, "step_time": 0.3589690704345704} +{"epoch": 0, "iter": 6488, "iter_tflops": 53.1906804295166, "iter_time": 0.38787045669555664, "loss": 0.0539751797914505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.37591025333244, "step_time": 0.35341793251037595} +{"epoch": 0, "iter": 6489, "iter_tflops": 27.823768670480508, "iter_time": 0.7414916992187499, "loss": 0.31259095668792725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
29.50548118688976, "step_time": 0.6992291831970214} +{"epoch": 0, "iter": 6490, "iter_tflops": 14.968276856672267, "iter_time": 1.3783212127685547, "loss": 0.3872009515762329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.790392681371394, "step_time": 1.0979596786499022} +{"epoch": 0, "iter": 6491, "iter_tflops": 49.16720950416926, "iter_time": 0.4196108283996582, "loss": 0.3282780349254608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.90696430352548, "step_time": 0.3827166633605957} +{"epoch": 0, "iter": 6492, "iter_tflops": 51.83527487399142, "iter_time": 0.39801261901855467, "loss": 0.28460821509361267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.14883698473783, "step_time": 0.36743581199646} +{"epoch": 0, "iter": 6493, "iter_tflops": 27.385736590919898, "iter_time": 0.7533517837524414, "loss": 0.8040903210639954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.8211081654275, "step_time": 0.715832763671875} +{"epoch": 0, "iter": 6494, "iter_tflops": 12.953041554269916, "iter_time": 1.5927605438232424, "loss": 0.6290110349655151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.964386962966696, "step_time": 1.2923198089599608} +{"epoch": 0, "iter": 6495, "iter_tflops": 48.83867072157233, "iter_time": 0.42243355941772454, "loss": 1.0566107034683228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.999177616503246, "step_time": 0.38927195549011234} +{"epoch": 0, "iter": 6496, "iter_tflops": 49.67731003455284, "iter_time": 0.41530214691162104, "loss": 0.7469570636749268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.52272262388846, "step_time": 0.3854642009735107} +{"epoch": 0, "iter": 6497, "iter_tflops": 45.8229823044721, "iter_time": 0.4502346305847168, "loss": 0.176312655210495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.16187456690102, "step_time": 0.41129032135009763} +{"epoch": 0, "iter": 6498, "iter_tflops": 21.084123394308765, "iter_time": 0.9785132217407226, "loss": 0.13153783977031708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.22974656972061, "step_time": 0.8177289237976073} +{"epoch": 0, "iter": 6499, "iter_tflops": 48.52819409768115, "iter_time": 0.42513623046875004, "loss": 0.16502510011196136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.950059443167156, "step_time": 0.389633056640625} +{"epoch": 0, "iter": 6500, "iter_tflops": 52.061735290652344, "iter_time": 0.3962813262939453, "loss": 0.18996228277683258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.7069576341911, "step_time": 0.36381943893432617} +{"epoch": 0, "iter": 6501, "iter_tflops": 16.98550615344316, "iter_time": 0.7673467864990233, "loss": 0.08406541496515274, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 17.899565976022757, "step_time": 0.7281614303588868} +{"epoch": 0, "iter": 6502, "iter_tflops": 15.670224087785773, "iter_time": 0.8317541275024414, "loss": 0.1441442221403122, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 19.356047283104733, "step_time": 0.67336958694458} +{"epoch": 0, "iter": 6503, "iter_tflops": 31.5324714300986, "iter_time": 0.41334449768066406, "loss": 0.0687522366642952, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 34.36478916236017, "step_time": 0.37927698326110837} +{"epoch": 0, "iter": 6504, "iter_tflops": 34.63540306512003, "iter_time": 0.3763136100769043, "loss": 0.07290584594011307, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 37.713301327279844, "step_time": 0.3456015014648437} +{"epoch": 0, "iter": 6505, "iter_tflops": 21.363557838048198, "iter_time": 0.9657143096923828, "loss": 0.6575763821601868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.355994454927572, "step_time": 0.9228439178466797} +{"epoch": 0, "iter": 6506, "iter_tflops": 12.224347151310182, "iter_time": 1.687705139160156, "loss": 0.6949138045310974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.55069783545058, "step_time": 1.246539192199707} +{"epoch": 0, "iter": 6507, "iter_tflops": 43.712888170769304, "iter_time": 0.4719682083129883, "loss": 0.8001303672790527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
46.963048056084574, "step_time": 0.43930482292175294} +{"epoch": 0, "iter": 6508, "iter_tflops": 43.79076952945969, "iter_time": 0.4711288185119629, "loss": 0.671073853969574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.93655622337887, "step_time": 0.4395527744293213} +{"epoch": 0, "iter": 6509, "iter_tflops": 33.62036262582444, "iter_time": 0.6136487503051758, "loss": 0.1554419845342636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.06754070361702, "step_time": 0.5720127601623536} +{"epoch": 0, "iter": 6510, "iter_tflops": 45.81182184925568, "iter_time": 0.45034431457519536, "loss": 0.2852906286716461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34887781048812, "step_time": 0.4097627277374267} +{"epoch": 0, "iter": 6511, "iter_tflops": 47.02439334308949, "iter_time": 0.43873173141479493, "loss": 0.1430547833442688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.02174845274109, "step_time": 0.4043588104248047} +{"epoch": 0, "iter": 6512, "iter_tflops": 51.93654346347375, "iter_time": 0.39723655319213863, "loss": 0.300237774848938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.3728606696654, "step_time": 0.36597563552856444} +{"epoch": 0, "iter": 6513, "iter_tflops": 29.711776842507934, "iter_time": 0.6943742752075195, "loss": 0.05227646976709366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.490738930398248, "step_time": 0.6551479644775391} +{"epoch": 0, "iter": 6514, "iter_tflops": 13.052400859156668, "iter_time": 1.5806359100341796, "loss": 0.07491979748010635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.415923128752983, "step_time": 1.2567732772827152} +{"epoch": 0, "iter": 6515, "iter_tflops": 36.806497040253404, "iter_time": 0.560528579711914, "loss": 0.09897639602422714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17093092599084, "step_time": 0.5135826587677003} +{"epoch": 0, "iter": 6516, "iter_tflops": 43.83272101440204, "iter_time": 0.4706779098510742, "loss": 0.03476296737790108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
48.26687790566591, "step_time": 0.42743791198730474} +{"epoch": 0, "iter": 6517, "iter_tflops": 20.101674970125753, "iter_time": 1.0263370361328126, "loss": 0.03429436311125755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.593524639788964, "step_time": 0.9554296417236329} +{"epoch": 0, "iter": 6518, "iter_tflops": 20.21107441014515, "iter_time": 1.0207816314697264, "loss": 0.008139400742948055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.966224207973784, "step_time": 0.7945357532501222} +{"epoch": 0, "iter": 6519, "iter_tflops": 55.13532222801558, "iter_time": 0.37419013214111324, "loss": 0.002596252365037799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.256581008739275, "step_time": 0.3423873901367187} +{"epoch": 0, "iter": 6520, "iter_tflops": 57.784359663229594, "iter_time": 0.357035945892334, "loss": 0.0047475071623921394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.38033348103074, "step_time": 0.3255125427246094} +{"epoch": 0, "iter": 6521, "iter_tflops": 23.19908268593199, "iter_time": 0.8893064346313477, "loss": 0.23838509619235992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.318920987485544, "step_time": 0.8483556289672851} +{"epoch": 0, "iter": 6522, "iter_tflops": 15.489447548468474, "iter_time": 1.331945083618164, "loss": 0.3051864206790924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.71894160067972, "step_time": 1.1021506423950196} +{"epoch": 0, "iter": 6523, "iter_tflops": 47.18243049222923, "iter_time": 0.4372622032165527, "loss": 0.17484380304813385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.09301606233779, "step_time": 0.40379478645324707} +{"epoch": 0, "iter": 6524, "iter_tflops": 53.01273791658336, "iter_time": 0.38917238235473633, "loss": 0.1692424714565277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.38002425769193, "step_time": 0.35955184364318843} +{"epoch": 0, "iter": 6525, "iter_tflops": 46.34566252526767, "iter_time": 0.4451569442749023, "loss": 0.4709376096725464, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 50.661721728844, "step_time": 0.40723237991333006} +{"epoch": 0, "iter": 6526, "iter_tflops": 35.07036153646215, "iter_time": 0.5882771835327149, "loss": 0.3669973909854889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.32445302085538, "step_time": 0.5383271484375} +{"epoch": 0, "iter": 6527, "iter_tflops": 35.86631319944784, "iter_time": 0.5752220306396484, "loss": 0.40170106291770935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.18984203488089, "step_time": 0.5264398231506348} +{"epoch": 0, "iter": 6528, "iter_tflops": 39.87172568520155, "iter_time": 0.5174366836547852, "loss": 0.3664311468601227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.069916063890986, "step_time": 0.47901401710510255} +{"epoch": 0, "iter": 6529, "iter_tflops": 24.100679963468536, "iter_time": 0.8560378189086915, "loss": 0.011275969445705414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.58003065054757, "step_time": 0.8065312271118165} +{"epoch": 0, "iter": 6530, "iter_tflops": 28.357039908509407, "iter_time": 0.7275475006103514, "loss": 0.0013725513126701117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.23013466627152, "step_time": 0.6027172756195069} +{"epoch": 0, "iter": 6531, "iter_tflops": 44.599437079966414, "iter_time": 0.4625864105224609, "loss": 0.002507410943508148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.75210616118738, "step_time": 0.414677791595459} +{"epoch": 0, "iter": 6532, "iter_tflops": 48.860667159429305, "iter_time": 0.4222433853149414, "loss": 0.008154110051691532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.926487664996884, "step_time": 0.38257810592651365} +{"epoch": 0, "iter": 6533, "iter_tflops": 21.150478202407424, "iter_time": 0.9754433593749999, "loss": 0.6351154446601868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.70579251283648, "step_time": 0.9086268844604493} +{"epoch": 0, "iter": 6534, "iter_tflops": 17.290651770477993, "iter_time": 1.1931935119628907, "loss": 0.6184203624725342, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 20.90619628115782, "step_time": 0.9868410892486573} +{"epoch": 0, "iter": 6535, "iter_tflops": 38.865273739217756, "iter_time": 0.530836181640625, "loss": 0.5924233198165894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53592495557853, "step_time": 0.48502750396728517} +{"epoch": 0, "iter": 6536, "iter_tflops": 41.76417796058205, "iter_time": 0.49399017333984374, "loss": 0.5288318991661072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.68388922377246, "step_time": 0.451605453491211} +{"epoch": 0, "iter": 6537, "iter_tflops": 19.402446104233924, "iter_time": 1.0633243560791015, "loss": 0.1607041358947754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.72403512831721, "step_time": 0.9955152740478515} +{"epoch": 0, "iter": 6538, "iter_tflops": 20.79876402723044, "iter_time": 0.9919384384155273, "loss": 0.17591024935245514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.953078005574106, "step_time": 0.7949382152557373} +{"epoch": 0, "iter": 6539, "iter_tflops": 54.57849269491067, "iter_time": 0.378007755279541, "loss": 0.1515529900789261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.60405609046447, "step_time": 0.3461357307434082} +{"epoch": 0, "iter": 6540, "iter_tflops": 54.33172074759829, "iter_time": 0.37972464752197266, "loss": 0.0962105318903923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.05781103868754, "step_time": 0.34933725357055667} +{"epoch": 0, "iter": 6541, "iter_tflops": 36.23485082604911, "iter_time": 0.5693715591430664, "loss": 0.9560614228248596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.8649550676068, "step_time": 0.530840534210205} +{"epoch": 0, "iter": 6542, "iter_tflops": 12.279844319609694, "iter_time": 1.6800777740478516, "loss": 0.7588040828704834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.264581457200876, "step_time": 1.4463160781860351} +{"epoch": 0, "iter": 6543, "iter_tflops": 17.178175842727452, "iter_time": 1.2010060729980467, "loss": 0.6667118668556213, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 20.563399508714888, "step_time": 1.003291965484619} +{"epoch": 0, "iter": 6544, "iter_tflops": 24.984066557210067, "iter_time": 0.8257700347900391, "loss": 0.7277452945709229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.83235282621728, "step_time": 0.6691378250122071} +{"epoch": 0, "iter": 6545, "iter_tflops": 18.777030243524006, "iter_time": 0.8113466186523437, "loss": 0.19679293036460876, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 20.223045215686284, "step_time": 0.7533326377868653} +{"epoch": 0, "iter": 6546, "iter_tflops": 26.536171108235553, "iter_time": 0.5741099548339844, "loss": 0.34169045090675354, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 28.281879236459748, "step_time": 0.5386728324890137} +{"epoch": 0, "iter": 6547, "iter_tflops": 27.922815104998577, "iter_time": 0.5455997161865235, "loss": 0.34988027811050415, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.711875370139087, "step_time": 0.5127471694946288} +{"epoch": 0, "iter": 6548, "iter_tflops": 27.454338195481192, "iter_time": 0.5549097518920899, "loss": 0.3390817642211914, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.15782023008937, "step_time": 0.5224903602600097} +{"epoch": 0, "iter": 6549, "iter_tflops": 22.499649910414846, "iter_time": 0.867223747253418, "loss": 0.005368849262595177, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 23.670241487197075, "step_time": 0.8243359374999999} +{"epoch": 0, "iter": 6550, "iter_tflops": 15.157580035825234, "iter_time": 1.2872919464111328, "loss": 0.0022493607830256224, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 18.283586578265606, "step_time": 1.0671992950439453} +{"epoch": 0, "iter": 6551, "iter_tflops": 51.67943195447942, "iter_time": 0.37756279373168944, "loss": 0.004904599394649267, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 56.86730100537868, "step_time": 0.3431186351776123} +{"epoch": 0, "iter": 6552, "iter_tflops": 49.51849239009605, "iter_time": 0.39403927230834956, "loss": 0.0038558049127459526, "lr": 
3e-05, "seqlen": 7760.0, "step_tflops": 54.209041741092456, "step_time": 0.3599442100524902} +{"epoch": 0, "iter": 6553, "iter_tflops": 31.3533133494156, "iter_time": 0.658019561767578, "loss": 0.0029181528370827436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.50975799259773, "step_time": 0.615674201965332} +{"epoch": 0, "iter": 6554, "iter_tflops": 19.276186381868378, "iter_time": 1.0702891693115235, "loss": 0.00341261038556695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.520219169847223, "step_time": 0.8771641693115234} +{"epoch": 0, "iter": 6555, "iter_tflops": 47.61157098508432, "iter_time": 0.4333209991455078, "loss": 0.010436080396175385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.63075057865678, "step_time": 0.39199694633483884} +{"epoch": 0, "iter": 6556, "iter_tflops": 48.179801895382596, "iter_time": 0.42821042633056633, "loss": 0.01654263213276863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.0340192692546, "step_time": 0.3890162162780762} +{"epoch": 0, "iter": 6557, "iter_tflops": 15.13767584794333, "iter_time": 1.3628970336914064, "loss": 0.522309422492981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.93317331388498, "step_time": 1.294851509094238} +{"epoch": 0, "iter": 6558, "iter_tflops": 22.791551662222016, "iter_time": 0.9052079391479493, "loss": 0.3024836778640747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.442950341188336, "step_time": 0.8108766174316407} +{"epoch": 0, "iter": 6559, "iter_tflops": 42.546027822078415, "iter_time": 0.4849123306274414, "loss": 0.4019298255443573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.39647849022907, "step_time": 0.4446693840026855} +{"epoch": 0, "iter": 6560, "iter_tflops": 36.253109836395666, "iter_time": 0.5690847930908203, "loss": 0.3495544493198395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.728694190769026, "step_time": 0.5192995624542236} +{"epoch": 0, "iter": 6561, "iter_tflops": 23.893528490314463, "iter_time": 0.8634594726562499, "loss": 0.18491412699222565, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.62905959103374, "step_time": 0.8049883155822753} +{"epoch": 0, "iter": 6562, "iter_tflops": 8.23749026077997, "iter_time": 2.5045363159179685, "loss": 0.146169975399971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.986070871098379, "step_time": 2.065987091064453} +{"epoch": 0, "iter": 6563, "iter_tflops": 11.40081978282323, "iter_time": 1.8096149139404298, "loss": 0.16604092717170715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.460929100822117, "step_time": 1.4266782836914063} +{"epoch": 0, "iter": 6564, "iter_tflops": 46.446868704384464, "iter_time": 0.4441869621276856, "loss": 0.18593095242977142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.708673797747934, "step_time": 0.4068553161621094} +{"epoch": 0, "iter": 6565, "iter_tflops": 28.41231857810246, "iter_time": 0.5794441299438478, "loss": 0.15418434143066406, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 30.61613058636543, "step_time": 0.5377345504760742} +{"epoch": 0, "iter": 6566, "iter_tflops": 29.137186695587456, "iter_time": 0.5650288543701172, "loss": 0.2872031033039093, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.031696195448454, "step_time": 0.5305333976745605} +{"epoch": 0, "iter": 6567, "iter_tflops": 29.862743736483406, "iter_time": 0.5513006896972656, "loss": 0.27130210399627686, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.89562133488813, "step_time": 0.5161633644104003} +{"epoch": 0, "iter": 6568, "iter_tflops": 28.34787745216387, "iter_time": 0.5807613372802735, "loss": 0.3207549452781677, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 29.957886032870086, "step_time": 0.5495498313903808} +{"epoch": 0, "iter": 6569, "iter_tflops": 35.763128520917, "iter_time": 0.5768816757202149, "loss": 0.0009709974401630461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.54989436874757, "step_time": 0.5351789894104003} +{"epoch": 0, "iter": 6570, "iter_tflops": 9.88340954257658, "iter_time": 2.087446990966797, "loss": 0.00625097518786788, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.62305265466676, "step_time": 1.775015060424805} +{"epoch": 0, "iter": 6571, "iter_tflops": 11.799629304638973, "iter_time": 1.7484526824951174, "loss": 0.0029284111224114895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.841558786277044, "step_time": 1.3900893974304198} +{"epoch": 0, "iter": 6572, "iter_tflops": 51.995396880784135, "iter_time": 0.39678692245483393, "loss": 0.004002669360488653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.63009850972633, "step_time": 0.345984561920166} +{"epoch": 0, "iter": 6573, "iter_tflops": 25.534669156743213, "iter_time": 0.6014315643310547, "loss": 0.17235912382602692, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.395502524601728, "step_time": 0.560579460144043} +{"epoch": 0, "iter": 6574, "iter_tflops": 21.769506626212415, "iter_time": 0.7054526443481446, "loss": 0.3835976719856262, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.483828247856, "step_time": 0.6539545364379883} +{"epoch": 0, "iter": 6575, "iter_tflops": 25.271863167590237, "iter_time": 0.6076859436035156, "loss": 0.14204959571361542, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.145830961962865, "step_time": 0.5657353439331054} +{"epoch": 0, "iter": 6576, "iter_tflops": 22.75314429771914, "iter_time": 0.6749553298950195, "loss": 0.24494297802448273, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.53375772395735, "step_time": 0.6259683570861817} +{"epoch": 0, "iter": 6577, "iter_tflops": 16.528643610995456, "iter_time": 1.248202453613281, "loss": 0.5156573057174683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.435727812883393, "step_time": 1.1832654037475587} +{"epoch": 0, "iter": 6578, "iter_tflops": 16.514637030993942, "iter_time": 1.2492610931396484, "loss": 0.5349704027175903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.619614048820722, "step_time": 0.8734729309082032} +{"epoch": 0, "iter": 6579, "iter_tflops": 43.85020773322106, "iter_time": 0.47049021148681636, "loss": 
0.38298916816711426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.26111087051684, "step_time": 0.43653424835205085} +{"epoch": 0, "iter": 6580, "iter_tflops": 46.59624077528002, "iter_time": 0.44276304626464846, "loss": 0.550920307636261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.15402837363131, "step_time": 0.40331317329406746} +{"epoch": 0, "iter": 6581, "iter_tflops": 30.843027675060988, "iter_time": 0.6689062347412109, "loss": 0.3562847673892975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.75007415036222, "step_time": 0.6299556274414062} +{"epoch": 0, "iter": 6582, "iter_tflops": 9.766013153215651, "iter_time": 2.1125400085449217, "loss": 0.47517186403274536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.196765677746358, "step_time": 1.6915216751098634} +{"epoch": 0, "iter": 6583, "iter_tflops": 14.482409037795376, "iter_time": 1.4245622711181642, "loss": 0.4240868389606476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.056418948582778, "step_time": 1.1425905418395996} +{"epoch": 0, "iter": 6584, "iter_tflops": 38.121526105758186, "iter_time": 0.5411927490234375, "loss": 0.4730089604854584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53626036081442, "step_time": 0.49670079421997076} +{"epoch": 0, "iter": 6585, "iter_tflops": 15.68777785855208, "iter_time": 0.9060576629638672, "loss": 0.30002933740615845, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 16.840208638418513, "step_time": 0.8440531616210937} +{"epoch": 0, "iter": 6586, "iter_tflops": 23.214857190857234, "iter_time": 0.6122816619873047, "loss": 0.19599035382270813, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 24.71926586377573, "step_time": 0.5750183448791504} +{"epoch": 0, "iter": 6587, "iter_tflops": 25.359181497855676, "iter_time": 0.560508285522461, "loss": 0.2935985028743744, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 27.00443205601494, "step_time": 0.5263592033386231} +{"epoch": 0, "iter": 6588, "iter_tflops": 25.49808852453013, "iter_time": 0.5574547805786133, 
"loss": 0.21048350632190704, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 27.133646049377813, "step_time": 0.523852611541748} +{"epoch": 0, "iter": 6589, "iter_tflops": 50.51427177118787, "iter_time": 0.4084210815429688, "loss": 0.03733442351222038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.61523329820968, "step_time": 0.37096119689941404} +{"epoch": 0, "iter": 6590, "iter_tflops": 41.17162652975583, "iter_time": 0.5010997924804688, "loss": 0.05483020842075348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41957188530051, "step_time": 0.4542335529327393} +{"epoch": 0, "iter": 6591, "iter_tflops": 41.85332176328586, "iter_time": 0.4929380187988281, "loss": 0.029812432825565338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.25433718754565, "step_time": 0.4460358695983886} +{"epoch": 0, "iter": 6592, "iter_tflops": 41.64684458746455, "iter_time": 0.49538191223144534, "loss": 0.029988396912813187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.89155155248119, "step_time": 0.4495619087219238} +{"epoch": 0, "iter": 6593, "iter_tflops": 27.42124471295221, "iter_time": 0.7523762588500978, "loss": 0.9783616065979004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.0317182869579, "step_time": 0.7106397666931152} +{"epoch": 0, "iter": 6594, "iter_tflops": 20.45737129612504, "iter_time": 1.0084919128417968, "loss": 0.8506454229354858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.895201355641465, "step_time": 0.9011099395751953} +{"epoch": 0, "iter": 6595, "iter_tflops": 35.09445901343475, "iter_time": 0.5878732452392578, "loss": 0.7442688345909119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.12455870133768, "step_time": 0.5411497001647949} +{"epoch": 0, "iter": 6596, "iter_tflops": 39.34457771307313, "iter_time": 0.5243694229125977, "loss": 0.9202103614807129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87635696537267, "step_time": 0.48117645645141593} +{"epoch": 0, "iter": 6597, "iter_tflops": 35.06808071650669, "iter_time": 0.588315444946289, 
"loss": 0.028819615021348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.1532616365368, "step_time": 0.5269316692352295} +{"epoch": 0, "iter": 6598, "iter_tflops": 41.08929080074864, "iter_time": 0.5021039085388184, "loss": 0.03175508230924606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71218710498693, "step_time": 0.45132588958740233} +{"epoch": 0, "iter": 6599, "iter_tflops": 42.560725228738406, "iter_time": 0.48474487686157225, "loss": 0.06063820794224739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.879486065278904, "step_time": 0.4400878772735596} +{"epoch": 0, "iter": 6600, "iter_tflops": 44.61199171832248, "iter_time": 0.4624562301635743, "loss": 0.0382966510951519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.105709100061304, "step_time": 0.42013635253906256} +{"epoch": 0, "iter": 6601, "iter_tflops": 18.268098668436416, "iter_time": 1.129350891113281, "loss": 0.8254261612892151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.38907914665005, "step_time": 1.0640574188232423} +{"epoch": 0, "iter": 6602, "iter_tflops": 14.1083157712062, "iter_time": 1.4623356781005858, "loss": 0.9674125909805298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.884218493226975, "step_time": 1.2219158096313476} +{"epoch": 0, "iter": 6603, "iter_tflops": 45.06383509346686, "iter_time": 0.4578193016052246, "loss": 0.867080807685852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.73341863664981, "step_time": 0.423345911026001} +{"epoch": 0, "iter": 6604, "iter_tflops": 42.48460376687328, "iter_time": 0.48561341476440434, "loss": 0.6816641688346863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.700735304522055, "step_time": 0.45143898391723636} +{"epoch": 0, "iter": 6605, "iter_tflops": 34.25318410277859, "iter_time": 0.6023116989135743, "loss": 0.5740640759468079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.774498248273225, "step_time": 0.561016315460205} +{"epoch": 0, "iter": 6606, "iter_tflops": 10.162911794708704, "iter_time": 2.0300376434326175, 
"loss": 0.38394877314567566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.584447931776658, "step_time": 1.7809302291870117} +{"epoch": 0, "iter": 6607, "iter_tflops": 14.843356875844659, "iter_time": 1.3899210052490232, "loss": 0.4988011419773102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.03125321481685, "step_time": 1.211366729736328} +{"epoch": 0, "iter": 6608, "iter_tflops": 23.342428046864143, "iter_time": 0.8838452224731446, "loss": 0.5266976952552795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.45392455755157, "step_time": 0.6357041187286376} +{"epoch": 0, "iter": 6609, "iter_tflops": 22.907448424280457, "iter_time": 0.7043687286376953, "loss": 0.37346816062927246, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 24.341702474760254, "step_time": 0.6628661384582518} +{"epoch": 0, "iter": 6610, "iter_tflops": 9.769308212926628, "iter_time": 1.651630798339844, "loss": 0.32474061846733093, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 11.462022050028791, "step_time": 1.4077176132202147} +{"epoch": 0, "iter": 6611, "iter_tflops": 29.3320503994094, "iter_time": 0.5500907745361329, "loss": 0.34034767746925354, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.316011178706205, "step_time": 0.5152409172058106} +{"epoch": 0, "iter": 6612, "iter_tflops": 27.865285116318155, "iter_time": 0.5790463027954103, "loss": 0.3603787124156952, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 29.744231287217442, "step_time": 0.5424678878784179} +{"epoch": 0, "iter": 6613, "iter_tflops": 21.60773449130616, "iter_time": 0.6238965225219727, "loss": 0.027740279212594032, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 23.089124256177147, "step_time": 0.5838675498962402} +{"epoch": 0, "iter": 6614, "iter_tflops": 7.099466929980867, "iter_time": 1.8988736114501954, "loss": 0.06623627990484238, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 8.5038466971364, "step_time": 1.5852814483642579} +{"epoch": 0, "iter": 6615, "iter_tflops": 6.742378792641262, "iter_time": 
1.9994412689208985, "loss": 0.021624214947223663, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 8.35844764443404, "step_time": 1.6128581504821777} +{"epoch": 0, "iter": 6616, "iter_tflops": 14.464782332239084, "iter_time": 0.9319870910644532, "loss": 0.07193226367235184, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 17.729439065945492, "step_time": 0.7603732051849366} +{"epoch": 0, "iter": 6617, "iter_tflops": 22.93700531920271, "iter_time": 0.6588500213623047, "loss": 0.21241481602191925, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.547001092628786, "step_time": 0.6156371765136719} +{"epoch": 0, "iter": 6618, "iter_tflops": 15.0079737404105, "iter_time": 1.0069344940185547, "loss": 0.16706007719039917, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 16.53052825954341, "step_time": 0.9141901702880859} +{"epoch": 0, "iter": 6619, "iter_tflops": 26.608850780249533, "iter_time": 0.5679330749511718, "loss": 0.22338984906673431, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.361564343109315, "step_time": 0.5328354339599609} +{"epoch": 0, "iter": 6620, "iter_tflops": 25.904990666815273, "iter_time": 0.583364288330078, "loss": 0.22032177448272705, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.44864613010014, "step_time": 0.5505570793151856} +{"epoch": 0, "iter": 6621, "iter_tflops": 21.446539805151186, "iter_time": 0.9619777221679687, "loss": 0.7806280255317688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.691460918590042, "step_time": 0.9092007598876953} +{"epoch": 0, "iter": 6622, "iter_tflops": 14.462065509085456, "iter_time": 1.426566177368164, "loss": 0.6419537663459778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.158688439804244, "step_time": 1.20237007522583} +{"epoch": 0, "iter": 6623, "iter_tflops": 37.08428187405276, "iter_time": 0.5563298645019531, "loss": 0.9292637705802917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.38033186117924, "step_time": 0.5109193649291992} +{"epoch": 0, "iter": 6624, "iter_tflops": 36.93265589598795, "iter_time": 
0.5586138610839844, "loss": 0.7041587829589844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.26554759376037, "step_time": 0.5123758335113526} +{"epoch": 0, "iter": 6625, "iter_tflops": 1.314480989862168, "iter_time": 1.072814956665039, "loss": 0.5657727122306824, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.4151727497112785, "step_time": 0.996482490539551} +{"epoch": 0, "iter": 6626, "iter_tflops": 1.2441550672878927, "iter_time": 1.1334558715820313, "loss": 0.5127673149108887, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.6170137916582399, "step_time": 0.8720982303619385} +{"epoch": 0, "iter": 6627, "iter_tflops": 2.656604144201923, "iter_time": 0.5308261184692382, "loss": 0.5351425409317017, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.9177944747409414, "step_time": 0.4833084983825684} +{"epoch": 0, "iter": 6628, "iter_tflops": 2.861894096458707, "iter_time": 0.49274879455566406, "loss": 0.3105818033218384, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.125039806988369, "step_time": 0.45125660896301273} +{"epoch": 0, "iter": 6629, "iter_tflops": 22.562754075778614, "iter_time": 0.9143871994018555, "loss": 0.026775769889354706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.0826139124648, "step_time": 0.8566799926757811} +{"epoch": 0, "iter": 6630, "iter_tflops": 12.722410648830722, "iter_time": 1.6216339874267578, "loss": 0.017284534871578217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.624303410520543, "step_time": 1.3204488525390623} +{"epoch": 0, "iter": 6631, "iter_tflops": 11.542387976180484, "iter_time": 1.787419860839844, "loss": 0.011476459912955761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.297326289633645, "step_time": 1.5515219421386717} +{"epoch": 0, "iter": 6632, "iter_tflops": 22.13085595581961, "iter_time": 0.9322320632934571, "loss": 0.012747503817081451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.904074243262308, "step_time": 0.7668390045166016} +{"epoch": 0, "iter": 6633, "iter_tflops": 24.80271365688666, 
"iter_time": 0.6555039901733399, "loss": 0.14898091554641724, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.380581305292875, "step_time": 0.6162971763610839} +{"epoch": 0, "iter": 6634, "iter_tflops": 10.738219597310144, "iter_time": 1.514057113647461, "loss": 0.2584454417228699, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 12.606123278242904, "step_time": 1.2897127380371094} +{"epoch": 0, "iter": 6635, "iter_tflops": 23.461248809811533, "iter_time": 0.6929843292236327, "loss": 0.21742264926433563, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 25.244019826124422, "step_time": 0.6440447235107422} +{"epoch": 0, "iter": 6636, "iter_tflops": 27.58870568630091, "iter_time": 0.5893091888427734, "loss": 0.21594710648059845, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 29.51102640732392, "step_time": 0.5509221382141113} +{"epoch": 0, "iter": 6637, "iter_tflops": 5.534297929629896, "iter_time": 1.1337483978271485, "loss": 0.01730191893875599, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 5.981357486246704, "step_time": 1.0490095977783203} +{"epoch": 0, "iter": 6638, "iter_tflops": 4.488870299699831, "iter_time": 1.397790756225586, "loss": 0.012479068711400032, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 5.718525331114233, "step_time": 1.0972236804962159} +{"epoch": 0, "iter": 6639, "iter_tflops": 12.506821117042717, "iter_time": 0.5016863479614257, "loss": 0.01782599836587906, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 13.823290133213964, "step_time": 0.45390795898437497} +{"epoch": 0, "iter": 6640, "iter_tflops": 13.544500457951807, "iter_time": 0.4632508544921875, "loss": 0.0241694413125515, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 15.031648663105711, "step_time": 0.417419376373291} +{"epoch": 0, "iter": 6641, "iter_tflops": 18.809724856734828, "iter_time": 1.0968312225341796, "loss": 0.14354299008846283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.908725972568828, "step_time": 1.036283966064453} +{"epoch": 0, "iter": 6642, "iter_tflops": 
19.269419246386423, "iter_time": 1.0706650390624999, "loss": 0.1619146466255188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.423435079579175, "step_time": 0.9200683765411377} +{"epoch": 0, "iter": 6643, "iter_tflops": 48.52905279614481, "iter_time": 0.4251287078857422, "loss": 0.11588897556066513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.08947419250737, "step_time": 0.38860986709594725} +{"epoch": 0, "iter": 6644, "iter_tflops": 52.715354241579995, "iter_time": 0.3913678245544433, "loss": 0.11706367135047913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.48234297569647, "step_time": 0.35891184043884283} +{"epoch": 0, "iter": 6645, "iter_tflops": 32.926855694305175, "iter_time": 0.6265734481811525, "loss": 0.09199819713830948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.113240841753345, "step_time": 0.5875587959289552} +{"epoch": 0, "iter": 6646, "iter_tflops": 13.233813822899831, "iter_time": 1.5589680938720705, "loss": 0.0662752315402031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.273433451683502, "step_time": 1.1943828983306883} +{"epoch": 0, "iter": 6647, "iter_tflops": 48.920432267731236, "iter_time": 0.4217275390625, "loss": 0.0845978781580925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.38579065017646, "step_time": 0.38645289802551264} +{"epoch": 0, "iter": 6648, "iter_tflops": 54.764085769855704, "iter_time": 0.3767267036437988, "loss": 0.08398637920618057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.620072297587534, "step_time": 0.34604274559021} +{"epoch": 0, "iter": 6649, "iter_tflops": 32.67927486432789, "iter_time": 0.6313204193115234, "loss": 0.0026022957172244787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.02853830687793, "step_time": 0.5889795722961426} +{"epoch": 0, "iter": 6650, "iter_tflops": 9.738282201282672, "iter_time": 2.118555725097656, "loss": 0.0023076371289789677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.528691467372436, "step_time": 1.9595116424560544} +{"epoch": 0, "iter": 6651, 
"iter_tflops": 11.988845993572527, "iter_time": 1.7208573303222656, "loss": 0.0003214942989870906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.535927139681279, "step_time": 1.4193173446655274} +{"epoch": 0, "iter": 6652, "iter_tflops": 40.82636852648039, "iter_time": 0.5053374633789062, "loss": 0.02576153166592121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15733486202017, "step_time": 0.3881137676239014} +{"epoch": 0, "iter": 6653, "iter_tflops": 23.086351558515336, "iter_time": 0.6758467407226563, "loss": 0.14495782554149628, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.60266344489179, "step_time": 0.6341929397583007} +{"epoch": 0, "iter": 6654, "iter_tflops": 14.404010392612525, "iter_time": 1.083228561401367, "loss": 0.1731480062007904, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.77209557560487, "step_time": 0.9302853889465332} +{"epoch": 0, "iter": 6655, "iter_tflops": 23.18020743848658, "iter_time": 0.6731102600097656, "loss": 0.34090644121170044, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.029683722742156, "step_time": 0.6233732566833496} +{"epoch": 0, "iter": 6656, "iter_tflops": 23.823090513082736, "iter_time": 0.6549458999633788, "loss": 0.3608270585536957, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.578948249794202, "step_time": 0.6099873733520509} +{"epoch": 0, "iter": 6657, "iter_tflops": 32.45916655083276, "iter_time": 0.6356014556884765, "loss": 0.20833128690719604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.904713891280515, "step_time": 0.5746068210601807} +{"epoch": 0, "iter": 6658, "iter_tflops": 40.70282501912418, "iter_time": 0.5068712921142577, "loss": 0.2275821566581726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.5811864924722, "step_time": 0.4627757835388184} +{"epoch": 0, "iter": 6659, "iter_tflops": 42.271198053906915, "iter_time": 0.48806502914428707, "loss": 0.15863001346588135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.50269264818197, "step_time": 0.4436537399291992} +{"epoch": 0, "iter": 
6660, "iter_tflops": 41.80131465398674, "iter_time": 0.49355130767822264, "loss": 0.19407206773757935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.9305557088784, "step_time": 0.4491801414489746} +{"epoch": 0, "iter": 6661, "iter_tflops": 31.32371109985084, "iter_time": 0.6586414184570312, "loss": 0.557941198348999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.855841702636766, "step_time": 0.5918977279663086} +{"epoch": 0, "iter": 6662, "iter_tflops": 39.02065869040582, "iter_time": 0.5287223281860352, "loss": 0.5609603524208069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.655822174272444, "step_time": 0.47258515548706054} +{"epoch": 0, "iter": 6663, "iter_tflops": 44.42764156793945, "iter_time": 0.4643751678466797, "loss": 0.40434756875038147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.58011458021233, "step_time": 0.4246818618774414} +{"epoch": 0, "iter": 6664, "iter_tflops": 39.067923640352724, "iter_time": 0.5280826721191406, "loss": 0.5549066662788391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.65228570716578, "step_time": 0.483704288482666} +{"epoch": 0, "iter": 6665, "iter_tflops": 18.570808224321265, "iter_time": 0.9884094238281251, "loss": 0.1723407804965973, "lr": 3e-05, "seqlen": 7312.0, "step_tflops": 20.267906109721395, "step_time": 0.9056466789245605} +{"epoch": 0, "iter": 6666, "iter_tflops": 32.13565961870338, "iter_time": 0.5711898269653319, "loss": 0.09204228967428207, "lr": 3e-05, "seqlen": 7312.0, "step_tflops": 39.89328755779219, "step_time": 0.46011655044555666} +{"epoch": 0, "iter": 6667, "iter_tflops": 47.47194742344342, "iter_time": 0.38666123580932615, "loss": 0.12477003782987595, "lr": 3e-05, "seqlen": 7312.0, "step_tflops": 51.68981361469589, "step_time": 0.3551098480224609} +{"epoch": 0, "iter": 6668, "iter_tflops": 48.17508769939373, "iter_time": 0.38101771545410157, "loss": 0.1850607544183731, "lr": 3e-05, "seqlen": 7312.0, "step_tflops": 52.31672182182925, "step_time": 0.3508545875549316} +{"epoch": 0, "iter": 
6669, "iter_tflops": 19.67506105031154, "iter_time": 1.0485910797119142, "loss": 0.8153002858161926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.554135778548165, "step_time": 1.0037441482543947} +{"epoch": 0, "iter": 6670, "iter_tflops": 22.933664811876245, "iter_time": 0.8995986328125, "loss": 0.7783705592155457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.88850435848302, "step_time": 0.7397705249786377} +{"epoch": 0, "iter": 6671, "iter_tflops": 43.83421809112267, "iter_time": 0.4706618347167969, "loss": 0.6819913387298584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.925723196346446, "step_time": 0.4396542472839355} +{"epoch": 0, "iter": 6672, "iter_tflops": 48.500216813407, "iter_time": 0.42538146972656254, "loss": 0.7500671148300171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40043772845158, "step_time": 0.39371986961364747} +{"epoch": 0, "iter": 6673, "iter_tflops": 15.052399564002867, "iter_time": 0.7018226470947265, "loss": 0.00720352353528142, "lr": 3e-05, "seqlen": 4256.0, "step_tflops": 16.081833109067823, "step_time": 0.6568974342346191} +{"epoch": 0, "iter": 6674, "iter_tflops": 6.548465242797366, "iter_time": 1.613219970703125, "loss": 0.008984417654573917, "lr": 3e-05, "seqlen": 4256.0, "step_tflops": 8.300558004933194, "step_time": 1.272699365615845} +{"epoch": 0, "iter": 6675, "iter_tflops": 28.910877023398935, "iter_time": 0.36540278244018554, "loss": 0.00496088620275259, "lr": 3e-05, "seqlen": 4256.0, "step_tflops": 32.275831671778214, "step_time": 0.3273072872161865} +{"epoch": 0, "iter": 6676, "iter_tflops": 28.762954385825626, "iter_time": 0.367281982421875, "loss": 0.0018384603317826986, "lr": 3e-05, "seqlen": 4256.0, "step_tflops": 31.48206457622041, "step_time": 0.33555978775024414} +{"epoch": 0, "iter": 6677, "iter_tflops": 37.089105235485135, "iter_time": 0.5562575149536133, "loss": 0.5718408226966858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.873698455961666, "step_time": 0.5174110832214356} +{"epoch": 0, 
"iter": 6678, "iter_tflops": 8.641753902870224, "iter_time": 2.3873734130859376, "loss": 0.7455072999000549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.312618124210559, "step_time": 1.8237240295410158} +{"epoch": 0, "iter": 6679, "iter_tflops": 9.700412619863537, "iter_time": 2.126826385498047, "loss": 0.6912911534309387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.247276599194823, "step_time": 1.8343190307617188} +{"epoch": 0, "iter": 6680, "iter_tflops": 42.124835005802154, "iter_time": 0.48976081466674803, "loss": 0.737280547618866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.64072017085869, "step_time": 0.45203260231018066} +{"epoch": 0, "iter": 6681, "iter_tflops": 19.132172981012438, "iter_time": 0.7898761138916016, "loss": 0.31340521574020386, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 20.17738115404747, "step_time": 0.7489597549438477} +{"epoch": 0, "iter": 6682, "iter_tflops": 8.708335254563222, "iter_time": 1.7353542327880862, "loss": 0.31922638416290283, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 10.212354988986016, "step_time": 1.4797807617187502} +{"epoch": 0, "iter": 6683, "iter_tflops": 22.79979015246874, "iter_time": 0.6628151550292969, "loss": 0.18641354143619537, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.559751998548904, "step_time": 0.6153175506591797} +{"epoch": 0, "iter": 6684, "iter_tflops": 22.396670993423665, "iter_time": 0.6747452087402345, "loss": 0.30390384793281555, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.101580841272362, "step_time": 0.6270147399902344} +{"epoch": 0, "iter": 6685, "iter_tflops": 27.529464047465133, "iter_time": 0.7494186401367188, "loss": 0.7595778107643127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.856001039263457, "step_time": 0.6910199890136718} +{"epoch": 0, "iter": 6686, "iter_tflops": 10.35815132221472, "iter_time": 1.9917737121582029, "loss": 0.7942348718643188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.599332270096133, "step_time": 1.7786449279785157} 
+{"epoch": 0, "iter": 6687, "iter_tflops": 12.985891083741922, "iter_time": 1.5887314453124999, "loss": 0.9130052924156189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.864174175743997, "step_time": 1.3879744186401368} +{"epoch": 0, "iter": 6688, "iter_tflops": 25.12499474985875, "iter_time": 0.8211382217407227, "loss": 0.8051700592041016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.80536652041133, "step_time": 0.7419824333190919} +{"epoch": 0, "iter": 6689, "iter_tflops": 16.075368123181544, "iter_time": 0.863933349609375, "loss": 0.2628403306007385, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 16.916987625305502, "step_time": 0.8209526977539063} +{"epoch": 0, "iter": 6690, "iter_tflops": 10.059559723353095, "iter_time": 1.3805819549560545, "loss": 0.30344337224960327, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 11.234594555589833, "step_time": 1.236185832977295} +{"epoch": 0, "iter": 6691, "iter_tflops": 23.305650504238045, "iter_time": 0.5959089889526368, "loss": 0.2763449549674988, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 24.838119600785795, "step_time": 0.5591424331665039} +{"epoch": 0, "iter": 6692, "iter_tflops": 24.64196445941258, "iter_time": 0.5635933227539063, "loss": 0.15571536123752594, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 26.111047430727194, "step_time": 0.5318839340209961} +{"epoch": 0, "iter": 6693, "iter_tflops": 30.74656870761945, "iter_time": 0.6710047454833984, "loss": 0.2627864480018616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.764642532908105, "step_time": 0.6296755256652833} +{"epoch": 0, "iter": 6694, "iter_tflops": 33.95230403791742, "iter_time": 0.6076492919921874, "loss": 0.32726728916168213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.3206011288858, "step_time": 0.48749528503417966} +{"epoch": 0, "iter": 6695, "iter_tflops": 43.73288480907867, "iter_time": 0.4717524032592774, "loss": 0.22716684639453888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.37691983797739, "step_time": 
0.4354671764373779} +{"epoch": 0, "iter": 6696, "iter_tflops": 49.33497048493486, "iter_time": 0.41818396377563477, "loss": 0.4020031690597534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.73507740246795, "step_time": 0.3839408912658691} +{"epoch": 0, "iter": 6697, "iter_tflops": 45.7610418677132, "iter_time": 0.450844051361084, "loss": 0.00274264975450933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.858961507841876, "step_time": 0.4137890739440918} +{"epoch": 0, "iter": 6698, "iter_tflops": 11.183806549307702, "iter_time": 1.8447291107177735, "loss": 0.009725422598421574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.237270887798267, "step_time": 1.6859227600097655} +{"epoch": 0, "iter": 6699, "iter_tflops": 13.790096514309875, "iter_time": 1.4960804290771486, "loss": 0.018021196126937866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.401090511239786, "step_time": 1.2579098625183105} +{"epoch": 0, "iter": 6700, "iter_tflops": 36.32287123508763, "iter_time": 0.567991813659668, "loss": 0.0026333066634833813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.596858733085206, "step_time": 0.5081943321228027} +{"epoch": 0, "iter": 6701, "iter_tflops": 20.909876806383725, "iter_time": 0.7520675048828125, "loss": 0.2218242585659027, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 22.38733400175866, "step_time": 0.7024346389770508} +{"epoch": 0, "iter": 6702, "iter_tflops": 23.282783410822585, "iter_time": 0.6754191970825196, "loss": 0.2342958301305771, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 28.103821099689014, "step_time": 0.5595551872253418} +{"epoch": 0, "iter": 6703, "iter_tflops": 27.171527864157337, "iter_time": 0.57875431060791, "loss": 0.12753036618232727, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 28.833953056591323, "step_time": 0.545386157989502} +{"epoch": 0, "iter": 6704, "iter_tflops": 28.197082094106044, "iter_time": 0.557704475402832, "loss": 0.32331299781799316, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 29.872782920882468, 
"step_time": 0.5264202842712402} +{"epoch": 0, "iter": 6705, "iter_tflops": 44.55500488620753, "iter_time": 0.463047721862793, "loss": 0.15742307901382446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71917966655355, "step_time": 0.42346964073181154} +{"epoch": 0, "iter": 6706, "iter_tflops": 9.360412860541132, "iter_time": 2.2040794372558596, "loss": 0.24591831862926483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.74049750877961, "step_time": 1.7572588806152347} +{"epoch": 0, "iter": 6707, "iter_tflops": 13.487825555437588, "iter_time": 1.5296085662841796, "loss": 0.17574428021907806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.751941246199925, "step_time": 1.309749267578125} +{"epoch": 0, "iter": 6708, "iter_tflops": 17.18995385341374, "iter_time": 1.2001831817626953, "loss": 0.18835850059986115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.824946863588526, "step_time": 0.990691291809082} +{"epoch": 0, "iter": 6709, "iter_tflops": 22.399816310408234, "iter_time": 0.7111846542358399, "loss": 0.24619844555854797, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 24.18719526832142, "step_time": 0.6586297187805177} +{"epoch": 0, "iter": 6710, "iter_tflops": 25.93676453540536, "iter_time": 0.6142017288208008, "loss": 0.22937461733818054, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 27.676951739488302, "step_time": 0.5755838203430177} +{"epoch": 0, "iter": 6711, "iter_tflops": 29.210032028428344, "iter_time": 0.5453744659423828, "loss": 0.23399291932582855, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 31.16841616087508, "step_time": 0.5111073188781738} +{"epoch": 0, "iter": 6712, "iter_tflops": 27.32781779789099, "iter_time": 0.5829373474121093, "loss": 0.15056964755058289, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.001821367531658, "step_time": 0.5492898330688477} +{"epoch": 0, "iter": 6713, "iter_tflops": 30.233187486086543, "iter_time": 0.6823988876342773, "loss": 0.8101838827133179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
32.13058759150192, "step_time": 0.6421013450622558} +{"epoch": 0, "iter": 6714, "iter_tflops": 8.127355400385861, "iter_time": 2.538475616455078, "loss": 0.7073407173156738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.490406846401669, "step_time": 2.173889259338379} +{"epoch": 0, "iter": 6715, "iter_tflops": 14.14803141884936, "iter_time": 1.4582306823730469, "loss": 0.8131706714630127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.942083689710774, "step_time": 1.294127788543701} +{"epoch": 0, "iter": 6716, "iter_tflops": 35.62499192835111, "iter_time": 0.5791185455322266, "loss": 0.6379746794700623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.75554878114425, "step_time": 0.5323390884399414} +{"epoch": 0, "iter": 6717, "iter_tflops": 22.092970606056888, "iter_time": 0.700678726196289, "loss": 0.32932648062705994, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.028311628170112, "step_time": 0.6442431221008301} +{"epoch": 0, "iter": 6718, "iter_tflops": 22.964700852982528, "iter_time": 0.6740812606811523, "loss": 0.21313950419425964, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.630028088672596, "step_time": 0.6285041351318359} +{"epoch": 0, "iter": 6719, "iter_tflops": 24.30444253771256, "iter_time": 0.6369236602783204, "loss": 0.2907320559024811, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 26.14782758338398, "step_time": 0.5920214385986328} +{"epoch": 0, "iter": 6720, "iter_tflops": 22.622375811616614, "iter_time": 0.6842815551757813, "loss": 0.3722638189792633, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.294562805756406, "step_time": 0.637182674407959} +{"epoch": 0, "iter": 6721, "iter_tflops": 37.47656710123853, "iter_time": 0.5505064926147462, "loss": 0.17134864628314972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.763511980522146, "step_time": 0.5061166839599609} +{"epoch": 0, "iter": 6722, "iter_tflops": 44.547607392482064, "iter_time": 0.4631246147155762, "loss": 0.2245638519525528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
49.49141044867166, "step_time": 0.41686210441589355} +{"epoch": 0, "iter": 6723, "iter_tflops": 51.12180614390571, "iter_time": 0.4035673828125, "loss": 0.10583861172199249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.60886870255148, "step_time": 0.37100365447998046} +{"epoch": 0, "iter": 6724, "iter_tflops": 50.491573346588254, "iter_time": 0.4086046867370605, "loss": 0.13908956944942474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.15034929953087, "step_time": 0.37408817481994633} +{"epoch": 0, "iter": 6725, "iter_tflops": 32.44173530402265, "iter_time": 0.635942970275879, "loss": 0.8163361549377441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.71408134076395, "step_time": 0.5943148345947266} +{"epoch": 0, "iter": 6726, "iter_tflops": 33.07111971569404, "iter_time": 0.6238401870727539, "loss": 0.8842871785163879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.395875331705525, "step_time": 0.5668525161743164} +{"epoch": 0, "iter": 6727, "iter_tflops": 36.67433096073914, "iter_time": 0.5625485992431641, "loss": 0.8742105960845947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.6777572900625, "step_time": 0.5199662208557129} +{"epoch": 0, "iter": 6728, "iter_tflops": 34.76453179579581, "iter_time": 0.5934523620605469, "loss": 0.9305437207221985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.72663761683647, "step_time": 0.5468574676513673} +{"epoch": 0, "iter": 6729, "iter_tflops": 35.89620595335492, "iter_time": 0.5747430114746094, "loss": 0.23734527826309204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.92733030062051, "step_time": 0.5167160778045654} +{"epoch": 0, "iter": 6730, "iter_tflops": 36.56498171346533, "iter_time": 0.5642309265136718, "loss": 0.20865774154663086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.99783040125524, "step_time": 0.5158053150177002} +{"epoch": 0, "iter": 6731, "iter_tflops": 47.477578823173374, "iter_time": 0.43454392623901367, "loss": 0.19496530294418335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
51.68004076447506, "step_time": 0.3992081508636474} +{"epoch": 0, "iter": 6732, "iter_tflops": 47.94058922088961, "iter_time": 0.4303470993041993, "loss": 0.19127526879310608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.88461384696526, "step_time": 0.3976341342926025} +{"epoch": 0, "iter": 6733, "iter_tflops": 21.614796638693473, "iter_time": 0.9544893646240233, "loss": 0.5190754532814026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.592496482695882, "step_time": 0.9131834335327148} +{"epoch": 0, "iter": 6734, "iter_tflops": 23.566542324094627, "iter_time": 0.8754399871826173, "loss": 0.5311837196350098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.636191263162235, "step_time": 0.7204552211761475} +{"epoch": 0, "iter": 6735, "iter_tflops": 47.95599927395607, "iter_time": 0.43020881271362305, "loss": 0.5893679261207581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.363940456235994, "step_time": 0.39399428939819336} +{"epoch": 0, "iter": 6736, "iter_tflops": 48.80681538085393, "iter_time": 0.42270927429199223, "loss": 0.5288447737693787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.55280099585592, "step_time": 0.3925783805847168} +{"epoch": 0, "iter": 6737, "iter_tflops": 36.87540981001245, "iter_time": 0.5594810638427734, "loss": 0.8542091846466064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.5358351366103, "step_time": 0.521832748413086} +{"epoch": 0, "iter": 6738, "iter_tflops": 9.382516375630022, "iter_time": 2.1988870239257814, "loss": 0.5699633359909058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.208843520614613, "step_time": 1.5619152030944823} +{"epoch": 0, "iter": 6739, "iter_tflops": 11.480261657515609, "iter_time": 1.7970926208496096, "loss": 0.6042945981025696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.861978756023897, "step_time": 1.3881794509887695} +{"epoch": 0, "iter": 6740, "iter_tflops": 27.511864518301937, "iter_time": 0.7498980484008789, "loss": 0.9148658514022827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
34.36662292586403, "step_time": 0.6003235626220702} +{"epoch": 0, "iter": 6741, "iter_tflops": 19.89287664959713, "iter_time": 0.8399788208007812, "loss": 0.24544723331928253, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 21.49188970392828, "step_time": 0.7774837532043457} +{"epoch": 0, "iter": 6742, "iter_tflops": 26.649627301483235, "iter_time": 0.6270104598999022, "loss": 0.19341132044792175, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 28.686557700062806, "step_time": 0.5824886779785157} +{"epoch": 0, "iter": 6743, "iter_tflops": 24.97704583437504, "iter_time": 0.6689980545043945, "loss": 0.3038480281829834, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 26.926371413726816, "step_time": 0.6205661659240723} +{"epoch": 0, "iter": 6744, "iter_tflops": 24.469390108998212, "iter_time": 0.6828774642944336, "loss": 0.21990083158016205, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 26.392580725791575, "step_time": 0.6331171340942383} +{"epoch": 0, "iter": 6745, "iter_tflops": 21.555319953379435, "iter_time": 0.957123046875, "loss": 0.6069191694259644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.771896390866193, "step_time": 0.9059892578125} +{"epoch": 0, "iter": 6746, "iter_tflops": 8.379471189720743, "iter_time": 2.4620997009277343, "loss": 0.7374165058135986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.76648289839892, "step_time": 2.1124384002685543} +{"epoch": 0, "iter": 6747, "iter_tflops": 14.203393626956187, "iter_time": 1.4525467681884763, "loss": 0.4335036277770996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.360994449839843, "step_time": 1.1236370429992677} +{"epoch": 0, "iter": 6748, "iter_tflops": 50.734173915146144, "iter_time": 0.4066508216857911, "loss": 0.6048576235771179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.039252368232624, "step_time": 0.3748432731628418} +{"epoch": 0, "iter": 6749, "iter_tflops": 19.98424688780136, "iter_time": 0.8793319244384765, "loss": 0.2537817060947418, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 
21.159269742496175, "step_time": 0.830500602722168} +{"epoch": 0, "iter": 6750, "iter_tflops": 12.266889016964727, "iter_time": 1.432538131713867, "loss": 0.21039818227291107, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 16.43373293952864, "step_time": 1.0693119049072266} +{"epoch": 0, "iter": 6751, "iter_tflops": 27.02864070483729, "iter_time": 0.6501542739868165, "loss": 0.328016459941864, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 29.188423004917535, "step_time": 0.602046443939209} +{"epoch": 0, "iter": 6752, "iter_tflops": 29.871084522769557, "iter_time": 0.5882875213623047, "loss": 0.2584361433982849, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 31.890688960935606, "step_time": 0.5510318794250488} +{"epoch": 0, "iter": 6753, "iter_tflops": 17.414209453145517, "iter_time": 1.1847275390624998, "loss": 0.32110899686813354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.758627888847997, "step_time": 1.0998189010620116} +{"epoch": 0, "iter": 6754, "iter_tflops": 22.47362972018961, "iter_time": 0.9180134124755859, "loss": 0.2758583724498749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.27139019199761, "step_time": 0.8163814239501953} +{"epoch": 0, "iter": 6755, "iter_tflops": 50.812047599180424, "iter_time": 0.40602759552001955, "loss": 0.256244421005249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.172450917986595, "step_time": 0.37393831825256346} +{"epoch": 0, "iter": 6756, "iter_tflops": 48.52824852746262, "iter_time": 0.42513575363159184, "loss": 0.13077162206172943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.44530760151617, "step_time": 0.393383020401001} +{"epoch": 0, "iter": 6757, "iter_tflops": 44.43450641671788, "iter_time": 0.4643034248352051, "loss": 0.12437080591917038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.73811776426403, "step_time": 0.42330509376525877} +{"epoch": 0, "iter": 6758, "iter_tflops": 44.192844007665066, "iter_time": 0.4668424034118653, "loss": 0.12901704013347626, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 47.82503143493934, "step_time": 0.4313869304656982} +{"epoch": 0, "iter": 6759, "iter_tflops": 50.61224994739686, "iter_time": 0.40763043594360354, "loss": 0.1582614928483963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.85569416969249, "step_time": 0.37609757423400886} +{"epoch": 0, "iter": 6760, "iter_tflops": 54.312579243440354, "iter_time": 0.3798584747314453, "loss": 0.11433973163366318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.094754671707534, "step_time": 0.34911886215209964} +{"epoch": 0, "iter": 6761, "iter_tflops": 22.144995896509332, "iter_time": 0.9316368179321289, "loss": 0.5207510590553284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.145557856151726, "step_time": 0.8913629837036132} +{"epoch": 0, "iter": 6762, "iter_tflops": 13.785640690335379, "iter_time": 1.496563995361328, "loss": 0.5161557197570801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.22336117057766, "step_time": 1.0732302913665772} +{"epoch": 0, "iter": 6763, "iter_tflops": 40.66974520217807, "iter_time": 0.5072835693359374, "loss": 0.5345259308815002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.68861844561722, "step_time": 0.46166326522827145} +{"epoch": 0, "iter": 6764, "iter_tflops": 43.096164708044654, "iter_time": 0.47872226333618156, "loss": 0.34862083196640015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.99086832919306, "step_time": 0.43904473876953126} +{"epoch": 0, "iter": 6765, "iter_tflops": 20.33842758404035, "iter_time": 1.0143898010253907, "loss": 0.29216086864471436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.51925310293037, "step_time": 0.9587272109985352} +{"epoch": 0, "iter": 6766, "iter_tflops": 10.37464316740813, "iter_time": 1.9886075286865235, "loss": 0.29475313425064087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.01325945808721, "step_time": 1.4722551574707032} +{"epoch": 0, "iter": 6767, "iter_tflops": 13.637111732817514, "iter_time": 1.5128638610839844, "loss": 0.22351588308811188, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 15.259459587093918, "step_time": 1.352019931793213} +{"epoch": 0, "iter": 6768, "iter_tflops": 29.57898361267985, "iter_time": 0.6974916305541993, "loss": 0.2677209973335266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.72079617904057, "step_time": 0.546942153930664} +{"epoch": 0, "iter": 6769, "iter_tflops": 12.160541115309227, "iter_time": 1.212478988647461, "loss": 0.2637970447540283, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 13.057389319715842, "step_time": 1.129199737548828} +{"epoch": 0, "iter": 6770, "iter_tflops": 10.909061114500439, "iter_time": 1.351573745727539, "loss": 0.34222474694252014, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 12.61569141331915, "step_time": 1.1687350387573243} +{"epoch": 0, "iter": 6771, "iter_tflops": 25.447132823927525, "iter_time": 0.5794130401611328, "loss": 0.12199720740318298, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 27.349912496723295, "step_time": 0.5391022949218751} +{"epoch": 0, "iter": 6772, "iter_tflops": 27.543676941471638, "iter_time": 0.535309814453125, "loss": 0.36422935128211975, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 29.226108258962476, "step_time": 0.5044941482543945} +{"epoch": 0, "iter": 6773, "iter_tflops": 27.042529858632125, "iter_time": 0.7629128494262696, "loss": 0.06284339725971222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.536642413403108, "step_time": 0.7229684982299804} +{"epoch": 0, "iter": 6774, "iter_tflops": 18.02544497246979, "iter_time": 1.1445539093017576, "loss": 0.03913361579179764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.087383369149965, "step_time": 0.8565103645324706} +{"epoch": 0, "iter": 6775, "iter_tflops": 42.26178865823758, "iter_time": 0.4881736946105957, "loss": 0.10324627161026001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.527572047740854, "step_time": 0.4434165077209472} +{"epoch": 0, "iter": 6776, "iter_tflops": 46.58093702377278, "iter_time": 0.4429085121154785, "loss": 0.09373936802148819, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 51.08611536896506, "step_time": 0.4038493309020996} +{"epoch": 0, "iter": 6777, "iter_tflops": 15.801750164129855, "iter_time": 1.3056207885742186, "loss": 0.12621144950389862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.230954897382723, "step_time": 1.1973273468017578} +{"epoch": 0, "iter": 6778, "iter_tflops": 18.04657074454964, "iter_time": 1.143214065551758, "loss": 0.13333868980407715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.040583682659204, "step_time": 0.9360502338409424} +{"epoch": 0, "iter": 6779, "iter_tflops": 49.57645716574764, "iter_time": 0.4161469917297363, "loss": 0.15218451619148254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.81157544950741, "step_time": 0.3833950843811035} +{"epoch": 0, "iter": 6780, "iter_tflops": 44.35647446923487, "iter_time": 0.4651202278137207, "loss": 0.08630441874265671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12215556011996, "step_time": 0.4287233867645263} +{"epoch": 0, "iter": 6781, "iter_tflops": 37.109049818042465, "iter_time": 0.5559585494995117, "loss": 0.22843943536281586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.0461850037259, "step_time": 0.5151824951171875} +{"epoch": 0, "iter": 6782, "iter_tflops": 12.181666718569033, "iter_time": 1.6936182861328126, "loss": 0.16303017735481262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.172218101481873, "step_time": 1.4557420272827148} +{"epoch": 0, "iter": 6783, "iter_tflops": 14.669833533100054, "iter_time": 1.4063618011474608, "loss": 0.23390717804431915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.80614275389029, "step_time": 1.15865034866333} +{"epoch": 0, "iter": 6784, "iter_tflops": 46.59222213071141, "iter_time": 0.4428012351989745, "loss": 0.1677030622959137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.760988637476835, "step_time": 0.4064360065460205} +{"epoch": 0, "iter": 6785, "iter_tflops": 20.44206281566576, "iter_time": 0.773284553527832, "loss": 0.26265889406204224, "lr": 3e-05, 
"seqlen": 6320.0, "step_tflops": 21.537432902848654, "step_time": 0.7339561538696289} +{"epoch": 0, "iter": 6786, "iter_tflops": 7.221642278544851, "iter_time": 2.1889108886718747, "loss": 0.2298426777124405, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 8.80726686590119, "step_time": 1.7948282546997072} +{"epoch": 0, "iter": 6787, "iter_tflops": 9.146902971628844, "iter_time": 1.728184005737305, "loss": 0.15328726172447205, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 10.71231004763778, "step_time": 1.475641700744629} +{"epoch": 0, "iter": 6788, "iter_tflops": 26.403762779466856, "iter_time": 0.5986847991943358, "loss": 0.2664266526699066, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 28.0491398158003, "step_time": 0.5635656394958496} +{"epoch": 0, "iter": 6789, "iter_tflops": 19.72430096665032, "iter_time": 0.7082387619018554, "loss": 0.29397493600845337, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 20.88873531312326, "step_time": 0.6687582702636719} +{"epoch": 0, "iter": 6790, "iter_tflops": 8.241737099303956, "iter_time": 1.6949721069335937, "loss": 0.24709802865982056, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 9.900417838211245, "step_time": 1.4110025177001952} +{"epoch": 0, "iter": 6791, "iter_tflops": 24.678116868444, "iter_time": 0.5660689010620118, "loss": 0.44883084297180176, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 26.38876210017506, "step_time": 0.5293736190795899} +{"epoch": 0, "iter": 6792, "iter_tflops": 25.586179575658914, "iter_time": 0.5459789123535157, "loss": 0.16044440865516663, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 27.21788179457835, "step_time": 0.513247673034668} +{"epoch": 0, "iter": 6793, "iter_tflops": 31.418757360932265, "iter_time": 0.6566489334106446, "loss": 0.05469468608498573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.789173075595336, "step_time": 0.6105829658508299} +{"epoch": 0, "iter": 6794, "iter_tflops": 22.432555643443834, "iter_time": 0.9196942977905274, "loss": 0.025399159640073776, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 28.06674094760379, "step_time": 0.7350726451873778} +{"epoch": 0, "iter": 6795, "iter_tflops": 55.33865512124483, "iter_time": 0.3728152313232421, "loss": 0.06207812950015068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.65285135444702, "step_time": 0.34015043067932127} +{"epoch": 0, "iter": 6796, "iter_tflops": 52.33762297746933, "iter_time": 0.3941924057006836, "loss": 0.01600736938416958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.096129381687376, "step_time": 0.36133961677551274} +{"epoch": 0, "iter": 6797, "iter_tflops": 24.342469084237262, "iter_time": 0.8475349578857422, "loss": 0.04399707913398743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.604437221141126, "step_time": 0.8057624282836915} +{"epoch": 0, "iter": 6798, "iter_tflops": 15.040834629685211, "iter_time": 1.371672119140625, "loss": 0.017467156052589417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.598915802245855, "step_time": 1.1722934379577636} +{"epoch": 0, "iter": 6799, "iter_tflops": 31.43931514403158, "iter_time": 0.6562195587158203, "loss": 0.03248324245214462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.54004681582058, "step_time": 0.5217771644592285} +{"epoch": 0, "iter": 6800, "iter_tflops": 41.63360892264263, "iter_time": 0.4955393981933594, "loss": 0.023655323311686516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.6882532892192, "step_time": 0.45156231689453125} +{"epoch": 0, "iter": 6801, "iter_tflops": 21.535878408570284, "iter_time": 0.957987091064453, "loss": 0.7008086442947388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.209148105568936, "step_time": 0.8889207572937012} +{"epoch": 0, "iter": 6802, "iter_tflops": 31.065899906622757, "iter_time": 0.6641073837280274, "loss": 0.6442633867263794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.20748514481135, "step_time": 0.539975175857544} +{"epoch": 0, "iter": 6803, "iter_tflops": 38.069900183054166, "iter_time": 0.5419266510009766, "loss": 0.7195982336997986, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 41.371288991586475, "step_time": 0.4986814289093018} +{"epoch": 0, "iter": 6804, "iter_tflops": 40.80074993777414, "iter_time": 0.5056547622680664, "loss": 0.7584601044654846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64941203380957, "step_time": 0.4620686492919922} +{"epoch": 0, "iter": 6805, "iter_tflops": 23.179052123445548, "iter_time": 0.8900749435424805, "loss": 0.5406304597854614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.801162806934116, "step_time": 0.8318599281311034} +{"epoch": 0, "iter": 6806, "iter_tflops": 10.959412223184069, "iter_time": 1.8824999999999998, "loss": 0.7431405186653137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.710416194895721, "step_time": 1.4024819717407226} +{"epoch": 0, "iter": 6807, "iter_tflops": 11.401485442728466, "iter_time": 1.8095092620849607, "loss": 0.7024593949317932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.162020616769583, "step_time": 1.3607087097167967} +{"epoch": 0, "iter": 6808, "iter_tflops": 26.663692241049688, "iter_time": 0.7737523117065429, "loss": 0.7385804057121277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.37376791963386, "step_time": 0.6181829261779785} +{"epoch": 0, "iter": 6809, "iter_tflops": 21.03812440965229, "iter_time": 0.7222030410766601, "loss": 0.20477360486984253, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 22.414103605323447, "step_time": 0.6778677253723144} +{"epoch": 0, "iter": 6810, "iter_tflops": 11.500425894386668, "iter_time": 1.321150848388672, "loss": 0.2228671759366989, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 13.301983439941168, "step_time": 1.142220443725586} +{"epoch": 0, "iter": 6811, "iter_tflops": 23.243301141396884, "iter_time": 0.6536850051879883, "loss": 0.18341514468193054, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.063199323625316, "step_time": 0.6062193908691407} +{"epoch": 0, "iter": 6812, "iter_tflops": 23.561842704709903, "iter_time": 0.6448475875854492, "loss": 0.19356636703014374, 
"lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.32458825593539, "step_time": 0.5999622688293457} +{"epoch": 0, "iter": 6813, "iter_tflops": 20.998988998096433, "iter_time": 0.9824803237915039, "loss": 0.144673153758049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.916956987401047, "step_time": 0.9002544937133788} +{"epoch": 0, "iter": 6814, "iter_tflops": 17.580034250980646, "iter_time": 1.173552520751953, "loss": 0.25766491889953613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.251353278210388, "step_time": 0.9708131637573242} +{"epoch": 0, "iter": 6815, "iter_tflops": 36.68894446608304, "iter_time": 0.5623245315551758, "loss": 0.16696617007255554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03997674977084, "step_time": 0.5152623748779297} +{"epoch": 0, "iter": 6816, "iter_tflops": 46.50048239805691, "iter_time": 0.44367482757568355, "loss": 0.19375073909759521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.98107862829008, "step_time": 0.40468138504028317} +{"epoch": 0, "iter": 6817, "iter_tflops": 15.530366134816893, "iter_time": 1.3284357452392577, "loss": 0.26973918080329895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.301575633636954, "step_time": 1.2655889205932618} +{"epoch": 0, "iter": 6818, "iter_tflops": 17.68421507882195, "iter_time": 1.166638916015625, "loss": 0.29395657777786255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.110992616350206, "step_time": 0.8926961231231688} +{"epoch": 0, "iter": 6819, "iter_tflops": 43.026676995989035, "iter_time": 0.47949539566040034, "loss": 0.389847993850708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.28265465787143, "step_time": 0.44576296806335447} +{"epoch": 0, "iter": 6820, "iter_tflops": 43.171687021490854, "iter_time": 0.4778848114013672, "loss": 0.34107792377471924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.36329480693496, "step_time": 0.4449876480102539} +{"epoch": 0, "iter": 6821, "iter_tflops": 42.54969276938329, "iter_time": 0.4848705635070801, "loss": 
0.10172848403453827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.278321835709406, "step_time": 0.44580470275878903} +{"epoch": 0, "iter": 6822, "iter_tflops": 24.23045177016804, "iter_time": 0.8514531097412109, "loss": 0.13676109910011292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.610918836283243, "step_time": 0.6967393894195557} +{"epoch": 0, "iter": 6823, "iter_tflops": 51.273175583618276, "iter_time": 0.40237596511840823, "loss": 0.07561388611793518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.62576667294348, "step_time": 0.3708909511566162} +{"epoch": 0, "iter": 6824, "iter_tflops": 49.9268047231345, "iter_time": 0.4132267951965332, "loss": 0.053937096148729324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.454479535846616, "step_time": 0.3788686199188232} +{"epoch": 0, "iter": 6825, "iter_tflops": 33.34607029833875, "iter_time": 0.6186963958740235, "loss": 0.8623485565185547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.43163624984291, "step_time": 0.5822788810729981} +{"epoch": 0, "iter": 6826, "iter_tflops": 15.34072953959837, "iter_time": 1.344857391357422, "loss": 0.8797857761383057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.232193124619876, "step_time": 1.0727374343872071} +{"epoch": 0, "iter": 6827, "iter_tflops": 31.827622678623325, "iter_time": 0.6482134628295898, "loss": 0.9668172597885132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.06122867149385, "step_time": 0.588430419921875} +{"epoch": 0, "iter": 6828, "iter_tflops": 35.217910910707204, "iter_time": 0.5858125305175781, "loss": 0.7010661363601685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.0143742352679, "step_time": 0.5427182197570801} +{"epoch": 0, "iter": 6829, "iter_tflops": 17.066656774150605, "iter_time": 1.2088538360595702, "loss": 0.45444297790527344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.22019090473817, "step_time": 1.1323203811645508} +{"epoch": 0, "iter": 6830, "iter_tflops": 18.201125744653243, "iter_time": 1.1335064544677733, 
"loss": 0.5523462891578674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.7150434109485, "step_time": 0.9500829963684081} +{"epoch": 0, "iter": 6831, "iter_tflops": 48.07038229677395, "iter_time": 0.42918513488769533, "loss": 0.37891802191734314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.245094939075095, "step_time": 0.3948905353546142} +{"epoch": 0, "iter": 6832, "iter_tflops": 50.07215482486052, "iter_time": 0.41202727508544923, "loss": 0.4982381761074066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.1960614574447, "step_time": 0.3806751441955566} +{"epoch": 0, "iter": 6833, "iter_tflops": 33.664821761287115, "iter_time": 0.6128383407592773, "loss": 0.5739343762397766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.97381619797762, "step_time": 0.5735030555725097} +{"epoch": 0, "iter": 6834, "iter_tflops": 14.097862371072061, "iter_time": 1.4634199829101562, "loss": 0.8500450849533081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.870995858258198, "step_time": 1.2228734855651855} +{"epoch": 0, "iter": 6835, "iter_tflops": 47.14469233541492, "iter_time": 0.43761222076416023, "loss": 0.5800883173942566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.22431673689327, "step_time": 0.40275975990295415} +{"epoch": 0, "iter": 6836, "iter_tflops": 47.716430984797704, "iter_time": 0.4323687477111816, "loss": 0.5703530311584473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.26977924240588, "step_time": 0.40240262031555174} +{"epoch": 0, "iter": 6837, "iter_tflops": 23.587485512617533, "iter_time": 0.8746626892089843, "loss": 0.23679053783416748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.860005093810265, "step_time": 0.8298909606933593} +{"epoch": 0, "iter": 6838, "iter_tflops": 22.856712918153526, "iter_time": 0.9026273193359374, "loss": 0.14939531683921814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.493977209651618, "step_time": 0.6550806007385254} +{"epoch": 0, "iter": 6839, "iter_tflops": 40.26696516121075, "iter_time": 
0.512357795715332, "loss": 0.1767309159040451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.02731427731816, "step_time": 0.4685975933074951} +{"epoch": 0, "iter": 6840, "iter_tflops": 40.178510619224255, "iter_time": 0.5134857711791991, "loss": 0.15052929520606995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.0590906826393, "step_time": 0.46825963020324707} +{"epoch": 0, "iter": 6841, "iter_tflops": 26.056511605168605, "iter_time": 0.7917826385498048, "loss": 0.5698845386505127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.06342635460052, "step_time": 0.7098644618988037} +{"epoch": 0, "iter": 6842, "iter_tflops": 37.761095721379846, "iter_time": 0.5463584442138671, "loss": 0.6631321310997009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.74750768958123, "step_time": 0.49418742942810057} +{"epoch": 0, "iter": 6843, "iter_tflops": 37.884105879436255, "iter_time": 0.5445844116210937, "loss": 0.5459429025650024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36589194131593, "step_time": 0.4987464923858642} +{"epoch": 0, "iter": 6844, "iter_tflops": 37.992037574527714, "iter_time": 0.5430373001098633, "loss": 0.49240434169769287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.16693634291439, "step_time": 0.5011568832397462} +{"epoch": 0, "iter": 6845, "iter_tflops": 18.719369356805288, "iter_time": 0.837886116027832, "loss": 0.040875110775232315, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 20.366953570542233, "step_time": 0.770105339050293} +{"epoch": 0, "iter": 6846, "iter_tflops": 18.650705236716554, "iter_time": 0.8409708633422852, "loss": 0.021797899156808853, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 21.55015363110682, "step_time": 0.7278231029510499} +{"epoch": 0, "iter": 6847, "iter_tflops": 39.00614573839113, "iter_time": 0.4021084213256836, "loss": 0.05368678271770477, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 43.30990548471527, "step_time": 0.3621504020690918} +{"epoch": 0, "iter": 6848, "iter_tflops": 38.987176300789486, 
"iter_time": 0.402304069519043, "loss": 0.04783383384346962, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 42.49419493710684, "step_time": 0.3691021728515625} +{"epoch": 0, "iter": 6849, "iter_tflops": 48.356710247172614, "iter_time": 0.42664385986328124, "loss": 0.01809174381196499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.80603731617437, "step_time": 0.3906957340240479} +{"epoch": 0, "iter": 6850, "iter_tflops": 45.63374615547443, "iter_time": 0.45210168457031247, "loss": 0.07286005467176437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.17264732265684, "step_time": 0.40316642951965337} +{"epoch": 0, "iter": 6851, "iter_tflops": 60.17034525830979, "iter_time": 0.3428780975341797, "loss": 0.04484110698103905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.70913705891621, "step_time": 0.3139760227203369} +{"epoch": 0, "iter": 6852, "iter_tflops": 54.781778955175575, "iter_time": 0.37660503005981444, "loss": 0.06332305073738098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.764823716914876, "step_time": 0.3452046241760254} +{"epoch": 0, "iter": 6853, "iter_tflops": 19.2721982709006, "iter_time": 1.0705106506347657, "loss": 0.6569671630859375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.14500264377658, "step_time": 1.0241296005249023} +{"epoch": 0, "iter": 6854, "iter_tflops": 15.219495176466308, "iter_time": 1.3555701599121093, "loss": 0.6562935709953308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.41824749334917, "step_time": 1.062459087371826} +{"epoch": 0, "iter": 6855, "iter_tflops": 38.58607745571481, "iter_time": 0.5346771392822266, "loss": 0.6371433734893799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.15221388177371, "step_time": 0.4894427032470703} +{"epoch": 0, "iter": 6856, "iter_tflops": 41.073886477689555, "iter_time": 0.5022922172546387, "loss": 0.6136049032211304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.85026669787253, "step_time": 0.45999934959411615} +{"epoch": 0, "iter": 6857, "iter_tflops": 
20.18146075141166, "iter_time": 1.022279495239258, "loss": 0.7044943571090698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.475487555189677, "step_time": 0.9606810302734377} +{"epoch": 0, "iter": 6858, "iter_tflops": 12.345455240624895, "iter_time": 1.671148864746094, "loss": 0.8044673800468445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.611821140643976, "step_time": 1.1714344215393067} +{"epoch": 0, "iter": 6859, "iter_tflops": 37.25544251352894, "iter_time": 0.5537739486694335, "loss": 0.8082234859466553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56627950850243, "step_time": 0.50857741355896} +{"epoch": 0, "iter": 6860, "iter_tflops": 38.38333117361597, "iter_time": 0.5375013809204101, "loss": 0.829145073890686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.857213340949876, "step_time": 0.49289218902587895} +{"epoch": 0, "iter": 6861, "iter_tflops": 19.97748415936537, "iter_time": 1.032717300415039, "loss": 0.48603811860084534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.521897476903742, "step_time": 0.9586094131469727} +{"epoch": 0, "iter": 6862, "iter_tflops": 19.179556160059402, "iter_time": 1.0756814880371095, "loss": 0.4731829762458801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.54030789195782, "step_time": 0.8764156188964844} +{"epoch": 0, "iter": 6863, "iter_tflops": 38.556233198390565, "iter_time": 0.5350910034179688, "loss": 0.4000340402126312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.17622407189855, "step_time": 0.4891640720367432} +{"epoch": 0, "iter": 6864, "iter_tflops": 42.4187141413442, "iter_time": 0.48636772537231443, "loss": 0.493828147649765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.61887007690892, "step_time": 0.44254812431335444} +{"epoch": 0, "iter": 6865, "iter_tflops": 17.33151730902039, "iter_time": 1.1903801116943358, "loss": 0.3623948097229004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.44634722407017, "step_time": 1.1184378814697264} +{"epoch": 0, "iter": 6866, "iter_tflops": 
19.522119122391864, "iter_time": 1.0568060455322266, "loss": 0.28934693336486816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.126747108035875, "step_time": 0.8551129341125489} +{"epoch": 0, "iter": 6867, "iter_tflops": 45.77205517926698, "iter_time": 0.4507355728149415, "loss": 0.2373124659061432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.40776061593639, "step_time": 0.4175678730010986} +{"epoch": 0, "iter": 6868, "iter_tflops": 48.21870837723425, "iter_time": 0.4278649139404297, "loss": 0.26429733633995056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.31453799560186, "step_time": 0.39436635208129883} +{"epoch": 0, "iter": 6869, "iter_tflops": 31.920717375097137, "iter_time": 0.6463229904174804, "loss": 0.013530303724110126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.825499517102976, "step_time": 0.6099272384643555} +{"epoch": 0, "iter": 6870, "iter_tflops": 10.775329038921742, "iter_time": 1.914660186767578, "loss": 0.008411071263253689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.25328080716289, "step_time": 1.5566782150268554} +{"epoch": 0, "iter": 6871, "iter_tflops": 14.736357236737652, "iter_time": 1.4000131225585937, "loss": 0.007778788451105356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.43087554949904, "step_time": 1.1835947914123535} +{"epoch": 0, "iter": 6872, "iter_tflops": 34.98870378304668, "iter_time": 0.5896501235961915, "loss": 0.009506115689873695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1669490580009, "step_time": 0.4567741222381591} +{"epoch": 0, "iter": 6873, "iter_tflops": 17.10844826952885, "iter_time": 0.8070046691894531, "loss": 0.2349979281425476, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 18.013844444802768, "step_time": 0.766443702697754} +{"epoch": 0, "iter": 6874, "iter_tflops": 16.974904219859933, "iter_time": 0.8133534927368165, "loss": 0.2543763518333435, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 18.886121849142114, "step_time": 0.7310446128845215} +{"epoch": 0, "iter": 6875, 
"iter_tflops": 24.24318015572117, "iter_time": 0.56950439453125, "loss": 0.3026822507381439, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 25.910713415273392, "step_time": 0.5328528556823731} +{"epoch": 0, "iter": 6876, "iter_tflops": 23.906156426167804, "iter_time": 0.5775331420898437, "loss": 0.1634167581796646, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 25.526852249168506, "step_time": 0.5408656539916992} +{"epoch": 0, "iter": 6877, "iter_tflops": 13.390341971601607, "iter_time": 0.773875862121582, "loss": 0.006053985562175512, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 14.354623730624285, "step_time": 0.7218902168273924} +{"epoch": 0, "iter": 6878, "iter_tflops": 5.769899893593205, "iter_time": 1.7959518585205079, "loss": 0.010121671482920647, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 6.705029730661115, "step_time": 1.545475986480713} +{"epoch": 0, "iter": 6879, "iter_tflops": 28.538137190715183, "iter_time": 0.36310927963256834, "loss": 0.00131307914853096, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 31.553805999868835, "step_time": 0.32840610218048094} +{"epoch": 0, "iter": 6880, "iter_tflops": 30.702230001136034, "iter_time": 0.33751497650146484, "loss": 0.014417515136301517, "lr": 3e-05, "seqlen": 4176.0, "step_tflops": 33.74936989233495, "step_time": 0.3070416564941406} +{"epoch": 0, "iter": 6881, "iter_tflops": 41.51047965152868, "iter_time": 0.4045174674987793, "loss": 0.007318529300391674, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 45.75499477909708, "step_time": 0.3669919357299805} +{"epoch": 0, "iter": 6882, "iter_tflops": 32.13583758826327, "iter_time": 0.5225229949951172, "loss": 0.007833762094378471, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 35.52268288348604, "step_time": 0.4727039947509766} +{"epoch": 0, "iter": 6883, "iter_tflops": 32.90165270618261, "iter_time": 0.5103608093261719, "loss": 0.005379220936447382, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 36.513292765488096, "step_time": 0.45987948036193843} +{"epoch": 0, 
"iter": 6884, "iter_tflops": 36.610125472334836, "iter_time": 0.4586631126403809, "loss": 0.0024830487091094255, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 40.467926439904716, "step_time": 0.41493883132934567} +{"epoch": 0, "iter": 6885, "iter_tflops": 26.784433556703085, "iter_time": 0.7702643203735351, "loss": 0.31704992055892944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.563999801897424, "step_time": 0.7222760696411132} +{"epoch": 0, "iter": 6886, "iter_tflops": 24.182716967894148, "iter_time": 0.8531338119506835, "loss": 0.5405832529067993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.701954231455247, "step_time": 0.671979814529419} +{"epoch": 0, "iter": 6887, "iter_tflops": 48.353506196598545, "iter_time": 0.4266721305847168, "loss": 0.3616132140159607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.6319590793715, "step_time": 0.39198794555664057} +{"epoch": 0, "iter": 6888, "iter_tflops": 42.037893795684404, "iter_time": 0.49077371978759765, "loss": 0.34915950894355774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42689667507515, "step_time": 0.4541603107452393} +{"epoch": 0, "iter": 6889, "iter_tflops": 20.45037676798111, "iter_time": 0.7409604110717773, "loss": 0.25363633036613464, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 21.68034501764844, "step_time": 0.6989242820739746} +{"epoch": 0, "iter": 6890, "iter_tflops": 10.045505814605331, "iter_time": 1.5084277343749999, "loss": 0.283606618642807, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 11.820309264288479, "step_time": 1.281939350128174} +{"epoch": 0, "iter": 6891, "iter_tflops": 22.774506847235152, "iter_time": 0.6653456726074218, "loss": 0.39992254972457886, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 24.64704761296358, "step_time": 0.6147965393066407} +{"epoch": 0, "iter": 6892, "iter_tflops": 25.367222961178996, "iter_time": 0.5973424682617188, "loss": 0.391122430562973, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 27.122149820596565, "step_time": 0.5586916847229004} 
+{"epoch": 0, "iter": 6893, "iter_tflops": 21.500753138802317, "iter_time": 0.7504523315429688, "loss": 0.3961864411830902, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 23.205801762233953, "step_time": 0.6953127708435058} +{"epoch": 0, "iter": 6894, "iter_tflops": 28.121789022579705, "iter_time": 0.5737647171020508, "loss": 0.291719526052475, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 30.054916780953214, "step_time": 0.5368602561950684} +{"epoch": 0, "iter": 6895, "iter_tflops": 29.293035405808915, "iter_time": 0.5508234329223634, "loss": 0.21167685091495514, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.14824386808503, "step_time": 0.5180160522460937} +{"epoch": 0, "iter": 6896, "iter_tflops": 29.703662272180036, "iter_time": 0.5432087860107422, "loss": 0.25980329513549805, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 31.648310754334233, "step_time": 0.5098310127258301} +{"epoch": 0, "iter": 6897, "iter_tflops": 27.137955511628732, "iter_time": 0.706809928894043, "loss": 0.011558099649846554, "lr": 3e-05, "seqlen": 7632.0, "step_tflops": 28.747761090657328, "step_time": 0.6672302703857422} +{"epoch": 0, "iter": 6898, "iter_tflops": 17.380538490579312, "iter_time": 1.103612319946289, "loss": 0.013873848132789135, "lr": 3e-05, "seqlen": 7632.0, "step_tflops": 26.4788242767889, "step_time": 0.724404384613037} +{"epoch": 0, "iter": 6899, "iter_tflops": 52.59768074890495, "iter_time": 0.3646810302734375, "loss": 0.019062217324972153, "lr": 3e-05, "seqlen": 7632.0, "step_tflops": 57.877076846456134, "step_time": 0.3314157772064209} +{"epoch": 0, "iter": 6900, "iter_tflops": 56.98435117789643, "iter_time": 0.33660778808593755, "loss": 0.0028511041309684515, "lr": 3e-05, "seqlen": 7632.0, "step_tflops": 62.48721152001105, "step_time": 0.3069648323059082} +{"epoch": 0, "iter": 6901, "iter_tflops": 40.032463515161176, "iter_time": 0.41329991149902345, "loss": 0.08694018423557281, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 44.16104399103301, "step_time": 
0.37466083526611327} +{"epoch": 0, "iter": 6902, "iter_tflops": 32.72772286864523, "iter_time": 0.5055473518371583, "loss": 0.06708110123872757, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 35.45262424383255, "step_time": 0.46669080162048343} +{"epoch": 0, "iter": 6903, "iter_tflops": 40.10898126071321, "iter_time": 0.4125114402770995, "loss": 0.04837937280535698, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 43.61829789740351, "step_time": 0.3793227710723877} +{"epoch": 0, "iter": 6904, "iter_tflops": 42.29653774573534, "iter_time": 0.3911765480041504, "loss": 0.0678718239068985, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 46.1604052788531, "step_time": 0.35843302345275874} +{"epoch": 0, "iter": 6905, "iter_tflops": 34.3538053469271, "iter_time": 0.6005475463867187, "loss": 0.8266313672065735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.69483640062604, "step_time": 0.5622342414855958} +{"epoch": 0, "iter": 6906, "iter_tflops": 15.05362924019036, "iter_time": 1.3705062866210938, "loss": 0.8678135871887207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.396773744663836, "step_time": 1.121451717376709} +{"epoch": 0, "iter": 6907, "iter_tflops": 37.77731185553188, "iter_time": 0.5461239166259766, "loss": 0.9563879370689392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27999837334592, "step_time": 0.4997842617034912} +{"epoch": 0, "iter": 6908, "iter_tflops": 41.51617394657694, "iter_time": 0.4969411087036133, "loss": 0.8471323847770691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.96835246580122, "step_time": 0.45879140281677244} +{"epoch": 0, "iter": 6909, "iter_tflops": 16.295406625547656, "iter_time": 1.2660680389404297, "loss": 0.8229988813400269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.76530813128181, "step_time": 1.161313575744629} +{"epoch": 0, "iter": 6910, "iter_tflops": 22.18330472665665, "iter_time": 0.9300279541015626, "loss": 0.8316981792449951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.401666290987567, "step_time": 
0.7529138298034669} +{"epoch": 0, "iter": 6911, "iter_tflops": 43.873292258480454, "iter_time": 0.47024265670776366, "loss": 0.6754817962646484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08429363120172, "step_time": 0.43817358016967767} +{"epoch": 0, "iter": 6912, "iter_tflops": 37.08094597927082, "iter_time": 0.5563799133300781, "loss": 0.6035745739936829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.90510798367501, "step_time": 0.5170038261413574} +{"epoch": 0, "iter": 6913, "iter_tflops": 22.031814456744332, "iter_time": 0.9364228057861327, "loss": 0.983485996723175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.066308585756296, "step_time": 0.8944254531860352} +{"epoch": 0, "iter": 6914, "iter_tflops": 14.427933147322777, "iter_time": 1.4299410247802735, "loss": 0.6857642531394958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.601733983398695, "step_time": 1.1090951805114746} +{"epoch": 0, "iter": 6915, "iter_tflops": 42.74992532144244, "iter_time": 0.4825995216369629, "loss": 0.7974609136581421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.0759969437696, "step_time": 0.4477622814178467} +{"epoch": 0, "iter": 6916, "iter_tflops": 44.43696969419736, "iter_time": 0.46427768707275385, "loss": 0.7522055506706238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76474932044162, "step_time": 0.4319313678741455} +{"epoch": 0, "iter": 6917, "iter_tflops": 28.377323984410047, "iter_time": 0.7270274505615234, "loss": 0.003996688406914473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.04851571056336, "step_time": 0.6865927658081055} +{"epoch": 0, "iter": 6918, "iter_tflops": 13.744932888110368, "iter_time": 1.5009963073730468, "loss": 0.005060957279056311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.368822558906082, "step_time": 1.1878233795166016} +{"epoch": 0, "iter": 6919, "iter_tflops": 42.094794054574386, "iter_time": 0.4901103324890137, "loss": 0.017723821103572845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.582769548792385, 
"step_time": 0.4428910884857178} +{"epoch": 0, "iter": 6920, "iter_tflops": 50.81860966303876, "iter_time": 0.4059751663208007, "loss": 0.0062972609885036945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.00403401222374, "step_time": 0.36838584709167477} +{"epoch": 0, "iter": 6921, "iter_tflops": 18.712105114794905, "iter_time": 1.1025533142089843, "loss": 0.6320616006851196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.012721753322346, "step_time": 1.0308989334106444} +{"epoch": 0, "iter": 6922, "iter_tflops": 18.891537893611474, "iter_time": 1.0920812072753907, "loss": 0.6932228207588196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.156656757100585, "step_time": 0.8909357566833496} +{"epoch": 0, "iter": 6923, "iter_tflops": 36.61145465062944, "iter_time": 0.5635147171020507, "loss": 0.6778934001922607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.955508348493105, "step_time": 0.516351671218872} +{"epoch": 0, "iter": 6924, "iter_tflops": 39.76193305294476, "iter_time": 0.5188654556274415, "loss": 0.7271333932876587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.115207398390446, "step_time": 0.4785108261108399} +{"epoch": 0, "iter": 6925, "iter_tflops": 18.18447305089184, "iter_time": 1.1345444793701172, "loss": 0.12423902750015259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.50726605948811, "step_time": 1.057610710144043} +{"epoch": 0, "iter": 6926, "iter_tflops": 14.50349485052953, "iter_time": 1.4224911804199218, "loss": 0.1289719045162201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.49572034216329, "step_time": 1.0066049480438233} +{"epoch": 0, "iter": 6927, "iter_tflops": 41.37128282037425, "iter_time": 0.49868150329589844, "loss": 0.09611978381872177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.53368620562357, "step_time": 0.4530951747894287} +{"epoch": 0, "iter": 6928, "iter_tflops": 40.21350644472248, "iter_time": 0.5130389099121094, "loss": 0.1520467847585678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.75826216107069, 
"step_time": 0.47147881317138673} +{"epoch": 0, "iter": 6929, "iter_tflops": 33.97667638750009, "iter_time": 0.6072134094238282, "loss": 0.17184461653232574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.05666419547322, "step_time": 0.5421151313781739} +{"epoch": 0, "iter": 6930, "iter_tflops": 33.96082987010249, "iter_time": 0.6074967422485351, "loss": 0.14022018015384674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.49051914568898, "step_time": 0.5503016223907471} +{"epoch": 0, "iter": 6931, "iter_tflops": 39.308402965423944, "iter_time": 0.5248519897460937, "loss": 0.10828644782304764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.012849061171345, "step_time": 0.47964954566955564} +{"epoch": 0, "iter": 6932, "iter_tflops": 43.32737168669056, "iter_time": 0.476167667388916, "loss": 0.10096529871225357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.57249488731853, "step_time": 0.4336769294738769} +{"epoch": 0, "iter": 6933, "iter_tflops": 37.55619107076453, "iter_time": 0.5493393478393555, "loss": 0.12458908557891846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.55027004437471, "step_time": 0.4965333194732666} +{"epoch": 0, "iter": 6934, "iter_tflops": 36.53348051825423, "iter_time": 0.5647174377441406, "loss": 0.20072580873966217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.95794711309626, "step_time": 0.5037140522003174} +{"epoch": 0, "iter": 6935, "iter_tflops": 46.936100614638434, "iter_time": 0.4395570411682129, "loss": 0.1165616363286972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.40695005628997, "step_time": 0.40132887649536136} +{"epoch": 0, "iter": 6936, "iter_tflops": 37.77955440710706, "iter_time": 0.5460914993286132, "loss": 0.10943926870822906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62769944944665, "step_time": 0.4956097450256347} +{"epoch": 0, "iter": 6937, "iter_tflops": 31.644962861205148, "iter_time": 0.6519550552368165, "loss": 0.034714605659246445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
34.32447026432766, "step_time": 0.6010607986450196} +{"epoch": 0, "iter": 6938, "iter_tflops": 23.957327067432946, "iter_time": 0.8611600723266601, "loss": 0.023737000301480293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.369227619317606, "step_time": 0.7024731388092041} +{"epoch": 0, "iter": 6939, "iter_tflops": 53.18330062434192, "iter_time": 0.3879242782592773, "loss": 0.03026665560901165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.88427058703976, "step_time": 0.3564196853637695} +{"epoch": 0, "iter": 6940, "iter_tflops": 57.2050918928545, "iter_time": 0.36065134811401367, "loss": 0.0434718057513237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.482024716131036, "step_time": 0.33019246101379396} +{"epoch": 0, "iter": 6941, "iter_tflops": 47.755723755594815, "iter_time": 0.4320130004882813, "loss": 0.10027610510587692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.100578726328706, "step_time": 0.3959858798980713} +{"epoch": 0, "iter": 6942, "iter_tflops": 44.208683715077385, "iter_time": 0.46667513656616216, "loss": 0.1457817107439041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.741520930691564, "step_time": 0.4147660369873047} +{"epoch": 0, "iter": 6943, "iter_tflops": 51.52128312020788, "iter_time": 0.4004382705688477, "loss": 0.11543181538581848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.76984316722639, "step_time": 0.3699327869415283} +{"epoch": 0, "iter": 6944, "iter_tflops": 52.08667485625567, "iter_time": 0.39609158325195315, "loss": 0.10924256592988968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.615109015094056, "step_time": 0.36440967559814447} +{"epoch": 0, "iter": 6945, "iter_tflops": 26.449916276956078, "iter_time": 0.780006004333496, "loss": 0.5419372320175171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.89575864167259, "step_time": 0.7395781478881837} +{"epoch": 0, "iter": 6946, "iter_tflops": 17.283566795190282, "iter_time": 1.193682632446289, "loss": 0.6524304151535034, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.65496004921699, "step_time": 0.9988445129394532} +{"epoch": 0, "iter": 6947, "iter_tflops": 37.746342386939666, "iter_time": 0.5465719909667969, "loss": 0.3976861834526062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.52421708409191, "step_time": 0.49684485244750975} +{"epoch": 0, "iter": 6948, "iter_tflops": 42.972223973877405, "iter_time": 0.48010299682617186, "loss": 0.5522469282150269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.025680906970514, "step_time": 0.43871971893310546} +{"epoch": 0, "iter": 6949, "iter_tflops": 15.809477050859913, "iter_time": 1.304982666015625, "loss": 0.13541632890701294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.901077999007764, "step_time": 1.2206968994140626} +{"epoch": 0, "iter": 6950, "iter_tflops": 21.90535504732134, "iter_time": 0.9418287658691407, "loss": 0.22843436896800995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.84130940738606, "step_time": 0.6479348335266113} +{"epoch": 0, "iter": 6951, "iter_tflops": 37.13838212042354, "iter_time": 0.5555194473266603, "loss": 0.19377949833869934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.57793801130231, "step_time": 0.5084312934875489} +{"epoch": 0, "iter": 6952, "iter_tflops": 44.3405479658786, "iter_time": 0.46528729248046874, "loss": 0.18713043630123138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.474281113369095, "step_time": 0.42560906600952153} +{"epoch": 0, "iter": 6953, "iter_tflops": 16.649970564120032, "iter_time": 1.1123487396240235, "loss": 0.025006333366036415, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 17.807264130856293, "step_time": 1.0400572280883789} +{"epoch": 0, "iter": 6954, "iter_tflops": 17.360479143672023, "iter_time": 1.0668238830566406, "loss": 0.055321864783763885, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 23.237939713006888, "step_time": 0.7969972381591797} +{"epoch": 0, "iter": 6955, "iter_tflops": 44.809705861841024, "iter_time": 0.41331612014770513, "loss": 0.061121512204408646, "lr": 3e-05, 
"seqlen": 7376.0, "step_tflops": 48.75142316531193, "step_time": 0.37989811515808103} +{"epoch": 0, "iter": 6956, "iter_tflops": 49.24593636384804, "iter_time": 0.37608329010009767, "loss": 0.027156593278050423, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 54.14711472366193, "step_time": 0.34204174804687504} +{"epoch": 0, "iter": 6957, "iter_tflops": 41.93799337689364, "iter_time": 0.49194279098510746, "loss": 0.4897446632385254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.78169351202224, "step_time": 0.45064068031311033} +{"epoch": 0, "iter": 6958, "iter_tflops": 42.37742305023983, "iter_time": 0.48684162521362306, "loss": 0.37739890813827515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59679688082246, "step_time": 0.4524680442810059} +{"epoch": 0, "iter": 6959, "iter_tflops": 49.906783485271475, "iter_time": 0.4133925704956055, "loss": 0.44899269938468933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.325521006954354, "step_time": 0.37976798248291016} +{"epoch": 0, "iter": 6960, "iter_tflops": 46.8213179834725, "iter_time": 0.44063461685180666, "loss": 0.3360659182071686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.284922514493275, "step_time": 0.4102838878631592} +{"epoch": 0, "iter": 6961, "iter_tflops": 26.380160549577273, "iter_time": 0.7820685348510743, "loss": 0.07539007067680359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.87501096727215, "step_time": 0.7401286239624023} +{"epoch": 0, "iter": 6962, "iter_tflops": 15.599648279504162, "iter_time": 1.3225358123779296, "loss": 0.0928870365023613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.27660759463642, "step_time": 0.8863445167541505} +{"epoch": 0, "iter": 6963, "iter_tflops": 45.78295829832217, "iter_time": 0.45062823104858396, "loss": 0.07349258661270142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.71218629323612, "step_time": 0.41501078605651853} +{"epoch": 0, "iter": 6964, "iter_tflops": 51.32462358186863, "iter_time": 0.4019726219177246, "loss": 0.05696830898523331, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.92594917622453, "step_time": 0.36890019416809083} +{"epoch": 0, "iter": 6965, "iter_tflops": 35.04899577055245, "iter_time": 0.5886357955932617, "loss": 0.5319865345954895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.52089398220916, "step_time": 0.5498561286926269} +{"epoch": 0, "iter": 6966, "iter_tflops": 17.55591088972222, "iter_time": 1.1751650848388673, "loss": 0.7433174252510071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.719988840314052, "step_time": 0.9080591392517089} +{"epoch": 0, "iter": 6967, "iter_tflops": 46.89284730630744, "iter_time": 0.43996248245239256, "loss": 0.7108756899833679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.84520278959815, "step_time": 0.4057628326416016} +{"epoch": 0, "iter": 6968, "iter_tflops": 48.56544127171913, "iter_time": 0.42481017303466795, "loss": 0.6993741393089294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.47890085669438, "step_time": 0.39313120460510254} +{"epoch": 0, "iter": 6969, "iter_tflops": 27.661523676727406, "iter_time": 0.7458408203125, "loss": 0.45410582423210144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.22600514548093, "step_time": 0.7059156188964845} +{"epoch": 0, "iter": 6970, "iter_tflops": 17.82007133832135, "iter_time": 1.1577447204589844, "loss": 0.5234630703926086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.095730137081905, "step_time": 0.9779748497009277} +{"epoch": 0, "iter": 6971, "iter_tflops": 50.50638813333882, "iter_time": 0.40848483276367187, "loss": 0.5292564630508423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.72225279916092, "step_time": 0.37701469612121585} +{"epoch": 0, "iter": 6972, "iter_tflops": 51.21535074446245, "iter_time": 0.40283026885986334, "loss": 0.6467933058738708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.31600828581895, "step_time": 0.3729678649902344} +{"epoch": 0, "iter": 6973, "iter_tflops": 25.65712167273246, "iter_time": 0.8041078720092774, "loss": 0.6857925653457642, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.90648244159129, "step_time": 0.7667703704833985} +{"epoch": 0, "iter": 6974, "iter_tflops": 13.691687333597258, "iter_time": 1.5068335266113282, "loss": 0.9457852244377136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.090676330520935, "step_time": 1.0806895027160643} +{"epoch": 0, "iter": 6975, "iter_tflops": 46.49965240904182, "iter_time": 0.44368274688720705, "loss": 0.7599142789840698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.290902988317285, "step_time": 0.4102350978851318} +{"epoch": 0, "iter": 6976, "iter_tflops": 46.76207319627545, "iter_time": 0.4411928749084472, "loss": 0.7508736848831177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40558294371434, "step_time": 0.4093017539978027} +{"epoch": 0, "iter": 6977, "iter_tflops": 39.78099564696691, "iter_time": 0.5186168212890625, "loss": 0.0010388493537902832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.076638241894436, "step_time": 0.47893926620483396} +{"epoch": 0, "iter": 6978, "iter_tflops": 36.89407310706782, "iter_time": 0.5591980438232421, "loss": 0.03284551575779915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.002849277297486, "step_time": 0.43893282699584957} +{"epoch": 0, "iter": 6979, "iter_tflops": 52.378841180946786, "iter_time": 0.3938822059631348, "loss": 0.0068326229229569435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.80513329506559, "step_time": 0.35690763664245606} +{"epoch": 0, "iter": 6980, "iter_tflops": 49.82222649799901, "iter_time": 0.4140941696166992, "loss": 0.010534347966313362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.40189379425279, "step_time": 0.37923484039306643} +{"epoch": 0, "iter": 6981, "iter_tflops": 39.9479910126076, "iter_time": 0.5164488372802735, "loss": 0.5414856672286987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.10675847147296, "step_time": 0.47860461425781253} +{"epoch": 0, "iter": 6982, "iter_tflops": 22.994875560733746, "iter_time": 0.8972039642333984, "loss": 
0.6046342253684998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.855257377643294, "step_time": 0.7979457798004149} +{"epoch": 0, "iter": 6983, "iter_tflops": 39.29960198061852, "iter_time": 0.5249695281982422, "loss": 0.534576952457428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80305063470489, "step_time": 0.4820005397796631} +{"epoch": 0, "iter": 6984, "iter_tflops": 38.20638619797296, "iter_time": 0.539990707397461, "loss": 0.4632263779640198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6523873810006, "step_time": 0.49531599044799807} +{"epoch": 0, "iter": 6985, "iter_tflops": 29.744939664782276, "iter_time": 0.693600112915039, "loss": 1.0851682424545288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.01101170715651, "step_time": 0.6249761047363283} +{"epoch": 0, "iter": 6986, "iter_tflops": 39.12275224565881, "iter_time": 0.5273425903320312, "loss": 0.8361703157424927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.480486664760896, "step_time": 0.47449085998535157} +{"epoch": 0, "iter": 6987, "iter_tflops": 37.49645650246279, "iter_time": 0.5502144851684571, "loss": 0.7331969141960144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.80266764621919, "step_time": 0.5056309967041016} +{"epoch": 0, "iter": 6988, "iter_tflops": 40.282082092621856, "iter_time": 0.5121655197143555, "loss": 0.6839632391929626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.46884912645895, "step_time": 0.4746178913116455} +{"epoch": 0, "iter": 6989, "iter_tflops": 29.28226238204086, "iter_time": 0.7045594100952148, "loss": 0.30385690927505493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.58940801438771, "step_time": 0.633061315536499} +{"epoch": 0, "iter": 6990, "iter_tflops": 36.75100757033934, "iter_time": 0.5613749084472656, "loss": 0.22931551933288574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.92457117284067, "step_time": 0.5041248550415038} +{"epoch": 0, "iter": 6991, "iter_tflops": 41.06475805756286, "iter_time": 0.5024038734436035, "loss": 
0.1858697086572647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.05174899026085, "step_time": 0.45794212150573727} +{"epoch": 0, "iter": 6992, "iter_tflops": 44.209188917161434, "iter_time": 0.4666698036193848, "loss": 0.31102243065834045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.36590301023628, "step_time": 0.4265627689361572} +{"epoch": 0, "iter": 6993, "iter_tflops": 31.49934194306285, "iter_time": 0.6549690322875976, "loss": 0.2077002227306366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.22191884822356, "step_time": 0.6028619728088379} +{"epoch": 0, "iter": 6994, "iter_tflops": 9.548116727413449, "iter_time": 2.1607500305175784, "loss": 0.19846640527248383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.112358847785755, "step_time": 1.7033093032836915} +{"epoch": 0, "iter": 6995, "iter_tflops": 22.050900869644934, "iter_time": 0.935612274169922, "loss": 0.20383933186531067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.367859528105853, "step_time": 0.7538438835144043} +{"epoch": 0, "iter": 6996, "iter_tflops": 45.78992430444454, "iter_time": 0.4505596771240235, "loss": 0.11969102919101715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.49586054180163, "step_time": 0.4168246250152588} +{"epoch": 0, "iter": 6997, "iter_tflops": 20.021265546185933, "iter_time": 0.7303203125, "loss": 0.1933237463235855, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 21.232983039142333, "step_time": 0.6886426124572753} +{"epoch": 0, "iter": 6998, "iter_tflops": 8.58818291302796, "iter_time": 1.702564682006836, "loss": 0.3012677729129791, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 11.192722493303203, "step_time": 1.306378936767578} +{"epoch": 0, "iter": 6999, "iter_tflops": 21.708321163078253, "iter_time": 0.673563690185547, "loss": 0.3420714735984802, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 23.408866052981686, "step_time": 0.6246324310302734} +{"epoch": 0, "iter": 7000, "iter_tflops": 22.131185230937476, "iter_time": 0.6606938018798828, "loss": 
0.2202458381652832, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 23.79627426288145, "step_time": 0.6144632873535156} +{"epoch": 0, "iter": 7001, "iter_tflops": 19.65827782336602, "iter_time": 1.049486312866211, "loss": 0.29552942514419556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.825954064889448, "step_time": 0.9906433792114256} +{"epoch": 0, "iter": 7002, "iter_tflops": 10.29060809381742, "iter_time": 2.004846878051758, "loss": 0.2843955159187317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.176200487833814, "step_time": 1.5657847290039064} +{"epoch": 0, "iter": 7003, "iter_tflops": 15.884304018790285, "iter_time": 1.298835220336914, "loss": 0.2508108615875244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.416936586959174, "step_time": 1.1202239532470701} +{"epoch": 0, "iter": 7004, "iter_tflops": 21.480652394597872, "iter_time": 0.9604500427246093, "loss": 0.36779215931892395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.35026859386394, "step_time": 0.7029269065856933} +{"epoch": 0, "iter": 7005, "iter_tflops": 20.81955251788133, "iter_time": 0.8737279739379884, "loss": 0.2663579285144806, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 21.943103171434842, "step_time": 0.8289905624389647} +{"epoch": 0, "iter": 7006, "iter_tflops": 15.50870052818271, "iter_time": 1.1729303436279297, "loss": 0.23364156484603882, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 17.936948149108137, "step_time": 1.014142723083496} +{"epoch": 0, "iter": 7007, "iter_tflops": 27.874849760887713, "iter_time": 0.6525820083618163, "loss": 0.27035650610923767, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 30.091842132462506, "step_time": 0.6045035514831543} +{"epoch": 0, "iter": 7008, "iter_tflops": 30.449964328174417, "iter_time": 0.5973939819335937, "loss": 0.26532799005508423, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 32.570495901234935, "step_time": 0.5585001068115235} +{"epoch": 0, "iter": 7009, "iter_tflops": 35.40832382198478, "iter_time": 0.5826622467041015, 
"loss": 0.10350349545478821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.52720473524489, "step_time": 0.5219466857910157} +{"epoch": 0, "iter": 7010, "iter_tflops": 43.50545846530494, "iter_time": 0.47421850585937503, "loss": 0.1133546456694603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.22286459340065, "step_time": 0.42782803726196295} +{"epoch": 0, "iter": 7011, "iter_tflops": 40.04837762363638, "iter_time": 0.5151542892456055, "loss": 0.07706256210803986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.07500183932404, "step_time": 0.46809058761596684} +{"epoch": 0, "iter": 7012, "iter_tflops": 41.50864682762796, "iter_time": 0.49703122329711913, "loss": 0.12075150012969971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.75374189162399, "step_time": 0.4509159832000732} +{"epoch": 0, "iter": 7013, "iter_tflops": 44.619273673383944, "iter_time": 0.4623807563781739, "loss": 0.05893697217106819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.39494403846726, "step_time": 0.41767621994018556} +{"epoch": 0, "iter": 7014, "iter_tflops": 46.71198918020339, "iter_time": 0.4416659164428711, "loss": 0.06706196069717407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.91583366742401, "step_time": 0.40519995498657224} +{"epoch": 0, "iter": 7015, "iter_tflops": 52.951048581921285, "iter_time": 0.3896257781982422, "loss": 0.03681696951389313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.74480876283697, "step_time": 0.3572804889678955} +{"epoch": 0, "iter": 7016, "iter_tflops": 53.74122961286349, "iter_time": 0.38389693832397453, "loss": 0.023140013217926025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.76913996142732, "step_time": 0.35105318069458} +{"epoch": 0, "iter": 7017, "iter_tflops": 22.930261021718028, "iter_time": 0.8997321701049805, "loss": 0.3781546354293823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.020631237239304, "step_time": 0.8588905639648438} +{"epoch": 0, "iter": 7018, "iter_tflops": 11.976222431111957, "iter_time": 
1.722671203613281, "loss": 0.2684544026851654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.183494162300263, "step_time": 1.2748231811523438} +{"epoch": 0, "iter": 7019, "iter_tflops": 36.59661800998208, "iter_time": 0.5637431716918946, "loss": 0.20067338645458221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.30735804214855, "step_time": 0.5118443508148193} +{"epoch": 0, "iter": 7020, "iter_tflops": 38.902469902369965, "iter_time": 0.530328628540039, "loss": 0.2728360891342163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.42788766629506, "step_time": 0.48626256561279296} +{"epoch": 0, "iter": 7021, "iter_tflops": 13.50167631656052, "iter_time": 1.092042221069336, "loss": 0.11198928952217102, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 14.61739856027553, "step_time": 1.0086884155273437} +{"epoch": 0, "iter": 7022, "iter_tflops": 10.922523392370477, "iter_time": 1.3499078979492187, "loss": 0.14027874171733856, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 15.379886742529985, "step_time": 0.958680700302124} +{"epoch": 0, "iter": 7023, "iter_tflops": 28.422851027142173, "iter_time": 0.5187516403198241, "loss": 0.10951439291238785, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 31.24023321669406, "step_time": 0.47196832656860355} +{"epoch": 0, "iter": 7024, "iter_tflops": 33.59642938730132, "iter_time": 0.43886808395385746, "loss": 0.08858764171600342, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 36.93713101803703, "step_time": 0.3991755771636963} +{"epoch": 0, "iter": 7025, "iter_tflops": 18.795517991882335, "iter_time": 1.0976602783203127, "loss": 0.19238145649433136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.27498424497188, "step_time": 1.0175639724731447} +{"epoch": 0, "iter": 7026, "iter_tflops": 38.515567367338654, "iter_time": 0.5356559677124023, "loss": 0.22170163691043854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.965827598352895, "step_time": 0.42133656311035156} +{"epoch": 0, "iter": 7027, "iter_tflops": 50.343607323723425, 
"iter_time": 0.40980562591552744, "loss": 0.2257780134677887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.711101840835475, "step_time": 0.37709153747558594} +{"epoch": 0, "iter": 7028, "iter_tflops": 49.69559263564085, "iter_time": 0.41514936065673824, "loss": 0.21343573927879333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.70045519161992, "step_time": 0.3841884288787842} +{"epoch": 0, "iter": 7029, "iter_tflops": 32.42704371575224, "iter_time": 0.6362310943603515, "loss": 0.058183539658784866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.60186984187407, "step_time": 0.5962421569824219} +{"epoch": 0, "iter": 7030, "iter_tflops": 16.309759947338947, "iter_time": 1.264953842163086, "loss": 0.07421513646841049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.585313603890356, "step_time": 1.0533961277008055} +{"epoch": 0, "iter": 7031, "iter_tflops": 39.76686409562304, "iter_time": 0.5188011169433594, "loss": 0.0713554099202156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77355390928532, "step_time": 0.4713141078948974} +{"epoch": 0, "iter": 7032, "iter_tflops": 39.43920577953471, "iter_time": 0.5231112823486327, "loss": 0.11971181631088257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.05242432855595, "step_time": 0.4792086353302002} +{"epoch": 0, "iter": 7033, "iter_tflops": 16.03248807610154, "iter_time": 1.2868304290771484, "loss": 0.8097180724143982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.105974838243498, "step_time": 1.2060752868652345} +{"epoch": 0, "iter": 7034, "iter_tflops": 20.49778441020605, "iter_time": 1.0065035858154299, "loss": 0.6472159028053284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.176985480865103, "step_time": 0.7591384086608887} +{"epoch": 0, "iter": 7035, "iter_tflops": 35.746862247656644, "iter_time": 0.5771441802978515, "loss": 0.6669331789016724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.57517965897784, "step_time": 0.5348281898498536} +{"epoch": 0, "iter": 7036, "iter_tflops": 
35.52435813211374, "iter_time": 0.5807590789794922, "loss": 0.7173519134521484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.55801663614615, "step_time": 0.5350662536621094} +{"epoch": 0, "iter": 7037, "iter_tflops": 18.298016795169957, "iter_time": 1.1275043487548828, "loss": 0.15370124578475952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.440247393124118, "step_time": 1.061256736755371} +{"epoch": 0, "iter": 7038, "iter_tflops": 25.73843891329166, "iter_time": 0.801567398071289, "loss": 0.20462357997894287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.080296475276146, "step_time": 0.7094526538848878} +{"epoch": 0, "iter": 7039, "iter_tflops": 44.09838376369956, "iter_time": 0.46784239578247067, "loss": 0.27071040868759155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.272753337539136, "step_time": 0.4273858871459961} +{"epoch": 0, "iter": 7040, "iter_tflops": 49.799039125762796, "iter_time": 0.41428697967529293, "loss": 0.18243379890918732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.19387940225847, "step_time": 0.38069047164916986} +{"epoch": 0, "iter": 7041, "iter_tflops": 28.516734761437302, "iter_time": 0.7234732055664062, "loss": 0.3511500656604767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.19657367013851, "step_time": 0.6832263069152831} +{"epoch": 0, "iter": 7042, "iter_tflops": 14.824985194039915, "iter_time": 1.3916434478759765, "loss": 0.4574165344238281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.93708788741261, "step_time": 1.1501919174194337} +{"epoch": 0, "iter": 7043, "iter_tflops": 40.4881428891481, "iter_time": 0.5095588989257813, "loss": 0.48321229219436646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35623727881457, "step_time": 0.46512271499633795} +{"epoch": 0, "iter": 7044, "iter_tflops": 45.60737551039286, "iter_time": 0.452363094329834, "loss": 0.45208218693733215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.68533611577418, "step_time": 0.41523505973815916} +{"epoch": 0, "iter": 7045, 
"iter_tflops": 19.37316951529824, "iter_time": 1.0649312438964844, "loss": 0.6067554950714111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.62668449967932, "step_time": 1.000213752746582} +{"epoch": 0, "iter": 7046, "iter_tflops": 26.015050676168794, "iter_time": 0.7930445251464844, "loss": 0.5866722464561462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.394746121954377, "step_time": 0.6571511497497559} +{"epoch": 0, "iter": 7047, "iter_tflops": 47.11989058095895, "iter_time": 0.4378425598144532, "loss": 0.7862446308135986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.84796464757507, "step_time": 0.40574079322814943} +{"epoch": 0, "iter": 7048, "iter_tflops": 47.65250240495935, "iter_time": 0.43294879531860353, "loss": 0.6761960387229919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.25373515498766, "step_time": 0.40252858543396} +{"epoch": 0, "iter": 7049, "iter_tflops": 29.133032739465836, "iter_time": 0.7081684112548828, "loss": 0.8233197927474976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.242135960738235, "step_time": 0.6603611717224122} +{"epoch": 0, "iter": 7050, "iter_tflops": 25.94394426510142, "iter_time": 0.7952180786132813, "loss": 0.6359947919845581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.964111333350644, "step_time": 0.6454455528259277} +{"epoch": 0, "iter": 7051, "iter_tflops": 43.95945752728608, "iter_time": 0.46932093048095697, "loss": 0.8752890229225159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.377173210470986, "step_time": 0.4354648475646973} +{"epoch": 0, "iter": 7052, "iter_tflops": 47.74943640892752, "iter_time": 0.43206988525390627, "loss": 0.8439453840255737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.34866554376084, "step_time": 0.40178441429138184} +{"epoch": 0, "iter": 7053, "iter_tflops": 39.70028525948986, "iter_time": 0.5196711654663086, "loss": 0.743748128414154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.92391347182725, "step_time": 0.48064334869384767} +{"epoch": 0, "iter": 7054, 
"iter_tflops": 46.06094489184324, "iter_time": 0.4479086036682129, "loss": 0.7220436930656433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.1321304755734, "step_time": 0.411534345626831} +{"epoch": 0, "iter": 7055, "iter_tflops": 47.09882209190737, "iter_time": 0.4380384178161621, "loss": 0.7036666870117188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.67180971396534, "step_time": 0.4071513061523438} +{"epoch": 0, "iter": 7056, "iter_tflops": 46.72603402151315, "iter_time": 0.4415331611633301, "loss": 0.8007664680480957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.53354307313323, "step_time": 0.40826532745361327} +{"epoch": 0, "iter": 7057, "iter_tflops": 43.02691763786085, "iter_time": 0.4794927139282226, "loss": 0.5294595956802368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.83200897323444, "step_time": 0.44053402709960937} +{"epoch": 0, "iter": 7058, "iter_tflops": 35.15230217248492, "iter_time": 0.5869058990478515, "loss": 0.5315229892730713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.521421886144374, "step_time": 0.5355745582580567} +{"epoch": 0, "iter": 7059, "iter_tflops": 37.657941717945505, "iter_time": 0.5478550491333007, "loss": 0.323756605386734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.03949254126857, "step_time": 0.5027131729125977} +{"epoch": 0, "iter": 7060, "iter_tflops": 39.4201664209163, "iter_time": 0.5233639373779297, "loss": 0.41653546690940857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.05713237596035, "step_time": 0.4791562366485595} +{"epoch": 0, "iter": 7061, "iter_tflops": 32.46212677286316, "iter_time": 0.6355434951782226, "loss": 0.326942503452301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.779260008734646, "step_time": 0.57662158203125} +{"epoch": 0, "iter": 7062, "iter_tflops": 37.89467586330672, "iter_time": 0.5444325103759766, "loss": 0.3208540380001068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53861009869461, "step_time": 0.4966726970672608} +{"epoch": 0, "iter": 7063, 
"iter_tflops": 41.744470823347555, "iter_time": 0.49422338104248054, "loss": 0.43038854002952576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.5261025580371, "step_time": 0.45317065048217775} +{"epoch": 0, "iter": 7064, "iter_tflops": 44.366550492325715, "iter_time": 0.46501459503173825, "loss": 0.40278151631355286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.47038118286592, "step_time": 0.42564331054687504} +{"epoch": 0, "iter": 7065, "iter_tflops": 31.7242621443162, "iter_time": 0.6503254013061524, "loss": 0.24181580543518066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.2519277845053, "step_time": 0.585247241973877} +{"epoch": 0, "iter": 7066, "iter_tflops": 34.834131552286884, "iter_time": 0.5922666244506837, "loss": 0.18897491693496704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.25746454615083, "step_time": 0.5392697544097901} +{"epoch": 0, "iter": 7067, "iter_tflops": 41.174159160968536, "iter_time": 0.5010689697265625, "loss": 0.1619320511817932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.82328969909148, "step_time": 0.4602762012481689} +{"epoch": 0, "iter": 7068, "iter_tflops": 38.824090309205616, "iter_time": 0.5313992767333985, "loss": 0.2203972488641739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41389852258734, "step_time": 0.48642294692993165} +{"epoch": 0, "iter": 7069, "iter_tflops": 18.76216816214432, "iter_time": 1.0996113739013673, "loss": 0.5676359534263611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.101771501151863, "step_time": 1.0263321075439453} +{"epoch": 0, "iter": 7070, "iter_tflops": 31.65239952031632, "iter_time": 0.6518018798828125, "loss": 0.7110785245895386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.25718226775362, "step_time": 0.585160020828247} +{"epoch": 0, "iter": 7071, "iter_tflops": 46.99506302841364, "iter_time": 0.4390055503845215, "loss": 0.6350451707839966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.77450041791159, "step_time": 0.40632784843444825} +{"epoch": 0, "iter": 7072, 
"iter_tflops": 48.21978788410721, "iter_time": 0.4278553352355957, "loss": 0.5929080247879028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14028794908313, "step_time": 0.39568430328369136} +{"epoch": 0, "iter": 7073, "iter_tflops": 29.68479820106623, "iter_time": 0.6950053482055665, "loss": 0.056200526654720306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.471535850316794, "step_time": 0.6555477180480956} +{"epoch": 0, "iter": 7074, "iter_tflops": 20.88362989445617, "iter_time": 0.9879074478149413, "loss": 0.037789177149534225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.39263400326398, "step_time": 0.8819482879638672} +{"epoch": 0, "iter": 7075, "iter_tflops": 49.06211831447614, "iter_time": 0.42050963592529295, "loss": 0.11191340535879135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.50395467154529, "step_time": 0.38559941291809086} +{"epoch": 0, "iter": 7076, "iter_tflops": 54.63082539720043, "iter_time": 0.3776456489562988, "loss": 0.09979512542486191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.526957815800905, "step_time": 0.3465840396881103} +{"epoch": 0, "iter": 7077, "iter_tflops": 25.76157609144407, "iter_time": 0.8008474884033203, "loss": 0.7537803053855896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.089544047233655, "step_time": 0.7615888061523437} +{"epoch": 0, "iter": 7078, "iter_tflops": 12.877551503571613, "iter_time": 1.6020975341796875, "loss": 0.5779714584350586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.983619822808455, "step_time": 1.3769098358154297} +{"epoch": 0, "iter": 7079, "iter_tflops": 36.13605860948262, "iter_time": 0.5709281616210937, "loss": 0.661624014377594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.69432304030096, "step_time": 0.5197492218017579} +{"epoch": 0, "iter": 7080, "iter_tflops": 39.781181162556436, "iter_time": 0.5186144027709961, "loss": 0.7153686285018921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39374865905452, "step_time": 0.4754393005371093} +{"epoch": 0, "iter": 
7081, "iter_tflops": 35.896867350614016, "iter_time": 0.574732421875, "loss": 0.4946334660053253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66575871017391, "step_time": 0.5201235065460205} +{"epoch": 0, "iter": 7082, "iter_tflops": 36.68401212017464, "iter_time": 0.5624001388549805, "loss": 0.384372353553772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.51150547345655, "step_time": 0.5092650413513183} +{"epoch": 0, "iter": 7083, "iter_tflops": 38.214716719886376, "iter_time": 0.5398729934692383, "loss": 0.34818512201309204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75703941312929, "step_time": 0.4940746231079102} +{"epoch": 0, "iter": 7084, "iter_tflops": 35.50613891326125, "iter_time": 0.5810570831298827, "loss": 0.3732267916202545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.51098287664342, "step_time": 0.5357197341918944} +{"epoch": 0, "iter": 7085, "iter_tflops": 14.620557482720072, "iter_time": 0.8692475357055663, "loss": 0.06309142708778381, "lr": 3e-05, "seqlen": 5104.0, "step_tflops": 15.61075508415069, "step_time": 0.8141107521057129} +{"epoch": 0, "iter": 7086, "iter_tflops": 10.9977957932973, "iter_time": 1.1555846099853515, "loss": 0.095615454018116, "lr": 3e-05, "seqlen": 5104.0, "step_tflops": 15.902177227033997, "step_time": 0.7991914176940917} +{"epoch": 0, "iter": 7087, "iter_tflops": 34.46379816278546, "iter_time": 0.3687603874206543, "loss": 0.0635429322719574, "lr": 3e-05, "seqlen": 5104.0, "step_tflops": 37.61374394766849, "step_time": 0.3378787174224854} +{"epoch": 0, "iter": 7088, "iter_tflops": 35.21181841676757, "iter_time": 0.3609266471862793, "loss": 0.10071538388729095, "lr": 3e-05, "seqlen": 5104.0, "step_tflops": 38.20830727264012, "step_time": 0.33262095260620117} +{"epoch": 0, "iter": 7089, "iter_tflops": 44.47064103580641, "iter_time": 0.463926155090332, "loss": 0.13981463015079498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.356290855991766, "step_time": 0.4266475601196289} +{"epoch": 0, "iter": 7090, 
"iter_tflops": 8.098660140221256, "iter_time": 2.547469970703125, "loss": 0.09423468261957169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.144406640012836, "step_time": 2.0337407836914063} +{"epoch": 0, "iter": 7091, "iter_tflops": 14.763668763866857, "iter_time": 1.3974232177734374, "loss": 0.13070344924926758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.849790870547608, "step_time": 1.2244124374389647} +{"epoch": 0, "iter": 7092, "iter_tflops": 22.56883887752451, "iter_time": 0.9141406707763672, "loss": 0.18720510601997375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.91072253754368, "step_time": 0.6465254268646241} +{"epoch": 0, "iter": 7093, "iter_tflops": 25.119881995393893, "iter_time": 0.6717329330444336, "loss": 0.2564501166343689, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.716107391211903, "step_time": 0.6315984497070313} +{"epoch": 0, "iter": 7094, "iter_tflops": 23.161820939727097, "iter_time": 0.7285200958251953, "loss": 0.3489498794078827, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 25.907441583458937, "step_time": 0.6513129425048827} +{"epoch": 0, "iter": 7095, "iter_tflops": 29.87439054975138, "iter_time": 0.5648266525268555, "loss": 0.2649344205856323, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 31.817321684243684, "step_time": 0.5303353996276855} +{"epoch": 0, "iter": 7096, "iter_tflops": 29.692959898656692, "iter_time": 0.5682778701782226, "loss": 0.23627623915672302, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 31.667248064454064, "step_time": 0.5328487014770508} +{"epoch": 0, "iter": 7097, "iter_tflops": 31.82395681932658, "iter_time": 0.6482881317138671, "loss": 0.8045278191566467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.73173158376705, "step_time": 0.6116227226257324} +{"epoch": 0, "iter": 7098, "iter_tflops": 14.929308124711879, "iter_time": 1.3819189300537107, "loss": 0.7792884707450867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.330995982495548, "step_time": 1.0672545547485353} +{"epoch": 0, "iter": 
7099, "iter_tflops": 39.5820533932263, "iter_time": 0.5212234268188476, "loss": 0.8168691992759705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.91821546694723, "step_time": 0.480707160949707} +{"epoch": 0, "iter": 7100, "iter_tflops": 37.03437638278911, "iter_time": 0.5570795440673828, "loss": 1.0171706676483154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.077290812028764, "step_time": 0.5147826385498047} +{"epoch": 0, "iter": 7101, "iter_tflops": 14.725821128218865, "iter_time": 0.9569416580200195, "loss": 0.04521928355097771, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 15.646364571631116, "step_time": 0.9006406326293946} +{"epoch": 0, "iter": 7102, "iter_tflops": 7.542144484920914, "iter_time": 1.8684011840820312, "loss": 0.041248761117458344, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 9.080188635773313, "step_time": 1.5519227905273438} +{"epoch": 0, "iter": 7103, "iter_tflops": 10.762554191731043, "iter_time": 1.30933154296875, "loss": 0.04002148285508156, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 11.800690091920028, "step_time": 1.1941464080810549} +{"epoch": 0, "iter": 7104, "iter_tflops": 29.23806018889949, "iter_time": 0.4819660263061523, "loss": 0.023836487904191017, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 32.37749047089296, "step_time": 0.4352329807281494} +{"epoch": 0, "iter": 7105, "iter_tflops": 23.52852453287775, "iter_time": 0.7328851470947265, "loss": 0.2785324156284332, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 25.646885557507037, "step_time": 0.6723508834838867} +{"epoch": 0, "iter": 7106, "iter_tflops": 26.631616875228847, "iter_time": 0.6474900207519532, "loss": 0.19311052560806274, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 28.696452384251344, "step_time": 0.600900276184082} +{"epoch": 0, "iter": 7107, "iter_tflops": 26.971315479550096, "iter_time": 0.6393350067138672, "loss": 0.27411380410194397, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 29.062674505863033, "step_time": 0.5933282623291015} +{"epoch": 0, 
"iter": 7108, "iter_tflops": 25.44088214874726, "iter_time": 0.6777951354980469, "loss": 0.4742041826248169, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 27.456097473493816, "step_time": 0.6280465087890625} +{"epoch": 0, "iter": 7109, "iter_tflops": 17.040133439110367, "iter_time": 1.2107354431152342, "loss": 0.40782231092453003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.991216095061894, "step_time": 1.146731460571289} +{"epoch": 0, "iter": 7110, "iter_tflops": 19.215028639879886, "iter_time": 1.0736956939697266, "loss": 0.46818026900291443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.72943896411811, "step_time": 0.9494535751342774} +{"epoch": 0, "iter": 7111, "iter_tflops": 39.87805123568172, "iter_time": 0.5173546066284179, "loss": 0.4061276614665985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.885227334640184, "step_time": 0.4701147689819336} +{"epoch": 0, "iter": 7112, "iter_tflops": 40.37067010874503, "iter_time": 0.5110416412353516, "loss": 0.5224151015281677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.036219025426405, "step_time": 0.46850283622741695} +{"epoch": 0, "iter": 7113, "iter_tflops": 24.265100121193612, "iter_time": 0.8502373123168946, "loss": 0.5368950963020325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.015028401727815, "step_time": 0.7930452041625976} +{"epoch": 0, "iter": 7114, "iter_tflops": 9.415420566965844, "iter_time": 2.1912025451660155, "loss": 0.7836160063743591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.611517016647221, "step_time": 1.9442171630859373} +{"epoch": 0, "iter": 7115, "iter_tflops": 12.454420777876242, "iter_time": 1.6565277404785157, "loss": 0.9044788479804993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.54674254074656, "step_time": 1.2468371620178222} +{"epoch": 0, "iter": 7116, "iter_tflops": 27.76783717314207, "iter_time": 0.742985252380371, "loss": 0.7090094089508057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.62784254670602, "step_time": 0.6736058368682861} +{"epoch": 0, 
"iter": 7117, "iter_tflops": 14.257729128035072, "iter_time": 1.1000839996337892, "loss": 0.23627185821533203, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 15.399091603157318, "step_time": 1.0185470733642576} +{"epoch": 0, "iter": 7118, "iter_tflops": 17.659889442896215, "iter_time": 0.8881539001464844, "loss": 0.23477745056152344, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 20.881171246353563, "step_time": 0.7511408004760742} +{"epoch": 0, "iter": 7119, "iter_tflops": 27.947717835045466, "iter_time": 0.5612157592773437, "loss": 0.3226815462112427, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.795346831380858, "step_time": 0.5264144020080567} +{"epoch": 0, "iter": 7120, "iter_tflops": 29.00992802702068, "iter_time": 0.5406666183471679, "loss": 0.20584538578987122, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 30.60768799115575, "step_time": 0.5124431381225585} +{"epoch": 0, "iter": 7121, "iter_tflops": 25.72275035061214, "iter_time": 0.8020562820434571, "loss": 0.18284550309181213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.23813207139807, "step_time": 0.757434226989746} +{"epoch": 0, "iter": 7122, "iter_tflops": 18.08582010288941, "iter_time": 1.1407330932617186, "loss": 0.09979367256164551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.65695493802503, "step_time": 0.8720942134857179} +{"epoch": 0, "iter": 7123, "iter_tflops": 45.832749882456156, "iter_time": 0.45013867950439457, "loss": 0.13026154041290283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.490613592811364, "step_time": 0.4168688163757324} +{"epoch": 0, "iter": 7124, "iter_tflops": 50.357580945928696, "iter_time": 0.40969190979003905, "loss": 0.14348167181015015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.060738923607005, "step_time": 0.37469699668884276} +{"epoch": 0, "iter": 7125, "iter_tflops": 21.018672592187453, "iter_time": 0.9815602493286133, "loss": 0.6001402735710144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.961080448077492, "step_time": 0.9394389114379884} 
+{"epoch": 0, "iter": 7126, "iter_tflops": 14.123092347046121, "iter_time": 1.4608056793212891, "loss": 0.5803062319755554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.81643814392305, "step_time": 1.1579808120727542} +{"epoch": 0, "iter": 7127, "iter_tflops": 33.99417263651057, "iter_time": 0.6069008865356444, "loss": 0.669896125793457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.08211271141428, "step_time": 0.5563624076843262} +{"epoch": 0, "iter": 7128, "iter_tflops": 37.2662736402711, "iter_time": 0.5536129989624023, "loss": 0.8660265207290649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.34100813859073, "step_time": 0.5114174003601074} +{"epoch": 0, "iter": 7129, "iter_tflops": 19.134199744918696, "iter_time": 1.0782313232421874, "loss": 0.7704325318336487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.396038626032063, "step_time": 1.0115245361328125} +{"epoch": 0, "iter": 7130, "iter_tflops": 15.560383644073314, "iter_time": 1.3258730621337889, "loss": 0.6095644235610962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.673448647789588, "step_time": 1.1673496170043944} +{"epoch": 0, "iter": 7131, "iter_tflops": 35.02788061584307, "iter_time": 0.5889906311035156, "loss": 0.8165145516395569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.013289975913594, "step_time": 0.542733699798584} +{"epoch": 0, "iter": 7132, "iter_tflops": 41.93348168543406, "iter_time": 0.491995719909668, "loss": 0.6386772394180298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.54963541815337, "step_time": 0.45293652343750007} +{"epoch": 0, "iter": 7133, "iter_tflops": 20.333413282882926, "iter_time": 1.0146399536132813, "loss": 0.7722955346107483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.830494755669427, "step_time": 0.9450584487915039} +{"epoch": 0, "iter": 7134, "iter_tflops": 10.339270787419554, "iter_time": 1.9954108886718749, "loss": 0.7990877032279968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.214343422788357, "step_time": 1.6890873947143554} 
+{"epoch": 0, "iter": 7135, "iter_tflops": 21.05686970671344, "iter_time": 0.9797797012329101, "loss": 0.9847177863121033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.528483421944454, "step_time": 0.8081597785949707} +{"epoch": 0, "iter": 7136, "iter_tflops": 44.18206564297833, "iter_time": 0.4669562911987304, "loss": 0.9431267976760864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.88587038158457, "step_time": 0.4308388538360596} +{"epoch": 0, "iter": 7137, "iter_tflops": 17.094061135865594, "iter_time": 0.8529936218261718, "loss": 0.18330621719360352, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 17.966030071354904, "step_time": 0.8115941619873046} +{"epoch": 0, "iter": 7138, "iter_tflops": 8.807614484170326, "iter_time": 1.6555135498046873, "loss": 0.23857641220092773, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 11.56606554571811, "step_time": 1.2606815223693848} +{"epoch": 0, "iter": 7139, "iter_tflops": 22.170005012423154, "iter_time": 0.6576960678100586, "loss": 0.3096546530723572, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 23.927709308930545, "step_time": 0.6093824081420899} +{"epoch": 0, "iter": 7140, "iter_tflops": 21.51656035771775, "iter_time": 0.6776698913574218, "loss": 0.3598472476005554, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 23.042535272952758, "step_time": 0.6327917022705078} +{"epoch": 0, "iter": 7141, "iter_tflops": 18.976775660288794, "iter_time": 1.0871759185791017, "loss": 0.14470174908638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.385179398098185, "step_time": 1.012063377380371} +{"epoch": 0, "iter": 7142, "iter_tflops": 26.297437770061418, "iter_time": 0.7845286560058594, "loss": 0.22830307483673096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.45784500960506, "step_time": 0.6356273345947265} +{"epoch": 0, "iter": 7143, "iter_tflops": 51.71919784791901, "iter_time": 0.39890590667724607, "loss": 0.2821410894393921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.51339588125419, "step_time": 0.36506554222106935} 
+{"epoch": 0, "iter": 7144, "iter_tflops": 46.47918792288543, "iter_time": 0.4438780975341797, "loss": 0.2302556335926056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.3915138880193, "step_time": 0.4094160289764404} +{"epoch": 0, "iter": 7145, "iter_tflops": 36.34712956466538, "iter_time": 0.5676127319335937, "loss": 0.13703785836696625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.092539449023356, "step_time": 0.5277501487731934} +{"epoch": 0, "iter": 7146, "iter_tflops": 26.708628838836088, "iter_time": 0.7724504928588867, "loss": 0.10619243234395981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.84415048265496, "step_time": 0.6912943801879882} +{"epoch": 0, "iter": 7147, "iter_tflops": 49.80378641200737, "iter_time": 0.4142474899291992, "loss": 0.20016612112522125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.60122473136058, "step_time": 0.3778503799438477} +{"epoch": 0, "iter": 7148, "iter_tflops": 50.4971363008799, "iter_time": 0.4085596733093262, "loss": 0.16709990799427032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.632177439214544, "step_time": 0.37763630294799805} +{"epoch": 0, "iter": 7149, "iter_tflops": 22.671632071078736, "iter_time": 0.9099959564208985, "loss": 0.6868149638175964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.870359077066432, "step_time": 0.864297576904297} +{"epoch": 0, "iter": 7150, "iter_tflops": 17.261887597935168, "iter_time": 1.1951817779541016, "loss": 0.6837472915649414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.95990359693466, "step_time": 0.9843124237060548} +{"epoch": 0, "iter": 7151, "iter_tflops": 36.84344649836325, "iter_time": 0.559966438293457, "loss": 0.6571341753005981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.191859364118876, "step_time": 0.5133152294158935} +{"epoch": 0, "iter": 7152, "iter_tflops": 39.60519987752394, "iter_time": 0.5209188079833985, "loss": 0.7851726412773132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.813444166121684, "step_time": 0.4818835277557373} 
+{"epoch": 0, "iter": 7153, "iter_tflops": 28.01073913217936, "iter_time": 0.7365422744750977, "loss": 0.2555810809135437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.034711864718172, "step_time": 0.6869083213806153} +{"epoch": 0, "iter": 7154, "iter_tflops": 10.453615963250195, "iter_time": 1.9735844116210937, "loss": 0.2138351947069168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.0787252699898, "step_time": 1.5774544601440428} +{"epoch": 0, "iter": 7155, "iter_tflops": 11.349436407058517, "iter_time": 1.8178077545166016, "loss": 0.15401870012283325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.937512212310374, "step_time": 1.3811599426269532} +{"epoch": 0, "iter": 7156, "iter_tflops": 42.226393494406366, "iter_time": 0.488582893371582, "loss": 0.23360957205295563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.389488672302676, "step_time": 0.444736385345459} +{"epoch": 0, "iter": 7157, "iter_tflops": 19.277332316958255, "iter_time": 0.8306307678222655, "loss": 0.1524718552827835, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 20.25226108528198, "step_time": 0.7906448211669922} +{"epoch": 0, "iter": 7158, "iter_tflops": 8.351415108415068, "iter_time": 1.9173212127685546, "loss": 0.14650975167751312, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 9.783854337652391, "step_time": 1.636609130859375} +{"epoch": 0, "iter": 7159, "iter_tflops": 24.105770949819014, "iter_time": 0.6642536087036133, "loss": 0.2217969298362732, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 25.9105227327767, "step_time": 0.6179861946105958} +{"epoch": 0, "iter": 7160, "iter_tflops": 24.691614590796238, "iter_time": 0.6484932479858398, "loss": 0.10901588946580887, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 26.45134279376068, "step_time": 0.6053509445190429} +{"epoch": 0, "iter": 7161, "iter_tflops": 23.775642434069987, "iter_time": 0.8677407379150391, "loss": 0.843843936920166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.38646227734733, "step_time": 0.8126809196472169} 
+{"epoch": 0, "iter": 7162, "iter_tflops": 9.933908444574195, "iter_time": 2.076835479736328, "loss": 0.8315654993057251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.700503014055277, "step_time": 1.7632655181884764} +{"epoch": 0, "iter": 7163, "iter_tflops": 14.11803694829305, "iter_time": 1.4613287658691405, "loss": 0.6812010407447815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.809258740841447, "step_time": 1.2273648605346679} +{"epoch": 0, "iter": 7164, "iter_tflops": 39.72195745254762, "iter_time": 0.5193876342773437, "loss": 0.7753415703773499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88240563158196, "step_time": 0.48110858535766604} +{"epoch": 0, "iter": 7165, "iter_tflops": 15.949452617542793, "iter_time": 0.93724853515625, "loss": 0.2398233711719513, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 16.6727561949134, "step_time": 0.896588478088379} +{"epoch": 0, "iter": 7166, "iter_tflops": 6.56590345941078, "iter_time": 2.276701324462891, "loss": 0.19987894594669342, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 8.104895417999439, "step_time": 1.8443916091918946} +{"epoch": 0, "iter": 7167, "iter_tflops": 9.779225367234382, "iter_time": 1.528607894897461, "loss": 0.16165290772914886, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 13.321596813268986, "step_time": 1.122132827758789} +{"epoch": 0, "iter": 7168, "iter_tflops": 27.948597786410474, "iter_time": 0.5348605041503907, "loss": 0.37914472818374634, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 29.69196109951167, "step_time": 0.5034561729431153} +{"epoch": 0, "iter": 7169, "iter_tflops": 23.98003338222264, "iter_time": 0.6046495208740233, "loss": 0.21222509443759918, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 25.786910583200694, "step_time": 0.5622820014953614} +{"epoch": 0, "iter": 7170, "iter_tflops": 22.14229901225893, "iter_time": 0.6548333435058594, "loss": 0.2250547707080841, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.858154307846657, "step_time": 0.6077383651733399} 
+{"epoch": 0, "iter": 7171, "iter_tflops": 22.2058834613789, "iter_time": 0.6529582901000976, "loss": 0.20465801656246185, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.86770363855816, "step_time": 0.6074952125549317} +{"epoch": 0, "iter": 7172, "iter_tflops": 24.581997902840754, "iter_time": 0.5898428497314454, "loss": 0.2444174885749817, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 26.282243797389775, "step_time": 0.5516848487854004} +{"epoch": 0, "iter": 7173, "iter_tflops": 30.159349444814964, "iter_time": 0.6840695800781251, "loss": 0.2225600928068161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.43251296601073, "step_time": 0.6170967025756836} +{"epoch": 0, "iter": 7174, "iter_tflops": 41.3060050047078, "iter_time": 0.4994695930480957, "loss": 0.2517602741718292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.31898300603252, "step_time": 0.45524175834655767} +{"epoch": 0, "iter": 7175, "iter_tflops": 39.5993398275602, "iter_time": 0.5209958953857422, "loss": 0.2800273299217224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.27718329988124, "step_time": 0.476719877243042} +{"epoch": 0, "iter": 7176, "iter_tflops": 44.898155963978915, "iter_time": 0.4595087051391601, "loss": 0.3033187985420227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.889510532383675, "step_time": 0.42199427413940427} +{"epoch": 0, "iter": 7177, "iter_tflops": 19.33451065224401, "iter_time": 1.067060546875, "loss": 0.29961875081062317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.947024377997575, "step_time": 0.9849176254272461} +{"epoch": 0, "iter": 7178, "iter_tflops": 31.45622855184331, "iter_time": 0.6558667221069336, "loss": 0.2322617769241333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.520457105714414, "step_time": 0.5808228607177734} +{"epoch": 0, "iter": 7179, "iter_tflops": 47.644391162156296, "iter_time": 0.4330225028991699, "loss": 0.233485609292984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.75864464585388, "step_time": 0.39860188865661617} 
+{"epoch": 0, "iter": 7180, "iter_tflops": 52.80684861044063, "iter_time": 0.3906897315979004, "loss": 0.29203349351882935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.280701986375, "step_time": 0.3601752910614014} +{"epoch": 0, "iter": 7181, "iter_tflops": 32.75540174636076, "iter_time": 0.4389241523742676, "loss": 0.06840461492538452, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 35.841132708297586, "step_time": 0.40113511657714845} +{"epoch": 0, "iter": 7182, "iter_tflops": 28.274145587361957, "iter_time": 0.5084905891418458, "loss": 0.05057172104716301, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 31.37821591468781, "step_time": 0.4581884765625} +{"epoch": 0, "iter": 7183, "iter_tflops": 31.704208083719642, "iter_time": 0.4534772453308106, "loss": 0.040125712752342224, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 34.970908993493524, "step_time": 0.41111705017089845} +{"epoch": 0, "iter": 7184, "iter_tflops": 29.984676443335463, "iter_time": 0.47948281097412115, "loss": 0.03684670105576515, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 32.981659154518496, "step_time": 0.4359130897521973} +{"epoch": 0, "iter": 7185, "iter_tflops": 19.257737054433314, "iter_time": 1.0713145294189452, "loss": 0.061846569180488586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.56311327463529, "step_time": 1.0033059310913088} +{"epoch": 0, "iter": 7186, "iter_tflops": 18.71840918088518, "iter_time": 1.1021819915771487, "loss": 0.04262499138712883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.636521717517187, "step_time": 0.8374190864562988} +{"epoch": 0, "iter": 7187, "iter_tflops": 39.9194384515154, "iter_time": 0.516818229675293, "loss": 0.09022284299135208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.93862010780141, "step_time": 0.46954350090026853} +{"epoch": 0, "iter": 7188, "iter_tflops": 48.1045250086174, "iter_time": 0.4288805160522461, "loss": 0.09544461965560913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.78437747823752, "step_time": 
0.3908560543060303} +{"epoch": 0, "iter": 7189, "iter_tflops": 22.408101131782974, "iter_time": 0.9206979827880859, "loss": 0.6358617544174194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.017853033341623, "step_time": 0.8589899139404297} +{"epoch": 0, "iter": 7190, "iter_tflops": 13.929221047853561, "iter_time": 1.481137634277344, "loss": 0.6759518980979919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.196274441574232, "step_time": 1.0747446632385254} +{"epoch": 0, "iter": 7191, "iter_tflops": 38.64765565932827, "iter_time": 0.5338252258300781, "loss": 0.749582827091217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.363389178391806, "step_time": 0.4870029029846191} +{"epoch": 0, "iter": 7192, "iter_tflops": 35.70344557854085, "iter_time": 0.5778460083007813, "loss": 0.5828049778938293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.09187386741775, "step_time": 0.5277591342926026} +{"epoch": 0, "iter": 7193, "iter_tflops": 32.360020484703156, "iter_time": 0.6093682174682618, "loss": 0.039380934089422226, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 35.50207535627507, "step_time": 0.5554370498657226} +{"epoch": 0, "iter": 7194, "iter_tflops": 32.35524565090265, "iter_time": 0.6094581451416015, "loss": 0.05067726969718933, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 35.869510701708826, "step_time": 0.5497473373413085} +{"epoch": 0, "iter": 7195, "iter_tflops": 39.555835288103076, "iter_time": 0.49851476669311523, "loss": 0.031028058379888535, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 43.66540779017175, "step_time": 0.4515970191955566} +{"epoch": 0, "iter": 7196, "iter_tflops": 43.67116809953623, "iter_time": 0.45153745269775386, "loss": 0.018306540325284004, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 48.07168819641572, "step_time": 0.4102033596038818} +{"epoch": 0, "iter": 7197, "iter_tflops": 13.695413584040503, "iter_time": 1.1362862701416017, "loss": 0.14880090951919556, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 14.563471306494831, 
"step_time": 1.0685577697753905} +{"epoch": 0, "iter": 7198, "iter_tflops": 11.989970279634662, "iter_time": 1.297910675048828, "loss": 0.1934022158384323, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 14.423814945198728, "step_time": 1.0789039154052735} +{"epoch": 0, "iter": 7199, "iter_tflops": 24.91043634959044, "iter_time": 0.624714485168457, "loss": 0.2306622713804245, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 26.78656518566089, "step_time": 0.5809595336914063} +{"epoch": 0, "iter": 7200, "iter_tflops": 25.06854691618364, "iter_time": 0.6207743301391602, "loss": 0.26515474915504456, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 26.851423793486898, "step_time": 0.5795562477111816} +{"epoch": 0, "iter": 7201, "iter_tflops": 21.55017994509072, "iter_time": 0.9573513336181639, "loss": 0.9769712686538696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.944157321854423, "step_time": 0.899187240600586} +{"epoch": 0, "iter": 7202, "iter_tflops": 23.05637028937436, "iter_time": 0.8948109893798829, "loss": 0.8795165419578552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.394127650851008, "step_time": 0.7816546840667725} +{"epoch": 0, "iter": 7203, "iter_tflops": 43.41979944620358, "iter_time": 0.4751540489196777, "loss": 1.0994765758514404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.837994555672786, "step_time": 0.44047772979736327} +{"epoch": 0, "iter": 7204, "iter_tflops": 45.55748882182874, "iter_time": 0.4528584442138672, "loss": 1.0361137390136719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.19532614451379, "step_time": 0.41937100791931153} +{"epoch": 0, "iter": 7205, "iter_tflops": 29.450176971798758, "iter_time": 0.7005422592163086, "loss": 0.742841362953186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.145952547567475, "step_time": 0.6624004669189453} +{"epoch": 0, "iter": 7206, "iter_tflops": 16.899292157201263, "iter_time": 1.220825897216797, "loss": 0.8502954840660095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.20895785466069, 
"step_time": 0.8522090721130372} +{"epoch": 0, "iter": 7207, "iter_tflops": 45.71811747325815, "iter_time": 0.4512673454284668, "loss": 0.8798341155052185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.28054514354413, "step_time": 0.41864580535888674} +{"epoch": 0, "iter": 7208, "iter_tflops": 47.449428173832736, "iter_time": 0.43480173110961917, "loss": 0.829858124256134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.26929370566279, "step_time": 0.40240643119812014} +{"epoch": 0, "iter": 7209, "iter_tflops": 27.1459216927235, "iter_time": 0.7600071105957033, "loss": 0.7247411608695984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.713747780154534, "step_time": 0.7185092544555663} +{"epoch": 0, "iter": 7210, "iter_tflops": 17.42987050380105, "iter_time": 1.1836630401611328, "loss": 0.692232608795166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.018608550444895, "step_time": 0.9815632400512695} +{"epoch": 0, "iter": 7211, "iter_tflops": 37.86255629675742, "iter_time": 0.5448943634033203, "loss": 0.7867220044136047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.5553834070024, "step_time": 0.49647222137451175} +{"epoch": 0, "iter": 7212, "iter_tflops": 42.141157057850826, "iter_time": 0.4895711212158203, "loss": 0.7648032903671265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.575517610741144, "step_time": 0.45267930221557623} +{"epoch": 0, "iter": 7213, "iter_tflops": 20.25193447282171, "iter_time": 1.0187221145629883, "loss": 0.03629518300294876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.771032246341164, "step_time": 0.9476396560668944} +{"epoch": 0, "iter": 7214, "iter_tflops": 13.919426898768114, "iter_time": 1.4821798095703125, "loss": 0.034694183617830276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.731576331933333, "step_time": 1.233063346862793} +{"epoch": 0, "iter": 7215, "iter_tflops": 54.06982663452717, "iter_time": 0.381563892364502, "loss": 0.04270099103450775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.10247640277792, 
"step_time": 0.3490732498168945} +{"epoch": 0, "iter": 7216, "iter_tflops": 56.15017689043131, "iter_time": 0.3674270439147949, "loss": 0.025857409462332726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.126304944010634, "step_time": 0.3375157966613769} +{"epoch": 0, "iter": 7217, "iter_tflops": 31.27343564237089, "iter_time": 0.6597002563476563, "loss": 0.23114682734012604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.358630313850824, "step_time": 0.6184634475708009} +{"epoch": 0, "iter": 7218, "iter_tflops": 13.85983508482493, "iter_time": 1.4885525970458984, "loss": 0.21078695356845856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.01065963493549, "step_time": 1.2885848541259766} +{"epoch": 0, "iter": 7219, "iter_tflops": 38.88128722063185, "iter_time": 0.5306175537109376, "loss": 0.29846781492233276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.09338617150399, "step_time": 0.47875312995910646} +{"epoch": 0, "iter": 7220, "iter_tflops": 47.14123022066781, "iter_time": 0.4376443595886231, "loss": 0.28287461400032043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.34604304142012, "step_time": 0.40180493545532225} +{"epoch": 0, "iter": 7221, "iter_tflops": 27.258565887840142, "iter_time": 0.7568664321899413, "loss": 0.5555399060249329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.766777723191197, "step_time": 0.7171847229003906} +{"epoch": 0, "iter": 7222, "iter_tflops": 15.750857311615716, "iter_time": 1.3098394012451173, "loss": 0.5032348036766052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.087420986962723, "step_time": 0.934065299987793} +{"epoch": 0, "iter": 7223, "iter_tflops": 37.5954997012987, "iter_time": 0.5487649765014648, "loss": 0.5863865613937378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.2630532258159, "step_time": 0.4999895038604736} +{"epoch": 0, "iter": 7224, "iter_tflops": 39.797119722411296, "iter_time": 0.5184067001342774, "loss": 0.5443770885467529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.77673763232573, "step_time": 0.47127983093261716} +{"epoch": 0, "iter": 7225, "iter_tflops": 18.558767642227693, "iter_time": 1.1116629028320313, "loss": 0.03839763626456261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.467633571709026, "step_time": 1.0597638092041017} +{"epoch": 0, "iter": 7226, "iter_tflops": 21.99035862764628, "iter_time": 0.9381881332397461, "loss": 0.0335206538438797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.713478057109942, "step_time": 0.7723102722167968} +{"epoch": 0, "iter": 7227, "iter_tflops": 54.943409548108356, "iter_time": 0.3754971466064453, "loss": 0.03526996076107025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.292586017027695, "step_time": 0.3421829261779785} +{"epoch": 0, "iter": 7228, "iter_tflops": 55.25562350695782, "iter_time": 0.37337545394897464, "loss": 0.03024560958147049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.66314461343355, "step_time": 0.34009271430969235} +{"epoch": 0, "iter": 7229, "iter_tflops": 30.117306580238584, "iter_time": 0.6850245208740234, "loss": 0.7000844478607178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.92399758449153, "step_time": 0.6462565803527832} +{"epoch": 0, "iter": 7230, "iter_tflops": 21.823706488514933, "iter_time": 0.945352409362793, "loss": 0.784578800201416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.635007097991075, "step_time": 0.7745856208801271} +{"epoch": 0, "iter": 7231, "iter_tflops": 41.51958360392046, "iter_time": 0.49690029907226557, "loss": 0.8098635673522949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.58814409256964, "step_time": 0.46270357131958} +{"epoch": 0, "iter": 7232, "iter_tflops": 44.96427605127771, "iter_time": 0.45883299636840824, "loss": 0.7828570604324341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.467927159321114, "step_time": 0.42566486167907713} +{"epoch": 0, "iter": 7233, "iter_tflops": 33.04986695840093, "iter_time": 0.6242413482666017, "loss": 0.23896801471710205, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 35.34608090696167, "step_time": 0.5836882896423339} +{"epoch": 0, "iter": 7234, "iter_tflops": 14.84217181406008, "iter_time": 1.390031982421875, "loss": 0.21238364279270172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.772388050548436, "step_time": 1.230062973022461} +{"epoch": 0, "iter": 7235, "iter_tflops": 34.33354237680613, "iter_time": 0.6009019775390626, "loss": 0.27162790298461914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.08249427606174, "step_time": 0.5278857936859132} +{"epoch": 0, "iter": 7236, "iter_tflops": 43.79565641514776, "iter_time": 0.47107624816894533, "loss": 0.43819236755371094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.14212781177304, "step_time": 0.4285455265045166} +{"epoch": 0, "iter": 7237, "iter_tflops": 20.236558586024678, "iter_time": 1.0194961471557618, "loss": 0.04117294028401375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.60819743058127, "step_time": 0.9547808685302734} +{"epoch": 0, "iter": 7238, "iter_tflops": 24.774102699996064, "iter_time": 0.8327685470581054, "loss": 0.01696634851396084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.28422303391756, "step_time": 0.5388928356170655} +{"epoch": 0, "iter": 7239, "iter_tflops": 58.120736478089526, "iter_time": 0.35496958160400394, "loss": 0.05335906893014908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.64337139757356, "step_time": 0.324167200088501} +{"epoch": 0, "iter": 7240, "iter_tflops": 52.59210911556311, "iter_time": 0.39228496170043947, "loss": 0.06261347234249115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.32703463134816, "step_time": 0.35988419151306145} +{"epoch": 0, "iter": 7241, "iter_tflops": 47.339600360305965, "iter_time": 0.43581047058105465, "loss": 0.09543675184249878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.80210858437423, "step_time": 0.3982674465179443} +{"epoch": 0, "iter": 7242, "iter_tflops": 28.91805952558981, "iter_time": 0.7134328460693359, "loss": 0.08272729068994522, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 36.04762035863179, "step_time": 0.5723288612365722} +{"epoch": 0, "iter": 7243, "iter_tflops": 55.7051282936821, "iter_time": 0.3703625526428223, "loss": 0.07501262426376343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.54016904629812, "step_time": 0.34078354644775394} +{"epoch": 0, "iter": 7244, "iter_tflops": 50.316457123262225, "iter_time": 0.4100267524719238, "loss": 0.10268474370241165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.8506144984688, "step_time": 0.3761324043273926} +{"epoch": 0, "iter": 7245, "iter_tflops": 24.72345154092985, "iter_time": 0.8344746475219726, "loss": 0.47997230291366577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.908919153400205, "step_time": 0.796293098449707} +{"epoch": 0, "iter": 7246, "iter_tflops": 14.73879492765358, "iter_time": 1.3997815704345704, "loss": 0.5690959095954895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.97834999430229, "step_time": 1.2151412544250488} +{"epoch": 0, "iter": 7247, "iter_tflops": 35.980729199849584, "iter_time": 0.5733928680419922, "loss": 0.6889349818229675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.52333328827921, "step_time": 0.5219978122711182} +{"epoch": 0, "iter": 7248, "iter_tflops": 40.69617725211616, "iter_time": 0.5069540901184082, "loss": 0.4804881811141968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.303236160853054, "step_time": 0.46567915344238275} +{"epoch": 0, "iter": 7249, "iter_tflops": 18.798261049907158, "iter_time": 1.0975001068115235, "loss": 0.7090470790863037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.582578676878512, "step_time": 1.0023570823669434} +{"epoch": 0, "iter": 7250, "iter_tflops": 20.191576716093664, "iter_time": 1.0217673339843751, "loss": 0.8794081211090088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.91570448131386, "step_time": 0.8280357284545897} +{"epoch": 0, "iter": 7251, "iter_tflops": 46.79267031615264, "iter_time": 0.44090438461303716, "loss": 0.8323001861572266, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.72896092444346, "step_time": 0.40669260978698724} +{"epoch": 0, "iter": 7252, "iter_tflops": 45.545451981035875, "iter_time": 0.4529781265258789, "loss": 0.7413581013679504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.20565218144607, "step_time": 0.4192830009460449} +{"epoch": 0, "iter": 7253, "iter_tflops": 43.901605595009144, "iter_time": 0.4699393844604492, "loss": 0.198208749294281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.024301366766544, "step_time": 0.4295969524383545} +{"epoch": 0, "iter": 7254, "iter_tflops": 49.373947849081816, "iter_time": 0.4178538360595703, "loss": 0.25080353021621704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.85329505741687, "step_time": 0.3830980720520019} +{"epoch": 0, "iter": 7255, "iter_tflops": 47.42610430327346, "iter_time": 0.43501556396484375, "loss": 0.21231816709041595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.31701452001917, "step_time": 0.4020322246551513} +{"epoch": 0, "iter": 7256, "iter_tflops": 50.24491217316598, "iter_time": 0.41061059951782225, "loss": 0.378450870513916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.780903901186306, "step_time": 0.37661104583740235} +{"epoch": 0, "iter": 7257, "iter_tflops": 25.14045230636443, "iter_time": 0.8206333465576172, "loss": 0.7474116086959839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.469473825488677, "step_time": 0.7794296798706054} +{"epoch": 0, "iter": 7258, "iter_tflops": 22.144537632986733, "iter_time": 0.9316560974121093, "loss": 0.7718908786773682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.523845182067813, "step_time": 0.7495716304779053} +{"epoch": 0, "iter": 7259, "iter_tflops": 48.29091081530767, "iter_time": 0.4272251892089844, "loss": 0.9162521958351135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.084509371702005, "step_time": 0.3961080513000488} +{"epoch": 0, "iter": 7260, "iter_tflops": 46.165000050816, "iter_time": 0.4468990249633789, "loss": 0.6561198234558105, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.03933961499163, "step_time": 0.41229747772216796} +{"epoch": 0, "iter": 7261, "iter_tflops": 29.056538713595046, "iter_time": 0.7100327301025391, "loss": 0.40500369668006897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.9202814047192, "step_time": 0.6672349853515626} +{"epoch": 0, "iter": 7262, "iter_tflops": 21.859312815074393, "iter_time": 0.9438125381469726, "loss": 0.5920421481132507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.473913085736765, "step_time": 0.8429830341339111} +{"epoch": 0, "iter": 7263, "iter_tflops": 47.53785440661161, "iter_time": 0.4339929466247559, "loss": 0.643515944480896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.92599726782542, "step_time": 0.39731723213195796} +{"epoch": 0, "iter": 7264, "iter_tflops": 47.913378980048265, "iter_time": 0.43059149551391596, "loss": 0.600519597530365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.13875383932623, "step_time": 0.3956959457397461} +{"epoch": 0, "iter": 7265, "iter_tflops": 47.59326354708719, "iter_time": 0.43348768234252927, "loss": 0.046859826892614365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.594104246631254, "step_time": 0.3922700805664062} +{"epoch": 0, "iter": 7266, "iter_tflops": 40.947123906464896, "iter_time": 0.5038471946716309, "loss": 0.02594335936009884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.102047293653115, "step_time": 0.45743141937255855} +{"epoch": 0, "iter": 7267, "iter_tflops": 45.875547072799016, "iter_time": 0.4497187461853027, "loss": 0.04654765874147415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.6395119959906, "step_time": 0.40741098594665526} +{"epoch": 0, "iter": 7268, "iter_tflops": 40.32600210757499, "iter_time": 0.5116077079772949, "loss": 0.02121799997985363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.5359109937423, "step_time": 0.463246244430542} +{"epoch": 0, "iter": 7269, "iter_tflops": 37.80284802807082, "iter_time": 0.5457550048828125, "loss": 0.035847753286361694, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 42.33687890376175, "step_time": 0.4873078517913818} +{"epoch": 0, "iter": 7270, "iter_tflops": 37.86080798737758, "iter_time": 0.5449195251464843, "loss": 0.033901117742061615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62969039656354, "step_time": 0.4955860424041748} +{"epoch": 0, "iter": 7271, "iter_tflops": 43.0109897375915, "iter_time": 0.479670280456543, "loss": 0.03761264681816101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.56270377965513, "step_time": 0.43376620483398437} +{"epoch": 0, "iter": 7272, "iter_tflops": 46.38187525602178, "iter_time": 0.44480938720703117, "loss": 0.024959756061434746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.86075341461478, "step_time": 0.40563877105712887} +{"epoch": 0, "iter": 7273, "iter_tflops": 23.994931905479636, "iter_time": 0.8598104629516603, "loss": 0.16317644715309143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.53890938125938, "step_time": 0.8078298568725586} +{"epoch": 0, "iter": 7274, "iter_tflops": 16.073601747135996, "iter_time": 1.2835389251708984, "loss": 0.12083636969327927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.668930722283708, "step_time": 1.0489179000854494} +{"epoch": 0, "iter": 7275, "iter_tflops": 47.80677354227148, "iter_time": 0.4315516815185547, "loss": 0.13664300739765167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.93400993119341, "step_time": 0.397255931854248} +{"epoch": 0, "iter": 7276, "iter_tflops": 44.239139326380936, "iter_time": 0.4663538627624511, "loss": 0.1198078915476799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.44057421088553, "step_time": 0.42590522193908686} +{"epoch": 0, "iter": 7277, "iter_tflops": 36.86800677771978, "iter_time": 0.5595934066772461, "loss": 0.6398566365242004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.95554834581208, "step_time": 0.5163511543273925} +{"epoch": 0, "iter": 7278, "iter_tflops": 12.146474401902747, "iter_time": 1.6985252532958983, "loss": 0.702186107635498, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 14.597637986853782, "step_time": 1.4133172454833984} +{"epoch": 0, "iter": 7279, "iter_tflops": 9.94967111666427, "iter_time": 2.073545272827148, "loss": 0.6369944214820862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.570128571767887, "step_time": 1.6412794342041015} +{"epoch": 0, "iter": 7280, "iter_tflops": 29.650585489168567, "iter_time": 0.6958072891235352, "loss": 0.6455835103988647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.040362318274845, "step_time": 0.5423474502563477} +{"epoch": 0, "iter": 7281, "iter_tflops": 15.361967941112647, "iter_time": 0.8987518844604493, "loss": 0.208521768450737, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 16.51080443251145, "step_time": 0.8362159271240236} +{"epoch": 0, "iter": 7282, "iter_tflops": 24.509430908290877, "iter_time": 0.563317756652832, "loss": 0.5107141733169556, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 26.1671556707047, "step_time": 0.5276308135986328} +{"epoch": 0, "iter": 7283, "iter_tflops": 23.84155517493704, "iter_time": 0.579098030090332, "loss": 0.21310681104660034, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 25.500802110883907, "step_time": 0.5414181709289551} +{"epoch": 0, "iter": 7284, "iter_tflops": 24.538508477775476, "iter_time": 0.5626502380371095, "loss": 0.30781880021095276, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 26.226957537835002, "step_time": 0.5264277267456055} +{"epoch": 0, "iter": 7285, "iter_tflops": 29.111195512660544, "iter_time": 0.7086996307373047, "loss": 0.06299305707216263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.93636582871631, "step_time": 0.6668880767822265} +{"epoch": 0, "iter": 7286, "iter_tflops": 40.58589335988305, "iter_time": 0.5083316345214843, "loss": 0.10254691541194916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.47037879366118, "step_time": 0.42564333152770994} +{"epoch": 0, "iter": 7287, "iter_tflops": 52.94044577862888, "iter_time": 0.3897038116455078, "loss": 0.07617738842964172, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 57.843428110380344, "step_time": 0.3566713485717773} +{"epoch": 0, "iter": 7288, "iter_tflops": 54.76371866967972, "iter_time": 0.3767292289733886, "loss": 0.08443216234445572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.51658834139326, "step_time": 0.34664442443847654} +{"epoch": 0, "iter": 7289, "iter_tflops": 28.8976410463995, "iter_time": 0.7139369430541992, "loss": 0.16040898859500885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.738114409205092, "step_time": 0.6711893005371093} +{"epoch": 0, "iter": 7290, "iter_tflops": 16.64223500520209, "iter_time": 1.239682861328125, "loss": 0.0885261818766594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.275666599437443, "step_time": 1.1288832283020018} +{"epoch": 0, "iter": 7291, "iter_tflops": 48.18848891533945, "iter_time": 0.4281332321166992, "loss": 0.1864604949951172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.74200548867134, "step_time": 0.3911700611114502} +{"epoch": 0, "iter": 7292, "iter_tflops": 49.38694181715155, "iter_time": 0.417743896484375, "loss": 0.10101091861724854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.76476993250457, "step_time": 0.38372885322570804} +{"epoch": 0, "iter": 7293, "iter_tflops": 19.38994876389737, "iter_time": 0.8258064804077149, "loss": 0.24972102046012878, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 20.399784597144144, "step_time": 0.7849271774291993} +{"epoch": 0, "iter": 7294, "iter_tflops": 7.634596745863489, "iter_time": 2.097340026855469, "loss": 0.21185627579689026, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 8.546132171197966, "step_time": 1.8736365203857424} +{"epoch": 0, "iter": 7295, "iter_tflops": 9.912521409677941, "iter_time": 1.6153655242919924, "loss": 0.4298149645328522, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 11.467711403060303, "step_time": 1.3962982482910158} +{"epoch": 0, "iter": 7296, "iter_tflops": 22.93437969409955, "iter_time": 0.6981808776855468, "loss": 0.3119968771934509, "lr": 
3e-05, "seqlen": 6400.0, "step_tflops": 24.743977786844383, "step_time": 0.647120906829834} +{"epoch": 0, "iter": 7297, "iter_tflops": 19.069203511074026, "iter_time": 0.828956039428711, "loss": 0.20109787583351135, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 20.910288892920274, "step_time": 0.7559690589904784} +{"epoch": 0, "iter": 7298, "iter_tflops": 22.953730080229263, "iter_time": 0.6886693954467773, "loss": 0.403506875038147, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 24.609486980671125, "step_time": 0.6423348617553711} +{"epoch": 0, "iter": 7299, "iter_tflops": 24.08579445986925, "iter_time": 0.6563010177612304, "loss": 0.2908177971839905, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.682544707629013, "step_time": 0.6154970855712891} +{"epoch": 0, "iter": 7300, "iter_tflops": 23.58036155228177, "iter_time": 0.6703684921264648, "loss": 0.28214389085769653, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 25.28609374479524, "step_time": 0.6251472282409668} +{"epoch": 0, "iter": 7301, "iter_tflops": 17.904715850305415, "iter_time": 1.152271484375, "loss": 0.7902941703796387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.210025321089766, "step_time": 1.0739753417968751} +{"epoch": 0, "iter": 7302, "iter_tflops": 20.450207653936445, "iter_time": 1.008845184326172, "loss": 0.7481001019477844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.705526321209994, "step_time": 0.8350801048278808} +{"epoch": 0, "iter": 7303, "iter_tflops": 37.50321218299122, "iter_time": 0.5501153717041016, "loss": 0.7237042784690857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.84917148533053, "step_time": 0.5050553722381592} +{"epoch": 0, "iter": 7304, "iter_tflops": 40.95977710345434, "iter_time": 0.5036915473937988, "loss": 0.965505838394165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.310857664919205, "step_time": 0.46559905624389647} +{"epoch": 0, "iter": 7305, "iter_tflops": 18.343966003798656, "iter_time": 1.1246800994873047, "loss": 0.11680109798908234, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 19.634501307970922, "step_time": 1.050757194519043} +{"epoch": 0, "iter": 7306, "iter_tflops": 24.012088529626883, "iter_time": 0.859196128845215, "loss": 0.16082198917865753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.79890849567271, "step_time": 0.6923439331054687} +{"epoch": 0, "iter": 7307, "iter_tflops": 51.71255641743268, "iter_time": 0.39895713806152344, "loss": 0.1227700412273407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.3277958810799, "step_time": 0.36626843261718756} +{"epoch": 0, "iter": 7308, "iter_tflops": 53.65069551468801, "iter_time": 0.38454475402832033, "loss": 0.1513623148202896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.017725918882704, "step_time": 0.35559983062744144} +{"epoch": 0, "iter": 7309, "iter_tflops": 17.91714973694275, "iter_time": 1.1514718475341796, "loss": 1.001020908355713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.590795669071767, "step_time": 1.1097477416992187} +{"epoch": 0, "iter": 7310, "iter_tflops": 18.745319411491373, "iter_time": 1.1005997314453124, "loss": 0.9565277099609375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.790495771323037, "step_time": 0.8322178668975829} +{"epoch": 0, "iter": 7311, "iter_tflops": 41.047283484078946, "iter_time": 0.5026177558898925, "loss": 0.7191203236579895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.99856639738182, "step_time": 0.4689037666320801} +{"epoch": 0, "iter": 7312, "iter_tflops": 42.203262591481526, "iter_time": 0.4888506774902344, "loss": 0.9245975017547607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38017208365736, "step_time": 0.4546279258728027} +{"epoch": 0, "iter": 7313, "iter_tflops": 33.605892384807284, "iter_time": 0.6139129791259765, "loss": 0.7463304996490479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.84980395042047, "step_time": 0.5754869270324707} +{"epoch": 0, "iter": 7314, "iter_tflops": 12.550715390853151, "iter_time": 1.643818130493164, "loss": 0.6505951285362244, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 15.51799942307071, "step_time": 1.3294944114685059} +{"epoch": 0, "iter": 7315, "iter_tflops": 45.63816496031203, "iter_time": 0.45205791091918945, "loss": 0.8200969696044922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9114232858194, "step_time": 0.4133541412353516} +{"epoch": 0, "iter": 7316, "iter_tflops": 44.63861763372994, "iter_time": 0.4621803855895997, "loss": 0.5868625640869141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.267693134382846, "step_time": 0.4274306926727295} +{"epoch": 0, "iter": 7317, "iter_tflops": 36.45273293599754, "iter_time": 0.5659683609008789, "loss": 0.43840959668159485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.294438107311585, "step_time": 0.5250385169982911} +{"epoch": 0, "iter": 7318, "iter_tflops": 45.503470707523576, "iter_time": 0.4533960418701172, "loss": 0.3500220477581024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.14936379071768, "step_time": 0.4197631855010987} +{"epoch": 0, "iter": 7319, "iter_tflops": 47.49345916209832, "iter_time": 0.43439862823486325, "loss": 0.30882248282432556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.62964747610101, "step_time": 0.39959779930114747} +{"epoch": 0, "iter": 7320, "iter_tflops": 46.341002820116316, "iter_time": 0.4452017059326172, "loss": 0.32824480533599854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49581145226527, "step_time": 0.40857039260864253} +{"epoch": 0, "iter": 7321, "iter_tflops": 28.5362882729776, "iter_time": 0.7229774703979492, "loss": 0.08605829626321793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.39478859508779, "step_time": 0.6787707519531251} +{"epoch": 0, "iter": 7322, "iter_tflops": 17.439524089396485, "iter_time": 1.183007827758789, "loss": 0.10681939870119095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.85583015668738, "step_time": 1.039044620513916} +{"epoch": 0, "iter": 7323, "iter_tflops": 44.3155032879157, "iter_time": 0.4655502471923828, "loss": 0.10006991773843765, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 48.31983445569791, "step_time": 0.42696945762634286} +{"epoch": 0, "iter": 7324, "iter_tflops": 49.001330995957126, "iter_time": 0.42103128814697266, "loss": 0.0929645374417305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.50396049394921, "step_time": 0.3855993709564209} +{"epoch": 0, "iter": 7325, "iter_tflops": 24.504475352559613, "iter_time": 0.841931655883789, "loss": 0.9433082938194275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.726891287064387, "step_time": 0.8019271850585938} +{"epoch": 0, "iter": 7326, "iter_tflops": 9.335118189820212, "iter_time": 2.2100516662597656, "loss": 0.6614511609077454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.736980019937036, "step_time": 1.9214987335205076} +{"epoch": 0, "iter": 7327, "iter_tflops": 12.820671811766847, "iter_time": 1.609205337524414, "loss": 0.730922281742096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.906224711688115, "step_time": 1.2970452690124512} +{"epoch": 0, "iter": 7328, "iter_tflops": 25.7659858442091, "iter_time": 0.8007104263305664, "loss": 0.6255035400390625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.55911972886179, "step_time": 0.5801913452148437} +{"epoch": 0, "iter": 7329, "iter_tflops": 16.104805199520662, "iter_time": 0.978998664855957, "loss": 0.24798305332660675, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 17.121076769353284, "step_time": 0.9208873367309569} +{"epoch": 0, "iter": 7330, "iter_tflops": 10.349629026343422, "iter_time": 1.5233959350585937, "loss": 0.22191530466079712, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 13.140156511351684, "step_time": 1.1998778533935548} +{"epoch": 0, "iter": 7331, "iter_tflops": 27.98212859306551, "iter_time": 0.5634518737792968, "loss": 0.30528566241264343, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 29.896632853303352, "step_time": 0.5273698501586914} +{"epoch": 0, "iter": 7332, "iter_tflops": 26.35860830322811, "iter_time": 0.5981568756103517, "loss": 0.2102421671152115, "lr": 
3e-05, "seqlen": 6304.0, "step_tflops": 27.95066077094769, "step_time": 0.5640862274169922} +{"epoch": 0, "iter": 7333, "iter_tflops": 27.208226936475633, "iter_time": 0.7582667388916016, "loss": 0.7408435344696045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.787859600496454, "step_time": 0.7166595153808594} +{"epoch": 0, "iter": 7334, "iter_tflops": 9.560122196369125, "iter_time": 2.1580365905761716, "loss": 0.5462774038314819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.385358840447674, "step_time": 1.8120723114013675} +{"epoch": 0, "iter": 7335, "iter_tflops": 34.52503714430345, "iter_time": 0.5975690460205079, "loss": 0.6692836880683899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.44969041956371, "step_time": 0.5509015769958496} +{"epoch": 0, "iter": 7336, "iter_tflops": 37.24496907101975, "iter_time": 0.5539296722412109, "loss": 0.685719907283783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.039357215935404, "step_time": 0.5152703475952148} +{"epoch": 0, "iter": 7337, "iter_tflops": 10.158620213864115, "iter_time": 1.4313280487060547, "loss": 0.18902118504047394, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 10.863889848836761, "step_time": 1.338408088684082} +{"epoch": 0, "iter": 7338, "iter_tflops": 11.35766622051742, "iter_time": 1.2802205810546876, "loss": 0.29883819818496704, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 16.315460000158556, "step_time": 0.8911987800598146} +{"epoch": 0, "iter": 7339, "iter_tflops": 25.773809812244593, "iter_time": 0.5641509017944336, "loss": 0.3069848120212555, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 27.526133254284314, "step_time": 0.5282368545532227} +{"epoch": 0, "iter": 7340, "iter_tflops": 25.027602008484358, "iter_time": 0.5809712829589843, "loss": 0.3195129632949829, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 26.69080904424789, "step_time": 0.544768726348877} +{"epoch": 0, "iter": 7341, "iter_tflops": 36.504951278336506, "iter_time": 0.5299764862060546, "loss": 0.16790977120399475, "lr": 
3e-05, "seqlen": 7696.0, "step_tflops": 39.752767943514016, "step_time": 0.4866772003173829} +{"epoch": 0, "iter": 7342, "iter_tflops": 9.722273391514262, "iter_time": 1.9899425811767582, "loss": 0.16246122121810913, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 10.885529818487447, "step_time": 1.7772920684814453} +{"epoch": 0, "iter": 7343, "iter_tflops": 13.08572190029454, "iter_time": 1.4784637756347656, "loss": 0.12765906751155853, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 16.35710349234014, "step_time": 1.1827745552062987} +{"epoch": 0, "iter": 7344, "iter_tflops": 30.78271227884071, "iter_time": 0.628494514465332, "loss": 0.1776721030473709, "lr": 3e-05, "seqlen": 7696.0, "step_tflops": 40.3055633407405, "step_time": 0.4800023670196534} +{"epoch": 0, "iter": 7345, "iter_tflops": 18.100002672736025, "iter_time": 0.8620349807739258, "loss": 0.3350536525249481, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 19.052786253079013, "step_time": 0.8189267044067383} +{"epoch": 0, "iter": 7346, "iter_tflops": 6.728786206458358, "iter_time": 2.3188187255859374, "loss": 0.3548777401447296, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 7.728842544992883, "step_time": 2.018780349731445} +{"epoch": 0, "iter": 7347, "iter_tflops": 11.909498258981015, "iter_time": 1.310116943359375, "loss": 0.1430954486131668, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.624772752587257, "step_time": 1.1451813354492189} +{"epoch": 0, "iter": 7348, "iter_tflops": 19.409236151756833, "iter_time": 0.8038871459960937, "loss": 0.19026997685432434, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 22.7703256636728, "step_time": 0.6852267150878906} +{"epoch": 0, "iter": 7349, "iter_tflops": 14.837300773235096, "iter_time": 1.0598719482421877, "loss": 0.19015128910541534, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 15.715183949165938, "step_time": 1.0006652755737304} +{"epoch": 0, "iter": 7350, "iter_tflops": 6.792486908686934, "iter_time": 2.315151885986328, "loss": 0.306132972240448, "lr": 
3e-05, "seqlen": 6288.0, "step_tflops": 8.499180187173707, "step_time": 1.8502536163330077} +{"epoch": 0, "iter": 7351, "iter_tflops": 7.78361395732682, "iter_time": 2.0203518524169923, "loss": 0.2328743189573288, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 9.565906934191347, "step_time": 1.6439255561828612} +{"epoch": 0, "iter": 7352, "iter_tflops": 23.725820611567613, "iter_time": 0.6628069534301757, "loss": 0.21018745005130768, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.43973967461491, "step_time": 0.6181525077819824} +{"epoch": 0, "iter": 7353, "iter_tflops": 12.645487143047045, "iter_time": 1.2079856719970703, "loss": 0.2556033432483673, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 13.33010465579839, "step_time": 1.1459450378417968} +{"epoch": 0, "iter": 7354, "iter_tflops": 6.572657496519228, "iter_time": 2.324108215332031, "loss": 0.138968363404274, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 7.4693986850525365, "step_time": 2.0450866165161132} +{"epoch": 0, "iter": 7355, "iter_tflops": 8.943553108704382, "iter_time": 1.707997604370117, "loss": 0.19957922399044037, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 12.484752354215383, "step_time": 1.2235378684997558} +{"epoch": 0, "iter": 7356, "iter_tflops": 22.26697911342474, "iter_time": 0.6860188446044921, "loss": 0.18048283457756042, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.980533230147543, "step_time": 0.636998649597168} +{"epoch": 0, "iter": 7357, "iter_tflops": 15.709228663528894, "iter_time": 0.990622184753418, "loss": 0.21001635491847992, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 16.733326924872234, "step_time": 0.9299950027465821} +{"epoch": 0, "iter": 7358, "iter_tflops": 5.5382611991418225, "iter_time": 2.8098910217285153, "loss": 0.3224780261516571, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 7.084067968279475, "step_time": 2.1967477569580076} +{"epoch": 0, "iter": 7359, "iter_tflops": 10.696363424929215, "iter_time": 1.4548786163330079, "loss": 0.2778628468513489, "lr": 
3e-05, "seqlen": 6224.0, "step_tflops": 13.391532931815334, "step_time": 1.1620708770751955} +{"epoch": 0, "iter": 7360, "iter_tflops": 21.92159885717225, "iter_time": 0.7098893890380858, "loss": 0.1765245646238327, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 23.56000514252217, "step_time": 0.6605223693847657} +{"epoch": 0, "iter": 7361, "iter_tflops": 10.535052489398174, "iter_time": 1.3298704986572265, "loss": 0.2742169499397278, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 11.098693883349496, "step_time": 1.2623337173461913} +{"epoch": 0, "iter": 7362, "iter_tflops": 12.612225109992558, "iter_time": 1.1108472442626955, "loss": 0.20648562908172607, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 14.994311235084822, "step_time": 0.9343713951110839} +{"epoch": 0, "iter": 7363, "iter_tflops": 21.119774536896852, "iter_time": 0.6633714523315429, "loss": 0.28249385952949524, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 22.769959405865084, "step_time": 0.6152955856323242} +{"epoch": 0, "iter": 7364, "iter_tflops": 22.084828512246865, "iter_time": 0.6343837127685548, "loss": 0.19523532688617706, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 23.554298211708083, "step_time": 0.5948067474365234} +{"epoch": 0, "iter": 7365, "iter_tflops": 29.973883334543324, "iter_time": 0.6883023223876954, "loss": 0.6016353964805603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.77511706613395, "step_time": 0.6294742889404297} +{"epoch": 0, "iter": 7366, "iter_tflops": 8.963364135802468, "iter_time": 2.3017131958007813, "loss": 0.5527855157852173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.479342081033103, "step_time": 1.968739387512207} +{"epoch": 0, "iter": 7367, "iter_tflops": 10.2341101075276, "iter_time": 2.015914749145508, "loss": 0.5459672212600708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.745603060627912, "step_time": 1.6186831970214846} +{"epoch": 0, "iter": 7368, "iter_tflops": 36.474931808918065, "iter_time": 0.5656239089965821, "loss": 0.57291579246521, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 39.830011664715634, "step_time": 0.5179785957336426} +{"epoch": 0, "iter": 7369, "iter_tflops": 18.475763066164248, "iter_time": 0.8821980972290039, "loss": 0.16725367307662964, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 19.65565134564859, "step_time": 0.8292415618896484} +{"epoch": 0, "iter": 7370, "iter_tflops": 5.550635126222854, "iter_time": 2.9364717102050784, "loss": 0.2814147472381592, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 6.156275578652678, "step_time": 2.6475882720947266} +{"epoch": 0, "iter": 7371, "iter_tflops": 10.3113008931384, "iter_time": 1.5807203369140626, "loss": 0.1573721319437027, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 13.0818627057448, "step_time": 1.2459451217651367} +{"epoch": 0, "iter": 7372, "iter_tflops": 26.04046508241904, "iter_time": 0.6259213485717774, "loss": 0.20587922632694244, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.95081115641436, "step_time": 0.5831416816711427} +{"epoch": 0, "iter": 7373, "iter_tflops": 12.789859730468883, "iter_time": 1.092233596801758, "loss": 0.3470333516597748, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 13.873084639804965, "step_time": 1.0069508590698242} +{"epoch": 0, "iter": 7374, "iter_tflops": 15.805739587921572, "iter_time": 0.883825424194336, "loss": 0.29722732305526733, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 17.623422899633674, "step_time": 0.7926674957275391} +{"epoch": 0, "iter": 7375, "iter_tflops": 22.625637766197297, "iter_time": 0.6174197006225587, "loss": 0.258938729763031, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 24.272369217256248, "step_time": 0.5755315589904786} +{"epoch": 0, "iter": 7376, "iter_tflops": 21.37385912995071, "iter_time": 0.6535794219970704, "loss": 0.23110385239124298, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 22.861115510244066, "step_time": 0.6110600547790528} +{"epoch": 0, "iter": 7377, "iter_tflops": 21.63539566209527, "iter_time": 0.9535805969238281, "loss": 0.49261870980262756, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 23.273905877108582, "step_time": 0.8864474067687989} +{"epoch": 0, "iter": 7378, "iter_tflops": 44.01322800750862, "iter_time": 0.46874756622314456, "loss": 0.35512158274650574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.14442466576497, "step_time": 0.4285250816345216} +{"epoch": 0, "iter": 7379, "iter_tflops": 50.41381061525767, "iter_time": 0.4092349548339843, "loss": 0.44604435563087463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.90394603749345, "step_time": 0.3757670440673828} +{"epoch": 0, "iter": 7380, "iter_tflops": 47.955768800497104, "iter_time": 0.430210880279541, "loss": 0.325201153755188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.59046145220278, "step_time": 0.39990131759643555} +{"epoch": 0, "iter": 7381, "iter_tflops": 29.713542096680253, "iter_time": 0.6943330230712891, "loss": 0.8363881707191467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.6165042141964, "step_time": 0.652541893005371} +{"epoch": 0, "iter": 7382, "iter_tflops": 18.321341431046374, "iter_time": 1.1260689392089844, "loss": 0.7458382844924927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.36893982788248, "step_time": 0.9223098487854003} +{"epoch": 0, "iter": 7383, "iter_tflops": 36.6706248261888, "iter_time": 0.5626054534912109, "loss": 0.7452628016471863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.60700075288572, "step_time": 0.5208951225280761} +{"epoch": 0, "iter": 7384, "iter_tflops": 38.91153846419581, "iter_time": 0.5302050323486328, "loss": 0.7360574007034302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.031380001679224, "step_time": 0.49084977722167966} +{"epoch": 0, "iter": 7385, "iter_tflops": 34.44689361279074, "iter_time": 0.5989246444702149, "loss": 0.024527736008167267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.59822538888368, "step_time": 0.534508861541748} +{"epoch": 0, "iter": 7386, "iter_tflops": 38.88573551534691, "iter_time": 0.5305568542480469, "loss": 0.04049982130527496, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 42.75395906549592, "step_time": 0.4825539894104004} +{"epoch": 0, "iter": 7387, "iter_tflops": 40.91461475831092, "iter_time": 0.5042475318908691, "loss": 0.04831204190850258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.99093595336021, "step_time": 0.4585611095428467} +{"epoch": 0, "iter": 7388, "iter_tflops": 50.36928425408306, "iter_time": 0.4095967178344727, "loss": 0.09183432906866074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.5144315797524, "step_time": 0.3716347789764405} +{"epoch": 0, "iter": 7389, "iter_tflops": 35.46639724665747, "iter_time": 0.5817081832885742, "loss": 0.3275131583213806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.34973258556973, "step_time": 0.5243007297515869} +{"epoch": 0, "iter": 7390, "iter_tflops": 36.205733871949434, "iter_time": 0.5698294525146484, "loss": 0.5335688591003418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.08856887262228, "step_time": 0.5146378154754638} +{"epoch": 0, "iter": 7391, "iter_tflops": 40.908334545634474, "iter_time": 0.5043249435424805, "loss": 0.35763293504714966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.68588683075206, "step_time": 0.46169148635864254} +{"epoch": 0, "iter": 7392, "iter_tflops": 40.30006598989374, "iter_time": 0.5119369659423828, "loss": 0.34913331270217896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.94623290079082, "step_time": 0.4694621620178222} +{"epoch": 0, "iter": 7393, "iter_tflops": 21.66875981677122, "iter_time": 0.9521123352050782, "loss": 0.050987131893634796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.969870467739657, "step_time": 0.8981806640624999} +{"epoch": 0, "iter": 7394, "iter_tflops": 19.17460229437595, "iter_time": 1.0759593963623046, "loss": 0.09071671217679977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.865232367108835, "step_time": 0.6684250183105469} +{"epoch": 0, "iter": 7395, "iter_tflops": 49.098110980449455, "iter_time": 0.4202013702392578, "loss": 0.0994841605424881, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26228085161952, "step_time": 0.38734904289245603} +{"epoch": 0, "iter": 7396, "iter_tflops": 53.064444684751834, "iter_time": 0.3887931671142578, "loss": 0.08375964313745499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.995206187433595, "step_time": 0.3557379112243652} +{"epoch": 0, "iter": 7397, "iter_tflops": 33.80671908319552, "iter_time": 0.6102660675048829, "loss": 0.6095622181892395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.23143516467505, "step_time": 0.569425235748291} +{"epoch": 0, "iter": 7398, "iter_tflops": 16.95035568874396, "iter_time": 1.2171481170654297, "loss": 0.4719699025154114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.8035330739578, "step_time": 0.9917110443115235} +{"epoch": 0, "iter": 7399, "iter_tflops": 47.62615116781523, "iter_time": 0.4331883430480957, "loss": 0.5660488605499268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.62679487742371, "step_time": 0.39961987876892086} +{"epoch": 0, "iter": 7400, "iter_tflops": 52.011138434920035, "iter_time": 0.39666683197021485, "loss": 0.3999927043914795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.124399374820335, "step_time": 0.3675958003997803} +{"epoch": 0, "iter": 7401, "iter_tflops": 30.713763450201647, "iter_time": 0.6717214431762695, "loss": 0.7434465289115906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.65467644869131, "step_time": 0.631795986175537} +{"epoch": 0, "iter": 7402, "iter_tflops": 20.049162410584024, "iter_time": 1.0290252075195312, "loss": 1.0540602207183838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.046354618935904, "step_time": 0.857971773147583} +{"epoch": 0, "iter": 7403, "iter_tflops": 46.74937169629953, "iter_time": 0.441312744140625, "loss": 0.8290728330612183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49559269391437, "step_time": 0.40857216262817386} +{"epoch": 0, "iter": 7404, "iter_tflops": 43.12829007555621, "iter_time": 0.47836567306518557, "loss": 1.0341740846633911, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.99157477606416, "step_time": 0.4485841941833496} +{"epoch": 0, "iter": 7405, "iter_tflops": 21.888587094009523, "iter_time": 0.9425502624511719, "loss": 0.8073899745941162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.13081549817393, "step_time": 0.8919310913085938} +{"epoch": 0, "iter": 7406, "iter_tflops": 13.448909291622442, "iter_time": 1.534034698486328, "loss": 0.6054943203926086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.169121642798203, "step_time": 1.360071729660034} +{"epoch": 0, "iter": 7407, "iter_tflops": 39.000171705509906, "iter_time": 0.5290000686645507, "loss": 0.768412172794342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.874073690834415, "step_time": 0.4812020816802978} +{"epoch": 0, "iter": 7408, "iter_tflops": 41.61293678534238, "iter_time": 0.4957855682373047, "loss": 0.7969393730163574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.54583630657871, "step_time": 0.4529743041992188} +{"epoch": 0, "iter": 7409, "iter_tflops": 12.349287208905805, "iter_time": 1.1576032257080076, "loss": 0.06857885420322418, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 13.096781911172434, "step_time": 1.0915333862304688} +{"epoch": 0, "iter": 7410, "iter_tflops": 10.890198654800134, "iter_time": 1.31270100402832, "loss": 0.05347217246890068, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 15.400054188530083, "step_time": 0.9282808055877687} +{"epoch": 0, "iter": 7411, "iter_tflops": 30.114941274305426, "iter_time": 0.4747004013061524, "loss": 0.08829712867736816, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 33.05287061061346, "step_time": 0.43250629806518553} +{"epoch": 0, "iter": 7412, "iter_tflops": 27.57524874177125, "iter_time": 0.5184205169677735, "loss": 0.060526274144649506, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 30.150983539290383, "step_time": 0.4741329479217529} +{"epoch": 0, "iter": 7413, "iter_tflops": 21.55764064864693, "iter_time": 0.9570200119018554, "loss": 
0.7912346124649048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.223522307276777, "step_time": 0.8883705596923829} +{"epoch": 0, "iter": 7414, "iter_tflops": 28.889399073388397, "iter_time": 0.714140625, "loss": 0.7563748955726624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.14827866412179, "step_time": 0.6417479991912842} +{"epoch": 0, "iter": 7415, "iter_tflops": 46.06058948289105, "iter_time": 0.44791205978393556, "loss": 0.7946950793266296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.811106994975894, "step_time": 0.41418660926818845} +{"epoch": 0, "iter": 7416, "iter_tflops": 44.64442084984081, "iter_time": 0.4621203079223632, "loss": 0.6618139743804932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08692383187678, "step_time": 0.42903749847412104} +{"epoch": 0, "iter": 7417, "iter_tflops": 28.87313084207853, "iter_time": 0.7145429992675781, "loss": 0.8893728256225586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.608983745631942, "step_time": 0.6740208587646485} +{"epoch": 0, "iter": 7418, "iter_tflops": 11.20422678418082, "iter_time": 1.8413670043945312, "loss": 0.6591162085533142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.906609550387367, "step_time": 1.5984905586242675} +{"epoch": 0, "iter": 7419, "iter_tflops": 34.7899816877209, "iter_time": 0.5930182342529297, "loss": 0.6886650919914246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.78817902054519, "step_time": 0.5459668617248535} +{"epoch": 0, "iter": 7420, "iter_tflops": 35.06729443678214, "iter_time": 0.5883286361694338, "loss": 0.9138504266738892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92505210477993, "step_time": 0.5439964447021484} +{"epoch": 0, "iter": 7421, "iter_tflops": 14.227172410728947, "iter_time": 1.4501190338134764, "loss": 0.16039879620075226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.344508537206206, "step_time": 1.344526184082031} +{"epoch": 0, "iter": 7422, "iter_tflops": 26.198780246701446, "iter_time": 0.7874829788208008, "loss": 
0.16539384424686432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.66108442290188, "step_time": 0.695560998916626} +{"epoch": 0, "iter": 7423, "iter_tflops": 48.580421786645246, "iter_time": 0.42467917633056645, "loss": 0.10805275291204453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.72327752883568, "step_time": 0.39130900955200193} +{"epoch": 0, "iter": 7424, "iter_tflops": 52.1962244088904, "iter_time": 0.3952602653503418, "loss": 0.12317164242267609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.240623711880794, "step_time": 0.36683614349365234} +{"epoch": 0, "iter": 7425, "iter_tflops": 46.91840124903882, "iter_time": 0.4397228584289551, "loss": 0.13119947910308838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.64305886425809, "step_time": 0.39949402618408203} +{"epoch": 0, "iter": 7426, "iter_tflops": 35.78242403732761, "iter_time": 0.5765705947875976, "loss": 0.12209556251764297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.21607331562269, "step_time": 0.5260876922607421} +{"epoch": 0, "iter": 7427, "iter_tflops": 43.54676874459168, "iter_time": 0.47376864242553707, "loss": 0.12991224229335785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.85038438996525, "step_time": 0.43115836524963375} +{"epoch": 0, "iter": 7428, "iter_tflops": 45.22718034104554, "iter_time": 0.4561658134460449, "loss": 0.0917060449719429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.24322150903635, "step_time": 0.4189631156921387} +{"epoch": 0, "iter": 7429, "iter_tflops": 24.351387776205392, "iter_time": 0.8472245483398438, "loss": 0.9264904856681824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.381308762670113, "step_time": 0.7820344963073731} +{"epoch": 0, "iter": 7430, "iter_tflops": 18.666714089314233, "iter_time": 1.105234344482422, "loss": 0.8972000479698181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.803825442419868, "step_time": 0.9047207260131835} +{"epoch": 0, "iter": 7431, "iter_tflops": 37.97467081602099, "iter_time": 0.54328564453125, 
"loss": 0.8075506091117859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.2408726390786, "step_time": 0.5002584133148194} +{"epoch": 0, "iter": 7432, "iter_tflops": 39.258628178767374, "iter_time": 0.5255174331665039, "loss": 0.7765731811523438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.65925045929706, "step_time": 0.48362531661987307} +{"epoch": 0, "iter": 7433, "iter_tflops": 19.796311195574283, "iter_time": 1.0421685791015627, "loss": 0.7295387387275696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.840757040169887, "step_time": 0.9899397354125976} +{"epoch": 0, "iter": 7434, "iter_tflops": 14.305893132751065, "iter_time": 1.4421394958496094, "loss": 0.924065351486206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.990618110477968, "step_time": 1.214263858795166} +{"epoch": 0, "iter": 7435, "iter_tflops": 37.46166155634892, "iter_time": 0.5507255325317383, "loss": 0.830141007900238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.95134164864471, "step_time": 0.5037953014373779} +{"epoch": 0, "iter": 7436, "iter_tflops": 37.19256934094103, "iter_time": 0.5547100906372071, "loss": 0.946105420589447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.39083318951807, "step_time": 0.5107865295410157} +{"epoch": 0, "iter": 7437, "iter_tflops": 19.60034010800107, "iter_time": 1.0525885467529297, "loss": 0.24675080180168152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.602774754804763, "step_time": 1.00137451171875} +{"epoch": 0, "iter": 7438, "iter_tflops": 16.405199801979478, "iter_time": 1.2575947723388672, "loss": 0.3277985751628876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.366320915669107, "step_time": 1.06530784034729} +{"epoch": 0, "iter": 7439, "iter_tflops": 44.03867832408684, "iter_time": 0.46847667312622066, "loss": 0.3452812135219574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.5648251998571, "step_time": 0.43374685859680173} +{"epoch": 0, "iter": 7440, "iter_tflops": 46.93734627845607, "iter_time": 0.4395453758239746, "loss": 
0.23951716721057892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.7964573214806, "step_time": 0.40615221214294434} +{"epoch": 0, "iter": 7441, "iter_tflops": 27.65060303061861, "iter_time": 0.7461353912353514, "loss": 0.21829284727573395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.346842715847014, "step_time": 0.7030089645385742} +{"epoch": 0, "iter": 7442, "iter_tflops": 25.67278582901329, "iter_time": 0.8036172485351563, "loss": 0.244879812002182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.2917130073368, "step_time": 0.6593149280548095} +{"epoch": 0, "iter": 7443, "iter_tflops": 43.946606065972325, "iter_time": 0.4694581756591797, "loss": 0.282503217458725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.92466522678564, "step_time": 0.4304900913238525} +{"epoch": 0, "iter": 7444, "iter_tflops": 42.53866100938186, "iter_time": 0.4849963073730469, "loss": 0.2178419977426529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.076000476657484, "step_time": 0.4477622470855713} +{"epoch": 0, "iter": 7445, "iter_tflops": 18.53851531414033, "iter_time": 1.1128773345947265, "loss": 0.6134642958641052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.85330391615593, "step_time": 1.0391768341064451} +{"epoch": 0, "iter": 7446, "iter_tflops": 29.43835084454908, "iter_time": 0.7008236846923829, "loss": 0.6113346815109253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.57108607977005, "step_time": 0.5491215629577637} +{"epoch": 0, "iter": 7447, "iter_tflops": 40.17458947903501, "iter_time": 0.513535888671875, "loss": 0.702624499797821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.76438356312845, "step_time": 0.4714128665924072} +{"epoch": 0, "iter": 7448, "iter_tflops": 44.07849917673638, "iter_time": 0.46805344772338864, "loss": 0.6926112174987793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.00572464073161, "step_time": 0.4297631931304931} +{"epoch": 0, "iter": 7449, "iter_tflops": 16.55058144498096, "iter_time": 1.2465479583740233, "loss": 
0.7548583745956421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.622407112142504, "step_time": 1.1707307281494141} +{"epoch": 0, "iter": 7450, "iter_tflops": 18.005978889745354, "iter_time": 1.145791275024414, "loss": 0.6956474184989929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.12081846930302, "step_time": 0.9768131637573242} +{"epoch": 0, "iter": 7451, "iter_tflops": 42.84574309047723, "iter_time": 0.48152026367187495, "loss": 0.8816318511962891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.19954563024152, "step_time": 0.44656485748291014} +{"epoch": 0, "iter": 7452, "iter_tflops": 42.59999898486876, "iter_time": 0.484297981262207, "loss": 0.7965946793556213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.9466934528597, "step_time": 0.4490223770141601} +{"epoch": 0, "iter": 7453, "iter_tflops": 4.336420491429402, "iter_time": 0.47018297195434566, "loss": 1.7583472728729248, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 4.754170450794283, "step_time": 0.4288678951263427} +{"epoch": 0, "iter": 7454, "iter_tflops": 3.43373298745526, "iter_time": 0.5937884750366211, "loss": 0.7475664615631104, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 3.7930769753713376, "step_time": 0.5375348529815673} +{"epoch": 0, "iter": 7455, "iter_tflops": 3.7414393690462933, "iter_time": 0.5449536590576172, "loss": 0.030197251588106155, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 4.0794442842025465, "step_time": 0.49980118179321287} +{"epoch": 0, "iter": 7456, "iter_tflops": 4.196123823032937, "iter_time": 0.48590345764160153, "loss": 0.865981936454773, "lr": 3e-05, "seqlen": 832.0, "step_tflops": 4.58369994512516, "step_time": 0.44481774520874023} +{"epoch": 0, "iter": 7457, "iter_tflops": 23.718170090941086, "iter_time": 0.869843391418457, "loss": 0.277648389339447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.426895494371657, "step_time": 0.811388614654541} +{"epoch": 0, "iter": 7458, "iter_tflops": 8.954171638064786, "iter_time": 2.304076171875, "loss": 
0.25087621808052063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.16864096631621, "step_time": 1.847234016418457} +{"epoch": 0, "iter": 7459, "iter_tflops": 13.306637483676685, "iter_time": 1.5504362792968749, "loss": 0.2466074675321579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.997810081457274, "step_time": 1.2137500896453859} +{"epoch": 0, "iter": 7460, "iter_tflops": 49.023462678426164, "iter_time": 0.4208412132263184, "loss": 0.22200830280780792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.236546582544115, "step_time": 0.38753628540039065} +{"epoch": 0, "iter": 7461, "iter_tflops": 13.290086982617227, "iter_time": 1.1155733337402343, "loss": 0.26639240980148315, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 13.893996088249967, "step_time": 1.0670844116210936} +{"epoch": 0, "iter": 7462, "iter_tflops": 13.813650581824371, "iter_time": 1.073290985107422, "loss": 0.33872294425964355, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 16.895101608978383, "step_time": 0.8775363998413086} +{"epoch": 0, "iter": 7463, "iter_tflops": 26.439883507736198, "iter_time": 0.5607462921142579, "loss": 0.2514325976371765, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.212171946284357, "step_time": 0.5255202140808105} +{"epoch": 0, "iter": 7464, "iter_tflops": 24.778667739014285, "iter_time": 0.5983399429321289, "loss": 0.25967878103256226, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 26.290700748097482, "step_time": 0.563928165435791} +{"epoch": 0, "iter": 7465, "iter_tflops": 38.2228498924338, "iter_time": 0.5397581176757813, "loss": 0.6769324541091919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.25696600210658, "step_time": 0.5000632743835449} +{"epoch": 0, "iter": 7466, "iter_tflops": 42.58622475785834, "iter_time": 0.48445462417602536, "loss": 0.8047475814819336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.743986676519405, "step_time": 0.4510121440887452} +{"epoch": 0, "iter": 7467, "iter_tflops": 39.34440597880563, "iter_time": 0.524371711730957, 
"loss": 0.9227482676506042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1745041224655, "step_time": 0.4891840209960938} +{"epoch": 0, "iter": 7468, "iter_tflops": 42.234922319582545, "iter_time": 0.48848423004150393, "loss": 0.657323956489563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.357114082132185, "step_time": 0.45485904312133796} +{"epoch": 0, "iter": 7469, "iter_tflops": 28.50326690629504, "iter_time": 0.7238150482177734, "loss": 0.0655420571565628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.09028356371204, "step_time": 0.6856397171020507} +{"epoch": 0, "iter": 7470, "iter_tflops": 13.721174316658365, "iter_time": 1.5035953216552733, "loss": 0.18088273704051971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.60381504209863, "step_time": 1.1719671821594237} +{"epoch": 0, "iter": 7471, "iter_tflops": 45.18969569561733, "iter_time": 0.45654420089721676, "loss": 0.18670594692230225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30611933646347, "step_time": 0.4184286613464355} +{"epoch": 0, "iter": 7472, "iter_tflops": 51.23240305867529, "iter_time": 0.4026961898803711, "loss": 0.12313154339790344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.80726183248902, "step_time": 0.36968474769592286} +{"epoch": 0, "iter": 7473, "iter_tflops": 32.138349192213575, "iter_time": 0.6419462738037109, "loss": 0.7998465299606323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.318483867411786, "step_time": 0.6011656455993653} +{"epoch": 0, "iter": 7474, "iter_tflops": 13.645961433435003, "iter_time": 1.5118827362060547, "loss": 0.5355226993560791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.937405790494147, "step_time": 1.2945076370239257} +{"epoch": 0, "iter": 7475, "iter_tflops": 13.190112829533218, "iter_time": 1.5641332092285156, "loss": 0.6878919005393982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.372326575870355, "step_time": 1.342093105316162} +{"epoch": 0, "iter": 7476, "iter_tflops": 43.4438556350864, "iter_time": 
0.47489094161987305, "loss": 0.6170291900634766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08151767021965, "step_time": 0.43819941520690914} +{"epoch": 0, "iter": 7477, "iter_tflops": 19.147115167150275, "iter_time": 0.755138198852539, "loss": 0.44990018010139465, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 20.3715016540242, "step_time": 0.709752197265625} +{"epoch": 0, "iter": 7478, "iter_tflops": 6.174014599491627, "iter_time": 2.341866516113281, "loss": 0.26743870973587036, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 7.0799217818536615, "step_time": 2.0422143783569338} +{"epoch": 0, "iter": 7479, "iter_tflops": 9.363791013200437, "iter_time": 1.5441094360351562, "loss": 0.2615853250026703, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 11.298446496587498, "step_time": 1.2797085037231444} +{"epoch": 0, "iter": 7480, "iter_tflops": 22.206465192417426, "iter_time": 0.6511039886474609, "loss": 0.21657194197177887, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 24.065358773447528, "step_time": 0.6008104095458985} +{"epoch": 0, "iter": 7481, "iter_tflops": 25.234137126688267, "iter_time": 0.6296804504394531, "loss": 0.4460482895374298, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 27.11673693537364, "step_time": 0.585964412689209} +{"epoch": 0, "iter": 7482, "iter_tflops": 25.379719497200348, "iter_time": 0.6260684967041015, "loss": 0.1692311018705368, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 27.13608605467937, "step_time": 0.5855465965270996} +{"epoch": 0, "iter": 7483, "iter_tflops": 28.76575610914947, "iter_time": 0.5523735504150391, "loss": 0.20506589114665985, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.692383439912803, "step_time": 0.5176998672485351} +{"epoch": 0, "iter": 7484, "iter_tflops": 27.495492321379892, "iter_time": 0.5778926467895508, "loss": 0.3184848427772522, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 29.28284299930555, "step_time": 0.542619541168213} +{"epoch": 0, "iter": 7485, "iter_tflops": 38.33011293610607, "iter_time": 
0.5382476577758789, "loss": 0.18379881978034973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.79548603186633, "step_time": 0.4936201362609863} +{"epoch": 0, "iter": 7486, "iter_tflops": 34.43323037240671, "iter_time": 0.5991623001098633, "loss": 0.14010125398635864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.61345573349317, "step_time": 0.5342980346679688} +{"epoch": 0, "iter": 7487, "iter_tflops": 42.24042217826612, "iter_time": 0.4884206275939941, "loss": 0.11556984484195709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.19944519177307, "step_time": 0.4465658283233642} +{"epoch": 0, "iter": 7488, "iter_tflops": 39.65929398428889, "iter_time": 0.5202082901000976, "loss": 0.11730043590068817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66457903173948, "step_time": 0.47249037933349614} +{"epoch": 0, "iter": 7489, "iter_tflops": 34.31087136343577, "iter_time": 0.589209716796875, "loss": 0.1936032772064209, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 38.18877266177218, "step_time": 0.5293780708312988} +{"epoch": 0, "iter": 7490, "iter_tflops": 35.20545929396238, "iter_time": 0.5742376098632813, "loss": 0.13611936569213867, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 38.52025174763608, "step_time": 0.5248226032257081} +{"epoch": 0, "iter": 7491, "iter_tflops": 43.47727923451235, "iter_time": 0.46498537063598633, "loss": 0.22845324873924255, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 47.576007630396745, "step_time": 0.42492634010314945} +{"epoch": 0, "iter": 7492, "iter_tflops": 43.53019443789603, "iter_time": 0.46442013549804684, "loss": 0.19108286499977112, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 47.46712996446316, "step_time": 0.42590101432800287} +{"epoch": 0, "iter": 7493, "iter_tflops": 20.05137067739503, "iter_time": 1.028911880493164, "loss": 0.04679643362760544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.166458174637683, "step_time": 0.9747069320678712} +{"epoch": 0, "iter": 7494, "iter_tflops": 16.517302505558263, 
"iter_time": 1.2490594940185547, "loss": 0.038726333528757095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.675065754537897, "step_time": 0.9518353366851806} +{"epoch": 0, "iter": 7495, "iter_tflops": 53.523442204919945, "iter_time": 0.38545901870727534, "loss": 0.04282159358263016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.70390998200313, "step_time": 0.3514432601928711} +{"epoch": 0, "iter": 7496, "iter_tflops": 52.60755602422808, "iter_time": 0.3921697769165039, "loss": 0.045949798077344894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.44324821829574, "step_time": 0.35915610885620114} +{"epoch": 0, "iter": 7497, "iter_tflops": 27.72279701567841, "iter_time": 0.6680629577636717, "loss": 0.018893098458647728, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 29.506815120691694, "step_time": 0.6276710548400879} +{"epoch": 0, "iter": 7498, "iter_tflops": 15.58952895269994, "iter_time": 1.188013687133789, "loss": 0.005974927917122841, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 19.283688614898626, "step_time": 0.9604269256591796} +{"epoch": 0, "iter": 7499, "iter_tflops": 48.07022112890746, "iter_time": 0.38528164291381833, "loss": 0.013450694270431995, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 53.09799396208509, "step_time": 0.3487998771667481} +{"epoch": 0, "iter": 7500, "iter_tflops": 50.342544474090104, "iter_time": 0.3678910942077636, "loss": 0.005339360795915127, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 55.10275061545755, "step_time": 0.3361097869873047} +{"epoch": 0, "iter": 7501, "iter_tflops": 33.824892360993736, "iter_time": 0.6099381866455078, "loss": 0.5599347352981567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.132555814044736, "step_time": 0.5709835090637208} +{"epoch": 0, "iter": 7502, "iter_tflops": 33.72473835848752, "iter_time": 0.6117495498657226, "loss": 0.7262963056564331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.59773068761788, "step_time": 0.5637260322570801} +{"epoch": 0, "iter": 7503, "iter_tflops": 
33.14972058854654, "iter_time": 0.6223610076904297, "loss": 0.6989609003067017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.92938422943392, "step_time": 0.5742122764587402} +{"epoch": 0, "iter": 7504, "iter_tflops": 33.677816864023974, "iter_time": 0.6126018676757813, "loss": 0.9108777642250061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.97338696604888, "step_time": 0.5579984741210937} +{"epoch": 0, "iter": 7505, "iter_tflops": 2.1233162918379627, "iter_time": 0.7381192703247071, "loss": 3.0912883281707764, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 2.3029603642950183, "step_time": 0.6805417480468751} +{"epoch": 0, "iter": 7506, "iter_tflops": 0.6526090926649929, "iter_time": 2.401530548095703, "loss": 2.9929447174072266, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 0.7642332868209352, "step_time": 2.0507621154785154} +{"epoch": 0, "iter": 7507, "iter_tflops": 0.9547429707305158, "iter_time": 1.6415524597167968, "loss": 4.041251182556152, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.2710931507108971, "step_time": 1.2330022163391112} +{"epoch": 0, "iter": 7508, "iter_tflops": 2.918034536379869, "iter_time": 0.5370946273803712, "loss": 3.946732521057129, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.2028271081053945, "step_time": 0.48933664512634273} +{"epoch": 0, "iter": 7509, "iter_tflops": 13.813242651168235, "iter_time": 1.0969849700927734, "loss": 0.21882474422454834, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 14.969910276275137, "step_time": 1.012225143432617} +{"epoch": 0, "iter": 7510, "iter_tflops": 16.70385137884976, "iter_time": 0.9071512451171875, "loss": 0.293521523475647, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 19.995684832417055, "step_time": 0.757809482574463} +{"epoch": 0, "iter": 7511, "iter_tflops": 25.402453897680697, "iter_time": 0.5965140075683595, "loss": 0.2634270489215851, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 27.018998184377303, "step_time": 0.5608246269226074} +{"epoch": 0, "iter": 7512, "iter_tflops": 
26.963062829003864, "iter_time": 0.5619880676269531, "loss": 0.27965834736824036, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.66171048675188, "step_time": 0.5286816215515137} +{"epoch": 0, "iter": 7513, "iter_tflops": 31.521390881832662, "iter_time": 0.6545108871459961, "loss": 0.41664183139801025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.54683414422437, "step_time": 0.6149937553405762} +{"epoch": 0, "iter": 7514, "iter_tflops": 11.37398523553908, "iter_time": 1.8138843231201172, "loss": 0.4612484276294708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.851952699328699, "step_time": 1.4893996505737306} +{"epoch": 0, "iter": 7515, "iter_tflops": 38.520057474159785, "iter_time": 0.5355935287475586, "loss": 0.48403671383857727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.13622894500853, "step_time": 0.48962837982177737} +{"epoch": 0, "iter": 7516, "iter_tflops": 35.01443047964035, "iter_time": 0.5892168807983399, "loss": 0.36221837997436523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.21142597537448, "step_time": 0.5399194869995118} +{"epoch": 0, "iter": 7517, "iter_tflops": 29.77286233992241, "iter_time": 0.6929496154785155, "loss": 0.7425752878189087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.251584624653965, "step_time": 0.6396923980712891} +{"epoch": 0, "iter": 7518, "iter_tflops": 10.639860117664107, "iter_time": 1.939038040161133, "loss": 0.8396986126899719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.777872843605635, "step_time": 1.7516824798583988} +{"epoch": 0, "iter": 7519, "iter_tflops": 13.40651563322459, "iter_time": 1.5388855743408203, "loss": 0.8945326805114746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.640712732993803, "step_time": 1.3190635147094727} +{"epoch": 0, "iter": 7520, "iter_tflops": 45.29000400428899, "iter_time": 0.4555330467224121, "loss": 0.9101290702819824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.121939431109325, "step_time": 0.4199975357055664} +{"epoch": 0, "iter": 7521, 
"iter_tflops": 18.812083315068595, "iter_time": 0.7382513885498047, "loss": 0.21405549347400665, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 19.90383344443498, "step_time": 0.6977573776245116} +{"epoch": 0, "iter": 7522, "iter_tflops": 9.48539872418986, "iter_time": 1.4641500091552735, "loss": 0.26912686228752136, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 11.963242648452566, "step_time": 1.160893165588379} +{"epoch": 0, "iter": 7523, "iter_tflops": 19.155431308079898, "iter_time": 0.7250187377929687, "loss": 0.29285451769828796, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 20.722459182237163, "step_time": 0.6701929779052734} +{"epoch": 0, "iter": 7524, "iter_tflops": 21.419269933415034, "iter_time": 0.6483902893066406, "loss": 0.20699815452098846, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 23.053910144958827, "step_time": 0.60241609954834} +{"epoch": 0, "iter": 7525, "iter_tflops": 35.968763501615356, "iter_time": 0.5735836181640624, "loss": 0.6443581581115723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.86948197291525, "step_time": 0.5174658031463624} +{"epoch": 0, "iter": 7526, "iter_tflops": 38.62834577605321, "iter_time": 0.5340920791625977, "loss": 0.625908613204956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41670838775844, "step_time": 0.4863907241821289} +{"epoch": 0, "iter": 7527, "iter_tflops": 37.1772401830706, "iter_time": 0.5549388122558594, "loss": 0.4414930045604706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.6945316487638, "step_time": 0.5069745903015137} +{"epoch": 0, "iter": 7528, "iter_tflops": 36.54129443616843, "iter_time": 0.5645966796874999, "loss": 0.5756173729896545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.81891611932724, "step_time": 0.5181229305267333} +{"epoch": 0, "iter": 7529, "iter_tflops": 11.92480208888568, "iter_time": 1.246721466064453, "loss": 0.12921833992004395, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 12.417517024934082, "step_time": 1.1972527770996093} +{"epoch": 0, "iter": 7530, 
"iter_tflops": 21.24987463978384, "iter_time": 0.6996232681274414, "loss": 0.1553180068731308, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 26.56242297524911, "step_time": 0.5596969356536866} +{"epoch": 0, "iter": 7531, "iter_tflops": 36.74011730361494, "iter_time": 0.4046504974365234, "loss": 0.12967774271965027, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 39.95957873489308, "step_time": 0.3720486354827881} +{"epoch": 0, "iter": 7532, "iter_tflops": 36.114400194594374, "iter_time": 0.4116614608764648, "loss": 0.20084218680858612, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 39.42208822207359, "step_time": 0.37712123870849606} +{"epoch": 0, "iter": 7533, "iter_tflops": 31.620148314601117, "iter_time": 0.6524666900634766, "loss": 0.2599562108516693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.75644637184211, "step_time": 0.6111749229431154} +{"epoch": 0, "iter": 7534, "iter_tflops": 17.111969554135293, "iter_time": 1.2056527709960938, "loss": 0.31450003385543823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.885417430114046, "step_time": 0.9878228950500489} +{"epoch": 0, "iter": 7535, "iter_tflops": 48.5017674092289, "iter_time": 0.4253678703308105, "loss": 0.2183278501033783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.7858104561382, "step_time": 0.39084544372558594} +{"epoch": 0, "iter": 7536, "iter_tflops": 50.674470387506055, "iter_time": 0.40712992858886726, "loss": 0.2641839385032654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.776387302636515, "step_time": 0.3766420993804932} +{"epoch": 0, "iter": 7537, "iter_tflops": 24.27439712755218, "iter_time": 0.8499116744995117, "loss": 0.26863718032836914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.470854556849435, "step_time": 0.8099882736206055} +{"epoch": 0, "iter": 7538, "iter_tflops": 14.889032062032147, "iter_time": 1.3856571350097657, "loss": 0.28637510538101196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.110668827594036, "step_time": 1.1391679515838624} +{"epoch": 0, "iter": 
7539, "iter_tflops": 38.11539950447808, "iter_time": 0.5412797393798828, "loss": 0.28707167506217957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.85336824156912, "step_time": 0.4929374713897705} +{"epoch": 0, "iter": 7540, "iter_tflops": 43.58865745200957, "iter_time": 0.4733133506774903, "loss": 0.3908170163631439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.79772490898663, "step_time": 0.431633378982544} +{"epoch": 0, "iter": 7541, "iter_tflops": 35.54490958756885, "iter_time": 0.5804232940673828, "loss": 0.21787045896053314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.49091917597928, "step_time": 0.5224262676239013} +{"epoch": 0, "iter": 7542, "iter_tflops": 37.55991769362631, "iter_time": 0.5492848434448242, "loss": 0.1340908259153366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.05829296334566, "step_time": 0.5024829826354981} +{"epoch": 0, "iter": 7543, "iter_tflops": 38.1600480822483, "iter_time": 0.5406464233398438, "loss": 0.2052985578775406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1227189920431, "step_time": 0.48978541755676275} +{"epoch": 0, "iter": 7544, "iter_tflops": 44.522624843961324, "iter_time": 0.4633844833374023, "loss": 0.14244559407234192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.57724518048855, "step_time": 0.42470694732666014} +{"epoch": 0, "iter": 7545, "iter_tflops": 23.031178518066568, "iter_time": 0.7219558868408203, "loss": 0.035476792603731155, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 25.122166886727765, "step_time": 0.6618654747009277} +{"epoch": 0, "iter": 7546, "iter_tflops": 31.89665918428617, "iter_time": 0.5212926788330078, "loss": 0.053083959966897964, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 35.81545566378576, "step_time": 0.4642547359466553} +{"epoch": 0, "iter": 7547, "iter_tflops": 32.20193124655047, "iter_time": 0.5163508605957031, "loss": 0.02980833873152733, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 35.45072837328469, "step_time": 0.4690311222076416} +{"epoch": 0, "iter": 
7548, "iter_tflops": 32.48867214352123, "iter_time": 0.5117936134338379, "loss": 0.03465087339282036, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 35.51752372093285, "step_time": 0.4681490478515625} +{"epoch": 0, "iter": 7549, "iter_tflops": 18.13465376285795, "iter_time": 1.1376612854003907, "loss": 0.6099212169647217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.344185804548584, "step_time": 1.0665268478393555} +{"epoch": 0, "iter": 7550, "iter_tflops": 24.36589092014203, "iter_time": 0.8467202606201173, "loss": 0.7898847460746765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.745483927559, "step_time": 0.6300439338684083} +{"epoch": 0, "iter": 7551, "iter_tflops": 37.03880171059906, "iter_time": 0.5570129852294923, "loss": 0.925686240196228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.32692296879086, "step_time": 0.511596025466919} +{"epoch": 0, "iter": 7552, "iter_tflops": 35.91052722074814, "iter_time": 0.5745138015747069, "loss": 0.7781568169593811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.068033845214515, "step_time": 0.5280811824798584} +{"epoch": 0, "iter": 7553, "iter_tflops": 25.517111701929643, "iter_time": 0.8085199356079102, "loss": 0.4569053053855896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.5509071437933, "step_time": 0.7488353614807128} +{"epoch": 0, "iter": 7554, "iter_tflops": 13.144843939374576, "iter_time": 1.5695198516845703, "loss": 0.4534045457839966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.808469189447765, "step_time": 1.3050658645629882} +{"epoch": 0, "iter": 7555, "iter_tflops": 12.835964446700416, "iter_time": 1.6072881469726563, "loss": 0.47614723443984985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.03216914196736, "step_time": 1.3724628372192385} +{"epoch": 0, "iter": 7556, "iter_tflops": 38.99847761360737, "iter_time": 0.529023048400879, "loss": 0.443557471036911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.63126831949718, "step_time": 0.483942756652832} +{"epoch": 0, "iter": 7557, 
"iter_tflops": 23.62920914749372, "iter_time": 0.6863233184814452, "loss": 0.14542672038078308, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 25.145270190422856, "step_time": 0.6449434471130371} +{"epoch": 0, "iter": 7558, "iter_tflops": 9.939603487852835, "iter_time": 1.6315819091796877, "loss": 0.2722233533859253, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 12.151898425901793, "step_time": 1.3345468063354493} +{"epoch": 0, "iter": 7559, "iter_tflops": 28.26752543457943, "iter_time": 0.5737070007324218, "loss": 0.24103128910064697, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.08074298709747, "step_time": 0.5391248893737793} +{"epoch": 0, "iter": 7560, "iter_tflops": 28.306726961480198, "iter_time": 0.5729124832153321, "loss": 0.4857832193374634, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 29.968683024106188, "step_time": 0.5411408042907715} +{"epoch": 0, "iter": 7561, "iter_tflops": 33.261179692284436, "iter_time": 0.6202754592895507, "loss": 0.31418755650520325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.7756343498302, "step_time": 0.5766800193786621} +{"epoch": 0, "iter": 7562, "iter_tflops": 17.53551007073591, "iter_time": 1.1765322723388671, "loss": 0.37045833468437195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.082820262540128, "step_time": 0.9785737037658692} +{"epoch": 0, "iter": 7563, "iter_tflops": 38.694149775886096, "iter_time": 0.5331837921142578, "loss": 0.4340595006942749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.33691701681474, "step_time": 0.48730741310119635} +{"epoch": 0, "iter": 7564, "iter_tflops": 39.35189212209413, "iter_time": 0.5242719573974609, "loss": 0.5091841220855713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.18643957279804, "step_time": 0.477721565246582} +{"epoch": 0, "iter": 7565, "iter_tflops": 30.89834980685774, "iter_time": 0.6677085876464844, "loss": 0.24530838429927826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.488318607010754, "step_time": 0.5982052574157715} +{"epoch": 0, "iter": 
7566, "iter_tflops": 39.10292127319616, "iter_time": 0.5276100311279297, "loss": 0.09737207740545273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.1962219606744, "step_time": 0.4776133785247803} +{"epoch": 0, "iter": 7567, "iter_tflops": 41.32675850395973, "iter_time": 0.4992187690734863, "loss": 0.1054961085319519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.52102073756569, "step_time": 0.4532212409973144} +{"epoch": 0, "iter": 7568, "iter_tflops": 38.944077865865275, "iter_time": 0.5297620239257813, "loss": 0.17516684532165527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.73712552377418, "step_time": 0.48274406051635743} +{"epoch": 0, "iter": 7569, "iter_tflops": 19.84452702422035, "iter_time": 1.0396364440917967, "loss": 0.8284134864807129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.408228946065034, "step_time": 0.96369921875} +{"epoch": 0, "iter": 7570, "iter_tflops": 16.250532744984763, "iter_time": 1.2695641326904297, "loss": 0.8812986016273499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.628757576632218, "step_time": 0.9538732604980469} +{"epoch": 0, "iter": 7571, "iter_tflops": 37.996668047406594, "iter_time": 0.5429711227416992, "loss": 0.9588671326637268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.290890319436656, "step_time": 0.4996524257659912} +{"epoch": 0, "iter": 7572, "iter_tflops": 35.76878667836908, "iter_time": 0.5767904205322266, "loss": 0.5201996564865112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.48805459832988, "step_time": 0.536038875579834} +{"epoch": 0, "iter": 7573, "iter_tflops": 18.20679619926344, "iter_time": 1.1331534271240233, "loss": 0.7840652465820312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.64134886951492, "step_time": 1.050390869140625} +{"epoch": 0, "iter": 7574, "iter_tflops": 14.1078161456755, "iter_time": 1.462387466430664, "loss": 0.7055043578147888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.98169397501669, "step_time": 1.0868942222595215} +{"epoch": 0, "iter": 7575, 
"iter_tflops": 46.5890758605271, "iter_time": 0.44283113861083984, "loss": 0.9413911700248718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20578943888033, "step_time": 0.4109305667877198} +{"epoch": 0, "iter": 7576, "iter_tflops": 45.8424858765512, "iter_time": 0.4500430793762207, "loss": 0.6371822357177734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.284565761916234, "step_time": 0.41861165237426756} +{"epoch": 0, "iter": 7577, "iter_tflops": 28.72677353633297, "iter_time": 0.7181834564208984, "loss": 0.7803253531455994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.508173945883108, "step_time": 0.6762480621337892} +{"epoch": 0, "iter": 7578, "iter_tflops": 16.061319663617752, "iter_time": 1.2845204467773437, "loss": 0.8753986358642578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.72638491312569, "step_time": 1.1017125625610351} +{"epoch": 0, "iter": 7579, "iter_tflops": 45.50408939900728, "iter_time": 0.453389877319336, "loss": 0.7645103931427002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.256493296973936, "step_time": 0.4188502292633056} +{"epoch": 0, "iter": 7580, "iter_tflops": 50.94376477632833, "iter_time": 0.4049777946472168, "loss": 0.7366912364959717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.40855783336024, "step_time": 0.37234489250183106} +{"epoch": 0, "iter": 7581, "iter_tflops": 30.18572573965613, "iter_time": 0.6834718399047852, "loss": 1.1121127605438232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.18623060614852, "step_time": 0.6409912910461426} +{"epoch": 0, "iter": 7582, "iter_tflops": 10.527386801072934, "iter_time": 1.9597544860839844, "loss": 0.7307400107383728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.067292659056637, "step_time": 1.5788345794677734} +{"epoch": 0, "iter": 7583, "iter_tflops": 14.599596775466562, "iter_time": 1.4131276245117186, "loss": 0.782471776008606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.220227538780964, "step_time": 1.1980732231140137} +{"epoch": 0, "iter": 7584, 
"iter_tflops": 25.252584930498397, "iter_time": 0.816989372253418, "loss": 0.9036981463432312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.535015961365826, "step_time": 0.6542280979156494} +{"epoch": 0, "iter": 7585, "iter_tflops": 19.887632170862297, "iter_time": 0.7126673812866211, "loss": 0.25559961795806885, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 21.124592325995334, "step_time": 0.6709368171691894} +{"epoch": 0, "iter": 7586, "iter_tflops": 12.702334955798348, "iter_time": 1.115800109863281, "loss": 0.15931996703147888, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 14.84399446133142, "step_time": 0.9548148765563964} +{"epoch": 0, "iter": 7587, "iter_tflops": 25.112019462851176, "iter_time": 0.5644017105102539, "loss": 0.24922508001327515, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 26.87437571316021, "step_time": 0.5273896179199219} +{"epoch": 0, "iter": 7588, "iter_tflops": 25.714379377147917, "iter_time": 0.5511805877685547, "loss": 0.3035217821598053, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 27.344232344163355, "step_time": 0.5183274688720704} +{"epoch": 0, "iter": 7589, "iter_tflops": 26.264972363533435, "iter_time": 0.7854983901977539, "loss": 0.9322788119316101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.65513655488718, "step_time": 0.7460130767822265} +{"epoch": 0, "iter": 7590, "iter_tflops": 15.635205621599665, "iter_time": 1.3195281219482424, "loss": 0.827810525894165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.448294816269854, "step_time": 1.0608176040649415} +{"epoch": 0, "iter": 7591, "iter_tflops": 46.28186153689715, "iter_time": 0.44577060699462884, "loss": 1.0176587104797363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.886006622303356, "step_time": 0.4135647430419922} +{"epoch": 0, "iter": 7592, "iter_tflops": 47.30732109754969, "iter_time": 0.43610783767700195, "loss": 0.844703733921051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.937789946842514, "step_time": 0.405025297164917} +{"epoch": 0, "iter": 
7593, "iter_tflops": 23.701487471716955, "iter_time": 0.8704556427001953, "loss": 0.7199938893318176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.817944705060764, "step_time": 0.8312974243164064} +{"epoch": 0, "iter": 7594, "iter_tflops": 13.029002537102848, "iter_time": 1.5834745178222658, "loss": 0.9222883582115173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.115388774130288, "step_time": 1.3649065742492676} +{"epoch": 0, "iter": 7595, "iter_tflops": 32.14841730540475, "iter_time": 0.6417452316284179, "loss": 0.7609357237815857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.22739084375809, "step_time": 0.5856548843383789} +{"epoch": 0, "iter": 7596, "iter_tflops": 33.698741614803204, "iter_time": 0.6122214813232423, "loss": 0.6030662059783936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.50219787720447, "step_time": 0.5652014045715332} +{"epoch": 0, "iter": 7597, "iter_tflops": 13.470173824105322, "iter_time": 1.1674414215087892, "loss": 0.041325654834508896, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 14.432144897903196, "step_time": 1.0896259002685547} +{"epoch": 0, "iter": 7598, "iter_tflops": 20.50848877175463, "iter_time": 0.7667868194580078, "loss": 0.021218180656433105, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.50830373109448, "step_time": 0.6164909687042237} +{"epoch": 0, "iter": 7599, "iter_tflops": 40.11070617016978, "iter_time": 0.3920558967590332, "loss": 0.021540839225053787, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 43.89011939168438, "step_time": 0.35829565048217776} +{"epoch": 0, "iter": 7600, "iter_tflops": 41.84705746682298, "iter_time": 0.37578840255737306, "loss": 0.0054612113162875175, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 46.12465311471754, "step_time": 0.34093782424926755} +{"epoch": 0, "iter": 7601, "iter_tflops": 33.15317920529033, "iter_time": 0.6222960815429688, "loss": 0.24810269474983215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.49803350021675, "step_time": 0.5811897583007812} +{"epoch": 
0, "iter": 7602, "iter_tflops": 13.983655806066162, "iter_time": 1.4753719482421874, "loss": 0.10218476504087448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.185589652520513, "step_time": 1.2004879627227782} +{"epoch": 0, "iter": 7603, "iter_tflops": 41.66908690007406, "iter_time": 0.49511748504638675, "loss": 0.10620525479316711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.73678322796956, "step_time": 0.4510831775665283} +{"epoch": 0, "iter": 7604, "iter_tflops": 42.52363746294391, "iter_time": 0.4851676559448242, "loss": 0.2014741599559784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.37973772668195, "step_time": 0.44482988739013674} +{"epoch": 0, "iter": 7605, "iter_tflops": 19.67377590795911, "iter_time": 1.0486595764160154, "loss": 0.5845127701759338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.016965980453218, "step_time": 0.9816399536132813} +{"epoch": 0, "iter": 7606, "iter_tflops": 19.63603355474563, "iter_time": 1.0506752014160157, "loss": 0.532634973526001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.65437978157894, "step_time": 0.8368125133514405} +{"epoch": 0, "iter": 7607, "iter_tflops": 48.15438130471688, "iter_time": 0.4284364776611329, "loss": 0.5446441769599915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.056889288392576, "step_time": 0.3963182163238525} +{"epoch": 0, "iter": 7608, "iter_tflops": 49.27812940558657, "iter_time": 0.41866632843017576, "loss": 0.5652632713317871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.970305525901175, "step_time": 0.38948413276672367} +{"epoch": 0, "iter": 7609, "iter_tflops": 32.45134484283941, "iter_time": 0.635754653930664, "loss": 0.7129547595977783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.51724345388828, "step_time": 0.5977039718627929} +{"epoch": 0, "iter": 7610, "iter_tflops": 12.479952776495916, "iter_time": 1.653138748168945, "loss": 0.6269761323928833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.716202010781311, "step_time": 1.4019305725097657} +{"epoch": 
0, "iter": 7611, "iter_tflops": 22.068239631460447, "iter_time": 0.9348771743774413, "loss": 0.7554828524589539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.913359248001356, "step_time": 0.828113676071167} +{"epoch": 0, "iter": 7612, "iter_tflops": 35.105087072014655, "iter_time": 0.5876952667236328, "loss": 0.5082489252090454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.09103672268395, "step_time": 0.5416259384155273} +{"epoch": 0, "iter": 7613, "iter_tflops": 12.271057912002123, "iter_time": 1.2082142181396485, "loss": 0.22146430611610413, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 13.073244143267003, "step_time": 1.1340770874023438} +{"epoch": 0, "iter": 7614, "iter_tflops": 12.131332870662836, "iter_time": 1.2221300659179686, "loss": 0.24300208687782288, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.950336132469518, "step_time": 0.9295143699645996} +{"epoch": 0, "iter": 7615, "iter_tflops": 22.87038017451084, "iter_time": 0.6482649841308594, "loss": 0.19526465237140656, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 24.638792123861133, "step_time": 0.6017367477416992} +{"epoch": 0, "iter": 7616, "iter_tflops": 23.47216898845161, "iter_time": 0.6316445083618164, "loss": 0.23780135810375214, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 25.199193934236924, "step_time": 0.5883547973632812} +{"epoch": 0, "iter": 7617, "iter_tflops": 14.799204522459037, "iter_time": 1.3940677337646485, "loss": 0.08221250027418137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.548675757850944, "step_time": 1.3268714218139648} +{"epoch": 0, "iter": 7618, "iter_tflops": 19.241722308044597, "iter_time": 1.0722061767578124, "loss": 0.06440865248441696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.486892199577344, "step_time": 0.7505793437957764} +{"epoch": 0, "iter": 7619, "iter_tflops": 47.62361225336814, "iter_time": 0.4332114372253418, "loss": 0.10152674466371536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.26668082065307, "step_time": 0.3947274475097656} 
+{"epoch": 0, "iter": 7620, "iter_tflops": 39.90435099627741, "iter_time": 0.5170136337280273, "loss": 0.08238878101110458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.08273149595688, "step_time": 0.4680085105895996} +{"epoch": 0, "iter": 7621, "iter_tflops": 23.336747730913604, "iter_time": 0.8840603561401368, "loss": 0.6177192330360413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.9008425287767, "step_time": 0.8285299377441406} +{"epoch": 0, "iter": 7622, "iter_tflops": 14.778601227808874, "iter_time": 1.3960112457275393, "loss": 0.5939931273460388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.347270002637224, "step_time": 1.1892991523742675} +{"epoch": 0, "iter": 7623, "iter_tflops": 45.2449296705428, "iter_time": 0.4559868621826172, "loss": 0.8010072112083435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.94702490663836, "step_time": 0.4214984169006347} +{"epoch": 0, "iter": 7624, "iter_tflops": 44.38132985317462, "iter_time": 0.46485974121093754, "loss": 0.5715487003326416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.49614230325779, "step_time": 0.4343740882873536} +{"epoch": 0, "iter": 7625, "iter_tflops": 18.673180760681376, "iter_time": 1.1048515930175782, "loss": 0.10517860949039459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.416962567532334, "step_time": 1.062529396057129} +{"epoch": 0, "iter": 7626, "iter_tflops": 15.822516240746262, "iter_time": 1.303907241821289, "loss": 0.05678384751081467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.030407867793564, "step_time": 1.0299886875152588} +{"epoch": 0, "iter": 7627, "iter_tflops": 50.19940773858462, "iter_time": 0.4109828071594238, "loss": 0.11020588874816895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.74560074103045, "step_time": 0.3768539066314697} +{"epoch": 0, "iter": 7628, "iter_tflops": 55.01992205659715, "iter_time": 0.37497496795654295, "loss": 0.08113516122102737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.86047922345316, "step_time": 0.3446529960632324} 
+{"epoch": 0, "iter": 7629, "iter_tflops": 17.247654128606246, "iter_time": 1.1961680908203123, "loss": 0.45900195837020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.854578506012817, "step_time": 1.1555071716308596} +{"epoch": 0, "iter": 7630, "iter_tflops": 17.010135851816266, "iter_time": 1.2128705902099608, "loss": 0.5631473660469055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.820065019143577, "step_time": 0.945510175704956} +{"epoch": 0, "iter": 7631, "iter_tflops": 38.74228390845264, "iter_time": 0.532521354675293, "loss": 0.6534268260002136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.43872899936893, "step_time": 0.48613834571838377} +{"epoch": 0, "iter": 7632, "iter_tflops": 38.76092950079606, "iter_time": 0.5322651901245117, "loss": 0.5654187202453613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45711817462934, "step_time": 0.48592778778076173} +{"epoch": 0, "iter": 7633, "iter_tflops": 15.365040335248208, "iter_time": 1.342729537963867, "loss": 0.5623266100883484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.39235386523446, "step_time": 1.2585802917480469} +{"epoch": 0, "iter": 7634, "iter_tflops": 16.836189520497946, "iter_time": 1.2254015960693359, "loss": 0.48835089802742004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.32157873052585, "step_time": 0.9676156616210936} +{"epoch": 0, "iter": 7635, "iter_tflops": 40.36821727696518, "iter_time": 0.5110726928710938, "loss": 0.4904917776584625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.20487393090234, "step_time": 0.4667153568267822} +{"epoch": 0, "iter": 7636, "iter_tflops": 38.81272314569563, "iter_time": 0.5315549087524414, "loss": 0.6951151490211487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.047004993155724, "step_time": 0.4906673736572265} +{"epoch": 0, "iter": 7637, "iter_tflops": 15.620891928092403, "iter_time": 1.3207372283935546, "loss": 0.2611178159713745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.652247708521898, "step_time": 1.2389374618530273} 
+{"epoch": 0, "iter": 7638, "iter_tflops": 17.404867280044936, "iter_time": 1.1853634490966796, "loss": 0.25386783480644226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.718861974819784, "step_time": 0.9081041793823241} +{"epoch": 0, "iter": 7639, "iter_tflops": 43.37968573072074, "iter_time": 0.4755934295654297, "loss": 0.2105669528245926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.559695075337316, "step_time": 0.44311058044433593} +{"epoch": 0, "iter": 7640, "iter_tflops": 44.02226931430419, "iter_time": 0.46865129470825195, "loss": 0.31390026211738586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.615289708086685, "step_time": 0.4332871570587158} +{"epoch": 0, "iter": 7641, "iter_tflops": 32.22689799583573, "iter_time": 0.6401824188232421, "loss": 0.6030048131942749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.55579119523207, "step_time": 0.5970372200012206} +{"epoch": 0, "iter": 7642, "iter_tflops": 8.941232071010361, "iter_time": 2.307410583496094, "loss": 0.8321064114570618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.596881528523589, "step_time": 1.6377937240600584} +{"epoch": 0, "iter": 7643, "iter_tflops": 10.883287051469958, "iter_time": 1.895667495727539, "loss": 0.5538055896759033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.193249679434656, "step_time": 1.563761318206787} +{"epoch": 0, "iter": 7644, "iter_tflops": 37.27809309593428, "iter_time": 0.5534374694824218, "loss": 0.5563936829566956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.31262093285582, "step_time": 0.4993896064758301} +{"epoch": 0, "iter": 7645, "iter_tflops": 12.611106373095597, "iter_time": 1.1626857299804687, "loss": 0.1422056406736374, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 13.399065316540858, "step_time": 1.0943116607666015} +{"epoch": 0, "iter": 7646, "iter_tflops": 16.833077854687286, "iter_time": 0.8710678787231445, "loss": 0.2377568781375885, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 19.785688438961376, "step_time": 0.741078758239746} 
+{"epoch": 0, "iter": 7647, "iter_tflops": 22.607812216256598, "iter_time": 0.6485702056884766, "loss": 0.1455473154783249, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.320565413449593, "step_time": 0.6028952522277833} +{"epoch": 0, "iter": 7648, "iter_tflops": 22.328459163818515, "iter_time": 0.6566845169067382, "loss": 0.39117005467414856, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.006470024418107, "step_time": 0.6107834014892577} +{"epoch": 0, "iter": 7649, "iter_tflops": 18.69490047120134, "iter_time": 1.1035679779052736, "loss": 0.46426922082901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.064059421163563, "step_time": 1.0282611846923828} +{"epoch": 0, "iter": 7650, "iter_tflops": 18.663388143707245, "iter_time": 1.1054313049316407, "loss": 0.4830106198787689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.91617855344053, "step_time": 0.9002850742340088} +{"epoch": 0, "iter": 7651, "iter_tflops": 46.381213372510736, "iter_time": 0.44481573486328124, "loss": 0.5259147882461548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.23524166872786, "step_time": 0.4106896438598633} +{"epoch": 0, "iter": 7652, "iter_tflops": 49.2573382603494, "iter_time": 0.4188430442810059, "loss": 0.6187169551849365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.401530733210805, "step_time": 0.3863389911651611} +{"epoch": 0, "iter": 7653, "iter_tflops": 28.347519996608515, "iter_time": 0.7277918319702148, "loss": 0.6786127090454102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.987757512924034, "step_time": 0.687983871459961} +{"epoch": 0, "iter": 7654, "iter_tflops": 15.4456014340262, "iter_time": 1.3357261352539063, "loss": 0.8221737146377563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.672277286943334, "step_time": 1.1049050521850585} +{"epoch": 0, "iter": 7655, "iter_tflops": 32.89496658159481, "iter_time": 0.6271808624267579, "loss": 0.7150232195854187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.64324785099376, "step_time": 0.5788219299316406} 
+{"epoch": 0, "iter": 7656, "iter_tflops": 40.11538264271272, "iter_time": 0.5142938232421874, "loss": 0.8349719047546387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.443201490939664, "step_time": 0.47489809226989743} +{"epoch": 0, "iter": 7657, "iter_tflops": 18.356332308871124, "iter_time": 1.1239224243164063, "loss": 0.2865085005760193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.477722193620284, "step_time": 1.0592148971557618} +{"epoch": 0, "iter": 7658, "iter_tflops": 20.351650069830853, "iter_time": 1.0137307510375977, "loss": 0.30579328536987305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.719735939070205, "step_time": 0.8346000766754151} +{"epoch": 0, "iter": 7659, "iter_tflops": 41.8385417246321, "iter_time": 0.4931121559143067, "loss": 0.3674532175064087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88956547005368, "step_time": 0.44958136558532713} +{"epoch": 0, "iter": 7660, "iter_tflops": 39.87199964497123, "iter_time": 0.5174331283569336, "loss": 0.2586982846260071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.71846553895481, "step_time": 0.4719079971313477} +{"epoch": 0, "iter": 7661, "iter_tflops": 29.229935397760087, "iter_time": 0.705820701599121, "loss": 0.07513268291950226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.713787486097324, "step_time": 0.6505401954650878} +{"epoch": 0, "iter": 7662, "iter_tflops": 9.479898132137745, "iter_time": 2.1762990722656252, "loss": 0.08675079047679901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.156797108162841, "step_time": 1.849195007324219} +{"epoch": 0, "iter": 7663, "iter_tflops": 19.49548844753613, "iter_time": 1.0582496337890626, "loss": 0.05559835210442543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.101128788304507, "step_time": 0.8560218772888184} +{"epoch": 0, "iter": 7664, "iter_tflops": 40.985823830358896, "iter_time": 0.5033714485168457, "loss": 0.10551097989082336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.05845952153799, "step_time": 
0.4578739204406738} +{"epoch": 0, "iter": 7665, "iter_tflops": 17.415832631101544, "iter_time": 0.9429538879394532, "loss": 0.2660185992717743, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 18.603840909665603, "step_time": 0.882738525390625} +{"epoch": 0, "iter": 7666, "iter_tflops": 7.635631329456826, "iter_time": 2.1507490844726562, "loss": 0.2657983899116516, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 8.403414607180892, "step_time": 1.9542445373535158} +{"epoch": 0, "iter": 7667, "iter_tflops": 7.527047857853694, "iter_time": 2.1817752990722656, "loss": 0.19701677560806274, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 9.91148065288154, "step_time": 1.6568994750976564} +{"epoch": 0, "iter": 7668, "iter_tflops": 25.33801124412708, "iter_time": 0.6481300735473632, "loss": 0.2262016236782074, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 27.302880923027903, "step_time": 0.6014869689941407} +{"epoch": 0, "iter": 7669, "iter_tflops": 16.78593090242357, "iter_time": 1.022373046875, "loss": 0.2983472943305969, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 18.11844512352967, "step_time": 0.9471830062866211} +{"epoch": 0, "iter": 7670, "iter_tflops": 10.981902374226246, "iter_time": 1.5627058715820312, "loss": 0.17059773206710815, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 14.844105407169078, "step_time": 1.1561143531799316} +{"epoch": 0, "iter": 7671, "iter_tflops": 25.070719628000322, "iter_time": 0.684522964477539, "loss": 0.22236056625843048, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 26.93478293295846, "step_time": 0.6371494941711425} +{"epoch": 0, "iter": 7672, "iter_tflops": 27.174479286617817, "iter_time": 0.631529426574707, "loss": 0.3203815817832947, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 29.198192638789134, "step_time": 0.5877584114074706} +{"epoch": 0, "iter": 7673, "iter_tflops": 24.437099403397408, "iter_time": 0.8442529602050781, "loss": 0.7519174218177795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.30031449412269, "step_time": 
0.7844428443908692} +{"epoch": 0, "iter": 7674, "iter_tflops": 8.056404489432493, "iter_time": 2.5608313903808595, "loss": 0.9962704181671143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.126960147531877, "step_time": 2.2604561843872073} +{"epoch": 0, "iter": 7675, "iter_tflops": 14.990330964432356, "iter_time": 1.376293395996094, "loss": 0.7387847304344177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.177379317882348, "step_time": 1.1349872360229492} +{"epoch": 0, "iter": 7676, "iter_tflops": 33.8681976267902, "iter_time": 0.6091582946777344, "loss": 0.6016941666603088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.012610589203796, "step_time": 0.5574071426391601} +{"epoch": 0, "iter": 7677, "iter_tflops": 11.30555548085705, "iter_time": 1.3366920776367188, "loss": 0.2654312252998352, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 11.87638842485191, "step_time": 1.272444610595703} +{"epoch": 0, "iter": 7678, "iter_tflops": 11.021915833677069, "iter_time": 1.3710907135009767, "loss": 0.1624312698841095, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 12.765044717162843, "step_time": 1.1838616142272949} +{"epoch": 0, "iter": 7679, "iter_tflops": 21.37204106414462, "iter_time": 0.7070942077636719, "loss": 0.22307394444942474, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 22.857657105607807, "step_time": 0.6611371574401855} +{"epoch": 0, "iter": 7680, "iter_tflops": 22.882202315363934, "iter_time": 0.6604279708862304, "loss": 0.1976693868637085, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.66244723738296, "step_time": 0.6127553482055664} +{"epoch": 0, "iter": 7681, "iter_tflops": 18.102971401337435, "iter_time": 1.139652328491211, "loss": 0.07076887041330338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.395884972407927, "step_time": 1.0636840515136718} +{"epoch": 0, "iter": 7682, "iter_tflops": 13.348251013591076, "iter_time": 1.545602752685547, "loss": 0.09921605885028839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.201662210661866, "step_time": 
1.1993662738800048} +{"epoch": 0, "iter": 7683, "iter_tflops": 40.71687010925161, "iter_time": 0.5066964492797852, "loss": 0.0812561884522438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.846138591363804, "step_time": 0.4600416927337646} +{"epoch": 0, "iter": 7684, "iter_tflops": 47.4466628962164, "iter_time": 0.4348270721435547, "loss": 0.09254733473062515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.13749701229032, "step_time": 0.39570548439025877} +{"epoch": 0, "iter": 7685, "iter_tflops": 14.572675066412735, "iter_time": 0.8359193725585936, "loss": 0.008040080778300762, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 16.066156706965955, "step_time": 0.7582137794494629} +{"epoch": 0, "iter": 7686, "iter_tflops": 10.512213474706224, "iter_time": 1.1588027038574218, "loss": 0.005470317788422108, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 14.107443826673459, "step_time": 0.8634860820770263} +{"epoch": 0, "iter": 7687, "iter_tflops": 27.086237024077416, "iter_time": 0.44973324966430667, "loss": 0.004525665193796158, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 30.117043598245342, "step_time": 0.4044746742248535} +{"epoch": 0, "iter": 7688, "iter_tflops": 28.63256835812541, "iter_time": 0.4254449424743652, "loss": 0.000924602325540036, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 31.634419040644843, "step_time": 0.3850736560821533} +{"epoch": 0, "iter": 7689, "iter_tflops": 36.871910281311465, "iter_time": 0.559534164428711, "loss": 0.027378585189580917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.96509553465185, "step_time": 0.5036261539459228} +{"epoch": 0, "iter": 7690, "iter_tflops": 48.13635375312729, "iter_time": 0.42859693145751954, "loss": 0.0219587329775095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.6861171542321, "step_time": 0.3842910346984863} +{"epoch": 0, "iter": 7691, "iter_tflops": 52.88657223626295, "iter_time": 0.3901007881164551, "loss": 0.032559171319007874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.65029592008921, 
"step_time": 0.3578662204742432} +{"epoch": 0, "iter": 7692, "iter_tflops": 54.39876603644169, "iter_time": 0.3792566452026367, "loss": 0.0332125760614872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.51422534437976, "step_time": 0.3466581878662109} +{"epoch": 0, "iter": 7693, "iter_tflops": 21.066071761861874, "iter_time": 0.9793517150878907, "loss": 0.30060046911239624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.007114933279553, "step_time": 0.9374737930297852} +{"epoch": 0, "iter": 7694, "iter_tflops": 12.254702194366587, "iter_time": 1.683524673461914, "loss": 0.30733388662338257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.357339294025616, "step_time": 1.3434028587341307} +{"epoch": 0, "iter": 7695, "iter_tflops": 34.620850681193055, "iter_time": 0.595915267944336, "loss": 0.28612416982650757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.94506004813429, "step_time": 0.5437096023559571} +{"epoch": 0, "iter": 7696, "iter_tflops": 42.39510093406958, "iter_time": 0.4866386222839355, "loss": 0.31424686312675476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.439450377593054, "step_time": 0.4442579174041748} +{"epoch": 0, "iter": 7697, "iter_tflops": 16.54081999680504, "iter_time": 1.2472835998535157, "loss": 0.058223649859428406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.43144449572392, "step_time": 1.1835561599731446} +{"epoch": 0, "iter": 7698, "iter_tflops": 16.250945062971336, "iter_time": 1.2695319213867187, "loss": 0.048602838069200516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.14958629526694, "step_time": 0.9754844951629638} +{"epoch": 0, "iter": 7699, "iter_tflops": 47.396081072962495, "iter_time": 0.4352911262512207, "loss": 0.03942161053419113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.345708879910454, "step_time": 0.4018075504302978} +{"epoch": 0, "iter": 7700, "iter_tflops": 52.690482644684785, "iter_time": 0.3915525627136231, "loss": 0.0508461594581604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
57.06626181883869, "step_time": 0.3615287361145019} +{"epoch": 0, "iter": 7701, "iter_tflops": 31.85825530093494, "iter_time": 0.6475901870727538, "loss": 0.835024356842041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.83749013135203, "step_time": 0.6097111053466796} +{"epoch": 0, "iter": 7702, "iter_tflops": 9.029891436091354, "iter_time": 2.284755432128906, "loss": 0.6911998391151428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.710667235144603, "step_time": 1.7617350997924806} +{"epoch": 0, "iter": 7703, "iter_tflops": 16.91866250055868, "iter_time": 1.2194281616210938, "loss": 0.8702641129493713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.136639698866446, "step_time": 1.024554931640625} +{"epoch": 0, "iter": 7704, "iter_tflops": 38.129333094516284, "iter_time": 0.5410819396972655, "loss": 0.9136171340942383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.69730963800607, "step_time": 0.4947823657989502} +{"epoch": 0, "iter": 7705, "iter_tflops": 21.872355674776518, "iter_time": 0.7133587112426758, "loss": 0.25360798835754395, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 23.806278737488466, "step_time": 0.6554084167480467} +{"epoch": 0, "iter": 7706, "iter_tflops": 26.102397994438785, "iter_time": 0.5977548675537109, "loss": 0.3315972089767456, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 27.77130623944826, "step_time": 0.5618329696655273} +{"epoch": 0, "iter": 7707, "iter_tflops": 26.531135163881608, "iter_time": 0.5880952835083009, "loss": 0.27946656942367554, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 28.11977026799088, "step_time": 0.5548706588745117} +{"epoch": 0, "iter": 7708, "iter_tflops": 29.011692232094987, "iter_time": 0.5378119735717772, "loss": 0.22973614931106567, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.79597393599096, "step_time": 0.5066517944335938} +{"epoch": 0, "iter": 7709, "iter_tflops": 26.491871024778234, "iter_time": 0.7787707214355469, "loss": 0.06564613431692123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
28.013550060970395, "step_time": 0.7364683685302734} +{"epoch": 0, "iter": 7710, "iter_tflops": 13.88468222256932, "iter_time": 1.4858887786865234, "loss": 0.0463205948472023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.026541448406906, "step_time": 1.1444842910766602} +{"epoch": 0, "iter": 7711, "iter_tflops": 52.9865119337393, "iter_time": 0.3893650054931641, "loss": 0.02432979829609394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.101388838996705, "step_time": 0.3550877857208252} +{"epoch": 0, "iter": 7712, "iter_tflops": 52.68620279223012, "iter_time": 0.39158436965942384, "loss": 0.05277971550822258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.417932351824135, "step_time": 0.35931446266174316} +{"epoch": 0, "iter": 7713, "iter_tflops": 28.07915198545747, "iter_time": 0.7347477416992186, "loss": 0.7273597121238708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.748165408944573, "step_time": 0.6935249023437501} +{"epoch": 0, "iter": 7714, "iter_tflops": 16.550597855026368, "iter_time": 1.2465467224121094, "loss": 0.7715374827384949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.527215779984658, "step_time": 1.0565302162170411} +{"epoch": 0, "iter": 7715, "iter_tflops": 35.72184147441803, "iter_time": 0.5775484313964844, "loss": 0.7613019943237305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.79409893979963, "step_time": 0.5318100967407227} +{"epoch": 0, "iter": 7716, "iter_tflops": 42.7790968755468, "iter_time": 0.4822704315185547, "loss": 0.7380905151367188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.52266639271279, "step_time": 0.443463264465332} +{"epoch": 0, "iter": 7717, "iter_tflops": 23.020710609519828, "iter_time": 0.8961970748901368, "loss": 0.841053307056427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.49095461978847, "step_time": 0.8423964614868165} +{"epoch": 0, "iter": 7718, "iter_tflops": 8.184777806437065, "iter_time": 2.520666290283203, "loss": 0.724410355091095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
9.933464966409487, "step_time": 2.076928199768066} +{"epoch": 0, "iter": 7719, "iter_tflops": 16.40105047271189, "iter_time": 1.2579129333496093, "loss": 0.8794534802436829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.59901739892165, "step_time": 1.0015571670532226} +{"epoch": 0, "iter": 7720, "iter_tflops": 36.8696892296868, "iter_time": 0.5595678710937501, "loss": 0.7669434547424316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.60265867336114, "step_time": 0.5081217384338379} +{"epoch": 0, "iter": 7721, "iter_tflops": 24.465555248393606, "iter_time": 0.6093363265991212, "loss": 0.3412708044052124, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 26.2855659232368, "step_time": 0.5671459236145019} +{"epoch": 0, "iter": 7722, "iter_tflops": 26.504476242373787, "iter_time": 0.562461654663086, "loss": 0.19068624079227448, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 28.279523398442155, "step_time": 0.5271571006774902} +{"epoch": 0, "iter": 7723, "iter_tflops": 27.054115004297262, "iter_time": 0.5510345306396484, "loss": 0.38248732686042786, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 28.81388175390808, "step_time": 0.5173808822631836} +{"epoch": 0, "iter": 7724, "iter_tflops": 27.765790731477892, "iter_time": 0.5369107513427734, "loss": 0.2964465618133545, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 29.52602098097428, "step_time": 0.5049021530151366} +{"epoch": 0, "iter": 7725, "iter_tflops": 30.2313356165029, "iter_time": 0.6824406890869141, "loss": 0.1666671186685562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.121724505411585, "step_time": 0.6422785148620606} +{"epoch": 0, "iter": 7726, "iter_tflops": 24.95280458196664, "iter_time": 0.8268045959472657, "loss": 0.2508251368999481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.289722437868402, "step_time": 0.7292787532806397} +{"epoch": 0, "iter": 7727, "iter_tflops": 45.91802387761146, "iter_time": 0.44930273056030273, "loss": 0.21448808908462524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
50.34205996782981, "step_time": 0.40981822204589835} +{"epoch": 0, "iter": 7728, "iter_tflops": 38.70872755663042, "iter_time": 0.5329829940795898, "loss": 0.3141676187515259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.58514166026457, "step_time": 0.48446694564819337} +{"epoch": 0, "iter": 7729, "iter_tflops": 30.344245457989942, "iter_time": 0.6799013519287109, "loss": 0.3630765378475189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.42615992056901, "step_time": 0.6172139892578126} +{"epoch": 0, "iter": 7730, "iter_tflops": 37.459881056364225, "iter_time": 0.550751708984375, "loss": 0.49210721254348755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.30589127647507, "step_time": 0.4994709682464599} +{"epoch": 0, "iter": 7731, "iter_tflops": 41.931595354599146, "iter_time": 0.49201785278320315, "loss": 0.40103447437286377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.762703575002604, "step_time": 0.45082768058776856} +{"epoch": 0, "iter": 7732, "iter_tflops": 37.10870811722639, "iter_time": 0.5559636688232421, "loss": 0.4856485426425934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.79175404407362, "step_time": 0.5057662754058837} +{"epoch": 0, "iter": 7733, "iter_tflops": 20.616822708798935, "iter_time": 1.0006921920776368, "loss": 0.003605534555390477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.26731775393767, "step_time": 0.9265190238952637} +{"epoch": 0, "iter": 7734, "iter_tflops": 44.93017993201279, "iter_time": 0.45918119049072265, "loss": 0.03997362032532692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.136959610399266, "step_time": 0.411494707107544} +{"epoch": 0, "iter": 7735, "iter_tflops": 42.252942310533676, "iter_time": 0.4882759017944336, "loss": 0.0150762889534235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.83683000924247, "step_time": 0.4404886817932129} +{"epoch": 0, "iter": 7736, "iter_tflops": 50.38585940788156, "iter_time": 0.40946197509765625, "loss": 0.008027602918446064, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 55.537407171348924, "step_time": 0.37148103523254394} +{"epoch": 0, "iter": 7737, "iter_tflops": 33.482531793557385, "iter_time": 0.6161748352050781, "loss": 0.8343183994293213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.79118857792054, "step_time": 0.5607618103027344} +{"epoch": 0, "iter": 7738, "iter_tflops": 37.76918622210653, "iter_time": 0.5462414093017579, "loss": 0.933468759059906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.21254532217198, "step_time": 0.500602264404297} +{"epoch": 0, "iter": 7739, "iter_tflops": 37.45575401947151, "iter_time": 0.5508123931884766, "loss": 0.8746500015258789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.77220464332645, "step_time": 0.5060087795257567} +{"epoch": 0, "iter": 7740, "iter_tflops": 32.133382221709454, "iter_time": 0.6420455017089846, "loss": 0.7046380043029785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.93242143570189, "step_time": 0.590600154876709} +{"epoch": 0, "iter": 7741, "iter_tflops": 14.668216592635009, "iter_time": 1.4065168304443358, "loss": 0.7702187299728394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.515676027055934, "step_time": 1.3296934967041016} +{"epoch": 0, "iter": 7742, "iter_tflops": 14.489774819616615, "iter_time": 1.423838104248047, "loss": 0.7372969388961792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.84851453877845, "step_time": 1.0945739765167237} +{"epoch": 0, "iter": 7743, "iter_tflops": 40.95361636863597, "iter_time": 0.5037673187255859, "loss": 0.6569474935531616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.707758159251796, "step_time": 0.4614656238555908} +{"epoch": 0, "iter": 7744, "iter_tflops": 41.577490071040764, "iter_time": 0.49620824813842773, "loss": 0.8022879362106323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.45032094840809, "step_time": 0.45392624473571774} +{"epoch": 0, "iter": 7745, "iter_tflops": 25.980341381267127, "iter_time": 0.794104019165039, "loss": 0.6421089172363281, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 28.043694371096915, "step_time": 0.7356767349243164} +{"epoch": 0, "iter": 7746, "iter_tflops": 22.032734620445314, "iter_time": 0.9363836975097657, "loss": 0.7257391214370728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.48605596556307, "step_time": 0.7789417018890381} +{"epoch": 0, "iter": 7747, "iter_tflops": 49.66784992324794, "iter_time": 0.41538124847412106, "loss": 0.6041874885559082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.79008772027048, "step_time": 0.3835482406616211} +{"epoch": 0, "iter": 7748, "iter_tflops": 47.779099944081864, "iter_time": 0.4318016357421875, "loss": 0.5359957814216614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.29665605714187, "step_time": 0.4021917819976807} +{"epoch": 0, "iter": 7749, "iter_tflops": 38.52380661542297, "iter_time": 0.5355414047241212, "loss": 0.24298053979873657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.66185212504376, "step_time": 0.4952034645080566} +{"epoch": 0, "iter": 7750, "iter_tflops": 37.002873719772225, "iter_time": 0.5575538177490234, "loss": 0.23567350208759308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59189748778025, "step_time": 0.5082564449310303} +{"epoch": 0, "iter": 7751, "iter_tflops": 39.64864111541263, "iter_time": 0.5203480606079102, "loss": 0.36554890871047974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.43142154456024, "step_time": 0.47502689933776854} +{"epoch": 0, "iter": 7752, "iter_tflops": 39.67548136287016, "iter_time": 0.5199960479736329, "loss": 0.3876788318157196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.53130424503861, "step_time": 0.47393694877624515} +{"epoch": 0, "iter": 7753, "iter_tflops": 23.840076286101468, "iter_time": 0.5535356369018554, "loss": 0.01052464172244072, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 26.73777454342711, "step_time": 0.49354637908935545} +{"epoch": 0, "iter": 7754, "iter_tflops": 28.473104042772366, "iter_time": 0.4634665679931641, "loss": 0.004039566498249769, "lr": 3e-05, "seqlen": 
5296.0, "step_tflops": 31.596497743465395, "step_time": 0.41765172576904297} +{"epoch": 0, "iter": 7755, "iter_tflops": 29.268499837419217, "iter_time": 0.4508714790344238, "loss": 0.025958769023418427, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 32.575136603840875, "step_time": 0.40510441970825195} +{"epoch": 0, "iter": 7756, "iter_tflops": 31.960193331567318, "iter_time": 0.41289899826049803, "loss": 0.0035615256056189537, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 35.395035279855044, "step_time": 0.37283002281188965} +{"epoch": 0, "iter": 7757, "iter_tflops": 22.681941492141046, "iter_time": 0.9095823440551759, "loss": 0.41668641567230225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.60797672136103, "step_time": 0.8383904838562012} +{"epoch": 0, "iter": 7758, "iter_tflops": 16.946392247490518, "iter_time": 1.2174327850341795, "loss": 0.5184047818183899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.641660499668337, "step_time": 0.9533045539855958} +{"epoch": 0, "iter": 7759, "iter_tflops": 38.583665467427096, "iter_time": 0.534710563659668, "loss": 0.47924530506134033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.193948426122454, "step_time": 0.48895858955383303} +{"epoch": 0, "iter": 7760, "iter_tflops": 36.02972491802247, "iter_time": 0.5726131286621094, "loss": 0.48797595500946045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.130501936326624, "step_time": 0.527238151550293} +{"epoch": 0, "iter": 7761, "iter_tflops": 19.591481368584592, "iter_time": 1.0530644989013673, "loss": 0.8511126637458801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.14193532897005, "step_time": 0.9758375091552733} +{"epoch": 0, "iter": 7762, "iter_tflops": 21.860203251433536, "iter_time": 0.9437740936279297, "loss": 0.666913628578186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.26135455723447, "step_time": 0.7856066017150879} +{"epoch": 0, "iter": 7763, "iter_tflops": 37.82810388707814, "iter_time": 0.5453906326293946, "loss": 0.7077251076698303, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 41.39097708928187, "step_time": 0.49844422531127924} +{"epoch": 0, "iter": 7764, "iter_tflops": 36.551254273945865, "iter_time": 0.5644428329467773, "loss": 0.7113234400749207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.59589617542628, "step_time": 0.5210412063598633} +{"epoch": 0, "iter": 7765, "iter_tflops": 34.33177002468461, "iter_time": 0.6009329986572266, "loss": 0.13919873535633087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.29605511483863, "step_time": 0.5387263374328612} +{"epoch": 0, "iter": 7766, "iter_tflops": 41.511154183321544, "iter_time": 0.4970012016296386, "loss": 0.05264941602945328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81410253123871, "step_time": 0.44070253181457514} +{"epoch": 0, "iter": 7767, "iter_tflops": 43.34469574505004, "iter_time": 0.475977352142334, "loss": 0.04949188977479935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59194178364491, "step_time": 0.43349972152709965} +{"epoch": 0, "iter": 7768, "iter_tflops": 43.27845409473926, "iter_time": 0.47670587921142576, "loss": 0.09353066235780716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.42850948530915, "step_time": 0.4349935035705567} +{"epoch": 0, "iter": 7769, "iter_tflops": 17.319755121242608, "iter_time": 1.1911885223388672, "loss": 0.5329324007034302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.652628687215465, "step_time": 1.1060689544677735} +{"epoch": 0, "iter": 7770, "iter_tflops": 14.874624824522984, "iter_time": 1.3869992523193357, "loss": 0.34888437390327454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.924351123243078, "step_time": 1.1510092258453368} +{"epoch": 0, "iter": 7771, "iter_tflops": 42.02050596218112, "iter_time": 0.4909767990112304, "loss": 0.3500557243824005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.985190467152535, "step_time": 0.4486464729309082} +{"epoch": 0, "iter": 7772, "iter_tflops": 40.98036509455513, "iter_time": 0.5034384994506836, "loss": 0.44858115911483765, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.934472298779376, "step_time": 0.4591373271942139} +{"epoch": 0, "iter": 7773, "iter_tflops": 28.54725112608914, "iter_time": 0.7226998291015625, "loss": 0.0052251555025577545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.85380760750112, "step_time": 0.6686725273132323} +{"epoch": 0, "iter": 7774, "iter_tflops": 8.942513556543084, "iter_time": 2.3070799255371095, "loss": 0.005513760261237621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.543341022469765, "step_time": 1.9567889785766601} +{"epoch": 0, "iter": 7775, "iter_tflops": 10.439809818835514, "iter_time": 1.9761943817138672, "loss": 0.0012581620831042528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.966678034616246, "step_time": 1.7240451736450197} +{"epoch": 0, "iter": 7776, "iter_tflops": 54.408618377973575, "iter_time": 0.37918796920776365, "loss": 0.004704395774751902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.90740724938765, "step_time": 0.3443830146789551} +{"epoch": 0, "iter": 7777, "iter_tflops": 18.557189741814174, "iter_time": 0.8297731323242188, "loss": 0.191411554813385, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 19.55914806307667, "step_time": 0.7872662658691407} +{"epoch": 0, "iter": 7778, "iter_tflops": 8.12104395209874, "iter_time": 1.8960933532714843, "loss": 0.31408610939979553, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 9.118034748349977, "step_time": 1.6887693328857423} +{"epoch": 0, "iter": 7779, "iter_tflops": 8.980992779663183, "iter_time": 1.7145384521484375, "loss": 0.19768334925174713, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 10.487153790946826, "step_time": 1.4682970962524413} +{"epoch": 0, "iter": 7780, "iter_tflops": 22.915396016554787, "iter_time": 0.6719612197875975, "loss": 0.21056632697582245, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.049784420064288, "step_time": 0.5489617042541504} +{"epoch": 0, "iter": 7781, "iter_tflops": 19.730256376241382, "iter_time": 0.736955352783203, "loss": 
0.16100475192070007, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 20.912156501839195, "step_time": 0.695304573059082} +{"epoch": 0, "iter": 7782, "iter_tflops": 7.727763837712561, "iter_time": 1.8815686340332032, "loss": 0.26578447222709656, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 8.806385882499368, "step_time": 1.651110710144043} +{"epoch": 0, "iter": 7783, "iter_tflops": 12.250860377157643, "iter_time": 1.186881378173828, "loss": 0.1958942413330078, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 13.635526844974024, "step_time": 1.0663554267883302} +{"epoch": 0, "iter": 7784, "iter_tflops": 22.180940424269846, "iter_time": 0.6555320816040039, "loss": 0.13532418012619019, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.839416246780363, "step_time": 0.609927604675293} +{"epoch": 0, "iter": 7785, "iter_tflops": 16.148069354411874, "iter_time": 0.9915950317382812, "loss": 0.2412230521440506, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 17.38244619213428, "step_time": 0.9211790542602538} +{"epoch": 0, "iter": 7786, "iter_tflops": 26.280775324569866, "iter_time": 0.6092797927856445, "loss": 0.28584104776382446, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 28.12015205261165, "step_time": 0.5694259872436523} +{"epoch": 0, "iter": 7787, "iter_tflops": 27.40931337959814, "iter_time": 0.5841935958862305, "loss": 0.2645426094532013, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 29.322826213959388, "step_time": 0.5460710105895996} +{"epoch": 0, "iter": 7788, "iter_tflops": 27.855222199573895, "iter_time": 0.5748417739868164, "loss": 0.18938222527503967, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 29.61819703868949, "step_time": 0.5406252555847167} +{"epoch": 0, "iter": 7789, "iter_tflops": 42.05434015657769, "iter_time": 0.4905817909240722, "loss": 0.22482438385486603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.05942502741434, "step_time": 0.4479233837127685} +{"epoch": 0, "iter": 7790, "iter_tflops": 36.66292252130542, "iter_time": 0.562723648071289, "loss": 
0.13961893320083618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.091214968176985, "step_time": 0.5020803966522217} +{"epoch": 0, "iter": 7791, "iter_tflops": 40.71932134755148, "iter_time": 0.5066659469604493, "loss": 0.19535043835639954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.5500624920317, "step_time": 0.46309909248352055} +{"epoch": 0, "iter": 7792, "iter_tflops": 41.2598154119406, "iter_time": 0.5000287399291992, "loss": 0.21065598726272583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92687176507193, "step_time": 0.4592150020599365} +{"epoch": 0, "iter": 7793, "iter_tflops": 15.401349328269449, "iter_time": 1.339564025878906, "loss": 0.4871252179145813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.485659315198802, "step_time": 1.2514569854736328} +{"epoch": 0, "iter": 7794, "iter_tflops": 17.582559045397947, "iter_time": 1.1733840026855469, "loss": 0.5683995485305786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.420385034185134, "step_time": 0.8809032592773437} +{"epoch": 0, "iter": 7795, "iter_tflops": 37.100792706609695, "iter_time": 0.5560822830200195, "loss": 0.5586660504341125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59987202649759, "step_time": 0.5081566143035889} +{"epoch": 0, "iter": 7796, "iter_tflops": 42.663003930794275, "iter_time": 0.4835827674865723, "loss": 0.575355589389801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53678320106593, "step_time": 0.4433287410736084} +{"epoch": 0, "iter": 7797, "iter_tflops": 20.046087495114854, "iter_time": 1.0291830520629883, "loss": 0.2906999886035919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.26423299766489, "step_time": 0.9702251434326171} +{"epoch": 0, "iter": 7798, "iter_tflops": 19.595304537377196, "iter_time": 1.0528590393066406, "loss": 0.256021648645401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.341050420038037, "step_time": 0.883897388458252} +{"epoch": 0, "iter": 7799, "iter_tflops": 48.50439473196048, "iter_time": 0.4253448295593262, "loss": 
0.3128395080566406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.70588262070538, "step_time": 0.3914381561279297} +{"epoch": 0, "iter": 7800, "iter_tflops": 49.62283606826026, "iter_time": 0.41575804901123053, "loss": 0.251785546541214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.98521840221936, "step_time": 0.38216189765930175} +{"epoch": 0, "iter": 7801, "iter_tflops": 30.546156250636834, "iter_time": 0.6754071884155273, "loss": 0.00242426129989326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.536739419150116, "step_time": 0.6340860786437987} +{"epoch": 0, "iter": 7802, "iter_tflops": 20.431532222871144, "iter_time": 1.009767318725586, "loss": 0.008508202619850636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.70195453877632, "step_time": 0.8352008533477783} +{"epoch": 0, "iter": 7803, "iter_tflops": 42.408100340045415, "iter_time": 0.48648945236206054, "loss": 0.012609584257006645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81528985694203, "step_time": 0.44069135475158694} +{"epoch": 0, "iter": 7804, "iter_tflops": 45.993991155394816, "iter_time": 0.4485606269836426, "loss": 0.002557569881901145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.906248914447154, "step_time": 0.40527624702453613} +{"epoch": 0, "iter": 7805, "iter_tflops": 21.94255526673104, "iter_time": 0.9402320404052735, "loss": 0.7890396118164062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.25830314679014, "step_time": 0.88704207611084} +{"epoch": 0, "iter": 7806, "iter_tflops": 16.547689328276032, "iter_time": 1.2467658233642578, "loss": 0.6541299223899841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.54618544846082, "step_time": 1.0555048484802245} +{"epoch": 0, "iter": 7807, "iter_tflops": 42.92597293293025, "iter_time": 0.4806202888488769, "loss": 0.6530656218528748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.187193396623165, "step_time": 0.44668428611755373} +{"epoch": 0, "iter": 7808, "iter_tflops": 46.0311080802161, "iter_time": 0.44819893264770505, 
"loss": 0.9851552248001099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.499072352978764, "step_time": 0.4167975788116455} +{"epoch": 0, "iter": 7809, "iter_tflops": 32.256412751566714, "iter_time": 0.6395966491699219, "loss": 0.8305941224098206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.497077071299806, "step_time": 0.5980533790588379} +{"epoch": 0, "iter": 7810, "iter_tflops": 13.909245853681076, "iter_time": 1.483264709472656, "loss": 0.8425037264823914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.15751013471194, "step_time": 1.2768733139038084} +{"epoch": 0, "iter": 7811, "iter_tflops": 40.89805788670864, "iter_time": 0.5044516677856445, "loss": 0.6245045065879822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.08879899811467, "step_time": 0.4679441032409669} +{"epoch": 0, "iter": 7812, "iter_tflops": 46.1687987178324, "iter_time": 0.4468622550964355, "loss": 0.897831916809082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.57210048218817, "step_time": 0.4161835651397705} +{"epoch": 0, "iter": 7813, "iter_tflops": 45.9609269004479, "iter_time": 0.40027059936523435, "loss": 0.021804576739668846, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 50.93118992651, "step_time": 0.36120907020568843} +{"epoch": 0, "iter": 7814, "iter_tflops": 35.723390995108666, "iter_time": 0.5149793243408203, "loss": 0.006979908794164658, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 39.4863890518452, "step_time": 0.46590250968933106} +{"epoch": 0, "iter": 7815, "iter_tflops": 41.44341048052844, "iter_time": 0.44390187835693357, "loss": 0.012026146054267883, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 45.9551915091678, "step_time": 0.4003205547332763} +{"epoch": 0, "iter": 7816, "iter_tflops": 42.2150181570405, "iter_time": 0.43578822326660155, "loss": 0.003775066463276744, "lr": 3e-05, "seqlen": 7328.0, "step_tflops": 46.80611719242981, "step_time": 0.3930428085327149} +{"epoch": 0, "iter": 7817, "iter_tflops": 22.946709488686782, "iter_time": 0.7658085479736327, 
"loss": 0.004010238219052553, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 24.478371178401442, "step_time": 0.7178903427124024} +{"epoch": 0, "iter": 7818, "iter_tflops": 27.488431600405185, "iter_time": 0.6392793350219725, "loss": 0.002839060965925455, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 38.88912987722091, "step_time": 0.4518688468933105} +{"epoch": 0, "iter": 7819, "iter_tflops": 41.82359389099544, "iter_time": 0.42016442489624023, "loss": 0.022779671475291252, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 46.26841296368306, "step_time": 0.3798009300231934} +{"epoch": 0, "iter": 7820, "iter_tflops": 41.34648434685463, "iter_time": 0.4250128288269043, "loss": 0.0044820779003202915, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 45.378931747996084, "step_time": 0.3872454814910889} +{"epoch": 0, "iter": 7821, "iter_tflops": 21.80320092778146, "iter_time": 0.704362449645996, "loss": 0.28595301508903503, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.715936961937956, "step_time": 0.6475542602539062} +{"epoch": 0, "iter": 7822, "iter_tflops": 26.497254813651097, "iter_time": 0.5795829086303711, "loss": 0.25371792912483215, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 28.280447962240043, "step_time": 0.5430379333496094} +{"epoch": 0, "iter": 7823, "iter_tflops": 28.427761193326038, "iter_time": 0.5402238998413086, "loss": 0.31823715567588806, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.260565294194393, "step_time": 0.5075039367675782} +{"epoch": 0, "iter": 7824, "iter_tflops": 28.84296114758862, "iter_time": 0.5324472732543946, "loss": 0.23482829332351685, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.599622367274478, "step_time": 0.5018805732727051} +{"epoch": 0, "iter": 7825, "iter_tflops": 45.97340999881682, "iter_time": 0.4487614364624024, "loss": 0.06909945607185364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.336948090250175, "step_time": 0.40985984039306644} +{"epoch": 0, "iter": 7826, "iter_tflops": 43.67415021029138, "iter_time": 
0.47238683319091795, "loss": 0.03637748956680298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.61269441811508, "step_time": 0.4243972434997558} +{"epoch": 0, "iter": 7827, "iter_tflops": 51.53561623087203, "iter_time": 0.40032690048217767, "loss": 0.03384148329496384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.30028578133518, "step_time": 0.3664474029541016} +{"epoch": 0, "iter": 7828, "iter_tflops": 54.042861039468136, "iter_time": 0.38175428009033197, "loss": 0.023631105199456215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.12326154275886, "step_time": 0.3489505310058594} +{"epoch": 0, "iter": 7829, "iter_tflops": 21.65591050305278, "iter_time": 0.952677261352539, "loss": 0.08370211720466614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.574914523704855, "step_time": 0.913894645690918} +{"epoch": 0, "iter": 7830, "iter_tflops": 21.082931624491483, "iter_time": 0.9785685348510742, "loss": 0.09494782984256744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.737581080005747, "step_time": 0.8015941143035887} +{"epoch": 0, "iter": 7831, "iter_tflops": 42.55479776852519, "iter_time": 0.4848123970031738, "loss": 0.09319843351840973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8164079432688, "step_time": 0.440680830001831} +{"epoch": 0, "iter": 7832, "iter_tflops": 42.51737705098392, "iter_time": 0.48523909378051755, "loss": 0.09068118035793304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.549638067877765, "step_time": 0.44320631408691413} +{"epoch": 0, "iter": 7833, "iter_tflops": 33.57062826526703, "iter_time": 0.614557861328125, "loss": 0.992763102054596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.05937555864221, "step_time": 0.5567037544250488} +{"epoch": 0, "iter": 7834, "iter_tflops": 36.88708674921171, "iter_time": 0.559303955078125, "loss": 0.7620953917503357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17880910849473, "step_time": 0.5134819564819336} +{"epoch": 0, "iter": 7835, "iter_tflops": 37.976248859169225, 
"iter_time": 0.5432630691528321, "loss": 0.7675470113754272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.55310280641002, "step_time": 0.49649946975708004} +{"epoch": 0, "iter": 7836, "iter_tflops": 39.619618433328796, "iter_time": 0.5207292327880859, "loss": 0.7125478386878967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.187183086310625, "step_time": 0.47771334075927735} +{"epoch": 0, "iter": 7837, "iter_tflops": 19.32001626388127, "iter_time": 1.067861083984375, "loss": 0.7556878924369812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.710009973931392, "step_time": 0.996189453125} +{"epoch": 0, "iter": 7838, "iter_tflops": 29.122315773193844, "iter_time": 0.7084290161132814, "loss": 0.7467225790023804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.65001749908645, "step_time": 0.6131079578399659} +{"epoch": 0, "iter": 7839, "iter_tflops": 43.082946040057564, "iter_time": 0.47886914443969725, "loss": 0.6489115953445435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.34496851371269, "step_time": 0.44516361045837405} +{"epoch": 0, "iter": 7840, "iter_tflops": 48.26196773130047, "iter_time": 0.42748139953613284, "loss": 0.7818383574485779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.18335019566345, "step_time": 0.39535778045654296} +{"epoch": 0, "iter": 7841, "iter_tflops": 41.13431763969067, "iter_time": 0.5015542907714843, "loss": 0.9494156241416931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.78242462803355, "step_time": 0.4606962146759034} +{"epoch": 0, "iter": 7842, "iter_tflops": 41.78571285621632, "iter_time": 0.4937355880737305, "loss": 0.7681211233139038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.994534465022696, "step_time": 0.4485553283691406} +{"epoch": 0, "iter": 7843, "iter_tflops": 40.53258654668839, "iter_time": 0.5090001716613769, "loss": 0.6662771105766296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.54677154963592, "step_time": 0.47376861190795894} +{"epoch": 0, "iter": 7844, "iter_tflops": 41.686632326465414, 
"iter_time": 0.4949090957641602, "loss": 0.6441065073013306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.858179352359656, "step_time": 0.4599182090759277} +{"epoch": 0, "iter": 7845, "iter_tflops": 37.362321927642235, "iter_time": 0.552189811706543, "loss": 0.8616608381271362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.510938020165746, "step_time": 0.5092721748352051} +{"epoch": 0, "iter": 7846, "iter_tflops": 41.09469897048693, "iter_time": 0.5020378303527832, "loss": 1.0010648965835571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.109876090845404, "step_time": 0.46772050476074223} +{"epoch": 0, "iter": 7847, "iter_tflops": 39.96101146263401, "iter_time": 0.5162805633544922, "loss": 0.6434530019760132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.66848052653342, "step_time": 0.4835206985473633} +{"epoch": 0, "iter": 7848, "iter_tflops": 44.576341345615916, "iter_time": 0.4628260841369629, "loss": 0.8236920833587646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.296572787450714, "step_time": 0.4271751041412353} +{"epoch": 0, "iter": 7849, "iter_tflops": 33.498819707403186, "iter_time": 0.6158752365112306, "loss": 0.9272210597991943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.74187876829049, "step_time": 0.57722465133667} +{"epoch": 0, "iter": 7850, "iter_tflops": 15.817593422803535, "iter_time": 1.3043130493164061, "loss": 0.7295896410942078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.69066083600373, "step_time": 1.103818302154541} +{"epoch": 0, "iter": 7851, "iter_tflops": 39.38806610120525, "iter_time": 0.5237904663085937, "loss": 0.761458694934845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.00866576640203, "step_time": 0.47969619941711433} +{"epoch": 0, "iter": 7852, "iter_tflops": 38.276322019865596, "iter_time": 0.5390040740966797, "loss": 0.7349487543106079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80106636411512, "step_time": 0.4935542392730713} +{"epoch": 0, "iter": 7853, "iter_tflops": 19.84468619853051, 
"iter_time": 1.0396281051635743, "loss": 0.14428582787513733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.30817612031816, "step_time": 0.9682242813110351} +{"epoch": 0, "iter": 7854, "iter_tflops": 23.190156230834848, "iter_time": 0.8896487503051759, "loss": 0.19874373078346252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.401725780929592, "step_time": 0.7529121952056885} +{"epoch": 0, "iter": 7855, "iter_tflops": 38.90867749225952, "iter_time": 0.5302440185546875, "loss": 0.08660375326871872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53485327988899, "step_time": 0.48503972434997555} +{"epoch": 0, "iter": 7856, "iter_tflops": 43.24176045044517, "iter_time": 0.47711039733886723, "loss": 0.12082719057798386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19411624365023, "step_time": 0.4371539325714111} +{"epoch": 0, "iter": 7857, "iter_tflops": 26.51931808713964, "iter_time": 0.7779647064208983, "loss": 0.13606911897659302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.470115839165572, "step_time": 0.700068286895752} +{"epoch": 0, "iter": 7858, "iter_tflops": 39.72065165780651, "iter_time": 0.5194047088623046, "loss": 0.1648041307926178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.38710661509998, "step_time": 0.4647992420196533} +{"epoch": 0, "iter": 7859, "iter_tflops": 42.85096317713189, "iter_time": 0.4814616050720215, "loss": 0.13980001211166382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.76780109219228, "step_time": 0.4411388397216797} +{"epoch": 0, "iter": 7860, "iter_tflops": 42.77792612257933, "iter_time": 0.4822836303710938, "loss": 0.08178659528493881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53912765789098, "step_time": 0.44330640792846676} +{"epoch": 0, "iter": 7861, "iter_tflops": 20.773783626619018, "iter_time": 0.9931312408447265, "loss": 1.0556191205978394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.98385018834555, "step_time": 0.9384658889770507} +{"epoch": 0, "iter": 7862, "iter_tflops": 
8.554588887869105, "iter_time": 2.411699005126953, "loss": 0.8030760288238525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.33880037550824, "step_time": 2.209180267333984} +{"epoch": 0, "iter": 7863, "iter_tflops": 16.45426869242847, "iter_time": 1.2538444519042968, "loss": 1.1070075035095215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.41323122388497, "step_time": 1.010672601699829} +{"epoch": 0, "iter": 7864, "iter_tflops": 33.754666315778486, "iter_time": 0.6112071533203125, "loss": 0.6219469904899597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.57621127952135, "step_time": 0.5640576972961425} +{"epoch": 0, "iter": 7865, "iter_tflops": 12.569242397524171, "iter_time": 1.2413505096435546, "loss": 0.2751566171646118, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.296786976151097, "step_time": 1.173429000854492} +{"epoch": 0, "iter": 7866, "iter_tflops": 12.828345558203692, "iter_time": 1.216278076171875, "loss": 0.1388101875782013, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 15.523764301729592, "step_time": 1.0050935554504394} +{"epoch": 0, "iter": 7867, "iter_tflops": 26.30394538037558, "iter_time": 0.5931747207641601, "loss": 0.28111550211906433, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 28.270267993841884, "step_time": 0.5519167861938477} +{"epoch": 0, "iter": 7868, "iter_tflops": 22.731456897530485, "iter_time": 0.6863983917236328, "loss": 0.25593075156211853, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.37650153791704, "step_time": 0.6400768966674805} +{"epoch": 0, "iter": 7869, "iter_tflops": 24.150976547907447, "iter_time": 0.8542550430297852, "loss": 0.07406458258628845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.467872328821734, "step_time": 0.7794768409729004} +{"epoch": 0, "iter": 7870, "iter_tflops": 41.43424605258495, "iter_time": 0.4979237098693847, "loss": 0.047386907041072845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.8004265475315, "step_time": 0.45045636177062987} +{"epoch": 0, "iter": 7871, "iter_tflops": 
46.768013817642725, "iter_time": 0.441136833190918, "loss": 0.04187467321753502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.54300903640588, "step_time": 0.4002694816589355} +{"epoch": 0, "iter": 7872, "iter_tflops": 38.785795417876294, "iter_time": 0.5319239501953125, "loss": 0.040882471948862076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.59837414684988, "step_time": 0.4843164539337158} +{"epoch": 0, "iter": 7873, "iter_tflops": 17.32033952330635, "iter_time": 1.1911483306884767, "loss": 0.5658574104309082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.61580545516691, "step_time": 1.1082568283081056} +{"epoch": 0, "iter": 7874, "iter_tflops": 18.123002530795773, "iter_time": 1.1383926849365233, "loss": 0.6742154359817505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.710320739416197, "step_time": 0.9502896690368652} +{"epoch": 0, "iter": 7875, "iter_tflops": 34.704834652417354, "iter_time": 0.5944731826782226, "loss": 0.7122183442115784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.77373179299346, "step_time": 0.5461756763458253} +{"epoch": 0, "iter": 7876, "iter_tflops": 38.60892508525655, "iter_time": 0.5343607330322265, "loss": 0.6584529876708984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.15507789594054, "step_time": 0.4894094505310058} +{"epoch": 0, "iter": 7877, "iter_tflops": 33.69925059908736, "iter_time": 0.6122122344970704, "loss": 0.09996335953474045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.10265721055281, "step_time": 0.5560543384552002} +{"epoch": 0, "iter": 7878, "iter_tflops": 37.94072693760923, "iter_time": 0.5437716979980469, "loss": 0.1802695095539093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.77899086137967, "step_time": 0.49381502723693854} +{"epoch": 0, "iter": 7879, "iter_tflops": 41.19336024182855, "iter_time": 0.5008354110717773, "loss": 0.14537180960178375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.226894791767975, "step_time": 0.4561686935424804} +{"epoch": 0, "iter": 7880, "iter_tflops": 
43.38552120576105, "iter_time": 0.47552946090698245, "loss": 0.13671660423278809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.45976262466669, "step_time": 0.43470705223083494} +{"epoch": 0, "iter": 7881, "iter_tflops": 1.3263222908016128, "iter_time": 1.2112734069824218, "loss": 1.175764799118042, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.4070186099699407, "step_time": 1.1418036041259767} +{"epoch": 0, "iter": 7882, "iter_tflops": 1.7650366291140294, "iter_time": 0.9102014617919922, "loss": 1.0603277683258057, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.411600709878764, "step_time": 0.6661711921691895} +{"epoch": 0, "iter": 7883, "iter_tflops": 3.877772522482375, "iter_time": 0.4142942657470703, "loss": 1.251466155052185, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.2072910860222335, "step_time": 0.3818463916778565} +{"epoch": 0, "iter": 7884, "iter_tflops": 3.642078774422159, "iter_time": 0.4411049346923828, "loss": 1.1605348587036133, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.938106991780039, "step_time": 0.40794699668884277} +{"epoch": 0, "iter": 7885, "iter_tflops": 34.6034833292862, "iter_time": 0.59621435546875, "loss": 0.4564974009990692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.933429686435765, "step_time": 0.5586021575927734} +{"epoch": 0, "iter": 7886, "iter_tflops": 14.595219820103939, "iter_time": 1.4135514068603514, "loss": 0.5105789303779602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.55528355807976, "step_time": 1.175207078933716} +{"epoch": 0, "iter": 7887, "iter_tflops": 37.62968871333102, "iter_time": 0.5482663879394531, "loss": 0.3283940553665161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.15446895304822, "step_time": 0.5013087043762208} +{"epoch": 0, "iter": 7888, "iter_tflops": 43.313920981425525, "iter_time": 0.4763155364990235, "loss": 0.42279547452926636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.18096104789596, "step_time": 0.43727582168579104} +{"epoch": 0, "iter": 7889, "iter_tflops": 
31.71233683000664, "iter_time": 0.6505699539184571, "loss": 0.7979254126548767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.22488949571021, "step_time": 0.5856964721679687} +{"epoch": 0, "iter": 7890, "iter_tflops": 37.742631359003305, "iter_time": 0.546625732421875, "loss": 0.6766785979270935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27252183961859, "step_time": 0.499874797821045} +{"epoch": 0, "iter": 7891, "iter_tflops": 37.00786889146445, "iter_time": 0.5574785614013672, "loss": 0.7641834020614624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.30999351331304, "step_time": 0.5118108863830566} +{"epoch": 0, "iter": 7892, "iter_tflops": 36.209727726346735, "iter_time": 0.5697666015625, "loss": 0.5924701690673828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.988436729349544, "step_time": 0.5291592903137208} +{"epoch": 0, "iter": 7893, "iter_tflops": 18.145144128699148, "iter_time": 0.954853141784668, "loss": 0.09534697234630585, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 19.463298799080786, "step_time": 0.8901855773925782} +{"epoch": 0, "iter": 7894, "iter_tflops": 15.027573038049406, "iter_time": 1.15294384765625, "loss": 0.074966661632061, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 18.32129575710944, "step_time": 0.9456726264953613} +{"epoch": 0, "iter": 7895, "iter_tflops": 34.724585523845036, "iter_time": 0.4989533386230468, "loss": 0.07021544873714447, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 38.292635281903, "step_time": 0.45246162223815917} +{"epoch": 0, "iter": 7896, "iter_tflops": 35.80867846239287, "iter_time": 0.48384773254394536, "loss": 0.10930140316486359, "lr": 3e-05, "seqlen": 6912.0, "step_tflops": 39.512172929129086, "step_time": 0.4384964580535889} +{"epoch": 0, "iter": 7897, "iter_tflops": 19.141602786106287, "iter_time": 1.0778143157958984, "loss": 0.29081207513809204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.864406449735423, "step_time": 0.988817657470703} +{"epoch": 0, "iter": 7898, "iter_tflops": 
16.101960973718924, "iter_time": 1.2812783203125, "loss": 0.30486705899238586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.087250551759425, "step_time": 1.0808834648132324} +{"epoch": 0, "iter": 7899, "iter_tflops": 46.91237434630073, "iter_time": 0.4397793502807617, "loss": 0.3446458876132965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.09167617283897, "step_time": 0.4038053760528565} +{"epoch": 0, "iter": 7900, "iter_tflops": 50.202128075166954, "iter_time": 0.41096053695678714, "loss": 0.282917320728302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.70315028387975, "step_time": 0.3771463508605957} +{"epoch": 0, "iter": 7901, "iter_tflops": 45.54062736787632, "iter_time": 0.4530261154174805, "loss": 0.5895370841026306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50627031677023, "step_time": 0.4167369785308839} +{"epoch": 0, "iter": 7902, "iter_tflops": 36.09843664311336, "iter_time": 0.5715231857299805, "loss": 0.49972081184387207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.286432817100646, "step_time": 0.5121102085113526} +{"epoch": 0, "iter": 7903, "iter_tflops": 46.531961878993094, "iter_time": 0.44337467575073247, "loss": 0.4543006420135498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.650547661777054, "step_time": 0.4073222198486328} +{"epoch": 0, "iter": 7904, "iter_tflops": 51.11705405889863, "iter_time": 0.40360490036010743, "loss": 0.5455530285835266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.17343561831424, "step_time": 0.3739316444396973} +{"epoch": 0, "iter": 7905, "iter_tflops": 19.01827014397389, "iter_time": 1.0848038940429687, "loss": 0.07779036462306976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.821322789841066, "step_time": 1.040853515625} +{"epoch": 0, "iter": 7906, "iter_tflops": 19.508751629399878, "iter_time": 1.057530174255371, "loss": 0.0834391638636589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.46833001553843, "step_time": 0.7001107120513916} +{"epoch": 0, "iter": 7907, "iter_tflops": 
46.65633567652804, "iter_time": 0.4421927528381347, "loss": 0.09365590661764145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.163265004660936, "step_time": 0.40324036216735837} +{"epoch": 0, "iter": 7908, "iter_tflops": 42.65328711614445, "iter_time": 0.4836929321289063, "loss": 0.06956157088279724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00356905560823, "step_time": 0.4389261054992676} +{"epoch": 0, "iter": 7909, "iter_tflops": 18.590119069198384, "iter_time": 1.1097881317138671, "loss": 0.6162711381912231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.912163562532932, "step_time": 1.03610506439209} +{"epoch": 0, "iter": 7910, "iter_tflops": 31.33094960078261, "iter_time": 0.6584892501831054, "loss": 0.6524196267127991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.01251924898676, "step_time": 0.5892490444183349} +{"epoch": 0, "iter": 7911, "iter_tflops": 39.54309593657548, "iter_time": 0.5217369308471679, "loss": 0.7244722843170166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.585938166620544, "step_time": 0.47334288024902343} +{"epoch": 0, "iter": 7912, "iter_tflops": 40.29717975280913, "iter_time": 0.5119736328125, "loss": 0.7277535796165466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.9343811514303, "step_time": 0.46958880424499516} +{"epoch": 0, "iter": 7913, "iter_tflops": 25.495276683760686, "iter_time": 0.8092123794555665, "loss": 0.3107337951660156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.42213050911289, "step_time": 0.7523519554138184} +{"epoch": 0, "iter": 7914, "iter_tflops": 9.80298394223932, "iter_time": 2.104572814941406, "loss": 0.40426886081695557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.555236903633656, "step_time": 1.6432261428833008} +{"epoch": 0, "iter": 7915, "iter_tflops": 11.301833496855897, "iter_time": 1.825464294433594, "loss": 0.4917357861995697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.031817175205024, "step_time": 1.470308032989502} +{"epoch": 0, "iter": 7916, "iter_tflops": 
24.200917982188557, "iter_time": 0.8524921875, "loss": 0.4151848554611206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.55953681408687, "step_time": 0.6336421070098877} +{"epoch": 0, "iter": 7917, "iter_tflops": 17.88397434787531, "iter_time": 0.8153179397583007, "loss": 0.16213075816631317, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 19.209800505560285, "step_time": 0.7590461502075195} +{"epoch": 0, "iter": 7918, "iter_tflops": 23.927369299490373, "iter_time": 0.6093910675048828, "loss": 0.24294215440750122, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 26.290755987143978, "step_time": 0.5546103401184082} +{"epoch": 0, "iter": 7919, "iter_tflops": 26.385045085991734, "iter_time": 0.5526283950805664, "loss": 0.3064381182193756, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 28.075173490665858, "step_time": 0.519360107421875} +{"epoch": 0, "iter": 7920, "iter_tflops": 26.14130236732939, "iter_time": 0.5577811279296875, "loss": 0.2081373929977417, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.80736586974509, "step_time": 0.5243619689941406} +{"epoch": 0, "iter": 7921, "iter_tflops": 18.132632373415905, "iter_time": 0.883067886352539, "loss": 0.33425459265708923, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 19.018353279573756, "step_time": 0.8419417343139647} +{"epoch": 0, "iter": 7922, "iter_tflops": 7.566813370987366, "iter_time": 2.116127960205078, "loss": 0.31417742371559143, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 9.500417581746044, "step_time": 1.6854359512329102} +{"epoch": 0, "iter": 7923, "iter_tflops": 9.370709959025683, "iter_time": 1.7087654418945313, "loss": 0.21545527875423431, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 10.909849166505104, "step_time": 1.4676963081359862} +{"epoch": 0, "iter": 7924, "iter_tflops": 17.108267782868253, "iter_time": 0.9359419403076171, "loss": 0.19890691339969635, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 23.233266292414786, "step_time": 0.6891990623474121} +{"epoch": 0, "iter": 7925, "iter_tflops": 
15.947449411796589, "iter_time": 1.034922866821289, "loss": 0.1646147072315216, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 16.68813363900717, "step_time": 0.9889889678955078} +{"epoch": 0, "iter": 7926, "iter_tflops": 11.685538101717736, "iter_time": 1.4123765563964843, "loss": 0.2165755182504654, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 14.88797393918913, "step_time": 1.1085712623596191} +{"epoch": 0, "iter": 7927, "iter_tflops": 24.821995233137088, "iter_time": 0.6649094848632813, "loss": 0.40635523200035095, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 26.746013331229612, "step_time": 0.6170781364440918} +{"epoch": 0, "iter": 7928, "iter_tflops": 27.399492507305762, "iter_time": 0.6023607940673829, "loss": 0.27442729473114014, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 29.35533332962495, "step_time": 0.5622276496887207} +{"epoch": 0, "iter": 7929, "iter_tflops": 22.077173108782432, "iter_time": 0.934498878479004, "loss": 0.6898790001869202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.950817054117294, "step_time": 0.861394142150879} +{"epoch": 0, "iter": 7930, "iter_tflops": 24.423022685431174, "iter_time": 0.8447395629882812, "loss": 0.6991751194000244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.040824501827142, "step_time": 0.6867685508728026} +{"epoch": 0, "iter": 7931, "iter_tflops": 46.43198698280076, "iter_time": 0.44432932662963864, "loss": 0.8372377753257751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34771047981291, "step_time": 0.40977222824096676} +{"epoch": 0, "iter": 7932, "iter_tflops": 48.34544109777464, "iter_time": 0.4267433090209961, "loss": 0.6649522185325623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.36294727462488, "step_time": 0.39400176239013673} +{"epoch": 0, "iter": 7933, "iter_tflops": 26.525998922479804, "iter_time": 0.7777687683105468, "loss": 0.11316623538732529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.96449694100002, "step_time": 0.737760223388672} +{"epoch": 0, "iter": 7934, 
"iter_tflops": 16.54882656832603, "iter_time": 1.2466801452636718, "loss": 0.13190396130084991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.95164458592226, "step_time": 1.0340547828674316} +{"epoch": 0, "iter": 7935, "iter_tflops": 48.758460283066924, "iter_time": 0.42312848663330077, "loss": 0.17520686984062195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.96507831965018, "step_time": 0.38952257156372067} +{"epoch": 0, "iter": 7936, "iter_tflops": 52.2571930044053, "iter_time": 0.39479911422729497, "loss": 0.14895212650299072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.47014948418194, "step_time": 0.36534511947631837} +{"epoch": 0, "iter": 7937, "iter_tflops": 39.924716333548055, "iter_time": 0.5167499084472656, "loss": 0.003061684314161539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89301701676771, "step_time": 0.4809895629882812} +{"epoch": 0, "iter": 7938, "iter_tflops": 14.462391136041415, "iter_time": 1.4265340576171877, "loss": 0.003026192309334874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.536457079362524, "step_time": 1.247612678527832} +{"epoch": 0, "iter": 7939, "iter_tflops": 47.97281960938553, "iter_time": 0.4300579719543457, "loss": 0.0044828783720731735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.09666116923838, "step_time": 0.38855726623535153} +{"epoch": 0, "iter": 7940, "iter_tflops": 50.73715671590219, "iter_time": 0.40662691497802733, "loss": 0.0014761022757738829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.109717037428396, "step_time": 0.3676919898986817} +{"epoch": 0, "iter": 7941, "iter_tflops": 21.168850500483558, "iter_time": 0.9745967788696289, "loss": 0.14745008945465088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.721549667319994, "step_time": 0.9079967613220215} +{"epoch": 0, "iter": 7942, "iter_tflops": 15.93464989911257, "iter_time": 1.2947315216064454, "loss": 0.12654949724674225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.938783050557614, "step_time": 1.0893568744659423} +{"epoch": 
0, "iter": 7943, "iter_tflops": 35.541976614141866, "iter_time": 0.5804711914062501, "loss": 0.2118079960346222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.84892308131669, "step_time": 0.5310595989227295} +{"epoch": 0, "iter": 7944, "iter_tflops": 41.58974433346219, "iter_time": 0.4960620422363281, "loss": 0.10803595185279846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.78276877939255, "step_time": 0.4506300964355469} +{"epoch": 0, "iter": 7945, "iter_tflops": 16.31662122847929, "iter_time": 1.0291171112060546, "loss": 0.0025061671622097492, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 17.596651071866855, "step_time": 0.9542562408447265} +{"epoch": 0, "iter": 7946, "iter_tflops": 15.041834571519194, "iter_time": 1.1163341827392577, "loss": 0.005269076209515333, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 17.87997855032557, "step_time": 0.939135025024414} +{"epoch": 0, "iter": 7947, "iter_tflops": 35.899775842841336, "iter_time": 0.4677386894226075, "loss": 0.0016411672113463283, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 39.53900093065326, "step_time": 0.4246873645782471} +{"epoch": 0, "iter": 7948, "iter_tflops": 41.50906615987031, "iter_time": 0.4045312423706054, "loss": 0.005859350319951773, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 45.9450230218908, "step_time": 0.3654740600585938} +{"epoch": 0, "iter": 7949, "iter_tflops": 23.34101486078187, "iter_time": 0.8838987350463867, "loss": 0.7696238160133362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.021793207807136, "step_time": 0.8245249786376954} +{"epoch": 0, "iter": 7950, "iter_tflops": 40.21712415985082, "iter_time": 0.5129927597045899, "loss": 0.8652123808860779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.5514869892349, "step_time": 0.47371731567382813} +{"epoch": 0, "iter": 7951, "iter_tflops": 43.13207804172837, "iter_time": 0.4783236618041992, "loss": 0.7393953204154968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.498572188332304, "step_time": 0.44369305419921873} 
+{"epoch": 0, "iter": 7952, "iter_tflops": 46.5235984556, "iter_time": 0.44345438003540033, "loss": 0.8066898584365845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.97087599859952, "step_time": 0.4128623542785645} +{"epoch": 0, "iter": 7953, "iter_tflops": 27.07976420799403, "iter_time": 0.7618638534545897, "loss": 0.46736639738082886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.564648215538845, "step_time": 0.7222596740722655} +{"epoch": 0, "iter": 7954, "iter_tflops": 16.67701182162755, "iter_time": 1.2370977325439452, "loss": 0.35636404156684875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.6579948275442, "step_time": 1.1057508430480958} +{"epoch": 0, "iter": 7955, "iter_tflops": 38.25353829502775, "iter_time": 0.5393251037597656, "loss": 0.41265901923179626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.847603940649144, "step_time": 0.49300537109375003} +{"epoch": 0, "iter": 7956, "iter_tflops": 42.883129192436755, "iter_time": 0.48110046768188475, "loss": 0.3931804299354553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90079352104246, "step_time": 0.43988794136047366} +{"epoch": 0, "iter": 7957, "iter_tflops": 21.67345469431058, "iter_time": 0.951906089782715, "loss": 0.6971442103385925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.00837791522053, "step_time": 0.8966774444580077} +{"epoch": 0, "iter": 7958, "iter_tflops": 21.53450966303692, "iter_time": 0.9580479812622069, "loss": 0.7379925847053528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.404755410343952, "step_time": 0.781340072631836} +{"epoch": 0, "iter": 7959, "iter_tflops": 40.90679240816296, "iter_time": 0.5043439559936523, "loss": 0.8092837929725647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.58464682768117, "step_time": 0.4627398662567139} +{"epoch": 0, "iter": 7960, "iter_tflops": 39.25044139004125, "iter_time": 0.5256270446777344, "loss": 0.6687189936637878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.563606336229874, "step_time": 0.484712064743042} 
+{"epoch": 0, "iter": 7961, "iter_tflops": 1.1298314605225608, "iter_time": 1.2134024047851562, "loss": 1.0754492282867432, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 1.2140788391179325, "step_time": 1.1292019653320313} +{"epoch": 0, "iter": 7962, "iter_tflops": 1.3142301648754453, "iter_time": 1.0431507720947264, "loss": 0.9023393392562866, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 1.5803015603002653, "step_time": 0.867518102645874} +{"epoch": 0, "iter": 7963, "iter_tflops": 3.353498757113401, "iter_time": 0.4088089218139648, "loss": 0.728364884853363, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.628418879058774, "step_time": 0.3778340530395508} +{"epoch": 0, "iter": 7964, "iter_tflops": 3.2491459754846264, "iter_time": 0.42193863296508793, "loss": 1.121734857559204, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.522058259298576, "step_time": 0.389244047164917} +{"epoch": 0, "iter": 7965, "iter_tflops": 51.045283647018216, "iter_time": 0.4041723747253418, "loss": 0.0056816814467310905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.746800490075465, "step_time": 0.3635639953613281} +{"epoch": 0, "iter": 7966, "iter_tflops": 46.35158123299293, "iter_time": 0.4451001014709473, "loss": 0.004154660273343325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.58432481823627, "step_time": 0.40785546874999995} +{"epoch": 0, "iter": 7967, "iter_tflops": 54.65920306225568, "iter_time": 0.3774495849609375, "loss": 0.024740712717175484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.051900619372596, "step_time": 0.3435543804168701} +{"epoch": 0, "iter": 7968, "iter_tflops": 55.89894940661797, "iter_time": 0.36907837677001953, "loss": 0.003225466934964061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.4519970366058, "step_time": 0.3357269821166992} +{"epoch": 0, "iter": 7969, "iter_tflops": 31.94747352825774, "iter_time": 0.4410912704467774, "loss": 0.2447543442249298, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 35.00078275629595, "step_time": 
0.40261247253417976} +{"epoch": 0, "iter": 7970, "iter_tflops": 26.25448870633992, "iter_time": 0.5367368545532226, "loss": 0.23446129262447357, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 29.12112898541613, "step_time": 0.4839012832641601} +{"epoch": 0, "iter": 7971, "iter_tflops": 29.730095742280813, "iter_time": 0.47398944854736325, "loss": 0.15350379049777985, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 32.52388738397544, "step_time": 0.43327390480041506} +{"epoch": 0, "iter": 7972, "iter_tflops": 31.199748693891735, "iter_time": 0.45166234588623044, "loss": 0.21286797523498535, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 34.20402233096762, "step_time": 0.4119910678863525} +{"epoch": 0, "iter": 7973, "iter_tflops": 5.993288946914165, "iter_time": 0.8677391586303711, "loss": 0.00479592802003026, "lr": 3e-05, "seqlen": 2112.0, "step_tflops": 6.470051576601572, "step_time": 0.803797534942627} +{"epoch": 0, "iter": 7974, "iter_tflops": 4.045472886455786, "iter_time": 1.285538589477539, "loss": 0.004322350956499577, "lr": 3e-05, "seqlen": 2112.0, "step_tflops": 4.690073053504264, "step_time": 1.1088551177978516} +{"epoch": 0, "iter": 7975, "iter_tflops": 13.67257839560572, "iter_time": 0.38036801528930664, "loss": 0.008666363544762135, "lr": 3e-05, "seqlen": 2112.0, "step_tflops": 15.112533730687026, "step_time": 0.344125717163086} +{"epoch": 0, "iter": 7976, "iter_tflops": 13.129532496727823, "iter_time": 0.3961002807617187, "loss": 0.00791990663856268, "lr": 3e-05, "seqlen": 2112.0, "step_tflops": 14.465885937541461, "step_time": 0.35950867652893065} +{"epoch": 0, "iter": 7977, "iter_tflops": 29.96749901344222, "iter_time": 0.6884489593505859, "loss": 0.026663755998015404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.915015684992163, "step_time": 0.6464384574890136} +{"epoch": 0, "iter": 7978, "iter_tflops": 29.676227527383283, "iter_time": 0.695206069946289, "loss": 0.02174157090485096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92109753092709, 
"step_time": 0.5440531749725341} +{"epoch": 0, "iter": 7979, "iter_tflops": 54.17554307216996, "iter_time": 0.38081932067871094, "loss": 0.03813396394252777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.33929058547696, "step_time": 0.34768015098571775} +{"epoch": 0, "iter": 7980, "iter_tflops": 52.76738215990711, "iter_time": 0.39098194122314456, "loss": 0.06904558092355728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.34726025596374, "step_time": 0.35975726509094236} +{"epoch": 0, "iter": 7981, "iter_tflops": 32.560733837360075, "iter_time": 0.6336188125610351, "loss": 0.060714904218912125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.70242054315091, "step_time": 0.5945145378112794} +{"epoch": 0, "iter": 7982, "iter_tflops": 13.538625843838188, "iter_time": 1.5238690948486329, "loss": 0.06454691290855408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.28191826495222, "step_time": 1.2671168823242187} +{"epoch": 0, "iter": 7983, "iter_tflops": 49.87230816801997, "iter_time": 0.41367833709716795, "loss": 0.08620147407054901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.41202125555314, "step_time": 0.3791642551422119} +{"epoch": 0, "iter": 7984, "iter_tflops": 51.48973959700477, "iter_time": 0.40068358612060545, "loss": 0.11191132664680481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.67945187458605, "step_time": 0.3705333442687988} +{"epoch": 0, "iter": 7985, "iter_tflops": 21.50351916615388, "iter_time": 0.9594287033081055, "loss": 0.0016297008842229843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.471773722664288, "step_time": 0.9180892333984375} +{"epoch": 0, "iter": 7986, "iter_tflops": 13.196224772675896, "iter_time": 1.563408767700195, "loss": 0.037856388837099075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.702466730248029, "step_time": 1.3138759574890135} +{"epoch": 0, "iter": 7987, "iter_tflops": 52.47178416923695, "iter_time": 0.3931845245361328, "loss": 0.03142762929201126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
57.70486989769927, "step_time": 0.3575277709960938} +{"epoch": 0, "iter": 7988, "iter_tflops": 53.68658452892659, "iter_time": 0.3842876892089844, "loss": 0.009863056242465973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.979082238386894, "step_time": 0.34980356979370114} +{"epoch": 0, "iter": 7989, "iter_tflops": 35.127900709561374, "iter_time": 0.5873135910034181, "loss": 0.3535284996032715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.54675575650525, "step_time": 0.5494773941040039} +{"epoch": 0, "iter": 7990, "iter_tflops": 41.46971276182501, "iter_time": 0.49749786376953126, "loss": 0.417115718126297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.649438558031015, "step_time": 0.4422581310272217} +{"epoch": 0, "iter": 7991, "iter_tflops": 48.149989965244124, "iter_time": 0.42847555160522466, "loss": 0.2650217115879059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.221126877886554, "step_time": 0.3950717792510986} +{"epoch": 0, "iter": 7992, "iter_tflops": 47.69362875665324, "iter_time": 0.4325754623413086, "loss": 0.2887093722820282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.87500448950178, "step_time": 0.39770779228210446} +{"epoch": 0, "iter": 7993, "iter_tflops": 22.441057747066694, "iter_time": 0.7299703903198242, "loss": 0.2008620649576187, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 23.705574868317676, "step_time": 0.691031867980957} +{"epoch": 0, "iter": 7994, "iter_tflops": 17.21534195730733, "iter_time": 0.9515528488159178, "loss": 0.3218669593334198, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 22.65445976133284, "step_time": 0.7230941658020019} +{"epoch": 0, "iter": 7995, "iter_tflops": 25.55988373308733, "iter_time": 0.6408991470336913, "loss": 0.29387471079826355, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 27.562941131544477, "step_time": 0.5943236465454103} +{"epoch": 0, "iter": 7996, "iter_tflops": 25.57833703023688, "iter_time": 0.6404367752075195, "loss": 0.21183691918849945, "lr": 3e-05, "seqlen": 6544.0, 
"step_tflops": 27.25653598906171, "step_time": 0.601004753112793} +{"epoch": 0, "iter": 7997, "iter_tflops": 17.77412807086305, "iter_time": 1.1607373046874998, "loss": 0.6315907835960388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.116680284109762, "step_time": 1.079219467163086} +{"epoch": 0, "iter": 7998, "iter_tflops": 17.95166935125913, "iter_time": 1.1492576599121096, "loss": 0.6800190210342407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.429479614688834, "step_time": 0.9627435607910155} +{"epoch": 0, "iter": 7999, "iter_tflops": 36.548776278641085, "iter_time": 0.564481101989746, "loss": 0.48859912157058716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.837670996036366, "step_time": 0.5178790073394776} +{"epoch": 0, "iter": 8000, "iter_tflops": 37.80783000295252, "iter_time": 0.545683090209961, "loss": 0.5284044146537781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.334708032637444, "step_time": 0.4991227588653564} +{"epoch": 0, "iter": 8001, "iter_tflops": 19.795799631717212, "iter_time": 1.042195510864258, "loss": 0.8219303488731384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.153672928176984, "step_time": 0.9752960433959962} +{"epoch": 0, "iter": 8002, "iter_tflops": 29.49080470428375, "iter_time": 0.6995771636962892, "loss": 0.9467157125473022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.72000865197208, "step_time": 0.5775780658721923} +{"epoch": 0, "iter": 8003, "iter_tflops": 43.47343675382184, "iter_time": 0.4745678062438965, "loss": 0.862205982208252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.743414397772305, "step_time": 0.44136898803710933} +{"epoch": 0, "iter": 8004, "iter_tflops": 45.58956515690153, "iter_time": 0.45253981781005853, "loss": 0.9619129300117493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.03981876790978, "step_time": 0.42070085144042974} +{"epoch": 0, "iter": 8005, "iter_tflops": 42.26668937161656, "iter_time": 0.4881170921325684, "loss": 0.09037764370441437, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 46.07425588478411, "step_time": 0.44777920150756834} +{"epoch": 0, "iter": 8006, "iter_tflops": 49.557725338258514, "iter_time": 0.41630428695678706, "loss": 0.13330256938934326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.82175478054346, "step_time": 0.3833225727081299} +{"epoch": 0, "iter": 8007, "iter_tflops": 48.994040762110046, "iter_time": 0.421093936920166, "loss": 0.09532083570957184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.27422176153399, "step_time": 0.38726222229003904} +{"epoch": 0, "iter": 8008, "iter_tflops": 46.89895419061566, "iter_time": 0.4399051933288574, "loss": 0.15551386773586273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.64804649617879, "step_time": 0.40734233474731446} +{"epoch": 0, "iter": 8009, "iter_tflops": 33.77981602500124, "iter_time": 0.6107520980834961, "loss": 0.004173933062702417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.02528736922426, "step_time": 0.5726836624145508} +{"epoch": 0, "iter": 8010, "iter_tflops": 42.186743663381144, "iter_time": 0.4890420951843262, "loss": 0.01976761780679226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.2768819926065, "step_time": 0.3801083030700684} +{"epoch": 0, "iter": 8011, "iter_tflops": 53.6552719046866, "iter_time": 0.38451195526123044, "loss": 0.005462709814310074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.288815031809946, "step_time": 0.34797614860534665} +{"epoch": 0, "iter": 8012, "iter_tflops": 60.090725416635124, "iter_time": 0.3433324089050293, "loss": 0.006708630360662937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.53172465528363, "step_time": 0.314826042175293} +{"epoch": 0, "iter": 8013, "iter_tflops": 54.60857543546891, "iter_time": 0.37779951858520505, "loss": 0.006739958189427853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.343094933015436, "step_time": 0.3418965091705322} +{"epoch": 0, "iter": 8014, "iter_tflops": 52.66606684422754, "iter_time": 0.39173408508300783, "loss": 0.007470238953828812, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 57.97398874487544, "step_time": 0.3558681049346924} +{"epoch": 0, "iter": 8015, "iter_tflops": 54.906415862791455, "iter_time": 0.3757501411437989, "loss": 0.0021781607065349817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.25033175562435, "step_time": 0.34242290306091305} +{"epoch": 0, "iter": 8016, "iter_tflops": 53.102835423117135, "iter_time": 0.3885120887756348, "loss": 0.002569095930084586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.43470517345949, "step_time": 0.35306233596801756} +{"epoch": 0, "iter": 8017, "iter_tflops": 21.991669863457844, "iter_time": 0.938132194519043, "loss": 0.5722509026527405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.992698457339102, "step_time": 0.8972889175415039} +{"epoch": 0, "iter": 8018, "iter_tflops": 13.568294957992311, "iter_time": 1.5205369262695314, "loss": 0.5922076106071472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.531748173938194, "step_time": 1.1132837181091308} +{"epoch": 0, "iter": 8019, "iter_tflops": 49.76088049510855, "iter_time": 0.4146046714782715, "loss": 0.5804543495178223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.956717570530564, "step_time": 0.38236376190185545} +{"epoch": 0, "iter": 8020, "iter_tflops": 48.36321174085416, "iter_time": 0.42658650588989266, "loss": 0.5399580597877502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40181718724367, "step_time": 0.39370950508117675} +{"epoch": 0, "iter": 8021, "iter_tflops": 37.19938840533113, "iter_time": 0.5546084060668944, "loss": 0.6215993762016296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.189204674098725, "step_time": 0.5133491363525391} +{"epoch": 0, "iter": 8022, "iter_tflops": 9.07867398035921, "iter_time": 2.2724787292480473, "loss": 0.5911526679992676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.604056214879856, "step_time": 1.945585075378418} +{"epoch": 0, "iter": 8023, "iter_tflops": 12.107695141047962, "iter_time": 1.7039653930664063, "loss": 0.6218563914299011, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 15.274242884419369, "step_time": 1.350711368560791} +{"epoch": 0, "iter": 8024, "iter_tflops": 20.589063526923276, "iter_time": 1.0020413742065428, "loss": 0.5992839336395264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.405673952086207, "step_time": 0.8814569301605223} +{"epoch": 0, "iter": 8025, "iter_tflops": 12.149804511708528, "iter_time": 1.163188003540039, "loss": 0.1667400449514389, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 12.984056678440572, "step_time": 1.088450798034668} +{"epoch": 0, "iter": 8026, "iter_tflops": 9.397385669992378, "iter_time": 1.503876434326172, "loss": 0.250373512506485, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 13.003624356591272, "step_time": 1.0868129119873047} +{"epoch": 0, "iter": 8027, "iter_tflops": 26.193473872663073, "iter_time": 0.5395430526733398, "loss": 0.1979207545518875, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 27.959478037667726, "step_time": 0.5054639015197754} +{"epoch": 0, "iter": 8028, "iter_tflops": 25.55341877201784, "iter_time": 0.5530573806762695, "loss": 0.19209833443164825, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 27.278441598580724, "step_time": 0.5180833663940431} +{"epoch": 0, "iter": 8029, "iter_tflops": 28.25791108525919, "iter_time": 0.730099739074707, "loss": 0.6181915402412415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.960277745851574, "step_time": 0.6886148948669435} +{"epoch": 0, "iter": 8030, "iter_tflops": 11.171566504281374, "iter_time": 1.846750274658203, "loss": 0.4761043190956116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.153907435517658, "step_time": 1.4576252956390383} +{"epoch": 0, "iter": 8031, "iter_tflops": 40.06645794553536, "iter_time": 0.5149218215942383, "loss": 0.6975930333137512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.85993890245063, "step_time": 0.47038582420349123} +{"epoch": 0, "iter": 8032, "iter_tflops": 39.507957392081344, "iter_time": 0.5222009658813477, "loss": 0.6389886140823364, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 43.347640718864895, "step_time": 0.4759450149536133} +{"epoch": 0, "iter": 8033, "iter_tflops": 34.434071346412686, "iter_time": 0.5991476669311524, "loss": 0.4773986041545868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.02129926982679, "step_time": 0.5426193714141846} +{"epoch": 0, "iter": 8034, "iter_tflops": 39.27213954305794, "iter_time": 0.5253366317749023, "loss": 0.5214920043945312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.019241701525544, "step_time": 0.47957826995849606} +{"epoch": 0, "iter": 8035, "iter_tflops": 40.568791008779144, "iter_time": 0.5085459289550781, "loss": 0.5280153751373291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.263987608997596, "step_time": 0.4660920677185058} +{"epoch": 0, "iter": 8036, "iter_tflops": 44.132051875579585, "iter_time": 0.467485481262207, "loss": 0.48219263553619385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.963022335078406, "step_time": 0.43014581871032714} +{"epoch": 0, "iter": 8037, "iter_tflops": 23.81418876014796, "iter_time": 0.5609666442871094, "loss": 0.05833769962191582, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 26.598804701015773, "step_time": 0.5022393188476563} +{"epoch": 0, "iter": 8038, "iter_tflops": 25.424215394402342, "iter_time": 0.5254425888061524, "loss": 0.0038855622988194227, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 28.316145868702304, "step_time": 0.4717790908813476} +{"epoch": 0, "iter": 8039, "iter_tflops": 27.796029260211068, "iter_time": 0.48060697555542, "loss": 0.007400047965347767, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 30.84869144460492, "step_time": 0.43304804611206055} +{"epoch": 0, "iter": 8040, "iter_tflops": 31.485462979970823, "iter_time": 0.4242899513244629, "loss": 0.007157010957598686, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 34.84933658393686, "step_time": 0.38333485984802246} +{"epoch": 0, "iter": 8041, "iter_tflops": 24.964469617868613, "iter_time": 0.8264182586669921, "loss": 
0.031948648393154144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.61769241477979, "step_time": 0.775089485168457} +{"epoch": 0, "iter": 8042, "iter_tflops": 24.447010282696862, "iter_time": 0.8439106979370117, "loss": 0.06190083548426628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.307705803675404, "step_time": 0.7555044593811036} +{"epoch": 0, "iter": 8043, "iter_tflops": 54.36129594632658, "iter_time": 0.3795180587768555, "loss": 0.017912037670612335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.63528795896957, "step_time": 0.34595445442199707} +{"epoch": 0, "iter": 8044, "iter_tflops": 50.04836377016844, "iter_time": 0.41222313690185547, "loss": 0.03216398507356644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.12068021443071, "step_time": 0.3812053623199463} +{"epoch": 0, "iter": 8045, "iter_tflops": 21.746123189396457, "iter_time": 0.9487251281738281, "loss": 0.13970841467380524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.70369686984764, "step_time": 0.9087107543945313} +{"epoch": 0, "iter": 8046, "iter_tflops": 14.111787187624882, "iter_time": 1.4619759521484375, "loss": 0.10778200626373291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.066470461075728, "step_time": 1.2088670330047608} +{"epoch": 0, "iter": 8047, "iter_tflops": 41.75002448750909, "iter_time": 0.49415763854980466, "loss": 0.12408794462680817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.94332289346344, "step_time": 0.44905531883239747} +{"epoch": 0, "iter": 8048, "iter_tflops": 39.35731083203758, "iter_time": 0.5241997756958008, "loss": 0.13536174595355988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.27476468425671, "step_time": 0.47674652099609377} +{"epoch": 0, "iter": 8049, "iter_tflops": 27.634924465803707, "iter_time": 0.746558708190918, "loss": 0.6479001641273499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.267225997269186, "step_time": 0.681631462097168} +{"epoch": 0, "iter": 8050, "iter_tflops": 37.595770976466646, "iter_time": 
0.5487610168457031, "loss": 0.9433832764625549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10321530506962, "step_time": 0.5019338111877442} +{"epoch": 0, "iter": 8051, "iter_tflops": 36.72986483892699, "iter_time": 0.5616980514526367, "loss": 0.7829930782318115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.985130198103384, "step_time": 0.5159691467285157} +{"epoch": 0, "iter": 8052, "iter_tflops": 42.606860070212356, "iter_time": 0.4842199935913086, "loss": 0.9661723971366882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.26960063275091, "step_time": 0.4458887310028077} +{"epoch": 0, "iter": 8053, "iter_tflops": 32.08512386921223, "iter_time": 0.6430111846923828, "loss": 0.7273851037025452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.4935879317883, "step_time": 0.5812625522613526} +{"epoch": 0, "iter": 8054, "iter_tflops": 36.018143508742575, "iter_time": 0.572797248840332, "loss": 0.8486067652702332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03928311030109, "step_time": 0.5152713012695312} +{"epoch": 0, "iter": 8055, "iter_tflops": 37.732856789015095, "iter_time": 0.546767333984375, "loss": 0.7759000062942505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.974065613715105, "step_time": 0.5035158996582032} +{"epoch": 0, "iter": 8056, "iter_tflops": 38.69113633307582, "iter_time": 0.5332253189086914, "loss": 0.9280616044998169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.27783084854081, "step_time": 0.48798845863342294} +{"epoch": 0, "iter": 8057, "iter_tflops": 21.865672039888178, "iter_time": 0.9435380477905273, "loss": 0.5196823477745056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.9424768695543, "step_time": 0.8616942024230956} +{"epoch": 0, "iter": 8058, "iter_tflops": 17.730054934515785, "iter_time": 1.1636226501464844, "loss": 0.5494343042373657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.39529077715904, "step_time": 1.0115616264343261} +{"epoch": 0, "iter": 8059, "iter_tflops": 39.172940986452005, "iter_time": 
0.5266669540405273, "loss": 0.33890488743782043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.0242500747071, "step_time": 0.49093305587768554} +{"epoch": 0, "iter": 8060, "iter_tflops": 47.34768400307471, "iter_time": 0.4357360649108887, "loss": 0.568790078163147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.18950671699039, "step_time": 0.4030336456298828} +{"epoch": 0, "iter": 8061, "iter_tflops": 21.811942119613345, "iter_time": 0.945862289428711, "loss": 0.5632161498069763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.087558316134967, "step_time": 0.8936022262573243} +{"epoch": 0, "iter": 8062, "iter_tflops": 14.768105410568808, "iter_time": 1.397003402709961, "loss": 0.7867861390113831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.189886977811092, "step_time": 1.0218528480529785} +{"epoch": 0, "iter": 8063, "iter_tflops": 37.44772255128209, "iter_time": 0.5509305267333985, "loss": 0.6837296485900879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.83322645596643, "step_time": 0.505252592086792} +{"epoch": 0, "iter": 8064, "iter_tflops": 40.464238970189214, "iter_time": 0.5098599166870118, "loss": 0.6666850447654724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.742342675076884, "step_time": 0.47165040206909187} +{"epoch": 0, "iter": 8065, "iter_tflops": 22.48535958517075, "iter_time": 0.9175345153808595, "loss": 0.9270636439323425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.491856306755647, "step_time": 0.8423654479980468} +{"epoch": 0, "iter": 8066, "iter_tflops": 28.000189327345545, "iter_time": 0.7368197860717773, "loss": 0.8170552253723145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.642265089957, "step_time": 0.5955468978881836} +{"epoch": 0, "iter": 8067, "iter_tflops": 36.971643477845774, "iter_time": 0.5580247879028319, "loss": 0.8084812164306641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.23250940734517, "step_time": 0.5127965869903564} +{"epoch": 0, "iter": 8068, "iter_tflops": 35.842453251825106, "iter_time": 
0.575604949951172, "loss": 0.8629593849182129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.9187906170445, "step_time": 0.5301062335968018} +{"epoch": 0, "iter": 8069, "iter_tflops": 9.91776103919923, "iter_time": 1.3510746765136719, "loss": 0.0028294557705521584, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 10.526163552154234, "step_time": 1.2729838104248048} +{"epoch": 0, "iter": 8070, "iter_tflops": 15.224808563093402, "iter_time": 0.8801185073852538, "loss": 0.019012494012713432, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 18.38612286102739, "step_time": 0.7287907238006591} +{"epoch": 0, "iter": 8071, "iter_tflops": 30.038625366213516, "iter_time": 0.446080192565918, "loss": 0.0017501383554190397, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 33.28930095995666, "step_time": 0.4025207920074462} +{"epoch": 0, "iter": 8072, "iter_tflops": 30.082738236282882, "iter_time": 0.4454260673522949, "loss": 0.007074451539665461, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 33.314613912459144, "step_time": 0.4022149505615234} +{"epoch": 0, "iter": 8073, "iter_tflops": 32.57221318382211, "iter_time": 0.6333955078125, "loss": 0.8649445176124573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.99110883531928, "step_time": 0.5732275047302245} +{"epoch": 0, "iter": 8074, "iter_tflops": 39.179615528850746, "iter_time": 0.5265772323608399, "loss": 0.7150988578796387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.895699347562356, "step_time": 0.48095948600769034} +{"epoch": 0, "iter": 8075, "iter_tflops": 38.343373752897456, "iter_time": 0.538061508178711, "loss": 0.5719220042228699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.56169507589704, "step_time": 0.4963968257904053} +{"epoch": 0, "iter": 8076, "iter_tflops": 44.058638974769295, "iter_time": 0.46826443099975584, "loss": 0.7248859405517578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.08622265217184, "step_time": 0.42904375457763666} +{"epoch": 0, "iter": 8077, "iter_tflops": 23.829313668549585, 
"iter_time": 0.8657863082885743, "loss": 0.5233267545700073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.59328114867561, "step_time": 0.8061136589050293} +{"epoch": 0, "iter": 8078, "iter_tflops": 8.281760010595118, "iter_time": 2.4911484374999997, "loss": 0.6247100830078125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.665056149256978, "step_time": 2.134606689453125} +{"epoch": 0, "iter": 8079, "iter_tflops": 13.916317021167275, "iter_time": 1.482511032104492, "loss": 0.5059310793876648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.38927688881799, "step_time": 1.2588165817260741} +{"epoch": 0, "iter": 8080, "iter_tflops": 43.89837535724238, "iter_time": 0.4699739646911621, "loss": 0.3694785535335541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.68445872197148, "step_time": 0.4326586494445801} +{"epoch": 0, "iter": 8081, "iter_tflops": 25.581483974025385, "iter_time": 0.5971337432861329, "loss": 0.19610938429832458, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 27.53974198582005, "step_time": 0.5546735801696777} +{"epoch": 0, "iter": 8082, "iter_tflops": 27.561016216995892, "iter_time": 0.5542454299926758, "loss": 0.22615201771259308, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.410036937694223, "step_time": 0.5193997993469238} +{"epoch": 0, "iter": 8083, "iter_tflops": 26.482809182533824, "iter_time": 0.5768106842041015, "loss": 0.3372383117675781, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.105998868457334, "step_time": 0.5434984664916993} +{"epoch": 0, "iter": 8084, "iter_tflops": 25.600489842319323, "iter_time": 0.5966904296875001, "loss": 0.3580614924430847, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 27.24316015971487, "step_time": 0.5607120170593262} +{"epoch": 0, "iter": 8085, "iter_tflops": 38.6303726361013, "iter_time": 0.5340640563964844, "loss": 0.8793604969978333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.019905568781454, "step_time": 0.490983814239502} +{"epoch": 0, "iter": 8086, "iter_tflops": 45.539151424540215, 
"iter_time": 0.45304079818725584, "loss": 0.9465026259422302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.41950206297812, "step_time": 0.4174686641693115} +{"epoch": 0, "iter": 8087, "iter_tflops": 43.251912604203994, "iter_time": 0.47699840927124015, "loss": 0.9067426919937134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66072306912041, "step_time": 0.4421511745452881} +{"epoch": 0, "iter": 8088, "iter_tflops": 44.944056510403946, "iter_time": 0.45903941726684566, "loss": 0.7945947051048279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.40790093137531, "step_time": 0.42619268989562986} +{"epoch": 0, "iter": 8089, "iter_tflops": 26.490833191767496, "iter_time": 0.7788012313842773, "loss": 0.2600008547306061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.88392743024942, "step_time": 0.7398919525146485} +{"epoch": 0, "iter": 8090, "iter_tflops": 12.721717196684194, "iter_time": 1.621722381591797, "loss": 0.1947026252746582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.067715273336884, "step_time": 1.208778865814209} +{"epoch": 0, "iter": 8091, "iter_tflops": 38.817308439772795, "iter_time": 0.5314921188354492, "loss": 0.13553112745285034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.48702564860423, "step_time": 0.48558573341369626} +{"epoch": 0, "iter": 8092, "iter_tflops": 40.16973343585713, "iter_time": 0.5135979690551757, "loss": 0.16421738266944885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.09454155611578, "step_time": 0.4678831615447998} +{"epoch": 0, "iter": 8093, "iter_tflops": 16.568737055356, "iter_time": 1.2451820220947265, "loss": 0.6566031575202942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.684837057468616, "step_time": 1.166597885131836} +{"epoch": 0, "iter": 8094, "iter_tflops": 18.29947595624205, "iter_time": 1.1274144439697265, "loss": 0.6363720893859863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.44965568241563, "step_time": 0.9189937610626222} +{"epoch": 0, "iter": 8095, "iter_tflops": 47.02831595103554, 
"iter_time": 0.43869513702392576, "loss": 0.5835841298103333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.16102704032405, "step_time": 0.40325800132751466} +{"epoch": 0, "iter": 8096, "iter_tflops": 48.16037862239935, "iter_time": 0.42838312530517575, "loss": 0.7134405970573425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.665720254139075, "step_time": 0.3993188018798828} +{"epoch": 0, "iter": 8097, "iter_tflops": 40.76712577764169, "iter_time": 0.5060718193054199, "loss": 0.00799434632062912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.37982649165228, "step_time": 0.46487548828125} +{"epoch": 0, "iter": 8098, "iter_tflops": 24.235790513904615, "iter_time": 0.8512655487060548, "loss": 0.0032588604371994734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.401931274599672, "step_time": 0.6786112804412843} +{"epoch": 0, "iter": 8099, "iter_tflops": 62.01703259678965, "iter_time": 0.33266818237304685, "loss": 0.010933316312730312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 68.34115988870131, "step_time": 0.30188386535644535} +{"epoch": 0, "iter": 8100, "iter_tflops": 52.02886302194178, "iter_time": 0.39653170013427735, "loss": 0.010454426519572735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.698187147501606, "step_time": 0.3638757171630859} +{"epoch": 0, "iter": 8101, "iter_tflops": 28.69894431929824, "iter_time": 0.7188798751831054, "loss": 0.8436936140060425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.304588274204388, "step_time": 0.6807910842895507} +{"epoch": 0, "iter": 8102, "iter_tflops": 12.630983593957865, "iter_time": 1.6333718872070313, "loss": 0.6645007729530334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.824853164165345, "step_time": 1.3916558418273925} +{"epoch": 0, "iter": 8103, "iter_tflops": 35.33803800719583, "iter_time": 0.5838211364746094, "loss": 0.8958642482757568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.523032115394635, "step_time": 0.5355521717071533} +{"epoch": 0, "iter": 8104, "iter_tflops": 
34.78654098898655, "iter_time": 0.593076889038086, "loss": 0.8674875497817993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.31630029603441, "step_time": 0.5528708190917969} +{"epoch": 0, "iter": 8105, "iter_tflops": 23.193255081575092, "iter_time": 0.8895298843383789, "loss": 0.5177528858184814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.68976211405584, "step_time": 0.8356132965087891} +{"epoch": 0, "iter": 8106, "iter_tflops": 10.827155371194708, "iter_time": 1.9054952850341795, "loss": 0.4577293395996094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.895399804032895, "step_time": 1.4847427062988283} +{"epoch": 0, "iter": 8107, "iter_tflops": 9.491724027251541, "iter_time": 2.1735875854492184, "loss": 0.3390922546386719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.474947878136108, "step_time": 1.7979248123168945} +{"epoch": 0, "iter": 8108, "iter_tflops": 37.67822889724087, "iter_time": 0.5475600662231446, "loss": 0.3711903989315033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6867476789902, "step_time": 0.4949077262878418} +{"epoch": 0, "iter": 8109, "iter_tflops": 12.592843077361639, "iter_time": 1.3138743591308593, "loss": 0.48380857706069946, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 13.300644600879782, "step_time": 1.2439557723999024} +{"epoch": 0, "iter": 8110, "iter_tflops": 12.207422651335827, "iter_time": 1.3553568267822265, "loss": 0.27248117327690125, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 16.76737130904026, "step_time": 0.986762523651123} +{"epoch": 0, "iter": 8111, "iter_tflops": 25.695462517121992, "iter_time": 0.6439040985107422, "loss": 0.3699875771999359, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 27.680217899428005, "step_time": 0.5977342262268066} +{"epoch": 0, "iter": 8112, "iter_tflops": 27.784727738278654, "iter_time": 0.5954859008789063, "loss": 0.33230167627334595, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 29.810419318856493, "step_time": 0.5550211639404297} +{"epoch": 0, "iter": 8113, "iter_tflops": 
29.667011165897947, "iter_time": 0.6954220428466797, "loss": 0.24897310137748718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.721646740777885, "step_time": 0.6305029106140136} +{"epoch": 0, "iter": 8114, "iter_tflops": 40.43900436342649, "iter_time": 0.5101780776977539, "loss": 0.28074392676353455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.11723199203306, "step_time": 0.4572774658203125} +{"epoch": 0, "iter": 8115, "iter_tflops": 47.32263514107579, "iter_time": 0.4359667091369629, "loss": 0.3226891756057739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.35471033620008, "step_time": 0.4017371215820313} +{"epoch": 0, "iter": 8116, "iter_tflops": 50.344755015259445, "iter_time": 0.40979628372192384, "loss": 0.23702701926231384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.17047952051794, "step_time": 0.3808549175262451} +{"epoch": 0, "iter": 8117, "iter_tflops": 26.57374921588256, "iter_time": 0.5164934539794922, "loss": 0.007431610021740198, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 28.774434989225103, "step_time": 0.4769917297363282} +{"epoch": 0, "iter": 8118, "iter_tflops": 14.547177730671935, "iter_time": 0.9434934921264648, "loss": 0.016100918874144554, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 17.657674166340126, "step_time": 0.7772919235229493} +{"epoch": 0, "iter": 8119, "iter_tflops": 29.268110400865154, "iter_time": 0.46894614410400387, "loss": 0.01911756955087185, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 32.45215955939328, "step_time": 0.42293541336059576} +{"epoch": 0, "iter": 8120, "iter_tflops": 32.77761230304712, "iter_time": 0.4187360382080078, "loss": 0.0035284576006233692, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 36.37206478600447, "step_time": 0.37735464286804205} +{"epoch": 0, "iter": 8121, "iter_tflops": 31.681485544865268, "iter_time": 0.6512034759521484, "loss": 0.09450142085552216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.35452740844307, "step_time": 0.5835488414764404} +{"epoch": 0, "iter": 8122, 
"iter_tflops": 39.901133163480104, "iter_time": 0.5170553283691406, "loss": 0.05843273922801018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.41887788871978, "step_time": 0.46446678733825686} +{"epoch": 0, "iter": 8123, "iter_tflops": 46.112161447510786, "iter_time": 0.44741111373901365, "loss": 0.0037189307622611523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.027882375662095, "step_time": 0.404310203552246} +{"epoch": 0, "iter": 8124, "iter_tflops": 46.555326058984285, "iter_time": 0.4431521644592285, "loss": 0.023967718705534935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.31034380707061, "step_time": 0.4020844917297363} +{"epoch": 0, "iter": 8125, "iter_tflops": 25.567652862040433, "iter_time": 0.8069216842651367, "loss": 0.11971516907215118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.46010399651805, "step_time": 0.7513115577697753} +{"epoch": 0, "iter": 8126, "iter_tflops": 10.337367044705882, "iter_time": 1.9957783660888673, "loss": 0.15511973202228546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.502760079213632, "step_time": 1.6501231231689455} +{"epoch": 0, "iter": 8127, "iter_tflops": 12.848393212953274, "iter_time": 1.605733352661133, "loss": 0.12281496822834015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.18990713886191, "step_time": 1.2001864433288574} +{"epoch": 0, "iter": 8128, "iter_tflops": 24.58372543751618, "iter_time": 0.8392175369262697, "loss": 0.1348150074481964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.54464083962248, "step_time": 0.698302396774292} +{"epoch": 0, "iter": 8129, "iter_tflops": 11.608482229053436, "iter_time": 1.2385027313232422, "loss": 0.32751530408859253, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 12.052957498317975, "step_time": 1.192830635070801} +{"epoch": 0, "iter": 8130, "iter_tflops": 11.705080036679021, "iter_time": 1.2282818145751953, "loss": 0.2603955566883087, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 12.947497376130798, "step_time": 1.1104182167053223} +{"epoch": 0, 
"iter": 8131, "iter_tflops": 23.175943532473863, "iter_time": 0.6203474273681641, "loss": 0.2195010930299759, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 24.677246275431358, "step_time": 0.5826070213317871} +{"epoch": 0, "iter": 8132, "iter_tflops": 26.038663690376953, "iter_time": 0.5521457290649414, "loss": 0.3269841969013214, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 27.69585431384358, "step_time": 0.5191079063415527} +{"epoch": 0, "iter": 8133, "iter_tflops": 28.979677592159828, "iter_time": 0.7119159088134767, "loss": 0.7580162882804871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.721963871745462, "step_time": 0.6715421447753905} +{"epoch": 0, "iter": 8134, "iter_tflops": 37.99509204384353, "iter_time": 0.5429936447143555, "loss": 0.7882291674613953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6379527709447, "step_time": 0.4954877014160156} +{"epoch": 0, "iter": 8135, "iter_tflops": 43.2507528382056, "iter_time": 0.4770111999511719, "loss": 0.8381876349449158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.549771286009985, "step_time": 0.4432050457000732} +{"epoch": 0, "iter": 8136, "iter_tflops": 44.886914993182295, "iter_time": 0.459623779296875, "loss": 0.7164450287818909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.55863155228149, "step_time": 0.42486974716186526} +{"epoch": 0, "iter": 8137, "iter_tflops": 32.548423001222474, "iter_time": 0.6338584671020507, "loss": 0.8931977152824402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.75690033481496, "step_time": 0.5935826644897461} +{"epoch": 0, "iter": 8138, "iter_tflops": 38.36844102481875, "iter_time": 0.537709976196289, "loss": 0.6981968879699707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.865527234007246, "step_time": 0.49279430770874016} +{"epoch": 0, "iter": 8139, "iter_tflops": 44.73785282541295, "iter_time": 0.4611552009582519, "loss": 0.8281994462013245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.28915981643724, "step_time": 0.42724068069458004} +{"epoch": 0, 
"iter": 8140, "iter_tflops": 48.069819173722884, "iter_time": 0.4291901626586914, "loss": 0.783078134059906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.94403678431602, "step_time": 0.3971792488098145} +{"epoch": 0, "iter": 8141, "iter_tflops": 34.7047857648864, "iter_time": 0.5598504180908204, "loss": 0.0503733828663826, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 37.29995713296065, "step_time": 0.520898422241211} +{"epoch": 0, "iter": 8142, "iter_tflops": 20.399295632201824, "iter_time": 0.9524588088989256, "loss": 0.036174144595861435, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 27.889475519658973, "step_time": 0.696660243988037} +{"epoch": 0, "iter": 8143, "iter_tflops": 43.98248174475052, "iter_time": 0.4417551727294922, "loss": 0.10271837562322617, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 48.65109070101664, "step_time": 0.3993638896942139} +{"epoch": 0, "iter": 8144, "iter_tflops": 49.625084720833314, "iter_time": 0.3915255546569824, "loss": 0.07506997138261795, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 54.68452631477472, "step_time": 0.35530140113830566} +{"epoch": 0, "iter": 8145, "iter_tflops": 37.24713808989752, "iter_time": 0.5538974151611328, "loss": 0.02301039546728134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.596547611649676, "step_time": 0.4959809093475342} +{"epoch": 0, "iter": 8146, "iter_tflops": 37.05470793630455, "iter_time": 0.5567738800048828, "loss": 0.026234162971377373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.11143297138638, "step_time": 0.5018334808349609} +{"epoch": 0, "iter": 8147, "iter_tflops": 42.35766086091555, "iter_time": 0.4870687637329101, "loss": 0.042452745139598846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.62729693637918, "step_time": 0.4424681434631348} +{"epoch": 0, "iter": 8148, "iter_tflops": 49.180783590354, "iter_time": 0.41949501419067386, "loss": 0.029802286997437477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.97439485532838, "step_time": 0.38223853302001953} +{"epoch": 
0, "iter": 8149, "iter_tflops": 15.127467847082752, "iter_time": 1.0260137557983398, "loss": 0.03587592393159866, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 16.27707847803609, "step_time": 0.9535488891601562} +{"epoch": 0, "iter": 8150, "iter_tflops": 12.438461910389318, "iter_time": 1.2478222961425782, "loss": 0.047857023775577545, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 17.68628719915563, "step_time": 0.8775719814300537} +{"epoch": 0, "iter": 8151, "iter_tflops": 41.57900777661649, "iter_time": 0.3732890930175781, "loss": 0.0354422926902771, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 45.31350288993966, "step_time": 0.3425246143341064} +{"epoch": 0, "iter": 8152, "iter_tflops": 45.04338112581578, "iter_time": 0.3445787086486816, "loss": 0.053815361112356186, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 49.027141495283765, "step_time": 0.3165795440673828} +{"epoch": 0, "iter": 8153, "iter_tflops": 29.38279091775158, "iter_time": 0.7021488723754882, "loss": 0.07237989455461502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.12126351684058, "step_time": 0.6629259605407715} +{"epoch": 0, "iter": 8154, "iter_tflops": 16.317704443718743, "iter_time": 1.2643379821777345, "loss": 0.10656936466693878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.51918575426748, "step_time": 1.1140389099121095} +{"epoch": 0, "iter": 8155, "iter_tflops": 42.892362857225336, "iter_time": 0.48099689865112305, "loss": 0.08977784216403961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.106439255470946, "step_time": 0.437967586517334} +{"epoch": 0, "iter": 8156, "iter_tflops": 48.414103503432926, "iter_time": 0.4261380882263184, "loss": 0.11652597039937973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.13403388223924, "step_time": 0.38828396797180176} +{"epoch": 0, "iter": 8157, "iter_tflops": 20.323096083489684, "iter_time": 1.0151550445556639, "loss": 0.5821129083633423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.360982432528203, "step_time": 0.9658307418823242} 
+{"epoch": 0, "iter": 8158, "iter_tflops": 17.464792866546766, "iter_time": 1.1812962036132812, "loss": 0.5831379890441895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.988483484374793, "step_time": 1.0321490135192872} +{"epoch": 0, "iter": 8159, "iter_tflops": 38.6645102252777, "iter_time": 0.5335925216674804, "loss": 0.6979003548622131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.012843337868475, "step_time": 0.4910663471221924} +{"epoch": 0, "iter": 8160, "iter_tflops": 39.95023073319419, "iter_time": 0.5164198837280274, "loss": 0.4473155736923218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.80360077370528, "step_time": 0.47099081230163575} +{"epoch": 0, "iter": 8161, "iter_tflops": 21.262752940646195, "iter_time": 0.9702926788330077, "loss": 0.1269681602716446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.642265937162904, "step_time": 0.9111761856079101} +{"epoch": 0, "iter": 8162, "iter_tflops": 18.79134287885127, "iter_time": 1.0979041595458985, "loss": 0.12264210730791092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.603474258607957, "step_time": 0.8740702018737793} +{"epoch": 0, "iter": 8163, "iter_tflops": 48.67111120077293, "iter_time": 0.423887866973877, "loss": 0.10244587808847427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.556646271532884, "step_time": 0.3925496578216553} +{"epoch": 0, "iter": 8164, "iter_tflops": 52.69857304403025, "iter_time": 0.3914924507141114, "loss": 0.07993043214082718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.50629216874264, "step_time": 0.35876236724853516} +{"epoch": 0, "iter": 8165, "iter_tflops": 28.644416125910702, "iter_time": 0.7202483520507813, "loss": 0.4944644868373871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.213652721186968, "step_time": 0.6828400955200196} +{"epoch": 0, "iter": 8166, "iter_tflops": 14.56447341240988, "iter_time": 1.4165354919433595, "loss": 0.7120795845985413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.190868811205057, "step_time": 
1.0218031578063964} +{"epoch": 0, "iter": 8167, "iter_tflops": 41.41454008979146, "iter_time": 0.49816063308715824, "loss": 0.559716522693634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37762502240538, "step_time": 0.4546534442901612} +{"epoch": 0, "iter": 8168, "iter_tflops": 36.705163860500704, "iter_time": 0.5620760498046875, "loss": 0.4645298719406128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.23304948323495, "step_time": 0.5127897033691406} +{"epoch": 0, "iter": 8169, "iter_tflops": 18.90940986217217, "iter_time": 1.0910490417480467, "loss": 0.006375242490321398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.317577319785514, "step_time": 1.0154307861328127} +{"epoch": 0, "iter": 8170, "iter_tflops": 40.93652904872511, "iter_time": 0.503977596282959, "loss": 0.003074931912124157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.47336123785882, "step_time": 0.45369625091552734} +{"epoch": 0, "iter": 8171, "iter_tflops": 45.023959217106864, "iter_time": 0.4582247734069824, "loss": 0.001938962028361857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.880447762189796, "step_time": 0.4136108322143555} +{"epoch": 0, "iter": 8172, "iter_tflops": 44.83483314477522, "iter_time": 0.46015769577026366, "loss": 0.016330471262335777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.40603713765044, "step_time": 0.4175824394226074} +{"epoch": 0, "iter": 8173, "iter_tflops": 34.91859381366113, "iter_time": 0.5908340301513673, "loss": 0.5000268220901489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.642929780751196, "step_time": 0.533890510559082} +{"epoch": 0, "iter": 8174, "iter_tflops": 36.35432536047481, "iter_time": 0.5675003814697266, "loss": 0.5324816107749939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.2829467916083, "step_time": 0.512154525756836} +{"epoch": 0, "iter": 8175, "iter_tflops": 39.44740188109879, "iter_time": 0.5230025939941406, "loss": 0.5225701928138733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19051248547783, "step_time": 
0.4776765155792237} +{"epoch": 0, "iter": 8176, "iter_tflops": 39.39423509052626, "iter_time": 0.5237084426879883, "loss": 0.5741978883743286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84033865203997, "step_time": 0.48158100891113287} +{"epoch": 0, "iter": 8177, "iter_tflops": 28.918821840638042, "iter_time": 0.7134140396118164, "loss": 0.39436933398246765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.08793525024687, "step_time": 0.6429548473358154} +{"epoch": 0, "iter": 8178, "iter_tflops": 39.986477091438346, "iter_time": 0.5159517669677735, "loss": 0.3507944345474243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84118524825008, "step_time": 0.4705870380401611} +{"epoch": 0, "iter": 8179, "iter_tflops": 38.43544905735053, "iter_time": 0.5367725372314454, "loss": 0.5022256374359131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.21232569352655, "step_time": 0.488745719909668} +{"epoch": 0, "iter": 8180, "iter_tflops": 41.35511898211137, "iter_time": 0.4988764152526855, "loss": 0.43766969442367554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.14169731621558, "step_time": 0.4570296363830566} +{"epoch": 0, "iter": 8181, "iter_tflops": 38.37953415290113, "iter_time": 0.5375545578002929, "loss": 0.01201584842056036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7563659538135, "step_time": 0.48252682495117183} +{"epoch": 0, "iter": 8182, "iter_tflops": 39.9254885360421, "iter_time": 0.5167399139404297, "loss": 0.059104520827531815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.90440450913169, "step_time": 0.4699094257354736} +{"epoch": 0, "iter": 8183, "iter_tflops": 42.72019758703809, "iter_time": 0.48293534851074216, "loss": 0.041816603392362595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.14519001768592, "step_time": 0.43760760116577146} +{"epoch": 0, "iter": 8184, "iter_tflops": 40.696567084242446, "iter_time": 0.5069492340087891, "loss": 0.02922576293349266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.68310995745519, 
"step_time": 0.46172017860412595} +{"epoch": 0, "iter": 8185, "iter_tflops": 18.914686177617142, "iter_time": 1.0907446899414062, "loss": 0.002889114897698164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.452847796710465, "step_time": 1.008714958190918} +{"epoch": 0, "iter": 8186, "iter_tflops": 27.936787724089122, "iter_time": 0.7384919738769531, "loss": 0.014559794217348099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.09055153567769, "step_time": 0.5879387073516845} +{"epoch": 0, "iter": 8187, "iter_tflops": 45.29979949836257, "iter_time": 0.4554345436096191, "loss": 0.00759497145190835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20251578674075, "step_time": 0.4109573631286621} +{"epoch": 0, "iter": 8188, "iter_tflops": 44.73665492924911, "iter_time": 0.4611675491333007, "loss": 0.007489688694477081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.11774322593809, "step_time": 0.42003341674804684} +{"epoch": 0, "iter": 8189, "iter_tflops": 27.629966751284904, "iter_time": 0.7466926651000976, "loss": 0.2987504303455353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.76006479303308, "step_time": 0.6932476005554198} +{"epoch": 0, "iter": 8190, "iter_tflops": 47.73608803404873, "iter_time": 0.4321907043457031, "loss": 0.2691400647163391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.36455975623435, "step_time": 0.39398962974548335} +{"epoch": 0, "iter": 8191, "iter_tflops": 48.54182102897481, "iter_time": 0.42501688385009756, "loss": 0.2600829005241394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.74545102665699, "step_time": 0.39114450836181636} +{"epoch": 0, "iter": 8192, "iter_tflops": 49.191630359936454, "iter_time": 0.41940251541137696, "loss": 0.19140014052391052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26069259647278, "step_time": 0.3873605937957764} +{"epoch": 0, "iter": 8193, "iter_tflops": 44.19624713125142, "iter_time": 0.4349400215148925, "loss": 0.10193555057048798, "lr": 3e-05, "seqlen": 7648.0, "step_tflops": 
48.947295767752436, "step_time": 0.3927227516174316} +{"epoch": 0, "iter": 8194, "iter_tflops": 36.042963799544005, "iter_time": 0.5333278579711914, "loss": 0.1202390193939209, "lr": 3e-05, "seqlen": 7648.0, "step_tflops": 39.34435884196496, "step_time": 0.48857618331909175} +{"epoch": 0, "iter": 8195, "iter_tflops": 41.74785349245695, "iter_time": 0.4604480247497559, "loss": 0.13162530958652496, "lr": 3e-05, "seqlen": 7648.0, "step_tflops": 45.23903427377636, "step_time": 0.4249143905639648} +{"epoch": 0, "iter": 8196, "iter_tflops": 50.11264028197681, "iter_time": 0.3835901794433593, "loss": 0.08025925606489182, "lr": 3e-05, "seqlen": 7648.0, "step_tflops": 54.50878847450254, "step_time": 0.3526535301208496} +{"epoch": 0, "iter": 8197, "iter_tflops": 40.707384305595816, "iter_time": 0.5068145217895508, "loss": 0.6683417558670044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84782258534788, "step_time": 0.47051580429077144} +{"epoch": 0, "iter": 8198, "iter_tflops": 25.450352462570464, "iter_time": 0.8106407775878907, "loss": 0.6475204825401306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.208350555287595, "step_time": 0.6405510730743408} +{"epoch": 0, "iter": 8199, "iter_tflops": 44.37181938440315, "iter_time": 0.4649593772888183, "loss": 0.5929123163223267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.91639845621849, "step_time": 0.43056436157226563} +{"epoch": 0, "iter": 8200, "iter_tflops": 45.39842013470998, "iter_time": 0.4544451866149903, "loss": 0.5811136364936829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.38932651549057, "step_time": 0.417723726272583} +{"epoch": 0, "iter": 8201, "iter_tflops": 27.197617167520477, "iter_time": 0.7585625381469726, "loss": 0.6646156311035156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.5797767644746, "step_time": 0.7218773498535156} +{"epoch": 0, "iter": 8202, "iter_tflops": 11.947102257083388, "iter_time": 1.7268700866699218, "loss": 0.8537302017211914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
15.014360212672347, "step_time": 1.3740907516479492} +{"epoch": 0, "iter": 8203, "iter_tflops": 39.185776742105084, "iter_time": 0.5264944381713867, "loss": 0.8626183867454529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.86050465507022, "step_time": 0.4813544235229492} +{"epoch": 0, "iter": 8204, "iter_tflops": 40.20697120849614, "iter_time": 0.5131222991943359, "loss": 0.7039268612861633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.761437994494024, "step_time": 0.4714445972442627} +{"epoch": 0, "iter": 8205, "iter_tflops": 20.014951479777487, "iter_time": 1.0307840881347654, "loss": 0.2122797966003418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.379733238344542, "step_time": 0.9649836730957032} +{"epoch": 0, "iter": 8206, "iter_tflops": 29.042144558345548, "iter_time": 0.7103846435546874, "loss": 0.16102120280265808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.20967790682228, "step_time": 0.5697673854827882} +{"epoch": 0, "iter": 8207, "iter_tflops": 47.42901858503486, "iter_time": 0.43498883438110353, "loss": 0.20252561569213867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.52164852896717, "step_time": 0.4004354305267334} +{"epoch": 0, "iter": 8208, "iter_tflops": 47.47670983659692, "iter_time": 0.4345518798828125, "loss": 0.2145923227071762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.61465883976036, "step_time": 0.3997138404846191} +{"epoch": 0, "iter": 8209, "iter_tflops": 43.00531886149743, "iter_time": 0.47973353195190427, "loss": 0.22854530811309814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19838829339204, "step_time": 0.4371143646240234} +{"epoch": 0, "iter": 8210, "iter_tflops": 47.040550855694136, "iter_time": 0.4385810356140136, "loss": 0.22027228772640228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.506077087049825, "step_time": 0.39292772674560544} +{"epoch": 0, "iter": 8211, "iter_tflops": 52.00324271501834, "iter_time": 0.3967270584106446, "loss": 0.1609000265598297, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 56.68800815357145, "step_time": 0.3639410552978516} +{"epoch": 0, "iter": 8212, "iter_tflops": 47.74027145251272, "iter_time": 0.43215283203125, "loss": 0.1898556351661682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.75460224212307, "step_time": 0.3986330223083496} +{"epoch": 0, "iter": 8213, "iter_tflops": 39.565935777596934, "iter_time": 0.5214357528686524, "loss": 0.7392978072166443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.922635649161656, "step_time": 0.48065765762329105} +{"epoch": 0, "iter": 8214, "iter_tflops": 43.657975023995476, "iter_time": 0.4725618515014648, "loss": 0.5862749218940735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.34176968016542, "step_time": 0.43579050064086916} +{"epoch": 0, "iter": 8215, "iter_tflops": 46.28264218160072, "iter_time": 0.44576308822631844, "loss": 0.7761673331260681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.00908120241528, "step_time": 0.41254694175720213} +{"epoch": 0, "iter": 8216, "iter_tflops": 44.78148853430726, "iter_time": 0.46070584487915034, "loss": 0.5726760625839233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.8507165173262, "step_time": 0.4311553726196289} +{"epoch": 0, "iter": 8217, "iter_tflops": 35.655866718617744, "iter_time": 0.5786170806884765, "loss": 0.018899692222476006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.41685215104409, "step_time": 0.5370323791503906} +{"epoch": 0, "iter": 8218, "iter_tflops": 26.657887383301034, "iter_time": 0.773920799255371, "loss": 0.022604994475841522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.87165455033338, "step_time": 0.668285966873169} +{"epoch": 0, "iter": 8219, "iter_tflops": 53.52705655820273, "iter_time": 0.385432991027832, "loss": 0.026275357231497765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.27687910178134, "step_time": 0.3540185031890869} +{"epoch": 0, "iter": 8220, "iter_tflops": 56.04411823747852, "iter_time": 0.3681223678588867, "loss": 0.04102932661771774, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 61.28335273970217, "step_time": 0.33665086174011233} +{"epoch": 0, "iter": 8221, "iter_tflops": 26.903367606135323, "iter_time": 0.7668591461181641, "loss": 0.03611847013235092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.419620163815154, "step_time": 0.7259454345703126} +{"epoch": 0, "iter": 8222, "iter_tflops": 17.948959530283084, "iter_time": 1.1494311676025393, "loss": 0.06547876447439194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.721995737052215, "step_time": 0.9497789134979249} +{"epoch": 0, "iter": 8223, "iter_tflops": 54.00530082770341, "iter_time": 0.38201978683471677, "loss": 0.02909019961953163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.38212329143338, "step_time": 0.34742936706542965} +{"epoch": 0, "iter": 8224, "iter_tflops": 54.96704383611667, "iter_time": 0.37533569335937494, "loss": 0.041424330323934555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.1451281623883, "step_time": 0.34302185630798343} +{"epoch": 0, "iter": 8225, "iter_tflops": 12.936125521174068, "iter_time": 0.7170290298461914, "loss": 0.0320436917245388, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 13.636555367210219, "step_time": 0.6801994552612305} +{"epoch": 0, "iter": 8226, "iter_tflops": 6.52761967326731, "iter_time": 1.4209739532470704, "loss": 0.028342030942440033, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 7.759112626104993, "step_time": 1.195443084716797} +{"epoch": 0, "iter": 8227, "iter_tflops": 22.62732554727874, "iter_time": 0.40992814254760745, "loss": 0.04949062317609787, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 24.864442117695884, "step_time": 0.37304587364196773} +{"epoch": 0, "iter": 8228, "iter_tflops": 25.82005127018182, "iter_time": 0.359239315032959, "loss": 0.0838639885187149, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 28.234208018742116, "step_time": 0.3285226745605469} +{"epoch": 0, "iter": 8229, "iter_tflops": 32.797878672093205, "iter_time": 0.45079840087890627, "loss": 0.01122825313359499, "lr": 3e-05, 
"seqlen": 5920.0, "step_tflops": 35.85489511778792, "step_time": 0.41236297607421873} +{"epoch": 0, "iter": 8230, "iter_tflops": 30.175645688907604, "iter_time": 0.4899723243713379, "loss": 0.009519126266241074, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 33.58102514306801, "step_time": 0.44028528594970706} +{"epoch": 0, "iter": 8231, "iter_tflops": 30.130112247534136, "iter_time": 0.4907127838134766, "loss": 0.010381299071013927, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 33.29424207369524, "step_time": 0.4440777244567871} +{"epoch": 0, "iter": 8232, "iter_tflops": 34.44465778621994, "iter_time": 0.4292459907531738, "loss": 0.0020355188753455877, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 37.91076520331548, "step_time": 0.39000086593627925} +{"epoch": 0, "iter": 8233, "iter_tflops": 18.904969098772163, "iter_time": 1.0913053283691405, "loss": 0.8000341057777405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.346039400784676, "step_time": 1.0140102996826172} +{"epoch": 0, "iter": 8234, "iter_tflops": 28.06421721277939, "iter_time": 0.7351387481689453, "loss": 0.9029587507247925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.20841126376383, "step_time": 0.6405498657226563} +{"epoch": 0, "iter": 8235, "iter_tflops": 32.578777174127715, "iter_time": 0.6332678909301758, "loss": 0.8786020278930664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.52918707212895, "step_time": 0.5806801452636718} +{"epoch": 0, "iter": 8236, "iter_tflops": 32.40787500816705, "iter_time": 0.6366074142456055, "loss": 0.7268034815788269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.30968924141252, "step_time": 0.5842898635864258} +{"epoch": 0, "iter": 8237, "iter_tflops": 16.63966400829746, "iter_time": 1.2398744049072266, "loss": 0.19530220329761505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.472768947263322, "step_time": 1.1807569580078126} +{"epoch": 0, "iter": 8238, "iter_tflops": 10.738200873332989, "iter_time": 1.9212802734375, "loss": 0.2797059416770935, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 12.377469957064214, "step_time": 1.6668263854980467} +{"epoch": 0, "iter": 8239, "iter_tflops": 14.753489380454818, "iter_time": 1.3983873901367188, "loss": 0.24617280066013336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.568844231863157, "step_time": 1.1110596466064453} +{"epoch": 0, "iter": 8240, "iter_tflops": 48.349959797804885, "iter_time": 0.426703426361084, "loss": 0.26892319321632385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.289832777039, "step_time": 0.39455267715454106} +{"epoch": 0, "iter": 8241, "iter_tflops": 15.191723576944579, "iter_time": 1.0270615692138674, "loss": 0.137600377202034, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 15.765753697039106, "step_time": 0.9896663208007811} +{"epoch": 0, "iter": 8242, "iter_tflops": 11.67259027746996, "iter_time": 1.3367071990966797, "loss": 0.21851846575737, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.063178104083523, "step_time": 1.1944134368896486} +{"epoch": 0, "iter": 8243, "iter_tflops": 27.52839322981046, "iter_time": 0.5667906341552733, "loss": 0.2058674395084381, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.389117521785884, "step_time": 0.5309052047729492} +{"epoch": 0, "iter": 8244, "iter_tflops": 26.965363402386767, "iter_time": 0.5786250762939453, "loss": 0.2535213232040405, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 28.83923695156205, "step_time": 0.5410280265808105} +{"epoch": 0, "iter": 8245, "iter_tflops": 39.7838985497383, "iter_time": 0.5185789794921876, "loss": 0.7228865027427673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.33712895951401, "step_time": 0.4760604591369628} +{"epoch": 0, "iter": 8246, "iter_tflops": 44.29434971437014, "iter_time": 0.4657725791931152, "loss": 0.7059203386306763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.89447828920043, "step_time": 0.42195139884948735} +{"epoch": 0, "iter": 8247, "iter_tflops": 44.39900453490328, "iter_time": 0.46467468643188475, "loss": 0.8041152954101562, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 47.92635655197606, "step_time": 0.43047489929199223} +{"epoch": 0, "iter": 8248, "iter_tflops": 45.163549738911804, "iter_time": 0.4568085021972656, "loss": 0.6705219745635986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95349978833416, "step_time": 0.4214426670074463} +{"epoch": 0, "iter": 8249, "iter_tflops": 27.510584860362783, "iter_time": 0.7499329299926758, "loss": 0.14052371680736542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.253548621640974, "step_time": 0.7052509689331055} +{"epoch": 0, "iter": 8250, "iter_tflops": 18.56829443347095, "iter_time": 1.111092544555664, "loss": 0.16056117415428162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.049501295981194, "step_time": 0.9356716613769532} +{"epoch": 0, "iter": 8251, "iter_tflops": 41.338882139491744, "iter_time": 0.4990723609924317, "loss": 0.1685774177312851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.49327563274361, "step_time": 0.45349764823913574} +{"epoch": 0, "iter": 8252, "iter_tflops": 40.86810995919604, "iter_time": 0.5048213272094726, "loss": 0.13432760536670685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.867529297361685, "step_time": 0.4598223667144775} +{"epoch": 0, "iter": 8253, "iter_tflops": 16.29106217397103, "iter_time": 1.2664056701660156, "loss": 0.44636285305023193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.539165749773208, "step_time": 1.1762870483398438} +{"epoch": 0, "iter": 8254, "iter_tflops": 27.096693159527558, "iter_time": 0.7613878707885742, "loss": 0.418730229139328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.74328930838196, "step_time": 0.6300861625671387} +{"epoch": 0, "iter": 8255, "iter_tflops": 39.18359693181454, "iter_time": 0.5265237274169922, "loss": 0.48841753602027893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.895035750320865, "step_time": 0.480966926574707} +{"epoch": 0, "iter": 8256, "iter_tflops": 39.02812182891262, "iter_time": 0.5286212234497071, "loss": 0.34009024500846863, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.55457393042057, "step_time": 0.4848149471282959} +{"epoch": 0, "iter": 8257, "iter_tflops": 31.605832207372625, "iter_time": 0.6527622299194336, "loss": 0.7866443395614624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.10688308320356, "step_time": 0.5876652011871338} +{"epoch": 0, "iter": 8258, "iter_tflops": 37.84837356875649, "iter_time": 0.5450985488891602, "loss": 0.7693625688552856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14669800329708, "step_time": 0.5014033813476562} +{"epoch": 0, "iter": 8259, "iter_tflops": 38.70929163535994, "iter_time": 0.532975227355957, "loss": 0.83079993724823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05959273782172, "step_time": 0.49052052497863774} +{"epoch": 0, "iter": 8260, "iter_tflops": 37.015213695515186, "iter_time": 0.5573679428100586, "loss": 0.7364721894264221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.37132886839693, "step_time": 0.511033302307129} +{"epoch": 0, "iter": 8261, "iter_tflops": 34.634268012564426, "iter_time": 0.5956844100952148, "loss": 0.7870647311210632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.15504836165372, "step_time": 0.5407172679901123} +{"epoch": 0, "iter": 8262, "iter_tflops": 39.11427746690431, "iter_time": 0.5274568481445313, "loss": 0.8141998648643494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.109929094132646, "step_time": 0.47856941413879395} +{"epoch": 0, "iter": 8263, "iter_tflops": 37.23129898659701, "iter_time": 0.5541330566406251, "loss": 0.6972782611846924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.629857583301124, "step_time": 0.5077815856933594} +{"epoch": 0, "iter": 8264, "iter_tflops": 36.892001885823674, "iter_time": 0.5592294387817384, "loss": 0.7643916606903076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.2879007741251, "step_time": 0.5120915489196778} +{"epoch": 0, "iter": 8265, "iter_tflops": 14.294403259499331, "iter_time": 1.4432986907958985, "loss": 0.5887951254844666, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 14.994922783987604, "step_time": 1.3758719406127928} +{"epoch": 0, "iter": 8266, "iter_tflops": 27.802968394811014, "iter_time": 0.7420464324951173, "loss": 0.4361887574195862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.265954006174823, "step_time": 0.6598581161499023} +{"epoch": 0, "iter": 8267, "iter_tflops": 46.57717011812718, "iter_time": 0.44294433212280276, "loss": 0.4946357309818268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.21877990181478, "step_time": 0.41082426834106445} +{"epoch": 0, "iter": 8268, "iter_tflops": 44.11048049123917, "iter_time": 0.46771409606933595, "loss": 0.29669561982154846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.36741711027026, "step_time": 0.4355545387268066} +{"epoch": 0, "iter": 8269, "iter_tflops": 19.803195967427087, "iter_time": 1.0418062591552735, "loss": 0.3716760277748108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.566868492280815, "step_time": 1.0031227416992188} +{"epoch": 0, "iter": 8270, "iter_tflops": 16.67840884735691, "iter_time": 1.2369941101074218, "loss": 0.40658390522003174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.183904727391297, "step_time": 1.0221557121276854} +{"epoch": 0, "iter": 8271, "iter_tflops": 39.38821469427159, "iter_time": 0.5237884902954102, "loss": 0.3857155442237854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.22050226679103, "step_time": 0.4773450660705567} +{"epoch": 0, "iter": 8272, "iter_tflops": 41.04291000045838, "iter_time": 0.502671314239502, "loss": 0.34003984928131104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.960047694783405, "step_time": 0.45887614822387696} +{"epoch": 0, "iter": 8273, "iter_tflops": 17.993286160431015, "iter_time": 1.1465995330810548, "loss": 0.06749414652585983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.066620776165294, "step_time": 1.0820529632568359} +{"epoch": 0, "iter": 8274, "iter_tflops": 14.642926435025167, "iter_time": 1.4089460601806643, "loss": 
0.08068152517080307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.631707084318624, "step_time": 0.9999702606201171} +{"epoch": 0, "iter": 8275, "iter_tflops": 38.03487377434004, "iter_time": 0.5424257125854491, "loss": 0.052577197551727295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76649920753098, "step_time": 0.49396271896362304} +{"epoch": 0, "iter": 8276, "iter_tflops": 45.34070316674063, "iter_time": 0.45502367782592773, "loss": 0.06471291929483414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88712411102039, "step_time": 0.4135554790496826} +{"epoch": 0, "iter": 8277, "iter_tflops": 20.997475524460928, "iter_time": 0.9825511398315429, "loss": 0.6455897092819214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.389315311093707, "step_time": 0.9214704971313475} +{"epoch": 0, "iter": 8278, "iter_tflops": 18.638785235535106, "iter_time": 1.1068904571533202, "loss": 0.8336935043334961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.84735853817791, "step_time": 0.9029968814849854} +{"epoch": 0, "iter": 8279, "iter_tflops": 38.48405417789407, "iter_time": 0.536094596862793, "loss": 0.6277825832366943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22972230732716, "step_time": 0.4885443801879883} +{"epoch": 0, "iter": 8280, "iter_tflops": 39.24241405224364, "iter_time": 0.5257345657348633, "loss": 0.6252423524856567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.917971441640425, "step_time": 0.48070989418029775} +{"epoch": 0, "iter": 8281, "iter_tflops": 19.90090027862566, "iter_time": 1.0366914672851562, "loss": 0.07257650047540665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.232198376881087, "step_time": 0.9716889953613282} +{"epoch": 0, "iter": 8282, "iter_tflops": 27.34547496122202, "iter_time": 0.7544609680175781, "loss": 0.05004851892590523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.61115323473472, "step_time": 0.6739730892181397} +{"epoch": 0, "iter": 8283, "iter_tflops": 53.725391140692196, "iter_time": 0.38401011276245123, 
"loss": 0.06081777065992355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.55495635476458, "step_time": 0.35233727073669435} +{"epoch": 0, "iter": 8284, "iter_tflops": 56.02815532533138, "iter_time": 0.3682272491455078, "loss": 0.023290256038308144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.12317237052964, "step_time": 0.3375330944061279} +{"epoch": 0, "iter": 8285, "iter_tflops": 26.15949152155461, "iter_time": 0.513783332824707, "loss": 0.004686826374381781, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 28.334974486997485, "step_time": 0.4743364334106445} +{"epoch": 0, "iter": 8286, "iter_tflops": 10.51256029159945, "iter_time": 1.278500228881836, "loss": 0.0029050076846033335, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 12.313499077065957, "step_time": 1.0915102729797364} +{"epoch": 0, "iter": 8287, "iter_tflops": 28.611551355639104, "iter_time": 0.46975120544433596, "loss": 0.011729570105671883, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 31.599716659263017, "step_time": 0.4253301029205322} +{"epoch": 0, "iter": 8288, "iter_tflops": 31.573115839474276, "iter_time": 0.4256884498596192, "loss": 0.006070327013731003, "lr": 3e-05, "seqlen": 5392.0, "step_tflops": 34.92909535796617, "step_time": 0.38478840065002445} +{"epoch": 0, "iter": 8289, "iter_tflops": 22.91238570088366, "iter_time": 0.9004341049194335, "loss": 0.1808951497077942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.33300010202879, "step_time": 0.8478647689819336} +{"epoch": 0, "iter": 8290, "iter_tflops": 25.20097355192437, "iter_time": 0.8186625595092774, "loss": 0.19239093363285065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.49756420258378, "step_time": 0.7239598922729492} +{"epoch": 0, "iter": 8291, "iter_tflops": 40.02305288116798, "iter_time": 0.5154802551269532, "loss": 0.22559945285320282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.99639700678699, "step_time": 0.46892688751220707} +{"epoch": 0, "iter": 8292, "iter_tflops": 40.57911140039765, "iter_time": 
0.5084165916442871, "loss": 0.22100311517715454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25681209577014, "step_time": 0.4661676368713379} +{"epoch": 0, "iter": 8293, "iter_tflops": 2.528393432550257, "iter_time": 0.6820148925781251, "loss": 0.15494880080223083, "lr": 3e-05, "seqlen": 704.0, "step_tflops": 2.746712359965325, "step_time": 0.6278058090209961} +{"epoch": 0, "iter": 8294, "iter_tflops": 0.7477792594625905, "iter_time": 2.30603076171875, "loss": 0.3298262655735016, "lr": 3e-05, "seqlen": 704.0, "step_tflops": 0.8381967937897821, "step_time": 2.0572757949829104} +{"epoch": 0, "iter": 8295, "iter_tflops": 1.3030091591723967, "iter_time": 1.3233997344970703, "loss": 0.3378249704837799, "lr": 3e-05, "seqlen": 704.0, "step_tflops": 1.65738345085533, "step_time": 1.0404363422393799} +{"epoch": 0, "iter": 8296, "iter_tflops": 3.28008126692003, "iter_time": 0.5257192840576173, "loss": 0.2511463165283203, "lr": 3e-05, "seqlen": 704.0, "step_tflops": 3.5859691302194783, "step_time": 0.48087474060058594} +{"epoch": 0, "iter": 8297, "iter_tflops": 16.419211286338445, "iter_time": 0.9228774337768555, "loss": 0.4984697699546814, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 17.39789559116354, "step_time": 0.8709627838134766} +{"epoch": 0, "iter": 8298, "iter_tflops": 13.820156765812381, "iter_time": 1.0964361572265626, "loss": 0.3486403226852417, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 15.61153399560562, "step_time": 0.9706233596801759} +{"epoch": 0, "iter": 8299, "iter_tflops": 23.255182606244293, "iter_time": 0.6515932312011719, "loss": 0.41253751516342163, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.016179363684138, "step_time": 0.6057247734069825} +{"epoch": 0, "iter": 8300, "iter_tflops": 24.444987431134074, "iter_time": 0.6198783950805664, "loss": 0.26148879528045654, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 26.19214006576179, "step_time": 0.578529266357422} +{"epoch": 0, "iter": 8301, "iter_tflops": 22.020755172477667, "iter_time": 
0.9368930969238282, "loss": 0.43138936161994934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.716927996804397, "step_time": 0.8698889465332031} +{"epoch": 0, "iter": 8302, "iter_tflops": 26.616244657061554, "iter_time": 0.7751316452026367, "loss": 0.48910287022590637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.88357405405628, "step_time": 0.6273981494903564} +{"epoch": 0, "iter": 8303, "iter_tflops": 43.923420089394924, "iter_time": 0.46970598983764644, "loss": 0.22964507341384888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32221258456093, "step_time": 0.4359706020355225} +{"epoch": 0, "iter": 8304, "iter_tflops": 44.24844456882291, "iter_time": 0.4662557907104492, "loss": 0.33092355728149414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.55065942443684, "step_time": 0.4338760757446289} +{"epoch": 0, "iter": 8305, "iter_tflops": 34.783179267447174, "iter_time": 0.5931342086791992, "loss": 0.79216468334198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.229658461424386, "step_time": 0.5541574745178223} +{"epoch": 0, "iter": 8306, "iter_tflops": 14.107414733829135, "iter_time": 1.4624290771484374, "loss": 0.7087556719779968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.17739669206216, "step_time": 1.2753036785125733} +{"epoch": 0, "iter": 8307, "iter_tflops": 37.61991026761136, "iter_time": 0.5484088973999023, "loss": 0.8033403158187866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.800086938941504, "step_time": 0.5056629791259766} +{"epoch": 0, "iter": 8308, "iter_tflops": 39.72313729266914, "iter_time": 0.5193722076416016, "loss": 0.8622241020202637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89661116808245, "step_time": 0.4809492626190185} +{"epoch": 0, "iter": 8309, "iter_tflops": 31.827811856430575, "iter_time": 0.6482096099853515, "loss": 0.8285077810287476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.92172041653919, "step_time": 0.5907811317443847} +{"epoch": 0, "iter": 8310, "iter_tflops": 38.129609977730254, 
"iter_time": 0.541078010559082, "loss": 0.6652296781539917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.58491859738311, "step_time": 0.4961196079254151} +{"epoch": 0, "iter": 8311, "iter_tflops": 37.02818148322186, "iter_time": 0.5571727447509766, "loss": 0.7864303588867188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.18591326783258, "step_time": 0.5133911819458008} +{"epoch": 0, "iter": 8312, "iter_tflops": 36.18933815139322, "iter_time": 0.5700876159667968, "loss": 0.7876238822937012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.2301637849048, "step_time": 0.5258987350463867} +{"epoch": 0, "iter": 8313, "iter_tflops": 21.73967687877846, "iter_time": 0.949006446838379, "loss": 0.7348508834838867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.76010096327054, "step_time": 0.8683083267211914} +{"epoch": 0, "iter": 8314, "iter_tflops": 45.44675434532356, "iter_time": 0.45396186828613283, "loss": 0.6210115551948547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.2724470689884, "step_time": 0.4187146110534668} +{"epoch": 0, "iter": 8315, "iter_tflops": 44.08171681497599, "iter_time": 0.46801928329467773, "loss": 0.9062670469284058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.569437217432075, "step_time": 0.43370480537414546} +{"epoch": 0, "iter": 8316, "iter_tflops": 44.29845559257044, "iter_time": 0.46572940826416015, "loss": 0.7576696276664734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.792926190893986, "step_time": 0.4316767177581787} +{"epoch": 0, "iter": 8317, "iter_tflops": 41.39391252302767, "iter_time": 0.498408878326416, "loss": 0.14189685881137848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.88481692453996, "step_time": 0.459645263671875} +{"epoch": 0, "iter": 8318, "iter_tflops": 37.49272114827248, "iter_time": 0.5502693023681641, "loss": 0.05848598852753639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.142904007458284, "step_time": 0.4895508270263672} +{"epoch": 0, "iter": 8319, "iter_tflops": 43.689028701112306, 
"iter_time": 0.472225959777832, "loss": 0.046235550194978714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.868809922820155, "step_time": 0.4309924049377441} +{"epoch": 0, "iter": 8320, "iter_tflops": 37.31006014682186, "iter_time": 0.5529632873535156, "loss": 0.11415757983922958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.24609980780652, "step_time": 0.5001950149536133} +{"epoch": 0, "iter": 8321, "iter_tflops": 23.875147796605876, "iter_time": 0.864124221801758, "loss": 0.7974346280097961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.514250524439415, "step_time": 0.8086106033325196} +{"epoch": 0, "iter": 8322, "iter_tflops": 16.691470039406568, "iter_time": 1.236026153564453, "loss": 0.6229366064071655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.620422526192627, "step_time": 1.0515111732482911} +{"epoch": 0, "iter": 8323, "iter_tflops": 36.20625353705362, "iter_time": 0.569821273803711, "loss": 0.7264809012413025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.29097049368381, "step_time": 0.5250848541259765} +{"epoch": 0, "iter": 8324, "iter_tflops": 36.90015976615399, "iter_time": 0.5591058044433593, "loss": 0.6500481963157654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.984559362656846, "step_time": 0.5159765129089355} +{"epoch": 0, "iter": 8325, "iter_tflops": 22.537468226235696, "iter_time": 0.9154130935668945, "loss": 0.034739598631858826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.1829921630249, "step_time": 0.8531241035461427} +{"epoch": 0, "iter": 8326, "iter_tflops": 21.49614576661668, "iter_time": 0.959757797241211, "loss": 0.04306185990571976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.611998573778948, "step_time": 0.7752553215026856} +{"epoch": 0, "iter": 8327, "iter_tflops": 50.023250262920584, "iter_time": 0.4124300880432129, "loss": 0.029117310419678688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.9492397284398, "step_time": 0.3754573059082031} +{"epoch": 0, "iter": 8328, "iter_tflops": 54.07331350251473, 
"iter_time": 0.3815392875671387, "loss": 0.039740435779094696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.4077603360396, "step_time": 0.3472794361114502} +{"epoch": 0, "iter": 8329, "iter_tflops": 31.841674030543224, "iter_time": 0.6479274139404296, "loss": 0.5088343620300293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.94933539270834, "step_time": 0.6077024269104003} +{"epoch": 0, "iter": 8330, "iter_tflops": 14.388866874702689, "iter_time": 1.4338233642578126, "loss": 0.5200111865997314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.57627920505208, "step_time": 1.1106149559020997} +{"epoch": 0, "iter": 8331, "iter_tflops": 40.965638701991374, "iter_time": 0.5036194763183593, "loss": 0.5523476004600525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.04194699183083, "step_time": 0.4580417785644531} +{"epoch": 0, "iter": 8332, "iter_tflops": 43.942168488983086, "iter_time": 0.4695055847167969, "loss": 0.4277842044830322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.84948032411936, "step_time": 0.43116651153564456} +{"epoch": 0, "iter": 8333, "iter_tflops": 23.360793053087498, "iter_time": 0.8831503906249999, "loss": 0.08633258938789368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.092704293445628, "step_time": 0.8221949005126953} +{"epoch": 0, "iter": 8334, "iter_tflops": 21.723876844363353, "iter_time": 0.9496966705322265, "loss": 0.12087985128164291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.21333288273888, "step_time": 0.7581244678497314} +{"epoch": 0, "iter": 8335, "iter_tflops": 39.5883586452885, "iter_time": 0.5211404113769531, "loss": 0.2072969228029251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.658689574578034, "step_time": 0.47255411720275875} +{"epoch": 0, "iter": 8336, "iter_tflops": 46.285617269464865, "iter_time": 0.4457344360351562, "loss": 0.12023786455392838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.87775225500607, "step_time": 0.4055032424926758} +{"epoch": 0, "iter": 8337, "iter_tflops": 
20.345585059933835, "iter_time": 1.014032943725586, "loss": 0.8505978584289551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.15655532159699, "step_time": 0.9311507682800293} +{"epoch": 0, "iter": 8338, "iter_tflops": 26.347306123745465, "iter_time": 0.7830437545776368, "loss": 0.6778628826141357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.37890259159725, "step_time": 0.65748295211792} +{"epoch": 0, "iter": 8339, "iter_tflops": 45.546248255530244, "iter_time": 0.45297020721435544, "loss": 0.9007084369659424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23482665178752, "step_time": 0.41903455162048336} +{"epoch": 0, "iter": 8340, "iter_tflops": 45.17080877765636, "iter_time": 0.45673509216308594, "loss": 0.8600112795829773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.3450759217024, "step_time": 0.42674653244018557} +{"epoch": 0, "iter": 8341, "iter_tflops": 26.213120936293194, "iter_time": 0.7870521621704103, "loss": 0.8641217947006226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.573437408810168, "step_time": 0.7482234878540038} +{"epoch": 0, "iter": 8342, "iter_tflops": 16.328346881372127, "iter_time": 1.2635139160156248, "loss": 0.6687679886817932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.577294386988875, "step_time": 0.875040756225586} +{"epoch": 0, "iter": 8343, "iter_tflops": 35.161410314761525, "iter_time": 0.5867538681030273, "loss": 0.5525569915771484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.17035398401351, "step_time": 0.5405004501342774} +{"epoch": 0, "iter": 8344, "iter_tflops": 42.053302256174156, "iter_time": 0.49059389877319337, "loss": 0.8160765171051025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.90632552614834, "step_time": 0.4494172267913818} +{"epoch": 0, "iter": 8345, "iter_tflops": 30.720005924632517, "iter_time": 0.671584945678711, "loss": 0.004379569552838802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.10676776250426, "step_time": 0.6231684608459473} +{"epoch": 0, "iter": 8346, "iter_tflops": 
41.96402437854381, "iter_time": 0.49163763046264647, "loss": 0.0071064988151192665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.598711570422005, "step_time": 0.43343806648254396} +{"epoch": 0, "iter": 8347, "iter_tflops": 53.66881669499207, "iter_time": 0.38441491317749016, "loss": 0.008326850831508636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.11775952077364, "step_time": 0.3489830074310303} +{"epoch": 0, "iter": 8348, "iter_tflops": 56.90124251662573, "iter_time": 0.36257720565795903, "loss": 0.01661914773285389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.44643605108462, "step_time": 0.33038064002990725} +{"epoch": 0, "iter": 8349, "iter_tflops": 23.000672783261546, "iter_time": 0.6552494430541993, "loss": 0.2433159202337265, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.43346590442795, "step_time": 0.6168252220153808} +{"epoch": 0, "iter": 8350, "iter_tflops": 8.327505204572548, "iter_time": 1.809807098388672, "loss": 0.12914924323558807, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 10.516047051685153, "step_time": 1.4331600036621095} +{"epoch": 0, "iter": 8351, "iter_tflops": 8.282303522628993, "iter_time": 1.819684341430664, "loss": 0.16278310120105743, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 10.59152372834385, "step_time": 1.4229471054077147} +{"epoch": 0, "iter": 8352, "iter_tflops": 24.34974026237372, "iter_time": 0.6189461517333985, "loss": 0.3032846748828888, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 26.11738286027201, "step_time": 0.5770554466247558} +{"epoch": 0, "iter": 8353, "iter_tflops": 18.504264990921, "iter_time": 0.8609045333862305, "loss": 0.32698941230773926, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 20.096952296577197, "step_time": 0.7926776847839354} +{"epoch": 0, "iter": 8354, "iter_tflops": 24.489648025834526, "iter_time": 0.650495491027832, "loss": 0.25430476665496826, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 26.386236974578214, "step_time": 0.603739200592041} +{"epoch": 0, "iter": 8355, 
"iter_tflops": 25.152680774737366, "iter_time": 0.6333482208251953, "loss": 0.29939889907836914, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 27.064234778725684, "step_time": 0.5886146697998046} +{"epoch": 0, "iter": 8356, "iter_tflops": 22.707656292634223, "iter_time": 0.7015433654785156, "loss": 0.21610167622566223, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 24.539214440076403, "step_time": 0.6491815643310547} +{"epoch": 0, "iter": 8357, "iter_tflops": 33.75820680838599, "iter_time": 0.611143051147461, "loss": 0.10800871253013611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.63557176136746, "step_time": 0.5481806850433351} +{"epoch": 0, "iter": 8358, "iter_tflops": 40.956977667558476, "iter_time": 0.5037259750366211, "loss": 0.09446479380130768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.57150317493266, "step_time": 0.4527191791534424} +{"epoch": 0, "iter": 8359, "iter_tflops": 44.494047198233176, "iter_time": 0.4636821060180665, "loss": 0.13236668705940247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.765366621345756, "step_time": 0.42306856155395517} +{"epoch": 0, "iter": 8360, "iter_tflops": 40.49881925405156, "iter_time": 0.5094245681762696, "loss": 0.06600319594144821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.57950255041068, "step_time": 0.4627932643890381} +{"epoch": 0, "iter": 8361, "iter_tflops": 18.516288465103155, "iter_time": 1.1142132263183593, "loss": 0.787132740020752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.797324546855258, "step_time": 1.042115234375} +{"epoch": 0, "iter": 8362, "iter_tflops": 20.37708482729464, "iter_time": 1.0124654083251952, "loss": 0.755021333694458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.730399940094447, "step_time": 0.8342401885986328} +{"epoch": 0, "iter": 8363, "iter_tflops": 45.62259724296074, "iter_time": 0.45221216583251955, "loss": 0.8216115236282349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87561218916593, "step_time": 0.422114273071289} +{"epoch": 0, "iter": 8364, 
"iter_tflops": 46.66583404612786, "iter_time": 0.4421027488708496, "loss": 0.953857958316803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.2056910995977, "step_time": 0.4109313716888428} +{"epoch": 0, "iter": 8365, "iter_tflops": 43.73360128341523, "iter_time": 0.47174467468261716, "loss": 0.3305385410785675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.53238228935039, "step_time": 0.4340429096221924} +{"epoch": 0, "iter": 8366, "iter_tflops": 43.09400922067792, "iter_time": 0.47874620819091795, "loss": 0.30553358793258667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.26249114293809, "step_time": 0.43652149963378906} +{"epoch": 0, "iter": 8367, "iter_tflops": 46.30599566940363, "iter_time": 0.4455382766723633, "loss": 0.4089025557041168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.0258624647765, "step_time": 0.41240855216979977} +{"epoch": 0, "iter": 8368, "iter_tflops": 50.50972771776812, "iter_time": 0.40845782470703124, "loss": 0.23554477095603943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.462369928038825, "step_time": 0.3788137302398682} +{"epoch": 0, "iter": 8369, "iter_tflops": 35.40333624867755, "iter_time": 0.5827443313598634, "loss": 0.8672642111778259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.030563718640884, "step_time": 0.5424871864318848} +{"epoch": 0, "iter": 8370, "iter_tflops": 16.58686152649053, "iter_time": 1.2438214111328125, "loss": 0.8819074034690857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.96937991116745, "step_time": 1.0875997848510741} +{"epoch": 0, "iter": 8371, "iter_tflops": 37.833758407343666, "iter_time": 0.5453091201782226, "loss": 0.8722821474075317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.42613238040022, "step_time": 0.49802123260498043} +{"epoch": 0, "iter": 8372, "iter_tflops": 38.75074816957662, "iter_time": 0.5324050369262695, "loss": 0.648857831954956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.81832395639918, "step_time": 0.4933505592346192} +{"epoch": 0, "iter": 8373, 
"iter_tflops": 17.790772376857156, "iter_time": 1.1596513671875, "loss": 0.08690916001796722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.18310537892643, "step_time": 1.075482467651367} +{"epoch": 0, "iter": 8374, "iter_tflops": 38.247131167622264, "iter_time": 0.5394154510498047, "loss": 0.1359444111585617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.03325035015116, "step_time": 0.4207572078704835} +{"epoch": 0, "iter": 8375, "iter_tflops": 47.44456053517755, "iter_time": 0.43484634017944335, "loss": 0.15546876192092896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.71465201746919, "step_time": 0.3989409713745117} +{"epoch": 0, "iter": 8376, "iter_tflops": 53.55881635587483, "iter_time": 0.38520443344116206, "loss": 0.10374913364648819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.42189382708767, "step_time": 0.35313975906372064} +{"epoch": 0, "iter": 8377, "iter_tflops": 24.145467947802768, "iter_time": 0.7584993057250977, "loss": 0.2262110710144043, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 25.36981021870001, "step_time": 0.721894271850586} +{"epoch": 0, "iter": 8378, "iter_tflops": 11.540531458546932, "iter_time": 1.5869564361572266, "loss": 0.10888934880495071, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 13.8886884197608, "step_time": 1.3186501216888427} +{"epoch": 0, "iter": 8379, "iter_tflops": 33.55765506676126, "iter_time": 0.5457568664550781, "loss": 0.12703852355480194, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 36.61252267200042, "step_time": 0.5002201251983642} +{"epoch": 0, "iter": 8380, "iter_tflops": 36.32255196464072, "iter_time": 0.5042134895324708, "loss": 0.15809935331344604, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 39.70420851845868, "step_time": 0.4612690029144287} +{"epoch": 0, "iter": 8381, "iter_tflops": 30.0660492387787, "iter_time": 0.686192367553711, "loss": 0.12177801877260208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.339748313068384, "step_time": 0.6188137149810791} +{"epoch": 0, "iter": 8382, 
"iter_tflops": 36.88687793397802, "iter_time": 0.5593071212768554, "loss": 0.21056786179542542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.950799784820155, "step_time": 0.4803424758911133} +{"epoch": 0, "iter": 8383, "iter_tflops": 42.13839966786162, "iter_time": 0.489603157043457, "loss": 0.2055041790008545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17775617829626, "step_time": 0.4467755737304688} +{"epoch": 0, "iter": 8384, "iter_tflops": 45.58300727724554, "iter_time": 0.452604923248291, "loss": 0.17363739013671875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.73012738476245, "step_time": 0.4148610630035401} +{"epoch": 0, "iter": 8385, "iter_tflops": 30.005513839420775, "iter_time": 0.6875767440795898, "loss": 0.20325496792793274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.28839516264657, "step_time": 0.6197683429718017} +{"epoch": 0, "iter": 8386, "iter_tflops": 34.697149239099154, "iter_time": 0.5946048583984376, "loss": 0.20908322930335999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.968361198470724, "step_time": 0.5294319000244141} +{"epoch": 0, "iter": 8387, "iter_tflops": 41.82756471604931, "iter_time": 0.49324156570434574, "loss": 0.24239984154701233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74904603651145, "step_time": 0.45096226692199703} +{"epoch": 0, "iter": 8388, "iter_tflops": 40.081468044680676, "iter_time": 0.514728988647461, "loss": 0.26149752736091614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.742526645106274, "step_time": 0.4716484184265137} +{"epoch": 0, "iter": 8389, "iter_tflops": 25.569379248039187, "iter_time": 0.8068672027587891, "loss": 0.7401748895645142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.519064392893533, "step_time": 0.7497018508911132} +{"epoch": 0, "iter": 8390, "iter_tflops": 8.019490490461392, "iter_time": 2.5726189880371093, "loss": 0.8155707120895386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.67703044995467, "step_time": 2.1319653396606446} +{"epoch": 0, "iter": 
8391, "iter_tflops": 12.356926988480938, "iter_time": 1.669597427368164, "loss": 0.656731128692627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.750351349968886, "step_time": 1.3986848869323731} +{"epoch": 0, "iter": 8392, "iter_tflops": 41.452622655950286, "iter_time": 0.4977029724121094, "loss": 0.9375693798065186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37412101974208, "step_time": 0.45468855476379394} +{"epoch": 0, "iter": 8393, "iter_tflops": 13.178863761284886, "iter_time": 1.137386016845703, "loss": 0.14641641080379486, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.319815942798861, "step_time": 1.0467631301879883} +{"epoch": 0, "iter": 8394, "iter_tflops": 12.465813628646343, "iter_time": 1.2024450073242186, "loss": 0.2539963722229004, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.96175855054086, "step_time": 1.001851173400879} +{"epoch": 0, "iter": 8395, "iter_tflops": 26.86147350777217, "iter_time": 0.5580280380249023, "loss": 0.24045249819755554, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.670318230599193, "step_time": 0.5228213806152344} +{"epoch": 0, "iter": 8396, "iter_tflops": 23.035128437902156, "iter_time": 0.6507215881347657, "loss": 0.2823914587497711, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.724367342325227, "step_time": 0.6062624435424805} +{"epoch": 0, "iter": 8397, "iter_tflops": 29.398906448358623, "iter_time": 0.7017639770507813, "loss": 0.1722492128610611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.302312407841814, "step_time": 0.6590916748046874} +{"epoch": 0, "iter": 8398, "iter_tflops": 15.051972337576709, "iter_time": 1.3706571502685547, "loss": 0.15535949170589447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.02328587248053, "step_time": 1.0845178718566895} +{"epoch": 0, "iter": 8399, "iter_tflops": 35.756180439026224, "iter_time": 0.5769937744140624, "loss": 0.15712769329547882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.05276024549077, "step_time": 0.5282877159118652} +{"epoch": 0, 
"iter": 8400, "iter_tflops": 42.40015161808347, "iter_time": 0.48658065414428714, "loss": 0.11919985711574554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.41185641717299, "step_time": 0.4445220489501953} +{"epoch": 0, "iter": 8401, "iter_tflops": 19.870939126610573, "iter_time": 1.0382545776367187, "loss": 0.22477765381336212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.23857956171513, "step_time": 0.9713970489501953} +{"epoch": 0, "iter": 8402, "iter_tflops": 30.372684309273318, "iter_time": 0.6792647399902343, "loss": 0.2778550684452057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.68741657713486, "step_time": 0.5474265785217285} +{"epoch": 0, "iter": 8403, "iter_tflops": 48.17449274780107, "iter_time": 0.4282576179504395, "loss": 0.3256280720233917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.38708987336281, "step_time": 0.39382018661499024} +{"epoch": 0, "iter": 8404, "iter_tflops": 54.77354057207313, "iter_time": 0.3766616744995117, "loss": 0.3560645282268524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.37750095338983, "step_time": 0.347456413269043} +{"epoch": 0, "iter": 8405, "iter_tflops": 51.22901284457248, "iter_time": 0.40272283935546876, "loss": 0.09100357443094254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.60533840039155, "step_time": 0.36447257614135736} +{"epoch": 0, "iter": 8406, "iter_tflops": 50.744880215456334, "iter_time": 0.4065650253295899, "loss": 0.0822795182466507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.30662008669582, "step_time": 0.3730311756134033} +{"epoch": 0, "iter": 8407, "iter_tflops": 47.60291556665341, "iter_time": 0.433399787902832, "loss": 0.11671414226293564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.4128861491298, "step_time": 0.4012825393676758} +{"epoch": 0, "iter": 8408, "iter_tflops": 51.23613640437285, "iter_time": 0.4026668472290039, "loss": 0.053893014788627625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.829295564806934, "step_time": 0.3695388469696045} +{"epoch": 0, 
"iter": 8409, "iter_tflops": 30.382623980898845, "iter_time": 0.6790425186157227, "loss": 0.1073402687907219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.265533775728166, "step_time": 0.639415843963623} +{"epoch": 0, "iter": 8410, "iter_tflops": 12.918266626670777, "iter_time": 1.5970481262207032, "loss": 0.07431814819574356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.779358821654444, "step_time": 1.3959396858215332} +{"epoch": 0, "iter": 8411, "iter_tflops": 39.67578173886685, "iter_time": 0.5199921112060547, "loss": 0.07925564795732498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.705181702625765, "step_time": 0.4720514297485351} +{"epoch": 0, "iter": 8412, "iter_tflops": 37.73986700159385, "iter_time": 0.546665771484375, "loss": 0.06383310258388519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.19041396840944, "step_time": 0.5008712348937987} +{"epoch": 0, "iter": 8413, "iter_tflops": 19.76635874690741, "iter_time": 1.0437478027343752, "loss": 0.003268803935497999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.280642955647373, "step_time": 0.9694769821166992} +{"epoch": 0, "iter": 8414, "iter_tflops": 19.366705282940025, "iter_time": 1.0652866973876953, "loss": 0.0053864880464971066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.851962823275176, "step_time": 0.8649641819000243} +{"epoch": 0, "iter": 8415, "iter_tflops": 56.08561590233387, "iter_time": 0.3678499946594238, "loss": 0.02904401160776615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.497370563172694, "step_time": 0.3354792785644531} +{"epoch": 0, "iter": 8416, "iter_tflops": 53.71309591170983, "iter_time": 0.38409801483154293, "loss": 0.00523387873545289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.10734800153468, "step_time": 0.3490444793701172} +{"epoch": 0, "iter": 8417, "iter_tflops": 30.781695215600894, "iter_time": 0.670239028930664, "loss": 0.48198577761650085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.61861845630199, "step_time": 0.6324944000244141} 
+{"epoch": 0, "iter": 8418, "iter_tflops": 10.250699459485483, "iter_time": 2.0126522674560543, "loss": 0.44672852754592896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.169069201703955, "step_time": 1.5666326293945314} +{"epoch": 0, "iter": 8419, "iter_tflops": 12.020051436585204, "iter_time": 1.7163897857666015, "loss": 0.5957521200180054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.554454031772321, "step_time": 1.5220895996093748} +{"epoch": 0, "iter": 8420, "iter_tflops": 34.17776428028318, "iter_time": 0.6036408157348633, "loss": 0.5303340554237366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.734110141203104, "step_time": 0.4066513328552246} +{"epoch": 0, "iter": 8421, "iter_tflops": 13.98222641421014, "iter_time": 1.0954234924316406, "loss": 0.25164446234703064, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 14.599842306993514, "step_time": 1.0490838851928712} +{"epoch": 0, "iter": 8422, "iter_tflops": 9.385391776313202, "iter_time": 1.6319467163085934, "loss": 0.2942584156990051, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 11.621489102627786, "step_time": 1.3179429206848143} +{"epoch": 0, "iter": 8423, "iter_tflops": 27.33491831001326, "iter_time": 0.5603257751464843, "loss": 0.34973907470703125, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 29.13606882784741, "step_time": 0.5256872291564941} +{"epoch": 0, "iter": 8424, "iter_tflops": 27.386655855209938, "iter_time": 0.5592672348022462, "loss": 0.16023221611976624, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 29.18705317701187, "step_time": 0.5247689514160156} +{"epoch": 0, "iter": 8425, "iter_tflops": 25.256333364350187, "iter_time": 0.8168681182861327, "loss": 0.05134769529104233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.69570557803646, "step_time": 0.7728244323730469} +{"epoch": 0, "iter": 8426, "iter_tflops": 17.69407427354315, "iter_time": 1.1659888610839844, "loss": 0.12154184281826019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.234966411563768, "step_time": 
0.7863967952728271} +{"epoch": 0, "iter": 8427, "iter_tflops": 44.606952238137055, "iter_time": 0.4625084762573242, "loss": 0.06371726095676422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.89256589602647, "step_time": 0.42196790313720706} +{"epoch": 0, "iter": 8428, "iter_tflops": 45.02307802758471, "iter_time": 0.45823374176025383, "loss": 0.06086939945816994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.33877200971541, "step_time": 0.4181517429351807} +{"epoch": 0, "iter": 8429, "iter_tflops": 22.48405985811458, "iter_time": 0.9175875549316406, "loss": 0.1586361676454544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.804719238947182, "step_time": 0.8666808166503907} +{"epoch": 0, "iter": 8430, "iter_tflops": 25.519346630588817, "iter_time": 0.8084491271972656, "loss": 0.17440956830978394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.569973924578147, "step_time": 0.7221250381469727} +{"epoch": 0, "iter": 8431, "iter_tflops": 48.65083207806941, "iter_time": 0.4240645561218262, "loss": 0.16046081483364105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.81883969798997, "step_time": 0.3906010360717774} +{"epoch": 0, "iter": 8432, "iter_tflops": 51.79527148532404, "iter_time": 0.3983200187683105, "loss": 0.22094999253749847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.29418737057908, "step_time": 0.36648710060119627} +{"epoch": 0, "iter": 8433, "iter_tflops": 23.837776809958104, "iter_time": 0.8654789276123048, "loss": 0.42873749136924744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.968770218695727, "step_time": 0.8262759170532227} +{"epoch": 0, "iter": 8434, "iter_tflops": 14.4325751290556, "iter_time": 1.4294811096191404, "loss": 0.47435399889945984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.40526100989796, "step_time": 1.2575900802612305} +{"epoch": 0, "iter": 8435, "iter_tflops": 37.1412360762751, "iter_time": 0.5554767608642578, "loss": 0.4715631604194641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.89693138773813, 
"step_time": 0.5044655628204345} +{"epoch": 0, "iter": 8436, "iter_tflops": 41.52799922940589, "iter_time": 0.496799602508545, "loss": 0.30085083842277527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.43389806272584, "step_time": 0.4540903244018555} +{"epoch": 0, "iter": 8437, "iter_tflops": 19.812049548212105, "iter_time": 1.0413406982421876, "loss": 0.6655943393707275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.34927563696686, "step_time": 0.9663603515625} +{"epoch": 0, "iter": 8438, "iter_tflops": 13.718903602493395, "iter_time": 1.5038441925048829, "loss": 0.5794095993041992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.26965481390246, "step_time": 1.2680719871520993} +{"epoch": 0, "iter": 8439, "iter_tflops": 40.355124078629665, "iter_time": 0.511238510131836, "loss": 0.7247363328933716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.18319395685181, "step_time": 0.4669443664550782} +{"epoch": 0, "iter": 8440, "iter_tflops": 37.87130503961437, "iter_time": 0.5447684860229492, "loss": 0.5588602423667908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.4372769982213, "step_time": 0.4978872890472412} +{"epoch": 0, "iter": 8441, "iter_tflops": 21.673687467830273, "iter_time": 0.951895866394043, "loss": 0.28716567158699036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.197537950275475, "step_time": 0.8893656539916992} +{"epoch": 0, "iter": 8442, "iter_tflops": 21.951088786510027, "iter_time": 0.9398665237426758, "loss": 0.37988725304603577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.249668383797346, "step_time": 0.7859563484191895} +{"epoch": 0, "iter": 8443, "iter_tflops": 52.2112959549924, "iter_time": 0.39514616775512695, "loss": 0.2518029808998108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.71686596505921, "step_time": 0.363755880355835} +{"epoch": 0, "iter": 8444, "iter_tflops": 49.27122789661392, "iter_time": 0.41872497177124024, "loss": 0.22494204342365265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.661230684846196, 
"step_time": 0.38446925735473636} +{"epoch": 0, "iter": 8445, "iter_tflops": 21.83311693571709, "iter_time": 0.9449449462890626, "loss": 0.10401687026023865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.844094609632076, "step_time": 0.9031259002685548} +{"epoch": 0, "iter": 8446, "iter_tflops": 17.415696158565012, "iter_time": 1.1846264038085936, "loss": 0.029013710096478462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.03594485106245, "step_time": 0.9807543067932128} +{"epoch": 0, "iter": 8447, "iter_tflops": 45.355118645970784, "iter_time": 0.45487905502319337, "loss": 0.0695170983672142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88099245916322, "step_time": 0.413606315612793} +{"epoch": 0, "iter": 8448, "iter_tflops": 45.96949298304452, "iter_time": 0.44879967498779294, "loss": 0.11328994482755661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.700349607736015, "step_time": 0.4069221153259277} +{"epoch": 0, "iter": 8449, "iter_tflops": 29.392661176260006, "iter_time": 0.7019130859375, "loss": 0.18996906280517578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.744592742907543, "step_time": 0.649908905029297} +{"epoch": 0, "iter": 8450, "iter_tflops": 11.286805509274004, "iter_time": 1.827894836425781, "loss": 0.13465645909309387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.58512669329732, "step_time": 1.5186530075073243} +{"epoch": 0, "iter": 8451, "iter_tflops": 10.019909505523716, "iter_time": 2.059009963989258, "loss": 0.17268610000610352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.914535620935014, "step_time": 1.731590232849121} +{"epoch": 0, "iter": 8452, "iter_tflops": 39.59497607546025, "iter_time": 0.5210533142089844, "loss": 0.1491752713918686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.48760459035886, "step_time": 0.40070020103454596} +{"epoch": 0, "iter": 8453, "iter_tflops": 25.444809592633284, "iter_time": 0.7035685729980468, "loss": 0.18278822302818298, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 
27.099586255490728, "step_time": 0.6606067047119142} +{"epoch": 0, "iter": 8454, "iter_tflops": 20.228077871597243, "iter_time": 0.8850157928466797, "loss": 0.2752622365951538, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 24.493496289070492, "step_time": 0.7308947715759277} +{"epoch": 0, "iter": 8455, "iter_tflops": 32.37977128850656, "iter_time": 0.5528812484741211, "loss": 0.15302641689777374, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 34.57306562613974, "step_time": 0.5178067970275878} +{"epoch": 0, "iter": 8456, "iter_tflops": 32.61978086938296, "iter_time": 0.5488132629394531, "loss": 0.24796321988105774, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 34.74527854190578, "step_time": 0.5152403182983398} +{"epoch": 0, "iter": 8457, "iter_tflops": 28.92307793613215, "iter_time": 0.7133090591430665, "loss": 0.47751176357269287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.71507464431564, "step_time": 0.6716927680969238} +{"epoch": 0, "iter": 8458, "iter_tflops": 13.701977153058762, "iter_time": 1.5057019348144531, "loss": 0.4732946455478668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.021005959095973, "step_time": 1.2120960159301757} +{"epoch": 0, "iter": 8459, "iter_tflops": 14.99297061981337, "iter_time": 1.3760510864257813, "loss": 0.5271874070167542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.404026079996157, "step_time": 1.185420742034912} +{"epoch": 0, "iter": 8460, "iter_tflops": 18.31378270182413, "iter_time": 1.126533706665039, "loss": 0.5757444500923157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.66933013418763, "step_time": 0.9520872764587403} +{"epoch": 0, "iter": 8461, "iter_tflops": 25.282774899744247, "iter_time": 0.5993376770019532, "loss": 0.27508583664894104, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 27.195709944113005, "step_time": 0.5571805114746095} +{"epoch": 0, "iter": 8462, "iter_tflops": 22.443949830166254, "iter_time": 0.6751449584960938, "loss": 0.263670414686203, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 
24.178321280696096, "step_time": 0.6267151222229004} +{"epoch": 0, "iter": 8463, "iter_tflops": 23.67225639742254, "iter_time": 0.6401130218505859, "loss": 0.16236919164657593, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.498896259035046, "step_time": 0.5942578620910645} +{"epoch": 0, "iter": 8464, "iter_tflops": 23.91645447841871, "iter_time": 0.6335771713256837, "loss": 0.2532426416873932, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 25.500759284010034, "step_time": 0.5942144470214844} +{"epoch": 0, "iter": 8465, "iter_tflops": 35.53697140163935, "iter_time": 0.5805529479980469, "loss": 0.35308006405830383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.86263570714094, "step_time": 0.5175546760559082} +{"epoch": 0, "iter": 8466, "iter_tflops": 36.25530874559992, "iter_time": 0.569050277709961, "loss": 0.2503223121166229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.0626225982236, "step_time": 0.5149711170196533} +{"epoch": 0, "iter": 8467, "iter_tflops": 41.38297661426062, "iter_time": 0.49854058837890625, "loss": 0.25160422921180725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.34431302054869, "step_time": 0.45498745346069336} +{"epoch": 0, "iter": 8468, "iter_tflops": 42.36219826418853, "iter_time": 0.4870165939331054, "loss": 0.2777424454689026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.00863508028174, "step_time": 0.4484178562164307} +{"epoch": 0, "iter": 8469, "iter_tflops": 32.117183601377256, "iter_time": 0.6423693237304687, "loss": 0.61136394739151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.7135209639435, "step_time": 0.5943244285583495} +{"epoch": 0, "iter": 8470, "iter_tflops": 12.521210902297167, "iter_time": 1.6476915588378906, "loss": 0.6846088767051697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.105916895494351, "step_time": 1.3657624130249024} +{"epoch": 0, "iter": 8471, "iter_tflops": 40.52992902749984, "iter_time": 0.5090335464477539, "loss": 0.8211337327957153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.616400823098274, "step_time": 0.4730122871398926} +{"epoch": 0, "iter": 8472, "iter_tflops": 47.83391187586978, "iter_time": 0.4313068428039551, "loss": 0.6993371844291687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.60570390978474, "step_time": 0.3997832012176514} +{"epoch": 0, "iter": 8473, "iter_tflops": 21.414669996778176, "iter_time": 0.8340534362792968, "loss": 0.20331460237503052, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 22.48881231271767, "step_time": 0.7942162017822265} +{"epoch": 0, "iter": 8474, "iter_tflops": 7.9994837174652655, "iter_time": 2.2327664794921875, "loss": 0.32139286398887634, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 9.819323106447335, "step_time": 1.8189623565673827} +{"epoch": 0, "iter": 8475, "iter_tflops": 10.013818084341764, "iter_time": 1.7836332702636717, "loss": 0.2002679854631424, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 11.64511402645225, "step_time": 1.5337745132446288} +{"epoch": 0, "iter": 8476, "iter_tflops": 24.342996270343125, "iter_time": 0.7337214736938478, "loss": 0.31256750226020813, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 26.387564138140895, "step_time": 0.6768710823059082} +{"epoch": 0, "iter": 8477, "iter_tflops": 13.177762362845145, "iter_time": 1.1003018035888672, "loss": 0.40627697110176086, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 14.079729284063966, "step_time": 1.0298149490356445} +{"epoch": 0, "iter": 8478, "iter_tflops": 10.249703131408744, "iter_time": 1.4146278686523437, "loss": 0.22836646437644958, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 11.906831855464555, "step_time": 1.2177475814819336} +{"epoch": 0, "iter": 8479, "iter_tflops": 22.508944850135403, "iter_time": 0.6441668319702148, "loss": 0.1997155398130417, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.21366321249272, "step_time": 0.5988154525756835} +{"epoch": 0, "iter": 8480, "iter_tflops": 21.373553902745474, "iter_time": 0.6783858108520509, "loss": 0.18753840029239655, "lr": 3e-05, "seqlen": 5808.0, 
"step_tflops": 22.80509915950345, "step_time": 0.6358014755249024} +{"epoch": 0, "iter": 8481, "iter_tflops": 21.74941084039461, "iter_time": 0.9485817184448243, "loss": 0.02359844371676445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.322205814509093, "step_time": 0.884611587524414} +{"epoch": 0, "iter": 8482, "iter_tflops": 24.735939569269703, "iter_time": 0.8340533599853517, "loss": 0.04055570811033249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.63103411788326, "step_time": 0.6134540328979492} +{"epoch": 0, "iter": 8483, "iter_tflops": 51.236544134984825, "iter_time": 0.40266364288330075, "loss": 0.02329326421022415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.59727407309685, "step_time": 0.37108102607727056} +{"epoch": 0, "iter": 8484, "iter_tflops": 51.24996986230574, "iter_time": 0.4025581588745117, "loss": 0.056265704333782196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.16176564724912, "step_time": 0.3673512268066406} +{"epoch": 0, "iter": 8485, "iter_tflops": 37.80620091230622, "iter_time": 0.5457066040039062, "loss": 0.17537197470664978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.96079243942068, "step_time": 0.5036790618896484} +{"epoch": 0, "iter": 8486, "iter_tflops": 37.75853902132895, "iter_time": 0.5463954391479492, "loss": 0.1529337614774704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.74208581802115, "step_time": 0.4942516193389892} +{"epoch": 0, "iter": 8487, "iter_tflops": 40.59875579865063, "iter_time": 0.5081705856323242, "loss": 0.16283577680587769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.56194366805382, "step_time": 0.4629756202697754} +{"epoch": 0, "iter": 8488, "iter_tflops": 39.96367433251161, "iter_time": 0.5162461624145509, "loss": 0.12829598784446716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.51791012868875, "step_time": 0.47408281898498533} +{"epoch": 0, "iter": 8489, "iter_tflops": 16.06596579139462, "iter_time": 1.0502855682373047, "loss": 0.10981050878763199, "lr": 3e-05, "seqlen": 
6736.0, "step_tflops": 17.328132879240123, "step_time": 0.9737836227416992} +{"epoch": 0, "iter": 8490, "iter_tflops": 17.50660508584547, "iter_time": 0.9638563232421875, "loss": 0.051250576972961426, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 23.93925376369596, "step_time": 0.7048612365722656} +{"epoch": 0, "iter": 8491, "iter_tflops": 34.03857343704589, "iter_time": 0.49572735595703127, "loss": 0.09236188232898712, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 37.46853267833252, "step_time": 0.45034728622436526} +{"epoch": 0, "iter": 8492, "iter_tflops": 36.487993475778815, "iter_time": 0.46244943618774415, "loss": 0.09036773443222046, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 40.09315281672579, "step_time": 0.4208661785125733} +{"epoch": 0, "iter": 8493, "iter_tflops": 23.565900321617274, "iter_time": 0.875463836669922, "loss": 0.49012088775634766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.187290499136562, "step_time": 0.8191072998046874} +{"epoch": 0, "iter": 8494, "iter_tflops": 6.941481937294977, "iter_time": 2.972145385742188, "loss": 0.7017846703529358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.891420889288986, "step_time": 2.614369934082031} +{"epoch": 0, "iter": 8495, "iter_tflops": 15.397900879994172, "iter_time": 1.339864028930664, "loss": 0.5733140110969543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.914494752008018, "step_time": 1.035983777999878} +{"epoch": 0, "iter": 8496, "iter_tflops": 44.476004906277225, "iter_time": 0.4638702049255371, "loss": 0.564067006111145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.302142475520576, "step_time": 0.427125846862793} +{"epoch": 0, "iter": 8497, "iter_tflops": 18.832160880200753, "iter_time": 0.7872737884521483, "loss": 0.26757729053497314, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 19.86334355248005, "step_time": 0.7464033737182617} +{"epoch": 0, "iter": 8498, "iter_tflops": 10.08478765292423, "iter_time": 1.4701416778564453, "loss": 0.3690192699432373, "lr": 3e-05, "seqlen": 
5936.0, "step_tflops": 11.956616292971265, "step_time": 1.239988494873047} +{"epoch": 0, "iter": 8499, "iter_tflops": 23.30000805077047, "iter_time": 0.6363116531372071, "loss": 0.3409866690635681, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 25.044685406145565, "step_time": 0.5919845428466797} +{"epoch": 0, "iter": 8500, "iter_tflops": 24.025923145860254, "iter_time": 0.6170862426757813, "loss": 0.1774546205997467, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 25.72169273048208, "step_time": 0.5764032249450683} +{"epoch": 0, "iter": 8501, "iter_tflops": 19.029364629783238, "iter_time": 1.0841714324951173, "loss": 0.7103201746940613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.38602832179598, "step_time": 1.0120212326049804} +{"epoch": 0, "iter": 8502, "iter_tflops": 16.523776903542547, "iter_time": 1.2485700836181641, "loss": 0.8361238241195679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.659750393776967, "step_time": 1.049407703399658} +{"epoch": 0, "iter": 8503, "iter_tflops": 38.296571163374274, "iter_time": 0.5387190780639648, "loss": 0.7939444184303284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.059056970375394, "step_time": 0.49052677345275886} +{"epoch": 0, "iter": 8504, "iter_tflops": 38.470099341516985, "iter_time": 0.5362890625, "loss": 0.6441987156867981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.797179727864865, "step_time": 0.49360013389587404} +{"epoch": 0, "iter": 8505, "iter_tflops": 16.40916721859235, "iter_time": 1.2572907104492188, "loss": 0.417071133852005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.392979961791045, "step_time": 1.1861735916137695} +{"epoch": 0, "iter": 8506, "iter_tflops": 20.970732272225852, "iter_time": 0.9838041534423827, "loss": 0.4580215513706207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.62916463596255, "step_time": 0.7747555656433106} +{"epoch": 0, "iter": 8507, "iter_tflops": 37.83452118776181, "iter_time": 0.5452981262207032, "loss": 0.39686813950538635, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 41.443223670011534, "step_time": 0.49781584739685064} +{"epoch": 0, "iter": 8508, "iter_tflops": 40.54225302555376, "iter_time": 0.5088788108825684, "loss": 0.4310884475708008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.3588050364006, "step_time": 0.4650957908630371} +{"epoch": 0, "iter": 8509, "iter_tflops": 17.435298936781233, "iter_time": 1.1832945098876955, "loss": 0.3992229998111725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.728797546366728, "step_time": 1.1015706405639647} +{"epoch": 0, "iter": 8510, "iter_tflops": 18.370516717782905, "iter_time": 1.1230546112060547, "loss": 0.47777852416038513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.052805438252026, "step_time": 0.8235043201446534} +{"epoch": 0, "iter": 8511, "iter_tflops": 40.5715665425205, "iter_time": 0.5085111389160155, "loss": 0.5823621153831482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.37322281332427, "step_time": 0.46494467163085934} +{"epoch": 0, "iter": 8512, "iter_tflops": 41.51144157685521, "iter_time": 0.496997760772705, "loss": 0.6911675930023193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.36735800536257, "step_time": 0.4547563362121582} +{"epoch": 0, "iter": 8513, "iter_tflops": 16.149206667577886, "iter_time": 1.2775298461914062, "loss": 0.4808829128742218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.07627463980518, "step_time": 1.2081729736328124} +{"epoch": 0, "iter": 8514, "iter_tflops": 21.522786673751543, "iter_time": 0.9585698089599609, "loss": 0.7667485475540161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.283133607291063, "step_time": 0.7561848945617675} +{"epoch": 0, "iter": 8515, "iter_tflops": 46.845939029111754, "iter_time": 0.4404030303955078, "loss": 0.6154547929763794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.71039045813968, "step_time": 0.40684154319763177} +{"epoch": 0, "iter": 8516, "iter_tflops": 44.34881217327064, "iter_time": 0.4652005882263184, "loss": 0.547035813331604, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 47.814989422090974, "step_time": 0.43147752952575685} +{"epoch": 0, "iter": 8517, "iter_tflops": 26.929636616053894, "iter_time": 0.766111099243164, "loss": 0.24005134403705597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.668014513238106, "step_time": 0.7196554718017578} +{"epoch": 0, "iter": 8518, "iter_tflops": 20.103251560386866, "iter_time": 1.0262565460205078, "loss": 0.345183789730072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.483444750001865, "step_time": 0.8426548519134521} +{"epoch": 0, "iter": 8519, "iter_tflops": 39.548121492737494, "iter_time": 0.5216706314086914, "loss": 0.36898696422576904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41350275381576, "step_time": 0.4752229652404786} +{"epoch": 0, "iter": 8520, "iter_tflops": 38.79530723350853, "iter_time": 0.5317935333251953, "loss": 0.4204632639884949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.103263241325465, "step_time": 0.49001174545288084} +{"epoch": 0, "iter": 8521, "iter_tflops": 26.307345709969233, "iter_time": 0.784233184814453, "loss": 0.12333876639604568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.28182450342722, "step_time": 0.7294824104309081} +{"epoch": 0, "iter": 8522, "iter_tflops": 10.002113667919204, "iter_time": 2.062673370361328, "loss": 0.037097636610269547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.324444165898058, "step_time": 1.8218195266723636} +{"epoch": 0, "iter": 8523, "iter_tflops": 12.59012287286129, "iter_time": 1.6386729278564451, "loss": 0.07213357836008072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.238433944847502, "step_time": 1.3538854179382325} +{"epoch": 0, "iter": 8524, "iter_tflops": 22.829330842957052, "iter_time": 0.9037099533081054, "loss": 0.06451407074928284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.647794580246956, "step_time": 0.6518967208862305} +{"epoch": 0, "iter": 8525, "iter_tflops": 14.245263770535338, "iter_time": 0.9892236404418945, "loss": 0.3125310242176056, "lr": 3e-05, 
"seqlen": 5648.0, "step_tflops": 14.881542504786868, "step_time": 0.9469281616210937} +{"epoch": 0, "iter": 8526, "iter_tflops": 9.712189118305803, "iter_time": 1.4509346466064454, "loss": 0.24180245399475098, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 12.417461344641817, "step_time": 1.1348335456848144} +{"epoch": 0, "iter": 8527, "iter_tflops": 21.54703198675772, "iter_time": 0.6539996643066406, "loss": 0.24969352781772614, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 23.081656840320136, "step_time": 0.6105173377990722} +{"epoch": 0, "iter": 8528, "iter_tflops": 21.83102025321372, "iter_time": 0.6454921264648438, "loss": 0.2409113049507141, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 23.339696372911497, "step_time": 0.6037675666809083} +{"epoch": 0, "iter": 8529, "iter_tflops": 23.197233485296852, "iter_time": 0.8893773269653321, "loss": 0.6963280439376831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.818240583814074, "step_time": 0.8312875137329101} +{"epoch": 0, "iter": 8530, "iter_tflops": 7.956295200260541, "iter_time": 2.5930527954101565, "loss": 0.4137890338897705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.972818827325366, "step_time": 2.299287872314453} +{"epoch": 0, "iter": 8531, "iter_tflops": 13.736781808423267, "iter_time": 1.501886962890625, "loss": 0.5426519513130188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.749484031648866, "step_time": 1.231745018005371} +{"epoch": 0, "iter": 8532, "iter_tflops": 40.927038192366005, "iter_time": 0.5040944671630859, "loss": 0.6046831011772156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.132066855018444, "step_time": 0.41991096305847164} +{"epoch": 0, "iter": 8533, "iter_tflops": 19.402184305335236, "iter_time": 0.7809903945922851, "loss": 0.33581751585006714, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 20.534320830852682, "step_time": 0.7379313735961914} +{"epoch": 0, "iter": 8534, "iter_tflops": 6.6592235796540225, "iter_time": 2.275478424072266, "loss": 0.379144549369812, "lr": 
3e-05, "seqlen": 6064.0, "step_tflops": 8.2941319705328, "step_time": 1.8269445953369141} +{"epoch": 0, "iter": 8535, "iter_tflops": 7.900291955082204, "iter_time": 1.9180202026367188, "loss": 0.32007575035095215, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 9.297564347531884, "step_time": 1.6297730255126952} +{"epoch": 0, "iter": 8536, "iter_tflops": 17.865909578867413, "iter_time": 0.8481471099853516, "loss": 0.2657448351383209, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 21.03788001032646, "step_time": 0.7202683715820313} +{"epoch": 0, "iter": 8537, "iter_tflops": 19.816559832501458, "iter_time": 0.7605345306396485, "loss": 0.18872994184494019, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 21.518662564467807, "step_time": 0.7003770790100098} +{"epoch": 0, "iter": 8538, "iter_tflops": 22.512743981213116, "iter_time": 0.669450958251953, "loss": 0.25917190313339233, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.306557190224456, "step_time": 0.6200457725524902} +{"epoch": 0, "iter": 8539, "iter_tflops": 21.959303991581475, "iter_time": 0.6863231201171875, "loss": 0.3270961046218872, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 23.729194973346704, "step_time": 0.6351322937011717} +{"epoch": 0, "iter": 8540, "iter_tflops": 22.93336364119686, "iter_time": 0.657172592163086, "loss": 0.2371504306793213, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.532462047536438, "step_time": 0.6143361396789551} +{"epoch": 0, "iter": 8541, "iter_tflops": 21.25955192700197, "iter_time": 0.9704387741088867, "loss": 0.6789301037788391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.675663025792176, "step_time": 0.9098341903686523} +{"epoch": 0, "iter": 8542, "iter_tflops": 26.47082481393561, "iter_time": 0.7793899002075195, "loss": 0.49382326006889343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.464308239989656, "step_time": 0.616510383605957} +{"epoch": 0, "iter": 8543, "iter_tflops": 39.18909887827346, "iter_time": 0.5264498062133789, "loss": 0.8490669131278992, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 42.665431722606165, "step_time": 0.48355525016784673} +{"epoch": 0, "iter": 8544, "iter_tflops": 42.745195023135715, "iter_time": 0.48265292739868165, "loss": 0.7487959861755371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.59031240359548, "step_time": 0.4428193855285645} +{"epoch": 0, "iter": 8545, "iter_tflops": 19.923455416897028, "iter_time": 0.7728708267211915, "loss": 0.28690212965011597, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 21.6918785034531, "step_time": 0.70986279296875} +{"epoch": 0, "iter": 8546, "iter_tflops": 23.650155710681172, "iter_time": 0.6510848236083984, "loss": 0.23543483018875122, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.461201626382227, "step_time": 0.6047733993530273} +{"epoch": 0, "iter": 8547, "iter_tflops": 24.841944185546534, "iter_time": 0.6198491287231446, "loss": 0.2466856986284256, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.655794537632286, "step_time": 0.5776701736450196} +{"epoch": 0, "iter": 8548, "iter_tflops": 24.089381408950235, "iter_time": 0.6392134857177734, "loss": 0.30513808131217957, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.867891890493038, "step_time": 0.5952652626037598} +{"epoch": 0, "iter": 8549, "iter_tflops": 30.779017060131178, "iter_time": 0.6702973480224609, "loss": 0.7488468885421753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.81976921227512, "step_time": 0.6100305824279786} +{"epoch": 0, "iter": 8550, "iter_tflops": 34.34477574109708, "iter_time": 0.6007054367065431, "loss": 0.5409099459648132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.148706386115634, "step_time": 0.5553650588989257} +{"epoch": 0, "iter": 8551, "iter_tflops": 37.072914834138686, "iter_time": 0.5565004425048827, "loss": 0.9169642925262451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.017910050279696, "step_time": 0.5155465011596679} +{"epoch": 0, "iter": 8552, "iter_tflops": 37.53362804702321, "iter_time": 0.5496695785522461, "loss": 0.8169243335723877, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.98670238881039, "step_time": 0.5033606586456298} +{"epoch": 0, "iter": 8553, "iter_tflops": 15.382783207705705, "iter_time": 1.3411808013916016, "loss": 0.3212184011936188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.362353654469988, "step_time": 1.2608878860473631} +{"epoch": 0, "iter": 8554, "iter_tflops": 19.25266925414948, "iter_time": 1.0715965270996093, "loss": 0.3035818636417389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.938349950755658, "step_time": 0.9404122714996338} +{"epoch": 0, "iter": 8555, "iter_tflops": 43.11493345908414, "iter_time": 0.4785138664245606, "loss": 0.35707733035087585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.10271384783237, "step_time": 0.4380022258758544} +{"epoch": 0, "iter": 8556, "iter_tflops": 39.085286382964924, "iter_time": 0.5278480834960938, "loss": 0.24844074249267578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88984658705887, "step_time": 0.4810251178741455} +{"epoch": 0, "iter": 8557, "iter_tflops": 20.191124573903572, "iter_time": 1.0217902145385742, "loss": 0.2763815224170685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.598285469620492, "step_time": 0.9552190399169922} +{"epoch": 0, "iter": 8558, "iter_tflops": 15.464796808106822, "iter_time": 1.3340681915283203, "loss": 0.20557494461536407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.2184822410967, "step_time": 1.1324265785217285} +{"epoch": 0, "iter": 8559, "iter_tflops": 48.64773771272076, "iter_time": 0.4240915298461914, "loss": 0.15248015522956848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.264653707128325, "step_time": 0.387331787109375} +{"epoch": 0, "iter": 8560, "iter_tflops": 52.28422015163072, "iter_time": 0.39459503173828125, "loss": 0.18531353771686554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.72894690000389, "step_time": 0.3636784152984619} +{"epoch": 0, "iter": 8561, "iter_tflops": 42.20251930917805, "iter_time": 0.4888592872619629, "loss": 
0.4312879145145416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59824657648922, "step_time": 0.45245365905761714} +{"epoch": 0, "iter": 8562, "iter_tflops": 40.89690680183625, "iter_time": 0.5044658660888672, "loss": 0.3929296135902405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.8546169080329, "step_time": 0.4599547367095947} +{"epoch": 0, "iter": 8563, "iter_tflops": 49.9528318612385, "iter_time": 0.41301148986816405, "loss": 0.567618727684021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.03128590643738, "step_time": 0.38183606338500975} +{"epoch": 0, "iter": 8564, "iter_tflops": 47.626890582135985, "iter_time": 0.43318161773681635, "loss": 0.47987186908721924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.717607557101665, "step_time": 0.3989181728363037} +{"epoch": 0, "iter": 8565, "iter_tflops": 20.411374574618556, "iter_time": 1.010764533996582, "loss": 0.6987202167510986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.307234725615288, "step_time": 0.9682670593261719} +{"epoch": 0, "iter": 8566, "iter_tflops": 17.42681568779699, "iter_time": 1.1838705291748046, "loss": 0.638741135597229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.046848984660976, "step_time": 0.9802461891174317} +{"epoch": 0, "iter": 8567, "iter_tflops": 44.83720235694205, "iter_time": 0.46013338088989264, "loss": 0.5864567756652832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.28196032452226, "step_time": 0.4273043880462646} +{"epoch": 0, "iter": 8568, "iter_tflops": 45.81884982190315, "iter_time": 0.4502752380371094, "loss": 0.7266615033149719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.43133425533195, "step_time": 0.4173687362670899} +{"epoch": 0, "iter": 8569, "iter_tflops": 29.263822449510428, "iter_time": 0.7050033721923828, "loss": 0.36178117990493774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.010972797055363, "step_time": 0.6652836608886719} +{"epoch": 0, "iter": 8570, "iter_tflops": 12.385051181388041, "iter_time": 1.6658060760498046, "loss": 
0.2910309135913849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.886196069896704, "step_time": 1.298680528640747} +{"epoch": 0, "iter": 8571, "iter_tflops": 38.72239474451086, "iter_time": 0.5327948760986327, "loss": 0.4704495370388031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60968045468409, "step_time": 0.4841879425048828} +{"epoch": 0, "iter": 8572, "iter_tflops": 43.37580335966137, "iter_time": 0.47563599777221677, "loss": 0.3268946409225464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.57273696528362, "step_time": 0.4336747226715088} +{"epoch": 0, "iter": 8573, "iter_tflops": 18.180592588274575, "iter_time": 1.1347866363525392, "loss": 0.5091549158096313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.43873492834605, "step_time": 1.0613393096923829} +{"epoch": 0, "iter": 8574, "iter_tflops": 22.570305158839933, "iter_time": 0.9140812835693358, "loss": 0.5998472571372986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.88552824607792, "step_time": 0.7398494777679443} +{"epoch": 0, "iter": 8575, "iter_tflops": 49.37874607406486, "iter_time": 0.417813232421875, "loss": 0.6228460669517517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.47856543230909, "step_time": 0.3857824783325195} +{"epoch": 0, "iter": 8576, "iter_tflops": 48.76612996014379, "iter_time": 0.42306193923950197, "loss": 0.7229719758033752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.765238212393776, "step_time": 0.3909978275299072} +{"epoch": 0, "iter": 8577, "iter_tflops": 26.473983099523192, "iter_time": 0.7792969207763671, "loss": 0.24457809329032898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.916517338872882, "step_time": 0.7390281982421874} +{"epoch": 0, "iter": 8578, "iter_tflops": 14.531952725654387, "iter_time": 1.419705520629883, "loss": 0.1661379337310791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.816916460453424, "step_time": 1.0410849514007567} +{"epoch": 0, "iter": 8579, "iter_tflops": 50.95285457848964, "iter_time": 0.40490554809570317, "loss": 
0.22181440889835358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.5590128306071, "step_time": 0.37133657455444335} +{"epoch": 0, "iter": 8580, "iter_tflops": 48.29696073687685, "iter_time": 0.4271716728210449, "loss": 0.20270542800426483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.379009345527784, "step_time": 0.39388094139099117} +{"epoch": 0, "iter": 8581, "iter_tflops": 25.242429445380626, "iter_time": 0.8173180618286132, "loss": 0.29236018657684326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.514788157868722, "step_time": 0.7780976181030274} +{"epoch": 0, "iter": 8582, "iter_tflops": 18.097118110452623, "iter_time": 1.1400209350585937, "loss": 0.2685742974281311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.24559406960669, "step_time": 0.85092134475708} +{"epoch": 0, "iter": 8583, "iter_tflops": 39.90834795513, "iter_time": 0.5169618530273438, "loss": 0.1840103417634964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.8560159726043, "step_time": 0.4704279003143311} +{"epoch": 0, "iter": 8584, "iter_tflops": 40.93426350728296, "iter_time": 0.5040054893493652, "loss": 0.18360331654548645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.79184484710018, "step_time": 0.4605993251800537} +{"epoch": 0, "iter": 8585, "iter_tflops": 23.779744116139042, "iter_time": 0.8675910644531251, "loss": 0.14884035289287567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.682205775626127, "step_time": 0.8033224906921387} +{"epoch": 0, "iter": 8586, "iter_tflops": 25.457948949523434, "iter_time": 0.8103988876342773, "loss": 0.17974911630153656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.952228346512456, "step_time": 0.5902654705047607} +{"epoch": 0, "iter": 8587, "iter_tflops": 52.85068728757569, "iter_time": 0.3903656616210937, "loss": 0.15498866140842438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.54927725092925, "step_time": 0.35849439811706546} +{"epoch": 0, "iter": 8588, "iter_tflops": 52.34176989137592, "iter_time": 0.39416117477416995, 
"loss": 0.15990792214870453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.88946898215298, "step_time": 0.3626522426605225} +{"epoch": 0, "iter": 8589, "iter_tflops": 23.413345117603512, "iter_time": 0.8811681289672852, "loss": 0.09940673410892487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.490020173933743, "step_time": 0.8424286041259765} +{"epoch": 0, "iter": 8590, "iter_tflops": 14.363900692467213, "iter_time": 1.4363155212402343, "loss": 0.11995597928762436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.326190126517258, "step_time": 1.1907461109161377} +{"epoch": 0, "iter": 8591, "iter_tflops": 36.2863123765106, "iter_time": 0.5685640716552735, "loss": 0.06798696517944336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.13995342935151, "step_time": 0.5139790096282959} +{"epoch": 0, "iter": 8592, "iter_tflops": 39.68474038929261, "iter_time": 0.5198747253417969, "loss": 0.14660082757472992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.430148377069244, "step_time": 0.47504082489013677} +{"epoch": 0, "iter": 8593, "iter_tflops": 17.171006088895428, "iter_time": 1.2015075531005859, "loss": 0.10727406293153763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.629607721111377, "step_time": 1.1074357452392578} +{"epoch": 0, "iter": 8594, "iter_tflops": 15.401476870313289, "iter_time": 1.3395529327392577, "loss": 0.1162518784403801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.67387692540497, "step_time": 1.0486541919708252} +{"epoch": 0, "iter": 8595, "iter_tflops": 49.05121780435845, "iter_time": 0.4206030845642089, "loss": 0.11340455710887909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.32764055843376, "step_time": 0.3868742980957031} +{"epoch": 0, "iter": 8596, "iter_tflops": 50.92177386062666, "iter_time": 0.40515268707275387, "loss": 0.15440337359905243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.97467734477459, "step_time": 0.36857905197143553} +{"epoch": 0, "iter": 8597, "iter_tflops": 37.478051555624084, "iter_time": 
0.5504846878051758, "loss": 0.06179191172122955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.22711528040686, "step_time": 0.5128653488159179} +{"epoch": 0, "iter": 8598, "iter_tflops": 9.071236170687497, "iter_time": 2.274342010498047, "loss": 0.052376847714185715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.700454210426583, "step_time": 1.7632728729248046} +{"epoch": 0, "iter": 8599, "iter_tflops": 15.013737921987842, "iter_time": 1.374147705078125, "loss": 0.043554265052080154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.450710169226273, "step_time": 1.1822495079040527} +{"epoch": 0, "iter": 8600, "iter_tflops": 35.742594725754664, "iter_time": 0.5772130889892579, "loss": 0.0846937969326973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.098948263968396, "step_time": 0.5019859237670898} +{"epoch": 0, "iter": 8601, "iter_tflops": 15.773290880027393, "iter_time": 0.9736304321289061, "loss": 0.20278309285640717, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 16.51806054083643, "step_time": 0.929731185913086} +{"epoch": 0, "iter": 8602, "iter_tflops": 9.451789587874764, "iter_time": 1.6248093414306641, "loss": 0.15422309935092926, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 12.33682035411268, "step_time": 1.2448390731811523} +{"epoch": 0, "iter": 8603, "iter_tflops": 25.109642254083216, "iter_time": 0.6116119003295899, "loss": 0.2511315941810608, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 26.767897357462147, "step_time": 0.5737229118347168} +{"epoch": 0, "iter": 8604, "iter_tflops": 27.85063601859945, "iter_time": 0.5514185028076172, "loss": 0.2694542109966278, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.63248407514149, "step_time": 0.5182608375549317} +{"epoch": 0, "iter": 8605, "iter_tflops": 42.253548721621094, "iter_time": 0.48826889419555664, "loss": 0.3914283812046051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.39627987773986, "step_time": 0.4446712875366211} +{"epoch": 0, "iter": 8606, "iter_tflops": 39.33798705486087, 
"iter_time": 0.524457275390625, "loss": 0.3499489426612854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65498403345412, "step_time": 0.4725942287445068} +{"epoch": 0, "iter": 8607, "iter_tflops": 39.68214891703873, "iter_time": 0.519908676147461, "loss": 0.3825860321521759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.64207576212082, "step_time": 0.4727340106964112} +{"epoch": 0, "iter": 8608, "iter_tflops": 40.7281581871008, "iter_time": 0.5065560150146484, "loss": 0.3942520320415497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.60259861875549, "step_time": 0.4625536212921143} +{"epoch": 0, "iter": 8609, "iter_tflops": 18.148877543444417, "iter_time": 1.1367696685791016, "loss": 0.4611247181892395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.679114399263188, "step_time": 1.048375099182129} +{"epoch": 0, "iter": 8610, "iter_tflops": 18.991480170213762, "iter_time": 1.0863341522216796, "loss": 0.6057116985321045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.38193303649771, "step_time": 0.8128259372711182} +{"epoch": 0, "iter": 8611, "iter_tflops": 33.62034298000189, "iter_time": 0.6136491088867188, "loss": 0.6292232871055603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.83434542521642, "step_time": 0.5601047954559326} +{"epoch": 0, "iter": 8612, "iter_tflops": 37.7907194033754, "iter_time": 0.5459301605224609, "loss": 0.648842453956604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.197647538496966, "step_time": 0.5007832908630371} +{"epoch": 0, "iter": 8613, "iter_tflops": 19.953552630483895, "iter_time": 1.0339559020996094, "loss": 0.09881429374217987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.360879840937102, "step_time": 0.9658353805541993} +{"epoch": 0, "iter": 8614, "iter_tflops": 14.84690455627724, "iter_time": 1.3895888824462892, "loss": 0.1155228465795517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.325794027323933, "step_time": 1.1257953395843505} +{"epoch": 0, "iter": 8615, "iter_tflops": 40.2860686578774, 
"iter_time": 0.5121148376464844, "loss": 0.12371498346328735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.29104854827012, "step_time": 0.46580729484558114} +{"epoch": 0, "iter": 8616, "iter_tflops": 38.82078573973405, "iter_time": 0.5314445114135742, "loss": 0.13105250895023346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.634658250751, "step_time": 0.4839042778015137} +{"epoch": 0, "iter": 8617, "iter_tflops": 20.902939698810883, "iter_time": 0.8978752975463866, "loss": 0.10985328257083893, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 22.527166383868703, "step_time": 0.8331377716064454} +{"epoch": 0, "iter": 8618, "iter_tflops": 8.65345605230488, "iter_time": 2.1688713836669926, "loss": 0.18588261306285858, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 10.251890240512434, "step_time": 1.8307095336914063} +{"epoch": 0, "iter": 8619, "iter_tflops": 15.773377881950704, "iter_time": 1.1898677215576172, "loss": 0.14400635659694672, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 17.998975332465474, "step_time": 1.0427389812469483} +{"epoch": 0, "iter": 8620, "iter_tflops": 39.67523304511046, "iter_time": 0.4730465774536132, "loss": 0.136518657207489, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 43.48607585286265, "step_time": 0.4315917873382568} +{"epoch": 0, "iter": 8621, "iter_tflops": 12.688414573916232, "iter_time": 1.2425967559814453, "loss": 0.2847326099872589, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 13.40763192415694, "step_time": 1.1759409027099608} +{"epoch": 0, "iter": 8622, "iter_tflops": 15.727801336064973, "iter_time": 1.0024657897949218, "loss": 0.26847460865974426, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 18.98871492261725, "step_time": 0.8303133125305177} +{"epoch": 0, "iter": 8623, "iter_tflops": 26.888897520987094, "iter_time": 0.5863603286743163, "loss": 0.4761917293071747, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 28.697825377134084, "step_time": 0.5493999137878417} +{"epoch": 0, "iter": 8624, "iter_tflops": 28.4532854217303, 
"iter_time": 0.554121696472168, "loss": 0.3130570948123932, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.25811206807016, "step_time": 0.5210696144104003} +{"epoch": 0, "iter": 8625, "iter_tflops": 33.22189101103226, "iter_time": 0.4057863655090332, "loss": 0.0024343773256987333, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 36.66150268381002, "step_time": 0.3677151622772217} +{"epoch": 0, "iter": 8626, "iter_tflops": 30.952016744222462, "iter_time": 0.43554481506347653, "loss": 0.0021070486400276423, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 34.19924711078645, "step_time": 0.3941896839141846} +{"epoch": 0, "iter": 8627, "iter_tflops": 36.46872517913268, "iter_time": 0.36965894317626957, "loss": 0.001845581573434174, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 40.151946123889594, "step_time": 0.33574936485290524} +{"epoch": 0, "iter": 8628, "iter_tflops": 35.813245185350766, "iter_time": 0.3764247093200684, "loss": 0.01626678556203842, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 39.425823218148615, "step_time": 0.3419330101013183} +{"epoch": 0, "iter": 8629, "iter_tflops": 34.44478223012362, "iter_time": 0.5989613571166992, "loss": 0.5484665036201477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.092368060942086, "step_time": 0.5562085838317871} +{"epoch": 0, "iter": 8630, "iter_tflops": 35.68053995107984, "iter_time": 0.5782169647216796, "loss": 0.7631608247756958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.7640735746475, "step_time": 0.5322220191955566} +{"epoch": 0, "iter": 8631, "iter_tflops": 38.17904126901713, "iter_time": 0.5403774642944336, "loss": 0.5438501834869385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59633805979131, "step_time": 0.4959834079742431} +{"epoch": 0, "iter": 8632, "iter_tflops": 42.669059200509636, "iter_time": 0.4835141410827636, "loss": 0.49824342131614685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35215824983074, "step_time": 0.445094560623169} +{"epoch": 0, "iter": 8633, "iter_tflops": 
23.61713223321506, "iter_time": 0.8735647201538087, "loss": 0.11472177505493164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.28718263688669, "step_time": 0.8158715744018555} +{"epoch": 0, "iter": 8634, "iter_tflops": 8.549453366504432, "iter_time": 2.413147674560547, "loss": 0.04301071912050247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.040984487049426, "step_time": 2.054688316345215} +{"epoch": 0, "iter": 8635, "iter_tflops": 12.285795431546068, "iter_time": 1.6792639617919922, "loss": 0.12919047474861145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.222945187137048, "step_time": 1.4505500259399413} +{"epoch": 0, "iter": 8636, "iter_tflops": 40.4349765911313, "iter_time": 0.5102288970947265, "loss": 0.12308604270219803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.02657225364344, "step_time": 0.45819818115234373} +{"epoch": 0, "iter": 8637, "iter_tflops": 12.314573124906676, "iter_time": 1.1906830444335936, "loss": 0.2929378151893616, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 13.248234420825534, "step_time": 1.1067703781127929} +{"epoch": 0, "iter": 8638, "iter_tflops": 13.890304465635333, "iter_time": 1.0556106567382812, "loss": 0.20849134027957916, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 16.53125419663256, "step_time": 0.8869716262817384} +{"epoch": 0, "iter": 8639, "iter_tflops": 26.37770384794291, "iter_time": 0.5558767929077149, "loss": 0.23987992107868195, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 28.139307256627223, "step_time": 0.5210772705078125} +{"epoch": 0, "iter": 8640, "iter_tflops": 27.334896085865797, "iter_time": 0.5364115295410157, "loss": 0.23275643587112427, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 29.05688758240388, "step_time": 0.5046222991943359} +{"epoch": 0, "iter": 8641, "iter_tflops": 23.64891579111428, "iter_time": 0.8723906707763671, "loss": 0.15975114703178406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.820089587202943, "step_time": 0.8312255859375} +{"epoch": 0, "iter": 8642, 
"iter_tflops": 13.221411244162093, "iter_time": 1.5604305114746095, "loss": 0.13767077028751373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.95165376941601, "step_time": 1.1492586574554444} +{"epoch": 0, "iter": 8643, "iter_tflops": 42.43807080729063, "iter_time": 0.4861458854675293, "loss": 0.1302088499069214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66183861559508, "step_time": 0.44214060401916505} +{"epoch": 0, "iter": 8644, "iter_tflops": 40.952124771281575, "iter_time": 0.5037856674194336, "loss": 0.16893139481544495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.111863052885134, "step_time": 0.4573318881988525} +{"epoch": 0, "iter": 8645, "iter_tflops": 17.54147695421698, "iter_time": 1.1761320648193359, "loss": 0.7962784171104431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.764223489754063, "step_time": 1.0994909286499026} +{"epoch": 0, "iter": 8646, "iter_tflops": 18.99647944071124, "iter_time": 1.0860482635498048, "loss": 0.6404927968978882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.72929442646732, "step_time": 0.8342774829864502} +{"epoch": 0, "iter": 8647, "iter_tflops": 44.772551912960104, "iter_time": 0.46079780197143555, "loss": 0.44571825861930847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.55382791808745, "step_time": 0.4249117813110352} +{"epoch": 0, "iter": 8648, "iter_tflops": 50.50200536310646, "iter_time": 0.4085202827453614, "loss": 0.5420753359794617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.70933302962254, "step_time": 0.3771037292480469} +{"epoch": 0, "iter": 8649, "iter_tflops": 8.339928911598129, "iter_time": 0.61405810546875, "loss": 0.009509129449725151, "lr": 3e-05, "seqlen": 2080.0, "step_tflops": 8.908147994178151, "step_time": 0.5748895225524903} +{"epoch": 0, "iter": 8650, "iter_tflops": 3.771616055550197, "iter_time": 1.357826690673828, "loss": 0.0004751339147333056, "lr": 3e-05, "seqlen": 2080.0, "step_tflops": 4.526987779076898, "step_time": 1.131260166168213} +{"epoch": 0, "iter": 8651, 
"iter_tflops": 10.147493963946312, "iter_time": 0.5046764221191407, "loss": 0.029445193707942963, "lr": 3e-05, "seqlen": 2080.0, "step_tflops": 11.259596129898435, "step_time": 0.45482989692687986} +{"epoch": 0, "iter": 8652, "iter_tflops": 10.951980468296853, "iter_time": 0.4676050109863281, "loss": 0.006971972528845072, "lr": 3e-05, "seqlen": 2080.0, "step_tflops": 12.07908456774136, "step_time": 0.42397260475158693} +{"epoch": 0, "iter": 8653, "iter_tflops": 22.990976032103273, "iter_time": 0.8973561401367188, "loss": 0.0017853864701464772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.786464295365704, "step_time": 0.8323532257080078} +{"epoch": 0, "iter": 8654, "iter_tflops": 18.193688141383863, "iter_time": 1.1339698333740236, "loss": 0.025103915482759476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.816618112439155, "step_time": 0.9456595611572265} +{"epoch": 0, "iter": 8655, "iter_tflops": 43.777311676517535, "iter_time": 0.4712736511230468, "loss": 0.004998494405299425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.548536694222065, "step_time": 0.42495809173583987} +{"epoch": 0, "iter": 8656, "iter_tflops": 50.94955213678992, "iter_time": 0.4049317932128907, "loss": 0.021058978512883186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.48843553975262, "step_time": 0.3652268524169922} +{"epoch": 0, "iter": 8657, "iter_tflops": 19.521572306278173, "iter_time": 1.056835647583008, "loss": 0.5290312170982361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.584151305350595, "step_time": 1.002280502319336} +{"epoch": 0, "iter": 8658, "iter_tflops": 13.072086365771902, "iter_time": 1.5782555999755858, "loss": 0.4736034572124481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.887990350250227, "step_time": 1.3857540893554687} +{"epoch": 0, "iter": 8659, "iter_tflops": 18.195168172844784, "iter_time": 1.1338775939941406, "loss": 0.38577234745025635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.38444264066145, "step_time": 1.012099956512451} +{"epoch": 
0, "iter": 8660, "iter_tflops": 40.57120740601926, "iter_time": 0.508515640258789, "loss": 0.39718520641326904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.285562021925145, "step_time": 0.46586500358581545} +{"epoch": 0, "iter": 8661, "iter_tflops": 18.974633867992402, "iter_time": 0.8827918548583984, "loss": 0.3050374984741211, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 20.27745619841805, "step_time": 0.8260726623535157} +{"epoch": 0, "iter": 8662, "iter_tflops": 9.261528754066811, "iter_time": 1.808627136230469, "loss": 0.17398801445960999, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 11.309938673725476, "step_time": 1.4810559730529784} +{"epoch": 0, "iter": 8663, "iter_tflops": 24.049697146850534, "iter_time": 0.6965015869140625, "loss": 0.2390565127134323, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 25.947862514145065, "step_time": 0.6455503692626954} +{"epoch": 0, "iter": 8664, "iter_tflops": 26.49107726399158, "iter_time": 0.63231298828125, "loss": 0.31603434681892395, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 28.475279460020467, "step_time": 0.5882524261474609} +{"epoch": 0, "iter": 8665, "iter_tflops": 16.691596569135584, "iter_time": 1.0552581024169922, "loss": 0.2047361135482788, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 17.850002651371366, "step_time": 0.9867753448486329} +{"epoch": 0, "iter": 8666, "iter_tflops": 11.713143169453204, "iter_time": 1.503775909423828, "loss": 0.20959778130054474, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 14.465586370680102, "step_time": 1.217644557952881} +{"epoch": 0, "iter": 8667, "iter_tflops": 25.8677996112808, "iter_time": 0.6809215621948244, "loss": 0.3856499195098877, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 28.082426023898492, "step_time": 0.6272229652404785} +{"epoch": 0, "iter": 8668, "iter_tflops": 27.344148790800727, "iter_time": 0.6441576461791992, "loss": 0.298234224319458, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 29.386794054769506, "step_time": 0.5993829231262208} +{"epoch": 
0, "iter": 8669, "iter_tflops": 21.2966138296835, "iter_time": 0.9687499465942383, "loss": 0.9883608222007751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.003904233251305, "step_time": 0.8968518257141114} +{"epoch": 0, "iter": 8670, "iter_tflops": 14.654969207682317, "iter_time": 1.4077882537841795, "loss": 0.836601734161377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.21873624618961, "step_time": 1.198176986694336} +{"epoch": 0, "iter": 8671, "iter_tflops": 35.56849888397356, "iter_time": 0.5800383529663086, "loss": 0.9083579182624817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.72468491586746, "step_time": 0.5327633666992189} +{"epoch": 0, "iter": 8672, "iter_tflops": 39.45911798931631, "iter_time": 0.5228473052978515, "loss": 0.9239454865455627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94443599260047, "step_time": 0.4804136562347412} +{"epoch": 0, "iter": 8673, "iter_tflops": 14.474747167725292, "iter_time": 1.4253163299560545, "loss": 0.7545632123947144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.50234735312778, "step_time": 1.3308367462158204} +{"epoch": 0, "iter": 8674, "iter_tflops": 19.405955610703607, "iter_time": 1.0631320571899412, "loss": 0.7640941739082336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.8504656630608, "step_time": 0.8302095336914063} +{"epoch": 0, "iter": 8675, "iter_tflops": 42.57971291890691, "iter_time": 0.4845287132263184, "loss": 0.7651286721229553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.64138439143757, "step_time": 0.4520260238647461} +{"epoch": 0, "iter": 8676, "iter_tflops": 46.42003786962032, "iter_time": 0.44444370269775385, "loss": 0.9337478876113892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99175043211894, "step_time": 0.41268996047973633} +{"epoch": 0, "iter": 8677, "iter_tflops": 35.33009316199517, "iter_time": 0.583952423095703, "loss": 0.5572836399078369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.894837564553924, "step_time": 0.5444301872253418} +{"epoch": 0, 
"iter": 8678, "iter_tflops": 12.120770294660486, "iter_time": 1.7021272583007814, "loss": 0.6987642049789429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.8686545957965, "step_time": 1.4876059799194337} +{"epoch": 0, "iter": 8679, "iter_tflops": 16.029773037789617, "iter_time": 1.2870483856201171, "loss": 0.45159077644348145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.558296832109257, "step_time": 1.111691104888916} +{"epoch": 0, "iter": 8680, "iter_tflops": 21.710254592323253, "iter_time": 0.9502925643920899, "loss": 0.5726838111877441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.34779198079774, "step_time": 0.8473496704101562} +{"epoch": 0, "iter": 8681, "iter_tflops": 12.10832557276809, "iter_time": 1.3562839050292967, "loss": 0.223724827170372, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 12.843813368366124, "step_time": 1.2786176986694335} +{"epoch": 0, "iter": 8682, "iter_tflops": 10.96238706651986, "iter_time": 1.4980612335205077, "loss": 0.21071773767471313, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 14.14876246941937, "step_time": 1.160689998626709} +{"epoch": 0, "iter": 8683, "iter_tflops": 28.65225589062793, "iter_time": 0.5731600036621094, "loss": 0.2680627703666687, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 30.627498288320144, "step_time": 0.5361955108642578} +{"epoch": 0, "iter": 8684, "iter_tflops": 29.74857660966195, "iter_time": 0.5520374069213867, "loss": 0.32943108677864075, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 31.677298182042183, "step_time": 0.5184257507324218} +{"epoch": 0, "iter": 8685, "iter_tflops": 21.71910462624555, "iter_time": 0.9499053421020507, "loss": 0.0032868636772036552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.76491450105616, "step_time": 0.9062671203613282} +{"epoch": 0, "iter": 8686, "iter_tflops": 15.182605531228601, "iter_time": 1.3588638305664065, "loss": 0.01951117254793644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.157582670420478, "step_time": 1.02349045753479} +{"epoch": 0, 
"iter": 8687, "iter_tflops": 56.22360530055181, "iter_time": 0.3669471817016602, "loss": 0.007681883871555328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.84054796020999, "step_time": 0.33361757278442383} +{"epoch": 0, "iter": 8688, "iter_tflops": 58.979807756556376, "iter_time": 0.3497992668151855, "loss": 0.014109702780842781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.65989239163183, "step_time": 0.3190709533691406} +{"epoch": 0, "iter": 8689, "iter_tflops": 14.11573609440172, "iter_time": 0.6400314254760743, "loss": 0.01476293709129095, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 15.017445163444814, "step_time": 0.6016013107299805} +{"epoch": 0, "iter": 8690, "iter_tflops": 21.862948518233946, "iter_time": 0.41323404693603516, "loss": 0.006278124637901783, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 24.146966261814605, "step_time": 0.3741469879150391} +{"epoch": 0, "iter": 8691, "iter_tflops": 25.934691865486716, "iter_time": 0.3483563537597656, "loss": 0.03330595791339874, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 28.54465400518224, "step_time": 0.31650461387634277} +{"epoch": 0, "iter": 8692, "iter_tflops": 27.407957004057387, "iter_time": 0.32963108825683596, "loss": 0.0017301030457019806, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 30.12242568687426, "step_time": 0.29992653274536135} +{"epoch": 0, "iter": 8693, "iter_tflops": 28.553896319153043, "iter_time": 0.7225316390991211, "loss": 0.7680144906044006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.626510953455448, "step_time": 0.6736351242065429} +{"epoch": 0, "iter": 8694, "iter_tflops": 11.549491868672163, "iter_time": 1.7863204498291014, "loss": 0.5388818979263306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.775435747583822, "step_time": 1.4976726608276367} +{"epoch": 0, "iter": 8695, "iter_tflops": 16.677712670984906, "iter_time": 1.2370457458496096, "loss": 0.6343464255332947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.347231163195932, "step_time": 
1.1244799461364745} +{"epoch": 0, "iter": 8696, "iter_tflops": 19.370162465379536, "iter_time": 1.065096565246582, "loss": 0.7961123585700989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.678394378121094, "step_time": 0.9097246112823485} +{"epoch": 0, "iter": 8697, "iter_tflops": 12.578748237404938, "iter_time": 1.1105647583007812, "loss": 0.23030881583690643, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 13.475175554717513, "step_time": 1.0366851577758789} +{"epoch": 0, "iter": 8698, "iter_tflops": 12.507263379282323, "iter_time": 1.1169121551513672, "loss": 0.2692282497882843, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 17.941171096304718, "step_time": 0.7786289100646973} +{"epoch": 0, "iter": 8699, "iter_tflops": 26.20162677131455, "iter_time": 0.5331544723510743, "loss": 0.2019386738538742, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 27.851501348213755, "step_time": 0.5015713272094727} +{"epoch": 0, "iter": 8700, "iter_tflops": 26.01732836329007, "iter_time": 0.536931167602539, "loss": 0.2885589003562927, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 27.615201399212825, "step_time": 0.5058632125854492} +{"epoch": 0, "iter": 8701, "iter_tflops": 29.09694817111877, "iter_time": 0.6025248641967773, "loss": 0.0987991914153099, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 31.413441362903033, "step_time": 0.5580934143066406} +{"epoch": 0, "iter": 8702, "iter_tflops": 42.3622953755686, "iter_time": 0.4138499717712402, "loss": 0.12125575542449951, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 46.6552731395108, "step_time": 0.3757696304321289} +{"epoch": 0, "iter": 8703, "iter_tflops": 44.53783657510266, "iter_time": 0.3936346282958984, "loss": 0.08059802651405334, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 48.623877738887224, "step_time": 0.360556079864502} +{"epoch": 0, "iter": 8704, "iter_tflops": 40.14797611440118, "iter_time": 0.43667543029785155, "loss": 0.06449175626039505, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 43.72815866207184, "step_time": 
0.40092323303222654} +{"epoch": 0, "iter": 8705, "iter_tflops": 36.83721494142546, "iter_time": 0.560061164855957, "loss": 0.6730864644050598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.467874788007194, "step_time": 0.522731300354004} +{"epoch": 0, "iter": 8706, "iter_tflops": 26.48064240620329, "iter_time": 0.779100944519043, "loss": 0.4494823217391968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.099007915628476, "step_time": 0.6854409809112549} +{"epoch": 0, "iter": 8707, "iter_tflops": 36.96620024268028, "iter_time": 0.5581069564819336, "loss": 0.5458275079727173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.43688788401137, "step_time": 0.5102047805786133} +{"epoch": 0, "iter": 8708, "iter_tflops": 38.36450380166653, "iter_time": 0.5377651596069335, "loss": 0.6820150017738342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03743471258051, "step_time": 0.49077907943725585} +{"epoch": 0, "iter": 8709, "iter_tflops": 34.823487070885534, "iter_time": 0.5924476623535156, "loss": 0.7243882417678833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.4049682285047, "step_time": 0.5371985569000244} +{"epoch": 0, "iter": 8710, "iter_tflops": 35.01111613525434, "iter_time": 0.5892726593017579, "loss": 0.8436699509620667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35601515440174, "step_time": 0.5242170333862305} +{"epoch": 0, "iter": 8711, "iter_tflops": 36.54317436280255, "iter_time": 0.5645676345825196, "loss": 0.6531831622123718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.83626075254486, "step_time": 0.5178973407745362} +{"epoch": 0, "iter": 8712, "iter_tflops": 36.5181349241269, "iter_time": 0.5649547424316406, "loss": 0.7536687254905701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.79749530252238, "step_time": 0.5184018077850342} +{"epoch": 0, "iter": 8713, "iter_tflops": 30.212478670103234, "iter_time": 0.6828666305541992, "loss": 0.48596662282943726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.46849494706556, "step_time": 
0.6164332618713378} +{"epoch": 0, "iter": 8714, "iter_tflops": 34.85635164511547, "iter_time": 0.5918890686035155, "loss": 0.38134828209877014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.48286327751531, "step_time": 0.5361111869812012} +{"epoch": 0, "iter": 8715, "iter_tflops": 37.2040580003312, "iter_time": 0.5545387954711914, "loss": 0.5280072093009949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.71004492289449, "step_time": 0.5067813987731934} +{"epoch": 0, "iter": 8716, "iter_tflops": 40.184088922669474, "iter_time": 0.5134144897460938, "loss": 0.4842193126678467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.11422472503437, "step_time": 0.46767439842224123} +{"epoch": 0, "iter": 8717, "iter_tflops": 21.13881916840994, "iter_time": 0.9759813613891603, "loss": 0.8219205141067505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.37011311665418, "step_time": 0.922261474609375} +{"epoch": 0, "iter": 8718, "iter_tflops": 9.940281048340784, "iter_time": 2.0755040435791017, "loss": 0.6578869223594666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.087884758807249, "step_time": 1.576350486755371} +{"epoch": 0, "iter": 8719, "iter_tflops": 15.923714998113326, "iter_time": 1.295620620727539, "loss": 0.6231689453125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.6216807705472, "step_time": 1.1707789840698242} +{"epoch": 0, "iter": 8720, "iter_tflops": 43.2186130326214, "iter_time": 0.4773659324645996, "loss": 0.82085120677948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.62189940051912, "step_time": 0.4425193691253662} +{"epoch": 0, "iter": 8721, "iter_tflops": 14.636969939187711, "iter_time": 1.0715810546875, "loss": 0.2190735787153244, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 15.25268171871356, "step_time": 1.0283240661621094} +{"epoch": 0, "iter": 8722, "iter_tflops": 10.23274598987542, "iter_time": 1.5327947845458982, "loss": 0.15493576228618622, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 13.58553307173929, "step_time": 
1.1545148506164553} +{"epoch": 0, "iter": 8723, "iter_tflops": 22.77609581592084, "iter_time": 0.6886474227905274, "loss": 0.36233699321746826, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 24.644387910501635, "step_time": 0.636441032409668} +{"epoch": 0, "iter": 8724, "iter_tflops": 24.016163566131233, "iter_time": 0.6530893096923828, "loss": 0.15556374192237854, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.668032874491423, "step_time": 0.611059669494629} +{"epoch": 0, "iter": 8725, "iter_tflops": 22.410276276896592, "iter_time": 0.9206086196899412, "loss": 0.037101030349731445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.92164507922329, "step_time": 0.862444595336914} +{"epoch": 0, "iter": 8726, "iter_tflops": 7.738750624791877, "iter_time": 2.6659462890625, "loss": 0.03958697244524956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.429656754802695, "step_time": 2.1878944320678713} +{"epoch": 0, "iter": 8727, "iter_tflops": 16.981716302396496, "iter_time": 1.214900375366211, "loss": 0.06786229461431503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.202118236907918, "step_time": 1.0744175853729248} +{"epoch": 0, "iter": 8728, "iter_tflops": 42.714287025179864, "iter_time": 0.4830021743774414, "loss": 0.04776214808225632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.29596863064877, "step_time": 0.436212516784668} +{"epoch": 0, "iter": 8729, "iter_tflops": 16.378884736199595, "iter_time": 0.9351344451904298, "loss": 0.23851124942302704, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 17.385002104590104, "step_time": 0.8810156707763672} +{"epoch": 0, "iter": 8730, "iter_tflops": 7.054834304715381, "iter_time": 2.1710586853027345, "loss": 0.19170351326465607, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 8.50035764237629, "step_time": 1.8018605728149415} +{"epoch": 0, "iter": 8731, "iter_tflops": 10.008678713631099, "iter_time": 1.5303178100585937, "loss": 0.24062322080135345, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 13.582392792480448, 
"step_time": 1.1276701774597169} +{"epoch": 0, "iter": 8732, "iter_tflops": 27.87781568627543, "iter_time": 0.5494138946533202, "loss": 0.15464720129966736, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 29.715005309033636, "step_time": 0.5154452819824219} +{"epoch": 0, "iter": 8733, "iter_tflops": 22.286219125609307, "iter_time": 0.8421452331542969, "loss": 0.48311305046081543, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 23.37548221474811, "step_time": 0.8029025039672852} +{"epoch": 0, "iter": 8734, "iter_tflops": 16.2202828338978, "iter_time": 1.157084213256836, "loss": 0.17466837167739868, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 19.008070803783145, "step_time": 0.9873823280334472} +{"epoch": 0, "iter": 8735, "iter_tflops": 31.447029149311067, "iter_time": 0.5968205490112305, "loss": 0.2655346989631653, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 33.50308201308407, "step_time": 0.5601942291259765} +{"epoch": 0, "iter": 8736, "iter_tflops": 34.29384701037073, "iter_time": 0.547276985168457, "loss": 0.21953704953193665, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 36.44642812581418, "step_time": 0.5149539794921875} +{"epoch": 0, "iter": 8737, "iter_tflops": 29.572088217653494, "iter_time": 0.6976542663574219, "loss": 0.11519424617290497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.545303198794123, "step_time": 0.6540147476196289} +{"epoch": 0, "iter": 8738, "iter_tflops": 15.183082056438984, "iter_time": 1.3588211822509766, "loss": 0.07671918720006943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.08952673491942, "step_time": 1.1404993515014648} +{"epoch": 0, "iter": 8739, "iter_tflops": 38.65234070515707, "iter_time": 0.5337605209350585, "loss": 0.06193988770246506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.5851958138358, "step_time": 0.4844663295745849} +{"epoch": 0, "iter": 8740, "iter_tflops": 43.453159910040775, "iter_time": 0.47478925704956054, "loss": 0.06278255581855774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
47.93544652304353, "step_time": 0.43039326858520505} +{"epoch": 0, "iter": 8741, "iter_tflops": 33.49945754332413, "iter_time": 0.615863510131836, "loss": 0.18328242003917694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.23611888155563, "step_time": 0.5540613288879395} +{"epoch": 0, "iter": 8742, "iter_tflops": 37.998876914690186, "iter_time": 0.5429395599365234, "loss": 0.11320582032203674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.65859045636536, "step_time": 0.4836327991485596} +{"epoch": 0, "iter": 8743, "iter_tflops": 37.02694285339846, "iter_time": 0.5571913833618164, "loss": 0.10787736624479294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.76711732698184, "step_time": 0.5060719242095947} +{"epoch": 0, "iter": 8744, "iter_tflops": 41.9560941209191, "iter_time": 0.4917305564880371, "loss": 0.1583862453699112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.0291527963741, "step_time": 0.4482179718017578} +{"epoch": 0, "iter": 8745, "iter_tflops": 35.40120887983541, "iter_time": 0.5827793502807617, "loss": 0.13750678300857544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.01881770190111, "step_time": 0.5287472743988038} +{"epoch": 0, "iter": 8746, "iter_tflops": 36.926831313639255, "iter_time": 0.5587019729614258, "loss": 0.1116175726056099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.571023421980726, "step_time": 0.49628543663024893} +{"epoch": 0, "iter": 8747, "iter_tflops": 44.54799010673552, "iter_time": 0.4631206359863282, "loss": 0.15518924593925476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.933114878283156, "step_time": 0.4216182346343994} +{"epoch": 0, "iter": 8748, "iter_tflops": 40.021619412969336, "iter_time": 0.5154987182617187, "loss": 0.1350257247686386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.76387501875314, "step_time": 0.47141834449768066} +{"epoch": 0, "iter": 8749, "iter_tflops": 25.540613071742236, "iter_time": 0.8077759704589844, "loss": 0.8279673457145691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
28.418365693657528, "step_time": 0.7259774799346924} +{"epoch": 0, "iter": 8750, "iter_tflops": 36.89425230506093, "iter_time": 0.5591953277587891, "loss": 1.058666467666626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.28761206595907, "step_time": 0.5120952186584473} +{"epoch": 0, "iter": 8751, "iter_tflops": 38.59897219940274, "iter_time": 0.534498519897461, "loss": 0.6094012260437012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.592091330528056, "step_time": 0.49603404998779305} +{"epoch": 0, "iter": 8752, "iter_tflops": 30.759890075809412, "iter_time": 0.6707141494750977, "loss": 0.7411288619041443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.15250144650986, "step_time": 0.6223088035583497} +{"epoch": 0, "iter": 8753, "iter_tflops": 25.50569321350683, "iter_time": 0.8088818969726562, "loss": 0.6042577624320984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.454655214275963, "step_time": 0.7514606666564941} +{"epoch": 0, "iter": 8754, "iter_tflops": 32.94499010924431, "iter_time": 0.6262285537719727, "loss": 0.6757816672325134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.89609321220348, "step_time": 0.5591674270629883} +{"epoch": 0, "iter": 8755, "iter_tflops": 41.67946212298814, "iter_time": 0.49499423599243164, "loss": 0.7128787636756897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.649053697163005, "step_time": 0.451950080871582} +{"epoch": 0, "iter": 8756, "iter_tflops": 39.92305295759159, "iter_time": 0.5167714385986328, "loss": 0.5821056962013245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.661764435419656, "step_time": 0.4725208377838135} +{"epoch": 0, "iter": 8757, "iter_tflops": 16.338771958752307, "iter_time": 0.9574627304077148, "loss": 0.28474465012550354, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 17.462223019065004, "step_time": 0.8958633270263673} +{"epoch": 0, "iter": 8758, "iter_tflops": 9.585030331242441, "iter_time": 1.6321038818359375, "loss": 0.22316212952136993, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 
12.680336153424443, "step_time": 1.2337027206420899} +{"epoch": 0, "iter": 8759, "iter_tflops": 18.507501929778076, "iter_time": 0.8452661666870118, "loss": 0.1704566329717636, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 21.730986093516517, "step_time": 0.7198828964233398} +{"epoch": 0, "iter": 8760, "iter_tflops": 27.608580118070055, "iter_time": 0.566626937866211, "loss": 0.3627735674381256, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 29.53951893234535, "step_time": 0.5295876770019532} +{"epoch": 0, "iter": 8761, "iter_tflops": 17.887173313789237, "iter_time": 0.8448538055419922, "loss": 0.21745778620243073, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 18.85118948659792, "step_time": 0.8016494903564454} +{"epoch": 0, "iter": 8762, "iter_tflops": 19.514526206972562, "iter_time": 0.7743998641967774, "loss": 0.13379670679569244, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 21.527573995322964, "step_time": 0.7019855766296387} +{"epoch": 0, "iter": 8763, "iter_tflops": 27.52590563804983, "iter_time": 0.5490117797851563, "loss": 0.20692172646522522, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 29.330439779230947, "step_time": 0.5152342262268066} +{"epoch": 0, "iter": 8764, "iter_tflops": 26.459752597610134, "iter_time": 0.5711333236694336, "loss": 0.19416190683841705, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.18837266794381, "step_time": 0.5361092185974121} +{"epoch": 0, "iter": 8765, "iter_tflops": 37.79019603703881, "iter_time": 0.5459377212524413, "loss": 0.7626522779464722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.679444787071034, "step_time": 0.5071626129150391} +{"epoch": 0, "iter": 8766, "iter_tflops": 30.422842782361617, "iter_time": 0.6781448287963867, "loss": 0.6688478589057922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.31950078755392, "step_time": 0.5247038516998291} +{"epoch": 0, "iter": 8767, "iter_tflops": 34.60718464628702, "iter_time": 0.5961505889892579, "loss": 0.7797520160675049, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 37.64650282788271, "step_time": 0.5480215148925781} +{"epoch": 0, "iter": 8768, "iter_tflops": 34.55510565570369, "iter_time": 0.5970490646362305, "loss": 0.643088161945343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.56850467747804, "step_time": 0.549159294128418} +{"epoch": 0, "iter": 8769, "iter_tflops": 14.471368167719463, "iter_time": 0.9006593856811523, "loss": 0.07121451944112778, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 15.53513032532462, "step_time": 0.838987075805664} +{"epoch": 0, "iter": 8770, "iter_tflops": 18.501861815524087, "iter_time": 0.704457405090332, "loss": 0.04491569474339485, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 21.06582783165179, "step_time": 0.6187164192199707} +{"epoch": 0, "iter": 8771, "iter_tflops": 27.42645260929556, "iter_time": 0.4752263717651367, "loss": 0.0521404929459095, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 30.404276072127473, "step_time": 0.42868225288391104} +{"epoch": 0, "iter": 8772, "iter_tflops": 27.138590553302926, "iter_time": 0.48026715087890626, "loss": 0.04725940153002739, "lr": 3e-05, "seqlen": 5232.0, "step_tflops": 29.987657794188806, "step_time": 0.4346379318237305} +{"epoch": 0, "iter": 8773, "iter_tflops": 34.243755447294404, "iter_time": 0.6024775390624999, "loss": 0.7915088534355164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.76814307014618, "step_time": 0.5462564964294433} +{"epoch": 0, "iter": 8774, "iter_tflops": 37.17140565197363, "iter_time": 0.5550259170532227, "loss": 0.7928312420845032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.55654241692572, "step_time": 0.5086995162963868} +{"epoch": 0, "iter": 8775, "iter_tflops": 40.958603930385024, "iter_time": 0.5037059745788575, "loss": 0.727097749710083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.48989912847272, "step_time": 0.4637253379821778} +{"epoch": 0, "iter": 8776, "iter_tflops": 40.211680191386776, "iter_time": 0.5130622100830079, "loss": 0.6608268618583679, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 43.67357904470832, "step_time": 0.47239301109313964} +{"epoch": 0, "iter": 8777, "iter_tflops": 37.99457955053049, "iter_time": 0.5430009689331055, "loss": 0.02295445092022419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.595356152544355, "step_time": 0.48435076904296875} +{"epoch": 0, "iter": 8778, "iter_tflops": 40.43607340082763, "iter_time": 0.510215057373047, "loss": 0.005919284652918577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.8047957233349, "step_time": 0.4604661884307862} +{"epoch": 0, "iter": 8779, "iter_tflops": 44.67593281391074, "iter_time": 0.46179435348510744, "loss": 0.0006509613012894988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.66111858624994, "step_time": 0.41543755149841305} +{"epoch": 0, "iter": 8780, "iter_tflops": 49.34772916339312, "iter_time": 0.4180758438110352, "loss": 0.013518750667572021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.48068171402879, "step_time": 0.3786864051818848} +{"epoch": 0, "iter": 8781, "iter_tflops": 24.449696116082617, "iter_time": 0.8438179931640626, "loss": 0.40278491377830505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.18770862504804, "step_time": 0.7878159103393554} +{"epoch": 0, "iter": 8782, "iter_tflops": 22.210824872811028, "iter_time": 0.9288756103515625, "loss": 0.46269071102142334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.260088681608167, "step_time": 0.7856444721221925} +{"epoch": 0, "iter": 8783, "iter_tflops": 49.452704862822024, "iter_time": 0.4171883735656738, "loss": 0.40592560172080994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.837519629664115, "step_time": 0.38321032714843745} +{"epoch": 0, "iter": 8784, "iter_tflops": 46.75499866476141, "iter_time": 0.4412596321105956, "loss": 0.40226250886917114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.34343651008157, "step_time": 0.4098070163726807} +{"epoch": 0, "iter": 8785, "iter_tflops": 39.52616996424444, "iter_time": 0.5219603500366211, "loss": 1.0176997184753418, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 43.19753734431352, "step_time": 0.47759883499145506} +{"epoch": 0, "iter": 8786, "iter_tflops": 43.352103584054625, "iter_time": 0.47589601898193357, "loss": 0.6694176197052002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.78887738725851, "step_time": 0.4409401264190673} +{"epoch": 0, "iter": 8787, "iter_tflops": 41.93360816215208, "iter_time": 0.49199423599243164, "loss": 0.6655079126358032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.16867861672401, "step_time": 0.45675663185119625} +{"epoch": 0, "iter": 8788, "iter_tflops": 42.22311234453511, "iter_time": 0.4886208610534668, "loss": 0.9076728224754333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38536989702969, "step_time": 0.4545758590698242} +{"epoch": 0, "iter": 8789, "iter_tflops": 27.97394854908397, "iter_time": 0.5183220977783204, "loss": 0.011505591683089733, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 30.361271298786658, "step_time": 0.47756615829467775} +{"epoch": 0, "iter": 8790, "iter_tflops": 7.209181068272186, "iter_time": 2.011256973266602, "loss": 0.005682147573679686, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 8.678663089638706, "step_time": 1.6707084426879881} +{"epoch": 0, "iter": 8791, "iter_tflops": 7.662005323846762, "iter_time": 1.8923917541503907, "loss": 0.0030748145654797554, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 8.896281218449374, "step_time": 1.6298400802612305} +{"epoch": 0, "iter": 8792, "iter_tflops": 17.401820506276223, "iter_time": 0.8332183227539062, "loss": 0.010436139069497585, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 19.958151367481175, "step_time": 0.7264959278106689} +{"epoch": 0, "iter": 8793, "iter_tflops": 21.976603376946123, "iter_time": 0.7025272903442383, "loss": 0.2270747274160385, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.730229046022224, "step_time": 0.6506116561889649} +{"epoch": 0, "iter": 8794, "iter_tflops": 8.461915327948457, "iter_time": 1.8245471649169922, "loss": 0.2693583071231842, 
"lr": 3e-05, "seqlen": 6176.0, "step_tflops": 10.254093986755027, "step_time": 1.5056584854125976} +{"epoch": 0, "iter": 8795, "iter_tflops": 27.44090614211828, "iter_time": 0.5626331558227539, "loss": 0.23899558186531067, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.262329855867225, "step_time": 0.5276122474670409} +{"epoch": 0, "iter": 8796, "iter_tflops": 28.22215149883057, "iter_time": 0.5470583496093749, "loss": 0.30371031165122986, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.973691939681625, "step_time": 0.5150904884338379} +{"epoch": 0, "iter": 8797, "iter_tflops": 27.494038963595298, "iter_time": 0.7503842391967773, "loss": 0.0325830802321434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.002723184754817, "step_time": 0.7113502197265625} +{"epoch": 0, "iter": 8798, "iter_tflops": 13.850686603199392, "iter_time": 1.4895357971191405, "loss": 0.031180350109934807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.143017840743433, "step_time": 1.278019618988037} +{"epoch": 0, "iter": 8799, "iter_tflops": 51.04069702856756, "iter_time": 0.4042086944580078, "loss": 0.041272636502981186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.80366839013865, "step_time": 0.36970855331420893} +{"epoch": 0, "iter": 8800, "iter_tflops": 50.72809897712426, "iter_time": 0.406699520111084, "loss": 0.04862884059548378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.27177138246206, "step_time": 0.3732663707733154} +{"epoch": 0, "iter": 8801, "iter_tflops": 25.16798938897928, "iter_time": 0.819735466003418, "loss": 0.20534150302410126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.54087652914149, "step_time": 0.7773327865600586} +{"epoch": 0, "iter": 8802, "iter_tflops": 15.19510122817086, "iter_time": 1.357746368408203, "loss": 0.15346026420593262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.404911293690045, "step_time": 1.0631892719268798} +{"epoch": 0, "iter": 8803, "iter_tflops": 52.09629856735774, "iter_time": 0.39601841354370126, "loss": 
0.16039341688156128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.66428760252895, "step_time": 0.3640934066772461} +{"epoch": 0, "iter": 8804, "iter_tflops": 52.42047696930024, "iter_time": 0.3935693588256836, "loss": 0.23076486587524414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.012338895812874, "step_time": 0.36187067413330076} +{"epoch": 0, "iter": 8805, "iter_tflops": 32.997733156568216, "iter_time": 0.6252276000976563, "loss": 0.3116579055786133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.445280185891136, "step_time": 0.582054744720459} +{"epoch": 0, "iter": 8806, "iter_tflops": 10.985568274520112, "iter_time": 1.8780178680419923, "loss": 0.2616477608680725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.860508153291608, "step_time": 1.3883168258666991} +{"epoch": 0, "iter": 8807, "iter_tflops": 11.658110150007857, "iter_time": 1.7696773529052736, "loss": 0.35980480909347534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.884943759242084, "step_time": 1.6011783905029298} +{"epoch": 0, "iter": 8808, "iter_tflops": 23.469649817629612, "iter_time": 0.8790541687011718, "loss": 0.24434247612953186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.54215616770875, "step_time": 0.597272891998291} +{"epoch": 0, "iter": 8809, "iter_tflops": 13.956601821749041, "iter_time": 0.9775844039916991, "loss": 0.24914294481277466, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 14.612447542032083, "step_time": 0.9337078018188476} +{"epoch": 0, "iter": 8810, "iter_tflops": 11.26157419089984, "iter_time": 1.2115318908691408, "loss": 0.2912779152393341, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 14.654175072383033, "step_time": 0.931049083709717} +{"epoch": 0, "iter": 8811, "iter_tflops": 24.357517561729153, "iter_time": 0.5601455993652343, "loss": 0.273728609085083, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 25.99273703845168, "step_time": 0.5249064865112305} +{"epoch": 0, "iter": 8812, "iter_tflops": 24.767598717525544, "iter_time": 0.5508711776733398, 
"loss": 0.2573954463005066, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 26.35059715458411, "step_time": 0.5177778778076172} +{"epoch": 0, "iter": 8813, "iter_tflops": 42.246099687626796, "iter_time": 0.4883549880981445, "loss": 0.7423804402351379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.89555091210838, "step_time": 0.4495227336883545} +{"epoch": 0, "iter": 8814, "iter_tflops": 35.3038416885024, "iter_time": 0.5843866424560547, "loss": 0.5297262072563171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.87904850207856, "step_time": 0.5306481075286866} +{"epoch": 0, "iter": 8815, "iter_tflops": 36.68308303655914, "iter_time": 0.5624143829345702, "loss": 0.5963857769966125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.8991439898526, "step_time": 0.5170811061859132} +{"epoch": 0, "iter": 8816, "iter_tflops": 39.0080625332019, "iter_time": 0.5288930587768556, "loss": 0.45753511786460876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.39329064523981, "step_time": 0.48665940284729} +{"epoch": 0, "iter": 8817, "iter_tflops": 20.83807987839656, "iter_time": 0.9900669174194336, "loss": 0.7278157472610474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.457220901052626, "step_time": 0.9186841773986815} +{"epoch": 0, "iter": 8818, "iter_tflops": 18.788954836793568, "iter_time": 1.0980437011718749, "loss": 0.7138680219650269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.579610226428798, "step_time": 0.8065444831848144} +{"epoch": 0, "iter": 8819, "iter_tflops": 35.612130388607675, "iter_time": 0.5793276977539064, "loss": 0.788050651550293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.75153782287972, "step_time": 0.532394187927246} +{"epoch": 0, "iter": 8820, "iter_tflops": 36.53998744067847, "iter_time": 0.5646168746948242, "loss": 1.0423119068145752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7053705283368, "step_time": 0.5196046085357666} +{"epoch": 0, "iter": 8821, "iter_tflops": 12.489499997243103, "iter_time": 1.1740064392089844, "loss": 
0.025677073746919632, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 13.258563949239877, "step_time": 1.1059081115722658} +{"epoch": 0, "iter": 8822, "iter_tflops": 11.79390855754126, "iter_time": 1.243248016357422, "loss": 0.02404608763754368, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 14.217536595392689, "step_time": 1.031314624786377} +{"epoch": 0, "iter": 8823, "iter_tflops": 38.316686689581275, "iter_time": 0.3826727905273437, "loss": 0.03800332546234131, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 41.956213434629326, "step_time": 0.34947751998901366} +{"epoch": 0, "iter": 8824, "iter_tflops": 39.12751465507744, "iter_time": 0.3747427749633789, "loss": 0.03418954461812973, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 42.91845325525149, "step_time": 0.341642168045044} +{"epoch": 0, "iter": 8825, "iter_tflops": 27.929409178949122, "iter_time": 0.7386870727539063, "loss": 0.20800557732582092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.547587192217854, "step_time": 0.6982327651977539} +{"epoch": 0, "iter": 8826, "iter_tflops": 11.741012665271848, "iter_time": 1.7571817779541015, "loss": 0.23540528118610382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.050505891941523, "step_time": 1.285385871887207} +{"epoch": 0, "iter": 8827, "iter_tflops": 49.93860144371699, "iter_time": 0.4131291809082031, "loss": 0.284366250038147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.22338459912834, "step_time": 0.3804833221435547} +{"epoch": 0, "iter": 8828, "iter_tflops": 50.843049441702576, "iter_time": 0.40578001785278317, "loss": 0.22608691453933716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.093901980173804, "step_time": 0.37447145271301263} +{"epoch": 0, "iter": 8829, "iter_tflops": 26.589597496956515, "iter_time": 0.7759084548950196, "loss": 0.6344082355499268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.11063050951068, "step_time": 0.7339249649047851} +{"epoch": 0, "iter": 8830, "iter_tflops": 14.965921036517798, "iter_time": 1.3785381774902346, 
"loss": 0.8036224842071533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.91388663354401, "step_time": 1.2197724838256836} +{"epoch": 0, "iter": 8831, "iter_tflops": 36.34790197693422, "iter_time": 0.5676006698608399, "loss": 0.9706020355224609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.84353605073257, "step_time": 0.5178027744293213} +{"epoch": 0, "iter": 8832, "iter_tflops": 38.42496840586656, "iter_time": 0.5369189453125001, "loss": 0.8082613945007324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01886071623115, "step_time": 0.49099602317810054} +{"epoch": 0, "iter": 8833, "iter_tflops": 14.634067909630994, "iter_time": 1.206438537597656, "loss": 0.0317760668694973, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 15.573949211936476, "step_time": 1.1336304779052735} +{"epoch": 0, "iter": 8834, "iter_tflops": 18.00274552524363, "iter_time": 0.9806894989013671, "loss": 0.02034624107182026, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 20.47416454273852, "step_time": 0.8623113021850585} +{"epoch": 0, "iter": 8835, "iter_tflops": 36.93246240013948, "iter_time": 0.478037540435791, "loss": 0.03632434085011482, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 40.639173289703294, "step_time": 0.43443559646606444} +{"epoch": 0, "iter": 8836, "iter_tflops": 34.19614907741712, "iter_time": 0.5162892303466797, "loss": 0.07931307703256607, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 37.605772534941195, "step_time": 0.4694785480499267} +{"epoch": 0, "iter": 8837, "iter_tflops": 18.26849038393773, "iter_time": 1.129326675415039, "loss": 0.611369788646698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.57071725351173, "step_time": 1.0541817779541016} +{"epoch": 0, "iter": 8838, "iter_tflops": 22.044676430626513, "iter_time": 0.9358764495849609, "loss": 0.9133425951004028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.45233707184472, "step_time": 0.7799346218109131} +{"epoch": 0, "iter": 8839, "iter_tflops": 44.04960389181468, "iter_time": 0.4683604774475098, 
"loss": 0.6662294864654541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.44621585314889, "step_time": 0.4348311691284179} +{"epoch": 0, "iter": 8840, "iter_tflops": 43.01726874256282, "iter_time": 0.4796002655029297, "loss": 0.9017345309257507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.202221913892046, "step_time": 0.446538990020752} +{"epoch": 0, "iter": 8841, "iter_tflops": 36.12669053286287, "iter_time": 0.5710762100219727, "loss": 0.5022295117378235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.866606016303834, "step_time": 0.530817985534668} +{"epoch": 0, "iter": 8842, "iter_tflops": 10.044061456783911, "iter_time": 2.054058868408203, "loss": 0.41669952869415283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.866357048523428, "step_time": 1.7386206588745117} +{"epoch": 0, "iter": 8843, "iter_tflops": 11.66388166955595, "iter_time": 1.7688016815185545, "loss": 0.4962984025478363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.174299485134348, "step_time": 1.455528263092041} +{"epoch": 0, "iter": 8844, "iter_tflops": 39.66700343836697, "iter_time": 0.5201071853637694, "loss": 0.6241058707237244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.50190765441844, "step_time": 0.47425721359252926} +{"epoch": 0, "iter": 8845, "iter_tflops": 10.154138930814975, "iter_time": 1.4158893280029297, "loss": 0.18268118798732758, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 10.886509981651955, "step_time": 1.3206378326416017} +{"epoch": 0, "iter": 8846, "iter_tflops": 14.267240488236013, "iter_time": 1.0077027130126952, "loss": 0.2370709478855133, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 16.563506833108953, "step_time": 0.8680007858276367} +{"epoch": 0, "iter": 8847, "iter_tflops": 25.791937129992476, "iter_time": 0.5574275741577148, "loss": 0.1465253084897995, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 27.49664937596123, "step_time": 0.5228686866760254} +{"epoch": 0, "iter": 8848, "iter_tflops": 24.644876171200437, "iter_time": 0.5833722534179687, 
"loss": 0.1280573606491089, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 26.20561162018926, "step_time": 0.5486281776428222} +{"epoch": 0, "iter": 8849, "iter_tflops": 46.29679225023687, "iter_time": 0.4089577903747559, "loss": 0.07534904032945633, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 51.29196406372598, "step_time": 0.36913060760498045} +{"epoch": 0, "iter": 8850, "iter_tflops": 47.93908710000109, "iter_time": 0.3949477348327637, "loss": 0.03298395872116089, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 52.52168093745472, "step_time": 0.360487964630127} +{"epoch": 0, "iter": 8851, "iter_tflops": 49.58805012083783, "iter_time": 0.38181444549560556, "loss": 0.04595106095075607, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 54.175631681919235, "step_time": 0.34948247528076176} +{"epoch": 0, "iter": 8852, "iter_tflops": 51.79040295172006, "iter_time": 0.3655780372619629, "loss": 0.03759557008743286, "lr": 3e-05, "seqlen": 7536.0, "step_tflops": 56.63201725020444, "step_time": 0.3343238468170166} +{"epoch": 0, "iter": 8853, "iter_tflops": 46.97343607546901, "iter_time": 0.4392076721191407, "loss": 0.021150166168808937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.81372460251446, "step_time": 0.39817815971374515} +{"epoch": 0, "iter": 8854, "iter_tflops": 48.31805265234051, "iter_time": 0.42698520278930663, "loss": 0.0170912928879261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.1753051396692, "step_time": 0.3879826068878174} +{"epoch": 0, "iter": 8855, "iter_tflops": 55.33860529280233, "iter_time": 0.37281556701660157, "loss": 0.03830907493829727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.276914229158855, "step_time": 0.3422718925476074} +{"epoch": 0, "iter": 8856, "iter_tflops": 54.330060977627056, "iter_time": 0.3797362480163574, "loss": 0.024084316566586494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.88722574069591, "step_time": 0.3503492183685303} +{"epoch": 0, "iter": 8857, "iter_tflops": 19.658364426438617, "iter_time": 
1.049481689453125, "loss": 0.6878623366355896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.413172397910483, "step_time": 1.0106755142211914} +{"epoch": 0, "iter": 8858, "iter_tflops": 16.802066069089168, "iter_time": 1.2278902740478517, "loss": 0.5132215023040771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.336703161147433, "step_time": 1.014475814819336} +{"epoch": 0, "iter": 8859, "iter_tflops": 41.42183927557876, "iter_time": 0.4980728492736817, "loss": 0.5021868944168091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.18880668527148, "step_time": 0.4565531826019288} +{"epoch": 0, "iter": 8860, "iter_tflops": 38.594576597904684, "iter_time": 0.5345593948364258, "loss": 0.6322619318962097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08789758018706, "step_time": 0.49019064140319824} +{"epoch": 0, "iter": 8861, "iter_tflops": 21.958072259365835, "iter_time": 0.9395676116943359, "loss": 0.6621028780937195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.396534345672382, "step_time": 0.8818012619018555} +{"epoch": 0, "iter": 8862, "iter_tflops": 9.178955104093614, "iter_time": 2.247651641845703, "loss": 0.9737400412559509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.483122879849708, "step_time": 1.9680293502807618} +{"epoch": 0, "iter": 8863, "iter_tflops": 10.77558065400109, "iter_time": 1.914615478515625, "loss": 0.6923568248748779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.663700860373545, "step_time": 1.509919876098633} +{"epoch": 0, "iter": 8864, "iter_tflops": 38.119203547133274, "iter_time": 0.5412257232666016, "loss": 0.7891179919242859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.731743639004, "step_time": 0.49437410736083987} +{"epoch": 0, "iter": 8865, "iter_tflops": 12.094663552145672, "iter_time": 1.1819737396240235, "loss": 0.11084026098251343, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 13.036135634730309, "step_time": 1.0966113815307617} +{"epoch": 0, "iter": 8866, "iter_tflops": 9.366084688261418, "iter_time": 
1.5263127746582033, "loss": 0.21430453658103943, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 13.035013554722136, "step_time": 1.0967057800292968} +{"epoch": 0, "iter": 8867, "iter_tflops": 25.953918658334054, "iter_time": 0.550806022644043, "loss": 0.2270270735025406, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 27.681720054598415, "step_time": 0.5164265327453614} +{"epoch": 0, "iter": 8868, "iter_tflops": 25.59368509506669, "iter_time": 0.5585586700439453, "loss": 0.3044724464416504, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 27.175204966811155, "step_time": 0.5260521392822266} +{"epoch": 0, "iter": 8869, "iter_tflops": 40.94076893715447, "iter_time": 0.5039254035949707, "loss": 0.7951543927192688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.50543686293916, "step_time": 0.4635634422302246} +{"epoch": 0, "iter": 8870, "iter_tflops": 42.414871471527334, "iter_time": 0.48641178894042975, "loss": 0.5365405678749084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17710759869464, "step_time": 0.44678184890747075} +{"epoch": 0, "iter": 8871, "iter_tflops": 46.55173879626549, "iter_time": 0.44318631362915045, "loss": 0.8422900438308716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.116582345135505, "step_time": 0.41166201972961425} +{"epoch": 0, "iter": 8872, "iter_tflops": 49.22797602537268, "iter_time": 0.41909286499023435, "loss": 0.7392598986625671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.05809821134155, "step_time": 0.3888396720886231} +{"epoch": 0, "iter": 8873, "iter_tflops": 38.12405265296037, "iter_time": 0.5411568832397461, "loss": 0.9407387971878052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.014341141783106, "step_time": 0.5030214538574218} +{"epoch": 0, "iter": 8874, "iter_tflops": 15.243992666635675, "iter_time": 1.3533917236328123, "loss": 0.835361897945404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.13787656828746, "step_time": 0.9760248832702636} +{"epoch": 0, "iter": 8875, "iter_tflops": 40.259830520252365, 
"iter_time": 0.5124485931396485, "loss": 1.0001792907714844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96222773045381, "step_time": 0.46929135704040537} +{"epoch": 0, "iter": 8876, "iter_tflops": 41.25350242027976, "iter_time": 0.5001052589416504, "loss": 0.8307050466537476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.707381010250565, "step_time": 0.4614695167541504} +{"epoch": 0, "iter": 8877, "iter_tflops": 23.354386807626348, "iter_time": 0.8142394256591796, "loss": 0.2104828953742981, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 25.55258852118234, "step_time": 0.7441931953430176} +{"epoch": 0, "iter": 8878, "iter_tflops": 34.06031663674309, "iter_time": 0.5583055114746094, "loss": 0.3305307626724243, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 36.35371509133815, "step_time": 0.5230844345092773} +{"epoch": 0, "iter": 8879, "iter_tflops": 32.14526363477697, "iter_time": 0.5915665435791017, "loss": 0.18429668247699738, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 34.1135869651233, "step_time": 0.5574336853027343} +{"epoch": 0, "iter": 8880, "iter_tflops": 34.18916733368714, "iter_time": 0.5562013931274414, "loss": 0.412887841463089, "lr": 3e-05, "seqlen": 7568.0, "step_tflops": 36.320045729404676, "step_time": 0.5235693435668946} +{"epoch": 0, "iter": 8881, "iter_tflops": 33.65432232307425, "iter_time": 0.6130295333862306, "loss": 0.8473145961761475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.39263837842406, "step_time": 0.5669029350280762} +{"epoch": 0, "iter": 8882, "iter_tflops": 43.19094518813457, "iter_time": 0.4776717300415039, "loss": 0.7598960399627686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.54444615964578, "step_time": 0.4432557525634766} +{"epoch": 0, "iter": 8883, "iter_tflops": 44.61181839369471, "iter_time": 0.46245802688598636, "loss": 0.8265383839607239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.790326601320864, "step_time": 0.43170019912719726} +{"epoch": 0, "iter": 8884, "iter_tflops": 40.20626639090308, 
"iter_time": 0.5131312942504883, "loss": 0.8501202464103699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.038602789216796, "step_time": 0.4793625297546386} +{"epoch": 0, "iter": 8885, "iter_tflops": 32.18793949833797, "iter_time": 0.640957260131836, "loss": 0.6534897089004517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.410298761486395, "step_time": 0.5995615921020507} +{"epoch": 0, "iter": 8886, "iter_tflops": 16.59510298265967, "iter_time": 1.2432037048339843, "loss": 0.6251497268676758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.035594535123977, "step_time": 1.0297220516204835} +{"epoch": 0, "iter": 8887, "iter_tflops": 48.29170723626885, "iter_time": 0.42721814346313475, "loss": 0.6221728324890137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.301276346769775, "step_time": 0.39446634864807123} +{"epoch": 0, "iter": 8888, "iter_tflops": 46.22966489266342, "iter_time": 0.44627391433715824, "loss": 0.6166843175888062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.75954363757731, "step_time": 0.4146158103942871} +{"epoch": 0, "iter": 8889, "iter_tflops": 47.62838625724104, "iter_time": 0.4331680145263671, "loss": 0.004264398477971554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.21569434447466, "step_time": 0.39511288261413574} +{"epoch": 0, "iter": 8890, "iter_tflops": 12.536574903681945, "iter_time": 1.6456722564697264, "loss": 0.010469187051057816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.885521092828235, "step_time": 1.4857990112304686} +{"epoch": 0, "iter": 8891, "iter_tflops": 10.79999722891527, "iter_time": 1.9102869262695314, "loss": 0.0052470918744802475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.253632679391508, "step_time": 1.5566368865966798} +{"epoch": 0, "iter": 8892, "iter_tflops": 14.930516540277996, "iter_time": 1.381807083129883, "loss": 0.011434380896389484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.347449909257683, "step_time": 1.066346914291382} +{"epoch": 0, "iter": 8893, "iter_tflops": 
19.816306546365166, "iter_time": 0.731696174621582, "loss": 0.22972749173641205, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 21.536398764552445, "step_time": 0.6732562789916992} +{"epoch": 0, "iter": 8894, "iter_tflops": 22.391894011094568, "iter_time": 0.6475341339111329, "loss": 0.3103257119655609, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.106107040567007, "step_time": 0.6014872360229492} +{"epoch": 0, "iter": 8895, "iter_tflops": 22.770610071797133, "iter_time": 0.6367644805908204, "loss": 0.3929765224456787, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.4434328432233, "step_time": 0.5931865539550781} +{"epoch": 0, "iter": 8896, "iter_tflops": 23.19029350876681, "iter_time": 0.6252407150268554, "loss": 0.3358563184738159, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.90115311849354, "step_time": 0.5822829017639161} +{"epoch": 0, "iter": 8897, "iter_tflops": 12.72053107398613, "iter_time": 1.2781131286621095, "loss": 0.023770704865455627, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 13.543653194354205, "step_time": 1.2004351806640625} +{"epoch": 0, "iter": 8898, "iter_tflops": 16.73541613804168, "iter_time": 0.9714893035888671, "loss": 0.03764386847615242, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 20.221432314972976, "step_time": 0.8040121746063231} +{"epoch": 0, "iter": 8899, "iter_tflops": 40.60031906263741, "iter_time": 0.4004470443725585, "loss": 0.07579998672008514, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 44.1352180159555, "step_time": 0.3683742485046387} +{"epoch": 0, "iter": 8900, "iter_tflops": 43.06979006126959, "iter_time": 0.37748681259155276, "loss": 0.031248733401298523, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 46.99584755975772, "step_time": 0.34595136833190915} +{"epoch": 0, "iter": 8901, "iter_tflops": 32.57381283173178, "iter_time": 0.6333644027709961, "loss": 0.7929166555404663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.791829867354735, "step_time": 0.5929867324829102} +{"epoch": 0, "iter": 8902, 
"iter_tflops": 11.46627179336899, "iter_time": 1.7992852325439455, "loss": 0.9345871806144714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.203357064976572, "step_time": 1.357009075164795} +{"epoch": 0, "iter": 8903, "iter_tflops": 36.15811411951127, "iter_time": 0.5705799102783203, "loss": 0.7921305298805237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.40924818928987, "step_time": 0.5235089340209961} +{"epoch": 0, "iter": 8904, "iter_tflops": 42.48950121622352, "iter_time": 0.4855574417114258, "loss": 0.8200567364692688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.22186484625188, "step_time": 0.4463492240905762} +{"epoch": 0, "iter": 8905, "iter_tflops": 16.52063434780935, "iter_time": 1.2488075866699218, "loss": 0.34424084424972534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.426267762861485, "step_time": 1.1839077529907227} +{"epoch": 0, "iter": 8906, "iter_tflops": 17.78121366785353, "iter_time": 1.1602747650146483, "loss": 0.2775935232639313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.372078853661023, "step_time": 0.9653292808532716} +{"epoch": 0, "iter": 8907, "iter_tflops": 46.28651133612091, "iter_time": 0.44572582626342777, "loss": 0.2571536600589752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.03247273208183, "step_time": 0.41235406494140625} +{"epoch": 0, "iter": 8908, "iter_tflops": 46.87034835731488, "iter_time": 0.4401736755371094, "loss": 0.23507800698280334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.75004076044034, "step_time": 0.4065236835479736} +{"epoch": 0, "iter": 8909, "iter_tflops": 31.118069198469346, "iter_time": 0.662994010925293, "loss": 0.757789134979248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.195059455308474, "step_time": 0.6215109672546386} +{"epoch": 0, "iter": 8910, "iter_tflops": 11.3945727723413, "iter_time": 1.8106070251464843, "loss": 0.8700531721115112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.087253639282167, "step_time": 1.464522045135498} +{"epoch": 0, "iter": 8911, 
"iter_tflops": 36.16353475349251, "iter_time": 0.570494384765625, "loss": 0.6999270915985107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.41056675946923, "step_time": 0.5234914188385009} +{"epoch": 0, "iter": 8912, "iter_tflops": 39.13315833697767, "iter_time": 0.5272023620605468, "loss": 0.8300412893295288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.46772518319131, "step_time": 0.48580641937255864} +{"epoch": 0, "iter": 8913, "iter_tflops": 32.08757191408643, "iter_time": 0.6429621276855468, "loss": 0.25962400436401367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.588183801032315, "step_time": 0.5797175159454346} +{"epoch": 0, "iter": 8914, "iter_tflops": 46.62070880285934, "iter_time": 0.44253067016601566, "loss": 0.3382706344127655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.509651400967606, "step_time": 0.4005286960601806} +{"epoch": 0, "iter": 8915, "iter_tflops": 50.82338283385539, "iter_time": 0.4059370384216308, "loss": 0.2558314800262451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.14527339110275, "step_time": 0.37412260818481446} +{"epoch": 0, "iter": 8916, "iter_tflops": 47.819498081264086, "iter_time": 0.4314368476867676, "loss": 0.27820608019828796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.492670709567555, "step_time": 0.40066077804565425} +{"epoch": 0, "iter": 8917, "iter_tflops": 26.420294634669567, "iter_time": 0.7808805236816405, "loss": 0.2648797929286957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.802460148803476, "step_time": 0.7420599975585938} +{"epoch": 0, "iter": 8918, "iter_tflops": 12.636644573719844, "iter_time": 1.6326401672363284, "loss": 0.3271106481552124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.806571838188102, "step_time": 1.3052225189208984} +{"epoch": 0, "iter": 8919, "iter_tflops": 38.519506578715664, "iter_time": 0.535601188659668, "loss": 0.342325896024704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.39639373609027, "step_time": 0.48662378311157223} +{"epoch": 0, "iter": 
8920, "iter_tflops": 38.88981513165386, "iter_time": 0.5305011978149414, "loss": 0.24735882878303528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.49047329710755, "step_time": 0.48554633331298824} +{"epoch": 0, "iter": 8921, "iter_tflops": 16.094338718630592, "iter_time": 1.2818851318359374, "loss": 0.9995394945144653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.05338275027196, "step_time": 1.2097947845458985} +{"epoch": 0, "iter": 8922, "iter_tflops": 19.431936426010857, "iter_time": 1.061710632324219, "loss": 0.8091145157814026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.406134739771957, "step_time": 0.7812992591857909} +{"epoch": 0, "iter": 8923, "iter_tflops": 39.69569702994371, "iter_time": 0.5197312316894531, "loss": 0.7495642900466919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.763202778167226, "step_time": 0.4824496803283692} +{"epoch": 0, "iter": 8924, "iter_tflops": 43.34675444010818, "iter_time": 0.4759547462463379, "loss": 0.7758730053901672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.97634925758264, "step_time": 0.4391804351806641} +{"epoch": 0, "iter": 8925, "iter_tflops": 14.491628813232234, "iter_time": 1.4236559448242188, "loss": 0.556591272354126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.301711436512662, "step_time": 1.3482866668701172} +{"epoch": 0, "iter": 8926, "iter_tflops": 24.53866824485565, "iter_time": 0.8407584838867188, "loss": 0.4819222390651703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.621879246417077, "step_time": 0.6737370147705078} +{"epoch": 0, "iter": 8927, "iter_tflops": 39.52937036100353, "iter_time": 0.5219180908203125, "loss": 0.4406836926937103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.23310409067355, "step_time": 0.4772059268951416} +{"epoch": 0, "iter": 8928, "iter_tflops": 42.13346596330441, "iter_time": 0.4896604881286621, "loss": 0.5436153411865234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.85228704003418, "step_time": 0.44994688034057617} +{"epoch": 0, "iter": 
8929, "iter_tflops": 8.862959167347244, "iter_time": 1.1555426788330079, "loss": 0.058096423745155334, "lr": 3e-05, "seqlen": 4128.0, "step_tflops": 9.618530854384908, "step_time": 1.06477046585083} +{"epoch": 0, "iter": 8930, "iter_tflops": 9.779444926081958, "iter_time": 1.0472503967285156, "loss": 0.02621297538280487, "lr": 3e-05, "seqlen": 4128.0, "step_tflops": 11.962898945820529, "step_time": 0.8561075057983397} +{"epoch": 0, "iter": 8931, "iter_tflops": 22.56688104221542, "iter_time": 0.4538299980163575, "loss": 0.03216533735394478, "lr": 3e-05, "seqlen": 4128.0, "step_tflops": 24.902354551339457, "step_time": 0.4112674388885498} +{"epoch": 0, "iter": 8932, "iter_tflops": 22.590764410979656, "iter_time": 0.4533502006530762, "loss": 0.016823841258883476, "lr": 3e-05, "seqlen": 4128.0, "step_tflops": 24.98276095532327, "step_time": 0.40994378471374515} +{"epoch": 0, "iter": 8933, "iter_tflops": 22.0036635134701, "iter_time": 0.9376208419799803, "loss": 0.7718100547790527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.408789486025913, "step_time": 0.8813396148681641} +{"epoch": 0, "iter": 8934, "iter_tflops": 20.44655474267657, "iter_time": 1.0090254211425782, "loss": 0.9335539937019348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.084819339409137, "step_time": 0.8224533424377442} +{"epoch": 0, "iter": 8935, "iter_tflops": 46.220260280199106, "iter_time": 0.44636471939086914, "loss": 0.7547134160995483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.06516515430334, "step_time": 0.4120847988128662} +{"epoch": 0, "iter": 8936, "iter_tflops": 43.647727893589526, "iter_time": 0.472672794342041, "loss": 0.751848578453064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.98006857795548, "step_time": 0.43914566612243655} +{"epoch": 0, "iter": 8937, "iter_tflops": 32.822620213670675, "iter_time": 0.6285632705688476, "loss": 0.11955395340919495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.13061626860914, "step_time": 0.5872681922912597} +{"epoch": 0, 
"iter": 8938, "iter_tflops": 15.21991748250409, "iter_time": 1.3555325469970703, "loss": 0.12474291026592255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.900460273718352, "step_time": 1.0915656661987303} +{"epoch": 0, "iter": 8939, "iter_tflops": 21.028520220920846, "iter_time": 0.9811005859374999, "loss": 0.14030852913856506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.946097502836537, "step_time": 0.795152084350586} +{"epoch": 0, "iter": 8940, "iter_tflops": 48.394600889024936, "iter_time": 0.4263098182678222, "loss": 0.08883114904165268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.85095920667891, "step_time": 0.3903636531829834} +{"epoch": 0, "iter": 8941, "iter_tflops": 20.586131092447104, "iter_time": 0.7063162078857421, "loss": 0.3131049871444702, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 21.91898072610051, "step_time": 0.6633665237426758} +{"epoch": 0, "iter": 8942, "iter_tflops": 8.941723718076695, "iter_time": 1.6261202545166016, "loss": 0.1536441445350647, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 11.22784624859179, "step_time": 1.295022903442383} +{"epoch": 0, "iter": 8943, "iter_tflops": 21.696318470917802, "iter_time": 0.6701744384765624, "loss": 0.14599467813968658, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.330603662865926, "step_time": 0.6232293968200684} +{"epoch": 0, "iter": 8944, "iter_tflops": 23.574681117831975, "iter_time": 0.6167768707275391, "loss": 0.29462939500808716, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 25.23086003039389, "step_time": 0.5762910194396973} +{"epoch": 0, "iter": 8945, "iter_tflops": 36.624982305667324, "iter_time": 0.5633065795898438, "loss": 0.08771077543497086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.818824645281445, "step_time": 0.5054308567047119} +{"epoch": 0, "iter": 8946, "iter_tflops": 44.97816622072471, "iter_time": 0.45869129943847653, "loss": 0.07693026959896088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.90659815758813, "step_time": 0.42184683227539066} 
+{"epoch": 0, "iter": 8947, "iter_tflops": 49.20387629848071, "iter_time": 0.4192981338500976, "loss": 0.06766142696142197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.61678893691598, "step_time": 0.384787935256958} +{"epoch": 0, "iter": 8948, "iter_tflops": 52.33851085916522, "iter_time": 0.39418571853637696, "loss": 0.07014678418636322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.157090681654914, "step_time": 0.36095422744750977} +{"epoch": 0, "iter": 8949, "iter_tflops": 32.17905010878469, "iter_time": 0.6411343231201172, "loss": 0.4489540159702301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.368865828041784, "step_time": 0.6002843856811523} +{"epoch": 0, "iter": 8950, "iter_tflops": 14.57366982221778, "iter_time": 1.415641616821289, "loss": 0.30693772435188293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.008994875560155, "step_time": 1.1455993881225586} +{"epoch": 0, "iter": 8951, "iter_tflops": 40.163541699653656, "iter_time": 0.513677146911621, "loss": 0.4387681782245636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.93935215252378, "step_time": 0.4131229705810546} +{"epoch": 0, "iter": 8952, "iter_tflops": 49.695193535828544, "iter_time": 0.41515269470214844, "loss": 0.3664839565753937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.986201327029235, "step_time": 0.3821549396514893} +{"epoch": 0, "iter": 8953, "iter_tflops": 27.325990051615257, "iter_time": 0.7549989395141601, "loss": 0.13842107355594635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.810665770423384, "step_time": 0.7160922164916992} +{"epoch": 0, "iter": 8954, "iter_tflops": 13.424345161050793, "iter_time": 1.5368417053222658, "loss": 0.08702001720666885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.962680434015557, "step_time": 1.2162637615203857} +{"epoch": 0, "iter": 8955, "iter_tflops": 40.206859416440246, "iter_time": 0.5131237258911132, "loss": 0.18504710495471954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.16201357952193, "step_time": 
0.46716831588745117} +{"epoch": 0, "iter": 8956, "iter_tflops": 48.25524499737388, "iter_time": 0.4275409545898437, "loss": 0.10728604346513748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.85779735546096, "step_time": 0.39031315231323244} +{"epoch": 0, "iter": 8957, "iter_tflops": 10.78795430775785, "iter_time": 0.9306735610961914, "loss": 0.002147523919120431, "lr": 3e-05, "seqlen": 4048.0, "step_tflops": 11.360391539447113, "step_time": 0.883777976989746} +{"epoch": 0, "iter": 8958, "iter_tflops": 11.246913093175213, "iter_time": 0.8926950683593751, "loss": 0.003290389198809862, "lr": 3e-05, "seqlen": 4048.0, "step_tflops": 13.683302844660131, "step_time": 0.7337456436157226} +{"epoch": 0, "iter": 8959, "iter_tflops": 27.355976870653304, "iter_time": 0.36701536560058595, "loss": 0.0126516567543149, "lr": 3e-05, "seqlen": 4048.0, "step_tflops": 30.07963126139339, "step_time": 0.3337828102111816} +{"epoch": 0, "iter": 8960, "iter_tflops": 29.479044053028957, "iter_time": 0.34058308792114256, "loss": 0.003439185908064246, "lr": 3e-05, "seqlen": 4048.0, "step_tflops": 32.20055455608582, "step_time": 0.31179785537719723} +{"epoch": 0, "iter": 8961, "iter_tflops": 43.045460287856564, "iter_time": 0.47928616333007806, "loss": 0.6616972088813782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.903182934168214, "step_time": 0.4398655319213867} +{"epoch": 0, "iter": 8962, "iter_tflops": 44.81104340081138, "iter_time": 0.46040198898315426, "loss": 0.6069394946098328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.38439230506466, "step_time": 0.4177654628753662} +{"epoch": 0, "iter": 8963, "iter_tflops": 46.22612645402202, "iter_time": 0.44630807495117186, "loss": 0.5936383008956909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.946296356589855, "step_time": 0.41306553268432616} +{"epoch": 0, "iter": 8964, "iter_tflops": 46.448548481496346, "iter_time": 0.4441708984375, "loss": 0.4725596308708191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.919717569956156, 
"step_time": 0.41328546142578126} +{"epoch": 0, "iter": 8965, "iter_tflops": 30.542511931285492, "iter_time": 0.675487777709961, "loss": 0.269921213388443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.353193062733844, "step_time": 0.6376833801269531} +{"epoch": 0, "iter": 8966, "iter_tflops": 11.034542786563216, "iter_time": 1.8696826782226563, "loss": 0.3092822730541229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.68061829205124, "step_time": 1.5080527114868163} +{"epoch": 0, "iter": 8967, "iter_tflops": 37.05497654089806, "iter_time": 0.5567698440551757, "loss": 0.2989334762096405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.72610993003891, "step_time": 0.5065814914703369} +{"epoch": 0, "iter": 8968, "iter_tflops": 40.04408572250893, "iter_time": 0.5152095031738281, "loss": 0.31576141715049744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87408221147501, "step_time": 0.4702341899871826} +{"epoch": 0, "iter": 8969, "iter_tflops": 24.495745079349014, "iter_time": 0.8422317199707031, "loss": 0.7692387104034424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.64471767926733, "step_time": 0.7743033256530762} +{"epoch": 0, "iter": 8970, "iter_tflops": 19.04087642321562, "iter_time": 1.0835159606933593, "loss": 0.5489403605461121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.15090123399023, "step_time": 0.8911572513580323} +{"epoch": 0, "iter": 8971, "iter_tflops": 42.77821575954428, "iter_time": 0.4822803649902343, "loss": 0.5660433769226074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.10226302986727, "step_time": 0.4475071754455566} +{"epoch": 0, "iter": 8972, "iter_tflops": 41.5239409908727, "iter_time": 0.4968481559753418, "loss": 0.5825707316398621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.341714563497064, "step_time": 0.46527505111694334} +{"epoch": 0, "iter": 8973, "iter_tflops": 14.922334268316282, "iter_time": 0.6431472930908203, "loss": 0.003244870575144887, "lr": 3e-05, "seqlen": 3872.0, "step_tflops": 15.8660962869989, 
"step_time": 0.6048910026550294} +{"epoch": 0, "iter": 8974, "iter_tflops": 6.10397703291587, "iter_time": 1.5722960357666014, "loss": 0.0039466735906898975, "lr": 3e-05, "seqlen": 3872.0, "step_tflops": 8.301629965409798, "step_time": 1.156069221496582} +{"epoch": 0, "iter": 8975, "iter_tflops": 20.61377472548816, "iter_time": 0.46557503509521486, "loss": 0.0014011532766744494, "lr": 3e-05, "seqlen": 3872.0, "step_tflops": 22.86277494875872, "step_time": 0.4197766418457031} +{"epoch": 0, "iter": 8976, "iter_tflops": 21.33635260864903, "iter_time": 0.4498078498840332, "loss": 0.0018434131052345037, "lr": 3e-05, "seqlen": 3872.0, "step_tflops": 23.66796940011382, "step_time": 0.40549566078186033} +{"epoch": 0, "iter": 8977, "iter_tflops": 18.725023752597757, "iter_time": 1.1017926483154297, "loss": 0.5543374419212341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.86543696270118, "step_time": 1.0385421447753906} +{"epoch": 0, "iter": 8978, "iter_tflops": 25.367496974058433, "iter_time": 0.8132884979248046, "loss": 0.4979315996170044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.996164762468858, "step_time": 0.644798952102661} +{"epoch": 0, "iter": 8979, "iter_tflops": 50.78732045297219, "iter_time": 0.40622528076171877, "loss": 0.4789116680622101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.18126409646604, "step_time": 0.3738785953521728} +{"epoch": 0, "iter": 8980, "iter_tflops": 49.40645733300438, "iter_time": 0.41757888793945314, "loss": 0.5969708561897278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.29161870343465, "step_time": 0.3871358013153076} +{"epoch": 0, "iter": 8981, "iter_tflops": 22.112922683453366, "iter_time": 0.9329880905151366, "loss": 0.44276005029678345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.378860143429844, "step_time": 0.8824678955078125} +{"epoch": 0, "iter": 8982, "iter_tflops": 18.662008693043, "iter_time": 1.1055130157470703, "loss": 0.5285695791244507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
21.87534735237754, "step_time": 0.9431207275390625} +{"epoch": 0, "iter": 8983, "iter_tflops": 37.872000912930496, "iter_time": 0.5447584762573242, "loss": 0.3012310564517975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.29319393318088, "step_time": 0.49962455177307136} +{"epoch": 0, "iter": 8984, "iter_tflops": 40.916653386984706, "iter_time": 0.5042224082946777, "loss": 0.44815486669540405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.638679714888575, "step_time": 0.46217974281311036} +{"epoch": 0, "iter": 8985, "iter_tflops": 25.701220679900867, "iter_time": 0.802728157043457, "loss": 0.22170886397361755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.622699706927435, "step_time": 0.7468891067504884} +{"epoch": 0, "iter": 8986, "iter_tflops": 47.275036236297105, "iter_time": 0.4364056625366211, "loss": 0.17477364838123322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.48941238612799, "step_time": 0.40068613243103024} +{"epoch": 0, "iter": 8987, "iter_tflops": 49.26275416124873, "iter_time": 0.4187969970703125, "loss": 0.16297386586666107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.56807919214401, "step_time": 0.385137825012207} +{"epoch": 0, "iter": 8988, "iter_tflops": 49.76787773533291, "iter_time": 0.41454637908935543, "loss": 0.18250147998332977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.26588347360511, "step_time": 0.3801853427886963} +{"epoch": 0, "iter": 8989, "iter_tflops": 37.46858952425853, "iter_time": 0.5506237030029296, "loss": 0.14667315781116486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.005523328538345, "step_time": 0.503129623413086} +{"epoch": 0, "iter": 8990, "iter_tflops": 35.08937733338738, "iter_time": 0.587958381652832, "loss": 0.09729038178920746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.15142809591424, "step_time": 0.5269563465118409} +{"epoch": 0, "iter": 8991, "iter_tflops": 44.143180198270194, "iter_time": 0.46736763000488274, "loss": 0.1888929307460785, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 48.62736853501426, "step_time": 0.4242691745758057} +{"epoch": 0, "iter": 8992, "iter_tflops": 41.13120556978748, "iter_time": 0.5015922393798827, "loss": 0.12765412032604218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.980501785881785, "step_time": 0.45866748237609867} +{"epoch": 0, "iter": 8993, "iter_tflops": 20.54234634859415, "iter_time": 1.0043202056884768, "loss": 0.22078807651996613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.872244956980897, "step_time": 0.9432545013427736} +{"epoch": 0, "iter": 8994, "iter_tflops": 16.60919849769277, "iter_time": 1.2421486511230468, "loss": 0.24219748377799988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.692183358306025, "step_time": 0.870797477722168} +{"epoch": 0, "iter": 8995, "iter_tflops": 36.94276413314008, "iter_time": 0.5584610137939453, "loss": 0.4228784441947937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45329246296983, "step_time": 0.5099978828430175} +{"epoch": 0, "iter": 8996, "iter_tflops": 38.91559101465823, "iter_time": 0.5301498184204102, "loss": 0.25470560789108276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53053551093036, "step_time": 0.48508896636962895} +{"epoch": 0, "iter": 8997, "iter_tflops": 23.23919572352387, "iter_time": 0.8877714080810546, "loss": 0.08950944244861603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.916478613684216, "step_time": 0.8280100021362304} +{"epoch": 0, "iter": 8998, "iter_tflops": 9.507326113023504, "iter_time": 2.1700205993652344, "loss": 0.10341309756040573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.642753238972976, "step_time": 1.9385109329223635} +{"epoch": 0, "iter": 8999, "iter_tflops": 14.62659661818744, "iter_time": 1.4105190734863282, "loss": 0.07963139563798904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.981646025171234, "step_time": 1.214905403137207} +{"epoch": 0, "iter": 9000, "iter_tflops": 4.102055631461777, "iter_time": 5.029452392578126, "loss": 0.2044282853603363, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 4.208152332028627, "step_time": 4.90264892578125} +{"epoch": 0, "iter": 9001, "iter_tflops": 19.818626067971287, "iter_time": 0.7563317108154296, "loss": 0.2046704739332199, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 21.212025046367216, "step_time": 0.7066489562988282} +{"epoch": 0, "iter": 9002, "iter_tflops": 14.491981327016813, "iter_time": 1.0343275375366212, "loss": 0.4235852360725403, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 15.961886196560188, "step_time": 0.9390779495239258} +{"epoch": 0, "iter": 9003, "iter_tflops": 14.674609240743772, "iter_time": 1.0214551620483399, "loss": 0.2944351136684418, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 17.250731560574476, "step_time": 0.8689170837402344} +{"epoch": 0, "iter": 9004, "iter_tflops": 17.671072575586813, "iter_time": 0.8482481918334961, "loss": 0.2024163156747818, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 20.509571299558896, "step_time": 0.7308517150878906} +{"epoch": 0, "iter": 9005, "iter_tflops": 43.2795706688322, "iter_time": 0.4766935806274414, "loss": 0.5588089227676392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.735432562999506, "step_time": 0.42332841682434086} +{"epoch": 0, "iter": 9006, "iter_tflops": 29.57836047867268, "iter_time": 0.6975063247680665, "loss": 0.610043466091156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.308498655322687, "step_time": 0.6589614448547364} +{"epoch": 0, "iter": 9007, "iter_tflops": 24.45992474764674, "iter_time": 0.8434651260375976, "loss": 0.532512903213501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.431993809432985, "step_time": 0.752081443786621} +{"epoch": 0, "iter": 9008, "iter_tflops": 28.679041419434167, "iter_time": 0.7193787689208985, "loss": 0.6023833751678467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.34563525036461, "step_time": 0.6187044677734375} +{"epoch": 0, "iter": 9009, "iter_tflops": 18.963274980282545, "iter_time": 1.0879499206542969, "loss": 0.7642704248428345, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 21.380642169290507, "step_time": 0.9649426498413087} +{"epoch": 0, "iter": 9010, "iter_tflops": 20.773466052230123, "iter_time": 0.9931464233398437, "loss": 0.5592755079269409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.490053979752535, "step_time": 0.8782905960083007} +{"epoch": 0, "iter": 9011, "iter_tflops": 20.05177599044544, "iter_time": 1.028891082763672, "loss": 0.5709279179573059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.907814103032248, "step_time": 0.9006138000488281} +{"epoch": 0, "iter": 9012, "iter_tflops": 24.003785421250992, "iter_time": 0.8594933319091798, "loss": 0.7571988701820374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.470543543631283, "step_time": 0.7510260391235353} +{"epoch": 0, "iter": 9013, "iter_tflops": 5.917334852765557, "iter_time": 3.4865516357421873, "loss": 0.5282918810844421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 6.097859582421963, "step_time": 3.383333648681641} +{"epoch": 0, "iter": 9014, "iter_tflops": 11.148426555266761, "iter_time": 1.8505834350585937, "loss": 0.4778222143650055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.94267320933503, "step_time": 1.1498338775634767} +{"epoch": 0, "iter": 9015, "iter_tflops": 43.64541720212373, "iter_time": 0.47269781875610356, "loss": 0.4934879541397095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.083891110612065, "step_time": 0.42032310485839847} +{"epoch": 0, "iter": 9016, "iter_tflops": 30.78955929656903, "iter_time": 0.6700678405761719, "loss": 0.4878195524215698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.012586228635826, "step_time": 0.589247917175293} +{"epoch": 0, "iter": 9017, "iter_tflops": 5.819578425982951, "iter_time": 2.5686742248535155, "loss": 0.26442164182662964, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 6.18991938593085, "step_time": 2.4149912414550783} +{"epoch": 0, "iter": 9018, "iter_tflops": 17.94467380742741, "iter_time": 0.833038330078125, "loss": 0.27590328454971313, "lr": 3e-05, "seqlen": 
5984.0, "step_tflops": 20.509815773869537, "step_time": 0.7288510665893555} +{"epoch": 0, "iter": 9019, "iter_tflops": 21.4601332613497, "iter_time": 0.6965754089355469, "loss": 0.3502155542373657, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.6731156318261, "step_time": 0.6314589653015137} +{"epoch": 0, "iter": 9020, "iter_tflops": 20.23111456087932, "iter_time": 0.7388916244506836, "loss": 0.31853264570236206, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.766164560030425, "step_time": 0.6289866867065429} +{"epoch": 0, "iter": 9021, "iter_tflops": 6.970416538309702, "iter_time": 2.4620455932617187, "loss": 0.0027070140931755304, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 7.270061013761183, "step_time": 2.3605693664550786} +{"epoch": 0, "iter": 9022, "iter_tflops": 24.398985372271035, "iter_time": 0.7033687286376954, "loss": 0.0074406699277460575, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 28.258814408479385, "step_time": 0.6072966499328614} +{"epoch": 0, "iter": 9023, "iter_tflops": 41.61483913829297, "iter_time": 0.4123885536193848, "loss": 0.014405768364667892, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 46.27304587489738, "step_time": 0.3708742961883545} +{"epoch": 0, "iter": 9024, "iter_tflops": 28.40838981899581, "iter_time": 0.60409912109375, "loss": 0.006998573895543814, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 30.102088501917, "step_time": 0.570109390258789} +{"epoch": 0, "iter": 9025, "iter_tflops": 15.196952230923266, "iter_time": 1.3575809936523437, "loss": 0.667782723903656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.51053831375346, "step_time": 1.2495712203979492} +{"epoch": 0, "iter": 9026, "iter_tflops": 27.570373695010616, "iter_time": 0.7483066329956056, "loss": 0.7854984998703003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.03414033103014, "step_time": 0.6440345611572266} +{"epoch": 0, "iter": 9027, "iter_tflops": 31.61038873097787, "iter_time": 0.6526681365966797, "loss": 0.713623046875, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 37.56426287636596, "step_time": 0.549221305847168} +{"epoch": 0, "iter": 9028, "iter_tflops": 34.819374837578835, "iter_time": 0.5925176315307616, "loss": 0.7463238835334778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.92214257135569, "step_time": 0.5587729225158692} +{"epoch": 0, "iter": 9029, "iter_tflops": 12.229509497287642, "iter_time": 1.6869927215576173, "loss": 0.5289089679718018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.727313415222492, "step_time": 1.6210093078613281} +{"epoch": 0, "iter": 9030, "iter_tflops": 25.733094784810113, "iter_time": 0.8017338638305663, "loss": 0.43810775876045227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.120540727247942, "step_time": 0.7084721984863283} +{"epoch": 0, "iter": 9031, "iter_tflops": 30.241635483524192, "iter_time": 0.6822082595825196, "loss": 0.5582922697067261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.1926408250357, "step_time": 0.5862331733703613} +{"epoch": 0, "iter": 9032, "iter_tflops": 35.64172150328005, "iter_time": 0.5788467178344727, "loss": 0.3076527416706085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.99978894156923, "step_time": 0.542926528930664} +{"epoch": 0, "iter": 9033, "iter_tflops": 24.39104693797392, "iter_time": 0.8458469848632812, "loss": 0.3318282961845398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.75779224707371, "step_time": 0.8009651336669922} +{"epoch": 0, "iter": 9034, "iter_tflops": 30.323380872030366, "iter_time": 0.6803691711425781, "loss": 0.37127068638801575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.31484734590801, "step_time": 0.5681173133850097} +{"epoch": 0, "iter": 9035, "iter_tflops": 34.18627451008685, "iter_time": 0.6034905471801758, "loss": 0.2781575322151184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.80961657002189, "step_time": 0.5315974578857421} +{"epoch": 0, "iter": 9036, "iter_tflops": 38.93186010653596, "iter_time": 0.5299282760620118, "loss": 0.31231921911239624, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 42.10476415415723, "step_time": 0.48999427795410155} +{"epoch": 0, "iter": 9037, "iter_tflops": 10.236930716716387, "iter_time": 2.0153592987060547, "loss": 0.24511125683784485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.456959193560829, "step_time": 1.9729534301757812} +{"epoch": 0, "iter": 9038, "iter_tflops": 22.381000286107376, "iter_time": 0.9218128433227539, "loss": 0.2420952469110489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.35418038863786, "step_time": 0.7828395042419434} +{"epoch": 0, "iter": 9039, "iter_tflops": 27.385131719934634, "iter_time": 0.753368423461914, "loss": 0.22253850102424622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.731804327078418, "step_time": 0.6713271141052246} +{"epoch": 0, "iter": 9040, "iter_tflops": 29.087192882428724, "iter_time": 0.7092844467163086, "loss": 0.3091256320476532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.91080103617747, "step_time": 0.6083929862976074} +{"epoch": 0, "iter": 9041, "iter_tflops": 21.095294440705835, "iter_time": 0.9779950485229494, "loss": 0.23612761497497559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.388298579401724, "step_time": 0.9215123443603515} +{"epoch": 0, "iter": 9042, "iter_tflops": 4.893483527296202, "iter_time": 4.2160341186523445, "loss": 0.15025484561920166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 6.619901557660525, "step_time": 3.1165257263183594} +{"epoch": 0, "iter": 9043, "iter_tflops": 26.458372391337964, "iter_time": 0.7797567138671875, "loss": 0.23574510216712952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.993782633387116, "step_time": 0.6656526489257812} +{"epoch": 0, "iter": 9044, "iter_tflops": 33.45301637588142, "iter_time": 0.6167184829711914, "loss": 0.18748438358306885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.52298683760947, "step_time": 0.5220023880004883} +{"epoch": 0, "iter": 9045, "iter_tflops": 18.340043886890218, "iter_time": 0.7928180618286133, "loss": 0.2681449055671692, "lr": 3e-05, 
"seqlen": 5824.0, "step_tflops": 19.426986762854852, "step_time": 0.7484597702026367} +{"epoch": 0, "iter": 9046, "iter_tflops": 14.735941630811228, "iter_time": 0.9867247314453125, "loss": 0.35303741693496704, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 15.72005924983655, "step_time": 0.9249531326293946} +{"epoch": 0, "iter": 9047, "iter_tflops": 13.37660842897459, "iter_time": 1.0869958648681641, "loss": 0.17816536128520966, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 15.105066872056552, "step_time": 0.9626119613647461} +{"epoch": 0, "iter": 9048, "iter_tflops": 13.674482084214485, "iter_time": 1.0633176422119142, "loss": 0.3677064776420593, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 15.528593255700208, "step_time": 0.9363577117919921} +{"epoch": 0, "iter": 9049, "iter_tflops": 9.206573180225007, "iter_time": 2.2409090881347655, "loss": 0.31871581077575684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.918614578505718, "step_time": 2.0800378265380863} +{"epoch": 0, "iter": 9050, "iter_tflops": 25.47786532176552, "iter_time": 0.8097653884887697, "loss": 0.2553968131542206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.065409925454887, "step_time": 0.7098160171508788} +{"epoch": 0, "iter": 9051, "iter_tflops": 25.224691458156688, "iter_time": 0.8178927993774414, "loss": 0.2660946249961853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.817327001313267, "step_time": 0.7159266891479493} +{"epoch": 0, "iter": 9052, "iter_tflops": 26.891976600834408, "iter_time": 0.7671839752197265, "loss": 0.2835741937160492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.503117060913276, "step_time": 0.6763601722717286} +{"epoch": 0, "iter": 9053, "iter_tflops": 15.70750969237971, "iter_time": 1.313454132080078, "loss": 0.3823988139629364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.694414492538755, "step_time": 1.2358081512451171} +{"epoch": 0, "iter": 9054, "iter_tflops": 16.368573861413044, "iter_time": 1.2604087371826171, "loss": 0.30300840735435486, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 20.942031396590775, "step_time": 0.9851524486541748} +{"epoch": 0, "iter": 9055, "iter_tflops": 39.56579568198328, "iter_time": 0.521437599182129, "loss": 0.28328680992126465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.48684878530111, "step_time": 0.4744214420318603} +{"epoch": 0, "iter": 9056, "iter_tflops": 36.71244082637293, "iter_time": 0.5619646377563476, "loss": 0.22997735440731049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.2628074761516, "step_time": 0.5124107036590576} +{"epoch": 0, "iter": 9057, "iter_tflops": 28.236092471092466, "iter_time": 0.7306639022827149, "loss": 0.674261748790741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.953229445269578, "step_time": 0.6665247497558595} +{"epoch": 0, "iter": 9058, "iter_tflops": 37.38201890456453, "iter_time": 0.5518988571166992, "loss": 0.5087634921073914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.06647458932588, "step_time": 0.5023828735351562} +{"epoch": 0, "iter": 9059, "iter_tflops": 42.169336402546435, "iter_time": 0.48924396896362304, "loss": 0.5107295513153076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.092514009106935, "step_time": 0.44760182762145995} +{"epoch": 0, "iter": 9060, "iter_tflops": 37.60544378837699, "iter_time": 0.5486198654174804, "loss": 0.5399110317230225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.159981534371724, "step_time": 0.5012415637969971} +{"epoch": 0, "iter": 9061, "iter_tflops": 18.795966620611342, "iter_time": 1.0976340789794923, "loss": 0.6442335844039917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.908881781169594, "step_time": 1.0362758560180665} +{"epoch": 0, "iter": 9062, "iter_tflops": 14.018849957109687, "iter_time": 1.4716680450439452, "loss": 0.7476798295974731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.583523158629376, "step_time": 1.2440718002319335} +{"epoch": 0, "iter": 9063, "iter_tflops": 34.60514523693178, "iter_time": 0.5961857223510741, "loss": 0.9522560834884644, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 37.77800401502685, "step_time": 0.5461139106750488} +{"epoch": 0, "iter": 9064, "iter_tflops": 38.344815119732466, "iter_time": 0.5380412826538086, "loss": 0.9269452691078186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.70821419576759, "step_time": 0.49465300559997566} +{"epoch": 0, "iter": 9065, "iter_tflops": 24.014103626601493, "iter_time": 0.8591240310668946, "loss": 0.8429526090621948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.791236385701346, "step_time": 0.7999265022277832} +{"epoch": 0, "iter": 9066, "iter_tflops": 43.10535533420575, "iter_time": 0.47862019348144536, "loss": 0.6624553799629211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.5349777158523, "step_time": 0.4433459415435791} +{"epoch": 0, "iter": 9067, "iter_tflops": 42.84971006379677, "iter_time": 0.48147568511962896, "loss": 0.6638728380203247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15365466956427, "step_time": 0.44700888061523436} +{"epoch": 0, "iter": 9068, "iter_tflops": 45.96579189149302, "iter_time": 0.44883581161499025, "loss": 0.6782620549201965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.377173159694536, "step_time": 0.4178265419006348} +{"epoch": 0, "iter": 9069, "iter_tflops": 33.781609918078345, "iter_time": 0.6107196655273438, "loss": 0.025611823424696922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.20888408574204, "step_time": 0.5697798767089844} +{"epoch": 0, "iter": 9070, "iter_tflops": 50.7561383415064, "iter_time": 0.4064748458862305, "loss": 0.02243289165198803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.39321208296917, "step_time": 0.35331321525573733} +{"epoch": 0, "iter": 9071, "iter_tflops": 50.78938036773008, "iter_time": 0.40620880508422846, "loss": 0.02096080221235752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.30891727482563, "step_time": 0.37301568222045894} +{"epoch": 0, "iter": 9072, "iter_tflops": 53.56735496173116, "iter_time": 0.3851430320739746, "loss": 0.034515541046857834, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.480889907569185, "step_time": 0.35278350830078126} +{"epoch": 0, "iter": 9073, "iter_tflops": 37.2967126195924, "iter_time": 0.5531611785888673, "loss": 0.0032463485840708017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54069004573221, "step_time": 0.5088984298706055} +{"epoch": 0, "iter": 9074, "iter_tflops": 53.438676611953454, "iter_time": 0.386070442199707, "loss": 0.0025257603265345097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.11621770360898, "step_time": 0.34899210929870605} +{"epoch": 0, "iter": 9075, "iter_tflops": 57.90606129235865, "iter_time": 0.35628556060791017, "loss": 0.0060544610023498535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.831715117351685, "step_time": 0.32321070289611814} +{"epoch": 0, "iter": 9076, "iter_tflops": 54.00237811321782, "iter_time": 0.38204046249389645, "loss": 0.0019613876938819885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.0493205359142, "step_time": 0.3493874835968017} +{"epoch": 0, "iter": 9077, "iter_tflops": 43.78857555518774, "iter_time": 0.47115242385864253, "loss": 0.6061505675315857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.853725136575, "step_time": 0.4311282653808594} +{"epoch": 0, "iter": 9078, "iter_tflops": 46.2809074513544, "iter_time": 0.4457797966003418, "loss": 0.43880021572113037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.64072727468092, "step_time": 0.4074012088775635} +{"epoch": 0, "iter": 9079, "iter_tflops": 47.71810448627435, "iter_time": 0.4323535842895508, "loss": 0.43063926696777344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.71895748047667, "step_time": 0.39890776062011724} +{"epoch": 0, "iter": 9080, "iter_tflops": 44.87429154449849, "iter_time": 0.45975307464599613, "loss": 0.5143788456916809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.55144607084361, "step_time": 0.42493262672424315} +{"epoch": 0, "iter": 9081, "iter_tflops": 27.622872109598955, "iter_time": 0.7468844451904296, "loss": 
0.38873010873794556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.23130148771292, "step_time": 0.7057877159118652} +{"epoch": 0, "iter": 9082, "iter_tflops": 14.021355846529065, "iter_time": 1.471405029296875, "loss": 0.5192495584487915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.620285465794336, "step_time": 1.2413200454711915} +{"epoch": 0, "iter": 9083, "iter_tflops": 35.51607787133424, "iter_time": 0.5808944778442383, "loss": 0.4942087233066559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.87317586888096, "step_time": 0.5307282733917236} +{"epoch": 0, "iter": 9084, "iter_tflops": 40.68288731234627, "iter_time": 0.5071196975708008, "loss": 0.5420249104499817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.42487881591671, "step_time": 0.46440404701232907} +{"epoch": 0, "iter": 9085, "iter_tflops": 22.157922855979432, "iter_time": 0.9310932998657226, "loss": 0.19209308922290802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.103119005344716, "step_time": 0.8559511947631836} +{"epoch": 0, "iter": 9086, "iter_tflops": 17.851673528275604, "iter_time": 1.1556952056884766, "loss": 0.11677089333534241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.783951584976528, "step_time": 0.9470776424407958} +{"epoch": 0, "iter": 9087, "iter_tflops": 38.98016753225615, "iter_time": 0.5292715454101563, "loss": 0.1390482634305954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.848597397254885, "step_time": 0.48148818778991703} +{"epoch": 0, "iter": 9088, "iter_tflops": 44.61866150751201, "iter_time": 0.4623871002197265, "loss": 0.09149112552404404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.63156642167966, "step_time": 0.4242325515747071} +{"epoch": 0, "iter": 9089, "iter_tflops": 36.17684274727031, "iter_time": 0.5702845230102539, "loss": 0.054740045219659805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.682112813523794, "step_time": 0.5199091491699218} +{"epoch": 0, "iter": 9090, "iter_tflops": 48.607352350244085, "iter_time": 0.4244438858032227, 
"loss": 0.09200599044561386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15529389784525, "step_time": 0.3881286697387695} +{"epoch": 0, "iter": 9091, "iter_tflops": 54.30040634785496, "iter_time": 0.3799436302185058, "loss": 0.09158061444759369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.609287326193304, "step_time": 0.34610535430908207} +{"epoch": 0, "iter": 9092, "iter_tflops": 55.043872084766406, "iter_time": 0.3748118133544922, "loss": 0.0688987672328949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.809129029447874, "step_time": 0.3449489040374756} +{"epoch": 0, "iter": 9093, "iter_tflops": 47.970006182101635, "iter_time": 0.430083194732666, "loss": 0.07458774000406265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.06555551274727, "step_time": 0.3887850284576416} +{"epoch": 0, "iter": 9094, "iter_tflops": 50.376894274988814, "iter_time": 0.40953484344482427, "loss": 0.05373397842049599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.830304105949516, "step_time": 0.3762717323303223} +{"epoch": 0, "iter": 9095, "iter_tflops": 51.59673338119989, "iter_time": 0.39985270690917973, "loss": 0.0643414780497551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.10668725620301, "step_time": 0.3677118453979492} +{"epoch": 0, "iter": 9096, "iter_tflops": 53.764135242139645, "iter_time": 0.3837333831787109, "loss": 0.07815505564212799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.315946347120835, "step_time": 0.3537813377380371} +{"epoch": 0, "iter": 9097, "iter_tflops": 26.199108095609265, "iter_time": 0.6817399673461914, "loss": 0.03743390366435051, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 27.82553758583317, "step_time": 0.6418916091918945} +{"epoch": 0, "iter": 9098, "iter_tflops": 10.02764764048037, "iter_time": 1.781173385620117, "loss": 0.024298924952745438, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 12.048495360653614, "step_time": 1.4824240341186523} +{"epoch": 0, "iter": 9099, "iter_tflops": 11.755179012846677, "iter_time": 
1.5194136199951171, "loss": 0.03119080141186714, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 13.22138221230775, "step_time": 1.3509161758422852} +{"epoch": 0, "iter": 9100, "iter_tflops": 17.40991898174961, "iter_time": 1.0259082260131838, "loss": 0.07109400629997253, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 24.041311750958094, "step_time": 0.7429286422729493} +{"epoch": 0, "iter": 9101, "iter_tflops": 18.07571389463176, "iter_time": 0.8089272155761719, "loss": 0.20917391777038574, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 19.020090124818093, "step_time": 0.7687627563476562} +{"epoch": 0, "iter": 9102, "iter_tflops": 8.582217395013277, "iter_time": 1.7037481384277346, "loss": 0.2921985983848572, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 11.253339673507108, "step_time": 1.2993420028686524} +{"epoch": 0, "iter": 9103, "iter_tflops": 21.873713400174843, "iter_time": 0.6684707183837891, "loss": 0.15260353684425354, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 23.512139975095995, "step_time": 0.6218888168334961} +{"epoch": 0, "iter": 9104, "iter_tflops": 21.838764515188522, "iter_time": 0.6695404815673828, "loss": 0.23646360635757446, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 23.36138161323875, "step_time": 0.6259020614624022} +{"epoch": 0, "iter": 9105, "iter_tflops": 16.709118076541383, "iter_time": 1.2347206726074218, "loss": 0.34514790773391724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.638779161808976, "step_time": 1.169644073486328} +{"epoch": 0, "iter": 9106, "iter_tflops": 20.582800592899023, "iter_time": 1.0023462753295898, "loss": 0.2548697292804718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.28665609717102, "step_time": 0.8158885631561281} +{"epoch": 0, "iter": 9107, "iter_tflops": 50.128107681491606, "iter_time": 0.41156737136840815, "loss": 0.30752095580101013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.009484532314865, "step_time": 0.3750461158752441} +{"epoch": 0, "iter": 9108, "iter_tflops": 50.6364745285316, 
"iter_time": 0.4074354248046875, "loss": 0.30047735571861267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.64168832426961, "step_time": 0.377570571899414} +{"epoch": 0, "iter": 9109, "iter_tflops": 38.70880374508596, "iter_time": 0.5329819450378418, "loss": 0.11766884475946426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53330948832975, "step_time": 0.4967360839843751} +{"epoch": 0, "iter": 9110, "iter_tflops": 10.191856271727028, "iter_time": 2.024272415161133, "loss": 0.11125552654266357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.351205726728768, "step_time": 1.54526070022583} +{"epoch": 0, "iter": 9111, "iter_tflops": 12.060136197872154, "iter_time": 1.7106849517822265, "loss": 0.1929941028356552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.95061390499708, "step_time": 1.3799495887756348} +{"epoch": 0, "iter": 9112, "iter_tflops": 19.379820284799813, "iter_time": 1.0645657806396485, "loss": 0.15564145147800446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.74970436854518, "step_time": 0.86868843460083} +{"epoch": 0, "iter": 9113, "iter_tflops": 15.547878718392788, "iter_time": 0.9456965103149415, "loss": 0.19831502437591553, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 16.700513672295294, "step_time": 0.8804264907836914} +{"epoch": 0, "iter": 9114, "iter_tflops": 25.979906269864188, "iter_time": 0.5659594955444336, "loss": 0.4313749074935913, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.754972271205478, "step_time": 0.5297636222839356} +{"epoch": 0, "iter": 9115, "iter_tflops": 26.417440082250312, "iter_time": 0.5565858993530274, "loss": 0.20750008523464203, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 28.19249606032889, "step_time": 0.5215421371459961} +{"epoch": 0, "iter": 9116, "iter_tflops": 25.614119723087917, "iter_time": 0.5740417709350587, "loss": 0.2947740852832794, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.241442940961484, "step_time": 0.5397502136230469} +{"epoch": 0, "iter": 9117, "iter_tflops": 39.34067864350722, 
"iter_time": 0.42161072158813473, "loss": 0.08350975811481476, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 43.387916264614454, "step_time": 0.3822827491760254} +{"epoch": 0, "iter": 9118, "iter_tflops": 36.719833179834374, "iter_time": 0.45170281219482417, "loss": 0.0717056542634964, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 40.32733267737703, "step_time": 0.4112955360412598} +{"epoch": 0, "iter": 9119, "iter_tflops": 36.42493259723631, "iter_time": 0.4553598518371582, "loss": 0.08261924237012863, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 39.67776747997536, "step_time": 0.4180288600921631} +{"epoch": 0, "iter": 9120, "iter_tflops": 41.889499451900896, "iter_time": 0.39595727157592775, "loss": 0.07121842354536057, "lr": 3e-05, "seqlen": 6624.0, "step_tflops": 45.536755755461265, "step_time": 0.36424316215515135} +{"epoch": 0, "iter": 9121, "iter_tflops": 42.82887655222555, "iter_time": 0.4817098922729492, "loss": 0.6470506191253662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.446809270184055, "step_time": 0.4441875305175781} +{"epoch": 0, "iter": 9122, "iter_tflops": 42.858004178055104, "iter_time": 0.4813825073242187, "loss": 0.7628620266914368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.46945513424903, "step_time": 0.4439710655212402} +{"epoch": 0, "iter": 9123, "iter_tflops": 39.41827648192755, "iter_time": 0.523389030456543, "loss": 0.9124077558517456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.687436135873924, "step_time": 0.48330598831176763} +{"epoch": 0, "iter": 9124, "iter_tflops": 47.77479366173453, "iter_time": 0.4318405570983887, "loss": 0.724573016166687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.75706901647699, "step_time": 0.3986140232086181} +{"epoch": 0, "iter": 9125, "iter_tflops": 36.868060058875486, "iter_time": 0.5595925979614258, "loss": 0.49040651321411133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.84195354264124, "step_time": 0.5178233413696288} +{"epoch": 0, "iter": 9126, "iter_tflops": 
14.608387292390995, "iter_time": 1.4122772827148438, "loss": 0.4917904734611511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.743842398694483, "step_time": 1.1006864585876466} +{"epoch": 0, "iter": 9127, "iter_tflops": 41.8655612623354, "iter_time": 0.4927939071655274, "loss": 0.6848919987678528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.70868320350733, "step_time": 0.4513604869842529} +{"epoch": 0, "iter": 9128, "iter_tflops": 39.58504264703693, "iter_time": 0.521184066772461, "loss": 0.7584791779518127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41404361350382, "step_time": 0.47521704483032223} +{"epoch": 0, "iter": 9129, "iter_tflops": 19.548683309062984, "iter_time": 1.0553699798583984, "loss": 0.003533395938575268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.583287994615745, "step_time": 1.002322540283203} +{"epoch": 0, "iter": 9130, "iter_tflops": 24.503751255791993, "iter_time": 0.8419565353393555, "loss": 0.002337235026061535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.41497947645413, "step_time": 0.6783201522827148} +{"epoch": 0, "iter": 9131, "iter_tflops": 50.7083329043648, "iter_time": 0.4068580513000488, "loss": 0.0014706164365634322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.181254285329395, "step_time": 0.3672237968444824} +{"epoch": 0, "iter": 9132, "iter_tflops": 42.80640102655876, "iter_time": 0.4819628143310547, "loss": 0.009398950263857841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.39328172462716, "step_time": 0.43531683731079107} +{"epoch": 0, "iter": 9133, "iter_tflops": 33.57163018790413, "iter_time": 0.614539520263672, "loss": 0.7363156676292419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.94967668144907, "step_time": 0.5583565368652343} +{"epoch": 0, "iter": 9134, "iter_tflops": 34.3871077551602, "iter_time": 0.5999659423828124, "loss": 0.8333485126495361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.68848455843952, "step_time": 0.5474110660552979} +{"epoch": 0, "iter": 9135, "iter_tflops": 
36.656125782798846, "iter_time": 0.5628279876708984, "loss": 0.6208261251449585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.50562789307646, "step_time": 0.5222317581176757} +{"epoch": 0, "iter": 9136, "iter_tflops": 39.9706972940361, "iter_time": 0.5161554565429688, "loss": 0.7245079874992371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.462578716790766, "step_time": 0.47468636512756346} +{"epoch": 0, "iter": 9137, "iter_tflops": 32.5492359378231, "iter_time": 0.6338426361083984, "loss": 0.24823319911956787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.82413216237992, "step_time": 0.5758993244171142} +{"epoch": 0, "iter": 9138, "iter_tflops": 37.164740369242956, "iter_time": 0.5551254577636718, "loss": 0.21618875861167908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.83473376149973, "step_time": 0.5052339420318605} +{"epoch": 0, "iter": 9139, "iter_tflops": 40.95145715386213, "iter_time": 0.5037938804626465, "loss": 0.2725139558315277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.63296160422834, "step_time": 0.4622389545440674} +{"epoch": 0, "iter": 9140, "iter_tflops": 40.366748432573374, "iter_time": 0.5110912895202637, "loss": 0.19294139742851257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.126521860562555, "step_time": 0.4675440673828125} +{"epoch": 0, "iter": 9141, "iter_tflops": 17.373806928719887, "iter_time": 1.1874826049804688, "loss": 0.1644410789012909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.468675041994274, "step_time": 1.1170857391357423} +{"epoch": 0, "iter": 9142, "iter_tflops": 15.554923013698513, "iter_time": 1.3263385162353516, "loss": 0.1942627876996994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.881923001894172, "step_time": 1.0376809883117675} +{"epoch": 0, "iter": 9143, "iter_tflops": 37.291493637348125, "iter_time": 0.5532385940551757, "loss": 0.3154796063899994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56673471101939, "step_time": 0.5085717067718506} +{"epoch": 0, "iter": 9144, 
"iter_tflops": 44.51927728909572, "iter_time": 0.4634193267822266, "loss": 0.2880004048347473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.40638839227067, "step_time": 0.42620600700378414} +{"epoch": 0, "iter": 9145, "iter_tflops": 50.33073277119206, "iter_time": 0.4099104537963867, "loss": 0.013526893220841885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.01044849560695, "step_time": 0.36834365844726563} +{"epoch": 0, "iter": 9146, "iter_tflops": 51.011016432508036, "iter_time": 0.4044438819885254, "loss": 0.006925020832568407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.59288307696777, "step_time": 0.36455279159545895} +{"epoch": 0, "iter": 9147, "iter_tflops": 52.362879340148616, "iter_time": 0.39400227355957035, "loss": 0.004422532394528389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.208559469323916, "step_time": 0.360629487991333} +{"epoch": 0, "iter": 9148, "iter_tflops": 58.01209823574078, "iter_time": 0.35563432693481445, "loss": 0.004678802099078894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.396424200474065, "step_time": 0.3254299240112305} +{"epoch": 0, "iter": 9149, "iter_tflops": 39.07050663326829, "iter_time": 0.5280477600097656, "loss": 0.20188608765602112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.07748843880852, "step_time": 0.49031190490722654} +{"epoch": 0, "iter": 9150, "iter_tflops": 10.576160200094197, "iter_time": 1.950716812133789, "loss": 0.1586979627609253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.117559408304926, "step_time": 1.4613781967163086} +{"epoch": 0, "iter": 9151, "iter_tflops": 12.05432211358563, "iter_time": 1.7115100555419922, "loss": 0.23412442207336426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.251312590930295, "step_time": 1.4476626892089843} +{"epoch": 0, "iter": 9152, "iter_tflops": 40.082824703934165, "iter_time": 0.5147115669250488, "loss": 0.1351012885570526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.81007320038465, "step_time": 0.4060433731079101} +{"epoch": 0, 
"iter": 9153, "iter_tflops": 19.534204512908694, "iter_time": 0.7840841064453126, "loss": 0.308215469121933, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 20.651244404776964, "step_time": 0.7416724624633789} +{"epoch": 0, "iter": 9154, "iter_tflops": 7.3587719463592105, "iter_time": 2.081387954711914, "loss": 0.17061136662960052, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 9.768694197167372, "step_time": 1.5679126586914063} +{"epoch": 0, "iter": 9155, "iter_tflops": 9.513372571193194, "iter_time": 1.6099925842285154, "loss": 0.27280038595199585, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 11.66727822219328, "step_time": 1.3127705535888672} +{"epoch": 0, "iter": 9156, "iter_tflops": 11.348629824143462, "iter_time": 1.3496307067871092, "loss": 0.26276952028274536, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 13.41183513992743, "step_time": 1.1420107040405272} +{"epoch": 0, "iter": 9157, "iter_tflops": 16.67393894278946, "iter_time": 0.8451363372802733, "loss": 0.36654412746429443, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 17.87148154205869, "step_time": 0.7885049514770508} +{"epoch": 0, "iter": 9158, "iter_tflops": 5.596141287890633, "iter_time": 2.518119354248047, "loss": 0.13208913803100586, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 6.910841241506266, "step_time": 2.039079063415527} +{"epoch": 0, "iter": 9159, "iter_tflops": 8.786449485085342, "iter_time": 1.60380500793457, "loss": 0.2827756404876709, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 10.10495557276454, "step_time": 1.3945387077331541} +{"epoch": 0, "iter": 9160, "iter_tflops": 21.257667863092575, "iter_time": 0.6629020538330078, "loss": 0.1625024378299713, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 22.86992639370457, "step_time": 0.6161695251464844} +{"epoch": 0, "iter": 9161, "iter_tflops": 12.380281367057181, "iter_time": 1.118497985839844, "loss": 0.32954177260398865, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 13.125832690864732, "step_time": 1.0549669570922853} +{"epoch": 0, 
"iter": 9162, "iter_tflops": 12.24603535774255, "iter_time": 1.1307594146728515, "loss": 0.20482465624809265, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 14.60219406073179, "step_time": 0.9483040504455567} +{"epoch": 0, "iter": 9163, "iter_tflops": 23.882445695745563, "iter_time": 0.5798116302490235, "loss": 0.2672114372253418, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 25.412855485498014, "step_time": 0.5448942871093749} +{"epoch": 0, "iter": 9164, "iter_tflops": 24.863294003359588, "iter_time": 0.556938262939453, "loss": 0.3994532525539398, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 26.515761944059626, "step_time": 0.5222297515869141} +{"epoch": 0, "iter": 9165, "iter_tflops": 32.222266068536975, "iter_time": 0.6402744445800781, "loss": 0.2847210168838501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.670270631887135, "step_time": 0.5950658340454101} +{"epoch": 0, "iter": 9166, "iter_tflops": 11.975197140129357, "iter_time": 1.7228186950683595, "loss": 0.23162613809108734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.021012942065699, "step_time": 1.3734821739196776} +{"epoch": 0, "iter": 9167, "iter_tflops": 39.87162398006506, "iter_time": 0.517438003540039, "loss": 0.2976672351360321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.693752938592496, "step_time": 0.4721749019622803} +{"epoch": 0, "iter": 9168, "iter_tflops": 41.99529572452248, "iter_time": 0.49127153778076177, "loss": 0.28702953457832336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.63002866843904, "step_time": 0.4521385173797608} +{"epoch": 0, "iter": 9169, "iter_tflops": 16.46208444099348, "iter_time": 1.2532491607666016, "loss": 0.4697853922843933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.82013933174719, "step_time": 1.1577403030395508} +{"epoch": 0, "iter": 9170, "iter_tflops": 20.70066402104328, "iter_time": 0.9966392135620119, "loss": 0.2667016386985779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.106594053257265, "step_time": 0.8928660564422608} +{"epoch": 
0, "iter": 9171, "iter_tflops": 36.40791910001776, "iter_time": 0.5666650009155274, "loss": 0.32362040877342224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.47938792745242, "step_time": 0.5225788593292237} +{"epoch": 0, "iter": 9172, "iter_tflops": 42.24069072600302, "iter_time": 0.4884175224304199, "loss": 0.39388856291770935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.128958817212286, "step_time": 0.44724819374084474} +{"epoch": 0, "iter": 9173, "iter_tflops": 17.689444392253147, "iter_time": 1.1662940368652346, "loss": 0.21906976401805878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.95775874226575, "step_time": 1.0882664871215821} +{"epoch": 0, "iter": 9174, "iter_tflops": 18.65203995171554, "iter_time": 1.1061038665771485, "loss": 0.2855406701564789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.70304259682174, "step_time": 0.9087369422912598} +{"epoch": 0, "iter": 9175, "iter_tflops": 49.49457570755671, "iter_time": 0.4168354454040527, "loss": 0.28685787320137024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.85114319649544, "step_time": 0.3831133804321289} +{"epoch": 0, "iter": 9176, "iter_tflops": 49.84788234439996, "iter_time": 0.41388104248046875, "loss": 0.3188174366950989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.02769624176151, "step_time": 0.38186143302917475} +{"epoch": 0, "iter": 9177, "iter_tflops": 37.16536199338788, "iter_time": 0.5551161727905274, "loss": 0.0032172028440982103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.005221803607924, "step_time": 0.5157100143432617} +{"epoch": 0, "iter": 9178, "iter_tflops": 14.556507433463336, "iter_time": 1.4173106842041014, "loss": 0.00855551939457655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.83878758566854, "step_time": 1.2252125282287598} +{"epoch": 0, "iter": 9179, "iter_tflops": 51.834454656845814, "iter_time": 0.39801891708374026, "loss": 0.0029092924669384956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.153619375799806, "step_time": 0.36097615051269527} 
+{"epoch": 0, "iter": 9180, "iter_tflops": 62.86860268424102, "iter_time": 0.3281621131896973, "loss": 0.001760060084052384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 69.24410250077712, "step_time": 0.29794730186462404} +{"epoch": 0, "iter": 9181, "iter_tflops": 37.935019645604115, "iter_time": 0.5438535079956055, "loss": 0.2514437735080719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.79667594434739, "step_time": 0.5057052574157714} +{"epoch": 0, "iter": 9182, "iter_tflops": 15.432131088086958, "iter_time": 1.3368920593261717, "loss": 0.28022462129592896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.61760369075084, "step_time": 1.108149784088135} +{"epoch": 0, "iter": 9183, "iter_tflops": 39.700987603383446, "iter_time": 0.5196619720458985, "loss": 0.22994619607925415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.6571117794097, "step_time": 0.47257119560241706} +{"epoch": 0, "iter": 9184, "iter_tflops": 39.152388493788216, "iter_time": 0.5269434204101563, "loss": 0.2758004665374756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.85593459340869, "step_time": 0.4814057540893555} +{"epoch": 0, "iter": 9185, "iter_tflops": 19.91038818334503, "iter_time": 1.015364372253418, "loss": 0.15480676293373108, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 21.206896539219393, "step_time": 0.9532888870239258} +{"epoch": 0, "iter": 9186, "iter_tflops": 20.09136543056967, "iter_time": 1.00621826171875, "loss": 0.15734462440013885, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 22.881303738011518, "step_time": 0.8835291481018066} +{"epoch": 0, "iter": 9187, "iter_tflops": 48.0211014534165, "iter_time": 0.4209878196716309, "loss": 0.1551406979560852, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 52.348225420375066, "step_time": 0.3861888084411621} +{"epoch": 0, "iter": 9188, "iter_tflops": 50.677396898062334, "iter_time": 0.3989214134216309, "loss": 0.1743900030851364, "lr": 3e-05, "seqlen": 8032.0, "step_tflops": 55.352047187611326, "step_time": 0.3652312755584717} 
+{"epoch": 0, "iter": 9189, "iter_tflops": 35.65565327504849, "iter_time": 0.5786205444335938, "loss": 0.11120791733264923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.38172157001744, "step_time": 0.5375239219665527} +{"epoch": 0, "iter": 9190, "iter_tflops": 17.725452449784463, "iter_time": 1.1639247894287108, "loss": 0.16094253957271576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.594985443209364, "step_time": 0.9553650112152099} +{"epoch": 0, "iter": 9191, "iter_tflops": 40.83637576901087, "iter_time": 0.5052136268615722, "loss": 0.17370223999023438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.03253921795027, "step_time": 0.4581374683380127} +{"epoch": 0, "iter": 9192, "iter_tflops": 43.66897094273596, "iter_time": 0.4724428596496582, "loss": 0.21615073084831238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.77438767966597, "step_time": 0.43184422683715823} +{"epoch": 0, "iter": 9193, "iter_tflops": 26.472887837761064, "iter_time": 0.7793291625976563, "loss": 0.4246571958065033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.47000924296198, "step_time": 0.7246605834960937} +{"epoch": 0, "iter": 9194, "iter_tflops": 7.618764178019411, "iter_time": 2.7079317626953125, "loss": 0.4251309633255005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.13372736578534, "step_time": 2.2587814025878905} +{"epoch": 0, "iter": 9195, "iter_tflops": 14.981114092935226, "iter_time": 1.37714013671875, "loss": 0.5013753771781921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.10606438168042, "step_time": 1.2060689735412597} +{"epoch": 0, "iter": 9196, "iter_tflops": 41.02382648209155, "iter_time": 0.5029051475524903, "loss": 0.5143560171127319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92709792986571, "step_time": 0.4592126903533935} +{"epoch": 0, "iter": 9197, "iter_tflops": 24.069682001968516, "iter_time": 0.6057880249023437, "loss": 0.12127183377742767, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 25.697621788387803, "step_time": 0.5674114608764649} 
+{"epoch": 0, "iter": 9198, "iter_tflops": 26.537315131367173, "iter_time": 0.5494574356079102, "loss": 0.1974908709526062, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 28.25704329985341, "step_time": 0.5160173683166505} +{"epoch": 0, "iter": 9199, "iter_tflops": 25.6397676746513, "iter_time": 0.5686917800903319, "loss": 0.36110079288482666, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.3471937559832, "step_time": 0.5331854248046874} +{"epoch": 0, "iter": 9200, "iter_tflops": 25.89993075194064, "iter_time": 0.5629793090820313, "loss": 0.1952262967824936, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.57368659190995, "step_time": 0.5288057899475097} +{"epoch": 0, "iter": 9201, "iter_tflops": 27.98737748710512, "iter_time": 0.7371570816040038, "loss": 0.6408820748329163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.59474442806123, "step_time": 0.6971201782226563} +{"epoch": 0, "iter": 9202, "iter_tflops": 14.880385060204109, "iter_time": 1.3864623413085937, "loss": 0.7698450088500977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.430722285779236, "step_time": 1.1193860549926757} +{"epoch": 0, "iter": 9203, "iter_tflops": 37.15129345356232, "iter_time": 0.5553263854980468, "loss": 0.685594916343689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2508056699479, "step_time": 0.45592765045166017} +{"epoch": 0, "iter": 9204, "iter_tflops": 45.62841738729779, "iter_time": 0.452154483795166, "loss": 0.8214914798736572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.392946510470196, "step_time": 0.4176931114196777} +{"epoch": 0, "iter": 9205, "iter_tflops": 23.74618615567588, "iter_time": 0.868817138671875, "loss": 0.06759660691022873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.98126596814864, "step_time": 0.8258626098632813} +{"epoch": 0, "iter": 9206, "iter_tflops": 15.21959813817152, "iter_time": 1.3555609893798828, "loss": 0.12816601991653442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.41249234558044, "step_time": 1.120494342803955} 
+{"epoch": 0, "iter": 9207, "iter_tflops": 39.36695433857914, "iter_time": 0.5240713653564453, "loss": 0.09657776355743408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.26031403083774, "step_time": 0.47690577316284183} +{"epoch": 0, "iter": 9208, "iter_tflops": 47.2619897420165, "iter_time": 0.43652613067626955, "loss": 0.09903253614902496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.810505928670764, "step_time": 0.39820289611816406} +{"epoch": 0, "iter": 9209, "iter_tflops": 22.333030717285975, "iter_time": 0.923792823791504, "loss": 0.7773281335830688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.662375157224112, "step_time": 0.8718944473266601} +{"epoch": 0, "iter": 9210, "iter_tflops": 16.222906747286874, "iter_time": 1.2717260742187502, "loss": 0.8397470116615295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.6696259732087, "step_time": 1.1050619621276856} +{"epoch": 0, "iter": 9211, "iter_tflops": 46.024227518064606, "iter_time": 0.44826593780517576, "loss": 0.6096839308738708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.76163411152684, "step_time": 0.4145983924865723} +{"epoch": 0, "iter": 9212, "iter_tflops": 49.80969839707182, "iter_time": 0.41419832229614256, "loss": 0.6191672086715698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.5748979989364, "step_time": 0.38508880615234375} +{"epoch": 0, "iter": 9213, "iter_tflops": 22.373755805378742, "iter_time": 0.9221113204956054, "loss": 0.09915521740913391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.439898768477192, "step_time": 0.880169906616211} +{"epoch": 0, "iter": 9214, "iter_tflops": 13.33283000528773, "iter_time": 1.5473904266357423, "loss": 0.0721893161535263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.394289477336283, "step_time": 1.1860842914581298} +{"epoch": 0, "iter": 9215, "iter_tflops": 44.50172607789306, "iter_time": 0.4636020965576172, "loss": 0.08689294010400772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.96726889634471, "step_time": 
0.421324161529541} +{"epoch": 0, "iter": 9216, "iter_tflops": 44.61179337029283, "iter_time": 0.4624582862854004, "loss": 0.10089463740587234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.92114913669904, "step_time": 0.4217213592529297} +{"epoch": 0, "iter": 9217, "iter_tflops": 39.9222278047963, "iter_time": 0.5167821197509765, "loss": 0.06204935163259506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.20955047981891, "step_time": 0.46666598701477047} +{"epoch": 0, "iter": 9218, "iter_tflops": 36.87352722236478, "iter_time": 0.5595096282958985, "loss": 0.10802562534809113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.7421712010786, "step_time": 0.5063817882537842} +{"epoch": 0, "iter": 9219, "iter_tflops": 41.1099714169686, "iter_time": 0.5018513221740722, "loss": 0.09934047609567642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.24511854795511, "step_time": 0.4559849586486816} +{"epoch": 0, "iter": 9220, "iter_tflops": 45.62322148733022, "iter_time": 0.4522059783935547, "loss": 0.10811848938465118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.03494980502794, "step_time": 0.4123336505889893} +{"epoch": 0, "iter": 9221, "iter_tflops": 35.42416737441976, "iter_time": 0.5824016494750976, "loss": 0.5362467169761658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.861124993462624, "step_time": 0.5308928527832031} +{"epoch": 0, "iter": 9222, "iter_tflops": 38.727927662905365, "iter_time": 0.5327187576293946, "loss": 0.5436884164810181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.439798001423256, "step_time": 0.4861261005401611} +{"epoch": 0, "iter": 9223, "iter_tflops": 36.867605666038436, "iter_time": 0.559599494934082, "loss": 0.5076107978820801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.303114974273754, "step_time": 0.5118982372283936} +{"epoch": 0, "iter": 9224, "iter_tflops": 40.20121622001279, "iter_time": 0.5131957550048829, "loss": 0.42947903275489807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.046705720856835, "step_time": 
0.4683912944793701} +{"epoch": 0, "iter": 9225, "iter_tflops": 35.32604270325137, "iter_time": 0.5840193786621094, "loss": 0.5477884411811829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.84199263560248, "step_time": 0.531154354095459} +{"epoch": 0, "iter": 9226, "iter_tflops": 34.87537304317912, "iter_time": 0.5915662460327148, "loss": 0.4843567907810211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.28591512920732, "step_time": 0.5388690185546875} +{"epoch": 0, "iter": 9227, "iter_tflops": 40.8534258536852, "iter_time": 0.5050027770996094, "loss": 0.5224190354347229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.38939030681898, "step_time": 0.46477532958984374} +{"epoch": 0, "iter": 9228, "iter_tflops": 39.91801121718263, "iter_time": 0.5168367080688477, "loss": 0.45249879360198975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.64971877161767, "step_time": 0.47265123558044436} +{"epoch": 0, "iter": 9229, "iter_tflops": 35.8505346946138, "iter_time": 0.5754751968383789, "loss": 0.06757426261901855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.73949042813335, "step_time": 0.5191584815979005} +{"epoch": 0, "iter": 9230, "iter_tflops": 38.247703512398395, "iter_time": 0.5394073791503906, "loss": 0.06830399483442307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67676136780945, "step_time": 0.48342687797546385} +{"epoch": 0, "iter": 9231, "iter_tflops": 45.68826216639581, "iter_time": 0.4515622291564942, "loss": 0.042918138206005096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.15717948196685, "step_time": 0.41132882118225095} +{"epoch": 0, "iter": 9232, "iter_tflops": 47.687043232444346, "iter_time": 0.4326352005004882, "loss": 0.06910411268472672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.38914001912102, "step_time": 0.3938047752380371} +{"epoch": 0, "iter": 9233, "iter_tflops": 19.83078504533265, "iter_time": 1.0403568725585939, "loss": 0.5944080352783203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.35142573525661, "step_time": 
0.9662630386352539} +{"epoch": 0, "iter": 9234, "iter_tflops": 13.430273800302155, "iter_time": 1.5361632843017579, "loss": 0.6858908534049988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.45587678539296, "step_time": 1.1178603839874268} +{"epoch": 0, "iter": 9235, "iter_tflops": 38.44723419039278, "iter_time": 0.5366080017089844, "loss": 0.5979063510894775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.158542864591254, "step_time": 0.48936922645568853} +{"epoch": 0, "iter": 9236, "iter_tflops": 37.48980460628104, "iter_time": 0.5503121109008788, "loss": 0.5559674501419067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.02171166361501, "step_time": 0.502931074142456} +{"epoch": 0, "iter": 9237, "iter_tflops": 20.78978917578256, "iter_time": 0.9923666534423827, "loss": 0.515687108039856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.325747025372774, "step_time": 0.9240942077636719} +{"epoch": 0, "iter": 9238, "iter_tflops": 18.914740950765534, "iter_time": 1.0907415313720703, "loss": 0.416473388671875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.372092331508764, "step_time": 0.9221798839569091} +{"epoch": 0, "iter": 9239, "iter_tflops": 46.525276583103334, "iter_time": 0.4434383850097656, "loss": 0.3667316138744354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.41827256367474, "step_time": 0.40919873809814455} +{"epoch": 0, "iter": 9240, "iter_tflops": 48.38353698902219, "iter_time": 0.4264073028564453, "loss": 0.4311801493167877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.4407743423046, "step_time": 0.39341702651977545} +{"epoch": 0, "iter": 9241, "iter_tflops": 34.13943269578691, "iter_time": 0.6043185806274414, "loss": 0.5284848809242249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.512494821583715, "step_time": 0.5650420112609863} +{"epoch": 0, "iter": 9242, "iter_tflops": 14.054563122348133, "iter_time": 1.467928482055664, "loss": 0.6103758811950684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.843208990713666, "step_time": 
1.2248909053802488} +{"epoch": 0, "iter": 9243, "iter_tflops": 39.08987923629733, "iter_time": 0.5277860641479492, "loss": 0.8038985133171082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.745714460083825, "step_time": 0.48264706230163573} +{"epoch": 0, "iter": 9244, "iter_tflops": 36.35647986459398, "iter_time": 0.5674667510986329, "loss": 0.4981999397277832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.61575686613796, "step_time": 0.5207799911499024} +{"epoch": 0, "iter": 9245, "iter_tflops": 19.58798148918043, "iter_time": 1.0532526550292969, "loss": 0.7473464608192444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.853273499684384, "step_time": 0.9893455581665038} +{"epoch": 0, "iter": 9246, "iter_tflops": 14.731463888952089, "iter_time": 1.4004781646728515, "loss": 0.7406119704246521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.46069007701479, "step_time": 1.181573776245117} +{"epoch": 0, "iter": 9247, "iter_tflops": 35.653867313264634, "iter_time": 0.578649528503418, "loss": 0.7251200675964355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.5937401752793, "step_time": 0.5345709800720215} +{"epoch": 0, "iter": 9248, "iter_tflops": 39.66409662454138, "iter_time": 0.5201453018188475, "loss": 0.9321438074111938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99409882687843, "step_time": 0.4798587265014649} +{"epoch": 0, "iter": 9249, "iter_tflops": 17.23811723124857, "iter_time": 1.196829864501953, "loss": 0.2897299826145172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.326502290309648, "step_time": 1.1257518310546875} +{"epoch": 0, "iter": 9250, "iter_tflops": 39.691544177697416, "iter_time": 0.5197856101989746, "loss": 0.32484352588653564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.558571677059035, "step_time": 0.4080632190704345} +{"epoch": 0, "iter": 9251, "iter_tflops": 48.55903091705478, "iter_time": 0.42486625289916996, "loss": 0.3127039670944214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.34862764504103, "step_time": 
0.39410953903198237} +{"epoch": 0, "iter": 9252, "iter_tflops": 45.92783084980388, "iter_time": 0.4492067909240723, "loss": 0.36963924765586853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.08005905621446, "step_time": 0.41196224403381343} +{"epoch": 0, "iter": 9253, "iter_tflops": 22.926992949571467, "iter_time": 0.8998604202270507, "loss": 0.72264164686203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.176740993667398, "step_time": 0.8533446884155272} +{"epoch": 0, "iter": 9254, "iter_tflops": 14.831993913368313, "iter_time": 1.3909858398437498, "loss": 0.6689856052398682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.81955917367132, "step_time": 1.0962580642700195} +{"epoch": 0, "iter": 9255, "iter_tflops": 42.72926905013464, "iter_time": 0.482832820892334, "loss": 0.7620962262153625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.301973812506816, "step_time": 0.44557697677612307} +{"epoch": 0, "iter": 9256, "iter_tflops": 44.40151965072037, "iter_time": 0.46464836502075196, "loss": 0.7079821228981018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.98710969671088, "step_time": 0.42992990493774413} +{"epoch": 0, "iter": 9257, "iter_tflops": 36.83980748681274, "iter_time": 0.5600217514038086, "loss": 0.39256444573402405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71394374696658, "step_time": 0.5194924392700195} +{"epoch": 0, "iter": 9258, "iter_tflops": 28.366915859471366, "iter_time": 0.7272942047119141, "loss": 0.3832671642303467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.55415430199786, "step_time": 0.6538312931060791} +{"epoch": 0, "iter": 9259, "iter_tflops": 39.94419084925784, "iter_time": 0.5164979705810546, "loss": 0.36557742953300476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.82372844997637, "step_time": 0.470774492263794} +{"epoch": 0, "iter": 9260, "iter_tflops": 44.80315757175231, "iter_time": 0.46048302459716794, "loss": 0.32126063108444214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78158809417016, "step_time": 
0.4229278774261474} +{"epoch": 0, "iter": 9261, "iter_tflops": 28.721238212945327, "iter_time": 0.7183218688964843, "loss": 0.6658427119255066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.543957512987905, "step_time": 0.6540426483154298} +{"epoch": 0, "iter": 9262, "iter_tflops": 11.005276372634127, "iter_time": 1.8746547393798827, "loss": 0.8182983994483948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.991839343510124, "step_time": 1.5880040512084963} +{"epoch": 0, "iter": 9263, "iter_tflops": 13.081894951454048, "iter_time": 1.577072250366211, "loss": 0.7313289046287537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.17768923874623, "step_time": 1.2752806167602537} +{"epoch": 0, "iter": 9264, "iter_tflops": 21.458369718595673, "iter_time": 0.9614473876953126, "loss": 0.7536834478378296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.557397121893437, "step_time": 0.8401172733306884} +{"epoch": 0, "iter": 9265, "iter_tflops": 28.28285235166219, "iter_time": 0.6111414947509766, "loss": 0.202705517411232, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 30.322750258481598, "step_time": 0.570028263092041} +{"epoch": 0, "iter": 9266, "iter_tflops": 29.681405064436607, "iter_time": 0.582345230102539, "loss": 0.18298836052417755, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 31.568684711763147, "step_time": 0.54753071975708} +{"epoch": 0, "iter": 9267, "iter_tflops": 32.14043816672738, "iter_time": 0.5377905731201171, "loss": 0.2611779570579529, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 34.166262109809296, "step_time": 0.5059032974243164} +{"epoch": 0, "iter": 9268, "iter_tflops": 30.964552958250064, "iter_time": 0.558213279724121, "loss": 0.30316871404647827, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 32.9756054087429, "step_time": 0.5241700477600097} +{"epoch": 0, "iter": 9269, "iter_tflops": 30.978373765203287, "iter_time": 0.6659837493896485, "loss": 0.8271792531013489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.00726158107095, "step_time": 
0.6250471115112305} +{"epoch": 0, "iter": 9270, "iter_tflops": 22.313166876162928, "iter_time": 0.9246152114868162, "loss": 0.8779496550559998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.87139855074239, "step_time": 0.740224552154541} +{"epoch": 0, "iter": 9271, "iter_tflops": 36.90422268384724, "iter_time": 0.5590442504882813, "loss": 0.8686026930809021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.18734688342112, "step_time": 0.5133728675842285} +{"epoch": 0, "iter": 9272, "iter_tflops": 38.975158319528944, "iter_time": 0.5293395690917968, "loss": 0.8479939699172974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.16906629535316, "step_time": 0.4892471027374268} +{"epoch": 0, "iter": 9273, "iter_tflops": 17.06719879895372, "iter_time": 1.2088154449462891, "loss": 0.2142626792192459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.173229155417793, "step_time": 1.1352464294433595} +{"epoch": 0, "iter": 9274, "iter_tflops": 16.24773847974119, "iter_time": 1.269782470703125, "loss": 0.21649104356765747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.189305733198115, "step_time": 0.8896813793182373} +{"epoch": 0, "iter": 9275, "iter_tflops": 43.97585370595525, "iter_time": 0.46914594650268554, "loss": 0.272287517786026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.60305571963373, "step_time": 0.4333985118865968} +{"epoch": 0, "iter": 9276, "iter_tflops": 50.783138657966525, "iter_time": 0.40625873184204103, "loss": 0.318042516708374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.38561865641913, "step_time": 0.3724991073608399} +{"epoch": 0, "iter": 9277, "iter_tflops": 24.63949956688503, "iter_time": 0.8373178787231444, "loss": 0.4912625551223755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.269029950800288, "step_time": 0.7853770599365235} +{"epoch": 0, "iter": 9278, "iter_tflops": 20.897653223545447, "iter_time": 0.9872445144653321, "loss": 0.47522297501564026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.66681332999098, "step_time": 
0.8038042449951172} +{"epoch": 0, "iter": 9279, "iter_tflops": 48.200322913615466, "iter_time": 0.42802811813354485, "loss": 0.5248327255249023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.48942833547894, "step_time": 0.39305235671997074} +{"epoch": 0, "iter": 9280, "iter_tflops": 50.44752083103077, "iter_time": 0.40896149444580077, "loss": 0.5556530356407166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.715577787185644, "step_time": 0.3770606899261475} +{"epoch": 0, "iter": 9281, "iter_tflops": 26.83334989376736, "iter_time": 0.7688601531982422, "loss": 0.16333723068237305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.29709237086194, "step_time": 0.7290888137817383} +{"epoch": 0, "iter": 9282, "iter_tflops": 12.733853759638974, "iter_time": 1.620176727294922, "loss": 0.1454925388097763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.376395104024004, "step_time": 1.3417379932403564} +{"epoch": 0, "iter": 9283, "iter_tflops": 37.45862738701063, "iter_time": 0.5507701416015625, "loss": 0.2084188312292099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.39015808695348, "step_time": 0.4984540882110596} +{"epoch": 0, "iter": 9284, "iter_tflops": 38.46975072341145, "iter_time": 0.5362939224243164, "loss": 0.14191438257694244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.26038715356176, "step_time": 0.48818988418579107} +{"epoch": 0, "iter": 9285, "iter_tflops": 20.465083680080934, "iter_time": 0.8949057312011719, "loss": 0.006126432213932276, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 21.98531702208278, "step_time": 0.833025089263916} +{"epoch": 0, "iter": 9286, "iter_tflops": 19.683672464151716, "iter_time": 0.9304320983886718, "loss": 0.0017503317212685943, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 22.380132911570755, "step_time": 0.8183293972015381} +{"epoch": 0, "iter": 9287, "iter_tflops": 38.40956860877083, "iter_time": 0.4768166198730469, "loss": 0.026414699852466583, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 42.54334860090569, 
"step_time": 0.4304861106872559} +{"epoch": 0, "iter": 9288, "iter_tflops": 42.27276315216121, "iter_time": 0.43324162673950195, "loss": 0.005019017495214939, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 46.85552424989323, "step_time": 0.3908679065704346} +{"epoch": 0, "iter": 9289, "iter_tflops": 35.782007374307696, "iter_time": 0.5765773086547852, "loss": 0.09508445113897324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85427791461016, "step_time": 0.5176632118225097} +{"epoch": 0, "iter": 9290, "iter_tflops": 39.065747891907485, "iter_time": 0.5281120834350586, "loss": 0.18820960819721222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.61547831956772, "step_time": 0.4841220684051514} +{"epoch": 0, "iter": 9291, "iter_tflops": 37.13246085998121, "iter_time": 0.5556080322265625, "loss": 0.09476854652166367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.905032118668174, "step_time": 0.5043656597137451} +{"epoch": 0, "iter": 9292, "iter_tflops": 40.1755981936898, "iter_time": 0.5135229949951172, "loss": 0.19127695262432098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.744317603441175, "step_time": 0.4716291084289551} +{"epoch": 0, "iter": 9293, "iter_tflops": 15.482864655318885, "iter_time": 1.0898404388427734, "loss": 0.05784410238265991, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 16.382185347351573, "step_time": 1.0300122756958008} +{"epoch": 0, "iter": 9294, "iter_tflops": 22.280597317058966, "iter_time": 0.7573339157104493, "loss": 0.020733853802084923, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 28.883478696302642, "step_time": 0.5842042846679687} +{"epoch": 0, "iter": 9295, "iter_tflops": 34.8346839884828, "iter_time": 0.48439802169799806, "loss": 0.025381937623023987, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 38.58620940230153, "step_time": 0.43730265998840334} +{"epoch": 0, "iter": 9296, "iter_tflops": 35.21155107878981, "iter_time": 0.4792135391235352, "loss": 0.01999589428305626, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 
38.86025054490253, "step_time": 0.4342188167572022} +{"epoch": 0, "iter": 9297, "iter_tflops": 19.004828985352475, "iter_time": 1.0855711212158203, "loss": 0.007826397195458412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.127048172138053, "step_time": 1.025043182373047} +{"epoch": 0, "iter": 9298, "iter_tflops": 16.27878215705524, "iter_time": 1.2673609924316405, "loss": 0.00573959294706583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.19166423809022, "step_time": 0.7876969299316406} +{"epoch": 0, "iter": 9299, "iter_tflops": 54.34839611822469, "iter_time": 0.37960813903808593, "loss": 0.004725813400000334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.760442391855, "step_time": 0.34522993278503417} +{"epoch": 0, "iter": 9300, "iter_tflops": 54.08039349820216, "iter_time": 0.3814893379211426, "loss": 0.0064118956215679646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.398049137054166, "step_time": 0.34733621406555176} +{"epoch": 0, "iter": 9301, "iter_tflops": 10.535639528803461, "iter_time": 0.8689576797485352, "loss": 0.008866751566529274, "lr": 3e-05, "seqlen": 3696.0, "step_tflops": 11.028278717434347, "step_time": 0.830140869140625} +{"epoch": 0, "iter": 9302, "iter_tflops": 7.198435153410072, "iter_time": 1.271807647705078, "loss": 0.0014816391048952937, "lr": 3e-05, "seqlen": 3696.0, "step_tflops": 8.855658088422306, "step_time": 1.0338051433563233} +{"epoch": 0, "iter": 9303, "iter_tflops": 25.473277605084935, "iter_time": 0.35939720916748047, "loss": 0.007866956293582916, "lr": 3e-05, "seqlen": 3696.0, "step_tflops": 28.137232149412792, "step_time": 0.32537048530578605} +{"epoch": 0, "iter": 9304, "iter_tflops": 25.951870905942652, "iter_time": 0.352769359588623, "loss": 0.0028675163630396128, "lr": 3e-05, "seqlen": 3696.0, "step_tflops": 28.429734808787288, "step_time": 0.3220228729248047} +{"epoch": 0, "iter": 9305, "iter_tflops": 32.248221563210876, "iter_time": 0.6397591094970703, "loss": 0.1834588199853897, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 34.23371439350793, "step_time": 0.6026542510986328} +{"epoch": 0, "iter": 9306, "iter_tflops": 12.868622465880048, "iter_time": 1.6032091674804687, "loss": 0.3202201724052429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.11187602086992, "step_time": 1.2804898376464844} +{"epoch": 0, "iter": 9307, "iter_tflops": 48.39296707056487, "iter_time": 0.4263242111206055, "loss": 0.3243560194969177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.72403770993392, "step_time": 0.3913033676147461} +{"epoch": 0, "iter": 9308, "iter_tflops": 51.89819439545819, "iter_time": 0.3975300827026367, "loss": 0.30667561292648315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.18844028524402, "step_time": 0.3671768321990967} +{"epoch": 0, "iter": 9309, "iter_tflops": 44.34083043028366, "iter_time": 0.46528432846069334, "loss": 0.5723108053207397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.2593769402173, "step_time": 0.42750434875488286} +{"epoch": 0, "iter": 9310, "iter_tflops": 47.898342452583435, "iter_time": 0.4307266693115234, "loss": 0.6166818141937256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.97045773093933, "step_time": 0.3969773292541504} +{"epoch": 0, "iter": 9311, "iter_tflops": 47.376693859391125, "iter_time": 0.4354692535400391, "loss": 0.7429065704345703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.03106448981995, "step_time": 0.40428499221801756} +{"epoch": 0, "iter": 9312, "iter_tflops": 46.90720820893186, "iter_time": 0.43982778549194335, "loss": 0.6290560960769653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.90922323069347, "step_time": 0.4052525691986084} +{"epoch": 0, "iter": 9313, "iter_tflops": 27.089847720228015, "iter_time": 0.7615802688598634, "loss": 0.359806627035141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.702625016920134, "step_time": 0.7187876892089844} +{"epoch": 0, "iter": 9314, "iter_tflops": 12.749542026745894, "iter_time": 1.6181831054687497, "loss": 0.38876956701278687, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 15.512727652980384, "step_time": 1.329946220397949} +{"epoch": 0, "iter": 9315, "iter_tflops": 10.156760277060142, "iter_time": 2.0312671508789064, "loss": 0.43907541036605835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.803148285232483, "step_time": 1.7479314002990725} +{"epoch": 0, "iter": 9316, "iter_tflops": 36.1741374905715, "iter_time": 0.5703271713256837, "loss": 0.3077167570590973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.49738536973427, "step_time": 0.5223407402038575} +{"epoch": 0, "iter": 9317, "iter_tflops": 12.61667672420768, "iter_time": 1.0943133392333984, "loss": 0.32746753096580505, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 13.532133161155967, "step_time": 1.0202824249267577} +{"epoch": 0, "iter": 9318, "iter_tflops": 17.21833394812527, "iter_time": 0.8018544464111327, "loss": 0.32797667384147644, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 20.58902269422685, "step_time": 0.67058052444458} +{"epoch": 0, "iter": 9319, "iter_tflops": 20.90183116425549, "iter_time": 0.6605448837280273, "loss": 0.16982148587703705, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 22.415565499850928, "step_time": 0.6159379577636719} +{"epoch": 0, "iter": 9320, "iter_tflops": 22.00667056122527, "iter_time": 0.6273823928833008, "loss": 0.4007982015609741, "lr": 3e-05, "seqlen": 5536.0, "step_tflops": 23.623477840886313, "step_time": 0.5844439048767089} +{"epoch": 0, "iter": 9321, "iter_tflops": 20.85383233359519, "iter_time": 0.9893190460205077, "loss": 0.7691256999969482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.18631251274137, "step_time": 0.929901870727539} +{"epoch": 0, "iter": 9322, "iter_tflops": 9.833907516049369, "iter_time": 2.0979548034667967, "loss": 0.6979304552078247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.639889924842937, "step_time": 1.9390326080322264} +{"epoch": 0, "iter": 9323, "iter_tflops": 12.60186001374974, "iter_time": 1.637146697998047, "loss": 0.8439815640449524, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 14.806008464206696, "step_time": 1.3934271049499511} +{"epoch": 0, "iter": 9324, "iter_tflops": 36.734125371341946, "iter_time": 0.5616329040527345, "loss": 0.82686448097229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.490325385181556, "step_time": 0.45352705955505374} +{"epoch": 0, "iter": 9325, "iter_tflops": 16.533897235604456, "iter_time": 0.9041184234619141, "loss": 0.20271247625350952, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 17.28658143517332, "step_time": 0.864751724243164} +{"epoch": 0, "iter": 9326, "iter_tflops": 10.48299429389114, "iter_time": 1.425985809326172, "loss": 0.18190054595470428, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 12.58198337311585, "step_time": 1.188095760345459} +{"epoch": 0, "iter": 9327, "iter_tflops": 25.516998530371218, "iter_time": 0.5858291320800781, "loss": 0.2523972690105438, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 27.261563019203102, "step_time": 0.5483398399353028} +{"epoch": 0, "iter": 9328, "iter_tflops": 26.3781797146357, "iter_time": 0.5667032852172851, "loss": 0.1797681599855423, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.131784625752235, "step_time": 0.5313776321411133} +{"epoch": 0, "iter": 9329, "iter_tflops": 31.872763480840824, "iter_time": 0.64729541015625, "loss": 1.0675230026245117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.92500223810449, "step_time": 0.6081383094787598} +{"epoch": 0, "iter": 9330, "iter_tflops": 15.629961570246824, "iter_time": 1.3199708404541015, "loss": 0.8655162453651428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.140597464821436, "step_time": 1.02435359954834} +{"epoch": 0, "iter": 9331, "iter_tflops": 42.43224137886329, "iter_time": 0.4862126731872558, "loss": 0.616438627243042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74796828935914, "step_time": 0.45097289085388187} +{"epoch": 0, "iter": 9332, "iter_tflops": 42.35663314603625, "iter_time": 0.487080581665039, "loss": 0.7477725148200989, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 45.19200173513726, "step_time": 0.4565209045410156} +{"epoch": 0, "iter": 9333, "iter_tflops": 38.33763219698029, "iter_time": 0.53814208984375, "loss": 0.892822802066803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.45690593749468, "step_time": 0.49765155029296876} +{"epoch": 0, "iter": 9334, "iter_tflops": 40.57964392344006, "iter_time": 0.5084099197387696, "loss": 0.6605952978134155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59274667906272, "step_time": 0.4732689514160156} +{"epoch": 0, "iter": 9335, "iter_tflops": 43.09461700714031, "iter_time": 0.47873945617675784, "loss": 0.7492075562477112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.4098726363445, "step_time": 0.4445410499572754} +{"epoch": 0, "iter": 9336, "iter_tflops": 45.437889626662106, "iter_time": 0.45405043411254886, "loss": 0.9817010164260864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.00006993174582, "step_time": 0.4210421237945557} +{"epoch": 0, "iter": 9337, "iter_tflops": 28.775083369810982, "iter_time": 0.7169777145385741, "loss": 0.19166581332683563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.420298910513182, "step_time": 0.6782015380859375} +{"epoch": 0, "iter": 9338, "iter_tflops": 17.084757807643182, "iter_time": 1.2075730743408202, "loss": 0.290040522813797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.592178390306984, "step_time": 1.0018898010253905} +{"epoch": 0, "iter": 9339, "iter_tflops": 49.443708433548395, "iter_time": 0.41726428222656253, "loss": 0.24721835553646088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.514569695017016, "step_time": 0.38552292633056645} +{"epoch": 0, "iter": 9340, "iter_tflops": 50.528107105856634, "iter_time": 0.40830924987792966, "loss": 0.28127795457839966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.6045566436144, "step_time": 0.3778273239135742} +{"epoch": 0, "iter": 9341, "iter_tflops": 34.76994095077884, "iter_time": 0.5933600387573242, "loss": 0.05663009732961655, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 37.18654666891243, "step_time": 0.5547999305725098} +{"epoch": 0, "iter": 9342, "iter_tflops": 16.121440360929256, "iter_time": 1.2797301635742189, "loss": 0.075632244348526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.91372227504119, "step_time": 1.090800277709961} +{"epoch": 0, "iter": 9343, "iter_tflops": 41.13463581812284, "iter_time": 0.5015504112243652, "loss": 0.066253162920475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.45588632630045, "step_time": 0.45387066841125495} +{"epoch": 0, "iter": 9344, "iter_tflops": 36.00472716138541, "iter_time": 0.5730106887817381, "loss": 0.05276717618107796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66419218305493, "step_time": 0.5201440486907959} +{"epoch": 0, "iter": 9345, "iter_tflops": 20.50381334188394, "iter_time": 1.006207633972168, "loss": 0.6144279837608337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.83782723141128, "step_time": 0.9447411270141601} +{"epoch": 0, "iter": 9346, "iter_tflops": 21.71445429419847, "iter_time": 0.9501087722778321, "loss": 0.6825400590896606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.49782830357096, "step_time": 0.8421600990295409} +{"epoch": 0, "iter": 9347, "iter_tflops": 33.91889525518552, "iter_time": 0.608247802734375, "loss": 0.607046902179718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.077969098308, "step_time": 0.5564245834350586} +{"epoch": 0, "iter": 9348, "iter_tflops": 36.716895734990764, "iter_time": 0.5618964538574219, "loss": 0.6193892359733582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.05393453192506, "step_time": 0.5150828189849853} +{"epoch": 0, "iter": 9349, "iter_tflops": 26.662601994604447, "iter_time": 0.7737839508056641, "loss": 0.3432764708995819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.49429545133441, "step_time": 0.724042942047119} +{"epoch": 0, "iter": 9350, "iter_tflops": 14.752613026325298, "iter_time": 1.398470458984375, "loss": 0.25782573223114014, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 16.357149748709706, "step_time": 1.2612890281677247} +{"epoch": 0, "iter": 9351, "iter_tflops": 36.924385325100346, "iter_time": 0.5587389831542968, "loss": 0.2592155337333679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.39397994671005, "step_time": 0.5107467384338379} +{"epoch": 0, "iter": 9352, "iter_tflops": 34.45403439069705, "iter_time": 0.5988005142211914, "loss": 0.2351105809211731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.6059147206118, "step_time": 0.548612995147705} +{"epoch": 0, "iter": 9353, "iter_tflops": 23.202792125372664, "iter_time": 0.8891642608642579, "loss": 0.7949187159538269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.7540538752229, "step_time": 0.8334430236816407} +{"epoch": 0, "iter": 9354, "iter_tflops": 23.3983723351527, "iter_time": 0.8817319946289062, "loss": 0.8800670504570007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.96505190580516, "step_time": 0.7377455825805664} +{"epoch": 0, "iter": 9355, "iter_tflops": 43.63530833848588, "iter_time": 0.4728073272705078, "loss": 0.6999207735061646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.10482417654314, "step_time": 0.43798260307312015} +{"epoch": 0, "iter": 9356, "iter_tflops": 45.56418484705352, "iter_time": 0.45279189300537115, "loss": 0.8334406614303589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87456318661662, "step_time": 0.42212333297729493} +{"epoch": 0, "iter": 9357, "iter_tflops": 35.89210278536099, "iter_time": 0.5748087158203125, "loss": 0.2817396819591522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.39848533342173, "step_time": 0.5372892532348632} +{"epoch": 0, "iter": 9358, "iter_tflops": 9.089112756349516, "iter_time": 2.2698688049316407, "loss": 0.2646075487136841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.253697686963338, "step_time": 2.012063758850098} +{"epoch": 0, "iter": 9359, "iter_tflops": 13.32553438742898, "iter_time": 1.5482376098632813, "loss": 0.22611333429813385, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 15.628281322779374, "step_time": 1.3201127548217773} +{"epoch": 0, "iter": 9360, "iter_tflops": 34.5184890622586, "iter_time": 0.5976824035644531, "loss": 0.32109010219573975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33274488417266, "step_time": 0.4991464653015137} +{"epoch": 0, "iter": 9361, "iter_tflops": 13.188370232943043, "iter_time": 1.1799722137451172, "loss": 0.15946725010871887, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 14.028208595576688, "step_time": 1.1093298416137696} +{"epoch": 0, "iter": 9362, "iter_tflops": 11.483699576772832, "iter_time": 1.3551304016113284, "loss": 0.331903338432312, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 13.593385961336294, "step_time": 1.1448148727416991} +{"epoch": 0, "iter": 9363, "iter_tflops": 24.00446177697372, "iter_time": 0.6482924118041993, "loss": 0.19222216308116913, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 25.847440665436157, "step_time": 0.6020677490234375} +{"epoch": 0, "iter": 9364, "iter_tflops": 24.485036287340037, "iter_time": 0.6355681991577149, "loss": 0.2755529582500458, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 26.28178951640578, "step_time": 0.5921176109313964} +{"epoch": 0, "iter": 9365, "iter_tflops": 23.913277720553133, "iter_time": 0.8627463684082031, "loss": 0.42851537466049194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.829612149024523, "step_time": 0.7987380294799804} +{"epoch": 0, "iter": 9366, "iter_tflops": 15.891408440233288, "iter_time": 1.2982545623779298, "loss": 0.35653015971183777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.5109727847905, "step_time": 1.005856412887573} +{"epoch": 0, "iter": 9367, "iter_tflops": 37.49458587298275, "iter_time": 0.5502419357299805, "loss": 0.44943374395370483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.895517050616526, "step_time": 0.5044830093383789} +{"epoch": 0, "iter": 9368, "iter_tflops": 38.050110018892454, "iter_time": 0.542208511352539, "loss": 0.32239651679992676, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 41.5283574880431, "step_time": 0.496795316696167} +{"epoch": 0, "iter": 9369, "iter_tflops": 30.553311830535556, "iter_time": 0.6752490081787109, "loss": 0.2191314697265625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.7489616679984, "step_time": 0.6113104667663575} +{"epoch": 0, "iter": 9370, "iter_tflops": 40.364222270243744, "iter_time": 0.5111232757568359, "loss": 0.32549214363098145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.240311812444254, "step_time": 0.4663415031433105} +{"epoch": 0, "iter": 9371, "iter_tflops": 40.87391600258657, "iter_time": 0.5047496185302734, "loss": 0.31635111570358276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.78963881270473, "step_time": 0.46062201118469237} +{"epoch": 0, "iter": 9372, "iter_tflops": 39.90471903401532, "iter_time": 0.5170088653564453, "loss": 0.25372394919395447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.55932511157055, "step_time": 0.4736320743560791} +{"epoch": 0, "iter": 9373, "iter_tflops": 39.053298017228414, "iter_time": 0.5282804412841797, "loss": 0.0373091921210289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.75486961480856, "step_time": 0.4715153694152832} +{"epoch": 0, "iter": 9374, "iter_tflops": 40.58402612313691, "iter_time": 0.5083550224304199, "loss": 0.025209151208400726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.58389521433524, "step_time": 0.46274766731262207} +{"epoch": 0, "iter": 9375, "iter_tflops": 41.104645763296766, "iter_time": 0.5019163436889648, "loss": 0.03320184722542763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.424591972838556, "step_time": 0.45418335342407223} +{"epoch": 0, "iter": 9376, "iter_tflops": 41.83320458174484, "iter_time": 0.4931750679016113, "loss": 0.04802202805876732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.82486342291872, "step_time": 0.45021614837646484} +{"epoch": 0, "iter": 9377, "iter_tflops": 19.753277117142122, "iter_time": 1.0444390258789062, "loss": 0.6526660323143005, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.82323869330106, "step_time": 0.990772560119629} +{"epoch": 0, "iter": 9378, "iter_tflops": 17.68621648417705, "iter_time": 1.1665068969726562, "loss": 0.8254120349884033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.627090821661742, "step_time": 0.9539467735290528} +{"epoch": 0, "iter": 9379, "iter_tflops": 38.64437553277192, "iter_time": 0.5338705368041993, "loss": 0.8897457718849182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.16515925223957, "step_time": 0.48929243659973143} +{"epoch": 0, "iter": 9380, "iter_tflops": 37.7064171128228, "iter_time": 0.5471507263183594, "loss": 0.6501072645187378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.86004761399403, "step_time": 0.5049209365844726} +{"epoch": 0, "iter": 9381, "iter_tflops": 24.19016988474638, "iter_time": 0.852870964050293, "loss": 0.10904797911643982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.96575278400913, "step_time": 0.7945501785278322} +{"epoch": 0, "iter": 9382, "iter_tflops": 8.139744372126296, "iter_time": 2.534611968994141, "loss": 0.24767237901687622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.305970537414298, "step_time": 2.21697386932373} +{"epoch": 0, "iter": 9383, "iter_tflops": 13.245386362692154, "iter_time": 1.5576060180664064, "loss": 0.1251678168773651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.35373396394101, "step_time": 1.0660006771087644} +{"epoch": 0, "iter": 9384, "iter_tflops": 38.390220025966045, "iter_time": 0.5374049301147461, "loss": 0.14221033453941345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95386289281642, "step_time": 0.4917567081451416} +{"epoch": 0, "iter": 9385, "iter_tflops": 11.490221386377165, "iter_time": 1.1909656982421877, "loss": 0.3327192962169647, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 12.088595668535751, "step_time": 1.1320139999389647} +{"epoch": 0, "iter": 9386, "iter_tflops": 9.761479263224368, "iter_time": 1.4018837890625, "loss": 0.19240538775920868, "lr": 3e-05, "seqlen": 
5488.0, "step_tflops": 11.415732286727378, "step_time": 1.19873690032959} +{"epoch": 0, "iter": 9387, "iter_tflops": 22.904254141649066, "iter_time": 0.5974636611938476, "loss": 0.09340539574623108, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 24.301635078579682, "step_time": 0.5631085929870605} +{"epoch": 0, "iter": 9388, "iter_tflops": 24.303466372617518, "iter_time": 0.563066162109375, "loss": 0.15123000741004944, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 25.83832980309158, "step_time": 0.529618579864502} +{"epoch": 0, "iter": 9389, "iter_tflops": 20.22261435061469, "iter_time": 1.0201991271972657, "loss": 0.16748633980751038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.244166287418977, "step_time": 0.9711415939331055} +{"epoch": 0, "iter": 9390, "iter_tflops": 16.30026470077397, "iter_time": 1.2656907043457033, "loss": 0.1510394811630249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.991058692461515, "step_time": 1.0320160541534424} +{"epoch": 0, "iter": 9391, "iter_tflops": 40.18302305625098, "iter_time": 0.513428108215332, "loss": 0.14728641510009766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.98175483249384, "step_time": 0.46908300018310545} +{"epoch": 0, "iter": 9392, "iter_tflops": 38.56394373847359, "iter_time": 0.534984016418457, "loss": 0.20212288200855255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.344138026924306, "step_time": 0.48722431182861325} +{"epoch": 0, "iter": 9393, "iter_tflops": 30.97513757138606, "iter_time": 0.6660533294677735, "loss": 0.4748888611793518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.1179861001791, "step_time": 0.6046984558105469} +{"epoch": 0, "iter": 9394, "iter_tflops": 39.945537936241756, "iter_time": 0.5164805526733399, "loss": 0.47483736276626587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27177119748946, "step_time": 0.46601012229919425} +{"epoch": 0, "iter": 9395, "iter_tflops": 38.942632035088415, "iter_time": 0.5297816925048828, "loss": 0.4372173547744751, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 42.47543272330021, "step_time": 0.4857182655334473} +{"epoch": 0, "iter": 9396, "iter_tflops": 41.952226766304946, "iter_time": 0.4917758865356445, "loss": 0.4866490662097931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.938356654563336, "step_time": 0.4491038646697998} +{"epoch": 0, "iter": 9397, "iter_tflops": 16.06691620831761, "iter_time": 1.2840730133056641, "loss": 0.052187249064445496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.221657560389385, "step_time": 1.1979737396240235} +{"epoch": 0, "iter": 9398, "iter_tflops": 23.19594765666007, "iter_time": 0.8894266281127929, "loss": 0.10166259855031967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.560312864830934, "step_time": 0.6336270046234131} +{"epoch": 0, "iter": 9399, "iter_tflops": 55.2558493218421, "iter_time": 0.37337392807006836, "loss": 0.05589014291763306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.32249703443254, "step_time": 0.3420132541656494} +{"epoch": 0, "iter": 9400, "iter_tflops": 50.399438022614234, "iter_time": 0.40935165786743166, "loss": 0.0886036679148674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.03625559691181, "step_time": 0.37486368370056156} +{"epoch": 0, "iter": 9401, "iter_tflops": 39.472469270013185, "iter_time": 0.5226704559326171, "loss": 0.653567373752594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.880207557227884, "step_time": 0.48113324737548835} +{"epoch": 0, "iter": 9402, "iter_tflops": 17.089808769320417, "iter_time": 1.2072161712646485, "loss": 0.43600162863731384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.658567117454893, "step_time": 0.9986701107025147} +{"epoch": 0, "iter": 9403, "iter_tflops": 38.31173884059834, "iter_time": 0.5385057983398438, "loss": 0.40634456276893616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.908181696123584, "step_time": 0.4922927379608154} +{"epoch": 0, "iter": 9404, "iter_tflops": 45.61153417244234, "iter_time": 0.45232184982299806, "loss": 0.6812559962272644, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 49.786928062310416, "step_time": 0.4143877582550049} +{"epoch": 0, "iter": 9405, "iter_tflops": 15.629675556695885, "iter_time": 1.3199949951171874, "loss": 0.7907487750053406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.73213279258053, "step_time": 1.2330223388671875} +{"epoch": 0, "iter": 9406, "iter_tflops": 16.76333465201456, "iter_time": 1.2307272949218748, "loss": 0.7066716551780701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.534336883925178, "step_time": 0.8409069137573242} +{"epoch": 0, "iter": 9407, "iter_tflops": 33.47087133875771, "iter_time": 0.6163894958496094, "loss": 0.7376319766044617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.25556126783158, "step_time": 0.569046314239502} +{"epoch": 0, "iter": 9408, "iter_tflops": 35.40763487005317, "iter_time": 0.582673583984375, "loss": 0.6048585176467896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.418924013896316, "step_time": 0.53700341796875} +{"epoch": 0, "iter": 9409, "iter_tflops": 24.20509405840804, "iter_time": 0.8523451080322266, "loss": 0.8126589059829712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.687253664636863, "step_time": 0.8031646270751952} +{"epoch": 0, "iter": 9410, "iter_tflops": 8.558086344687043, "iter_time": 2.410713409423828, "loss": 0.9124463200569153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.787180436950809, "step_time": 2.1079710998535157} +{"epoch": 0, "iter": 9411, "iter_tflops": 15.264887947347422, "iter_time": 1.3515391387939453, "loss": 0.7722401022911072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.065935428897806, "step_time": 1.1419886665344239} +{"epoch": 0, "iter": 9412, "iter_tflops": 33.60210484655072, "iter_time": 0.613982177734375, "loss": 0.858515739440918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.59217255614476, "step_time": 0.563811658859253} +{"epoch": 0, "iter": 9413, "iter_tflops": 14.062439789400933, "iter_time": 1.1211840209960937, "loss": 0.21851694583892822, "lr": 3e-05, "seqlen": 
6304.0, "step_tflops": 14.93367148035137, "step_time": 1.0557740478515625} +{"epoch": 0, "iter": 9414, "iter_tflops": 16.57048440532965, "iter_time": 0.9514859313964843, "loss": 0.3028953969478607, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 18.241881086781724, "step_time": 0.8643068504333496} +{"epoch": 0, "iter": 9415, "iter_tflops": 28.196369841332864, "iter_time": 0.5591706619262695, "loss": 0.2096705138683319, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 30.03722134123628, "step_time": 0.5249015083312988} +{"epoch": 0, "iter": 9416, "iter_tflops": 26.449758903706243, "iter_time": 0.5960955200195313, "loss": 0.1329629123210907, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 28.05354108097376, "step_time": 0.562017562866211} +{"epoch": 0, "iter": 9417, "iter_tflops": 33.85137403115793, "iter_time": 0.6094610366821289, "loss": 0.039204999804496765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.41186968013484, "step_time": 0.5666035194396973} +{"epoch": 0, "iter": 9418, "iter_tflops": 28.436469956151786, "iter_time": 0.7255152816772461, "loss": 0.028742652386426926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.56788655494648, "step_time": 0.5641861057281494} +{"epoch": 0, "iter": 9419, "iter_tflops": 57.54184489483654, "iter_time": 0.35854070281982425, "loss": 0.02949278987944126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.10812284886922, "step_time": 0.3269166088104248} +{"epoch": 0, "iter": 9420, "iter_tflops": 52.32892505224304, "iter_time": 0.394257926940918, "loss": 0.017444679513573647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.33744382128998, "step_time": 0.35981885719299317} +{"epoch": 0, "iter": 9421, "iter_tflops": 41.84994224283283, "iter_time": 0.49297782516479494, "loss": 0.5300583243370056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.801165629891976, "step_time": 0.45044909286499024} +{"epoch": 0, "iter": 9422, "iter_tflops": 43.24377272742036, "iter_time": 0.47708819580078127, "loss": 0.625552773475647, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 47.2956416779314, "step_time": 0.4362155323028565} +{"epoch": 0, "iter": 9423, "iter_tflops": 45.40879214699675, "iter_time": 0.4543413848876953, "loss": 0.6378394961357117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.051315676922535, "step_time": 0.4206022453308106} +{"epoch": 0, "iter": 9424, "iter_tflops": 47.481709549243114, "iter_time": 0.43450612258911137, "loss": 0.6995558142662048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.15881564570656, "step_time": 0.40327543258666987} +{"epoch": 0, "iter": 9425, "iter_tflops": 26.329100366420327, "iter_time": 0.783585205078125, "loss": 0.61700838804245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.77970732119674, "step_time": 0.7426677780151367} +{"epoch": 0, "iter": 9426, "iter_tflops": 13.440701111327641, "iter_time": 1.5349715270996096, "loss": 0.5631940960884094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.11286131772922, "step_time": 1.1390300598144532} +{"epoch": 0, "iter": 9427, "iter_tflops": 37.16304977934216, "iter_time": 0.5551507110595704, "loss": 0.49614062905311584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.42069734640421, "step_time": 0.510409143447876} +{"epoch": 0, "iter": 9428, "iter_tflops": 39.117293243338295, "iter_time": 0.5274161834716797, "loss": 0.47354087233543396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.49682930514345, "step_time": 0.48547371292114255} +{"epoch": 0, "iter": 9429, "iter_tflops": 2.449991166405846, "iter_time": 0.6076423263549804, "loss": 0.26156020164489746, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 2.7077843104950703, "step_time": 0.5497920665740966} +{"epoch": 0, "iter": 9430, "iter_tflops": 2.6728723318258654, "iter_time": 0.5569732284545899, "loss": 1.0422347784042358, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 2.958388909161087, "step_time": 0.5032192783355712} +{"epoch": 0, "iter": 9431, "iter_tflops": 2.999346086976882, "iter_time": 0.49634763336181636, "loss": 1.3332504034042358, "lr": 3e-05, 
"seqlen": 608.0, "step_tflops": 3.27317169176291, "step_time": 0.4548243942260742} +{"epoch": 0, "iter": 9432, "iter_tflops": 2.935194624236387, "iter_time": 0.5071957817077637, "loss": 1.0924394130706787, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 3.2110404176530243, "step_time": 0.4636249122619629} +{"epoch": 0, "iter": 9433, "iter_tflops": 31.08241703796725, "iter_time": 0.6637544784545898, "loss": 0.26254701614379883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.94577728310786, "step_time": 0.6077661247253419} +{"epoch": 0, "iter": 9434, "iter_tflops": 33.08952072766237, "iter_time": 0.6234932708740235, "loss": 0.32757577300071716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.21860312230633, "step_time": 0.5696269798278809} +{"epoch": 0, "iter": 9435, "iter_tflops": 36.37336206483111, "iter_time": 0.5672033691406251, "loss": 0.4039369225502014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.01620100168104, "step_time": 0.5155685195922851} +{"epoch": 0, "iter": 9436, "iter_tflops": 42.60020702677213, "iter_time": 0.48429561614990235, "loss": 0.3059427738189697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.573736907793865, "step_time": 0.44297698402404784} +{"epoch": 0, "iter": 9437, "iter_tflops": 23.8623499281643, "iter_time": 0.8645876693725585, "loss": 0.005488076247274876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.37074949541625, "step_time": 0.8131842346191406} +{"epoch": 0, "iter": 9438, "iter_tflops": 12.621495481786557, "iter_time": 1.634599761962891, "loss": 0.0035600392147898674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.669003891484913, "step_time": 1.4064413414001464} +{"epoch": 0, "iter": 9439, "iter_tflops": 54.9973311601767, "iter_time": 0.37512899398803706, "loss": 0.016514554619789124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.581594172966064, "step_time": 0.34055052185058593} +{"epoch": 0, "iter": 9440, "iter_tflops": 58.88074252101342, "iter_time": 0.3503877944946289, "loss": 0.005182371940463781, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 64.28524237975077, "step_time": 0.320930477142334} +{"epoch": 0, "iter": 9441, "iter_tflops": 25.062671664352035, "iter_time": 0.8231801376342772, "loss": 0.19587014615535736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.400120712560266, "step_time": 0.7814772415161134} +{"epoch": 0, "iter": 9442, "iter_tflops": 14.229409647922841, "iter_time": 1.4498910369873048, "loss": 0.17039908468723297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.376184551952726, "step_time": 1.1227082233428956} +{"epoch": 0, "iter": 9443, "iter_tflops": 40.94618240837785, "iter_time": 0.5038587799072266, "loss": 0.15075355768203735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.75115426356861, "step_time": 0.46101813125610347} +{"epoch": 0, "iter": 9444, "iter_tflops": 41.53339943414364, "iter_time": 0.49673500823974615, "loss": 0.13698264956474304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.559883213513494, "step_time": 0.452834644317627} +{"epoch": 0, "iter": 9445, "iter_tflops": 18.04041518060415, "iter_time": 1.1436041412353515, "loss": 0.5758703947067261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.16818237093436, "step_time": 1.076319763183594} +{"epoch": 0, "iter": 9446, "iter_tflops": 15.896520106539914, "iter_time": 1.2978370971679687, "loss": 0.7192838191986084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.438357399891043, "step_time": 0.8802277889251708} +{"epoch": 0, "iter": 9447, "iter_tflops": 41.41398225768607, "iter_time": 0.49816734313964844, "loss": 0.695273756980896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26581395796171, "step_time": 0.4557764835357666} +{"epoch": 0, "iter": 9448, "iter_tflops": 40.222856182671784, "iter_time": 0.5129196548461914, "loss": 0.689843475818634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.877980429388266, "step_time": 0.47019241333007816} +{"epoch": 0, "iter": 9449, "iter_tflops": 13.852741421448405, "iter_time": 1.4893148498535156, "loss": 0.6606801152229309, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.812674802512431, "step_time": 1.3928000030517578} +{"epoch": 0, "iter": 9450, "iter_tflops": 27.845347003679205, "iter_time": 0.7409170913696289, "loss": 0.6571836471557617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.366422337312684, "step_time": 0.5673116073608399} +{"epoch": 0, "iter": 9451, "iter_tflops": 49.4355503244908, "iter_time": 0.4173331413269043, "loss": 0.6972021460533142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84981398682558, "step_time": 0.3831228370666504} +{"epoch": 0, "iter": 9452, "iter_tflops": 43.92979671073824, "iter_time": 0.46963780975341796, "loss": 0.7080551981925964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.3436513687427, "step_time": 0.4357731800079346} +{"epoch": 0, "iter": 9453, "iter_tflops": 25.841954515702987, "iter_time": 0.7983565444946289, "loss": 0.19576701521873474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.46652562351622, "step_time": 0.7511359024047851} +{"epoch": 0, "iter": 9454, "iter_tflops": 15.002828336336101, "iter_time": 1.375146942138672, "loss": 0.1705663651227951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.82783521753239, "step_time": 1.1572405319213865} +{"epoch": 0, "iter": 9455, "iter_tflops": 48.70093184703826, "iter_time": 0.42362831115722654, "loss": 0.18109560012817383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.94811211944001, "step_time": 0.38964738655090325} +{"epoch": 0, "iter": 9456, "iter_tflops": 52.142770764234356, "iter_time": 0.39566546249389645, "loss": 0.20824116468429565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.83620322543646, "step_time": 0.3629921131134033} +{"epoch": 0, "iter": 9457, "iter_tflops": 34.818136560043484, "iter_time": 0.592538703918457, "loss": 0.7988983392715454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.56921426024505, "step_time": 0.5491489219665527} +{"epoch": 0, "iter": 9458, "iter_tflops": 35.28504169649391, "iter_time": 0.5846980056762696, "loss": 0.5687196254730225, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.28546197394481, "step_time": 0.5388753967285156} +{"epoch": 0, "iter": 9459, "iter_tflops": 39.244206290877585, "iter_time": 0.5257105560302734, "loss": 0.592471718788147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.5081666291921, "step_time": 0.4853442325592041} +{"epoch": 0, "iter": 9460, "iter_tflops": 39.11788683459603, "iter_time": 0.5274081802368163, "loss": 0.7908433079719543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.41590900027586, "step_time": 0.4863998908996582} +{"epoch": 0, "iter": 9461, "iter_tflops": 11.263631527195052, "iter_time": 0.9916677474975586, "loss": 0.013346036896109581, "lr": 3e-05, "seqlen": 4496.0, "step_tflops": 12.21782084759631, "step_time": 0.9142203216552736} +{"epoch": 0, "iter": 9462, "iter_tflops": 10.843866293994434, "iter_time": 1.0300551300048828, "loss": 0.0008389365393668413, "lr": 3e-05, "seqlen": 4496.0, "step_tflops": 13.070070854600843, "step_time": 0.85460746383667} +{"epoch": 0, "iter": 9463, "iter_tflops": 30.715433680462674, "iter_time": 0.3636536674499512, "loss": 0.00590152945369482, "lr": 3e-05, "seqlen": 4496.0, "step_tflops": 33.82212041981703, "step_time": 0.33025073432922364} +{"epoch": 0, "iter": 9464, "iter_tflops": 30.13169546203931, "iter_time": 0.3706986923217773, "loss": 0.011827483773231506, "lr": 3e-05, "seqlen": 4496.0, "step_tflops": 33.13045934422739, "step_time": 0.3371453437805176} +{"epoch": 0, "iter": 9465, "iter_tflops": 32.35872268903198, "iter_time": 0.6375744094848632, "loss": 0.0926453024148941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.65238119664534, "step_time": 0.5953730392456056} +{"epoch": 0, "iter": 9466, "iter_tflops": 40.754381555721615, "iter_time": 0.5062300720214844, "loss": 0.06782132387161255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.544559465600244, "step_time": 0.452987003326416} +{"epoch": 0, "iter": 9467, "iter_tflops": 42.42423468577898, "iter_time": 0.48630443572998044, "loss": 
0.0576050728559494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.77614898480515, "step_time": 0.4410601119995118} +{"epoch": 0, "iter": 9468, "iter_tflops": 46.59155864534715, "iter_time": 0.4428075408935547, "loss": 0.10850703716278076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.97729250531102, "step_time": 0.4047114410400391} +{"epoch": 0, "iter": 9469, "iter_tflops": 20.12322241547796, "iter_time": 1.0252380599975586, "loss": 0.3800217807292938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.719167512856057, "step_time": 0.9499025917053223} +{"epoch": 0, "iter": 9470, "iter_tflops": 15.127131935105211, "iter_time": 1.3638470001220702, "loss": 0.3099537491798401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.922076384174353, "step_time": 1.0355895195007325} +{"epoch": 0, "iter": 9471, "iter_tflops": 38.45561977146211, "iter_time": 0.5364909896850585, "loss": 0.4168066382408142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.10540680307998, "step_time": 0.4899867992401123} +{"epoch": 0, "iter": 9472, "iter_tflops": 40.31198104722973, "iter_time": 0.5117856521606445, "loss": 0.42697107791900635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.187908701643806, "step_time": 0.46689454460144036} +{"epoch": 0, "iter": 9473, "iter_tflops": 22.749388543523278, "iter_time": 0.906885627746582, "loss": 0.23396934568881989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.621588560519704, "step_time": 0.837926986694336} +{"epoch": 0, "iter": 9474, "iter_tflops": 40.71415342994085, "iter_time": 0.5067302589416504, "loss": 0.22886793315410614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.12311213865545, "step_time": 0.45721787643432615} +{"epoch": 0, "iter": 9475, "iter_tflops": 48.735216275316546, "iter_time": 0.4233302955627441, "loss": 0.21238426864147186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.95432264594315, "step_time": 0.38960168838500975} +{"epoch": 0, "iter": 9476, "iter_tflops": 44.40838335687552, "iter_time": 0.4645765495300293, 
"loss": 0.16313444077968597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.862012650442416, "step_time": 0.4310536136627197} +{"epoch": 0, "iter": 9477, "iter_tflops": 29.188300894699477, "iter_time": 0.706827491760254, "loss": 0.8449845314025879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.95797965978142, "step_time": 0.6664224777221679} +{"epoch": 0, "iter": 9478, "iter_tflops": 13.788765834805835, "iter_time": 1.496224807739258, "loss": 0.6315594911575317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.29496295625487, "step_time": 1.127692554473877} +{"epoch": 0, "iter": 9479, "iter_tflops": 41.52380613374115, "iter_time": 0.4968497695922852, "loss": 0.5453189611434937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.641718955328116, "step_time": 0.4621482772827149} +{"epoch": 0, "iter": 9480, "iter_tflops": 47.57691042522932, "iter_time": 0.43363668060302735, "loss": 0.7099261283874512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.40981286365612, "step_time": 0.40130652809143064} +{"epoch": 0, "iter": 9481, "iter_tflops": 21.878102275336325, "iter_time": 0.9430019683837891, "loss": 0.5978334546089172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.776169494295495, "step_time": 0.9058192825317383} +{"epoch": 0, "iter": 9482, "iter_tflops": 14.066667349298317, "iter_time": 1.4666653442382813, "loss": 0.6750571131706238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.161047198833412, "step_time": 1.1360079231262206} +{"epoch": 0, "iter": 9483, "iter_tflops": 34.24610941387013, "iter_time": 0.6024361267089844, "loss": 0.7979728579521179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.06055210789661, "step_time": 0.5566860809326172} +{"epoch": 0, "iter": 9484, "iter_tflops": 37.12972601904538, "iter_time": 0.5556489562988282, "loss": 0.8775538206100464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.348787164276025, "step_time": 0.5113188018798828} +{"epoch": 0, "iter": 9485, "iter_tflops": 17.5155598127717, "iter_time": 1.1778723449707031, 
"loss": 0.07715295255184174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.59863485751455, "step_time": 1.1092799911499025} +{"epoch": 0, "iter": 9486, "iter_tflops": 37.99473383091705, "iter_time": 0.542998764038086, "loss": 0.03748982772231102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.8326339755379, "step_time": 0.4140076866149902} +{"epoch": 0, "iter": 9487, "iter_tflops": 56.730121833827745, "iter_time": 0.36367088317871094, "loss": 0.021475132554769516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.96060179831, "step_time": 0.33297116088867185} +{"epoch": 0, "iter": 9488, "iter_tflops": 50.88501400387809, "iter_time": 0.40544537353515625, "loss": 0.025837576016783714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.41045190978456, "step_time": 0.37233216476440434} +{"epoch": 0, "iter": 9489, "iter_tflops": 32.64976483541488, "iter_time": 0.6318910293579102, "loss": 0.050095219165086746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.923940966991, "step_time": 0.5907435684204101} +{"epoch": 0, "iter": 9490, "iter_tflops": 22.14447108026532, "iter_time": 0.9316588973999023, "loss": 0.06318388134241104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.437307239067998, "step_time": 0.7803780212402344} +{"epoch": 0, "iter": 9491, "iter_tflops": 49.92158835710781, "iter_time": 0.41326997375488284, "loss": 0.02389495074748993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.64710926024732, "step_time": 0.37753311729431155} +{"epoch": 0, "iter": 9492, "iter_tflops": 59.51728915540867, "iter_time": 0.34664034271240235, "loss": 0.017461301758885384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.26779501106577, "step_time": 0.31609913444519044} +{"epoch": 0, "iter": 9493, "iter_tflops": 20.335740126188913, "iter_time": 1.0145238571166992, "loss": 0.12931141257286072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.207177943093747, "step_time": 0.972835403442383} +{"epoch": 0, "iter": 9494, "iter_tflops": 12.081283612888116, "iter_time": 
1.7076905212402342, "loss": 0.12899072468280792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.25778513175698, "step_time": 1.1954658927917479} +{"epoch": 0, "iter": 9495, "iter_tflops": 38.523658984671606, "iter_time": 0.5355434570312501, "loss": 0.07649847120046616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.124464577313994, "step_time": 0.4897651214599609} +{"epoch": 0, "iter": 9496, "iter_tflops": 42.6571519125372, "iter_time": 0.48364910888671875, "loss": 0.10230530053377151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.56107115863716, "step_time": 0.443097484588623} +{"epoch": 0, "iter": 9497, "iter_tflops": 20.22192793387342, "iter_time": 1.0202337570190427, "loss": 0.08448374271392822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.84221237417063, "step_time": 0.944551456451416} +{"epoch": 0, "iter": 9498, "iter_tflops": 28.36545872422943, "iter_time": 0.7273315658569336, "loss": 0.04701346904039383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.257497259172425, "step_time": 0.6395751457214355} +{"epoch": 0, "iter": 9499, "iter_tflops": 54.013592920695544, "iter_time": 0.38196113967895506, "loss": 0.09588096290826797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.08179106779429, "step_time": 0.3491954650878906} +{"epoch": 0, "iter": 9500, "iter_tflops": 56.05947313451635, "iter_time": 0.36802153778076174, "loss": 0.11633778363466263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.07630572556303, "step_time": 0.3377920989990234} +{"epoch": 0, "iter": 9501, "iter_tflops": 38.0138919486423, "iter_time": 0.5427251052856445, "loss": 0.13055512309074402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.08920651425358, "step_time": 0.50210493850708} +{"epoch": 0, "iter": 9502, "iter_tflops": 50.045962012043404, "iter_time": 0.412242919921875, "loss": 0.0032628520857542753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.59565912879952, "step_time": 0.3778888988494873} +{"epoch": 0, "iter": 9503, "iter_tflops": 56.634551243210346, 
"iter_time": 0.3642845764160156, "loss": 0.007962214760482311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.03203219479071, "step_time": 0.3325877418518066} +{"epoch": 0, "iter": 9504, "iter_tflops": 53.177545285310394, "iter_time": 0.38796626281738283, "loss": 0.00685161305591464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.626131776050926, "step_time": 0.35190951347351074} +{"epoch": 0, "iter": 9505, "iter_tflops": 43.05535628328736, "iter_time": 0.4791760025024414, "loss": 0.6493134498596191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.12121396608412, "step_time": 0.4378302631378174} +{"epoch": 0, "iter": 9506, "iter_tflops": 43.30350774118627, "iter_time": 0.47643007659912107, "loss": 0.5085994601249695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.975122739393576, "step_time": 0.43919190216064447} +{"epoch": 0, "iter": 9507, "iter_tflops": 48.30995147522628, "iter_time": 0.4270568046569824, "loss": 0.47805675864219666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.49732308603781, "step_time": 0.3929932479858398} +{"epoch": 0, "iter": 9508, "iter_tflops": 51.81547519885624, "iter_time": 0.3981647071838379, "loss": 0.5904740691184998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.09700026420148, "step_time": 0.36777534294128417} +{"epoch": 0, "iter": 9509, "iter_tflops": 44.585358954169244, "iter_time": 0.4627324752807617, "loss": 0.1807977259159088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6334595447108, "step_time": 0.42421603775024413} +{"epoch": 0, "iter": 9510, "iter_tflops": 37.42889810532853, "iter_time": 0.5512076110839844, "loss": 0.3004254400730133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.45340521022316, "step_time": 0.4976935768127442} +{"epoch": 0, "iter": 9511, "iter_tflops": 39.168939621879325, "iter_time": 0.5267207565307618, "loss": 0.3121705651283264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.83452134303333, "step_time": 0.48164641189575197} +{"epoch": 0, "iter": 9512, "iter_tflops": 
43.35108681500518, "iter_time": 0.4759071807861328, "loss": 0.1973397582769394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.41124982004367, "step_time": 0.4351518592834473} +{"epoch": 0, "iter": 9513, "iter_tflops": 18.377702200027073, "iter_time": 1.122615509033203, "loss": 0.05617660656571388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.492107641396277, "step_time": 1.0584331817626953} +{"epoch": 0, "iter": 9514, "iter_tflops": 26.164339491967063, "iter_time": 0.7885195617675782, "loss": 0.021929267793893814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.24554868974918, "step_time": 0.7054438858032226} +{"epoch": 0, "iter": 9515, "iter_tflops": 51.905253222435014, "iter_time": 0.39747602081298833, "loss": 0.021174298599362373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.860142383857315, "step_time": 0.36283928680419925} +{"epoch": 0, "iter": 9516, "iter_tflops": 50.95796460659944, "iter_time": 0.4048649444580078, "loss": 0.028445439413189888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.66684800172598, "step_time": 0.37061723899841303} +{"epoch": 0, "iter": 9517, "iter_tflops": 46.91871791892711, "iter_time": 0.4397198905944824, "loss": 0.006726588122546673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.44123746470503, "step_time": 0.4010613765716553} +{"epoch": 0, "iter": 9518, "iter_tflops": 31.963358152790796, "iter_time": 0.6454607620239258, "loss": 0.0056254565715789795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.35054386526735, "step_time": 0.5675594177246094} +{"epoch": 0, "iter": 9519, "iter_tflops": 54.1811484583662, "iter_time": 0.3807799224853516, "loss": 0.002302041742950678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.46755061046271, "step_time": 0.34119281005859375} +{"epoch": 0, "iter": 9520, "iter_tflops": 57.61567553321643, "iter_time": 0.35808125686645514, "loss": 0.0062897661700844765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.313893750198915, "step_time": 0.3258541259765625} +{"epoch": 0, "iter": 
9521, "iter_tflops": 20.277333325927415, "iter_time": 0.7815862426757813, "loss": 0.20217572152614594, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 21.344828836985123, "step_time": 0.7424976272583008} +{"epoch": 0, "iter": 9522, "iter_tflops": 6.32721702572878, "iter_time": 2.5048113098144524, "loss": 0.2737683653831482, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 7.877225672900579, "step_time": 2.0119373779296876} +{"epoch": 0, "iter": 9523, "iter_tflops": 12.058714213661741, "iter_time": 1.3142765045166014, "loss": 0.23811863362789154, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 13.469394141984493, "step_time": 1.1766293716430662} +{"epoch": 0, "iter": 9524, "iter_tflops": 17.037295211025096, "iter_time": 0.9302230529785157, "loss": 0.190572589635849, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 20.318473028290544, "step_time": 0.7800037307739258} +{"epoch": 0, "iter": 9525, "iter_tflops": 26.944546546052443, "iter_time": 0.6537108535766601, "loss": 0.21568670868873596, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 28.795062357252768, "step_time": 0.6117000999450684} +{"epoch": 0, "iter": 9526, "iter_tflops": 16.077623716751745, "iter_time": 1.0955563354492188, "loss": 0.14383040368556976, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 18.081385699325747, "step_time": 0.9741478233337402} +{"epoch": 0, "iter": 9527, "iter_tflops": 31.313349838397656, "iter_time": 0.5625058517456055, "loss": 0.22094683349132538, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 33.4877331798357, "step_time": 0.5259819297790527} +{"epoch": 0, "iter": 9528, "iter_tflops": 26.457415052039515, "iter_time": 0.6657469177246094, "loss": 0.27005675435066223, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 28.424446008580375, "step_time": 0.6196758422851563} +{"epoch": 0, "iter": 9529, "iter_tflops": 30.79056756895949, "iter_time": 0.6700458984375, "loss": 0.4550478458404541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.24124181342163, "step_time": 0.6025217666625976} +{"epoch": 0, 
"iter": 9530, "iter_tflops": 35.775086032598715, "iter_time": 0.5766888580322265, "loss": 0.5947154760360718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.199386449078034, "step_time": 0.5263116436004638} +{"epoch": 0, "iter": 9531, "iter_tflops": 35.542207384884975, "iter_time": 0.5804674224853515, "loss": 0.5601502060890198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.92160545873415, "step_time": 0.5300678958892822} +{"epoch": 0, "iter": 9532, "iter_tflops": 41.891317118274785, "iter_time": 0.4924909248352051, "loss": 0.6268428564071655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71175128535364, "step_time": 0.451330192565918} +{"epoch": 0, "iter": 9533, "iter_tflops": 30.565437338235736, "iter_time": 0.6749811325073242, "loss": 0.7411896586418152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.57092395959189, "step_time": 0.614552448272705} +{"epoch": 0, "iter": 9534, "iter_tflops": 36.01340808857623, "iter_time": 0.5728725662231444, "loss": 0.8249525427818298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.25591582346816, "step_time": 0.5255537433624267} +{"epoch": 0, "iter": 9535, "iter_tflops": 41.013015242297826, "iter_time": 0.5030377159118652, "loss": 0.5926305651664734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.72115856099636, "step_time": 0.46132734870910647} +{"epoch": 0, "iter": 9536, "iter_tflops": 36.46701690607705, "iter_time": 0.5657466735839843, "loss": 0.9481247663497925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.86223451059481, "step_time": 0.5175598850250244} +{"epoch": 0, "iter": 9537, "iter_tflops": 2.392365749407274, "iter_time": 0.7536476974487305, "loss": 1.6271886825561523, "lr": 3e-05, "seqlen": 736.0, "step_tflops": 2.585961400394349, "step_time": 0.6972265472412109} +{"epoch": 0, "iter": 9538, "iter_tflops": 0.8666683392409984, "iter_time": 2.0803816833496094, "loss": 2.0228750705718994, "lr": 3e-05, "seqlen": 736.0, "step_tflops": 1.0893147973883888, "step_time": 1.6551697845458986} +{"epoch": 0, 
"iter": 9539, "iter_tflops": 1.2461636676333607, "iter_time": 1.4468412017822265, "loss": 1.8119230270385742, "lr": 3e-05, "seqlen": 736.0, "step_tflops": 1.4892092779618251, "step_time": 1.2107102508544922} +{"epoch": 0, "iter": 9540, "iter_tflops": 4.248416084008929, "iter_time": 0.4243936805725098, "loss": 1.9751684665679932, "lr": 3e-05, "seqlen": 736.0, "step_tflops": 4.618525411639574, "step_time": 0.39038454437255854} +{"epoch": 0, "iter": 9541, "iter_tflops": 20.410829153093225, "iter_time": 0.7443988342285156, "loss": 0.3083844482898712, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 21.578233815406573, "step_time": 0.7041260910034179} +{"epoch": 0, "iter": 9542, "iter_tflops": 10.539795609031794, "iter_time": 1.441564712524414, "loss": 0.15538282692432404, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 11.425723691397232, "step_time": 1.3297886276245119} +{"epoch": 0, "iter": 9543, "iter_tflops": 22.155964407654213, "iter_time": 0.6857655639648438, "loss": 0.22250068187713623, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 23.776659643378512, "step_time": 0.6390215301513672} +{"epoch": 0, "iter": 9544, "iter_tflops": 22.489014483997288, "iter_time": 0.6756097488403321, "loss": 0.17120452225208282, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 24.019134441915583, "step_time": 0.6325705642700195} +{"epoch": 0, "iter": 9545, "iter_tflops": 32.34272783682984, "iter_time": 0.6378897171020508, "loss": 0.34053924679756165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.69198873652011, "step_time": 0.5780314922332763} +{"epoch": 0, "iter": 9546, "iter_tflops": 35.91505531412873, "iter_time": 0.5744413681030274, "loss": 0.3391669988632202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.005773464994014, "step_time": 0.5289240970611573} +{"epoch": 0, "iter": 9547, "iter_tflops": 41.70602887210498, "iter_time": 0.49467892456054685, "loss": 0.3446713984012604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50954962127297, "step_time": 0.4533354797363281} +{"epoch": 
0, "iter": 9548, "iter_tflops": 42.15169776321434, "iter_time": 0.48944869613647457, "loss": 0.37622377276420593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.01136973188985, "step_time": 0.44839120483398437} +{"epoch": 0, "iter": 9549, "iter_tflops": 17.330335706597175, "iter_time": 1.1904612731933593, "loss": 0.9872668981552124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.651753702799684, "step_time": 1.1061208419799806} +{"epoch": 0, "iter": 9550, "iter_tflops": 33.28201002782756, "iter_time": 0.6198872451782227, "loss": 0.7933158874511719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.219567453028, "step_time": 0.5543077182769776} +{"epoch": 0, "iter": 9551, "iter_tflops": 49.76516716451019, "iter_time": 0.41456895828247076, "loss": 0.800308346748352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.98758712859312, "step_time": 0.3821451301574707} +{"epoch": 0, "iter": 9552, "iter_tflops": 49.19198830255506, "iter_time": 0.41939946365356445, "loss": 0.698535680770874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.017560307140805, "step_time": 0.38913698387146} +{"epoch": 0, "iter": 9553, "iter_tflops": 49.8789089655074, "iter_time": 0.413623592376709, "loss": 0.04490622878074646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.336669697128464, "step_time": 0.3728286075592041} +{"epoch": 0, "iter": 9554, "iter_tflops": 43.4434902611338, "iter_time": 0.47489493560791013, "loss": 0.038108211010694504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.53254755951554, "step_time": 0.4250980949401855} +{"epoch": 0, "iter": 9555, "iter_tflops": 51.49507359814928, "iter_time": 0.40064208221435543, "loss": 0.03238238766789436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.18874033772522, "step_time": 0.3671748714447022} +{"epoch": 0, "iter": 9556, "iter_tflops": 56.38082076570577, "iter_time": 0.36592396545410155, "loss": 0.06901014596223831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.56878412137691, "step_time": 0.33509015655517577} +{"epoch": 
0, "iter": 9557, "iter_tflops": 26.01352258780723, "iter_time": 0.7930911102294922, "loss": 0.11199166625738144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.602241123106797, "step_time": 0.7474426956176758} +{"epoch": 0, "iter": 9558, "iter_tflops": 16.351495109306867, "iter_time": 1.2617252044677736, "loss": 0.07578578591346741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.023451821499016, "step_time": 1.0303465003967285} +{"epoch": 0, "iter": 9559, "iter_tflops": 38.87347218266876, "iter_time": 0.5307242279052735, "loss": 0.07223176956176758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84970072768151, "step_time": 0.4814757900238037} +{"epoch": 0, "iter": 9560, "iter_tflops": 44.27516264210274, "iter_time": 0.4659744262695313, "loss": 0.10948711633682251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.17880808466176, "step_time": 0.42821925926208493} +{"epoch": 0, "iter": 9561, "iter_tflops": 19.524109058750252, "iter_time": 1.0566983337402345, "loss": 0.5363110303878784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.04553669788131, "step_time": 0.9803073120117187} +{"epoch": 0, "iter": 9562, "iter_tflops": 13.614849451870311, "iter_time": 1.5153376159667968, "loss": 0.6526986956596375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.2951745402381, "step_time": 1.2660860710144044} +{"epoch": 0, "iter": 9563, "iter_tflops": 36.813166738320554, "iter_time": 0.5604270248413086, "loss": 0.5152066946029663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.160629505305444, "step_time": 0.5137143955230713} +{"epoch": 0, "iter": 9564, "iter_tflops": 39.33714470869638, "iter_time": 0.524468505859375, "loss": 0.8208602666854858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.0456696199733, "step_time": 0.4792838325500488} +{"epoch": 0, "iter": 9565, "iter_tflops": 19.189118073632297, "iter_time": 1.0751454772949218, "loss": 0.06272906064987183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.772638489418764, "step_time": 0.9931859893798828} 
+{"epoch": 0, "iter": 9566, "iter_tflops": 29.636072723110477, "iter_time": 0.6961480255126954, "loss": 0.0802944004535675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.80044980598358, "step_time": 0.6103792591094972} +{"epoch": 0, "iter": 9567, "iter_tflops": 48.69713742845915, "iter_time": 0.423661319732666, "loss": 0.04762277007102966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.64810181509537, "step_time": 0.39186775588989264} +{"epoch": 0, "iter": 9568, "iter_tflops": 51.820135593332765, "iter_time": 0.39812889862060546, "loss": 0.06233809515833855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.170748109726524, "step_time": 0.3672924823760987} +{"epoch": 0, "iter": 9569, "iter_tflops": 42.516904093209874, "iter_time": 0.4852444915771485, "loss": 0.40331050753593445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.363816073166205, "step_time": 0.44498264503479} +{"epoch": 0, "iter": 9570, "iter_tflops": 48.936757306342976, "iter_time": 0.4215868530273438, "loss": 0.41330602765083313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.33807184365768, "step_time": 0.38679863739013676} +{"epoch": 0, "iter": 9571, "iter_tflops": 47.92477670405552, "iter_time": 0.43048908996582036, "loss": 0.3370794355869293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99292957959609, "step_time": 0.3968057518005371} +{"epoch": 0, "iter": 9572, "iter_tflops": 48.89511107166615, "iter_time": 0.4219459381103515, "loss": 0.43647027015686035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.1171679572673, "step_time": 0.3884072570800781} +{"epoch": 0, "iter": 9573, "iter_tflops": 33.18693104366105, "iter_time": 0.6216631927490235, "loss": 0.7877418994903564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.26580846665282, "step_time": 0.585016887664795} +{"epoch": 0, "iter": 9574, "iter_tflops": 32.57673670512965, "iter_time": 0.6333075561523437, "loss": 0.684589684009552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.14916698069373, "step_time": 0.5707211322784425} 
+{"epoch": 0, "iter": 9575, "iter_tflops": 39.18432880773064, "iter_time": 0.5265138931274413, "loss": 0.6843265891075134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.63195755401477, "step_time": 0.4839349327087403} +{"epoch": 0, "iter": 9576, "iter_tflops": 35.69149350383111, "iter_time": 0.5780395126342772, "loss": 0.7106192111968994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.92761503760085, "step_time": 0.5299860649108886} +{"epoch": 0, "iter": 9577, "iter_tflops": 20.803049749386393, "iter_time": 0.9917340850830079, "loss": 0.3952464163303375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.647630422692846, "step_time": 0.9109603576660156} +{"epoch": 0, "iter": 9578, "iter_tflops": 20.772058947471756, "iter_time": 0.9932136993408203, "loss": 0.5878055095672607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.333091230034736, "step_time": 0.8143930530548096} +{"epoch": 0, "iter": 9579, "iter_tflops": 47.06893803253351, "iter_time": 0.4383165283203125, "loss": 0.49149319529533386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.05584814430356, "step_time": 0.40408874320983884} +{"epoch": 0, "iter": 9580, "iter_tflops": 48.43796355826029, "iter_time": 0.4259281768798828, "loss": 0.4698343575000763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.6168617993295, "step_time": 0.3921004180908203} +{"epoch": 0, "iter": 9581, "iter_tflops": 38.65306944482062, "iter_time": 0.5337504577636719, "loss": 0.42192116379737854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80977765796028, "step_time": 0.4934514045715332} +{"epoch": 0, "iter": 9582, "iter_tflops": 19.356556587638156, "iter_time": 1.065845230102539, "loss": 0.33721375465393066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.035937314524055, "step_time": 0.8956046905517578} +{"epoch": 0, "iter": 9583, "iter_tflops": 35.58679060269596, "iter_time": 0.5797402114868164, "loss": 0.33169686794281006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.998108245748575, "step_time": 0.5290280590057373} 
+{"epoch": 0, "iter": 9584, "iter_tflops": 37.61107744464188, "iter_time": 0.5485376892089844, "loss": 0.3916650712490082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.26826728261429, "step_time": 0.49992633247375484} +{"epoch": 0, "iter": 9585, "iter_tflops": 20.69265159109459, "iter_time": 0.9970251235961912, "loss": 0.7238010764122009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.02828727630449, "step_time": 0.9365727462768555} +{"epoch": 0, "iter": 9586, "iter_tflops": 14.586483691701748, "iter_time": 1.4143980102539064, "loss": 0.7582527995109558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.646230248918503, "step_time": 1.239385326385498} +{"epoch": 0, "iter": 9587, "iter_tflops": 32.955971681873855, "iter_time": 0.6260198822021483, "loss": 0.6628923416137695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.83032224988016, "step_time": 0.5757998313903808} +{"epoch": 0, "iter": 9588, "iter_tflops": 37.03175384544589, "iter_time": 0.5571189956665039, "loss": 0.5698414444923401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.02826665478908, "step_time": 0.5154131126403809} +{"epoch": 0, "iter": 9589, "iter_tflops": 22.61645890445925, "iter_time": 0.9122159042358399, "loss": 0.055259980261325836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.534922546793222, "step_time": 0.8408868408203125} +{"epoch": 0, "iter": 9590, "iter_tflops": 23.521071865423067, "iter_time": 0.8771323699951172, "loss": 0.11488577723503113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.73869406387075, "step_time": 0.7178855609893797} +{"epoch": 0, "iter": 9591, "iter_tflops": 43.128031102442414, "iter_time": 0.4783685455322266, "loss": 0.12408032268285751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.555362779769936, "step_time": 0.4338331642150879} +{"epoch": 0, "iter": 9592, "iter_tflops": 43.82294492536045, "iter_time": 0.4707829093933106, "loss": 0.09932774305343628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.908465981288856, "step_time": 
0.4306356525421143} +{"epoch": 0, "iter": 9593, "iter_tflops": 15.840810326599335, "iter_time": 1.3024013977050781, "loss": 0.3273577392101288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.026958519660308, "step_time": 1.2116722717285155} +{"epoch": 0, "iter": 9594, "iter_tflops": 18.489744223564728, "iter_time": 1.1158128128051756, "loss": 0.342244029045105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.90272189538606, "step_time": 0.7668775520324707} +{"epoch": 0, "iter": 9595, "iter_tflops": 47.3342237076438, "iter_time": 0.4358599739074707, "loss": 0.29227572679519653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.06863537787297, "step_time": 0.4039875621795654} +{"epoch": 0, "iter": 9596, "iter_tflops": 40.235072839086705, "iter_time": 0.512763916015625, "loss": 0.2509596347808838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.75353706118849, "step_time": 0.4715297298431397} +{"epoch": 0, "iter": 9597, "iter_tflops": 28.016790278611616, "iter_time": 0.7363831939697266, "loss": 0.40204566717147827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.813058929655753, "step_time": 0.6920153198242188} +{"epoch": 0, "iter": 9598, "iter_tflops": 8.296096625255116, "iter_time": 2.4868434448242187, "loss": 0.43155306577682495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.777924681130612, "step_time": 2.1099664993286136} +{"epoch": 0, "iter": 9599, "iter_tflops": 16.300930900705207, "iter_time": 1.2656389770507812, "loss": 0.5570240616798401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.9986250029462, "step_time": 1.0859256134033204} +{"epoch": 0, "iter": 9600, "iter_tflops": 48.96111105509231, "iter_time": 0.4213771514892578, "loss": 0.4922759234905243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.24121034940901, "step_time": 0.38750233840942383} +{"epoch": 0, "iter": 9601, "iter_tflops": 17.670217859293288, "iter_time": 0.7836530303955078, "loss": 0.23831753432750702, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 18.701964666033202, "step_time": 
0.7404205932617188} +{"epoch": 0, "iter": 9602, "iter_tflops": 8.56140906616672, "iter_time": 1.6174113006591797, "loss": 0.1496480256319046, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 12.162193538989774, "step_time": 1.1385544662475586} +{"epoch": 0, "iter": 9603, "iter_tflops": 24.542904573601216, "iter_time": 0.5642086791992188, "loss": 0.21531158685684204, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 26.150452644261936, "step_time": 0.5295250511169434} +{"epoch": 0, "iter": 9604, "iter_tflops": 25.647290280796412, "iter_time": 0.539913558959961, "loss": 0.15857069194316864, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 27.280762538428746, "step_time": 0.5075855102539063} +{"epoch": 0, "iter": 9605, "iter_tflops": 26.362297697802706, "iter_time": 0.7825984573364259, "loss": 0.6413894295692444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.776953679559067, "step_time": 0.7427414016723632} +{"epoch": 0, "iter": 9606, "iter_tflops": 13.274602531512523, "iter_time": 1.5541778717041013, "loss": 0.7429988384246826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.975160630188178, "step_time": 1.2153695602416992} +{"epoch": 0, "iter": 9607, "iter_tflops": 44.38877277427306, "iter_time": 0.464781795501709, "loss": 0.7248647809028625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.843669378555795, "step_time": 0.43121887969970707} +{"epoch": 0, "iter": 9608, "iter_tflops": 41.630912423254564, "iter_time": 0.4955714950561523, "loss": 0.7954973578453064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.8107304093539, "step_time": 0.4604052047729493} +{"epoch": 0, "iter": 9609, "iter_tflops": 42.07587522660895, "iter_time": 0.4903307037353515, "loss": 0.01825057528913021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.853877043983786, "step_time": 0.4499312782287598} +{"epoch": 0, "iter": 9610, "iter_tflops": 40.7920418696927, "iter_time": 0.5057627067565919, "loss": 0.06020137295126915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26433702131382, 
"step_time": 0.4557913551330566} +{"epoch": 0, "iter": 9611, "iter_tflops": 46.92907554298858, "iter_time": 0.4396228408813476, "loss": 0.02038983628153801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.687000357140136, "step_time": 0.3991543979644776} +{"epoch": 0, "iter": 9612, "iter_tflops": 43.22802423069275, "iter_time": 0.4772620048522949, "loss": 0.01966533623635769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.85260268174555, "step_time": 0.4311383781433105} +{"epoch": 0, "iter": 9613, "iter_tflops": 39.43564096891643, "iter_time": 0.5231585693359376, "loss": 0.005657748784869909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.09371776094505, "step_time": 0.46789190292358396} +{"epoch": 0, "iter": 9614, "iter_tflops": 40.310031970159734, "iter_time": 0.5118103981018067, "loss": 0.005511696450412273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.45840958142842, "step_time": 0.4640537910461426} +{"epoch": 0, "iter": 9615, "iter_tflops": 44.39869763737831, "iter_time": 0.4646778984069824, "loss": 0.0032198522239923477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35230837508885, "step_time": 0.418037052154541} +{"epoch": 0, "iter": 9616, "iter_tflops": 46.689132569095655, "iter_time": 0.4418821334838867, "loss": 0.006667559500783682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.76497116175181, "step_time": 0.39855317306518556} +{"epoch": 0, "iter": 9617, "iter_tflops": 24.939992711582004, "iter_time": 0.8272293319702149, "loss": 0.8624630570411682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.653965987904, "step_time": 0.7740346603393555} +{"epoch": 0, "iter": 9618, "iter_tflops": 25.304926719179562, "iter_time": 0.8152994766235352, "loss": 0.6808647513389587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.36165204376577, "step_time": 0.7274291877746583} +{"epoch": 0, "iter": 9619, "iter_tflops": 48.83835891724264, "iter_time": 0.42243625640869137, "loss": 0.756903350353241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
52.95784317010733, "step_time": 0.38957578849792485} +{"epoch": 0, "iter": 9620, "iter_tflops": 47.528170692990024, "iter_time": 0.43408137130737307, "loss": 0.6584635376930237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.55604935339506, "step_time": 0.4001682395935059} +{"epoch": 0, "iter": 9621, "iter_tflops": 23.425525618533985, "iter_time": 0.43031466293334963, "loss": 0.005807938519865274, "lr": 3e-05, "seqlen": 4064.0, "step_tflops": 25.833152980252013, "step_time": 0.3902097110748291} +{"epoch": 0, "iter": 9622, "iter_tflops": 22.730169593758067, "iter_time": 0.4434787483215331, "loss": 0.022228693589568138, "lr": 3e-05, "seqlen": 4064.0, "step_tflops": 25.79169522518275, "step_time": 0.3908369369506836} +{"epoch": 0, "iter": 9623, "iter_tflops": 26.444892515020108, "iter_time": 0.38118313980102536, "loss": 0.0013723886804655194, "lr": 3e-05, "seqlen": 4064.0, "step_tflops": 29.147426362056937, "step_time": 0.3458400421142578} +{"epoch": 0, "iter": 9624, "iter_tflops": 29.352045588609858, "iter_time": 0.34342911911010743, "loss": 0.020528897643089294, "lr": 3e-05, "seqlen": 4064.0, "step_tflops": 32.27971814369009, "step_time": 0.3122811393737793} +{"epoch": 0, "iter": 9625, "iter_tflops": 32.63727748375558, "iter_time": 0.632132797241211, "loss": 0.9637516140937805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.74624540038034, "step_time": 0.5937646865844727} +{"epoch": 0, "iter": 9626, "iter_tflops": 15.769365388295183, "iter_time": 1.3083020782470705, "loss": 0.9682108163833618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.023415550280035, "step_time": 1.0845104789733886} +{"epoch": 0, "iter": 9627, "iter_tflops": 45.56817399741548, "iter_time": 0.452752254486084, "loss": 0.7115039229393005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17647001009388, "step_time": 0.41953181076049795} +{"epoch": 0, "iter": 9628, "iter_tflops": 43.13717993200032, "iter_time": 0.47826708984375, "loss": 0.5597571730613708, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 46.052802853008004, "step_time": 0.44798779296875} +{"epoch": 0, "iter": 9629, "iter_tflops": 26.20871964069407, "iter_time": 0.7871843338012695, "loss": 0.752273678779602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.965429606799617, "step_time": 0.7377356185913085} +{"epoch": 0, "iter": 9630, "iter_tflops": 16.699724015964417, "iter_time": 1.2354152374267577, "loss": 0.8485047221183777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.14202316054165, "step_time": 1.0242810935974123} +{"epoch": 0, "iter": 9631, "iter_tflops": 39.13169503521437, "iter_time": 0.5272220764160156, "loss": 0.6857278347015381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1205999104553, "step_time": 0.48981005859375} +{"epoch": 0, "iter": 9632, "iter_tflops": 35.55636627531927, "iter_time": 0.5802362747192382, "loss": 0.5878933072090149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.782672432084865, "step_time": 0.5319667835235595} +{"epoch": 0, "iter": 9633, "iter_tflops": 20.1282248139681, "iter_time": 1.0249832611083984, "loss": 0.48186221718788147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.463959903941998, "step_time": 0.9611969833374023} +{"epoch": 0, "iter": 9634, "iter_tflops": 18.07835386406346, "iter_time": 1.1412042083740235, "loss": 0.5880345106124878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.831927120791107, "step_time": 0.9449964447021484} +{"epoch": 0, "iter": 9635, "iter_tflops": 41.49362178589892, "iter_time": 0.4972112007141113, "loss": 0.5104561448097229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.29435079616348, "step_time": 0.4554893302917481} +{"epoch": 0, "iter": 9636, "iter_tflops": 44.82844227853837, "iter_time": 0.4602232971191406, "loss": 0.40553373098373413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.738932518281324, "step_time": 0.42329801750183105} +{"epoch": 0, "iter": 9637, "iter_tflops": 33.9422729610658, "iter_time": 0.6078288726806641, "loss": 0.11356281489133835, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 37.76133261197123, "step_time": 0.546355016708374} +{"epoch": 0, "iter": 9638, "iter_tflops": 35.53919358681361, "iter_time": 0.5805166473388672, "loss": 0.11324488371610641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.99133757064945, "step_time": 0.5158890590667724} +{"epoch": 0, "iter": 9639, "iter_tflops": 42.59464495385455, "iter_time": 0.4843588562011719, "loss": 0.0790259838104248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.833969591990915, "step_time": 0.4405155849456787} +{"epoch": 0, "iter": 9640, "iter_tflops": 46.62437221999801, "iter_time": 0.44249589920043947, "loss": 0.2111690491437912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.018951080255256, "step_time": 0.40438098144531254} +{"epoch": 0, "iter": 9641, "iter_tflops": 28.867662861727688, "iter_time": 0.7146783447265626, "loss": 0.18186232447624207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.045257667854596, "step_time": 0.6645489540100098} +{"epoch": 0, "iter": 9642, "iter_tflops": 14.14497201783214, "iter_time": 1.4585460815429687, "loss": 0.27861788868904114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.91276938339122, "step_time": 1.090855236053467} +{"epoch": 0, "iter": 9643, "iter_tflops": 49.273402787898185, "iter_time": 0.4187064895629883, "loss": 0.17095644772052765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.57826771399287, "step_time": 0.38506458663940435} +{"epoch": 0, "iter": 9644, "iter_tflops": 48.640063670164814, "iter_time": 0.42415843963623046, "loss": 0.14143522083759308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.66322677364404, "step_time": 0.3917552108764648} +{"epoch": 0, "iter": 9645, "iter_tflops": 25.17613662177315, "iter_time": 0.8194701919555665, "loss": 0.23229214549064636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.923601739600645, "step_time": 0.7662828216552734} +{"epoch": 0, "iter": 9646, "iter_tflops": 14.037018410771978, "iter_time": 1.4697632293701168, "loss": 0.17357373237609863, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 19.01637114713216, "step_time": 1.084912223815918} +{"epoch": 0, "iter": 9647, "iter_tflops": 48.75891613165378, "iter_time": 0.42312453079223633, "loss": 0.2349243015050888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.968729707553095, "step_time": 0.38949571990966797} +{"epoch": 0, "iter": 9648, "iter_tflops": 49.64978417357088, "iter_time": 0.4155323905944825, "loss": 0.18823480606079102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.704552380584595, "step_time": 0.38415911865234376} +{"epoch": 0, "iter": 9649, "iter_tflops": 27.401267152782918, "iter_time": 0.7529247970581054, "loss": 1.1050169467926025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.94893556023507, "step_time": 0.7126719207763672} +{"epoch": 0, "iter": 9650, "iter_tflops": 12.351251916668414, "iter_time": 1.6703645629882815, "loss": 1.0316270589828491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.35697831247767, "step_time": 1.3434344367980957} +{"epoch": 0, "iter": 9651, "iter_tflops": 46.052718933496536, "iter_time": 0.4479886093139648, "loss": 0.8612663149833679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.90393320376996, "step_time": 0.413416181564331} +{"epoch": 0, "iter": 9652, "iter_tflops": 45.39367729305312, "iter_time": 0.4544926681518554, "loss": 0.9916171431541443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.07896451443088, "step_time": 0.4203652973175049} +{"epoch": 0, "iter": 9653, "iter_tflops": 36.158080275876316, "iter_time": 0.5705804443359376, "loss": 0.14364507794380188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.77774929615089, "step_time": 0.5320343208312988} +{"epoch": 0, "iter": 9654, "iter_tflops": 12.51163290825332, "iter_time": 1.6489529113769532, "loss": 0.1651967167854309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.290133042495995, "step_time": 1.2664779014587402} +{"epoch": 0, "iter": 9655, "iter_tflops": 45.974021213514675, "iter_time": 0.44875547027587886, "loss": 0.12470841407775879, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 49.98632090291579, "step_time": 0.41273478698730476} +{"epoch": 0, "iter": 9656, "iter_tflops": 50.13458489454613, "iter_time": 0.41151419830322267, "loss": 0.10507321357727051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.27953133281666, "step_time": 0.380089750289917} +{"epoch": 0, "iter": 9657, "iter_tflops": 24.752150352448886, "iter_time": 0.8335071182250977, "loss": 0.07654759287834167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.95336588742735, "step_time": 0.7949293975830078} +{"epoch": 0, "iter": 9658, "iter_tflops": 17.149281240976993, "iter_time": 1.2030296325683594, "loss": 0.07016046345233917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.426536796730886, "step_time": 0.919941125869751} +{"epoch": 0, "iter": 9659, "iter_tflops": 51.38047449366785, "iter_time": 0.40153567504882803, "loss": 0.06992055475711823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.09110695145924, "step_time": 0.3678139839172363} +{"epoch": 0, "iter": 9660, "iter_tflops": 50.71300691873287, "iter_time": 0.4068205528259278, "loss": 0.09587417542934418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.855571763617405, "step_time": 0.37609841346740724} +{"epoch": 0, "iter": 9661, "iter_tflops": 41.60747970822122, "iter_time": 0.4958505935668945, "loss": 0.3525603711605072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.444217751687184, "step_time": 0.45398720741271975} +{"epoch": 0, "iter": 9662, "iter_tflops": 42.476854866496055, "iter_time": 0.4857020034790039, "loss": 0.527079701423645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.37659031253143, "step_time": 0.43547020530700675} +{"epoch": 0, "iter": 9663, "iter_tflops": 44.18178267118648, "iter_time": 0.46695928192138675, "loss": 0.5028370022773743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.367196822828454, "step_time": 0.43555656433105466} +{"epoch": 0, "iter": 9664, "iter_tflops": 49.616481755721416, "iter_time": 0.415811294555664, "loss": 0.6546852588653564, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 53.64870456569008, "step_time": 0.384559024810791} +{"epoch": 0, "iter": 9665, "iter_tflops": 38.29126490661409, "iter_time": 0.5387937316894532, "loss": 0.6125016808509827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.527950441658476, "step_time": 0.49680018615722654} +{"epoch": 0, "iter": 9666, "iter_tflops": 44.626806565831934, "iter_time": 0.4623027076721192, "loss": 0.6683974266052246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.445540325221785, "step_time": 0.42586156272888176} +{"epoch": 0, "iter": 9667, "iter_tflops": 47.904979681288935, "iter_time": 0.4306669921875, "loss": 0.6411133408546448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.80330314725862, "step_time": 0.3982582626342773} +{"epoch": 0, "iter": 9668, "iter_tflops": 49.91985441835158, "iter_time": 0.4132843284606933, "loss": 0.6965529918670654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.858589109795915, "step_time": 0.3830604152679444} +{"epoch": 0, "iter": 9669, "iter_tflops": 35.94954326434624, "iter_time": 0.5738902816772461, "loss": 0.4029907286167145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.538657243210515, "step_time": 0.5353350372314453} +{"epoch": 0, "iter": 9670, "iter_tflops": 45.38743922389139, "iter_time": 0.45455513381958007, "loss": 0.36507827043533325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.325503087626224, "step_time": 0.4182642288208007} +{"epoch": 0, "iter": 9671, "iter_tflops": 48.009157432391106, "iter_time": 0.42973246383667, "loss": 0.294268935918808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.96874808192777, "step_time": 0.3969903888702393} +{"epoch": 0, "iter": 9672, "iter_tflops": 48.8043974181317, "iter_time": 0.4227302169799804, "loss": 0.29929351806640625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.41220252246437, "step_time": 0.39363149261474606} +{"epoch": 0, "iter": 9673, "iter_tflops": 29.020755701008024, "iter_time": 0.7109082107543945, "loss": 0.7645414471626282, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 30.69283195722646, "step_time": 0.6721795349121094} +{"epoch": 0, "iter": 9674, "iter_tflops": 15.175225683226659, "iter_time": 1.359524658203125, "loss": 0.6423779129981995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.932427957507542, "step_time": 0.9856044197082519} +{"epoch": 0, "iter": 9675, "iter_tflops": 42.79085769947451, "iter_time": 0.482137882232666, "loss": 0.6969442367553711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.02176390348662, "step_time": 0.44828993415832524} +{"epoch": 0, "iter": 9676, "iter_tflops": 44.1862778034247, "iter_time": 0.46691177749633794, "loss": 0.8389325737953186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.28944986914445, "step_time": 0.43627264785766606} +{"epoch": 0, "iter": 9677, "iter_tflops": 37.942964975557345, "iter_time": 0.5437396240234376, "loss": 0.5874016284942627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.149424812453496, "step_time": 0.5013701553344726} +{"epoch": 0, "iter": 9678, "iter_tflops": 45.85504920796741, "iter_time": 0.4499197769165039, "loss": 0.6871146559715271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.56924014230515, "step_time": 0.4162075805664063} +{"epoch": 0, "iter": 9679, "iter_tflops": 52.8799983417617, "iter_time": 0.390149284362793, "loss": 0.4773985743522644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.22550765689278, "step_time": 0.3605226821899413} +{"epoch": 0, "iter": 9680, "iter_tflops": 47.109241750982875, "iter_time": 0.43794153213500975, "loss": 0.497912734746933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.09763429638226, "step_time": 0.4037582912445068} +{"epoch": 0, "iter": 9681, "iter_tflops": 44.396893880076505, "iter_time": 0.46469677734375, "loss": 0.09569453448057175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.74838433780538, "step_time": 0.4232159442901612} +{"epoch": 0, "iter": 9682, "iter_tflops": 12.017877359254854, "iter_time": 1.7167002868652341, "loss": 0.07020218670368195, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 13.066140553578416, "step_time": 1.5789737930297851} +{"epoch": 0, "iter": 9683, "iter_tflops": 9.937192502313755, "iter_time": 2.0761491241455077, "loss": 0.0955507755279541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.755272317840486, "step_time": 1.6174561386108397} +{"epoch": 0, "iter": 9684, "iter_tflops": 15.689643461948728, "iter_time": 1.3149497985839844, "loss": 0.0889625996351242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.93738666147914, "step_time": 1.0894372005462647} +{"epoch": 0, "iter": 9685, "iter_tflops": 11.491128496253369, "iter_time": 1.1802464752197266, "loss": 0.20538240671157837, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 12.426819907214187, "step_time": 1.091378486633301} +{"epoch": 0, "iter": 9686, "iter_tflops": 14.481612285128467, "iter_time": 0.936523063659668, "loss": 0.23914377391338348, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 17.810042362074352, "step_time": 0.7615009346008301} +{"epoch": 0, "iter": 9687, "iter_tflops": 23.087651671777238, "iter_time": 0.5874293365478515, "loss": 0.2956629693508148, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 24.544923774603443, "step_time": 0.5525527000427246} +{"epoch": 0, "iter": 9688, "iter_tflops": 23.280745808256256, "iter_time": 0.5825571060180664, "loss": 0.2463642656803131, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 24.72116109153674, "step_time": 0.5486135482788086} +{"epoch": 0, "iter": 9689, "iter_tflops": 30.6907223686508, "iter_time": 0.6722257385253906, "loss": 0.8929510712623596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.83428858576163, "step_time": 0.6097687988281251} +{"epoch": 0, "iter": 9690, "iter_tflops": 23.94505563185514, "iter_time": 0.8616014022827149, "loss": 0.528786838054657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.68959927609054, "step_time": 0.6948929595947266} +{"epoch": 0, "iter": 9691, "iter_tflops": 41.71396975728931, "iter_time": 0.4945847549438477, "loss": 0.4444926083087921, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 45.0836352983548, "step_time": 0.45761823272705077} +{"epoch": 0, "iter": 9692, "iter_tflops": 43.845694693599945, "iter_time": 0.4705386390686035, "loss": 0.6473550796508789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.247642991579184, "step_time": 0.43665868186950685} +{"epoch": 0, "iter": 9693, "iter_tflops": 34.28203928843075, "iter_time": 0.6018047332763672, "loss": 0.7517713308334351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.63570576041186, "step_time": 0.5631416969299317} +{"epoch": 0, "iter": 9694, "iter_tflops": 41.95357047973715, "iter_time": 0.49176013565063476, "loss": 0.6790819764137268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.51519482377845, "step_time": 0.45327925300598143} +{"epoch": 0, "iter": 9695, "iter_tflops": 45.30509352936637, "iter_time": 0.45538132476806636, "loss": 0.8078116774559021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22018727530959, "step_time": 0.4191591835021973} +{"epoch": 0, "iter": 9696, "iter_tflops": 48.43798221252803, "iter_time": 0.4259280128479004, "loss": 0.6816364526748657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40432065635187, "step_time": 0.3936906967163086} +{"epoch": 0, "iter": 9697, "iter_tflops": 25.06214949688042, "iter_time": 0.8231972885131835, "loss": 0.012515525333583355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.52723780403181, "step_time": 0.7777324447631837} +{"epoch": 0, "iter": 9698, "iter_tflops": 21.87838389494626, "iter_time": 0.9429898300170898, "loss": 0.009460648521780968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.018472177391114, "step_time": 0.7635921592712402} +{"epoch": 0, "iter": 9699, "iter_tflops": 51.88874478977763, "iter_time": 0.3976024780273438, "loss": 0.010818912647664547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.581636948934054, "step_time": 0.36462524986267086} +{"epoch": 0, "iter": 9700, "iter_tflops": 57.166364464006065, "iter_time": 0.36089567184448235, "loss": 
0.00829610787332058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.262569911673694, "step_time": 0.3313562793731689} +{"epoch": 0, "iter": 9701, "iter_tflops": 29.985037507834246, "iter_time": 0.6880462799072266, "loss": 0.2781532406806946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.855430827389018, "step_time": 0.6476476058959961} +{"epoch": 0, "iter": 9702, "iter_tflops": 11.63028402842307, "iter_time": 1.7739114074707032, "loss": 0.32974690198898315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.197368022084659, "step_time": 1.4531632537841799} +{"epoch": 0, "iter": 9703, "iter_tflops": 42.482346841025034, "iter_time": 0.4856392135620117, "loss": 0.22703170776367188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.03853505969231, "step_time": 0.4385998306274414} +{"epoch": 0, "iter": 9704, "iter_tflops": 45.82638434923546, "iter_time": 0.45020120620727533, "loss": 0.22526176273822784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.726448426400744, "step_time": 0.41489175605773926} +{"epoch": 0, "iter": 9705, "iter_tflops": 19.415580891985606, "iter_time": 0.6796774139404296, "loss": 0.006654755678027868, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 20.836448593230994, "step_time": 0.6333292236328125} +{"epoch": 0, "iter": 9706, "iter_tflops": 14.153514058922465, "iter_time": 0.9323714065551758, "loss": 0.004923176486045122, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 16.166323078197657, "step_time": 0.8162852954864502} +{"epoch": 0, "iter": 9707, "iter_tflops": 35.90626738661913, "iter_time": 0.3675216827392578, "loss": 0.010778367519378662, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 39.60276750997692, "step_time": 0.333217414855957} +{"epoch": 0, "iter": 9708, "iter_tflops": 34.697004952274064, "iter_time": 0.3803305740356445, "loss": 0.02084982581436634, "lr": 3e-05, "seqlen": 5296.0, "step_tflops": 38.26031730679693, "step_time": 0.3449091053009033} +{"epoch": 0, "iter": 9709, "iter_tflops": 36.931899287398146, "iter_time": 
0.5586253051757812, "loss": 0.2856174409389496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.91604348650276, "step_time": 0.5168621864318848} +{"epoch": 0, "iter": 9710, "iter_tflops": 38.10582613418698, "iter_time": 0.5414157257080078, "loss": 0.3341822624206543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.4943817648576, "step_time": 0.4855016746520996} +{"epoch": 0, "iter": 9711, "iter_tflops": 36.98348463685006, "iter_time": 0.5578461227416993, "loss": 0.23062606155872345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.21417159734965, "step_time": 0.513030424118042} +{"epoch": 0, "iter": 9712, "iter_tflops": 40.667058047511745, "iter_time": 0.5073170890808105, "loss": 0.22366610169410706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27562278781522, "step_time": 0.46596958351135254} +{"epoch": 0, "iter": 9713, "iter_tflops": 25.758062870366235, "iter_time": 0.8009567184448242, "loss": 0.10680023580789566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.827667426649064, "step_time": 0.7413878135681152} +{"epoch": 0, "iter": 9714, "iter_tflops": 11.437828819070567, "iter_time": 1.8037595977783205, "loss": 0.13633981347084045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.84201845163615, "step_time": 1.4904685745239257} +{"epoch": 0, "iter": 9715, "iter_tflops": 13.625734901556646, "iter_time": 1.5141270294189453, "loss": 0.08829884976148605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.265218468863278, "step_time": 1.2684178543090818} +{"epoch": 0, "iter": 9716, "iter_tflops": 45.18103815925601, "iter_time": 0.45663168334960935, "loss": 0.1566934734582901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23506532538231, "step_time": 0.4190325202941895} +{"epoch": 0, "iter": 9717, "iter_tflops": 22.281678483785132, "iter_time": 0.7002540435791016, "loss": 0.22262561321258545, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 23.642493213937453, "step_time": 0.6599488182067871} +{"epoch": 0, "iter": 9718, "iter_tflops": 13.82998654089473, 
"iter_time": 1.1281887664794923, "loss": 0.19222046434879303, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.62939651461358, "step_time": 0.9382682914733886} +{"epoch": 0, "iter": 9719, "iter_tflops": 23.05363104552682, "iter_time": 0.6768059844970703, "loss": 0.18465979397296906, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.753689354345724, "step_time": 0.6303236351013184} +{"epoch": 0, "iter": 9720, "iter_tflops": 25.981742896556735, "iter_time": 0.600530746459961, "loss": 0.17678755521774292, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 27.82282605158035, "step_time": 0.5607926177978515} +{"epoch": 0, "iter": 9721, "iter_tflops": 17.550735612149605, "iter_time": 1.1755116119384765, "loss": 0.7194446921348572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.701812728392827, "step_time": 1.1031600952148437} +{"epoch": 0, "iter": 9722, "iter_tflops": 23.48952019799596, "iter_time": 0.8783105545043945, "loss": 0.49613717198371887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.932587558454898, "step_time": 0.7955663299560547} +{"epoch": 0, "iter": 9723, "iter_tflops": 37.23043731482466, "iter_time": 0.5541458816528321, "loss": 0.7742586731910706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.64777490146737, "step_time": 0.5075577583312988} +{"epoch": 0, "iter": 9724, "iter_tflops": 35.655040226672526, "iter_time": 0.5786304931640625, "loss": 0.865534782409668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.8389180501871, "step_time": 0.5311964015960693} +{"epoch": 0, "iter": 9725, "iter_tflops": 37.310319081801616, "iter_time": 0.5529594497680663, "loss": 0.038080085068941116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.4790264221515, "step_time": 0.49738615608215336} +{"epoch": 0, "iter": 9726, "iter_tflops": 38.74842104523342, "iter_time": 0.53243701171875, "loss": 0.016982652246952057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.668769608397376, "step_time": 0.47244503784179687} +{"epoch": 0, "iter": 9727, "iter_tflops": 
43.293731290084054, "iter_time": 0.4765376625061035, "loss": 0.06223214045166969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.619505438322264, "step_time": 0.43324879837036134} +{"epoch": 0, "iter": 9728, "iter_tflops": 44.37188017961411, "iter_time": 0.464958740234375, "loss": 0.0323881097137928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.9314358645743, "step_time": 0.4216327018737793} +{"epoch": 0, "iter": 9729, "iter_tflops": 11.645676321275, "iter_time": 1.0843258819580077, "loss": 0.009029393084347248, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 12.434736267191681, "step_time": 1.0155187835693358} +{"epoch": 0, "iter": 9730, "iter_tflops": 11.102395683004055, "iter_time": 1.1373858947753905, "loss": 0.0011837936472147703, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 13.58041081748849, "step_time": 0.9298472938537599} +{"epoch": 0, "iter": 9731, "iter_tflops": 33.92898072657976, "iter_time": 0.37218059539794923, "loss": 0.010797464288771152, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 37.31004758542619, "step_time": 0.3384532871246338} +{"epoch": 0, "iter": 9732, "iter_tflops": 37.754102277249025, "iter_time": 0.3344724807739257, "loss": 0.006901689805090427, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 41.43804209073707, "step_time": 0.3047370872497559} +{"epoch": 0, "iter": 9733, "iter_tflops": 45.78077175607258, "iter_time": 0.45064975357055664, "loss": 0.049353063106536865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.41245677171766, "step_time": 0.40924594497680666} +{"epoch": 0, "iter": 9734, "iter_tflops": 38.79623340179753, "iter_time": 0.5317808380126953, "loss": 0.017530199140310287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.45475791249351, "step_time": 0.4747717971801758} +{"epoch": 0, "iter": 9735, "iter_tflops": 42.31846258788716, "iter_time": 0.48751992034912106, "loss": 0.06689605116844177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.723201450407565, "step_time": 0.44155992889404294} +{"epoch": 0, "iter": 9736, 
"iter_tflops": 47.19185028101099, "iter_time": 0.43717492294311516, "loss": 0.04812988266348839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.94481407448961, "step_time": 0.3971733055114747} +{"epoch": 0, "iter": 9737, "iter_tflops": 34.31079816166827, "iter_time": 0.601300308227539, "loss": 0.7157647609710693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.8828625288528, "step_time": 0.5446022853851319} +{"epoch": 0, "iter": 9738, "iter_tflops": 39.74710227426651, "iter_time": 0.5190590591430664, "loss": 0.523064136505127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.45471514161592, "step_time": 0.47477226448059084} +{"epoch": 0, "iter": 9739, "iter_tflops": 41.47700087347517, "iter_time": 0.4974104461669922, "loss": 0.6112204194068909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27091038512328, "step_time": 0.4557251739501953} +{"epoch": 0, "iter": 9740, "iter_tflops": 39.11792984095423, "iter_time": 0.5274076004028321, "loss": 0.5195069909095764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.3369256336884, "step_time": 0.48730731391906745} +{"epoch": 0, "iter": 9741, "iter_tflops": 17.303145675487997, "iter_time": 1.1923319549560547, "loss": 0.02175111696124077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.585157521317882, "step_time": 1.1100844039916993} +{"epoch": 0, "iter": 9742, "iter_tflops": 18.95458058267921, "iter_time": 1.088448959350586, "loss": 0.04877100884914398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.046970718237233, "step_time": 0.7102666130065918} +{"epoch": 0, "iter": 9743, "iter_tflops": 46.53397773193478, "iter_time": 0.44335546875000004, "loss": 0.0424308218061924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19223148502368, "step_time": 0.40301219367980956} +{"epoch": 0, "iter": 9744, "iter_tflops": 42.10989048484047, "iter_time": 0.48993462753295897, "loss": 0.06145380437374115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.70781359778541, "step_time": 0.441705400466919} +{"epoch": 0, "iter": 9745, 
"iter_tflops": 17.4399805050105, "iter_time": 1.1829768676757813, "loss": 0.6456390619277954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.5689607749438, "step_time": 1.1110526733398438} +{"epoch": 0, "iter": 9746, "iter_tflops": 25.375277900571927, "iter_time": 0.8130391159057616, "loss": 0.7510675191879272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.586706598396376, "step_time": 0.6531574745178222} +{"epoch": 0, "iter": 9747, "iter_tflops": 43.657976786113636, "iter_time": 0.4725618324279785, "loss": 0.5215510725975037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.99357542307933, "step_time": 0.4390194473266602} +{"epoch": 0, "iter": 9748, "iter_tflops": 42.83005993206077, "iter_time": 0.4816965827941895, "loss": 0.7588285207748413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06220926638561, "step_time": 0.44789630889892573} +{"epoch": 0, "iter": 9749, "iter_tflops": 42.75654510392837, "iter_time": 0.4825248031616211, "loss": 0.11132587492465973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.6886501115493, "step_time": 0.4418866996765136} +{"epoch": 0, "iter": 9750, "iter_tflops": 40.87953675381293, "iter_time": 0.5046802177429199, "loss": 0.1524537205696106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25483914693093, "step_time": 0.466188419342041} +{"epoch": 0, "iter": 9751, "iter_tflops": 49.55336581676628, "iter_time": 0.4163409118652344, "loss": 0.20747679471969604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.59714388308559, "step_time": 0.3849289722442627} +{"epoch": 0, "iter": 9752, "iter_tflops": 47.585399381973296, "iter_time": 0.43355932235717776, "loss": 0.19491934776306152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.68055781686797, "step_time": 0.3992041568756103} +{"epoch": 0, "iter": 9753, "iter_tflops": 43.551091746604556, "iter_time": 0.47372161483764647, "loss": 0.08450303971767426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.62377412519888, "step_time": 0.43320996475219725} +{"epoch": 0, "iter": 9754, 
"iter_tflops": 40.638450427818505, "iter_time": 0.5076742172241211, "loss": 0.11763764917850494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.02393279216397, "step_time": 0.45822504234313965} +{"epoch": 0, "iter": 9755, "iter_tflops": 46.0521895417595, "iter_time": 0.4479937591552734, "loss": 0.03289828076958656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.3299906237807, "step_time": 0.40991649818420406} +{"epoch": 0, "iter": 9756, "iter_tflops": 43.471721717982895, "iter_time": 0.47458652877807617, "loss": 0.06846416741609573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.469439103576725, "step_time": 0.4346184387207031} +{"epoch": 0, "iter": 9757, "iter_tflops": 30.063036938762714, "iter_time": 0.6862611236572266, "loss": 0.9271828532218933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.55824050569222, "step_time": 0.6336673355102539} +{"epoch": 0, "iter": 9758, "iter_tflops": 16.84558289112491, "iter_time": 1.2247182922363282, "loss": 0.8255515098571777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.855960790256223, "step_time": 1.039037784576416} +{"epoch": 0, "iter": 9759, "iter_tflops": 39.22835575642332, "iter_time": 0.5259229736328125, "loss": 0.6618062853813171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.019564557253624, "step_time": 0.479574670791626} +{"epoch": 0, "iter": 9760, "iter_tflops": 42.12457481930094, "iter_time": 0.4897638397216797, "loss": 0.7189421653747559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.732264290992326, "step_time": 0.45112775039672853} +{"epoch": 0, "iter": 9761, "iter_tflops": 13.269762258987303, "iter_time": 1.1696509552001952, "loss": 0.5058574676513672, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 14.255520423168829, "step_time": 1.0887705001831054} +{"epoch": 0, "iter": 9762, "iter_tflops": 15.46676720671622, "iter_time": 1.0035057678222656, "loss": 0.27841708064079285, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 18.60481616605637, "step_time": 0.8342458190917967} +{"epoch": 0, "iter": 
9763, "iter_tflops": 27.448485289597418, "iter_time": 0.5654588928222656, "loss": 0.19626930356025696, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 29.340261383689384, "step_time": 0.5289997215270996} +{"epoch": 0, "iter": 9764, "iter_tflops": 27.35495170301279, "iter_time": 0.5673923416137695, "loss": 0.2619970738887787, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 29.046407653759417, "step_time": 0.5343514518737793} +{"epoch": 0, "iter": 9765, "iter_tflops": 37.33856301061056, "iter_time": 0.5525411758422851, "loss": 0.073646679520607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03501005056803, "step_time": 0.5153262977600098} +{"epoch": 0, "iter": 9766, "iter_tflops": 14.743173872520309, "iter_time": 1.3993658142089844, "loss": 0.04683005064725876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.926819823025692, "step_time": 1.0900454330444336} +{"epoch": 0, "iter": 9767, "iter_tflops": 36.70733174876098, "iter_time": 0.5620428543090821, "loss": 0.12239197641611099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3788080031405, "step_time": 0.4985908126831054} +{"epoch": 0, "iter": 9768, "iter_tflops": 43.04066607763935, "iter_time": 0.4793395500183105, "loss": 0.09098237752914429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.07321059792178, "step_time": 0.43827674484252926} +{"epoch": 0, "iter": 9769, "iter_tflops": 16.578931551555893, "iter_time": 1.2444163513183593, "loss": 0.009156550280749798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.524268933589852, "step_time": 1.1772869720458987} +{"epoch": 0, "iter": 9770, "iter_tflops": 16.562995921672623, "iter_time": 1.2456136322021485, "loss": 0.010953526012599468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.0568816701089, "step_time": 0.9353585796356202} +{"epoch": 0, "iter": 9771, "iter_tflops": 46.83959929899223, "iter_time": 0.4404626388549804, "loss": 0.0041954293847084045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.906277688894306, "step_time": 0.3974681758880616} +{"epoch": 0, 
"iter": 9772, "iter_tflops": 48.18650276474498, "iter_time": 0.42815087890625003, "loss": 0.002053220756351948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21758234394609, "step_time": 0.3876743850708008} +{"epoch": 0, "iter": 9773, "iter_tflops": 17.039747747398113, "iter_time": 1.2107628479003907, "loss": 0.7483262419700623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.149807333459705, "step_time": 1.1367114334106445} +{"epoch": 0, "iter": 9774, "iter_tflops": 28.252121645468694, "iter_time": 0.7302493515014647, "loss": 0.6045613884925842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.08062874467175, "step_time": 0.6053612937927246} +{"epoch": 0, "iter": 9775, "iter_tflops": 37.60969751090423, "iter_time": 0.5485578155517578, "loss": 0.7487360835075378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.043364749496156, "step_time": 0.5026657447814942} +{"epoch": 0, "iter": 9776, "iter_tflops": 38.40376462852724, "iter_time": 0.5372153930664062, "loss": 0.7278378009796143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76912926599741, "step_time": 0.49393161582946776} +{"epoch": 0, "iter": 9777, "iter_tflops": 35.68750858811061, "iter_time": 0.5781040573120118, "loss": 0.2088300734758377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.58190362372939, "step_time": 0.521225399017334} +{"epoch": 0, "iter": 9778, "iter_tflops": 37.287952227250706, "iter_time": 0.5532911376953126, "loss": 0.15862774848937988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.786210849872184, "step_time": 0.49372970390319837} +{"epoch": 0, "iter": 9779, "iter_tflops": 41.59426456998339, "iter_time": 0.49600813293457036, "loss": 0.17629534006118774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.48826196992366, "step_time": 0.4535476322174072} +{"epoch": 0, "iter": 9780, "iter_tflops": 40.08496696885108, "iter_time": 0.5146840591430664, "loss": 0.2532050609588623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.98892031560501, "step_time": 0.4690065898895263} +{"epoch": 
0, "iter": 9781, "iter_tflops": 15.10122300235659, "iter_time": 1.2318941345214842, "loss": 0.0012637291802093387, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 16.295845263115933, "step_time": 1.141585952758789} +{"epoch": 0, "iter": 9782, "iter_tflops": 16.555921689529413, "iter_time": 1.1236528167724606, "loss": 0.00793751236051321, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 20.173306949558498, "step_time": 0.9221645259857179} +{"epoch": 0, "iter": 9783, "iter_tflops": 39.56695879030841, "iter_time": 0.47016775131225585, "loss": 0.006849430967122316, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 44.02181265258386, "step_time": 0.4225884151458741} +{"epoch": 0, "iter": 9784, "iter_tflops": 40.72813317200662, "iter_time": 0.4567630920410156, "loss": 0.007636156398802996, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 45.178967905353375, "step_time": 0.4117647857666016} +{"epoch": 0, "iter": 9785, "iter_tflops": 18.627764190324257, "iter_time": 0.9522044296264648, "loss": 0.006123181898146868, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 19.817765647366446, "step_time": 0.8950272140502932} +{"epoch": 0, "iter": 9786, "iter_tflops": 19.95884731330465, "iter_time": 0.8887005996704102, "loss": 0.011928115971386433, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 22.495417106724474, "step_time": 0.7884912509918212} +{"epoch": 0, "iter": 9787, "iter_tflops": 44.971306993214434, "iter_time": 0.3944168128967285, "loss": 0.008855714462697506, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 49.56988696176884, "step_time": 0.35782691192626953} +{"epoch": 0, "iter": 9788, "iter_tflops": 50.6190597168058, "iter_time": 0.3504102935791016, "loss": 0.003962040413171053, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 55.48851312588471, "step_time": 0.3196596660614014} +{"epoch": 0, "iter": 9789, "iter_tflops": 21.34395324674292, "iter_time": 0.9666013259887696, "loss": 0.71468585729599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.34571766734938, "step_time": 0.9232683334350587} 
+{"epoch": 0, "iter": 9790, "iter_tflops": 12.651828536057376, "iter_time": 1.6306807708740234, "loss": 0.722374677658081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.014777115215587, "step_time": 1.2125397453308104} +{"epoch": 0, "iter": 9791, "iter_tflops": 35.850607889494704, "iter_time": 0.575474021911621, "loss": 0.7178434133529663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.84262476764788, "step_time": 0.531145709991455} +{"epoch": 0, "iter": 9792, "iter_tflops": 35.2967414869503, "iter_time": 0.5845041961669921, "loss": 0.6246756911277771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.22794940725445, "step_time": 0.5396861152648926} +{"epoch": 0, "iter": 9793, "iter_tflops": 18.55389524790169, "iter_time": 1.111954833984375, "loss": 0.27277815341949463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.80410632181379, "step_time": 1.0417583694458008} +{"epoch": 0, "iter": 9794, "iter_tflops": 18.054656360210036, "iter_time": 1.1427020874023437, "loss": 0.27448269724845886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.91530958175514, "step_time": 1.035941390991211} +{"epoch": 0, "iter": 9795, "iter_tflops": 47.01284072495902, "iter_time": 0.438839542388916, "loss": 0.33742985129356384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.07986757493322, "step_time": 0.4038987274169922} +{"epoch": 0, "iter": 9796, "iter_tflops": 49.26671667555162, "iter_time": 0.41876331329345706, "loss": 0.3105524480342865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.40020991748488, "step_time": 0.3863485469818116} +{"epoch": 0, "iter": 9797, "iter_tflops": 27.112640206187457, "iter_time": 0.5077292404174805, "loss": 0.02230706624686718, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 29.295326959454098, "step_time": 0.46990020751953127} +{"epoch": 0, "iter": 9798, "iter_tflops": 27.31224952362908, "iter_time": 0.5040185432434082, "loss": 0.0017085676081478596, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 30.413353774319535, "step_time": 0.45262618255615233} 
+{"epoch": 0, "iter": 9799, "iter_tflops": 34.56981793767435, "iter_time": 0.39820516967773434, "loss": 0.0042266142554581165, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 38.21778179680538, "step_time": 0.3601956882476807} +{"epoch": 0, "iter": 9800, "iter_tflops": 33.269176537676074, "iter_time": 0.41377279663085936, "loss": 0.0036326814442873, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 36.75932146663114, "step_time": 0.3744867877960205} +{"epoch": 0, "iter": 9801, "iter_tflops": 22.586351652889586, "iter_time": 0.7798489456176758, "loss": 0.007161078974604607, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 24.843087452536558, "step_time": 0.7090077896118163} +{"epoch": 0, "iter": 9802, "iter_tflops": 47.79839012457577, "iter_time": 0.3685049324035644, "loss": 0.002206917852163315, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 52.82538522511864, "step_time": 0.3334370861053467} +{"epoch": 0, "iter": 9803, "iter_tflops": 49.496889960566165, "iter_time": 0.35585958099365234, "loss": 0.0035933435428887606, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 54.114000456554, "step_time": 0.3254969577789307} +{"epoch": 0, "iter": 9804, "iter_tflops": 44.294565009124014, "iter_time": 0.3976547126770019, "loss": 0.006342594511806965, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 48.34754394401708, "step_time": 0.3643192825317383} +{"epoch": 0, "iter": 9805, "iter_tflops": 45.027864452494676, "iter_time": 0.4581850318908692, "loss": 0.06084312126040459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.89970724802528, "step_time": 0.42190627861022945} +{"epoch": 0, "iter": 9806, "iter_tflops": 48.92343352227626, "iter_time": 0.4217016677856445, "loss": 0.05407283082604408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.62245951919797, "step_time": 0.3847472438812256} +{"epoch": 0, "iter": 9807, "iter_tflops": 48.94543596587002, "iter_time": 0.4215121002197265, "loss": 0.08997708559036255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.64104177567922, "step_time": 
0.3846139602661133} +{"epoch": 0, "iter": 9808, "iter_tflops": 53.16337508015511, "iter_time": 0.3880696716308594, "loss": 0.13072827458381653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.030373674932676, "step_time": 0.3555223274230957} +{"epoch": 0, "iter": 9809, "iter_tflops": 50.20163645431863, "iter_time": 0.41096456146240234, "loss": 0.05174946039915085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.33282891596782, "step_time": 0.37285448646545405} +{"epoch": 0, "iter": 9810, "iter_tflops": 50.50705035480793, "iter_time": 0.40847947692871095, "loss": 0.027456896379590034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.27396794695751, "step_time": 0.37325153732299804} +{"epoch": 0, "iter": 9811, "iter_tflops": 56.286163555236676, "iter_time": 0.36653934478759764, "loss": 0.03532567620277405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.79842961583572, "step_time": 0.33384494781494145} +{"epoch": 0, "iter": 9812, "iter_tflops": 58.104579517689494, "iter_time": 0.355068286895752, "loss": 0.052960895001888275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.83608310431092, "step_time": 0.3231885871887207} +{"epoch": 0, "iter": 9813, "iter_tflops": 42.78200339452421, "iter_time": 0.4822376670837402, "loss": 0.039684396237134933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.700140307408006, "step_time": 0.44177797698974614} +{"epoch": 0, "iter": 9814, "iter_tflops": 47.47711494292767, "iter_time": 0.4345481719970703, "loss": 0.023955103009939194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.9331918226327, "step_time": 0.3972621898651123} +{"epoch": 0, "iter": 9815, "iter_tflops": 54.63719547976872, "iter_time": 0.37760161972045897, "loss": 0.033329837024211884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.63252364704223, "step_time": 0.3459704914093018} +{"epoch": 0, "iter": 9816, "iter_tflops": 59.106645826857125, "iter_time": 0.3490486259460449, "loss": 0.06469181180000305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
64.50129988638504, "step_time": 0.31985546875} +{"epoch": 0, "iter": 9817, "iter_tflops": 44.28911225999884, "iter_time": 0.4658276596069336, "loss": 0.07652320712804794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.06181681831147, "step_time": 0.42926162338256835} +{"epoch": 0, "iter": 9818, "iter_tflops": 18.117054471674486, "iter_time": 1.1387664337158203, "loss": 0.10023608803749084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.511215368276133, "step_time": 1.0058445167541503} +{"epoch": 0, "iter": 9819, "iter_tflops": 47.52459606879709, "iter_time": 0.4341140213012695, "loss": 0.13380062580108643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.03952715841507, "step_time": 0.39645044136047364} +{"epoch": 0, "iter": 9820, "iter_tflops": 52.50097906563984, "iter_time": 0.39296588134765625, "loss": 0.09899072349071503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.29776670663793, "step_time": 0.360068021774292} +{"epoch": 0, "iter": 9821, "iter_tflops": 31.705644814949803, "iter_time": 0.6507072677612304, "loss": 0.01681622676551342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.67100046898305, "step_time": 0.6127258834838868} +{"epoch": 0, "iter": 9822, "iter_tflops": 17.430993807703715, "iter_time": 1.1835867614746094, "loss": 0.025904862210154533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.486931131018196, "step_time": 0.9601693878173829} +{"epoch": 0, "iter": 9823, "iter_tflops": 43.017482248639936, "iter_time": 0.47959788513183593, "loss": 0.02337946929037571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.09964181964998, "step_time": 0.43803079414367674} +{"epoch": 0, "iter": 9824, "iter_tflops": 46.17375577737363, "iter_time": 0.446814281463623, "loss": 0.04428227245807648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.66370739584417, "step_time": 0.4072164192199707} +{"epoch": 0, "iter": 9825, "iter_tflops": 18.530949828558313, "iter_time": 1.1133316802978515, "loss": 0.15762095153331757, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 19.814766027443447, "step_time": 1.0411979370117188} +{"epoch": 0, "iter": 9826, "iter_tflops": 20.200438054570206, "iter_time": 1.0213191146850584, "loss": 0.136315256357193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.579107773815768, "step_time": 0.8393752002716064} +{"epoch": 0, "iter": 9827, "iter_tflops": 40.592294616980446, "iter_time": 0.5082514724731445, "loss": 0.07851512730121613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.42219357729155, "step_time": 0.46443211936950685} +{"epoch": 0, "iter": 9828, "iter_tflops": 39.83455794177719, "iter_time": 0.5179194793701172, "loss": 0.08366727083921432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.47471735323375, "step_time": 0.47455382728576656} +{"epoch": 0, "iter": 9829, "iter_tflops": 21.4689463647684, "iter_time": 0.9609737319946289, "loss": 0.8455089926719666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.75203436231949, "step_time": 0.9067801666259765} +{"epoch": 0, "iter": 9830, "iter_tflops": 20.28746117061576, "iter_time": 1.0169381637573243, "loss": 0.6023059487342834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.786318895217377, "step_time": 0.8673512535095215} +{"epoch": 0, "iter": 9831, "iter_tflops": 40.70954574338172, "iter_time": 0.506787612915039, "loss": 0.8803784251213074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.17149816856417, "step_time": 0.4670680046081543} +{"epoch": 0, "iter": 9832, "iter_tflops": 39.84523992273476, "iter_time": 0.517780632019043, "loss": 0.6635706424713135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67607859745779, "step_time": 0.48343461227416995} +{"epoch": 0, "iter": 9833, "iter_tflops": 29.81048192729943, "iter_time": 0.6920751419067382, "loss": 0.03839462250471115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.611607240828572, "step_time": 0.652642978668213} +{"epoch": 0, "iter": 9834, "iter_tflops": 11.782748594114292, "iter_time": 1.7509576263427733, "loss": 0.01752595044672489, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 14.346348546576534, "step_time": 1.4380727920532226} +{"epoch": 0, "iter": 9835, "iter_tflops": 41.41136708491345, "iter_time": 0.49819880294799807, "loss": 0.04008464887738228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.833567304482315, "step_time": 0.45013065147399905} +{"epoch": 0, "iter": 9836, "iter_tflops": 45.869411233935026, "iter_time": 0.44977890396118164, "loss": 0.03662203252315521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.586482802855635, "step_time": 0.4078380699157715} +{"epoch": 0, "iter": 9837, "iter_tflops": 21.49443557347978, "iter_time": 0.9598341598510742, "loss": 0.33578407764434814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.748816891701253, "step_time": 0.9069084167480469} +{"epoch": 0, "iter": 9838, "iter_tflops": 24.200715258865475, "iter_time": 0.8524993286132813, "loss": 0.290536105632782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.866798392295834, "step_time": 0.7146997470855713} +{"epoch": 0, "iter": 9839, "iter_tflops": 45.5146499477289, "iter_time": 0.4532846794128418, "loss": 0.21212750673294067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.16830217322634, "step_time": 0.4196015033721925} +{"epoch": 0, "iter": 9840, "iter_tflops": 47.177932717165255, "iter_time": 0.43730389022827143, "loss": 0.27967962622642517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69242845475531, "step_time": 0.40698570060729977} +{"epoch": 0, "iter": 9841, "iter_tflops": 36.65882111663856, "iter_time": 0.5627866058349609, "loss": 0.011106706224381924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.2883596963353, "step_time": 0.5251197471618653} +{"epoch": 0, "iter": 9842, "iter_tflops": 17.752454090486008, "iter_time": 1.1621544494628906, "loss": 0.004823352675884962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.782313068822628, "step_time": 0.9471488838195803} +{"epoch": 0, "iter": 9843, "iter_tflops": 50.63823395368669, "iter_time": 0.40742126846313476, "loss": 0.013590959832072258, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 56.20182886701173, "step_time": 0.36708936214447024} +{"epoch": 0, "iter": 9844, "iter_tflops": 48.98161024280847, "iter_time": 0.4212008018493652, "loss": 0.00579859921708703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.79545550191187, "step_time": 0.38350996971130374} +{"epoch": 0, "iter": 9845, "iter_tflops": 32.54357560798032, "iter_time": 0.633952880859375, "loss": 0.5475705862045288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.89835868119952, "step_time": 0.5747085456848144} +{"epoch": 0, "iter": 9846, "iter_tflops": 38.703629983660065, "iter_time": 0.5330531921386719, "loss": 0.533553421497345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.3670618896408, "step_time": 0.4869606857299805} +{"epoch": 0, "iter": 9847, "iter_tflops": 41.00395986547754, "iter_time": 0.5031488075256347, "loss": 0.6004189848899841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.766012971467354, "step_time": 0.4608651103973389} +{"epoch": 0, "iter": 9848, "iter_tflops": 40.29666392501903, "iter_time": 0.5119801864624024, "loss": 0.5558371543884277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87818476544382, "step_time": 0.47019022369384766} +{"epoch": 0, "iter": 9849, "iter_tflops": 33.65478849992753, "iter_time": 0.6130210418701171, "loss": 0.5405822396278381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.12775199874885, "step_time": 0.5556784992218017} +{"epoch": 0, "iter": 9850, "iter_tflops": 36.21240484857247, "iter_time": 0.569724479675293, "loss": 0.45376062393188477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.999553116449746, "step_time": 0.5157831001281739} +{"epoch": 0, "iter": 9851, "iter_tflops": 41.493963056498814, "iter_time": 0.4972071113586426, "loss": 0.41119927167892456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.58433065117732, "step_time": 0.4525917835235596} +{"epoch": 0, "iter": 9852, "iter_tflops": 43.93916145815339, "iter_time": 0.46953771591186527, "loss": 0.4858345091342926, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 48.03243243295043, "step_time": 0.4295242290496827} +{"epoch": 0, "iter": 9853, "iter_tflops": 19.15049296992662, "iter_time": 1.07731396484375, "loss": 0.7976601719856262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.378297950411653, "step_time": 1.0124051361083986} +{"epoch": 0, "iter": 9854, "iter_tflops": 18.16934445560922, "iter_time": 1.1354891510009764, "loss": 0.7297798991203308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.46376806202567, "step_time": 0.8792745246887207} +{"epoch": 0, "iter": 9855, "iter_tflops": 37.1334990221664, "iter_time": 0.555592498779297, "loss": 0.6772445440292358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.1515928947818, "step_time": 0.5138300132751464} +{"epoch": 0, "iter": 9856, "iter_tflops": 37.76432306747174, "iter_time": 0.5463117523193359, "loss": 0.8001857995986938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.843886811520065, "step_time": 0.505120719909668} +{"epoch": 0, "iter": 9857, "iter_tflops": 25.030353201539985, "iter_time": 0.8242430038452149, "loss": 0.6560313701629639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.583184328402243, "step_time": 0.7760956420898437} +{"epoch": 0, "iter": 9858, "iter_tflops": 15.33429824429297, "iter_time": 1.3454214324951173, "loss": 0.5829327702522278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.938663178686078, "step_time": 1.2179882965087891} +{"epoch": 0, "iter": 9859, "iter_tflops": 16.91037156385715, "iter_time": 1.220026031494141, "loss": 0.7920455932617188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.5945778123998, "step_time": 0.9553830451965332} +{"epoch": 0, "iter": 9860, "iter_tflops": 38.83650432919169, "iter_time": 0.5312294158935547, "loss": 0.6089127063751221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.27187949434584, "step_time": 0.48805716133117677} +{"epoch": 0, "iter": 9861, "iter_tflops": 10.13636786570994, "iter_time": 1.3139124603271486, "loss": 0.10176637768745422, "lr": 3e-05, "seqlen": 
5344.0, "step_tflops": 10.764588504496821, "step_time": 1.2372326202392578} +{"epoch": 0, "iter": 9862, "iter_tflops": 12.003924357813569, "iter_time": 1.1094954986572267, "loss": 0.26149287819862366, "lr": 3e-05, "seqlen": 5344.0, "step_tflops": 15.646707004897817, "step_time": 0.851188690185547} +{"epoch": 0, "iter": 9863, "iter_tflops": 20.61547618057724, "iter_time": 0.6460340728759766, "loss": 0.22224251925945282, "lr": 3e-05, "seqlen": 5344.0, "step_tflops": 22.18774029181802, "step_time": 0.6002549095153809} +{"epoch": 0, "iter": 9864, "iter_tflops": 20.601716477492598, "iter_time": 0.6464655532836915, "loss": 0.31489187479019165, "lr": 3e-05, "seqlen": 5344.0, "step_tflops": 22.158307171054737, "step_time": 0.6010522346496582} +{"epoch": 0, "iter": 9865, "iter_tflops": 27.941601806516893, "iter_time": 0.7383647384643555, "loss": 0.6848422288894653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.385158994656514, "step_time": 0.6789858665466308} +{"epoch": 0, "iter": 9866, "iter_tflops": 13.273896968116468, "iter_time": 1.5542604827880857, "loss": 0.9292072653770447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.707506874494435, "step_time": 1.4027593994140626} +{"epoch": 0, "iter": 9867, "iter_tflops": 17.635777201841673, "iter_time": 1.1698431701660157, "loss": 0.6285790205001831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.868737951251745, "step_time": 0.9886124191284178} +{"epoch": 0, "iter": 9868, "iter_tflops": 46.11588536198173, "iter_time": 0.4473749847412109, "loss": 0.4770132005214691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.06329911557552, "step_time": 0.41210015869140626} +{"epoch": 0, "iter": 9869, "iter_tflops": 20.811880235454968, "iter_time": 0.8009147872924806, "loss": 0.22738008201122284, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 21.944069168720613, "step_time": 0.7595921478271483} +{"epoch": 0, "iter": 9870, "iter_tflops": 15.79218081598193, "iter_time": 1.0554934005737304, "loss": 0.18027883768081665, "lr": 3e-05, 
"seqlen": 6656.0, "step_tflops": 18.736925716646113, "step_time": 0.889609260559082} +{"epoch": 0, "iter": 9871, "iter_tflops": 30.1127879888792, "iter_time": 0.5535370101928712, "loss": 0.2543889582157135, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 32.05322064580246, "step_time": 0.5200270767211915} +{"epoch": 0, "iter": 9872, "iter_tflops": 30.100762321361167, "iter_time": 0.5537581558227539, "loss": 0.2090446799993515, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 32.039570886712355, "step_time": 0.5202486228942871} +{"epoch": 0, "iter": 9873, "iter_tflops": 25.64632364552629, "iter_time": 0.5208920288085938, "loss": 0.05352288484573364, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 27.908487486777723, "step_time": 0.478670352935791} +{"epoch": 0, "iter": 9874, "iter_tflops": 4.967071472396531, "iter_time": 2.689505401611328, "loss": 0.0331302173435688, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 6.0990172812951675, "step_time": 2.1903472213745117} +{"epoch": 0, "iter": 9875, "iter_tflops": 12.013594893527598, "iter_time": 1.1119873504638673, "loss": 0.01956757716834545, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 13.479262097334159, "step_time": 0.9910754356384278} +{"epoch": 0, "iter": 9876, "iter_tflops": 36.356258342719194, "iter_time": 0.36744610595703125, "loss": 0.06738464534282684, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 39.840885575171676, "step_time": 0.3353079471588135} +{"epoch": 0, "iter": 9877, "iter_tflops": 18.445057053809013, "iter_time": 0.8547863388061524, "loss": 0.17511485517024994, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 19.45672150238162, "step_time": 0.8103411865234376} +{"epoch": 0, "iter": 9878, "iter_tflops": 23.63340957227326, "iter_time": 0.6671311111450196, "loss": 0.2145671397447586, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 25.417909117562413, "step_time": 0.620294246673584} +{"epoch": 0, "iter": 9879, "iter_tflops": 24.891513413084564, "iter_time": 0.633411979675293, "loss": 0.11552460491657257, "lr": 
3e-05, "seqlen": 6304.0, "step_tflops": 26.817734519831408, "step_time": 0.5879162826538086} +{"epoch": 0, "iter": 9880, "iter_tflops": 25.9039287350469, "iter_time": 0.6086560440063477, "loss": 0.23176124691963196, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 27.770094279516755, "step_time": 0.5677540245056152} +{"epoch": 0, "iter": 9881, "iter_tflops": 19.53527222748804, "iter_time": 1.056094497680664, "loss": 0.15347813069820404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.88101458727089, "step_time": 0.9880311813354493} +{"epoch": 0, "iter": 9882, "iter_tflops": 22.050657765477645, "iter_time": 0.9356225891113281, "loss": 0.17705698311328888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.053326578117517, "step_time": 0.823487190246582} +{"epoch": 0, "iter": 9883, "iter_tflops": 52.7900060280657, "iter_time": 0.39081438064575197, "loss": 0.2215021401643753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.32150915309669, "step_time": 0.35991888236999514} +{"epoch": 0, "iter": 9884, "iter_tflops": 52.58391639627701, "iter_time": 0.3923460807800293, "loss": 0.12728889286518097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.82489900274994, "step_time": 0.36306432342529293} +{"epoch": 0, "iter": 9885, "iter_tflops": 25.504444714537556, "iter_time": 0.8089214935302734, "loss": 0.5111830830574036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.12171419315966, "step_time": 0.7606854553222655} +{"epoch": 0, "iter": 9886, "iter_tflops": 9.849220232153169, "iter_time": 2.094693084716797, "loss": 0.7024943828582764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.742093641729355, "step_time": 1.7570200119018553} +{"epoch": 0, "iter": 9887, "iter_tflops": 12.857999445225074, "iter_time": 1.6045337066650394, "loss": 0.448076993227005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.244836059718779, "step_time": 1.4483208808898926} +{"epoch": 0, "iter": 9888, "iter_tflops": 36.40403578276557, "iter_time": 0.5667254486083985, "loss": 0.43695685267448425, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 45.50441061984696, "step_time": 0.4533866767883301} +{"epoch": 0, "iter": 9889, "iter_tflops": 18.394379588402, "iter_time": 0.8304475402832031, "loss": 0.3993149399757385, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 19.36617875918908, "step_time": 0.7887754974365233} +{"epoch": 0, "iter": 9890, "iter_tflops": 9.483495944322694, "iter_time": 1.6107527618408204, "loss": 0.34012433886528015, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 11.72899868840899, "step_time": 1.3023760757446288} +{"epoch": 0, "iter": 9891, "iter_tflops": 27.22053090550269, "iter_time": 0.5611781539916992, "loss": 0.2729797959327698, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.973300800862162, "step_time": 0.5272290992736817} +{"epoch": 0, "iter": 9892, "iter_tflops": 26.874468890471174, "iter_time": 0.5684044342041016, "loss": 0.33932265639305115, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.568296207580733, "step_time": 0.5347034759521484} +{"epoch": 0, "iter": 9893, "iter_tflops": 24.691802827743086, "iter_time": 0.8355442352294922, "loss": 0.5294173955917358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.309322792984172, "step_time": 0.7841742515563965} +{"epoch": 0, "iter": 9894, "iter_tflops": 15.313153325103652, "iter_time": 1.3472792358398435, "loss": 0.6377518177032471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.995558189983175, "step_time": 1.1464547691345215} +{"epoch": 0, "iter": 9895, "iter_tflops": 46.379339994545525, "iter_time": 0.44483370208740236, "loss": 0.5640174150466919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.41123386379475, "step_time": 0.40925587272644043} +{"epoch": 0, "iter": 9896, "iter_tflops": 48.14679353269819, "iter_time": 0.42850399780273435, "loss": 0.6346950531005859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.268529090761696, "step_time": 0.3947134895324707} +{"epoch": 0, "iter": 9897, "iter_tflops": 26.354637704609075, "iter_time": 0.7828259201049804, "loss": 0.7104634046554565, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.935241403879157, "step_time": 0.7385328521728516} +{"epoch": 0, "iter": 9898, "iter_tflops": 15.395444708339348, "iter_time": 1.3400777893066405, "loss": 0.756921112537384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.078716287570703, "step_time": 1.2080002479553222} +{"epoch": 0, "iter": 9899, "iter_tflops": 47.52792677117274, "iter_time": 0.4340835990905762, "loss": 0.5602461695671082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.63874014313461, "step_time": 0.39952743721008305} +{"epoch": 0, "iter": 9900, "iter_tflops": 42.234387023277684, "iter_time": 0.488490421295166, "loss": 0.5612013339996338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.561670632269504, "step_time": 0.45281687927246095} +{"epoch": 0, "iter": 9901, "iter_tflops": 3.1789760626176515, "iter_time": 0.46830120849609375, "loss": 0.6537485122680664, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 3.4808944761037135, "step_time": 0.42768269538879394} +{"epoch": 0, "iter": 9902, "iter_tflops": 3.1815925847070896, "iter_time": 0.4679160804748536, "loss": 0.7931818962097168, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 3.4864189136676473, "step_time": 0.4270050067901612} +{"epoch": 0, "iter": 9903, "iter_tflops": 3.490034495079322, "iter_time": 0.4265626411437988, "loss": 0.681735634803772, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 3.7952750277728975, "step_time": 0.3922557182312012} +{"epoch": 0, "iter": 9904, "iter_tflops": 3.515243905398162, "iter_time": 0.42350356674194334, "loss": 1.0034195184707642, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 3.798902712153877, "step_time": 0.3918811416625977} +{"epoch": 0, "iter": 9905, "iter_tflops": 15.995266190820542, "iter_time": 0.9013870086669921, "loss": 0.06458595395088196, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 16.700695762944637, "step_time": 0.8633128433227538} +{"epoch": 0, "iter": 9906, "iter_tflops": 8.755631861073356, "iter_time": 1.6467029876708983, "loss": 
0.050092924386262894, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 11.670390394883217, "step_time": 1.2354278354644774} +{"epoch": 0, "iter": 9907, "iter_tflops": 30.28307379259474, "iter_time": 0.47610507583618156, "loss": 0.07594230771064758, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 33.51533704335742, "step_time": 0.43018887519836435} +{"epoch": 0, "iter": 9908, "iter_tflops": 30.452208745038753, "iter_time": 0.4734607353210449, "loss": 0.10863766819238663, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 33.37916866614511, "step_time": 0.4319438056945801} +{"epoch": 0, "iter": 9909, "iter_tflops": 26.25972459362058, "iter_time": 0.7856553649902344, "loss": 0.06141218915581703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.301565802779926, "step_time": 0.7289735717773437} +{"epoch": 0, "iter": 9910, "iter_tflops": 9.937359972152315, "iter_time": 2.0761141357421873, "loss": 0.023889681324362755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.235522246268046, "step_time": 1.6861637039184572} +{"epoch": 0, "iter": 9911, "iter_tflops": 17.014723746818927, "iter_time": 1.2125435485839844, "loss": 0.026961535215377808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.849671408572725, "step_time": 1.0393670043945313} +{"epoch": 0, "iter": 9912, "iter_tflops": 41.3850067653643, "iter_time": 0.4985161323547363, "loss": 0.03757065162062645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.713968526868406, "step_time": 0.4513083019256592} +{"epoch": 0, "iter": 9913, "iter_tflops": 14.408639645205033, "iter_time": 1.0006444396972658, "loss": 0.16967935860157013, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 15.524039233419591, "step_time": 0.9287483062744142} +{"epoch": 0, "iter": 9914, "iter_tflops": 11.040261719487415, "iter_time": 1.3059405212402344, "loss": 0.13789767026901245, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 13.06448101352644, "step_time": 1.1035972366333007} +{"epoch": 0, "iter": 9915, "iter_tflops": 22.641971385385347, "iter_time": 
0.6367787017822265, "loss": 0.2532753646373749, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 24.26087960872026, "step_time": 0.5942869911193849} +{"epoch": 0, "iter": 9916, "iter_tflops": 22.860935664433402, "iter_time": 0.6306795730590821, "loss": 0.2592136263847351, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 24.57486123711131, "step_time": 0.5866940612792969} +{"epoch": 0, "iter": 9917, "iter_tflops": 35.5968056974925, "iter_time": 0.5795771026611328, "loss": 0.11162248998880386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.20336647216261, "step_time": 0.5262582111358642} +{"epoch": 0, "iter": 9918, "iter_tflops": 44.67944718346439, "iter_time": 0.4617580299377442, "loss": 0.07156099379062653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.638519747648985, "step_time": 0.4241719036102295} +{"epoch": 0, "iter": 9919, "iter_tflops": 50.263718263439806, "iter_time": 0.41045697021484373, "loss": 0.13499397039413452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.459471565012606, "step_time": 0.378833890914917} +{"epoch": 0, "iter": 9920, "iter_tflops": 57.680659393768444, "iter_time": 0.3576778373718262, "loss": 0.06946219503879547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.79174011433175, "step_time": 0.32856381225585934} +{"epoch": 0, "iter": 9921, "iter_tflops": 16.98765015709301, "iter_time": 0.7003899688720703, "loss": 0.0051902406848967075, "lr": 3e-05, "seqlen": 4784.0, "step_tflops": 17.96868016744403, "step_time": 0.6621510124206542} +{"epoch": 0, "iter": 9922, "iter_tflops": 11.611668230410187, "iter_time": 1.0246572265625, "loss": 0.0020221329759806395, "lr": 3e-05, "seqlen": 4784.0, "step_tflops": 13.073532700407153, "step_time": 0.9100814628601076} +{"epoch": 0, "iter": 9923, "iter_tflops": 31.431591762084757, "iter_time": 0.3785357055664062, "loss": 0.003535144031047821, "lr": 3e-05, "seqlen": 4784.0, "step_tflops": 34.605548178181664, "step_time": 0.3438171157836914} +{"epoch": 0, "iter": 9924, "iter_tflops": 32.79649373134713, 
"iter_time": 0.3627820663452148, "loss": 0.004752262961119413, "lr": 3e-05, "seqlen": 4784.0, "step_tflops": 35.807924543819915, "step_time": 0.33227225303649904} +{"epoch": 0, "iter": 9925, "iter_tflops": 29.760291277447042, "iter_time": 0.4202237319946289, "loss": 0.008183497935533524, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 32.850648273791954, "step_time": 0.38069205093383784} +{"epoch": 0, "iter": 9926, "iter_tflops": 7.843233513657754, "iter_time": 1.5944929656982425, "loss": 0.003978838678449392, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 8.425052871864027, "step_time": 1.4843800811767576} +{"epoch": 0, "iter": 9927, "iter_tflops": 7.464018237569145, "iter_time": 1.6755024261474611, "loss": 0.0028497965540736914, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 8.308353276897394, "step_time": 1.505229766845703} +{"epoch": 0, "iter": 9928, "iter_tflops": 20.372963596642396, "iter_time": 0.6138518142700196, "loss": 0.0074389223009347916, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 28.94315551473951, "step_time": 0.4320876712799072} +{"epoch": 0, "iter": 9929, "iter_tflops": 15.689593733872329, "iter_time": 0.9501681060791016, "loss": 0.23074406385421753, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 16.39338452589994, "step_time": 0.9093760681152343} +{"epoch": 0, "iter": 9930, "iter_tflops": 9.809514370233353, "iter_time": 1.5197237091064455, "loss": 0.29692187905311584, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 12.343965525672616, "step_time": 1.2076954956054688} +{"epoch": 0, "iter": 9931, "iter_tflops": 22.217574161983787, "iter_time": 0.6709891662597656, "loss": 0.23808375000953674, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 23.976215420094785, "step_time": 0.6217725067138672} +{"epoch": 0, "iter": 9932, "iter_tflops": 21.19453371948831, "iter_time": 0.7033771896362305, "loss": 0.30005282163619995, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 22.906126158945142, "step_time": 0.6508194122314452} +{"epoch": 0, "iter": 9933, "iter_tflops": 
22.396469892053823, "iter_time": 0.9211761322021484, "loss": 0.3766425848007202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.27783384744895, "step_time": 0.8497913627624513} +{"epoch": 0, "iter": 9934, "iter_tflops": 14.693064935632478, "iter_time": 1.40413818359375, "loss": 0.538731575012207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.44140152545186, "step_time": 1.118737829208374} +{"epoch": 0, "iter": 9935, "iter_tflops": 46.79490559641315, "iter_time": 0.4408833236694336, "loss": 0.6220038533210754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.91755167946547, "step_time": 0.4051862831115723} +{"epoch": 0, "iter": 9936, "iter_tflops": 47.03350675564491, "iter_time": 0.4386467208862305, "loss": 0.5091434717178345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.866808977430985, "step_time": 0.4055904808044434} +{"epoch": 0, "iter": 9937, "iter_tflops": 39.87093498552088, "iter_time": 0.5174469451904297, "loss": 0.43408456444740295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.26460768489848, "step_time": 0.47685844421386714} +{"epoch": 0, "iter": 9938, "iter_tflops": 46.95006740232206, "iter_time": 0.43942628097534175, "loss": 0.4333553910255432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.54742525043977, "step_time": 0.4002351894378662} +{"epoch": 0, "iter": 9939, "iter_tflops": 51.064823462403254, "iter_time": 0.4040177192687988, "loss": 0.40239790081977844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.304982788517314, "step_time": 0.37304221916198727} +{"epoch": 0, "iter": 9940, "iter_tflops": 48.58977424171824, "iter_time": 0.42459743499755864, "loss": 0.42906510829925537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.434311055045626, "step_time": 0.39346552085876463} +{"epoch": 0, "iter": 9941, "iter_tflops": 32.99512172522923, "iter_time": 0.6252770843505859, "loss": 0.7405428290367126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.36107853331554, "step_time": 0.583440731048584} +{"epoch": 0, "iter": 9942, "iter_tflops": 
18.347199482411337, "iter_time": 1.1244818878173828, "loss": 0.8827375769615173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.339603951209284, "step_time": 0.9667983322143555} +{"epoch": 0, "iter": 9943, "iter_tflops": 41.8849135416023, "iter_time": 0.4925662193298339, "loss": 0.6827720403671265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.20281969911367, "step_time": 0.45641164970397946} +{"epoch": 0, "iter": 9944, "iter_tflops": 45.36551578213093, "iter_time": 0.4547748031616211, "loss": 0.7342195510864258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.911920390999555, "step_time": 0.4218009300231934} +{"epoch": 0, "iter": 9945, "iter_tflops": 32.73191075322874, "iter_time": 0.6303051986694336, "loss": 0.5060656070709229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.06291710865429, "step_time": 0.588402084350586} +{"epoch": 0, "iter": 9946, "iter_tflops": 34.459207707501456, "iter_time": 0.5987106170654297, "loss": 0.4644407033920288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.179289634243695, "step_time": 0.5010065422058105} +{"epoch": 0, "iter": 9947, "iter_tflops": 39.89771511583477, "iter_time": 0.517099624633789, "loss": 0.49594438076019287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.54754522969643, "step_time": 0.47376019477844233} +{"epoch": 0, "iter": 9948, "iter_tflops": 38.97224529827015, "iter_time": 0.5293791351318359, "loss": 0.5066288113594055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.734087500069805, "step_time": 0.48277837944030766} +{"epoch": 0, "iter": 9949, "iter_tflops": 20.50437210407693, "iter_time": 1.0061802139282228, "loss": 0.40461668372154236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.103068821701626, "step_time": 0.9334040298461915} +{"epoch": 0, "iter": 9950, "iter_tflops": 14.217288601464592, "iter_time": 1.4511271514892579, "loss": 0.45667847990989685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.414749610618813, "step_time": 1.1846907920837402} +{"epoch": 0, "iter": 9951, 
"iter_tflops": 37.454856506753906, "iter_time": 0.5508255920410157, "loss": 0.625446081161499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.9852711203818, "step_time": 0.5033782367706299} +{"epoch": 0, "iter": 9952, "iter_tflops": 37.82776151580144, "iter_time": 0.5453955688476563, "loss": 0.5217592120170593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.206255733966316, "step_time": 0.500678674697876} +{"epoch": 0, "iter": 9953, "iter_tflops": 18.605951161677336, "iter_time": 1.1088437957763673, "loss": 0.1445702165365219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.66203175208223, "step_time": 1.0492859420776368} +{"epoch": 0, "iter": 9954, "iter_tflops": 9.423863562903238, "iter_time": 2.1892394104003907, "loss": 0.1097167432308197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.568292519711028, "step_time": 1.9521690444946287} +{"epoch": 0, "iter": 9955, "iter_tflops": 14.295233878460424, "iter_time": 1.443214828491211, "loss": 0.13438503444194794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.208400478895893, "step_time": 1.1330535888671875} +{"epoch": 0, "iter": 9956, "iter_tflops": 40.37344903910143, "iter_time": 0.5110064659118652, "loss": 0.16539311408996582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.214971918401424, "step_time": 0.46660876655578615} +{"epoch": 0, "iter": 9957, "iter_tflops": 15.684645437739816, "iter_time": 1.0130571899414063, "loss": 0.1124085932970047, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 16.53335200691223, "step_time": 0.961053924560547} +{"epoch": 0, "iter": 9958, "iter_tflops": 13.83668299510105, "iter_time": 1.1483563537597656, "loss": 0.2600008249282837, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 16.219974182027144, "step_time": 0.979621955871582} +{"epoch": 0, "iter": 9959, "iter_tflops": 28.86628435636476, "iter_time": 0.5504498825073243, "loss": 0.1570097953081131, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.76321445319734, "step_time": 0.5165078849792482} +{"epoch": 0, "iter": 9960, 
"iter_tflops": 28.60207521881141, "iter_time": 0.5555346145629882, "loss": 0.19942191243171692, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 30.436213088286006, "step_time": 0.5220571556091309} +{"epoch": 0, "iter": 9961, "iter_tflops": 33.98190037643872, "iter_time": 0.4820597877502441, "loss": 0.048357393592596054, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 37.17132503353084, "step_time": 0.44069743728637695} +{"epoch": 0, "iter": 9962, "iter_tflops": 27.82953874178377, "iter_time": 0.5886302261352538, "loss": 0.029174720868468285, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 34.095214840675794, "step_time": 0.4804576759338378} +{"epoch": 0, "iter": 9963, "iter_tflops": 36.496735510617, "iter_time": 0.44884309387207033, "loss": 0.016620269045233727, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 40.215999407717476, "step_time": 0.4073330993652344} +{"epoch": 0, "iter": 9964, "iter_tflops": 39.63905110780769, "iter_time": 0.41326185226440426, "loss": 0.01750189997255802, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 43.54716636992576, "step_time": 0.37617390632629394} +{"epoch": 0, "iter": 9965, "iter_tflops": 25.44155095465413, "iter_time": 0.8109212188720702, "loss": 0.9237954616546631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.968714990527204, "step_time": 0.7650009841918944} +{"epoch": 0, "iter": 9966, "iter_tflops": 39.737984058738164, "iter_time": 0.5191781616210938, "loss": 0.7495787143707275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.054503864518836, "step_time": 0.4683083839416504} +{"epoch": 0, "iter": 9967, "iter_tflops": 43.45227070766203, "iter_time": 0.47479897308349606, "loss": 0.6117789149284363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8895556243172, "step_time": 0.43999336814880374} +{"epoch": 0, "iter": 9968, "iter_tflops": 46.804266456345914, "iter_time": 0.44079514694213867, "loss": 0.7650656700134277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.313987205109335, "step_time": 0.41004688072204587} +{"epoch": 0, 
"iter": 9969, "iter_tflops": 25.23275101807873, "iter_time": 0.5859539947509765, "loss": 0.23576965928077698, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.095057620972657, "step_time": 0.5456800079345704} +{"epoch": 0, "iter": 9970, "iter_tflops": 25.625983680948647, "iter_time": 0.5769624862670898, "loss": 0.1453220546245575, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.431976376110192, "step_time": 0.5389779815673829} +{"epoch": 0, "iter": 9971, "iter_tflops": 26.201458892676623, "iter_time": 0.5642903823852539, "loss": 0.14514337480068207, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 28.009007366695602, "step_time": 0.5278741607666015} +{"epoch": 0, "iter": 9972, "iter_tflops": 25.501709444848046, "iter_time": 0.5797741241455079, "loss": 0.22902747988700867, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.11276101789981, "step_time": 0.5453237037658691} +{"epoch": 0, "iter": 9973, "iter_tflops": 44.86861118694663, "iter_time": 0.45981127929687504, "loss": 0.03946538642048836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22031852344206, "step_time": 0.4191580657958985} +{"epoch": 0, "iter": 9974, "iter_tflops": 10.669412482014456, "iter_time": 1.9336672515869142, "loss": 0.031107207760214806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.595424462573735, "step_time": 1.7792443542480467} +{"epoch": 0, "iter": 9975, "iter_tflops": 15.064713736382044, "iter_time": 1.36949787902832, "loss": 0.010220509953796864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.454170652086372, "step_time": 1.1820151138305666} +{"epoch": 0, "iter": 9976, "iter_tflops": 23.931622098228527, "iter_time": 0.8620850448608399, "loss": 0.0325024239718914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.4645316788359, "step_time": 0.751190435409546} +{"epoch": 0, "iter": 9977, "iter_tflops": 12.5211374868627, "iter_time": 1.2624676818847655, "loss": 0.26709476113319397, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 13.514585715600383, "step_time": 1.1696645202636717} 
+{"epoch": 0, "iter": 9978, "iter_tflops": 15.25368437316857, "iter_time": 1.036309066772461, "loss": 0.23751813173294067, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 19.897038860353664, "step_time": 0.7944665298461914} +{"epoch": 0, "iter": 9979, "iter_tflops": 26.328703768829886, "iter_time": 0.6003915557861328, "loss": 0.14360341429710388, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 28.057256241231755, "step_time": 0.5634026107788086} +{"epoch": 0, "iter": 9980, "iter_tflops": 27.984019706647214, "iter_time": 0.564877082824707, "loss": 0.28169718384742737, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 29.83526624089629, "step_time": 0.5298270606994628} +{"epoch": 0, "iter": 9981, "iter_tflops": 26.702613232201102, "iter_time": 0.77262451171875, "loss": 0.05213103070855141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.250366092515453, "step_time": 0.7302947311401368} +{"epoch": 0, "iter": 9982, "iter_tflops": 19.10385141818342, "iter_time": 1.0799441986083984, "loss": 0.03701151907444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.427210900776164, "step_time": 0.8806465950012207} +{"epoch": 0, "iter": 9983, "iter_tflops": 52.6868592511964, "iter_time": 0.3915794906616211, "loss": 0.03493674099445343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.799659818767445, "step_time": 0.35694143486022956} +{"epoch": 0, "iter": 9984, "iter_tflops": 52.931695030944006, "iter_time": 0.38976823806762695, "loss": 0.05172094330191612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.50478037185676, "step_time": 0.3587717990875244} +{"epoch": 0, "iter": 9985, "iter_tflops": 40.86885701104332, "iter_time": 0.5048120994567871, "loss": 0.6819140315055847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.549068200070636, "step_time": 0.46310942840576175} +{"epoch": 0, "iter": 9986, "iter_tflops": 47.159328006483534, "iter_time": 0.43747640991210934, "loss": 0.7289042472839355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.35908556187141, "step_time": 
0.4017028980255126} +{"epoch": 0, "iter": 9987, "iter_tflops": 45.571302538904746, "iter_time": 0.4527211723327637, "loss": 0.7241067886352539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.962594407518736, "step_time": 0.4213643856048584} +{"epoch": 0, "iter": 9988, "iter_tflops": 45.00099883477044, "iter_time": 0.45845856857299805, "loss": 0.6751077175140381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.583587024606366, "step_time": 0.4246515083312988} +{"epoch": 0, "iter": 9989, "iter_tflops": 26.405662484676977, "iter_time": 0.781313232421875, "loss": 0.23069651424884796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.8480657559867, "step_time": 0.7408447570800781} +{"epoch": 0, "iter": 9990, "iter_tflops": 9.686833204418114, "iter_time": 2.129807861328125, "loss": 0.25547146797180176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.103390791589275, "step_time": 1.8580894699096684} +{"epoch": 0, "iter": 9991, "iter_tflops": 17.9610054421947, "iter_time": 1.1486602783203124, "loss": 0.37165483832359314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.947207322096894, "step_time": 0.9400327434539795} +{"epoch": 0, "iter": 9992, "iter_tflops": 37.496983723029196, "iter_time": 0.5502067489624023, "loss": 0.1526234745979309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.93718951737452, "step_time": 0.5039694652557373} +{"epoch": 0, "iter": 9993, "iter_tflops": 10.730224908088982, "iter_time": 1.2942922210693362, "loss": 0.4955580234527588, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 11.47531903957015, "step_time": 1.2102536392211913} +{"epoch": 0, "iter": 9994, "iter_tflops": 11.234874787205852, "iter_time": 1.2361549987792968, "loss": 0.216511070728302, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 13.608956340513718, "step_time": 1.020507839202881} +{"epoch": 0, "iter": 9995, "iter_tflops": 24.094217039930935, "iter_time": 0.5764058074951172, "loss": 0.26256781816482544, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 25.55677606490688, "step_time": 
0.5434193496704102} +{"epoch": 0, "iter": 9996, "iter_tflops": 24.678763129688328, "iter_time": 0.562752944946289, "loss": 0.27068984508514404, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 26.293195886249297, "step_time": 0.52819926071167} +{"epoch": 0, "iter": 9997, "iter_tflops": 44.85624208331872, "iter_time": 0.4599380722045899, "loss": 0.050094038248062134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.90011476512684, "step_time": 0.41344781684875487} +{"epoch": 0, "iter": 9998, "iter_tflops": 48.09534685459987, "iter_time": 0.4289623603820801, "loss": 0.025276923552155495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.297436234541685, "step_time": 0.39449531364440915} +{"epoch": 0, "iter": 9999, "iter_tflops": 52.31120190375284, "iter_time": 0.3943915023803711, "loss": 0.03530719503760338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.33939395152699, "step_time": 0.359806619644165} +{"epoch": 0, "iter": 10000, "iter_tflops": 55.11643271674346, "iter_time": 0.3743183746337891, "loss": 0.03905526176095009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.81535655135854, "step_time": 0.3449129905700684} +{"epoch": 0, "iter": 10001, "iter_tflops": 32.34431624236594, "iter_time": 0.6378583908081055, "loss": 0.39467450976371765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.559804299742616, "step_time": 0.5969678916931153} +{"epoch": 0, "iter": 10002, "iter_tflops": 11.870067647784294, "iter_time": 1.738077163696289, "loss": 0.3615213930606842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.138626007578203, "step_time": 1.362811492919922} +{"epoch": 0, "iter": 10003, "iter_tflops": 48.0050222051756, "iter_time": 0.4297694816589355, "loss": 0.37898117303848267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.072294837167334, "step_time": 0.3962009658813476} +{"epoch": 0, "iter": 10004, "iter_tflops": 51.045276420314146, "iter_time": 0.40417243194580077, "loss": 0.4364795982837677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.46740245878361, 
"step_time": 0.37194987678527835} +{"epoch": 0, "iter": 10005, "iter_tflops": 13.635786544693842, "iter_time": 0.9528710174560546, "loss": 0.0029677259735763073, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 14.255780370482048, "step_time": 0.9114299926757813} +{"epoch": 0, "iter": 10006, "iter_tflops": 9.50900292919645, "iter_time": 1.3664046478271485, "loss": 0.009428447112441063, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 12.301825701385795, "step_time": 1.0561965446472168} +{"epoch": 0, "iter": 10007, "iter_tflops": 27.421571928096228, "iter_time": 0.47382935714721675, "loss": 0.005077812820672989, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 30.378304663523142, "step_time": 0.42771135330200194} +{"epoch": 0, "iter": 10008, "iter_tflops": 32.96513828016424, "iter_time": 0.3941480751037597, "loss": 0.007870043627917767, "lr": 3e-05, "seqlen": 5216.0, "step_tflops": 36.38139293322939, "step_time": 0.3571371173858643} +{"epoch": 0, "iter": 10009, "iter_tflops": 17.61280563775747, "iter_time": 1.1713689422607423, "loss": 0.4647761285305023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.684651656076376, "step_time": 1.1041733016967772} +{"epoch": 0, "iter": 10010, "iter_tflops": 19.23217739482953, "iter_time": 1.0727383117675782, "loss": 0.29312264919281006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.558504624033603, "step_time": 0.8072105083465576} +{"epoch": 0, "iter": 10011, "iter_tflops": 39.09867641341005, "iter_time": 0.5276673126220703, "loss": 0.4590216279029846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.741379111157066, "step_time": 0.48269601821899416} +{"epoch": 0, "iter": 10012, "iter_tflops": 39.88144179943938, "iter_time": 0.5173106231689453, "loss": 0.6283265352249146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.62294317399493, "step_time": 0.4729413471221924} +{"epoch": 0, "iter": 10013, "iter_tflops": 19.140275565583508, "iter_time": 1.0778890533447265, "loss": 0.2964109480381012, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.68644923746944, "step_time": 0.997324058532715} +{"epoch": 0, "iter": 10014, "iter_tflops": 19.626053367405927, "iter_time": 1.0512094879150389, "loss": 0.26870545744895935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.519946322365836, "step_time": 0.8771743450164795} +{"epoch": 0, "iter": 10015, "iter_tflops": 45.2394090149573, "iter_time": 0.45604250717163086, "loss": 0.3244486153125763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.86216032405593, "step_time": 0.42223048210144043} +{"epoch": 0, "iter": 10016, "iter_tflops": 48.743856540135816, "iter_time": 0.42325525665283203, "loss": 0.2847714424133301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.89390095489396, "step_time": 0.3900467376708984} +{"epoch": 0, "iter": 10017, "iter_tflops": 31.71039192062871, "iter_time": 0.6506098556518555, "loss": 0.4795398414134979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.88619928378998, "step_time": 0.6088346862792969} +{"epoch": 0, "iter": 10018, "iter_tflops": 15.681535430539041, "iter_time": 1.3156296844482422, "loss": 0.34423840045928955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.57572488301661, "step_time": 1.1106480979919433} +{"epoch": 0, "iter": 10019, "iter_tflops": 44.26950357770151, "iter_time": 0.466033992767334, "loss": 0.3824945390224457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.845099549549, "step_time": 0.43120598983764646} +{"epoch": 0, "iter": 10020, "iter_tflops": 45.6189714214185, "iter_time": 0.4522481079101563, "loss": 0.3242291510105133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.35108997593497, "step_time": 0.41804737281799315} +{"epoch": 0, "iter": 10021, "iter_tflops": 35.77709716352553, "iter_time": 0.5766564407348633, "loss": 0.33369144797325134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.547489099716934, "step_time": 0.5352123832702637} +{"epoch": 0, "iter": 10022, "iter_tflops": 7.686925007012757, "iter_time": 2.6839202270507814, "loss": 0.19303996860980988, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 9.388303775688907, "step_time": 2.1975315246582032} +{"epoch": 0, "iter": 10023, "iter_tflops": 11.776918262747259, "iter_time": 1.751824462890625, "loss": 0.31883707642555237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.620968719243338, "step_time": 1.3207307357788087} +{"epoch": 0, "iter": 10024, "iter_tflops": 29.17286572809841, "iter_time": 0.7072014694213867, "loss": 0.3727453649044037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.73764581073717, "step_time": 0.49430419731140135} +{"epoch": 0, "iter": 10025, "iter_tflops": 21.477860665325075, "iter_time": 0.6979026260375977, "loss": 0.23099838197231293, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 22.786029787995396, "step_time": 0.6578353271484376} +{"epoch": 0, "iter": 10026, "iter_tflops": 10.00037448862447, "iter_time": 1.4988894042968748, "loss": 0.3741464614868164, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 12.280933176311905, "step_time": 1.2205469360351564} +{"epoch": 0, "iter": 10027, "iter_tflops": 23.694443082676514, "iter_time": 0.6326147994995117, "loss": 0.3034800887107849, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 25.49606845253258, "step_time": 0.5879124221801758} +{"epoch": 0, "iter": 10028, "iter_tflops": 24.536139065919027, "iter_time": 0.6109133682250977, "loss": 0.2118099331855774, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 26.289038783705337, "step_time": 0.5701789054870606} +{"epoch": 0, "iter": 10029, "iter_tflops": 16.723785319975615, "iter_time": 1.2336377868652342, "loss": 0.2118278592824936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.808955463526477, "step_time": 1.1584673538208008} +{"epoch": 0, "iter": 10030, "iter_tflops": 16.911525358473565, "iter_time": 1.2199427947998047, "loss": 0.1892908811569214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.37491208559851, "step_time": 1.0125733757019042} +{"epoch": 0, "iter": 10031, "iter_tflops": 52.73347472371656, "iter_time": 0.391233341217041, "loss": 0.10859064757823944, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 57.44364754511608, "step_time": 0.3591536121368408} +{"epoch": 0, "iter": 10032, "iter_tflops": 53.12353796305357, "iter_time": 0.38836068344116215, "loss": 0.14944009482860565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.41736818819415, "step_time": 0.35931799316406254} +{"epoch": 0, "iter": 10033, "iter_tflops": 30.298945971853655, "iter_time": 0.6809178619384766, "loss": 0.028505679219961166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.284839883057366, "step_time": 0.6390334777832031} +{"epoch": 0, "iter": 10034, "iter_tflops": 17.96779934432009, "iter_time": 1.1482259521484377, "loss": 0.03560889512300491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.3430507369063, "step_time": 1.0141592712402345} +{"epoch": 0, "iter": 10035, "iter_tflops": 38.03639529813904, "iter_time": 0.5424040145874023, "loss": 0.07913476973772049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96238322684748, "step_time": 0.4916568584442139} +{"epoch": 0, "iter": 10036, "iter_tflops": 42.03992041616077, "iter_time": 0.4907500610351562, "loss": 0.03546882048249245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.421784639517384, "step_time": 0.4444269790649414} +{"epoch": 0, "iter": 10037, "iter_tflops": 17.049067429515514, "iter_time": 1.2101009979248047, "loss": 0.4433783292770386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.41240113949186, "step_time": 1.1204998931884764} +{"epoch": 0, "iter": 10038, "iter_tflops": 20.02089060712472, "iter_time": 1.0304783096313477, "loss": 0.5013502240180969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.70080965659479, "step_time": 0.83523956489563} +{"epoch": 0, "iter": 10039, "iter_tflops": 41.99285866010881, "iter_time": 0.491300048828125, "loss": 0.48708513379096985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.40012897415699, "step_time": 0.4544280815124512} +{"epoch": 0, "iter": 10040, "iter_tflops": 45.45220463556027, "iter_time": 0.45390743255615235, "loss": 
0.4460428059101105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.10511521722155, "step_time": 0.4201414337158203} +{"epoch": 0, "iter": 10041, "iter_tflops": 21.889132276120566, "iter_time": 0.9425267868041993, "loss": 0.28265270590782166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.998842135461384, "step_time": 0.8970492248535156} +{"epoch": 0, "iter": 10042, "iter_tflops": 11.631122433286205, "iter_time": 1.7737835388183592, "loss": 0.2777101397514343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.617438640880192, "step_time": 1.3210292663574217} +{"epoch": 0, "iter": 10043, "iter_tflops": 39.103433002745966, "iter_time": 0.5276031265258789, "loss": 0.3116438686847687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.49535223260461, "step_time": 0.4854905872344971} +{"epoch": 0, "iter": 10044, "iter_tflops": 44.82203577499524, "iter_time": 0.460289077758789, "loss": 0.25949010252952576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.491340962726625, "step_time": 0.42545933151245113} +{"epoch": 0, "iter": 10045, "iter_tflops": 18.572003653536058, "iter_time": 1.110870635986328, "loss": 0.1009894534945488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.454754993046816, "step_time": 1.060465347290039} +{"epoch": 0, "iter": 10046, "iter_tflops": 21.455142734430417, "iter_time": 0.9615919952392576, "loss": 0.15522833168506622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.68389132371343, "step_time": 0.7731665992736816} +{"epoch": 0, "iter": 10047, "iter_tflops": 48.8513434010621, "iter_time": 0.422323974609375, "loss": 0.13584290444850922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.4842255899613, "step_time": 0.3857416515350342} +{"epoch": 0, "iter": 10048, "iter_tflops": 50.669407086676884, "iter_time": 0.40717061233520513, "loss": 0.09313291311264038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.935481003682334, "step_time": 0.37555134010314944} +{"epoch": 0, "iter": 10049, "iter_tflops": 28.109901733296777, "iter_time": 
0.7339439926147461, "loss": 0.23099695146083832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.851450156209694, "step_time": 0.6911253356933593} +{"epoch": 0, "iter": 10050, "iter_tflops": 14.61675289771794, "iter_time": 1.411468994140625, "loss": 0.21903550624847412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.410112296856685, "step_time": 1.0108270454406738} +{"epoch": 0, "iter": 10051, "iter_tflops": 37.60125167779614, "iter_time": 0.5486810302734375, "loss": 0.1401042640209198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.2330836458037, "step_time": 0.500352912902832} +{"epoch": 0, "iter": 10052, "iter_tflops": 43.65469455903844, "iter_time": 0.4725973625183105, "loss": 0.18441131711006165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.8374322677847, "step_time": 0.4312751026153564} +{"epoch": 0, "iter": 10053, "iter_tflops": 21.722271040067138, "iter_time": 0.9497668762207031, "loss": 0.7261083722114563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.07900858159809, "step_time": 0.8939332656860352} +{"epoch": 0, "iter": 10054, "iter_tflops": 26.397694583316643, "iter_time": 0.7815490646362304, "loss": 0.9458996057510376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.49244379255413, "step_time": 0.6349505023956299} +{"epoch": 0, "iter": 10055, "iter_tflops": 44.20075991361016, "iter_time": 0.4667587966918946, "loss": 0.8869852423667908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.6453506652201, "step_time": 0.43301378250122075} +{"epoch": 0, "iter": 10056, "iter_tflops": 40.52051129451353, "iter_time": 0.5091518554687501, "loss": 0.9964957237243652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.49960607838819, "step_time": 0.47428230667114263} +{"epoch": 0, "iter": 10057, "iter_tflops": 7.438995132458858, "iter_time": 0.8488164138793945, "loss": 0.002834862098097801, "lr": 3e-05, "seqlen": 2560.0, "step_tflops": 7.8416369016697125, "step_time": 0.8052325363159181} +{"epoch": 0, "iter": 10058, "iter_tflops": 5.005594815154846, 
"iter_time": 1.2614567108154295, "loss": 0.004814961925148964, "lr": 3e-05, "seqlen": 2560.0, "step_tflops": 6.551877601126349, "step_time": 0.9637452888488769} +{"epoch": 0, "iter": 10059, "iter_tflops": 14.855229821874307, "iter_time": 0.42505846405029296, "loss": 0.039241667836904526, "lr": 3e-05, "seqlen": 2560.0, "step_tflops": 16.459492951321543, "step_time": 0.3836291427612305} +{"epoch": 0, "iter": 10060, "iter_tflops": 16.25960518454093, "iter_time": 0.3883452949523926, "loss": 0.01494133472442627, "lr": 3e-05, "seqlen": 2560.0, "step_tflops": 17.967053644866073, "step_time": 0.3514399909973145} +{"epoch": 0, "iter": 10061, "iter_tflops": 31.676684008495926, "iter_time": 0.6513021850585937, "loss": 0.7643799781799316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.190602327028266, "step_time": 0.6034141578674316} +{"epoch": 0, "iter": 10062, "iter_tflops": 10.260382949664816, "iter_time": 2.010752777099609, "loss": 0.8785441517829895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.122684080140145, "step_time": 1.572170249938965} +{"epoch": 0, "iter": 10063, "iter_tflops": 15.79821775107898, "iter_time": 1.3059127197265625, "loss": 0.7701719999313354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.55991404025615, "step_time": 1.1115942382812498} +{"epoch": 0, "iter": 10064, "iter_tflops": 46.56434995576639, "iter_time": 0.44306628417968746, "loss": 0.7515236139297485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.446905342545364, "step_time": 0.4089664840698242} +{"epoch": 0, "iter": 10065, "iter_tflops": 17.51567475473561, "iter_time": 0.9235317306518555, "loss": 0.2849315404891968, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 18.386354249804985, "step_time": 0.8797982025146485} +{"epoch": 0, "iter": 10066, "iter_tflops": 10.816677698267343, "iter_time": 1.4954944458007815, "loss": 0.35199689865112305, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 13.439688179382873, "step_time": 1.2036202926635742} +{"epoch": 0, "iter": 10067, "iter_tflops": 
28.627294214138026, "iter_time": 0.5650649795532227, "loss": 0.2068810611963272, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 30.69614768451982, "step_time": 0.5269808311462403} +{"epoch": 0, "iter": 10068, "iter_tflops": 29.596167313176604, "iter_time": 0.5465667648315429, "loss": 0.2610183656215668, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 31.368896684388293, "step_time": 0.5156790046691895} +{"epoch": 0, "iter": 10069, "iter_tflops": 30.251005614203414, "iter_time": 0.6819969482421875, "loss": 0.2824568450450897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.2105602802573, "step_time": 0.6405071296691895} +{"epoch": 0, "iter": 10070, "iter_tflops": 8.967378632630071, "iter_time": 2.3006827697753907, "loss": 0.2421186864376068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.916244591007267, "step_time": 1.8899442329406737} +{"epoch": 0, "iter": 10071, "iter_tflops": 11.203299796976118, "iter_time": 1.8415193634033202, "loss": 0.26664993166923523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.202636860847345, "step_time": 1.5626494712829588} +{"epoch": 0, "iter": 10072, "iter_tflops": 30.259269217095813, "iter_time": 0.6818106994628905, "loss": 0.279023677110672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.70061865362338, "step_time": 0.5621456604003906} +{"epoch": 0, "iter": 10073, "iter_tflops": 19.97419705213163, "iter_time": 0.7606712493896484, "loss": 0.1652640700340271, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 21.615356278263054, "step_time": 0.7029168167114258} +{"epoch": 0, "iter": 10074, "iter_tflops": 15.764102614507804, "iter_time": 0.9638225402832032, "loss": 0.22846439480781555, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 17.63034213096316, "step_time": 0.8617982177734373} +{"epoch": 0, "iter": 10075, "iter_tflops": 24.481065908524165, "iter_time": 0.620634635925293, "loss": 0.31689631938934326, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 26.020485929341014, "step_time": 0.5839167442321778} +{"epoch": 0, "iter": 10076, 
"iter_tflops": 27.434192814430304, "iter_time": 0.5538270263671875, "loss": 0.250101774930954, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.208250246508175, "step_time": 0.5201885528564453} +{"epoch": 0, "iter": 10077, "iter_tflops": 50.43706192075043, "iter_time": 0.4090462989807129, "loss": 0.04231949895620346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.68167064369521, "step_time": 0.37051857948303224} +{"epoch": 0, "iter": 10078, "iter_tflops": 46.417290080947076, "iter_time": 0.44447001266479497, "loss": 0.02764999493956566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.78051543267083, "step_time": 0.40627971839904786} +{"epoch": 0, "iter": 10079, "iter_tflops": 50.06439745010171, "iter_time": 0.4120911178588867, "loss": 0.020978732034564018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.3963025511077, "step_time": 0.3792738208770752} +{"epoch": 0, "iter": 10080, "iter_tflops": 42.327341695528254, "iter_time": 0.48741765213012694, "loss": 0.01916186697781086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.31459792954203, "step_time": 0.4454555244445801} +{"epoch": 0, "iter": 10081, "iter_tflops": 29.318341313716722, "iter_time": 0.6754125595092774, "loss": 0.10967434197664261, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 31.614451603517676, "step_time": 0.62635835647583} +{"epoch": 0, "iter": 10082, "iter_tflops": 37.04604609448696, "iter_time": 0.5345233306884766, "loss": 0.1638723909854889, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 41.029645361143245, "step_time": 0.48262605667114256} +{"epoch": 0, "iter": 10083, "iter_tflops": 39.694419083100684, "iter_time": 0.4988604545593262, "loss": 0.19197680056095123, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 43.74244481581707, "step_time": 0.4526947689056396} +{"epoch": 0, "iter": 10084, "iter_tflops": 38.41143326628033, "iter_time": 0.5155229644775391, "loss": 0.08812104165554047, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 42.11900249730877, "step_time": 0.4701435165405274} +{"epoch": 0, 
"iter": 10085, "iter_tflops": 17.1284748640738, "iter_time": 1.204490982055664, "loss": 0.13862226903438568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.08490991953868, "step_time": 1.1407905044555664} +{"epoch": 0, "iter": 10086, "iter_tflops": 24.88234523764305, "iter_time": 0.8291458587646485, "loss": 0.09170340746641159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.435689488746334, "step_time": 0.656295244216919} +{"epoch": 0, "iter": 10087, "iter_tflops": 47.16482376476459, "iter_time": 0.4374254341125488, "loss": 0.07938596606254578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.26422042111892, "step_time": 0.4024462547302246} +{"epoch": 0, "iter": 10088, "iter_tflops": 52.04896990492277, "iter_time": 0.39637851715087885, "loss": 0.14166851341724396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.728800222647386, "step_time": 0.36367935562133785} +{"epoch": 0, "iter": 10089, "iter_tflops": 32.10884221420928, "iter_time": 0.6425362014770508, "loss": 0.16169539093971252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.22457453575039, "step_time": 0.6028151931762695} +{"epoch": 0, "iter": 10090, "iter_tflops": 9.940264605496314, "iter_time": 2.0755074768066404, "loss": 0.13294203579425812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.611690632664736, "step_time": 1.6358705673217775} +{"epoch": 0, "iter": 10091, "iter_tflops": 14.547541462157282, "iter_time": 1.4181842041015627, "loss": 0.2280050814151764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.441167488655, "step_time": 1.1828963584899903} +{"epoch": 0, "iter": 10092, "iter_tflops": 35.41306422278125, "iter_time": 0.5825842514038085, "loss": 0.18230921030044556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60443755666783, "step_time": 0.4523922367095948} +{"epoch": 0, "iter": 10093, "iter_tflops": 21.881129681259797, "iter_time": 0.7280431060791016, "loss": 0.20745158195495605, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 23.202176460745463, "step_time": 0.6865910034179689} 
+{"epoch": 0, "iter": 10094, "iter_tflops": 17.29579123776262, "iter_time": 0.9210567703247069, "loss": 0.177020862698555, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 20.590054373313333, "step_time": 0.7736941986083983} +{"epoch": 0, "iter": 10095, "iter_tflops": 27.72558116445809, "iter_time": 0.5745742721557617, "loss": 0.3052433729171753, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.556985262968922, "step_time": 0.5389726142883301} +{"epoch": 0, "iter": 10096, "iter_tflops": 28.438172819258167, "iter_time": 0.5601768341064454, "loss": 0.23465517163276672, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.29581858688044, "step_time": 0.5258285255432129} +{"epoch": 0, "iter": 10097, "iter_tflops": 44.26244980314998, "iter_time": 0.4661082611083985, "loss": 0.11616469919681549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80909526787844, "step_time": 0.42268952941894533} +{"epoch": 0, "iter": 10098, "iter_tflops": 10.482589349997879, "iter_time": 1.9681295166015622, "loss": 0.021335814148187637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.331945468678107, "step_time": 1.5474930915832519} +{"epoch": 0, "iter": 10099, "iter_tflops": 12.123127091538548, "iter_time": 1.7017963562011718, "loss": 0.045299749821424484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.997395415099929, "step_time": 1.3756450996398926} +{"epoch": 0, "iter": 10100, "iter_tflops": 33.084179731855926, "iter_time": 0.6235939254760742, "loss": 0.05389142408967018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01418408314022, "step_time": 0.49105067634582517} +{"epoch": 0, "iter": 10101, "iter_tflops": 22.5491257128434, "iter_time": 0.7173795394897462, "loss": 0.21258577704429626, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 24.6267161901597, "step_time": 0.6568590507507324} +{"epoch": 0, "iter": 10102, "iter_tflops": 28.334412715518056, "iter_time": 0.5709058303833008, "loss": 0.27827805280685425, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 30.282634821539208, "step_time": 
0.534176815032959} +{"epoch": 0, "iter": 10103, "iter_tflops": 29.02491100226557, "iter_time": 0.5573240661621094, "loss": 0.1617155224084854, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 30.963530571080014, "step_time": 0.5224301338195801} +{"epoch": 0, "iter": 10104, "iter_tflops": 28.624934317507503, "iter_time": 0.5651115646362305, "loss": 0.2566221356391907, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 30.49475692691639, "step_time": 0.5304610710144043} +{"epoch": 0, "iter": 10105, "iter_tflops": 2.142479809344323, "iter_time": 0.6582068405151368, "loss": 0.48153334856033325, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.282755087618707, "step_time": 0.6177600364685057} +{"epoch": 0, "iter": 10106, "iter_tflops": 1.1102340162287307, "iter_time": 1.2701780395507811, "loss": 0.4201526641845703, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.223001758181776, "step_time": 1.153060375213623} +{"epoch": 0, "iter": 10107, "iter_tflops": 2.700188731342239, "iter_time": 0.5222578887939453, "loss": 0.32397934794425964, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.9670227411312533, "step_time": 0.47528953742980967} +{"epoch": 0, "iter": 10108, "iter_tflops": 2.905500969122283, "iter_time": 0.4853534317016602, "loss": 0.38337433338165283, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.1753241557880574, "step_time": 0.4441105213165283} +{"epoch": 0, "iter": 10109, "iter_tflops": 27.36617406593284, "iter_time": 0.7538903121948242, "loss": 0.322222501039505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.74702038273605, "step_time": 0.6935515975952149} +{"epoch": 0, "iter": 10110, "iter_tflops": 8.327828138353409, "iter_time": 2.4773678283691405, "loss": 0.120758056640625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.115583164728086, "step_time": 2.0395357513427737} +{"epoch": 0, "iter": 10111, "iter_tflops": 12.106017096514417, "iter_time": 1.7042015838623046, "loss": 0.12177586555480957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.23238177238604, 
"step_time": 1.3544233474731444} +{"epoch": 0, "iter": 10112, "iter_tflops": 47.14346276415149, "iter_time": 0.43762363433837886, "loss": 0.1424344778060913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24691747932656, "step_time": 0.4025821361541748} +{"epoch": 0, "iter": 10113, "iter_tflops": 15.133912094941207, "iter_time": 1.1611529235839844, "loss": 0.23188214004039764, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 15.716810163174799, "step_time": 1.1180886001586914} +{"epoch": 0, "iter": 10114, "iter_tflops": 11.65626920551739, "iter_time": 1.5075823974609375, "loss": 0.19530554115772247, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 15.738505658294372, "step_time": 1.116547317504883} +{"epoch": 0, "iter": 10115, "iter_tflops": 25.353598537973543, "iter_time": 0.6931081695556641, "loss": 0.36988794803619385, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 27.420187774463894, "step_time": 0.6408703842163086} +{"epoch": 0, "iter": 10116, "iter_tflops": 26.503101471767994, "iter_time": 0.6630464096069335, "loss": 0.31153571605682373, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 28.38622646587228, "step_time": 0.6190603141784669} +{"epoch": 0, "iter": 10117, "iter_tflops": 19.65531977198954, "iter_time": 1.049644256591797, "loss": 0.20413723587989807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.925888270297047, "step_time": 0.985912437438965} +{"epoch": 0, "iter": 10118, "iter_tflops": 15.640726574161182, "iter_time": 1.3190623474121095, "loss": 0.18154458701610565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.291897868974083, "step_time": 1.0167158164978027} +{"epoch": 0, "iter": 10119, "iter_tflops": 49.623249031976236, "iter_time": 0.41575458908081053, "loss": 0.2641216814517975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.02921398478709, "step_time": 0.3818507061004639} +{"epoch": 0, "iter": 10120, "iter_tflops": 50.900682888618086, "iter_time": 0.4053205642700195, "loss": 0.19728684425354004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
55.16812923374738, "step_time": 0.37396761131286615} +{"epoch": 0, "iter": 10121, "iter_tflops": 27.00021904001817, "iter_time": 0.7641083755493164, "loss": 0.052275147289037704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.505556139084437, "step_time": 0.7237569198608399} +{"epoch": 0, "iter": 10122, "iter_tflops": 13.647877152176958, "iter_time": 1.5116705169677735, "loss": 0.08857819437980652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.22115299134862, "step_time": 1.2718635673522949} +{"epoch": 0, "iter": 10123, "iter_tflops": 41.47859807408401, "iter_time": 0.4973912925720215, "loss": 0.06836947053670883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.75135030255379, "step_time": 0.45093955421447757} +{"epoch": 0, "iter": 10124, "iter_tflops": 45.14099386959171, "iter_time": 0.4570367584228515, "loss": 0.06030205264687538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64243938933479, "step_time": 0.4155938701629639} +{"epoch": 0, "iter": 10125, "iter_tflops": 16.807680394169658, "iter_time": 1.2274801177978514, "loss": 0.7147605419158936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.81907075586913, "step_time": 1.157809730529785} +{"epoch": 0, "iter": 10126, "iter_tflops": 17.65497348737418, "iter_time": 1.1685711975097657, "loss": 0.6431621313095093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.779536322973673, "step_time": 0.9056854019165038} +{"epoch": 0, "iter": 10127, "iter_tflops": 45.071428330777195, "iter_time": 0.45774217224121094, "loss": 0.7384905219078064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62476612615629, "step_time": 0.4242918815612793} +{"epoch": 0, "iter": 10128, "iter_tflops": 43.03277534198619, "iter_time": 0.4794274444580078, "loss": 0.7513420581817627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.59968694071247, "step_time": 0.44273030281066894} +{"epoch": 0, "iter": 10129, "iter_tflops": 29.49545958075467, "iter_time": 0.6994667587280273, "loss": 0.21044811606407166, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 31.4097714404363, "step_time": 0.6568367919921875} +{"epoch": 0, "iter": 10130, "iter_tflops": 14.758690568805935, "iter_time": 1.3978945770263673, "loss": 0.17958009243011475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.068368999221715, "step_time": 1.028040370941162} +{"epoch": 0, "iter": 10131, "iter_tflops": 39.33762767993822, "iter_time": 0.5244620666503905, "loss": 0.1958417296409607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.23107500983625, "step_time": 0.4772283248901368} +{"epoch": 0, "iter": 10132, "iter_tflops": 37.38811928059076, "iter_time": 0.5518088073730469, "loss": 0.18513555824756622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.94809180047053, "step_time": 0.5038352851867676} +{"epoch": 0, "iter": 10133, "iter_tflops": 22.5256242766731, "iter_time": 0.915894416809082, "loss": 0.3782711327075958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.05744852529996, "step_time": 0.8575761260986329} +{"epoch": 0, "iter": 10134, "iter_tflops": 16.63765310979478, "iter_time": 1.2400242614746093, "loss": 0.41281965374946594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.29988087623415, "step_time": 0.9686013565063476} +{"epoch": 0, "iter": 10135, "iter_tflops": 38.80651438990836, "iter_time": 0.5316399536132812, "loss": 0.4521712064743042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.61180572442692, "step_time": 0.48416379356384276} +{"epoch": 0, "iter": 10136, "iter_tflops": 38.271527267741796, "iter_time": 0.5390716018676759, "loss": 0.42934998869895935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.769052328818496, "step_time": 0.49393252563476564} +{"epoch": 0, "iter": 10137, "iter_tflops": 34.24217706585463, "iter_time": 0.6025053100585938, "loss": 0.5693733096122742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.881442945447176, "step_time": 0.5446226940155029} +{"epoch": 0, "iter": 10138, "iter_tflops": 34.520088172734354, "iter_time": 0.5976547164916992, "loss": 0.5981528162956238, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 37.59043295884758, "step_time": 0.5488389434814454} +{"epoch": 0, "iter": 10139, "iter_tflops": 40.61798879215973, "iter_time": 0.5079299621582031, "loss": 0.6387287974357605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.416401660781844, "step_time": 0.464492681503296} +{"epoch": 0, "iter": 10140, "iter_tflops": 39.30012858063758, "iter_time": 0.5249624938964844, "loss": 0.5036658644676208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24428965697985, "step_time": 0.4770824928283691} +{"epoch": 0, "iter": 10141, "iter_tflops": 21.451040006297823, "iter_time": 0.9617759094238281, "loss": 0.6704801917076111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.00686375592521, "step_time": 0.8967364578247071} +{"epoch": 0, "iter": 10142, "iter_tflops": 14.67521226503968, "iter_time": 1.4058463439941409, "loss": 0.6024699211120605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.196949793905024, "step_time": 1.1337665786743165} +{"epoch": 0, "iter": 10143, "iter_tflops": 45.06076532637607, "iter_time": 0.4578504905700683, "loss": 0.7215437293052673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.804255166585065, "step_time": 0.4227314491271973} +{"epoch": 0, "iter": 10144, "iter_tflops": 45.33450054822097, "iter_time": 0.4550859336853027, "loss": 0.7170323133468628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.968032362014974, "step_time": 0.42131759262084967} +{"epoch": 0, "iter": 10145, "iter_tflops": 27.64042272484267, "iter_time": 0.7464102020263672, "loss": 0.8339911103248596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.275297025458965, "step_time": 0.7047270431518555} +{"epoch": 0, "iter": 10146, "iter_tflops": 19.777903726972553, "iter_time": 1.0431385345458983, "loss": 0.8380534648895264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.220044625910052, "step_time": 0.9284901924133302} +{"epoch": 0, "iter": 10147, "iter_tflops": 42.53529903822928, "iter_time": 0.48503464126586915, "loss": 0.706047534942627, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 45.88573085681946, "step_time": 0.44961893653869633} +{"epoch": 0, "iter": 10148, "iter_tflops": 45.523341941127065, "iter_time": 0.4531981315612793, "loss": 0.7138819098472595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95000018439519, "step_time": 0.4214727973937988} +{"epoch": 0, "iter": 10149, "iter_tflops": 28.807521975595918, "iter_time": 0.7161703643798828, "loss": 0.41088590025901794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.490527990160025, "step_time": 0.6766394309997559} +{"epoch": 0, "iter": 10150, "iter_tflops": 11.378110941001301, "iter_time": 1.8132266082763668, "loss": 0.5057337880134583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.2608061225158, "step_time": 1.555794822692871} +{"epoch": 0, "iter": 10151, "iter_tflops": 13.361034352561314, "iter_time": 1.5441239776611324, "loss": 0.36821499466896057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.13098879192811, "step_time": 1.2043142261505126} +{"epoch": 0, "iter": 10152, "iter_tflops": 38.15473275139863, "iter_time": 0.5407217407226561, "loss": 0.452395498752594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.526331263679374, "step_time": 0.4968195571899414} +{"epoch": 0, "iter": 10153, "iter_tflops": 12.520528262482049, "iter_time": 1.2494492950439453, "loss": 0.35160091519355774, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 13.309380911433827, "step_time": 1.1753939056396483} +{"epoch": 0, "iter": 10154, "iter_tflops": 15.6336112121423, "iter_time": 1.0006494979858398, "loss": 0.3492491841316223, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 18.41012697207964, "step_time": 0.8497369537353516} +{"epoch": 0, "iter": 10155, "iter_tflops": 25.460769777668354, "iter_time": 0.6144262466430664, "loss": 0.23397964239120483, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 27.36124371858621, "step_time": 0.5717490539550781} +{"epoch": 0, "iter": 10156, "iter_tflops": 25.35228179674026, "iter_time": 0.6170555114746094, "loss": 0.41543132066726685, 
"lr": 3e-05, "seqlen": 6256.0, "step_tflops": 27.16601043885639, "step_time": 0.5758580284118652} +{"epoch": 0, "iter": 10157, "iter_tflops": 16.632389114553135, "iter_time": 1.2404167175292968, "loss": 0.4947015941143036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.692444857731257, "step_time": 1.1660962448120118} +{"epoch": 0, "iter": 10158, "iter_tflops": 21.44558172323802, "iter_time": 0.9620206985473633, "loss": 0.5682265758514404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.759150226287176, "step_time": 0.6932689056396485} +{"epoch": 0, "iter": 10159, "iter_tflops": 36.641599969994616, "iter_time": 0.5630511093139648, "loss": 0.6057891249656677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.10923932840406, "step_time": 0.5143725948333739} +{"epoch": 0, "iter": 10160, "iter_tflops": 39.176722947906626, "iter_time": 0.5266161117553712, "loss": 0.48683780431747437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30633779040997, "step_time": 0.48765964126586914} +{"epoch": 0, "iter": 10161, "iter_tflops": 20.921996446375296, "iter_time": 0.986095832824707, "loss": 0.16805000603199005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.407762817951518, "step_time": 0.920711883544922} +{"epoch": 0, "iter": 10162, "iter_tflops": 16.386207422169722, "iter_time": 1.2590523834228515, "loss": 0.2342718243598938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.463028160283837, "step_time": 0.9612387104034426} +{"epoch": 0, "iter": 10163, "iter_tflops": 47.91950917356387, "iter_time": 0.4305364112854004, "loss": 0.15533089637756348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.013381610708365, "step_time": 0.39664972496032713} +{"epoch": 0, "iter": 10164, "iter_tflops": 47.53974440240222, "iter_time": 0.43397569274902337, "loss": 0.15472637116909027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.64773169725306, "step_time": 0.39945788192749027} +{"epoch": 0, "iter": 10165, "iter_tflops": 32.41124231787281, "iter_time": 0.6365412750244142, "loss": 
0.7598146796226501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.60202259425028, "step_time": 0.5962395248413085} +{"epoch": 0, "iter": 10166, "iter_tflops": 24.240597264754307, "iter_time": 0.8510967483520508, "loss": 0.7515477538108826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.476664452902817, "step_time": 0.6999127578735351} +{"epoch": 0, "iter": 10167, "iter_tflops": 46.678703309639076, "iter_time": 0.44198086166381834, "loss": 0.829047441482544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.247781931693105, "step_time": 0.4105871486663818} +{"epoch": 0, "iter": 10168, "iter_tflops": 48.77203213198603, "iter_time": 0.4230107421875, "loss": 0.9048769474029541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.65196720124074, "step_time": 0.3918389873504639} +{"epoch": 0, "iter": 10169, "iter_tflops": 35.20087839472268, "iter_time": 0.5860959854125977, "loss": 0.40310636162757874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.984477713551726, "step_time": 0.543145378112793} +{"epoch": 0, "iter": 10170, "iter_tflops": 9.012923749412213, "iter_time": 2.289056701660156, "loss": 0.6740822196006775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.217759911490354, "step_time": 2.019140563964844} +{"epoch": 0, "iter": 10171, "iter_tflops": 21.522050610172, "iter_time": 0.9586025924682616, "loss": 0.5706831216812134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.478045830151004, "step_time": 0.6998799591064454} +{"epoch": 0, "iter": 10172, "iter_tflops": 35.914164772030226, "iter_time": 0.5744556121826172, "loss": 0.3906080722808838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.36550473200256, "step_time": 0.5240906639099121} +{"epoch": 0, "iter": 10173, "iter_tflops": 10.559548297773588, "iter_time": 1.3615295410156252, "loss": 0.25283220410346985, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 11.250071872265368, "step_time": 1.2779595642089843} +{"epoch": 0, "iter": 10174, "iter_tflops": 11.85555906587907, "iter_time": 1.2126916046142577, 
"loss": 0.1967729926109314, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 15.623198861857086, "step_time": 0.9202428436279297} +{"epoch": 0, "iter": 10175, "iter_tflops": 22.431218234247385, "iter_time": 0.6409432067871094, "loss": 0.35835695266723633, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 24.002696462197655, "step_time": 0.5989800758361816} +{"epoch": 0, "iter": 10176, "iter_tflops": 22.35395963717366, "iter_time": 0.6431584014892578, "loss": 0.24106405675411224, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 24.010001804832296, "step_time": 0.5987978286743165} +{"epoch": 0, "iter": 10177, "iter_tflops": 20.771913269243573, "iter_time": 0.9932206649780273, "loss": 0.008413108997046947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.494018045013174, "step_time": 0.9171813354492188} +{"epoch": 0, "iter": 10178, "iter_tflops": 43.62348134785123, "iter_time": 0.4729355125427246, "loss": 0.0042314850725233555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.907855078570094, "step_time": 0.4218359909057618} +{"epoch": 0, "iter": 10179, "iter_tflops": 51.333475129210925, "iter_time": 0.4019033088684082, "loss": 0.008904779329895973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.37029740370333, "step_time": 0.36599227714538574} +{"epoch": 0, "iter": 10180, "iter_tflops": 52.841207762691624, "iter_time": 0.3904356918334961, "loss": 0.01971086673438549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.06682570248916, "step_time": 0.355299144744873} +{"epoch": 0, "iter": 10181, "iter_tflops": 50.22133072014983, "iter_time": 0.4108034019470215, "loss": 0.13691310584545135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.51767698356524, "step_time": 0.37161305427551267} +{"epoch": 0, "iter": 10182, "iter_tflops": 39.20338480140324, "iter_time": 0.5262579650878907, "loss": 0.10215525329113007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.38849174494892, "step_time": 0.47549690437316894} +{"epoch": 0, "iter": 10183, "iter_tflops": 39.83934736726937, 
"iter_time": 0.5178572158813476, "loss": 0.14797504246234894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.86237268091216, "step_time": 0.4703597240447998} +{"epoch": 0, "iter": 10184, "iter_tflops": 43.47940094628063, "iter_time": 0.4745027084350586, "loss": 0.08017735928297043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.545729405354024, "step_time": 0.433921064376831} +{"epoch": 0, "iter": 10185, "iter_tflops": 27.870684280292956, "iter_time": 0.7402435226440429, "loss": 0.41815879940986633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.344007278247297, "step_time": 0.6799066886901856} +{"epoch": 0, "iter": 10186, "iter_tflops": 45.01093978894363, "iter_time": 0.45835731506347654, "loss": 0.3419743478298187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80565723930523, "step_time": 0.4227193050384521} +{"epoch": 0, "iter": 10187, "iter_tflops": 47.50500303488202, "iter_time": 0.4342930679321289, "loss": 0.4547097086906433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.38130042015039, "step_time": 0.4015292205810548} +{"epoch": 0, "iter": 10188, "iter_tflops": 46.75537538013154, "iter_time": 0.44125607681274415, "loss": 0.46095913648605347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96472146038897, "step_time": 0.4048112678527832} +{"epoch": 0, "iter": 10189, "iter_tflops": 35.07014549393616, "iter_time": 0.5882808074951171, "loss": 0.6466116309165955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.61332358441694, "step_time": 0.5485049324035645} +{"epoch": 0, "iter": 10190, "iter_tflops": 10.956898659714419, "iter_time": 1.8829318542480469, "loss": 0.7384961247444153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.86619202130796, "step_time": 1.6035120162963867} +{"epoch": 0, "iter": 10191, "iter_tflops": 9.522512238110345, "iter_time": 2.166559936523438, "loss": 0.5858681201934814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.034922229153349, "step_time": 1.7142689514160157} +{"epoch": 0, "iter": 10192, "iter_tflops": 
23.56026674094914, "iter_time": 0.8756731719970703, "loss": 0.9228569269180298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.62966274075138, "step_time": 0.6962986278533936} +{"epoch": 0, "iter": 10193, "iter_tflops": 16.104152702081777, "iter_time": 0.9231722412109374, "loss": 0.16282592713832855, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 17.35498461040015, "step_time": 0.8566361236572265} +{"epoch": 0, "iter": 10194, "iter_tflops": 14.427274111236521, "iter_time": 1.030472328186035, "loss": 0.2543981373310089, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 15.98633945445978, "step_time": 0.9299756698608399} +{"epoch": 0, "iter": 10195, "iter_tflops": 21.11954821099746, "iter_time": 0.7039405670166016, "loss": 0.2460414320230484, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 22.726991443070073, "step_time": 0.6541519927978515} +{"epoch": 0, "iter": 10196, "iter_tflops": 21.60603664586594, "iter_time": 0.6880904159545899, "loss": 0.21905092895030975, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 23.134252836078463, "step_time": 0.6426361312866211} +{"epoch": 0, "iter": 10197, "iter_tflops": 19.05670671484401, "iter_time": 1.0826158905029297, "loss": 0.5260177850723267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.20965818013168, "step_time": 1.0208531646728516} +{"epoch": 0, "iter": 10198, "iter_tflops": 17.02463341650526, "iter_time": 1.2118377532958984, "loss": 0.47873106598854065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.6654330485917, "step_time": 0.9983383102416993} +{"epoch": 0, "iter": 10199, "iter_tflops": 39.45355032973946, "iter_time": 0.5229210891723632, "loss": 0.6538345217704773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14319761960201, "step_time": 0.47820038032531736} +{"epoch": 0, "iter": 10200, "iter_tflops": 37.09040195353991, "iter_time": 0.5562380676269532, "loss": 0.5217657685279846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.19556131349996, "step_time": 0.5132679538726808} +{"epoch": 0, "iter": 10201, 
"iter_tflops": 21.89911424383812, "iter_time": 0.9420971679687501, "loss": 0.6809796690940857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.441673063726068, "step_time": 0.8801032867431641} +{"epoch": 0, "iter": 10202, "iter_tflops": 17.16505100940253, "iter_time": 1.2019243927001952, "loss": 0.9072497487068176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.341378701907335, "step_time": 1.0142426338195802} +{"epoch": 0, "iter": 10203, "iter_tflops": 38.277019311867925, "iter_time": 0.538994255065918, "loss": 0.818072497844696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.88666510065472, "step_time": 0.4925456218719482} +{"epoch": 0, "iter": 10204, "iter_tflops": 40.16151300573871, "iter_time": 0.5137030944824219, "loss": 0.7421656250953674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95264650980587, "step_time": 0.46939365768432617} +{"epoch": 0, "iter": 10205, "iter_tflops": 22.77789806420367, "iter_time": 0.9057505416870117, "loss": 0.15705624222755432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.413181493593992, "step_time": 0.8450800857543945} +{"epoch": 0, "iter": 10206, "iter_tflops": 41.163578091167466, "iter_time": 0.501197769165039, "loss": 0.18964368104934692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64563150196461, "step_time": 0.46210777664184566} +{"epoch": 0, "iter": 10207, "iter_tflops": 45.420042775335595, "iter_time": 0.4542288436889648, "loss": 0.22028249502182007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.07129697402194, "step_time": 0.420430980682373} +{"epoch": 0, "iter": 10208, "iter_tflops": 55.1638315769086, "iter_time": 0.37399674606323247, "loss": 0.1473775953054428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.93181933464326, "step_time": 0.34424273681640627} +{"epoch": 0, "iter": 10209, "iter_tflops": 35.38666202865476, "iter_time": 0.5830189208984374, "loss": 0.2617602050304413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.045030806669615, "step_time": 0.5422808990478516} +{"epoch": 0, "iter": 
10210, "iter_tflops": 7.867019152139627, "iter_time": 2.6224791259765627, "loss": 0.24509018659591675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.8402858438255, "step_time": 2.096594940185547} +{"epoch": 0, "iter": 10211, "iter_tflops": 13.11572019548724, "iter_time": 1.5730050048828126, "loss": 0.33724892139434814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.525256832347186, "step_time": 1.248458267211914} +{"epoch": 0, "iter": 10212, "iter_tflops": 37.529870699440174, "iter_time": 0.549724609375, "loss": 0.31458577513694763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57019488265951, "step_time": 0.49629532814025884} +{"epoch": 0, "iter": 10213, "iter_tflops": 24.187799309728305, "iter_time": 0.6095800781249999, "loss": 0.39904817938804626, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 25.941066271962246, "step_time": 0.5683806686401367} +{"epoch": 0, "iter": 10214, "iter_tflops": 25.92508015572858, "iter_time": 0.5687311477661132, "loss": 0.29035669565200806, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 27.70285733089609, "step_time": 0.53223392868042} +{"epoch": 0, "iter": 10215, "iter_tflops": 25.071373656792204, "iter_time": 0.5880970382690429, "loss": 0.2321314811706543, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 26.674731949436946, "step_time": 0.5527478446960449} +{"epoch": 0, "iter": 10216, "iter_tflops": 27.856456850153776, "iter_time": 0.5292992095947265, "loss": 0.29196736216545105, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 29.59482817101672, "step_time": 0.4982086906433105} +{"epoch": 0, "iter": 10217, "iter_tflops": 43.47548288333054, "iter_time": 0.4745454711914062, "loss": 0.29934337735176086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.66772756940925, "step_time": 0.432810510635376} +{"epoch": 0, "iter": 10218, "iter_tflops": 48.58107548599057, "iter_time": 0.42467346191406247, "loss": 0.26314684748649597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21923977454424, "step_time": 0.3876623115539551} +{"epoch": 0, 
"iter": 10219, "iter_tflops": 48.872177812024766, "iter_time": 0.42214393615722656, "loss": 0.3842168152332306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.09857564432419, "step_time": 0.3885432567596435} +{"epoch": 0, "iter": 10220, "iter_tflops": 50.675370160995236, "iter_time": 0.4071226997375489, "loss": 0.19589009881019592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.36467867307631, "step_time": 0.3726399936676026} +{"epoch": 0, "iter": 10221, "iter_tflops": 38.21925038670475, "iter_time": 0.5398089523315429, "loss": 0.36766621470451355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.2443556459183, "step_time": 0.5002161674499512} +{"epoch": 0, "iter": 10222, "iter_tflops": 37.05224497821295, "iter_time": 0.556810890197754, "loss": 0.5003848075866699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75695349327157, "step_time": 0.49407563972473145} +{"epoch": 0, "iter": 10223, "iter_tflops": 47.855318511618734, "iter_time": 0.43111391067504884, "loss": 0.44526398181915283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.87648057269493, "step_time": 0.397696475982666} +{"epoch": 0, "iter": 10224, "iter_tflops": 46.291165239483924, "iter_time": 0.44568101501464846, "loss": 0.4942937195301056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.80488577262428, "step_time": 0.41423834609985355} +{"epoch": 0, "iter": 10225, "iter_tflops": 40.30528670906749, "iter_time": 0.5118706550598144, "loss": 0.7311386466026306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.670306284190346, "step_time": 0.4724284133911133} +{"epoch": 0, "iter": 10226, "iter_tflops": 44.55014267584178, "iter_time": 0.463098258972168, "loss": 0.6789337396621704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.15798869280799, "step_time": 0.42840438461303715} +{"epoch": 0, "iter": 10227, "iter_tflops": 49.95951121887034, "iter_time": 0.41295627212524416, "loss": 0.5895934104919434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.77851659248739, "step_time": 0.38363076591491696} 
+{"epoch": 0, "iter": 10228, "iter_tflops": 44.91390450078091, "iter_time": 0.4593475837707519, "loss": 0.787023663520813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.02864912301656, "step_time": 0.42955806350708003} +{"epoch": 0, "iter": 10229, "iter_tflops": 41.307764165260366, "iter_time": 0.49944832229614256, "loss": 0.33916014432907104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.85400663741616, "step_time": 0.459960994720459} +{"epoch": 0, "iter": 10230, "iter_tflops": 41.980136977799475, "iter_time": 0.4914489326477051, "loss": 0.3398975431919098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88321295195544, "step_time": 0.44964361000061037} +{"epoch": 0, "iter": 10231, "iter_tflops": 46.392316739942366, "iter_time": 0.44470927429199214, "loss": 0.3241705000400543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.367988621406525, "step_time": 0.4096072540283203} +{"epoch": 0, "iter": 10232, "iter_tflops": 42.49062652407962, "iter_time": 0.4855445823669434, "loss": 0.3395165205001831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.98893730898984, "step_time": 0.44860992050170895} +{"epoch": 0, "iter": 10233, "iter_tflops": 25.1002902752627, "iter_time": 0.8219464111328124, "loss": 0.10516688227653503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.29159520698507, "step_time": 0.784702995300293} +{"epoch": 0, "iter": 10234, "iter_tflops": 14.044173149432469, "iter_time": 1.4690144653320314, "loss": 0.06988832354545593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.681363389115788, "step_time": 1.1043676567077636} +{"epoch": 0, "iter": 10235, "iter_tflops": 41.00091069386399, "iter_time": 0.5031862258911133, "loss": 0.06539780646562576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.385152615325524, "step_time": 0.4545780353546143} +{"epoch": 0, "iter": 10236, "iter_tflops": 42.41531255641828, "iter_time": 0.48640673065185547, "loss": 0.0712960883975029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.240348889661625, "step_time": 
0.4461708011627198} +{"epoch": 0, "iter": 10237, "iter_tflops": 2.0380736395706656, "iter_time": 0.7304536514282226, "loss": 0.8934358358383179, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 2.188796300268176, "step_time": 0.680153896331787} +{"epoch": 0, "iter": 10238, "iter_tflops": 0.7942101725035057, "iter_time": 1.8744639434814452, "loss": 0.9250437617301941, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 0.900721824099946, "step_time": 1.6528058853149414} +{"epoch": 0, "iter": 10239, "iter_tflops": 0.7420231957277725, "iter_time": 2.00629621887207, "loss": 0.9057567715644836, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 0.8549817888580193, "step_time": 1.7412281188964842} +{"epoch": 0, "iter": 10240, "iter_tflops": 1.8716872267103786, "iter_time": 0.7953884124755859, "loss": 1.0567659139633179, "lr": 3e-05, "seqlen": 608.0, "step_tflops": 2.103538862621256, "step_time": 0.7077208595275879} +{"epoch": 0, "iter": 10241, "iter_tflops": 14.185284038899841, "iter_time": 1.2185039520263672, "loss": 0.1393665075302124, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 15.105131646886385, "step_time": 1.1443014907836913} +{"epoch": 0, "iter": 10242, "iter_tflops": 11.219784189430921, "iter_time": 1.5405665893554685, "loss": 0.22925806045532227, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 13.837371106431345, "step_time": 1.2491407890319823} +{"epoch": 0, "iter": 10243, "iter_tflops": 26.536525618584143, "iter_time": 0.651359748840332, "loss": 0.17570897936820984, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 28.6245724848923, "step_time": 0.6038456878662108} +{"epoch": 0, "iter": 10244, "iter_tflops": 27.260003505025097, "iter_time": 0.6340727233886718, "loss": 0.16685360670089722, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 29.32262609965774, "step_time": 0.5894705543518067} +{"epoch": 0, "iter": 10245, "iter_tflops": 32.93015367227719, "iter_time": 0.6265106964111329, "loss": 0.13765276968479156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.63218098767868, 
"step_time": 0.5631958827972412} +{"epoch": 0, "iter": 10246, "iter_tflops": 37.21863217471212, "iter_time": 0.5543216476440429, "loss": 0.14863808453083038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.63982832295859, "step_time": 0.49546538352966313} +{"epoch": 0, "iter": 10247, "iter_tflops": 41.11154422502743, "iter_time": 0.5018321228027344, "loss": 0.13820238411426544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.02285145763139, "step_time": 0.45823604774475096} +{"epoch": 0, "iter": 10248, "iter_tflops": 43.94152049492206, "iter_time": 0.46951250839233394, "loss": 0.22432270646095276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.00071855843832, "step_time": 0.42980801391601564} +{"epoch": 0, "iter": 10249, "iter_tflops": 26.20592628100425, "iter_time": 0.7872682418823241, "loss": 0.4405806362628937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.411838576806716, "step_time": 0.726144260406494} +{"epoch": 0, "iter": 10250, "iter_tflops": 43.75300735429029, "iter_time": 0.47153543853759766, "loss": 0.5554196834564209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.33023064413116, "step_time": 0.4358967456817626} +{"epoch": 0, "iter": 10251, "iter_tflops": 48.51269876894492, "iter_time": 0.4252720222473144, "loss": 0.5890401601791382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.549289184783575, "step_time": 0.3926046161651611} +{"epoch": 0, "iter": 10252, "iter_tflops": 47.31268915652768, "iter_time": 0.43605835723876957, "loss": 0.48750367760658264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.113250853155755, "step_time": 0.40363493156433106} +{"epoch": 0, "iter": 10253, "iter_tflops": 2.169048979385786, "iter_time": 0.7044502716064454, "loss": 0.11439558863639832, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 2.301925637936706, "step_time": 0.6637864913940429} +{"epoch": 0, "iter": 10254, "iter_tflops": 1.4192211125508967, "iter_time": 1.0766378326416015, "loss": 0.12356872111558914, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 
1.7137591393633425, "step_time": 0.8915997047424317} +{"epoch": 0, "iter": 10255, "iter_tflops": 3.576019432172191, "iter_time": 0.42728714752197267, "loss": 0.06886761635541916, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.8777276659549265, "step_time": 0.39404189109802246} +{"epoch": 0, "iter": 10256, "iter_tflops": 3.5284275962167264, "iter_time": 0.43305044555664063, "loss": 0.07876476645469666, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.8320608874927506, "step_time": 0.39873769950866694} +{"epoch": 0, "iter": 10257, "iter_tflops": 27.25562531118469, "iter_time": 0.7569480895996094, "loss": 0.31686651706695557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.796576992476222, "step_time": 0.7164425659179687} +{"epoch": 0, "iter": 10258, "iter_tflops": 14.59701217430355, "iter_time": 1.4133778381347657, "loss": 0.4030134677886963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.21369974618155, "step_time": 1.1985275573730467} +{"epoch": 0, "iter": 10259, "iter_tflops": 46.00843683899508, "iter_time": 0.4484197883605957, "loss": 0.48410192131996155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.106583522566375, "step_time": 0.4117441673278809} +{"epoch": 0, "iter": 10260, "iter_tflops": 50.543312718452924, "iter_time": 0.4081864128112793, "loss": 0.3280341625213623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.75573046900174, "step_time": 0.37678418922424317} +{"epoch": 0, "iter": 10261, "iter_tflops": 47.54689874758718, "iter_time": 0.43391039276123045, "loss": 0.20605386793613434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.05319770167618, "step_time": 0.3963463230133057} +{"epoch": 0, "iter": 10262, "iter_tflops": 44.296173810174224, "iter_time": 0.4657533988952637, "loss": 0.19256813824176788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.86613084222241, "step_time": 0.42219617462158204} +{"epoch": 0, "iter": 10263, "iter_tflops": 47.90326589183008, "iter_time": 0.43068239974975586, "loss": 0.16592277586460114, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 52.3632747821106, "step_time": 0.3939992980957031} +{"epoch": 0, "iter": 10264, "iter_tflops": 49.64778102151247, "iter_time": 0.4155491561889648, "loss": 0.18360869586467743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.93366829437818, "step_time": 0.38252717018127447} +{"epoch": 0, "iter": 10265, "iter_tflops": 31.94200051760481, "iter_time": 0.6458923416137695, "loss": 0.550682008266449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.00594018431491, "step_time": 0.6066908721923827} +{"epoch": 0, "iter": 10266, "iter_tflops": 9.263877932664395, "iter_time": 2.22704721069336, "loss": 0.7060211300849915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.260404221797472, "step_time": 1.8321805419921875} +{"epoch": 0, "iter": 10267, "iter_tflops": 12.372017926448478, "iter_time": 1.6675609130859372, "loss": 0.748104989528656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.66240905142413, "step_time": 1.407073928833008} +{"epoch": 0, "iter": 10268, "iter_tflops": 45.67705998481116, "iter_time": 0.45167297363281256, "loss": 0.6315429210662842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.544359507103515, "step_time": 0.41641659545898435} +{"epoch": 0, "iter": 10269, "iter_tflops": 15.976175746473372, "iter_time": 0.9101250686645508, "loss": 0.1735571026802063, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 16.812189584712783, "step_time": 0.8648675994873046} +{"epoch": 0, "iter": 10270, "iter_tflops": 11.102894660456988, "iter_time": 1.3095970458984376, "loss": 0.1966533064842224, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 12.96529108102104, "step_time": 1.1214802627563478} +{"epoch": 0, "iter": 10271, "iter_tflops": 25.907243822429034, "iter_time": 0.5612452697753907, "loss": 0.19787007570266724, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 27.59783206944858, "step_time": 0.5268645019531251} +{"epoch": 0, "iter": 10272, "iter_tflops": 26.02991930642479, "iter_time": 0.5586001968383789, "loss": 0.3552762567996979, "lr": 3e-05, 
"seqlen": 5824.0, "step_tflops": 27.72873764676755, "step_time": 0.5243772087097168} +{"epoch": 0, "iter": 10273, "iter_tflops": 37.75624150297543, "iter_time": 0.5464286880493163, "loss": 0.505289614200592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.77097595535737, "step_time": 0.5060240287780762} +{"epoch": 0, "iter": 10274, "iter_tflops": 31.529784559173144, "iter_time": 0.6543366470336913, "loss": 0.6981752514839172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.280753162855795, "step_time": 0.5121824169158936} +{"epoch": 0, "iter": 10275, "iter_tflops": 48.15832103020268, "iter_time": 0.42840142822265626, "loss": 0.6254988312721252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.27166422548277, "step_time": 0.3946898155212402} +{"epoch": 0, "iter": 10276, "iter_tflops": 45.8320643502318, "iter_time": 0.45014541244506834, "loss": 0.4534875750541687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.583763571527385, "step_time": 0.41608567047119144} +{"epoch": 0, "iter": 10277, "iter_tflops": 39.90675720268566, "iter_time": 0.5169824600219727, "loss": 0.8740075826644897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.22534319010888, "step_time": 0.4772916069030762} +{"epoch": 0, "iter": 10278, "iter_tflops": 40.67076486648575, "iter_time": 0.5072708511352539, "loss": 0.815764307975769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.03049989207291, "step_time": 0.46856369018554694} +{"epoch": 0, "iter": 10279, "iter_tflops": 42.533266188429266, "iter_time": 0.4850578231811523, "loss": 0.7239840030670166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.713679502452294, "step_time": 0.45131115531921384} +{"epoch": 0, "iter": 10280, "iter_tflops": 45.63089277469192, "iter_time": 0.45212995529174804, "loss": 0.7371942400932312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22676846206724, "step_time": 0.41910314559936523} +{"epoch": 0, "iter": 10281, "iter_tflops": 52.75145999848261, "iter_time": 0.39109995269775394, "loss": 0.003386321710422635, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.24760366989194, "step_time": 0.3541964340209961} +{"epoch": 0, "iter": 10282, "iter_tflops": 18.558480810116865, "iter_time": 1.1116800842285157, "loss": 0.0034955162554979324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.15917483655082, "step_time": 0.9750424423217774} +{"epoch": 0, "iter": 10283, "iter_tflops": 53.49079107528449, "iter_time": 0.3856943054199219, "loss": 0.006001854781061411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.95333758118209, "step_time": 0.3499563274383545} +{"epoch": 0, "iter": 10284, "iter_tflops": 59.34093944650015, "iter_time": 0.34767049026489255, "loss": 0.0026189994532614946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.88458491485805, "step_time": 0.3179660243988037} +{"epoch": 0, "iter": 10285, "iter_tflops": 30.672759077944256, "iter_time": 0.6726194229125977, "loss": 0.2711755931377411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.75873917633088, "step_time": 0.6297889976501465} +{"epoch": 0, "iter": 10286, "iter_tflops": 31.04962599408484, "iter_time": 0.6644554595947265, "loss": 0.25683867931365967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.2281581467941, "step_time": 0.5259256229400634} +{"epoch": 0, "iter": 10287, "iter_tflops": 39.76712959852286, "iter_time": 0.5187976531982421, "loss": 0.2905968725681305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.83246150545102, "step_time": 0.4706806964874267} +{"epoch": 0, "iter": 10288, "iter_tflops": 40.976086887141925, "iter_time": 0.5034910621643067, "loss": 0.2345096617937088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.02012398922677, "step_time": 0.45826380920410154} +{"epoch": 0, "iter": 10289, "iter_tflops": 25.87242196838499, "iter_time": 0.7974163970947264, "loss": 0.0455741286277771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.80645343907387, "step_time": 0.7419534301757813} +{"epoch": 0, "iter": 10290, "iter_tflops": 12.706616722200637, "iter_time": 1.6236496276855468, "loss": 
0.11057700961828232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.619819547730495, "step_time": 1.3208279037475585} +{"epoch": 0, "iter": 10291, "iter_tflops": 19.056309878017494, "iter_time": 1.0826384353637697, "loss": 0.07349290698766708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.870240013114394, "step_time": 0.9020934410095216} +{"epoch": 0, "iter": 10292, "iter_tflops": 43.65627394146945, "iter_time": 0.47258026504516604, "loss": 0.05325763300061226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12416981055848, "step_time": 0.4287054424285888} +{"epoch": 0, "iter": 10293, "iter_tflops": 14.685661310681207, "iter_time": 1.0123416595458985, "loss": 0.26758426427841187, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 15.585316316981832, "step_time": 0.9539047164916992} +{"epoch": 0, "iter": 10294, "iter_tflops": 6.730240957305034, "iter_time": 2.208970947265625, "loss": 0.18988794088363647, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 8.826921392856988, "step_time": 1.6842686233520507} +{"epoch": 0, "iter": 10295, "iter_tflops": 10.518856691593909, "iter_time": 1.413357666015625, "loss": 0.28326159715652466, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 11.581812751623563, "step_time": 1.2836424713134766} +{"epoch": 0, "iter": 10296, "iter_tflops": 18.164876326284777, "iter_time": 0.818442497253418, "loss": 0.42584773898124695, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 21.585055274291356, "step_time": 0.6887592620849609} +{"epoch": 0, "iter": 10297, "iter_tflops": 12.606013919912014, "iter_time": 1.1728712463378905, "loss": 0.22291810810565948, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 13.553526892091712, "step_time": 1.0908770370483398} +{"epoch": 0, "iter": 10298, "iter_tflops": 10.504154193233738, "iter_time": 1.4075603790283204, "loss": 0.136338472366333, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 14.50314957920482, "step_time": 1.0194496841430665} +{"epoch": 0, "iter": 10299, "iter_tflops": 26.100070342049865, "iter_time": 
0.5664824295043945, "loss": 0.14017900824546814, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.851996338553608, "step_time": 0.5308499641418457} +{"epoch": 0, "iter": 10300, "iter_tflops": 26.536703018648957, "iter_time": 0.5571615753173829, "loss": 0.2659190595149994, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 28.297115421449547, "step_time": 0.5224995918273925} +{"epoch": 0, "iter": 10301, "iter_tflops": 26.365308291051825, "iter_time": 0.7825090942382812, "loss": 0.24437083303928375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.042523250093637, "step_time": 0.7357074584960938} +{"epoch": 0, "iter": 10302, "iter_tflops": 15.423575242790676, "iter_time": 1.3376336669921873, "loss": 0.21395203471183777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.600677714735085, "step_time": 0.9551132507324219} +{"epoch": 0, "iter": 10303, "iter_tflops": 36.42619008669834, "iter_time": 0.5663807678222657, "loss": 0.2597361207008362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7207048973118, "step_time": 0.5194040126800538} +{"epoch": 0, "iter": 10304, "iter_tflops": 39.593417742819106, "iter_time": 0.5210738220214844, "loss": 0.25397753715515137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.98476596687572, "step_time": 0.4799629135131836} +{"epoch": 0, "iter": 10305, "iter_tflops": 13.940178688872015, "iter_time": 0.9758165969848633, "loss": 0.0031707945745438337, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 14.982550407652994, "step_time": 0.9079267120361327} +{"epoch": 0, "iter": 10306, "iter_tflops": 9.574494087964068, "iter_time": 1.4207599487304685, "loss": 0.01552039198577404, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 11.17194230947075, "step_time": 1.2176090202331542} +{"epoch": 0, "iter": 10307, "iter_tflops": 28.532323872904033, "iter_time": 0.4767595443725586, "loss": 0.0019793182145804167, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 31.744333267569356, "step_time": 0.42851924514770506} +{"epoch": 0, "iter": 10308, "iter_tflops": 
29.43867652536705, "iter_time": 0.46208115768432617, "loss": 0.003993724938482046, "lr": 3e-05, "seqlen": 5456.0, "step_tflops": 32.6029252736402, "step_time": 0.4172342700958252} +{"epoch": 0, "iter": 10309, "iter_tflops": 20.147995243188426, "iter_time": 1.0239774856567383, "loss": 0.5671297907829285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.769242373280512, "step_time": 0.9477175712585448} +{"epoch": 0, "iter": 10310, "iter_tflops": 13.190117333166665, "iter_time": 1.5641326751708982, "loss": 0.7794836163520813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.542606267675826, "step_time": 1.2471489181518554} +{"epoch": 0, "iter": 10311, "iter_tflops": 32.940631389551335, "iter_time": 0.6263114166259766, "loss": 0.643039882183075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.72265384166895, "step_time": 0.5775352973937988} +{"epoch": 0, "iter": 10312, "iter_tflops": 36.30630298248035, "iter_time": 0.5682510147094726, "loss": 0.7854431867599487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.344993888666714, "step_time": 0.5243638763427734} +{"epoch": 0, "iter": 10313, "iter_tflops": 22.152093325290885, "iter_time": 0.8491082992553711, "loss": 0.09703920036554337, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 23.704252514643972, "step_time": 0.793508518218994} +{"epoch": 0, "iter": 10314, "iter_tflops": 17.161663782407146, "iter_time": 1.0960199737548828, "loss": 0.013417084701359272, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 20.413932892150786, "step_time": 0.9214062957763673} +{"epoch": 0, "iter": 10315, "iter_tflops": 48.683920625713355, "iter_time": 0.3863601379394532, "loss": 0.0446806363761425, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 53.355547326506624, "step_time": 0.3525317840576171} +{"epoch": 0, "iter": 10316, "iter_tflops": 51.61449508211004, "iter_time": 0.36442333221435547, "loss": 0.044616688042879105, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 56.66342815811563, "step_time": 0.33195178794860836} +{"epoch": 0, "iter": 
10317, "iter_tflops": 32.02188767188694, "iter_time": 0.644280990600586, "loss": 0.4186662435531616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.16959786634498, "step_time": 0.6037850837707519} +{"epoch": 0, "iter": 10318, "iter_tflops": 16.533878727090748, "iter_time": 1.2478072357177736, "loss": 0.324105829000473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.300944116311655, "step_time": 1.0689162864685058} +{"epoch": 0, "iter": 10319, "iter_tflops": 42.08096047270571, "iter_time": 0.4902714500427246, "loss": 0.4614083766937256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.492203588671615, "step_time": 0.4535083351135254} +{"epoch": 0, "iter": 10320, "iter_tflops": 47.018675187146826, "iter_time": 0.4387850875854492, "loss": 0.6536023020744324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.06064790916548, "step_time": 0.4040507583618164} +{"epoch": 0, "iter": 10321, "iter_tflops": 21.53160056343551, "iter_time": 0.9581774215698241, "loss": 0.5340825319290161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.49519710023974, "step_time": 0.9171332626342774} +{"epoch": 0, "iter": 10322, "iter_tflops": 18.303502238403173, "iter_time": 1.127166442871094, "loss": 0.5505226850509644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.430358816851005, "step_time": 0.8805282783508301} +{"epoch": 0, "iter": 10323, "iter_tflops": 38.877925396684624, "iter_time": 0.5306634368896483, "loss": 0.6377593874931335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.49452900999287, "step_time": 0.4854999923706055} +{"epoch": 0, "iter": 10324, "iter_tflops": 34.4810583105331, "iter_time": 0.5983312149047851, "loss": 0.5999134182929993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.792550243362896, "step_time": 0.5459037132263184} +{"epoch": 0, "iter": 10325, "iter_tflops": 13.62540033856575, "iter_time": 1.1031099243164062, "loss": 0.0410475917160511, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 14.600961496052452, "step_time": 1.0294057922363282} +{"epoch": 0, 
"iter": 10326, "iter_tflops": 14.891375503299715, "iter_time": 1.0093301544189452, "loss": 0.03086131252348423, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 20.885794840712798, "step_time": 0.7196429176330567} +{"epoch": 0, "iter": 10327, "iter_tflops": 40.857940429860065, "iter_time": 0.3678676452636719, "loss": 0.04840023070573807, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 45.080170897084805, "step_time": 0.33341298484802245} +{"epoch": 0, "iter": 10328, "iter_tflops": 41.07929356041852, "iter_time": 0.3658854141235351, "loss": 0.05493421107530594, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 44.98534198458892, "step_time": 0.3341158180236817} +{"epoch": 0, "iter": 10329, "iter_tflops": 28.253292289777796, "iter_time": 0.6628234558105469, "loss": 0.01261971890926361, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 29.919681263708007, "step_time": 0.6259072303771972} +{"epoch": 0, "iter": 10330, "iter_tflops": 13.27572890530136, "iter_time": 1.4106151885986329, "loss": 0.02695939876139164, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 17.18621950352675, "step_time": 1.089648880004883} +{"epoch": 0, "iter": 10331, "iter_tflops": 49.90817327068636, "iter_time": 0.3752280158996582, "loss": 0.0049131098203361034, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 55.11429817318375, "step_time": 0.33978378486633304} +{"epoch": 0, "iter": 10332, "iter_tflops": 48.82951379568866, "iter_time": 0.3835169219970703, "loss": 0.00502045638859272, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 53.8368897397678, "step_time": 0.3478459644317627} +{"epoch": 0, "iter": 10333, "iter_tflops": 38.937399787783406, "iter_time": 0.5298528823852539, "loss": 0.4400222897529602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03467683077627, "step_time": 0.4908112792968749} +{"epoch": 0, "iter": 10334, "iter_tflops": 46.09766471089985, "iter_time": 0.44755181503295893, "loss": 0.4007565379142761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99256189017948, "step_time": 
0.41268326187133797} +{"epoch": 0, "iter": 10335, "iter_tflops": 45.334998747352074, "iter_time": 0.4550809326171875, "loss": 0.47609320282936096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30088669499926, "step_time": 0.41847307205200196} +{"epoch": 0, "iter": 10336, "iter_tflops": 47.18441253482255, "iter_time": 0.4372438354492188, "loss": 0.3507236838340759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.28280881538811, "step_time": 0.40230038070678703} +{"epoch": 0, "iter": 10337, "iter_tflops": 26.49649126363861, "iter_time": 0.7786349258422852, "loss": 0.24125318229198456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.970889769350574, "step_time": 0.7375916061401367} +{"epoch": 0, "iter": 10338, "iter_tflops": 15.915310471603217, "iter_time": 1.2963048095703125, "loss": 0.2523500919342041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.759328635509082, "step_time": 0.9938227710723878} +{"epoch": 0, "iter": 10339, "iter_tflops": 40.88304938811305, "iter_time": 0.5046368560791016, "loss": 0.25085800886154175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.012373262020354, "step_time": 0.45834271812438965} +{"epoch": 0, "iter": 10340, "iter_tflops": 42.47205238792358, "iter_time": 0.48575692367553713, "loss": 0.23543338477611542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.535177117184176, "step_time": 0.44334404182434084} +{"epoch": 0, "iter": 10341, "iter_tflops": 21.10764161347769, "iter_time": 0.9774229583740234, "loss": 0.6210582852363586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.541596859226175, "step_time": 0.9152454299926758} +{"epoch": 0, "iter": 10342, "iter_tflops": 43.52916509580801, "iter_time": 0.4739602394104004, "loss": 0.744124174118042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.110595508420616, "step_time": 0.4288264007568359} +{"epoch": 0, "iter": 10343, "iter_tflops": 47.59020090371718, "iter_time": 0.4335155792236328, "loss": 0.6649010181427002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
51.792786932374035, "step_time": 0.39833912658691406} +{"epoch": 0, "iter": 10344, "iter_tflops": 49.05397351122232, "iter_time": 0.42057945632934574, "loss": 0.6488489508628845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.212303866763655, "step_time": 0.3877128410339355} +{"epoch": 0, "iter": 10345, "iter_tflops": 33.53459883665575, "iter_time": 0.6152181396484375, "loss": 0.5073732137680054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.651142879188406, "step_time": 0.5786937484741211} +{"epoch": 0, "iter": 10346, "iter_tflops": 18.96290210466882, "iter_time": 1.0879713134765625, "loss": 0.6612372398376465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.25767808512115, "step_time": 0.9269202938079834} +{"epoch": 0, "iter": 10347, "iter_tflops": 47.62405677450529, "iter_time": 0.4332073936462402, "loss": 0.679076611995697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.65935999416796, "step_time": 0.3993679656982421} +{"epoch": 0, "iter": 10348, "iter_tflops": 44.64405600865138, "iter_time": 0.46212408447265624, "loss": 0.5031539797782898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.747289162371175, "step_time": 0.42322545242309567} +{"epoch": 0, "iter": 10349, "iter_tflops": 29.17158361023868, "iter_time": 0.7072325515747071, "loss": 0.7683191895484924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.901892734552522, "step_time": 0.6676320343017578} +{"epoch": 0, "iter": 10350, "iter_tflops": 14.569311065370592, "iter_time": 1.4160651397705075, "loss": 0.7531197667121887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.84298834133479, "step_time": 1.224906951904297} +{"epoch": 0, "iter": 10351, "iter_tflops": 42.38919592526408, "iter_time": 0.4867064132690429, "loss": 0.7045270204544067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.852242918025155, "step_time": 0.4499473133087158} +{"epoch": 0, "iter": 10352, "iter_tflops": 45.88626713462135, "iter_time": 0.4496136817932129, "loss": 0.7580072283744812, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 49.48405584818303, "step_time": 0.4169240608215332} +{"epoch": 0, "iter": 10353, "iter_tflops": 37.060793369401736, "iter_time": 0.5566824569702149, "loss": 0.7781503200531006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.80333491102093, "step_time": 0.5183257522583007} +{"epoch": 0, "iter": 10354, "iter_tflops": 26.386796678951995, "iter_time": 0.7818718490600586, "loss": 0.8550963401794434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.333191147565014, "step_time": 0.6584421424865723} +{"epoch": 0, "iter": 10355, "iter_tflops": 40.8445543192703, "iter_time": 0.5051124649047851, "loss": 0.6450912952423096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.865058438756655, "step_time": 0.47033092498779294} +{"epoch": 0, "iter": 10356, "iter_tflops": 39.39980496436669, "iter_time": 0.5236344070434571, "loss": 0.6074943542480469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.2457855320393, "step_time": 0.48835861968994143} +{"epoch": 0, "iter": 10357, "iter_tflops": 31.556281246561884, "iter_time": 0.6537872238159179, "loss": 0.7158679962158203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.628960085209535, "step_time": 0.6134918670654297} +{"epoch": 0, "iter": 10358, "iter_tflops": 10.8857750140152, "iter_time": 1.8952342376708982, "loss": 0.6218804121017456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.459525740049727, "step_time": 1.655849021911621} +{"epoch": 0, "iter": 10359, "iter_tflops": 12.742087762432945, "iter_time": 1.6191297607421875, "loss": 0.5562159419059753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.85089727580119, "step_time": 1.3892152862548828} +{"epoch": 0, "iter": 10360, "iter_tflops": 32.55797433765382, "iter_time": 0.6336725158691405, "loss": 0.5311030745506287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.51241572012069, "step_time": 0.5221420440673828} +{"epoch": 0, "iter": 10361, "iter_tflops": 12.596104151834265, "iter_time": 1.157590072631836, "loss": 0.14645139873027802, "lr": 3e-05, "seqlen": 
5840.0, "step_tflops": 13.371914048638128, "step_time": 1.090429168701172} +{"epoch": 0, "iter": 10362, "iter_tflops": 14.706665186917615, "iter_time": 0.9914637298583984, "loss": 0.3860546946525574, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 17.699506564149686, "step_time": 0.8238153457641602} +{"epoch": 0, "iter": 10363, "iter_tflops": 25.999297060099625, "iter_time": 0.5608276672363282, "loss": 0.2895841896533966, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.707271590966148, "step_time": 0.5262562599182129} +{"epoch": 0, "iter": 10364, "iter_tflops": 28.135401254932155, "iter_time": 0.5182483444213868, "loss": 0.2424018234014511, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 29.893112576229203, "step_time": 0.48777540588378904} +{"epoch": 0, "iter": 10365, "iter_tflops": 22.340012237887336, "iter_time": 0.9235041275024415, "loss": 0.04598100855946541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.464769274472562, "step_time": 0.8792370071411133} +{"epoch": 0, "iter": 10366, "iter_tflops": 21.397238961993775, "iter_time": 0.9641941909790038, "loss": 0.05122579634189606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.54349988811324, "step_time": 0.6983293647766114} +{"epoch": 0, "iter": 10367, "iter_tflops": 56.0741974980866, "iter_time": 0.3679249000549316, "loss": 0.04772939160466194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.34967011001383, "step_time": 0.33628695106506346} +{"epoch": 0, "iter": 10368, "iter_tflops": 55.619529794945784, "iter_time": 0.37093254089355465, "loss": 0.01785656437277794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.772890026247, "step_time": 0.3394785652160645} +{"epoch": 0, "iter": 10369, "iter_tflops": 26.2558480747281, "iter_time": 0.7857713623046876, "loss": 0.6237615942955017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.732609751891143, "step_time": 0.7439290313720703} +{"epoch": 0, "iter": 10370, "iter_tflops": 16.21592278124652, "iter_time": 1.2722737884521484, "loss": 0.4856017827987671, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 18.305232587630982, "step_time": 1.1270598945617676} +{"epoch": 0, "iter": 10371, "iter_tflops": 48.394778004518265, "iter_time": 0.42630825805664063, "loss": 0.7032260298728943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.57297357270679, "step_time": 0.39242774581909173} +{"epoch": 0, "iter": 10372, "iter_tflops": 48.333717595325524, "iter_time": 0.42684681701660154, "loss": 0.5841875076293945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.39149103350592, "step_time": 0.39378710365295405} +{"epoch": 0, "iter": 10373, "iter_tflops": 30.351545207186867, "iter_time": 0.6797378311157227, "loss": 0.0705266073346138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.18120899601223, "step_time": 0.6410913124084472} +{"epoch": 0, "iter": 10374, "iter_tflops": 14.937857291233545, "iter_time": 1.3811280364990235, "loss": 0.06365201622247696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.675906412990255, "step_time": 1.1671873016357424} +{"epoch": 0, "iter": 10375, "iter_tflops": 43.938591728774085, "iter_time": 0.4695438041687011, "loss": 0.07106570154428482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.00231346168835, "step_time": 0.4126027793884277} +{"epoch": 0, "iter": 10376, "iter_tflops": 53.37956386128964, "iter_time": 0.3864979782104493, "loss": 0.12345845997333527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.26926924807249, "step_time": 0.35406473731994625} +{"epoch": 0, "iter": 10377, "iter_tflops": 26.025580494818072, "iter_time": 0.792723663330078, "loss": 0.6448630094528198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.365286204024706, "step_time": 0.7539147720336915} +{"epoch": 0, "iter": 10378, "iter_tflops": 13.819839145498328, "iter_time": 1.492860610961914, "loss": 0.6166024208068848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.58875522526826, "step_time": 1.1729706420898438} +{"epoch": 0, "iter": 10379, "iter_tflops": 33.90144153736217, "iter_time": 0.6085609512329101, "loss": 
0.3883526027202606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.76185293800604, "step_time": 0.5612092933654785} +{"epoch": 0, "iter": 10380, "iter_tflops": 38.15485280380997, "iter_time": 0.5407200393676759, "loss": 0.5242286920547485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62878822255213, "step_time": 0.4955967826843262} +{"epoch": 0, "iter": 10381, "iter_tflops": 16.567348800057005, "iter_time": 1.245286361694336, "loss": 0.22663475573062897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.815535270516325, "step_time": 1.1580394973754884} +{"epoch": 0, "iter": 10382, "iter_tflops": 14.792957458692763, "iter_time": 1.394656448364258, "loss": 0.17600513994693756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.798788708905022, "step_time": 1.1591290760040283} +{"epoch": 0, "iter": 10383, "iter_tflops": 38.48019285074844, "iter_time": 0.5361483917236328, "loss": 0.23879584670066833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.10606307588494, "step_time": 0.48997916221618654} +{"epoch": 0, "iter": 10384, "iter_tflops": 40.50141021426206, "iter_time": 0.5093919792175293, "loss": 0.15683545172214508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27600573637958, "step_time": 0.4659655532836915} +{"epoch": 0, "iter": 10385, "iter_tflops": 16.5232900471448, "iter_time": 1.2486068725585937, "loss": 0.4810260534286499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.851946234899522, "step_time": 1.1556775512695312} +{"epoch": 0, "iter": 10386, "iter_tflops": 16.7706592540171, "iter_time": 1.2301897735595702, "loss": 0.3149147629737854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.13546923223753, "step_time": 1.0246144886016846} +{"epoch": 0, "iter": 10387, "iter_tflops": 38.97645207427519, "iter_time": 0.5293219985961913, "loss": 0.4425264000892639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.735060505384745, "step_time": 0.48276738739013675} +{"epoch": 0, "iter": 10388, "iter_tflops": 40.84120928831362, "iter_time": 0.5051538352966308, 
"loss": 0.3281860947608948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.580979966165046, "step_time": 0.4627779273986816} +{"epoch": 0, "iter": 10389, "iter_tflops": 32.99417404617776, "iter_time": 0.6252950439453124, "loss": 0.23370805382728577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.71711559231022, "step_time": 0.5618930892944336} +{"epoch": 0, "iter": 10390, "iter_tflops": 33.869362894194616, "iter_time": 0.609137336730957, "loss": 0.3548484742641449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.90944150328127, "step_time": 0.5442204551696778} +{"epoch": 0, "iter": 10391, "iter_tflops": 40.78082536831088, "iter_time": 0.5059018135070801, "loss": 0.36240360140800476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.41538797534034, "step_time": 0.4645032825469971} +{"epoch": 0, "iter": 10392, "iter_tflops": 39.17495048819635, "iter_time": 0.5266399383544922, "loss": 0.22344622015953064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.68901807462192, "step_time": 0.48328807830810544} +{"epoch": 0, "iter": 10393, "iter_tflops": 23.78974666806801, "iter_time": 0.8672262802124022, "loss": 0.6785963773727417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.445995484312206, "step_time": 0.8107795791625976} +{"epoch": 0, "iter": 10394, "iter_tflops": 8.55392753424491, "iter_time": 2.411885467529297, "loss": 0.7241976857185364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.110426479429751, "step_time": 2.0405759887695307} +{"epoch": 0, "iter": 10395, "iter_tflops": 10.57449795437938, "iter_time": 1.951023452758789, "loss": 0.8220204710960388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.993147660673099, "step_time": 1.5878441505432128} +{"epoch": 0, "iter": 10396, "iter_tflops": 34.84313794504417, "iter_time": 0.5921135330200196, "loss": 0.5658751726150513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.927048385546264, "step_time": 0.5439678115844727} +{"epoch": 0, "iter": 10397, "iter_tflops": 12.494149873285709, "iter_time": 
1.1409180145263673, "loss": 0.27628716826438904, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 13.13995426357718, "step_time": 1.084844009399414} +{"epoch": 0, "iter": 10398, "iter_tflops": 15.139869082762871, "iter_time": 0.9415405502319336, "loss": 0.20812930166721344, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 17.44190956015038, "step_time": 0.8172729377746581} +{"epoch": 0, "iter": 10399, "iter_tflops": 24.898269160379655, "iter_time": 0.5725217514038086, "loss": 0.1989191770553589, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 26.606873704737612, "step_time": 0.5357563171386719} +{"epoch": 0, "iter": 10400, "iter_tflops": 25.464253277729636, "iter_time": 0.5597965316772461, "loss": 0.4121512472629547, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 27.164905357754293, "step_time": 0.5247506103515625} +{"epoch": 0, "iter": 10401, "iter_tflops": 31.1143218719293, "iter_time": 0.6630738601684569, "loss": 0.5847100019454956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.02582208891266, "step_time": 0.6246958351135254} +{"epoch": 0, "iter": 10402, "iter_tflops": 18.841441479677584, "iter_time": 1.094984878540039, "loss": 0.565538763999939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.278598712375103, "step_time": 0.8497645912170411} +{"epoch": 0, "iter": 10403, "iter_tflops": 38.59414144430292, "iter_time": 0.5345654220581054, "loss": 0.7497231960296631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.38242651180543, "step_time": 0.4867841510772705} +{"epoch": 0, "iter": 10404, "iter_tflops": 41.29780366628595, "iter_time": 0.49956878280639644, "loss": 0.6774377226829529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.019327642028834, "step_time": 0.45827191543579104} +{"epoch": 0, "iter": 10405, "iter_tflops": 32.78764772352977, "iter_time": 0.6292337188720704, "loss": 0.8034069538116455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.84150882520968, "step_time": 0.5756201171875} +{"epoch": 0, "iter": 10406, "iter_tflops": 40.29667203164003, 
"iter_time": 0.5119800834655761, "loss": 0.6906662583351135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37356937170994, "step_time": 0.4756604957580566} +{"epoch": 0, "iter": 10407, "iter_tflops": 40.56899429156329, "iter_time": 0.5085433807373047, "loss": 0.7545942664146423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.61982470120554, "step_time": 0.4729751586914062} +{"epoch": 0, "iter": 10408, "iter_tflops": 40.40685233344986, "iter_time": 0.5105840301513672, "loss": 0.6429835557937622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.530179023881516, "step_time": 0.4739491996765137} +{"epoch": 0, "iter": 10409, "iter_tflops": 21.031576627984904, "iter_time": 0.9809580078125001, "loss": 0.4597681760787964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.005917002829335, "step_time": 0.9375248260498046} +{"epoch": 0, "iter": 10410, "iter_tflops": 13.421329846303204, "iter_time": 1.5371869812011718, "loss": 0.41961345076560974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.072279019252132, "step_time": 1.2084557361602783} +{"epoch": 0, "iter": 10411, "iter_tflops": 39.44222814860463, "iter_time": 0.5230711975097656, "loss": 0.46353524923324585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.27297474688178, "step_time": 0.47676624107360843} +{"epoch": 0, "iter": 10412, "iter_tflops": 40.320460989724765, "iter_time": 0.5116780166625977, "loss": 0.531191885471344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.740382075237136, "step_time": 0.4716715431213379} +{"epoch": 0, "iter": 10413, "iter_tflops": 19.70225927326037, "iter_time": 1.047143539428711, "loss": 0.030554434284567833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.9390493967836, "step_time": 0.9852927474975586} +{"epoch": 0, "iter": 10414, "iter_tflops": 8.765538334161546, "iter_time": 2.3536596069335936, "loss": 0.05078151822090149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.90754899967903, "step_time": 1.7326062240600584} +{"epoch": 0, "iter": 10415, "iter_tflops": 
13.946651086539191, "iter_time": 1.4792865600585938, "loss": 0.036664918065071106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.88221598321961, "step_time": 1.222060749053955} +{"epoch": 0, "iter": 10416, "iter_tflops": 42.92903574600246, "iter_time": 0.48058599853515627, "loss": 0.037678301334381104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.400851525635375, "step_time": 0.43524731826782226} +{"epoch": 0, "iter": 10417, "iter_tflops": 17.25241195952914, "iter_time": 0.9780575637817384, "loss": 0.29440975189208984, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 18.31602686039571, "step_time": 0.9212615890502929} +{"epoch": 0, "iter": 10418, "iter_tflops": 14.569567988951839, "iter_time": 1.1581573333740236, "loss": 0.3020707666873932, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 17.14503910665909, "step_time": 0.9841827659606933} +{"epoch": 0, "iter": 10419, "iter_tflops": 29.90951413909405, "iter_time": 0.5641633605957032, "loss": 0.21354557573795319, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 31.91878275441712, "step_time": 0.5286496086120606} +{"epoch": 0, "iter": 10420, "iter_tflops": 30.94764959583135, "iter_time": 0.5452385635375976, "loss": 0.13392069935798645, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 32.93352174368719, "step_time": 0.5123609962463379} +{"epoch": 0, "iter": 10421, "iter_tflops": 28.96691519580106, "iter_time": 0.7122295684814453, "loss": 0.6211211085319519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.736163660939603, "step_time": 0.6712318992614746} +{"epoch": 0, "iter": 10422, "iter_tflops": 14.868817703649365, "iter_time": 1.3875409545898438, "loss": 0.8323070406913757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.147396423844405, "step_time": 1.0774881896972657} +{"epoch": 0, "iter": 10423, "iter_tflops": 45.64954611857286, "iter_time": 0.45194520568847657, "loss": 0.8831014633178711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.18295363502028, "step_time": 0.41947650527954106} +{"epoch": 0, "iter": 10424, 
"iter_tflops": 43.58507267532003, "iter_time": 0.4733522796630859, "loss": 0.9367241263389587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.817250899766584, "step_time": 0.44067289543151855} +{"epoch": 0, "iter": 10425, "iter_tflops": 25.482901275191654, "iter_time": 0.8096053619384767, "loss": 0.10143464058637619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.80341283133324, "step_time": 0.7697189025878907} +{"epoch": 0, "iter": 10426, "iter_tflops": 15.176002556970426, "iter_time": 1.359455062866211, "loss": 0.1468978226184845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.580089674637055, "step_time": 1.0536771717071534} +{"epoch": 0, "iter": 10427, "iter_tflops": 41.16255643658631, "iter_time": 0.5012102088928223, "loss": 0.2001594603061676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.04883265210988, "step_time": 0.45797176742553714} +{"epoch": 0, "iter": 10428, "iter_tflops": 43.85161818197069, "iter_time": 0.47047507858276366, "loss": 0.1250515878200531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.045534340799705, "step_time": 0.4294070987701416} +{"epoch": 0, "iter": 10429, "iter_tflops": 29.306481449897255, "iter_time": 0.7039771575927734, "loss": 0.393655389547348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.727036044585947, "step_time": 0.6502685432434082} +{"epoch": 0, "iter": 10430, "iter_tflops": 8.37301536675716, "iter_time": 2.4639980468750005, "loss": 0.4363028109073639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.278715655987702, "step_time": 2.2234859085083007} +{"epoch": 0, "iter": 10431, "iter_tflops": 11.151319116467826, "iter_time": 1.8501034088134762, "loss": 0.4678141474723816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.832298378545063, "step_time": 1.3909572868347166} +{"epoch": 0, "iter": 10432, "iter_tflops": 36.274870065870516, "iter_time": 0.5687434158325195, "loss": 0.3017798066139221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.45373237213635, "step_time": 0.5229186763763428} +{"epoch": 0, 
"iter": 10433, "iter_tflops": 21.485819809848458, "iter_time": 0.7109604110717774, "loss": 0.2407420128583908, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.32309168687591, "step_time": 0.654954647064209} +{"epoch": 0, "iter": 10434, "iter_tflops": 22.1770076755142, "iter_time": 0.6888020019531249, "loss": 0.20237551629543304, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.820248855650917, "step_time": 0.6412849578857422} +{"epoch": 0, "iter": 10435, "iter_tflops": 23.036226766606575, "iter_time": 0.6631106491088867, "loss": 0.17373435199260712, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.86744890957436, "step_time": 0.6142796287536622} +{"epoch": 0, "iter": 10436, "iter_tflops": 24.18886686669918, "iter_time": 0.6315123138427735, "loss": 0.2069585919380188, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 26.001203807127524, "step_time": 0.5874946174621583} +{"epoch": 0, "iter": 10437, "iter_tflops": 20.995469961811647, "iter_time": 0.9826449966430665, "loss": 0.06285734474658966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.551053924800165, "step_time": 0.9148616104125976} +{"epoch": 0, "iter": 10438, "iter_tflops": 25.739193966584374, "iter_time": 0.8015438842773437, "loss": 0.034108128398656845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.924443062122624, "step_time": 0.6462475624084472} +{"epoch": 0, "iter": 10439, "iter_tflops": 54.91121122467924, "iter_time": 0.37571732711791994, "loss": 0.06995139271020889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.622274784307514, "step_time": 0.34602996253967283} +{"epoch": 0, "iter": 10440, "iter_tflops": 54.75918189523864, "iter_time": 0.376760440826416, "loss": 0.07096069306135178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.47497256947365, "step_time": 0.3468869781494141} +{"epoch": 0, "iter": 10441, "iter_tflops": 44.62917336159762, "iter_time": 0.46227819061279296, "loss": 0.28093016147613525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6287884462908, "step_time": 
0.42425678634643554} +{"epoch": 0, "iter": 10442, "iter_tflops": 10.472113772349683, "iter_time": 1.9700982971191405, "loss": 0.21902915835380554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.216958756752849, "step_time": 1.5609561843872068} +{"epoch": 0, "iter": 10443, "iter_tflops": 15.49623966362386, "iter_time": 1.3313612823486327, "loss": 0.34589898586273193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.254895534448764, "step_time": 1.195666091918945} +{"epoch": 0, "iter": 10444, "iter_tflops": 18.623414876846848, "iter_time": 1.1078040008544923, "loss": 0.35502833127975464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.55414238248324, "step_time": 0.8402286338806153} +{"epoch": 0, "iter": 10445, "iter_tflops": 16.53988546069577, "iter_time": 0.8815735244750977, "loss": 0.24394267797470093, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 17.945311459967535, "step_time": 0.8125311813354493} +{"epoch": 0, "iter": 10446, "iter_tflops": 24.20078043037964, "iter_time": 0.6025064010620116, "loss": 0.08435876667499542, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 25.762995275168358, "step_time": 0.5659716567993165} +{"epoch": 0, "iter": 10447, "iter_tflops": 26.10868895455203, "iter_time": 0.5584778747558594, "loss": 0.5804036259651184, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.882953397967444, "step_time": 0.5229404830932617} +{"epoch": 0, "iter": 10448, "iter_tflops": 25.32136847845619, "iter_time": 0.5758426971435546, "loss": 0.18265312910079956, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 26.821719981930208, "step_time": 0.543631248474121} +{"epoch": 0, "iter": 10449, "iter_tflops": 37.663305746695926, "iter_time": 0.5477770233154297, "loss": 0.5279992818832397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59706930623403, "step_time": 0.5081916961669921} +{"epoch": 0, "iter": 10450, "iter_tflops": 13.281272858688869, "iter_time": 1.5533973083496093, "loss": 0.45107918977737427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
16.80299636116611, "step_time": 1.2278222923278808} +{"epoch": 0, "iter": 10451, "iter_tflops": 45.24739201383704, "iter_time": 0.45596204757690434, "loss": 0.3850060701370239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.09217463577242, "step_time": 0.42025218200683595} +{"epoch": 0, "iter": 10452, "iter_tflops": 46.567931544019, "iter_time": 0.4430322074890137, "loss": 0.3750761151313782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.421341727824945, "step_time": 0.4091738300323486} +{"epoch": 0, "iter": 10453, "iter_tflops": 41.658513080191824, "iter_time": 0.4952431564331054, "loss": 0.3405991494655609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41267577143701, "step_time": 0.4543025302886963} +{"epoch": 0, "iter": 10454, "iter_tflops": 37.1332629320513, "iter_time": 0.5555960311889648, "loss": 0.3237664997577667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.91645929682314, "step_time": 0.5042248001098634} +{"epoch": 0, "iter": 10455, "iter_tflops": 42.065267706213525, "iter_time": 0.49045434951782224, "loss": 0.37994298338890076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.04096381308492, "step_time": 0.4481029891967773} +{"epoch": 0, "iter": 10456, "iter_tflops": 36.8334602618563, "iter_time": 0.5601182556152344, "loss": 0.4414876401424408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.347553160727216, "step_time": 0.5113344402313232} +{"epoch": 0, "iter": 10457, "iter_tflops": 19.723317988619083, "iter_time": 1.0460254974365235, "loss": 0.19613578915596008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.06802091209949, "step_time": 0.9792611083984375} +{"epoch": 0, "iter": 10458, "iter_tflops": 18.84501846788172, "iter_time": 1.0947770385742188, "loss": 0.2285376787185669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.528315800607093, "step_time": 0.9157849922180176} +{"epoch": 0, "iter": 10459, "iter_tflops": 36.52081888054425, "iter_time": 0.5649132232666015, "loss": 0.35659387707710266, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.96214250411663, "step_time": 0.5162659511566161} +{"epoch": 0, "iter": 10460, "iter_tflops": 36.65826353165802, "iter_time": 0.562795166015625, "loss": 0.21304546296596527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.969139228765194, "step_time": 0.5161755771636963} +{"epoch": 0, "iter": 10461, "iter_tflops": 26.926127918363292, "iter_time": 0.7662109298706055, "loss": 0.015389390289783478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.10316110620043, "step_time": 0.7088952789306641} +{"epoch": 0, "iter": 10462, "iter_tflops": 39.58769621255957, "iter_time": 0.5211491317749023, "loss": 0.018792584538459778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.17189212536432, "step_time": 0.46706383895874026} +{"epoch": 0, "iter": 10463, "iter_tflops": 44.70016041624772, "iter_time": 0.461544059753418, "loss": 0.028969567269086838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17470383388603, "step_time": 0.4195468788146972} +{"epoch": 0, "iter": 10464, "iter_tflops": 38.95096582372296, "iter_time": 0.529668342590332, "loss": 0.03750418871641159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19820343338862, "step_time": 0.4775914707183838} +{"epoch": 0, "iter": 10465, "iter_tflops": 36.27262450735164, "iter_time": 0.5687786254882813, "loss": 0.1839853823184967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.21173849258914, "step_time": 0.513061466217041} +{"epoch": 0, "iter": 10466, "iter_tflops": 38.15803527567683, "iter_time": 0.5406749420166016, "loss": 0.3064919412136078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.060927800154055, "step_time": 0.4905049552917481} +{"epoch": 0, "iter": 10467, "iter_tflops": 41.94288563507191, "iter_time": 0.49188541030883787, "loss": 0.15971739590168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.895469512086606, "step_time": 0.44952353096008296} +{"epoch": 0, "iter": 10468, "iter_tflops": 44.09181360512638, "iter_time": 0.467912109375, "loss": 0.32147467136383057, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 47.999401330676825, "step_time": 0.42981980895996097} +{"epoch": 0, "iter": 10469, "iter_tflops": 18.512008355227735, "iter_time": 1.1144708404541017, "loss": 0.3665534257888794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.849774276839298, "step_time": 1.0393616180419922} +{"epoch": 0, "iter": 10470, "iter_tflops": 29.731405582100006, "iter_time": 0.6939158477783203, "loss": 0.37784913182258606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.045666508236565, "step_time": 0.6243206958770752} +{"epoch": 0, "iter": 10471, "iter_tflops": 49.65657740698259, "iter_time": 0.41547554397583014, "loss": 0.40923911333084106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.86869153573054, "step_time": 0.38298857688903803} +{"epoch": 0, "iter": 10472, "iter_tflops": 51.69370760978281, "iter_time": 0.39910260772705075, "loss": 0.45479118824005127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.012047197687345, "step_time": 0.36833314514160154} +{"epoch": 0, "iter": 10473, "iter_tflops": 25.570566167872848, "iter_time": 0.8068297500610351, "loss": 0.7147875428199768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.870633496951335, "step_time": 0.767793342590332} +{"epoch": 0, "iter": 10474, "iter_tflops": 12.043621470496696, "iter_time": 1.713030715942383, "loss": 0.7690271139144897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.713733676113932, "step_time": 1.5044111251831056} +{"epoch": 0, "iter": 10475, "iter_tflops": 42.6911450320108, "iter_time": 0.48326399993896485, "loss": 0.816063642501831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.92585040078008, "step_time": 0.4492261619567871} +{"epoch": 0, "iter": 10476, "iter_tflops": 42.48121262859968, "iter_time": 0.4856521797180176, "loss": 0.8302682638168335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59789673430461, "step_time": 0.452457130432129} +{"epoch": 0, "iter": 10477, "iter_tflops": 24.387296022806083, "iter_time": 0.8459770812988281, "loss": 0.7916199564933777, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 25.77494451026419, "step_time": 0.8004321212768555} +{"epoch": 0, "iter": 10478, "iter_tflops": 15.861165886830298, "iter_time": 1.300729949951172, "loss": 0.765159010887146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.73171157583184, "step_time": 1.1013992729187012} +{"epoch": 0, "iter": 10479, "iter_tflops": 37.05471352161562, "iter_time": 0.556773796081543, "loss": 0.9403831362724304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17913551158844, "step_time": 0.5134777851104736} +{"epoch": 0, "iter": 10480, "iter_tflops": 36.840197955965756, "iter_time": 0.5600158157348634, "loss": 0.867928683757782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.04789379728696, "step_time": 0.5151605129241943} +{"epoch": 0, "iter": 10481, "iter_tflops": 20.804660971988543, "iter_time": 0.9916572799682617, "loss": 0.5782759785652161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.325669425606826, "step_time": 0.9240974197387697} +{"epoch": 0, "iter": 10482, "iter_tflops": 19.933232517895462, "iter_time": 1.0350099258422851, "loss": 0.685569703578949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.01721761100822, "step_time": 0.7929784736633301} +{"epoch": 0, "iter": 10483, "iter_tflops": 43.76770054688524, "iter_time": 0.47137714004516595, "loss": 0.6701546311378479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.13280307297595, "step_time": 0.43772260856628414} +{"epoch": 0, "iter": 10484, "iter_tflops": 43.53519330172429, "iter_time": 0.4738946113586425, "loss": 0.6915578842163086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.78585749054558, "step_time": 0.4409685878753662} +{"epoch": 0, "iter": 10485, "iter_tflops": 42.382680759642014, "iter_time": 0.4867812309265137, "loss": 0.715705394744873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06026801119547, "step_time": 0.4479151859283448} +{"epoch": 0, "iter": 10486, "iter_tflops": 45.88886636166326, "iter_time": 0.44958821487426753, "loss": 0.64110267162323, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 49.653859343509566, "step_time": 0.4154982872009278} +{"epoch": 0, "iter": 10487, "iter_tflops": 49.27657815971721, "iter_time": 0.41867950820922845, "loss": 0.8137911558151245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.35102222148996, "step_time": 0.3867047462463379} +{"epoch": 0, "iter": 10488, "iter_tflops": 43.489848604085054, "iter_time": 0.47438871765136714, "loss": 0.5961263179779053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.575940690287894, "step_time": 0.4429560241699218} +{"epoch": 0, "iter": 10489, "iter_tflops": 30.986704751510498, "iter_time": 0.6658046951293946, "loss": 0.7747301459312439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.07001135561211, "step_time": 0.6238610954284669} +{"epoch": 0, "iter": 10490, "iter_tflops": 12.168241557034554, "iter_time": 1.6954868469238282, "loss": 0.9243314266204834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.266051877167, "step_time": 1.1948935203552244} +{"epoch": 0, "iter": 10491, "iter_tflops": 43.31749702740862, "iter_time": 0.47627621459960934, "loss": 0.9192000031471252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.300906352379364, "step_time": 0.43616698074340826} +{"epoch": 0, "iter": 10492, "iter_tflops": 44.538508582668875, "iter_time": 0.46321922683715816, "loss": 0.7157275080680847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.2952312938662, "step_time": 0.4271869697570801} +{"epoch": 0, "iter": 10493, "iter_tflops": 36.12021802937993, "iter_time": 0.5711785430908204, "loss": 0.26812607049942017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.939921251533896, "step_time": 0.5298185729980469} +{"epoch": 0, "iter": 10494, "iter_tflops": 44.38185357716024, "iter_time": 0.4648542556762696, "loss": 0.21660944819450378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.911089899363304, "step_time": 0.4306120681762695} +{"epoch": 0, "iter": 10495, "iter_tflops": 48.03280591223557, "iter_time": 0.42952088928222654, "loss": 
0.21292006969451904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.8930140636994, "step_time": 0.39756976699829105} +{"epoch": 0, "iter": 10496, "iter_tflops": 51.453376273861934, "iter_time": 0.4009667587280274, "loss": 0.2940041124820709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.73896091816295, "step_time": 0.3701377487182617} +{"epoch": 0, "iter": 10497, "iter_tflops": 48.73278035595343, "iter_time": 0.3919068412780761, "loss": 0.0012811933411285281, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 54.155870269373914, "step_time": 0.3526618614196777} +{"epoch": 0, "iter": 10498, "iter_tflops": 45.82559042621769, "iter_time": 0.41676953506469727, "loss": 0.0035930187441408634, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 51.30671595424734, "step_time": 0.37224580955505365} +{"epoch": 0, "iter": 10499, "iter_tflops": 51.90918552839707, "iter_time": 0.3679254417419434, "loss": 0.0034637406934052706, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 57.21905695352903, "step_time": 0.3337823276519775} +{"epoch": 0, "iter": 10500, "iter_tflops": 51.41097171114039, "iter_time": 0.37149093627929686, "loss": 0.011547879315912724, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 56.211184119216156, "step_time": 0.3397670822143555} +{"epoch": 0, "iter": 10501, "iter_tflops": 28.69484330313075, "iter_time": 0.6670202255249023, "loss": 0.004266859497874975, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 30.471360956390257, "step_time": 0.6281321296691895} +{"epoch": 0, "iter": 10502, "iter_tflops": 11.973917457776317, "iter_time": 1.5984777679443358, "loss": 0.005326235666871071, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 15.862764126222276, "step_time": 1.20660186958313} +{"epoch": 0, "iter": 10503, "iter_tflops": 51.75181377421968, "iter_time": 0.3698428993225098, "loss": 0.0018428317271173, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 57.22793728979012, "step_time": 0.3344527473449707} +{"epoch": 0, "iter": 10504, "iter_tflops": 56.11777483507471, "iter_time": 
0.3410691337585449, "loss": 0.0038576091174036264, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 61.81659970708025, "step_time": 0.3096262321472168} +{"epoch": 0, "iter": 10505, "iter_tflops": 27.591594699275902, "iter_time": 0.7477311019897461, "loss": 0.15854617953300476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.138943652669642, "step_time": 0.7080247573852539} +{"epoch": 0, "iter": 10506, "iter_tflops": 15.872846802474667, "iter_time": 1.2997727355957032, "loss": 0.24579596519470215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.075414613698964, "step_time": 1.1413897800445558} +{"epoch": 0, "iter": 10507, "iter_tflops": 34.00037836480697, "iter_time": 0.6067901153564453, "loss": 0.2025431990623474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.32046116897566, "step_time": 0.5528091793060304} +{"epoch": 0, "iter": 10508, "iter_tflops": 40.371347854012114, "iter_time": 0.5110330619812012, "loss": 0.16843187808990479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.49089023196723, "step_time": 0.46371500778198244} +{"epoch": 0, "iter": 10509, "iter_tflops": 32.46299775840303, "iter_time": 0.6355264434814454, "loss": 0.46035128831863403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.78442427329033, "step_time": 0.576538366317749} +{"epoch": 0, "iter": 10510, "iter_tflops": 34.88687785503079, "iter_time": 0.5913711624145508, "loss": 0.5443126559257507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.99719087489317, "step_time": 0.5429636516571046} +{"epoch": 0, "iter": 10511, "iter_tflops": 43.881727842274884, "iter_time": 0.47015225982666015, "loss": 0.8071597814559937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.94937149490874, "step_time": 0.43026827812194823} +{"epoch": 0, "iter": 10512, "iter_tflops": 41.007825681983626, "iter_time": 0.503101375579834, "loss": 0.7012898325920105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92991697025035, "step_time": 0.45918387794494625} +{"epoch": 0, "iter": 10513, "iter_tflops": 
33.50542873257932, "iter_time": 0.6157537536621094, "loss": 0.4493600130081177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.49699715176274, "step_time": 0.5652819442749022} +{"epoch": 0, "iter": 10514, "iter_tflops": 38.76036613842564, "iter_time": 0.5322729263305663, "loss": 0.3205391764640808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.09274288226387, "step_time": 0.4787602767944336} +{"epoch": 0, "iter": 10515, "iter_tflops": 39.611580975515864, "iter_time": 0.5208348922729492, "loss": 0.29986175894737244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94525116342754, "step_time": 0.4804045372009277} +{"epoch": 0, "iter": 10516, "iter_tflops": 40.297897369895324, "iter_time": 0.5119645156860352, "loss": 0.5680316686630249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.730388295161625, "step_time": 0.47177933502197267} +{"epoch": 0, "iter": 10517, "iter_tflops": 21.871981539849653, "iter_time": 0.9432658615112304, "loss": 0.17596875131130219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.329129435343322, "step_time": 0.8843490524291993} +{"epoch": 0, "iter": 10518, "iter_tflops": 10.382128953146964, "iter_time": 1.9871736907958983, "loss": 0.16512081027030945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.56536169255397, "step_time": 1.5208657150268554} +{"epoch": 0, "iter": 10519, "iter_tflops": 14.159044663643542, "iter_time": 1.4570964355468752, "loss": 0.11529865115880966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.775089299760225, "step_time": 1.0988546142578126} +{"epoch": 0, "iter": 10520, "iter_tflops": 24.58250522828563, "iter_time": 0.83925919342041, "loss": 0.13211998343467712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.55062478213565, "step_time": 0.6338155918121336} +{"epoch": 0, "iter": 10521, "iter_tflops": 26.199980252875992, "iter_time": 0.6409056015014649, "loss": 0.43780505657196045, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 28.114911652811426, "step_time": 0.5972529563903808} +{"epoch": 0, "iter": 10522, 
"iter_tflops": 12.075256881880268, "iter_time": 1.3905885620117187, "loss": 0.11764685809612274, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 15.916018406189199, "step_time": 1.0550197715759277} +{"epoch": 0, "iter": 10523, "iter_tflops": 25.220708354869412, "iter_time": 0.6657907409667969, "loss": 0.19629578292369843, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 27.193260361070372, "step_time": 0.617495433807373} +{"epoch": 0, "iter": 10524, "iter_tflops": 27.33927109614115, "iter_time": 0.6141975784301758, "loss": 0.37963807582855225, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 29.295956910995052, "step_time": 0.5731751365661621} +{"epoch": 0, "iter": 10525, "iter_tflops": 29.621503745559504, "iter_time": 0.6964904174804688, "loss": 0.4674645662307739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.916551716037404, "step_time": 0.6267695865631104} +{"epoch": 0, "iter": 10526, "iter_tflops": 44.23006985841762, "iter_time": 0.4664494895935059, "loss": 0.45563867688179016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.596307339671625, "step_time": 0.4245403537750244} +{"epoch": 0, "iter": 10527, "iter_tflops": 49.92943106142357, "iter_time": 0.41320505905151367, "loss": 0.6492198705673218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.96772547322234, "step_time": 0.3822857704162598} +{"epoch": 0, "iter": 10528, "iter_tflops": 48.67823816867077, "iter_time": 0.4238258056640625, "loss": 0.5596089959144592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.67599180364855, "step_time": 0.39166027641296386} +{"epoch": 0, "iter": 10529, "iter_tflops": 34.04998668504043, "iter_time": 0.6059060668945313, "loss": 0.5548085570335388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.368818193278194, "step_time": 0.5672742347717286} +{"epoch": 0, "iter": 10530, "iter_tflops": 15.141485850073682, "iter_time": 1.3625540924072266, "loss": 0.6599976420402527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.14927237424597, "step_time": 1.0239125823974609} +{"epoch": 0, 
"iter": 10531, "iter_tflops": 43.633617822589756, "iter_time": 0.47282564544677735, "loss": 0.6211869120597839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.87757905455812, "step_time": 0.43091346549987797} +{"epoch": 0, "iter": 10532, "iter_tflops": 44.66482791108451, "iter_time": 0.4619091682434082, "loss": 0.750197172164917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0748898929017, "step_time": 0.4291448936462402} +{"epoch": 0, "iter": 10533, "iter_tflops": 29.635193204768367, "iter_time": 0.696168685913086, "loss": 0.0007225663866847754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.38743895201444, "step_time": 0.6573041381835937} +{"epoch": 0, "iter": 10534, "iter_tflops": 14.015997994013714, "iter_time": 1.4719674987792968, "loss": 0.0050141275860369205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.9489268923532, "step_time": 1.0887737140655518} +{"epoch": 0, "iter": 10535, "iter_tflops": 43.78200310308749, "iter_time": 0.47122315216064453, "loss": 0.004673752933740616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.444922597100565, "step_time": 0.4258669929504394} +{"epoch": 0, "iter": 10536, "iter_tflops": 45.69768086407411, "iter_time": 0.4514691581726074, "loss": 0.0072637335397303104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.36215229902646, "step_time": 0.4096547222137451} +{"epoch": 0, "iter": 10537, "iter_tflops": 15.970234744549504, "iter_time": 1.2918466033935545, "loss": 0.1139024943113327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.06216420575331, "step_time": 1.209172134399414} +{"epoch": 0, "iter": 10538, "iter_tflops": 26.163047196940536, "iter_time": 0.7885585098266602, "loss": 0.22681857645511627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.79585514505596, "step_time": 0.6290762481689454} +{"epoch": 0, "iter": 10539, "iter_tflops": 47.60847413436461, "iter_time": 0.4333491859436035, "loss": 0.17712584137916565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.97868569459815, "step_time": 
0.3969144897460938} +{"epoch": 0, "iter": 10540, "iter_tflops": 45.353261442931576, "iter_time": 0.45489768218994137, "loss": 0.13868723809719086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.33587552460024, "step_time": 0.41817629241943366} +{"epoch": 0, "iter": 10541, "iter_tflops": 14.122601261671708, "iter_time": 0.9257788848876954, "loss": 0.04669008404016495, "lr": 3e-05, "seqlen": 5248.0, "step_tflops": 14.799774386696093, "step_time": 0.8834192810058593} +{"epoch": 0, "iter": 10542, "iter_tflops": 8.554838842721809, "iter_time": 1.5283053588867186, "loss": 0.018831325694918633, "lr": 3e-05, "seqlen": 5248.0, "step_tflops": 12.85899106625363, "step_time": 1.0167520904541016} +{"epoch": 0, "iter": 10543, "iter_tflops": 33.57988596439666, "iter_time": 0.3893523063659668, "loss": 0.05899945646524429, "lr": 3e-05, "seqlen": 5248.0, "step_tflops": 36.64663942681897, "step_time": 0.3567695770263672} +{"epoch": 0, "iter": 10544, "iter_tflops": 38.53480611518878, "iter_time": 0.33928822708129885, "loss": 0.03466300293803215, "lr": 3e-05, "seqlen": 5248.0, "step_tflops": 42.08290736829252, "step_time": 0.3106821002960205} +{"epoch": 0, "iter": 10545, "iter_tflops": 25.148326088485423, "iter_time": 0.8203764114379882, "loss": 0.7327144145965576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.3854578580771, "step_time": 0.7819115219116212} +{"epoch": 0, "iter": 10546, "iter_tflops": 14.78418369618262, "iter_time": 1.3954841156005862, "loss": 0.9322739839553833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.41929711000541, "step_time": 1.1843815155029296} +{"epoch": 0, "iter": 10547, "iter_tflops": 42.154142785545744, "iter_time": 0.4894203071594238, "loss": 0.7079593539237976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.508112452556524, "step_time": 0.45334979629516603} +{"epoch": 0, "iter": 10548, "iter_tflops": 46.36124602426733, "iter_time": 0.44500731277465816, "loss": 0.5190475583076477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.7747227942625, 
"step_time": 0.4144893703460693} +{"epoch": 0, "iter": 10549, "iter_tflops": 25.946993504388207, "iter_time": 0.7951246261596681, "loss": 0.1307174116373062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.34364502283551, "step_time": 0.7545114593505861} +{"epoch": 0, "iter": 10550, "iter_tflops": 16.30172451611614, "iter_time": 1.2655773620605468, "loss": 0.16697897017002106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.495968724487405, "step_time": 1.0582235641479492} +{"epoch": 0, "iter": 10551, "iter_tflops": 41.585578090681246, "iter_time": 0.49611174011230474, "loss": 0.20568010210990906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.780394499837094, "step_time": 0.4506534671783447} +{"epoch": 0, "iter": 10552, "iter_tflops": 38.45412686278223, "iter_time": 0.5365118179321289, "loss": 0.2292080670595169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.43000232922329, "step_time": 0.48623833084106444} +{"epoch": 0, "iter": 10553, "iter_tflops": 19.396197222547876, "iter_time": 0.8636049652099609, "loss": 0.012362862937152386, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 20.70114552226645, "step_time": 0.8091654739379883} +{"epoch": 0, "iter": 10554, "iter_tflops": 16.69586382922108, "iter_time": 1.0032815551757812, "loss": 0.004677378572523594, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 18.858227160991657, "step_time": 0.8882410888671874} +{"epoch": 0, "iter": 10555, "iter_tflops": 43.76222769177168, "iter_time": 0.38276507186889647, "loss": 0.005512692034244537, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 47.88599620349831, "step_time": 0.34980273056030275} +{"epoch": 0, "iter": 10556, "iter_tflops": 49.25273071055979, "iter_time": 0.34009590911865234, "loss": 0.00769847584888339, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 54.142818368571085, "step_time": 0.3093790225982666} +{"epoch": 0, "iter": 10557, "iter_tflops": 25.737247136383857, "iter_time": 0.8016045150756836, "loss": 0.5233660936355591, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 27.131278551502657, "step_time": 0.7604172973632812} +{"epoch": 0, "iter": 10558, "iter_tflops": 11.96580035356509, "iter_time": 1.724171630859375, "loss": 0.3027832508087158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.682989963881026, "step_time": 1.5077913208007814} +{"epoch": 0, "iter": 10559, "iter_tflops": 35.8784673984586, "iter_time": 0.5750271682739257, "loss": 0.35115787386894226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.671922020751445, "step_time": 0.5200427017211914} +{"epoch": 0, "iter": 10560, "iter_tflops": 41.399532382085305, "iter_time": 0.49834122085571286, "loss": 0.4185634255409241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.13595697329552, "step_time": 0.4570877609252929} +{"epoch": 0, "iter": 10561, "iter_tflops": 17.35234049547827, "iter_time": 1.188951629638672, "loss": 0.45877158641815186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.659791824019862, "step_time": 1.1056443557739257} +{"epoch": 0, "iter": 10562, "iter_tflops": 18.871828517190767, "iter_time": 1.0932217559814454, "loss": 0.4408251941204071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.639699604904298, "step_time": 0.9112794723510741} +{"epoch": 0, "iter": 10563, "iter_tflops": 48.691926274010676, "iter_time": 0.42370666122436523, "loss": 0.48363539576530457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.67719802176578, "step_time": 0.3916513080596924} +{"epoch": 0, "iter": 10564, "iter_tflops": 49.49427811929258, "iter_time": 0.41683795166015625, "loss": 0.5517950654029846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.48989567489391, "step_time": 0.385700761795044} +{"epoch": 0, "iter": 10565, "iter_tflops": 21.541847630496747, "iter_time": 0.9577216339111329, "loss": 0.7915512323379517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.539658603070738, "step_time": 0.9153241348266601} +{"epoch": 0, "iter": 10566, "iter_tflops": 13.656378474911799, "iter_time": 1.510729476928711, "loss": 0.5976415276527405, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 19.057514708246604, "step_time": 1.082569990158081} +{"epoch": 0, "iter": 10567, "iter_tflops": 36.10529663475918, "iter_time": 0.5714145965576172, "loss": 0.7351446151733398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56374848880866, "step_time": 0.5214645805358886} +{"epoch": 0, "iter": 10568, "iter_tflops": 35.70007492696529, "iter_time": 0.5779005661010742, "loss": 0.6970626711845398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.43023122850176, "step_time": 0.536845417022705} +{"epoch": 0, "iter": 10569, "iter_tflops": 19.8708877286186, "iter_time": 1.0382572631835938, "loss": 0.3383987247943878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.98490103574194, "step_time": 0.9831399002075196} +{"epoch": 0, "iter": 10570, "iter_tflops": 12.713536269466298, "iter_time": 1.6227659301757813, "loss": 0.3116859793663025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.120432934733586, "step_time": 1.2798101387023926} +{"epoch": 0, "iter": 10571, "iter_tflops": 46.37413430988595, "iter_time": 0.4448836364746093, "loss": 0.5186381936073303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.289471566389366, "step_time": 0.41024677467346193} +{"epoch": 0, "iter": 10572, "iter_tflops": 52.559468473061635, "iter_time": 0.39252857971191407, "loss": 0.6226983070373535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.01564699993735, "step_time": 0.36184967803955076} +{"epoch": 0, "iter": 10573, "iter_tflops": 31.748999631310493, "iter_time": 0.6498186950683594, "loss": 0.26916074752807617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.775103268894604, "step_time": 0.6108373184204101} +{"epoch": 0, "iter": 10574, "iter_tflops": 21.59832876893017, "iter_time": 0.9552171249389648, "loss": 0.23207072913646698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.92065344113374, "step_time": 0.6895268363952637} +{"epoch": 0, "iter": 10575, "iter_tflops": 40.406960712036984, "iter_time": 0.5105826606750489, "loss": 0.20437216758728027, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84340315744765, "step_time": 0.470563232421875} +{"epoch": 0, "iter": 10576, "iter_tflops": 44.46543748589009, "iter_time": 0.46398044586181636, "loss": 0.29101884365081787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.46195136244323, "step_time": 0.4257173500061035} +{"epoch": 0, "iter": 10577, "iter_tflops": 20.437085426981394, "iter_time": 1.0094929428100585, "loss": 0.09899577498435974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.935375609717124, "step_time": 0.9405397872924806} +{"epoch": 0, "iter": 10578, "iter_tflops": 15.691960203680342, "iter_time": 1.3147556610107423, "loss": 0.05432205647230148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.94769785650089, "step_time": 1.0342593746185302} +{"epoch": 0, "iter": 10579, "iter_tflops": 32.774549415618814, "iter_time": 0.6294851913452149, "loss": 0.1032341718673706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.11058884630328, "step_time": 0.571330852508545} +{"epoch": 0, "iter": 10580, "iter_tflops": 36.62361326760966, "iter_time": 0.56332763671875, "loss": 0.08734080940485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.506343447834766, "step_time": 0.5093299407958984} +{"epoch": 0, "iter": 10581, "iter_tflops": 14.422427884335573, "iter_time": 1.28987353515625, "loss": 0.03082883730530739, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 15.09137103245748, "step_time": 1.2326983413696289} +{"epoch": 0, "iter": 10582, "iter_tflops": 10.008480456627039, "iter_time": 1.8587345123291015, "loss": 0.04393327236175537, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 11.72892617250839, "step_time": 1.5860879135131836} +{"epoch": 0, "iter": 10583, "iter_tflops": 21.396075151422007, "iter_time": 0.8694635772705078, "loss": 0.05427742376923561, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 26.876874618257666, "step_time": 0.6921603908538818} +{"epoch": 0, "iter": 10584, "iter_tflops": 41.44181090389911, "iter_time": 0.4488970832824707, "loss": 
0.03389940783381462, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 45.80079424553222, "step_time": 0.40617435455322265} +{"epoch": 0, "iter": 10585, "iter_tflops": 21.947975136644676, "iter_time": 0.6904017105102539, "loss": 0.2835710942745209, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 23.736851490579756, "step_time": 0.6383710823059081} +{"epoch": 0, "iter": 10586, "iter_tflops": 26.826135037163844, "iter_time": 0.5648566055297852, "loss": 0.3082343637943268, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.788903856720566, "step_time": 0.526345832824707} +{"epoch": 0, "iter": 10587, "iter_tflops": 27.46556694064015, "iter_time": 0.5517060546875, "loss": 0.23081515729427338, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 29.312713108011447, "step_time": 0.5169401931762696} +{"epoch": 0, "iter": 10588, "iter_tflops": 27.069832278208928, "iter_time": 0.5597714614868163, "loss": 0.273124635219574, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.862103122940017, "step_time": 0.525010929107666} +{"epoch": 0, "iter": 10589, "iter_tflops": 32.03990118168159, "iter_time": 0.6439187622070313, "loss": 0.5330900549888611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.17075956991238, "step_time": 0.6037645568847656} +{"epoch": 0, "iter": 10590, "iter_tflops": 11.478824346682446, "iter_time": 1.7973176422119141, "loss": 0.5120221972465515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.895494555033386, "step_time": 1.4847325820922852} +{"epoch": 0, "iter": 10591, "iter_tflops": 14.064570965528228, "iter_time": 1.4668839569091796, "loss": 0.5842110514640808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.850058814791897, "step_time": 1.2243929672241212} +{"epoch": 0, "iter": 10592, "iter_tflops": 23.10883558369985, "iter_time": 0.8927794494628907, "loss": 0.6471229791641235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.869258117474736, "step_time": 0.7146388530731201} +{"epoch": 0, "iter": 10593, "iter_tflops": 18.0182336176615, "iter_time": 0.8387085418701172, 
"loss": 0.23702380061149597, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 19.00008257598551, "step_time": 0.7953674087524413} +{"epoch": 0, "iter": 10594, "iter_tflops": 10.169233828982456, "iter_time": 1.4860555572509764, "loss": 0.1952732652425766, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 12.819789959408613, "step_time": 1.1788060874938964} +{"epoch": 0, "iter": 10595, "iter_tflops": 20.83928379413074, "iter_time": 0.7251711044311524, "loss": 0.23364567756652832, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 22.429411416836526, "step_time": 0.6737602767944335} +{"epoch": 0, "iter": 10596, "iter_tflops": 22.264919817539997, "iter_time": 0.6787379684448243, "loss": 0.40395107865333557, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 23.8404886572378, "step_time": 0.6338815727233886} +{"epoch": 0, "iter": 10597, "iter_tflops": 20.555535075597977, "iter_time": 1.0036758193969726, "loss": 0.378607839345932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.849261695943717, "step_time": 0.944246711730957} +{"epoch": 0, "iter": 10598, "iter_tflops": 18.078416711916407, "iter_time": 1.141200241088867, "loss": 0.26765912771224976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.899064535003127, "step_time": 0.86325945854187} +{"epoch": 0, "iter": 10599, "iter_tflops": 40.760597688361514, "iter_time": 0.5061528701782226, "loss": 0.3933992087841034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.42353834537674, "step_time": 0.46441806030273436} +{"epoch": 0, "iter": 10600, "iter_tflops": 39.82814645708034, "iter_time": 0.5180028533935547, "loss": 0.3758470416069031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.427022710873096, "step_time": 0.4750750160217286} +{"epoch": 0, "iter": 10601, "iter_tflops": 26.53240633507057, "iter_time": 0.5849823760986328, "loss": 0.041823070496320724, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 29.73264011131347, "step_time": 0.5220185642242432} +{"epoch": 0, "iter": 10602, "iter_tflops": 30.13656174899893, "iter_time": 
0.5150219268798828, "loss": 0.0568903312087059, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 33.40931358801881, "step_time": 0.46457075691223143} +{"epoch": 0, "iter": 10603, "iter_tflops": 32.153224001932955, "iter_time": 0.48271955871582034, "loss": 0.05723188817501068, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 35.531157107472374, "step_time": 0.436827600479126} +{"epoch": 0, "iter": 10604, "iter_tflops": 32.673175109107454, "iter_time": 0.4750377044677735, "loss": 0.02696695178747177, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 36.131800460289725, "step_time": 0.42956591987609866} +{"epoch": 0, "iter": 10605, "iter_tflops": 31.224609855132492, "iter_time": 0.6607318267822266, "loss": 0.1396782398223877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.16510853069811, "step_time": 0.6038644218444824} +{"epoch": 0, "iter": 10606, "iter_tflops": 37.47886499093021, "iter_time": 0.5504727401733398, "loss": 0.10889925062656403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25787068868388, "step_time": 0.4882189559936524} +{"epoch": 0, "iter": 10607, "iter_tflops": 38.9441861112011, "iter_time": 0.5297605514526367, "loss": 0.11580508202314377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81713583980433, "step_time": 0.48184197998046874} +{"epoch": 0, "iter": 10608, "iter_tflops": 38.02462057356036, "iter_time": 0.5425719757080079, "loss": 0.1405971199274063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.82589605127117, "step_time": 0.49326124382019043} +{"epoch": 0, "iter": 10609, "iter_tflops": 21.3718394231105, "iter_time": 0.9653400955200195, "loss": 0.14128530025482178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.995080290269627, "step_time": 0.8971959762573241} +{"epoch": 0, "iter": 10610, "iter_tflops": 35.70577585281648, "iter_time": 0.5778082962036133, "loss": 0.1493607759475708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.8369605799997, "step_time": 0.5178882427215576} +{"epoch": 0, "iter": 10611, "iter_tflops": 47.08833201430349, 
"iter_time": 0.43813600158691407, "loss": 0.16582682728767395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.11715503453723, "step_time": 0.4036041030883789} +{"epoch": 0, "iter": 10612, "iter_tflops": 53.21770121467875, "iter_time": 0.38767351913452147, "loss": 0.22630412876605988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.663512507188166, "step_time": 0.35778419685363766} +{"epoch": 0, "iter": 10613, "iter_tflops": 33.6671240498914, "iter_time": 0.6127964324951172, "loss": 0.9510036110877991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.16800134869832, "step_time": 0.5704239311218262} +{"epoch": 0, "iter": 10614, "iter_tflops": 8.985016916696184, "iter_time": 2.2961663513183592, "loss": 0.6456018686294556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.65946989701147, "step_time": 1.7694709701538085} +{"epoch": 0, "iter": 10615, "iter_tflops": 10.927487912303567, "iter_time": 1.8879996643066408, "loss": 0.7490132451057434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.778059471910087, "step_time": 1.6145717239379882} +{"epoch": 0, "iter": 10616, "iter_tflops": 32.88929538225749, "iter_time": 0.6272890090942383, "loss": 0.7734007835388184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.6310524657092, "step_time": 0.5790200424194336} +{"epoch": 0, "iter": 10617, "iter_tflops": 14.430273386902108, "iter_time": 1.0755853118896486, "loss": 0.39865365624427795, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 15.6140873341687, "step_time": 0.994037612915039} +{"epoch": 0, "iter": 10618, "iter_tflops": 19.208457131486735, "iter_time": 0.8080289840698244, "loss": 0.3090510666370392, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 23.379763297967024, "step_time": 0.6638642959594727} +{"epoch": 0, "iter": 10619, "iter_tflops": 26.75690069056949, "iter_time": 0.5800742874145508, "loss": 0.1797012984752655, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 28.522583005248457, "step_time": 0.5441649551391602} +{"epoch": 0, "iter": 10620, "iter_tflops": 
29.24256433341097, "iter_time": 0.5307670669555664, "loss": 0.2722543776035309, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 31.047322383349428, "step_time": 0.49991396713256836} +{"epoch": 0, "iter": 10621, "iter_tflops": 29.667112388451052, "iter_time": 0.6954196701049804, "loss": 0.3273790180683136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.494506319642365, "step_time": 0.6550695953369141} +{"epoch": 0, "iter": 10622, "iter_tflops": 14.450798573319593, "iter_time": 1.4276784362792967, "loss": 0.34884029626846313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.87350135618698, "step_time": 1.1542838249206542} +{"epoch": 0, "iter": 10623, "iter_tflops": 39.038147380216834, "iter_time": 0.5284854660034181, "loss": 0.3842538893222809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.68659534692815, "step_time": 0.48331550788879396} +{"epoch": 0, "iter": 10624, "iter_tflops": 35.55951251297367, "iter_time": 0.5801849365234375, "loss": 0.29167598485946655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.72506215470071, "step_time": 0.5327581768035888} +{"epoch": 0, "iter": 10625, "iter_tflops": 19.415740055510504, "iter_time": 1.0625962982177735, "loss": 0.7705671191215515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.739021802362764, "step_time": 0.994795883178711} +{"epoch": 0, "iter": 10626, "iter_tflops": 14.360586008386555, "iter_time": 1.4366470489501952, "loss": 0.567053496837616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.132550700870716, "step_time": 1.2042044334411623} +{"epoch": 0, "iter": 10627, "iter_tflops": 38.2822159396801, "iter_time": 0.5389210891723634, "loss": 0.8912542462348938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.8514115465694, "step_time": 0.4929605178833008} +{"epoch": 0, "iter": 10628, "iter_tflops": 40.87046979732158, "iter_time": 0.5047921791076659, "loss": 0.7440090179443359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.2684002015141, "step_time": 0.4660456085205078} +{"epoch": 0, "iter": 10629, 
"iter_tflops": 18.732677453970062, "iter_time": 1.1013424835205077, "loss": 0.004996649455279112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.808600292544984, "step_time": 1.0415220260620117} +{"epoch": 0, "iter": 10630, "iter_tflops": 24.92848841587559, "iter_time": 0.8276110916137696, "loss": 0.004401815123856068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.16655251474791, "step_time": 0.7324678268432617} +{"epoch": 0, "iter": 10631, "iter_tflops": 55.20950678379355, "iter_time": 0.3736873359680176, "loss": 0.007276535965502262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.80344777953023, "step_time": 0.33930795478820797} +{"epoch": 0, "iter": 10632, "iter_tflops": 57.1044557447295, "iter_time": 0.3612869300842285, "loss": 0.00231856107711792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.6075773310802, "step_time": 0.3295302963256836} +{"epoch": 0, "iter": 10633, "iter_tflops": 16.663266566931355, "iter_time": 1.0644635314941406, "loss": 0.30779731273651123, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 17.32912562325591, "step_time": 1.0235622940063476} +{"epoch": 0, "iter": 10634, "iter_tflops": 13.733338929223404, "iter_time": 1.2915606079101563, "loss": 0.3518707752227783, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 16.74664886725816, "step_time": 1.0591635208129884} +{"epoch": 0, "iter": 10635, "iter_tflops": 34.559500621372194, "iter_time": 0.5132435150146485, "loss": 0.26385173201560974, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 38.02698269685228, "step_time": 0.4664435176849366} +{"epoch": 0, "iter": 10636, "iter_tflops": 40.33123849934344, "iter_time": 0.43979407119750974, "loss": 0.20262128114700317, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 43.93855712786684, "step_time": 0.4036873474121094} +{"epoch": 0, "iter": 10637, "iter_tflops": 20.4223221956514, "iter_time": 1.0102227020263672, "loss": 0.5810028314590454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.87051729332593, "step_time": 0.9433290138244629} +{"epoch": 0, 
"iter": 10638, "iter_tflops": 29.11650063890059, "iter_time": 0.7085705032348633, "loss": 0.6808691620826721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.56985725970336, "step_time": 0.5349019927978516} +{"epoch": 0, "iter": 10639, "iter_tflops": 48.073704478566256, "iter_time": 0.42915547561645506, "loss": 0.6204978823661804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.063109503560895, "step_time": 0.39627086639404296} +{"epoch": 0, "iter": 10640, "iter_tflops": 54.83404875289405, "iter_time": 0.37624603652954103, "loss": 0.7179040312767029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.499791428759195, "step_time": 0.3467422828674317} +{"epoch": 0, "iter": 10641, "iter_tflops": 33.675167136977976, "iter_time": 0.6126500701904297, "loss": 0.0393298976123333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.0251129135716, "step_time": 0.5726864356994629} +{"epoch": 0, "iter": 10642, "iter_tflops": 14.58113173893914, "iter_time": 1.4149171600341797, "loss": 0.030314786359667778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.22840218923187, "step_time": 1.197504753112793} +{"epoch": 0, "iter": 10643, "iter_tflops": 51.729908336434896, "iter_time": 0.3988233146667481, "loss": 0.05276867002248764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.74789994724833, "step_time": 0.3635569515228272} +{"epoch": 0, "iter": 10644, "iter_tflops": 53.99350944976415, "iter_time": 0.3821032142639161, "loss": 0.04229764640331268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.08330365774914, "step_time": 0.3491865253448486} +{"epoch": 0, "iter": 10645, "iter_tflops": 35.877495844698345, "iter_time": 0.5750427398681641, "loss": 1.0704439878463745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.6336098176185, "step_time": 0.5340193061828613} +{"epoch": 0, "iter": 10646, "iter_tflops": 40.586702924819214, "iter_time": 0.5083214950561522, "loss": 0.7145487070083618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.90663452277386, "step_time": 0.4698855590820313} 
+{"epoch": 0, "iter": 10647, "iter_tflops": 44.26584905852474, "iter_time": 0.46607246780395506, "loss": 0.8768874406814575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.57683215957958, "step_time": 0.433637393951416} +{"epoch": 0, "iter": 10648, "iter_tflops": 42.51759565127745, "iter_time": 0.4852365989685059, "loss": 0.7766971588134766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60611790679757, "step_time": 0.45237556838989257} +{"epoch": 0, "iter": 10649, "iter_tflops": 30.666657857644765, "iter_time": 0.6727532424926758, "loss": 0.019865427166223526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.7083791544193, "step_time": 0.6307586631774903} +{"epoch": 0, "iter": 10650, "iter_tflops": 14.581620633913397, "iter_time": 1.4148697204589844, "loss": 0.021372688934206963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.121734677430908, "step_time": 1.2049651451110839} +{"epoch": 0, "iter": 10651, "iter_tflops": 38.94761606128468, "iter_time": 0.5297138977050782, "loss": 0.04948738217353821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.2658330916528, "step_time": 0.4768449382781983} +{"epoch": 0, "iter": 10652, "iter_tflops": 41.56217049665586, "iter_time": 0.49639114761352543, "loss": 0.036386024206876755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.773520689658746, "step_time": 0.4507211418151855} +{"epoch": 0, "iter": 10653, "iter_tflops": 24.07165519442997, "iter_time": 0.8570699996948242, "loss": 0.6578370332717896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.6383774635655, "step_time": 0.8046957550048827} +{"epoch": 0, "iter": 10654, "iter_tflops": 8.635523984921456, "iter_time": 2.3890957336425784, "loss": 0.688332736492157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.477286969825961, "step_time": 2.176898681640625} +{"epoch": 0, "iter": 10655, "iter_tflops": 11.371509182293725, "iter_time": 1.8142792816162112, "loss": 0.5657376050949097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.726822297697513, "step_time": 
1.6210718612670898} +{"epoch": 0, "iter": 10656, "iter_tflops": 34.33659363980621, "iter_time": 0.6008485794067383, "loss": 0.5300356149673462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.85068029420826, "step_time": 0.45999510765075685} +{"epoch": 0, "iter": 10657, "iter_tflops": 22.86108034952021, "iter_time": 0.6932517852783202, "loss": 0.16923263669013977, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 24.263187542444687, "step_time": 0.6531905479431152} +{"epoch": 0, "iter": 10658, "iter_tflops": 11.59967404430813, "iter_time": 1.3662870788574217, "loss": 0.1398962438106537, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 13.397794641546106, "step_time": 1.1829174270629883} +{"epoch": 0, "iter": 10659, "iter_tflops": 23.750478289315858, "iter_time": 0.6672911834716796, "loss": 0.2766183912754059, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.70330754551794, "step_time": 0.616593204498291} +{"epoch": 0, "iter": 10660, "iter_tflops": 24.013883077339134, "iter_time": 0.6599717636108399, "loss": 0.1980292946100235, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.836441227036545, "step_time": 0.6134159355163575} +{"epoch": 0, "iter": 10661, "iter_tflops": 15.727797399247214, "iter_time": 1.3117598724365234, "loss": 0.5961456298828125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.67989674887238, "step_time": 1.2368837661743166} +{"epoch": 0, "iter": 10662, "iter_tflops": 17.88070159439284, "iter_time": 1.1538190155029295, "loss": 0.5946835279464722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.428742957054133, "step_time": 0.9627766571044922} +{"epoch": 0, "iter": 10663, "iter_tflops": 46.2058557779889, "iter_time": 0.44650387191772456, "loss": 0.6613525152206421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.28654701461085, "step_time": 0.4102706336975097} +{"epoch": 0, "iter": 10664, "iter_tflops": 50.923583855191985, "iter_time": 0.40513828659057616, "loss": 0.7310048341751099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.06266115005243, 
"step_time": 0.3746839160919189} +{"epoch": 0, "iter": 10665, "iter_tflops": 5.242110969495332, "iter_time": 0.44900273513793953, "loss": 1.1139194965362549, "lr": 3e-05, "seqlen": 960.0, "step_tflops": 5.748266331867803, "step_time": 0.4094664421081543} +{"epoch": 0, "iter": 10666, "iter_tflops": 5.265303541972142, "iter_time": 0.44702497100830074, "loss": 1.1421549320220947, "lr": 3e-05, "seqlen": 960.0, "step_tflops": 5.8144654312665285, "step_time": 0.40480456733703607} +{"epoch": 0, "iter": 10667, "iter_tflops": 5.505362271702574, "iter_time": 0.427532657623291, "loss": 0.7626432180404663, "lr": 3e-05, "seqlen": 960.0, "step_tflops": 5.962775763600986, "step_time": 0.3947359848022461} +{"epoch": 0, "iter": 10668, "iter_tflops": 5.1416545247506065, "iter_time": 0.45777524566650385, "loss": 0.26293736696243286, "lr": 3e-05, "seqlen": 960.0, "step_tflops": 5.538408534841106, "step_time": 0.4249816799163818} +{"epoch": 0, "iter": 10669, "iter_tflops": 22.417637538200353, "iter_time": 0.9203063201904296, "loss": 0.3797714114189148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.506567057923693, "step_time": 0.8418597946166992} +{"epoch": 0, "iter": 10670, "iter_tflops": 15.776101165648917, "iter_time": 1.3077434844970703, "loss": 0.36453884840011597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.6841646260183, "step_time": 1.1042020835876465} +{"epoch": 0, "iter": 10671, "iter_tflops": 35.60516677759399, "iter_time": 0.5794410018920897, "loss": 0.3504093289375305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.098756829898704, "step_time": 0.5276662273406982} +{"epoch": 0, "iter": 10672, "iter_tflops": 37.95567047054389, "iter_time": 0.5435576095581054, "loss": 0.3668975830078125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36971191115867, "step_time": 0.49870043945312503} +{"epoch": 0, "iter": 10673, "iter_tflops": 23.637578657854664, "iter_time": 0.8728090896606445, "loss": 0.3385162651538849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.07479354879368, "step_time": 0.8227821884155273} +{"epoch": 0, "iter": 10674, "iter_tflops": 29.412671453076104, "iter_time": 0.7014355545043944, "loss": 0.34402868151664734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.244758492645644, "step_time": 0.553932804107666} +{"epoch": 0, "iter": 10675, "iter_tflops": 38.87490729384153, "iter_time": 0.5307046356201173, "loss": 0.4344080090522766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.68430478202957, "step_time": 0.4833414440155029} +{"epoch": 0, "iter": 10676, "iter_tflops": 39.14191608384018, "iter_time": 0.5270844039916993, "loss": 0.3997652530670166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.73457999088886, "step_time": 0.4827728157043457} +{"epoch": 0, "iter": 10677, "iter_tflops": 18.337856622147182, "iter_time": 1.1250547943115237, "loss": 0.06286245584487915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.59092583427811, "step_time": 1.0530943603515626} +{"epoch": 0, "iter": 10678, "iter_tflops": 29.696839512207905, "iter_time": 0.6947235412597657, "loss": 0.06380204856395721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.13421406718505, "step_time": 0.5555818004608154} +{"epoch": 0, "iter": 10679, "iter_tflops": 46.828501803542146, "iter_time": 0.4405670204162597, "loss": 0.0624268501996994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.812399913744166, "step_time": 0.40602478027343747} +{"epoch": 0, "iter": 10680, "iter_tflops": 49.407737821999135, "iter_time": 0.4175680656433105, "loss": 0.0837821438908577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.668319273298884, "step_time": 0.38441847610473634} +{"epoch": 0, "iter": 10681, "iter_tflops": 39.06484549303509, "iter_time": 0.528124282836914, "loss": 0.6906707286834717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37989565835057, "step_time": 0.4868132209777831} +{"epoch": 0, "iter": 10682, "iter_tflops": 43.90773312792294, "iter_time": 0.46987380218505864, "loss": 0.608307421207428, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 47.43444321376061, "step_time": 0.43493908882141114} +{"epoch": 0, "iter": 10683, "iter_tflops": 43.74819898816217, "iter_time": 0.4715872650146484, "loss": 0.986882746219635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.196979224339174, "step_time": 0.4371274147033692} +{"epoch": 0, "iter": 10684, "iter_tflops": 42.87683523469885, "iter_time": 0.4811710891723633, "loss": 0.5936547517776489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.10944408908922, "step_time": 0.4474374809265137} +{"epoch": 0, "iter": 10685, "iter_tflops": 22.488621529741913, "iter_time": 0.9174014282226562, "loss": 0.2911486327648163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.571642207946063, "step_time": 0.8752505798339842} +{"epoch": 0, "iter": 10686, "iter_tflops": 15.371560885070247, "iter_time": 1.3421599578857424, "loss": 0.2827969193458557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.016022343151054, "step_time": 1.1451525268554685} +{"epoch": 0, "iter": 10687, "iter_tflops": 38.469189226214695, "iter_time": 0.5363017501831056, "loss": 0.26164332032203674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.23985589684095, "step_time": 0.48842717552185055} +{"epoch": 0, "iter": 10688, "iter_tflops": 39.123823738153064, "iter_time": 0.5273281478881836, "loss": 0.34737488627433777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89702830487021, "step_time": 0.48094458580017085} +{"epoch": 0, "iter": 10689, "iter_tflops": 30.832584496238425, "iter_time": 0.6691327972412109, "loss": 0.4347383379936218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.04614436096842, "step_time": 0.6059744472503662} +{"epoch": 0, "iter": 10690, "iter_tflops": 37.958154833254945, "iter_time": 0.5435220336914063, "loss": 0.48697978258132935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.871179773724805, "step_time": 0.4927277812957763} +{"epoch": 0, "iter": 10691, "iter_tflops": 40.103959387893404, "iter_time": 0.5144403152465821, "loss": 0.41998064517974854, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 43.87433473905649, "step_time": 0.47023148345947263} +{"epoch": 0, "iter": 10692, "iter_tflops": 41.315561675003494, "iter_time": 0.499354061126709, "loss": 0.36124587059020996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.00906384346786, "step_time": 0.4583764190673828} +{"epoch": 0, "iter": 10693, "iter_tflops": 24.82230684101915, "iter_time": 0.8311513366699218, "loss": 0.3544999659061432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.312009510097663, "step_time": 0.755385410308838} +{"epoch": 0, "iter": 10694, "iter_tflops": 33.51034059950689, "iter_time": 0.6156634979248047, "loss": 0.2895665764808655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.07927495213993, "step_time": 0.5564049873352052} +{"epoch": 0, "iter": 10695, "iter_tflops": 38.11562407234771, "iter_time": 0.5412765502929688, "loss": 0.26877233386039734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.88311153016092, "step_time": 0.49258741188049315} +{"epoch": 0, "iter": 10696, "iter_tflops": 38.41782310425662, "iter_time": 0.5370188064575195, "loss": 0.44445499777793884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95242836664638, "step_time": 0.49177352333068847} +{"epoch": 0, "iter": 10697, "iter_tflops": 16.914435185164702, "iter_time": 1.219732925415039, "loss": 0.16743111610412598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.19179322021964, "step_time": 1.1340879516601563} +{"epoch": 0, "iter": 10698, "iter_tflops": 18.011168004334333, "iter_time": 1.145461166381836, "loss": 0.14797283709049225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.08357645756136, "step_time": 0.8566457538604737} +{"epoch": 0, "iter": 10699, "iter_tflops": 50.83316932380204, "iter_time": 0.40585888671875, "loss": 0.18052734434604645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.50651541204209, "step_time": 0.371687780380249} +{"epoch": 0, "iter": 10700, "iter_tflops": 50.61112176012673, "iter_time": 0.40763952255249025, "loss": 0.18845827877521515, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 54.697178313823194, "step_time": 0.37718752861022947} +{"epoch": 0, "iter": 10701, "iter_tflops": 37.060399733827595, "iter_time": 0.5566883697509766, "loss": 0.7899782061576843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.81064536698738, "step_time": 0.5182305717468262} +{"epoch": 0, "iter": 10702, "iter_tflops": 12.955180372773382, "iter_time": 1.5924975891113282, "loss": 0.5990591645240784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.776741868213533, "step_time": 1.3076903762817382} +{"epoch": 0, "iter": 10703, "iter_tflops": 18.626900571551882, "iter_time": 1.1075966949462892, "loss": 0.5569287538528442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.551474733806348, "step_time": 1.0038741149902344} +{"epoch": 0, "iter": 10704, "iter_tflops": 30.383341204691654, "iter_time": 0.6790264892578125, "loss": 0.7175010442733765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.41156325093434, "step_time": 0.5666082878112794} +{"epoch": 0, "iter": 10705, "iter_tflops": 17.23812695590721, "iter_time": 0.815111831665039, "loss": 0.2898871898651123, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 18.21022192769337, "step_time": 0.7715996704101563} +{"epoch": 0, "iter": 10706, "iter_tflops": 6.435795068370952, "iter_time": 2.183258026123047, "loss": 0.19041670858860016, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 8.586240307796562, "step_time": 1.636455623626709} +{"epoch": 0, "iter": 10707, "iter_tflops": 10.264100417579611, "iter_time": 1.3689461975097656, "loss": 0.24425339698791504, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 12.195826467594747, "step_time": 1.1521155433654786} +{"epoch": 0, "iter": 10708, "iter_tflops": 21.560172527498253, "iter_time": 0.6517109832763672, "loss": 0.1682714819908142, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 23.209600333111112, "step_time": 0.6053960876464843} +{"epoch": 0, "iter": 10709, "iter_tflops": 13.24305841704319, "iter_time": 1.0856356964111329, "loss": 
0.30858391523361206, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 14.04586700841084, "step_time": 1.0235848693847658} +{"epoch": 0, "iter": 10710, "iter_tflops": 12.300693827814957, "iter_time": 1.1688069915771486, "loss": 0.2902928292751312, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 16.138794567946675, "step_time": 0.8908432960510253} +{"epoch": 0, "iter": 10711, "iter_tflops": 24.687014110919634, "iter_time": 0.5823765029907226, "loss": 0.23696978390216827, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 26.280980434089667, "step_time": 0.547054817199707} +{"epoch": 0, "iter": 10712, "iter_tflops": 26.156357249587334, "iter_time": 0.5496612854003906, "loss": 0.2521938979625702, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 27.763855457987617, "step_time": 0.5178364715576173} +{"epoch": 0, "iter": 10713, "iter_tflops": 34.52521831178035, "iter_time": 0.5975659103393555, "loss": 0.10546154528856277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.093671113047435, "step_time": 0.5561890449523926} +{"epoch": 0, "iter": 10714, "iter_tflops": 15.74690268367725, "iter_time": 1.3101683502197266, "loss": 0.1111392080783844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.802608235590498, "step_time": 1.158880386352539} +{"epoch": 0, "iter": 10715, "iter_tflops": 38.0875215454986, "iter_time": 0.5416759262084961, "loss": 0.12447188049554825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.978729001859755, "step_time": 0.4914654159545898} +{"epoch": 0, "iter": 10716, "iter_tflops": 39.768778836909654, "iter_time": 0.518776138305664, "loss": 0.11577363312244415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.742668161566506, "step_time": 0.47164689254760744} +{"epoch": 0, "iter": 10717, "iter_tflops": 17.910181027536563, "iter_time": 1.151919876098633, "loss": 0.4201798141002655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.873661474664196, "step_time": 1.0931155853271486} +{"epoch": 0, "iter": 10718, "iter_tflops": 8.376133323024838, "iter_time": 
2.463080841064453, "loss": 0.4327256381511688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.79754443654151, "step_time": 1.9107208709716796} +{"epoch": 0, "iter": 10719, "iter_tflops": 22.213592321842626, "iter_time": 0.9287598876953124, "loss": 0.3794000744819641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.387693293011473, "step_time": 0.8126415138244629} +{"epoch": 0, "iter": 10720, "iter_tflops": 47.704965500195144, "iter_time": 0.4324726638793946, "loss": 0.6200135350227356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.85351414577491, "step_time": 0.39787261962890624} +{"epoch": 0, "iter": 10721, "iter_tflops": 17.53503728930899, "iter_time": 0.875809715270996, "loss": 0.2077900618314743, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 18.545773351941833, "step_time": 0.8280784912109375} +{"epoch": 0, "iter": 10722, "iter_tflops": 10.761766914030881, "iter_time": 1.427029235839844, "loss": 0.30717459321022034, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 12.323896632043645, "step_time": 1.246144500732422} +{"epoch": 0, "iter": 10723, "iter_tflops": 26.66109892735203, "iter_time": 0.5760211181640625, "loss": 0.28518420457839966, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 28.477063822115444, "step_time": 0.5392886047363281} +{"epoch": 0, "iter": 10724, "iter_tflops": 28.06450677788921, "iter_time": 0.5472163162231445, "loss": 0.3715083599090576, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.897777256719014, "step_time": 0.5136621322631836} +{"epoch": 0, "iter": 10725, "iter_tflops": 22.317857098444723, "iter_time": 0.9244208984375001, "loss": 0.43214479088783264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.355367638652957, "step_time": 0.8833555450439454} +{"epoch": 0, "iter": 10726, "iter_tflops": 23.544221354807846, "iter_time": 0.8762699432373047, "loss": 0.37879201769828796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.584466247596005, "step_time": 0.6331573257446289} +{"epoch": 0, "iter": 10727, "iter_tflops": 50.55556470503228, 
"iter_time": 0.40808749008178713, "loss": 0.5158810615539551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.76516519488125, "step_time": 0.3767192783355713} +{"epoch": 0, "iter": 10728, "iter_tflops": 48.634957004906255, "iter_time": 0.4242029762268066, "loss": 0.48295125365257263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.83992845771597, "step_time": 0.3904451446533203} +{"epoch": 0, "iter": 10729, "iter_tflops": 29.442828888283394, "iter_time": 0.7007170944213867, "loss": 0.8014641404151917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.18511184380593, "step_time": 0.6615686874389649} +{"epoch": 0, "iter": 10730, "iter_tflops": 14.646245195872584, "iter_time": 1.4086268005371094, "loss": 0.49123266339302063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.794738838576542, "step_time": 1.2284259796142576} +{"epoch": 0, "iter": 10731, "iter_tflops": 23.575128720247765, "iter_time": 0.8751211395263672, "loss": 0.819442868232727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.25901373910027, "step_time": 0.7300712509155274} +{"epoch": 0, "iter": 10732, "iter_tflops": 41.476559366707214, "iter_time": 0.49741574096679686, "loss": 0.6712836623191833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52579364708705, "step_time": 0.46335150527954105} +{"epoch": 0, "iter": 10733, "iter_tflops": 29.226933926793947, "iter_time": 0.6040696411132813, "loss": 0.18089845776557922, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 31.481415517698668, "step_time": 0.5608103446960448} +{"epoch": 0, "iter": 10734, "iter_tflops": 29.518106441470234, "iter_time": 0.5981109771728516, "loss": 0.30583885312080383, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 31.393545107629652, "step_time": 0.5623800506591796} +{"epoch": 0, "iter": 10735, "iter_tflops": 32.891897655002914, "iter_time": 0.536761474609375, "loss": 0.25845861434936523, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 34.98520321433423, "step_time": 0.504644874572754} +{"epoch": 0, "iter": 10736, "iter_tflops": 
32.114916376696044, "iter_time": 0.5497477645874023, "loss": 0.2851370871067047, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 34.091199252190314, "step_time": 0.5178786277770996} +{"epoch": 0, "iter": 10737, "iter_tflops": 24.371178160076955, "iter_time": 0.8465365676879884, "loss": 0.06880442053079605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.567796456747697, "step_time": 0.8069171524047852} +{"epoch": 0, "iter": 10738, "iter_tflops": 17.148628937620153, "iter_time": 1.203075393676758, "loss": 0.06272704899311066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.059913669702485, "step_time": 1.0284736938476562} +{"epoch": 0, "iter": 10739, "iter_tflops": 51.47439775941128, "iter_time": 0.4008030090332031, "loss": 0.05611797049641609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.98774993171656, "step_time": 0.3684929924011231} +{"epoch": 0, "iter": 10740, "iter_tflops": 53.65751088369201, "iter_time": 0.38449591064453126, "loss": 0.11846885830163956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.83782069998126, "step_time": 0.3506434001922607} +{"epoch": 0, "iter": 10741, "iter_tflops": 33.11791769989174, "iter_time": 0.6229586563110352, "loss": 0.055266689509153366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.28079304169984, "step_time": 0.5847684173583984} +{"epoch": 0, "iter": 10742, "iter_tflops": 15.482841692800037, "iter_time": 1.3325133666992188, "loss": 0.026259800419211388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.24985168689222, "step_time": 1.1304800643920898} +{"epoch": 0, "iter": 10743, "iter_tflops": 42.256801587262274, "iter_time": 0.4882313079833985, "loss": 0.03227054700255394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.47220291315229, "step_time": 0.4439448146820068} +{"epoch": 0, "iter": 10744, "iter_tflops": 41.669333785077946, "iter_time": 0.49511455154418943, "loss": 0.033890046179294586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.18434295449848, "step_time": 0.4467118549346924} +{"epoch": 0, "iter": 
10745, "iter_tflops": 30.83783931349048, "iter_time": 0.6690187759399415, "loss": 0.20701897144317627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.682999464007075, "step_time": 0.6125076103210448} +{"epoch": 0, "iter": 10746, "iter_tflops": 7.731034876098497, "iter_time": 2.668606964111328, "loss": 0.1729034185409546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.610985512260761, "step_time": 2.3959038696289063} +{"epoch": 0, "iter": 10747, "iter_tflops": 12.87356602799021, "iter_time": 1.602593521118164, "loss": 0.228926420211792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.13886176691622, "step_time": 1.4591764068603514} +{"epoch": 0, "iter": 10748, "iter_tflops": 36.44736969096797, "iter_time": 0.5660516433715821, "loss": 0.18072618544101715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54727585995696, "step_time": 0.5088157730102539} +{"epoch": 0, "iter": 10749, "iter_tflops": 20.238378516389755, "iter_time": 0.7648870925903319, "loss": 0.20913873612880707, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 21.987073288004055, "step_time": 0.704053436279297} +{"epoch": 0, "iter": 10750, "iter_tflops": 23.858237607444796, "iter_time": 0.6488356246948243, "loss": 0.22353285551071167, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.706262865235207, "step_time": 0.6021907806396484} +{"epoch": 0, "iter": 10751, "iter_tflops": 22.77542007758463, "iter_time": 0.6796833801269531, "loss": 0.29038819670677185, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.51101623646624, "step_time": 0.6315558013916016} +{"epoch": 0, "iter": 10752, "iter_tflops": 23.92261310714723, "iter_time": 0.647089614868164, "loss": 0.2572357952594757, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 25.557076641756122, "step_time": 0.6057059936523437} +{"epoch": 0, "iter": 10753, "iter_tflops": 17.522078068572746, "iter_time": 1.1774341735839844, "loss": 0.5709943175315857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.68933928694109, "step_time": 1.103896354675293} +{"epoch": 0, 
"iter": 10754, "iter_tflops": 20.401931344148053, "iter_time": 1.0112323760986328, "loss": 0.6164810061454773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.885033493943496, "step_time": 0.7970278854370118} +{"epoch": 0, "iter": 10755, "iter_tflops": 35.08393343117762, "iter_time": 0.5880496139526368, "loss": 0.6671337485313416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.12413865067496, "step_time": 0.5411556625366211} +{"epoch": 0, "iter": 10756, "iter_tflops": 34.63197082235059, "iter_time": 0.5957239227294922, "loss": 0.5270534753799438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.58102978919683, "step_time": 0.5489762687683106} +{"epoch": 0, "iter": 10757, "iter_tflops": 16.727675314100864, "iter_time": 1.2333509063720705, "loss": 0.09326330572366714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.645101964803153, "step_time": 1.1692249526977538} +{"epoch": 0, "iter": 10758, "iter_tflops": 13.571798296576256, "iter_time": 1.5201444244384765, "loss": 0.10375902056694031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.436745114926765, "step_time": 1.1831963691711427} +{"epoch": 0, "iter": 10759, "iter_tflops": 39.64349118039932, "iter_time": 0.520415657043457, "loss": 0.13788340985774994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.32970714388584, "step_time": 0.4761420021057128} +{"epoch": 0, "iter": 10760, "iter_tflops": 42.212625184236735, "iter_time": 0.48874225234985347, "loss": 0.13965776562690735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.86950732478278, "step_time": 0.44977796173095713} +{"epoch": 0, "iter": 10761, "iter_tflops": 19.43008697257071, "iter_time": 1.0618116912841795, "loss": 0.0029120324179530144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.736192619225847, "step_time": 0.9949316101074218} +{"epoch": 0, "iter": 10762, "iter_tflops": 21.870887957799066, "iter_time": 0.9433130264282227, "loss": 0.002345465589314699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.306214432923415, "step_time": 
0.8152579898834229} +{"epoch": 0, "iter": 10763, "iter_tflops": 55.00301336506172, "iter_time": 0.37509024047851564, "loss": 0.012391685508191586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.79919445258784, "step_time": 0.33933169174194333} +{"epoch": 0, "iter": 10764, "iter_tflops": 57.28236098107945, "iter_time": 0.36016485977172846, "loss": 0.002859374275431037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.57971201163386, "step_time": 0.32967702865600584} +{"epoch": 0, "iter": 10765, "iter_tflops": 19.87933049227711, "iter_time": 1.0378163146972657, "loss": 0.7607877254486084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.735288365821802, "step_time": 0.9949749984741211} +{"epoch": 0, "iter": 10766, "iter_tflops": 18.00218932495536, "iter_time": 1.1460324707031249, "loss": 0.6845934391021729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.30355557190549, "step_time": 0.9684342803955079} +{"epoch": 0, "iter": 10767, "iter_tflops": 38.79057633893325, "iter_time": 0.5318583908081055, "loss": 0.7903287410736084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.141022594706605, "step_time": 0.48957268333435056} +{"epoch": 0, "iter": 10768, "iter_tflops": 39.24840135012957, "iter_time": 0.5256543655395508, "loss": 0.6889578700065613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.383419108665606, "step_time": 0.48677275085449223} +{"epoch": 0, "iter": 10769, "iter_tflops": 31.384376801017982, "iter_time": 0.6573682708740235, "loss": 0.07889028638601303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.97588977914466, "step_time": 0.5898661518096923} +{"epoch": 0, "iter": 10770, "iter_tflops": 38.14020929398654, "iter_time": 0.5409276428222657, "loss": 0.0796528160572052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.18071182866136, "step_time": 0.48911202812194826} +{"epoch": 0, "iter": 10771, "iter_tflops": 41.06687030065214, "iter_time": 0.5023780326843262, "loss": 0.1607663482427597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
45.39113576336726, "step_time": 0.45451811599731445} +{"epoch": 0, "iter": 10772, "iter_tflops": 45.39600839000611, "iter_time": 0.45446932983398436, "loss": 0.11472151428461075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.92451508396848, "step_time": 0.4132457466125488} +{"epoch": 0, "iter": 10773, "iter_tflops": 21.98738871671311, "iter_time": 0.9383148574829102, "loss": 0.17537392675876617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.83631141541756, "step_time": 0.8655321350097657} +{"epoch": 0, "iter": 10774, "iter_tflops": 16.494830720702602, "iter_time": 1.2507611541748045, "loss": 0.1833621710538864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.914147984683932, "step_time": 1.036001817703247} +{"epoch": 0, "iter": 10775, "iter_tflops": 48.543410885915705, "iter_time": 0.42500296401977544, "loss": 0.2084110975265503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76574889317279, "step_time": 0.3909940433502197} +{"epoch": 0, "iter": 10776, "iter_tflops": 47.124776411139464, "iter_time": 0.4377971649169922, "loss": 0.12304981797933578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.810164135836466, "step_time": 0.4060426464080811} +{"epoch": 0, "iter": 10777, "iter_tflops": 29.330918135015764, "iter_time": 0.7033906478881835, "loss": 0.222958043217659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.285625780142784, "step_time": 0.6594432106018067} +{"epoch": 0, "iter": 10778, "iter_tflops": 16.375560651769113, "iter_time": 1.2598709716796874, "loss": 0.2243715524673462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.09420696512224, "step_time": 0.9780454673767089} +{"epoch": 0, "iter": 10779, "iter_tflops": 46.000875590959744, "iter_time": 0.4484934959411621, "loss": 0.25880786776542664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.91470768433354, "step_time": 0.4133269424438476} +{"epoch": 0, "iter": 10780, "iter_tflops": 46.68758124362936, "iter_time": 0.4418968162536622, "loss": 0.2559514045715332, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 50.331410769734156, "step_time": 0.40990493202209477} +{"epoch": 0, "iter": 10781, "iter_tflops": 2.2126391231965155, "iter_time": 0.6550793685913086, "loss": 0.753023087978363, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 2.358880748603011, "step_time": 0.6144669418334961} +{"epoch": 0, "iter": 10782, "iter_tflops": 0.566492218574548, "iter_time": 2.5586481018066407, "loss": 0.5274447798728943, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 0.7205254328052837, "step_time": 2.0116628417968747} +{"epoch": 0, "iter": 10783, "iter_tflops": 0.7892676921552036, "iter_time": 1.8364545440673827, "loss": 0.5799984335899353, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 0.9331312465778651, "step_time": 1.5533230133056641} +{"epoch": 0, "iter": 10784, "iter_tflops": 2.45300596202285, "iter_time": 0.5908889999389648, "loss": 0.554694414138794, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 2.6764864839168836, "step_time": 0.5415511150360107} +{"epoch": 0, "iter": 10785, "iter_tflops": 15.705908799383005, "iter_time": 0.9231885833740234, "loss": 0.29729852080345154, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 16.827900840975744, "step_time": 0.8616354370117187} +{"epoch": 0, "iter": 10786, "iter_tflops": 25.004694052014663, "iter_time": 0.5798717498779298, "loss": 0.30305206775665283, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 26.737376760971472, "step_time": 0.5422938766479491} +{"epoch": 0, "iter": 10787, "iter_tflops": 25.600074954078973, "iter_time": 0.5663856735229493, "loss": 0.2532579302787781, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 27.292154220657476, "step_time": 0.5312704734802245} +{"epoch": 0, "iter": 10788, "iter_tflops": 25.86182606868947, "iter_time": 0.5606532058715821, "loss": 0.2788165509700775, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 27.52086915558718, "step_time": 0.526855297088623} +{"epoch": 0, "iter": 10789, "iter_tflops": 27.74638046478858, "iter_time": 0.743559814453125, "loss": 0.3051398694515228, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 29.364936235856486, "step_time": 0.702575798034668} +{"epoch": 0, "iter": 10790, "iter_tflops": 8.61046949154581, "iter_time": 2.396047454833984, "loss": 0.579458475112915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.63868936972907, "step_time": 2.1404459381103513} +{"epoch": 0, "iter": 10791, "iter_tflops": 11.91504406153669, "iter_time": 1.731516342163086, "loss": 0.3542041480541229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.2479153336302, "step_time": 1.5573086776733398} +{"epoch": 0, "iter": 10792, "iter_tflops": 32.96044695158068, "iter_time": 0.6259348831176759, "loss": 0.3175804913043976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.87356232763644, "step_time": 0.5751057929992676} +{"epoch": 0, "iter": 10793, "iter_tflops": 17.02722847110542, "iter_time": 0.8803226776123049, "loss": 0.2023746520280838, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 18.692672393643075, "step_time": 0.8018893737792968} +{"epoch": 0, "iter": 10794, "iter_tflops": 26.089024645436062, "iter_time": 0.5745502395629883, "loss": 0.3319038450717926, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.890965768291164, "step_time": 0.5374304885864257} +{"epoch": 0, "iter": 10795, "iter_tflops": 24.494597418148217, "iter_time": 0.6119494476318359, "loss": 0.23834693431854248, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 26.07848814300477, "step_time": 0.5747823753356933} +{"epoch": 0, "iter": 10796, "iter_tflops": 28.017769861359724, "iter_time": 0.534998161315918, "loss": 0.20816461741924286, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.72838345073908, "step_time": 0.5042136039733887} +{"epoch": 0, "iter": 10797, "iter_tflops": 33.71262965850768, "iter_time": 0.4883437271118164, "loss": 0.00800562184303999, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 36.728020314453886, "step_time": 0.44825043869018555} +{"epoch": 0, "iter": 10798, "iter_tflops": 7.898750553010853, "iter_time": 2.0842981567382814, "loss": 0.007965621538460255, "lr": 3e-05, 
"seqlen": 6576.0, "step_tflops": 9.91077796835583, "step_time": 1.661156295776367} +{"epoch": 0, "iter": 10799, "iter_tflops": 17.74991086006344, "iter_time": 0.9275174026489257, "loss": 0.008434665389358997, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 24.058458507060212, "step_time": 0.6843061542510986} +{"epoch": 0, "iter": 10800, "iter_tflops": 44.099848938045405, "iter_time": 0.37331990051269526, "loss": 0.0018883689772337675, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 48.408721048686544, "step_time": 0.34009060478210457} +{"epoch": 0, "iter": 10801, "iter_tflops": 22.375338054940045, "iter_time": 0.7780087356567382, "loss": 0.22095626592636108, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 23.583968083127488, "step_time": 0.7381373825073243} +{"epoch": 0, "iter": 10802, "iter_tflops": 7.80531264617267, "iter_time": 2.230302520751953, "loss": 0.2998168468475342, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 9.772067772019819, "step_time": 1.7814252700805664} +{"epoch": 0, "iter": 10803, "iter_tflops": 8.796534693069983, "iter_time": 1.9789848022460936, "loss": 0.18294985592365265, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 10.605255669279298, "step_time": 1.6414699478149415} +{"epoch": 0, "iter": 10804, "iter_tflops": 22.385524193244986, "iter_time": 0.7776547164916991, "loss": 0.25126907229423523, "lr": 3e-05, "seqlen": 6944.0, "step_tflops": 30.35311798315463, "step_time": 0.5735229072570801} +{"epoch": 0, "iter": 10805, "iter_tflops": 17.078962166955062, "iter_time": 0.8537477264404296, "loss": 0.15182775259017944, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 18.33034266082192, "step_time": 0.7954638595581055} +{"epoch": 0, "iter": 10806, "iter_tflops": 9.898734981959038, "iter_time": 1.4730291442871095, "loss": 0.2779378890991211, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 11.375087966725257, "step_time": 1.281847240447998} +{"epoch": 0, "iter": 10807, "iter_tflops": 25.457691870805256, "iter_time": 0.5727591171264648, "loss": 
0.24218399822711945, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.163585485464907, "step_time": 0.5367894134521484} +{"epoch": 0, "iter": 10808, "iter_tflops": 25.856246482349693, "iter_time": 0.5639304656982421, "loss": 0.3251098394393921, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.482430300157954, "step_time": 0.5305617065429687} +{"epoch": 0, "iter": 10809, "iter_tflops": 21.812709758552195, "iter_time": 0.9458290023803712, "loss": 0.0690690353512764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.831177944687724, "step_time": 0.9036368408203125} +{"epoch": 0, "iter": 10810, "iter_tflops": 18.61439292301383, "iter_time": 1.1083409271240234, "loss": 0.0672108381986618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.84702048770901, "step_time": 0.9030102424621582} +{"epoch": 0, "iter": 10811, "iter_tflops": 41.85749258801477, "iter_time": 0.49288890075683595, "loss": 0.019221261143684387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17939919672875, "step_time": 0.44675967788696286} +{"epoch": 0, "iter": 10812, "iter_tflops": 42.01906001935445, "iter_time": 0.4909936943054199, "loss": 0.05137770622968674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.37002844968846, "step_time": 0.4449230289459229} +{"epoch": 0, "iter": 10813, "iter_tflops": 16.914565530916462, "iter_time": 1.2197235260009767, "loss": 0.0802149698138237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.03883687470219, "step_time": 1.143704200744629} +{"epoch": 0, "iter": 10814, "iter_tflops": 19.53359692434401, "iter_time": 1.0561850738525391, "loss": 0.07996976375579834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.674381291703636, "step_time": 0.871452278137207} +{"epoch": 0, "iter": 10815, "iter_tflops": 51.966499766334465, "iter_time": 0.3970075645446777, "loss": 0.15252114832401276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.27264347265162, "step_time": 0.3602259693145752} +{"epoch": 0, "iter": 10816, "iter_tflops": 49.84888900345587, "iter_time": 
0.4138726844787598, "loss": 0.07648934423923492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.920188408925426, "step_time": 0.3826228008270264} +{"epoch": 0, "iter": 10817, "iter_tflops": 28.830470284817626, "iter_time": 0.7156003112792968, "loss": 0.6311395764350891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.37717882735576, "step_time": 0.6791642379760742} +{"epoch": 0, "iter": 10818, "iter_tflops": 11.609544840938158, "iter_time": 1.7770803070068357, "loss": 0.7215819358825684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.661868636081755, "step_time": 1.4071257915496829} +{"epoch": 0, "iter": 10819, "iter_tflops": 35.401181999686195, "iter_time": 0.5827797927856445, "loss": 0.61746746301651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.539083731637, "step_time": 0.5353291130065918} +{"epoch": 0, "iter": 10820, "iter_tflops": 39.53424851056216, "iter_time": 0.5218536911010743, "loss": 0.7702057957649231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.807566053880876, "step_time": 0.48194969749450683} +{"epoch": 0, "iter": 10821, "iter_tflops": 19.426581716464252, "iter_time": 0.9342561340332031, "loss": 0.02989949844777584, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 20.831848698387592, "step_time": 0.8712334365844727} +{"epoch": 0, "iter": 10822, "iter_tflops": 15.332906821538353, "iter_time": 1.1836896514892579, "loss": 0.03627043962478638, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 18.650824286148065, "step_time": 0.9731153354644775} +{"epoch": 0, "iter": 10823, "iter_tflops": 38.32134018427772, "iter_time": 0.4736108665466308, "loss": 0.019606197252869606, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 42.18724802559357, "step_time": 0.4302106437683106} +{"epoch": 0, "iter": 10824, "iter_tflops": 43.028756647288255, "iter_time": 0.421797061920166, "loss": 0.07004305720329285, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 47.20591586607073, "step_time": 0.3844730644226074} +{"epoch": 0, "iter": 10825, "iter_tflops": 20.46874202354686, 
"iter_time": 1.0079316787719725, "loss": 0.41643333435058594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.078705267713172, "step_time": 0.9344340286254883} +{"epoch": 0, "iter": 10826, "iter_tflops": 22.725264270839368, "iter_time": 0.9078483428955078, "loss": 0.3230472803115845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.709667239865787, "step_time": 0.7445449752807618} +{"epoch": 0, "iter": 10827, "iter_tflops": 37.90864567327005, "iter_time": 0.5442318801879883, "loss": 0.36200016736984253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14049703212041, "step_time": 0.5014789562225342} +{"epoch": 0, "iter": 10828, "iter_tflops": 39.05578938797747, "iter_time": 0.5282467422485352, "loss": 0.38845351338386536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.871645546075314, "step_time": 0.4812293357849121} +{"epoch": 0, "iter": 10829, "iter_tflops": 15.553041574024496, "iter_time": 1.3264989624023438, "loss": 0.0073952581733465195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.264052083590272, "step_time": 1.2685088195800782} +{"epoch": 0, "iter": 10830, "iter_tflops": 8.563138747049944, "iter_time": 2.4092910461425783, "loss": 0.002367340726777911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.053347570185455, "step_time": 2.052161567687988} +{"epoch": 0, "iter": 10831, "iter_tflops": 17.161700559744475, "iter_time": 1.2021590423583985, "loss": 0.0026384033262729645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.068549826562663, "step_time": 0.9792365245819092} +{"epoch": 0, "iter": 10832, "iter_tflops": 45.418788234546824, "iter_time": 0.4542413902282715, "loss": 0.0021208105608820915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.14677759818607, "step_time": 0.4114141426086426} +{"epoch": 0, "iter": 10833, "iter_tflops": 18.318094656847183, "iter_time": 1.040358413696289, "loss": 0.2541368901729584, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 19.57351775465258, "step_time": 0.9736310119628906} +{"epoch": 0, "iter": 10834, 
"iter_tflops": 14.730918905671166, "iter_time": 1.2936996002197265, "loss": 0.21366336941719055, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 18.71196651077593, "step_time": 1.018459705352783} +{"epoch": 0, "iter": 10835, "iter_tflops": 30.614359498565992, "iter_time": 0.6224982070922851, "loss": 0.34708866477012634, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 32.900694707947196, "step_time": 0.5792395591735839} +{"epoch": 0, "iter": 10836, "iter_tflops": 29.095157869505975, "iter_time": 0.6550019073486328, "loss": 0.28589460253715515, "lr": 3e-05, "seqlen": 7584.0, "step_tflops": 31.246058699702548, "step_time": 0.6099132080078125} +{"epoch": 0, "iter": 10837, "iter_tflops": 21.204484301459853, "iter_time": 0.972958984375, "loss": 0.5783008337020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.684127312944508, "step_time": 0.9094946975708008} +{"epoch": 0, "iter": 10838, "iter_tflops": 13.713017725731072, "iter_time": 1.5044896697998047, "loss": 0.6390604972839355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.556160357205254, "step_time": 1.0549664726257324} +{"epoch": 0, "iter": 10839, "iter_tflops": 34.902982032609415, "iter_time": 0.5910983047485352, "loss": 0.7519081830978394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.926655814934996, "step_time": 0.5439734420776368} +{"epoch": 0, "iter": 10840, "iter_tflops": 34.36369907529834, "iter_time": 0.6003746414184571, "loss": 0.4864165186882019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.28593756950029, "step_time": 0.5533210334777833} +{"epoch": 0, "iter": 10841, "iter_tflops": 26.74140154993122, "iter_time": 0.7715038223266601, "loss": 0.6655163168907166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.941601427546292, "step_time": 0.7128525199890137} +{"epoch": 0, "iter": 10842, "iter_tflops": 19.025337304125337, "iter_time": 1.0844009323120116, "loss": 0.6561538577079773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.002908893875148, "step_time": 0.8968906326293946} +{"epoch": 0, 
"iter": 10843, "iter_tflops": 35.43169683433871, "iter_time": 0.5822778854370118, "loss": 0.6045099496841431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.5209636875481, "step_time": 0.5355809288024902} +{"epoch": 0, "iter": 10844, "iter_tflops": 42.62876864516463, "iter_time": 0.483971134185791, "loss": 0.7250229716300964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.176746452236, "step_time": 0.44678534317016605} +{"epoch": 0, "iter": 10845, "iter_tflops": 21.387203497626334, "iter_time": 0.7659396743774414, "loss": 0.27222082018852234, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 23.345406576291456, "step_time": 0.701692970275879} +{"epoch": 0, "iter": 10846, "iter_tflops": 29.140871799197523, "iter_time": 0.5621419906616211, "loss": 0.2718983292579651, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 31.099711387081538, "step_time": 0.5267350387573242} +{"epoch": 0, "iter": 10847, "iter_tflops": 29.81099026373008, "iter_time": 0.5495056533813476, "loss": 0.32547324895858765, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 31.73254127411484, "step_time": 0.5162305641174317} +{"epoch": 0, "iter": 10848, "iter_tflops": 29.77247918378814, "iter_time": 0.5502164459228516, "loss": 0.17682921886444092, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 31.70806902903323, "step_time": 0.5166289901733399} +{"epoch": 0, "iter": 10849, "iter_tflops": 23.304782673131776, "iter_time": 0.8852729415893554, "loss": 0.6951149702072144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.422984304525578, "step_time": 0.8447408905029296} +{"epoch": 0, "iter": 10850, "iter_tflops": 14.398630424296963, "iter_time": 1.4328511047363282, "loss": 1.1492763757705688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.020063528867123, "step_time": 1.2878284454345705} +{"epoch": 0, "iter": 10851, "iter_tflops": 41.74682403793896, "iter_time": 0.4941955223083496, "loss": 0.8091306090354919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1859190117269, "step_time": 0.45658235931396485} 
+{"epoch": 0, "iter": 10852, "iter_tflops": 46.20795401651536, "iter_time": 0.4464835968017578, "loss": 0.6954528093338013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.7966827864746, "step_time": 0.41430658340454096} +{"epoch": 0, "iter": 10853, "iter_tflops": 41.13668203892489, "iter_time": 0.5015254631042481, "loss": 0.48888471722602844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.99464361485846, "step_time": 0.45852332305908206} +{"epoch": 0, "iter": 10854, "iter_tflops": 11.018785480827935, "iter_time": 1.8723563995361328, "loss": 0.3619963526725769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.309923787840274, "step_time": 1.5500534667968748} +{"epoch": 0, "iter": 10855, "iter_tflops": 43.32080647140258, "iter_time": 0.4762398300170898, "loss": 0.383188396692276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.732499318263926, "step_time": 0.4414720764160156} +{"epoch": 0, "iter": 10856, "iter_tflops": 49.67330994229271, "iter_time": 0.4153355903625488, "loss": 0.3471418023109436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.90276955645573, "step_time": 0.3827464466094971} +{"epoch": 0, "iter": 10857, "iter_tflops": 18.250169976989277, "iter_time": 0.962883430480957, "loss": 0.1810716986656189, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 19.03733767233477, "step_time": 0.9230695266723633} +{"epoch": 0, "iter": 10858, "iter_tflops": 19.380797205112923, "iter_time": 0.9067112197875977, "loss": 0.20234720408916473, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 21.67808014418773, "step_time": 0.810624656677246} +{"epoch": 0, "iter": 10859, "iter_tflops": 33.09021220912728, "iter_time": 0.5310569229125977, "loss": 0.26849690079689026, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 35.21720045235398, "step_time": 0.49898305511474605} +{"epoch": 0, "iter": 10860, "iter_tflops": 31.05782851023373, "iter_time": 0.56580859375, "loss": 0.2710968554019928, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 33.02250100544673, "step_time": 0.5321458320617676} 
+{"epoch": 0, "iter": 10861, "iter_tflops": 16.506036497002693, "iter_time": 0.7355522766113282, "loss": 0.064495749771595, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 17.438094325259826, "step_time": 0.6962373580932618} +{"epoch": 0, "iter": 10862, "iter_tflops": 8.83037030472238, "iter_time": 1.3749199981689453, "loss": 0.02181662619113922, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 10.274230915384306, "step_time": 1.181699420928955} +{"epoch": 0, "iter": 10863, "iter_tflops": 17.18072874088003, "iter_time": 0.7066669235229491, "loss": 0.030358774587512016, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 22.045234051069553, "step_time": 0.5507336730957032} +{"epoch": 0, "iter": 10864, "iter_tflops": 25.34596597514969, "iter_time": 0.4790132179260254, "loss": 0.04007526859641075, "lr": 3e-05, "seqlen": 4880.0, "step_tflops": 27.973359863800592, "step_time": 0.43402196884155275} +{"epoch": 0, "iter": 10865, "iter_tflops": 18.385466547088342, "iter_time": 1.1221414184570313, "loss": 0.005959881469607353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.715728337972, "step_time": 1.0464281692504882} +{"epoch": 0, "iter": 10866, "iter_tflops": 14.626209127142703, "iter_time": 1.4105564422607424, "loss": 0.004310621414333582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.993461237417545, "step_time": 0.9827390193939209} +{"epoch": 0, "iter": 10867, "iter_tflops": 54.424863404408846, "iter_time": 0.3790747871398925, "loss": 0.006787690334022045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.00085166835643, "step_time": 0.3438466777801513} +{"epoch": 0, "iter": 10868, "iter_tflops": 51.75952562348782, "iter_time": 0.39859510421752936, "loss": 0.002446281723678112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.433394988589036, "step_time": 0.365583065032959} +{"epoch": 0, "iter": 10869, "iter_tflops": 27.205790261330844, "iter_time": 0.5991108093261719, "loss": 0.028976744040846825, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.19634795133531, "step_time": 
0.5582644462585449} +{"epoch": 0, "iter": 10870, "iter_tflops": 8.185912788852429, "iter_time": 1.9911381225585938, "loss": 0.036802079528570175, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 10.397703361939033, "step_time": 1.5675849227905274} +{"epoch": 0, "iter": 10871, "iter_tflops": 11.484851276673579, "iter_time": 1.4191984405517577, "loss": 0.03132621943950653, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 13.635411103613247, "step_time": 1.1953642539978029} +{"epoch": 0, "iter": 10872, "iter_tflops": 14.544827814505602, "iter_time": 1.1206239929199218, "loss": 0.07727963477373123, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 17.93297067102008, "step_time": 0.908900333404541} +{"epoch": 0, "iter": 10873, "iter_tflops": 22.865834829137253, "iter_time": 0.723586685180664, "loss": 0.23237167298793793, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 24.930212342296823, "step_time": 0.6636691818237305} +{"epoch": 0, "iter": 10874, "iter_tflops": 26.390068814010654, "iter_time": 0.6269560623168946, "loss": 0.19097843766212463, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 28.39098879333077, "step_time": 0.5827698974609375} +{"epoch": 0, "iter": 10875, "iter_tflops": 27.248912197209503, "iter_time": 0.6071953811645507, "loss": 0.2017364352941513, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 29.226878183726846, "step_time": 0.5661026649475097} +{"epoch": 0, "iter": 10876, "iter_tflops": 25.497042317571285, "iter_time": 0.6489150161743165, "loss": 0.2400558590888977, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 27.468797701609446, "step_time": 0.602334831237793} +{"epoch": 0, "iter": 10877, "iter_tflops": 29.43456587511846, "iter_time": 0.700913803100586, "loss": 0.03224710375070572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.181127230659655, "step_time": 0.6410929412841797} +{"epoch": 0, "iter": 10878, "iter_tflops": 8.98634538672354, "iter_time": 2.295826904296875, "loss": 0.04763109236955643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.12623797532659, 
"step_time": 2.0373897552490234} +{"epoch": 0, "iter": 10879, "iter_tflops": 10.475451235005288, "iter_time": 1.9694706268310547, "loss": 0.040918029844760895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.054911675453301, "step_time": 1.3703895416259764} +{"epoch": 0, "iter": 10880, "iter_tflops": 42.42941647358629, "iter_time": 0.48624504470825197, "loss": 0.0289088636636734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.0854887580933, "step_time": 0.43816245841979984} +{"epoch": 0, "iter": 10881, "iter_tflops": 21.238728839264095, "iter_time": 0.7153816757202147, "loss": 0.1731189489364624, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 22.490572633933635, "step_time": 0.6755629425048828} +{"epoch": 0, "iter": 10882, "iter_tflops": 8.97145826285034, "iter_time": 1.693570541381836, "loss": 0.22028563916683197, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 10.592700620649795, "step_time": 1.4343648490905763} +{"epoch": 0, "iter": 10883, "iter_tflops": 21.83969499348102, "iter_time": 0.6956964111328124, "loss": 0.28812336921691895, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 23.614254014254126, "step_time": 0.6434163627624511} +{"epoch": 0, "iter": 10884, "iter_tflops": 23.584058827255813, "iter_time": 0.6442401428222656, "loss": 0.20654229819774628, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.28639671760536, "step_time": 0.6008684272766113} +{"epoch": 0, "iter": 10885, "iter_tflops": 14.180881275586009, "iter_time": 1.45485270690918, "loss": 0.8894920349121094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.024571892955827, "step_time": 1.3731568298339845} +{"epoch": 0, "iter": 10886, "iter_tflops": 19.59003114111723, "iter_time": 1.0531424560546874, "loss": 0.47069790959358215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.067582492235886, "step_time": 0.8572150325775147} +{"epoch": 0, "iter": 10887, "iter_tflops": 36.5375568850782, "iter_time": 0.5646544342041016, "loss": 0.6271466016769409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
39.705020150221536, "step_time": 0.5196091938018799} +{"epoch": 0, "iter": 10888, "iter_tflops": 34.79524698777049, "iter_time": 0.5929284973144532, "loss": 0.7952521443367004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.762931602330305, "step_time": 0.5463318824768066} +{"epoch": 0, "iter": 10889, "iter_tflops": 29.54483088572593, "iter_time": 0.6982979049682617, "loss": 0.7905206680297852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.38138182636808, "step_time": 0.6371282615661621} +{"epoch": 0, "iter": 10890, "iter_tflops": 35.90479839153812, "iter_time": 0.57460546875, "loss": 0.645716667175293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.292102746256845, "step_time": 0.5250697231292725} +{"epoch": 0, "iter": 10891, "iter_tflops": 33.530619465420806, "iter_time": 0.6152911529541015, "loss": 0.7227556705474854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.54711335477691, "step_time": 0.5645067863464355} +{"epoch": 0, "iter": 10892, "iter_tflops": 37.49864659270384, "iter_time": 0.5501823501586914, "loss": 0.7180637121200562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.84651748605821, "step_time": 0.5050881881713868} +{"epoch": 0, "iter": 10893, "iter_tflops": 1.6370735301597086, "iter_time": 0.8853934860229492, "loss": 0.4055858552455902, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.746391314038849, "step_time": 0.8299710540771486} +{"epoch": 0, "iter": 10894, "iter_tflops": 1.26423161491544, "iter_time": 1.1465100402832031, "loss": 0.35268929600715637, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.6385210655965523, "step_time": 0.8846112937927246} +{"epoch": 0, "iter": 10895, "iter_tflops": 3.4161721503927334, "iter_time": 0.4242919197082519, "loss": 0.3426770567893982, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.7220805834790633, "step_time": 0.3894204349517822} +{"epoch": 0, "iter": 10896, "iter_tflops": 3.5842299076499406, "iter_time": 0.404397674560547, "loss": 0.44737961888313293, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 
3.8831010079778774, "step_time": 0.3732723503112793} +{"epoch": 0, "iter": 10897, "iter_tflops": 28.89026944964484, "iter_time": 0.7141191101074218, "loss": 0.5153826475143433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.587378065780907, "step_time": 0.6744969596862793} +{"epoch": 0, "iter": 10898, "iter_tflops": 15.868239750863513, "iter_time": 1.3001501007080078, "loss": 0.4087149202823639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.651383522005446, "step_time": 1.1061427955627443} +{"epoch": 0, "iter": 10899, "iter_tflops": 41.67152505782734, "iter_time": 0.49508851623535155, "loss": 0.5659809708595276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.820617184978026, "step_time": 0.46030364608764646} +{"epoch": 0, "iter": 10900, "iter_tflops": 42.61020214252757, "iter_time": 0.4841820144653321, "loss": 0.839601993560791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.75213502120486, "step_time": 0.45093181991577147} +{"epoch": 0, "iter": 10901, "iter_tflops": 20.749523721725524, "iter_time": 0.9942923889160157, "loss": 0.13264906406402588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.782133487947295, "step_time": 0.9471566925048829} +{"epoch": 0, "iter": 10902, "iter_tflops": 13.622898291564097, "iter_time": 1.5144423065185546, "loss": 0.20918379724025726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.42215481858656, "step_time": 1.256296371459961} +{"epoch": 0, "iter": 10903, "iter_tflops": 39.59006322196615, "iter_time": 0.5211179733276368, "loss": 0.1428074836730957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.47569170071657, "step_time": 0.47454319190979005} +{"epoch": 0, "iter": 10904, "iter_tflops": 43.270807249412606, "iter_time": 0.4767901229858399, "loss": 0.13446249067783356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.34038974595546, "step_time": 0.4358032035827637} +{"epoch": 0, "iter": 10905, "iter_tflops": 18.84857290356013, "iter_time": 1.0945705871582032, "loss": 0.24816729128360748, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.190755970129317, "step_time": 1.0218088684082032} +{"epoch": 0, "iter": 10906, "iter_tflops": 16.0820262749283, "iter_time": 1.282866546630859, "loss": 0.20915542542934418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.297066713441406, "step_time": 1.1275628948211671} +{"epoch": 0, "iter": 10907, "iter_tflops": 39.66036947452443, "iter_time": 0.5201941833496093, "loss": 0.17299945652484894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.622503707448736, "step_time": 0.4729461116790772} +{"epoch": 0, "iter": 10908, "iter_tflops": 40.8078768428013, "iter_time": 0.5055664520263673, "loss": 0.11657018214464188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62556176956415, "step_time": 0.4623156032562256} +{"epoch": 0, "iter": 10909, "iter_tflops": 27.352550095411157, "iter_time": 0.7542658157348633, "loss": 0.6746225357055664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.73536864416958, "step_time": 0.6938233642578125} +{"epoch": 0, "iter": 10910, "iter_tflops": 37.046529800945706, "iter_time": 0.5568967895507813, "loss": 0.8070075511932373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.398052559769326, "step_time": 0.4866047439575195} +{"epoch": 0, "iter": 10911, "iter_tflops": 47.24999935238792, "iter_time": 0.436636905670166, "loss": 0.8406556248664856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.29289491479989, "step_time": 0.4022212734222412} +{"epoch": 0, "iter": 10912, "iter_tflops": 47.48534735080799, "iter_time": 0.4344728355407715, "loss": 0.62351393699646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.48259680612061, "step_time": 0.4007391777038574} +{"epoch": 0, "iter": 10913, "iter_tflops": 25.317338413346118, "iter_time": 0.8148997802734375, "loss": 0.34880775213241577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.67707901431724, "step_time": 0.7733640365600586} +{"epoch": 0, "iter": 10914, "iter_tflops": 13.810763261492852, "iter_time": 1.4938416595458985, "loss": 0.32852229475975037, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 16.410957210793626, "step_time": 1.2571535739898683} +{"epoch": 0, "iter": 10915, "iter_tflops": 41.064411650480416, "iter_time": 0.5024081115722656, "loss": 0.36119920015335083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.963693633169505, "step_time": 0.4588389396667481} +{"epoch": 0, "iter": 10916, "iter_tflops": 38.90524320200763, "iter_time": 0.5302908248901367, "loss": 0.37789827585220337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53368933433666, "step_time": 0.48505299758911136} +{"epoch": 0, "iter": 10917, "iter_tflops": 26.66689251227735, "iter_time": 0.773659454345703, "loss": 0.11538924276828766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.884146756155076, "step_time": 0.7142704849243164} +{"epoch": 0, "iter": 10918, "iter_tflops": 7.346215856367555, "iter_time": 2.8083973999023435, "loss": 0.15133115649223328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.26926114115136, "step_time": 2.494913772583008} +{"epoch": 0, "iter": 10919, "iter_tflops": 12.76798898203075, "iter_time": 1.6158451843261719, "loss": 0.10281096398830414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.059161761612174, "step_time": 1.2846930503845215} +{"epoch": 0, "iter": 10920, "iter_tflops": 39.75623694958056, "iter_time": 0.5189397964477539, "loss": 0.11288915574550629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.81002995122933, "step_time": 0.4709216938018799} +{"epoch": 0, "iter": 10921, "iter_tflops": 16.956832877068315, "iter_time": 0.9491338424682617, "loss": 0.22346964478492737, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 17.940922597099018, "step_time": 0.8970722579956054} +{"epoch": 0, "iter": 10922, "iter_tflops": 13.64188406931861, "iter_time": 1.1797713470458984, "loss": 0.25456321239471436, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 16.472851767430324, "step_time": 0.9770198974609374} +{"epoch": 0, "iter": 10923, "iter_tflops": 28.16022076199944, "iter_time": 0.5715261993408203, "loss": 0.17398391664028168, "lr": 
3e-05, "seqlen": 6432.0, "step_tflops": 30.062747635633077, "step_time": 0.5353570518493652} +{"epoch": 0, "iter": 10924, "iter_tflops": 28.950885423678514, "iter_time": 0.5559175033569336, "loss": 0.2495991438627243, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 30.860153076566142, "step_time": 0.5215237884521484} +{"epoch": 0, "iter": 10925, "iter_tflops": 25.00719320181612, "iter_time": 0.8250063629150391, "loss": 0.14551791548728943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.327738172471843, "step_time": 0.7836257476806641} +{"epoch": 0, "iter": 10926, "iter_tflops": 15.155408860762336, "iter_time": 1.3613023376464843, "loss": 0.09417466819286346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.598620101121828, "step_time": 1.0015764846801758} +{"epoch": 0, "iter": 10927, "iter_tflops": 41.278938637911075, "iter_time": 0.4997970924377441, "loss": 0.14024892449378967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.528257942004515, "step_time": 0.4531491966247559} +{"epoch": 0, "iter": 10928, "iter_tflops": 44.2445596998025, "iter_time": 0.46629673004150385, "loss": 0.13281533122062683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.729971502507176, "step_time": 0.42337585830688473} +{"epoch": 0, "iter": 10929, "iter_tflops": 18.03000117855945, "iter_time": 1.144264678955078, "loss": 0.0010238338727504015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.338585862945912, "step_time": 1.0668356857299806} +{"epoch": 0, "iter": 10930, "iter_tflops": 17.3498230509259, "iter_time": 1.1891241455078125, "loss": 0.018813174217939377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.42405775715048, "step_time": 0.9629872055053712} +{"epoch": 0, "iter": 10931, "iter_tflops": 53.18124119309353, "iter_time": 0.38793930053710934, "loss": 0.004016360733658075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.54959603905338, "step_time": 0.35236952781677244} +{"epoch": 0, "iter": 10932, "iter_tflops": 55.23350485444901, "iter_time": 0.37352497482299807, "loss": 
0.005192401818931103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.49796339901408, "step_time": 0.34102128982543944} +{"epoch": 0, "iter": 10933, "iter_tflops": 27.421121809953874, "iter_time": 0.7523796310424804, "loss": 0.4645146131515503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.846373513841247, "step_time": 0.715205795288086} +{"epoch": 0, "iter": 10934, "iter_tflops": 15.823080630831516, "iter_time": 1.3038607330322267, "loss": 0.5403311252593994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.799712820472553, "step_time": 1.1590688972473144} +{"epoch": 0, "iter": 10935, "iter_tflops": 34.23528939429674, "iter_time": 0.6026265258789062, "loss": 0.5404243469238281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.86757324534693, "step_time": 0.5448221721649169} +{"epoch": 0, "iter": 10936, "iter_tflops": 43.90668763382821, "iter_time": 0.4698849906921387, "loss": 0.4548884332180023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.89524890384004, "step_time": 0.43075448989868165} +{"epoch": 0, "iter": 10937, "iter_tflops": 17.868111372179296, "iter_time": 1.1546320190429686, "loss": 0.41204798221588135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.32117650058336, "step_time": 1.0677969589233398} +{"epoch": 0, "iter": 10938, "iter_tflops": 17.39375447617888, "iter_time": 1.1861207733154295, "loss": 0.47354841232299805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.395881292616608, "step_time": 0.9212003421783447} +{"epoch": 0, "iter": 10939, "iter_tflops": 40.999833380657186, "iter_time": 0.503199447631836, "loss": 0.5578126907348633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.81060881539399, "step_time": 0.4604064540863037} +{"epoch": 0, "iter": 10940, "iter_tflops": 37.127799024101385, "iter_time": 0.5556777954101563, "loss": 0.5505658388137817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.33422820363871, "step_time": 0.5115033664703369} +{"epoch": 0, "iter": 10941, "iter_tflops": 17.77571940150348, "iter_time": 
1.1606333923339844, "loss": 0.3836422562599182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.024595645970205, "step_time": 1.0844432067871095} +{"epoch": 0, "iter": 10942, "iter_tflops": 12.868057127249944, "iter_time": 1.6032796020507811, "loss": 0.3681224584579468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.303312030775995, "step_time": 1.1923204917907715} +{"epoch": 0, "iter": 10943, "iter_tflops": 35.736853228325245, "iter_time": 0.5773058242797852, "loss": 0.5104061961174011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.20025415990325, "step_time": 0.5262999935150147} +{"epoch": 0, "iter": 10944, "iter_tflops": 36.164517991729326, "iter_time": 0.5704788742065429, "loss": 0.5219358205795288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.559495010278695, "step_time": 0.5215206489562988} +{"epoch": 0, "iter": 10945, "iter_tflops": 20.098715832546283, "iter_time": 1.0264881439208984, "loss": 0.5324772000312805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.685771107315915, "step_time": 0.9513654556274413} +{"epoch": 0, "iter": 10946, "iter_tflops": 16.083282537141166, "iter_time": 1.282766342163086, "loss": 0.47548237442970276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.687012538509336, "step_time": 0.9972969017028808} +{"epoch": 0, "iter": 10947, "iter_tflops": 48.575010896386296, "iter_time": 0.4247264823913574, "loss": 0.43826064467430115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.75680954673645, "step_time": 0.3910602951049805} +{"epoch": 0, "iter": 10948, "iter_tflops": 46.97652308071115, "iter_time": 0.43917881011962895, "loss": 0.5276157259941101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.839275667467135, "step_time": 0.4058101387023926} +{"epoch": 0, "iter": 10949, "iter_tflops": 32.98730681156933, "iter_time": 0.6254252166748047, "loss": 0.7277523279190063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.30172188158279, "step_time": 0.5844217338562011} +{"epoch": 0, "iter": 10950, "iter_tflops": 44.90468901506865, 
"iter_time": 0.45944185256958003, "loss": 0.5743252038955688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.96875949431089, "step_time": 0.421311336517334} +{"epoch": 0, "iter": 10951, "iter_tflops": 45.73136443901014, "iter_time": 0.4511366271972656, "loss": 0.5074012875556946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.424461359000816, "step_time": 0.4174267749786377} +{"epoch": 0, "iter": 10952, "iter_tflops": 49.30694689848777, "iter_time": 0.41842163848876957, "loss": 0.6658698320388794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.22862361610997, "step_time": 0.38759396934509277} +{"epoch": 0, "iter": 10953, "iter_tflops": 25.785893548228294, "iter_time": 0.8000922470092775, "loss": 0.17835113406181335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.232759495637353, "step_time": 0.7575836563110352} +{"epoch": 0, "iter": 10954, "iter_tflops": 12.975644946773734, "iter_time": 1.5899859771728515, "loss": 0.24332091212272644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.475288348151327, "step_time": 1.1805867290496828} +{"epoch": 0, "iter": 10955, "iter_tflops": 38.50268332664672, "iter_time": 0.5358352127075195, "loss": 0.21497668325901031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05123396566219, "step_time": 0.49061802864074705} +{"epoch": 0, "iter": 10956, "iter_tflops": 35.797496241306014, "iter_time": 0.5763278350830079, "loss": 0.22243447601795197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.190889516117046, "step_time": 0.5264257526397705} +{"epoch": 0, "iter": 10957, "iter_tflops": 19.21484513609782, "iter_time": 1.0737059478759765, "loss": 0.13384594023227692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.562493610234824, "step_time": 1.003336166381836} +{"epoch": 0, "iter": 10958, "iter_tflops": 32.63225080015937, "iter_time": 0.6322301712036134, "loss": 0.06723014265298843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.79114816688737, "step_time": 0.4936713733673096} +{"epoch": 0, "iter": 10959, "iter_tflops": 
52.02589207022436, "iter_time": 0.3965543441772461, "loss": 0.08443652838468552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.67190591292144, "step_time": 0.36404446220397946} +{"epoch": 0, "iter": 10960, "iter_tflops": 54.11488509389694, "iter_time": 0.38124618530273435, "loss": 0.08855468034744263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.12315102046321, "step_time": 0.3489511833190918} +{"epoch": 0, "iter": 10961, "iter_tflops": 37.44880636280055, "iter_time": 0.4341467552185059, "loss": 0.0035721061285585165, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 41.12503461505809, "step_time": 0.3953377285003662} +{"epoch": 0, "iter": 10962, "iter_tflops": 10.253081367391033, "iter_time": 1.5856967468261718, "loss": 0.009488451294600964, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 10.840194740548604, "step_time": 1.4998141784667969} +{"epoch": 0, "iter": 10963, "iter_tflops": 6.303608750380847, "iter_time": 2.5792015991210935, "loss": 0.018741384148597717, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 7.818534863657634, "step_time": 2.079453254699707} +{"epoch": 0, "iter": 10964, "iter_tflops": 22.22056875103678, "iter_time": 0.7316769409179686, "loss": 0.0041891951113939285, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 27.92045502933091, "step_time": 0.5823070487976074} +{"epoch": 0, "iter": 10965, "iter_tflops": 14.062106198594552, "iter_time": 1.0833853607177732, "loss": 0.22166335582733154, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 15.034215040295921, "step_time": 1.0133339157104493} +{"epoch": 0, "iter": 10966, "iter_tflops": 12.1922586452498, "iter_time": 1.2495371398925783, "loss": 0.2916523516178131, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 15.490245830005916, "step_time": 0.9835014991760255} +{"epoch": 0, "iter": 10967, "iter_tflops": 27.57316915488875, "iter_time": 0.5525182800292969, "loss": 0.14666281640529633, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 29.412532678351536, "step_time": 0.5179655952453613} +{"epoch": 0, "iter": 
10968, "iter_tflops": 26.210033226277975, "iter_time": 0.5812537460327148, "loss": 0.3363998830318451, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 27.742117567988014, "step_time": 0.5491534652709961} +{"epoch": 0, "iter": 10969, "iter_tflops": 28.632977722823895, "iter_time": 0.7205360794067384, "loss": 0.2584632933139801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.29887467979751, "step_time": 0.6809194641113281} +{"epoch": 0, "iter": 10970, "iter_tflops": 15.586807651529842, "iter_time": 1.3236253356933594, "loss": 0.21656730771064758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.135508887375153, "step_time": 0.9320361061096193} +{"epoch": 0, "iter": 10971, "iter_tflops": 46.98398685549974, "iter_time": 0.4391090431213379, "loss": 0.2971683740615845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.97213176119601, "step_time": 0.4047524166107178} +{"epoch": 0, "iter": 10972, "iter_tflops": 48.65275121120691, "iter_time": 0.4240478286743164, "loss": 0.17382024228572845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.05914318488793, "step_time": 0.38883201408386225} +{"epoch": 0, "iter": 10973, "iter_tflops": 20.751998855526857, "iter_time": 0.9941737976074219, "loss": 0.6213080883026123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.67928822031518, "step_time": 0.9516499481201172} +{"epoch": 0, "iter": 10974, "iter_tflops": 14.159880090037287, "iter_time": 1.4570104675292967, "loss": 0.5529426336288452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.432972193782902, "step_time": 1.1192494239807127} +{"epoch": 0, "iter": 10975, "iter_tflops": 39.21650782005233, "iter_time": 0.5260818634033202, "loss": 0.6524434685707092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94634702957412, "step_time": 0.4803922786712646} +{"epoch": 0, "iter": 10976, "iter_tflops": 40.49693060657016, "iter_time": 0.5094483261108399, "loss": 0.7161975502967834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.855353091671766, "step_time": 0.47043501091003415} +{"epoch": 
0, "iter": 10977, "iter_tflops": 35.0317141291971, "iter_time": 0.5889261779785155, "loss": 0.6023639440536499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.44070874055656, "step_time": 0.5366990928649903} +{"epoch": 0, "iter": 10978, "iter_tflops": 35.30454550502043, "iter_time": 0.5843749923706055, "loss": 0.6808117628097534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.39254029811131, "step_time": 0.5373724517822266} +{"epoch": 0, "iter": 10979, "iter_tflops": 40.084020434440745, "iter_time": 0.5146962127685547, "loss": 0.8092491030693054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.34293336424428, "step_time": 0.4759967060089111} +{"epoch": 0, "iter": 10980, "iter_tflops": 38.18811216723439, "iter_time": 0.54024910736084, "loss": 0.6709077954292297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.326725819559364, "step_time": 0.4992191638946533} +{"epoch": 0, "iter": 10981, "iter_tflops": 26.94830908647639, "iter_time": 0.7655802612304687, "loss": 0.14753609895706177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.142032068898363, "step_time": 0.707949722290039} +{"epoch": 0, "iter": 10982, "iter_tflops": 45.36946413026754, "iter_time": 0.45473522567749025, "loss": 0.19843563437461853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.133635593006815, "step_time": 0.40347402000427246} +{"epoch": 0, "iter": 10983, "iter_tflops": 49.07979983480273, "iter_time": 0.42035814285278317, "loss": 0.15287815034389496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.1349262827423, "step_time": 0.3882774467468262} +{"epoch": 0, "iter": 10984, "iter_tflops": 46.68901971263586, "iter_time": 0.44188320159912103, "loss": 0.1904703825712204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.22939384929606, "step_time": 0.4107374572753906} +{"epoch": 0, "iter": 10985, "iter_tflops": 42.18677755773915, "iter_time": 0.4890417022705078, "loss": 0.018742162734270096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.84333570444969, "step_time": 0.45003473663330074} 
+{"epoch": 0, "iter": 10986, "iter_tflops": 51.0415058052124, "iter_time": 0.40420228958129883, "loss": 0.019853642210364342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.925211341503726, "step_time": 0.36242453956604004} +{"epoch": 0, "iter": 10987, "iter_tflops": 57.75251751039097, "iter_time": 0.35723279953002934, "loss": 0.014712866395711899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.20587083481485, "step_time": 0.32641103172302244} +{"epoch": 0, "iter": 10988, "iter_tflops": 56.701630695027056, "iter_time": 0.3638536186218262, "loss": 0.04838702827692032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.71983816500789, "step_time": 0.33427005195617676} +{"epoch": 0, "iter": 10989, "iter_tflops": 30.244388374139827, "iter_time": 0.6821461639404296, "loss": 0.16745510697364807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.18357098767773, "step_time": 0.6410442619323731} +{"epoch": 0, "iter": 10990, "iter_tflops": 20.213002258017116, "iter_time": 1.0206842727661134, "loss": 0.14174708724021912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.33116751392739, "step_time": 0.8479286289215089} +{"epoch": 0, "iter": 10991, "iter_tflops": 47.310588718547535, "iter_time": 0.4360777168273926, "loss": 0.2083059549331665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.341309370466945, "step_time": 0.4018419818878174} +{"epoch": 0, "iter": 10992, "iter_tflops": 54.527998460109906, "iter_time": 0.3783577995300293, "loss": 0.19273684918880463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.23411267956351, "step_time": 0.3482975025177002} +{"epoch": 0, "iter": 10993, "iter_tflops": 30.741188383936723, "iter_time": 0.671122184753418, "loss": 0.004578865598887205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.69903355614062, "step_time": 0.6309389381408691} +{"epoch": 0, "iter": 10994, "iter_tflops": 15.437214655560323, "iter_time": 1.3364518127441407, "loss": 0.0051082768477499485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.249781285329277, 
"step_time": 1.018830436706543} +{"epoch": 0, "iter": 10995, "iter_tflops": 50.16507242637964, "iter_time": 0.4112641029357911, "loss": 0.011609284207224846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.446527647098556, "step_time": 0.3720899105072021} +{"epoch": 0, "iter": 10996, "iter_tflops": 54.57803444690302, "iter_time": 0.378010929107666, "loss": 0.003479183418676257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.79705869275005, "step_time": 0.345018533706665} +{"epoch": 0, "iter": 10997, "iter_tflops": 36.24914479440341, "iter_time": 0.5691470413208007, "loss": 0.7951765656471252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.8203435196602, "step_time": 0.5314505653381347} +{"epoch": 0, "iter": 10998, "iter_tflops": 43.59395156458716, "iter_time": 0.4732558708190919, "loss": 0.8046214580535889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.99934771916952, "step_time": 0.43896552848815923} +{"epoch": 0, "iter": 10999, "iter_tflops": 43.89870780212987, "iter_time": 0.46997040557861325, "loss": 0.756417989730835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20974773734999, "step_time": 0.4370091876983642} +{"epoch": 0, "iter": 11000, "iter_tflops": 44.12938067431277, "iter_time": 0.4675137786865235, "loss": 0.7739554047584534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.68835009987064, "step_time": 0.4326233444213867} +{"epoch": 0, "iter": 11001, "iter_tflops": 30.163357775354417, "iter_time": 0.6839786758422851, "loss": 0.7142225503921509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.1431877933232, "step_time": 0.6418496398925781} +{"epoch": 0, "iter": 11002, "iter_tflops": 12.89543032301899, "iter_time": 1.5998763122558595, "loss": 0.5468354821205139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.809417342696472, "step_time": 1.304987594604492} +{"epoch": 0, "iter": 11003, "iter_tflops": 36.11006203864852, "iter_time": 0.5713391876220704, "loss": 0.6330192685127258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56908732271103, 
"step_time": 0.5213942222595215} +{"epoch": 0, "iter": 11004, "iter_tflops": 33.77917126594513, "iter_time": 0.6107637557983399, "loss": 0.6573932766914368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.64956576251023, "step_time": 0.5629287300109863} +{"epoch": 0, "iter": 11005, "iter_tflops": 26.67528007239226, "iter_time": 0.7734161911010743, "loss": 0.6733512878417969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.589523843803477, "step_time": 0.697243173599243} +{"epoch": 0, "iter": 11006, "iter_tflops": 38.927485309826324, "iter_time": 0.5299878311157227, "loss": 0.5411103963851929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.54284706993496, "step_time": 0.48494858551025394} +{"epoch": 0, "iter": 11007, "iter_tflops": 37.8755241640641, "iter_time": 0.5447078018188477, "loss": 0.6365611553192139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10933020331309, "step_time": 0.5018591499328613} +{"epoch": 0, "iter": 11008, "iter_tflops": 36.6046834211198, "iter_time": 0.5636189575195312, "loss": 0.6541303396224976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.72098509990965, "step_time": 0.5194003486633301} +{"epoch": 0, "iter": 11009, "iter_tflops": 22.602720629135845, "iter_time": 0.912770362854004, "loss": 0.2531243860721588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.10426522906707, "step_time": 0.8559104919433593} +{"epoch": 0, "iter": 11010, "iter_tflops": 9.860429970473739, "iter_time": 2.092311752319336, "loss": 0.3774985373020172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.75821272228222, "step_time": 1.7546113510131836} +{"epoch": 0, "iter": 11011, "iter_tflops": 20.739874366392186, "iter_time": 0.9947549896240234, "loss": 0.3599957525730133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.495787337525137, "step_time": 0.8780762786865234} +{"epoch": 0, "iter": 11012, "iter_tflops": 45.32234727391555, "iter_time": 0.45520796585083007, "loss": 0.3219272196292877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0812389418117, 
"step_time": 0.420345817565918} +{"epoch": 0, "iter": 11013, "iter_tflops": 20.39358467953323, "iter_time": 0.7771308975219726, "loss": 0.23318663239479065, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 21.564740526022376, "step_time": 0.7349258270263671} +{"epoch": 0, "iter": 11014, "iter_tflops": 9.896865910187493, "iter_time": 1.601363998413086, "loss": 0.17632238566875458, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 12.057012169269443, "step_time": 1.3144620361328125} +{"epoch": 0, "iter": 11015, "iter_tflops": 23.525695322046175, "iter_time": 0.6736670074462892, "loss": 0.3175601661205292, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.42188909441748, "step_time": 0.6234188461303711} +{"epoch": 0, "iter": 11016, "iter_tflops": 25.829211284749736, "iter_time": 0.6135876388549806, "loss": 0.37653934955596924, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 27.62704661741673, "step_time": 0.5736583061218262} +{"epoch": 0, "iter": 11017, "iter_tflops": 33.09573428927716, "iter_time": 0.6233762130737306, "loss": 0.1621200144290924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.58083335205686, "step_time": 0.5639864273071289} +{"epoch": 0, "iter": 11018, "iter_tflops": 47.23384781806002, "iter_time": 0.43678621292114256, "loss": 0.11731984466314316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.565542863041074, "step_time": 0.4000945663452149} +{"epoch": 0, "iter": 11019, "iter_tflops": 48.26596686362058, "iter_time": 0.42744598007202145, "loss": 0.15490660071372986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37770058181393, "step_time": 0.39389078330993654} +{"epoch": 0, "iter": 11020, "iter_tflops": 48.63916210724386, "iter_time": 0.4241663017272949, "loss": 0.12933842837810516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.83698365874466, "step_time": 0.39046690559387204} +{"epoch": 0, "iter": 11021, "iter_tflops": 42.160287147740824, "iter_time": 0.4893489799499512, "loss": 0.625076949596405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
45.64809662678498, "step_time": 0.4519595565795898} +{"epoch": 0, "iter": 11022, "iter_tflops": 9.083276695713966, "iter_time": 2.271327209472656, "loss": 0.7521596550941467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.696731919923312, "step_time": 1.9287286682128908} +{"epoch": 0, "iter": 11023, "iter_tflops": 11.358543695352012, "iter_time": 1.8163502349853515, "loss": 0.7267675399780273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.168467085829553, "step_time": 1.4561274261474613} +{"epoch": 0, "iter": 11024, "iter_tflops": 38.43249762140529, "iter_time": 0.5368137588500976, "loss": 0.6094473004341125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.32178026672551, "step_time": 0.48748170280456543} +{"epoch": 0, "iter": 11025, "iter_tflops": 10.873514351137375, "iter_time": 1.348483383178711, "loss": 0.3186119794845581, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 11.548183931122697, "step_time": 1.2697021026611328} +{"epoch": 0, "iter": 11026, "iter_tflops": 12.712849851547563, "iter_time": 1.1533805236816406, "loss": 0.22900322079658508, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 14.437511139278781, "step_time": 1.0156011848449709} +{"epoch": 0, "iter": 11027, "iter_tflops": 26.483112694100083, "iter_time": 0.5536642761230468, "loss": 0.2149726301431656, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 28.219889661208725, "step_time": 0.5195893249511718} +{"epoch": 0, "iter": 11028, "iter_tflops": 27.8415294438365, "iter_time": 0.5266504287719727, "loss": 0.2846893072128296, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 29.523593682063453, "step_time": 0.49664527893066407} +{"epoch": 0, "iter": 11029, "iter_tflops": 40.70175228614596, "iter_time": 0.506884651184082, "loss": 0.043052878230810165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25189634695843, "step_time": 0.4662194213867187} +{"epoch": 0, "iter": 11030, "iter_tflops": 9.127370148675983, "iter_time": 2.260354644775391, "loss": 0.03000829741358757, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 11.45922019845669, "step_time": 1.800392448425293} +{"epoch": 0, "iter": 11031, "iter_tflops": 21.54473684171542, "iter_time": 0.9575932006835937, "loss": 0.07551181316375732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.797578555496337, "step_time": 0.8319801654815674} +{"epoch": 0, "iter": 11032, "iter_tflops": 39.802697486614456, "iter_time": 0.5183340530395507, "loss": 0.030579516664147377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.041537422511006, "step_time": 0.4684462604522705} +{"epoch": 0, "iter": 11033, "iter_tflops": 15.533122942771596, "iter_time": 1.0282139129638672, "loss": 0.20604467391967773, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 16.771827904478148, "step_time": 0.9522738494873048} +{"epoch": 0, "iter": 11034, "iter_tflops": 21.627547338039285, "iter_time": 0.738473617553711, "loss": 0.2241741418838501, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 24.043783232146968, "step_time": 0.6642620658874512} +{"epoch": 0, "iter": 11035, "iter_tflops": 27.265015120675013, "iter_time": 0.5857826614379883, "loss": 0.1139478012919426, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 29.060616500278638, "step_time": 0.5495882415771485} +{"epoch": 0, "iter": 11036, "iter_tflops": 29.001372249435473, "iter_time": 0.5507109451293944, "loss": 0.21157515048980713, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.87280243959707, "step_time": 0.5173282585144042} +{"epoch": 0, "iter": 11037, "iter_tflops": 18.30456924119853, "iter_time": 1.1271007385253906, "loss": 0.3297319710254669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.01452895170162, "step_time": 1.0850173339843752} +{"epoch": 0, "iter": 11038, "iter_tflops": 12.793333780072015, "iter_time": 1.6126440429687496, "loss": 0.5540390014648438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.306459365167367, "step_time": 1.2652098808288574} +{"epoch": 0, "iter": 11039, "iter_tflops": 35.72491323327565, "iter_time": 0.5774987716674805, "loss": 0.43345925211906433, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 39.13220723501191, "step_time": 0.5272151756286622} +{"epoch": 0, "iter": 11040, "iter_tflops": 35.801231794097745, "iter_time": 0.5762677001953125, "loss": 0.5234987139701843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.23710677985354, "step_time": 0.5258056774139405} +{"epoch": 0, "iter": 11041, "iter_tflops": 19.643979351332813, "iter_time": 1.050250213623047, "loss": 0.45286744832992554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.851407613436937, "step_time": 0.9894340896606445} +{"epoch": 0, "iter": 11042, "iter_tflops": 8.955526947922758, "iter_time": 2.303727478027344, "loss": 0.37698280811309814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.537287880599566, "step_time": 1.957913055419922} +{"epoch": 0, "iter": 11043, "iter_tflops": 14.241270421629123, "iter_time": 1.4486835021972657, "loss": 0.43065041303634644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.306506351282366, "step_time": 1.1269814739227295} +{"epoch": 0, "iter": 11044, "iter_tflops": 38.773910815453746, "iter_time": 0.5320869903564454, "loss": 0.48903971910476685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37911857878111, "step_time": 0.48682214736938473} +{"epoch": 0, "iter": 11045, "iter_tflops": 19.635203415085854, "iter_time": 0.7342895736694337, "loss": 0.1466292291879654, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 21.400716074763395, "step_time": 0.6737122764587402} +{"epoch": 0, "iter": 11046, "iter_tflops": 20.27046023588995, "iter_time": 0.7112776412963867, "loss": 0.2934585511684418, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 21.82618155117776, "step_time": 0.6605793647766114} +{"epoch": 0, "iter": 11047, "iter_tflops": 21.862574921335177, "iter_time": 0.6594797363281251, "loss": 0.23846513032913208, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 23.625897396336896, "step_time": 0.6102593650817871} +{"epoch": 0, "iter": 11048, "iter_tflops": 22.01737734001711, "iter_time": 0.6548429870605469, "loss": 0.224343404173851, 
"lr": 3e-05, "seqlen": 5776.0, "step_tflops": 23.710795461241343, "step_time": 0.608074291229248} +{"epoch": 0, "iter": 11049, "iter_tflops": 16.565947787329062, "iter_time": 1.2453916778564453, "loss": 0.37851500511169434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.714779894634795, "step_time": 1.1646260147094725} +{"epoch": 0, "iter": 11050, "iter_tflops": 22.19787986373137, "iter_time": 0.9294172973632812, "loss": 0.5705945491790771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.16100977142947, "step_time": 0.7595849227905273} +{"epoch": 0, "iter": 11051, "iter_tflops": 49.06938563874167, "iter_time": 0.42044735717773435, "loss": 0.4281282126903534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.07614650909745, "step_time": 0.38870744895935055} +{"epoch": 0, "iter": 11052, "iter_tflops": 50.71294985537662, "iter_time": 0.40682101058959963, "loss": 0.3824501037597656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.690228194460765, "step_time": 0.37723546218872067} +{"epoch": 0, "iter": 11053, "iter_tflops": 28.299458774907485, "iter_time": 0.729027847290039, "loss": 0.16881699860095978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.88162167063067, "step_time": 0.6904275054931641} +{"epoch": 0, "iter": 11054, "iter_tflops": 17.061906267170738, "iter_time": 1.2091904144287111, "loss": 0.207929328083992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.80618152510834, "step_time": 1.0970378799438476} +{"epoch": 0, "iter": 11055, "iter_tflops": 38.31828600237497, "iter_time": 0.5384137878417968, "loss": 0.1455293595790863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.217756393619844, "step_time": 0.4886828498840332} +{"epoch": 0, "iter": 11056, "iter_tflops": 41.69511034889459, "iter_time": 0.494808464050293, "loss": 0.2716902494430542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.47297736978705, "step_time": 0.4537000808715821} +{"epoch": 0, "iter": 11057, "iter_tflops": 18.96094301990127, "iter_time": 1.088083724975586, "loss": 
0.6591173410415649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.284639567170363, "step_time": 1.0170796203613282} +{"epoch": 0, "iter": 11058, "iter_tflops": 13.864789831716841, "iter_time": 1.4880206451416014, "loss": 0.7059583067893982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.341739483829045, "step_time": 1.2624784240722655} +{"epoch": 0, "iter": 11059, "iter_tflops": 38.234563917215596, "iter_time": 0.5395927505493164, "loss": 0.7219477295875549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.50137497944672, "step_time": 0.49711831283569324} +{"epoch": 0, "iter": 11060, "iter_tflops": 39.90905179018114, "iter_time": 0.5169527359008789, "loss": 0.6724611520767212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.32395226511888, "step_time": 0.47620524978637696} +{"epoch": 0, "iter": 11061, "iter_tflops": 21.96049969783427, "iter_time": 0.930016960144043, "loss": 0.072283536195755, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 23.39079192890936, "step_time": 0.8731485977172853} +{"epoch": 0, "iter": 11062, "iter_tflops": 7.745175443893269, "iter_time": 2.6369495849609375, "loss": 0.10026400536298752, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 8.949101354512658, "step_time": 2.2821997833251957} +{"epoch": 0, "iter": 11063, "iter_tflops": 13.028908908706173, "iter_time": 1.5675631256103515, "loss": 0.04784436151385307, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 15.920123049723662, "step_time": 1.282881866455078} +{"epoch": 0, "iter": 11064, "iter_tflops": 31.250364107863113, "iter_time": 0.6535487747192383, "loss": 0.0631934180855751, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 35.186168032833194, "step_time": 0.5804450531005859} +{"epoch": 0, "iter": 11065, "iter_tflops": 14.472492619221287, "iter_time": 1.0781028442382814, "loss": 0.19971562922000885, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 15.646449791546413, "step_time": 0.9972125091552735} +{"epoch": 0, "iter": 11066, "iter_tflops": 12.650941771540579, "iter_time": 
1.2333339080810548, "loss": 0.27346286177635193, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 17.457722090938027, "step_time": 0.8937497901916505} +{"epoch": 0, "iter": 11067, "iter_tflops": 27.504234129402086, "iter_time": 0.5672884902954102, "loss": 0.4158432185649872, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.13119166266118, "step_time": 0.5356058082580567} +{"epoch": 0, "iter": 11068, "iter_tflops": 29.174378541287275, "iter_time": 0.5348129501342774, "loss": 0.3335442543029785, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.993030799746062, "step_time": 0.5034304504394531} +{"epoch": 0, "iter": 11069, "iter_tflops": 27.112784515497967, "iter_time": 0.7609359893798827, "loss": 0.3957621157169342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.597210809953083, "step_time": 0.7214372634887695} +{"epoch": 0, "iter": 11070, "iter_tflops": 11.17404036870151, "iter_time": 1.8463414154052735, "loss": 0.3995577096939087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.674571368300253, "step_time": 1.2372787914276124} +{"epoch": 0, "iter": 11071, "iter_tflops": 33.362372627266346, "iter_time": 0.6183940734863281, "loss": 0.48163726925849915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.51117731472236, "step_time": 0.5650624008178711} +{"epoch": 0, "iter": 11072, "iter_tflops": 38.06969866366017, "iter_time": 0.5419295196533204, "loss": 0.48529788851737976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62242380789752, "step_time": 0.49567256355285644} +{"epoch": 0, "iter": 11073, "iter_tflops": 30.349925765021517, "iter_time": 0.6797741012573242, "loss": 0.43511712551116943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.6825641808769, "step_time": 0.6125155258178712} +{"epoch": 0, "iter": 11074, "iter_tflops": 40.6750633716714, "iter_time": 0.5072172431945801, "loss": 0.3839728534221649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.84093751544661, "step_time": 0.46009505271911627} +{"epoch": 0, "iter": 11075, "iter_tflops": 37.65505866598763, 
"iter_time": 0.5478969955444335, "loss": 0.4436538517475128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.31496989511113, "step_time": 0.4993612136840821} +{"epoch": 0, "iter": 11076, "iter_tflops": 39.93051683523442, "iter_time": 0.5166748428344726, "loss": 0.3861273229122162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.58790022639104, "step_time": 0.4733215732574463} +{"epoch": 0, "iter": 11077, "iter_tflops": 21.360503736443754, "iter_time": 0.9658523864746092, "loss": 0.29854246973991394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.16405555717965, "step_time": 0.8906511840820311} +{"epoch": 0, "iter": 11078, "iter_tflops": 21.231476220207455, "iter_time": 0.9717220458984374, "loss": 0.3160281479358673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.89646535555489, "step_time": 0.8286756057739258} +{"epoch": 0, "iter": 11079, "iter_tflops": 48.15433414168619, "iter_time": 0.4284368972778321, "loss": 0.2767150402069092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.31324333094442, "step_time": 0.39437611198425293} +{"epoch": 0, "iter": 11080, "iter_tflops": 46.80677425572155, "iter_time": 0.4407715301513672, "loss": 0.44753190875053406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.428296498010134, "step_time": 0.40911739921569823} +{"epoch": 0, "iter": 11081, "iter_tflops": 22.17569317269087, "iter_time": 0.9303471755981445, "loss": 0.7196885943412781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.31668250189459, "step_time": 0.8848211364746094} +{"epoch": 0, "iter": 11082, "iter_tflops": 14.993670251061346, "iter_time": 1.3759868774414064, "loss": 0.6703467965126038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.980925971873962, "step_time": 1.0869381999969483} +{"epoch": 0, "iter": 11083, "iter_tflops": 40.43220671621024, "iter_time": 0.5102638511657716, "loss": 0.695343554019928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.61061982974302, "step_time": 0.4730749893188477} +{"epoch": 0, "iter": 11084, "iter_tflops": 
42.82041842708147, "iter_time": 0.48180504226684573, "loss": 0.622631847858429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.02256125468037, "step_time": 0.44828216743469235} +{"epoch": 0, "iter": 11085, "iter_tflops": 33.246388252064975, "iter_time": 0.6205514221191406, "loss": 0.44524139165878296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.4806058868724, "step_time": 0.5814752311706544} +{"epoch": 0, "iter": 11086, "iter_tflops": 12.50184055480576, "iter_time": 1.6502444915771486, "loss": 0.4017498791217804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.63477665315245, "step_time": 1.409730670928955} +{"epoch": 0, "iter": 11087, "iter_tflops": 36.4061486398382, "iter_time": 0.5666925582885742, "loss": 0.2686018943786621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.77919339747676, "step_time": 0.5186403179168702} +{"epoch": 0, "iter": 11088, "iter_tflops": 39.67928005704201, "iter_time": 0.5199462661743164, "loss": 0.2549252212047577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.1108522788208, "step_time": 0.4785591659545898} +{"epoch": 0, "iter": 11089, "iter_tflops": 29.722773674642088, "iter_time": 0.6941173706054686, "loss": 0.7868971228599548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.749381043187334, "step_time": 0.6299689598083497} +{"epoch": 0, "iter": 11090, "iter_tflops": 33.52449133489363, "iter_time": 0.6154036254882812, "loss": 0.8023453950881958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.41388071910764, "step_time": 0.5665722274780274} +{"epoch": 0, "iter": 11091, "iter_tflops": 38.58638744092496, "iter_time": 0.5346728439331054, "loss": 0.8088117837905884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.214804609578266, "step_time": 0.48871702003479006} +{"epoch": 0, "iter": 11092, "iter_tflops": 38.78167863904138, "iter_time": 0.5319804153442383, "loss": 0.8182240128517151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.956860646721395, "step_time": 0.49172157287597656} +{"epoch": 0, "iter": 11093, 
"iter_tflops": 31.575685805912165, "iter_time": 0.6533854446411133, "loss": 0.6948720216751099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.63015533065212, "step_time": 0.595755153656006} +{"epoch": 0, "iter": 11094, "iter_tflops": 36.05564051608695, "iter_time": 0.5722015533447264, "loss": 0.8071720004081726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.1965038735744, "step_time": 0.5263503494262695} +{"epoch": 0, "iter": 11095, "iter_tflops": 37.85758638956272, "iter_time": 0.5449658966064453, "loss": 0.8608381748199463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3485057169016, "step_time": 0.4989562053680419} +{"epoch": 0, "iter": 11096, "iter_tflops": 38.29372181622632, "iter_time": 0.538759162902832, "loss": 0.7566784620285034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.72253389934914, "step_time": 0.4944832344055175} +{"epoch": 0, "iter": 11097, "iter_tflops": 17.959355479223888, "iter_time": 1.1487658081054688, "loss": 0.3950292468070984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.292967977176357, "step_time": 1.0693582000732422} +{"epoch": 0, "iter": 11098, "iter_tflops": 32.972840321095596, "iter_time": 0.6256996154785156, "loss": 0.377824604511261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.98989607319958, "step_time": 0.5577494316101074} +{"epoch": 0, "iter": 11099, "iter_tflops": 49.66634428406011, "iter_time": 0.41539384078979497, "loss": 0.47766703367233276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.186492239715214, "step_time": 0.3807423706054688} +{"epoch": 0, "iter": 11100, "iter_tflops": 44.50186998607869, "iter_time": 0.46360059738159176, "loss": 0.325379878282547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90940622976535, "step_time": 0.43062720108032226} +{"epoch": 0, "iter": 11101, "iter_tflops": 28.337860368122367, "iter_time": 0.7280399169921875, "loss": 0.8640632629394531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.143820710634937, "step_time": 0.6844219818115235} +{"epoch": 0, "iter": 
11102, "iter_tflops": 10.607614843607099, "iter_time": 1.9449323730468748, "loss": 0.7220897674560547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.866278710007162, "step_time": 1.487860870361328} +{"epoch": 0, "iter": 11103, "iter_tflops": 13.648578394512382, "iter_time": 1.5115928497314455, "loss": 0.7378955483436584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.291699157651674, "step_time": 1.2663561553955078} +{"epoch": 0, "iter": 11104, "iter_tflops": 18.905540601863365, "iter_time": 1.0912723388671874, "loss": 0.5696743726730347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.021742279088, "step_time": 0.8961569137573242} +{"epoch": 0, "iter": 11105, "iter_tflops": 17.377299792743717, "iter_time": 0.8343940582275391, "loss": 0.2524077594280243, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 18.350149498898407, "step_time": 0.7901579055786132} +{"epoch": 0, "iter": 11106, "iter_tflops": 12.453042905445923, "iter_time": 1.164335159301758, "loss": 0.22147271037101746, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 15.991352323084298, "step_time": 0.9067097892761231} +{"epoch": 0, "iter": 11107, "iter_tflops": 25.674698561881588, "iter_time": 0.5647394714355469, "loss": 0.2961950898170471, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 27.46696756090672, "step_time": 0.5278892059326172} +{"epoch": 0, "iter": 11108, "iter_tflops": 25.19917558522217, "iter_time": 0.5753964309692383, "loss": 0.14291565120220184, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 26.788396962846484, "step_time": 0.541261043548584} +{"epoch": 0, "iter": 11109, "iter_tflops": 26.043930378658413, "iter_time": 0.7921651306152343, "loss": 0.29111745953559875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.568747094176793, "step_time": 0.7483507843017578} +{"epoch": 0, "iter": 11110, "iter_tflops": 13.16197868730321, "iter_time": 1.567476593017578, "loss": 0.3310536742210388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.82387465828783, "step_time": 1.226298574447632} +{"epoch": 0, 
"iter": 11111, "iter_tflops": 41.764129261346184, "iter_time": 0.4939907493591309, "loss": 0.43893250823020935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.69133001644437, "step_time": 0.45153190994262704} +{"epoch": 0, "iter": 11112, "iter_tflops": 42.293522398499384, "iter_time": 0.48780740737915035, "loss": 0.34267544746398926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88714992774103, "step_time": 0.44960503196716306} +{"epoch": 0, "iter": 11113, "iter_tflops": 17.915657138624542, "iter_time": 1.1515677795410155, "loss": 0.5297803282737732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.99622709273291, "step_time": 1.0860626907348632} +{"epoch": 0, "iter": 11114, "iter_tflops": 13.361049272152098, "iter_time": 1.5441222534179688, "loss": 0.4368140697479248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.307708706291432, "step_time": 1.1920176067352295} +{"epoch": 0, "iter": 11115, "iter_tflops": 43.155205836003226, "iter_time": 0.47806731796264645, "loss": 0.6131401062011719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.13618527870964, "step_time": 0.43769120025634767} +{"epoch": 0, "iter": 11116, "iter_tflops": 41.016534989220034, "iter_time": 0.5029945487976074, "loss": 0.6369492411613464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.717345534734974, "step_time": 0.46136668586730956} +{"epoch": 0, "iter": 11117, "iter_tflops": 23.658224651385773, "iter_time": 0.8720474090576171, "loss": 0.3008037507534027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.327486073513196, "step_time": 0.81457328414917} +{"epoch": 0, "iter": 11118, "iter_tflops": 19.702748212966483, "iter_time": 1.0471175537109374, "loss": 0.40919169783592224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.348323718867857, "step_time": 0.8473311653137208} +{"epoch": 0, "iter": 11119, "iter_tflops": 43.0977372475533, "iter_time": 0.4787047958374023, "loss": 0.3206077516078949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.37112874810993, "step_time": 
0.44491247177124027} +{"epoch": 0, "iter": 11120, "iter_tflops": 45.107885108077724, "iter_time": 0.4573722190856933, "loss": 0.505433201789856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.90437261063535, "step_time": 0.4218660297393799} +{"epoch": 0, "iter": 11121, "iter_tflops": 24.672950344099444, "iter_time": 0.8361826705932617, "loss": 0.0020969132892787457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.958418652578864, "step_time": 0.7947746658325195} +{"epoch": 0, "iter": 11122, "iter_tflops": 26.19527238303924, "iter_time": 0.7875884323120117, "loss": 0.005927844438701868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.05763539907442, "step_time": 0.6435625476837158} +{"epoch": 0, "iter": 11123, "iter_tflops": 56.290730735947406, "iter_time": 0.3665096054077148, "loss": 0.004899319726973772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.78504686629381, "step_time": 0.33391725921630866} +{"epoch": 0, "iter": 11124, "iter_tflops": 57.73904927967333, "iter_time": 0.35731612777709965, "loss": 0.0069693829864263535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.10763793925223, "step_time": 0.3269191207885742} +{"epoch": 0, "iter": 11125, "iter_tflops": 32.05817972247065, "iter_time": 0.6435516204833984, "loss": 0.002677335636690259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.12470650167106, "step_time": 0.6045793685913086} +{"epoch": 0, "iter": 11126, "iter_tflops": 12.688238674969144, "iter_time": 1.626001373291016, "loss": 0.008268601261079311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.670918791947715, "step_time": 1.5091226730346678} +{"epoch": 0, "iter": 11127, "iter_tflops": 17.357061838064777, "iter_time": 1.1886282196044922, "loss": 0.004167307633906603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.604140570994286, "step_time": 1.0013081321716308} +{"epoch": 0, "iter": 11128, "iter_tflops": 43.765601319960325, "iter_time": 0.4713997497558594, "loss": 0.002067454159259796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
48.587913546018825, "step_time": 0.4246136951446533} +{"epoch": 0, "iter": 11129, "iter_tflops": 9.957964775852837, "iter_time": 1.36196142578125, "loss": 0.3678232729434967, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 10.551520978189677, "step_time": 1.2853468170166014} +{"epoch": 0, "iter": 11130, "iter_tflops": 9.666871706906752, "iter_time": 1.4029734039306643, "loss": 0.17870554327964783, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 12.06486381536318, "step_time": 1.124120761871338} +{"epoch": 0, "iter": 11131, "iter_tflops": 20.339010808389666, "iter_time": 0.6668153152465821, "loss": 0.16890542209148407, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 21.8266733651763, "step_time": 0.621366512298584} +{"epoch": 0, "iter": 11132, "iter_tflops": 20.787662646432366, "iter_time": 0.6524237060546875, "loss": 0.2714978754520416, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 22.354539813611762, "step_time": 0.6066939430236816} +{"epoch": 0, "iter": 11133, "iter_tflops": 21.38490420458531, "iter_time": 0.9647503356933593, "loss": 0.45646587014198303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.98244160838377, "step_time": 0.89768936920166} +{"epoch": 0, "iter": 11134, "iter_tflops": 19.48779038030725, "iter_time": 1.0586676635742187, "loss": 0.6144001483917236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.45215118764555, "step_time": 0.8797100677490235} +{"epoch": 0, "iter": 11135, "iter_tflops": 44.48305520517675, "iter_time": 0.46379668426513676, "loss": 0.38839322328567505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.8304770262505, "step_time": 0.4313378162384033} +{"epoch": 0, "iter": 11136, "iter_tflops": 44.13404378383045, "iter_time": 0.4674643821716309, "loss": 0.3472704589366913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.43474192572928, "step_time": 0.4349363498687744} +{"epoch": 0, "iter": 11137, "iter_tflops": 30.320560547638774, "iter_time": 0.6804324569702147, "loss": 0.0016123611712828279, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 32.188483179029866, "step_time": 0.6409464340209962} +{"epoch": 0, "iter": 11138, "iter_tflops": 29.75009634726776, "iter_time": 0.6934798889160156, "loss": 0.0008800199721008539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.513528789725015, "step_time": 0.6156049289703369} +{"epoch": 0, "iter": 11139, "iter_tflops": 44.07459847425099, "iter_time": 0.4680948715209961, "loss": 0.001735767349600792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.29490612066018, "step_time": 0.4271898460388184} +{"epoch": 0, "iter": 11140, "iter_tflops": 47.678614229080026, "iter_time": 0.4327116851806641, "loss": 0.015094838105142117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.63536592112018, "step_time": 0.39196257400512696} +{"epoch": 0, "iter": 11141, "iter_tflops": 11.687645308793533, "iter_time": 1.2266246185302734, "loss": 0.015267613343894482, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 12.56798247144389, "step_time": 1.140704444885254} +{"epoch": 0, "iter": 11142, "iter_tflops": 12.43009364550819, "iter_time": 1.1533584442138674, "loss": 0.0034408539067953825, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 15.024595595449306, "step_time": 0.9541923027038574} +{"epoch": 0, "iter": 11143, "iter_tflops": 32.154648203358676, "iter_time": 0.44585633087158205, "loss": 0.008875733241438866, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 35.566664213729204, "step_time": 0.40308400535583494} +{"epoch": 0, "iter": 11144, "iter_tflops": 31.10098171645103, "iter_time": 0.46096144485473634, "loss": 0.006326900329440832, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 34.455730295874986, "step_time": 0.41608038330078123} +{"epoch": 0, "iter": 11145, "iter_tflops": 20.315270655958987, "iter_time": 1.0155460815429689, "loss": 0.2632524073123932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.614349690092435, "step_time": 0.9545091018676758} +{"epoch": 0, "iter": 11146, "iter_tflops": 25.09117297998099, "iter_time": 0.8222450790405272, "loss": 0.29656562209129333, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.616426864265797, "step_time": 0.6525434894561767} +{"epoch": 0, "iter": 11147, "iter_tflops": 43.81646332370282, "iter_time": 0.4708525505065918, "loss": 0.3235795199871063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.92880276124946, "step_time": 0.43045292854309086} +{"epoch": 0, "iter": 11148, "iter_tflops": 39.673158259508575, "iter_time": 0.520026496887207, "loss": 0.2432851940393448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.51103903222189, "step_time": 0.4741576843261719} +{"epoch": 0, "iter": 11149, "iter_tflops": 20.37001181836993, "iter_time": 1.0128169631958008, "loss": 0.4770229756832123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.929656535813383, "step_time": 0.9407850723266602} +{"epoch": 0, "iter": 11150, "iter_tflops": 19.7971277236573, "iter_time": 1.0421255950927735, "loss": 0.4966927468776703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.871886771706077, "step_time": 0.864242265701294} +{"epoch": 0, "iter": 11151, "iter_tflops": 45.7127871433095, "iter_time": 0.4513199653625488, "loss": 0.6104452013969421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.66269824225615, "step_time": 0.41542433738708495} +{"epoch": 0, "iter": 11152, "iter_tflops": 45.71113543383501, "iter_time": 0.4513362731933594, "loss": 0.49801531434059143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.34689167702004, "step_time": 0.4180829391479492} +{"epoch": 0, "iter": 11153, "iter_tflops": 36.52002810024399, "iter_time": 0.4586702995300293, "loss": 0.020323418080806732, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 40.074792675141495, "step_time": 0.4179847507476807} +{"epoch": 0, "iter": 11154, "iter_tflops": 10.337840207978864, "iter_time": 1.6203241577148437, "loss": 0.03332103043794632, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 11.879462086337218, "step_time": 1.4100514068603514} +{"epoch": 0, "iter": 11155, "iter_tflops": 10.700257263214386, "iter_time": 1.565443878173828, "loss": 
0.03328624367713928, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 13.083923651315596, "step_time": 1.2802468643188476} +{"epoch": 0, "iter": 11156, "iter_tflops": 15.550984732174392, "iter_time": 1.0771441497802734, "loss": 0.055944010615348816, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 19.166262588718176, "step_time": 0.8739654979705811} +{"epoch": 0, "iter": 11157, "iter_tflops": 13.852753995542423, "iter_time": 1.1145194396972655, "loss": 0.21691550314426422, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 14.858474351583586, "step_time": 1.0390813522338869} +{"epoch": 0, "iter": 11158, "iter_tflops": 10.87556387563267, "iter_time": 1.419619598388672, "loss": 0.17281386256217957, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.778134430581117, "step_time": 1.2082486457824708} +{"epoch": 0, "iter": 11159, "iter_tflops": 22.75757374421868, "iter_time": 0.678418701171875, "loss": 0.3218681514263153, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.661001499566297, "step_time": 0.6260558242797852} +{"epoch": 0, "iter": 11160, "iter_tflops": 23.476596539905056, "iter_time": 0.657640625, "loss": 0.2875094711780548, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.255797045719664, "step_time": 0.6113116760253907} +{"epoch": 0, "iter": 11161, "iter_tflops": 18.992909827360617, "iter_time": 1.0862523803710937, "loss": 0.11398908495903015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.532570943588595, "step_time": 1.004798355102539} +{"epoch": 0, "iter": 11162, "iter_tflops": 13.209903644496215, "iter_time": 1.5617898559570313, "loss": 0.12759654223918915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.456347588157545, "step_time": 1.33479746055603} +{"epoch": 0, "iter": 11163, "iter_tflops": 40.59782842380675, "iter_time": 0.5081821937561035, "loss": 0.11185983568429947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.71352058980974, "step_time": 0.46140615272521973} +{"epoch": 0, "iter": 11164, "iter_tflops": 38.162530728441794, "iter_time": 
0.5406112518310546, "loss": 0.08901636302471161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65514489371448, "step_time": 0.49528320121765135} +{"epoch": 0, "iter": 11165, "iter_tflops": 20.987001004341323, "iter_time": 0.9830415267944337, "loss": 0.08343630284070969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.72655307050235, "step_time": 0.9077968597412109} +{"epoch": 0, "iter": 11166, "iter_tflops": 21.736190369894214, "iter_time": 0.9491586685180664, "loss": 0.1218397319316864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.043886273786494, "step_time": 0.7103420429229735} +{"epoch": 0, "iter": 11167, "iter_tflops": 38.00294452922443, "iter_time": 0.5428814468383788, "loss": 0.19670569896697998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34524057189613, "step_time": 0.4989956092834472} +{"epoch": 0, "iter": 11168, "iter_tflops": 41.059701593074486, "iter_time": 0.5024657440185547, "loss": 0.05154040828347206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.10020756113388, "step_time": 0.4574500789642334} +{"epoch": 0, "iter": 11169, "iter_tflops": 17.15298327909102, "iter_time": 1.202769989013672, "loss": 0.04930201545357704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.23629209044683, "step_time": 1.1313206329345704} +{"epoch": 0, "iter": 11170, "iter_tflops": 20.799640068595057, "iter_time": 0.9918966598510743, "loss": 0.07845328003168106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.2220644451695, "step_time": 0.8179779872894287} +{"epoch": 0, "iter": 11171, "iter_tflops": 48.61462846012889, "iter_time": 0.4243803596496582, "loss": 0.08758999407291412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.691310426648535, "step_time": 0.3842538642883301} +{"epoch": 0, "iter": 11172, "iter_tflops": 51.97106401292942, "iter_time": 0.3969726982116699, "loss": 0.07010335475206375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.3583293764531, "step_time": 0.3660699977874756} +{"epoch": 0, "iter": 11173, "iter_tflops": 24.27194116483487, 
"iter_time": 0.849997673034668, "loss": 0.3526413142681122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.482557158376075, "step_time": 0.8096162948608399} +{"epoch": 0, "iter": 11174, "iter_tflops": 15.734696927206798, "iter_time": 1.3111846771240234, "loss": 0.5494813323020935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.151821866035462, "step_time": 1.0237830429077148} +{"epoch": 0, "iter": 11175, "iter_tflops": 48.72415412898011, "iter_time": 0.42342640686035155, "loss": 0.5363959074020386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.385076082996584, "step_time": 0.3864580707550048} +{"epoch": 0, "iter": 11176, "iter_tflops": 49.3709293761471, "iter_time": 0.4178793830871582, "loss": 0.6085141897201538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.1201260712088, "step_time": 0.38838562774658203} +{"epoch": 0, "iter": 11177, "iter_tflops": 37.018055339670425, "iter_time": 0.5573251571655273, "loss": 0.3120911717414856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.80378955707879, "step_time": 0.5183198318481446} +{"epoch": 0, "iter": 11178, "iter_tflops": 15.657191914952318, "iter_time": 1.3176752014160158, "loss": 0.23538042604923248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.4294013795519, "step_time": 1.1194662857055664} +{"epoch": 0, "iter": 11179, "iter_tflops": 41.286290008809395, "iter_time": 0.49970809936523436, "loss": 0.27138713002204895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.647976335338576, "step_time": 0.3994559898376464} +{"epoch": 0, "iter": 11180, "iter_tflops": 50.923136019391755, "iter_time": 0.4051418495178223, "loss": 0.27306506037712097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.57700017030958, "step_time": 0.3712163925170898} +{"epoch": 0, "iter": 11181, "iter_tflops": 17.63565298578733, "iter_time": 1.1698514099121093, "loss": 0.552273154258728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.517217101750052, "step_time": 1.1141573486328125} +{"epoch": 0, "iter": 11182, "iter_tflops": 
17.132132322405905, "iter_time": 1.2042338409423827, "loss": 0.603203296661377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.584934458199438, "step_time": 1.0022423706054688} +{"epoch": 0, "iter": 11183, "iter_tflops": 45.17273935649672, "iter_time": 0.45671557235717775, "loss": 0.8164974451065063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.89331288953869, "step_time": 0.4219614562988281} +{"epoch": 0, "iter": 11184, "iter_tflops": 48.75768795346928, "iter_time": 0.42313518905639647, "loss": 0.6974372267723083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.77626637880949, "step_time": 0.39091612434387213} +{"epoch": 0, "iter": 11185, "iter_tflops": 28.51655312557877, "iter_time": 0.723477813720703, "loss": 0.6733219623565674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.271533812618625, "step_time": 0.6815344619750976} +{"epoch": 0, "iter": 11186, "iter_tflops": 16.117811045524263, "iter_time": 1.280018325805664, "loss": 0.804776668548584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.77735700525358, "step_time": 1.1605264778137205} +{"epoch": 0, "iter": 11187, "iter_tflops": 44.063180164024615, "iter_time": 0.46821617126464843, "loss": 0.7429406046867371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.467100799475666, "step_time": 0.43463984870910644} +{"epoch": 0, "iter": 11188, "iter_tflops": 41.317219380186486, "iter_time": 0.49933402633666996, "loss": 0.9698959589004517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.21903579692241, "step_time": 0.46656588363647467} +{"epoch": 0, "iter": 11189, "iter_tflops": 35.9567412142631, "iter_time": 0.5737753982543945, "loss": 0.19624042510986328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.493966511126786, "step_time": 0.5359565505981446} +{"epoch": 0, "iter": 11190, "iter_tflops": 9.841415151179232, "iter_time": 2.0963543548583985, "loss": 0.19974492490291595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.902773556393653, "step_time": 1.5989657897949219} +{"epoch": 0, "iter": 11191, 
"iter_tflops": 15.759861411351917, "iter_time": 1.309091049194336, "loss": 0.21139754354953766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.306568471737187, "step_time": 1.1269776496887207} +{"epoch": 0, "iter": 11192, "iter_tflops": 18.80076648508395, "iter_time": 1.0973538513183594, "loss": 0.1945239007472992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.34451020983048, "step_time": 0.8474638977050781} +{"epoch": 0, "iter": 11193, "iter_tflops": 12.167808955221535, "iter_time": 1.1782198028564452, "loss": 0.23722855746746063, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 13.100545595373482, "step_time": 1.0943325500488281} +{"epoch": 0, "iter": 11194, "iter_tflops": 14.39790927597547, "iter_time": 0.9957246704101562, "loss": 0.2615695297718048, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 15.896382404444465, "step_time": 0.9018626441955566} +{"epoch": 0, "iter": 11195, "iter_tflops": 24.407094558345353, "iter_time": 0.5873846817016601, "loss": 0.2761411964893341, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 25.934374934331093, "step_time": 0.5527934837341308} +{"epoch": 0, "iter": 11196, "iter_tflops": 25.94435851267829, "iter_time": 0.5525807647705079, "loss": 0.12934912741184235, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 27.59864662122973, "step_time": 0.5194585685729981} +{"epoch": 0, "iter": 11197, "iter_tflops": 43.71373719295753, "iter_time": 0.47195904159545904, "loss": 0.006033935584127903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.63526795045596, "step_time": 0.4331054363250733} +{"epoch": 0, "iter": 11198, "iter_tflops": 14.924215970322841, "iter_time": 1.382390441894531, "loss": 0.0018094859551638365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.689084893566466, "step_time": 1.1039113807678225} +{"epoch": 0, "iter": 11199, "iter_tflops": 55.648624102898054, "iter_time": 0.37073860931396485, "loss": 0.005650350358337164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.12619026039112, "step_time": 0.337516429901123} +{"epoch": 
0, "iter": 11200, "iter_tflops": 63.010582396859625, "iter_time": 0.3274226760864257, "loss": 0.003135062288492918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 69.31198365588091, "step_time": 0.2976555051803589} +{"epoch": 0, "iter": 11201, "iter_tflops": 34.64124661510696, "iter_time": 0.5955644073486328, "loss": 0.40747830271720886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.09543961465251, "step_time": 0.5561625289916993} +{"epoch": 0, "iter": 11202, "iter_tflops": 38.43394569666969, "iter_time": 0.5367935333251953, "loss": 0.4119071066379547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37847169837964, "step_time": 0.4868295783996582} +{"epoch": 0, "iter": 11203, "iter_tflops": 48.39099736826931, "iter_time": 0.4263415641784668, "loss": 0.4045407176017761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51523659275322, "step_time": 0.39285919380187984} +{"epoch": 0, "iter": 11204, "iter_tflops": 48.51105696684002, "iter_time": 0.4252864151000976, "loss": 0.3027094602584839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.31816906598522, "step_time": 0.39433898162841796} +{"epoch": 0, "iter": 11205, "iter_tflops": 29.594020228619602, "iter_time": 0.6971372375488282, "loss": 0.36522677540779114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.46534102991532, "step_time": 0.6556767807006836} +{"epoch": 0, "iter": 11206, "iter_tflops": 35.23706735191785, "iter_time": 0.5854940567016602, "loss": 0.33092623949050903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.29327702947232, "step_time": 0.5250540313720703} +{"epoch": 0, "iter": 11207, "iter_tflops": 42.87421559176641, "iter_time": 0.48120048904418944, "loss": 0.37708067893981934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.83000371131276, "step_time": 0.4405528907775879} +{"epoch": 0, "iter": 11208, "iter_tflops": 36.8385086592674, "iter_time": 0.5600414962768554, "loss": 0.31183111667633057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03234862981554, "step_time": 0.5153605575561524} 
+{"epoch": 0, "iter": 11209, "iter_tflops": 19.586479998293495, "iter_time": 1.053333396911621, "loss": 0.8139437437057495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.683026887692495, "step_time": 0.9974890823364259} +{"epoch": 0, "iter": 11210, "iter_tflops": 8.05819237507777, "iter_time": 2.560263214111328, "loss": 0.6686410903930664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.216659697133395, "step_time": 2.0193580017089845} +{"epoch": 0, "iter": 11211, "iter_tflops": 15.818841653512443, "iter_time": 1.3042101287841796, "loss": 0.6811895370483398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.14435888753445, "step_time": 1.077659149169922} +{"epoch": 0, "iter": 11212, "iter_tflops": 37.233878601789016, "iter_time": 0.5540946655273438, "loss": 0.7905977964401245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.68464750936078, "step_time": 0.5070977573394775} +{"epoch": 0, "iter": 11213, "iter_tflops": 13.131787061570114, "iter_time": 1.1259115905761719, "loss": 0.12495603412389755, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 13.940072862149105, "step_time": 1.0606279754638672} +{"epoch": 0, "iter": 11214, "iter_tflops": 12.07593357385468, "iter_time": 1.2243551330566407, "loss": 0.2522968649864197, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 14.552113662131203, "step_time": 1.016019500732422} +{"epoch": 0, "iter": 11215, "iter_tflops": 26.285223850265634, "iter_time": 0.5624921188354493, "loss": 0.2783359885215759, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 28.057610141796864, "step_time": 0.5269597511291504} +{"epoch": 0, "iter": 11216, "iter_tflops": 26.109089847637538, "iter_time": 0.5662867355346679, "loss": 0.184719055891037, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.682115887805743, "step_time": 0.5341077003479003} +{"epoch": 0, "iter": 11217, "iter_tflops": 35.837980492475026, "iter_time": 0.575676788330078, "loss": 0.11867288500070572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.39574319224385, "step_time": 
0.5373276252746582} +{"epoch": 0, "iter": 11218, "iter_tflops": 13.189926510479376, "iter_time": 1.564155303955078, "loss": 0.10525687783956528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.223374848588499, "step_time": 1.3552246932983398} +{"epoch": 0, "iter": 11219, "iter_tflops": 46.76807367230084, "iter_time": 0.44113626861572264, "loss": 0.15938825905323029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.771210092303605, "step_time": 0.4063541812896729} +{"epoch": 0, "iter": 11220, "iter_tflops": 51.226707017737866, "iter_time": 0.402740966796875, "loss": 0.116268090903759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.024021221532195, "step_time": 0.36825442123413094} +{"epoch": 0, "iter": 11221, "iter_tflops": 35.744320605858114, "iter_time": 0.5771852188110351, "loss": 0.09282582253217697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.3989768829728, "step_time": 0.5372823753356935} +{"epoch": 0, "iter": 11222, "iter_tflops": 34.22311053242119, "iter_time": 0.6028409805297852, "loss": 0.052373938262462616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14931837051926, "step_time": 0.47813254737854005} +{"epoch": 0, "iter": 11223, "iter_tflops": 40.19997972222694, "iter_time": 0.513211540222168, "loss": 0.07386066019535065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27190039448869, "step_time": 0.46600876235961913} +{"epoch": 0, "iter": 11224, "iter_tflops": 40.27693607140841, "iter_time": 0.5122309570312501, "loss": 0.052522189915180206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1642211527941, "step_time": 0.4671449642181397} +{"epoch": 0, "iter": 11225, "iter_tflops": 19.536084864292107, "iter_time": 1.0560505676269532, "loss": 0.045485809445381165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.140314166281723, "step_time": 0.9759123420715332} +{"epoch": 0, "iter": 11226, "iter_tflops": 25.960495288749932, "iter_time": 0.7947110900878906, "loss": 0.01475992426276207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
32.29530961905652, "step_time": 0.6388263111114502} +{"epoch": 0, "iter": 11227, "iter_tflops": 43.61627911699211, "iter_time": 0.47301360702514644, "loss": 0.015655936673283577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.287085385589464, "step_time": 0.42725903511047364} +{"epoch": 0, "iter": 11228, "iter_tflops": 42.59370030401668, "iter_time": 0.4843695983886719, "loss": 0.03597417101264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.06099330366664, "step_time": 0.43839052391052247} +{"epoch": 0, "iter": 11229, "iter_tflops": 23.741456319259278, "iter_time": 0.8689902267456056, "loss": 0.7225109338760376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.32877329671076, "step_time": 0.8145318870544432} +{"epoch": 0, "iter": 11230, "iter_tflops": 18.754188329892415, "iter_time": 1.1000792541503908, "loss": 0.6037107706069946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.008156661891064, "step_time": 0.8249745788574219} +{"epoch": 0, "iter": 11231, "iter_tflops": 43.19414286911176, "iter_time": 0.4776363677978515, "loss": 0.5392880439758301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44381462162648, "step_time": 0.4442161712646484} +{"epoch": 0, "iter": 11232, "iter_tflops": 39.04297493593293, "iter_time": 0.5284201202392578, "loss": 0.9108033180236816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.292441570733175, "step_time": 0.48781987380981445} +{"epoch": 0, "iter": 11233, "iter_tflops": 30.112594545788813, "iter_time": 0.6851317138671874, "loss": 0.025772619992494583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.945551348173726, "step_time": 0.6458205490112304} +{"epoch": 0, "iter": 11234, "iter_tflops": 18.599175836356306, "iter_time": 1.1092477264404297, "loss": 0.04319951310753822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.780958196976922, "step_time": 0.7426343383789062} +{"epoch": 0, "iter": 11235, "iter_tflops": 47.28509165926459, "iter_time": 0.436312858581543, "loss": 0.024025384336709976, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 51.96071371387574, "step_time": 0.3970517730712891} +{"epoch": 0, "iter": 11236, "iter_tflops": 40.41327997740808, "iter_time": 0.5105028228759766, "loss": 0.03144137188792229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.38730078442105, "step_time": 0.4647972087860107} +{"epoch": 0, "iter": 11237, "iter_tflops": 18.008517905780195, "iter_time": 1.1456297302246092, "loss": 0.05760619044303894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.320120479328317, "step_time": 1.067855323791504} +{"epoch": 0, "iter": 11238, "iter_tflops": 14.41342061740142, "iter_time": 1.4313807983398439, "loss": 0.09873035550117493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.933150953568447, "step_time": 0.9406351852416993} +{"epoch": 0, "iter": 11239, "iter_tflops": 54.591336682830665, "iter_time": 0.3779188194274903, "loss": 0.05604676157236099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.69646226469458, "step_time": 0.3455999355316162} +{"epoch": 0, "iter": 11240, "iter_tflops": 51.45976131458569, "iter_time": 0.40091700744628905, "loss": 0.13252931833267212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.443120612144206, "step_time": 0.37211277580261226} +{"epoch": 0, "iter": 11241, "iter_tflops": 19.739603588692407, "iter_time": 0.6232747116088866, "loss": 0.011990907602012157, "lr": 3e-05, "seqlen": 4944.0, "step_tflops": 20.95341777153036, "step_time": 0.5871689224243164} +{"epoch": 0, "iter": 11242, "iter_tflops": 6.996140945467065, "iter_time": 1.7585688781738282, "loss": 0.003971598111093044, "lr": 3e-05, "seqlen": 4944.0, "step_tflops": 9.10198885270192, "step_time": 1.3517041091918944} +{"epoch": 0, "iter": 11243, "iter_tflops": 27.228277537184056, "iter_time": 0.4518536186218262, "loss": 0.0038961528334766626, "lr": 3e-05, "seqlen": 4944.0, "step_tflops": 30.24769208060409, "step_time": 0.40674824714660646} +{"epoch": 0, "iter": 11244, "iter_tflops": 27.826262781395588, "iter_time": 0.4421433029174805, "loss": 0.0024274662137031555, "lr": 
3e-05, "seqlen": 4944.0, "step_tflops": 30.907819050859953, "step_time": 0.3980609474182129} +{"epoch": 0, "iter": 11245, "iter_tflops": 18.385777807462812, "iter_time": 1.1221224212646483, "loss": 0.32933053374290466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.63481381158093, "step_time": 1.0507404708862305} +{"epoch": 0, "iter": 11246, "iter_tflops": 18.661198376962954, "iter_time": 1.105561019897461, "loss": 0.4048517048358917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.263577419428096, "step_time": 0.8502906703948974} +{"epoch": 0, "iter": 11247, "iter_tflops": 49.895447376093, "iter_time": 0.4134864921569824, "loss": 0.3798324763774872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.219778877520454, "step_time": 0.3805086250305176} +{"epoch": 0, "iter": 11248, "iter_tflops": 49.7608333376104, "iter_time": 0.41460506439208983, "loss": 0.41374215483665466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.79213786437327, "step_time": 0.38353362274169917} +{"epoch": 0, "iter": 11249, "iter_tflops": 21.853030400344107, "iter_time": 0.9440838699340821, "loss": 0.3079717755317688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.838899377972925, "step_time": 0.9033313369750977} +{"epoch": 0, "iter": 11250, "iter_tflops": 13.838225452042787, "iter_time": 1.4908771057128907, "loss": 0.30120086669921875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.280463287995273, "step_time": 1.3501615180969238} +{"epoch": 0, "iter": 11251, "iter_tflops": 35.42797904780206, "iter_time": 0.5823389892578125, "loss": 0.23642989993095398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.13359355445592, "step_time": 0.5271964988708496} +{"epoch": 0, "iter": 11252, "iter_tflops": 36.99173970617636, "iter_time": 0.5577216339111328, "loss": 0.307996541261673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.62619941569306, "step_time": 0.5078273086547851} +{"epoch": 0, "iter": 11253, "iter_tflops": 28.817321166476265, "iter_time": 0.7159268341064453, "loss": 
0.04282025620341301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.93272500981988, "step_time": 0.6460799541473389} +{"epoch": 0, "iter": 11254, "iter_tflops": 40.20571403204566, "iter_time": 0.5131383438110352, "loss": 0.07701418548822403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.835477832639675, "step_time": 0.46015107917785647} +{"epoch": 0, "iter": 11255, "iter_tflops": 43.72161244707122, "iter_time": 0.4718740310668945, "loss": 0.05126247927546501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01919334853861, "step_time": 0.4296426506042481} +{"epoch": 0, "iter": 11256, "iter_tflops": 41.79080376643164, "iter_time": 0.4936754417419434, "loss": 0.07821149379014969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.958182947644104, "step_time": 0.4489101219177246} +{"epoch": 0, "iter": 11257, "iter_tflops": 18.872089029393923, "iter_time": 1.0932066650390626, "loss": 0.044461362063884735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.184545400224334, "step_time": 1.0221232681274413} +{"epoch": 0, "iter": 11258, "iter_tflops": 16.180645640251612, "iter_time": 1.2750476074218748, "loss": 0.0713014155626297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.984134153554052, "step_time": 1.032373649597168} +{"epoch": 0, "iter": 11259, "iter_tflops": 40.92935435207202, "iter_time": 0.5040659408569336, "loss": 0.04587864875793457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.19391297718439, "step_time": 0.45650159835815435} +{"epoch": 0, "iter": 11260, "iter_tflops": 43.53904957999432, "iter_time": 0.47385263824462887, "loss": 0.024813003838062286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.95330814540213, "step_time": 0.4302329559326172} +{"epoch": 0, "iter": 11261, "iter_tflops": 33.39696153849977, "iter_time": 0.6177536087036133, "loss": 0.9681631922721863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.6205536391523, "step_time": 0.5633747024536132} +{"epoch": 0, "iter": 11262, "iter_tflops": 38.31908310651575, "iter_time": 
0.538402587890625, "loss": 0.8859916925430298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.81405424988045, "step_time": 0.493400936126709} +{"epoch": 0, "iter": 11263, "iter_tflops": 40.97898612195771, "iter_time": 0.5034554405212402, "loss": 0.9314538240432739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4661666480657, "step_time": 0.4639728374481201} +{"epoch": 0, "iter": 11264, "iter_tflops": 39.37635372570833, "iter_time": 0.5239462661743164, "loss": 0.693649411201477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.62853277222714, "step_time": 0.4839738121032715} +{"epoch": 0, "iter": 11265, "iter_tflops": 33.788159603870014, "iter_time": 0.6106012802124023, "loss": 0.16130496561527252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.506522397985336, "step_time": 0.5500668201446532} +{"epoch": 0, "iter": 11266, "iter_tflops": 39.62727239965492, "iter_time": 0.5206286544799805, "loss": 0.12646998465061188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7606921040204, "step_time": 0.4714526329040527} +{"epoch": 0, "iter": 11267, "iter_tflops": 43.573679074509336, "iter_time": 0.4734760513305664, "loss": 0.10767027735710144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70754360703121, "step_time": 0.4324492931365967} +{"epoch": 0, "iter": 11268, "iter_tflops": 45.685016051522304, "iter_time": 0.4515943145751953, "loss": 0.20922741293907166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.82713772927013, "step_time": 0.4140533542633057} +{"epoch": 0, "iter": 11269, "iter_tflops": 30.3833654427364, "iter_time": 0.6790259475708008, "loss": 0.4106011390686035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.625066266563415, "step_time": 0.613562910079956} +{"epoch": 0, "iter": 11270, "iter_tflops": 36.376801848349594, "iter_time": 0.5671497344970703, "loss": 0.7276874780654907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.62437996987171, "step_time": 0.5206666584014893} +{"epoch": 0, "iter": 11271, "iter_tflops": 42.48148458045744, 
"iter_time": 0.4856490707397461, "loss": 0.5764185786247253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.317448209809285, "step_time": 0.44542811203002924} +{"epoch": 0, "iter": 11272, "iter_tflops": 41.61907778846514, "iter_time": 0.4957124137878418, "loss": 0.6011236906051636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1044634516824, "step_time": 0.45740691566467284} +{"epoch": 0, "iter": 11273, "iter_tflops": 22.411250037983184, "iter_time": 0.9039101791381836, "loss": 0.002359274309128523, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 23.962550491331, "step_time": 0.8453923568725586} +{"epoch": 0, "iter": 11274, "iter_tflops": 7.866486076994144, "iter_time": 2.575197723388672, "loss": 0.0026650468353182077, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 8.96759189114574, "step_time": 2.2589963150024412} +{"epoch": 0, "iter": 11275, "iter_tflops": 13.437625636448725, "iter_time": 1.5075399169921875, "loss": 0.010596838779747486, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 19.95511988856573, "step_time": 1.0151658897399902} +{"epoch": 0, "iter": 11276, "iter_tflops": 57.22769756602851, "iter_time": 0.3539851837158203, "loss": 0.005286357831209898, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 62.97725161382644, "step_time": 0.3216678485870362} +{"epoch": 0, "iter": 11277, "iter_tflops": 20.475762332246795, "iter_time": 0.744034812927246, "loss": 0.3619992434978485, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 21.641941330305574, "step_time": 0.7039423942565918} +{"epoch": 0, "iter": 11278, "iter_tflops": 10.330418165539244, "iter_time": 1.4747399139404296, "loss": 0.17994815111160278, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 12.853443561830048, "step_time": 1.1852605819702149} +{"epoch": 0, "iter": 11279, "iter_tflops": 23.51692917651632, "iter_time": 0.6478175735473632, "loss": 0.18534141778945923, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 26.44196220022167, "step_time": 0.5761554260253907} +{"epoch": 0, "iter": 11280, "iter_tflops": 
25.48570334640748, "iter_time": 0.5977735748291015, "loss": 0.27492889761924744, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 27.179451533109077, "step_time": 0.5605219802856445} +{"epoch": 0, "iter": 11281, "iter_tflops": 44.87602259341281, "iter_time": 0.45973534011840816, "loss": 0.05314786732196808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0559735279023, "step_time": 0.4205623092651367} +{"epoch": 0, "iter": 11282, "iter_tflops": 18.363721439139038, "iter_time": 1.1234701843261719, "loss": 0.048181112855672836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.41473745300814, "step_time": 0.9204253921508788} +{"epoch": 0, "iter": 11283, "iter_tflops": 45.140886112698794, "iter_time": 0.45703784942626946, "loss": 0.030447695404291153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.97716970348659, "step_time": 0.4128103618621826} +{"epoch": 0, "iter": 11284, "iter_tflops": 47.61338385023962, "iter_time": 0.43330450057983394, "loss": 0.035990919917821884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.75685689271482, "step_time": 0.391059944152832} +{"epoch": 0, "iter": 11285, "iter_tflops": 19.24719175866528, "iter_time": 1.0719014892578125, "loss": 0.23779936134815216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.829166533083047, "step_time": 0.9904905929565431} +{"epoch": 0, "iter": 11286, "iter_tflops": 19.112637127502996, "iter_time": 1.079447769165039, "loss": 0.19164276123046875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.496498039715078, "step_time": 0.9597420692443848} +{"epoch": 0, "iter": 11287, "iter_tflops": 42.22532400704359, "iter_time": 0.4885952682495117, "loss": 0.2830330729484558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.21696340856074, "step_time": 0.4463965606689453} +{"epoch": 0, "iter": 11288, "iter_tflops": 38.718141178985775, "iter_time": 0.5328534088134765, "loss": 0.19587160646915436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.17948741151844, "step_time": 0.48912622642517084} +{"epoch": 0, "iter": 
11289, "iter_tflops": 20.03611214710927, "iter_time": 1.0296954498291016, "loss": 0.6224371790885925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.57417267174342, "step_time": 0.9562866592407226} +{"epoch": 0, "iter": 11290, "iter_tflops": 16.290636629886745, "iter_time": 1.266438751220703, "loss": 0.5991302132606506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.936306569086657, "step_time": 1.089499340057373} +{"epoch": 0, "iter": 11291, "iter_tflops": 35.00763881095034, "iter_time": 0.5893311920166016, "loss": 0.621193528175354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.0623462728135, "step_time": 0.5420342025756836} +{"epoch": 0, "iter": 11292, "iter_tflops": 37.25526081625035, "iter_time": 0.5537766494750976, "loss": 0.84139484167099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54062060639543, "step_time": 0.5088993015289307} +{"epoch": 0, "iter": 11293, "iter_tflops": 17.55604720643779, "iter_time": 1.175155960083008, "loss": 0.7148668766021729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.060271198079803, "step_time": 1.0824134292602539} +{"epoch": 0, "iter": 11294, "iter_tflops": 17.1000853181048, "iter_time": 1.2064906768798829, "loss": 0.9299525022506714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.52587611868475, "step_time": 1.0051260852813722} +{"epoch": 0, "iter": 11295, "iter_tflops": 46.300028348740824, "iter_time": 0.44559569931030274, "loss": 0.7178046107292175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99913192736245, "step_time": 0.4126290340423584} +{"epoch": 0, "iter": 11296, "iter_tflops": 45.479962565015825, "iter_time": 0.4536303977966309, "loss": 0.6355687975883484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1463017047281, "step_time": 0.4197893390655517} +{"epoch": 0, "iter": 11297, "iter_tflops": 44.42970514325965, "iter_time": 0.4643535995483398, "loss": 0.6562238335609436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.32511502050056, "step_time": 0.42692280197143556} +{"epoch": 0, "iter": 
11298, "iter_tflops": 42.24156072535182, "iter_time": 0.4884074630737304, "loss": 0.7965265512466431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.56217959441957, "step_time": 0.45281182098388667} +{"epoch": 0, "iter": 11299, "iter_tflops": 42.670885879974364, "iter_time": 0.48349344253540033, "loss": 0.5846533179283142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.98306373544948, "step_time": 0.4486672229766846} +{"epoch": 0, "iter": 11300, "iter_tflops": 43.214794313711, "iter_time": 0.4774081153869629, "loss": 0.768354058265686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.52619474177826, "step_time": 0.4434296340942383} +{"epoch": 0, "iter": 11301, "iter_tflops": 30.645481054215615, "iter_time": 0.673218132019043, "loss": 0.21008898317813873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.7914546768547, "step_time": 0.6291606674194336} +{"epoch": 0, "iter": 11302, "iter_tflops": 8.258809753262733, "iter_time": 2.498071044921875, "loss": 0.20944149792194366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.201919752664425, "step_time": 2.0222756118774416} +{"epoch": 0, "iter": 11303, "iter_tflops": 17.16861117858606, "iter_time": 1.2016751556396483, "loss": 0.11584074795246124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.92828529172793, "step_time": 0.8998097000122071} +{"epoch": 0, "iter": 11304, "iter_tflops": 47.666835433819614, "iter_time": 0.4328186111450195, "loss": 0.13991902768611908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.63956773728513, "step_time": 0.39952103424072266} +{"epoch": 0, "iter": 11305, "iter_tflops": 23.230076248424716, "iter_time": 0.6734272003173829, "loss": 0.2633255124092102, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 24.641009365552843, "step_time": 0.6348670616149903} +{"epoch": 0, "iter": 11306, "iter_tflops": 13.109856378958673, "iter_time": 1.1932827301025388, "loss": 0.24077953398227692, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 15.242174917048962, "step_time": 1.0263473091125488} +{"epoch": 0, 
"iter": 11307, "iter_tflops": 21.342950001616668, "iter_time": 0.7329710845947265, "loss": 0.31070536375045776, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 23.06669260379984, "step_time": 0.6781971511840821} +{"epoch": 0, "iter": 11308, "iter_tflops": 22.238172207188065, "iter_time": 0.7034645233154297, "loss": 0.31489384174346924, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 23.913045319554424, "step_time": 0.6541937675476074} +{"epoch": 0, "iter": 11309, "iter_tflops": 17.674588665486045, "iter_time": 1.1672743225097657, "loss": 0.9386778473854065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.025093826140996, "step_time": 1.0844148101806639} +{"epoch": 0, "iter": 11310, "iter_tflops": 23.355807590442456, "iter_time": 0.8833389053344728, "loss": 0.8740840554237366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.454373864170954, "step_time": 0.7250587768554687} +{"epoch": 0, "iter": 11311, "iter_tflops": 43.463342422300656, "iter_time": 0.47467802429199224, "loss": 0.8143916726112366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.837753001835964, "step_time": 0.440480001449585} +{"epoch": 0, "iter": 11312, "iter_tflops": 44.85957725152368, "iter_time": 0.4599038772583008, "loss": 0.6354825496673584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.27713585547186, "step_time": 0.4273470897674561} +{"epoch": 0, "iter": 11313, "iter_tflops": 30.602429936618382, "iter_time": 0.6741652069091798, "loss": 0.09623683989048004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.40620617882971, "step_time": 0.6366401977539062} +{"epoch": 0, "iter": 11314, "iter_tflops": 24.006010103785233, "iter_time": 0.8594136810302736, "loss": 0.09204647690057755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.449299709305585, "step_time": 0.6775555992126465} +{"epoch": 0, "iter": 11315, "iter_tflops": 47.71865266246861, "iter_time": 0.4323486175537109, "loss": 0.11266512423753738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.203340359455765, "step_time": 
0.39520638656616214} +{"epoch": 0, "iter": 11316, "iter_tflops": 50.76360508089456, "iter_time": 0.4064150581359863, "loss": 0.09723600000143051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.332106847509, "step_time": 0.3728593521118164} +{"epoch": 0, "iter": 11317, "iter_tflops": 32.86696208444232, "iter_time": 0.6277152557373047, "loss": 0.1295803338289261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.098919162251136, "step_time": 0.5877985420227052} +{"epoch": 0, "iter": 11318, "iter_tflops": 21.36103929704183, "iter_time": 0.9658281707763673, "loss": 0.0899410992860794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.464157930378082, "step_time": 0.810201286315918} +{"epoch": 0, "iter": 11319, "iter_tflops": 46.93805631197527, "iter_time": 0.4395387268066406, "loss": 0.1298462152481079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.43052511938153, "step_time": 0.4011449127197266} +{"epoch": 0, "iter": 11320, "iter_tflops": 48.18313406785188, "iter_time": 0.42818081283569337, "loss": 0.09625399112701416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.87385977020297, "step_time": 0.3901945800781249} +{"epoch": 0, "iter": 11321, "iter_tflops": 21.991052676114176, "iter_time": 0.9381585235595702, "loss": 0.4694049656391144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.318359771084364, "step_time": 0.8847574920654298} +{"epoch": 0, "iter": 11322, "iter_tflops": 15.986141872121536, "iter_time": 1.2905611419677734, "loss": 0.5350592136383057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.858358862947533, "step_time": 0.9025623245239259} +{"epoch": 0, "iter": 11323, "iter_tflops": 38.29841147910515, "iter_time": 0.5386931915283203, "loss": 0.5212045907974243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.89986351481296, "step_time": 0.4923904705047607} +{"epoch": 0, "iter": 11324, "iter_tflops": 40.39729405943271, "iter_time": 0.5107048377990723, "loss": 0.370443731546402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.963656652148515, 
"step_time": 0.4692761039733887} +{"epoch": 0, "iter": 11325, "iter_tflops": 17.48539275287742, "iter_time": 1.1799044952392579, "loss": 0.16885380446910858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.596499008492003, "step_time": 1.1094073944091798} +{"epoch": 0, "iter": 11326, "iter_tflops": 17.427533578879544, "iter_time": 1.183821762084961, "loss": 0.15068429708480835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.878097207777785, "step_time": 0.9881692428588866} +{"epoch": 0, "iter": 11327, "iter_tflops": 50.73946723536559, "iter_time": 0.4066083984375, "loss": 0.12962020933628082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.437771320837186, "step_time": 0.372148681640625} +{"epoch": 0, "iter": 11328, "iter_tflops": 51.29662905434837, "iter_time": 0.4021919937133789, "loss": 0.1820572018623352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.063919910623014, "step_time": 0.3679923477172851} +{"epoch": 0, "iter": 11329, "iter_tflops": 40.058267857733654, "iter_time": 0.5150270996093749, "loss": 0.23716066777706146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41538710873389, "step_time": 0.47520233917236326} +{"epoch": 0, "iter": 11330, "iter_tflops": 22.593538830634206, "iter_time": 0.9131413040161133, "loss": 0.28870517015457153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.39604687110125, "step_time": 0.753068265914917} +{"epoch": 0, "iter": 11331, "iter_tflops": 43.8181911136478, "iter_time": 0.47083398437499996, "loss": 0.23627828061580658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.34199180321858, "step_time": 0.43578845596313476} +{"epoch": 0, "iter": 11332, "iter_tflops": 45.82933446528755, "iter_time": 0.4501722259521484, "loss": 0.2898825705051422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45152332680369, "step_time": 0.41719834136962886} +{"epoch": 0, "iter": 11333, "iter_tflops": 24.49626676754716, "iter_time": 0.8422137832641601, "loss": 0.5280306935310364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.761432520640497, "step_time": 0.8008519515991211} +{"epoch": 0, "iter": 11334, "iter_tflops": 14.523029447761536, "iter_time": 1.4205778198242187, "loss": 0.6494563817977905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.336364042489002, "step_time": 1.1251463737487792} +{"epoch": 0, "iter": 11335, "iter_tflops": 12.98022297102362, "iter_time": 1.5894252014160155, "loss": 0.6440567970275879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.061496458401958, "step_time": 1.3697904167175292} +{"epoch": 0, "iter": 11336, "iter_tflops": 21.984786368274595, "iter_time": 0.9384259262084962, "loss": 0.7468718886375427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.220600663580363, "step_time": 0.68268310546875} +{"epoch": 0, "iter": 11337, "iter_tflops": 20.62974919667111, "iter_time": 0.7483931808471679, "loss": 0.4506642520427704, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 21.869596993948193, "step_time": 0.7059647064208984} +{"epoch": 0, "iter": 11338, "iter_tflops": 10.198520869148885, "iter_time": 1.5138630218505862, "loss": 0.25340014696121216, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.749393071669754, "step_time": 1.2109724388122558} +{"epoch": 0, "iter": 11339, "iter_tflops": 7.69639831326034, "iter_time": 2.0060245056152346, "loss": 0.2526605725288391, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 9.311058493010895, "step_time": 1.6581534347534177} +{"epoch": 0, "iter": 11340, "iter_tflops": 16.333006175670526, "iter_time": 0.9452738494873048, "loss": 0.15025469660758972, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 19.42645238482846, "step_time": 0.7947495155334472} +{"epoch": 0, "iter": 11341, "iter_tflops": 12.626795203508843, "iter_time": 1.1871147918701173, "loss": 0.25580736994743347, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 13.397604075573119, "step_time": 1.118816116333008} +{"epoch": 0, "iter": 11342, "iter_tflops": 13.264024122672131, "iter_time": 1.1300835418701172, "loss": 0.16584788262844086, "lr": 3e-05, "seqlen": 6000.0, 
"step_tflops": 18.26403751515527, "step_time": 0.8207087478637696} +{"epoch": 0, "iter": 11343, "iter_tflops": 25.679765813484853, "iter_time": 0.5837068557739259, "loss": 0.1626160740852356, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.203836318908106, "step_time": 0.5510052032470704} +{"epoch": 0, "iter": 11344, "iter_tflops": 25.69179945893668, "iter_time": 0.5834334564208985, "loss": 0.14596553146839142, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.27852792462204, "step_time": 0.5494964904785156} +{"epoch": 0, "iter": 11345, "iter_tflops": 26.671999654404786, "iter_time": 0.7735113143920898, "loss": 0.00244051031768322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.437389213466762, "step_time": 0.7254918289184572} +{"epoch": 0, "iter": 11346, "iter_tflops": 15.811721887394329, "iter_time": 1.3047973937988282, "loss": 0.010061966255307198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.159006451309338, "step_time": 1.0234181709289552} +{"epoch": 0, "iter": 11347, "iter_tflops": 50.93752200599923, "iter_time": 0.4050274276733398, "loss": 0.0015767618315294385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.65634030244555, "step_time": 0.3706872100830078} +{"epoch": 0, "iter": 11348, "iter_tflops": 56.18178070401832, "iter_time": 0.3672203559875488, "loss": 0.0055283489637076855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.716763134130304, "step_time": 0.3342867069244385} +{"epoch": 0, "iter": 11349, "iter_tflops": 24.44946643060756, "iter_time": 0.6030561294555664, "loss": 0.129388689994812, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 26.20645472753161, "step_time": 0.562624771118164} +{"epoch": 0, "iter": 11350, "iter_tflops": 7.728169493036584, "iter_time": 1.9078774871826172, "loss": 0.1498403698205948, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 8.912143193932467, "step_time": 1.6544169311523438} +{"epoch": 0, "iter": 11351, "iter_tflops": 10.521454185951715, "iter_time": 1.4013652801513672, "loss": 0.12038054317235947, "lr": 3e-05, 
"seqlen": 5904.0, "step_tflops": 12.473405527749021, "step_time": 1.1820669631958007} +{"epoch": 0, "iter": 11352, "iter_tflops": 22.19064263872612, "iter_time": 0.664442253112793, "loss": 0.10772266238927841, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 28.904991564545234, "step_time": 0.510098768234253} +{"epoch": 0, "iter": 11353, "iter_tflops": 23.255815304752776, "iter_time": 0.6550912017822266, "loss": 0.13678507506847382, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.758713253018666, "step_time": 0.615326000213623} +{"epoch": 0, "iter": 11354, "iter_tflops": 8.348817134897674, "iter_time": 1.8247710723876955, "loss": 0.24401234090328217, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 10.183807135948014, "step_time": 1.4959709854125978} +{"epoch": 0, "iter": 11355, "iter_tflops": 22.952097369590778, "iter_time": 0.6637598190307616, "loss": 0.19008173048496246, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.73689058369664, "step_time": 0.6158688354492188} +{"epoch": 0, "iter": 11356, "iter_tflops": 23.96760453834019, "iter_time": 0.6356363220214843, "loss": 0.342672199010849, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 25.730935538850577, "step_time": 0.5920764122009278} +{"epoch": 0, "iter": 11357, "iter_tflops": 24.374374410672946, "iter_time": 0.8464255599975585, "loss": 0.0031998190097510815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.05714721249906, "step_time": 0.7917633247375488} +{"epoch": 0, "iter": 11358, "iter_tflops": 9.38622980024719, "iter_time": 2.19801708984375, "loss": 0.028941571712493896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.124797519427615, "step_time": 1.854514068603516} +{"epoch": 0, "iter": 11359, "iter_tflops": 12.958424885869263, "iter_time": 1.592098861694336, "loss": 0.011116983368992805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.33957333611226, "step_time": 1.1898270568847658} +{"epoch": 0, "iter": 11360, "iter_tflops": 52.6756436986533, "iter_time": 0.3916628646850586, "loss": 0.012388896197080612, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.733211074003876, "step_time": 0.3397003574371338} +{"epoch": 0, "iter": 11361, "iter_tflops": 19.96745576513511, "iter_time": 0.7609280624389648, "loss": 0.08240915834903717, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 21.13997687263843, "step_time": 0.7187234649658203} +{"epoch": 0, "iter": 11362, "iter_tflops": 9.170254533083343, "iter_time": 1.6568566741943358, "loss": 0.16575779020786285, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 12.07703012353224, "step_time": 1.2580739860534669} +{"epoch": 0, "iter": 11363, "iter_tflops": 22.037555713573838, "iter_time": 0.6894502105712891, "loss": 0.15784814953804016, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 23.625068016902517, "step_time": 0.6431218490600585} +{"epoch": 0, "iter": 11364, "iter_tflops": 24.377684976459406, "iter_time": 0.6232666244506837, "loss": 0.14996622502803802, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 26.127606194038258, "step_time": 0.5815227508544922} +{"epoch": 0, "iter": 11365, "iter_tflops": 21.14311658648023, "iter_time": 0.9757829895019531, "loss": 0.643301248550415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.817913994731434, "step_time": 0.9041621208190918} +{"epoch": 0, "iter": 11366, "iter_tflops": 28.587072339386058, "iter_time": 0.7216931228637695, "loss": 0.6713743209838867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.02591586655083, "step_time": 0.6246940612792968} +{"epoch": 0, "iter": 11367, "iter_tflops": 49.4994177947322, "iter_time": 0.4167946701049805, "loss": 0.7615746855735779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.91674625473012, "step_time": 0.3826472282409668} +{"epoch": 0, "iter": 11368, "iter_tflops": 47.56860231069212, "iter_time": 0.4337124176025391, "loss": 0.8018019795417786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.3615123376227, "step_time": 0.4016839179992676} +{"epoch": 0, "iter": 11369, "iter_tflops": 28.27942304495915, "iter_time": 0.7295443572998047, "loss": 
0.7155705094337463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.842174712695382, "step_time": 0.6913401489257812} +{"epoch": 0, "iter": 11370, "iter_tflops": 15.511970468877987, "iter_time": 1.3300111389160154, "loss": 0.5721518993377686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.911709038858902, "step_time": 1.0361287155151366} +{"epoch": 0, "iter": 11371, "iter_tflops": 45.4337079875701, "iter_time": 0.45409222412109373, "loss": 0.6617517471313477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13696907356098, "step_time": 0.41986907005310053} +{"epoch": 0, "iter": 11372, "iter_tflops": 48.93333425430686, "iter_time": 0.4216163444519043, "loss": 0.7188202738761902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.66760213813039, "step_time": 0.39172266578674314} +{"epoch": 0, "iter": 11373, "iter_tflops": 25.88447451557015, "iter_time": 0.7970450973510741, "loss": 0.09970956295728683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.544026563693052, "step_time": 0.7490224227905272} +{"epoch": 0, "iter": 11374, "iter_tflops": 17.49695311782516, "iter_time": 1.1791249237060546, "loss": 0.10177966207265854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.86690285755693, "step_time": 1.0384655151367188} +{"epoch": 0, "iter": 11375, "iter_tflops": 50.11537792907047, "iter_time": 0.41167191314697266, "loss": 0.11058758199214935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.584432990425256, "step_time": 0.3779666175842285} +{"epoch": 0, "iter": 11376, "iter_tflops": 55.439136322096815, "iter_time": 0.372139518737793, "loss": 0.13729961216449738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.28122480474528, "step_time": 0.3422474174499512} +{"epoch": 0, "iter": 11377, "iter_tflops": 35.63796984616365, "iter_time": 0.5789076538085938, "loss": 0.36055949330329895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.22293687672329, "step_time": 0.5397568893432617} +{"epoch": 0, "iter": 11378, "iter_tflops": 13.120558545506833, "iter_time": 
1.5724249420166017, "loss": 0.254854679107666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.80034668522802, "step_time": 1.3939601516723632} +{"epoch": 0, "iter": 11379, "iter_tflops": 12.175474165293235, "iter_time": 1.6944796752929687, "loss": 0.3007003366947174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.193443734777954, "step_time": 1.4535650329589844} +{"epoch": 0, "iter": 11380, "iter_tflops": 13.444546863083525, "iter_time": 1.534532455444336, "loss": 0.3772565722465515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.70847483660664, "step_time": 1.313373432159424} +{"epoch": 0, "iter": 11381, "iter_tflops": 17.55209068540562, "iter_time": 0.9192802505493164, "loss": 0.3158408999443054, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 18.43732573290176, "step_time": 0.8751426620483398} +{"epoch": 0, "iter": 11382, "iter_tflops": 9.78419268350724, "iter_time": 1.6491182098388673, "loss": 0.3279586136341095, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 13.376596805321443, "step_time": 1.206232837677002} +{"epoch": 0, "iter": 11383, "iter_tflops": 9.325485470255998, "iter_time": 1.7302359619140626, "loss": 0.21781030297279358, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 10.755666308326228, "step_time": 1.5001665039062502} +{"epoch": 0, "iter": 11384, "iter_tflops": 11.92139821104412, "iter_time": 1.3534729766845706, "loss": 0.1734994649887085, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 13.869946260302775, "step_time": 1.1633275299072265} +{"epoch": 0, "iter": 11385, "iter_tflops": 21.85376757434878, "iter_time": 0.7177114715576172, "loss": 0.23998019099235535, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 23.2030433641365, "step_time": 0.6759759674072265} +{"epoch": 0, "iter": 11386, "iter_tflops": 9.24185302691375, "iter_time": 1.6971379699707032, "loss": 0.2692924737930298, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 12.145837211024064, "step_time": 1.291364227294922} +{"epoch": 0, "iter": 11387, "iter_tflops": 10.298896400804807, "iter_time": 
1.5229495544433596, "loss": 0.3247185945510864, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 12.638474512315415, "step_time": 1.2410279159545898} +{"epoch": 0, "iter": 11388, "iter_tflops": 11.262835230974916, "iter_time": 1.392606689453125, "loss": 0.12150663882493973, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 14.01641324083418, "step_time": 1.1190237770080567} +{"epoch": 0, "iter": 11389, "iter_tflops": 18.43143579217484, "iter_time": 0.7756083068847656, "loss": 0.2276994287967682, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 19.553461431568078, "step_time": 0.7311019973754883} +{"epoch": 0, "iter": 11390, "iter_tflops": 10.049299356821825, "iter_time": 1.4225444183349607, "loss": 0.31026744842529297, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 11.822072292342593, "step_time": 1.2092274818420408} +{"epoch": 0, "iter": 11391, "iter_tflops": 19.567584716057453, "iter_time": 0.7305743103027342, "loss": 0.126073956489563, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 23.119289399636312, "step_time": 0.6183397102355958} +{"epoch": 0, "iter": 11392, "iter_tflops": 23.52882635256703, "iter_time": 0.6075770416259767, "loss": 0.09847783297300339, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 25.18844108129335, "step_time": 0.567545036315918} +{"epoch": 0, "iter": 11393, "iter_tflops": 19.929300596162093, "iter_time": 1.0352141265869141, "loss": 0.8051983118057251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.347160686458157, "step_time": 0.9664560928344726} +{"epoch": 0, "iter": 11394, "iter_tflops": 27.600192142420997, "iter_time": 0.7474981842041016, "loss": 0.730595588684082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.48089175319611, "step_time": 0.6768533439636231} +{"epoch": 0, "iter": 11395, "iter_tflops": 42.87065662619338, "iter_time": 0.48124043655395504, "loss": 0.7181899547576904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.18897593988242, "step_time": 0.44666704750061037} +{"epoch": 0, "iter": 11396, "iter_tflops": 41.08897831810794, 
"iter_time": 0.5021077270507812, "loss": 0.8151469230651855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.92870556462156, "step_time": 0.4696494750976563} +{"epoch": 0, "iter": 11397, "iter_tflops": 43.8138397717497, "iter_time": 0.47088074493408205, "loss": 0.20090658962726593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.81941013641792, "step_time": 0.4314376411437988} +{"epoch": 0, "iter": 11398, "iter_tflops": 34.7647275527286, "iter_time": 0.5934490203857422, "loss": 0.2013779729604721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.01500143102415, "step_time": 0.5287989940643311} +{"epoch": 0, "iter": 11399, "iter_tflops": 37.62401179022805, "iter_time": 0.5483491134643554, "loss": 0.25624266266822815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.24940940267431, "step_time": 0.5001548824310303} +{"epoch": 0, "iter": 11400, "iter_tflops": 39.201580945276085, "iter_time": 0.5262821807861329, "loss": 0.3236217796802521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99811400273086, "step_time": 0.4798139171600342} +{"epoch": 0, "iter": 11401, "iter_tflops": 17.635129457350622, "iter_time": 1.1698861389160156, "loss": 0.11613273620605469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.633420571957835, "step_time": 1.1072091369628907} +{"epoch": 0, "iter": 11402, "iter_tflops": 6.9737798802783555, "iter_time": 2.95838037109375, "loss": 0.16401343047618866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.69584445177079, "step_time": 2.372523292541504} +{"epoch": 0, "iter": 11403, "iter_tflops": 19.13423359262233, "iter_time": 1.0782294158935548, "loss": 0.07405396550893784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.37644876850361, "step_time": 0.8825589256286621} +{"epoch": 0, "iter": 11404, "iter_tflops": 44.6780457244966, "iter_time": 0.4617725143432617, "loss": 0.09079150855541229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.999950450798, "step_time": 0.4298148918151855} +{"epoch": 0, "iter": 11405, "iter_tflops": 22.27168471314396, 
"iter_time": 0.6858739013671875, "loss": 0.3648037612438202, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.65145628543868, "step_time": 0.6458615951538086} +{"epoch": 0, "iter": 11406, "iter_tflops": 13.948431182442405, "iter_time": 1.0951459045410157, "loss": 0.2759491205215454, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 16.614414579713472, "step_time": 0.9194165229797363} +{"epoch": 0, "iter": 11407, "iter_tflops": 22.08236814007484, "iter_time": 0.6917540359497071, "loss": 0.2448110431432724, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.867730886080558, "step_time": 0.6400091972351074} +{"epoch": 0, "iter": 11408, "iter_tflops": 22.396721784764303, "iter_time": 0.6820447845458983, "loss": 0.20070083439350128, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.008150484879923, "step_time": 0.6362658920288086} +{"epoch": 0, "iter": 11409, "iter_tflops": 28.536478592249754, "iter_time": 0.7229726486206054, "loss": 0.9083236455917358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.10362569021823, "step_time": 0.6633018836975098} +{"epoch": 0, "iter": 11410, "iter_tflops": 47.085837817919504, "iter_time": 0.4381592102050781, "loss": 0.6191921830177307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19973524174188, "step_time": 0.40295312881469725} +{"epoch": 0, "iter": 11411, "iter_tflops": 45.54081143610863, "iter_time": 0.4530242843627929, "loss": 0.8074746131896973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.10737512510276, "step_time": 0.4201220989227295} +{"epoch": 0, "iter": 11412, "iter_tflops": 43.86560989672064, "iter_time": 0.47032501220703127, "loss": 0.6037717461585999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.973056245997554, "step_time": 0.4392112236022949} +{"epoch": 0, "iter": 11413, "iter_tflops": 21.37915671785721, "iter_time": 0.9262281112670899, "loss": 0.11837995797395706, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 22.36584601858853, "step_time": 0.885366729736328} +{"epoch": 0, "iter": 11414, "iter_tflops": 
30.326901180357194, "iter_time": 0.6529508514404296, "loss": 0.13178983330726624, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 34.039186711720156, "step_time": 0.5817405719757079} +{"epoch": 0, "iter": 11415, "iter_tflops": 36.59744157843718, "iter_time": 0.5410754165649414, "loss": 0.11385853588581085, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 40.11336317568056, "step_time": 0.49365035438537597} +{"epoch": 0, "iter": 11416, "iter_tflops": 44.555579172447466, "iter_time": 0.4444331398010254, "loss": 0.1368178427219391, "lr": 3e-05, "seqlen": 7872.0, "step_tflops": 48.758688406222255, "step_time": 0.4061219978332519} +{"epoch": 0, "iter": 11417, "iter_tflops": 19.48832463142487, "iter_time": 1.058638641357422, "loss": 0.4189909100532532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.152721474372647, "step_time": 0.9753399124145509} +{"epoch": 0, "iter": 11418, "iter_tflops": 20.127117382555262, "iter_time": 1.0250396575927736, "loss": 0.4488735795021057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.606666873380096, "step_time": 0.9126110286712645} +{"epoch": 0, "iter": 11419, "iter_tflops": 46.84467833044443, "iter_time": 0.4404148826599121, "loss": 0.3915899693965912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69363178210448, "step_time": 0.40697603988647457} +{"epoch": 0, "iter": 11420, "iter_tflops": 47.71034548117172, "iter_time": 0.43242389678955073, "loss": 0.30636441707611084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.417336796560484, "step_time": 0.4012478046417236} +{"epoch": 0, "iter": 11421, "iter_tflops": 29.560916916666358, "iter_time": 0.6979179153442382, "loss": 0.1516345590353012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.39601349363729, "step_time": 0.6571246223449707} +{"epoch": 0, "iter": 11422, "iter_tflops": 21.983947412944026, "iter_time": 0.9384617385864258, "loss": 0.1374436318874359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.229077326490653, "step_time": 0.7576861038208007} +{"epoch": 0, "iter": 11423, 
"iter_tflops": 40.98025517079543, "iter_time": 0.5034398498535156, "loss": 0.15157225728034973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.65981115312497, "step_time": 0.4619610557556152} +{"epoch": 0, "iter": 11424, "iter_tflops": 42.12891375905891, "iter_time": 0.4897133979797364, "loss": 0.1412327140569687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.95966586906957, "step_time": 0.44889563751220707} +{"epoch": 0, "iter": 11425, "iter_tflops": 18.855786436242077, "iter_time": 0.9778461608886718, "loss": 0.059571944177150726, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 20.107965947011895, "step_time": 0.9169529342651367} +{"epoch": 0, "iter": 11426, "iter_tflops": 20.60160402665054, "iter_time": 0.8949816894531251, "loss": 0.11564406752586365, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 25.696205118739236, "step_time": 0.7175401306152344} +{"epoch": 0, "iter": 11427, "iter_tflops": 36.522388869442324, "iter_time": 0.5048426170349121, "loss": 0.05738731101155281, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 40.17384552252357, "step_time": 0.4589567699432373} +{"epoch": 0, "iter": 11428, "iter_tflops": 41.721411299847, "iter_time": 0.44193275833129886, "loss": 0.03719765692949295, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 46.0078323898609, "step_time": 0.4007591190338135} +{"epoch": 0, "iter": 11429, "iter_tflops": 20.93441282318561, "iter_time": 0.9855109710693359, "loss": 0.4098595380783081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.47804758492661, "step_time": 0.9178329849243163} +{"epoch": 0, "iter": 11430, "iter_tflops": 40.272827158448386, "iter_time": 0.5122832183837891, "loss": 0.3433334529399872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.737309926106164, "step_time": 0.47170467376708985} +{"epoch": 0, "iter": 11431, "iter_tflops": 49.241289135986825, "iter_time": 0.4189795570373535, "loss": 0.35797277092933655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.47021396319873, "step_time": 0.38584273338317876} +{"epoch": 0, 
"iter": 11432, "iter_tflops": 50.61867333194599, "iter_time": 0.4075787086486817, "loss": 0.41112643480300903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.4485110293237, "step_time": 0.37891015052795407} +{"epoch": 0, "iter": 11433, "iter_tflops": 32.9739206660321, "iter_time": 0.6256791152954102, "loss": 0.8811373114585876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.26098326173154, "step_time": 0.5850969429016113} +{"epoch": 0, "iter": 11434, "iter_tflops": 28.7077889667939, "iter_time": 0.7186583938598633, "loss": 0.7335878014564514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.766638818138496, "step_time": 0.5768250579833984} +{"epoch": 0, "iter": 11435, "iter_tflops": 38.71637782008738, "iter_time": 0.5328776779174805, "loss": 0.564437747001648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.073103952685834, "step_time": 0.490363000869751} +{"epoch": 0, "iter": 11436, "iter_tflops": 35.94997435273531, "iter_time": 0.5738833999633788, "loss": 0.5951747298240662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14235107120361, "step_time": 0.5270785465240478} +{"epoch": 0, "iter": 11437, "iter_tflops": 22.944055117495434, "iter_time": 0.8991912460327149, "loss": 0.03140510618686676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.85423591980185, "step_time": 0.8300835952758788} +{"epoch": 0, "iter": 11438, "iter_tflops": 19.45838188153321, "iter_time": 1.0602676849365233, "loss": 0.03320515528321266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.860359217862786, "step_time": 0.8646598033905029} +{"epoch": 0, "iter": 11439, "iter_tflops": 51.87084190473814, "iter_time": 0.3977397079467773, "loss": 0.03627471998333931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.07649941415795, "step_time": 0.3614638900756836} +{"epoch": 0, "iter": 11440, "iter_tflops": 58.161401830387085, "iter_time": 0.35472139358520516, "loss": 0.018282920122146606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.69527707713579, "step_time": 0.3239030342102051} 
+{"epoch": 0, "iter": 11441, "iter_tflops": 44.68393414129278, "iter_time": 0.46171166229248045, "loss": 0.20326445996761322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.000111662809424, "step_time": 0.4210417652130127} +{"epoch": 0, "iter": 11442, "iter_tflops": 47.13147575096014, "iter_time": 0.43773493576049805, "loss": 0.15420500934123993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.132984878171875, "step_time": 0.403479154586792} +{"epoch": 0, "iter": 11443, "iter_tflops": 44.83028127289333, "iter_time": 0.46020441818237307, "loss": 0.12336307764053345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.498026575459306, "step_time": 0.42540068054199215} +{"epoch": 0, "iter": 11444, "iter_tflops": 49.166480489154424, "iter_time": 0.4196170501708984, "loss": 0.1656745821237564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.482254381110096, "step_time": 0.3857558689117432} +{"epoch": 0, "iter": 11445, "iter_tflops": 29.932011373470235, "iter_time": 0.6892651901245117, "loss": 0.313649445772171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.87285533257942, "step_time": 0.6472935447692871} +{"epoch": 0, "iter": 11446, "iter_tflops": 17.630715894814983, "iter_time": 1.1701790008544921, "loss": 0.36471298336982727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.197548389974262, "step_time": 0.9732773399353027} +{"epoch": 0, "iter": 11447, "iter_tflops": 48.6519480891795, "iter_time": 0.42405482864379884, "loss": 0.4020504057407379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.92303288431057, "step_time": 0.3898320331573486} +{"epoch": 0, "iter": 11448, "iter_tflops": 52.499419071164375, "iter_time": 0.39297755813598634, "loss": 0.41282421350479126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.994449391218616, "step_time": 0.3619842586517334} +{"epoch": 0, "iter": 11449, "iter_tflops": 28.690786059353968, "iter_time": 0.7190842895507813, "loss": 0.39648422598838806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.428498457718934, "step_time": 
0.678018783569336} +{"epoch": 0, "iter": 11450, "iter_tflops": 10.262743642408317, "iter_time": 2.010290252685547, "loss": 0.35625791549682617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.440395250127281, "step_time": 1.6583953399658202} +{"epoch": 0, "iter": 11451, "iter_tflops": 11.651789941524184, "iter_time": 1.770637268066406, "loss": 0.3684259057044983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.4669648061185, "step_time": 1.5319779777526856} +{"epoch": 0, "iter": 11452, "iter_tflops": 37.955138796303984, "iter_time": 0.5435652236938476, "loss": 0.22658318281173706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.952595310768565, "step_time": 0.4917715663909912} +{"epoch": 0, "iter": 11453, "iter_tflops": 19.896775336492098, "iter_time": 0.7739071884155273, "loss": 0.3184330463409424, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 21.74146883261035, "step_time": 0.7082436599731446} +{"epoch": 0, "iter": 11454, "iter_tflops": 21.93004719338375, "iter_time": 0.7021534118652344, "loss": 0.2695891559123993, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 23.681020070545948, "step_time": 0.6502362403869628} +{"epoch": 0, "iter": 11455, "iter_tflops": 23.2554115468585, "iter_time": 0.6621365280151367, "loss": 0.2533811330795288, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.88450811318007, "step_time": 0.6187889022827149} +{"epoch": 0, "iter": 11456, "iter_tflops": 24.69355677137552, "iter_time": 0.6235738983154296, "loss": 0.2536548376083374, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.520573087151266, "step_time": 0.5806155624389648} +{"epoch": 0, "iter": 11457, "iter_tflops": 22.417672848532042, "iter_time": 0.9203048706054686, "loss": 0.3512169122695923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.278446454856734, "step_time": 0.8497699203491211} +{"epoch": 0, "iter": 11458, "iter_tflops": 25.37703294035177, "iter_time": 0.8129828872680663, "loss": 0.41963911056518555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.752727911945705, 
"step_time": 0.6708703556060791} +{"epoch": 0, "iter": 11459, "iter_tflops": 42.3433808231724, "iter_time": 0.48723302459716794, "loss": 0.35707587003707886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35795920926869, "step_time": 0.44503886413574223} +{"epoch": 0, "iter": 11460, "iter_tflops": 44.86048405357039, "iter_time": 0.45989458084106444, "loss": 0.29817622900009155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.16779192716224, "step_time": 0.4196058578491212} +{"epoch": 0, "iter": 11461, "iter_tflops": 21.88133222015024, "iter_time": 0.9428627700805665, "loss": 0.42084288597106934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.173343400591207, "step_time": 0.8902942123413086} +{"epoch": 0, "iter": 11462, "iter_tflops": 13.269466952146269, "iter_time": 1.5547793731689452, "loss": 0.5409390330314636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.711910428783828, "step_time": 1.2345143661499023} +{"epoch": 0, "iter": 11463, "iter_tflops": 36.79520199892894, "iter_time": 0.5607006454467773, "loss": 0.460176944732666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.32103183698567, "step_time": 0.5116707725524902} +{"epoch": 0, "iter": 11464, "iter_tflops": 37.71019359340845, "iter_time": 0.5470959320068359, "loss": 0.41465330123901367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.32605035352649, "step_time": 0.49922732353210447} +{"epoch": 0, "iter": 11465, "iter_tflops": 13.343431352363542, "iter_time": 1.5461610260009764, "loss": 0.12547121942043304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.040620530148407, "step_time": 1.4693861618041992} +{"epoch": 0, "iter": 11466, "iter_tflops": 19.507144898977497, "iter_time": 1.0576172790527345, "loss": 0.03420644253492355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.267925905535208, "step_time": 0.8866752281188964} +{"epoch": 0, "iter": 11467, "iter_tflops": 54.71231229164288, "iter_time": 0.377083194732666, "loss": 0.09888537973165512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
59.781765005572915, "step_time": 0.3451067981719971} +{"epoch": 0, "iter": 11468, "iter_tflops": 53.66104699857573, "iter_time": 0.384470573425293, "loss": 0.12166677415370941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.722205070110675, "step_time": 0.3513337669372559} +{"epoch": 0, "iter": 11469, "iter_tflops": 49.873239468667514, "iter_time": 0.4136706123352051, "loss": 0.04783164709806442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.027801369832964, "step_time": 0.37492127609252934} +{"epoch": 0, "iter": 11470, "iter_tflops": 51.701588193572036, "iter_time": 0.39904177474975583, "loss": 0.030974052846431732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.509301750616565, "step_time": 0.36509199142456056} +{"epoch": 0, "iter": 11471, "iter_tflops": 51.38472693183896, "iter_time": 0.4015024452209473, "loss": 0.03143041580915451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.048945072642375, "step_time": 0.36809066581726074} +{"epoch": 0, "iter": 11472, "iter_tflops": 49.56730980958583, "iter_time": 0.41622378921508785, "loss": 0.03636721149086952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.69330827067776, "step_time": 0.38423956680297855} +{"epoch": 0, "iter": 11473, "iter_tflops": 38.51561180241654, "iter_time": 0.5356553497314452, "loss": 0.5809080600738525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.469976050985125, "step_time": 0.4974947052001954} +{"epoch": 0, "iter": 11474, "iter_tflops": 17.47268630535803, "iter_time": 1.1807625427246093, "loss": 0.6734102368354797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.569960047992115, "step_time": 1.0542225666046143} +{"epoch": 0, "iter": 11475, "iter_tflops": 38.287837363966794, "iter_time": 0.5388419647216798, "loss": 0.8490346670150757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.891100367790415, "step_time": 0.4924934730529786} +{"epoch": 0, "iter": 11476, "iter_tflops": 43.40513996267322, "iter_time": 0.47531452560424803, "loss": 0.7087594270706177, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 47.19396098583596, "step_time": 0.4371553707122802} +{"epoch": 0, "iter": 11477, "iter_tflops": 29.782890825397878, "iter_time": 0.6927162857055664, "loss": 0.3497249186038971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.865979411323615, "step_time": 0.6277340240478515} +{"epoch": 0, "iter": 11478, "iter_tflops": 45.75668130701359, "iter_time": 0.4508870162963867, "loss": 0.43763187527656555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.967890973902655, "step_time": 0.41288701820373536} +{"epoch": 0, "iter": 11479, "iter_tflops": 52.04103014604024, "iter_time": 0.39643899154663087, "loss": 0.3936518430709839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.58367986011586, "step_time": 0.3646120853424072} +{"epoch": 0, "iter": 11480, "iter_tflops": 39.02959654759594, "iter_time": 0.5286012496948241, "loss": 0.45169222354888916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.70174591194406, "step_time": 0.4831440277099609} +{"epoch": 0, "iter": 11481, "iter_tflops": 20.078819954443325, "iter_time": 1.0275052795410156, "loss": 0.8062612414360046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.721557910139538, "step_time": 0.9497980575561523} +{"epoch": 0, "iter": 11482, "iter_tflops": 21.50609057512155, "iter_time": 0.9593139877319337, "loss": 0.6143754124641418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.42433774151311, "step_time": 0.780761043548584} +{"epoch": 0, "iter": 11483, "iter_tflops": 37.76577766292038, "iter_time": 0.5462907104492187, "loss": 0.6331491470336914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.283187359927474, "step_time": 0.49974565505981444} +{"epoch": 0, "iter": 11484, "iter_tflops": 44.98854766036214, "iter_time": 0.45858545303344733, "loss": 0.6610208749771118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04443685405484, "step_time": 0.4206612377166748} +{"epoch": 0, "iter": 11485, "iter_tflops": 21.923723770493773, "iter_time": 0.9410396575927734, "loss": 0.7432150840759277, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 23.803422176343428, "step_time": 0.866728042602539} +{"epoch": 0, "iter": 11486, "iter_tflops": 20.27636631528373, "iter_time": 1.0174946136474607, "loss": 0.7219853401184082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.721459946571738, "step_time": 0.8345418739318847} +{"epoch": 0, "iter": 11487, "iter_tflops": 42.484155566301936, "iter_time": 0.48561853790283205, "loss": 0.6461688280105591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.95994883400356, "step_time": 0.4488928737640381} +{"epoch": 0, "iter": 11488, "iter_tflops": 47.19843854528908, "iter_time": 0.437113899230957, "loss": 0.7300598621368408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.038983942195124, "step_time": 0.40422226142883305} +{"epoch": 0, "iter": 11489, "iter_tflops": 42.838430251256334, "iter_time": 0.48160246276855473, "loss": 0.6891251802444458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.676101858893894, "step_time": 0.4420054950714111} +{"epoch": 0, "iter": 11490, "iter_tflops": 47.80820827006409, "iter_time": 0.4315387306213379, "loss": 0.6167150735855103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.8944463570909, "step_time": 0.3975587940216065} +{"epoch": 0, "iter": 11491, "iter_tflops": 44.506481399585404, "iter_time": 0.463552562713623, "loss": 0.6105846762657166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.83689764933294, "step_time": 0.4312799224853516} +{"epoch": 0, "iter": 11492, "iter_tflops": 46.95616637002748, "iter_time": 0.4393692054748535, "loss": 0.6664827466011047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.7937249080241, "step_time": 0.40617406082153323} +{"epoch": 0, "iter": 11493, "iter_tflops": 32.824140162525744, "iter_time": 0.6285341644287109, "loss": 0.06121564656496048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.96763948536546, "step_time": 0.5900053253173828} +{"epoch": 0, "iter": 11494, "iter_tflops": 11.99633845928264, "iter_time": 1.7197825469970702, "loss": 0.13021713495254517, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 14.87564159025797, "step_time": 1.3869044494628906} +{"epoch": 0, "iter": 11495, "iter_tflops": 52.18357375179007, "iter_time": 0.39535608673095707, "loss": 0.048720598220825195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.12047506410035, "step_time": 0.3611856079101563} +{"epoch": 0, "iter": 11496, "iter_tflops": 54.56548518832459, "iter_time": 0.3780978660583496, "loss": 0.0815022885799408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.55715562227924, "step_time": 0.3464083080291748} +{"epoch": 0, "iter": 11497, "iter_tflops": 25.13525079220049, "iter_time": 0.8208031692504882, "loss": 0.620164155960083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.363490235104322, "step_time": 0.7825630569458009} +{"epoch": 0, "iter": 11498, "iter_tflops": 23.095724207948734, "iter_time": 0.893286277770996, "loss": 0.6094180345535278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.88580128515007, "step_time": 0.739842233657837} +{"epoch": 0, "iter": 11499, "iter_tflops": 48.378809477951016, "iter_time": 0.4264489707946777, "loss": 0.4689694344997406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.437806752058236, "step_time": 0.39343929100036623} +{"epoch": 0, "iter": 11500, "iter_tflops": 52.233954409824044, "iter_time": 0.39497475814819333, "loss": 0.6169306635856628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.65244812202463, "step_time": 0.3641694965362549} +{"epoch": 0, "iter": 11501, "iter_tflops": 26.505672665836308, "iter_time": 0.7783652114868165, "loss": 0.8300090432167053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.986762257532668, "step_time": 0.7371732864379883} +{"epoch": 0, "iter": 11502, "iter_tflops": 27.636214305300232, "iter_time": 0.7465238647460937, "loss": 0.7645106911659241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.578081440229912, "step_time": 0.6747020263671875} +{"epoch": 0, "iter": 11503, "iter_tflops": 46.8832888223075, "iter_time": 0.44005218124389656, "loss": 
1.0164166688919067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49222928715492, "step_time": 0.4085993785858154} +{"epoch": 0, "iter": 11504, "iter_tflops": 43.952340036854494, "iter_time": 0.4693969306945801, "loss": 0.7310246229171753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.04906967510636, "step_time": 0.4385016250610352} +{"epoch": 0, "iter": 11505, "iter_tflops": 45.03561432819909, "iter_time": 0.45810618591308594, "loss": 0.24033555388450623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.005011561357065, "step_time": 0.42099966621398927} +{"epoch": 0, "iter": 11506, "iter_tflops": 30.832487820161415, "iter_time": 0.6691348953247069, "loss": 0.20128720998764038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.938936625093085, "step_time": 0.5298319702148437} +{"epoch": 0, "iter": 11507, "iter_tflops": 38.42578142122735, "iter_time": 0.536907585144043, "loss": 0.15050457417964935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.997808244711706, "step_time": 0.4912421474456787} +{"epoch": 0, "iter": 11508, "iter_tflops": 46.09503588428752, "iter_time": 0.44757733917236325, "loss": 0.1254681795835495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.51352868328226, "step_time": 0.40842708969116204} +{"epoch": 0, "iter": 11509, "iter_tflops": 34.43413010212769, "iter_time": 0.5991466445922852, "loss": 0.70088791847229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.751495387666495, "step_time": 0.5464973850250244} +{"epoch": 0, "iter": 11510, "iter_tflops": 35.72501942581367, "iter_time": 0.5774970550537109, "loss": 0.7953581213951111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.13805303840945, "step_time": 0.5271364288330078} +{"epoch": 0, "iter": 11511, "iter_tflops": 42.783554105613526, "iter_time": 0.48222018814086914, "loss": 1.1476547718048096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.669134662535974, "step_time": 0.4420714817047119} +{"epoch": 0, "iter": 11512, "iter_tflops": 33.89592021811843, "iter_time": 
0.6086600799560546, "loss": 0.7377954125404358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.75925286392993, "step_time": 0.5612489891052247} +{"epoch": 0, "iter": 11513, "iter_tflops": 32.9854816111437, "iter_time": 0.6254598236083985, "loss": 0.8157435059547424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.293050223464405, "step_time": 0.568458517074585} +{"epoch": 0, "iter": 11514, "iter_tflops": 35.19489733820884, "iter_time": 0.5861955871582031, "loss": 0.6574978232383728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.36540978676086, "step_time": 0.5377524604797363} +{"epoch": 0, "iter": 11515, "iter_tflops": 37.38112698896249, "iter_time": 0.5519120254516602, "loss": 0.6307073831558228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.74382986802415, "step_time": 0.5063611736297607} +{"epoch": 0, "iter": 11516, "iter_tflops": 35.252877931487674, "iter_time": 0.5852314682006836, "loss": 0.7641995549201965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.145278170799685, "step_time": 0.5408557624816894} +{"epoch": 0, "iter": 11517, "iter_tflops": 18.540862235268413, "iter_time": 1.1127364654541017, "loss": 0.23043157160282135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.605527231882856, "step_time": 1.05231005859375} +{"epoch": 0, "iter": 11518, "iter_tflops": 20.81333372126368, "iter_time": 0.9912440643310547, "loss": 0.16501526534557343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.93357782043334, "step_time": 0.7955359516143797} +{"epoch": 0, "iter": 11519, "iter_tflops": 38.746812044679615, "iter_time": 0.5324591217041015, "loss": 0.14922966063022614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29662992067885, "step_time": 0.4877715682983398} +{"epoch": 0, "iter": 11520, "iter_tflops": 41.2690361211085, "iter_time": 0.49991701889038087, "loss": 0.12374308705329895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.20168290936355, "step_time": 0.4564231281280518} +{"epoch": 0, "iter": 11521, "iter_tflops": 18.519269206786912, 
"iter_time": 1.1140338897705078, "loss": 0.6550061702728271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.730457408718596, "step_time": 1.0456469955444334} +{"epoch": 0, "iter": 11522, "iter_tflops": 14.07098690078191, "iter_time": 1.4662151031494144, "loss": 0.6955275535583496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.519609999059707, "step_time": 1.1776000442504884} +{"epoch": 0, "iter": 11523, "iter_tflops": 39.68259555789091, "iter_time": 0.5199028244018554, "loss": 0.6436871290206909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60542350278172, "step_time": 0.48423632049560544} +{"epoch": 0, "iter": 11524, "iter_tflops": 44.74343867351359, "iter_time": 0.4610976295471191, "loss": 0.601790189743042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.29194871180966, "step_time": 0.42721600723266595} +{"epoch": 0, "iter": 11525, "iter_tflops": 24.741617277250523, "iter_time": 0.8338619613647461, "loss": 0.2941848039627075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.11113890627046, "step_time": 0.7901261444091798} +{"epoch": 0, "iter": 11526, "iter_tflops": 14.507603381244898, "iter_time": 1.4220883331298828, "loss": 0.2922106683254242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.36721359412981, "step_time": 1.1879334239959716} +{"epoch": 0, "iter": 11527, "iter_tflops": 37.59307512467887, "iter_time": 0.5488003692626953, "loss": 0.2487039715051651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.014561044731174, "step_time": 0.503018756866455} +{"epoch": 0, "iter": 11528, "iter_tflops": 39.28734681066333, "iter_time": 0.5251332855224609, "loss": 0.27800828218460083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.04016068383675, "step_time": 0.47934517860412595} +{"epoch": 0, "iter": 11529, "iter_tflops": 23.052360855531646, "iter_time": 0.8949666213989258, "loss": 0.3730956017971039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.498967430584887, "step_time": 0.8421209411621093} +{"epoch": 0, "iter": 11530, "iter_tflops": 
12.143864412376745, "iter_time": 1.6988903045654296, "loss": 0.4973065257072449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.996237565868359, "step_time": 1.3757513122558593} +{"epoch": 0, "iter": 11531, "iter_tflops": 47.28635674321662, "iter_time": 0.43630118560791015, "loss": 0.5086050033569336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.32177707227203, "step_time": 0.4019949169158936} +{"epoch": 0, "iter": 11532, "iter_tflops": 47.83617708634571, "iter_time": 0.4312864189147949, "loss": 0.3991023898124695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99380630445388, "step_time": 0.39679906082153316} +{"epoch": 0, "iter": 11533, "iter_tflops": 25.346087721275943, "iter_time": 0.8139754638671874, "loss": 0.14649491012096405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.638694217120197, "step_time": 0.7744784088134765} +{"epoch": 0, "iter": 11534, "iter_tflops": 15.66068929004796, "iter_time": 1.317380935668945, "loss": 0.11056549102067947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.8546739342028, "step_time": 1.0942164039611815} +{"epoch": 0, "iter": 11535, "iter_tflops": 43.25642185898977, "iter_time": 0.47694868469238283, "loss": 0.11363052576780319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.64320673704467, "step_time": 0.4330332679748535} +{"epoch": 0, "iter": 11536, "iter_tflops": 52.49082267279455, "iter_time": 0.3930419158935547, "loss": 0.1109481006860733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.997898084751064, "step_time": 0.3619623565673828} +{"epoch": 0, "iter": 11537, "iter_tflops": 37.596221542332785, "iter_time": 0.5487544403076172, "loss": 0.0030052403453737497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.85915518396885, "step_time": 0.5049319648742676} +{"epoch": 0, "iter": 11538, "iter_tflops": 9.94663212514493, "iter_time": 2.0741788024902346, "loss": 0.005667904857546091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.490682255633452, "step_time": 1.6517187042236328} +{"epoch": 0, "iter": 11539, 
"iter_tflops": 15.79731071569416, "iter_time": 1.3059877014160157, "loss": 0.010577342472970486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.609687858152203, "step_time": 1.1715763320922852} +{"epoch": 0, "iter": 11540, "iter_tflops": 33.173963570594125, "iter_time": 0.6219061965942383, "loss": 0.01642378605902195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.43666775762156, "step_time": 0.49789460945129393} +{"epoch": 0, "iter": 11541, "iter_tflops": 16.29252110639544, "iter_time": 1.0583816986083985, "loss": 0.115510493516922, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 17.694409663456685, "step_time": 0.9745284805297852} +{"epoch": 0, "iter": 11542, "iter_tflops": 18.218661122360924, "iter_time": 0.9464859161376953, "loss": 0.20563411712646484, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 20.430946894343002, "step_time": 0.8439993629455566} +{"epoch": 0, "iter": 11543, "iter_tflops": 26.000766324259768, "iter_time": 0.6631999206542969, "loss": 0.22088170051574707, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 27.946451807593125, "step_time": 0.6170266723632812} +{"epoch": 0, "iter": 11544, "iter_tflops": 26.843038374369755, "iter_time": 0.642390251159668, "loss": 0.23077696561813354, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 28.84789732366438, "step_time": 0.5977456855773925} +{"epoch": 0, "iter": 11545, "iter_tflops": 19.24957739948627, "iter_time": 1.0717686462402343, "loss": 0.7495619058609009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.573410515321864, "step_time": 1.0028037643432617} +{"epoch": 0, "iter": 11546, "iter_tflops": 12.379342319385628, "iter_time": 1.6665742797851562, "loss": 0.9345579147338867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.701848890390131, "step_time": 1.3139276561737059} +{"epoch": 0, "iter": 11547, "iter_tflops": 32.62901300713051, "iter_time": 0.6322929077148438, "loss": 0.5641090869903564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.2656389895933, "step_time": 0.5850196990966797} +{"epoch": 0, 
"iter": 11548, "iter_tflops": 36.27599027288496, "iter_time": 0.5687258529663085, "loss": 0.7399507164955139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.47716466811458, "step_time": 0.5226082897186279} +{"epoch": 0, "iter": 11549, "iter_tflops": 19.677681955495157, "iter_time": 1.0484514160156249, "loss": 0.7120527029037476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.96900351768973, "step_time": 0.9838852615356445} +{"epoch": 0, "iter": 11550, "iter_tflops": 13.453327293496729, "iter_time": 1.53353092956543, "loss": 0.5747596621513367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.775520795347587, "step_time": 1.3077915954589845} +{"epoch": 0, "iter": 11551, "iter_tflops": 42.71354081410667, "iter_time": 0.483010612487793, "loss": 0.762553870677948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.927048867938275, "step_time": 0.4492144393920899} +{"epoch": 0, "iter": 11552, "iter_tflops": 42.78332565464529, "iter_time": 0.48222276306152334, "loss": 0.5438511967658997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.78335400650331, "step_time": 0.45062433624267584} +{"epoch": 0, "iter": 11553, "iter_tflops": 48.93043006105578, "iter_time": 0.42164136886596676, "loss": 0.04814310744404793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.7807444772571, "step_time": 0.3836148738861085} +{"epoch": 0, "iter": 11554, "iter_tflops": 37.31760419476035, "iter_time": 0.5528515014648437, "loss": 0.09497462958097458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.20258352615503, "step_time": 0.4888585433959961} +{"epoch": 0, "iter": 11555, "iter_tflops": 40.72472854245202, "iter_time": 0.50659867477417, "loss": 0.08236686140298843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.80074723661397, "step_time": 0.4605077991485595} +{"epoch": 0, "iter": 11556, "iter_tflops": 41.440840255703414, "iter_time": 0.4978444786071778, "loss": 0.08912589401006699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22355564569886, "step_time": 0.4562023754119873} +{"epoch": 
0, "iter": 11557, "iter_tflops": 22.693715612512808, "iter_time": 0.9091104278564452, "loss": 0.15946321189403534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.34295336586571, "step_time": 0.8475180969238283} +{"epoch": 0, "iter": 11558, "iter_tflops": 10.454769828132648, "iter_time": 1.9733665924072266, "loss": 0.11289241909980774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.9416631374216, "step_time": 1.7276566314697266} +{"epoch": 0, "iter": 11559, "iter_tflops": 22.372248871778396, "iter_time": 0.9221734313964843, "loss": 0.12638835608959198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.853149176367502, "step_time": 0.7407095470428466} +{"epoch": 0, "iter": 11560, "iter_tflops": 53.661726377840154, "iter_time": 0.3844657058715821, "loss": 0.12373807281255722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.40935366934357, "step_time": 0.353215576171875} +{"epoch": 0, "iter": 11561, "iter_tflops": 15.59351428228558, "iter_time": 0.9063067245483399, "loss": 0.3649878203868866, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 16.299064819643803, "step_time": 0.8670747070312499} +{"epoch": 0, "iter": 11562, "iter_tflops": 9.664115143061174, "iter_time": 1.4623694610595703, "loss": 0.19524134695529938, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 11.579875221258488, "step_time": 1.2204368858337402} +{"epoch": 0, "iter": 11563, "iter_tflops": 21.36574364377536, "iter_time": 0.6614563522338868, "loss": 0.2910998463630676, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 23.0345865288459, "step_time": 0.6135342102050781} +{"epoch": 0, "iter": 11564, "iter_tflops": 20.517243711145525, "iter_time": 0.6888111801147461, "loss": 0.20350944995880127, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 22.12506481090294, "step_time": 0.638755500793457} +{"epoch": 0, "iter": 11565, "iter_tflops": 18.74701974237613, "iter_time": 1.1004999084472655, "loss": 0.04972664266824722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.24828715317452, "step_time": 1.018905616760254} 
+{"epoch": 0, "iter": 11566, "iter_tflops": 18.991671303363248, "iter_time": 1.0863232192993164, "loss": 0.0333460196852684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.0425067649252, "step_time": 0.7103757839202881} +{"epoch": 0, "iter": 11567, "iter_tflops": 42.353115806069624, "iter_time": 0.48712103271484375, "loss": 0.03352236747741699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.92909671808362, "step_time": 0.4396226425170898} +{"epoch": 0, "iter": 11568, "iter_tflops": 46.67773319545675, "iter_time": 0.44199004745483395, "loss": 0.03941970691084862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.36385617052642, "step_time": 0.40166558837890626} +{"epoch": 0, "iter": 11569, "iter_tflops": 28.851022007339246, "iter_time": 0.715090560913086, "loss": 0.17817597091197968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.136063631949792, "step_time": 0.6626108474731446} +{"epoch": 0, "iter": 11570, "iter_tflops": 8.344135089203307, "iter_time": 2.4725263061523437, "loss": 0.20757576823234558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.676592790644735, "step_time": 1.9323668060302734} +{"epoch": 0, "iter": 11571, "iter_tflops": 14.164621081020229, "iter_time": 1.4565227966308592, "loss": 0.34211817383766174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.16589340290645, "step_time": 1.2018654098510742} +{"epoch": 0, "iter": 11572, "iter_tflops": 38.04000325123902, "iter_time": 0.5423525695800782, "loss": 0.29188624024391174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59686642245779, "step_time": 0.49597710800170897} +{"epoch": 0, "iter": 11573, "iter_tflops": 14.018816654602823, "iter_time": 1.107154083251953, "loss": 0.16954609751701355, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 14.978147886122041, "step_time": 1.0362422790527344} +{"epoch": 0, "iter": 11574, "iter_tflops": 10.897754342811337, "iter_time": 1.4242374725341795, "loss": 0.23305943608283997, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 12.821114826909968, "step_time": 
1.2105803833007813} +{"epoch": 0, "iter": 11575, "iter_tflops": 23.223830794546267, "iter_time": 0.6683217010498047, "loss": 0.29878726601600647, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 24.923629902735904, "step_time": 0.6227419586181641} +{"epoch": 0, "iter": 11576, "iter_tflops": 26.038784470194255, "iter_time": 0.5960719909667969, "loss": 0.2628142833709717, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 27.832321441464032, "step_time": 0.5576606369018554} +{"epoch": 0, "iter": 11577, "iter_tflops": 19.97670758441522, "iter_time": 1.0327574462890625, "loss": 0.2874343991279602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.77625851928276, "step_time": 0.947412223815918} +{"epoch": 0, "iter": 11578, "iter_tflops": 17.501874471247206, "iter_time": 1.1787933654785157, "loss": 0.3954821228981018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.287031891121384, "step_time": 1.0169596824645997} +{"epoch": 0, "iter": 11579, "iter_tflops": 40.31046822015192, "iter_time": 0.5118048591613769, "loss": 0.5250888466835022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.34278451362716, "step_time": 0.4652638244628906} +{"epoch": 0, "iter": 11580, "iter_tflops": 36.4537943734578, "iter_time": 0.5659518814086913, "loss": 0.33993640542030334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.72525236944463, "step_time": 0.5193445549011231} +{"epoch": 0, "iter": 11581, "iter_tflops": 20.654079903865146, "iter_time": 0.9988870773315429, "loss": 0.5043692588806152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.279528218811308, "step_time": 0.9260112380981446} +{"epoch": 0, "iter": 11582, "iter_tflops": 25.958205350795254, "iter_time": 0.7947811965942382, "loss": 0.5632581114768982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.46401901424278, "step_time": 0.7002131481170654} +{"epoch": 0, "iter": 11583, "iter_tflops": 38.67801674776552, "iter_time": 0.5334061889648438, "loss": 0.6035269498825073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.32674475855305, 
"step_time": 0.48742452621459964} +{"epoch": 0, "iter": 11584, "iter_tflops": 37.52291640856045, "iter_time": 0.5498264923095703, "loss": 0.6147076487541199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.122565381693526, "step_time": 0.5016976280212402} +{"epoch": 0, "iter": 11585, "iter_tflops": 32.343915838051124, "iter_time": 0.6378662872314452, "loss": 0.5956312417984009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.29407388947995, "step_time": 0.5845483741760253} +{"epoch": 0, "iter": 11586, "iter_tflops": 35.93513786013038, "iter_time": 0.5741203384399415, "loss": 0.8084236979484558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.38553431076471, "step_time": 0.5238241367340087} +{"epoch": 0, "iter": 11587, "iter_tflops": 35.204861700877444, "iter_time": 0.586029670715332, "loss": 0.6695531606674194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.40602967347982, "step_time": 0.5371837100982666} +{"epoch": 0, "iter": 11588, "iter_tflops": 33.12470755517004, "iter_time": 0.6228309631347657, "loss": 0.6927745938301086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.10579377578312, "step_time": 0.5714067287445068} +{"epoch": 0, "iter": 11589, "iter_tflops": 17.288604034831295, "iter_time": 1.1933348388671874, "loss": 0.6737172603607178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.231481327329796, "step_time": 1.1316191558837891} +{"epoch": 0, "iter": 11590, "iter_tflops": 19.590754099081956, "iter_time": 1.0531035919189453, "loss": 0.8219826817512512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.682883124439503, "step_time": 0.8711394386291504} +{"epoch": 0, "iter": 11591, "iter_tflops": 39.11123952821995, "iter_time": 0.5274978179931641, "loss": 0.727846086025238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.87598171752368, "step_time": 0.49267127990722653} +{"epoch": 0, "iter": 11592, "iter_tflops": 43.23940784518136, "iter_time": 0.4771363563537598, "loss": 0.6138108968734741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
46.373740252542085, "step_time": 0.4448874168395996} +{"epoch": 0, "iter": 11593, "iter_tflops": 41.72622790624304, "iter_time": 0.4835051307678223, "loss": 0.28383946418762207, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 45.38267206942758, "step_time": 0.44454952430725103} +{"epoch": 0, "iter": 11594, "iter_tflops": 35.64673686930182, "iter_time": 0.5659661178588867, "loss": 0.19339655339717865, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 39.46527657881622, "step_time": 0.511204963684082} +{"epoch": 0, "iter": 11595, "iter_tflops": 40.38182484621077, "iter_time": 0.4996021194458008, "loss": 0.2381119579076767, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 44.37254417057727, "step_time": 0.454669563293457} +{"epoch": 0, "iter": 11596, "iter_tflops": 41.90375057652402, "iter_time": 0.4814567909240723, "loss": 0.2511475086212158, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 45.98302470701002, "step_time": 0.43874550247192384} +{"epoch": 0, "iter": 11597, "iter_tflops": 22.554608860696984, "iter_time": 0.914717414855957, "loss": 0.06850245594978333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.179341390187222, "step_time": 0.853252914428711} +{"epoch": 0, "iter": 11598, "iter_tflops": 24.788164505191272, "iter_time": 0.8322961349487306, "loss": 0.09336811304092407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.658074189001763, "step_time": 0.6516850452423096} +{"epoch": 0, "iter": 11599, "iter_tflops": 49.83883204983316, "iter_time": 0.41395619964599617, "loss": 0.062310390174388885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.96275588148239, "step_time": 0.3823209762573242} +{"epoch": 0, "iter": 11600, "iter_tflops": 54.83886540100819, "iter_time": 0.3762129898071289, "loss": 0.06136299669742584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.53647691030854, "step_time": 0.34652862548828123} +{"epoch": 0, "iter": 11601, "iter_tflops": 25.53583110944244, "iter_time": 0.8079272384643554, "loss": 0.3224560022354126, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 26.913586674798218, "step_time": 0.7665679702758789} +{"epoch": 0, "iter": 11602, "iter_tflops": 12.387717762458305, "iter_time": 1.6654474945068358, "loss": 0.3979458212852478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.084210671456201, "step_time": 1.4648384628295898} +{"epoch": 0, "iter": 11603, "iter_tflops": 45.76660188767853, "iter_time": 0.45078927993774415, "loss": 0.35228803753852844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.377520510598664, "step_time": 0.4015587615966797} +{"epoch": 0, "iter": 11604, "iter_tflops": 45.60848472048308, "iter_time": 0.45235209274292, "loss": 0.2988719344139099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.32410450127625, "step_time": 0.41827608871459965} +{"epoch": 0, "iter": 11605, "iter_tflops": 25.91623163440273, "iter_time": 0.42008438491821287, "loss": 0.0017551154596731067, "lr": 3e-05, "seqlen": 4384.0, "step_tflops": 28.683498186688947, "step_time": 0.3795563621520996} +{"epoch": 0, "iter": 11606, "iter_tflops": 28.682213664379592, "iter_time": 0.3795733604431153, "loss": 0.005612295586615801, "lr": 3e-05, "seqlen": 4384.0, "step_tflops": 31.575506765120053, "step_time": 0.3447926998138428} +{"epoch": 0, "iter": 11607, "iter_tflops": 29.860619148842304, "iter_time": 0.36459405517578125, "loss": 0.005098520312458277, "lr": 3e-05, "seqlen": 4384.0, "step_tflops": 32.75040734364705, "step_time": 0.332423475265503} +{"epoch": 0, "iter": 11608, "iter_tflops": 31.92177595230737, "iter_time": 0.3410525856018066, "loss": 0.007812258787453175, "lr": 3e-05, "seqlen": 4384.0, "step_tflops": 34.89514070155711, "step_time": 0.3119919853210449} +{"epoch": 0, "iter": 11609, "iter_tflops": 29.87132828257957, "iter_time": 0.6906654205322265, "loss": 0.06268958747386932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.550460884307817, "step_time": 0.6539078330993653} +{"epoch": 0, "iter": 11610, "iter_tflops": 17.990321991377684, "iter_time": 1.1467884521484375, "loss": 0.1040882095694542, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 24.252199867292425, "step_time": 0.8506895713806153} +{"epoch": 0, "iter": 11611, "iter_tflops": 46.05343147477563, "iter_time": 0.44798167800903316, "loss": 0.09688401967287064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.78457614683355, "step_time": 0.4062472324371338} +{"epoch": 0, "iter": 11612, "iter_tflops": 43.920340722609545, "iter_time": 0.4697389221191407, "loss": 0.05168202146887779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.083222988292434, "step_time": 0.429070520401001} +{"epoch": 0, "iter": 11613, "iter_tflops": 1.9275403786509275, "iter_time": 0.8334657669067383, "loss": 0.9032673835754395, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.070684831814441, "step_time": 0.77584907913208} +{"epoch": 0, "iter": 11614, "iter_tflops": 0.7994212085548641, "iter_time": 2.009627593994141, "loss": 0.9699267745018005, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 0.9246829882082932, "step_time": 1.7373942642211913} +{"epoch": 0, "iter": 11615, "iter_tflops": 1.1355947133048816, "iter_time": 1.4147115173339844, "loss": 0.9359656572341919, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.3417772182272172, "step_time": 1.1973216552734376} +{"epoch": 0, "iter": 11616, "iter_tflops": 1.6276335962389028, "iter_time": 0.987039665222168, "loss": 0.8982986211776733, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.287684519285279, "step_time": 0.7022554492950439} +{"epoch": 0, "iter": 11617, "iter_tflops": 15.135249972658515, "iter_time": 1.0308938064575195, "loss": 0.13825394213199615, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.268023638154403, "step_time": 0.9591106948852539} +{"epoch": 0, "iter": 11618, "iter_tflops": 10.087820884563847, "iter_time": 1.5467002868652344, "loss": 0.29396361112594604, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 13.33477278445394, "step_time": 1.1700863380432127} +{"epoch": 0, "iter": 11619, "iter_tflops": 29.189391512607223, "iter_time": 0.5345378799438476, "loss": 
0.20338253676891327, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 31.049160609901612, "step_time": 0.5025203628540039} +{"epoch": 0, "iter": 11620, "iter_tflops": 29.2153531358646, "iter_time": 0.534062873840332, "loss": 0.15549366176128387, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 31.008326401595617, "step_time": 0.5031821212768555} +{"epoch": 0, "iter": 11621, "iter_tflops": 31.766009523529778, "iter_time": 0.6494707336425781, "loss": 0.3964332938194275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.92272029300178, "step_time": 0.6081792182922364} +{"epoch": 0, "iter": 11622, "iter_tflops": 10.21614866060624, "iter_time": 2.019459014892578, "loss": 0.21694187819957733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.60893144763911, "step_time": 1.4122246780395507} +{"epoch": 0, "iter": 11623, "iter_tflops": 16.69975660521279, "iter_time": 1.2354128265380862, "loss": 0.29312506318092346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.876349741040258, "step_time": 1.0379719505310059} +{"epoch": 0, "iter": 11624, "iter_tflops": 42.89020287773352, "iter_time": 0.48102112197875974, "loss": 0.21284332871437073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.07365338459014, "step_time": 0.44778505706787114} +{"epoch": 0, "iter": 11625, "iter_tflops": 15.799339559023672, "iter_time": 1.0706098175048828, "loss": 0.36181795597076416, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 16.469086015238073, "step_time": 1.0270714492797852} +{"epoch": 0, "iter": 11626, "iter_tflops": 9.937870635976777, "iter_time": 1.702067642211914, "loss": 0.1693783551454544, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 13.336792773859324, "step_time": 1.2682905349731444} +{"epoch": 0, "iter": 11627, "iter_tflops": 31.041987136594877, "iter_time": 0.5449048080444336, "loss": 0.16770458221435547, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 33.09506222757406, "step_time": 0.5111012611389161} +{"epoch": 0, "iter": 11628, "iter_tflops": 30.880175465998416, "iter_time": 
0.5477601013183594, "loss": 0.3013077676296234, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 32.810319560577796, "step_time": 0.5155368270874023} +{"epoch": 0, "iter": 11629, "iter_tflops": 20.659203436890724, "iter_time": 0.9986393508911133, "loss": 0.38854414224624634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.57492624773482, "step_time": 0.9562532577514649} +{"epoch": 0, "iter": 11630, "iter_tflops": 14.167697005206321, "iter_time": 1.456206573486328, "loss": 0.5751526951789856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.295577702560994, "step_time": 1.1276546630859374} +{"epoch": 0, "iter": 11631, "iter_tflops": 39.79747055648015, "iter_time": 0.5184021301269531, "loss": 0.497467577457428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.730905962441526, "step_time": 0.47177375030517577} +{"epoch": 0, "iter": 11632, "iter_tflops": 37.40602720926268, "iter_time": 0.5515446319580077, "loss": 0.3194653391838074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.819581295625646, "step_time": 0.5054214878082275} +{"epoch": 0, "iter": 11633, "iter_tflops": 35.272540805699066, "iter_time": 0.5849052276611328, "loss": 0.09560218453407288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.29891005421697, "step_time": 0.5249787712097168} +{"epoch": 0, "iter": 11634, "iter_tflops": 36.03497506891978, "iter_time": 0.5725297012329102, "loss": 0.08331496268510818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.75095574563161, "step_time": 0.5190087413787842} +{"epoch": 0, "iter": 11635, "iter_tflops": 42.931396274051245, "iter_time": 0.48055957412719724, "loss": 0.10689138621091843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.27559019001126, "step_time": 0.43640054893493646} +{"epoch": 0, "iter": 11636, "iter_tflops": 41.50220969509922, "iter_time": 0.49710831451416015, "loss": 0.13024860620498657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.68010916131052, "step_time": 0.45164282417297363} +{"epoch": 0, "iter": 11637, "iter_tflops": 
32.70821239850808, "iter_time": 0.6307618789672851, "loss": 0.6302962303161621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.781486795908926, "step_time": 0.5765856971740723} +{"epoch": 0, "iter": 11638, "iter_tflops": 34.17579201208715, "iter_time": 0.603675651550293, "loss": 0.5371365547180176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.47113670394506, "step_time": 0.5505862731933594} +{"epoch": 0, "iter": 11639, "iter_tflops": 40.91555511477475, "iter_time": 0.5042359428405763, "loss": 0.5721187591552734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.626760536039505, "step_time": 0.4623031845092774} +{"epoch": 0, "iter": 11640, "iter_tflops": 39.747843083042724, "iter_time": 0.5190493850708008, "loss": 0.6722397208213806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.262312289074885, "step_time": 0.4768837451934814} +{"epoch": 0, "iter": 11641, "iter_tflops": 18.975037659531623, "iter_time": 1.0872754974365233, "loss": 0.13422748446464539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.0955062297251, "step_time": 1.0266520919799806} +{"epoch": 0, "iter": 11642, "iter_tflops": 30.9197111340182, "iter_time": 0.6672472915649414, "loss": 0.15214964747428894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97612150860149, "step_time": 0.44873497009277347} +{"epoch": 0, "iter": 11643, "iter_tflops": 50.21033607054457, "iter_time": 0.41089335632324214, "loss": 0.12340939044952393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.43384449034617, "step_time": 0.37901224327087407} +{"epoch": 0, "iter": 11644, "iter_tflops": 51.141044322274325, "iter_time": 0.4034155693054199, "loss": 0.19504714012145996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.49273217535332, "step_time": 0.3717800998687744} +{"epoch": 0, "iter": 11645, "iter_tflops": 21.861436622960632, "iter_time": 0.9437208480834961, "loss": 0.29808396100997925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.914241801528533, "step_time": 0.9003611679077148} +{"epoch": 0, "iter": 11646, 
"iter_tflops": 14.491626638728313, "iter_time": 1.4236561584472656, "loss": 0.19854043424129486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.551431310800215, "step_time": 1.1121025199890135} +{"epoch": 0, "iter": 11647, "iter_tflops": 38.621498626183595, "iter_time": 0.534186767578125, "loss": 0.30378246307373047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.48314072564622, "step_time": 0.48563013839721675} +{"epoch": 0, "iter": 11648, "iter_tflops": 45.46356889342889, "iter_time": 0.4537939720153808, "loss": 0.16360591351985931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.5617608836064, "step_time": 0.4162703895568848} +{"epoch": 0, "iter": 11649, "iter_tflops": 17.974008427438182, "iter_time": 1.1478292999267576, "loss": 0.5725382566452026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.085114088383857, "step_time": 1.081004463195801} +{"epoch": 0, "iter": 11650, "iter_tflops": 22.08329544641155, "iter_time": 0.9342397994995117, "loss": 0.6188212037086487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.593798688893635, "step_time": 0.8388738059997558} +{"epoch": 0, "iter": 11651, "iter_tflops": 36.921237934165454, "iter_time": 0.5587866134643554, "loss": 0.5837098360061646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.07591077016831, "step_time": 0.514800365447998} +{"epoch": 0, "iter": 11652, "iter_tflops": 35.49840769397531, "iter_time": 0.5811836318969726, "loss": 0.749886691570282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.542641007301214, "step_time": 0.5352797050476074} +{"epoch": 0, "iter": 11653, "iter_tflops": 16.268024524398136, "iter_time": 1.2681990661621094, "loss": 0.023434508591890335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.524247469675974, "step_time": 1.1772884140014648} +{"epoch": 0, "iter": 11654, "iter_tflops": 15.718462221954862, "iter_time": 1.3125389251708985, "loss": 0.07912170141935349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.98927484323415, "step_time": 0.8255979270935059} +{"epoch": 0, 
"iter": 11655, "iter_tflops": 43.75012171230671, "iter_time": 0.4715665397644043, "loss": 0.05022178962826729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.19391495498626, "step_time": 0.42808502960205075} +{"epoch": 0, "iter": 11656, "iter_tflops": 48.82489691354564, "iter_time": 0.42255273056030274, "loss": 0.03211542218923569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.75531526544955, "step_time": 0.38379634475708013} +{"epoch": 0, "iter": 11657, "iter_tflops": 30.617781761396447, "iter_time": 0.673827178955078, "loss": 0.4268319606781006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.437160983657805, "step_time": 0.6170109214782715} +{"epoch": 0, "iter": 11658, "iter_tflops": 9.812816303455538, "iter_time": 2.102464050292969, "loss": 0.44561851024627686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.090385754959852, "step_time": 1.8602683410644534} +{"epoch": 0, "iter": 11659, "iter_tflops": 11.016529775148696, "iter_time": 1.8727397766113278, "loss": 0.28111064434051514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.732082124415292, "step_time": 1.5024009704589845} +{"epoch": 0, "iter": 11660, "iter_tflops": 34.25942703940673, "iter_time": 0.6022019424438476, "loss": 0.32563912868499756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.27639603178825, "step_time": 0.5534626655578613} +{"epoch": 0, "iter": 11661, "iter_tflops": 13.044897647269321, "iter_time": 1.1396721649169923, "loss": 0.19062462449073792, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 13.970383455365637, "step_time": 1.0641731338500977} +{"epoch": 0, "iter": 11662, "iter_tflops": 20.091930838135596, "iter_time": 0.7399441528320313, "loss": 0.27278265357017517, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 21.946005676578654, "step_time": 0.6774310989379883} +{"epoch": 0, "iter": 11663, "iter_tflops": 23.332188464187997, "iter_time": 0.6371844100952149, "loss": 0.23660290241241455, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 25.07469305612935, "step_time": 
0.5929048347473145} +{"epoch": 0, "iter": 11664, "iter_tflops": 22.859171029591565, "iter_time": 0.6503694610595703, "loss": 0.2588278651237488, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.546821044942362, "step_time": 0.6056550750732421} +{"epoch": 0, "iter": 11665, "iter_tflops": 19.02167401393106, "iter_time": 1.0846097717285157, "loss": 0.3944067060947418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.280790632636506, "step_time": 1.0172726440429687} +{"epoch": 0, "iter": 11666, "iter_tflops": 9.406211819722593, "iter_time": 2.1933477478027346, "loss": 0.1930903196334839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.987649015538832, "step_time": 1.7210291595458982} +{"epoch": 0, "iter": 11667, "iter_tflops": 11.83310252069061, "iter_time": 1.7435066986083987, "loss": 0.18717873096466064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.493049214357729, "step_time": 1.4235164184570315} +{"epoch": 0, "iter": 11668, "iter_tflops": 38.5191526751149, "iter_time": 0.5356061096191407, "loss": 0.22638344764709473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.183807401155306, "step_time": 0.489076135635376} +{"epoch": 0, "iter": 11669, "iter_tflops": 17.70973115478163, "iter_time": 0.8233397216796875, "loss": 0.30997952818870544, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 19.130011252614867, "step_time": 0.7622120513916015} +{"epoch": 0, "iter": 11670, "iter_tflops": 25.02512961387684, "iter_time": 0.5826593246459961, "loss": 0.16432024538516998, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 26.788414185812254, "step_time": 0.5443071403503418} +{"epoch": 0, "iter": 11671, "iter_tflops": 25.737577595487455, "iter_time": 0.5665305938720703, "loss": 0.26351282000541687, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.47867726907808, "step_time": 0.5306341705322266} +{"epoch": 0, "iter": 11672, "iter_tflops": 24.39204704281117, "iter_time": 0.5977819366455079, "loss": 0.4526081383228302, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 25.868255201047305, 
"step_time": 0.5636686744689942} +{"epoch": 0, "iter": 11673, "iter_tflops": 23.777836006541484, "iter_time": 0.5242451095581054, "loss": 0.0060565657913684845, "lr": 3e-05, "seqlen": 5008.0, "step_tflops": 25.7808407340718, "step_time": 0.4835146522521972} +{"epoch": 0, "iter": 11674, "iter_tflops": 8.336026481783305, "iter_time": 1.495366439819336, "loss": 0.010158811695873737, "lr": 3e-05, "seqlen": 5008.0, "step_tflops": 10.540411653456955, "step_time": 1.1826306838989258} +{"epoch": 0, "iter": 11675, "iter_tflops": 6.168362626122138, "iter_time": 2.020862747192383, "loss": 0.004259004257619381, "lr": 3e-05, "seqlen": 5008.0, "step_tflops": 7.138210454867651, "step_time": 1.7462940216064455} +{"epoch": 0, "iter": 11676, "iter_tflops": 10.727678752488991, "iter_time": 1.1619861602783204, "loss": 0.004881844390183687, "lr": 3e-05, "seqlen": 5008.0, "step_tflops": 14.001273251989721, "step_time": 0.8903057613372802} +{"epoch": 0, "iter": 11677, "iter_tflops": 11.980899263860739, "iter_time": 1.2102193145751954, "loss": 0.4091574251651764, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 12.755981948598217, "step_time": 1.1366836166381835} +{"epoch": 0, "iter": 11678, "iter_tflops": 9.284289939063521, "iter_time": 1.5617258605957032, "loss": 0.2792202830314636, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 10.806777825710492, "step_time": 1.3417057266235353} +{"epoch": 0, "iter": 11679, "iter_tflops": 22.492025263764123, "iter_time": 0.6446514053344726, "loss": 0.2999848425388336, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.243919176395735, "step_time": 0.5980681419372559} +{"epoch": 0, "iter": 11680, "iter_tflops": 23.284102087869307, "iter_time": 0.6227217025756836, "loss": 0.1913497895002365, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.987068795603477, "step_time": 0.580280776977539} +{"epoch": 0, "iter": 11681, "iter_tflops": 29.065663289725663, "iter_time": 0.7098098297119142, "loss": 0.0013395885471254587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
31.591238786451434, "step_time": 0.6530637702941895} +{"epoch": 0, "iter": 11682, "iter_tflops": 7.972871186877023, "iter_time": 2.5876617126464847, "loss": 0.004468181170523167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.0382898138248, "step_time": 2.055239875793457} +{"epoch": 0, "iter": 11683, "iter_tflops": 11.82715749884642, "iter_time": 1.7443830871582031, "loss": 0.005513500422239304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.300866981287703, "step_time": 1.4426463470458983} +{"epoch": 0, "iter": 11684, "iter_tflops": 41.88817023818155, "iter_time": 0.49252792358398434, "loss": 0.008737095631659031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.44673129176815, "step_time": 0.4172387733459472} +{"epoch": 0, "iter": 11685, "iter_tflops": 16.47274703185939, "iter_time": 0.9397384948730468, "loss": 0.42591971158981323, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 17.419525014994303, "step_time": 0.8886622619628907} +{"epoch": 0, "iter": 11686, "iter_tflops": 10.602332344824367, "iter_time": 1.4600631256103516, "loss": 0.24607089161872864, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 12.682867027708081, "step_time": 1.2205500907897948} +{"epoch": 0, "iter": 11687, "iter_tflops": 21.889089387180352, "iter_time": 0.7072050476074219, "loss": 0.28026601672172546, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 23.582723337720893, "step_time": 0.6564158973693849} +{"epoch": 0, "iter": 11688, "iter_tflops": 26.56077781763, "iter_time": 0.5828170623779296, "loss": 0.12924297153949738, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.396986465387148, "step_time": 0.5451308898925782} +{"epoch": 0, "iter": 11689, "iter_tflops": 24.079599640323142, "iter_time": 0.8567872314453125, "loss": 0.029693584889173508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.724961612686123, "step_time": 0.8019873390197753} +{"epoch": 0, "iter": 11690, "iter_tflops": 23.139794136045925, "iter_time": 0.8915850067138671, "loss": 0.03266672044992447, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 28.088188751373746, "step_time": 0.7345113525390624} +{"epoch": 0, "iter": 11691, "iter_tflops": 50.27736058457231, "iter_time": 0.4103455963134766, "loss": 0.02712791971862316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.04399617296903, "step_time": 0.3748109683990479} +{"epoch": 0, "iter": 11692, "iter_tflops": 47.8519104357812, "iter_time": 0.4311446151733398, "loss": 0.05449811741709709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.792271103788174, "step_time": 0.39834309387207034} +{"epoch": 0, "iter": 11693, "iter_tflops": 36.564707310834216, "iter_time": 0.5642351608276367, "loss": 0.5691148638725281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.29204379843299, "step_time": 0.5250705108642579} +{"epoch": 0, "iter": 11694, "iter_tflops": 12.735484378116286, "iter_time": 1.6199692840576172, "loss": 0.49262532591819763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.634224014734967, "step_time": 1.319610969543457} +{"epoch": 0, "iter": 11695, "iter_tflops": 36.90254614347689, "iter_time": 0.5590696487426757, "loss": 0.6438993811607361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.325195693208606, "step_time": 0.5116179389953613} +{"epoch": 0, "iter": 11696, "iter_tflops": 37.0634266255076, "iter_time": 0.5566429061889648, "loss": 0.48195090889930725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.285636236728, "step_time": 0.5121203346252442} +{"epoch": 0, "iter": 11697, "iter_tflops": 17.749328735767293, "iter_time": 1.1623590850830077, "loss": 0.17856436967849731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.02442620105781, "step_time": 1.0844528656005858} +{"epoch": 0, "iter": 11698, "iter_tflops": 23.20109341916521, "iter_time": 0.8892293624877929, "loss": 0.26663798093795776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.558967601328526, "step_time": 0.7224033374786376} +{"epoch": 0, "iter": 11699, "iter_tflops": 46.48173690891324, "iter_time": 0.44385375595092774, "loss": 0.23968100547790527, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 50.456120653499504, "step_time": 0.40889179039001466} +{"epoch": 0, "iter": 11700, "iter_tflops": 47.86466028797038, "iter_time": 0.43102976989746095, "loss": 0.39793407917022705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.03788781663286, "step_time": 0.39646293067932126} +{"epoch": 0, "iter": 11701, "iter_tflops": 28.118384058527184, "iter_time": 0.7337225875854492, "loss": 0.6001654267311096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.801746736811744, "step_time": 0.6922779960632324} +{"epoch": 0, "iter": 11702, "iter_tflops": 11.5347114191028, "iter_time": 1.7886094207763672, "loss": 0.6469513773918152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.06670422853605, "step_time": 1.4666614990234375} +{"epoch": 0, "iter": 11703, "iter_tflops": 15.642682134035939, "iter_time": 1.318897445678711, "loss": 0.6128511428833008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.145749505029546, "step_time": 1.1369656295776367} +{"epoch": 0, "iter": 11704, "iter_tflops": 26.99147208930247, "iter_time": 0.7643559951782226, "loss": 0.5569871068000793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.12194636850771, "step_time": 0.622882884979248} +{"epoch": 0, "iter": 11705, "iter_tflops": 21.983624736297802, "iter_time": 0.7023029098510741, "loss": 0.19167868793010712, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.965335710012138, "step_time": 0.6442289733886719} +{"epoch": 0, "iter": 11706, "iter_tflops": 23.45027294480863, "iter_time": 0.6583788452148437, "loss": 0.1410597860813141, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.30605884977005, "step_time": 0.6100975151062011} +{"epoch": 0, "iter": 11707, "iter_tflops": 23.295177519366042, "iter_time": 0.6627622222900391, "loss": 0.33047598600387573, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.0493500583948, "step_time": 0.6163498687744141} +{"epoch": 0, "iter": 11708, "iter_tflops": 22.022403795333705, "iter_time": 0.7010662307739258, "loss": 0.3178481459617615, 
"lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.66846514703208, "step_time": 0.652309455871582} +{"epoch": 0, "iter": 11709, "iter_tflops": 24.712316835947483, "iter_time": 0.8348506393432618, "loss": 0.6430612206459045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.393862753990476, "step_time": 0.7816625289916992} +{"epoch": 0, "iter": 11710, "iter_tflops": 18.214158646934443, "iter_time": 1.1326953887939453, "loss": 0.5373958945274353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.47892650854881, "step_time": 0.8787068481445313} +{"epoch": 0, "iter": 11711, "iter_tflops": 39.0574788727413, "iter_time": 0.528223892211914, "loss": 0.7338080406188965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.58147948835506, "step_time": 0.4845086116790771} +{"epoch": 0, "iter": 11712, "iter_tflops": 39.782232255162214, "iter_time": 0.5186007003784179, "loss": 0.7916689515113831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99928162596177, "step_time": 0.4798008880615235} +{"epoch": 0, "iter": 11713, "iter_tflops": 26.326066768017977, "iter_time": 0.7836754989624024, "loss": 0.8802315592765808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.012836321218156, "step_time": 0.7364871330261231} +{"epoch": 0, "iter": 11714, "iter_tflops": 12.043964019956583, "iter_time": 1.7129819946289064, "loss": 0.6883988976478577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.610366649308483, "step_time": 1.4120859527587892} +{"epoch": 0, "iter": 11715, "iter_tflops": 17.186274944799294, "iter_time": 1.2004400939941406, "loss": 0.7349302172660828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.11023516663509, "step_time": 1.0259001617431642} +{"epoch": 0, "iter": 11716, "iter_tflops": 14.793315151834546, "iter_time": 1.3946227264404294, "loss": 0.5859745144844055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.12351143834954, "step_time": 0.9766886329650879} +{"epoch": 0, "iter": 11717, "iter_tflops": 17.3596353530582, "iter_time": 0.9460064544677734, "loss": 
0.24109138548374176, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 18.19366203835918, "step_time": 0.9026399993896485} +{"epoch": 0, "iter": 11718, "iter_tflops": 10.071198084296457, "iter_time": 1.6306229858398436, "loss": 0.20495042204856873, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 12.988064790076134, "step_time": 1.2644167823791503} +{"epoch": 0, "iter": 11719, "iter_tflops": 25.226989181212353, "iter_time": 0.6509824447631836, "loss": 0.23126080632209778, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 27.081533345810715, "step_time": 0.606403148651123} +{"epoch": 0, "iter": 11720, "iter_tflops": 25.556361982839864, "iter_time": 0.6425925216674805, "loss": 0.3204851746559143, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 27.44200556767752, "step_time": 0.5984375686645508} +{"epoch": 0, "iter": 11721, "iter_tflops": 19.88950065868039, "iter_time": 1.03728564453125, "loss": 0.8350378274917603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.441252627878825, "step_time": 0.9622149353027344} +{"epoch": 0, "iter": 11722, "iter_tflops": 16.28755170760257, "iter_time": 1.2666786193847657, "loss": 0.7637540102005005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.46433173434195, "step_time": 1.05994358253479} +{"epoch": 0, "iter": 11723, "iter_tflops": 43.16787142197671, "iter_time": 0.47792705154418946, "loss": 0.6679884195327759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.62351911285676, "step_time": 0.4425039958953858} +{"epoch": 0, "iter": 11724, "iter_tflops": 42.9885766002315, "iter_time": 0.47992036819458006, "loss": 0.5464895367622375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.138296393673166, "step_time": 0.447157678604126} +{"epoch": 0, "iter": 11725, "iter_tflops": 35.14478272540862, "iter_time": 0.5870314712524414, "loss": 0.3606869876384735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.61758222966226, "step_time": 0.5484428367614747} +{"epoch": 0, "iter": 11726, "iter_tflops": 49.20512929661502, "iter_time": 0.4192874565124512, 
"loss": 0.3779720067977905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.86737326317053, "step_time": 0.3829979496002197} +{"epoch": 0, "iter": 11727, "iter_tflops": 49.418423036471424, "iter_time": 0.41747777938842767, "loss": 0.2695782482624054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.411952705140266, "step_time": 0.38626360702514645} +{"epoch": 0, "iter": 11728, "iter_tflops": 48.53220829500646, "iter_time": 0.4251010665893554, "loss": 0.3844761550426483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.461446200206474, "step_time": 0.39326200485229496} +{"epoch": 0, "iter": 11729, "iter_tflops": 34.020896594553285, "iter_time": 0.606424156188965, "loss": 0.03468490391969681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.276793487356116, "step_time": 0.5687132606506348} +{"epoch": 0, "iter": 11730, "iter_tflops": 19.118133179179303, "iter_time": 1.079137451171875, "loss": 0.02313101477921009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.723882908897146, "step_time": 0.9496964054107666} +{"epoch": 0, "iter": 11731, "iter_tflops": 57.23177349453889, "iter_time": 0.3604832115173339, "loss": 0.029420189559459686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.825371672634624, "step_time": 0.3283879261016846} +{"epoch": 0, "iter": 11732, "iter_tflops": 56.74992421499924, "iter_time": 0.36354398345947264, "loss": 0.023114344105124474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.680524672968176, "step_time": 0.33448310661315916} +{"epoch": 0, "iter": 11733, "iter_tflops": 33.10405645003056, "iter_time": 0.5869216918945311, "loss": 0.08881650865077972, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 35.542340274617295, "step_time": 0.5466575546264648} +{"epoch": 0, "iter": 11734, "iter_tflops": 44.45159943482598, "iter_time": 0.43709313201904293, "loss": 0.11722689121961594, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 48.77875427682454, "step_time": 0.3983186759948731} +{"epoch": 0, "iter": 11735, "iter_tflops": 48.07971392122876, "iter_time": 
0.4041099090576172, "loss": 0.11529144644737244, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 52.44489239558605, "step_time": 0.3704743766784668} +{"epoch": 0, "iter": 11736, "iter_tflops": 47.83404014164439, "iter_time": 0.4061854019165039, "loss": 0.1442526876926422, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 51.80964649004544, "step_time": 0.37501681900024414} +{"epoch": 0, "iter": 11737, "iter_tflops": 27.12812805564499, "iter_time": 0.7605056076049804, "loss": 0.5043292045593262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.615672959084893, "step_time": 0.7209718093872071} +{"epoch": 0, "iter": 11738, "iter_tflops": 16.090545041067788, "iter_time": 1.2821873626708984, "loss": 0.39781299233436584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.475719539056616, "step_time": 1.0593238143920898} +{"epoch": 0, "iter": 11739, "iter_tflops": 46.308382552566485, "iter_time": 0.4455153121948242, "loss": 0.4632035195827484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.297038309508906, "step_time": 0.4101850566864013} +{"epoch": 0, "iter": 11740, "iter_tflops": 47.62573009357291, "iter_time": 0.4331921730041504, "loss": 0.4910021424293518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.4737434867842, "step_time": 0.40080810356140134} +{"epoch": 0, "iter": 11741, "iter_tflops": 30.837792189740497, "iter_time": 0.6690197982788086, "loss": 0.5712646842002869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.861613817551124, "step_time": 0.6278174171447753} +{"epoch": 0, "iter": 11742, "iter_tflops": 13.348919298885978, "iter_time": 1.545525375366211, "loss": 0.7710208296775818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.194740971644897, "step_time": 1.2739378509521484} +{"epoch": 0, "iter": 11743, "iter_tflops": 34.49982449450447, "iter_time": 0.5980057525634765, "loss": 0.5366398692131042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.5661766043628, "step_time": 0.5491933269500732} +{"epoch": 0, "iter": 11744, "iter_tflops": 36.0570395366451, 
"iter_time": 0.5721793518066407, "loss": 0.6587511301040649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.19941642338573, "step_time": 0.5263112411499024} +{"epoch": 0, "iter": 11745, "iter_tflops": 21.31172552500507, "iter_time": 0.9680630264282226, "loss": 0.3068127930164337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.89879155807678, "step_time": 0.9009686584472656} +{"epoch": 0, "iter": 11746, "iter_tflops": 14.193041756090157, "iter_time": 1.453606201171875, "loss": 0.28653356432914734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.366947074938125, "step_time": 1.1879516544342041} +{"epoch": 0, "iter": 11747, "iter_tflops": 39.664938486159635, "iter_time": 0.5201342620849609, "loss": 0.17403154075145721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.17834327843175, "step_time": 0.47781114196777347} +{"epoch": 0, "iter": 11748, "iter_tflops": 36.548795544054954, "iter_time": 0.5644808044433594, "loss": 0.24102742969989777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.971376003920746, "step_time": 0.516146692276001} +{"epoch": 0, "iter": 11749, "iter_tflops": 16.064157624345537, "iter_time": 1.2842935180664061, "loss": 0.02368585206568241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.98593762829218, "step_time": 1.2145984497070312} +{"epoch": 0, "iter": 11750, "iter_tflops": 28.472152836892388, "iter_time": 0.7246060256958007, "loss": 0.09193427860736847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.157530309991266, "step_time": 0.5868186225891113} +{"epoch": 0, "iter": 11751, "iter_tflops": 44.06991164764594, "iter_time": 0.46814465332031246, "loss": 0.037339940667152405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.5273704811303, "step_time": 0.4251434459686279} +{"epoch": 0, "iter": 11752, "iter_tflops": 41.14879708211274, "iter_time": 0.5013778038024903, "loss": 0.03114951215684414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.23420863714418, "step_time": 0.4560949363708496} +{"epoch": 0, "iter": 11753, "iter_tflops": 
19.051911132457736, "iter_time": 1.0828883972167969, "loss": 0.03589271008968353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.56280210868587, "step_time": 1.0033211135864257} +{"epoch": 0, "iter": 11754, "iter_tflops": 17.43809493648202, "iter_time": 1.1831047821044922, "loss": 0.0266573466360569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.392495559070714, "step_time": 0.9644079837799073} +{"epoch": 0, "iter": 11755, "iter_tflops": 36.5239171163952, "iter_time": 0.5648653030395507, "loss": 0.02395526133477688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.99444087061835, "step_time": 0.5158490295410156} +{"epoch": 0, "iter": 11756, "iter_tflops": 40.79606846982294, "iter_time": 0.5057127876281738, "loss": 0.05645903944969177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92118088730531, "step_time": 0.45927317810058593} +{"epoch": 0, "iter": 11757, "iter_tflops": 18.11726591609306, "iter_time": 1.138753143310547, "loss": 0.6599865555763245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.203977425640268, "step_time": 1.0743135681152343} +{"epoch": 0, "iter": 11758, "iter_tflops": 14.525852892350859, "iter_time": 1.4203016967773436, "loss": 0.7252651453018188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.207595968895873, "step_time": 1.0209573440551758} +{"epoch": 0, "iter": 11759, "iter_tflops": 37.73536156667128, "iter_time": 0.5467310409545898, "loss": 0.8062707781791687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.05770837454112, "step_time": 0.5024901371002197} +{"epoch": 0, "iter": 11760, "iter_tflops": 38.21199401222663, "iter_time": 0.5399114608764648, "loss": 0.6861967444419861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.244214106339115, "step_time": 0.5002178840637206} +{"epoch": 0, "iter": 11761, "iter_tflops": 31.2565469296573, "iter_time": 0.6600567092895507, "loss": 0.051631517708301544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.52250970366422, "step_time": 0.5976127948760986} +{"epoch": 0, "iter": 11762, 
"iter_tflops": 43.53353225994532, "iter_time": 0.47391269302368166, "loss": 0.05296594277024269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.782347320783124, "step_time": 0.42292129516601557} +{"epoch": 0, "iter": 11763, "iter_tflops": 54.285924642601834, "iter_time": 0.38004498672485343, "loss": 0.04533378779888153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.45413584725867, "step_time": 0.34700855064392083} +{"epoch": 0, "iter": 11764, "iter_tflops": 58.28805355378307, "iter_time": 0.35395063400268556, "loss": 0.03545252978801727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.57677395222918, "step_time": 0.32450676918029786} +{"epoch": 0, "iter": 11765, "iter_tflops": 48.4161453026776, "iter_time": 0.42612011718750004, "loss": 0.20903447270393372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.01793399523396, "step_time": 0.389134241104126} +{"epoch": 0, "iter": 11766, "iter_tflops": 34.472246894019236, "iter_time": 0.5984841537475586, "loss": 0.16026152670383453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82495663305123, "step_time": 0.5454360122680664} +{"epoch": 0, "iter": 11767, "iter_tflops": 35.18633268887304, "iter_time": 0.5863382720947266, "loss": 0.2056678980588913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.305328753389695, "step_time": 0.5385959129333496} +{"epoch": 0, "iter": 11768, "iter_tflops": 38.06730471216712, "iter_time": 0.5419636001586914, "loss": 0.18032370507717133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6944168405893, "step_time": 0.4948166942596436} +{"epoch": 0, "iter": 11769, "iter_tflops": 11.552757954825848, "iter_time": 1.1915665740966797, "loss": 0.012582292780280113, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 12.393395617056054, "step_time": 1.1107432250976563} +{"epoch": 0, "iter": 11770, "iter_tflops": 11.35588463581279, "iter_time": 1.212224380493164, "loss": 0.0062709953635931015, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 13.741890463715258, "step_time": 1.001745738983154} +{"epoch": 
0, "iter": 11771, "iter_tflops": 37.327821741367124, "iter_time": 0.3687833786010742, "loss": 0.00543404184281826, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 41.10762748806248, "step_time": 0.3348741111755371} +{"epoch": 0, "iter": 11772, "iter_tflops": 38.51587936907815, "iter_time": 0.357407917022705, "loss": 0.007207301910966635, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 42.36494428314772, "step_time": 0.3249356384277344} +{"epoch": 0, "iter": 11773, "iter_tflops": 32.08280751754299, "iter_time": 0.6430576095581054, "loss": 0.22855770587921143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.23231352738151, "step_time": 0.602678913116455} +{"epoch": 0, "iter": 11774, "iter_tflops": 36.58680669176715, "iter_time": 0.5638943481445312, "loss": 0.262078195810318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.249503943365234, "step_time": 0.5001537361145021} +{"epoch": 0, "iter": 11775, "iter_tflops": 47.06161965105501, "iter_time": 0.4383846893310547, "loss": 0.25048595666885376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.8632234929912, "step_time": 0.4056190719604492} +{"epoch": 0, "iter": 11776, "iter_tflops": 46.11097964029997, "iter_time": 0.4474225807189941, "loss": 0.2905272841453552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.53908434724207, "step_time": 0.4164609375} +{"epoch": 0, "iter": 11777, "iter_tflops": 30.213568294072548, "iter_time": 0.6526593551635742, "loss": 0.14062373340129852, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 32.13494262373471, "step_time": 0.6136363220214844} +{"epoch": 0, "iter": 11778, "iter_tflops": 13.419559551771489, "iter_time": 1.469434814453125, "loss": 0.10336355119943619, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 16.497574240292554, "step_time": 1.1952768154144289} +{"epoch": 0, "iter": 11779, "iter_tflops": 48.394006252893234, "iter_time": 0.4074712867736816, "loss": 0.08384383469820023, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 53.04917602904062, "step_time": 0.3717148780822754} 
+{"epoch": 0, "iter": 11780, "iter_tflops": 50.94243943647603, "iter_time": 0.38708723449707033, "loss": 0.10764535516500473, "lr": 3e-05, "seqlen": 7840.0, "step_tflops": 55.31466067582172, "step_time": 0.35649080657958976} +{"epoch": 0, "iter": 11781, "iter_tflops": 30.18650378730868, "iter_time": 0.6834542236328125, "loss": 0.6841332912445068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.03642612890641, "step_time": 0.6439886093139648} +{"epoch": 0, "iter": 11782, "iter_tflops": 8.014192635853858, "iter_time": 2.5743196411132816, "loss": 0.6964177489280701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.63790360683041, "step_time": 2.1406204452514648} +{"epoch": 0, "iter": 11783, "iter_tflops": 12.260120999937703, "iter_time": 1.6827805786132812, "loss": 0.6210734844207764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.052744450295721, "step_time": 1.4681184577941895} +{"epoch": 0, "iter": 11784, "iter_tflops": 27.11659980682758, "iter_time": 0.7608289260864258, "loss": 0.650478720664978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.0876757390411, "step_time": 0.6856991443634033} +{"epoch": 0, "iter": 11785, "iter_tflops": 11.634145844047266, "iter_time": 1.3376066131591797, "loss": 0.30564263463020325, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 12.455827940488103, "step_time": 1.249367805480957} +{"epoch": 0, "iter": 11786, "iter_tflops": 13.366045867073652, "iter_time": 1.1642867736816407, "loss": 0.23757624626159668, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 16.002466356601524, "step_time": 0.972469497680664} +{"epoch": 0, "iter": 11787, "iter_tflops": 29.32235083334324, "iter_time": 0.5307183761596679, "loss": 0.2774639129638672, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 31.191589583752357, "step_time": 0.49891366958618166} +{"epoch": 0, "iter": 11788, "iter_tflops": 27.553831692499088, "iter_time": 0.5647820816040039, "loss": 0.2086847424507141, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 29.35497360414882, "step_time": 
0.5301285781860352} +{"epoch": 0, "iter": 11789, "iter_tflops": 37.19345790835602, "iter_time": 0.5546968383789063, "loss": 0.3156462609767914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.93770450809812, "step_time": 0.5165818557739258} +{"epoch": 0, "iter": 11790, "iter_tflops": 9.454167322178652, "iter_time": 2.18222216796875, "loss": 0.32603612542152405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.840647091091267, "step_time": 1.9031238021850587} +{"epoch": 0, "iter": 11791, "iter_tflops": 10.281232342046728, "iter_time": 2.0066751556396487, "loss": 0.2894918620586395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.685611206650066, "step_time": 1.6263381538391113} +{"epoch": 0, "iter": 11792, "iter_tflops": 22.092182642068153, "iter_time": 0.9338639755249025, "loss": 0.29924777150154114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.49148161732556, "step_time": 0.6766182689666749} +{"epoch": 0, "iter": 11793, "iter_tflops": 14.262259579977163, "iter_time": 1.0538522491455078, "loss": 0.3686731159687042, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 14.852910153883414, "step_time": 1.0119440689086914} +{"epoch": 0, "iter": 11794, "iter_tflops": 11.093456316761351, "iter_time": 1.354881103515625, "loss": 0.2769242823123932, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.24002549745954, "step_time": 1.135217929840088} +{"epoch": 0, "iter": 11795, "iter_tflops": 25.39352596935544, "iter_time": 0.591895523071289, "loss": 0.23658640682697296, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 26.99572644123808, "step_time": 0.5567664337158204} +{"epoch": 0, "iter": 11796, "iter_tflops": 28.679594767454255, "iter_time": 0.5240769424438477, "loss": 0.22422659397125244, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 30.443584870247108, "step_time": 0.49371039581298826} +{"epoch": 0, "iter": 11797, "iter_tflops": 22.309316581748938, "iter_time": 0.9247747879028321, "loss": 0.4120990037918091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.381957473461398, 
"step_time": 0.8823509979248046} +{"epoch": 0, "iter": 11798, "iter_tflops": 15.839397562321505, "iter_time": 1.302517562866211, "loss": 0.6331496834754944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.705212125689414, "step_time": 0.9964202919006347} +{"epoch": 0, "iter": 11799, "iter_tflops": 37.439927790614476, "iter_time": 0.5510452270507813, "loss": 0.44221246242523193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.219850599562456, "step_time": 0.5005135440826417} +{"epoch": 0, "iter": 11800, "iter_tflops": 37.87265968242243, "iter_time": 0.5447490005493164, "loss": 0.5837268829345703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.40836465743609, "step_time": 0.49823492622375487} +{"epoch": 0, "iter": 11801, "iter_tflops": 19.012064602421617, "iter_time": 1.085157974243164, "loss": 0.34610289335250854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.19946479610278, "step_time": 1.021368324279785} +{"epoch": 0, "iter": 11802, "iter_tflops": 16.343699634557193, "iter_time": 1.2623270111083986, "loss": 0.3641081750392914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.00991895676303, "step_time": 0.9373543605804443} +{"epoch": 0, "iter": 11803, "iter_tflops": 45.248210456915366, "iter_time": 0.45595380020141596, "loss": 0.3443586826324463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.71185402147512, "step_time": 0.4235333251953124} +{"epoch": 0, "iter": 11804, "iter_tflops": 48.545023941203574, "iter_time": 0.424988842010498, "loss": 0.2855284512042999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.32972554972659, "step_time": 0.394251895904541} +{"epoch": 0, "iter": 11805, "iter_tflops": 32.73211638046058, "iter_time": 0.6303012390136719, "loss": 0.10871611535549164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.94370856984602, "step_time": 0.5904093856811524} +{"epoch": 0, "iter": 11806, "iter_tflops": 10.749147136875866, "iter_time": 1.919323760986328, "loss": 0.10012239217758179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
11.87534577037213, "step_time": 1.737304656982422} +{"epoch": 0, "iter": 11807, "iter_tflops": 10.581075631882424, "iter_time": 1.9498106079101563, "loss": 0.11313771456480026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.327624600862304, "step_time": 1.6735660095214844} +{"epoch": 0, "iter": 11808, "iter_tflops": 22.556374895460554, "iter_time": 0.9146457977294922, "loss": 0.08367627114057541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.95408777783228, "step_time": 0.6665062675476074} +{"epoch": 0, "iter": 11809, "iter_tflops": 19.723667688920866, "iter_time": 0.8700959472656251, "loss": 0.1487388163805008, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 20.740881752501156, "step_time": 0.8274230346679687} +{"epoch": 0, "iter": 11810, "iter_tflops": 10.960017439316275, "iter_time": 1.5658262786865236, "loss": 0.1929519921541214, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 12.835946725065948, "step_time": 1.3369861755371093} +{"epoch": 0, "iter": 11811, "iter_tflops": 25.602654068253077, "iter_time": 0.6703009490966797, "loss": 0.278189480304718, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 27.455872557398855, "step_time": 0.625056926727295} +{"epoch": 0, "iter": 11812, "iter_tflops": 24.980686823091894, "iter_time": 0.6869900512695313, "loss": 0.207095667719841, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 26.81240481199354, "step_time": 0.6400575942993165} +{"epoch": 0, "iter": 11813, "iter_tflops": 26.063799787650282, "iter_time": 0.7915612335205079, "loss": 0.0017217063577845693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.0995361059782, "step_time": 0.7342147369384766} +{"epoch": 0, "iter": 11814, "iter_tflops": 7.664451503443011, "iter_time": 2.6917899475097657, "loss": 0.025464346632361412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.225256976130828, "step_time": 2.0176601486206054} +{"epoch": 0, "iter": 11815, "iter_tflops": 13.47407344056018, "iter_time": 1.5311697387695313, "loss": 0.019367506727576256, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 16.332609021881883, "step_time": 1.263184190750122} +{"epoch": 0, "iter": 11816, "iter_tflops": 43.72460992363948, "iter_time": 0.47184168243408203, "loss": 0.014049471355974674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.23689082216646, "step_time": 0.427703634262085} +{"epoch": 0, "iter": 11817, "iter_tflops": 12.482565334154009, "iter_time": 1.1975584106445314, "loss": 0.20212028920650482, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 13.415623628338652, "step_time": 1.1142680740356448} +{"epoch": 0, "iter": 11818, "iter_tflops": 11.551830992998907, "iter_time": 1.2940460357666015, "loss": 0.2251581847667694, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 17.596663161889015, "step_time": 0.8495133972167968} +{"epoch": 0, "iter": 11819, "iter_tflops": 20.7800714285649, "iter_time": 0.7193719787597656, "loss": 0.1901223361492157, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 22.35893027912678, "step_time": 0.6685740737915039} +{"epoch": 0, "iter": 11820, "iter_tflops": 23.747990015341422, "iter_time": 0.6294680557250978, "loss": 0.2307879626750946, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 25.492449043500674, "step_time": 0.5863932914733887} +{"epoch": 0, "iter": 11821, "iter_tflops": 41.42515190927486, "iter_time": 0.49803302001953126, "loss": 0.10562524199485779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.00895974566424, "step_time": 0.4484146919250489} +{"epoch": 0, "iter": 11822, "iter_tflops": 47.06042716877095, "iter_time": 0.4383957977294922, "loss": 0.14000648260116577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.38013427044506, "step_time": 0.40153833389282223} +{"epoch": 0, "iter": 11823, "iter_tflops": 51.57197101916085, "iter_time": 0.40004469680786126, "loss": 0.14313271641731262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.118093567550886, "step_time": 0.3676371059417725} +{"epoch": 0, "iter": 11824, "iter_tflops": 48.934310512534125, "iter_time": 0.42160793304443356, "loss": 0.1006498634815216, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 53.19773026753294, "step_time": 0.387819055557251} +{"epoch": 0, "iter": 11825, "iter_tflops": 40.08178410510788, "iter_time": 0.5147249298095704, "loss": 0.6540502905845642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.43223996159741, "step_time": 0.4750179481506348} +{"epoch": 0, "iter": 11826, "iter_tflops": 34.59138982263085, "iter_time": 0.5964227981567383, "loss": 0.8217873573303223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.10784363991971, "step_time": 0.5413870620727539} +{"epoch": 0, "iter": 11827, "iter_tflops": 38.23263459510489, "iter_time": 0.5396199798583985, "loss": 0.7734208703041077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.557446635504824, "step_time": 0.49644757270812984} +{"epoch": 0, "iter": 11828, "iter_tflops": 38.79379618246394, "iter_time": 0.5318142471313476, "loss": 0.8783116936683655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93196597497552, "step_time": 0.49201350402832034} +{"epoch": 0, "iter": 11829, "iter_tflops": 15.635546984632137, "iter_time": 1.3194993133544923, "loss": 0.06449134647846222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.57744961575031, "step_time": 1.2445275955200197} +{"epoch": 0, "iter": 11830, "iter_tflops": 23.197543323258625, "iter_time": 0.8893654479980468, "loss": 0.06299081444740295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.482150169992412, "step_time": 0.7243516864776612} +{"epoch": 0, "iter": 11831, "iter_tflops": 51.686702248023536, "iter_time": 0.39915670013427734, "loss": 0.054720401763916016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.50088978584443, "step_time": 0.36514634704589843} +{"epoch": 0, "iter": 11832, "iter_tflops": 49.009947747960496, "iter_time": 0.4209572639465332, "loss": 0.048239223659038544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.39718309683199, "step_time": 0.3863704471588134} +{"epoch": 0, "iter": 11833, "iter_tflops": 23.732041035956854, "iter_time": 0.8693349838256835, "loss": 
0.04719668626785278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.96437028612572, "step_time": 0.8264215469360352} +{"epoch": 0, "iter": 11834, "iter_tflops": 16.108042407994795, "iter_time": 1.2807945861816405, "loss": 0.031218353658914566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.205256822027206, "step_time": 1.0742420005798339} +{"epoch": 0, "iter": 11835, "iter_tflops": 38.550959168441814, "iter_time": 0.5351642074584961, "loss": 0.07187015563249588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.46873495216926, "step_time": 0.4857948684692383} +{"epoch": 0, "iter": 11836, "iter_tflops": 42.60396555302217, "iter_time": 0.48425289154052736, "loss": 0.03939256817102432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8845824895805, "step_time": 0.4400400390625} +{"epoch": 0, "iter": 11837, "iter_tflops": 20.525000123997394, "iter_time": 1.0051689834594726, "loss": 0.5807374715805054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.534635880126263, "step_time": 0.958042366027832} +{"epoch": 0, "iter": 11838, "iter_tflops": 21.394306571438126, "iter_time": 0.9643263473510741, "loss": 0.6356847882270813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.094774272861418, "step_time": 0.7906216506958008} +{"epoch": 0, "iter": 11839, "iter_tflops": 43.931307919879615, "iter_time": 0.46962165451049803, "loss": 0.6614255309104919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31979001765793, "step_time": 0.4359929218292236} +{"epoch": 0, "iter": 11840, "iter_tflops": 49.671375601571725, "iter_time": 0.41535176467895507, "loss": 0.649871289730072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.559473258669136, "step_time": 0.3851997089385986} +{"epoch": 0, "iter": 11841, "iter_tflops": 36.75238215343811, "iter_time": 0.5613539123535156, "loss": 0.2909433841705322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35754626272973, "step_time": 0.5241966400146485} +{"epoch": 0, "iter": 11842, "iter_tflops": 16.110577470979816, "iter_time": 
1.2805930480957033, "loss": 0.2808927297592163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.452667149717172, "step_time": 1.0605791664123534} +{"epoch": 0, "iter": 11843, "iter_tflops": 35.35521926436768, "iter_time": 0.5835374221801758, "loss": 0.2037281095981598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.883784368875254, "step_time": 0.5305834770202636} +{"epoch": 0, "iter": 11844, "iter_tflops": 39.724799223954534, "iter_time": 0.5193504791259765, "loss": 0.28028610348701477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.502109027665604, "step_time": 0.47425501823425287} +{"epoch": 0, "iter": 11845, "iter_tflops": 37.57714519503778, "iter_time": 0.5490330200195312, "loss": 0.06867464631795883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.611693562504726, "step_time": 0.49580038070678717} +{"epoch": 0, "iter": 11846, "iter_tflops": 41.57828150243676, "iter_time": 0.49619880294799806, "loss": 0.05733020603656769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.22290518009096, "step_time": 0.4368874263763428} +{"epoch": 0, "iter": 11847, "iter_tflops": 53.090527959158095, "iter_time": 0.3886021537780762, "loss": 0.05387914180755615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.71998814657251, "step_time": 0.3574341259002686} +{"epoch": 0, "iter": 11848, "iter_tflops": 46.17889370080425, "iter_time": 0.44676456832885747, "loss": 0.09706003218889236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.08792588988246, "step_time": 0.41189754104614257} +{"epoch": 0, "iter": 11849, "iter_tflops": 31.383676735735744, "iter_time": 0.6573829345703125, "loss": 0.4958212673664093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.5275530416744, "step_time": 0.6153474273681641} +{"epoch": 0, "iter": 11850, "iter_tflops": 18.41352476120746, "iter_time": 1.1204315185546874, "loss": 0.578529417514801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.603413740884818, "step_time": 0.8385459728240967} +{"epoch": 0, "iter": 11851, "iter_tflops": 
47.60265076566669, "iter_time": 0.4334021987915039, "loss": 0.5010607242584229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.51164374732123, "step_time": 0.40051320457458495} +{"epoch": 0, "iter": 11852, "iter_tflops": 46.91370095408082, "iter_time": 0.4397669143676758, "loss": 0.5051537156105042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.38834765469764, "step_time": 0.40944175529479976} +{"epoch": 0, "iter": 11853, "iter_tflops": 43.93520614185929, "iter_time": 0.46957998657226563, "loss": 0.41006940603256226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.3017458151741, "step_time": 0.42712935447692874} +{"epoch": 0, "iter": 11854, "iter_tflops": 36.95023710796747, "iter_time": 0.5583480682373048, "loss": 0.43514683842658997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.42147365242576, "step_time": 0.49807724571228024} +{"epoch": 0, "iter": 11855, "iter_tflops": 39.00486947451199, "iter_time": 0.5289363555908203, "loss": 0.48070281744003296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79795025139603, "step_time": 0.4820579814910888} +{"epoch": 0, "iter": 11856, "iter_tflops": 37.57746633490187, "iter_time": 0.5490283279418946, "loss": 0.31797075271606445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.978493210675396, "step_time": 0.5034614963531494} +{"epoch": 0, "iter": 11857, "iter_tflops": 30.570210611773792, "iter_time": 0.6748757400512695, "loss": 0.2793784737586975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.962127988547245, "step_time": 0.6074735221862793} +{"epoch": 0, "iter": 11858, "iter_tflops": 41.41899662642764, "iter_time": 0.49810703277587887, "loss": 0.2824901044368744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.20310683351918, "step_time": 0.4564087505340576} +{"epoch": 0, "iter": 11859, "iter_tflops": 47.51790976204748, "iter_time": 0.434175106048584, "loss": 0.26763132214546204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.28289829004034, "step_time": 0.4022996788024903} +{"epoch": 0, "iter": 11860, 
"iter_tflops": 49.27641069386237, "iter_time": 0.41868093109130855, "loss": 0.2024455964565277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.23592089601655, "step_time": 0.3875408401489258} +{"epoch": 0, "iter": 11861, "iter_tflops": 32.6022615426072, "iter_time": 0.6328117294311523, "loss": 0.5336379408836365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.672867436046666, "step_time": 0.5950212669372559} +{"epoch": 0, "iter": 11862, "iter_tflops": 16.703457136994306, "iter_time": 1.2351391296386718, "loss": 0.5355997681617737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.800500422443072, "step_time": 1.0973693809509277} +{"epoch": 0, "iter": 11863, "iter_tflops": 45.58993985105382, "iter_time": 0.45253609848022464, "loss": 0.65488600730896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.57667780357562, "step_time": 0.41614513969421385} +{"epoch": 0, "iter": 11864, "iter_tflops": 49.01133790212656, "iter_time": 0.4209453239440918, "loss": 0.5337513089179993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.82517007901447, "step_time": 0.3905542278289795} +{"epoch": 0, "iter": 11865, "iter_tflops": 28.650297054165776, "iter_time": 0.7201005096435547, "loss": 0.838424801826477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.231332574742783, "step_time": 0.6824407577514648} +{"epoch": 0, "iter": 11866, "iter_tflops": 13.929115145760747, "iter_time": 1.4811488952636718, "loss": 0.6199902892112732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.164919335003116, "step_time": 1.2019336128234863} +{"epoch": 0, "iter": 11867, "iter_tflops": 36.024806000506494, "iter_time": 0.5726913146972656, "loss": 0.6949763298034668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.877111801591155, "step_time": 0.5173667945861816} +{"epoch": 0, "iter": 11868, "iter_tflops": 44.62814073459708, "iter_time": 0.46228888702392573, "loss": 0.6566661596298218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90593209885146, "step_time": 0.43065843009948734} +{"epoch": 0, 
"iter": 11869, "iter_tflops": 31.86189640565755, "iter_time": 0.6475161819458007, "loss": 0.6682006120681763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.10014526025797, "step_time": 0.6050148277282715} +{"epoch": 0, "iter": 11870, "iter_tflops": 14.363796012504242, "iter_time": 1.4363259887695314, "loss": 0.6119099259376526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.13256681989465, "step_time": 1.204203300476074} +{"epoch": 0, "iter": 11871, "iter_tflops": 38.544613007450465, "iter_time": 0.5352523193359375, "loss": 0.6822386980056763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.07462775629288, "step_time": 0.49034524154663084} +{"epoch": 0, "iter": 11872, "iter_tflops": 40.34088597099041, "iter_time": 0.5114189491271972, "loss": 0.8953519463539124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.61321909998832, "step_time": 0.4730467948913574} +{"epoch": 0, "iter": 11873, "iter_tflops": 18.409485026931723, "iter_time": 1.1206773834228516, "loss": 0.5016549825668335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.532576389013126, "step_time": 1.0562402572631835} +{"epoch": 0, "iter": 11874, "iter_tflops": 17.75263613186034, "iter_time": 1.1621425323486327, "loss": 0.6229142546653748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.725335745895233, "step_time": 0.9496328964233398} +{"epoch": 0, "iter": 11875, "iter_tflops": 37.756778163051415, "iter_time": 0.5464209213256837, "loss": 0.6562899947166443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14579519916995, "step_time": 0.5014143829345703} +{"epoch": 0, "iter": 11876, "iter_tflops": 40.78887185832925, "iter_time": 0.5058020133972168, "loss": 0.773245096206665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.46387523270333, "step_time": 0.46399674797058105} +{"epoch": 0, "iter": 11877, "iter_tflops": 19.515347337238243, "iter_time": 0.6470655136108399, "loss": 0.003744455287232995, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 21.31805781018515, "step_time": 0.5923479690551757} 
+{"epoch": 0, "iter": 11878, "iter_tflops": 4.541539179464211, "iter_time": 2.7804908752441406, "loss": 0.003078076057136059, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 5.154033835319336, "step_time": 2.4500631256103516} +{"epoch": 0, "iter": 11879, "iter_tflops": 6.744243603115757, "iter_time": 1.872368347167969, "loss": 0.00044001659261994064, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 8.876810872511179, "step_time": 1.422550106048584} +{"epoch": 0, "iter": 11880, "iter_tflops": 34.21932529684828, "iter_time": 0.3690227127075195, "loss": 0.011700695380568504, "lr": 3e-05, "seqlen": 5072.0, "step_tflops": 37.73527486999353, "step_time": 0.33463936042785647} +{"epoch": 0, "iter": 11881, "iter_tflops": 19.847748070990846, "iter_time": 0.7243711929321289, "loss": 0.2793998122215271, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 21.169024673031505, "step_time": 0.6791591567993164} +{"epoch": 0, "iter": 11882, "iter_tflops": 14.030911112892928, "iter_time": 1.0246759338378908, "loss": 0.3336406648159027, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 15.489431508847009, "step_time": 0.9281900978088379} +{"epoch": 0, "iter": 11883, "iter_tflops": 24.16326218978711, "iter_time": 0.5949998321533203, "loss": 0.3810572922229767, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 25.848748150986207, "step_time": 0.5562024459838868} +{"epoch": 0, "iter": 11884, "iter_tflops": 25.617775665591203, "iter_time": 0.5612172241210938, "loss": 0.31246477365493774, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 27.4123640928247, "step_time": 0.5244763603210449} +{"epoch": 0, "iter": 11885, "iter_tflops": 27.935126547804398, "iter_time": 0.738535888671875, "loss": 0.7239986062049866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.525402836903655, "step_time": 0.6987573928833007} +{"epoch": 0, "iter": 11886, "iter_tflops": 14.905149605361345, "iter_time": 1.3841587677001952, "loss": 0.5826333165168762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.77416971599833, "step_time": 
0.993112783432007} +{"epoch": 0, "iter": 11887, "iter_tflops": 35.33247328584838, "iter_time": 0.5839130859375, "loss": 0.6776464581489563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.49813574658434, "step_time": 0.5358985080718994} +{"epoch": 0, "iter": 11888, "iter_tflops": 33.63268340529437, "iter_time": 0.6134239501953125, "loss": 0.6890226602554321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.37476578980237, "step_time": 0.5671814804077149} +{"epoch": 0, "iter": 11889, "iter_tflops": 22.25187867833667, "iter_time": 0.9271618728637696, "loss": 0.29373136162757874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.793213216699193, "step_time": 0.8670999298095702} +{"epoch": 0, "iter": 11890, "iter_tflops": 18.76830722927619, "iter_time": 1.0992516937255858, "loss": 0.2706151008605957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.793002454508866, "step_time": 0.9051503219604492} +{"epoch": 0, "iter": 11891, "iter_tflops": 42.80482291124122, "iter_time": 0.48198058319091797, "loss": 0.19624121487140656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.912526332681054, "step_time": 0.43977792549133304} +{"epoch": 0, "iter": 11892, "iter_tflops": 43.03653798629462, "iter_time": 0.4793855285644531, "loss": 0.21456801891326904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.94075365747468, "step_time": 0.43951346969604493} +{"epoch": 0, "iter": 11893, "iter_tflops": 35.66587228449265, "iter_time": 0.5784547576904298, "loss": 0.11937537789344788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.32897490739901, "step_time": 0.5245774536132813} +{"epoch": 0, "iter": 11894, "iter_tflops": 51.64637143339435, "iter_time": 0.39946840286254887, "loss": 0.008816498331725597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.96609031373224, "step_time": 0.3621644630432129} +{"epoch": 0, "iter": 11895, "iter_tflops": 56.05074144491753, "iter_time": 0.3680788688659668, "loss": 0.007526052184402943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.637236903316676, 
"step_time": 0.33471801376342775} +{"epoch": 0, "iter": 11896, "iter_tflops": 56.50543638707713, "iter_time": 0.36511696624755857, "loss": 0.005772674456238747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.28281433974179, "step_time": 0.3312485752105713} +{"epoch": 0, "iter": 11897, "iter_tflops": 26.034792308094552, "iter_time": 0.7924431762695313, "loss": 0.4368063509464264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.379681321050377, "step_time": 0.7535183944702147} +{"epoch": 0, "iter": 11898, "iter_tflops": 13.902479911692328, "iter_time": 1.4839865722656251, "loss": 0.40881916880607605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.074930071907584, "step_time": 1.1414203777313232} +{"epoch": 0, "iter": 11899, "iter_tflops": 34.822154782085725, "iter_time": 0.592470329284668, "loss": 0.5008307099342346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.07274580033044, "step_time": 0.5418861465454101} +{"epoch": 0, "iter": 11900, "iter_tflops": 35.17065491878261, "iter_time": 0.586599639892578, "loss": 0.39842215180397034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.260106042422635, "step_time": 0.5392325229644775} +{"epoch": 0, "iter": 11901, "iter_tflops": 23.58985532583225, "iter_time": 0.8745748214721679, "loss": 0.5556350350379944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.489491356099272, "step_time": 0.8093960456848144} +{"epoch": 0, "iter": 11902, "iter_tflops": 22.00897951564933, "iter_time": 0.9373943710327148, "loss": 0.8435065746307373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.013485113607874, "step_time": 0.7930922527313231} +{"epoch": 0, "iter": 11903, "iter_tflops": 42.247505525116615, "iter_time": 0.488338737487793, "loss": 0.9579264521598816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27267330634046, "step_time": 0.4557074279785156} +{"epoch": 0, "iter": 11904, "iter_tflops": 41.761198163504574, "iter_time": 0.4940254211425782, "loss": 0.4553642272949219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.90385685406805, "step_time": 0.459450366973877} +{"epoch": 0, "iter": 11905, "iter_tflops": 30.78915123847553, "iter_time": 0.6700767211914063, "loss": 0.3971693217754364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.807414333668085, "step_time": 0.6288546028137206} +{"epoch": 0, "iter": 11906, "iter_tflops": 23.13267846454938, "iter_time": 0.8918592605590819, "loss": 0.22268930077552795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.827429601728625, "step_time": 0.7413941497802735} +{"epoch": 0, "iter": 11907, "iter_tflops": 46.07768278165341, "iter_time": 0.4477458992004394, "loss": 0.4134543538093567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.976379988474875, "step_time": 0.4128168849945069} +{"epoch": 0, "iter": 11908, "iter_tflops": 48.03229400606705, "iter_time": 0.42952546691894533, "loss": 0.29603612422943115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.12196470796604, "step_time": 0.39582340431213375} +{"epoch": 0, "iter": 11909, "iter_tflops": 41.03797942367492, "iter_time": 0.5027317085266112, "loss": 0.498686283826828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.90877141286001, "step_time": 0.45940008735656745} +{"epoch": 0, "iter": 11910, "iter_tflops": 45.83681494667084, "iter_time": 0.45009875869750976, "loss": 0.42222052812576294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.77528144735807, "step_time": 0.4144847183227539} +{"epoch": 0, "iter": 11911, "iter_tflops": 50.74572154481433, "iter_time": 0.4065582847595215, "loss": 0.5482229590415955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.893744686141524, "step_time": 0.37583687591552734} +{"epoch": 0, "iter": 11912, "iter_tflops": 52.57682972353066, "iter_time": 0.3923989639282226, "loss": 0.4360733926296234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.88180672734391, "step_time": 0.3627010936737061} +{"epoch": 0, "iter": 11913, "iter_tflops": 36.70767855492082, "iter_time": 0.5620375442504882, "loss": 0.32550856471061707, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.6766959926357, "step_time": 0.5199801292419434} +{"epoch": 0, "iter": 11914, "iter_tflops": 33.870158732647965, "iter_time": 0.6091230239868165, "loss": 0.4212518036365509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.44386003424363, "step_time": 0.4860795764923096} +{"epoch": 0, "iter": 11915, "iter_tflops": 40.24459252555919, "iter_time": 0.5126426239013671, "loss": 0.31326112151145935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14540696871404, "step_time": 0.46734405517578115} +{"epoch": 0, "iter": 11916, "iter_tflops": 42.34774075159564, "iter_time": 0.48718286132812505, "loss": 0.3955862820148468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.982628208818646, "step_time": 0.44867147254943845} +{"epoch": 0, "iter": 11917, "iter_tflops": 18.727813975029857, "iter_time": 1.1016284942626955, "loss": 0.004663373343646526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.02974782650803, "step_time": 1.0300226287841796} +{"epoch": 0, "iter": 11918, "iter_tflops": 43.59257767011774, "iter_time": 0.47327078628540037, "loss": 0.07084046304225922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.3816521878411, "step_time": 0.4354236831665039} +{"epoch": 0, "iter": 11919, "iter_tflops": 43.82168154427157, "iter_time": 0.4707964820861817, "loss": 0.5151894688606262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.06445900132269, "step_time": 0.4383582420349121} +{"epoch": 0, "iter": 11920, "iter_tflops": 43.743440503783496, "iter_time": 0.47163856506347657, "loss": 0.7125148177146912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.032367437028455, "step_time": 0.4386573467254639} +{"epoch": 0, "iter": 11921, "iter_tflops": 25.76459881239597, "iter_time": 0.800753532409668, "loss": 0.5471367835998535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.156138811279412, "step_time": 0.7597211685180664} +{"epoch": 0, "iter": 11922, "iter_tflops": 18.245493557125084, "iter_time": 1.1307500915527342, "loss": 0.5062757134437561, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 23.55072051721589, "step_time": 0.8760281238555909} +{"epoch": 0, "iter": 11923, "iter_tflops": 47.991940676667866, "iter_time": 0.42988662719726567, "loss": 0.6205697059631348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.390771116023984, "step_time": 0.3937925148010254} +{"epoch": 0, "iter": 11924, "iter_tflops": 51.659804588991626, "iter_time": 0.39936452865600586, "loss": 0.5657389760017395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.861321181983364, "step_time": 0.36932698822021487} +{"epoch": 0, "iter": 11925, "iter_tflops": 28.6113530631274, "iter_time": 0.7210806655883789, "loss": 0.0807836577296257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.257264171184076, "step_time": 0.6818558807373046} +{"epoch": 0, "iter": 11926, "iter_tflops": 15.97723746284444, "iter_time": 1.2912803955078125, "loss": 0.12688298523426056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.78611321423905, "step_time": 0.9925421504974364} +{"epoch": 0, "iter": 11927, "iter_tflops": 40.238602825107876, "iter_time": 0.5127189331054687, "loss": 0.11803470551967621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.177241361571575, "step_time": 0.4670072841644287} +{"epoch": 0, "iter": 11928, "iter_tflops": 44.7455742631941, "iter_time": 0.46107562255859375, "loss": 0.08868185430765152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.8344910223011, "step_time": 0.4224697151184082} +{"epoch": 0, "iter": 11929, "iter_tflops": 27.082226474663837, "iter_time": 0.7617945861816405, "loss": 0.14236478507518768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.468298464695255, "step_time": 0.7001114616394043} +{"epoch": 0, "iter": 11930, "iter_tflops": 47.74732567872898, "iter_time": 0.43208898544311525, "loss": 0.11669187992811203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.08248801398517, "step_time": 0.3961234245300293} +{"epoch": 0, "iter": 11931, "iter_tflops": 47.48854871037031, "iter_time": 0.43444354629516596, "loss": 
0.11886090040206909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.52438570110329, "step_time": 0.4004141578674316} +{"epoch": 0, "iter": 11932, "iter_tflops": 49.74920190123146, "iter_time": 0.4147019996643066, "loss": 0.0706961452960968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.109119905863565, "step_time": 0.3812868061065674} +{"epoch": 0, "iter": 11933, "iter_tflops": 29.65166002666646, "iter_time": 0.6957820739746093, "loss": 0.2902073860168457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.612419879292595, "step_time": 0.6526262016296387} +{"epoch": 0, "iter": 11934, "iter_tflops": 13.086254672576615, "iter_time": 1.5765468444824218, "loss": 0.3004976511001587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.32675615592506, "step_time": 1.34608349609375} +{"epoch": 0, "iter": 11935, "iter_tflops": 38.99862384415627, "iter_time": 0.5290210647583008, "loss": 0.2544676661491394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87908310747736, "step_time": 0.4811458644866944} +{"epoch": 0, "iter": 11936, "iter_tflops": 39.22687849835781, "iter_time": 0.5259427795410156, "loss": 0.2693158984184265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.923792363269655, "step_time": 0.48064470481872557} +{"epoch": 0, "iter": 11937, "iter_tflops": 21.341453978976045, "iter_time": 0.9667145233154297, "loss": 0.7659156918525696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.51827669415519, "step_time": 0.9161932678222656} +{"epoch": 0, "iter": 11938, "iter_tflops": 9.080464231375332, "iter_time": 2.272030700683594, "loss": 0.7478384971618652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.789116293167819, "step_time": 1.9122134704589844} +{"epoch": 0, "iter": 11939, "iter_tflops": 14.135148408631961, "iter_time": 1.4595597381591796, "loss": 0.6134588718414307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.743966674734725, "step_time": 1.2321508941650392} +{"epoch": 0, "iter": 11940, "iter_tflops": 26.03695689242194, "iter_time": 0.7923772964477539, 
"loss": 0.7893774509429932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.81201438681253, "step_time": 0.5760941925048828} +{"epoch": 0, "iter": 11941, "iter_tflops": 21.07126355287269, "iter_time": 0.7579694061279297, "loss": 0.2325802594423294, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 22.30023691032723, "step_time": 0.7161974639892578} +{"epoch": 0, "iter": 11942, "iter_tflops": 16.978002809618566, "iter_time": 0.9407097702026368, "loss": 0.11815556138753891, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 19.010340147412396, "step_time": 0.8401413650512695} +{"epoch": 0, "iter": 11943, "iter_tflops": 22.634657467246125, "iter_time": 0.705615852355957, "loss": 0.24420799314975739, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 24.58174271262017, "step_time": 0.6497250137329101} +{"epoch": 0, "iter": 11944, "iter_tflops": 23.8391605811755, "iter_time": 0.6699637374877929, "loss": 0.2265225648880005, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 25.53878939169014, "step_time": 0.6253770637512206} +{"epoch": 0, "iter": 11945, "iter_tflops": 29.75681436520105, "iter_time": 0.6933233261108398, "loss": 0.008924758993089199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.354305185366854, "step_time": 0.6376614608764648} +{"epoch": 0, "iter": 11946, "iter_tflops": 9.323552032047512, "iter_time": 2.212793304443359, "loss": 0.0015569351380690932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.124472783057106, "step_time": 2.037744972229004} +{"epoch": 0, "iter": 11947, "iter_tflops": 12.791284003432384, "iter_time": 1.6129024658203124, "loss": 0.013219049200415611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.923548131036942, "step_time": 1.382452304840088} +{"epoch": 0, "iter": 11948, "iter_tflops": 42.20122381457271, "iter_time": 0.4888742942810058, "loss": 0.005448354873806238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1341093146162, "step_time": 0.4377104778289795} +{"epoch": 0, "iter": 11949, "iter_tflops": 13.11236800496774, "iter_time": 
1.080908248901367, "loss": 0.28809061646461487, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 14.055541818063782, "step_time": 1.0083756942749025} +{"epoch": 0, "iter": 11950, "iter_tflops": 16.473573575905263, "iter_time": 0.8603638229370117, "loss": 0.20224373042583466, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 18.18980179236871, "step_time": 0.7791875305175782} +{"epoch": 0, "iter": 11951, "iter_tflops": 25.468824464114917, "iter_time": 0.5564947357177734, "loss": 0.10989449173212051, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 27.137715859938186, "step_time": 0.5222719116210938} +{"epoch": 0, "iter": 11952, "iter_tflops": 26.72738401945918, "iter_time": 0.5302900848388672, "loss": 0.4171254634857178, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 28.353920051841225, "step_time": 0.49986974334716794} +{"epoch": 0, "iter": 11953, "iter_tflops": 41.93597462044793, "iter_time": 0.49196647262573245, "loss": 0.36944580078125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.80574067515268, "step_time": 0.4504041023254394} +{"epoch": 0, "iter": 11954, "iter_tflops": 44.850723253617495, "iter_time": 0.4599946670532226, "loss": 0.348503440618515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.82020711280627, "step_time": 0.414110954284668} +{"epoch": 0, "iter": 11955, "iter_tflops": 48.98754916207802, "iter_time": 0.4211497383117676, "loss": 0.3953128755092621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.924508359247135, "step_time": 0.3898211650848388} +{"epoch": 0, "iter": 11956, "iter_tflops": 50.964918848342506, "iter_time": 0.40480970001220706, "loss": 0.3750686049461365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.13193871770994, "step_time": 0.3742130966186523} +{"epoch": 0, "iter": 11957, "iter_tflops": 36.69396378007547, "iter_time": 0.5622476119995117, "loss": 0.07958667725324631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.44021443076946, "step_time": 0.5230979042053222} +{"epoch": 0, "iter": 11958, "iter_tflops": 29.839997354300923, 
"iter_time": 0.6913905944824218, "loss": 0.06815186887979507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.579985274992424, "step_time": 0.6143866157531739} +{"epoch": 0, "iter": 11959, "iter_tflops": 48.61913206208708, "iter_time": 0.4243410491943359, "loss": 0.0694216638803482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.28702800547537, "step_time": 0.3871691532135009} +{"epoch": 0, "iter": 11960, "iter_tflops": 54.6775316630165, "iter_time": 0.37732305908203123, "loss": 0.08691984415054321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.54493665897235, "step_time": 0.3464793930053711} +{"epoch": 0, "iter": 11961, "iter_tflops": 38.987844530698325, "iter_time": 0.5291673278808593, "loss": 0.3065342605113983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01490948550395, "step_time": 0.49104219818115236} +{"epoch": 0, "iter": 11962, "iter_tflops": 15.75365709277794, "iter_time": 1.3096066131591797, "loss": 0.3728581368923187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.480976957020236, "step_time": 1.1163421478271485} +{"epoch": 0, "iter": 11963, "iter_tflops": 46.53440374585467, "iter_time": 0.44335140991210936, "loss": 0.35389670729637146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52718375915425, "step_time": 0.4083167114257813} +{"epoch": 0, "iter": 11964, "iter_tflops": 49.192371306925864, "iter_time": 0.4193961982727051, "loss": 0.3398899734020233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.12645581999191, "step_time": 0.38833935356140137} +{"epoch": 0, "iter": 11965, "iter_tflops": 41.41761810904803, "iter_time": 0.49812361145019524, "loss": 0.5906426310539246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.99537807047305, "step_time": 0.45851583862304696} +{"epoch": 0, "iter": 11966, "iter_tflops": 45.41860896560268, "iter_time": 0.45424318313598633, "loss": 0.42547234892845154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.59612482147294, "step_time": 0.41598196601867676} +{"epoch": 0, "iter": 11967, "iter_tflops": 
43.96406942536279, "iter_time": 0.4692716979980468, "loss": 0.6800200343132019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.22819140935285, "step_time": 0.4368385257720947} +{"epoch": 0, "iter": 11968, "iter_tflops": 51.51306929433933, "iter_time": 0.4005021209716797, "loss": 0.6482399702072144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.69498611237416, "step_time": 0.3704299964904785} +{"epoch": 0, "iter": 11969, "iter_tflops": 1.7555364022048656, "iter_time": 0.9151270904541016, "loss": 0.015116692520678043, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.8452136090511935, "step_time": 0.8706520004272461} +{"epoch": 0, "iter": 11970, "iter_tflops": 0.9854404385235765, "iter_time": 1.6302750091552731, "loss": 0.526772141456604, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.1689089562010944, "step_time": 1.3743918304443359} +{"epoch": 0, "iter": 11971, "iter_tflops": 3.4656582277805517, "iter_time": 0.4635595359802246, "loss": 0.9946395754814148, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.7586642831380805, "step_time": 0.4274228286743164} +{"epoch": 0, "iter": 11972, "iter_tflops": 3.64200853774062, "iter_time": 0.4411134414672852, "loss": 0.6665236353874207, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.945218972213455, "step_time": 0.40721159744262697} +{"epoch": 0, "iter": 11973, "iter_tflops": 21.949806999534463, "iter_time": 0.7593935852050782, "loss": 0.01956416480243206, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 23.088449574656313, "step_time": 0.7219429168701172} +{"epoch": 0, "iter": 11974, "iter_tflops": 11.12418063073907, "iter_time": 1.4984063262939453, "loss": 0.06006668135523796, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 15.229190075264615, "step_time": 1.0945127449035645} +{"epoch": 0, "iter": 11975, "iter_tflops": 34.89553795423162, "iter_time": 0.4776697425842285, "loss": 0.037562742829322815, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 38.57419805744039, "step_time": 0.43211637496948246} +{"epoch": 0, "iter": 11976, 
"iter_tflops": 36.80851070213862, "iter_time": 0.45284479904174807, "loss": 0.02806132100522518, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 40.586666257968375, "step_time": 0.4106901149749756} +{"epoch": 0, "iter": 11977, "iter_tflops": 23.707715059680634, "iter_time": 0.8702269897460937, "loss": 0.06958400458097458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.310170275986486, "step_time": 0.8151305694580079} +{"epoch": 0, "iter": 11978, "iter_tflops": 43.736914486792394, "iter_time": 0.47170893859863283, "loss": 0.08381857722997665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.834791660845426, "step_time": 0.43129891014099125} +{"epoch": 0, "iter": 11979, "iter_tflops": 47.67401211609745, "iter_time": 0.43275345611572263, "loss": 0.06362628191709518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.54794456577597, "step_time": 0.40023115730285647} +{"epoch": 0, "iter": 11980, "iter_tflops": 48.94344716671438, "iter_time": 0.4215292282104492, "loss": 0.060322027653455734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.521441356722875, "step_time": 0.3854734287261963} +{"epoch": 0, "iter": 11981, "iter_tflops": 24.005230991796527, "iter_time": 0.8594415740966798, "loss": 0.7048768401145935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.149779479060474, "step_time": 0.8203290023803711} +{"epoch": 0, "iter": 11982, "iter_tflops": 14.572323568086995, "iter_time": 1.4157723999023437, "loss": 0.8016576766967773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.138390757093326, "step_time": 1.203794090270996} +{"epoch": 0, "iter": 11983, "iter_tflops": 37.424035687171134, "iter_time": 0.5512792282104492, "loss": 0.9544728398323059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.03695527547886, "step_time": 0.5027442550659179} +{"epoch": 0, "iter": 11984, "iter_tflops": 40.11209558153808, "iter_time": 0.5143359680175782, "loss": 0.8861276507377625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.53499793000086, "step_time": 0.47389673805236815} 
+{"epoch": 0, "iter": 11985, "iter_tflops": 19.859458574517927, "iter_time": 1.0388547821044922, "loss": 0.2514887750148773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.175810175339716, "step_time": 0.9742764663696288} +{"epoch": 0, "iter": 11986, "iter_tflops": 20.05561419233239, "iter_time": 1.0286941757202148, "loss": 0.23269124329090118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.05739518529136, "step_time": 0.823353479385376} +{"epoch": 0, "iter": 11987, "iter_tflops": 47.07622877717794, "iter_time": 0.4382486457824707, "loss": 0.26619815826416016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.924905352353825, "step_time": 0.4051277732849121} +{"epoch": 0, "iter": 11988, "iter_tflops": 44.35399719133962, "iter_time": 0.4651462059020996, "loss": 0.25956323742866516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.81594016708941, "step_time": 0.4314689502716064} +{"epoch": 0, "iter": 11989, "iter_tflops": 36.38745520743167, "iter_time": 0.5453359375000001, "loss": 0.22355832159519196, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 38.95261007868875, "step_time": 0.5094238090515137} +{"epoch": 0, "iter": 11990, "iter_tflops": 12.9400145933285, "iter_time": 1.5334903106689453, "loss": 0.3237372934818268, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 15.43390849644721, "step_time": 1.2857007026672365} +{"epoch": 0, "iter": 11991, "iter_tflops": 46.10070396611485, "iter_time": 0.430435661315918, "loss": 0.2131110578775406, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 50.3223366061857, "step_time": 0.39432562828063966} +{"epoch": 0, "iter": 11992, "iter_tflops": 51.22348732312122, "iter_time": 0.3873884429931641, "loss": 0.36506906151771545, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 55.48040576792776, "step_time": 0.3576647777557373} +{"epoch": 0, "iter": 11993, "iter_tflops": 38.05375324013612, "iter_time": 0.5421566009521486, "loss": 0.05859839543700218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.07054089441659, "step_time": 
0.5023331336975098} +{"epoch": 0, "iter": 11994, "iter_tflops": 8.861708183566787, "iter_time": 2.328117004394531, "loss": 0.09285049140453339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.818089649396192, "step_time": 1.7457215270996094} +{"epoch": 0, "iter": 11995, "iter_tflops": 11.531533069406908, "iter_time": 1.7891024017333985, "loss": 0.09320898354053497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.265507961614736, "step_time": 1.5552433853149414} +{"epoch": 0, "iter": 11996, "iter_tflops": 21.652703416084393, "iter_time": 0.9528183670043946, "loss": 0.13186588883399963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.413391451805037, "step_time": 0.7810846080780028} +{"epoch": 0, "iter": 11997, "iter_tflops": 19.53580584316559, "iter_time": 0.7213294296264648, "loss": 0.2370980978012085, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 21.285794993828567, "step_time": 0.6620260925292969} +{"epoch": 0, "iter": 11998, "iter_tflops": 21.065513439164878, "iter_time": 0.6689488830566406, "loss": 0.20911407470703125, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 22.717221933604023, "step_time": 0.6203113975524902} +{"epoch": 0, "iter": 11999, "iter_tflops": 21.291163847102684, "iter_time": 0.6618591537475585, "loss": 0.310294508934021, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 22.867085936789483, "step_time": 0.6162460632324218} +{"epoch": 0, "iter": 12000, "iter_tflops": 2.845287204686921, "iter_time": 4.9526640625, "loss": 0.16633076965808868, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 2.8697467590238426, "step_time": 4.910451293945313} +{"epoch": 0, "iter": 12001, "iter_tflops": 16.738701410137335, "iter_time": 1.232538475036621, "loss": 0.0730404332280159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.116706474350455, "step_time": 0.8924754714965819} +{"epoch": 0, "iter": 12002, "iter_tflops": 15.376135103635187, "iter_time": 1.3417606811523435, "loss": 0.08465618640184402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.732617890605372, 
"step_time": 1.1634544677734375} +{"epoch": 0, "iter": 12003, "iter_tflops": 27.62981543315614, "iter_time": 0.7466967544555664, "loss": 0.10570275783538818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.16488126322127, "step_time": 0.6220765075683594} +{"epoch": 0, "iter": 12004, "iter_tflops": 28.36586933734951, "iter_time": 0.7273210372924804, "loss": 0.06225547194480896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.58517576431748, "step_time": 0.6142916641235352} +{"epoch": 0, "iter": 12005, "iter_tflops": 9.318663605349181, "iter_time": 2.2139541015624995, "loss": 0.14050516486167908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.563639390672616, "step_time": 2.1572429351806637} +{"epoch": 0, "iter": 12006, "iter_tflops": 7.754550577446103, "iter_time": 2.660514404296875, "loss": 0.14414577186107635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.028761075270394, "step_time": 1.715147003173828} +{"epoch": 0, "iter": 12007, "iter_tflops": 19.988092767112143, "iter_time": 1.0321691894531249, "loss": 0.18202140927314758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.195651590034185, "step_time": 0.8894379806518555} +{"epoch": 0, "iter": 12008, "iter_tflops": 20.984329781498477, "iter_time": 0.9831666641235352, "loss": 0.22099988162517548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.889668160192127, "step_time": 0.8635989990234376} +{"epoch": 0, "iter": 12009, "iter_tflops": 3.937445244540981, "iter_time": 3.6721064453125, "loss": 0.1990799903869629, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 4.471680554494991, "step_time": 3.2333969039916997} +{"epoch": 0, "iter": 12010, "iter_tflops": 14.687905278326195, "iter_time": 0.9843961944580077, "loss": 0.315674364566803, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 16.623984275776916, "step_time": 0.8697504653930663} +{"epoch": 0, "iter": 12011, "iter_tflops": 18.375277383030202, "iter_time": 0.7868571319580078, "loss": 0.38733479380607605, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 
21.590533456714912, "step_time": 0.6696785926818848} +{"epoch": 0, "iter": 12012, "iter_tflops": 22.598089762390583, "iter_time": 0.6398203659057617, "loss": 0.2534216046333313, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 24.263223501241875, "step_time": 0.595910846710205} +{"epoch": 0, "iter": 12013, "iter_tflops": 18.937799482333663, "iter_time": 1.0894134521484375, "loss": 0.7651491761207581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.602191558811064, "step_time": 0.9550463180541992} +{"epoch": 0, "iter": 12014, "iter_tflops": 25.846518810713132, "iter_time": 0.7982155609130859, "loss": 1.0086698532104492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.97633348501363, "step_time": 0.7119980697631837} +{"epoch": 0, "iter": 12015, "iter_tflops": 32.85073154385096, "iter_time": 0.628025390625, "loss": 0.6497148871421814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.520284366456394, "step_time": 0.53559037399292} +{"epoch": 0, "iter": 12016, "iter_tflops": 36.74021776841513, "iter_time": 0.5615397720336914, "loss": 0.5887828469276428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.18095496801974, "step_time": 0.5265592308044433} +{"epoch": 0, "iter": 12017, "iter_tflops": 24.132423692172974, "iter_time": 0.8549117889404297, "loss": 0.08723901212215424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.50988054653783, "step_time": 0.808749122619629} +{"epoch": 0, "iter": 12018, "iter_tflops": 29.235684359170435, "iter_time": 0.7056819076538087, "loss": 0.054723214358091354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.995961155152706, "step_time": 0.5895278434753418} +{"epoch": 0, "iter": 12019, "iter_tflops": 38.26726007668393, "iter_time": 0.5391317138671875, "loss": 0.046824704855680466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.042656778136156, "step_time": 0.5026744155883789} +{"epoch": 0, "iter": 12020, "iter_tflops": 48.4052244817467, "iter_time": 0.4262162551879883, "loss": 0.050586555153131485, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 52.96905809258148, "step_time": 0.3894933052062988} +{"epoch": 0, "iter": 12021, "iter_tflops": 27.099305153676664, "iter_time": 0.7613144836425783, "loss": 0.5633997917175293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.582974068282443, "step_time": 0.7217966003417969} +{"epoch": 0, "iter": 12022, "iter_tflops": 19.53551242674357, "iter_time": 1.0560815124511718, "loss": 0.5348365902900696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.283355934243396, "step_time": 0.9258521728515625} +{"epoch": 0, "iter": 12023, "iter_tflops": 23.8381943564112, "iter_time": 0.8654637680053711, "loss": 0.5041201710700989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.946019190843042, "step_time": 0.765645320892334} +{"epoch": 0, "iter": 12024, "iter_tflops": 30.217890933779007, "iter_time": 0.6827443237304687, "loss": 0.7248415350914001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.10588598546865, "step_time": 0.5876818923950194} +{"epoch": 0, "iter": 12025, "iter_tflops": 6.104190173556492, "iter_time": 3.3798248291015627, "loss": 0.06190494820475578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 6.1809908832479605, "step_time": 3.3378294677734375} +{"epoch": 0, "iter": 12026, "iter_tflops": 14.774434497512644, "iter_time": 1.3964049530029299, "loss": 0.041925352066755295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.109428966484533, "step_time": 0.790177890777588} +{"epoch": 0, "iter": 12027, "iter_tflops": 25.85947181610737, "iter_time": 0.7978157348632812, "loss": 0.05685698986053467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.622268250675074, "step_time": 0.720805679321289} +{"epoch": 0, "iter": 12028, "iter_tflops": 37.86770515220524, "iter_time": 0.5448202743530274, "loss": 0.0626462921500206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.72308935610972, "step_time": 0.4512182750701904} +{"epoch": 0, "iter": 12029, "iter_tflops": 7.66479994344507, "iter_time": 2.0036212463378904, "loss": 0.2696691155433655, "lr": 3e-05, "seqlen": 
6144.0, "step_tflops": 7.832966203022962, "step_time": 1.9606054229736327} +{"epoch": 0, "iter": 12030, "iter_tflops": 19.84222275232157, "iter_time": 0.7739735717773438, "loss": 0.2717965543270111, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.092093289860877, "step_time": 0.6650482406616212} +{"epoch": 0, "iter": 12031, "iter_tflops": 18.814478027093138, "iter_time": 0.8162520370483398, "loss": 0.21498234570026398, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 21.190569324964493, "step_time": 0.7247259750366211} +{"epoch": 0, "iter": 12032, "iter_tflops": 21.70941388067975, "iter_time": 0.7074053726196289, "loss": 0.24129924178123474, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 23.78409031679256, "step_time": 0.6456986923217773} +{"epoch": 0, "iter": 12033, "iter_tflops": 8.59013313302893, "iter_time": 2.4017198791503906, "loss": 0.5584021210670471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.051253627356838, "step_time": 2.279363098144531} +{"epoch": 0, "iter": 12034, "iter_tflops": 25.036553292593567, "iter_time": 0.8240388870239258, "loss": 0.44579648971557617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.768194054950445, "step_time": 0.7171494140625} +{"epoch": 0, "iter": 12035, "iter_tflops": 35.42948435793711, "iter_time": 0.5823142471313477, "loss": 0.6016547083854675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.38945892430839, "step_time": 0.4754863052368164} +{"epoch": 0, "iter": 12036, "iter_tflops": 36.86461971920563, "iter_time": 0.5596448211669922, "loss": 0.6416558027267456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.43421016456277, "step_time": 0.5231775512695312} +{"epoch": 0, "iter": 12037, "iter_tflops": 9.985363912814677, "iter_time": 2.066133361816406, "loss": 0.7106841802597046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.2142680696611, "step_time": 2.019830825805664} +{"epoch": 0, "iter": 12038, "iter_tflops": 21.47555714016238, "iter_time": 0.9606779174804688, "loss": 0.6916837692260742, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 24.75556357852233, "step_time": 0.8333921966552735} +{"epoch": 0, "iter": 12039, "iter_tflops": 26.147799912740666, "iter_time": 0.7890183334350586, "loss": 0.7297183871269226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.137998786154053, "step_time": 0.6845541954040528} +{"epoch": 0, "iter": 12040, "iter_tflops": 46.855708396014606, "iter_time": 0.440311206817627, "loss": 0.8621347546577454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.7921936446484, "step_time": 0.40618630599975586} +{"epoch": 0, "iter": 12041, "iter_tflops": 11.433670045987189, "iter_time": 1.8044156799316409, "loss": 0.22039073705673218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.721297023771474, "step_time": 1.760137420654297} +{"epoch": 0, "iter": 12042, "iter_tflops": 26.009085242127007, "iter_time": 0.793226417541504, "loss": 0.16997654736042023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.254614884504363, "step_time": 0.6819155883789063} +{"epoch": 0, "iter": 12043, "iter_tflops": 25.636147157198657, "iter_time": 0.8047657623291016, "loss": 0.2690674662590027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.68972524486019, "step_time": 0.7191108779907226} +{"epoch": 0, "iter": 12044, "iter_tflops": 25.679419022060475, "iter_time": 0.80340966796875, "loss": 0.1733466237783432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.006148524954106, "step_time": 0.7112662162780762} +{"epoch": 0, "iter": 12045, "iter_tflops": 8.794223514272218, "iter_time": 2.345982391357422, "loss": 0.799611508846283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.238068114379194, "step_time": 2.233269256591797} +{"epoch": 0, "iter": 12046, "iter_tflops": 19.898332028672897, "iter_time": 1.0368252716064454, "loss": 0.6912444233894348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.96917413261818, "step_time": 0.898207893371582} +{"epoch": 0, "iter": 12047, "iter_tflops": 21.327157069494664, "iter_time": 0.9673625717163086, "loss": 0.6665728092193604, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 25.048055250855402, "step_time": 0.8236604919433594} +{"epoch": 0, "iter": 12048, "iter_tflops": 26.50839906744529, "iter_time": 0.7782851562499999, "loss": 0.7067912220954895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.58192205285657, "step_time": 0.6746172943115234} +{"epoch": 0, "iter": 12049, "iter_tflops": 20.578684476458236, "iter_time": 0.9763477554321289, "loss": 0.3244986832141876, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 22.180595093861335, "step_time": 0.905834686279297} +{"epoch": 0, "iter": 12050, "iter_tflops": 23.603877345838228, "iter_time": 0.8512140655517578, "loss": 0.24421142041683197, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 32.35024065994897, "step_time": 0.6210758247375489} +{"epoch": 0, "iter": 12051, "iter_tflops": 37.22643768543047, "iter_time": 0.539722671508789, "loss": 0.24443382024765015, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 40.800377514268206, "step_time": 0.4924452571868896} +{"epoch": 0, "iter": 12052, "iter_tflops": 39.927839560890675, "iter_time": 0.5032066001892089, "loss": 0.3343135118484497, "lr": 3e-05, "seqlen": 7984.0, "step_tflops": 43.64267032787822, "step_time": 0.46037403869628907} +{"epoch": 0, "iter": 12053, "iter_tflops": 20.260292533526066, "iter_time": 1.018301856994629, "loss": 0.018435949459671974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.562335806141956, "step_time": 0.9568116226196288} +{"epoch": 0, "iter": 12054, "iter_tflops": 29.176521987859715, "iter_time": 0.7071128463745117, "loss": 0.030778387561440468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.091904651258716, "step_time": 0.5716266212463379} +{"epoch": 0, "iter": 12055, "iter_tflops": 53.53788611808829, "iter_time": 0.3853550262451172, "loss": 0.03894482180476189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.58950022636765, "step_time": 0.35212953567504884} +{"epoch": 0, "iter": 12056, "iter_tflops": 57.28103717503528, "iter_time": 0.3601731834411621, "loss": 
0.04922080039978027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.53492344934664, "step_time": 0.3299131488800049} +{"epoch": 0, "iter": 12057, "iter_tflops": 25.28190416242681, "iter_time": 0.8160419158935547, "loss": 0.5202880501747131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.68529944434969, "step_time": 0.7731258010864259} +{"epoch": 0, "iter": 12058, "iter_tflops": 10.277760662718533, "iter_time": 2.007352981567383, "loss": 0.4968855679035187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.596082233269449, "step_time": 1.6378976516723633} +{"epoch": 0, "iter": 12059, "iter_tflops": 14.656185568254909, "iter_time": 1.4076714172363283, "loss": 0.8386789560317993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.64902808843011, "step_time": 1.1062825050354004} +{"epoch": 0, "iter": 12060, "iter_tflops": 20.079620889908615, "iter_time": 1.0274642944335939, "loss": 0.6749294996261597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.08839006919699, "step_time": 0.8564745693206788} +{"epoch": 0, "iter": 12061, "iter_tflops": 22.42127591517141, "iter_time": 0.679474266052246, "loss": 0.23851774632930756, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 23.80804481062005, "step_time": 0.6398963088989258} +{"epoch": 0, "iter": 12062, "iter_tflops": 10.13290255805828, "iter_time": 1.5034862823486328, "loss": 0.16726654767990112, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 12.393727729300172, "step_time": 1.2292250022888183} +{"epoch": 0, "iter": 12063, "iter_tflops": 21.19785343005662, "iter_time": 0.7186897506713867, "loss": 0.25660765171051025, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 22.877437265174414, "step_time": 0.6659259872436524} +{"epoch": 0, "iter": 12064, "iter_tflops": 21.8161094855965, "iter_time": 0.698322494506836, "loss": 0.15561218559741974, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 23.39455572899692, "step_time": 0.6512062110900879} +{"epoch": 0, "iter": 12065, "iter_tflops": 1.4592912244737437, "iter_time": 1.0739875946044921, 
"loss": 0.07164373993873596, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.5557717689467065, "step_time": 1.0073846969604492} +{"epoch": 0, "iter": 12066, "iter_tflops": 1.522326601649105, "iter_time": 1.0295167083740235, "loss": 0.04026687517762184, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.8169645272242505, "step_time": 0.8625708694458007} +{"epoch": 0, "iter": 12067, "iter_tflops": 3.41877791782286, "iter_time": 0.45842716598510735, "loss": 0.18059085309505463, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.70863386391898, "step_time": 0.422597843170166} +{"epoch": 0, "iter": 12068, "iter_tflops": 3.496549999257606, "iter_time": 0.4482305908203125, "loss": 0.026947418227791786, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.791111486227487, "step_time": 0.41340400505065916} +{"epoch": 0, "iter": 12069, "iter_tflops": 24.588502403222257, "iter_time": 0.8390544967651368, "loss": 0.15478944778442383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.855831473181365, "step_time": 0.7979280624389647} +{"epoch": 0, "iter": 12070, "iter_tflops": 39.620660425073325, "iter_time": 0.5207155380249022, "loss": 0.09244056791067123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.5658405904887, "step_time": 0.46293513679504394} +{"epoch": 0, "iter": 12071, "iter_tflops": 38.83883312071511, "iter_time": 0.5311975631713867, "loss": 0.1378050446510315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.210113086363535, "step_time": 0.48877133941650397} +{"epoch": 0, "iter": 12072, "iter_tflops": 41.52333015564286, "iter_time": 0.49685546493530275, "loss": 0.08489067107439041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.63660971297891, "step_time": 0.45207331657409666} +{"epoch": 0, "iter": 12073, "iter_tflops": 21.13372934994037, "iter_time": 0.9762164154052735, "loss": 0.36667755246162415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.480020109804926, "step_time": 0.9177524490356446} +{"epoch": 0, "iter": 12074, "iter_tflops": 8.314576378442338, "iter_time": 
2.481316253662109, "loss": 0.3437391519546509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.601214137776196, "step_time": 2.1488004760742188} +{"epoch": 0, "iter": 12075, "iter_tflops": 14.379964544627848, "iter_time": 1.4347110137939454, "loss": 0.32789346575737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.978417206113722, "step_time": 1.2151364440917969} +{"epoch": 0, "iter": 12076, "iter_tflops": 32.434709379827716, "iter_time": 0.6360807266235352, "loss": 0.3545039892196655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.81492426365762, "step_time": 0.5925933761596679} +{"epoch": 0, "iter": 12077, "iter_tflops": 12.22796119147196, "iter_time": 1.2425454406738283, "loss": 0.2425978034734726, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 13.215726118770652, "step_time": 1.149675567626953} +{"epoch": 0, "iter": 12078, "iter_tflops": 10.747955307916916, "iter_time": 1.4136453857421873, "loss": 0.20671220123767853, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 12.496144435229747, "step_time": 1.2158788261413573} +{"epoch": 0, "iter": 12079, "iter_tflops": 22.24454522278891, "iter_time": 0.68303475189209, "loss": 0.2473476678133011, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 23.888604441310957, "step_time": 0.6360270004272461} +{"epoch": 0, "iter": 12080, "iter_tflops": 23.382593275995266, "iter_time": 0.6497909469604493, "loss": 0.190857395529747, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.143249573920624, "step_time": 0.6042893295288086} +{"epoch": 0, "iter": 12081, "iter_tflops": 32.055679540841616, "iter_time": 0.6436018142700194, "loss": 0.5797116160392761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.383060286400955, "step_time": 0.5830782680511475} +{"epoch": 0, "iter": 12082, "iter_tflops": 36.76007512320082, "iter_time": 0.5612364349365234, "loss": 0.40629932284355164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.03039257339452, "step_time": 0.5153857402801514} +{"epoch": 0, "iter": 12083, "iter_tflops": 38.86409794789909, 
"iter_time": 0.5308522415161132, "loss": 0.46134987473487854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.401676931214084, "step_time": 0.4865631504058837} +{"epoch": 0, "iter": 12084, "iter_tflops": 41.68884919885364, "iter_time": 0.4948827781677246, "loss": 0.47609347105026245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.47945620293322, "step_time": 0.4536354484558106} +{"epoch": 0, "iter": 12085, "iter_tflops": 17.053903927687184, "iter_time": 1.2097578125000001, "loss": 0.2634965479373932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.872790847542483, "step_time": 1.1543297119140625} +{"epoch": 0, "iter": 12086, "iter_tflops": 24.60963047160356, "iter_time": 0.8383341445922853, "loss": 0.19755230844020844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.496038033976863, "step_time": 0.7503296833038331} +{"epoch": 0, "iter": 12087, "iter_tflops": 41.686190522560935, "iter_time": 0.4949143409729003, "loss": 0.1675199270248413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.62549154088398, "step_time": 0.45218347930908204} +{"epoch": 0, "iter": 12088, "iter_tflops": 38.34623375668734, "iter_time": 0.5380213775634766, "loss": 0.24095302820205688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93194695613884, "step_time": 0.4920137271881104} +{"epoch": 0, "iter": 12089, "iter_tflops": 16.447716383571056, "iter_time": 1.2543439483642578, "loss": 0.7398934364318848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.494882601130357, "step_time": 1.179264472961426} +{"epoch": 0, "iter": 12090, "iter_tflops": 16.885616394431818, "iter_time": 1.2218146514892578, "loss": 0.6648537516593933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.76248924852102, "step_time": 0.7708959102630615} +{"epoch": 0, "iter": 12091, "iter_tflops": 47.366611888717685, "iter_time": 0.4355619430541992, "loss": 0.6319877505302429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.15599402988578, "step_time": 0.4032976760864258} +{"epoch": 0, "iter": 12092, "iter_tflops": 
44.83450458224931, "iter_time": 0.46016106796264644, "loss": 0.7148930430412292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.184334544081004, "step_time": 0.42817014503479006} +{"epoch": 0, "iter": 12093, "iter_tflops": 24.17492889408985, "iter_time": 0.8534086532592773, "loss": 0.8057814836502075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.436225310358253, "step_time": 0.8110910034179687} +{"epoch": 0, "iter": 12094, "iter_tflops": 13.063130708022914, "iter_time": 1.5793376007080078, "loss": 0.722993016242981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.53634684930464, "step_time": 1.3279243640899658} +{"epoch": 0, "iter": 12095, "iter_tflops": 38.35438166871393, "iter_time": 0.5379070816040038, "loss": 0.7032474279403687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.210627177272876, "step_time": 0.4887653865814209} +{"epoch": 0, "iter": 12096, "iter_tflops": 41.20950210301427, "iter_time": 0.500639232635498, "loss": 0.7475847005844116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83551871862957, "step_time": 0.46015065956115725} +{"epoch": 0, "iter": 12097, "iter_tflops": 31.378530284348408, "iter_time": 0.6574907531738281, "loss": 0.5681059956550598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.76817164869105, "step_time": 0.5933902339935303} +{"epoch": 0, "iter": 12098, "iter_tflops": 38.36120625751444, "iter_time": 0.5378113861083984, "loss": 0.6601150035858154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.09948635742743, "step_time": 0.4900557060241699} +{"epoch": 0, "iter": 12099, "iter_tflops": 33.989770717150186, "iter_time": 0.6069794845581055, "loss": 0.6495925188064575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.27294469802295, "step_time": 0.5535139141082763} +{"epoch": 0, "iter": 12100, "iter_tflops": 41.613080226596935, "iter_time": 0.4957838592529297, "loss": 0.6366548538208008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2708745748404, "step_time": 0.45572553443908687} +{"epoch": 0, "iter": 12101, 
"iter_tflops": 21.30410405752082, "iter_time": 0.9684093475341797, "loss": 0.08505374193191528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.655733832344584, "step_time": 0.9106345291137696} +{"epoch": 0, "iter": 12102, "iter_tflops": 16.4579141029924, "iter_time": 1.2535667266845703, "loss": 0.17488519847393036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.305552723539503, "step_time": 1.1270401840209963} +{"epoch": 0, "iter": 12103, "iter_tflops": 43.338400325454785, "iter_time": 0.4760464935302734, "loss": 0.06922390311956406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.81958015993902, "step_time": 0.4225987491607666} +{"epoch": 0, "iter": 12104, "iter_tflops": 48.731046799704146, "iter_time": 0.42336651611328124, "loss": 0.15543462336063385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.11036658843781, "step_time": 0.38845699691772456} +{"epoch": 0, "iter": 12105, "iter_tflops": 9.60455980747199, "iter_time": 0.865458236694336, "loss": 0.0032982639968395233, "lr": 3e-05, "seqlen": 3360.0, "step_tflops": 10.082161494994946, "step_time": 0.8244606475830079} +{"epoch": 0, "iter": 12106, "iter_tflops": 6.91016621680932, "iter_time": 1.2029154052734374, "loss": 0.0028945293743163347, "lr": 3e-05, "seqlen": 3360.0, "step_tflops": 7.839254290532733, "step_time": 1.0603489933013917} +{"epoch": 0, "iter": 12107, "iter_tflops": 21.83342743527533, "iter_time": 0.38071646881103516, "loss": 0.008324852213263512, "lr": 3e-05, "seqlen": 3360.0, "step_tflops": 23.96946955493219, "step_time": 0.34678887557983407} +{"epoch": 0, "iter": 12108, "iter_tflops": 25.12045854007524, "iter_time": 0.3308994293212891, "loss": 0.0020572796929627657, "lr": 3e-05, "seqlen": 3360.0, "step_tflops": 27.629647602303113, "step_time": 0.30084876632690427} +{"epoch": 0, "iter": 12109, "iter_tflops": 26.48788700924232, "iter_time": 0.7788878555297852, "loss": 0.02363608032464981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.944306757176474, "step_time": 0.7382932662963867} 
+{"epoch": 0, "iter": 12110, "iter_tflops": 19.40191877850159, "iter_time": 1.063353256225586, "loss": 0.013681932352483273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.69276730262651, "step_time": 0.9510586280822754} +{"epoch": 0, "iter": 12111, "iter_tflops": 54.57315337146225, "iter_time": 0.37804473876953126, "loss": 0.05137230455875397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.8660931989771, "step_time": 0.3446206760406494} +{"epoch": 0, "iter": 12112, "iter_tflops": 57.136680839139316, "iter_time": 0.3610831642150879, "loss": 0.0263357050716877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.036011111676764, "step_time": 0.3325664100646972} +{"epoch": 0, "iter": 12113, "iter_tflops": 34.0005066150566, "iter_time": 0.6067878265380859, "loss": 0.721561074256897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.264452097595345, "step_time": 0.5689068031311035} +{"epoch": 0, "iter": 12114, "iter_tflops": 12.720274153621485, "iter_time": 1.6219063568115235, "loss": 0.6640511751174927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.682488207334165, "step_time": 1.4051496734619142} +{"epoch": 0, "iter": 12115, "iter_tflops": 13.775273015144196, "iter_time": 1.4976903533935548, "loss": 0.5616755485534668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.060124917286718, "step_time": 1.2846160049438478} +{"epoch": 0, "iter": 12116, "iter_tflops": 22.427980785404316, "iter_time": 0.9198818969726563, "loss": 0.7477208971977234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.20081214365722, "step_time": 0.7584734382629394} +{"epoch": 0, "iter": 12117, "iter_tflops": 15.417144048857596, "iter_time": 1.0678599853515625, "loss": 0.30483996868133545, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 16.063393768121575, "step_time": 1.0248986892700194} +{"epoch": 0, "iter": 12118, "iter_tflops": 13.653869101155626, "iter_time": 1.2057645416259768, "loss": 0.19952332973480225, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 17.674459681172802, "step_time": 
0.9314769172668458} +{"epoch": 0, "iter": 12119, "iter_tflops": 24.839935516178837, "iter_time": 0.6627775344848633, "loss": 0.2282286286354065, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 26.76526317575348, "step_time": 0.6151014137268066} +{"epoch": 0, "iter": 12120, "iter_tflops": 25.161925854147867, "iter_time": 0.6542961502075195, "loss": 0.3721829354763031, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 27.013575416674207, "step_time": 0.609447322845459} +{"epoch": 0, "iter": 12121, "iter_tflops": 19.033448453799, "iter_time": 1.0839388122558593, "loss": 0.24345536530017853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.551323855171066, "step_time": 1.0038814849853515} +{"epoch": 0, "iter": 12122, "iter_tflops": 13.979531199953195, "iter_time": 1.4758072509765623, "loss": 0.22121334075927734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.950914839757477, "step_time": 1.149305965423584} +{"epoch": 0, "iter": 12123, "iter_tflops": 49.08908046193156, "iter_time": 0.4202786712646484, "loss": 0.17960494756698608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.24764002520559, "step_time": 0.3874555473327637} +{"epoch": 0, "iter": 12124, "iter_tflops": 51.56394848780854, "iter_time": 0.4001069374084472, "loss": 0.2398819476366043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.95678371339283, "step_time": 0.3686969146728516} +{"epoch": 0, "iter": 12125, "iter_tflops": 28.946960340341423, "iter_time": 0.7127205505371094, "loss": 0.7409113645553589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.104279686354936, "step_time": 0.6632879371643066} +{"epoch": 0, "iter": 12126, "iter_tflops": 16.914097480516535, "iter_time": 1.2197572784423827, "loss": 0.6297532916069031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.533318057979766, "step_time": 1.0047617950439454} +{"epoch": 0, "iter": 12127, "iter_tflops": 47.969569645374555, "iter_time": 0.43008710861206056, "loss": 0.6155363917350769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.06675111764442, 
"step_time": 0.3962431507110596} +{"epoch": 0, "iter": 12128, "iter_tflops": 46.92247554563863, "iter_time": 0.4396846771240234, "loss": 0.7757517695426941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.59101583247494, "step_time": 0.40780152702331546} +{"epoch": 0, "iter": 12129, "iter_tflops": 26.155405141843108, "iter_time": 0.7887889099121095, "loss": 0.08487504720687866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.472506892477924, "step_time": 0.7509723663330077} +{"epoch": 0, "iter": 12130, "iter_tflops": 20.274508598530776, "iter_time": 1.0175878448486328, "loss": 0.07908879965543747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.316169433740065, "step_time": 0.8149374084472656} +{"epoch": 0, "iter": 12131, "iter_tflops": 50.73162355835333, "iter_time": 0.40667126464843745, "loss": 0.06680228561162949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.75820575141927, "step_time": 0.3700099964141846} +{"epoch": 0, "iter": 12132, "iter_tflops": 57.03767515089409, "iter_time": 0.3617099304199219, "loss": 0.07881499081850052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.09633117955858, "step_time": 0.33224335670471195} +{"epoch": 0, "iter": 12133, "iter_tflops": 38.96958879583999, "iter_time": 0.5294152221679687, "loss": 0.0832824558019638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.89988412761279, "step_time": 0.4923902282714844} +{"epoch": 0, "iter": 12134, "iter_tflops": 20.962751419835662, "iter_time": 0.9841787033081054, "loss": 0.07056771963834763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.364423481976942, "step_time": 0.8133870468139649} +{"epoch": 0, "iter": 12135, "iter_tflops": 38.86227770632425, "iter_time": 0.5308771057128906, "loss": 0.06126601994037628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.700502521970364, "step_time": 0.4831580963134766} +{"epoch": 0, "iter": 12136, "iter_tflops": 40.15900381696944, "iter_time": 0.5137351913452148, "loss": 0.10697281360626221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.32165465451923, "step_time": 0.46548563385009756} +{"epoch": 0, "iter": 12137, "iter_tflops": 19.052128853664716, "iter_time": 1.0828760223388672, "loss": 0.5144492983818054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.32403332808774, "step_time": 1.0151082305908203} +{"epoch": 0, "iter": 12138, "iter_tflops": 11.847527555414722, "iter_time": 1.7413838806152344, "loss": 0.8120471239089966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.511892165185044, "step_time": 1.3300178527832034} +{"epoch": 0, "iter": 12139, "iter_tflops": 37.45000912529481, "iter_time": 0.5508968887329102, "loss": 0.62903892993927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56176348540573, "step_time": 0.5086340370178223} +{"epoch": 0, "iter": 12140, "iter_tflops": 36.672539966709905, "iter_time": 0.562576072692871, "loss": 0.7700570821762085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.0586627735249, "step_time": 0.5150220222473144} +{"epoch": 0, "iter": 12141, "iter_tflops": 19.72937819889808, "iter_time": 1.0457041931152342, "loss": 0.3424625098705292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.998688797066396, "step_time": 0.982494369506836} +{"epoch": 0, "iter": 12142, "iter_tflops": 15.755627593089788, "iter_time": 1.3094428253173829, "loss": 0.43190667033195496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.930836306979366, "step_time": 1.0351343612670898} +{"epoch": 0, "iter": 12143, "iter_tflops": 48.099986614288824, "iter_time": 0.4289209823608398, "loss": 0.37817448377609253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.378235493033564, "step_time": 0.39388676071166995} +{"epoch": 0, "iter": 12144, "iter_tflops": 48.85730329350891, "iter_time": 0.4222724571228027, "loss": 0.4587256908416748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.709074809042946, "step_time": 0.3914144496917724} +{"epoch": 0, "iter": 12145, "iter_tflops": 44.626364315512845, "iter_time": 0.46230728912353514, "loss": 0.4491479694843292, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 48.76882382246808, "step_time": 0.4230385704040528} +{"epoch": 0, "iter": 12146, "iter_tflops": 38.94425790118575, "iter_time": 0.5297595748901367, "loss": 0.4236075282096863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.95356866322029, "step_time": 0.4803115119934081} +{"epoch": 0, "iter": 12147, "iter_tflops": 40.96750925648661, "iter_time": 0.5035964813232422, "loss": 0.48871928453445435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.757097711131884, "step_time": 0.4609569110870361} +{"epoch": 0, "iter": 12148, "iter_tflops": 37.861879854693896, "iter_time": 0.5449040985107422, "loss": 0.5351256132125854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34673660462842, "step_time": 0.4989775543212891} +{"epoch": 0, "iter": 12149, "iter_tflops": 17.757933515503424, "iter_time": 1.1617958526611327, "loss": 0.0649302750825882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.909778653551832, "step_time": 1.0910277633666992} +{"epoch": 0, "iter": 12150, "iter_tflops": 17.20052256345613, "iter_time": 1.1994457397460936, "loss": 0.11849046498537064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.427515094633865, "step_time": 0.9628318271636963} +{"epoch": 0, "iter": 12151, "iter_tflops": 39.87806299726543, "iter_time": 0.5173544540405273, "loss": 0.056519702076911926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14907820736928, "step_time": 0.4673051929473877} +{"epoch": 0, "iter": 12152, "iter_tflops": 41.71461163228367, "iter_time": 0.4945771446228027, "loss": 0.07894647866487503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.793809225949104, "step_time": 0.45052145385742187} +{"epoch": 0, "iter": 12153, "iter_tflops": 21.002808056369002, "iter_time": 0.9823016738891601, "loss": 0.22738990187644958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.682263021873297, "step_time": 0.909569450378418} +{"epoch": 0, "iter": 12154, "iter_tflops": 14.700861113086246, "iter_time": 1.4033935394287111, "loss": 0.2709380090236664, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 17.546677171744122, "step_time": 1.1757835006713866} +{"epoch": 0, "iter": 12155, "iter_tflops": 41.85767108780063, "iter_time": 0.49288679885864256, "loss": 0.2679726481437683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.18867673596903, "step_time": 0.4466699409484863} +{"epoch": 0, "iter": 12156, "iter_tflops": 43.235687413988906, "iter_time": 0.4771774139404297, "loss": 0.232629656791687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.09979071496654, "step_time": 0.4380294094085693} +{"epoch": 0, "iter": 12157, "iter_tflops": 14.218244098589805, "iter_time": 1.4510296325683594, "loss": 0.6650056838989258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.172991911831211, "step_time": 1.3597248077392579} +{"epoch": 0, "iter": 12158, "iter_tflops": 22.102463250673654, "iter_time": 0.9334296035766602, "loss": 0.8059149980545044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.89850392528344, "step_time": 0.7669978065490722} +{"epoch": 0, "iter": 12159, "iter_tflops": 49.80449684016647, "iter_time": 0.41424158096313474, "loss": 0.7734274864196777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.99150833816309, "step_time": 0.38211737632751464} +{"epoch": 0, "iter": 12160, "iter_tflops": 47.10081803626938, "iter_time": 0.43801985549926753, "loss": 0.7237476706504822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.00903063043344, "step_time": 0.4044596271514893} +{"epoch": 0, "iter": 12161, "iter_tflops": 34.20287118481831, "iter_time": 0.6031977081298827, "loss": 0.7288109660148621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.647840014591, "step_time": 0.5629552383422851} +{"epoch": 0, "iter": 12162, "iter_tflops": 17.782782645422436, "iter_time": 1.1601723937988282, "loss": 0.6693367958068848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.93050940469939, "step_time": 0.9407484855651855} +{"epoch": 0, "iter": 12163, "iter_tflops": 38.290181602358494, "iter_time": 0.5388089752197266, "loss": 0.6491023302078247, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 41.89193574557874, "step_time": 0.4924836521148681} +{"epoch": 0, "iter": 12164, "iter_tflops": 42.71303177277822, "iter_time": 0.4830163688659668, "loss": 0.8279085159301758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.60170746135325, "step_time": 0.4427111072540283} +{"epoch": 0, "iter": 12165, "iter_tflops": 13.50847250012534, "iter_time": 1.5272706451416014, "loss": 0.7629478573799133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.179288089245386, "step_time": 1.4550161743164063} +{"epoch": 0, "iter": 12166, "iter_tflops": 17.506180688747225, "iter_time": 1.1785034027099608, "loss": 0.8058820962905884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.08982129900817, "step_time": 0.9782488536834717} +{"epoch": 0, "iter": 12167, "iter_tflops": 48.3769590624007, "iter_time": 0.42646528244018556, "loss": 0.6621721982955933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.36102439148806, "step_time": 0.39401623153686527} +{"epoch": 0, "iter": 12168, "iter_tflops": 45.27489610153524, "iter_time": 0.45568505477905275, "loss": 0.60132896900177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.954290519560374, "step_time": 0.42143585968017583} +{"epoch": 0, "iter": 12169, "iter_tflops": 27.63155371530211, "iter_time": 0.7466497802734375, "loss": 0.0779859721660614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.170947628445074, "step_time": 0.7072479705810547} +{"epoch": 0, "iter": 12170, "iter_tflops": 16.275276588603663, "iter_time": 1.2676339721679688, "loss": 0.09911739081144333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.93032241168224, "step_time": 0.9857035694122314} +{"epoch": 0, "iter": 12171, "iter_tflops": 41.08430818991468, "iter_time": 0.5021648025512695, "loss": 0.0722549557685852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41898257130931, "step_time": 0.45423944664001464} +{"epoch": 0, "iter": 12172, "iter_tflops": 38.456878170235754, "iter_time": 0.5364734344482422, "loss": 
0.09186331182718277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.47893487653507, "step_time": 0.48567822074890143} +{"epoch": 0, "iter": 12173, "iter_tflops": 16.1922590535739, "iter_time": 1.2741331176757813, "loss": 0.43998438119888306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.31112835482716, "step_time": 1.1917821350097657} +{"epoch": 0, "iter": 12174, "iter_tflops": 25.767260081142812, "iter_time": 0.8006708297729493, "loss": 0.39834755659103394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.26485717609026, "step_time": 0.6394292526245118} +{"epoch": 0, "iter": 12175, "iter_tflops": 49.864757868743695, "iter_time": 0.41374097442626956, "loss": 0.3243228495121002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.372700790764625, "step_time": 0.3794384536743164} +{"epoch": 0, "iter": 12176, "iter_tflops": 45.750364749544694, "iter_time": 0.4509492683410645, "loss": 0.3067832887172699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63198867358756, "step_time": 0.41568137931823734} +{"epoch": 0, "iter": 12177, "iter_tflops": 37.132426697477385, "iter_time": 0.5556085433959961, "loss": 0.3474128842353821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.928418756221916, "step_time": 0.5167019920349121} +{"epoch": 0, "iter": 12178, "iter_tflops": 13.37746950866199, "iter_time": 1.5422269134521482, "loss": 0.4135653078556061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.287932777938268, "step_time": 1.3495018463134765} +{"epoch": 0, "iter": 12179, "iter_tflops": 35.37361566417124, "iter_time": 0.5832339477539062, "loss": 0.3827345073223114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26376941532174, "step_time": 0.4104565525054932} +{"epoch": 0, "iter": 12180, "iter_tflops": 42.961667216523544, "iter_time": 0.4802209701538086, "loss": 0.37788012623786926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.257873418328906, "step_time": 0.4460017719268799} +{"epoch": 0, "iter": 12181, "iter_tflops": 23.54540237181102, "iter_time": 
0.8762259902954102, "loss": 0.5534688830375671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.824428776610432, "step_time": 0.8310802917480469} +{"epoch": 0, "iter": 12182, "iter_tflops": 13.693010157701181, "iter_time": 1.506687957763672, "loss": 0.43238675594329834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.390574350316125, "step_time": 1.186337673187256} +{"epoch": 0, "iter": 12183, "iter_tflops": 46.42592617290404, "iter_time": 0.4443873329162598, "loss": 0.3813185393810272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.21629648378689, "step_time": 0.4108445854187012} +{"epoch": 0, "iter": 12184, "iter_tflops": 47.16974442487683, "iter_time": 0.43737980270385746, "loss": 0.42758437991142273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.87686968930404, "step_time": 0.4055102767944336} +{"epoch": 0, "iter": 12185, "iter_tflops": 40.7345881420332, "iter_time": 0.5064760551452636, "loss": 0.18689467012882233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.093795052222895, "step_time": 0.46789108276367186} +{"epoch": 0, "iter": 12186, "iter_tflops": 27.370093698219463, "iter_time": 0.7537823486328126, "loss": 0.135450541973114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.569312442472196, "step_time": 0.6748955688476562} +{"epoch": 0, "iter": 12187, "iter_tflops": 44.86599560867642, "iter_time": 0.4598380851745606, "loss": 0.2026165872812271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.64277030945781, "step_time": 0.424134838104248} +{"epoch": 0, "iter": 12188, "iter_tflops": 47.53978451890954, "iter_time": 0.4339753265380859, "loss": 0.2099829912185669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.47142439453203, "step_time": 0.40082616233825685} +{"epoch": 0, "iter": 12189, "iter_tflops": 27.18866122967174, "iter_time": 0.7588124084472656, "loss": 0.2376948446035385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.72519070789084, "step_time": 0.7182230300903321} +{"epoch": 0, "iter": 12190, "iter_tflops": 16.71836270806197, 
"iter_time": 1.2340379180908205, "loss": 0.14927411079406738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.8131329770034, "step_time": 1.0966325244903563} +{"epoch": 0, "iter": 12191, "iter_tflops": 38.51103936707511, "iter_time": 0.5357189483642578, "loss": 0.15077750384807587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.34937537591677, "step_time": 0.48716405677795405} +{"epoch": 0, "iter": 12192, "iter_tflops": 48.895215395042484, "iter_time": 0.4219450378417968, "loss": 0.219480499625206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.59933100229427, "step_time": 0.38491326522827146} +{"epoch": 0, "iter": 12193, "iter_tflops": 25.31732490262456, "iter_time": 0.8149002151489257, "loss": 0.30764704942703247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.99504392054123, "step_time": 0.7642548599243164} +{"epoch": 0, "iter": 12194, "iter_tflops": 15.770870172026742, "iter_time": 1.30817724609375, "loss": 0.24077419936656952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.27454964096806, "step_time": 1.0175857849121093} +{"epoch": 0, "iter": 12195, "iter_tflops": 45.75527029404175, "iter_time": 0.45090092086791994, "loss": 0.20694024860858917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.487457444285525, "step_time": 0.41689540290832516} +{"epoch": 0, "iter": 12196, "iter_tflops": 49.20253383931936, "iter_time": 0.4193095741271972, "loss": 0.2804023027420044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.503986430127476, "step_time": 0.38559918403625487} +{"epoch": 0, "iter": 12197, "iter_tflops": 39.06534606812374, "iter_time": 0.5281175155639648, "loss": 0.09735821932554245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.46546904993259, "step_time": 0.4858322296142578} +{"epoch": 0, "iter": 12198, "iter_tflops": 8.941126588056541, "iter_time": 2.3074378051757813, "loss": 0.101802758872509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.480653022645251, "step_time": 1.9684931335449218} +{"epoch": 0, "iter": 12199, "iter_tflops": 
12.418824910473038, "iter_time": 1.6612758178710938, "loss": 0.07863815873861313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.659124046124436, "step_time": 1.4073892440795899} +{"epoch": 0, "iter": 12200, "iter_tflops": 49.13034202634263, "iter_time": 0.4199257049560547, "loss": 0.12007855623960495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.70639547961675, "step_time": 0.38414593505859373} +{"epoch": 0, "iter": 12201, "iter_tflops": 21.63982878761886, "iter_time": 0.7021218872070313, "loss": 0.2709267735481262, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 22.97060116109783, "step_time": 0.6614453544616699} +{"epoch": 0, "iter": 12202, "iter_tflops": 12.444421167845855, "iter_time": 1.2209324340820316, "loss": 0.2731074094772339, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 14.132607127309237, "step_time": 1.0750880775451659} +{"epoch": 0, "iter": 12203, "iter_tflops": 27.479443230486144, "iter_time": 0.5529150390625001, "loss": 0.29458731412887573, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.316218062438228, "step_time": 0.5182727661132812} +{"epoch": 0, "iter": 12204, "iter_tflops": 27.20400038426037, "iter_time": 0.5585133514404297, "loss": 0.16372542083263397, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.95626375271545, "step_time": 0.5247153968811035} +{"epoch": 0, "iter": 12205, "iter_tflops": 26.33805734021174, "iter_time": 0.7833187255859375, "loss": 0.41671496629714966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.774208571240155, "step_time": 0.742814811706543} +{"epoch": 0, "iter": 12206, "iter_tflops": 13.677980798140206, "iter_time": 1.5083435058593748, "loss": 0.39915284514427185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.325625833588735, "step_time": 1.2637245101928714} +{"epoch": 0, "iter": 12207, "iter_tflops": 43.48016017449435, "iter_time": 0.4744944229125977, "loss": 0.5086725354194641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.025005428019135, "step_time": 0.4387260208129883} +{"epoch": 0, "iter": 12208, 
"iter_tflops": 47.703950156488894, "iter_time": 0.4324818687438965, "loss": 0.3885304927825928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45816319222526, "step_time": 0.40092945861816404} +{"epoch": 0, "iter": 12209, "iter_tflops": 20.86104154329171, "iter_time": 0.7538280792236328, "loss": 0.008436139672994614, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 22.01973944899773, "step_time": 0.714160987854004} +{"epoch": 0, "iter": 12210, "iter_tflops": 12.663316391600153, "iter_time": 1.2418262634277344, "loss": 0.011002239771187305, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 14.898552776253727, "step_time": 1.0555145263671875} +{"epoch": 0, "iter": 12211, "iter_tflops": 9.925229252569462, "iter_time": 1.58441064453125, "loss": 0.008407224901020527, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 11.373783746439859, "step_time": 1.3826215820312502} +{"epoch": 0, "iter": 12212, "iter_tflops": 25.221487579595713, "iter_time": 0.6235016403198241, "loss": 0.010739165358245373, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 31.552260901150834, "step_time": 0.4983997478485107} +{"epoch": 0, "iter": 12213, "iter_tflops": 19.641003107532917, "iter_time": 0.7465379104614258, "loss": 0.22787882387638092, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 20.80981374414024, "step_time": 0.7046076240539552} +{"epoch": 0, "iter": 12214, "iter_tflops": 14.644988527060779, "iter_time": 1.001213035583496, "loss": 0.29534536600112915, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 17.988793030399275, "step_time": 0.8151049041748046} +{"epoch": 0, "iter": 12215, "iter_tflops": 23.497341214542335, "iter_time": 0.6240175552368165, "loss": 0.22298723459243774, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 25.28136806634066, "step_time": 0.5799825935363769} +{"epoch": 0, "iter": 12216, "iter_tflops": 24.265732464734747, "iter_time": 0.6042576065063476, "loss": 0.15379948914051056, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 26.013165310547635, "step_time": 0.5636666374206543} 
+{"epoch": 0, "iter": 12217, "iter_tflops": 25.98774625346005, "iter_time": 0.7110779647827149, "loss": 0.026703739538788795, "lr": 3e-05, "seqlen": 7360.0, "step_tflops": 28.850231570435152, "step_time": 0.6405256633758545} +{"epoch": 0, "iter": 12218, "iter_tflops": 36.68140425868365, "iter_time": 0.5037787971496581, "loss": 0.014061292633414268, "lr": 3e-05, "seqlen": 7360.0, "step_tflops": 40.40020814042663, "step_time": 0.4574063987731934} +{"epoch": 0, "iter": 12219, "iter_tflops": 41.17518823882164, "iter_time": 0.44879730987548827, "loss": 0.025398273020982742, "lr": 3e-05, "seqlen": 7360.0, "step_tflops": 45.50234052335228, "step_time": 0.4061178722381592} +{"epoch": 0, "iter": 12220, "iter_tflops": 37.25238731046763, "iter_time": 0.49605716705322267, "loss": 0.017765667289495468, "lr": 3e-05, "seqlen": 7360.0, "step_tflops": 40.83680593964685, "step_time": 0.45251613807678226} +{"epoch": 0, "iter": 12221, "iter_tflops": 19.783483131711883, "iter_time": 1.0428443450927734, "loss": 0.43928393721580505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.370239641094578, "step_time": 0.9654123611450194} +{"epoch": 0, "iter": 12222, "iter_tflops": 46.95528497492395, "iter_time": 0.4393774528503418, "loss": 0.45782995223999023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.23206309497921, "step_time": 0.4026988620758057} +{"epoch": 0, "iter": 12223, "iter_tflops": 49.958155822381975, "iter_time": 0.41296747589111327, "loss": 0.35164910554885864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.07599950837714, "step_time": 0.381520336151123} +{"epoch": 0, "iter": 12224, "iter_tflops": 47.52367942448679, "iter_time": 0.4341223945617676, "loss": 0.4184861481189728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.05417211463356, "step_time": 0.4041020088195801} +{"epoch": 0, "iter": 12225, "iter_tflops": 33.10278979132653, "iter_time": 0.6232433471679687, "loss": 0.24062131345272064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.56386902379463, "step_time": 
0.5801138648986817} +{"epoch": 0, "iter": 12226, "iter_tflops": 44.896156354333826, "iter_time": 0.4595291709899903, "loss": 0.271334707736969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.16679225105363, "step_time": 0.41961438941955564} +{"epoch": 0, "iter": 12227, "iter_tflops": 42.782355695926675, "iter_time": 0.4822336959838867, "loss": 0.2943952679634094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.90746393838404, "step_time": 0.44940608215332034} +{"epoch": 0, "iter": 12228, "iter_tflops": 49.19568302217244, "iter_time": 0.41936796569824225, "loss": 0.14824511110782623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.08876909901367, "step_time": 0.38861502838134765} +{"epoch": 0, "iter": 12229, "iter_tflops": 29.001336230668446, "iter_time": 0.7113842391967774, "loss": 0.05435661971569061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.665195873291943, "step_time": 0.6727853164672852} +{"epoch": 0, "iter": 12230, "iter_tflops": 16.119073673563303, "iter_time": 1.279918060302734, "loss": 0.056084778159856796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.926840289926048, "step_time": 1.0900442543029785} +{"epoch": 0, "iter": 12231, "iter_tflops": 39.47416215258419, "iter_time": 0.5226480407714844, "loss": 0.02309008128941059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.684652161848135, "step_time": 0.47227326965332034} +{"epoch": 0, "iter": 12232, "iter_tflops": 42.98565048747336, "iter_time": 0.4799530372619628, "loss": 0.04291098192334175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.16472463812304, "step_time": 0.4374263534545898} +{"epoch": 0, "iter": 12233, "iter_tflops": 15.764573551636365, "iter_time": 1.308699752807617, "loss": 0.27794620394706726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.699525904947688, "step_time": 1.2354298934936523} +{"epoch": 0, "iter": 12234, "iter_tflops": 16.511149931918677, "iter_time": 1.2495249328613283, "loss": 0.23979035019874573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
20.236382311179952, "step_time": 1.019505027770996} +{"epoch": 0, "iter": 12235, "iter_tflops": 40.377682512303615, "iter_time": 0.5109528884887695, "loss": 0.217180073261261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.332655686184324, "step_time": 0.46537012481689455} +{"epoch": 0, "iter": 12236, "iter_tflops": 37.967662135066156, "iter_time": 0.5433859329223633, "loss": 0.24650239944458008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.38329833448726, "step_time": 0.49853671264648436} +{"epoch": 0, "iter": 12237, "iter_tflops": 33.808454967906485, "iter_time": 0.610234733581543, "loss": 0.6738913059234619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.50456051628067, "step_time": 0.5500955944061279} +{"epoch": 0, "iter": 12238, "iter_tflops": 38.47405498507056, "iter_time": 0.5362339248657226, "loss": 0.39902210235595703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.622026463566485, "step_time": 0.48404769134521486} +{"epoch": 0, "iter": 12239, "iter_tflops": 42.76779569570395, "iter_time": 0.4823978691101074, "loss": 0.4254545569419861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.764850231715755, "step_time": 0.44116667556762695} +{"epoch": 0, "iter": 12240, "iter_tflops": 38.904188126183215, "iter_time": 0.5303052062988282, "loss": 0.4075559079647064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.72731517223412, "step_time": 0.48285490036010736} +{"epoch": 0, "iter": 12241, "iter_tflops": 24.90292700116563, "iter_time": 0.8284605865478516, "loss": 0.14964376389980316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.69916133082489, "step_time": 0.7727244033813476} +{"epoch": 0, "iter": 12242, "iter_tflops": 20.514412623434158, "iter_time": 1.0056877517700196, "loss": 0.17567002773284912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.265406200266497, "step_time": 0.8867712574005128} +{"epoch": 0, "iter": 12243, "iter_tflops": 46.50943344913162, "iter_time": 0.4435894393920899, "loss": 0.23840460181236267, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 50.38145177011258, "step_time": 0.4094977970123291} +{"epoch": 0, "iter": 12244, "iter_tflops": 49.04556365764331, "iter_time": 0.4206515731811523, "loss": 0.2744508683681488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.28107356327087, "step_time": 0.3872124214172363} +{"epoch": 0, "iter": 12245, "iter_tflops": 46.678012784029384, "iter_time": 0.44198740005493165, "loss": 0.35026612877845764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.147773874351756, "step_time": 0.403362491607666} +{"epoch": 0, "iter": 12246, "iter_tflops": 43.64811326746404, "iter_time": 0.47266862106323243, "loss": 0.27391207218170166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47456834768251, "step_time": 0.43457148170471194} +{"epoch": 0, "iter": 12247, "iter_tflops": 44.54987882265296, "iter_time": 0.46310100173950197, "loss": 0.20324766635894775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.17353820383604, "step_time": 0.42826610374450685} +{"epoch": 0, "iter": 12248, "iter_tflops": 48.49331623353706, "iter_time": 0.42544200134277343, "loss": 0.20472896099090576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43868557933985, "step_time": 0.39343269729614255} +{"epoch": 0, "iter": 12249, "iter_tflops": 30.280684787528404, "iter_time": 0.6813284988403321, "loss": 0.17188866436481476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.17975432415474, "step_time": 0.6411202926635743} +{"epoch": 0, "iter": 12250, "iter_tflops": 18.896175428629032, "iter_time": 1.0918131866455079, "loss": 0.21887382864952087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.46865336660348, "step_time": 0.9182167339324951} +{"epoch": 0, "iter": 12251, "iter_tflops": 48.292605020034145, "iter_time": 0.42721020126342774, "loss": 0.16338688135147095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.385706115681884, "step_time": 0.39383058929443354} +{"epoch": 0, "iter": 12252, "iter_tflops": 51.31585250377337, "iter_time": 0.4020413284301758, "loss": 0.206512451171875, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 55.70767244953007, "step_time": 0.3703456382751465} +{"epoch": 0, "iter": 12253, "iter_tflops": 44.2783898443909, "iter_time": 0.4659404640197754, "loss": 0.1870013028383255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.38278861046882, "step_time": 0.4264138984680176} +{"epoch": 0, "iter": 12254, "iter_tflops": 35.27246397109805, "iter_time": 0.5849065017700195, "loss": 0.14087511599063873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.416560523737736, "step_time": 0.5234118156433105} +{"epoch": 0, "iter": 12255, "iter_tflops": 39.611094735310296, "iter_time": 0.5208412857055664, "loss": 0.14708930253982544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.28637543139071, "step_time": 0.4766186428070069} +{"epoch": 0, "iter": 12256, "iter_tflops": 44.61789439131193, "iter_time": 0.46239505004882814, "loss": 0.1515643447637558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.57506194100477, "step_time": 0.42472603607177734} +{"epoch": 0, "iter": 12257, "iter_tflops": 21.217249863220015, "iter_time": 0.9274421157836914, "loss": 0.07484341412782669, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 22.988836745970488, "step_time": 0.855970718383789} +{"epoch": 0, "iter": 12258, "iter_tflops": 13.94337737669102, "iter_time": 1.4112628936767577, "loss": 0.05609246343374252, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 17.602885711568863, "step_time": 1.1178718891143797} +{"epoch": 0, "iter": 12259, "iter_tflops": 45.478701170023335, "iter_time": 0.43268102645874024, "loss": 0.07527164369821548, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 49.24581510857323, "step_time": 0.3995826053619385} +{"epoch": 0, "iter": 12260, "iter_tflops": 49.690757667854484, "iter_time": 0.39600465011596675, "loss": 0.07655061036348343, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 54.223646458302234, "step_time": 0.36290018081665043} +{"epoch": 0, "iter": 12261, "iter_tflops": 22.087280810173997, "iter_time": 0.9340712280273438, "loss": 
0.731200635433197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.38215035093818, "step_time": 0.8823437194824219} +{"epoch": 0, "iter": 12262, "iter_tflops": 22.393967688788848, "iter_time": 0.9212790603637696, "loss": 0.7742998600006104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.447455149659483, "step_time": 0.7516577911376954} +{"epoch": 0, "iter": 12263, "iter_tflops": 42.274891806884426, "iter_time": 0.4880223846435547, "loss": 0.5558702349662781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41775001999877, "step_time": 0.4542517738342285} +{"epoch": 0, "iter": 12264, "iter_tflops": 43.81026186835598, "iter_time": 0.47091920089721684, "loss": 0.6652117371559143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.98540650215359, "step_time": 0.43909577560424806} +{"epoch": 0, "iter": 12265, "iter_tflops": 34.12224303738377, "iter_time": 0.6046230163574218, "loss": 0.03601305931806564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.3980105126818, "step_time": 0.566819263458252} +{"epoch": 0, "iter": 12266, "iter_tflops": 9.318841769306545, "iter_time": 2.2139117736816405, "loss": 0.07594988495111465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.365448102924853, "step_time": 1.8152468185424804} +{"epoch": 0, "iter": 12267, "iter_tflops": 10.27021908609269, "iter_time": 2.008827011108399, "loss": 0.04635869339108467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.321481630912972, "step_time": 1.548708625793457} +{"epoch": 0, "iter": 12268, "iter_tflops": 28.759881372893577, "iter_time": 0.7173566970825194, "loss": 0.04579094424843788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.813997174931075, "step_time": 0.5604143829345702} +{"epoch": 0, "iter": 12269, "iter_tflops": 21.541368723127228, "iter_time": 0.6749950866699219, "loss": 0.22047343850135803, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 22.998628365868125, "step_time": 0.6322254447937011} +{"epoch": 0, "iter": 12270, "iter_tflops": 21.424465325183522, "iter_time": 
0.6786782226562499, "loss": 0.2886084020137787, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.22974960625033, "step_time": 0.6259352035522461} +{"epoch": 0, "iter": 12271, "iter_tflops": 22.177991683913216, "iter_time": 0.6556192398071289, "loss": 0.2170078605413437, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.955414827373946, "step_time": 0.6069741706848144} +{"epoch": 0, "iter": 12272, "iter_tflops": 21.494584396925188, "iter_time": 0.6764642562866211, "loss": 0.234292134642601, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.084250944345698, "step_time": 0.6298804359436034} +{"epoch": 0, "iter": 12273, "iter_tflops": 20.11986426377758, "iter_time": 1.0254091796874998, "loss": 0.7220830917358398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.6308771334818, "step_time": 0.9537797927856445} +{"epoch": 0, "iter": 12274, "iter_tflops": 17.660189440142513, "iter_time": 1.1682260589599611, "loss": 0.7187718749046326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.255476706045275, "step_time": 0.9706248321533204} +{"epoch": 0, "iter": 12275, "iter_tflops": 37.574880139992615, "iter_time": 0.5490661163330077, "loss": 0.5912209749221802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.762111157362924, "step_time": 0.5061340770721435} +{"epoch": 0, "iter": 12276, "iter_tflops": 33.93216052350029, "iter_time": 0.6080100173950196, "loss": 0.6143407821655273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.07338484851207, "step_time": 0.55649338722229} +{"epoch": 0, "iter": 12277, "iter_tflops": 19.61844571654921, "iter_time": 1.051617126464844, "loss": 0.5132197737693787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.20483580964146, "step_time": 0.972942855834961} +{"epoch": 0, "iter": 12278, "iter_tflops": 14.0693587243245, "iter_time": 1.466384780883789, "loss": 0.4091309607028961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.736843723348034, "step_time": 1.163177272796631} +{"epoch": 0, "iter": 12279, "iter_tflops": 49.17231500296026, "iter_time": 
0.41956726074218753, "loss": 0.4218027591705322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.372251643285736, "step_time": 0.3865509300231933} +{"epoch": 0, "iter": 12280, "iter_tflops": 50.322201153010404, "iter_time": 0.4099799499511719, "loss": 0.5143765211105347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.42709011748532, "step_time": 0.37905927848815923} +{"epoch": 0, "iter": 12281, "iter_tflops": 14.047952415938571, "iter_time": 0.7347794036865234, "loss": 0.06858688592910767, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 14.840384424049308, "step_time": 0.6955443878173828} +{"epoch": 0, "iter": 12282, "iter_tflops": 5.881799866530872, "iter_time": 1.7549298400878905, "loss": 0.043904948979616165, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 7.612904303078568, "step_time": 1.3558749313354492} +{"epoch": 0, "iter": 12283, "iter_tflops": 23.685027251363742, "iter_time": 0.4358089179992676, "loss": 0.10583759099245071, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 25.998461550555113, "step_time": 0.3970291118621826} +{"epoch": 0, "iter": 12284, "iter_tflops": 24.3156191824043, "iter_time": 0.42450681686401365, "loss": 0.07453597337007523, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 26.35247485074482, "step_time": 0.39169551086425786} +{"epoch": 0, "iter": 12285, "iter_tflops": 26.87873721800795, "iter_time": 0.7675618591308595, "loss": 0.10534404963254929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.353114046765793, "step_time": 0.7276482391357423} +{"epoch": 0, "iter": 12286, "iter_tflops": 15.869041708269668, "iter_time": 1.3000843963623048, "loss": 0.08676544576883316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.018158073033675, "step_time": 1.0306189727783204} +{"epoch": 0, "iter": 12287, "iter_tflops": 44.48663480144617, "iter_time": 0.46375936508178706, "loss": 0.11170491576194763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.7908767993805, "step_time": 0.4143548946380615} +{"epoch": 0, "iter": 12288, "iter_tflops": 
54.18574359084734, "iter_time": 0.380747631072998, "loss": 0.10469140112400055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.993090851960666, "step_time": 0.34972050476074223} +{"epoch": 0, "iter": 12289, "iter_tflops": 31.349330180370544, "iter_time": 0.48596508789062504, "loss": 0.085108682513237, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 34.088376289563264, "step_time": 0.4469171504974366} +{"epoch": 0, "iter": 12290, "iter_tflops": 6.251408640986364, "iter_time": 2.436999542236328, "loss": 0.10364211350679398, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 7.830896556724489, "step_time": 1.9454579544067383} +{"epoch": 0, "iter": 12291, "iter_tflops": 6.906044210505394, "iter_time": 2.2059922485351566, "loss": 0.09509832412004471, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 9.358375571731145, "step_time": 1.6279192771911621} +{"epoch": 0, "iter": 12292, "iter_tflops": 24.642219870201203, "iter_time": 0.6182348861694336, "loss": 0.1349070519208908, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 27.79935372377108, "step_time": 0.5480228118896484} +{"epoch": 0, "iter": 12293, "iter_tflops": 10.98316099606635, "iter_time": 1.283032424926758, "loss": 0.16630159318447113, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 11.753405359143846, "step_time": 1.198950538635254} +{"epoch": 0, "iter": 12294, "iter_tflops": 13.822815600854812, "iter_time": 1.0194559555053713, "loss": 0.4061504900455475, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 18.427905633945503, "step_time": 0.7646963233947754} +{"epoch": 0, "iter": 12295, "iter_tflops": 24.831375645686407, "iter_time": 0.5674978256225587, "loss": 0.20570406317710876, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 26.388191900681335, "step_time": 0.5340173263549804} +{"epoch": 0, "iter": 12296, "iter_tflops": 25.43386666042604, "iter_time": 0.5540546340942383, "loss": 0.2853364646434784, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 27.084401767944833, "step_time": 0.5202903060913086} +{"epoch": 0, "iter": 12297, 
"iter_tflops": 36.57527147733452, "iter_time": 0.46134591293334964, "loss": 0.06890720129013062, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 40.3246032102522, "step_time": 0.41845054054260256} +{"epoch": 0, "iter": 12298, "iter_tflops": 29.459730084880864, "iter_time": 0.572776870727539, "loss": 0.09196984022855759, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 32.90197072676566, "step_time": 0.5128523197174073} +{"epoch": 0, "iter": 12299, "iter_tflops": 35.485645966114866, "iter_time": 0.4755120429992675, "loss": 0.05668722093105316, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 39.21101853309545, "step_time": 0.43033444786071773} +{"epoch": 0, "iter": 12300, "iter_tflops": 31.613947331685328, "iter_time": 0.5337470779418946, "loss": 0.06380298733711243, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 34.5602208189287, "step_time": 0.48824491310119633} +{"epoch": 0, "iter": 12301, "iter_tflops": 1.3520591027661675, "iter_time": 1.0139647064208985, "loss": 0.5352897644042969, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 1.4602279142784744, "step_time": 0.9388535842895507} +{"epoch": 0, "iter": 12302, "iter_tflops": 1.792241183359614, "iter_time": 0.7649306488037111, "loss": 0.8514087200164795, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 2.234084483560052, "step_time": 0.6136474342346192} +{"epoch": 0, "iter": 12303, "iter_tflops": 3.1284660718906907, "iter_time": 0.4382148246765137, "loss": 0.7791464328765869, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.3979384229772718, "step_time": 0.40346234703063966} +{"epoch": 0, "iter": 12304, "iter_tflops": 3.3120433906961098, "iter_time": 0.4139258003234863, "loss": 0.6843318343162537, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.5815226207926645, "step_time": 0.38278139114379883} +{"epoch": 0, "iter": 12305, "iter_tflops": 23.671404638597355, "iter_time": 0.7719476089477539, "loss": 0.0032991443295031786, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 24.927478260362463, "step_time": 0.7330498504638671} +{"epoch": 0, 
"iter": 12306, "iter_tflops": 15.952868118564462, "iter_time": 1.145441940307617, "loss": 0.005390893202275038, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 17.777032880429772, "step_time": 1.0279040565490722} +{"epoch": 0, "iter": 12307, "iter_tflops": 45.07844562674298, "iter_time": 0.4053618965148926, "loss": 0.001620689406991005, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 49.72127429775028, "step_time": 0.36751037597656255} +{"epoch": 0, "iter": 12308, "iter_tflops": 52.13997506279543, "iter_time": 0.3504620819091797, "loss": 0.009451097808778286, "lr": 3e-05, "seqlen": 7280.0, "step_tflops": 57.145915795792995, "step_time": 0.3197618579864502} +{"epoch": 0, "iter": 12309, "iter_tflops": 28.525780687464472, "iter_time": 0.723243782043457, "loss": 0.4117257297039032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.106380604568066, "step_time": 0.6852731246948242} +{"epoch": 0, "iter": 12310, "iter_tflops": 13.665119239469949, "iter_time": 1.509763153076172, "loss": 0.6277506351470947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.54676680195578, "step_time": 1.1123822135925294} +{"epoch": 0, "iter": 12311, "iter_tflops": 41.63346854479037, "iter_time": 0.49554106903076167, "loss": 0.5928918123245239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.43957948677461, "step_time": 0.45403354835510257} +{"epoch": 0, "iter": 12312, "iter_tflops": 46.55627626328948, "iter_time": 0.4431431198120117, "loss": 0.5725522041320801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.85445565705568, "step_time": 0.40568900489807125} +{"epoch": 0, "iter": 12313, "iter_tflops": 21.48028844079817, "iter_time": 0.9604663162231444, "loss": 0.31022554636001587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.927972106991206, "step_time": 0.8998219909667968} +{"epoch": 0, "iter": 12314, "iter_tflops": 12.788156261993077, "iter_time": 1.6132969512939455, "loss": 0.5850433707237244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.63744749935317, "step_time": 1.4094734420776367} 
+{"epoch": 0, "iter": 12315, "iter_tflops": 9.868083611225211, "iter_time": 2.09068896484375, "loss": 0.5567949414253235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.090816189215001, "step_time": 1.860196144104004} +{"epoch": 0, "iter": 12316, "iter_tflops": 28.085223978841306, "iter_time": 0.7345888900756836, "loss": 0.46125322580337524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.84355311450497, "step_time": 0.6096018772125245} +{"epoch": 0, "iter": 12317, "iter_tflops": 10.782485032201366, "iter_time": 1.3333787994384765, "loss": 0.2261192500591278, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 11.438692096690527, "step_time": 1.2568864364624022} +{"epoch": 0, "iter": 12318, "iter_tflops": 12.362510305218915, "iter_time": 1.1629625854492187, "loss": 0.20292846858501434, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 16.424548335453753, "step_time": 0.8753444328308105} +{"epoch": 0, "iter": 12319, "iter_tflops": 26.32889958374144, "iter_time": 0.5460591659545899, "loss": 0.24261824786663055, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 28.123907382520073, "step_time": 0.5112069511413574} +{"epoch": 0, "iter": 12320, "iter_tflops": 24.187077156384643, "iter_time": 0.5944139862060547, "loss": 0.22382231056690216, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 25.699968901890838, "step_time": 0.5594223480224609} +{"epoch": 0, "iter": 12321, "iter_tflops": 29.352585508313098, "iter_time": 0.7028714218139649, "loss": 0.5918066501617432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.22128362274851, "step_time": 0.6608022193908691} +{"epoch": 0, "iter": 12322, "iter_tflops": 19.93155746031487, "iter_time": 1.035096908569336, "loss": 0.6154966950416565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.279286020258425, "step_time": 0.849740535736084} +{"epoch": 0, "iter": 12323, "iter_tflops": 50.2344260462251, "iter_time": 0.41069631195068357, "loss": 0.7265324592590332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.9185421877847, "step_time": 
0.3756671733856201} +{"epoch": 0, "iter": 12324, "iter_tflops": 51.94618810901846, "iter_time": 0.3971627998352051, "loss": 0.6754132509231567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.92490591518362, "step_time": 0.36890707588195804} +{"epoch": 0, "iter": 12325, "iter_tflops": 30.937104833558823, "iter_time": 0.6668721466064453, "loss": 0.4970414340496063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.785452815388844, "step_time": 0.6292758445739747} +{"epoch": 0, "iter": 12326, "iter_tflops": 15.043134594784853, "iter_time": 1.37146240234375, "loss": 0.3941372036933899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.911838072944224, "step_time": 1.151813310623169} +{"epoch": 0, "iter": 12327, "iter_tflops": 38.08368198811061, "iter_time": 0.5417305374145508, "loss": 0.622823178768158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.69445235922587, "step_time": 0.4948162727355957} +{"epoch": 0, "iter": 12328, "iter_tflops": 41.834803445846816, "iter_time": 0.49315621948242183, "loss": 0.5545196533203125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.807079147600874, "step_time": 0.45039094161987303} +{"epoch": 0, "iter": 12329, "iter_tflops": 29.91389417584879, "iter_time": 0.6896826400756836, "loss": 0.3080562949180603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.191211891116765, "step_time": 0.6215830135345458} +{"epoch": 0, "iter": 12330, "iter_tflops": 35.68574533496101, "iter_time": 0.5781326217651368, "loss": 0.5568187236785889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.46939920209856, "step_time": 0.5227111110687256} +{"epoch": 0, "iter": 12331, "iter_tflops": 38.756949646477466, "iter_time": 0.5323198471069336, "loss": 0.505866289138794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37596821160845, "step_time": 0.48685833930969247} +{"epoch": 0, "iter": 12332, "iter_tflops": 40.605272693101476, "iter_time": 0.5080890274047851, "loss": 0.5336914658546448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.22078391198375, 
"step_time": 0.4665474395751953} +{"epoch": 0, "iter": 12333, "iter_tflops": 23.33176849881163, "iter_time": 0.8842490234375, "loss": 0.02804105542600155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.993140701055328, "step_time": 0.8254702262878417} +{"epoch": 0, "iter": 12334, "iter_tflops": 9.246199059207939, "iter_time": 2.2313053588867184, "loss": 0.004188975784927607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.396439289178701, "step_time": 1.984438415527344} +{"epoch": 0, "iter": 12335, "iter_tflops": 11.72982489423518, "iter_time": 1.7588577575683595, "loss": 0.002439871896058321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.965669907652376, "step_time": 1.3785613098144534} +{"epoch": 0, "iter": 12336, "iter_tflops": 52.9378506669679, "iter_time": 0.3897229156494141, "loss": 0.0030286742839962244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.86937029928075, "step_time": 0.3446018123626709} +{"epoch": 0, "iter": 12337, "iter_tflops": 26.1268819894978, "iter_time": 0.6003280334472657, "loss": 0.16250987350940704, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.111658335923305, "step_time": 0.5579428825378419} +{"epoch": 0, "iter": 12338, "iter_tflops": 23.314071168434193, "iter_time": 0.6727567901611329, "loss": 0.3882767856121063, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.083179073831563, "step_time": 0.625307487487793} +{"epoch": 0, "iter": 12339, "iter_tflops": 24.032513769190114, "iter_time": 0.6526449890136719, "loss": 0.3225114941596985, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.872033916675647, "step_time": 0.6062414627075196} +{"epoch": 0, "iter": 12340, "iter_tflops": 26.136723345143846, "iter_time": 0.6001019897460937, "loss": 0.19532158970832825, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 27.928951895246623, "step_time": 0.5615928497314453} +{"epoch": 0, "iter": 12341, "iter_tflops": 6.475628946103106, "iter_time": 0.9689408493041992, "loss": 0.002972091780975461, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 
6.971068897990852, "step_time": 0.900077377319336} +{"epoch": 0, "iter": 12342, "iter_tflops": 9.876512039659346, "iter_time": 0.6352952728271484, "loss": 0.014928932301700115, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 11.154162700334943, "step_time": 0.562525541305542} +{"epoch": 0, "iter": 12343, "iter_tflops": 16.508054995074392, "iter_time": 0.3800872611999512, "loss": 0.0014084980357438326, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 19.041181368425775, "step_time": 0.32952269554138186} +{"epoch": 0, "iter": 12344, "iter_tflops": 18.963947140266082, "iter_time": 0.3308647384643555, "loss": 0.004749870393425226, "lr": 3e-05, "seqlen": 2544.0, "step_tflops": 20.793825839733078, "step_time": 0.301748291015625} +{"epoch": 0, "iter": 12345, "iter_tflops": 31.199065824661243, "iter_time": 0.6612727966308594, "loss": 0.3788301348686218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.164887364432474, "step_time": 0.6220763931274413} +{"epoch": 0, "iter": 12346, "iter_tflops": 13.041793285265355, "iter_time": 1.5819215240478517, "loss": 0.508668839931488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.503250093160435, "step_time": 1.2501230602264404} +{"epoch": 0, "iter": 12347, "iter_tflops": 38.957288239752764, "iter_time": 0.5295823822021484, "loss": 0.38195231556892395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.453185400225024, "step_time": 0.48597280311584473} +{"epoch": 0, "iter": 12348, "iter_tflops": 42.781703553712894, "iter_time": 0.4822410469055176, "loss": 0.42188572883605957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.71533385567298, "step_time": 0.44163429450988767} +{"epoch": 0, "iter": 12349, "iter_tflops": 28.725841272904102, "iter_time": 0.7182067642211916, "loss": 0.3292255103588104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.077396409470357, "step_time": 0.6638617095947266} +{"epoch": 0, "iter": 12350, "iter_tflops": 7.920103406819991, "iter_time": 2.6049020385742185, "loss": 0.2523166835308075, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 9.280025171832012, "step_time": 2.223172149658203} +{"epoch": 0, "iter": 12351, "iter_tflops": 12.81545404797904, "iter_time": 1.6098605194091795, "loss": 0.21858540177345276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.963812047806856, "step_time": 1.3787324676513675} +{"epoch": 0, "iter": 12352, "iter_tflops": 34.430432390698044, "iter_time": 0.5992109909057617, "loss": 0.3026237189769745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.472268433705096, "step_time": 0.40081958961486813} +{"epoch": 0, "iter": 12353, "iter_tflops": 14.780392871557526, "iter_time": 1.0362687530517578, "loss": 0.20476004481315613, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 15.433937944157968, "step_time": 0.9923882904052734} +{"epoch": 0, "iter": 12354, "iter_tflops": 9.92475517369176, "iter_time": 1.5432581481933592, "loss": 0.27342674136161804, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 12.102422103356135, "step_time": 1.2655697479248045} +{"epoch": 0, "iter": 12355, "iter_tflops": 22.837261664659625, "iter_time": 0.6706784515380859, "loss": 0.14231161773204803, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 24.517722646832528, "step_time": 0.6247097053527831} +{"epoch": 0, "iter": 12356, "iter_tflops": 24.048002309842154, "iter_time": 0.6369119186401367, "loss": 0.18293435871601105, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 25.79899339769711, "step_time": 0.5936843757629395} +{"epoch": 0, "iter": 12357, "iter_tflops": 15.934759759653785, "iter_time": 1.2947225952148438, "loss": 0.6436241269111633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.07468143924851, "step_time": 1.2082857055664062} +{"epoch": 0, "iter": 12358, "iter_tflops": 18.128408563009017, "iter_time": 1.138053207397461, "loss": 0.6482744812965393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.102597839218557, "step_time": 0.9334239196777344} +{"epoch": 0, "iter": 12359, "iter_tflops": 44.961994314580124, "iter_time": 0.45885628128051753, "loss": 0.8230871558189392, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 48.631029648194975, "step_time": 0.42423723411560066} +{"epoch": 0, "iter": 12360, "iter_tflops": 44.99977894259896, "iter_time": 0.45847099685668946, "loss": 0.838092565536499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.651624659514304, "step_time": 0.42405764770507814} +{"epoch": 0, "iter": 12361, "iter_tflops": 27.688339991403964, "iter_time": 0.7451184692382813, "loss": 0.34433260560035706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.258433749691495, "step_time": 0.7051332168579101} +{"epoch": 0, "iter": 12362, "iter_tflops": 14.859340356107785, "iter_time": 1.3884259338378908, "loss": 0.3409799337387085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.172037950944986, "step_time": 1.1353208465576172} +{"epoch": 0, "iter": 12363, "iter_tflops": 36.69983217410057, "iter_time": 0.5621577072143554, "loss": 0.4951363503932953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.13620705804537, "step_time": 0.514026985168457} +{"epoch": 0, "iter": 12364, "iter_tflops": 35.582715254525375, "iter_time": 0.579806610107422, "loss": 0.48171335458755493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.05534292585351, "step_time": 0.5282527809143066} +{"epoch": 0, "iter": 12365, "iter_tflops": 19.430583160082165, "iter_time": 1.0617845764160156, "loss": 0.11344116926193237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.020284138321284, "step_time": 0.9814849967956544} +{"epoch": 0, "iter": 12366, "iter_tflops": 26.201791941993523, "iter_time": 0.787392463684082, "loss": 0.14956289529800415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.684298171591262, "step_time": 0.6950170555114746} +{"epoch": 0, "iter": 12367, "iter_tflops": 43.785532790275546, "iter_time": 0.47118516540527344, "loss": 0.18915675580501556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.42358064248131, "step_time": 0.43503871345520023} +{"epoch": 0, "iter": 12368, "iter_tflops": 47.343057691345265, "iter_time": 0.4357786445617676, "loss": 
0.170998215675354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.28708401032348, "step_time": 0.40226684570312504} +{"epoch": 0, "iter": 12369, "iter_tflops": 41.63179113474699, "iter_time": 0.49556103515625, "loss": 0.6663052439689636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.214221170953614, "step_time": 0.456296558380127} +{"epoch": 0, "iter": 12370, "iter_tflops": 41.10736668902983, "iter_time": 0.5018831214904785, "loss": 0.6172178387641907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.060612149346056, "step_time": 0.45785204696655274} +{"epoch": 0, "iter": 12371, "iter_tflops": 42.10814430404636, "iter_time": 0.48995494461059574, "loss": 0.7952549457550049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26871223138119, "step_time": 0.45574730300903316} +{"epoch": 0, "iter": 12372, "iter_tflops": 42.44336289639232, "iter_time": 0.4860852699279785, "loss": 0.6670514345169067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.70373530919396, "step_time": 0.45140935134887694} +{"epoch": 0, "iter": 12373, "iter_tflops": 31.799116268827824, "iter_time": 0.6487945556640625, "loss": 0.6565760374069214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.04789495392335, "step_time": 0.6059432907104492} +{"epoch": 0, "iter": 12374, "iter_tflops": 9.195730549626198, "iter_time": 2.2435513305664063, "loss": 0.6182543635368347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.948227056943127, "step_time": 1.884423240661621} +{"epoch": 0, "iter": 12375, "iter_tflops": 12.556602569598759, "iter_time": 1.6430474243164062, "loss": 0.6918672919273376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.305519609538708, "step_time": 1.4421771507263184} +{"epoch": 0, "iter": 12376, "iter_tflops": 25.128781289679598, "iter_time": 0.8210144882202149, "loss": 0.6369723081588745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.028696603259306, "step_time": 0.6441440238952636} +{"epoch": 0, "iter": 12377, "iter_tflops": 17.555039036542208, "iter_time": 0.7934347610473633, 
"loss": 0.22512581944465637, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 19.16581550165033, "step_time": 0.7267511367797851} +{"epoch": 0, "iter": 12378, "iter_tflops": 20.6704318575877, "iter_time": 0.673850372314453, "loss": 0.2071511447429657, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 22.19143746299998, "step_time": 0.6276645317077636} +{"epoch": 0, "iter": 12379, "iter_tflops": 22.442817464863563, "iter_time": 0.6206341171264649, "loss": 0.19183072447776794, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 24.11130187571529, "step_time": 0.5776866912841797} +{"epoch": 0, "iter": 12380, "iter_tflops": 21.40156936364501, "iter_time": 0.6508297576904296, "loss": 0.1832936406135559, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 23.029155520849756, "step_time": 0.6048323478698729} +{"epoch": 0, "iter": 12381, "iter_tflops": 34.586528421463015, "iter_time": 0.5965066299438476, "loss": 0.10824130475521088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.315760655526276, "step_time": 0.5384492740631104} +{"epoch": 0, "iter": 12382, "iter_tflops": 40.76801050428661, "iter_time": 0.5060608367919922, "loss": 0.0801519900560379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.58425611450248, "step_time": 0.45259252357482904} +{"epoch": 0, "iter": 12383, "iter_tflops": 43.65205792865408, "iter_time": 0.4726259078979492, "loss": 0.1294306069612503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.19716231728865, "step_time": 0.4280561866760254} +{"epoch": 0, "iter": 12384, "iter_tflops": 38.339120422342944, "iter_time": 0.5381212005615235, "loss": 0.06528586149215698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.197218953897064, "step_time": 0.4889206924438477} +{"epoch": 0, "iter": 12385, "iter_tflops": 22.413974602740343, "iter_time": 0.9204567184448242, "loss": 0.10201671719551086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.13751011434515, "step_time": 0.8547316360473632} +{"epoch": 0, "iter": 12386, "iter_tflops": 20.480786798399038, "iter_time": 
1.0073389129638672, "loss": 0.057449083775281906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.44245451801023, "step_time": 0.8108924198150634} +{"epoch": 0, "iter": 12387, "iter_tflops": 51.130035355868515, "iter_time": 0.40350242996215824, "loss": 0.0557730458676815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.85367234932409, "step_time": 0.36937756538391114} +{"epoch": 0, "iter": 12388, "iter_tflops": 53.76853268472342, "iter_time": 0.3837019996643067, "loss": 0.06610531359910965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.79472875226275, "step_time": 0.3509003944396973} +{"epoch": 0, "iter": 12389, "iter_tflops": 25.97832271312282, "iter_time": 0.7941657257080078, "loss": 0.503138542175293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.32055184964786, "step_time": 0.7551492233276367} +{"epoch": 0, "iter": 12390, "iter_tflops": 12.831927578960247, "iter_time": 1.6077937927246093, "loss": 0.78553307056427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.46021098762705, "step_time": 1.1816061973571776} +{"epoch": 0, "iter": 12391, "iter_tflops": 37.109453145135895, "iter_time": 0.555952507019043, "loss": 0.7687428593635559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.5293735106072, "step_time": 0.5090405235290527} +{"epoch": 0, "iter": 12392, "iter_tflops": 39.44833527672471, "iter_time": 0.5229902191162109, "loss": 0.7915377020835876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.8643681973532, "step_time": 0.4813110370635987} +{"epoch": 0, "iter": 12393, "iter_tflops": 19.80488563541313, "iter_time": 1.0417173767089842, "loss": 0.007182605564594269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.33499349838919, "step_time": 0.9670072555541991} +{"epoch": 0, "iter": 12394, "iter_tflops": 16.264441610078947, "iter_time": 1.2684784393310546, "loss": 0.004264539573341608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.43401866918979, "step_time": 1.0615968761444092} +{"epoch": 0, "iter": 12395, "iter_tflops": 37.83027996809622, 
"iter_time": 0.545359260559082, "loss": 0.0028843642212450504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.20093831590832, "step_time": 0.48887760162353516} +{"epoch": 0, "iter": 12396, "iter_tflops": 45.78721145377084, "iter_time": 0.4505863723754883, "loss": 0.0033177321311086416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.66530662586354, "step_time": 0.4072035655975342} +{"epoch": 0, "iter": 12397, "iter_tflops": 17.642331389669582, "iter_time": 1.1694085693359375, "loss": 0.5031259059906006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.06913949670563, "step_time": 1.0819100418090821} +{"epoch": 0, "iter": 12398, "iter_tflops": 16.57968110684162, "iter_time": 1.2443600921630857, "loss": 0.6932001709938049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.780745125920507, "step_time": 1.0429886932373045} +{"epoch": 0, "iter": 12399, "iter_tflops": 37.473894548767625, "iter_time": 0.5505457534790039, "loss": 0.4921363890171051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.878287530064846, "step_time": 0.5046956405639648} +{"epoch": 0, "iter": 12400, "iter_tflops": 39.69420126209284, "iter_time": 0.5197508163452148, "loss": 0.597195029258728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.464608454722736, "step_time": 0.47466419792175296} +{"epoch": 0, "iter": 12401, "iter_tflops": 14.423486940259512, "iter_time": 1.430381820678711, "loss": 0.24429979920387268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.378759860889511, "step_time": 1.3415316772460937} +{"epoch": 0, "iter": 12402, "iter_tflops": 26.344945130485463, "iter_time": 0.7831139297485352, "loss": 0.2482587844133377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.762280431318224, "step_time": 0.6110693130493164} +{"epoch": 0, "iter": 12403, "iter_tflops": 43.258392942493735, "iter_time": 0.4769269523620606, "loss": 0.35929328203201294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.46491310536759, "step_time": 0.4346598815917969} +{"epoch": 0, "iter": 12404, "iter_tflops": 
38.343285675902486, "iter_time": 0.5380627441406249, "loss": 0.2873537540435791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.755940052513594, "step_time": 0.4940876312255859} +{"epoch": 0, "iter": 12405, "iter_tflops": 19.87020016002946, "iter_time": 1.0382931900024415, "loss": 0.2340674102306366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.155033904089063, "step_time": 0.9752332992553711} +{"epoch": 0, "iter": 12406, "iter_tflops": 17.584466610160344, "iter_time": 1.1732567138671874, "loss": 0.23747014999389648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.22376115390006, "step_time": 0.92833491897583} +{"epoch": 0, "iter": 12407, "iter_tflops": 37.21111784461078, "iter_time": 0.5544335861206054, "loss": 0.27301308512687683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.9987649818273, "step_time": 0.5032125606536865} +{"epoch": 0, "iter": 12408, "iter_tflops": 43.393413721466274, "iter_time": 0.47544297027587895, "loss": 0.313952773809433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.608346522078676, "step_time": 0.433350347518921} +{"epoch": 0, "iter": 12409, "iter_tflops": 20.96873604233398, "iter_time": 0.9838978118896484, "loss": 0.13947878777980804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.24575036342475, "step_time": 0.9274172897338867} +{"epoch": 0, "iter": 12410, "iter_tflops": 24.242517239027496, "iter_time": 0.8510293426513672, "loss": 0.11949478834867477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.422101057638205, "step_time": 0.7012107486724853} +{"epoch": 0, "iter": 12411, "iter_tflops": 39.10087448892554, "iter_time": 0.5276376495361328, "loss": 0.12318193912506104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52431904213232, "step_time": 0.48515987968444824} +{"epoch": 0, "iter": 12412, "iter_tflops": 39.55958789523064, "iter_time": 0.5215194244384765, "loss": 0.1621101200580597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.214021880474625, "step_time": 0.4774166488647461} +{"epoch": 0, "iter": 12413, 
"iter_tflops": 30.39926129365319, "iter_time": 0.6786708831787108, "loss": 0.07100546360015869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.95386461665843, "step_time": 0.6260599098205566} +{"epoch": 0, "iter": 12414, "iter_tflops": 10.720258770280855, "iter_time": 1.924495849609375, "loss": 0.0634835958480835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.927017782411616, "step_time": 1.7297780456542966} +{"epoch": 0, "iter": 12415, "iter_tflops": 9.107932544256315, "iter_time": 2.2651785583496094, "loss": 0.05307423695921898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.125553915617493, "step_time": 1.854387985229492} +{"epoch": 0, "iter": 12416, "iter_tflops": 29.493544827176667, "iter_time": 0.6995121688842774, "loss": 0.1291743367910385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.40323809977855, "step_time": 0.44460460853576655} +{"epoch": 0, "iter": 12417, "iter_tflops": 20.471955593403973, "iter_time": 0.7381828460693359, "loss": 0.23994283378124237, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 21.616060959504228, "step_time": 0.6991119461059572} +{"epoch": 0, "iter": 12418, "iter_tflops": 10.667481057488528, "iter_time": 1.4166461944580075, "loss": 0.271168053150177, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 13.78133741307646, "step_time": 1.0965587730407715} +{"epoch": 0, "iter": 12419, "iter_tflops": 23.68311507417022, "iter_time": 0.6380936965942383, "loss": 0.24652914702892303, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 25.52353621415592, "step_time": 0.5920827865600585} +{"epoch": 0, "iter": 12420, "iter_tflops": 23.91869484652084, "iter_time": 0.6318089904785156, "loss": 0.3484009802341461, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 25.659893551714813, "step_time": 0.5889364433288574} +{"epoch": 0, "iter": 12421, "iter_tflops": 17.289958892110175, "iter_time": 1.0974430847167969, "loss": 0.032317452132701874, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 18.55829342378956, "step_time": 1.0224402313232421} +{"epoch": 0, 
"iter": 12422, "iter_tflops": 16.046183624214105, "iter_time": 1.182508331298828, "loss": 0.04318714514374733, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 19.396414167874404, "step_time": 0.9782604999542236} +{"epoch": 0, "iter": 12423, "iter_tflops": 48.37450393629329, "iter_time": 0.39224682998657223, "loss": 0.025280926376581192, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 52.88018270590537, "step_time": 0.35882526969909667} +{"epoch": 0, "iter": 12424, "iter_tflops": 48.35310962749403, "iter_time": 0.3924203834533691, "loss": 0.06677175313234329, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 52.82468033699619, "step_time": 0.3592022838592529} +{"epoch": 0, "iter": 12425, "iter_tflops": 32.807434832038346, "iter_time": 0.6288542098999023, "loss": 0.3912286162376404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.92353345780176, "step_time": 0.5907504615783692} +{"epoch": 0, "iter": 12426, "iter_tflops": 9.324750727769244, "iter_time": 2.212508850097656, "loss": 0.4813966155052185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.826795101163572, "step_time": 1.744436538696289} +{"epoch": 0, "iter": 12427, "iter_tflops": 13.133267109554906, "iter_time": 1.5709033660888672, "loss": 0.49955588579177856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.070771401017538, "step_time": 1.2085624618530273} +{"epoch": 0, "iter": 12428, "iter_tflops": 28.862024134873923, "iter_time": 0.7148179702758789, "loss": 0.31428784132003784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.28806620730309, "step_time": 0.6389696235656739} +{"epoch": 0, "iter": 12429, "iter_tflops": 18.23185748944109, "iter_time": 0.8042380447387696, "loss": 0.3886979818344116, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 19.32976728329877, "step_time": 0.7585581970214845} +{"epoch": 0, "iter": 12430, "iter_tflops": 7.349838727093159, "iter_time": 1.9949762115478515, "loss": 0.17554926872253418, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 9.901000557195163, "step_time": 1.4809365310668945} 
+{"epoch": 0, "iter": 12431, "iter_tflops": 11.26316709734214, "iter_time": 1.3018321838378906, "loss": 0.2154650241136551, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 13.30186413887429, "step_time": 1.1023081626892088} +{"epoch": 0, "iter": 12432, "iter_tflops": 21.458591563426136, "iter_time": 0.683304557800293, "loss": 0.15336067974567413, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 23.035413618515353, "step_time": 0.6365309371948242} +{"epoch": 0, "iter": 12433, "iter_tflops": 15.136383314139534, "iter_time": 1.098511749267578, "loss": 0.23271960020065308, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 16.246815412183544, "step_time": 1.0234310226440428} +{"epoch": 0, "iter": 12434, "iter_tflops": 13.022006418713612, "iter_time": 1.2768765716552732, "loss": 0.16197434067726135, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 15.57422352577973, "step_time": 1.0676291427612303} +{"epoch": 0, "iter": 12435, "iter_tflops": 29.68592623306421, "iter_time": 0.5601137313842773, "loss": 0.1941792070865631, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 31.87593468012896, "step_time": 0.5216316032409667} +{"epoch": 0, "iter": 12436, "iter_tflops": 31.755092751980047, "iter_time": 0.5236166381835938, "loss": 0.2389710396528244, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 33.64765948889229, "step_time": 0.4941649780273437} +{"epoch": 0, "iter": 12437, "iter_tflops": 26.980676389825348, "iter_time": 0.7646618347167967, "loss": 0.5176929831504822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.66809778828422, "step_time": 0.7196533813476563} +{"epoch": 0, "iter": 12438, "iter_tflops": 14.58440618372734, "iter_time": 1.4145994873046874, "loss": 0.4472150206565857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.16998601135357, "step_time": 1.1354490585327148} +{"epoch": 0, "iter": 12439, "iter_tflops": 45.01989313932755, "iter_time": 0.4582661590576172, "loss": 0.3693141043186188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.85895228471706, "step_time": 
0.42225820541381837} +{"epoch": 0, "iter": 12440, "iter_tflops": 52.535824061897316, "iter_time": 0.39270524215698244, "loss": 0.4240454435348511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.16197762011809, "step_time": 0.3609233684539795} +{"epoch": 0, "iter": 12441, "iter_tflops": 27.595606773531955, "iter_time": 0.7476223907470704, "loss": 0.4868326485157013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.21605124645842, "step_time": 0.7061561241149903} +{"epoch": 0, "iter": 12442, "iter_tflops": 14.946379905514993, "iter_time": 1.3803404998779296, "loss": 0.5154018402099609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.063382399697296, "step_time": 1.1421500720977784} +{"epoch": 0, "iter": 12443, "iter_tflops": 40.90517613525731, "iter_time": 0.5043638839721679, "loss": 0.4761516749858856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.95496160063878, "step_time": 0.4589280643463135} +{"epoch": 0, "iter": 12444, "iter_tflops": 41.01268245902915, "iter_time": 0.5030417976379394, "loss": 0.443486750125885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.89295250497429, "step_time": 0.4595619659423829} +{"epoch": 0, "iter": 12445, "iter_tflops": 20.820689787945444, "iter_time": 0.9908938522338868, "loss": 0.26452136039733887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.184947751127943, "step_time": 0.9299590759277343} +{"epoch": 0, "iter": 12446, "iter_tflops": 14.640819026498951, "iter_time": 1.409148864746094, "loss": 0.24011340737342834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.541249757981557, "step_time": 0.9152595233917238} +{"epoch": 0, "iter": 12447, "iter_tflops": 45.20102462505758, "iter_time": 0.45642977523803707, "loss": 0.2647453844547272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78952495091511, "step_time": 0.4228590774536133} +{"epoch": 0, "iter": 12448, "iter_tflops": 45.185783858693554, "iter_time": 0.45658372497558597, "loss": 0.2589278519153595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.071255566857985, 
"step_time": 0.4204313354492187} +{"epoch": 0, "iter": 12449, "iter_tflops": 29.828630329617194, "iter_time": 0.6916540679931641, "loss": 0.1406649649143219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.76346348169392, "step_time": 0.6495227928161621} +{"epoch": 0, "iter": 12450, "iter_tflops": 17.717734632802205, "iter_time": 1.1644317932128907, "loss": 0.12402811646461487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.1073212458647, "step_time": 0.9774377937316896} +{"epoch": 0, "iter": 12451, "iter_tflops": 39.175377271391625, "iter_time": 0.5266342010498047, "loss": 0.14302287995815277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.86289926604875, "step_time": 0.48132753181457516} +{"epoch": 0, "iter": 12452, "iter_tflops": 42.01070462159137, "iter_time": 0.4910913467407227, "loss": 0.16674798727035522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.78161232160093, "step_time": 0.4506414794921876} +{"epoch": 0, "iter": 12453, "iter_tflops": 28.32776485766506, "iter_time": 0.7282993774414063, "loss": 0.64326012134552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.83446628498972, "step_time": 0.6690919609069824} +{"epoch": 0, "iter": 12454, "iter_tflops": 36.62218630318782, "iter_time": 0.5633495864868164, "loss": 0.5805166959762573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.95656335992838, "step_time": 0.5163380374908446} +{"epoch": 0, "iter": 12455, "iter_tflops": 36.894544260575834, "iter_time": 0.559190902709961, "loss": 0.5404742956161499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.876952440324374, "step_time": 0.5173688621520995} +{"epoch": 0, "iter": 12456, "iter_tflops": 35.612801526182324, "iter_time": 0.579316780090332, "loss": 0.6676737070083618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.76846173959944, "step_time": 0.532161777496338} +{"epoch": 0, "iter": 12457, "iter_tflops": 18.697142367311184, "iter_time": 1.1034356536865235, "loss": 0.5751320719718933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
19.97913934210483, "step_time": 1.0326317443847657} +{"epoch": 0, "iter": 12458, "iter_tflops": 13.122294679422643, "iter_time": 1.5722169036865234, "loss": 0.6243559122085571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.762391583550798, "step_time": 1.1615042610168458} +{"epoch": 0, "iter": 12459, "iter_tflops": 38.99489024680811, "iter_time": 0.5290717163085937, "loss": 0.5726380944252014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.698125852723756, "step_time": 0.4831849899291992} +{"epoch": 0, "iter": 12460, "iter_tflops": 39.678703074420085, "iter_time": 0.5199538269042968, "loss": 0.6957522034645081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.031261121552284, "step_time": 0.47944431495666506} +{"epoch": 0, "iter": 12461, "iter_tflops": 29.467447711801135, "iter_time": 0.6762113723754883, "loss": 0.0023506965953856707, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 32.010035059531695, "step_time": 0.6224992637634277} +{"epoch": 0, "iter": 12462, "iter_tflops": 48.95122480062278, "iter_time": 0.40706281280517576, "loss": 0.006468301173299551, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 54.33728044231918, "step_time": 0.36671366500854485} +{"epoch": 0, "iter": 12463, "iter_tflops": 52.61586665900753, "iter_time": 0.37871129989624025, "loss": 0.037392258644104004, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 57.825570347759744, "step_time": 0.3445919017791748} +{"epoch": 0, "iter": 12464, "iter_tflops": 62.03941410013929, "iter_time": 0.3211865158081055, "loss": 0.002672485075891018, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 68.07177549520264, "step_time": 0.29272371864318847} +{"epoch": 0, "iter": 12465, "iter_tflops": 26.241726212221597, "iter_time": 0.786194221496582, "loss": 0.4063626229763031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.74651171000403, "step_time": 0.7435562973022461} +{"epoch": 0, "iter": 12466, "iter_tflops": 18.701421868942205, "iter_time": 1.1031831512451171, "loss": 0.5954734086990356, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 22.63145906954865, "step_time": 0.9116112861633301} +{"epoch": 0, "iter": 12467, "iter_tflops": 44.45418045844786, "iter_time": 0.4640979385375977, "loss": 0.49117156863212585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.13756132543386, "step_time": 0.4285861797332764} +{"epoch": 0, "iter": 12468, "iter_tflops": 49.31322082101923, "iter_time": 0.41836840438842776, "loss": 0.45990923047065735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26891577842604, "step_time": 0.38730079650878907} +{"epoch": 0, "iter": 12469, "iter_tflops": 26.299177668515686, "iter_time": 0.7844767532348633, "loss": 0.004984267055988312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.64505295824574, "step_time": 0.7462851867675782} +{"epoch": 0, "iter": 12470, "iter_tflops": 14.777638708207617, "iter_time": 1.3961021728515624, "loss": 0.0006347472663037479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.84642970471291, "step_time": 1.0946950607299806} +{"epoch": 0, "iter": 12471, "iter_tflops": 50.57372587777591, "iter_time": 0.4079409446716309, "loss": 0.013560604304075241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.37040051740979, "step_time": 0.36599160766601563} +{"epoch": 0, "iter": 12472, "iter_tflops": 47.9469503900722, "iter_time": 0.43029000473022455, "loss": 0.0018671845318749547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.03103225782315, "step_time": 0.3890381278991699} +{"epoch": 0, "iter": 12473, "iter_tflops": 41.009936114721526, "iter_time": 0.5030754852294922, "loss": 0.5610232949256897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.37786440846474, "step_time": 0.46489604187011724} +{"epoch": 0, "iter": 12474, "iter_tflops": 34.73669376001803, "iter_time": 0.5939279556274414, "loss": 0.7354311943054199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.06082977024463, "step_time": 0.542055799484253} +{"epoch": 0, "iter": 12475, "iter_tflops": 33.829147989991554, "iter_time": 0.609861457824707, "loss": 0.7181074619293213, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 36.84621865268228, "step_time": 0.5599243087768555} +{"epoch": 0, "iter": 12476, "iter_tflops": 39.0671565978727, "iter_time": 0.5280930404663087, "loss": 0.8270385265350342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.596861154110854, "step_time": 0.48433365631103514} +{"epoch": 0, "iter": 12477, "iter_tflops": 20.165895237637184, "iter_time": 1.0230685653686522, "loss": 0.7046558260917664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.495707812330583, "step_time": 0.9597773513793947} +{"epoch": 0, "iter": 12478, "iter_tflops": 22.013077870939263, "iter_time": 0.9372198486328125, "loss": 0.5291575789451599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.631755522091836, "step_time": 0.7466443271636963} +{"epoch": 0, "iter": 12479, "iter_tflops": 36.508095040569074, "iter_time": 0.565110107421875, "loss": 0.5067656636238098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.9563806329179, "step_time": 0.5163403987884521} +{"epoch": 0, "iter": 12480, "iter_tflops": 35.9439835331133, "iter_time": 0.5739790496826171, "loss": 0.58387690782547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.51740648278834, "step_time": 0.5220761013031006} +{"epoch": 0, "iter": 12481, "iter_tflops": 30.64466735954903, "iter_time": 0.6732360076904297, "loss": 0.5014685392379761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.78977726117046, "step_time": 0.6105720481872559} +{"epoch": 0, "iter": 12482, "iter_tflops": 34.37887400489991, "iter_time": 0.6001096343994141, "loss": 0.8654606342315674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.13285449814233, "step_time": 0.5556021423339844} +{"epoch": 0, "iter": 12483, "iter_tflops": 35.478433147612044, "iter_time": 0.5815108413696288, "loss": 0.600079357624054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.58612645837396, "step_time": 0.5346764602661133} +{"epoch": 0, "iter": 12484, "iter_tflops": 36.41507621948092, "iter_time": 0.5665536270141601, "loss": 0.687350869178772, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 39.81775359520361, "step_time": 0.5181380577087402} +{"epoch": 0, "iter": 12485, "iter_tflops": 1.3991850002462474, "iter_time": 0.7835202255249023, "loss": 1.197326421737671, "lr": 3e-05, "seqlen": 448.0, "step_tflops": 1.5243120922744755, "step_time": 0.7192029457092286} +{"epoch": 0, "iter": 12486, "iter_tflops": 2.3747403721979734, "iter_time": 0.4616461486816406, "loss": 1.439768671989441, "lr": 3e-05, "seqlen": 448.0, "step_tflops": 2.62359804970007, "step_time": 0.41785735702514654} +{"epoch": 0, "iter": 12487, "iter_tflops": 2.6201189031494416, "iter_time": 0.41841221237182613, "loss": 1.4980878829956055, "lr": 3e-05, "seqlen": 448.0, "step_tflops": 2.837746799336821, "step_time": 0.3863240184783936} +{"epoch": 0, "iter": 12488, "iter_tflops": 2.7705468719217885, "iter_time": 0.39569435119628904, "loss": 1.1403988599777222, "lr": 3e-05, "seqlen": 448.0, "step_tflops": 2.9969636036465093, "step_time": 0.3658001537322998} +{"epoch": 0, "iter": 12489, "iter_tflops": 27.671821757852218, "iter_time": 0.7455632553100586, "loss": 0.6896534562110901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.366935105625178, "step_time": 0.702527976989746} +{"epoch": 0, "iter": 12490, "iter_tflops": 18.84483303302023, "iter_time": 1.0947878112792968, "loss": 0.7032407522201538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.645611307455873, "step_time": 0.9110415802001952} +{"epoch": 0, "iter": 12491, "iter_tflops": 43.440779636581624, "iter_time": 0.4749245681762695, "loss": 0.6923143863677979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.91914876779237, "step_time": 0.43971585273742675} +{"epoch": 0, "iter": 12492, "iter_tflops": 41.94480648853035, "iter_time": 0.49186288452148436, "loss": 0.7145472168922424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.00986244970302, "step_time": 0.4583682861328125} +{"epoch": 0, "iter": 12493, "iter_tflops": 21.465835646992854, "iter_time": 0.9611129913330076, "loss": 0.5522416830062866, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.462244014797427, "step_time": 0.9184787368774414} +{"epoch": 0, "iter": 12494, "iter_tflops": 16.82729477298104, "iter_time": 1.226049331665039, "loss": 0.6012924909591675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.840942361663252, "step_time": 0.9446063804626464} +{"epoch": 0, "iter": 12495, "iter_tflops": 41.53183819597995, "iter_time": 0.49675368118286134, "loss": 0.7528170347213745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.333191450138706, "step_time": 0.4550990753173828} +{"epoch": 0, "iter": 12496, "iter_tflops": 37.242256613915075, "iter_time": 0.5539700164794922, "loss": 0.6504676938056946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59878840839585, "step_time": 0.5081701774597167} +{"epoch": 0, "iter": 12497, "iter_tflops": 12.20426096566134, "iter_time": 1.0247224884033204, "loss": 0.01333522330969572, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 13.14576622882578, "step_time": 0.9513314361572267} +{"epoch": 0, "iter": 12498, "iter_tflops": 7.69020384060902, "iter_time": 1.6262222595214846, "loss": 0.01801597699522972, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 9.186970667953132, "step_time": 1.3612736034393311} +{"epoch": 0, "iter": 12499, "iter_tflops": 26.814370573417133, "iter_time": 0.46639098358154296, "loss": 0.008551975712180138, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 29.74062454020118, "step_time": 0.420501615524292} +{"epoch": 0, "iter": 12500, "iter_tflops": 29.878267699186964, "iter_time": 0.4185644493103028, "loss": 0.012337437830865383, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 32.99639266664833, "step_time": 0.3790105419158935} +{"epoch": 0, "iter": 12501, "iter_tflops": 32.838543831589874, "iter_time": 0.6282584762573242, "loss": 0.8695336580276489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.1148602928947, "step_time": 0.5712632789611817} +{"epoch": 0, "iter": 12502, "iter_tflops": 40.80270058435945, "iter_time": 0.5056305885314941, "loss": 
0.8912218809127808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.53854764509015, "step_time": 0.4632188205718995} +{"epoch": 0, "iter": 12503, "iter_tflops": 34.35074666076534, "iter_time": 0.6006010208129883, "loss": 0.7568942904472351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.3600401127347, "step_time": 0.5522235374450684} +{"epoch": 0, "iter": 12504, "iter_tflops": 36.127187659901296, "iter_time": 0.5710683517456054, "loss": 0.7308573126792908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35350122753046, "step_time": 0.5242505207061767} +{"epoch": 0, "iter": 12505, "iter_tflops": 17.797130492747137, "iter_time": 1.159237075805664, "loss": 0.23732522130012512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.699196531900867, "step_time": 1.103314437866211} +{"epoch": 0, "iter": 12506, "iter_tflops": 17.41581078987593, "iter_time": 1.1846186065673825, "loss": 0.3006512224674225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.385668251727086, "step_time": 0.9647158679962158} +{"epoch": 0, "iter": 12507, "iter_tflops": 48.492658806587436, "iter_time": 0.425447769165039, "loss": 0.27502021193504333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.74233415459183, "step_time": 0.3911676235198975} +{"epoch": 0, "iter": 12508, "iter_tflops": 50.55151643241187, "iter_time": 0.4081201705932618, "loss": 0.31116023659706116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.913947665582825, "step_time": 0.37569860458374027} +{"epoch": 0, "iter": 12509, "iter_tflops": 29.27168441028422, "iter_time": 0.7048140182495117, "loss": 0.08881384134292603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.98616611393595, "step_time": 0.6658162689208984} +{"epoch": 0, "iter": 12510, "iter_tflops": 13.830461627792745, "iter_time": 1.4917140197753906, "loss": 0.11892500519752502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.7345429492158, "step_time": 1.1633281764984131} +{"epoch": 0, "iter": 12511, "iter_tflops": 39.782794694451724, "iter_time": 0.5185933685302734, 
"loss": 0.0702214166522026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7316340411935, "step_time": 0.47176589584350587} +{"epoch": 0, "iter": 12512, "iter_tflops": 45.4877724480144, "iter_time": 0.45355251312255856, "loss": 0.06890333443880081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.053860060092866, "step_time": 0.41217787170410153} +{"epoch": 0, "iter": 12513, "iter_tflops": 25.489862450498656, "iter_time": 0.809384262084961, "loss": 0.5652620792388916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.413046689455246, "step_time": 0.7526012611389161} +{"epoch": 0, "iter": 12514, "iter_tflops": 10.316635467396413, "iter_time": 1.9997889404296876, "loss": 0.6668735146522522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.879129572021816, "step_time": 1.7367512817382813} +{"epoch": 0, "iter": 12515, "iter_tflops": 20.772941198055285, "iter_time": 0.993171516418457, "loss": 0.49755212664604187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.391557747044967, "step_time": 0.8819888668060303} +{"epoch": 0, "iter": 12516, "iter_tflops": 35.15399162385173, "iter_time": 0.5868776931762696, "loss": 0.5027769804000854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.16297047591598, "step_time": 0.5406050224304199} +{"epoch": 0, "iter": 12517, "iter_tflops": 16.23353592028482, "iter_time": 1.0394440307617188, "loss": 0.38298743963241577, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 17.331270643923588, "step_time": 0.9736073226928711} +{"epoch": 0, "iter": 12518, "iter_tflops": 15.730438780819233, "iter_time": 1.0726879425048828, "loss": 0.31112048029899597, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 18.647342902261595, "step_time": 0.9048931045532227} +{"epoch": 0, "iter": 12519, "iter_tflops": 26.303308597970304, "iter_time": 0.6415106277465821, "loss": 0.11174747347831726, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 28.326109522866112, "step_time": 0.5956995964050292} +{"epoch": 0, "iter": 12520, "iter_tflops": 24.331592101042776, "iter_time": 
0.6934955978393554, "loss": 0.22386282682418823, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 26.28409569400052, "step_time": 0.6419795532226563} +{"epoch": 0, "iter": 12521, "iter_tflops": 15.776556214665463, "iter_time": 1.307705764770508, "loss": 0.2879141867160797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.67440014921723, "step_time": 1.2372914962768553} +{"epoch": 0, "iter": 12522, "iter_tflops": 25.53273694501655, "iter_time": 0.808025146484375, "loss": 0.2792710065841675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.2191786007245, "step_time": 0.6403358001708984} +{"epoch": 0, "iter": 12523, "iter_tflops": 47.718806760348656, "iter_time": 0.43234722137451176, "loss": 0.2811126112937927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.86845754439641, "step_time": 0.3977579917907715} +{"epoch": 0, "iter": 12524, "iter_tflops": 50.458943973474994, "iter_time": 0.4088689117431641, "loss": 0.3338266909122467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.905920876614076, "step_time": 0.37575352859497074} +{"epoch": 0, "iter": 12525, "iter_tflops": 28.237871314431015, "iter_time": 0.7306178741455079, "loss": 0.24335502088069916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.88076202147731, "step_time": 0.6904473686218262} +{"epoch": 0, "iter": 12526, "iter_tflops": 21.06192010585545, "iter_time": 0.9795447616577149, "loss": 0.19061535596847534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.669657845340023, "step_time": 0.8716261825561524} +{"epoch": 0, "iter": 12527, "iter_tflops": 36.527308957939944, "iter_time": 0.5648128509521484, "loss": 0.18582765758037567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.92217520246215, "step_time": 0.5167828006744385} +{"epoch": 0, "iter": 12528, "iter_tflops": 41.71606469145743, "iter_time": 0.4945599174499511, "loss": 0.21865421533584595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.86375989402998, "step_time": 0.44983432579040533} +{"epoch": 0, "iter": 12529, "iter_tflops": 19.64879514781179, 
"iter_time": 0.6964528579711914, "loss": 0.058242227882146835, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 21.308094143264608, "step_time": 0.6422188415527343} +{"epoch": 0, "iter": 12530, "iter_tflops": 31.4102809645836, "iter_time": 0.4356681671142578, "loss": 0.0910130962729454, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 34.448451573494886, "step_time": 0.3972445468902588} +{"epoch": 0, "iter": 12531, "iter_tflops": 34.188027541472856, "iter_time": 0.40027051925659174, "loss": 0.10235296189785004, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 37.38957430306719, "step_time": 0.36599666595458985} +{"epoch": 0, "iter": 12532, "iter_tflops": 35.35130404995851, "iter_time": 0.3870991439819335, "loss": 0.034748706966638565, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 38.23812448181676, "step_time": 0.3578747577667237} +{"epoch": 0, "iter": 12533, "iter_tflops": 27.009251140058563, "iter_time": 0.7638528518676758, "loss": 0.054237693548202515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.629456717655962, "step_time": 0.7206246948242188} +{"epoch": 0, "iter": 12534, "iter_tflops": 15.794362186095519, "iter_time": 1.3062315063476562, "loss": 0.04604018107056618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.66595832770329, "step_time": 0.9983129348754882} +{"epoch": 0, "iter": 12535, "iter_tflops": 53.28457388436863, "iter_time": 0.3871869850158691, "loss": 0.06688816845417023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.48352665987745, "step_time": 0.3527676029205322} +{"epoch": 0, "iter": 12536, "iter_tflops": 54.84994306531924, "iter_time": 0.3761370086669922, "loss": 0.025117062032222748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.81156511308322, "step_time": 0.34493485450744626} +{"epoch": 0, "iter": 12537, "iter_tflops": 36.43422963367728, "iter_time": 0.5662557907104492, "loss": 0.065638467669487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.318384677756875, "step_time": 0.5247187461853028} +{"epoch": 0, "iter": 12538, "iter_tflops": 
12.458546612278575, "iter_time": 1.6559791564941408, "loss": 0.12695181369781494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.52134005631348, "step_time": 1.4207430877685545} +{"epoch": 0, "iter": 12539, "iter_tflops": 11.513368947186999, "iter_time": 1.7919249877929688, "loss": 0.08619552105665207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.470388126015656, "step_time": 1.5315886459350585} +{"epoch": 0, "iter": 12540, "iter_tflops": 38.7412332124001, "iter_time": 0.5325357971191406, "loss": 0.08801054209470749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.491523374697415, "step_time": 0.4855343341827392} +{"epoch": 0, "iter": 12541, "iter_tflops": 10.666568057521884, "iter_time": 1.4512704010009767, "loss": 0.1436558961868286, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 11.215914197129303, "step_time": 1.3801883850097656} +{"epoch": 0, "iter": 12542, "iter_tflops": 11.502792372835044, "iter_time": 1.3457666625976563, "loss": 0.5137169361114502, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 16.970629169889275, "step_time": 0.9121685676574709} +{"epoch": 0, "iter": 12543, "iter_tflops": 27.685017288704074, "iter_time": 0.5591498947143555, "loss": 0.2679063081741333, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 29.588693683249797, "step_time": 0.5231753273010253} +{"epoch": 0, "iter": 12544, "iter_tflops": 27.92310871841293, "iter_time": 0.5543822021484375, "loss": 0.1083669513463974, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 29.690568889110533, "step_time": 0.5213801918029785} +{"epoch": 0, "iter": 12545, "iter_tflops": 21.486205029547524, "iter_time": 0.9602018356323242, "loss": 0.8144853711128235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.391777908339467, "step_time": 0.921369155883789} +{"epoch": 0, "iter": 12546, "iter_tflops": 12.067698675330032, "iter_time": 1.7096129150390624, "loss": 0.6733201146125793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.926875771892135, "step_time": 1.382144115447998} +{"epoch": 0, "iter": 12547, 
"iter_tflops": 32.77211696387009, "iter_time": 0.6295319137573242, "loss": 0.6119094491004944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.71326066901388, "step_time": 0.577687198638916} +{"epoch": 0, "iter": 12548, "iter_tflops": 34.66887581320243, "iter_time": 0.5950897750854492, "loss": 0.6933659911155701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.66682936084912, "step_time": 0.5477257804870604} +{"epoch": 0, "iter": 12549, "iter_tflops": 19.821180116951847, "iter_time": 1.0408610076904297, "loss": 0.09541704505681992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.160132335101082, "step_time": 0.974998321533203} +{"epoch": 0, "iter": 12550, "iter_tflops": 19.69592161868946, "iter_time": 1.047480484008789, "loss": 0.0662079006433487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.603194704869765, "step_time": 0.8385534381866454} +{"epoch": 0, "iter": 12551, "iter_tflops": 48.5946509227607, "iter_time": 0.4245548248291016, "loss": 0.1399269849061966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.68449345805159, "step_time": 0.391597074508667} +{"epoch": 0, "iter": 12552, "iter_tflops": 53.81198718557049, "iter_time": 0.3833921508789062, "loss": 0.13490071892738342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.386555422513844, "step_time": 0.3533534965515137} +{"epoch": 0, "iter": 12553, "iter_tflops": 34.89258673413954, "iter_time": 0.5912744064331055, "loss": 0.4028203785419464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.411380263638954, "step_time": 0.5514657135009766} +{"epoch": 0, "iter": 12554, "iter_tflops": 8.724761506190042, "iter_time": 2.3646598815917965, "loss": 0.4875327944755554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.813811181636368, "step_time": 1.9078466567993164} +{"epoch": 0, "iter": 12555, "iter_tflops": 15.668710586576925, "iter_time": 1.316706527709961, "loss": 0.48248815536499023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.30283096510931, "step_time": 1.1272077827453613} +{"epoch": 0, "iter": 
12556, "iter_tflops": 20.630331866345927, "iter_time": 1.000036918640137, "loss": 0.5704271793365479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.227807424346143, "step_time": 0.7308783569335938} +{"epoch": 0, "iter": 12557, "iter_tflops": 25.30791559317459, "iter_time": 0.5971272659301757, "loss": 0.10018634051084518, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.28003020603312, "step_time": 0.553960033416748} +{"epoch": 0, "iter": 12558, "iter_tflops": 25.237578524819366, "iter_time": 0.5987914581298828, "loss": 0.1929391771554947, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 26.796692800034204, "step_time": 0.5639519233703614} +{"epoch": 0, "iter": 12559, "iter_tflops": 27.027050115571292, "iter_time": 0.5591452407836914, "loss": 0.27693867683410645, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.796557467109213, "step_time": 0.524786563873291} +{"epoch": 0, "iter": 12560, "iter_tflops": 27.101613283378818, "iter_time": 0.5576068954467773, "loss": 0.2848172187805176, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.81289186537886, "step_time": 0.5244890556335449} +{"epoch": 0, "iter": 12561, "iter_tflops": 43.07052540045228, "iter_time": 0.4790072402954101, "loss": 0.661499559879303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.93484666701104, "step_time": 0.43956878471374505} +{"epoch": 0, "iter": 12562, "iter_tflops": 36.20722164699524, "iter_time": 0.569806037902832, "loss": 0.5425602793693542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.87955133464938, "step_time": 0.5173351459503174} +{"epoch": 0, "iter": 12563, "iter_tflops": 49.31041972532983, "iter_time": 0.4183921699523926, "loss": 0.6456719040870667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.630224160320914, "step_time": 0.3846915397644043} +{"epoch": 0, "iter": 12564, "iter_tflops": 49.08440384879365, "iter_time": 0.4203187141418457, "loss": 0.5772368907928467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.980047331614564, "step_time": 0.3894125156402588} +{"epoch": 0, 
"iter": 12565, "iter_tflops": 40.9064106047639, "iter_time": 0.5043486633300782, "loss": 0.1446961760520935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.581110606693905, "step_time": 0.4627765712738037} +{"epoch": 0, "iter": 12566, "iter_tflops": 10.500093883975007, "iter_time": 1.9648484802246096, "loss": 0.2226906269788742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.689130627696732, "step_time": 1.625887077331543} +{"epoch": 0, "iter": 12567, "iter_tflops": 38.29498319929712, "iter_time": 0.5387414169311524, "loss": 0.13487450778484344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56846438851556, "step_time": 0.48465674781799317} +{"epoch": 0, "iter": 12568, "iter_tflops": 40.0837934629659, "iter_time": 0.5146991271972656, "loss": 0.16419322788715363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.92593976277349, "step_time": 0.46967904663085935} +{"epoch": 0, "iter": 12569, "iter_tflops": 11.293968677315638, "iter_time": 1.2657707061767578, "loss": 0.49251338839530945, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 11.948883523208885, "step_time": 1.1963941802978515} +{"epoch": 0, "iter": 12570, "iter_tflops": 11.446175602354163, "iter_time": 1.24893896484375, "loss": 0.32796451449394226, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 15.026344671958368, "step_time": 0.9513674163818359} +{"epoch": 0, "iter": 12571, "iter_tflops": 25.566426868054283, "iter_time": 0.5591541900634766, "loss": 0.25766822695732117, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 27.381110997311566, "step_time": 0.5220962257385253} +{"epoch": 0, "iter": 12572, "iter_tflops": 25.555389048122276, "iter_time": 0.5593956985473633, "loss": 0.3855838477611542, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 27.190644125739666, "step_time": 0.5257534408569335} +{"epoch": 0, "iter": 12573, "iter_tflops": 33.588845587345446, "iter_time": 0.6142245483398437, "loss": 0.005113956984132528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.007225903409704, "step_time": 
0.5729709243774413} +{"epoch": 0, "iter": 12574, "iter_tflops": 21.205543350801932, "iter_time": 0.9729103927612305, "loss": 0.004234507214277983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.75169084283689, "step_time": 0.8686157817840576} +{"epoch": 0, "iter": 12575, "iter_tflops": 53.14754525150808, "iter_time": 0.38818525695800776, "loss": 0.0035638974513858557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.51081688649218, "step_time": 0.3526030673980713} +{"epoch": 0, "iter": 12576, "iter_tflops": 50.868531337801116, "iter_time": 0.40557674789428716, "loss": 0.011356623843312263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.36055441986963, "step_time": 0.36605554580688476} +{"epoch": 0, "iter": 12577, "iter_tflops": 27.43504020465172, "iter_time": 0.7519979324340821, "loss": 0.675267219543457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.96317648965571, "step_time": 0.7123215065002442} +{"epoch": 0, "iter": 12578, "iter_tflops": 12.128468214579211, "iter_time": 1.701046920776367, "loss": 0.5620836019515991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.364411290446634, "step_time": 1.342784511566162} +{"epoch": 0, "iter": 12579, "iter_tflops": 34.62059581751664, "iter_time": 0.5959196548461914, "loss": 0.7836613655090332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.811600522497905, "step_time": 0.5456286754608154} +{"epoch": 0, "iter": 12580, "iter_tflops": 36.33116542641123, "iter_time": 0.5678621444702148, "loss": 0.8155772089958191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.6057776248162, "step_time": 0.5209112091064454} +{"epoch": 0, "iter": 12581, "iter_tflops": 22.530275852387593, "iter_time": 0.915705322265625, "loss": 0.004960221238434315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.185911563390096, "step_time": 0.853021125793457} +{"epoch": 0, "iter": 12582, "iter_tflops": 21.795919904288606, "iter_time": 0.9465575942993164, "loss": 0.0036421138793230057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
24.841365756948477, "step_time": 0.8305136566162109} +{"epoch": 0, "iter": 12583, "iter_tflops": 53.2864591458543, "iter_time": 0.3871732864379883, "loss": 0.009792809374630451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.09020254061722, "step_time": 0.35515616416931156} +{"epoch": 0, "iter": 12584, "iter_tflops": 55.39236171583699, "iter_time": 0.37245376205444336, "loss": 0.02197263576090336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.494270675563804, "step_time": 0.34104210662841794} +{"epoch": 0, "iter": 12585, "iter_tflops": 26.544810921780233, "iter_time": 0.6573543090820313, "loss": 0.07164017111063004, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 28.207076654648862, "step_time": 0.6186158905029296} +{"epoch": 0, "iter": 12586, "iter_tflops": 11.05622250512756, "iter_time": 1.578237579345703, "loss": 0.047487448900938034, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 13.416909041809868, "step_time": 1.3005488662719726} +{"epoch": 0, "iter": 12587, "iter_tflops": 27.775029061566265, "iter_time": 0.6282386169433594, "loss": 0.03230338543653488, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 40.81901501702507, "step_time": 0.42748081588745124} +{"epoch": 0, "iter": 12588, "iter_tflops": 48.38298931909117, "iter_time": 0.3606504287719726, "loss": 0.05637429282069206, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 52.76995997567958, "step_time": 0.3306681652069092} +{"epoch": 0, "iter": 12589, "iter_tflops": 42.31286856702149, "iter_time": 0.48758437347412115, "loss": 0.2954930365085602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.94033135325411, "step_time": 0.4490845603942871} +{"epoch": 0, "iter": 12590, "iter_tflops": 40.57439085529699, "iter_time": 0.5084757423400879, "loss": 0.3950691819190979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.449475394839006, "step_time": 0.46414706420898433} +{"epoch": 0, "iter": 12591, "iter_tflops": 45.921202983429325, "iter_time": 0.44927162551879885, "loss": 0.3124742805957794, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 49.69067054822105, "step_time": 0.4151904830932617} +{"epoch": 0, "iter": 12592, "iter_tflops": 51.58668859509468, "iter_time": 0.39993056488037104, "loss": 0.33711037039756775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.81289463013001, "step_time": 0.36964743804931643} +{"epoch": 0, "iter": 12593, "iter_tflops": 37.089067591827266, "iter_time": 0.5562580795288086, "loss": 0.8839092254638672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.81347610262128, "step_time": 0.5181937255859375} +{"epoch": 0, "iter": 12594, "iter_tflops": 35.113446244852824, "iter_time": 0.5875553588867187, "loss": 0.7240778207778931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.35396238636159, "step_time": 0.5379129619598388} +{"epoch": 0, "iter": 12595, "iter_tflops": 36.25812922252485, "iter_time": 0.5690060119628906, "loss": 0.5533663630485535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.28523387494761, "step_time": 0.5251615295410157} +{"epoch": 0, "iter": 12596, "iter_tflops": 39.46625098808112, "iter_time": 0.5227528076171876, "loss": 0.6575764417648315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87997042346193, "step_time": 0.48113590812683105} +{"epoch": 0, "iter": 12597, "iter_tflops": 20.49471605423711, "iter_time": 1.0066542739868165, "loss": 0.18398967385292053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.800031904551094, "step_time": 0.9463790512084961} +{"epoch": 0, "iter": 12598, "iter_tflops": 10.333250977474659, "iter_time": 1.9965733489990232, "loss": 0.15303750336170197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.456372267212473, "step_time": 1.6562682189941407} +{"epoch": 0, "iter": 12599, "iter_tflops": 12.814701105261848, "iter_time": 1.6099551086425783, "loss": 0.16268153488636017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.937702520044908, "step_time": 1.2944835357666016} +{"epoch": 0, "iter": 12600, "iter_tflops": 30.71740304416359, "iter_time": 0.6716418533325195, "loss": 0.36746492981910706, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 45.38562907626184, "step_time": 0.45457326316833496} +{"epoch": 0, "iter": 12601, "iter_tflops": 23.496658250862815, "iter_time": 0.6327243041992188, "loss": 0.15966397523880005, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 25.05528270716662, "step_time": 0.5933641586303711} +{"epoch": 0, "iter": 12602, "iter_tflops": 20.98861235498193, "iter_time": 0.7083320465087891, "loss": 0.11877095699310303, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 22.690040773802178, "step_time": 0.6552172775268554} +{"epoch": 0, "iter": 12603, "iter_tflops": 22.632875005453815, "iter_time": 0.6568722152709962, "loss": 0.2901209890842438, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.412162318831715, "step_time": 0.6089958992004395} +{"epoch": 0, "iter": 12604, "iter_tflops": 25.353631961744128, "iter_time": 0.5863817367553711, "loss": 0.23791322112083435, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 27.107017123910705, "step_time": 0.5484523315429688} +{"epoch": 0, "iter": 12605, "iter_tflops": 20.39972521554391, "iter_time": 1.0113417358398438, "loss": 0.7208852171897888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.108368261005854, "step_time": 0.9331802902221679} +{"epoch": 0, "iter": 12606, "iter_tflops": 41.57501368634161, "iter_time": 0.49623780441284177, "loss": 0.5393742322921753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.783572504039626, "step_time": 0.4606844062805176} +{"epoch": 0, "iter": 12607, "iter_tflops": 44.07932545704934, "iter_time": 0.46804467391967775, "loss": 0.7886102795600891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.23427354202097, "step_time": 0.43678227615356446} +{"epoch": 0, "iter": 12608, "iter_tflops": 46.33025698482676, "iter_time": 0.4453049659729003, "loss": 0.7150709629058838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.48138787516928, "step_time": 0.4169465408325196} +{"epoch": 0, "iter": 12609, "iter_tflops": 24.410567790288273, "iter_time": 0.708087776184082, "loss": 
0.006285991985350847, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 25.843185911111625, "step_time": 0.6688348999023437} +{"epoch": 0, "iter": 12610, "iter_tflops": 12.884486035634442, "iter_time": 1.341522247314453, "loss": 0.02262224070727825, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 15.506260371305071, "step_time": 1.1146997566223145} +{"epoch": 0, "iter": 12611, "iter_tflops": 43.526792139235646, "iter_time": 0.3971077079772949, "loss": 0.011525009758770466, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 47.94593437934117, "step_time": 0.36050657653808593} +{"epoch": 0, "iter": 12612, "iter_tflops": 51.874554043659366, "iter_time": 0.3332043037414551, "loss": 0.0068709696643054485, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 57.029250155181174, "step_time": 0.3030870056152344} +{"epoch": 0, "iter": 12613, "iter_tflops": 37.29236842346987, "iter_time": 0.5532256164550782, "loss": 0.08026190847158432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.217566476009, "step_time": 0.512987117767334} +{"epoch": 0, "iter": 12614, "iter_tflops": 12.551670899581884, "iter_time": 1.6436929931640625, "loss": 0.11293278634548187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.16961998284145, "step_time": 1.2759170303344727} +{"epoch": 0, "iter": 12615, "iter_tflops": 17.77615782693646, "iter_time": 1.1606047668457031, "loss": 0.13077667355537415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.03376839159795, "step_time": 1.0298159141540528} +{"epoch": 0, "iter": 12616, "iter_tflops": 20.076148047450783, "iter_time": 1.0276420288085937, "loss": 0.09572381526231766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.029026449628027, "step_time": 0.8585904865264894} +{"epoch": 0, "iter": 12617, "iter_tflops": 13.974036763398791, "iter_time": 1.0726646575927736, "loss": 0.11675404757261276, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.870942214704614, "step_time": 1.0079694442749023} +{"epoch": 0, "iter": 12618, "iter_tflops": 10.858274200870447, "iter_time": 
1.3804638824462891, "loss": 0.24820828437805176, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 14.568220637542925, "step_time": 1.0289146308898927} +{"epoch": 0, "iter": 12619, "iter_tflops": 23.219868647825592, "iter_time": 0.6455443649291992, "loss": 0.37541264295578003, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.841619559588214, "step_time": 0.6034008903503418} +{"epoch": 0, "iter": 12620, "iter_tflops": 23.94393762292175, "iter_time": 0.6260229873657227, "loss": 0.2365165501832962, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 25.569446153332137, "step_time": 0.5862252655029296} +{"epoch": 0, "iter": 12621, "iter_tflops": 18.730340624237275, "iter_time": 1.1014798889160156, "loss": 0.2223985344171524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.14149604890905, "step_time": 1.0243078994750976} +{"epoch": 0, "iter": 12622, "iter_tflops": 32.2799254062531, "iter_time": 0.6391307678222657, "loss": 0.14790518581867218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.80303480188177, "step_time": 0.5762386798858643} +{"epoch": 0, "iter": 12623, "iter_tflops": 44.24382457632451, "iter_time": 0.4663044776916504, "loss": 0.09765392541885376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90406209493004, "step_time": 0.4306752414703369} +{"epoch": 0, "iter": 12624, "iter_tflops": 54.716530746591076, "iter_time": 0.3770541229248047, "loss": 0.11393845081329346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.58208809658948, "step_time": 0.34626335144042975} +{"epoch": 0, "iter": 12625, "iter_tflops": 27.55197454006232, "iter_time": 0.7488063507080078, "loss": 0.1993352472782135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.025089988589325, "step_time": 0.7108020515441895} +{"epoch": 0, "iter": 12626, "iter_tflops": 13.918249087788464, "iter_time": 1.4823052368164062, "loss": 0.2578127086162567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.10371535355867, "step_time": 1.2062346153259278} +{"epoch": 0, "iter": 12627, "iter_tflops": 35.60808205676956, 
"iter_time": 0.5793935623168945, "loss": 0.2005763202905655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.10873160441008, "step_time": 0.527531644821167} +{"epoch": 0, "iter": 12628, "iter_tflops": 39.711478507300164, "iter_time": 0.5195246887207031, "loss": 0.2720564305782318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.627492120100996, "step_time": 0.4728920345306396} +{"epoch": 0, "iter": 12629, "iter_tflops": 17.70656086466013, "iter_time": 1.1651666107177734, "loss": 0.08739578723907471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.089162182223426, "step_time": 1.0807752227783203} +{"epoch": 0, "iter": 12630, "iter_tflops": 25.717698407928186, "iter_time": 0.8022138366699217, "loss": 0.08943065255880356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.33218295764477, "step_time": 0.7033603172302245} +{"epoch": 0, "iter": 12631, "iter_tflops": 53.274412255067126, "iter_time": 0.3872608375549316, "loss": 0.05205227807164192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.932583039611856, "step_time": 0.35612245178222657} +{"epoch": 0, "iter": 12632, "iter_tflops": 49.43881396812803, "iter_time": 0.417305591583252, "loss": 0.05899684876203537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.81740298468043, "step_time": 0.3833535690307617} +{"epoch": 0, "iter": 12633, "iter_tflops": 45.76928595787697, "iter_time": 0.45076284408569334, "loss": 0.1903524249792099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30179891504225, "step_time": 0.4101462364196778} +{"epoch": 0, "iter": 12634, "iter_tflops": 37.575252408987396, "iter_time": 0.549060676574707, "loss": 0.1896979659795761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.42081411947782, "step_time": 0.4980851764678955} +{"epoch": 0, "iter": 12635, "iter_tflops": 40.02723481165628, "iter_time": 0.515426399230957, "loss": 0.20747296512126923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95458296044857, "step_time": 0.4693729782104492} +{"epoch": 0, "iter": 12636, "iter_tflops": 
37.49405248143298, "iter_time": 0.5502497634887695, "loss": 0.14829032123088837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.98540685053223, "step_time": 0.5033765697479248} +{"epoch": 0, "iter": 12637, "iter_tflops": 27.35315961496856, "iter_time": 0.754249008178711, "loss": 0.29627352952957153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.53101787987385, "step_time": 0.6986245307922363} +{"epoch": 0, "iter": 12638, "iter_tflops": 9.777828832378782, "iter_time": 2.1099871826171874, "loss": 0.3045201897621155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.249460912712033, "step_time": 1.8339628601074218} +{"epoch": 0, "iter": 12639, "iter_tflops": 12.606426510378258, "iter_time": 1.636553665161133, "loss": 0.19989623129367828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.862820213223802, "step_time": 1.3005942974090576} +{"epoch": 0, "iter": 12640, "iter_tflops": 40.47367690272941, "iter_time": 0.509741024017334, "loss": 0.18089640140533447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.401311322695705, "step_time": 0.46465054512023923} +{"epoch": 0, "iter": 12641, "iter_tflops": 16.99338551468101, "iter_time": 0.850843879699707, "loss": 0.25101253390312195, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 18.109613659736894, "step_time": 0.7984001388549806} +{"epoch": 0, "iter": 12642, "iter_tflops": 8.82022354318255, "iter_time": 1.6392688903808594, "loss": 0.14986282587051392, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 10.326395126839708, "step_time": 1.4001709098815918} +{"epoch": 0, "iter": 12643, "iter_tflops": 25.235655421373583, "iter_time": 0.5729479904174805, "loss": 0.23606723546981812, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 27.02101548597704, "step_time": 0.5350915870666504} +{"epoch": 0, "iter": 12644, "iter_tflops": 26.501036986607442, "iter_time": 0.5455906524658203, "loss": 0.2109163999557495, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 28.18979226982817, "step_time": 0.5129061584472656} +{"epoch": 0, "iter": 12645, 
"iter_tflops": 15.153453714776377, "iter_time": 0.9541533126831054, "loss": 0.20073150098323822, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 15.940546908348049, "step_time": 0.9070402755737305} +{"epoch": 0, "iter": 12646, "iter_tflops": 10.099764239729405, "iter_time": 1.4315896606445313, "loss": 0.09769783169031143, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 12.66159871470095, "step_time": 1.1419346313476564} +{"epoch": 0, "iter": 12647, "iter_tflops": 25.131045133548092, "iter_time": 0.5753329391479493, "loss": 0.1459723860025406, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 26.82554541183071, "step_time": 0.5389906463623046} +{"epoch": 0, "iter": 12648, "iter_tflops": 25.60707120662372, "iter_time": 0.564637710571289, "loss": 0.28146108984947205, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 27.266295233221946, "step_time": 0.5302780570983887} +{"epoch": 0, "iter": 12649, "iter_tflops": 50.19796428128044, "iter_time": 0.4109946250915527, "loss": 0.0073867314495146275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.609269883632756, "step_time": 0.3644472637176514} +{"epoch": 0, "iter": 12650, "iter_tflops": 47.99234951290971, "iter_time": 0.4298829650878906, "loss": 0.07415800541639328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.270238313926704, "step_time": 0.3947005825042724} +{"epoch": 0, "iter": 12651, "iter_tflops": 54.01256368684973, "iter_time": 0.38196841812133786, "loss": 0.09608091413974762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.03003600209612, "step_time": 0.3495016250610352} +{"epoch": 0, "iter": 12652, "iter_tflops": 54.4085340846313, "iter_time": 0.3791885566711426, "loss": 0.03544417396187782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.31133770498323, "step_time": 0.34784400939941407} +{"epoch": 0, "iter": 12653, "iter_tflops": 38.16298043969324, "iter_time": 0.5406048812866211, "loss": 0.37947574257850647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.322116889444665, "step_time": 0.49927484512329096} +{"epoch": 
0, "iter": 12654, "iter_tflops": 33.72626813505674, "iter_time": 0.6117218017578125, "loss": 0.3999048173427582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.62264608697349, "step_time": 0.5483690185546876} +{"epoch": 0, "iter": 12655, "iter_tflops": 42.49083950806816, "iter_time": 0.4855421485900879, "loss": 0.43601182103157043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.31104945202093, "step_time": 0.4454896564483642} +{"epoch": 0, "iter": 12656, "iter_tflops": 46.920340410371786, "iter_time": 0.4397046852111816, "loss": 0.5984126925468445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.22617329111151, "step_time": 0.40274516296386725} +{"epoch": 0, "iter": 12657, "iter_tflops": 22.516151776848485, "iter_time": 0.9162797317504883, "loss": 0.3137871325016022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.864015644499947, "step_time": 0.8645273208618163} +{"epoch": 0, "iter": 12658, "iter_tflops": 17.941484181904425, "iter_time": 1.1499100799560547, "loss": 0.22474083304405212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.42217624555064, "step_time": 0.9201200313568114} +{"epoch": 0, "iter": 12659, "iter_tflops": 46.532697334794754, "iter_time": 0.44336766815185547, "loss": 0.19556257128715515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.494578132550174, "step_time": 0.40858037185668944} +{"epoch": 0, "iter": 12660, "iter_tflops": 42.825141991484976, "iter_time": 0.48175189971923826, "loss": 0.2716272175312042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.952782243680325, "step_time": 0.4489628810882568} +{"epoch": 0, "iter": 12661, "iter_tflops": 27.9976747200254, "iter_time": 0.7368859634399414, "loss": 0.5784946084022522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.61010703328743, "step_time": 0.6967584915161134} +{"epoch": 0, "iter": 12662, "iter_tflops": 14.567717933909385, "iter_time": 1.4162200012207031, "loss": 0.8561198711395264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.334257396069034, "step_time": 
1.1901919441223143} +{"epoch": 0, "iter": 12663, "iter_tflops": 47.11791682868185, "iter_time": 0.43786090087890617, "loss": 1.0152572393417358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.921999443658876, "step_time": 0.4051508922576904} +{"epoch": 0, "iter": 12664, "iter_tflops": 48.2395224417969, "iter_time": 0.4276803016662598, "loss": 0.7039058208465576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00518916638734, "step_time": 0.39671220970153803} +{"epoch": 0, "iter": 12665, "iter_tflops": 42.51693985714023, "iter_time": 0.485244083404541, "loss": 0.7447012066841125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.17347805771743, "step_time": 0.4468169689178467} +{"epoch": 0, "iter": 12666, "iter_tflops": 43.20961639672913, "iter_time": 0.4774653244018555, "loss": 0.7891735434532166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.49484677142341, "step_time": 0.44372860527038577} +{"epoch": 0, "iter": 12667, "iter_tflops": 41.24697807825217, "iter_time": 0.5001843643188477, "loss": 0.5290423631668091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.28527137694384, "step_time": 0.4658680610656738} +{"epoch": 0, "iter": 12668, "iter_tflops": 41.655942332235654, "iter_time": 0.49527371978759765, "loss": 0.6890138387680054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.660483455741605, "step_time": 0.4619541015625} +{"epoch": 0, "iter": 12669, "iter_tflops": 31.469135669931546, "iter_time": 0.6555977172851563, "loss": 0.669377326965332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.44891359952021, "step_time": 0.6167941284179688} +{"epoch": 0, "iter": 12670, "iter_tflops": 17.02184798809092, "iter_time": 1.2120360565185546, "loss": 0.6088324189186096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.408801270847256, "step_time": 1.0108919792175293} +{"epoch": 0, "iter": 12671, "iter_tflops": 36.76471504383942, "iter_time": 0.5611656036376953, "loss": 0.5513339638710022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.31855512625277, 
"step_time": 0.5117022037506104} +{"epoch": 0, "iter": 12672, "iter_tflops": 37.52471800679723, "iter_time": 0.5498000946044922, "loss": 0.4690170884132385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.01419915478162, "step_time": 0.5030231952667237} +{"epoch": 0, "iter": 12673, "iter_tflops": 21.15055975857361, "iter_time": 0.9754395980834961, "loss": 0.35571688413619995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.092359899249058, "step_time": 0.8934164199829101} +{"epoch": 0, "iter": 12674, "iter_tflops": 17.959292740808298, "iter_time": 1.1487698211669921, "loss": 0.4287492334842682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.87918365628327, "step_time": 0.9429553604125978} +{"epoch": 0, "iter": 12675, "iter_tflops": 36.32383290558077, "iter_time": 0.5679767761230468, "loss": 0.35145488381385803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.88424466942873, "step_time": 0.5172742691040039} +{"epoch": 0, "iter": 12676, "iter_tflops": 44.41697126360371, "iter_time": 0.4644867248535156, "loss": 0.35914871096611023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.466727060925976, "step_time": 0.4256754016876221} +{"epoch": 0, "iter": 12677, "iter_tflops": 33.30933028477554, "iter_time": 0.6193788146972656, "loss": 0.06776712089776993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.03282949220063, "step_time": 0.5571028137207031} +{"epoch": 0, "iter": 12678, "iter_tflops": 38.340832187440206, "iter_time": 0.5380971755981446, "loss": 0.09928383678197861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.175685458292094, "step_time": 0.47784055519104} +{"epoch": 0, "iter": 12679, "iter_tflops": 41.28498082004711, "iter_time": 0.49972394561767575, "loss": 0.0968778058886528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.47379214131341, "step_time": 0.453691951751709} +{"epoch": 0, "iter": 12680, "iter_tflops": 40.193533715001784, "iter_time": 0.5132938461303711, "loss": 0.15313446521759033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.225661467948875, "step_time": 0.46649598503112794} +{"epoch": 0, "iter": 12681, "iter_tflops": 19.1056122873643, "iter_time": 1.0798446655273437, "loss": 0.32044482231140137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.10389571723231, "step_time": 1.026223663330078} +{"epoch": 0, "iter": 12682, "iter_tflops": 19.70656955416602, "iter_time": 1.0469145050048827, "loss": 0.3214508593082428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.66629008391376, "step_time": 0.7196987628936768} +{"epoch": 0, "iter": 12683, "iter_tflops": 37.304643412839, "iter_time": 0.5530435791015625, "loss": 0.22043095529079437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.46142558179938, "step_time": 0.5098953685760499} +{"epoch": 0, "iter": 12684, "iter_tflops": 37.94995921477098, "iter_time": 0.5436394119262695, "loss": 0.20031894743442535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.46985855619444, "step_time": 0.49749611473083505} +{"epoch": 0, "iter": 12685, "iter_tflops": 21.750208022913814, "iter_time": 0.9485469512939453, "loss": 0.39360928535461426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.10377546732351, "step_time": 0.892974983215332} +{"epoch": 0, "iter": 12686, "iter_tflops": 13.669572246969661, "iter_time": 1.5092713317871094, "loss": 0.5502623319625854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.84967118033934, "step_time": 1.2244211349487304} +{"epoch": 0, "iter": 12687, "iter_tflops": 15.939915518463344, "iter_time": 1.2943038177490234, "loss": 0.4866008460521698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.314458003699713, "step_time": 1.1264921684265137} +{"epoch": 0, "iter": 12688, "iter_tflops": 32.67622989527261, "iter_time": 0.6313792495727539, "loss": 0.4631231427192688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.888456773817786, "step_time": 0.5172196464538574} +{"epoch": 0, "iter": 12689, "iter_tflops": 14.121237790251307, "iter_time": 1.1310179290771485, "loss": 0.2869884967803955, "lr": 3e-05, "seqlen": 6384.0, 
"step_tflops": 15.008507171513061, "step_time": 1.0641546783447264} +{"epoch": 0, "iter": 12690, "iter_tflops": 19.868037438488393, "iter_time": 0.8038727111816406, "loss": 0.21106037497520447, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 21.536201188525652, "step_time": 0.7416058654785156} +{"epoch": 0, "iter": 12691, "iter_tflops": 23.279554353026466, "iter_time": 0.6860686798095703, "loss": 0.19041509926319122, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 24.977375723835284, "step_time": 0.63943359375} +{"epoch": 0, "iter": 12692, "iter_tflops": 25.04041038365368, "iter_time": 0.6378239364624023, "loss": 0.17141208052635193, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 26.769804606444726, "step_time": 0.5966189651489259} +{"epoch": 0, "iter": 12693, "iter_tflops": 21.06227067646124, "iter_time": 0.9795284576416015, "loss": 0.7422771453857422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.71692555815227, "step_time": 0.9081815872192384} +{"epoch": 0, "iter": 12694, "iter_tflops": 18.572117430112606, "iter_time": 1.1108638305664063, "loss": 0.6684499979019165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.987494176795042, "step_time": 0.9830184268951416} +{"epoch": 0, "iter": 12695, "iter_tflops": 39.17434324622086, "iter_time": 0.5266481018066407, "loss": 0.826744019985199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.74925389321193, "step_time": 0.4826071014404296} +{"epoch": 0, "iter": 12696, "iter_tflops": 38.33816268334597, "iter_time": 0.5381346435546875, "loss": 0.49589309096336365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.66320378009443, "step_time": 0.49518739891052255} +{"epoch": 0, "iter": 12697, "iter_tflops": 30.881161515779503, "iter_time": 0.6680802307128906, "loss": 0.649620532989502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.29877009592261, "step_time": 0.6195752410888671} +{"epoch": 0, "iter": 12698, "iter_tflops": 11.06173782057592, "iter_time": 1.8650861053466796, "loss": 0.6893225908279419, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 13.859874368337518, "step_time": 1.488548377990723} +{"epoch": 0, "iter": 12699, "iter_tflops": 12.600106320026006, "iter_time": 1.637374557495117, "loss": 0.5732753872871399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.667870810987996, "step_time": 1.4065499877929688} +{"epoch": 0, "iter": 12700, "iter_tflops": 27.19731380896963, "iter_time": 0.7585709991455079, "loss": 0.7427507042884827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.27724411721882, "step_time": 0.6391838607788085} +{"epoch": 0, "iter": 12701, "iter_tflops": 16.61134774225219, "iter_time": 0.9072820739746092, "loss": 0.1955016404390335, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 17.77359471782938, "step_time": 0.8479532852172851} +{"epoch": 0, "iter": 12702, "iter_tflops": 6.233176505122749, "iter_time": 2.4178968811035157, "loss": 0.16239196062088013, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 7.227034527963176, "step_time": 2.0853889617919923} +{"epoch": 0, "iter": 12703, "iter_tflops": 7.793394643548486, "iter_time": 1.9338399658203123, "loss": 0.22600767016410828, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 9.50796688099807, "step_time": 1.5851104888916017} +{"epoch": 0, "iter": 12704, "iter_tflops": 20.958949439146163, "iter_time": 0.719080795288086, "loss": 0.31101489067077637, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 22.50344306093509, "step_time": 0.6697276496887207} +{"epoch": 0, "iter": 12705, "iter_tflops": 11.881821386013318, "iter_time": 1.3441855926513673, "loss": 0.15485292673110962, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 12.501629173028785, "step_time": 1.277543342590332} +{"epoch": 0, "iter": 12706, "iter_tflops": 12.013597562867963, "iter_time": 1.3294413299560548, "loss": 0.20637544989585876, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.216005516445508, "step_time": 1.0496429634094238} +{"epoch": 0, "iter": 12707, "iter_tflops": 28.824012778315865, "iter_time": 0.5540995712280273, "loss": 0.18883225321769714, "lr": 3e-05, 
"seqlen": 6384.0, "step_tflops": 30.734030916214543, "step_time": 0.519664119720459} +{"epoch": 0, "iter": 12708, "iter_tflops": 29.10773234846788, "iter_time": 0.5486986389160156, "loss": 0.1713569611310959, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.9739806006005, "step_time": 0.5156383781433105} +{"epoch": 0, "iter": 12709, "iter_tflops": 5.68767852691312, "iter_time": 0.5316225967407227, "loss": 0.000643337843939662, "lr": 3e-05, "seqlen": 1232.0, "step_tflops": 6.197097138359976, "step_time": 0.4879217414855957} +{"epoch": 0, "iter": 12710, "iter_tflops": 6.3397702446271715, "iter_time": 0.4769413261413574, "loss": 0.1123247891664505, "lr": 3e-05, "seqlen": 1232.0, "step_tflops": 6.831526170964986, "step_time": 0.4426095066070557} +{"epoch": 0, "iter": 12711, "iter_tflops": 6.7940921065494875, "iter_time": 0.4450481948852538, "loss": 1.0049927234649658, "lr": 3e-05, "seqlen": 1232.0, "step_tflops": 7.360340049503886, "step_time": 0.41080961036682123} +{"epoch": 0, "iter": 12712, "iter_tflops": 7.479933632230816, "iter_time": 0.4042413444519043, "loss": 0.786773145198822, "lr": 3e-05, "seqlen": 1232.0, "step_tflops": 8.09182001191069, "step_time": 0.37367346572875976} +{"epoch": 0, "iter": 12713, "iter_tflops": 25.76615843578254, "iter_time": 0.8007050628662109, "loss": 0.024245938286185265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.159127515634495, "step_time": 0.7596375656127929} +{"epoch": 0, "iter": 12714, "iter_tflops": 14.895942624998218, "iter_time": 1.3850142974853514, "loss": 0.030035177245736122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.499199873143393, "step_time": 1.178973533630371} +{"epoch": 0, "iter": 12715, "iter_tflops": 41.58127393520544, "iter_time": 0.49616309356689453, "loss": 0.0694495365023613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.85741296267956, "step_time": 0.4498965854644776} +{"epoch": 0, "iter": 12716, "iter_tflops": 47.72830254116138, "iter_time": 0.43226120376586913, "loss": 0.05204576998949051, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.09570341070331, "step_time": 0.39602293777465825} +{"epoch": 0, "iter": 12717, "iter_tflops": 14.512322838045776, "iter_time": 1.0751439056396486, "loss": 0.000767704623285681, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 15.741225885969934, "step_time": 0.9912084083557129} +{"epoch": 0, "iter": 12718, "iter_tflops": 11.820565461815024, "iter_time": 1.3199736938476563, "loss": 0.004697462543845177, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 15.66253138065267, "step_time": 0.9961886157989502} +{"epoch": 0, "iter": 12719, "iter_tflops": 31.12002602287801, "iter_time": 0.5013760414123535, "loss": 0.011032449081540108, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 34.72834943027486, "step_time": 0.44928237915039065} +{"epoch": 0, "iter": 12720, "iter_tflops": 30.424745737744107, "iter_time": 0.5128337173461914, "loss": 0.016666820272803307, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 33.76754801901498, "step_time": 0.4620659885406494} +{"epoch": 0, "iter": 12721, "iter_tflops": 19.132989700486775, "iter_time": 1.0782995147705077, "loss": 0.7348202466964722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.67919102522548, "step_time": 0.9976741104125978} +{"epoch": 0, "iter": 12722, "iter_tflops": 20.560132723086994, "iter_time": 1.0034513778686522, "loss": 0.5560339093208313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.621290963041883, "step_time": 0.8379371147155763} +{"epoch": 0, "iter": 12723, "iter_tflops": 41.22149482063175, "iter_time": 0.5004935798645019, "loss": 0.6042308807373047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.1306913878297, "step_time": 0.46749989318847657} +{"epoch": 0, "iter": 12724, "iter_tflops": 45.73720888827481, "iter_time": 0.4510789794921875, "loss": 0.8643165230751038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22302024032468, "step_time": 0.41913505935668943} +{"epoch": 0, "iter": 12725, "iter_tflops": 28.616506024273917, "iter_time": 0.7209508209228516, "loss": 
0.13048924505710602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.36020656911514, "step_time": 0.6795439109802246} +{"epoch": 0, "iter": 12726, "iter_tflops": 8.907359374460725, "iter_time": 2.316185150146484, "loss": 0.14373594522476196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.655557378555223, "step_time": 1.936181541442871} +{"epoch": 0, "iter": 12727, "iter_tflops": 12.182945130746802, "iter_time": 1.6934405670166015, "loss": 0.09480992704629898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.954680893504154, "step_time": 1.4784353485107422} +{"epoch": 0, "iter": 12728, "iter_tflops": 23.02721202488104, "iter_time": 0.8959440460205077, "loss": 0.1527847796678543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.28917640992872, "step_time": 0.7043930912017822} +{"epoch": 0, "iter": 12729, "iter_tflops": 17.296773653777834, "iter_time": 0.7935102691650392, "loss": 0.25064748525619507, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 18.206059079882376, "step_time": 0.7538791046142579} +{"epoch": 0, "iter": 12730, "iter_tflops": 9.602110436813966, "iter_time": 1.4293907165527346, "loss": 0.24795964360237122, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 12.390542024114577, "step_time": 1.1077132453918457} +{"epoch": 0, "iter": 12731, "iter_tflops": 21.14406579174861, "iter_time": 0.6491262207031251, "loss": 0.25800609588623047, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 22.795593519401958, "step_time": 0.6020973968505859} +{"epoch": 0, "iter": 12732, "iter_tflops": 21.937956227031734, "iter_time": 0.6256356506347656, "loss": 0.2731797397136688, "lr": 3e-05, "seqlen": 5504.0, "step_tflops": 23.446423992083655, "step_time": 0.5853842582702637} +{"epoch": 0, "iter": 12733, "iter_tflops": 29.34328215388576, "iter_time": 0.7030942687988281, "loss": 0.5868880748748779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.11375907739064, "step_time": 0.6424378242492675} +{"epoch": 0, "iter": 12734, "iter_tflops": 45.512611518379096, "iter_time": 
0.45330498123168944, "loss": 0.44546395540237427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9068164131173, "step_time": 0.41339229774475095} +{"epoch": 0, "iter": 12735, "iter_tflops": 45.26365285554742, "iter_time": 0.4557982444763184, "loss": 0.5203335285186768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.69229780475934, "step_time": 0.4237034282684326} +{"epoch": 0, "iter": 12736, "iter_tflops": 49.266332961411884, "iter_time": 0.4187665748596191, "loss": 0.4456062316894531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.16987639941544, "step_time": 0.38802222061157227} +{"epoch": 0, "iter": 12737, "iter_tflops": 31.350497371397616, "iter_time": 0.6580786666870118, "loss": 0.7374087572097778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.30077694332367, "step_time": 0.6195379028320314} +{"epoch": 0, "iter": 12738, "iter_tflops": 13.923823670773267, "iter_time": 1.4817117767333985, "loss": 0.513516902923584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.426998985694953, "step_time": 1.3373368034362791} +{"epoch": 0, "iter": 12739, "iter_tflops": 44.13226794805492, "iter_time": 0.46748319244384773, "loss": 0.7249538898468018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.56308860373653, "step_time": 0.4337626953125} +{"epoch": 0, "iter": 12740, "iter_tflops": 47.07418984527428, "iter_time": 0.4382676277160644, "loss": 0.6877397298812866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.88137233913533, "step_time": 0.4054743919372559} +{"epoch": 0, "iter": 12741, "iter_tflops": 27.239629890773838, "iter_time": 0.7573925781249999, "loss": 0.20950226485729218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.778575052259423, "step_time": 0.716890724182129} +{"epoch": 0, "iter": 12742, "iter_tflops": 13.142195442106939, "iter_time": 1.569836151123047, "loss": 0.18464045226573944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.240413412338647, "step_time": 1.1966704635620116} +{"epoch": 0, "iter": 12743, "iter_tflops": 11.284134647219126, 
"iter_time": 1.8283274841308594, "loss": 0.209060937166214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.128579944430891, "step_time": 1.4602382965087892} +{"epoch": 0, "iter": 12744, "iter_tflops": 16.087757090275296, "iter_time": 1.2824095611572268, "loss": 0.2756488025188446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.75557470204178, "step_time": 0.9940025177001952} +{"epoch": 0, "iter": 12745, "iter_tflops": 22.846747739040204, "iter_time": 0.6417873382568359, "loss": 0.29079392552375793, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 24.423347834979694, "step_time": 0.6003580474853515} +{"epoch": 0, "iter": 12746, "iter_tflops": 26.11156826734175, "iter_time": 0.5615424270629883, "loss": 0.1968870311975479, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.943556495345177, "step_time": 0.5247275314331055} +{"epoch": 0, "iter": 12747, "iter_tflops": 26.172490145597727, "iter_time": 0.5602353210449218, "loss": 0.14891205728054047, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 28.02165499027155, "step_time": 0.5232650756835937} +{"epoch": 0, "iter": 12748, "iter_tflops": 28.04007543600555, "iter_time": 0.5229213256835937, "loss": 0.25003957748413086, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 29.750979760204075, "step_time": 0.4928494300842285} +{"epoch": 0, "iter": 12749, "iter_tflops": 29.163114519403255, "iter_time": 0.7074379348754883, "loss": 0.676807701587677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.065590663573207, "step_time": 0.6641139945983886} +{"epoch": 0, "iter": 12750, "iter_tflops": 16.46146753383151, "iter_time": 1.253296127319336, "loss": 0.7704848647117615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.430489058421102, "step_time": 1.0617897186279297} +{"epoch": 0, "iter": 12751, "iter_tflops": 42.67917354896892, "iter_time": 0.4833995552062988, "loss": 0.7516447901725769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.80617014170258, "step_time": 0.4503998794555664} +{"epoch": 0, "iter": 12752, "iter_tflops": 
44.119868811812765, "iter_time": 0.46761457061767575, "loss": 0.6311615109443665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.48025746083761, "step_time": 0.43451941108703607} +{"epoch": 0, "iter": 12753, "iter_tflops": 49.84907784270489, "iter_time": 0.4138711166381836, "loss": 0.00294246687553823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.908483139708515, "step_time": 0.3757359943389893} +{"epoch": 0, "iter": 12754, "iter_tflops": 41.85461347693322, "iter_time": 0.4929228057861328, "loss": 0.00230738939717412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.71505160054609, "step_time": 0.44163696289062504} +{"epoch": 0, "iter": 12755, "iter_tflops": 43.2887690119587, "iter_time": 0.4765922889709473, "loss": 0.003073263680562377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.84907201337571, "step_time": 0.43117019081115726} +{"epoch": 0, "iter": 12756, "iter_tflops": 44.935221778557924, "iter_time": 0.45912966918945314, "loss": 0.007443335372954607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.76676901710511, "step_time": 0.4145556144714355} +{"epoch": 0, "iter": 12757, "iter_tflops": 21.59962282563387, "iter_time": 0.955159896850586, "loss": 0.4374246895313263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.144440181912458, "step_time": 0.8914060287475587} +{"epoch": 0, "iter": 12758, "iter_tflops": 14.534010003872872, "iter_time": 1.4195045623779294, "loss": 0.4133971035480499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.629850550943402, "step_time": 1.1074213104248045} +{"epoch": 0, "iter": 12759, "iter_tflops": 44.1035366410153, "iter_time": 0.4677877349853516, "loss": 0.653281569480896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47782556281023, "step_time": 0.43454166793823246} +{"epoch": 0, "iter": 12760, "iter_tflops": 50.38699118819938, "iter_time": 0.40945277786254886, "loss": 0.34358498454093933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.47309874884801, "step_time": 0.3787391204833985} +{"epoch": 0, "iter": 12761, 
"iter_tflops": 41.049853807742856, "iter_time": 0.5025862846374511, "loss": 0.1321234405040741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64400367838392, "step_time": 0.462124626159668} +{"epoch": 0, "iter": 12762, "iter_tflops": 36.39398701857437, "iter_time": 0.5668819274902344, "loss": 0.04721401259303093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.34314492118591, "step_time": 0.4451811275482178} +{"epoch": 0, "iter": 12763, "iter_tflops": 52.96758532014295, "iter_time": 0.3895041351318359, "loss": 0.0987476110458374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.68824856854704, "step_time": 0.35763078308105467} +{"epoch": 0, "iter": 12764, "iter_tflops": 51.9490506649414, "iter_time": 0.39714091491699216, "loss": 0.04790113493800163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.74955472102478, "step_time": 0.3635463504791259} +{"epoch": 0, "iter": 12765, "iter_tflops": 28.58819629831221, "iter_time": 0.7216647491455076, "loss": 0.6439674496650696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.283649973148865, "step_time": 0.6812617874145508} +{"epoch": 0, "iter": 12766, "iter_tflops": 10.473467973230502, "iter_time": 1.9698435668945313, "loss": 0.6998122930526733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.27237554737061, "step_time": 1.681100242614746} +{"epoch": 0, "iter": 12767, "iter_tflops": 13.987133856979353, "iter_time": 1.4750050811767579, "loss": 0.642035961151123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.233491887842245, "step_time": 1.1971510848999023} +{"epoch": 0, "iter": 12768, "iter_tflops": 39.55553724342807, "iter_time": 0.5215728302001953, "loss": 0.7386535406112671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.117622992574105, "step_time": 0.47848401832580567} +{"epoch": 0, "iter": 12769, "iter_tflops": 23.7782570713695, "iter_time": 0.6699568252563476, "loss": 0.22141185402870178, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 25.278948048685034, "step_time": 0.6301846733093261} +{"epoch": 0, "iter": 
12770, "iter_tflops": 13.755266749236096, "iter_time": 1.15813134765625, "loss": 0.389056921005249, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 16.166858879832755, "step_time": 0.9853741989135743} +{"epoch": 0, "iter": 12771, "iter_tflops": 29.70784175001102, "iter_time": 0.5362357101440429, "loss": 0.4724779427051544, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 31.59339543421585, "step_time": 0.5042321472167969} +{"epoch": 0, "iter": 12772, "iter_tflops": 28.09843635289949, "iter_time": 0.5669498977661133, "loss": 0.13896575570106506, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.902376238219862, "step_time": 0.5327471466064454} +{"epoch": 0, "iter": 12773, "iter_tflops": 20.636224541621452, "iter_time": 0.9997513580322265, "loss": 0.1609387844800949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.6129186114249, "step_time": 0.9545723037719727} +{"epoch": 0, "iter": 12774, "iter_tflops": 19.129658198546924, "iter_time": 1.0784873046875, "loss": 0.22830934822559357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.377291583624764, "step_time": 0.8463242702484131} +{"epoch": 0, "iter": 12775, "iter_tflops": 37.211314985308796, "iter_time": 0.554430648803711, "loss": 0.13791808485984802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.9055907106918, "step_time": 0.5043587722778321} +{"epoch": 0, "iter": 12776, "iter_tflops": 41.90533960411999, "iter_time": 0.4923261260986328, "loss": 0.1490403264760971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.86991270032127, "step_time": 0.4497739868164063} +{"epoch": 0, "iter": 12777, "iter_tflops": 26.815415481586374, "iter_time": 0.7693743743896484, "loss": 0.6588046550750732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.062869029209523, "step_time": 0.7098780746459962} +{"epoch": 0, "iter": 12778, "iter_tflops": 38.48005541032759, "iter_time": 0.5361503067016601, "loss": 0.41258805990219116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29908318459673, "step_time": 0.487743278503418} +{"epoch": 0, 
"iter": 12779, "iter_tflops": 39.673898060696914, "iter_time": 0.5200167999267578, "loss": 0.651766836643219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.26666916312637, "step_time": 0.4768357238769531} +{"epoch": 0, "iter": 12780, "iter_tflops": 40.90960603012743, "iter_time": 0.5043092689514161, "loss": 0.5845247507095337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.70895062762534, "step_time": 0.46145331573486326} +{"epoch": 0, "iter": 12781, "iter_tflops": 14.807557288613664, "iter_time": 1.3932813568115234, "loss": 0.7164987921714783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.859018040730227, "step_time": 1.3009061126708985} +{"epoch": 0, "iter": 12782, "iter_tflops": 19.344674985048787, "iter_time": 1.0664998779296875, "loss": 0.7005394697189331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.068572299421874, "step_time": 0.7350246849060059} +{"epoch": 0, "iter": 12783, "iter_tflops": 44.620931354174466, "iter_time": 0.46236357879638673, "loss": 0.6029751896858215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.467693476436295, "step_time": 0.42566691398620604} +{"epoch": 0, "iter": 12784, "iter_tflops": 49.185143552697554, "iter_time": 0.41945782852172847, "loss": 0.5010738372802734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.467295754243835, "step_time": 0.38586379241943364} +{"epoch": 0, "iter": 12785, "iter_tflops": 28.127317368615415, "iter_time": 0.7334895553588866, "loss": 0.18818379938602448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.77383331531221, "step_time": 0.6929270172119142} +{"epoch": 0, "iter": 12786, "iter_tflops": 17.212145000206647, "iter_time": 1.1986358184814452, "loss": 0.2367563545703888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.064049628924057, "step_time": 0.9794457321166993} +{"epoch": 0, "iter": 12787, "iter_tflops": 45.736045448434474, "iter_time": 0.4510904541015625, "loss": 0.25607651472091675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.01254426618598, "step_time": 
0.41251837539672853} +{"epoch": 0, "iter": 12788, "iter_tflops": 41.03448401616793, "iter_time": 0.5027745323181153, "loss": 0.1802147775888443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.389198123019916, "step_time": 0.4647773418426514} +{"epoch": 0, "iter": 12789, "iter_tflops": 18.090282717678527, "iter_time": 1.140451690673828, "loss": 0.566519021987915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.36359721700138, "step_time": 1.0654576873779298} +{"epoch": 0, "iter": 12790, "iter_tflops": 18.989191406232237, "iter_time": 1.086465087890625, "loss": 0.42776376008987427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.189171677038473, "step_time": 0.9736621055603026} +{"epoch": 0, "iter": 12791, "iter_tflops": 38.03179846379672, "iter_time": 0.5424695739746094, "loss": 0.47691312432289124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.72749336391589, "step_time": 0.49442446327209477} +{"epoch": 0, "iter": 12792, "iter_tflops": 37.58857070593776, "iter_time": 0.5488661346435546, "loss": 0.6651684045791626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.94418035757874, "step_time": 0.5038834171295167} +{"epoch": 0, "iter": 12793, "iter_tflops": 18.477460812907786, "iter_time": 1.1165545806884765, "loss": 0.4796324670314789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.805417396587952, "step_time": 1.041689407348633} +{"epoch": 0, "iter": 12794, "iter_tflops": 15.622566705956478, "iter_time": 1.3205956420898437, "loss": 0.7833584547042847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.089907939814605, "step_time": 1.0807330017089842} +{"epoch": 0, "iter": 12795, "iter_tflops": 38.65814431212806, "iter_time": 0.533680389404297, "loss": 0.7050179839134216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.365804062848916, "step_time": 0.4869751434326172} +{"epoch": 0, "iter": 12796, "iter_tflops": 41.976172989764066, "iter_time": 0.4914953422546387, "loss": 0.7438123226165771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.6648767948619, 
"step_time": 0.45179347801208497} +{"epoch": 0, "iter": 12797, "iter_tflops": 21.508062473608895, "iter_time": 0.9592260360717774, "loss": 0.0684811994433403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.12349851821701, "step_time": 0.8922133255004883} +{"epoch": 0, "iter": 12798, "iter_tflops": 25.920063080992257, "iter_time": 0.7959507446289062, "loss": 0.07491205632686615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.276827347724392, "step_time": 0.70469020652771} +{"epoch": 0, "iter": 12799, "iter_tflops": 44.04465515045132, "iter_time": 0.46841310119628904, "loss": 0.07470674067735672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42134614583413, "step_time": 0.42607434844970704} +{"epoch": 0, "iter": 12800, "iter_tflops": 43.66457374378642, "iter_time": 0.472490436553955, "loss": 0.07314249873161316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.91093922584869, "step_time": 0.43061342239379885} +{"epoch": 0, "iter": 12801, "iter_tflops": 15.204163621439013, "iter_time": 1.3569370880126952, "loss": 0.33945000171661377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.146174686734202, "step_time": 1.277769744873047} +{"epoch": 0, "iter": 12802, "iter_tflops": 17.690223431691454, "iter_time": 1.16624267578125, "loss": 0.33691155910491943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.867238712792766, "step_time": 1.0384479598999024} +{"epoch": 0, "iter": 12803, "iter_tflops": 43.01457135793416, "iter_time": 0.4796303405761719, "loss": 0.24303081631660461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.255682422564796, "step_time": 0.446022897720337} +{"epoch": 0, "iter": 12804, "iter_tflops": 51.974131097659324, "iter_time": 0.3969492721557617, "loss": 0.36843472719192505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.248947528799114, "step_time": 0.36678185844421385} +{"epoch": 0, "iter": 12805, "iter_tflops": 21.760382521869673, "iter_time": 0.9481034393310548, "loss": 0.23623427748680115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.76913322063326, "step_time": 0.9060992050170898} +{"epoch": 0, "iter": 12806, "iter_tflops": 17.49549325694805, "iter_time": 1.1792233123779297, "loss": 0.23217587172985077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.42763437259083, "step_time": 0.962826467514038} +{"epoch": 0, "iter": 12807, "iter_tflops": 47.99269362214523, "iter_time": 0.4298798828125, "loss": 0.257514089345932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.83359969131675, "step_time": 0.3980254821777343} +{"epoch": 0, "iter": 12808, "iter_tflops": 45.13468417646542, "iter_time": 0.45710065078735346, "loss": 0.15260496735572815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.68457441937772, "step_time": 0.42377064514160157} +{"epoch": 0, "iter": 12809, "iter_tflops": 26.350352065566547, "iter_time": 0.7829532394409179, "loss": 0.43118467926979065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.81700252109293, "step_time": 0.7416720581054688} +{"epoch": 0, "iter": 12810, "iter_tflops": 14.410147544972, "iter_time": 1.4317059173583984, "loss": 0.3423326015472412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.526028002918697, "step_time": 1.2484000091552736} +{"epoch": 0, "iter": 12811, "iter_tflops": 47.63011692844982, "iter_time": 0.43315227508544923, "loss": 0.3106503188610077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.996529888356356, "step_time": 0.3967782764434815} +{"epoch": 0, "iter": 12812, "iter_tflops": 49.454394290620314, "iter_time": 0.4171741218566895, "loss": 0.3459440767765045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.25863477200503, "step_time": 0.38737556076049806} +{"epoch": 0, "iter": 12813, "iter_tflops": 43.52804751730878, "iter_time": 0.4739724082946777, "loss": 0.3993739187717438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70695297229814, "step_time": 0.432454647064209} +{"epoch": 0, "iter": 12814, "iter_tflops": 46.344641471695965, "iter_time": 0.4451667518615723, "loss": 0.39387211203575134, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 50.19912630914819, "step_time": 0.4109851112365722} +{"epoch": 0, "iter": 12815, "iter_tflops": 50.72618818980359, "iter_time": 0.4067148399353027, "loss": 0.28728702664375305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.93504129358896, "step_time": 0.3755543460845947} +{"epoch": 0, "iter": 12816, "iter_tflops": 44.58442905934874, "iter_time": 0.4627421264648437, "loss": 0.2991638779640198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.182224252750096, "step_time": 0.4281888980865479} +{"epoch": 0, "iter": 12817, "iter_tflops": 7.510303810590237, "iter_time": 0.920402687072754, "loss": 0.004180110991001129, "lr": 3e-05, "seqlen": 2800.0, "step_tflops": 7.89451237664102, "step_time": 0.8756087112426758} +{"epoch": 0, "iter": 12818, "iter_tflops": 8.51338102889755, "iter_time": 0.8119575271606445, "loss": 0.010045948438346386, "lr": 3e-05, "seqlen": 2800.0, "step_tflops": 10.43206700395602, "step_time": 0.6626207256317139} +{"epoch": 0, "iter": 12819, "iter_tflops": 19.36168760252944, "iter_time": 0.35701969528198246, "loss": 0.0006733000045642257, "lr": 3e-05, "seqlen": 2800.0, "step_tflops": 21.27135522052738, "step_time": 0.32496772003173835} +{"epoch": 0, "iter": 12820, "iter_tflops": 20.899678142002685, "iter_time": 0.3307469024658203, "loss": 0.003456285921856761, "lr": 3e-05, "seqlen": 2800.0, "step_tflops": 23.0053979683162, "step_time": 0.30047312450408936} +{"epoch": 0, "iter": 12821, "iter_tflops": 30.902742748444933, "iter_time": 0.6676136703491211, "loss": 0.5169904828071594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.92579546956555, "step_time": 0.6265936241149903} +{"epoch": 0, "iter": 12822, "iter_tflops": 18.83685569917487, "iter_time": 1.095251449584961, "loss": 0.6232866644859314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.685416717241953, "step_time": 0.9973738403320314} +{"epoch": 0, "iter": 12823, "iter_tflops": 36.7927739172718, "iter_time": 0.5607376480102539, "loss": 0.4113543927669525, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 40.22065847485469, "step_time": 0.512947681427002} +{"epoch": 0, "iter": 12824, "iter_tflops": 39.98099075422279, "iter_time": 0.5160225677490234, "loss": 0.5734871029853821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.54780277851201, "step_time": 0.47375739288330077} +{"epoch": 0, "iter": 12825, "iter_tflops": 20.19823485227517, "iter_time": 1.0214305191040038, "loss": 0.02854052186012268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.54301616251674, "step_time": 0.9576696853637696} +{"epoch": 0, "iter": 12826, "iter_tflops": 20.311744054426473, "iter_time": 1.0157224044799806, "loss": 0.014103135094046593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.101063741443408, "step_time": 0.8930798053741456} +{"epoch": 0, "iter": 12827, "iter_tflops": 50.94730259352608, "iter_time": 0.40494967269897464, "loss": 0.01871144399046898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.579186792603245, "step_time": 0.3712017879486084} +{"epoch": 0, "iter": 12828, "iter_tflops": 53.642535212112286, "iter_time": 0.3846032524108887, "loss": 0.05956612899899483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.68498249496705, "step_time": 0.35155661010742184} +{"epoch": 0, "iter": 12829, "iter_tflops": 28.795519992683936, "iter_time": 0.716468864440918, "loss": 0.5721067786216736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.418873494711647, "step_time": 0.6782333183288575} +{"epoch": 0, "iter": 12830, "iter_tflops": 10.823523537331047, "iter_time": 1.9061346740722656, "loss": 0.7154043912887573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.309152369048661, "step_time": 1.6760775146484377} +{"epoch": 0, "iter": 12831, "iter_tflops": 13.879387830098686, "iter_time": 1.486455581665039, "loss": 0.5917425751686096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.0879605245879, "step_time": 1.2073467445373536} +{"epoch": 0, "iter": 12832, "iter_tflops": 27.73619524013069, "iter_time": 0.743832862854004, "loss": 0.5702629685401917, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.806657105999264, "step_time": 0.6288691177368162} +{"epoch": 0, "iter": 12833, "iter_tflops": 19.861665189962146, "iter_time": 0.773215934753418, "loss": 0.3595944344997406, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 21.010965707141946, "step_time": 0.7309209976196289} +{"epoch": 0, "iter": 12834, "iter_tflops": 10.84400106506482, "iter_time": 1.416207534790039, "loss": 0.17846554517745972, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 13.647576090030807, "step_time": 1.1252808494567872} +{"epoch": 0, "iter": 12835, "iter_tflops": 25.768078460769424, "iter_time": 0.5959837493896485, "loss": 0.2793499231338501, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.775621633671733, "step_time": 0.5529077339172364} +{"epoch": 0, "iter": 12836, "iter_tflops": 25.426584902696103, "iter_time": 0.603988151550293, "loss": 0.21011857688426971, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 26.98354826624623, "step_time": 0.5691377525329591} +{"epoch": 0, "iter": 12837, "iter_tflops": 30.958457774049393, "iter_time": 0.6664121856689453, "loss": 0.6435294151306152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.971605268397205, "step_time": 0.6257230529785156} +{"epoch": 0, "iter": 12838, "iter_tflops": 32.72299480120447, "iter_time": 0.6304769363403321, "loss": 0.6480268836021423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.86283348843822, "step_time": 0.5752778434753418} +{"epoch": 0, "iter": 12839, "iter_tflops": 36.30808470707548, "iter_time": 0.5682231292724609, "loss": 0.6897560358047485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.47745124270279, "step_time": 0.5226044960021973} +{"epoch": 0, "iter": 12840, "iter_tflops": 34.59282310840997, "iter_time": 0.5963980865478515, "loss": 0.7478156089782715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.57037773125978, "step_time": 0.5491319160461425} +{"epoch": 0, "iter": 12841, "iter_tflops": 16.81660498384523, "iter_time": 1.1996978759765624, "loss": 
0.06479111313819885, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 17.709756769962237, "step_time": 1.1391938095092775} +{"epoch": 0, "iter": 12842, "iter_tflops": 18.796650626279717, "iter_time": 1.0733212890625001, "loss": 0.1130492240190506, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 26.21998146167332, "step_time": 0.7694454441070557} +{"epoch": 0, "iter": 12843, "iter_tflops": 53.60188248888223, "iter_time": 0.3763831481933594, "loss": 0.08743614703416824, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 58.45266422860291, "step_time": 0.3451484298706054} +{"epoch": 0, "iter": 12844, "iter_tflops": 47.694941530615225, "iter_time": 0.4229975891113281, "loss": 0.06227150186896324, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 51.64207004602357, "step_time": 0.3906668586730957} +{"epoch": 0, "iter": 12845, "iter_tflops": 16.940523361800903, "iter_time": 1.2178545532226561, "loss": 0.1401672661304474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.595345828893542, "step_time": 1.1725312881469727} +{"epoch": 0, "iter": 12846, "iter_tflops": 18.860486487195832, "iter_time": 1.0938791809082031, "loss": 0.16220571100711823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.267067199627597, "step_time": 0.9700958442687987} +{"epoch": 0, "iter": 12847, "iter_tflops": 50.034247368043026, "iter_time": 0.41233943939208983, "loss": 0.2938244640827179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.35423671912307, "step_time": 0.3795673484802246} +{"epoch": 0, "iter": 12848, "iter_tflops": 46.97579148172322, "iter_time": 0.4391856498718262, "loss": 0.2371402233839035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.0725804934796, "step_time": 0.403956356048584} +{"epoch": 0, "iter": 12849, "iter_tflops": 21.128399142806106, "iter_time": 0.9764626922607422, "loss": 0.9229230284690857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.09368710071391, "step_time": 0.9338003845214844} +{"epoch": 0, "iter": 12850, "iter_tflops": 22.822616487969302, "iter_time": 
0.9039758224487305, "loss": 0.628055989742279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.886566585083955, "step_time": 0.7398219299316405} +{"epoch": 0, "iter": 12851, "iter_tflops": 36.325477768017905, "iter_time": 0.567951057434082, "loss": 0.6659538149833679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.394207113117865, "step_time": 0.5237088146209717} +{"epoch": 0, "iter": 12852, "iter_tflops": 34.69250731183275, "iter_time": 0.5946844177246093, "loss": 0.6082226634025574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.21951174226774, "step_time": 0.5543085479736328} +{"epoch": 0, "iter": 12853, "iter_tflops": 27.128137853029152, "iter_time": 0.7605053329467774, "loss": 0.15309731662273407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.104133941592213, "step_time": 0.6853242664337158} +{"epoch": 0, "iter": 12854, "iter_tflops": 38.36908941321449, "iter_time": 0.5377008895874023, "loss": 0.2951107919216156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03595950628858, "step_time": 0.49079630279541014} +{"epoch": 0, "iter": 12855, "iter_tflops": 40.35950911898765, "iter_time": 0.5111829643249513, "loss": 0.1999209076166153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.12830101453269, "step_time": 0.4675252170562744} +{"epoch": 0, "iter": 12856, "iter_tflops": 40.24407804317742, "iter_time": 0.5126491775512696, "loss": 0.19211336970329285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.03688075181258, "step_time": 0.4684957962036133} +{"epoch": 0, "iter": 12857, "iter_tflops": 32.91891238499929, "iter_time": 0.6267246398925781, "loss": 0.3633483052253723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.51046917866676, "step_time": 0.5650733604431152} +{"epoch": 0, "iter": 12858, "iter_tflops": 34.715419188242905, "iter_time": 0.5942919311523438, "loss": 0.3105473518371582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.29458338175978, "step_time": 0.5387470417022705} +{"epoch": 0, "iter": 12859, "iter_tflops": 39.17008400130673, 
"iter_time": 0.5267053680419921, "loss": 0.40287673473358154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94273141202427, "step_time": 0.48043272590637204} +{"epoch": 0, "iter": 12860, "iter_tflops": 40.186347432636836, "iter_time": 0.5133856353759766, "loss": 0.3102572560310364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.80867185691897, "step_time": 0.4709362926483154} +{"epoch": 0, "iter": 12861, "iter_tflops": 18.257906644818213, "iter_time": 1.1299813232421874, "loss": 0.3929426074028015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.576413061600835, "step_time": 1.0538750610351562} +{"epoch": 0, "iter": 12862, "iter_tflops": 17.30035094211386, "iter_time": 1.1925245666503907, "loss": 0.3367593288421631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.180096893621908, "step_time": 0.9740792789459229} +{"epoch": 0, "iter": 12863, "iter_tflops": 38.517073279895484, "iter_time": 0.5356350250244141, "loss": 0.44751179218292236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.17012208398011, "step_time": 0.48923485374450687} +{"epoch": 0, "iter": 12864, "iter_tflops": 40.96934180837252, "iter_time": 0.5035739555358887, "loss": 0.3278440833091736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.63753630712249, "step_time": 0.4621915817260742} +{"epoch": 0, "iter": 12865, "iter_tflops": 31.435174320848223, "iter_time": 0.6563059997558593, "loss": 0.382904976606369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.84686747267027, "step_time": 0.5920501613616943} +{"epoch": 0, "iter": 12866, "iter_tflops": 37.69904668918275, "iter_time": 0.547257698059082, "loss": 0.4512786269187927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.43189223675672, "step_time": 0.49795199775695803} +{"epoch": 0, "iter": 12867, "iter_tflops": 38.41678283810479, "iter_time": 0.5370333480834961, "loss": 0.4682130217552185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80137167868736, "step_time": 0.49355063438415525} +{"epoch": 0, "iter": 12868, "iter_tflops": 
41.85402040592806, "iter_time": 0.4929297904968262, "loss": 0.43885546922683716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.494378151610995, "step_time": 0.4534866580963135} +{"epoch": 0, "iter": 12869, "iter_tflops": 17.736266416260644, "iter_time": 1.163215133666992, "loss": 0.012606551870703697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.815428490194556, "step_time": 1.0964987335205076} +{"epoch": 0, "iter": 12870, "iter_tflops": 17.04115337287209, "iter_time": 1.2106629791259766, "loss": 0.02074618637561798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.551760984013058, "step_time": 0.7488121547698975} +{"epoch": 0, "iter": 12871, "iter_tflops": 43.15661704186229, "iter_time": 0.47805168533325193, "loss": 0.039051156491041183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.36633809473597, "step_time": 0.43556446075439453} +{"epoch": 0, "iter": 12872, "iter_tflops": 37.59224574980022, "iter_time": 0.5488124771118164, "loss": 0.010692555457353592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.37164059548756, "step_time": 0.49867719078063966} +{"epoch": 0, "iter": 12873, "iter_tflops": 21.225632426379562, "iter_time": 0.9719895782470702, "loss": 0.48757120966911316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.763054728083322, "step_time": 0.906341163635254} +{"epoch": 0, "iter": 12874, "iter_tflops": 13.793694234645553, "iter_time": 1.4956902160644532, "loss": 0.5766990184783936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.927399073963239, "step_time": 1.2953209381103516} +{"epoch": 0, "iter": 12875, "iter_tflops": 46.36786358856524, "iter_time": 0.44494380187988286, "loss": 0.6979752779006958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.307586627891396, "step_time": 0.4100990505218506} +{"epoch": 0, "iter": 12876, "iter_tflops": 44.59472144362972, "iter_time": 0.4626353263854981, "loss": 0.4423184394836426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24282673247183, "step_time": 0.42765100860595706} +{"epoch": 0, "iter": 
12877, "iter_tflops": 30.759029708767248, "iter_time": 0.67073291015625, "loss": 0.0048485505394637585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.775024310139656, "step_time": 0.6294760704040527} +{"epoch": 0, "iter": 12878, "iter_tflops": 10.211491013454554, "iter_time": 2.020380126953125, "loss": 0.007450522854924202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.302402162205896, "step_time": 1.6769971618652344} +{"epoch": 0, "iter": 12879, "iter_tflops": 13.753476966686696, "iter_time": 1.5000638427734376, "loss": 0.0012011739891022444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.01859911195406, "step_time": 1.2879461784362793} +{"epoch": 0, "iter": 12880, "iter_tflops": 41.7771527726559, "iter_time": 0.49383675384521486, "loss": 0.002613915130496025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.22036712930135, "step_time": 0.4463636875152588} +{"epoch": 0, "iter": 12881, "iter_tflops": 21.051952829304895, "iter_time": 0.7489368286132811, "loss": 0.2342076599597931, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 22.962384069801736, "step_time": 0.6866265602111816} +{"epoch": 0, "iter": 12882, "iter_tflops": 23.691701498665832, "iter_time": 0.6654896774291992, "loss": 0.18364740908145905, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 25.416641149179974, "step_time": 0.6203251914978027} +{"epoch": 0, "iter": 12883, "iter_tflops": 23.129023905268742, "iter_time": 0.681679557800293, "loss": 0.1413680762052536, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 24.990976793791923, "step_time": 0.6308910179138184} +{"epoch": 0, "iter": 12884, "iter_tflops": 24.92375959085841, "iter_time": 0.6325924758911132, "loss": 0.2956324815750122, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 26.660959791584045, "step_time": 0.5913734130859375} +{"epoch": 0, "iter": 12885, "iter_tflops": 21.48446927740772, "iter_time": 0.9602794113159179, "loss": 0.6794439554214478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.175667667415052, "step_time": 0.8902049255371094} 
+{"epoch": 0, "iter": 12886, "iter_tflops": 15.256582253792796, "iter_time": 1.3522749176025388, "loss": 0.6766980886459351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.00841794099047, "step_time": 1.0311206798553467} +{"epoch": 0, "iter": 12887, "iter_tflops": 44.47586445709255, "iter_time": 0.4638716697692871, "loss": 0.6110755205154419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76150133533993, "step_time": 0.43196074104309085} +{"epoch": 0, "iter": 12888, "iter_tflops": 43.31811807344805, "iter_time": 0.47626938629150395, "loss": 0.6937615871429443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90314449500592, "step_time": 0.4398658924102783} +{"epoch": 0, "iter": 12889, "iter_tflops": 33.71542683459317, "iter_time": 0.6119185028076172, "loss": 0.06985527276992798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.93592557364482, "step_time": 0.5741077537536621} +{"epoch": 0, "iter": 12890, "iter_tflops": 13.860465636963506, "iter_time": 1.488484878540039, "loss": 0.03626169264316559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.19618092784192, "step_time": 1.2738245887756345} +{"epoch": 0, "iter": 12891, "iter_tflops": 40.99997791015644, "iter_time": 0.5031976737976074, "loss": 0.09664075821638107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.30959162348457, "step_time": 0.38700528144836427} +{"epoch": 0, "iter": 12892, "iter_tflops": 52.626170355134974, "iter_time": 0.392031063079834, "loss": 0.0440511628985405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.37846643587927, "step_time": 0.3595616054534912} +{"epoch": 0, "iter": 12893, "iter_tflops": 25.558804352563918, "iter_time": 0.807201042175293, "loss": 0.0475102961063385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.942896940127056, "step_time": 0.7657340469360351} +{"epoch": 0, "iter": 12894, "iter_tflops": 16.019152287783363, "iter_time": 1.2879017028808595, "loss": 0.017380498349666595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.955488052857515, "step_time": 
0.9845198287963868} +{"epoch": 0, "iter": 12895, "iter_tflops": 48.78215239683048, "iter_time": 0.4229229850769043, "loss": 0.03610248491168022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.12936642958333, "step_time": 0.38831807899475096} +{"epoch": 0, "iter": 12896, "iter_tflops": 52.94996457488291, "iter_time": 0.3896337547302246, "loss": 0.011433242820203304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.588424225374446, "step_time": 0.3582507038116455} +{"epoch": 0, "iter": 12897, "iter_tflops": 34.14203917071121, "iter_time": 0.604272445678711, "loss": 0.6161763668060303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.61560074359694, "step_time": 0.5634509086608886} +{"epoch": 0, "iter": 12898, "iter_tflops": 31.05900380371902, "iter_time": 0.6642548370361329, "loss": 0.6133314967155457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.26294853675285, "step_time": 0.6021400489807129} +{"epoch": 0, "iter": 12899, "iter_tflops": 38.493637734681506, "iter_time": 0.5359611282348633, "loss": 0.6025241613388062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.74001371854768, "step_time": 0.4942761554718017} +{"epoch": 0, "iter": 12900, "iter_tflops": 35.91562295640238, "iter_time": 0.5744322891235352, "loss": 0.6658126711845398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.8666931621386, "step_time": 0.5308167953491211} +{"epoch": 0, "iter": 12901, "iter_tflops": 20.392123930155734, "iter_time": 1.0117187194824218, "loss": 0.006687760818749666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.76624069178838, "step_time": 0.9478482666015625} +{"epoch": 0, "iter": 12902, "iter_tflops": 18.12552654823328, "iter_time": 1.138234161376953, "loss": 0.00339365447871387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.49411104011811, "step_time": 0.9171775436401367} +{"epoch": 0, "iter": 12903, "iter_tflops": 43.568649259186635, "iter_time": 0.4735307121276856, "loss": 0.015462537296116352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.19035285148646, 
"step_time": 0.42811667251586916} +{"epoch": 0, "iter": 12904, "iter_tflops": 45.152700989303675, "iter_time": 0.45691825866699215, "loss": 0.006745346821844578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.73276691320896, "step_time": 0.4148390445709228} +{"epoch": 0, "iter": 12905, "iter_tflops": 19.965364256304408, "iter_time": 1.0333442077636719, "loss": 0.6430837512016296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.303433727343865, "step_time": 0.9684398193359375} +{"epoch": 0, "iter": 12906, "iter_tflops": 25.462813992607213, "iter_time": 0.8102440490722655, "loss": 0.6162155270576477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.383273669407334, "step_time": 0.7268750514984131} +{"epoch": 0, "iter": 12907, "iter_tflops": 40.40531818974289, "iter_time": 0.5106034164428712, "loss": 0.5642498135566711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.33507812322585, "step_time": 0.47608298873901367} +{"epoch": 0, "iter": 12908, "iter_tflops": 42.89948382625481, "iter_time": 0.48091705703735355, "loss": 0.6012201905250549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.487331996670534, "step_time": 0.44380033493041987} +{"epoch": 0, "iter": 12909, "iter_tflops": 18.895364982956426, "iter_time": 1.0918600158691407, "loss": 0.6583344340324402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.686192898296493, "step_time": 1.0479981384277344} +{"epoch": 0, "iter": 12910, "iter_tflops": 18.200662186319633, "iter_time": 1.1335353240966797, "loss": 0.7038866281509399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.08278237360471, "step_time": 0.8937871170043945} +{"epoch": 0, "iter": 12911, "iter_tflops": 41.595546420718236, "iter_time": 0.49599284744262695, "loss": 0.9087852239608765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.71915231488974, "step_time": 0.46134804534912116} +{"epoch": 0, "iter": 12912, "iter_tflops": 47.571340395531706, "iter_time": 0.43368745422363275, "loss": 0.7145388126373291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
51.23972829741915, "step_time": 0.40263862037658693} +{"epoch": 0, "iter": 12913, "iter_tflops": 25.03352354542536, "iter_time": 0.8241386184692384, "loss": 0.4987964630126953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.275989425211147, "step_time": 0.7851690444946289} +{"epoch": 0, "iter": 12914, "iter_tflops": 13.126394511710494, "iter_time": 1.5717258453369143, "loss": 0.5432918071746826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.930124731727478, "step_time": 1.38184334564209} +{"epoch": 0, "iter": 12915, "iter_tflops": 49.00327432953099, "iter_time": 0.421014591217041, "loss": 0.5985256433486938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.390419464127895, "step_time": 0.38641939353942867} +{"epoch": 0, "iter": 12916, "iter_tflops": 49.656167991604484, "iter_time": 0.41547896957397457, "loss": 0.49594172835350037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.67230742736774, "step_time": 0.38438991165161135} +{"epoch": 0, "iter": 12917, "iter_tflops": 14.973644526654647, "iter_time": 0.8351995162963867, "loss": 0.003622655291110277, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 15.822810980245583, "step_time": 0.7903766708374024} +{"epoch": 0, "iter": 12918, "iter_tflops": 7.889044570021901, "iter_time": 1.5852338714599608, "loss": 0.0035116805229336023, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 9.618773477883842, "step_time": 1.3001637573242188} +{"epoch": 0, "iter": 12919, "iter_tflops": 30.695476076540636, "iter_time": 0.40742097091674806, "loss": 0.003502601059153676, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 34.246344823529675, "step_time": 0.36517709350585936} +{"epoch": 0, "iter": 12920, "iter_tflops": 34.87483069754533, "iter_time": 0.35859616851806636, "loss": 0.02006182074546814, "lr": 3e-05, "seqlen": 5024.0, "step_tflops": 38.112141843547825, "step_time": 0.3281363906860352} +{"epoch": 0, "iter": 12921, "iter_tflops": 24.39775623612636, "iter_time": 0.8456143798828124, "loss": 0.0018041374860331416, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 25.70102404170312, "step_time": 0.8027342987060547} +{"epoch": 0, "iter": 12922, "iter_tflops": 12.611766214977063, "iter_time": 1.6358607635498048, "loss": 0.011391152627766132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.074037628402962, "step_time": 1.2083312664031982} +{"epoch": 0, "iter": 12923, "iter_tflops": 46.28215066149461, "iter_time": 0.445767822265625, "loss": 0.002035473706200719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.241130823338814, "step_time": 0.40262759971618656} +{"epoch": 0, "iter": 12924, "iter_tflops": 43.759487189645604, "iter_time": 0.47146561431884765, "loss": 0.017323672771453857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.26263894486885, "step_time": 0.42747545433044437} +{"epoch": 0, "iter": 12925, "iter_tflops": 18.188592260831463, "iter_time": 1.1342875366210936, "loss": 0.7131164073944092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.297371475177606, "step_time": 1.0691141815185545} +{"epoch": 0, "iter": 12926, "iter_tflops": 20.85228198494391, "iter_time": 0.9893926010131836, "loss": 0.6285938024520874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.869244791152905, "step_time": 0.7402817573547364} +{"epoch": 0, "iter": 12927, "iter_tflops": 43.68671610501491, "iter_time": 0.47225095748901375, "loss": 0.6454218029975891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.04153160885549, "step_time": 0.438571891784668} +{"epoch": 0, "iter": 12928, "iter_tflops": 43.56376377339801, "iter_time": 0.4735838165283203, "loss": 0.6735014915466309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79965865531903, "step_time": 0.4408385467529297} +{"epoch": 0, "iter": 12929, "iter_tflops": 31.33969724293135, "iter_time": 0.6583054504394531, "loss": 0.7464404702186584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.30789389266744, "step_time": 0.6194055252075197} +{"epoch": 0, "iter": 12930, "iter_tflops": 21.973369453582286, "iter_time": 0.9389135131835937, "loss": 
0.7637462615966797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.02866865603912, "step_time": 0.6870465602874756} +{"epoch": 0, "iter": 12931, "iter_tflops": 42.655986478870965, "iter_time": 0.48366232299804685, "loss": 0.8028948903083801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.96610540432907, "step_time": 0.44883275032043457} +{"epoch": 0, "iter": 12932, "iter_tflops": 43.01344617912968, "iter_time": 0.47964288711547853, "loss": 0.6355462670326233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15862896299691, "step_time": 0.44696070861816406} +{"epoch": 0, "iter": 12933, "iter_tflops": 38.05975129611387, "iter_time": 0.5420711593627929, "loss": 0.721535861492157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.105195293156676, "step_time": 0.5019096336364747} +{"epoch": 0, "iter": 12934, "iter_tflops": 46.114552374481185, "iter_time": 0.4473879165649414, "loss": 0.7368614673614502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.953892363956896, "step_time": 0.41300272178649905} +{"epoch": 0, "iter": 12935, "iter_tflops": 47.07595177364989, "iter_time": 0.4382512245178222, "loss": 0.6169700622558594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.04569629521489, "step_time": 0.4041691074371338} +{"epoch": 0, "iter": 12936, "iter_tflops": 44.77512954675244, "iter_time": 0.4607712745666504, "loss": 0.7906503081321716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.66482311886148, "step_time": 0.42394263839721685} +{"epoch": 0, "iter": 12937, "iter_tflops": 28.103112547406234, "iter_time": 0.7341212997436524, "loss": 0.7251206040382385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.859392032661024, "step_time": 0.6909415130615235} +{"epoch": 0, "iter": 12938, "iter_tflops": 11.951586894453825, "iter_time": 1.7262221069335937, "loss": 0.7681712508201599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.564264520155039, "step_time": 1.416555809020996} +{"epoch": 0, "iter": 12939, "iter_tflops": 43.846602914813936, "iter_time": 
0.4705288925170899, "loss": 0.7367146611213684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.79834757537416, "step_time": 0.4316277561187744} +{"epoch": 0, "iter": 12940, "iter_tflops": 45.18941892627678, "iter_time": 0.4565469970703125, "loss": 0.7489691376686096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62453267713718, "step_time": 0.4242939186096191} +{"epoch": 0, "iter": 12941, "iter_tflops": 38.872951925154005, "iter_time": 0.530731330871582, "loss": 0.3747994303703308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.92040083103241, "step_time": 0.49214924240112307} +{"epoch": 0, "iter": 12942, "iter_tflops": 35.798054486757984, "iter_time": 0.57631884765625, "loss": 0.3300556540489197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.927387395972346, "step_time": 0.5040901660919189} +{"epoch": 0, "iter": 12943, "iter_tflops": 36.9963203621858, "iter_time": 0.5576525802612304, "loss": 0.3787223696708679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.60653607878869, "step_time": 0.5080732192993164} +{"epoch": 0, "iter": 12944, "iter_tflops": 39.113188393651484, "iter_time": 0.5274715347290039, "loss": 0.4741649627685547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.770268745488615, "step_time": 0.48236997604370124} +{"epoch": 0, "iter": 12945, "iter_tflops": 24.256005395582413, "iter_time": 0.8505561065673829, "loss": 0.006123004946857691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.040662964862076, "step_time": 0.7922645263671874} +{"epoch": 0, "iter": 12946, "iter_tflops": 9.309666265714409, "iter_time": 2.216093780517578, "loss": 0.007055680267512798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.814815399481358, "step_time": 1.9076695022583008} +{"epoch": 0, "iter": 12947, "iter_tflops": 11.181283605503582, "iter_time": 1.8451453552246095, "loss": 0.006639633793383837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.311793492726515, "step_time": 1.5498357543945311} +{"epoch": 0, "iter": 12948, "iter_tflops": 40.9929248159915, 
"iter_time": 0.503284252166748, "loss": 0.0020452830940485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.805661186309266, "step_time": 0.35083515930175785} +{"epoch": 0, "iter": 12949, "iter_tflops": 20.605366098249185, "iter_time": 0.719524543762207, "loss": 0.2905499041080475, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 21.90349440022643, "step_time": 0.6768813400268555} +{"epoch": 0, "iter": 12950, "iter_tflops": 13.783887314221312, "iter_time": 1.0756085205078125, "loss": 0.2744998633861542, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 16.24101888321367, "step_time": 0.9128778648376464} +{"epoch": 0, "iter": 12951, "iter_tflops": 26.737593601085038, "iter_time": 0.5545026550292969, "loss": 0.16278310120105743, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.46943203183565, "step_time": 0.5207714233398437} +{"epoch": 0, "iter": 12952, "iter_tflops": 25.413956599102555, "iter_time": 0.5833828582763672, "loss": 0.3379252552986145, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 26.984049934537, "step_time": 0.5494381561279297} +{"epoch": 0, "iter": 12953, "iter_tflops": 34.9864788372377, "iter_time": 0.5896876220703124, "loss": 0.12828321754932404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.69828489437854, "step_time": 0.5472687568664552} +{"epoch": 0, "iter": 12954, "iter_tflops": 29.75168088575585, "iter_time": 0.69344295501709, "loss": 0.13357968628406525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.7588421228483, "step_time": 0.5612552604675293} +{"epoch": 0, "iter": 12955, "iter_tflops": 49.1441145745529, "iter_time": 0.41980802154541014, "loss": 0.07259087264537811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.652487548360334, "step_time": 0.3845319099426269} +{"epoch": 0, "iter": 12956, "iter_tflops": 50.00517106269224, "iter_time": 0.4125792007446289, "loss": 0.12123264372348785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.02923503516543, "step_time": 0.3818505573272705} +{"epoch": 0, "iter": 12957, "iter_tflops": 
31.18283587745583, "iter_time": 0.6616169738769531, "loss": 0.25786179304122925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.30730558616088, "step_time": 0.6194164657592774} +{"epoch": 0, "iter": 12958, "iter_tflops": 16.1759122006836, "iter_time": 1.2754207153320312, "loss": 0.24546606838703156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.981827175334075, "step_time": 1.147330207824707} +{"epoch": 0, "iter": 12959, "iter_tflops": 52.79803063503255, "iter_time": 0.3907549819946289, "loss": 0.2834177613258362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.55729435869493, "step_time": 0.3584444637298584} +{"epoch": 0, "iter": 12960, "iter_tflops": 54.85863569632632, "iter_time": 0.3760774078369141, "loss": 0.26061874628067017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.45080373169078, "step_time": 0.34702799987792965} +{"epoch": 0, "iter": 12961, "iter_tflops": 30.862883088154202, "iter_time": 0.6684758987426758, "loss": 0.3636123538017273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.90835376404472, "step_time": 0.626925724029541} +{"epoch": 0, "iter": 12962, "iter_tflops": 12.240105012858985, "iter_time": 1.6855323944091798, "loss": 0.5878214836120605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.071789560585422, "step_time": 1.4661314697265628} +{"epoch": 0, "iter": 12963, "iter_tflops": 16.936292117169792, "iter_time": 1.2181588134765624, "loss": 0.5218132734298706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.780608901905673, "step_time": 1.0985316619873047} +{"epoch": 0, "iter": 12964, "iter_tflops": 27.75270670059628, "iter_time": 0.7433903198242188, "loss": 0.3975968062877655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.417318078019925, "step_time": 0.5234017562866211} +{"epoch": 0, "iter": 12965, "iter_tflops": 18.523586427516058, "iter_time": 0.7497493362426757, "loss": 0.23460161685943604, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 19.66585819788303, "step_time": 0.7062008934020997} +{"epoch": 0, "iter": 12966, 
"iter_tflops": 8.684198332762726, "iter_time": 1.5992318572998048, "loss": 0.2843822240829468, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 10.076786286700097, "step_time": 1.378221809387207} +{"epoch": 0, "iter": 12967, "iter_tflops": 21.028979417186466, "iter_time": 0.6604241867065429, "loss": 0.25534549355506897, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 22.711337613669734, "step_time": 0.6115028038024903} +{"epoch": 0, "iter": 12968, "iter_tflops": 21.28857981782217, "iter_time": 0.6523707427978516, "loss": 0.15050673484802246, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 22.789968096249744, "step_time": 0.6093929824829102} +{"epoch": 0, "iter": 12969, "iter_tflops": 23.179991134015935, "iter_time": 0.8900388870239258, "loss": 0.020375337451696396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.761831201026734, "step_time": 0.8331812515258789} +{"epoch": 0, "iter": 12970, "iter_tflops": 9.410917565330784, "iter_time": 2.192251007080078, "loss": 0.07292851060628891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.243979062220557, "step_time": 1.6849990844726561} +{"epoch": 0, "iter": 12971, "iter_tflops": 15.469621383336365, "iter_time": 1.3336521301269533, "loss": 0.025627022609114647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.604501298813307, "step_time": 1.0523651275634767} +{"epoch": 0, "iter": 12972, "iter_tflops": 29.48335211363817, "iter_time": 0.6997539978027344, "loss": 0.03451940789818764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.454485838200306, "step_time": 0.49768060302734374} +{"epoch": 0, "iter": 12973, "iter_tflops": 19.253934851991264, "iter_time": 0.8273844146728514, "loss": 0.18955866992473602, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 20.285612298173763, "step_time": 0.7853056335449219} +{"epoch": 0, "iter": 12974, "iter_tflops": 28.768566533279948, "iter_time": 0.5537434616088867, "loss": 0.15038849413394928, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.688127986437113, "step_time": 0.5191064643859863} 
+{"epoch": 0, "iter": 12975, "iter_tflops": 28.846415803983486, "iter_time": 0.5522490463256835, "loss": 0.3511632978916168, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.79483447876578, "step_time": 0.5173077201843261} +{"epoch": 0, "iter": 12976, "iter_tflops": 30.139303135646937, "iter_time": 0.5285591888427735, "loss": 0.16970524191856384, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 32.0119818309551, "step_time": 0.49763884353637694} +{"epoch": 0, "iter": 12977, "iter_tflops": 31.698056785911827, "iter_time": 0.6508630371093751, "loss": 0.5366615056991577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.6838891547891, "step_time": 0.6124914321899414} +{"epoch": 0, "iter": 12978, "iter_tflops": 13.134825288230786, "iter_time": 1.570717010498047, "loss": 0.4254717528820038, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.116950127722884, "step_time": 1.2800867004394532} +{"epoch": 0, "iter": 12979, "iter_tflops": 36.541778843208355, "iter_time": 0.5645891952514648, "loss": 0.4865451157093048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.9916397911137, "step_time": 0.5158851604461671} +{"epoch": 0, "iter": 12980, "iter_tflops": 37.43572222853399, "iter_time": 0.5511071319580079, "loss": 0.4894700050354004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.84705782191392, "step_time": 0.5050815067291259} +{"epoch": 0, "iter": 12981, "iter_tflops": 17.25717471242195, "iter_time": 1.1955081787109374, "loss": 0.014157017692923546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.29605799218823, "step_time": 1.1276250610351561} +{"epoch": 0, "iter": 12982, "iter_tflops": 25.430253783944487, "iter_time": 0.8112814636230469, "loss": 0.008695503696799278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.63621441988358, "step_time": 0.578936170578003} +{"epoch": 0, "iter": 12983, "iter_tflops": 57.57128187160739, "iter_time": 0.3583573760986328, "loss": 0.009565788321197033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.92092590042296, "step_time": 
0.3278892230987549} +{"epoch": 0, "iter": 12984, "iter_tflops": 59.720284934765836, "iter_time": 0.34546207427978515, "loss": 0.0068667177110910416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.50568184382915, "step_time": 0.31495120620727535} +{"epoch": 0, "iter": 12985, "iter_tflops": 38.20429239540851, "iter_time": 0.5400203018188476, "loss": 0.6140482425689697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.4955023458022, "step_time": 0.4971886672973633} +{"epoch": 0, "iter": 12986, "iter_tflops": 44.230590380293286, "iter_time": 0.4664440002441407, "loss": 0.41827550530433655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69220321070054, "step_time": 0.4325883922576904} +{"epoch": 0, "iter": 12987, "iter_tflops": 46.08500187484729, "iter_time": 0.44767478942871086, "loss": 0.5490632653236389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.74626755181182, "step_time": 0.41472646141052244} +{"epoch": 0, "iter": 12988, "iter_tflops": 42.913963727301095, "iter_time": 0.48075478744506833, "loss": 0.8276956677436829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15073216167947, "step_time": 0.44703718757629396} +{"epoch": 0, "iter": 12989, "iter_tflops": 24.999350910730925, "iter_time": 0.8252651672363281, "loss": 0.19422022998332977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.3326423714736, "step_time": 0.7834798049926758} +{"epoch": 0, "iter": 12990, "iter_tflops": 12.529049628487735, "iter_time": 1.646660690307617, "loss": 0.23940955102443695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.73689293752443, "step_time": 1.399962230682373} +{"epoch": 0, "iter": 12991, "iter_tflops": 49.07935800891493, "iter_time": 0.4203619270324708, "loss": 0.3040771782398224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.442190278716595, "step_time": 0.38604505920410154} +{"epoch": 0, "iter": 12992, "iter_tflops": 49.4419813164999, "iter_time": 0.41727885818481447, "loss": 0.20104163885116577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.2097663187737, 
"step_time": 0.38773133087158207} +{"epoch": 0, "iter": 12993, "iter_tflops": 38.37359770663088, "iter_time": 0.5376377182006835, "loss": 0.11568541824817657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.176475161331744, "step_time": 0.501040786743164} +{"epoch": 0, "iter": 12994, "iter_tflops": 22.277995959117394, "iter_time": 0.9260749282836914, "loss": 0.09346375614404678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.138236458173363, "step_time": 0.7602223358154298} +{"epoch": 0, "iter": 12995, "iter_tflops": 48.54089391465557, "iter_time": 0.4250250015258789, "loss": 0.09718450158834457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.6320971167842, "step_time": 0.39198691749572756} +{"epoch": 0, "iter": 12996, "iter_tflops": 48.936717454320835, "iter_time": 0.42158719635009767, "loss": 0.08416371047496796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.00443129580978, "step_time": 0.38923337173461914} +{"epoch": 0, "iter": 12997, "iter_tflops": 38.720808973444655, "iter_time": 0.5328166961669922, "loss": 0.16874779760837555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.5666464048063, "step_time": 0.4963376960754395} +{"epoch": 0, "iter": 12998, "iter_tflops": 12.19705378223144, "iter_time": 1.6914817199707033, "loss": 0.1967698037624359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.049673480411432, "step_time": 1.3708665199279786} +{"epoch": 0, "iter": 12999, "iter_tflops": 46.72957996256419, "iter_time": 0.4414996566772461, "loss": 0.14743958413600922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69683198427282, "step_time": 0.40695034980773925} +{"epoch": 0, "iter": 13000, "iter_tflops": 47.29946674925155, "iter_time": 0.4361802558898926, "loss": 0.18910259008407593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.95526687949095, "step_time": 0.4048863792419434} +{"epoch": 0, "iter": 13001, "iter_tflops": 25.130360869499196, "iter_time": 0.8209628829956054, "loss": 0.6050912141799927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
26.523540497736576, "step_time": 0.7778408584594727} +{"epoch": 0, "iter": 13002, "iter_tflops": 15.374610644412275, "iter_time": 1.3418937225341796, "loss": 0.49881991744041443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.4746468377539, "step_time": 1.0593821640014647} +{"epoch": 0, "iter": 13003, "iter_tflops": 39.19027340237009, "iter_time": 0.5264340286254883, "loss": 0.7580114603042603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.02665115196514, "step_time": 0.47949568367004397} +{"epoch": 0, "iter": 13004, "iter_tflops": 42.45030622686757, "iter_time": 0.48600576400756834, "loss": 0.47115403413772583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.13345378556208, "step_time": 0.4472046165466309} +{"epoch": 0, "iter": 13005, "iter_tflops": 2.823181251161992, "iter_time": 0.5968832397460937, "loss": 0.19670070707798004, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 3.117287095904965, "step_time": 0.540569257736206} +{"epoch": 0, "iter": 13006, "iter_tflops": 2.9520646140040485, "iter_time": 0.5708240814208985, "loss": 0.15893568098545074, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 3.260628545095457, "step_time": 0.51680513381958} +{"epoch": 0, "iter": 13007, "iter_tflops": 3.158025476604626, "iter_time": 0.5335959396362305, "loss": 0.1723077893257141, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 3.4551873543107794, "step_time": 0.4877042541503906} +{"epoch": 0, "iter": 13008, "iter_tflops": 3.047898904273005, "iter_time": 0.5528758087158203, "loss": 0.4741289019584656, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 3.3325000112372973, "step_time": 0.5056592845916749} +{"epoch": 0, "iter": 13009, "iter_tflops": 20.959218038255084, "iter_time": 0.9843446197509765, "loss": 0.5408703088760376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.591808023287943, "step_time": 0.9132112617492676} +{"epoch": 0, "iter": 13010, "iter_tflops": 19.90880072521499, "iter_time": 1.0362800750732422, "loss": 0.6378013491630554, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 23.738977280156778, "step_time": 0.8690809745788575} +{"epoch": 0, "iter": 13011, "iter_tflops": 35.03793039672148, "iter_time": 0.5888216934204102, "loss": 0.39724525809288025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.397063645576246, "step_time": 0.5373091468811035} +{"epoch": 0, "iter": 13012, "iter_tflops": 35.69311975583109, "iter_time": 0.5780131759643554, "loss": 0.6120284199714661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.84848970775802, "step_time": 0.531065523147583} +{"epoch": 0, "iter": 13013, "iter_tflops": 20.349539485200978, "iter_time": 1.0138358917236328, "loss": 0.1880422979593277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.924851529257197, "step_time": 0.9409912528991698} +{"epoch": 0, "iter": 13014, "iter_tflops": 20.234061494031604, "iter_time": 1.0196219635009767, "loss": 0.16910529136657715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.893900510129107, "step_time": 0.7671290931701661} +{"epoch": 0, "iter": 13015, "iter_tflops": 50.35044878882196, "iter_time": 0.409749942779541, "loss": 0.2548520863056183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.79499472561437, "step_time": 0.37651419830322264} +{"epoch": 0, "iter": 13016, "iter_tflops": 52.4345349873402, "iter_time": 0.3934638404846192, "loss": 0.20712341368198395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.918206156065686, "step_time": 0.36246914482116693} +{"epoch": 0, "iter": 13017, "iter_tflops": 36.65205868606604, "iter_time": 0.5628904418945313, "loss": 0.7312133312225342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.29407353232854, "step_time": 0.5250433883666992} +{"epoch": 0, "iter": 13018, "iter_tflops": 14.789328741597883, "iter_time": 1.3949986419677736, "loss": 0.5431820154190063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.60568693594824, "step_time": 1.1088595428466799} +{"epoch": 0, "iter": 13019, "iter_tflops": 42.54742122431745, "iter_time": 0.48489645004272464, "loss": 0.7271597981452942, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 45.72966653372068, "step_time": 0.45115337753295903} +{"epoch": 0, "iter": 13020, "iter_tflops": 43.87329439393117, "iter_time": 0.4702426338195801, "loss": 0.6446411609649658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.996673243757016, "step_time": 0.4389905090332031} +{"epoch": 0, "iter": 13021, "iter_tflops": 34.91116634694952, "iter_time": 0.5909597320556641, "loss": 0.04795524477958679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.41804318856461, "step_time": 0.5513675155639649} +{"epoch": 0, "iter": 13022, "iter_tflops": 42.004657281396845, "iter_time": 0.4911620483398438, "loss": 0.03808309882879257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.82556983331635, "step_time": 0.4405946063995361} +{"epoch": 0, "iter": 13023, "iter_tflops": 44.03490083107408, "iter_time": 0.46851686096191403, "loss": 0.013346602208912373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.66251947217214, "step_time": 0.4239627075195312} +{"epoch": 0, "iter": 13024, "iter_tflops": 48.561292945439526, "iter_time": 0.42484646224975586, "loss": 0.04920440539717674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.23181578253641, "step_time": 0.38757072639465334} +{"epoch": 0, "iter": 13025, "iter_tflops": 34.4324999246718, "iter_time": 0.5991750106811524, "loss": 0.18345387279987335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.325417136298086, "step_time": 0.538313606262207} +{"epoch": 0, "iter": 13026, "iter_tflops": 44.318473075667164, "iter_time": 0.4655190505981445, "loss": 0.2201615571975708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.26790184597728, "step_time": 0.4274288444519043} +{"epoch": 0, "iter": 13027, "iter_tflops": 48.4120774772213, "iter_time": 0.4261559219360352, "loss": 0.1176743134856224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.28970942079474, "step_time": 0.39455360794067385} +{"epoch": 0, "iter": 13028, "iter_tflops": 49.20711927296081, "iter_time": 0.41927050018310547, "loss": 0.12099132686853409, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.191324932441404, "step_time": 0.3878657569885254} +{"epoch": 0, "iter": 13029, "iter_tflops": 27.086884553523657, "iter_time": 0.7616635818481445, "loss": 0.7671315670013428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.675222939018735, "step_time": 0.7194745635986328} +{"epoch": 0, "iter": 13030, "iter_tflops": 12.13350208094667, "iter_time": 1.7003412017822266, "loss": 0.7726110219955444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.82988967925364, "step_time": 1.4917757110595702} +{"epoch": 0, "iter": 13031, "iter_tflops": 13.02793167513681, "iter_time": 1.5836046752929687, "loss": 0.709766685962677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.14125804585225, "step_time": 1.2781589546203613} +{"epoch": 0, "iter": 13032, "iter_tflops": 42.699767243262535, "iter_time": 0.4831664161682128, "loss": 0.581449568271637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06755273122599, "step_time": 0.4478443565368652} +{"epoch": 0, "iter": 13033, "iter_tflops": 23.82493811816699, "iter_time": 0.6000256805419921, "loss": 0.25488513708114624, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 25.696165414093485, "step_time": 0.5563310508728028} +{"epoch": 0, "iter": 13034, "iter_tflops": 25.259459269047973, "iter_time": 0.5659493560791017, "loss": 0.3141714036464691, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 26.97495931630063, "step_time": 0.5299572296142578} +{"epoch": 0, "iter": 13035, "iter_tflops": 23.794734277824418, "iter_time": 0.6007873229980469, "loss": 0.16464222967624664, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 25.256503772363306, "step_time": 0.56601558303833} +{"epoch": 0, "iter": 13036, "iter_tflops": 25.573162684444963, "iter_time": 0.5590069122314453, "loss": 0.19124102592468262, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 27.205856716383604, "step_time": 0.5254594573974609} +{"epoch": 0, "iter": 13037, "iter_tflops": 30.290554485926318, "iter_time": 0.6811064987182617, "loss": 
0.20081046223640442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.455494202327586, "step_time": 0.6356733741760254} +{"epoch": 0, "iter": 13038, "iter_tflops": 12.697794644408917, "iter_time": 1.6247776947021486, "loss": 0.20045243203639984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.125851260725728, "step_time": 1.2793801193237304} +{"epoch": 0, "iter": 13039, "iter_tflops": 22.54103860877935, "iter_time": 0.915268096923828, "loss": 0.1709701120853424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.968727452038912, "step_time": 0.8262773323059082} +{"epoch": 0, "iter": 13040, "iter_tflops": 52.19057444441618, "iter_time": 0.39530305480957034, "loss": 0.24779723584651947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.62900783385344, "step_time": 0.3643202362060547} +{"epoch": 0, "iter": 13041, "iter_tflops": 19.196729080298013, "iter_time": 0.765941665649414, "loss": 0.2394537478685379, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 20.305864462875544, "step_time": 0.7241048355102538} +{"epoch": 0, "iter": 13042, "iter_tflops": 7.931779060432467, "iter_time": 1.8537549438476564, "loss": 0.16198556125164032, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 9.742171544389535, "step_time": 1.5092707595825199} +{"epoch": 0, "iter": 13043, "iter_tflops": 6.5822364428121185, "iter_time": 2.2338265686035155, "loss": 0.19174857437610626, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 8.183291239015238, "step_time": 1.796780075073242} +{"epoch": 0, "iter": 13044, "iter_tflops": 18.47913508906832, "iter_time": 0.7956852188110352, "loss": 0.20419898629188538, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 20.32923594093778, "step_time": 0.7232723693847656} +{"epoch": 0, "iter": 13045, "iter_tflops": 22.542349742975993, "iter_time": 0.733970230102539, "loss": 0.2776636779308319, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 24.444558659862537, "step_time": 0.6768546676635742} +{"epoch": 0, "iter": 13046, "iter_tflops": 28.35530968355946, "iter_time": 
0.583503189086914, "loss": 0.3118501603603363, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 30.319994476856383, "step_time": 0.5456931610107421} +{"epoch": 0, "iter": 13047, "iter_tflops": 28.108278480634652, "iter_time": 0.5886313400268555, "loss": 0.36989444494247437, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 29.84759498965636, "step_time": 0.5543298759460449} +{"epoch": 0, "iter": 13048, "iter_tflops": 29.717494007679473, "iter_time": 0.5567566909790039, "loss": 0.18281303346157074, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.507968432275465, "step_time": 0.5251183891296387} +{"epoch": 0, "iter": 13049, "iter_tflops": 23.772846624832738, "iter_time": 0.8678427886962892, "loss": 0.526445746421814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.116859581853497, "step_time": 0.821404182434082} +{"epoch": 0, "iter": 13050, "iter_tflops": 15.35937777587334, "iter_time": 1.343224563598633, "loss": 0.46080654859542847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.035398178386068, "step_time": 1.0297321434020996} +{"epoch": 0, "iter": 13051, "iter_tflops": 37.77237379741155, "iter_time": 0.5461953125, "loss": 0.5113670825958252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53446924507876, "step_time": 0.49672221374511716} +{"epoch": 0, "iter": 13052, "iter_tflops": 42.02338312612461, "iter_time": 0.49094318389892583, "loss": 0.48096999526023865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83957369666695, "step_time": 0.4500716705322266} +{"epoch": 0, "iter": 13053, "iter_tflops": 20.06524598044581, "iter_time": 1.0282003784179687, "loss": 0.04535418748855591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.666484055217538, "step_time": 0.9522123413085939} +{"epoch": 0, "iter": 13054, "iter_tflops": 21.039617021159223, "iter_time": 0.9805831298828125, "loss": 0.05569303035736084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.61634479741786, "step_time": 0.8735938472747803} +{"epoch": 0, "iter": 13055, "iter_tflops": 40.24959908395852, 
"iter_time": 0.512578857421875, "loss": 0.06796681135892868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32214954730357, "step_time": 0.4654804363250732} +{"epoch": 0, "iter": 13056, "iter_tflops": 45.373351879076765, "iter_time": 0.4546962623596192, "loss": 0.0568414181470871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.821244322025905, "step_time": 0.4141023330688477} +{"epoch": 0, "iter": 13057, "iter_tflops": 23.529900324190592, "iter_time": 0.8768032684326171, "loss": 0.001315648783929646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.007719325997506, "step_time": 0.8249890060424804} +{"epoch": 0, "iter": 13058, "iter_tflops": 8.412570352093677, "iter_time": 2.45241259765625, "loss": 0.0006708028959110379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.592810822500356, "step_time": 1.9476505203247074} +{"epoch": 0, "iter": 13059, "iter_tflops": 13.74060701728813, "iter_time": 1.5014688568115235, "loss": 0.0015193946892395616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.338085480283084, "step_time": 1.189929161071777} +{"epoch": 0, "iter": 13060, "iter_tflops": 42.44916173023862, "iter_time": 0.48601886749267575, "loss": 0.002004080219194293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.6686497790423, "step_time": 0.43280213737487794} +{"epoch": 0, "iter": 13061, "iter_tflops": 17.639612747439863, "iter_time": 0.8289261856079101, "loss": 0.21200014650821686, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 18.57347126224707, "step_time": 0.7872484741210938} +{"epoch": 0, "iter": 13062, "iter_tflops": 7.005563779837604, "iter_time": 2.0871891784667964, "loss": 0.32925814390182495, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 8.449108127137173, "step_time": 1.7305893936157226} +{"epoch": 0, "iter": 13063, "iter_tflops": 9.627147250916892, "iter_time": 1.518823440551758, "loss": 0.23634503781795502, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 11.22467495036545, "step_time": 1.3026601638793947} +{"epoch": 0, "iter": 13064, "iter_tflops": 
20.235634497481175, "iter_time": 0.7225835647583008, "loss": 0.22499002516269684, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 21.951698132344774, "step_time": 0.6660959358215331} +{"epoch": 0, "iter": 13065, "iter_tflops": 13.641817308121798, "iter_time": 1.173769226074219, "loss": 0.14643052220344543, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 14.50470401909706, "step_time": 1.1039415435791016} +{"epoch": 0, "iter": 13066, "iter_tflops": 15.085016858463362, "iter_time": 1.0614734802246093, "loss": 0.331924706697464, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 17.931045919080198, "step_time": 0.8929956130981447} +{"epoch": 0, "iter": 13067, "iter_tflops": 29.056523938404123, "iter_time": 0.5510757369995117, "loss": 0.3167356550693512, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.897988803188113, "step_time": 0.5182326087951661} +{"epoch": 0, "iter": 13068, "iter_tflops": 30.831924366643708, "iter_time": 0.5193430404663085, "loss": 0.1948419213294983, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 32.70364691177993, "step_time": 0.4896195640563965} +{"epoch": 0, "iter": 13069, "iter_tflops": 42.648209243644494, "iter_time": 0.4837505226135254, "loss": 0.7351513504981995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.462244932890144, "step_time": 0.44403996276855473} +{"epoch": 0, "iter": 13070, "iter_tflops": 28.968860865129926, "iter_time": 0.7121817321777343, "loss": 0.5813894867897034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.63864923820559, "step_time": 0.5788966178894043} +{"epoch": 0, "iter": 13071, "iter_tflops": 38.288315513248754, "iter_time": 0.5388352355957031, "loss": 0.5335997343063354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.826042581781316, "step_time": 0.4932595157623291} +{"epoch": 0, "iter": 13072, "iter_tflops": 40.19765574252837, "iter_time": 0.5132412109375, "loss": 0.5319803953170776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.71508622737844, "step_time": 0.47194447708129883} +{"epoch": 0, "iter": 13073, 
"iter_tflops": 25.483840979963514, "iter_time": 0.8095755081176758, "loss": 0.07882693409919739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.122442548382423, "step_time": 0.7606650276184083} +{"epoch": 0, "iter": 13074, "iter_tflops": 14.498674990885489, "iter_time": 1.4229640655517577, "loss": 0.055958349257707596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.837667533825273, "step_time": 1.3026598434448244} +{"epoch": 0, "iter": 13075, "iter_tflops": 49.30452947042851, "iter_time": 0.41844215393066403, "loss": 0.046750571578741074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.07917380514141, "step_time": 0.3814979419708252} +{"epoch": 0, "iter": 13076, "iter_tflops": 49.835993887148334, "iter_time": 0.4139797744750976, "loss": 0.08789770305156708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.02225098801395, "step_time": 0.3818999233245849} +{"epoch": 0, "iter": 13077, "iter_tflops": 27.63876948627506, "iter_time": 0.746454849243164, "loss": 0.6596797108650208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.186873138309718, "step_time": 0.7068620681762694} +{"epoch": 0, "iter": 13078, "iter_tflops": 16.14729404586632, "iter_time": 1.2776811676025392, "loss": 0.7531418800354004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.189895634815066, "step_time": 1.1342062606811523} +{"epoch": 0, "iter": 13079, "iter_tflops": 43.88572980045694, "iter_time": 0.4701093864440918, "loss": 0.5038847327232361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.12099657850188, "step_time": 0.43783228302001953} +{"epoch": 0, "iter": 13080, "iter_tflops": 47.583135252352534, "iter_time": 0.43357995223999024, "loss": 0.6413044333457947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.05766165764691, "step_time": 0.40407439041137694} +{"epoch": 0, "iter": 13081, "iter_tflops": 28.211211292886038, "iter_time": 0.7313083190917969, "loss": 0.40097859501838684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.851149295764465, "step_time": 0.6911323013305664} +{"epoch": 
0, "iter": 13082, "iter_tflops": 13.748421800548908, "iter_time": 1.5006154022216798, "loss": 0.307373583316803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.246530480716512, "step_time": 1.269876884460449} +{"epoch": 0, "iter": 13083, "iter_tflops": 22.519303765653657, "iter_time": 0.916151481628418, "loss": 0.3699623644351959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.433186033321114, "step_time": 0.8111879291534424} +{"epoch": 0, "iter": 13084, "iter_tflops": 47.3231999417734, "iter_time": 0.4359615058898926, "loss": 0.45279887318611145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.3616774476715, "step_time": 0.40168262672424315} +{"epoch": 0, "iter": 13085, "iter_tflops": 16.749884636288954, "iter_time": 0.8778313980102538, "loss": 0.26994892954826355, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 17.601172183172537, "step_time": 0.8353747406005858} +{"epoch": 0, "iter": 13086, "iter_tflops": 7.075149319663608, "iter_time": 2.078199905395508, "loss": 0.25283730030059814, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 9.215280205744191, "step_time": 1.5955645751953125} +{"epoch": 0, "iter": 13087, "iter_tflops": 10.57460322254923, "iter_time": 1.3904611206054687, "loss": 0.22175712883472443, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 13.217654249373053, "step_time": 1.1124193725585938} +{"epoch": 0, "iter": 13088, "iter_tflops": 25.305655350237913, "iter_time": 0.581039077758789, "loss": 0.09379719942808151, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 26.85921488991058, "step_time": 0.5474312896728515} +{"epoch": 0, "iter": 13089, "iter_tflops": 21.499280385769893, "iter_time": 0.7352586288452149, "loss": 0.28085169196128845, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 22.870610808473238, "step_time": 0.6911722450256347} +{"epoch": 0, "iter": 13090, "iter_tflops": 6.3648747491778614, "iter_time": 2.4835573425292967, "loss": 0.1459323614835739, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 7.449329472785467, "step_time": 2.1220072860717774} 
+{"epoch": 0, "iter": 13091, "iter_tflops": 8.822773794165414, "iter_time": 1.7916736602783205, "loss": 0.2710616886615753, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 10.893402070853451, "step_time": 1.4511106185913085} +{"epoch": 0, "iter": 13092, "iter_tflops": 22.973484951950258, "iter_time": 0.6880772094726562, "loss": 0.30496644973754883, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 24.802135579341634, "step_time": 0.6373455772399902} +{"epoch": 0, "iter": 13093, "iter_tflops": 16.67813379455913, "iter_time": 0.906099365234375, "loss": 0.2764616906642914, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 17.938317519344693, "step_time": 0.8424450302124022} +{"epoch": 0, "iter": 13094, "iter_tflops": 22.137378410587207, "iter_time": 0.682648422241211, "loss": 0.2282784879207611, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 23.81455879117109, "step_time": 0.6345717582702637} +{"epoch": 0, "iter": 13095, "iter_tflops": 24.3386604501175, "iter_time": 0.6209070739746093, "loss": 0.19814413785934448, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 26.17255269787825, "step_time": 0.5774005546569825} +{"epoch": 0, "iter": 13096, "iter_tflops": 22.906852616011456, "iter_time": 0.6597172775268555, "loss": 0.16731558740139008, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.59359319715373, "step_time": 0.6144708633422852} +{"epoch": 0, "iter": 13097, "iter_tflops": 30.855904292080496, "iter_time": 0.6686270904541016, "loss": 0.42748305201530457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.35067891674892, "step_time": 0.6006022052764893} +{"epoch": 0, "iter": 13098, "iter_tflops": 34.66155152383498, "iter_time": 0.5952155227661133, "loss": 0.4237683117389679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.08188370223973, "step_time": 0.541756118774414} +{"epoch": 0, "iter": 13099, "iter_tflops": 40.772159613179284, "iter_time": 0.5060093383789062, "loss": 0.429728239774704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52960212252113, "step_time": 
0.46331187629699705} +{"epoch": 0, "iter": 13100, "iter_tflops": 37.15729015970412, "iter_time": 0.5552367630004882, "loss": 0.24778889119625092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.570774320203846, "step_time": 0.5085210685729981} +{"epoch": 0, "iter": 13101, "iter_tflops": 14.600330343111931, "iter_time": 1.1051318664550782, "loss": 0.04403005540370941, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 15.679310001072825, "step_time": 1.0290816574096677} +{"epoch": 0, "iter": 13102, "iter_tflops": 12.881461555859797, "iter_time": 1.2525977935791015, "loss": 0.051296837627887726, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 16.103556590831857, "step_time": 1.0019706039428713} +{"epoch": 0, "iter": 13103, "iter_tflops": 33.23222679951948, "iter_time": 0.48553142166137686, "loss": 0.020033525303006172, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 36.5678005033061, "step_time": 0.44124311828613283} +{"epoch": 0, "iter": 13104, "iter_tflops": 32.2986729410588, "iter_time": 0.49956511688232424, "loss": 0.01648869924247265, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 35.48685492692429, "step_time": 0.45468358230590816} +{"epoch": 0, "iter": 13105, "iter_tflops": 19.14889873924078, "iter_time": 1.0774036560058593, "loss": 0.3009263575077057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.679742149161047, "step_time": 0.9976475219726562} +{"epoch": 0, "iter": 13106, "iter_tflops": 17.141624861970765, "iter_time": 1.2035669708251953, "loss": 0.3009360432624817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.754732236332774, "step_time": 0.9940428657531738} +{"epoch": 0, "iter": 13107, "iter_tflops": 51.714637677014494, "iter_time": 0.39894108200073247, "loss": 0.3120138943195343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.44236412421599, "step_time": 0.36552497100830084} +{"epoch": 0, "iter": 13108, "iter_tflops": 53.18414682592068, "iter_time": 0.38791810607910154, "loss": 0.1918250322341919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
57.725989975033336, "step_time": 0.3573969631195068} +{"epoch": 0, "iter": 13109, "iter_tflops": 25.3922366906016, "iter_time": 0.812496109008789, "loss": 0.007023687474429607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.704796935630526, "step_time": 0.7725613327026367} +{"epoch": 0, "iter": 13110, "iter_tflops": 37.407859774817695, "iter_time": 0.5515176124572755, "loss": 0.008433866314589977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04528945733199, "step_time": 0.4206539249420166} +{"epoch": 0, "iter": 13111, "iter_tflops": 56.423541002099974, "iter_time": 0.36564691162109375, "loss": 0.011226601898670197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.0468376012929, "step_time": 0.3325083808898926} +{"epoch": 0, "iter": 13112, "iter_tflops": 56.12422610342507, "iter_time": 0.3675969352722168, "loss": 0.0049091121181845665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.94605072314352, "step_time": 0.3330493755340576} +{"epoch": 0, "iter": 13113, "iter_tflops": 29.17764681199284, "iter_time": 0.7070855865478516, "loss": 0.08211743831634521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.889389833522994, "step_time": 0.6679022674560546} +{"epoch": 0, "iter": 13114, "iter_tflops": 15.877142929252772, "iter_time": 1.2994210357666014, "loss": 0.06334009766578674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.476545055380583, "step_time": 1.0075475845336914} +{"epoch": 0, "iter": 13115, "iter_tflops": 50.1590688077878, "iter_time": 0.4113133277893066, "loss": 0.05878151208162308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.508074019668555, "step_time": 0.3784961013793945} +{"epoch": 0, "iter": 13116, "iter_tflops": 49.54844280914528, "iter_time": 0.4163822784423828, "loss": 0.06400049477815628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.03552551501039, "step_time": 0.38180610466003423} +{"epoch": 0, "iter": 13117, "iter_tflops": 25.2958391180606, "iter_time": 0.7713612747192382, "loss": 0.10196077823638916, "lr": 3e-05, "seqlen": 
7760.0, "step_tflops": 26.65671356825472, "step_time": 0.7319818572998047} +{"epoch": 0, "iter": 13118, "iter_tflops": 22.236891554073107, "iter_time": 0.8774711456298827, "loss": 0.1728357970714569, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 27.79043140402952, "step_time": 0.7021204681396485} +{"epoch": 0, "iter": 13119, "iter_tflops": 46.34355366228155, "iter_time": 0.4210344085693359, "loss": 0.12579262256622314, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 50.55522086475117, "step_time": 0.3859587669372559} +{"epoch": 0, "iter": 13120, "iter_tflops": 48.59736754357041, "iter_time": 0.40150797653198245, "loss": 0.150594562292099, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 52.90696519615452, "step_time": 0.36880268287658685} +{"epoch": 0, "iter": 13121, "iter_tflops": 34.589450053515144, "iter_time": 0.5964562454223634, "loss": 0.0998961478471756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.085482892250724, "step_time": 0.5563118476867676} +{"epoch": 0, "iter": 13122, "iter_tflops": 10.68193866947811, "iter_time": 1.9313997344970701, "loss": 0.029231879860162735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.043509955702106, "step_time": 1.7130465774536132} +{"epoch": 0, "iter": 13123, "iter_tflops": 12.27450490172741, "iter_time": 1.680808609008789, "loss": 0.09026093035936356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.831542135558669, "step_time": 1.4915974884033203} +{"epoch": 0, "iter": 13124, "iter_tflops": 25.05445492804619, "iter_time": 0.8234501037597657, "loss": 0.04973470792174339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.95138784829843, "step_time": 0.5902796649932861} +{"epoch": 0, "iter": 13125, "iter_tflops": 22.11275040747964, "iter_time": 0.6667827529907226, "loss": 0.1314205378293991, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.52753469863242, "step_time": 0.6266870193481446} +{"epoch": 0, "iter": 13126, "iter_tflops": 9.117120915624376, "iter_time": 1.6172211303710942, "loss": 0.13449105620384216, "lr": 3e-05, 
"seqlen": 5904.0, "step_tflops": 10.073409039491034, "step_time": 1.4636952133178711} +{"epoch": 0, "iter": 13127, "iter_tflops": 26.612615992574387, "iter_time": 0.5540380020141602, "loss": 0.2688092887401581, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 28.370831296985443, "step_time": 0.5197028045654296} +{"epoch": 0, "iter": 13128, "iter_tflops": 27.33862245125125, "iter_time": 0.5393249282836915, "loss": 0.1356969177722931, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 29.069075380784643, "step_time": 0.5072194557189942} +{"epoch": 0, "iter": 13129, "iter_tflops": 44.84118331649419, "iter_time": 0.4517667808532715, "loss": 0.06398151069879532, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 49.19348991204769, "step_time": 0.41179751777648926} +{"epoch": 0, "iter": 13130, "iter_tflops": 12.352939215656031, "iter_time": 1.6399139251708985, "loss": 0.06353176385164261, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 13.5889141609046, "step_time": 1.4907561264038085} +{"epoch": 0, "iter": 13131, "iter_tflops": 11.99302395841632, "iter_time": 1.6891283721923827, "loss": 0.10813131928443909, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 13.82275673039903, "step_time": 1.4655366821289062} +{"epoch": 0, "iter": 13132, "iter_tflops": 34.012041322476264, "iter_time": 0.5956054458618165, "loss": 0.03706884756684303, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 37.984827537337154, "step_time": 0.5333118076324463} +{"epoch": 0, "iter": 13133, "iter_tflops": 20.05598778669618, "iter_time": 0.7412702331542969, "loss": 0.31090301275253296, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 21.835021215088414, "step_time": 0.6808743896484375} +{"epoch": 0, "iter": 13134, "iter_tflops": 22.85621577658667, "iter_time": 0.6504535522460937, "loss": 0.25601041316986084, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.595764136994205, "step_time": 0.6044498825073242} +{"epoch": 0, "iter": 13135, "iter_tflops": 24.092632540160878, "iter_time": 0.6170727386474609, "loss": 
0.24025271832942963, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 25.841710969253867, "step_time": 0.5753065948486329} +{"epoch": 0, "iter": 13136, "iter_tflops": 22.621514793945536, "iter_time": 0.6572020874023438, "loss": 0.20270834863185883, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.31181214255746, "step_time": 0.6115096092224122} +{"epoch": 0, "iter": 13137, "iter_tflops": 16.053508120755833, "iter_time": 1.2748066558837892, "loss": 0.03302544727921486, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 17.257739116034088, "step_time": 1.1858516845703124} +{"epoch": 0, "iter": 13138, "iter_tflops": 23.596868193578057, "iter_time": 0.867281150817871, "loss": 0.03753058239817619, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 32.83039955883584, "step_time": 0.6233588161468506} +{"epoch": 0, "iter": 13139, "iter_tflops": 51.70590886305633, "iter_time": 0.3957984580993652, "loss": 0.02715182863175869, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 56.437861489527485, "step_time": 0.36261329650878904} +{"epoch": 0, "iter": 13140, "iter_tflops": 55.38224074053881, "iter_time": 0.3695249366760254, "loss": 0.0308104045689106, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 60.3380040572642, "step_time": 0.3391746101379395} +{"epoch": 0, "iter": 13141, "iter_tflops": 34.458931589873714, "iter_time": 0.5470519027709961, "loss": 0.024075716733932495, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 37.1118650753331, "step_time": 0.5079460182189942} +{"epoch": 0, "iter": 13142, "iter_tflops": 8.578953312785137, "iter_time": 2.197333801269531, "loss": 0.019011972472071648, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 11.246434841612558, "step_time": 1.6761599884033203} +{"epoch": 0, "iter": 13143, "iter_tflops": 12.29184358026366, "iter_time": 1.5336042938232421, "loss": 0.018811184912919998, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 14.685817985144018, "step_time": 1.2836073627471924} +{"epoch": 0, "iter": 13144, "iter_tflops": 47.04539561608505, "iter_time": 
0.4006943473815918, "loss": 0.015698136761784554, "lr": 3e-05, "seqlen": 7504.0, "step_tflops": 51.50746596692552, "step_time": 0.36598236274719237} +{"epoch": 0, "iter": 13145, "iter_tflops": 24.310026549659508, "iter_time": 0.6519320220947266, "loss": 0.3043636083602905, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 25.9745990224574, "step_time": 0.6101532020568847} +{"epoch": 0, "iter": 13146, "iter_tflops": 13.903975516448272, "iter_time": 1.1398527526855469, "loss": 0.2416124790906906, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 15.446137696784072, "step_time": 1.0260483932495117} +{"epoch": 0, "iter": 13147, "iter_tflops": 28.63968451991212, "iter_time": 0.5533749771118164, "loss": 0.29498687386512756, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 30.537029532157494, "step_time": 0.5189923515319824} +{"epoch": 0, "iter": 13148, "iter_tflops": 29.924249950809955, "iter_time": 0.5296201171875, "loss": 0.23516374826431274, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 31.72617885838449, "step_time": 0.499539665222168} +{"epoch": 0, "iter": 13149, "iter_tflops": 29.117069977355975, "iter_time": 0.7085566482543946, "loss": 0.17777132987976074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.884707204649228, "step_time": 0.668003532409668} +{"epoch": 0, "iter": 13150, "iter_tflops": 14.414063977746478, "iter_time": 1.4313169097900391, "loss": 0.1718158721923828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.803303212095894, "step_time": 1.1588351478576662} +{"epoch": 0, "iter": 13151, "iter_tflops": 51.0636899546658, "iter_time": 0.40402668762207034, "loss": 0.31467190384864807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.667827509392886, "step_time": 0.3706107177734375} +{"epoch": 0, "iter": 13152, "iter_tflops": 49.10524362310486, "iter_time": 0.4201403350830078, "loss": 0.22546890377998352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.205247548767815, "step_time": 0.38776426124572755} +{"epoch": 0, "iter": 13153, "iter_tflops": 33.40140184721882, 
"iter_time": 0.6176714859008788, "loss": 0.29351577162742615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.646107344768346, "step_time": 0.5787754974365233} +{"epoch": 0, "iter": 13154, "iter_tflops": 18.355611864105114, "iter_time": 1.1239665374755858, "loss": 0.2765408456325531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.844314979651173, "step_time": 0.9897707614898682} +{"epoch": 0, "iter": 13155, "iter_tflops": 38.150861869826194, "iter_time": 0.5407766036987305, "loss": 0.285410612821579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.956654446899364, "step_time": 0.4917239894866943} +{"epoch": 0, "iter": 13156, "iter_tflops": 41.40234573641946, "iter_time": 0.49830735778808594, "loss": 0.3791189193725586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.07639410445025, "step_time": 0.45769174575805666} +{"epoch": 0, "iter": 13157, "iter_tflops": 17.886073044688466, "iter_time": 1.137236083984375, "loss": 0.07374324649572372, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 19.387063552423864, "step_time": 1.0491886825561525} +{"epoch": 0, "iter": 13158, "iter_tflops": 28.64792711627722, "iter_time": 0.7100230178833008, "loss": 0.025630371645092964, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 36.553065977759125, "step_time": 0.5564700832366942} +{"epoch": 0, "iter": 13159, "iter_tflops": 55.2899095154516, "iter_time": 0.3678914985656738, "loss": 0.023301998153328896, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 60.55867205985206, "step_time": 0.3358839778900147} +{"epoch": 0, "iter": 13160, "iter_tflops": 53.45560238020644, "iter_time": 0.3805155448913574, "loss": 0.03228645399212837, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 58.15773910880349, "step_time": 0.3497503166198731} +{"epoch": 0, "iter": 13161, "iter_tflops": 21.046332748223545, "iter_time": 0.9802702331542968, "loss": 0.4066920280456543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.012566277006563, "step_time": 0.9372416305541992} +{"epoch": 0, "iter": 13162, "iter_tflops": 
18.406493161263203, "iter_time": 1.1208595428466797, "loss": 0.3677820861339569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.609563916338068, "step_time": 0.8738447513580322} +{"epoch": 0, "iter": 13163, "iter_tflops": 48.53782832873513, "iter_time": 0.4250518455505371, "loss": 0.35715585947036743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.566409962242965, "step_time": 0.39247674560546875} +{"epoch": 0, "iter": 13164, "iter_tflops": 46.51684154562017, "iter_time": 0.4435187950134277, "loss": 0.37899914383888245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.48950499451459, "step_time": 0.4086214256286621} +{"epoch": 0, "iter": 13165, "iter_tflops": 43.812581529600685, "iter_time": 0.47089426803588863, "loss": 0.514575719833374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.843074733629024, "step_time": 0.4312242393493652} +{"epoch": 0, "iter": 13166, "iter_tflops": 43.275821161314774, "iter_time": 0.4767348823547363, "loss": 0.5547749996185303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.086609539309954, "step_time": 0.4381520290374756} +{"epoch": 0, "iter": 13167, "iter_tflops": 48.09093976224508, "iter_time": 0.42900167083740237, "loss": 0.6909409165382385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.138040850825256, "step_time": 0.39570135688781743} +{"epoch": 0, "iter": 13168, "iter_tflops": 44.733392033007, "iter_time": 0.46120118713378905, "loss": 0.4563214182853699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01167070652282, "step_time": 0.4297099685668945} +{"epoch": 0, "iter": 13169, "iter_tflops": 25.731723779319225, "iter_time": 0.8017765808105468, "loss": 0.007696429267525673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.10401556466605, "step_time": 0.7611821746826172} +{"epoch": 0, "iter": 13170, "iter_tflops": 12.82509573965739, "iter_time": 1.6086502532958984, "loss": 0.5948435068130493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.005463540217455, "step_time": 1.2132038307189943} +{"epoch": 0, "iter": 13171, 
"iter_tflops": 39.20518996034876, "iter_time": 0.5262337341308594, "loss": 0.7494280934333801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.937872787250434, "step_time": 0.4804870891571045} +{"epoch": 0, "iter": 13172, "iter_tflops": 37.564051541988775, "iter_time": 0.5492243957519531, "loss": 0.6314224600791931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.82430621942634, "step_time": 0.5053629913330078} +{"epoch": 0, "iter": 13173, "iter_tflops": 18.065997768002102, "iter_time": 1.1419847259521485, "loss": 0.6058120727539062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.433007080591313, "step_time": 1.0616521377563477} +{"epoch": 0, "iter": 13174, "iter_tflops": 35.23210495280381, "iter_time": 0.5855765228271484, "loss": 0.4981909394264221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.31895400923152, "step_time": 0.5384044017791748} +{"epoch": 0, "iter": 13175, "iter_tflops": 37.89194812613096, "iter_time": 0.5444717025756836, "loss": 0.5575960874557495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.46428666522425, "step_time": 0.49756296730041505} +{"epoch": 0, "iter": 13176, "iter_tflops": 37.38133627008791, "iter_time": 0.551908935546875, "loss": 0.5738815665245056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.75677788093667, "step_time": 0.5062003078460693} +{"epoch": 0, "iter": 13177, "iter_tflops": 16.558923360354935, "iter_time": 1.2459199829101564, "loss": 0.318112313747406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.47757266558943, "step_time": 1.1804324264526367} +{"epoch": 0, "iter": 13178, "iter_tflops": 16.26954630701525, "iter_time": 1.2680804443359375, "loss": 0.40126800537109375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.556908724577298, "step_time": 0.8757979984283447} +{"epoch": 0, "iter": 13179, "iter_tflops": 39.52155661952262, "iter_time": 0.5220212783813476, "loss": 0.5292706489562988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.302711850724606, "step_time": 0.47643883323669434} +{"epoch": 0, "iter": 
13180, "iter_tflops": 36.79265177070227, "iter_time": 0.5607395095825195, "loss": 0.3676730692386627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.097557066785015, "step_time": 0.5145224552154541} +{"epoch": 0, "iter": 13181, "iter_tflops": 24.71862154971484, "iter_time": 0.8346377029418945, "loss": 0.6703611612319946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.705435949364368, "step_time": 0.7725428466796874} +{"epoch": 0, "iter": 13182, "iter_tflops": 35.37260879449579, "iter_time": 0.5832505493164062, "loss": 0.4562987685203552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.6436295874292, "step_time": 0.5338808422088622} +{"epoch": 0, "iter": 13183, "iter_tflops": 36.23995014942735, "iter_time": 0.5692914428710938, "loss": 0.5615459680557251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.27866821640189, "step_time": 0.5252493133544922} +{"epoch": 0, "iter": 13184, "iter_tflops": 36.31032051915979, "iter_time": 0.5681881408691406, "loss": 0.6963919997215271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.53498761969376, "step_time": 0.5218439350128173} +{"epoch": 0, "iter": 13185, "iter_tflops": 23.637497042965272, "iter_time": 0.8728121032714843, "loss": 0.7459343671798706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.34089505275231, "step_time": 0.8141422576904298} +{"epoch": 0, "iter": 13186, "iter_tflops": 10.691966002973075, "iter_time": 1.9295883941650394, "loss": 0.5256251096725464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.967037775427052, "step_time": 1.723993347167969} +{"epoch": 0, "iter": 13187, "iter_tflops": 15.749597771432208, "iter_time": 1.309944152832031, "loss": 0.8124595284461975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.617620378516715, "step_time": 1.0516613693237304} +{"epoch": 0, "iter": 13188, "iter_tflops": 33.89091803594751, "iter_time": 0.6087499160766601, "loss": 0.6751808524131775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.70739204074059, "step_time": 0.5620419311523437} +{"epoch": 0, 
"iter": 13189, "iter_tflops": 13.685506936970565, "iter_time": 1.0833406982421874, "loss": 0.27883318066596985, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 14.586763021167936, "step_time": 1.0164055328369141} +{"epoch": 0, "iter": 13190, "iter_tflops": 12.634091810253016, "iter_time": 1.1734968261718748, "loss": 0.24529874324798584, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.07947486208635, "step_time": 0.9831951560974121} +{"epoch": 0, "iter": 13191, "iter_tflops": 26.857402825916104, "iter_time": 0.552029052734375, "loss": 0.15978530049324036, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.545746895304227, "step_time": 0.5193791809082031} +{"epoch": 0, "iter": 13192, "iter_tflops": 26.65253441830636, "iter_time": 0.5562723007202148, "loss": 0.16566359996795654, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.41645395387941, "step_time": 0.5217423210144043} +{"epoch": 0, "iter": 13193, "iter_tflops": 21.93873800257416, "iter_time": 0.940395637512207, "loss": 0.4641529321670532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.325882904121407, "step_time": 0.8844721374511718} +{"epoch": 0, "iter": 13194, "iter_tflops": 13.772580733815838, "iter_time": 1.4979831237792969, "loss": 0.4055019021034241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.776057512971644, "step_time": 0.9474209690093995} +{"epoch": 0, "iter": 13195, "iter_tflops": 39.30922465275224, "iter_time": 0.5248410186767578, "loss": 0.49450549483299255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.86609968842099, "step_time": 0.48129159545898437} +{"epoch": 0, "iter": 13196, "iter_tflops": 41.700450251484845, "iter_time": 0.4947451019287109, "loss": 0.3500480651855469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.331659388405114, "step_time": 0.4551144561767578} +{"epoch": 0, "iter": 13197, "iter_tflops": 19.960731400373916, "iter_time": 1.0335840454101564, "loss": 0.433991938829422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.513104877261302, "step_time": 0.9590012054443359} 
+{"epoch": 0, "iter": 13198, "iter_tflops": 22.2808782063093, "iter_time": 0.9259551315307617, "loss": 0.3526667654514313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.90851158157335, "step_time": 0.7667125492095948} +{"epoch": 0, "iter": 13199, "iter_tflops": 44.895859316614136, "iter_time": 0.459532211303711, "loss": 0.36965155601501465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62256200209192, "step_time": 0.42431111526489257} +{"epoch": 0, "iter": 13200, "iter_tflops": 46.79909737317917, "iter_time": 0.44084383392333987, "loss": 0.47474905848503113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.590508520562224, "step_time": 0.40780561637878415} +{"epoch": 0, "iter": 13201, "iter_tflops": 18.852732963140507, "iter_time": 0.9714411544799806, "loss": 0.015770677477121353, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 19.65850709310888, "step_time": 0.9316231689453125} +{"epoch": 0, "iter": 13202, "iter_tflops": 13.683098388733805, "iter_time": 1.3384629821777343, "loss": 0.034016598016023636, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 15.481566184318758, "step_time": 1.1829759635925292} +{"epoch": 0, "iter": 13203, "iter_tflops": 44.63931805120207, "iter_time": 0.410273307800293, "loss": 0.051798492670059204, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 48.91161484867809, "step_time": 0.37443704795837407} +{"epoch": 0, "iter": 13204, "iter_tflops": 49.75810448185717, "iter_time": 0.36806708908081054, "loss": 0.0765623077750206, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 54.6139668539458, "step_time": 0.3353413372039795} +{"epoch": 0, "iter": 13205, "iter_tflops": 25.92741110041312, "iter_time": 0.7957251663208008, "loss": 0.04180034622550011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.36394759663868, "step_time": 0.7539516525268555} +{"epoch": 0, "iter": 13206, "iter_tflops": 16.917949721888075, "iter_time": 1.2194795379638674, "loss": 0.03281262516975403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.218772513577182, "step_time": 
1.0203929786682129} +{"epoch": 0, "iter": 13207, "iter_tflops": 54.44028406943365, "iter_time": 0.37896741104125975, "loss": 0.017279941588640213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.49843907857851, "step_time": 0.3467501640319824} +{"epoch": 0, "iter": 13208, "iter_tflops": 54.54277739408765, "iter_time": 0.37825527954101557, "loss": 0.03681895136833191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.45984676655556, "step_time": 0.34697522163391115} +{"epoch": 0, "iter": 13209, "iter_tflops": 29.04414869943708, "iter_time": 0.7103356246948244, "loss": 0.09923522174358368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.780909483928923, "step_time": 0.6702561378479004} +{"epoch": 0, "iter": 13210, "iter_tflops": 18.524090183252202, "iter_time": 1.1137439575195314, "loss": 0.1614854335784912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.624554310296283, "step_time": 0.9118894996643065} +{"epoch": 0, "iter": 13211, "iter_tflops": 48.331193815643054, "iter_time": 0.4268691062927246, "loss": 0.09225823730230331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.78916175923936, "step_time": 0.3908206310272217} +{"epoch": 0, "iter": 13212, "iter_tflops": 54.3345804181726, "iter_time": 0.37970466232299804, "loss": 0.04619733989238739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.20026986561705, "step_time": 0.3484966125488281} +{"epoch": 0, "iter": 13213, "iter_tflops": 29.395609646794597, "iter_time": 0.7018426818847656, "loss": 0.34594637155532837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.11268247662641, "step_time": 0.6631087989807128} +{"epoch": 0, "iter": 13214, "iter_tflops": 16.767110201153557, "iter_time": 1.2304501647949218, "loss": 0.34919556975364685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.454989658118443, "step_time": 0.96159885597229} +{"epoch": 0, "iter": 13215, "iter_tflops": 37.42480845112189, "iter_time": 0.5512678451538086, "loss": 0.45297959446907043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.8755823247605, "step_time": 0.5047290420532227} +{"epoch": 0, "iter": 13216, "iter_tflops": 38.88324286888226, "iter_time": 0.5305908660888672, "loss": 0.42398524284362793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81437537114585, "step_time": 0.481873046875} +{"epoch": 0, "iter": 13217, "iter_tflops": 17.814170687432455, "iter_time": 1.158128204345703, "loss": 0.10873088240623474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.730596725820586, "step_time": 1.101464828491211} +{"epoch": 0, "iter": 13218, "iter_tflops": 28.533755040033707, "iter_time": 0.7230416564941408, "loss": 0.10860858857631683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.34560828697899, "step_time": 0.5676364898681641} +{"epoch": 0, "iter": 13219, "iter_tflops": 41.48333186720853, "iter_time": 0.4973345336914063, "loss": 0.12379343807697296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.31866154967212, "step_time": 0.45524498748779296} +{"epoch": 0, "iter": 13220, "iter_tflops": 39.39676613433593, "iter_time": 0.5236747970581055, "loss": 0.08512811362743378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.34649856903588, "step_time": 0.47595755577087406} +{"epoch": 0, "iter": 13221, "iter_tflops": 17.70307474779964, "iter_time": 1.1653960571289064, "loss": 0.21777968108654022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.810055227585952, "step_time": 1.0968119583129883} +{"epoch": 0, "iter": 13222, "iter_tflops": 19.57924647350312, "iter_time": 1.0537225494384765, "loss": 0.16032178699970245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.863391658031365, "step_time": 0.8645499267578125} +{"epoch": 0, "iter": 13223, "iter_tflops": 50.99547477667138, "iter_time": 0.4045671424865722, "loss": 0.13931696116924286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.3626347088014, "step_time": 0.372653751373291} +{"epoch": 0, "iter": 13224, "iter_tflops": 48.01715504968766, "iter_time": 0.42966088867187496, "loss": 0.14382418990135193, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 51.94410213965439, "step_time": 0.39717874908447265} +{"epoch": 0, "iter": 13225, "iter_tflops": 28.824755436319737, "iter_time": 0.7157421875000001, "loss": 0.47161489725112915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.593495542259188, "step_time": 0.6743620872497559} +{"epoch": 0, "iter": 13226, "iter_tflops": 16.346777010134463, "iter_time": 1.2620893707275391, "loss": 0.5110357403755188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.407720694773467, "step_time": 1.0630353679656983} +{"epoch": 0, "iter": 13227, "iter_tflops": 35.44559588836336, "iter_time": 0.582049560546875, "loss": 0.3625021278858185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.7042750701636, "step_time": 0.5330443077087402} +{"epoch": 0, "iter": 13228, "iter_tflops": 43.794306304386396, "iter_time": 0.4710907707214355, "loss": 0.46004006266593933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.8434922532365, "step_time": 0.43122047615051273} +{"epoch": 0, "iter": 13229, "iter_tflops": 17.44930096976669, "iter_time": 1.182344985961914, "loss": 0.722885012626648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.724363666831465, "step_time": 1.1018314895629882} +{"epoch": 0, "iter": 13230, "iter_tflops": 17.631115468915848, "iter_time": 1.1701524810791017, "loss": 0.5503295063972473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.504136640108094, "step_time": 0.877764362335205} +{"epoch": 0, "iter": 13231, "iter_tflops": 37.61559981734129, "iter_time": 0.5484717407226563, "loss": 0.8016287684440613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.836285425102844, "step_time": 0.5052147445678711} +{"epoch": 0, "iter": 13232, "iter_tflops": 31.844537315563592, "iter_time": 0.6478691558837891, "loss": 0.5256568193435669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.21265261675623, "step_time": 0.6030252532958985} +{"epoch": 0, "iter": 13233, "iter_tflops": 12.671438810493106, "iter_time": 1.6281571350097657, "loss": 0.22283023595809937, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 13.19398103932041, "step_time": 1.56367463684082} +{"epoch": 0, "iter": 13234, "iter_tflops": 19.244494444500237, "iter_time": 1.072051727294922, "loss": 0.24694038927555084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.468000816208008, "step_time": 0.879115936279297} +{"epoch": 0, "iter": 13235, "iter_tflops": 38.969978543293664, "iter_time": 0.5294099273681641, "loss": 0.1434747576713562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6338790263111, "step_time": 0.483913122177124} +{"epoch": 0, "iter": 13236, "iter_tflops": 40.2704653628736, "iter_time": 0.5123132629394532, "loss": 0.21125635504722595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.94120114620163, "step_time": 0.469515920639038} +{"epoch": 0, "iter": 13237, "iter_tflops": 17.466723023868305, "iter_time": 1.1811656646728517, "loss": 0.5790544152259827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.605913908467482, "step_time": 1.1088460159301756} +{"epoch": 0, "iter": 13238, "iter_tflops": 22.4707943023758, "iter_time": 0.9181292495727539, "loss": 0.5593474507331848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.146012784634262, "step_time": 0.7330023498535156} +{"epoch": 0, "iter": 13239, "iter_tflops": 43.18790938164893, "iter_time": 0.4777053070068359, "loss": 0.61928391456604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.541214621400066, "step_time": 0.4432865295410156} +{"epoch": 0, "iter": 13240, "iter_tflops": 42.012465575167866, "iter_time": 0.49107076263427735, "loss": 0.39685654640197754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.880462344561586, "step_time": 0.4596898612976074} +{"epoch": 0, "iter": 13241, "iter_tflops": 18.711395833202815, "iter_time": 1.1025951080322265, "loss": 0.000766100303735584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.617671186249957, "step_time": 1.0516586456298829} +{"epoch": 0, "iter": 13242, "iter_tflops": 15.37286695262915, "iter_time": 1.3420459289550781, "loss": 0.5811396837234497, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 18.357419063868765, "step_time": 1.1238558883666991} +{"epoch": 0, "iter": 13243, "iter_tflops": 48.93133360009229, "iter_time": 0.4216335830688477, "loss": 0.6689497232437134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15154727008416, "step_time": 0.38815602874755867} +{"epoch": 0, "iter": 13244, "iter_tflops": 46.470518829334296, "iter_time": 0.44396090316772463, "loss": 0.5149276852607727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.18144195810591, "step_time": 0.4111299457550049} +{"epoch": 0, "iter": 13245, "iter_tflops": 28.674353917571196, "iter_time": 0.7194963684082032, "loss": 0.13552868366241455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.295179155406252, "step_time": 0.6810025253295898} +{"epoch": 0, "iter": 13246, "iter_tflops": 19.755371323383137, "iter_time": 1.0443283081054688, "loss": 0.09724747389554977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.78489475730538, "step_time": 0.8674031867980958} +{"epoch": 0, "iter": 13247, "iter_tflops": 40.485844268984074, "iter_time": 0.5095878295898437, "loss": 0.0820598155260086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.09082600365232, "step_time": 0.4679225902557373} +{"epoch": 0, "iter": 13248, "iter_tflops": 41.082574563464895, "iter_time": 0.50218599319458, "loss": 0.23180006444454193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.76104625357413, "step_time": 0.4609162483215332} +{"epoch": 0, "iter": 13249, "iter_tflops": 19.530930748233104, "iter_time": 1.0563292541503908, "loss": 0.729719340801239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.75568303198906, "step_time": 0.993997329711914} +{"epoch": 0, "iter": 13250, "iter_tflops": 14.883783344469439, "iter_time": 1.386145782470703, "loss": 0.561444878578186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.283803098372346, "step_time": 1.0171215629577637} +{"epoch": 0, "iter": 13251, "iter_tflops": 45.923272328903614, "iter_time": 0.44925138092041017, "loss": 0.6471816897392273, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.77917243947867, "step_time": 0.41445232009887695} +{"epoch": 0, "iter": 13252, "iter_tflops": 48.21631523427659, "iter_time": 0.42788615036010735, "loss": 0.6731216907501221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.236044300338556, "step_time": 0.3949589557647705} +{"epoch": 0, "iter": 13253, "iter_tflops": 20.345148954666385, "iter_time": 1.0140546798706056, "loss": 0.742019534111023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.242567205598895, "step_time": 0.9712146987915039} +{"epoch": 0, "iter": 13254, "iter_tflops": 13.160102408795892, "iter_time": 1.5677000732421875, "loss": 0.6640040278434753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.56749435495511, "step_time": 1.245275421142578} +{"epoch": 0, "iter": 13255, "iter_tflops": 44.98611191135214, "iter_time": 0.45861028289794925, "loss": 0.6552721261978149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.82504237091678, "step_time": 0.42255147171020513} +{"epoch": 0, "iter": 13256, "iter_tflops": 42.84251804736713, "iter_time": 0.48155651092529295, "loss": 0.7831872701644897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.300599128009104, "step_time": 0.4455902061462403} +{"epoch": 0, "iter": 13257, "iter_tflops": 30.32129422349436, "iter_time": 0.6804159927368163, "loss": 0.07121989130973816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.15417096975379, "step_time": 0.6416303977966309} +{"epoch": 0, "iter": 13258, "iter_tflops": 12.834226009083041, "iter_time": 1.6075058593749998, "loss": 0.12004512548446655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.607723627811012, "step_time": 1.4123414459228516} +{"epoch": 0, "iter": 13259, "iter_tflops": 34.99920037941871, "iter_time": 0.5894732818603515, "loss": 0.10767205059528351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.259612493356364, "step_time": 0.5392394790649414} +{"epoch": 0, "iter": 13260, "iter_tflops": 43.286563034752895, "iter_time": 0.4766165771484375, "loss": 
0.18341553211212158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.401989863311144, "step_time": 0.4352368659973144} +{"epoch": 0, "iter": 13261, "iter_tflops": 22.04740802075462, "iter_time": 0.935760498046875, "loss": 0.23774848878383636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.281001979643708, "step_time": 0.8861772155761719} +{"epoch": 0, "iter": 13262, "iter_tflops": 19.00450202153162, "iter_time": 1.0855897979736329, "loss": 0.43130040168762207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.878831533620016, "step_time": 0.9017546844482421} +{"epoch": 0, "iter": 13263, "iter_tflops": 47.06061594713927, "iter_time": 0.4383940391540527, "loss": 0.22809898853302002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.13642186467155, "step_time": 0.40345203590393064} +{"epoch": 0, "iter": 13264, "iter_tflops": 48.656377188616254, "iter_time": 0.42401622772216796, "loss": 0.3676574230194092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.732855407699425, "step_time": 0.3912379360198974} +{"epoch": 0, "iter": 13265, "iter_tflops": 27.821658618235027, "iter_time": 0.7415479354858399, "loss": 0.5653787851333618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.371343275292766, "step_time": 0.7024225387573243} +{"epoch": 0, "iter": 13266, "iter_tflops": 13.937209672136257, "iter_time": 1.4802886657714844, "loss": 0.6181896924972534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.88339719409761, "step_time": 1.2989093742370605} +{"epoch": 0, "iter": 13267, "iter_tflops": 46.78649555847372, "iter_time": 0.4409625740051269, "loss": 0.5026284456253052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.751143241281554, "step_time": 0.4065148525238037} +{"epoch": 0, "iter": 13268, "iter_tflops": 43.09918385170893, "iter_time": 0.4786887283325195, "loss": 0.4881080985069275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.83237131744291, "step_time": 0.4405306186676025} +{"epoch": 0, "iter": 13269, "iter_tflops": 40.216714450385716, "iter_time": 
0.5129979858398437, "loss": 0.4024735391139984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.74359794748117, "step_time": 0.47163686752319334} +{"epoch": 0, "iter": 13270, "iter_tflops": 12.073105438036189, "iter_time": 1.7088472900390625, "loss": 0.509590208530426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.011428270021069, "step_time": 1.374359130859375} +{"epoch": 0, "iter": 13271, "iter_tflops": 9.936669533194266, "iter_time": 2.076258392333984, "loss": 0.4176117777824402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.81080491127691, "step_time": 1.7467982635498047} +{"epoch": 0, "iter": 13272, "iter_tflops": 30.215674611852993, "iter_time": 0.682794403076172, "loss": 0.44411617517471313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.77779395654479, "step_time": 0.6107886600494383} +{"epoch": 0, "iter": 13273, "iter_tflops": 18.725254165578576, "iter_time": 0.7547297744750977, "loss": 0.2750793695449829, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 20.425103465019802, "step_time": 0.6919184951782226} +{"epoch": 0, "iter": 13274, "iter_tflops": 21.69743901399788, "iter_time": 0.6513444671630859, "loss": 0.296377956867218, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 23.397891206536, "step_time": 0.6040077171325683} +{"epoch": 0, "iter": 13275, "iter_tflops": 21.764421704914916, "iter_time": 0.6493398742675782, "loss": 0.2748880088329315, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 23.48308686201901, "step_time": 0.6018164024353027} +{"epoch": 0, "iter": 13276, "iter_tflops": 21.47725276496785, "iter_time": 0.6580220947265625, "loss": 0.21124236285686493, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 22.92433303572647, "step_time": 0.6164849739074707} +{"epoch": 0, "iter": 13277, "iter_tflops": 16.49427574598698, "iter_time": 1.2508032379150391, "loss": 0.022439805790781975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.363514705437364, "step_time": 1.1881864852905273} +{"epoch": 0, "iter": 13278, "iter_tflops": 26.698434048663213, 
"iter_time": 0.7727454528808593, "loss": 0.03610740974545479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.44665090194651, "step_time": 0.6358466262817383} +{"epoch": 0, "iter": 13279, "iter_tflops": 48.75821324043171, "iter_time": 0.4231306304931641, "loss": 0.04466262832283974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.344654109618844, "step_time": 0.38675090980529786} +{"epoch": 0, "iter": 13280, "iter_tflops": 53.17566144849682, "iter_time": 0.38798000717163084, "loss": 0.029611391946673393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.10677789726168, "step_time": 0.3550548534393311} +{"epoch": 0, "iter": 13281, "iter_tflops": 39.66268468117524, "iter_time": 0.5201638183593751, "loss": 0.45285263657569885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.74051609877214, "step_time": 0.48270576477050786} +{"epoch": 0, "iter": 13282, "iter_tflops": 32.55556530326481, "iter_time": 0.6337194061279298, "loss": 0.5546685457229614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.86938452032208, "step_time": 0.5751727771759033} +{"epoch": 0, "iter": 13283, "iter_tflops": 39.149247712760456, "iter_time": 0.5269856948852539, "loss": 0.5133765935897827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.86365970739367, "step_time": 0.48131899261474614} +{"epoch": 0, "iter": 13284, "iter_tflops": 40.36083677465414, "iter_time": 0.5111661491394043, "loss": 0.5852355360984802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.92501148475151, "step_time": 0.4696889724731445} +{"epoch": 0, "iter": 13285, "iter_tflops": 36.38420276847884, "iter_time": 0.5670343704223633, "loss": 0.14649498462677002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17971534909221, "step_time": 0.5134703750610351} +{"epoch": 0, "iter": 13286, "iter_tflops": 38.036675113411235, "iter_time": 0.5424000244140623, "loss": 0.15104252099990845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.70488926711164, "step_time": 0.4831084651947021} +{"epoch": 0, "iter": 13287, "iter_tflops": 
36.00637872772103, "iter_time": 0.5729844055175782, "loss": 0.1529737114906311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.21173846686946, "step_time": 0.5261458511352539} +{"epoch": 0, "iter": 13288, "iter_tflops": 43.40040705173171, "iter_time": 0.47536635971069335, "loss": 0.17230361700057983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.547848475545166, "step_time": 0.433901725769043} +{"epoch": 0, "iter": 13289, "iter_tflops": 21.701894719128124, "iter_time": 0.9506586303710935, "loss": 0.8030934929847717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.291085363774425, "step_time": 0.8857935638427735} +{"epoch": 0, "iter": 13290, "iter_tflops": 14.029130426025448, "iter_time": 1.4705896148681643, "loss": 0.6815286874771118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.795233530098564, "step_time": 1.2283897972106932} +{"epoch": 0, "iter": 13291, "iter_tflops": 36.911090081478704, "iter_time": 0.5589402389526368, "loss": 0.6852302551269531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25439790517324, "step_time": 0.5125177516937256} +{"epoch": 0, "iter": 13292, "iter_tflops": 33.270680778598425, "iter_time": 0.6200983276367188, "loss": 0.8052549958229065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.09856916234282, "step_time": 0.5715210876464843} +{"epoch": 0, "iter": 13293, "iter_tflops": 1.3641258734424173, "iter_time": 1.0337718048095703, "loss": 0.02892185002565384, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.4513381866418904, "step_time": 0.9716514587402343} +{"epoch": 0, "iter": 13294, "iter_tflops": 1.3631827822308216, "iter_time": 1.0344869995117187, "loss": 0.009991386905312538, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 1.6684822049754544, "step_time": 0.8451962280273436} +{"epoch": 0, "iter": 13295, "iter_tflops": 2.4411125265826588, "iter_time": 0.577685317993164, "loss": 0.024513300508260727, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 2.65370576252912, "step_time": 0.5314058876037598} +{"epoch": 0, "iter": 13296, 
"iter_tflops": 2.940029966695576, "iter_time": 0.4796532287597656, "loss": 0.01794257201254368, "lr": 3e-05, "seqlen": 576.0, "step_tflops": 3.2061337675539754, "step_time": 0.43984280395507813} +{"epoch": 0, "iter": 13297, "iter_tflops": 18.32453961931157, "iter_time": 1.1258724060058594, "loss": 0.020690105855464935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.54466189387816, "step_time": 1.0555871276855469} +{"epoch": 0, "iter": 13298, "iter_tflops": 15.120710870143245, "iter_time": 1.3644261627197267, "loss": 0.054472241550683975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.18241269957756, "step_time": 0.9739727859497069} +{"epoch": 0, "iter": 13299, "iter_tflops": 42.64651061086052, "iter_time": 0.48376979064941406, "loss": 0.021855361759662628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1080698323539, "step_time": 0.43795242691040037} +{"epoch": 0, "iter": 13300, "iter_tflops": 43.70733059294316, "iter_time": 0.47202822113037113, "loss": 0.025516057386994362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.87033205959245, "step_time": 0.4309787006378174} +{"epoch": 0, "iter": 13301, "iter_tflops": 24.213865952934373, "iter_time": 0.8520363311767577, "loss": 0.04000714421272278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.275058431359188, "step_time": 0.785196865081787} +{"epoch": 0, "iter": 13302, "iter_tflops": 19.422859805187475, "iter_time": 1.062206787109375, "loss": 0.020333468914031982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.945123762030427, "step_time": 0.9401219940185546} +{"epoch": 0, "iter": 13303, "iter_tflops": 54.59895536080751, "iter_time": 0.37786608505249025, "loss": 0.03358421474695206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.019294794841805, "step_time": 0.34374101829528814} +{"epoch": 0, "iter": 13304, "iter_tflops": 50.42552832324372, "iter_time": 0.4091398582458496, "loss": 0.03632142022252083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.066661033286756, "step_time": 0.3746567001342773} 
+{"epoch": 0, "iter": 13305, "iter_tflops": 27.377103140358177, "iter_time": 0.7535893554687499, "loss": 0.38518646359443665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.05004107225913, "step_time": 0.7101915435791015} +{"epoch": 0, "iter": 13306, "iter_tflops": 22.72976577444455, "iter_time": 0.9076685485839844, "loss": 0.3594270944595337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.57426224657022, "step_time": 0.8067131443023682} +{"epoch": 0, "iter": 13307, "iter_tflops": 47.59331548099011, "iter_time": 0.43348720932006835, "loss": 0.31551888585090637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.93250289597639, "step_time": 0.3972674598693848} +{"epoch": 0, "iter": 13308, "iter_tflops": 49.206480851315256, "iter_time": 0.4192759399414062, "loss": 0.3242948055267334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.47914765533252, "step_time": 0.38577827835083006} +{"epoch": 0, "iter": 13309, "iter_tflops": 24.79361820159936, "iter_time": 0.8321130599975586, "loss": 0.07975982874631882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.08496689512852, "step_time": 0.7909189071655274} +{"epoch": 0, "iter": 13310, "iter_tflops": 14.65731267309192, "iter_time": 1.407563171386719, "loss": 0.10646628588438034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.41421185318162, "step_time": 1.0626799411773682} +{"epoch": 0, "iter": 13311, "iter_tflops": 38.256092654689645, "iter_time": 0.5392890930175781, "loss": 0.08470847457647324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96268829780854, "step_time": 0.49165328407287595} +{"epoch": 0, "iter": 13312, "iter_tflops": 42.60707690632784, "iter_time": 0.484217529296875, "loss": 0.05361032485961914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.98145574611306, "step_time": 0.4391326999664307} +{"epoch": 0, "iter": 13313, "iter_tflops": 22.6345143110087, "iter_time": 0.9114882354736328, "loss": 0.23679202795028687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.175907532451507, "step_time": 
0.8533741073608399} +{"epoch": 0, "iter": 13314, "iter_tflops": 13.897144229222631, "iter_time": 1.4845563354492188, "loss": 0.23066896200180054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.639482347461904, "step_time": 1.1695974464416503} +{"epoch": 0, "iter": 13315, "iter_tflops": 39.73281205730739, "iter_time": 0.5192457427978515, "loss": 0.2625887989997864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.6076128209162, "step_time": 0.47310761070251467} +{"epoch": 0, "iter": 13316, "iter_tflops": 40.28471110870628, "iter_time": 0.5121320953369141, "loss": 0.28953802585601807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.04768327504619, "step_time": 0.4683808994293213} +{"epoch": 0, "iter": 13317, "iter_tflops": 32.3256733382303, "iter_time": 0.6382262573242187, "loss": 0.5675896406173706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.66410670023884, "step_time": 0.5784833946228027} +{"epoch": 0, "iter": 13318, "iter_tflops": 34.89031817408204, "iter_time": 0.5913128509521485, "loss": 0.43843650817871094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.9081134556842, "step_time": 0.5442395210266113} +{"epoch": 0, "iter": 13319, "iter_tflops": 40.29856576762617, "iter_time": 0.5119560241699218, "loss": 0.47511762380599976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.79475615474006, "step_time": 0.4710859317779541} +{"epoch": 0, "iter": 13320, "iter_tflops": 34.584454291531785, "iter_time": 0.5965424041748046, "loss": 0.5836679339408875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.51185807402361, "step_time": 0.5499885787963867} +{"epoch": 0, "iter": 13321, "iter_tflops": 29.94210328254147, "iter_time": 0.6890328750610352, "loss": 0.18675082921981812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.94773571946712, "step_time": 0.626176368713379} +{"epoch": 0, "iter": 13322, "iter_tflops": 36.48218273151917, "iter_time": 0.565511489868164, "loss": 0.17295442521572113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.23328607965091, 
"step_time": 0.5127866878509522} +{"epoch": 0, "iter": 13323, "iter_tflops": 38.71042482944786, "iter_time": 0.5329596252441406, "loss": 0.1485629379749298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.30789359453205, "step_time": 0.48764170837402343} +{"epoch": 0, "iter": 13324, "iter_tflops": 43.48716785808879, "iter_time": 0.4744179611206054, "loss": 0.20175616443157196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.752390142556635, "step_time": 0.43204315948486327} +{"epoch": 0, "iter": 13325, "iter_tflops": 35.59814403119051, "iter_time": 0.5795553131103516, "loss": 0.6587550044059753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.17527965526612, "step_time": 0.526635513305664} +{"epoch": 0, "iter": 13326, "iter_tflops": 35.36750869567987, "iter_time": 0.5833346557617187, "loss": 0.5437666773796082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.83872099764608, "step_time": 0.5311990966796876} +{"epoch": 0, "iter": 13327, "iter_tflops": 38.53320846914773, "iter_time": 0.5354107360839844, "loss": 0.6231358051300049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.73523211623166, "step_time": 0.49433278465270997} +{"epoch": 0, "iter": 13328, "iter_tflops": 35.98161777966682, "iter_time": 0.5733787078857422, "loss": 0.721429705619812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14283040419691, "step_time": 0.5270720920562744} +{"epoch": 0, "iter": 13329, "iter_tflops": 16.15081202258962, "iter_time": 1.2774028625488283, "loss": 0.5839187502861023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.103764954434613, "step_time": 1.2062311172485354} +{"epoch": 0, "iter": 13330, "iter_tflops": 17.173850585633918, "iter_time": 1.201308547973633, "loss": 0.6678857207298279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.85734279877157, "step_time": 0.9026024456024169} +{"epoch": 0, "iter": 13331, "iter_tflops": 42.84952470023328, "iter_time": 0.4814777679443359, "loss": 0.8011565208435059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
46.066322788770826, "step_time": 0.4478563137054444} +{"epoch": 0, "iter": 13332, "iter_tflops": 45.80754433358856, "iter_time": 0.4503863677978516, "loss": 0.508356511592865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45276161233728, "step_time": 0.41718789482116697} +{"epoch": 0, "iter": 13333, "iter_tflops": 35.17912635505152, "iter_time": 0.586458381652832, "loss": 0.37731263041496277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.86372926005677, "step_time": 0.54487748336792} +{"epoch": 0, "iter": 13334, "iter_tflops": 18.82937703264981, "iter_time": 1.0956864624023437, "loss": 0.447868287563324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.769352403303323, "step_time": 0.906090482711792} +{"epoch": 0, "iter": 13335, "iter_tflops": 43.74149961683296, "iter_time": 0.4716594924926758, "loss": 0.483090877532959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47610406549137, "step_time": 0.43455742454528806} +{"epoch": 0, "iter": 13336, "iter_tflops": 49.11907213998658, "iter_time": 0.42002205276489263, "loss": 0.5447046160697937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.35409565626903, "step_time": 0.3866824703216553} +{"epoch": 0, "iter": 13337, "iter_tflops": 33.753785732527824, "iter_time": 0.6112230987548828, "loss": 0.34352174401283264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.33030830814504, "step_time": 0.5678755416870117} +{"epoch": 0, "iter": 13338, "iter_tflops": 9.70681478330343, "iter_time": 2.1254236297607423, "loss": 0.304732084274292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.852092759030185, "step_time": 1.6052711334228515} +{"epoch": 0, "iter": 13339, "iter_tflops": 16.293877252588803, "iter_time": 1.2661868743896485, "loss": 0.2896403670310974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.956420548323685, "step_time": 1.0883433113098144} +{"epoch": 0, "iter": 13340, "iter_tflops": 21.23754823345418, "iter_time": 0.971444221496582, "loss": 0.27574771642684937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
25.93062758873213, "step_time": 0.7956264629364013} +{"epoch": 0, "iter": 13341, "iter_tflops": 17.196200280663486, "iter_time": 0.866921257019043, "loss": 0.2600735127925873, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 18.425431262466205, "step_time": 0.8090856246948241} +{"epoch": 0, "iter": 13342, "iter_tflops": 5.403412852982428, "iter_time": 2.758951049804687, "loss": 0.16069792211055756, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 6.650285020289631, "step_time": 2.241671073913574} +{"epoch": 0, "iter": 13343, "iter_tflops": 8.66715371366793, "iter_time": 1.7200285186767577, "loss": 0.17988628149032593, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 10.645428988156917, "step_time": 1.4003899307250975} +{"epoch": 0, "iter": 13344, "iter_tflops": 22.366740115617255, "iter_time": 0.6665142745971679, "loss": 0.1518450379371643, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 24.092914312247615, "step_time": 0.6187608261108398} +{"epoch": 0, "iter": 13345, "iter_tflops": 14.78924731399352, "iter_time": 1.002489601135254, "loss": 0.21515390276908875, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.584499147404042, "step_time": 0.9513341751098633} +{"epoch": 0, "iter": 13346, "iter_tflops": 9.688670529205082, "iter_time": 1.530247787475586, "loss": 0.14665569365024567, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 12.367675034549327, "step_time": 1.1987755661010742} +{"epoch": 0, "iter": 13347, "iter_tflops": 22.59963581091929, "iter_time": 0.6560312194824218, "loss": 0.15626244246959686, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 24.34771118651092, "step_time": 0.6089306106567383} +{"epoch": 0, "iter": 13348, "iter_tflops": 23.94105455558186, "iter_time": 0.6192737503051757, "loss": 0.23829644918441772, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 25.688379923626428, "step_time": 0.577150707244873} +{"epoch": 0, "iter": 13349, "iter_tflops": 25.18678869659037, "iter_time": 0.8191236190795899, "loss": 0.034022893756628036, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 26.784981141434965, "step_time": 0.7702485733032227} +{"epoch": 0, "iter": 13350, "iter_tflops": 15.287512220312298, "iter_time": 1.3495389709472658, "loss": 0.018613778054714203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.113166570704642, "step_time": 1.1390108642578123} +{"epoch": 0, "iter": 13351, "iter_tflops": 51.888486913280666, "iter_time": 0.39760445404052736, "loss": 0.043617505580186844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.920052091454366, "step_time": 0.36245738983154296} +{"epoch": 0, "iter": 13352, "iter_tflops": 58.80366183449754, "iter_time": 0.3508470878601075, "loss": 0.02873755246400833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.53298368631019, "step_time": 0.31969842910766605} +{"epoch": 0, "iter": 13353, "iter_tflops": 22.877978700985377, "iter_time": 0.9017882995605467, "loss": 0.005787697155028582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.89007718464184, "step_time": 0.8635842132568359} +{"epoch": 0, "iter": 13354, "iter_tflops": 14.149827416570187, "iter_time": 1.4580455932617187, "loss": 0.011041336692869663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.91875526402353, "step_time": 1.1513686752319336} +{"epoch": 0, "iter": 13355, "iter_tflops": 56.41164273740157, "iter_time": 0.36572403335571285, "loss": 0.00727895088493824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.1621150076256, "step_time": 0.33189175605773924} +{"epoch": 0, "iter": 13356, "iter_tflops": 50.651487856551924, "iter_time": 0.4073146591186524, "loss": 0.00679943198338151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.1765303783627, "step_time": 0.37391067123413085} +{"epoch": 0, "iter": 13357, "iter_tflops": 21.809288437033295, "iter_time": 0.9459773788452148, "loss": 0.8908082842826843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.912949100486045, "step_time": 0.9004119644165041} +{"epoch": 0, "iter": 13358, "iter_tflops": 13.863739803874639, "iter_time": 1.4881333465576172, "loss": 0.8786786794662476, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 18.130307532373664, "step_time": 1.1379340076446534} +{"epoch": 0, "iter": 13359, "iter_tflops": 40.546380625263254, "iter_time": 0.5088270072937011, "loss": 0.5701689124107361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13985631461689, "step_time": 0.46740282440185543} +{"epoch": 0, "iter": 13360, "iter_tflops": 36.38309544900954, "iter_time": 0.567051628112793, "loss": 0.606555700302124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.593241945320365, "step_time": 0.5210761356353759} +{"epoch": 0, "iter": 13361, "iter_tflops": 18.144650898156467, "iter_time": 1.137034469604492, "loss": 0.4200619161128998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.483672966961258, "step_time": 1.058891387939453} +{"epoch": 0, "iter": 13362, "iter_tflops": 26.24677266687935, "iter_time": 0.7860430603027344, "loss": 0.47972652316093445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.95484813458257, "step_time": 0.6260412254333496} +{"epoch": 0, "iter": 13363, "iter_tflops": 48.53159205645831, "iter_time": 0.42510646438598626, "loss": 0.4409250319004059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.74814846379854, "step_time": 0.39112450599670406} +{"epoch": 0, "iter": 13364, "iter_tflops": 45.53256203883783, "iter_time": 0.45310636138916016, "loss": 0.40385228395462036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.37424556974602, "step_time": 0.4178513164520264} +{"epoch": 0, "iter": 13365, "iter_tflops": 44.43336778625931, "iter_time": 0.4643153228759766, "loss": 0.16259655356407166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.38962399740414, "step_time": 0.42635366439819333} +{"epoch": 0, "iter": 13366, "iter_tflops": 47.16527909456622, "iter_time": 0.43742121124267574, "loss": 0.14866535365581512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.36469351461871, "step_time": 0.4016590404510498} +{"epoch": 0, "iter": 13367, "iter_tflops": 49.36136167449168, "iter_time": 0.41796038055419926, "loss": 
0.3497406542301178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.56664534103577, "step_time": 0.38514813423156735} +{"epoch": 0, "iter": 13368, "iter_tflops": 50.65593314501563, "iter_time": 0.40727891540527345, "loss": 0.17052890360355377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.097436585539924, "step_time": 0.3744474296569824} +{"epoch": 0, "iter": 13369, "iter_tflops": 20.583859087711254, "iter_time": 1.0022947311401367, "loss": 0.4889392554759979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.46831777474379, "step_time": 0.9610018692016602} +{"epoch": 0, "iter": 13370, "iter_tflops": 22.06390684082941, "iter_time": 0.935060760498047, "loss": 0.5408960580825806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.95575551452621, "step_time": 0.7653687725067139} +{"epoch": 0, "iter": 13371, "iter_tflops": 42.209904883931614, "iter_time": 0.4887737503051758, "loss": 0.5560640692710876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15082766262311, "step_time": 0.447036262512207} +{"epoch": 0, "iter": 13372, "iter_tflops": 42.33752269214951, "iter_time": 0.48730044174194337, "loss": 0.6532661318778992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97263154429304, "step_time": 0.44876903533935547} +{"epoch": 0, "iter": 13373, "iter_tflops": 32.967060251186645, "iter_time": 0.6258093185424805, "loss": 0.8689874410629272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.11354311156923, "step_time": 0.5712841148376464} +{"epoch": 0, "iter": 13374, "iter_tflops": 44.7887855025486, "iter_time": 0.460630786895752, "loss": 0.6355554461479187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.44304086911237, "step_time": 0.42588353538513185} +{"epoch": 0, "iter": 13375, "iter_tflops": 45.38944207625436, "iter_time": 0.4545350761413574, "loss": 0.6636142134666443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95624718511368, "step_time": 0.42141901588439945} +{"epoch": 0, "iter": 13376, "iter_tflops": 47.49604219616918, "iter_time": 0.43437500381469724, 
"loss": 0.7924842238426208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.32501835331787, "step_time": 0.4019695301055908} +{"epoch": 0, "iter": 13377, "iter_tflops": 43.147398693816214, "iter_time": 0.4781538200378418, "loss": 0.500273585319519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.99539563930472, "step_time": 0.4390024433135986} +{"epoch": 0, "iter": 13378, "iter_tflops": 43.48651538109473, "iter_time": 0.4744250793457031, "loss": 0.5262269377708435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.844295570313946, "step_time": 0.4223849124908447} +{"epoch": 0, "iter": 13379, "iter_tflops": 46.52524216286384, "iter_time": 0.4434387130737305, "loss": 0.4268066883087158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.52377909182831, "step_time": 0.40834422683715815} +{"epoch": 0, "iter": 13380, "iter_tflops": 41.84670605800824, "iter_time": 0.4930159492492676, "loss": 0.4020223915576935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.89142769739474, "step_time": 0.45957757568359375} +{"epoch": 0, "iter": 13381, "iter_tflops": 43.055545488541114, "iter_time": 0.4791738967895508, "loss": 0.14440667629241943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.800607520030255, "step_time": 0.4408296089172364} +{"epoch": 0, "iter": 13382, "iter_tflops": 35.857353557748326, "iter_time": 0.5753657608032227, "loss": 0.15187695622444153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.22043369222838, "step_time": 0.512950548171997} +{"epoch": 0, "iter": 13383, "iter_tflops": 41.21755531856066, "iter_time": 0.5005414161682129, "loss": 0.08773566782474518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26070504226628, "step_time": 0.45582793045043946} +{"epoch": 0, "iter": 13384, "iter_tflops": 37.840569997813176, "iter_time": 0.5452109603881836, "loss": 0.1317548155784607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.68538839126277, "step_time": 0.49492386436462404} +{"epoch": 0, "iter": 13385, "iter_tflops": 11.809651070184628, "iter_time": 
0.9287127685546875, "loss": 0.022502664476633072, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 12.67731848582882, "step_time": 0.8651493415832521} +{"epoch": 0, "iter": 13386, "iter_tflops": 14.161624888593346, "iter_time": 0.7744714202880859, "loss": 0.028429679572582245, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 16.200916976258156, "step_time": 0.6769847507476806} +{"epoch": 0, "iter": 13387, "iter_tflops": 28.95402449017279, "iter_time": 0.3787996292114258, "loss": 0.06195252761244774, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 31.529309748610974, "step_time": 0.34785962104797363} +{"epoch": 0, "iter": 13388, "iter_tflops": 30.507472502904434, "iter_time": 0.3595110588073731, "loss": 0.03159583359956741, "lr": 3e-05, "seqlen": 4416.0, "step_tflops": 33.33480574887903, "step_time": 0.32901867866516116} +{"epoch": 0, "iter": 13389, "iter_tflops": 34.39663040364883, "iter_time": 0.5997998428344726, "loss": 0.569156289100647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.97796737575888, "step_time": 0.5579293556213379} +{"epoch": 0, "iter": 13390, "iter_tflops": 23.07154835645508, "iter_time": 0.8942223205566406, "loss": 0.7113116383552551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.689834242168445, "step_time": 0.803083948135376} +{"epoch": 0, "iter": 13391, "iter_tflops": 37.68855980396403, "iter_time": 0.5474099731445312, "loss": 0.7412429451942444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.677494893240905, "step_time": 0.5071869239807129} +{"epoch": 0, "iter": 13392, "iter_tflops": 44.55940594001833, "iter_time": 0.4630019874572754, "loss": 0.781583309173584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.09589496433109, "step_time": 0.4289574718475341} +{"epoch": 0, "iter": 13393, "iter_tflops": 21.86466288598049, "iter_time": 0.9435815963745117, "loss": 0.5766688585281372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.9428061564459, "step_time": 0.8992401962280274} +{"epoch": 0, "iter": 13394, "iter_tflops": 45.979154260567455, 
"iter_time": 0.44870537185668946, "loss": 0.5393474102020264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.272810774345, "step_time": 0.40237882804870606} +{"epoch": 0, "iter": 13395, "iter_tflops": 51.54445670682874, "iter_time": 0.40025823974609376, "loss": 0.6311160326004028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.90984744778089, "step_time": 0.3690064353942871} +{"epoch": 0, "iter": 13396, "iter_tflops": 51.32593723721738, "iter_time": 0.4019623336791992, "loss": 0.6432410478591919, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.48333370331874, "step_time": 0.37184307670593264} +{"epoch": 0, "iter": 13397, "iter_tflops": 23.32227259450693, "iter_time": 0.8846090545654297, "loss": 0.08906692266464233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.496135400327073, "step_time": 0.8422182998657225} +{"epoch": 0, "iter": 13398, "iter_tflops": 19.64757093375236, "iter_time": 1.0500582275390626, "loss": 0.11987420171499252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.2479584268276, "step_time": 0.7571610755920412} +{"epoch": 0, "iter": 13399, "iter_tflops": 44.05619844089466, "iter_time": 0.4682903709411621, "loss": 0.10698886960744858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.341647209080094, "step_time": 0.4267768001556396} +{"epoch": 0, "iter": 13400, "iter_tflops": 37.49513539652253, "iter_time": 0.5502338714599609, "loss": 0.06408897042274475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.11473052235101, "step_time": 0.5017932319641113} +{"epoch": 0, "iter": 13401, "iter_tflops": 19.9696618419053, "iter_time": 1.0331218261718749, "loss": 0.5166698098182678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.28477055463838, "step_time": 0.9692889785766601} +{"epoch": 0, "iter": 13402, "iter_tflops": 21.37302353764159, "iter_time": 0.9652866134643555, "loss": 0.6263991594314575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.864674738368514, "step_time": 0.7976552467346192} +{"epoch": 0, "iter": 13403, "iter_tflops": 
47.51411585986178, "iter_time": 0.43420977401733396, "loss": 0.764085054397583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.59248786220999, "step_time": 0.39988561058044436} +{"epoch": 0, "iter": 13404, "iter_tflops": 49.8998281640744, "iter_time": 0.4134501914978027, "loss": 0.5443776845932007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.84755169309962, "step_time": 0.38313893318176273} +{"epoch": 0, "iter": 13405, "iter_tflops": 23.02168289286427, "iter_time": 0.8961592254638673, "loss": 0.19098009169101715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.21481110114086, "step_time": 0.852003074645996} +{"epoch": 0, "iter": 13406, "iter_tflops": 18.140420866619255, "iter_time": 1.1372996063232423, "loss": 0.20747245848178864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.71357532994943, "step_time": 0.9960179824829102} +{"epoch": 0, "iter": 13407, "iter_tflops": 43.00365116088406, "iter_time": 0.4797521362304688, "loss": 0.27260538935661316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.25061980144329, "step_time": 0.4366311721801758} +{"epoch": 0, "iter": 13408, "iter_tflops": 39.62725962413076, "iter_time": 0.5206288223266602, "loss": 0.24074625968933105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.56693302015953, "step_time": 0.47354936599731445} +{"epoch": 0, "iter": 13409, "iter_tflops": 33.2520409863132, "iter_time": 0.620445930480957, "loss": 0.0534462034702301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.943987673749746, "step_time": 0.558442518234253} +{"epoch": 0, "iter": 13410, "iter_tflops": 39.56267099718277, "iter_time": 0.5214787826538085, "loss": 0.10669107735157013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.381368276176225, "step_time": 0.464859338760376} +{"epoch": 0, "iter": 13411, "iter_tflops": 41.1459019433625, "iter_time": 0.5014130821228027, "loss": 0.06399735063314438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.34006800446103, "step_time": 0.4550300521850586} +{"epoch": 0, "iter": 13412, 
"iter_tflops": 43.666812777651636, "iter_time": 0.4724662094116211, "loss": 0.07287651300430298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.77414396713352, "step_time": 0.4318464298248291} +{"epoch": 0, "iter": 13413, "iter_tflops": 17.92101379429906, "iter_time": 1.1512235717773436, "loss": 0.7113103866577148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.45345256622739, "step_time": 1.0605363464355468} +{"epoch": 0, "iter": 13414, "iter_tflops": 18.595670841898855, "iter_time": 1.1094568023681641, "loss": 0.8044090270996094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.19672288635536, "step_time": 0.8526399879455566} +{"epoch": 0, "iter": 13415, "iter_tflops": 46.90388786669788, "iter_time": 0.43985892105102536, "loss": 0.7779691815376282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.711535200724704, "step_time": 0.40683235931396483} +{"epoch": 0, "iter": 13416, "iter_tflops": 45.11971847350989, "iter_time": 0.45725226593017576, "loss": 0.7774134278297424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.40609204793151, "step_time": 0.42620861625671386} +{"epoch": 0, "iter": 13417, "iter_tflops": 40.4939675924051, "iter_time": 0.5094856033325196, "loss": 0.08072003722190857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.202928551059756, "step_time": 0.4667358970642089} +{"epoch": 0, "iter": 13418, "iter_tflops": 8.37390896772705, "iter_time": 2.463735107421875, "loss": 0.13036130368709564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.224942346394881, "step_time": 1.8379687728881837} +{"epoch": 0, "iter": 13419, "iter_tflops": 10.921269779975319, "iter_time": 1.8890746154785156, "loss": 0.09215045720338821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.844886064965994, "step_time": 1.6061717796325683} +{"epoch": 0, "iter": 13420, "iter_tflops": 27.665915013287155, "iter_time": 0.7457224349975585, "loss": 0.08852989971637726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.44584148457838, "step_time": 0.5660753784179687} +{"epoch": 0, 
"iter": 13421, "iter_tflops": 18.839879774623206, "iter_time": 0.7847837371826171, "loss": 0.1712348312139511, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 19.990037126661164, "step_time": 0.7396300048828126} +{"epoch": 0, "iter": 13422, "iter_tflops": 11.744360493242137, "iter_time": 1.2589217834472657, "loss": 0.1406586915254593, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 13.761589025109942, "step_time": 1.0743840141296388} +{"epoch": 0, "iter": 13423, "iter_tflops": 21.553642244721477, "iter_time": 0.6859736785888673, "loss": 0.18070358037948608, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 23.12610993560443, "step_time": 0.6393306655883789} +{"epoch": 0, "iter": 13424, "iter_tflops": 22.0919231398058, "iter_time": 0.6692595825195312, "loss": 0.2730502188205719, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 23.841881943510092, "step_time": 0.6201369209289551} +{"epoch": 0, "iter": 13425, "iter_tflops": 21.256716469301793, "iter_time": 0.9705682220458984, "loss": 0.46570301055908203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.54842681388276, "step_time": 0.9149682006835939} +{"epoch": 0, "iter": 13426, "iter_tflops": 9.285921980256635, "iter_time": 2.2217603759765625, "loss": 0.655279815196991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.111120012384868, "step_time": 1.8567969284057615} +{"epoch": 0, "iter": 13427, "iter_tflops": 19.10047581947213, "iter_time": 1.0801350555419922, "loss": 0.6041327714920044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.93487941786478, "step_time": 0.8995509910583497} +{"epoch": 0, "iter": 13428, "iter_tflops": 39.03196545622031, "iter_time": 0.5285691680908203, "loss": 0.8454927206039429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25635157711593, "step_time": 0.4882365074157715} +{"epoch": 0, "iter": 13429, "iter_tflops": 26.76079724748225, "iter_time": 0.5952889022827148, "loss": 0.24548207223415375, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.790356565399687, "step_time": 0.5533243598937987} 
+{"epoch": 0, "iter": 13430, "iter_tflops": 27.98653241436327, "iter_time": 0.5692168426513672, "loss": 0.15421657264232635, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.8354136309059, "step_time": 0.5339428443908691} +{"epoch": 0, "iter": 13431, "iter_tflops": 27.086776752222676, "iter_time": 0.5881248168945312, "loss": 0.17127454280853271, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 28.917499683057514, "step_time": 0.5508915290832519} +{"epoch": 0, "iter": 13432, "iter_tflops": 28.646268564804735, "iter_time": 0.5561075286865235, "loss": 0.24131062626838684, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.54089812447813, "step_time": 0.521608943939209} +{"epoch": 0, "iter": 13433, "iter_tflops": 32.390820814308185, "iter_time": 0.6369425964355468, "loss": 0.5039068460464478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.79280461613203, "step_time": 0.5929701194763183} +{"epoch": 0, "iter": 13434, "iter_tflops": 41.62161227710092, "iter_time": 0.49568222808837886, "loss": 0.4392651915550232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.5553162798057, "step_time": 0.4528800411224365} +{"epoch": 0, "iter": 13435, "iter_tflops": 46.57020635713858, "iter_time": 0.4430105667114258, "loss": 0.5366988778114319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.3822142097639, "step_time": 0.4094916000366211} +{"epoch": 0, "iter": 13436, "iter_tflops": 46.442580259161275, "iter_time": 0.4442279777526856, "loss": 0.49249905347824097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.29463273723873, "step_time": 0.41020467567443847} +{"epoch": 0, "iter": 13437, "iter_tflops": 31.892086914347757, "iter_time": 0.6469032135009766, "loss": 0.6617193818092346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.0120815491367, "step_time": 0.6065813255310059} +{"epoch": 0, "iter": 13438, "iter_tflops": 13.044056777339001, "iter_time": 1.5816470184326175, "loss": 0.910595715045929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.383841429532758, "step_time": 
1.259234203338623} +{"epoch": 0, "iter": 13439, "iter_tflops": 41.86643791595367, "iter_time": 0.4927835884094238, "loss": 0.8086171746253967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.97890313108473, "step_time": 0.45868378448486324} +{"epoch": 0, "iter": 13440, "iter_tflops": 48.279434835636884, "iter_time": 0.4273267402648926, "loss": 0.9234256744384766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.85232720880303, "step_time": 0.39788172721862797} +{"epoch": 0, "iter": 13441, "iter_tflops": 22.635076253813338, "iter_time": 0.9114656066894531, "loss": 0.060779184103012085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.64421945229033, "step_time": 0.8725639495849609} +{"epoch": 0, "iter": 13442, "iter_tflops": 15.911874668021488, "iter_time": 1.296584716796875, "loss": 0.14158710837364197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.361276996862554, "step_time": 1.0132514533996582} +{"epoch": 0, "iter": 13443, "iter_tflops": 48.990307926264414, "iter_time": 0.42112602233886715, "loss": 0.11582344770431519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.74849479378973, "step_time": 0.38384504699707034} +{"epoch": 0, "iter": 13444, "iter_tflops": 52.31958418125838, "iter_time": 0.3943283157348632, "loss": 0.12721586227416992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.83977586372344, "step_time": 0.36296929740905765} +{"epoch": 0, "iter": 13445, "iter_tflops": 38.82400837092973, "iter_time": 0.5314003982543946, "loss": 0.9033995270729065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.18093833059617, "step_time": 0.48910940170288086} +{"epoch": 0, "iter": 13446, "iter_tflops": 40.394887170235705, "iter_time": 0.5107352676391601, "loss": 0.804000973701477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32441420871103, "step_time": 0.4654566535949707} +{"epoch": 0, "iter": 13447, "iter_tflops": 42.273135880312196, "iter_time": 0.4880426559448242, "loss": 0.6779907941818237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.29573617950517, 
"step_time": 0.45547539901733397} +{"epoch": 0, "iter": 13448, "iter_tflops": 42.80983072858445, "iter_time": 0.481924201965332, "loss": 0.5135655999183655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.1421713696821, "step_time": 0.4471201267242432} +{"epoch": 0, "iter": 13449, "iter_tflops": 27.297384396396176, "iter_time": 0.7557901229858398, "loss": 0.3602319061756134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.958148962402763, "step_time": 0.7124451751708984} +{"epoch": 0, "iter": 13450, "iter_tflops": 15.858137861654404, "iter_time": 1.300978317260742, "loss": 0.5481380820274353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.572201922887782, "step_time": 1.1740755996704102} +{"epoch": 0, "iter": 13451, "iter_tflops": 47.31109239278421, "iter_time": 0.4360730743408203, "loss": 0.40994712710380554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.465192930650375, "step_time": 0.4008746948242187} +{"epoch": 0, "iter": 13452, "iter_tflops": 52.139947642370906, "iter_time": 0.39568688583374023, "loss": 0.4075721204280853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.510474689264484, "step_time": 0.36508441352844234} +{"epoch": 0, "iter": 13453, "iter_tflops": 44.56708352620039, "iter_time": 0.46292222595214844, "loss": 0.2694612443447113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.52001381898799, "step_time": 0.4252079067230225} +{"epoch": 0, "iter": 13454, "iter_tflops": 13.646056325160279, "iter_time": 1.5118722229003907, "loss": 0.27618539333343506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.79565943672091, "step_time": 1.3061242294311524} +{"epoch": 0, "iter": 13455, "iter_tflops": 12.168509424198664, "iter_time": 1.6954495239257814, "loss": 0.23536674678325653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.282769671007472, "step_time": 1.4444742851257324} +{"epoch": 0, "iter": 13456, "iter_tflops": 45.59701753923771, "iter_time": 0.45246585464477546, "loss": 0.1996302455663681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
50.38061285502523, "step_time": 0.4095046157836914} +{"epoch": 0, "iter": 13457, "iter_tflops": 17.38406652909812, "iter_time": 0.8763588180541992, "loss": 0.4821619987487793, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 18.28162916239988, "step_time": 0.833332733154297} +{"epoch": 0, "iter": 13458, "iter_tflops": 10.493316721823984, "iter_time": 1.4518460083007816, "loss": 0.23787304759025574, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 14.263914265092621, "step_time": 1.0680574569702148} +{"epoch": 0, "iter": 13459, "iter_tflops": 24.721188710930672, "iter_time": 0.616260009765625, "loss": 0.22762645781040192, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 26.496542531035367, "step_time": 0.5749686012268066} +{"epoch": 0, "iter": 13460, "iter_tflops": 25.286120458020097, "iter_time": 0.6024917907714844, "loss": 0.17964160442352295, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 27.058303052608007, "step_time": 0.5630316123962402} +{"epoch": 0, "iter": 13461, "iter_tflops": 30.574188902571166, "iter_time": 0.6747879257202148, "loss": 0.4181796908378601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.976788023105996, "step_time": 0.6072114143371582} +{"epoch": 0, "iter": 13462, "iter_tflops": 38.21290441829849, "iter_time": 0.5398985977172852, "loss": 0.3226379454135895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.249894343364794, "step_time": 0.48831112670898436} +{"epoch": 0, "iter": 13463, "iter_tflops": 41.292556606903815, "iter_time": 0.4996322631835937, "loss": 0.38816261291503906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.31737387121546, "step_time": 0.45525792312622065} +{"epoch": 0, "iter": 13464, "iter_tflops": 40.49431960198869, "iter_time": 0.5094811744689941, "loss": 0.3596511483192444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.29926111172654, "step_time": 0.4657209396362305} +{"epoch": 0, "iter": 13465, "iter_tflops": 18.739702611452973, "iter_time": 1.1009296112060547, "loss": 0.679934024810791, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 19.746247053564723, "step_time": 1.0448108673095704} +{"epoch": 0, "iter": 13466, "iter_tflops": 16.783660380724648, "iter_time": 1.229236831665039, "loss": 0.5241208672523499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.088871984928698, "step_time": 1.026991138458252} +{"epoch": 0, "iter": 13467, "iter_tflops": 48.37812399413145, "iter_time": 0.42645501327514646, "loss": 0.5284091830253601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.48973042712052, "step_time": 0.39305009460449214} +{"epoch": 0, "iter": 13468, "iter_tflops": 49.546104677787426, "iter_time": 0.416401927947998, "loss": 0.6152721047401428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.53857589719818, "step_time": 0.385350061416626} +{"epoch": 0, "iter": 13469, "iter_tflops": 16.6222708131848, "iter_time": 1.2411717834472655, "loss": 0.18927264213562012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.160813164637688, "step_time": 1.202221206665039} +{"epoch": 0, "iter": 13470, "iter_tflops": 16.35740952649225, "iter_time": 1.2612689971923827, "loss": 0.16472700238227844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.764169884683856, "step_time": 0.9062967643737794} +{"epoch": 0, "iter": 13471, "iter_tflops": 50.24767712547224, "iter_time": 0.410588005065918, "loss": 0.12875314056873322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.28146898983005, "step_time": 0.373200891494751} +{"epoch": 0, "iter": 13472, "iter_tflops": 45.87832410856654, "iter_time": 0.4496915245056152, "loss": 0.10898000746965408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.753304621926134, "step_time": 0.41466780281066895} +{"epoch": 0, "iter": 13473, "iter_tflops": 28.18295690218523, "iter_time": 0.7320414810180663, "loss": 0.7873794436454773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.992439371499792, "step_time": 0.6878764762878418} +{"epoch": 0, "iter": 13474, "iter_tflops": 12.919699266551941, "iter_time": 1.5968710327148439, "loss": 0.626668393611908, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 15.14342854488046, "step_time": 1.3623792953491212} +{"epoch": 0, "iter": 13475, "iter_tflops": 44.36989295417823, "iter_time": 0.4649795646667481, "loss": 0.6408551931381226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.906109262455125, "step_time": 0.43065683746337896} +{"epoch": 0, "iter": 13476, "iter_tflops": 46.169616937709506, "iter_time": 0.44685433578491207, "loss": 0.6626541614532471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.87404110493061, "step_time": 0.41366396331787103} +{"epoch": 0, "iter": 13477, "iter_tflops": 29.11646176417935, "iter_time": 0.7085714492797851, "loss": 0.7386250495910645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.797141012711222, "step_time": 0.6699028816223144} +{"epoch": 0, "iter": 13478, "iter_tflops": 18.364814931163526, "iter_time": 1.123403289794922, "loss": 0.7280573844909668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.99798984469676, "step_time": 0.9378626708984374} +{"epoch": 0, "iter": 13479, "iter_tflops": 33.17637707914326, "iter_time": 0.6218609542846679, "loss": 0.6902540326118469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.411675280851924, "step_time": 0.566606544494629} +{"epoch": 0, "iter": 13480, "iter_tflops": 35.280887864525106, "iter_time": 0.584766845703125, "loss": 0.7225249409675598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.193065400290514, "step_time": 0.5401790428161621} +{"epoch": 0, "iter": 13481, "iter_tflops": 23.28771404008462, "iter_time": 0.8859217987060547, "loss": 0.7661538124084473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.848722189124047, "step_time": 0.8302677841186523} +{"epoch": 0, "iter": 13482, "iter_tflops": 14.52821409472121, "iter_time": 1.4200708618164062, "loss": 0.5447829961776733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.56756777286608, "step_time": 1.1111360282897949} +{"epoch": 0, "iter": 13483, "iter_tflops": 47.06636437982196, "iter_time": 0.4383404960632324, "loss": 0.7932344079017639, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 51.18118691039783, "step_time": 0.40309916114807126} +{"epoch": 0, "iter": 13484, "iter_tflops": 44.10877346256112, "iter_time": 0.4677321968078614, "loss": 0.6618434190750122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.22139754948243, "step_time": 0.4369013748168945} +{"epoch": 0, "iter": 13485, "iter_tflops": 24.114451533618055, "iter_time": 0.8555489425659181, "loss": 0.4505511522293091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.335477346222845, "step_time": 0.8143163528442383} +{"epoch": 0, "iter": 13486, "iter_tflops": 20.316759273382758, "iter_time": 1.0154716720581054, "loss": 0.3796236217021942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.288605044012172, "step_time": 0.7560332775115965} +{"epoch": 0, "iter": 13487, "iter_tflops": 49.42385996705602, "iter_time": 0.4174318542480469, "loss": 0.35928383469581604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.663917706162806, "step_time": 0.3844500064849853} +{"epoch": 0, "iter": 13488, "iter_tflops": 47.5328950836389, "iter_time": 0.43403822708129874, "loss": 0.2839076519012451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.48347086201734, "step_time": 0.40073237419128416} +{"epoch": 0, "iter": 13489, "iter_tflops": 39.460914529517, "iter_time": 0.522823501586914, "loss": 0.002802459988743067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56870244516076, "step_time": 0.4846540374755859} +{"epoch": 0, "iter": 13490, "iter_tflops": 9.337861306067184, "iter_time": 2.209402435302734, "loss": 0.019799940288066864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.377073729446955, "step_time": 1.813391914367676} +{"epoch": 0, "iter": 13491, "iter_tflops": 17.389710040782365, "iter_time": 1.1863966369628907, "loss": 0.002483527874574065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.01248395290751, "step_time": 0.9818493404388429} +{"epoch": 0, "iter": 13492, "iter_tflops": 57.89152737556424, "iter_time": 0.35637500762939456, "loss": 0.0023372971918433905, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.80301655091143, "step_time": 0.3233560829162597} +{"epoch": 0, "iter": 13493, "iter_tflops": 22.46791934170845, "iter_time": 0.6489753189086914, "loss": 0.2197597324848175, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 23.878895719714293, "step_time": 0.610628116607666} +{"epoch": 0, "iter": 13494, "iter_tflops": 10.988665381646609, "iter_time": 1.3269241180419922, "loss": 0.17682205140590668, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 12.809590179140718, "step_time": 1.138297550201416} +{"epoch": 0, "iter": 13495, "iter_tflops": 21.5864873815769, "iter_time": 0.6754746551513672, "loss": 0.2589651048183441, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 23.296014146483454, "step_time": 0.6259064331054687} +{"epoch": 0, "iter": 13496, "iter_tflops": 22.931332735437863, "iter_time": 0.6358603439331054, "loss": 0.18202166259288788, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 24.622957841361302, "step_time": 0.5921760177612304} +{"epoch": 0, "iter": 13497, "iter_tflops": 19.219933203006487, "iter_time": 1.0734217071533203, "loss": 0.32472455501556396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.797312866155494, "step_time": 0.9920076522827148} +{"epoch": 0, "iter": 13498, "iter_tflops": 28.087358738682326, "iter_time": 0.7345330581665039, "loss": 0.18287935853004456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.44347590881189, "step_time": 0.5989840736389159} +{"epoch": 0, "iter": 13499, "iter_tflops": 48.68245251190396, "iter_time": 0.42378911590576174, "loss": 0.2835582494735718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.516457387706225, "step_time": 0.39285006141662593} +{"epoch": 0, "iter": 13500, "iter_tflops": 53.70558217524167, "iter_time": 0.3841517524719239, "loss": 0.2926842272281647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.420486852963805, "step_time": 0.3531482639312744} +{"epoch": 0, "iter": 13501, "iter_tflops": 35.001445518380926, "iter_time": 0.5894354705810547, "loss": 
0.27909642457962036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.66075964873034, "step_time": 0.5478140563964844} +{"epoch": 0, "iter": 13502, "iter_tflops": 14.23171859380386, "iter_time": 1.4496558074951171, "loss": 0.21667839586734772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.34721063318527, "step_time": 1.1893032226562499} +{"epoch": 0, "iter": 13503, "iter_tflops": 39.993183936917106, "iter_time": 0.5158652420043945, "loss": 0.25841253995895386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.21802025435373, "step_time": 0.46657659912109384} +{"epoch": 0, "iter": 13504, "iter_tflops": 40.81553418952571, "iter_time": 0.5054716033935547, "loss": 0.4206947386264801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.68775107186627, "step_time": 0.4616722259521484} +{"epoch": 0, "iter": 13505, "iter_tflops": 18.804541451404244, "iter_time": 1.097133560180664, "loss": 0.39999377727508545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.14724543386645, "step_time": 1.0240155944824219} +{"epoch": 0, "iter": 13506, "iter_tflops": 27.37212777362728, "iter_time": 0.753726333618164, "loss": 0.42867326736450195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.625735261371275, "step_time": 0.613550703048706} +{"epoch": 0, "iter": 13507, "iter_tflops": 47.89462667924835, "iter_time": 0.4307600860595703, "loss": 0.4783152937889099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.85371027525135, "step_time": 0.39787111473083503} +{"epoch": 0, "iter": 13508, "iter_tflops": 46.00299242727147, "iter_time": 0.448472858428955, "loss": 0.39371803402900696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.61382838554743, "step_time": 0.41583353233337406} +{"epoch": 0, "iter": 13509, "iter_tflops": 41.852093608006385, "iter_time": 0.4929524841308594, "loss": 0.5293503999710083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97859040187016, "step_time": 0.44871087455749514} +{"epoch": 0, "iter": 13510, "iter_tflops": 34.83968088933265, "iter_time": 0.5921722869873047, 
"loss": 0.6380183100700378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.04311695178021, "step_time": 0.5423081798553466} +{"epoch": 0, "iter": 13511, "iter_tflops": 38.86436605557228, "iter_time": 0.5308485794067384, "loss": 0.626782238483429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.51098381962617, "step_time": 0.4853120689392089} +{"epoch": 0, "iter": 13512, "iter_tflops": 40.47859338438814, "iter_time": 0.5096791114807129, "loss": 0.595395565032959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.05081102247063, "step_time": 0.46834764289855957} +{"epoch": 0, "iter": 13513, "iter_tflops": 33.6038022429594, "iter_time": 0.6139511642456055, "loss": 0.5667011737823486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.157467839159594, "step_time": 0.5552341079711913} +{"epoch": 0, "iter": 13514, "iter_tflops": 38.44769938304583, "iter_time": 0.5366015090942383, "loss": 0.5478312969207764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.200511355429896, "step_time": 0.4775659561157226} +{"epoch": 0, "iter": 13515, "iter_tflops": 38.06824682235857, "iter_time": 0.5419501876831054, "loss": 0.6008741855621338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.724413699138715, "step_time": 0.4944609565734864} +{"epoch": 0, "iter": 13516, "iter_tflops": 37.46737161086493, "iter_time": 0.5506416015625, "loss": 0.5197118520736694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.50104503955297, "step_time": 0.5093965721130371} +{"epoch": 0, "iter": 13517, "iter_tflops": 16.125197082660268, "iter_time": 1.2794320220947264, "loss": 0.2006503939628601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.102005570357914, "step_time": 1.206355209350586} +{"epoch": 0, "iter": 13518, "iter_tflops": 18.454287573850834, "iter_time": 1.1179566497802733, "loss": 0.09301237761974335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.599420875866596, "step_time": 0.912903636932373} +{"epoch": 0, "iter": 13519, "iter_tflops": 50.94823895917341, "iter_time": 0.40494223022460935, 
"loss": 0.10877487808465958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.49449819549513, "step_time": 0.371768268585205} +{"epoch": 0, "iter": 13520, "iter_tflops": 55.57892891243415, "iter_time": 0.3712035102844238, "loss": 0.1271773874759674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.54866123714145, "step_time": 0.3407357501983642} +{"epoch": 0, "iter": 13521, "iter_tflops": 33.34622655652562, "iter_time": 0.6186934967041016, "loss": 0.7384753227233887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.676264008803706, "step_time": 0.5782862663269043} +{"epoch": 0, "iter": 13522, "iter_tflops": 17.682760342386125, "iter_time": 1.1667348937988282, "loss": 0.6584545373916626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.7357957791489, "step_time": 0.9491758995056151} +{"epoch": 0, "iter": 13523, "iter_tflops": 34.99560449892303, "iter_time": 0.5895338516235351, "loss": 0.5099056959152222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.00783862241921, "step_time": 0.5428115425109863} +{"epoch": 0, "iter": 13524, "iter_tflops": 38.77488488968025, "iter_time": 0.5320736236572265, "loss": 0.6251392364501953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.869779258903726, "step_time": 0.4927442626953126} +{"epoch": 0, "iter": 13525, "iter_tflops": 16.985002812106682, "iter_time": 1.214665298461914, "loss": 0.6581940650939941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.11665902802375, "step_time": 1.1387912902832031} +{"epoch": 0, "iter": 13526, "iter_tflops": 17.409178789252337, "iter_time": 1.1850698852539063, "loss": 0.7149503827095032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.666919574870118, "step_time": 0.9101851463317872} +{"epoch": 0, "iter": 13527, "iter_tflops": 44.337881635601526, "iter_time": 0.46531527328491207, "loss": 0.7577109932899475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86371713544013, "step_time": 0.4310382633209228} +{"epoch": 0, "iter": 13528, "iter_tflops": 42.281613191889655, "iter_time": 
0.48794480514526367, "loss": 0.787541925907135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.482163305840444, "step_time": 0.4536084480285645} +{"epoch": 0, "iter": 13529, "iter_tflops": 35.728024700378114, "iter_time": 0.5774484786987305, "loss": 0.7408137917518616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.42290489913084, "step_time": 0.536947780609131} +{"epoch": 0, "iter": 13530, "iter_tflops": 16.010598207891572, "iter_time": 1.2885897979736327, "loss": 0.750153124332428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.564542214933077, "step_time": 1.1113171157836914} +{"epoch": 0, "iter": 13531, "iter_tflops": 36.83524342596366, "iter_time": 0.5600911407470703, "loss": 0.6352160573005676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.12475318751194, "step_time": 0.5141737174987793} +{"epoch": 0, "iter": 13532, "iter_tflops": 35.327572131515204, "iter_time": 0.5839940948486328, "loss": 0.6953598260879517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.323240069584706, "step_time": 0.5383441867828369} +{"epoch": 0, "iter": 13533, "iter_tflops": 19.33702888627835, "iter_time": 1.0669215850830078, "loss": 0.6685250401496887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.891497986565575, "step_time": 0.9875353851318359} +{"epoch": 0, "iter": 13534, "iter_tflops": 22.363764152430704, "iter_time": 0.9225233001708985, "loss": 0.5899102091789246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.939061196300283, "step_time": 0.7658430767059325} +{"epoch": 0, "iter": 13535, "iter_tflops": 34.32918810574603, "iter_time": 0.6009781951904296, "loss": 0.7214568853378296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.40517916190669, "step_time": 0.5515571365356444} +{"epoch": 0, "iter": 13536, "iter_tflops": 34.81993975810647, "iter_time": 0.5925080184936523, "loss": 0.7760943174362183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.90350533672317, "step_time": 0.5443056869506836} +{"epoch": 0, "iter": 13537, "iter_tflops": 23.264781350572726, 
"iter_time": 0.8867950744628906, "loss": 0.1953350007534027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.259590119493367, "step_time": 0.8167627983093262} +{"epoch": 0, "iter": 13538, "iter_tflops": 19.02849853255959, "iter_time": 1.0842207794189451, "loss": 0.22859399020671844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.09385412260146, "step_time": 0.8933586139678955} +{"epoch": 0, "iter": 13539, "iter_tflops": 47.20677030635859, "iter_time": 0.43703675079345705, "loss": 0.2120363414287567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.16601844371085, "step_time": 0.4032186622619629} +{"epoch": 0, "iter": 13540, "iter_tflops": 50.72318384399327, "iter_time": 0.4067389297485351, "loss": 0.24372176826000214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.86804969989853, "step_time": 0.376012882232666} +{"epoch": 0, "iter": 13541, "iter_tflops": 49.3985131949605, "iter_time": 0.41764604187011717, "loss": 0.03630688786506653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.598450469428684, "step_time": 0.3778695793151855} +{"epoch": 0, "iter": 13542, "iter_tflops": 37.70657274230218, "iter_time": 0.5471484680175782, "loss": 0.020692110061645508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.38137899639497, "step_time": 0.4867961826324463} +{"epoch": 0, "iter": 13543, "iter_tflops": 46.55860685266827, "iter_time": 0.44312093734741215, "loss": 0.04100200906395912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.99881058594715, "step_time": 0.4045406799316406} +{"epoch": 0, "iter": 13544, "iter_tflops": 52.29967534641271, "iter_time": 0.39447842407226563, "loss": 0.018919821828603745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.266132806623034, "step_time": 0.36026692390441895} +{"epoch": 0, "iter": 13545, "iter_tflops": 26.350182920401195, "iter_time": 0.5332411270141602, "loss": 0.07994754612445831, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 28.40945629025518, "step_time": 0.4945888824462891} +{"epoch": 0, "iter": 13546, "iter_tflops": 
7.347535371644969, "iter_time": 1.9123421020507811, "loss": 0.13430514931678772, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 8.333914402705574, "step_time": 1.6860025863647463} +{"epoch": 0, "iter": 13547, "iter_tflops": 8.261506057833895, "iter_time": 1.7007796325683593, "loss": 0.14929115772247314, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 9.646948437745868, "step_time": 1.4565228919982909} +{"epoch": 0, "iter": 13548, "iter_tflops": 27.924179064902457, "iter_time": 0.5031840400695801, "loss": 0.1375921219587326, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 30.90441196810564, "step_time": 0.45466004180908204} +{"epoch": 0, "iter": 13549, "iter_tflops": 24.518437709450026, "iter_time": 0.6196886444091796, "loss": 0.16817481815814972, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 26.23712514703708, "step_time": 0.5790953598022461} +{"epoch": 0, "iter": 13550, "iter_tflops": 24.11562364999029, "iter_time": 0.6300395812988281, "loss": 0.267761766910553, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 25.729577490270803, "step_time": 0.5905187301635743} +{"epoch": 0, "iter": 13551, "iter_tflops": 27.36341138756349, "iter_time": 0.5552596206665039, "loss": 0.28870540857315063, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.247312062170455, "step_time": 0.5194938049316407} +{"epoch": 0, "iter": 13552, "iter_tflops": 28.210551359979533, "iter_time": 0.5385856246948242, "loss": 0.16730332374572754, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.98925905242978, "step_time": 0.5066413078308105} +{"epoch": 0, "iter": 13553, "iter_tflops": 31.92661164539115, "iter_time": 0.6462036666870117, "loss": 0.3175519108772278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.07019203747083, "step_time": 0.6055467338562011} +{"epoch": 0, "iter": 13554, "iter_tflops": 16.63640557738693, "iter_time": 1.2401172485351564, "loss": 0.39960813522338867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.931538215170452, "step_time": 1.0350979080200196} +{"epoch": 0, "iter": 13555, 
"iter_tflops": 46.823534516284795, "iter_time": 0.44061375808715814, "loss": 0.4073202908039093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.85398512747568, "step_time": 0.40569275856018067} +{"epoch": 0, "iter": 13556, "iter_tflops": 48.67652775136559, "iter_time": 0.4238406982421875, "loss": 0.39920690655708313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.805974156401966, "step_time": 0.3906962013244629} +{"epoch": 0, "iter": 13557, "iter_tflops": 29.31003340369058, "iter_time": 0.703891845703125, "loss": 0.18871556222438812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.04421215390858, "step_time": 0.6645713348388672} +{"epoch": 0, "iter": 13558, "iter_tflops": 10.832180406305966, "iter_time": 1.9046113281250001, "loss": 0.1400352418422699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.754331418248672, "step_time": 1.7551907272338867} +{"epoch": 0, "iter": 13559, "iter_tflops": 12.972103059774494, "iter_time": 1.5904201049804687, "loss": 0.15878058969974518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.299720476601076, "step_time": 1.3484621200561522} +{"epoch": 0, "iter": 13560, "iter_tflops": 15.754004565559503, "iter_time": 1.3095777282714844, "loss": 0.16636346280574799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.480496256656313, "step_time": 1.1802350006103515} +{"epoch": 0, "iter": 13561, "iter_tflops": 20.681628257979167, "iter_time": 0.697136848449707, "loss": 0.1661115437746048, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 22.02370438774002, "step_time": 0.6546548614501954} +{"epoch": 0, "iter": 13562, "iter_tflops": 10.212239659356085, "iter_time": 1.4118279266357423, "loss": 0.2557106912136078, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 11.974999485459506, "step_time": 1.2040021514892578} +{"epoch": 0, "iter": 13563, "iter_tflops": 22.14469343825245, "iter_time": 0.6510781097412109, "loss": 0.09818388521671295, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 23.898622019707027, "step_time": 0.6032952499389649} +{"epoch": 0, 
"iter": 13564, "iter_tflops": 21.37915880205368, "iter_time": 0.6743916015625, "loss": 0.1647290289402008, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 23.004351029222605, "step_time": 0.626747745513916} +{"epoch": 0, "iter": 13565, "iter_tflops": 13.317137652796792, "iter_time": 1.549213806152344, "loss": 0.17876550555229187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.187611837618583, "step_time": 1.4541625289916993} +{"epoch": 0, "iter": 13566, "iter_tflops": 25.86167264000475, "iter_time": 0.7977478408813476, "loss": 0.11323095858097076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.886503544796355, "step_time": 0.647016487121582} +{"epoch": 0, "iter": 13567, "iter_tflops": 47.673868393177365, "iter_time": 0.43275476074218755, "loss": 0.15041843056678772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.61962411324963, "step_time": 0.3996753921508789} +{"epoch": 0, "iter": 13568, "iter_tflops": 48.83437857984011, "iter_time": 0.42247068786621095, "loss": 0.1388014554977417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.95779027711383, "step_time": 0.38957617759704594} +{"epoch": 0, "iter": 13569, "iter_tflops": 36.142936290500245, "iter_time": 0.5708195190429687, "loss": 0.2951198220252991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.74207215503254, "step_time": 0.5325242652893066} +{"epoch": 0, "iter": 13570, "iter_tflops": 13.590236834987397, "iter_time": 1.5180819702148436, "loss": 0.34007635712623596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.756645918768736, "step_time": 1.231218563079834} +{"epoch": 0, "iter": 13571, "iter_tflops": 35.89804201029286, "iter_time": 0.5747136154174805, "loss": 0.23537710309028625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.27990674157502, "step_time": 0.5252327518463135} +{"epoch": 0, "iter": 13572, "iter_tflops": 38.98563666240938, "iter_time": 0.5291972961425782, "loss": 0.39587438106536865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.92321970449368, "step_time": 0.4806511173248291} 
+{"epoch": 0, "iter": 13573, "iter_tflops": 32.17516886137576, "iter_time": 0.6412116622924805, "loss": 0.44504353404045105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.10149263922572, "step_time": 0.5877554473876954} +{"epoch": 0, "iter": 13574, "iter_tflops": 36.121313265046275, "iter_time": 0.5711612243652343, "loss": 0.5852537751197815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35344166580614, "step_time": 0.5242513141632079} +{"epoch": 0, "iter": 13575, "iter_tflops": 36.357053236409946, "iter_time": 0.5674578018188476, "loss": 0.439317524433136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.633845773282026, "step_time": 0.5205423069000245} +{"epoch": 0, "iter": 13576, "iter_tflops": 38.071326422510914, "iter_time": 0.5419063491821289, "loss": 0.651254415512085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59725658443173, "step_time": 0.49597245597839357} +{"epoch": 0, "iter": 13577, "iter_tflops": 30.850691183908918, "iter_time": 0.6687400741577149, "loss": 0.46265798807144165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.072810167382535, "step_time": 0.6055002040863037} +{"epoch": 0, "iter": 13578, "iter_tflops": 38.94393148323352, "iter_time": 0.5297640151977538, "loss": 0.46664637327194214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.11302182397056, "step_time": 0.47853508377075193} +{"epoch": 0, "iter": 13579, "iter_tflops": 41.84596556857261, "iter_time": 0.49302467346191403, "loss": 0.42236533761024475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15619308755039, "step_time": 0.44698429679870605} +{"epoch": 0, "iter": 13580, "iter_tflops": 42.49478109870449, "iter_time": 0.4854971122741699, "loss": 0.42388978600502014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.41392501089718, "step_time": 0.44450223731994626} +{"epoch": 0, "iter": 13581, "iter_tflops": 21.02734421193171, "iter_time": 0.9811554565429688, "loss": 0.02182767540216446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.39486104341734, "step_time": 
0.9212423095703125} +{"epoch": 0, "iter": 13582, "iter_tflops": 11.489605880109972, "iter_time": 1.7956310882568358, "loss": 0.02573472261428833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.461225985133371, "step_time": 1.4266489944458007} +{"epoch": 0, "iter": 13583, "iter_tflops": 14.957930791290696, "iter_time": 1.3792745666503905, "loss": 0.11016169935464859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.626657241251316, "step_time": 1.1076111640930177} +{"epoch": 0, "iter": 13584, "iter_tflops": 18.904486440836898, "iter_time": 1.0913331909179689, "loss": 0.027161534875631332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.468895710871596, "step_time": 0.8790824146270751} +{"epoch": 0, "iter": 13585, "iter_tflops": 12.549792165786117, "iter_time": 1.3445523071289063, "loss": 0.18751612305641174, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 13.440679787428365, "step_time": 1.2554314422607422} +{"epoch": 0, "iter": 13586, "iter_tflops": 13.823639295871292, "iter_time": 1.2206519317626954, "loss": 0.23296493291854858, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 18.0779488168623, "step_time": 0.9333941688537598} +{"epoch": 0, "iter": 13587, "iter_tflops": 30.57816327107253, "iter_time": 0.5518268661499023, "loss": 0.188004732131958, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 32.54064117151293, "step_time": 0.518547004699707} +{"epoch": 0, "iter": 13588, "iter_tflops": 27.83640076667239, "iter_time": 0.6061793746948243, "loss": 0.2001422643661499, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 29.424469326242207, "step_time": 0.5734632568359375} +{"epoch": 0, "iter": 13589, "iter_tflops": 37.70569800068229, "iter_time": 0.42140694046020505, "loss": 0.03530643880367279, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 41.74309348135913, "step_time": 0.3806484260559082} +{"epoch": 0, "iter": 13590, "iter_tflops": 30.2465092443864, "iter_time": 0.5253314590454102, "loss": 0.08059997111558914, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 33.30339099769312, 
"step_time": 0.47711186027526853} +{"epoch": 0, "iter": 13591, "iter_tflops": 38.707301939574386, "iter_time": 0.4105024642944336, "loss": 0.03771583363413811, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 42.74352893878939, "step_time": 0.37173914337158204} +{"epoch": 0, "iter": 13592, "iter_tflops": 33.376587342482765, "iter_time": 0.47606553268432616, "loss": 0.03147049993276596, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 36.990022753831525, "step_time": 0.42956023406982424} +{"epoch": 0, "iter": 13593, "iter_tflops": 19.00398141363387, "iter_time": 1.0856195373535156, "loss": 0.29687508940696716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.023082420045544, "step_time": 1.0303655090332031} +{"epoch": 0, "iter": 13594, "iter_tflops": 18.334192846637414, "iter_time": 1.1252796173095703, "loss": 0.35884684324264526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.800277466513226, "step_time": 0.9463683910369872} +{"epoch": 0, "iter": 13595, "iter_tflops": 50.37283515464369, "iter_time": 0.40956784439086913, "loss": 0.2546272575855255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.53278733221216, "step_time": 0.3783245735168457} +{"epoch": 0, "iter": 13596, "iter_tflops": 56.13497037080121, "iter_time": 0.3675265769958496, "loss": 0.2817961871623993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.85154166427178, "step_time": 0.33903978347778324} +{"epoch": 0, "iter": 13597, "iter_tflops": 23.28041692781284, "iter_time": 0.8861994857788086, "loss": 0.35819488763809204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.427641847879844, "step_time": 0.8445798263549803} +{"epoch": 0, "iter": 13598, "iter_tflops": 19.915104538895577, "iter_time": 1.0359520568847658, "loss": 0.39692068099975586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.159855925664804, "step_time": 0.8539410820007324} +{"epoch": 0, "iter": 13599, "iter_tflops": 46.99775549461454, "iter_time": 0.4389804000854492, "loss": 0.3463912308216095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
50.828762381254045, "step_time": 0.4058940753936767} +{"epoch": 0, "iter": 13600, "iter_tflops": 47.39628916829083, "iter_time": 0.43528921508789065, "loss": 0.2853700816631317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.00007851951115, "step_time": 0.40453062248229976} +{"epoch": 0, "iter": 13601, "iter_tflops": 35.13765594991977, "iter_time": 0.5871505355834961, "loss": 0.23175762593746185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.713820399775884, "step_time": 0.5470433197021485} +{"epoch": 0, "iter": 13602, "iter_tflops": 12.554763056364223, "iter_time": 1.6432881622314455, "loss": 0.26062139868736267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.644407475165023, "step_time": 1.3187519912719727} +{"epoch": 0, "iter": 13603, "iter_tflops": 36.22347151193959, "iter_time": 0.569550422668457, "loss": 0.20430785417556763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.848211516535926, "step_time": 0.5177420196533203} +{"epoch": 0, "iter": 13604, "iter_tflops": 39.02192337097532, "iter_time": 0.5287051925659181, "loss": 0.2318829894065857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56527843836899, "step_time": 0.48469302368164063} +{"epoch": 0, "iter": 13605, "iter_tflops": 19.656455334384916, "iter_time": 1.0495836181640625, "loss": 0.6306648850440979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.898816385401478, "step_time": 0.987189567565918} +{"epoch": 0, "iter": 13606, "iter_tflops": 7.963961092918822, "iter_time": 2.5905567932128903, "loss": 0.5532839894294739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.403819964171724, "step_time": 2.1939056243896484} +{"epoch": 0, "iter": 13607, "iter_tflops": 17.619159308416616, "iter_time": 1.170946533203125, "loss": 0.4975000023841858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.519223811654236, "step_time": 1.0569628028869629} +{"epoch": 0, "iter": 13608, "iter_tflops": 49.0430232524392, "iter_time": 0.4206733627319336, "loss": 0.5805931687355042, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 52.98904250929416, "step_time": 0.3893464107513428} +{"epoch": 0, "iter": 13609, "iter_tflops": 17.89852156964374, "iter_time": 0.8146552810668946, "loss": 0.2924647331237793, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 18.858514908682626, "step_time": 0.7731852264404298} +{"epoch": 0, "iter": 13610, "iter_tflops": 14.358437338108436, "iter_time": 1.0155091934204101, "loss": 0.3119055926799774, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 17.070106954610193, "step_time": 0.8541906127929687} +{"epoch": 0, "iter": 13611, "iter_tflops": 26.43639623208588, "iter_time": 0.5515549468994141, "loss": 0.34432387351989746, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 28.1619596579342, "step_time": 0.5177596054077149} +{"epoch": 0, "iter": 13612, "iter_tflops": 26.40951123690472, "iter_time": 0.5521164321899413, "loss": 0.2787095308303833, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 28.123411061189703, "step_time": 0.5184692955017091} +{"epoch": 0, "iter": 13613, "iter_tflops": 30.757091172778974, "iter_time": 0.6707751846313477, "loss": 0.21994447708129883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.660060319876436, "step_time": 0.631691837310791} +{"epoch": 0, "iter": 13614, "iter_tflops": 11.759117995805163, "iter_time": 1.7544762725830076, "loss": 0.25161975622177124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.461511435542379, "step_time": 1.426620834350586} +{"epoch": 0, "iter": 13615, "iter_tflops": 11.745360314440267, "iter_time": 1.7565313415527344, "loss": 0.2729349136352539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.325036443194092, "step_time": 1.4402122879028318} +{"epoch": 0, "iter": 13616, "iter_tflops": 24.709107434062588, "iter_time": 0.8349590759277343, "loss": 0.17544116079807281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.35714569061944, "step_time": 0.6796124286651611} +{"epoch": 0, "iter": 13617, "iter_tflops": 16.976804316673984, "iter_time": 0.8612891235351563, "loss": 0.3698665201663971, "lr": 3e-05, 
"seqlen": 5856.0, "step_tflops": 17.83491446800972, "step_time": 0.8198490066528321} +{"epoch": 0, "iter": 13618, "iter_tflops": 6.918226212823394, "iter_time": 2.1135384216308593, "loss": 0.2821776866912842, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 8.697364579387308, "step_time": 1.6811916732788088} +{"epoch": 0, "iter": 13619, "iter_tflops": 7.030204284278026, "iter_time": 2.0798736877441404, "loss": 0.3124207556247711, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 8.410816063757778, "step_time": 1.7384682769775393} +{"epoch": 0, "iter": 13620, "iter_tflops": 21.290405745101335, "iter_time": 0.6867852630615235, "loss": 0.15181215107440948, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 26.229348228416583, "step_time": 0.5574647445678711} +{"epoch": 0, "iter": 13621, "iter_tflops": 21.092699322925736, "iter_time": 0.7280886993408204, "loss": 0.15341706573963165, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 22.334029133470647, "step_time": 0.6876213836669923} +{"epoch": 0, "iter": 13622, "iter_tflops": 8.886871560890487, "iter_time": 1.728094741821289, "loss": 0.2527753412723541, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 11.118342010827844, "step_time": 1.38126314163208} +{"epoch": 0, "iter": 13623, "iter_tflops": 27.194736483374918, "iter_time": 0.5647179565429687, "loss": 0.3142409920692444, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.01735065875731, "step_time": 0.5292473526000976} +{"epoch": 0, "iter": 13624, "iter_tflops": 28.459605721240763, "iter_time": 0.5396194229125977, "loss": 0.24581538140773773, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.230090278562233, "step_time": 0.5080155525207519} +{"epoch": 0, "iter": 13625, "iter_tflops": 30.93063195466983, "iter_time": 0.667011703491211, "loss": 0.6173046827316284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.77483224532805, "step_time": 0.6294797592163086} +{"epoch": 0, "iter": 13626, "iter_tflops": 12.691996191730919, "iter_time": 1.625519989013672, "loss": 0.6653293371200562, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 15.932325389267428, "step_time": 1.2949204216003418} +{"epoch": 0, "iter": 13627, "iter_tflops": 47.14803987227075, "iter_time": 0.43758115005493164, "loss": 0.47598886489868164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.27214290246496, "step_time": 0.40238406944274896} +{"epoch": 0, "iter": 13628, "iter_tflops": 48.320057001484265, "iter_time": 0.42696749114990235, "loss": 0.5913652181625366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.146677190168994, "step_time": 0.3956358222961425} +{"epoch": 0, "iter": 13629, "iter_tflops": 29.262848670070166, "iter_time": 0.7050268325805664, "loss": 0.36495399475097656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.034996608375717, "step_time": 0.6647686729431153} +{"epoch": 0, "iter": 13630, "iter_tflops": 13.27377994619779, "iter_time": 1.554274185180664, "loss": 0.31688156723976135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.45273833169047, "step_time": 1.1180505104064942} +{"epoch": 0, "iter": 13631, "iter_tflops": 33.60326607265024, "iter_time": 0.6139609603881836, "loss": 0.2788044810295105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.29969555432608, "step_time": 0.5683544502258301} +{"epoch": 0, "iter": 13632, "iter_tflops": 38.1804120870735, "iter_time": 0.5403580627441407, "loss": 0.34370264410972595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.74241362934545, "step_time": 0.49424773788452153} +{"epoch": 0, "iter": 13633, "iter_tflops": 36.14639012471197, "iter_time": 0.5707649765014648, "loss": 0.12384628504514694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.66348285024065, "step_time": 0.5201533508300781} +{"epoch": 0, "iter": 13634, "iter_tflops": 44.58989247123147, "iter_time": 0.4626854286193847, "loss": 0.10021666437387466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.181962517648806, "step_time": 0.4111256809234619} +{"epoch": 0, "iter": 13635, "iter_tflops": 47.279440950649764, "iter_time": 0.43636500549316404, "loss": 
0.15397700667381287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.10610564215236, "step_time": 0.4036913642883301} +{"epoch": 0, "iter": 13636, "iter_tflops": 47.17993207974262, "iter_time": 0.4372853584289551, "loss": 0.2239164113998413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19284227792954, "step_time": 0.4030073852539062} +{"epoch": 0, "iter": 13637, "iter_tflops": 34.679496240132124, "iter_time": 0.5949075317382813, "loss": 0.8061053156852722, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.389093726504456, "step_time": 0.5517944259643555} +{"epoch": 0, "iter": 13638, "iter_tflops": 35.622191193177684, "iter_time": 0.579164077758789, "loss": 0.7063936591148376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.99458772120373, "step_time": 0.5290758209228515} +{"epoch": 0, "iter": 13639, "iter_tflops": 37.627475436281536, "iter_time": 0.5482986373901367, "loss": 0.7082080245018005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.71229398720419, "step_time": 0.5067534027099609} +{"epoch": 0, "iter": 13640, "iter_tflops": 38.57517112963949, "iter_time": 0.5348283081054687, "loss": 0.834438681602478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.67945907154266, "step_time": 0.49499427223205567} +{"epoch": 0, "iter": 13641, "iter_tflops": 21.24115689068551, "iter_time": 0.9419956970214843, "loss": 0.019297944381833076, "lr": 3e-05, "seqlen": 7952.0, "step_tflops": 22.947032847033523, "step_time": 0.8719680023193358} +{"epoch": 0, "iter": 13642, "iter_tflops": 29.666570967296895, "iter_time": 0.6744654922485351, "loss": 0.015344088897109032, "lr": 3e-05, "seqlen": 7952.0, "step_tflops": 37.1852413868736, "step_time": 0.5380919322967529} +{"epoch": 0, "iter": 13643, "iter_tflops": 50.48484898336666, "iter_time": 0.39633828353881834, "loss": 0.025777144357562065, "lr": 3e-05, "seqlen": 7952.0, "step_tflops": 54.82087706637157, "step_time": 0.36499011802673337} +{"epoch": 0, "iter": 13644, "iter_tflops": 50.214837494490105, "iter_time": 
0.39846944427490233, "loss": 0.022184288129210472, "lr": 3e-05, "seqlen": 7952.0, "step_tflops": 54.747060347734234, "step_time": 0.3654822425842285} +{"epoch": 0, "iter": 13645, "iter_tflops": 24.10114659879332, "iter_time": 0.4266118392944336, "loss": 0.021203022450208664, "lr": 3e-05, "seqlen": 4144.0, "step_tflops": 26.72332808877687, "step_time": 0.38475127220153815} +{"epoch": 0, "iter": 13646, "iter_tflops": 25.56800176274353, "iter_time": 0.40213680267333984, "loss": 0.045432914048433304, "lr": 3e-05, "seqlen": 4144.0, "step_tflops": 28.07711582053443, "step_time": 0.36619981002807617} +{"epoch": 0, "iter": 13647, "iter_tflops": 26.87577741958549, "iter_time": 0.3825688209533691, "loss": 0.0395047627389431, "lr": 3e-05, "seqlen": 4144.0, "step_tflops": 29.343388288159435, "step_time": 0.35039697456359864} +{"epoch": 0, "iter": 13648, "iter_tflops": 27.349332803747515, "iter_time": 0.37594461822509767, "loss": 0.03215133398771286, "lr": 3e-05, "seqlen": 4144.0, "step_tflops": 29.876606781363908, "step_time": 0.34414331436157225} +{"epoch": 0, "iter": 13649, "iter_tflops": 38.95489696136352, "iter_time": 0.529614891052246, "loss": 0.7260141372680664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.0676366140666, "step_time": 0.49042673110961915} +{"epoch": 0, "iter": 13650, "iter_tflops": 38.10605112623893, "iter_time": 0.5414125289916992, "loss": 0.6671104431152344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.92115171787508, "step_time": 0.5041669807434082} +{"epoch": 0, "iter": 13651, "iter_tflops": 47.544335674706325, "iter_time": 0.43393378448486325, "loss": 0.9012764692306519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.18824969542097, "step_time": 0.40304354286193844} +{"epoch": 0, "iter": 13652, "iter_tflops": 44.677447446037704, "iter_time": 0.46177869796752935, "loss": 0.6471463441848755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.95597588632695, "step_time": 0.4302090225219727} +{"epoch": 0, "iter": 13653, "iter_tflops": 
39.7854297675007, "iter_time": 0.5185590209960937, "loss": 0.6819627285003662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.11226470935709, "step_time": 0.47854348754882814} +{"epoch": 0, "iter": 13654, "iter_tflops": 39.89225604955265, "iter_time": 0.5171703872680664, "loss": 0.5408185124397278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.01587074071982, "step_time": 0.47961585235595705} +{"epoch": 0, "iter": 13655, "iter_tflops": 43.70245916003801, "iter_time": 0.4720808372497559, "loss": 0.7104812860488892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.25994253415671, "step_time": 0.43654504013061524} +{"epoch": 0, "iter": 13656, "iter_tflops": 47.16137185289515, "iter_time": 0.4374574508666992, "loss": 0.6908249855041504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.835455387333276, "step_time": 0.4058406352996826} +{"epoch": 0, "iter": 13657, "iter_tflops": 31.497710336583086, "iter_time": 0.6550029602050781, "loss": 0.17181545495986938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.42619545421019, "step_time": 0.6172133331298829} +{"epoch": 0, "iter": 13658, "iter_tflops": 14.300925216418346, "iter_time": 1.4426404724121094, "loss": 0.16070054471492767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.035388157487535, "step_time": 1.1439228992462158} +{"epoch": 0, "iter": 13659, "iter_tflops": 40.540339660609796, "iter_time": 0.5089028282165527, "loss": 0.21562576293945312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.26134152618113, "step_time": 0.4661199321746826} +{"epoch": 0, "iter": 13660, "iter_tflops": 42.635474643678386, "iter_time": 0.48389501190185547, "loss": 0.10792133957147598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.547346835344335, "step_time": 0.44322813034057623} +{"epoch": 0, "iter": 13661, "iter_tflops": 37.58398535513643, "iter_time": 0.5489330978393555, "loss": 0.16037751734256744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57477590756735, "step_time": 0.4962406425476074} +{"epoch": 0, "iter": 13662, 
"iter_tflops": 43.69957594444603, "iter_time": 0.47211198425292966, "loss": 0.20247532427310944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.23933677982506, "step_time": 0.4276819477081299} +{"epoch": 0, "iter": 13663, "iter_tflops": 46.3485635001491, "iter_time": 0.44512908172607424, "loss": 0.17804425954818726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.73166829089647, "step_time": 0.4066709060668945} +{"epoch": 0, "iter": 13664, "iter_tflops": 48.588563300042104, "iter_time": 0.4246080169677734, "loss": 0.19785021245479584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.99419188269898, "step_time": 0.3893085784912109} +{"epoch": 0, "iter": 13665, "iter_tflops": 28.417931235706504, "iter_time": 0.7259885787963867, "loss": 0.6280296444892883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.14009236394852, "step_time": 0.6845066452026367} +{"epoch": 0, "iter": 13666, "iter_tflops": 13.720005452375489, "iter_time": 1.503723419189453, "loss": 0.7411072254180908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.34064183296675, "step_time": 1.2625632286071777} +{"epoch": 0, "iter": 13667, "iter_tflops": 35.70414090469321, "iter_time": 0.5778347549438476, "loss": 0.5232908129692078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.848332043665636, "step_time": 0.5310676784515381} +{"epoch": 0, "iter": 13668, "iter_tflops": 37.64716425818675, "iter_time": 0.5480118865966797, "loss": 0.6867092251777649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.88377087083654, "step_time": 0.5046279506683349} +{"epoch": 0, "iter": 13669, "iter_tflops": 22.8207284020059, "iter_time": 0.9040506134033203, "loss": 0.49854132533073425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.4052801963513, "step_time": 0.8453536834716797} +{"epoch": 0, "iter": 13670, "iter_tflops": 19.120949586244496, "iter_time": 1.078978500366211, "loss": 0.46669161319732666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.850108270931, "step_time": 0.8302214736938477} +{"epoch": 0, "iter": 
13671, "iter_tflops": 39.922345092292574, "iter_time": 0.5167806015014649, "loss": 0.6125136017799377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7270704237895, "step_time": 0.47181513214111326} +{"epoch": 0, "iter": 13672, "iter_tflops": 41.5501035735601, "iter_time": 0.49653530883789065, "loss": 0.5975441932678223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.25347653257277, "step_time": 0.45590074157714844} +{"epoch": 0, "iter": 13673, "iter_tflops": 16.216699971688378, "iter_time": 1.2722128143310547, "loss": 0.598829984664917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.318884138056323, "step_time": 1.1912484283447267} +{"epoch": 0, "iter": 13674, "iter_tflops": 39.79891261389298, "iter_time": 0.5183833465576172, "loss": 0.5637200474739075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.69187152171253, "step_time": 0.472195234298706} +{"epoch": 0, "iter": 13675, "iter_tflops": 46.556466228650585, "iter_time": 0.4431413116455078, "loss": 0.5045658349990845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.32723838239911, "step_time": 0.4099389152526855} +{"epoch": 0, "iter": 13676, "iter_tflops": 44.052334695561996, "iter_time": 0.46833144378662106, "loss": 0.6713917851448059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.361399145998256, "step_time": 0.4356098823547363} +{"epoch": 0, "iter": 13677, "iter_tflops": 29.989401060763957, "iter_time": 0.6879461669921876, "loss": 0.1628231406211853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.951314731988216, "step_time": 0.6457040557861329} +{"epoch": 0, "iter": 13678, "iter_tflops": 15.606064551861193, "iter_time": 1.3219920654296875, "loss": 0.16228264570236206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.589139530847657, "step_time": 1.1098466110229492} +{"epoch": 0, "iter": 13679, "iter_tflops": 40.223742871865845, "iter_time": 0.5129083480834961, "loss": 0.1985904574394226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.09997150119493, "step_time": 0.46782555198669434} +{"epoch": 
0, "iter": 13680, "iter_tflops": 43.611246847638945, "iter_time": 0.47306818771362297, "loss": 0.13705670833587646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.62828685023262, "step_time": 0.4331689186096192} +{"epoch": 0, "iter": 13681, "iter_tflops": 18.36418386262177, "iter_time": 1.12344189453125, "loss": 0.5592539310455322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.54543829852485, "step_time": 1.0555451965332032} +{"epoch": 0, "iter": 13682, "iter_tflops": 16.855333023956366, "iter_time": 1.2240098419189454, "loss": 0.5247697830200195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.210576522113136, "step_time": 1.0739445266723633} +{"epoch": 0, "iter": 13683, "iter_tflops": 46.32346803112558, "iter_time": 0.4453702278137207, "loss": 0.5985891222953796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.22286926401471, "step_time": 0.4107908172607421} +{"epoch": 0, "iter": 13684, "iter_tflops": 47.657684517409756, "iter_time": 0.4329017181396485, "loss": 0.5207533836364746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.40139076343737, "step_time": 0.40137228202819825} +{"epoch": 0, "iter": 13685, "iter_tflops": 44.20289824394392, "iter_time": 0.3966173133850098, "loss": 0.007857823744416237, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 49.026427970816975, "step_time": 0.3575955963134766} +{"epoch": 0, "iter": 13686, "iter_tflops": 44.976441735764986, "iter_time": 0.38979594802856443, "loss": 0.0037735418882220984, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 49.64331117888895, "step_time": 0.35315200233459476} +{"epoch": 0, "iter": 13687, "iter_tflops": 49.042524787688855, "iter_time": 0.3574782257080078, "loss": 0.005202935542911291, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 53.997921191400785, "step_time": 0.3246724014282226} +{"epoch": 0, "iter": 13688, "iter_tflops": 46.209890235010775, "iter_time": 0.37939139556884766, "loss": 0.004886687267571688, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 50.63801939505705, "step_time": 
0.34621485900878907} +{"epoch": 0, "iter": 13689, "iter_tflops": 19.623925535648358, "iter_time": 1.051323471069336, "loss": 0.3321886956691742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.528256143483812, "step_time": 1.005009552001953} +{"epoch": 0, "iter": 13690, "iter_tflops": 23.810044753579575, "iter_time": 0.8664869689941407, "loss": 0.24690932035446167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.348731221564016, "step_time": 0.7029637279510498} +{"epoch": 0, "iter": 13691, "iter_tflops": 52.86811074021941, "iter_time": 0.3902370109558106, "loss": 0.3105592131614685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.18361807817425, "step_time": 0.36078678131103514} +{"epoch": 0, "iter": 13692, "iter_tflops": 43.0614973910085, "iter_time": 0.479107666015625, "loss": 0.2709413468837738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66902150002019, "step_time": 0.44207255363464354} +{"epoch": 0, "iter": 13693, "iter_tflops": 32.27361300813795, "iter_time": 0.6392557754516601, "loss": 0.0023105081636458635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.5095001836827, "step_time": 0.5978380851745605} +{"epoch": 0, "iter": 13694, "iter_tflops": 13.545317375369258, "iter_time": 1.5231162872314452, "loss": 0.012220356613397598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.220859802875495, "step_time": 1.1980292358398437} +{"epoch": 0, "iter": 13695, "iter_tflops": 12.35511288196679, "iter_time": 1.669842575073242, "loss": 0.006962703075259924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.870349318550113, "step_time": 1.3873980407714845} +{"epoch": 0, "iter": 13696, "iter_tflops": 28.581526976150172, "iter_time": 0.7218331451416016, "loss": 0.002668625907972455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.368905059203804, "step_time": 0.6373738460540772} +{"epoch": 0, "iter": 13697, "iter_tflops": 13.31959595624405, "iter_time": 1.223707015991211, "loss": 0.32739415764808655, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 
14.211530364367825, "step_time": 1.146905548095703} +{"epoch": 0, "iter": 13698, "iter_tflops": 9.550815226167671, "iter_time": 1.7065855255126954, "loss": 0.263202428817749, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 12.592971267415606, "step_time": 1.2943159065246583} +{"epoch": 0, "iter": 13699, "iter_tflops": 23.317644135789223, "iter_time": 0.6990107116699219, "loss": 0.254469096660614, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 25.020308397525383, "step_time": 0.6514421310424805} +{"epoch": 0, "iter": 13700, "iter_tflops": 24.324529022705544, "iter_time": 0.6700759963989258, "loss": 0.1449977159500122, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 26.216298146412072, "step_time": 0.6217232856750489} +{"epoch": 0, "iter": 13701, "iter_tflops": 28.13294981441401, "iter_time": 0.7333427047729492, "loss": 0.6952459812164307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.12241389348011, "step_time": 0.6629014568328857} +{"epoch": 0, "iter": 13702, "iter_tflops": 34.48223887079125, "iter_time": 0.5983107299804687, "loss": 0.748276948928833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.59702234266028, "step_time": 0.5487427520751953} +{"epoch": 0, "iter": 13703, "iter_tflops": 37.23789894127596, "iter_time": 0.5540348434448241, "loss": 0.8201345205307007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.58551874144121, "step_time": 0.5083363265991211} +{"epoch": 0, "iter": 13704, "iter_tflops": 37.764837278946686, "iter_time": 0.5463043136596679, "loss": 0.5776009559631348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.57601244844617, "step_time": 0.5084554214477539} +{"epoch": 0, "iter": 13705, "iter_tflops": 16.143724208117668, "iter_time": 1.2779636993408203, "loss": 0.3595908284187317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.05596754804154, "step_time": 1.209611442565918} +{"epoch": 0, "iter": 13706, "iter_tflops": 24.802406981519166, "iter_time": 0.8318181991577149, "loss": 0.36593958735466003, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 34.40045203359788, "step_time": 0.5997332096099854} +{"epoch": 0, "iter": 13707, "iter_tflops": 51.44554572281494, "iter_time": 0.40102779006958, "loss": 0.39376091957092285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.733120764933325, "step_time": 0.37017653465270994} +{"epoch": 0, "iter": 13708, "iter_tflops": 51.351728109497856, "iter_time": 0.40176045227050783, "loss": 0.37679633498191833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.0033308545084, "step_time": 0.36839047241210937} +{"epoch": 0, "iter": 13709, "iter_tflops": 43.58922727680517, "iter_time": 0.4733071632385254, "loss": 0.2952774167060852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76375969273482, "step_time": 0.43194031715393066} +{"epoch": 0, "iter": 13710, "iter_tflops": 46.21280023482374, "iter_time": 0.4464367752075195, "loss": 0.3738248646259308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.60135736933772, "step_time": 0.407718183517456} +{"epoch": 0, "iter": 13711, "iter_tflops": 47.964687868006855, "iter_time": 0.43013088226318363, "loss": 0.3908670246601105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.08508345586911, "step_time": 0.3961036853790283} +{"epoch": 0, "iter": 13712, "iter_tflops": 49.04109944421043, "iter_time": 0.4206898651123047, "loss": 0.30274876952171326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.05537548732388, "step_time": 0.38885962677001956} +{"epoch": 0, "iter": 13713, "iter_tflops": 30.778095019214987, "iter_time": 0.6703174285888671, "loss": 0.6550102233886719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.648591510694374, "step_time": 0.6319137382507324} +{"epoch": 0, "iter": 13714, "iter_tflops": 25.402133924211373, "iter_time": 0.8121795425415038, "loss": 0.5117096900939941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.215677482821736, "step_time": 0.7311925621032714} +{"epoch": 0, "iter": 13715, "iter_tflops": 38.14527736367497, "iter_time": 0.5408557739257812, "loss": 0.84113609790802, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 41.80449599771096, "step_time": 0.49351374816894533} +{"epoch": 0, "iter": 13716, "iter_tflops": 35.416294153535944, "iter_time": 0.582531120300293, "loss": 0.6126522421836853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.49392226296981, "step_time": 0.5359571666717529} +{"epoch": 0, "iter": 13717, "iter_tflops": 35.3870968571048, "iter_time": 0.5830117568969726, "loss": 0.10931960493326187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.46555217313794, "step_time": 0.5227620639801025} +{"epoch": 0, "iter": 13718, "iter_tflops": 48.21546326910231, "iter_time": 0.4278937110900879, "loss": 0.09774595499038696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.884383169726895, "step_time": 0.39011693572998046} +{"epoch": 0, "iter": 13719, "iter_tflops": 50.369482685972656, "iter_time": 0.4095951042175293, "loss": 0.04907617345452309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.13535848216037, "step_time": 0.37418988609313963} +{"epoch": 0, "iter": 13720, "iter_tflops": 52.21171179300282, "iter_time": 0.3951430206298828, "loss": 0.045710328966379166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.66994777505592, "step_time": 0.3640570411682129} +{"epoch": 0, "iter": 13721, "iter_tflops": 24.218110049192347, "iter_time": 0.8518870162963867, "loss": 0.2821253538131714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.568846149202646, "step_time": 0.8068840255737305} +{"epoch": 0, "iter": 13722, "iter_tflops": 14.716099660947657, "iter_time": 1.4019403228759766, "loss": 0.366451621055603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.363754360467162, "step_time": 1.1881700859069824} +{"epoch": 0, "iter": 13723, "iter_tflops": 47.417846275271366, "iter_time": 0.43509132385253907, "loss": 0.3362678587436676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45661241317441, "step_time": 0.40094154167175294} +{"epoch": 0, "iter": 13724, "iter_tflops": 49.97571521556544, "iter_time": 0.4128223762512207, "loss": 0.2845365107059479, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 53.99684092212357, "step_time": 0.3820796394348145} +{"epoch": 0, "iter": 13725, "iter_tflops": 32.2518935045528, "iter_time": 0.6396862716674805, "loss": 0.03947722539305687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.280081910410104, "step_time": 0.6018390960693358} +{"epoch": 0, "iter": 13726, "iter_tflops": 20.865776182632693, "iter_time": 0.9887527465820314, "loss": 0.017584191635251045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.02750702497638, "step_time": 0.858644781112671} +{"epoch": 0, "iter": 13727, "iter_tflops": 50.80501712866717, "iter_time": 0.4060837821960449, "loss": 0.04359985888004303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.54884789866964, "step_time": 0.3714045257568359} +{"epoch": 0, "iter": 13728, "iter_tflops": 53.28626279103835, "iter_time": 0.38717471313476565, "loss": 0.0287400521337986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.12587455672221, "step_time": 0.35493820381164554} +{"epoch": 0, "iter": 13729, "iter_tflops": 33.39410219879596, "iter_time": 0.6178065032958985, "loss": 0.515626847743988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.6568512248965, "step_time": 0.5786011047363281} +{"epoch": 0, "iter": 13730, "iter_tflops": 34.35274964807452, "iter_time": 0.6005660018920898, "loss": 0.7497646808624268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.27944160771276, "step_time": 0.5534174499511718} +{"epoch": 0, "iter": 13731, "iter_tflops": 38.42897266158745, "iter_time": 0.5368629989624023, "loss": 0.7573739886283875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78531156429248, "step_time": 0.49374032974243165} +{"epoch": 0, "iter": 13732, "iter_tflops": 37.059798884294096, "iter_time": 0.5566973953247071, "loss": 0.7472922801971436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.22861048665149, "step_time": 0.5128462867736817} +{"epoch": 0, "iter": 13733, "iter_tflops": 15.591702733595524, "iter_time": 1.3232097778320313, "loss": 0.42252641916275024, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.69886137530356, "step_time": 1.2354790573120118} +{"epoch": 0, "iter": 13734, "iter_tflops": 17.352797035895204, "iter_time": 1.1889203491210938, "loss": 0.46704941987991333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.62420123442742, "step_time": 0.8051409416198732} +{"epoch": 0, "iter": 13735, "iter_tflops": 49.41401575106497, "iter_time": 0.41751501464843743, "loss": 0.5631733536720276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.69652842801167, "step_time": 0.3842165241241455} +{"epoch": 0, "iter": 13736, "iter_tflops": 43.24563580646211, "iter_time": 0.4770676422119141, "loss": 0.6292468309402466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.57497384002184, "step_time": 0.44296521949768064} +{"epoch": 0, "iter": 13737, "iter_tflops": 22.573859760258937, "iter_time": 0.9139373474121094, "loss": 0.6720234155654907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.66581752349894, "step_time": 0.8717676239013672} +{"epoch": 0, "iter": 13738, "iter_tflops": 14.090620973083675, "iter_time": 1.464172058105469, "loss": 0.6528457403182983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.936578313208614, "step_time": 1.150224594116211} +{"epoch": 0, "iter": 13739, "iter_tflops": 35.83998445083392, "iter_time": 0.5756445999145507, "loss": 0.5435438752174377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.11362571292254, "step_time": 0.5274656372070312} +{"epoch": 0, "iter": 13740, "iter_tflops": 38.76910954490353, "iter_time": 0.5321528854370118, "loss": 0.6084351539611816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.84827292550089, "step_time": 0.49299748992919923} +{"epoch": 0, "iter": 13741, "iter_tflops": 18.549369463822398, "iter_time": 1.1122261352539062, "loss": 0.4516337811946869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.675199193783232, "step_time": 1.0485837173461914} +{"epoch": 0, "iter": 13742, "iter_tflops": 24.103318163290858, "iter_time": 0.855944122314453, "loss": 
0.4968535304069519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.098189606927868, "step_time": 0.6634178314208985} +{"epoch": 0, "iter": 13743, "iter_tflops": 48.36807073246373, "iter_time": 0.42654365158081053, "loss": 0.43157681822776794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.485021402313215, "step_time": 0.39308535957336427} +{"epoch": 0, "iter": 13744, "iter_tflops": 49.718165447078995, "iter_time": 0.41496087646484375, "loss": 0.5516518950462341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.004471165246315, "step_time": 0.3820256557464599} +{"epoch": 0, "iter": 13745, "iter_tflops": 25.674082065022933, "iter_time": 0.8035766754150391, "loss": 0.044847309589385986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.072844921634026, "step_time": 0.7620585708618164} +{"epoch": 0, "iter": 13746, "iter_tflops": 17.036644380727072, "iter_time": 1.2109833984375, "loss": 0.02166537567973137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.68938199876144, "step_time": 0.9971826858520509} +{"epoch": 0, "iter": 13747, "iter_tflops": 46.3513520192756, "iter_time": 0.4451023025512696, "loss": 0.05171338841319084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.62765151150539, "step_time": 0.4075064296722412} +{"epoch": 0, "iter": 13748, "iter_tflops": 53.34801728728637, "iter_time": 0.38672652816772457, "loss": 0.0446685329079628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.99857958063837, "step_time": 0.3557172203063965} +{"epoch": 0, "iter": 13749, "iter_tflops": 30.77038839978001, "iter_time": 0.6704853134155274, "loss": 0.6201988458633423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.70609377719561, "step_time": 0.6308027381896972} +{"epoch": 0, "iter": 13750, "iter_tflops": 11.223932684535656, "iter_time": 1.8381341094970702, "loss": 0.48565006256103516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.276343586619973, "step_time": 1.4451244735717774} +{"epoch": 0, "iter": 13751, "iter_tflops": 15.28362631181854, "iter_time": 
1.3498820953369144, "loss": 0.6919949650764465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.541195729302878, "step_time": 1.1761509208679202} +{"epoch": 0, "iter": 13752, "iter_tflops": 17.905683273100284, "iter_time": 1.152209228515625, "loss": 0.6676115393638611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.984418088668853, "step_time": 0.9384416465759278} +{"epoch": 0, "iter": 13753, "iter_tflops": 20.237325797756018, "iter_time": 0.7770611114501952, "loss": 0.1363372653722763, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 21.368704717499867, "step_time": 0.7359191436767579} +{"epoch": 0, "iter": 13754, "iter_tflops": 9.25791343711492, "iter_time": 1.6986158905029296, "loss": 0.2862756550312042, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 11.506439924663086, "step_time": 1.366681526184082} +{"epoch": 0, "iter": 13755, "iter_tflops": 24.313509992507992, "iter_time": 0.6467860412597657, "loss": 0.13469380140304565, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.157254546126406, "step_time": 0.6011960792541504} +{"epoch": 0, "iter": 13756, "iter_tflops": 24.366466012180034, "iter_time": 0.64538037109375, "loss": 0.23401114344596863, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.184898231965477, "step_time": 0.6005613899230957} +{"epoch": 0, "iter": 13757, "iter_tflops": 20.624213369995058, "iter_time": 1.000333595275879, "loss": 0.6821818947792053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.259168294992495, "step_time": 0.9268582382202148} +{"epoch": 0, "iter": 13758, "iter_tflops": 17.825344188657827, "iter_time": 1.1574022521972656, "loss": 0.5673604011535645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.40451475989641, "step_time": 0.8815005874633789} +{"epoch": 0, "iter": 13759, "iter_tflops": 35.4648785656452, "iter_time": 0.5817330932617187, "loss": 0.6463220715522766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.74299661586883, "step_time": 0.5325115585327148} +{"epoch": 0, "iter": 13760, "iter_tflops": 38.99596430685029, 
"iter_time": 0.529057144165039, "loss": 0.6139066219329834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.71828587205102, "step_time": 0.4829569606781007} +{"epoch": 0, "iter": 13761, "iter_tflops": 17.036045479953437, "iter_time": 1.2110259704589845, "loss": 0.5599181652069092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.011697061897078, "step_time": 1.145427520751953} +{"epoch": 0, "iter": 13762, "iter_tflops": 13.490849827074296, "iter_time": 1.529265670776367, "loss": 0.5878090262413025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.6038332783198, "step_time": 1.108970027923584} +{"epoch": 0, "iter": 13763, "iter_tflops": 45.74423681284021, "iter_time": 0.45100967788696283, "loss": 0.4411240518093109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.03217878929312, "step_time": 0.4207664031982422} +{"epoch": 0, "iter": 13764, "iter_tflops": 44.23262447588533, "iter_time": 0.466422550201416, "loss": 0.49096837639808655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.70713687338674, "step_time": 0.4324529800415039} +{"epoch": 0, "iter": 13765, "iter_tflops": 28.373407092324804, "iter_time": 0.727127815246582, "loss": 0.5368804335594177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.027429497191164, "step_time": 0.6870749130249024} +{"epoch": 0, "iter": 13766, "iter_tflops": 11.96975827067113, "iter_time": 1.7236015167236327, "loss": 0.5939480662345886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.513317950128481, "step_time": 1.3298956146240233} +{"epoch": 0, "iter": 13767, "iter_tflops": 37.224166407059336, "iter_time": 0.5542392349243165, "loss": 0.5689175128936768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.499274156936224, "step_time": 0.5094188461303711} +{"epoch": 0, "iter": 13768, "iter_tflops": 40.0728899773506, "iter_time": 0.5148391723632812, "loss": 0.8235975503921509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.50578411071266, "step_time": 0.4742149562835693} +{"epoch": 0, "iter": 13769, "iter_tflops": 22.11302123389138, 
"iter_time": 0.9329839324951172, "loss": 0.6596932411193848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.836155410049205, "step_time": 0.8655377998352051} +{"epoch": 0, "iter": 13770, "iter_tflops": 45.05102601541737, "iter_time": 0.4579494705200195, "loss": 0.8375099301338196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17675037428868, "step_time": 0.4195294189453125} +{"epoch": 0, "iter": 13771, "iter_tflops": 46.61534393052935, "iter_time": 0.44258160018920895, "loss": 0.7071611881256104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.543731697646656, "step_time": 0.40818302917480476} +{"epoch": 0, "iter": 13772, "iter_tflops": 50.002569573740736, "iter_time": 0.41260066604614254, "loss": 0.7875816226005554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.02792184681565, "step_time": 0.3818598384857178} +{"epoch": 0, "iter": 13773, "iter_tflops": 30.144968932846282, "iter_time": 0.6843959121704102, "loss": 0.5911064147949219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.24240766782949, "step_time": 0.6398744697570801} +{"epoch": 0, "iter": 13774, "iter_tflops": 11.427710548859219, "iter_time": 1.8053566741943359, "loss": 0.7596718072891235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.843310672649615, "step_time": 1.4903294448852538} +{"epoch": 0, "iter": 13775, "iter_tflops": 16.30529988807042, "iter_time": 1.265299850463867, "loss": 0.6120826005935669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.321006778358417, "step_time": 1.1260895080566407} +{"epoch": 0, "iter": 13776, "iter_tflops": 39.13371899629401, "iter_time": 0.5271948089599608, "loss": 0.6598724722862244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56025046876929, "step_time": 0.48475028419494626} +{"epoch": 0, "iter": 13777, "iter_tflops": 17.577802340332738, "iter_time": 0.8225555038452147, "loss": 0.17171365022659302, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 18.96756537035453, "step_time": 0.7622864494323731} +{"epoch": 0, "iter": 13778, "iter_tflops": 
25.310547199795284, "iter_time": 0.5712526855468749, "loss": 0.1599356234073639, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 26.989121728655917, "step_time": 0.5357239189147949} +{"epoch": 0, "iter": 13779, "iter_tflops": 26.68933632016434, "iter_time": 0.5417413864135742, "loss": 0.21885789930820465, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 28.411898676666283, "step_time": 0.5088965797424316} +{"epoch": 0, "iter": 13780, "iter_tflops": 25.71232406942369, "iter_time": 0.5623263778686524, "loss": 0.3327775299549103, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 27.379023551734168, "step_time": 0.5280947303771973} +{"epoch": 0, "iter": 13781, "iter_tflops": 22.497532559536804, "iter_time": 0.9170380554199219, "loss": 0.23893891274929047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.761172618880043, "step_time": 0.8682691650390625} +{"epoch": 0, "iter": 13782, "iter_tflops": 27.931338269628046, "iter_time": 0.7386360549926757, "loss": 0.23210351169109344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.15177872839724, "step_time": 0.5706799011230469} +{"epoch": 0, "iter": 13783, "iter_tflops": 47.95344037740113, "iter_time": 0.4302317695617677, "loss": 0.30066296458244324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99716227452095, "step_time": 0.3967734508514404} +{"epoch": 0, "iter": 13784, "iter_tflops": 51.260190015180505, "iter_time": 0.4024778976440429, "loss": 0.31422656774520874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.66073795851751, "step_time": 0.37065792274475096} +{"epoch": 0, "iter": 13785, "iter_tflops": 32.78534052381639, "iter_time": 0.6292779998779297, "loss": 0.6118016839027405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.93579040383198, "step_time": 0.5905432014465332} +{"epoch": 0, "iter": 13786, "iter_tflops": 13.413447367275827, "iter_time": 1.5380903167724609, "loss": 0.5966717004776001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.955324789024282, "step_time": 1.2930538101196287} +{"epoch": 0, "iter": 13787, 
"iter_tflops": 42.50061603513544, "iter_time": 0.48543045806884766, "loss": 0.37409499287605286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.01790266301828, "step_time": 0.4483275489807129} +{"epoch": 0, "iter": 13788, "iter_tflops": 46.00615904591199, "iter_time": 0.44844198989868167, "loss": 0.4881732761859894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.59020431173893, "step_time": 0.41603162956237794} +{"epoch": 0, "iter": 13789, "iter_tflops": 42.20686411064622, "iter_time": 0.48880896377563476, "loss": 0.28677529096603394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.90971092418746, "step_time": 0.44938408660888673} +{"epoch": 0, "iter": 13790, "iter_tflops": 8.428172517264974, "iter_time": 2.447872711181641, "loss": 0.20720602571964264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.251328756430135, "step_time": 2.0125287170410155} +{"epoch": 0, "iter": 13791, "iter_tflops": 11.94719314987437, "iter_time": 1.7268569488525392, "loss": 0.183849036693573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.780023871988906, "step_time": 1.4971740036010743} +{"epoch": 0, "iter": 13792, "iter_tflops": 19.53249216045864, "iter_time": 1.0562448120117187, "loss": 0.15313997864723206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.723438782810486, "step_time": 0.8020348167419434} +{"epoch": 0, "iter": 13793, "iter_tflops": 20.718172376924638, "iter_time": 0.6998453063964845, "loss": 0.176582932472229, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 22.142205753748964, "step_time": 0.6548361015319826} +{"epoch": 0, "iter": 13794, "iter_tflops": 7.652757269747645, "iter_time": 1.8946786346435547, "loss": 0.14898620545864105, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 9.268334298851235, "step_time": 1.564414405822754} +{"epoch": 0, "iter": 13795, "iter_tflops": 9.428038574564935, "iter_time": 1.5379143371582034, "loss": 0.10703293979167938, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 11.147187186199416, "step_time": 1.3007331314086914} +{"epoch": 0, 
"iter": 13796, "iter_tflops": 12.226972987799668, "iter_time": 1.185863067626953, "loss": 0.32008183002471924, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 14.12928933480328, "step_time": 1.0262027587890625} +{"epoch": 0, "iter": 13797, "iter_tflops": 19.5864865400823, "iter_time": 0.7486158065795899, "loss": 0.17130735516548157, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 21.349006216723502, "step_time": 0.6868119888305664} +{"epoch": 0, "iter": 13798, "iter_tflops": 25.82249312760785, "iter_time": 0.5678287277221681, "loss": 0.17913419008255005, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.545552067132373, "step_time": 0.5323092956542969} +{"epoch": 0, "iter": 13799, "iter_tflops": 27.179551519448356, "iter_time": 0.5394773864746093, "loss": 0.14676499366760254, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 28.919098107453916, "step_time": 0.5070266494750976} +{"epoch": 0, "iter": 13800, "iter_tflops": 25.792855053525297, "iter_time": 0.5684812088012695, "loss": 0.238161101937294, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.46076753022336, "step_time": 0.5339527893066407} +{"epoch": 0, "iter": 13801, "iter_tflops": 43.09614650723275, "iter_time": 0.4787224655151367, "loss": 0.10137823969125748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.26674501191243, "step_time": 0.436482213973999} +{"epoch": 0, "iter": 13802, "iter_tflops": 38.60860261067637, "iter_time": 0.5343651962280274, "loss": 0.15505492687225342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.73401608393422, "step_time": 0.4827791862487793} +{"epoch": 0, "iter": 13803, "iter_tflops": 46.05330441571704, "iter_time": 0.44798291397094725, "loss": 0.08165805041790009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.40680698689236, "step_time": 0.4092918148040771} +{"epoch": 0, "iter": 13804, "iter_tflops": 42.59315050758118, "iter_time": 0.4843758506774902, "loss": 0.09024854004383087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.619456982633515, "step_time": 0.44254255294799805} 
+{"epoch": 0, "iter": 13805, "iter_tflops": 25.123095605853987, "iter_time": 0.8212002944946288, "loss": 0.5516640543937683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.154173247191732, "step_time": 0.7597761611938476} +{"epoch": 0, "iter": 13806, "iter_tflops": 16.722050396372172, "iter_time": 1.2337657775878907, "loss": 0.6827242970466614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.028997449397863, "step_time": 0.9365425529479979} +{"epoch": 0, "iter": 13807, "iter_tflops": 41.638853579044344, "iter_time": 0.49547698211669916, "loss": 0.5072550773620605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.764066055660805, "step_time": 0.4608851547241211} +{"epoch": 0, "iter": 13808, "iter_tflops": 46.50854634788985, "iter_time": 0.443597900390625, "loss": 0.6640337109565735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.07833148053631, "step_time": 0.4119764556884766} +{"epoch": 0, "iter": 13809, "iter_tflops": 28.812646071879794, "iter_time": 0.7160429992675781, "loss": 0.02201257087290287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.524325421930808, "step_time": 0.6758902359008789} +{"epoch": 0, "iter": 13810, "iter_tflops": 16.773570120075064, "iter_time": 1.2299762878417968, "loss": 0.03189418092370033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.28744121135285, "step_time": 0.9256824645996093} +{"epoch": 0, "iter": 13811, "iter_tflops": 53.70531712333458, "iter_time": 0.3841536483764648, "loss": 0.03424884006381035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.78054777063771, "step_time": 0.350985050201416} +{"epoch": 0, "iter": 13812, "iter_tflops": 60.25300092996955, "iter_time": 0.34240773391723633, "loss": 0.052295245230197906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.94149114069825, "step_time": 0.31286968421936034} +{"epoch": 0, "iter": 13813, "iter_tflops": 31.396994984201708, "iter_time": 0.6571040802001953, "loss": 0.1702067106962204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.31091800939693, "step_time": 
0.619349292755127} +{"epoch": 0, "iter": 13814, "iter_tflops": 24.542250035961235, "iter_time": 0.8406357803344725, "loss": 0.12537018954753876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.034571155531708, "step_time": 0.6647777862548828} +{"epoch": 0, "iter": 13815, "iter_tflops": 46.081895069461744, "iter_time": 0.4477049713134766, "loss": 0.25079673528671265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.706329475767454, "step_time": 0.4068741264343262} +{"epoch": 0, "iter": 13816, "iter_tflops": 45.528154843778324, "iter_time": 0.4531502227783203, "loss": 0.1022883951663971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.72892455331064, "step_time": 0.41487109756469726} +{"epoch": 0, "iter": 13817, "iter_tflops": 31.074714761851464, "iter_time": 0.6639189987182617, "loss": 0.08876393735408783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.71352987635572, "step_time": 0.5943242759704589} +{"epoch": 0, "iter": 13818, "iter_tflops": 36.60322275389187, "iter_time": 0.5636414489746094, "loss": 0.07805246859788895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.55604563584312, "step_time": 0.49646430969238275} +{"epoch": 0, "iter": 13819, "iter_tflops": 40.39058070997373, "iter_time": 0.510789722442627, "loss": 0.1065998300909996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.438000978385446, "step_time": 0.46426691246032714} +{"epoch": 0, "iter": 13820, "iter_tflops": 40.39122835499774, "iter_time": 0.5107815322875976, "loss": 0.11385879665613174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.513433285338444, "step_time": 0.46348016738891606} +{"epoch": 0, "iter": 13821, "iter_tflops": 18.307716459263577, "iter_time": 1.126906982421875, "loss": 0.2935907244682312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.632611386381388, "step_time": 1.0508583450317384} +{"epoch": 0, "iter": 13822, "iter_tflops": 20.06171529975834, "iter_time": 1.0283813323974609, "loss": 0.30580994486808777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
23.96418316850294, "step_time": 0.8609136962890627} +{"epoch": 0, "iter": 13823, "iter_tflops": 45.33600505938896, "iter_time": 0.4550708312988281, "loss": 0.35870498418807983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.02291810770359, "step_time": 0.4208458881378174} +{"epoch": 0, "iter": 13824, "iter_tflops": 49.164119719489264, "iter_time": 0.41963719940185545, "loss": 0.36540043354034424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.99646119575765, "step_time": 0.38929190826416016} +{"epoch": 0, "iter": 13825, "iter_tflops": 32.67837713670701, "iter_time": 0.4737100143432617, "loss": 0.11620848625898361, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 35.67972386845332, "step_time": 0.43386194801330563} +{"epoch": 0, "iter": 13826, "iter_tflops": 28.99156900315235, "iter_time": 0.5339509048461915, "loss": 0.10533507913351059, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 31.996918038587072, "step_time": 0.4837989234924317} +{"epoch": 0, "iter": 13827, "iter_tflops": 29.609467355931244, "iter_time": 0.5228082733154297, "loss": 0.09179087728261948, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 32.663947343495124, "step_time": 0.47391928291320795} +{"epoch": 0, "iter": 13828, "iter_tflops": 32.68197535792393, "iter_time": 0.47365785980224606, "loss": 0.029494380578398705, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 36.01576086709397, "step_time": 0.42981389617919924} +{"epoch": 0, "iter": 13829, "iter_tflops": 18.574608108807812, "iter_time": 1.1107148742675783, "loss": 0.1116839274764061, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.034500501174634, "step_time": 1.0297782821655272} +{"epoch": 0, "iter": 13830, "iter_tflops": 14.703780676757663, "iter_time": 1.4031148834228517, "loss": 0.1754239797592163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.53663896102694, "step_time": 1.112989984512329} +{"epoch": 0, "iter": 13831, "iter_tflops": 38.323788759960685, "iter_time": 0.5383364791870117, "loss": 0.20846274495124817, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 42.105794106459584, "step_time": 0.48998229217529293} +{"epoch": 0, "iter": 13832, "iter_tflops": 40.390412392031095, "iter_time": 0.5107918510437012, "loss": 0.20668160915374756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10019013644348, "step_time": 0.4678232326507568} +{"epoch": 0, "iter": 13833, "iter_tflops": 2.7863298925325766, "iter_time": 0.5624820938110351, "loss": 0.40917471051216125, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.0721143224319722, "step_time": 0.5101570148468018} +{"epoch": 0, "iter": 13834, "iter_tflops": 3.1510449533093756, "iter_time": 0.49737807464599615, "loss": 0.7253285646438599, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.4513525424434905, "step_time": 0.4541004295349121} +{"epoch": 0, "iter": 13835, "iter_tflops": 3.7013334343372564, "iter_time": 0.42343136596679687, "loss": 0.6622399091720581, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 4.019117295090818, "step_time": 0.3899514636993408} +{"epoch": 0, "iter": 13836, "iter_tflops": 3.5078994390114038, "iter_time": 0.446780387878418, "loss": 0.6328384876251221, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.8190027492425034, "step_time": 0.4103847980499268} +{"epoch": 0, "iter": 13837, "iter_tflops": 42.123568554473444, "iter_time": 0.4897755393981934, "loss": 0.010515319183468819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.813278264875045, "step_time": 0.4503299980163574} +{"epoch": 0, "iter": 13838, "iter_tflops": 18.821017425663605, "iter_time": 1.0961731262207033, "loss": 0.0008678577141836286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.439093342736143, "step_time": 0.919426342010498} +{"epoch": 0, "iter": 13839, "iter_tflops": 50.93526872784483, "iter_time": 0.4050453453063965, "loss": 0.016077883541584015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.014740718218995, "step_time": 0.36831543350219725} +{"epoch": 0, "iter": 13840, "iter_tflops": 55.131147418410144, "iter_time": 0.3742184677124023, "loss": 0.0019665956497192383, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.62010319634241, "step_time": 0.3403341865539551} +{"epoch": 0, "iter": 13841, "iter_tflops": 33.18843930104874, "iter_time": 0.6216349411010742, "loss": 0.18895664811134338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.46439066174041, "step_time": 0.581741096496582} +{"epoch": 0, "iter": 13842, "iter_tflops": 9.754413252480036, "iter_time": 2.115052230834961, "loss": 0.21600306034088135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.833804445129633, "step_time": 1.7434032821655272} +{"epoch": 0, "iter": 13843, "iter_tflops": 12.198339168005687, "iter_time": 1.691303482055664, "loss": 0.2849809229373932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.266279068147758, "step_time": 1.4461439743041993} +{"epoch": 0, "iter": 13844, "iter_tflops": 40.110662867555355, "iter_time": 0.5143543395996094, "loss": 0.25641459226608276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.89735703114071, "step_time": 0.4699848670959472} +{"epoch": 0, "iter": 13845, "iter_tflops": 17.889848000548202, "iter_time": 0.8630125656127929, "loss": 0.23961889743804932, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 19.154057143014754, "step_time": 0.806051872253418} +{"epoch": 0, "iter": 13846, "iter_tflops": 6.45657880242216, "iter_time": 2.3912297973632812, "loss": 0.21772511303424835, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 8.018489017444162, "step_time": 1.9254455032348632} +{"epoch": 0, "iter": 13847, "iter_tflops": 9.55345798430132, "iter_time": 1.6160811767578127, "loss": 0.2415376901626587, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 11.17921262855859, "step_time": 1.3810600204467773} +{"epoch": 0, "iter": 13848, "iter_tflops": 22.14818037622684, "iter_time": 0.6970849685668946, "loss": 0.1529460996389389, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.032824409989647, "step_time": 0.6424198570251465} +{"epoch": 0, "iter": 13849, "iter_tflops": 14.864596297004502, "iter_time": 1.0138975677490236, "loss": 
0.3999001681804657, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 16.00596059067485, "step_time": 0.9415978469848633} +{"epoch": 0, "iter": 13850, "iter_tflops": 22.939239268056017, "iter_time": 0.6570042648315431, "loss": 0.1949465125799179, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 24.653868830087156, "step_time": 0.6113108711242676} +{"epoch": 0, "iter": 13851, "iter_tflops": 23.241068125074143, "iter_time": 0.6484718322753906, "loss": 0.17428293824195862, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 25.028477890245792, "step_time": 0.602161190032959} +{"epoch": 0, "iter": 13852, "iter_tflops": 25.14230614546559, "iter_time": 0.5994349899291992, "loss": 0.14676669239997864, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 26.93100965701749, "step_time": 0.5596217231750488} +{"epoch": 0, "iter": 13853, "iter_tflops": 18.70258647651897, "iter_time": 1.1031144561767579, "loss": 0.2769624888896942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.19468300839622, "step_time": 1.0216101684570313} +{"epoch": 0, "iter": 13854, "iter_tflops": 35.53392253759335, "iter_time": 0.5806027603149415, "loss": 0.3379002809524536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.406954786000206, "step_time": 0.47529465293884277} +{"epoch": 0, "iter": 13855, "iter_tflops": 49.74247617571962, "iter_time": 0.4147580718994141, "loss": 0.24495218694210052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.990519830293344, "step_time": 0.3821243724822998} +{"epoch": 0, "iter": 13856, "iter_tflops": 44.09832874959296, "iter_time": 0.4678429794311523, "loss": 0.24147069454193115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.987410300491106, "step_time": 0.42992721176147464} +{"epoch": 0, "iter": 13857, "iter_tflops": 13.936994625772869, "iter_time": 1.0345074768066407, "loss": 0.05550459399819374, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 14.527501533726976, "step_time": 0.9924573135375975} +{"epoch": 0, "iter": 13858, "iter_tflops": 11.042547613749761, "iter_time": 
1.3056701812744143, "loss": 0.03240193799138069, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 13.37203189723917, "step_time": 1.0782149829864502} +{"epoch": 0, "iter": 13859, "iter_tflops": 37.54348975507782, "iter_time": 0.3840326309204102, "loss": 0.03359483554959297, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 41.12852938916766, "step_time": 0.35055776023864743} +{"epoch": 0, "iter": 13860, "iter_tflops": 36.65040280244507, "iter_time": 0.39339063262939455, "loss": 0.04107896611094475, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 40.30003623627931, "step_time": 0.3577645702362061} +{"epoch": 0, "iter": 13861, "iter_tflops": 33.70730232284275, "iter_time": 0.6120659942626954, "loss": 0.25763779878616333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.9642811666351, "step_time": 0.5736551055908203} +{"epoch": 0, "iter": 13862, "iter_tflops": 10.383004758015296, "iter_time": 1.9870060729980468, "loss": 0.36027422547340393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.745804643076447, "step_time": 1.6186575965881347} +{"epoch": 0, "iter": 13863, "iter_tflops": 34.64920917835971, "iter_time": 0.5954275436401368, "loss": 0.4684327244758606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.75828292222808, "step_time": 0.5463991451263428} +{"epoch": 0, "iter": 13864, "iter_tflops": 39.505429932956844, "iter_time": 0.522234375, "loss": 0.2766784131526947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.01310836329549, "step_time": 0.47964665412902835} +{"epoch": 0, "iter": 13865, "iter_tflops": 29.87617965659828, "iter_time": 0.6905532684326172, "loss": 0.6180232167243958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.26436519144163, "step_time": 0.6394390029907226} +{"epoch": 0, "iter": 13866, "iter_tflops": 9.469612789250263, "iter_time": 2.178662841796875, "loss": 0.5327398180961609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.887396094249771, "step_time": 1.7355435409545898} +{"epoch": 0, "iter": 13867, "iter_tflops": 11.77238834534728, 
"iter_time": 1.752498550415039, "loss": 0.6735401749610901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.792761098986535, "step_time": 1.3946749610900877} +{"epoch": 0, "iter": 13868, "iter_tflops": 20.956797822443654, "iter_time": 0.9844582977294921, "loss": 0.7604563236236572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.458800862798654, "step_time": 0.8435038833618164} +{"epoch": 0, "iter": 13869, "iter_tflops": 14.585055266965426, "iter_time": 0.9745613632202148, "loss": 0.38254180550575256, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 15.538187264423453, "step_time": 0.9147805404663086} +{"epoch": 0, "iter": 13870, "iter_tflops": 6.455047144092601, "iter_time": 2.2020027160644533, "loss": 0.27686789631843567, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 7.337287325609648, "step_time": 1.9372324829101566} +{"epoch": 0, "iter": 13871, "iter_tflops": 7.482856816019007, "iter_time": 1.8995460815429688, "loss": 0.23750506341457367, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 8.720920286834078, "step_time": 1.629877452850342} +{"epoch": 0, "iter": 13872, "iter_tflops": 23.810624651181225, "iter_time": 0.5969617156982421, "loss": 0.33144664764404297, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 25.281502422756926, "step_time": 0.562230484008789} +{"epoch": 0, "iter": 13873, "iter_tflops": 17.448382842935615, "iter_time": 0.9130018386840819, "loss": 0.1905340552330017, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 18.3034457055924, "step_time": 0.8703500900268555} +{"epoch": 0, "iter": 13874, "iter_tflops": 11.278946840716186, "iter_time": 1.412401870727539, "loss": 0.12157954275608063, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 14.01621550784711, "step_time": 1.1365696830749512} +{"epoch": 0, "iter": 13875, "iter_tflops": 23.66297426600798, "iter_time": 0.6732207641601563, "loss": 0.2890720069408417, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 25.416880739550155, "step_time": 0.6267647781372071} +{"epoch": 0, "iter": 13876, "iter_tflops": 
24.628406585470806, "iter_time": 0.6468305435180663, "loss": 0.12778136134147644, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 26.346163712515796, "step_time": 0.6046575050354005} +{"epoch": 0, "iter": 13877, "iter_tflops": 17.783221185454888, "iter_time": 1.1601437835693362, "loss": 0.5052937865257263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.107826724550247, "step_time": 1.0797195205688477} +{"epoch": 0, "iter": 13878, "iter_tflops": 20.037099866696607, "iter_time": 1.0296446914672852, "loss": 0.4813331365585327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.82282249352689, "step_time": 0.7691619148254394} +{"epoch": 0, "iter": 13879, "iter_tflops": 49.21708634987057, "iter_time": 0.4191855926513672, "loss": 0.30550435185432434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.2715641777259, "step_time": 0.3872815418243408} +{"epoch": 0, "iter": 13880, "iter_tflops": 43.6952354186107, "iter_time": 0.4721588821411133, "loss": 0.33714547753334045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.83395681671745, "step_time": 0.44051570510864263} +{"epoch": 0, "iter": 13881, "iter_tflops": 42.13096754456092, "iter_time": 0.48968952560424805, "loss": 0.7695325613021851, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.87942279610656, "step_time": 0.4496807556152344} +{"epoch": 0, "iter": 13882, "iter_tflops": 44.558132044898876, "iter_time": 0.46301522445678706, "loss": 0.754292905330658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.15461077772923, "step_time": 0.4197183780670166} +{"epoch": 0, "iter": 13883, "iter_tflops": 43.286090131887576, "iter_time": 0.47662178421020507, "loss": 0.638032853603363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.54324169029376, "step_time": 0.4432672233581544} +{"epoch": 0, "iter": 13884, "iter_tflops": 43.946342885659334, "iter_time": 0.46946098709106443, "loss": 0.6077664494514465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90113415362076, "step_time": 0.43988474655151366} +{"epoch": 0, "iter": 13885, 
"iter_tflops": 34.890701273960936, "iter_time": 0.5913063583374024, "loss": 0.6466551423072815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.304522990220136, "step_time": 0.5530453643798827} +{"epoch": 0, "iter": 13886, "iter_tflops": 13.9105612483395, "iter_time": 1.4831244506835939, "loss": 0.5922877788543701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.442572971821665, "step_time": 1.2547363204956055} +{"epoch": 0, "iter": 13887, "iter_tflops": 32.78278843322821, "iter_time": 0.6293269882202149, "loss": 0.5753194689750671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.84908167173609, "step_time": 0.5754985218048095} +{"epoch": 0, "iter": 13888, "iter_tflops": 34.19513966063259, "iter_time": 0.6033340911865235, "loss": 0.6434794664382935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.243876449059805, "step_time": 0.5539459228515625} +{"epoch": 0, "iter": 13889, "iter_tflops": 21.556218959444127, "iter_time": 0.9570831298828125, "loss": 0.032722730189561844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.21352303198408, "step_time": 0.8887532272338866} +{"epoch": 0, "iter": 13890, "iter_tflops": 39.227234145465864, "iter_time": 0.5259380111694335, "loss": 0.03147264942526817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36420439850229, "step_time": 0.475763219833374} +{"epoch": 0, "iter": 13891, "iter_tflops": 41.92661374731898, "iter_time": 0.49207631301879884, "loss": 0.025041108950972557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.216285884065094, "step_time": 0.44640310478210454} +{"epoch": 0, "iter": 13892, "iter_tflops": 44.73030311796323, "iter_time": 0.46123303604125976, "loss": 0.03673766553401947, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.103028269919655, "step_time": 0.42015929031372073} +{"epoch": 0, "iter": 13893, "iter_tflops": 31.942758540448963, "iter_time": 0.6458770141601563, "loss": 0.09516704082489014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.78109132700258, "step_time": 0.5931698150634765} +{"epoch": 
0, "iter": 13894, "iter_tflops": 10.395467303121002, "iter_time": 1.9846239624023438, "loss": 0.15077932178974152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.79597721962997, "step_time": 1.7489940109252933} +{"epoch": 0, "iter": 13895, "iter_tflops": 19.22156797621075, "iter_time": 1.0733304138183595, "loss": 0.10145539790391922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.65056397899919, "step_time": 0.9529125213623046} +{"epoch": 0, "iter": 13896, "iter_tflops": 40.5584177710077, "iter_time": 0.5086759948730469, "loss": 0.07233072817325592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.56854267319131, "step_time": 0.46290707015991217} +{"epoch": 0, "iter": 13897, "iter_tflops": 13.358874754628742, "iter_time": 1.1037157592773437, "loss": 0.25041449069976807, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 14.353231132445162, "step_time": 1.0272530593872071} +{"epoch": 0, "iter": 13898, "iter_tflops": 17.269797687306617, "iter_time": 0.8537679977416992, "loss": 0.2701399624347687, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 21.20345794637445, "step_time": 0.6953771705627442} +{"epoch": 0, "iter": 13899, "iter_tflops": 22.34725085295767, "iter_time": 0.6597858810424805, "loss": 0.26571813225746155, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.941173895084074, "step_time": 0.6158595504760742} +{"epoch": 0, "iter": 13900, "iter_tflops": 21.62757331019813, "iter_time": 0.6817408676147461, "loss": 0.2223038375377655, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.10490163402818, "step_time": 0.6381503295898436} +{"epoch": 0, "iter": 13901, "iter_tflops": 17.129876068575456, "iter_time": 1.2043924560546875, "loss": 0.1042243018746376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.245998551725155, "step_time": 1.1307187957763674} +{"epoch": 0, "iter": 13902, "iter_tflops": 21.255600007558467, "iter_time": 0.9706192016601562, "loss": 0.15149915218353271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.42537760126557, "step_time": 
0.7807303199768066} +{"epoch": 0, "iter": 13903, "iter_tflops": 42.715677638163775, "iter_time": 0.48298645019531244, "loss": 0.1765972077846527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.04043998272504, "step_time": 0.4481080875396729} +{"epoch": 0, "iter": 13904, "iter_tflops": 49.25166879070107, "iter_time": 0.41889125823974616, "loss": 0.1067231297492981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.313558434551766, "step_time": 0.3869764862060546} +{"epoch": 0, "iter": 13905, "iter_tflops": 39.18097966292651, "iter_time": 0.5265588989257812, "loss": 0.5858868956565857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.1527316556761, "step_time": 0.48943669128417966} +{"epoch": 0, "iter": 13906, "iter_tflops": 15.527313655388257, "iter_time": 1.3286968994140624, "loss": 0.5160333514213562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.744669718071723, "step_time": 1.100637878417969} +{"epoch": 0, "iter": 13907, "iter_tflops": 21.788992474404353, "iter_time": 0.9468585357666016, "loss": 0.5578528642654419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.120138909271898, "step_time": 0.7898538970947265} +{"epoch": 0, "iter": 13908, "iter_tflops": 45.49920655843335, "iter_time": 0.45343853378295895, "loss": 0.6744946241378784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.897963326972544, "step_time": 0.4219213256835938} +{"epoch": 0, "iter": 13909, "iter_tflops": 25.316854976991426, "iter_time": 0.6340962295532226, "loss": 0.1941775530576706, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 27.121877443677143, "step_time": 0.5918956871032715} +{"epoch": 0, "iter": 13910, "iter_tflops": 27.200750061422006, "iter_time": 0.5901793975830077, "loss": 0.1849384605884552, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 28.919958666842724, "step_time": 0.5550949249267578} +{"epoch": 0, "iter": 13911, "iter_tflops": 27.64774278667779, "iter_time": 0.5806377182006837, "loss": 0.12987835705280304, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 29.536217766867583, 
"step_time": 0.5435131340026855} +{"epoch": 0, "iter": 13912, "iter_tflops": 29.181241172321318, "iter_time": 0.5501247253417969, "loss": 0.22368264198303223, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 31.082199491443628, "step_time": 0.5164796104431153} +{"epoch": 0, "iter": 13913, "iter_tflops": 40.403769975884074, "iter_time": 0.5106229820251466, "loss": 0.16680945456027985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.994202421123674, "step_time": 0.4689502792358398} +{"epoch": 0, "iter": 13914, "iter_tflops": 49.32804809745314, "iter_time": 0.4182426490783691, "loss": 0.23700876533985138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.8254906935952, "step_time": 0.3832959671020508} +{"epoch": 0, "iter": 13915, "iter_tflops": 50.60997467921555, "iter_time": 0.4076487617492677, "loss": 0.2155204862356186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.66137938257875, "step_time": 0.3774345569610596} +{"epoch": 0, "iter": 13916, "iter_tflops": 50.102441595896565, "iter_time": 0.411778205871582, "loss": 0.20746155083179474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.191212081197584, "step_time": 0.3807092094421387} +{"epoch": 0, "iter": 13917, "iter_tflops": 17.814972957625276, "iter_time": 1.1580760498046874, "loss": 0.651516318321228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.490278254094722, "step_time": 1.1157805862426757} +{"epoch": 0, "iter": 13918, "iter_tflops": 14.530330589047363, "iter_time": 1.419864013671875, "loss": 0.6464664340019226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.812355811910464, "step_time": 1.1582462043762207} +{"epoch": 0, "iter": 13919, "iter_tflops": 46.637791310728566, "iter_time": 0.442368579864502, "loss": 0.5611197352409363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.58945890937362, "step_time": 0.40781407737731934} +{"epoch": 0, "iter": 13920, "iter_tflops": 49.563272452163964, "iter_time": 0.4162576942443848, "loss": 0.6265898942947388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
53.47241135402216, "step_time": 0.38582687759399414} +{"epoch": 0, "iter": 13921, "iter_tflops": 35.481223891853425, "iter_time": 0.5814651031494141, "loss": 0.6978098154067993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.10364320486232, "step_time": 0.5414467430114746} +{"epoch": 0, "iter": 13922, "iter_tflops": 13.19027354681297, "iter_time": 1.5641141510009766, "loss": 0.7368771433830261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.591153163549736, "step_time": 1.4139453735351564} +{"epoch": 0, "iter": 13923, "iter_tflops": 14.855473996522154, "iter_time": 1.3887872924804687, "loss": 0.604643702507019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.754762024537204, "step_time": 0.9940414390563964} +{"epoch": 0, "iter": 13924, "iter_tflops": 44.721535572684004, "iter_time": 0.4613234596252441, "loss": 0.5227792263031006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87100398052286, "step_time": 0.4221540756225586} +{"epoch": 0, "iter": 13925, "iter_tflops": 26.105659757495605, "iter_time": 0.5898436431884765, "loss": 0.15231011807918549, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.06750763100701, "step_time": 0.5486150627136231} +{"epoch": 0, "iter": 13926, "iter_tflops": 12.145942323360508, "iter_time": 1.2677696838378907, "loss": 0.18169556558132172, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 13.692891836320538, "step_time": 1.1245438613891603} +{"epoch": 0, "iter": 13927, "iter_tflops": 20.34493873372391, "iter_time": 0.7568593673706054, "loss": 0.3099789619445801, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.851591647878813, "step_time": 0.6196085014343261} +{"epoch": 0, "iter": 13928, "iter_tflops": 23.553273992217918, "iter_time": 0.6537629318237305, "loss": 0.3194826543331146, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.282960251034474, "step_time": 0.6090369682312012} +{"epoch": 0, "iter": 13929, "iter_tflops": 32.91837180048576, "iter_time": 0.6267349319458008, "loss": 0.3980731666088104, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 35.873211353905496, "step_time": 0.5751114196777344} +{"epoch": 0, "iter": 13930, "iter_tflops": 40.83698136102193, "iter_time": 0.5052061347961426, "loss": 0.42526596784591675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26130730135602, "step_time": 0.4558218650817871} +{"epoch": 0, "iter": 13931, "iter_tflops": 47.56025987380677, "iter_time": 0.43378849411010745, "loss": 0.3202194273471832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45881804806978, "step_time": 0.4009243564605713} +{"epoch": 0, "iter": 13932, "iter_tflops": 43.22751494559324, "iter_time": 0.47726762771606446, "loss": 0.3259003460407257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.62951483632699, "step_time": 0.44244709777832036} +{"epoch": 0, "iter": 13933, "iter_tflops": 19.390574632222997, "iter_time": 0.7435532684326172, "loss": 0.2912808656692505, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 20.58550887138732, "step_time": 0.7003919715881347} +{"epoch": 0, "iter": 13934, "iter_tflops": 8.58009574122357, "iter_time": 1.6803921051025388, "loss": 0.2284214347600937, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 9.844571715956079, "step_time": 1.4645558547973634} +{"epoch": 0, "iter": 13935, "iter_tflops": 22.132058641994124, "iter_time": 0.6514497985839843, "loss": 0.13372711837291718, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 23.789567281963183, "step_time": 0.6060608406066894} +{"epoch": 0, "iter": 13936, "iter_tflops": 21.8555721837942, "iter_time": 0.6596910400390625, "loss": 0.25869691371917725, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 23.472494237159832, "step_time": 0.6142476806640625} +{"epoch": 0, "iter": 13937, "iter_tflops": 22.66255526805184, "iter_time": 0.9103604278564452, "loss": 0.6800861358642578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.80925084759873, "step_time": 0.8665158615112305} +{"epoch": 0, "iter": 13938, "iter_tflops": 15.11954907697436, "iter_time": 1.364531005859375, "loss": 0.6874983906745911, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 19.540031023193098, "step_time": 1.0558372955322266} +{"epoch": 0, "iter": 13939, "iter_tflops": 36.423725585013955, "iter_time": 0.5664190902709961, "loss": 0.6583099961280823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.57812050930138, "step_time": 0.5212752208709717} +{"epoch": 0, "iter": 13940, "iter_tflops": 38.64402264443414, "iter_time": 0.5338754119873047, "loss": 0.7533193826675415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.89032667121831, "step_time": 0.4925025691986084} +{"epoch": 0, "iter": 13941, "iter_tflops": 18.50798508739264, "iter_time": 1.114713104248047, "loss": 0.003660168731585145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.832631133113903, "step_time": 1.0402600326538085} +{"epoch": 0, "iter": 13942, "iter_tflops": 16.136820948307673, "iter_time": 1.2785104064941408, "loss": 0.02486243098974228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.82978073497725, "step_time": 1.040409563064575} +{"epoch": 0, "iter": 13943, "iter_tflops": 42.122749666900425, "iter_time": 0.48978506088256835, "loss": 0.0014134583761915565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.64031488681181, "step_time": 0.44234464454650874} +{"epoch": 0, "iter": 13944, "iter_tflops": 45.82626281120261, "iter_time": 0.45020240020751956, "loss": 0.0017396112671121955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.98893570258819, "step_time": 0.4046190261840821} +{"epoch": 0, "iter": 13945, "iter_tflops": 20.39437809588305, "iter_time": 1.0116068954467774, "loss": 0.6124672293663025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.720064100963807, "step_time": 0.9498633804321288} +{"epoch": 0, "iter": 13946, "iter_tflops": 12.394891568121736, "iter_time": 1.6644835815429686, "loss": 0.6664119958877563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.183846756773196, "step_time": 1.0754409046173097} +{"epoch": 0, "iter": 13947, "iter_tflops": 46.77240747576728, "iter_time": 0.4410953941345215, "loss": 0.5023866891860962, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 50.74682312375809, "step_time": 0.40654945945739746} +{"epoch": 0, "iter": 13948, "iter_tflops": 50.25702924179901, "iter_time": 0.4105116004943848, "loss": 0.6385628581047058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.212139028767446, "step_time": 0.38056224822998047} +{"epoch": 0, "iter": 13949, "iter_tflops": 20.918973421718718, "iter_time": 0.9862383346557617, "loss": 0.6816993951797485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.80934630628532, "step_time": 0.9459748687744141} +{"epoch": 0, "iter": 13950, "iter_tflops": 15.32239480099947, "iter_time": 1.3464666442871094, "loss": 0.6814361214637756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.727024881557814, "step_time": 1.0458289394378664} +{"epoch": 0, "iter": 13951, "iter_tflops": 41.23645702150074, "iter_time": 0.5003119812011718, "loss": 0.39949193596839905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.959238521674614, "step_time": 0.45888440704345707} +{"epoch": 0, "iter": 13952, "iter_tflops": 39.563804347482076, "iter_time": 0.5214638442993164, "loss": 0.38442301750183105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.04008601466419, "step_time": 0.47934601020812984} +{"epoch": 0, "iter": 13953, "iter_tflops": 41.87226073505949, "iter_time": 0.49271506118774405, "loss": 0.00884486548602581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.84021368193102, "step_time": 0.4404568614959717} +{"epoch": 0, "iter": 13954, "iter_tflops": 44.70296398075967, "iter_time": 0.4615151138305664, "loss": 0.0027220798656344414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.38039889171606, "step_time": 0.41779924774169924} +{"epoch": 0, "iter": 13955, "iter_tflops": 43.063243301958074, "iter_time": 0.4790882415771484, "loss": 0.0034369281493127346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.605533567855375, "step_time": 0.43337595367431647} +{"epoch": 0, "iter": 13956, "iter_tflops": 46.32719083614903, "iter_time": 0.44533443832397457, "loss": 
0.014789445325732231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.872419755978164, "step_time": 0.405545747756958} +{"epoch": 0, "iter": 13957, "iter_tflops": 27.98602106347755, "iter_time": 0.7371928100585937, "loss": 0.13350261747837067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.171359960368, "step_time": 0.6837972679138183} +{"epoch": 0, "iter": 13958, "iter_tflops": 11.99294044241754, "iter_time": 1.7202698211669922, "loss": 0.16584816575050354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.052624794739113, "step_time": 1.4681309585571287} +{"epoch": 0, "iter": 13959, "iter_tflops": 12.943268429594625, "iter_time": 1.5939631958007814, "loss": 0.004760184790939093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.722571993878482, "step_time": 1.2337272949218752} +{"epoch": 0, "iter": 13960, "iter_tflops": 24.098322377185617, "iter_time": 0.856121566772461, "loss": 0.013161665759980679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.537906393604334, "step_time": 0.6984616050720215} +{"epoch": 0, "iter": 13961, "iter_tflops": 21.473817186545485, "iter_time": 0.7399449615478516, "loss": 0.19314196705818176, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 22.771713581857828, "step_time": 0.6977710647583008} +{"epoch": 0, "iter": 13962, "iter_tflops": 7.832044119711625, "iter_time": 2.0287734069824217, "loss": 0.12402844429016113, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 10.463857089980007, "step_time": 1.5185072479248047} +{"epoch": 0, "iter": 13963, "iter_tflops": 8.482717683881404, "iter_time": 1.8731547393798829, "loss": 0.22742627561092377, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 10.803827367111436, "step_time": 1.470723503112793} +{"epoch": 0, "iter": 13964, "iter_tflops": 13.557058453517058, "iter_time": 1.1720420684814452, "loss": 0.3167342245578766, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 18.94573729496229, "step_time": 0.8386816825866699} +{"epoch": 0, "iter": 13965, "iter_tflops": 15.52562649563254, "iter_time": 
1.1186070861816406, "loss": 0.25276657938957214, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 16.403541816114547, "step_time": 1.058739387512207} +{"epoch": 0, "iter": 13966, "iter_tflops": 7.265273971366543, "iter_time": 2.3904226989746094, "loss": 0.21651746332645416, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 9.498498490666725, "step_time": 1.828402229309082} +{"epoch": 0, "iter": 13967, "iter_tflops": 9.897711533769884, "iter_time": 1.7546556854248045, "loss": 0.16679787635803223, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 13.199812668601222, "step_time": 1.3157062339782715} +{"epoch": 0, "iter": 13968, "iter_tflops": 25.107599361770884, "iter_time": 0.6917059478759765, "loss": 0.1866736114025116, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 26.886520151563804, "step_time": 0.6459398880004882} +{"epoch": 0, "iter": 13969, "iter_tflops": 10.59703601014934, "iter_time": 1.38366552734375, "loss": 0.1466018408536911, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 11.14494800724168, "step_time": 1.315641258239746} +{"epoch": 0, "iter": 13970, "iter_tflops": 12.88844644915067, "iter_time": 1.1376664733886719, "loss": 0.18398618698120117, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 16.40077713935362, "step_time": 0.8940279655456542} +{"epoch": 0, "iter": 13971, "iter_tflops": 26.034995073891583, "iter_time": 0.5631940155029297, "loss": 0.17334794998168945, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.764306448746616, "step_time": 0.5281152420043946} +{"epoch": 0, "iter": 13972, "iter_tflops": 25.91802631665763, "iter_time": 0.565735725402832, "loss": 0.31579381227493286, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 27.39548770681389, "step_time": 0.5352251281738281} +{"epoch": 0, "iter": 13973, "iter_tflops": 29.986830062104417, "iter_time": 0.6880051498413087, "loss": 0.06672212481498718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.27907134480973, "step_time": 0.6391476783752441} +{"epoch": 0, "iter": 13974, "iter_tflops": 50.79763267170598, 
"iter_time": 0.4061428146362305, "loss": 0.053845085203647614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.465863706507086, "step_time": 0.3719601955413818} +{"epoch": 0, "iter": 13975, "iter_tflops": 52.41330576656534, "iter_time": 0.3936232070922851, "loss": 0.0587865486741066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.87173632974404, "step_time": 0.3627653179168701} +{"epoch": 0, "iter": 13976, "iter_tflops": 53.3895416511588, "iter_time": 0.38642574691772463, "loss": 0.07681930065155029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.256518095517364, "step_time": 0.3541422348022461} +{"epoch": 0, "iter": 13977, "iter_tflops": 2.557688943025564, "iter_time": 5.876521606445312, "loss": 0.21139509975910187, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 19.86055786717083, "step_time": 0.7567921524047851} +{"epoch": 0, "iter": 13978, "iter_tflops": 23.03775652337332, "iter_time": 0.6524209213256836, "loss": 0.2764865458011627, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.8478253583938, "step_time": 0.6048945579528808} +{"epoch": 0, "iter": 13979, "iter_tflops": 22.529838627045333, "iter_time": 0.667129249572754, "loss": 0.32668519020080566, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.23899937035978, "step_time": 0.6200880699157716} +{"epoch": 0, "iter": 13980, "iter_tflops": 23.264503373837265, "iter_time": 0.6460621185302735, "loss": 0.0919698104262352, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 25.04191958683816, "step_time": 0.6002061576843262} +{"epoch": 0, "iter": 13981, "iter_tflops": 19.230409086509887, "iter_time": 1.0039001922607422, "loss": 0.11858860403299332, "lr": 3e-05, "seqlen": 7680.0, "step_tflops": 20.793030034988064, "step_time": 0.928455898284912} +{"epoch": 0, "iter": 13982, "iter_tflops": 14.81046402294442, "iter_time": 1.3034980773925782, "loss": 0.13446871936321259, "lr": 3e-05, "seqlen": 7680.0, "step_tflops": 17.860128461307095, "step_time": 1.0809223136901855} +{"epoch": 0, "iter": 13983, "iter_tflops": 
44.35530399185032, "iter_time": 0.43524470901489265, "loss": 0.196016326546669, "lr": 3e-05, "seqlen": 7680.0, "step_tflops": 48.611048377418285, "step_time": 0.39714040374755855} +{"epoch": 0, "iter": 13984, "iter_tflops": 47.17033957960948, "iter_time": 0.4092701377868653, "loss": 0.1503085345029831, "lr": 3e-05, "seqlen": 7680.0, "step_tflops": 51.3222360941137, "step_time": 0.3761607608795166} +{"epoch": 0, "iter": 13985, "iter_tflops": 31.578850353093696, "iter_time": 0.6428106079101562, "loss": 0.024449344724416733, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 33.78772825833306, "step_time": 0.6007867660522461} +{"epoch": 0, "iter": 13986, "iter_tflops": 16.888911961721252, "iter_time": 1.2019258575439453, "loss": 0.010682830587029457, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 22.514283626933974, "step_time": 0.9016151847839355} +{"epoch": 0, "iter": 13987, "iter_tflops": 46.15772630834502, "iter_time": 0.4397794609069824, "loss": 0.005299893673509359, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 50.891729951639164, "step_time": 0.39887070083618165} +{"epoch": 0, "iter": 13988, "iter_tflops": 49.633254748793874, "iter_time": 0.40898426055908205, "loss": 0.007939395494759083, "lr": 3e-05, "seqlen": 8064.0, "step_tflops": 54.798537440738485, "step_time": 0.3704336090087891} +{"epoch": 0, "iter": 13989, "iter_tflops": 24.65103816223143, "iter_time": 0.8369259490966796, "loss": 0.622820258140564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.319115966705603, "step_time": 0.7838824653625488} +{"epoch": 0, "iter": 13990, "iter_tflops": 12.79375359572133, "iter_time": 1.6125911254882814, "loss": 0.6563767194747925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.983039510647163, "step_time": 1.3769631652832028} +{"epoch": 0, "iter": 13991, "iter_tflops": 10.950646310853838, "iter_time": 1.8840069274902345, "loss": 0.735868513584137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.724510319730324, "step_time": 1.6213664016723632} +{"epoch": 0, "iter": 
13992, "iter_tflops": 18.502094613622823, "iter_time": 1.1150679931640624, "loss": 0.7269095778465271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.269170023264405, "step_time": 0.9264419593811035} +{"epoch": 0, "iter": 13993, "iter_tflops": 22.938985780524295, "iter_time": 0.7123372039794922, "loss": 0.3141756057739258, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 24.53342085407201, "step_time": 0.6660421752929687} +{"epoch": 0, "iter": 13994, "iter_tflops": 7.835667822930586, "iter_time": 2.08537336730957, "loss": 0.2148178070783615, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 9.162355570265667, "step_time": 1.783416160583496} +{"epoch": 0, "iter": 13995, "iter_tflops": 10.370555446611254, "iter_time": 1.5756429901123048, "loss": 0.17984598875045776, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 12.10439055343042, "step_time": 1.3499476013183593} +{"epoch": 0, "iter": 13996, "iter_tflops": 15.570231753304947, "iter_time": 1.049457275390625, "loss": 0.21100710332393646, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 19.528500982612957, "step_time": 0.8367407722473145} +{"epoch": 0, "iter": 13997, "iter_tflops": 17.27913232080686, "iter_time": 0.822612564086914, "loss": 0.22077549993991852, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 18.194920328319245, "step_time": 0.781208770751953} +{"epoch": 0, "iter": 13998, "iter_tflops": 7.304626230776868, "iter_time": 1.9458944091796875, "loss": 0.18709403276443481, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 8.548295955168639, "step_time": 1.6627912063598633} +{"epoch": 0, "iter": 13999, "iter_tflops": 9.967432536488486, "iter_time": 1.4260474090576174, "loss": 0.2841995656490326, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 10.86291200151936, "step_time": 1.3084918060302735} +{"epoch": 0, "iter": 14000, "iter_tflops": 11.03422656842938, "iter_time": 1.288176498413086, "loss": 0.3191832900047302, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 13.830477390901756, "step_time": 1.0277325172424316} +{"epoch": 0, 
"iter": 14001, "iter_tflops": 11.686072908186437, "iter_time": 1.335164993286133, "loss": 0.3073892593383789, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 12.485174231187687, "step_time": 1.2497090682983398} +{"epoch": 0, "iter": 14002, "iter_tflops": 14.043314418487572, "iter_time": 1.11105078125, "loss": 0.2371537983417511, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 19.786578308474557, "step_time": 0.788556526184082} +{"epoch": 0, "iter": 14003, "iter_tflops": 28.275650233249916, "iter_time": 0.5518117294311524, "loss": 0.07556917518377304, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.13160045949418, "step_time": 0.5178229904174805} +{"epoch": 0, "iter": 14004, "iter_tflops": 28.155482990744485, "iter_time": 0.5541668548583984, "loss": 0.16669613122940063, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.927816049093103, "step_time": 0.5213489494323731} +{"epoch": 0, "iter": 14005, "iter_tflops": 38.50639289736576, "iter_time": 0.5357835922241211, "loss": 0.11901984363794327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.35663878539434, "step_time": 0.4870805168151856} +{"epoch": 0, "iter": 14006, "iter_tflops": 18.19871608702107, "iter_time": 1.1336565399169922, "loss": 0.050776828080415726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.778654396867555, "step_time": 0.9473079986572265} +{"epoch": 0, "iter": 14007, "iter_tflops": 50.425177121776265, "iter_time": 0.409142707824707, "loss": 0.045343950390815735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.755972451051925, "step_time": 0.37678252410888674} +{"epoch": 0, "iter": 14008, "iter_tflops": 49.04467324056929, "iter_time": 0.4206592102050782, "loss": 0.058333031833171844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.32430939197908, "step_time": 0.3868984661102295} +{"epoch": 0, "iter": 14009, "iter_tflops": 25.390444025497864, "iter_time": 0.8125534744262696, "loss": 0.10383908450603485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.687413681784665, "step_time": 0.7730645523071289} 
+{"epoch": 0, "iter": 14010, "iter_tflops": 14.746826616582148, "iter_time": 1.3990191955566404, "loss": 0.17170968651771545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.870063757466557, "step_time": 1.2229410514831545} +{"epoch": 0, "iter": 14011, "iter_tflops": 40.23288307426646, "iter_time": 0.5127918243408203, "loss": 0.09573475271463394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13988999768186, "step_time": 0.4674024677276611} +{"epoch": 0, "iter": 14012, "iter_tflops": 47.79407140078157, "iter_time": 0.43166637420654297, "loss": 0.09990742802619934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.16400801323695, "step_time": 0.3955043773651123} +{"epoch": 0, "iter": 14013, "iter_tflops": 20.82644005960663, "iter_time": 0.8892817840576173, "loss": 0.043958310037851334, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 22.24372297025972, "step_time": 0.832620231628418} +{"epoch": 0, "iter": 14014, "iter_tflops": 19.03038532570722, "iter_time": 0.9732106552124025, "loss": 0.02524857595562935, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 23.09495805615495, "step_time": 0.8019314746856689} +{"epoch": 0, "iter": 14015, "iter_tflops": 43.571443579006875, "iter_time": 0.4250622024536133, "loss": 0.03478184714913368, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 48.049490240793105, "step_time": 0.3854478721618652} +{"epoch": 0, "iter": 14016, "iter_tflops": 44.239705984546966, "iter_time": 0.418641429901123, "loss": 0.021756021305918694, "lr": 3e-05, "seqlen": 7376.0, "step_tflops": 48.45824372048534, "step_time": 0.3821965541839599} +{"epoch": 0, "iter": 14017, "iter_tflops": 35.50217430260531, "iter_time": 0.5811219711303711, "loss": 0.11239074170589447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7277180063692, "step_time": 0.5193123226165772} +{"epoch": 0, "iter": 14018, "iter_tflops": 50.90840498353863, "iter_time": 0.40525908279418943, "loss": 0.06307854503393173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.46668211394165, "step_time": 
0.3653675537109375} +{"epoch": 0, "iter": 14019, "iter_tflops": 52.49678854233963, "iter_time": 0.3929972496032715, "loss": 0.08191999793052673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.53056639521936, "step_time": 0.35861099243164063} +{"epoch": 0, "iter": 14020, "iter_tflops": 50.48060047783125, "iter_time": 0.40869350433349605, "loss": 0.05229302495718002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.109688716631304, "step_time": 0.3743641815185547} +{"epoch": 0, "iter": 14021, "iter_tflops": 30.385333974177506, "iter_time": 0.6789819564819336, "loss": 0.010517995804548264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.51874064835588, "step_time": 0.6344370384216309} +{"epoch": 0, "iter": 14022, "iter_tflops": 52.55271929081071, "iter_time": 0.39257899093627935, "loss": 0.010608422569930553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.87505723626254, "step_time": 0.35647642517089845} +{"epoch": 0, "iter": 14023, "iter_tflops": 46.961824067375076, "iter_time": 0.43931627273559565, "loss": 0.0014765089144930243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.988933958123205, "step_time": 0.3968362483978271} +{"epoch": 0, "iter": 14024, "iter_tflops": 55.205765907166715, "iter_time": 0.3737126579284668, "loss": 0.0020348753314465284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.29415451787272, "step_time": 0.34217402458190915} +{"epoch": 0, "iter": 14025, "iter_tflops": 26.25129353144243, "iter_time": 0.7859076919555664, "loss": 0.42255309224128723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.800127829510625, "step_time": 0.7421222534179687} +{"epoch": 0, "iter": 14026, "iter_tflops": 34.90609795511186, "iter_time": 0.5910455398559571, "loss": 0.586029589176178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.224225574415996, "step_time": 0.5259783515930175} +{"epoch": 0, "iter": 14027, "iter_tflops": 45.505650750954764, "iter_time": 0.4533743209838867, "loss": 0.5567111968994141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
49.07964105270861, "step_time": 0.42035950279235834} +{"epoch": 0, "iter": 14028, "iter_tflops": 50.578465453776126, "iter_time": 0.40790271759033203, "loss": 0.5376477837562561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.734421432450056, "step_time": 0.3769308776855469} +{"epoch": 0, "iter": 14029, "iter_tflops": 34.49030885651369, "iter_time": 0.5981707382202148, "loss": 0.59868985414505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.02509444249656, "step_time": 0.5572192001342773} +{"epoch": 0, "iter": 14030, "iter_tflops": 15.134828286932539, "iter_time": 1.3631534576416016, "loss": 0.5253020524978638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.317827219459527, "step_time": 1.015418296813965} +{"epoch": 0, "iter": 14031, "iter_tflops": 33.76018765070368, "iter_time": 0.6111071929931641, "loss": 0.4431653916835785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.18854753140288, "step_time": 0.5547700805664063} +{"epoch": 0, "iter": 14032, "iter_tflops": 47.574240330880464, "iter_time": 0.433661018371582, "loss": 0.47418153285980225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.79501081680005, "step_time": 0.3983220233917236} +{"epoch": 0, "iter": 14033, "iter_tflops": 37.24331272223695, "iter_time": 0.5539543075561524, "loss": 0.5494969487190247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.24630766167107, "step_time": 0.512620777130127} +{"epoch": 0, "iter": 14034, "iter_tflops": 41.262717159567934, "iter_time": 0.49999357604980466, "loss": 0.5261535048484802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4265040191526, "step_time": 0.46438705825805665} +{"epoch": 0, "iter": 14035, "iter_tflops": 44.80135865669212, "iter_time": 0.46050151443481446, "loss": 0.5597583651542664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.296524914190314, "step_time": 0.4271755275726319} +{"epoch": 0, "iter": 14036, "iter_tflops": 48.92518302262245, "iter_time": 0.4216865882873535, "loss": 0.5550277233123779, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 53.03743727240378, "step_time": 0.38899114608764657} +{"epoch": 0, "iter": 14037, "iter_tflops": 28.64124844198576, "iter_time": 0.720328010559082, "loss": 0.07185762375593185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.426427615560947, "step_time": 0.6780649299621582} +{"epoch": 0, "iter": 14038, "iter_tflops": 22.417172381426028, "iter_time": 0.9203254165649415, "loss": 0.0316106453537941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.441866437527636, "step_time": 0.8109111633300782} +{"epoch": 0, "iter": 14039, "iter_tflops": 49.21115523325911, "iter_time": 0.41923611450195314, "loss": 0.049788784235715866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.652909095502345, "step_time": 0.3845288887023926} +{"epoch": 0, "iter": 14040, "iter_tflops": 51.885930689349536, "iter_time": 0.3976240425109863, "loss": 0.05444303900003433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.26411109628094, "step_time": 0.36668300819396976} +{"epoch": 0, "iter": 14041, "iter_tflops": 34.9876322427924, "iter_time": 0.589668182373047, "loss": 0.1609824001789093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.47047183899494, "step_time": 0.5505960426330567} +{"epoch": 0, "iter": 14042, "iter_tflops": 9.377637743004398, "iter_time": 2.2000309753417966, "loss": 0.17669007182121277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.301577066354744, "step_time": 1.6771096420288087} +{"epoch": 0, "iter": 14043, "iter_tflops": 11.510511206636673, "iter_time": 1.7923698730468751, "loss": 0.1432356834411621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.423449539903496, "step_time": 1.5369442443847656} +{"epoch": 0, "iter": 14044, "iter_tflops": 20.35867906308168, "iter_time": 1.0133807525634766, "loss": 0.10814736783504486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.848085615929126, "step_time": 0.7408442287445068} +{"epoch": 0, "iter": 14045, "iter_tflops": 16.263861560554364, "iter_time": 0.9845352706909181, "loss": 0.2915760278701782, "lr": 3e-05, 
"seqlen": 6400.0, "step_tflops": 17.226938338689557, "step_time": 0.9294945526123046} +{"epoch": 0, "iter": 14046, "iter_tflops": 10.57661338474239, "iter_time": 1.5139387969970703, "loss": 0.2541094720363617, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 15.277079815010543, "step_time": 1.0481286697387695} +{"epoch": 0, "iter": 14047, "iter_tflops": 22.106712339587457, "iter_time": 0.7243205184936523, "loss": 0.2524457573890686, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 23.946830672106937, "step_time": 0.6686624031066896} +{"epoch": 0, "iter": 14048, "iter_tflops": 25.101128068605377, "iter_time": 0.6379133758544923, "loss": 0.21869704127311707, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 26.97383078887695, "step_time": 0.593625186920166} +{"epoch": 0, "iter": 14049, "iter_tflops": 23.120203404140693, "iter_time": 0.8923404846191405, "loss": 0.5531732439994812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.715715921743264, "step_time": 0.8347358245849609} +{"epoch": 0, "iter": 14050, "iter_tflops": 8.561186911812575, "iter_time": 2.40984033203125, "loss": 0.6162790656089783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.679632309821907, "step_time": 2.1313922729492187} +{"epoch": 0, "iter": 14051, "iter_tflops": 13.609820313109543, "iter_time": 1.5158975677490234, "loss": 0.4845131039619446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.18718136993486, "step_time": 1.13437553024292} +{"epoch": 0, "iter": 14052, "iter_tflops": 29.30602187435665, "iter_time": 0.7039881973266602, "loss": 0.7221336364746094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.93280906878864, "step_time": 0.4805437602996826} +{"epoch": 0, "iter": 14053, "iter_tflops": 17.27028603622013, "iter_time": 0.9271615600585938, "loss": 0.2891677916049957, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 18.18164934638448, "step_time": 0.8806871719360351} +{"epoch": 0, "iter": 14054, "iter_tflops": 8.560727301683364, "iter_time": 1.870442169189453, "loss": 0.2518737316131592, "lr": 
3e-05, "seqlen": 6400.0, "step_tflops": 12.162138194875002, "step_time": 1.3165732116699218} +{"epoch": 0, "iter": 14055, "iter_tflops": 24.546184768606214, "iter_time": 0.6523354034423828, "loss": 0.2589283585548401, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 26.334933740734694, "step_time": 0.6080267944335938} +{"epoch": 0, "iter": 14056, "iter_tflops": 25.38006000446212, "iter_time": 0.6309025802612305, "loss": 0.16679850220680237, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 27.23938450715593, "step_time": 0.5878380012512207} +{"epoch": 0, "iter": 14057, "iter_tflops": 18.43396016091627, "iter_time": 1.1191894378662108, "loss": 0.7288963198661804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.539903807617435, "step_time": 1.0558441696166991} +{"epoch": 0, "iter": 14058, "iter_tflops": 17.19386588217048, "iter_time": 1.1999101104736327, "loss": 0.6498281955718994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.159566463507687, "step_time": 0.8200098972320555} +{"epoch": 0, "iter": 14059, "iter_tflops": 37.730697692190176, "iter_time": 0.5467986221313477, "loss": 0.5725034475326538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.59134438368727, "step_time": 0.508263370513916} +{"epoch": 0, "iter": 14060, "iter_tflops": 37.37184088359974, "iter_time": 0.5520491638183593, "loss": 0.5783013105392456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.34453236048236, "step_time": 0.5113727264404296} +{"epoch": 0, "iter": 14061, "iter_tflops": 32.45963955783533, "iter_time": 0.6355921936035156, "loss": 0.7812471389770508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.59523647008691, "step_time": 0.579602653503418} +{"epoch": 0, "iter": 14062, "iter_tflops": 36.774325962851755, "iter_time": 0.5610189437866211, "loss": 0.6812919974327087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.99362917816004, "step_time": 0.5158594989776611} +{"epoch": 0, "iter": 14063, "iter_tflops": 40.31331639550266, "iter_time": 0.511768699645996, "loss": 0.5450220704078674, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73318557691234, "step_time": 0.47174915885925295} +{"epoch": 0, "iter": 14064, "iter_tflops": 33.88924842170584, "iter_time": 0.6087799072265625, "loss": 0.7206761240959167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.07838344336715, "step_time": 0.5564183654785156} +{"epoch": 0, "iter": 14065, "iter_tflops": 33.79582516277966, "iter_time": 0.6104627838134765, "loss": 0.45631998777389526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.49445785242796, "step_time": 0.5502438144683838} +{"epoch": 0, "iter": 14066, "iter_tflops": 37.96246581173204, "iter_time": 0.5434603118896484, "loss": 0.7380644083023071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.86491408542023, "step_time": 0.49280152511596675} +{"epoch": 0, "iter": 14067, "iter_tflops": 42.31048553168553, "iter_time": 0.4876118354797363, "loss": 0.5255597233772278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.18663527073424, "step_time": 0.44668968391418457} +{"epoch": 0, "iter": 14068, "iter_tflops": 42.93031463131585, "iter_time": 0.48057168197631833, "loss": 0.57371586561203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.620516504435116, "step_time": 0.44253249549865725} +{"epoch": 0, "iter": 14069, "iter_tflops": 33.705847367834316, "iter_time": 0.612092414855957, "loss": 0.4688253700733185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.40320976037015, "step_time": 0.5515861778259278} +{"epoch": 0, "iter": 14070, "iter_tflops": 38.78332724814458, "iter_time": 0.5319578018188477, "loss": 0.4382174015045166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.95473386839463, "step_time": 0.48029848289489746} +{"epoch": 0, "iter": 14071, "iter_tflops": 40.65632184208029, "iter_time": 0.507451057434082, "loss": 0.43612539768218994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.03942260427918, "step_time": 0.4684687557220459} +{"epoch": 0, "iter": 14072, "iter_tflops": 43.13930808309881, "iter_time": 0.4782434959411621, "loss": 
0.3903478980064392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.76684061639187, "step_time": 0.4411478996276856} +{"epoch": 0, "iter": 14073, "iter_tflops": 17.082575524220754, "iter_time": 1.207727340698242, "loss": 0.5926499962806702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.054652743885086, "step_time": 1.1427023162841796} +{"epoch": 0, "iter": 14074, "iter_tflops": 18.148487530194558, "iter_time": 1.1367940979003905, "loss": 0.6364473700523376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.278340781766108, "step_time": 0.9260605945587159} +{"epoch": 0, "iter": 14075, "iter_tflops": 48.18953186022971, "iter_time": 0.4281239662170411, "loss": 0.7147728800773621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.096143253243284, "step_time": 0.39601959419250493} +{"epoch": 0, "iter": 14076, "iter_tflops": 40.06284651067967, "iter_time": 0.5149682388305663, "loss": 0.6265234351158142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84809650399454, "step_time": 0.4814938163757324} +{"epoch": 0, "iter": 14077, "iter_tflops": 27.64809513054718, "iter_time": 0.7462030715942383, "loss": 0.5437854528427124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.22187006034271, "step_time": 0.706015510559082} +{"epoch": 0, "iter": 14078, "iter_tflops": 13.595719042580065, "iter_time": 1.5174698333740235, "loss": 0.7476561665534973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.66313170188336, "step_time": 1.0492272453308105} +{"epoch": 0, "iter": 14079, "iter_tflops": 48.24304534195855, "iter_time": 0.42764907073974606, "loss": 0.7477623820304871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.133840721513444, "step_time": 0.3957332363128662} +{"epoch": 0, "iter": 14080, "iter_tflops": 45.13612008029778, "iter_time": 0.45708610916137693, "loss": 0.6851354837417603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.46190424638593, "step_time": 0.42571776390075683} +{"epoch": 0, "iter": 14081, "iter_tflops": 40.33787384088142, "iter_time": 0.5114571380615236, 
"loss": 0.058449458330869675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14389144324628, "step_time": 0.4673600997924805} +{"epoch": 0, "iter": 14082, "iter_tflops": 47.907699762305576, "iter_time": 0.4306425399780273, "loss": 0.09166283160448074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.34140030156694, "step_time": 0.3796570091247558} +{"epoch": 0, "iter": 14083, "iter_tflops": 54.489168753371466, "iter_time": 0.3786274223327637, "loss": 0.09539351612329483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.43640186548566, "step_time": 0.34711208724975584} +{"epoch": 0, "iter": 14084, "iter_tflops": 55.68564515028621, "iter_time": 0.37049213409423826, "loss": 0.043363478034734726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.68187737255812, "step_time": 0.3399877262115478} +{"epoch": 0, "iter": 14085, "iter_tflops": 26.699767912869667, "iter_time": 0.7727068481445312, "loss": 0.4447751045227051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.20621942935653, "step_time": 0.731437744140625} +{"epoch": 0, "iter": 14086, "iter_tflops": 17.968355228112394, "iter_time": 1.1481904296875, "loss": 0.3327942192554474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.074287270621035, "step_time": 1.0277372856140137} +{"epoch": 0, "iter": 14087, "iter_tflops": 50.56342494000589, "iter_time": 0.40802405166625977, "loss": 0.343669056892395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.150023398871376, "step_time": 0.37409038543701173} +{"epoch": 0, "iter": 14088, "iter_tflops": 46.26073670350616, "iter_time": 0.44597416687011715, "loss": 0.3839704990386963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99117720555681, "step_time": 0.41269469261169434} +{"epoch": 0, "iter": 14089, "iter_tflops": 42.8384285546741, "iter_time": 0.4816024818420411, "loss": 0.4673754572868347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.73146559044049, "step_time": 0.44148184204101565} +{"epoch": 0, "iter": 14090, "iter_tflops": 49.57154956110343, "iter_time": 
0.41618819046020505, "loss": 0.3683290481567383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.95493691446981, "step_time": 0.38237638092041015} +{"epoch": 0, "iter": 14091, "iter_tflops": 51.23721350681094, "iter_time": 0.40265838241577145, "loss": 0.45476657152175903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.61267413247536, "step_time": 0.37097826766967773} +{"epoch": 0, "iter": 14092, "iter_tflops": 47.80829321541116, "iter_time": 0.4315379638671875, "loss": 0.2975303530693054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.88076425208392, "step_time": 0.3976636390686035} +{"epoch": 0, "iter": 14093, "iter_tflops": 35.10892747588059, "iter_time": 0.5876309814453125, "loss": 0.007213301956653595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.628011583593334, "step_time": 0.5482908248901367} +{"epoch": 0, "iter": 14094, "iter_tflops": 9.916659361242068, "iter_time": 2.080447937011719, "loss": 0.002689991146326065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.36961967385997, "step_time": 1.6678842239379883} +{"epoch": 0, "iter": 14095, "iter_tflops": 13.47165289454455, "iter_time": 1.531444854736328, "loss": 0.013219665735960007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.469856129572722, "step_time": 1.117014305114746} +{"epoch": 0, "iter": 14096, "iter_tflops": 28.129844199055725, "iter_time": 0.733423667907715, "loss": 0.015681570395827293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.24646895841398, "step_time": 0.48835071945190434} +{"epoch": 0, "iter": 14097, "iter_tflops": 11.61679457245431, "iter_time": 1.5410592193603518, "loss": 0.19844935834407806, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 12.26891310018341, "step_time": 1.4591486816406247} +{"epoch": 0, "iter": 14098, "iter_tflops": 15.377465670710963, "iter_time": 1.1641819763183592, "loss": 0.3298984467983246, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 20.24848121947774, "step_time": 0.8841240081787108} +{"epoch": 0, "iter": 14099, "iter_tflops": 
26.068983526443773, "iter_time": 0.686722915649414, "loss": 0.2661419212818146, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 28.188998026330562, "step_time": 0.6350764350891114} +{"epoch": 0, "iter": 14100, "iter_tflops": 26.110491918165103, "iter_time": 0.6856312179565429, "loss": 0.20063428580760956, "lr": 3e-05, "seqlen": 7136.0, "step_tflops": 28.208621041877702, "step_time": 0.634634651184082} +{"epoch": 0, "iter": 14101, "iter_tflops": 24.67319032209658, "iter_time": 0.8361745376586914, "loss": 0.22996723651885986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.900681786933042, "step_time": 0.7669357109069825} +{"epoch": 0, "iter": 14102, "iter_tflops": 38.731834987624595, "iter_time": 0.5326650161743164, "loss": 0.17700916528701782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.357764364984575, "step_time": 0.4870675735473633} +{"epoch": 0, "iter": 14103, "iter_tflops": 40.579566586464644, "iter_time": 0.5084108886718749, "loss": 0.18393388390541077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.50807869787135, "step_time": 0.4635359268188477} +{"epoch": 0, "iter": 14104, "iter_tflops": 39.41740598722835, "iter_time": 0.5234005889892577, "loss": 0.18874762952327728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39557158311304, "step_time": 0.4754193286895752} +{"epoch": 0, "iter": 14105, "iter_tflops": 17.79806195851041, "iter_time": 1.1591764068603516, "loss": 0.5329187512397766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.895573199328695, "step_time": 1.0918479843139648} +{"epoch": 0, "iter": 14106, "iter_tflops": 19.992943205093834, "iter_time": 1.0319187774658203, "loss": 0.7122892737388611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.92042088206308, "step_time": 0.6672319755554199} +{"epoch": 0, "iter": 14107, "iter_tflops": 35.93515123115381, "iter_time": 0.5741201248168946, "loss": 0.6403692960739136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.11400717306619, "step_time": 0.5274604930877685} +{"epoch": 0, "iter": 14108, 
"iter_tflops": 38.21797562304887, "iter_time": 0.5398269577026368, "loss": 0.4605587422847748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.70993267166551, "step_time": 0.4946326255798339} +{"epoch": 0, "iter": 14109, "iter_tflops": 17.194495391914174, "iter_time": 1.199866180419922, "loss": 0.7004909515380859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.411876863048647, "step_time": 1.1205317993164061} +{"epoch": 0, "iter": 14110, "iter_tflops": 13.037557294322909, "iter_time": 1.5824355010986328, "loss": 0.7254793047904968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.32372039227472, "step_time": 0.9241781005859375} +{"epoch": 0, "iter": 14111, "iter_tflops": 46.70980215580889, "iter_time": 0.4416865959167481, "loss": 0.6761325597763062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.331234652294526, "step_time": 0.40990636634826655} +{"epoch": 0, "iter": 14112, "iter_tflops": 42.85159740059772, "iter_time": 0.4814544792175293, "loss": 0.5185518860816956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.21978114408877, "step_time": 0.4463693466186523} +{"epoch": 0, "iter": 14113, "iter_tflops": 31.165171670161044, "iter_time": 0.6619919738769531, "loss": 0.389114111661911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.10658374682471, "step_time": 0.6231719245910644} +{"epoch": 0, "iter": 14114, "iter_tflops": 11.519629154807784, "iter_time": 1.790951187133789, "loss": 0.369232177734375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.3939231496989, "step_time": 1.258459815979004} +{"epoch": 0, "iter": 14115, "iter_tflops": 50.184566175878025, "iter_time": 0.41110435104370113, "loss": 0.4690067172050476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.41830509387116, "step_time": 0.3791204719543457} +{"epoch": 0, "iter": 14116, "iter_tflops": 48.25775525344865, "iter_time": 0.4275187149047851, "loss": 0.5786131024360657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.36559330751986, "step_time": 0.3939818534851074} +{"epoch": 0, "iter": 
14117, "iter_tflops": 30.517569827207513, "iter_time": 0.6760398559570312, "loss": 0.5309082865715027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.38542876032877, "step_time": 0.6370486450195314} +{"epoch": 0, "iter": 14118, "iter_tflops": 11.238410035184861, "iter_time": 1.8357662200927736, "loss": 0.6693092584609985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.916896290317858, "step_time": 1.3830687770843508} +{"epoch": 0, "iter": 14119, "iter_tflops": 44.00042635215404, "iter_time": 0.4688839454650879, "loss": 0.752496063709259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.6195178071836, "step_time": 0.433248685836792} +{"epoch": 0, "iter": 14120, "iter_tflops": 49.544817914545284, "iter_time": 0.41641274261474615, "loss": 0.7409660220146179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.55048726076876, "step_time": 0.385264347076416} +{"epoch": 0, "iter": 14121, "iter_tflops": 43.34949677556694, "iter_time": 0.47592463684082037, "loss": 0.6697066426277161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.217182549504464, "step_time": 0.4369403762817383} +{"epoch": 0, "iter": 14122, "iter_tflops": 37.17731633997429, "iter_time": 0.5549376754760743, "loss": 0.6718142032623291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.160508353518466, "step_time": 0.42838197135925293} +{"epoch": 0, "iter": 14123, "iter_tflops": 46.99152853736052, "iter_time": 0.4390385704040527, "loss": 0.6303714513778687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.023984357266244, "step_time": 0.40434109115600586} +{"epoch": 0, "iter": 14124, "iter_tflops": 50.71052668336425, "iter_time": 0.4068404502868653, "loss": 0.6794365644454956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.75928141675282, "step_time": 0.37675975608825685} +{"epoch": 0, "iter": 14125, "iter_tflops": 39.70552225389025, "iter_time": 0.5196026229858398, "loss": 0.48786237835884094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.19843202346987, "step_time": 0.4775889434814453} +{"epoch": 0, 
"iter": 14126, "iter_tflops": 21.954234622759568, "iter_time": 0.9397318496704102, "loss": 0.3989413380622864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.244550745385776, "step_time": 0.7054679584503173} +{"epoch": 0, "iter": 14127, "iter_tflops": 48.40045374841701, "iter_time": 0.42625826644897463, "loss": 0.45007094740867615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.5872548947841, "step_time": 0.3923211727142334} +{"epoch": 0, "iter": 14128, "iter_tflops": 45.13375909898801, "iter_time": 0.45711001968383796, "loss": 0.3474263846874237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.39825930163728, "step_time": 0.4262775936126709} +{"epoch": 0, "iter": 14129, "iter_tflops": 41.44287354767415, "iter_time": 0.49782005310058586, "loss": 0.036768145859241486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.236476087938144, "step_time": 0.4560720748901368} +{"epoch": 0, "iter": 14130, "iter_tflops": 39.72420982931984, "iter_time": 0.5193581848144532, "loss": 0.06944103538990021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.017260078243496, "step_time": 0.46870462799072277} +{"epoch": 0, "iter": 14131, "iter_tflops": 41.20990936777476, "iter_time": 0.5006342849731445, "loss": 0.0958394929766655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50040468100675, "step_time": 0.4534265937805176} +{"epoch": 0, "iter": 14132, "iter_tflops": 41.423271374964656, "iter_time": 0.4980556297302246, "loss": 0.05857901647686958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.70926344755834, "step_time": 0.4513547573089599} +{"epoch": 0, "iter": 14133, "iter_tflops": 16.673391264758372, "iter_time": 1.029273712158203, "loss": 0.12318490445613861, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 17.78758635770007, "step_time": 0.9648011245727539} +{"epoch": 0, "iter": 14134, "iter_tflops": 27.03174980451287, "iter_time": 0.6348639450073241, "loss": 0.08876322209835052, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 30.61706149090738, "step_time": 0.5605202617645264} 
+{"epoch": 0, "iter": 14135, "iter_tflops": 34.0439639016316, "iter_time": 0.5040976829528808, "loss": 0.10817983746528625, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 37.42394828363228, "step_time": 0.45856955528259274} +{"epoch": 0, "iter": 14136, "iter_tflops": 37.91854192785919, "iter_time": 0.45258816528320317, "loss": 0.10464602708816528, "lr": 3e-05, "seqlen": 6848.0, "step_tflops": 41.67982415933247, "step_time": 0.41174557876586915} +{"epoch": 0, "iter": 14137, "iter_tflops": 20.108387579316826, "iter_time": 1.0259944229125975, "loss": 0.6715074777603149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.707089763764348, "step_time": 0.9504311141967773} +{"epoch": 0, "iter": 14138, "iter_tflops": 16.027158278492113, "iter_time": 1.2872583618164062, "loss": 0.5275061130523682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.801097034722016, "step_time": 0.991827184677124} +{"epoch": 0, "iter": 14139, "iter_tflops": 37.6762671218985, "iter_time": 0.5475885772705078, "loss": 0.40360015630722046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.214687401212366, "step_time": 0.5005762462615967} +{"epoch": 0, "iter": 14140, "iter_tflops": 37.11881615826704, "iter_time": 0.5558122711181641, "loss": 0.5892230868339539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.477732232239404, "step_time": 0.5096899547576904} +{"epoch": 0, "iter": 14141, "iter_tflops": 20.874096809735114, "iter_time": 0.9883586196899414, "loss": 0.40283989906311035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.514929563912496, "step_time": 0.9163294715881347} +{"epoch": 0, "iter": 14142, "iter_tflops": 47.05008803124868, "iter_time": 0.43849213409423826, "loss": 0.6024913787841797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.57236173631789, "step_time": 0.40004166603088376} +{"epoch": 0, "iter": 14143, "iter_tflops": 47.952385517988034, "iter_time": 0.4302412338256835, "loss": 0.5002251267433167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00650988598372, "step_time": 
0.3967021350860596} +{"epoch": 0, "iter": 14144, "iter_tflops": 48.21356687349642, "iter_time": 0.4279105415344238, "loss": 0.43762630224227905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.07846708211916, "step_time": 0.39615400886535646} +{"epoch": 0, "iter": 14145, "iter_tflops": 32.472259595023786, "iter_time": 0.6353451766967773, "loss": 0.04354269057512283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.75291234306406, "step_time": 0.5936507797241211} +{"epoch": 0, "iter": 14146, "iter_tflops": 12.417000795842613, "iter_time": 1.6615198669433595, "loss": 0.032891254872083664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.105954024708078, "step_time": 1.3657590560913087} +{"epoch": 0, "iter": 14147, "iter_tflops": 14.644299248536983, "iter_time": 1.408813980102539, "loss": 0.04117623716592789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.372740080285627, "step_time": 1.2600880126953125} +{"epoch": 0, "iter": 14148, "iter_tflops": 37.06229332455434, "iter_time": 0.556659927368164, "loss": 0.04290509968996048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.14093590829931, "step_time": 0.48957369041442866} +{"epoch": 0, "iter": 14149, "iter_tflops": 12.52292635642711, "iter_time": 1.193698715209961, "loss": 0.3001457452774048, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 13.187752243523601, "step_time": 1.1335215301513673} +{"epoch": 0, "iter": 14150, "iter_tflops": 6.876127698721284, "iter_time": 2.1739853820800783, "loss": 0.21317648887634277, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 7.997425939660103, "step_time": 1.8691765594482423} +{"epoch": 0, "iter": 14151, "iter_tflops": 8.42790517456755, "iter_time": 1.7737030487060548, "loss": 0.19322934746742249, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 10.369877726094836, "step_time": 1.441540729522705} +{"epoch": 0, "iter": 14152, "iter_tflops": 26.06803790277542, "iter_time": 0.5734455795288086, "loss": 0.2579576373100281, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 27.767504793826678, 
"step_time": 0.5383487358093263} +{"epoch": 0, "iter": 14153, "iter_tflops": 18.312316454827254, "iter_time": 0.9438906707763672, "loss": 0.23903203010559082, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 19.140071183572115, "step_time": 0.9030700302124024} +{"epoch": 0, "iter": 14154, "iter_tflops": 11.035931737028584, "iter_time": 1.5662315673828124, "loss": 0.15205302834510803, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 13.118628662986044, "step_time": 1.3175786209106444} +{"epoch": 0, "iter": 14155, "iter_tflops": 27.291999264872253, "iter_time": 0.6333293685913086, "loss": 0.2989863157272339, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 29.35653561018299, "step_time": 0.5887896614074708} +{"epoch": 0, "iter": 14156, "iter_tflops": 27.161244559146976, "iter_time": 0.6363782272338867, "loss": 0.18164736032485962, "lr": 3e-05, "seqlen": 6896.0, "step_tflops": 29.224841399528103, "step_time": 0.5914428901672364} +{"epoch": 0, "iter": 14157, "iter_tflops": 19.44992460188359, "iter_time": 1.0607287139892578, "loss": 0.31016066670417786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.84133255114274, "step_time": 0.9899123992919922} +{"epoch": 0, "iter": 14158, "iter_tflops": 14.241806996822985, "iter_time": 1.448628921508789, "loss": 0.24941644072532654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.723501979868095, "step_time": 1.3121182250976562} +{"epoch": 0, "iter": 14159, "iter_tflops": 44.44677731963422, "iter_time": 0.4641752395629883, "loss": 0.34269654750823975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01735179489671, "step_time": 0.4296591281890869} +{"epoch": 0, "iter": 14160, "iter_tflops": 47.51844124060621, "iter_time": 0.4341702499389648, "loss": 0.26543447375297546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.54662689107384, "step_time": 0.4002413883209228} +{"epoch": 0, "iter": 14161, "iter_tflops": 37.94673680528212, "iter_time": 0.5436855773925782, "loss": 0.6813477277755737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
41.121758061265396, "step_time": 0.5017074775695801} +{"epoch": 0, "iter": 14162, "iter_tflops": 36.61968628360401, "iter_time": 0.5633880462646484, "loss": 0.6260044574737549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.902791327691794, "step_time": 0.5170338420867919} +{"epoch": 0, "iter": 14163, "iter_tflops": 37.80248656093657, "iter_time": 0.5457602233886718, "loss": 0.6952447295188904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18205667031331, "step_time": 0.5009728794097901} +{"epoch": 0, "iter": 14164, "iter_tflops": 36.88000193774483, "iter_time": 0.5594113998413086, "loss": 0.5459805727005005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.05069000944174, "step_time": 0.5151245460510253} +{"epoch": 0, "iter": 14165, "iter_tflops": 19.299348085450696, "iter_time": 1.0690046844482421, "loss": 0.32807499170303345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.568478693463426, "step_time": 1.0030442123413086} +{"epoch": 0, "iter": 14166, "iter_tflops": 23.45813425974568, "iter_time": 0.8794856948852539, "loss": 0.2695195972919464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.09651683005919, "step_time": 0.6427829418182374} +{"epoch": 0, "iter": 14167, "iter_tflops": 40.08896631761706, "iter_time": 0.5146327133178711, "loss": 0.36129096150398254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.75118188401257, "step_time": 0.4715551128387451} +{"epoch": 0, "iter": 14168, "iter_tflops": 42.485434114290115, "iter_time": 0.4856039237976074, "loss": 0.2453288733959198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.21403287785253, "step_time": 0.44642486763000483} +{"epoch": 0, "iter": 14169, "iter_tflops": 19.857303500283482, "iter_time": 0.6932401580810545, "loss": 0.10828713327646255, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 22.129722218210805, "step_time": 0.6220539093017579} +{"epoch": 0, "iter": 14170, "iter_tflops": 27.044981309480555, "iter_time": 0.5089994354248047, "loss": 0.04395965486764908, "lr": 3e-05, "seqlen": 5520.0, 
"step_tflops": 29.846633613459872, "step_time": 0.4612205314636231} +{"epoch": 0, "iter": 14171, "iter_tflops": 36.38222782836055, "iter_time": 0.3783682594299316, "loss": 0.13010168075561523, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 39.65018291857591, "step_time": 0.3471832714080811} +{"epoch": 0, "iter": 14172, "iter_tflops": 33.77375369126411, "iter_time": 0.407591064453125, "loss": 0.10046964138746262, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 36.85166792573708, "step_time": 0.3735483627319336} +{"epoch": 0, "iter": 14173, "iter_tflops": 38.71527420877074, "iter_time": 0.5328928680419922, "loss": 0.6084456443786621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.90615590505291, "step_time": 0.49231653594970703} +{"epoch": 0, "iter": 14174, "iter_tflops": 24.57455739049797, "iter_time": 0.8395306243896484, "loss": 0.6851068139076233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.6090689653135, "step_time": 0.632679624557495} +{"epoch": 0, "iter": 14175, "iter_tflops": 42.306867300346326, "iter_time": 0.4876535377502442, "loss": 0.5772619843482971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.235676987065766, "step_time": 0.45608013153076177} +{"epoch": 0, "iter": 14176, "iter_tflops": 41.39047881016941, "iter_time": 0.49845022583007814, "loss": 0.5669638514518738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.601617061122944, "step_time": 0.46256380081176757} +{"epoch": 0, "iter": 14177, "iter_tflops": 31.950045544593614, "iter_time": 0.6457297058105469, "loss": 0.580036997795105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.01377590996191, "step_time": 0.6065511093139648} +{"epoch": 0, "iter": 14178, "iter_tflops": 13.756926687176827, "iter_time": 1.4996876831054688, "loss": 0.6044701933860779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.20533588018799, "step_time": 1.2731049613952634} +{"epoch": 0, "iter": 14179, "iter_tflops": 43.1939417496565, "iter_time": 0.4776385917663575, "loss": 0.6009984016418457, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 46.675027120376754, "step_time": 0.4420156726837158} +{"epoch": 0, "iter": 14180, "iter_tflops": 39.549499261811775, "iter_time": 0.521652458190918, "loss": 0.5440508723258972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.34588162137087, "step_time": 0.4872042503356933} +{"epoch": 0, "iter": 14181, "iter_tflops": 39.83693695858821, "iter_time": 0.5178885498046876, "loss": 0.2750665545463562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.9891667228497, "step_time": 0.4799137802124023} +{"epoch": 0, "iter": 14182, "iter_tflops": 43.117884762648586, "iter_time": 0.47848111343383787, "loss": 0.208282008767128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.00145757778325, "step_time": 0.4389458236694336} +{"epoch": 0, "iter": 14183, "iter_tflops": 48.195061209733865, "iter_time": 0.4280748481750488, "loss": 0.22559234499931335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.21298681883546, "step_time": 0.3951333713531494} +{"epoch": 0, "iter": 14184, "iter_tflops": 47.68872392422357, "iter_time": 0.43261995315551754, "loss": 0.18466466665267944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.541531730084465, "step_time": 0.4002809543609619} +{"epoch": 0, "iter": 14185, "iter_tflops": 25.27959506486, "iter_time": 0.816116455078125, "loss": 0.5672818422317505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.58873604174843, "step_time": 0.77593359375} +{"epoch": 0, "iter": 14186, "iter_tflops": 14.65157091795697, "iter_time": 1.408114776611328, "loss": 0.7111256122589111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.16083491420322, "step_time": 1.1360212020874023} +{"epoch": 0, "iter": 14187, "iter_tflops": 33.67222767766406, "iter_time": 0.6127035522460939, "loss": 0.43769675493240356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.84324031032512, "step_time": 0.5599695720672607} +{"epoch": 0, "iter": 14188, "iter_tflops": 35.6981921453008, "iter_time": 0.5779310455322265, "loss": 0.5811070799827576, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 38.694511608821955, "step_time": 0.5331788063049316} +{"epoch": 0, "iter": 14189, "iter_tflops": 24.219394132072907, "iter_time": 0.8518418502807618, "loss": 0.41047972440719604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.80677034445097, "step_time": 0.7994449996948243} +{"epoch": 0, "iter": 14190, "iter_tflops": 9.585587776512188, "iter_time": 2.152303436279297, "loss": 0.660652756690979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.61196018033464, "step_time": 1.9441359710693358} +{"epoch": 0, "iter": 14191, "iter_tflops": 10.132835812418444, "iter_time": 2.0360631408691408, "loss": 0.9343963861465454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.902412007962718, "step_time": 1.73335400390625} +{"epoch": 0, "iter": 14192, "iter_tflops": 31.637416431481654, "iter_time": 0.6521105651855469, "loss": 0.7447280287742615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.19054439550507, "step_time": 0.5402147006988525} +{"epoch": 0, "iter": 14193, "iter_tflops": 16.765595186018995, "iter_time": 0.9648498153686522, "loss": 0.20719347894191742, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 18.450765418630354, "step_time": 0.876726848602295} +{"epoch": 0, "iter": 14194, "iter_tflops": 23.074104424616156, "iter_time": 0.7010578231811522, "loss": 0.26615965366363525, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 24.769602145273886, "step_time": 0.6530698928833008} +{"epoch": 0, "iter": 14195, "iter_tflops": 24.520680186878533, "iter_time": 0.6596995391845704, "loss": 0.2285253405570984, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 26.434697438064347, "step_time": 0.6119336700439453} +{"epoch": 0, "iter": 14196, "iter_tflops": 24.401084361252366, "iter_time": 0.6629328918457031, "loss": 0.25821220874786377, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 26.260503817322704, "step_time": 0.615992805480957} +{"epoch": 0, "iter": 14197, "iter_tflops": 34.272368436575086, "iter_time": 0.6019745483398439, "loss": 0.21816270053386688, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 38.05038950057736, "step_time": 0.5422045288085937} +{"epoch": 0, "iter": 14198, "iter_tflops": 38.302708407184774, "iter_time": 0.5386327590942382, "loss": 0.2689778506755829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.938727370512126, "step_time": 0.49193418121337895} +{"epoch": 0, "iter": 14199, "iter_tflops": 36.84283810597337, "iter_time": 0.5599756851196289, "loss": 0.3037564158439636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.0683072474661, "step_time": 0.5148980560302734} +{"epoch": 0, "iter": 14200, "iter_tflops": 40.313288449639245, "iter_time": 0.5117690544128417, "loss": 0.1816275417804718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.092677770242744, "step_time": 0.46790293884277345} +{"epoch": 0, "iter": 14201, "iter_tflops": 24.090752311929243, "iter_time": 0.8563905868530273, "loss": 0.5115621089935303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.364921293765967, "step_time": 0.782520580291748} +{"epoch": 0, "iter": 14202, "iter_tflops": 44.285270651696, "iter_time": 0.46586806869506836, "loss": 0.7252552509307861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.039777027179305, "step_time": 0.4294585609436035} +{"epoch": 0, "iter": 14203, "iter_tflops": 49.28917368617831, "iter_time": 0.41857251739501955, "loss": 0.5311915874481201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.94994435709578, "step_time": 0.389633903503418} +{"epoch": 0, "iter": 14204, "iter_tflops": 45.87676354120169, "iter_time": 0.4497068214416505, "loss": 0.4616473317146301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.33517502729562, "step_time": 0.4181822299957275} +{"epoch": 0, "iter": 14205, "iter_tflops": 1.948900704479696, "iter_time": 0.6631661911010742, "loss": 2.576716899871826, "lr": 3e-05, "seqlen": 528.0, "step_tflops": 2.0901766312823447, "step_time": 0.6183425064086914} +{"epoch": 0, "iter": 14206, "iter_tflops": 0.888602407793328, "iter_time": 1.4544694519042969, "loss": 2.4685444831848145, 
"lr": 3e-05, "seqlen": 528.0, "step_tflops": 1.1271984384542242, "step_time": 1.1465994033813478} +{"epoch": 0, "iter": 14207, "iter_tflops": 2.4782856740833346, "iter_time": 0.521507698059082, "loss": 2.5180909633636475, "lr": 3e-05, "seqlen": 528.0, "step_tflops": 2.7112050979191933, "step_time": 0.47670501136779775} +{"epoch": 0, "iter": 14208, "iter_tflops": 2.6755927744230306, "iter_time": 0.4830499877929687, "loss": 2.7816483974456787, "lr": 3e-05, "seqlen": 528.0, "step_tflops": 2.9253275264991885, "step_time": 0.44181208610534667} +{"epoch": 0, "iter": 14209, "iter_tflops": 14.742750169125198, "iter_time": 1.083337432861328, "loss": 0.039925467222929, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 15.882553474670521, "step_time": 1.0055922775268555} +{"epoch": 0, "iter": 14210, "iter_tflops": 24.811362312873904, "iter_time": 0.643712059020996, "loss": 0.05175158008933067, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 28.695015385358925, "step_time": 0.5565905055999756} +{"epoch": 0, "iter": 14211, "iter_tflops": 42.90639944342534, "iter_time": 0.3722375526428223, "loss": 0.06724590063095093, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 46.84433493530016, "step_time": 0.34094566917419433} +{"epoch": 0, "iter": 14212, "iter_tflops": 43.3334719778092, "iter_time": 0.3685689697265625, "loss": 0.0927257239818573, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 47.295573997516684, "step_time": 0.3376927642822265} +{"epoch": 0, "iter": 14213, "iter_tflops": 44.72889994653847, "iter_time": 0.46124750518798824, "loss": 0.28596872091293335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.010288172991224, "step_time": 0.4209543399810791} +{"epoch": 0, "iter": 14214, "iter_tflops": 33.374703547667146, "iter_time": 0.6181655960083007, "loss": 0.2709202766418457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.61364210598431, "step_time": 0.507984323501587} +{"epoch": 0, "iter": 14215, "iter_tflops": 47.38151188268037, "iter_time": 0.43542497253417967, "loss": 
0.34979310631752014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.50593327362873, "step_time": 0.4005576095581055} +{"epoch": 0, "iter": 14216, "iter_tflops": 49.56124629912198, "iter_time": 0.41627471160888674, "loss": 0.393551230430603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.86184414266244, "step_time": 0.3830372657775879} +{"epoch": 0, "iter": 14217, "iter_tflops": 29.504737044278215, "iter_time": 0.6992468185424804, "loss": 0.814737856388092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.20984432945295, "step_time": 0.6610444221496581} +{"epoch": 0, "iter": 14218, "iter_tflops": 13.425094269320438, "iter_time": 1.5367559509277346, "loss": 0.586981475353241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.157312835212398, "step_time": 1.2024664764404296} +{"epoch": 0, "iter": 14219, "iter_tflops": 38.13719115058662, "iter_time": 0.5409704513549805, "loss": 0.7427026629447937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.61307814541096, "step_time": 0.49578388404846185} +{"epoch": 0, "iter": 14220, "iter_tflops": 37.762598583527556, "iter_time": 0.546336700439453, "loss": 0.517850399017334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10602320840902, "step_time": 0.5018995246887207} +{"epoch": 0, "iter": 14221, "iter_tflops": 36.53959343469055, "iter_time": 0.5646229629516601, "loss": 0.08981665223836899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.758349120286965, "step_time": 0.506180793762207} +{"epoch": 0, "iter": 14222, "iter_tflops": 35.896393696209124, "iter_time": 0.574740005493164, "loss": 0.07670173794031143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.482053055609846, "step_time": 0.5096355533599853} +{"epoch": 0, "iter": 14223, "iter_tflops": 41.462635097916156, "iter_time": 0.4975827865600586, "loss": 0.07072754204273224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.555982046076394, "step_time": 0.45287342262268065} +{"epoch": 0, "iter": 14224, "iter_tflops": 44.360303335499665, "iter_time": 
0.46508008193969724, "loss": 0.09215839207172394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80768683106945, "step_time": 0.4227017269134521} +{"epoch": 0, "iter": 14225, "iter_tflops": 17.120122740541337, "iter_time": 1.205078598022461, "loss": 0.11891894042491913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.36721276345849, "step_time": 1.1232566299438478} +{"epoch": 0, "iter": 14226, "iter_tflops": 25.965771483562694, "iter_time": 0.7945496063232422, "loss": 0.22870869934558868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.02806081610273, "step_time": 0.644156810760498} +{"epoch": 0, "iter": 14227, "iter_tflops": 49.41001054873971, "iter_time": 0.41754885864257807, "loss": 0.1447063684463501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.894598189006516, "step_time": 0.38280447769165044} +{"epoch": 0, "iter": 14228, "iter_tflops": 50.56745337130849, "iter_time": 0.4079915466308593, "loss": 0.2120257467031479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.91084382194318, "step_time": 0.375719841003418} +{"epoch": 0, "iter": 14229, "iter_tflops": 40.90481416208554, "iter_time": 0.5043683471679689, "loss": 0.3893575668334961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.26388037572485, "step_time": 0.4660931968688965} +{"epoch": 0, "iter": 14230, "iter_tflops": 37.943330731406064, "iter_time": 0.5437343826293946, "loss": 0.4789571166038513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53547674860915, "step_time": 0.49671016502380366} +{"epoch": 0, "iter": 14231, "iter_tflops": 40.872850912685195, "iter_time": 0.5047627716064453, "loss": 0.28973388671875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.6797928557444, "step_time": 0.46175445747375493} +{"epoch": 0, "iter": 14232, "iter_tflops": 37.96872993207949, "iter_time": 0.5433706512451172, "loss": 0.4258810877799988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.73100028953574, "step_time": 0.4943829135894775} +{"epoch": 0, "iter": 14233, "iter_tflops": 21.1186527134107, 
"iter_time": 0.9769133377075194, "loss": 0.39199259877204895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.67749637421694, "step_time": 0.9097606353759766} +{"epoch": 0, "iter": 14234, "iter_tflops": 24.009338734527955, "iter_time": 0.8592945327758789, "loss": 0.5185673236846924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.301694726295352, "step_time": 0.7040921592712402} +{"epoch": 0, "iter": 14235, "iter_tflops": 46.569325757954324, "iter_time": 0.44301894378662104, "loss": 0.4736131429672241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.50165498183963, "step_time": 0.40852311706542965} +{"epoch": 0, "iter": 14236, "iter_tflops": 45.746656292646584, "iter_time": 0.4509858245849609, "loss": 0.2845432758331299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.46706664336319, "step_time": 0.4170672512054443} +{"epoch": 0, "iter": 14237, "iter_tflops": 30.93214837246046, "iter_time": 0.66697900390625, "loss": 0.5399775505065918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.93483071606327, "step_time": 0.6264217262268067} +{"epoch": 0, "iter": 14238, "iter_tflops": 14.287843765011372, "iter_time": 1.4439613037109373, "loss": 0.4928135871887207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.227395022222108, "step_time": 1.0730051307678223} +{"epoch": 0, "iter": 14239, "iter_tflops": 34.3819696142457, "iter_time": 0.6000556030273437, "loss": 0.6867152452468872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.66283009748854, "step_time": 0.5477839412689209} +{"epoch": 0, "iter": 14240, "iter_tflops": 40.808320240098126, "iter_time": 0.5055609588623048, "loss": 0.6066954135894775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.65280739164024, "step_time": 0.46203351402282716} +{"epoch": 0, "iter": 14241, "iter_tflops": 18.667325115916558, "iter_time": 1.0386129150390624, "loss": 0.08913717418909073, "lr": 3e-05, "seqlen": 7712.0, "step_tflops": 20.1190836896544, "step_time": 0.9636683883666992} +{"epoch": 0, "iter": 14242, "iter_tflops": 
14.21323309116862, "iter_time": 1.3640897064208983, "loss": 0.07305141538381577, "lr": 3e-05, "seqlen": 7712.0, "step_tflops": 20.317127110839444, "step_time": 0.9542749252319336} +{"epoch": 0, "iter": 14243, "iter_tflops": 50.58218579196722, "iter_time": 0.38329946899414064, "loss": 0.08414138108491898, "lr": 3e-05, "seqlen": 7712.0, "step_tflops": 55.3292122241366, "step_time": 0.3504138984680176} +{"epoch": 0, "iter": 14244, "iter_tflops": 51.79305200977683, "iter_time": 0.37433833694458013, "loss": 0.08102945238351822, "lr": 3e-05, "seqlen": 7712.0, "step_tflops": 56.388313877145414, "step_time": 0.34383232307434086} +{"epoch": 0, "iter": 14245, "iter_tflops": 23.780973140004978, "iter_time": 0.8675462265014648, "loss": 0.5615713000297546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.91708082642891, "step_time": 0.827989990234375} +{"epoch": 0, "iter": 14246, "iter_tflops": 15.876527882652448, "iter_time": 1.299471374511719, "loss": 0.5437765717506409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.533217727584294, "step_time": 1.0047667045593263} +{"epoch": 0, "iter": 14247, "iter_tflops": 42.67449829099919, "iter_time": 0.4834525146484375, "loss": 0.6281644105911255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.78811660613565, "step_time": 0.45057746505737306} +{"epoch": 0, "iter": 14248, "iter_tflops": 45.01168564058261, "iter_time": 0.4583497200012207, "loss": 0.49201589822769165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.261991849013576, "step_time": 0.42748118591308587} +{"epoch": 0, "iter": 14249, "iter_tflops": 31.041648118379783, "iter_time": 0.6646262283325196, "loss": 0.00672530010342598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.08121566261797, "step_time": 0.6236497993469238} +{"epoch": 0, "iter": 14250, "iter_tflops": 12.55520361832361, "iter_time": 1.6432304992675781, "loss": 0.0029344605281949043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.83707307163088, "step_time": 1.302708740234375} +{"epoch": 0, "iter": 14251, 
"iter_tflops": 56.02137845422605, "iter_time": 0.36827179336547855, "loss": 0.017658919095993042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.76672488333597, "step_time": 0.33401630973815916} +{"epoch": 0, "iter": 14252, "iter_tflops": 57.29477570134272, "iter_time": 0.3600868186950684, "loss": 0.0021338178776204586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.788954994996836, "step_time": 0.3285783863067627} +{"epoch": 0, "iter": 14253, "iter_tflops": 29.455392693173295, "iter_time": 0.7004182128906249, "loss": 0.00794378574937582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.245199478915143, "step_time": 0.6602964248657226} +{"epoch": 0, "iter": 14254, "iter_tflops": 28.326138754587802, "iter_time": 0.7283411865234376, "loss": 0.022782914340496063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.24206555652017, "step_time": 0.553972858428955} +{"epoch": 0, "iter": 14255, "iter_tflops": 57.868516014254276, "iter_time": 0.3565167198181153, "loss": 0.002853080863133073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.45887215670515, "step_time": 0.3251096782684326} +{"epoch": 0, "iter": 14256, "iter_tflops": 60.95163286156884, "iter_time": 0.33848303222656245, "loss": 0.0037626060657203197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.60862003423273, "step_time": 0.30973608970642086} +{"epoch": 0, "iter": 14257, "iter_tflops": 33.83011292303904, "iter_time": 0.6098440628051758, "loss": 0.03225380554795265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.20145423125345, "step_time": 0.5698968162536622} +{"epoch": 0, "iter": 14258, "iter_tflops": 15.52078308087806, "iter_time": 1.3292559661865233, "loss": 0.04279560223221779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.669192818398983, "step_time": 1.048903923034668} +{"epoch": 0, "iter": 14259, "iter_tflops": 43.154623886819394, "iter_time": 0.4780737648010254, "loss": 0.03264005854725838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.74280575218842, "step_time": 0.4321298923492432} 
+{"epoch": 0, "iter": 14260, "iter_tflops": 44.47127803514523, "iter_time": 0.46391950988769537, "loss": 0.03741191700100899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.88168823505466, "step_time": 0.422061803817749} +{"epoch": 0, "iter": 14261, "iter_tflops": 17.101850039398954, "iter_time": 1.206366180419922, "loss": 0.2948738634586334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.213758707599478, "step_time": 1.1327202606201172} +{"epoch": 0, "iter": 14262, "iter_tflops": 16.22951762451349, "iter_time": 1.271208053588867, "loss": 0.3153100609779358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.868997430955634, "step_time": 1.0383560409545898} +{"epoch": 0, "iter": 14263, "iter_tflops": 38.54176288802245, "iter_time": 0.5352919006347656, "loss": 0.34894081950187683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.21994129269156, "step_time": 0.4886575603485108} +{"epoch": 0, "iter": 14264, "iter_tflops": 35.52744129568326, "iter_time": 0.5807086791992188, "loss": 0.3613204061985016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.991837371498704, "step_time": 0.5291131401062013} +{"epoch": 0, "iter": 14265, "iter_tflops": 22.555295525065898, "iter_time": 0.9146895675659179, "loss": 0.515423059463501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.164222109745797, "step_time": 0.8537867851257325} +{"epoch": 0, "iter": 14266, "iter_tflops": 23.803165504617745, "iter_time": 0.8667373886108399, "loss": 0.6599937677383423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.132035946753764, "step_time": 0.7081926422119141} +{"epoch": 0, "iter": 14267, "iter_tflops": 49.47110576391542, "iter_time": 0.4170331993103028, "loss": 0.6950689554214478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.19197337464418, "step_time": 0.3878610286712646} +{"epoch": 0, "iter": 14268, "iter_tflops": 44.18820890112033, "iter_time": 0.46689137268066405, "loss": 0.6134422421455383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47309086005264, "step_time": 
0.43458500671386724} +{"epoch": 0, "iter": 14269, "iter_tflops": 54.5124434548301, "iter_time": 0.378465763092041, "loss": 0.0036275656893849373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.61509862329638, "step_time": 0.34036228561401366} +{"epoch": 0, "iter": 14270, "iter_tflops": 43.64033806336974, "iter_time": 0.47275283432006837, "loss": 0.008739671669900417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.56341171712474, "step_time": 0.42482792663574215} +{"epoch": 0, "iter": 14271, "iter_tflops": 45.649281797152604, "iter_time": 0.45194782257080074, "loss": 0.0022936586756259203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.68284165168378, "step_time": 0.4070626831054688} +{"epoch": 0, "iter": 14272, "iter_tflops": 42.96962987477607, "iter_time": 0.4801319808959961, "loss": 0.006405337247997522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.446197746833434, "step_time": 0.434831335067749} +{"epoch": 0, "iter": 14273, "iter_tflops": 31.255816789912174, "iter_time": 0.6600721282958986, "loss": 0.13316769897937775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.008511131076325, "step_time": 0.6066450080871583} +{"epoch": 0, "iter": 14274, "iter_tflops": 8.936724304015426, "iter_time": 2.308574462890625, "loss": 0.15528419613838196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.964584057276824, "step_time": 2.0704420166015627} +{"epoch": 0, "iter": 14275, "iter_tflops": 20.69461222716548, "iter_time": 0.9969306640624999, "loss": 0.111874058842659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.92298602764829, "step_time": 0.8277938079833984} +{"epoch": 0, "iter": 14276, "iter_tflops": 43.684532897370836, "iter_time": 0.4722745590209961, "loss": 0.13150690495967865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.985809181681425, "step_time": 0.429941556930542} +{"epoch": 0, "iter": 14277, "iter_tflops": 12.726550593233593, "iter_time": 1.145724838256836, "loss": 0.12953601777553558, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 
13.637899630318318, "step_time": 1.0691620788574219} +{"epoch": 0, "iter": 14278, "iter_tflops": 10.76096292198623, "iter_time": 1.355001892089844, "loss": 0.17762133479118347, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 13.758085535045684, "step_time": 1.0598222465515135} +{"epoch": 0, "iter": 14279, "iter_tflops": 26.19267921420368, "iter_time": 0.5566870422363281, "loss": 0.35726457834243774, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 27.974703256009697, "step_time": 0.5212253723144531} +{"epoch": 0, "iter": 14280, "iter_tflops": 27.739201198278096, "iter_time": 0.5256505050659179, "loss": 0.2486892193555832, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 29.440426920369184, "step_time": 0.4952756004333496} +{"epoch": 0, "iter": 14281, "iter_tflops": 31.74152459542784, "iter_time": 0.6499717254638671, "loss": 0.6478168368339539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.70353095885363, "step_time": 0.6121344833374024} +{"epoch": 0, "iter": 14282, "iter_tflops": 11.682938878544103, "iter_time": 1.7659164123535156, "loss": 0.7891058921813965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.780357959489718, "step_time": 1.3958453216552735} +{"epoch": 0, "iter": 14283, "iter_tflops": 43.30624703456649, "iter_time": 0.47639994049072265, "loss": 0.7081969380378723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8387407314128, "step_time": 0.4404707126617431} +{"epoch": 0, "iter": 14284, "iter_tflops": 43.56876192500557, "iter_time": 0.4735294876098633, "loss": 0.7161973714828491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.92257162111712, "step_time": 0.4396837768554687} +{"epoch": 0, "iter": 14285, "iter_tflops": 46.12141356056954, "iter_time": 0.447321361541748, "loss": 0.26502013206481934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.476677266829945, "step_time": 0.408725269317627} +{"epoch": 0, "iter": 14286, "iter_tflops": 46.544734969178535, "iter_time": 0.44325300216674807, "loss": 0.2145783007144928, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 52.13732185044579, "step_time": 0.3957068138122558} +{"epoch": 0, "iter": 14287, "iter_tflops": 50.5661333404839, "iter_time": 0.408002197265625, "loss": 0.25784510374069214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.961841029106495, "step_time": 0.375371223449707} +{"epoch": 0, "iter": 14288, "iter_tflops": 43.91720472809494, "iter_time": 0.46977246475219725, "loss": 0.13463793694972992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.5105579572628, "step_time": 0.43424229049682617} +{"epoch": 0, "iter": 14289, "iter_tflops": 29.201865243499032, "iter_time": 0.7064991683959961, "loss": 0.2867810130119324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.923439310320557, "step_time": 0.6671668472290039} +{"epoch": 0, "iter": 14290, "iter_tflops": 13.714354968823407, "iter_time": 1.5043429718017578, "loss": 0.3545612096786499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.18025273134801, "step_time": 1.2008608856201173} +{"epoch": 0, "iter": 14291, "iter_tflops": 39.16535656380888, "iter_time": 0.5267689437866211, "loss": 0.2714468538761139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1599889180471, "step_time": 0.4196724605560303} +{"epoch": 0, "iter": 14292, "iter_tflops": 48.53001865050632, "iter_time": 0.425120246887207, "loss": 0.19513735175132751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.726508835431765, "step_time": 0.39128502845764157} +{"epoch": 0, "iter": 14293, "iter_tflops": 31.70662884514814, "iter_time": 0.6506870727539062, "loss": 0.6118341088294983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.884222733357724, "step_time": 0.6088702011108399} +{"epoch": 0, "iter": 14294, "iter_tflops": 19.81667375692805, "iter_time": 1.0410977020263672, "loss": 0.5574949979782104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.076997231702645, "step_time": 0.8568798389434814} +{"epoch": 0, "iter": 14295, "iter_tflops": 46.45410645382742, "iter_time": 0.44411775588989255, "loss": 0.6185517907142639, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 50.28003632169133, "step_time": 0.41032375907897944} +{"epoch": 0, "iter": 14296, "iter_tflops": 42.038878979394404, "iter_time": 0.4907622184753418, "loss": 0.6708834767341614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17016931215717, "step_time": 0.4567415580749511} +{"epoch": 0, "iter": 14297, "iter_tflops": 32.059875994147234, "iter_time": 0.6435175704956054, "loss": 0.2026597410440445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.263781430601696, "step_time": 0.6021254119873047} +{"epoch": 0, "iter": 14298, "iter_tflops": 17.950654055578145, "iter_time": 1.1493226623535158, "loss": 0.3115188479423523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.540054835033935, "step_time": 0.9578013458251954} +{"epoch": 0, "iter": 14299, "iter_tflops": 44.69698817187915, "iter_time": 0.4615768165588379, "loss": 0.175959974527359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.20621158543426, "step_time": 0.4279758319854737} +{"epoch": 0, "iter": 14300, "iter_tflops": 46.66503075805389, "iter_time": 0.4421103591918945, "loss": 0.24729026854038239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.62725507290048, "step_time": 0.40750962066650387} +{"epoch": 0, "iter": 14301, "iter_tflops": 42.415584995786894, "iter_time": 0.4864036064147949, "loss": 0.4565424919128418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.08156368067016, "step_time": 0.44770819091796876} +{"epoch": 0, "iter": 14302, "iter_tflops": 10.740736585093263, "iter_time": 1.9208266906738283, "loss": 0.46953535079956055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.424931444460322, "step_time": 1.5367745895385743} +{"epoch": 0, "iter": 14303, "iter_tflops": 14.565440842882735, "iter_time": 1.4164414062500001, "loss": 0.4117312431335449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.273202344223698, "step_time": 1.1290354652404786} +{"epoch": 0, "iter": 14304, "iter_tflops": 20.562036743067146, "iter_time": 1.0033584594726563, "loss": 0.44359031319618225, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 28.171395930261795, "step_time": 0.732341896057129} +{"epoch": 0, "iter": 14305, "iter_tflops": 15.683980674710051, "iter_time": 0.8880894775390625, "loss": 0.11100956052541733, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 16.457363185406194, "step_time": 0.8463554000854493} +{"epoch": 0, "iter": 14306, "iter_tflops": 8.936385325802357, "iter_time": 1.5586590881347657, "loss": 0.12986910343170166, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 11.297876216054789, "step_time": 1.2328669509887695} +{"epoch": 0, "iter": 14307, "iter_tflops": 20.543983533796954, "iter_time": 0.6779979248046875, "loss": 0.1922740489244461, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 22.0652953396669, "step_time": 0.631252742767334} +{"epoch": 0, "iter": 14308, "iter_tflops": 21.32922350109378, "iter_time": 0.6530372848510742, "loss": 0.26639753580093384, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 22.91313888579708, "step_time": 0.6078948097229003} +{"epoch": 0, "iter": 14309, "iter_tflops": 17.577139179781994, "iter_time": 0.9249672317504882, "loss": 0.005402895621955395, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 18.584588846992034, "step_time": 0.874825798034668} +{"epoch": 0, "iter": 14310, "iter_tflops": 14.678855859267417, "iter_time": 1.1075984344482421, "loss": 0.005336868576705456, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 17.729924430672618, "step_time": 0.9169964504241944} +{"epoch": 0, "iter": 14311, "iter_tflops": 34.994620657338324, "iter_time": 0.4645936279296875, "loss": 0.00543187977746129, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 38.86576952758679, "step_time": 0.4183186893463135} +{"epoch": 0, "iter": 14312, "iter_tflops": 37.62716490908129, "iter_time": 0.4320888328552246, "loss": 0.018091727048158646, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 41.46292439387535, "step_time": 0.39211604118347165} +{"epoch": 0, "iter": 14313, "iter_tflops": 17.170934781388066, "iter_time": 1.2015125427246094, "loss": 
0.23361554741859436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.266073345218032, "step_time": 1.1294761123657224} +{"epoch": 0, "iter": 14314, "iter_tflops": 16.132360063607855, "iter_time": 1.2788639373779296, "loss": 0.20388342440128326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.658901824403983, "step_time": 0.9986539306640625} +{"epoch": 0, "iter": 14315, "iter_tflops": 49.20356294700756, "iter_time": 0.41930080413818366, "loss": 0.20455004274845123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.492758945316154, "step_time": 0.3856801166534424} +{"epoch": 0, "iter": 14316, "iter_tflops": 53.57177015330068, "iter_time": 0.38511128997802735, "loss": 0.2664584815502167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.15063976947238, "step_time": 0.35478704261779787} +{"epoch": 0, "iter": 14317, "iter_tflops": 29.68811553867994, "iter_time": 0.6949276885986329, "loss": 0.4542585015296936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.421921885544855, "step_time": 0.6565828018188477} +{"epoch": 0, "iter": 14318, "iter_tflops": 13.566445209259397, "iter_time": 1.5207442474365234, "loss": 0.36830416321754456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.391712596497474, "step_time": 1.3404027252197266} +{"epoch": 0, "iter": 14319, "iter_tflops": 11.700106622913585, "iter_time": 1.7633252563476562, "loss": 0.3443760871887207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.344975054836645, "step_time": 1.4382104835510254} +{"epoch": 0, "iter": 14320, "iter_tflops": 15.927373557189771, "iter_time": 1.2953230133056641, "loss": 0.5487319231033325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.195646222671495, "step_time": 0.9733646850585936} +{"epoch": 0, "iter": 14321, "iter_tflops": 22.085191139470844, "iter_time": 0.6288397750854492, "loss": 0.19321654736995697, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 23.763493909740642, "step_time": 0.584427806854248} +{"epoch": 0, "iter": 14322, "iter_tflops": 21.34276969940148, "iter_time": 
0.65071435546875, "loss": 0.21200992166996002, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 22.989788600495636, "step_time": 0.6040963172912599} +{"epoch": 0, "iter": 14323, "iter_tflops": 22.205130973160145, "iter_time": 0.6254431304931641, "loss": 0.18747487664222717, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 23.851957310636074, "step_time": 0.5822602500915528} +{"epoch": 0, "iter": 14324, "iter_tflops": 22.165032361321703, "iter_time": 0.6265746154785157, "loss": 0.21944662928581238, "lr": 3e-05, "seqlen": 5568.0, "step_tflops": 23.815816368830017, "step_time": 0.5831438407897949} +{"epoch": 0, "iter": 14325, "iter_tflops": 20.973157659745745, "iter_time": 0.9836903839111328, "loss": 0.12367350608110428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.380181755488444, "step_time": 0.9218465576171876} +{"epoch": 0, "iter": 14326, "iter_tflops": 40.4582381935283, "iter_time": 0.5099355392456055, "loss": 0.05452229455113411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.9757699327253, "step_time": 0.4587157382965088} +{"epoch": 0, "iter": 14327, "iter_tflops": 43.572736839120346, "iter_time": 0.47348628997802733, "loss": 0.14451062679290771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86232418429718, "step_time": 0.43105080795288087} +{"epoch": 0, "iter": 14328, "iter_tflops": 39.59671253180119, "iter_time": 0.5210304641723633, "loss": 0.1376715451478958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.37113544950368, "step_time": 0.47568718910217284} +{"epoch": 0, "iter": 14329, "iter_tflops": 11.022752181075047, "iter_time": 1.1935027618408203, "loss": 0.0022017029114067554, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 11.847084761314026, "step_time": 1.1104575881958005} +{"epoch": 0, "iter": 14330, "iter_tflops": 10.787928997019202, "iter_time": 1.2194819946289064, "loss": 0.0070448219776153564, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 12.364063488702566, "step_time": 1.0640260124206542} +{"epoch": 0, "iter": 14331, "iter_tflops": 
26.381258981467923, "iter_time": 0.4986754112243652, "loss": 0.007387333549559116, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 29.173658170485137, "step_time": 0.45094396781921386} +{"epoch": 0, "iter": 14332, "iter_tflops": 34.18950212448442, "iter_time": 0.3847872695922852, "loss": 0.007430394180119038, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 37.86576014952528, "step_time": 0.34742958068847657} +{"epoch": 0, "iter": 14333, "iter_tflops": 20.56483971307188, "iter_time": 1.0032217025756838, "loss": 0.6683177947998047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.853187752246228, "step_time": 0.9440770721435547} +{"epoch": 0, "iter": 14334, "iter_tflops": 21.959500194737792, "iter_time": 0.9395065155029297, "loss": 0.5351060628890991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.798767009569623, "step_time": 0.7698523406982423} +{"epoch": 0, "iter": 14335, "iter_tflops": 34.80094651683296, "iter_time": 0.5928313903808594, "loss": 0.7078961730003357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82678523607048, "step_time": 0.5454096450805664} +{"epoch": 0, "iter": 14336, "iter_tflops": 39.04223142021304, "iter_time": 0.5284301834106446, "loss": 0.8622321486473083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60330692404857, "step_time": 0.4842603778839112} +{"epoch": 0, "iter": 14337, "iter_tflops": 21.255978441867036, "iter_time": 0.970601921081543, "loss": 0.2564069628715515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.555287999750334, "step_time": 0.9146898727416992} +{"epoch": 0, "iter": 14338, "iter_tflops": 19.845460990098992, "iter_time": 1.039587516784668, "loss": 0.12513980269432068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.994992859934765, "step_time": 0.7936564407348632} +{"epoch": 0, "iter": 14339, "iter_tflops": 50.25622878968192, "iter_time": 0.41051813888549804, "loss": 0.22317087650299072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.54696011783823, "step_time": 0.37822627449035645} +{"epoch": 0, "iter": 14340, 
"iter_tflops": 52.654945240057025, "iter_time": 0.39181682586669925, "loss": 0.16781386733055115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.07901767016131, "step_time": 0.3614479427337647} +{"epoch": 0, "iter": 14341, "iter_tflops": 41.47076911848755, "iter_time": 0.49748519134521485, "loss": 0.4627208113670349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.97743382308449, "step_time": 0.4586987686157226} +{"epoch": 0, "iter": 14342, "iter_tflops": 38.605829000484434, "iter_time": 0.5344035873413087, "loss": 0.5924739241600037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79280554250775, "step_time": 0.4821159362792969} +{"epoch": 0, "iter": 14343, "iter_tflops": 43.2257733088872, "iter_time": 0.4772868576049804, "loss": 0.5504249930381775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.682504990750914, "step_time": 0.44194486808776856} +{"epoch": 0, "iter": 14344, "iter_tflops": 48.726400837612374, "iter_time": 0.42340688323974607, "loss": 0.7035495042800903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.68420785275151, "step_time": 0.39159919738769533} +{"epoch": 0, "iter": 14345, "iter_tflops": 41.40985368909817, "iter_time": 0.49821701049804684, "loss": 0.6628254652023315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.042023516779345, "step_time": 0.4580410003662109} +{"epoch": 0, "iter": 14346, "iter_tflops": 32.12452827959885, "iter_time": 0.6422224578857423, "loss": 0.6741913557052612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.999724944212055, "step_time": 0.5894644470214843} +{"epoch": 0, "iter": 14347, "iter_tflops": 33.79786491470677, "iter_time": 0.6104259414672852, "loss": 0.770581841468811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.829430716150384, "step_time": 0.5601795387268067} +{"epoch": 0, "iter": 14348, "iter_tflops": 39.57732678576199, "iter_time": 0.5212856750488282, "loss": 0.7156137228012085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.10535688021761, "step_time": 0.47862017631530757} +{"epoch": 0, 
"iter": 14349, "iter_tflops": 18.534524980541573, "iter_time": 1.113116928100586, "loss": 0.1952182948589325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.686536000110276, "step_time": 1.0479798736572266} +{"epoch": 0, "iter": 14350, "iter_tflops": 25.03538066193287, "iter_time": 0.8240774841308595, "loss": 0.19862140715122223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.159873763711637, "step_time": 0.7326415481567382} +{"epoch": 0, "iter": 14351, "iter_tflops": 49.38655036527275, "iter_time": 0.41774720764160156, "loss": 0.24928873777389526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.38816319256055, "step_time": 0.3864357242584229} +{"epoch": 0, "iter": 14352, "iter_tflops": 50.9907773969677, "iter_time": 0.40460441207885744, "loss": 0.16589026153087616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.47960940998355, "step_time": 0.3718680381774902} +{"epoch": 0, "iter": 14353, "iter_tflops": 21.12844074367556, "iter_time": 0.9764607696533203, "loss": 0.08604379743337631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.134437622210356, "step_time": 0.932081214904785} +{"epoch": 0, "iter": 14354, "iter_tflops": 16.215789367156084, "iter_time": 1.2722842559814451, "loss": 0.05731759965419769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.441538031803177, "step_time": 0.962202127456665} +{"epoch": 0, "iter": 14355, "iter_tflops": 39.74696030854525, "iter_time": 0.5190609130859374, "loss": 0.0796615481376648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.981655936865, "step_time": 0.4690840549468994} +{"epoch": 0, "iter": 14356, "iter_tflops": 44.585775030341296, "iter_time": 0.462728157043457, "loss": 0.09405303746461868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.83303592457829, "step_time": 0.42248230361938477} +{"epoch": 0, "iter": 14357, "iter_tflops": 24.003918118154168, "iter_time": 0.828398208618164, "loss": 0.05367058143019676, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 25.578764006796003, "step_time": 0.7773949813842774} 
+{"epoch": 0, "iter": 14358, "iter_tflops": 21.12082067890601, "iter_time": 0.9414786987304687, "loss": 0.029291972517967224, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 23.970457713575644, "step_time": 0.8295545711517335} +{"epoch": 0, "iter": 14359, "iter_tflops": 41.286557027514895, "iter_time": 0.4816289901733398, "loss": 0.04723038896918297, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 45.67108604596523, "step_time": 0.4353915023803711} +{"epoch": 0, "iter": 14360, "iter_tflops": 43.74078580172386, "iter_time": 0.4546055221557617, "loss": 0.04941181465983391, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 48.15956730360967, "step_time": 0.4128941326141357} +{"epoch": 0, "iter": 14361, "iter_tflops": 16.529901304685243, "iter_time": 1.2481074829101564, "loss": 0.2594723403453827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.755609458829856, "step_time": 1.1619479217529296} +{"epoch": 0, "iter": 14362, "iter_tflops": 16.44871485570351, "iter_time": 1.254267807006836, "loss": 0.2519645690917969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.728679445800626, "step_time": 1.0457412300109863} +{"epoch": 0, "iter": 14363, "iter_tflops": 38.563971786428425, "iter_time": 0.5349836273193359, "loss": 0.1441652774810791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.241031199295755, "step_time": 0.48841358566284176} +{"epoch": 0, "iter": 14364, "iter_tflops": 37.961407426455146, "iter_time": 0.5434754638671876, "loss": 0.35326457023620605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.43999498828604, "step_time": 0.49785463333129887} +{"epoch": 0, "iter": 14365, "iter_tflops": 24.40014966016119, "iter_time": 0.8455314331054687, "loss": 0.6926483511924744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.529587599212974, "step_time": 0.777663558959961} +{"epoch": 0, "iter": 14366, "iter_tflops": 13.991125265839555, "iter_time": 1.4745842895507815, "loss": 0.48550158739089966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.98531037251844, "step_time": 
1.1471080055236815} +{"epoch": 0, "iter": 14367, "iter_tflops": 36.365877524055584, "iter_time": 0.5673201065063477, "loss": 0.6515681147575378, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.44652463305992, "step_time": 0.5230142250061035} +{"epoch": 0, "iter": 14368, "iter_tflops": 33.54734042283167, "iter_time": 0.6149844741821289, "loss": 0.5417652130126953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.29310197751233, "step_time": 0.568457706451416} +{"epoch": 0, "iter": 14369, "iter_tflops": 17.88263017474428, "iter_time": 1.153694580078125, "loss": 0.6647976040840149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.168616627407776, "step_time": 1.0762953796386718} +{"epoch": 0, "iter": 14370, "iter_tflops": 21.800987118513945, "iter_time": 0.9463375854492188, "loss": 0.49126166105270386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.14708162813379, "step_time": 0.6843479499816894} +{"epoch": 0, "iter": 14371, "iter_tflops": 48.5212128576072, "iter_time": 0.42519739913940435, "loss": 0.6937550902366638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.30356358791807, "step_time": 0.39444909858703614} +{"epoch": 0, "iter": 14372, "iter_tflops": 46.84745909913007, "iter_time": 0.44038874053955074, "loss": 0.48891302943229675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.176393835777894, "step_time": 0.411171308517456} +{"epoch": 0, "iter": 14373, "iter_tflops": 36.24914916766966, "iter_time": 0.56914697265625, "loss": 0.5707544684410095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.881313495905424, "step_time": 0.5306171951293945} +{"epoch": 0, "iter": 14374, "iter_tflops": 15.964169441458626, "iter_time": 1.292337417602539, "loss": 0.7788735032081604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.549649726799792, "step_time": 1.1755843467712401} +{"epoch": 0, "iter": 14375, "iter_tflops": 37.90234664216683, "iter_time": 0.5443223266601562, "loss": 0.8224805593490601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.42363211211095, 
"step_time": 0.4980512924194336} +{"epoch": 0, "iter": 14376, "iter_tflops": 38.25370821429516, "iter_time": 0.5393227081298828, "loss": 0.8371143937110901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.35048585440069, "step_time": 0.4989323120117188} +{"epoch": 0, "iter": 14377, "iter_tflops": 16.512906305171153, "iter_time": 1.2493920288085938, "loss": 0.032177966088056564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.979556160818728, "step_time": 1.147475128173828} +{"epoch": 0, "iter": 14378, "iter_tflops": 21.59188374035488, "iter_time": 0.9555022506713867, "loss": 0.015409434214234352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.981627305954223, "step_time": 0.7118680152893067} +{"epoch": 0, "iter": 14379, "iter_tflops": 42.13232896879698, "iter_time": 0.48967370223999024, "loss": 0.05681447684764862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35941896492899, "step_time": 0.44502485084533694} +{"epoch": 0, "iter": 14380, "iter_tflops": 43.329677636688025, "iter_time": 0.47614232635498044, "loss": 0.032643698155879974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.480042791890334, "step_time": 0.4345213756561279} +{"epoch": 0, "iter": 14381, "iter_tflops": 19.422668404272876, "iter_time": 1.062217254638672, "loss": 0.007939132861793041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.747888075346225, "step_time": 0.9943707733154297} +{"epoch": 0, "iter": 14382, "iter_tflops": 19.942208221291686, "iter_time": 1.0345440826416015, "loss": 0.005392970517277718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.344543522857048, "step_time": 0.8474627380371094} +{"epoch": 0, "iter": 14383, "iter_tflops": 56.05763407696162, "iter_time": 0.3680336112976075, "loss": 0.003232163144275546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.639780279651994, "step_time": 0.33470420265197753} +{"epoch": 0, "iter": 14384, "iter_tflops": 59.43038720145005, "iter_time": 0.34714721679687505, "loss": 0.004115967079997063, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 65.23225532127843, "step_time": 0.31627135086059566} +{"epoch": 0, "iter": 14385, "iter_tflops": 27.02455736079505, "iter_time": 0.7634202194213867, "loss": 0.7898690700531006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.473290262795633, "step_time": 0.7245770797729492} +{"epoch": 0, "iter": 14386, "iter_tflops": 11.100094857393849, "iter_time": 1.858641189575195, "loss": 0.6482076644897461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.133324705465618, "step_time": 1.3632888946533204} +{"epoch": 0, "iter": 14387, "iter_tflops": 35.048449286063196, "iter_time": 0.5886449737548828, "loss": 0.5038110017776489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.05829887433206, "step_time": 0.5420918464660645} +{"epoch": 0, "iter": 14388, "iter_tflops": 34.6191136933872, "iter_time": 0.595945167541504, "loss": 0.5162188410758972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.66544743165305, "step_time": 0.5477458763122559} +{"epoch": 0, "iter": 14389, "iter_tflops": 24.228067859315864, "iter_time": 0.8515368881225586, "loss": 0.402253121137619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.899570449240052, "step_time": 0.7965805282592773} +{"epoch": 0, "iter": 14390, "iter_tflops": 8.624789199626584, "iter_time": 2.392069305419922, "loss": 0.4551624655723572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.611064716534035, "step_time": 1.635951759338379} +{"epoch": 0, "iter": 14391, "iter_tflops": 16.17890096068431, "iter_time": 1.275185104370117, "loss": 0.4883263409137726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.10133826815043, "step_time": 1.1397551498413088} +{"epoch": 0, "iter": 14392, "iter_tflops": 33.58012559374406, "iter_time": 0.6143840484619141, "loss": 0.520442545413971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.6986546198502, "step_time": 0.5472633895874024} +{"epoch": 0, "iter": 14393, "iter_tflops": 15.649829404267106, "iter_time": 1.004844108581543, "loss": 0.3381572663784027, "lr": 3e-05, "seqlen": 6288.0, 
"step_tflops": 16.65252482220541, "step_time": 0.944339614868164} +{"epoch": 0, "iter": 14394, "iter_tflops": 7.311505525738438, "iter_time": 2.1508072204589848, "loss": 0.28702783584594727, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 8.30009688271587, "step_time": 1.894633171081543} +{"epoch": 0, "iter": 14395, "iter_tflops": 7.30646650831853, "iter_time": 2.152290557861328, "loss": 0.3148505389690399, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 9.011000288208402, "step_time": 1.7451601791381834} +{"epoch": 0, "iter": 14396, "iter_tflops": 22.693218365344556, "iter_time": 0.6929664459228515, "loss": 0.24244658648967743, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.452341586077534, "step_time": 0.6431138229370118} +{"epoch": 0, "iter": 14397, "iter_tflops": 12.521020636513734, "iter_time": 1.125447525024414, "loss": 0.2732369303703308, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 13.284855822891338, "step_time": 1.060738021850586} +{"epoch": 0, "iter": 14398, "iter_tflops": 9.331846730747934, "iter_time": 1.510071060180664, "loss": 0.267202228307724, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 11.034514185523411, "step_time": 1.2770613594055176} +{"epoch": 0, "iter": 14399, "iter_tflops": 22.006782789558173, "iter_time": 0.6403367462158204, "loss": 0.28636640310287476, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 23.701709125483905, "step_time": 0.5945458030700683} +{"epoch": 0, "iter": 14400, "iter_tflops": 23.781322439958135, "iter_time": 0.5925554275512697, "loss": 0.33331969380378723, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 25.47038432275423, "step_time": 0.553260269165039} +{"epoch": 0, "iter": 14401, "iter_tflops": 16.617373125945583, "iter_time": 1.24153759765625, "loss": 0.30501890182495117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.815649122364906, "step_time": 1.1580320968627928} +{"epoch": 0, "iter": 14402, "iter_tflops": 20.175776423268406, "iter_time": 1.0225675125122071, "loss": 0.4030930697917938, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 24.165488121076073, "step_time": 0.8537420558929443} +{"epoch": 0, "iter": 14403, "iter_tflops": 38.374379687543815, "iter_time": 0.5376267623901367, "loss": 0.31224584579467773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.13753916551572, "step_time": 0.4896131553649902} +{"epoch": 0, "iter": 14404, "iter_tflops": 39.831082060111115, "iter_time": 0.5179646759033203, "loss": 0.3374667167663574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.44164987489885, "step_time": 0.47491505432128905} +{"epoch": 0, "iter": 14405, "iter_tflops": 6.582244147822346, "iter_time": 1.0441109619140625, "loss": 0.005933152046054602, "lr": 3e-05, "seqlen": 2784.0, "step_tflops": 6.927429748117101, "step_time": 0.9920841522216797} +{"epoch": 0, "iter": 14406, "iter_tflops": 6.4905444666036125, "iter_time": 1.0588623657226564, "loss": 0.007047455292195082, "lr": 3e-05, "seqlen": 2784.0, "step_tflops": 8.701619233145319, "step_time": 0.7898062515258789} +{"epoch": 0, "iter": 14407, "iter_tflops": 14.521977029191449, "iter_time": 0.4732546577453613, "loss": 0.0038402993232011795, "lr": 3e-05, "seqlen": 2784.0, "step_tflops": 16.178996795091514, "step_time": 0.4247848834991455} +{"epoch": 0, "iter": 14408, "iter_tflops": 16.920565024175122, "iter_time": 0.4061680717468262, "loss": 0.003476210404187441, "lr": 3e-05, "seqlen": 2784.0, "step_tflops": 18.731545851174623, "step_time": 0.36689941787719726} +{"epoch": 0, "iter": 14409, "iter_tflops": 24.50051521506956, "iter_time": 0.842067741394043, "loss": 0.48200008273124695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.180133012264672, "step_time": 0.7880438766479494} +{"epoch": 0, "iter": 14410, "iter_tflops": 8.25458176141191, "iter_time": 2.4993505554199213, "loss": 0.6793968677520752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.262833044428898, "step_time": 2.2272984313964845} +{"epoch": 0, "iter": 14411, "iter_tflops": 13.009191881408986, "iter_time": 1.5858858642578126, "loss": 0.6973845958709717, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.837921137612321, "step_time": 1.3026389846801758} +{"epoch": 0, "iter": 14412, "iter_tflops": 35.15813939693126, "iter_time": 0.5868084564208984, "loss": 0.7018300294876099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.14903460456614, "step_time": 0.5269885635375976} +{"epoch": 0, "iter": 14413, "iter_tflops": 23.62877268668794, "iter_time": 0.7349969482421875, "loss": 0.37102705240249634, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 25.029848550451923, "step_time": 0.6938546104431152} +{"epoch": 0, "iter": 14414, "iter_tflops": 11.825648617994633, "iter_time": 1.468593933105469, "loss": 0.1323595643043518, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 13.97090413041748, "step_time": 1.2430888977050782} +{"epoch": 0, "iter": 14415, "iter_tflops": 31.494211095402278, "iter_time": 0.5514370803833009, "loss": 0.31928154826164246, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 33.53684862500188, "step_time": 0.5178505592346191} +{"epoch": 0, "iter": 14416, "iter_tflops": 27.5699027362592, "iter_time": 0.6299288024902343, "loss": 0.2121967375278473, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 29.581090349557403, "step_time": 0.5871005973815918} +{"epoch": 0, "iter": 14417, "iter_tflops": 19.206532432827075, "iter_time": 1.074170654296875, "loss": 0.13597236573696136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.739419921953612, "step_time": 0.9947767868041991} +{"epoch": 0, "iter": 14418, "iter_tflops": 22.57032776487355, "iter_time": 0.9140803680419921, "loss": 0.24479466676712036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.33460130603657, "step_time": 0.7547610912322997} +{"epoch": 0, "iter": 14419, "iter_tflops": 49.676604599078125, "iter_time": 0.4153080444335938, "loss": 0.21097375452518463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.010624809615756, "step_time": 0.38198213005065923} +{"epoch": 0, "iter": 14420, "iter_tflops": 50.66643034765053, "iter_time": 0.40719453430175784, "loss": 
0.20185300707817078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.0670899570781, "step_time": 0.3746537818908691} +{"epoch": 0, "iter": 14421, "iter_tflops": 25.186098076320626, "iter_time": 0.8191460800170899, "loss": 0.060475677251815796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.49472516041493, "step_time": 0.7786868286132812} +{"epoch": 0, "iter": 14422, "iter_tflops": 12.926596330740884, "iter_time": 1.596019012451172, "loss": 0.01231522299349308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.008063366695962, "step_time": 1.47280126953125} +{"epoch": 0, "iter": 14423, "iter_tflops": 41.7327351348506, "iter_time": 0.494362361907959, "loss": 0.025104746222496033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.58976917543916, "step_time": 0.4428245487213135} +{"epoch": 0, "iter": 14424, "iter_tflops": 45.33579262046177, "iter_time": 0.4550729637145996, "loss": 0.04352298006415367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.85592204852738, "step_time": 0.4138143005371094} +{"epoch": 0, "iter": 14425, "iter_tflops": 16.592514849164143, "iter_time": 0.8763179168701172, "loss": 0.03646146506071091, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 17.75945950048035, "step_time": 0.8187365188598632} +{"epoch": 0, "iter": 14426, "iter_tflops": 8.387451124821892, "iter_time": 1.7335800628662112, "loss": 0.052518393844366074, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 11.24091388092231, "step_time": 1.2935174312591553} +{"epoch": 0, "iter": 14427, "iter_tflops": 30.104386671150266, "iter_time": 0.48299665451049806, "loss": 0.02853480540215969, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 33.36230873380635, "step_time": 0.4358306903839111} +{"epoch": 0, "iter": 14428, "iter_tflops": 30.198733897223857, "iter_time": 0.48148767089843747, "loss": 0.03867223486304283, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 33.02954654231925, "step_time": 0.4402215461730957} +{"epoch": 0, "iter": 14429, "iter_tflops": 26.76192079897851, "iter_time": 
0.7709122848510742, "loss": 0.5839598178863525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.48343344223205, "step_time": 0.699752067565918} +{"epoch": 0, "iter": 14430, "iter_tflops": 33.20954075928687, "iter_time": 0.6212399520874023, "loss": 0.6510893702507019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.38287137432379, "step_time": 0.5670551204681397} +{"epoch": 0, "iter": 14431, "iter_tflops": 37.46369914172287, "iter_time": 0.5506955795288087, "loss": 0.5143153667449951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87876227553221, "step_time": 0.5046897792816162} +{"epoch": 0, "iter": 14432, "iter_tflops": 40.1711608248158, "iter_time": 0.513579719543457, "loss": 0.7982246279716492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.48009340888739, "step_time": 0.4744951515197754} +{"epoch": 0, "iter": 14433, "iter_tflops": 21.916371519971936, "iter_time": 0.9413553466796875, "loss": 0.5671722888946533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.583962538041316, "step_time": 0.8747933464050291} +{"epoch": 0, "iter": 14434, "iter_tflops": 19.149308798401222, "iter_time": 1.0773805847167968, "loss": 0.5970255136489868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.913076315757156, "step_time": 0.9004069652557373} +{"epoch": 0, "iter": 14435, "iter_tflops": 44.58728502570643, "iter_time": 0.4627124862670899, "loss": 0.7307905554771423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.130161318136125, "step_time": 0.42865207481384276} +{"epoch": 0, "iter": 14436, "iter_tflops": 43.969717870757634, "iter_time": 0.4692114143371582, "loss": 0.7257294058799744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.41602374255921, "step_time": 0.43510804748535153} +{"epoch": 0, "iter": 14437, "iter_tflops": 26.131921375263143, "iter_time": 0.7894977645874024, "loss": 0.5426633358001709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.52940182956749, "step_time": 0.7494203338623047} +{"epoch": 0, "iter": 14438, "iter_tflops": 17.95465512892097, 
"iter_time": 1.1490665435791017, "loss": 0.5316857099533081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.8105241054626, "step_time": 1.041420883178711} +{"epoch": 0, "iter": 14439, "iter_tflops": 37.629261431619135, "iter_time": 0.5482726135253907, "loss": 0.40015214681625366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33818716005844, "step_time": 0.4990807514190674} +{"epoch": 0, "iter": 14440, "iter_tflops": 36.51957351910976, "iter_time": 0.5649324874877929, "loss": 0.5351969599723816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.838521269109144, "step_time": 0.5178679542541503} +{"epoch": 0, "iter": 14441, "iter_tflops": 19.990839709868883, "iter_time": 1.032027359008789, "loss": 0.03971431404352188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.316310634972442, "step_time": 0.9678547973632812} +{"epoch": 0, "iter": 14442, "iter_tflops": 19.493798038410358, "iter_time": 1.0583414001464844, "loss": 0.08587875217199326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.831066656940102, "step_time": 0.9450336914062502} +{"epoch": 0, "iter": 14443, "iter_tflops": 49.963396911811294, "iter_time": 0.41292415618896483, "loss": 0.0676356628537178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.67368289747117, "step_time": 0.3773496208190918} +{"epoch": 0, "iter": 14444, "iter_tflops": 48.986524189731966, "iter_time": 0.42115855026245114, "loss": 0.03608362376689911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.28838208199149, "step_time": 0.3871593151092529} +{"epoch": 0, "iter": 14445, "iter_tflops": 27.988838910599164, "iter_time": 0.7371185913085937, "loss": 0.005001169629395008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.67053060108063, "step_time": 0.6953395538330078} +{"epoch": 0, "iter": 14446, "iter_tflops": 14.020293892504478, "iter_time": 1.4715164794921876, "loss": 0.004742083139717579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.57107461778882, "step_time": 1.1741509246826172} +{"epoch": 0, "iter": 14447, "iter_tflops": 
40.06487593346639, "iter_time": 0.5149421539306641, "loss": 0.007984453812241554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.88509723929812, "step_time": 0.45964239311218263} +{"epoch": 0, "iter": 14448, "iter_tflops": 45.63029449937895, "iter_time": 0.45213588333129884, "loss": 0.006049952935427427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.0449756259108, "step_time": 0.4122510452270508} +{"epoch": 0, "iter": 14449, "iter_tflops": 20.095044342802616, "iter_time": 1.0266756896972655, "loss": 0.33218643069267273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.46361832193306, "step_time": 0.9612122802734375} +{"epoch": 0, "iter": 14450, "iter_tflops": 28.34661931746893, "iter_time": 0.727814956665039, "loss": 0.25891169905662537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.766743076471172, "step_time": 0.6494557361602783} +{"epoch": 0, "iter": 14451, "iter_tflops": 46.164653673751076, "iter_time": 0.4469023780822754, "loss": 0.1907797008752823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.91483114561005, "step_time": 0.4133259201049804} +{"epoch": 0, "iter": 14452, "iter_tflops": 50.29115060779343, "iter_time": 0.4102330780029297, "loss": 0.32685595750808716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.59772346429433, "step_time": 0.3778746109008789} +{"epoch": 0, "iter": 14453, "iter_tflops": 25.411500139281387, "iter_time": 0.8118801879882813, "loss": 0.12289907038211823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.742971390483536, "step_time": 0.7714585342407226} +{"epoch": 0, "iter": 14454, "iter_tflops": 8.52465141820942, "iter_time": 2.420168579101562, "loss": 0.14814092218875885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.800339935774401, "step_time": 1.9102263107299806} +{"epoch": 0, "iter": 14455, "iter_tflops": 10.18254303548224, "iter_time": 2.0261238708496094, "loss": 0.13018588721752167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.780358711354378, "step_time": 1.7513128433227538} +{"epoch": 0, "iter": 14456, 
"iter_tflops": 47.74955402850188, "iter_time": 0.4320688209533692, "loss": 0.13827022910118103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.173165318619276, "step_time": 0.39543495941162105} +{"epoch": 0, "iter": 14457, "iter_tflops": 17.75429886670469, "iter_time": 0.8258705978393555, "loss": 0.300047367811203, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 18.752146987352614, "step_time": 0.7819239807128906} +{"epoch": 0, "iter": 14458, "iter_tflops": 14.571524858844024, "iter_time": 1.0062607421875, "loss": 0.2798057198524475, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 16.09924352705984, "step_time": 0.910772819519043} +{"epoch": 0, "iter": 14459, "iter_tflops": 21.982003064064738, "iter_time": 0.6670344543457031, "loss": 0.16509808599948883, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 23.73685947626847, "step_time": 0.6177208671569824} +{"epoch": 0, "iter": 14460, "iter_tflops": 21.57181226649167, "iter_time": 0.6797182006835938, "loss": 0.2608758211135864, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 23.283022349652985, "step_time": 0.6297616004943848} +{"epoch": 0, "iter": 14461, "iter_tflops": 23.620564748550567, "iter_time": 0.8734377746582032, "loss": 0.16510196030139923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.94944830297684, "step_time": 0.7950494079589844} +{"epoch": 0, "iter": 14462, "iter_tflops": 42.62777342693725, "iter_time": 0.48398243331909174, "loss": 0.09786132723093033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.46490810658314, "step_time": 0.43465992736816406} +{"epoch": 0, "iter": 14463, "iter_tflops": 45.521972869849805, "iter_time": 0.4532117614746094, "loss": 0.1929192990064621, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.39083705632006, "step_time": 0.4177109508514404} +{"epoch": 0, "iter": 14464, "iter_tflops": 51.08649031416359, "iter_time": 0.40384636688232417, "loss": 0.14445872604846954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.72931118052013, "step_time": 0.37020183944702145} +{"epoch": 0, 
"iter": 14465, "iter_tflops": 27.5899050723359, "iter_time": 0.7477768936157226, "loss": 0.21202760934829712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.225218811244993, "step_time": 0.7059346122741699} +{"epoch": 0, "iter": 14466, "iter_tflops": 15.436225588942477, "iter_time": 1.3365374450683594, "loss": 0.1496470868587494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.175904988730966, "step_time": 1.1350792999267578} +{"epoch": 0, "iter": 14467, "iter_tflops": 37.19707195562577, "iter_time": 0.5546429443359375, "loss": 0.10874439775943756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.997951637416655, "step_time": 0.5032225437164306} +{"epoch": 0, "iter": 14468, "iter_tflops": 45.42139198942825, "iter_time": 0.45421535110473626, "loss": 0.16787967085838318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.607046160281676, "step_time": 0.4158903846740723} +{"epoch": 0, "iter": 14469, "iter_tflops": 16.592018756039, "iter_time": 1.243434799194336, "loss": 0.28680992126464844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.88182830304455, "step_time": 1.1537463150024414} +{"epoch": 0, "iter": 14470, "iter_tflops": 22.520691220404803, "iter_time": 0.9160950393676758, "loss": 0.4723328948020935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.28259443600856, "step_time": 0.7561998386383056} +{"epoch": 0, "iter": 14471, "iter_tflops": 49.594957115380204, "iter_time": 0.41599176025390633, "loss": 0.4372446537017822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.963329311330845, "step_time": 0.3823169136047363} +{"epoch": 0, "iter": 14472, "iter_tflops": 48.799935166334, "iter_time": 0.42276887130737306, "loss": 0.2864976227283478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.80284703432235, "step_time": 0.39071933937072756} +{"epoch": 0, "iter": 14473, "iter_tflops": 14.572247721648052, "iter_time": 0.7443344879150391, "loss": 0.020486783236265182, "lr": 3e-05, "seqlen": 4368.0, "step_tflops": 15.492093518452023, "step_time": 0.7001394958496092} 
+{"epoch": 0, "iter": 14474, "iter_tflops": 7.9468001677156765, "iter_time": 1.3649049072265627, "loss": 0.04149336367845535, "lr": 3e-05, "seqlen": 4368.0, "step_tflops": 9.645113723197813, "step_time": 1.1245721778869628} +{"epoch": 0, "iter": 14475, "iter_tflops": 27.019453350512585, "iter_time": 0.40143767547607423, "loss": 0.035051129758358, "lr": 3e-05, "seqlen": 4368.0, "step_tflops": 29.49883877317211, "step_time": 0.36769672966003414} +{"epoch": 0, "iter": 14476, "iter_tflops": 29.631291714777692, "iter_time": 0.3660531120300293, "loss": 0.020290344953536987, "lr": 3e-05, "seqlen": 4368.0, "step_tflops": 32.30818172080633, "step_time": 0.3357238311767578} +{"epoch": 0, "iter": 14477, "iter_tflops": 38.03865105812204, "iter_time": 0.5423718490600586, "loss": 0.5567731857299805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10034937748169, "step_time": 0.5019688110351562} +{"epoch": 0, "iter": 14478, "iter_tflops": 33.73733359807929, "iter_time": 0.6115211639404297, "loss": 0.5146665573120117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.66255667769953, "step_time": 0.5627292633056641} +{"epoch": 0, "iter": 14479, "iter_tflops": 37.57798382548379, "iter_time": 0.5490207672119141, "loss": 0.575268566608429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.722336750929, "step_time": 0.5066284294128418} +{"epoch": 0, "iter": 14480, "iter_tflops": 38.3323944322969, "iter_time": 0.5382156219482421, "loss": 0.7027268409729004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.40961684457828, "step_time": 0.4982198600769043} +{"epoch": 0, "iter": 14481, "iter_tflops": 20.12152755340553, "iter_time": 1.025324417114258, "loss": 0.052408576011657715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.46034070959306, "step_time": 0.9613590850830077} +{"epoch": 0, "iter": 14482, "iter_tflops": 20.041868508789683, "iter_time": 1.0293997039794922, "loss": 0.062125302851200104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.678195298512456, "step_time": 
0.8360049533843993} +{"epoch": 0, "iter": 14483, "iter_tflops": 46.76867222730834, "iter_time": 0.44113062286376953, "loss": 0.034422215074300766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.83304318896359, "step_time": 0.40585989379882814} +{"epoch": 0, "iter": 14484, "iter_tflops": 50.98841701393046, "iter_time": 0.4046231422424317, "loss": 0.06414046883583069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.58877003684566, "step_time": 0.3711377944946289} +{"epoch": 0, "iter": 14485, "iter_tflops": 28.70937140200767, "iter_time": 0.7186187820434571, "loss": 0.009176726453006268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.433467265874373, "step_time": 0.6779080848693848} +{"epoch": 0, "iter": 14486, "iter_tflops": 14.634428525936144, "iter_time": 1.4097642059326172, "loss": 0.0033889475744217634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.67187625101675, "step_time": 1.104928783416748} +{"epoch": 0, "iter": 14487, "iter_tflops": 43.9069449925691, "iter_time": 0.4698822364807129, "loss": 0.056349076330661774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.745198030263296, "step_time": 0.42324360847473147} +{"epoch": 0, "iter": 14488, "iter_tflops": 55.08524825115303, "iter_time": 0.37453028106689457, "loss": 0.02706087753176689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.61585272139278, "step_time": 0.34035805130004887} +{"epoch": 0, "iter": 14489, "iter_tflops": 38.49923537177331, "iter_time": 0.5358832015991211, "loss": 0.5698833465576172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.14910126425771, "step_time": 0.48947884750366205} +{"epoch": 0, "iter": 14490, "iter_tflops": 49.38550096210872, "iter_time": 0.41775608444213863, "loss": 0.5993460416793823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.71073227438754, "step_time": 0.3841149177551269} +{"epoch": 0, "iter": 14491, "iter_tflops": 48.083840076622295, "iter_time": 0.429065013885498, "loss": 0.5069249272346497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
52.19063060056538, "step_time": 0.39530262947082523} +{"epoch": 0, "iter": 14492, "iter_tflops": 49.96879930149039, "iter_time": 0.4128795127868653, "loss": 0.6285451054573059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.16263902870927, "step_time": 0.38091004943847656} +{"epoch": 0, "iter": 14493, "iter_tflops": 26.72178457607715, "iter_time": 0.772070198059082, "loss": 0.3249032497406006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.18661102140633, "step_time": 0.7319465789794921} +{"epoch": 0, "iter": 14494, "iter_tflops": 11.61363993399543, "iter_time": 1.7764536895751952, "loss": 0.29745009541511536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.269947014990585, "step_time": 1.3510913619995117} +{"epoch": 0, "iter": 14495, "iter_tflops": 10.805466397065839, "iter_time": 1.9093200378417967, "loss": 0.19246233999729156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.995877548785723, "step_time": 1.7198486251831058} +{"epoch": 0, "iter": 14496, "iter_tflops": 37.97062471325851, "iter_time": 0.5433435363769531, "loss": 0.18283484876155853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.173822858022284, "step_time": 0.4891919231414795} +{"epoch": 0, "iter": 14497, "iter_tflops": 18.255376290657438, "iter_time": 0.785322265625, "loss": 0.2849893271923065, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 19.51862797088747, "step_time": 0.7344959640502929} +{"epoch": 0, "iter": 14498, "iter_tflops": 6.764567175955794, "iter_time": 2.119330490112305, "loss": 0.29508543014526367, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 8.991473692442302, "step_time": 1.5944386825561523} +{"epoch": 0, "iter": 14499, "iter_tflops": 8.6904634541902, "iter_time": 1.6496650085449218, "loss": 0.13080672919750214, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 9.848276880435307, "step_time": 1.4557220153808592} +{"epoch": 0, "iter": 14500, "iter_tflops": 13.718034858381028, "iter_time": 1.0450734100341796, "loss": 0.25608518719673157, "lr": 3e-05, "seqlen": 5744.0, 
"step_tflops": 18.019549305143954, "step_time": 0.7956000022888184} +{"epoch": 0, "iter": 14501, "iter_tflops": 19.43966603716186, "iter_time": 0.8405644912719726, "loss": 0.16029417514801025, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 20.522759985325433, "step_time": 0.7962034835815429} +{"epoch": 0, "iter": 14502, "iter_tflops": 11.144704549478092, "iter_time": 1.4661934661865237, "loss": 0.22436904907226562, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 13.220067737340907, "step_time": 1.2360218811035155} +{"epoch": 0, "iter": 14503, "iter_tflops": 24.58446729844166, "iter_time": 0.6646592254638671, "loss": 0.20175665616989136, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 26.531885489320217, "step_time": 0.615873794555664} +{"epoch": 0, "iter": 14504, "iter_tflops": 25.00520135234595, "iter_time": 0.6534757614135742, "loss": 0.2271062284708023, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 26.849903628987875, "step_time": 0.6085792045593261} +{"epoch": 0, "iter": 14505, "iter_tflops": 16.46525878720267, "iter_time": 1.1373610992431642, "loss": 0.032393451780080795, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 17.569317148086522, "step_time": 1.0658891677856446} +{"epoch": 0, "iter": 14506, "iter_tflops": 15.867314820438395, "iter_time": 1.180221420288086, "loss": 0.031243136152625084, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 20.594899396205467, "step_time": 0.909300136566162} +{"epoch": 0, "iter": 14507, "iter_tflops": 51.122607746552696, "iter_time": 0.36631435012817387, "loss": 0.026891207322478294, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 56.102311296440995, "step_time": 0.3337998809814453} +{"epoch": 0, "iter": 14508, "iter_tflops": 50.77585789404084, "iter_time": 0.36881592178344724, "loss": 0.03215356171131134, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 55.63899128554362, "step_time": 0.3365795173645019} +{"epoch": 0, "iter": 14509, "iter_tflops": 29.577266984015136, "iter_time": 0.697532112121582, "loss": 0.19161121547222137, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 31.399238517489227, "step_time": 0.65705712890625} +{"epoch": 0, "iter": 14510, "iter_tflops": 11.792136614219052, "iter_time": 1.7495636444091796, "loss": 0.2924240231513977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.67173795349196, "step_time": 1.4061792526245118} +{"epoch": 0, "iter": 14511, "iter_tflops": 45.75299698487163, "iter_time": 0.4509233245849609, "loss": 0.1753109097480774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88950351412268, "step_time": 0.41353575515747065} +{"epoch": 0, "iter": 14512, "iter_tflops": 47.50117614831168, "iter_time": 0.4343280563354492, "loss": 0.24784018099308014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.78237191390917, "step_time": 0.39841924476623536} +{"epoch": 0, "iter": 14513, "iter_tflops": 27.936485546156206, "iter_time": 0.7384999618530274, "loss": 0.4128580391407013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.541070444846852, "step_time": 0.6983867950439453} +{"epoch": 0, "iter": 14514, "iter_tflops": 13.441065210708272, "iter_time": 1.534929946899414, "loss": 0.7438357472419739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.469999450581776, "step_time": 1.3336195373535156} +{"epoch": 0, "iter": 14515, "iter_tflops": 37.613227057929514, "iter_time": 0.5485063400268555, "loss": 0.7629483938217163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.35104814243165, "step_time": 0.4989255275726318} +{"epoch": 0, "iter": 14516, "iter_tflops": 36.21860142445663, "iter_time": 0.5696270065307616, "loss": 0.6587982177734375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.636238783911885, "step_time": 0.5205108795166016} +{"epoch": 0, "iter": 14517, "iter_tflops": 32.79857038356157, "iter_time": 0.629024169921875, "loss": 0.560769259929657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.965770211202255, "step_time": 0.5736313552856445} +{"epoch": 0, "iter": 14518, "iter_tflops": 42.31159839795839, "iter_time": 0.4875990104675293, "loss": 0.6477400660514832, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.18335660282451, "step_time": 0.4467213954925537} +{"epoch": 0, "iter": 14519, "iter_tflops": 44.3882630951619, "iter_time": 0.46478713226318363, "loss": 0.815866231918335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.759407462780075, "step_time": 0.43197967910766605} +{"epoch": 0, "iter": 14520, "iter_tflops": 43.49192250988085, "iter_time": 0.474366096496582, "loss": 0.7337736487388611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.581877039942896, "step_time": 0.44289957427978516} +{"epoch": 0, "iter": 14521, "iter_tflops": 34.98899940751982, "iter_time": 0.5896451416015625, "loss": 0.16218073666095734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.483517729842994, "step_time": 0.550404411315918} +{"epoch": 0, "iter": 14522, "iter_tflops": 22.775766646666852, "iter_time": 0.9058353042602538, "loss": 0.30883219838142395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.25415491219753, "step_time": 0.7301968002319337} +{"epoch": 0, "iter": 14523, "iter_tflops": 49.42050526799339, "iter_time": 0.41746018981933586, "loss": 0.20002280175685883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.020567714690316, "step_time": 0.3819118232727051} +{"epoch": 0, "iter": 14524, "iter_tflops": 51.6986317664841, "iter_time": 0.39906459426879876, "loss": 0.2116086333990097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.034834268423026, "step_time": 0.3681833591461182} +{"epoch": 0, "iter": 14525, "iter_tflops": 33.66024745777783, "iter_time": 0.6129216232299805, "loss": 0.32223641872406006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.6289846007334, "step_time": 0.5632450294494629} +{"epoch": 0, "iter": 14526, "iter_tflops": 10.523189470323754, "iter_time": 1.9605361633300782, "loss": 0.330450177192688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.443129691968778, "step_time": 1.6580308990478516} +{"epoch": 0, "iter": 14527, "iter_tflops": 32.59567237034471, "iter_time": 0.6329396514892578, "loss": 
0.3528001308441162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.65900710631035, "step_time": 0.5627837505340576} +{"epoch": 0, "iter": 14528, "iter_tflops": 44.15680484828933, "iter_time": 0.46722342300415043, "loss": 0.43090352416038513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.228969730242795, "step_time": 0.42777388000488287} +{"epoch": 0, "iter": 14529, "iter_tflops": 14.48952295323439, "iter_time": 1.1872435302734377, "loss": 0.2931743264198303, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 15.74239196636373, "step_time": 1.0927559432983398} +{"epoch": 0, "iter": 14530, "iter_tflops": 20.642784032289835, "iter_time": 0.8333465270996093, "loss": 0.2440585494041443, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 25.37670345016582, "step_time": 0.6778891677856446} +{"epoch": 0, "iter": 14531, "iter_tflops": 25.593956117693825, "iter_time": 0.6721349487304686, "loss": 0.23756210505962372, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 27.44597269023139, "step_time": 0.6267802047729493} +{"epoch": 0, "iter": 14532, "iter_tflops": 24.395569138633878, "iter_time": 0.7051523284912109, "loss": 0.17133554816246033, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 26.351315788939893, "step_time": 0.6528172073364258} +{"epoch": 0, "iter": 14533, "iter_tflops": 19.78384642304462, "iter_time": 1.0428251953125, "loss": 0.38512107729911804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.992461521260307, "step_time": 0.9827858200073242} +{"epoch": 0, "iter": 14534, "iter_tflops": 8.280589183490429, "iter_time": 2.491500671386719, "loss": 0.38433972001075745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.060330570658312, "step_time": 2.0507371368408207} +{"epoch": 0, "iter": 14535, "iter_tflops": 14.466884118860241, "iter_time": 1.4260910186767577, "loss": 0.3682525157928467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.272104671620173, "step_time": 1.1291032905578613} +{"epoch": 0, "iter": 14536, "iter_tflops": 31.521829603357485, "iter_time": 
0.6545017776489258, "loss": 0.353866845369339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.284460244931815, "step_time": 0.6017622375488281} +{"epoch": 0, "iter": 14537, "iter_tflops": 11.5897759116739, "iter_time": 1.3180209350585936, "loss": 0.13588714599609375, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 12.281496929969865, "step_time": 1.2437870864868164} +{"epoch": 0, "iter": 14538, "iter_tflops": 12.303286696293588, "iter_time": 1.2415842742919923, "loss": 0.19225753843784332, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 15.817960956037014, "step_time": 0.9657102661132813} +{"epoch": 0, "iter": 14539, "iter_tflops": 25.30275942337749, "iter_time": 0.6037115173339844, "loss": 0.13158315420150757, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 26.895808814991657, "step_time": 0.5679534454345703} +{"epoch": 0, "iter": 14540, "iter_tflops": 25.227800063058286, "iter_time": 0.6055053253173828, "loss": 0.16066057980060577, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 26.716115408967834, "step_time": 0.5717735176086426} +{"epoch": 0, "iter": 14541, "iter_tflops": 33.26306949191645, "iter_time": 0.620240219116211, "loss": 0.5793830156326294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.47655482127633, "step_time": 0.5815416297912597} +{"epoch": 0, "iter": 14542, "iter_tflops": 14.47295698054457, "iter_time": 1.425492630004883, "loss": 0.5186901688575745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.87018667738449, "step_time": 1.2999906005859376} +{"epoch": 0, "iter": 14543, "iter_tflops": 33.56635742820173, "iter_time": 0.6146360549926758, "loss": 0.6601634621620178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.375827431429606, "step_time": 0.486859956741333} +{"epoch": 0, "iter": 14544, "iter_tflops": 40.536992618002174, "iter_time": 0.5089448471069336, "loss": 0.5155564546585083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.70566239429175, "step_time": 0.4720462379455566} +{"epoch": 0, "iter": 14545, "iter_tflops": 27.620814140291483, 
"iter_time": 0.7469400939941406, "loss": 0.5299102067947388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.203475810590756, "step_time": 0.706460205078125} +{"epoch": 0, "iter": 14546, "iter_tflops": 19.305096768769136, "iter_time": 1.0686863555908204, "loss": 0.6267666816711426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.577338414886093, "step_time": 0.9561463565826416} +{"epoch": 0, "iter": 14547, "iter_tflops": 41.483127909094925, "iter_time": 0.49733697891235357, "loss": 0.61225825548172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.022696102049935, "step_time": 0.45823762893676756} +{"epoch": 0, "iter": 14548, "iter_tflops": 41.605436953946764, "iter_time": 0.4958749389648438, "loss": 0.7937137484550476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.339542707151665, "step_time": 0.4550353240966797} +{"epoch": 0, "iter": 14549, "iter_tflops": 15.596271840962956, "iter_time": 1.3228221282958985, "loss": 0.21080242097377777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.94900154528715, "step_time": 1.217245361328125} +{"epoch": 0, "iter": 14550, "iter_tflops": 15.963007290933065, "iter_time": 1.2924315032958984, "loss": 0.16840824484825134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.899929355505048, "step_time": 0.9420621032714843} +{"epoch": 0, "iter": 14551, "iter_tflops": 48.224063395257154, "iter_time": 0.42781740188598627, "loss": 0.19173310697078705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.22774777769939, "step_time": 0.3950216960906982} +{"epoch": 0, "iter": 14552, "iter_tflops": 51.82887031895579, "iter_time": 0.39806180191040036, "loss": 0.19128862023353577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.999847802041415, "step_time": 0.36841338539123536} +{"epoch": 0, "iter": 14553, "iter_tflops": 31.543415885699105, "iter_time": 0.6540538787841796, "loss": 0.04577823355793953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.60885013963721, "step_time": 0.6138589515686034} +{"epoch": 0, "iter": 14554, "iter_tflops": 
15.391265985980036, "iter_time": 1.3404416198730469, "loss": 0.028561752289533615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.79942376585877, "step_time": 1.0974322280883788} +{"epoch": 0, "iter": 14555, "iter_tflops": 44.347036830547076, "iter_time": 0.46521921157836915, "loss": 0.01983116939663887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.8287917235287, "step_time": 0.42251902580261236} +{"epoch": 0, "iter": 14556, "iter_tflops": 55.579490940064325, "iter_time": 0.37119975662231447, "loss": 0.01667945086956024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.600050138032, "step_time": 0.34044680595397947} +{"epoch": 0, "iter": 14557, "iter_tflops": 23.302723204294857, "iter_time": 0.8853511810302734, "loss": 0.016975251957774162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.491946811844628, "step_time": 0.8423623352050781} +{"epoch": 0, "iter": 14558, "iter_tflops": 15.157374246195232, "iter_time": 1.3611258239746094, "loss": 0.02017483301460743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.192252376038102, "step_time": 1.07496989440918} +{"epoch": 0, "iter": 14559, "iter_tflops": 39.47611738792452, "iter_time": 0.5226221542358399, "loss": 0.034254420548677444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.908165348943754, "step_time": 0.46986917686462404} +{"epoch": 0, "iter": 14560, "iter_tflops": 42.983010357695115, "iter_time": 0.47998251724243157, "loss": 0.03350584954023361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.37755254798434, "step_time": 0.4354613609313965} +{"epoch": 0, "iter": 14561, "iter_tflops": 17.984867610088735, "iter_time": 1.1471362457275391, "loss": 0.3981040120124817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.270557758490153, "step_time": 1.0706017837524415} +{"epoch": 0, "iter": 14562, "iter_tflops": 20.48226579647188, "iter_time": 1.0072661743164062, "loss": 0.42688673734664917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.971890712163717, "step_time": 0.7375652122497559} +{"epoch": 0, "iter": 
14563, "iter_tflops": 46.453459664239105, "iter_time": 0.4441239395141602, "loss": 0.5730052590370178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30028430244657, "step_time": 0.4101585865020752} +{"epoch": 0, "iter": 14564, "iter_tflops": 48.15320397160124, "iter_time": 0.4284469528198242, "loss": 0.38580116629600525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.865757059660254, "step_time": 0.3977787017822266} +{"epoch": 0, "iter": 14565, "iter_tflops": 29.190259718928928, "iter_time": 0.7067800598144531, "loss": 0.397592157125473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.07022727843436, "step_time": 0.6640148887634277} +{"epoch": 0, "iter": 14566, "iter_tflops": 23.368542306976803, "iter_time": 0.8828575286865235, "loss": 0.406707227230072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.998871716736335, "step_time": 0.7935380325317383} +{"epoch": 0, "iter": 14567, "iter_tflops": 47.45043520956789, "iter_time": 0.4347925033569336, "loss": 0.43760526180267334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.280212241379644, "step_time": 0.40232075119018557} +{"epoch": 0, "iter": 14568, "iter_tflops": 48.55730924833881, "iter_time": 0.4248813171386719, "loss": 0.3082401752471924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.66908828120487, "step_time": 0.39171161270141597} +{"epoch": 0, "iter": 14569, "iter_tflops": 48.63598087641851, "iter_time": 0.4241940460205078, "loss": 0.024239586666226387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.67766344722499, "step_time": 0.38435155677795413} +{"epoch": 0, "iter": 14570, "iter_tflops": 39.46292373891063, "iter_time": 0.5227968826293945, "loss": 0.0445307195186615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.757313165129254, "step_time": 0.4714890384674072} +{"epoch": 0, "iter": 14571, "iter_tflops": 45.47217486175787, "iter_time": 0.45370808792114253, "loss": 0.024154776707291603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.161678000014994, "step_time": 0.41129193305969236} 
+{"epoch": 0, "iter": 14572, "iter_tflops": 50.17552915051006, "iter_time": 0.4111783943176269, "loss": 0.021870562806725502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.43859304126977, "step_time": 0.37214316558837895} +{"epoch": 0, "iter": 14573, "iter_tflops": 21.897738836149674, "iter_time": 0.9421563415527343, "loss": 0.867072582244873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.574386162466016, "step_time": 0.8751487045288087} +{"epoch": 0, "iter": 14574, "iter_tflops": 26.502034093665745, "iter_time": 0.7784720764160156, "loss": 0.5825594067573547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.509853440739423, "step_time": 0.6991255836486817} +{"epoch": 0, "iter": 14575, "iter_tflops": 43.68627535261262, "iter_time": 0.47225572204589844, "loss": 0.7219444513320923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.06411066795705, "step_time": 0.43836148643493655} +{"epoch": 0, "iter": 14576, "iter_tflops": 46.03797855118037, "iter_time": 0.44813204574584964, "loss": 0.6224663853645325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.4762576007769, "step_time": 0.41698977470397947} +{"epoch": 0, "iter": 14577, "iter_tflops": 22.158170711475083, "iter_time": 0.7189404678344726, "loss": 0.12323376536369324, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 23.47236641002706, "step_time": 0.6786876678466797} +{"epoch": 0, "iter": 14578, "iter_tflops": 6.390230326893077, "iter_time": 2.4929313659667964, "loss": 0.12091182917356491, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 8.22976279039694, "step_time": 1.9357065353393554} +{"epoch": 0, "iter": 14579, "iter_tflops": 9.84727885560753, "iter_time": 1.6177469787597658, "loss": 0.09216448664665222, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 11.534369295801982, "step_time": 1.381124984741211} +{"epoch": 0, "iter": 14580, "iter_tflops": 29.476081713443296, "iter_time": 0.5404519424438476, "loss": 0.11374443024396896, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 32.30305504819414, "step_time": 
0.4931547679901123} +{"epoch": 0, "iter": 14581, "iter_tflops": 14.89201616171402, "iter_time": 1.0175196838378906, "loss": 0.23718306422233582, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 15.919126652624591, "step_time": 0.9518687744140625} +{"epoch": 0, "iter": 14582, "iter_tflops": 10.937300753552833, "iter_time": 1.385435028076172, "loss": 0.13362261652946472, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 12.130493314360596, "step_time": 1.2491593856811523} +{"epoch": 0, "iter": 14583, "iter_tflops": 27.04749323171712, "iter_time": 0.5602337875366211, "loss": 0.19123932719230652, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.83516186025538, "step_time": 0.5255014572143555} +{"epoch": 0, "iter": 14584, "iter_tflops": 27.154869863536398, "iter_time": 0.5580184936523438, "loss": 0.35685715079307556, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.915580630778642, "step_time": 0.5240399551391601} +{"epoch": 0, "iter": 14585, "iter_tflops": 37.924924983972204, "iter_time": 0.5439982681274413, "loss": 0.3563464283943176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.99238791563469, "step_time": 0.5032908439636231} +{"epoch": 0, "iter": 14586, "iter_tflops": 14.108390261720823, "iter_time": 1.4623279571533203, "loss": 0.3117961585521698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.293889628326525, "step_time": 1.1929701156616213} +{"epoch": 0, "iter": 14587, "iter_tflops": 31.91932552947997, "iter_time": 0.6463511734008789, "loss": 0.37644529342651367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.26614356850741, "step_time": 0.5123682498931885} +{"epoch": 0, "iter": 14588, "iter_tflops": 49.03131463991669, "iter_time": 0.4207738189697266, "loss": 0.2687893211841583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.99961363043405, "step_time": 0.3892687530517578} +{"epoch": 0, "iter": 14589, "iter_tflops": 22.10687445321884, "iter_time": 0.9332433471679688, "loss": 0.3561849594116211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.152569256637534, 
"step_time": 0.8910930480957031} +{"epoch": 0, "iter": 14590, "iter_tflops": 14.377586915555856, "iter_time": 1.4349482727050782, "loss": 0.31915414333343506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.774843060038982, "step_time": 1.098869026184082} +{"epoch": 0, "iter": 14591, "iter_tflops": 36.22489717483945, "iter_time": 0.5695280075073242, "loss": 0.38283929228782654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.68706557737391, "step_time": 0.5198442668914794} +{"epoch": 0, "iter": 14592, "iter_tflops": 34.08520954293554, "iter_time": 0.6052799377441406, "loss": 0.3850457966327667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.17116575841393, "step_time": 0.555029499053955} +{"epoch": 0, "iter": 14593, "iter_tflops": 17.07928351154218, "iter_time": 1.2079601287841797, "loss": 0.6514332890510559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.12094282773019, "step_time": 1.1385220794677733} +{"epoch": 0, "iter": 14594, "iter_tflops": 18.22896851395812, "iter_time": 1.131775146484375, "loss": 0.7164005041122437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.661307471657928, "step_time": 0.9524398994445801} +{"epoch": 0, "iter": 14595, "iter_tflops": 44.74118817732845, "iter_time": 0.46112082290649425, "loss": 0.5326210260391235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0996871651435, "step_time": 0.42892365264892585} +{"epoch": 0, "iter": 14596, "iter_tflops": 45.33794104360701, "iter_time": 0.45505139923095705, "loss": 0.6921914219856262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.81558724109076, "step_time": 0.42263331604003906} +{"epoch": 0, "iter": 14597, "iter_tflops": 38.78907956637323, "iter_time": 0.5318789138793945, "loss": 0.4217836260795593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.126480850117886, "step_time": 0.4897416801452637} +{"epoch": 0, "iter": 14598, "iter_tflops": 45.42931189377523, "iter_time": 0.4541361656188964, "loss": 0.5680735111236572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
49.233272304588986, "step_time": 0.41904778099060064} +{"epoch": 0, "iter": 14599, "iter_tflops": 46.740472673000696, "iter_time": 0.44139676666259764, "loss": 0.4238376021385193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.286233047059, "step_time": 0.4102731952667236} +{"epoch": 0, "iter": 14600, "iter_tflops": 51.37161112353133, "iter_time": 0.40160495376586913, "loss": 0.41080203652381897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.718808016985776, "step_time": 0.37027162361145016} +{"epoch": 0, "iter": 14601, "iter_tflops": 29.695400704512743, "iter_time": 0.6947572021484374, "loss": 0.043204642832279205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.595607626561456, "step_time": 0.6529734687805175} +{"epoch": 0, "iter": 14602, "iter_tflops": 16.012470048470025, "iter_time": 1.2884391632080079, "loss": 0.02617848478257656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.26480717714795, "step_time": 0.9701989459991456} +{"epoch": 0, "iter": 14603, "iter_tflops": 45.04695917488401, "iter_time": 0.4579908142089844, "loss": 0.020381413400173187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.747794514307465, "step_time": 0.41471373176574705} +{"epoch": 0, "iter": 14604, "iter_tflops": 39.9630229035215, "iter_time": 0.5162545776367188, "loss": 0.07574938237667084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10422359048989, "step_time": 0.4677804489135742} +{"epoch": 0, "iter": 14605, "iter_tflops": 19.537421529318937, "iter_time": 1.0559783172607422, "loss": 0.11870583891868591, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.850367403023593, "step_time": 0.9894834518432617} +{"epoch": 0, "iter": 14606, "iter_tflops": 20.0640513822192, "iter_time": 1.0282615966796875, "loss": 0.08546379208564758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.523477805322656, "step_time": 0.8412792701721192} +{"epoch": 0, "iter": 14607, "iter_tflops": 48.350568838803454, "iter_time": 0.4266980514526367, "loss": 0.06168324127793312, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 52.57350966903274, "step_time": 0.39242374420166015} +{"epoch": 0, "iter": 14608, "iter_tflops": 50.29941300293907, "iter_time": 0.41016569137573244, "loss": 0.1706346720457077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.279404130249546, "step_time": 0.3800906410217285} +{"epoch": 0, "iter": 14609, "iter_tflops": 25.148012230568515, "iter_time": 0.8203866500854492, "loss": 0.6763535141944885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.428554625379924, "step_time": 0.7806364669799806} +{"epoch": 0, "iter": 14610, "iter_tflops": 12.816803590322001, "iter_time": 1.6096910095214845, "loss": 0.5942292809486389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.397850310744953, "step_time": 1.2581584243774415} +{"epoch": 0, "iter": 14611, "iter_tflops": 37.41941893213509, "iter_time": 0.5513472442626953, "loss": 0.6112005710601807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.88626589225702, "step_time": 0.5045971565246582} +{"epoch": 0, "iter": 14612, "iter_tflops": 40.193089238977905, "iter_time": 0.5132995223999023, "loss": 0.7499539256095886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59733536859884, "step_time": 0.4732191390991211} +{"epoch": 0, "iter": 14613, "iter_tflops": 16.568120046836967, "iter_time": 1.2452283935546875, "loss": 0.04542657360434532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.676330222749318, "step_time": 1.1671593170166017} +{"epoch": 0, "iter": 14614, "iter_tflops": 19.37135121330392, "iter_time": 1.065031204223633, "loss": 0.05143427476286888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.61687507713518, "step_time": 0.8735742321014404} +{"epoch": 0, "iter": 14615, "iter_tflops": 46.22508418978645, "iter_time": 0.4463181381225586, "loss": 0.023467881605029106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.815968925633484, "step_time": 0.40599626350402834} +{"epoch": 0, "iter": 14616, "iter_tflops": 44.692932920961574, "iter_time": 0.4616186981201172, "loss": 0.0850340723991394, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 48.950396487146186, "step_time": 0.42146938514709475} +{"epoch": 0, "iter": 14617, "iter_tflops": 20.509325336957733, "iter_time": 1.005937210083008, "loss": 0.05463185906410217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.12085627215917, "step_time": 0.9326534767150879} +{"epoch": 0, "iter": 14618, "iter_tflops": 22.95289770170942, "iter_time": 0.8988448333740234, "loss": 0.07322625070810318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.861004612395224, "step_time": 0.7977684478759766} +{"epoch": 0, "iter": 14619, "iter_tflops": 41.618632291020596, "iter_time": 0.49571772003173825, "loss": 0.06273623555898666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.78304627594544, "step_time": 0.4506273651123047} +{"epoch": 0, "iter": 14620, "iter_tflops": 42.373167891099634, "iter_time": 0.4868905143737793, "loss": 0.06253577023744583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.67366483556083, "step_time": 0.44202857398986817} +{"epoch": 0, "iter": 14621, "iter_tflops": 12.930098248732156, "iter_time": 1.1056044921875, "loss": 0.11469785869121552, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 13.898884416170622, "step_time": 1.028541160583496} +{"epoch": 0, "iter": 14622, "iter_tflops": 13.979599308864383, "iter_time": 1.022602607727051, "loss": 0.037669483572244644, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 19.300903581023377, "step_time": 0.7406686763763427} +{"epoch": 0, "iter": 14623, "iter_tflops": 28.59240155561524, "iter_time": 0.4999781036376953, "loss": 0.05089687556028366, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 31.530078465511973, "step_time": 0.4533948345184326} +{"epoch": 0, "iter": 14624, "iter_tflops": 30.66874388957501, "iter_time": 0.46612847137451174, "loss": 0.0736079066991806, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 33.576548433623834, "step_time": 0.42576069831848146} +{"epoch": 0, "iter": 14625, "iter_tflops": 27.607482015139254, "iter_time": 0.7473008041381837, "loss": 
0.6521029472351074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.800860477579203, "step_time": 0.692298583984375} +{"epoch": 0, "iter": 14626, "iter_tflops": 8.96297446987414, "iter_time": 2.301813262939453, "loss": 0.5852309465408325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.023614852223071, "step_time": 1.8715361328125002} +{"epoch": 0, "iter": 14627, "iter_tflops": 11.982088504887832, "iter_time": 1.7218278350830079, "loss": 0.6171773672103882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.799326862575665, "step_time": 1.394056209564209} +{"epoch": 0, "iter": 14628, "iter_tflops": 35.916794073417904, "iter_time": 0.5744135589599609, "loss": 0.6943300366401672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.245853876756065, "step_time": 0.5256884860992431} +{"epoch": 0, "iter": 14629, "iter_tflops": 16.771821375738135, "iter_time": 0.8304869155883788, "loss": 0.09235823154449463, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 18.31133467221044, "step_time": 0.7606642799377442} +{"epoch": 0, "iter": 14630, "iter_tflops": 20.068744322956558, "iter_time": 0.6940532989501953, "loss": 0.12743321061134338, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 21.696586840190633, "step_time": 0.6419801559448242} +{"epoch": 0, "iter": 14631, "iter_tflops": 21.845063897391668, "iter_time": 0.6376167297363282, "loss": 0.272183895111084, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 23.518097596810524, "step_time": 0.5922578620910646} +{"epoch": 0, "iter": 14632, "iter_tflops": 20.500508631081168, "iter_time": 0.6794357376098634, "loss": 0.4072783887386322, "lr": 3e-05, "seqlen": 5584.0, "step_tflops": 21.980285323346102, "step_time": 0.633694149017334} +{"epoch": 0, "iter": 14633, "iter_tflops": 32.50245891867294, "iter_time": 0.6347548522949219, "loss": 0.2196664661169052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.92777396782353, "step_time": 0.5742380123138427} +{"epoch": 0, "iter": 14634, "iter_tflops": 35.46107010903856, "iter_time": 0.5817955703735352, 
"loss": 0.20027503371238708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.989327309695355, "step_time": 0.5291472034454346} +{"epoch": 0, "iter": 14635, "iter_tflops": 40.39290201177255, "iter_time": 0.510760368347168, "loss": 0.17310374975204468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.038578993003235, "step_time": 0.4684777297973633} +{"epoch": 0, "iter": 14636, "iter_tflops": 41.449677301417026, "iter_time": 0.49773833847045895, "loss": 0.1873847097158432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2703883942776, "step_time": 0.4557304286956787} +{"epoch": 0, "iter": 14637, "iter_tflops": 18.58572530819665, "iter_time": 1.1100504913330078, "loss": 0.5850392580032349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.68724072918859, "step_time": 1.0479423599243163} +{"epoch": 0, "iter": 14638, "iter_tflops": 15.692712021670339, "iter_time": 1.314692672729492, "loss": 0.37850672006607056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.716184366304166, "step_time": 1.1645336875915528} +{"epoch": 0, "iter": 14639, "iter_tflops": 34.68144389823429, "iter_time": 0.5948741226196289, "loss": 0.8205859661102295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.64010146754926, "step_time": 0.5481147155761719} +{"epoch": 0, "iter": 14640, "iter_tflops": 39.41908553061448, "iter_time": 0.523378288269043, "loss": 0.6694662570953369, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.763544964191, "step_time": 0.48244581985473634} +{"epoch": 0, "iter": 14641, "iter_tflops": 17.568664750436273, "iter_time": 1.1743119812011718, "loss": 0.6651135683059692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.71409962623129, "step_time": 1.1024358062744142} +{"epoch": 0, "iter": 14642, "iter_tflops": 17.716888629928803, "iter_time": 1.1644873962402345, "loss": 0.7120454907417297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.338838014037467, "step_time": 0.9668330345153809} +{"epoch": 0, "iter": 14643, "iter_tflops": 44.08910126692797, "iter_time": 
0.4679408950805664, "loss": 0.676146388053894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.52240787395863, "step_time": 0.4341340103149414} +{"epoch": 0, "iter": 14644, "iter_tflops": 41.582624043465984, "iter_time": 0.4961469841003418, "loss": 0.615207850933075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.818113048590476, "step_time": 0.46032936477661135} +{"epoch": 0, "iter": 14645, "iter_tflops": 30.78314450084674, "iter_time": 0.6702074737548829, "loss": 0.5462606549263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.67262689506693, "step_time": 0.6314488754272461} +{"epoch": 0, "iter": 14646, "iter_tflops": 14.307286287911015, "iter_time": 1.4419990692138671, "loss": 0.6537177562713623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.839642460081524, "step_time": 1.2251503295898438} +{"epoch": 0, "iter": 14647, "iter_tflops": 36.799370535825304, "iter_time": 0.5606371307373047, "loss": 0.5715062022209167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.9183819631293, "step_time": 0.44929922676086426} +{"epoch": 0, "iter": 14648, "iter_tflops": 49.584595024699674, "iter_time": 0.41607869338989256, "loss": 0.6348722577095032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.63147527413222, "step_time": 0.38468256568908693} +{"epoch": 0, "iter": 14649, "iter_tflops": 21.191423912109116, "iter_time": 0.9735586242675781, "loss": 0.2430274486541748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.063252469608084, "step_time": 0.935088493347168} +{"epoch": 0, "iter": 14650, "iter_tflops": 18.504094492467235, "iter_time": 1.1149474792480467, "loss": 0.3257444500923157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.88557531087242, "step_time": 0.9878154277801515} +{"epoch": 0, "iter": 14651, "iter_tflops": 47.18152000220881, "iter_time": 0.43727064132690424, "loss": 0.42704179883003235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.298876465195704, "step_time": 0.40217437362670905} +{"epoch": 0, "iter": 14652, "iter_tflops": 47.80775396549642, 
"iter_time": 0.43154283142089844, "loss": 0.2889518439769745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.77931403398379, "step_time": 0.39844277381896975} +{"epoch": 0, "iter": 14653, "iter_tflops": 45.97655456688028, "iter_time": 0.4487307434082032, "loss": 0.22529621422290802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.348667354180364, "step_time": 0.409764440536499} +{"epoch": 0, "iter": 14654, "iter_tflops": 43.758323056824885, "iter_time": 0.47147815704345697, "loss": 0.20861494541168213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.86152470889793, "step_time": 0.4310580081939697} +{"epoch": 0, "iter": 14655, "iter_tflops": 49.311071637326016, "iter_time": 0.4183866386413574, "loss": 0.27887508273124695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.50355160548217, "step_time": 0.3856023178100586} +{"epoch": 0, "iter": 14656, "iter_tflops": 54.76723460463117, "iter_time": 0.3767050437927246, "loss": 0.1807708591222763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.70411829062115, "step_time": 0.3455556182861329} +{"epoch": 0, "iter": 14657, "iter_tflops": 23.122066160683588, "iter_time": 0.7066969223022461, "loss": 0.07483422011137009, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 24.487883778106173, "step_time": 0.6672807312011719} +{"epoch": 0, "iter": 14658, "iter_tflops": 11.054936652359237, "iter_time": 1.4780991973876954, "loss": 0.06888541579246521, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 14.131361199887248, "step_time": 1.1563141555786134} +{"epoch": 0, "iter": 14659, "iter_tflops": 41.14315890011984, "iter_time": 0.3971569862365723, "loss": 0.04634426161646843, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 44.870125442362514, "step_time": 0.3641686496734619} +{"epoch": 0, "iter": 14660, "iter_tflops": 42.42489517047014, "iter_time": 0.38515812301635743, "loss": 0.07952240854501724, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 46.28925311191489, "step_time": 0.35300403213500975} +{"epoch": 0, "iter": 14661, "iter_tflops": 
32.41790480824616, "iter_time": 0.6364104537963867, "loss": 0.3692253530025482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.644363248143996, "step_time": 0.5955108299255372} +{"epoch": 0, "iter": 14662, "iter_tflops": 13.332724694807956, "iter_time": 1.5474026489257813, "loss": 0.3817180395126343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.853353086180592, "step_time": 1.3013709716796875} +{"epoch": 0, "iter": 14663, "iter_tflops": 36.943274384010756, "iter_time": 0.5584533004760741, "loss": 0.465554416179657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.239916250553264, "step_time": 0.5127021980285644} +{"epoch": 0, "iter": 14664, "iter_tflops": 38.43337377310673, "iter_time": 0.5368015213012696, "loss": 0.5596515536308289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.10687033260561, "step_time": 0.48996976852416996} +{"epoch": 0, "iter": 14665, "iter_tflops": 18.74598812298934, "iter_time": 1.1005604705810548, "loss": 0.12969547510147095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.200099591415675, "step_time": 1.0213362274169921} +{"epoch": 0, "iter": 14666, "iter_tflops": 20.24701032558341, "iter_time": 1.018969871520996, "loss": 0.1040034368634224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.233020084621646, "step_time": 0.8513628692626953} +{"epoch": 0, "iter": 14667, "iter_tflops": 47.52501535661407, "iter_time": 0.4341101913452149, "loss": 0.05192021653056145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.64430651422602, "step_time": 0.399484375} +{"epoch": 0, "iter": 14668, "iter_tflops": 49.186097229761934, "iter_time": 0.4194496955871582, "loss": 0.23777532577514648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.73082315224642, "step_time": 0.3839712905883789} +{"epoch": 0, "iter": 14669, "iter_tflops": 27.64953460222296, "iter_time": 0.44057093811035153, "loss": 0.0323987752199173, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 30.478608713420904, "step_time": 0.39967642593383784} +{"epoch": 0, "iter": 14670, 
"iter_tflops": 30.973296228575833, "iter_time": 0.393293025970459, "loss": 0.03736140951514244, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 33.984342981424994, "step_time": 0.35844687080383303} +{"epoch": 0, "iter": 14671, "iter_tflops": 33.98597510198048, "iter_time": 0.3584296569824219, "loss": 0.05018872767686844, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 37.20120899998769, "step_time": 0.3274512233734131} +{"epoch": 0, "iter": 14672, "iter_tflops": 33.78784194504739, "iter_time": 0.3605315017700196, "loss": 0.018193230032920837, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 36.93684079785808, "step_time": 0.3297948913574219} +{"epoch": 0, "iter": 14673, "iter_tflops": 41.76216655091388, "iter_time": 0.4940139656066895, "loss": 0.4761451184749603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.148967046103245, "step_time": 0.4569560470581055} +{"epoch": 0, "iter": 14674, "iter_tflops": 39.347761365431474, "iter_time": 0.5243269958496094, "loss": 0.5117433071136475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.19690585410176, "step_time": 0.48892432022094723} +{"epoch": 0, "iter": 14675, "iter_tflops": 39.24080875090653, "iter_time": 0.5257560729980468, "loss": 0.5902681350708008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.76339382008433, "step_time": 0.48244752502441407} +{"epoch": 0, "iter": 14676, "iter_tflops": 46.05232404532157, "iter_time": 0.44799245071411137, "loss": 0.7873921394348145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.78767742624072, "step_time": 0.4143815212249756} +{"epoch": 0, "iter": 14677, "iter_tflops": 20.778262681390455, "iter_time": 0.7332027816772461, "loss": 0.012134934775531292, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 21.97515002862707, "step_time": 0.6932685317993165} +{"epoch": 0, "iter": 14678, "iter_tflops": 10.777382352585287, "iter_time": 1.4135788726806642, "loss": 0.0041368636302649975, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 13.282630680701338, "step_time": 1.146962553024292} +{"epoch": 
0, "iter": 14679, "iter_tflops": 32.24185935975352, "iter_time": 0.47251245117187496, "loss": 0.0051603480242192745, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 35.82739708424217, "step_time": 0.42522430419921875} +{"epoch": 0, "iter": 14680, "iter_tflops": 38.244535438560874, "iter_time": 0.3983491973876953, "loss": 0.004393694456666708, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 42.29206380605286, "step_time": 0.3602255039215088} +{"epoch": 0, "iter": 14681, "iter_tflops": 18.227898511086455, "iter_time": 1.131841583251953, "loss": 0.10501521080732346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.545335593715087, "step_time": 1.0555507431030273} +{"epoch": 0, "iter": 14682, "iter_tflops": 35.6509172718465, "iter_time": 0.5786974105834961, "loss": 0.1318829208612442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.3925629962741, "step_time": 0.4754522914886474} +{"epoch": 0, "iter": 14683, "iter_tflops": 47.97345493136394, "iter_time": 0.43005227661132817, "loss": 0.12133733183145523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.2109673216386, "step_time": 0.3951486549377442} +{"epoch": 0, "iter": 14684, "iter_tflops": 46.36540856143945, "iter_time": 0.44496736145019533, "loss": 0.1401539146900177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.05955842001434, "step_time": 0.41213095283508294} +{"epoch": 0, "iter": 14685, "iter_tflops": 47.172587785521685, "iter_time": 0.4373534393310547, "loss": 0.11947082728147507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.98181862967292, "step_time": 0.396890567779541} +{"epoch": 0, "iter": 14686, "iter_tflops": 47.67434621219876, "iter_time": 0.43275042343139647, "loss": 0.10783421248197556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.82021329850256, "step_time": 0.39812830162048335} +{"epoch": 0, "iter": 14687, "iter_tflops": 51.86811033056883, "iter_time": 0.39776065444946296, "loss": 0.150878444314003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.501462055366844, "step_time": 
0.36514264869689944} +{"epoch": 0, "iter": 14688, "iter_tflops": 46.118261731129955, "iter_time": 0.4473519325256347, "loss": 0.13254673779010773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.67280512870631, "step_time": 0.41533981132507325} +{"epoch": 0, "iter": 14689, "iter_tflops": 31.0445354376951, "iter_time": 0.6552099227905274, "loss": 0.0568036325275898, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 33.34411639013946, "step_time": 0.6100232925415039} +{"epoch": 0, "iter": 14690, "iter_tflops": 8.900767436312588, "iter_time": 2.285273468017578, "loss": 0.07222434878349304, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 11.694684819339995, "step_time": 1.73931046295166} +{"epoch": 0, "iter": 14691, "iter_tflops": 11.402339705794315, "iter_time": 1.7839047241210935, "loss": 0.08490006625652313, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 13.219775658675752, "step_time": 1.5386560401916503} +{"epoch": 0, "iter": 14692, "iter_tflops": 40.407553060296905, "iter_time": 0.5033882560729981, "loss": 0.074860580265522, "lr": 3e-05, "seqlen": 8080.0, "step_tflops": 45.882937898575925, "step_time": 0.44331702804565426} +{"epoch": 0, "iter": 14693, "iter_tflops": 15.513631876417552, "iter_time": 0.9057196502685547, "loss": 0.2718642055988312, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 16.439252265258467, "step_time": 0.8547226486206055} +{"epoch": 0, "iter": 14694, "iter_tflops": 9.948139264002307, "iter_time": 1.4124250640869143, "loss": 0.2924181818962097, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 11.7199428184083, "step_time": 1.1988967399597168} +{"epoch": 0, "iter": 14695, "iter_tflops": 22.276585602683898, "iter_time": 0.6307520141601562, "loss": 0.14001192152500153, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 23.972497296131095, "step_time": 0.5861300582885742} +{"epoch": 0, "iter": 14696, "iter_tflops": 22.732805586741403, "iter_time": 0.6180935821533203, "loss": 0.19011317193508148, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 24.373355820407344, 
"step_time": 0.576490219116211} +{"epoch": 0, "iter": 14697, "iter_tflops": 20.179402935175602, "iter_time": 1.022383743286133, "loss": 0.4156002700328827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.432345215647587, "step_time": 0.9626148376464844} +{"epoch": 0, "iter": 14698, "iter_tflops": 10.717707739560382, "iter_time": 1.9249539184570312, "loss": 0.4343918561935425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.231012171936353, "step_time": 1.6867854614257813} +{"epoch": 0, "iter": 14699, "iter_tflops": 9.774731712595724, "iter_time": 2.1106557312011716, "loss": 0.5159226059913635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.487002209398254, "step_time": 1.6522054824829102} +{"epoch": 0, "iter": 14700, "iter_tflops": 38.64771089437897, "iter_time": 0.533824462890625, "loss": 0.5584564208984375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60650293318625, "step_time": 0.4842240524291992} +{"epoch": 0, "iter": 14701, "iter_tflops": 17.10161337226905, "iter_time": 0.9602794036865234, "loss": 0.32092422246932983, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 18.38418274232634, "step_time": 0.893285675048828} +{"epoch": 0, "iter": 14702, "iter_tflops": 28.411697941596476, "iter_time": 0.5780128707885741, "loss": 0.4184213876724243, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 30.419707829870255, "step_time": 0.5398581466674804} +{"epoch": 0, "iter": 14703, "iter_tflops": 28.80490127423041, "iter_time": 0.5701226654052735, "loss": 0.17335300147533417, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 30.608931990385717, "step_time": 0.5365207481384278} +{"epoch": 0, "iter": 14704, "iter_tflops": 30.610366902054967, "iter_time": 0.5364955978393555, "loss": 0.21628102660179138, "lr": 3e-05, "seqlen": 6560.0, "step_tflops": 32.53662096196695, "step_time": 0.5047336387634278} +{"epoch": 0, "iter": 14705, "iter_tflops": 25.306672514326095, "iter_time": 0.8152432327270508, "loss": 0.8605445623397827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
26.688076359280167, "step_time": 0.7730453567504882} +{"epoch": 0, "iter": 14706, "iter_tflops": 18.27679218514004, "iter_time": 1.128813705444336, "loss": 0.6208578944206238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.318998411224268, "step_time": 0.8847332611083984} +{"epoch": 0, "iter": 14707, "iter_tflops": 35.23370338227346, "iter_time": 0.5855499572753906, "loss": 0.4762919843196869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.291727145930764, "step_time": 0.5387872276306153} +{"epoch": 0, "iter": 14708, "iter_tflops": 34.84151370331529, "iter_time": 0.5921411361694336, "loss": 0.5760719180107117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.894789239898685, "step_time": 0.5444308815002442} +{"epoch": 0, "iter": 14709, "iter_tflops": 1.431771961947048, "iter_time": 1.0672000732421874, "loss": 0.9211358428001404, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 1.5499050397791074, "step_time": 0.985858554840088} +{"epoch": 0, "iter": 14710, "iter_tflops": 1.8473586232108135, "iter_time": 0.8271199340820312, "loss": 0.8934361934661865, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 2.2680543087582317, "step_time": 0.6736995391845704} +{"epoch": 0, "iter": 14711, "iter_tflops": 3.502165912155494, "iter_time": 0.43629776000976567, "loss": 1.0571101903915405, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.788773551450987, "step_time": 0.4032933406829834} +{"epoch": 0, "iter": 14712, "iter_tflops": 3.5827318029463417, "iter_time": 0.4264866104125976, "loss": 1.0255458354949951, "lr": 3e-05, "seqlen": 624.0, "step_tflops": 3.8694837879141333, "step_time": 0.3948813915252685} +{"epoch": 0, "iter": 14713, "iter_tflops": 28.832383218417995, "iter_time": 0.715552833557129, "loss": 0.7043080925941467, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.58260340759775, "step_time": 0.6746022644042968} +{"epoch": 0, "iter": 14714, "iter_tflops": 14.026884414359994, "iter_time": 1.4708250885009766, "loss": 0.640842616558075, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 16.63019582866163, "step_time": 1.2405803108215332} +{"epoch": 0, "iter": 14715, "iter_tflops": 33.633707023618044, "iter_time": 0.6134052810668945, "loss": 0.585227370262146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.57488150277038, "step_time": 0.5640782051086426} +{"epoch": 0, "iter": 14716, "iter_tflops": 37.38084020011504, "iter_time": 0.551916259765625, "loss": 0.7185829281806946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.929181513773145, "step_time": 0.5040680694580078} +{"epoch": 0, "iter": 14717, "iter_tflops": 16.374671384727087, "iter_time": 1.2599393920898438, "loss": 0.29986751079559326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.503059075963755, "step_time": 1.1787135848999024} +{"epoch": 0, "iter": 14718, "iter_tflops": 18.331495310358715, "iter_time": 1.1254452056884767, "loss": 0.17876200377941132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.583788180687144, "step_time": 0.9135355567932129} +{"epoch": 0, "iter": 14719, "iter_tflops": 46.17258539594229, "iter_time": 0.4468256072998047, "loss": 0.16879206895828247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.92664248735954, "step_time": 0.4132281379699707} +{"epoch": 0, "iter": 14720, "iter_tflops": 47.28188377598305, "iter_time": 0.4363424606323242, "loss": 0.23114082217216492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.15329585754446, "step_time": 0.40331894874572755} +{"epoch": 0, "iter": 14721, "iter_tflops": 50.996470136093194, "iter_time": 0.4045592460632324, "loss": 0.004506074823439121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.4638927848176, "step_time": 0.36538560295104977} +{"epoch": 0, "iter": 14722, "iter_tflops": 37.346240829254384, "iter_time": 0.5524275817871094, "loss": 0.005580882541835308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42862385313452, "step_time": 0.417391622543335} +{"epoch": 0, "iter": 14723, "iter_tflops": 55.18106450783957, "iter_time": 0.37387994766235344, "loss": 0.006672610063105822, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 60.482957115628686, "step_time": 0.34110589981079104} +{"epoch": 0, "iter": 14724, "iter_tflops": 58.143752627678296, "iter_time": 0.35482906723022456, "loss": 0.0029923978727310896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.796727633710866, "step_time": 0.32338795852661134} +{"epoch": 0, "iter": 14725, "iter_tflops": 39.5912050980289, "iter_time": 0.5211029434204102, "loss": 0.6707358956336975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.82677891863701, "step_time": 0.48173348617553713} +{"epoch": 0, "iter": 14726, "iter_tflops": 40.61933167788621, "iter_time": 0.5079131698608398, "loss": 0.614330530166626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.83808417677197, "step_time": 0.47062032699584966} +{"epoch": 0, "iter": 14727, "iter_tflops": 43.0255580295291, "iter_time": 0.4795078659057617, "loss": 0.5042815804481506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.3228665340449, "step_time": 0.4453760108947754} +{"epoch": 0, "iter": 14728, "iter_tflops": 45.30879033218083, "iter_time": 0.4553441696166992, "loss": 0.7600383162498474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6344879453208, "step_time": 0.42420706748962406} +{"epoch": 0, "iter": 14729, "iter_tflops": 27.478178597553544, "iter_time": 0.7508173599243164, "loss": 0.5817870497703552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.065528015773786, "step_time": 0.709813133239746} +{"epoch": 0, "iter": 14730, "iter_tflops": 14.559531418651197, "iter_time": 1.417016311645508, "loss": 0.7134898900985718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.850635116972065, "step_time": 1.0944508438110352} +{"epoch": 0, "iter": 14731, "iter_tflops": 35.18849246316998, "iter_time": 0.5863022842407226, "loss": 0.563624382019043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.19658012442464, "step_time": 0.5401293373107909} +{"epoch": 0, "iter": 14732, "iter_tflops": 33.48758499668377, "iter_time": 0.6160818557739257, "loss": 0.5435970425605774, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 36.42718569668105, "step_time": 0.5663652877807617} +{"epoch": 0, "iter": 14733, "iter_tflops": 25.375244088077856, "iter_time": 0.8130401992797851, "loss": 0.18693538010120392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.851412520214797, "step_time": 0.7407557334899902} +{"epoch": 0, "iter": 14734, "iter_tflops": 42.51567946558153, "iter_time": 0.4852584686279297, "loss": 0.14485231041908264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.2856729247993, "step_time": 0.4457339000701904} +{"epoch": 0, "iter": 14735, "iter_tflops": 48.6173427582685, "iter_time": 0.4243566665649413, "loss": 0.17064140737056732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.78312668066529, "step_time": 0.3908653163909912} +{"epoch": 0, "iter": 14736, "iter_tflops": 52.189858275245996, "iter_time": 0.3953084793090821, "loss": 0.1545032411813736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.60796782112955, "step_time": 0.36445564651489265} +{"epoch": 0, "iter": 14737, "iter_tflops": 36.001193941503665, "iter_time": 0.5730669250488282, "loss": 0.35039258003234863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.57038153898224, "step_time": 0.5348947219848633} +{"epoch": 0, "iter": 14738, "iter_tflops": 10.472151244624218, "iter_time": 1.9700912475585939, "loss": 0.4989728629589081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.480002942973806, "step_time": 1.6531321029663086} +{"epoch": 0, "iter": 14739, "iter_tflops": 10.076600506841132, "iter_time": 2.047425964355469, "loss": 0.3462836742401123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.356507063404345, "step_time": 1.6696541671752931} +{"epoch": 0, "iter": 14740, "iter_tflops": 31.249028593143077, "iter_time": 0.6602155151367187, "loss": 0.3474269211292267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14382366779433, "step_time": 0.44710411643981934} +{"epoch": 0, "iter": 14741, "iter_tflops": 26.571131200300467, "iter_time": 0.6195954208374024, "loss": 
0.21030846238136292, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 28.432251203581924, "step_time": 0.5790379066467284} +{"epoch": 0, "iter": 14742, "iter_tflops": 27.952847352380424, "iter_time": 0.5889686660766602, "loss": 0.3527314364910126, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 29.70205052746111, "step_time": 0.5542833213806153} +{"epoch": 0, "iter": 14743, "iter_tflops": 29.144109947694115, "iter_time": 0.564894630432129, "loss": 0.2720876634120941, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.067245140620432, "step_time": 0.5299263305664063} +{"epoch": 0, "iter": 14744, "iter_tflops": 29.681508475676925, "iter_time": 0.5546669311523438, "loss": 0.20763491094112396, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 31.548384490599584, "step_time": 0.5218445091247559} +{"epoch": 0, "iter": 14745, "iter_tflops": 40.71711810130831, "iter_time": 0.5066933631896973, "loss": 0.08744436502456665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13437620620374, "step_time": 0.4674608612060547} +{"epoch": 0, "iter": 14746, "iter_tflops": 11.029924912488537, "iter_time": 1.8704654541015624, "loss": 0.10172850638628006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.316276738152734, "step_time": 1.54931396484375} +{"epoch": 0, "iter": 14747, "iter_tflops": 20.29770686498641, "iter_time": 1.0164248428344727, "loss": 0.09687969833612442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.91173696173111, "step_time": 0.9004596004486085} +{"epoch": 0, "iter": 14748, "iter_tflops": 44.187164448374794, "iter_time": 0.4669024085998535, "loss": 0.08458539843559265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.66491310606552, "step_time": 0.4239418544769287} +{"epoch": 0, "iter": 14749, "iter_tflops": 16.944423925834005, "iter_time": 1.0249434204101562, "loss": 0.23535597324371338, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 18.294281437379496, "step_time": 0.9493171882629395} +{"epoch": 0, "iter": 14750, "iter_tflops": 11.312558701437048, "iter_time": 
1.535203155517578, "loss": 0.16153046488761902, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 15.400023740524903, "step_time": 1.1277304573059082} +{"epoch": 0, "iter": 14751, "iter_tflops": 31.346485530835178, "iter_time": 0.5540358200073242, "loss": 0.2719099223613739, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 33.445315357337094, "step_time": 0.5192678146362305} +{"epoch": 0, "iter": 14752, "iter_tflops": 30.40895358700073, "iter_time": 0.5711171798706055, "loss": 0.20100265741348267, "lr": 3e-05, "seqlen": 6928.0, "step_tflops": 32.367515370840934, "step_time": 0.5365588188171386} +{"epoch": 0, "iter": 14753, "iter_tflops": 28.152070063075627, "iter_time": 0.7328446350097657, "loss": 0.16484187543392181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.794961636683063, "step_time": 0.6924356460571289} +{"epoch": 0, "iter": 14754, "iter_tflops": 11.475137833870615, "iter_time": 1.797895050048828, "loss": 0.10469049215316772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.737744493221273, "step_time": 1.3998813400268557} +{"epoch": 0, "iter": 14755, "iter_tflops": 13.594078850006786, "iter_time": 1.5176529235839844, "loss": 0.14640429615974426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.977577813403046, "step_time": 1.3774652862548828} +{"epoch": 0, "iter": 14756, "iter_tflops": 19.935202073375223, "iter_time": 1.0349076690673829, "loss": 0.07504788041114807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.253901528349054, "step_time": 0.8872099800109863} +{"epoch": 0, "iter": 14757, "iter_tflops": 18.250557950467357, "iter_time": 0.8011775283813478, "loss": 0.15819568932056427, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 19.3031889415173, "step_time": 0.7574881515502929} +{"epoch": 0, "iter": 14758, "iter_tflops": 7.902767649539198, "iter_time": 1.8502298889160156, "loss": 0.1542215347290039, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 10.59367466219751, "step_time": 1.3802516479492186} +{"epoch": 0, "iter": 14759, "iter_tflops": 
8.097926841571576, "iter_time": 1.8056395416259765, "loss": 0.20696797966957092, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 9.857661882243802, "step_time": 1.4833068008422852} +{"epoch": 0, "iter": 14760, "iter_tflops": 14.080290877017259, "iter_time": 1.0384683837890625, "loss": 0.2124200463294983, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 18.83327564814607, "step_time": 0.7763884086608885} +{"epoch": 0, "iter": 14761, "iter_tflops": 16.242704307478625, "iter_time": 0.8650653839111327, "loss": 0.16379430890083313, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 17.066794377624724, "step_time": 0.823294692993164} +{"epoch": 0, "iter": 14762, "iter_tflops": 8.457423794499578, "iter_time": 1.6613807678222656, "loss": 0.2748696208000183, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 10.076005471177504, "step_time": 1.3945011520385742} +{"epoch": 0, "iter": 14763, "iter_tflops": 21.350332758491522, "iter_time": 0.6581162643432618, "loss": 0.2301478236913681, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 23.025713618483906, "step_time": 0.610230869293213} +{"epoch": 0, "iter": 14764, "iter_tflops": 23.780174859689385, "iter_time": 0.5908703918457032, "loss": 0.18385177850723267, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 25.41936345939829, "step_time": 0.5527676277160645} +{"epoch": 0, "iter": 14765, "iter_tflops": 37.427002093707436, "iter_time": 0.5512355346679688, "loss": 0.08464862406253815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08158504125405, "step_time": 0.49026417350769047} +{"epoch": 0, "iter": 14766, "iter_tflops": 39.55141337488868, "iter_time": 0.521627212524414, "loss": 0.11420832574367523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.64819006124303, "step_time": 0.4726677894592285} +{"epoch": 0, "iter": 14767, "iter_tflops": 40.0250119229733, "iter_time": 0.5154550247192383, "loss": 0.08498021215200424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.171167712052444, "step_time": 0.46707149887084953} +{"epoch": 0, "iter": 14768, 
"iter_tflops": 42.115899948455365, "iter_time": 0.4898647193908692, "loss": 0.17050662636756897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.261007559789654, "step_time": 0.4459715557098389} +{"epoch": 0, "iter": 14769, "iter_tflops": 22.43874096577607, "iter_time": 0.9194407806396484, "loss": 0.13066132366657257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.01769037621389, "step_time": 0.8589957313537598} +{"epoch": 0, "iter": 14770, "iter_tflops": 23.269385419153807, "iter_time": 0.8866196136474609, "loss": 0.1867884397506714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.135571040141357, "step_time": 0.7332743835449218} +{"epoch": 0, "iter": 14771, "iter_tflops": 47.84772690207963, "iter_time": 0.43118231201171875, "loss": 0.15521296858787537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.931414813075605, "step_time": 0.3972757835388184} +{"epoch": 0, "iter": 14772, "iter_tflops": 50.64172092570733, "iter_time": 0.4073932151794434, "loss": 0.120064876973629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.97828931909996, "step_time": 0.37525892066955563} +{"epoch": 0, "iter": 14773, "iter_tflops": 31.534668532377005, "iter_time": 0.6542353057861328, "loss": 0.2770044505596161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.49909048383662, "step_time": 0.6158702583312987} +{"epoch": 0, "iter": 14774, "iter_tflops": 23.091565215080017, "iter_time": 0.8934471664428711, "loss": 0.15386633574962616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.054270655691123, "step_time": 0.7918507404327393} +{"epoch": 0, "iter": 14775, "iter_tflops": 45.749079511338415, "iter_time": 0.45096193695068365, "loss": 0.1588928997516632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.46852741982272, "step_time": 0.41705493545532235} +{"epoch": 0, "iter": 14776, "iter_tflops": 47.32472878079876, "iter_time": 0.43594742202758785, "loss": 0.20499438047409058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96745765306285, "step_time": 0.40478953552246094} +{"epoch": 
0, "iter": 14777, "iter_tflops": 19.6232821496752, "iter_time": 1.051357940673828, "loss": 0.591284453868866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.444956927041975, "step_time": 1.009104278564453} +{"epoch": 0, "iter": 14778, "iter_tflops": 20.22122432568639, "iter_time": 1.0202692565917968, "loss": 0.8492829203605652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.236843273692124, "step_time": 0.8512285728454589} +{"epoch": 0, "iter": 14779, "iter_tflops": 37.04884327340307, "iter_time": 0.5568620147705078, "loss": 0.6269975900650024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.292881041018376, "step_time": 0.5120282535552978} +{"epoch": 0, "iter": 14780, "iter_tflops": 36.49404532242615, "iter_time": 0.5653276672363281, "loss": 0.5039326548576355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.6904083032833, "step_time": 0.5198004856109618} +{"epoch": 0, "iter": 14781, "iter_tflops": 24.208143300518206, "iter_time": 0.8522377471923828, "loss": 0.4762808680534363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.12303438297295, "step_time": 0.7897663497924805} +{"epoch": 0, "iter": 14782, "iter_tflops": 10.647050241527372, "iter_time": 1.9377285766601562, "loss": 0.7774083018302917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.995732202865796, "step_time": 1.587528366088867} +{"epoch": 0, "iter": 14783, "iter_tflops": 34.23142669646224, "iter_time": 0.6026945266723632, "loss": 0.7438408732414246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.26654917268734, "step_time": 0.5536089057922363} +{"epoch": 0, "iter": 14784, "iter_tflops": 35.15634122283735, "iter_time": 0.5868384704589843, "loss": 0.7161081433296204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.59946159496834, "step_time": 0.5344917430877686} +{"epoch": 0, "iter": 14785, "iter_tflops": 27.368588145204264, "iter_time": 0.7538238143920898, "loss": 0.0187864750623703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.382387369913502, "step_time": 0.7021585159301759} +{"epoch": 
0, "iter": 14786, "iter_tflops": 9.342759571204647, "iter_time": 2.2082440795898437, "loss": 0.0020952012855559587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.226051695167584, "step_time": 1.6874698410034177} +{"epoch": 0, "iter": 14787, "iter_tflops": 10.688918764215925, "iter_time": 1.930138488769531, "loss": 0.003347292309626937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.610239289796793, "step_time": 1.6360588436126708} +{"epoch": 0, "iter": 14788, "iter_tflops": 45.28893715750945, "iter_time": 0.45554377746582037, "loss": 0.009580554440617561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.20890363774366, "step_time": 0.4109050788879394} +{"epoch": 0, "iter": 14789, "iter_tflops": 10.977416359262634, "iter_time": 1.4922735137939453, "loss": 0.22988085448741913, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 11.563713831454272, "step_time": 1.4166130294799806} +{"epoch": 0, "iter": 14790, "iter_tflops": 17.982069832645845, "iter_time": 0.910980094909668, "loss": 0.18311414122581482, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 22.322813445569086, "step_time": 0.7338370552062988} +{"epoch": 0, "iter": 14791, "iter_tflops": 27.396823213478413, "iter_time": 0.5979272689819336, "loss": 0.11813605576753616, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 29.167671977896592, "step_time": 0.5616254768371581} +{"epoch": 0, "iter": 14792, "iter_tflops": 31.05485471432159, "iter_time": 0.5274958724975587, "loss": 0.11046413332223892, "lr": 3e-05, "seqlen": 6544.0, "step_tflops": 32.97453243539322, "step_time": 0.49678665542602535} +{"epoch": 0, "iter": 14793, "iter_tflops": 34.27428800849156, "iter_time": 0.6019408340454101, "loss": 0.8375298976898193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.99198513296184, "step_time": 0.5577179336547852} +{"epoch": 0, "iter": 14794, "iter_tflops": 13.233044722891668, "iter_time": 1.5590587005615235, "loss": 0.6993116140365601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.049633002291067, "step_time": 
1.285455780029297} +{"epoch": 0, "iter": 14795, "iter_tflops": 15.09233606008752, "iter_time": 1.366991394042969, "loss": 0.6832486987113953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.1019955115942, "step_time": 1.2063559188842774} +{"epoch": 0, "iter": 14796, "iter_tflops": 35.218176479381626, "iter_time": 0.5858081130981446, "loss": 0.6090377569198608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.3050897360452, "step_time": 0.5385992736816406} +{"epoch": 0, "iter": 14797, "iter_tflops": 21.380921815061203, "iter_time": 0.7201867904663085, "loss": 0.17341408133506775, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 23.292966506525875, "step_time": 0.6610689735412599} +{"epoch": 0, "iter": 14798, "iter_tflops": 22.035411398370364, "iter_time": 0.6987960052490234, "loss": 0.26384252309799194, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 23.77455600108183, "step_time": 0.6476780242919922} +{"epoch": 0, "iter": 14799, "iter_tflops": 23.83450722079051, "iter_time": 0.6460489120483398, "loss": 0.10498537868261337, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.639792776837844, "step_time": 0.6005609169006347} +{"epoch": 0, "iter": 14800, "iter_tflops": 25.071628083490676, "iter_time": 0.6141706237792969, "loss": 0.16362982988357544, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.887345334546183, "step_time": 0.5726953430175782} +{"epoch": 0, "iter": 14801, "iter_tflops": 18.906762761652217, "iter_time": 1.0912017974853516, "loss": 0.31973013281822205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.153187796518015, "step_time": 1.023713653564453} +{"epoch": 0, "iter": 14802, "iter_tflops": 15.89834498981828, "iter_time": 1.2976881256103516, "loss": 0.28156548738479614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.93501207265746, "step_time": 1.089573823928833} +{"epoch": 0, "iter": 14803, "iter_tflops": 38.55650092343525, "iter_time": 0.5350872879028321, "loss": 0.38407161831855774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.23713142615416, 
"step_time": 0.48845868110656737} +{"epoch": 0, "iter": 14804, "iter_tflops": 39.93160295852772, "iter_time": 0.516660789489746, "loss": 0.2298194020986557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.713482977486606, "step_time": 0.4719617862701416} +{"epoch": 0, "iter": 14805, "iter_tflops": 21.99482412254969, "iter_time": 0.9379976577758788, "loss": 0.5485735535621643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.699414516936937, "step_time": 0.8705317802429198} +{"epoch": 0, "iter": 14806, "iter_tflops": 12.571569074066387, "iter_time": 1.6410913696289062, "loss": 0.3566663861274719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.026223424604275, "step_time": 1.4708944015502932} +{"epoch": 0, "iter": 14807, "iter_tflops": 42.19298180068057, "iter_time": 0.4889697914123535, "loss": 0.7615022659301758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.31553114874675, "step_time": 0.4552764358520508} +{"epoch": 0, "iter": 14808, "iter_tflops": 44.587328768576526, "iter_time": 0.4627120323181152, "loss": 0.6599130630493164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.78722798805788, "step_time": 0.43172819137573243} +{"epoch": 0, "iter": 14809, "iter_tflops": 37.6654896645094, "iter_time": 0.5477452621459961, "loss": 0.2709047496318817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.55420683080821, "step_time": 0.5087288131713867} +{"epoch": 0, "iter": 14810, "iter_tflops": 18.15588309592975, "iter_time": 1.1363310394287107, "loss": 0.3542056381702423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.50820176689864, "step_time": 0.8088023490905762} +{"epoch": 0, "iter": 14811, "iter_tflops": 47.83335047193207, "iter_time": 0.4313119049072266, "loss": 0.34938517212867737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.7636963710877, "step_time": 0.39856298828125} +{"epoch": 0, "iter": 14812, "iter_tflops": 44.1107870149117, "iter_time": 0.4677108459472656, "loss": 0.3107227683067322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.57393148977566, 
"step_time": 0.4336638336181641} +{"epoch": 0, "iter": 14813, "iter_tflops": 28.58844534016124, "iter_time": 0.721658462524414, "loss": 0.15549123287200928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.194916777063746, "step_time": 0.6832637977600098} +{"epoch": 0, "iter": 14814, "iter_tflops": 13.924274929557784, "iter_time": 1.4816637573242188, "loss": 0.1990198791027069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.440726223187763, "step_time": 1.2548772621154785} +{"epoch": 0, "iter": 14815, "iter_tflops": 50.81405604860966, "iter_time": 0.406011547088623, "loss": 0.19144633412361145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.43778495915179, "step_time": 0.37214859008789064} +{"epoch": 0, "iter": 14816, "iter_tflops": 49.601608431499706, "iter_time": 0.41593597793579107, "loss": 0.24907872080802917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.07355327500098, "step_time": 0.3815375957489014} +{"epoch": 0, "iter": 14817, "iter_tflops": 30.121714086069655, "iter_time": 0.6849242858886719, "loss": 0.06688756495714188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.99236052784197, "step_time": 0.6448756256103516} +{"epoch": 0, "iter": 14818, "iter_tflops": 15.148475809150286, "iter_time": 1.3619253692626951, "loss": 0.048309337347745895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.202619605857937, "step_time": 1.1992995243072508} +{"epoch": 0, "iter": 14819, "iter_tflops": 44.68164200721448, "iter_time": 0.4617353477478028, "loss": 0.12917321920394897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.27131587610561, "step_time": 0.41872422409057614} +{"epoch": 0, "iter": 14820, "iter_tflops": 44.45965621716589, "iter_time": 0.46404077911376956, "loss": 0.043455179780721664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.7513379318182, "step_time": 0.42319030380249023} +{"epoch": 0, "iter": 14821, "iter_tflops": 20.370986695826364, "iter_time": 1.012768493652344, "loss": 0.19949398934841156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
21.961398987351387, "step_time": 0.9394252853393554} +{"epoch": 0, "iter": 14822, "iter_tflops": 18.6718537532891, "iter_time": 1.104930114746094, "loss": 0.305546373128891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.956912756257488, "step_time": 0.898687629699707} +{"epoch": 0, "iter": 14823, "iter_tflops": 48.63073994848817, "iter_time": 0.42423976135253905, "loss": 0.20147180557250977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.78238539986692, "step_time": 0.39087080574035643} +{"epoch": 0, "iter": 14824, "iter_tflops": 51.92509815094891, "iter_time": 0.39732411193847655, "loss": 0.2287951409816742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.28536132879225, "step_time": 0.3665445690155029} +{"epoch": 0, "iter": 14825, "iter_tflops": 45.459377177208204, "iter_time": 0.4538358154296875, "loss": 0.09594201296567917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.53643675298875, "step_time": 0.4164831962585449} +{"epoch": 0, "iter": 14826, "iter_tflops": 43.35369624743265, "iter_time": 0.47587853622436527, "loss": 0.08754000067710876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.81076435421513, "step_time": 0.42267507553100586} +{"epoch": 0, "iter": 14827, "iter_tflops": 52.528645246110145, "iter_time": 0.3927589111328125, "loss": 0.09631085395812988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.30156304705124, "step_time": 0.3600441665649414} +{"epoch": 0, "iter": 14828, "iter_tflops": 51.98192255106913, "iter_time": 0.39688977432250977, "loss": 0.12649646401405334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.23573954093982, "step_time": 0.36686800384521484} +{"epoch": 0, "iter": 14829, "iter_tflops": 44.96433063038455, "iter_time": 0.4588324394226074, "loss": 0.43266451358795166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.069051070206115, "step_time": 0.42045022392272946} +{"epoch": 0, "iter": 14830, "iter_tflops": 9.375192462937349, "iter_time": 2.2006047973632814, "loss": 0.37788835167884827, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 11.762953391521746, "step_time": 1.75390421295166} +{"epoch": 0, "iter": 14831, "iter_tflops": 11.501964885479799, "iter_time": 1.79370166015625, "loss": 0.28664156794548035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.210656245693759, "step_time": 1.5617008819580078} +{"epoch": 0, "iter": 14832, "iter_tflops": 16.592248633420095, "iter_time": 1.2434175720214844, "loss": 0.30062174797058105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.455766257654552, "step_time": 1.0085710430145265} +{"epoch": 0, "iter": 14833, "iter_tflops": 18.100151450233707, "iter_time": 0.9458693771362304, "loss": 0.28811678290367126, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 19.29235851052813, "step_time": 0.8874176254272461} +{"epoch": 0, "iter": 14834, "iter_tflops": 28.146645445040978, "iter_time": 0.608256462097168, "loss": 0.2083510309457779, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 30.18945015340148, "step_time": 0.5670980720520019} +{"epoch": 0, "iter": 14835, "iter_tflops": 29.05672042656285, "iter_time": 0.58920548248291, "loss": 0.21175044775009155, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 31.007802329447152, "step_time": 0.5521313247680665} +{"epoch": 0, "iter": 14836, "iter_tflops": 30.67674767868252, "iter_time": 0.5580897674560547, "loss": 0.1650507003068924, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 32.65503549214902, "step_time": 0.5242799072265625} +{"epoch": 0, "iter": 14837, "iter_tflops": 31.596968614245235, "iter_time": 0.6529453430175782, "loss": 0.18134479224681854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.67849949420883, "step_time": 0.6125894508361817} +{"epoch": 0, "iter": 14838, "iter_tflops": 18.486331783915038, "iter_time": 1.1160187835693358, "loss": 0.14259526133537292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.559494324521953, "step_time": 1.0034825363159179} +{"epoch": 0, "iter": 14839, "iter_tflops": 47.50410467086643, "iter_time": 0.4343012809753418, "loss": 0.16486681997776031, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 51.813529768184, "step_time": 0.39817965698242186} +{"epoch": 0, "iter": 14840, "iter_tflops": 46.78384181210306, "iter_time": 0.4409875869750976, "loss": 0.17698313295841217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.60008361924593, "step_time": 0.4077284469604492} +{"epoch": 0, "iter": 14841, "iter_tflops": 25.219441624914843, "iter_time": 0.8180630569458007, "loss": 0.6315497756004333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.498379382703323, "step_time": 0.7785794448852539} +{"epoch": 0, "iter": 14842, "iter_tflops": 20.44060300323318, "iter_time": 1.009319221496582, "loss": 0.555768609046936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.168377299802085, "step_time": 0.8197228317260742} +{"epoch": 0, "iter": 14843, "iter_tflops": 36.75658745771909, "iter_time": 0.5612896881103515, "loss": 0.5147477388381958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.20228066109775, "step_time": 0.5131821670532226} +{"epoch": 0, "iter": 14844, "iter_tflops": 37.58503742628202, "iter_time": 0.5489177322387696, "loss": 0.4465029835700989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.82146842246893, "step_time": 0.5053981227874755} +{"epoch": 0, "iter": 14845, "iter_tflops": 23.544147967971902, "iter_time": 0.876272674560547, "loss": 0.006483756005764008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.150538050062213, "step_time": 0.8203042602539063} +{"epoch": 0, "iter": 14846, "iter_tflops": 8.189383648526743, "iter_time": 2.5192486267089844, "loss": 0.012613162398338318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.6217564999436, "step_time": 2.14421280670166} +{"epoch": 0, "iter": 14847, "iter_tflops": 12.920493737256612, "iter_time": 1.5967728424072265, "loss": 0.01141965389251709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.079920693872513, "step_time": 1.368116844177246} +{"epoch": 0, "iter": 14848, "iter_tflops": 31.47379682656127, "iter_time": 0.6555006256103515, "loss": 0.0021077794954180717, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 51.725804870546035, "step_time": 0.39885495376586916} +{"epoch": 0, "iter": 14849, "iter_tflops": 18.52120272858661, "iter_time": 0.7564441528320314, "loss": 0.18260589241981506, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 19.69288751660825, "step_time": 0.7114373397827148} +{"epoch": 0, "iter": 14850, "iter_tflops": 10.132062713303007, "iter_time": 1.382764389038086, "loss": 0.08567611873149872, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 12.448211610145771, "step_time": 1.1254833984375} +{"epoch": 0, "iter": 14851, "iter_tflops": 21.097232013096452, "iter_time": 0.6640802688598633, "loss": 0.2802010476589203, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 22.791046798380236, "step_time": 0.6147262840270995} +{"epoch": 0, "iter": 14852, "iter_tflops": 18.148277896909246, "iter_time": 0.7719881515502929, "loss": 0.1274818480014801, "lr": 3e-05, "seqlen": 5616.0, "step_tflops": 19.614237590591955, "step_time": 0.7142900886535645} +{"epoch": 0, "iter": 14853, "iter_tflops": 17.252531983076945, "iter_time": 1.1958298950195312, "loss": 0.02947385422885418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.487152514991184, "step_time": 1.11596923828125} +{"epoch": 0, "iter": 14854, "iter_tflops": 17.515819623927595, "iter_time": 1.1778548736572267, "loss": 0.044273655861616135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.916398883108926, "step_time": 0.9863597278594971} +{"epoch": 0, "iter": 14855, "iter_tflops": 38.397253107003834, "iter_time": 0.5373064956665039, "loss": 0.06453973054885864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.98775067710739, "step_time": 0.49135981750488283} +{"epoch": 0, "iter": 14856, "iter_tflops": 40.21474855920001, "iter_time": 0.513023063659668, "loss": 0.10544644296169281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.30316212566673, "step_time": 0.465679931640625} +{"epoch": 0, "iter": 14857, "iter_tflops": 20.123189620579698, "iter_time": 1.025239730834961, "loss": 
0.5867587327957153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.459531765905957, "step_time": 0.9613953247070313} +{"epoch": 0, "iter": 14858, "iter_tflops": 13.4500421858975, "iter_time": 1.533905487060547, "loss": 0.6165286898612976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.374393867846496, "step_time": 1.1874424896240234} +{"epoch": 0, "iter": 14859, "iter_tflops": 38.85053261991231, "iter_time": 0.53103759765625, "loss": 0.622388482093811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.560211952552, "step_time": 0.48475072288513177} +{"epoch": 0, "iter": 14860, "iter_tflops": 36.55735139472706, "iter_time": 0.5643486938476563, "loss": 0.5067847371101379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.70979247012512, "step_time": 0.5195467472076416} +{"epoch": 0, "iter": 14861, "iter_tflops": 16.17548878071724, "iter_time": 1.2754541015625, "loss": 0.36184531450271606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.495781338299917, "step_time": 1.1792038955688477} +{"epoch": 0, "iter": 14862, "iter_tflops": 29.979884817052792, "iter_time": 0.6881645355224608, "loss": 0.30672064423561096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.02704355914731, "step_time": 0.6246727313995362} +{"epoch": 0, "iter": 14863, "iter_tflops": 45.16999426399117, "iter_time": 0.4567433280944824, "loss": 0.3198052644729614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.83710203775652, "step_time": 0.42244712829589837} +{"epoch": 0, "iter": 14864, "iter_tflops": 48.57011765531163, "iter_time": 0.42476927185058594, "loss": 0.46945545077323914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.74206232340848, "step_time": 0.39116963958740236} +{"epoch": 0, "iter": 14865, "iter_tflops": 28.685396656522794, "iter_time": 0.7192193908691407, "loss": 0.18790949881076813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.498451567653557, "step_time": 0.676463638305664} +{"epoch": 0, "iter": 14866, "iter_tflops": 18.249917333951498, "iter_time": 1.1304759979248047, 
"loss": 0.12090089917182922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.597795937507858, "step_time": 1.0016165599822997} +{"epoch": 0, "iter": 14867, "iter_tflops": 40.55262407674199, "iter_time": 0.5087486686706543, "loss": 0.16422057151794434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.73173893497987, "step_time": 0.4612182312011719} +{"epoch": 0, "iter": 14868, "iter_tflops": 44.16589767111479, "iter_time": 0.46712723159790037, "loss": 0.22436697781085968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.25280022456213, "step_time": 0.4275626163482666} +{"epoch": 0, "iter": 14869, "iter_tflops": 29.01881769080766, "iter_time": 0.7109556884765624, "loss": 0.048701971769332886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.463105978435117, "step_time": 0.6557233581542968} +{"epoch": 0, "iter": 14870, "iter_tflops": 12.123921083938303, "iter_time": 1.7016849060058594, "loss": 0.06229834258556366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.765563819864138, "step_time": 1.498746711730957} +{"epoch": 0, "iter": 14871, "iter_tflops": 12.216946211226826, "iter_time": 1.6887275390624998, "loss": 0.04743995517492294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.026584432972449, "step_time": 1.4708565444946289} +{"epoch": 0, "iter": 14872, "iter_tflops": 48.76278393445301, "iter_time": 0.42309096908569344, "loss": 0.031619030982255936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.26532730765968, "step_time": 0.37330989456176755} +{"epoch": 0, "iter": 14873, "iter_tflops": 16.92493716771586, "iter_time": 0.8880573196411131, "loss": 0.1682635098695755, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 17.835262305167, "step_time": 0.842730209350586} +{"epoch": 0, "iter": 14874, "iter_tflops": 9.153712913873555, "iter_time": 1.641991012573242, "loss": 0.2728387415409088, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 11.218557523812217, "step_time": 1.3397724533081055} +{"epoch": 0, "iter": 14875, "iter_tflops": 26.726495866306486, "iter_time": 
0.5623750457763672, "loss": 0.29349905252456665, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.58251415114222, "step_time": 0.5258569717407227} +{"epoch": 0, "iter": 14876, "iter_tflops": 25.45217517482162, "iter_time": 0.5905316238403321, "loss": 0.1845439374446869, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.048021365295188, "step_time": 0.5556899757385254} +{"epoch": 0, "iter": 14877, "iter_tflops": 40.68090373464579, "iter_time": 0.5071444244384765, "loss": 0.06920535862445831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.499125454858195, "step_time": 0.4636291904449463} +{"epoch": 0, "iter": 14878, "iter_tflops": 9.696800313107929, "iter_time": 2.1276186828613284, "loss": 0.1196063905954361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.612782952424965, "step_time": 1.7765847854614258} +{"epoch": 0, "iter": 14879, "iter_tflops": 10.0136460825301, "iter_time": 2.0602978515625, "loss": 0.1201208233833313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.174037704635857, "step_time": 1.455555145263672} +{"epoch": 0, "iter": 14880, "iter_tflops": 32.65886890014531, "iter_time": 0.6317148818969727, "loss": 0.10687537491321564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.74491664391188, "step_time": 0.48265607070922845} +{"epoch": 0, "iter": 14881, "iter_tflops": 13.909502916794162, "iter_time": 1.0952713470458983, "loss": 0.3100574016571045, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 14.48971084586142, "step_time": 1.0514136657714845} +{"epoch": 0, "iter": 14882, "iter_tflops": 13.516406601017632, "iter_time": 1.127125015258789, "loss": 0.16598442196846008, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 16.456335874526555, "step_time": 0.9257637977600098} +{"epoch": 0, "iter": 14883, "iter_tflops": 24.39174459181471, "iter_time": 0.6245834503173828, "loss": 0.13291651010513306, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 25.931849579222508, "step_time": 0.5874891395568848} +{"epoch": 0, "iter": 14884, "iter_tflops": 29.136737915521426, 
"iter_time": 0.5228684158325195, "loss": 0.2669685184955597, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 30.945351831947562, "step_time": 0.492309154510498} +{"epoch": 0, "iter": 14885, "iter_tflops": 3.265121735169819, "iter_time": 0.5642385330200195, "loss": 0.3382902443408966, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 3.5377794947464634, "step_time": 0.5207524948120118} +{"epoch": 0, "iter": 14886, "iter_tflops": 3.8544609446899694, "iter_time": 0.47796761322021486, "loss": 0.3365585505962372, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.312785542980598, "step_time": 0.42717345428466796} +{"epoch": 0, "iter": 14887, "iter_tflops": 4.30440520911276, "iter_time": 0.428005126953125, "loss": 0.35489553213119507, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.649539310872676, "step_time": 0.39623441696166994} +{"epoch": 0, "iter": 14888, "iter_tflops": 4.527859410645671, "iter_time": 0.40688266372680665, "loss": 0.2897648513317108, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.8955633876637386, "step_time": 0.37632185554504394} +{"epoch": 0, "iter": 14889, "iter_tflops": 42.47571594337985, "iter_time": 0.48571502685546875, "loss": 0.5716898441314697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.330540761014916, "step_time": 0.4453022384643555} +{"epoch": 0, "iter": 14890, "iter_tflops": 42.79317766062929, "iter_time": 0.48211174392700196, "loss": 0.39316365122795105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.45227386360719, "step_time": 0.44413527679443354} +{"epoch": 0, "iter": 14891, "iter_tflops": 43.35471661433763, "iter_time": 0.47586733627319333, "loss": 0.5549797415733337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.07418759172942, "step_time": 0.4382676486968994} +{"epoch": 0, "iter": 14892, "iter_tflops": 46.92314563958846, "iter_time": 0.43967839813232423, "loss": 0.45396631956100464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.73483688809564, "step_time": 0.4066455078125} +{"epoch": 0, "iter": 14893, "iter_tflops": 
28.189042435973224, "iter_time": 0.7318834457397461, "loss": 0.0030244409572333097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.774533066585718, "step_time": 0.692910732269287} +{"epoch": 0, "iter": 14894, "iter_tflops": 22.509004663873384, "iter_time": 0.916570671081543, "loss": 0.0014067738084122539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.301290559262046, "step_time": 0.7289806613922118} +{"epoch": 0, "iter": 14895, "iter_tflops": 56.2734776420186, "iter_time": 0.3666219749450684, "loss": 0.009420128539204597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.57777383591411, "step_time": 0.33504123687744136} +{"epoch": 0, "iter": 14896, "iter_tflops": 56.85710215098559, "iter_time": 0.3628586883544922, "loss": 0.0062261223793029785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.56529575545918, "step_time": 0.3297529926300049} +{"epoch": 0, "iter": 14897, "iter_tflops": 31.09104108647609, "iter_time": 0.6635703659057617, "loss": 0.48164674639701843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.03741825237238, "step_time": 0.6244765663146972} +{"epoch": 0, "iter": 14898, "iter_tflops": 15.673481378693422, "iter_time": 1.3163057403564455, "loss": 0.49578359723091125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.0023126021679, "step_time": 1.0857148780822754} +{"epoch": 0, "iter": 14899, "iter_tflops": 38.98829816393331, "iter_time": 0.5291611709594727, "loss": 0.4192831516265869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.75366536322958, "step_time": 0.4825573043823242} +{"epoch": 0, "iter": 14900, "iter_tflops": 44.68406704724432, "iter_time": 0.4617102890014649, "loss": 0.2862453758716583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.68490749062998, "step_time": 0.42376774597167965} +{"epoch": 0, "iter": 14901, "iter_tflops": 10.337671542206238, "iter_time": 1.0961336975097657, "loss": 0.002020122017711401, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 11.205443219515466, "step_time": 1.0112469367980956} +{"epoch": 0, "iter": 
14902, "iter_tflops": 16.607004759213186, "iter_time": 0.6823307571411132, "loss": 0.01582295261323452, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 18.62215048562576, "step_time": 0.608494176864624} +{"epoch": 0, "iter": 14903, "iter_tflops": 29.22138855897646, "iter_time": 0.3877800025939942, "loss": 0.0029525968711823225, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 32.46029936642253, "step_time": 0.3490870494842529} +{"epoch": 0, "iter": 14904, "iter_tflops": 26.797499497115993, "iter_time": 0.4228555030822754, "loss": 0.0010907358955591917, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 29.3558377934233, "step_time": 0.38600397682189935} +{"epoch": 0, "iter": 14905, "iter_tflops": 23.5349526109814, "iter_time": 0.8766150436401368, "loss": 0.31782665848731995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.94696832609711, "step_time": 0.8269980239868164} +{"epoch": 0, "iter": 14906, "iter_tflops": 17.791139675512046, "iter_time": 1.159627426147461, "loss": 0.2514810562133789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.41748511570928, "step_time": 0.8449311389923096} +{"epoch": 0, "iter": 14907, "iter_tflops": 45.4309341399583, "iter_time": 0.45411994934082034, "loss": 0.25270596146583557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.97961828110032, "step_time": 0.4212179317474365} +{"epoch": 0, "iter": 14908, "iter_tflops": 46.3061653599487, "iter_time": 0.4455366439819336, "loss": 0.24743397533893585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.14721909002974, "step_time": 0.41141052055358895} +{"epoch": 0, "iter": 14909, "iter_tflops": 30.144663807025946, "iter_time": 0.6844028396606444, "loss": 0.49032455682754517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.12230926038121, "step_time": 0.6422668228149414} +{"epoch": 0, "iter": 14910, "iter_tflops": 10.216754808063438, "iter_time": 2.019339202880859, "loss": 0.7265576720237732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.394846344559017, "step_time": 1.6644896545410157} +{"epoch": 
0, "iter": 14911, "iter_tflops": 11.083095966099632, "iter_time": 1.861491912841797, "loss": 0.929461658000946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.331496269791625, "step_time": 1.4395631217956544} +{"epoch": 0, "iter": 14912, "iter_tflops": 41.03040834963503, "iter_time": 0.5028244743347168, "loss": 0.7634196281433105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.62775553719052, "step_time": 0.46229287719726564} +{"epoch": 0, "iter": 14913, "iter_tflops": 21.177191313174273, "iter_time": 0.7078113021850587, "loss": 0.2324972003698349, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 22.494972335613042, "step_time": 0.6663469123840332} +{"epoch": 0, "iter": 14914, "iter_tflops": 11.140113525890023, "iter_time": 1.3455388336181642, "loss": 0.15632431209087372, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 13.112560432768213, "step_time": 1.1431371803283692} +{"epoch": 0, "iter": 14915, "iter_tflops": 22.439511254950222, "iter_time": 0.6679938430786133, "loss": 0.20820677280426025, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.18483726322182, "step_time": 0.6197873153686523} +{"epoch": 0, "iter": 14916, "iter_tflops": 23.44150644593104, "iter_time": 0.6394407882690429, "loss": 0.20277781784534454, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 25.180285474355003, "step_time": 0.5952853622436524} +{"epoch": 0, "iter": 14917, "iter_tflops": 23.634559282253527, "iter_time": 0.8729205932617187, "loss": 0.01035258173942566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.58369830280851, "step_time": 0.8064156036376954} +{"epoch": 0, "iter": 14918, "iter_tflops": 40.51227899555153, "iter_time": 0.5092553176879883, "loss": 0.03857149928808212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.72368220125837, "step_time": 0.4613013172149658} +{"epoch": 0, "iter": 14919, "iter_tflops": 47.71667305636586, "iter_time": 0.43236655426025394, "loss": 0.02220792882144451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.41948265804635, "step_time": 
0.3935768241882325} +{"epoch": 0, "iter": 14920, "iter_tflops": 47.487841105201916, "iter_time": 0.4344500198364258, "loss": 0.037269774824380875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.24537327976955, "step_time": 0.39488843154907227} +{"epoch": 0, "iter": 14921, "iter_tflops": 35.01444408103316, "iter_time": 0.589216651916504, "loss": 0.2697429955005646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.897640799268615, "step_time": 0.5303944683074951} +{"epoch": 0, "iter": 14922, "iter_tflops": 47.63540283968193, "iter_time": 0.4331042098999024, "loss": 0.19507303833961487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00583526543457, "step_time": 0.39670728111267095} +{"epoch": 0, "iter": 14923, "iter_tflops": 48.50887271119312, "iter_time": 0.4253055648803711, "loss": 0.2799057364463806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.772355172286645, "step_time": 0.39094509696960444} +{"epoch": 0, "iter": 14924, "iter_tflops": 49.17433809477071, "iter_time": 0.41954999923706054, "loss": 0.296459823846817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.48216341388449, "step_time": 0.38575652503967284} +{"epoch": 0, "iter": 14925, "iter_tflops": 32.11008477683003, "iter_time": 0.6425113372802733, "loss": 0.1642855554819107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.15864231762736, "step_time": 0.6039787330627441} +{"epoch": 0, "iter": 14926, "iter_tflops": 17.90966930386257, "iter_time": 1.1519527893066406, "loss": 0.12785951793193817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.60999109642375, "step_time": 0.9547016201019286} +{"epoch": 0, "iter": 14927, "iter_tflops": 38.831153908034324, "iter_time": 0.5313026123046874, "loss": 0.13051430881023407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.452891983575356, "step_time": 0.48597616195678717} +{"epoch": 0, "iter": 14928, "iter_tflops": 33.923034138000055, "iter_time": 0.6081735916137696, "loss": 0.1579565554857254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.16318397387207, 
"step_time": 0.5551487064361572} +{"epoch": 0, "iter": 14929, "iter_tflops": 14.690274757867497, "iter_time": 1.4044048767089845, "loss": 0.15964867174625397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.6110465397542, "step_time": 1.3215701751708986} +{"epoch": 0, "iter": 14930, "iter_tflops": 18.15001357386701, "iter_time": 1.136698516845703, "loss": 0.18079623579978943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.721246701484286, "step_time": 0.7720857391357422} +{"epoch": 0, "iter": 14931, "iter_tflops": 38.10085658156907, "iter_time": 0.5414863433837891, "loss": 0.24944138526916504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.58704200073353, "step_time": 0.4960942764282227} +{"epoch": 0, "iter": 14932, "iter_tflops": 40.0371294239834, "iter_time": 0.5152990188598633, "loss": 0.18096186220645905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87259255365656, "step_time": 0.4702501564025879} +{"epoch": 0, "iter": 14933, "iter_tflops": 32.05624118318802, "iter_time": 0.6435905380249023, "loss": 0.572640061378479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.45310331897216, "step_time": 0.5819263076782226} +{"epoch": 0, "iter": 14934, "iter_tflops": 34.44611300288145, "iter_time": 0.598938217163086, "loss": 0.6602610945701599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.08126643538812, "step_time": 0.5417649002075196} +{"epoch": 0, "iter": 14935, "iter_tflops": 38.84842287476969, "iter_time": 0.531066436767578, "loss": 0.420676052570343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.32210350205372, "step_time": 0.48747797966003426} +{"epoch": 0, "iter": 14936, "iter_tflops": 39.95440218131136, "iter_time": 0.516365966796875, "loss": 0.4599047005176544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.32636406048023, "step_time": 0.4761787414550782} +{"epoch": 0, "iter": 14937, "iter_tflops": 16.599164424519167, "iter_time": 1.2428995208740234, "loss": 0.6769781708717346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.784324063046014, 
"step_time": 1.1600718383789062} +{"epoch": 0, "iter": 14938, "iter_tflops": 18.6813308020252, "iter_time": 1.1043695831298828, "loss": 0.5220496654510498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.421924036049635, "step_time": 0.7523576202392579} +{"epoch": 0, "iter": 14939, "iter_tflops": 37.878236272639384, "iter_time": 0.5446688003540039, "loss": 0.5272859334945679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.473915294140134, "step_time": 0.49744745254516604} +{"epoch": 0, "iter": 14940, "iter_tflops": 39.647824360381264, "iter_time": 0.5203587799072266, "loss": 0.5077048540115356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.52878129396326, "step_time": 0.47396441841125486} +{"epoch": 0, "iter": 14941, "iter_tflops": 16.84819586878458, "iter_time": 0.8122210617065428, "loss": 0.0044299340806901455, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 18.196028488039985, "step_time": 0.7520574913024903} +{"epoch": 0, "iter": 14942, "iter_tflops": 15.20140458216244, "iter_time": 0.900210205078125, "loss": 0.010823420248925686, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 18.796615770372778, "step_time": 0.728027837753296} +{"epoch": 0, "iter": 14943, "iter_tflops": 38.80048378024925, "iter_time": 0.35268785858154295, "loss": 0.02497231960296631, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 43.00399480897226, "step_time": 0.3182136821746826} +{"epoch": 0, "iter": 14944, "iter_tflops": 41.36422251089133, "iter_time": 0.3308283996582031, "loss": 0.001002238248474896, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 45.45440356062486, "step_time": 0.3010590496063233} +{"epoch": 0, "iter": 14945, "iter_tflops": 40.9710693054375, "iter_time": 0.5035527229309082, "loss": 0.6432863473892212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.34559833262456, "step_time": 0.46523430252075193} +{"epoch": 0, "iter": 14946, "iter_tflops": 40.024904102562, "iter_time": 0.5154564132690429, "loss": 0.5473142862319946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.29150852010372, "step_time": 0.47656212997436526} +{"epoch": 0, "iter": 14947, "iter_tflops": 45.406412847259155, "iter_time": 0.45436519241333007, "loss": 0.6846246719360352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.08167991071135, "step_time": 0.420342041015625} +{"epoch": 0, "iter": 14948, "iter_tflops": 48.1799104852619, "iter_time": 0.4282094612121582, "loss": 0.5479645729064941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.78138579741664, "step_time": 0.3984268321990967} +{"epoch": 0, "iter": 14949, "iter_tflops": 30.59809010282398, "iter_time": 0.6742608261108398, "loss": 0.731046736240387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.88984721308253, "step_time": 0.6272784843444824} +{"epoch": 0, "iter": 14950, "iter_tflops": 12.814183971473186, "iter_time": 1.610020080566406, "loss": 0.6162164807319641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.94449302725616, "step_time": 1.2939322357177732} +{"epoch": 0, "iter": 14951, "iter_tflops": 16.89048845326531, "iter_time": 1.2214622192382811, "loss": 0.6109492182731628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.876479695107957, "step_time": 1.0379651641845704} +{"epoch": 0, "iter": 14952, "iter_tflops": 16.758011402845188, "iter_time": 1.2311182403564451, "loss": 0.6351319551467896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.752946041734756, "step_time": 1.0444565315246581} +{"epoch": 0, "iter": 14953, "iter_tflops": 14.685485044101322, "iter_time": 1.1546100616455077, "loss": 0.3053571581840515, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 15.613721990259888, "step_time": 1.085968406677246} +{"epoch": 0, "iter": 14954, "iter_tflops": 16.698893581348514, "iter_time": 1.0153971405029296, "loss": 0.23940013349056244, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 18.332197329254075, "step_time": 0.9249305191040039} +{"epoch": 0, "iter": 14955, "iter_tflops": 23.303132228691354, "iter_time": 0.7276278839111328, "loss": 0.2948158383369446, "lr": 3e-05, "seqlen": 6768.0, 
"step_tflops": 25.063101957379153, "step_time": 0.676532730102539} +{"epoch": 0, "iter": 14956, "iter_tflops": 25.886281012143183, "iter_time": 0.6550191116333007, "loss": 0.2771516740322113, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 27.743436066276317, "step_time": 0.6111719093322754} +{"epoch": 0, "iter": 14957, "iter_tflops": 17.258083336063333, "iter_time": 1.1954452362060548, "loss": 0.12135351449251175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.597782454364435, "step_time": 1.1093308334350587} +{"epoch": 0, "iter": 14958, "iter_tflops": 23.915927928182025, "iter_time": 0.8626507644653321, "loss": 0.12676823139190674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.912227143319914, "step_time": 0.7666066951751709} +{"epoch": 0, "iter": 14959, "iter_tflops": 41.75583173155064, "iter_time": 0.49408891296386714, "loss": 0.1716972142457962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83596924515072, "step_time": 0.45010706329345707} +{"epoch": 0, "iter": 14960, "iter_tflops": 42.517113329768264, "iter_time": 0.48524210357666014, "loss": 0.15449102222919464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.6052227094646, "step_time": 0.4426777153015137} +{"epoch": 0, "iter": 14961, "iter_tflops": 20.677729779690615, "iter_time": 0.9977446136474609, "loss": 0.16857416927814484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.212190265829122, "step_time": 0.9288185119628907} +{"epoch": 0, "iter": 14962, "iter_tflops": 12.205083820316919, "iter_time": 1.6903688507080077, "loss": 0.29332128167152405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.20037550561235, "step_time": 1.2734947719573975} +{"epoch": 0, "iter": 14963, "iter_tflops": 38.94393540919012, "iter_time": 0.5297639617919923, "loss": 0.20877783000469208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89713412144649, "step_time": 0.4809433994293213} +{"epoch": 0, "iter": 14964, "iter_tflops": 41.217693848083606, "iter_time": 0.5005397338867187, "loss": 0.2857343852519989, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 44.77004072905993, "step_time": 0.46082364845275886} +{"epoch": 0, "iter": 14965, "iter_tflops": 30.67457530328637, "iter_time": 0.6725795974731446, "loss": 0.03792708367109299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.31456117745668, "step_time": 0.6192815628051758} +{"epoch": 0, "iter": 14966, "iter_tflops": 10.569083813019098, "iter_time": 1.9520228881835937, "loss": 0.04119453951716423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.88876997497458, "step_time": 1.7353429794311521} +{"epoch": 0, "iter": 14967, "iter_tflops": 12.33011527042672, "iter_time": 1.6732279510498047, "loss": 0.030104123055934906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.689272880050941, "step_time": 1.4045006637573243} +{"epoch": 0, "iter": 14968, "iter_tflops": 20.164267457715727, "iter_time": 1.023151153564453, "loss": 0.04798562824726105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.891881818714243, "step_time": 0.8635189838409424} +{"epoch": 0, "iter": 14969, "iter_tflops": 19.232967675725153, "iter_time": 0.847465835571289, "loss": 0.1840769201517105, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 20.173986373217762, "step_time": 0.8079356613159179} +{"epoch": 0, "iter": 14970, "iter_tflops": 9.07715343088535, "iter_time": 1.795638153076172, "loss": 0.33635297417640686, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 11.963073152171475, "step_time": 1.3624662170410156} +{"epoch": 0, "iter": 14971, "iter_tflops": 25.115124321127695, "iter_time": 0.6489827728271484, "loss": 0.20534589886665344, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 26.986829523914967, "step_time": 0.6039717636108399} +{"epoch": 0, "iter": 14972, "iter_tflops": 25.772788077519458, "iter_time": 0.6324221878051758, "loss": 0.2297363579273224, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.67012357503241, "step_time": 0.5890571098327636} +{"epoch": 0, "iter": 14973, "iter_tflops": 18.830595921086573, "iter_time": 1.0956155395507814, "loss": 0.7244667410850525, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.86413937967342, "step_time": 1.0386099853515625} +{"epoch": 0, "iter": 14974, "iter_tflops": 8.57485640102482, "iter_time": 2.405998718261719, "loss": 0.6373134851455688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.161641863230482, "step_time": 1.8483923568725587} +{"epoch": 0, "iter": 14975, "iter_tflops": 13.072541738512742, "iter_time": 1.5782006225585936, "loss": 0.5597742795944214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.41216329537122, "step_time": 1.3386241188049315} +{"epoch": 0, "iter": 14976, "iter_tflops": 34.59129999731364, "iter_time": 0.5964243469238282, "loss": 0.6293362975120544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.63499310305593, "step_time": 0.5481891136169433} +{"epoch": 0, "iter": 14977, "iter_tflops": 17.439101694904785, "iter_time": 0.8267584762573243, "loss": 0.145631343126297, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 18.648159330029728, "step_time": 0.773155403137207} +{"epoch": 0, "iter": 14978, "iter_tflops": 25.572299504028912, "iter_time": 0.5638102722167969, "loss": 0.2344137579202652, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 27.350771149935337, "step_time": 0.5271487617492675} +{"epoch": 0, "iter": 14979, "iter_tflops": 27.006406108783438, "iter_time": 0.5338705596923828, "loss": 0.16348354518413544, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 28.758897113246782, "step_time": 0.5013379020690918} +{"epoch": 0, "iter": 14980, "iter_tflops": 26.22243435803046, "iter_time": 0.5498316802978516, "loss": 0.0892309918999672, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 27.88926001122921, "step_time": 0.516970516204834} +{"epoch": 0, "iter": 14981, "iter_tflops": 23.218767904344666, "iter_time": 0.8885524673461913, "loss": 0.5299715399742126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.333663122360424, "step_time": 0.8478416671752931} +{"epoch": 0, "iter": 14982, "iter_tflops": 14.72666056614389, "iter_time": 1.4009349517822267, "loss": 
0.7837686538696289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.174854643042575, "step_time": 1.1351448974609375} +{"epoch": 0, "iter": 14983, "iter_tflops": 44.713738142342095, "iter_time": 0.46140390777587886, "loss": 0.535750150680542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.228729959321306, "step_time": 0.42777600669860844} +{"epoch": 0, "iter": 14984, "iter_tflops": 42.36150511592222, "iter_time": 0.4870245628356934, "loss": 0.603237509727478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.15320387115901, "step_time": 0.45691316986083985} +{"epoch": 0, "iter": 14985, "iter_tflops": 29.129697078532807, "iter_time": 0.7082495040893555, "loss": 0.6367056369781494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.90380400716071, "step_time": 0.6675907440185547} +{"epoch": 0, "iter": 14986, "iter_tflops": 15.843864228123419, "iter_time": 1.3021503601074218, "loss": 0.8874491453170776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.241785015994832, "step_time": 1.0722026824951172} +{"epoch": 0, "iter": 14987, "iter_tflops": 37.60575181490083, "iter_time": 0.5486153717041016, "loss": 0.6416134834289551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87220981587134, "step_time": 0.5047706890106202} +{"epoch": 0, "iter": 14988, "iter_tflops": 38.46239619014766, "iter_time": 0.536396469116211, "loss": 0.8895583748817444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95941560949012, "step_time": 0.49169163131713867} +{"epoch": 0, "iter": 14989, "iter_tflops": 27.78687362810718, "iter_time": 0.7424762420654296, "loss": 0.3793083429336548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.157757005269527, "step_time": 0.6841057014465332} +{"epoch": 0, "iter": 14990, "iter_tflops": 8.966290029521486, "iter_time": 2.300962097167969, "loss": 0.3210280239582062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.603565022121469, "step_time": 1.9456752014160157} +{"epoch": 0, "iter": 14991, "iter_tflops": 12.252280643883703, "iter_time": 1.6838574066162109, 
"loss": 0.3498430848121643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.62660057389809, "step_time": 1.4105186920166017} +{"epoch": 0, "iter": 14992, "iter_tflops": 39.15697785357716, "iter_time": 0.5268816604614257, "loss": 0.39091363549232483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.612797482101115, "step_time": 0.4076260261535644} +{"epoch": 0, "iter": 14993, "iter_tflops": 15.363705082602245, "iter_time": 1.0582263641357423, "loss": 0.2319636344909668, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 16.0880884710731, "step_time": 1.0105785903930664} +{"epoch": 0, "iter": 14994, "iter_tflops": 10.077166246224627, "iter_time": 1.613377944946289, "loss": 0.2278336137533188, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 12.51226193674295, "step_time": 1.299387580871582} +{"epoch": 0, "iter": 14995, "iter_tflops": 28.75401136770758, "iter_time": 0.5654264221191407, "loss": 0.15838193893432617, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 30.724388316009282, "step_time": 0.5291652221679688} +{"epoch": 0, "iter": 14996, "iter_tflops": 29.31977528868635, "iter_time": 0.554515769958496, "loss": 0.24434129893779755, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 31.24298119516599, "step_time": 0.5203817672729493} +{"epoch": 0, "iter": 14997, "iter_tflops": 41.40347367837959, "iter_time": 0.46927194976806635, "loss": 0.0011422177776694298, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 45.48793915760882, "step_time": 0.4271349544525147} +{"epoch": 0, "iter": 14998, "iter_tflops": 40.61810360376338, "iter_time": 0.47834554290771486, "loss": 0.017227597534656525, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 45.0654126244702, "step_time": 0.4311397075653077} +{"epoch": 0, "iter": 14999, "iter_tflops": 41.6771886129844, "iter_time": 0.4661900062561035, "loss": 0.03892134130001068, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 46.14204989149291, "step_time": 0.42107987976074224} +{"epoch": 0, "iter": 15000, "iter_tflops": 4.1674627276680924, "iter_time": 
4.662186584472656, "loss": 0.06380201876163483, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 4.202280551912841, "step_time": 4.623558227539062} +{"epoch": 0, "iter": 15001, "iter_tflops": 9.400702062475025, "iter_time": 2.194633270263672, "loss": 0.04931752383708954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.781213845268308, "step_time": 2.1092569732666018} +{"epoch": 0, "iter": 15002, "iter_tflops": 24.536457512400506, "iter_time": 0.8408342361450196, "loss": 0.02833530493080616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.544973240332393, "step_time": 0.6982945404052734} +{"epoch": 0, "iter": 15003, "iter_tflops": 22.18697383999639, "iter_time": 0.929874153137207, "loss": 0.03801792487502098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.780217311252294, "step_time": 0.8002684097290038} +{"epoch": 0, "iter": 15004, "iter_tflops": 26.72193007263092, "iter_time": 0.7720659942626953, "loss": 0.03404484689235687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.622278854050215, "step_time": 0.7208054122924804} +{"epoch": 0, "iter": 15005, "iter_tflops": 14.700436432528237, "iter_time": 1.4034340820312499, "loss": 0.2590075135231018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.974133066984205, "step_time": 1.2915313415527345} +{"epoch": 0, "iter": 15006, "iter_tflops": 39.82203024773409, "iter_time": 0.5180824127197265, "loss": 0.21311286091804504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.729356356349676, "step_time": 0.47179046821594234} +{"epoch": 0, "iter": 15007, "iter_tflops": 37.499879020918364, "iter_time": 0.5501642684936523, "loss": 0.21279403567314148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.821304042932425, "step_time": 0.5054001579284668} +{"epoch": 0, "iter": 15008, "iter_tflops": 30.047367145196564, "iter_time": 0.686619010925293, "loss": 0.2141597419977188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.36055005304827, "step_time": 0.6375384063720703} +{"epoch": 0, "iter": 15009, "iter_tflops": 7.573392279027896, 
"iter_time": 2.724154876708984, "loss": 0.35731565952301025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.92865269497678, "step_time": 2.602093231201172} +{"epoch": 0, "iter": 15010, "iter_tflops": 26.479665090294624, "iter_time": 0.7791296997070313, "loss": 0.24831773340702057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.132668656089027, "step_time": 0.6846752853393555} +{"epoch": 0, "iter": 15011, "iter_tflops": 30.039400722480686, "iter_time": 0.6868011016845702, "loss": 0.25206005573272705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.15164325278579, "step_time": 0.5869169006347656} +{"epoch": 0, "iter": 15012, "iter_tflops": 30.155462903146688, "iter_time": 0.6841577453613281, "loss": 0.3780876100063324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.68121015516823, "step_time": 0.6125401496887207} +{"epoch": 0, "iter": 15013, "iter_tflops": 23.654929546401313, "iter_time": 0.8721688842773438, "loss": 0.009690787643194199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.396099054974528, "step_time": 0.7815963058471679} +{"epoch": 0, "iter": 15014, "iter_tflops": 23.85975179074704, "iter_time": 0.8646818161010742, "loss": 0.0072963787242770195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.09952730095704, "step_time": 0.7613082427978516} +{"epoch": 0, "iter": 15015, "iter_tflops": 23.87434511686429, "iter_time": 0.8641532745361328, "loss": 0.005095833446830511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.211548311561586, "step_time": 0.758174186706543} +{"epoch": 0, "iter": 15016, "iter_tflops": 25.278302910614507, "iter_time": 0.8161581726074219, "loss": 0.0039470987394452095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.196197346782785, "step_time": 0.7066363220214844} +{"epoch": 0, "iter": 15017, "iter_tflops": 10.546161402482882, "iter_time": 1.9562656707763673, "loss": 0.5747466683387756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.25554762526958, "step_time": 1.8329710998535156} +{"epoch": 0, "iter": 15018, "iter_tflops": 
22.587287317328375, "iter_time": 0.9133940353393555, "loss": 0.6991448402404785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.808499980408794, "step_time": 0.7695728416442871} +{"epoch": 0, "iter": 15019, "iter_tflops": 28.241924895087724, "iter_time": 0.7305130081176758, "loss": 0.8012831807136536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.382740898697786, "step_time": 0.6790399055480957} +{"epoch": 0, "iter": 15020, "iter_tflops": 25.42828621529239, "iter_time": 0.8113442382812499, "loss": 0.6118833422660828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.978019571270238, "step_time": 0.7119566421508788} +{"epoch": 0, "iter": 15021, "iter_tflops": 11.53038347899436, "iter_time": 1.7892807769775392, "loss": 0.6523439884185791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.305306072255739, "step_time": 1.6766014099121092} +{"epoch": 0, "iter": 15022, "iter_tflops": 38.981162673846406, "iter_time": 0.5292580337524414, "loss": 0.6309383511543274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99010627112646, "step_time": 0.4799032917022705} +{"epoch": 0, "iter": 15023, "iter_tflops": 45.418789760246035, "iter_time": 0.4542413749694824, "loss": 0.7041149735450745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.99258457336143, "step_time": 0.42110645294189447} +{"epoch": 0, "iter": 15024, "iter_tflops": 31.32354274334944, "iter_time": 0.6586449584960937, "loss": 0.665050745010376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.886982089714735, "step_time": 0.5593055419921874} +{"epoch": 0, "iter": 15025, "iter_tflops": 14.93853396285539, "iter_time": 1.3810654754638674, "loss": 0.7100265622138977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.393554627563695, "step_time": 1.2584881057739257} +{"epoch": 0, "iter": 15026, "iter_tflops": 20.57751726750439, "iter_time": 1.0026036300659178, "loss": 0.7020798325538635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.920824034658068, "step_time": 0.8624741973876954} +{"epoch": 0, "iter": 15027, 
"iter_tflops": 33.982323123265694, "iter_time": 0.6071125106811522, "loss": 0.6300539970397949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.92023633748831, "step_time": 0.5440655307769775} +{"epoch": 0, "iter": 15028, "iter_tflops": 24.404919856487957, "iter_time": 0.8453661651611328, "loss": 0.6567398309707642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.19396358299177, "step_time": 0.7876277847290039} +{"epoch": 0, "iter": 15029, "iter_tflops": 9.368622810952937, "iter_time": 2.20214794921875, "loss": 0.5158964395523071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.976305482939052, "step_time": 2.0680093994140627} +{"epoch": 0, "iter": 15030, "iter_tflops": 26.610788031469063, "iter_time": 0.7752905883789063, "loss": 0.40381672978401184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.55019351419488, "step_time": 0.6753179321289062} +{"epoch": 0, "iter": 15031, "iter_tflops": 42.553470511622265, "iter_time": 0.4848275184631348, "loss": 0.42721834778785706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.91909328357281, "step_time": 0.4217390823364258} +{"epoch": 0, "iter": 15032, "iter_tflops": 39.001046931495, "iter_time": 0.5289881973266601, "loss": 0.38316890597343445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.05098825626489, "step_time": 0.49062089538574216} +{"epoch": 0, "iter": 15033, "iter_tflops": 26.966687719475743, "iter_time": 0.7650584945678711, "loss": 0.0996592789888382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.786055224015346, "step_time": 0.7167044372558593} +{"epoch": 0, "iter": 15034, "iter_tflops": 26.45347663208344, "iter_time": 0.779901023864746, "loss": 0.09221232682466507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.949894181226227, "step_time": 0.6665965766906738} +{"epoch": 0, "iter": 15035, "iter_tflops": 33.12461220125176, "iter_time": 0.6228327560424805, "loss": 0.0767507329583168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.129604174023264, "step_time": 0.5556507797241211} +{"epoch": 0, "iter": 
15036, "iter_tflops": 49.04691935970893, "iter_time": 0.42063994598388665, "loss": 0.06670621037483215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.41964710222022, "step_time": 0.37227038764953607} +{"epoch": 0, "iter": 15037, "iter_tflops": 16.38483866402082, "iter_time": 1.2591575622558595, "loss": 0.3698384165763855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.908514190116726, "step_time": 1.2201600494384766} +{"epoch": 0, "iter": 15038, "iter_tflops": 21.329186127216598, "iter_time": 0.9672705459594727, "loss": 0.40907037258148193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.040050280151956, "step_time": 0.8239238052368164} +{"epoch": 0, "iter": 15039, "iter_tflops": 19.725450883461207, "iter_time": 1.0459123916625976, "loss": 0.36667436361312866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.089122938911622, "step_time": 0.8935416717529298} +{"epoch": 0, "iter": 15040, "iter_tflops": 29.303776926402694, "iter_time": 0.7040421295166016, "loss": 0.23867569863796234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.79093080474703, "step_time": 0.6105512046813965} +{"epoch": 0, "iter": 15041, "iter_tflops": 10.12478821212527, "iter_time": 2.0376814880371095, "loss": 0.12331453710794449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.418118068174458, "step_time": 1.9803090515136719} +{"epoch": 0, "iter": 15042, "iter_tflops": 25.87816587895143, "iter_time": 0.7972394027709961, "loss": 0.16253992915153503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.201013357827584, "step_time": 0.7065197792053222} +{"epoch": 0, "iter": 15043, "iter_tflops": 27.41898036034675, "iter_time": 0.7524383926391601, "loss": 0.12441230565309525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.14442117049983, "step_time": 0.6418250122070313} +{"epoch": 0, "iter": 15044, "iter_tflops": 34.94320193812772, "iter_time": 0.5904179458618164, "loss": 0.12279345840215683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54501748218118, "step_time": 0.5088441143035889} 
+{"epoch": 0, "iter": 15045, "iter_tflops": 8.017979893837301, "iter_time": 2.2738710632324217, "loss": 0.05671505257487297, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 8.456540351241836, "step_time": 2.1559469604492185} +{"epoch": 0, "iter": 15046, "iter_tflops": 18.44487578034217, "iter_time": 0.988450813293457, "loss": 0.12096193432807922, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 21.26302980451917, "step_time": 0.8574437713623047} +{"epoch": 0, "iter": 15047, "iter_tflops": 23.192565942202, "iter_time": 0.7861076049804688, "loss": 0.055691104382276535, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 26.494931256409608, "step_time": 0.6881260528564453} +{"epoch": 0, "iter": 15048, "iter_tflops": 34.36164631911312, "iter_time": 0.5305872802734375, "loss": 0.06630557775497437, "lr": 3e-05, "seqlen": 7264.0, "step_tflops": 37.86055780937539, "step_time": 0.4815526638031006} +{"epoch": 0, "iter": 15049, "iter_tflops": 18.842611775573328, "iter_time": 1.0949168701171874, "loss": 0.7514153718948364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.14391557383035, "step_time": 1.024184867858887} +{"epoch": 0, "iter": 15050, "iter_tflops": 20.006125840007076, "iter_time": 1.0312388153076173, "loss": 0.6153971552848816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.46823462790308, "step_time": 0.8791071777343751} +{"epoch": 0, "iter": 15051, "iter_tflops": 21.071445928560312, "iter_time": 0.979101936340332, "loss": 0.6091945767402649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.132558509734857, "step_time": 0.8549070129394533} +{"epoch": 0, "iter": 15052, "iter_tflops": 22.415112022950296, "iter_time": 0.920410011291504, "loss": 0.8078266978263855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.471485065383845, "step_time": 0.8099682235717773} +{"epoch": 0, "iter": 15053, "iter_tflops": 10.752596761018012, "iter_time": 1.9187080078124998, "loss": 0.5552155375480652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.72308924256994, "step_time": 
1.7598683319091797} +{"epoch": 0, "iter": 15054, "iter_tflops": 25.198504043676166, "iter_time": 0.818742790222168, "loss": 0.5857813358306885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.2561017632128, "step_time": 0.7051894226074218} +{"epoch": 0, "iter": 15055, "iter_tflops": 26.65417458843323, "iter_time": 0.7740286026000978, "loss": 0.7308133840560913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.523965709574078, "step_time": 0.6758982009887695} +{"epoch": 0, "iter": 15056, "iter_tflops": 23.68592118817721, "iter_time": 0.871027702331543, "loss": 0.5481520295143127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.53958688768111, "step_time": 0.7491431732177734} +{"epoch": 0, "iter": 15057, "iter_tflops": 9.398068323843711, "iter_time": 1.8873495025634766, "loss": 0.06600724905729294, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 10.095180819595235, "step_time": 1.757020492553711} +{"epoch": 0, "iter": 15058, "iter_tflops": 31.535628040919836, "iter_time": 0.5624571533203125, "loss": 0.05120228976011276, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 34.861484768751765, "step_time": 0.5087975940704345} +{"epoch": 0, "iter": 15059, "iter_tflops": 35.553301915468694, "iter_time": 0.49889710998535153, "loss": 0.06328526139259338, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 39.17609243166847, "step_time": 0.4527618370056152} +{"epoch": 0, "iter": 15060, "iter_tflops": 37.95671494890379, "iter_time": 0.4673070259094238, "loss": 0.043597690761089325, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 41.50249406068165, "step_time": 0.4273824977874756} +{"epoch": 0, "iter": 15061, "iter_tflops": 23.97666492060198, "iter_time": 0.796554069519043, "loss": 0.08495501428842545, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 25.649375151038214, "step_time": 0.7446072235107422} +{"epoch": 0, "iter": 15062, "iter_tflops": 10.632348693027264, "iter_time": 1.7962832641601565, "loss": 0.07045460492372513, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 12.004111220459126, 
"step_time": 1.5910140838623046} +{"epoch": 0, "iter": 15063, "iter_tflops": 9.579382665791645, "iter_time": 1.9937307739257812, "loss": 0.07962285727262497, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 13.31132749813419, "step_time": 1.434771251678467} +{"epoch": 0, "iter": 15064, "iter_tflops": 50.83045846334254, "iter_time": 0.3757335777282715, "loss": 0.12101885676383972, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 55.812826120114714, "step_time": 0.3421921329498291} +{"epoch": 0, "iter": 15065, "iter_tflops": 16.296169901619155, "iter_time": 0.9398809280395508, "loss": 0.18682417273521423, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 17.05274521378681, "step_time": 0.8981814422607423} +{"epoch": 0, "iter": 15066, "iter_tflops": 11.316779877031902, "iter_time": 1.3534291076660154, "loss": 0.11030649393796921, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 15.57660138028277, "step_time": 0.9832991752624511} +{"epoch": 0, "iter": 15067, "iter_tflops": 26.309156312748375, "iter_time": 0.5821721954345703, "loss": 0.1724054217338562, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 28.019013069130846, "step_time": 0.5466452102661132} +{"epoch": 0, "iter": 15068, "iter_tflops": 27.166133452159897, "iter_time": 0.563807113647461, "loss": 0.15253475308418274, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 28.937120361670516, "step_time": 0.5293014335632324} +{"epoch": 0, "iter": 15069, "iter_tflops": 27.408626770145684, "iter_time": 0.7527226257324219, "loss": 0.38345569372177124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.981924716661723, "step_time": 0.7118607101440431} +{"epoch": 0, "iter": 15070, "iter_tflops": 10.119413252777933, "iter_time": 2.038763809204102, "loss": 0.3945070505142212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.979689837704193, "step_time": 1.5894904861450196} +{"epoch": 0, "iter": 15071, "iter_tflops": 11.854427166459011, "iter_time": 1.740370346069336, "loss": 0.34433165192604065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
13.962034394086276, "step_time": 1.4776566886901856} +{"epoch": 0, "iter": 15072, "iter_tflops": 20.690533014250544, "iter_time": 0.9971272125244142, "loss": 0.37124285101890564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.699773844381664, "step_time": 0.7727066764831544} +{"epoch": 0, "iter": 15073, "iter_tflops": 16.392070334602476, "iter_time": 0.8845445022583007, "loss": 0.2575637400150299, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 17.561089759245775, "step_time": 0.8256614990234376} +{"epoch": 0, "iter": 15074, "iter_tflops": 7.516832384052441, "iter_time": 1.9289396057128907, "loss": 0.2061045616865158, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 8.607215914460472, "step_time": 1.6845767364501953} +{"epoch": 0, "iter": 15075, "iter_tflops": 6.649569375661953, "iter_time": 2.180519500732422, "loss": 0.18278688192367554, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 7.887377672080646, "step_time": 1.8383189315795898} +{"epoch": 0, "iter": 15076, "iter_tflops": 24.52034774517089, "iter_time": 0.5913258590698243, "loss": 0.20335999131202698, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 26.119517584383328, "step_time": 0.5551218795776367} +{"epoch": 0, "iter": 15077, "iter_tflops": 16.854060703865358, "iter_time": 0.8820964279174804, "loss": 0.20246270298957825, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 17.76087872989886, "step_time": 0.8370591888427734} +{"epoch": 0, "iter": 15078, "iter_tflops": 9.699682239800373, "iter_time": 1.5327210083007812, "loss": 0.21070384979248047, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 11.541566643774976, "step_time": 1.2881186065673829} +{"epoch": 0, "iter": 15079, "iter_tflops": 23.14574937452534, "iter_time": 0.6423169326782227, "loss": 0.2179308384656906, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.923724600455365, "step_time": 0.5964961891174316} +{"epoch": 0, "iter": 15080, "iter_tflops": 23.030564225986875, "iter_time": 0.6455294189453125, "loss": 0.26869702339172363, "lr": 3e-05, "seqlen": 5952.0, 
"step_tflops": 24.503995151578483, "step_time": 0.6067135848999023} +{"epoch": 0, "iter": 15081, "iter_tflops": 30.864048347812798, "iter_time": 0.6684506607055664, "loss": 0.5158066153526306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.1622490776604, "step_time": 0.6039149665832519} +{"epoch": 0, "iter": 15082, "iter_tflops": 34.185503938606374, "iter_time": 0.6035041503906251, "loss": 0.6593971252441406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.299437774132954, "step_time": 0.5531207637786865} +{"epoch": 0, "iter": 15083, "iter_tflops": 37.11613936514257, "iter_time": 0.5558523559570312, "loss": 0.571448564529419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.17885388226783, "step_time": 0.5134813842773437} +{"epoch": 0, "iter": 15084, "iter_tflops": 37.11121423912148, "iter_time": 0.5559261245727539, "loss": 0.4922914505004883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.9040512713593, "step_time": 0.5170175170898438} +{"epoch": 0, "iter": 15085, "iter_tflops": 17.003493752353613, "iter_time": 1.2133443756103517, "loss": 0.14443370699882507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.881177256464017, "step_time": 1.1537883224487304} +{"epoch": 0, "iter": 15086, "iter_tflops": 25.82680503216457, "iter_time": 0.7988248443603516, "loss": 0.08280881494283676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.5182814612344, "step_time": 0.6344459972381593} +{"epoch": 0, "iter": 15087, "iter_tflops": 44.73132715942586, "iter_time": 0.4612224769592286, "loss": 0.11101480573415756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.651902353600434, "step_time": 0.4240552272796631} +{"epoch": 0, "iter": 15088, "iter_tflops": 43.175417471114415, "iter_time": 0.477843521118164, "loss": 0.13039642572402954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.60262541699255, "step_time": 0.43340242958068853} +{"epoch": 0, "iter": 15089, "iter_tflops": 29.336833243902422, "iter_time": 0.7032488250732422, "loss": 0.5902777314186096, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 32.189593011320895, "step_time": 0.6409243354797364} +{"epoch": 0, "iter": 15090, "iter_tflops": 37.590420940350036, "iter_time": 0.5488391189575196, "loss": 0.5034935474395752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.28377397535898, "step_time": 0.49973855400085454} +{"epoch": 0, "iter": 15091, "iter_tflops": 42.08567294406114, "iter_time": 0.49021655273437503, "loss": 0.8030356764793396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.0236255422633, "step_time": 0.44827180099487307} +{"epoch": 0, "iter": 15092, "iter_tflops": 39.3340600382841, "iter_time": 0.524509635925293, "loss": 0.6586799621582031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.902310754544935, "step_time": 0.480885368347168} +{"epoch": 0, "iter": 15093, "iter_tflops": 20.935956768807667, "iter_time": 0.9854382934570312, "loss": 0.5047529935836792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.830324227736742, "step_time": 0.9036706314086914} +{"epoch": 0, "iter": 15094, "iter_tflops": 19.62385760645204, "iter_time": 1.0513271102905273, "loss": 0.6950852870941162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.155080438042805, "step_time": 0.8541099071502686} +{"epoch": 0, "iter": 15095, "iter_tflops": 45.384736529525206, "iter_time": 0.45458220291137696, "loss": 0.6771107912063599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.967927727681435, "step_time": 0.4213184928894042} +{"epoch": 0, "iter": 15096, "iter_tflops": 50.800210665007874, "iter_time": 0.40612220382690434, "loss": 0.6698981523513794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.62986521208363, "step_time": 0.37765228652954097} +{"epoch": 0, "iter": 15097, "iter_tflops": 19.088742298095738, "iter_time": 1.0807989959716797, "loss": 0.324919730424881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.812874923734036, "step_time": 1.0412973175048827} +{"epoch": 0, "iter": 15098, "iter_tflops": 15.677601897629852, "iter_time": 1.315959777832031, "loss": 0.33751943707466125, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 18.78472502717344, "step_time": 1.0982909507751466} +{"epoch": 0, "iter": 15099, "iter_tflops": 44.62342970408045, "iter_time": 0.4623376922607422, "loss": 0.4151182174682617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24742354316645, "step_time": 0.42761026382446293} +{"epoch": 0, "iter": 15100, "iter_tflops": 48.2317800661724, "iter_time": 0.42774895477294916, "loss": 0.35384634137153625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.98376222841379, "step_time": 0.39687572860717774} +{"epoch": 0, "iter": 15101, "iter_tflops": 38.33357957000209, "iter_time": 0.5381989822387695, "loss": 0.4719048738479614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53789817868457, "step_time": 0.496681209564209} +{"epoch": 0, "iter": 15102, "iter_tflops": 38.60357931770715, "iter_time": 0.5344347305297852, "loss": 0.5998737812042236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.644577071624525, "step_time": 0.48379172515869145} +{"epoch": 0, "iter": 15103, "iter_tflops": 41.578759060834855, "iter_time": 0.4961931037902832, "loss": 0.6067811250686646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.500109546554, "step_time": 0.45342953491210936} +{"epoch": 0, "iter": 15104, "iter_tflops": 38.06375801588646, "iter_time": 0.5420140991210938, "loss": 0.6550617814064026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.645375181505905, "step_time": 0.49539939117431636} +{"epoch": 0, "iter": 15105, "iter_tflops": 19.276831393694106, "iter_time": 1.070253356933594, "loss": 0.12263444066047668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.38756575442184, "step_time": 1.0119449157714844} +{"epoch": 0, "iter": 15106, "iter_tflops": 19.165372679927135, "iter_time": 1.076477554321289, "loss": 0.19317148625850677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.591164455036733, "step_time": 0.8061803340911865} +{"epoch": 0, "iter": 15107, "iter_tflops": 41.69118811181391, "iter_time": 0.49485501480102545, "loss": 0.23501056432724, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.654099015272294, "step_time": 0.4519001350402832} +{"epoch": 0, "iter": 15108, "iter_tflops": 42.925792700900416, "iter_time": 0.48062230682373047, "loss": 0.2532852590084076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.846867250211076, "step_time": 0.44039430427551274} +{"epoch": 0, "iter": 15109, "iter_tflops": 23.563395901645706, "iter_time": 0.875556884765625, "loss": 0.5920376181602478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.83319883928424, "step_time": 0.7986271324157714} +{"epoch": 0, "iter": 15110, "iter_tflops": 23.028899483940176, "iter_time": 0.8958783950805664, "loss": 0.6444236040115356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.881417849821897, "step_time": 0.7399585494995118} +{"epoch": 0, "iter": 15111, "iter_tflops": 48.531096463947996, "iter_time": 0.42511080551147457, "loss": 0.5478724241256714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.63663995043405, "step_time": 0.3919530868530274} +{"epoch": 0, "iter": 15112, "iter_tflops": 47.342321676375086, "iter_time": 0.43578541946411137, "loss": 0.5431020855903625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.81893843149541, "step_time": 0.4059725399017335} +{"epoch": 0, "iter": 15113, "iter_tflops": 38.25195714416212, "iter_time": 0.5393473968505859, "loss": 0.20050126314163208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.044830605054706, "step_time": 0.5026477928161621} +{"epoch": 0, "iter": 15114, "iter_tflops": 10.245889294379488, "iter_time": 2.013597152709961, "loss": 0.10958345234394073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.287301016957391, "step_time": 1.552692565917969} +{"epoch": 0, "iter": 15115, "iter_tflops": 13.96126348753206, "iter_time": 1.4777382812500002, "loss": 0.22669990360736847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.012576013640235, "step_time": 1.2126966247558595} +{"epoch": 0, "iter": 15116, "iter_tflops": 28.759224983110485, "iter_time": 0.7173730697631835, "loss": 
0.15738236904144287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.69623615738428, "step_time": 0.5946205062866211} +{"epoch": 0, "iter": 15117, "iter_tflops": 21.89587965296675, "iter_time": 0.651026626586914, "loss": 0.18943409621715546, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 23.399228970710887, "step_time": 0.6091995887756348} +{"epoch": 0, "iter": 15118, "iter_tflops": 16.67135007671253, "iter_time": 0.8550477676391603, "loss": 0.24896270036697388, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 19.906974077570236, "step_time": 0.7160706901550292} +{"epoch": 0, "iter": 15119, "iter_tflops": 22.484411928143764, "iter_time": 0.6339859237670897, "loss": 0.17267954349517822, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 24.19374052005934, "step_time": 0.5891937484741211} +{"epoch": 0, "iter": 15120, "iter_tflops": 22.639366277102475, "iter_time": 0.6296466293334961, "loss": 0.2597525715827942, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 24.267524011692334, "step_time": 0.5874023513793946} +{"epoch": 0, "iter": 15121, "iter_tflops": 20.96431645141215, "iter_time": 0.9841052322387694, "loss": 0.5158148407936096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.559893490861523, "step_time": 0.9145031433105468} +{"epoch": 0, "iter": 15122, "iter_tflops": 21.644462812569508, "iter_time": 0.9531811294555664, "loss": 0.6012776494026184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.241243958962695, "step_time": 0.8510740432739258} +{"epoch": 0, "iter": 15123, "iter_tflops": 43.66639393288471, "iter_time": 0.47247074127197264, "loss": 0.5718321800231934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.10700362581303, "step_time": 0.43796233940124507} +{"epoch": 0, "iter": 15124, "iter_tflops": 43.14786547297159, "iter_time": 0.4781486473083496, "loss": 0.4837039113044739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.330983895399775, "step_time": 0.44529797935485843} +{"epoch": 0, "iter": 15125, "iter_tflops": 29.85885989311062, "iter_time": 
0.690953826904297, "loss": 0.42313218116760254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.615754758280264, "step_time": 0.6525573616027832} +{"epoch": 0, "iter": 15126, "iter_tflops": 17.34557137334813, "iter_time": 1.1894156188964844, "loss": 0.642512857913971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.21248372518085, "step_time": 1.073837917327881} +{"epoch": 0, "iter": 15127, "iter_tflops": 47.88045180419644, "iter_time": 0.43088761138916015, "loss": 0.4249950349330902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.037025125708546, "step_time": 0.39646950340271} +{"epoch": 0, "iter": 15128, "iter_tflops": 45.5024971447823, "iter_time": 0.4534057426452637, "loss": 0.4879263639450073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.92508941476839, "step_time": 0.42168739509582515} +{"epoch": 0, "iter": 15129, "iter_tflops": 31.300612021408078, "iter_time": 0.6591274795532227, "loss": 0.4002336263656616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.33797504387967, "step_time": 0.6188466300964357} +{"epoch": 0, "iter": 15130, "iter_tflops": 26.385288197977452, "iter_time": 0.7819165496826171, "loss": 0.6046053171157837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.84105320266946, "step_time": 0.5600028152465821} +{"epoch": 0, "iter": 15131, "iter_tflops": 35.04576299248778, "iter_time": 0.5886900939941407, "loss": 0.7463175654411316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.06293506677676, "step_time": 0.5420258178710938} +{"epoch": 0, "iter": 15132, "iter_tflops": 37.86566103055369, "iter_time": 0.5448496856689453, "loss": 0.5773282051086426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.103583922039284, "step_time": 0.5019293098449706} +{"epoch": 0, "iter": 15133, "iter_tflops": 16.15859466480966, "iter_time": 1.276787612915039, "loss": 0.039022527635097504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.24640550777046, "step_time": 1.1962546920776367} +{"epoch": 0, "iter": 15134, "iter_tflops": 17.315441661422582, 
"iter_time": 1.1914852600097654, "loss": 0.043515849858522415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.55999796147911, "step_time": 0.8071633472442628} +{"epoch": 0, "iter": 15135, "iter_tflops": 41.70848422111479, "iter_time": 0.4946498031616211, "loss": 0.04542526975274086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88363842266035, "step_time": 0.449639440536499} +{"epoch": 0, "iter": 15136, "iter_tflops": 50.92582507046303, "iter_time": 0.40512045669555663, "loss": 0.03054048679769039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.19557790781378, "step_time": 0.36713019561767574} +{"epoch": 0, "iter": 15137, "iter_tflops": 22.258613502291716, "iter_time": 0.9268813400268556, "loss": 0.14543169736862183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.726732866639583, "step_time": 0.8695294723510743} +{"epoch": 0, "iter": 15138, "iter_tflops": 9.058648616310299, "iter_time": 2.277502349853516, "loss": 0.15351919829845428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.060117125412381, "step_time": 2.0507806472778323} +{"epoch": 0, "iter": 15139, "iter_tflops": 10.806270244329642, "iter_time": 1.909178009033203, "loss": 0.2099819928407669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.193274944002962, "step_time": 1.5637583236694335} +{"epoch": 0, "iter": 15140, "iter_tflops": 38.575841382047855, "iter_time": 0.5348190155029297, "loss": 0.28555992245674133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.51757488177598, "step_time": 0.4166418399810791} +{"epoch": 0, "iter": 15141, "iter_tflops": 21.61155118249627, "iter_time": 0.6709150848388672, "loss": 0.2914736866950989, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.038577856849418, "step_time": 0.6293581047058106} +{"epoch": 0, "iter": 15142, "iter_tflops": 10.705196643486644, "iter_time": 1.3544371185302735, "loss": 0.25475504994392395, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 13.619418837272333, "step_time": 1.0646207351684571} +{"epoch": 0, "iter": 15143, "iter_tflops": 
20.78248766629192, "iter_time": 0.6976795043945312, "loss": 0.32255327701568604, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 22.32171587230765, "step_time": 0.6495699424743653} +{"epoch": 0, "iter": 15144, "iter_tflops": 23.58528765315807, "iter_time": 0.6147695083618164, "loss": 0.13781960308551788, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 25.300670312734166, "step_time": 0.5730882034301757} +{"epoch": 0, "iter": 15145, "iter_tflops": 22.373834480255425, "iter_time": 0.9221080780029297, "loss": 0.11655423790216446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.27686818985016, "step_time": 0.8498251647949219} +{"epoch": 0, "iter": 15146, "iter_tflops": 13.208879103741028, "iter_time": 1.5619109954833985, "loss": 0.0957232117652893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.530917724482338, "step_time": 1.3283885650634766} +{"epoch": 0, "iter": 15147, "iter_tflops": 40.08621185146303, "iter_time": 0.5146680755615234, "loss": 0.07510072737932205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.173799956973845, "step_time": 0.4670436668395995} +{"epoch": 0, "iter": 15148, "iter_tflops": 39.778970888997755, "iter_time": 0.5186432189941407, "loss": 0.09855987131595612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.23200632789896, "step_time": 0.47721804428100584} +{"epoch": 0, "iter": 15149, "iter_tflops": 19.769519994698772, "iter_time": 1.0435809020996094, "loss": 0.021946363151073456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.544785333664827, "step_time": 0.9575910453796387} +{"epoch": 0, "iter": 15150, "iter_tflops": 50.013525674267385, "iter_time": 0.41251028060913086, "loss": 0.01712764799594879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.7355002423426, "step_time": 0.37692344856262205} +{"epoch": 0, "iter": 15151, "iter_tflops": 52.71075285984803, "iter_time": 0.3914019889831543, "loss": 0.01977282017469406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.40251434499106, "step_time": 0.3594109725952148} +{"epoch": 0, "iter": 
15152, "iter_tflops": 57.3322314606701, "iter_time": 0.35985157012939456, "loss": 0.019958434626460075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.48268521747278, "step_time": 0.3301889705657959} +{"epoch": 0, "iter": 15153, "iter_tflops": 42.99556206679329, "iter_time": 0.4798423957824707, "loss": 0.0326051227748394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.21351152830064, "step_time": 0.436974349975586} +{"epoch": 0, "iter": 15154, "iter_tflops": 47.75098869668686, "iter_time": 0.4320558395385742, "loss": 0.06004877761006355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.47049901802044, "step_time": 0.39319415473937985} +{"epoch": 0, "iter": 15155, "iter_tflops": 52.760001988170444, "iter_time": 0.3910366325378418, "loss": 0.034922946244478226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.62134750250026, "step_time": 0.35804600906372075} +{"epoch": 0, "iter": 15156, "iter_tflops": 52.072086772839846, "iter_time": 0.39620254898071283, "loss": 0.037077177315950394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.34781675622583, "step_time": 0.3661382942199707} +{"epoch": 0, "iter": 15157, "iter_tflops": 29.421776537786453, "iter_time": 0.7012184829711914, "loss": 0.685404896736145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.245723692573424, "step_time": 0.6602853469848633} +{"epoch": 0, "iter": 15158, "iter_tflops": 10.83860590905675, "iter_time": 1.9034822082519531, "loss": 0.7934552431106567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.476493849139434, "step_time": 1.530894737243652} +{"epoch": 0, "iter": 15159, "iter_tflops": 14.586998597365431, "iter_time": 1.414348083496094, "loss": 0.5574930906295776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.125534750880433, "step_time": 1.204697769165039} +{"epoch": 0, "iter": 15160, "iter_tflops": 20.55640949861105, "iter_time": 1.0036331253051758, "loss": 0.8535791039466858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.505911427672583, "step_time": 0.7783582000732421} +{"epoch": 
0, "iter": 15161, "iter_tflops": 19.63963743658902, "iter_time": 0.7632246475219727, "loss": 0.18818248808383942, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 20.75784474729787, "step_time": 0.7221103897094727} +{"epoch": 0, "iter": 15162, "iter_tflops": 10.112514326571034, "iter_time": 1.4822678985595703, "loss": 0.22463366389274597, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 11.483551161259177, "step_time": 1.3052979125976563} +{"epoch": 0, "iter": 15163, "iter_tflops": 9.714690180639137, "iter_time": 1.5429679260253906, "loss": 0.21440890431404114, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 10.907078178074865, "step_time": 1.3742869644165037} +{"epoch": 0, "iter": 15164, "iter_tflops": 22.70651728418805, "iter_time": 0.660138900756836, "loss": 0.41887107491493225, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 24.47880571590033, "step_time": 0.6123442268371582} +{"epoch": 0, "iter": 15165, "iter_tflops": 12.670808298504106, "iter_time": 1.1378851928710938, "loss": 0.2573433816432953, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 13.728572774029331, "step_time": 1.0502129669189455} +{"epoch": 0, "iter": 15166, "iter_tflops": 9.955995448035404, "iter_time": 1.4481651000976563, "loss": 0.23152732849121094, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 13.542891630510097, "step_time": 1.0646120147705078} +{"epoch": 0, "iter": 15167, "iter_tflops": 26.33247439187108, "iter_time": 0.5475340042114257, "loss": 0.20412090420722961, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 28.058743904790006, "step_time": 0.5138478469848633} +{"epoch": 0, "iter": 15168, "iter_tflops": 26.641635798580804, "iter_time": 0.54118017578125, "loss": 0.41669896245002747, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 28.22188139898792, "step_time": 0.5108775329589844} +{"epoch": 0, "iter": 15169, "iter_tflops": 26.14827227730368, "iter_time": 0.7462149124145508, "loss": 0.037880998104810715, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 27.60234842237144, "step_time": 
0.7069047317504882} +{"epoch": 0, "iter": 15170, "iter_tflops": 12.250927965117835, "iter_time": 1.5927145080566405, "loss": 0.04143637791275978, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 15.482773283199906, "step_time": 1.2602542419433593} +{"epoch": 0, "iter": 15171, "iter_tflops": 39.6346973195546, "iter_time": 0.4923017463684082, "loss": 0.0348457470536232, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 52.04263765160051, "step_time": 0.3749277820587158} +{"epoch": 0, "iter": 15172, "iter_tflops": 52.86507997289507, "iter_time": 0.3690948867797852, "loss": 0.04266731068491936, "lr": 3e-05, "seqlen": 7760.0, "step_tflops": 57.70471862818198, "step_time": 0.33813925743103024} +{"epoch": 0, "iter": 15173, "iter_tflops": 27.733624005423284, "iter_time": 0.7439018249511719, "loss": 0.5881792306900024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.24634197007952, "step_time": 0.7054247512817383} +{"epoch": 0, "iter": 15174, "iter_tflops": 13.553677918320526, "iter_time": 1.5221767578124998, "loss": 0.5759721994400024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.37948620736084, "step_time": 1.1225065422058107} +{"epoch": 0, "iter": 15175, "iter_tflops": 39.16051004036823, "iter_time": 0.5268341369628906, "loss": 0.7414745688438416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89129967792761, "step_time": 0.48100882148742674} +{"epoch": 0, "iter": 15176, "iter_tflops": 36.679154234871156, "iter_time": 0.562474624633789, "loss": 0.6402313113212585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.083869664489875, "step_time": 0.514698148727417} +{"epoch": 0, "iter": 15177, "iter_tflops": 29.759724996676663, "iter_time": 0.6932555160522461, "loss": 0.6112334728240967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.047274126341755, "step_time": 0.6242903251647949} +{"epoch": 0, "iter": 15178, "iter_tflops": 36.43724151236718, "iter_time": 0.566208984375, "loss": 0.4782833158969879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.132989993711824, 
"step_time": 0.5140681896209717} +{"epoch": 0, "iter": 15179, "iter_tflops": 36.778064593963464, "iter_time": 0.5609619140625001, "loss": 0.5600568652153015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.198819491522286, "step_time": 0.5132263526916504} +{"epoch": 0, "iter": 15180, "iter_tflops": 42.92127785433371, "iter_time": 0.4806728630065918, "loss": 0.6456727981567383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.72959187344042, "step_time": 0.4414995441436768} +{"epoch": 0, "iter": 15181, "iter_tflops": 19.086789422838823, "iter_time": 0.8797558212280272, "loss": 0.06900184601545334, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 20.33901060285343, "step_time": 0.8255914916992187} +{"epoch": 0, "iter": 15182, "iter_tflops": 8.357967221895771, "iter_time": 2.009066757202149, "loss": 0.03554446995258331, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 9.460095248575461, "step_time": 1.7750047607421875} +{"epoch": 0, "iter": 15183, "iter_tflops": 12.75972109114172, "iter_time": 1.3159938201904298, "loss": 0.04325344040989876, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 18.223131247137733, "step_time": 0.921450538635254} +{"epoch": 0, "iter": 15184, "iter_tflops": 34.76651324688016, "iter_time": 0.482985279083252, "loss": 0.019569186493754387, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 38.34757288539046, "step_time": 0.4378820571899414} +{"epoch": 0, "iter": 15185, "iter_tflops": 15.914135379472643, "iter_time": 0.9290628051757813, "loss": 0.12627142667770386, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 17.035986302502042, "step_time": 0.8678823165893554} +{"epoch": 0, "iter": 15186, "iter_tflops": 13.10025901593853, "iter_time": 1.128621292114258, "loss": 0.16499121487140656, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 16.53540225892479, "step_time": 0.8941561279296876} +{"epoch": 0, "iter": 15187, "iter_tflops": 22.402048109113107, "iter_time": 0.6599946212768555, "loss": 0.18766961991786957, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 
24.13594117542002, "step_time": 0.6125815086364746} +{"epoch": 0, "iter": 15188, "iter_tflops": 22.78720289486676, "iter_time": 0.6488392333984374, "loss": 0.15418820083141327, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.485280135786123, "step_time": 0.6038416213989257} +{"epoch": 0, "iter": 15189, "iter_tflops": 18.07907011375076, "iter_time": 1.1411589965820315, "loss": 0.13976693153381348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.96093743601126, "step_time": 1.0880840454101564} +{"epoch": 0, "iter": 15190, "iter_tflops": 18.215623850806384, "iter_time": 1.132604278564453, "loss": 0.1348242610692978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.066037599667457, "step_time": 0.762250234603882} +{"epoch": 0, "iter": 15191, "iter_tflops": 46.448371762108664, "iter_time": 0.44417258834838863, "loss": 0.170664981007576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.44798151610843, "step_time": 0.4089577598571777} +{"epoch": 0, "iter": 15192, "iter_tflops": 51.39064422016025, "iter_time": 0.40145621490478517, "loss": 0.10806941986083984, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.67584506825187, "step_time": 0.37055734825134273} +{"epoch": 0, "iter": 15193, "iter_tflops": 25.674277559886633, "iter_time": 0.8035705566406249, "loss": 0.5542951226234436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.109258915368624, "step_time": 0.7610349502563476} +{"epoch": 0, "iter": 15194, "iter_tflops": 16.305544895904763, "iter_time": 1.2652808380126954, "loss": 0.6278818249702454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.370847778022622, "step_time": 1.065058883666992} +{"epoch": 0, "iter": 15195, "iter_tflops": 42.906363096600195, "iter_time": 0.4808399505615234, "loss": 0.6246457695960999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.217518120612965, "step_time": 0.44639120292663576} +{"epoch": 0, "iter": 15196, "iter_tflops": 39.74824680039651, "iter_time": 0.5190441131591798, "loss": 0.5427829027175903, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 42.4600007722367, "step_time": 0.4858947982788086} +{"epoch": 0, "iter": 15197, "iter_tflops": 41.60116708673732, "iter_time": 0.49592583465576173, "loss": 0.1451256424188614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22696608419688, "step_time": 0.4561679744720459} +{"epoch": 0, "iter": 15198, "iter_tflops": 37.8366800071605, "iter_time": 0.5452670135498047, "loss": 0.09284663200378418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.48703225745577, "step_time": 0.4972901744842529} +{"epoch": 0, "iter": 15199, "iter_tflops": 39.47080994810383, "iter_time": 0.5226924285888672, "loss": 0.1515505313873291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.23593644822373, "step_time": 0.4771746654510498} +{"epoch": 0, "iter": 15200, "iter_tflops": 39.34661001574653, "iter_time": 0.5243423385620117, "loss": 0.139351487159729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.32024798027445, "step_time": 0.4762459697723388} +{"epoch": 0, "iter": 15201, "iter_tflops": 24.14821323289669, "iter_time": 0.7515836868286132, "loss": 0.040306154638528824, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 26.111038653877074, "step_time": 0.6950854530334473} +{"epoch": 0, "iter": 15202, "iter_tflops": 33.26302996877815, "iter_time": 0.5456328887939453, "loss": 0.022089222446084023, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 37.12670658264352, "step_time": 0.4888503398895263} +{"epoch": 0, "iter": 15203, "iter_tflops": 36.64653059575251, "iter_time": 0.49525569915771483, "loss": 0.032871171832084656, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 40.368109042372375, "step_time": 0.44959755516052247} +{"epoch": 0, "iter": 15204, "iter_tflops": 37.35486757404143, "iter_time": 0.4858644752502441, "loss": 0.03256303071975708, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 41.14012965275744, "step_time": 0.44116057205200193} +{"epoch": 0, "iter": 15205, "iter_tflops": 34.204597804180985, "iter_time": 0.6031672592163085, "loss": 0.30994296073913574, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 37.79770147217316, "step_time": 0.5458293151855468} +{"epoch": 0, "iter": 15206, "iter_tflops": 39.488878137910696, "iter_time": 0.5224532699584961, "loss": 0.2650302052497864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.269795501794995, "step_time": 0.47680127143859863} +{"epoch": 0, "iter": 15207, "iter_tflops": 43.54730451459921, "iter_time": 0.4737628135681152, "loss": 0.17661771178245544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.45433137088423, "step_time": 0.43475680541992184} +{"epoch": 0, "iter": 15208, "iter_tflops": 45.93071914829326, "iter_time": 0.44917854309082034, "loss": 0.24484041333198547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.03240214731307, "step_time": 0.4123546466827393} +{"epoch": 0, "iter": 15209, "iter_tflops": 38.17665348837541, "iter_time": 0.5404112625122071, "loss": 0.06490293145179749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.63753845232635, "step_time": 0.4838715896606445} +{"epoch": 0, "iter": 15210, "iter_tflops": 38.82517560593685, "iter_time": 0.531384422302246, "loss": 0.09065409749746323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.627878720446425, "step_time": 0.4728878440856934} +{"epoch": 0, "iter": 15211, "iter_tflops": 42.78834706173367, "iter_time": 0.4821661720275879, "loss": 0.0509147047996521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.176700382636945, "step_time": 0.4373153133392334} +{"epoch": 0, "iter": 15212, "iter_tflops": 44.00033184719591, "iter_time": 0.468884952545166, "loss": 0.06213489547371864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.37447727848667, "step_time": 0.42648716163635253} +{"epoch": 0, "iter": 15213, "iter_tflops": 22.907950333679914, "iter_time": 0.9006084442138673, "loss": 0.11926600337028503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.873202183012303, "step_time": 0.82945064163208} +{"epoch": 0, "iter": 15214, "iter_tflops": 20.45456505191207, "iter_time": 1.008630271911621, "loss": 0.13278746604919434, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.336287753658446, "step_time": 0.8142903060913086} +{"epoch": 0, "iter": 15215, "iter_tflops": 53.249252143207734, "iter_time": 0.3874438171386719, "loss": 0.09933431446552277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.21477408916007, "step_time": 0.35439617919921873} +{"epoch": 0, "iter": 15216, "iter_tflops": 49.58327853217237, "iter_time": 0.4160897407531738, "loss": 0.09385287016630173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.49533228808445, "step_time": 0.38566156387329104} +{"epoch": 0, "iter": 15217, "iter_tflops": 38.5856292798654, "iter_time": 0.534683349609375, "loss": 0.0819576308131218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.96435910394491, "step_time": 0.49163370895385744} +{"epoch": 0, "iter": 15218, "iter_tflops": 46.32258205834674, "iter_time": 0.44537874603271493, "loss": 0.18145133554935455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43567552122519, "step_time": 0.3934552822113037} +{"epoch": 0, "iter": 15219, "iter_tflops": 47.53268077396726, "iter_time": 0.43404018402099603, "loss": 0.13765905797481537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.05335025484328, "step_time": 0.39634516143798826} +{"epoch": 0, "iter": 15220, "iter_tflops": 51.30142432040905, "iter_time": 0.40215439987182616, "loss": 0.12481741607189178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.66231790486682, "step_time": 0.3706474018096924} +{"epoch": 0, "iter": 15221, "iter_tflops": 21.331744952284307, "iter_time": 0.9671545181274414, "loss": 0.6810503005981445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.367588676652506, "step_time": 0.9223655624389648} +{"epoch": 0, "iter": 15222, "iter_tflops": 13.34792763550321, "iter_time": 1.545640197753906, "loss": 0.6197193264961243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.820691645733074, "step_time": 1.3040576210021972} +{"epoch": 0, "iter": 15223, "iter_tflops": 35.674072417884936, "iter_time": 0.578321792602539, "loss": 
0.6796783208847046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.88330562843398, "step_time": 0.5305900096893311} +{"epoch": 0, "iter": 15224, "iter_tflops": 37.21447310558993, "iter_time": 0.5543835983276367, "loss": 0.6063397526741028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.27002862897129, "step_time": 0.5123188190460205} +{"epoch": 0, "iter": 15225, "iter_tflops": 30.234493293609162, "iter_time": 0.682369415283203, "loss": 0.0024083557073026896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.3510478411526, "step_time": 0.6186040573120117} +{"epoch": 0, "iter": 15226, "iter_tflops": 51.82160979695189, "iter_time": 0.3981175727844239, "loss": 0.006250129546970129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.2083997113408, "step_time": 0.36063049507141115} +{"epoch": 0, "iter": 15227, "iter_tflops": 54.029123036546146, "iter_time": 0.38185134887695316, "loss": 0.003782606218010187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.272148369374726, "step_time": 0.34807399559020996} +{"epoch": 0, "iter": 15228, "iter_tflops": 63.16355508010838, "iter_time": 0.32662970733642577, "loss": 0.0011926935985684395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 69.6687923767284, "step_time": 0.29613106250762933} +{"epoch": 0, "iter": 15229, "iter_tflops": 33.11715925237839, "iter_time": 0.6229729232788085, "loss": 0.5848628878593445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.34166003460123, "step_time": 0.5837613029479981} +{"epoch": 0, "iter": 15230, "iter_tflops": 10.2637764364946, "iter_time": 2.0100879669189453, "loss": 0.1772337257862091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.066819088617127, "step_time": 1.5788917999267575} +{"epoch": 0, "iter": 15231, "iter_tflops": 14.440159403605126, "iter_time": 1.4287303161621094, "loss": 0.2737621068954468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.563697897790927, "step_time": 1.1746440658569335} +{"epoch": 0, "iter": 15232, "iter_tflops": 26.517916896682, "iter_time": 
0.7780058135986327, "loss": 0.31705325841903687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.34796405334813, "step_time": 0.6377864608764648} +{"epoch": 0, "iter": 15233, "iter_tflops": 24.6447250861397, "iter_time": 0.6248094635009767, "loss": 0.23846665024757385, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.406168132729515, "step_time": 0.5831310844421387} +{"epoch": 0, "iter": 15234, "iter_tflops": 26.02219900524254, "iter_time": 0.5917354431152344, "loss": 0.1659208983182907, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 27.66394237571114, "step_time": 0.5566183319091798} +{"epoch": 0, "iter": 15235, "iter_tflops": 28.646532669844216, "iter_time": 0.5375260467529296, "loss": 0.2920806407928467, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 30.512842465663645, "step_time": 0.5046484107971191} +{"epoch": 0, "iter": 15236, "iter_tflops": 26.698353152019862, "iter_time": 0.5767493362426758, "loss": 0.18082645535469055, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.310300453394415, "step_time": 0.5439100685119628} +{"epoch": 0, "iter": 15237, "iter_tflops": 26.881681739422636, "iter_time": 0.767477783203125, "loss": 0.04560668393969536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.386497404489482, "step_time": 0.7267925033569336} +{"epoch": 0, "iter": 15238, "iter_tflops": 15.586661928239804, "iter_time": 1.323637710571289, "loss": 0.02123526856303215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.771189706905286, "step_time": 1.1609292259216308} +{"epoch": 0, "iter": 15239, "iter_tflops": 52.71930530953814, "iter_time": 0.39133849334716797, "loss": 0.06582003831863403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.775239807103034, "step_time": 0.3570923042297363} +{"epoch": 0, "iter": 15240, "iter_tflops": 57.81749721976153, "iter_time": 0.3568313140869141, "loss": 0.021696561947464943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.18272669177205, "step_time": 0.32653059768676757} +{"epoch": 0, "iter": 15241, "iter_tflops": 
28.399349564530308, "iter_time": 0.7264635925292968, "loss": 0.28603866696357727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.054687079732496, "step_time": 0.6864517822265623} +{"epoch": 0, "iter": 15242, "iter_tflops": 15.396874238523461, "iter_time": 1.3399533691406251, "loss": 0.33404192328453064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.795001584430192, "step_time": 0.9465974769592286} +{"epoch": 0, "iter": 15243, "iter_tflops": 43.982186544841355, "iter_time": 0.46907839584350586, "loss": 0.34419214725494385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.075433690306326, "step_time": 0.4291400394439697} +{"epoch": 0, "iter": 15244, "iter_tflops": 44.40325706957582, "iter_time": 0.464630184173584, "loss": 0.3171502649784088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.372188058073526, "step_time": 0.426507345199585} +{"epoch": 0, "iter": 15245, "iter_tflops": 24.370616321696794, "iter_time": 0.8465560836791992, "loss": 0.36781901121139526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.348509628198602, "step_time": 0.7830079879760743} +{"epoch": 0, "iter": 15246, "iter_tflops": 16.928070580012406, "iter_time": 1.218750442504883, "loss": 0.25454068183898926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.204534995032812, "step_time": 0.9291387329101563} +{"epoch": 0, "iter": 15247, "iter_tflops": 39.946775943861816, "iter_time": 0.5164645462036133, "loss": 0.3702082335948944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.67216803527253, "step_time": 0.47240827369689947} +{"epoch": 0, "iter": 15248, "iter_tflops": 41.57754057343666, "iter_time": 0.4962076454162597, "loss": 0.2413933277130127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.546693391242094, "step_time": 0.45296578025817874} +{"epoch": 0, "iter": 15249, "iter_tflops": 19.57470037200603, "iter_time": 1.053967269897461, "loss": 0.1380283534526825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.761259249737154, "step_time": 0.9937303543090821} +{"epoch": 0, "iter": 
15250, "iter_tflops": 10.298031804250641, "iter_time": 2.0034016113281248, "loss": 0.14198274910449982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.61633868349147, "step_time": 1.776040977478027} +{"epoch": 0, "iter": 15251, "iter_tflops": 11.949101162001318, "iter_time": 1.7265812072753906, "loss": 0.12499083578586578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.91833766743032, "step_time": 1.4822958030700684} +{"epoch": 0, "iter": 15252, "iter_tflops": 39.20242374712145, "iter_time": 0.526270866394043, "loss": 0.11994220316410065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88070461171082, "step_time": 0.4811276702880859} +{"epoch": 0, "iter": 15253, "iter_tflops": 12.392431595314468, "iter_time": 1.1272617797851563, "loss": 0.10223366320133209, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 13.005065350548461, "step_time": 1.0741595001220703} +{"epoch": 0, "iter": 15254, "iter_tflops": 9.485066202211183, "iter_time": 1.4727904052734375, "loss": 0.28089386224746704, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 11.366771287774968, "step_time": 1.2289782333374024} +{"epoch": 0, "iter": 15255, "iter_tflops": 22.197025669539975, "iter_time": 0.6293417282104492, "loss": 0.10618390887975693, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 23.869575890388706, "step_time": 0.5852435150146483} +{"epoch": 0, "iter": 15256, "iter_tflops": 22.161084228959666, "iter_time": 0.6303624114990234, "loss": 0.17319142818450928, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 23.66696854909254, "step_time": 0.5902536468505859} +{"epoch": 0, "iter": 15257, "iter_tflops": 33.08213657789363, "iter_time": 0.623632438659668, "loss": 0.11202128976583481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.85715470715043, "step_time": 0.559758171081543} +{"epoch": 0, "iter": 15258, "iter_tflops": 41.11933847856908, "iter_time": 0.5017369995117187, "loss": 0.05493195354938507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06207627429811, "step_time": 0.4478976020812988} +{"epoch": 
0, "iter": 15259, "iter_tflops": 42.099186831424696, "iter_time": 0.4900591926574707, "loss": 0.09148862212896347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.30321537973092, "step_time": 0.4455650291442871} +{"epoch": 0, "iter": 15260, "iter_tflops": 43.684577003980344, "iter_time": 0.47227408218383793, "loss": 0.1046953797340393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76414545705547, "step_time": 0.4319368286132813} +{"epoch": 0, "iter": 15261, "iter_tflops": 33.77243694624659, "iter_time": 0.6108855438232422, "loss": 0.484508752822876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.39685492045338, "step_time": 0.5516799087524414} +{"epoch": 0, "iter": 15262, "iter_tflops": 37.00297599990974, "iter_time": 0.5575522766113281, "loss": 0.36818212270736694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.03917505447486, "step_time": 0.5027170619964599} +{"epoch": 0, "iter": 15263, "iter_tflops": 41.011296334856205, "iter_time": 0.5030587997436523, "loss": 0.4674879014492035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.73050474091699, "step_time": 0.46123095703125} +{"epoch": 0, "iter": 15264, "iter_tflops": 40.224512923145454, "iter_time": 0.5128985290527344, "loss": 0.41259604692459106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.91591023113663, "step_time": 0.46978631210327154} +{"epoch": 0, "iter": 15265, "iter_tflops": 30.64752016907367, "iter_time": 0.6731733398437499, "loss": 0.11551256477832794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.77695657104863, "step_time": 0.6108038024902344} +{"epoch": 0, "iter": 15266, "iter_tflops": 39.608211193259244, "iter_time": 0.5208792037963867, "loss": 0.17268715798854828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.536084324150316, "step_time": 0.4738849124908447} +{"epoch": 0, "iter": 15267, "iter_tflops": 39.43967745489921, "iter_time": 0.5231050262451172, "loss": 0.09461527317762375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.515653776409806, "step_time": 0.474107400894165} 
+{"epoch": 0, "iter": 15268, "iter_tflops": 40.97365927211422, "iter_time": 0.5035208930969239, "loss": 0.15627481043338776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7847532590874, "step_time": 0.4606722602844238} +{"epoch": 0, "iter": 15269, "iter_tflops": 20.727999977997687, "iter_time": 0.9953248519897462, "loss": 0.15720658004283905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.048358430621743, "step_time": 0.9357201614379883} +{"epoch": 0, "iter": 15270, "iter_tflops": 23.711992109434764, "iter_time": 0.8700700225830078, "loss": 0.17936113476753235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.4500376501537, "step_time": 0.7251692867279053} +{"epoch": 0, "iter": 15271, "iter_tflops": 46.59323485533211, "iter_time": 0.4427916107177734, "loss": 0.22432143986225128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.8590798863136, "step_time": 0.4056521186828613} +{"epoch": 0, "iter": 15272, "iter_tflops": 48.07439589193541, "iter_time": 0.42914930343627933, "loss": 0.15863211452960968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.39141642710947, "step_time": 0.39378766441345214} +{"epoch": 0, "iter": 15273, "iter_tflops": 42.49787318502821, "iter_time": 0.48546178817749025, "loss": 0.19003356993198395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.46651066179044, "step_time": 0.4439991989135742} +{"epoch": 0, "iter": 15274, "iter_tflops": 45.92209472485432, "iter_time": 0.44926290130615243, "loss": 0.12501774728298187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.52069478022084, "step_time": 0.37159285545349124} +{"epoch": 0, "iter": 15275, "iter_tflops": 52.158794352355656, "iter_time": 0.3955439109802246, "loss": 0.13995105028152466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.63123295418052, "step_time": 0.3643059215545654} +{"epoch": 0, "iter": 15276, "iter_tflops": 47.23956355478544, "iter_time": 0.4367333641052246, "loss": 0.12291121482849121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.296297238558275, "step_time": 
0.4021945953369141} +{"epoch": 0, "iter": 15277, "iter_tflops": 29.03586535589735, "iter_time": 0.7105382690429688, "loss": 0.16264717280864716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.86368956668371, "step_time": 0.6684584312438965} +{"epoch": 0, "iter": 15278, "iter_tflops": 13.937266994510614, "iter_time": 1.4802825775146484, "loss": 0.17531107366085052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.862293888295643, "step_time": 1.0937743644714355} +{"epoch": 0, "iter": 15279, "iter_tflops": 17.118742204349292, "iter_time": 1.2051757812500001, "loss": 0.17182497680187225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.173294308268908, "step_time": 1.0226933288574218} +{"epoch": 0, "iter": 15280, "iter_tflops": 28.007367168268736, "iter_time": 0.7366309509277345, "loss": 0.15771518647670746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.15466422788902, "step_time": 0.5868664646148682} +{"epoch": 0, "iter": 15281, "iter_tflops": 20.913675749012505, "iter_time": 0.7558466339111329, "loss": 0.29585060477256775, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 22.151657367570323, "step_time": 0.7136049079895019} +{"epoch": 0, "iter": 15282, "iter_tflops": 8.487280348899477, "iter_time": 1.8624966735839843, "loss": 0.1760103553533554, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 9.98732286002228, "step_time": 1.5827596282958984} +{"epoch": 0, "iter": 15283, "iter_tflops": 26.394058262205427, "iter_time": 0.5989049224853515, "loss": 0.23158535361289978, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 28.239379520243176, "step_time": 0.5597690773010253} +{"epoch": 0, "iter": 15284, "iter_tflops": 28.386869844676617, "iter_time": 0.5568606719970703, "loss": 0.2024005800485611, "lr": 3e-05, "seqlen": 6320.0, "step_tflops": 30.162273654889397, "step_time": 0.5240828857421875} +{"epoch": 0, "iter": 15285, "iter_tflops": 26.76693166744561, "iter_time": 0.7707679672241211, "loss": 0.654732882976532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.27461839489952, 
"step_time": 0.729668327331543} +{"epoch": 0, "iter": 15286, "iter_tflops": 17.90324025767074, "iter_time": 1.1523664550781252, "loss": 0.5640271902084351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.584626917406673, "step_time": 0.8747687034606935} +{"epoch": 0, "iter": 15287, "iter_tflops": 45.00810270675055, "iter_time": 0.4583862075805664, "loss": 0.5236397385597229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62337005571244, "step_time": 0.4243040637969971} +{"epoch": 0, "iter": 15288, "iter_tflops": 44.34165057898895, "iter_time": 0.4652757225036621, "loss": 0.5194193124771118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.428419645315614, "step_time": 0.43499432754516604} +{"epoch": 0, "iter": 15289, "iter_tflops": 37.32491997334013, "iter_time": 0.5527431411743164, "loss": 0.6872819066047668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.53165429459457, "step_time": 0.5090118789672851} +{"epoch": 0, "iter": 15290, "iter_tflops": 41.630280491749694, "iter_time": 0.4955790176391602, "loss": 0.9228253364562988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.68107390147386, "step_time": 0.46174121856689454} +{"epoch": 0, "iter": 15291, "iter_tflops": 44.1951779967736, "iter_time": 0.46681774902343753, "loss": 0.6062638163566589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.755040002306295, "step_time": 0.43201918601989747} +{"epoch": 0, "iter": 15292, "iter_tflops": 45.32095874317064, "iter_time": 0.4552219123840332, "loss": 0.5507742762565613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.45754239114773, "step_time": 0.4257560844421387} +{"epoch": 0, "iter": 15293, "iter_tflops": 30.69762837912747, "iter_time": 0.6720745086669921, "loss": 0.08052031695842743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.792167659443784, "step_time": 0.629146987915039} +{"epoch": 0, "iter": 15294, "iter_tflops": 39.063166092434194, "iter_time": 0.5281469879150391, "loss": 0.13255561888217926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
42.91632310890003, "step_time": 0.4807283573150634} +{"epoch": 0, "iter": 15295, "iter_tflops": 41.58129791221, "iter_time": 0.4961628074645996, "loss": 0.0781819075345993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59695641587398, "step_time": 0.4524664611816407} +{"epoch": 0, "iter": 15296, "iter_tflops": 42.02318753702069, "iter_time": 0.4909454689025879, "loss": 0.10580426454544067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.88385330198956, "step_time": 0.4496373348236084} +{"epoch": 0, "iter": 15297, "iter_tflops": 21.06557632553881, "iter_time": 0.9793747482299804, "loss": 0.03897064924240112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.62617844125669, "step_time": 0.9118240432739259} +{"epoch": 0, "iter": 15298, "iter_tflops": 16.50795775789228, "iter_time": 1.2497665557861328, "loss": 0.08266032487154007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.728836162254513, "step_time": 0.8694523983001708} +{"epoch": 0, "iter": 15299, "iter_tflops": 41.19177489293406, "iter_time": 0.5008546867370606, "loss": 0.05522983521223068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.014416207900986, "step_time": 0.4583219165802002} +{"epoch": 0, "iter": 15300, "iter_tflops": 45.09237735435278, "iter_time": 0.45752951431274413, "loss": 0.03689825162291527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.403126657352885, "step_time": 0.41760704040527347} +{"epoch": 0, "iter": 15301, "iter_tflops": 23.25081174070947, "iter_time": 0.887327880859375, "loss": 0.6488555073738098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.288486577680505, "step_time": 0.8158295059204101} +{"epoch": 0, "iter": 15302, "iter_tflops": 35.525792301337255, "iter_time": 0.5807356338500976, "loss": 0.6218354105949402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.605389872894975, "step_time": 0.5344096660614014} +{"epoch": 0, "iter": 15303, "iter_tflops": 35.16265026826131, "iter_time": 0.5867331771850586, "loss": 0.5623736381530762, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 38.108470092868444, "step_time": 0.5413781623840332} +{"epoch": 0, "iter": 15304, "iter_tflops": 36.493431669824375, "iter_time": 0.565337173461914, "loss": 0.5425733923912048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.628966390975194, "step_time": 0.5206063995361327} +{"epoch": 0, "iter": 15305, "iter_tflops": 29.83442434305557, "iter_time": 0.6915197448730469, "loss": 0.1275743544101715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.390885607455054, "step_time": 0.6369413223266602} +{"epoch": 0, "iter": 15306, "iter_tflops": 8.70183267528168, "iter_time": 2.3708906250000004, "loss": 0.13573938608169556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.0196847408267, "step_time": 2.05905615234375} +{"epoch": 0, "iter": 15307, "iter_tflops": 13.674505551319136, "iter_time": 1.508726837158203, "loss": 0.18974313139915466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.842857732635032, "step_time": 1.2249164505004884} +{"epoch": 0, "iter": 15308, "iter_tflops": 47.703459120977314, "iter_time": 0.4324863204956055, "loss": 0.14374291896820068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.10836847778426, "step_time": 0.3959266834259033} +{"epoch": 0, "iter": 15309, "iter_tflops": 19.800878896410396, "iter_time": 0.7652649993896485, "loss": 0.25498107075691223, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 20.93383328645441, "step_time": 0.7238482971191407} +{"epoch": 0, "iter": 15310, "iter_tflops": 12.437400740971079, "iter_time": 1.2183349151611327, "loss": 0.21068847179412842, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 15.972509648128794, "step_time": 0.9486874580383301} +{"epoch": 0, "iter": 15311, "iter_tflops": 26.28803982367898, "iter_time": 0.5764187698364257, "loss": 0.2352444976568222, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.11229931454303, "step_time": 0.5390138816833496} +{"epoch": 0, "iter": 15312, "iter_tflops": 28.095555031038362, "iter_time": 0.5393351211547851, "loss": 0.1361047625541687, "lr": 3e-05, "seqlen": 
6064.0, "step_tflops": 29.84686773314953, "step_time": 0.5076887702941895} +{"epoch": 0, "iter": 15313, "iter_tflops": 22.352704233827353, "iter_time": 0.5612990570068359, "loss": 0.026615550741553307, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 24.128932482262094, "step_time": 0.5199795646667481} +{"epoch": 0, "iter": 15314, "iter_tflops": 12.066368132602923, "iter_time": 1.0397952117919922, "loss": 0.020045539364218712, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 14.236469395970808, "step_time": 0.8812965812683105} +{"epoch": 0, "iter": 15315, "iter_tflops": 24.249608284474512, "iter_time": 0.5173919372558593, "loss": 0.026450976729393005, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 26.80924058056948, "step_time": 0.4679935550689698} +{"epoch": 0, "iter": 15316, "iter_tflops": 25.462219350293157, "iter_time": 0.49275169754028325, "loss": 0.02005959488451481, "lr": 3e-05, "seqlen": 5040.0, "step_tflops": 28.123279844755487, "step_time": 0.4461269054412841} +{"epoch": 0, "iter": 15317, "iter_tflops": 25.52355531172674, "iter_time": 0.8083158187866211, "loss": 0.09607277065515518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.268713210057392, "step_time": 0.7298207511901856} +{"epoch": 0, "iter": 15318, "iter_tflops": 40.14759028025673, "iter_time": 0.5138812408447265, "loss": 0.10710101574659348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96510889770159, "step_time": 0.4692606029510497} +{"epoch": 0, "iter": 15319, "iter_tflops": 41.422782470355656, "iter_time": 0.498061508178711, "loss": 0.08461710810661316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.35778852295889, "step_time": 0.454852279663086} +{"epoch": 0, "iter": 15320, "iter_tflops": 52.02177203790106, "iter_time": 0.3965857505798339, "loss": 0.09546254575252533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.47705689850383, "step_time": 0.3653004360198975} +{"epoch": 0, "iter": 15321, "iter_tflops": 32.89815691545706, "iter_time": 0.6271200408935547, "loss": 0.07824813574552536, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.224106262724696, "step_time": 0.5857094955444336} +{"epoch": 0, "iter": 15322, "iter_tflops": 37.609822004182064, "iter_time": 0.5485559997558593, "loss": 0.0554184690117836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.56437007099169, "step_time": 0.43375100898742674} +{"epoch": 0, "iter": 15323, "iter_tflops": 52.82243999511686, "iter_time": 0.39057441329956055, "loss": 0.08530505001544952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.33182517280088, "step_time": 0.3598541202545166} +{"epoch": 0, "iter": 15324, "iter_tflops": 52.74662647550287, "iter_time": 0.3911357917785645, "loss": 0.08146420866250992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.733088800403245, "step_time": 0.35735301780700685} +{"epoch": 0, "iter": 15325, "iter_tflops": 36.351378971437, "iter_time": 0.5675463790893555, "loss": 0.5685884952545166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.13486471675228, "step_time": 0.5271793746948242} +{"epoch": 0, "iter": 15326, "iter_tflops": 10.23598801615412, "iter_time": 2.0155449066162108, "loss": 0.5850053429603577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.206425832828064, "step_time": 1.690183006286621} +{"epoch": 0, "iter": 15327, "iter_tflops": 13.822480694857015, "iter_time": 1.4925753173828125, "loss": 0.6872045397758484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.100728058880787, "step_time": 1.2813764343261718} +{"epoch": 0, "iter": 15328, "iter_tflops": 28.83354499682579, "iter_time": 0.7155240020751952, "loss": 0.6011801958084106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.75672829508388, "step_time": 0.5769849338531494} +{"epoch": 0, "iter": 15329, "iter_tflops": 25.45373787230715, "iter_time": 0.5904953689575195, "loss": 0.19457706809043884, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.515003607918807, "step_time": 0.5462588539123535} +{"epoch": 0, "iter": 15330, "iter_tflops": 26.565109977409026, "iter_time": 0.5657915344238281, "loss": 
0.20330770313739777, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.34883870662497, "step_time": 0.530191535949707} +{"epoch": 0, "iter": 15331, "iter_tflops": 26.12174280875629, "iter_time": 0.575394775390625, "loss": 0.27054187655448914, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 27.852616888636305, "step_time": 0.5396374206542968} +{"epoch": 0, "iter": 15332, "iter_tflops": 26.920565461980487, "iter_time": 0.5583208999633789, "loss": 0.17406578361988068, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.677395070168288, "step_time": 0.5241171417236328} +{"epoch": 0, "iter": 15333, "iter_tflops": 39.83435432428461, "iter_time": 0.5179221267700195, "loss": 0.42801278829574585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.221493230968264, "step_time": 0.4773341217041016} +{"epoch": 0, "iter": 15334, "iter_tflops": 46.699959248932835, "iter_time": 0.4417796897888184, "loss": 0.4393683671951294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.83892656430933, "step_time": 0.40581292533874513} +{"epoch": 0, "iter": 15335, "iter_tflops": 48.04463440950041, "iter_time": 0.4294151420593262, "loss": 0.4761369824409485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.10285170962364, "step_time": 0.3959686050415039} +{"epoch": 0, "iter": 15336, "iter_tflops": 50.203717637472884, "iter_time": 0.4109475250244141, "loss": 0.4439745545387268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.27983204532456, "step_time": 0.3800876445770264} +{"epoch": 0, "iter": 15337, "iter_tflops": 29.288861794736654, "iter_time": 0.7044006576538087, "loss": 0.7547513842582703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.010887979490064, "step_time": 0.6652854804992676} +{"epoch": 0, "iter": 15338, "iter_tflops": 10.287506956828743, "iter_time": 2.005451232910156, "loss": 0.6760120391845703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.672297469940649, "step_time": 1.5089704971313478} +{"epoch": 0, "iter": 15339, "iter_tflops": 10.497491935948199, "iter_time": 
1.9653354949951174, "loss": 0.6336007118225098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.644094212965834, "step_time": 1.6316782493591309} +{"epoch": 0, "iter": 15340, "iter_tflops": 23.78182560550938, "iter_time": 0.8675151290893555, "loss": 0.5650506019592285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.383673961666837, "step_time": 0.7819643898010253} +{"epoch": 0, "iter": 15341, "iter_tflops": 15.163226959479642, "iter_time": 0.9858456344604494, "loss": 0.2393876612186432, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 16.2664551336631, "step_time": 0.9189833297729492} +{"epoch": 0, "iter": 15342, "iter_tflops": 13.445928336549525, "iter_time": 1.1117567138671876, "loss": 0.27423226833343506, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 15.715480509373343, "step_time": 0.951202293395996} +{"epoch": 0, "iter": 15343, "iter_tflops": 23.33088186063707, "iter_time": 0.6407216491699219, "loss": 0.2474467009305954, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 25.11585487804592, "step_time": 0.5951858367919922} +{"epoch": 0, "iter": 15344, "iter_tflops": 22.454620912719708, "iter_time": 0.6657249374389649, "loss": 0.20287759602069855, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.179624111737304, "step_time": 0.6182313270568847} +{"epoch": 0, "iter": 15345, "iter_tflops": 19.78522502515514, "iter_time": 1.0427525329589844, "loss": 0.44178178906440735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.864491449081942, "step_time": 0.9888136291503906} +{"epoch": 0, "iter": 15346, "iter_tflops": 14.709572875181081, "iter_time": 1.4025623779296876, "loss": 0.6290121078491211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.831193338050422, "step_time": 1.1570225906372071} +{"epoch": 0, "iter": 15347, "iter_tflops": 38.9177013405713, "iter_time": 0.5301210708618165, "loss": 0.6710003018379211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60831721104584, "step_time": 0.48420343399047855} +{"epoch": 0, "iter": 15348, "iter_tflops": 41.45225537716557, 
"iter_time": 0.49770738220214844, "loss": 0.55284583568573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26917279909752, "step_time": 0.4557426662445069} +{"epoch": 0, "iter": 15349, "iter_tflops": 17.713219760625357, "iter_time": 1.1647285919189452, "loss": 0.7369253635406494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.00137666565221, "step_time": 1.0857683563232423} +{"epoch": 0, "iter": 15350, "iter_tflops": 23.30922959570907, "iter_time": 0.885104049682617, "loss": 0.5635804533958435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.169750242188652, "step_time": 0.7072770023345948} +{"epoch": 0, "iter": 15351, "iter_tflops": 45.965711414121486, "iter_time": 0.44883659744262694, "loss": 0.7306909561157227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.523981431109455, "step_time": 0.41658794212341305} +{"epoch": 0, "iter": 15352, "iter_tflops": 45.44177956642072, "iter_time": 0.4540115661621093, "loss": 0.689818263053894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.98716246292719, "step_time": 0.4211530628204346} +{"epoch": 0, "iter": 15353, "iter_tflops": 48.12924707780768, "iter_time": 0.4286602172851562, "loss": 0.0771792083978653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.987465835518904, "step_time": 0.3893579959869385} +{"epoch": 0, "iter": 15354, "iter_tflops": 44.02601131002553, "iter_time": 0.46861146163940426, "loss": 0.12659135460853577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.011478909329774, "step_time": 0.42971168518066405} +{"epoch": 0, "iter": 15355, "iter_tflops": 50.207912682601574, "iter_time": 0.4109131889343261, "loss": 0.06909216940402985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.67147216128536, "step_time": 0.3773648796081543} +{"epoch": 0, "iter": 15356, "iter_tflops": 54.545135615557534, "iter_time": 0.3782389259338379, "loss": 0.07038644701242447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.56751824410538, "step_time": 0.3463480453491211} +{"epoch": 0, "iter": 15357, "iter_tflops": 
43.45603339621306, "iter_time": 0.4747578620910644, "loss": 0.3214031755924225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.27344304603721, "step_time": 0.43642037010192875} +{"epoch": 0, "iter": 15358, "iter_tflops": 9.525211461239763, "iter_time": 2.1659459838867186, "loss": 0.25291839241981506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.656777549479859, "step_time": 1.9359598541259766} +{"epoch": 0, "iter": 15359, "iter_tflops": 10.858888426536181, "iter_time": 1.8999268341064455, "loss": 0.24841834604740143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.412284972554469, "step_time": 1.538223617553711} +{"epoch": 0, "iter": 15360, "iter_tflops": 20.822137158700098, "iter_time": 0.9908249740600586, "loss": 0.2710925340652466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.105377104517363, "step_time": 0.8217798690795898} +{"epoch": 0, "iter": 15361, "iter_tflops": 19.8783653218227, "iter_time": 0.7972730407714844, "loss": 0.22266151010990143, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 21.007761423240385, "step_time": 0.7544109268188476} +{"epoch": 0, "iter": 15362, "iter_tflops": 24.30631276272959, "iter_time": 0.6520316314697265, "loss": 0.2298584282398224, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 29.289844720184334, "step_time": 0.5410914573669433} +{"epoch": 0, "iter": 15363, "iter_tflops": 26.29613082201845, "iter_time": 0.6026926498413087, "loss": 0.2500479221343994, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 27.92189829165719, "step_time": 0.5676005477905274} +{"epoch": 0, "iter": 15364, "iter_tflops": 28.104212452764916, "iter_time": 0.5639184799194337, "loss": 0.17021162807941437, "lr": 3e-05, "seqlen": 6336.0, "step_tflops": 29.732915724758, "step_time": 0.5330282745361329} +{"epoch": 0, "iter": 15365, "iter_tflops": 26.159200432158634, "iter_time": 0.7886744689941407, "loss": 0.6516536474227905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.546916053240576, "step_time": 0.7489438552856446} +{"epoch": 0, "iter": 15366, 
"iter_tflops": 14.795318876938783, "iter_time": 1.394433853149414, "loss": 0.6306204199790955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.17493109486724, "step_time": 0.9743169136047363} +{"epoch": 0, "iter": 15367, "iter_tflops": 42.53693194838855, "iter_time": 0.4850160217285156, "loss": 0.6832385063171387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.748187124035, "step_time": 0.4509707336425781} +{"epoch": 0, "iter": 15368, "iter_tflops": 47.4428649633554, "iter_time": 0.43486188125610353, "loss": 0.61370849609375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.051870434097445, "step_time": 0.40412022781372064} +{"epoch": 0, "iter": 15369, "iter_tflops": 36.09893588426404, "iter_time": 0.5715152816772461, "loss": 0.2510455548763275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.86814481224832, "step_time": 0.5307969703674316} +{"epoch": 0, "iter": 15370, "iter_tflops": 10.841413368253018, "iter_time": 1.9029892883300783, "loss": 0.2576892673969269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.999301816752027, "step_time": 1.5870924301147462} +{"epoch": 0, "iter": 15371, "iter_tflops": 12.730818886922048, "iter_time": 1.6205629577636718, "loss": 0.22923146188259125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.570699708122989, "step_time": 1.4159301834106446} +{"epoch": 0, "iter": 15372, "iter_tflops": 19.99200506130007, "iter_time": 1.0319672012329102, "loss": 0.26722452044487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.020759554359053, "step_time": 0.7635275192260742} +{"epoch": 0, "iter": 15373, "iter_tflops": 19.74228637084029, "iter_time": 0.7613259201049805, "loss": 0.266234427690506, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 20.93577611532229, "step_time": 0.7179248695373536} +{"epoch": 0, "iter": 15374, "iter_tflops": 13.960745210075789, "iter_time": 1.0766126098632813, "loss": 0.19863012433052063, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 16.65559115930919, "step_time": 0.902418544769287} +{"epoch": 0, "iter": 15375, 
"iter_tflops": 27.64543061169074, "iter_time": 0.5436816864013672, "loss": 0.26254209876060486, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.465519241850085, "step_time": 0.5100984039306641} +{"epoch": 0, "iter": 15376, "iter_tflops": 28.078825942942984, "iter_time": 0.5352899856567382, "loss": 0.16454795002937317, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.84839608278664, "step_time": 0.50355517578125} +{"epoch": 0, "iter": 15377, "iter_tflops": 33.13303142991598, "iter_time": 0.6226744918823242, "loss": 0.6023333668708801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.54707038392094, "step_time": 0.580388011932373} +{"epoch": 0, "iter": 15378, "iter_tflops": 35.18200844279095, "iter_time": 0.5864103393554687, "loss": 0.6777424812316895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.6699556922382, "step_time": 0.5335173816680908} +{"epoch": 0, "iter": 15379, "iter_tflops": 39.504307426942184, "iter_time": 0.5222492141723633, "loss": 0.5576918125152588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84620098956709, "step_time": 0.48151511764526367} +{"epoch": 0, "iter": 15380, "iter_tflops": 38.29744817949226, "iter_time": 0.5387067413330079, "loss": 0.6570830941200256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.22308245805882, "step_time": 0.5004743041992188} +{"epoch": 0, "iter": 15381, "iter_tflops": 17.519376067461778, "iter_time": 1.1776157684326174, "loss": 0.2935117185115814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.961933547663502, "step_time": 1.0880268859863282} +{"epoch": 0, "iter": 15382, "iter_tflops": 19.276368448366966, "iter_time": 1.0702790603637695, "loss": 0.16087637841701508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.1721855393661, "step_time": 0.8195988178253175} +{"epoch": 0, "iter": 15383, "iter_tflops": 46.1040244816689, "iter_time": 0.447490077972412, "loss": 0.3055327534675598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.82976300375634, "step_time": 0.41403153991699226} +{"epoch": 0, "iter": 
15384, "iter_tflops": 48.701562041759004, "iter_time": 0.4236228294372559, "loss": 0.2177218347787857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.8211507651145, "step_time": 0.39058394622802733} +{"epoch": 0, "iter": 15385, "iter_tflops": 25.655621955035656, "iter_time": 0.8041548767089843, "loss": 0.34038692712783813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.023158442715946, "step_time": 0.7634597396850585} +{"epoch": 0, "iter": 15386, "iter_tflops": 14.080925330368878, "iter_time": 1.4651802368164064, "loss": 0.4349784553050995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.62340020125078, "step_time": 1.1706647567749022} +{"epoch": 0, "iter": 15387, "iter_tflops": 48.251948906782694, "iter_time": 0.4275701599121094, "loss": 0.38137730956077576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.09671884848224, "step_time": 0.3960152187347412} +{"epoch": 0, "iter": 15388, "iter_tflops": 50.37338784345945, "iter_time": 0.4095633506774903, "loss": 0.3489301800727844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.307371981129045, "step_time": 0.3798948974609375} +{"epoch": 0, "iter": 15389, "iter_tflops": 45.23434521501481, "iter_time": 0.45609355926513667, "loss": 0.5322750210762024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.746101453962865, "step_time": 0.4147278461456299} +{"epoch": 0, "iter": 15390, "iter_tflops": 44.86705788304067, "iter_time": 0.4598271980285645, "loss": 0.501914918422699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.810358635475225, "step_time": 0.4226785888671875} +{"epoch": 0, "iter": 15391, "iter_tflops": 50.27082306552871, "iter_time": 0.4103989601135254, "loss": 0.5170421600341797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.48208039505804, "step_time": 0.3786766834259033} +{"epoch": 0, "iter": 15392, "iter_tflops": 49.53606289101703, "iter_time": 0.4164863395690918, "loss": 0.5497602224349976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.31772925450145, "step_time": 0.3869462146759033} +{"epoch": 0, 
"iter": 15393, "iter_tflops": 33.45746125480432, "iter_time": 0.6166365509033203, "loss": 0.6391069293022156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.55249584366995, "step_time": 0.5802994422912598} +{"epoch": 0, "iter": 15394, "iter_tflops": 13.81422287036185, "iter_time": 1.493467544555664, "loss": 0.5091934204101562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.717373754474687, "step_time": 1.3126298217773438} +{"epoch": 0, "iter": 15395, "iter_tflops": 41.59581994788607, "iter_time": 0.49598958587646486, "loss": 0.6439917683601379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.752599566524275, "step_time": 0.4610032424926758} +{"epoch": 0, "iter": 15396, "iter_tflops": 42.95281986761389, "iter_time": 0.48031988525390623, "loss": 0.6835335493087769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.074171102045185, "step_time": 0.4477800254821777} +{"epoch": 0, "iter": 15397, "iter_tflops": 39.13012539198149, "iter_time": 0.5272432250976562, "loss": 0.058617573231458664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01862224053136, "step_time": 0.49099880981445315} +{"epoch": 0, "iter": 15398, "iter_tflops": 15.74040840261702, "iter_time": 1.3107089080810546, "loss": 0.04224274307489395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.02847798820848, "step_time": 0.8958947925567626} +{"epoch": 0, "iter": 15399, "iter_tflops": 43.51959816904604, "iter_time": 0.4740644302368164, "loss": 0.046138595789670944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.265797796953514, "step_time": 0.4274474773406982} +{"epoch": 0, "iter": 15400, "iter_tflops": 43.171776966543966, "iter_time": 0.47788381576538086, "loss": 0.03252173960208893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.50246137436355, "step_time": 0.43431630516052244} +{"epoch": 0, "iter": 15401, "iter_tflops": 13.53907400554858, "iter_time": 1.1433628692626951, "loss": 0.037012744694948196, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 14.465221853433247, "step_time": 
1.0701581115722658} +{"epoch": 0, "iter": 15402, "iter_tflops": 21.482543994758924, "iter_time": 0.7205885162353515, "loss": 0.013394761830568314, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 26.7403790878862, "step_time": 0.578902582168579} +{"epoch": 0, "iter": 15403, "iter_tflops": 32.10464587721518, "iter_time": 0.4821755256652832, "loss": 0.03681832551956177, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 35.5427396551593, "step_time": 0.43553408241271974} +{"epoch": 0, "iter": 15404, "iter_tflops": 32.21252238809221, "iter_time": 0.4805607681274413, "loss": 0.01905512437224388, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 35.26805323550965, "step_time": 0.4389262542724609} +{"epoch": 0, "iter": 15405, "iter_tflops": 17.264094335802984, "iter_time": 1.1950290069580078, "loss": 0.36162957549095154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.762672806411345, "step_time": 1.0995817985534668} +{"epoch": 0, "iter": 15406, "iter_tflops": 24.94032187065929, "iter_time": 0.8272184143066407, "loss": 0.29028770327568054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.971650460155516, "step_time": 0.6661283206939698} +{"epoch": 0, "iter": 15407, "iter_tflops": 48.79713836306117, "iter_time": 0.4227931022644043, "loss": 0.308705598115921, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.015500699428706, "step_time": 0.3891521015167236} +{"epoch": 0, "iter": 15408, "iter_tflops": 49.89313805845523, "iter_time": 0.4135056304931641, "loss": 0.366071492433548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.03220708141926, "step_time": 0.38182955360412596} +{"epoch": 0, "iter": 15409, "iter_tflops": 19.35968880546906, "iter_time": 0.8249808807373047, "loss": 0.0579504556953907, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 20.35251957035284, "step_time": 0.7847369003295899} +{"epoch": 0, "iter": 15410, "iter_tflops": 10.81195650745227, "iter_time": 1.4771954650878907, "loss": 0.028457172214984894, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 12.81583867965216, 
"step_time": 1.2462214546203612} +{"epoch": 0, "iter": 15411, "iter_tflops": 32.12745755405954, "iter_time": 0.49712533569335937, "loss": 0.03235424682497978, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 35.49026450940124, "step_time": 0.45002124786376957} +{"epoch": 0, "iter": 15412, "iter_tflops": 36.787725182137024, "iter_time": 0.43414951705932625, "loss": 0.019739169627428055, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 40.47724262181446, "step_time": 0.39457660865783695} +{"epoch": 0, "iter": 15413, "iter_tflops": 33.541254439029274, "iter_time": 0.615096061706543, "loss": 0.3936861455440521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.0251728922809, "step_time": 0.5572180194854737} +{"epoch": 0, "iter": 15414, "iter_tflops": 46.27500202632221, "iter_time": 0.44583668518066405, "loss": 0.40981602668762207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.55817676945948, "step_time": 0.4001517276763916} +{"epoch": 0, "iter": 15415, "iter_tflops": 47.99043741163211, "iter_time": 0.42990009307861327, "loss": 0.27454501390457153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.91151871772856, "step_time": 0.3974280471801758} +{"epoch": 0, "iter": 15416, "iter_tflops": 48.25212196616921, "iter_time": 0.42756862640380855, "loss": 0.427343487739563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.98537592550386, "step_time": 0.3968634090423584} +{"epoch": 0, "iter": 15417, "iter_tflops": 30.03380202190479, "iter_time": 0.6869291305541992, "loss": 0.1142701581120491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.02852039361021, "step_time": 0.6441475677490234} +{"epoch": 0, "iter": 15418, "iter_tflops": 13.839970567230026, "iter_time": 1.4906891174316406, "loss": 0.18749572336673737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.37205728619828, "step_time": 1.1876022033691405} +{"epoch": 0, "iter": 15419, "iter_tflops": 49.52103731404711, "iter_time": 0.41661270904541015, "loss": 0.2538474202156067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
54.351702160374394, "step_time": 0.3795850486755371} +{"epoch": 0, "iter": 15420, "iter_tflops": 50.902294479441075, "iter_time": 0.40530773162841804, "loss": 0.22966891527175903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.611144464310414, "step_time": 0.3709884719848633} +{"epoch": 0, "iter": 15421, "iter_tflops": 29.42971175976329, "iter_time": 0.701029411315918, "loss": 0.6299322247505188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.574903443406587, "step_time": 0.6534016342163085} +{"epoch": 0, "iter": 15422, "iter_tflops": 16.52228754853883, "iter_time": 1.248682632446289, "loss": 0.6768408417701721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.498822130508994, "step_time": 1.0580687065124512} +{"epoch": 0, "iter": 15423, "iter_tflops": 41.49944230050805, "iter_time": 0.4971414642333985, "loss": 0.5633514523506165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.65400172336491, "step_time": 0.46202115631103513} +{"epoch": 0, "iter": 15424, "iter_tflops": 44.527417260705526, "iter_time": 0.4633346099853516, "loss": 0.7514521479606628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.96738920807003, "step_time": 0.4301066589355469} +{"epoch": 0, "iter": 15425, "iter_tflops": 43.71648552339082, "iter_time": 0.471929370880127, "loss": 0.09565841406583786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.72205252029755, "step_time": 0.43231781578063966} +{"epoch": 0, "iter": 15426, "iter_tflops": 43.88918576650428, "iter_time": 0.47007236862182616, "loss": 0.025128314271569252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.542151931970324, "step_time": 0.42501398658752443} +{"epoch": 0, "iter": 15427, "iter_tflops": 48.67745348228436, "iter_time": 0.4238326377868652, "loss": 0.048674631863832474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.16274614788878, "step_time": 0.38807426261901856} +{"epoch": 0, "iter": 15428, "iter_tflops": 56.032341124799515, "iter_time": 0.3681997413635254, "loss": 0.060946159064769745, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 61.0771558379103, "step_time": 0.3377873973846436} +{"epoch": 0, "iter": 15429, "iter_tflops": 25.21733642521415, "iter_time": 0.6528584518432617, "loss": 0.006154726259410381, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 26.934421278503752, "step_time": 0.6112383499145508} +{"epoch": 0, "iter": 15430, "iter_tflops": 10.105052429201509, "iter_time": 1.6292197723388673, "loss": 0.028233753517270088, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 12.695377497407275, "step_time": 1.2967988719940187} +{"epoch": 0, "iter": 15431, "iter_tflops": 33.40687968990384, "iter_time": 0.4928131980895996, "loss": 0.021268168464303017, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 37.05522920864034, "step_time": 0.4442922515869141} +{"epoch": 0, "iter": 15432, "iter_tflops": 41.308456529014805, "iter_time": 0.3985467529296875, "loss": 0.00805822666734457, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 45.654765213211185, "step_time": 0.36060531997680667} +{"epoch": 0, "iter": 15433, "iter_tflops": 19.42889421578998, "iter_time": 1.0618768768310547, "loss": 0.34409332275390625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.028102503554653, "step_time": 0.9811200752258301} +{"epoch": 0, "iter": 15434, "iter_tflops": 22.205296054375044, "iter_time": 0.9291068878173828, "loss": 0.28479480743408203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.61153415062272, "step_time": 0.673964702606201} +{"epoch": 0, "iter": 15435, "iter_tflops": 45.24064079877427, "iter_time": 0.45603009033203123, "loss": 0.28397294878959656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.517894479809726, "step_time": 0.4252264804840088} +{"epoch": 0, "iter": 15436, "iter_tflops": 47.549346715091154, "iter_time": 0.433888053894043, "loss": 0.3536541163921356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.49294256155274, "step_time": 0.40065866279602047} +{"epoch": 0, "iter": 15437, "iter_tflops": 26.623821323228228, "iter_time": 0.7749110565185546, "loss": 0.3287760019302368, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.236381411005034, "step_time": 0.7306564254760742} +{"epoch": 0, "iter": 15438, "iter_tflops": 14.56660503638607, "iter_time": 1.4163282012939453, "loss": 0.2843496799468994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.646232801033037, "step_time": 1.1691500244140627} +{"epoch": 0, "iter": 15439, "iter_tflops": 41.58839664102967, "iter_time": 0.4960781173706055, "loss": 0.28158918023109436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.57209626152095, "step_time": 0.4527132873535156} +{"epoch": 0, "iter": 15440, "iter_tflops": 44.70232660817859, "iter_time": 0.4615216941833496, "loss": 0.2526649534702301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1037400234039, "step_time": 0.42015320014953617} +{"epoch": 0, "iter": 15441, "iter_tflops": 32.29346425878716, "iter_time": 0.6388628158569336, "loss": 0.05002616345882416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.9964314995938, "step_time": 0.5895199203491212} +{"epoch": 0, "iter": 15442, "iter_tflops": 9.198171578016993, "iter_time": 2.2429559326171873, "loss": 0.020576266571879387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.790462013851458, "step_time": 1.9119749908447263} +{"epoch": 0, "iter": 15443, "iter_tflops": 10.043028168185163, "iter_time": 2.054270202636719, "loss": 0.02089453674852848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.926644005395174, "step_time": 1.7298322563171387} +{"epoch": 0, "iter": 15444, "iter_tflops": 29.520595417318727, "iter_time": 0.6988711853027344, "loss": 0.03203893080353737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.89588177306358, "step_time": 0.4495194931030273} +{"epoch": 0, "iter": 15445, "iter_tflops": 12.439937569643735, "iter_time": 1.1918119812011718, "loss": 0.257904052734375, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 13.28279972318044, "step_time": 1.1161853637695314} +{"epoch": 0, "iter": 15446, "iter_tflops": 13.077013492290114, "iter_time": 1.1337501983642577, "loss": 
0.12423937022686005, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.689893482592225, "step_time": 0.9449437408447265} +{"epoch": 0, "iter": 15447, "iter_tflops": 22.358976414515855, "iter_time": 0.6630923690795898, "loss": 0.351910799741745, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 23.88720600638397, "step_time": 0.6206697692871094} +{"epoch": 0, "iter": 15448, "iter_tflops": 22.394659311231788, "iter_time": 0.6620358200073241, "loss": 0.25215721130371094, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 24.002582845772913, "step_time": 0.6176863021850586} +{"epoch": 0, "iter": 15449, "iter_tflops": 11.844312507478234, "iter_time": 1.0867064514160156, "loss": 0.01706991158425808, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 12.926867309922628, "step_time": 0.9957006988525391} +{"epoch": 0, "iter": 15450, "iter_tflops": 11.827258418593244, "iter_time": 1.088273406982422, "loss": 0.03944569453597069, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 15.686825462829715, "step_time": 0.8205159702301026} +{"epoch": 0, "iter": 15451, "iter_tflops": 33.98115319322558, "iter_time": 0.3787773399353027, "loss": 0.02953244373202324, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 37.26230233906082, "step_time": 0.3454239273071289} +{"epoch": 0, "iter": 15452, "iter_tflops": 32.169249137497815, "iter_time": 0.4001116333007813, "loss": 0.01791071891784668, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 35.19305914052361, "step_time": 0.3657337875366211} +{"epoch": 0, "iter": 15453, "iter_tflops": 28.859219610977025, "iter_time": 0.7148874359130859, "loss": 0.11749304085969925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.587720417874273, "step_time": 0.6744894104003907} +{"epoch": 0, "iter": 15454, "iter_tflops": 15.636772434367261, "iter_time": 1.3193959045410155, "loss": 0.22182705998420715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.649124676262254, "step_time": 1.1062767753601073} +{"epoch": 0, "iter": 15455, "iter_tflops": 48.178740487278795, "iter_time": 
0.42821986007690427, "loss": 0.1568801999092102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14104674348588, "step_time": 0.39567854499816896} +{"epoch": 0, "iter": 15456, "iter_tflops": 49.23885079627505, "iter_time": 0.4190003051757813, "loss": 0.11634112149477005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.03986790085122, "step_time": 0.38897332000732426} +{"epoch": 0, "iter": 15457, "iter_tflops": 43.3891783560522, "iter_time": 0.4754893798828125, "loss": 0.31937485933303833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.30250449721929, "step_time": 0.4361522445678711} +{"epoch": 0, "iter": 15458, "iter_tflops": 36.051232148026145, "iter_time": 0.5722715225219727, "loss": 0.448316752910614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45169049682678, "step_time": 0.5100180797576904} +{"epoch": 0, "iter": 15459, "iter_tflops": 43.85044167584211, "iter_time": 0.4704877014160156, "loss": 0.3066669702529907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.93753588981133, "step_time": 0.43037450981140135} +{"epoch": 0, "iter": 15460, "iter_tflops": 45.8416046068244, "iter_time": 0.4500517311096191, "loss": 0.41352155804634094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.002225163736966, "step_time": 0.4126035079956054} +{"epoch": 0, "iter": 15461, "iter_tflops": 26.303947035901224, "iter_time": 0.7433716583251953, "loss": 0.15249520540237427, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 29.31560188891626, "step_time": 0.6670034885406495} +{"epoch": 0, "iter": 15462, "iter_tflops": 45.94849724708812, "iter_time": 0.4255549125671387, "loss": 0.10908260941505432, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 50.846330442374274, "step_time": 0.38456282997131347} +{"epoch": 0, "iter": 15463, "iter_tflops": 45.01032470097949, "iter_time": 0.43442496490478516, "loss": 0.15410606563091278, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 48.88947946878192, "step_time": 0.3999553470611573} +{"epoch": 0, "iter": 15464, "iter_tflops": 49.23002394514688, 
"iter_time": 0.39718869018554687, "loss": 0.13832604885101318, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 53.59654480240206, "step_time": 0.36482965087890623} +{"epoch": 0, "iter": 15465, "iter_tflops": 32.37125389890124, "iter_time": 0.6373275985717773, "loss": 0.1367836743593216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.48312577314334, "step_time": 0.5982953414916992} +{"epoch": 0, "iter": 15466, "iter_tflops": 14.503588508005125, "iter_time": 1.422481994628906, "loss": 0.07176749408245087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.78944036053594, "step_time": 1.0980153274536133} +{"epoch": 0, "iter": 15467, "iter_tflops": 38.737035992195395, "iter_time": 0.5325934982299805, "loss": 0.10561637580394745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.610955658233976, "step_time": 0.48417345237731924} +{"epoch": 0, "iter": 15468, "iter_tflops": 43.62689331661415, "iter_time": 0.4728985252380371, "loss": 0.12359735369682312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.94504147954105, "step_time": 0.4303071365356445} +{"epoch": 0, "iter": 15469, "iter_tflops": 20.747344774307916, "iter_time": 0.9943968124389649, "loss": 0.555881142616272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.909989876149833, "step_time": 0.9416295318603515} +{"epoch": 0, "iter": 15470, "iter_tflops": 15.606550195855302, "iter_time": 1.321950927734375, "loss": 0.537658154964447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.138687588497227, "step_time": 1.1374082832336425} +{"epoch": 0, "iter": 15471, "iter_tflops": 44.390595182318876, "iter_time": 0.4647627143859863, "loss": 0.43890875577926636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.94363529075828, "step_time": 0.4303197574615478} +{"epoch": 0, "iter": 15472, "iter_tflops": 45.734113217042605, "iter_time": 0.45110951232910157, "loss": 0.6044028997421265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.1935306473919, "step_time": 0.41938631439208984} +{"epoch": 0, "iter": 15473, "iter_tflops": 
17.754077677079316, "iter_time": 1.1620481719970701, "loss": 0.35423552989959717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.49737719000366, "step_time": 1.1153523712158202} +{"epoch": 0, "iter": 15474, "iter_tflops": 16.21849892039714, "iter_time": 1.2720717010498046, "loss": 0.3120245933532715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.229382499443673, "step_time": 0.9281001625061036} +{"epoch": 0, "iter": 15475, "iter_tflops": 41.93785841852398, "iter_time": 0.49194437408447256, "loss": 0.24709928035736084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08326061255396, "step_time": 0.4576220359802246} +{"epoch": 0, "iter": 15476, "iter_tflops": 46.143217575199934, "iter_time": 0.44710998916625977, "loss": 0.31830495595932007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.101155941509354, "step_time": 0.41178877258300783} +{"epoch": 0, "iter": 15477, "iter_tflops": 31.047591483341677, "iter_time": 0.6644990005493163, "loss": 0.23673082888126373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.19443030590004, "step_time": 0.6215227470397948} +{"epoch": 0, "iter": 15478, "iter_tflops": 11.935398786610142, "iter_time": 1.7285634002685548, "loss": 0.18106625974178314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.600705886320702, "step_time": 1.5169134368896484} +{"epoch": 0, "iter": 15479, "iter_tflops": 17.141220437038424, "iter_time": 1.2035953674316406, "loss": 0.2933116853237152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.852321553567926, "step_time": 1.039228256225586} +{"epoch": 0, "iter": 15480, "iter_tflops": 17.598647278149468, "iter_time": 1.1723113250732422, "loss": 0.16606049239635468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.938618951524777, "step_time": 0.8272749004364013} +{"epoch": 0, "iter": 15481, "iter_tflops": 16.619639461920364, "iter_time": 0.8920811233520508, "loss": 0.2540760636329651, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 17.500265318406427, "step_time": 0.8471909637451173} +{"epoch": 0, "iter": 
15482, "iter_tflops": 8.673021561613407, "iter_time": 1.709446533203125, "loss": 0.07724485546350479, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 9.955059442902023, "step_time": 1.489299659729004} +{"epoch": 0, "iter": 15483, "iter_tflops": 9.220784288647152, "iter_time": 1.607896484375, "loss": 0.3128429055213928, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 10.705403213812732, "step_time": 1.3849143600463867} +{"epoch": 0, "iter": 15484, "iter_tflops": 22.91752285921505, "iter_time": 0.6469314651489257, "loss": 0.18185889720916748, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 27.75629918652887, "step_time": 0.5341514205932617} +{"epoch": 0, "iter": 15485, "iter_tflops": 18.322754500165896, "iter_time": 0.8426224136352539, "loss": 0.13297051191329956, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 19.358549429454055, "step_time": 0.7975372161865234} +{"epoch": 0, "iter": 15486, "iter_tflops": 11.500885612196623, "iter_time": 1.3424325866699218, "loss": 0.1349889039993286, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 14.751370988873205, "step_time": 1.046625675201416} +{"epoch": 0, "iter": 15487, "iter_tflops": 27.88578039518239, "iter_time": 0.5536572189331055, "loss": 0.17252381145954132, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.747026308144562, "step_time": 0.5190153617858887} +{"epoch": 0, "iter": 15488, "iter_tflops": 27.16128018751947, "iter_time": 0.5684254760742187, "loss": 0.2567727267742157, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 28.91675872078432, "step_time": 0.5339175033569336} +{"epoch": 0, "iter": 15489, "iter_tflops": 27.862929772181737, "iter_time": 0.7404495391845702, "loss": 0.7651439309120178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.723111484228532, "step_time": 0.6941094818115234} +{"epoch": 0, "iter": 15490, "iter_tflops": 17.82301819609922, "iter_time": 1.157553298950195, "loss": 0.5592824816703796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.732515648494932, "step_time": 0.9493191604614258} +{"epoch": 0, 
"iter": 15491, "iter_tflops": 45.36634230955849, "iter_time": 0.45476651763916015, "loss": 0.8076691031455994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0688271366626, "step_time": 0.42045214271545406} +{"epoch": 0, "iter": 15492, "iter_tflops": 45.827471619103115, "iter_time": 0.4501905250549316, "loss": 0.566478967666626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62629479474382, "step_time": 0.41572907257080083} +{"epoch": 0, "iter": 15493, "iter_tflops": 24.681913290622884, "iter_time": 0.835879020690918, "loss": 0.5983041524887085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.903128831590262, "step_time": 0.7964710998535157} +{"epoch": 0, "iter": 15494, "iter_tflops": 14.877991009409897, "iter_time": 1.3866854400634765, "loss": 0.5364123582839966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.367952457816056, "step_time": 1.065218099594116} +{"epoch": 0, "iter": 15495, "iter_tflops": 37.52009772304105, "iter_time": 0.5498677978515626, "loss": 0.50252366065979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.765914603346765, "step_time": 0.5060868549346923} +{"epoch": 0, "iter": 15496, "iter_tflops": 41.073917359656136, "iter_time": 0.5022918395996093, "loss": 0.6019390821456909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.75494102211523, "step_time": 0.4609791240692139} +{"epoch": 0, "iter": 15497, "iter_tflops": 19.41929802266843, "iter_time": 1.062401611328125, "loss": 0.41217535734176636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.501552951248218, "step_time": 1.006318572998047} +{"epoch": 0, "iter": 15498, "iter_tflops": 27.86578864247829, "iter_time": 0.7403735733032226, "loss": 0.35685643553733826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.33054657207576, "step_time": 0.6009544143676758} +{"epoch": 0, "iter": 15499, "iter_tflops": 41.351990487361505, "iter_time": 0.49891415786743165, "loss": 0.4818425476551056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27292911564507, "step_time": 0.45570485305786135} 
+{"epoch": 0, "iter": 15500, "iter_tflops": 40.41944111790319, "iter_time": 0.510425006866455, "loss": 0.2668169438838959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.0896186558055, "step_time": 0.4679354038238525} +{"epoch": 0, "iter": 15501, "iter_tflops": 21.646491531864292, "iter_time": 0.953091796875, "loss": 0.7980397939682007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.963921275241436, "step_time": 0.8984133529663086} +{"epoch": 0, "iter": 15502, "iter_tflops": 17.371151128661538, "iter_time": 1.1876641540527344, "loss": 0.6212970018386841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.6427680209657, "step_time": 0.9111559810638428} +{"epoch": 0, "iter": 15503, "iter_tflops": 37.57022452896032, "iter_time": 0.5491341552734376, "loss": 0.697762131690979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.91173236149458, "step_time": 0.5042830581665039} +{"epoch": 0, "iter": 15504, "iter_tflops": 39.20667126972366, "iter_time": 0.526213851928711, "loss": 0.5274800062179565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.608855817814046, "step_time": 0.48419731330871585} +{"epoch": 0, "iter": 15505, "iter_tflops": 37.905246965057685, "iter_time": 0.5442806777954101, "loss": 0.06030594930052757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.10607225467063, "step_time": 0.48997905540466313} +{"epoch": 0, "iter": 15506, "iter_tflops": 36.80467207568681, "iter_time": 0.5605563735961914, "loss": 0.06405479460954666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.29392507768616, "step_time": 0.49961570549011225} +{"epoch": 0, "iter": 15507, "iter_tflops": 45.855472212537876, "iter_time": 0.4499156265258789, "loss": 0.037408001720905304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.401596700484525, "step_time": 0.40933412551879883} +{"epoch": 0, "iter": 15508, "iter_tflops": 42.876788665006316, "iter_time": 0.48117161178588863, "loss": 0.10139831900596619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.938575509576765, "step_time": 
0.4395338649749756} +{"epoch": 0, "iter": 15509, "iter_tflops": 18.5882671704891, "iter_time": 1.109898696899414, "loss": 0.7420917749404907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.916165732646153, "step_time": 1.035896858215332} +{"epoch": 0, "iter": 15510, "iter_tflops": 16.78130751828953, "iter_time": 1.2294091796875, "loss": 0.5785160064697266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.98253023505039, "step_time": 0.9385222396850585} +{"epoch": 0, "iter": 15511, "iter_tflops": 33.00145574451572, "iter_time": 0.6251570739746094, "loss": 0.5773900151252747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.74689839742794, "step_time": 0.5771435966491699} +{"epoch": 0, "iter": 15512, "iter_tflops": 38.75815562713029, "iter_time": 0.5323032836914062, "loss": 0.5493130087852478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.20429012275913, "step_time": 0.48883877563476563} +{"epoch": 0, "iter": 15513, "iter_tflops": 33.00578867984722, "iter_time": 0.6250750045776367, "loss": 0.08211849629878998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.66001883869497, "step_time": 0.5627682189941406} +{"epoch": 0, "iter": 15514, "iter_tflops": 40.2905617658197, "iter_time": 0.5120577278137207, "loss": 0.05727093294262886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.19855939319406, "step_time": 0.4564546699523926} +{"epoch": 0, "iter": 15515, "iter_tflops": 43.84587455751395, "iter_time": 0.47053670883178716, "loss": 0.05880779027938843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.14398516307958, "step_time": 0.4285289936065675} +{"epoch": 0, "iter": 15516, "iter_tflops": 43.093875990682186, "iter_time": 0.47874768829345704, "loss": 0.06062689796090126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.387196567223306, "step_time": 0.43537273788452147} +{"epoch": 0, "iter": 15517, "iter_tflops": 18.43059503542983, "iter_time": 1.1193937835693357, "loss": 0.133397176861763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.533581262067017, 
"step_time": 1.056185920715332} +{"epoch": 0, "iter": 15518, "iter_tflops": 14.80710988113105, "iter_time": 1.3933234558105467, "loss": 0.139332115650177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.778159821814025, "step_time": 1.0986749343872069} +{"epoch": 0, "iter": 15519, "iter_tflops": 43.69625921636159, "iter_time": 0.47214781951904294, "loss": 0.08699384331703186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.14028618918643, "step_time": 0.43765312385559085} +{"epoch": 0, "iter": 15520, "iter_tflops": 51.42199258134094, "iter_time": 0.4012114753723145, "loss": 0.1165766566991806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.4257310234204, "step_time": 0.37222952461242675} +{"epoch": 0, "iter": 15521, "iter_tflops": 37.13673064802821, "iter_time": 0.5555441513061523, "loss": 0.2756862938404083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.172962511133385, "step_time": 0.5135566864013672} +{"epoch": 0, "iter": 15522, "iter_tflops": 15.205804437870718, "iter_time": 1.3567906646728516, "loss": 0.2954891324043274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.91955835856538, "step_time": 1.0904638004302978} +{"epoch": 0, "iter": 15523, "iter_tflops": 38.190579049479496, "iter_time": 0.5402142105102539, "loss": 0.3042532801628113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78314930491491, "step_time": 0.4937658805847168} +{"epoch": 0, "iter": 15524, "iter_tflops": 38.15382457668982, "iter_time": 0.5407346115112305, "loss": 0.22190764546394348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.33927126866544, "step_time": 0.499067663192749} +{"epoch": 0, "iter": 15525, "iter_tflops": 22.72653454344912, "iter_time": 0.9077975997924805, "loss": 0.08013208955526352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.079258930933808, "step_time": 0.8567993545532226} +{"epoch": 0, "iter": 15526, "iter_tflops": 23.041526894888822, "iter_time": 0.8953874282836913, "loss": 0.10180293768644333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
29.218162175147615, "step_time": 0.7061051063537598} +{"epoch": 0, "iter": 15527, "iter_tflops": 45.973995420238616, "iter_time": 0.4487557220458984, "loss": 0.15564456582069397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88759670624103, "step_time": 0.41355156135559085} +{"epoch": 0, "iter": 15528, "iter_tflops": 49.8808190197313, "iter_time": 0.4136077537536621, "loss": 0.08215328305959702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.4622780641173, "step_time": 0.3788143692016602} +{"epoch": 0, "iter": 15529, "iter_tflops": 24.09743569016986, "iter_time": 0.8561530685424804, "loss": 0.3131452798843384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.395123272626947, "step_time": 0.8124037551879884} +{"epoch": 0, "iter": 15530, "iter_tflops": 14.555973052928515, "iter_time": 1.4173627166748046, "loss": 0.4125748574733734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.00156943339565, "step_time": 1.0857573413848876} +{"epoch": 0, "iter": 15531, "iter_tflops": 47.74643287957661, "iter_time": 0.43209706497192374, "loss": 0.3777461647987366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.016574261857905, "step_time": 0.3966253795623779} +{"epoch": 0, "iter": 15532, "iter_tflops": 52.72433068618903, "iter_time": 0.3913011932373047, "loss": 0.3034692406654358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.31118591341917, "step_time": 0.3599837131500244} +{"epoch": 0, "iter": 15533, "iter_tflops": 31.098677426502643, "iter_time": 0.6634074249267577, "loss": 0.4205192029476166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.12206888881429, "step_time": 0.6228805809020996} +{"epoch": 0, "iter": 15534, "iter_tflops": 9.739633134742421, "iter_time": 2.1182618713378907, "loss": 0.2602323293685913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.370130746217399, "step_time": 1.667815315246582} +{"epoch": 0, "iter": 15535, "iter_tflops": 13.058444460636887, "iter_time": 1.5799043731689455, "loss": 0.2837027907371521, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 16.167335111836692, "step_time": 1.2760973510742186} +{"epoch": 0, "iter": 15536, "iter_tflops": 23.32097427485789, "iter_time": 0.8846583023071289, "loss": 0.29730603098869324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.960773080049986, "step_time": 0.712380620956421} +{"epoch": 0, "iter": 15537, "iter_tflops": 22.014529207251627, "iter_time": 0.6456659240722655, "loss": 0.14808762073516846, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 23.748247573213654, "step_time": 0.5985296936035156} +{"epoch": 0, "iter": 15538, "iter_tflops": 25.404829794630164, "iter_time": 0.5595011444091796, "loss": 0.2271864116191864, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 27.121694120707083, "step_time": 0.5240834617614746} +{"epoch": 0, "iter": 15539, "iter_tflops": 24.30406906435811, "iter_time": 0.5848416290283203, "loss": 0.2244841605424881, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 25.827678776730014, "step_time": 0.5503410301208496} +{"epoch": 0, "iter": 15540, "iter_tflops": 25.192014704273785, "iter_time": 0.564227653503418, "loss": 0.13906000554561615, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 26.79247848035601, "step_time": 0.5305231971740723} +{"epoch": 0, "iter": 15541, "iter_tflops": 23.658278880737875, "iter_time": 0.87204541015625, "loss": 0.6479552984237671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.88704497204845, "step_time": 0.8289892807006836} +{"epoch": 0, "iter": 15542, "iter_tflops": 23.868264589938775, "iter_time": 0.864373420715332, "loss": 0.749133288860321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.33902498717583, "step_time": 0.6188271408081055} +{"epoch": 0, "iter": 15543, "iter_tflops": 38.55771920180218, "iter_time": 0.5350703811645507, "loss": 0.6185694932937622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.06595642884715, "step_time": 0.49044631958007806} +{"epoch": 0, "iter": 15544, "iter_tflops": 36.997885464546606, "iter_time": 0.5576289901733398, "loss": 0.3977232277393341, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 40.13028009579779, "step_time": 0.5141029033660889} +{"epoch": 0, "iter": 15545, "iter_tflops": 8.701247360534959, "iter_time": 1.1855250549316407, "loss": 0.01131876278668642, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 9.196764106292663, "step_time": 1.1216495971679687} +{"epoch": 0, "iter": 15546, "iter_tflops": 10.205312992928807, "iter_time": 1.0108016052246094, "loss": 0.012124977074563503, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 13.568175955516306, "step_time": 0.760275131225586} +{"epoch": 0, "iter": 15547, "iter_tflops": 31.735922676506917, "iter_time": 0.32504322814941405, "loss": 0.013081705197691917, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 35.72498438220632, "step_time": 0.2887488107681274} +{"epoch": 0, "iter": 15548, "iter_tflops": 32.025746058525165, "iter_time": 0.32210168457031246, "loss": 0.027417879551649094, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 35.37985921175217, "step_time": 0.29156551170349126} +{"epoch": 0, "iter": 15549, "iter_tflops": 47.48030935682592, "iter_time": 0.4345189361572266, "loss": 0.025458019226789474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43862812545593, "step_time": 0.39343312835693356} +{"epoch": 0, "iter": 15550, "iter_tflops": 48.896581365069316, "iter_time": 0.4219332504272461, "loss": 0.030619069933891296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.60292453321291, "step_time": 0.3848874607086182} +{"epoch": 0, "iter": 15551, "iter_tflops": 49.77405146512699, "iter_time": 0.4144949607849121, "loss": 0.04706760123372078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.28646736280617, "step_time": 0.3800411872863769} +{"epoch": 0, "iter": 15552, "iter_tflops": 57.5589253081203, "iter_time": 0.3584343070983887, "loss": 0.028698813170194626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.68825623882592, "step_time": 0.32910619544982905} +{"epoch": 0, "iter": 15553, "iter_tflops": 40.82399744601695, "iter_time": 0.505366813659668, "loss": 0.1720358282327652, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35822728321491, "step_time": 0.4651018486022949} +{"epoch": 0, "iter": 15554, "iter_tflops": 49.17044094135787, "iter_time": 0.41958325195312496, "loss": 0.220145121216774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.18699388403397, "step_time": 0.3807388458251953} +{"epoch": 0, "iter": 15555, "iter_tflops": 54.442373116153945, "iter_time": 0.37895286941528317, "loss": 0.13290193676948547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.12123342746617, "step_time": 0.34896250152587893} +{"epoch": 0, "iter": 15556, "iter_tflops": 44.13084550980956, "iter_time": 0.4674982604980468, "loss": 0.14341600239276886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90315409031655, "step_time": 0.4306834049224853} +{"epoch": 0, "iter": 15557, "iter_tflops": 26.04315132147268, "iter_time": 0.7921888275146484, "loss": 0.05418558046221733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.47695902662321, "step_time": 0.7508506851196289} +{"epoch": 0, "iter": 15558, "iter_tflops": 11.849840968873064, "iter_time": 1.7410439147949217, "loss": 0.13957425951957703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.151896934220595, "step_time": 1.2028461685180662} +{"epoch": 0, "iter": 15559, "iter_tflops": 39.96767609615332, "iter_time": 0.5161944732666016, "loss": 0.09881889075040817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.70254320803621, "step_time": 0.4720799293518066} +{"epoch": 0, "iter": 15560, "iter_tflops": 41.53010805382637, "iter_time": 0.4967743759155273, "loss": 0.13668014109134674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41910596362031, "step_time": 0.4542382125854493} +{"epoch": 0, "iter": 15561, "iter_tflops": 30.659585403365483, "iter_time": 0.6081094055175781, "loss": 0.015497165732085705, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 34.32330392436246, "step_time": 0.543198938369751} +{"epoch": 0, "iter": 15562, "iter_tflops": 37.21230994834809, "iter_time": 0.5010272750854492, "loss": 
0.03271453455090523, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 41.453768764137514, "step_time": 0.44976326179504394} +{"epoch": 0, "iter": 15563, "iter_tflops": 42.3772292550279, "iter_time": 0.4399622764587402, "loss": 0.02938712388277054, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 46.8377489267762, "step_time": 0.39806315803527825} +{"epoch": 0, "iter": 15564, "iter_tflops": 39.122031272801166, "iter_time": 0.4765698928833008, "loss": 0.038942575454711914, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 43.229053233924944, "step_time": 0.4312928657531739} +{"epoch": 0, "iter": 15565, "iter_tflops": 18.492058199309078, "iter_time": 1.1156731872558594, "loss": 0.2662835717201233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.652227358491324, "step_time": 1.0498094253540038} +{"epoch": 0, "iter": 15566, "iter_tflops": 14.33986760858652, "iter_time": 1.4387227325439453, "loss": 0.21185629069805145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.331713369752165, "step_time": 1.1903666458129882} +{"epoch": 0, "iter": 15567, "iter_tflops": 41.97654667917934, "iter_time": 0.491490966796875, "loss": 0.2521132528781891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.53752604191324, "step_time": 0.4530569686889649} +{"epoch": 0, "iter": 15568, "iter_tflops": 50.50049022307181, "iter_time": 0.40853253936767575, "loss": 0.20192572474479675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.81949636380894, "step_time": 0.3763459148406983} +{"epoch": 0, "iter": 15569, "iter_tflops": 35.50146724522254, "iter_time": 0.581133544921875, "loss": 0.05335291102528572, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.243622008356915, "step_time": 0.5394649467468261} +{"epoch": 0, "iter": 15570, "iter_tflops": 41.37760563870215, "iter_time": 0.49860530090332034, "loss": 0.01604386977851391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.55973161369359, "step_time": 0.45283615112304687} +{"epoch": 0, "iter": 15571, "iter_tflops": 46.049423177744586, "iter_time": 
0.4480206718444824, "loss": 0.021496329456567764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.8649202563121, "step_time": 0.405605541229248} +{"epoch": 0, "iter": 15572, "iter_tflops": 44.51636956894354, "iter_time": 0.46344959640502925, "loss": 0.051892075687646866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.05396216565297, "step_time": 0.42057955360412597} +{"epoch": 0, "iter": 15573, "iter_tflops": 29.98428477209051, "iter_time": 0.6880635528564454, "loss": 0.6413708329200745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.24383417539255, "step_time": 0.6205990982055665} +{"epoch": 0, "iter": 15574, "iter_tflops": 35.200048118162066, "iter_time": 0.5861098098754883, "loss": 0.6290403008460999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.87450463610701, "step_time": 0.5307101325988769} +{"epoch": 0, "iter": 15575, "iter_tflops": 41.94277634201109, "iter_time": 0.4918866920471192, "loss": 0.7013585567474365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.86068945696843, "step_time": 0.44986444282531735} +{"epoch": 0, "iter": 15576, "iter_tflops": 41.776729378420455, "iter_time": 0.4938417587280274, "loss": 0.5526161789894104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.33071064118305, "step_time": 0.45512398147583005} +{"epoch": 0, "iter": 15577, "iter_tflops": 20.045538422711488, "iter_time": 1.0292112426757811, "loss": 0.44915610551834106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.603083951349223, "step_time": 0.9550068664550782} +{"epoch": 0, "iter": 15578, "iter_tflops": 40.89825149305544, "iter_time": 0.5044492797851563, "loss": 0.4948206841945648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.48954339603777, "step_time": 0.46372904586791985} +{"epoch": 0, "iter": 15579, "iter_tflops": 49.07080499019341, "iter_time": 0.42043519592285156, "loss": 0.4402247369289398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76041323030195, "step_time": 0.3910335845947266} +{"epoch": 0, "iter": 15580, "iter_tflops": 48.60968048590354, 
"iter_time": 0.42442355728149417, "loss": 0.3232925534248352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.24780579578383, "step_time": 0.3948700466156006} +{"epoch": 0, "iter": 15581, "iter_tflops": 38.252489049935306, "iter_time": 0.5393398971557617, "loss": 0.3536125421524048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3667606080388, "step_time": 0.4987360191345214} +{"epoch": 0, "iter": 15582, "iter_tflops": 8.89147224670608, "iter_time": 2.3203236694335936, "loss": 0.2807120084762573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.747906705924201, "step_time": 1.9195452728271483} +{"epoch": 0, "iter": 15583, "iter_tflops": 11.710375279746765, "iter_time": 1.761779022216797, "loss": 0.3693495988845825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.541739432965052, "step_time": 1.5235187187194825} +{"epoch": 0, "iter": 15584, "iter_tflops": 32.95901558467247, "iter_time": 0.6259620666503906, "loss": 0.26759645342826843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.39451172167046, "step_time": 0.48664538574218746} +{"epoch": 0, "iter": 15585, "iter_tflops": 17.26702923770473, "iter_time": 0.910732162475586, "loss": 0.24156548082828522, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 18.785441318412225, "step_time": 0.8371184158325194} +{"epoch": 0, "iter": 15586, "iter_tflops": 27.800533544248857, "iter_time": 0.5656596069335939, "loss": 0.1890111118555069, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 29.707641464063077, "step_time": 0.5293465957641601} +{"epoch": 0, "iter": 15587, "iter_tflops": 26.377589000602754, "iter_time": 0.5961742324829101, "loss": 0.13228271901607513, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 28.106049901020047, "step_time": 0.559510814666748} +{"epoch": 0, "iter": 15588, "iter_tflops": 29.393016271773554, "iter_time": 0.5350127639770508, "loss": 0.22778788208961487, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 31.157649851676805, "step_time": 0.5047119712829591} +{"epoch": 0, "iter": 15589, "iter_tflops": 
30.152608500415745, "iter_time": 0.6842225112915039, "loss": 0.11429642140865326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.02860176467116, "step_time": 0.6441459312438964} +{"epoch": 0, "iter": 15590, "iter_tflops": 15.386292472304397, "iter_time": 1.3408749084472655, "loss": 0.13380850851535797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.15704103392879, "step_time": 1.1362585716247557} +{"epoch": 0, "iter": 15591, "iter_tflops": 47.17941637778842, "iter_time": 0.4372901382446289, "loss": 0.14252962172031403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.13379416495623, "step_time": 0.4034727687835693} +{"epoch": 0, "iter": 15592, "iter_tflops": 52.690951838465715, "iter_time": 0.39154907608032224, "loss": 0.15681204199790955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.30536414092776, "step_time": 0.36002028465270997} +{"epoch": 0, "iter": 15593, "iter_tflops": 33.272646577175166, "iter_time": 0.6200616912841798, "loss": 0.6316007971763611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.4479934593061, "step_time": 0.5820101928710937} +{"epoch": 0, "iter": 15594, "iter_tflops": 10.262241460339329, "iter_time": 2.010388626098633, "loss": 0.7457599639892578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.114445432962318, "step_time": 1.5731579055786131} +{"epoch": 0, "iter": 15595, "iter_tflops": 17.16378850016216, "iter_time": 1.2020128021240235, "loss": 0.5792754292488098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.077617627547372, "step_time": 1.02756681060791} +{"epoch": 0, "iter": 15596, "iter_tflops": 16.79202791457162, "iter_time": 1.2286242980957032, "loss": 0.5401570796966553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.918592139457406, "step_time": 1.0905194931030273} +{"epoch": 0, "iter": 15597, "iter_tflops": 20.304689821697618, "iter_time": 0.720126091003418, "loss": 0.15411801636219025, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 22.03095759875333, "step_time": 0.6636995620727538} +{"epoch": 0, "iter": 15598, 
"iter_tflops": 22.53064877885525, "iter_time": 0.6489798431396484, "loss": 0.2852850556373596, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 24.196376141414913, "step_time": 0.6043027610778808} +{"epoch": 0, "iter": 15599, "iter_tflops": 22.34066780632096, "iter_time": 0.654498649597168, "loss": 0.32889991998672485, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 23.8780986009367, "step_time": 0.6123576736450196} +{"epoch": 0, "iter": 15600, "iter_tflops": 21.731016492945013, "iter_time": 0.672860237121582, "loss": 0.16558939218521118, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 23.3728553281011, "step_time": 0.6255948066711426} +{"epoch": 0, "iter": 15601, "iter_tflops": 21.106730704259718, "iter_time": 0.9774651412963866, "loss": 0.6731176376342773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.750544374626134, "step_time": 0.9068395538330078} +{"epoch": 0, "iter": 15602, "iter_tflops": 18.380298915190963, "iter_time": 1.1224569091796874, "loss": 0.556701123714447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.03011561518794, "step_time": 0.8585515708923339} +{"epoch": 0, "iter": 15603, "iter_tflops": 33.313819141802796, "iter_time": 0.6192953567504883, "loss": 0.6073804497718811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.38304307084565, "step_time": 0.5670524444580077} +{"epoch": 0, "iter": 15604, "iter_tflops": 36.567424820070435, "iter_time": 0.564193229675293, "loss": 0.4335913360118866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.395306870398315, "step_time": 0.5236941947937012} +{"epoch": 0, "iter": 15605, "iter_tflops": 17.564020400618002, "iter_time": 1.1746224975585937, "loss": 0.16565541923046112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.465553071338096, "step_time": 1.1172746047973632} +{"epoch": 0, "iter": 15606, "iter_tflops": 17.896100915707716, "iter_time": 1.152826171875, "loss": 0.1433974653482437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.044452280616913, "step_time": 0.8580396537780761} +{"epoch": 0, "iter": 
15607, "iter_tflops": 49.77716433120288, "iter_time": 0.41446903991699224, "loss": 0.1358833909034729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.863164558591706, "step_time": 0.3830278759002686} +{"epoch": 0, "iter": 15608, "iter_tflops": 49.63317452190852, "iter_time": 0.41567144775390624, "loss": 0.17414651811122894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.44089491016305, "step_time": 0.3860544166564942} +{"epoch": 0, "iter": 15609, "iter_tflops": 24.069272386208638, "iter_time": 0.43387723159790037, "loss": 0.033496659249067307, "lr": 3e-05, "seqlen": 4208.0, "step_tflops": 26.440094134214586, "step_time": 0.394972469329834} +{"epoch": 0, "iter": 15610, "iter_tflops": 25.636047850595162, "iter_time": 0.40736034393310544, "loss": 0.029570728540420532, "lr": 3e-05, "seqlen": 4208.0, "step_tflops": 28.362058546910653, "step_time": 0.36820702743530276} +{"epoch": 0, "iter": 15611, "iter_tflops": 27.798289302949527, "iter_time": 0.37567453002929685, "loss": 0.029000429436564445, "lr": 3e-05, "seqlen": 4208.0, "step_tflops": 30.62122962858064, "step_time": 0.34104147338867186} +{"epoch": 0, "iter": 15612, "iter_tflops": 26.823441847211786, "iter_time": 0.38932771301269536, "loss": 0.020193317905068398, "lr": 3e-05, "seqlen": 4208.0, "step_tflops": 29.406033092664266, "step_time": 0.3551349220275879} +{"epoch": 0, "iter": 15613, "iter_tflops": 37.72073987848797, "iter_time": 0.546942970275879, "loss": 0.5837067365646362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.699576974421625, "step_time": 0.5069117431640625} +{"epoch": 0, "iter": 15614, "iter_tflops": 45.877887839517086, "iter_time": 0.44969580078125004, "loss": 0.6286256909370422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.651852443842465, "step_time": 0.41551508140563964} +{"epoch": 0, "iter": 15615, "iter_tflops": 43.81860897030465, "iter_time": 0.47082949447631833, "loss": 0.7176375985145569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79000008195722, "step_time": 
0.44092954635620113} +{"epoch": 0, "iter": 15616, "iter_tflops": 44.175440578609354, "iter_time": 0.46702632141113276, "loss": 0.6221320629119873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.52300459642267, "step_time": 0.43412855911254883} +{"epoch": 0, "iter": 15617, "iter_tflops": 45.276412199585884, "iter_time": 0.4556697959899903, "loss": 0.962184727191925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.28943534726496, "step_time": 0.4185702953338623} +{"epoch": 0, "iter": 15618, "iter_tflops": 41.0089419734032, "iter_time": 0.5030876808166505, "loss": 0.5444822907447815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.582919257414744, "step_time": 0.4627577972412109} +{"epoch": 0, "iter": 15619, "iter_tflops": 47.31476908430386, "iter_time": 0.43603918838500977, "loss": 0.5388097167015076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.02972303274144, "step_time": 0.4042956199645996} +{"epoch": 0, "iter": 15620, "iter_tflops": 48.97669506402558, "iter_time": 0.42124307250976567, "loss": 0.6219983696937561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.90954948825283, "step_time": 0.38993137741088874} +{"epoch": 0, "iter": 15621, "iter_tflops": 40.33780464329795, "iter_time": 0.5114580154418945, "loss": 0.2435850352048874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.55737790149623, "step_time": 0.473653247833252} +{"epoch": 0, "iter": 15622, "iter_tflops": 47.80931469628392, "iter_time": 0.4315287437438965, "loss": 0.4512682557106018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.86868214149012, "step_time": 0.397756269454956} +{"epoch": 0, "iter": 15623, "iter_tflops": 53.63086021343801, "iter_time": 0.3846869773864746, "loss": 0.24192142486572266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.2990874370014, "step_time": 0.3538836441040039} +{"epoch": 0, "iter": 15624, "iter_tflops": 45.55391823397546, "iter_time": 0.45289393997192384, "loss": 0.31664368510246277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.026126383470604, 
"step_time": 0.42081834793090817} +{"epoch": 0, "iter": 15625, "iter_tflops": 26.058362397237474, "iter_time": 0.791726402282715, "loss": 0.43866005539894104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.432140185689427, "step_time": 0.7520774307250977} +{"epoch": 0, "iter": 15626, "iter_tflops": 36.99813198530116, "iter_time": 0.5576252746582031, "loss": 0.5200040340423584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.306421484014464, "step_time": 0.4455341796875} +{"epoch": 0, "iter": 15627, "iter_tflops": 45.55559582252402, "iter_time": 0.4528772621154785, "loss": 0.5348513126373291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.273042312766314, "step_time": 0.4187095527648925} +{"epoch": 0, "iter": 15628, "iter_tflops": 43.365672597618314, "iter_time": 0.4757471122741698, "loss": 0.6937850713729858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79294804448311, "step_time": 0.4409017677307129} +{"epoch": 0, "iter": 15629, "iter_tflops": 28.285334332818096, "iter_time": 0.7293918914794921, "loss": 0.0022925492376089096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.11590019380733, "step_time": 0.6850565109252931} +{"epoch": 0, "iter": 15630, "iter_tflops": 17.5297401983166, "iter_time": 1.1769195251464843, "loss": 0.008319185115396976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.04213196922745, "step_time": 0.9804659309387206} +{"epoch": 0, "iter": 15631, "iter_tflops": 43.43783629817874, "iter_time": 0.47495674896240236, "loss": 0.003785405308008194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.20072929380077, "step_time": 0.42802450942993164} +{"epoch": 0, "iter": 15632, "iter_tflops": 50.069355848537, "iter_time": 0.4120503082275391, "loss": 0.01782718487083912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.01468685015365, "step_time": 0.3750106506347657} +{"epoch": 0, "iter": 15633, "iter_tflops": 18.175746170822844, "iter_time": 1.1350892181396484, "loss": 0.22355033457279205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
19.474745715447963, "step_time": 1.0593767852783205} +{"epoch": 0, "iter": 15634, "iter_tflops": 16.986745566484615, "iter_time": 1.2145406799316407, "loss": 0.3027113080024719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.028340571344117, "step_time": 0.9365704803466796} +{"epoch": 0, "iter": 15635, "iter_tflops": 43.49392980537674, "iter_time": 0.4743442039489746, "loss": 0.20529943704605103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.594290728594025, "step_time": 0.4334783267974854} +{"epoch": 0, "iter": 15636, "iter_tflops": 35.92002112251709, "iter_time": 0.5743619537353515, "loss": 0.1574135273694992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.553368171139766, "step_time": 0.521601432800293} +{"epoch": 0, "iter": 15637, "iter_tflops": 22.350073188570846, "iter_time": 0.9230884094238281, "loss": 0.5759952664375305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.841380967641374, "step_time": 0.8653480911254883} +{"epoch": 0, "iter": 15638, "iter_tflops": 13.724656888545626, "iter_time": 1.5032137908935546, "loss": 0.5298637747764587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.727455746584113, "step_time": 1.3117883682250977} +{"epoch": 0, "iter": 15639, "iter_tflops": 13.77354487809343, "iter_time": 1.4978782653808596, "loss": 0.4680546522140503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.938046417022363, "step_time": 1.2944556045532227} +{"epoch": 0, "iter": 15640, "iter_tflops": 35.06512314179168, "iter_time": 0.5883650665283203, "loss": 0.4925377070903778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.095448512068685, "step_time": 0.5415632133483886} +{"epoch": 0, "iter": 15641, "iter_tflops": 12.002603671847362, "iter_time": 1.221631057739258, "loss": 0.16287390887737274, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 12.921435766355698, "step_time": 1.134761932373047} +{"epoch": 0, "iter": 15642, "iter_tflops": 17.53611004532491, "iter_time": 0.8361462936401367, "loss": 0.1341962218284607, "lr": 3e-05, "seqlen": 5872.0, 
"step_tflops": 20.74136942387164, "step_time": 0.7069327545166015} +{"epoch": 0, "iter": 15643, "iter_tflops": 24.15530907027175, "iter_time": 0.6070199050903321, "loss": 0.23090161383152008, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 25.92270065992987, "step_time": 0.5656337127685547} +{"epoch": 0, "iter": 15644, "iter_tflops": 21.74743449397233, "iter_time": 0.6742291107177734, "loss": 0.29502323269844055, "lr": 3e-05, "seqlen": 5872.0, "step_tflops": 23.33360787703134, "step_time": 0.6283963241577148} +{"epoch": 0, "iter": 15645, "iter_tflops": 28.312063558209598, "iter_time": 0.7287032775878906, "loss": 0.16904182732105255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.55743307380662, "step_time": 0.6537633609771728} +{"epoch": 0, "iter": 15646, "iter_tflops": 40.08053177462408, "iter_time": 0.5147410125732422, "loss": 0.14979985356330872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.027722745605686, "step_time": 0.4581864738464355} +{"epoch": 0, "iter": 15647, "iter_tflops": 44.41803135104143, "iter_time": 0.4644756393432617, "loss": 0.18814833462238312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.59597264134685, "step_time": 0.4245432777404785} +{"epoch": 0, "iter": 15648, "iter_tflops": 40.25661623191148, "iter_time": 0.5124895095825195, "loss": 0.16613425314426422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.773371095334674, "step_time": 0.47131607627868655} +{"epoch": 0, "iter": 15649, "iter_tflops": 24.675449180532944, "iter_time": 0.8360979919433594, "loss": 0.0403297021985054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.726116483712595, "step_time": 0.7719450569152833} +{"epoch": 0, "iter": 15650, "iter_tflops": 24.074572743134315, "iter_time": 0.8569661331176758, "loss": 0.10527396202087402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.245795896839496, "step_time": 0.7572211723327638} +{"epoch": 0, "iter": 15651, "iter_tflops": 47.93155931969993, "iter_time": 0.43042817306518555, "loss": 0.07835053652524948, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 52.0882377587503, "step_time": 0.39607969856262204} +{"epoch": 0, "iter": 15652, "iter_tflops": 53.07333938181324, "iter_time": 0.38872800827026366, "loss": 0.11032526195049286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.83182979798678, "step_time": 0.3567428798675537} +{"epoch": 0, "iter": 15653, "iter_tflops": 39.391671664069285, "iter_time": 0.5237425231933595, "loss": 0.7547730803489685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.48657789757947, "step_time": 0.48559085083007814} +{"epoch": 0, "iter": 15654, "iter_tflops": 42.90564896295715, "iter_time": 0.48084795379638673, "loss": 0.5282975435256958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.201888200197494, "step_time": 0.44654221534729005} +{"epoch": 0, "iter": 15655, "iter_tflops": 40.579955101178705, "iter_time": 0.508406021118164, "loss": 0.6259926557540894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.343175298840315, "step_time": 0.4759940490722656} +{"epoch": 0, "iter": 15656, "iter_tflops": 46.85429657598612, "iter_time": 0.4403244743347168, "loss": 0.7642467021942139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.30536627580762, "step_time": 0.41011715126037596} +{"epoch": 0, "iter": 15657, "iter_tflops": 38.51030116537517, "iter_time": 0.5357292175292969, "loss": 0.6515335440635681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.77519529161849, "step_time": 0.4938598937988281} +{"epoch": 0, "iter": 15658, "iter_tflops": 45.797205388896906, "iter_time": 0.4504880447387696, "loss": 0.5782455205917358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.645732230804306, "step_time": 0.4155663051605225} +{"epoch": 0, "iter": 15659, "iter_tflops": 46.47992330921682, "iter_time": 0.44387107467651365, "loss": 0.6248265504837036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.47081643693596, "step_time": 0.4087727317810059} +{"epoch": 0, "iter": 15660, "iter_tflops": 47.46568084784661, "iter_time": 0.43465285110473634, "loss": 0.565352737903595, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.409398216949434, "step_time": 0.4013097648620605} +{"epoch": 0, "iter": 15661, "iter_tflops": 33.07825738457317, "iter_time": 0.6237055740356446, "loss": 0.4326482117176056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.28026416063105, "step_time": 0.5847771835327149} +{"epoch": 0, "iter": 15662, "iter_tflops": 8.58834179345964, "iter_time": 2.4022208251953123, "loss": 0.3060826361179352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.784914573330058, "step_time": 1.91295845413208} +{"epoch": 0, "iter": 15663, "iter_tflops": 16.162481167759648, "iter_time": 1.2764805908203125, "loss": 0.2755885720252991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.636276844089956, "step_time": 1.1698100280761718} +{"epoch": 0, "iter": 15664, "iter_tflops": 36.30900903036379, "iter_time": 0.5682086639404298, "loss": 0.3738231956958771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.911266753081286, "step_time": 0.50428879737854} +{"epoch": 0, "iter": 15665, "iter_tflops": 20.729227543543878, "iter_time": 0.7152252349853515, "loss": 0.2693925201892853, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 22.514910839916798, "step_time": 0.6584999046325684} +{"epoch": 0, "iter": 15666, "iter_tflops": 20.91796787390865, "iter_time": 0.7087718429565428, "loss": 0.19647565484046936, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 22.581906794781336, "step_time": 0.6565462684631347} +{"epoch": 0, "iter": 15667, "iter_tflops": 23.288217716779837, "iter_time": 0.636633804321289, "loss": 0.1402154266834259, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 25.075434667446107, "step_time": 0.5912586097717285} +{"epoch": 0, "iter": 15668, "iter_tflops": 23.513796955354916, "iter_time": 0.630526268005371, "loss": 0.25715371966362, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 25.245107080551808, "step_time": 0.5872847595214844} +{"epoch": 0, "iter": 15669, "iter_tflops": 16.151035045961667, "iter_time": 1.2773852233886718, "loss": 
0.00852450355887413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.38117780047094, "step_time": 1.1869790267944336} +{"epoch": 0, "iter": 15670, "iter_tflops": 17.932756775286922, "iter_time": 1.1504697113037108, "loss": 0.002283977111801505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.649532334430656, "step_time": 0.8723679275512696} +{"epoch": 0, "iter": 15671, "iter_tflops": 41.746509852495045, "iter_time": 0.4941992416381836, "loss": 0.009393679909408092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14957613933862, "step_time": 0.4470483856201172} +{"epoch": 0, "iter": 15672, "iter_tflops": 47.273915558000695, "iter_time": 0.4364160079956055, "loss": 0.0011338874464854598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.086301388866374, "step_time": 0.3960944232940674} +{"epoch": 0, "iter": 15673, "iter_tflops": 15.665622001093444, "iter_time": 1.3169661254882812, "loss": 0.007778347469866276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.7799578625725, "step_time": 1.2295080642700196} +{"epoch": 0, "iter": 15674, "iter_tflops": 15.061732147723724, "iter_time": 1.3697689819335939, "loss": 0.0012413327349349856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.29993036939612, "step_time": 0.9685991058349609} +{"epoch": 0, "iter": 15675, "iter_tflops": 40.89441063620552, "iter_time": 0.5044966583251953, "loss": 0.0023775994777679443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.004618649352864, "step_time": 0.4584216938018798} +{"epoch": 0, "iter": 15676, "iter_tflops": 42.1622979767252, "iter_time": 0.48932564163208003, "loss": 0.007162007503211498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.5873441684117, "step_time": 0.442847599029541} +{"epoch": 0, "iter": 15677, "iter_tflops": 36.54798951895744, "iter_time": 0.5384091262817383, "loss": 0.007868909277021885, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 40.83707946658522, "step_time": 0.48186039161682126} +{"epoch": 0, "iter": 15678, "iter_tflops": 37.64327355607212, "iter_time": 
0.5227433547973632, "loss": 0.004590717609971762, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 42.60684263553396, "step_time": 0.4618453254699707} +{"epoch": 0, "iter": 15679, "iter_tflops": 44.007564335021314, "iter_time": 0.4471451988220215, "loss": 0.007709067314863205, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 48.69329444730905, "step_time": 0.4041166515350342} +{"epoch": 0, "iter": 15680, "iter_tflops": 44.55011629009639, "iter_time": 0.4416996574401856, "loss": 0.04148537665605545, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 49.27588685771675, "step_time": 0.3993387508392334} +{"epoch": 0, "iter": 15681, "iter_tflops": 17.544915192581872, "iter_time": 1.175901580810547, "loss": 0.5712449550628662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.611180636687436, "step_time": 1.1085322265625} +{"epoch": 0, "iter": 15682, "iter_tflops": 17.466546122027097, "iter_time": 1.1811776275634767, "loss": 0.5336012840270996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.018284998576902, "step_time": 0.9815783500671387} +{"epoch": 0, "iter": 15683, "iter_tflops": 39.21290354455139, "iter_time": 0.5261302185058594, "loss": 0.6485967040061951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.509174646961334, "step_time": 0.48533272361755375} +{"epoch": 0, "iter": 15684, "iter_tflops": 36.17226630912438, "iter_time": 0.5703566741943359, "loss": 0.6198541522026062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.31964386114675, "step_time": 0.5247019424438476} +{"epoch": 0, "iter": 15685, "iter_tflops": 29.600149513158062, "iter_time": 0.6969928817749023, "loss": 0.0693659856915474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.103758012435684, "step_time": 0.6426379585266113} +{"epoch": 0, "iter": 15686, "iter_tflops": 10.692707049418528, "iter_time": 1.9294546661376955, "loss": 0.06498977541923523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.390105950914636, "step_time": 1.665126480102539} +{"epoch": 0, "iter": 15687, "iter_tflops": 17.21053905299497, 
"iter_time": 1.1987476654052736, "loss": 0.05326449126005173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.04092806661293, "step_time": 0.9360356082916261} +{"epoch": 0, "iter": 15688, "iter_tflops": 40.493430948744574, "iter_time": 0.5094923553466797, "loss": 0.06949970126152039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.45161589626356, "step_time": 0.46412471389770515} +{"epoch": 0, "iter": 15689, "iter_tflops": 13.734879571071101, "iter_time": 1.0616128845214843, "loss": 0.21483439207077026, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 14.508861752180918, "step_time": 1.0049806365966798} +{"epoch": 0, "iter": 15690, "iter_tflops": 6.4885629693701805, "iter_time": 2.247204071044922, "loss": 0.3144015669822693, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 7.346318455923226, "step_time": 1.9848207244873048} +{"epoch": 0, "iter": 15691, "iter_tflops": 7.058057058041947, "iter_time": 2.0658837127685543, "loss": 0.23260433971881866, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 9.478145085709894, "step_time": 1.5383943786621095} +{"epoch": 0, "iter": 15692, "iter_tflops": 21.955911949915567, "iter_time": 0.6641092910766602, "loss": 0.301978200674057, "lr": 3e-05, "seqlen": 5840.0, "step_tflops": 23.725936590904915, "step_time": 0.6145647850036621} +{"epoch": 0, "iter": 15693, "iter_tflops": 12.669955693995153, "iter_time": 1.118651641845703, "loss": 0.35176414251327515, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 13.451464784175544, "step_time": 1.0536597290039063} +{"epoch": 0, "iter": 15694, "iter_tflops": 10.447012403621665, "iter_time": 1.3566813354492187, "loss": 0.29675763845443726, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 13.665302651273567, "step_time": 1.0371718139648436} +{"epoch": 0, "iter": 15695, "iter_tflops": 25.35496279394236, "iter_time": 0.5589937896728515, "loss": 0.2034398913383484, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 27.010138501671022, "step_time": 0.5247387657165528} +{"epoch": 0, "iter": 15696, "iter_tflops": 
25.197145233123248, "iter_time": 0.5624949417114258, "loss": 0.1972242295742035, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 26.81407063594244, "step_time": 0.5285757217407226} +{"epoch": 0, "iter": 15697, "iter_tflops": 38.97440727335057, "iter_time": 0.5293497695922852, "loss": 0.09208499640226364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.121620467912365, "step_time": 0.4897981910705566} +{"epoch": 0, "iter": 15698, "iter_tflops": 18.00339815877319, "iter_time": 1.145955520629883, "loss": 0.16220758855342865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.05757118633142, "step_time": 1.0285938072204588} +{"epoch": 0, "iter": 15699, "iter_tflops": 41.3942578582742, "iter_time": 0.4984047203063965, "loss": 0.0864558070898056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.63166662896473, "step_time": 0.4521222877502442} +{"epoch": 0, "iter": 15700, "iter_tflops": 39.839144874675014, "iter_time": 0.517859848022461, "loss": 0.13648197054862976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.708506496964915, "step_time": 0.4720155220031738} +{"epoch": 0, "iter": 15701, "iter_tflops": 18.09985880235574, "iter_time": 1.1398483123779297, "loss": 0.28494030237197876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.643213362679777, "step_time": 1.0502911682128908} +{"epoch": 0, "iter": 15702, "iter_tflops": 23.90774571128783, "iter_time": 0.8629459991455078, "loss": 0.26148518919944763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.81320833309176, "step_time": 0.7694377059936524} +{"epoch": 0, "iter": 15703, "iter_tflops": 34.084291007828654, "iter_time": 0.6052962493896485, "loss": 0.2911206781864166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.126134093320864, "step_time": 0.555702714920044} +{"epoch": 0, "iter": 15704, "iter_tflops": 36.02268294841141, "iter_time": 0.5727250671386718, "loss": 0.2471621036529541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.253560252458676, "step_time": 0.5255852813720703} +{"epoch": 0, "iter": 15705, 
"iter_tflops": 17.944653525476173, "iter_time": 1.1497069854736328, "loss": 0.04742278531193733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.31246287978299, "step_time": 1.0682787399291993} +{"epoch": 0, "iter": 15706, "iter_tflops": 18.354930097827772, "iter_time": 1.124008285522461, "loss": 0.08096383512020111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.350060378037266, "step_time": 0.8138479042053223} +{"epoch": 0, "iter": 15707, "iter_tflops": 52.56673337775639, "iter_time": 0.39247433090209966, "loss": 0.022175103425979614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.248200655057445, "step_time": 0.3603797721862793} +{"epoch": 0, "iter": 15708, "iter_tflops": 53.877592502743276, "iter_time": 0.3829253044128417, "loss": 0.029425447806715965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.46909241275277, "step_time": 0.3528546905517578} +{"epoch": 0, "iter": 15709, "iter_tflops": 42.38637010449062, "iter_time": 0.4867388610839844, "loss": 0.6584162712097168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.13221540292591, "step_time": 0.4472166213989258} +{"epoch": 0, "iter": 15710, "iter_tflops": 36.446645606777054, "iter_time": 0.5660628890991211, "loss": 0.5473626852035522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.60824194109064, "step_time": 0.5208787994384766} +{"epoch": 0, "iter": 15711, "iter_tflops": 41.26288684479253, "iter_time": 0.4999915199279785, "loss": 0.5518912076950073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.195055928414675, "step_time": 0.4668190383911133} +{"epoch": 0, "iter": 15712, "iter_tflops": 48.84603742997414, "iter_time": 0.4223698501586914, "loss": 0.5944302082061768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.70804974286784, "step_time": 0.39142206192016604} +{"epoch": 0, "iter": 15713, "iter_tflops": 44.38714941963547, "iter_time": 0.4647987937927247, "loss": 0.41797712445259094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.243357552446156, "step_time": 0.4276463031768799} +{"epoch": 0, 
"iter": 15714, "iter_tflops": 44.5100387589337, "iter_time": 0.4635155143737793, "loss": 0.486939013004303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.95069115326525, "step_time": 0.41302919006347655} +{"epoch": 0, "iter": 15715, "iter_tflops": 47.38772637141929, "iter_time": 0.4353678703308106, "loss": 0.5109626650810242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45506735737796, "step_time": 0.4009535808563233} +{"epoch": 0, "iter": 15716, "iter_tflops": 37.9462517072404, "iter_time": 0.5436925277709961, "loss": 0.4228803217411041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.48353948653694, "step_time": 0.4973320446014405} +{"epoch": 0, "iter": 15717, "iter_tflops": 18.32754072611756, "iter_time": 1.051099105834961, "loss": 0.12272561341524124, "lr": 3e-05, "seqlen": 7664.0, "step_tflops": 19.788534374386977, "step_time": 0.9734961318969727} +{"epoch": 0, "iter": 15718, "iter_tflops": 16.86260209407371, "iter_time": 1.142413345336914, "loss": 0.0810893252491951, "lr": 3e-05, "seqlen": 7664.0, "step_tflops": 20.180186873142496, "step_time": 0.9546027393341064} +{"epoch": 0, "iter": 15719, "iter_tflops": 38.1229330905572, "iter_time": 0.5053142585754395, "loss": 0.08247524499893188, "lr": 3e-05, "seqlen": 7664.0, "step_tflops": 41.956304608275666, "step_time": 0.4591458148956299} +{"epoch": 0, "iter": 15720, "iter_tflops": 38.886901525297844, "iter_time": 0.49538690185546874, "loss": 0.14092466235160828, "lr": 3e-05, "seqlen": 7664.0, "step_tflops": 42.59045006466129, "step_time": 0.4523094177246094} +{"epoch": 0, "iter": 15721, "iter_tflops": 19.507589583720026, "iter_time": 1.0575931701660157, "loss": 0.6546738147735596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.959643664194772, "step_time": 0.9843246307373048} +{"epoch": 0, "iter": 15722, "iter_tflops": 22.768791585909593, "iter_time": 0.9061128005981445, "loss": 0.5621466636657715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.520864250180384, "step_time": 0.7496528205871581} 
+{"epoch": 0, "iter": 15723, "iter_tflops": 36.44730239020693, "iter_time": 0.5660526885986328, "loss": 0.5914422273635864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.58287048255806, "step_time": 0.52121266746521} +{"epoch": 0, "iter": 15724, "iter_tflops": 39.63958021629004, "iter_time": 0.5204670028686524, "loss": 0.7563015818595886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14943438622286, "step_time": 0.47813126182556154} +{"epoch": 0, "iter": 15725, "iter_tflops": 19.955524583878635, "iter_time": 1.033853729248047, "loss": 0.3559742271900177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.38399372753168, "step_time": 0.9647914123535155} +{"epoch": 0, "iter": 15726, "iter_tflops": 15.891054879884752, "iter_time": 1.298283447265625, "loss": 0.5129647254943848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.91657593963755, "step_time": 1.0358755226135252} +{"epoch": 0, "iter": 15727, "iter_tflops": 37.99336029909691, "iter_time": 0.5430183944702148, "loss": 0.37274864315986633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.38623616285902, "step_time": 0.49850132369995115} +{"epoch": 0, "iter": 15728, "iter_tflops": 35.001126125713576, "iter_time": 0.5894408493041993, "loss": 0.34939783811569214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.153813271882896, "step_time": 0.5407347717285157} +{"epoch": 0, "iter": 15729, "iter_tflops": 27.774061374357963, "iter_time": 0.7428187484741211, "loss": 0.04201444983482361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.099357765501335, "step_time": 0.6854330139160156} +{"epoch": 0, "iter": 15730, "iter_tflops": 9.947072791532165, "iter_time": 2.0740869140625002, "loss": 0.018770933151245117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.803537932818452, "step_time": 1.7478736991882324} +{"epoch": 0, "iter": 15731, "iter_tflops": 15.26171979691904, "iter_time": 1.3518197021484375, "loss": 0.06110589578747749, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.128269350280533, "step_time": 
1.1380619468688964} +{"epoch": 0, "iter": 15732, "iter_tflops": 44.36272491433231, "iter_time": 0.46505469512939457, "loss": 0.03419569507241249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6726675784776, "step_time": 0.41534096145629884} +{"epoch": 0, "iter": 15733, "iter_tflops": 23.753210531450705, "iter_time": 0.6810145263671875, "loss": 0.15174032747745514, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 25.32068055289372, "step_time": 0.638856502532959} +{"epoch": 0, "iter": 15734, "iter_tflops": 14.048256699570882, "iter_time": 1.1514796295166014, "loss": 0.17540153861045837, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 16.724256391278725, "step_time": 0.9672347183227539} +{"epoch": 0, "iter": 15735, "iter_tflops": 29.275311131179613, "iter_time": 0.552557113647461, "loss": 0.24933797121047974, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 31.20672150944117, "step_time": 0.51835888671875} +{"epoch": 0, "iter": 15736, "iter_tflops": 30.004531524307392, "iter_time": 0.5391279449462891, "loss": 0.3018016219139099, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 31.936950715445082, "step_time": 0.5065067596435546} +{"epoch": 0, "iter": 15737, "iter_tflops": 27.84812913582981, "iter_time": 0.7408430709838867, "loss": 0.6903314590454102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.520202415939348, "step_time": 0.6988804893493653} +{"epoch": 0, "iter": 15738, "iter_tflops": 40.185641546352535, "iter_time": 0.5133946533203125, "loss": 0.6412504315376282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.53373269961226, "step_time": 0.4739105110168456} +{"epoch": 0, "iter": 15739, "iter_tflops": 45.95757215400358, "iter_time": 0.44891608810424805, "loss": 0.5661934614181519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.482491836915024, "step_time": 0.41693723869323734} +{"epoch": 0, "iter": 15740, "iter_tflops": 45.92106183645288, "iter_time": 0.449273006439209, "loss": 0.5861451625823975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22917244521633, 
"step_time": 0.4190826797485352} +{"epoch": 0, "iter": 15741, "iter_tflops": 33.90105265318836, "iter_time": 0.6085679321289063, "loss": 0.030178360641002655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.1842593546423, "step_time": 0.5701676330566406} +{"epoch": 0, "iter": 15742, "iter_tflops": 35.869677252755075, "iter_time": 0.575168083190918, "loss": 0.03477121889591217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.48402030651942, "step_time": 0.4169243602752686} +{"epoch": 0, "iter": 15743, "iter_tflops": 51.34661290528965, "iter_time": 0.40180047607421876, "loss": 0.04988420754671097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.3022090779787, "step_time": 0.36643488502502436} +{"epoch": 0, "iter": 15744, "iter_tflops": 52.04094051016095, "iter_time": 0.39643967437744143, "loss": 0.045419782400131226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.375686430264004, "step_time": 0.35957902717590334} +{"epoch": 0, "iter": 15745, "iter_tflops": 44.19817068821512, "iter_time": 0.46678614044189454, "loss": 0.2343348264694214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.25869828161637, "step_time": 0.4275103607177735} +{"epoch": 0, "iter": 15746, "iter_tflops": 10.886151453106418, "iter_time": 1.895168701171875, "loss": 0.3277701139450073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.478022488413583, "step_time": 1.5307211074829101} +{"epoch": 0, "iter": 15747, "iter_tflops": 14.989019461265919, "iter_time": 1.3764138183593753, "loss": 0.27290821075439453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.992821996751946, "step_time": 1.031925033569336} +{"epoch": 0, "iter": 15748, "iter_tflops": 16.025583868086247, "iter_time": 1.2873848266601562, "loss": 0.3246789574623108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.353664602714897, "step_time": 1.0660044975280762} +{"epoch": 0, "iter": 15749, "iter_tflops": 16.334720154944378, "iter_time": 0.9101415023803712, "loss": 0.3055429756641388, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 
17.465462153451913, "step_time": 0.8512174835205077} +{"epoch": 0, "iter": 15750, "iter_tflops": 6.754985778561608, "iter_time": 2.2008790588378906, "loss": 0.15124748647212982, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 8.07603684918134, "step_time": 1.8408666305541992} +{"epoch": 0, "iter": 15751, "iter_tflops": 8.95221133767844, "iter_time": 1.660696578979492, "loss": 0.162430077791214, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 11.035528604363366, "step_time": 1.3471857376098633} +{"epoch": 0, "iter": 15752, "iter_tflops": 20.331781219777003, "iter_time": 0.7312151641845704, "loss": 0.19025814533233643, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 21.795976251921918, "step_time": 0.6820940971374512} +{"epoch": 0, "iter": 15753, "iter_tflops": 11.740539073234878, "iter_time": 1.3568717346191406, "loss": 0.085932157933712, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 12.542094398384153, "step_time": 1.2701551361083985} +{"epoch": 0, "iter": 15754, "iter_tflops": 14.90474212026246, "iter_time": 1.0688145751953126, "loss": 0.2766474485397339, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 18.87731479783568, "step_time": 0.8438915061950684} +{"epoch": 0, "iter": 15755, "iter_tflops": 28.66716106442896, "iter_time": 0.5557022399902344, "loss": 0.14950884878635406, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 30.57867200498613, "step_time": 0.520964599609375} +{"epoch": 0, "iter": 15756, "iter_tflops": 30.370217281493098, "iter_time": 0.5245403900146485, "loss": 0.11250744014978409, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 32.3052569598329, "step_time": 0.4931211547851562} +{"epoch": 0, "iter": 15757, "iter_tflops": 34.96226899880099, "iter_time": 0.5900959548950195, "loss": 0.5909035801887512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.37582572367817, "step_time": 0.551990306854248} +{"epoch": 0, "iter": 15758, "iter_tflops": 16.285702884267188, "iter_time": 1.2668224182128907, "loss": 0.747876763343811, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 19.624199820582607, "step_time": 1.0513087768554688} +{"epoch": 0, "iter": 15759, "iter_tflops": 39.46217105324242, "iter_time": 0.522806854248047, "loss": 0.8101174831390381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.136670376775754, "step_time": 0.4782727394104004} +{"epoch": 0, "iter": 15760, "iter_tflops": 41.906454314166936, "iter_time": 0.4923130302429199, "loss": 0.5003511905670166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61407428331566, "step_time": 0.4522966613769531} +{"epoch": 0, "iter": 15761, "iter_tflops": 32.20406845184711, "iter_time": 0.640636245727539, "loss": 0.18630646169185638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.20452781461659, "step_time": 0.5860352287292481} +{"epoch": 0, "iter": 15762, "iter_tflops": 13.054005822351236, "iter_time": 1.5804415740966797, "loss": 0.19685405492782593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.664398377245, "step_time": 1.3170690002441405} +{"epoch": 0, "iter": 15763, "iter_tflops": 11.397271581488674, "iter_time": 1.8101782836914064, "loss": 0.10109610110521317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.197918122097978, "step_time": 1.5632081756591798} +{"epoch": 0, "iter": 15764, "iter_tflops": 38.76020724430763, "iter_time": 0.5322751083374023, "loss": 0.14483317732810974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.49902515134105, "step_time": 0.4343477249145507} +{"epoch": 0, "iter": 15765, "iter_tflops": 17.887073156300932, "iter_time": 0.8791622161865233, "loss": 0.20341402292251587, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 18.76826465269148, "step_time": 0.8378845443725585} +{"epoch": 0, "iter": 15766, "iter_tflops": 9.20507084139809, "iter_time": 1.708366958618164, "loss": 0.19908185303211212, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 11.182504659692707, "step_time": 1.4062716140747071} +{"epoch": 0, "iter": 15767, "iter_tflops": 23.020179768048457, "iter_time": 0.6831240692138671, "loss": 0.20847812294960022, "lr": 3e-05, "seqlen": 
6288.0, "step_tflops": 24.781655503383842, "step_time": 0.6345677299499511} +{"epoch": 0, "iter": 15768, "iter_tflops": 26.00134852515256, "iter_time": 0.604800895690918, "loss": 0.24320681393146515, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 27.894626851836914, "step_time": 0.5637515411376953} +{"epoch": 0, "iter": 15769, "iter_tflops": 19.074021386687694, "iter_time": 1.0816331329345703, "loss": 0.2588419020175934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.286342692392402, "step_time": 1.0169942321777345} +{"epoch": 0, "iter": 15770, "iter_tflops": 12.787529034767012, "iter_time": 1.6133760833740236, "loss": 0.32701948285102844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.830808950819248, "step_time": 1.4916765594482422} +{"epoch": 0, "iter": 15771, "iter_tflops": 13.111454616372116, "iter_time": 1.5735167541503907, "loss": 0.25818705558776855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.064401832762247, "step_time": 1.2842739944458006} +{"epoch": 0, "iter": 15772, "iter_tflops": 33.320999560008666, "iter_time": 0.6191619033813476, "loss": 0.3713747560977936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.4831500695679, "step_time": 0.48563003158569334} +{"epoch": 0, "iter": 15773, "iter_tflops": 14.149662697507734, "iter_time": 1.0709032363891602, "loss": 0.2850530445575714, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 14.708807016030393, "step_time": 1.030193649291992} +{"epoch": 0, "iter": 15774, "iter_tflops": 8.38433490049988, "iter_time": 1.8072893981933593, "loss": 0.20853376388549805, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 9.88478488815748, "step_time": 1.532953903198242} +{"epoch": 0, "iter": 15775, "iter_tflops": 27.100968872282813, "iter_time": 0.5591283340454102, "loss": 0.3075155019760132, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.902006733093643, "step_time": 0.5242860717773438} +{"epoch": 0, "iter": 15776, "iter_tflops": 28.606172196476958, "iter_time": 0.5297080459594727, "loss": 0.22675937414169312, "lr": 
3e-05, "seqlen": 6064.0, "step_tflops": 30.35146499661318, "step_time": 0.4992483749389649} +{"epoch": 0, "iter": 15777, "iter_tflops": 22.609728515332392, "iter_time": 0.912487449645996, "loss": 0.004712763242423534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.704581739638662, "step_time": 0.8703420181274414} +{"epoch": 0, "iter": 15778, "iter_tflops": 21.24841815880057, "iter_time": 0.970947265625, "loss": 0.008204134181141853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.84981868986289, "step_time": 0.6094890403747558} +{"epoch": 0, "iter": 15779, "iter_tflops": 57.41039858835595, "iter_time": 0.3593616142272949, "loss": 0.008487886749207973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.034185730415196, "step_time": 0.3273000717163086} +{"epoch": 0, "iter": 15780, "iter_tflops": 51.07351568206009, "iter_time": 0.403948959350586, "loss": 0.004167634062469006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.36339874926756, "step_time": 0.366037073135376} +{"epoch": 0, "iter": 15781, "iter_tflops": 28.058167436377836, "iter_time": 0.6998453826904297, "loss": 0.024639684706926346, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 29.930184348444357, "step_time": 0.6560727691650391} +{"epoch": 0, "iter": 15782, "iter_tflops": 19.77578317108586, "iter_time": 0.9929507598876953, "loss": 0.0894031748175621, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 23.654758994066, "step_time": 0.8301238212585449} +{"epoch": 0, "iter": 15783, "iter_tflops": 47.93953681938035, "iter_time": 0.4096071891784668, "loss": 0.06766454875469208, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 52.843000903676845, "step_time": 0.371598482131958} +{"epoch": 0, "iter": 15784, "iter_tflops": 54.361842023483455, "iter_time": 0.3612162170410156, "loss": 0.03932808339595795, "lr": 3e-05, "seqlen": 7808.0, "step_tflops": 59.129976112817026, "step_time": 0.33208839607238766} +{"epoch": 0, "iter": 15785, "iter_tflops": 35.00003706319544, "iter_time": 0.5894591903686524, "loss": 
0.3924902081489563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.348975418980885, "step_time": 0.552387134552002} +{"epoch": 0, "iter": 15786, "iter_tflops": 9.722168411905352, "iter_time": 2.1220670776367188, "loss": 0.4323916435241699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.49610022815503, "step_time": 1.6510025634765624} +{"epoch": 0, "iter": 15787, "iter_tflops": 10.800779814322626, "iter_time": 1.9101485137939453, "loss": 0.3417511582374573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.499527906487447, "step_time": 1.5282825927734374} +{"epoch": 0, "iter": 15788, "iter_tflops": 24.829490346418048, "iter_time": 0.830910873413086, "loss": 0.34595510363578796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.23669960670931, "step_time": 0.602601703643799} +{"epoch": 0, "iter": 15789, "iter_tflops": 14.057385837187386, "iter_time": 1.0692111968994142, "loss": 0.18607398867607117, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 15.000931522577648, "step_time": 1.0019587326049804} +{"epoch": 0, "iter": 15790, "iter_tflops": 9.550947464112978, "iter_time": 1.5736987762451173, "loss": 0.2033744603395462, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 12.77216750565376, "step_time": 1.1768021621704101} +{"epoch": 0, "iter": 15791, "iter_tflops": 22.183176942400625, "iter_time": 0.6775546340942383, "loss": 0.24389800429344177, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 23.80430497026825, "step_time": 0.6314116020202636} +{"epoch": 0, "iter": 15792, "iter_tflops": 23.57973679193475, "iter_time": 0.6374250259399414, "loss": 0.09464994072914124, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 25.194578059851807, "step_time": 0.5965694007873537} +{"epoch": 0, "iter": 15793, "iter_tflops": 14.722236118514209, "iter_time": 1.401355972290039, "loss": 0.4484027922153473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.700682934179742, "step_time": 1.3140252304077147} +{"epoch": 0, "iter": 15794, "iter_tflops": 14.67567244506677, "iter_time": 1.405802261352539, 
"loss": 0.5960462689399719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.381399211377303, "step_time": 1.0122510871887207} +{"epoch": 0, "iter": 15795, "iter_tflops": 37.723486173236836, "iter_time": 0.5469031524658203, "loss": 0.6107484698295593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10149023428706, "step_time": 0.5019548778533935} +{"epoch": 0, "iter": 15796, "iter_tflops": 37.98157592325131, "iter_time": 0.5431868743896485, "loss": 0.5909614562988281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.97131916211289, "step_time": 0.5035496520996094} +{"epoch": 0, "iter": 15797, "iter_tflops": 32.01224804529396, "iter_time": 0.644474998474121, "loss": 0.3775987923145294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.48002607578495, "step_time": 0.581484733581543} +{"epoch": 0, "iter": 15798, "iter_tflops": 37.27625138380784, "iter_time": 0.5534648132324218, "loss": 0.4066588580608368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.356285411402055, "step_time": 0.49886234474182134} +{"epoch": 0, "iter": 15799, "iter_tflops": 41.028486293240164, "iter_time": 0.5028480300903321, "loss": 0.3046850562095642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.97510792673722, "step_time": 0.458722490310669} +{"epoch": 0, "iter": 15800, "iter_tflops": 42.303165310686715, "iter_time": 0.4876962127685547, "loss": 0.2802268862724304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.25133506973351, "step_time": 0.44606482124328617} +{"epoch": 0, "iter": 15801, "iter_tflops": 20.342208908331006, "iter_time": 1.0142012405395509, "loss": 0.21852174401283264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.68550694549552, "step_time": 0.9513770446777344} +{"epoch": 0, "iter": 15802, "iter_tflops": 15.367090686505149, "iter_time": 1.3425503845214843, "loss": 0.1656959354877472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.521132835333074, "step_time": 1.177497694015503} +{"epoch": 0, "iter": 15803, "iter_tflops": 39.04628868845142, "iter_time": 
0.5283752746582031, "loss": 0.16842712461948395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80007520693016, "step_time": 0.48203404808044437} +{"epoch": 0, "iter": 15804, "iter_tflops": 41.63561309598755, "iter_time": 0.4955155448913574, "loss": 0.19917581975460052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.581197631590534, "step_time": 0.4526228923797608} +{"epoch": 0, "iter": 15805, "iter_tflops": 19.769572604009884, "iter_time": 1.0435781250000002, "loss": 0.6900781989097595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.55174133997082, "step_time": 0.9572819747924806} +{"epoch": 0, "iter": 15806, "iter_tflops": 24.45320355208957, "iter_time": 0.8436969604492187, "loss": 0.6296361088752747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.666465143217316, "step_time": 0.6128084259033204} +{"epoch": 0, "iter": 15807, "iter_tflops": 42.73025888508452, "iter_time": 0.48282163619995117, "loss": 0.5608369708061218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.544882780159526, "step_time": 0.4432515945434571} +{"epoch": 0, "iter": 15808, "iter_tflops": 46.404072812599715, "iter_time": 0.44459661102294923, "loss": 0.4691687524318695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89908117519769, "step_time": 0.41345638084411623} +{"epoch": 0, "iter": 15809, "iter_tflops": 44.90504507904037, "iter_time": 0.4594382095336914, "loss": 0.005425736773759127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.15972840610504, "step_time": 0.4196746845245361} +{"epoch": 0, "iter": 15810, "iter_tflops": 16.381318036932953, "iter_time": 1.2594281768798827, "loss": 0.01615842431783676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.107686686201966, "step_time": 1.1393555603027345} +{"epoch": 0, "iter": 15811, "iter_tflops": 8.503805621410184, "iter_time": 2.426101257324219, "loss": 0.010604110546410084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.533391314074569, "step_time": 1.958637336730957} +{"epoch": 0, "iter": 15812, "iter_tflops": 
32.044481963168614, "iter_time": 0.6438267135620117, "loss": 0.005677404813468456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.97910307230465, "step_time": 0.4212223625183105} +{"epoch": 0, "iter": 15813, "iter_tflops": 19.67814047330306, "iter_time": 0.7638076553344726, "loss": 0.22888174653053284, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 20.860539388552713, "step_time": 0.720514175415039} +{"epoch": 0, "iter": 15814, "iter_tflops": 9.280488169670704, "iter_time": 1.6195607452392577, "loss": 0.1261025220155716, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 10.732679337426815, "step_time": 1.4004251747131349} +{"epoch": 0, "iter": 15815, "iter_tflops": 22.85931541919834, "iter_time": 0.6575137557983398, "loss": 0.18765315413475037, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.657768957928152, "step_time": 0.6095569458007813} +{"epoch": 0, "iter": 15816, "iter_tflops": 24.820766923811412, "iter_time": 0.6055539855957031, "loss": 0.16818439960479736, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 26.571087084928664, "step_time": 0.5656642608642578} +{"epoch": 0, "iter": 15817, "iter_tflops": 18.93140909299506, "iter_time": 1.0897811889648437, "loss": 0.5229889750480652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.509557109375447, "step_time": 1.0059258422851562} +{"epoch": 0, "iter": 15818, "iter_tflops": 20.716948120318076, "iter_time": 0.9958558273315429, "loss": 0.3646087348461151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.96882923922822, "step_time": 0.8262739639282226} +{"epoch": 0, "iter": 15819, "iter_tflops": 44.653393397015876, "iter_time": 0.46202745056152345, "loss": 0.4055967926979065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.283573293117634, "step_time": 0.4272901134490967} +{"epoch": 0, "iter": 15820, "iter_tflops": 48.78590066583877, "iter_time": 0.42289049148559577, "loss": 0.4749664068222046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.880149575194004, "step_time": 0.39014816856384277} +{"epoch": 0, "iter": 
15821, "iter_tflops": 29.736135747188133, "iter_time": 0.6938054656982422, "loss": 0.21793626248836517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.66730483816633, "step_time": 0.6514950866699218} +{"epoch": 0, "iter": 15822, "iter_tflops": 12.084302658417364, "iter_time": 1.707263885498047, "loss": 0.2526574432849884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.185499571974724, "step_time": 1.5646804580688478} +{"epoch": 0, "iter": 15823, "iter_tflops": 15.887965771724902, "iter_time": 1.2985358734130859, "loss": 0.22932995855808258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.43851770877731, "step_time": 1.183076099395752} +{"epoch": 0, "iter": 15824, "iter_tflops": 33.91498962426009, "iter_time": 0.6083178482055664, "loss": 0.15670041739940643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.886363118921174, "step_time": 0.544551965713501} +{"epoch": 0, "iter": 15825, "iter_tflops": 16.645025318793234, "iter_time": 0.8735533752441407, "loss": 0.26690393686294556, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 17.565928479105768, "step_time": 0.8277568740844727} +{"epoch": 0, "iter": 15826, "iter_tflops": 10.993366592091238, "iter_time": 1.3226446990966798, "loss": 0.2615264356136322, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 13.330039975608809, "step_time": 1.0907932815551757} +{"epoch": 0, "iter": 15827, "iter_tflops": 25.097701091637653, "iter_time": 0.5793486022949218, "loss": 0.18344956636428833, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 26.63765175806581, "step_time": 0.5458558502197266} +{"epoch": 0, "iter": 15828, "iter_tflops": 25.355474448270076, "iter_time": 0.573458724975586, "loss": 0.25107839703559875, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 26.80704611366654, "step_time": 0.5424065742492675} +{"epoch": 0, "iter": 15829, "iter_tflops": 27.376071837740238, "iter_time": 0.7536177444458009, "loss": 0.6215763092041016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.05389479428053, "step_time": 0.7100973434448241} 
+{"epoch": 0, "iter": 15830, "iter_tflops": 14.927194280084654, "iter_time": 1.3821146240234374, "loss": 0.5813003778457642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.318061882844876, "step_time": 1.264310287475586} +{"epoch": 0, "iter": 15831, "iter_tflops": 39.40050992052296, "iter_time": 0.5236250381469727, "loss": 0.48312702775001526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.36181029105946, "step_time": 0.47578948783874514} +{"epoch": 0, "iter": 15832, "iter_tflops": 43.227836616129075, "iter_time": 0.4772640762329101, "loss": 0.5528505444526672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.228530627818884, "step_time": 0.43683538818359374} +{"epoch": 0, "iter": 15833, "iter_tflops": 37.423115352441336, "iter_time": 0.5512927856445313, "loss": 0.0703258216381073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27201601488568, "step_time": 0.49988092422485353} +{"epoch": 0, "iter": 15834, "iter_tflops": 43.12998018364838, "iter_time": 0.4783469276428223, "loss": 0.05107087269425392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.489076196415574, "step_time": 0.434438720703125} +{"epoch": 0, "iter": 15835, "iter_tflops": 44.392107289781386, "iter_time": 0.464746883392334, "loss": 0.049964599311351776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.86454692832214, "step_time": 0.42220985984802245} +{"epoch": 0, "iter": 15836, "iter_tflops": 48.04628064565784, "iter_time": 0.4294004287719727, "loss": 0.05847393348813057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.669599667172506, "step_time": 0.3917078094482422} +{"epoch": 0, "iter": 15837, "iter_tflops": 21.44634896432661, "iter_time": 0.9619862823486329, "loss": 0.2438431978225708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.881713943571466, "step_time": 0.9016410903930664} +{"epoch": 0, "iter": 15838, "iter_tflops": 20.617476305048704, "iter_time": 1.0006604690551757, "loss": 0.3186781108379364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.605594599704293, "step_time": 
0.8739916896820068} +{"epoch": 0, "iter": 15839, "iter_tflops": 41.56273041157101, "iter_time": 0.4963844604492187, "loss": 0.30129241943359375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.45673296075028, "step_time": 0.45386221504211427} +{"epoch": 0, "iter": 15840, "iter_tflops": 42.4205500595213, "iter_time": 0.4863466758728028, "loss": 0.26090535521507263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14938199784968, "step_time": 0.44705026626586913} +{"epoch": 0, "iter": 15841, "iter_tflops": 17.735498205321413, "iter_time": 1.1632655181884766, "loss": 0.12575951218605042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.630268715670166, "step_time": 1.107396453857422} +{"epoch": 0, "iter": 15842, "iter_tflops": 17.01950564184018, "iter_time": 1.212202865600586, "loss": 0.15745364129543304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.186591838147507, "step_time": 0.9298901634216309} +{"epoch": 0, "iter": 15843, "iter_tflops": 36.99700014296635, "iter_time": 0.5576423339843749, "loss": 0.1458345204591751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.556645365290244, "step_time": 0.5086982250213623} +{"epoch": 0, "iter": 15844, "iter_tflops": 41.48842287873076, "iter_time": 0.4972735061645508, "loss": 0.10184226185083389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.269759747596986, "step_time": 0.4557367572784424} +{"epoch": 0, "iter": 15845, "iter_tflops": 22.602865535483, "iter_time": 0.9127645111083984, "loss": 0.20514701306819916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.163485270276542, "step_time": 0.8538128204345703} +{"epoch": 0, "iter": 15846, "iter_tflops": 29.49372850663338, "iter_time": 0.6995078125, "loss": 0.2885621190071106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.41424860416523, "step_time": 0.5665665035247803} +{"epoch": 0, "iter": 15847, "iter_tflops": 47.32572349021528, "iter_time": 0.43593825912475576, "loss": 0.28867098689079285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.20335475451194, 
"step_time": 0.4029246444702148} +{"epoch": 0, "iter": 15848, "iter_tflops": 51.20235663500468, "iter_time": 0.40293249893188476, "loss": 0.2976491451263428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.37468871097575, "step_time": 0.3725726318359375} +{"epoch": 0, "iter": 15849, "iter_tflops": 36.75168622073716, "iter_time": 0.44796179199218744, "loss": 0.006623086519539356, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 40.18278206933599, "step_time": 0.4097115821838379} +{"epoch": 0, "iter": 15850, "iter_tflops": 24.789624649219103, "iter_time": 0.6641226501464843, "loss": 0.0025069089606404305, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 28.609712725343293, "step_time": 0.5754462261199952} +{"epoch": 0, "iter": 15851, "iter_tflops": 45.03382211600674, "iter_time": 0.36557748031616216, "loss": 0.0005757877952419221, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 49.67583447946316, "step_time": 0.3314156951904297} +{"epoch": 0, "iter": 15852, "iter_tflops": 44.196143427299134, "iter_time": 0.3725065116882324, "loss": 0.0036092018708586693, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 48.54733158091844, "step_time": 0.33911959075927733} +{"epoch": 0, "iter": 15853, "iter_tflops": 34.30860940726951, "iter_time": 0.6013386688232422, "loss": 0.5267537236213684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.62674211632914, "step_time": 0.5632795143127441} +{"epoch": 0, "iter": 15854, "iter_tflops": 19.731049246433404, "iter_time": 1.0456156311035156, "loss": 0.5593129396438599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.122575532942225, "step_time": 0.8552608108520509} +{"epoch": 0, "iter": 15855, "iter_tflops": 42.7486247229402, "iter_time": 0.4826142044067383, "loss": 0.4651002585887909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79807142796027, "step_time": 0.4408534984588623} +{"epoch": 0, "iter": 15856, "iter_tflops": 42.107755154309565, "iter_time": 0.48995947265625, "loss": 0.6791443228721619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
45.72909176469705, "step_time": 0.4511590480804443} +{"epoch": 0, "iter": 15857, "iter_tflops": 22.55603171333416, "iter_time": 0.9146597137451172, "loss": 0.4321187138557434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.542778384624643, "step_time": 0.8406176834106446} +{"epoch": 0, "iter": 15858, "iter_tflops": 43.72812293779234, "iter_time": 0.47180377578735355, "loss": 0.34886434674263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.12404006102968, "step_time": 0.4287065982818604} +{"epoch": 0, "iter": 15859, "iter_tflops": 44.48812894011633, "iter_time": 0.46374378967285157, "loss": 0.5807644128799438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.72814817134672, "step_time": 0.43226260185241694} +{"epoch": 0, "iter": 15860, "iter_tflops": 45.83173033062942, "iter_time": 0.4501486930847168, "loss": 0.35837045311927795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.668302180171295, "step_time": 0.41537746620178223} +{"epoch": 0, "iter": 15861, "iter_tflops": 25.999454145048993, "iter_time": 0.7935202560424806, "loss": 0.4575118124485016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.450941362419456, "step_time": 0.7515623321533202} +{"epoch": 0, "iter": 15862, "iter_tflops": 18.65899573408624, "iter_time": 1.1056915283203126, "loss": 0.3854728639125824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.019885905691336, "step_time": 0.9815035915374756} +{"epoch": 0, "iter": 15863, "iter_tflops": 39.429373917410366, "iter_time": 0.5232417221069335, "loss": 0.5850987434387207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.04052923822691, "step_time": 0.47934107398986814} +{"epoch": 0, "iter": 15864, "iter_tflops": 37.88636325162201, "iter_time": 0.5445519638061523, "loss": 0.4933792054653168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.39774670103655, "step_time": 0.4983627166748047} +{"epoch": 0, "iter": 15865, "iter_tflops": 21.920742154523747, "iter_time": 0.9411676559448241, "loss": 0.35962367057800293, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 23.34602303341626, "step_time": 0.8837091217041017} +{"epoch": 0, "iter": 15866, "iter_tflops": 9.439567666088786, "iter_time": 2.1855972900390626, "loss": 0.4269159734249115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.06978615141144, "step_time": 1.8637300872802736} +{"epoch": 0, "iter": 15867, "iter_tflops": 12.608666142773739, "iter_time": 1.6362629699707032, "loss": 0.4166877269744873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.959672362799845, "step_time": 1.3791139945983886} +{"epoch": 0, "iter": 15868, "iter_tflops": 37.87661384176485, "iter_time": 0.5446921310424805, "loss": 0.307735800743103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.322742025739934, "step_time": 0.4992672920227051} +{"epoch": 0, "iter": 15869, "iter_tflops": 15.86577766386494, "iter_time": 0.9293210144042968, "loss": 0.08773523569107056, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 16.969323429210466, "step_time": 0.8688855895996094} +{"epoch": 0, "iter": 15870, "iter_tflops": 10.628574133776585, "iter_time": 1.3872416381835937, "loss": 0.20096451044082642, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 12.237640840727261, "step_time": 1.2048401145935057} +{"epoch": 0, "iter": 15871, "iter_tflops": 26.55762855732538, "iter_time": 0.5551851348876953, "loss": 0.203927144408226, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 28.297989771697594, "step_time": 0.5210405654907226} +{"epoch": 0, "iter": 15872, "iter_tflops": 27.31496378368786, "iter_time": 0.5397920608520508, "loss": 0.24524493515491486, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 29.024338910085028, "step_time": 0.5080012550354004} +{"epoch": 0, "iter": 15873, "iter_tflops": 29.104203223258448, "iter_time": 0.7088698959350584, "loss": 0.3942718803882599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.931613752765603, "step_time": 0.6669905319213867} +{"epoch": 0, "iter": 15874, "iter_tflops": 21.278884497293944, "iter_time": 0.9695570983886719, "loss": 0.7009242177009583, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 25.720591271340663, "step_time": 0.8021236095428467} +{"epoch": 0, "iter": 15875, "iter_tflops": 42.551199902320455, "iter_time": 0.48485338973999015, "loss": 0.706645131111145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.752534454303834, "step_time": 0.4509278831481933} +{"epoch": 0, "iter": 15876, "iter_tflops": 48.22336853010478, "iter_time": 0.42782356643676756, "loss": 0.7292531132698059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.79226416001193, "step_time": 0.39834314727783193} +{"epoch": 0, "iter": 15877, "iter_tflops": 30.157410084630463, "iter_time": 0.6841135711669921, "loss": 0.2121456116437912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.935984963587654, "step_time": 0.646014003753662} +{"epoch": 0, "iter": 15878, "iter_tflops": 15.684460721359772, "iter_time": 1.315384307861328, "loss": 0.14886319637298584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.378334687197505, "step_time": 1.1871732177734375} +{"epoch": 0, "iter": 15879, "iter_tflops": 35.036201240400445, "iter_time": 0.5888507537841796, "loss": 0.17799288034439087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.48909510622388, "step_time": 0.5224503993988037} +{"epoch": 0, "iter": 15880, "iter_tflops": 37.56566817999816, "iter_time": 0.5492007598876953, "loss": 0.10238204151391983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.150685027189105, "step_time": 0.5013548011779785} +{"epoch": 0, "iter": 15881, "iter_tflops": 16.192209411337043, "iter_time": 1.2741370239257814, "loss": 0.3675414025783539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.138238908339964, "step_time": 1.2038047561645508} +{"epoch": 0, "iter": 15882, "iter_tflops": 14.514188636203178, "iter_time": 1.421443115234375, "loss": 0.3721320331096649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.753906318147056, "step_time": 1.100095796585083} +{"epoch": 0, "iter": 15883, "iter_tflops": 43.03752327224683, "iter_time": 0.4793745536804199, "loss": 0.3924759030342102, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35948493128663, "step_time": 0.4450242176055908} +{"epoch": 0, "iter": 15884, "iter_tflops": 35.20744454914104, "iter_time": 0.5859866790771484, "loss": 0.37823712825775146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.31938976695892, "step_time": 0.5383982791900634} +{"epoch": 0, "iter": 15885, "iter_tflops": 15.669950684051692, "iter_time": 1.3166023254394532, "loss": 0.16848966479301453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.795553726050677, "step_time": 1.2283663787841799} +{"epoch": 0, "iter": 15886, "iter_tflops": 15.16227463790977, "iter_time": 1.3606859130859377, "loss": 0.23491805791854858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.142633499649357, "step_time": 1.137160903930664} +{"epoch": 0, "iter": 15887, "iter_tflops": 47.947401817991675, "iter_time": 0.43028595352172855, "loss": 0.15892526507377625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.15389606566249, "step_time": 0.3955810604095459} +{"epoch": 0, "iter": 15888, "iter_tflops": 50.70700408200423, "iter_time": 0.40686871337890623, "loss": 0.2611941397190094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.91287017336407, "step_time": 0.3757059764862061} +{"epoch": 0, "iter": 15889, "iter_tflops": 34.40050903356125, "iter_time": 0.5997322158813477, "loss": 0.25770142674446106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.76667076838798, "step_time": 0.5611357536315918} +{"epoch": 0, "iter": 15890, "iter_tflops": 10.74971075299871, "iter_time": 1.919223129272461, "loss": 0.2029557079076767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.406530467434212, "step_time": 1.6629220848083497} +{"epoch": 0, "iter": 15891, "iter_tflops": 19.55877860831758, "iter_time": 1.0548252487182617, "loss": 0.26898258924484253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.638753303172503, "step_time": 0.872765718460083} +{"epoch": 0, "iter": 15892, "iter_tflops": 43.53618753441469, "iter_time": 0.47388378906249995, "loss": 
0.3212248384952545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.65768556729959, "step_time": 0.4329017086029052} +{"epoch": 0, "iter": 15893, "iter_tflops": 12.808774999968518, "iter_time": 1.1670593872070312, "loss": 0.11120395362377167, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 13.464150562120123, "step_time": 1.1102520751953124} +{"epoch": 0, "iter": 15894, "iter_tflops": 11.875823471652641, "iter_time": 1.2587422790527343, "loss": 0.32210344076156616, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 14.12568067694779, "step_time": 1.0582570457458496} +{"epoch": 0, "iter": 15895, "iter_tflops": 23.58481880365092, "iter_time": 0.6338230209350586, "loss": 0.06862294673919678, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 25.423783097468913, "step_time": 0.5879770545959472} +{"epoch": 0, "iter": 15896, "iter_tflops": 22.824556876728245, "iter_time": 0.6549349975585939, "loss": 0.13766099512577057, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.589541154287417, "step_time": 0.6079251747131349} +{"epoch": 0, "iter": 15897, "iter_tflops": 22.239618838605182, "iter_time": 0.927672981262207, "loss": 0.5235239863395691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.672857166069846, "step_time": 0.8715083847045899} +{"epoch": 0, "iter": 15898, "iter_tflops": 25.850340878608176, "iter_time": 0.798097541809082, "loss": 0.619117021560669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.86008216349926, "step_time": 0.714866069793701} +{"epoch": 0, "iter": 15899, "iter_tflops": 43.110430285901764, "iter_time": 0.478563850402832, "loss": 0.5992974638938904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.27883941093396, "step_time": 0.4457997169494629} +{"epoch": 0, "iter": 15900, "iter_tflops": 43.381548012447524, "iter_time": 0.47557301330566404, "loss": 0.4763858914375305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66547809952904, "step_time": 0.44210612106323244} +{"epoch": 0, "iter": 15901, "iter_tflops": 16.909967357514503, "iter_time": 
0.7563632431030273, "loss": 0.048123799264431, "lr": 3e-05, "seqlen": 5136.0, "step_tflops": 17.84000626050212, "step_time": 0.716932357788086} +{"epoch": 0, "iter": 15902, "iter_tflops": 10.863798868597064, "iter_time": 1.177311721801758, "loss": 0.0717766061425209, "lr": 3e-05, "seqlen": 5136.0, "step_tflops": 13.252093406763393, "step_time": 0.965136402130127} +{"epoch": 0, "iter": 15903, "iter_tflops": 26.67296099188738, "iter_time": 0.47951473236083986, "loss": 0.029742637649178505, "lr": 3e-05, "seqlen": 5136.0, "step_tflops": 29.196020951728322, "step_time": 0.438076057434082} +{"epoch": 0, "iter": 15904, "iter_tflops": 25.723076836244154, "iter_time": 0.497221923828125, "loss": 0.041081007570028305, "lr": 3e-05, "seqlen": 5136.0, "step_tflops": 28.42154957087448, "step_time": 0.4500133857727051} +{"epoch": 0, "iter": 15905, "iter_tflops": 21.919670876014568, "iter_time": 0.9412136535644532, "loss": 0.09687314927577972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.548889738590837, "step_time": 0.8760962295532226} +{"epoch": 0, "iter": 15906, "iter_tflops": 45.014496198643094, "iter_time": 0.458321102142334, "loss": 0.11278384178876877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.15082283739657, "step_time": 0.41975072479248043} +{"epoch": 0, "iter": 15907, "iter_tflops": 51.3322565814825, "iter_time": 0.40191284942626954, "loss": 0.10997498035430908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.40696529715637, "step_time": 0.3723555946350097} +{"epoch": 0, "iter": 15908, "iter_tflops": 52.24489987524259, "iter_time": 0.3948920097351074, "loss": 0.11868235468864441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.94915779131895, "step_time": 0.36227214431762694} +{"epoch": 0, "iter": 15909, "iter_tflops": 24.135619876812168, "iter_time": 0.8547985763549805, "loss": 0.666885495185852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.36536160686285, "step_time": 0.8133569641113281} +{"epoch": 0, "iter": 15910, "iter_tflops": 15.404593096180452, 
"iter_time": 1.3392819519042967, "loss": 0.7309966087341309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.473898804784262, "step_time": 1.1167698669433594} +{"epoch": 0, "iter": 15911, "iter_tflops": 34.8100724263391, "iter_time": 0.5926759719848632, "loss": 0.484407901763916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.68856689518078, "step_time": 0.5474098701477051} +{"epoch": 0, "iter": 15912, "iter_tflops": 36.04103180700429, "iter_time": 0.5724334869384766, "loss": 0.7414249777793884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.0509273644701, "step_time": 0.5283125114440919} +{"epoch": 0, "iter": 15913, "iter_tflops": 22.387548833693263, "iter_time": 0.9215432052612303, "loss": 0.7354065179824829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.87011591987685, "step_time": 0.864306381225586} +{"epoch": 0, "iter": 15914, "iter_tflops": 7.732444393924185, "iter_time": 2.6681205139160156, "loss": 0.6020845770835876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.310958660859148, "step_time": 2.2157861785888673} +{"epoch": 0, "iter": 15915, "iter_tflops": 11.57998505293912, "iter_time": 1.7816165924072263, "loss": 0.5902741551399231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.27969150326574, "step_time": 1.4447856597900393} +{"epoch": 0, "iter": 15916, "iter_tflops": 34.477934722585005, "iter_time": 0.5983854217529297, "loss": 0.7277893424034119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.420039784433854, "step_time": 0.5513380966186524} +{"epoch": 0, "iter": 15917, "iter_tflops": 15.102683098687624, "iter_time": 1.1172751159667969, "loss": 0.3067447543144226, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 16.044759342848263, "step_time": 1.0516737365722655} +{"epoch": 0, "iter": 15918, "iter_tflops": 12.222288694377138, "iter_time": 1.3805803833007815, "loss": 0.20803417265415192, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 14.461613749017747, "step_time": 1.1668028411865234} +{"epoch": 0, "iter": 15919, "iter_tflops": 
30.299039395482346, "iter_time": 0.5569104614257813, "loss": 0.2707083821296692, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 32.32037711542114, "step_time": 0.5220809135437011} +{"epoch": 0, "iter": 15920, "iter_tflops": 28.496165448560287, "iter_time": 0.5921446533203125, "loss": 0.32110685110092163, "lr": 3e-05, "seqlen": 6736.0, "step_tflops": 30.181029045181525, "step_time": 0.5590880279541015} +{"epoch": 0, "iter": 15921, "iter_tflops": 26.58787490377549, "iter_time": 0.7759587249755859, "loss": 0.35573095083236694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.95074091486105, "step_time": 0.7381233139038086} +{"epoch": 0, "iter": 15922, "iter_tflops": 14.809453110894648, "iter_time": 1.393102996826172, "loss": 0.32586705684661865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.966672310731624, "step_time": 1.1482979793548584} +{"epoch": 0, "iter": 15923, "iter_tflops": 40.80387931224986, "iter_time": 0.505615982055664, "loss": 0.2990400493144989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.06019016703203, "step_time": 0.4578563346862793} +{"epoch": 0, "iter": 15924, "iter_tflops": 43.116424872150645, "iter_time": 0.47849731445312504, "loss": 0.2260107398033142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.06103384477131, "step_time": 0.43839014625549316} +{"epoch": 0, "iter": 15925, "iter_tflops": 22.624413763273115, "iter_time": 0.911895164489746, "loss": 0.3411000072956085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.087038684779454, "step_time": 0.8565226211547852} +{"epoch": 0, "iter": 15926, "iter_tflops": 12.869308875296763, "iter_time": 1.6031236572265626, "loss": 0.24267859756946564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.508890917531048, "step_time": 1.4219621353149414} +{"epoch": 0, "iter": 15927, "iter_tflops": 13.712612181178267, "iter_time": 1.5045341644287111, "loss": 0.30113187432289124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.905448975632416, "step_time": 1.297108528137207} +{"epoch": 0, "iter": 15928, 
"iter_tflops": 19.327767162229428, "iter_time": 1.0674328460693359, "loss": 0.3138473927974701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.08407932914176, "step_time": 0.7909458198547363} +{"epoch": 0, "iter": 15929, "iter_tflops": 14.238286548273729, "iter_time": 1.2544331817626952, "loss": 0.3115600347518921, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 15.151537671340686, "step_time": 1.178822868347168} +{"epoch": 0, "iter": 15930, "iter_tflops": 18.053890103407543, "iter_time": 0.989314712524414, "loss": 0.17546895146369934, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 23.43554834766582, "step_time": 0.7621319046020509} +{"epoch": 0, "iter": 15931, "iter_tflops": 32.9607996771412, "iter_time": 0.5418854904174806, "loss": 0.1706410050392151, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 35.144944763921764, "step_time": 0.5082090530395508} +{"epoch": 0, "iter": 15932, "iter_tflops": 33.474565767515735, "iter_time": 0.5335686569213867, "loss": 0.3304954171180725, "lr": 3e-05, "seqlen": 7120.0, "step_tflops": 35.60567553319427, "step_time": 0.5016329231262207} +{"epoch": 0, "iter": 15933, "iter_tflops": 38.992026563339564, "iter_time": 0.5291105728149414, "loss": 0.4205392897129059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.076000272320236, "step_time": 0.49032924652099613} +{"epoch": 0, "iter": 15934, "iter_tflops": 36.78570176561137, "iter_time": 0.5608454513549804, "loss": 0.3284584879875183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14121468977732, "step_time": 0.5139628601074219} +{"epoch": 0, "iter": 15935, "iter_tflops": 42.14175008503198, "iter_time": 0.48956423187255854, "loss": 0.3215976059436798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.95552197436985, "step_time": 0.4489361152648926} +{"epoch": 0, "iter": 15936, "iter_tflops": 40.110722958568864, "iter_time": 0.5143535690307617, "loss": 0.31406503915786743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.68523120366994, "step_time": 0.4722670097351074} +{"epoch": 0, 
"iter": 15937, "iter_tflops": 22.317453354981275, "iter_time": 0.9244376220703125, "loss": 0.1488075703382492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.911015642683534, "step_time": 0.8628279876708984} +{"epoch": 0, "iter": 15938, "iter_tflops": 18.212391195003377, "iter_time": 1.1328053131103515, "loss": 0.15700706839561462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.154164758073854, "step_time": 0.8541422863006591} +{"epoch": 0, "iter": 15939, "iter_tflops": 49.76809206574391, "iter_time": 0.41454459381103514, "loss": 0.1628110706806183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.22260938015736, "step_time": 0.3804887619018555} +{"epoch": 0, "iter": 15940, "iter_tflops": 47.643952557047385, "iter_time": 0.4330264892578125, "loss": 0.10727441310882568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.339307760373686, "step_time": 0.4018576488494873} +{"epoch": 0, "iter": 15941, "iter_tflops": 39.77057327007785, "iter_time": 0.5187527313232422, "loss": 0.08274814486503601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.067320274093, "step_time": 0.47904288864135747} +{"epoch": 0, "iter": 15942, "iter_tflops": 46.73231922020495, "iter_time": 0.44147377777099606, "loss": 0.09834043681621552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.208158229733655, "step_time": 0.3951699161529541} +{"epoch": 0, "iter": 15943, "iter_tflops": 49.52550634247048, "iter_time": 0.41657511520385737, "loss": 0.06915091723203659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.86886859801015, "step_time": 0.3829873180389405} +{"epoch": 0, "iter": 15944, "iter_tflops": 48.07523817823167, "iter_time": 0.4291417846679688, "loss": 0.14037087559700012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14223562443786, "step_time": 0.3956695232391358} +{"epoch": 0, "iter": 15945, "iter_tflops": 24.453751663664555, "iter_time": 0.7135651855468751, "loss": 0.015188478864729404, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 26.021573424124274, "step_time": 
0.6705722808837891} +{"epoch": 0, "iter": 15946, "iter_tflops": 14.587735095450103, "iter_time": 1.1961655273437501, "loss": 0.0239117294549942, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 17.142878053430255, "step_time": 1.0178772659301758} +{"epoch": 0, "iter": 15947, "iter_tflops": 35.40351043679607, "iter_time": 0.4928704986572266, "loss": 0.029686523601412773, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 39.14832708779565, "step_time": 0.4457239208221435} +{"epoch": 0, "iter": 15948, "iter_tflops": 37.49223549815792, "iter_time": 0.4654122543334961, "loss": 0.02818683348596096, "lr": 3e-05, "seqlen": 6960.0, "step_tflops": 41.260116492341616, "step_time": 0.4229107265472412} +{"epoch": 0, "iter": 15949, "iter_tflops": 21.918910081323915, "iter_time": 0.9412463226318358, "loss": 0.0008119853446260095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.104563885480403, "step_time": 0.8929445114135742} +{"epoch": 0, "iter": 15950, "iter_tflops": 13.85420599340404, "iter_time": 1.4891574096679687, "loss": 0.0029016556218266487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.795891491013908, "step_time": 1.1593177852630616} +{"epoch": 0, "iter": 15951, "iter_tflops": 48.04827104758968, "iter_time": 0.42938264083862304, "loss": 0.02382246032357216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.14151881185044, "step_time": 0.3882292785644531} +{"epoch": 0, "iter": 15952, "iter_tflops": 44.21999353715617, "iter_time": 0.46655577850341795, "loss": 0.0028473574202507734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.62563808243651, "step_time": 0.42428427314758305} +{"epoch": 0, "iter": 15953, "iter_tflops": 33.848855386405575, "iter_time": 0.6095063858032227, "loss": 0.5727523565292358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.53645983809118, "step_time": 0.5496281108856201} +{"epoch": 0, "iter": 15954, "iter_tflops": 36.811805645039485, "iter_time": 0.5604477462768556, "loss": 0.5573235154151917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.92833887053662, "step_time": 0.504078447341919} +{"epoch": 0, "iter": 15955, "iter_tflops": 40.76355130757541, "iter_time": 0.5061161956787109, "loss": 0.6887255311012268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.27359053464563, "step_time": 0.4659909725189209} +{"epoch": 0, "iter": 15956, "iter_tflops": 36.7485493608777, "iter_time": 0.5614124603271484, "loss": 0.396098256111145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.04879028648411, "step_time": 0.5151489810943604} +{"epoch": 0, "iter": 15957, "iter_tflops": 16.172647138210472, "iter_time": 1.2756782073974609, "loss": 0.6177477836608887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.26960151788843, "step_time": 1.1946479187011718} +{"epoch": 0, "iter": 15958, "iter_tflops": 18.699955450949798, "iter_time": 1.103269660949707, "loss": 0.4607231914997101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.98513627373879, "step_time": 0.8257346801757812} +{"epoch": 0, "iter": 15959, "iter_tflops": 42.30949055567001, "iter_time": 0.4876233024597168, "loss": 0.7298853993415833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.583155382194754, "step_time": 0.4526034526824952} +{"epoch": 0, "iter": 15960, "iter_tflops": 47.50724512914998, "iter_time": 0.43427257156372073, "loss": 0.7277886867523193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.36595409458333, "step_time": 0.40164918327331545} +{"epoch": 0, "iter": 15961, "iter_tflops": 35.71793328460705, "iter_time": 0.5776116256713867, "loss": 0.093213751912117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.39816227526255, "step_time": 0.5372937736511231} +{"epoch": 0, "iter": 15962, "iter_tflops": 16.445787214617845, "iter_time": 1.2544910888671874, "loss": 0.045942045748233795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.509357476431724, "step_time": 1.0574973335266113} +{"epoch": 0, "iter": 15963, "iter_tflops": 40.89521214676217, "iter_time": 0.5044867706298829, "loss": 0.06086136773228645, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 45.092675118322276, "step_time": 0.45752649307250975} +{"epoch": 0, "iter": 15964, "iter_tflops": 41.45004040469781, "iter_time": 0.49773397827148436, "loss": 0.05662040784955025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2130775710453, "step_time": 0.4563080997467041} +{"epoch": 0, "iter": 15965, "iter_tflops": 19.308189941865876, "iter_time": 1.068515151977539, "loss": 0.4574121832847595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.588599835137902, "step_time": 1.0020639419555664} +{"epoch": 0, "iter": 15966, "iter_tflops": 15.139703767880908, "iter_time": 1.3627144775390625, "loss": 0.6899526715278625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.666129136029962, "step_time": 0.9983046836853027} +{"epoch": 0, "iter": 15967, "iter_tflops": 43.35010345080392, "iter_time": 0.47591797637939454, "loss": 0.7451184391975403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.65671643867331, "step_time": 0.4421891441345215} +{"epoch": 0, "iter": 15968, "iter_tflops": 45.84284841930445, "iter_time": 0.45003952026367194, "loss": 0.5376937389373779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.52011617319107, "step_time": 0.41662045860290525} +{"epoch": 0, "iter": 15969, "iter_tflops": 25.858094043884023, "iter_time": 0.7962533035278321, "loss": 0.10448838025331497, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 27.224982518927334, "step_time": 0.7562757034301759} +{"epoch": 0, "iter": 15970, "iter_tflops": 17.1600055972382, "iter_time": 1.1998593292236328, "loss": 0.20405837893486023, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 20.451416704219103, "step_time": 1.0067563095092773} +{"epoch": 0, "iter": 15971, "iter_tflops": 52.65381603300653, "iter_time": 0.3910370483398437, "loss": 0.09921512752771378, "lr": 3e-05, "seqlen": 8176.0, "step_tflops": 57.54857511319595, "step_time": 0.3577776298522949} +{"epoch": 0, "iter": 15972, "iter_tflops": 48.43233931812093, "iter_time": 0.4251207580566406, "loss": 0.07778920978307724, "lr": 3e-05, 
"seqlen": 8176.0, "step_tflops": 52.69816925609158, "step_time": 0.39070793342590326} +{"epoch": 0, "iter": 15973, "iter_tflops": 26.01606670738572, "iter_time": 0.6517514343261718, "loss": 0.00746152875944972, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 27.637576667507734, "step_time": 0.6135128631591796} +{"epoch": 0, "iter": 15974, "iter_tflops": 14.610054029187989, "iter_time": 1.1605712585449217, "loss": 0.0077026598155498505, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 16.244836575323095, "step_time": 1.0437783546447754} +{"epoch": 0, "iter": 15975, "iter_tflops": 35.415726894142395, "iter_time": 0.4787705993652344, "loss": 0.024980027228593826, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 39.180857336088955, "step_time": 0.4327625770568848} +{"epoch": 0, "iter": 15976, "iter_tflops": 39.838044655401006, "iter_time": 0.42562351989746094, "loss": 0.0010135634802281857, "lr": 3e-05, "seqlen": 6768.0, "step_tflops": 43.95686869305855, "step_time": 0.3857419624328613} +{"epoch": 0, "iter": 15977, "iter_tflops": 3.0156976769830104, "iter_time": 0.6500172424316406, "loss": 0.02704722248017788, "lr": 3e-05, "seqlen": 800.0, "step_tflops": 3.333395929205367, "step_time": 0.5880656032562256} +{"epoch": 0, "iter": 15978, "iter_tflops": 3.836683916822759, "iter_time": 0.510924415588379, "loss": 0.18849743902683258, "lr": 3e-05, "seqlen": 800.0, "step_tflops": 4.198119353264393, "step_time": 0.4669365787506104} +{"epoch": 0, "iter": 15979, "iter_tflops": 3.8058344296501923, "iter_time": 0.5150658874511719, "loss": 0.060110270977020264, "lr": 3e-05, "seqlen": 800.0, "step_tflops": 4.161243813413098, "step_time": 0.47107441329956057} +{"epoch": 0, "iter": 15980, "iter_tflops": 4.041528883654686, "iter_time": 0.4850282020568848, "loss": 0.04976750165224075, "lr": 3e-05, "seqlen": 800.0, "step_tflops": 4.391655232498916, "step_time": 0.4463591480255127} +{"epoch": 0, "iter": 15981, "iter_tflops": 32.37423492746766, "iter_time": 0.6372689132690429, "loss": 
0.22865331172943115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.69531050725469, "step_time": 0.5779777011871338} +{"epoch": 0, "iter": 15982, "iter_tflops": 36.439708334522706, "iter_time": 0.566170654296875, "loss": 0.30724889039993286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.65874186573326, "step_time": 0.5202155323028564} +{"epoch": 0, "iter": 15983, "iter_tflops": 42.93350007872684, "iter_time": 0.48053602600097656, "loss": 0.3948099613189697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.91033127291729, "step_time": 0.43979850387573244} +{"epoch": 0, "iter": 15984, "iter_tflops": 41.8471496519009, "iter_time": 0.49301072311401367, "loss": 0.2877086400985718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.696744920058336, "step_time": 0.4514784049987793} +{"epoch": 0, "iter": 15985, "iter_tflops": 20.55766266585822, "iter_time": 1.0035719451904297, "loss": 0.2656085789203644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.85677779246967, "step_time": 0.943922004699707} +{"epoch": 0, "iter": 15986, "iter_tflops": 16.984251576984374, "iter_time": 1.2147190246582031, "loss": 0.3225456476211548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.467779378881627, "step_time": 1.0079790840148926} +{"epoch": 0, "iter": 15987, "iter_tflops": 45.50171809489219, "iter_time": 0.45341350555419924, "loss": 0.3373951017856598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.44705972833892, "step_time": 0.4172360019683838} +{"epoch": 0, "iter": 15988, "iter_tflops": 47.45600694195705, "iter_time": 0.434741455078125, "loss": 0.23891150951385498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.09401450912295, "step_time": 0.40378689575195315} +{"epoch": 0, "iter": 15989, "iter_tflops": 29.847708574285853, "iter_time": 0.5722140884399415, "loss": 0.022383566945791245, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 32.13496546532033, "step_time": 0.5314858474731445} +{"epoch": 0, "iter": 15990, "iter_tflops": 20.86414884546268, "iter_time": 
0.8185945892333985, "loss": 0.027042172849178314, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 24.51870819731271, "step_time": 0.6965815334320069} +{"epoch": 0, "iter": 15991, "iter_tflops": 37.54298307129462, "iter_time": 0.45492600631713864, "loss": 0.01960311457514763, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 41.077477917322696, "step_time": 0.41578208351135254} +{"epoch": 0, "iter": 15992, "iter_tflops": 37.04877161999114, "iter_time": 0.4609944839477539, "loss": 0.03878626972436905, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 41.01042980135371, "step_time": 0.41646184730529784} +{"epoch": 0, "iter": 15993, "iter_tflops": 18.83226401898712, "iter_time": 1.0955184936523439, "loss": 0.38077178597450256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.886497479384307, "step_time": 1.0374422912597656} +{"epoch": 0, "iter": 15994, "iter_tflops": 14.291009999090774, "iter_time": 1.4436413879394532, "loss": 0.3477177917957306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.80008820269058, "step_time": 1.0419697780609132} +{"epoch": 0, "iter": 15995, "iter_tflops": 37.07989499164339, "iter_time": 0.5563956832885742, "loss": 0.26336294412612915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.63838569160716, "step_time": 0.5076750259399414} +{"epoch": 0, "iter": 15996, "iter_tflops": 39.84180561545245, "iter_time": 0.5178252639770508, "loss": 0.2779049277305603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41087967096091, "step_time": 0.47525168037414556} +{"epoch": 0, "iter": 15997, "iter_tflops": 20.371881093306715, "iter_time": 1.0127240295410156, "loss": 0.12332858890295029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.658961194990567, "step_time": 0.9525430755615236} +{"epoch": 0, "iter": 15998, "iter_tflops": 9.903302230664814, "iter_time": 2.083253952026367, "loss": 0.07387279719114304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.405420037608833, "step_time": 1.8088850250244142} +{"epoch": 0, "iter": 15999, "iter_tflops": 
10.96112580532529, "iter_time": 1.8822057037353515, "loss": 0.10750027745962143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.785698371556302, "step_time": 1.4965577335357667} +{"epoch": 0, "iter": 16000, "iter_tflops": 40.88941581838664, "iter_time": 0.5045582847595215, "loss": 0.03173813968896866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.20608021140218, "step_time": 0.45637873077392577} +{"epoch": 0, "iter": 16001, "iter_tflops": 12.396374176199496, "iter_time": 1.2190698852539061, "loss": 0.33715566992759705, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 13.17934706184113, "step_time": 1.1466460647583008} +{"epoch": 0, "iter": 16002, "iter_tflops": 12.557101151138871, "iter_time": 1.2034661712646482, "loss": 0.20148971676826477, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 17.68670114393491, "step_time": 0.8544299087524413} +{"epoch": 0, "iter": 16003, "iter_tflops": 24.532980170036126, "iter_time": 0.6159890213012694, "loss": 0.32569006085395813, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 26.330844884343687, "step_time": 0.5739294166564941} +{"epoch": 0, "iter": 16004, "iter_tflops": 23.616949046280343, "iter_time": 0.6398814010620116, "loss": 0.2336624264717102, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 25.40835054142174, "step_time": 0.5947669219970704} +{"epoch": 0, "iter": 16005, "iter_tflops": 14.034395351680873, "iter_time": 1.01860994720459, "loss": 0.04197835549712181, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 14.954452921183242, "step_time": 0.9559410018920899} +{"epoch": 0, "iter": 16006, "iter_tflops": 9.699039012296824, "iter_time": 1.4739166107177735, "loss": 0.015351611189544201, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 12.86339069518798, "step_time": 1.1113379859924317} +{"epoch": 0, "iter": 16007, "iter_tflops": 28.04224082326285, "iter_time": 0.5097871742248534, "loss": 0.08007212728261948, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 30.92398368809756, "step_time": 0.46228114891052247} +{"epoch": 0, "iter": 16008, 
"iter_tflops": 31.061254235688256, "iter_time": 0.460238166809082, "loss": 0.01601230911910534, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 34.32557317034058, "step_time": 0.41647009468078616} +{"epoch": 0, "iter": 16009, "iter_tflops": 19.140105137573716, "iter_time": 1.0778986511230468, "loss": 0.2620804011821747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.451380622854984, "step_time": 1.0087873229980469} +{"epoch": 0, "iter": 16010, "iter_tflops": 19.905044586424907, "iter_time": 1.0364756240844726, "loss": 0.4173978865146637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.517028291512105, "step_time": 0.7780318851470948} +{"epoch": 0, "iter": 16011, "iter_tflops": 48.02217372881823, "iter_time": 0.4296159858703613, "loss": 0.2998848557472229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.308927907561845, "step_time": 0.3944086475372314} +{"epoch": 0, "iter": 16012, "iter_tflops": 52.241649362215874, "iter_time": 0.3949165802001953, "loss": 0.4177532494068146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.395608590720165, "step_time": 0.3658280143737793} +{"epoch": 0, "iter": 16013, "iter_tflops": 28.918726587716783, "iter_time": 0.713416389465332, "loss": 0.5515740513801575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.733077942065155, "step_time": 0.6712992935180664} +{"epoch": 0, "iter": 16014, "iter_tflops": 40.356670674561194, "iter_time": 0.5112189178466797, "loss": 0.6044185757637024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.097850707954954, "step_time": 0.467848051071167} +{"epoch": 0, "iter": 16015, "iter_tflops": 44.12849382633635, "iter_time": 0.4675231742858886, "loss": 0.6454707384109497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.53545274508549, "step_time": 0.43401487350463863} +{"epoch": 0, "iter": 16016, "iter_tflops": 47.39671657939663, "iter_time": 0.43528528976440434, "loss": 0.6374396085739136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.15133814918242, "step_time": 0.4033343849182129} +{"epoch": 0, 
"iter": 16017, "iter_tflops": 28.85602425780179, "iter_time": 0.7149665985107422, "loss": 0.24857361614704132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.820116529237552, "step_time": 0.6694034881591797} +{"epoch": 0, "iter": 16018, "iter_tflops": 10.839129329413264, "iter_time": 1.9033902893066408, "loss": 0.21060064435005188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.793351165302447, "step_time": 1.4957274169921875} +{"epoch": 0, "iter": 16019, "iter_tflops": 15.10919838807721, "iter_time": 1.3654657897949218, "loss": 0.28441449999809265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.054363535369102, "step_time": 1.0827490234375001} +{"epoch": 0, "iter": 16020, "iter_tflops": 26.325981165835444, "iter_time": 0.7836780471801759, "loss": 0.18449558317661285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.631483920163586, "step_time": 0.6962558326721192} +{"epoch": 0, "iter": 16021, "iter_tflops": 15.593933741713501, "iter_time": 0.9481399307250976, "loss": 0.20297877490520477, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 16.72464853769496, "step_time": 0.8840383834838866} +{"epoch": 0, "iter": 16022, "iter_tflops": 24.848300395442678, "iter_time": 0.5950198211669921, "loss": 0.14945466816425323, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 26.463162649055317, "step_time": 0.5587099113464356} +{"epoch": 0, "iter": 16023, "iter_tflops": 26.00666349461758, "iter_time": 0.5685170364379882, "loss": 0.12054330855607986, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.750510120544995, "step_time": 0.5327913322448731} +{"epoch": 0, "iter": 16024, "iter_tflops": 26.296519014003152, "iter_time": 0.5622505111694336, "loss": 0.21495328843593597, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 27.906938214791847, "step_time": 0.5298048515319824} +{"epoch": 0, "iter": 16025, "iter_tflops": 31.422868665201822, "iter_time": 0.6565630187988281, "loss": 0.09374181181192398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.53381536225873, "step_time": 
0.6152325134277344} +{"epoch": 0, "iter": 16026, "iter_tflops": 14.749002946614565, "iter_time": 1.398812759399414, "loss": 0.19707940518856049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.62839443686276, "step_time": 1.1075078735351562} +{"epoch": 0, "iter": 16027, "iter_tflops": 42.221664290977564, "iter_time": 0.4886376190185547, "loss": 0.21128056943416595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.213607974957924, "step_time": 0.4369734573364258} +{"epoch": 0, "iter": 16028, "iter_tflops": 48.944158066336946, "iter_time": 0.42152310562133793, "loss": 0.20773598551750183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.0055235083092, "step_time": 0.3892253513336182} +{"epoch": 0, "iter": 16029, "iter_tflops": 29.88986208485797, "iter_time": 0.6902371597290039, "loss": 0.7449824213981628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.858595727725504, "step_time": 0.6475832672119141} +{"epoch": 0, "iter": 16030, "iter_tflops": 12.664309722295831, "iter_time": 1.6290736694335939, "loss": 0.5849677324295044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.217007884914747, "step_time": 1.1982972679138184} +{"epoch": 0, "iter": 16031, "iter_tflops": 36.72768181755711, "iter_time": 0.5617314376831055, "loss": 0.599460244178772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.288504463667394, "step_time": 0.5120838756561279} +{"epoch": 0, "iter": 16032, "iter_tflops": 39.75291205424842, "iter_time": 0.5189832000732422, "loss": 0.7135461568832397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.97672837162898, "step_time": 0.48005267715454103} +{"epoch": 0, "iter": 16033, "iter_tflops": 22.811930764366284, "iter_time": 0.9043992691040039, "loss": 0.6730118989944458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.11882066479342, "step_time": 0.8213400535583496} +{"epoch": 0, "iter": 16034, "iter_tflops": 36.737997580102444, "iter_time": 0.5615737075805665, "loss": 0.713272213935852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.009528908032294, 
"step_time": 0.5156544971466065} +{"epoch": 0, "iter": 16035, "iter_tflops": 39.30851838813476, "iter_time": 0.5248504486083985, "loss": 0.6522388458251953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.55366889175691, "step_time": 0.4848252582550049} +{"epoch": 0, "iter": 16036, "iter_tflops": 39.87856934002267, "iter_time": 0.517347885131836, "loss": 0.5901235342025757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.24594080239758, "step_time": 0.47706427764892584} +{"epoch": 0, "iter": 16037, "iter_tflops": 25.572311326299275, "iter_time": 0.8067746887207031, "loss": 0.10971439629793167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.56000870108914, "step_time": 0.748588062286377} +{"epoch": 0, "iter": 16038, "iter_tflops": 48.214265324605705, "iter_time": 0.4279043426513671, "loss": 0.12495213001966476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.87728481810888, "step_time": 0.39016930580139164} +{"epoch": 0, "iter": 16039, "iter_tflops": 50.49299131209449, "iter_time": 0.40859321212768557, "loss": 0.06461326777935028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.914618436298, "step_time": 0.3756940155029297} +{"epoch": 0, "iter": 16040, "iter_tflops": 46.65763335230089, "iter_time": 0.4421804542541504, "loss": 0.11110947281122208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49685152328372, "step_time": 0.4085619773864746} +{"epoch": 0, "iter": 16041, "iter_tflops": 20.76736061799789, "iter_time": 0.9934384002685547, "loss": 0.284370481967926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.760571813036794, "step_time": 0.9480951919555665} +{"epoch": 0, "iter": 16042, "iter_tflops": 17.76407819489085, "iter_time": 1.1613939819335937, "loss": 0.33670106530189514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.00098526380178, "step_time": 0.9377349815368653} +{"epoch": 0, "iter": 16043, "iter_tflops": 42.26167340375619, "iter_time": 0.4881750259399414, "loss": 0.3271418511867523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
46.494029773914775, "step_time": 0.4437364025115967} +{"epoch": 0, "iter": 16044, "iter_tflops": 39.20366045251556, "iter_time": 0.5262542648315429, "loss": 0.3879089057445526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.957945460814315, "step_time": 0.48026257514953613} +{"epoch": 0, "iter": 16045, "iter_tflops": 20.081086952194216, "iter_time": 1.0273892822265627, "loss": 0.19394232332706451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.168851991920576, "step_time": 0.9745967102050782} +{"epoch": 0, "iter": 16046, "iter_tflops": 14.855228029484687, "iter_time": 1.3888102874755859, "loss": 0.2140815407037735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.73540086913207, "step_time": 1.0453850746154785} +{"epoch": 0, "iter": 16047, "iter_tflops": 39.53424735459689, "iter_time": 0.5218537063598633, "loss": 0.17194585502147675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.4252619973077, "step_time": 0.4750942783355713} +{"epoch": 0, "iter": 16048, "iter_tflops": 41.957443290164164, "iter_time": 0.49171474456787106, "loss": 0.25849276781082153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.803404743168066, "step_time": 0.4504270725250244} +{"epoch": 0, "iter": 16049, "iter_tflops": 15.980097156607838, "iter_time": 1.29104931640625, "loss": 0.09386368840932846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.94296598977242, "step_time": 1.2176789779663086} +{"epoch": 0, "iter": 16050, "iter_tflops": 18.555297467135038, "iter_time": 1.1118708038330078, "loss": 0.13047155737876892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.910651805824752, "step_time": 0.7391835079193115} +{"epoch": 0, "iter": 16051, "iter_tflops": 50.84964437681388, "iter_time": 0.4057273902893066, "loss": 0.12667956948280334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.47602446383157, "step_time": 0.37189206886291504} +{"epoch": 0, "iter": 16052, "iter_tflops": 52.71194320281458, "iter_time": 0.3913931503295898, "loss": 0.16158315539360046, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 57.01797504388288, "step_time": 0.361834903717041} +{"epoch": 0, "iter": 16053, "iter_tflops": 22.925188229942723, "iter_time": 0.8999312591552734, "loss": 0.06412183493375778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.00160460981278, "step_time": 0.8595714263916014} +{"epoch": 0, "iter": 16054, "iter_tflops": 22.16522733981, "iter_time": 0.9307864608764649, "loss": 0.049445655196905136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.001005352735557, "step_time": 0.7367983131408692} +{"epoch": 0, "iter": 16055, "iter_tflops": 59.55693820819514, "iter_time": 0.3464095726013184, "loss": 0.02817266434431076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.18291722914798, "step_time": 0.3165107421875} +{"epoch": 0, "iter": 16056, "iter_tflops": 56.57351264246725, "iter_time": 0.3646776123046875, "loss": 0.0336039662361145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.02960788151391, "step_time": 0.3326007404327393} +{"epoch": 0, "iter": 16057, "iter_tflops": 36.66561935195444, "iter_time": 0.562682258605957, "loss": 0.0266786627471447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.316010600387564, "step_time": 0.5247504310607911} +{"epoch": 0, "iter": 16058, "iter_tflops": 15.598748064888666, "iter_time": 1.3226121368408204, "loss": 0.02538779005408287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.39839991322231, "step_time": 1.1213525962829591} +{"epoch": 0, "iter": 16059, "iter_tflops": 42.50176698210739, "iter_time": 0.48541731262207033, "loss": 0.02031487412750721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.8278461682895, "step_time": 0.4405731887817383} +{"epoch": 0, "iter": 16060, "iter_tflops": 48.1703268455522, "iter_time": 0.42829465484619145, "loss": 0.01646183617413044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.99116392472534, "step_time": 0.38933082389831547} +{"epoch": 0, "iter": 16061, "iter_tflops": 19.528813901307654, "iter_time": 1.0564437561035156, "loss": 0.10787418484687805, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 21.106025293771534, "step_time": 0.9774978103637695} +{"epoch": 0, "iter": 16062, "iter_tflops": 18.827553452747665, "iter_time": 1.0957925872802734, "loss": 0.09828764200210571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.43966647846522, "step_time": 0.8109812889099121} +{"epoch": 0, "iter": 16063, "iter_tflops": 45.903148285044196, "iter_time": 0.4494483337402343, "loss": 0.13555563986301422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.32245657250936, "step_time": 0.4099778690338135} +{"epoch": 0, "iter": 16064, "iter_tflops": 54.20130117103008, "iter_time": 0.3806383438110351, "loss": 0.14954495429992676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.004446409530104, "step_time": 0.34965320014953616} +{"epoch": 0, "iter": 16065, "iter_tflops": 23.74974535567869, "iter_time": 0.8686869354248047, "loss": 0.14381679892539978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.970757704179537, "step_time": 0.8262101516723632} +{"epoch": 0, "iter": 16066, "iter_tflops": 19.43781635677913, "iter_time": 1.061389465332031, "loss": 0.1728033870458603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.80328515960812, "step_time": 0.9462378425598146} +{"epoch": 0, "iter": 16067, "iter_tflops": 47.90594843603911, "iter_time": 0.4306582832336426, "loss": 0.05560605227947235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.200444924991004, "step_time": 0.39522830772399903} +{"epoch": 0, "iter": 16068, "iter_tflops": 51.07785301256453, "iter_time": 0.40391465759277334, "loss": 0.09206421673297882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.54869071467676, "step_time": 0.37140557670593266} +{"epoch": 0, "iter": 16069, "iter_tflops": 28.805228785595794, "iter_time": 0.7162273788452149, "loss": 0.11817234009504318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.633281904705512, "step_time": 0.6734862289428711} +{"epoch": 0, "iter": 16070, "iter_tflops": 11.324818119443032, "iter_time": 1.8217593688964844, "loss": 0.16977852582931519, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.441892200483055, "step_time": 1.4285588912963867} +{"epoch": 0, "iter": 16071, "iter_tflops": 12.189289680454234, "iter_time": 1.6925591278076173, "loss": 0.1681278645992279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.851928414514365, "step_time": 1.2242571296691893} +{"epoch": 0, "iter": 16072, "iter_tflops": 19.94376460246751, "iter_time": 1.0344633483886718, "loss": 0.19981969892978668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.508067103124944, "step_time": 0.8418082675933838} +{"epoch": 0, "iter": 16073, "iter_tflops": 12.945733645154867, "iter_time": 1.1294792022705078, "loss": 0.25664660334587097, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 13.898190687366135, "step_time": 1.052074851989746} +{"epoch": 0, "iter": 16074, "iter_tflops": 10.304806943452009, "iter_time": 1.418943313598633, "loss": 0.2372891753911972, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 12.382298614893239, "step_time": 1.180874195098877} +{"epoch": 0, "iter": 16075, "iter_tflops": 26.88325184954367, "iter_time": 0.5439050674438477, "loss": 0.25622475147247314, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 28.63567914739262, "step_time": 0.5106195259094238} +{"epoch": 0, "iter": 16076, "iter_tflops": 26.795313074700964, "iter_time": 0.5456900939941406, "loss": 0.13145776093006134, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 28.411826863962528, "step_time": 0.5146426162719727} +{"epoch": 0, "iter": 16077, "iter_tflops": 23.943226579445202, "iter_time": 0.8616672210693359, "loss": 0.031174086034297943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.159269939454905, "step_time": 0.820019561767578} +{"epoch": 0, "iter": 16078, "iter_tflops": 14.884602925885783, "iter_time": 1.3860694580078126, "loss": 0.03847252577543259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.764463188916064, "step_time": 0.9935770225524901} +{"epoch": 0, "iter": 16079, "iter_tflops": 43.93998430767746, "iter_time": 0.469528923034668, "loss": 
0.019797641783952713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.402429635422806, "step_time": 0.4262408657073975} +{"epoch": 0, "iter": 16080, "iter_tflops": 42.316003433585614, "iter_time": 0.48754825210571284, "loss": 0.016095370054244995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.6778596947707, "step_time": 0.44198884963989254} +{"epoch": 0, "iter": 16081, "iter_tflops": 18.978751604670844, "iter_time": 1.0870627288818357, "loss": 0.602541446685791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.34655193631599, "step_time": 1.0139847564697264} +{"epoch": 0, "iter": 16082, "iter_tflops": 24.887736009938124, "iter_time": 0.8289662628173828, "loss": 0.46575090289115906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.10957936269012, "step_time": 0.7339524097442627} +{"epoch": 0, "iter": 16083, "iter_tflops": 49.03916778309935, "iter_time": 0.4207064361572265, "loss": 0.6187602281570435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.51665105210962, "step_time": 0.38550793266296385} +{"epoch": 0, "iter": 16084, "iter_tflops": 46.19904008780575, "iter_time": 0.4465697441101074, "loss": 0.5374834537506104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.03091250800885, "step_time": 0.41236692428588867} +{"epoch": 0, "iter": 16085, "iter_tflops": 25.542496489521135, "iter_time": 0.8077164077758789, "loss": 0.36448752880096436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.91453413577462, "step_time": 0.7665409851074219} +{"epoch": 0, "iter": 16086, "iter_tflops": 16.972192484659587, "iter_time": 1.2155821075439452, "loss": 0.2827794551849365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.101652496311914, "step_time": 0.9777003726959229} +{"epoch": 0, "iter": 16087, "iter_tflops": 42.12552272733848, "iter_time": 0.48975281906127927, "loss": 0.32237204909324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.19150187243057, "step_time": 0.4466426219940186} +{"epoch": 0, "iter": 16088, "iter_tflops": 37.83165919964177, "iter_time": 
0.5453393783569336, "loss": 0.3981533646583557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.99831407882154, "step_time": 0.4912362308502197} +{"epoch": 0, "iter": 16089, "iter_tflops": 16.77340240212783, "iter_time": 1.2299885864257811, "loss": 0.6103146076202393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.71444278030046, "step_time": 1.164648178100586} +{"epoch": 0, "iter": 16090, "iter_tflops": 10.13215712297554, "iter_time": 2.0361995239257813, "loss": 0.43809789419174194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.815522551176162, "step_time": 1.3925322875976565} +{"epoch": 0, "iter": 16091, "iter_tflops": 14.627929339052407, "iter_time": 1.4103905639648437, "loss": 0.6398128271102905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.039669258266308, "step_time": 1.2107684249877928} +{"epoch": 0, "iter": 16092, "iter_tflops": 31.213464280001723, "iter_time": 0.6609677581787109, "loss": 0.554385244846344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.26491282327902, "step_time": 0.5391647853851318} +{"epoch": 0, "iter": 16093, "iter_tflops": 12.541809786328932, "iter_time": 1.231015609741211, "loss": 0.30513519048690796, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 13.35212322244643, "step_time": 1.1563077545166014} +{"epoch": 0, "iter": 16094, "iter_tflops": 11.577113415921634, "iter_time": 1.333593536376953, "loss": 0.23690567910671234, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 16.553506660071207, "step_time": 0.9326823577880861} +{"epoch": 0, "iter": 16095, "iter_tflops": 23.723444756659028, "iter_time": 0.6507977142333985, "loss": 0.15507330000400543, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 25.554739700260658, "step_time": 0.6041604728698731} +{"epoch": 0, "iter": 16096, "iter_tflops": 23.02750209253917, "iter_time": 0.670466278076172, "loss": 0.1540021151304245, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 24.833881056919292, "step_time": 0.6216975746154785} +{"epoch": 0, "iter": 16097, "iter_tflops": 17.71248904635657, 
"iter_time": 1.1647766418457033, "loss": 0.6619677543640137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.96512521340893, "step_time": 1.087843780517578} +{"epoch": 0, "iter": 16098, "iter_tflops": 13.164041724961898, "iter_time": 1.567230941772461, "loss": 0.5047229528427124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.41262915063359, "step_time": 1.2570255088806153} +{"epoch": 0, "iter": 16099, "iter_tflops": 38.51191252181508, "iter_time": 0.5357068023681639, "loss": 0.577281653881073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.777579241302504, "step_time": 0.49383171272277837} +{"epoch": 0, "iter": 16100, "iter_tflops": 38.174499890976065, "iter_time": 0.5404417495727539, "loss": 0.6992244124412537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.40207601560854, "step_time": 0.498310604095459} +{"epoch": 0, "iter": 16101, "iter_tflops": 19.65092420533641, "iter_time": 1.0498790435791017, "loss": 0.004317940212786198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.93987227366779, "step_time": 0.9852540283203124} +{"epoch": 0, "iter": 16102, "iter_tflops": 24.536309239032384, "iter_time": 0.8408393173217774, "loss": 0.0049267481081187725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.85722880903998, "step_time": 0.6909915733337402} +{"epoch": 0, "iter": 16103, "iter_tflops": 43.96147283558409, "iter_time": 0.46929941558837895, "loss": 0.021878646686673164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.584814958196134, "step_time": 0.42464077568054204} +{"epoch": 0, "iter": 16104, "iter_tflops": 47.65407065083121, "iter_time": 0.4329345474243164, "loss": 0.0018799538956955075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.65788234326786, "step_time": 0.39179497146606446} +{"epoch": 0, "iter": 16105, "iter_tflops": 17.845792211523765, "iter_time": 1.1560760803222656, "loss": 0.5997306704521179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.939436354418923, "step_time": 1.0893192977905275} +{"epoch": 0, "iter": 16106, "iter_tflops": 
13.702209185176708, "iter_time": 1.5056764373779297, "loss": 0.6200822591781616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.659002750917338, "step_time": 1.1056911125183106} +{"epoch": 0, "iter": 16107, "iter_tflops": 44.80760815980841, "iter_time": 0.46043728637695314, "loss": 0.7647126317024231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.59646235480882, "step_time": 0.4245389995574951} +{"epoch": 0, "iter": 16108, "iter_tflops": 45.88191887661313, "iter_time": 0.44965629196166995, "loss": 0.7024260759353638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89602186183987, "step_time": 0.41348173141479494} +{"epoch": 0, "iter": 16109, "iter_tflops": 21.41714447746743, "iter_time": 0.9632980499267578, "loss": 0.8832300305366516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.389908524471437, "step_time": 0.9214460830688477} +{"epoch": 0, "iter": 16110, "iter_tflops": 11.422806448083067, "iter_time": 1.8061317596435544, "loss": 0.7088614106178284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.181504080783824, "step_time": 1.4547888145446777} +{"epoch": 0, "iter": 16111, "iter_tflops": 34.274533020245, "iter_time": 0.6019365310668945, "loss": 0.48276686668395996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.225428772459594, "step_time": 0.5542204399108887} +{"epoch": 0, "iter": 16112, "iter_tflops": 34.117552630669095, "iter_time": 0.6047061386108398, "loss": 0.6306799054145813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.258093108588255, "step_time": 0.5537345523834228} +{"epoch": 0, "iter": 16113, "iter_tflops": 28.31564572306342, "iter_time": 0.5930189361572266, "loss": 0.08219625055789948, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 31.480525072736064, "step_time": 0.5334000644683838} +{"epoch": 0, "iter": 16114, "iter_tflops": 31.738065700701146, "iter_time": 0.5290717544555664, "loss": 0.09125719219446182, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 35.21291208018327, "step_time": 0.4768624095916748} +{"epoch": 0, "iter": 16115, 
"iter_tflops": 32.792876986952145, "iter_time": 0.5120537033081054, "loss": 0.08078905940055847, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 36.04728806794978, "step_time": 0.46582461547851567} +{"epoch": 0, "iter": 16116, "iter_tflops": 36.84329403391456, "iter_time": 0.4557603912353515, "loss": 0.07142798602581024, "lr": 3e-05, "seqlen": 6704.0, "step_tflops": 40.39165188507743, "step_time": 0.41572239112854004} +{"epoch": 0, "iter": 16117, "iter_tflops": 25.597323439680018, "iter_time": 0.8059863586425781, "loss": 0.06559406965970993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.558472072084, "step_time": 0.7486298027038574} +{"epoch": 0, "iter": 16118, "iter_tflops": 18.297173401200645, "iter_time": 1.1275563201904297, "loss": 0.08685819804668427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.50401900017965, "step_time": 0.9594064025878907} +{"epoch": 0, "iter": 16119, "iter_tflops": 48.82888408155316, "iter_time": 0.42251822662353516, "loss": 0.08245806396007538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.90885436928765, "step_time": 0.3827032451629639} +{"epoch": 0, "iter": 16120, "iter_tflops": 51.56722780643045, "iter_time": 0.40008149337768556, "loss": 0.07895810902118683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.18309154109196, "step_time": 0.3672117881774903} +{"epoch": 0, "iter": 16121, "iter_tflops": 28.36402881219305, "iter_time": 0.7273682327270508, "loss": 0.24940688908100128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.08417404546407, "step_time": 0.6857789573669434} +{"epoch": 0, "iter": 16122, "iter_tflops": 13.949456186063411, "iter_time": 1.4789890899658205, "loss": 0.19889603555202484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.07194567945867, "step_time": 1.208479331970215} +{"epoch": 0, "iter": 16123, "iter_tflops": 46.23840642667918, "iter_time": 0.44618954467773436, "loss": 0.273784875869751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.189047251868644, "step_time": 0.41106764602661133} +{"epoch": 0, 
"iter": 16124, "iter_tflops": 50.146796661917634, "iter_time": 0.4114139862060546, "loss": 0.21666137874126434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.61644601934247, "step_time": 0.37774507522583006} +{"epoch": 0, "iter": 16125, "iter_tflops": 26.943276259201383, "iter_time": 0.7657232666015624, "loss": 0.6239557266235352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.47083534721363, "step_time": 0.7246395568847657} +{"epoch": 0, "iter": 16126, "iter_tflops": 18.96640963005015, "iter_time": 1.0877701110839846, "loss": 0.5552487969398499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.311704685992133, "step_time": 0.8150811557769776} +{"epoch": 0, "iter": 16127, "iter_tflops": 35.86055709397153, "iter_time": 0.5753143615722656, "loss": 0.5058258771896362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.88350341320178, "step_time": 0.5305873107910156} +{"epoch": 0, "iter": 16128, "iter_tflops": 40.326585139391995, "iter_time": 0.5116003112792968, "loss": 0.73847496509552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.8543372939757, "step_time": 0.4704459075927734} +{"epoch": 0, "iter": 16129, "iter_tflops": 26.74839584635634, "iter_time": 0.7713020858764648, "loss": 0.6708540916442871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.59719032061864, "step_time": 0.6970625686645507} +{"epoch": 0, "iter": 16130, "iter_tflops": 36.35506239893671, "iter_time": 0.5674888763427735, "loss": 0.642072856426239, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7092494406445, "step_time": 0.5195538520812988} +{"epoch": 0, "iter": 16131, "iter_tflops": 36.82808421921894, "iter_time": 0.5602000198364258, "loss": 0.7557125091552734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.06511649216287, "step_time": 0.5149390621185302} +{"epoch": 0, "iter": 16132, "iter_tflops": 38.58851452529145, "iter_time": 0.5346433715820312, "loss": 0.7223446369171143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60761190841209, "step_time": 0.49584901809692383} +{"epoch": 0, 
"iter": 16133, "iter_tflops": 15.5106034711611, "iter_time": 1.3301283569335938, "loss": 0.1858234405517578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.539254936354304, "step_time": 1.2474016265869141} +{"epoch": 0, "iter": 16134, "iter_tflops": 18.771364457346532, "iter_time": 1.0990726623535156, "loss": 0.17008711397647858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.333865287250195, "step_time": 0.847834623336792} +{"epoch": 0, "iter": 16135, "iter_tflops": 45.09148258224993, "iter_time": 0.45753859329223634, "loss": 0.1798669546842575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.587669756332225, "step_time": 0.4246158256530762} +{"epoch": 0, "iter": 16136, "iter_tflops": 49.77635481113012, "iter_time": 0.4144757804870606, "loss": 0.14510950446128845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.389252006360294, "step_time": 0.37932298660278324} +{"epoch": 0, "iter": 16137, "iter_tflops": 18.550748855831685, "iter_time": 1.1121434326171873, "loss": 0.13800908625125885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.247157647139144, "step_time": 1.0719033889770508} +{"epoch": 0, "iter": 16138, "iter_tflops": 12.926572726025407, "iter_time": 1.5960219268798828, "loss": 0.1398560106754303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.556292281649863, "step_time": 1.2461179809570313} +{"epoch": 0, "iter": 16139, "iter_tflops": 40.8176054682479, "iter_time": 0.5054459533691407, "loss": 0.12567251920700073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.87946013977033, "step_time": 0.4597001266479492} +{"epoch": 0, "iter": 16140, "iter_tflops": 40.45697978184728, "iter_time": 0.5099514007568359, "loss": 0.07182092219591141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.00099267446145, "step_time": 0.46887791061401374} +{"epoch": 0, "iter": 16141, "iter_tflops": 19.49214585827233, "iter_time": 1.0584311065673828, "loss": 0.6641234755516052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.880550389522153, "step_time": 0.9880531463623047} 
+{"epoch": 0, "iter": 16142, "iter_tflops": 26.668894156863676, "iter_time": 0.7736013870239258, "loss": 0.498233437538147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.401906921474957, "step_time": 0.6786118240356446} +{"epoch": 0, "iter": 16143, "iter_tflops": 33.03998814248767, "iter_time": 0.6244279937744142, "loss": 0.7556915283203125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.27460754256546, "step_time": 0.568747531890869} +{"epoch": 0, "iter": 16144, "iter_tflops": 37.951234803192094, "iter_time": 0.5436211395263671, "loss": 0.8067014813423157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3499066678744, "step_time": 0.4989393005371094} +{"epoch": 0, "iter": 16145, "iter_tflops": 26.80591358234614, "iter_time": 0.7696470947265626, "loss": 0.6609604358673096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.463708336315477, "step_time": 0.7002205314636232} +{"epoch": 0, "iter": 16146, "iter_tflops": 44.37007678053069, "iter_time": 0.46497763824462895, "loss": 0.5308694839477539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24208614301074, "step_time": 0.42765757369995117} +{"epoch": 0, "iter": 16147, "iter_tflops": 49.37895526254541, "iter_time": 0.4178114624023438, "loss": 0.7343565225601196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.41587673239068, "step_time": 0.3862352313995361} +{"epoch": 0, "iter": 16148, "iter_tflops": 49.03731452754351, "iter_time": 0.42072233581542967, "loss": 0.7188629508018494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.26495718140427, "step_time": 0.38732958030700687} +{"epoch": 0, "iter": 16149, "iter_tflops": 34.216652221333206, "iter_time": 0.6029547653198243, "loss": 0.07446678727865219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.64177324828924, "step_time": 0.5630484466552734} +{"epoch": 0, "iter": 16150, "iter_tflops": 15.648133392332875, "iter_time": 1.31843798828125, "loss": 0.09234409779310226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.522837315162114, "step_time": 
1.1138192901611328} +{"epoch": 0, "iter": 16151, "iter_tflops": 39.881010080714134, "iter_time": 0.5173162231445312, "loss": 0.07041724026203156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.8420836867906, "step_time": 0.4705773944854736} +{"epoch": 0, "iter": 16152, "iter_tflops": 42.79021173054714, "iter_time": 0.4821451606750488, "loss": 0.045558247715234756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.06333190248656, "step_time": 0.4383687400817871} +{"epoch": 0, "iter": 16153, "iter_tflops": 19.344361685268847, "iter_time": 1.0665171508789062, "loss": 0.19439440965652466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.463809300730738, "step_time": 1.0081746368408202} +{"epoch": 0, "iter": 16154, "iter_tflops": 19.976485632619916, "iter_time": 1.0327689208984374, "loss": 0.1759367436170578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.87105766196916, "step_time": 0.8295221614837648} +{"epoch": 0, "iter": 16155, "iter_tflops": 39.14227359147029, "iter_time": 0.52707958984375, "loss": 0.1289566308259964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94483138324467, "step_time": 0.4804092330932617} +{"epoch": 0, "iter": 16156, "iter_tflops": 38.2620815841428, "iter_time": 0.5392046813964844, "loss": 0.15808124840259552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.88638611301801, "step_time": 0.4925489025115968} +{"epoch": 0, "iter": 16157, "iter_tflops": 23.631196835213355, "iter_time": 0.8730447998046875, "loss": 0.006247563287615776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.794264349105944, "step_time": 0.7998325996398926} +{"epoch": 0, "iter": 16158, "iter_tflops": 51.377356518847165, "iter_time": 0.401560043334961, "loss": 0.011126701720058918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.952572218104656, "step_time": 0.3622504253387451} +{"epoch": 0, "iter": 16159, "iter_tflops": 56.43154898081808, "iter_time": 0.3655950241088867, "loss": 0.005438602529466152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.92680669685133, 
"step_time": 0.33315287208557126} +{"epoch": 0, "iter": 16160, "iter_tflops": 58.30895618746828, "iter_time": 0.35382374954223633, "loss": 0.017217252403497696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.22758146278689, "step_time": 0.32121859550476073} +{"epoch": 0, "iter": 16161, "iter_tflops": 33.81885794892929, "iter_time": 0.610047019958496, "loss": 0.4785308241844177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.166917307753245, "step_time": 0.5704410285949707} +{"epoch": 0, "iter": 16162, "iter_tflops": 39.592718909891865, "iter_time": 0.5210830192565918, "loss": 0.4019964933395386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.23064409711198, "step_time": 0.4664434337615967} +{"epoch": 0, "iter": 16163, "iter_tflops": 47.80076763742777, "iter_time": 0.4316059036254883, "loss": 0.334242582321167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.640242996846894, "step_time": 0.39951581001281744} +{"epoch": 0, "iter": 16164, "iter_tflops": 51.26983829479918, "iter_time": 0.402402156829834, "loss": 0.403719961643219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.58126220108934, "step_time": 0.37118792724609373} +{"epoch": 0, "iter": 16165, "iter_tflops": 24.450419652440775, "iter_time": 0.7422941360473633, "loss": 0.01306520402431488, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 25.863158115262976, "step_time": 0.7017473678588867} +{"epoch": 0, "iter": 16166, "iter_tflops": 15.242628171512225, "iter_time": 1.1907003784179688, "loss": 0.007881668396294117, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 18.90308429777958, "step_time": 0.9601291961669922} +{"epoch": 0, "iter": 16167, "iter_tflops": 36.65957437933776, "iter_time": 0.4950794830322266, "loss": 0.003882473334670067, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 40.81566256878551, "step_time": 0.44466761016845696} +{"epoch": 0, "iter": 16168, "iter_tflops": 40.499742059398876, "iter_time": 0.44813626480102536, "loss": 0.002871106844395399, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 
44.352892314493054, "step_time": 0.4092045001983642} +{"epoch": 0, "iter": 16169, "iter_tflops": 20.795617542045772, "iter_time": 0.9920885238647461, "loss": 0.5693503618240356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.32762045848643, "step_time": 0.9240166702270509} +{"epoch": 0, "iter": 16170, "iter_tflops": 20.025035368598985, "iter_time": 1.030265022277832, "loss": 0.6907520294189453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.53432759056455, "step_time": 0.8409072322845459} +{"epoch": 0, "iter": 16171, "iter_tflops": 48.10141760489854, "iter_time": 0.42890822219848634, "loss": 0.6919794678688049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.20436327308434, "step_time": 0.3951986427307129} +{"epoch": 0, "iter": 16172, "iter_tflops": 46.843712665741954, "iter_time": 0.4404239616394042, "loss": 0.4900047779083252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.608390069154076, "step_time": 0.40766152572631836} +{"epoch": 0, "iter": 16173, "iter_tflops": 25.68835784603558, "iter_time": 0.8031301040649415, "loss": 0.3677378296852112, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.222717592754158, "step_time": 0.7578631134033204} +{"epoch": 0, "iter": 16174, "iter_tflops": 17.624736862032204, "iter_time": 1.1705759735107422, "loss": 0.3994768261909485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.821168642104865, "step_time": 0.9908710632324219} +{"epoch": 0, "iter": 16175, "iter_tflops": 36.10220731564821, "iter_time": 0.571463493347168, "loss": 0.25810495018959045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.28276736801685, "step_time": 0.5121568069458008} +{"epoch": 0, "iter": 16176, "iter_tflops": 39.499985941541226, "iter_time": 0.5223063507080078, "loss": 0.27954912185668945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.41832531412447, "step_time": 0.47517018127441407} +{"epoch": 0, "iter": 16177, "iter_tflops": 14.458821642259757, "iter_time": 1.4268862304687502, "loss": 0.46784812211990356, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 15.52850819744621, "step_time": 1.3285946884155273} +{"epoch": 0, "iter": 16178, "iter_tflops": 21.201928818227778, "iter_time": 0.9730762557983399, "loss": 0.4493127763271332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.99099924694534, "step_time": 0.7937783889770507} +{"epoch": 0, "iter": 16179, "iter_tflops": 42.11820174588229, "iter_time": 0.489837947845459, "loss": 0.343692421913147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60718898070837, "step_time": 0.4523649444580078} +{"epoch": 0, "iter": 16180, "iter_tflops": 46.92361789226507, "iter_time": 0.4396739730834961, "loss": 0.3764323890209198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.6685379053858, "step_time": 0.40717759704589845} +{"epoch": 0, "iter": 16181, "iter_tflops": 30.378052432447717, "iter_time": 0.6791447067260742, "loss": 0.3188450336456299, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.42391863193428, "step_time": 0.6362924156188965} +{"epoch": 0, "iter": 16182, "iter_tflops": 13.771092551994334, "iter_time": 1.4981450042724611, "loss": 0.2159537523984909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.869313660886288, "step_time": 1.1545543327331544} +{"epoch": 0, "iter": 16183, "iter_tflops": 39.01261136424918, "iter_time": 0.5288313903808594, "loss": 0.14338470995426178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.95492799011205, "step_time": 0.4802963123321533} +{"epoch": 0, "iter": 16184, "iter_tflops": 39.98877257704878, "iter_time": 0.5159221496582032, "loss": 0.25731122493743896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.92042150900609, "step_time": 0.46973805809020996} +{"epoch": 0, "iter": 16185, "iter_tflops": 21.644679198106537, "iter_time": 0.9531716003417968, "loss": 0.6274616718292236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.26110308896199, "step_time": 0.886935302734375} +{"epoch": 0, "iter": 16186, "iter_tflops": 15.028572555911154, "iter_time": 1.3727912902832031, "loss": 0.6346518993377686, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 17.792645957592587, "step_time": 1.1595292549133303} +{"epoch": 0, "iter": 16187, "iter_tflops": 42.99424032715926, "iter_time": 0.4798571472167969, "loss": 0.6963602900505066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.52525617109471, "step_time": 0.4434385795593261} +{"epoch": 0, "iter": 16188, "iter_tflops": 44.00735424152248, "iter_time": 0.468810131072998, "loss": 0.5567155480384827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.32778199235347, "step_time": 0.4359192981719971} +{"epoch": 0, "iter": 16189, "iter_tflops": 25.694483445068432, "iter_time": 0.8029386367797853, "loss": 0.0018306206911802292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.09231616394029, "step_time": 0.7615108795166015} +{"epoch": 0, "iter": 16190, "iter_tflops": 13.802702319467194, "iter_time": 1.4947140808105468, "loss": 0.02611975558102131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.192161801666366, "step_time": 1.2000290451049804} +{"epoch": 0, "iter": 16191, "iter_tflops": 52.23818985979458, "iter_time": 0.39494273376464845, "loss": 0.001207371475175023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.9708130132335, "step_time": 0.35588759994506836} +{"epoch": 0, "iter": 16192, "iter_tflops": 57.33168751636214, "iter_time": 0.3598549842834473, "loss": 0.0048790075816214085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.923732989258646, "step_time": 0.32787459564208987} +{"epoch": 0, "iter": 16193, "iter_tflops": 28.60793603761058, "iter_time": 0.7211667938232422, "loss": 0.10634025186300278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.271166309555547, "step_time": 0.6815427360534667} +{"epoch": 0, "iter": 16194, "iter_tflops": 16.7191551085076, "iter_time": 1.2339794311523438, "loss": 0.10414516180753708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.09968734863734, "step_time": 1.0801796455383301} +{"epoch": 0, "iter": 16195, "iter_tflops": 44.86383436689564, "iter_time": 0.459860237121582, "loss": 0.11377589404582977, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 48.99791089012683, "step_time": 0.421060676574707} +{"epoch": 0, "iter": 16196, "iter_tflops": 51.35864295792005, "iter_time": 0.4017063598632813, "loss": 0.1161266639828682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.66882596084389, "step_time": 0.37060407066345213} +{"epoch": 0, "iter": 16197, "iter_tflops": 38.58241802457935, "iter_time": 0.5347278518676758, "loss": 0.1594192236661911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6858011002663, "step_time": 0.4949189643859863} +{"epoch": 0, "iter": 16198, "iter_tflops": 19.047424928435092, "iter_time": 1.0831434478759765, "loss": 0.08198322355747223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.8797469090202, "step_time": 0.9017186069488525} +{"epoch": 0, "iter": 16199, "iter_tflops": 48.414519130626445, "iter_time": 0.42613442993164063, "loss": 0.05907641351222992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.66893107176714, "step_time": 0.39171278190612796} +{"epoch": 0, "iter": 16200, "iter_tflops": 53.842978628281784, "iter_time": 0.3831714744567871, "loss": 0.10675500333309174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.63473367139843, "step_time": 0.35185788726806644} +{"epoch": 0, "iter": 16201, "iter_tflops": 35.49070639123202, "iter_time": 0.5813097457885742, "loss": 0.521195650100708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.000275142406004, "step_time": 0.5429195823669434} +{"epoch": 0, "iter": 16202, "iter_tflops": 21.1959924536787, "iter_time": 0.9733487854003907, "loss": 0.5058482885360718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.001806683566855, "step_time": 0.7934484615325927} +{"epoch": 0, "iter": 16203, "iter_tflops": 37.23661445072601, "iter_time": 0.554053955078125, "loss": 0.6138343214988708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.64311778028668, "step_time": 0.5076159172058106} +{"epoch": 0, "iter": 16204, "iter_tflops": 38.462524204246485, "iter_time": 0.5363946838378906, "loss": 0.6925599575042725, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.11010901353029, "step_time": 0.48993208503723146} +{"epoch": 0, "iter": 16205, "iter_tflops": 23.900667095836372, "iter_time": 0.8632015762329102, "loss": 0.18017540872097015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.475779255544147, "step_time": 0.8098316955566407} +{"epoch": 0, "iter": 16206, "iter_tflops": 19.887179156029227, "iter_time": 1.0374067306518555, "loss": 0.12158739566802979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.233834087702064, "step_time": 0.8513342723846435} +{"epoch": 0, "iter": 16207, "iter_tflops": 46.23559709203702, "iter_time": 0.4462166557312012, "loss": 0.1017300933599472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.04372139716508, "step_time": 0.4122613773345948} +{"epoch": 0, "iter": 16208, "iter_tflops": 53.71652411556773, "iter_time": 0.38407350158691406, "loss": 0.188011035323143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.35012151792851, "step_time": 0.3535741310119629} +{"epoch": 0, "iter": 16209, "iter_tflops": 33.23919173914916, "iter_time": 0.6206857757568358, "loss": 0.14112617075443268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.45361926662114, "step_time": 0.581917839050293} +{"epoch": 0, "iter": 16210, "iter_tflops": 13.349845731253218, "iter_time": 1.5454181213378908, "loss": 0.1281234472990036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.398788145271203, "step_time": 1.3397868270874023} +{"epoch": 0, "iter": 16211, "iter_tflops": 16.09671846078112, "iter_time": 1.2816956176757812, "loss": 0.19152382016181946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.513545035451358, "step_time": 1.0572703971862794} +{"epoch": 0, "iter": 16212, "iter_tflops": 28.40348454761078, "iter_time": 0.7263578338623047, "loss": 0.2120402455329895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.01424130803089, "step_time": 0.589220064163208} +{"epoch": 0, "iter": 16213, "iter_tflops": 15.45140126146024, "iter_time": 0.9410355606079102, "loss": 
0.24734558165073395, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 16.16303293826297, "step_time": 0.8996033172607422} +{"epoch": 0, "iter": 16214, "iter_tflops": 10.305542742757792, "iter_time": 1.4109221038818358, "loss": 0.2988989055156708, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 13.093683722200588, "step_time": 1.1104833717346192} +{"epoch": 0, "iter": 16215, "iter_tflops": 25.96423356082888, "iter_time": 0.5600133743286133, "loss": 0.25097236037254333, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 27.650064033153047, "step_time": 0.5258692359924317} +{"epoch": 0, "iter": 16216, "iter_tflops": 27.167823344867603, "iter_time": 0.5352036437988281, "loss": 0.16462674736976624, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 28.866341085650504, "step_time": 0.5037118492126464} +{"epoch": 0, "iter": 16217, "iter_tflops": 29.83223231822023, "iter_time": 0.6915705566406251, "loss": 0.7176361680030823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.677233190750364, "step_time": 0.6512908935546875} +{"epoch": 0, "iter": 16218, "iter_tflops": 18.776041730814462, "iter_time": 1.0987988739013672, "loss": 0.5608432292938232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.886011922503336, "step_time": 0.8290236930847168} +{"epoch": 0, "iter": 16219, "iter_tflops": 43.6634527266528, "iter_time": 0.47250256729125983, "loss": 0.6652587652206421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.01086939234717, "step_time": 0.43885794448852533} +{"epoch": 0, "iter": 16220, "iter_tflops": 47.36173260771726, "iter_time": 0.4356068153381348, "loss": 0.6957100629806519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.8602765506609, "step_time": 0.4056425743103027} +{"epoch": 0, "iter": 16221, "iter_tflops": 40.8542600143487, "iter_time": 0.5049924659729004, "loss": 0.5918243527412415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.04808525074352, "step_time": 0.4683766250610351} +{"epoch": 0, "iter": 16222, "iter_tflops": 45.8006559688653, "iter_time": 0.45045410537719727, 
"loss": 0.4561097025871277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.605693239844705, "step_time": 0.4076832504272462} +{"epoch": 0, "iter": 16223, "iter_tflops": 43.13352488814007, "iter_time": 0.47830761718750003, "loss": 0.5167651176452637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.580020715034834, "step_time": 0.4429172248840332} +{"epoch": 0, "iter": 16224, "iter_tflops": 51.167492939344726, "iter_time": 0.4032070426940918, "loss": 0.47440305352211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.37979985435385, "step_time": 0.37253824615478515} +{"epoch": 0, "iter": 16225, "iter_tflops": 29.986057618172847, "iter_time": 0.6880228729248047, "loss": 0.6006214618682861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.670424305511755, "step_time": 0.6514309158325196} +{"epoch": 0, "iter": 16226, "iter_tflops": 9.497764400177392, "iter_time": 2.1722052307128905, "loss": 0.6023179292678833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.099190522444092, "step_time": 1.7051631240844725} +{"epoch": 0, "iter": 16227, "iter_tflops": 12.968433120792653, "iter_time": 1.5908701782226564, "loss": 0.6976216435432434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.308291686419915, "step_time": 1.3477071075439453} +{"epoch": 0, "iter": 16228, "iter_tflops": 37.32627084472381, "iter_time": 0.5527231369018554, "loss": 0.7316309809684753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.66916168603841, "step_time": 0.5072908477783203} +{"epoch": 0, "iter": 16229, "iter_tflops": 10.555626532737639, "iter_time": 1.3774945526123048, "loss": 0.22043932974338531, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 11.209659643361663, "step_time": 1.2971239547729492} +{"epoch": 0, "iter": 16230, "iter_tflops": 17.081609200607744, "iter_time": 0.8512264785766601, "loss": 0.371168851852417, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 20.48914159740203, "step_time": 0.7096596984863281} +{"epoch": 0, "iter": 16231, "iter_tflops": 25.91513565130052, "iter_time": 
0.5610743560791016, "loss": 0.23370829224586487, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 27.69311333715557, "step_time": 0.5250517654418946} +{"epoch": 0, "iter": 16232, "iter_tflops": 23.031430630541298, "iter_time": 0.6313250045776367, "loss": 0.1159185841679573, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.53104426716862, "step_time": 0.5927313117980956} +{"epoch": 0, "iter": 16233, "iter_tflops": 27.498936269144323, "iter_time": 0.7502506027221678, "loss": 0.21730750799179077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.13315546016103, "step_time": 0.7081654281616212} +{"epoch": 0, "iter": 16234, "iter_tflops": 21.65194283685744, "iter_time": 0.9528518371582032, "loss": 0.12528809905052185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.640191211641238, "step_time": 0.720354600906372} +{"epoch": 0, "iter": 16235, "iter_tflops": 53.628966989343354, "iter_time": 0.38470055770874023, "loss": 0.15565602481365204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.41392618091589, "step_time": 0.3531879272460937} +{"epoch": 0, "iter": 16236, "iter_tflops": 48.07374849254525, "iter_time": 0.42915508270263664, "loss": 0.1315898448228836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.16697513634915, "step_time": 0.3954818820953369} +{"epoch": 0, "iter": 16237, "iter_tflops": 33.57734445713283, "iter_time": 0.6144349365234375, "loss": 0.8087035417556763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.05156642711707, "step_time": 0.5722662162780762} +{"epoch": 0, "iter": 16238, "iter_tflops": 10.359057769356838, "iter_time": 1.9915994262695311, "loss": 0.5449212789535522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.076384277640056, "step_time": 1.7083833236694335} +{"epoch": 0, "iter": 16239, "iter_tflops": 13.785797975142113, "iter_time": 1.4965469207763673, "loss": 0.5387728810310364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.915717663200976, "step_time": 1.296271644592285} +{"epoch": 0, "iter": 16240, "iter_tflops": 19.29600191294357, 
"iter_time": 1.0691900634765625, "loss": 0.5438569784164429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.073978578666694, "step_time": 0.856987283706665} +{"epoch": 0, "iter": 16241, "iter_tflops": 13.360764628615069, "iter_time": 1.0608125457763673, "loss": 0.10629553347826004, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 14.128073438973608, "step_time": 1.003198829650879} +{"epoch": 0, "iter": 16242, "iter_tflops": 6.273426231352659, "iter_time": 2.259254547119141, "loss": 0.1444370448589325, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 7.399142214004377, "step_time": 1.9155283584594724} +{"epoch": 0, "iter": 16243, "iter_tflops": 8.051860672370994, "iter_time": 1.7602473907470702, "loss": 0.17190849781036377, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 9.42941751770967, "step_time": 1.5030903778076172} +{"epoch": 0, "iter": 16244, "iter_tflops": 14.739078942614514, "iter_time": 0.961611427307129, "loss": 0.16274063289165497, "lr": 3e-05, "seqlen": 5680.0, "step_tflops": 17.04145495357464, "step_time": 0.8316934661865235} +{"epoch": 0, "iter": 16245, "iter_tflops": 12.521078382801644, "iter_time": 1.1384642944335936, "loss": 0.17383407056331635, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 13.630481419214098, "step_time": 1.045803169250488} +{"epoch": 0, "iter": 16246, "iter_tflops": 12.966607324980995, "iter_time": 1.0993469848632813, "loss": 0.2512887120246887, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 15.037492986965512, "step_time": 0.947950611114502} +{"epoch": 0, "iter": 16247, "iter_tflops": 21.476460363900383, "iter_time": 0.6637406921386719, "loss": 0.15075644850730896, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 23.140032896252976, "step_time": 0.6160233535766602} +{"epoch": 0, "iter": 16248, "iter_tflops": 21.74013482447939, "iter_time": 0.6556905364990234, "loss": 0.2804681956768036, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 23.4148985268041, "step_time": 0.608791904449463} +{"epoch": 0, "iter": 16249, "iter_tflops": 
16.75274778540367, "iter_time": 1.2315050506591798, "loss": 0.5963509678840637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.76711710889461, "step_time": 1.1611953353881834} +{"epoch": 0, "iter": 16250, "iter_tflops": 23.50286855112567, "iter_time": 0.8778117218017578, "loss": 0.5659560561180115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.925997025280246, "step_time": 0.6265897884368897} +{"epoch": 0, "iter": 16251, "iter_tflops": 37.134128780461886, "iter_time": 0.5555830764770507, "loss": 0.47722795605659485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.53531930394067, "step_time": 0.508965856552124} +{"epoch": 0, "iter": 16252, "iter_tflops": 42.19985398305355, "iter_time": 0.48889016342163083, "loss": 0.5124224424362183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.166330045178555, "step_time": 0.4468861503601074} +{"epoch": 0, "iter": 16253, "iter_tflops": 21.03798098645019, "iter_time": 0.9806593856811524, "loss": 0.0901336744427681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.42166010031326, "step_time": 0.9201412124633789} +{"epoch": 0, "iter": 16254, "iter_tflops": 16.263375787845426, "iter_time": 1.2685615692138672, "loss": 0.06390415132045746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.31132874089223, "step_time": 1.1266846771240233} +{"epoch": 0, "iter": 16255, "iter_tflops": 48.430066718738985, "iter_time": 0.4259976272583008, "loss": 0.09518201649188995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.60446770744475, "step_time": 0.39219280052185057} +{"epoch": 0, "iter": 16256, "iter_tflops": 52.92085866376773, "iter_time": 0.38984804916381843, "loss": 0.1368054896593094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.43648064093132, "step_time": 0.3591984272003174} +{"epoch": 0, "iter": 16257, "iter_tflops": 29.059443552281806, "iter_time": 0.7099617538452148, "loss": 0.49249541759490967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.874581594179816, "step_time": 0.6682226104736327} +{"epoch": 0, "iter": 16258, 
"iter_tflops": 13.356621201145161, "iter_time": 1.5446341705322266, "loss": 0.48341846466064453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.3097223280621, "step_time": 1.1918789405822752} +{"epoch": 0, "iter": 16259, "iter_tflops": 36.000830640617636, "iter_time": 0.5730727081298828, "loss": 0.4528707265853882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.16579150506387, "step_time": 0.5267630939483643} +{"epoch": 0, "iter": 16260, "iter_tflops": 37.203960235945054, "iter_time": 0.5545402526855469, "loss": 0.47277671098709106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.65298098470077, "step_time": 0.5074927597045898} +{"epoch": 0, "iter": 16261, "iter_tflops": 27.85364342605291, "iter_time": 0.7406964035034179, "loss": 0.12049567699432373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.058436925747632, "step_time": 0.6863661460876466} +{"epoch": 0, "iter": 16262, "iter_tflops": 12.965331790659326, "iter_time": 1.5912507171630859, "loss": 0.14231222867965698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.055845141503546, "step_time": 1.467794593811035} +{"epoch": 0, "iter": 16263, "iter_tflops": 14.778269120958464, "iter_time": 1.3960426177978515, "loss": 0.16651993989944458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.064981475967986, "step_time": 1.2089725112915037} +{"epoch": 0, "iter": 16264, "iter_tflops": 20.200179867874336, "iter_time": 1.0213321685791017, "loss": 0.08992461115121841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.215257516849494, "step_time": 0.7312034454345703} +{"epoch": 0, "iter": 16265, "iter_tflops": 15.948012748706864, "iter_time": 0.8861609954833983, "loss": 0.24687246978282928, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 16.75279662501556, "step_time": 0.8435909042358399} +{"epoch": 0, "iter": 16266, "iter_tflops": 18.8036551747917, "iter_time": 0.7515829620361328, "loss": 0.17949998378753662, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 26.090470561927983, "step_time": 0.5416731300354004} +{"epoch": 0, 
"iter": 16267, "iter_tflops": 24.030846780198967, "iter_time": 0.5880985794067383, "loss": 0.1814378798007965, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 25.53163087196475, "step_time": 0.5535293426513672} +{"epoch": 0, "iter": 16268, "iter_tflops": 25.373596450413206, "iter_time": 0.5569768905639648, "loss": 0.22005483508110046, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 27.043241517761928, "step_time": 0.5225892333984374} +{"epoch": 0, "iter": 16269, "iter_tflops": 15.676314091649607, "iter_time": 1.220952880859375, "loss": 0.055561456829309464, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 16.28570143170889, "step_time": 1.1752665939331053} +{"epoch": 0, "iter": 16270, "iter_tflops": 16.050497620163704, "iter_time": 1.1924889373779297, "loss": 0.0679771676659584, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 19.35122475550927, "step_time": 0.9890867938995362} +{"epoch": 0, "iter": 16271, "iter_tflops": 42.00948642408218, "iter_time": 0.4556123504638672, "loss": 0.04693601280450821, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 46.41377150325472, "step_time": 0.4123784866333008} +{"epoch": 0, "iter": 16272, "iter_tflops": 43.09651437510589, "iter_time": 0.44412039184570307, "loss": 0.05082167685031891, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 47.17220834854341, "step_time": 0.40574824714660646} +{"epoch": 0, "iter": 16273, "iter_tflops": 8.927415020634339, "iter_time": 0.9131502151489258, "loss": 0.04237131401896477, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 9.507114106066318, "step_time": 0.8574706115722655} +{"epoch": 0, "iter": 16274, "iter_tflops": 4.868381233405219, "iter_time": 1.674493133544922, "loss": 0.030218670144677162, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 5.92139408137518, "step_time": 1.3767148132324218} +{"epoch": 0, "iter": 16275, "iter_tflops": 6.766624163651176, "iter_time": 1.2047471160888672, "loss": 0.02747543528676033, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 8.275052436136635, "step_time": 0.985138282775879} 
+{"epoch": 0, "iter": 16276, "iter_tflops": 17.261090888506896, "iter_time": 0.47228017044067383, "loss": 0.04017380252480507, "lr": 3e-05, "seqlen": 3296.0, "step_tflops": 19.08954460471399, "step_time": 0.4270437622070313} +{"epoch": 0, "iter": 16277, "iter_tflops": 17.888673014826153, "iter_time": 0.9363831634521486, "loss": 0.2768574357032776, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 19.206172067121848, "step_time": 0.8721494407653809} +{"epoch": 0, "iter": 16278, "iter_tflops": 10.607269761972097, "iter_time": 1.5791671752929688, "loss": 0.37055760622024536, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 14.105411120049407, "step_time": 1.1875337829589845} +{"epoch": 0, "iter": 16279, "iter_tflops": 25.631950047208242, "iter_time": 0.6535067443847656, "loss": 0.3021557033061981, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 27.641572600054353, "step_time": 0.6059949073791504} +{"epoch": 0, "iter": 16280, "iter_tflops": 25.366663410527753, "iter_time": 0.6603411712646484, "loss": 0.28112635016441345, "lr": 3e-05, "seqlen": 6688.0, "step_tflops": 27.245698721961247, "step_time": 0.614799877166748} +{"epoch": 0, "iter": 16281, "iter_tflops": 29.287707761284175, "iter_time": 0.7044284133911134, "loss": 0.6253816485404968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.73251101230821, "step_time": 0.650156349182129} +{"epoch": 0, "iter": 16282, "iter_tflops": 8.008302750149195, "iter_time": 2.576212982177734, "loss": 0.4116125702857971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.415456202105212, "step_time": 2.1911942520141596} +{"epoch": 0, "iter": 16283, "iter_tflops": 11.73378075768541, "iter_time": 1.7582647857666014, "loss": 0.3848530054092407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.06003724272611, "step_time": 1.4673569602966308} +{"epoch": 0, "iter": 16284, "iter_tflops": 34.63025400583913, "iter_time": 0.5957534561157226, "loss": 0.43692663311958313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.240961291392686, "step_time": 
0.5395024814605713} +{"epoch": 0, "iter": 16285, "iter_tflops": 16.528754165135478, "iter_time": 1.0233637619018556, "loss": 0.25670522451400757, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 17.769469492301784, "step_time": 0.951909568786621} +{"epoch": 0, "iter": 16286, "iter_tflops": 28.734499625730674, "iter_time": 0.5886626968383789, "loss": 0.23289594054222107, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 30.941373187482657, "step_time": 0.5466767082214357} +{"epoch": 0, "iter": 16287, "iter_tflops": 30.891436407255856, "iter_time": 0.5475604248046876, "loss": 0.24318844079971313, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 32.906134943290866, "step_time": 0.5140356979370116} +{"epoch": 0, "iter": 16288, "iter_tflops": 30.578146682812385, "iter_time": 0.5531704788208008, "loss": 0.32749202847480774, "lr": 3e-05, "seqlen": 6752.0, "step_tflops": 32.57585898965446, "step_time": 0.5192473373413086} +{"epoch": 0, "iter": 16289, "iter_tflops": 31.87131281048452, "iter_time": 0.4153259468078613, "loss": 0.002729384694248438, "lr": 3e-05, "seqlen": 5312.0, "step_tflops": 35.21801754236615, "step_time": 0.3758582706451416} +{"epoch": 0, "iter": 16290, "iter_tflops": 33.96608224925972, "iter_time": 0.3897118034362793, "loss": 0.003142830217257142, "lr": 3e-05, "seqlen": 5312.0, "step_tflops": 37.368755983968896, "step_time": 0.35422595214843744} +{"epoch": 0, "iter": 16291, "iter_tflops": 36.91737256097287, "iter_time": 0.3585570220947265, "loss": 0.002299256157130003, "lr": 3e-05, "seqlen": 5312.0, "step_tflops": 40.669893463683444, "step_time": 0.3254737606048584} +{"epoch": 0, "iter": 16292, "iter_tflops": 34.61860171364911, "iter_time": 0.3823661994934082, "loss": 0.008957473561167717, "lr": 3e-05, "seqlen": 5312.0, "step_tflops": 37.95446048518917, "step_time": 0.3487596187591553} +{"epoch": 0, "iter": 16293, "iter_tflops": 34.64578738881498, "iter_time": 0.5954863510131836, "loss": 0.13637526333332062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
37.035799129033165, "step_time": 0.5570581436157226} +{"epoch": 0, "iter": 16294, "iter_tflops": 15.725292291032465, "iter_time": 1.3119688415527344, "loss": 0.15404754877090454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.302825743572946, "step_time": 1.0161685752868652} +{"epoch": 0, "iter": 16295, "iter_tflops": 44.84310900756715, "iter_time": 0.4600727729797363, "loss": 0.13073372840881348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.51425038059489, "step_time": 0.42525842094421384} +{"epoch": 0, "iter": 16296, "iter_tflops": 46.81539501719002, "iter_time": 0.4406903648376465, "loss": 0.10750989615917206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.544560701751266, "step_time": 0.4081763343811035} +{"epoch": 0, "iter": 16297, "iter_tflops": 23.084018245459212, "iter_time": 0.8937392654418946, "loss": 0.07181423157453537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.223099214021744, "step_time": 0.8517115554809571} +{"epoch": 0, "iter": 16298, "iter_tflops": 14.676283673753177, "iter_time": 1.4057437133789064, "loss": 0.08124187588691711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.463113173383558, "step_time": 1.2531708488464355} +{"epoch": 0, "iter": 16299, "iter_tflops": 51.05117650078551, "iter_time": 0.4041257209777832, "loss": 0.08718547224998474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.90008761083641, "step_time": 0.3690708618164063} +{"epoch": 0, "iter": 16300, "iter_tflops": 51.22308032169722, "iter_time": 0.4027694816589355, "loss": 0.0639820471405983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.949024213878324, "step_time": 0.3687480487823486} +{"epoch": 0, "iter": 16301, "iter_tflops": 30.49967891003066, "iter_time": 0.6764364166259765, "loss": 0.6786466240882874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.49155317027182, "step_time": 0.6349679069519043} +{"epoch": 0, "iter": 16302, "iter_tflops": 33.68844428545579, "iter_time": 0.6124086151123046, "loss": 0.5495607852935791, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 36.73571477958575, "step_time": 0.5616086044311523} +{"epoch": 0, "iter": 16303, "iter_tflops": 39.557004062233666, "iter_time": 0.5215534896850587, "loss": 0.6102150678634644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.818627739264, "step_time": 0.4818251914978028} +{"epoch": 0, "iter": 16304, "iter_tflops": 40.026493031374585, "iter_time": 0.5154359512329102, "loss": 0.8727549314498901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.09200364698217, "step_time": 0.47876848983764647} +{"epoch": 0, "iter": 16305, "iter_tflops": 33.339205019327096, "iter_time": 0.6188237991333009, "loss": 0.6147622466087341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.77290123179264, "step_time": 0.5610406799316406} +{"epoch": 0, "iter": 16306, "iter_tflops": 35.264501672802, "iter_time": 0.5850385665893555, "loss": 0.565843939781189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.14093176131011, "step_time": 0.5409173965454102} +{"epoch": 0, "iter": 16307, "iter_tflops": 38.30038379097098, "iter_time": 0.5386654510498046, "loss": 0.6086754202842712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.82161008376761, "step_time": 0.49331179428100586} +{"epoch": 0, "iter": 16308, "iter_tflops": 46.32454409917194, "iter_time": 0.44535988235473634, "loss": 0.7795649766921997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.45758276192463, "step_time": 0.40887994194030763} +{"epoch": 0, "iter": 16309, "iter_tflops": 19.39001842188592, "iter_time": 1.064005874633789, "loss": 0.05047515034675598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.045910883180532, "step_time": 0.980289882659912} +{"epoch": 0, "iter": 16310, "iter_tflops": 38.89798756386931, "iter_time": 0.5303897399902343, "loss": 0.034317076206207275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84721271310254, "step_time": 0.48150374794006345} +{"epoch": 0, "iter": 16311, "iter_tflops": 39.18943907409362, "iter_time": 0.5264452362060548, "loss": 0.027268698439002037, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 43.09572239832706, "step_time": 0.4787271766662598} +{"epoch": 0, "iter": 16312, "iter_tflops": 43.42042970394224, "iter_time": 0.47514715194702145, "loss": 0.059042513370513916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.561015228465166, "step_time": 0.43378160476684574} +{"epoch": 0, "iter": 16313, "iter_tflops": 15.72730966125302, "iter_time": 1.2801551513671876, "loss": 0.02523222379386425, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 16.64472510915054, "step_time": 1.2095962142944334} +{"epoch": 0, "iter": 16314, "iter_tflops": 15.358096231999895, "iter_time": 1.3109304809570312, "loss": 0.016467874869704247, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 20.708356155400455, "step_time": 0.9722353782653809} +{"epoch": 0, "iter": 16315, "iter_tflops": 51.2398105961397, "iter_time": 0.3929248809814453, "loss": 0.0653064176440239, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 56.21272285209568, "step_time": 0.35816440582275394} +{"epoch": 0, "iter": 16316, "iter_tflops": 56.99035631300074, "iter_time": 0.35327725219726563, "loss": 0.022510245442390442, "lr": 3e-05, "seqlen": 8000.0, "step_tflops": 62.21301422024263, "step_time": 0.32362033462524414} +{"epoch": 0, "iter": 16317, "iter_tflops": 40.18294184969914, "iter_time": 0.5134291458129883, "loss": 0.1679127961397171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65741080107327, "step_time": 0.47256795883178704} +{"epoch": 0, "iter": 16318, "iter_tflops": 45.22478032648177, "iter_time": 0.4561900215148926, "loss": 0.2127755880355835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.72242487462882, "step_time": 0.414925329208374} +{"epoch": 0, "iter": 16319, "iter_tflops": 47.34184800316827, "iter_time": 0.43578977966308596, "loss": 0.18419572710990906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.286285183831566, "step_time": 0.4022731113433838} +{"epoch": 0, "iter": 16320, "iter_tflops": 49.81688971011184, "iter_time": 0.4141385307312011, "loss": 0.24224449694156647, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 53.66594040432681, "step_time": 0.3844355163574219} +{"epoch": 0, "iter": 16321, "iter_tflops": 25.02241001526107, "iter_time": 0.824504653930664, "loss": 0.7832295894622803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.400618930889966, "step_time": 0.7814624938964844} +{"epoch": 0, "iter": 16322, "iter_tflops": 17.827424679276124, "iter_time": 1.1572671813964845, "loss": 0.7587289214134216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.74359975168558, "step_time": 0.9488352317810058} +{"epoch": 0, "iter": 16323, "iter_tflops": 44.47725399208608, "iter_time": 0.46385717773437496, "loss": 0.6803986430168152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.75255478833305, "step_time": 0.43204166984558107} +{"epoch": 0, "iter": 16324, "iter_tflops": 43.41647939155753, "iter_time": 0.4751903839111328, "loss": 0.6693167686462402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.864462320695985, "step_time": 0.4402289600372314} +{"epoch": 0, "iter": 16325, "iter_tflops": 28.949975959350834, "iter_time": 0.7126463088989258, "loss": 0.08117061108350754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.56114218280157, "step_time": 0.6750759963989257} +{"epoch": 0, "iter": 16326, "iter_tflops": 11.13724558307932, "iter_time": 1.8524412841796876, "loss": 0.04357653856277466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.832735707828082, "step_time": 1.4914687843322756} +{"epoch": 0, "iter": 16327, "iter_tflops": 49.53612958685327, "iter_time": 0.4164857788085938, "loss": 0.03433459997177124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.86216196995303, "step_time": 0.383035005569458} +{"epoch": 0, "iter": 16328, "iter_tflops": 56.14740679268374, "iter_time": 0.36744517135620114, "loss": 0.07594002038240433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.07445728708084, "step_time": 0.3378023223876953} +{"epoch": 0, "iter": 16329, "iter_tflops": 31.091152974668056, "iter_time": 0.6635679779052734, "loss": 
0.5910769104957581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.94932027325563, "step_time": 0.626146255493164} +{"epoch": 0, "iter": 16330, "iter_tflops": 15.876420501408969, "iter_time": 1.2994801635742188, "loss": 0.536720871925354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.0729396768413, "step_time": 1.0816944770812988} +{"epoch": 0, "iter": 16331, "iter_tflops": 38.982111786201365, "iter_time": 0.5292451477050781, "loss": 0.4894552528858185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.885334707596485, "step_time": 0.4810757255554199} +{"epoch": 0, "iter": 16332, "iter_tflops": 36.88857770836469, "iter_time": 0.5592813491821289, "loss": 0.6186333298683167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.36548651697175, "step_time": 0.5111072673797608} +{"epoch": 0, "iter": 16333, "iter_tflops": 31.987659172726726, "iter_time": 0.6449704055786134, "loss": 0.4618849456310272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.191932990391706, "step_time": 0.5862449645996093} +{"epoch": 0, "iter": 16334, "iter_tflops": 36.81470284996871, "iter_time": 0.5604036407470703, "loss": 0.5280718803405762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14258671889937, "step_time": 0.5139452934265136} +{"epoch": 0, "iter": 16335, "iter_tflops": 40.685882936365516, "iter_time": 0.5070823593139648, "loss": 0.676729679107666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13119318846275, "step_time": 0.46749457740783695} +{"epoch": 0, "iter": 16336, "iter_tflops": 37.33655138621963, "iter_time": 0.5525709457397461, "loss": 0.7261772751808167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.836500956110704, "step_time": 0.5052120780944824} +{"epoch": 0, "iter": 16337, "iter_tflops": 28.05630254657421, "iter_time": 0.7353461303710938, "loss": 0.6368440389633179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.218656964025286, "step_time": 0.6827270164489745} +{"epoch": 0, "iter": 16338, "iter_tflops": 10.236343484702893, "iter_time": 2.015474914550781, 
"loss": 0.606600284576416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.144271896764936, "step_time": 1.5695881576538087} +{"epoch": 0, "iter": 16339, "iter_tflops": 14.412770400865954, "iter_time": 1.4314453735351562, "loss": 0.6378645300865173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.240695748899974, "step_time": 1.1310475101470947} +{"epoch": 0, "iter": 16340, "iter_tflops": 36.89531544805428, "iter_time": 0.5591792144775392, "loss": 0.5227735638618469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25066788612676, "step_time": 0.5125652465820312} +{"epoch": 0, "iter": 16341, "iter_tflops": 13.484944137024383, "iter_time": 1.1904626464843748, "loss": 0.21118462085723877, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 14.37878471734182, "step_time": 1.1164589080810545} +{"epoch": 0, "iter": 16342, "iter_tflops": 13.02243402401983, "iter_time": 1.232743606567383, "loss": 0.1594906449317932, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 15.445629243980568, "step_time": 1.0393440132141112} +{"epoch": 0, "iter": 16343, "iter_tflops": 26.646151078371275, "iter_time": 0.6024630813598633, "loss": 0.21342356503009796, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 28.36052442381852, "step_time": 0.566044620513916} +{"epoch": 0, "iter": 16344, "iter_tflops": 28.366850441788255, "iter_time": 0.5659183883666993, "loss": 0.23964102566242218, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.23753350905844, "step_time": 0.5309071350097656} +{"epoch": 0, "iter": 16345, "iter_tflops": 2.174632338451669, "iter_time": 0.7207014465332031, "loss": 0.10644441843032837, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 2.3103678933570553, "step_time": 0.6783597869873047} +{"epoch": 0, "iter": 16346, "iter_tflops": 1.105008547864106, "iter_time": 1.418324478149414, "loss": 0.12412243336439133, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 1.3935662321350843, "step_time": 1.1246402473449706} +{"epoch": 0, "iter": 16347, "iter_tflops": 3.161838127874551, "iter_time": 
0.49568023681640627, "loss": 0.15357860922813416, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.448954203705411, "step_time": 0.45441620254516607} +{"epoch": 0, "iter": 16348, "iter_tflops": 2.9747727001162847, "iter_time": 0.5268505630493164, "loss": 0.2303684502840042, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.2623052627103863, "step_time": 0.4804150886535644} +{"epoch": 0, "iter": 16349, "iter_tflops": 25.164294997171048, "iter_time": 0.8198558120727539, "loss": 0.06927245110273361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.391111159567618, "step_time": 0.7532039642333984} +{"epoch": 0, "iter": 16350, "iter_tflops": 18.360921206799564, "iter_time": 1.1236415252685545, "loss": 0.09194725006818771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.819866593420265, "step_time": 0.9909330310821534} +{"epoch": 0, "iter": 16351, "iter_tflops": 41.913293236232285, "iter_time": 0.49223270034790045, "loss": 0.044795844703912735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.526534957986726, "step_time": 0.4434263916015625} +{"epoch": 0, "iter": 16352, "iter_tflops": 45.04066898860001, "iter_time": 0.45805477523803717, "loss": 0.07124407589435577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.52082918717502, "step_time": 0.4166144599914551} +{"epoch": 0, "iter": 16353, "iter_tflops": 21.566637914125696, "iter_time": 0.9566207580566405, "loss": 0.1592632383108139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.189809600895885, "step_time": 0.8896620483398439} +{"epoch": 0, "iter": 16354, "iter_tflops": 39.340245888026736, "iter_time": 0.5244271621704102, "loss": 0.18204133212566376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.052721121752036, "step_time": 0.4792053318023682} +{"epoch": 0, "iter": 16355, "iter_tflops": 41.94467929364117, "iter_time": 0.4918643760681153, "loss": 0.0850488469004631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.74872560943258, "step_time": 0.450965425491333} +{"epoch": 0, "iter": 16356, "iter_tflops": 
38.56515863727848, "iter_time": 0.5349671630859375, "loss": 0.17052289843559265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.358323196172364, "step_time": 0.48706114768981934} +{"epoch": 0, "iter": 16357, "iter_tflops": 24.56186719548844, "iter_time": 0.8399643783569337, "loss": 0.42560890316963196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.15512117190695, "step_time": 0.7887974739074708} +{"epoch": 0, "iter": 16358, "iter_tflops": 20.479428678687363, "iter_time": 1.0074057159423828, "loss": 0.5210678577423096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.61816673586906, "step_time": 0.8380434551239015} +{"epoch": 0, "iter": 16359, "iter_tflops": 39.21087536465092, "iter_time": 0.5261574325561523, "loss": 0.5650946497917175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.89241881549053, "step_time": 0.4809962711334228} +{"epoch": 0, "iter": 16360, "iter_tflops": 42.72377245206255, "iter_time": 0.48289493942260747, "loss": 0.3902941942214966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.616759870062296, "step_time": 0.4425681571960449} +{"epoch": 0, "iter": 16361, "iter_tflops": 18.566261278301297, "iter_time": 1.1112142181396485, "loss": 0.48153772950172424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.60209877852743, "step_time": 1.0524941101074219} +{"epoch": 0, "iter": 16362, "iter_tflops": 19.60329328586731, "iter_time": 1.0524299774169923, "loss": 0.670282244682312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.196316270253092, "step_time": 0.8188138809204102} +{"epoch": 0, "iter": 16363, "iter_tflops": 42.94079631957987, "iter_time": 0.48045437622070314, "loss": 0.5759333372116089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97905380084773, "step_time": 0.4487063522338867} +{"epoch": 0, "iter": 16364, "iter_tflops": 41.96654634678668, "iter_time": 0.4916080856323242, "loss": 0.6993227601051331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.91810478284871, "step_time": 0.45930463027954105} +{"epoch": 0, "iter": 16365, 
"iter_tflops": 23.3336498829041, "iter_time": 0.8841777267456054, "loss": 0.39042386412620544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.460565717988864, "step_time": 0.8434430236816406} +{"epoch": 0, "iter": 16366, "iter_tflops": 14.942900133327692, "iter_time": 1.3806619415283203, "loss": 0.362529456615448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.088598217466476, "step_time": 0.9340155181884765} +{"epoch": 0, "iter": 16367, "iter_tflops": 48.13867555046537, "iter_time": 0.4285762596130371, "loss": 0.577764093875885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.25727152092343, "step_time": 0.3947985210418701} +{"epoch": 0, "iter": 16368, "iter_tflops": 48.402507825215714, "iter_time": 0.4262401771545411, "loss": 0.5580026507377625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.171148397358046, "step_time": 0.395450246810913} +{"epoch": 0, "iter": 16369, "iter_tflops": 39.825244718491966, "iter_time": 0.5180405960083008, "loss": 0.30626964569091797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.29774942029566, "step_time": 0.47649343872070316} +{"epoch": 0, "iter": 16370, "iter_tflops": 46.04197152925089, "iter_time": 0.44809318161010736, "loss": 0.19112809002399445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.85628834509404, "step_time": 0.41381126022338865} +{"epoch": 0, "iter": 16371, "iter_tflops": 49.579641721232925, "iter_time": 0.41612026214599607, "loss": 0.18293575942516327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.948025605147045, "step_time": 0.3824253673553467} +{"epoch": 0, "iter": 16372, "iter_tflops": 48.19981215584957, "iter_time": 0.4280326538085938, "loss": 0.15377815067768097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.067302002808376, "step_time": 0.3962389583587646} +{"epoch": 0, "iter": 16373, "iter_tflops": 25.199411618591597, "iter_time": 0.8187133026123047, "loss": 0.025197535753250122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.489172412906857, "step_time": 0.7788500595092772} +{"epoch": 
0, "iter": 16374, "iter_tflops": 13.466675855215929, "iter_time": 1.5320108489990236, "loss": 0.05455826595425606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.929264107868665, "step_time": 0.9857534122467041} +{"epoch": 0, "iter": 16375, "iter_tflops": 44.65150216096453, "iter_time": 0.46204701995849606, "loss": 0.019736764952540398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.13960003360878, "step_time": 0.4198465900421142} +{"epoch": 0, "iter": 16376, "iter_tflops": 43.3090553433799, "iter_time": 0.4763690490722656, "loss": 0.038011759519577026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69552148511461, "step_time": 0.4325582962036133} +{"epoch": 0, "iter": 16377, "iter_tflops": 22.275513367621787, "iter_time": 0.9261781387329102, "loss": 0.2923947870731354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.850786036716954, "step_time": 0.8650068588256836} +{"epoch": 0, "iter": 16378, "iter_tflops": 23.07716271773726, "iter_time": 0.894004768371582, "loss": 0.38084444403648376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.09180206618813, "step_time": 0.7344168758392334} +{"epoch": 0, "iter": 16379, "iter_tflops": 45.94006833645553, "iter_time": 0.44908713150024415, "loss": 0.3905262351036072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9696109380163, "step_time": 0.4128728065490722} +{"epoch": 0, "iter": 16380, "iter_tflops": 48.62047521912421, "iter_time": 0.4243293266296387, "loss": 0.3357400596141815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.92695506341163, "step_time": 0.389803144454956} +{"epoch": 0, "iter": 16381, "iter_tflops": 22.11667979449801, "iter_time": 0.9328295974731446, "loss": 0.6453304886817932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.200864302981625, "step_time": 0.8892381439208984} +{"epoch": 0, "iter": 16382, "iter_tflops": 14.501411206755796, "iter_time": 1.422695571899414, "loss": 0.5008097290992737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.3011331683522, "step_time": 1.192470649719238} 
+{"epoch": 0, "iter": 16383, "iter_tflops": 42.21665438989615, "iter_time": 0.4886956062316894, "loss": 0.6303716897964478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.13639280017293, "step_time": 0.4471761283874512} +{"epoch": 0, "iter": 16384, "iter_tflops": 36.234535716400295, "iter_time": 0.5693765106201172, "loss": 0.5769817233085632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.751714228813256, "step_time": 0.5189988384246826} +{"epoch": 0, "iter": 16385, "iter_tflops": 19.83291433793898, "iter_time": 1.0402451782226563, "loss": 0.33740440011024475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.33935505980114, "step_time": 0.9668096084594727} +{"epoch": 0, "iter": 16386, "iter_tflops": 23.490301494586927, "iter_time": 0.8782813415527344, "loss": 0.3884749114513397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.269114398905323, "step_time": 0.6597914237976074} +{"epoch": 0, "iter": 16387, "iter_tflops": 38.87064250754973, "iter_time": 0.5307628631591796, "loss": 0.4362531006336212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60420786648718, "step_time": 0.4842501373291016} +{"epoch": 0, "iter": 16388, "iter_tflops": 38.927451126841774, "iter_time": 0.529988296508789, "loss": 0.4758920967578888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.63393784105028, "step_time": 0.4839124546051025} +{"epoch": 0, "iter": 16389, "iter_tflops": 21.77880120776658, "iter_time": 0.9473016128540038, "loss": 0.09928958863019943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.395269643007627, "step_time": 0.8818489303588867} +{"epoch": 0, "iter": 16390, "iter_tflops": 14.167127255844283, "iter_time": 1.4562651367187502, "loss": 0.14998453855514526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.61340810223505, "step_time": 1.2418339080810548} +{"epoch": 0, "iter": 16391, "iter_tflops": 46.65704971127504, "iter_time": 0.4421859855651855, "loss": 0.09326784312725067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.00877204251029, "step_time": 
0.40446167755126955} +{"epoch": 0, "iter": 16392, "iter_tflops": 53.45758742355946, "iter_time": 0.3859338684082031, "loss": 0.10750342905521393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.20913630829563, "step_time": 0.3544305038452148} +{"epoch": 0, "iter": 16393, "iter_tflops": 38.77670585032584, "iter_time": 0.5320486373901367, "loss": 0.046813275665044785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08404535045503, "step_time": 0.49023551177978514} +{"epoch": 0, "iter": 16394, "iter_tflops": 17.215269020550156, "iter_time": 1.1984183044433594, "loss": 0.07425190508365631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.61967766167478, "step_time": 1.051551094055176} +{"epoch": 0, "iter": 16395, "iter_tflops": 39.17996350741975, "iter_time": 0.5265725555419921, "loss": 0.09373927116394043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.07741417780483, "step_time": 0.4789306392669678} +{"epoch": 0, "iter": 16396, "iter_tflops": 42.865225856549536, "iter_time": 0.4813014068603515, "loss": 0.08624913543462753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.359037674619884, "step_time": 0.4356316032409668} +{"epoch": 0, "iter": 16397, "iter_tflops": 21.09669548054629, "iter_time": 0.9779300994873048, "loss": 0.5865406394004822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.686705631215432, "step_time": 0.9093913345336913} +{"epoch": 0, "iter": 16398, "iter_tflops": 26.57054140483155, "iter_time": 0.7764649276733399, "loss": 0.418376624584198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.334503063644103, "step_time": 0.7033046875} +{"epoch": 0, "iter": 16399, "iter_tflops": 37.72796192759926, "iter_time": 0.5468382720947266, "loss": 0.42225489020347595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.44653988389328, "step_time": 0.4977760162353516} +{"epoch": 0, "iter": 16400, "iter_tflops": 40.16549244544041, "iter_time": 0.513652198791504, "loss": 0.41296258568763733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.76324590621967, 
"step_time": 0.47142512130737313} +{"epoch": 0, "iter": 16401, "iter_tflops": 17.352825765339492, "iter_time": 1.1889183807373047, "loss": 0.5924240350723267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.38487030664555, "step_time": 1.1221778106689455} +{"epoch": 0, "iter": 16402, "iter_tflops": 14.056156750808112, "iter_time": 1.4677620544433594, "loss": 0.6964923739433289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.833882279013114, "step_time": 0.9902663955688478} +{"epoch": 0, "iter": 16403, "iter_tflops": 35.54228493200171, "iter_time": 0.5804661560058594, "loss": 0.5700367093086243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.46199738245618, "step_time": 0.5364020309448242} +{"epoch": 0, "iter": 16404, "iter_tflops": 37.29640500528099, "iter_time": 0.5531657409667968, "loss": 0.570590078830719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.33869971732265, "step_time": 0.5114466667175293} +{"epoch": 0, "iter": 16405, "iter_tflops": 19.149121123642978, "iter_time": 1.0773911437988282, "loss": 0.6292311549186707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.44171893187972, "step_time": 1.0092641220092773} +{"epoch": 0, "iter": 16406, "iter_tflops": 19.212200149179914, "iter_time": 1.0738537673950197, "loss": 0.640953004360199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.388797672120834, "step_time": 0.8820929489135743} +{"epoch": 0, "iter": 16407, "iter_tflops": 38.78194449837179, "iter_time": 0.5319767684936524, "loss": 0.6514711380004883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.04824396081532, "step_time": 0.4906529159545898} +{"epoch": 0, "iter": 16408, "iter_tflops": 40.50699329367878, "iter_time": 0.5093217697143555, "loss": 0.5863012075424194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.90536292566116, "step_time": 0.46989916801452636} +{"epoch": 0, "iter": 16409, "iter_tflops": 21.304677916491016, "iter_time": 0.9683832626342774, "loss": 0.18457737565040588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.9318815999893, "step_time": 0.899668586730957} +{"epoch": 0, "iter": 16410, "iter_tflops": 30.51239362994569, "iter_time": 0.676154541015625, "loss": 0.12676405906677246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.23791301983852, "step_time": 0.6025803470611572} +{"epoch": 0, "iter": 16411, "iter_tflops": 41.46990959241551, "iter_time": 0.49749550247192387, "loss": 0.18743979930877686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.40472735902318, "step_time": 0.45438205909729} +{"epoch": 0, "iter": 16412, "iter_tflops": 42.427309857088375, "iter_time": 0.4862691879272461, "loss": 0.18574875593185425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14469927235019, "step_time": 0.4470956325531006} +{"epoch": 0, "iter": 16413, "iter_tflops": 21.924908146889884, "iter_time": 0.9409888229370117, "loss": 0.2781069576740265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.54162705947741, "step_time": 0.8763665084838868} +{"epoch": 0, "iter": 16414, "iter_tflops": 20.102146426886975, "iter_time": 1.0263129653930665, "loss": 0.22100356221199036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.57638123738729, "step_time": 0.8750746479034424} +{"epoch": 0, "iter": 16415, "iter_tflops": 51.67568079954548, "iter_time": 0.3992418327331543, "loss": 0.3956262469291687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.22334315913941, "step_time": 0.3669488925933838} +{"epoch": 0, "iter": 16416, "iter_tflops": 46.89566754743081, "iter_time": 0.4399360237121582, "loss": 0.24329118430614471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.68028101825376, "step_time": 0.4070832500457764} +{"epoch": 0, "iter": 16417, "iter_tflops": 13.970124093155334, "iter_time": 0.8400802612304688, "loss": 0.020723624154925346, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 14.680757498663754, "step_time": 0.7994155273437501} +{"epoch": 0, "iter": 16418, "iter_tflops": 8.93932605147971, "iter_time": 1.312853500366211, "loss": 0.005789306480437517, "lr": 3e-05, "seqlen": 4720.0, 
"step_tflops": 10.8413278032312, "step_time": 1.0825265789031984} +{"epoch": 0, "iter": 16419, "iter_tflops": 27.48350866864893, "iter_time": 0.42702064132690426, "loss": 0.003526728367432952, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 30.457989928442494, "step_time": 0.3853184509277344} +{"epoch": 0, "iter": 16420, "iter_tflops": 24.55851045151735, "iter_time": 0.47788018417358397, "loss": 0.0038583073765039444, "lr": 3e-05, "seqlen": 4720.0, "step_tflops": 27.274742947410108, "step_time": 0.4302891330718994} +{"epoch": 0, "iter": 16421, "iter_tflops": 14.81358670954785, "iter_time": 1.3927142639160157, "loss": 0.6256259679794312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.755681479447155, "step_time": 1.309438346862793} +{"epoch": 0, "iter": 16422, "iter_tflops": 24.659145964199425, "iter_time": 0.8366507720947265, "loss": 0.5612891316413879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.125486931241454, "step_time": 0.6045655422210694} +{"epoch": 0, "iter": 16423, "iter_tflops": 42.385981774605156, "iter_time": 0.48674332046508784, "loss": 0.7381338477134705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.272073236481056, "step_time": 0.44586490440368653} +{"epoch": 0, "iter": 16424, "iter_tflops": 36.31228159104091, "iter_time": 0.568157455444336, "loss": 0.6165429949760437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.97775743233632, "step_time": 0.5293042716979981} +{"epoch": 0, "iter": 16425, "iter_tflops": 15.60540818790798, "iter_time": 1.3220476684570313, "loss": 0.39908164739608765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.50281679082596, "step_time": 1.2501558837890625} +{"epoch": 0, "iter": 16426, "iter_tflops": 16.414332293230736, "iter_time": 1.256895080566406, "loss": 0.4948326051235199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.366190545574977, "step_time": 0.9655953159332274} +{"epoch": 0, "iter": 16427, "iter_tflops": 40.19669192852151, "iter_time": 0.5132535171508789, "loss": 0.40941187739372253, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 43.99234245572646, "step_time": 0.4689701061248779} +{"epoch": 0, "iter": 16428, "iter_tflops": 39.86030855642551, "iter_time": 0.5175848922729491, "loss": 0.29896828532218933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.42627074196421, "step_time": 0.4750832424163818} +{"epoch": 0, "iter": 16429, "iter_tflops": 19.277345345391655, "iter_time": 1.0702248229980469, "loss": 0.1017070859670639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.64062975446805, "step_time": 0.9995379867553712} +{"epoch": 0, "iter": 16430, "iter_tflops": 20.95418153203254, "iter_time": 0.9845812149047851, "loss": 0.15875597298145294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.110901288492407, "step_time": 0.855674919128418} +{"epoch": 0, "iter": 16431, "iter_tflops": 48.729302374591384, "iter_time": 0.42338167190551756, "loss": 0.15746159851551056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.304812632783005, "step_time": 0.38703997802734375} +{"epoch": 0, "iter": 16432, "iter_tflops": 49.44859437382618, "iter_time": 0.4172230529785156, "loss": 0.15158867835998535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.86277832419884, "step_time": 0.3830306224822998} +{"epoch": 0, "iter": 16433, "iter_tflops": 30.079625068003697, "iter_time": 0.3995892562866211, "loss": 0.006420017220079899, "lr": 3e-05, "seqlen": 4832.0, "step_tflops": 33.441595655994604, "step_time": 0.3594175090789795} +{"epoch": 0, "iter": 16434, "iter_tflops": 30.625895830100518, "iter_time": 0.3924618263244629, "loss": 0.018560778349637985, "lr": 3e-05, "seqlen": 4832.0, "step_tflops": 33.90751951108524, "step_time": 0.3544787464141846} +{"epoch": 0, "iter": 16435, "iter_tflops": 33.295568242417474, "iter_time": 0.36099383926391604, "loss": 0.0008322166395373642, "lr": 3e-05, "seqlen": 4832.0, "step_tflops": 36.759697462346416, "step_time": 0.3269748077392578} +{"epoch": 0, "iter": 16436, "iter_tflops": 33.23651844087499, "iter_time": 0.36163520050048825, "loss": 
0.004247395321726799, "lr": 3e-05, "seqlen": 4832.0, "step_tflops": 36.406890840591856, "step_time": 0.33014340782165524} +{"epoch": 0, "iter": 16437, "iter_tflops": 23.439159016282556, "iter_time": 0.8801976852416992, "loss": 0.8439401388168335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.537865753461514, "step_time": 0.8407859802246093} +{"epoch": 0, "iter": 16438, "iter_tflops": 12.996522060655263, "iter_time": 1.587431884765625, "loss": 0.6165173053741455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.585701362711372, "step_time": 1.1731743354797364} +{"epoch": 0, "iter": 16439, "iter_tflops": 43.521031906827375, "iter_time": 0.47404881286621103, "loss": 0.4511643946170807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.93302808340125, "step_time": 0.43958581733703617} +{"epoch": 0, "iter": 16440, "iter_tflops": 41.28546048910378, "iter_time": 0.4997181396484375, "loss": 0.769999623298645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4935585254531, "step_time": 0.46368719863891605} +{"epoch": 0, "iter": 16441, "iter_tflops": 22.910790587715027, "iter_time": 0.900496795654297, "loss": 0.022655494511127472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.148210163270857, "step_time": 0.8543529052734375} +{"epoch": 0, "iter": 16442, "iter_tflops": 13.22217278598417, "iter_time": 1.5603406372070312, "loss": 0.016668042168021202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.26855285353304, "step_time": 1.2681578807830811} +{"epoch": 0, "iter": 16443, "iter_tflops": 49.29768572180695, "iter_time": 0.4185002441406249, "loss": 0.014283623546361923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.814356262751495, "step_time": 0.3833752727508545} +{"epoch": 0, "iter": 16444, "iter_tflops": 53.95449499838551, "iter_time": 0.3823795127868652, "loss": 0.03903390094637871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.93519363418255, "step_time": 0.3500640659332275} +{"epoch": 0, "iter": 16445, "iter_tflops": 30.977295668514675, "iter_time": 
0.6660069274902345, "loss": 0.6304746866226196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.900453402373614, "step_time": 0.6270762672424317} +{"epoch": 0, "iter": 16446, "iter_tflops": 16.423644129855614, "iter_time": 1.2561824493408202, "loss": 0.49485403299331665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.17512674642054, "step_time": 1.135127902984619} +{"epoch": 0, "iter": 16447, "iter_tflops": 35.30184746365817, "iter_time": 0.5844196548461915, "loss": 0.5191736817359924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.49458051617613, "step_time": 0.5359480018615723} +{"epoch": 0, "iter": 16448, "iter_tflops": 39.731135445418744, "iter_time": 0.5192676544189453, "loss": 0.44528284668922424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.26215050443836, "step_time": 0.4768855285644531} +{"epoch": 0, "iter": 16449, "iter_tflops": 30.607620118195054, "iter_time": 0.625335807800293, "loss": 0.07915184646844864, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 33.787665368604436, "step_time": 0.5664801235198975} +{"epoch": 0, "iter": 16450, "iter_tflops": 38.578022699003654, "iter_time": 0.4961384620666504, "loss": 0.06664356589317322, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 42.47164363669708, "step_time": 0.4506545829772949} +{"epoch": 0, "iter": 16451, "iter_tflops": 42.82481778851308, "iter_time": 0.4469380569458008, "loss": 0.07204631716012955, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 46.922950791449, "step_time": 0.40790360641479495} +{"epoch": 0, "iter": 16452, "iter_tflops": 40.64250909447298, "iter_time": 0.4709364967346192, "loss": 0.07735182344913483, "lr": 3e-05, "seqlen": 7616.0, "step_tflops": 44.705563957163896, "step_time": 0.42813554191589354} +{"epoch": 0, "iter": 16453, "iter_tflops": 20.399264780629913, "iter_time": 1.0113645629882813, "loss": 0.09612726420164108, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.673544502606, "step_time": 0.9519021453857421} +{"epoch": 0, "iter": 16454, "iter_tflops": 19.094222996037637, 
"iter_time": 1.0804887695312502, "loss": 0.09292783588171005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.484220272051946, "step_time": 0.8785087718963622} +{"epoch": 0, "iter": 16455, "iter_tflops": 41.43058506475981, "iter_time": 0.49796770858764644, "loss": 0.14977748692035675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.69130878556816, "step_time": 0.4515321197509765} +{"epoch": 0, "iter": 16456, "iter_tflops": 41.36205053366793, "iter_time": 0.49879281234741213, "loss": 0.1578165739774704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.160491826204705, "step_time": 0.456839433670044} +{"epoch": 0, "iter": 16457, "iter_tflops": 15.670238633533934, "iter_time": 1.1214115295410156, "loss": 0.017919864505529404, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 16.79787962797798, "step_time": 1.046131217956543} +{"epoch": 0, "iter": 16458, "iter_tflops": 19.038469388107348, "iter_time": 0.9230146560668945, "loss": 0.04599396139383316, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 23.533410873244648, "step_time": 0.7467165031433105} +{"epoch": 0, "iter": 16459, "iter_tflops": 36.82594097712778, "iter_time": 0.4771849899291992, "loss": 0.02772928588092327, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 40.51654682318084, "step_time": 0.4337187557220459} +{"epoch": 0, "iter": 16460, "iter_tflops": 38.531907262218596, "iter_time": 0.45605804443359377, "loss": 0.02561667002737522, "lr": 3e-05, "seqlen": 7008.0, "step_tflops": 42.4064760838045, "step_time": 0.4143892135620117} +{"epoch": 0, "iter": 16461, "iter_tflops": 27.786159543405606, "iter_time": 0.7424953231811524, "loss": 0.07357297837734222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.04196138634222, "step_time": 0.6867425613403321} +{"epoch": 0, "iter": 16462, "iter_tflops": 11.889262209674882, "iter_time": 1.7352711334228514, "loss": 0.08703591674566269, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.47270417461766, "step_time": 1.5313253555297852} +{"epoch": 0, "iter": 16463, "iter_tflops": 
19.105850946153232, "iter_time": 1.0798311767578124, "loss": 0.06726226210594177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.384842706540063, "step_time": 0.9216546115875244} +{"epoch": 0, "iter": 16464, "iter_tflops": 41.050725298421874, "iter_time": 0.5025756149291992, "loss": 0.06178836524486542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.299542626463705, "step_time": 0.455437126159668} +{"epoch": 0, "iter": 16465, "iter_tflops": 17.126609806279546, "iter_time": 1.093441436767578, "loss": 0.1905941367149353, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 18.311931162260414, "step_time": 1.0226635665893555} +{"epoch": 0, "iter": 16466, "iter_tflops": 14.940837706879009, "iter_time": 1.2534066162109374, "loss": 0.2176801711320877, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 17.63651925007732, "step_time": 1.0618277091979982} +{"epoch": 0, "iter": 16467, "iter_tflops": 28.355822574805007, "iter_time": 0.6604267883300782, "loss": 0.2821584939956665, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 30.55952952223947, "step_time": 0.612802131652832} +{"epoch": 0, "iter": 16468, "iter_tflops": 27.764533495587862, "iter_time": 0.6744916076660157, "loss": 0.33888596296310425, "lr": 3e-05, "seqlen": 7456.0, "step_tflops": 29.687530132231643, "step_time": 0.6308017120361328} +{"epoch": 0, "iter": 16469, "iter_tflops": 20.54125279796689, "iter_time": 1.0043736724853516, "loss": 0.029077891260385513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.18209572674203, "step_time": 0.9300786437988282} +{"epoch": 0, "iter": 16470, "iter_tflops": 21.153922305965107, "iter_time": 0.9752845458984375, "loss": 0.02659405581653118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.733674977892743, "step_time": 0.8017157878875733} +{"epoch": 0, "iter": 16471, "iter_tflops": 53.56625724615621, "iter_time": 0.3851509246826172, "loss": 0.022961854934692383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.23225340229863, "step_time": 0.3483084354400634} +{"epoch": 0, "iter": 16472, 
"iter_tflops": 53.63363700662445, "iter_time": 0.38466706085205077, "loss": 0.029293693602085114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.64312348260542, "step_time": 0.3518075485229492} +{"epoch": 0, "iter": 16473, "iter_tflops": 35.50034962066079, "iter_time": 0.5811518402099609, "loss": 0.5527653694152832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.08251064274838, "step_time": 0.541747200012207} +{"epoch": 0, "iter": 16474, "iter_tflops": 12.902858516671348, "iter_time": 1.5989552612304685, "loss": 0.5710147023200989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.569332715315436, "step_time": 1.3251109657287596} +{"epoch": 0, "iter": 16475, "iter_tflops": 34.703941208012516, "iter_time": 0.5944884872436523, "loss": 0.7284736633300781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.83144299342243, "step_time": 0.5453424949645996} +{"epoch": 0, "iter": 16476, "iter_tflops": 40.770047150213856, "iter_time": 0.5060355567932129, "loss": 0.7243453860282898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.36925175794283, "step_time": 0.46498628425598143} +{"epoch": 0, "iter": 16477, "iter_tflops": 23.882214465682317, "iter_time": 0.8638685302734374, "loss": 0.12491800636053085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.33951280581477, "step_time": 0.8141866683959962} +{"epoch": 0, "iter": 16478, "iter_tflops": 18.64419540878051, "iter_time": 1.1065692596435546, "loss": 0.12406696379184723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.602075752300784, "step_time": 0.9127964057922362} +{"epoch": 0, "iter": 16479, "iter_tflops": 38.25832421030441, "iter_time": 0.5392576370239258, "loss": 0.14789772033691406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.20732871350079, "step_time": 0.4888035831451416} +{"epoch": 0, "iter": 16480, "iter_tflops": 39.39383681075368, "iter_time": 0.5237137374877929, "loss": 0.13604573905467987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.140541371359014, "step_time": 0.4782298240661621} +{"epoch": 0, 
"iter": 16481, "iter_tflops": 18.012504744401806, "iter_time": 1.1453761596679688, "loss": 0.24498286843299866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.340913705711348, "step_time": 1.0667072830200195} +{"epoch": 0, "iter": 16482, "iter_tflops": 16.909181131296624, "iter_time": 1.2201119232177733, "loss": 0.3056976795196533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.258854675637803, "step_time": 1.0183741302490235} +{"epoch": 0, "iter": 16483, "iter_tflops": 47.47054407147176, "iter_time": 0.4346083221435547, "loss": 0.31108808517456055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.281719804882606, "step_time": 0.3946139030456543} +{"epoch": 0, "iter": 16484, "iter_tflops": 49.897272610936824, "iter_time": 0.4134713668823242, "loss": 0.23560047149658203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.97337977276366, "step_time": 0.3822457218170166} +{"epoch": 0, "iter": 16485, "iter_tflops": 38.68648726195681, "iter_time": 0.5332893981933593, "loss": 0.002028372371569276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08677499792898, "step_time": 0.49020371627807624} +{"epoch": 0, "iter": 16486, "iter_tflops": 11.481740961519655, "iter_time": 1.796861083984375, "loss": 0.00889961514621973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.048216780936713, "step_time": 1.5811427612304685} +{"epoch": 0, "iter": 16487, "iter_tflops": 12.9193224981308, "iter_time": 1.5969176025390626, "loss": 0.004459657706320286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.63561786272372, "step_time": 1.3194933319091797} +{"epoch": 0, "iter": 16488, "iter_tflops": 31.579766001144975, "iter_time": 0.653301025390625, "loss": 0.012604091316461563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.04900569824561, "step_time": 0.4293760757446288} +{"epoch": 0, "iter": 16489, "iter_tflops": 19.15986031102206, "iter_time": 0.8506994705200197, "loss": 0.16192415356636047, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 20.766704882892576, "step_time": 
0.784875747680664} +{"epoch": 0, "iter": 16490, "iter_tflops": 24.864650517003405, "iter_time": 0.655520294189453, "loss": 0.19561421871185303, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 26.799215384603816, "step_time": 0.608200008392334} +{"epoch": 0, "iter": 16491, "iter_tflops": 25.24498288386499, "iter_time": 0.6456444473266602, "loss": 0.16843581199645996, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.18402999398375, "step_time": 0.5995903854370118} +{"epoch": 0, "iter": 16492, "iter_tflops": 25.236483834368464, "iter_time": 0.6458618850708008, "loss": 0.1725633442401886, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.167201883534112, "step_time": 0.5999617881774902} +{"epoch": 0, "iter": 16493, "iter_tflops": 33.55031014922431, "iter_time": 0.6149300384521484, "loss": 0.08976781368255615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.16581175167737, "step_time": 0.5551094551086426} +{"epoch": 0, "iter": 16494, "iter_tflops": 41.9333103415857, "iter_time": 0.49199773025512694, "loss": 0.061514489352703094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.33142525217837, "step_time": 0.445293737411499} +{"epoch": 0, "iter": 16495, "iter_tflops": 45.016466271133694, "iter_time": 0.4583010444641113, "loss": 0.074888214468956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.45958629541747, "step_time": 0.4171303291320801} +{"epoch": 0, "iter": 16496, "iter_tflops": 44.76858225544085, "iter_time": 0.46083866119384775, "loss": 0.04251306504011154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.110591343569325, "step_time": 0.42009458541870115} +{"epoch": 0, "iter": 16497, "iter_tflops": 36.00342279474595, "iter_time": 0.5730314483642579, "loss": 0.0012397231766954064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.28551020316442, "step_time": 0.5121219367980957} +{"epoch": 0, "iter": 16498, "iter_tflops": 39.53912959862836, "iter_time": 0.5217892684936524, "loss": 0.005064376164227724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.974200344206324, 
"step_time": 0.4691635856628418} +{"epoch": 0, "iter": 16499, "iter_tflops": 46.27012929486332, "iter_time": 0.4458836364746094, "loss": 0.005246254149824381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.12095761329945, "step_time": 0.4035740814208984} +{"epoch": 0, "iter": 16500, "iter_tflops": 41.7271812400417, "iter_time": 0.49442816162109376, "loss": 0.017424605786800385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.371203693551365, "step_time": 0.44491175270080563} +{"epoch": 0, "iter": 16501, "iter_tflops": 17.512399810125928, "iter_time": 1.1780848846435545, "loss": 0.054775334894657135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.849042725300073, "step_time": 1.0945433044433595} +{"epoch": 0, "iter": 16502, "iter_tflops": 21.296416423216833, "iter_time": 0.9687589263916014, "loss": 0.06995879113674164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.916558787365943, "step_time": 0.7960583686828614} +{"epoch": 0, "iter": 16503, "iter_tflops": 51.902393000620556, "iter_time": 0.3974979248046875, "loss": 0.1486375331878662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.6552024006777, "step_time": 0.3641517925262451} +{"epoch": 0, "iter": 16504, "iter_tflops": 54.57240226011732, "iter_time": 0.3780499420166015, "loss": 0.09640292078256607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.23752273910971, "step_time": 0.34827745246887204} +{"epoch": 0, "iter": 16505, "iter_tflops": 23.68482477745538, "iter_time": 0.8710680236816406, "loss": 0.683497965335846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.878408289690988, "step_time": 0.829277069091797} +{"epoch": 0, "iter": 16506, "iter_tflops": 17.80433596101919, "iter_time": 1.1587679290771484, "loss": 0.5244239568710327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.269268523143793, "step_time": 0.9699954414367675} +{"epoch": 0, "iter": 16507, "iter_tflops": 40.65479406208728, "iter_time": 0.5074701271057128, "loss": 0.5639045238494873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.691160473039496, "step_time": 0.4722029190063477} +{"epoch": 0, "iter": 16508, "iter_tflops": 46.76151524117787, "iter_time": 0.44119813919067385, "loss": 0.6805431842803955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.49646821016688, "step_time": 0.4085650787353516} +{"epoch": 0, "iter": 16509, "iter_tflops": 27.40486023187768, "iter_time": 0.7528260803222656, "loss": 0.059874746948480606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.01063904425419, "step_time": 0.711156120300293} +{"epoch": 0, "iter": 16510, "iter_tflops": 13.668273706048025, "iter_time": 1.5094147186279296, "loss": 0.09669630229473114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.978024713994232, "step_time": 1.377424186706543} +{"epoch": 0, "iter": 16511, "iter_tflops": 39.57488021488595, "iter_time": 0.5213179016113281, "loss": 0.05604912340641022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.21116954356155, "step_time": 0.4666488971710205} +{"epoch": 0, "iter": 16512, "iter_tflops": 41.72440500789377, "iter_time": 0.4944610595703125, "loss": 0.08640386909246445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.806382550222, "step_time": 0.4503977909088135} +{"epoch": 0, "iter": 16513, "iter_tflops": 33.310630980907206, "iter_time": 0.6193546295166015, "loss": 0.6711618304252625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.381113016200516, "step_time": 0.5670825271606446} +{"epoch": 0, "iter": 16514, "iter_tflops": 41.878163324135585, "iter_time": 0.4926456146240234, "loss": 0.5679028034210205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.3234346865802, "step_time": 0.4551970443725586} +{"epoch": 0, "iter": 16515, "iter_tflops": 40.97412552572534, "iter_time": 0.5035151634216308, "loss": 0.7189109325408936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.00583314124513, "step_time": 0.46882633590698247} +{"epoch": 0, "iter": 16516, "iter_tflops": 41.03901857063834, "iter_time": 0.5027189788818359, "loss": 0.5094375014305115, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.05745707658577, "step_time": 0.46827699279785157} +{"epoch": 0, "iter": 16517, "iter_tflops": 2.571461227930924, "iter_time": 0.6094825210571289, "loss": 0.29056912660598755, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 2.768077991829612, "step_time": 0.5661909370422363} +{"epoch": 0, "iter": 16518, "iter_tflops": 2.579288552073445, "iter_time": 0.6076329345703125, "loss": 0.3763940632343292, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.001111352390166, "step_time": 0.5222267646789551} +{"epoch": 0, "iter": 16519, "iter_tflops": 3.1358673993865374, "iter_time": 0.49978537750244145, "loss": 0.21855004131793976, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.4302569856691196, "step_time": 0.45689307785034183} +{"epoch": 0, "iter": 16520, "iter_tflops": 3.107020707188672, "iter_time": 0.5044255638122559, "loss": 0.24744562804698944, "lr": 3e-05, "seqlen": 640.0, "step_tflops": 3.393850874556695, "step_time": 0.46179420661926274} +{"epoch": 0, "iter": 16521, "iter_tflops": 21.934445946035176, "iter_time": 0.9405796508789064, "loss": 0.7052767276763916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.324364892877153, "step_time": 0.8845297012329103} +{"epoch": 0, "iter": 16522, "iter_tflops": 15.592350572792068, "iter_time": 1.323154800415039, "loss": 0.6734757423400879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.87891811654109, "step_time": 1.153934112548828} +{"epoch": 0, "iter": 16523, "iter_tflops": 39.433828902626566, "iter_time": 0.5231826095581055, "loss": 0.682794451713562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.006850750101194, "step_time": 0.47971644401550295} +{"epoch": 0, "iter": 16524, "iter_tflops": 37.913116040577485, "iter_time": 0.544167709350586, "loss": 0.6483436226844788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.31237383869002, "step_time": 0.49939259338378905} +{"epoch": 0, "iter": 16525, "iter_tflops": 18.34636673402949, "iter_time": 1.1245329284667969, "loss": 0.48703816533088684, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 19.208930928038924, "step_time": 1.0740365295410157} +{"epoch": 0, "iter": 16526, "iter_tflops": 15.361454365788932, "iter_time": 1.3430429840087892, "loss": 0.30483976006507874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.508183207021453, "step_time": 1.114701171875} +{"epoch": 0, "iter": 16527, "iter_tflops": 40.13081832386726, "iter_time": 0.5140960083007813, "loss": 0.43604224920272827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.693232091115526, "step_time": 0.47218053054809567} +{"epoch": 0, "iter": 16528, "iter_tflops": 41.955428519322425, "iter_time": 0.4917383575439453, "loss": 0.4277111291885376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.60445159272203, "step_time": 0.45239209747314457} +{"epoch": 0, "iter": 16529, "iter_tflops": 21.153995283589826, "iter_time": 0.9752811813354493, "loss": 0.4228714406490326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.444801528852775, "step_time": 0.919192512512207} +{"epoch": 0, "iter": 16530, "iter_tflops": 11.398068943908207, "iter_time": 1.8100516510009768, "loss": 0.5413666367530823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.918434128468302, "step_time": 1.4822855300903321} +{"epoch": 0, "iter": 16531, "iter_tflops": 9.55783973461867, "iter_time": 2.1585519409179685, "loss": 0.553184449672699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.912892423504482, "step_time": 1.7318290786743162} +{"epoch": 0, "iter": 16532, "iter_tflops": 38.24454987832746, "iter_time": 0.5394518585205078, "loss": 0.5252327919006348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.965520347225805, "step_time": 0.48017790412902833} +{"epoch": 0, "iter": 16533, "iter_tflops": 18.053108447839087, "iter_time": 0.8642741775512696, "loss": 0.23850691318511963, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 18.9875430400836, "step_time": 0.821740623474121} +{"epoch": 0, "iter": 16534, "iter_tflops": 9.156218304282511, "iter_time": 1.704069839477539, "loss": 0.3084324300289154, "lr": 3e-05, 
"seqlen": 6240.0, "step_tflops": 12.20334699303192, "step_time": 1.2785701713562012} +{"epoch": 0, "iter": 16535, "iter_tflops": 24.273920864929455, "iter_time": 0.6427818374633788, "loss": 0.15280571579933167, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 26.088843097969487, "step_time": 0.5980654411315918} +{"epoch": 0, "iter": 16536, "iter_tflops": 23.582346662255063, "iter_time": 0.6616320114135742, "loss": 0.2922155261039734, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 25.330462031204377, "step_time": 0.6159712142944336} +{"epoch": 0, "iter": 16537, "iter_tflops": 19.51480099840977, "iter_time": 1.0168377838134766, "loss": 0.16960948705673218, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 20.756930266541268, "step_time": 0.9559885177612305} +{"epoch": 0, "iter": 16538, "iter_tflops": 18.648664261149012, "iter_time": 1.0640647888183594, "loss": 0.12890280783176422, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 24.587708995871907, "step_time": 0.8070449752807618} +{"epoch": 0, "iter": 16539, "iter_tflops": 47.08736246699846, "iter_time": 0.42141640472412106, "loss": 0.13600468635559082, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 51.34270776354764, "step_time": 0.38648890686035153} +{"epoch": 0, "iter": 16540, "iter_tflops": 46.11771260224919, "iter_time": 0.430276912689209, "loss": 0.1643521934747696, "lr": 3e-05, "seqlen": 7888.0, "step_tflops": 50.15354609041183, "step_time": 0.39565272140502933} +{"epoch": 0, "iter": 16541, "iter_tflops": 26.461310457870795, "iter_time": 0.779670135498047, "loss": 0.411633163690567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.94535445988655, "step_time": 0.7382655868530273} +{"epoch": 0, "iter": 16542, "iter_tflops": 14.012483211328394, "iter_time": 1.4723367156982423, "loss": 0.5742824077606201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.146163214347624, "step_time": 1.2777706527709962} +{"epoch": 0, "iter": 16543, "iter_tflops": 40.29112276189683, "iter_time": 0.5120505981445312, "loss": 0.6359100341796875, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.134655149534815, "step_time": 0.46745790672302245} +{"epoch": 0, "iter": 16544, "iter_tflops": 46.208181814177905, "iter_time": 0.4464813957214356, "loss": 0.6543487310409546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.82030280043258, "step_time": 0.4141101589202881} +{"epoch": 0, "iter": 16545, "iter_tflops": 36.97835900933453, "iter_time": 0.5579234466552735, "loss": 0.33269771933555603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.70060728545161, "step_time": 0.5196669502258301} +{"epoch": 0, "iter": 16546, "iter_tflops": 34.65340521119973, "iter_time": 0.5953554458618164, "loss": 0.42934972047805786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.306769793534166, "step_time": 0.47639418983459475} +{"epoch": 0, "iter": 16547, "iter_tflops": 48.93023660799018, "iter_time": 0.42164303588867186, "loss": 0.44911813735961914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.19702020256352, "step_time": 0.3878242321014404} +{"epoch": 0, "iter": 16548, "iter_tflops": 46.98498117060703, "iter_time": 0.43909975051879885, "loss": 0.44912445545196533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.851909449997656, "step_time": 0.40570931816101075} +{"epoch": 0, "iter": 16549, "iter_tflops": 39.38075429789125, "iter_time": 0.5238877182006836, "loss": 0.6089293360710144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.73289911936275, "step_time": 0.482791805267334} +{"epoch": 0, "iter": 16550, "iter_tflops": 42.7272807412956, "iter_time": 0.4828552894592285, "loss": 0.5900878310203552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.54630005071589, "step_time": 0.4432380981445312} +{"epoch": 0, "iter": 16551, "iter_tflops": 44.69973555246995, "iter_time": 0.46154844665527345, "loss": 0.7156399488449097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.232045675770756, "step_time": 0.42774659919738767} +{"epoch": 0, "iter": 16552, "iter_tflops": 44.183101191828776, "iter_time": 0.4669453468322754, "loss": 
0.6296619176864624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.503140000105496, "step_time": 0.4343101005554199} +{"epoch": 0, "iter": 16553, "iter_tflops": 43.40307676309148, "iter_time": 0.47533712005615236, "loss": 0.5198509693145752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08913067215015, "step_time": 0.4381285705566407} +{"epoch": 0, "iter": 16554, "iter_tflops": 42.23116959461128, "iter_time": 0.4885276374816895, "loss": 0.37645915150642395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.78549525134532, "step_time": 0.45060326194763184} +{"epoch": 0, "iter": 16555, "iter_tflops": 47.79442492124427, "iter_time": 0.43166318130493164, "loss": 0.5565145015716553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.52823241888941, "step_time": 0.40038426589965825} +{"epoch": 0, "iter": 16556, "iter_tflops": 47.32255398313908, "iter_time": 0.435967456817627, "loss": 0.6289263963699341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.2583169060039, "step_time": 0.4024926052093506} +{"epoch": 0, "iter": 16557, "iter_tflops": 21.446623132414004, "iter_time": 0.7676430511474609, "loss": 0.09904801845550537, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 22.631344008881324, "step_time": 0.7274579544067383} +{"epoch": 0, "iter": 16558, "iter_tflops": 12.586234299092768, "iter_time": 1.308044235229492, "loss": 0.15264372527599335, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 14.93696304234842, "step_time": 1.1021886558532716} +{"epoch": 0, "iter": 16559, "iter_tflops": 31.216789185658097, "iter_time": 0.5273877182006836, "loss": 0.22039873898029327, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 34.4139723600688, "step_time": 0.47839148139953613} +{"epoch": 0, "iter": 16560, "iter_tflops": 31.655020806938584, "iter_time": 0.5200865707397462, "loss": 0.12990373373031616, "lr": 3e-05, "seqlen": 6576.0, "step_tflops": 34.66550999860398, "step_time": 0.47492020797729495} +{"epoch": 0, "iter": 16561, "iter_tflops": 21.62686242073945, "iter_time": 
0.9539568481445311, "loss": 0.022295240312814713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.511531051580942, "step_time": 0.8774883041381837} +{"epoch": 0, "iter": 16562, "iter_tflops": 19.986736271626004, "iter_time": 1.032239242553711, "loss": 0.040901727974414825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.077542076004843, "step_time": 0.8568604488372802} +{"epoch": 0, "iter": 16563, "iter_tflops": 43.067292837938425, "iter_time": 0.4790431938171387, "loss": 0.015125139616429806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.632119773194674, "step_time": 0.4331340618133545} +{"epoch": 0, "iter": 16564, "iter_tflops": 45.76339419769777, "iter_time": 0.45082087707519525, "loss": 0.01605338044464588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26284005558617, "step_time": 0.4104641418457031} +{"epoch": 0, "iter": 16565, "iter_tflops": 24.831930465727073, "iter_time": 0.8308292236328125, "loss": 0.5412559509277344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.90263196673143, "step_time": 0.7668801155090332} +{"epoch": 0, "iter": 16566, "iter_tflops": 16.323196160343123, "iter_time": 1.263912612915039, "loss": 0.43296241760253906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.485996836605207, "step_time": 1.0587651062011718} +{"epoch": 0, "iter": 16567, "iter_tflops": 39.93258712452237, "iter_time": 0.5166480560302734, "loss": 0.5384342670440674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.6731645670568, "step_time": 0.48346762466430665} +{"epoch": 0, "iter": 16568, "iter_tflops": 46.05179662331726, "iter_time": 0.4479975814819336, "loss": 0.5525330305099487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6690358893305, "step_time": 0.41537133026123046} +{"epoch": 0, "iter": 16569, "iter_tflops": 29.898751327843993, "iter_time": 0.6900319442749023, "loss": 0.17551787197589874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.75978403723484, "step_time": 0.6495980415344238} +{"epoch": 0, "iter": 16570, "iter_tflops": 
18.238561141299986, "iter_time": 1.1311798858642579, "loss": 0.269766628742218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.05938979262611, "step_time": 0.8575069313049315} +{"epoch": 0, "iter": 16571, "iter_tflops": 40.96300569262379, "iter_time": 0.5036518478393555, "loss": 0.3275567293167114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.76735862658088, "step_time": 0.4608512573242187} +{"epoch": 0, "iter": 16572, "iter_tflops": 39.140335423145316, "iter_time": 0.5271056900024413, "loss": 0.18862317502498627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.545596565343175, "step_time": 0.4849172458648682} +{"epoch": 0, "iter": 16573, "iter_tflops": 27.451746170321346, "iter_time": 0.751540298461914, "loss": 0.4848219156265259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.882515051553355, "step_time": 0.6904068641662597} +{"epoch": 0, "iter": 16574, "iter_tflops": 34.97377106304449, "iter_time": 0.5899018859863281, "loss": 0.4892667233943939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.09104758790494, "step_time": 0.541625783920288} +{"epoch": 0, "iter": 16575, "iter_tflops": 35.08533908725417, "iter_time": 0.5880260543823242, "loss": 0.539860725402832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.19037193561039, "step_time": 0.540217140197754} +{"epoch": 0, "iter": 16576, "iter_tflops": 37.80528546937897, "iter_time": 0.5457198181152344, "loss": 0.5942884683609009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.05371425971275, "step_time": 0.5025390243530273} +{"epoch": 0, "iter": 16577, "iter_tflops": 39.78414028232216, "iter_time": 0.5185758285522462, "loss": 0.01161972712725401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.57871657059368, "step_time": 0.46280142402648927} +{"epoch": 0, "iter": 16578, "iter_tflops": 38.348861421355075, "iter_time": 0.5379845123291016, "loss": 0.006934852339327335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.232878998670586, "step_time": 0.4885078639984131} +{"epoch": 0, "iter": 16579, 
"iter_tflops": 45.061228242700324, "iter_time": 0.45784578704833984, "loss": 0.010464864782989025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.90271228045021, "step_time": 0.41342629623413085} +{"epoch": 0, "iter": 16580, "iter_tflops": 46.71193995880738, "iter_time": 0.44166638183593754, "loss": 0.003095354652032256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.31926128268895, "step_time": 0.4020146236419678} +{"epoch": 0, "iter": 16581, "iter_tflops": 26.184318326015426, "iter_time": 0.7879179153442383, "loss": 0.10294226557016373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.051369422569227, "step_time": 0.7354754486083983} +{"epoch": 0, "iter": 16582, "iter_tflops": 8.278707343223328, "iter_time": 2.492067016601563, "loss": 0.06921125203371048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.311671853601995, "step_time": 2.000751556396484} +{"epoch": 0, "iter": 16583, "iter_tflops": 24.809965473340313, "iter_time": 0.8315647811889648, "loss": 0.09118449687957764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.88629345913159, "step_time": 0.7398291759490967} +{"epoch": 0, "iter": 16584, "iter_tflops": 49.92605715682224, "iter_time": 0.41323298263549807, "loss": 0.049086879938840866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.455656177328734, "step_time": 0.37886043357849125} +{"epoch": 0, "iter": 16585, "iter_tflops": 16.1496354235646, "iter_time": 0.8978230972290038, "loss": 0.27390483021736145, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 16.977233864983077, "step_time": 0.8540564270019531} +{"epoch": 0, "iter": 16586, "iter_tflops": 8.942271337937541, "iter_time": 1.6214578094482421, "loss": 0.2487293928861618, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 11.815694595095696, "step_time": 1.2271403579711915} +{"epoch": 0, "iter": 16587, "iter_tflops": 21.73704355344877, "iter_time": 0.6670417556762696, "loss": 0.13666406273841858, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.3726165743788, "step_time": 0.6203633918762207} +{"epoch": 
0, "iter": 16588, "iter_tflops": 23.070079975988325, "iter_time": 0.6284987182617188, "loss": 0.24207144975662231, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 24.723893800787565, "step_time": 0.5864576110839843} +{"epoch": 0, "iter": 16589, "iter_tflops": 22.931572011782944, "iter_time": 0.8996807327270507, "loss": 0.09389884769916534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.72755417637868, "step_time": 0.8343361968994141} +{"epoch": 0, "iter": 16590, "iter_tflops": 17.58638762731837, "iter_time": 1.1731285552978514, "loss": 0.17451761662960052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.504890432211226, "step_time": 1.006154779434204} +{"epoch": 0, "iter": 16591, "iter_tflops": 39.827298239907506, "iter_time": 0.5180138854980468, "loss": 0.13734835386276245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.012899377586386, "step_time": 0.4687510662078857} +{"epoch": 0, "iter": 16592, "iter_tflops": 42.49257650892704, "iter_time": 0.48552230072021485, "loss": 0.19962656497955322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44261774771753, "step_time": 0.44422761917114256} +{"epoch": 0, "iter": 16593, "iter_tflops": 31.756926462245428, "iter_time": 0.6496564941406251, "loss": 0.24219542741775513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.276892398806694, "step_time": 0.5848330764770509} +{"epoch": 0, "iter": 16594, "iter_tflops": 46.954213647446615, "iter_time": 0.43938747787475585, "loss": 0.190132737159729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.0510765084779, "step_time": 0.4041265125274658} +{"epoch": 0, "iter": 16595, "iter_tflops": 48.08533893834168, "iter_time": 0.42905163955688475, "loss": 0.19164980947971344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.0423166301913, "step_time": 0.39642919158935547} +{"epoch": 0, "iter": 16596, "iter_tflops": 54.77755875592344, "iter_time": 0.3766340446472168, "loss": 0.27960261702537537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.55140501299471, "step_time": 
0.3464417591094971} +{"epoch": 0, "iter": 16597, "iter_tflops": 21.04470680580974, "iter_time": 0.9803459701538086, "loss": 0.31834179162979126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.913647617390033, "step_time": 0.9414723587036132} +{"epoch": 0, "iter": 16598, "iter_tflops": 19.395738620129233, "iter_time": 1.0636920776367187, "loss": 0.2856709063053131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.318096563397926, "step_time": 0.884767478942871} +{"epoch": 0, "iter": 16599, "iter_tflops": 48.48318070878511, "iter_time": 0.4255309410095215, "loss": 0.2921827435493469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.52329009368501, "step_time": 0.3927989559173584} +{"epoch": 0, "iter": 16600, "iter_tflops": 49.28387368201114, "iter_time": 0.4186175308227539, "loss": 0.32980024814605713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.3549833577163, "step_time": 0.3866760368347168} +{"epoch": 0, "iter": 16601, "iter_tflops": 44.8739627757241, "iter_time": 0.4597564430236816, "loss": 0.003705097595229745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.261697448850065, "step_time": 0.4188059806823731} +{"epoch": 0, "iter": 16602, "iter_tflops": 11.847443051936716, "iter_time": 1.7413963012695315, "loss": 0.0032959752716124058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.673713734064615, "step_time": 1.4059899139404297} +{"epoch": 0, "iter": 16603, "iter_tflops": 12.859941496604455, "iter_time": 1.6042913970947266, "loss": 0.005333855282515287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.925598325212981, "step_time": 1.2954674034118652} +{"epoch": 0, "iter": 16604, "iter_tflops": 21.867308134984434, "iter_time": 0.9434674530029297, "loss": 0.0015282556414604187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.005341868567125, "step_time": 0.7639634265899657} +{"epoch": 0, "iter": 16605, "iter_tflops": 11.996972186564664, "iter_time": 1.2392215728759766, "loss": 0.25794169306755066, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 
12.797049918824229, "step_time": 1.161744842529297} +{"epoch": 0, "iter": 16606, "iter_tflops": 11.79112911031472, "iter_time": 1.260855224609375, "loss": 0.23386672139167786, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 15.252655908714788, "step_time": 0.9747093772888183} +{"epoch": 0, "iter": 16607, "iter_tflops": 22.526506322095397, "iter_time": 0.6599739227294923, "loss": 0.16305974125862122, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.28126689913201, "step_time": 0.6122788734436035} +{"epoch": 0, "iter": 16608, "iter_tflops": 22.356555966161114, "iter_time": 0.6649909210205077, "loss": 0.1517682671546936, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.09605295939017, "step_time": 0.6169851455688476} +{"epoch": 0, "iter": 16609, "iter_tflops": 18.54214424270788, "iter_time": 1.1126595306396483, "loss": 0.3308714032173157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.85424847512384, "step_time": 1.039127395629883} +{"epoch": 0, "iter": 16610, "iter_tflops": 21.380181017058266, "iter_time": 0.9649634628295899, "loss": 0.3441013693809509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.950958599641154, "step_time": 0.7950031375885009} +{"epoch": 0, "iter": 16611, "iter_tflops": 39.73183888129694, "iter_time": 0.5192584609985351, "loss": 0.44837263226509094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.642745417901224, "step_time": 0.47272675704956046} +{"epoch": 0, "iter": 16612, "iter_tflops": 41.83407470331489, "iter_time": 0.49316481018066405, "loss": 0.4890727996826172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.68865353752398, "step_time": 0.4515583610534668} +{"epoch": 0, "iter": 16613, "iter_tflops": 20.375098074357265, "iter_time": 1.0125641326904298, "loss": 0.49175339937210083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.777162194803577, "step_time": 0.9473729095458985} +{"epoch": 0, "iter": 16614, "iter_tflops": 21.825709583582032, "iter_time": 0.9452656478881837, "loss": 0.5871279835700989, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 27.17805243923715, "step_time": 0.759108606338501} +{"epoch": 0, "iter": 16615, "iter_tflops": 49.817174670919584, "iter_time": 0.4141361618041992, "loss": 0.4675852954387665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.023293272502805, "step_time": 0.38189255523681637} +{"epoch": 0, "iter": 16616, "iter_tflops": 52.86905960840179, "iter_time": 0.3902300071716308, "loss": 0.5339211225509644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.58047476705275, "step_time": 0.35830016326904296} +{"epoch": 0, "iter": 16617, "iter_tflops": 38.2875392043602, "iter_time": 0.5388461608886719, "loss": 0.39978814125061035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.59207789643054, "step_time": 0.49603421020507815} +{"epoch": 0, "iter": 16618, "iter_tflops": 33.30929869184292, "iter_time": 0.6193794021606446, "loss": 0.36995261907577515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.608172425008895, "step_time": 0.5635652408599853} +{"epoch": 0, "iter": 16619, "iter_tflops": 41.452316378035505, "iter_time": 0.4977066497802734, "loss": 0.27580612897872925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41900030772079, "step_time": 0.45423926925659175} +{"epoch": 0, "iter": 16620, "iter_tflops": 42.607881501729004, "iter_time": 0.4842083854675293, "loss": 0.3308919072151184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.466962589117735, "step_time": 0.4439948806762695} +{"epoch": 0, "iter": 16621, "iter_tflops": 14.65668935983216, "iter_time": 1.407623031616211, "loss": 0.07975291460752487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.658198077631196, "step_time": 1.3175905303955078} +{"epoch": 0, "iter": 16622, "iter_tflops": 25.135017394954392, "iter_time": 0.820810791015625, "loss": 0.058582525700330734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.82250949979539, "step_time": 0.715797960281372} +{"epoch": 0, "iter": 16623, "iter_tflops": 52.510718258480054, "iter_time": 0.3928929977416992, "loss": 0.09247761219739914, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 57.101740208802845, "step_time": 0.3613041114807129} +{"epoch": 0, "iter": 16624, "iter_tflops": 54.48110210658535, "iter_time": 0.3786834831237793, "loss": 0.06209950894117355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.310337006037905, "step_time": 0.3478498783111572} +{"epoch": 0, "iter": 16625, "iter_tflops": 28.11722832554674, "iter_time": 0.7337527465820313, "loss": 0.5745288729667664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.728283477745556, "step_time": 0.6939887237548829} +{"epoch": 0, "iter": 16626, "iter_tflops": 14.161611014547907, "iter_time": 1.4568323822021483, "loss": 0.5393365025520325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.782393200565537, "step_time": 1.2293296470642088} +{"epoch": 0, "iter": 16627, "iter_tflops": 39.91721750220027, "iter_time": 0.5168469848632812, "loss": 0.46915313601493835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.58819057214389, "step_time": 0.4733184204101562} +{"epoch": 0, "iter": 16628, "iter_tflops": 39.95108419478485, "iter_time": 0.5164088516235351, "loss": 0.5209857225418091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.73260225729295, "step_time": 0.4717554512023926} +{"epoch": 0, "iter": 16629, "iter_tflops": 16.01501817658063, "iter_time": 1.2882341613769532, "loss": 0.14395783841609955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.11903719424099, "step_time": 1.2051550140380858} +{"epoch": 0, "iter": 16630, "iter_tflops": 32.37481166407979, "iter_time": 0.6372575607299805, "loss": 0.1624821275472641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80862429163552, "step_time": 0.49346501731872555} +{"epoch": 0, "iter": 16631, "iter_tflops": 46.13222248595499, "iter_time": 0.447216552734375, "loss": 0.14019441604614258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.96555694275309, "step_time": 0.4129063053131104} +{"epoch": 0, "iter": 16632, "iter_tflops": 47.64489441375597, "iter_time": 0.4330179290771484, "loss": 0.09861377626657486, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.62926624066549, "step_time": 0.3996007499694824} +{"epoch": 0, "iter": 16633, "iter_tflops": 23.394457212500964, "iter_time": 0.8818795547485352, "loss": 0.3340812921524048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.490124416673815, "step_time": 0.8424250183105468} +{"epoch": 0, "iter": 16634, "iter_tflops": 18.397728420462887, "iter_time": 1.121393524169922, "loss": 0.261516273021698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.741337144241793, "step_time": 0.8338714027404786} +{"epoch": 0, "iter": 16635, "iter_tflops": 45.13007046597887, "iter_time": 0.45714738082885736, "loss": 0.2939116358757019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30083613589534, "step_time": 0.4184735012054443} +{"epoch": 0, "iter": 16636, "iter_tflops": 47.352370091087735, "iter_time": 0.43569294357299804, "loss": 0.3320077955722809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.3685403309794, "step_time": 0.40162896156311034} +{"epoch": 0, "iter": 16637, "iter_tflops": 25.307574160753557, "iter_time": 0.8152141876220703, "loss": 0.18167169392108917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.68300097311847, "step_time": 0.773192398071289} +{"epoch": 0, "iter": 16638, "iter_tflops": 12.352241165555567, "iter_time": 1.6702307891845702, "loss": 0.12121900171041489, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.99222294018356, "step_time": 1.2900704040527342} +{"epoch": 0, "iter": 16639, "iter_tflops": 43.32670941795681, "iter_time": 0.4761749458312988, "loss": 0.1579137146472931, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.245763355055956, "step_time": 0.4366760540008545} +{"epoch": 0, "iter": 16640, "iter_tflops": 48.911175482838075, "iter_time": 0.42180735397338864, "loss": 0.15162263810634613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.02413154094629, "step_time": 0.38908875846862795} +{"epoch": 0, "iter": 16641, "iter_tflops": 29.32718014086844, "iter_time": 0.7034803009033203, "loss": 
0.06798721849918365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.082320039276777, "step_time": 0.663756549835205} +{"epoch": 0, "iter": 16642, "iter_tflops": 15.675105120289086, "iter_time": 1.3161693878173828, "loss": 0.08536040037870407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.194502884675018, "step_time": 1.0748438568115235} +{"epoch": 0, "iter": 16643, "iter_tflops": 43.2131766208508, "iter_time": 0.4774259872436523, "loss": 0.10356617718935013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59627003658044, "step_time": 0.4334603004455567} +{"epoch": 0, "iter": 16644, "iter_tflops": 42.88767036627395, "iter_time": 0.4810495262145996, "loss": 0.1264902949333191, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.052613646475535, "step_time": 0.4384685974121094} +{"epoch": 0, "iter": 16645, "iter_tflops": 33.54130103464494, "iter_time": 0.6150952072143556, "loss": 0.3811880648136139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.14897252764031, "step_time": 0.5553610801696778} +{"epoch": 0, "iter": 16646, "iter_tflops": 34.116006090883786, "iter_time": 0.6047335510253906, "loss": 0.4852273464202881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.74539309195916, "step_time": 0.5465857372283935} +{"epoch": 0, "iter": 16647, "iter_tflops": 40.03433821661807, "iter_time": 0.5153349456787109, "loss": 0.39439305663108826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.720528083909706, "step_time": 0.4718857345581054} +{"epoch": 0, "iter": 16648, "iter_tflops": 44.71714051794898, "iter_time": 0.46136880111694334, "loss": 0.4182714819908142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.642464501432855, "step_time": 0.4241375045776367} +{"epoch": 0, "iter": 16649, "iter_tflops": 19.858711565451994, "iter_time": 1.0388938598632813, "loss": 0.5130966901779175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.426376078259075, "step_time": 0.9628830108642579} +{"epoch": 0, "iter": 16650, "iter_tflops": 15.99152782950727, "iter_time": 
1.290126480102539, "loss": 0.5131037831306458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.964953077985605, "step_time": 0.8983729877471923} +{"epoch": 0, "iter": 16651, "iter_tflops": 47.92986316723565, "iter_time": 0.4304434051513672, "loss": 0.3856745660305023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.75039582110979, "step_time": 0.3986654243469238} +{"epoch": 0, "iter": 16652, "iter_tflops": 47.7840605379612, "iter_time": 0.4317568092346192, "loss": 0.41468116641044617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.97371903542699, "step_time": 0.39695241928100583} +{"epoch": 0, "iter": 16653, "iter_tflops": 24.686386388871366, "iter_time": 0.8357275619506837, "loss": 0.5629556775093079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.981140149645235, "step_time": 0.7940796051025389} +{"epoch": 0, "iter": 16654, "iter_tflops": 19.231440448799034, "iter_time": 1.0727794189453126, "loss": 0.7288550734519958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.778695510193014, "step_time": 0.8676293239593506} +{"epoch": 0, "iter": 16655, "iter_tflops": 43.68993185524451, "iter_time": 0.47221619796752934, "loss": 0.5059002041816711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.956965850911274, "step_time": 0.43936172485351566} +{"epoch": 0, "iter": 16656, "iter_tflops": 43.93549560074297, "iter_time": 0.4695768928527832, "loss": 0.6271239519119263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.19459952617056, "step_time": 0.4371494560241699} +{"epoch": 0, "iter": 16657, "iter_tflops": 41.93342446228533, "iter_time": 0.4919963912963867, "loss": 0.07140445709228516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61672279344998, "step_time": 0.45227040100097654} +{"epoch": 0, "iter": 16658, "iter_tflops": 38.91613201588652, "iter_time": 0.5301424484252929, "loss": 0.04631584882736206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81772872264224, "step_time": 0.48183530807495123} +{"epoch": 0, "iter": 16659, "iter_tflops": 40.24771743511997, 
"iter_time": 0.5126028213500976, "loss": 0.02972172200679779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13555719248742, "step_time": 0.46744835281372077} +{"epoch": 0, "iter": 16660, "iter_tflops": 39.10376266544983, "iter_time": 0.5275986785888672, "loss": 0.04976420849561691, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94365239384598, "step_time": 0.4804224224090576} +{"epoch": 0, "iter": 16661, "iter_tflops": 24.732524739685292, "iter_time": 0.8341685180664061, "loss": 0.12424333393573761, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.584223399302857, "step_time": 0.7760653076171876} +{"epoch": 0, "iter": 16662, "iter_tflops": 7.903054756367841, "iter_time": 2.6105213928222657, "loss": 0.20118221640586853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.851898112230714, "step_time": 2.3306971282958986} +{"epoch": 0, "iter": 16663, "iter_tflops": 14.778497362048721, "iter_time": 1.3960210571289062, "loss": 0.1480274349451065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.53177674854175, "step_time": 1.1767828102111815} +{"epoch": 0, "iter": 16664, "iter_tflops": 34.71153561537413, "iter_time": 0.5943584213256836, "loss": 0.14913609623908997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.31415549252472, "step_time": 0.5384718322753906} +{"epoch": 0, "iter": 16665, "iter_tflops": 16.47607751057586, "iter_time": 1.1792545166015622, "loss": 0.13985279202461243, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 17.725630444842782, "step_time": 1.096123992919922} +{"epoch": 0, "iter": 16666, "iter_tflops": 19.981149936431194, "iter_time": 0.9723909225463867, "loss": 0.37220802903175354, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 22.156835007354776, "step_time": 0.8769072303771973} +{"epoch": 0, "iter": 16667, "iter_tflops": 29.08080932351503, "iter_time": 0.6681206359863282, "loss": 0.3772478997707367, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 31.414603424204692, "step_time": 0.6184858856201172} +{"epoch": 0, "iter": 16668, "iter_tflops": 
30.274414600843002, "iter_time": 0.6417791748046875, "loss": 0.1885753571987152, "lr": 3e-05, "seqlen": 7728.0, "step_tflops": 32.396175444344806, "step_time": 0.5997463760375976} +{"epoch": 0, "iter": 16669, "iter_tflops": 21.870427702486143, "iter_time": 0.9433328781127929, "loss": 0.0439569428563118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.216547404047756, "step_time": 0.888637451171875} +{"epoch": 0, "iter": 16670, "iter_tflops": 8.59188721980175, "iter_time": 2.4012295532226564, "loss": 0.014199125580489635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.602821452191353, "step_time": 2.1484408111572266} +{"epoch": 0, "iter": 16671, "iter_tflops": 14.452501714645921, "iter_time": 1.4275101928710936, "loss": 0.027152350172400475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.080865595098505, "step_time": 1.2078482437133788} +{"epoch": 0, "iter": 16672, "iter_tflops": 54.31743127295435, "iter_time": 0.37982454299926754, "loss": 0.019539596512913704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.88247035442968, "step_time": 0.3445264263153076} +{"epoch": 0, "iter": 16673, "iter_tflops": 24.474839468821198, "iter_time": 0.6375051193237304, "loss": 0.16129323840141296, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 26.161708418855167, "step_time": 0.596399715423584} +{"epoch": 0, "iter": 16674, "iter_tflops": 22.567910329158423, "iter_time": 0.6913726272583008, "loss": 0.21338553726673126, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.364143211543993, "step_time": 0.6404015655517578} +{"epoch": 0, "iter": 16675, "iter_tflops": 22.927705636272652, "iter_time": 0.680523193359375, "loss": 0.2625995874404907, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 24.77261803823171, "step_time": 0.6298420066833497} +{"epoch": 0, "iter": 16676, "iter_tflops": 25.01592534000029, "iter_time": 0.6237161026000977, "loss": 0.2391674667596817, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 26.883361251368772, "step_time": 0.5803900527954101} +{"epoch": 0, "iter": 16677, 
"iter_tflops": 27.63134478238216, "iter_time": 0.7466554260253907, "loss": 0.5979119539260864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.668273405200015, "step_time": 0.6953924560546876} +{"epoch": 0, "iter": 16678, "iter_tflops": 10.333152737548147, "iter_time": 1.9965923309326172, "loss": 0.59665447473526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.03141073634535, "step_time": 1.5831818923950196} +{"epoch": 0, "iter": 16679, "iter_tflops": 20.445159577328656, "iter_time": 1.0090942764282227, "loss": 0.7516149282455444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.561158888071823, "step_time": 0.8399886016845703} +{"epoch": 0, "iter": 16680, "iter_tflops": 43.26500847404574, "iter_time": 0.47685402679443367, "loss": 0.5040565729141235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.27787357027211, "step_time": 0.44580902099609376} +{"epoch": 0, "iter": 16681, "iter_tflops": 19.96717338666465, "iter_time": 0.7322987899780272, "loss": 0.2345961630344391, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 21.117644374858656, "step_time": 0.6924037857055664} +{"epoch": 0, "iter": 16682, "iter_tflops": 8.632139260128564, "iter_time": 1.6938949279785156, "loss": 0.12587431073188782, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 9.969077326368835, "step_time": 1.4667292098999025} +{"epoch": 0, "iter": 16683, "iter_tflops": 9.1111589787799, "iter_time": 1.6048383026123048, "loss": 0.26299047470092773, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 11.315178272501237, "step_time": 1.292240966796875} +{"epoch": 0, "iter": 16684, "iter_tflops": 12.064700818768953, "iter_time": 1.2119601745605468, "loss": 0.25209155678749084, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 15.414030221702115, "step_time": 0.9486121864318848} +{"epoch": 0, "iter": 16685, "iter_tflops": 14.695466485092279, "iter_time": 0.9894424285888672, "loss": 0.15739010274410248, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 16.056299217271455, "step_time": 0.9055834007263184} +{"epoch": 0, 
"iter": 16686, "iter_tflops": 24.75423583230917, "iter_time": 0.5873870697021485, "loss": 0.24905654788017273, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 26.44351779125498, "step_time": 0.5498632278442384} +{"epoch": 0, "iter": 16687, "iter_tflops": 24.611636028635278, "iter_time": 0.5907903900146484, "loss": 0.26899537444114685, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 26.16676501899769, "step_time": 0.5556788558959961} +{"epoch": 0, "iter": 16688, "iter_tflops": 25.672543642582603, "iter_time": 0.5663762130737304, "loss": 0.28713393211364746, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 27.271899440878794, "step_time": 0.5331611785888672} +{"epoch": 0, "iter": 16689, "iter_tflops": 19.787443292885722, "iter_time": 1.0426356353759765, "loss": 0.1332046389579773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.575211792495796, "step_time": 1.0027159729003905} +{"epoch": 0, "iter": 16690, "iter_tflops": 13.554194366275071, "iter_time": 1.5221187591552734, "loss": 0.1584016978740692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.969969199177854, "step_time": 1.2918680839538572} +{"epoch": 0, "iter": 16691, "iter_tflops": 49.23396587732164, "iter_time": 0.4190418777465821, "loss": 0.20715394616127014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.736632141580394, "step_time": 0.3839297828674316} +{"epoch": 0, "iter": 16692, "iter_tflops": 51.47798173420524, "iter_time": 0.4007751045227051, "loss": 0.1652931123971939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.73687084417025, "step_time": 0.3701516284942627} +{"epoch": 0, "iter": 16693, "iter_tflops": 23.506785717506617, "iter_time": 0.8776654434204101, "loss": 0.20100443065166473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.002113253218738, "step_time": 0.8251739883422851} +{"epoch": 0, "iter": 16694, "iter_tflops": 21.979330194035747, "iter_time": 0.9386588821411131, "loss": 0.19556783139705658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.95395993796145, "step_time": 
0.6887601356506348} +{"epoch": 0, "iter": 16695, "iter_tflops": 50.69928497788932, "iter_time": 0.4069306602478028, "loss": 0.12757721543312073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.988273606316305, "step_time": 0.3751907844543457} +{"epoch": 0, "iter": 16696, "iter_tflops": 50.16005364884621, "iter_time": 0.4113052520751953, "loss": 0.19807668030261993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.225789767351635, "step_time": 0.38046644592285156} +{"epoch": 0, "iter": 16697, "iter_tflops": 28.175060262194137, "iter_time": 0.7322466506958009, "loss": 0.004505489487200975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.910611879808865, "step_time": 0.6897583236694336} +{"epoch": 0, "iter": 16698, "iter_tflops": 17.08427856410362, "iter_time": 1.207606948852539, "loss": 0.00984126515686512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.89556587566381, "step_time": 1.036969425201416} +{"epoch": 0, "iter": 16699, "iter_tflops": 56.34660212862211, "iter_time": 0.3661461868286133, "loss": 0.013193052262067795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.21011572620433, "step_time": 0.3316356716156006} +{"epoch": 0, "iter": 16700, "iter_tflops": 59.32971012350409, "iter_time": 0.34773629379272464, "loss": 0.007993852719664574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.06955677964048, "step_time": 0.31706214904785157} +{"epoch": 0, "iter": 16701, "iter_tflops": 21.686422061508637, "iter_time": 0.951336898803711, "loss": 0.39353594183921814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.626100442808767, "step_time": 0.9118271865844727} +{"epoch": 0, "iter": 16702, "iter_tflops": 12.468681945575295, "iter_time": 1.6546330718994138, "loss": 0.601868212223053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.675391165973874, "step_time": 1.2372179641723633} +{"epoch": 0, "iter": 16703, "iter_tflops": 44.20512413843434, "iter_time": 0.46671271514892576, "loss": 0.646453857421875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
47.61853293050893, "step_time": 0.4332576465606689} +{"epoch": 0, "iter": 16704, "iter_tflops": 45.71181928503295, "iter_time": 0.4513295211791992, "loss": 0.7246415019035339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.380366429420384, "step_time": 0.41779952239990237} +{"epoch": 0, "iter": 16705, "iter_tflops": 25.134417450753705, "iter_time": 0.8208303833007812, "loss": 0.19971351325511932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.7294983415843, "step_time": 0.7718473892211914} +{"epoch": 0, "iter": 16706, "iter_tflops": 12.060967577345608, "iter_time": 1.7105670318603514, "loss": 0.1700308620929718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.715996049837226, "step_time": 1.1645460662841798} +{"epoch": 0, "iter": 16707, "iter_tflops": 50.71529288728992, "iter_time": 0.40680221557617186, "loss": 0.1507980078458786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.26713338346792, "step_time": 0.37329769515991207} +{"epoch": 0, "iter": 16708, "iter_tflops": 49.79354088916646, "iter_time": 0.41433272552490236, "loss": 0.13932709395885468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.10477591458745, "step_time": 0.381317419052124} +{"epoch": 0, "iter": 16709, "iter_tflops": 25.275969900120312, "iter_time": 0.8162335052490234, "loss": 0.05787114426493645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.58216676575999, "step_time": 0.7761253509521484} +{"epoch": 0, "iter": 16710, "iter_tflops": 18.067963873205745, "iter_time": 1.1418604583740235, "loss": 0.04854239523410797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.625541318545004, "step_time": 0.9540151252746581} +{"epoch": 0, "iter": 16711, "iter_tflops": 45.33345554366406, "iter_time": 0.4550964241027832, "loss": 0.011485419236123562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.06477191440759, "step_time": 0.4120880355834961} +{"epoch": 0, "iter": 16712, "iter_tflops": 42.431283279040805, "iter_time": 0.4862236518859863, "loss": 0.03316442295908928, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 46.8792357531421, "step_time": 0.4400902271270752} +{"epoch": 0, "iter": 16713, "iter_tflops": 17.20515923281299, "iter_time": 1.1991224975585937, "loss": 0.02682012878358364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.123494937177057, "step_time": 1.1383617553710939} +{"epoch": 0, "iter": 16714, "iter_tflops": 21.71637983141799, "iter_time": 0.9500245285034179, "loss": 0.025308147072792053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.418673850683867, "step_time": 0.752446804046631} +{"epoch": 0, "iter": 16715, "iter_tflops": 52.254704834651356, "iter_time": 0.3948179130554199, "loss": 0.022439097985625267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.371899128968856, "step_time": 0.35960276412963865} +{"epoch": 0, "iter": 16716, "iter_tflops": 53.50842162172957, "iter_time": 0.3855672225952148, "loss": 0.025502635166049004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.1558969508645, "step_time": 0.3547549705505371} +{"epoch": 0, "iter": 16717, "iter_tflops": 42.45782078838692, "iter_time": 0.48591974639892577, "loss": 0.06740665435791016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.2019106975984, "step_time": 0.44654199790954585} +{"epoch": 0, "iter": 16718, "iter_tflops": 46.39264903167668, "iter_time": 0.44470608901977543, "loss": 0.06596175581216812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.266250683559896, "step_time": 0.4024303169250488} +{"epoch": 0, "iter": 16719, "iter_tflops": 50.19232636776427, "iter_time": 0.41104079055786125, "loss": 0.14218910038471222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.377656513859286, "step_time": 0.3794038734436035} +{"epoch": 0, "iter": 16720, "iter_tflops": 53.87539093475212, "iter_time": 0.38294095230102543, "loss": 0.16681957244873047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.487630398762434, "step_time": 0.3527428512573242} +{"epoch": 0, "iter": 16721, "iter_tflops": 26.457700621638228, "iter_time": 0.7797765121459961, "loss": 0.1407097578048706, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 27.79996178101308, "step_time": 0.7421266860961913} +{"epoch": 0, "iter": 16722, "iter_tflops": 14.159987305780101, "iter_time": 1.4569994354248048, "loss": 0.18527251482009888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.44921162510665, "step_time": 1.1823510398864747} +{"epoch": 0, "iter": 16723, "iter_tflops": 39.915617204794174, "iter_time": 0.5168677062988282, "loss": 0.12339629232883453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.078195436695175, "step_time": 0.46805667304992676} +{"epoch": 0, "iter": 16724, "iter_tflops": 43.72218575337322, "iter_time": 0.4718678436279297, "loss": 0.17675764858722687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.72223801164673, "step_time": 0.4323161354064941} +{"epoch": 0, "iter": 16725, "iter_tflops": 26.679758929783713, "iter_time": 0.7732863540649414, "loss": 0.11716385185718536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.672168048004025, "step_time": 0.6953011817932129} +{"epoch": 0, "iter": 16726, "iter_tflops": 36.74994623477099, "iter_time": 0.5613911209106446, "loss": 0.08673370629549026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.195925481403826, "step_time": 0.5132633037567139} +{"epoch": 0, "iter": 16727, "iter_tflops": 42.70860979703435, "iter_time": 0.4830663795471192, "loss": 0.15208709239959717, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.82818878306083, "step_time": 0.44056996536254883} +{"epoch": 0, "iter": 16728, "iter_tflops": 42.338289961025, "iter_time": 0.48729161071777344, "loss": 0.09768802672624588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15567805503869, "step_time": 0.44698928451538084} +{"epoch": 0, "iter": 16729, "iter_tflops": 18.509229612503788, "iter_time": 1.1146381530761718, "loss": 0.6401363015174866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.853463231297894, "step_time": 1.0391684951782227} +{"epoch": 0, "iter": 16730, "iter_tflops": 15.436255900576956, "iter_time": 1.3365348205566407, "loss": 
0.7164884805679321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.187387726662333, "step_time": 1.1343626594543457} +{"epoch": 0, "iter": 16731, "iter_tflops": 43.526340080288925, "iter_time": 0.4739910011291503, "loss": 0.5779261589050293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.846903162455, "step_time": 0.4403939666748047} +{"epoch": 0, "iter": 16732, "iter_tflops": 44.71448563718414, "iter_time": 0.4613961944580078, "loss": 0.6384440064430237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.214663988079586, "step_time": 0.4279008045196533} +{"epoch": 0, "iter": 16733, "iter_tflops": 46.16483415170971, "iter_time": 0.44690063095092775, "loss": 0.004055374767631292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.12344602361924, "step_time": 0.40355443763732907} +{"epoch": 0, "iter": 16734, "iter_tflops": 52.004688850716235, "iter_time": 0.3967160263061523, "loss": 0.0029586604796350002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.380409312658806, "step_time": 0.35954943084716795} +{"epoch": 0, "iter": 16735, "iter_tflops": 54.82659220693218, "iter_time": 0.3762972068786622, "loss": 0.00851358100771904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.278303207196444, "step_time": 0.3422640056610108} +{"epoch": 0, "iter": 16736, "iter_tflops": 40.7318157208231, "iter_time": 0.5065105285644531, "loss": 0.010897759348154068, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.2041830532859, "step_time": 0.4563978843688965} +{"epoch": 0, "iter": 16737, "iter_tflops": 32.68514961301126, "iter_time": 0.6312069473266603, "loss": 0.5494173169136047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.16493018903463, "step_time": 0.5704723720550537} +{"epoch": 0, "iter": 16738, "iter_tflops": 41.56070258827211, "iter_time": 0.4964086799621582, "loss": 0.6169764995574951, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.190893621110995, "step_time": 0.4565320987701416} +{"epoch": 0, "iter": 16739, "iter_tflops": 44.96041061744953, "iter_time": 
0.45887244415283207, "loss": 0.6832804679870605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.61122651041738, "step_time": 0.4244100589752198} +{"epoch": 0, "iter": 16740, "iter_tflops": 49.0520968919347, "iter_time": 0.4205955467224121, "loss": 0.7292499542236328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.03318589874438, "step_time": 0.3890223293304443} +{"epoch": 0, "iter": 16741, "iter_tflops": 29.697228588191226, "iter_time": 0.6947144393920898, "loss": 0.18832522630691528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.466254179161563, "step_time": 0.6556577529907227} +{"epoch": 0, "iter": 16742, "iter_tflops": 9.873947789078956, "iter_time": 2.089447296142578, "loss": 0.18807701766490936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.631895956177383, "step_time": 1.6332539138793947} +{"epoch": 0, "iter": 16743, "iter_tflops": 13.046174652638944, "iter_time": 1.5813902587890625, "loss": 0.20846185088157654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.818962464894703, "step_time": 1.2266567306518554} +{"epoch": 0, "iter": 16744, "iter_tflops": 19.993729912520113, "iter_time": 1.0318781738281249, "loss": 0.18402953445911407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.80458791501635, "step_time": 0.8317450618743896} +{"epoch": 0, "iter": 16745, "iter_tflops": 12.466934338190677, "iter_time": 1.2023369140625, "loss": 0.21150000393390656, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 13.387559124588838, "step_time": 1.1196555862426756} +{"epoch": 0, "iter": 16746, "iter_tflops": 15.508588291137627, "iter_time": 0.9665261001586914, "loss": 0.1506168246269226, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 18.926189755065533, "step_time": 0.7919954071044921} +{"epoch": 0, "iter": 16747, "iter_tflops": 25.777970429419298, "iter_time": 0.5814831466674805, "loss": 0.13969647884368896, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.43868973706948, "step_time": 0.5462890357971192} +{"epoch": 0, "iter": 16748, "iter_tflops": 26.13218394180189, 
"iter_time": 0.5736013259887696, "loss": 0.18852569162845612, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.86249457655642, "step_time": 0.5379796600341796} +{"epoch": 0, "iter": 16749, "iter_tflops": 21.018961601742372, "iter_time": 0.9815467529296875, "loss": 0.39195185899734497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.897557790321724, "step_time": 0.9421641311645508} +{"epoch": 0, "iter": 16750, "iter_tflops": 17.46950360632083, "iter_time": 1.1809776611328124, "loss": 0.3847251534461975, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.23502328452934, "step_time": 0.887930830001831} +{"epoch": 0, "iter": 16751, "iter_tflops": 38.60664417564141, "iter_time": 0.5343923034667969, "loss": 0.42917072772979736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.25817115620253, "step_time": 0.4882154846191406} +{"epoch": 0, "iter": 16752, "iter_tflops": 38.49468709914593, "iter_time": 0.5359465179443359, "loss": 0.41381898522377014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.992146571595164, "step_time": 0.4913083801269531} +{"epoch": 0, "iter": 16753, "iter_tflops": 22.285797480433477, "iter_time": 0.9257507400512694, "loss": 0.45433634519577026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.23041638039159, "step_time": 0.8514543533325195} +{"epoch": 0, "iter": 16754, "iter_tflops": 34.40831882945691, "iter_time": 0.5995960922241211, "loss": 0.32895779609680176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.257961828669316, "step_time": 0.5392627449035645} +{"epoch": 0, "iter": 16755, "iter_tflops": 47.34714224433043, "iter_time": 0.4357410507202148, "loss": 0.3928803503513336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.41959773022601, "step_time": 0.4012301616668701} +{"epoch": 0, "iter": 16756, "iter_tflops": 51.35342251399611, "iter_time": 0.40174719619750976, "loss": 0.3298929035663605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.67477071146896, "step_time": 0.37056449890136717} +{"epoch": 0, "iter": 16757, "iter_tflops": 
31.13727399193833, "iter_time": 0.662585090637207, "loss": 0.03404207527637482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.03668911659269, "step_time": 0.6244903488159179} +{"epoch": 0, "iter": 16758, "iter_tflops": 17.525007369673023, "iter_time": 1.1772373657226562, "loss": 0.11692528426647186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.63162831685236, "step_time": 0.9537466716766358} +{"epoch": 0, "iter": 16759, "iter_tflops": 50.79796904133009, "iter_time": 0.40614012527465815, "loss": 0.041984010487794876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.43929260254944, "step_time": 0.37213846969604486} +{"epoch": 0, "iter": 16760, "iter_tflops": 48.72051760425104, "iter_time": 0.4234580116271973, "loss": 0.11047941446304321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.70148368990344, "step_time": 0.3914708290100098} +{"epoch": 0, "iter": 16761, "iter_tflops": 23.204410835810176, "iter_time": 0.8891022338867187, "loss": 0.6877605319023132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.224236479329907, "step_time": 0.8516715698242188} +{"epoch": 0, "iter": 16762, "iter_tflops": 11.520198040016362, "iter_time": 1.790862747192383, "loss": 0.5774659514427185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.315880711988616, "step_time": 1.3470393180847167} +{"epoch": 0, "iter": 16763, "iter_tflops": 41.47631571497982, "iter_time": 0.49741866302490234, "loss": 0.844436764717102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.72934571120404, "step_time": 0.4612429084777832} +{"epoch": 0, "iter": 16764, "iter_tflops": 42.46780955108585, "iter_time": 0.4858054542541504, "loss": 0.7194323539733887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.7003964384326, "step_time": 0.45144233131408695} +{"epoch": 0, "iter": 16765, "iter_tflops": 21.29938539646074, "iter_time": 0.828902015686035, "loss": 0.10321205854415894, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 22.407964976867596, "step_time": 0.7878941040039062} +{"epoch": 0, "iter": 16766, 
"iter_tflops": 15.45698159764152, "iter_time": 1.142208999633789, "loss": 0.12565948069095612, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 18.921030801576492, "step_time": 0.9330941677093506} +{"epoch": 0, "iter": 16767, "iter_tflops": 44.575907493960415, "iter_time": 0.3960682907104492, "loss": 0.08585656434297562, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 48.576844165709005, "step_time": 0.3634469013214111} +{"epoch": 0, "iter": 16768, "iter_tflops": 44.78352190491917, "iter_time": 0.39423213577270505, "loss": 0.05995054170489311, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 48.763197055894665, "step_time": 0.3620579566955566} +{"epoch": 0, "iter": 16769, "iter_tflops": 19.44989998031098, "iter_time": 1.0607300567626952, "loss": 0.20734156668186188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.24383881392196, "step_time": 1.0191295089721681} +{"epoch": 0, "iter": 16770, "iter_tflops": 16.230502833494604, "iter_time": 1.2711308898925782, "loss": 0.16154368221759796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.08849744226531, "step_time": 1.0270102863311767} +{"epoch": 0, "iter": 16771, "iter_tflops": 38.152133774999186, "iter_time": 0.5407585754394533, "loss": 0.12341892719268799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.84984816815993, "step_time": 0.4929789333343506} +{"epoch": 0, "iter": 16772, "iter_tflops": 44.016992826816605, "iter_time": 0.4687074737548828, "loss": 0.22157394886016846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.19031656754772, "step_time": 0.42811699485778815} +{"epoch": 0, "iter": 16773, "iter_tflops": 24.22825562873477, "iter_time": 0.851530288696289, "loss": 0.25683146715164185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.80974887019511, "step_time": 0.7993527412414551} +{"epoch": 0, "iter": 16774, "iter_tflops": 14.460733453068714, "iter_time": 1.4266975860595703, "loss": 0.26554134488105774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.782224472915214, "step_time": 1.2293420066833496} +{"epoch": 
0, "iter": 16775, "iter_tflops": 44.391398590445604, "iter_time": 0.4647543029785156, "loss": 0.2537255883216858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.75184660218682, "step_time": 0.4231858882904053} +{"epoch": 0, "iter": 16776, "iter_tflops": 49.62906540210509, "iter_time": 0.4157058639526367, "loss": 0.2808856964111328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.82733138859528, "step_time": 0.38328285980224613} +{"epoch": 0, "iter": 16777, "iter_tflops": 24.583874061018356, "iter_time": 0.8392124633789062, "loss": 0.4824260175228119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.858507914348518, "step_time": 0.7978454742431642} +{"epoch": 0, "iter": 16778, "iter_tflops": 43.98844857863583, "iter_time": 0.4690116195678711, "loss": 0.46184733510017395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.475476013663545, "step_time": 0.43456317329406735} +{"epoch": 0, "iter": 16779, "iter_tflops": 49.75609515150515, "iter_time": 0.414644546508789, "loss": 0.5381037592887878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.83494139480985, "step_time": 0.38322867965698243} +{"epoch": 0, "iter": 16780, "iter_tflops": 48.89330891140628, "iter_time": 0.4219614906311035, "loss": 0.4896482527256012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.89030668590906, "step_time": 0.3900732440948486} +{"epoch": 0, "iter": 16781, "iter_tflops": 1.5763805083160953, "iter_time": 0.9194824676513672, "loss": 1.376279592514038, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.647817271458947, "step_time": 0.8796207351684571} +{"epoch": 0, "iter": 16782, "iter_tflops": 0.9512613737395252, "iter_time": 1.5237181701660159, "loss": 1.71748948097229, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.2640127147709321, "step_time": 1.1467085914611816} +{"epoch": 0, "iter": 16783, "iter_tflops": 2.8876374799829816, "iter_time": 0.5019515953063964, "loss": 1.5587080717086792, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.1613729865007527, "step_time": 0.45848884201049805} 
+{"epoch": 0, "iter": 16784, "iter_tflops": 2.879578382485218, "iter_time": 0.5033564109802247, "loss": 1.62981116771698, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.1571383032297864, "step_time": 0.4591038150787353} +{"epoch": 0, "iter": 16785, "iter_tflops": 31.198791179645514, "iter_time": 0.6612786178588866, "loss": 0.48829129338264465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.46904449807987, "step_time": 0.5985397567749023} +{"epoch": 0, "iter": 16786, "iter_tflops": 36.06503574819028, "iter_time": 0.572052490234375, "loss": 0.5776073336601257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.403173108568545, "step_time": 0.5235896472930909} +{"epoch": 0, "iter": 16787, "iter_tflops": 38.96749080803913, "iter_time": 0.5294437255859374, "loss": 0.6529361605644226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53991724295368, "step_time": 0.484981985092163} +{"epoch": 0, "iter": 16788, "iter_tflops": 39.53547387175933, "iter_time": 0.5218375167846679, "loss": 0.5227707028388977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94650901857863, "step_time": 0.48039046669006347} +{"epoch": 0, "iter": 16789, "iter_tflops": 27.981696331783795, "iter_time": 0.7373067474365235, "loss": 0.2745409309864044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.9070964919629, "step_time": 0.6675196266174317} +{"epoch": 0, "iter": 16790, "iter_tflops": 38.81658296631056, "iter_time": 0.531502052307129, "loss": 0.29221922159194946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52545304974203, "step_time": 0.48514694213867193} +{"epoch": 0, "iter": 16791, "iter_tflops": 39.39154427615738, "iter_time": 0.5237442169189452, "loss": 0.2594703435897827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.04565317482334, "step_time": 0.47928401565551754} +{"epoch": 0, "iter": 16792, "iter_tflops": 36.98016887378316, "iter_time": 0.557896141052246, "loss": 0.3069077432155609, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.065885824498686, "step_time": 0.5149291744232177} 
+{"epoch": 0, "iter": 16793, "iter_tflops": 14.062212823519538, "iter_time": 1.4671299438476562, "loss": 0.5823739171028137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.006711244314468, "step_time": 1.3747911300659181} +{"epoch": 0, "iter": 16794, "iter_tflops": 17.36724591271721, "iter_time": 1.1879312133789064, "loss": 0.5860048532485962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.570472982901418, "step_time": 1.0029469680786134} +{"epoch": 0, "iter": 16795, "iter_tflops": 38.84627094755026, "iter_time": 0.5310958557128906, "loss": 0.6802548766136169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.26921712807287, "step_time": 0.4880879020690918} +{"epoch": 0, "iter": 16796, "iter_tflops": 37.67070208344782, "iter_time": 0.5476694717407227, "loss": 0.5626577734947205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.944992654811486, "step_time": 0.503873420715332} +{"epoch": 0, "iter": 16797, "iter_tflops": 32.24988031406911, "iter_time": 0.639726203918457, "loss": 0.3126820921897888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.551383533872425, "step_time": 0.5803175983428955} +{"epoch": 0, "iter": 16798, "iter_tflops": 36.96296184877441, "iter_time": 0.5581558532714844, "loss": 0.20810769498348236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.26727920705239, "step_time": 0.5123537998199463} +{"epoch": 0, "iter": 16799, "iter_tflops": 38.62601677752848, "iter_time": 0.534124282836914, "loss": 0.33681511878967285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29388621464202, "step_time": 0.48780321121215825} +{"epoch": 0, "iter": 16800, "iter_tflops": 45.32592343612817, "iter_time": 0.45517205047607423, "loss": 0.30485740303993225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6674403219671, "step_time": 0.41538467407226565} +{"epoch": 0, "iter": 16801, "iter_tflops": 18.200878281928194, "iter_time": 1.1335218658447266, "loss": 0.32831087708473206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.540131413124158, "step_time": 
1.055831871032715} +{"epoch": 0, "iter": 16802, "iter_tflops": 17.688223896235844, "iter_time": 1.16637451171875, "loss": 0.3377693295478821, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.381088380058387, "step_time": 0.9649225120544435} +{"epoch": 0, "iter": 16803, "iter_tflops": 47.49401970625506, "iter_time": 0.43439350128173826, "loss": 0.3075850009918213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.547174194459714, "step_time": 0.40023713874816896} +{"epoch": 0, "iter": 16804, "iter_tflops": 47.224414556389306, "iter_time": 0.4368734626770019, "loss": 0.294881135225296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.126455199869746, "step_time": 0.4035306854248047} +{"epoch": 0, "iter": 16805, "iter_tflops": 32.06804858676014, "iter_time": 0.6433535690307617, "loss": 0.1520709991455078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.13379847533491, "step_time": 0.6044183311462402} +{"epoch": 0, "iter": 16806, "iter_tflops": 16.17636196353619, "iter_time": 1.27538525390625, "loss": 0.060133762657642365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.69648663859615, "step_time": 1.0474504356384278} +{"epoch": 0, "iter": 16807, "iter_tflops": 38.3191444653617, "iter_time": 0.5384017257690429, "loss": 0.05606066808104515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.368492219061096, "step_time": 0.4869442462921142} +{"epoch": 0, "iter": 16808, "iter_tflops": 45.877818177341645, "iter_time": 0.4496964836120605, "loss": 0.041953202337026596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.43187970795772, "step_time": 0.4090883312225342} +{"epoch": 0, "iter": 16809, "iter_tflops": 28.8856971651847, "iter_time": 0.7142321472167968, "loss": 0.03384513035416603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.843341467725807, "step_time": 0.6478934860229492} +{"epoch": 0, "iter": 16810, "iter_tflops": 47.71180304879402, "iter_time": 0.43241068649291986, "loss": 0.054412513971328735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.34075132403953, 
"step_time": 0.3867792072296143} +{"epoch": 0, "iter": 16811, "iter_tflops": 53.16924339300999, "iter_time": 0.3880268402099609, "loss": 0.02990749478340149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.16761530303583, "step_time": 0.3546835021972656} +{"epoch": 0, "iter": 16812, "iter_tflops": 52.00294669715753, "iter_time": 0.39672931671142586, "loss": 0.02554965205490589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.48275729634861, "step_time": 0.3652635688781739} +{"epoch": 0, "iter": 16813, "iter_tflops": 39.06799701611982, "iter_time": 0.5280816802978516, "loss": 0.22103098034858704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.79697658150992, "step_time": 0.4820689487457276} +{"epoch": 0, "iter": 16814, "iter_tflops": 45.273923578093516, "iter_time": 0.45569484329223636, "loss": 0.40600037574768066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.86291411440254, "step_time": 0.42222396850585936} +{"epoch": 0, "iter": 16815, "iter_tflops": 45.76191849377927, "iter_time": 0.45083541488647455, "loss": 0.3413366675376892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50171142373547, "step_time": 0.4167753582000732} +{"epoch": 0, "iter": 16816, "iter_tflops": 45.498927133169296, "iter_time": 0.4534413185119629, "loss": 0.28237345814704895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.03801570685045, "step_time": 0.4207163200378418} +{"epoch": 0, "iter": 16817, "iter_tflops": 40.80271505253133, "iter_time": 0.5056304092407226, "loss": 0.20459075272083282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.48155573667869, "step_time": 0.4638123188018799} +{"epoch": 0, "iter": 16818, "iter_tflops": 9.816437754388712, "iter_time": 2.101688415527344, "loss": 0.17746128141880035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.332189544563212, "step_time": 1.8205743408203126} +{"epoch": 0, "iter": 16819, "iter_tflops": 13.511099904181723, "iter_time": 1.526973648071289, "loss": 0.26060017943382263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
16.01924870247314, "step_time": 1.2878939514160155} +{"epoch": 0, "iter": 16820, "iter_tflops": 35.83703772634861, "iter_time": 0.5756919326782226, "loss": 0.19651563465595245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17078199142257, "step_time": 0.4567353630065918} +{"epoch": 0, "iter": 16821, "iter_tflops": 14.18227207584191, "iter_time": 1.1030507049560547, "loss": 0.22291621565818787, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 14.964775077107635, "step_time": 1.04537255859375} +{"epoch": 0, "iter": 16822, "iter_tflops": 5.99768977995757, "iter_time": 2.608298492431641, "loss": 0.3295916020870209, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 7.0065020518470345, "step_time": 2.2327496795654294} +{"epoch": 0, "iter": 16823, "iter_tflops": 10.256668232537283, "iter_time": 1.525228744506836, "loss": 0.2064952254295349, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 12.963480468379093, "step_time": 1.2067565689086914} +{"epoch": 0, "iter": 16824, "iter_tflops": 21.70575509034087, "iter_time": 0.7207196960449219, "loss": 0.36333581805229187, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 23.336162488650825, "step_time": 0.6703657989501953} +{"epoch": 0, "iter": 16825, "iter_tflops": 18.54779830351719, "iter_time": 0.746574851989746, "loss": 0.24818119406700134, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 20.22379002794499, "step_time": 0.6847044868469238} +{"epoch": 0, "iter": 16826, "iter_tflops": 20.757255169546323, "iter_time": 0.6671074600219726, "loss": 0.35287782549858093, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 22.35248678302504, "step_time": 0.6194979515075684} +{"epoch": 0, "iter": 16827, "iter_tflops": 21.01782087471035, "iter_time": 0.6588370819091797, "loss": 0.17250043153762817, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 22.661556471768403, "step_time": 0.6110489273071289} +{"epoch": 0, "iter": 16828, "iter_tflops": 22.139401665286552, "iter_time": 0.6254604339599609, "loss": 0.21913284063339233, "lr": 3e-05, "seqlen": 5552.0, 
"step_tflops": 23.755045133879896, "step_time": 0.5829212150573732} +{"epoch": 0, "iter": 16829, "iter_tflops": 15.283348514024015, "iter_time": 1.3499066314697266, "loss": 0.19091135263442993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.129462070652878, "step_time": 1.2790937118530274} +{"epoch": 0, "iter": 16830, "iter_tflops": 19.042978921769212, "iter_time": 1.0833963317871094, "loss": 0.2716951370239258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.034513770905345, "step_time": 0.7924516544342041} +{"epoch": 0, "iter": 16831, "iter_tflops": 47.982432514608, "iter_time": 0.42997181320190425, "loss": 0.24566598236560822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.18608687598567, "step_time": 0.3953370475769043} +{"epoch": 0, "iter": 16832, "iter_tflops": 49.04368145541779, "iter_time": 0.4206677169799805, "loss": 0.2353094071149826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.10510701102441, "step_time": 0.3884954700469971} +{"epoch": 0, "iter": 16833, "iter_tflops": 19.265857235819187, "iter_time": 1.0708629913330079, "loss": 0.3956306278705597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.0823238477815, "step_time": 1.0273260040283203} +{"epoch": 0, "iter": 16834, "iter_tflops": 13.666351290459428, "iter_time": 1.5096270446777345, "loss": 0.511523962020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.720089112855106, "step_time": 1.233910499572754} +{"epoch": 0, "iter": 16835, "iter_tflops": 39.74233208048371, "iter_time": 0.5191213607788087, "loss": 0.31675174832344055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.57153523217106, "step_time": 0.4734993476867676} +{"epoch": 0, "iter": 16836, "iter_tflops": 44.61766396712524, "iter_time": 0.4623974380493164, "loss": 0.361856609582901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.775066230932154, "step_time": 0.4229844284057617} +{"epoch": 0, "iter": 16837, "iter_tflops": 29.132713231099267, "iter_time": 0.7081761779785155, "loss": 0.07575301826000214, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 31.47932691860865, "step_time": 0.655385471343994} +{"epoch": 0, "iter": 16838, "iter_tflops": 10.813588696073763, "iter_time": 1.9078859100341796, "loss": 0.08331497758626938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.458975166239444, "step_time": 1.6559221954345702} +{"epoch": 0, "iter": 16839, "iter_tflops": 16.101713800387, "iter_time": 1.2812979888916016, "loss": 0.10712520778179169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.966430436111, "step_time": 1.1483134384155276} +{"epoch": 0, "iter": 16840, "iter_tflops": 28.24385757539516, "iter_time": 0.7304630203247071, "loss": 0.16277647018432617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.070693899787884, "step_time": 0.6433005027770996} +{"epoch": 0, "iter": 16841, "iter_tflops": 16.64400937079261, "iter_time": 0.9940762023925781, "loss": 0.22337545454502106, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 17.74413738072644, "step_time": 0.9324439544677734} +{"epoch": 0, "iter": 16842, "iter_tflops": 8.730760538348449, "iter_time": 1.8950712890625, "loss": 0.3013942539691925, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 10.286931320558315, "step_time": 1.608391571044922} +{"epoch": 0, "iter": 16843, "iter_tflops": 12.862542313817242, "iter_time": 1.2863253021240235, "loss": 0.23023612797260284, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 14.88563142623553, "step_time": 1.1115023040771483} +{"epoch": 0, "iter": 16844, "iter_tflops": 26.42718878092362, "iter_time": 0.6260754318237305, "loss": 0.1389552801847458, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 28.31221570577137, "step_time": 0.5843913383483887} +{"epoch": 0, "iter": 16845, "iter_tflops": 17.18196088694252, "iter_time": 0.8747729339599608, "loss": 0.17604146897792816, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 18.05128747441296, "step_time": 0.8326450042724609} +{"epoch": 0, "iter": 16846, "iter_tflops": 9.060676838519042, "iter_time": 1.6588511657714844, "loss": 0.19916154444217682, "lr": 3e-05, 
"seqlen": 6016.0, "step_tflops": 11.404245867397739, "step_time": 1.3179577598571777} +{"epoch": 0, "iter": 16847, "iter_tflops": 27.50958327882878, "iter_time": 0.5463664855957031, "loss": 0.19510851800441742, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.334277434930854, "step_time": 0.5123805885314942} +{"epoch": 0, "iter": 16848, "iter_tflops": 26.933874462803335, "iter_time": 0.5580450134277344, "loss": 0.24297034740447998, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.687631393809227, "step_time": 0.5239301261901856} +{"epoch": 0, "iter": 16849, "iter_tflops": 31.858714709918218, "iter_time": 0.6475808486938477, "loss": 0.3840220868587494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.92122625766051, "step_time": 0.6082060050964355} +{"epoch": 0, "iter": 16850, "iter_tflops": 13.59033600757892, "iter_time": 1.5180708923339843, "loss": 0.3160303235054016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.23361187613215, "step_time": 1.1971427497863771} +{"epoch": 0, "iter": 16851, "iter_tflops": 38.24552782790901, "iter_time": 0.5394380645751953, "loss": 0.4610706865787506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.887908100621175, "step_time": 0.492531005859375} +{"epoch": 0, "iter": 16852, "iter_tflops": 40.71262926892516, "iter_time": 0.5067492294311524, "loss": 0.382052481174469, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.17880374678306, "step_time": 0.46699076843261716} +{"epoch": 0, "iter": 16853, "iter_tflops": 15.47570332149421, "iter_time": 1.3331280059814454, "loss": 0.6407806277275085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.577865681530014, "step_time": 1.2444963607788087} +{"epoch": 0, "iter": 16854, "iter_tflops": 16.51138846217932, "iter_time": 1.2495068817138673, "loss": 0.6126849055290222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.20136145422139, "step_time": 1.0212724304199219} +{"epoch": 0, "iter": 16855, "iter_tflops": 38.899980146373515, "iter_time": 0.5303625717163087, "loss": 0.6688714027404785, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 42.201276831474296, "step_time": 0.4888736801147461} +{"epoch": 0, "iter": 16856, "iter_tflops": 39.13276248719661, "iter_time": 0.5272076950073242, "loss": 0.48318976163864136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.61278643139105, "step_time": 0.4841526508331299} +{"epoch": 0, "iter": 16857, "iter_tflops": 16.132038429997085, "iter_time": 1.278889434814453, "loss": 0.49445873498916626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.079156331872436, "step_time": 1.2079691238403323} +{"epoch": 0, "iter": 16858, "iter_tflops": 21.884791770467416, "iter_time": 0.9427137222290038, "loss": 0.6055393815040588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.7447624214834, "step_time": 0.743603178024292} +{"epoch": 0, "iter": 16859, "iter_tflops": 37.94185550604752, "iter_time": 0.5437555236816407, "loss": 0.5627973079681396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.030672004451866, "step_time": 0.5028212432861328} +{"epoch": 0, "iter": 16860, "iter_tflops": 36.281803613447465, "iter_time": 0.5686347274780273, "loss": 0.5718582272529602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.319795654648296, "step_time": 0.5246999168395996} +{"epoch": 0, "iter": 16861, "iter_tflops": 21.088894946091756, "iter_time": 0.9782918243408203, "loss": 0.3692605793476105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.934304144047438, "step_time": 0.8995735549926758} +{"epoch": 0, "iter": 16862, "iter_tflops": 18.729455869214863, "iter_time": 1.101531921386719, "loss": 0.4040786027908325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.489996275856143, "step_time": 0.8424294261932372} +{"epoch": 0, "iter": 16863, "iter_tflops": 43.64110714835126, "iter_time": 0.4727445030212402, "loss": 0.3690332770347595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08272331317274, "step_time": 0.4381881942749023} +{"epoch": 0, "iter": 16864, "iter_tflops": 47.79541201861047, "iter_time": 0.43165426635742193, "loss": 
0.47447091341018677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.67607037404525, "step_time": 0.3992388229370117} +{"epoch": 0, "iter": 16865, "iter_tflops": 29.794295229282188, "iter_time": 0.6924511337280274, "loss": 0.304103821516037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.695479087633487, "step_time": 0.6509159698486328} +{"epoch": 0, "iter": 16866, "iter_tflops": 6.587477479680398, "iter_time": 3.1318655090332026, "loss": 0.31472456455230713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.8961787957005605, "step_time": 2.6127946243286133} +{"epoch": 0, "iter": 16867, "iter_tflops": 14.462666963598865, "iter_time": 1.4265068511962888, "loss": 0.37392458319664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.528011401223544, "step_time": 1.248250198364258} +{"epoch": 0, "iter": 16868, "iter_tflops": 44.59218549606201, "iter_time": 0.4626616363525391, "loss": 0.3490280210971832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.4429549074238, "step_time": 0.4010479869842529} +{"epoch": 0, "iter": 16869, "iter_tflops": 19.436796157637747, "iter_time": 0.7753941497802735, "loss": 0.26914793252944946, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 20.578234229405528, "step_time": 0.7323844146728516} +{"epoch": 0, "iter": 16870, "iter_tflops": 13.366139108113378, "iter_time": 1.1275640563964844, "loss": 0.18898999691009521, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 15.484142990609838, "step_time": 0.9733298149108887} +{"epoch": 0, "iter": 16871, "iter_tflops": 26.70031994130316, "iter_time": 0.5644568328857422, "loss": 0.13752534985542297, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 28.470316437793638, "step_time": 0.5293646125793458} +{"epoch": 0, "iter": 16872, "iter_tflops": 26.202641255693756, "iter_time": 0.5751778182983399, "loss": 0.20949864387512207, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 27.976567165053567, "step_time": 0.5387071952819824} +{"epoch": 0, "iter": 16873, "iter_tflops": 18.378792748787852, "iter_time": 
0.813361427307129, "loss": 0.021134987473487854, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 19.32725089893351, "step_time": 0.7734468383789064} +{"epoch": 0, "iter": 16874, "iter_tflops": 11.836962909292435, "iter_time": 1.2628747100830078, "loss": 0.02912798523902893, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 15.002529835778963, "step_time": 0.9964053573608398} +{"epoch": 0, "iter": 16875, "iter_tflops": 30.811161023351907, "iter_time": 0.48516838073730467, "loss": 0.02540113776922226, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 34.106451213465036, "step_time": 0.43829248046874997} +{"epoch": 0, "iter": 16876, "iter_tflops": 33.4551345252645, "iter_time": 0.44682531738281256, "loss": 0.015746796503663063, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 36.95450430836683, "step_time": 0.4045136413574218} +{"epoch": 0, "iter": 16877, "iter_tflops": 20.61289634485349, "iter_time": 1.0008828048706053, "loss": 0.6527082920074463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.972208936437475, "step_time": 0.9389631042480469} +{"epoch": 0, "iter": 16878, "iter_tflops": 21.13983993828776, "iter_time": 0.9759342346191408, "loss": 0.35306650400161743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.094487589044043, "step_time": 0.7906303367614745} +{"epoch": 0, "iter": 16879, "iter_tflops": 45.941794000124375, "iter_time": 0.44907026290893554, "loss": 0.48010924458503723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.75713472839862, "step_time": 0.41463588333129886} +{"epoch": 0, "iter": 16880, "iter_tflops": 43.53620435650755, "iter_time": 0.4738836059570312, "loss": 0.5272915959358215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.35253551400903, "step_time": 0.43569142150878903} +{"epoch": 0, "iter": 16881, "iter_tflops": 33.313373854197394, "iter_time": 0.6193036346435546, "loss": 0.14580179750919342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.51718878931308, "step_time": 0.5808763084411621} +{"epoch": 0, "iter": 16882, "iter_tflops": 
10.688166667630794, "iter_time": 1.9302743072509765, "loss": 0.11278317868709564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.336804555304928, "step_time": 1.5469292831420898} +{"epoch": 0, "iter": 16883, "iter_tflops": 10.56055391010656, "iter_time": 1.9535995635986327, "loss": 0.17731241881847382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.055240694675893, "step_time": 1.7113796424865724} +{"epoch": 0, "iter": 16884, "iter_tflops": 34.95997871395262, "iter_time": 0.5901346130371093, "loss": 0.15059418976306915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.84749359283859, "step_time": 0.4499939231872558} +{"epoch": 0, "iter": 16885, "iter_tflops": 22.09515306596883, "iter_time": 0.7098706054687499, "loss": 0.16292843222618103, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 23.48759318591463, "step_time": 0.6677865867614746} +{"epoch": 0, "iter": 16886, "iter_tflops": 12.59228373903067, "iter_time": 1.2455802307128905, "loss": 0.14986377954483032, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 13.883164185750838, "step_time": 1.1297640419006347} +{"epoch": 0, "iter": 16887, "iter_tflops": 27.812121794056974, "iter_time": 0.5639519271850586, "loss": 0.08754965662956238, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.667534019418756, "step_time": 0.5286822853088379} +{"epoch": 0, "iter": 16888, "iter_tflops": 26.2887226418254, "iter_time": 0.5966322479248047, "loss": 0.26639649271965027, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 27.971142059845846, "step_time": 0.5607457733154297} +{"epoch": 0, "iter": 16889, "iter_tflops": 34.67609914778864, "iter_time": 0.5949658126831054, "loss": 0.004252477549016476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.22671326012457, "step_time": 0.5542013168334962} +{"epoch": 0, "iter": 16890, "iter_tflops": 10.266811827329205, "iter_time": 2.0094936828613283, "loss": 0.007648559752851725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.23197806518092, "step_time": 1.5591843795776366} +{"epoch": 0, "iter": 
16891, "iter_tflops": 14.541392411494822, "iter_time": 1.418783905029297, "loss": 0.009680365212261677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.853946644879954, "step_time": 1.2241105270385741} +{"epoch": 0, "iter": 16892, "iter_tflops": 19.241444782351188, "iter_time": 1.0722216415405275, "loss": 0.0056301262229681015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.711800462974416, "step_time": 0.8023978538513185} +{"epoch": 0, "iter": 16893, "iter_tflops": 21.58058945285888, "iter_time": 0.7248998107910156, "loss": 0.12662477791309357, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 22.945210393446455, "step_time": 0.6817878303527831} +{"epoch": 0, "iter": 16894, "iter_tflops": 9.90179741158784, "iter_time": 1.5798914642333985, "loss": 0.18286506831645966, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 11.324541139543992, "step_time": 1.3814038925170897} +{"epoch": 0, "iter": 16895, "iter_tflops": 11.193726036418976, "iter_time": 1.3975476226806638, "loss": 0.30532190203666687, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 13.724625880386865, "step_time": 1.1398318138122558} +{"epoch": 0, "iter": 16896, "iter_tflops": 22.0310410727191, "iter_time": 0.7100783462524414, "loss": 0.16719689965248108, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 23.997807650908342, "step_time": 0.6518830986022949} +{"epoch": 0, "iter": 16897, "iter_tflops": 23.575597872563275, "iter_time": 0.6323382186889649, "loss": 0.2668383717536926, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 25.31533334033124, "step_time": 0.588882293701172} +{"epoch": 0, "iter": 16898, "iter_tflops": 26.291642357075926, "iter_time": 0.5670148468017577, "loss": 0.23639759421348572, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 28.06165060593716, "step_time": 0.5312499885559082} +{"epoch": 0, "iter": 16899, "iter_tflops": 24.82040401447234, "iter_time": 0.6006248550415039, "loss": 0.2295272946357727, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 26.53411050250808, "step_time": 0.5618334770202636} 
+{"epoch": 0, "iter": 16900, "iter_tflops": 24.504745324700963, "iter_time": 0.6083618240356445, "loss": 0.18719199299812317, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 26.038544087383887, "step_time": 0.572526309967041} +{"epoch": 0, "iter": 16901, "iter_tflops": 26.218929432834507, "iter_time": 0.786877799987793, "loss": 0.4818539321422577, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.69123857271302, "step_time": 0.7450404739379882} +{"epoch": 0, "iter": 16902, "iter_tflops": 11.541856898633373, "iter_time": 1.7875021057128906, "loss": 0.6387370228767395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.40375918963167, "step_time": 1.5392020416259764} +{"epoch": 0, "iter": 16903, "iter_tflops": 14.508796958027183, "iter_time": 1.4219713439941404, "loss": 0.748831033706665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.251809660741387, "step_time": 1.269464382171631} +{"epoch": 0, "iter": 16904, "iter_tflops": 21.718785052759504, "iter_time": 0.9499193191528319, "loss": 0.5629379749298096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.925347955818506, "step_time": 0.7957884902954102} +{"epoch": 0, "iter": 16905, "iter_tflops": 14.633874399220714, "iter_time": 0.8795545501708985, "loss": 0.22716020047664642, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 15.36655150005734, "step_time": 0.8376173934936523} +{"epoch": 0, "iter": 16906, "iter_tflops": 5.013282301339444, "iter_time": 2.5674378662109376, "loss": 0.2929041385650635, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 6.071721113436299, "step_time": 2.1198751678466796} +{"epoch": 0, "iter": 16907, "iter_tflops": 7.586515093290644, "iter_time": 1.6966012268066406, "loss": 0.3037652373313904, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 8.764304049166718, "step_time": 1.4686038665771486} +{"epoch": 0, "iter": 16908, "iter_tflops": 18.564413775342796, "iter_time": 0.6933313903808594, "loss": 0.26985812187194824, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 19.955928804133993, "step_time": 
0.6449858055114747} +{"epoch": 0, "iter": 16909, "iter_tflops": 15.739488686355559, "iter_time": 0.9913178100585938, "loss": 0.2583286464214325, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 16.713814454705037, "step_time": 0.933529296875} +{"epoch": 0, "iter": 16910, "iter_tflops": 6.62699389509076, "iter_time": 2.354436370849609, "loss": 0.2345024198293686, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 8.739665030068277, "step_time": 1.7852898712158205} +{"epoch": 0, "iter": 16911, "iter_tflops": 9.875104261483186, "iter_time": 1.580017288208008, "loss": 0.2867037355899811, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 12.065129189115511, "step_time": 1.2932174377441408} +{"epoch": 0, "iter": 16912, "iter_tflops": 27.517109013690767, "iter_time": 0.567023063659668, "loss": 0.09328802675008774, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.422496399994806, "step_time": 0.5303029098510743} +{"epoch": 0, "iter": 16913, "iter_tflops": 22.219692239984195, "iter_time": 0.6323670501708984, "loss": 0.16281820833683014, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 24.118192228608475, "step_time": 0.5825893211364745} +{"epoch": 0, "iter": 16914, "iter_tflops": 24.666421865869925, "iter_time": 0.5696408386230469, "loss": 0.201707124710083, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 26.352478908038503, "step_time": 0.5331946678161621} +{"epoch": 0, "iter": 16915, "iter_tflops": 26.617642832596566, "iter_time": 0.5278830032348633, "loss": 0.2176959365606308, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 28.309228394017932, "step_time": 0.4963399581909179} +{"epoch": 0, "iter": 16916, "iter_tflops": 25.80321248095925, "iter_time": 0.544544647216797, "loss": 0.17618413269519806, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 27.329866400360192, "step_time": 0.5141262321472169} +{"epoch": 0, "iter": 16917, "iter_tflops": 32.39115913826743, "iter_time": 0.6369359436035157, "loss": 0.5386806726455688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.62446217033542, 
"step_time": 0.5958531112670897} +{"epoch": 0, "iter": 16918, "iter_tflops": 35.70556699729519, "iter_time": 0.5778116760253906, "loss": 0.5770529508590698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.01791893345002, "step_time": 0.51554638671875} +{"epoch": 0, "iter": 16919, "iter_tflops": 40.023472871259095, "iter_time": 0.5154748458862305, "loss": 0.8080275058746338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.683566984955704, "step_time": 0.4722850017547607} +{"epoch": 0, "iter": 16920, "iter_tflops": 41.25306219744844, "iter_time": 0.500110595703125, "loss": 0.7378191947937012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.841867173126644, "step_time": 0.46008551406860354} +{"epoch": 0, "iter": 16921, "iter_tflops": 32.0667818966791, "iter_time": 0.6433789825439453, "loss": 0.19152866303920746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.51829581135468, "step_time": 0.5808582038879395} +{"epoch": 0, "iter": 16922, "iter_tflops": 38.29193237834264, "iter_time": 0.5387843399047851, "loss": 0.14118485152721405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.87586077600685, "step_time": 0.49267270278930664} +{"epoch": 0, "iter": 16923, "iter_tflops": 37.18519234144943, "iter_time": 0.5548201370239259, "loss": 0.15530674159526825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87581356318283, "step_time": 0.5047261867523194} +{"epoch": 0, "iter": 16924, "iter_tflops": 38.8053939397682, "iter_time": 0.5316553039550781, "loss": 0.15109486877918243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.38266332256037, "step_time": 0.4867814311981201} +{"epoch": 0, "iter": 16925, "iter_tflops": 27.87255211808911, "iter_time": 0.7401939163208008, "loss": 0.07872536778450012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.152658092269036, "step_time": 0.6842213859558106} +{"epoch": 0, "iter": 16926, "iter_tflops": 8.16974368136291, "iter_time": 2.525304870605469, "loss": 0.07841246575117111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
9.85773314693977, "step_time": 2.092884155273438} +{"epoch": 0, "iter": 16927, "iter_tflops": 13.152190161346091, "iter_time": 1.5686431884765626, "loss": 0.12644724547863007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.36122927663022, "step_time": 1.260974536895752} +{"epoch": 0, "iter": 16928, "iter_tflops": 47.46308736938042, "iter_time": 0.43467660140991216, "loss": 0.14893312752246857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.82672033919476, "step_time": 0.3905427665710449} +{"epoch": 0, "iter": 16929, "iter_tflops": 25.342121191415504, "iter_time": 0.6642214584350586, "loss": 0.2555576264858246, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 27.010520632293026, "step_time": 0.6231934928894043} +{"epoch": 0, "iter": 16930, "iter_tflops": 13.11452492151445, "iter_time": 1.283521957397461, "loss": 0.3255470097064972, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 15.652873690043617, "step_time": 1.0753795776367188} +{"epoch": 0, "iter": 16931, "iter_tflops": 27.318854504285767, "iter_time": 0.6161598281860352, "loss": 0.0913669690489769, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 29.221703284285795, "step_time": 0.5760369453430175} +{"epoch": 0, "iter": 16932, "iter_tflops": 24.841618455591934, "iter_time": 0.6776040267944335, "loss": 0.2591063976287842, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 26.670271266352884, "step_time": 0.6311439628601073} +{"epoch": 0, "iter": 16933, "iter_tflops": 32.14362221556428, "iter_time": 0.641840965270996, "loss": 0.30360257625579834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.50097262475509, "step_time": 0.5811416416168214} +{"epoch": 0, "iter": 16934, "iter_tflops": 37.27755711015468, "iter_time": 0.5534454269409179, "loss": 0.3843730390071869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.893736241361594, "step_time": 0.5045049781799316} +{"epoch": 0, "iter": 16935, "iter_tflops": 40.90517706340188, "iter_time": 0.5043638725280761, "loss": 0.37271326780319214, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.987741390377856, "step_time": 0.4585936717987061} +{"epoch": 0, "iter": 16936, "iter_tflops": 38.58361536997698, "iter_time": 0.5347112579345702, "loss": 0.31010106205940247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.272586068032695, "step_time": 0.4880490036010742} +{"epoch": 0, "iter": 16937, "iter_tflops": 19.203612752853704, "iter_time": 1.074333969116211, "loss": 0.44546276330947876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.57728442630371, "step_time": 1.002614974975586} +{"epoch": 0, "iter": 16938, "iter_tflops": 24.91519920911411, "iter_time": 0.8280525207519531, "loss": 0.45933157205581665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.24178936371244, "step_time": 0.7305165138244629} +{"epoch": 0, "iter": 16939, "iter_tflops": 44.35064293975869, "iter_time": 0.4651813850402832, "loss": 0.5922983884811401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47409223282607, "step_time": 0.4345758399963379} +{"epoch": 0, "iter": 16940, "iter_tflops": 45.773644272541475, "iter_time": 0.4507199249267578, "loss": 0.5937588214874268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.324964831673825, "step_time": 0.4182687931060791} +{"epoch": 0, "iter": 16941, "iter_tflops": 29.942993486735766, "iter_time": 0.6890123901367187, "loss": 0.49665242433547974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.60264715260375, "step_time": 0.6528280181884766} +{"epoch": 0, "iter": 16942, "iter_tflops": 7.438774497072886, "iter_time": 2.7734532775878904, "loss": 0.5468078255653381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.138506435888928, "step_time": 2.2576001510620114} +{"epoch": 0, "iter": 16943, "iter_tflops": 12.885839451507906, "iter_time": 1.601067092895508, "loss": 0.289320170879364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.385070907689926, "step_time": 1.4342017250061037} +{"epoch": 0, "iter": 16944, "iter_tflops": 37.284644879035945, "iter_time": 0.553340217590332, "loss": 0.400716096162796, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 40.787860105753154, "step_time": 0.5058145599365234} +{"epoch": 0, "iter": 16945, "iter_tflops": 17.89623832805345, "iter_time": 0.8970221557617187, "loss": 0.2621336579322815, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 19.312628568744575, "step_time": 0.8312344551086427} +{"epoch": 0, "iter": 16946, "iter_tflops": 28.090508902043258, "iter_time": 0.5714856338500977, "loss": 0.24393230676651, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.179590497394727, "step_time": 0.5319264450073242} +{"epoch": 0, "iter": 16947, "iter_tflops": 28.646381857312324, "iter_time": 0.5603961563110352, "loss": 0.16319821774959564, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.5890619124555, "step_time": 0.5248059692382813} +{"epoch": 0, "iter": 16948, "iter_tflops": 28.60448467516001, "iter_time": 0.5612169723510743, "loss": 0.2464149296283722, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 30.504937525927787, "step_time": 0.5262532424926758} +{"epoch": 0, "iter": 16949, "iter_tflops": 27.228703077740246, "iter_time": 0.757696517944336, "loss": 0.050549592822790146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.786043579657175, "step_time": 0.7167047271728515} +{"epoch": 0, "iter": 16950, "iter_tflops": 11.718291777865511, "iter_time": 1.7605888214111327, "loss": 0.08281546086072922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.056179181366137, "step_time": 1.4677597122192383} +{"epoch": 0, "iter": 16951, "iter_tflops": 50.52638175890896, "iter_time": 0.4083231925964355, "loss": 0.07689745724201202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.256358541179935, "step_time": 0.3733704872131347} +{"epoch": 0, "iter": 16952, "iter_tflops": 49.89237493051586, "iter_time": 0.41351195526123047, "loss": 0.0713195651769638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.90994601988514, "step_time": 0.3826954956054687} +{"epoch": 0, "iter": 16953, "iter_tflops": 42.061847657204225, "iter_time": 0.49049422836303713, "loss": 0.5569464564323425, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 45.59721436527088, "step_time": 0.45246390151977534} +{"epoch": 0, "iter": 16954, "iter_tflops": 8.478388209265681, "iter_time": 2.433374481201172, "loss": 0.49017882347106934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.241826343472063, "step_time": 2.0143959503173825} +{"epoch": 0, "iter": 16955, "iter_tflops": 12.076810081036514, "iter_time": 1.7083230895996095, "loss": 0.44978010654449463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.03616725620782, "step_time": 1.4698523559570316} +{"epoch": 0, "iter": 16956, "iter_tflops": 45.39480242323171, "iter_time": 0.4544814033508301, "loss": 0.6386107802391052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42250615478279, "step_time": 0.4174432888031006} +{"epoch": 0, "iter": 16957, "iter_tflops": 19.770826033342598, "iter_time": 0.8700998306274415, "loss": 0.27043288946151733, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 20.811343721028482, "step_time": 0.8265969085693359} +{"epoch": 0, "iter": 16958, "iter_tflops": 10.890035067630684, "iter_time": 1.5796636352539062, "loss": 0.16277579963207245, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 12.712413980763221, "step_time": 1.3532120971679686} +{"epoch": 0, "iter": 16959, "iter_tflops": 27.350788817594733, "iter_time": 0.6289614715576172, "loss": 0.28812411427497864, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 29.417608292190476, "step_time": 0.584771957397461} +{"epoch": 0, "iter": 16960, "iter_tflops": 28.258917616104263, "iter_time": 0.6087491607666016, "loss": 0.181918665766716, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 30.289985268973503, "step_time": 0.5679300346374511} +{"epoch": 0, "iter": 16961, "iter_tflops": 17.42252507363739, "iter_time": 1.1841620788574219, "loss": 0.6321020126342773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.498037312398623, "step_time": 1.1153125686645506} +{"epoch": 0, "iter": 16962, "iter_tflops": 15.550047239476523, "iter_time": 1.3267543945312499, "loss": 
0.568318247795105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.14109674270581, "step_time": 0.9758762168884277} +{"epoch": 0, "iter": 16963, "iter_tflops": 42.17885532823257, "iter_time": 0.48913355636596684, "loss": 0.5768371820449829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.268470489181, "step_time": 0.45574973678588865} +{"epoch": 0, "iter": 16964, "iter_tflops": 40.03302899054093, "iter_time": 0.5153517990112304, "loss": 0.46370288729667664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.64304078953997, "step_time": 0.483809154510498} +{"epoch": 0, "iter": 16965, "iter_tflops": 41.252509021536085, "iter_time": 0.5001173019409181, "loss": 0.8016843795776367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.82028214488026, "step_time": 0.4603070869445801} +{"epoch": 0, "iter": 16966, "iter_tflops": 43.89322399586784, "iter_time": 0.4700291213989258, "loss": 0.5011740326881409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.68733924762005, "step_time": 0.4326325149536132} +{"epoch": 0, "iter": 16967, "iter_tflops": 45.08744978134488, "iter_time": 0.457579517364502, "loss": 0.7283282279968262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.49361908204098, "step_time": 0.4254393444061279} +{"epoch": 0, "iter": 16968, "iter_tflops": 43.54460335825607, "iter_time": 0.4737922019958496, "loss": 0.5238860845565796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.86481460870086, "step_time": 0.44022565078735354} +{"epoch": 0, "iter": 16969, "iter_tflops": 32.22906734149224, "iter_time": 0.6401393280029297, "loss": 0.042912065982818604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.39201407524529, "step_time": 0.5998803520202637} +{"epoch": 0, "iter": 16970, "iter_tflops": 17.500033933846503, "iter_time": 1.1789173431396485, "loss": 0.043396033346652985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.005415455951173, "step_time": 0.9821797409057619} +{"epoch": 0, "iter": 16971, "iter_tflops": 44.96193600324325, "iter_time": 0.458856876373291, 
"loss": 0.032868094742298126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.44340490240706, "step_time": 0.41726684379577633} +{"epoch": 0, "iter": 16972, "iter_tflops": 41.32034230002324, "iter_time": 0.49929628753662103, "loss": 0.03710520640015602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.48563600621042, "step_time": 0.4535738162994385} +{"epoch": 0, "iter": 16973, "iter_tflops": 17.758793647757752, "iter_time": 0.9594224014282225, "loss": 0.03083682619035244, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 18.912448463192966, "step_time": 0.9008978652954103} +{"epoch": 0, "iter": 16974, "iter_tflops": 7.807429578429211, "iter_time": 2.182303955078125, "loss": 0.03091837652027607, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 9.172752111691628, "step_time": 1.8574779129028318} +{"epoch": 0, "iter": 16975, "iter_tflops": 12.107218954399892, "iter_time": 1.4072748260498047, "loss": 0.059295669198036194, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 13.619829382792366, "step_time": 1.2509836921691895} +{"epoch": 0, "iter": 16976, "iter_tflops": 45.06562716998884, "iter_time": 0.37807494354248045, "loss": 0.029754290357232094, "lr": 3e-05, "seqlen": 6800.0, "step_tflops": 49.533979706985235, "step_time": 0.34396962547302246} +{"epoch": 0, "iter": 16977, "iter_tflops": 13.246813202243777, "iter_time": 1.1593245697021484, "loss": 0.21472270786762238, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 13.732987751757024, "step_time": 1.1182822189331054} +{"epoch": 0, "iter": 16978, "iter_tflops": 11.648360595555756, "iter_time": 1.318413513183594, "loss": 0.31147944927215576, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 13.616396852753407, "step_time": 1.127857551574707} +{"epoch": 0, "iter": 16979, "iter_tflops": 26.417439931761013, "iter_time": 0.5813339996337892, "loss": 0.2181457132101059, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 28.054094749438967, "step_time": 0.5474194107055664} +{"epoch": 0, "iter": 16980, "iter_tflops": 28.058028273740195, 
"iter_time": 0.5473426666259765, "loss": 0.20229534804821014, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.866609646490048, "step_time": 0.5141981697082519} +{"epoch": 0, "iter": 16981, "iter_tflops": 40.14628734494029, "iter_time": 0.5138979187011719, "loss": 0.0024621777702122927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7230867441506, "step_time": 0.4718581199645996} +{"epoch": 0, "iter": 16982, "iter_tflops": 50.063246285509294, "iter_time": 0.41210059356689455, "loss": 0.006214713212102652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.83645204538784, "step_time": 0.3694914836883545} +{"epoch": 0, "iter": 16983, "iter_tflops": 56.63294823956152, "iter_time": 0.3642948875427246, "loss": 0.00708328140899539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.26883026782046, "step_time": 0.33132296562194824} +{"epoch": 0, "iter": 16984, "iter_tflops": 58.73722100152066, "iter_time": 0.35124394989013674, "loss": 0.005391301587224007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.02113862848218, "step_time": 0.32225439834594727} +{"epoch": 0, "iter": 16985, "iter_tflops": 26.374766585666187, "iter_time": 0.7822284774780273, "loss": 0.1538376361131668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.661146783502193, "step_time": 0.7458509826660157} +{"epoch": 0, "iter": 16986, "iter_tflops": 18.50498290347765, "iter_time": 1.1148939514160154, "loss": 0.137381911277771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.30872206386877, "step_time": 0.9247994327545166} +{"epoch": 0, "iter": 16987, "iter_tflops": 38.30928450306686, "iter_time": 0.5385402984619141, "loss": 0.19581030309200287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.75545196785179, "step_time": 0.49409340667724605} +{"epoch": 0, "iter": 16988, "iter_tflops": 42.48106213894544, "iter_time": 0.4856539001464844, "loss": 0.1398373395204544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.76901033677324, "step_time": 0.4411274337768555} +{"epoch": 0, "iter": 16989, "iter_tflops": 
17.926809984321828, "iter_time": 0.9848435668945313, "loss": 0.05406744405627251, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 19.265322231691457, "step_time": 0.9164188003540039} +{"epoch": 0, "iter": 16990, "iter_tflops": 18.1169393413127, "iter_time": 0.9745080642700196, "loss": 0.07400627434253693, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 24.25363249901552, "step_time": 0.7279364643096924} +{"epoch": 0, "iter": 16991, "iter_tflops": 45.07791660011023, "iter_time": 0.3916574859619141, "loss": 0.06074560806155205, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 49.60418481845804, "step_time": 0.3559196376800537} +{"epoch": 0, "iter": 16992, "iter_tflops": 46.80523805910242, "iter_time": 0.3772035827636719, "loss": 0.07614291459321976, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 50.75855654314417, "step_time": 0.3478251686096191} +{"epoch": 0, "iter": 16993, "iter_tflops": 48.88660888716329, "iter_time": 0.4220193214416504, "loss": 0.044372472912073135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.585636760403425, "step_time": 0.3850116329193115} +{"epoch": 0, "iter": 16994, "iter_tflops": 22.112883805999253, "iter_time": 0.932989730834961, "loss": 0.05734604224562645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.310312440035293, "step_time": 0.7554323501586915} +{"epoch": 0, "iter": 16995, "iter_tflops": 52.34763757760221, "iter_time": 0.39411699295043945, "loss": 0.08261879533529282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.17306974208008, "step_time": 0.3608533458709717} +{"epoch": 0, "iter": 16996, "iter_tflops": 56.05884266991388, "iter_time": 0.36802567672729486, "loss": 0.05660644546151161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.955733385160606, "step_time": 0.33846026229858395} +{"epoch": 0, "iter": 16997, "iter_tflops": 25.396510162296114, "iter_time": 0.812359390258789, "loss": 0.165143221616745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.656914546817255, "step_time": 0.7739490432739258} +{"epoch": 0, "iter": 16998, 
"iter_tflops": 13.402745146191707, "iter_time": 1.5393184967041014, "loss": 0.16123314201831818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.884682044888713, "step_time": 1.2218822631835937} +{"epoch": 0, "iter": 16999, "iter_tflops": 36.8731405715069, "iter_time": 0.5595154953002929, "loss": 0.20241358876228333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.20102751322108, "step_time": 0.513198163986206} +{"epoch": 0, "iter": 17000, "iter_tflops": 40.29459834419476, "iter_time": 0.5120064315795898, "loss": 0.17968925833702087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.89054903552986, "step_time": 0.470057767868042} +{"epoch": 0, "iter": 17001, "iter_tflops": 31.84652496911452, "iter_time": 0.6478287200927734, "loss": 0.5384137034416199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.92252566324523, "step_time": 0.5907675094604492} +{"epoch": 0, "iter": 17002, "iter_tflops": 36.686381073369326, "iter_time": 0.5623638229370118, "loss": 0.7250514626502991, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.93869059701992, "step_time": 0.5165691013336182} +{"epoch": 0, "iter": 17003, "iter_tflops": 37.91631838788371, "iter_time": 0.5441217498779297, "loss": 0.7700074911117554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.40797612862455, "step_time": 0.4982396011352539} +{"epoch": 0, "iter": 17004, "iter_tflops": 38.295766318128074, "iter_time": 0.5387304000854493, "loss": 0.6931198835372925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.56800498826133, "step_time": 0.4963214740753174} +{"epoch": 0, "iter": 17005, "iter_tflops": 17.34188361514675, "iter_time": 1.1896685485839842, "loss": 0.0009714058833196759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.478067998494996, "step_time": 1.1165178909301758} +{"epoch": 0, "iter": 17006, "iter_tflops": 16.364324992698794, "iter_time": 1.2607359924316404, "loss": 0.012190335430204868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.730607811212472, "step_time": 0.9494025058746337} +{"epoch": 0, 
"iter": 17007, "iter_tflops": 43.579256439883046, "iter_time": 0.47341545486450193, "loss": 0.0012588459067046642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.15560050112179, "step_time": 0.428425630569458} +{"epoch": 0, "iter": 17008, "iter_tflops": 50.18793272950367, "iter_time": 0.41107677459716796, "loss": 0.002473654458299279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.55841668732246, "step_time": 0.3713405590057373} +{"epoch": 0, "iter": 17009, "iter_tflops": 16.25912288936228, "iter_time": 1.2688933868408203, "loss": 0.5472942590713501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.333604111597264, "step_time": 1.190236801147461} +{"epoch": 0, "iter": 17010, "iter_tflops": 24.109798929938986, "iter_time": 0.8557140426635743, "loss": 0.7090930938720703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.77546390641259, "step_time": 0.6928890705108643} +{"epoch": 0, "iter": 17011, "iter_tflops": 48.98409504345001, "iter_time": 0.42117943572998046, "loss": 0.5761008858680725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.82879755699896, "step_time": 0.39052741050720213} +{"epoch": 0, "iter": 17012, "iter_tflops": 46.25870013994737, "iter_time": 0.44599380111694337, "loss": 0.5953893065452576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.968430657316325, "step_time": 0.41288255882263186} +{"epoch": 0, "iter": 17013, "iter_tflops": 30.925689238570566, "iter_time": 0.667118309020996, "loss": 0.15878926217556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.85076267197373, "step_time": 0.6280247955322267} +{"epoch": 0, "iter": 17014, "iter_tflops": 16.35140038841969, "iter_time": 1.2617325134277344, "loss": 0.23792287707328796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.665849756820265, "step_time": 1.0490822296142577} +{"epoch": 0, "iter": 17015, "iter_tflops": 41.30958942116158, "iter_time": 0.49942625427246096, "loss": 0.20887918770313263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.25129483753346, "step_time": 0.45592272186279303} 
+{"epoch": 0, "iter": 17016, "iter_tflops": 45.61471560698301, "iter_time": 0.4522903022766114, "loss": 0.17153248190879822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.89992599940493, "step_time": 0.4134493808746338} +{"epoch": 0, "iter": 17017, "iter_tflops": 21.167433895937037, "iter_time": 0.9746620025634767, "loss": 0.09461204707622528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.798713285120726, "step_time": 0.9049235916137695} +{"epoch": 0, "iter": 17018, "iter_tflops": 20.510580859479546, "iter_time": 1.0058756332397463, "loss": 0.08411511778831482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.373260723093665, "step_time": 0.882679304122925} +{"epoch": 0, "iter": 17019, "iter_tflops": 47.91818918152643, "iter_time": 0.4305482711791992, "loss": 0.1332160383462906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.932995839465214, "step_time": 0.39726368904113774} +{"epoch": 0, "iter": 17020, "iter_tflops": 53.966000093528834, "iter_time": 0.38229799270629883, "loss": 0.10401593893766403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.45855961679713, "step_time": 0.35291826629638673} +{"epoch": 0, "iter": 17021, "iter_tflops": 33.191032129729, "iter_time": 0.6215863800048829, "loss": 0.003450022777542472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.364435883827184, "step_time": 0.5833853416442871} +{"epoch": 0, "iter": 17022, "iter_tflops": 14.145907163084775, "iter_time": 1.4584496612548827, "loss": 0.012668310664594173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.82118481067036, "step_time": 1.1576723842620849} +{"epoch": 0, "iter": 17023, "iter_tflops": 46.12072487226077, "iter_time": 0.4473280410766602, "loss": 0.0024928771890699863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19296172465622, "step_time": 0.40300644493103027} +{"epoch": 0, "iter": 17024, "iter_tflops": 46.72226500536135, "iter_time": 0.4415687789916992, "loss": 0.002715935930609703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.657380628517664, 
"step_time": 0.3993832683563233} +{"epoch": 0, "iter": 17025, "iter_tflops": 33.51899280645291, "iter_time": 0.6155045776367187, "loss": 0.5920596122741699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.01607848102889, "step_time": 0.5573549213409424} +{"epoch": 0, "iter": 17026, "iter_tflops": 36.549627928126775, "iter_time": 0.5644679489135742, "loss": 0.7019146680831909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.50191052037052, "step_time": 0.5093856868743897} +{"epoch": 0, "iter": 17027, "iter_tflops": 35.70132488319884, "iter_time": 0.5778803329467772, "loss": 0.5603854060173035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.74644784502003, "step_time": 0.5324641265869141} +{"epoch": 0, "iter": 17028, "iter_tflops": 36.88904419249499, "iter_time": 0.5592742767333985, "loss": 0.5912612080574036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.711503875489704, "step_time": 0.519524356842041} +{"epoch": 0, "iter": 17029, "iter_tflops": 18.641241913106185, "iter_time": 1.106744583129883, "loss": 0.18500769138336182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.743283660508276, "step_time": 1.0449676895141604} +{"epoch": 0, "iter": 17030, "iter_tflops": 20.163664982631637, "iter_time": 1.0231817245483399, "loss": 0.13107863068580627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.764640635247513, "step_time": 0.8330867309570312} +{"epoch": 0, "iter": 17031, "iter_tflops": 47.47223453856893, "iter_time": 0.434592845916748, "loss": 0.2677316665649414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.524293663781606, "step_time": 0.4004148731231689} +{"epoch": 0, "iter": 17032, "iter_tflops": 49.73542661753894, "iter_time": 0.41481686019897457, "loss": 0.27515673637390137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.838733538060396, "step_time": 0.3832016868591309} +{"epoch": 0, "iter": 17033, "iter_tflops": 32.08521333239403, "iter_time": 0.6430093917846681, "loss": 0.4525676667690277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
34.13336481936639, "step_time": 0.6044260101318361} +{"epoch": 0, "iter": 17034, "iter_tflops": 14.290234998327367, "iter_time": 1.4437196807861328, "loss": 0.5725879073143005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.295759417423564, "step_time": 1.1276434631347656} +{"epoch": 0, "iter": 17035, "iter_tflops": 39.574219970602, "iter_time": 0.5213265991210937, "loss": 0.7528022527694702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.225229875100325, "step_time": 0.47729285812377936} +{"epoch": 0, "iter": 17036, "iter_tflops": 41.47566779343945, "iter_time": 0.49742643356323246, "loss": 0.42318159341812134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.968449866136424, "step_time": 0.45879040908813473} +{"epoch": 0, "iter": 17037, "iter_tflops": 20.313759824751784, "iter_time": 1.0156216125488282, "loss": 0.25826239585876465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.68763850972445, "step_time": 0.9512835388183595} +{"epoch": 0, "iter": 17038, "iter_tflops": 19.2046519840586, "iter_time": 1.0742758331298827, "loss": 0.1787935048341751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.264852655900544, "step_time": 0.8867923564910888} +{"epoch": 0, "iter": 17039, "iter_tflops": 45.752586319190684, "iter_time": 0.4509273719787597, "loss": 0.24019336700439453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.30728606759284, "step_time": 0.4184187602996826} +{"epoch": 0, "iter": 17040, "iter_tflops": 48.71382711480377, "iter_time": 0.42351617050170903, "loss": 0.21689003705978394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.77755959910351, "step_time": 0.3909065456390381} +{"epoch": 0, "iter": 17041, "iter_tflops": 36.210172350461, "iter_time": 0.5697596054077149, "loss": 0.39992132782936096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.81909382174527, "step_time": 0.5314676742553712} +{"epoch": 0, "iter": 17042, "iter_tflops": 17.131282711760388, "iter_time": 1.2042935638427734, "loss": 0.7461411356925964, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.573001409349313, "step_time": 1.0028237056732177} +{"epoch": 0, "iter": 17043, "iter_tflops": 40.7551312120522, "iter_time": 0.5062207603454589, "loss": 0.7205504179000854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.25406692732231, "step_time": 0.46619655418396} +{"epoch": 0, "iter": 17044, "iter_tflops": 40.96646752150206, "iter_time": 0.503609287261963, "loss": 0.5654449462890625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.20856807673934, "step_time": 0.46667635726928713} +{"epoch": 0, "iter": 17045, "iter_tflops": 20.784920347286686, "iter_time": 0.9925991134643555, "loss": 0.10565492510795593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.378194673767585, "step_time": 0.9219284133911132} +{"epoch": 0, "iter": 17046, "iter_tflops": 13.70987371453696, "iter_time": 1.5048346862792972, "loss": 0.23436541855335236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.456550440036626, "step_time": 1.2536706027984619} +{"epoch": 0, "iter": 17047, "iter_tflops": 41.577876193741375, "iter_time": 0.49620363998413086, "loss": 0.10061955451965332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.981464368887394, "step_time": 0.4486828289031982} +{"epoch": 0, "iter": 17048, "iter_tflops": 42.42625845778344, "iter_time": 0.48628123855590827, "loss": 0.09420686215162277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.494784216538456, "step_time": 0.4437292022705078} +{"epoch": 0, "iter": 17049, "iter_tflops": 34.90851600809117, "iter_time": 0.5601386413574219, "loss": 0.04057342931628227, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 38.72777672262375, "step_time": 0.5048988189697265} +{"epoch": 0, "iter": 17050, "iter_tflops": 44.77613033368778, "iter_time": 0.4366971549987793, "loss": 0.09206262975931168, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 50.05855287102047, "step_time": 0.3906147422790528} +{"epoch": 0, "iter": 17051, "iter_tflops": 48.03716305172848, "iter_time": 0.4070516967773437, "loss": 0.0684254989027977, "lr": 3e-05, 
"seqlen": 7776.0, "step_tflops": 52.525586194715835, "step_time": 0.37226826286315917} +{"epoch": 0, "iter": 17052, "iter_tflops": 52.74058126122531, "iter_time": 0.3707507247924805, "loss": 0.04086093232035637, "lr": 3e-05, "seqlen": 7776.0, "step_tflops": 57.545008960119794, "step_time": 0.3397967796325684} +{"epoch": 0, "iter": 17053, "iter_tflops": 25.356557431966507, "iter_time": 0.813639373779297, "loss": 0.10873233526945114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.703926945045176, "step_time": 0.7725865020751953} +{"epoch": 0, "iter": 17054, "iter_tflops": 15.289533978048635, "iter_time": 1.3493605194091798, "loss": 0.07484962046146393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.02664126504628, "step_time": 1.0301824073791503} +{"epoch": 0, "iter": 17055, "iter_tflops": 45.83604188801571, "iter_time": 0.4501063499450684, "loss": 0.09120059013366699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.38734819540137, "step_time": 0.40944987678527833} +{"epoch": 0, "iter": 17056, "iter_tflops": 41.585876748103246, "iter_time": 0.4961081771850585, "loss": 0.05843856930732727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.87998694959948, "step_time": 0.4496752262115479} +{"epoch": 0, "iter": 17057, "iter_tflops": 9.716397591199057, "iter_time": 1.0623429107666014, "loss": 0.009900529868900776, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 10.536349541979297, "step_time": 0.9796700515747071} +{"epoch": 0, "iter": 17058, "iter_tflops": 12.691232245385535, "iter_time": 0.8133289108276367, "loss": 0.002487203339114785, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 16.0947965361293, "step_time": 0.641334363937378} +{"epoch": 0, "iter": 17059, "iter_tflops": 29.450679103121406, "iter_time": 0.35048923873901366, "loss": 0.0041589452885091305, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 32.3813919391954, "step_time": 0.3187678318023681} +{"epoch": 0, "iter": 17060, "iter_tflops": 27.474728500271713, "iter_time": 0.3756960182189942, "loss": 
0.0052849529311060905, "lr": 3e-05, "seqlen": 4160.0, "step_tflops": 30.3200474539324, "step_time": 0.34043964195251464} +{"epoch": 0, "iter": 17061, "iter_tflops": 40.191432696522725, "iter_time": 0.5133206787109375, "loss": 0.2584076225757599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.696991084641766, "step_time": 0.4721399116516113} +{"epoch": 0, "iter": 17062, "iter_tflops": 42.43433649909737, "iter_time": 0.4861886672973632, "loss": 0.2057419717311859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.509352214643854, "step_time": 0.4342533111572266} +{"epoch": 0, "iter": 17063, "iter_tflops": 45.42778248618997, "iter_time": 0.4541514549255371, "loss": 0.1579781025648117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.213780261201656, "step_time": 0.419213752746582} +{"epoch": 0, "iter": 17064, "iter_tflops": 47.4044939831572, "iter_time": 0.43521387481689455, "loss": 0.20412853360176086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.42999496231672, "step_time": 0.4011490478515625} +{"epoch": 0, "iter": 17065, "iter_tflops": 29.752722498345246, "iter_time": 0.6934186782836914, "loss": 0.03325406834483147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.72066321764106, "step_time": 0.6503991851806641} +{"epoch": 0, "iter": 17066, "iter_tflops": 23.11102723129959, "iter_time": 0.8926947860717773, "loss": 0.02942708134651184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.105054481465064, "step_time": 0.7903103027343751} +{"epoch": 0, "iter": 17067, "iter_tflops": 54.63292688561599, "iter_time": 0.37763112258911136, "loss": 0.05178939551115036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.76473488873669, "step_time": 0.3452051372528076} +{"epoch": 0, "iter": 17068, "iter_tflops": 54.43569716866831, "iter_time": 0.37899934387207035, "loss": 0.025740012526512146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.174214278341914, "step_time": 0.34865006256103515} +{"epoch": 0, "iter": 17069, "iter_tflops": 24.176785953789764, "iter_time": 
0.8533431015014649, "loss": 0.1246044859290123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.407691424272304, "step_time": 0.8120018920898437} +{"epoch": 0, "iter": 17070, "iter_tflops": 17.953504324771917, "iter_time": 1.1491401977539062, "loss": 0.09320489317178726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.795887711279335, "step_time": 0.9465589923858643} +{"epoch": 0, "iter": 17071, "iter_tflops": 47.23452270960477, "iter_time": 0.436779972076416, "loss": 0.12522703409194946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.029726443340614, "step_time": 0.39652511978149413} +{"epoch": 0, "iter": 17072, "iter_tflops": 46.24510403668354, "iter_time": 0.4461249237060547, "loss": 0.11516225337982178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.553735648531685, "step_time": 0.40810225486755364} +{"epoch": 0, "iter": 17073, "iter_tflops": 16.282335314424788, "iter_time": 1.2670844268798829, "loss": 0.13787326216697693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.234351666930184, "step_time": 1.1970913619995116} +{"epoch": 0, "iter": 17074, "iter_tflops": 30.754773017181847, "iter_time": 0.6708257446289063, "loss": 0.1140071228146553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.43456957054116, "step_time": 0.5991389980316162} +{"epoch": 0, "iter": 17075, "iter_tflops": 47.274827551393834, "iter_time": 0.4364075889587402, "loss": 0.17171864211559296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74953930228209, "step_time": 0.40652770042419434} +{"epoch": 0, "iter": 17076, "iter_tflops": 55.30763955879929, "iter_time": 0.373024299621582, "loss": 0.07802144438028336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.05205531585859, "step_time": 0.3435534954071045} +{"epoch": 0, "iter": 17077, "iter_tflops": 22.413758725169572, "iter_time": 0.9204655838012695, "loss": 0.11780926585197449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.47901824427637, "step_time": 0.8787034149169921} +{"epoch": 0, "iter": 17078, "iter_tflops": 
15.765366351513853, "iter_time": 1.3086339416503905, "loss": 0.12549978494644165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.34725564312556, "step_time": 1.0139496879577636} +{"epoch": 0, "iter": 17079, "iter_tflops": 48.74813761695906, "iter_time": 0.4232180862426758, "loss": 0.2017410695552826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.85432188677206, "step_time": 0.3903388175964355} +{"epoch": 0, "iter": 17080, "iter_tflops": 50.92675201497451, "iter_time": 0.4051130828857422, "loss": 0.15038733184337616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.367961290025136, "step_time": 0.3726179008483887} +{"epoch": 0, "iter": 17081, "iter_tflops": 27.18509345992877, "iter_time": 0.758911994934082, "loss": 0.2772027552127838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.631090247233647, "step_time": 0.7205835800170899} +{"epoch": 0, "iter": 17082, "iter_tflops": 13.890950889727895, "iter_time": 1.485218231201172, "loss": 0.25703537464141846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.04770059757856, "step_time": 1.1431424961090089} +{"epoch": 0, "iter": 17083, "iter_tflops": 42.520569039030455, "iter_time": 0.4852026672363281, "loss": 0.30975601077079773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66596511154309, "step_time": 0.4421015071868897} +{"epoch": 0, "iter": 17084, "iter_tflops": 41.038009006094164, "iter_time": 0.5027313461303711, "loss": 0.3717350661754608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.51054043190946, "step_time": 0.46351029014587397} +{"epoch": 0, "iter": 17085, "iter_tflops": 17.962749016159425, "iter_time": 1.148548782348633, "loss": 0.4150816798210144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.23637309967023, "step_time": 1.0725043334960938} +{"epoch": 0, "iter": 17086, "iter_tflops": 20.52070345428085, "iter_time": 1.0053794479370117, "loss": 0.21835775673389435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.382889736268737, "step_time": 0.7268848838806153} +{"epoch": 0, "iter": 17087, 
"iter_tflops": 36.931369679830034, "iter_time": 0.5586333160400391, "loss": 0.2866603434085846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.29777546326602, "step_time": 0.511966064453125} +{"epoch": 0, "iter": 17088, "iter_tflops": 40.701698375301234, "iter_time": 0.5068853225708009, "loss": 0.2662728428840637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64348185651234, "step_time": 0.4621300277709961} +{"epoch": 0, "iter": 17089, "iter_tflops": 20.1690527520422, "iter_time": 1.022908401489258, "loss": 0.2732833921909332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.924080865255686, "step_time": 0.9410243301391601} +{"epoch": 0, "iter": 17090, "iter_tflops": 21.70041824107788, "iter_time": 0.9507233123779296, "loss": 0.2118641436100006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.608889881956213, "step_time": 0.7753458938598632} +{"epoch": 0, "iter": 17091, "iter_tflops": 39.71045097707375, "iter_time": 0.5195381317138672, "loss": 0.21243885159492493, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.322910441468686, "step_time": 0.47621670150756834} +{"epoch": 0, "iter": 17092, "iter_tflops": 39.34922541159465, "iter_time": 0.524307487487793, "loss": 0.15923243761062622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.21216204221215, "step_time": 0.47743719673156737} +{"epoch": 0, "iter": 17093, "iter_tflops": 18.883455571823855, "iter_time": 1.0925486297607423, "loss": 0.6118291020393372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.259165210162088, "step_time": 1.0183585205078125} +{"epoch": 0, "iter": 17094, "iter_tflops": 13.941517346749185, "iter_time": 1.479831283569336, "loss": 0.5424708127975464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.000744014380636, "step_time": 1.0858045082092285} +{"epoch": 0, "iter": 17095, "iter_tflops": 43.87254094864025, "iter_time": 0.4702507095336914, "loss": 0.4768391251564026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.35886391214317, "step_time": 0.4356332015991211} +{"epoch": 0, "iter": 
17096, "iter_tflops": 41.397357262402096, "iter_time": 0.4983674049377441, "loss": 0.5014485716819763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.18690632127511, "step_time": 0.4669051361083985} +{"epoch": 0, "iter": 17097, "iter_tflops": 41.627996671814316, "iter_time": 0.43104073715209956, "loss": 0.0002953199145849794, "lr": 3e-05, "seqlen": 7152.0, "step_tflops": 45.82169652245804, "step_time": 0.3915909652709961} +{"epoch": 0, "iter": 17098, "iter_tflops": 29.672120578500685, "iter_time": 0.6047212677001954, "loss": 0.004951409064233303, "lr": 3e-05, "seqlen": 7152.0, "step_tflops": 32.941948022576895, "step_time": 0.5446964569091797} +{"epoch": 0, "iter": 17099, "iter_tflops": 36.84811815589473, "iter_time": 0.486954647064209, "loss": 0.0021728193387389183, "lr": 3e-05, "seqlen": 7152.0, "step_tflops": 40.78015618655889, "step_time": 0.4400022964477539} +{"epoch": 0, "iter": 17100, "iter_tflops": 38.13271278220418, "iter_time": 0.4705503768920899, "loss": 0.002062637824565172, "lr": 3e-05, "seqlen": 7152.0, "step_tflops": 42.23613390484671, "step_time": 0.4248343944549561} +{"epoch": 0, "iter": 17101, "iter_tflops": 26.88032402648709, "iter_time": 0.7675165481567383, "loss": 0.12726299464702606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.860454396787443, "step_time": 0.7148568496704102} +{"epoch": 0, "iter": 17102, "iter_tflops": 8.606861926490698, "iter_time": 2.3970517578125, "loss": 0.14440973103046417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.65760585172569, "step_time": 2.1362534179687502} +{"epoch": 0, "iter": 17103, "iter_tflops": 22.48717350896977, "iter_time": 0.9174605026245116, "loss": 0.13943491876125336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.675558940170536, "step_time": 0.7454625778198242} +{"epoch": 0, "iter": 17104, "iter_tflops": 37.68044132357095, "iter_time": 0.5475279159545898, "loss": 0.1282537281513214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.062036845892, "step_time": 0.5024371681213379} +{"epoch": 
0, "iter": 17105, "iter_tflops": 14.170231996345176, "iter_time": 1.0462825622558594, "loss": 0.17720964550971985, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 15.32492878003199, "step_time": 0.9674476699829101} +{"epoch": 0, "iter": 17106, "iter_tflops": 13.65817954173854, "iter_time": 1.0855082550048827, "loss": 0.1361965388059616, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 17.895166467463543, "step_time": 0.8284955978393553} +{"epoch": 0, "iter": 17107, "iter_tflops": 25.08519669082937, "iter_time": 0.5910285186767579, "loss": 0.3223925530910492, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 26.95611386788888, "step_time": 0.550007568359375} +{"epoch": 0, "iter": 17108, "iter_tflops": 25.68388488797841, "iter_time": 0.5772517166137695, "loss": 0.09180011600255966, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 27.43619945868347, "step_time": 0.5403833961486816} +{"epoch": 0, "iter": 17109, "iter_tflops": 26.778118053215096, "iter_time": 0.7704459838867187, "loss": 0.09286157041788101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.2451818736904, "step_time": 0.7304287719726563} +{"epoch": 0, "iter": 17110, "iter_tflops": 17.212988187691856, "iter_time": 1.198577102661133, "loss": 0.09012128412723541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.354353886080503, "step_time": 0.9661305427551269} +{"epoch": 0, "iter": 17111, "iter_tflops": 47.27099840719012, "iter_time": 0.4364429397583008, "loss": 0.07183320075273514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.33776900993462, "step_time": 0.40186969375610354} +{"epoch": 0, "iter": 17112, "iter_tflops": 49.98413389966609, "iter_time": 0.41275284576416016, "loss": 0.10851508378982544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.618800411000436, "step_time": 0.3777287921905518} +{"epoch": 0, "iter": 17113, "iter_tflops": 20.17427021609319, "iter_time": 0.7207357635498046, "loss": 0.02982851304113865, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 21.330520665985457, "step_time": 0.6816672821044921} 
+{"epoch": 0, "iter": 17114, "iter_tflops": 9.224810435944567, "iter_time": 1.5762186279296875, "loss": 0.03239712119102478, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 11.68917742940896, "step_time": 1.2439128532409667} +{"epoch": 0, "iter": 17115, "iter_tflops": 27.836483010235963, "iter_time": 0.5223475265502929, "loss": 0.024642253294587135, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 30.868137017635075, "step_time": 0.47104618072509763} +{"epoch": 0, "iter": 17116, "iter_tflops": 33.66923537613021, "iter_time": 0.4318576850891113, "loss": 0.015947336331009865, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 37.27729066541502, "step_time": 0.3900583381652832} +{"epoch": 0, "iter": 17117, "iter_tflops": 27.2244310650464, "iter_time": 0.7578154144287109, "loss": 0.30565622448921204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.164342264192324, "step_time": 0.7074081535339355} +{"epoch": 0, "iter": 17118, "iter_tflops": 25.894143833006932, "iter_time": 0.7967474670410156, "loss": 0.36128881573677063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.14396097324098, "step_time": 0.7079028663635254} +{"epoch": 0, "iter": 17119, "iter_tflops": 48.282547179164794, "iter_time": 0.4272991943359375, "loss": 0.28515079617500305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.65202127915593, "step_time": 0.39183858489990236} +{"epoch": 0, "iter": 17120, "iter_tflops": 53.68486162465089, "iter_time": 0.38430002212524417, "loss": 0.36967822909355164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.05563650462449, "step_time": 0.3553676223754883} +{"epoch": 0, "iter": 17121, "iter_tflops": 30.312271593780697, "iter_time": 0.680618522644043, "loss": 0.6790435910224915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.086991278671846, "step_time": 0.6429737625122071} +{"epoch": 0, "iter": 17122, "iter_tflops": 25.862523491520747, "iter_time": 0.7977215957641601, "loss": 0.5847340822219849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.14563037702675, "step_time": 
0.6418008689880371} +{"epoch": 0, "iter": 17123, "iter_tflops": 43.840175081962585, "iter_time": 0.47059788131713864, "loss": 0.6443009972572327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.982127729778476, "step_time": 0.4391264190673828} +{"epoch": 0, "iter": 17124, "iter_tflops": 43.6704956434187, "iter_time": 0.4724263648986816, "loss": 0.7796652913093567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.160457651807334, "step_time": 0.4374659309387207} +{"epoch": 0, "iter": 17125, "iter_tflops": 26.367179302323606, "iter_time": 0.7824535675048827, "loss": 0.39606091380119324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.83351123566009, "step_time": 0.7412321548461914} +{"epoch": 0, "iter": 17126, "iter_tflops": 12.200180181040931, "iter_time": 1.6910482635498048, "loss": 0.3394821882247925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.244240900387997, "step_time": 1.2700558700561522} +{"epoch": 0, "iter": 17127, "iter_tflops": 17.184986158900774, "iter_time": 1.2005301208496093, "loss": 0.39362892508506775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.47200292209787, "step_time": 1.0077711296081544} +{"epoch": 0, "iter": 17128, "iter_tflops": 30.51800447077872, "iter_time": 0.6760302276611329, "loss": 0.46674877405166626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.8871826557556, "step_time": 0.6088170185089111} +{"epoch": 0, "iter": 17129, "iter_tflops": 23.101639491106333, "iter_time": 0.7197538909912109, "loss": 0.21021001040935516, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 24.56206622029891, "step_time": 0.6769583129882812} +{"epoch": 0, "iter": 17130, "iter_tflops": 15.718104156522791, "iter_time": 1.057856262207031, "loss": 0.24732491374015808, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 18.88015869377158, "step_time": 0.8806861839294434} +{"epoch": 0, "iter": 17131, "iter_tflops": 27.066224291368123, "iter_time": 0.6143263549804687, "loss": 0.1575169414281845, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 29.10796472896356, 
"step_time": 0.5712352294921875} +{"epoch": 0, "iter": 17132, "iter_tflops": 22.91468890373471, "iter_time": 0.7256260375976563, "loss": 0.1955684870481491, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 24.65434941236601, "step_time": 0.6744244041442871} +{"epoch": 0, "iter": 17133, "iter_tflops": 17.136099517289676, "iter_time": 1.2039550476074217, "loss": 0.3217116892337799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.32618297370205, "step_time": 1.1257714462280275} +{"epoch": 0, "iter": 17134, "iter_tflops": 16.13561330666512, "iter_time": 1.2786060943603517, "loss": 0.4542126953601837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.443275610712618, "step_time": 0.9621241588592528} +{"epoch": 0, "iter": 17135, "iter_tflops": 42.61714677596832, "iter_time": 0.48410311508178716, "loss": 0.4684225916862488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.654728775786, "step_time": 0.44220798301696773} +{"epoch": 0, "iter": 17136, "iter_tflops": 37.00456546540569, "iter_time": 0.5575283279418946, "loss": 0.47371378540992737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.45716681355111, "step_time": 0.5099490432739258} +{"epoch": 0, "iter": 17137, "iter_tflops": 22.583170410233254, "iter_time": 0.913560546875, "loss": 0.15585726499557495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.003292168667336, "step_time": 0.8595109939575196} +{"epoch": 0, "iter": 17138, "iter_tflops": 7.8124872731291655, "iter_time": 2.6407842712402343, "loss": 0.13045407831668854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.924231903055645, "step_time": 2.078860481262207} +{"epoch": 0, "iter": 17139, "iter_tflops": 13.430550885299473, "iter_time": 1.536131591796875, "loss": 0.10011325031518936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.163502611783015, "step_time": 1.3605757217407226} +{"epoch": 0, "iter": 17140, "iter_tflops": 33.73179869676655, "iter_time": 0.6116215057373047, "loss": 0.13815273344516754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
41.97413996876044, "step_time": 0.4915191478729249} +{"epoch": 0, "iter": 17141, "iter_tflops": 10.233060490707349, "iter_time": 1.4807808074951172, "loss": 0.2320786565542221, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 10.845061460269642, "step_time": 1.3972184143066404} +{"epoch": 0, "iter": 17142, "iter_tflops": 13.37083114342454, "iter_time": 1.1332817993164064, "loss": 0.17271704971790314, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 17.674569212193134, "step_time": 0.8573289337158203} +{"epoch": 0, "iter": 17143, "iter_tflops": 27.215066984379447, "iter_time": 0.5567842102050781, "loss": 0.20988242328166962, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 29.03804084215508, "step_time": 0.5218299560546875} +{"epoch": 0, "iter": 17144, "iter_tflops": 26.89325476560174, "iter_time": 0.5634468460083009, "loss": 0.15502147376537323, "lr": 3e-05, "seqlen": 6064.0, "step_tflops": 28.490671133154823, "step_time": 0.5318554801940918} +{"epoch": 0, "iter": 17145, "iter_tflops": 30.586709471991895, "iter_time": 0.6745117034912109, "loss": 0.5674143433570862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.528783439817495, "step_time": 0.6342411651611328} +{"epoch": 0, "iter": 17146, "iter_tflops": 12.44473767071356, "iter_time": 1.657816665649414, "loss": 0.4071730673313141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.007176292277194, "step_time": 1.3747485275268556} +{"epoch": 0, "iter": 17147, "iter_tflops": 16.755908881420446, "iter_time": 1.2312727203369138, "loss": 0.506365180015564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.935049306119794, "step_time": 1.0349155998229982} +{"epoch": 0, "iter": 17148, "iter_tflops": 32.67414522234454, "iter_time": 0.6314195327758789, "loss": 0.45005035400390625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.75444343741003, "step_time": 0.5062293033599854} +{"epoch": 0, "iter": 17149, "iter_tflops": 17.831472146423955, "iter_time": 0.8245855712890625, "loss": 0.1740688532590866, "lr": 3e-05, "seqlen": 5888.0, 
"step_tflops": 18.835615713454093, "step_time": 0.7806261749267579} +{"epoch": 0, "iter": 17150, "iter_tflops": 10.216324445272644, "iter_time": 1.4392235412597656, "loss": 0.17744015157222748, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 14.04604169447304, "step_time": 1.046812686920166} +{"epoch": 0, "iter": 17151, "iter_tflops": 25.58759447197351, "iter_time": 0.5746368484497071, "loss": 0.19689218699932098, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.422081367973824, "step_time": 0.5361946983337402} +{"epoch": 0, "iter": 17152, "iter_tflops": 26.943721614168545, "iter_time": 0.5457143173217773, "loss": 0.1307312548160553, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 28.687612272143586, "step_time": 0.512540901184082} +{"epoch": 0, "iter": 17153, "iter_tflops": 24.011008193094533, "iter_time": 0.8592347869873047, "loss": 0.35492444038391113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.282270773419544, "step_time": 0.8160300827026369} +{"epoch": 0, "iter": 17154, "iter_tflops": 13.773211228493432, "iter_time": 1.4979145507812501, "loss": 0.45207104086875916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.25502913688213, "step_time": 1.2692129516601562} +{"epoch": 0, "iter": 17155, "iter_tflops": 37.05396358173062, "iter_time": 0.5567850646972656, "loss": 0.3319147527217865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14120916535829, "step_time": 0.5014702758789062} +{"epoch": 0, "iter": 17156, "iter_tflops": 42.7643936903163, "iter_time": 0.4824362449645996, "loss": 0.4928493797779083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.67026651924507, "step_time": 0.4420607604980469} +{"epoch": 0, "iter": 17157, "iter_tflops": 8.153631798784337, "iter_time": 1.0784913940429688, "loss": 0.003885529935359955, "lr": 3e-05, "seqlen": 3552.0, "step_tflops": 8.713690935586456, "step_time": 1.0091730117797852} +{"epoch": 0, "iter": 17158, "iter_tflops": 9.533544405891337, "iter_time": 0.9223874511718749, "loss": 0.004518445115536451, "lr": 3e-05, 
"seqlen": 3552.0, "step_tflops": 11.700454062401429, "step_time": 0.7515624332427979} +{"epoch": 0, "iter": 17159, "iter_tflops": 26.241546054301555, "iter_time": 0.3351030349731446, "loss": 0.0018303244141861796, "lr": 3e-05, "seqlen": 3552.0, "step_tflops": 28.853671756035535, "step_time": 0.3047661247253418} +{"epoch": 0, "iter": 17160, "iter_tflops": 25.422277614616853, "iter_time": 0.3459021987915039, "loss": 0.0015315796481445432, "lr": 3e-05, "seqlen": 3552.0, "step_tflops": 27.766880834500757, "step_time": 0.31669461822509765} +{"epoch": 0, "iter": 17161, "iter_tflops": 34.69740745717464, "iter_time": 0.5946004333496093, "loss": 0.0051932730711996555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.19600235162319, "step_time": 0.5546588935852051} +{"epoch": 0, "iter": 17162, "iter_tflops": 9.915785920659605, "iter_time": 2.0806311950683596, "loss": 0.004434013739228249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.743806287700913, "step_time": 1.7567637786865233} +{"epoch": 0, "iter": 17163, "iter_tflops": 14.602861697876591, "iter_time": 1.4128116760253906, "loss": 0.003464845474809408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.263836834678663, "step_time": 1.2685256080627443} +{"epoch": 0, "iter": 17164, "iter_tflops": 31.297412476946516, "iter_time": 0.6591948623657227, "loss": 0.0013974735047668219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.700635204781385, "step_time": 0.5778914966583252} +{"epoch": 0, "iter": 17165, "iter_tflops": 17.70372803680676, "iter_time": 0.8259241714477539, "loss": 0.2795681357383728, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 18.917984576301684, "step_time": 0.772911979675293} +{"epoch": 0, "iter": 17166, "iter_tflops": 6.3218531082238485, "iter_time": 2.31291943359375, "loss": 0.1676606833934784, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 7.568274127600779, "step_time": 1.9320041351318358} +{"epoch": 0, "iter": 17167, "iter_tflops": 10.245367817149294, "iter_time": 1.4271753997802736, "loss": 
0.28557339310646057, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 11.98521749849289, "step_time": 1.2199976272583009} +{"epoch": 0, "iter": 17168, "iter_tflops": 15.94909783082504, "iter_time": 0.9167877120971679, "loss": 0.2551339864730835, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 20.936787335578202, "step_time": 0.698384937286377} +{"epoch": 0, "iter": 17169, "iter_tflops": 13.441741866, "iter_time": 1.1577301940917968, "loss": 0.19694939255714417, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 14.442503663650761, "step_time": 1.0775078048706055} +{"epoch": 0, "iter": 17170, "iter_tflops": 14.73267160338152, "iter_time": 1.0562857055664063, "loss": 0.23424716293811798, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 18.135094465387926, "step_time": 0.8581102485656739} +{"epoch": 0, "iter": 17171, "iter_tflops": 28.19907902228653, "iter_time": 0.5518588180541992, "loss": 0.28525421023368835, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 30.1064410200824, "step_time": 0.5168963813781738} +{"epoch": 0, "iter": 17172, "iter_tflops": 26.463430132503643, "iter_time": 0.5880534133911133, "loss": 0.26331380009651184, "lr": 3e-05, "seqlen": 6224.0, "step_tflops": 28.12777482568062, "step_time": 0.5532577857971192} +{"epoch": 0, "iter": 17173, "iter_tflops": 25.118731883913934, "iter_time": 0.8213429565429688, "loss": 0.2553037106990814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.596503443303444, "step_time": 0.7757069854736328} +{"epoch": 0, "iter": 17174, "iter_tflops": 17.264505020583492, "iter_time": 1.1950005798339844, "loss": 0.29735061526298523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.58067883921018, "step_time": 1.0024496116638184} +{"epoch": 0, "iter": 17175, "iter_tflops": 38.60326354539713, "iter_time": 0.5344391021728515, "loss": 0.25284072756767273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.333649983106206, "step_time": 0.48734502029418947} +{"epoch": 0, "iter": 17176, "iter_tflops": 37.74534132797508, "iter_time": 0.5465864868164063, 
"loss": 0.26507505774497986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.263719392985045, "step_time": 0.4999814319610596} +{"epoch": 0, "iter": 17177, "iter_tflops": 18.68609269077637, "iter_time": 1.1040881500244142, "loss": 0.44055646657943726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.011792859591132, "step_time": 1.0309467849731446} +{"epoch": 0, "iter": 17178, "iter_tflops": 16.159643706990668, "iter_time": 1.2767047271728516, "loss": 0.40649086236953735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.41376908083535, "step_time": 1.0627041778564452} +{"epoch": 0, "iter": 17179, "iter_tflops": 38.67406551602335, "iter_time": 0.5334606857299805, "loss": 0.43133416771888733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45476102202624, "step_time": 0.4859547672271728} +{"epoch": 0, "iter": 17180, "iter_tflops": 44.13810126389634, "iter_time": 0.4674214096069337, "loss": 0.47017061710357666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.984608359471565, "step_time": 0.4299523162841796} +{"epoch": 0, "iter": 17181, "iter_tflops": 18.622421334757387, "iter_time": 1.0100472946166994, "loss": 0.115432970225811, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 20.11467565515491, "step_time": 0.9351145706176758} +{"epoch": 0, "iter": 17182, "iter_tflops": 19.33873425087521, "iter_time": 0.9726348190307618, "loss": 0.08238770812749863, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 23.901142231937357, "step_time": 0.7869718570709229} +{"epoch": 0, "iter": 17183, "iter_tflops": 45.89070555315786, "iter_time": 0.40987659835815426, "loss": 0.10773707181215286, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 49.626138963340665, "step_time": 0.3790245761871338} +{"epoch": 0, "iter": 17184, "iter_tflops": 50.721418736730406, "iter_time": 0.37083990859985355, "loss": 0.12217742949724197, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 55.09372969656203, "step_time": 0.3414095649719238} +{"epoch": 0, "iter": 17185, "iter_tflops": 26.6496423344146, "iter_time": 
0.7741602401733397, "loss": 0.342910498380661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.66654527452228, "step_time": 0.7196923561096191} +{"epoch": 0, "iter": 17186, "iter_tflops": 21.7234203124907, "iter_time": 0.9497166290283203, "loss": 0.36053428053855896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.265008397392435, "step_time": 0.7854973125457765} +{"epoch": 0, "iter": 17187, "iter_tflops": 47.6367882780635, "iter_time": 0.4330916137695312, "loss": 0.3274592161178589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.43616336056249, "step_time": 0.4011009407043456} +{"epoch": 0, "iter": 17188, "iter_tflops": 45.363521509155724, "iter_time": 0.4547947959899902, "loss": 0.26229119300842285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.6753081009679, "step_time": 0.423851318359375} +{"epoch": 0, "iter": 17189, "iter_tflops": 26.64899994765706, "iter_time": 0.7741789016723633, "loss": 0.05986844375729561, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.15646052329996, "step_time": 0.7327303619384766} +{"epoch": 0, "iter": 17190, "iter_tflops": 15.236464266821686, "iter_time": 1.3540604400634766, "loss": 0.04654613882303238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.590639137533863, "step_time": 1.0531097717285156} +{"epoch": 0, "iter": 17191, "iter_tflops": 52.45907134854242, "iter_time": 0.3932798080444336, "loss": 0.08572747558355331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.371152987625216, "step_time": 0.3596074409484863} +{"epoch": 0, "iter": 17192, "iter_tflops": 56.53548347408088, "iter_time": 0.3649229164123535, "loss": 0.04086337983608246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.50716693430446, "step_time": 0.33542584609985354} +{"epoch": 0, "iter": 17193, "iter_tflops": 24.14500969580424, "iter_time": 0.5112337265014648, "loss": 0.018156535923480988, "lr": 3e-05, "seqlen": 4960.0, "step_tflops": 26.49561846707211, "step_time": 0.46587866210937495} +{"epoch": 0, "iter": 17194, "iter_tflops": 9.856362708104326, 
"iter_time": 1.2523629302978516, "loss": 0.004916232079267502, "lr": 3e-05, "seqlen": 4960.0, "step_tflops": 11.749775511173217, "step_time": 1.050551414489746} +{"epoch": 0, "iter": 17195, "iter_tflops": 25.194842635324022, "iter_time": 0.48993135070800786, "loss": 0.033618099987506866, "lr": 3e-05, "seqlen": 4960.0, "step_tflops": 27.967852917549685, "step_time": 0.44135469818115236} +{"epoch": 0, "iter": 17196, "iter_tflops": 29.339392098852922, "iter_time": 0.4207225303649902, "loss": 0.003224029904231429, "lr": 3e-05, "seqlen": 4960.0, "step_tflops": 32.59108633375431, "step_time": 0.37874599075317383} +{"epoch": 0, "iter": 17197, "iter_tflops": 20.584027679824676, "iter_time": 1.002286521911621, "loss": 0.36934617161750793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.411266402104335, "step_time": 0.9205679473876953} +{"epoch": 0, "iter": 17198, "iter_tflops": 16.79673009069538, "iter_time": 1.2282803497314452, "loss": 0.40924277901649475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.89157839972348, "step_time": 0.9424214706420898} +{"epoch": 0, "iter": 17199, "iter_tflops": 44.72091320172853, "iter_time": 0.46132987976074213, "loss": 0.46132510900497437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.30768691173684, "step_time": 0.4270768241882324} +{"epoch": 0, "iter": 17200, "iter_tflops": 46.958310475261506, "iter_time": 0.43934914398193364, "loss": 0.3601839542388916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.910457000656635, "step_time": 0.40524274826049805} +{"epoch": 0, "iter": 17201, "iter_tflops": 31.053276447026416, "iter_time": 0.6643773498535157, "loss": 0.6987605690956116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.00768018940603, "step_time": 0.6250391845703126} +{"epoch": 0, "iter": 17202, "iter_tflops": 12.424230715606262, "iter_time": 1.6605529937744141, "loss": 0.695940375328064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.591085720846007, "step_time": 1.243504726409912} +{"epoch": 0, "iter": 17203, 
"iter_tflops": 34.72742119764014, "iter_time": 0.594086540222168, "loss": 0.6058952212333679, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.76957659467294, "step_time": 0.5462357635498047} +{"epoch": 0, "iter": 17204, "iter_tflops": 37.8296417238364, "iter_time": 0.5453684616088866, "loss": 0.6024580001831055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.9087606342122, "step_time": 0.5043196907043457} +{"epoch": 0, "iter": 17205, "iter_tflops": 18.788244416780316, "iter_time": 1.098085220336914, "loss": 0.6708455681800842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.3550840554411, "step_time": 1.0135597305297852} +{"epoch": 0, "iter": 17206, "iter_tflops": 25.161659184535736, "iter_time": 0.8199416961669922, "loss": 0.6243038773536682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.95177053660015, "step_time": 0.7380961246490478} +{"epoch": 0, "iter": 17207, "iter_tflops": 44.830047534992666, "iter_time": 0.4602068176269531, "loss": 0.6990785002708435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.603064297502705, "step_time": 0.42448133277893063} +{"epoch": 0, "iter": 17208, "iter_tflops": 41.440366495262865, "iter_time": 0.49785017013549804, "loss": 0.6695907711982727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.5271947354226, "step_time": 0.46333692550659183} +{"epoch": 0, "iter": 17209, "iter_tflops": 18.41490433794953, "iter_time": 1.1203475799560547, "loss": 0.6682671904563904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.1310393032669, "step_time": 1.0784094467163086} +{"epoch": 0, "iter": 17210, "iter_tflops": 17.81072231190683, "iter_time": 1.1583524322509764, "loss": 0.5822478532791138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.304045313830944, "step_time": 0.9684120178222657} +{"epoch": 0, "iter": 17211, "iter_tflops": 46.0863119476694, "iter_time": 0.44766206359863275, "loss": 0.5869730710983276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.742935969093054, "step_time": 0.414754238128662} +{"epoch": 0, "iter": 17212, 
"iter_tflops": 48.921162854766884, "iter_time": 0.4217212409973144, "loss": 0.6097244620323181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.63618504265891, "step_time": 0.39195647430419917} +{"epoch": 0, "iter": 17213, "iter_tflops": 23.580806424639203, "iter_time": 0.8749104309082031, "loss": 0.5797677040100098, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.810056068460174, "step_time": 0.8315617446899414} +{"epoch": 0, "iter": 17214, "iter_tflops": 17.030536066269956, "iter_time": 1.2114177398681643, "loss": 0.5835444927215576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.213023712511333, "step_time": 1.0206831893920898} +{"epoch": 0, "iter": 17215, "iter_tflops": 38.022238703267966, "iter_time": 0.5426059646606445, "loss": 0.6857788562774658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.64415455023097, "step_time": 0.49541391181945804} +{"epoch": 0, "iter": 17216, "iter_tflops": 39.34331836325855, "iter_time": 0.5243862075805664, "loss": 0.6642118096351624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.15937170747357, "step_time": 0.4780211734771729} +{"epoch": 0, "iter": 17217, "iter_tflops": 33.622101999114484, "iter_time": 0.6136170043945312, "loss": 0.0033450874034315348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.825611927028525, "step_time": 0.5602376289367675} +{"epoch": 0, "iter": 17218, "iter_tflops": 13.010414651846318, "iter_time": 1.58573681640625, "loss": 0.0008346400572918355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.895425862013894, "step_time": 1.3850623474121095} +{"epoch": 0, "iter": 17219, "iter_tflops": 10.84290668342405, "iter_time": 1.9027272033691407, "loss": 0.010693144984543324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.509591902546317, "step_time": 1.6492219467163087} +{"epoch": 0, "iter": 17220, "iter_tflops": 42.00343001505457, "iter_time": 0.4911763992309571, "loss": 0.013092231005430222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.179583141634694, "step_time": 0.44675789833068846} 
+{"epoch": 0, "iter": 17221, "iter_tflops": 23.307593007563217, "iter_time": 0.7169163742065429, "loss": 0.2762940526008606, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 24.71734377440868, "step_time": 0.6760271339416504} +{"epoch": 0, "iter": 17222, "iter_tflops": 30.202708177892433, "iter_time": 0.5532482376098633, "loss": 0.2814463973045349, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 32.24824512169924, "step_time": 0.5181551742553712} +{"epoch": 0, "iter": 17223, "iter_tflops": 30.59238274151744, "iter_time": 0.5462011642456055, "loss": 0.14564338326454163, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 32.447774734968, "step_time": 0.514968906402588} +{"epoch": 0, "iter": 17224, "iter_tflops": 30.399571009782004, "iter_time": 0.5496654891967773, "loss": 0.2763386070728302, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 32.38565888415597, "step_time": 0.5159566192626954} +{"epoch": 0, "iter": 17225, "iter_tflops": 24.183291803038212, "iter_time": 0.8531135330200196, "loss": 0.5323390960693359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.410794753226853, "step_time": 0.8119027252197265} +{"epoch": 0, "iter": 17226, "iter_tflops": 17.809846761463465, "iter_time": 1.158409378051758, "loss": 0.7344997525215149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.880044780860608, "step_time": 1.037779026031494} +{"epoch": 0, "iter": 17227, "iter_tflops": 46.10375526401181, "iter_time": 0.447492691040039, "loss": 0.6110780239105225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.949939872854394, "step_time": 0.41303540229797364} +{"epoch": 0, "iter": 17228, "iter_tflops": 48.083534843560194, "iter_time": 0.4290677375793457, "loss": 0.6668662428855896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.866165919556956, "step_time": 0.3977755661010742} +{"epoch": 0, "iter": 17229, "iter_tflops": 24.071574625772808, "iter_time": 0.857072868347168, "loss": 0.36415189504623413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.246694121783854, "step_time": 
0.8171800003051758} +{"epoch": 0, "iter": 17230, "iter_tflops": 14.30305089636411, "iter_time": 1.442426071166992, "loss": 0.348965048789978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.786080651096782, "step_time": 0.9469850883483887} +{"epoch": 0, "iter": 17231, "iter_tflops": 40.977800049990215, "iter_time": 0.503470012664795, "loss": 0.333132803440094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.578830846442195, "step_time": 0.4628002376556397} +{"epoch": 0, "iter": 17232, "iter_tflops": 41.43838583668819, "iter_time": 0.49787396621704105, "loss": 0.2842237949371338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.89907383140272, "step_time": 0.4594993114471435} +{"epoch": 0, "iter": 17233, "iter_tflops": 33.10834515262823, "iter_time": 0.6231387710571289, "loss": 0.29153677821159363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.779807756080444, "step_time": 0.5609353275299073} +{"epoch": 0, "iter": 17234, "iter_tflops": 35.68174663619446, "iter_time": 0.578197410583496, "loss": 0.40733620524406433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.73453143197706, "step_time": 0.519223274230957} +{"epoch": 0, "iter": 17235, "iter_tflops": 38.38682160880312, "iter_time": 0.537452507019043, "loss": 0.19011306762695312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08223893445676, "step_time": 0.49025655555725095} +{"epoch": 0, "iter": 17236, "iter_tflops": 38.32481802110864, "iter_time": 0.538322021484375, "loss": 0.18767236173152924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.89818777826275, "step_time": 0.49241016387939457} +{"epoch": 0, "iter": 17237, "iter_tflops": 32.91905104037447, "iter_time": 0.6267220001220704, "loss": 0.2996014654636383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.55902711270193, "step_time": 0.564322826385498} +{"epoch": 0, "iter": 17238, "iter_tflops": 40.4304941361035, "iter_time": 0.5102854652404785, "loss": 0.4013502895832062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.385487207128094, "step_time": 
0.4648162002563476} +{"epoch": 0, "iter": 17239, "iter_tflops": 41.6026032926189, "iter_time": 0.4959087142944336, "loss": 0.2559385299682617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.500034519469516, "step_time": 0.4534302825927734} +{"epoch": 0, "iter": 17240, "iter_tflops": 39.327966504102605, "iter_time": 0.5245909042358399, "loss": 0.23820385336875916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.951915207246486, "step_time": 0.4803300018310547} +{"epoch": 0, "iter": 17241, "iter_tflops": 22.368079531339433, "iter_time": 0.9223453216552735, "loss": 0.5563745498657227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.01527093825098, "step_time": 0.8590822715759276} +{"epoch": 0, "iter": 17242, "iter_tflops": 26.373144505433377, "iter_time": 0.7822765884399414, "loss": 0.5723502039909363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.636801986684585, "step_time": 0.696130895614624} +{"epoch": 0, "iter": 17243, "iter_tflops": 38.80473851756883, "iter_time": 0.5316642837524415, "loss": 0.7251929044723511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45347348900204, "step_time": 0.48596950531005856} +{"epoch": 0, "iter": 17244, "iter_tflops": 39.198564659494124, "iter_time": 0.5263226776123048, "loss": 0.7453526258468628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.491820163305945, "step_time": 0.4855309429168701} +{"epoch": 0, "iter": 17245, "iter_tflops": 16.833545468140727, "iter_time": 1.2255940704345702, "loss": 0.6611550450325012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.9452426324293, "step_time": 1.1496692428588868} +{"epoch": 0, "iter": 17246, "iter_tflops": 13.590544056161976, "iter_time": 1.5180476531982423, "loss": 0.7068098783493042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.104087995658084, "step_time": 1.1395820388793945} +{"epoch": 0, "iter": 17247, "iter_tflops": 43.58081807711337, "iter_time": 0.4733984909057617, "loss": 0.7071982622146606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.094232594885234, 
"step_time": 0.4380811061859131} +{"epoch": 0, "iter": 17248, "iter_tflops": 41.91829799501767, "iter_time": 0.4921739311218261, "loss": 0.6015316843986511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.95571325882381, "step_time": 0.45892039108276367} +{"epoch": 0, "iter": 17249, "iter_tflops": 16.65738239515315, "iter_time": 0.8606606445312499, "loss": 0.10463390499353409, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 17.48902299599837, "step_time": 0.8197343826293945} +{"epoch": 0, "iter": 17250, "iter_tflops": 8.062744277008393, "iter_time": 1.7780984954833985, "loss": 0.1271732598543167, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 11.496921056169821, "step_time": 1.2469732894897458} +{"epoch": 0, "iter": 17251, "iter_tflops": 25.61692474311801, "iter_time": 0.5596438140869141, "loss": 0.1214616447687149, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 28.196685650262882, "step_time": 0.5084410858154297} +{"epoch": 0, "iter": 17252, "iter_tflops": 26.677378183358275, "iter_time": 0.5373973922729492, "loss": 0.08165726065635681, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 29.30660903332143, "step_time": 0.4891849975585938} +{"epoch": 0, "iter": 17253, "iter_tflops": 18.641063293902047, "iter_time": 1.1067551879882813, "loss": 0.4891969859600067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.022533717619982, "step_time": 1.0303937454223633} +{"epoch": 0, "iter": 17254, "iter_tflops": 22.679279233759054, "iter_time": 0.9096891174316406, "loss": 0.7586885094642639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.60733248466922, "step_time": 0.8056713256835938} +{"epoch": 0, "iter": 17255, "iter_tflops": 48.04356016999023, "iter_time": 0.4294247436523438, "loss": 0.5408889651298523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.11763960017026, "step_time": 0.39585625267028807} +{"epoch": 0, "iter": 17256, "iter_tflops": 47.72658283780073, "iter_time": 0.4322767791748047, "loss": 0.5153417587280273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
51.63210950049663, "step_time": 0.3995787448883057} +{"epoch": 0, "iter": 17257, "iter_tflops": 31.434189528339825, "iter_time": 0.6563265609741211, "loss": 0.031504303216934204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.490515947864516, "step_time": 0.6160279388427734} +{"epoch": 0, "iter": 17258, "iter_tflops": 8.243790883058407, "iter_time": 2.502622131347656, "loss": 0.017369035631418228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.158825144622131, "step_time": 2.0308542785644534} +{"epoch": 0, "iter": 17259, "iter_tflops": 20.299898449123187, "iter_time": 1.0163151092529297, "loss": 0.0379951111972332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.39541797182285, "step_time": 0.8456954307556154} +{"epoch": 0, "iter": 17260, "iter_tflops": 51.555531349125474, "iter_time": 0.40017226028442376, "loss": 0.03860541433095932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.268378475330884, "step_time": 0.3666551990509033} +{"epoch": 0, "iter": 17261, "iter_tflops": 19.53682332973264, "iter_time": 0.7567878875732422, "loss": 0.26050519943237305, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 20.64738997947958, "step_time": 0.7160823364257812} +{"epoch": 0, "iter": 17262, "iter_tflops": 7.611086898305486, "iter_time": 1.94259130859375, "loss": 0.1558922678232193, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 8.912819055935373, "step_time": 1.6588725929260253} +{"epoch": 0, "iter": 17263, "iter_tflops": 23.227159772546663, "iter_time": 0.6365492553710937, "loss": 0.14656153321266174, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 25.012983781947227, "step_time": 0.5911022605895997} +{"epoch": 0, "iter": 17264, "iter_tflops": 22.51621454333067, "iter_time": 0.656648178100586, "loss": 0.20631924271583557, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 24.19828985687582, "step_time": 0.6110031471252442} +{"epoch": 0, "iter": 17265, "iter_tflops": 19.22264631912756, "iter_time": 1.0732702026367187, "loss": 0.5634199976921082, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.773814107887645, "step_time": 0.9931297836303711} +{"epoch": 0, "iter": 17266, "iter_tflops": 18.46065853934894, "iter_time": 1.1175708312988282, "loss": 0.4287378489971161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.631623143384225, "step_time": 0.9116046772003175} +{"epoch": 0, "iter": 17267, "iter_tflops": 40.807305982970256, "iter_time": 0.5055735244750976, "loss": 0.7200807929039001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87891170460989, "step_time": 0.47018243408203125} +{"epoch": 0, "iter": 17268, "iter_tflops": 45.10465661256461, "iter_time": 0.45740495681762694, "loss": 0.6176048517227173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.61332888579109, "step_time": 0.4243917045593262} +{"epoch": 0, "iter": 17269, "iter_tflops": 28.726167787088905, "iter_time": 0.7181986007690431, "loss": 0.1384701132774353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.384583198226075, "step_time": 0.6789987335205079} +{"epoch": 0, "iter": 17270, "iter_tflops": 19.12088658179449, "iter_time": 1.0789820556640626, "loss": 0.13803422451019287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.308113587499406, "step_time": 0.8851464290618897} +{"epoch": 0, "iter": 17271, "iter_tflops": 38.56354722175211, "iter_time": 0.534989517211914, "loss": 0.23013842105865479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.200271838097976, "step_time": 0.48888532257080075} +{"epoch": 0, "iter": 17272, "iter_tflops": 41.54721265270684, "iter_time": 0.4965698585510254, "loss": 0.30904778838157654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.40689223265177, "step_time": 0.4543603954315186} +{"epoch": 0, "iter": 17273, "iter_tflops": 20.751992007648962, "iter_time": 0.9941741256713866, "loss": 0.6670240759849548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.02308928853687, "step_time": 0.9367938003540038} +{"epoch": 0, "iter": 17274, "iter_tflops": 14.544578154112168, "iter_time": 1.41847314453125, "loss": 0.5348044633865356, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 19.995615557215473, "step_time": 1.0317808647155762} +{"epoch": 0, "iter": 17275, "iter_tflops": 40.27344014305518, "iter_time": 0.5122754211425782, "loss": 0.7190849781036377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.71923525966692, "step_time": 0.47189968872070315} +{"epoch": 0, "iter": 17276, "iter_tflops": 37.30772165831307, "iter_time": 0.5529979476928711, "loss": 0.7206898331642151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.648556691309, "step_time": 0.507547996520996} +{"epoch": 0, "iter": 17277, "iter_tflops": 18.136614405083506, "iter_time": 1.1375382995605468, "loss": 0.5187727808952332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.19057587220357, "step_time": 1.075063804626465} +{"epoch": 0, "iter": 17278, "iter_tflops": 35.25669052353159, "iter_time": 0.5851681823730469, "loss": 0.5016183257102966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.2948996409703, "step_time": 0.4877915229797364} +{"epoch": 0, "iter": 17279, "iter_tflops": 43.39384893243588, "iter_time": 0.4754382019042969, "loss": 0.5177233815193176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.5709763072109, "step_time": 0.4430032424926758} +{"epoch": 0, "iter": 17280, "iter_tflops": 45.810200614061145, "iter_time": 0.45036025238037114, "loss": 0.7185373306274414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.41064297721772, "step_time": 0.41754351425170894} +{"epoch": 0, "iter": 17281, "iter_tflops": 31.551751719666107, "iter_time": 0.6538810806274414, "loss": 0.3428003191947937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.77932506912592, "step_time": 0.6107609748840332} +{"epoch": 0, "iter": 17282, "iter_tflops": 33.818316162810774, "iter_time": 0.6100567932128906, "loss": 0.29873859882354736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.32924766645717, "step_time": 0.5526790599822998} +{"epoch": 0, "iter": 17283, "iter_tflops": 39.18797780730706, "iter_time": 0.5264648666381836, "loss": 0.2502489984035492, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 42.95838221715662, "step_time": 0.4802576923370362} +{"epoch": 0, "iter": 17284, "iter_tflops": 41.02485619897058, "iter_time": 0.5028925247192383, "loss": 0.37201517820358276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.98719092048719, "step_time": 0.45859928321838384} +{"epoch": 0, "iter": 17285, "iter_tflops": 17.275518567739095, "iter_time": 1.1942387390136717, "loss": 0.5698093771934509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.144513444685277, "step_time": 1.137043083190918} +{"epoch": 0, "iter": 17286, "iter_tflops": 15.526794238064564, "iter_time": 1.3287413482666015, "loss": 0.5958636999130249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.301521885358405, "step_time": 1.1272884101867675} +{"epoch": 0, "iter": 17287, "iter_tflops": 37.88320205059371, "iter_time": 0.5445974044799805, "loss": 0.5849380493164062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.32115948883111, "step_time": 0.499286413192749} +{"epoch": 0, "iter": 17288, "iter_tflops": 38.760561146623715, "iter_time": 0.5322702484130859, "loss": 0.5960395336151123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.8575045743411, "step_time": 0.4928887596130371} +{"epoch": 0, "iter": 17289, "iter_tflops": 17.654055324599106, "iter_time": 1.1686319732666015, "loss": 0.09509370476007462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.609423149867684, "step_time": 1.1086369171142578} +{"epoch": 0, "iter": 17290, "iter_tflops": 31.004039203563465, "iter_time": 0.6654324417114259, "loss": 0.12796732783317566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.56013682237256, "step_time": 0.5215121879577637} +{"epoch": 0, "iter": 17291, "iter_tflops": 39.81671088449894, "iter_time": 0.518151626586914, "loss": 0.07472889125347137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.550776995515136, "step_time": 0.4737250385284424} +{"epoch": 0, "iter": 17292, "iter_tflops": 46.314582461402225, "iter_time": 0.4454556732177734, "loss": 
0.14169619977474213, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.84008117976839, "step_time": 0.4058037090301514} +{"epoch": 0, "iter": 17293, "iter_tflops": 19.50259311254123, "iter_time": 1.0578641204833983, "loss": 0.66264808177948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.847403131577227, "step_time": 0.9896241455078125} +{"epoch": 0, "iter": 17294, "iter_tflops": 21.924697676539186, "iter_time": 0.9409978561401366, "loss": 0.6547806262969971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.828457054159337, "step_time": 0.8309454536437989} +{"epoch": 0, "iter": 17295, "iter_tflops": 41.61805229390153, "iter_time": 0.4957246284484863, "loss": 0.6001338362693787, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.18817010814609, "step_time": 0.4565596141815186} +{"epoch": 0, "iter": 17296, "iter_tflops": 37.717798276877524, "iter_time": 0.5469856262207031, "loss": 0.8666292428970337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.973325109526016, "step_time": 0.5035249996185304} +{"epoch": 0, "iter": 17297, "iter_tflops": 38.32530089670974, "iter_time": 0.5383152389526368, "loss": 0.0039027314633131027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.70233273759673, "step_time": 0.4831373882293701} +{"epoch": 0, "iter": 17298, "iter_tflops": 39.66425079832229, "iter_time": 0.5201432800292968, "loss": 0.0034988808911293745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.20703791326115, "step_time": 0.4666925106048584} +{"epoch": 0, "iter": 17299, "iter_tflops": 47.59045719077056, "iter_time": 0.4335132446289063, "loss": 0.0023268000222742558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76071793448908, "step_time": 0.39103132629394527} +{"epoch": 0, "iter": 17300, "iter_tflops": 43.25104787566999, "iter_time": 0.4770079460144043, "loss": 0.005294949281960726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.009539925765, "step_time": 0.42972904014587404} +{"epoch": 0, "iter": 17301, "iter_tflops": 20.952750164558122, "iter_time": 
0.9846484756469727, "loss": 0.39677175879478455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.51242153249024, "step_time": 0.9164315567016603} +{"epoch": 0, "iter": 17302, "iter_tflops": 15.934827179094954, "iter_time": 1.2947171173095702, "loss": 0.42175108194351196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.96744197312202, "step_time": 1.0332366828918458} +{"epoch": 0, "iter": 17303, "iter_tflops": 41.382144471365336, "iter_time": 0.4985506134033203, "loss": 0.7707306742668152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55996287900969, "step_time": 0.4629962005615234} +{"epoch": 0, "iter": 17304, "iter_tflops": 48.03820377366259, "iter_time": 0.4294726257324219, "loss": 0.6676276922225952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.736167697262374, "step_time": 0.3987750625610351} +{"epoch": 0, "iter": 17305, "iter_tflops": 40.90812196881318, "iter_time": 0.504327564239502, "loss": 0.6996722221374512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.31814913230263, "step_time": 0.46552245330810554} +{"epoch": 0, "iter": 17306, "iter_tflops": 33.680095341977335, "iter_time": 0.6125604248046874, "loss": 0.7709920406341553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.50660680277764, "step_time": 0.5500655822753906} +{"epoch": 0, "iter": 17307, "iter_tflops": 38.19386404634841, "iter_time": 0.5401677474975586, "loss": 0.6200330853462219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.56589630671556, "step_time": 0.4963466529846191} +{"epoch": 0, "iter": 17308, "iter_tflops": 39.135650845521354, "iter_time": 0.5271687850952148, "loss": 0.5337904691696167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45820660255839, "step_time": 0.48591533088684075} +{"epoch": 0, "iter": 17309, "iter_tflops": 31.866109388475287, "iter_time": 0.5282345733642577, "loss": 0.046717096120119095, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 35.55815427552706, "step_time": 0.47338735771179197} +{"epoch": 0, "iter": 17310, "iter_tflops": 40.418878796436225, 
"iter_time": 0.4164583778381348, "loss": 0.054293982684612274, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 45.40480779118677, "step_time": 0.3707268352508545} +{"epoch": 0, "iter": 17311, "iter_tflops": 42.1786615917797, "iter_time": 0.3990828552246094, "loss": 0.029504502192139626, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 46.07696793138506, "step_time": 0.3653187580108642} +{"epoch": 0, "iter": 17312, "iter_tflops": 39.7505709900498, "iter_time": 0.4234600982666016, "loss": 0.03947598859667778, "lr": 3e-05, "seqlen": 6720.0, "step_tflops": 43.432200545997084, "step_time": 0.3875645370483398} +{"epoch": 0, "iter": 17313, "iter_tflops": 39.92839605796371, "iter_time": 0.5167022857666015, "loss": 0.5366271138191223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.43081695071796, "step_time": 0.47503351211547845} +{"epoch": 0, "iter": 17314, "iter_tflops": 43.321897470449166, "iter_time": 0.47622783660888673, "loss": 0.7055588960647583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.625915528939096, "step_time": 0.4424812526702881} +{"epoch": 0, "iter": 17315, "iter_tflops": 47.27915907063781, "iter_time": 0.43636760711669925, "loss": 0.6982308030128479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.95900627744281, "step_time": 0.40485666847229} +{"epoch": 0, "iter": 17316, "iter_tflops": 46.202584248353155, "iter_time": 0.44653548812866206, "loss": 0.7267709970474243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62578615504374, "step_time": 0.4157333335876465} +{"epoch": 0, "iter": 17317, "iter_tflops": 28.871300348359156, "iter_time": 0.7145883026123047, "loss": 0.12214354425668716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.649771290485486, "step_time": 0.6731238975524902} +{"epoch": 0, "iter": 17318, "iter_tflops": 43.68059577097297, "iter_time": 0.4723171272277832, "loss": 0.12457513064146042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.41466887084779, "step_time": 0.4261331119537354} +{"epoch": 0, "iter": 17319, "iter_tflops": 
48.97377020024062, "iter_time": 0.42126823043823247, "loss": 0.11107593774795532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.048246251665084, "step_time": 0.38891188621520995} +{"epoch": 0, "iter": 17320, "iter_tflops": 49.67657448390869, "iter_time": 0.4153082962036133, "loss": 0.12908615171909332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.666845436069, "step_time": 0.384429033279419} +{"epoch": 0, "iter": 17321, "iter_tflops": 40.99748412215694, "iter_time": 0.405573543548584, "loss": 0.008034255355596542, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 45.68132946910132, "step_time": 0.3639888572692871} +{"epoch": 0, "iter": 17322, "iter_tflops": 44.48732217615659, "iter_time": 0.3737580528259277, "loss": 0.0207076333463192, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 48.985251988079206, "step_time": 0.3394387950897217} +{"epoch": 0, "iter": 17323, "iter_tflops": 41.92924023111968, "iter_time": 0.3965608444213867, "loss": 0.0030225457157939672, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 45.59451054952595, "step_time": 0.3646819477081299} +{"epoch": 0, "iter": 17324, "iter_tflops": 42.43216376419419, "iter_time": 0.3918606414794922, "loss": 0.014665537513792515, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 46.68768841319433, "step_time": 0.3561430320739746} +{"epoch": 0, "iter": 17325, "iter_tflops": 36.59769329183342, "iter_time": 0.5637266082763671, "loss": 0.12442313134670258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.483127400583236, "step_time": 0.5225293655395508} +{"epoch": 0, "iter": 17326, "iter_tflops": 9.936203793049588, "iter_time": 2.076355712890625, "loss": 0.1662272959947586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.302586395740477, "step_time": 1.5509084396362305} +{"epoch": 0, "iter": 17327, "iter_tflops": 13.350196488458597, "iter_time": 1.5453775177001952, "loss": 0.20899781584739685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.751888096530246, "step_time": 1.3097536869049071} +{"epoch": 0, "iter": 17328, 
"iter_tflops": 38.243242597960425, "iter_time": 0.5394702987670899, "loss": 0.19361327588558197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.13272612214465, "step_time": 0.4896690864562988} +{"epoch": 0, "iter": 17329, "iter_tflops": 9.949241047187996, "iter_time": 1.617641372680664, "loss": 0.2952394485473633, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 10.34857149035602, "step_time": 1.5552198638916015} +{"epoch": 0, "iter": 17330, "iter_tflops": 13.96562522833606, "iter_time": 1.1524227294921876, "loss": 0.2475077211856842, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 19.8879807260385, "step_time": 0.8092477645874023} +{"epoch": 0, "iter": 17331, "iter_tflops": 26.74804642971876, "iter_time": 0.6017001647949218, "loss": 0.16415074467658997, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 28.65271639697541, "step_time": 0.5617025527954103} +{"epoch": 0, "iter": 17332, "iter_tflops": 23.0561780519595, "iter_time": 0.6980473480224609, "loss": 0.12797918915748596, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 24.964170583734948, "step_time": 0.6446961212158202} +{"epoch": 0, "iter": 17333, "iter_tflops": 33.78050849325806, "iter_time": 0.6107395782470704, "loss": 0.5074175000190735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.38953805968487, "step_time": 0.5517878684997558} +{"epoch": 0, "iter": 17334, "iter_tflops": 38.920678476599264, "iter_time": 0.5300805206298828, "loss": 0.4551561772823334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.16622726608569, "step_time": 0.477945255279541} +{"epoch": 0, "iter": 17335, "iter_tflops": 41.082491240730185, "iter_time": 0.5021870117187499, "loss": 0.492824524641037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.60860054092995, "step_time": 0.46249138641357423} +{"epoch": 0, "iter": 17336, "iter_tflops": 35.628817370305875, "iter_time": 0.5790563659667969, "loss": 0.5965175628662109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.019365940502155, "step_time": 0.5287398452758789} +{"epoch": 0, "iter": 
17337, "iter_tflops": 19.138135762861587, "iter_time": 1.0173852691650391, "loss": 0.10010210424661636, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 20.38761685285142, "step_time": 0.9550335159301758} +{"epoch": 0, "iter": 17338, "iter_tflops": 21.071711526821048, "iter_time": 0.9240282821655276, "loss": 0.09900850057601929, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 23.707658670918324, "step_time": 0.8212897644042969} +{"epoch": 0, "iter": 17339, "iter_tflops": 46.855843862085436, "iter_time": 0.4155481109619141, "loss": 0.12799951434135437, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 51.21859628281592, "step_time": 0.38015210914611813} +{"epoch": 0, "iter": 17340, "iter_tflops": 46.28439979405597, "iter_time": 0.42067861938476564, "loss": 0.09570372849702835, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 50.02854560112655, "step_time": 0.38919495201110843} +{"epoch": 0, "iter": 17341, "iter_tflops": 36.90333982458644, "iter_time": 0.5590576248168946, "loss": 0.19312942028045654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.8279308797895, "step_time": 0.5180056571960449} +{"epoch": 0, "iter": 17342, "iter_tflops": 45.38709832235019, "iter_time": 0.4545585479736328, "loss": 0.2720610499382019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.72359790519449, "step_time": 0.41491554069519043} +{"epoch": 0, "iter": 17343, "iter_tflops": 50.98257516449084, "iter_time": 0.4046695060729981, "loss": 0.4155450165271759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.23018034917856, "step_time": 0.37354745864868166} +{"epoch": 0, "iter": 17344, "iter_tflops": 53.16309601738779, "iter_time": 0.3880717086791992, "loss": 0.256621778011322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.63841280483258, "step_time": 0.3579400005340576} +{"epoch": 0, "iter": 17345, "iter_tflops": 44.430123064327034, "iter_time": 0.4643492317199707, "loss": 0.26782938838005066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.54524965549024, "step_time": 0.4249868659973145} 
+{"epoch": 0, "iter": 17346, "iter_tflops": 44.8416154683616, "iter_time": 0.46008809661865235, "loss": 0.250764936208725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.978227929731304, "step_time": 0.4212298889160156} +{"epoch": 0, "iter": 17347, "iter_tflops": 46.61119303942665, "iter_time": 0.4426210136413574, "loss": 0.15222707390785217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.46274318495594, "step_time": 0.4088381290435791} +{"epoch": 0, "iter": 17348, "iter_tflops": 46.42673001806408, "iter_time": 0.444379638671875, "loss": 0.153756782412529, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.3566197473478, "step_time": 0.4096997299194336} +{"epoch": 0, "iter": 17349, "iter_tflops": 28.902245532736536, "iter_time": 0.7138232040405272, "loss": 0.41766157746315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.496450984301703, "step_time": 0.676508014678955} +{"epoch": 0, "iter": 17350, "iter_tflops": 11.457668676333965, "iter_time": 1.8006362457275389, "loss": 0.4083390235900879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.284178736005371, "step_time": 1.4443317947387697} +{"epoch": 0, "iter": 17351, "iter_tflops": 10.840377957109206, "iter_time": 1.903171051025391, "loss": 0.5017584562301636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.378793206113137, "step_time": 1.5420743255615235} +{"epoch": 0, "iter": 17352, "iter_tflops": 31.788062060968976, "iter_time": 0.6490201721191406, "loss": 0.5416849255561829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.999688230551236, "step_time": 0.5032012290954591} +{"epoch": 0, "iter": 17353, "iter_tflops": 16.88072389231003, "iter_time": 0.8516895980834961, "loss": 0.16757832467556, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 17.774202184297696, "step_time": 0.8088766403198242} +{"epoch": 0, "iter": 17354, "iter_tflops": 10.861651976831618, "iter_time": 1.3236602478027344, "loss": 0.19012212753295898, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 11.961344250813555, "step_time": 
1.2019666557312012} +{"epoch": 0, "iter": 17355, "iter_tflops": 23.90786211972332, "iter_time": 0.6013560256958008, "loss": 0.17919273674488068, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 25.416711347191224, "step_time": 0.5656568527221679} +{"epoch": 0, "iter": 17356, "iter_tflops": 24.635206539776068, "iter_time": 0.5836012344360352, "loss": 0.3437212109565735, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 26.208812377447718, "step_time": 0.5485611763000489} +{"epoch": 0, "iter": 17357, "iter_tflops": 13.801153287246313, "iter_time": 0.8357047119140626, "loss": 0.00563544686883688, "lr": 3e-05, "seqlen": 4640.0, "step_tflops": 14.526147329391124, "step_time": 0.7939950332641602} +{"epoch": 0, "iter": 17358, "iter_tflops": 8.654629363552482, "iter_time": 1.3326612091064454, "loss": 0.008018720895051956, "lr": 3e-05, "seqlen": 4640.0, "step_tflops": 10.307286135975582, "step_time": 1.1189840545654297} +{"epoch": 0, "iter": 17359, "iter_tflops": 31.4467730808275, "iter_time": 0.36676859664916994, "loss": 0.009990724734961987, "lr": 3e-05, "seqlen": 4640.0, "step_tflops": 34.56987509087201, "step_time": 0.33363409042358405} +{"epoch": 0, "iter": 17360, "iter_tflops": 32.01676808009411, "iter_time": 0.36023900985717777, "loss": 0.013364508748054504, "lr": 3e-05, "seqlen": 4640.0, "step_tflops": 35.0411385986952, "step_time": 0.3291470909118652} +{"epoch": 0, "iter": 17361, "iter_tflops": 34.52050989726379, "iter_time": 0.5976474151611328, "loss": 0.12135644257068634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.795653357056906, "step_time": 0.5606937675476075} +{"epoch": 0, "iter": 17362, "iter_tflops": 9.284641179914138, "iter_time": 2.222066864013672, "loss": 0.07937982678413391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.607174913655134, "step_time": 1.9450130386352538} +{"epoch": 0, "iter": 17363, "iter_tflops": 12.075362094527957, "iter_time": 1.7085279388427737, "loss": 0.06646600365638733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
13.93350755943027, "step_time": 1.4806819763183592} +{"epoch": 0, "iter": 17364, "iter_tflops": 21.857446827255227, "iter_time": 0.9438931121826172, "loss": 0.1800878345966339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.0304151080236, "step_time": 0.6062545356750487} +{"epoch": 0, "iter": 17365, "iter_tflops": 12.871463292531269, "iter_time": 1.2694976959228514, "loss": 0.373405396938324, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 13.56051713183514, "step_time": 1.2049904022216797} +{"epoch": 0, "iter": 17366, "iter_tflops": 18.41342932449002, "iter_time": 0.8874117202758789, "loss": 0.2652682960033417, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 21.144484831527535, "step_time": 0.7727922019958495} +{"epoch": 0, "iter": 17367, "iter_tflops": 23.943056171602443, "iter_time": 0.6824647979736328, "loss": 0.20678123831748962, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 25.841674207780805, "step_time": 0.6323233108520507} +{"epoch": 0, "iter": 17368, "iter_tflops": 24.430285815289302, "iter_time": 0.6688539428710938, "loss": 0.23231267929077148, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 26.258542152868717, "step_time": 0.6222848510742187} +{"epoch": 0, "iter": 17369, "iter_tflops": 15.509534882358912, "iter_time": 1.330220001220703, "loss": 0.1519087553024292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.434877816872245, "step_time": 1.25532381439209} +{"epoch": 0, "iter": 17370, "iter_tflops": 26.60707891904533, "iter_time": 0.775398666381836, "loss": 0.18390502035617828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.193386801725, "step_time": 0.6215422859191895} +{"epoch": 0, "iter": 17371, "iter_tflops": 37.20903084159502, "iter_time": 0.5544646835327148, "loss": 0.11290121078491211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.96023978391917, "step_time": 0.5036858577728272} +{"epoch": 0, "iter": 17372, "iter_tflops": 40.74314247476388, "iter_time": 0.5063697166442872, "loss": 0.1852702498435974, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.55633660056117, "step_time": 0.46303388214111324} +{"epoch": 0, "iter": 17373, "iter_tflops": 31.348895321313933, "iter_time": 0.6581122970581055, "loss": 0.47677358984947205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.60219859322544, "step_time": 0.5962364921569825} +{"epoch": 0, "iter": 17374, "iter_tflops": 37.570703714415764, "iter_time": 0.5491271514892578, "loss": 0.5757369995117188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.13248500272107, "step_time": 0.5015766372680664} +{"epoch": 0, "iter": 17375, "iter_tflops": 42.17078415716646, "iter_time": 0.48922717285156253, "loss": 0.5139330625534058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.91367369187585, "step_time": 0.44934530067443845} +{"epoch": 0, "iter": 17376, "iter_tflops": 40.453867985421205, "iter_time": 0.5099906272888184, "loss": 0.4689040184020996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.13417830083985, "step_time": 0.4674629573822021} +{"epoch": 0, "iter": 17377, "iter_tflops": 11.563476202837997, "iter_time": 1.2186432037353516, "loss": 0.0006428410997614264, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 12.23763358431724, "step_time": 1.1515095291137694} +{"epoch": 0, "iter": 17378, "iter_tflops": 17.347781282779348, "iter_time": 0.8123085861206054, "loss": 0.024374745786190033, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 26.224811083858377, "step_time": 0.537344259262085} +{"epoch": 0, "iter": 17379, "iter_tflops": 42.14622212539239, "iter_time": 0.3343538513183594, "loss": 0.003939096815884113, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 46.51342323967301, "step_time": 0.30296096706390385} +{"epoch": 0, "iter": 17380, "iter_tflops": 40.434661682099616, "iter_time": 0.3485067291259766, "loss": 0.0010085515677928925, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 44.3779822620673, "step_time": 0.3175392608642578} +{"epoch": 0, "iter": 17381, "iter_tflops": 32.32962882091794, "iter_time": 0.6330143508911132, "loss": 0.05159720778465271, "lr": 
3e-05, "seqlen": 8128.0, "step_tflops": 34.46229083402912, "step_time": 0.5938409347534179} +{"epoch": 0, "iter": 17382, "iter_tflops": 13.767875359691525, "iter_time": 1.4864398803710936, "loss": 0.061156388372182846, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 14.893150540390476, "step_time": 1.3741296005249022} +{"epoch": 0, "iter": 17383, "iter_tflops": 14.17168386647714, "iter_time": 1.444085205078125, "loss": 0.07933326065540314, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 15.855359231905316, "step_time": 1.2907382736206054} +{"epoch": 0, "iter": 17384, "iter_tflops": 40.99333650556335, "iter_time": 0.4992303810119629, "loss": 0.06847607344388962, "lr": 3e-05, "seqlen": 8128.0, "step_tflops": 45.23249167178176, "step_time": 0.4524428844451905} +{"epoch": 0, "iter": 17385, "iter_tflops": 16.52705410476523, "iter_time": 0.951508888244629, "loss": 0.31624147295951843, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 17.486084062404906, "step_time": 0.8993230743408203} +{"epoch": 0, "iter": 17386, "iter_tflops": 19.106821496287065, "iter_time": 0.8230379333496094, "loss": 0.2203725427389145, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 23.42860312557559, "step_time": 0.6712153854370118} +{"epoch": 0, "iter": 17387, "iter_tflops": 28.206244370361674, "iter_time": 0.5575233154296875, "loss": 0.20340083539485931, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 30.084264098443064, "step_time": 0.5227197456359864} +{"epoch": 0, "iter": 17388, "iter_tflops": 29.517083228935377, "iter_time": 0.5327639846801757, "loss": 0.14916501939296722, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 31.379205079127892, "step_time": 0.5011484146118164} +{"epoch": 0, "iter": 17389, "iter_tflops": 28.36039547301095, "iter_time": 0.7274614181518555, "loss": 0.49372240900993347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.963098006972515, "step_time": 0.6885500793457032} +{"epoch": 0, "iter": 17390, "iter_tflops": 31.58681525679795, "iter_time": 0.6531552276611328, "loss": 
0.4537277817726135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.06414030285406, "step_time": 0.5420086555480956} +{"epoch": 0, "iter": 17391, "iter_tflops": 35.64444872611785, "iter_time": 0.5788024291992188, "loss": 0.7849959135055542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.93976494717235, "step_time": 0.5298206996917725} +{"epoch": 0, "iter": 17392, "iter_tflops": 36.3836407782913, "iter_time": 0.5670431289672851, "loss": 0.5917019248008728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.43256843298694, "step_time": 0.5231993331909179} +{"epoch": 0, "iter": 17393, "iter_tflops": 21.272430712066246, "iter_time": 0.9698512496948243, "loss": 0.1310100257396698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.161251552032656, "step_time": 0.8907590103149414} +{"epoch": 0, "iter": 17394, "iter_tflops": 27.422870361694397, "iter_time": 0.752331657409668, "loss": 0.07354431599378586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.17334119578151, "step_time": 0.6618184871673584} +{"epoch": 0, "iter": 17395, "iter_tflops": 46.92742679473959, "iter_time": 0.4396382865905762, "loss": 0.09099841862916946, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.355182862702364, "step_time": 0.40173342514038085} +{"epoch": 0, "iter": 17396, "iter_tflops": 46.84560751599893, "iter_time": 0.4404061470031738, "loss": 0.09517920762300491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.77090863152889, "step_time": 0.4063565940856933} +{"epoch": 0, "iter": 17397, "iter_tflops": 30.542556777038495, "iter_time": 0.675486785888672, "loss": 0.47030824422836304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.36965296593087, "step_time": 0.6373591194152832} +{"epoch": 0, "iter": 17398, "iter_tflops": 20.833870722136055, "iter_time": 0.9902669448852539, "loss": 0.4285873472690582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.23470870135817, "step_time": 0.8879428520202637} +{"epoch": 0, "iter": 17399, "iter_tflops": 45.05808525408553, "iter_time": 0.45787772369384766, 
"loss": 0.46499595046043396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.602888711611115, "step_time": 0.42448286628723136} +{"epoch": 0, "iter": 17400, "iter_tflops": 45.61959749059356, "iter_time": 0.4522419013977051, "loss": 0.5451744794845581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.39764443165138, "step_time": 0.4176533870697021} +{"epoch": 0, "iter": 17401, "iter_tflops": 40.47661786334581, "iter_time": 0.509703987121582, "loss": 0.0031012657564133406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77327171746146, "step_time": 0.4713171463012696} +{"epoch": 0, "iter": 17402, "iter_tflops": 14.588225736403466, "iter_time": 1.4142291107177734, "loss": 0.004796611610800028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.981840807693075, "step_time": 1.086885814666748} +{"epoch": 0, "iter": 17403, "iter_tflops": 43.58880219031187, "iter_time": 0.47331177902221677, "loss": 0.003580809338018298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.2504501869338, "step_time": 0.4275834407806396} +{"epoch": 0, "iter": 17404, "iter_tflops": 43.279017915895324, "iter_time": 0.47669966888427734, "loss": 0.003138328669592738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73422747430774, "step_time": 0.43220755004882816} +{"epoch": 0, "iter": 17405, "iter_tflops": 18.53415124477894, "iter_time": 1.1131393737792972, "loss": 0.27936089038848877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.88456487817165, "step_time": 1.0375431213378907} +{"epoch": 0, "iter": 17406, "iter_tflops": 30.51159318411455, "iter_time": 0.6761722793579101, "loss": 0.33311915397644043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.054526648228006, "step_time": 0.5885429210662841} +{"epoch": 0, "iter": 17407, "iter_tflops": 38.88860765500321, "iter_time": 0.5305176696777344, "loss": 0.33920034766197205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.72518916585801, "step_time": 0.4828789272308349} +{"epoch": 0, "iter": 17408, "iter_tflops": 38.91492402453171, "iter_time": 
0.5301589050292969, "loss": 0.4077349603176117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7206819963212, "step_time": 0.4829298725128174} +{"epoch": 0, "iter": 17409, "iter_tflops": 21.043972290462705, "iter_time": 0.9803801879882813, "loss": 0.09405675530433655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.359058684310572, "step_time": 0.9227174453735353} +{"epoch": 0, "iter": 17410, "iter_tflops": 26.884062410328873, "iter_time": 0.7674098205566406, "loss": 0.1152803897857666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.637876602212465, "step_time": 0.520489372253418} +{"epoch": 0, "iter": 17411, "iter_tflops": 52.4766163309464, "iter_time": 0.39314831924438476, "loss": 0.07005827128887177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.12958821024344, "step_time": 0.3611279926300049} +{"epoch": 0, "iter": 17412, "iter_tflops": 51.757807286704036, "iter_time": 0.39860833740234375, "loss": 0.10609745979309082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.47631999174782, "step_time": 0.3653052024841309} +{"epoch": 0, "iter": 17413, "iter_tflops": 28.136187852903312, "iter_time": 0.7332583084106445, "loss": 0.6141157150268555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.71895392235707, "step_time": 0.6942065849304199} +{"epoch": 0, "iter": 17414, "iter_tflops": 14.703175949538592, "iter_time": 1.403172592163086, "loss": 0.6063568592071533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.608013695766786, "step_time": 1.1716877250671387} +{"epoch": 0, "iter": 17415, "iter_tflops": 30.68775981992784, "iter_time": 0.6722906341552735, "loss": 0.5789183974266052, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.69112726147477, "step_time": 0.5473726844787598} +{"epoch": 0, "iter": 17416, "iter_tflops": 37.04588372477619, "iter_time": 0.5569065017700195, "loss": 0.4624936282634735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.355800398636326, "step_time": 0.5112299423217773} +{"epoch": 0, "iter": 17417, "iter_tflops": 23.87244297296416, 
"iter_time": 0.8642221298217774, "loss": 0.6681324243545532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.445207493505038, "step_time": 0.8108046874999999} +{"epoch": 0, "iter": 17418, "iter_tflops": 8.961287853176788, "iter_time": 2.3022464904785154, "loss": 0.8215606212615967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.078847952651108, "step_time": 1.8622056732177734} +{"epoch": 0, "iter": 17419, "iter_tflops": 12.064650456018791, "iter_time": 1.7100448608398435, "loss": 0.7020149827003479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.911693098986895, "step_time": 1.4830037841796875} +{"epoch": 0, "iter": 17420, "iter_tflops": 43.47298666612064, "iter_time": 0.4745727195739746, "loss": 0.6358543634414673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.077463654308765, "step_time": 0.4382371501922608} +{"epoch": 0, "iter": 17421, "iter_tflops": 19.420391255009566, "iter_time": 0.8604149551391601, "loss": 0.2095886766910553, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 20.399912693597617, "step_time": 0.819101303100586} +{"epoch": 0, "iter": 17422, "iter_tflops": 9.386278681100231, "iter_time": 1.780215103149414, "loss": 0.1874464452266693, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 11.510227617016033, "step_time": 1.451717170715332} +{"epoch": 0, "iter": 17423, "iter_tflops": 25.06285304300315, "iter_time": 0.6667076187133789, "loss": 0.15406224131584167, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 27.06218063455171, "step_time": 0.6174519081115724} +{"epoch": 0, "iter": 17424, "iter_tflops": 28.069900082121617, "iter_time": 0.5952851638793945, "loss": 0.24871878325939178, "lr": 3e-05, "seqlen": 6672.0, "step_tflops": 30.04671215894664, "step_time": 0.5561205825805664} +{"epoch": 0, "iter": 17425, "iter_tflops": 19.24595505337945, "iter_time": 1.0719703674316405, "loss": 0.4091341495513916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.65091364171455, "step_time": 0.9990402297973634} +{"epoch": 0, "iter": 17426, "iter_tflops": 
13.07959857121508, "iter_time": 1.577349136352539, "loss": 0.42297378182411194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.126207371093027, "step_time": 1.2046504554748536} +{"epoch": 0, "iter": 17427, "iter_tflops": 36.862255831693275, "iter_time": 0.5596807098388671, "loss": 0.3727826178073883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.43531986682078, "step_time": 0.5102245655059814} +{"epoch": 0, "iter": 17428, "iter_tflops": 36.19368638142151, "iter_time": 0.5700191268920898, "loss": 0.3715842068195343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.39163407881139, "step_time": 0.5237430229187012} +{"epoch": 0, "iter": 17429, "iter_tflops": 11.927930032405348, "iter_time": 1.729645751953125, "loss": 0.5719525814056396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.693476560052645, "step_time": 1.6253304138183593} +{"epoch": 0, "iter": 17430, "iter_tflops": 20.335421887426673, "iter_time": 1.0145397338867186, "loss": 0.46076807379722595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.9330174441421, "step_time": 0.8996240272521974} +{"epoch": 0, "iter": 17431, "iter_tflops": 43.597584544731255, "iter_time": 0.4732164344787598, "loss": 0.6080262660980225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90741569507555, "step_time": 0.4398258399963379} +{"epoch": 0, "iter": 17432, "iter_tflops": 48.32422725399559, "iter_time": 0.4269306449890137, "loss": 0.6091845035552979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.32292716366507, "step_time": 0.39430312156677244} +{"epoch": 0, "iter": 17433, "iter_tflops": 39.841782722057104, "iter_time": 0.5178255615234375, "loss": 0.5486064553260803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.33268063405958, "step_time": 0.4761093292236328} +{"epoch": 0, "iter": 17434, "iter_tflops": 36.75516709824853, "iter_time": 0.5613113784790038, "loss": 0.603965163230896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.274111778483636, "step_time": 0.5122668781280517} +{"epoch": 0, "iter": 17435, 
"iter_tflops": 36.31791536549561, "iter_time": 0.568069320678711, "loss": 0.6424148678779602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.08778336619905, "step_time": 0.5146478996276855} +{"epoch": 0, "iter": 17436, "iter_tflops": 40.03006710107491, "iter_time": 0.5153899307250976, "loss": 0.5999495983123779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.74769665876061, "step_time": 0.471592679977417} +{"epoch": 0, "iter": 17437, "iter_tflops": 19.54606336989915, "iter_time": 1.0321692352294922, "loss": 0.024486374109983444, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 20.859877534463678, "step_time": 0.9671602935791017} +{"epoch": 0, "iter": 17438, "iter_tflops": 16.064883496814858, "iter_time": 1.2558351440429687, "loss": 0.025953637436032295, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 21.49626753059998, "step_time": 0.9385278282165528} +{"epoch": 0, "iter": 17439, "iter_tflops": 50.95882620862043, "iter_time": 0.3959048271179199, "loss": 0.009000701829791069, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 55.5078869024234, "step_time": 0.36345907592773435} +{"epoch": 0, "iter": 17440, "iter_tflops": 49.822472463967415, "iter_time": 0.4049346466064453, "loss": 0.03962430730462074, "lr": 3e-05, "seqlen": 8016.0, "step_tflops": 54.22132862338012, "step_time": 0.3720831966400146} +{"epoch": 0, "iter": 17441, "iter_tflops": 32.05576351994109, "iter_time": 0.6436001281738282, "loss": 0.42108505964279175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.234785759681344, "step_time": 0.6026353912353516} +{"epoch": 0, "iter": 17442, "iter_tflops": 13.723378923107413, "iter_time": 1.5033537750244141, "loss": 0.31484994292259216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.816172863207612, "step_time": 1.3044301986694335} +{"epoch": 0, "iter": 17443, "iter_tflops": 10.955216667539558, "iter_time": 1.883220947265625, "loss": 0.3669857978820801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.611553263396388, "step_time": 1.6358883857727051} +{"epoch": 0, 
"iter": 17444, "iter_tflops": 26.231453877131027, "iter_time": 0.786502098083496, "loss": 0.24188104271888733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.730455856407595, "step_time": 0.6116458549499512} +{"epoch": 0, "iter": 17445, "iter_tflops": 11.328674140393709, "iter_time": 1.442383529663086, "loss": 0.3127020001411438, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 11.973598286772447, "step_time": 1.364693603515625} +{"epoch": 0, "iter": 17446, "iter_tflops": 13.111684206285814, "iter_time": 1.2462390594482422, "loss": 0.2516147196292877, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 16.268946490880122, "step_time": 1.004385440826416} +{"epoch": 0, "iter": 17447, "iter_tflops": 25.653377896046468, "iter_time": 0.6369645767211913, "loss": 0.17037691175937653, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 27.635762082116887, "step_time": 0.5912734718322754} +{"epoch": 0, "iter": 17448, "iter_tflops": 27.191895836640352, "iter_time": 0.6009251098632813, "loss": 0.19318513572216034, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 29.073475984985432, "step_time": 0.5620343780517578} +{"epoch": 0, "iter": 17449, "iter_tflops": 20.35936697770447, "iter_time": 1.0133465118408203, "loss": 0.5944546461105347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.62117116595514, "step_time": 0.9542079544067384} +{"epoch": 0, "iter": 17450, "iter_tflops": 9.514869124468337, "iter_time": 2.16830029296875, "loss": 0.4594634175300598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.459379962818424, "step_time": 1.8003673477172852} +{"epoch": 0, "iter": 17451, "iter_tflops": 11.992796941086656, "iter_time": 1.7202904052734378, "loss": 0.4073086380958557, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.276222203346382, "step_time": 1.4451367607116699} +{"epoch": 0, "iter": 17452, "iter_tflops": 35.67198391501105, "iter_time": 0.5783556518554687, "loss": 0.5272778868675232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.85968713028636, "step_time": 0.5309124965667724} 
+{"epoch": 0, "iter": 17453, "iter_tflops": 16.16784258900179, "iter_time": 0.8993357009887696, "loss": 0.2514665126800537, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 17.270149895196386, "step_time": 0.8419335174560547} +{"epoch": 0, "iter": 17454, "iter_tflops": 7.8536861299875245, "iter_time": 1.8514004516601563, "loss": 0.2348541021347046, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 8.495786667945273, "step_time": 1.7114740066528318} +{"epoch": 0, "iter": 17455, "iter_tflops": 6.496454543440025, "iter_time": 2.238192840576172, "loss": 0.13899318873882294, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 8.038280432273039, "step_time": 1.8088841476440431} +{"epoch": 0, "iter": 17456, "iter_tflops": 21.08141184739237, "iter_time": 0.6897222137451172, "loss": 0.18347762525081635, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 22.607042301527667, "step_time": 0.6431764869689941} +{"epoch": 0, "iter": 17457, "iter_tflops": 16.25042892018157, "iter_time": 1.0788413543701172, "loss": 0.27068236470222473, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 17.322616584669454, "step_time": 1.0120662002563476} +{"epoch": 0, "iter": 17458, "iter_tflops": 14.520328518949524, "iter_time": 1.2073855438232421, "loss": 0.1444399356842041, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 16.759366678307263, "step_time": 1.0460797882080077} +{"epoch": 0, "iter": 17459, "iter_tflops": 30.900539703555594, "iter_time": 0.5673569107055665, "loss": 0.23957675695419312, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 33.11058277404567, "step_time": 0.5294873504638672} +{"epoch": 0, "iter": 17460, "iter_tflops": 31.043189689038993, "iter_time": 0.5647497863769531, "loss": 0.13238267600536346, "lr": 3e-05, "seqlen": 6992.0, "step_tflops": 33.05495215916071, "step_time": 0.5303784637451172} +{"epoch": 0, "iter": 17461, "iter_tflops": 27.884549073303514, "iter_time": 0.7398754577636718, "loss": 0.4279608726501465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.684926917798805, "step_time": 
0.6950023345947265} +{"epoch": 0, "iter": 17462, "iter_tflops": 16.35938551269523, "iter_time": 1.2611166534423828, "loss": 0.5890101194381714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.306746130980706, "step_time": 1.068595058441162} +{"epoch": 0, "iter": 17463, "iter_tflops": 41.33220283596396, "iter_time": 0.4991530113220215, "loss": 0.693217933177948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.5409034374895, "step_time": 0.46319432067871097} +{"epoch": 0, "iter": 17464, "iter_tflops": 44.87351263467209, "iter_time": 0.45976105499267583, "loss": 0.6278955340385437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.348946636553144, "step_time": 0.42671236801147455} +{"epoch": 0, "iter": 17465, "iter_tflops": 26.73072565392522, "iter_time": 0.7718119506835939, "loss": 0.6480724215507507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.204165698152007, "step_time": 0.7314910049438477} +{"epoch": 0, "iter": 17466, "iter_tflops": 14.716394220888926, "iter_time": 1.4019122619628908, "loss": 0.6869732141494751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.89051927420803, "step_time": 1.09214009475708} +{"epoch": 0, "iter": 17467, "iter_tflops": 32.141260353423, "iter_time": 0.6418881301879883, "loss": 0.5153984427452087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.03489415871014, "step_time": 0.5888727226257324} +{"epoch": 0, "iter": 17468, "iter_tflops": 35.2564335670625, "iter_time": 0.5851724472045898, "loss": 0.5660302042961121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.2015814266899, "step_time": 0.5400586242675781} +{"epoch": 0, "iter": 17469, "iter_tflops": 15.58853195762867, "iter_time": 0.9484684829711915, "loss": 0.03454214334487915, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 16.638510660598612, "step_time": 0.8886150665283202} +{"epoch": 0, "iter": 17470, "iter_tflops": 6.116698045799337, "iter_time": 2.417191619873047, "loss": 0.03657212108373642, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 8.061145257742707, "step_time": 
1.8341353225708008} +{"epoch": 0, "iter": 17471, "iter_tflops": 10.089252884369069, "iter_time": 1.465443618774414, "loss": 0.04114668816328049, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 12.567970017834858, "step_time": 1.176421588897705} +{"epoch": 0, "iter": 17472, "iter_tflops": 27.983720327390586, "iter_time": 0.5283511657714844, "loss": 0.02837049588561058, "lr": 3e-05, "seqlen": 5920.0, "step_tflops": 32.361325501527965, "step_time": 0.4568796558380127} +{"epoch": 0, "iter": 17473, "iter_tflops": 11.843839674044068, "iter_time": 1.2104481201171875, "loss": 0.1997680813074112, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 12.607884307259434, "step_time": 1.1370943069458008} +{"epoch": 0, "iter": 17474, "iter_tflops": 11.078212287748961, "iter_time": 1.294103515625, "loss": 0.40526893734931946, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 14.087919797812479, "step_time": 1.0176345176696777} +{"epoch": 0, "iter": 17475, "iter_tflops": 25.974011645091412, "iter_time": 0.5519499130249024, "loss": 0.16107895970344543, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 27.745852737991754, "step_time": 0.5167025718688966} +{"epoch": 0, "iter": 17476, "iter_tflops": 22.664146829124796, "iter_time": 0.6325565032958984, "loss": 0.1363043189048767, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 24.07840253399462, "step_time": 0.5954030151367188} +{"epoch": 0, "iter": 17477, "iter_tflops": 25.440513123868172, "iter_time": 0.8109542999267578, "loss": 0.1441957652568817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.905045929925258, "step_time": 0.7668113098144531} +{"epoch": 0, "iter": 17478, "iter_tflops": 35.444233692160616, "iter_time": 0.5820719299316406, "loss": 0.10801457613706589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.47406765156853, "step_time": 0.5226492919921876} +{"epoch": 0, "iter": 17479, "iter_tflops": 44.141273929764786, "iter_time": 0.46738781356811526, "loss": 0.17961028218269348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
47.54620716718849, "step_time": 0.4339167041778565} +{"epoch": 0, "iter": 17480, "iter_tflops": 47.38953925501961, "iter_time": 0.4353512153625489, "loss": 0.12637539207935333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24478726693302, "step_time": 0.4025988712310791} +{"epoch": 0, "iter": 17481, "iter_tflops": 24.87631487560761, "iter_time": 0.8293468551635742, "loss": 0.032252728939056396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.20404481979144, "step_time": 0.7873247680664062} +{"epoch": 0, "iter": 17482, "iter_tflops": 15.660728289554296, "iter_time": 1.3173776550292968, "loss": 0.02603740803897381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.015605054768756, "step_time": 0.9371122646331789} +{"epoch": 0, "iter": 17483, "iter_tflops": 42.42974368527983, "iter_time": 0.48624129486083983, "loss": 0.04527236521244049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.80054029232673, "step_time": 0.44083024215698247} +{"epoch": 0, "iter": 17484, "iter_tflops": 47.66597168849026, "iter_time": 0.4328264541625977, "loss": 0.03478828817605972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.17903729684676, "step_time": 0.395390459060669} +{"epoch": 0, "iter": 17485, "iter_tflops": 33.31360696180013, "iter_time": 0.6192993011474609, "loss": 0.6496930718421936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.48749591722735, "step_time": 0.5654291419982911} +{"epoch": 0, "iter": 17486, "iter_tflops": 33.932672325372295, "iter_time": 0.6080008468627929, "loss": 0.8227786421775818, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.02536857461344, "step_time": 0.5572150745391846} +{"epoch": 0, "iter": 17487, "iter_tflops": 31.041521976625514, "iter_time": 0.6646289291381836, "loss": 0.6210606098175049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.573044817776925, "step_time": 0.6145136260986327} +{"epoch": 0, "iter": 17488, "iter_tflops": 33.94877556641093, "iter_time": 0.6077124481201173, "loss": 0.737423300743103, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 36.96672882830036, "step_time": 0.5580989761352539} +{"epoch": 0, "iter": 17489, "iter_tflops": 23.94959013896647, "iter_time": 0.8614382705688478, "loss": 0.08766347914934158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.012261662201176, "step_time": 0.7931295547485351} +{"epoch": 0, "iter": 17490, "iter_tflops": 23.076787160378487, "iter_time": 0.8940193176269532, "loss": 0.09098568558692932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.831065215961765, "step_time": 0.7412973003387451} +{"epoch": 0, "iter": 17491, "iter_tflops": 51.93441887046205, "iter_time": 0.39725280380249023, "loss": 0.0670965239405632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.69592853112605, "step_time": 0.3638902130126953} +{"epoch": 0, "iter": 17492, "iter_tflops": 51.719385791097025, "iter_time": 0.3989044570922852, "loss": 0.07779036462306976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.757412194002605, "step_time": 0.363496021270752} +{"epoch": 0, "iter": 17493, "iter_tflops": 35.05133635243984, "iter_time": 0.5885964889526367, "loss": 0.028170665726065636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.61879553663041, "step_time": 0.5484251480102539} +{"epoch": 0, "iter": 17494, "iter_tflops": 11.985664712203118, "iter_time": 1.7213140869140624, "loss": 0.058996379375457764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.184954945370672, "step_time": 1.3586535873413086} +{"epoch": 0, "iter": 17495, "iter_tflops": 51.523753472690025, "iter_time": 0.4004190711975098, "loss": 0.01265611220151186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.23709412330588, "step_time": 0.366859167098999} +{"epoch": 0, "iter": 17496, "iter_tflops": 53.029775463095845, "iter_time": 0.38904734802246094, "loss": 0.024732325226068497, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.99260208931922, "step_time": 0.355753885269165} +{"epoch": 0, "iter": 17497, "iter_tflops": 26.188944514806273, "iter_time": 0.7877787322998047, "loss": 0.586723268032074, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 27.616633124178843, "step_time": 0.7470531768798828} +{"epoch": 0, "iter": 17498, "iter_tflops": 18.14096898459802, "iter_time": 1.1372652435302735, "loss": 0.6908515691757202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.393511001081304, "step_time": 0.8457615432739257} +{"epoch": 0, "iter": 17499, "iter_tflops": 45.07186254344655, "iter_time": 0.45773776245117187, "loss": 0.5754066109657288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.635903461303435, "step_time": 0.4241947212219238} +{"epoch": 0, "iter": 17500, "iter_tflops": 43.29254190301382, "iter_time": 0.47655075454711915, "loss": 0.5837424397468567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.998821049908486, "step_time": 0.43897044754028314} +{"epoch": 0, "iter": 17501, "iter_tflops": 28.58640598439045, "iter_time": 0.7217099456787109, "loss": 0.002977122785523534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.310695750735743, "step_time": 0.6806539077758789} +{"epoch": 0, "iter": 17502, "iter_tflops": 16.71952125472146, "iter_time": 1.233952407836914, "loss": 0.002530357101932168, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.69748204647735, "step_time": 1.1034156074523924} +{"epoch": 0, "iter": 17503, "iter_tflops": 55.196567468156104, "iter_time": 0.37377493667602535, "loss": 0.010119341313838959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.52301614816365, "step_time": 0.3353394355773926} +{"epoch": 0, "iter": 17504, "iter_tflops": 54.169940473385154, "iter_time": 0.3808587074279785, "loss": 0.0034316342789679766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.499286090326514, "step_time": 0.3467452278137207} +{"epoch": 0, "iter": 17505, "iter_tflops": 37.57983876251288, "iter_time": 0.5489936676025391, "loss": 0.14644527435302734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.5770350289524, "step_time": 0.5084426078796387} +{"epoch": 0, "iter": 17506, "iter_tflops": 15.801774356597251, "iter_time": 1.3056187896728517, "loss": 
0.1841510534286499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.815519535247198, "step_time": 1.0411583442687988} +{"epoch": 0, "iter": 17507, "iter_tflops": 38.894087482852264, "iter_time": 0.5304429244995118, "loss": 0.06336525082588196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56707061275036, "step_time": 0.4846726169586182} +{"epoch": 0, "iter": 17508, "iter_tflops": 41.06812858237791, "iter_time": 0.5023626403808594, "loss": 0.14010342955589294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.08663112965313, "step_time": 0.4575878257751464} +{"epoch": 0, "iter": 17509, "iter_tflops": 16.835798331184645, "iter_time": 1.2254300689697266, "loss": 0.48856019973754883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.154472704251365, "step_time": 1.136419319152832} +{"epoch": 0, "iter": 17510, "iter_tflops": 22.099994699758057, "iter_time": 0.9335338668823242, "loss": 0.3918350338935852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.74334575333938, "step_time": 0.8338037109375} +{"epoch": 0, "iter": 17511, "iter_tflops": 36.287526297171674, "iter_time": 0.568545051574707, "loss": 0.46745407581329346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.40495651909983, "step_time": 0.5235659503936767} +{"epoch": 0, "iter": 17512, "iter_tflops": 36.11908426506726, "iter_time": 0.5711964721679688, "loss": 0.4210033714771271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.48267471789907, "step_time": 0.5225353565216064} +{"epoch": 0, "iter": 17513, "iter_tflops": 17.564642623833144, "iter_time": 1.1745808868408203, "loss": 0.4451643228530884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.61249762452732, "step_time": 1.1084537887573243} +{"epoch": 0, "iter": 17514, "iter_tflops": 20.341201896814052, "iter_time": 1.0142514495849608, "loss": 0.5265246033668518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.934505660892924, "step_time": 0.7659726066589355} +{"epoch": 0, "iter": 17515, "iter_tflops": 45.065070852859336, "iter_time": 0.45780674743652344, 
"loss": 0.42635440826416016, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.595824834095644, "step_time": 0.4245445690155029} +{"epoch": 0, "iter": 17516, "iter_tflops": 49.01630801507902, "iter_time": 0.4209026412963867, "loss": 0.6113806366920471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.94424925836333, "step_time": 0.3896758155822754} +{"epoch": 0, "iter": 17517, "iter_tflops": 23.77452536920573, "iter_time": 0.867781509399414, "loss": 0.4467415511608124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.927018668933425, "step_time": 0.8276598892211914} +{"epoch": 0, "iter": 17518, "iter_tflops": 17.1250339128069, "iter_time": 1.2047330017089843, "loss": 0.39795783162117004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.51996589956025, "step_time": 1.0054155845642092} +{"epoch": 0, "iter": 17519, "iter_tflops": 36.879085533108686, "iter_time": 0.5594253005981445, "loss": 0.43160003423690796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.37055424038909, "step_time": 0.5110431079864503} +{"epoch": 0, "iter": 17520, "iter_tflops": 41.254325621277985, "iter_time": 0.5000952796936036, "loss": 0.4935853183269501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1510593502744, "step_time": 0.456934871673584} +{"epoch": 0, "iter": 17521, "iter_tflops": 21.430950364406183, "iter_time": 0.962677490234375, "loss": 0.47459766268730164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.00944078420418, "step_time": 0.8966360244750976} +{"epoch": 0, "iter": 17522, "iter_tflops": 19.89653019771035, "iter_time": 1.0369191665649415, "loss": 0.3208860456943512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.251576458378924, "step_time": 0.8507114391326903} +{"epoch": 0, "iter": 17523, "iter_tflops": 49.12808781387607, "iter_time": 0.4199449729919434, "loss": 0.492826372385025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.47541921480534, "step_time": 0.38580517578125} +{"epoch": 0, "iter": 17524, "iter_tflops": 49.332658306632645, "iter_time": 
0.4182035636901856, "loss": 0.299617201089859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.15079168753684, "step_time": 0.38816154670715336} +{"epoch": 0, "iter": 17525, "iter_tflops": 28.702520215319055, "iter_time": 0.7187903137207032, "loss": 0.16118542850017548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.306257904833195, "step_time": 0.6807535781860351} +{"epoch": 0, "iter": 17526, "iter_tflops": 16.063406054878616, "iter_time": 1.2843536071777342, "loss": 0.1772507280111313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.041670001529642, "step_time": 1.0834708042144774} +{"epoch": 0, "iter": 17527, "iter_tflops": 39.347903356534175, "iter_time": 0.5243251037597656, "loss": 0.25787442922592163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.069814366560365, "step_time": 0.47901514816284185} +{"epoch": 0, "iter": 17528, "iter_tflops": 38.24825098864789, "iter_time": 0.539399658203125, "loss": 0.22118207812309265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78855537300125, "step_time": 0.49370200347900384} +{"epoch": 0, "iter": 17529, "iter_tflops": 30.7393964094149, "iter_time": 0.6711613082885743, "loss": 0.047518156468868256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.39307455632848, "step_time": 0.6178255157470702} +{"epoch": 0, "iter": 17530, "iter_tflops": 40.55387932285557, "iter_time": 0.5087329216003419, "loss": 0.023321373388171196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.71997028007063, "step_time": 0.4512490577697754} +{"epoch": 0, "iter": 17531, "iter_tflops": 43.45517061214419, "iter_time": 0.4747672882080078, "loss": 0.07390911132097244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.941490995729865, "step_time": 0.43033900451660156} +{"epoch": 0, "iter": 17532, "iter_tflops": 45.78213938627083, "iter_time": 0.45063629150390627, "loss": 0.03030795231461525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.345297480572775, "step_time": 0.4097918682098388} +{"epoch": 0, "iter": 17533, "iter_tflops": 
22.120330337616316, "iter_time": 0.932675651550293, "loss": 0.7044204473495483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.609591177222356, "step_time": 0.8738437423706054} +{"epoch": 0, "iter": 17534, "iter_tflops": 9.429064749694065, "iter_time": 2.1880317993164065, "loss": 0.7198265790939331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.995887513310874, "step_time": 1.876255416870117} +{"epoch": 0, "iter": 17535, "iter_tflops": 11.757459209474295, "iter_time": 1.7547238006591797, "loss": 0.6322897672653198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.73452732241193, "step_time": 1.3111988105773926} +{"epoch": 0, "iter": 17536, "iter_tflops": 44.66817857642635, "iter_time": 0.46187451934814455, "loss": 0.6553913354873657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.50636388815183, "step_time": 0.4253275623321533} +{"epoch": 0, "iter": 17537, "iter_tflops": 16.20048094130063, "iter_time": 0.9076011199951173, "loss": 0.23088477551937103, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 17.05552462158958, "step_time": 0.8621004028320313} +{"epoch": 0, "iter": 17538, "iter_tflops": 9.216602404510974, "iter_time": 1.5953356781005859, "loss": 0.0765511617064476, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 11.495295386196506, "step_time": 1.2790949821472168} +{"epoch": 0, "iter": 17539, "iter_tflops": 20.43079298232077, "iter_time": 0.7196771392822267, "loss": 0.20320507884025574, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 22.08873517704805, "step_time": 0.6656594200134277} +{"epoch": 0, "iter": 17540, "iter_tflops": 21.936446387506066, "iter_time": 0.6702806091308593, "loss": 0.28462934494018555, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 23.49976112023465, "step_time": 0.6256903877258301} +{"epoch": 0, "iter": 17541, "iter_tflops": 20.772998006774163, "iter_time": 0.9931688003540038, "loss": 0.06962501257658005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.40091063438506, "step_time": 0.9209935188293458} +{"epoch": 0, "iter": 17542, 
"iter_tflops": 32.71885574358444, "iter_time": 0.6305566940307618, "loss": 0.0753684714436531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.61756852673929, "step_time": 0.5634206295013428} +{"epoch": 0, "iter": 17543, "iter_tflops": 45.67284809565231, "iter_time": 0.45171462631225584, "loss": 0.058450549840927124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.34660756847675, "step_time": 0.41808534622192384} +{"epoch": 0, "iter": 17544, "iter_tflops": 55.08838025997655, "iter_time": 0.37450898742675776, "loss": 0.04473184794187546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.7365840505047, "step_time": 0.3453678150177002} +{"epoch": 0, "iter": 17545, "iter_tflops": 27.825777682650617, "iter_time": 0.7414381637573243, "loss": 0.17038299143314362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.33000828102837, "step_time": 0.703412467956543} +{"epoch": 0, "iter": 17546, "iter_tflops": 12.933662957988712, "iter_time": 1.595146987915039, "loss": 0.15021178126335144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.322250404455065, "step_time": 1.2639858474731445} +{"epoch": 0, "iter": 17547, "iter_tflops": 37.49747300113791, "iter_time": 0.5501995697021484, "loss": 0.11699887365102768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.013342121588444, "step_time": 0.503033706665039} +{"epoch": 0, "iter": 17548, "iter_tflops": 39.44941949824269, "iter_time": 0.522975845336914, "loss": 0.08986957371234894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.995972243015785, "step_time": 0.479837818145752} +{"epoch": 0, "iter": 17549, "iter_tflops": 16.807897899414485, "iter_time": 1.2274642333984376, "loss": 0.13068658113479614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.953499318488216, "step_time": 1.1491405181884766} +{"epoch": 0, "iter": 17550, "iter_tflops": 19.657836245306065, "iter_time": 1.0495098876953124, "loss": 0.22902563214302063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.443868839059437, "step_time": 0.9192307109832765} +{"epoch": 0, 
"iter": 17551, "iter_tflops": 49.215117512062555, "iter_time": 0.4192023620605469, "loss": 0.2577388286590576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.42095321156399, "step_time": 0.38619852828979495} +{"epoch": 0, "iter": 17552, "iter_tflops": 50.22873189138454, "iter_time": 0.41074287033081053, "loss": 0.20602896809577942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.216737245603824, "step_time": 0.38052997207641603} +{"epoch": 0, "iter": 17553, "iter_tflops": 32.06949298653946, "iter_time": 0.643324592590332, "loss": 0.5563306212425232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.33399551981105, "step_time": 0.6008940467834473} +{"epoch": 0, "iter": 17554, "iter_tflops": 15.339632018840293, "iter_time": 1.34495361328125, "loss": 0.5265814065933228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.70614361100766, "step_time": 1.0469371337890625} +{"epoch": 0, "iter": 17555, "iter_tflops": 43.3201076223888, "iter_time": 0.47624751281738276, "loss": 0.547602653503418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.87097045266992, "step_time": 0.4401678333282471} +{"epoch": 0, "iter": 17556, "iter_tflops": 43.477084275648856, "iter_time": 0.47452799224853515, "loss": 0.6338018178939819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79985304188281, "step_time": 0.4408367156982422} +{"epoch": 0, "iter": 17557, "iter_tflops": 25.593071746492285, "iter_time": 0.8061202545166017, "loss": 0.11719805747270584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.95387762014612, "step_time": 0.7654220962524414} +{"epoch": 0, "iter": 17558, "iter_tflops": 14.098685153499009, "iter_time": 1.4633345794677732, "loss": 0.10966671258211136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.081504167168326, "step_time": 1.1410053787231444} +{"epoch": 0, "iter": 17559, "iter_tflops": 50.641287991595156, "iter_time": 0.40739669799804684, "loss": 0.07661996781826019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.944789666210745, "step_time": 0.37548771476745607} 
+{"epoch": 0, "iter": 17560, "iter_tflops": 50.60058449529638, "iter_time": 0.40772441101074214, "loss": 0.09733147919178009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.744231996562036, "step_time": 0.3768633289337158} +{"epoch": 0, "iter": 17561, "iter_tflops": 29.791655170211264, "iter_time": 0.6925124969482421, "loss": 0.2947256565093994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.62725889102876, "step_time": 0.6523199996948242} +{"epoch": 0, "iter": 17562, "iter_tflops": 17.390001694406994, "iter_time": 1.186376739501953, "loss": 0.3007323145866394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.708703836982025, "step_time": 0.8701906967163087} +{"epoch": 0, "iter": 17563, "iter_tflops": 42.44814856070559, "iter_time": 0.48603046798706057, "loss": 0.3086393177509308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.31167681579589, "step_time": 0.44548362159729005} +{"epoch": 0, "iter": 17564, "iter_tflops": 37.69124047404707, "iter_time": 0.5473710403442383, "loss": 0.3336217403411865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.24772772047134, "step_time": 0.5001752738952636} +{"epoch": 0, "iter": 17565, "iter_tflops": 17.225528073259614, "iter_time": 1.197704559326172, "loss": 0.5365166664123535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.540311041825454, "step_time": 1.1127695465087892} +{"epoch": 0, "iter": 17566, "iter_tflops": 23.867789531086146, "iter_time": 0.864390625, "loss": 0.7978400588035583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.46856482313241, "step_time": 0.7794564476013185} +{"epoch": 0, "iter": 17567, "iter_tflops": 35.68118212527286, "iter_time": 0.5782065582275391, "loss": 0.4806327819824219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.93641080377639, "step_time": 0.5298663406372071} +{"epoch": 0, "iter": 17568, "iter_tflops": 36.946180710648356, "iter_time": 0.5584093704223633, "loss": 0.6060441732406616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.046985338511234, "step_time": 
0.5151721992492676} +{"epoch": 0, "iter": 17569, "iter_tflops": 25.94687574349744, "iter_time": 0.7951282348632813, "loss": 0.18829071521759033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.86883507449838, "step_time": 0.7402926406860353} +{"epoch": 0, "iter": 17570, "iter_tflops": 8.496564166458471, "iter_time": 2.4281689758300784, "loss": 0.14544056355953217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.941531375394987, "step_time": 2.075243011474609} +{"epoch": 0, "iter": 17571, "iter_tflops": 12.149540422804336, "iter_time": 1.6980966186523438, "loss": 0.17523521184921265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.843602086376642, "step_time": 1.4902980728149413} +{"epoch": 0, "iter": 17572, "iter_tflops": 34.50398574285058, "iter_time": 0.5979336318969726, "loss": 0.15945351123809814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56290055191855, "step_time": 0.4847201023101807} +{"epoch": 0, "iter": 17573, "iter_tflops": 19.32963399086704, "iter_time": 0.8920865325927735, "loss": 0.2760823965072632, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 20.4984608174395, "step_time": 0.8412195587158203} +{"epoch": 0, "iter": 17574, "iter_tflops": 6.6831939918119465, "iter_time": 2.580159454345703, "loss": 0.2819271385669708, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 8.177584314134752, "step_time": 2.108655258178711} +{"epoch": 0, "iter": 17575, "iter_tflops": 12.277380711127801, "iter_time": 1.4045101776123048, "loss": 0.2042376846075058, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 14.848518768682393, "step_time": 1.1613081703186034} +{"epoch": 0, "iter": 17576, "iter_tflops": 26.49917224021231, "iter_time": 0.6507262191772462, "loss": 0.22798392176628113, "lr": 3e-05, "seqlen": 6880.0, "step_tflops": 28.261236339990123, "step_time": 0.6101539916992187} +{"epoch": 0, "iter": 17577, "iter_tflops": 17.688877342696507, "iter_time": 0.8473943862915039, "loss": 0.285296231508255, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 18.640499865230943, 
"step_time": 0.8041337661743164} +{"epoch": 0, "iter": 17578, "iter_tflops": 9.560870517274061, "iter_time": 1.5677919006347656, "loss": 0.17309239506721497, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 11.433027437377373, "step_time": 1.3110661582946777} +{"epoch": 0, "iter": 17579, "iter_tflops": 20.933476946059958, "iter_time": 0.7160518722534179, "loss": 0.1768099069595337, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 22.51052542284168, "step_time": 0.6658865165710449} +{"epoch": 0, "iter": 17580, "iter_tflops": 24.572723356638356, "iter_time": 0.6100038299560546, "loss": 0.11134898662567139, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 26.213600552317025, "step_time": 0.5718197822570801} +{"epoch": 0, "iter": 17581, "iter_tflops": 15.636033922845808, "iter_time": 1.319458221435547, "loss": 0.27018308639526367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.623378780473136, "step_time": 1.2410890579223632} +{"epoch": 0, "iter": 17582, "iter_tflops": 24.419329847945814, "iter_time": 0.8448673095703125, "loss": 0.31135645508766174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.983059045032554, "step_time": 0.6070993633270264} +{"epoch": 0, "iter": 17583, "iter_tflops": 46.777955430846184, "iter_time": 0.4410430793762207, "loss": 0.19180342555046082, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.615635995272925, "step_time": 0.40760316658020024} +{"epoch": 0, "iter": 17584, "iter_tflops": 49.06838172422898, "iter_time": 0.42045595932006846, "loss": 0.31723424792289734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.88589863813642, "step_time": 0.39010575675964354} +{"epoch": 0, "iter": 17585, "iter_tflops": 26.46475137273776, "iter_time": 0.7795687637329101, "loss": 0.21988672018051147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.10376138053409, "step_time": 0.7341043510437012} +{"epoch": 0, "iter": 17586, "iter_tflops": 14.846940586171106, "iter_time": 1.3895855102539063, "loss": 0.20684756338596344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
19.22784280077992, "step_time": 1.0729801425933838} +{"epoch": 0, "iter": 17587, "iter_tflops": 40.12979220415598, "iter_time": 0.5141091537475586, "loss": 0.2319781333208084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.91119986919965, "step_time": 0.469836706161499} +{"epoch": 0, "iter": 17588, "iter_tflops": 45.60198676334297, "iter_time": 0.45241654968261724, "loss": 0.15308578312397003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.83067274415191, "step_time": 0.41402398109436034} +{"epoch": 0, "iter": 17589, "iter_tflops": 30.779527849735825, "iter_time": 0.6702862243652344, "loss": 0.43579646944999695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.33503497913494, "step_time": 0.618901210784912} +{"epoch": 0, "iter": 17590, "iter_tflops": 9.75187636714472, "iter_time": 2.1156024475097657, "loss": 0.4497625231742859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.68293585007482, "step_time": 1.7659168701171875} +{"epoch": 0, "iter": 17591, "iter_tflops": 15.300166378196215, "iter_time": 1.348422821044922, "loss": 0.36834749579429626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.79067974134657, "step_time": 1.0979429054260255} +{"epoch": 0, "iter": 17592, "iter_tflops": 29.4421537770961, "iter_time": 0.7007331619262696, "loss": 0.3771618604660034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.77002052139048, "step_time": 0.5462293434143066} +{"epoch": 0, "iter": 17593, "iter_tflops": 15.34180824153359, "iter_time": 1.0063457565307619, "loss": 0.1758204698562622, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 16.385338142684205, "step_time": 0.9422548065185546} +{"epoch": 0, "iter": 17594, "iter_tflops": 10.585781265377967, "iter_time": 1.458481262207031, "loss": 0.2847369313240051, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 12.56489843289489, "step_time": 1.2287535552978515} +{"epoch": 0, "iter": 17595, "iter_tflops": 28.126431299005418, "iter_time": 0.5489201049804688, "loss": 0.232646644115448, "lr": 3e-05, "seqlen": 6176.0, 
"step_tflops": 30.004343380667578, "step_time": 0.5145642890930175} +{"epoch": 0, "iter": 17596, "iter_tflops": 28.10549516678437, "iter_time": 0.5493290023803711, "loss": 0.14097639918327332, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.813953059675406, "step_time": 0.517850269317627} +{"epoch": 0, "iter": 17597, "iter_tflops": 31.997845106641538, "iter_time": 0.6447650909423828, "loss": 0.21126504242420197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.158822896437, "step_time": 0.6039755401611328} +{"epoch": 0, "iter": 17598, "iter_tflops": 12.567463140044369, "iter_time": 1.6416275329589844, "loss": 0.19651760160923004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.035813712361865, "step_time": 1.2110424461364746} +{"epoch": 0, "iter": 17599, "iter_tflops": 39.18067878457417, "iter_time": 0.5265629425048828, "loss": 0.24658329784870148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.91313867751778, "step_time": 0.4807640304565429} +{"epoch": 0, "iter": 17600, "iter_tflops": 38.097333687764205, "iter_time": 0.5415364151000976, "loss": 0.2613038122653961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.63492573221401, "step_time": 0.4955237255096435} +{"epoch": 0, "iter": 17601, "iter_tflops": 27.077450156024103, "iter_time": 0.7619289627075195, "loss": 0.13417087495326996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.062934500424124, "step_time": 0.6862634620666505} +{"epoch": 0, "iter": 17602, "iter_tflops": 40.422048517357915, "iter_time": 0.5103920822143555, "loss": 0.16768339276313782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.39705332937809, "step_time": 0.4646951084136963} +{"epoch": 0, "iter": 17603, "iter_tflops": 38.78348911307216, "iter_time": 0.5319555816650391, "loss": 0.1491740643978119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.611546873665354, "step_time": 0.4841667346954346} +{"epoch": 0, "iter": 17604, "iter_tflops": 41.54048277380243, "iter_time": 0.49665030670166016, "loss": 0.1670137196779251, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 45.44263865752166, "step_time": 0.4540029830932617} +{"epoch": 0, "iter": 17605, "iter_tflops": 18.70120743347073, "iter_time": 1.10319580078125, "loss": 0.4923691749572754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.97269683218451, "step_time": 1.0329648361206054} +{"epoch": 0, "iter": 17606, "iter_tflops": 15.340120540639871, "iter_time": 1.3449107818603516, "loss": 0.5006018877029419, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.169253200884622, "step_time": 1.1354948539733887} +{"epoch": 0, "iter": 17607, "iter_tflops": 41.50139217660709, "iter_time": 0.497118106842041, "loss": 0.3689178228378296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.57628807175721, "step_time": 0.4628266372680664} +{"epoch": 0, "iter": 17608, "iter_tflops": 42.70519831703693, "iter_time": 0.48310496902465816, "loss": 0.4395044445991516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.998169376236376, "step_time": 0.4485198822021484} +{"epoch": 0, "iter": 17609, "iter_tflops": 23.261131301884603, "iter_time": 0.8869342269897461, "loss": 0.6257697343826294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.415209585787732, "step_time": 0.8450098876953125} +{"epoch": 0, "iter": 17610, "iter_tflops": 16.061034625354253, "iter_time": 1.2845432434082031, "loss": 0.7174305319786072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.145833987982154, "step_time": 1.077576120376587} +{"epoch": 0, "iter": 17611, "iter_tflops": 37.825482549950884, "iter_time": 0.5454284286499023, "loss": 0.7352887988090515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.32050156801216, "step_time": 0.49929436302185054} +{"epoch": 0, "iter": 17612, "iter_tflops": 37.28749616597129, "iter_time": 0.5532979049682618, "loss": 0.6456892490386963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54699406195473, "step_time": 0.5088193092346192} +{"epoch": 0, "iter": 17613, "iter_tflops": 25.696325894385115, "iter_time": 0.8028810653686523, "loss": 0.7717775702476501, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 27.63849222374558, "step_time": 0.7464623374938966} +{"epoch": 0, "iter": 17614, "iter_tflops": 14.575889933400907, "iter_time": 1.415425994873047, "loss": 0.5761498212814331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.768945164039373, "step_time": 1.2303155212402344} +{"epoch": 0, "iter": 17615, "iter_tflops": 10.121285213414781, "iter_time": 2.0383867340087893, "loss": 0.4428744316101074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.401073412355261, "step_time": 1.5395105209350584} +{"epoch": 0, "iter": 17616, "iter_tflops": 20.184177789910102, "iter_time": 1.0221418838500975, "loss": 0.6251976490020752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.80337774931247, "step_time": 0.7420355072021485} +{"epoch": 0, "iter": 17617, "iter_tflops": 22.907330973304777, "iter_time": 0.6258412857055664, "loss": 0.19725462794303894, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 24.64963754671944, "step_time": 0.5816050415039062} +{"epoch": 0, "iter": 17618, "iter_tflops": 24.316017608469057, "iter_time": 0.5895847625732422, "loss": 0.2625047564506531, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 25.83945305747925, "step_time": 0.5548241844177246} +{"epoch": 0, "iter": 17619, "iter_tflops": 24.9406481164682, "iter_time": 0.5748188018798828, "loss": 0.2239447385072708, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 26.49105295683113, "step_time": 0.5411771850585937} +{"epoch": 0, "iter": 17620, "iter_tflops": 24.510018593053825, "iter_time": 0.584918098449707, "loss": 0.21828535199165344, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 25.982293671502756, "step_time": 0.5517739753723145} +{"epoch": 0, "iter": 17621, "iter_tflops": 33.15628525980986, "iter_time": 0.6222377853393556, "loss": 0.5928131937980652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.47755411918867, "step_time": 0.5815252494812012} +{"epoch": 0, "iter": 17622, "iter_tflops": 8.935327793029122, "iter_time": 2.308935272216797, "loss": 0.45909005403518677, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.392663784942368, "step_time": 1.810910415649414} +{"epoch": 0, "iter": 17623, "iter_tflops": 13.644884800897016, "iter_time": 1.5120020294189453, "loss": 0.48010778427124023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.910707527333198, "step_time": 1.2966798286437986} +{"epoch": 0, "iter": 17624, "iter_tflops": 27.05847868210283, "iter_time": 0.7624631729125977, "loss": 0.6803333759307861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.814068892778806, "step_time": 0.6287270736694336} +{"epoch": 0, "iter": 17625, "iter_tflops": 24.959097979843918, "iter_time": 0.7519595947265626, "loss": 0.18524274230003357, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 26.535623533291343, "step_time": 0.707284423828125} +{"epoch": 0, "iter": 17626, "iter_tflops": 16.777656749201736, "iter_time": 1.1186444854736328, "loss": 0.1888558268547058, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 21.176810744081163, "step_time": 0.8862634429931641} +{"epoch": 0, "iter": 17627, "iter_tflops": 31.194901694866623, "iter_time": 0.6016442489624023, "loss": 0.17981770634651184, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 33.17310628480511, "step_time": 0.565766529083252} +{"epoch": 0, "iter": 17628, "iter_tflops": 33.35651388563501, "iter_time": 0.5626557159423828, "loss": 0.19716469943523407, "lr": 3e-05, "seqlen": 7472.0, "step_tflops": 35.51988606283238, "step_time": 0.5283866386413575} +{"epoch": 0, "iter": 17629, "iter_tflops": 27.42676202331614, "iter_time": 0.7522249069213868, "loss": 0.22270901501178741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.194444801981465, "step_time": 0.7066787414550781} +{"epoch": 0, "iter": 17630, "iter_tflops": 8.810168382937912, "iter_time": 2.341736572265625, "loss": 0.30988237261772156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.838488485287176, "step_time": 1.903502830505371} +{"epoch": 0, "iter": 17631, "iter_tflops": 11.26808959186306, "iter_time": 1.8309309082031247, "loss": 
0.26860782504081726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.414483992529282, "step_time": 1.5379714584350586} +{"epoch": 0, "iter": 17632, "iter_tflops": 39.89006224950368, "iter_time": 0.5171988296508789, "loss": 0.17190057039260864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.70202374008019, "step_time": 0.4720855407714844} +{"epoch": 0, "iter": 17633, "iter_tflops": 12.585046142788888, "iter_time": 1.249549560546875, "loss": 0.23441243171691895, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 13.604593560586363, "step_time": 1.1559065551757812} +{"epoch": 0, "iter": 17634, "iter_tflops": 18.049220269383266, "iter_time": 0.8712641677856445, "loss": 0.22149285674095154, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 22.41620106962096, "step_time": 0.701530059814453} +{"epoch": 0, "iter": 17635, "iter_tflops": 23.894328766615292, "iter_time": 0.6581326904296875, "loss": 0.21876223385334015, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.754490775211288, "step_time": 0.6105979347229002} +{"epoch": 0, "iter": 17636, "iter_tflops": 23.167602185909303, "iter_time": 0.6787771453857422, "loss": 0.3639751374721527, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.913710592537676, "step_time": 0.631204204559326} +{"epoch": 0, "iter": 17637, "iter_tflops": 23.06658365141439, "iter_time": 0.8944147872924805, "loss": 0.41312119364738464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.521874972560465, "step_time": 0.8413342590332032} +{"epoch": 0, "iter": 17638, "iter_tflops": 10.917927794587673, "iter_time": 1.889652862548828, "loss": 0.45071762800216675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.311789298800091, "step_time": 1.5498362426757815} +{"epoch": 0, "iter": 17639, "iter_tflops": 20.567570704854653, "iter_time": 1.0030884933471682, "loss": 0.6466957926750183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.02240243232715, "step_time": 0.824504903793335} +{"epoch": 0, "iter": 17640, "iter_tflops": 48.921262421618785, "iter_time": 
0.42172038269042966, "loss": 0.6167370676994324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.95492410156264, "step_time": 0.3895972633361816} +{"epoch": 0, "iter": 17641, "iter_tflops": 20.05964953084233, "iter_time": 0.7615071868896485, "loss": 0.25347986817359924, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 21.167152045157284, "step_time": 0.7216637954711914} +{"epoch": 0, "iter": 17642, "iter_tflops": 7.551050660684052, "iter_time": 2.022972427368164, "loss": 0.2620601952075958, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 8.356446103883231, "step_time": 1.8279980621337892} +{"epoch": 0, "iter": 17643, "iter_tflops": 8.39732534580436, "iter_time": 1.819099136352539, "loss": 0.29459747672080994, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 10.291034558792552, "step_time": 1.4843568153381348} +{"epoch": 0, "iter": 17644, "iter_tflops": 21.676687096205576, "iter_time": 0.7047002716064453, "loss": 0.2575848698616028, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.293915890322882, "step_time": 0.6557749824523926} +{"epoch": 0, "iter": 17645, "iter_tflops": 15.041625835484428, "iter_time": 0.9965315933227539, "loss": 0.24448877573013306, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 16.40393548381212, "step_time": 0.9137719039916992} +{"epoch": 0, "iter": 17646, "iter_tflops": 26.64983638615948, "iter_time": 0.5624595642089844, "loss": 0.3283107578754425, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.50187568562784, "step_time": 0.5259111900329589} +{"epoch": 0, "iter": 17647, "iter_tflops": 25.82414722613463, "iter_time": 0.5804433822631836, "loss": 0.34914952516555786, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.63709962185536, "step_time": 0.5423671646118164} +{"epoch": 0, "iter": 17648, "iter_tflops": 26.943836528672342, "iter_time": 0.5563222351074218, "loss": 0.278591513633728, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 28.715015811339192, "step_time": 0.5220075607299806} +{"epoch": 0, "iter": 17649, "iter_tflops": 44.3834495911496, 
"iter_time": 0.4648375396728516, "loss": 0.398393452167511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.36534353889978, "step_time": 0.4265677032470703} +{"epoch": 0, "iter": 17650, "iter_tflops": 40.94023402195559, "iter_time": 0.5039319877624511, "loss": 0.39515620470046997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.042030127872415, "step_time": 0.4480926113128662} +{"epoch": 0, "iter": 17651, "iter_tflops": 42.15785652915349, "iter_time": 0.4893771934509278, "loss": 0.35845091938972473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22394533544587, "step_time": 0.4561984443664551} +{"epoch": 0, "iter": 17652, "iter_tflops": 48.60256746259836, "iter_time": 0.4244856719970703, "loss": 0.45750659704208374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.33916805283878, "step_time": 0.3941807689666748} +{"epoch": 0, "iter": 17653, "iter_tflops": 38.06267683096695, "iter_time": 0.5420294952392578, "loss": 0.6088143587112427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.4038756999303, "step_time": 0.49828894424438475} +{"epoch": 0, "iter": 17654, "iter_tflops": 46.73047632343554, "iter_time": 0.4414911880493164, "loss": 0.6730008125305176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.631920314033465, "step_time": 0.40747207260131835} +{"epoch": 0, "iter": 17655, "iter_tflops": 45.47000796395758, "iter_time": 0.4537297096252441, "loss": 0.7785695791244507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.959914110839044, "step_time": 0.42138745307922365} +{"epoch": 0, "iter": 17656, "iter_tflops": 41.09583623476383, "iter_time": 0.5020239372253419, "loss": 0.6313759684562683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14872123707311, "step_time": 0.4673089714050293} +{"epoch": 0, "iter": 17657, "iter_tflops": 34.30809028625831, "iter_time": 0.4333353042602539, "loss": 0.019872797653079033, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 37.644430265654115, "step_time": 0.39492978477478036} +{"epoch": 0, "iter": 17658, "iter_tflops": 
7.69524511468052, "iter_time": 1.9319601287841797, "loss": 0.00625476473942399, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 8.354031368697594, "step_time": 1.7796086807250977} +{"epoch": 0, "iter": 17659, "iter_tflops": 9.114351977820212, "iter_time": 1.6311534576416014, "loss": 0.019617736339569092, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 10.423453468838341, "step_time": 1.4262937698364258} +{"epoch": 0, "iter": 17660, "iter_tflops": 13.983044588058059, "iter_time": 1.0632095642089845, "loss": 0.002951547969132662, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 16.53751907294211, "step_time": 0.8989804744720459} +{"epoch": 0, "iter": 17661, "iter_tflops": 16.172380641431303, "iter_time": 0.9875709381103517, "loss": 0.2766624689102173, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 17.44062291993616, "step_time": 0.9157570343017579} +{"epoch": 0, "iter": 17662, "iter_tflops": 15.96994648500499, "iter_time": 1.0000893325805664, "loss": 0.14797921478748322, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 21.25675802838411, "step_time": 0.7513550796508789} +{"epoch": 0, "iter": 17663, "iter_tflops": 27.988491984973958, "iter_time": 0.5706407165527344, "loss": 0.12627547979354858, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 29.877461475779125, "step_time": 0.5345625877380371} +{"epoch": 0, "iter": 17664, "iter_tflops": 28.935176267485872, "iter_time": 0.5519708251953126, "loss": 0.21849700808525085, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 30.74420835930702, "step_time": 0.5194920921325684} +{"epoch": 0, "iter": 17665, "iter_tflops": 47.72913105870177, "iter_time": 0.4322537002563477, "loss": 0.039138223975896835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.78603353681334, "step_time": 0.39084379196166996} +{"epoch": 0, "iter": 17666, "iter_tflops": 36.6812792458771, "iter_time": 0.562442039489746, "loss": 0.04580294340848923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44119802578219, "step_time": 0.5101504039764404} +{"epoch": 0, "iter": 17667, 
"iter_tflops": 45.80026733086474, "iter_time": 0.45045792770385734, "loss": 0.031079519540071487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.449892813777986, "step_time": 0.4089422664642334} +{"epoch": 0, "iter": 17668, "iter_tflops": 44.278517448713075, "iter_time": 0.4659391212463379, "loss": 0.04203404113650322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.87301560360367, "step_time": 0.4221366996765137} +{"epoch": 0, "iter": 17669, "iter_tflops": 19.31784856145263, "iter_time": 1.0679809112548828, "loss": 0.7141826152801514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.65831709058915, "step_time": 0.9986821975708009} +{"epoch": 0, "iter": 17670, "iter_tflops": 27.182694700247463, "iter_time": 0.7589789657592774, "loss": 0.6240725517272949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.664281247617893, "step_time": 0.672805383682251} +{"epoch": 0, "iter": 17671, "iter_tflops": 41.44454370823716, "iter_time": 0.497799991607666, "loss": 0.7808331847190857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.51856892101208, "step_time": 0.463426700592041} +{"epoch": 0, "iter": 17672, "iter_tflops": 43.548837562868165, "iter_time": 0.4737461357116699, "loss": 0.5743721723556519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.890678480509784, "step_time": 0.439982831954956} +{"epoch": 0, "iter": 17673, "iter_tflops": 26.94585097169861, "iter_time": 0.7656501007080077, "loss": 0.13106365501880646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.53022287497732, "step_time": 0.7231311721801756} +{"epoch": 0, "iter": 17674, "iter_tflops": 36.884461887234636, "iter_time": 0.5593437576293945, "loss": 0.16842691600322723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.123952919412176, "step_time": 0.4287073745727539} +{"epoch": 0, "iter": 17675, "iter_tflops": 48.04235496202559, "iter_time": 0.4294355163574219, "loss": 0.21742233633995056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.89812342843345, "step_time": 0.3975306262969971} +{"epoch": 0, 
"iter": 17676, "iter_tflops": 48.14394937749994, "iter_time": 0.428529312133789, "loss": 0.1894984394311905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.18268155273494, "step_time": 0.3953628463745117} +{"epoch": 0, "iter": 17677, "iter_tflops": 33.90523266917493, "iter_time": 0.608492904663086, "loss": 0.5991673469543457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.185697669679406, "step_time": 0.5701449699401856} +{"epoch": 0, "iter": 17678, "iter_tflops": 8.643084348361814, "iter_time": 2.3870059204101564, "loss": 0.40468236804008484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.83134684268664, "step_time": 1.9047579040527345} +{"epoch": 0, "iter": 17679, "iter_tflops": 16.88912719165783, "iter_time": 1.2215606689453127, "loss": 0.41615769267082214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.81135095045859, "step_time": 1.0413774185180664} +{"epoch": 0, "iter": 17680, "iter_tflops": 24.045240034209417, "iter_time": 0.8580115432739259, "loss": 0.3483663499355316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.2926841930087, "step_time": 0.5387737617492675} +{"epoch": 0, "iter": 17681, "iter_tflops": 20.545712357148517, "iter_time": 0.7236014251708984, "loss": 0.3555198609828949, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 22.087017600286227, "step_time": 0.6731061210632324} +{"epoch": 0, "iter": 17682, "iter_tflops": 22.205517049305723, "iter_time": 0.6695140991210937, "loss": 0.39574992656707764, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 23.92328516570766, "step_time": 0.6214408531188964} +{"epoch": 0, "iter": 17683, "iter_tflops": 24.70389395593356, "iter_time": 0.6018041839599609, "loss": 0.10922359675168991, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 26.425487669526664, "step_time": 0.5625972518920899} +{"epoch": 0, "iter": 17684, "iter_tflops": 23.468804969349414, "iter_time": 0.6334752349853516, "loss": 0.0999317318201065, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 25.235905846624, "step_time": 0.5891172218322754} 
+{"epoch": 0, "iter": 17685, "iter_tflops": 34.55199998356315, "iter_time": 0.5971027297973633, "loss": 0.1885703057050705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.29401342911069, "step_time": 0.5387550601959229} +{"epoch": 0, "iter": 17686, "iter_tflops": 37.76872200577458, "iter_time": 0.5462481231689453, "loss": 0.1875523030757904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.102759300814874, "step_time": 0.49001761054992676} +{"epoch": 0, "iter": 17687, "iter_tflops": 38.34536809835003, "iter_time": 0.5380335235595703, "loss": 0.27400216460227966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29236732372992, "step_time": 0.4878207302093506} +{"epoch": 0, "iter": 17688, "iter_tflops": 40.316190518924884, "iter_time": 0.5117322158813477, "loss": 0.2411041557788849, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.82187612435908, "step_time": 0.4707943916320801} +{"epoch": 0, "iter": 17689, "iter_tflops": 34.213218366141604, "iter_time": 0.6030152816772462, "loss": 0.46982452273368835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.00989024331175, "step_time": 0.5427822437286377} +{"epoch": 0, "iter": 17690, "iter_tflops": 36.88653679032105, "iter_time": 0.5593122940063476, "loss": 0.5412880182266235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.27535638830071, "step_time": 0.49984046936035154} +{"epoch": 0, "iter": 17691, "iter_tflops": 40.64955268441236, "iter_time": 0.5075355606079102, "loss": 0.36330264806747437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35536911802913, "step_time": 0.46513181877136234} +{"epoch": 0, "iter": 17692, "iter_tflops": 37.902188861180704, "iter_time": 0.5443245925903321, "loss": 0.35924485325813293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.58097118794, "step_time": 0.4961667060852051} +{"epoch": 0, "iter": 17693, "iter_tflops": 17.159951995577025, "iter_time": 1.2022815399169922, "loss": 0.48649904131889343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.206874653560106, "step_time": 
1.1331485443115235} +{"epoch": 0, "iter": 17694, "iter_tflops": 20.517516302836057, "iter_time": 1.0055356216430666, "loss": 0.3733421564102173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.234078715682333, "step_time": 0.7575469589233399} +{"epoch": 0, "iter": 17695, "iter_tflops": 48.54874021503112, "iter_time": 0.4249563102722167, "loss": 0.6572864055633545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51337848247748, "step_time": 0.39287309455871583} +{"epoch": 0, "iter": 17696, "iter_tflops": 45.72509468735333, "iter_time": 0.451198486328125, "loss": 0.5058236122131348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.499325827473136, "step_time": 0.41679544448852535} +{"epoch": 0, "iter": 17697, "iter_tflops": 20.15858244043605, "iter_time": 1.0234396972656248, "loss": 0.600471019744873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.057409170715736, "step_time": 0.9797546005249023} +{"epoch": 0, "iter": 17698, "iter_tflops": 15.340226533267753, "iter_time": 1.3449014892578124, "loss": 0.7531940937042236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.86120947606656, "step_time": 1.1550781898498534} +{"epoch": 0, "iter": 17699, "iter_tflops": 43.098782694386934, "iter_time": 0.47869318389892584, "loss": 0.5868838429450989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44799778363979, "step_time": 0.44417616462707515} +{"epoch": 0, "iter": 17700, "iter_tflops": 39.82276270964972, "iter_time": 0.518072883605957, "loss": 0.4605204164981842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.84214778565547, "step_time": 0.48156067276000974} +{"epoch": 0, "iter": 17701, "iter_tflops": 19.497914390414625, "iter_time": 1.0581179656982425, "loss": 0.01503879763185978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.290453437580695, "step_time": 0.9690302543640137} +{"epoch": 0, "iter": 17702, "iter_tflops": 22.736251639361548, "iter_time": 0.907409622192383, "loss": 0.025172434747219086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.21744720337586, 
"step_time": 0.7311467037200927} +{"epoch": 0, "iter": 17703, "iter_tflops": 51.65080414497055, "iter_time": 0.39943412017822266, "loss": 0.024440979585051537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.726355910030094, "step_time": 0.36369502639770507} +{"epoch": 0, "iter": 17704, "iter_tflops": 50.3259566192931, "iter_time": 0.4099493560791016, "loss": 0.01528394129127264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.07891714872131, "step_time": 0.37457333183288577} +{"epoch": 0, "iter": 17705, "iter_tflops": 31.33458226948409, "iter_time": 0.6584129104614258, "loss": 0.7579212188720703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.418276832210566, "step_time": 0.6173595848083497} +{"epoch": 0, "iter": 17706, "iter_tflops": 16.572132337880525, "iter_time": 1.2449269104003908, "loss": 0.6485812664031982, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.25202183535322, "step_time": 1.018717720031738} +{"epoch": 0, "iter": 17707, "iter_tflops": 36.8916229001569, "iter_time": 0.5592351837158204, "loss": 0.6661741137504578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.37010689533132, "step_time": 0.511048770904541} +{"epoch": 0, "iter": 17708, "iter_tflops": 38.17631663591511, "iter_time": 0.5404160308837891, "loss": 0.6202415227890015, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.417650937375974, "step_time": 0.4981232166290283} +{"epoch": 0, "iter": 17709, "iter_tflops": 31.779432100812212, "iter_time": 0.649196418762207, "loss": 0.5574352741241455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.91875658972594, "step_time": 0.5908312759399413} +{"epoch": 0, "iter": 17710, "iter_tflops": 33.93154059116086, "iter_time": 0.6080211257934569, "loss": 0.4837365448474884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.76199736958596, "step_time": 0.561207088470459} +{"epoch": 0, "iter": 17711, "iter_tflops": 33.55165762524801, "iter_time": 0.6149053421020507, "loss": 0.5824432373046875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
36.48452371941231, "step_time": 0.5654752044677736} +{"epoch": 0, "iter": 17712, "iter_tflops": 34.32258296805974, "iter_time": 0.6010938491821289, "loss": 0.6128327250480652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.18210283955933, "step_time": 0.5548662376403809} +{"epoch": 0, "iter": 17713, "iter_tflops": 16.10128505223337, "iter_time": 1.2813321075439454, "loss": 0.4481813907623291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.2264475236189, "step_time": 1.1976406326293945} +{"epoch": 0, "iter": 17714, "iter_tflops": 16.45585155678431, "iter_time": 1.253723846435547, "loss": 0.40476253628730774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.891570004225212, "step_time": 1.0371777343749997} +{"epoch": 0, "iter": 17715, "iter_tflops": 47.81325057390605, "iter_time": 0.43149322128295897, "loss": 0.32735902070999146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.81669942072838, "step_time": 0.39815530014038086} +{"epoch": 0, "iter": 17716, "iter_tflops": 52.28342963814673, "iter_time": 0.3946009979248047, "loss": 0.45072755217552185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.592652419690594, "step_time": 0.364554277420044} +{"epoch": 0, "iter": 17717, "iter_tflops": 33.63000647988312, "iter_time": 0.6134727783203124, "loss": 0.22956708073616028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.79892127077913, "step_time": 0.5763048934936522} +{"epoch": 0, "iter": 17718, "iter_tflops": 37.0700647616275, "iter_time": 0.556543228149414, "loss": 0.1965796947479248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34227126921421, "step_time": 0.4990314483642578} +{"epoch": 0, "iter": 17719, "iter_tflops": 39.65330860037263, "iter_time": 0.5202868118286132, "loss": 0.20930549502372742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39142056677079, "step_time": 0.4754648094177246} +{"epoch": 0, "iter": 17720, "iter_tflops": 36.42269778568644, "iter_time": 0.5664350738525391, "loss": 0.2102322280406952, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 39.903577550292624, "step_time": 0.5170236549377442} +{"epoch": 0, "iter": 17721, "iter_tflops": 30.722896514696426, "iter_time": 0.6715217590332031, "loss": 0.44057497382164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.94736681230135, "step_time": 0.6077376670837402} +{"epoch": 0, "iter": 17722, "iter_tflops": 37.003605895992415, "iter_time": 0.5575427856445312, "loss": 0.45535531640052795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.90325759576729, "step_time": 0.5043875408172608} +{"epoch": 0, "iter": 17723, "iter_tflops": 38.906382860131224, "iter_time": 0.5302752914428711, "loss": 0.3926229476928711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.274410845610845, "step_time": 0.4880279369354248} +{"epoch": 0, "iter": 17724, "iter_tflops": 34.20563792663505, "iter_time": 0.6031489181518556, "loss": 0.4105411469936371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.530199235484275, "step_time": 0.5497197971343994} +{"epoch": 0, "iter": 17725, "iter_tflops": 24.350567665964643, "iter_time": 0.8472530822753906, "loss": 0.2401217669248581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.656806414461734, "step_time": 0.7739521827697754} +{"epoch": 0, "iter": 17726, "iter_tflops": 37.72433818997216, "iter_time": 0.5468908004760742, "loss": 0.23264318704605103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.85832678792185, "step_time": 0.492879077911377} +{"epoch": 0, "iter": 17727, "iter_tflops": 36.70743937755758, "iter_time": 0.5620412063598632, "loss": 0.2532082498073578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.077306106763075, "step_time": 0.5147824420928955} +{"epoch": 0, "iter": 17728, "iter_tflops": 42.043256825055096, "iter_time": 0.4907111167907714, "loss": 0.2876296043395996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.794197366600294, "step_time": 0.45051763534545897} +{"epoch": 0, "iter": 17729, "iter_tflops": 30.23570218724126, "iter_time": 0.6823421325683594, "loss": 0.6321247816085815, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 33.20076995435918, "step_time": 0.621404067993164} +{"epoch": 0, "iter": 17730, "iter_tflops": 35.038020286647246, "iter_time": 0.588820182800293, "loss": 0.5766176581382751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.156782225062614, "step_time": 0.5406926975250245} +{"epoch": 0, "iter": 17731, "iter_tflops": 33.41590666389612, "iter_time": 0.6174033737182617, "loss": 0.7393629550933838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.43000598744817, "step_time": 0.5663214416503906} +{"epoch": 0, "iter": 17732, "iter_tflops": 35.28199723738336, "iter_time": 0.5847484588623046, "loss": 0.7959243655204773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.20113647166961, "step_time": 0.5400649147033691} +{"epoch": 0, "iter": 17733, "iter_tflops": 14.971845881076058, "iter_time": 1.377992645263672, "loss": 0.7374833822250366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.771082641709853, "step_time": 1.3081596221923828} +{"epoch": 0, "iter": 17734, "iter_tflops": 19.469679140672586, "iter_time": 1.0596524658203126, "loss": 0.6325694918632507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.520209861731374, "step_time": 0.8771645164489748} +{"epoch": 0, "iter": 17735, "iter_tflops": 45.850233415733925, "iter_time": 0.4499670333862305, "loss": 0.6615972518920898, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23565047766718, "step_time": 0.41902754020690913} +{"epoch": 0, "iter": 17736, "iter_tflops": 44.8011968467805, "iter_time": 0.4605031776428222, "loss": 0.5280138254165649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.89184743713903, "step_time": 0.430785083770752} +{"epoch": 0, "iter": 17737, "iter_tflops": 29.81871898693755, "iter_time": 0.6918839645385741, "loss": 0.5866543650627136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.690464588951986, "step_time": 0.6510189666748046} +{"epoch": 0, "iter": 17738, "iter_tflops": 15.435287749402235, "iter_time": 1.33661865234375, "loss": 0.42739471793174744, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 17.112273406906954, "step_time": 1.205631362915039} +{"epoch": 0, "iter": 17739, "iter_tflops": 48.28290580760697, "iter_time": 0.4272960205078125, "loss": 0.674609363079071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.514729729652466, "step_time": 0.3928629856109619} +{"epoch": 0, "iter": 17740, "iter_tflops": 45.124938115364856, "iter_time": 0.4571993751525879, "loss": 0.5414721369743347, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.890470899909836, "step_time": 0.42198598480224614} +{"epoch": 0, "iter": 17741, "iter_tflops": 26.015918157662295, "iter_time": 0.793018081665039, "loss": 0.27542349696159363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.46545911896217, "step_time": 0.7511650695800781} +{"epoch": 0, "iter": 17742, "iter_tflops": 17.785269161369033, "iter_time": 1.1600101928710937, "loss": 0.35219213366508484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.835269508216996, "step_time": 0.944851791381836} +{"epoch": 0, "iter": 17743, "iter_tflops": 50.185816065053174, "iter_time": 0.4110941123962402, "loss": 0.2956637442111969, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.42436665672693, "step_time": 0.3790782470703125} +{"epoch": 0, "iter": 17744, "iter_tflops": 43.04023004419476, "iter_time": 0.4793444061279296, "loss": 0.2694801688194275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.37691835078496, "step_time": 0.4448569297790527} +{"epoch": 0, "iter": 17745, "iter_tflops": 23.80716099178365, "iter_time": 0.8665919265747071, "loss": 0.4251696467399597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.0179344523767, "step_time": 0.8246521530151366} +{"epoch": 0, "iter": 17746, "iter_tflops": 13.168017128411163, "iter_time": 1.5667577972412108, "loss": 0.3672073781490326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.54846107493728, "step_time": 1.2467076797485352} +{"epoch": 0, "iter": 17747, "iter_tflops": 45.7102622976395, "iter_time": 0.4513448944091797, "loss": 0.47284770011901855, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.46667889715106, "step_time": 0.417070520401001} +{"epoch": 0, "iter": 17748, "iter_tflops": 46.7709780205578, "iter_time": 0.44110887527465814, "loss": 0.4822416305541992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74602508729675, "step_time": 0.4065558528900146} +{"epoch": 0, "iter": 17749, "iter_tflops": 33.14096835145546, "iter_time": 0.6225253677368164, "loss": 0.15960818529129028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.31792981059045, "step_time": 0.5841535339355468} +{"epoch": 0, "iter": 17750, "iter_tflops": 9.60203044523478, "iter_time": 2.1486177978515624, "loss": 0.17692376673221588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.034631446153043, "step_time": 1.8696676559448244} +{"epoch": 0, "iter": 17751, "iter_tflops": 13.14125532834287, "iter_time": 1.5699484558105468, "loss": 0.15673397481441498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.761732130437881, "step_time": 1.308935676574707} +{"epoch": 0, "iter": 17752, "iter_tflops": 15.560922324616087, "iter_time": 1.3258271636962888, "loss": 0.09339489042758942, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.650306897216694, "step_time": 1.1062066497802736} +{"epoch": 0, "iter": 17753, "iter_tflops": 20.00635564479502, "iter_time": 0.7594485321044921, "loss": 0.13220956921577454, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 21.841722362696906, "step_time": 0.6956318359375} +{"epoch": 0, "iter": 17754, "iter_tflops": 25.785964454994385, "iter_time": 0.5892274246215821, "loss": 0.32458460330963135, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 27.501649719120927, "step_time": 0.5524685821533203} +{"epoch": 0, "iter": 17755, "iter_tflops": 25.843990544267946, "iter_time": 0.5879044647216797, "loss": 0.20897428691387177, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 27.457446904146842, "step_time": 0.5533579826354981} +{"epoch": 0, "iter": 17756, "iter_tflops": 27.087938707345224, "iter_time": 0.5609063720703125, "loss": 
0.20820780098438263, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.599625565841766, "step_time": 0.5312586135864259} +{"epoch": 0, "iter": 17757, "iter_tflops": 29.98907047434531, "iter_time": 0.6879537506103515, "loss": 0.3580024838447571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.727223470169047, "step_time": 0.6502647018432617} +{"epoch": 0, "iter": 17758, "iter_tflops": 12.369398723964622, "iter_time": 1.6679140167236326, "loss": 0.6804748773574829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.352534153319448, "step_time": 1.3438233261108399} +{"epoch": 0, "iter": 17759, "iter_tflops": 41.73015328660796, "iter_time": 0.4943929481506347, "loss": 0.5562117695808411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.66286054940183, "step_time": 0.45181342697143556} +{"epoch": 0, "iter": 17760, "iter_tflops": 36.60397338751661, "iter_time": 0.5636298904418945, "loss": 0.3999565839767456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.13793337317569, "step_time": 0.5140048770904542} +{"epoch": 0, "iter": 17761, "iter_tflops": 19.498317601322718, "iter_time": 1.0580960845947265, "loss": 0.10722728818655014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.320700617831537, "step_time": 0.9676555137634277} +{"epoch": 0, "iter": 17762, "iter_tflops": 23.106156875425626, "iter_time": 0.8928829498291015, "loss": 0.07242653518915176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.910047935965114, "step_time": 0.7962584075927734} +{"epoch": 0, "iter": 17763, "iter_tflops": 48.3381489931641, "iter_time": 0.4268076858520508, "loss": 0.13051392138004303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.60157544167222, "step_time": 0.3922143650054931} +{"epoch": 0, "iter": 17764, "iter_tflops": 50.566263827567404, "iter_time": 0.40800114440917973, "loss": 0.1224709153175354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.924171030323095, "step_time": 0.37562867355346674} +{"epoch": 0, "iter": 17765, "iter_tflops": 26.659317860442364, "iter_time": 
0.7738792724609374, "loss": 0.5726727843284607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.19716743084183, "step_time": 0.7316725540161133} +{"epoch": 0, "iter": 17766, "iter_tflops": 16.31637624655076, "iter_time": 1.264440902709961, "loss": 0.5023677945137024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.720521034755055, "step_time": 1.046173854827881} +{"epoch": 0, "iter": 17767, "iter_tflops": 41.230505727682704, "iter_time": 0.5003841972351074, "loss": 0.5074648857116699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.063180067088716, "step_time": 0.45782595634460443} +{"epoch": 0, "iter": 17768, "iter_tflops": 35.96091895290922, "iter_time": 0.573708740234375, "loss": 0.48203492164611816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.592210380243564, "step_time": 0.5210897121429443} +{"epoch": 0, "iter": 17769, "iter_tflops": 18.28090686312041, "iter_time": 1.1285596313476562, "loss": 0.5535928010940552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.67953015202022, "step_time": 1.0483529510498046} +{"epoch": 0, "iter": 17770, "iter_tflops": 15.886097727098331, "iter_time": 1.2986885681152343, "loss": 0.6357206702232361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.631119791269235, "step_time": 1.050938190460205} +{"epoch": 0, "iter": 17771, "iter_tflops": 36.82196064346843, "iter_time": 0.5602931823730468, "loss": 0.5842111110687256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.23504799487401, "step_time": 0.5127642326354981} +{"epoch": 0, "iter": 17772, "iter_tflops": 39.425698794979844, "iter_time": 0.5232904968261719, "loss": 0.6141032576560974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60609293786399, "step_time": 0.4842287120819092} +{"epoch": 0, "iter": 17773, "iter_tflops": 19.840285743817127, "iter_time": 1.0398586883544922, "loss": 0.7006060481071472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.427994973636004, "step_time": 0.9628102645874024} +{"epoch": 0, "iter": 17774, "iter_tflops": 15.948918896978492, 
"iter_time": 1.2935731658935548, "loss": 0.6206885576248169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.8022403248558, "step_time": 1.097267833709717} +{"epoch": 0, "iter": 17775, "iter_tflops": 44.779001003040364, "iter_time": 0.4607314376831055, "loss": 0.45223039388656616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.83988203316924, "step_time": 0.4224230823516846} +{"epoch": 0, "iter": 17776, "iter_tflops": 46.60193895706906, "iter_time": 0.44270890808105473, "loss": 0.5955688953399658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.355081905938896, "step_time": 0.4097122421264649} +{"epoch": 0, "iter": 17777, "iter_tflops": 24.928983378359437, "iter_time": 0.6851173629760743, "loss": 0.044719398021698, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 26.485862631154934, "step_time": 0.6448451232910156} +{"epoch": 0, "iter": 17778, "iter_tflops": 13.88409844886428, "iter_time": 1.2301324005126952, "loss": 0.020993275567889214, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 17.419878331266005, "step_time": 0.9804476833343505} +{"epoch": 0, "iter": 17779, "iter_tflops": 33.681180802258616, "iter_time": 0.5070867156982422, "loss": 0.016803130507469177, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 36.9577302064535, "step_time": 0.4621300945281983} +{"epoch": 0, "iter": 17780, "iter_tflops": 35.21713297667272, "iter_time": 0.4849707489013672, "loss": 0.013531963340938091, "lr": 3e-05, "seqlen": 6816.0, "step_tflops": 38.76876591582046, "step_time": 0.44054224967956546} +{"epoch": 0, "iter": 17781, "iter_tflops": 29.79052385954345, "iter_time": 0.6925387954711915, "loss": 0.20230266451835632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.26490625988889, "step_time": 0.6394282798767089} +{"epoch": 0, "iter": 17782, "iter_tflops": 9.76188861677952, "iter_time": 2.113432586669922, "loss": 0.268363893032074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.86512694388425, "step_time": 1.6036447677612304} +{"epoch": 0, "iter": 17783, "iter_tflops": 
12.153753909149115, "iter_time": 1.6975079193115234, "loss": 0.15632206201553345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.044632987682101, "step_time": 1.3713258094787597} +{"epoch": 0, "iter": 17784, "iter_tflops": 30.507280106513743, "iter_time": 0.6762678756713868, "loss": 0.19670499861240387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.112257938547565, "step_time": 0.6047999973297119} +{"epoch": 0, "iter": 17785, "iter_tflops": 20.918589058732522, "iter_time": 0.7341487503051758, "loss": 0.1779720038175583, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 22.200468530031014, "step_time": 0.6917581939697266} +{"epoch": 0, "iter": 17786, "iter_tflops": 10.52573486130746, "iter_time": 1.4590293426513672, "loss": 0.33527877926826477, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 12.438741142167443, "step_time": 1.2346390876770021} +{"epoch": 0, "iter": 17787, "iter_tflops": 23.377493608372305, "iter_time": 0.6569291076660158, "loss": 0.15342599153518677, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 25.172233245237464, "step_time": 0.6100911216735839} +{"epoch": 0, "iter": 17788, "iter_tflops": 24.361516533549437, "iter_time": 0.6303940887451172, "loss": 0.12221518903970718, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 26.11788751126455, "step_time": 0.5880014610290527} +{"epoch": 0, "iter": 17789, "iter_tflops": 16.70170063956963, "iter_time": 1.235269027709961, "loss": 0.11236976832151413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.002889124785952, "step_time": 1.145987922668457} +{"epoch": 0, "iter": 17790, "iter_tflops": 16.972756010473454, "iter_time": 1.215541748046875, "loss": 0.17149806022644043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.82152145443331, "step_time": 0.9040191974639891} +{"epoch": 0, "iter": 17791, "iter_tflops": 46.227517266461916, "iter_time": 0.4462946472167969, "loss": 0.14951491355895996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.94144807759647, "step_time": 0.4131056327819824} +{"epoch": 0, "iter": 
17792, "iter_tflops": 48.09073236440751, "iter_time": 0.4290035209655762, "loss": 0.15346847474575043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.950099566044166, "step_time": 0.3971328964233398} +{"epoch": 0, "iter": 17793, "iter_tflops": 23.52077582902669, "iter_time": 0.8771434097290038, "loss": 0.06372716277837753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.725011544015576, "step_time": 0.8344219970703125} +{"epoch": 0, "iter": 17794, "iter_tflops": 16.951463089713286, "iter_time": 1.2170686035156248, "loss": 0.03301801159977913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.988363992268926, "step_time": 1.0321551837921141} +{"epoch": 0, "iter": 17795, "iter_tflops": 45.32527197420874, "iter_time": 0.4551785926818848, "loss": 0.03074677102267742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.06700375443177, "step_time": 0.4120696659088135} +{"epoch": 0, "iter": 17796, "iter_tflops": 52.609783132623974, "iter_time": 0.39215317535400396, "loss": 0.0348004586994648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.36808646529914, "step_time": 0.35962666320800785} +{"epoch": 0, "iter": 17797, "iter_tflops": 20.827298541747165, "iter_time": 0.9905794296264648, "loss": 0.13331153988838196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.799742984194655, "step_time": 0.9463915939331053} +{"epoch": 0, "iter": 17798, "iter_tflops": 20.19671573442369, "iter_time": 1.0215073471069336, "loss": 0.1156768724322319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.04143611805224, "step_time": 0.8581473007202148} +{"epoch": 0, "iter": 17799, "iter_tflops": 47.28207846886246, "iter_time": 0.43634066390991216, "loss": 0.15041114389896393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.26374592269616, "step_time": 0.40244997978210445} +{"epoch": 0, "iter": 17800, "iter_tflops": 48.55804298080217, "iter_time": 0.4248748970031738, "loss": 0.08700986206531525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.72067565499674, "step_time": 0.39132832145690916} 
+{"epoch": 0, "iter": 17801, "iter_tflops": 34.1373428895317, "iter_time": 0.6043555755615234, "loss": 0.004469854291528463, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.39298293704762, "step_time": 0.5668975677490234} +{"epoch": 0, "iter": 17802, "iter_tflops": 13.595739275778746, "iter_time": 1.517467575073242, "loss": 0.008675612509250641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.352595254234876, "step_time": 1.3438179779052735} +{"epoch": 0, "iter": 17803, "iter_tflops": 54.97685945618809, "iter_time": 0.3752686805725097, "loss": 0.0139754768460989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.55026241029039, "step_time": 0.34072673988342284} +{"epoch": 0, "iter": 17804, "iter_tflops": 56.23924001123878, "iter_time": 0.3668451690673828, "loss": 0.0026423733215779066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.279565254286446, "step_time": 0.33667166900634765} +{"epoch": 0, "iter": 17805, "iter_tflops": 48.531221449856865, "iter_time": 0.42510971069335934, "loss": 0.051951661705970764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.45566942806316, "step_time": 0.3859477157592773} +{"epoch": 0, "iter": 17806, "iter_tflops": 48.17773835150327, "iter_time": 0.4282287673950196, "loss": 0.08086872845888138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.49715946677374, "step_time": 0.3856483917236328} +{"epoch": 0, "iter": 17807, "iter_tflops": 49.15962183383362, "iter_time": 0.41967559432983403, "loss": 0.06514471769332886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.433348115624845, "step_time": 0.38610894203186036} +{"epoch": 0, "iter": 17808, "iter_tflops": 47.50918360753863, "iter_time": 0.43425485229492183, "loss": 0.04604678973555565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.750881351988134, "step_time": 0.3986616840362549} +{"epoch": 0, "iter": 17809, "iter_tflops": 32.51761777936866, "iter_time": 0.6344589462280273, "loss": 0.5054381489753723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.67965990793598, "step_time": 
0.5949047241210937} +{"epoch": 0, "iter": 17810, "iter_tflops": 32.4990695050678, "iter_time": 0.6348210525512695, "loss": 0.5423046350479126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.50940096919115, "step_time": 0.509291498184204} +{"epoch": 0, "iter": 17811, "iter_tflops": 43.608464263583436, "iter_time": 0.4730983734130859, "loss": 0.6387143135070801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.174387125692846, "step_time": 0.4373367576599121} +{"epoch": 0, "iter": 17812, "iter_tflops": 44.67359463613944, "iter_time": 0.46181852340698243, "loss": 0.7053536176681519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.21057343382097, "step_time": 0.4279371109008789} +{"epoch": 0, "iter": 17813, "iter_tflops": 24.93105353483519, "iter_time": 0.8275259399414062, "loss": 0.5524279475212097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.245657914230065, "step_time": 0.7860764465332031} +{"epoch": 0, "iter": 17814, "iter_tflops": 17.47014014365915, "iter_time": 1.180934631347656, "loss": 0.5844770073890686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.169189973540067, "step_time": 0.9745811500549317} +{"epoch": 0, "iter": 17815, "iter_tflops": 42.39492812332117, "iter_time": 0.48664060592651365, "loss": 0.5118189454078674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59963523379564, "step_time": 0.45243988037109373} +{"epoch": 0, "iter": 17816, "iter_tflops": 42.07524411718816, "iter_time": 0.4903380584716797, "loss": 0.5321400761604309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.105390149972806, "step_time": 0.4573975181579589} +{"epoch": 0, "iter": 17817, "iter_tflops": 25.69956721235449, "iter_time": 0.7383293914794922, "loss": 0.12037817388772964, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 27.054839103049392, "step_time": 0.701343879699707} +{"epoch": 0, "iter": 17818, "iter_tflops": 23.844099651925752, "iter_time": 0.7957836990356446, "loss": 0.1327333152294159, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 30.87764093701625, 
"step_time": 0.6145141029357911} +{"epoch": 0, "iter": 17819, "iter_tflops": 38.482509573024515, "iter_time": 0.4930745429992675, "loss": 0.11787612736225128, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 42.35164661863059, "step_time": 0.4480285263061523} +{"epoch": 0, "iter": 17820, "iter_tflops": 38.07305857427046, "iter_time": 0.49837723922729493, "loss": 0.13440026342868805, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 41.8752604050704, "step_time": 0.4531254405975341} +{"epoch": 0, "iter": 17821, "iter_tflops": 20.49339491369688, "iter_time": 1.0067191696166993, "loss": 0.028520844876766205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.094780782134716, "step_time": 0.9337541618347167} +{"epoch": 0, "iter": 17822, "iter_tflops": 29.996006999947216, "iter_time": 0.687794662475586, "loss": 0.027521925047039986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.935179824114215, "step_time": 0.6079559211730957} +{"epoch": 0, "iter": 17823, "iter_tflops": 53.93437960477825, "iter_time": 0.3825221252441406, "loss": 0.024238720536231995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.873727827043915, "step_time": 0.35042954254150394} +{"epoch": 0, "iter": 17824, "iter_tflops": 52.2156240188549, "iter_time": 0.3951134147644043, "loss": 0.014478611759841442, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.142118516641425, "step_time": 0.36104880332946776} +{"epoch": 0, "iter": 17825, "iter_tflops": 19.99570829879215, "iter_time": 0.42573108673095694, "loss": 0.004289725795388222, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 22.204098387112037, "step_time": 0.3833884391784668} +{"epoch": 0, "iter": 17826, "iter_tflops": 22.371890382531365, "iter_time": 0.3805129776000976, "loss": 0.0042356583289802074, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 24.56750668892188, "step_time": 0.34650625038146965} +{"epoch": 0, "iter": 17827, "iter_tflops": 23.41026279803813, "iter_time": 0.36363515853881834, "loss": 0.002943003084510565, "lr": 3e-05, "seqlen": 3440.0, 
"step_tflops": 25.73263901856177, "step_time": 0.33081700706481937} +{"epoch": 0, "iter": 17828, "iter_tflops": 24.742235543897866, "iter_time": 0.3440592346191406, "loss": 0.007163833826780319, "lr": 3e-05, "seqlen": 3440.0, "step_tflops": 27.127398144544223, "step_time": 0.31380800247192386} +{"epoch": 0, "iter": 17829, "iter_tflops": 22.002506955232757, "iter_time": 0.9376701278686522, "loss": 0.0715067982673645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.052705353806175, "step_time": 0.8949532470703123} +{"epoch": 0, "iter": 17830, "iter_tflops": 17.668378050272963, "iter_time": 1.1676846313476563, "loss": 0.15983518958091736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.545811611830487, "step_time": 0.9575454330444336} +{"epoch": 0, "iter": 17831, "iter_tflops": 47.358965930053714, "iter_time": 0.4356322631835937, "loss": 0.11786985397338867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.73288615635639, "step_time": 0.3988003578186035} +{"epoch": 0, "iter": 17832, "iter_tflops": 49.785578118342535, "iter_time": 0.41439899444580075, "loss": 0.08297083526849747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.99658592597274, "step_time": 0.3820814437866211} +{"epoch": 0, "iter": 17833, "iter_tflops": 23.474161313803616, "iter_time": 0.8788852233886718, "loss": 0.05752415582537651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.59178610666352, "step_time": 0.8389424591064453} +{"epoch": 0, "iter": 17834, "iter_tflops": 14.49062224300659, "iter_time": 1.4237548370361328, "loss": 0.06869231909513474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.968496955661134, "step_time": 1.0331820945739747} +{"epoch": 0, "iter": 17835, "iter_tflops": 41.03849354105215, "iter_time": 0.5027254104614257, "loss": 0.052198879420757294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.34412084293204, "step_time": 0.45498938179016113} +{"epoch": 0, "iter": 17836, "iter_tflops": 41.79684362641947, "iter_time": 0.49360410308837893, "loss": 0.08953718841075897, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 45.68905610870755, "step_time": 0.45155438232421874} +{"epoch": 0, "iter": 17837, "iter_tflops": 31.03539587844847, "iter_time": 0.6113904876708984, "loss": 0.005458779167383909, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 34.267809127375116, "step_time": 0.5537192573547363} +{"epoch": 0, "iter": 17838, "iter_tflops": 45.237706460086606, "iter_time": 0.4194453544616699, "loss": 0.00608479417860508, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 49.71802289507106, "step_time": 0.38164723205566403} +{"epoch": 0, "iter": 17839, "iter_tflops": 50.007325174237224, "iter_time": 0.37943932723999024, "loss": 0.004583498928695917, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 55.06421416780169, "step_time": 0.3445930557250977} +{"epoch": 0, "iter": 17840, "iter_tflops": 51.703053847118255, "iter_time": 0.36699468231201177, "loss": 0.0018427801551297307, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 56.64550333569461, "step_time": 0.3349735584259033} +{"epoch": 0, "iter": 17841, "iter_tflops": 26.14628298746795, "iter_time": 0.7890641098022461, "loss": 0.16180996596813202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.507350420359113, "step_time": 0.750021110534668} +{"epoch": 0, "iter": 17842, "iter_tflops": 12.14137841637439, "iter_time": 1.6992381591796875, "loss": 0.11341781914234161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.373448660245545, "step_time": 1.4353614082336426} +{"epoch": 0, "iter": 17843, "iter_tflops": 41.86117951691683, "iter_time": 0.4928454895019531, "loss": 0.1063506081700325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.20922781986024, "step_time": 0.4563469562530517} +{"epoch": 0, "iter": 17844, "iter_tflops": 53.26832185954778, "iter_time": 0.38730511474609375, "loss": 0.0452834777534008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.93661726929531, "step_time": 0.3560976543426514} +{"epoch": 0, "iter": 17845, "iter_tflops": 24.823833996474487, "iter_time": 0.8311002044677733, "loss": 
0.2795705199241638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.076450514464693, "step_time": 0.7911772155761718} +{"epoch": 0, "iter": 17846, "iter_tflops": 13.114616205342795, "iter_time": 1.573137420654297, "loss": 0.33277344703674316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.543910111276702, "step_time": 1.3272782306671143} +{"epoch": 0, "iter": 17847, "iter_tflops": 44.546177867961944, "iter_time": 0.46313947677612305, "loss": 0.30716997385025024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.72537820391895, "step_time": 0.42341576957702637} +{"epoch": 0, "iter": 17848, "iter_tflops": 41.388500383526676, "iter_time": 0.49847405242919923, "loss": 0.33140140771865845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.99242991154141, "step_time": 0.45854588317871103} +{"epoch": 0, "iter": 17849, "iter_tflops": 16.163132288129344, "iter_time": 1.276429168701172, "loss": 0.6032521724700928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.44289175699566, "step_time": 1.182779426574707} +{"epoch": 0, "iter": 17850, "iter_tflops": 27.01606049070071, "iter_time": 0.7636603240966797, "loss": 0.6430152654647827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.98564014126583, "step_time": 0.6254568176269532} +{"epoch": 0, "iter": 17851, "iter_tflops": 37.91528490194546, "iter_time": 0.5441365814208985, "loss": 0.6421910524368286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.0879461602264, "step_time": 0.50212034034729} +{"epoch": 0, "iter": 17852, "iter_tflops": 42.23462745968057, "iter_time": 0.4884876403808594, "loss": 0.5218808650970459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.69185423233679, "step_time": 0.45152672958374024} +{"epoch": 0, "iter": 17853, "iter_tflops": 20.887860150956417, "iter_time": 0.9877073745727538, "loss": 0.3171789348125458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.48283654691209, "step_time": 0.917637481689453} +{"epoch": 0, "iter": 17854, "iter_tflops": 31.09510854306135, "iter_time": 0.6634835662841797, 
"loss": 0.2677515149116516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.420113668766255, "step_time": 0.5993906269073487} +{"epoch": 0, "iter": 17855, "iter_tflops": 40.2263289727903, "iter_time": 0.5128753738403321, "loss": 0.2546042501926422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.81388857667404, "step_time": 0.47088022041320804} +{"epoch": 0, "iter": 17856, "iter_tflops": 42.940121609825574, "iter_time": 0.48046192550659184, "loss": 0.4128129780292511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.846442800064224, "step_time": 0.4403982944488525} +{"epoch": 0, "iter": 17857, "iter_tflops": 17.953280713489463, "iter_time": 1.149154510498047, "loss": 0.4154031574726105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.521589147193552, "step_time": 1.0568347358703614} +{"epoch": 0, "iter": 17858, "iter_tflops": 16.474152885968763, "iter_time": 1.252331069946289, "loss": 0.3025956153869629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.613363461760155, "step_time": 1.0518896236419677} +{"epoch": 0, "iter": 17859, "iter_tflops": 37.61962870021691, "iter_time": 0.5484130020141601, "loss": 0.2988463342189789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.37047646448087, "step_time": 0.49869122314453124} +{"epoch": 0, "iter": 17860, "iter_tflops": 37.48673885399998, "iter_time": 0.5503571166992188, "loss": 0.2526242733001709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.01028067041073, "step_time": 0.5030712585449219} +{"epoch": 0, "iter": 17861, "iter_tflops": 29.958015911975348, "iter_time": 0.6886668853759765, "loss": 0.4688080847263336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.46998194619565, "step_time": 0.6353897438049316} +{"epoch": 0, "iter": 17862, "iter_tflops": 35.628278942481344, "iter_time": 0.5790651168823242, "loss": 0.4713176488876343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.99306608494953, "step_time": 0.5158667621612548} +{"epoch": 0, "iter": 17863, "iter_tflops": 37.88217249189165, "iter_time": 
0.544612205505371, "loss": 0.38188636302948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.085726720387136, "step_time": 0.5021474647521973} +{"epoch": 0, "iter": 17864, "iter_tflops": 41.17672345403946, "iter_time": 0.5010377655029297, "loss": 0.46429169178009033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92463356548867, "step_time": 0.4592378807067871} +{"epoch": 0, "iter": 17865, "iter_tflops": 1.32841834781844, "iter_time": 1.327668426513672, "loss": 0.1275937408208847, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 1.4213601136779546, "step_time": 1.2408530960083006} +{"epoch": 0, "iter": 17866, "iter_tflops": 1.57193144818319, "iter_time": 1.1219949188232423, "loss": 0.23830856382846832, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 1.9042850690767243, "step_time": 0.9261738834381104} +{"epoch": 0, "iter": 17867, "iter_tflops": 3.610789239349807, "iter_time": 0.48845251846313475, "loss": 0.08710627257823944, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 3.9086134950154183, "step_time": 0.4512339477539062} +{"epoch": 0, "iter": 17868, "iter_tflops": 4.051423339042749, "iter_time": 0.4353282661437988, "loss": 0.21329687535762787, "lr": 3e-05, "seqlen": 720.0, "step_tflops": 4.410294950150711, "step_time": 0.3999050216674805} +{"epoch": 0, "iter": 17869, "iter_tflops": 26.649350288844126, "iter_time": 0.7741687240600585, "loss": 0.15229329466819763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.16521035064895, "step_time": 0.7325027313232422} +{"epoch": 0, "iter": 17870, "iter_tflops": 16.928203255435463, "iter_time": 1.2187408905029296, "loss": 0.1687227189540863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.07606912639932, "step_time": 1.0276460685729982} +{"epoch": 0, "iter": 17871, "iter_tflops": 50.77677834711543, "iter_time": 0.4063096199035644, "loss": 0.16799841821193695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.53309227542939, "step_time": 0.37150989913940435} +{"epoch": 0, "iter": 17872, "iter_tflops": 47.03130751739585, 
"iter_time": 0.43866723251342776, "loss": 0.11814439296722412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.00935537246922, "step_time": 0.404457052230835} +{"epoch": 0, "iter": 17873, "iter_tflops": 19.767172516950204, "iter_time": 1.0437048339843749, "loss": 0.04189843311905861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.506910393386562, "step_time": 1.0060556716918945} +{"epoch": 0, "iter": 17874, "iter_tflops": 16.03182082589882, "iter_time": 1.2868839874267577, "loss": 0.0698612630367279, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.516213217155265, "step_time": 1.1142177543640137} +{"epoch": 0, "iter": 17875, "iter_tflops": 33.90649019108691, "iter_time": 0.6084703369140625, "loss": 0.05068051069974899, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.439966569864204, "step_time": 0.49785497474670415} +{"epoch": 0, "iter": 17876, "iter_tflops": 40.97825863564372, "iter_time": 0.5034643783569336, "loss": 0.04288393259048462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.15994277701937, "step_time": 0.4568449878692627} +{"epoch": 0, "iter": 17877, "iter_tflops": 17.569257852098165, "iter_time": 1.1742723388671874, "loss": 0.0009896500268951058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.923683810193346, "step_time": 1.09022607421875} +{"epoch": 0, "iter": 17878, "iter_tflops": 41.76764138025235, "iter_time": 0.4939492111206055, "loss": 0.0026624170131981373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.91833395677432, "step_time": 0.43054697036743167} +{"epoch": 0, "iter": 17879, "iter_tflops": 44.70256899236339, "iter_time": 0.46151919174194334, "loss": 0.016120899468660355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.47041047144433, "step_time": 0.4170390605926514} +{"epoch": 0, "iter": 17880, "iter_tflops": 41.858152818051565, "iter_time": 0.4928811264038086, "loss": 0.007441114634275436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.140375707384734, "step_time": 0.4471375274658203} +{"epoch": 0, "iter": 17881, 
"iter_tflops": 17.66681672928978, "iter_time": 1.1677878265380859, "loss": 0.3744156062602997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.554323504475455, "step_time": 1.1119291687011719} +{"epoch": 0, "iter": 17882, "iter_tflops": 16.810380072583232, "iter_time": 1.227282989501953, "loss": 0.44918861985206604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.87717306786032, "step_time": 1.0379289569854737} +{"epoch": 0, "iter": 17883, "iter_tflops": 39.96993870238816, "iter_time": 0.5161652526855469, "loss": 0.3710137605667114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.936773048698164, "step_time": 0.4695632400512696} +{"epoch": 0, "iter": 17884, "iter_tflops": 39.30777900884268, "iter_time": 0.5248603210449219, "loss": 0.47022733092308044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90289170433763, "step_time": 0.48087885665893554} +{"epoch": 0, "iter": 17885, "iter_tflops": 25.112132823812264, "iter_time": 0.8215587921142578, "loss": 0.2151808738708496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.235827359176124, "step_time": 0.7574983215332032} +{"epoch": 0, "iter": 17886, "iter_tflops": 22.645430958770085, "iter_time": 0.9110488357543945, "loss": 0.23674362897872925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.333054069339152, "step_time": 0.7548038158416748} +{"epoch": 0, "iter": 17887, "iter_tflops": 49.31924223063197, "iter_time": 0.41831732559204093, "loss": 0.1180291548371315, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.48438320831911, "step_time": 0.385740514755249} +{"epoch": 0, "iter": 17888, "iter_tflops": 49.33813129258657, "iter_time": 0.4181571731567383, "loss": 0.2574649155139923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.53873754604511, "step_time": 0.38534889793396} +{"epoch": 0, "iter": 17889, "iter_tflops": 27.571458483323482, "iter_time": 0.7482771911621094, "loss": 0.16790667176246643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.21624963614555, "step_time": 0.7061513290405275} +{"epoch": 0, 
"iter": 17890, "iter_tflops": 14.67017574632117, "iter_time": 1.4063289947509765, "loss": 0.222664937376976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.27537721198857, "step_time": 1.1289011039733887} +{"epoch": 0, "iter": 17891, "iter_tflops": 48.51137113293588, "iter_time": 0.4252836608886719, "loss": 0.17704801261425018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.701653162253145, "step_time": 0.3914695701599121} +{"epoch": 0, "iter": 17892, "iter_tflops": 50.94101673247503, "iter_time": 0.40499964141845707, "loss": 0.20269207656383514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.1189028782195, "step_time": 0.3743015995025635} +{"epoch": 0, "iter": 17893, "iter_tflops": 31.637594101245263, "iter_time": 0.6521069030761718, "loss": 0.13403424620628357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.57786103822484, "step_time": 0.6144254837036133} +{"epoch": 0, "iter": 17894, "iter_tflops": 13.46434297310238, "iter_time": 1.5322762908935545, "loss": 0.5875600576400757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.864890437545599, "step_time": 1.387907539367676} +{"epoch": 0, "iter": 17895, "iter_tflops": 44.78025604277686, "iter_time": 0.46071852493286136, "loss": 0.691795289516449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.3623271149781, "step_time": 0.42659430885314936} +{"epoch": 0, "iter": 17896, "iter_tflops": 46.944913334716865, "iter_time": 0.43947452545166016, "loss": 0.5360857248306274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.66304343298408, "step_time": 0.4072217559814453} +{"epoch": 0, "iter": 17897, "iter_tflops": 30.201758451839655, "iter_time": 0.683109016418457, "loss": 0.3935851752758026, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.10479949938442, "step_time": 0.6426171112060547} +{"epoch": 0, "iter": 17898, "iter_tflops": 22.533251353122715, "iter_time": 0.9155844039916992, "loss": 0.4593885540962219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.222510549088806, "step_time": 0.7578688774108887} 
+{"epoch": 0, "iter": 17899, "iter_tflops": 43.68665964304157, "iter_time": 0.4722515678405762, "loss": 0.38842251896858215, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.144064391274924, "step_time": 0.4376180496215821} +{"epoch": 0, "iter": 17900, "iter_tflops": 50.67506057833164, "iter_time": 0.4071251869201661, "loss": 0.4272545576095581, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.000141912207134, "step_time": 0.37510982322692865} +{"epoch": 0, "iter": 17901, "iter_tflops": 29.153218020883855, "iter_time": 0.7076780853271485, "loss": 0.002656959928572178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.930405353908206, "step_time": 0.6670165901184082} +{"epoch": 0, "iter": 17902, "iter_tflops": 23.91802190392163, "iter_time": 0.8625752410888673, "loss": 0.0025452603586018085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.53788772840235, "step_time": 0.615157808303833} +{"epoch": 0, "iter": 17903, "iter_tflops": 45.50310891862857, "iter_time": 0.4533996467590332, "loss": 0.030620083212852478, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.51417127611653, "step_time": 0.4084218940734863} +{"epoch": 0, "iter": 17904, "iter_tflops": 51.37049811093601, "iter_time": 0.40161365509033203, "loss": 0.0005983344744890928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.79231932236281, "step_time": 0.3632726001739502} +{"epoch": 0, "iter": 17905, "iter_tflops": 28.24705480690903, "iter_time": 0.7303803405761718, "loss": 0.6709063053131104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.357102741285093, "step_time": 0.6579400424957276} +{"epoch": 0, "iter": 17906, "iter_tflops": 39.07339201889856, "iter_time": 0.5280087661743164, "loss": 0.5911192893981934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.98528833867939, "step_time": 0.4799570808410645} +{"epoch": 0, "iter": 17907, "iter_tflops": 41.895176496028554, "iter_time": 0.49244555664062495, "loss": 0.6892809867858887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.29532704255619, "step_time": 
0.4554795131683349} +{"epoch": 0, "iter": 17908, "iter_tflops": 44.54671796500518, "iter_time": 0.46313386154174807, "loss": 0.6573607921600342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76107976239119, "step_time": 0.4319645538330078} +{"epoch": 0, "iter": 17909, "iter_tflops": 43.55384596887303, "iter_time": 0.47369165802001956, "loss": 0.2170051783323288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.504162043307375, "step_time": 0.43430075645446775} +{"epoch": 0, "iter": 17910, "iter_tflops": 46.11754207007256, "iter_time": 0.44735891342163087, "loss": 0.19732272624969482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99384313257709, "step_time": 0.41267268562316894} +{"epoch": 0, "iter": 17911, "iter_tflops": 50.72609208328746, "iter_time": 0.4067156105041504, "loss": 0.19203004240989685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.01315688495122, "step_time": 0.37502108001708984} +{"epoch": 0, "iter": 17912, "iter_tflops": 53.288998763529236, "iter_time": 0.3871548347473145, "loss": 0.20789693295955658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.71296644588849, "step_time": 0.3574776134490966} +{"epoch": 0, "iter": 17913, "iter_tflops": 43.068069637580614, "iter_time": 0.4790345535278321, "loss": 0.11088693886995316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.03617642541585, "step_time": 0.43862182426452634} +{"epoch": 0, "iter": 17914, "iter_tflops": 45.82334842130367, "iter_time": 0.4502310333251953, "loss": 0.10592138767242432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.59907355250993, "step_time": 0.4159572353363037} +{"epoch": 0, "iter": 17915, "iter_tflops": 46.143648275283056, "iter_time": 0.44710581588745113, "loss": 0.09429463744163513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.06922791340806, "step_time": 0.4120513610839843} +{"epoch": 0, "iter": 17916, "iter_tflops": 48.26597634000384, "iter_time": 0.42744589614868167, "loss": 0.12089303135871887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
52.26485919777068, "step_time": 0.3947412052154541} +{"epoch": 0, "iter": 17917, "iter_tflops": 39.50681396391579, "iter_time": 0.5222160797119141, "loss": 0.4041134715080261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.52836098649432, "step_time": 0.48511376953125} +{"epoch": 0, "iter": 17918, "iter_tflops": 18.772350132556635, "iter_time": 1.0990149536132814, "loss": 0.44116294384002686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.74733140306187, "step_time": 0.9943974533081055} +{"epoch": 0, "iter": 17919, "iter_tflops": 38.191988454290254, "iter_time": 0.5401942749023437, "loss": 0.34707266092300415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.964092267456614, "step_time": 0.49163683509826656} +{"epoch": 0, "iter": 17920, "iter_tflops": 42.22839612660898, "iter_time": 0.48855972290039057, "loss": 0.3745858371257782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.06315651395454, "step_time": 0.44788709831237794} +{"epoch": 0, "iter": 17921, "iter_tflops": 16.149023813930945, "iter_time": 1.2775443115234375, "loss": 0.3093949258327484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.44316314388189, "step_time": 1.1827610244750977} +{"epoch": 0, "iter": 17922, "iter_tflops": 19.396123147377953, "iter_time": 1.0636709899902344, "loss": 0.6015117168426514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.952536308099756, "step_time": 0.9398045501708985} +{"epoch": 0, "iter": 17923, "iter_tflops": 43.16708584989093, "iter_time": 0.4779357490539551, "loss": 0.8065780401229858, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.32858733994867, "step_time": 0.4453210144042969} +{"epoch": 0, "iter": 17924, "iter_tflops": 50.03235424235397, "iter_time": 0.41235504150390623, "loss": 0.6177957057952881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.123987587792115, "step_time": 0.3811820678710937} +{"epoch": 0, "iter": 17925, "iter_tflops": 31.387459718104406, "iter_time": 0.6573037033081054, "loss": 0.18445353209972382, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 33.28939455003213, "step_time": 0.6197497367858886} +{"epoch": 0, "iter": 17926, "iter_tflops": 18.56678800990389, "iter_time": 1.1111826934814453, "loss": 0.2790404260158539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.455963911859325, "step_time": 0.9187356014251709} +{"epoch": 0, "iter": 17927, "iter_tflops": 40.101424082932404, "iter_time": 0.5144728393554687, "loss": 0.1745525598526001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.85568790805019, "step_time": 0.4704314193725586} +{"epoch": 0, "iter": 17928, "iter_tflops": 38.48073112288446, "iter_time": 0.5361408920288085, "loss": 0.20522218942642212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.68641447580615, "step_time": 0.49491168212890624} +{"epoch": 0, "iter": 17929, "iter_tflops": 32.42472866581169, "iter_time": 0.6362765197753907, "loss": 0.6148419380187988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.07729915654011, "step_time": 0.5718580379486083} +{"epoch": 0, "iter": 17930, "iter_tflops": 35.67946233679923, "iter_time": 0.5782344284057618, "loss": 0.5254735350608826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.83966695759549, "step_time": 0.5178530616760254} +{"epoch": 0, "iter": 17931, "iter_tflops": 39.55893221203209, "iter_time": 0.5215280685424805, "loss": 0.5646111369132996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.10982531779378, "step_time": 0.47857056617736815} +{"epoch": 0, "iter": 17932, "iter_tflops": 39.53008227273401, "iter_time": 0.52190869140625, "loss": 0.6226832866668701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.76434279961312, "step_time": 0.4824368190765381} +{"epoch": 0, "iter": 17933, "iter_tflops": 32.2787736958472, "iter_time": 0.6391535720825195, "loss": 0.2331158071756363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.792729591149445, "step_time": 0.5764045867919922} +{"epoch": 0, "iter": 17934, "iter_tflops": 37.4988306717719, "iter_time": 0.5501796493530273, "loss": 0.2750133275985718, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 41.910601147016, "step_time": 0.49226431846618657} +{"epoch": 0, "iter": 17935, "iter_tflops": 41.57475161849082, "iter_time": 0.4962409324645996, "loss": 0.13358747959136963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.512546791088305, "step_time": 0.4533056259155273} +{"epoch": 0, "iter": 17936, "iter_tflops": 41.25347347038394, "iter_time": 0.5001056098937988, "loss": 0.20538856089115143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.197172771223954, "step_time": 0.4564686737060547} +{"epoch": 0, "iter": 17937, "iter_tflops": 20.490163605579458, "iter_time": 1.0068779296875001, "loss": 0.2039860486984253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.034236199682308, "step_time": 0.9363198852539063} +{"epoch": 0, "iter": 17938, "iter_tflops": 40.00240308857228, "iter_time": 0.515746353149414, "loss": 0.19236324727535248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.627636775468915, "step_time": 0.4622941074371338} +{"epoch": 0, "iter": 17939, "iter_tflops": 46.1752341142349, "iter_time": 0.44679997634887697, "loss": 0.22191083431243896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.92575643131245, "step_time": 0.4132354717254638} +{"epoch": 0, "iter": 17940, "iter_tflops": 49.95400425342613, "iter_time": 0.41300179672241216, "loss": 0.29446345567703247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.04615728787466, "step_time": 0.3817309970855713} +{"epoch": 0, "iter": 17941, "iter_tflops": 27.319510450994763, "iter_time": 0.7551780090332031, "loss": 0.3298044800758362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.845811019711853, "step_time": 0.7152197418212891} +{"epoch": 0, "iter": 17942, "iter_tflops": 14.605452946479886, "iter_time": 1.4125610198974612, "loss": 0.2180747538805008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.671675460402444, "step_time": 1.237493709564209} +{"epoch": 0, "iter": 17943, "iter_tflops": 42.83001821248505, "iter_time": 0.48169705200195306, "loss": 0.22708933055400848, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 46.30696367549776, "step_time": 0.44552896308898926} +{"epoch": 0, "iter": 17944, "iter_tflops": 51.1185445771425, "iter_time": 0.403593132019043, "loss": 0.4481528699398041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.566311985661024, "step_time": 0.3712877960205078} +{"epoch": 0, "iter": 17945, "iter_tflops": 43.41632255114232, "iter_time": 0.47519210052490235, "loss": 0.09510262310504913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.52073387392193, "step_time": 0.43414930343627933} +{"epoch": 0, "iter": 17946, "iter_tflops": 43.44412679050262, "iter_time": 0.4748879776000976, "loss": 0.08157160878181458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.79802751977038, "step_time": 0.4227853984832764} +{"epoch": 0, "iter": 17947, "iter_tflops": 44.0561062083727, "iter_time": 0.4682913513183593, "loss": 0.080027274787426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.55658424058836, "step_time": 0.43382202148437493} +{"epoch": 0, "iter": 17948, "iter_tflops": 50.33866239801484, "iter_time": 0.4098458824157715, "loss": 0.07089769840240479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.665085243602626, "step_time": 0.37740896987915035} +{"epoch": 0, "iter": 17949, "iter_tflops": 43.57571288315999, "iter_time": 0.47345395278930663, "loss": 0.764715313911438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.53910045814957, "step_time": 0.4339815711975098} +{"epoch": 0, "iter": 17950, "iter_tflops": 43.86030260035045, "iter_time": 0.4703819236755371, "loss": 0.48439276218414307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.52809879392319, "step_time": 0.41655331039428717} +{"epoch": 0, "iter": 17951, "iter_tflops": 46.83974777170198, "iter_time": 0.44046124267578124, "loss": 0.5311545133590698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.487081684085716, "step_time": 0.4086410388946533} +{"epoch": 0, "iter": 17952, "iter_tflops": 45.2969839316134, "iter_time": 0.45546285247802737, "loss": 0.6177868247032166, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.642643217100655, "step_time": 0.42413594627380374} +{"epoch": 0, "iter": 17953, "iter_tflops": 25.55382480662064, "iter_time": 0.8073583374023438, "loss": 0.02401784621179104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.89391321496995, "step_time": 0.7671287307739258} +{"epoch": 0, "iter": 17954, "iter_tflops": 15.8968744708403, "iter_time": 1.2978081665039065, "loss": 0.02408505417406559, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.20119588426776, "step_time": 1.07446919631958} +{"epoch": 0, "iter": 17955, "iter_tflops": 41.403918174116505, "iter_time": 0.4982884330749512, "loss": 0.038559019565582275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.68158403087157, "step_time": 0.45162824249267575} +{"epoch": 0, "iter": 17956, "iter_tflops": 48.919796838483066, "iter_time": 0.42173301696777343, "loss": 0.031237341463565826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.014525360588, "step_time": 0.38195454597473144} +{"epoch": 0, "iter": 17957, "iter_tflops": 20.573765517448077, "iter_time": 1.002786460876465, "loss": 0.0019718080293387175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.4229159944534, "step_time": 0.9200896759033204} +{"epoch": 0, "iter": 17958, "iter_tflops": 21.180070890186517, "iter_time": 0.9740804748535157, "loss": 0.0044356477446854115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.721751443066957, "step_time": 0.6503768730163574} +{"epoch": 0, "iter": 17959, "iter_tflops": 56.604030298283874, "iter_time": 0.36448099899291986, "loss": 0.006643925793468952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.97731414040019, "step_time": 0.33288137435913084} +{"epoch": 0, "iter": 17960, "iter_tflops": 60.04670974746602, "iter_time": 0.3435840797424316, "loss": 0.002185314893722534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.01935175270776, "step_time": 0.3125006980895996} +{"epoch": 0, "iter": 17961, "iter_tflops": 42.47369011890538, "iter_time": 0.4857381935119629, "loss": 
0.06788600236177444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.07929748602077, "step_time": 0.44773020935058594} +{"epoch": 0, "iter": 17962, "iter_tflops": 48.46275473788767, "iter_time": 0.42571029281616213, "loss": 0.060706738382577896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.03843331939317, "step_time": 0.3889838409423828} +{"epoch": 0, "iter": 17963, "iter_tflops": 53.75893428534067, "iter_time": 0.38377050781250005, "loss": 0.06282659620046616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.820162633025674, "step_time": 0.350748664855957} +{"epoch": 0, "iter": 17964, "iter_tflops": 56.806071106872565, "iter_time": 0.3631846580505371, "loss": 0.061798106878995895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.80532551937087, "step_time": 0.3338076992034912} +{"epoch": 0, "iter": 17965, "iter_tflops": 28.470441772699367, "iter_time": 0.7246495742797852, "loss": 0.13886219263076782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.198747564381595, "step_time": 0.6831771240234376} +{"epoch": 0, "iter": 17966, "iter_tflops": 14.143544602170907, "iter_time": 1.4586932830810546, "loss": 0.13451524078845978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.92795014601579, "step_time": 1.2187591133117675} +{"epoch": 0, "iter": 17967, "iter_tflops": 47.972166432530976, "iter_time": 0.43006382751464844, "loss": 0.08938346058130264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.278379811800285, "step_time": 0.3946391143798828} +{"epoch": 0, "iter": 17968, "iter_tflops": 49.97598306219634, "iter_time": 0.41282016372680663, "loss": 0.07810443639755249, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.23568342831867, "step_time": 0.38039704132080077} +{"epoch": 0, "iter": 17969, "iter_tflops": 25.927671626838357, "iter_time": 0.7957171707153321, "loss": 0.2286127805709839, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.36681299659497, "step_time": 0.7538727111816407} +{"epoch": 0, "iter": 17970, "iter_tflops": 18.63575459806535, "iter_time": 
1.1070704650878909, "loss": 0.1797993928194046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.79059133787669, "step_time": 0.8322146587371827} +{"epoch": 0, "iter": 17971, "iter_tflops": 36.531234906745524, "iter_time": 0.5647521514892577, "loss": 0.24020040035247803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.279403021612026, "step_time": 0.5121995849609374} +{"epoch": 0, "iter": 17972, "iter_tflops": 42.22870309937127, "iter_time": 0.4885561714172363, "loss": 0.17109186947345734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.24515919920099, "step_time": 0.44612439155578615} +{"epoch": 0, "iter": 17973, "iter_tflops": 37.05179771050311, "iter_time": 0.556817611694336, "loss": 0.5393783450126648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.98334257594315, "step_time": 0.5034019241333008} +{"epoch": 0, "iter": 17974, "iter_tflops": 39.205997106604755, "iter_time": 0.526222900390625, "loss": 0.5275511145591736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.811067955148175, "step_time": 0.4819102745056153} +{"epoch": 0, "iter": 17975, "iter_tflops": 37.20958841099415, "iter_time": 0.5544563751220702, "loss": 0.33631905913352966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.7200597504945, "step_time": 0.506656759262085} +{"epoch": 0, "iter": 17976, "iter_tflops": 37.68943178099705, "iter_time": 0.5473973083496094, "loss": 0.26251980662345886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.15817777536918, "step_time": 0.5012635307312011} +{"epoch": 0, "iter": 17977, "iter_tflops": 31.228723156368776, "iter_time": 0.6606447982788086, "loss": 0.6100832223892212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.372262405657814, "step_time": 0.6002250671386719} +{"epoch": 0, "iter": 17978, "iter_tflops": 33.34262271071717, "iter_time": 0.6187603683471681, "loss": 0.5755904912948608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.26480687798942, "step_time": 0.568901237487793} +{"epoch": 0, "iter": 17979, "iter_tflops": 38.04949431702233, 
"iter_time": 0.54221728515625, "loss": 0.7513779997825623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.21957586861969, "step_time": 0.5005168800354004} +{"epoch": 0, "iter": 17980, "iter_tflops": 27.91427073149467, "iter_time": 0.7390876770019532, "loss": 0.6067596673965454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.270161270677942, "step_time": 0.6815653648376465} +{"epoch": 0, "iter": 17981, "iter_tflops": 15.882845984479465, "iter_time": 1.2989544525146486, "loss": 0.5917885303497314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.9997942891725, "step_time": 1.2136084213256837} +{"epoch": 0, "iter": 17982, "iter_tflops": 14.305644291696193, "iter_time": 1.442164581298828, "loss": 0.42932045459747314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.412680612313004, "step_time": 0.9634988670349122} +{"epoch": 0, "iter": 17983, "iter_tflops": 39.19219323066183, "iter_time": 0.5264082412719727, "loss": 0.41796472668647766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.94353390174535, "step_time": 0.48042374801635745} +{"epoch": 0, "iter": 17984, "iter_tflops": 38.48220035938893, "iter_time": 0.5361204223632813, "loss": 0.5353277325630188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.031008111832534, "step_time": 0.4908541202545166} +{"epoch": 0, "iter": 17985, "iter_tflops": 29.908420223346997, "iter_time": 0.6898088684082031, "loss": 0.035435937345027924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.58673132023698, "step_time": 0.6331133155822753} +{"epoch": 0, "iter": 17986, "iter_tflops": 32.82533032297071, "iter_time": 0.6285113754272461, "loss": 0.015282686799764633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.48939566988582, "step_time": 0.5653997039794922} +{"epoch": 0, "iter": 17987, "iter_tflops": 48.07663649847959, "iter_time": 0.4291293029785156, "loss": 0.047685395926237106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.56459669862959, "step_time": 0.3851628646850586} +{"epoch": 0, "iter": 17988, "iter_tflops": 
48.6011012582821, "iter_time": 0.42449847793579104, "loss": 0.04247453436255455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.2579558585618, "step_time": 0.38738049888610837} +{"epoch": 0, "iter": 17989, "iter_tflops": 17.567586869524895, "iter_time": 0.8439444046020508, "loss": 0.39653971791267395, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 18.471467170100446, "step_time": 0.8026469421386719} +{"epoch": 0, "iter": 17990, "iter_tflops": 25.5483846274321, "iter_time": 0.5803132705688476, "loss": 0.18113164603710175, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 27.350805437160204, "step_time": 0.5420705680847168} +{"epoch": 0, "iter": 17991, "iter_tflops": 26.61253917357702, "iter_time": 0.5571083068847655, "loss": 0.14905600249767303, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.421048396678625, "step_time": 0.5216579780578613} +{"epoch": 0, "iter": 17992, "iter_tflops": 26.94966828091826, "iter_time": 0.5501391143798828, "loss": 0.3710727393627167, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.53545312525095, "step_time": 0.5195665397644043} +{"epoch": 0, "iter": 17993, "iter_tflops": 30.322663416070494, "iter_time": 0.6803852691650392, "loss": 0.08319448679685593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.36135518101155, "step_time": 0.6375225448608399} +{"epoch": 0, "iter": 17994, "iter_tflops": 9.787919412570483, "iter_time": 2.107811950683594, "loss": 0.10865288972854614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.792806686917451, "step_time": 1.7494642333984376} +{"epoch": 0, "iter": 17995, "iter_tflops": 13.618189234210234, "iter_time": 1.5149659881591795, "loss": 0.10756807029247284, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.21030255373106, "step_time": 1.3563894233703615} +{"epoch": 0, "iter": 17996, "iter_tflops": 41.7449992504649, "iter_time": 0.4942171249389649, "loss": 0.07038804888725281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.03383502068214, "step_time": 0.44817238235473633} +{"epoch": 0, "iter": 17997, 
"iter_tflops": 19.11703282297105, "iter_time": 0.8097529907226564, "loss": 0.2274855524301529, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 20.77347972464087, "step_time": 0.7451844711303711} +{"epoch": 0, "iter": 17998, "iter_tflops": 26.677975682478987, "iter_time": 0.5802567138671875, "loss": 0.257374107837677, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.60147823577715, "step_time": 0.5412333717346192} +{"epoch": 0, "iter": 17999, "iter_tflops": 28.385525214127238, "iter_time": 0.5453509979248047, "loss": 0.1360601931810379, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 30.220029197565097, "step_time": 0.5122455177307129} +{"epoch": 0, "iter": 18000, "iter_tflops": 3.20790089974617, "iter_time": 4.825608703613281, "loss": 0.35884740948677063, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 3.2297302674232253, "step_time": 4.792992980957031} +{"epoch": 0, "iter": 18001, "iter_tflops": 7.509982993779443, "iter_time": 2.747155822753906, "loss": 0.31164082884788513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 7.692887459187689, "step_time": 2.6818400268554687} +{"epoch": 0, "iter": 18002, "iter_tflops": 19.829570651570002, "iter_time": 1.040420585632324, "loss": 0.17535431683063507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.678721941165858, "step_time": 0.909711471557617} +{"epoch": 0, "iter": 18003, "iter_tflops": 21.348056063791542, "iter_time": 0.966415557861328, "loss": 0.18356157839298248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.996093097380815, "step_time": 0.8253727264404297} +{"epoch": 0, "iter": 18004, "iter_tflops": 29.501887983427498, "iter_time": 0.6993143463134766, "loss": 0.2036494016647339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.44121294988497, "step_time": 0.5990234298706054} +{"epoch": 0, "iter": 18005, "iter_tflops": 8.966953768852777, "iter_time": 2.300791778564453, "loss": 0.5819810032844543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.345951361883708, "step_time": 2.2074899291992187} +{"epoch": 0, "iter": 
18006, "iter_tflops": 24.432111988508723, "iter_time": 0.8444253005981446, "loss": 0.46221861243247986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.756890275071633, "step_time": 0.7432782745361328} +{"epoch": 0, "iter": 18007, "iter_tflops": 25.918844985270457, "iter_time": 0.7959881515502929, "loss": 0.492419570684433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.140620454275776, "step_time": 0.7079840164184571} +{"epoch": 0, "iter": 18008, "iter_tflops": 27.420505921250655, "iter_time": 0.7523965301513671, "loss": 0.5451820492744446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.62835532930543, "step_time": 0.6522973861694337} +{"epoch": 0, "iter": 18009, "iter_tflops": 7.755084575079503, "iter_time": 2.66033120727539, "loss": 0.5496370792388916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.095379143038953, "step_time": 2.5485024414062503} +{"epoch": 0, "iter": 18010, "iter_tflops": 21.255575280311064, "iter_time": 0.9706203308105469, "loss": 0.6841347217559814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.85761429406906, "step_time": 0.8299707794189451} +{"epoch": 0, "iter": 18011, "iter_tflops": 19.39793302305154, "iter_time": 1.0635717468261718, "loss": 0.5474133491516113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.35069572923268, "step_time": 0.9230626983642578} +{"epoch": 0, "iter": 18012, "iter_tflops": 21.460847394858757, "iter_time": 0.9613363876342773, "loss": 0.3988482356071472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.82558218437151, "step_time": 0.8310416793823242} +{"epoch": 0, "iter": 18013, "iter_tflops": 10.184228160089939, "iter_time": 2.0257886199951174, "loss": 0.45917418599128723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.75400634637482, "step_time": 1.9184565124511719} +{"epoch": 0, "iter": 18014, "iter_tflops": 17.4001555019428, "iter_time": 1.1856844329833984, "loss": 0.4462302327156067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.46206523743659, "step_time": 0.8102678756713867} +{"epoch": 0, 
"iter": 18015, "iter_tflops": 22.861135809011248, "iter_time": 0.9024526901245118, "loss": 0.6428700089454651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.821737019678075, "step_time": 0.7691930427551269} +{"epoch": 0, "iter": 18016, "iter_tflops": 22.386125103806833, "iter_time": 0.9216018142700196, "loss": 0.5912731885910034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.825011895249773, "step_time": 0.7988803100585937} +{"epoch": 0, "iter": 18017, "iter_tflops": 30.741814296342017, "iter_time": 0.6711085205078124, "loss": 0.19448719918727875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.923123884388296, "step_time": 0.5587580718994141} +{"epoch": 0, "iter": 18018, "iter_tflops": 32.798551288606575, "iter_time": 0.6290245361328125, "loss": 0.21147602796554565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.829142024669174, "step_time": 0.5923514709472656} +{"epoch": 0, "iter": 18019, "iter_tflops": 29.29396975429517, "iter_time": 0.7042778320312499, "loss": 0.11848588287830353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.016775812660036, "step_time": 0.6443838577270509} +{"epoch": 0, "iter": 18020, "iter_tflops": 29.51433957531091, "iter_time": 0.6990193176269531, "loss": 0.1066446602344513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.88247677504464, "step_time": 0.6470982055664062} +{"epoch": 0, "iter": 18021, "iter_tflops": 11.448344222303415, "iter_time": 1.8021028289794923, "loss": 0.31658726930618286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.48758098238983, "step_time": 1.65212890625} +{"epoch": 0, "iter": 18022, "iter_tflops": 31.867299912282814, "iter_time": 0.6474063873291015, "loss": 0.2903118133544922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.669258475707004, "step_time": 0.5476904602050782} +{"epoch": 0, "iter": 18023, "iter_tflops": 33.89196465493099, "iter_time": 0.6087311172485352, "loss": 0.24149899184703827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.3680688582732, "step_time": 0.5672859230041504} 
+{"epoch": 0, "iter": 18024, "iter_tflops": 36.73157212649665, "iter_time": 0.5616719436645508, "loss": 0.2168346494436264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.608686919768736, "step_time": 0.5208729476928711} +{"epoch": 0, "iter": 18025, "iter_tflops": 8.270299716917878, "iter_time": 2.494600463867188, "loss": 0.09382167458534241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.420159330710337, "step_time": 2.450202270507812} +{"epoch": 0, "iter": 18026, "iter_tflops": 25.139167559570037, "iter_time": 0.8206752853393555, "loss": 0.06082819774746895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.92008816261591, "step_time": 0.6895398635864258} +{"epoch": 0, "iter": 18027, "iter_tflops": 31.989261709181957, "iter_time": 0.6449380950927734, "loss": 0.05122171342372894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.40645963371621, "step_time": 0.5826929244995117} +{"epoch": 0, "iter": 18028, "iter_tflops": 24.727051532052204, "iter_time": 0.834353157043457, "loss": 0.051341041922569275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.426576369060232, "step_time": 0.7806949043273925} +{"epoch": 0, "iter": 18029, "iter_tflops": 10.366805225461812, "iter_time": 1.9901110382080078, "loss": 0.5627835988998413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.999461377579058, "step_time": 1.875645797729492} +{"epoch": 0, "iter": 18030, "iter_tflops": 23.97896026964315, "iter_time": 0.8603831558227539, "loss": 0.5691965818405151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.337372169814852, "step_time": 0.7280524597167969} +{"epoch": 0, "iter": 18031, "iter_tflops": 29.08387396582023, "iter_time": 0.7093653869628906, "loss": 0.5366314053535461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.159245142234038, "step_time": 0.6621178855895997} +{"epoch": 0, "iter": 18032, "iter_tflops": 22.374114752430184, "iter_time": 0.9220965270996093, "loss": 0.7164445519447327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.99005667180845, "step_time": 
0.8255720977783202} +{"epoch": 0, "iter": 18033, "iter_tflops": 11.127690377155359, "iter_time": 1.8540319519042967, "loss": 0.3556157648563385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.443709501354027, "step_time": 1.6579536437988283} +{"epoch": 0, "iter": 18034, "iter_tflops": 27.262705969670037, "iter_time": 0.7567514953613281, "loss": 0.31963664293289185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.43638122863855, "step_time": 0.6170253105163575} +{"epoch": 0, "iter": 18035, "iter_tflops": 30.87209731192644, "iter_time": 0.6682763824462892, "loss": 0.24422916769981384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.34924591669102, "step_time": 0.6186374816894531} +{"epoch": 0, "iter": 18036, "iter_tflops": 36.61655145084023, "iter_time": 0.563436279296875, "loss": 0.3281271457672119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.470751183010805, "step_time": 0.5226932067871094} +{"epoch": 0, "iter": 18037, "iter_tflops": 9.58620636263839, "iter_time": 2.15216455078125, "loss": 0.163886159658432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.90252665630691, "step_time": 2.083417114257813} +{"epoch": 0, "iter": 18038, "iter_tflops": 27.45312543320368, "iter_time": 0.751502540588379, "loss": 0.16382387280464172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.2701971426059, "step_time": 0.6201073417663575} +{"epoch": 0, "iter": 18039, "iter_tflops": 31.35456901629161, "iter_time": 0.6579932098388672, "loss": 0.19828712940216064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.266366329304844, "step_time": 0.6020799903869629} +{"epoch": 0, "iter": 18040, "iter_tflops": 32.05770200145088, "iter_time": 0.6435612106323242, "loss": 0.16799545288085938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.088151647925855, "step_time": 0.5879789199829101} +{"epoch": 0, "iter": 18041, "iter_tflops": 8.29452340269674, "iter_time": 2.487315124511719, "loss": 0.08712133765220642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.649027351326383, "step_time": 
2.3853657379150386} +{"epoch": 0, "iter": 18042, "iter_tflops": 23.16756047645641, "iter_time": 0.8905164413452148, "loss": 0.09133599698543549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.968475752378396, "step_time": 0.76500777053833} +{"epoch": 0, "iter": 18043, "iter_tflops": 38.390995599003176, "iter_time": 0.5373940734863281, "loss": 0.10862327367067337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.204918261544506, "step_time": 0.4775172443389893} +{"epoch": 0, "iter": 18044, "iter_tflops": 32.39790187207784, "iter_time": 0.6368033828735352, "loss": 0.06449618935585022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.93682740055498, "step_time": 0.5905256729125976} +{"epoch": 0, "iter": 18045, "iter_tflops": 9.690777305122712, "iter_time": 1.2068741607666014, "loss": 0.03676271438598633, "lr": 3e-05, "seqlen": 4704.0, "step_tflops": 10.386142190053205, "step_time": 1.1260724639892579} +{"epoch": 0, "iter": 18046, "iter_tflops": 11.992520243917191, "iter_time": 0.9752369384765625, "loss": 0.07916989922523499, "lr": 3e-05, "seqlen": 4704.0, "step_tflops": 14.012674678165277, "step_time": 0.8346407089233399} +{"epoch": 0, "iter": 18047, "iter_tflops": 12.334520398030111, "iter_time": 0.9481964721679687, "loss": 0.07211124897003174, "lr": 3e-05, "seqlen": 4704.0, "step_tflops": 14.21702206479813, "step_time": 0.822644058227539} +{"epoch": 0, "iter": 18048, "iter_tflops": 14.093506129530489, "iter_time": 0.8298537368774412, "loss": 0.0708686113357544, "lr": 3e-05, "seqlen": 4704.0, "step_tflops": 16.65116250720128, "step_time": 0.702386318206787} +{"epoch": 0, "iter": 18049, "iter_tflops": 19.170184245012532, "iter_time": 1.0762073669433594, "loss": 0.018013183027505875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.754318717460553, "step_time": 0.9483677139282226} +{"epoch": 0, "iter": 18050, "iter_tflops": 18.54138040732937, "iter_time": 1.112705368041992, "loss": 0.03967214748263359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.982268606949095, 
"step_time": 0.9832632446289062} +{"epoch": 0, "iter": 18051, "iter_tflops": 18.497999730847976, "iter_time": 1.1153148345947266, "loss": 0.03531787544488907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.239246483937517, "step_time": 0.9713665466308594} +{"epoch": 0, "iter": 18052, "iter_tflops": 21.163451219190918, "iter_time": 0.9748454208374024, "loss": 0.039453841745853424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.930118530240353, "step_time": 0.8275569763183593} +{"epoch": 0, "iter": 18053, "iter_tflops": 12.298082200076362, "iter_time": 1.6775862426757813, "loss": 0.6621695756912231, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.322291302402185, "step_time": 1.548614501953125} +{"epoch": 0, "iter": 18054, "iter_tflops": 12.32692372994686, "iter_time": 1.6736611633300784, "loss": 0.6153520345687866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.67447108385049, "step_time": 1.405917350769043} +{"epoch": 0, "iter": 18055, "iter_tflops": 11.976067979195015, "iter_time": 1.7226934204101563, "loss": 0.5874209403991699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.609733272513418, "step_time": 1.4121471710205078} +{"epoch": 0, "iter": 18056, "iter_tflops": 37.254547645899024, "iter_time": 0.5537872505187988, "loss": 0.43677785992622375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.732252883106995, "step_time": 0.45112786293029783} +{"epoch": 0, "iter": 18057, "iter_tflops": 22.276495458447336, "iter_time": 0.746414306640625, "loss": 0.22855328023433685, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 23.616051899788527, "step_time": 0.7040759811401368} +{"epoch": 0, "iter": 18058, "iter_tflops": 15.995327644117433, "iter_time": 1.0395219955444337, "loss": 0.18227531015872955, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 22.80973130210875, "step_time": 0.7289649620056152} +{"epoch": 0, "iter": 18059, "iter_tflops": 29.64392941664401, "iter_time": 0.5609072494506836, "loss": 0.20670628547668457, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 
31.64943902971356, "step_time": 0.5253646011352539} +{"epoch": 0, "iter": 18060, "iter_tflops": 30.094892898307194, "iter_time": 0.552502212524414, "loss": 0.2623187303543091, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 31.975258133030145, "step_time": 0.5200112800598145} +{"epoch": 0, "iter": 18061, "iter_tflops": 31.767863109145726, "iter_time": 0.6494328384399414, "loss": 0.09126785397529602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.005639984322045, "step_time": 0.6066962280273438} +{"epoch": 0, "iter": 18062, "iter_tflops": 9.082676283801915, "iter_time": 2.2714773559570314, "loss": 0.03796200081706047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.644338308555417, "step_time": 1.7717703628540038} +{"epoch": 0, "iter": 18063, "iter_tflops": 12.126303794037684, "iter_time": 1.7013505401611329, "loss": 0.050422247499227524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.059508721127356, "step_time": 1.4674121208190916} +{"epoch": 0, "iter": 18064, "iter_tflops": 26.925087149287545, "iter_time": 0.7662405471801758, "loss": 0.03833644464612007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.94592945228069, "step_time": 0.5164754905700684} +{"epoch": 0, "iter": 18065, "iter_tflops": 17.76107341660182, "iter_time": 0.9361762390136719, "loss": 0.3185446262359619, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 19.135386672049325, "step_time": 0.8689395828247071} +{"epoch": 0, "iter": 18066, "iter_tflops": 12.624813098957192, "iter_time": 1.3170487976074219, "loss": 0.13887061178684235, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 15.303197569651594, "step_time": 1.086537296295166} +{"epoch": 0, "iter": 18067, "iter_tflops": 26.106327248789842, "iter_time": 0.6369143676757812, "loss": 0.15724143385887146, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 28.047095373534212, "step_time": 0.5928419570922852} +{"epoch": 0, "iter": 18068, "iter_tflops": 24.399491270328095, "iter_time": 0.6814689178466798, "loss": 0.3526175618171692, "lr": 3e-05, "seqlen": 
6640.0, "step_tflops": 26.32668926114258, "step_time": 0.6315832099914551} +{"epoch": 0, "iter": 18069, "iter_tflops": 17.371609105827538, "iter_time": 1.1876328430175782, "loss": 0.10880076885223389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.629170720368464, "step_time": 1.1074617233276367} +{"epoch": 0, "iter": 18070, "iter_tflops": 20.659180235617537, "iter_time": 0.9986404724121093, "loss": 0.16234739124774933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.56556373381026, "step_time": 0.7484372062683105} +{"epoch": 0, "iter": 18071, "iter_tflops": 37.728756242847645, "iter_time": 0.5468267593383789, "loss": 0.1765904575586319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3450109453552, "step_time": 0.49899838066101077} +{"epoch": 0, "iter": 18072, "iter_tflops": 40.56230224599211, "iter_time": 0.5086272811889648, "loss": 0.14907306432724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.00593035531366, "step_time": 0.4688253002166748} +{"epoch": 0, "iter": 18073, "iter_tflops": 12.455407287073688, "iter_time": 0.9097631149291991, "loss": 0.0024686073884367943, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 13.40686535718852, "step_time": 0.8451990699768066} +{"epoch": 0, "iter": 18074, "iter_tflops": 10.558627300788432, "iter_time": 1.0731953887939452, "loss": 0.001563863130286336, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 14.57790087918747, "step_time": 0.7773046493530273} +{"epoch": 0, "iter": 18075, "iter_tflops": 29.688220252999514, "iter_time": 0.38168236541748046, "loss": 0.0017210342921316624, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 33.0691313820898, "step_time": 0.34266004753112794} +{"epoch": 0, "iter": 18076, "iter_tflops": 28.89778905410457, "iter_time": 0.39212239074707034, "loss": 0.0034156732726842165, "lr": 3e-05, "seqlen": 4560.0, "step_tflops": 31.71600138814019, "step_time": 0.35727927970886236} +{"epoch": 0, "iter": 18077, "iter_tflops": 30.593561478160147, "iter_time": 0.6743606338500977, "loss": 0.1006862223148346, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.40341107085282, "step_time": 0.6366951141357422} +{"epoch": 0, "iter": 18078, "iter_tflops": 15.428836803932821, "iter_time": 1.3371775054931638, "loss": 0.09471841901540756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.07990022942238, "step_time": 1.141106601715088} +{"epoch": 0, "iter": 18079, "iter_tflops": 40.929447895989526, "iter_time": 0.5040647888183595, "loss": 0.1461767852306366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.013949944338805, "step_time": 0.45832666397094723} +{"epoch": 0, "iter": 18080, "iter_tflops": 41.938289637951364, "iter_time": 0.4919393157958984, "loss": 0.10130651295185089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.82465783186848, "step_time": 0.45021816825866706} +{"epoch": 0, "iter": 18081, "iter_tflops": 18.7932224035924, "iter_time": 1.0977943572998048, "loss": 0.002860582433640957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.319282163704006, "step_time": 1.0153455886840819} +{"epoch": 0, "iter": 18082, "iter_tflops": 18.47390360066811, "iter_time": 1.1167695770263673, "loss": 0.004656113684177399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.596253956408404, "step_time": 0.8743376617431641} +{"epoch": 0, "iter": 18083, "iter_tflops": 42.305534946253424, "iter_time": 0.48766889572143557, "loss": 0.010117430239915848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.806768179320116, "step_time": 0.44077158737182615} +{"epoch": 0, "iter": 18084, "iter_tflops": 47.71230604183015, "iter_time": 0.43240612792968747, "loss": 0.012217582203447819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.6615899490347, "step_time": 0.3917673873901367} +{"epoch": 0, "iter": 18085, "iter_tflops": 15.680282769891583, "iter_time": 1.3157347869873048, "loss": 0.1654173731803894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.45683669511702, "step_time": 1.253648796081543} +{"epoch": 0, "iter": 18086, "iter_tflops": 17.693111984738934, "iter_time": 1.1660522766113282, "loss": 
0.15272612869739532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.89355714906835, "step_time": 0.9423362941741944} +{"epoch": 0, "iter": 18087, "iter_tflops": 42.60910943263439, "iter_time": 0.48419443130493167, "loss": 0.14318935573101044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.46278958205416, "step_time": 0.44403475761413574} +{"epoch": 0, "iter": 18088, "iter_tflops": 44.03347928464997, "iter_time": 0.4685319862365722, "loss": 0.16694992780685425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.49595981767317, "step_time": 0.42541880989074704} +{"epoch": 0, "iter": 18089, "iter_tflops": 14.913441106237563, "iter_time": 1.1838383483886719, "loss": 0.2839830219745636, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 15.86920145989029, "step_time": 1.1125388717651368} +{"epoch": 0, "iter": 18090, "iter_tflops": 16.86021691262737, "iter_time": 1.0471456909179686, "loss": 0.3636769950389862, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 20.619598284354097, "step_time": 0.8562292652130127} +{"epoch": 0, "iter": 18091, "iter_tflops": 41.43247207794898, "iter_time": 0.42611754989624023, "loss": 0.2620764374732971, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 44.87530178460083, "step_time": 0.3934258441925049} +{"epoch": 0, "iter": 18092, "iter_tflops": 42.274523497757045, "iter_time": 0.4176298637390137, "loss": 0.26984140276908875, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 45.68104917680027, "step_time": 0.38648638343811037} +{"epoch": 0, "iter": 18093, "iter_tflops": 28.275192241324262, "iter_time": 0.7296535186767578, "loss": 0.667015016078949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.925729986838498, "step_time": 0.6894098663330077} +{"epoch": 0, "iter": 18094, "iter_tflops": 10.1181532206257, "iter_time": 2.0390177001953123, "loss": 0.6220083236694336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.750602540533873, "step_time": 1.618048515319824} +{"epoch": 0, "iter": 18095, "iter_tflops": 14.583733370472187, "iter_time": 
1.414664749145508, "loss": 0.6296792030334473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.107055948213183, "step_time": 1.139395248413086} +{"epoch": 0, "iter": 18096, "iter_tflops": 35.71392548277164, "iter_time": 0.5776764450073243, "loss": 0.6072712540626526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.88522569338472, "step_time": 0.5305638103485107} +{"epoch": 0, "iter": 18097, "iter_tflops": 13.677374658743679, "iter_time": 1.0571267089843748, "loss": 0.2674785256385803, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 14.771510474811253, "step_time": 0.9788246154785156} +{"epoch": 0, "iter": 18098, "iter_tflops": 15.294701036966188, "iter_time": 0.9453416595458984, "loss": 0.21323949098587036, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 18.210897164362905, "step_time": 0.7939596786499024} +{"epoch": 0, "iter": 18099, "iter_tflops": 21.179323079166355, "iter_time": 0.6826808395385743, "loss": 0.26642611622810364, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 22.83193427437654, "step_time": 0.6332673301696777} +{"epoch": 0, "iter": 18100, "iter_tflops": 22.5174948187749, "iter_time": 0.6421104202270508, "loss": 0.272938996553421, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 24.044202010318525, "step_time": 0.6013390693664551} +{"epoch": 0, "iter": 18101, "iter_tflops": 32.111729347086026, "iter_time": 0.6424784317016602, "loss": 0.19563111662864685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.35180935979004, "step_time": 0.58359370803833} +{"epoch": 0, "iter": 18102, "iter_tflops": 41.315470460890964, "iter_time": 0.4993551635742187, "loss": 0.22064565122127533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.50098213737526, "step_time": 0.45342083930969235} +{"epoch": 0, "iter": 18103, "iter_tflops": 43.323326540241254, "iter_time": 0.4762121276855469, "loss": 0.2438143938779831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.446747602253474, "step_time": 0.43482629585266114} +{"epoch": 0, "iter": 18104, "iter_tflops": 40.284709908438046, 
"iter_time": 0.5121321105957031, "loss": 0.258098840713501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.189854926006575, "step_time": 0.46687398147583004} +{"epoch": 0, "iter": 18105, "iter_tflops": 20.39192371383789, "iter_time": 1.0117286529541016, "loss": 0.1339799463748932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.646440415041937, "step_time": 0.9530940475463868} +{"epoch": 0, "iter": 18106, "iter_tflops": 15.523773818762928, "iter_time": 1.3289998779296874, "loss": 0.1026369109749794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.49026588663165, "step_time": 0.9600203933715821} +{"epoch": 0, "iter": 18107, "iter_tflops": 47.456330077622276, "iter_time": 0.43473849487304683, "loss": 0.09878025203943253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.70302204902378, "step_time": 0.39903070831298826} +{"epoch": 0, "iter": 18108, "iter_tflops": 51.00713013051175, "iter_time": 0.40447469711303713, "loss": 0.08663934469223022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.263376505668795, "step_time": 0.3733230724334717} +{"epoch": 0, "iter": 18109, "iter_tflops": 41.54444372118808, "iter_time": 0.496602954864502, "loss": 0.11384521424770355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.09271572280462, "step_time": 0.4575260810852051} +{"epoch": 0, "iter": 18110, "iter_tflops": 34.95194091009714, "iter_time": 0.5902703247070312, "loss": 0.14276133477687836, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.97650558465266, "step_time": 0.5293212718963622} +{"epoch": 0, "iter": 18111, "iter_tflops": 48.09516679162427, "iter_time": 0.4289639663696289, "loss": 0.14230011403560638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.4102016142082, "step_time": 0.393646520614624} +{"epoch": 0, "iter": 18112, "iter_tflops": 51.859886005304205, "iter_time": 0.3978237342834473, "loss": 0.16643421351909637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.458196193863465, "step_time": 0.36542247009277345} +{"epoch": 0, "iter": 18113, "iter_tflops": 
20.10896133435471, "iter_time": 1.0259651489257813, "loss": 0.4472121000289917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.981506698492925, "step_time": 0.9832989501953124} +{"epoch": 0, "iter": 18114, "iter_tflops": 15.712227091677274, "iter_time": 1.313059783935547, "loss": 0.360171914100647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.805784002309633, "step_time": 1.0970610694885254} +{"epoch": 0, "iter": 18115, "iter_tflops": 40.76888574230787, "iter_time": 0.5060499725341797, "loss": 0.383993923664093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52939772372285, "step_time": 0.4633140029907227} +{"epoch": 0, "iter": 18116, "iter_tflops": 38.83552659889463, "iter_time": 0.531242790222168, "loss": 0.3755223751068115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.486103789096106, "step_time": 0.485596269607544} +{"epoch": 0, "iter": 18117, "iter_tflops": 18.733734344006994, "iter_time": 1.1012803497314452, "loss": 0.00419971626251936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.83431797213449, "step_time": 1.0401715621948242} +{"epoch": 0, "iter": 18118, "iter_tflops": 15.81980076903352, "iter_time": 1.3041310577392577, "loss": 0.0019417984876781702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.17019545679344, "step_time": 1.0762067375183106} +{"epoch": 0, "iter": 18119, "iter_tflops": 41.559935778322895, "iter_time": 0.49641783905029296, "loss": 0.005081813782453537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.805424690679104, "step_time": 0.4504072093963623} +{"epoch": 0, "iter": 18120, "iter_tflops": 49.845258146449076, "iter_time": 0.41390283203125, "loss": 0.004092901945114136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.16414554343557, "step_time": 0.37399461746215823} +{"epoch": 0, "iter": 18121, "iter_tflops": 20.330266614608966, "iter_time": 1.0147969970703126, "loss": 0.6419242024421692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.660530932872135, "step_time": 0.9524740447998047} +{"epoch": 0, "iter": 18122, 
"iter_tflops": 18.511440374724014, "iter_time": 1.1145050354003907, "loss": 0.4588255286216736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.861180888841847, "step_time": 0.9437318878173828} +{"epoch": 0, "iter": 18123, "iter_tflops": 43.40134567993199, "iter_time": 0.47535607910156247, "loss": 0.4307628571987152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.795330327854565, "step_time": 0.440879322052002} +{"epoch": 0, "iter": 18124, "iter_tflops": 43.977197153925026, "iter_time": 0.46913161468505854, "loss": 0.7880449295043945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.24701023821147, "step_time": 0.43666452980041504} +{"epoch": 0, "iter": 18125, "iter_tflops": 44.8873966974923, "iter_time": 0.45961884689331056, "loss": 0.2093283236026764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04285536903434, "step_time": 0.42067480278015135} +{"epoch": 0, "iter": 18126, "iter_tflops": 45.746324290662656, "iter_time": 0.4509890975952149, "loss": 0.2670254707336426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.39710389018108, "step_time": 0.4093706169128418} +{"epoch": 0, "iter": 18127, "iter_tflops": 43.84318492840167, "iter_time": 0.47056557464599613, "loss": 0.20094603300094604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.33176200862767, "step_time": 0.43588264274597166} +{"epoch": 0, "iter": 18128, "iter_tflops": 48.54398472393208, "iter_time": 0.4249979400634766, "loss": 0.2793157398700714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.81585727589676, "step_time": 0.39062309265136724} +{"epoch": 0, "iter": 18129, "iter_tflops": 28.19576972287151, "iter_time": 0.7317088241577148, "loss": 0.6615868210792542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.86076302230074, "step_time": 0.6909097900390626} +{"epoch": 0, "iter": 18130, "iter_tflops": 18.213984928795753, "iter_time": 1.1327061920166015, "loss": 0.5300089716911316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.720999844910367, "step_time": 0.9498224601745604} +{"epoch": 0, 
"iter": 18131, "iter_tflops": 38.74236328189071, "iter_time": 0.532520263671875, "loss": 0.5282726287841797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.72346144575625, "step_time": 0.4828984546661378} +{"epoch": 0, "iter": 18132, "iter_tflops": 41.13963533848856, "iter_time": 0.501489459991455, "loss": 0.5271715521812439, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83862515683688, "step_time": 0.4601187801361084} +{"epoch": 0, "iter": 18133, "iter_tflops": 30.879548889986992, "iter_time": 0.668115119934082, "loss": 0.38091322779655457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.74766252572697, "step_time": 0.611333999633789} +{"epoch": 0, "iter": 18134, "iter_tflops": 32.93487905158175, "iter_time": 0.6264208068847656, "loss": 0.4635832607746124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.938319250309455, "step_time": 0.5740695152282715} +{"epoch": 0, "iter": 18135, "iter_tflops": 33.833548616201426, "iter_time": 0.6097821350097656, "loss": 0.5492134094238281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.60348485282771, "step_time": 0.5636374130249023} +{"epoch": 0, "iter": 18136, "iter_tflops": 38.64904597358094, "iter_time": 0.5338060226440431, "loss": 0.6034391522407532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.16613298639476, "step_time": 0.48928113746643065} +{"epoch": 0, "iter": 18137, "iter_tflops": 26.44241989604139, "iter_time": 0.7802271347045897, "loss": 0.35162079334259033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.175382141217, "step_time": 0.7071404724121094} +{"epoch": 0, "iter": 18138, "iter_tflops": 45.98983442057328, "iter_time": 0.4486011695861817, "loss": 0.42525795102119446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.176005363797, "step_time": 0.41117449188232424} +{"epoch": 0, "iter": 18139, "iter_tflops": 46.528938627184104, "iter_time": 0.4434034843444824, "loss": 0.5057806968688965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.92750322471332, "step_time": 0.4132210140228271} +{"epoch": 0, 
"iter": 18140, "iter_tflops": 47.78985870100764, "iter_time": 0.4317044258117676, "loss": 0.2903880178928375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.604247380721134, "step_time": 0.39979448509216303} +{"epoch": 0, "iter": 18141, "iter_tflops": 25.33152883537351, "iter_time": 0.8144432830810547, "loss": 0.4463063180446625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.68375769492699, "step_time": 0.7731704711914061} +{"epoch": 0, "iter": 18142, "iter_tflops": 16.440730471328177, "iter_time": 1.254876937866211, "loss": 0.39618077874183655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.33262401704748, "step_time": 0.967114664077759} +{"epoch": 0, "iter": 18143, "iter_tflops": 38.08746468135945, "iter_time": 0.5416767349243164, "loss": 0.3215344250202179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.57004918074043, "step_time": 0.4962970676422119} +{"epoch": 0, "iter": 18144, "iter_tflops": 39.00859372883344, "iter_time": 0.528885856628418, "loss": 0.3533678352832794, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45149243697174, "step_time": 0.48599218368530267} +{"epoch": 0, "iter": 18145, "iter_tflops": 27.24531728752467, "iter_time": 0.7572344741821289, "loss": 0.0720009133219719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.36456123891452, "step_time": 0.7025847702026367} +{"epoch": 0, "iter": 18146, "iter_tflops": 9.374680108028235, "iter_time": 2.2007250671386718, "loss": 0.08540301769971848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.46798389543158, "step_time": 1.970875549316406} +{"epoch": 0, "iter": 18147, "iter_tflops": 13.105167765001356, "iter_time": 1.5742716064453124, "loss": 0.05928792059421539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.641783234153264, "step_time": 1.3189732398986815} +{"epoch": 0, "iter": 18148, "iter_tflops": 50.05287012000028, "iter_time": 0.4121860237121582, "loss": 0.04856734722852707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.813950064656915, "step_time": 0.37638399505615233} 
+{"epoch": 0, "iter": 18149, "iter_tflops": 19.765776930974376, "iter_time": 0.7438905487060546, "loss": 0.2416314333677292, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 21.128659360716274, "step_time": 0.6959066543579102} +{"epoch": 0, "iter": 18150, "iter_tflops": 8.33279338450167, "iter_time": 1.7645432891845703, "loss": 0.2968865931034088, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 10.188269640255324, "step_time": 1.4431866416931152} +{"epoch": 0, "iter": 18151, "iter_tflops": 26.37237633592991, "iter_time": 0.5575369644165039, "loss": 0.0913345068693161, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 28.122641752608434, "step_time": 0.5228376045227051} +{"epoch": 0, "iter": 18152, "iter_tflops": 25.869330806776833, "iter_time": 0.5683786239624025, "loss": 0.2693197429180145, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 27.32358927677635, "step_time": 0.5381274948120117} +{"epoch": 0, "iter": 18153, "iter_tflops": 36.04648754339815, "iter_time": 0.5723468475341796, "loss": 0.1529824435710907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.739386445553144, "step_time": 0.5325611839294433} +{"epoch": 0, "iter": 18154, "iter_tflops": 16.001035181144022, "iter_time": 1.2893599243164062, "loss": 0.13968069851398468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.973898917037726, "step_time": 1.0873407516479492} +{"epoch": 0, "iter": 18155, "iter_tflops": 32.79576883441612, "iter_time": 0.6290779037475587, "loss": 0.16507679224014282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.233610204533065, "step_time": 0.5693910541534424} +{"epoch": 0, "iter": 18156, "iter_tflops": 40.73918584880864, "iter_time": 0.5064188957214356, "loss": 0.16422998905181885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.479529227628944, "step_time": 0.4638334503173829} +{"epoch": 0, "iter": 18157, "iter_tflops": 19.06576768406647, "iter_time": 1.0821013793945315, "loss": 0.5580586194992065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.199351330832396, "step_time": 
1.0213740615844726} +{"epoch": 0, "iter": 18158, "iter_tflops": 10.266470454044518, "iter_time": 2.009560501098633, "loss": 0.4005665183067322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.626211431244371, "step_time": 1.7745327987670898} +{"epoch": 0, "iter": 18159, "iter_tflops": 9.54465062466565, "iter_time": 2.161534698486328, "loss": 0.5473684668540955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.370257217217656, "step_time": 1.8144790496826169} +{"epoch": 0, "iter": 18160, "iter_tflops": 35.446868512086056, "iter_time": 0.582028663635254, "loss": 0.5819799900054932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.05059111997545, "step_time": 0.5283170604705811} +{"epoch": 0, "iter": 18161, "iter_tflops": 13.549598296636933, "iter_time": 1.1424747924804688, "loss": 0.10288535803556442, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 14.512422275433655, "step_time": 1.0666775131225585} +{"epoch": 0, "iter": 18162, "iter_tflops": 13.813849704498747, "iter_time": 1.1206198730468748, "loss": 0.23324395716190338, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 16.410452519973223, "step_time": 0.9433057670593262} +{"epoch": 0, "iter": 18163, "iter_tflops": 25.030748959184667, "iter_time": 0.6184423217773437, "loss": 0.22820454835891724, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 26.894149665995222, "step_time": 0.5755926361083985} +{"epoch": 0, "iter": 18164, "iter_tflops": 23.246247642527525, "iter_time": 0.6659171295166015, "loss": 0.2683712840080261, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.892331695501767, "step_time": 0.6218812561035156} +{"epoch": 0, "iter": 18165, "iter_tflops": 32.6385203133186, "iter_time": 0.6321087265014648, "loss": 0.02456231229007244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.56090132613129, "step_time": 0.5642938976287842} +{"epoch": 0, "iter": 18166, "iter_tflops": 40.96531816790905, "iter_time": 0.5036234169006348, "loss": 0.03843839094042778, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.325768261138094, 
"step_time": 0.45517360877990726} +{"epoch": 0, "iter": 18167, "iter_tflops": 46.80378445172071, "iter_time": 0.4407996864318847, "loss": 0.03565611317753792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.62963737218038, "step_time": 0.3995978775024414} +{"epoch": 0, "iter": 18168, "iter_tflops": 44.38986649261513, "iter_time": 0.4647703437805175, "loss": 0.021834267303347588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.79860343058675, "step_time": 0.4227804088592529} +{"epoch": 0, "iter": 18169, "iter_tflops": 21.67573834912544, "iter_time": 0.9518058013916015, "loss": 0.08514538407325745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.30308165222765, "step_time": 0.8853375625610352} +{"epoch": 0, "iter": 18170, "iter_tflops": 25.877174334384833, "iter_time": 0.7972699508666994, "loss": 0.07868240773677826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.05705318637458, "step_time": 0.6435742359161377} +{"epoch": 0, "iter": 18171, "iter_tflops": 52.40254654767895, "iter_time": 0.3937040252685547, "loss": 0.053379807621240616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.25132619072704, "step_time": 0.36036009788513185} +{"epoch": 0, "iter": 18172, "iter_tflops": 49.15050435448723, "iter_time": 0.4197534446716309, "loss": 0.09052298218011856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.36796617462069, "step_time": 0.3865819702148438} +{"epoch": 0, "iter": 18173, "iter_tflops": 36.043429898397825, "iter_time": 0.5723954010009765, "loss": 0.35802069306373596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.61441845402704, "step_time": 0.5342847137451172} +{"epoch": 0, "iter": 18174, "iter_tflops": 19.543925101099873, "iter_time": 1.055626922607422, "loss": 0.44564375281333923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.965334940382323, "step_time": 0.9392569503784179} +{"epoch": 0, "iter": 18175, "iter_tflops": 40.4122082594403, "iter_time": 0.5105163612365722, "loss": 0.44278237223625183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.03193827425807, "step_time": 0.4685483837127685} +{"epoch": 0, "iter": 18176, "iter_tflops": 42.56535749924052, "iter_time": 0.48469212341308593, "loss": 0.37440669536590576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.39357827993851, "step_time": 0.44469718170166017} +{"epoch": 0, "iter": 18177, "iter_tflops": 16.119828923818936, "iter_time": 1.2798580932617187, "loss": 0.49773237109184265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.524999532974476, "step_time": 1.1772378921508788} +{"epoch": 0, "iter": 18178, "iter_tflops": 31.862681416764463, "iter_time": 0.6475002288818359, "loss": 0.4357573688030243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.691347689091906, "step_time": 0.44186116981506346} +{"epoch": 0, "iter": 18179, "iter_tflops": 47.32519589974685, "iter_time": 0.43594311904907224, "loss": 0.5271257162094116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.863946289801405, "step_time": 0.40561330795288086} +{"epoch": 0, "iter": 18180, "iter_tflops": 46.057125513572736, "iter_time": 0.44794574737548826, "loss": 0.42768511176109314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.42149564267153, "step_time": 0.4174518241882324} +{"epoch": 0, "iter": 18181, "iter_tflops": 44.03591981186945, "iter_time": 0.4685060195922852, "loss": 0.015654070302844048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.29018082176753, "step_time": 0.4272316474914551} +{"epoch": 0, "iter": 18182, "iter_tflops": 27.10411743975534, "iter_time": 0.7611793136596681, "loss": 0.023134317249059677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.4574030941394, "step_time": 0.616637622833252} +{"epoch": 0, "iter": 18183, "iter_tflops": 57.59940367159518, "iter_time": 0.3581824150085449, "loss": 0.03524221107363701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 63.12820546006262, "step_time": 0.3268126087188721} +{"epoch": 0, "iter": 18184, "iter_tflops": 59.10542368220362, "iter_time": 0.34905584335327144, "loss": 0.01821179874241352, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 64.81931874750767, "step_time": 0.31828618240356443} +{"epoch": 0, "iter": 18185, "iter_tflops": 25.88248632406796, "iter_time": 0.7971063232421874, "loss": 0.5557913184165955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.158562889766614, "step_time": 0.7596533584594727} +{"epoch": 0, "iter": 18186, "iter_tflops": 16.40684232300456, "iter_time": 1.2574688720703124, "loss": 0.38895055651664734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.773941309893466, "step_time": 1.0433475646972656} +{"epoch": 0, "iter": 18187, "iter_tflops": 40.888354866589495, "iter_time": 0.5045713768005372, "loss": 0.4125250577926636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.95039199753228, "step_time": 0.4694177360534668} +{"epoch": 0, "iter": 18188, "iter_tflops": 48.104409056699616, "iter_time": 0.4288815498352051, "loss": 0.68015056848526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00347473137321, "step_time": 0.39672528839111326} +{"epoch": 0, "iter": 18189, "iter_tflops": 38.24261359916342, "iter_time": 0.5394791717529297, "loss": 0.08594618737697601, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14198000578458, "step_time": 0.5014608802795409} +{"epoch": 0, "iter": 18190, "iter_tflops": 17.693274519918784, "iter_time": 1.1660415649414064, "loss": 0.09140077233314514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.10303272457014, "step_time": 0.9776364269256591} +{"epoch": 0, "iter": 18191, "iter_tflops": 42.068959588517465, "iter_time": 0.4904113082885742, "loss": 0.09569067507982254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.174886992235905, "step_time": 0.44680333518981935} +{"epoch": 0, "iter": 18192, "iter_tflops": 41.22048662595797, "iter_time": 0.5005058212280273, "loss": 0.0828438475728035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.4062521648777, "step_time": 0.4543668003082275} +{"epoch": 0, "iter": 18193, "iter_tflops": 17.99376172502988, "iter_time": 1.1465692291259764, "loss": 0.33251315355300903, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 18.86228013924174, "step_time": 1.0937751617431641} +{"epoch": 0, "iter": 18194, "iter_tflops": 16.199482721453524, "iter_time": 1.273564956665039, "loss": 0.39398056268692017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.318973174622904, "step_time": 0.8847342185974121} +{"epoch": 0, "iter": 18195, "iter_tflops": 46.02674290547386, "iter_time": 0.44824143981933595, "loss": 0.34361666440963745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.871537859199265, "step_time": 0.41368472671508794} +{"epoch": 0, "iter": 18196, "iter_tflops": 44.651435804714, "iter_time": 0.4620477066040039, "loss": 0.3065755069255829, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.168260466688686, "step_time": 0.4283130283355713} +{"epoch": 0, "iter": 18197, "iter_tflops": 25.589095572063044, "iter_time": 0.8062455139160156, "loss": 0.6097182035446167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.929054942948714, "step_time": 0.7661276473999024} +{"epoch": 0, "iter": 18198, "iter_tflops": 8.195255886459584, "iter_time": 2.5174434814453126, "loss": 0.6120263338088989, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.26501003399811, "step_time": 2.2267750854492188} +{"epoch": 0, "iter": 18199, "iter_tflops": 9.931233944816588, "iter_time": 2.077394775390625, "loss": 0.6265401244163513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.20889307250942, "step_time": 1.5619093437194826} +{"epoch": 0, "iter": 18200, "iter_tflops": 35.20687936113257, "iter_time": 0.5859960861206055, "loss": 0.5007971525192261, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.57632427537305, "step_time": 0.5348123207092286} +{"epoch": 0, "iter": 18201, "iter_tflops": 20.07694508863785, "iter_time": 0.7323611526489258, "loss": 0.35352566838264465, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 21.734467167939638, "step_time": 0.6765095520019532} +{"epoch": 0, "iter": 18202, "iter_tflops": 8.300197191975009, "iter_time": 1.771472930908203, "loss": 0.2623358964920044, "lr": 
3e-05, "seqlen": 5888.0, "step_tflops": 9.288963165564574, "step_time": 1.582908058166504} +{"epoch": 0, "iter": 18203, "iter_tflops": 13.83869426436989, "iter_time": 1.0624972534179686, "loss": 0.2077382653951645, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 17.07242318521965, "step_time": 0.8612470817565918} +{"epoch": 0, "iter": 18204, "iter_tflops": 22.296048796743687, "iter_time": 0.659469970703125, "loss": 0.3602995276451111, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.023257011174675, "step_time": 0.612055835723877} +{"epoch": 0, "iter": 18205, "iter_tflops": 14.68741003471058, "iter_time": 1.0846300048828126, "loss": 0.42095163464546204, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 15.707417499867747, "step_time": 1.0141963577270507} +{"epoch": 0, "iter": 18206, "iter_tflops": 17.42975642482569, "iter_time": 0.9139775238037109, "loss": 0.12360642105340958, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 23.76963721071244, "step_time": 0.670199779510498} +{"epoch": 0, "iter": 18207, "iter_tflops": 27.512357885581633, "iter_time": 0.5790272750854493, "loss": 0.21401870250701904, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.197738342236416, "step_time": 0.5456040954589844} +{"epoch": 0, "iter": 18208, "iter_tflops": 28.104254527279824, "iter_time": 0.5668325271606445, "loss": 0.16366098821163177, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 29.96819828887726, "step_time": 0.5315770225524902} +{"epoch": 0, "iter": 18209, "iter_tflops": 22.219000397910364, "iter_time": 0.9285338287353515, "loss": 0.7915728092193604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.31915707126812, "step_time": 0.8847272415161133} +{"epoch": 0, "iter": 18210, "iter_tflops": 14.734400920584449, "iter_time": 1.4001990051269533, "loss": 0.5654462575912476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.924163662991948, "step_time": 0.9859936981201172} +{"epoch": 0, "iter": 18211, "iter_tflops": 33.332880397976105, "iter_time": 0.6189412155151368, "loss": 
0.517539381980896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.36723958780684, "step_time": 0.5672988586425782} +{"epoch": 0, "iter": 18212, "iter_tflops": 35.03231682219976, "iter_time": 0.5889160461425781, "loss": 0.44570210576057434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.069075762480345, "step_time": 0.5419383869171143} +{"epoch": 0, "iter": 18213, "iter_tflops": 7.249295136977588, "iter_time": 0.9865877990722656, "loss": 0.016291340813040733, "lr": 3e-05, "seqlen": 2896.0, "step_tflops": 7.917180187590727, "step_time": 0.9033602828979491} +{"epoch": 0, "iter": 18214, "iter_tflops": 8.84953481495777, "iter_time": 0.8081855468749999, "loss": 0.02706378698348999, "lr": 3e-05, "seqlen": 2896.0, "step_tflops": 9.944866960859693, "step_time": 0.7191716251373291} +{"epoch": 0, "iter": 18215, "iter_tflops": 18.32633010555811, "iter_time": 0.39026177597045897, "loss": 0.02736896462738514, "lr": 3e-05, "seqlen": 2896.0, "step_tflops": 20.21262069937581, "step_time": 0.3538416042327881} +{"epoch": 0, "iter": 18216, "iter_tflops": 17.973430824040236, "iter_time": 0.39792436981201174, "loss": 0.05234072357416153, "lr": 3e-05, "seqlen": 2896.0, "step_tflops": 19.669419685989975, "step_time": 0.36361347961425783} +{"epoch": 0, "iter": 18217, "iter_tflops": 41.790264491607886, "iter_time": 0.493681812286377, "loss": 0.5332635045051575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37510870235619, "step_time": 0.45467865753173836} +{"epoch": 0, "iter": 18218, "iter_tflops": 42.443889180040195, "iter_time": 0.4860792427062988, "loss": 0.5938272476196289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79710378365518, "step_time": 0.4504890441894531} +{"epoch": 0, "iter": 18219, "iter_tflops": 42.96126759125669, "iter_time": 0.4802254371643067, "loss": 0.5194540023803711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.95926984118257, "step_time": 0.44889950561523445} +{"epoch": 0, "iter": 18220, "iter_tflops": 43.952287886871034, "iter_time": 
0.46939748764038086, "loss": 0.5821384787559509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.885827046177, "step_time": 0.44002835845947263} +{"epoch": 0, "iter": 18221, "iter_tflops": 26.3900225079004, "iter_time": 0.7817762756347655, "loss": 0.5103060603141785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.842082965584357, "step_time": 0.7410039520263673} +{"epoch": 0, "iter": 18222, "iter_tflops": 10.90417416228838, "iter_time": 1.8920363159179687, "loss": 0.5202343463897705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.151656604673569, "step_time": 1.4578571319580078} +{"epoch": 0, "iter": 18223, "iter_tflops": 14.675411848056337, "iter_time": 1.4058272247314452, "loss": 0.5371562242507935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.490712563230428, "step_time": 1.251073501586914} +{"epoch": 0, "iter": 18224, "iter_tflops": 18.229468907512143, "iter_time": 1.131744079589844, "loss": 0.6418606638908386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.315076362887076, "step_time": 0.9245360927581787} +{"epoch": 0, "iter": 18225, "iter_tflops": 13.953934207226112, "iter_time": 1.0829953918457031, "loss": 0.24543368816375732, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 14.945445976048891, "step_time": 1.0111472396850587} +{"epoch": 0, "iter": 18226, "iter_tflops": 13.000162370781254, "iter_time": 1.1624505920410158, "loss": 0.3037501871585846, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 15.65502271454949, "step_time": 0.9653161621093751} +{"epoch": 0, "iter": 18227, "iter_tflops": 25.46423138140226, "iter_time": 0.5934617156982421, "loss": 0.21554724872112274, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.040997087995688, "step_time": 0.5588568496704102} +{"epoch": 0, "iter": 18228, "iter_tflops": 28.68946438874981, "iter_time": 0.5267455062866211, "loss": 0.17400576174259186, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 30.326573951155538, "step_time": 0.4983103752136231} +{"epoch": 0, "iter": 18229, "iter_tflops": 25.48221016932308, 
"iter_time": 0.8096273193359375, "loss": 0.3088540732860565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.647685331157295, "step_time": 0.7742170944213868} +{"epoch": 0, "iter": 18230, "iter_tflops": 25.002860861776362, "iter_time": 0.825149314880371, "loss": 0.24575859308242798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.84007023688587, "step_time": 0.7410575237274171} +{"epoch": 0, "iter": 18231, "iter_tflops": 46.80141750754372, "iter_time": 0.4408219795227051, "loss": 0.26243558526039124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.33252277372928, "step_time": 0.4098958759307862} +{"epoch": 0, "iter": 18232, "iter_tflops": 53.647476860856, "iter_time": 0.3845678253173828, "loss": 0.32249313592910767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.79532656160173, "step_time": 0.35696819686889647} +{"epoch": 0, "iter": 18233, "iter_tflops": 31.26388671059643, "iter_time": 0.6599017486572265, "loss": 0.6228931546211243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.107168834603065, "step_time": 0.6231609115600586} +{"epoch": 0, "iter": 18234, "iter_tflops": 13.62669809640855, "iter_time": 1.514020004272461, "loss": 0.6059247255325317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.34345438037637, "step_time": 1.1895608024597168} +{"epoch": 0, "iter": 18235, "iter_tflops": 43.661906663500105, "iter_time": 0.4725192985534668, "loss": 0.6762307286262512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.34882538465996, "step_time": 0.43572556114196775} +{"epoch": 0, "iter": 18236, "iter_tflops": 42.30360705336515, "iter_time": 0.4876911201477051, "loss": 0.8495038747787476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38562736235565, "step_time": 0.4545732803344727} +{"epoch": 0, "iter": 18237, "iter_tflops": 46.08005050379795, "iter_time": 0.4477228927612304, "loss": 0.0984664112329483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.62445458754403, "step_time": 0.40753216361999517} +{"epoch": 0, "iter": 18238, "iter_tflops": 
47.61534986466731, "iter_time": 0.4332866096496582, "loss": 0.1700342446565628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.68213200138856, "step_time": 0.39919199752807616} +{"epoch": 0, "iter": 18239, "iter_tflops": 48.76625396098363, "iter_time": 0.42306086349487304, "loss": 0.11165299266576767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.886750400018784, "step_time": 0.3900994739532471} +{"epoch": 0, "iter": 18240, "iter_tflops": 54.20142556339783, "iter_time": 0.3806374702453614, "loss": 0.10341507941484451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.95958612192403, "step_time": 0.3499192390441894} +{"epoch": 0, "iter": 18241, "iter_tflops": 38.34006896811442, "iter_time": 0.5381078872680665, "loss": 0.1240353137254715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.2813014168257, "step_time": 0.49976848602294927} +{"epoch": 0, "iter": 18242, "iter_tflops": 15.797766066186181, "iter_time": 1.3059500579833985, "loss": 0.04265855252742767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.054580976965642, "step_time": 1.0827366676330565} +{"epoch": 0, "iter": 18243, "iter_tflops": 39.275371378645936, "iter_time": 0.5252934036254883, "loss": 0.059974439442157745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.462184562339104, "step_time": 0.4746906700134278} +{"epoch": 0, "iter": 18244, "iter_tflops": 44.15171196521747, "iter_time": 0.46727731704711917, "loss": 0.08058766275644302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.269917330373886, "step_time": 0.42741099739074706} +{"epoch": 0, "iter": 18245, "iter_tflops": 31.98171926899279, "iter_time": 0.6450901947021483, "loss": 0.6579236388206482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.38519369334141, "step_time": 0.583043113708496} +{"epoch": 0, "iter": 18246, "iter_tflops": 37.93307573687542, "iter_time": 0.5438813781738282, "loss": 0.6463882327079773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.64464949612424, "step_time": 0.49540802383422855} +{"epoch": 0, "iter": 18247, 
"iter_tflops": 36.70154862572228, "iter_time": 0.5621314163208009, "loss": 0.5986156463623047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14356909573701, "step_time": 0.5139327163696289} +{"epoch": 0, "iter": 18248, "iter_tflops": 35.90639693960537, "iter_time": 0.5745798873901369, "loss": 0.5111373662948608, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.95139040559886, "step_time": 0.5296625690460206} +{"epoch": 0, "iter": 18249, "iter_tflops": 13.389433450958117, "iter_time": 1.2786484985351563, "loss": 0.044256336987018585, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 13.993538617694616, "step_time": 1.2234488677978517} +{"epoch": 0, "iter": 18250, "iter_tflops": 14.427875180621578, "iter_time": 1.186618179321289, "loss": 0.11340032517910004, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 17.351405203584488, "step_time": 0.9866854457855224} +{"epoch": 0, "iter": 18251, "iter_tflops": 32.89183198658855, "iter_time": 0.5205054855346679, "loss": 0.07526913285255432, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 36.070805333655585, "step_time": 0.474632568359375} +{"epoch": 0, "iter": 18252, "iter_tflops": 31.999979143625268, "iter_time": 0.535012191772461, "loss": 0.08227954804897308, "lr": 3e-05, "seqlen": 6832.0, "step_tflops": 35.37084459463473, "step_time": 0.48402516746520996} +{"epoch": 0, "iter": 18253, "iter_tflops": 20.68936417178788, "iter_time": 0.8792259292602539, "loss": 0.4785536527633667, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 22.067209052703927, "step_time": 0.8243283233642578} +{"epoch": 0, "iter": 18254, "iter_tflops": 15.484972827460535, "iter_time": 1.1747276306152343, "loss": 0.658833384513855, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 20.159219000652914, "step_time": 0.9023477268218993} +{"epoch": 0, "iter": 18255, "iter_tflops": 34.12634981591303, "iter_time": 0.5330375366210938, "loss": 0.7685186266899109, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 37.526772856590064, "step_time": 0.4847372703552245} +{"epoch": 0, 
"iter": 18256, "iter_tflops": 39.64405799516755, "iter_time": 0.4588487243652344, "loss": 0.547741711139679, "lr": 3e-05, "seqlen": 7248.0, "step_tflops": 43.24448919535572, "step_time": 0.42064609336853026} +{"epoch": 0, "iter": 18257, "iter_tflops": 20.300217862918146, "iter_time": 1.0162991180419922, "loss": 0.4569285213947296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.86734438540559, "step_time": 0.9434658889770509} +{"epoch": 0, "iter": 18258, "iter_tflops": 15.221519888780424, "iter_time": 1.3553898468017578, "loss": 0.6103767156600952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.573652478751267, "step_time": 1.0540236949920654} +{"epoch": 0, "iter": 18259, "iter_tflops": 40.596039011819116, "iter_time": 0.5082045936584473, "loss": 0.6207257509231567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.566698583280015, "step_time": 0.4735519142150879} +{"epoch": 0, "iter": 18260, "iter_tflops": 42.44887853099074, "iter_time": 0.48602210998535156, "loss": 0.5293694734573364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.641082995824476, "step_time": 0.4520290088653564} +{"epoch": 0, "iter": 18261, "iter_tflops": 24.953063850290505, "iter_time": 0.8267960052490235, "loss": 0.03998640552163124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.27233323132606, "step_time": 0.7852783126831056} +{"epoch": 0, "iter": 18262, "iter_tflops": 21.654171327291458, "iter_time": 0.952753776550293, "loss": 0.02306349016726017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.290946308860992, "step_time": 0.7559684181213379} +{"epoch": 0, "iter": 18263, "iter_tflops": 55.09987838009782, "iter_time": 0.374430835723877, "loss": 0.035841722041368484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.182976973368966, "step_time": 0.342806131362915} +{"epoch": 0, "iter": 18264, "iter_tflops": 57.46214010773616, "iter_time": 0.359038028717041, "loss": 0.037043843418359756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.88131735794967, "step_time": 0.32809575843811034} 
+{"epoch": 0, "iter": 18265, "iter_tflops": 32.82349312121856, "iter_time": 0.6285465545654296, "loss": 0.06713470071554184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.9798848820764, "step_time": 0.5897987823486328} +{"epoch": 0, "iter": 18266, "iter_tflops": 14.743885755121239, "iter_time": 1.3992982482910157, "loss": 0.1007254496216774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.363004255028976, "step_time": 1.1882214164733889} +{"epoch": 0, "iter": 18267, "iter_tflops": 32.19458676650588, "iter_time": 0.640824920654297, "loss": 0.07473880052566528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.99857508461982, "step_time": 0.5032148914337158} +{"epoch": 0, "iter": 18268, "iter_tflops": 46.336138411110454, "iter_time": 0.4452484436035157, "loss": 0.06999395042657852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62490687501534, "step_time": 0.41574069976806644} +{"epoch": 0, "iter": 18269, "iter_tflops": 26.76664007233817, "iter_time": 0.7274300155639648, "loss": 0.18226799368858337, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 28.433971713740068, "step_time": 0.6847744522094728} +{"epoch": 0, "iter": 18270, "iter_tflops": 21.287286140935265, "iter_time": 0.9146707229614258, "loss": 0.24079908430576324, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 25.56338019302493, "step_time": 0.761669906616211} +{"epoch": 0, "iter": 18271, "iter_tflops": 47.89213616361414, "iter_time": 0.4065564613342285, "loss": 0.3506900668144226, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 51.837236726746056, "step_time": 0.3756152648925781} +{"epoch": 0, "iter": 18272, "iter_tflops": 48.703074334719275, "iter_time": 0.39978702926635745, "loss": 0.11253342032432556, "lr": 3e-05, "seqlen": 7744.0, "step_tflops": 53.08034054265045, "step_time": 0.3668186225891113} +{"epoch": 0, "iter": 18273, "iter_tflops": 24.27266905583077, "iter_time": 0.849972183227539, "loss": 0.23761819303035736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.456974726073945, "step_time": 
0.8104299011230468} +{"epoch": 0, "iter": 18274, "iter_tflops": 13.651102498909768, "iter_time": 1.5113133544921877, "loss": 0.17456084489822388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.442405430768964, "step_time": 1.1828124046325683} +{"epoch": 0, "iter": 18275, "iter_tflops": 47.53014889117071, "iter_time": 0.43406330490112305, "loss": 0.1812514215707779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.81565490663579, "step_time": 0.39816332626342776} +{"epoch": 0, "iter": 18276, "iter_tflops": 47.50121453094216, "iter_time": 0.43432770538330073, "loss": 0.2434600293636322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.45416636244265, "step_time": 0.4009606018066406} +{"epoch": 0, "iter": 18277, "iter_tflops": 45.29242953264264, "iter_time": 0.45550865173339844, "loss": 0.6065620183944702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.22710339251995, "step_time": 0.4191002941131592} +{"epoch": 0, "iter": 18278, "iter_tflops": 45.40530010221295, "iter_time": 0.4543763275146484, "loss": 0.6787708401679993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.4814216022028, "step_time": 0.41694625663757323} +{"epoch": 0, "iter": 18279, "iter_tflops": 44.77371244157315, "iter_time": 0.46078585815429696, "loss": 0.6510704159736633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.3181515060673, "step_time": 0.4269843292236328} +{"epoch": 0, "iter": 18280, "iter_tflops": 48.50166127833165, "iter_time": 0.42536880111694336, "loss": 0.6297863721847534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.43189291145966, "step_time": 0.3934836673736572} +{"epoch": 0, "iter": 18281, "iter_tflops": 37.80982506378071, "iter_time": 0.5456542968749999, "loss": 0.2959888279438019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.73658033375907, "step_time": 0.506451286315918} +{"epoch": 0, "iter": 18282, "iter_tflops": 44.202408680045856, "iter_time": 0.4667413864135742, "loss": 0.3309536874294281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.81590646550348, 
"step_time": 0.4226305522918701} +{"epoch": 0, "iter": 18283, "iter_tflops": 47.38070991765132, "iter_time": 0.43543234252929697, "loss": 0.28182774782180786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.49422464341822, "step_time": 0.4006486873626709} +{"epoch": 0, "iter": 18284, "iter_tflops": 50.50451240510303, "iter_time": 0.4085000038146972, "loss": 0.2343902587890625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.80149921864509, "step_time": 0.37646950912475585} +{"epoch": 0, "iter": 18285, "iter_tflops": 42.36273912804044, "iter_time": 0.4870103759765625, "loss": 0.19569551944732666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.95213782088982, "step_time": 0.44896917724609375} +{"epoch": 0, "iter": 18286, "iter_tflops": 17.357512610555833, "iter_time": 1.1885973510742187, "loss": 0.16370685398578644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.319375681150625, "step_time": 1.0153409156799316} +{"epoch": 0, "iter": 18287, "iter_tflops": 45.14272031065977, "iter_time": 0.4570192794799805, "loss": 0.15492752194404602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.08784250061806, "step_time": 0.42028927040100095} +{"epoch": 0, "iter": 18288, "iter_tflops": 51.153091927637966, "iter_time": 0.40332055664062505, "loss": 0.15801994502544403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.41012037319672, "step_time": 0.3723343925476075} +{"epoch": 0, "iter": 18289, "iter_tflops": 40.104175951793174, "iter_time": 0.44125751876831054, "loss": 0.002223695395514369, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 43.8667341407704, "step_time": 0.4034097709655762} +{"epoch": 0, "iter": 18290, "iter_tflops": 14.76314009536669, "iter_time": 1.1986792144775391, "loss": 0.003030589781701565, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 17.8627132333387, "step_time": 0.9906820392608642} +{"epoch": 0, "iter": 18291, "iter_tflops": 40.53916702889289, "iter_time": 0.436522762298584, "loss": 0.0007972773746587336, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 
45.061558746227114, "step_time": 0.3927132053375244} +{"epoch": 0, "iter": 18292, "iter_tflops": 43.27190561454326, "iter_time": 0.40895516204833987, "loss": 0.0025662670377641916, "lr": 3e-05, "seqlen": 7056.0, "step_tflops": 47.757990526477435, "step_time": 0.37054048919677735} +{"epoch": 0, "iter": 18293, "iter_tflops": 24.06925443701522, "iter_time": 0.8571554870605468, "loss": 0.638692319393158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.08220551151884, "step_time": 0.7910026435852051} +{"epoch": 0, "iter": 18294, "iter_tflops": 36.99156259608618, "iter_time": 0.5577243041992187, "loss": 0.39919936656951904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.43131549753248, "step_time": 0.5102750988006591} +{"epoch": 0, "iter": 18295, "iter_tflops": 38.54785863547299, "iter_time": 0.5352072525024414, "loss": 0.5358908772468567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.70533725176908, "step_time": 0.4946871280670166} +{"epoch": 0, "iter": 18296, "iter_tflops": 37.464460568386684, "iter_time": 0.5506843872070313, "loss": 0.6778759956359863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.68761752720599, "step_time": 0.5070607414245605} +{"epoch": 0, "iter": 18297, "iter_tflops": 36.04460215854503, "iter_time": 0.5723767852783203, "loss": 0.01747008040547371, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.47094079068821, "step_time": 0.5097754859924316} +{"epoch": 0, "iter": 18298, "iter_tflops": 38.37714410394401, "iter_time": 0.537588035583496, "loss": 0.051683459430933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.386422258832454, "step_time": 0.4867382621765137} +{"epoch": 0, "iter": 18299, "iter_tflops": 43.75365157017925, "iter_time": 0.4715284957885742, "loss": 0.019126713275909424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.42717533948546, "step_time": 0.4260230617523193} +{"epoch": 0, "iter": 18300, "iter_tflops": 43.86644386981326, "iter_time": 0.47031607055664065, "loss": 0.018923025578260422, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 48.34695825814729, "step_time": 0.4267299175262451} +{"epoch": 0, "iter": 18301, "iter_tflops": 17.256266404689683, "iter_time": 1.1955711059570313, "loss": 0.18196497857570648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.5746312021218, "step_time": 1.1107134933471678} +{"epoch": 0, "iter": 18302, "iter_tflops": 18.938982575208673, "iter_time": 1.0893453979492187, "loss": 0.30255404114723206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.210701595065707, "step_time": 0.8183466625213623} +{"epoch": 0, "iter": 18303, "iter_tflops": 48.680640579141695, "iter_time": 0.4238048896789551, "loss": 0.3505779206752777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.808852075264305, "step_time": 0.3906749095916748} +{"epoch": 0, "iter": 18304, "iter_tflops": 47.43261919702421, "iter_time": 0.4349558143615722, "loss": 0.36579397320747375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.58089407517601, "step_time": 0.39997549247741704} +{"epoch": 0, "iter": 18305, "iter_tflops": 33.734767168701595, "iter_time": 0.5833083419799805, "loss": 0.0033448573667556047, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 36.16107085832932, "step_time": 0.5441700325012206} +{"epoch": 0, "iter": 18306, "iter_tflops": 9.882720520653189, "iter_time": 1.9911289672851562, "loss": 0.0010578091023489833, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 13.571587749789796, "step_time": 1.4499240226745604} +{"epoch": 0, "iter": 18307, "iter_tflops": 14.22341236682061, "iter_time": 1.3834775085449218, "loss": 0.004102068953216076, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 16.455120832256014, "step_time": 1.195844825744629} +{"epoch": 0, "iter": 18308, "iter_tflops": 32.33863480382205, "iter_time": 0.6084910888671875, "loss": 0.04669981449842453, "lr": 3e-05, "seqlen": 7824.0, "step_tflops": 36.66955767804724, "step_time": 0.536624174118042} +{"epoch": 0, "iter": 18309, "iter_tflops": 11.441274346933666, "iter_time": 1.2887026519775389, "loss": 0.1639842987060547, "lr": 3e-05, 
"seqlen": 5904.0, "step_tflops": 12.128094758508603, "step_time": 1.21572274017334} +{"epoch": 0, "iter": 18310, "iter_tflops": 11.379824326588722, "iter_time": 1.2956615295410154, "loss": 0.2625960409641266, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 14.850008749525745, "step_time": 0.9928883438110352} +{"epoch": 0, "iter": 18311, "iter_tflops": 22.22412232306817, "iter_time": 0.6634412994384765, "loss": 0.22453679144382477, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.99437133428078, "step_time": 0.614494140625} +{"epoch": 0, "iter": 18312, "iter_tflops": 22.19849790426578, "iter_time": 0.6642071304321289, "loss": 0.28499433398246765, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.864593070787084, "step_time": 0.6178358268737792} +{"epoch": 0, "iter": 18313, "iter_tflops": 16.040486853365607, "iter_time": 1.286188735961914, "loss": 0.4854351282119751, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.166846275650624, "step_time": 1.201798698425293} +{"epoch": 0, "iter": 18314, "iter_tflops": 20.073284333453714, "iter_time": 1.0277886352539063, "loss": 0.4486495852470398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.307704195517665, "step_time": 0.8487471027374267} +{"epoch": 0, "iter": 18315, "iter_tflops": 47.314199931664305, "iter_time": 0.43604443359375006, "loss": 0.6270209550857544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.56603894377775, "step_time": 0.4000907173156738} +{"epoch": 0, "iter": 18316, "iter_tflops": 46.7957012149996, "iter_time": 0.44087582778930656, "loss": 0.5140154361724854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.85498142678002, "step_time": 0.40568481063842765} +{"epoch": 0, "iter": 18317, "iter_tflops": 39.87854875673304, "iter_time": 0.5173481521606446, "loss": 0.167104572057724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.44520725805344, "step_time": 0.4748761672973633} +{"epoch": 0, "iter": 18318, "iter_tflops": 46.49318502708469, "iter_time": 0.4437444648742676, "loss": 0.1835472583770752, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 50.33664333144386, "step_time": 0.4098623218536377} +{"epoch": 0, "iter": 18319, "iter_tflops": 52.21462032402398, "iter_time": 0.39512100982666015, "loss": 0.22304263710975647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.58222328664389, "step_time": 0.3646214714050293} +{"epoch": 0, "iter": 18320, "iter_tflops": 49.40205579160848, "iter_time": 0.4176160926818847, "loss": 0.1264728456735611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.23278482460941, "step_time": 0.3875636711120605} +{"epoch": 0, "iter": 18321, "iter_tflops": 25.199186656024445, "iter_time": 0.8187206115722656, "loss": 0.5500109195709229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.600657054348776, "step_time": 0.7755858612060547} +{"epoch": 0, "iter": 18322, "iter_tflops": 13.99530973768178, "iter_time": 1.4741434020996094, "loss": 0.5667003393173218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.54625999974855, "step_time": 1.2468735237121582} +{"epoch": 0, "iter": 18323, "iter_tflops": 34.03514948189366, "iter_time": 0.6061702041625976, "loss": 0.583223283290863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.866034226721595, "step_time": 0.559623348236084} +{"epoch": 0, "iter": 18324, "iter_tflops": 36.035555632215996, "iter_time": 0.5725204772949218, "loss": 0.6159195899963379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.358456077030134, "step_time": 0.5241845226287841} +{"epoch": 0, "iter": 18325, "iter_tflops": 17.613219711731634, "iter_time": 0.8626360015869141, "loss": 0.06867923587560654, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 18.75916411265187, "step_time": 0.8099400024414063} +{"epoch": 0, "iter": 18326, "iter_tflops": 15.946273268137555, "iter_time": 0.9528118057250977, "loss": 0.05088207870721817, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 19.180471125770126, "step_time": 0.7921493339538573} +{"epoch": 0, "iter": 18327, "iter_tflops": 29.64434442893553, "iter_time": 0.5125361251831055, "loss": 
0.05204344913363457, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 32.63345244522919, "step_time": 0.465589641571045} +{"epoch": 0, "iter": 18328, "iter_tflops": 30.05659939702953, "iter_time": 0.5055062026977539, "loss": 0.05567387863993645, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 32.99380611310783, "step_time": 0.46050453758239746} +{"epoch": 0, "iter": 18329, "iter_tflops": 31.76231792399572, "iter_time": 0.6495462188720704, "loss": 0.4889792799949646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.942307466477, "step_time": 0.5904330596923829} +{"epoch": 0, "iter": 18330, "iter_tflops": 34.79258011691133, "iter_time": 0.5929739456176758, "loss": 0.4311968684196472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.10607878057664, "step_time": 0.5414121360778809} +{"epoch": 0, "iter": 18331, "iter_tflops": 35.07062488453046, "iter_time": 0.5882727661132813, "loss": 0.6890491247177124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.15687335079409, "step_time": 0.54069140625} +{"epoch": 0, "iter": 18332, "iter_tflops": 36.34287480162236, "iter_time": 0.567679183959961, "loss": 0.6084803342819214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.31308555740308, "step_time": 0.5247894744873047} +{"epoch": 0, "iter": 18333, "iter_tflops": 20.565919979184585, "iter_time": 1.0031690063476562, "loss": 0.4898012578487396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.135088612934627, "step_time": 0.9320538024902344} +{"epoch": 0, "iter": 18334, "iter_tflops": 14.508548481094525, "iter_time": 1.4219956970214844, "loss": 0.4148326516151428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.400520142307016, "step_time": 1.2579536094665527} +{"epoch": 0, "iter": 18335, "iter_tflops": 36.3009807866398, "iter_time": 0.5683343276977538, "loss": 0.5611441135406494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.35042129235768, "step_time": 0.5242915534973145} +{"epoch": 0, "iter": 18336, "iter_tflops": 37.17700966978037, "iter_time": 0.554942253112793, "loss": 
0.49492162466049194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.173252860931655, "step_time": 0.5135529747009278} +{"epoch": 0, "iter": 18337, "iter_tflops": 20.871774826104634, "iter_time": 0.9884685745239259, "loss": 0.5261220335960388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.131609436924247, "step_time": 0.932200325012207} +{"epoch": 0, "iter": 18338, "iter_tflops": 8.695183696782458, "iter_time": 2.372703582763672, "loss": 0.4965834319591522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.774479259360797, "step_time": 1.9148111953735352} +{"epoch": 0, "iter": 18339, "iter_tflops": 14.632982812944666, "iter_time": 1.4099034881591797, "loss": 0.5215253233909607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.66261917640562, "step_time": 1.1680653533935548} +{"epoch": 0, "iter": 18340, "iter_tflops": 36.60238495430304, "iter_time": 0.5636543502807617, "loss": 0.5039688944816589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.687449567577055, "step_time": 0.5198392372131347} +{"epoch": 0, "iter": 18341, "iter_tflops": 13.394307493342675, "iter_time": 1.143505126953125, "loss": 0.1807698756456375, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 14.372363685771596, "step_time": 1.065688262939453} +{"epoch": 0, "iter": 18342, "iter_tflops": 15.599937015720062, "iter_time": 0.9818282775878906, "loss": 0.21302026510238647, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 18.299866712887052, "step_time": 0.8369710845947265} +{"epoch": 0, "iter": 18343, "iter_tflops": 28.609859763226904, "iter_time": 0.5353559722900391, "loss": 0.21915000677108765, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 30.448618163460015, "step_time": 0.5030264167785645} +{"epoch": 0, "iter": 18344, "iter_tflops": 27.469762592561928, "iter_time": 0.5575752334594727, "loss": 0.2594476342201233, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 29.202481568421042, "step_time": 0.5244917030334473} +{"epoch": 0, "iter": 18345, "iter_tflops": 29.541528385722813, "iter_time": 
0.6983759689331055, "loss": 0.006682465318590403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.31308333586603, "step_time": 0.6588649635314942} +{"epoch": 0, "iter": 18346, "iter_tflops": 17.262525404468676, "iter_time": 1.1951376190185548, "loss": 0.008806639350950718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.920190101083502, "step_time": 0.9861809768676758} +{"epoch": 0, "iter": 18347, "iter_tflops": 43.37828112212988, "iter_time": 0.4756088294982911, "loss": 0.006385462824255228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.17144387512134, "step_time": 0.4282847232818603} +{"epoch": 0, "iter": 18348, "iter_tflops": 44.91479634223, "iter_time": 0.4593384628295898, "loss": 0.004395963158458471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.739720796127564, "step_time": 0.4147810478210449} +{"epoch": 0, "iter": 18349, "iter_tflops": 22.916168891927967, "iter_time": 0.9002854537963867, "loss": 0.1661541908979416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.849372052707714, "step_time": 0.8302460708618163} +{"epoch": 0, "iter": 18350, "iter_tflops": 18.435631370105593, "iter_time": 1.1190879821777344, "loss": 0.23096022009849548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.18770264718441, "step_time": 0.8529579601287842} +{"epoch": 0, "iter": 18351, "iter_tflops": 47.54663707744871, "iter_time": 0.43391278076171874, "loss": 0.20615661144256592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.7467297505003, "step_time": 0.39869366836547854} +{"epoch": 0, "iter": 18352, "iter_tflops": 50.20556829255109, "iter_time": 0.41093237686157225, "loss": 0.16105349361896515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.53136460299249, "step_time": 0.37833444404602057} +{"epoch": 0, "iter": 18353, "iter_tflops": 40.18954453608308, "iter_time": 0.5133447952270508, "loss": 0.5528981685638428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39340188384986, "step_time": 0.4754430999755859} +{"epoch": 0, "iter": 18354, "iter_tflops": 
34.84064642563992, "iter_time": 0.5921558761596679, "loss": 0.6532915234565735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.24828128423396, "step_time": 0.5393992309570312} +{"epoch": 0, "iter": 18355, "iter_tflops": 37.3650353814872, "iter_time": 0.5521497116088867, "loss": 0.6568602919578552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.923182172904205, "step_time": 0.5041419658660888} +{"epoch": 0, "iter": 18356, "iter_tflops": 42.373728954776205, "iter_time": 0.4868840675354004, "loss": 0.7061347365379333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.07309956932095, "step_time": 0.44779043960571285} +{"epoch": 0, "iter": 18357, "iter_tflops": 25.467946176556477, "iter_time": 0.8100807723999023, "loss": 0.10624542087316513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.23338782338174, "step_time": 0.757566177368164} +{"epoch": 0, "iter": 18358, "iter_tflops": 10.112885371672187, "iter_time": 2.040079833984375, "loss": 0.13626210391521454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.123347491323335, "step_time": 1.4607792892456057} +{"epoch": 0, "iter": 18359, "iter_tflops": 22.667211893123387, "iter_time": 0.9101734085083009, "loss": 0.17813614010810852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.702548738262557, "step_time": 0.8026866798400878} +{"epoch": 0, "iter": 18360, "iter_tflops": 41.56003829479911, "iter_time": 0.4964166145324707, "loss": 0.21289147436618805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.482331602052234, "step_time": 0.4536067695617676} +{"epoch": 0, "iter": 18361, "iter_tflops": 11.928295193997211, "iter_time": 1.2806161346435545, "loss": 0.2239467203617096, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 12.794512458499181, "step_time": 1.1939155426025392} +{"epoch": 0, "iter": 18362, "iter_tflops": 11.139076694649495, "iter_time": 1.3713495025634765, "loss": 0.18618522584438324, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 12.525901811526362, "step_time": 1.2195183639526368} +{"epoch": 0, "iter": 18363, 
"iter_tflops": 26.887466572820678, "iter_time": 0.5681296615600585, "loss": 0.14398913085460663, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.662498693213223, "step_time": 0.5329461135864259} +{"epoch": 0, "iter": 18364, "iter_tflops": 27.51826343161147, "iter_time": 0.5551065139770508, "loss": 0.07754841446876526, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.3144437726207, "step_time": 0.5210935401916503} +{"epoch": 0, "iter": 18365, "iter_tflops": 28.207505480364304, "iter_time": 0.7314043960571289, "loss": 0.3025014400482178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.865447362361998, "step_time": 0.6908014221191408} +{"epoch": 0, "iter": 18366, "iter_tflops": 14.329637877394735, "iter_time": 1.4397498168945313, "loss": 0.1971769481897354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.28597471479165, "step_time": 1.193516353607178} +{"epoch": 0, "iter": 18367, "iter_tflops": 37.98663873897036, "iter_time": 0.5431144790649414, "loss": 0.21657492220401764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.5331924317724, "step_time": 0.4967374839782715} +{"epoch": 0, "iter": 18368, "iter_tflops": 43.132038139588865, "iter_time": 0.478324104309082, "loss": 0.15004877746105194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90134300959479, "step_time": 0.43988278770446776} +{"epoch": 0, "iter": 18369, "iter_tflops": 24.01821824661766, "iter_time": 0.8589768524169923, "loss": 0.6558731198310852, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.292335642879284, "step_time": 0.7846808967590332} +{"epoch": 0, "iter": 18370, "iter_tflops": 22.191366395557225, "iter_time": 0.9296900939941406, "loss": 0.5608217716217041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.330676396127092, "step_time": 0.7835383033752441} +{"epoch": 0, "iter": 18371, "iter_tflops": 41.613468931753005, "iter_time": 0.4957792282104492, "loss": 0.4229304790496826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7110082609834, "step_time": 0.46143207931518554} +{"epoch": 0, 
"iter": 18372, "iter_tflops": 42.14251684203595, "iter_time": 0.4895553245544434, "loss": 0.5183159112930298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.997856380023656, "step_time": 0.4584905853271485} +{"epoch": 0, "iter": 18373, "iter_tflops": 33.71258736901985, "iter_time": 0.3974668464660644, "loss": 0.0018281021621078253, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 37.45139839275558, "step_time": 0.3577873287200928} +{"epoch": 0, "iter": 18374, "iter_tflops": 27.542272236954002, "iter_time": 0.4865116310119629, "loss": 0.0055742422118783, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 30.63945448476101, "step_time": 0.43733271408081054} +{"epoch": 0, "iter": 18375, "iter_tflops": 26.094489810067376, "iter_time": 0.5135044174194336, "loss": 0.0046303728595376015, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 29.019195292881687, "step_time": 0.46175077056884767} +{"epoch": 0, "iter": 18376, "iter_tflops": 29.553225084044698, "iter_time": 0.4534068870544433, "loss": 0.00122932274825871, "lr": 3e-05, "seqlen": 5376.0, "step_tflops": 32.669926695025005, "step_time": 0.4101520004272461} +{"epoch": 0, "iter": 18377, "iter_tflops": 20.617304492303397, "iter_time": 1.0006688079833985, "loss": 0.38516882061958313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.22674027731266, "step_time": 0.9282104911804199} +{"epoch": 0, "iter": 18378, "iter_tflops": 14.359680064720097, "iter_time": 1.4367376861572267, "loss": 0.3494932949542999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.32602512039943, "step_time": 1.1257811431884766} +{"epoch": 0, "iter": 18379, "iter_tflops": 45.25546379227427, "iter_time": 0.45588072204589847, "loss": 0.3026316463947296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.91249191549361, "step_time": 0.42179600143432616} +{"epoch": 0, "iter": 18380, "iter_tflops": 47.17402379028531, "iter_time": 0.43734012603759764, "loss": 0.3408636748790741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74033337738788, "step_time": 
0.40660145759582517} +{"epoch": 0, "iter": 18381, "iter_tflops": 38.06237788185381, "iter_time": 0.5420337524414063, "loss": 0.555778443813324, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.35856896984123, "step_time": 0.4988348007202148} +{"epoch": 0, "iter": 18382, "iter_tflops": 40.85564603696363, "iter_time": 0.5049753341674805, "loss": 0.5408387780189514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.85179809448678, "step_time": 0.4704731483459473} +{"epoch": 0, "iter": 18383, "iter_tflops": 40.957157874566626, "iter_time": 0.5037237586975097, "loss": 0.5379045009613037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.8782018528657, "step_time": 0.47019004058837893} +{"epoch": 0, "iter": 18384, "iter_tflops": 43.329271135735844, "iter_time": 0.47614679336547855, "loss": 0.5533889532089233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.696740553920016, "step_time": 0.4418101406097412} +{"epoch": 0, "iter": 18385, "iter_tflops": 22.67549463899687, "iter_time": 0.6700536270141602, "loss": 0.0034960336051881313, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 24.049626530848286, "step_time": 0.6317685394287109} +{"epoch": 0, "iter": 18386, "iter_tflops": 15.11100497890793, "iter_time": 1.0054789505004882, "loss": 0.0027974937111139297, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 18.187924438217713, "step_time": 0.8353783016204833} +{"epoch": 0, "iter": 18387, "iter_tflops": 31.907917591084964, "iter_time": 0.47617640304565434, "loss": 0.001938413130119443, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 35.470121284687565, "step_time": 0.4283548202514648} +{"epoch": 0, "iter": 18388, "iter_tflops": 29.911971414041346, "iter_time": 0.5079503860473633, "loss": 0.0013627633452415466, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 33.09739508984233, "step_time": 0.4590632400512695} +{"epoch": 0, "iter": 18389, "iter_tflops": 24.953674740920757, "iter_time": 0.8267757644653322, "loss": 0.18916672468185425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
26.527094419884193, "step_time": 0.7777366485595703} +{"epoch": 0, "iter": 18390, "iter_tflops": 10.346349626292174, "iter_time": 1.9940456542968752, "loss": 0.20040999352931976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.895657919873019, "step_time": 1.599848075866699} +{"epoch": 0, "iter": 18391, "iter_tflops": 11.707611527815452, "iter_time": 1.7621949157714842, "loss": 0.15731900930404663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.700258055165492, "step_time": 1.5058908691406248} +{"epoch": 0, "iter": 18392, "iter_tflops": 22.025916177195164, "iter_time": 0.9366735687255859, "loss": 0.17063964903354645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.15016402182355, "step_time": 0.8203164596557617} +{"epoch": 0, "iter": 18393, "iter_tflops": 11.4616633146868, "iter_time": 1.4077616729736326, "loss": 0.21750777959823608, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 12.080913060201489, "step_time": 1.3356018905639648} +{"epoch": 0, "iter": 18394, "iter_tflops": 16.70287167157339, "iter_time": 0.9660189361572266, "loss": 0.1580958515405655, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 19.850464295593177, "step_time": 0.8128419609069825} +{"epoch": 0, "iter": 18395, "iter_tflops": 26.452390378966786, "iter_time": 0.6099747543334961, "loss": 0.20199261605739594, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 28.162562571270943, "step_time": 0.572934024810791} +{"epoch": 0, "iter": 18396, "iter_tflops": 28.07858716901313, "iter_time": 0.5746475143432619, "loss": 0.1753954142332077, "lr": 3e-05, "seqlen": 6448.0, "step_tflops": 29.95620041947079, "step_time": 0.5386294021606446} +{"epoch": 0, "iter": 18397, "iter_tflops": 25.059426123717444, "iter_time": 0.823286750793457, "loss": 0.21346209943294525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.464960130162336, "step_time": 0.779562614440918} +{"epoch": 0, "iter": 18398, "iter_tflops": 13.519507467257693, "iter_time": 1.5260240478515623, "loss": 0.13123328983783722, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 18.314672220429415, "step_time": 1.1264789924621583} +{"epoch": 0, "iter": 18399, "iter_tflops": 47.87081615538854, "iter_time": 0.43097434234619136, "loss": 0.16105854511260986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.051171259214634, "step_time": 0.3963617534637452} +{"epoch": 0, "iter": 18400, "iter_tflops": 50.977220430749874, "iter_time": 0.40471201324462885, "loss": 0.2110448181629181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.243306690129984, "step_time": 0.3734587001800537} +{"epoch": 0, "iter": 18401, "iter_tflops": 33.26890512776362, "iter_time": 0.6201314239501954, "loss": 0.11674945801496506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.43504880287564, "step_time": 0.5822228050231935} +{"epoch": 0, "iter": 18402, "iter_tflops": 8.807014643100636, "iter_time": 2.342575134277344, "loss": 0.23104546964168549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.034174204882795, "step_time": 1.869745132446289} +{"epoch": 0, "iter": 18403, "iter_tflops": 9.125878437640848, "iter_time": 2.26072412109375, "loss": 0.19275905191898346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.049730408372172, "step_time": 1.8671128387451172} +{"epoch": 0, "iter": 18404, "iter_tflops": 40.56030242413075, "iter_time": 0.508652359008789, "loss": 0.22161687910556793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.49445461677858, "step_time": 0.4636778602600098} +{"epoch": 0, "iter": 18405, "iter_tflops": 13.398752133888076, "iter_time": 1.115671142578125, "loss": 0.1896565705537796, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 14.162209805564668, "step_time": 1.055527442932129} +{"epoch": 0, "iter": 18406, "iter_tflops": 6.277424059880792, "iter_time": 2.381327270507813, "loss": 0.3017352819442749, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 8.069253204305642, "step_time": 1.852538360595703} +{"epoch": 0, "iter": 18407, "iter_tflops": 8.808181388920671, "iter_time": 1.6971268463134768, "loss": 0.22343546152114868, "lr": 3e-05, "seqlen": 
5984.0, "step_tflops": 10.978006672856496, "step_time": 1.3616862831115721} +{"epoch": 0, "iter": 18408, "iter_tflops": 25.187452253907804, "iter_time": 0.5934939727783203, "loss": 0.2000526487827301, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 26.93727157892304, "step_time": 0.5549411735534668} +{"epoch": 0, "iter": 18409, "iter_tflops": 16.292489097458105, "iter_time": 0.9125006408691405, "loss": 0.1470528393983841, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 17.05391494041578, "step_time": 0.8717591705322266} +{"epoch": 0, "iter": 18410, "iter_tflops": 9.604841315421437, "iter_time": 1.5478555297851564, "loss": 0.16985857486724854, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 11.339607672491496, "step_time": 1.3110600624084472} +{"epoch": 0, "iter": 18411, "iter_tflops": 23.50837183904984, "iter_time": 0.6324090347290039, "loss": 0.15919899940490723, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 25.28004989327409, "step_time": 0.5880885047912598} +{"epoch": 0, "iter": 18412, "iter_tflops": 23.112643762325344, "iter_time": 0.6432369613647461, "loss": 0.13125655055046082, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 24.869770770355952, "step_time": 0.5977902603149414} +{"epoch": 0, "iter": 18413, "iter_tflops": 23.64649749781217, "iter_time": 0.8724798889160158, "loss": 0.5129625797271729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.761601492599002, "step_time": 0.8008466987609864} +{"epoch": 0, "iter": 18414, "iter_tflops": 42.59323872899398, "iter_time": 0.4843748474121094, "loss": 0.568220317363739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.90109341372256, "step_time": 0.4494684543609619} +{"epoch": 0, "iter": 18415, "iter_tflops": 45.79219663210192, "iter_time": 0.4505373191833496, "loss": 0.5185472965240479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0867725620462, "step_time": 0.42029843139648443} +{"epoch": 0, "iter": 18416, "iter_tflops": 45.89357227232346, "iter_time": 0.4495421142578125, "loss": 0.4862207770347595, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 49.1967458546482, "step_time": 0.41935890579223634} +{"epoch": 0, "iter": 18417, "iter_tflops": 32.16297871557326, "iter_time": 0.6414546890258789, "loss": 0.36646223068237305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.375268773010056, "step_time": 0.6001725730895996} +{"epoch": 0, "iter": 18418, "iter_tflops": 15.800549313603973, "iter_time": 1.3057200164794922, "loss": 0.3090914189815521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.37609105162362, "step_time": 1.1227139358520508} +{"epoch": 0, "iter": 18419, "iter_tflops": 44.69309505777662, "iter_time": 0.4616170234680176, "loss": 0.3283439874649048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.96092201334699, "step_time": 0.4213787784576416} +{"epoch": 0, "iter": 18420, "iter_tflops": 50.94948398043819, "iter_time": 0.4049323348999024, "loss": 0.29431700706481934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.14261083179466, "step_time": 0.3741406726837158} +{"epoch": 0, "iter": 18421, "iter_tflops": 32.049660775499504, "iter_time": 0.4996104469299317, "loss": 0.12459630519151688, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 34.780879271398305, "step_time": 0.4603778190612793} +{"epoch": 0, "iter": 18422, "iter_tflops": 10.430539356855602, "iter_time": 1.5351406860351562, "loss": 0.16457755863666534, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 13.417634709702819, "step_time": 1.1933806285858155} +{"epoch": 0, "iter": 18423, "iter_tflops": 38.23820847997849, "iter_time": 0.41875249862670905, "loss": 0.19257037341594696, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 41.714388557922405, "step_time": 0.3838566474914551} +{"epoch": 0, "iter": 18424, "iter_tflops": 39.79791007423054, "iter_time": 0.40234136199951176, "loss": 0.12150830030441284, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 43.33668946773307, "step_time": 0.36948704528808596} +{"epoch": 0, "iter": 18425, "iter_tflops": 38.20180485252547, "iter_time": 0.5400554656982421, "loss": 
0.08546717464923859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.15266990450379, "step_time": 0.5013306198120118} +{"epoch": 0, "iter": 18426, "iter_tflops": 14.667766586399688, "iter_time": 1.4065599822998045, "loss": 0.13557298481464386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.54256896901399, "step_time": 1.112634044647217} +{"epoch": 0, "iter": 18427, "iter_tflops": 28.28135080456554, "iter_time": 0.7294946289062498, "loss": 0.10910222679376602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.74850769049222, "step_time": 0.649828763961792} +{"epoch": 0, "iter": 18428, "iter_tflops": 42.47793413090445, "iter_time": 0.4856896629333496, "loss": 0.15614211559295654, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.69258664368726, "step_time": 0.4418494453430176} +{"epoch": 0, "iter": 18429, "iter_tflops": 31.662055313989605, "iter_time": 0.6516031036376952, "loss": 0.20985284447669983, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.95454664602266, "step_time": 0.5902263221740722} +{"epoch": 0, "iter": 18430, "iter_tflops": 38.21526973290685, "iter_time": 0.5398651809692383, "loss": 0.26441270112991333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.81881609886321, "step_time": 0.4933447532653809} +{"epoch": 0, "iter": 18431, "iter_tflops": 45.14577035269592, "iter_time": 0.4569884033203125, "loss": 0.3807308077812195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6373364848793, "step_time": 0.4156365947723389} +{"epoch": 0, "iter": 18432, "iter_tflops": 42.45883175306218, "iter_time": 0.4859081764221191, "loss": 0.27408599853515625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.21650092830659, "step_time": 0.4464010276794434} +{"epoch": 0, "iter": 18433, "iter_tflops": 26.599042915200446, "iter_time": 0.7756329269409179, "loss": 0.5054446458816528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.608742925640165, "step_time": 0.7211464538574218} +{"epoch": 0, "iter": 18434, "iter_tflops": 10.41513970085359, "iter_time": 1.9808753509521484, 
"loss": 0.3817642331123352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.460326630236551, "step_time": 1.6557425918579103} +{"epoch": 0, "iter": 18435, "iter_tflops": 10.22733590486417, "iter_time": 2.017250015258789, "loss": 0.5304526090621948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.11705549203681, "step_time": 1.5728448753356934} +{"epoch": 0, "iter": 18436, "iter_tflops": 35.8500850754738, "iter_time": 0.5754824142456054, "loss": 0.5445032715797424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.28836038505009, "step_time": 0.4457080211639404} +{"epoch": 0, "iter": 18437, "iter_tflops": 17.33666945748905, "iter_time": 0.8292905960083008, "loss": 0.12973366677761078, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 18.292037916745436, "step_time": 0.7859778671264648} +{"epoch": 0, "iter": 18438, "iter_tflops": 7.638097912930498, "iter_time": 1.8822928314208984, "loss": 0.1388336420059204, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 9.72075213159554, "step_time": 1.4790148696899414} +{"epoch": 0, "iter": 18439, "iter_tflops": 22.095192350050638, "iter_time": 0.6506907348632812, "loss": 0.33839765191078186, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 23.791282899117974, "step_time": 0.6043027191162109} +{"epoch": 0, "iter": 18440, "iter_tflops": 21.710559785649746, "iter_time": 0.662218620300293, "loss": 0.18301454186439514, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 23.229099228663358, "step_time": 0.6189278717041016} +{"epoch": 0, "iter": 18441, "iter_tflops": 19.253907919214402, "iter_time": 1.071527587890625, "loss": 0.06843714416027069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.333347538984427, "step_time": 1.0146432342529297} +{"epoch": 0, "iter": 18442, "iter_tflops": 21.716223397112284, "iter_time": 0.9500313720703126, "loss": 0.0557839572429657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.06273905480434, "step_time": 0.7623431415557862} +{"epoch": 0, "iter": 18443, "iter_tflops": 54.229780185879825, "iter_time": 
0.3804384498596191, "loss": 0.08168467879295349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.216705825337435, "step_time": 0.34839988517761233} +{"epoch": 0, "iter": 18444, "iter_tflops": 54.57353939787295, "iter_time": 0.37804206466674806, "loss": 0.05549398437142372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.2967556219651, "step_time": 0.34792955017089844} +{"epoch": 0, "iter": 18445, "iter_tflops": 2.843396665380097, "iter_time": 0.509761531829834, "loss": 0.4345032870769501, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 3.0802070476827934, "step_time": 0.4705703926086426} +{"epoch": 0, "iter": 18446, "iter_tflops": 0.6887092776938013, "iter_time": 2.1045951995849608, "loss": 0.5107414722442627, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 0.8587941461378864, "step_time": 1.687778434753418} +{"epoch": 0, "iter": 18447, "iter_tflops": 0.8051018588152911, "iter_time": 1.8003364715576173, "loss": 0.6150727272033691, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 0.9273897265861024, "step_time": 1.5629397201538087} +{"epoch": 0, "iter": 18448, "iter_tflops": 1.4440764563610267, "iter_time": 1.0037240295410155, "loss": 0.5604978799819946, "lr": 3e-05, "seqlen": 592.0, "step_tflops": 1.7689378763192347, "step_time": 0.8193923931121827} +{"epoch": 0, "iter": 18449, "iter_tflops": 16.863475129686673, "iter_time": 0.8864484329223633, "loss": 0.20646987855434418, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 18.455245350120766, "step_time": 0.809992000579834} +{"epoch": 0, "iter": 18450, "iter_tflops": 22.578771697153474, "iter_time": 0.6620644073486328, "loss": 0.22109021246433258, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.43158343277746, "step_time": 0.6118555984497069} +{"epoch": 0, "iter": 18451, "iter_tflops": 21.704911824366203, "iter_time": 0.6887197341918945, "loss": 0.45122405886650085, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.313549937056813, "step_time": 0.6411979789733886} +{"epoch": 0, "iter": 18452, "iter_tflops": 24.324235415256386, 
"iter_time": 0.6145558471679688, "loss": 0.2194846272468567, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 26.059574734657957, "step_time": 0.5736318130493163} +{"epoch": 0, "iter": 18453, "iter_tflops": 19.67187698734384, "iter_time": 1.048760803222656, "loss": 0.64061439037323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.799074056807502, "step_time": 0.9919236526489258} +{"epoch": 0, "iter": 18454, "iter_tflops": 10.23222119511413, "iter_time": 2.016286895751953, "loss": 0.7232949733734131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.299135422802584, "step_time": 1.5513108825683595} +{"epoch": 0, "iter": 18455, "iter_tflops": 14.058416532517848, "iter_time": 1.467526123046875, "loss": 0.656714677810669, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.525879943897415, "step_time": 1.248411193847656} +{"epoch": 0, "iter": 18456, "iter_tflops": 23.55931863097058, "iter_time": 0.8757084121704102, "loss": 0.5768295526504517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.098113841636387, "step_time": 0.7905204811096191} +{"epoch": 0, "iter": 18457, "iter_tflops": 13.035342814171628, "iter_time": 1.244100555419922, "loss": 0.27474725246429443, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 13.866570450396532, "step_time": 1.1695233001708982} +{"epoch": 0, "iter": 18458, "iter_tflops": 12.352457442331096, "iter_time": 1.3128786163330077, "loss": 0.16342204809188843, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 14.378752065154279, "step_time": 1.127864028930664} +{"epoch": 0, "iter": 18459, "iter_tflops": 28.81075333739423, "iter_time": 0.5628897323608398, "loss": 0.19414976239204407, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.686262757765938, "step_time": 0.5284865531921387} +{"epoch": 0, "iter": 18460, "iter_tflops": 28.243357596438855, "iter_time": 0.5741979217529297, "loss": 0.23696976900100708, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.100740522122226, "step_time": 0.5387667198181152} +{"epoch": 0, "iter": 18461, "iter_tflops": 
27.90125179669901, "iter_time": 0.7394325408935547, "loss": 0.5821380019187927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.55589117916402, "step_time": 0.698036590576172} +{"epoch": 0, "iter": 18462, "iter_tflops": 16.777695872085193, "iter_time": 1.229673828125, "loss": 0.7056524157524109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.202169818887853, "step_time": 1.0212315654754638} +{"epoch": 0, "iter": 18463, "iter_tflops": 43.0101896844284, "iter_time": 0.4796792030334473, "loss": 0.7494050860404968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.02206721481783, "step_time": 0.44828697967529296} +{"epoch": 0, "iter": 18464, "iter_tflops": 40.997943401576, "iter_time": 0.5032226448059082, "loss": 0.5540975332260132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.97945012554787, "step_time": 0.46910758209228515} +{"epoch": 0, "iter": 18465, "iter_tflops": 20.852333922153022, "iter_time": 0.9893901367187501, "loss": 0.2174312323331833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.78593189910766, "step_time": 0.9469915542602539} +{"epoch": 0, "iter": 18466, "iter_tflops": 13.775355819441131, "iter_time": 1.4976813507080078, "loss": 0.23408885300159454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.298020040736564, "step_time": 1.1926852588653563} +{"epoch": 0, "iter": 18467, "iter_tflops": 45.22240968288753, "iter_time": 0.4562139358520507, "loss": 0.20132550597190857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.78106560334746, "step_time": 0.4229324073791504} +{"epoch": 0, "iter": 18468, "iter_tflops": 43.8949792205684, "iter_time": 0.47001032638549806, "loss": 0.16223138570785522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20351165420777, "step_time": 0.4370669212341309} +{"epoch": 0, "iter": 18469, "iter_tflops": 31.12312088854605, "iter_time": 0.6628863983154297, "loss": 0.08327480405569077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.38607264248012, "step_time": 0.6179550895690917} +{"epoch": 0, "iter": 18470, 
"iter_tflops": 21.67679356192118, "iter_time": 0.9517594680786132, "loss": 0.044200439006090164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.7881771588557, "step_time": 0.7701566772460937} +{"epoch": 0, "iter": 18471, "iter_tflops": 46.78845945175873, "iter_time": 0.44094406509399414, "loss": 0.09236057102680206, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.94904768811199, "step_time": 0.40493580245971683} +{"epoch": 0, "iter": 18472, "iter_tflops": 49.45419667225333, "iter_time": 0.4171757888793945, "loss": 0.07642273604869843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.171314562874294, "step_time": 0.38084904670715336} +{"epoch": 0, "iter": 18473, "iter_tflops": 26.185828003714136, "iter_time": 0.7878724899291992, "loss": 0.18508626520633698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.59694617461718, "step_time": 0.7475861053466797} +{"epoch": 0, "iter": 18474, "iter_tflops": 19.350494463146568, "iter_time": 1.0661791381835937, "loss": 0.20144468545913696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.579905082775326, "step_time": 0.8749438743591309} +{"epoch": 0, "iter": 18475, "iter_tflops": 36.65646218095341, "iter_time": 0.5628228225708007, "loss": 0.22156496345996857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.12759140497689, "step_time": 0.5141373500823975} +{"epoch": 0, "iter": 18476, "iter_tflops": 40.79008947328016, "iter_time": 0.5057869148254395, "loss": 0.2520524561405182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.46182054430802, "step_time": 0.4640181903839111} +{"epoch": 0, "iter": 18477, "iter_tflops": 19.702560730574678, "iter_time": 1.0471275177001953, "loss": 0.5581081509590149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.02798926731576, "step_time": 0.9811253585815428} +{"epoch": 0, "iter": 18478, "iter_tflops": 20.43398395099659, "iter_time": 1.0096461639404295, "loss": 0.5605378746986389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.220714355991426, "step_time": 0.8884779853820801} +{"epoch": 0, 
"iter": 18479, "iter_tflops": 43.266259343885125, "iter_time": 0.4768402404785157, "loss": 0.40404486656188965, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.45309697518321, "step_time": 0.4441274070739746} +{"epoch": 0, "iter": 18480, "iter_tflops": 48.81008816006235, "iter_time": 0.42268093109130855, "loss": 0.5337307453155518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.97365513224226, "step_time": 0.38945950508117677} +{"epoch": 0, "iter": 18481, "iter_tflops": 39.85339422392251, "iter_time": 0.5176746902465821, "loss": 0.05733431503176689, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.12538101983142, "step_time": 0.4783979415893554} +{"epoch": 0, "iter": 18482, "iter_tflops": 40.43499865975857, "iter_time": 0.5102286186218262, "loss": 0.050877466797828674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.841233826866734, "step_time": 0.4600920124053955} +{"epoch": 0, "iter": 18483, "iter_tflops": 39.116851881154055, "iter_time": 0.5274221343994141, "loss": 0.05696435645222664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99163639263429, "step_time": 0.47988621139526366} +{"epoch": 0, "iter": 18484, "iter_tflops": 41.106152553653025, "iter_time": 0.5018979454040527, "loss": 0.06756189465522766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.24340621801158, "step_time": 0.4560022163391113} +{"epoch": 0, "iter": 18485, "iter_tflops": 24.31104014319653, "iter_time": 0.8486306381225586, "loss": 0.6483799815177917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.453151347652184, "step_time": 0.7799106140136719} +{"epoch": 0, "iter": 18486, "iter_tflops": 33.601685221477915, "iter_time": 0.6139898452758789, "loss": 0.6393426656723022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.724520708249386, "step_time": 0.4828864822387695} +{"epoch": 0, "iter": 18487, "iter_tflops": 37.53502793338317, "iter_time": 0.5496490783691407, "loss": 0.5867167115211487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.01834844210309, "step_time": 0.5029723110198976} 
+{"epoch": 0, "iter": 18488, "iter_tflops": 39.38044977001791, "iter_time": 0.5238917694091797, "loss": 0.6472650766372681, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.01984857848796, "step_time": 0.4795715045928955} +{"epoch": 0, "iter": 18489, "iter_tflops": 22.68061171154531, "iter_time": 0.9096356735229492, "loss": 0.27884864807128906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.350630378326578, "step_time": 0.8472509002685547} +{"epoch": 0, "iter": 18490, "iter_tflops": 14.218646908037988, "iter_time": 1.450988525390625, "loss": 0.36821869015693665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.22348982237787, "step_time": 1.1321154022216797} +{"epoch": 0, "iter": 18491, "iter_tflops": 46.181878731703065, "iter_time": 0.4467356910705566, "loss": 0.28362634778022766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.88436947784803, "step_time": 0.41357831573486326} +{"epoch": 0, "iter": 18492, "iter_tflops": 44.64773601854105, "iter_time": 0.462085994720459, "loss": 0.24678082764148712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.28057136991573, "step_time": 0.4273166809082032} +{"epoch": 0, "iter": 18493, "iter_tflops": 27.938615361666834, "iter_time": 0.7384436645507813, "loss": 0.07606527954339981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.501264553152332, "step_time": 0.6993291244506836} +{"epoch": 0, "iter": 18494, "iter_tflops": 12.063632249482238, "iter_time": 1.7101891937255862, "loss": 0.09388087689876556, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.215953647237667, "step_time": 1.4512634201049806} +{"epoch": 0, "iter": 18495, "iter_tflops": 41.52192109490068, "iter_time": 0.49687232589721675, "loss": 0.043357253074645996, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.72092572850148, "step_time": 0.4512396278381348} +{"epoch": 0, "iter": 18496, "iter_tflops": 44.34091295273892, "iter_time": 0.46528346252441416, "loss": 0.058947812765836716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.27238990501649, "step_time": 
0.4273891048431397} +{"epoch": 0, "iter": 18497, "iter_tflops": 11.693445323007214, "iter_time": 1.121572204589844, "loss": 0.02250933088362217, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 12.541309074146739, "step_time": 1.0457475509643555} +{"epoch": 0, "iter": 18498, "iter_tflops": 11.707196589438402, "iter_time": 1.1202548065185547, "loss": 0.0601847767829895, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 13.385979532392858, "step_time": 0.9797596969604492} +{"epoch": 0, "iter": 18499, "iter_tflops": 24.38868842444695, "iter_time": 0.5377510681152344, "loss": 0.009058774448931217, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 27.057289343690524, "step_time": 0.48471386337280276} +{"epoch": 0, "iter": 18500, "iter_tflops": 28.535488449810682, "iter_time": 0.45960465240478515, "loss": 0.041847314685583115, "lr": 3e-05, "seqlen": 5264.0, "step_tflops": 31.452955143418123, "step_time": 0.41697332382202146} +{"epoch": 0, "iter": 18501, "iter_tflops": 17.33356455721742, "iter_time": 1.190239517211914, "loss": 0.17939499020576477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.269792026849583, "step_time": 1.1292462158203125} +{"epoch": 0, "iter": 18502, "iter_tflops": 25.90763781766211, "iter_time": 0.7963324813842773, "loss": 0.19516827166080475, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.816653276413756, "step_time": 0.6484369468688965} +{"epoch": 0, "iter": 18503, "iter_tflops": 41.803533724244474, "iter_time": 0.4935251083374024, "loss": 0.16151316463947296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.96100768884483, "step_time": 0.44888253211975093} +{"epoch": 0, "iter": 18504, "iter_tflops": 45.52868145448656, "iter_time": 0.4531449813842774, "loss": 0.23515529930591583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.697277464593625, "step_time": 0.4151352863311767} +{"epoch": 0, "iter": 18505, "iter_tflops": 21.374994593815902, "iter_time": 0.9651976013183594, "loss": 0.5071269869804382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
23.01665156219394, "step_time": 0.8963551216125487} +{"epoch": 0, "iter": 18506, "iter_tflops": 18.17434576135344, "iter_time": 1.1351766815185547, "loss": 0.6235908269882202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.447492918027965, "step_time": 0.9190823040008546} +{"epoch": 0, "iter": 18507, "iter_tflops": 38.31878826099684, "iter_time": 0.5384067306518555, "loss": 0.5398832559585571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.68278685797576, "step_time": 0.4949547538757324} +{"epoch": 0, "iter": 18508, "iter_tflops": 41.68221278130695, "iter_time": 0.49496157073974606, "loss": 0.8277832269668579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.503244253800965, "step_time": 0.4533982982635498} +{"epoch": 0, "iter": 18509, "iter_tflops": 20.231069611201825, "iter_time": 1.0197727508544923, "loss": 0.5513444542884827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.576335428061345, "step_time": 0.956190803527832} +{"epoch": 0, "iter": 18510, "iter_tflops": 14.722318034506408, "iter_time": 1.4013481750488284, "loss": 0.6458966135978699, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.2348959529952, "step_time": 1.072586696624756} +{"epoch": 0, "iter": 18511, "iter_tflops": 38.991858455193935, "iter_time": 0.5291128540039063, "loss": 0.589763879776001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45719233461372, "step_time": 0.4859269390106201} +{"epoch": 0, "iter": 18512, "iter_tflops": 37.643982582627906, "iter_time": 0.548058204650879, "loss": 0.5772905945777893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.88986809983285, "step_time": 0.5045527038574219} +{"epoch": 0, "iter": 18513, "iter_tflops": 22.58501486311278, "iter_time": 0.9134859390258789, "loss": 0.2988009452819824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.294853179644182, "step_time": 0.8491960563659668} +{"epoch": 0, "iter": 18514, "iter_tflops": 15.097202404463546, "iter_time": 1.3665507659912108, "loss": 0.41252508759498596, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 18.549813290084405, "step_time": 1.1121995239257814} +{"epoch": 0, "iter": 18515, "iter_tflops": 49.5447171549545, "iter_time": 0.41641358947753904, "loss": 0.31299135088920593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.024131338333554, "step_time": 0.3818866310119629} +{"epoch": 0, "iter": 18516, "iter_tflops": 48.858010810096616, "iter_time": 0.42226634216308595, "loss": 0.3544275760650635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.04647692433381, "step_time": 0.3889248580932617} +{"epoch": 0, "iter": 18517, "iter_tflops": 24.967694756687333, "iter_time": 0.8263115081787111, "loss": 0.13460859656333923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.236685770667947, "step_time": 0.7863452606201172} +{"epoch": 0, "iter": 18518, "iter_tflops": 13.446326209165735, "iter_time": 1.5343293914794922, "loss": 0.1355823576450348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.212453843641683, "step_time": 1.1328014163970948} +{"epoch": 0, "iter": 18519, "iter_tflops": 38.10321180322007, "iter_time": 0.5414528732299804, "loss": 0.17046517133712769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.715925687618416, "step_time": 0.49456156539916996} +{"epoch": 0, "iter": 18520, "iter_tflops": 42.972830378609665, "iter_time": 0.4800962219238281, "loss": 0.14058443903923035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.08741039353876, "step_time": 0.4381445770263672} +{"epoch": 0, "iter": 18521, "iter_tflops": 32.49805793510391, "iter_time": 0.6348408126831054, "loss": 0.2862964868545532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.64539806671307, "step_time": 0.5787870140075684} +{"epoch": 0, "iter": 18522, "iter_tflops": 40.38988331573381, "iter_time": 0.510798542022705, "loss": 0.3130486011505127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.90204143733823, "step_time": 0.46993471908569334} +{"epoch": 0, "iter": 18523, "iter_tflops": 48.14634048547795, "iter_time": 0.4285080299377442, "loss": 0.23619316518306732, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 51.90646898689323, "step_time": 0.3974667110443115} +{"epoch": 0, "iter": 18524, "iter_tflops": 43.742929261838846, "iter_time": 0.4716440773010254, "loss": 0.27017390727996826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.20218467241591, "step_time": 0.4370792083740234} +{"epoch": 0, "iter": 18525, "iter_tflops": 43.69414848033302, "iter_time": 0.47217062759399414, "loss": 0.19903327524662018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.68678989851136, "step_time": 0.4326374988555909} +{"epoch": 0, "iter": 18526, "iter_tflops": 7.091456845713753, "iter_time": 2.909288452148438, "loss": 0.18613480031490326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.698847676718511, "step_time": 2.3717041931152343} +{"epoch": 0, "iter": 18527, "iter_tflops": 11.864391090779325, "iter_time": 1.7389087524414062, "loss": 0.2293456792831421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.822611900385994, "step_time": 1.3918662681579592} +{"epoch": 0, "iter": 18528, "iter_tflops": 47.85441955219661, "iter_time": 0.4311220092773438, "loss": 0.23881390690803528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40709249730235, "step_time": 0.3936698741912842} +{"epoch": 0, "iter": 18529, "iter_tflops": 17.039813004630833, "iter_time": 0.8437379531860351, "loss": 0.18829666078090668, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 17.900360112582536, "step_time": 0.8031758499145507} +{"epoch": 0, "iter": 18530, "iter_tflops": 6.487264727686154, "iter_time": 2.216209381103516, "loss": 0.2722199857234955, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 8.603005312888277, "step_time": 1.671176109313965} +{"epoch": 0, "iter": 18531, "iter_tflops": 8.084107028963484, "iter_time": 1.7784446563720704, "loss": 0.35724350810050964, "lr": 3e-05, "seqlen": 5760.0, "step_tflops": 9.277236186133525, "step_time": 1.549721992492676} +{"epoch": 0, "iter": 18532, "iter_tflops": 19.273724107617365, "iter_time": 0.745944938659668, "loss": 0.2076527178287506, "lr": 
3e-05, "seqlen": 5760.0, "step_tflops": 23.08515400260629, "step_time": 0.6227871360778808} +{"epoch": 0, "iter": 18533, "iter_tflops": 18.319447076240525, "iter_time": 0.8740626983642579, "loss": 0.33042436838150024, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 19.310854976982913, "step_time": 0.8291888351440431} +{"epoch": 0, "iter": 18534, "iter_tflops": 11.532939803960582, "iter_time": 1.3884010162353515, "loss": 0.18060611188411713, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 13.598083579338976, "step_time": 1.1775442657470703} +{"epoch": 0, "iter": 18535, "iter_tflops": 28.269084798773847, "iter_time": 0.566426025390625, "loss": 0.1854279637336731, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 30.175342021425575, "step_time": 0.5306433753967285} +{"epoch": 0, "iter": 18536, "iter_tflops": 27.735205221090165, "iter_time": 0.5773292541503907, "loss": 0.2996431887149811, "lr": 3e-05, "seqlen": 6400.0, "step_tflops": 29.42566179156094, "step_time": 0.5441626243591309} +{"epoch": 0, "iter": 18537, "iter_tflops": 32.87158223450669, "iter_time": 0.6276270294189453, "loss": 0.07792066782712936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.36489398499766, "step_time": 0.5833777847290039} +{"epoch": 0, "iter": 18538, "iter_tflops": 10.744964209732904, "iter_time": 1.9200709381103516, "loss": 0.034566402435302734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.060216043101105, "step_time": 1.5796900634765627} +{"epoch": 0, "iter": 18539, "iter_tflops": 14.66302951190754, "iter_time": 1.4070143890380857, "loss": 0.09028926491737366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.318374977369643, "step_time": 1.1912834510803223} +{"epoch": 0, "iter": 18540, "iter_tflops": 16.894642374539558, "iter_time": 1.2211618957519532, "loss": 0.03170425072312355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.79514598081858, "step_time": 0.9465912055969238} +{"epoch": 0, "iter": 18541, "iter_tflops": 11.708444129905097, "iter_time": 1.2872058715820314, "loss": 
0.1563894897699356, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 12.623262233042523, "step_time": 1.193921012878418} +{"epoch": 0, "iter": 18542, "iter_tflops": 10.127136879708729, "iter_time": 1.4881973266601562, "loss": 0.16488979756832123, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 12.913723737964283, "step_time": 1.1670667839050293} +{"epoch": 0, "iter": 18543, "iter_tflops": 23.73810958973661, "iter_time": 0.6348937759399413, "loss": 0.16885679960250854, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 25.30957455759132, "step_time": 0.5954733848571777} +{"epoch": 0, "iter": 18544, "iter_tflops": 26.965117890099837, "iter_time": 0.5589138565063476, "loss": 0.24007682502269745, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 28.529734540247667, "step_time": 0.5282621192932129} +{"epoch": 0, "iter": 18545, "iter_tflops": 30.09639941599044, "iter_time": 0.6855003890991211, "loss": 0.020251700654625893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.19214823463834, "step_time": 0.640873462677002} +{"epoch": 0, "iter": 18546, "iter_tflops": 13.693841282651123, "iter_time": 1.5065965118408204, "loss": 0.022843774408102036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.224767818007162, "step_time": 1.2715802001953125} +{"epoch": 0, "iter": 18547, "iter_tflops": 17.39725659408749, "iter_time": 1.1858820037841797, "loss": 0.036196328699588776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.67029677052366, "step_time": 0.9981034011840819} +{"epoch": 0, "iter": 18548, "iter_tflops": 23.647685075687484, "iter_time": 0.8724360733032226, "loss": 0.034926220774650574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.76631494275541, "step_time": 0.7707857265472412} +{"epoch": 0, "iter": 18549, "iter_tflops": 24.051236204927438, "iter_time": 0.6045559539794921, "loss": 0.21142710745334625, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 25.936342885956293, "step_time": 0.5606155853271484} +{"epoch": 0, "iter": 18550, "iter_tflops": 21.80791704800307, "iter_time": 
0.6667449264526367, "loss": 0.32996827363967896, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.519035978581698, "step_time": 0.618236141204834} +{"epoch": 0, "iter": 18551, "iter_tflops": 21.891322243852986, "iter_time": 0.6642046508789062, "loss": 0.19765867292881012, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 23.577833882769514, "step_time": 0.6166943969726563} +{"epoch": 0, "iter": 18552, "iter_tflops": 22.46230251132759, "iter_time": 0.6473209075927734, "loss": 0.3065401017665863, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.091464268841484, "step_time": 0.6035464630126953} +{"epoch": 0, "iter": 18553, "iter_tflops": 17.76708710800409, "iter_time": 1.161197296142578, "loss": 0.11694828420877457, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.37863223314652, "step_time": 1.064631046295166} +{"epoch": 0, "iter": 18554, "iter_tflops": 48.88861118881457, "iter_time": 0.42200203704833983, "loss": 0.20469623804092407, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.46906683464265, "step_time": 0.38585101127624516} +{"epoch": 0, "iter": 18555, "iter_tflops": 48.90030346810015, "iter_time": 0.4219011344909668, "loss": 0.17177726328372955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.053106590617475, "step_time": 0.38887625694274897} +{"epoch": 0, "iter": 18556, "iter_tflops": 46.70913652771849, "iter_time": 0.4416928901672363, "loss": 0.14537766575813293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.23107657913704, "step_time": 0.41072369766235356} +{"epoch": 0, "iter": 18557, "iter_tflops": 25.639935699891957, "iter_time": 0.8046468505859375, "loss": 0.06886811554431915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.88445023130351, "step_time": 0.7673987503051758} +{"epoch": 0, "iter": 18558, "iter_tflops": 17.944828098114836, "iter_time": 1.1496958007812499, "loss": 0.04792441427707672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.24324593303842, "step_time": 1.0191593570709228} +{"epoch": 0, "iter": 18559, "iter_tflops": 
44.003423873444135, "iter_time": 0.46885200500488283, "loss": 0.0599086619913578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.54225431893288, "step_time": 0.425013090133667} +{"epoch": 0, "iter": 18560, "iter_tflops": 47.61162547381652, "iter_time": 0.43332050323486326, "loss": 0.05016084387898445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.41375708399521, "step_time": 0.39361981773376464} +{"epoch": 0, "iter": 18561, "iter_tflops": 15.282454417023919, "iter_time": 1.0584871368408202, "loss": 0.012518317438662052, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 16.53822866474027, "step_time": 0.9781145095825194} +{"epoch": 0, "iter": 18562, "iter_tflops": 15.741414515406179, "iter_time": 1.0276256561279298, "loss": 0.027290651574730873, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 19.39621921582217, "step_time": 0.8339914722442627} +{"epoch": 0, "iter": 18563, "iter_tflops": 39.165403118629655, "iter_time": 0.41302476501464847, "loss": 0.040573712438344955, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 44.45121037400608, "step_time": 0.3639109325408935} +{"epoch": 0, "iter": 18564, "iter_tflops": 44.24915874729818, "iter_time": 0.3655726318359375, "loss": 0.0290505550801754, "lr": 3e-05, "seqlen": 6464.0, "step_tflops": 48.34870290237019, "step_time": 0.33457529258728025} +{"epoch": 0, "iter": 18565, "iter_tflops": 29.003079353823953, "iter_time": 0.7113414840698242, "loss": 0.3054085671901703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.65898329940366, "step_time": 0.672921646118164} +{"epoch": 0, "iter": 18566, "iter_tflops": 12.100212195741467, "iter_time": 1.7050191497802734, "loss": 0.24751834571361542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.71098830541577, "step_time": 1.4024274291992187} +{"epoch": 0, "iter": 18567, "iter_tflops": 34.98310461208472, "iter_time": 0.5897444992065429, "loss": 0.22162318229675293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.57064293357901, "step_time": 0.4735090446472169} +{"epoch": 0, "iter": 
18568, "iter_tflops": 47.79525489126908, "iter_time": 0.43165568542480476, "loss": 0.2834579348564148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.794696579545956, "step_time": 0.3983244400024414} +{"epoch": 0, "iter": 18569, "iter_tflops": 26.45234528745419, "iter_time": 0.7799343795776368, "loss": 0.005194836761802435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.85003668863871, "step_time": 0.7407923278808594} +{"epoch": 0, "iter": 18570, "iter_tflops": 11.852783465306429, "iter_time": 1.7406116943359375, "loss": 0.0068287318572402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.874952637337364, "step_time": 1.2996003189086913} +{"epoch": 0, "iter": 18571, "iter_tflops": 44.02119972643986, "iter_time": 0.4686626815795898, "loss": 0.0030827501323074102, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.93115497030175, "step_time": 0.42163512229919436} +{"epoch": 0, "iter": 18572, "iter_tflops": 42.94119556408856, "iter_time": 0.48044990921020503, "loss": 0.01031559705734253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.991591254963225, "step_time": 0.42988975715637207} +{"epoch": 0, "iter": 18573, "iter_tflops": 26.091703042257762, "iter_time": 0.790714714050293, "loss": 0.614041805267334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.908132444446757, "step_time": 0.7392502365112305} +{"epoch": 0, "iter": 18574, "iter_tflops": 21.035206283768378, "iter_time": 0.9807887420654297, "loss": 0.6420724987983704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.259312232154727, "step_time": 0.8167717838287353} +{"epoch": 0, "iter": 18575, "iter_tflops": 43.156692460293115, "iter_time": 0.47805084991455077, "loss": 0.5287562012672424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.50924186759449, "step_time": 0.44359126663208004} +{"epoch": 0, "iter": 18576, "iter_tflops": 46.5976531456265, "iter_time": 0.44274962615966795, "loss": 0.5799732804298401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.306290423832564, "step_time": 0.4101096172332764} 
+{"epoch": 0, "iter": 18577, "iter_tflops": 20.861183103130845, "iter_time": 0.9889704437255861, "loss": 0.05717088654637337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.707024071978804, "step_time": 0.9504339904785155} +{"epoch": 0, "iter": 18578, "iter_tflops": 14.68568695053539, "iter_time": 1.4048436126708985, "loss": 0.08112771809101105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.338221212228408, "step_time": 1.262750286102295} +{"epoch": 0, "iter": 18579, "iter_tflops": 39.531625221392545, "iter_time": 0.5218883209228516, "loss": 0.12059461325407028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.50452722683586, "step_time": 0.47422865676879883} +{"epoch": 0, "iter": 18580, "iter_tflops": 43.504760295181384, "iter_time": 0.47422611618041993, "loss": 0.10875178128480911, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.762838861159565, "step_time": 0.43194864463806154} +{"epoch": 0, "iter": 18581, "iter_tflops": 20.822508492714, "iter_time": 0.9908073043823241, "loss": 0.5117083787918091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.218518437499416, "step_time": 0.928553970336914} +{"epoch": 0, "iter": 18582, "iter_tflops": 16.988460502454718, "iter_time": 1.2144180755615235, "loss": 0.6640320420265198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.418524522068513, "step_time": 1.0104105949401854} +{"epoch": 0, "iter": 18583, "iter_tflops": 45.87107051098858, "iter_time": 0.4497626342773438, "loss": 0.5576544404029846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.77378096880932, "step_time": 0.4144972133636474} +{"epoch": 0, "iter": 18584, "iter_tflops": 47.64371919723768, "iter_time": 0.4330286102294922, "loss": 0.5710852146148682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.57871436395908, "step_time": 0.39999239540100096} +{"epoch": 0, "iter": 18585, "iter_tflops": 40.89188416637498, "iter_time": 0.5045278282165527, "loss": 0.24778608977794647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.36834450512285, "step_time": 
0.46499579238891603} +{"epoch": 0, "iter": 18586, "iter_tflops": 45.23412843113363, "iter_time": 0.4560957450866699, "loss": 0.37091413140296936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.31965600357837, "step_time": 0.4183138160705567} +{"epoch": 0, "iter": 18587, "iter_tflops": 45.43721127470993, "iter_time": 0.4540572128295899, "loss": 0.21348395943641663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04079172004972, "step_time": 0.4206925048828125} +{"epoch": 0, "iter": 18588, "iter_tflops": 53.28417122954434, "iter_time": 0.3871899108886719, "loss": 0.28533273935317993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.72679127845044, "step_time": 0.35739200210571287} +{"epoch": 0, "iter": 18589, "iter_tflops": 25.8672504468257, "iter_time": 0.7975758209228516, "loss": 0.5306825637817383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.28108727801881, "step_time": 0.7562416152954101} +{"epoch": 0, "iter": 18590, "iter_tflops": 14.045865539597791, "iter_time": 1.468837463378906, "loss": 0.6071378588676453, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.54896303750873, "step_time": 1.2466698646545409} +{"epoch": 0, "iter": 18591, "iter_tflops": 32.284645618745905, "iter_time": 0.6390373229980469, "loss": 0.5225250720977783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.3085553055345, "step_time": 0.5843086280822753} +{"epoch": 0, "iter": 18592, "iter_tflops": 39.34288734068187, "iter_time": 0.5243919525146484, "loss": 0.6678368449211121, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.9475616358477, "step_time": 0.4803786926269532} +{"epoch": 0, "iter": 18593, "iter_tflops": 23.372247154041407, "iter_time": 0.8827175827026367, "loss": 0.7193505764007568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.07151395226307, "step_time": 0.8228898162841798} +{"epoch": 0, "iter": 18594, "iter_tflops": 15.162245222710137, "iter_time": 1.3606885528564454, "loss": 0.4172375202178955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.23012285835088, 
"step_time": 1.1317034816741944} +{"epoch": 0, "iter": 18595, "iter_tflops": 39.0344360750908, "iter_time": 0.5285357131958007, "loss": 0.49520114064216614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.917635866289, "step_time": 0.4921817054748535} +{"epoch": 0, "iter": 18596, "iter_tflops": 45.5797069572395, "iter_time": 0.4526376953125, "loss": 0.6096590757369995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.03716291182659, "step_time": 0.4207236366271973} +{"epoch": 0, "iter": 18597, "iter_tflops": 21.337847521663672, "iter_time": 0.966877914428711, "loss": 0.002260797191411257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.274251179844924, "step_time": 0.9262306213378907} +{"epoch": 0, "iter": 18598, "iter_tflops": 18.935954444290928, "iter_time": 1.0895195999145506, "loss": 0.011602341197431087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.546035373775233, "step_time": 0.8076045150756836} +{"epoch": 0, "iter": 18599, "iter_tflops": 53.654759297407864, "iter_time": 0.38451562881469725, "loss": 0.003393798600882292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.25102831556595, "step_time": 0.3481980667114258} +{"epoch": 0, "iter": 18600, "iter_tflops": 57.44347274361692, "iter_time": 0.3591547050476074, "loss": 0.013731776736676693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.75265514275277, "step_time": 0.32876845550537104} +{"epoch": 0, "iter": 18601, "iter_tflops": 47.93801752158641, "iter_time": 0.4303701858520508, "loss": 0.059840139001607895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.6775207496338, "step_time": 0.39164890861511226} +{"epoch": 0, "iter": 18602, "iter_tflops": 49.268106623474004, "iter_time": 0.41875149917602544, "loss": 0.07928327471017838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.17397449901985, "step_time": 0.3808303470611572} +{"epoch": 0, "iter": 18603, "iter_tflops": 50.93374378853572, "iter_time": 0.4050574722290039, "loss": 0.13435058295726776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
55.49914567448326, "step_time": 0.3717371368408204} +{"epoch": 0, "iter": 18604, "iter_tflops": 50.241966431001494, "iter_time": 0.4106346740722656, "loss": 0.13103222846984863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.46562869263012, "step_time": 0.3787910652160645} +{"epoch": 0, "iter": 18605, "iter_tflops": 37.88902834104796, "iter_time": 0.5248169097900391, "loss": 0.03244421258568764, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 41.1614739718244, "step_time": 0.48309258270263666} +{"epoch": 0, "iter": 18606, "iter_tflops": 14.589408646822905, "iter_time": 1.3629615325927733, "loss": 0.010281817056238651, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 18.164092390414798, "step_time": 1.0947314262390135} +{"epoch": 0, "iter": 18607, "iter_tflops": 48.70270957338815, "iter_time": 0.40828945541381834, "loss": 0.026040196418762207, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 54.508264932589505, "step_time": 0.3648034439086914} +{"epoch": 0, "iter": 18608, "iter_tflops": 54.74378200895938, "iter_time": 0.3632339973449707, "loss": 0.018012449145317078, "lr": 3e-05, "seqlen": 7904.0, "step_tflops": 59.933075594444816, "step_time": 0.33178345298767087} +{"epoch": 0, "iter": 18609, "iter_tflops": 21.575014194815807, "iter_time": 0.6512626647949218, "loss": 0.21952535212039948, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 22.963905669715547, "step_time": 0.6118733215332031} +{"epoch": 0, "iter": 18610, "iter_tflops": 25.498669835704206, "iter_time": 0.5510484008789063, "loss": 0.24953486025333405, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 27.21131460529703, "step_time": 0.5163661308288575} +{"epoch": 0, "iter": 18611, "iter_tflops": 24.02621232786431, "iter_time": 0.5848196563720703, "loss": 0.22720986604690552, "lr": 3e-05, "seqlen": 5632.0, "step_tflops": 25.695911805992996, "step_time": 0.5468185501098632} +{"epoch": 0, "iter": 18612, "iter_tflops": 25.495490798235334, "iter_time": 0.5511171112060547, "loss": 0.12439560145139694, "lr": 3e-05, "seqlen": 
5632.0, "step_tflops": 27.11236076433402, "step_time": 0.518250747680664} +{"epoch": 0, "iter": 18613, "iter_tflops": 42.659617566376234, "iter_time": 0.4836211547851562, "loss": 0.5226072072982788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.28791450017426, "step_time": 0.44571231460571287} +{"epoch": 0, "iter": 18614, "iter_tflops": 35.434051201526415, "iter_time": 0.5822391967773437, "loss": 0.6262640357017517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.487514103368255, "step_time": 0.5224713172912597} +{"epoch": 0, "iter": 18615, "iter_tflops": 41.629101599822185, "iter_time": 0.49559305191040043, "loss": 0.7539002895355225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.535556298101845, "step_time": 0.453076566696167} +{"epoch": 0, "iter": 18616, "iter_tflops": 44.05352528989226, "iter_time": 0.4683187866210937, "loss": 0.5876158475875854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.42750837091291, "step_time": 0.43500268554687493} +{"epoch": 0, "iter": 18617, "iter_tflops": 26.90760289866631, "iter_time": 0.7667384414672851, "loss": 0.19871564209461212, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.353848353576858, "step_time": 0.72762939453125} +{"epoch": 0, "iter": 18618, "iter_tflops": 9.901221371996744, "iter_time": 2.0836917724609374, "loss": 0.24594128131866455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.921576065433495, "step_time": 1.7305676193237305} +{"epoch": 0, "iter": 18619, "iter_tflops": 17.386760728847758, "iter_time": 1.186597885131836, "loss": 0.21657666563987732, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.701772582718398, "step_time": 1.047169406890869} +{"epoch": 0, "iter": 18620, "iter_tflops": 39.53523226151132, "iter_time": 0.521840705871582, "loss": 0.2099854052066803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.12365774676245, "step_time": 0.4784170589447021} +{"epoch": 0, "iter": 18621, "iter_tflops": 17.758243830349556, "iter_time": 0.86249853515625, "loss": 0.2160668671131134, "lr": 3e-05, 
"seqlen": 6128.0, "step_tflops": 19.474663529076068, "step_time": 0.786481330871582} +{"epoch": 0, "iter": 18622, "iter_tflops": 22.253554712703014, "iter_time": 0.6882702331542969, "loss": 0.24428744614124298, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 23.89769013187251, "step_time": 0.6409179801940917} +{"epoch": 0, "iter": 18623, "iter_tflops": 23.12720129948901, "iter_time": 0.6622703323364258, "loss": 0.2115085870027542, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 24.945931385763018, "step_time": 0.6139862670898437} +{"epoch": 0, "iter": 18624, "iter_tflops": 24.28415647320296, "iter_time": 0.6307181930541993, "loss": 0.20470333099365234, "lr": 3e-05, "seqlen": 6128.0, "step_tflops": 26.073012943973616, "step_time": 0.5874449310302734} +{"epoch": 0, "iter": 18625, "iter_tflops": 23.126615377639876, "iter_time": 0.8920930786132812, "loss": 0.6849762201309204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.091807764379585, "step_time": 0.822224277496338} +{"epoch": 0, "iter": 18626, "iter_tflops": 40.842586097777456, "iter_time": 0.5051368064880372, "loss": 0.6044392585754395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.274001718309975, "step_time": 0.465986644744873} +{"epoch": 0, "iter": 18627, "iter_tflops": 43.40312274142811, "iter_time": 0.47533661651611325, "loss": 0.5873761177062988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.59369647648432, "step_time": 0.44278722381591795} +{"epoch": 0, "iter": 18628, "iter_tflops": 46.528919412856965, "iter_time": 0.4434036674499512, "loss": 0.5858727693557739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26369420574137, "step_time": 0.4104571666717529} +{"epoch": 0, "iter": 18629, "iter_tflops": 20.9427409308875, "iter_time": 0.9851190719604492, "loss": 0.42026180028915405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.87160809634884, "step_time": 0.9432819671630859} +{"epoch": 0, "iter": 18630, "iter_tflops": 12.189753539998696, "iter_time": 1.6924947204589844, "loss": 0.5288299918174744, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.16402575122157, "step_time": 1.201996187210083} +{"epoch": 0, "iter": 18631, "iter_tflops": 35.39882829111497, "iter_time": 0.5828185424804688, "loss": 0.5753287672996521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.452342360127496, "step_time": 0.5365367164611816} +{"epoch": 0, "iter": 18632, "iter_tflops": 36.156911742368386, "iter_time": 0.5705988845825196, "loss": 0.5090878009796143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.008708241651654, "step_time": 0.5288843040466309} +{"epoch": 0, "iter": 18633, "iter_tflops": 29.45062183518704, "iter_time": 0.7005316772460938, "loss": 0.04866909980773926, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.27392962867306, "step_time": 0.6392495040893554} +{"epoch": 0, "iter": 18634, "iter_tflops": 44.41940341131997, "iter_time": 0.46446129226684574, "loss": 0.04521657153964043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.0552716146946, "step_time": 0.41216624832153326} +{"epoch": 0, "iter": 18635, "iter_tflops": 48.703736053661515, "iter_time": 0.42360391998291014, "loss": 0.047087814658880234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.8363673290039, "step_time": 0.39047146034240726} +{"epoch": 0, "iter": 18636, "iter_tflops": 51.94713760236257, "iter_time": 0.39715554046630863, "loss": 0.054467856884002686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.64643100533053, "step_time": 0.36420817947387696} +{"epoch": 0, "iter": 18637, "iter_tflops": 42.30294493913678, "iter_time": 0.4876987533569336, "loss": 0.5818747282028198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.060775620780745, "step_time": 0.44791024971008303} +{"epoch": 0, "iter": 18638, "iter_tflops": 45.200252464058494, "iter_time": 0.45643757247924804, "loss": 0.4814037084579468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.82579501646166, "step_time": 0.422544958114624} +{"epoch": 0, "iter": 18639, "iter_tflops": 48.62308700439045, "iter_time": 0.42430653381347655, "loss": 
0.5547237992286682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.53233212328132, "step_time": 0.3927313461303711} +{"epoch": 0, "iter": 18640, "iter_tflops": 48.87421868191773, "iter_time": 0.42212630844116206, "loss": 0.5560317039489746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.9134520675226, "step_time": 0.3899026184082031} +{"epoch": 0, "iter": 18641, "iter_tflops": 32.57216845714663, "iter_time": 0.6333963775634766, "loss": 0.5433772206306458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.82131290525933, "step_time": 0.5924846534729004} +{"epoch": 0, "iter": 18642, "iter_tflops": 21.845239635822416, "iter_time": 0.9444205627441405, "loss": 0.42648595571517944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.85313638134962, "step_time": 0.830120319366455} +{"epoch": 0, "iter": 18643, "iter_tflops": 46.76072765765934, "iter_time": 0.4412055702209473, "loss": 0.41880935430526733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.68772284113049, "step_time": 0.40702348327636717} +{"epoch": 0, "iter": 18644, "iter_tflops": 43.23108503122708, "iter_time": 0.477228214263916, "loss": 0.4380546808242798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15203041761886, "step_time": 0.44702461242675784} +{"epoch": 0, "iter": 18645, "iter_tflops": 19.56421421755206, "iter_time": 0.7640787887573242, "loss": 0.0018961101304739714, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 20.750487804576025, "step_time": 0.720397575378418} +{"epoch": 0, "iter": 18646, "iter_tflops": 18.517517138041473, "iter_time": 0.8072681121826171, "loss": 0.0018352980259805918, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 22.72110071574167, "step_time": 0.6579171180725097} +{"epoch": 0, "iter": 18647, "iter_tflops": 42.98704881931608, "iter_time": 0.3477466239929199, "loss": 0.0019400542369112372, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 47.16059962003212, "step_time": 0.3169722442626953} +{"epoch": 0, "iter": 18648, "iter_tflops": 41.40140155626556, "iter_time": 
0.3610650978088379, "loss": 0.00670417957007885, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 45.58263172347317, "step_time": 0.3279451084136963} +{"epoch": 0, "iter": 18649, "iter_tflops": 32.079558717487465, "iter_time": 0.6431227340698243, "loss": 0.646408200263977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.39248472871045, "step_time": 0.599872142791748} +{"epoch": 0, "iter": 18650, "iter_tflops": 20.061977996295884, "iter_time": 1.0283678665161133, "loss": 0.5076609253883362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.323599314623127, "step_time": 0.9241831130981446} +{"epoch": 0, "iter": 18651, "iter_tflops": 35.163597209048, "iter_time": 0.5867173767089844, "loss": 0.6058917045593262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.37600024840184, "step_time": 0.5376040592193604} +{"epoch": 0, "iter": 18652, "iter_tflops": 40.12845767194161, "iter_time": 0.5141262512207032, "loss": 0.5702970027923584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.5706366153086, "step_time": 0.4735091133117676} +{"epoch": 0, "iter": 18653, "iter_tflops": 18.615965264327862, "iter_time": 1.1082473144531249, "loss": 0.4549417495727539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.894868328358623, "step_time": 1.0370057830810548} +{"epoch": 0, "iter": 18654, "iter_tflops": 18.59886703067499, "iter_time": 1.109266143798828, "loss": 0.332535058259964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.061514212046962, "step_time": 0.8574312210083007} +{"epoch": 0, "iter": 18655, "iter_tflops": 47.756161469181464, "iter_time": 0.4320090408325195, "loss": 0.3772597908973694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.68951256317572, "step_time": 0.3991349983215332} +{"epoch": 0, "iter": 18656, "iter_tflops": 48.11823908792952, "iter_time": 0.42875828170776364, "loss": 0.37339499592781067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.94924976327344, "step_time": 0.39713939285278327} +{"epoch": 0, "iter": 18657, "iter_tflops": 23.7806483575237, 
"iter_time": 0.8675580749511719, "loss": 0.6298198103904724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.857575906083103, "step_time": 0.8299720611572267} +{"epoch": 0, "iter": 18658, "iter_tflops": 11.169801170212525, "iter_time": 1.8470421447753906, "loss": 0.5641184449195862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.782477062220469, "step_time": 1.496907516479492} +{"epoch": 0, "iter": 18659, "iter_tflops": 13.058812108038154, "iter_time": 1.5798598937988282, "loss": 0.48341822624206543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.58852527139964, "step_time": 1.3234794921875002} +{"epoch": 0, "iter": 18660, "iter_tflops": 33.783967461771766, "iter_time": 0.6106770477294923, "loss": 0.5629691481590271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.62811858223142, "step_time": 0.5632583465576173} +{"epoch": 0, "iter": 18661, "iter_tflops": 16.3940442715064, "iter_time": 0.9218010025024413, "loss": 0.17305245995521545, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 17.572707936330634, "step_time": 0.8599725494384766} +{"epoch": 0, "iter": 18662, "iter_tflops": 26.760339790509647, "iter_time": 0.564718032836914, "loss": 0.3274965286254883, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.734211984197135, "step_time": 0.5259252090454101} +{"epoch": 0, "iter": 18663, "iter_tflops": 26.55681890931544, "iter_time": 0.5690458068847656, "loss": 0.20943810045719147, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.347235027649894, "step_time": 0.5331047782897949} +{"epoch": 0, "iter": 18664, "iter_tflops": 25.878088701947515, "iter_time": 0.5839707336425782, "loss": 0.18851439654827118, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.45003341724882, "step_time": 0.55052925491333} +{"epoch": 0, "iter": 18665, "iter_tflops": 43.33030423934658, "iter_time": 0.476135440826416, "loss": 0.003500340972095728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31848133022251, "step_time": 0.4360049800872803} +{"epoch": 0, "iter": 18666, "iter_tflops": 
41.576362429243254, "iter_time": 0.4962217063903809, "loss": 0.0036984123289585114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.177413749652736, "step_time": 0.446778886795044} +{"epoch": 0, "iter": 18667, "iter_tflops": 47.106401097877, "iter_time": 0.43796794128417965, "loss": 0.002637546043843031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.21099554765382, "step_time": 0.3951484413146973} +{"epoch": 0, "iter": 18668, "iter_tflops": 42.10764303331071, "iter_time": 0.48996077728271487, "loss": 0.002062878804281354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.70074156065696, "step_time": 0.44177228927612305} +{"epoch": 0, "iter": 18669, "iter_tflops": 31.85527209227963, "iter_time": 0.6476508331298828, "loss": 0.05248544365167618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.61703033574009, "step_time": 0.5959810333251953} +{"epoch": 0, "iter": 18670, "iter_tflops": 10.72026965002719, "iter_time": 1.9244938964843747, "loss": 0.13056598603725433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.177052759633815, "step_time": 1.5656834564208983} +{"epoch": 0, "iter": 18671, "iter_tflops": 12.288341707175276, "iter_time": 1.678916000366211, "loss": 0.13762803375720978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.237439291989238, "step_time": 1.44907332611084} +{"epoch": 0, "iter": 18672, "iter_tflops": 22.508479501776705, "iter_time": 0.9165920562744141, "loss": 0.11464633792638779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.47609374308149, "step_time": 0.7245057449340822} +{"epoch": 0, "iter": 18673, "iter_tflops": 15.283187430944277, "iter_time": 0.9888020095825195, "loss": 0.1384539008140564, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 16.289388226137476, "step_time": 0.9277233886718751} +{"epoch": 0, "iter": 18674, "iter_tflops": 6.243522491864912, "iter_time": 2.4204359741210935, "loss": 0.20205941796302795, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 7.4376746126716204, "step_time": 2.0318240890502928} +{"epoch": 0, "iter": 
18675, "iter_tflops": 9.316153363677845, "iter_time": 1.622133712768555, "loss": 0.1917031854391098, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 10.781440734476387, "step_time": 1.4016722640991208} +{"epoch": 0, "iter": 18676, "iter_tflops": 26.776839919295426, "iter_time": 0.5643700485229493, "loss": 0.2346910983324051, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.619223303359092, "step_time": 0.5280383148193359} +{"epoch": 0, "iter": 18677, "iter_tflops": 18.650566333547417, "iter_time": 0.823425720214844, "loss": 0.17790240049362183, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 19.635238426836796, "step_time": 0.7821323928833008} +{"epoch": 0, "iter": 18678, "iter_tflops": 5.996311937766849, "iter_time": 2.5611336059570315, "loss": 0.31531283259391785, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 7.867447651935258, "step_time": 1.952012481689453} +{"epoch": 0, "iter": 18679, "iter_tflops": 9.12911133017088, "iter_time": 1.6822399749755863, "loss": 0.2752029597759247, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 10.801668452289338, "step_time": 1.4217577667236325} +{"epoch": 0, "iter": 18680, "iter_tflops": 26.051329979218423, "iter_time": 0.5895037231445313, "loss": 0.15668551623821259, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 27.92399726348876, "step_time": 0.5499698295593262} +{"epoch": 0, "iter": 18681, "iter_tflops": 26.435617783285103, "iter_time": 0.5824814682006836, "loss": 0.22419136762619019, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 28.53068542534117, "step_time": 0.5397086410522461} +{"epoch": 0, "iter": 18682, "iter_tflops": 23.361882708137678, "iter_time": 0.6591188583374024, "loss": 0.21809978783130646, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.168482437720737, "step_time": 0.6118071479797363} +{"epoch": 0, "iter": 18683, "iter_tflops": 22.99900425973162, "iter_time": 0.6695184402465819, "loss": 0.16947191953659058, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 24.80121511641383, "step_time": 0.620867057800293} +{"epoch": 
0, "iter": 18684, "iter_tflops": 23.63136060928619, "iter_time": 0.6516026611328126, "loss": 0.10764486342668533, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 25.293304570464983, "step_time": 0.6087878875732421} +{"epoch": 0, "iter": 18685, "iter_tflops": 26.49566438752914, "iter_time": 0.7786592254638672, "loss": 0.429350882768631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.20477193047842, "step_time": 0.7064288520812989} +{"epoch": 0, "iter": 18686, "iter_tflops": 37.229735602080815, "iter_time": 0.5541563262939453, "loss": 0.4365607798099518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.18438171334185, "step_time": 0.5009445972442627} +{"epoch": 0, "iter": 18687, "iter_tflops": 37.53293048725677, "iter_time": 0.5496797943115235, "loss": 0.5930400490760803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.801261668180445, "step_time": 0.5056484203338623} +{"epoch": 0, "iter": 18688, "iter_tflops": 39.671884771815115, "iter_time": 0.5200431900024414, "loss": 0.4309889078140259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.042407045491586, "step_time": 0.479320161819458} +{"epoch": 0, "iter": 18689, "iter_tflops": 22.924813327066758, "iter_time": 0.8999459762573242, "loss": 0.3592557907104492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.879035329709183, "step_time": 0.8292561683654787} +{"epoch": 0, "iter": 18690, "iter_tflops": 18.243664139884824, "iter_time": 1.1308634796142578, "loss": 0.32276928424835205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.720444076384453, "step_time": 0.9498467636108398} +{"epoch": 0, "iter": 18691, "iter_tflops": 37.62677804877388, "iter_time": 0.5483087997436523, "loss": 0.2585354149341583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.85124155105759, "step_time": 0.5050297794342041} +{"epoch": 0, "iter": 18692, "iter_tflops": 39.90842864458306, "iter_time": 0.516960807800293, "loss": 0.4239501655101776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.6820369612603, "step_time": 0.47230154418945314} 
+{"epoch": 0, "iter": 18693, "iter_tflops": 15.697263699726689, "iter_time": 1.208793212890625, "loss": 0.07304315268993378, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 16.510079701960297, "step_time": 1.1492825088500975} +{"epoch": 0, "iter": 18694, "iter_tflops": 17.59419981527461, "iter_time": 1.0784659729003905, "loss": 0.07811761647462845, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 22.88702256375522, "step_time": 0.8290613498687744} +{"epoch": 0, "iter": 18695, "iter_tflops": 44.791687019801415, "iter_time": 0.42362203979492186, "loss": 0.062445562332868576, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 49.05057996571113, "step_time": 0.38684039688110355} +{"epoch": 0, "iter": 18696, "iter_tflops": 47.9944727608418, "iter_time": 0.3953527297973633, "loss": 0.10327926278114319, "lr": 3e-05, "seqlen": 7552.0, "step_tflops": 52.2865473308324, "step_time": 0.3628991928100586} +{"epoch": 0, "iter": 18697, "iter_tflops": 41.705421351319565, "iter_time": 0.4946861305236816, "loss": 0.061544328927993774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.573130215973926, "step_time": 0.45270301628112797} +{"epoch": 0, "iter": 18698, "iter_tflops": 38.308482379551485, "iter_time": 0.5385515747070312, "loss": 0.17056812345981598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.183695697339196, "step_time": 0.48907743072509763} +{"epoch": 0, "iter": 18699, "iter_tflops": 42.785028434538006, "iter_time": 0.4822035713195801, "loss": 0.09537787735462189, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79125500549181, "step_time": 0.4409177207946778} +{"epoch": 0, "iter": 18700, "iter_tflops": 42.94143763738888, "iter_time": 0.4804472007751465, "loss": 0.1749541461467743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.60638721342619, "step_time": 0.44266665458679194} +{"epoch": 0, "iter": 18701, "iter_tflops": 25.78754723151351, "iter_time": 0.8000409393310547, "loss": 0.6700419783592224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.807328982896138, "step_time": 
0.7419300689697266} +{"epoch": 0, "iter": 18702, "iter_tflops": 10.961627622841524, "iter_time": 1.8821195373535158, "loss": 0.604404628276825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.871093696235361, "step_time": 1.4873443984985353} +{"epoch": 0, "iter": 18703, "iter_tflops": 8.990669484700213, "iter_time": 2.2947227172851563, "loss": 0.6986167430877686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.251596641047643, "step_time": 2.0124761276245113} +{"epoch": 0, "iter": 18704, "iter_tflops": 37.73155056119103, "iter_time": 0.546786262512207, "loss": 0.5301849246025085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.10487158410249, "step_time": 0.42887742614746094} +{"epoch": 0, "iter": 18705, "iter_tflops": 20.47703585625628, "iter_time": 0.7499794464111329, "loss": 0.3688488304615021, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 21.758125065835785, "step_time": 0.705821662902832} +{"epoch": 0, "iter": 18706, "iter_tflops": 13.419029821670687, "iter_time": 1.1444460754394532, "loss": 0.30797266960144043, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 14.970789782955762, "step_time": 1.0258213653564454} +{"epoch": 0, "iter": 18707, "iter_tflops": 28.48603330557998, "iter_time": 0.5391187973022461, "loss": 0.19363370537757874, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 30.361551903957796, "step_time": 0.5058159103393555} +{"epoch": 0, "iter": 18708, "iter_tflops": 27.55976563866888, "iter_time": 0.557238265991211, "loss": 0.21255706250667572, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 29.108487052083735, "step_time": 0.5275903205871583} +{"epoch": 0, "iter": 18709, "iter_tflops": 36.03997217517037, "iter_time": 0.5724503173828125, "loss": 0.20589128136634827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.82797329796723, "step_time": 0.531346134185791} +{"epoch": 0, "iter": 18710, "iter_tflops": 10.445221076360111, "iter_time": 1.9751705932617187, "loss": 0.2805101275444031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.904715578382126, 
"step_time": 1.5987251625061034} +{"epoch": 0, "iter": 18711, "iter_tflops": 37.571220496743464, "iter_time": 0.5491195983886719, "loss": 0.2951319217681885, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.44369289085585, "step_time": 0.4978102111816406} +{"epoch": 0, "iter": 18712, "iter_tflops": 40.5869246623096, "iter_time": 0.5083187179565429, "loss": 0.3479796051979065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.35577418274395, "step_time": 0.46512757110595704} +{"epoch": 0, "iter": 18713, "iter_tflops": 17.290588089248647, "iter_time": 1.1931979064941407, "loss": 0.5388097167015076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.539391653919896, "step_time": 1.1128247299194336} +{"epoch": 0, "iter": 18714, "iter_tflops": 17.473825071257522, "iter_time": 1.1806855926513673, "loss": 0.6278939247131348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.22532797570746, "step_time": 0.9282694740295411} +{"epoch": 0, "iter": 18715, "iter_tflops": 45.342035887030804, "iter_time": 0.4550103034973144, "loss": 0.4364847242832184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.75991753388749, "step_time": 0.42311584091186527} +{"epoch": 0, "iter": 18716, "iter_tflops": 45.18214560127242, "iter_time": 0.456620491027832, "loss": 0.7136871218681335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.77677038303996, "step_time": 0.4229696502685547} +{"epoch": 0, "iter": 18717, "iter_tflops": 26.00674220388431, "iter_time": 0.7932978820800781, "loss": 0.2920554578304291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.405684004226746, "step_time": 0.752803451538086} +{"epoch": 0, "iter": 18718, "iter_tflops": 16.14790922919598, "iter_time": 1.2776324920654294, "loss": 0.2096920907497406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.190611009176322, "step_time": 1.0750618362426758} +{"epoch": 0, "iter": 18719, "iter_tflops": 49.5484087637159, "iter_time": 0.4163825645446777, "loss": 0.24803492426872253, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
53.810380165619, "step_time": 0.383403600692749} +{"epoch": 0, "iter": 18720, "iter_tflops": 47.95927291662456, "iter_time": 0.4301794471740722, "loss": 0.2984725832939148, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.3904889401594, "step_time": 0.3937946357727051} +{"epoch": 0, "iter": 18721, "iter_tflops": 19.7159139148256, "iter_time": 1.0464183197021484, "loss": 0.4820064902305603, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.57384769561241, "step_time": 1.002782455444336} +{"epoch": 0, "iter": 18722, "iter_tflops": 14.360820443739431, "iter_time": 1.4366235961914062, "loss": 0.525684654712677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.452464085316805, "step_time": 1.1180671272277833} +{"epoch": 0, "iter": 18723, "iter_tflops": 43.95304264492971, "iter_time": 0.4693894271850586, "loss": 0.3944675624370575, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.41325549859633, "step_time": 0.435133451461792} +{"epoch": 0, "iter": 18724, "iter_tflops": 45.96006073207133, "iter_time": 0.44889178085327147, "loss": 0.40421295166015625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.294382050569716, "step_time": 0.4185282917022705} +{"epoch": 0, "iter": 18725, "iter_tflops": 27.04049742397474, "iter_time": 0.7629701919555665, "loss": 0.12333594262599945, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.746044652388846, "step_time": 0.7177019920349121} +{"epoch": 0, "iter": 18726, "iter_tflops": 16.880278646593187, "iter_time": 1.2222010040283204, "loss": 0.09906357526779175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.34758956302595, "step_time": 1.013933048248291} +{"epoch": 0, "iter": 18727, "iter_tflops": 49.86104195628857, "iter_time": 0.41377180862426755, "loss": 0.19027462601661682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.37180158649166, "step_time": 0.37944472885131836} +{"epoch": 0, "iter": 18728, "iter_tflops": 48.67188867436869, "iter_time": 0.42388109588623046, "loss": 0.16147977113723755, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 52.8653351345643, "step_time": 0.39025749969482426} +{"epoch": 0, "iter": 18729, "iter_tflops": 42.68714806380147, "iter_time": 0.48330924987792967, "loss": 0.4689768850803375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.616216828600095, "step_time": 0.44257331275939943} +{"epoch": 0, "iter": 18730, "iter_tflops": 47.03290303874373, "iter_time": 0.4386523513793945, "loss": 0.38069066405296326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.7850545490519, "step_time": 0.3983986053466797} +{"epoch": 0, "iter": 18731, "iter_tflops": 47.77612939987596, "iter_time": 0.43182848358154297, "loss": 0.35233932733535767, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.93416851907213, "step_time": 0.3972547187805176} +{"epoch": 0, "iter": 18732, "iter_tflops": 46.243360257193686, "iter_time": 0.44614174652099603, "loss": 0.3203287124633789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.43425304193525, "step_time": 0.40906908035278317} +{"epoch": 0, "iter": 18733, "iter_tflops": 2.631064372223189, "iter_time": 0.6404668731689453, "loss": 0.6419578194618225, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 2.8096398700374547, "step_time": 0.5997599868774415} +{"epoch": 0, "iter": 18734, "iter_tflops": 0.7676345935439614, "iter_time": 2.195197540283203, "loss": 0.9366538524627686, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 0.9106616423410964, "step_time": 1.850423355102539} +{"epoch": 0, "iter": 18735, "iter_tflops": 0.945105527942945, "iter_time": 1.7829856262207031, "loss": 0.7985701560974121, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 1.0933484818912003, "step_time": 1.5412374000549318} +{"epoch": 0, "iter": 18736, "iter_tflops": 2.376028426436416, "iter_time": 0.7092127151489257, "loss": 0.7522911429405212, "lr": 3e-05, "seqlen": 688.0, "step_tflops": 2.6647309445372573, "step_time": 0.6323751277923584} +{"epoch": 0, "iter": 18737, "iter_tflops": 17.864805732288435, "iter_time": 0.850487693786621, "loss": 0.13159587979316711, "lr": 3e-05, "seqlen": 
6080.0, "step_tflops": 19.516688836939856, "step_time": 0.7785028266906738} +{"epoch": 0, "iter": 18738, "iter_tflops": 26.529944718991814, "iter_time": 0.5727036972045899, "loss": 0.20138677954673767, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.361375666701786, "step_time": 0.5357214546203612} +{"epoch": 0, "iter": 18739, "iter_tflops": 27.11823573114323, "iter_time": 0.5602797164916993, "loss": 0.22144360840320587, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 28.81335744415276, "step_time": 0.5273178405761718} +{"epoch": 0, "iter": 18740, "iter_tflops": 27.52073743022924, "iter_time": 0.5520854034423829, "loss": 0.19294707477092743, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.29791178642242, "step_time": 0.5185965995788575} +{"epoch": 0, "iter": 18741, "iter_tflops": 30.377143679452367, "iter_time": 0.6791650238037108, "loss": 0.5695546865463257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.29211645661339, "step_time": 0.6388894805908203} +{"epoch": 0, "iter": 18742, "iter_tflops": 8.865798262945807, "iter_time": 2.3270429687500003, "loss": 0.47116148471832275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.616334940942588, "step_time": 1.7760415496826172} +{"epoch": 0, "iter": 18743, "iter_tflops": 10.436988544477826, "iter_time": 1.9767285766601561, "loss": 0.49449047446250916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.573749827846315, "step_time": 1.640806743621826} +{"epoch": 0, "iter": 18744, "iter_tflops": 38.37955485198171, "iter_time": 0.5375542678833007, "loss": 0.5244315266609192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.13811061200443, "step_time": 0.42858128929138184} +{"epoch": 0, "iter": 18745, "iter_tflops": 23.78578135072512, "iter_time": 0.6714672470092772, "loss": 0.16559413075447083, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 25.29325791968284, "step_time": 0.6314478416442871} +{"epoch": 0, "iter": 18746, "iter_tflops": 13.553008393180376, "iter_time": 1.178437484741211, "loss": 0.09963428229093552, "lr": 
3e-05, "seqlen": 6384.0, "step_tflops": 17.349063797196962, "step_time": 0.9205899124145507} +{"epoch": 0, "iter": 18747, "iter_tflops": 25.759721967570986, "iter_time": 0.620013412475586, "loss": 0.3052423596382141, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 27.65304870741988, "step_time": 0.5775628318786621} +{"epoch": 0, "iter": 18748, "iter_tflops": 27.00514464931425, "iter_time": 0.5914196472167969, "loss": 0.16315022110939026, "lr": 3e-05, "seqlen": 6384.0, "step_tflops": 28.8917906122287, "step_time": 0.5527996978759766} +{"epoch": 0, "iter": 18749, "iter_tflops": 20.812370989048002, "iter_time": 0.9912899169921876, "loss": 0.056852422654628754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.780503405357738, "step_time": 0.9056469535827637} +{"epoch": 0, "iter": 18750, "iter_tflops": 46.353882235283685, "iter_time": 0.44507800674438475, "loss": 0.023893889039754868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.784991266988925, "step_time": 0.4062439117431641} +{"epoch": 0, "iter": 18751, "iter_tflops": 54.91911129401595, "iter_time": 0.37566328048706055, "loss": 0.04044140875339508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.88782518214099, "step_time": 0.3444956207275391} +{"epoch": 0, "iter": 18752, "iter_tflops": 55.6758980850642, "iter_time": 0.3705569953918458, "loss": 0.043556421995162964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.263509489002054, "step_time": 0.3367599029541015} +{"epoch": 0, "iter": 18753, "iter_tflops": 29.35660533452033, "iter_time": 0.7027751770019531, "loss": 0.5412589907646179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.060821641368225, "step_time": 0.6642159614562988} +{"epoch": 0, "iter": 18754, "iter_tflops": 10.31924247864708, "iter_time": 1.9992837219238284, "loss": 0.49673670530319214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.49239800219101, "step_time": 1.6514918518066406} +{"epoch": 0, "iter": 18755, "iter_tflops": 14.6094489654448, "iter_time": 1.4121746520996095, "loss": 
0.6028472185134888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.264746910496953, "step_time": 1.1949838371276857} +{"epoch": 0, "iter": 18756, "iter_tflops": 25.952383017972117, "iter_time": 0.7949595031738281, "loss": 0.6005715727806091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.6010503420982, "step_time": 0.652861005783081} +{"epoch": 0, "iter": 18757, "iter_tflops": 16.7628325652389, "iter_time": 0.9088368530273437, "loss": 0.23543240129947662, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 17.986294466606186, "step_time": 0.8470160446166992} +{"epoch": 0, "iter": 18758, "iter_tflops": 11.9560415455638, "iter_time": 1.2742244110107421, "loss": 0.1704540252685547, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 15.169037385157026, "step_time": 1.004327407836914} +{"epoch": 0, "iter": 18759, "iter_tflops": 22.60537611477647, "iter_time": 0.6739405670166015, "loss": 0.23761101067066193, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.422010553015955, "step_time": 0.6238094100952148} +{"epoch": 0, "iter": 18760, "iter_tflops": 22.69210008886581, "iter_time": 0.6713649215698242, "loss": 0.27661997079849243, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.322960257558975, "step_time": 0.6263497467041016} +{"epoch": 0, "iter": 18761, "iter_tflops": 17.586385568616006, "iter_time": 1.173128692626953, "loss": 0.10987488180398941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.704015271994653, "step_time": 1.1030301895141603} +{"epoch": 0, "iter": 18762, "iter_tflops": 32.44218133658761, "iter_time": 0.6359342269897461, "loss": 0.019377904012799263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.46890892381853, "step_time": 0.4975075073242187} +{"epoch": 0, "iter": 18763, "iter_tflops": 51.673013680451064, "iter_time": 0.3992624397277832, "loss": 0.011742768809199333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.459169269022986, "step_time": 0.3654161720275879} +{"epoch": 0, "iter": 18764, "iter_tflops": 52.060657321016556, "iter_time": 
0.39628953170776365, "loss": 0.03471839055418968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.9083016113584, "step_time": 0.3625322303771973} +{"epoch": 0, "iter": 18765, "iter_tflops": 36.35930388337463, "iter_time": 0.5674226760864258, "loss": 0.5035350322723389, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.0061756114596, "step_time": 0.5289186439514161} +{"epoch": 0, "iter": 18766, "iter_tflops": 12.752015730921677, "iter_time": 1.6178692016601564, "loss": 0.3224530816078186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.837917668867687, "step_time": 1.3904305152893068} +{"epoch": 0, "iter": 18767, "iter_tflops": 14.873799142173148, "iter_time": 1.3870762481689454, "loss": 0.5056906342506409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.621074823539566, "step_time": 1.1708192443847656} +{"epoch": 0, "iter": 18768, "iter_tflops": 19.606507924139564, "iter_time": 1.0522574234008788, "loss": 0.5233502388000488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.296914170624795, "step_time": 0.8855719413757324} +{"epoch": 0, "iter": 18769, "iter_tflops": 23.402255505162312, "iter_time": 0.57605517578125, "loss": 0.18524719774723053, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 25.168263530479862, "step_time": 0.5356345062255858} +{"epoch": 0, "iter": 18770, "iter_tflops": 23.91898943366772, "iter_time": 0.5636103668212891, "loss": 0.18092086911201477, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 25.486966205114037, "step_time": 0.5289366455078125} +{"epoch": 0, "iter": 18771, "iter_tflops": 25.131023044389313, "iter_time": 0.5364282379150391, "loss": 0.2995545268058777, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 26.740334228072296, "step_time": 0.5041444244384765} +{"epoch": 0, "iter": 18772, "iter_tflops": 25.30480672576368, "iter_time": 0.5327442550659179, "loss": 0.2760729193687439, "lr": 3e-05, "seqlen": 5408.0, "step_tflops": 26.888369515820084, "step_time": 0.5013688316345215} +{"epoch": 0, "iter": 18773, "iter_tflops": 22.367871752677697, 
"iter_time": 0.922353889465332, "loss": 0.5756102800369263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.39706046824457, "step_time": 0.8817814331054687} +{"epoch": 0, "iter": 18774, "iter_tflops": 17.02713584957718, "iter_time": 1.2116596527099608, "loss": 0.5809282064437866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.215134167400745, "step_time": 0.9286954269409179} +{"epoch": 0, "iter": 18775, "iter_tflops": 41.20610268262341, "iter_time": 0.500680534362793, "loss": 0.6285800337791443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87840583499215, "step_time": 0.47018785476684577} +{"epoch": 0, "iter": 18776, "iter_tflops": 41.94675484629499, "iter_time": 0.4918400382995606, "loss": 0.5860389471054077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.25313063544131, "step_time": 0.45590422630310057} +{"epoch": 0, "iter": 18777, "iter_tflops": 22.18451950032069, "iter_time": 0.9299770278930664, "loss": 0.5618537664413452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.28804053921943, "step_time": 0.8859093780517578} +{"epoch": 0, "iter": 18778, "iter_tflops": 14.207686494530728, "iter_time": 1.4521078796386717, "loss": 0.5070648193359375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.867525706622697, "step_time": 1.0934710693359375} +{"epoch": 0, "iter": 18779, "iter_tflops": 42.278808967362544, "iter_time": 0.4879771690368653, "loss": 0.715392529964447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.563084508072315, "step_time": 0.4528028278350831} +{"epoch": 0, "iter": 18780, "iter_tflops": 41.32800907636969, "iter_time": 0.4992036628723145, "loss": 0.38458091020584106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.46806762725204, "step_time": 0.46395300292968755} +{"epoch": 0, "iter": 18781, "iter_tflops": 28.000959396586524, "iter_time": 0.7367995223999023, "loss": 0.6194372177124023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.652925024545652, "step_time": 0.6957523918151856} +{"epoch": 0, "iter": 18782, "iter_tflops": 
17.357808978929523, "iter_time": 1.1885770568847656, "loss": 0.6109498739242554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.863165344982946, "step_time": 0.988876480102539} +{"epoch": 0, "iter": 18783, "iter_tflops": 37.52800350513182, "iter_time": 0.5497519607543945, "loss": 0.5031754970550537, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.94507619569832, "step_time": 0.503872392654419} +{"epoch": 0, "iter": 18784, "iter_tflops": 37.95798752947776, "iter_time": 0.5435244293212891, "loss": 0.6044227480888367, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.39201914277108, "step_time": 0.49843167686462403} +{"epoch": 0, "iter": 18785, "iter_tflops": 18.66405926673791, "iter_time": 1.1053915557861327, "loss": 0.5580352544784546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.025234228628925, "step_time": 1.0302547912597657} +{"epoch": 0, "iter": 18786, "iter_tflops": 28.295273490851695, "iter_time": 0.7291356811523438, "loss": 0.6059563755989075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.260204825461166, "step_time": 0.6599794731140137} +{"epoch": 0, "iter": 18787, "iter_tflops": 40.41150377701493, "iter_time": 0.5105252609252929, "loss": 0.580400288105011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10930156606472, "step_time": 0.4677265968322754} +{"epoch": 0, "iter": 18788, "iter_tflops": 41.13349796964854, "iter_time": 0.5015642852783203, "loss": 0.6234132647514343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.4796281800513, "step_time": 0.4638324184417725} +{"epoch": 0, "iter": 18789, "iter_tflops": 19.129539112133575, "iter_time": 1.0784940185546876, "loss": 0.21867920458316803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.694439759901922, "step_time": 0.9969389724731446} +{"epoch": 0, "iter": 18790, "iter_tflops": 22.745614297390517, "iter_time": 0.9070361099243164, "loss": 0.1978970617055893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.556524007105338, "step_time": 0.7486827259063721} +{"epoch": 0, "iter": 18791, 
"iter_tflops": 43.11539884862963, "iter_time": 0.4785087013244629, "loss": 0.23862388730049133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1799094429478, "step_time": 0.4372855682373047} +{"epoch": 0, "iter": 18792, "iter_tflops": 44.74931286881113, "iter_time": 0.46103710174560547, "loss": 0.28914186358451843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.75932363321825, "step_time": 0.4231209945678711} +{"epoch": 0, "iter": 18793, "iter_tflops": 24.140049711850832, "iter_time": 0.8546417160034179, "loss": 0.2195434868335724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.843621201762858, "step_time": 0.7983050575256346} +{"epoch": 0, "iter": 18794, "iter_tflops": 8.895934817947007, "iter_time": 2.3191596984863283, "loss": 0.25947174429893494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.52432515912494, "step_time": 1.9603246002197268} +{"epoch": 0, "iter": 18795, "iter_tflops": 12.23923257889619, "iter_time": 1.6856525421142579, "loss": 0.1768454909324646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.3525510648009, "step_time": 1.437451322555542} +{"epoch": 0, "iter": 18796, "iter_tflops": 45.239532001110575, "iter_time": 0.4560412673950195, "loss": 0.21739819645881653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.079572017335956, "step_time": 0.42036009407043456} +{"epoch": 0, "iter": 18797, "iter_tflops": 28.05119894459741, "iter_time": 0.5781313400268554, "loss": 0.12424883991479874, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.150387716014176, "step_time": 0.5378795585632324} +{"epoch": 0, "iter": 18798, "iter_tflops": 26.9045812694686, "iter_time": 0.6027701034545899, "loss": 0.14766323566436768, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 28.62144772841049, "step_time": 0.5666127510070801} +{"epoch": 0, "iter": 18799, "iter_tflops": 28.604481369485214, "iter_time": 0.5669488296508789, "loss": 0.24918363988399506, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.375879264785755, "step_time": 0.5338866767883301} +{"epoch": 0, 
"iter": 18800, "iter_tflops": 28.9109440525091, "iter_time": 0.5609390411376953, "loss": 0.21241407096385956, "lr": 3e-05, "seqlen": 6480.0, "step_tflops": 30.819296678286648, "step_time": 0.5262052993774414} +{"epoch": 0, "iter": 18801, "iter_tflops": 39.90307852130931, "iter_time": 0.5170301208496094, "loss": 0.45584481954574585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.132251410502086, "step_time": 0.47832173919677734} +{"epoch": 0, "iter": 18802, "iter_tflops": 34.84170853250034, "iter_time": 0.592137825012207, "loss": 0.6256514191627502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.295182094345414, "step_time": 0.538738618850708} +{"epoch": 0, "iter": 18803, "iter_tflops": 36.14369231891226, "iter_time": 0.5708075790405274, "loss": 0.5057681798934937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.311606054380675, "step_time": 0.5248092250823975} +{"epoch": 0, "iter": 18804, "iter_tflops": 39.87079566102033, "iter_time": 0.5174487533569336, "loss": 0.5265517234802246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.435304111250325, "step_time": 0.4749844379425049} +{"epoch": 0, "iter": 18805, "iter_tflops": 3.0418243861115584, "iter_time": 0.7349643936157226, "loss": 0.44638651609420776, "lr": 3e-05, "seqlen": 912.0, "step_tflops": 3.262378007769356, "step_time": 0.6852770004272462} +{"epoch": 0, "iter": 18806, "iter_tflops": 1.8663616701412464, "iter_time": 1.1978560485839844, "loss": 0.48549705743789673, "lr": 3e-05, "seqlen": 912.0, "step_tflops": 2.4342648839919603, "step_time": 0.918401538848877} +{"epoch": 0, "iter": 18807, "iter_tflops": 4.89866926567355, "iter_time": 0.4563754959106445, "loss": 0.00984779093414545, "lr": 3e-05, "seqlen": 912.0, "step_tflops": 5.282680112009279, "step_time": 0.4232004528045654} +{"epoch": 0, "iter": 18808, "iter_tflops": 4.78665473097314, "iter_time": 0.4670553321838379, "loss": 0.005274653900414705, "lr": 3e-05, "seqlen": 912.0, "step_tflops": 5.163285328838787, "step_time": 0.4329864559173584} 
+{"epoch": 0, "iter": 18809, "iter_tflops": 27.592335700590237, "iter_time": 0.7477110214233398, "loss": 0.5611197352409363, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.247758942885735, "step_time": 0.7053905754089356} +{"epoch": 0, "iter": 18810, "iter_tflops": 15.362484143805068, "iter_time": 1.3429529571533203, "loss": 0.41362452507019043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.36223147339173, "step_time": 1.1882743034362795} +{"epoch": 0, "iter": 18811, "iter_tflops": 42.80209451663578, "iter_time": 0.48201130676269527, "loss": 0.56154865026474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.442905693752365, "step_time": 0.44422486495971686} +{"epoch": 0, "iter": 18812, "iter_tflops": 45.362991482886805, "iter_time": 0.4548001098632812, "loss": 0.40642207860946655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80104195590922, "step_time": 0.4227592830657959} +{"epoch": 0, "iter": 18813, "iter_tflops": 42.43643781984257, "iter_time": 0.48616459274291995, "loss": 0.4409146010875702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.19243945306946, "step_time": 0.4466335563659668} +{"epoch": 0, "iter": 18814, "iter_tflops": 38.19503092262248, "iter_time": 0.5401512451171875, "loss": 0.5327110886573792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65433882364445, "step_time": 0.4952927856445313} +{"epoch": 0, "iter": 18815, "iter_tflops": 50.157464860355695, "iter_time": 0.4113264808654785, "loss": 0.3693309426307678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.55021553331021, "step_time": 0.3782037029266358} +{"epoch": 0, "iter": 18816, "iter_tflops": 49.33727837920196, "iter_time": 0.4181644020080566, "loss": 0.5005475878715515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.37760114976965, "step_time": 0.3865121898651124} +{"epoch": 0, "iter": 18817, "iter_tflops": 25.665618920039474, "iter_time": 0.8038416519165039, "loss": 0.28575342893600464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.069928559419097, "step_time": 
0.7621406707763672} +{"epoch": 0, "iter": 18818, "iter_tflops": 12.135860673207754, "iter_time": 1.7000107421875001, "loss": 0.2783683240413666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.155355626709106, "step_time": 1.2770436000823973} +{"epoch": 0, "iter": 18819, "iter_tflops": 37.017456369111315, "iter_time": 0.5573341751098633, "loss": 0.3366811275482178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.3354021300794, "step_time": 0.5114884796142578} +{"epoch": 0, "iter": 18820, "iter_tflops": 37.455924188583104, "iter_time": 0.5508098907470703, "loss": 0.2659785747528076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.059771575097635, "step_time": 0.5024648876190186} +{"epoch": 0, "iter": 18821, "iter_tflops": 33.279572525367115, "iter_time": 0.6199326477050782, "loss": 0.42158588767051697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.68791869269698, "step_time": 0.562340253829956} +{"epoch": 0, "iter": 18822, "iter_tflops": 37.67157061033189, "iter_time": 0.5476568450927735, "loss": 0.5354412198066711, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.23091340769977, "step_time": 0.5003792495727539} +{"epoch": 0, "iter": 18823, "iter_tflops": 42.728641480720874, "iter_time": 0.4828399124145508, "loss": 0.4687662720680237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.49462633115568, "step_time": 0.4437307090759277} +{"epoch": 0, "iter": 18824, "iter_tflops": 38.912677931220024, "iter_time": 0.5301895065307618, "loss": 0.4975273311138153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.490968701590624, "step_time": 0.4855406723022461} +{"epoch": 0, "iter": 18825, "iter_tflops": 27.79318692228041, "iter_time": 0.742307586669922, "loss": 0.5335537195205688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.325838841100534, "step_time": 0.6803140258789062} +{"epoch": 0, "iter": 18826, "iter_tflops": 40.459102905131935, "iter_time": 0.5099246406555176, "loss": 0.5111436247825623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.21047401827948, 
"step_time": 0.4666562385559082} +{"epoch": 0, "iter": 18827, "iter_tflops": 40.99806336546853, "iter_time": 0.5032211723327638, "loss": 0.4961376190185547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.46287966658604, "step_time": 0.464007137298584} +{"epoch": 0, "iter": 18828, "iter_tflops": 38.87934294893028, "iter_time": 0.5306440887451173, "loss": 0.4627944529056549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.50649082610766, "step_time": 0.4853633670806885} +{"epoch": 0, "iter": 18829, "iter_tflops": 12.99844832707912, "iter_time": 1.5871966400146487, "loss": 0.5577474236488342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.201416941798223, "step_time": 1.4527489471435546} +{"epoch": 0, "iter": 18830, "iter_tflops": 21.295013047561334, "iter_time": 0.9688227691650391, "loss": 0.5366278886795044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.414841614014577, "step_time": 0.6783232269287108} +{"epoch": 0, "iter": 18831, "iter_tflops": 49.11907793936626, "iter_time": 0.4200220031738282, "loss": 0.5989207029342651, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.09211416008893, "step_time": 0.38859054374694824} +{"epoch": 0, "iter": 18832, "iter_tflops": 47.07909652892726, "iter_time": 0.4382219505310058, "loss": 0.5661402344703674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.95026610953312, "step_time": 0.404926118850708} +{"epoch": 0, "iter": 18833, "iter_tflops": 47.11005219800349, "iter_time": 0.43793399810791017, "loss": 0.12842245399951935, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.14548157063031, "step_time": 0.3956448936462402} +{"epoch": 0, "iter": 18834, "iter_tflops": 46.57667192127591, "iter_time": 0.44294906997680666, "loss": 0.09566327929496765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.450339188408876, "step_time": 0.40893864822387693} +{"epoch": 0, "iter": 18835, "iter_tflops": 50.77231182485616, "iter_time": 0.40634536361694334, "loss": 0.14795167744159698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
55.25136413720075, "step_time": 0.3734042377471924} +{"epoch": 0, "iter": 18836, "iter_tflops": 51.72891184780325, "iter_time": 0.39883099746704104, "loss": 0.11171663552522659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.863952331541604, "step_time": 0.3693095932006836} +{"epoch": 0, "iter": 18837, "iter_tflops": 33.33333935568311, "iter_time": 0.6189326934814453, "loss": 0.575037956237793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.724047669643966, "step_time": 0.5775127639770509} +{"epoch": 0, "iter": 18838, "iter_tflops": 42.215979506930765, "iter_time": 0.4887034187316895, "loss": 0.6642549633979797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.345133833097634, "step_time": 0.4549792175292969} +{"epoch": 0, "iter": 18839, "iter_tflops": 45.342345320461156, "iter_time": 0.45500719833374026, "loss": 0.460907518863678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.91354166207171, "step_time": 0.42178694915771486} +{"epoch": 0, "iter": 18840, "iter_tflops": 42.69143551697518, "iter_time": 0.4832607116699219, "loss": 0.6875020265579224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.839498517202486, "step_time": 0.4500724086761474} +{"epoch": 0, "iter": 18841, "iter_tflops": 38.249600811771785, "iter_time": 0.5393806228637695, "loss": 0.2111734300851822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14592761213657, "step_time": 0.5014127693176269} +{"epoch": 0, "iter": 18842, "iter_tflops": 13.657000306232156, "iter_time": 1.5106606903076172, "loss": 0.21944597363471985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.669356306206794, "step_time": 1.316652267456055} +{"epoch": 0, "iter": 18843, "iter_tflops": 15.496676579496876, "iter_time": 1.331323745727539, "loss": 0.1499423086643219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.542425484234354, "step_time": 1.1126426544189454} +{"epoch": 0, "iter": 18844, "iter_tflops": 22.007122992753988, "iter_time": 0.9374734497070313, "loss": 0.17928524315357208, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 26.564154255549496, "step_time": 0.7766516227722169} +{"epoch": 0, "iter": 18845, "iter_tflops": 18.67401418593144, "iter_time": 0.8180119781494141, "loss": 0.36851444840431213, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 19.740835270743105, "step_time": 0.7738055191040039} +{"epoch": 0, "iter": 18846, "iter_tflops": 12.248943086681908, "iter_time": 1.2470926818847654, "loss": 0.320913702249527, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 13.638552592006807, "step_time": 1.1200284767150879} +{"epoch": 0, "iter": 18847, "iter_tflops": 24.797743920541166, "iter_time": 0.6160063323974609, "loss": 0.19461481273174286, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 26.66519505604818, "step_time": 0.5728653869628906} +{"epoch": 0, "iter": 18848, "iter_tflops": 22.338051679685726, "iter_time": 0.6838361511230467, "loss": 0.1900327056646347, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 23.957740393301115, "step_time": 0.6376046752929687} +{"epoch": 0, "iter": 18849, "iter_tflops": 1.598193635092909, "iter_time": 1.1773409271240236, "loss": 0.030614808201789856, "lr": 3e-05, "seqlen": 768.0, "step_tflops": 1.713455972441731, "step_time": 1.0981424713134766} +{"epoch": 0, "iter": 18850, "iter_tflops": 1.794696542084926, "iter_time": 1.048432830810547, "loss": 0.023457471281290054, "lr": 3e-05, "seqlen": 768.0, "step_tflops": 2.3922938541452035, "step_time": 0.7865332984924316} +{"epoch": 0, "iter": 18851, "iter_tflops": 2.860292926197042, "iter_time": 0.6578412857055664, "loss": 0.053997673094272614, "lr": 3e-05, "seqlen": 768.0, "step_tflops": 3.1406377074419756, "step_time": 0.5991199722290039} +{"epoch": 0, "iter": 18852, "iter_tflops": 3.6405915146616636, "iter_time": 0.5168442459106446, "loss": 0.020677370950579643, "lr": 3e-05, "seqlen": 768.0, "step_tflops": 3.965133426493283, "step_time": 0.47454109954833984} +{"epoch": 0, "iter": 18853, "iter_tflops": 17.61718246147184, "iter_time": 1.1710779266357423, "loss": 0.09346040338277817, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 18.710114005932624, "step_time": 1.1026706466674805} +{"epoch": 0, "iter": 18854, "iter_tflops": 11.259319490363803, "iter_time": 1.8323570556640625, "loss": 0.06817840784788132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.410510832974484, "step_time": 1.5384271163940428} +{"epoch": 0, "iter": 18855, "iter_tflops": 10.557426867235472, "iter_time": 1.954178207397461, "loss": 0.08273988962173462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.593145392327413, "step_time": 1.6382796249389648} +{"epoch": 0, "iter": 18856, "iter_tflops": 38.9459910366049, "iter_time": 0.5297360000610352, "loss": 0.0779973641037941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.13404127883603, "step_time": 0.3741988258361816} +{"epoch": 0, "iter": 18857, "iter_tflops": 18.735248079356957, "iter_time": 0.7913461608886719, "loss": 0.2553614675998688, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 19.95269287230078, "step_time": 0.743060935974121} +{"epoch": 0, "iter": 18858, "iter_tflops": 6.508368508771589, "iter_time": 2.27800048828125, "loss": 0.2947797477245331, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 8.887301521619463, "step_time": 1.668230407714844} +{"epoch": 0, "iter": 18859, "iter_tflops": 7.79145426512201, "iter_time": 1.9028625640869143, "loss": 0.24909590184688568, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 8.645021387028148, "step_time": 1.714983222961426} +{"epoch": 0, "iter": 18860, "iter_tflops": 24.13073795106713, "iter_time": 0.6144058532714844, "loss": 0.18847571313381195, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 25.736381652056803, "step_time": 0.5760742454528809} +{"epoch": 0, "iter": 18861, "iter_tflops": 18.22253557851728, "iter_time": 0.8809598541259765, "loss": 0.14108827710151672, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 19.229043788011523, "step_time": 0.8348476638793945} +{"epoch": 0, "iter": 18862, "iter_tflops": 9.769334085052046, "iter_time": 1.6432360839843752, "loss": 0.22321616113185883, "lr": 
3e-05, "seqlen": 6416.0, "step_tflops": 11.580586956861602, "step_time": 1.3862269973754884} +{"epoch": 0, "iter": 18863, "iter_tflops": 24.281639486865263, "iter_time": 0.6611300811767579, "loss": 0.14600618183612823, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.224070652816625, "step_time": 0.6121598167419434} +{"epoch": 0, "iter": 18864, "iter_tflops": 25.338833400847165, "iter_time": 0.6335462265014649, "loss": 0.20481498539447784, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 27.22293459406307, "step_time": 0.5896984481811524} +{"epoch": 0, "iter": 18865, "iter_tflops": 21.17653269600719, "iter_time": 0.9742432250976563, "loss": 0.6052663326263428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.788193955636228, "step_time": 0.9053413162231445} +{"epoch": 0, "iter": 18866, "iter_tflops": 44.48635267190421, "iter_time": 0.46376230621337894, "loss": 0.5428680777549744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.14416516320918, "step_time": 0.4285273914337159} +{"epoch": 0, "iter": 18867, "iter_tflops": 44.27825172804151, "iter_time": 0.4659419174194336, "loss": 0.6642393469810486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.92228865477944, "step_time": 0.43051144027709964} +{"epoch": 0, "iter": 18868, "iter_tflops": 46.84206423013645, "iter_time": 0.4404394607543945, "loss": 0.5647919774055481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.370551805728525, "step_time": 0.4095864105224609} +{"epoch": 0, "iter": 18869, "iter_tflops": 43.241329321990804, "iter_time": 0.4771151542663574, "loss": 0.0989871472120285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.268212368485074, "step_time": 0.4364686641693115} +{"epoch": 0, "iter": 18870, "iter_tflops": 36.76820875135074, "iter_time": 0.5611122817993164, "loss": 0.13406427204608917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.10556066522931, "step_time": 0.5019051723480225} +{"epoch": 0, "iter": 18871, "iter_tflops": 43.340282334015725, "iter_time": 0.476025821685791, "loss": 
0.11658047139644623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.361891874672814, "step_time": 0.43560535049438487} +{"epoch": 0, "iter": 18872, "iter_tflops": 41.29502058176514, "iter_time": 0.4996024513244629, "loss": 0.13353833556175232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.14783710911047, "step_time": 0.4569674835205078} +{"epoch": 0, "iter": 18873, "iter_tflops": 20.407001060344076, "iter_time": 1.010981155395508, "loss": 0.02897423692047596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.0071754690319, "step_time": 0.9374712142944337} +{"epoch": 0, "iter": 18874, "iter_tflops": 15.436287974645397, "iter_time": 1.3365320434570311, "loss": 0.03696517273783684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.623644687012032, "step_time": 1.1077903308868406} +{"epoch": 0, "iter": 18875, "iter_tflops": 51.154603418296496, "iter_time": 0.40330863952636714, "loss": 0.035612113773822784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.11244150051334, "step_time": 0.3676741371154785} +{"epoch": 0, "iter": 18876, "iter_tflops": 48.259974227252314, "iter_time": 0.42749905776977537, "loss": 0.031798187643289566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.744399854526364, "step_time": 0.3911523036956787} +{"epoch": 0, "iter": 18877, "iter_tflops": 41.96880351303583, "iter_time": 0.49158164596557613, "loss": 0.3188731372356415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83106191764953, "step_time": 0.45015525817871094} +{"epoch": 0, "iter": 18878, "iter_tflops": 44.70925258926395, "iter_time": 0.46145019912719726, "loss": 0.3488084375858307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.486666477380055, "step_time": 0.42550034904479983} +{"epoch": 0, "iter": 18879, "iter_tflops": 42.83880791380564, "iter_time": 0.48159821701049804, "loss": 0.3262846767902374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.92816627163252, "step_time": 0.44920351028442385} +{"epoch": 0, "iter": 18880, "iter_tflops": 48.38701342220816, "iter_time": 
0.42637666702270505, "loss": 0.44147205352783203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.079616751981746, "step_time": 0.396145263671875} +{"epoch": 0, "iter": 18881, "iter_tflops": 30.844768775409815, "iter_time": 0.6688684768676758, "loss": 0.5116502642631531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.64895159987131, "step_time": 0.6319067687988281} +{"epoch": 0, "iter": 18882, "iter_tflops": 13.687604968944754, "iter_time": 1.5072829437255861, "loss": 0.5319517850875854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.241774616720726, "step_time": 1.1965759887695313} +{"epoch": 0, "iter": 18883, "iter_tflops": 36.313588927433194, "iter_time": 0.5681370010375977, "loss": 0.52386873960495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.790611964323034, "step_time": 0.5184914855957031} +{"epoch": 0, "iter": 18884, "iter_tflops": 41.05620809444261, "iter_time": 0.5025084991455079, "loss": 0.5605393648147583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52596722035707, "step_time": 0.46334969902038575} +{"epoch": 0, "iter": 18885, "iter_tflops": 20.11647820089488, "iter_time": 1.0255817794799806, "loss": 0.5478004217147827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.39584579850359, "step_time": 0.9642569732666016} +{"epoch": 0, "iter": 18886, "iter_tflops": 18.2454211711113, "iter_time": 1.130754577636719, "loss": 0.6999693512916565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.82087755822244, "step_time": 0.9454749679565431} +{"epoch": 0, "iter": 18887, "iter_tflops": 34.85039905246495, "iter_time": 0.5919901657104493, "loss": 0.6245535612106323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.846961601081595, "step_time": 0.5451188850402832} +{"epoch": 0, "iter": 18888, "iter_tflops": 32.526811827589526, "iter_time": 0.6342796096801757, "loss": 0.4950498938560486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.093708896022484, "step_time": 0.5878858108520508} +{"epoch": 0, "iter": 18889, "iter_tflops": 30.485980108136307, 
"iter_time": 0.6767403717041015, "loss": 0.17519688606262207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.858824743911754, "step_time": 0.6093269233703613} +{"epoch": 0, "iter": 18890, "iter_tflops": 37.85141239788043, "iter_time": 0.5450547866821289, "loss": 0.2087285816669464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.54144238780816, "step_time": 0.4966388339996338} +{"epoch": 0, "iter": 18891, "iter_tflops": 38.99055917992895, "iter_time": 0.5291304855346679, "loss": 0.22224459052085876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.56028931998083, "step_time": 0.48474984169006347} +{"epoch": 0, "iter": 18892, "iter_tflops": 40.310588100419345, "iter_time": 0.511803337097168, "loss": 0.1545249968767166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.70092445806168, "step_time": 0.4720974159240723} +{"epoch": 0, "iter": 18893, "iter_tflops": 21.256412028863597, "iter_time": 0.9705821228027345, "loss": 0.5922870635986328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.87652500599307, "step_time": 0.9018456039428709} +{"epoch": 0, "iter": 18894, "iter_tflops": 18.700788664745055, "iter_time": 1.1032205047607424, "loss": 0.47771602869033813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.026810047734006, "step_time": 0.9811803817749025} +{"epoch": 0, "iter": 18895, "iter_tflops": 36.37857532802893, "iter_time": 0.5671220855712891, "loss": 0.6417298316955566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.41863388428373, "step_time": 0.5233842849731446} +{"epoch": 0, "iter": 18896, "iter_tflops": 39.02067558230425, "iter_time": 0.5287220993041992, "loss": 0.7164618968963623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22072556495705, "step_time": 0.4886484832763673} +{"epoch": 0, "iter": 18897, "iter_tflops": 19.255054607815893, "iter_time": 1.0714637756347656, "loss": 0.24701538681983948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.691891570425966, "step_time": 0.9970617446899416} +{"epoch": 0, "iter": 18898, "iter_tflops": 
20.299975710984974, "iter_time": 1.0163112411499022, "loss": 0.262451171875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.097445192064182, "step_time": 0.790540735244751} +{"epoch": 0, "iter": 18899, "iter_tflops": 47.00444164275777, "iter_time": 0.4389179573059082, "loss": 0.17786170542240143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.24383681329937, "step_time": 0.40260633850097655} +{"epoch": 0, "iter": 18900, "iter_tflops": 50.11675673136076, "iter_time": 0.411660587310791, "loss": 0.32212796807289124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.132849321740494, "step_time": 0.3811196670532227} +{"epoch": 0, "iter": 18901, "iter_tflops": 30.26689603271619, "iter_time": 0.6816388931274413, "loss": 0.6189180016517639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.27756947766248, "step_time": 0.639177417755127} +{"epoch": 0, "iter": 18902, "iter_tflops": 35.68137515869093, "iter_time": 0.5782034301757812, "loss": 0.5958855152130127, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.03095377593704, "step_time": 0.5285828685760499} +{"epoch": 0, "iter": 18903, "iter_tflops": 35.28948894124547, "iter_time": 0.5846243209838866, "loss": 0.4662458002567291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.42061234346306, "step_time": 0.5369798202514648} +{"epoch": 0, "iter": 18904, "iter_tflops": 39.53227942175264, "iter_time": 0.5218796844482423, "loss": 0.7101986408233643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.85473704928521, "step_time": 0.4814192066192627} +{"epoch": 0, "iter": 18905, "iter_tflops": 28.551449792820872, "iter_time": 0.7225935516357421, "loss": 0.2615025043487549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.171194413435316, "step_time": 0.6618640670776368} +{"epoch": 0, "iter": 18906, "iter_tflops": 36.0122882133157, "iter_time": 0.5728903808593749, "loss": 0.2702193856239319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.65402847226196, "step_time": 0.5202773666381836} +{"epoch": 0, "iter": 18907, "iter_tflops": 
42.54675112133722, "iter_time": 0.4849040870666504, "loss": 0.31084972620010376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.817806943456674, "step_time": 0.44066766166687016} +{"epoch": 0, "iter": 18908, "iter_tflops": 37.025387458574905, "iter_time": 0.5572147903442384, "loss": 0.2974918782711029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.4584866770473, "step_time": 0.5099324073791504} +{"epoch": 0, "iter": 18909, "iter_tflops": 35.48318023486591, "iter_time": 0.5814330444335938, "loss": 0.0198528915643692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.72516935521854, "step_time": 0.5193456401824951} +{"epoch": 0, "iter": 18910, "iter_tflops": 39.57445105359542, "iter_time": 0.5213235549926758, "loss": 0.049806270748376846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.99460861053625, "step_time": 0.46894594955444335} +{"epoch": 0, "iter": 18911, "iter_tflops": 41.226828485100086, "iter_time": 0.5004288291931153, "loss": 0.03352842107415199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.56977584300036, "step_time": 0.4527363395690918} +{"epoch": 0, "iter": 18912, "iter_tflops": 47.32650910001927, "iter_time": 0.435931022644043, "loss": 0.029703468084335327, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.945874279912225, "step_time": 0.3971651992797852} +{"epoch": 0, "iter": 18913, "iter_tflops": 24.32129656204106, "iter_time": 0.8482727661132813, "loss": 0.08574728667736053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.766790145570063, "step_time": 0.800685432434082} +{"epoch": 0, "iter": 18914, "iter_tflops": 12.180063676742186, "iter_time": 1.6938411865234375, "loss": 0.07726102322340012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.684323546969825, "step_time": 1.2365555877685546} +{"epoch": 0, "iter": 18915, "iter_tflops": 11.063407691605363, "iter_time": 1.8648045959472657, "loss": 0.10877656936645508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.8727855904058, "step_time": 1.6026906814575195} +{"epoch": 0, "iter": 18916, 
"iter_tflops": 24.840073746453612, "iter_time": 0.8305568542480469, "loss": 0.05696914717555046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.24752030866788, "step_time": 0.639773021697998} +{"epoch": 0, "iter": 18917, "iter_tflops": 20.495622531276243, "iter_time": 0.753290786743164, "loss": 0.1968967318534851, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 21.93025297326962, "step_time": 0.7040121078491209} +{"epoch": 0, "iter": 18918, "iter_tflops": 9.264394162557672, "iter_time": 1.6665054779052735, "loss": 0.22630470991134644, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 11.23098374982958, "step_time": 1.374693790435791} +{"epoch": 0, "iter": 18919, "iter_tflops": 21.7064855464315, "iter_time": 0.7112696151733399, "loss": 0.23796962201595306, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 23.362005772625132, "step_time": 0.6608663558959962} +{"epoch": 0, "iter": 18920, "iter_tflops": 24.49792321199832, "iter_time": 0.6302233657836913, "loss": 0.2796775996685028, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 26.290620401328642, "step_time": 0.5872498779296875} +{"epoch": 0, "iter": 18921, "iter_tflops": 25.230362255219394, "iter_time": 0.8177089691162109, "loss": 0.2071579247713089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.997085968988117, "step_time": 0.7641970520019531} +{"epoch": 0, "iter": 18922, "iter_tflops": 8.19693329908282, "iter_time": 2.516928314208984, "loss": 0.3388117253780365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.167359843505224, "step_time": 2.0291495361328122} +{"epoch": 0, "iter": 18923, "iter_tflops": 14.28542027745009, "iter_time": 1.4442062683105468, "loss": 0.360231876373291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.91248688498972, "step_time": 1.2965348320007326} +{"epoch": 0, "iter": 18924, "iter_tflops": 38.71006796312584, "iter_time": 0.5329645385742188, "loss": 0.3444388508796692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.47806791680467, "step_time": 0.4856881332397461} +{"epoch": 0, "iter": 
18925, "iter_tflops": 14.19272191627233, "iter_time": 1.0475021514892577, "loss": 0.3175579905509949, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 15.291703840142237, "step_time": 0.9722204208374023} +{"epoch": 0, "iter": 18926, "iter_tflops": 23.738360779339782, "iter_time": 0.6262819442749025, "loss": 0.26889750361442566, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 25.672697069223744, "step_time": 0.5790940742492675} +{"epoch": 0, "iter": 18927, "iter_tflops": 26.838801018266395, "iter_time": 0.5539333419799805, "loss": 0.17602021992206573, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 28.628796586123745, "step_time": 0.5192990455627441} +{"epoch": 0, "iter": 18928, "iter_tflops": 26.157230500425324, "iter_time": 0.5683670043945312, "loss": 0.30355870723724365, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 27.895523953577044, "step_time": 0.5329495429992676} +{"epoch": 0, "iter": 18929, "iter_tflops": 43.34180813466451, "iter_time": 0.4760090637207032, "loss": 0.1167483776807785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.44416368392831, "step_time": 0.43484997749328613} +{"epoch": 0, "iter": 18930, "iter_tflops": 43.85015262552009, "iter_time": 0.4704908027648926, "loss": 0.15741673111915588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.78288119018821, "step_time": 0.43176746559143064} +{"epoch": 0, "iter": 18931, "iter_tflops": 48.483245468792035, "iter_time": 0.4255303726196289, "loss": 0.1175166517496109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.57338267096443, "step_time": 0.39242469215393067} +{"epoch": 0, "iter": 18932, "iter_tflops": 51.06401732157501, "iter_time": 0.4040240974426269, "loss": 0.14156031608581543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.050431319178095, "step_time": 0.37476715469360344} +{"epoch": 0, "iter": 18933, "iter_tflops": 31.860065989606895, "iter_time": 0.6475533828735351, "loss": 0.07022271305322647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.04191935233303, "step_time": 0.6060496559143066} 
+{"epoch": 0, "iter": 18934, "iter_tflops": 18.33906991460202, "iter_time": 1.1249803619384766, "loss": 0.0636131763458252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.380257048913904, "step_time": 0.9218434562683103} +{"epoch": 0, "iter": 18935, "iter_tflops": 44.75537378770837, "iter_time": 0.46097466659545894, "loss": 0.04272546246647835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.67132582480472, "step_time": 0.42388599777221675} +{"epoch": 0, "iter": 18936, "iter_tflops": 54.564570788303136, "iter_time": 0.3781042022705078, "loss": 0.09920121729373932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.60019221392206, "step_time": 0.34615817070007326} +{"epoch": 0, "iter": 18937, "iter_tflops": 45.046342721259215, "iter_time": 0.45799708175659176, "loss": 0.17862094938755035, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63317269993562, "step_time": 0.4156714630126953} +{"epoch": 0, "iter": 18938, "iter_tflops": 38.34178353612232, "iter_time": 0.5380838241577148, "loss": 0.12584498524665833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.18849192643227, "step_time": 0.4890218296051026} +{"epoch": 0, "iter": 18939, "iter_tflops": 41.07450537363725, "iter_time": 0.5022846488952637, "loss": 0.1595141887664795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.94320365551943, "step_time": 0.45904812812805174} +{"epoch": 0, "iter": 18940, "iter_tflops": 40.205363734401104, "iter_time": 0.5131428146362305, "loss": 0.12807945907115936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.52474013650145, "step_time": 0.47400842475891114} +{"epoch": 0, "iter": 18941, "iter_tflops": 21.815180187828613, "iter_time": 0.9457218933105469, "loss": 0.620299220085144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.74663657555655, "step_time": 0.8688006591796875} +{"epoch": 0, "iter": 18942, "iter_tflops": 12.507929342420493, "iter_time": 1.649441162109375, "loss": 0.5527917742729187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.70552905174945, "step_time": 
1.1652345123291015} +{"epoch": 0, "iter": 18943, "iter_tflops": 32.82970122732773, "iter_time": 0.6284276962280273, "loss": 0.5721304416656494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.77669080124051, "step_time": 0.5766629905700683} +{"epoch": 0, "iter": 18944, "iter_tflops": 36.28471000874439, "iter_time": 0.5685891799926758, "loss": 0.6072721481323242, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.10172144714607, "step_time": 0.527626220703125} +{"epoch": 0, "iter": 18945, "iter_tflops": 24.718638044226907, "iter_time": 0.8346371459960937, "loss": 0.12154857814311981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.465640816204488, "step_time": 0.7795425643920899} +{"epoch": 0, "iter": 18946, "iter_tflops": 7.765996325984882, "iter_time": 2.65659326171875, "loss": 0.07959652692079544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.053765893182147, "step_time": 2.2787306137084964} +{"epoch": 0, "iter": 18947, "iter_tflops": 30.28155488587223, "iter_time": 0.6813089218139649, "loss": 0.09838201850652695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.55726367150235, "step_time": 0.5643500480651855} +{"epoch": 0, "iter": 18948, "iter_tflops": 43.562405816309486, "iter_time": 0.47359857940673833, "loss": 0.14010369777679443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.562895563486876, "step_time": 0.4337644557952881} +{"epoch": 0, "iter": 18949, "iter_tflops": 11.373027610383573, "iter_time": 1.2749037628173827, "loss": 0.2572021186351776, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 12.172133506280845, "step_time": 1.1912057723999023} +{"epoch": 0, "iter": 18950, "iter_tflops": 10.183052935039763, "iter_time": 1.4238869018554687, "loss": 0.20002692937850952, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 12.12887555490843, "step_time": 1.1954542388916014} +{"epoch": 0, "iter": 18951, "iter_tflops": 25.575955373176694, "iter_time": 0.566919807434082, "loss": 0.33462831377983093, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 27.299104576438666, 
"step_time": 0.53113521194458} +{"epoch": 0, "iter": 18952, "iter_tflops": 24.91015208860393, "iter_time": 0.5820725479125977, "loss": 0.1675083488225937, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 26.491026519729278, "step_time": 0.5473368759155273} +{"epoch": 0, "iter": 18953, "iter_tflops": 22.342970641396537, "iter_time": 0.8344629974365234, "loss": 0.05240736901760101, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 23.500059897203304, "step_time": 0.7933759460449218} +{"epoch": 0, "iter": 18954, "iter_tflops": 17.07589178890898, "iter_time": 1.09185408782959, "loss": 0.023021742701530457, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 23.021474880531514, "step_time": 0.8098691482543946} +{"epoch": 0, "iter": 18955, "iter_tflops": 50.07538429664196, "iter_time": 0.37232629394531247, "loss": 0.01773148402571678, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 54.97050190966581, "step_time": 0.33917067527770994} +{"epoch": 0, "iter": 18956, "iter_tflops": 46.062669052800324, "iter_time": 0.4047612228393555, "loss": 0.03058622218668461, "lr": 3e-05, "seqlen": 7424.0, "step_tflops": 49.8496303660472, "step_time": 0.37401244735717776} +{"epoch": 0, "iter": 18957, "iter_tflops": 36.89644711833139, "iter_time": 0.5591620635986329, "loss": 0.6167057156562805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.7593601341999, "step_time": 0.5188990325927734} +{"epoch": 0, "iter": 18958, "iter_tflops": 13.581563469473965, "iter_time": 1.5190514373779298, "loss": 0.6635951399803162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.749688185123205, "step_time": 1.0446288223266602} +{"epoch": 0, "iter": 18959, "iter_tflops": 34.76808257924531, "iter_time": 0.5933917541503906, "loss": 0.5622263550758362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.074715576798766, "step_time": 0.5418581123352051} +{"epoch": 0, "iter": 18960, "iter_tflops": 38.750769271004465, "iter_time": 0.5324047470092773, "loss": 0.5021007657051086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
41.86553339151014, "step_time": 0.4927942352294922} +{"epoch": 0, "iter": 18961, "iter_tflops": 33.5450940190808, "iter_time": 0.6150256576538086, "loss": 0.5553102493286133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.16410727383201, "step_time": 0.5551349143981934} +{"epoch": 0, "iter": 18962, "iter_tflops": 38.54876093693246, "iter_time": 0.5351947250366211, "loss": 0.6467551589012146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.31192876036908, "step_time": 0.48759520339965823} +{"epoch": 0, "iter": 18963, "iter_tflops": 36.212746730211606, "iter_time": 0.5697191009521484, "loss": 0.5126761794090271, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.78848436049128, "step_time": 0.5185192108154297} +{"epoch": 0, "iter": 18964, "iter_tflops": 39.01369540652519, "iter_time": 0.5288166961669922, "loss": 0.4665304124355316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.55692392938047, "step_time": 0.4847881755828857} +{"epoch": 0, "iter": 18965, "iter_tflops": 16.729169224891113, "iter_time": 1.2332407684326172, "loss": 0.5629016757011414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.733895808665878, "step_time": 1.1633706283569334} +{"epoch": 0, "iter": 18966, "iter_tflops": 15.697462816359158, "iter_time": 1.3142947845458983, "loss": 0.5517564415931702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.567004904623325, "step_time": 1.0543817825317383} +{"epoch": 0, "iter": 18967, "iter_tflops": 44.90048900490483, "iter_time": 0.4594848289489746, "loss": 0.5013455152511597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.274082003816964, "step_time": 0.41870071792602537} +{"epoch": 0, "iter": 18968, "iter_tflops": 38.69081082297601, "iter_time": 0.5332298049926758, "loss": 0.4242575168609619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22865627831779, "step_time": 0.48855671310424803} +{"epoch": 0, "iter": 18969, "iter_tflops": 30.788706733891967, "iter_time": 0.6700863952636719, "loss": 0.24131658673286438, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 33.53170091266713, "step_time": 0.6152713088989258} +{"epoch": 0, "iter": 18970, "iter_tflops": 10.417537878054477, "iter_time": 1.9804193420410159, "loss": 0.10294152051210403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.683178573372457, "step_time": 1.6266500854492187} +{"epoch": 0, "iter": 18971, "iter_tflops": 11.974495787786958, "iter_time": 1.7229196014404295, "loss": 0.21565639972686768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.68715594001852, "step_time": 1.4047031021118166} +{"epoch": 0, "iter": 18972, "iter_tflops": 20.015286581593337, "iter_time": 1.0307668304443358, "loss": 0.17808224260807037, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.5922118247748, "step_time": 0.8744874649047851} +{"epoch": 0, "iter": 18973, "iter_tflops": 12.498024964244772, "iter_time": 1.1601445617675783, "loss": 0.19939085841178894, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 12.971623837950524, "step_time": 1.1177872467041015} +{"epoch": 0, "iter": 18974, "iter_tflops": 11.705570834579182, "iter_time": 1.2386850585937499, "loss": 0.15214529633522034, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 13.882429360399955, "step_time": 1.0444508895874023} +{"epoch": 0, "iter": 18975, "iter_tflops": 21.912412533943947, "iter_time": 0.6617033004760742, "loss": 0.2037959098815918, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.573278248515035, "step_time": 0.6150827026367188} +{"epoch": 0, "iter": 18976, "iter_tflops": 21.85636392294981, "iter_time": 0.6634001770019531, "loss": 0.1108933612704277, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.389440623132494, "step_time": 0.619917163848877} +{"epoch": 0, "iter": 18977, "iter_tflops": 18.666585749936296, "iter_time": 1.105241943359375, "loss": 0.1482454389333725, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.9925309549177, "step_time": 1.031940055847168} +{"epoch": 0, "iter": 18978, "iter_tflops": 20.337396933177143, "iter_time": 1.014441207885742, "loss": 0.16801807284355164, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 24.78450331132874, "step_time": 0.8324190826416016} +{"epoch": 0, "iter": 18979, "iter_tflops": 40.14907153314138, "iter_time": 0.5138622817993164, "loss": 0.11028280854225159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.76311151610357, "step_time": 0.47142656898498536} +{"epoch": 0, "iter": 18980, "iter_tflops": 41.28741331734694, "iter_time": 0.4996945037841797, "loss": 0.12405618280172348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.360850196323376, "step_time": 0.4548215789794922} +{"epoch": 0, "iter": 18981, "iter_tflops": 30.52891525502912, "iter_time": 0.6757886199951172, "loss": 0.1228501945734024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.127895126492625, "step_time": 0.6227710342407227} +{"epoch": 0, "iter": 18982, "iter_tflops": 10.706012524514085, "iter_time": 1.9270567321777343, "loss": 0.19027240574359894, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.957714594542118, "step_time": 1.5921861343383787} +{"epoch": 0, "iter": 18983, "iter_tflops": 12.860235790578193, "iter_time": 1.6042546844482422, "loss": 0.11682647466659546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.789567594500962, "step_time": 1.3066281509399413} +{"epoch": 0, "iter": 18984, "iter_tflops": 39.95090653483713, "iter_time": 0.5164111480712891, "loss": 0.17094099521636963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.884160478683114, "step_time": 0.47012619781494136} +{"epoch": 0, "iter": 18985, "iter_tflops": 17.398930793923874, "iter_time": 0.9509442749023438, "loss": 0.23033550381660461, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 18.53898237346877, "step_time": 0.8924661178588866} +{"epoch": 0, "iter": 18986, "iter_tflops": 13.2500922682934, "iter_time": 1.2487017669677734, "loss": 0.17517994344234467, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 15.279408462434974, "step_time": 1.082856948852539} +{"epoch": 0, "iter": 18987, "iter_tflops": 25.73862860432894, "iter_time": 0.6428242111206054, "loss": 
0.17850641906261444, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 27.785562534801077, "step_time": 0.5954680099487305} +{"epoch": 0, "iter": 18988, "iter_tflops": 27.42664814325313, "iter_time": 0.6032605056762695, "loss": 0.22160695493221283, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 29.42037968005999, "step_time": 0.562379337310791} +{"epoch": 0, "iter": 18989, "iter_tflops": 27.56296489769461, "iter_time": 0.7485077743530273, "loss": 0.4848770499229431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.511840198939012, "step_time": 0.676166805267334} +{"epoch": 0, "iter": 18990, "iter_tflops": 38.27682209539171, "iter_time": 0.5389970321655273, "loss": 0.4297971725463867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.850632190665635, "step_time": 0.4929696979522705} +{"epoch": 0, "iter": 18991, "iter_tflops": 41.18480463345718, "iter_time": 0.500939453125, "loss": 0.4511643648147583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.92666930099593, "step_time": 0.4592170715332031} +{"epoch": 0, "iter": 18992, "iter_tflops": 39.85858004213759, "iter_time": 0.5176073379516601, "loss": 0.6441606879234314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.18310131954553, "step_time": 0.4777584953308105} +{"epoch": 0, "iter": 18993, "iter_tflops": 31.540322999029048, "iter_time": 0.5623734283447266, "loss": 0.002061850391328335, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 35.45168984888215, "step_time": 0.500327054977417} +{"epoch": 0, "iter": 18994, "iter_tflops": 39.467305502224576, "iter_time": 0.4494210929870605, "loss": 0.001743206987157464, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 43.79620361617868, "step_time": 0.4049994773864746} +{"epoch": 0, "iter": 18995, "iter_tflops": 42.49434271192262, "iter_time": 0.4174070816040039, "loss": 0.01215670071542263, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 46.9424731523365, "step_time": 0.37785481643676755} +{"epoch": 0, "iter": 18996, "iter_tflops": 42.69590562732687, "iter_time": 0.4154365463256836, 
"loss": 0.014137090183794498, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 47.00857449936828, "step_time": 0.3773234939575196} +{"epoch": 0, "iter": 18997, "iter_tflops": 19.838170586088868, "iter_time": 1.0399695587158204, "loss": 0.6131113767623901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.98254815005887, "step_time": 0.983250144958496} +{"epoch": 0, "iter": 18998, "iter_tflops": 14.97350807278658, "iter_time": 1.3778396759033202, "loss": 0.6760303974151611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.057807453002745, "step_time": 1.142502685546875} +{"epoch": 0, "iter": 18999, "iter_tflops": 42.30156882699362, "iter_time": 0.4877146186828613, "loss": 0.6104416847229004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.474332598317616, "step_time": 0.45368655967712396} +{"epoch": 0, "iter": 19000, "iter_tflops": 43.094649972319935, "iter_time": 0.4787390899658203, "loss": 0.5916799306869507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.14341520806068, "step_time": 0.44710807418823245} +{"epoch": 0, "iter": 19001, "iter_tflops": 43.047354629230455, "iter_time": 0.4792650718688965, "loss": 0.07853221893310547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.917201564810185, "step_time": 0.43973410224914544} +{"epoch": 0, "iter": 19002, "iter_tflops": 28.614918972432907, "iter_time": 0.7209908065795898, "loss": 0.0464167445898056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.983828682827696, "step_time": 0.645047649383545} +{"epoch": 0, "iter": 19003, "iter_tflops": 50.76450278109595, "iter_time": 0.406407871246338, "loss": 0.04139278829097748, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.4665448996171, "step_time": 0.37195562744140626} +{"epoch": 0, "iter": 19004, "iter_tflops": 45.36669926303564, "iter_time": 0.454762939453125, "loss": 0.07377753406763077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.165576066791104, "step_time": 0.41962476921081543} +{"epoch": 0, "iter": 19005, "iter_tflops": 28.966109076383212, "iter_time": 
0.7122493896484375, "loss": 0.49971121549606323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.795959405923128, "step_time": 0.6699285850524902} +{"epoch": 0, "iter": 19006, "iter_tflops": 13.163398229759219, "iter_time": 1.5673075561523437, "loss": 0.4223976135253906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.793964178334306, "step_time": 1.3062644233703615} +{"epoch": 0, "iter": 19007, "iter_tflops": 34.51726328069626, "iter_time": 0.597703628540039, "loss": 0.569988489151001, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.944152776106414, "step_time": 0.5437226028442382} +{"epoch": 0, "iter": 19008, "iter_tflops": 35.58511455367878, "iter_time": 0.5797675170898438, "loss": 0.5489493608474731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.68156430626648, "step_time": 0.5333572692871094} +{"epoch": 0, "iter": 19009, "iter_tflops": 31.655675025634384, "iter_time": 0.6517344360351564, "loss": 0.027865679934620857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.45196154767646, "step_time": 0.5819450492858886} +{"epoch": 0, "iter": 19010, "iter_tflops": 41.991173031782694, "iter_time": 0.4913197708129883, "loss": 0.017908601090312004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.55214950821841, "step_time": 0.4431824035644531} +{"epoch": 0, "iter": 19011, "iter_tflops": 43.41691506532322, "iter_time": 0.4751856155395508, "loss": 0.017508098855614662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.16018842283202, "step_time": 0.42838481712341303} +{"epoch": 0, "iter": 19012, "iter_tflops": 46.371063941371816, "iter_time": 0.44491309356689457, "loss": 0.027434775605797768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.6604710378582, "step_time": 0.40724243354797357} +{"epoch": 0, "iter": 19013, "iter_tflops": 44.79389620520886, "iter_time": 0.46057823181152346, "loss": 0.44486990571022034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95126487558716, "step_time": 0.42146190834045416} +{"epoch": 0, "iter": 19014, "iter_tflops": 
46.698156402489744, "iter_time": 0.441796745300293, "loss": 0.565479576587677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.00977021520334, "step_time": 0.39667726707458495} +{"epoch": 0, "iter": 19015, "iter_tflops": 47.33987508838782, "iter_time": 0.43580794143676754, "loss": 0.4035293161869049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.32519223976929, "step_time": 0.401968168258667} +{"epoch": 0, "iter": 19016, "iter_tflops": 44.49597014748348, "iter_time": 0.46366206741333005, "loss": 0.49959617853164673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.8683004470267, "step_time": 0.43099699211120607} +{"epoch": 0, "iter": 19017, "iter_tflops": 25.263763470923003, "iter_time": 0.8166278762817383, "loss": 0.24254943430423737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.59292462375551, "step_time": 0.7758113784790038} +{"epoch": 0, "iter": 19018, "iter_tflops": 15.442922240264153, "iter_time": 1.3359578704833983, "loss": 0.2098292112350464, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.129493596238444, "step_time": 1.137985095977783} +{"epoch": 0, "iter": 19019, "iter_tflops": 38.08771681609931, "iter_time": 0.5416731491088866, "loss": 0.3285648226737976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.740113420737295, "step_time": 0.49427497482299804} +{"epoch": 0, "iter": 19020, "iter_tflops": 42.77415612442629, "iter_time": 0.4823261375427246, "loss": 0.19409048557281494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81003244683272, "step_time": 0.44074085044860845} +{"epoch": 0, "iter": 19021, "iter_tflops": 17.033409958361524, "iter_time": 1.2112133483886718, "loss": 0.0021957498975098133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.16323699003413, "step_time": 1.135870964050293} +{"epoch": 0, "iter": 19022, "iter_tflops": 17.869304391456872, "iter_time": 1.154554931640625, "loss": 0.0009322986006736755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.04576897287223, "step_time": 0.7628214797973633} +{"epoch": 0, "iter": 19023, 
"iter_tflops": 48.4182817698883, "iter_time": 0.4261013145446777, "loss": 0.03324470669031143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.57570814609314, "step_time": 0.3850829830169678} +{"epoch": 0, "iter": 19024, "iter_tflops": 41.803250026822624, "iter_time": 0.4935284576416016, "loss": 0.019497839733958244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.4130347780377, "step_time": 0.44451076316833493} +{"epoch": 0, "iter": 19025, "iter_tflops": 20.373302653233356, "iter_time": 1.0126533660888672, "loss": 0.004582828842103481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.688421081213868, "step_time": 0.9512492141723633} +{"epoch": 0, "iter": 19026, "iter_tflops": 15.384131092694803, "iter_time": 1.3410632934570312, "loss": 0.002543632872402668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.389421187814015, "step_time": 1.0118528289794921} +{"epoch": 0, "iter": 19027, "iter_tflops": 43.65229222809518, "iter_time": 0.47262337112426756, "loss": 0.0006514183478429914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.49757320004174, "step_time": 0.4254046573638916} +{"epoch": 0, "iter": 19028, "iter_tflops": 47.93975759527502, "iter_time": 0.43035456466674804, "loss": 0.008094565011560917, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.07777534677984, "step_time": 0.38869552040100097} +{"epoch": 0, "iter": 19029, "iter_tflops": 1.4197028223898147, "iter_time": 1.1592721099853516, "loss": 0.1604844033718109, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 1.4899564603302191, "step_time": 1.1046107254028321} +{"epoch": 0, "iter": 19030, "iter_tflops": 1.9695973521148198, "iter_time": 0.8356133728027343, "loss": 0.060222793370485306, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 2.5791839620317925, "step_time": 0.6381172924041748} +{"epoch": 0, "iter": 19031, "iter_tflops": 3.964977452760938, "iter_time": 0.41508984756469725, "loss": 0.1240893304347992, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 4.303736459824608, "step_time": 0.38241697692871096} 
+{"epoch": 0, "iter": 19032, "iter_tflops": 3.79355375269032, "iter_time": 0.4338469924926758, "loss": 0.27613890171051025, "lr": 3e-05, "seqlen": 672.0, "step_tflops": 4.104541085958895, "step_time": 0.4009758586883545} +{"epoch": 0, "iter": 19033, "iter_tflops": 43.30433573408361, "iter_time": 0.4764209671020508, "loss": 0.45288097858428955, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.728881278905675, "step_time": 0.4322559623718261} +{"epoch": 0, "iter": 19034, "iter_tflops": 42.366951038579515, "iter_time": 0.48696195983886714, "loss": 0.2914901673793793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.15427797312353, "step_time": 0.4470028438568115} +{"epoch": 0, "iter": 19035, "iter_tflops": 47.67399110392055, "iter_time": 0.4327536468505859, "loss": 0.3622781038284302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.32017362967417, "step_time": 0.4020074768066406} +{"epoch": 0, "iter": 19036, "iter_tflops": 46.78993247992275, "iter_time": 0.4409301834106445, "loss": 0.22751465439796448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.22364906519467, "step_time": 0.4107844390869141} +{"epoch": 0, "iter": 19037, "iter_tflops": 37.58804299666314, "iter_time": 0.5488738403320312, "loss": 0.4120009243488312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.621112465434855, "step_time": 0.5078909034729004} +{"epoch": 0, "iter": 19038, "iter_tflops": 42.23246758802012, "iter_time": 0.4885126228332519, "loss": 0.6694409847259521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.97175796369398, "step_time": 0.44877756309509276} +{"epoch": 0, "iter": 19039, "iter_tflops": 44.58011933799202, "iter_time": 0.46278686141967773, "loss": 0.6768109202384949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.15602327891167, "step_time": 0.4284218692779541} +{"epoch": 0, "iter": 19040, "iter_tflops": 47.35755762408599, "iter_time": 0.4356452178955078, "loss": 0.5715433359146118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.66841875724343, "step_time": 
0.40717855453491214} +{"epoch": 0, "iter": 19041, "iter_tflops": 27.49146435612024, "iter_time": 0.7504545135498046, "loss": 0.40771764516830444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.1054069033898, "step_time": 0.7088405799865722} +{"epoch": 0, "iter": 19042, "iter_tflops": 16.089210678715386, "iter_time": 1.2822937011718751, "loss": 0.3826737701892853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.876850334279315, "step_time": 0.9018327789306642} +{"epoch": 0, "iter": 19043, "iter_tflops": 41.27909364858648, "iter_time": 0.4997952156066895, "loss": 0.2856043875217438, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.14811054264507, "step_time": 0.4569647159576416} +{"epoch": 0, "iter": 19044, "iter_tflops": 36.04760762457601, "iter_time": 0.5723290634155273, "loss": 0.3270662724971771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.20189805729455, "step_time": 0.5262779235839844} +{"epoch": 0, "iter": 19045, "iter_tflops": 19.765682293764012, "iter_time": 1.0437835235595703, "loss": 0.3591781556606293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.134353034382805, "step_time": 0.9761876068115234} +{"epoch": 0, "iter": 19046, "iter_tflops": 20.317462371052788, "iter_time": 1.0154365310668947, "loss": 0.45212507247924805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.767919093134385, "step_time": 0.9061475238800049} +{"epoch": 0, "iter": 19047, "iter_tflops": 44.87131046184472, "iter_time": 0.4597836189270019, "loss": 0.31231164932250977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.432120924427274, "step_time": 0.42597955894470213} +{"epoch": 0, "iter": 19048, "iter_tflops": 50.04864073280524, "iter_time": 0.41222085571289063, "loss": 0.41793081164360046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.12569302127016, "step_time": 0.38117005729675296} +{"epoch": 0, "iter": 19049, "iter_tflops": 25.043293354718607, "iter_time": 0.8238171081542971, "loss": 0.32801780104637146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
26.46852272321072, "step_time": 0.7794576873779296} +{"epoch": 0, "iter": 19050, "iter_tflops": 12.25039179727833, "iter_time": 1.6841170349121095, "loss": 0.142317533493042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.499760321464127, "step_time": 1.178935775756836} +{"epoch": 0, "iter": 19051, "iter_tflops": 35.033842254607364, "iter_time": 0.5888904037475586, "loss": 0.26945987343788147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.25830445369779, "step_time": 0.5392579154968261} +{"epoch": 0, "iter": 19052, "iter_tflops": 37.69873503018796, "iter_time": 0.5472622222900391, "loss": 0.2642296552658081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.22089944885214, "step_time": 0.5005008087158204} +{"epoch": 0, "iter": 19053, "iter_tflops": 19.364676301204074, "iter_time": 1.0653983154296873, "loss": 0.21888592839241028, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.67715931589837, "step_time": 0.9977721405029297} +{"epoch": 0, "iter": 19054, "iter_tflops": 20.575530535716382, "iter_time": 1.002700439453125, "loss": 0.1943691372871399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.954842589878083, "step_time": 0.7380150127410889} +{"epoch": 0, "iter": 19055, "iter_tflops": 48.66498295037785, "iter_time": 0.4239412460327149, "loss": 0.20336249470710754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.7443309266701, "step_time": 0.39115281486511233} +{"epoch": 0, "iter": 19056, "iter_tflops": 47.32992102810791, "iter_time": 0.43589959716796867, "loss": 0.17375189065933228, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.55247587634356, "step_time": 0.40019597816467284} +{"epoch": 0, "iter": 19057, "iter_tflops": 43.02524963083385, "iter_time": 0.47951130294799804, "loss": 0.04913581162691116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.005601670078384, "step_time": 0.43890712547302246} +{"epoch": 0, "iter": 19058, "iter_tflops": 10.393396523941865, "iter_time": 1.9850193786621093, "loss": 0.09057746082544327, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 11.412587183574288, "step_time": 1.8077490386962891} +{"epoch": 0, "iter": 19059, "iter_tflops": 13.822727000823251, "iter_time": 1.4925487213134765, "loss": 0.10122979432344437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.23298032265011, "step_time": 1.1971866226196288} +{"epoch": 0, "iter": 19060, "iter_tflops": 21.19739975657967, "iter_time": 0.973284164428711, "loss": 0.06931839883327484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.741238430800582, "step_time": 0.8014802227020263} +{"epoch": 0, "iter": 19061, "iter_tflops": 15.927821562969118, "iter_time": 1.1187841644287109, "loss": 0.2706390917301178, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 17.071541921791926, "step_time": 1.043830406188965} +{"epoch": 0, "iter": 19062, "iter_tflops": 10.645562839402281, "iter_time": 1.6739175567626954, "loss": 0.3906084895133972, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 12.489585463037663, "step_time": 1.4267722969055174} +{"epoch": 0, "iter": 19063, "iter_tflops": 25.33880966640128, "iter_time": 0.7032609176635742, "loss": 0.30393847823143005, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 27.261310872318685, "step_time": 0.6536660919189453} +{"epoch": 0, "iter": 19064, "iter_tflops": 27.655374183748428, "iter_time": 0.6443519592285155, "loss": 0.2292289137840271, "lr": 3e-05, "seqlen": 7104.0, "step_tflops": 29.716730763700422, "step_time": 0.5996552810668946} +{"epoch": 0, "iter": 19065, "iter_tflops": 21.514686409001055, "iter_time": 0.9589307098388672, "loss": 0.2912980020046234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.934642788640012, "step_time": 0.8995602722167969} +{"epoch": 0, "iter": 19066, "iter_tflops": 13.962253798502006, "iter_time": 1.4776334686279298, "loss": 0.2271387279033661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.96874319611939, "step_time": 1.2158292026519775} +{"epoch": 0, "iter": 19067, "iter_tflops": 38.71934308244063, "iter_time": 0.5328368682861329, "loss": 0.39050495624542236, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 42.26940675503162, "step_time": 0.48808571243286136} +{"epoch": 0, "iter": 19068, "iter_tflops": 41.699804310965405, "iter_time": 0.49475276565551757, "loss": 0.3428153395652771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.61946242443151, "step_time": 0.45224324035644536} +{"epoch": 0, "iter": 19069, "iter_tflops": 36.1074067763082, "iter_time": 0.5713812026977538, "loss": 0.10325087606906891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.67966942790554, "step_time": 0.5199411640167236} +{"epoch": 0, "iter": 19070, "iter_tflops": 46.07868425406364, "iter_time": 0.44773616790771475, "loss": 0.12479006499052048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.416208097645836, "step_time": 0.40921549415588376} +{"epoch": 0, "iter": 19071, "iter_tflops": 48.30154802666881, "iter_time": 0.427131103515625, "loss": 0.10795837640762329, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.37065999749948, "step_time": 0.3939437370300293} +{"epoch": 0, "iter": 19072, "iter_tflops": 46.79321889350837, "iter_time": 0.4408992156982422, "loss": 0.10032476484775543, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.668636879467975, "step_time": 0.40717680168151854} +{"epoch": 0, "iter": 19073, "iter_tflops": 46.60689146832316, "iter_time": 0.442661865234375, "loss": 0.04894500970840454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.17176708486826, "step_time": 0.4031733646392822} +{"epoch": 0, "iter": 19074, "iter_tflops": 44.11304470019022, "iter_time": 0.46768690872192376, "loss": 0.0943320244550705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.5847816233113, "step_time": 0.4335649509429932} +{"epoch": 0, "iter": 19075, "iter_tflops": 53.056006826274896, "iter_time": 0.3888549995422363, "loss": 0.0933876782655716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.88340574281415, "step_time": 0.35642501068115234} +{"epoch": 0, "iter": 19076, "iter_tflops": 55.41803859632901, "iter_time": 0.3722811927795411, "loss": 0.03418682888150215, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.12378822808725, "step_time": 0.3431436061859131} +{"epoch": 0, "iter": 19077, "iter_tflops": 30.896108443747924, "iter_time": 0.6677570266723633, "loss": 0.3635728359222412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.9234711923182, "step_time": 0.6266378593444824} +{"epoch": 0, "iter": 19078, "iter_tflops": 15.777866836232969, "iter_time": 1.307597137451172, "loss": 0.39897584915161133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.626487871533822, "step_time": 1.0002232875823975} +{"epoch": 0, "iter": 19079, "iter_tflops": 37.843841661220225, "iter_time": 0.5451638259887696, "loss": 0.5423853993415833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.292482361169526, "step_time": 0.49963316154479975} +{"epoch": 0, "iter": 19080, "iter_tflops": 31.72431759895917, "iter_time": 0.6503242645263672, "loss": 0.4646659791469574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.708131018414115, "step_time": 0.5944167232513428} +{"epoch": 0, "iter": 19081, "iter_tflops": 18.513946239542467, "iter_time": 1.1143541870117186, "loss": 0.23943963646888733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.925082171673754, "step_time": 1.0354332962036135} +{"epoch": 0, "iter": 19082, "iter_tflops": 18.112750793453657, "iter_time": 1.1390370101928713, "loss": 0.2246592789888382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.66999094670808, "step_time": 0.8716139163970948} +{"epoch": 0, "iter": 19083, "iter_tflops": 44.97548885831075, "iter_time": 0.45871860504150397, "loss": 0.16485938429832458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.61313159603083, "step_time": 0.4243934268951416} +{"epoch": 0, "iter": 19084, "iter_tflops": 50.219092794471415, "iter_time": 0.4108217086791992, "loss": 0.33447742462158203, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.34340613818663, "step_time": 0.37964299583435057} +{"epoch": 0, "iter": 19085, "iter_tflops": 38.6766094126477, "iter_time": 0.5334255981445313, "loss": 
0.325295627117157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.76622730157052, "step_time": 0.493965934753418} +{"epoch": 0, "iter": 19086, "iter_tflops": 43.253697859165094, "iter_time": 0.4769787216186524, "loss": 0.3271682560443878, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.54757968991147, "step_time": 0.42496646881103517} +{"epoch": 0, "iter": 19087, "iter_tflops": 49.39774166123799, "iter_time": 0.41765256500244147, "loss": 0.3602261245250702, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.43344287623081, "step_time": 0.3861082572937012} +{"epoch": 0, "iter": 19088, "iter_tflops": 47.71608493196901, "iter_time": 0.432371883392334, "loss": 0.17940759658813477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.70363347589708, "step_time": 0.3990259895324707} +{"epoch": 0, "iter": 19089, "iter_tflops": 30.03463263698424, "iter_time": 0.6869101333618164, "loss": 0.07869276404380798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.002745453846835, "step_time": 0.6446663627624512} +{"epoch": 0, "iter": 19090, "iter_tflops": 13.021120696767797, "iter_time": 1.5844330139160154, "loss": 0.09162148833274841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.88683978742612, "step_time": 1.2217261352539064} +{"epoch": 0, "iter": 19091, "iter_tflops": 41.224477593670805, "iter_time": 0.5004573669433594, "loss": 0.10150963068008423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.11433219598327, "step_time": 0.4573068580627441} +{"epoch": 0, "iter": 19092, "iter_tflops": 43.71198548048054, "iter_time": 0.47197795486450195, "loss": 0.13414713740348816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.726295600873236, "step_time": 0.4322793807983398} +{"epoch": 0, "iter": 19093, "iter_tflops": 17.891065239140453, "iter_time": 1.153150650024414, "loss": 0.17754486203193665, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.197194243124823, "step_time": 1.0746931686401369} +{"epoch": 0, "iter": 19094, "iter_tflops": 23.58434871684771, "iter_time": 
0.8747790222167969, "loss": 0.21941260993480682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.96177907588906, "step_time": 0.7651977806091308} +{"epoch": 0, "iter": 19095, "iter_tflops": 44.77140812280348, "iter_time": 0.4608095741271972, "loss": 0.17230413854122162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.973238928133156, "step_time": 0.4212728004455567} +{"epoch": 0, "iter": 19096, "iter_tflops": 40.34302340895753, "iter_time": 0.5113918533325195, "loss": 0.2320398986339569, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10221782993526, "step_time": 0.46780172348022464} +{"epoch": 0, "iter": 19097, "iter_tflops": 29.963171711692173, "iter_time": 0.6885483856201172, "loss": 0.03213071823120117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.34788298334452, "step_time": 0.6186627655029296} +{"epoch": 0, "iter": 19098, "iter_tflops": 38.68454748110918, "iter_time": 0.5333161392211914, "loss": 0.0334148071706295, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.31941001034788, "step_time": 0.4762551822662353} +{"epoch": 0, "iter": 19099, "iter_tflops": 44.35986671530757, "iter_time": 0.46508465957641604, "loss": 0.03408051282167435, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.80887304000063, "step_time": 0.4226914539337158} +{"epoch": 0, "iter": 19100, "iter_tflops": 42.447811070379856, "iter_time": 0.4860343322753906, "loss": 0.0251927487552166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.74217981397005, "step_time": 0.4413806457519531} +{"epoch": 0, "iter": 19101, "iter_tflops": 26.529888217731003, "iter_time": 0.7776547470092773, "loss": 0.010667315684258938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.63456268512897, "step_time": 0.7204961967468262} +{"epoch": 0, "iter": 19102, "iter_tflops": 11.060056601759952, "iter_time": 1.865369613647461, "loss": 0.002227041870355606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.527404990602083, "step_time": 1.64687686920166} +{"epoch": 0, "iter": 19103, "iter_tflops": 12.38268274531556, 
"iter_time": 1.666124694824219, "loss": 0.005289163440465927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.67322311195733, "step_time": 1.3163274307250974} +{"epoch": 0, "iter": 19104, "iter_tflops": 22.012771090917532, "iter_time": 0.93723291015625, "loss": 0.018498050048947334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.091581660029103, "step_time": 0.6856101398468017} +{"epoch": 0, "iter": 19105, "iter_tflops": 18.477234302371553, "iter_time": 0.7869315185546876, "loss": 0.16839535534381866, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 19.87132915236754, "step_time": 0.7317234764099121} +{"epoch": 0, "iter": 19106, "iter_tflops": 22.76279840560813, "iter_time": 0.6387755050659181, "loss": 0.26455631852149963, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 24.482911015606614, "step_time": 0.5938966178894043} +{"epoch": 0, "iter": 19107, "iter_tflops": 27.43322560655606, "iter_time": 0.5300258255004883, "loss": 0.26851552724838257, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 29.178977344551093, "step_time": 0.49831486129760744} +{"epoch": 0, "iter": 19108, "iter_tflops": 26.51258096483943, "iter_time": 0.5484308776855468, "loss": 0.3060627579689026, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 28.234041782955703, "step_time": 0.5149924392700195} +{"epoch": 0, "iter": 19109, "iter_tflops": 23.103978386894294, "iter_time": 0.6417105484008789, "loss": 0.1449129432439804, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 24.674150051904622, "step_time": 0.6008744621276855} +{"epoch": 0, "iter": 19110, "iter_tflops": 25.288398545941114, "iter_time": 0.5862793807983397, "loss": 0.18711522221565247, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 27.004446710715502, "step_time": 0.5490231590270995} +{"epoch": 0, "iter": 19111, "iter_tflops": 26.57784191275447, "iter_time": 0.5578356094360353, "loss": 0.14352507889270782, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.35925734513416, "step_time": 0.5227946014404297} +{"epoch": 0, "iter": 19112, "iter_tflops": 
26.995918009168044, "iter_time": 0.5491966094970704, "loss": 0.14063605666160583, "lr": 3e-05, "seqlen": 5936.0, "step_tflops": 28.672364936484207, "step_time": 0.5170855865478516} +{"epoch": 0, "iter": 19113, "iter_tflops": 29.095049115474136, "iter_time": 0.7090929260253906, "loss": 0.4598495066165924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.84387638855207, "step_time": 0.6688878288269042} +{"epoch": 0, "iter": 19114, "iter_tflops": 8.508648151186165, "iter_time": 2.4247204895019534, "loss": 0.5122575759887695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.5402153716005, "step_time": 1.957369255065918} +{"epoch": 0, "iter": 19115, "iter_tflops": 16.64095872636786, "iter_time": 1.2397779388427734, "loss": 0.6498076915740967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.37540323810477, "step_time": 1.0648084716796875} +{"epoch": 0, "iter": 19116, "iter_tflops": 39.582038908699325, "iter_time": 0.521223617553711, "loss": 0.49590620398521423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.33326261862415, "step_time": 0.48734947967529296} +{"epoch": 0, "iter": 19117, "iter_tflops": 23.82805062592589, "iter_time": 0.6085061645507812, "loss": 0.24393779039382935, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 25.431606025240402, "step_time": 0.5701376342773438} +{"epoch": 0, "iter": 19118, "iter_tflops": 25.80350390584157, "iter_time": 0.5619204177856445, "loss": 0.14513185620307922, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 27.622769752809027, "step_time": 0.5249117240905762} +{"epoch": 0, "iter": 19119, "iter_tflops": 25.945279230089014, "iter_time": 0.5588498611450196, "loss": 0.31243816018104553, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 27.700817222401653, "step_time": 0.5234327774047852} +{"epoch": 0, "iter": 19120, "iter_tflops": 25.088289389746375, "iter_time": 0.5779395904541017, "loss": 0.1396283656358719, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 26.586207503379946, "step_time": 0.5453773612976074} +{"epoch": 0, "iter": 19121, 
"iter_tflops": 29.22894207287508, "iter_time": 0.7058446884155274, "loss": 0.090010866522789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.270851142462426, "step_time": 0.6597547798156738} +{"epoch": 0, "iter": 19122, "iter_tflops": 19.94643287142099, "iter_time": 1.0343249664306642, "loss": 0.05671217292547226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.02168571273955, "step_time": 0.8588528614044189} +{"epoch": 0, "iter": 19123, "iter_tflops": 40.60426301666167, "iter_time": 0.508101661682129, "loss": 0.06796146184206009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.658857683493395, "step_time": 0.46197091865539547} +{"epoch": 0, "iter": 19124, "iter_tflops": 41.50031642612507, "iter_time": 0.4971309928894043, "loss": 0.056311242282390594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.3699688072353, "step_time": 0.45473016738891603} +{"epoch": 0, "iter": 19125, "iter_tflops": 17.90106406022695, "iter_time": 1.1525065460205077, "loss": 0.1711767464876175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.005407076368893, "step_time": 1.0855381011962892} +{"epoch": 0, "iter": 19126, "iter_tflops": 15.371172760305967, "iter_time": 1.34219384765625, "loss": 0.18847855925559998, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.273369509190104, "step_time": 1.0176450195312499} +{"epoch": 0, "iter": 19127, "iter_tflops": 36.19419988738423, "iter_time": 0.5700110397338867, "loss": 0.1564948856830597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.97589196858831, "step_time": 0.516088384628296} +{"epoch": 0, "iter": 19128, "iter_tflops": 42.895744087105165, "iter_time": 0.48095898437499995, "loss": 0.16845254600048065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.71235814224946, "step_time": 0.4416624279022217} +{"epoch": 0, "iter": 19129, "iter_tflops": 22.946914262932577, "iter_time": 0.8990792083740234, "loss": 0.3349214792251587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.625716099336035, "step_time": 0.8377865409851074} +{"epoch": 0, 
"iter": 19130, "iter_tflops": 17.888001174728153, "iter_time": 1.1533481750488281, "loss": 0.3298426866531372, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.005817796980025, "step_time": 1.0312546939849854} +{"epoch": 0, "iter": 19131, "iter_tflops": 46.56032558636841, "iter_time": 0.4431045799255371, "loss": 0.4060138761997223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.36450336260125, "step_time": 0.4096355991363526} +{"epoch": 0, "iter": 19132, "iter_tflops": 50.997702608386874, "iter_time": 0.4045494689941406, "loss": 0.4252374768257141, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.15581707536321, "step_time": 0.3740510902404785} +{"epoch": 0, "iter": 19133, "iter_tflops": 14.40739676587242, "iter_time": 1.0035621490478515, "loss": 0.00811685249209404, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 15.080156332374766, "step_time": 0.9587909927368163} +{"epoch": 0, "iter": 19134, "iter_tflops": 11.722987637067032, "iter_time": 1.2333646087646484, "loss": 0.006032553501427174, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 14.483119650852956, "step_time": 0.9983151702880858} +{"epoch": 0, "iter": 19135, "iter_tflops": 30.13113594094709, "iter_time": 0.4798597068786621, "loss": 0.0035693238023668528, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 33.39086292957775, "step_time": 0.4330142078399658} +{"epoch": 0, "iter": 19136, "iter_tflops": 32.930164260138056, "iter_time": 0.439072151184082, "loss": 0.00646169250831008, "lr": 3e-05, "seqlen": 5792.0, "step_tflops": 36.2775016291019, "step_time": 0.3985588150024414} +{"epoch": 0, "iter": 19137, "iter_tflops": 28.631353678530775, "iter_time": 0.7205769500732422, "loss": 0.3232504725456238, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.994468076440537, "step_time": 0.6656379280090333} +{"epoch": 0, "iter": 19138, "iter_tflops": 37.45878668554026, "iter_time": 0.5507677993774414, "loss": 0.29907381534576416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.11460643659734, "step_time": 0.5017947463989259} 
+{"epoch": 0, "iter": 19139, "iter_tflops": 37.212306865136924, "iter_time": 0.5544158706665039, "loss": 0.25121936202049255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.785384000009216, "step_time": 0.5058452682495118} +{"epoch": 0, "iter": 19140, "iter_tflops": 38.76191681340234, "iter_time": 0.5322516326904296, "loss": 0.20971131324768066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.465378856268906, "step_time": 0.4858332614898681} +{"epoch": 0, "iter": 19141, "iter_tflops": 26.378205355852426, "iter_time": 0.7821265029907226, "loss": 0.508566677570343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.359240430382172, "step_time": 0.7274910469055176} +{"epoch": 0, "iter": 19142, "iter_tflops": 9.797683649598044, "iter_time": 2.1057113342285154, "loss": 0.4893113672733307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.212901008158099, "step_time": 1.8399425354003907} +{"epoch": 0, "iter": 19143, "iter_tflops": 12.071566944314549, "iter_time": 1.7090650787353514, "loss": 0.6021952033042908, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.838683110942215, "step_time": 1.302576316833496} +{"epoch": 0, "iter": 19144, "iter_tflops": 23.815911277704636, "iter_time": 0.8662735290527342, "loss": 0.48139962553977966, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.32850419110889, "step_time": 0.6190224857330322} +{"epoch": 0, "iter": 19145, "iter_tflops": 16.751191714331068, "iter_time": 0.9119092864990235, "loss": 0.1590486615896225, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 17.511677058980624, "step_time": 0.8723075027465821} +{"epoch": 0, "iter": 19146, "iter_tflops": 12.938112044962253, "iter_time": 1.180664321899414, "loss": 0.2245175838470459, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 15.729654181834132, "step_time": 0.97113179397583} +{"epoch": 0, "iter": 19147, "iter_tflops": 27.725197605160727, "iter_time": 0.5509633331298828, "loss": 0.13995133340358734, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.533471334411626, "step_time": 
0.5172289810180664} +{"epoch": 0, "iter": 19148, "iter_tflops": 28.982079306533436, "iter_time": 0.5270694046020509, "loss": 0.30814868211746216, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 30.75626282888716, "step_time": 0.4966652603149414} +{"epoch": 0, "iter": 19149, "iter_tflops": 27.74990885922956, "iter_time": 0.7434652709960938, "loss": 0.5245921611785889, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.329197890436806, "step_time": 0.7034319038391112} +{"epoch": 0, "iter": 19150, "iter_tflops": 19.767959764515396, "iter_time": 1.0436632690429686, "loss": 0.6609318256378174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.05635800419327, "step_time": 0.8576150016784667} +{"epoch": 0, "iter": 19151, "iter_tflops": 37.51900139272233, "iter_time": 0.5498838653564453, "loss": 0.6765646934509277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.99788404146394, "step_time": 0.503223373413086} +{"epoch": 0, "iter": 19152, "iter_tflops": 40.10731114505194, "iter_time": 0.5143973236083985, "loss": 0.7637291550636292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.38515385408198, "step_time": 0.47553348731994627} +{"epoch": 0, "iter": 19153, "iter_tflops": 14.26975168676411, "iter_time": 1.292107864379883, "loss": 0.08068600296974182, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 14.987479024766126, "step_time": 1.2302308044433594} +{"epoch": 0, "iter": 19154, "iter_tflops": 15.313421695219521, "iter_time": 1.204045623779297, "loss": 0.1185511127114296, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 23.13147081280495, "step_time": 0.7970984001159668} +{"epoch": 0, "iter": 19155, "iter_tflops": 42.733137795890045, "iter_time": 0.43146979904174804, "loss": 0.048314571380615234, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 46.71326871672707, "step_time": 0.3947070903778076} +{"epoch": 0, "iter": 19156, "iter_tflops": 41.92217633068932, "iter_time": 0.43981634521484375, "loss": 0.09144673496484756, "lr": 3e-05, "seqlen": 7344.0, "step_tflops": 45.66554423143475, 
"step_time": 0.40376302719116214} +{"epoch": 0, "iter": 19157, "iter_tflops": 37.94686673485469, "iter_time": 0.5436837158203126, "loss": 0.386320561170578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.9291784163259, "step_time": 0.5040681076049806} +{"epoch": 0, "iter": 19158, "iter_tflops": 43.65687830924372, "iter_time": 0.4725737228393554, "loss": 0.49687203764915466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73026120805837, "step_time": 0.43224346542358394} +{"epoch": 0, "iter": 19159, "iter_tflops": 46.27494421897545, "iter_time": 0.4458372421264648, "loss": 0.4651312232017517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.99429626215463, "step_time": 0.4126689453125} +{"epoch": 0, "iter": 19160, "iter_tflops": 49.406861738290026, "iter_time": 0.41757546997070316, "loss": 0.47390857338905334, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.44077081577877, "step_time": 0.3860553131103515} +{"epoch": 0, "iter": 19161, "iter_tflops": 38.18269412718961, "iter_time": 0.5403257675170899, "loss": 0.14224469661712646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.41357221785902, "step_time": 0.49817227554321286} +{"epoch": 0, "iter": 19162, "iter_tflops": 10.26475940824096, "iter_time": 2.009895477294922, "loss": 0.05342685058712959, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.36820946449219, "step_time": 1.6680743942260743} +{"epoch": 0, "iter": 19163, "iter_tflops": 14.70398631453278, "iter_time": 1.4030952606201172, "loss": 0.04474904015660286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.587256099586334, "step_time": 1.173070625305176} +{"epoch": 0, "iter": 19164, "iter_tflops": 16.930524988035973, "iter_time": 1.2185737609863283, "loss": 0.04230356216430664, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.21370631404198, "step_time": 1.0206487216949462} +{"epoch": 0, "iter": 19165, "iter_tflops": 15.235892212490121, "iter_time": 1.0563414154052735, "loss": 0.38041922450065613, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 
16.150969484116022, "step_time": 0.9964915084838868} +{"epoch": 0, "iter": 19166, "iter_tflops": 10.670707587161317, "iter_time": 1.5082696075439452, "loss": 0.21094773709774017, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 13.19614132520766, "step_time": 1.2196219749450683} +{"epoch": 0, "iter": 19167, "iter_tflops": 24.0237182166256, "iter_time": 0.6699339294433593, "loss": 0.3397829830646515, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 26.02522946343148, "step_time": 0.6184116058349609} +{"epoch": 0, "iter": 19168, "iter_tflops": 24.789172335672774, "iter_time": 0.649247329711914, "loss": 0.20292167365550995, "lr": 3e-05, "seqlen": 6432.0, "step_tflops": 26.469366936771706, "step_time": 0.6080350914001464} +{"epoch": 0, "iter": 19169, "iter_tflops": 20.727117568829062, "iter_time": 0.9953672256469727, "loss": 0.542128324508667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.18745789173129, "step_time": 0.9298538665771484} +{"epoch": 0, "iter": 19170, "iter_tflops": 9.715790458570023, "iter_time": 2.1234601135253905, "loss": 0.567180871963501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.342399905783052, "step_time": 1.8189354705810548} +{"epoch": 0, "iter": 19171, "iter_tflops": 10.295528318075737, "iter_time": 2.0038887634277343, "loss": 0.38594770431518555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.315191900276815, "step_time": 1.5494401931762696} +{"epoch": 0, "iter": 19172, "iter_tflops": 38.21169109313439, "iter_time": 0.539915740966797, "loss": 0.5441065430641174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.768452325323594, "step_time": 0.49393962097167965} +{"epoch": 0, "iter": 19173, "iter_tflops": 17.141799626374056, "iter_time": 0.8720555267333984, "loss": 0.4248890280723572, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 18.24577250078017, "step_time": 0.8192912139892577} +{"epoch": 0, "iter": 19174, "iter_tflops": 6.074188965201618, "iter_time": 2.461003631591797, "loss": 0.30835363268852234, "lr": 3e-05, "seqlen": 5984.0, 
"step_tflops": 6.707817876722772, "step_time": 2.228534133911133} +{"epoch": 0, "iter": 19175, "iter_tflops": 8.602964205936555, "iter_time": 1.7376105194091798, "loss": 0.31425708532333374, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 10.109616023131831, "step_time": 1.4786517181396481} +{"epoch": 0, "iter": 19176, "iter_tflops": 22.01115829527694, "iter_time": 0.6791374130249023, "loss": 0.13961383700370789, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.63779356685617, "step_time": 0.632402557373047} +{"epoch": 0, "iter": 19177, "iter_tflops": 10.971305131950068, "iter_time": 1.2992803039550782, "loss": 0.1568540334701538, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 11.631350863669834, "step_time": 1.2255498809814451} +{"epoch": 0, "iter": 19178, "iter_tflops": 8.443843549519679, "iter_time": 1.6881886291503907, "loss": 0.15846319496631622, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 12.146023783639176, "step_time": 1.1736187019348145} +{"epoch": 0, "iter": 19179, "iter_tflops": 22.742544442667665, "iter_time": 0.626790054321289, "loss": 0.19144675135612488, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 24.459022154174303, "step_time": 0.5828033752441406} +{"epoch": 0, "iter": 19180, "iter_tflops": 21.415882177029744, "iter_time": 0.6656181869506835, "loss": 0.22411403059959412, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 22.950257721690054, "step_time": 0.6211172370910645} +{"epoch": 0, "iter": 19181, "iter_tflops": 22.47791679312811, "iter_time": 0.9178383255004883, "loss": 0.20831815898418427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.965354658472748, "step_time": 0.8608716125488283} +{"epoch": 0, "iter": 19182, "iter_tflops": 8.953616513605656, "iter_time": 2.3042190246582033, "loss": 0.22251026332378387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.53423570618774, "step_time": 1.7886831893920898} +{"epoch": 0, "iter": 19183, "iter_tflops": 14.543045644895214, "iter_time": 1.4186226196289065, "loss": 0.14082376658916473, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 18.378484210247848, "step_time": 1.122567741394043} +{"epoch": 0, "iter": 19184, "iter_tflops": 41.30893218424752, "iter_time": 0.49943420028686525, "loss": 0.1816740781068802, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.22556675135182, "step_time": 0.4561820888519287} +{"epoch": 0, "iter": 19185, "iter_tflops": 15.483376813199824, "iter_time": 0.9496361694335937, "loss": 0.17884519696235657, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 16.45422893218578, "step_time": 0.8936045989990234} +{"epoch": 0, "iter": 19186, "iter_tflops": 19.581893562386732, "iter_time": 0.7508760375976563, "loss": 0.28235772252082825, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 21.480685178841153, "step_time": 0.684502124786377} +{"epoch": 0, "iter": 19187, "iter_tflops": 24.507081147903584, "iter_time": 0.5999724960327149, "loss": 0.22887861728668213, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 26.271116891999, "step_time": 0.5596859359741211} +{"epoch": 0, "iter": 19188, "iter_tflops": 22.798298423985226, "iter_time": 0.6449417572021484, "loss": 0.18878570199012756, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.442057139717154, "step_time": 0.6015686225891114} +{"epoch": 0, "iter": 19189, "iter_tflops": 27.23960601875749, "iter_time": 0.7573932418823243, "loss": 0.5938295125961304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.64563289445128, "step_time": 0.6959235305786133} +{"epoch": 0, "iter": 19190, "iter_tflops": 10.004446939758953, "iter_time": 2.0621923065185546, "loss": 0.5196914672851562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.682997782583934, "step_time": 1.7659075088500975} +{"epoch": 0, "iter": 19191, "iter_tflops": 13.956124748674673, "iter_time": 1.4782823944091799, "loss": 0.5306388139724731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.427774461773616, "step_time": 1.25586661529541} +{"epoch": 0, "iter": 19192, "iter_tflops": 43.32051672739199, "iter_time": 0.4762430152893067, "loss": 0.4157198965549469, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.764707490199406, "step_time": 0.44116802215576173} +{"epoch": 0, "iter": 19193, "iter_tflops": 17.43275197853017, "iter_time": 0.8340804748535157, "loss": 0.23032404482364655, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 18.354995481780264, "step_time": 0.7921722488403321} +{"epoch": 0, "iter": 19194, "iter_tflops": 6.85301725657591, "iter_time": 2.121739593505859, "loss": 0.12724187970161438, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 7.935704184654662, "step_time": 1.8322656326293947} +{"epoch": 0, "iter": 19195, "iter_tflops": 7.724385129317602, "iter_time": 1.882391647338867, "loss": 0.16801737248897552, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 9.867168010250039, "step_time": 1.4736060066223144} +{"epoch": 0, "iter": 19196, "iter_tflops": 16.21497531874087, "iter_time": 0.8967215652465821, "loss": 0.15681029856204987, "lr": 3e-05, "seqlen": 5824.0, "step_tflops": 19.79174302811187, "step_time": 0.7346658668518067} +{"epoch": 0, "iter": 19197, "iter_tflops": 14.314089058848353, "iter_time": 1.1386881103515625, "loss": 0.13312092423439026, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 15.29703784283328, "step_time": 1.0655189056396486} +{"epoch": 0, "iter": 19198, "iter_tflops": 14.031120935797972, "iter_time": 1.1616522369384767, "loss": 0.1310482621192932, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 19.67044557604054, "step_time": 0.8286178855895997} +{"epoch": 0, "iter": 19199, "iter_tflops": 27.370449723256108, "iter_time": 0.5955065841674805, "loss": 0.21238963305950165, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.078065574487738, "step_time": 0.560535327911377} +{"epoch": 0, "iter": 19200, "iter_tflops": 31.192880693459745, "iter_time": 0.5225321502685547, "loss": 0.24420838057994843, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 33.088387372817486, "step_time": 0.49259828948974604} +{"epoch": 0, "iter": 19201, "iter_tflops": 31.973477955031985, "iter_time": 0.6452564697265625, "loss": 
0.5602153539657593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.29303304884736, "step_time": 0.6016118049621583} +{"epoch": 0, "iter": 19202, "iter_tflops": 9.436205594707834, "iter_time": 2.186376007080078, "loss": 0.7645768523216248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.563289144812252, "step_time": 1.642172943115234} +{"epoch": 0, "iter": 19203, "iter_tflops": 12.42200327942587, "iter_time": 1.66085075378418, "loss": 0.7410436272621155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.617087977222528, "step_time": 1.4114366378784178} +{"epoch": 0, "iter": 19204, "iter_tflops": 21.758177636208764, "iter_time": 0.9481995162963868, "loss": 0.4646541178226471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.899336536425544, "step_time": 0.7669740657806396} +{"epoch": 0, "iter": 19205, "iter_tflops": 16.682927194103467, "iter_time": 1.0311495208740233, "loss": 0.19223691523075104, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 17.885979764639163, "step_time": 0.9617920074462889} +{"epoch": 0, "iter": 19206, "iter_tflops": 14.8187876917055, "iter_time": 1.1608636779785155, "loss": 0.19731773436069489, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 16.76969644608281, "step_time": 1.025814178466797} +{"epoch": 0, "iter": 19207, "iter_tflops": 30.868441637777384, "iter_time": 0.5572873611450195, "loss": 0.1157701164484024, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 32.87712391178672, "step_time": 0.5232389678955078} +{"epoch": 0, "iter": 19208, "iter_tflops": 29.268103782644747, "iter_time": 0.5877590332031251, "loss": 0.26583075523376465, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 31.0192134550487, "step_time": 0.5545786132812501} +{"epoch": 0, "iter": 19209, "iter_tflops": 32.585024976830866, "iter_time": 0.633146469116211, "loss": 0.11699400097131729, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.85801950833168, "step_time": 0.5918607482910156} +{"epoch": 0, "iter": 19210, "iter_tflops": 26.032744368663675, "iter_time": 0.7925055160522461, 
"loss": 0.13156737387180328, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.34914266791997, "step_time": 0.6377632236480713} +{"epoch": 0, "iter": 19211, "iter_tflops": 51.58312439918859, "iter_time": 0.39995819854736325, "loss": 0.1235257089138031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.833189440265336, "step_time": 0.36951307487487794} +{"epoch": 0, "iter": 19212, "iter_tflops": 52.35167763267207, "iter_time": 0.3940865783691406, "loss": 0.11166838556528091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.018739378616985, "step_time": 0.36183005332946777} +{"epoch": 0, "iter": 19213, "iter_tflops": 19.05390493331685, "iter_time": 0.7160608978271485, "loss": 0.0015654137823730707, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 20.313326499634883, "step_time": 0.6716652870178222} +{"epoch": 0, "iter": 19214, "iter_tflops": 32.739110766782716, "iter_time": 0.4167418098449708, "loss": 0.0032868566922843456, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 36.04595586078368, "step_time": 0.3785100421905518} +{"epoch": 0, "iter": 19215, "iter_tflops": 35.43130529599112, "iter_time": 0.3850763092041015, "loss": 0.006262197624891996, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 38.96491585592729, "step_time": 0.3501549015045166} +{"epoch": 0, "iter": 19216, "iter_tflops": 36.03502396142898, "iter_time": 0.37862487030029296, "loss": 0.0012584771029651165, "lr": 3e-05, "seqlen": 5472.0, "step_tflops": 39.66757589116961, "step_time": 0.34395235824584963} +{"epoch": 0, "iter": 19217, "iter_tflops": 29.832864878507554, "iter_time": 0.691555892944336, "loss": 0.5860958099365234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.598042461451207, "step_time": 0.652923152923584} +{"epoch": 0, "iter": 19218, "iter_tflops": 15.668968795408706, "iter_time": 1.3166848297119143, "loss": 0.6313852071762085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.915026770373274, "step_time": 1.1516082992553711} +{"epoch": 0, "iter": 19219, "iter_tflops": 27.786291165215662, 
"iter_time": 0.7424918060302734, "loss": 0.47581613063812256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.99492472095962, "step_time": 0.5895453033447265} +{"epoch": 0, "iter": 19220, "iter_tflops": 35.87868732630948, "iter_time": 0.5750236434936524, "loss": 0.44311410188674927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.98176323912747, "step_time": 0.5292498798370362} +{"epoch": 0, "iter": 19221, "iter_tflops": 21.65334146312215, "iter_time": 0.9527902908325196, "loss": 0.5491014719009399, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.246815838950383, "step_time": 0.8874804039001465} +{"epoch": 0, "iter": 19222, "iter_tflops": 25.313441523550555, "iter_time": 0.8150252304077149, "loss": 0.6790213584899902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.287918857117308, "step_time": 0.7293252506256103} +{"epoch": 0, "iter": 19223, "iter_tflops": 43.18597885556486, "iter_time": 0.4777266616821289, "loss": 0.46051281690597534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.66987486362939, "step_time": 0.4420644702911377} +{"epoch": 0, "iter": 19224, "iter_tflops": 46.22576572571378, "iter_time": 0.4463115577697754, "loss": 0.6214227676391602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.58745099914722, "step_time": 0.4160547294616699} +{"epoch": 0, "iter": 19225, "iter_tflops": 42.87328908910357, "iter_time": 0.48121088790893557, "loss": 0.18104971945285797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.59683273190595, "step_time": 0.44275742149353026} +{"epoch": 0, "iter": 19226, "iter_tflops": 20.489199333106573, "iter_time": 1.0069253158569336, "loss": 0.08251851797103882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.577855240997724, "step_time": 0.8394179763793946} +{"epoch": 0, "iter": 19227, "iter_tflops": 47.129243110545076, "iter_time": 0.43775567245483393, "loss": 0.11967414617538452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.54270644573759, "step_time": 0.40027183151245116} +{"epoch": 0, "iter": 19228, "iter_tflops": 
52.014756023496666, "iter_time": 0.39663924407958984, "loss": 0.19366848468780518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.46611084349948, "step_time": 0.365371250152588} +{"epoch": 0, "iter": 19229, "iter_tflops": 2.796345641353211, "iter_time": 0.47622520446777344, "loss": 2.1314384937286377, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 3.06148633149243, "step_time": 0.434981616973877} +{"epoch": 0, "iter": 19230, "iter_tflops": 2.3380001147209284, "iter_time": 0.569585205078125, "loss": 2.2814269065856934, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 2.611414727446499, "step_time": 0.5099497451782227} +{"epoch": 0, "iter": 19231, "iter_tflops": 2.870469488119111, "iter_time": 0.46392768859863287, "loss": 2.089346170425415, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 3.143738445008106, "step_time": 0.4236008491516114} +{"epoch": 0, "iter": 19232, "iter_tflops": 3.034532472570116, "iter_time": 0.438845287322998, "loss": 2.00771164894104, "lr": 3e-05, "seqlen": 544.0, "step_tflops": 3.307684176812114, "step_time": 0.4026050262451172} +{"epoch": 0, "iter": 19233, "iter_tflops": 20.32852325021168, "iter_time": 1.0148840255737306, "loss": 0.17036503553390503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.966326821839008, "step_time": 0.9392145385742188} +{"epoch": 0, "iter": 19234, "iter_tflops": 19.935306123986315, "iter_time": 1.0349022674560546, "loss": 0.1680748015642166, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.49092700459342, "step_time": 0.8423974113464355} +{"epoch": 0, "iter": 19235, "iter_tflops": 46.786668384180985, "iter_time": 0.44096094512939454, "loss": 0.1600506603717804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.74661908877812, "step_time": 0.40655109405517575} +{"epoch": 0, "iter": 19236, "iter_tflops": 49.84834822354609, "iter_time": 0.4138771743774414, "loss": 0.1327773630619049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.933378934911204, "step_time": 0.38252922248840326} +{"epoch": 0, "iter": 19237, "iter_tflops": 
25.954174954942257, "iter_time": 0.7949046173095703, "loss": 0.27492383122444153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.32845228280596, "step_time": 0.7549309158325195} +{"epoch": 0, "iter": 19238, "iter_tflops": 16.245035934275265, "iter_time": 1.269993713378906, "loss": 0.20459526777267456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.50358491537874, "step_time": 1.0578103256225586} +{"epoch": 0, "iter": 19239, "iter_tflops": 38.648187579427194, "iter_time": 0.5338178787231446, "loss": 0.3103950023651123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.29176525465234, "step_time": 0.4878276748657227} +{"epoch": 0, "iter": 19240, "iter_tflops": 40.80187191385159, "iter_time": 0.5056408576965332, "loss": 0.37755879759788513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.65792140155277, "step_time": 0.461980604171753} +{"epoch": 0, "iter": 19241, "iter_tflops": 13.209505098991515, "iter_time": 0.6839404373168946, "loss": 0.007150373421609402, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 14.282294079710493, "step_time": 0.6325674743652344} +{"epoch": 0, "iter": 19242, "iter_tflops": 4.071041090873345, "iter_time": 2.219214813232422, "loss": 0.010724948719143867, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 4.482310003114046, "step_time": 2.015593452453613} +{"epoch": 0, "iter": 19243, "iter_tflops": 6.464977319199351, "iter_time": 1.3974549713134767, "loss": 0.007622112520039082, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 8.1009774742596, "step_time": 1.1152376022338868} +{"epoch": 0, "iter": 19244, "iter_tflops": 15.02297972877034, "iter_time": 0.6013796768188476, "loss": 0.0037024717312306166, "lr": 3e-05, "seqlen": 3648.0, "step_tflops": 19.04066521370033, "step_time": 0.47448524475097653} +{"epoch": 0, "iter": 19245, "iter_tflops": 12.340473105731025, "iter_time": 1.2113474884033204, "loss": 0.23942118883132935, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 12.985212364233828, "step_time": 1.1512018966674804} +{"epoch": 0, "iter": 19246, 
"iter_tflops": 6.223591747669364, "iter_time": 2.401925079345703, "loss": 0.19470098614692688, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 7.689241580786783, "step_time": 1.944093048095703} +{"epoch": 0, "iter": 19247, "iter_tflops": 9.342747742242187, "iter_time": 1.6000219116210936, "loss": 0.23005425930023193, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 10.821099917393155, "step_time": 1.3814308357238767} +{"epoch": 0, "iter": 19248, "iter_tflops": 21.732341143579898, "iter_time": 0.687850471496582, "loss": 0.2204653024673462, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 23.321649776581754, "step_time": 0.6409752845764161} +{"epoch": 0, "iter": 19249, "iter_tflops": 13.168011001237426, "iter_time": 1.1911213989257812, "loss": 0.11166328936815262, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 14.016751110313555, "step_time": 1.1189968032836914} +{"epoch": 0, "iter": 19250, "iter_tflops": 13.548626669102559, "iter_time": 1.1576597442626952, "loss": 0.2226579189300537, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 15.193212834894897, "step_time": 1.032349105834961} +{"epoch": 0, "iter": 19251, "iter_tflops": 23.866871481377, "iter_time": 0.6571745147705078, "loss": 0.29735976457595825, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.74241713697527, "step_time": 0.6092939758300782} +{"epoch": 0, "iter": 19252, "iter_tflops": 26.10266163537976, "iter_time": 0.6008850708007811, "loss": 0.2643256187438965, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 27.966642356259296, "step_time": 0.5608359947204589} +{"epoch": 0, "iter": 19253, "iter_tflops": 35.647495440599535, "iter_time": 0.5787529602050782, "loss": 0.5391635894775391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.31876013750937, "step_time": 0.5247137355804443} +{"epoch": 0, "iter": 19254, "iter_tflops": 42.87182124554889, "iter_time": 0.4812273635864258, "loss": 0.5892646908760071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.79651670151431, "step_time": 0.44086814498901367} +{"epoch": 0, "iter": 
19255, "iter_tflops": 40.322927262015085, "iter_time": 0.5116467208862305, "loss": 0.5280066132545471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.14653779503616, "step_time": 0.47816336059570314} +{"epoch": 0, "iter": 19256, "iter_tflops": 48.28604705146647, "iter_time": 0.42726822280883797, "loss": 0.7019627094268799, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.34756258937307, "step_time": 0.3941175575256348} +{"epoch": 0, "iter": 19257, "iter_tflops": 38.29483514788268, "iter_time": 0.5387434997558594, "loss": 0.4594133496284485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.441723346135056, "step_time": 0.497833869934082} +{"epoch": 0, "iter": 19258, "iter_tflops": 11.614595765010224, "iter_time": 1.7763074951171878, "loss": 0.5403138995170593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.473640062731388, "step_time": 1.4254253540039064} +{"epoch": 0, "iter": 19259, "iter_tflops": 9.92167811609659, "iter_time": 2.079395568847656, "loss": 0.43857234716415405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.950329296600042, "step_time": 1.7264037666320802} +{"epoch": 0, "iter": 19260, "iter_tflops": 24.303759178706212, "iter_time": 0.8488848724365234, "loss": 0.5315786600112915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.63497554998235, "step_time": 0.5956722412109374} +{"epoch": 0, "iter": 19261, "iter_tflops": 16.102025207887994, "iter_time": 0.8498595275878907, "loss": 0.19573019444942474, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 17.412989215645972, "step_time": 0.7858765296936036} +{"epoch": 0, "iter": 19262, "iter_tflops": 14.076161882107685, "iter_time": 0.9721726455688477, "loss": 0.17868150770664215, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 17.161023995703545, "step_time": 0.7974150924682617} +{"epoch": 0, "iter": 19263, "iter_tflops": 23.233380228246165, "iter_time": 0.5889999389648437, "loss": 0.21241532266139984, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 24.743747909507558, "step_time": 0.5530471611022949} 
+{"epoch": 0, "iter": 19264, "iter_tflops": 25.199158340322153, "iter_time": 0.54305224609375, "loss": 0.25705811381340027, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 26.834710119299682, "step_time": 0.5099536933898926} +{"epoch": 0, "iter": 19265, "iter_tflops": 29.976479029741327, "iter_time": 0.6882427215576171, "loss": 0.3489597737789154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.14378670303958, "step_time": 0.6418376808166504} +{"epoch": 0, "iter": 19266, "iter_tflops": 11.438642995317137, "iter_time": 1.8036312103271486, "loss": 0.42034947872161865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.943114441547541, "step_time": 1.2940441207885742} +{"epoch": 0, "iter": 19267, "iter_tflops": 14.718429873881213, "iter_time": 1.4017183685302732, "loss": 0.4733172059059143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.619364966684106, "step_time": 1.1080449600219726} +{"epoch": 0, "iter": 19268, "iter_tflops": 15.338880239880538, "iter_time": 1.34501953125, "loss": 0.6459236145019531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.867936931255706, "step_time": 1.1546432914733886} +{"epoch": 0, "iter": 19269, "iter_tflops": 13.991274639899006, "iter_time": 1.1796194763183592, "loss": 0.18799522519111633, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 14.519033204818347, "step_time": 1.1367409820556642} +{"epoch": 0, "iter": 19270, "iter_tflops": 9.769000319168057, "iter_time": 1.689464584350586, "loss": 0.21936815977096558, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 11.990485894172584, "step_time": 1.3764563179016114} +{"epoch": 0, "iter": 19271, "iter_tflops": 28.534778846182466, "iter_time": 0.5783952331542969, "loss": 0.27658554911613464, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 30.536129904170007, "step_time": 0.5404869613647462} +{"epoch": 0, "iter": 19272, "iter_tflops": 30.07426893472474, "iter_time": 0.5487874069213867, "loss": 0.17392297089099884, "lr": 3e-05, "seqlen": 6592.0, "step_tflops": 32.040040483974295, "step_time": 
0.5151173286437988} +{"epoch": 0, "iter": 19273, "iter_tflops": 27.837145788594018, "iter_time": 0.7411353759765624, "loss": 0.17189544439315796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.422119544681287, "step_time": 0.7012103080749512} +{"epoch": 0, "iter": 19274, "iter_tflops": 12.987188192195818, "iter_time": 1.588572769165039, "loss": 0.1385250687599182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.05865688662535, "step_time": 1.28473344039917} +{"epoch": 0, "iter": 19275, "iter_tflops": 38.89499543645787, "iter_time": 0.5304305419921875, "loss": 0.21176066994667053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.573867639938776, "step_time": 0.48459523773193364} +{"epoch": 0, "iter": 19276, "iter_tflops": 39.69147514077677, "iter_time": 0.5197865142822267, "loss": 0.19537118077278137, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.414930729238904, "step_time": 0.47520733451843256} +{"epoch": 0, "iter": 19277, "iter_tflops": 38.412425903086785, "iter_time": 0.5370942611694336, "loss": 0.07361984997987747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.634470037796305, "step_time": 0.4839064140319824} +{"epoch": 0, "iter": 19278, "iter_tflops": 37.42644420555477, "iter_time": 0.551243751525879, "loss": 0.08790198713541031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.14916150776921, "step_time": 0.501373363494873} +{"epoch": 0, "iter": 19279, "iter_tflops": 42.423576111271, "iter_time": 0.4863119850158692, "loss": 0.1320975124835968, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.794710846061555, "step_time": 0.4408851585388183} +{"epoch": 0, "iter": 19280, "iter_tflops": 42.182761619954725, "iter_time": 0.48908826065063477, "loss": 0.12635935842990875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.399828292301905, "step_time": 0.4446372814178467} +{"epoch": 0, "iter": 19281, "iter_tflops": 28.78720469110215, "iter_time": 0.7166758193969727, "loss": 0.5620280504226685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.05204907453439, 
"step_time": 0.6644036102294921} +{"epoch": 0, "iter": 19282, "iter_tflops": 45.34527982602915, "iter_time": 0.45497775268554685, "loss": 0.4814590513706207, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.64996147968538, "step_time": 0.41553090667724607} +{"epoch": 0, "iter": 19283, "iter_tflops": 44.306397403810514, "iter_time": 0.4656459274291992, "loss": 0.5259571671485901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.193721054788604, "step_time": 0.4280867519378662} +{"epoch": 0, "iter": 19284, "iter_tflops": 48.66623405142313, "iter_time": 0.4239303474426269, "loss": 0.5315545797348022, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.71621490628031, "step_time": 0.3913614349365235} +{"epoch": 0, "iter": 19285, "iter_tflops": 31.616890507447682, "iter_time": 0.652533920288086, "loss": 0.05220648646354675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.57383596212294, "step_time": 0.6144991455078124} +{"epoch": 0, "iter": 19286, "iter_tflops": 8.98608606355391, "iter_time": 2.2958931579589845, "loss": 0.0524117536842823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.369589462253574, "step_time": 1.8145856170654298} +{"epoch": 0, "iter": 19287, "iter_tflops": 12.773690917879414, "iter_time": 1.6151239013671874, "loss": 0.07922017574310303, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.06247856688345, "step_time": 1.3697011032104491} +{"epoch": 0, "iter": 19288, "iter_tflops": 27.428113738540414, "iter_time": 0.7521878356933593, "loss": 0.08816475421190262, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.032511068038104, "step_time": 0.5889127807617187} +{"epoch": 0, "iter": 19289, "iter_tflops": 18.89503282376518, "iter_time": 0.830096450805664, "loss": 0.3427906334400177, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 19.89776341124956, "step_time": 0.7882644577026366} +{"epoch": 0, "iter": 19290, "iter_tflops": 11.92595263173594, "iter_time": 1.3151737365722656, "loss": 0.18539512157440186, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 
14.136852110404256, "step_time": 1.1094902572631837} +{"epoch": 0, "iter": 19291, "iter_tflops": 26.984821663044915, "iter_time": 0.581241554260254, "loss": 0.30223503708839417, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 28.73350918444673, "step_time": 0.5458678779602051} +{"epoch": 0, "iter": 19292, "iter_tflops": 28.290755544245705, "iter_time": 0.554410774230957, "loss": 0.27458083629608154, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 30.11400236716057, "step_time": 0.5208440742492676} +{"epoch": 0, "iter": 19293, "iter_tflops": 41.953450716847925, "iter_time": 0.4917615394592285, "loss": 0.35296064615249634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.576980548882766, "step_time": 0.4526647720336915} +{"epoch": 0, "iter": 19294, "iter_tflops": 42.34533488843498, "iter_time": 0.4872105407714844, "loss": 0.5719187259674072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59473398172657, "step_time": 0.45248851585388183} +{"epoch": 0, "iter": 19295, "iter_tflops": 43.766446015878685, "iter_time": 0.4713906517028809, "loss": 0.5271376371383667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.146526537679996, "step_time": 0.4375951957702637} +{"epoch": 0, "iter": 19296, "iter_tflops": 42.0009616717911, "iter_time": 0.49120526504516604, "loss": 0.5676990747451782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.82408897068798, "step_time": 0.4602679939270019} +{"epoch": 0, "iter": 19297, "iter_tflops": 27.7986900467645, "iter_time": 0.7421606369018554, "loss": 0.43080052733421326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.463005783732353, "step_time": 0.7002372283935545} +{"epoch": 0, "iter": 19298, "iter_tflops": 18.950673028357542, "iter_time": 1.0886733932495118, "loss": 0.4465881884098053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.105338488642392, "step_time": 0.8929145755767821} +{"epoch": 0, "iter": 19299, "iter_tflops": 38.06725165956714, "iter_time": 0.54196435546875, "loss": 0.31289422512054443, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 41.39088079028548, "step_time": 0.49844538497924806} +{"epoch": 0, "iter": 19300, "iter_tflops": 37.35332326871733, "iter_time": 0.5523228378295899, "loss": 0.30798253417015076, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.782131376867014, "step_time": 0.505885612487793} +{"epoch": 0, "iter": 19301, "iter_tflops": 32.26970391123683, "iter_time": 0.6393332138061524, "loss": 0.29735082387924194, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.74128023002176, "step_time": 0.577234317779541} +{"epoch": 0, "iter": 19302, "iter_tflops": 38.4176915669676, "iter_time": 0.5370206451416016, "loss": 0.3332161009311676, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.45814493848353, "step_time": 0.48591603660583493} +{"epoch": 0, "iter": 19303, "iter_tflops": 37.757346997213155, "iter_time": 0.5464126892089843, "loss": 0.23546719551086426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.343113660821054, "step_time": 0.49902128028869625} +{"epoch": 0, "iter": 19304, "iter_tflops": 40.45550265531771, "iter_time": 0.5099700202941895, "loss": 0.2901458442211151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.122683747543626, "step_time": 0.4675847377777099} +{"epoch": 0, "iter": 19305, "iter_tflops": 33.6931887218021, "iter_time": 0.612322380065918, "loss": 0.40742629766464233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.392017498293406, "step_time": 0.5517512798309325} +{"epoch": 0, "iter": 19306, "iter_tflops": 39.478460698448835, "iter_time": 0.5225911331176758, "loss": 0.40336766839027405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65337602785896, "step_time": 0.47261163711547854} +{"epoch": 0, "iter": 19307, "iter_tflops": 36.2709824818296, "iter_time": 0.5688043746948243, "loss": 0.4871617555618286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.328966899455025, "step_time": 0.5245775604248046} +{"epoch": 0, "iter": 19308, "iter_tflops": 48.91195887558866, "iter_time": 0.4218005981445312, "loss": 0.2751842141151428, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 53.223229814041346, "step_time": 0.38763324928283693} +{"epoch": 0, "iter": 19309, "iter_tflops": 24.92913556391688, "iter_time": 0.8275896072387694, "loss": 0.19599828124046326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.23607331979353, "step_time": 0.7863636169433593} +{"epoch": 0, "iter": 19310, "iter_tflops": 21.554713441573803, "iter_time": 0.9571499786376952, "loss": 0.2817595303058624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.86765173744354, "step_time": 0.7975634479522704} +{"epoch": 0, "iter": 19311, "iter_tflops": 47.941919796892705, "iter_time": 0.4303351554870606, "loss": 0.11868437379598618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.08895339923745, "step_time": 0.3960742568969726} +{"epoch": 0, "iter": 19312, "iter_tflops": 51.536692702031075, "iter_time": 0.40031853866577144, "loss": 0.21765732765197754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.84410192824823, "step_time": 0.3694408683776855} +{"epoch": 0, "iter": 19313, "iter_tflops": 24.158333737892953, "iter_time": 0.6475514984130859, "loss": 0.05400634557008743, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 25.701705080753968, "step_time": 0.6086664352416993} +{"epoch": 0, "iter": 19314, "iter_tflops": 15.962186052461426, "iter_time": 0.9800515518188477, "loss": 0.03991525620222092, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 21.0307257573809, "step_time": 0.7438528461456299} +{"epoch": 0, "iter": 19315, "iter_tflops": 29.76793796419523, "iter_time": 0.5255239791870117, "loss": 0.09247176349163055, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 32.842966310781826, "step_time": 0.4763201065063477} +{"epoch": 0, "iter": 19316, "iter_tflops": 33.54248042973657, "iter_time": 0.4663866539001465, "loss": 0.061772726476192474, "lr": 3e-05, "seqlen": 6256.0, "step_tflops": 36.808076153595955, "step_time": 0.4250090427398682} +{"epoch": 0, "iter": 19317, "iter_tflops": 23.1544507967262, "iter_time": 0.8910206375122071, "loss": 0.3931017220020294, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 25.10906878739304, "step_time": 0.8216590461730956} +{"epoch": 0, "iter": 19318, "iter_tflops": 16.675549217200412, "iter_time": 1.237206237792969, "loss": 0.5181527733802795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.29347352470121, "step_time": 1.0166368751525878} +{"epoch": 0, "iter": 19319, "iter_tflops": 34.31232758241288, "iter_time": 0.6012735061645507, "loss": 0.5739166140556335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.44460664265721, "step_time": 0.5509763717651367} +{"epoch": 0, "iter": 19320, "iter_tflops": 37.04382536370368, "iter_time": 0.5569374465942383, "loss": 0.47336333990097046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.31930732038155, "step_time": 0.5116926574707031} +{"epoch": 0, "iter": 19321, "iter_tflops": 32.53198689001287, "iter_time": 0.6341787109375, "loss": 0.6175982356071472, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.9287789173953, "step_time": 0.5742219505310059} +{"epoch": 0, "iter": 19322, "iter_tflops": 37.70000061846673, "iter_time": 0.5472438507080077, "loss": 0.5407894849777222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.1184826729024, "step_time": 0.5017474422454834} +{"epoch": 0, "iter": 19323, "iter_tflops": 40.95776891529716, "iter_time": 0.5037162437438965, "loss": 0.647438645362854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.71344351350709, "step_time": 0.4614069480895996} +{"epoch": 0, "iter": 19324, "iter_tflops": 38.60774601205452, "iter_time": 0.534377052307129, "loss": 0.5278635621070862, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.24202196821142, "step_time": 0.48840213012695316} +{"epoch": 0, "iter": 19325, "iter_tflops": 23.73783792326679, "iter_time": 0.8691226882934571, "loss": 0.605367124080658, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.518912665963427, "step_time": 0.8084628753662109} +{"epoch": 0, "iter": 19326, "iter_tflops": 35.7127670861609, "iter_time": 0.577695182800293, "loss": 0.5185428857803345, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 39.839191095629374, "step_time": 0.5178592472076416} +{"epoch": 0, "iter": 19327, "iter_tflops": 36.23724321626213, "iter_time": 0.5693339691162109, "loss": 0.5172476768493652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.634498278266385, "step_time": 0.5205337371826172} +{"epoch": 0, "iter": 19328, "iter_tflops": 39.5792094148261, "iter_time": 0.5212608795166015, "loss": 0.4736473858356476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.11743736634496, "step_time": 0.47848607826232914} +{"epoch": 0, "iter": 19329, "iter_tflops": 28.02458624045371, "iter_time": 0.7361783447265624, "loss": 0.6630426049232483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.955181534335065, "step_time": 0.6664827175140382} +{"epoch": 0, "iter": 19330, "iter_tflops": 39.94086865909779, "iter_time": 0.5165409317016602, "loss": 0.7948884963989258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.04015468974223, "step_time": 0.47934524536132816} +{"epoch": 0, "iter": 19331, "iter_tflops": 44.37517028627199, "iter_time": 0.4649242668151855, "loss": 0.40813276171684265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.88324519763631, "step_time": 0.4308624744415283} +{"epoch": 0, "iter": 19332, "iter_tflops": 43.119139171828095, "iter_time": 0.47846719360351564, "loss": 0.6835481524467468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.327959914782376, "step_time": 0.44532704544067386} +{"epoch": 0, "iter": 19333, "iter_tflops": 26.695711639514958, "iter_time": 0.7728242568969725, "loss": 0.03739263117313385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.230636449926795, "step_time": 0.7308051147460938} +{"epoch": 0, "iter": 19334, "iter_tflops": 14.614054649539355, "iter_time": 1.4117295989990235, "loss": 0.03099767118692398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.442648373094052, "step_time": 1.0092182350158692} +{"epoch": 0, "iter": 19335, "iter_tflops": 39.70013546819827, "iter_time": 0.519673126220703, "loss": 0.03148752078413963, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59498010953233, "step_time": 0.47324470520019535} +{"epoch": 0, "iter": 19336, "iter_tflops": 45.22313079509708, "iter_time": 0.4562066612243652, "loss": 0.030198421329259872, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.6590479480628, "step_time": 0.41545487403869624} +{"epoch": 0, "iter": 19337, "iter_tflops": 16.79907099980063, "iter_time": 1.2281091918945313, "loss": 0.0014878712827339768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.017636395675122, "step_time": 1.1450499420166016} +{"epoch": 0, "iter": 19338, "iter_tflops": 20.495888076110194, "iter_time": 1.0065967102050781, "loss": 0.0035885435063391924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.42716572004206, "step_time": 0.8445962886810302} +{"epoch": 0, "iter": 19339, "iter_tflops": 56.80030258168682, "iter_time": 0.36322154235839843, "loss": 0.0060525997541844845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.45884815958638, "step_time": 0.33031498527526854} +{"epoch": 0, "iter": 19340, "iter_tflops": 63.43882308749554, "iter_time": 0.3252124252319336, "loss": 0.0032963966950774193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 69.63100542234686, "step_time": 0.2962917652130127} +{"epoch": 0, "iter": 19341, "iter_tflops": 33.832303694687326, "iter_time": 0.609804573059082, "loss": 0.282507985830307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.19713805143856, "step_time": 0.569964771270752} +{"epoch": 0, "iter": 19342, "iter_tflops": 17.283965813053737, "iter_time": 1.1936550750732422, "loss": 0.22745124995708466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.111272726983977, "step_time": 0.933057710647583} +{"epoch": 0, "iter": 19343, "iter_tflops": 47.86529952633562, "iter_time": 0.4310240135192871, "loss": 0.2606227695941925, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.8946119236533, "step_time": 0.3975575256347656} +{"epoch": 0, "iter": 19344, "iter_tflops": 52.60781495683135, "iter_time": 0.3921678466796875, "loss": 
0.2898499071598053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.126298550154466, "step_time": 0.36114878845214843} +{"epoch": 0, "iter": 19345, "iter_tflops": 28.063076994987323, "iter_time": 0.7351686172485352, "loss": 0.08599510788917542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.723939378874658, "step_time": 0.6940901489257814} +{"epoch": 0, "iter": 19346, "iter_tflops": 17.886415232479035, "iter_time": 1.1534504394531249, "loss": 0.0500725619494915, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.065727763262963, "step_time": 0.9349835968017577} +{"epoch": 0, "iter": 19347, "iter_tflops": 41.42092752973284, "iter_time": 0.49808381271362306, "loss": 0.07077817618846893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.566504120341385, "step_time": 0.4527688465118408} +{"epoch": 0, "iter": 19348, "iter_tflops": 42.039231892254975, "iter_time": 0.4907580986022949, "loss": 0.03813212364912033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.785250090072545, "step_time": 0.4506056747436524} +{"epoch": 0, "iter": 19349, "iter_tflops": 21.246446838990767, "iter_time": 0.9710373535156249, "loss": 0.591365396976471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.185623714201824, "step_time": 0.8898226661682128} +{"epoch": 0, "iter": 19350, "iter_tflops": 16.728169115438217, "iter_time": 1.2333144989013671, "loss": 0.7183750867843628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.290867551082062, "step_time": 1.0167674427032471} +{"epoch": 0, "iter": 19351, "iter_tflops": 42.88797067398766, "iter_time": 0.481046157836914, "loss": 0.5476839542388916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.1250892069876, "step_time": 0.4472857151031494} +{"epoch": 0, "iter": 19352, "iter_tflops": 44.564595153614334, "iter_time": 0.4629480743408203, "loss": 0.5521723628044128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.808004360201785, "step_time": 0.43154057121276856} +{"epoch": 0, "iter": 19353, "iter_tflops": 28.11844984431461, "iter_time": 
0.7337208709716797, "loss": 0.45662030577659607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.79764233998256, "step_time": 0.6923733520507812} +{"epoch": 0, "iter": 19354, "iter_tflops": 17.23229648935501, "iter_time": 1.197234130859375, "loss": 0.42848509550094604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.445500351113058, "step_time": 1.0090774574279786} +{"epoch": 0, "iter": 19355, "iter_tflops": 38.574021629449874, "iter_time": 0.5348442459106445, "loss": 0.3790743947029114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.974636437087376, "step_time": 0.49151333427429206} +{"epoch": 0, "iter": 19356, "iter_tflops": 42.467889250709156, "iter_time": 0.48580454254150385, "loss": 0.4605957567691803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.063581208936654, "step_time": 0.44788296890258794} +{"epoch": 0, "iter": 19357, "iter_tflops": 17.456577044120067, "iter_time": 1.1818521728515625, "loss": 0.5141451954841614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.362656756458787, "step_time": 1.1235353240966797} +{"epoch": 0, "iter": 19358, "iter_tflops": 7.442894846461088, "iter_time": 2.7719179077148435, "loss": 0.3841590881347656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.435295031554938, "step_time": 2.1865870056152343} +{"epoch": 0, "iter": 19359, "iter_tflops": 17.088933976882657, "iter_time": 1.2072779693603515, "loss": 0.2785586416721344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.105821518446355, "step_time": 1.0798328399658204} +{"epoch": 0, "iter": 19360, "iter_tflops": 46.41281431624965, "iter_time": 0.44451287460327155, "loss": 0.427047461271286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.271483563276554, "step_time": 0.4103935680389404} +{"epoch": 0, "iter": 19361, "iter_tflops": 19.454529706221354, "iter_time": 0.7557918319702148, "loss": 0.17486301064491272, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 20.67857477236555, "step_time": 0.7110535812377929} +{"epoch": 0, "iter": 19362, "iter_tflops": 
11.90887823570379, "iter_time": 1.234673355102539, "loss": 0.13644041121006012, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 13.216641203705324, "step_time": 1.1125046386718749} +{"epoch": 0, "iter": 19363, "iter_tflops": 22.87293855630637, "iter_time": 0.6428371505737305, "loss": 0.2647949159145355, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.650288652652666, "step_time": 0.5964869155883789} +{"epoch": 0, "iter": 19364, "iter_tflops": 22.712933290886713, "iter_time": 0.6473657302856445, "loss": 0.11005852371454239, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.45621497538963, "step_time": 0.601220371246338} +{"epoch": 0, "iter": 19365, "iter_tflops": 17.777989117916203, "iter_time": 1.1604852142333983, "loss": 0.3691447377204895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.16000066887148, "step_time": 1.0767793731689455} +{"epoch": 0, "iter": 19366, "iter_tflops": 18.13875578416675, "iter_time": 1.137404006958008, "loss": 0.2175760716199875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.331464844735446, "step_time": 1.014737190246582} +{"epoch": 0, "iter": 19367, "iter_tflops": 46.41803864881028, "iter_time": 0.44446284484863285, "loss": 0.3587702512741089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.48715025847513, "step_time": 0.40864048385620116} +{"epoch": 0, "iter": 19368, "iter_tflops": 49.67368268516341, "iter_time": 0.4153324737548828, "loss": 0.35072243213653564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.061122239470436, "step_time": 0.3816253280639648} +{"epoch": 0, "iter": 19369, "iter_tflops": 25.48326437293424, "iter_time": 0.8095938262939453, "loss": 0.03517424315214157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.857389775026306, "step_time": 0.7681719512939452} +{"epoch": 0, "iter": 19370, "iter_tflops": 11.992379221771994, "iter_time": 1.720350326538086, "loss": 0.03460027277469635, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.43126099548943, "step_time": 1.4296112804412844} +{"epoch": 0, "iter": 19371, 
"iter_tflops": 49.908346109121936, "iter_time": 0.41337962722778315, "loss": 0.039367955178022385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.64934370447749, "step_time": 0.37751768112182615} +{"epoch": 0, "iter": 19372, "iter_tflops": 57.21149427329085, "iter_time": 0.36061098861694335, "loss": 0.03474805876612663, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.62896057593759, "step_time": 0.3294177856445313} +{"epoch": 0, "iter": 19373, "iter_tflops": 33.648360754183194, "iter_time": 0.6131381454467774, "loss": 0.47161322832107544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.0637714971571, "step_time": 0.5720725440979004} +{"epoch": 0, "iter": 19374, "iter_tflops": 14.404433103450485, "iter_time": 1.432273895263672, "loss": 0.4274122714996338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.38056959004621, "step_time": 1.2594857215881348} +{"epoch": 0, "iter": 19375, "iter_tflops": 45.49509552554057, "iter_time": 0.45347950744628907, "loss": 0.39308616518974304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.73721935151145, "step_time": 0.414801908493042} +{"epoch": 0, "iter": 19376, "iter_tflops": 50.20918284618515, "iter_time": 0.4109027938842773, "loss": 0.45399269461631775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.24825881757001, "step_time": 0.3803088607788086} +{"epoch": 0, "iter": 19377, "iter_tflops": 29.69034413428158, "iter_time": 0.6948755264282227, "loss": 0.5900051593780518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.708930420595692, "step_time": 0.6506398429870606} +{"epoch": 0, "iter": 19378, "iter_tflops": 31.007380281603133, "iter_time": 0.6653607406616212, "loss": 0.559853196144104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.36306502130272, "step_time": 0.6003857192993164} +{"epoch": 0, "iter": 19379, "iter_tflops": 38.785975661644976, "iter_time": 0.5319214782714844, "loss": 0.5188877582550049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.01518202784857, "step_time": 0.49103901290893553} +{"epoch": 0, 
"iter": 19380, "iter_tflops": 39.81661649505317, "iter_time": 0.5181528549194335, "loss": 0.6481162905693054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.20903487784483, "step_time": 0.4774717502593995} +{"epoch": 0, "iter": 19381, "iter_tflops": 33.275212453413225, "iter_time": 0.6200138778686524, "loss": 0.5673402547836304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.69362955623515, "step_time": 0.5622527332305909} +{"epoch": 0, "iter": 19382, "iter_tflops": 40.36569695629077, "iter_time": 0.5111046028137207, "loss": 0.4237062931060791, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.457138713559736, "step_time": 0.46406705665588377} +{"epoch": 0, "iter": 19383, "iter_tflops": 41.68141353357686, "iter_time": 0.49497106170654304, "loss": 0.3533608317375183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.42573008230285, "step_time": 0.45417197418212896} +{"epoch": 0, "iter": 19384, "iter_tflops": 36.907468423468195, "iter_time": 0.5589950866699219, "loss": 0.3441852033138275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.10056983527567, "step_time": 0.514483798980713} +{"epoch": 0, "iter": 19385, "iter_tflops": 12.789068612975967, "iter_time": 1.0064291000366212, "loss": 0.057498667389154434, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 13.594660071443602, "step_time": 0.9467901916503907} +{"epoch": 0, "iter": 19386, "iter_tflops": 12.139327662289755, "iter_time": 1.060296844482422, "loss": 0.042293526232242584, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 13.742473262368724, "step_time": 0.936606575012207} +{"epoch": 0, "iter": 19387, "iter_tflops": 25.822329189637628, "iter_time": 0.4984558410644531, "loss": 0.0207793228328228, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 28.505041275652125, "step_time": 0.45154436683654786} +{"epoch": 0, "iter": 19388, "iter_tflops": 28.371381755872214, "iter_time": 0.4536716232299805, "loss": 0.03452129289507866, "lr": 3e-05, "seqlen": 5168.0, "step_tflops": 31.159385417081342, "step_time": 
0.4130790977478027} +{"epoch": 0, "iter": 19389, "iter_tflops": 20.48022839023503, "iter_time": 1.0073663787841796, "loss": 0.319105863571167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.31419161780518, "step_time": 0.9245727500915527} +{"epoch": 0, "iter": 19390, "iter_tflops": 18.77714080980386, "iter_time": 1.0987345581054688, "loss": 0.3619188368320465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.539720531131977, "step_time": 0.7491395378112793} +{"epoch": 0, "iter": 19391, "iter_tflops": 43.925377868046496, "iter_time": 0.4696850547790527, "loss": 0.3001096546649933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.09880322435432, "step_time": 0.43803859329223627} +{"epoch": 0, "iter": 19392, "iter_tflops": 52.10268355677702, "iter_time": 0.3959698829650879, "loss": 0.2659343183040619, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.573142779117774, "step_time": 0.3646799964904785} +{"epoch": 0, "iter": 19393, "iter_tflops": 24.976820797473206, "iter_time": 0.8260095901489258, "loss": 0.1876596212387085, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.16461543437878, "step_time": 0.7885112457275391} +{"epoch": 0, "iter": 19394, "iter_tflops": 12.158238499444867, "iter_time": 1.6968817901611326, "loss": 0.1704978495836258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.410565989960745, "step_time": 1.1849754638671874} +{"epoch": 0, "iter": 19395, "iter_tflops": 46.425533227491876, "iter_time": 0.44439109420776374, "loss": 0.080964595079422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.433764988222386, "step_time": 0.4011196441650391} +{"epoch": 0, "iter": 19396, "iter_tflops": 49.806408538336036, "iter_time": 0.41422568130493165, "loss": 0.18677081167697906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.978758340672876, "step_time": 0.382207633972168} +{"epoch": 0, "iter": 19397, "iter_tflops": 24.723003084951635, "iter_time": 0.8344897842407226, "loss": 0.5010514855384827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.033781205818173, 
"step_time": 0.7924739532470704} +{"epoch": 0, "iter": 19398, "iter_tflops": 15.884596068332625, "iter_time": 1.2988113403320312, "loss": 0.49386075139045715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.28788887692153, "step_time": 1.0696397953033447} +{"epoch": 0, "iter": 19399, "iter_tflops": 37.03914821315973, "iter_time": 0.5570077743530273, "loss": 0.4253418743610382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.448629126210825, "step_time": 0.5100566806793213} +{"epoch": 0, "iter": 19400, "iter_tflops": 42.018053892521586, "iter_time": 0.4910054512023926, "loss": 0.5939372777938843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83566721645637, "step_time": 0.4501100292205811} +{"epoch": 0, "iter": 19401, "iter_tflops": 23.937697234842982, "iter_time": 0.8618662567138671, "loss": 0.08397428691387177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.610251808788085, "step_time": 0.8055794868469238} +{"epoch": 0, "iter": 19402, "iter_tflops": 25.848960069720757, "iter_time": 0.7981401748657228, "loss": 0.12056942284107208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.67886405886908, "step_time": 0.6512573642730713} +{"epoch": 0, "iter": 19403, "iter_tflops": 54.79068201271435, "iter_time": 0.3765438346862793, "loss": 0.06903771311044693, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.96159741740322, "step_time": 0.34407177925109866} +{"epoch": 0, "iter": 19404, "iter_tflops": 52.67613109824059, "iter_time": 0.39165924072265623, "loss": 0.08268778771162033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.022019968894675, "step_time": 0.3618092365264893} +{"epoch": 0, "iter": 19405, "iter_tflops": 47.993338840262275, "iter_time": 0.4298741035461425, "loss": 0.06096424162387848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.06048856760434, "step_time": 0.38882215499877926} +{"epoch": 0, "iter": 19406, "iter_tflops": 49.76365378788577, "iter_time": 0.4145815658569336, "loss": 0.04957335442304611, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
54.95161284998732, "step_time": 0.37544109153747557} +{"epoch": 0, "iter": 19407, "iter_tflops": 53.08204168761966, "iter_time": 0.3886642799377441, "loss": 0.045030947774648666, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.05571658578148, "step_time": 0.35536713218688964} +{"epoch": 0, "iter": 19408, "iter_tflops": 49.96276502440305, "iter_time": 0.4129293785095215, "loss": 0.02680320292711258, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.80507123053727, "step_time": 0.38344143104553224} +{"epoch": 0, "iter": 19409, "iter_tflops": 23.290628789934175, "iter_time": 0.8858109283447266, "loss": 0.22907015681266785, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.367534798015537, "step_time": 0.8466631393432618} +{"epoch": 0, "iter": 19410, "iter_tflops": 12.236881260859121, "iter_time": 1.6859764404296875, "loss": 0.17746903002262115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.895993783748807, "step_time": 1.2210642223358155} +{"epoch": 0, "iter": 19411, "iter_tflops": 39.77750102043889, "iter_time": 0.5186623840332032, "loss": 0.25791245698928833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.93486564886836, "step_time": 0.469583625793457} +{"epoch": 0, "iter": 19412, "iter_tflops": 42.58759731949478, "iter_time": 0.4844390106201172, "loss": 0.2382892668247223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.68429151727311, "step_time": 0.44192795562744136} +{"epoch": 0, "iter": 19413, "iter_tflops": 22.918499543047663, "iter_time": 0.9001939010620117, "loss": 0.01594192534685135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.926983168278152, "step_time": 0.8276610679626465} +{"epoch": 0, "iter": 19414, "iter_tflops": 22.267141455083625, "iter_time": 0.9265263595581056, "loss": 0.002713622059673071, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.3690650583823, "step_time": 0.8132382278442383} +{"epoch": 0, "iter": 19415, "iter_tflops": 47.100669134457476, "iter_time": 0.438021240234375, "loss": 0.0015304111875593662, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 51.907726905990444, "step_time": 0.39745707893371585} +{"epoch": 0, "iter": 19416, "iter_tflops": 49.12730149897686, "iter_time": 0.41995169448852543, "loss": 0.0026590311899781227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.22850372092132, "step_time": 0.3804474048614502} +{"epoch": 0, "iter": 19417, "iter_tflops": 28.339952327971552, "iter_time": 0.7279861755371093, "loss": 0.49865520000457764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.100786599715057, "step_time": 0.6633624343872071} +{"epoch": 0, "iter": 19418, "iter_tflops": 39.49869181497385, "iter_time": 0.5223234634399414, "loss": 0.555536150932312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.249851494284975, "step_time": 0.4770211410522461} +{"epoch": 0, "iter": 19419, "iter_tflops": 44.15393418937947, "iter_time": 0.46725379943847656, "loss": 0.747528612613678, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47834530824008, "step_time": 0.4345369110107422} +{"epoch": 0, "iter": 19420, "iter_tflops": 47.42565598402023, "iter_time": 0.43501967620849613, "loss": 0.47234341502189636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.78337827353051, "step_time": 0.40625681495666505} +{"epoch": 0, "iter": 19421, "iter_tflops": 34.86976511860995, "iter_time": 0.5916613845825196, "loss": 0.5304092764854431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.426630425424065, "step_time": 0.5512410087585449} +{"epoch": 0, "iter": 19422, "iter_tflops": 7.604278826651231, "iter_time": 2.7130900878906252, "loss": 0.45554327964782715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.086015556205892, "step_time": 2.2706425476074217} +{"epoch": 0, "iter": 19423, "iter_tflops": 11.070180772409431, "iter_time": 1.8636636505126951, "loss": 0.44236284494400024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.321717558905782, "step_time": 1.5486811981201172} +{"epoch": 0, "iter": 19424, "iter_tflops": 31.066323899022166, "iter_time": 0.6640983200073243, "loss": 0.6343763470649719, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 46.209405919628544, "step_time": 0.4464695682525635} +{"epoch": 0, "iter": 19425, "iter_tflops": 19.11081276135288, "iter_time": 0.8228660430908203, "loss": 0.12928172945976257, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 20.118248805817892, "step_time": 0.7816604232788086} +{"epoch": 0, "iter": 19426, "iter_tflops": 9.757906833765357, "iter_time": 1.6115791168212892, "loss": 0.23289768397808075, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 11.507079600563355, "step_time": 1.3666055526733398} +{"epoch": 0, "iter": 19427, "iter_tflops": 21.949036809909018, "iter_time": 0.7164614562988281, "loss": 0.29119327664375305, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 23.734549691481295, "step_time": 0.6625631866455077} +{"epoch": 0, "iter": 19428, "iter_tflops": 24.396141810030073, "iter_time": 0.6445953216552733, "loss": 0.1904766857624054, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.076599727390814, "step_time": 0.6030555763244628} +{"epoch": 0, "iter": 19429, "iter_tflops": 23.740205115640197, "iter_time": 0.8690360260009765, "loss": 0.0038336534053087234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.482954706584508, "step_time": 0.8096036643981934} +{"epoch": 0, "iter": 19430, "iter_tflops": 8.680798238594752, "iter_time": 2.376635528564453, "loss": 0.0046320450492203236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.056035937395789, "step_time": 2.0516129455566405} +{"epoch": 0, "iter": 19431, "iter_tflops": 14.441887958405678, "iter_time": 1.4285593109130859, "loss": 0.004040928557515144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.351148161864433, "step_time": 1.0661431217193604} +{"epoch": 0, "iter": 19432, "iter_tflops": 40.04542295105742, "iter_time": 0.5151922988891602, "loss": 0.002610510913655162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.58386893589192, "step_time": 0.4627479400634765} +{"epoch": 0, "iter": 19433, "iter_tflops": 17.25224474266428, "iter_time": 1.044835891723633, "loss": 
0.14273954927921295, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 18.49567781325257, "step_time": 0.9745933456420899} +{"epoch": 0, "iter": 19434, "iter_tflops": 21.556577755118955, "iter_time": 0.8362071533203125, "loss": 0.14256025850772858, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 25.81786616812512, "step_time": 0.6981895561218262} +{"epoch": 0, "iter": 19435, "iter_tflops": 32.49443508606531, "iter_time": 0.5547338943481446, "loss": 0.25646406412124634, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 34.82772787506053, "step_time": 0.5175693511962891} +{"epoch": 0, "iter": 19436, "iter_tflops": 33.96472827766141, "iter_time": 0.530720115661621, "loss": 0.20417916774749756, "lr": 3e-05, "seqlen": 7184.0, "step_tflops": 35.949349639926496, "step_time": 0.5014211578369141} +{"epoch": 0, "iter": 19437, "iter_tflops": 35.1164967979192, "iter_time": 0.5875043182373046, "loss": 0.10879162698984146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.83980935622673, "step_time": 0.5452219200134278} +{"epoch": 0, "iter": 19438, "iter_tflops": 14.390352816810626, "iter_time": 1.433675308227539, "loss": 0.05106639489531517, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.981996750575686, "step_time": 1.2148803119659424} +{"epoch": 0, "iter": 19439, "iter_tflops": 48.714078973220715, "iter_time": 0.42351398086547853, "loss": 0.06633368879556656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.11552705042093, "step_time": 0.38841925621032714} +{"epoch": 0, "iter": 19440, "iter_tflops": 54.19220523663571, "iter_time": 0.38070223236083983, "loss": 0.034455981105566025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.001990983493044, "step_time": 0.34966775131225586} +{"epoch": 0, "iter": 19441, "iter_tflops": 27.032035116007716, "iter_time": 0.7632090377807617, "loss": 0.03149406611919403, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.56776185588781, "step_time": 0.7221809539794922} +{"epoch": 0, "iter": 19442, "iter_tflops": 12.190282389539558, "iter_time": 
1.6924212951660156, "loss": 0.032000135630369186, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.94781690333601, "step_time": 1.3802078018188477} +{"epoch": 0, "iter": 19443, "iter_tflops": 52.511983711396695, "iter_time": 0.39288352966308593, "loss": 0.04649494215846062, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.803050930801206, "step_time": 0.3508507328033448} +{"epoch": 0, "iter": 19444, "iter_tflops": 61.45966685387043, "iter_time": 0.3356850852966308, "loss": 0.04342666640877724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 67.14688243790053, "step_time": 0.30725318527221673} +{"epoch": 0, "iter": 19445, "iter_tflops": 45.33644058441606, "iter_time": 0.4550664596557617, "loss": 0.04348272085189819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.21471241894287, "step_time": 0.4108575458526611} +{"epoch": 0, "iter": 19446, "iter_tflops": 51.04008480313742, "iter_time": 0.40421354293823236, "loss": 0.02862759307026863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.02903033848536, "step_time": 0.3682214984893799} +{"epoch": 0, "iter": 19447, "iter_tflops": 57.32099305466973, "iter_time": 0.3599221229553222, "loss": 0.03304968774318695, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.442015370539295, "step_time": 0.33040402984619144} +{"epoch": 0, "iter": 19448, "iter_tflops": 59.669092921636235, "iter_time": 0.3457584571838379, "loss": 0.036987051367759705, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 65.0142102604494, "step_time": 0.3173320636749268} +{"epoch": 0, "iter": 19449, "iter_tflops": 30.169570016830882, "iter_time": 0.6838378372192383, "loss": 0.015195761807262897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.94046515051312, "step_time": 0.6459233894348145} +{"epoch": 0, "iter": 19450, "iter_tflops": 13.458604595622754, "iter_time": 1.5329296112060546, "loss": 0.002873887773603201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.199000134428207, "step_time": 1.273602897644043} +{"epoch": 0, "iter": 19451, "iter_tflops": 
38.43393531777144, "iter_time": 0.5367936782836914, "loss": 0.004706766456365585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.265696143768025, "step_time": 0.41877198791503906} +{"epoch": 0, "iter": 19452, "iter_tflops": 45.78519621308544, "iter_time": 0.4506062049865722, "loss": 0.00774715980514884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.77343148005717, "step_time": 0.4063364028930664} +{"epoch": 0, "iter": 19453, "iter_tflops": 18.678066474387776, "iter_time": 1.1045625915527344, "loss": 0.08022910356521606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.76715662236249, "step_time": 1.0437056732177734} +{"epoch": 0, "iter": 19454, "iter_tflops": 14.909623533044998, "iter_time": 1.3837434234619144, "loss": 0.038403093814849854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.324118369983896, "step_time": 1.0676343994140625} +{"epoch": 0, "iter": 19455, "iter_tflops": 46.58353690505129, "iter_time": 0.44288379287719726, "loss": 0.07113200426101685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.01895757758197, "step_time": 0.40438092994689945} +{"epoch": 0, "iter": 19456, "iter_tflops": 47.19288553435412, "iter_time": 0.43716533279418945, "loss": 0.07040252536535263, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.70829157018245, "step_time": 0.3989900436401367} +{"epoch": 0, "iter": 19457, "iter_tflops": 18.02776593408718, "iter_time": 1.1444065551757812, "loss": 0.5573064684867859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.30814417111835, "step_time": 1.0685176849365234} +{"epoch": 0, "iter": 19458, "iter_tflops": 17.345864886243753, "iter_time": 1.189395492553711, "loss": 0.46628403663635254, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.073074363369404, "step_time": 0.9790262756347656} +{"epoch": 0, "iter": 19459, "iter_tflops": 48.082219906216814, "iter_time": 0.4290794715881348, "loss": 0.38089942932128906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.07893246035873, "step_time": 0.39615046882629396} +{"epoch": 0, "iter": 
19460, "iter_tflops": 47.68525334397411, "iter_time": 0.432651439666748, "loss": 0.4078174829483032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.418773505215505, "step_time": 0.40123659324646} +{"epoch": 0, "iter": 19461, "iter_tflops": 24.68908518202869, "iter_time": 0.8356362075805662, "loss": 0.14624668657779694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.949017515591436, "step_time": 0.7950626068115234} +{"epoch": 0, "iter": 19462, "iter_tflops": 22.797783768658995, "iter_time": 0.9049604873657228, "loss": 0.07592881470918655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.102230617576197, "step_time": 0.734144338607788} +{"epoch": 0, "iter": 19463, "iter_tflops": 41.54225143349566, "iter_time": 0.4966291618347167, "loss": 0.14266616106033325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.73691550865163, "step_time": 0.4510818729400634} +{"epoch": 0, "iter": 19464, "iter_tflops": 45.04844841335515, "iter_time": 0.45797567367553715, "loss": 0.08127274364233017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.17150491872749, "step_time": 0.41957417297363286} +{"epoch": 0, "iter": 19465, "iter_tflops": 27.149162182866096, "iter_time": 0.7599163970947267, "loss": 0.6727705597877502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.40232195518572, "step_time": 0.7016824569702149} +{"epoch": 0, "iter": 19466, "iter_tflops": 8.632928125939966, "iter_time": 2.3898141174316407, "loss": 0.4265497028827667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.322716944882284, "step_time": 1.9986107940673827} +{"epoch": 0, "iter": 19467, "iter_tflops": 12.477546065797563, "iter_time": 1.6534576110839845, "loss": 0.3534500300884247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.610518982129173, "step_time": 1.3216148376464842} +{"epoch": 0, "iter": 19468, "iter_tflops": 37.44113510763709, "iter_time": 0.551027458190918, "loss": 0.4214268922805786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.002191669942796, "step_time": 0.5031705055236817} +{"epoch": 0, 
"iter": 19469, "iter_tflops": 12.580135598365954, "iter_time": 1.2012626037597658, "loss": 0.248289093375206, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 13.404255686914384, "step_time": 1.1274066085815428} +{"epoch": 0, "iter": 19470, "iter_tflops": 10.806087049228207, "iter_time": 1.398475357055664, "loss": 0.26653531193733215, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 15.00533463670117, "step_time": 1.0071115913391115} +{"epoch": 0, "iter": 19471, "iter_tflops": 23.802791268404285, "iter_time": 0.6348854751586914, "loss": 0.22168698906898499, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 25.568830376145915, "step_time": 0.591033935546875} +{"epoch": 0, "iter": 19472, "iter_tflops": 23.069037190260556, "iter_time": 0.6550792007446289, "loss": 0.13852961361408234, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.796111485403493, "step_time": 0.6094522705078125} +{"epoch": 0, "iter": 19473, "iter_tflops": 31.514694729526678, "iter_time": 0.6546499557495117, "loss": 0.03421661630272865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.908169274581006, "step_time": 0.5910104694366456} +{"epoch": 0, "iter": 19474, "iter_tflops": 43.47290804150795, "iter_time": 0.4745735778808593, "loss": 0.04225137084722519, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.191713645987406, "step_time": 0.42810458374023436} +{"epoch": 0, "iter": 19475, "iter_tflops": 46.41309313006293, "iter_time": 0.44451020431518556, "loss": 0.062478207051754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.23694726073745, "step_time": 0.40266047477722167} +{"epoch": 0, "iter": 19476, "iter_tflops": 43.39704121060875, "iter_time": 0.47540322875976565, "loss": 0.02732519805431366, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.69673185855977, "step_time": 0.43254731941223146} +{"epoch": 0, "iter": 19477, "iter_tflops": 11.856947003236254, "iter_time": 1.0478964385986327, "loss": 0.07183542847633362, "lr": 3e-05, "seqlen": 4992.0, "step_tflops": 12.652101548053201, "step_time": 
0.9820386352539063} +{"epoch": 0, "iter": 19478, "iter_tflops": 10.970825294483316, "iter_time": 1.1325358123779297, "loss": 0.027658946812152863, "lr": 3e-05, "seqlen": 4992.0, "step_tflops": 13.674910393211277, "step_time": 0.9085874919891358} +{"epoch": 0, "iter": 19479, "iter_tflops": 23.69664642163097, "iter_time": 0.5243295745849609, "loss": 0.059041827917099, "lr": 3e-05, "seqlen": 4992.0, "step_tflops": 26.198098237241783, "step_time": 0.474265438079834} +{"epoch": 0, "iter": 19480, "iter_tflops": 24.14111995795814, "iter_time": 0.514675895690918, "loss": 0.10309062153100967, "lr": 3e-05, "seqlen": 4992.0, "step_tflops": 26.740409109222213, "step_time": 0.46464706230163577} +{"epoch": 0, "iter": 19481, "iter_tflops": 16.61851690063966, "iter_time": 1.2414521484375, "loss": 0.2343391627073288, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.868061194238102, "step_time": 1.1546352615356443} +{"epoch": 0, "iter": 19482, "iter_tflops": 17.844934171047488, "iter_time": 1.1561316680908202, "loss": 0.14631806313991547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.567159564875592, "step_time": 0.9565976200103761} +{"epoch": 0, "iter": 19483, "iter_tflops": 39.562663472602054, "iter_time": 0.5214788818359375, "loss": 0.16897504031658173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.335735786459914, "step_time": 0.4760757637023926} +{"epoch": 0, "iter": 19484, "iter_tflops": 39.227693935604925, "iter_time": 0.5259318466186523, "loss": 0.17576076090335846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.82909056649205, "step_time": 0.48170748519897455} +{"epoch": 0, "iter": 19485, "iter_tflops": 19.29821126471538, "iter_time": 1.0690676574707032, "loss": 0.03234696015715599, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.62325767292958, "step_time": 1.0003799514770508} +{"epoch": 0, "iter": 19486, "iter_tflops": 34.51078465174422, "iter_time": 0.5978158340454102, "loss": 0.09325433522462845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.48623232546813, 
"step_time": 0.5224882774353027} +{"epoch": 0, "iter": 19487, "iter_tflops": 56.82484287952734, "iter_time": 0.36306468200683595, "loss": 0.06101004034280777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.83626921464549, "step_time": 0.3283309745788574} +{"epoch": 0, "iter": 19488, "iter_tflops": 54.92221105270825, "iter_time": 0.37564207839965824, "loss": 0.06317029148340225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.600880548283044, "step_time": 0.34615417289733885} +{"epoch": 0, "iter": 19489, "iter_tflops": 46.72434824307962, "iter_time": 0.44154909133911135, "loss": 0.2086193710565567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.01233886278872, "step_time": 0.40443339729309086} +{"epoch": 0, "iter": 19490, "iter_tflops": 43.93383706373076, "iter_time": 0.46959461975097655, "loss": 0.08714832365512848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.0394798207292, "step_time": 0.42946121788024905} +{"epoch": 0, "iter": 19491, "iter_tflops": 54.40171539951361, "iter_time": 0.37923608398437497, "loss": 0.13942982256412506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.29070613655895, "step_time": 0.34796504974365233} +{"epoch": 0, "iter": 19492, "iter_tflops": 49.41759308287855, "iter_time": 0.41748479080200196, "loss": 0.13469728827476501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.66385540617839, "step_time": 0.3844504528045654} +{"epoch": 0, "iter": 19493, "iter_tflops": 39.59625100801022, "iter_time": 0.5210365371704102, "loss": 0.40346473455429077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.742036780392674, "step_time": 0.48268859100341804} +{"epoch": 0, "iter": 19494, "iter_tflops": 45.62541225070306, "iter_time": 0.4521842651367188, "loss": 0.3654062747955322, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.63417662727213, "step_time": 0.4156630554199219} +{"epoch": 0, "iter": 19495, "iter_tflops": 48.51457916703123, "iter_time": 0.4252555389404297, "loss": 0.2853456437587738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
52.660776702463416, "step_time": 0.39177343750000004} +{"epoch": 0, "iter": 19496, "iter_tflops": 47.42119280013515, "iter_time": 0.43506061935424806, "loss": 0.4088326394557953, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.33167144577667, "step_time": 0.4019174308776855} +{"epoch": 0, "iter": 19497, "iter_tflops": 32.518383425659906, "iter_time": 0.6344440078735352, "loss": 0.624245822429657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.696098153114676, "step_time": 0.5946228713989257} +{"epoch": 0, "iter": 19498, "iter_tflops": 20.356349572759267, "iter_time": 1.0134967193603515, "loss": 0.6105673313140869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.91355061700256, "step_time": 0.8281073150634766} +{"epoch": 0, "iter": 19499, "iter_tflops": 45.09111866829254, "iter_time": 0.4575422859191895, "loss": 0.5497787594795227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.581650214834, "step_time": 0.42466843795776366} +{"epoch": 0, "iter": 19500, "iter_tflops": 44.91277846633051, "iter_time": 0.4593591003417969, "loss": 0.5175208449363708, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.28240536722423, "step_time": 0.4273004493713379} +{"epoch": 0, "iter": 19501, "iter_tflops": 30.266186667163854, "iter_time": 0.6816548690795899, "loss": 0.41548776626586914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.30970630822357, "step_time": 0.6385416603088379} +{"epoch": 0, "iter": 19502, "iter_tflops": 45.17483688333026, "iter_time": 0.45669436645507816, "loss": 0.7293496131896973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.931652569294044, "step_time": 0.4216308345794678} +{"epoch": 0, "iter": 19503, "iter_tflops": 40.13475594226455, "iter_time": 0.5140455703735352, "loss": 0.5386707782745361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.17164359987522, "step_time": 0.47788529205322267} +{"epoch": 0, "iter": 19504, "iter_tflops": 41.55102197279475, "iter_time": 0.4965243339538574, "loss": 0.5859121084213257, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.79552199220228, "step_time": 0.4605615158081055} +{"epoch": 0, "iter": 19505, "iter_tflops": 43.72798222253938, "iter_time": 0.4718052940368653, "loss": 0.16867771744728088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.799349409515955, "step_time": 0.43161870956420895} +{"epoch": 0, "iter": 19506, "iter_tflops": 10.952134119487313, "iter_time": 1.8837509918212891, "loss": 0.15294069051742554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.082841275432093, "step_time": 1.5769581756591797} +{"epoch": 0, "iter": 19507, "iter_tflops": 12.936332897840128, "iter_time": 1.5948177642822265, "loss": 0.12275947630405426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.267987587185992, "step_time": 1.3512647552490236} +{"epoch": 0, "iter": 19508, "iter_tflops": 20.072529942791043, "iter_time": 1.0278272628784182, "loss": 0.15341529250144958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.904323381850052, "step_time": 0.7668319034576416} +{"epoch": 0, "iter": 19509, "iter_tflops": 14.621690126310956, "iter_time": 1.0279464416503907, "loss": 0.16843388974666595, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 15.259714275722402, "step_time": 0.9849669570922852} +{"epoch": 0, "iter": 19510, "iter_tflops": 11.003577879431523, "iter_time": 1.3659479217529298, "loss": 0.19533348083496094, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.216460015625836, "step_time": 1.1372420692443848} +{"epoch": 0, "iter": 19511, "iter_tflops": 22.980166445173563, "iter_time": 0.6540559387207032, "loss": 0.2594396471977234, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.76537716901425, "step_time": 0.6069083557128906} +{"epoch": 0, "iter": 19512, "iter_tflops": 22.040787629167085, "iter_time": 0.6819318161010742, "loss": 0.24700728058815002, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 23.619377880780142, "step_time": 0.6363552169799805} +{"epoch": 0, "iter": 19513, "iter_tflops": 28.35034929178342, "iter_time": 0.7277192001342774, "loss": 0.033608708530664444, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 30.563621059375404, "step_time": 0.6750212440490723} +{"epoch": 0, "iter": 19514, "iter_tflops": 46.00387952178023, "iter_time": 0.448464210510254, "loss": 0.08852007985115051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.2918885703827, "step_time": 0.4102270584106445} +{"epoch": 0, "iter": 19515, "iter_tflops": 50.93243957976603, "iter_time": 0.40506784439086907, "loss": 0.09211144596338272, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.62170489211332, "step_time": 0.37091803550720215} +{"epoch": 0, "iter": 19516, "iter_tflops": 50.95964272831745, "iter_time": 0.40485161209106446, "loss": 0.07253213971853256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.10768852365151, "step_time": 0.37437776947021484} +{"epoch": 0, "iter": 19517, "iter_tflops": 25.587221560563723, "iter_time": 0.7464159393310548, "loss": 0.0025162813253700733, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 27.093040423037323, "step_time": 0.7049304809570311} +{"epoch": 0, "iter": 19518, "iter_tflops": 16.428325511509136, "iter_time": 1.1625475769042968, "loss": 0.0038711768575012684, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 21.696912329186127, "step_time": 0.8802501354217529} +{"epoch": 0, "iter": 19519, "iter_tflops": 45.2676251099561, "iter_time": 0.42190660476684566, "loss": 0.006449687294661999, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 50.1069911495343, "step_time": 0.38115858840942385} +{"epoch": 0, "iter": 19520, "iter_tflops": 44.93214131943549, "iter_time": 0.4250567512512207, "loss": 0.007661346346139908, "lr": 3e-05, "seqlen": 7600.0, "step_tflops": 49.510693354884836, "step_time": 0.38574919319152834} +{"epoch": 0, "iter": 19521, "iter_tflops": 22.426287245897942, "iter_time": 0.9199513626098632, "loss": 0.015867646783590317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.24815331029692, "step_time": 0.8508315353393556} +{"epoch": 0, "iter": 19522, "iter_tflops": 28.15191590301888, "iter_time": 0.732848648071289, "loss": 
0.029236426576972008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.44906664413387, "step_time": 0.4539387722015381} +{"epoch": 0, "iter": 19523, "iter_tflops": 52.50038278107904, "iter_time": 0.39297034454345703, "loss": 0.023706631734967232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.543412213080956, "step_time": 0.3585309371948242} +{"epoch": 0, "iter": 19524, "iter_tflops": 53.757119104016745, "iter_time": 0.3837834663391113, "loss": 0.037411049008369446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.5033916792159, "step_time": 0.35264781951904295} +{"epoch": 0, "iter": 19525, "iter_tflops": 24.291436734057033, "iter_time": 0.8493154907226563, "loss": 0.08466468751430511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.523724188941078, "step_time": 0.8083104705810547} +{"epoch": 0, "iter": 19526, "iter_tflops": 14.509332395584655, "iter_time": 1.4219188690185547, "loss": 0.08811594545841217, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.15664925807773, "step_time": 1.023537853240967} +{"epoch": 0, "iter": 19527, "iter_tflops": 48.507449569973154, "iter_time": 0.425318042755127, "loss": 0.11109593510627747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.78926120482037, "step_time": 0.39081989479064944} +{"epoch": 0, "iter": 19528, "iter_tflops": 51.200794333737555, "iter_time": 0.40294479370117187, "loss": 0.1340666264295578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.613649161143776, "step_time": 0.37097176361083983} +{"epoch": 0, "iter": 19529, "iter_tflops": 28.665674499903922, "iter_time": 0.7197142181396484, "loss": 0.1729935258626938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.230416356861415, "step_time": 0.682461441040039} +{"epoch": 0, "iter": 19530, "iter_tflops": 13.564623318026953, "iter_time": 1.520948501586914, "loss": 0.15767130255699158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.870561031976585, "step_time": 1.387378288269043} +{"epoch": 0, "iter": 19531, "iter_tflops": 13.076401863540955, "iter_time": 
1.5777347412109375, "loss": 0.2321515530347824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.368340466081587, "step_time": 1.4358717041015625} +{"epoch": 0, "iter": 19532, "iter_tflops": 36.167764073137135, "iter_time": 0.5704276733398438, "loss": 0.21384507417678833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.783176581950144, "step_time": 0.4937655582427978} +{"epoch": 0, "iter": 19533, "iter_tflops": 15.816176376593242, "iter_time": 0.9503127670288086, "loss": 0.2620050013065338, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 16.5974467472855, "step_time": 0.9055799102783203} +{"epoch": 0, "iter": 19534, "iter_tflops": 9.996246692089256, "iter_time": 1.5035957794189454, "loss": 0.12793436646461487, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 11.79852195822734, "step_time": 1.2739150199890137} +{"epoch": 0, "iter": 19535, "iter_tflops": 21.402715080126516, "iter_time": 0.7022620391845702, "loss": 0.18742595613002777, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 22.963322391782363, "step_time": 0.6545357017517089} +{"epoch": 0, "iter": 19536, "iter_tflops": 22.438470632553386, "iter_time": 0.6698457565307616, "loss": 0.20762549340724945, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 24.040902995811326, "step_time": 0.6251975784301758} +{"epoch": 0, "iter": 19537, "iter_tflops": 26.652013603217316, "iter_time": 0.7740913619995118, "loss": 0.5114797353744507, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.022379369201214, "step_time": 0.710868438720703} +{"epoch": 0, "iter": 19538, "iter_tflops": 41.29769203296898, "iter_time": 0.4995701332092285, "loss": 0.46307528018951416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.94950008187503, "step_time": 0.4589838256835937} +{"epoch": 0, "iter": 19539, "iter_tflops": 44.603577276747785, "iter_time": 0.4625434722900391, "loss": 0.462585985660553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.042978683739975, "step_time": 0.42942994117736816} +{"epoch": 0, "iter": 19540, "iter_tflops": 
43.210847493988155, "iter_time": 0.4774517211914063, "loss": 0.5751188397407532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.72819632345145, "step_time": 0.4415127296447754} +{"epoch": 0, "iter": 19541, "iter_tflops": 29.187284564814203, "iter_time": 0.7068521041870117, "loss": 0.4107343554496765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.96695004070811, "step_time": 0.6662294311523437} +{"epoch": 0, "iter": 19542, "iter_tflops": 15.782467241084387, "iter_time": 1.3072159881591796, "loss": 0.4882190227508545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.94281975753251, "step_time": 1.0891247329711915} +{"epoch": 0, "iter": 19543, "iter_tflops": 39.47714377671606, "iter_time": 0.5226085662841797, "loss": 0.45076704025268555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.238468084643365, "step_time": 0.4771467266082763} +{"epoch": 0, "iter": 19544, "iter_tflops": 39.446430552034926, "iter_time": 0.5230154724121093, "loss": 0.5723163485527039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.11489616648669, "step_time": 0.47851428031921384} +{"epoch": 0, "iter": 19545, "iter_tflops": 23.212292980743232, "iter_time": 0.8888003234863281, "loss": 0.5933484435081482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.706656629399166, "step_time": 0.8350419006347655} +{"epoch": 0, "iter": 19546, "iter_tflops": 9.860685185809825, "iter_time": 2.0922575988769534, "loss": 0.4909554719924927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.628385045374143, "step_time": 1.513832595825195} +{"epoch": 0, "iter": 19547, "iter_tflops": 14.747525822670838, "iter_time": 1.398952865600586, "loss": 0.5973817706108093, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.168745609394414, "step_time": 1.1355265769958496} +{"epoch": 0, "iter": 19548, "iter_tflops": 28.646600631033543, "iter_time": 0.7201934280395509, "loss": 0.5015335083007812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.10664767597969, "step_time": 0.5876691417694092} +{"epoch": 0, "iter": 19549, 
"iter_tflops": 17.31365568892627, "iter_time": 0.8610400848388672, "loss": 0.1724189668893814, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 18.203317038788352, "step_time": 0.8189579696655275} +{"epoch": 0, "iter": 19550, "iter_tflops": 9.078257803500163, "iter_time": 1.642137939453125, "loss": 0.1748945564031601, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 11.294235358057836, "step_time": 1.3199434127807619} +{"epoch": 0, "iter": 19551, "iter_tflops": 21.93685032716957, "iter_time": 0.6795757522583007, "loss": 0.28513580560684204, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 23.555084400407654, "step_time": 0.6328889045715332} +{"epoch": 0, "iter": 19552, "iter_tflops": 22.258125921593248, "iter_time": 0.6697667007446289, "loss": 0.20034350454807281, "lr": 3e-05, "seqlen": 5968.0, "step_tflops": 23.82376527097485, "step_time": 0.625751277923584} +{"epoch": 0, "iter": 19553, "iter_tflops": 19.019291016517478, "iter_time": 1.0847456665039064, "loss": 0.6034085154533386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.432212410377787, "step_time": 1.0097337036132812} +{"epoch": 0, "iter": 19554, "iter_tflops": 18.57551989177589, "iter_time": 1.1106603546142577, "loss": 0.586777925491333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.9213182210251, "step_time": 0.986127799987793} +{"epoch": 0, "iter": 19555, "iter_tflops": 37.15523726831923, "iter_time": 0.5552674407958984, "loss": 0.5163190960884094, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.58916456001608, "step_time": 0.5082906665802002} +{"epoch": 0, "iter": 19556, "iter_tflops": 40.89361410437179, "iter_time": 0.5045064849853516, "loss": 0.5900861024856567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.565071433651845, "step_time": 0.4629431266784668} +{"epoch": 0, "iter": 19557, "iter_tflops": 18.110375757405578, "iter_time": 1.1391863861083984, "loss": 0.33439940214157104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.207601585622285, "step_time": 1.0741108627319336} +{"epoch": 0, "iter": 
19558, "iter_tflops": 15.145176302254685, "iter_time": 1.3622220764160156, "loss": 0.27600881457328796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.497149441627464, "step_time": 1.1153661041259766} +{"epoch": 0, "iter": 19559, "iter_tflops": 43.04972818751151, "iter_time": 0.4792386474609374, "loss": 0.2361563742160797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1433583850525, "step_time": 0.4376246032714844} +{"epoch": 0, "iter": 19560, "iter_tflops": 41.62310146916686, "iter_time": 0.495664493560791, "loss": 0.2727625072002411, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.20564038602073, "step_time": 0.45638317108154297} +{"epoch": 0, "iter": 19561, "iter_tflops": 21.21286747046894, "iter_time": 0.972574478149414, "loss": 0.006975874304771423, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.12388192511366, "step_time": 0.8921985321044922} +{"epoch": 0, "iter": 19562, "iter_tflops": 14.994158934333594, "iter_time": 1.3759420318603515, "loss": 0.00796272698789835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.377184463919754, "step_time": 1.0647105903625487} +{"epoch": 0, "iter": 19563, "iter_tflops": 54.53185753246039, "iter_time": 0.37833102416992187, "loss": 0.004089732654392719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.6988944505533, "step_time": 0.34558585548400883} +{"epoch": 0, "iter": 19564, "iter_tflops": 59.16444206384347, "iter_time": 0.34870764923095704, "loss": 0.0016806769417598844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.76934143886038, "step_time": 0.31853177833557134} +{"epoch": 0, "iter": 19565, "iter_tflops": 35.12270610866323, "iter_time": 0.5014973068237304, "loss": 0.044403497129678726, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 38.17001154848802, "step_time": 0.46146023559570315} +{"epoch": 0, "iter": 19566, "iter_tflops": 13.26345195720221, "iter_time": 1.328005905151367, "loss": 0.05182154104113579, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 16.13792235632292, "step_time": 1.0914628372192383} 
+{"epoch": 0, "iter": 19567, "iter_tflops": 44.49935430250419, "iter_time": 0.39582467651367187, "loss": 0.04900451377034187, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 48.562971191212625, "step_time": 0.3627031478881836} +{"epoch": 0, "iter": 19568, "iter_tflops": 44.35434219963461, "iter_time": 0.3971187858581543, "loss": 0.04793993756175041, "lr": 3e-05, "seqlen": 7024.0, "step_tflops": 48.443649976409894, "step_time": 0.3635965194702148} +{"epoch": 0, "iter": 19569, "iter_tflops": 26.08407618408265, "iter_time": 0.7909459152221681, "loss": 0.28612956404685974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.59857610483426, "step_time": 0.7475419540405273} +{"epoch": 0, "iter": 19570, "iter_tflops": 26.90992255473858, "iter_time": 0.766672348022461, "loss": 0.22884710133075714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.59101586173188, "step_time": 0.6141848640441895} +{"epoch": 0, "iter": 19571, "iter_tflops": 48.35423594435616, "iter_time": 0.42666569137573235, "loss": 0.21304668486118317, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.70407185688047, "step_time": 0.3914516048431396} +{"epoch": 0, "iter": 19572, "iter_tflops": 50.94398407599575, "iter_time": 0.40497605133056636, "loss": 0.32040950655937195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.18459059845356, "step_time": 0.37385605812072753} +{"epoch": 0, "iter": 19573, "iter_tflops": 31.085269034303185, "iter_time": 0.6636935806274414, "loss": 0.5146198868751526, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.22099144628215, "step_time": 0.6210258216857911} +{"epoch": 0, "iter": 19574, "iter_tflops": 24.189092722269894, "iter_time": 0.8529089431762695, "loss": 0.5234735012054443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.061152970399643, "step_time": 0.6863041324615479} +{"epoch": 0, "iter": 19575, "iter_tflops": 42.4914120383079, "iter_time": 0.48553560638427734, "loss": 0.5414878129959106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.70947148138662, "step_time": 
0.45135270309448244} +{"epoch": 0, "iter": 19576, "iter_tflops": 43.59487152494231, "iter_time": 0.47324588394165035, "loss": 0.457611083984375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.01950806999234, "step_time": 0.4387773151397704} +{"epoch": 0, "iter": 19577, "iter_tflops": 25.312645608782315, "iter_time": 0.8150508575439454, "loss": 0.38341057300567627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.769030506188653, "step_time": 0.7707075347900391} +{"epoch": 0, "iter": 19578, "iter_tflops": 16.143201859415765, "iter_time": 1.2780050506591798, "loss": 0.5431379079818726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.823614100587797, "step_time": 1.0960219116210936} +{"epoch": 0, "iter": 19579, "iter_tflops": 49.066820507312, "iter_time": 0.42046933746337883, "loss": 0.5680738091468811, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.32232208973091, "step_time": 0.3869128856658936} +{"epoch": 0, "iter": 19580, "iter_tflops": 49.08104295817747, "iter_time": 0.4203474960327148, "loss": 0.5522533059120178, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.03370853990233, "step_time": 0.3890184955596923} +{"epoch": 0, "iter": 19581, "iter_tflops": 27.086192969922678, "iter_time": 0.7616830291748048, "loss": 0.2715866267681122, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.624512705918953, "step_time": 0.7207491607666016} +{"epoch": 0, "iter": 19582, "iter_tflops": 15.363827426172422, "iter_time": 1.3428355407714845, "loss": 0.294552743434906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.473788405488584, "step_time": 1.1167765407562256} +{"epoch": 0, "iter": 19583, "iter_tflops": 39.00643696187684, "iter_time": 0.5289151000976563, "loss": 0.3020663261413574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.993212591473686, "step_time": 0.4798686180114746} +{"epoch": 0, "iter": 19584, "iter_tflops": 38.281983985020545, "iter_time": 0.5389243545532227, "loss": 0.3701055943965912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.65175383504572, 
"step_time": 0.49532352447509764} +{"epoch": 0, "iter": 19585, "iter_tflops": 24.645306035193727, "iter_time": 0.8371206054687499, "loss": 0.1257527619600296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.588689506455328, "step_time": 0.7759349517822266} +{"epoch": 0, "iter": 19586, "iter_tflops": 40.57560848398482, "iter_time": 0.5084604835510254, "loss": 0.20053300261497498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.096364599305765, "step_time": 0.42895328330993654} +{"epoch": 0, "iter": 19587, "iter_tflops": 49.48474450629137, "iter_time": 0.4169182586669922, "loss": 0.16494904458522797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.57029365474169, "step_time": 0.38512190437316896} +{"epoch": 0, "iter": 19588, "iter_tflops": 47.63932524056572, "iter_time": 0.4330685501098633, "loss": 0.1634891778230667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.94493980011795, "step_time": 0.39717234420776365} +{"epoch": 0, "iter": 19589, "iter_tflops": 33.7014866164668, "iter_time": 0.612171615600586, "loss": 0.1033027172088623, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.99080753104706, "step_time": 0.5732323036193848} +{"epoch": 0, "iter": 19590, "iter_tflops": 10.864454433144779, "iter_time": 1.8989534759521487, "loss": 0.18862596154212952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.382180670688417, "step_time": 1.666192253112793} +{"epoch": 0, "iter": 19591, "iter_tflops": 15.958838059873715, "iter_time": 1.2927691497802734, "loss": 0.12385211139917374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.16465116706086, "step_time": 1.076518081665039} +{"epoch": 0, "iter": 19592, "iter_tflops": 40.31299456967969, "iter_time": 0.5117727851867675, "loss": 0.09257207065820694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.99581505520672, "step_time": 0.4689330902099609} +{"epoch": 0, "iter": 19593, "iter_tflops": 16.017854832587354, "iter_time": 0.8644927749633788, "loss": 0.17342332005500793, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 
17.233690114513898, "step_time": 0.8035028877258301} +{"epoch": 0, "iter": 19594, "iter_tflops": 6.391544136711564, "iter_time": 2.1665061645507815, "loss": 0.19808271527290344, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 7.710000497352547, "step_time": 1.7960206069946285} +{"epoch": 0, "iter": 19595, "iter_tflops": 9.26546257230025, "iter_time": 1.4945092773437498, "loss": 0.2561627924442291, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 12.204818221031724, "step_time": 1.134578125} +{"epoch": 0, "iter": 19596, "iter_tflops": 21.242100858949104, "iter_time": 0.6518808975219726, "loss": 0.3484465479850769, "lr": 3e-05, "seqlen": 5552.0, "step_tflops": 22.74125737398871, "step_time": 0.6089073944091796} +{"epoch": 0, "iter": 19597, "iter_tflops": 10.947086152438622, "iter_time": 1.3468790130615234, "loss": 0.21150250732898712, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 11.597644304315606, "step_time": 1.2713271942138673} +{"epoch": 0, "iter": 19598, "iter_tflops": 11.458698113906683, "iter_time": 1.2867430877685546, "loss": 0.19414357841014862, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 17.219539379955044, "step_time": 0.8562598724365235} +{"epoch": 0, "iter": 19599, "iter_tflops": 26.286087173766816, "iter_time": 0.5609203262329102, "loss": 0.16457806527614594, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 27.99093771178881, "step_time": 0.5267562217712403} +{"epoch": 0, "iter": 19600, "iter_tflops": 27.43595881816804, "iter_time": 0.5374115295410156, "loss": 0.24655647575855255, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 29.179104898412344, "step_time": 0.5053068161010742} +{"epoch": 0, "iter": 19601, "iter_tflops": 36.08591916867613, "iter_time": 0.5717214355468749, "loss": 0.40667158365249634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.669444876946656, "step_time": 0.5335244293212891} +{"epoch": 0, "iter": 19602, "iter_tflops": 33.66549694928814, "iter_time": 0.6128260498046875, "loss": 0.45852217078208923, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 37.2729512483879, "step_time": 0.5535138168334961} +{"epoch": 0, "iter": 19603, "iter_tflops": 38.91142648086624, "iter_time": 0.530206558227539, "loss": 0.4702017605304718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.51050549074075, "step_time": 0.4853175296783447} +{"epoch": 0, "iter": 19604, "iter_tflops": 41.381173996907556, "iter_time": 0.49856230545043945, "loss": 0.41507595777511597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.93842867699176, "step_time": 0.45909690475463866} +{"epoch": 0, "iter": 19605, "iter_tflops": 19.672834989051996, "iter_time": 1.048709732055664, "loss": 0.6164268255233765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.822338055811926, "step_time": 0.9908154144287108} +{"epoch": 0, "iter": 19606, "iter_tflops": 21.357187031173247, "iter_time": 0.9660023803710938, "loss": 0.5746662616729736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.87260714606448, "step_time": 0.8642161865234375} +{"epoch": 0, "iter": 19607, "iter_tflops": 38.807911144995174, "iter_time": 0.5316208190917969, "loss": 0.7130167484283447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.34809174001211, "step_time": 0.48717882347106933} +{"epoch": 0, "iter": 19608, "iter_tflops": 38.03875272343521, "iter_time": 0.5423703994750976, "loss": 0.4512619078159332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.00717801206465, "step_time": 0.5031093215942383} +{"epoch": 0, "iter": 19609, "iter_tflops": 13.90328819466009, "iter_time": 1.4839002990722656, "loss": 0.16733883321285248, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.833524948637793, "step_time": 1.3908422698974612} +{"epoch": 0, "iter": 19610, "iter_tflops": 24.067950451177676, "iter_time": 0.8572019271850587, "loss": 0.1076444461941719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.17753087837679, "step_time": 0.7070883960723877} +{"epoch": 0, "iter": 19611, "iter_tflops": 42.129148073459724, "iter_time": 0.48971067428588866, "loss": 0.19741342961788177, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 46.39991069504975, "step_time": 0.4446364917755127} +{"epoch": 0, "iter": 19612, "iter_tflops": 42.46208229905359, "iter_time": 0.4858709793090821, "loss": 0.21640385687351227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.40534215919375, "step_time": 0.44458444976806644} +{"epoch": 0, "iter": 19613, "iter_tflops": 33.20469343099649, "iter_time": 0.6213306427001954, "loss": 0.4960039556026459, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.53647513756313, "step_time": 0.5646711521148682} +{"epoch": 0, "iter": 19614, "iter_tflops": 34.2516304486421, "iter_time": 0.6023390197753906, "loss": 0.5976089239120483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.3239294222255, "step_time": 0.5527578105926514} +{"epoch": 0, "iter": 19615, "iter_tflops": 36.21510952702274, "iter_time": 0.5696819305419922, "loss": 0.46183568239212036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.411060581900564, "step_time": 0.5234848594665527} +{"epoch": 0, "iter": 19616, "iter_tflops": 39.66999157035149, "iter_time": 0.5200680084228515, "loss": 0.7328433990478516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.88421236922905, "step_time": 0.4810883159637451} +{"epoch": 0, "iter": 19617, "iter_tflops": 24.204421127071406, "iter_time": 0.8523688049316407, "loss": 0.5215266346931458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.16720704547084, "step_time": 0.7884331512451173} +{"epoch": 0, "iter": 19618, "iter_tflops": 25.189065618952412, "iter_time": 0.819049575805664, "loss": 0.6526916027069092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.36293898608372, "step_time": 0.6794827575683593} +{"epoch": 0, "iter": 19619, "iter_tflops": 35.97332687201604, "iter_time": 0.573510856628418, "loss": 0.5756366848945618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.842372860840854, "step_time": 0.5311491546630859} +{"epoch": 0, "iter": 19620, "iter_tflops": 40.480721806780856, "iter_time": 0.5096523132324219, "loss": 0.6203691959381104, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 43.93976850749985, "step_time": 0.46953122901916505} +{"epoch": 0, "iter": 19621, "iter_tflops": 10.676169157319178, "iter_time": 1.2322477264404297, "loss": 0.019757729023694992, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 11.222485862062232, "step_time": 1.1722612380981445} +{"epoch": 0, "iter": 19622, "iter_tflops": 11.26425669368859, "iter_time": 1.1679141845703125, "loss": 0.009766009636223316, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 16.41176488806896, "step_time": 0.8016008796691896} +{"epoch": 0, "iter": 19623, "iter_tflops": 37.00320292683481, "iter_time": 0.3555282821655274, "loss": 0.0014674034900963306, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 40.58229520512484, "step_time": 0.3241730194091797} +{"epoch": 0, "iter": 19624, "iter_tflops": 35.90090377403309, "iter_time": 0.3664444007873535, "loss": 0.003205015789717436, "lr": 3e-05, "seqlen": 5280.0, "step_tflops": 39.442976766747265, "step_time": 0.33353682327270506} +{"epoch": 0, "iter": 19625, "iter_tflops": 25.612170133722685, "iter_time": 0.8055191497802734, "loss": 0.18048860132694244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.8519076948503, "step_time": 0.7683287811279297} +{"epoch": 0, "iter": 19626, "iter_tflops": 11.899484513081099, "iter_time": 1.7337804412841797, "loss": 0.15021313726902008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.737813368922023, "step_time": 1.3998747978210448} +{"epoch": 0, "iter": 19627, "iter_tflops": 38.982946863473956, "iter_time": 0.5292338104248047, "loss": 0.16327998042106628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.66193711583933, "step_time": 0.4835948600769042} +{"epoch": 0, "iter": 19628, "iter_tflops": 37.09745994798835, "iter_time": 0.5561322402954101, "loss": 0.11143846809864044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.58799241707391, "step_time": 0.5083053455352783} +{"epoch": 0, "iter": 19629, "iter_tflops": 21.645002138611993, "iter_time": 0.9531573791503907, "loss": 
0.4340294599533081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.24640985953051, "step_time": 0.8874959030151368} +{"epoch": 0, "iter": 19630, "iter_tflops": 21.60129458101252, "iter_time": 0.9550859756469727, "loss": 0.4149216115474701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.2494698250636, "step_time": 0.7859622936248779} +{"epoch": 0, "iter": 19631, "iter_tflops": 47.789451618296766, "iter_time": 0.4317081031799316, "loss": 0.36777761578559875, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.71279227521232, "step_time": 0.39895531845092774} +{"epoch": 0, "iter": 19632, "iter_tflops": 49.09572691097956, "iter_time": 0.4202217750549317, "loss": 0.3360873758792877, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.94089119128522, "step_time": 0.389700532913208} +{"epoch": 0, "iter": 19633, "iter_tflops": 39.57147845501286, "iter_time": 0.5213627166748047, "loss": 0.3593408763408661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.01645819151169, "step_time": 0.47960930252075196} +{"epoch": 0, "iter": 19634, "iter_tflops": 45.42671524354825, "iter_time": 0.4541621246337891, "loss": 0.38355162739753723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.86130036035995, "step_time": 0.4056344089508057} +{"epoch": 0, "iter": 19635, "iter_tflops": 45.42221559490659, "iter_time": 0.45420711517333984, "loss": 0.23230071365833282, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.033579318284126, "step_time": 0.42075438499450685} +{"epoch": 0, "iter": 19636, "iter_tflops": 43.76934018777945, "iter_time": 0.47135948181152343, "loss": 0.372488409280777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.10687232797965, "step_time": 0.43796356010437015} +{"epoch": 0, "iter": 19637, "iter_tflops": 34.391210363630684, "iter_time": 0.5998943710327147, "loss": 0.06905537098646164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.723219304219974, "step_time": 0.5617996978759765} +{"epoch": 0, "iter": 19638, "iter_tflops": 9.437197484067935, "iter_time": 
2.1861462097167967, "loss": 0.1322237253189087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.7202299949882, "step_time": 1.6219119873046877} +{"epoch": 0, "iter": 19639, "iter_tflops": 12.263030896342553, "iter_time": 1.6823812713623048, "loss": 0.07330111414194107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.954509732376247, "step_time": 1.3795900955200195} +{"epoch": 0, "iter": 19640, "iter_tflops": 24.296395035926775, "iter_time": 0.8491421661376953, "loss": 0.10091421753168106, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.63882563155522, "step_time": 0.746453332901001} +{"epoch": 0, "iter": 19641, "iter_tflops": 20.67355715741017, "iter_time": 0.7388939971923829, "loss": 0.25706636905670166, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 22.569052558312674, "step_time": 0.6768368873596191} +{"epoch": 0, "iter": 19642, "iter_tflops": 22.45598145833893, "iter_time": 0.6802449188232422, "loss": 0.2891116738319397, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 24.157691892855897, "step_time": 0.6323272666931152} +{"epoch": 0, "iter": 19643, "iter_tflops": 23.5689694301651, "iter_time": 0.6481219863891602, "loss": 0.3049888610839844, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 25.40496723881803, "step_time": 0.601282699584961} +{"epoch": 0, "iter": 19644, "iter_tflops": 23.773802057922634, "iter_time": 0.6425378341674804, "loss": 0.33630988001823425, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 25.544421356388018, "step_time": 0.598000129699707} +{"epoch": 0, "iter": 19645, "iter_tflops": 31.32484719070774, "iter_time": 0.6586175308227539, "loss": 0.0015685890102759004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.346988089903974, "step_time": 0.6006667442321778} +{"epoch": 0, "iter": 19646, "iter_tflops": 46.77910410370339, "iter_time": 0.4410322494506836, "loss": 0.0025458119343966246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.981840101450466, "step_time": 0.4046753406524658} +{"epoch": 0, "iter": 19647, "iter_tflops": 53.4087911135119, 
"iter_time": 0.3862864723205567, "loss": 0.014875675551593304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.34814483825767, "step_time": 0.35358610916137695} +{"epoch": 0, "iter": 19648, "iter_tflops": 63.85225532072997, "iter_time": 0.32310673141479496, "loss": 0.005253437906503677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 70.06046035367925, "step_time": 0.29447556304931644} +{"epoch": 0, "iter": 19649, "iter_tflops": 47.49585825049393, "iter_time": 0.43437668609619146, "loss": 0.12639017403125763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.29577124854388, "step_time": 0.39450787353515626} +{"epoch": 0, "iter": 19650, "iter_tflops": 49.43182762161907, "iter_time": 0.4173645706176758, "loss": 0.058287639170885086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.43252580428324, "step_time": 0.37902142524719235} +{"epoch": 0, "iter": 19651, "iter_tflops": 53.62425631321826, "iter_time": 0.3847343521118164, "loss": 0.07440105080604553, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.365833013843115, "step_time": 0.35347895240783694} +{"epoch": 0, "iter": 19652, "iter_tflops": 52.95599431155172, "iter_time": 0.3895893898010254, "loss": 0.07486826181411743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.68442050777317, "step_time": 0.35765451622009276} +{"epoch": 0, "iter": 19653, "iter_tflops": 24.34367105846629, "iter_time": 0.8474931106567384, "loss": 0.2811686098575592, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.941975547799686, "step_time": 0.7952784271240234} +{"epoch": 0, "iter": 19654, "iter_tflops": 15.027691445591836, "iter_time": 1.372871780395508, "loss": 0.35403352975845337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.12218343819958, "step_time": 1.0252909965515136} +{"epoch": 0, "iter": 19655, "iter_tflops": 45.86136554007918, "iter_time": 0.4498578109741211, "loss": 0.25182273983955383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.25901133449082, "step_time": 0.41049541091918945} +{"epoch": 0, "iter": 19656, 
"iter_tflops": 43.109019350024845, "iter_time": 0.4785795135498047, "loss": 0.20301149785518646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.58216591296809, "step_time": 0.44289682769775385} +{"epoch": 0, "iter": 19657, "iter_tflops": 50.36497777597149, "iter_time": 0.4096317405700683, "loss": 0.05039113387465477, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.53330211674703, "step_time": 0.3715084953308106} +{"epoch": 0, "iter": 19658, "iter_tflops": 53.39221655405129, "iter_time": 0.38640638732910154, "loss": 0.04020477458834648, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.64825926200649, "step_time": 0.351776741027832} +{"epoch": 0, "iter": 19659, "iter_tflops": 54.05892347874858, "iter_time": 0.38164085006713866, "loss": 0.022147290408611298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.76493468859318, "step_time": 0.3510783023834228} +{"epoch": 0, "iter": 19660, "iter_tflops": 54.32949937503838, "iter_time": 0.3797401733398438, "loss": 0.018630506470799446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.19101765455863, "step_time": 0.34855108642578125} +{"epoch": 0, "iter": 19661, "iter_tflops": 2.2377593091500447, "iter_time": 0.7179230194091797, "loss": 0.31625422835350037, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 2.370772871664056, "step_time": 0.6776435394287109} +{"epoch": 0, "iter": 19662, "iter_tflops": 1.4827550712877426, "iter_time": 1.0834823303222654, "loss": 0.2530806362628937, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.786949488639847, "step_time": 0.8990399169921875} +{"epoch": 0, "iter": 19663, "iter_tflops": 3.1197865766269652, "iter_time": 0.5149515457153321, "loss": 0.2951864004135132, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.411092885305921, "step_time": 0.47097483825683595} +{"epoch": 0, "iter": 19664, "iter_tflops": 3.2519053244746945, "iter_time": 0.49403004074096674, "loss": 0.124300517141819, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.549360537958247, "step_time": 0.45262770652770995} +{"epoch": 0, 
"iter": 19665, "iter_tflops": 18.757285196577335, "iter_time": 1.0998976287841797, "loss": 0.5513218641281128, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.307517591549566, "step_time": 1.0159337997436522} +{"epoch": 0, "iter": 19666, "iter_tflops": 28.207814726773762, "iter_time": 0.7313963775634763, "loss": 0.39678847789764404, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.75214683197311, "step_time": 0.5936638565063477} +{"epoch": 0, "iter": 19667, "iter_tflops": 47.24809146011589, "iter_time": 0.43665453720092773, "loss": 0.48070576786994934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.82245869327745, "step_time": 0.40594441986083984} +{"epoch": 0, "iter": 19668, "iter_tflops": 46.15016763118547, "iter_time": 0.44704265594482423, "loss": 0.5769950747489929, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.29554148651963, "step_time": 0.4185184478759766} +{"epoch": 0, "iter": 19669, "iter_tflops": 23.081806498796286, "iter_time": 0.8938249053955079, "loss": 0.1784997284412384, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.23901913674425, "step_time": 0.8511521606445313} +{"epoch": 0, "iter": 19670, "iter_tflops": 10.372378879574624, "iter_time": 1.9890416412353515, "loss": 0.17808382213115692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.978368376766781, "step_time": 1.5896523284912107} +{"epoch": 0, "iter": 19671, "iter_tflops": 13.741828975440168, "iter_time": 1.5013353424072264, "loss": 0.14485576748847961, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.808072457248574, "step_time": 1.305098617553711} +{"epoch": 0, "iter": 19672, "iter_tflops": 27.366557365359107, "iter_time": 0.753879753112793, "loss": 0.161319300532341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.021260055256022, "step_time": 0.6650630397796631} +{"epoch": 0, "iter": 19673, "iter_tflops": 15.353558600113853, "iter_time": 0.9523483963012696, "loss": 0.2401789426803589, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 16.03873991018262, "step_time": 0.9116636962890625} 
+{"epoch": 0, "iter": 19674, "iter_tflops": 10.963194178744898, "iter_time": 1.3337296295166015, "loss": 0.1334049552679062, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 12.81257719670893, "step_time": 1.1412174682617187} +{"epoch": 0, "iter": 19675, "iter_tflops": 22.811412322202845, "iter_time": 0.6409921798706055, "loss": 0.2149064689874649, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 24.577326357456066, "step_time": 0.5949360275268555} +{"epoch": 0, "iter": 19676, "iter_tflops": 23.337504533934737, "iter_time": 0.6265424346923828, "loss": 0.2755456268787384, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 25.062315276590066, "step_time": 0.5834232292175293} +{"epoch": 0, "iter": 19677, "iter_tflops": 20.726287975726493, "iter_time": 0.9954070663452148, "loss": 0.11734449863433838, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.365449741614363, "step_time": 0.9224537734985352} +{"epoch": 0, "iter": 19678, "iter_tflops": 20.298209959535413, "iter_time": 1.0163996505737305, "loss": 0.13040319085121155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.948621912687432, "step_time": 0.8990123062133788} +{"epoch": 0, "iter": 19679, "iter_tflops": 49.70987220499199, "iter_time": 0.4150301055908203, "loss": 0.11852247267961502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.21280091524434, "step_time": 0.3805576019287109} +{"epoch": 0, "iter": 19680, "iter_tflops": 48.39576493830391, "iter_time": 0.42629956436157224, "loss": 0.11532288044691086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.46152304172365, "step_time": 0.39326142883300785} +{"epoch": 0, "iter": 19681, "iter_tflops": 36.52398272713648, "iter_time": 0.5648642883300781, "loss": 0.11914712935686111, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.155000243351935, "step_time": 0.5269082717895508} +{"epoch": 0, "iter": 19682, "iter_tflops": 21.305254827815947, "iter_time": 0.9683570404052733, "loss": 0.13795870542526245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.605122197743846, "step_time": 
0.7212377338409424} +{"epoch": 0, "iter": 19683, "iter_tflops": 37.20373604520908, "iter_time": 0.5545435943603515, "loss": 0.14536815881729126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.52095044241191, "step_time": 0.5091463375091553} +{"epoch": 0, "iter": 19684, "iter_tflops": 39.06389516931335, "iter_time": 0.5281371307373047, "loss": 0.1786671131849289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53711511914926, "step_time": 0.48501393318176267} +{"epoch": 0, "iter": 19685, "iter_tflops": 17.9203910058276, "iter_time": 1.1512635803222655, "loss": 0.1540372222661972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.16505089912298, "step_time": 1.0764956283569336} +{"epoch": 0, "iter": 19686, "iter_tflops": 36.75363020611203, "iter_time": 0.5613348503112793, "loss": 0.10293666273355484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.865078534898295, "step_time": 0.45984748458862307} +{"epoch": 0, "iter": 19687, "iter_tflops": 47.689617930639045, "iter_time": 0.4326118431091308, "loss": 0.11162938177585602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.49708369526272, "step_time": 0.40062644386291507} +{"epoch": 0, "iter": 19688, "iter_tflops": 50.675305585158114, "iter_time": 0.4071232185363769, "loss": 0.07386185228824615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.81701990603285, "step_time": 0.37636291694641116} +{"epoch": 0, "iter": 19689, "iter_tflops": 27.849189715555813, "iter_time": 0.7408148574829102, "loss": 0.4531785845756531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.54547940115435, "step_time": 0.6982825775146484} +{"epoch": 0, "iter": 19690, "iter_tflops": 34.65767599547589, "iter_time": 0.595282081604004, "loss": 0.4430754482746124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.61825598869465, "step_time": 0.5484330139160156} +{"epoch": 0, "iter": 19691, "iter_tflops": 36.726019283635786, "iter_time": 0.561756866455078, "loss": 0.45862826704978943, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.67363409227611, 
"step_time": 0.5200202598571777} +{"epoch": 0, "iter": 19692, "iter_tflops": 41.32131750028608, "iter_time": 0.4992845039367676, "loss": 0.4723512530326843, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.9596447864723, "step_time": 0.45888026046752933} +{"epoch": 0, "iter": 19693, "iter_tflops": 31.71807142454854, "iter_time": 0.6504523315429687, "loss": 0.5950067639350891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.13753130502222, "step_time": 0.5871526184082032} +{"epoch": 0, "iter": 19694, "iter_tflops": 33.597972518143216, "iter_time": 0.6140576934814452, "loss": 0.5969027280807495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.603529940168585, "step_time": 0.5636367187500001} +{"epoch": 0, "iter": 19695, "iter_tflops": 41.498916569100636, "iter_time": 0.49714776229858393, "loss": 0.5201117992401123, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27568806058786, "step_time": 0.45567708396911627} +{"epoch": 0, "iter": 19696, "iter_tflops": 42.0229073808415, "iter_time": 0.49094874191284177, "loss": 0.48859134316444397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.8834698683632, "step_time": 0.449641092300415} +{"epoch": 0, "iter": 19697, "iter_tflops": 36.54151713342669, "iter_time": 0.5645932388305664, "loss": 0.2152690589427948, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25407102714855, "step_time": 0.5125219135284425} +{"epoch": 0, "iter": 19698, "iter_tflops": 36.115962680157295, "iter_time": 0.5712458419799805, "loss": 0.2945592999458313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.060957504053036, "step_time": 0.5149925212860108} +{"epoch": 0, "iter": 19699, "iter_tflops": 36.89227920961832, "iter_time": 0.5592252349853516, "loss": 0.23626384139060974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.14421510100815, "step_time": 0.513924446105957} +{"epoch": 0, "iter": 19700, "iter_tflops": 36.67580576501418, "iter_time": 0.5625259780883789, "loss": 0.2331015169620514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
39.74951701528743, "step_time": 0.5190275268554687} +{"epoch": 0, "iter": 19701, "iter_tflops": 30.362416702067897, "iter_time": 0.6794944458007812, "loss": 0.670653760433197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.46710091842241, "step_time": 0.6164589385986329} +{"epoch": 0, "iter": 19702, "iter_tflops": 42.23482601142131, "iter_time": 0.48848534393310544, "loss": 0.5763178467750549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.496014807572244, "step_time": 0.453470344543457} +{"epoch": 0, "iter": 19703, "iter_tflops": 46.97331531269391, "iter_time": 0.4392088012695312, "loss": 0.572256863117218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.56126537700834, "step_time": 0.4080414791107178} +{"epoch": 0, "iter": 19704, "iter_tflops": 43.08342446764356, "iter_time": 0.478863826751709, "loss": 0.5963975191116333, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.165862272498934, "step_time": 0.44689067840576174} +{"epoch": 0, "iter": 19705, "iter_tflops": 41.58107828388162, "iter_time": 0.4961654281616211, "loss": 0.3094019591808319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.373805442416995, "step_time": 0.45469171714782713} +{"epoch": 0, "iter": 19706, "iter_tflops": 43.466210268691334, "iter_time": 0.47464670562744143, "loss": 0.3205547332763672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.82280258958713, "step_time": 0.4314070358276367} +{"epoch": 0, "iter": 19707, "iter_tflops": 48.39554277725391, "iter_time": 0.4263015213012695, "loss": 0.25580745935440063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.38562010895415, "step_time": 0.39383123588562013} +{"epoch": 0, "iter": 19708, "iter_tflops": 53.51124769630372, "iter_time": 0.38554685974121095, "loss": 0.3644229769706726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.00724093518242, "step_time": 0.3556641063690186} +{"epoch": 0, "iter": 19709, "iter_tflops": 28.7155168788812, "iter_time": 0.7184649887084961, "loss": 0.45493054389953613, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 30.462491130872717, "step_time": 0.6772621917724609} +{"epoch": 0, "iter": 19710, "iter_tflops": 15.094811559937554, "iter_time": 1.3667672119140624, "loss": 0.44261783361434937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.351985056232074, "step_time": 1.0660970153808595} +{"epoch": 0, "iter": 19711, "iter_tflops": 38.89942168789093, "iter_time": 0.5303701858520509, "loss": 0.5185905694961548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67316524046366, "step_time": 0.48346761703491214} +{"epoch": 0, "iter": 19712, "iter_tflops": 41.43956315948772, "iter_time": 0.49785982131958006, "loss": 0.45434728264808655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.27012276145247, "step_time": 0.4557331027984619} +{"epoch": 0, "iter": 19713, "iter_tflops": 32.31321792911739, "iter_time": 0.6384722671508789, "loss": 0.17887775599956512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.80255781370417, "step_time": 0.5762463569641114} +{"epoch": 0, "iter": 19714, "iter_tflops": 35.87078351760994, "iter_time": 0.5751503448486328, "loss": 0.07541988044977188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.128988556209, "step_time": 0.5141194496154785} +{"epoch": 0, "iter": 19715, "iter_tflops": 39.32793962168213, "iter_time": 0.5245912628173828, "loss": 0.09833355247974396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.03999250730429, "step_time": 0.4793470516204834} +{"epoch": 0, "iter": 19716, "iter_tflops": 41.54290686178411, "iter_time": 0.4966213264465332, "loss": 0.1168501228094101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.70811591522689, "step_time": 0.45136608886718754} +{"epoch": 0, "iter": 19717, "iter_tflops": 19.790477168717963, "iter_time": 1.042475799560547, "loss": 0.434353768825531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.88108214677418, "step_time": 0.9880279846191405} +{"epoch": 0, "iter": 19718, "iter_tflops": 15.524585007596185, "iter_time": 1.328930435180664, "loss": 0.42485231161117554, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 17.531626941789494, "step_time": 1.1767928657531739} +{"epoch": 0, "iter": 19719, "iter_tflops": 43.63595472611937, "iter_time": 0.4728003234863281, "loss": 0.3061794638633728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.1943145386041, "step_time": 0.4371520957946777} +{"epoch": 0, "iter": 19720, "iter_tflops": 48.65986404669661, "iter_time": 0.42398584365844727, "loss": 0.38281485438346863, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51977534025926, "step_time": 0.39282524299621585} +{"epoch": 0, "iter": 19721, "iter_tflops": 30.970887900561653, "iter_time": 0.6661447219848633, "loss": 0.5231376886367798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.86036512080577, "step_time": 0.6278412742614746} +{"epoch": 0, "iter": 19722, "iter_tflops": 18.300628441903893, "iter_time": 1.1273434448242188, "loss": 0.6221418976783752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.20419482348319, "step_time": 0.9291529674530028} +{"epoch": 0, "iter": 19723, "iter_tflops": 35.817039978829925, "iter_time": 0.5760133590698242, "loss": 0.4659275710582733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.01988702777364, "step_time": 0.5287327842712403} +{"epoch": 0, "iter": 19724, "iter_tflops": 40.63930453761622, "iter_time": 0.5076635475158692, "loss": 0.6339734792709351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.31320613343496, "step_time": 0.4655743808746338} +{"epoch": 0, "iter": 19725, "iter_tflops": 12.896281302624715, "iter_time": 0.6072873535156249, "loss": 0.008401390165090561, "lr": 3e-05, "seqlen": 3168.0, "step_tflops": 14.174054436277228, "step_time": 0.5525411643981935} +{"epoch": 0, "iter": 19726, "iter_tflops": 20.065495812505578, "iter_time": 0.3903092460632324, "loss": 0.015834327787160873, "lr": 3e-05, "seqlen": 3168.0, "step_tflops": 22.1462071990768, "step_time": 0.3536383666992187} +{"epoch": 0, "iter": 19727, "iter_tflops": 20.25655223067984, "iter_time": 0.3866279144287109, "loss": 0.006150725297629833, "lr": 
3e-05, "seqlen": 3168.0, "step_tflops": 22.26280742295983, "step_time": 0.35178620529174803} +{"epoch": 0, "iter": 19728, "iter_tflops": 21.590648703995672, "iter_time": 0.3627379913330078, "loss": 0.0012671025469899178, "lr": 3e-05, "seqlen": 3168.0, "step_tflops": 23.801022220223256, "step_time": 0.32905093193054197} +{"epoch": 0, "iter": 19729, "iter_tflops": 33.75506533202338, "iter_time": 0.6111999282836914, "loss": 0.009597529657185078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.2453358372815, "step_time": 0.5692068519592285} +{"epoch": 0, "iter": 19730, "iter_tflops": 11.145295520659454, "iter_time": 1.8511033172607423, "loss": 0.01028314046561718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.90234531935266, "step_time": 1.384419235229492} +{"epoch": 0, "iter": 19731, "iter_tflops": 13.021132358898068, "iter_time": 1.584431594848633, "loss": 0.00106915645301342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.04210126132011, "step_time": 1.286059299468994} +{"epoch": 0, "iter": 19732, "iter_tflops": 25.075018388457075, "iter_time": 0.8227748107910157, "loss": 0.003896175418049097, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.500764089066163, "step_time": 0.7238786106109618} +{"epoch": 0, "iter": 19733, "iter_tflops": 27.82222635640287, "iter_time": 0.6183039474487305, "loss": 0.25972452759742737, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 29.90657307116944, "step_time": 0.5752110862731933} +{"epoch": 0, "iter": 19734, "iter_tflops": 25.374939958690156, "iter_time": 0.677936279296875, "loss": 0.1850314438343048, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 27.395788577522744, "step_time": 0.627928352355957} +{"epoch": 0, "iter": 19735, "iter_tflops": 25.058307848173136, "iter_time": 0.6865025558471679, "loss": 0.17568452656269073, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 26.989470973977536, "step_time": 0.6373816070556642} +{"epoch": 0, "iter": 19736, "iter_tflops": 27.69855062776533, "iter_time": 0.6210647125244141, "loss": 
0.1524152308702469, "lr": 3e-05, "seqlen": 6864.0, "step_tflops": 29.757783104765878, "step_time": 0.5780871620178222} +{"epoch": 0, "iter": 19737, "iter_tflops": 30.14811834004653, "iter_time": 0.6843244171142578, "loss": 0.07468020170927048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.73930366487787, "step_time": 0.6301628684997558} +{"epoch": 0, "iter": 19738, "iter_tflops": 11.425787835831354, "iter_time": 1.8056604766845703, "loss": 0.04653917998075485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.035524332091866, "step_time": 1.4699196853637697} +{"epoch": 0, "iter": 19739, "iter_tflops": 17.673582984109466, "iter_time": 1.1673407440185546, "loss": 0.037652768194675446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.828289744409066, "step_time": 1.0404877967834472} +{"epoch": 0, "iter": 19740, "iter_tflops": 49.51467595176414, "iter_time": 0.41666623306274414, "loss": 0.061806149780750275, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.07455672024576, "step_time": 0.38153051567077634} +{"epoch": 0, "iter": 19741, "iter_tflops": 21.86985265366519, "iter_time": 0.6201397018432616, "loss": 0.23674415051937103, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 23.472390253333643, "step_time": 0.5778007164001465} +{"epoch": 0, "iter": 19742, "iter_tflops": 24.405959216266794, "iter_time": 0.5556988677978515, "loss": 0.23273052275180817, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 26.029374734091533, "step_time": 0.5210407104492187} +{"epoch": 0, "iter": 19743, "iter_tflops": 25.563384865288548, "iter_time": 0.5305386581420899, "loss": 0.18084195256233215, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 27.206697865682433, "step_time": 0.49849356842041026} +{"epoch": 0, "iter": 19744, "iter_tflops": 24.007670824607604, "iter_time": 0.5649179382324219, "loss": 0.1629376858472824, "lr": 3e-05, "seqlen": 5440.0, "step_tflops": 25.404825828689624, "step_time": 0.5338499069213867} +{"epoch": 0, "iter": 19745, "iter_tflops": 27.978776865748426, "iter_time": 
0.7373836822509765, "loss": 0.19784393906593323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.6580134187462, "step_time": 0.6956330223083497} +{"epoch": 0, "iter": 19746, "iter_tflops": 13.184639457359866, "iter_time": 1.564782531738281, "loss": 0.32092100381851196, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.821328671819494, "step_time": 1.2264841804504394} +{"epoch": 0, "iter": 19747, "iter_tflops": 39.95010442869813, "iter_time": 0.5164215164184571, "loss": 0.1851605772972107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.81295438056455, "step_time": 0.47089026069641116} +{"epoch": 0, "iter": 19748, "iter_tflops": 39.20184804645626, "iter_time": 0.5262785949707032, "loss": 0.18411226570606232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90570512608592, "step_time": 0.4808473243713378} +{"epoch": 0, "iter": 19749, "iter_tflops": 2.0557193575263537, "iter_time": 0.8961862869262696, "loss": 0.967602550983429, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 2.2336010862825417, "step_time": 0.8248149185180664} +{"epoch": 0, "iter": 19750, "iter_tflops": 1.3732719495087478, "iter_time": 1.3415460052490236, "loss": 1.0563771724700928, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 1.7842372108941855, "step_time": 1.0325462818145752} +{"epoch": 0, "iter": 19751, "iter_tflops": 3.2044745794852756, "iter_time": 0.5749171829223633, "loss": 0.8275802135467529, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 3.514389464366772, "step_time": 0.5242183647155763} +{"epoch": 0, "iter": 19752, "iter_tflops": 3.6769442784264617, "iter_time": 0.501043083190918, "loss": 0.9077453017234802, "lr": 3e-05, "seqlen": 752.0, "step_tflops": 4.012421976524948, "step_time": 0.45915098381042485} +{"epoch": 0, "iter": 19753, "iter_tflops": 19.0234283307987, "iter_time": 1.0845097503662111, "loss": 0.5476937890052795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.360276298046138, "step_time": 1.0133012542724609} +{"epoch": 0, "iter": 19754, "iter_tflops": 19.400762326022324, 
"iter_time": 1.0634166412353516, "loss": 0.4992811977863312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.17201777633852, "step_time": 0.8903451442718506} +{"epoch": 0, "iter": 19755, "iter_tflops": 42.59178261928353, "iter_time": 0.4843914070129395, "loss": 0.44901758432388306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.854489360976935, "step_time": 0.44992527008056643} +{"epoch": 0, "iter": 19756, "iter_tflops": 44.310788323802655, "iter_time": 0.4655997848510742, "loss": 0.43778136372566223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.490898928123514, "step_time": 0.43442204666137696} +{"epoch": 0, "iter": 19757, "iter_tflops": 35.663403290497016, "iter_time": 0.5784948043823243, "loss": 0.01193503849208355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.38168942835206, "step_time": 0.53752437210083} +{"epoch": 0, "iter": 19758, "iter_tflops": 11.41233711379533, "iter_time": 1.8077886505126954, "loss": 0.001965778646990657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.15435257729063, "step_time": 1.5683853225708007} +{"epoch": 0, "iter": 19759, "iter_tflops": 10.930486880481265, "iter_time": 1.8874816589355468, "loss": 0.007780521176755428, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.76944751501275, "step_time": 1.6156606216430662} +{"epoch": 0, "iter": 19760, "iter_tflops": 36.75033580023509, "iter_time": 0.5613851699829102, "loss": 0.001818459015339613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.33396499454522, "step_time": 0.43586235618591307} +{"epoch": 0, "iter": 19761, "iter_tflops": 24.92134382969777, "iter_time": 0.6441595764160157, "loss": 0.23574139177799225, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.79139404214697, "step_time": 0.5991969757080078} +{"epoch": 0, "iter": 19762, "iter_tflops": 23.6548986437766, "iter_time": 0.678646842956543, "loss": 0.20802871882915497, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 25.396421594977006, "step_time": 0.6321096153259278} +{"epoch": 0, "iter": 19763, "iter_tflops": 
24.77153509303584, "iter_time": 0.6480552062988281, "loss": 0.25380200147628784, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.659414607965005, "step_time": 0.6021633453369141} +{"epoch": 0, "iter": 19764, "iter_tflops": 24.715507153424667, "iter_time": 0.6495242919921874, "loss": 0.1914094090461731, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.58675261513674, "step_time": 0.603809066772461} +{"epoch": 0, "iter": 19765, "iter_tflops": 32.694558032833534, "iter_time": 0.6310253067016602, "loss": 0.5553066730499268, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.9205412068593, "step_time": 0.5743536376953126} +{"epoch": 0, "iter": 19766, "iter_tflops": 36.510938226383196, "iter_time": 0.5650661010742188, "loss": 0.5068976879119873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.729585785253356, "step_time": 0.5192879085540771} +{"epoch": 0, "iter": 19767, "iter_tflops": 36.945716818539566, "iter_time": 0.5584163818359376, "loss": 0.5932937264442444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.22140746929723, "step_time": 0.5129381294250488} +{"epoch": 0, "iter": 19768, "iter_tflops": 36.19747697993092, "iter_time": 0.5699594345092773, "loss": 0.524349570274353, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.39301287351695, "step_time": 0.5237246913909912} +{"epoch": 0, "iter": 19769, "iter_tflops": 36.559518168440945, "iter_time": 0.5643152465820314, "loss": 0.08812176436185837, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.647969505975105, "step_time": 0.5075553283691406} +{"epoch": 0, "iter": 19770, "iter_tflops": 38.60383392322681, "iter_time": 0.5344312057495118, "loss": 0.11425334960222244, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.480946686407975, "step_time": 0.4856552200317383} +{"epoch": 0, "iter": 19771, "iter_tflops": 41.71000923036763, "iter_time": 0.49463171768188474, "loss": 0.08900854736566544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.870994450059996, "step_time": 0.44976338005065913} +{"epoch": 0, "iter": 19772, 
"iter_tflops": 40.564059477666014, "iter_time": 0.5086052474975585, "loss": 0.09999415278434753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.644007179348684, "step_time": 0.46212458992004396} +{"epoch": 0, "iter": 19773, "iter_tflops": 18.69649187252697, "iter_time": 1.1034740447998046, "loss": 0.07289524376392365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.991395100745514, "step_time": 1.0319986877441405} +{"epoch": 0, "iter": 19774, "iter_tflops": 14.56275946504212, "iter_time": 1.4167022094726562, "loss": 0.12016662210226059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.442997949546232, "step_time": 1.0092009773254393} +{"epoch": 0, "iter": 19775, "iter_tflops": 41.255119273528315, "iter_time": 0.5000856590270997, "loss": 0.14323848485946655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.15151055351444, "step_time": 0.45693030548095703} +{"epoch": 0, "iter": 19776, "iter_tflops": 44.10054488393851, "iter_time": 0.46781946945190434, "loss": 0.06920322775840759, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.424305767427406, "step_time": 0.42604830741882327} +{"epoch": 0, "iter": 19777, "iter_tflops": 17.223377256662456, "iter_time": 1.1978541259765623, "loss": 0.006673440337181091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.351105819714356, "step_time": 1.1242425231933595} +{"epoch": 0, "iter": 19778, "iter_tflops": 21.806061612956295, "iter_time": 0.9461173629760742, "loss": 0.008397718891501427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.703529974943024, "step_time": 0.7447099170684814} +{"epoch": 0, "iter": 19779, "iter_tflops": 56.73254504678619, "iter_time": 0.3636553497314453, "loss": 0.009036034345626831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.986993697293165, "step_time": 0.3328293933868408} +{"epoch": 0, "iter": 19780, "iter_tflops": 56.418409602066696, "iter_time": 0.3656801681518555, "loss": 0.002841015113517642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.9648566009147, "step_time": 0.3329482975006104} 
+{"epoch": 0, "iter": 19781, "iter_tflops": 44.7865148655482, "iter_time": 0.46065414047241204, "loss": 0.47018149495124817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.920086009844155, "step_time": 0.42173052406311035} +{"epoch": 0, "iter": 19782, "iter_tflops": 44.31941805859161, "iter_time": 0.46550912475585937, "loss": 0.4686492383480072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.24784061764452, "step_time": 0.4276065673828125} +{"epoch": 0, "iter": 19783, "iter_tflops": 47.15048516922832, "iter_time": 0.43755845642089847, "loss": 0.570124626159668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.81998039536209, "step_time": 0.4059642162322998} +{"epoch": 0, "iter": 19784, "iter_tflops": 47.398177893477694, "iter_time": 0.4352718696594239, "loss": 0.4262653589248657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.96825690538067, "step_time": 0.404783187866211} +{"epoch": 0, "iter": 19785, "iter_tflops": 38.046898415826526, "iter_time": 0.542254280090332, "loss": 0.3111739456653595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.87595830092143, "step_time": 0.5047243995666504} +{"epoch": 0, "iter": 19786, "iter_tflops": 31.677927117181365, "iter_time": 0.6512766265869141, "loss": 0.3025638461112976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.177181544514816, "step_time": 0.5864908046722412} +{"epoch": 0, "iter": 19787, "iter_tflops": 46.581095897080964, "iter_time": 0.44290700149536133, "loss": 0.2974988520145416, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.46900057187402, "step_time": 0.40878743934631345} +{"epoch": 0, "iter": 19788, "iter_tflops": 47.900626072830164, "iter_time": 0.43070613479614256, "loss": 0.27718421816825867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.081264742434556, "step_time": 0.39613272857666015} +{"epoch": 0, "iter": 19789, "iter_tflops": 32.42986779322178, "iter_time": 0.6361756896972657, "loss": 0.55312180519104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.401890000185745, "step_time": 
0.5997081413269043} +{"epoch": 0, "iter": 19790, "iter_tflops": 16.542223845960972, "iter_time": 1.247177749633789, "loss": 0.5407237410545349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.73440722617394, "step_time": 1.101240795135498} +{"epoch": 0, "iter": 19791, "iter_tflops": 39.16762454753432, "iter_time": 0.5267384414672852, "loss": 0.4732283055782318, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.86201099154213, "step_time": 0.4137637672424317} +{"epoch": 0, "iter": 19792, "iter_tflops": 48.10057825120341, "iter_time": 0.42891570663452155, "loss": 0.7050383687019348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.061090558212946, "step_time": 0.39628623390197754} +{"epoch": 0, "iter": 19793, "iter_tflops": 19.69558285228843, "iter_time": 0.5158988189697266, "loss": 0.015963230282068253, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 21.19002748340168, "step_time": 0.4795146179199219} +{"epoch": 0, "iter": 19794, "iter_tflops": 9.326867772327493, "iter_time": 1.089425537109375, "loss": 0.0025835547130554914, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 10.47599245436791, "step_time": 0.9699250907897947} +{"epoch": 0, "iter": 19795, "iter_tflops": 19.335090624141973, "iter_time": 0.5255174713134766, "loss": 0.0005618844879791141, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 21.427108931702847, "step_time": 0.47420900154113765} +{"epoch": 0, "iter": 19796, "iter_tflops": 22.70732834120819, "iter_time": 0.44747351074218744, "loss": 0.009515066631138325, "lr": 3e-05, "seqlen": 4096.0, "step_tflops": 25.039286638510216, "step_time": 0.40579941749572757} +{"epoch": 0, "iter": 19797, "iter_tflops": 35.928685114949566, "iter_time": 0.5742234497070313, "loss": 0.2484835535287857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.49747031922086, "step_time": 0.5223396167755127} +{"epoch": 0, "iter": 19798, "iter_tflops": 36.96314171659217, "iter_time": 0.5581531372070312, "loss": 0.19866111874580383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
43.34126976797025, "step_time": 0.47601497650146485} +{"epoch": 0, "iter": 19799, "iter_tflops": 42.200814502232845, "iter_time": 0.48887903594970705, "loss": 0.2871135175228119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.192847992116235, "step_time": 0.44662960624694825} +{"epoch": 0, "iter": 19800, "iter_tflops": 38.956279164160186, "iter_time": 0.5295960998535156, "loss": 0.2049027532339096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.42119306771544, "step_time": 0.48633930397033687} +{"epoch": 0, "iter": 19801, "iter_tflops": 27.951912183239205, "iter_time": 0.738092384338379, "loss": 0.15872012078762054, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.999987669681303, "step_time": 0.6877033996582032} +{"epoch": 0, "iter": 19802, "iter_tflops": 47.261737393517926, "iter_time": 0.43652846145629887, "loss": 0.1606195867061615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.93669358800581, "step_time": 0.3972354049682617} +{"epoch": 0, "iter": 19803, "iter_tflops": 51.996613622399764, "iter_time": 0.3967776374816894, "loss": 0.18556171655654907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.424391028163505, "step_time": 0.3656414031982421} +{"epoch": 0, "iter": 19804, "iter_tflops": 48.75258644538033, "iter_time": 0.4231794662475586, "loss": 0.19394908845424652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.87763611027902, "step_time": 0.3901667137145996} +{"epoch": 0, "iter": 19805, "iter_tflops": 21.53388498671225, "iter_time": 0.4307433395385742, "loss": 0.001687420648522675, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 23.700874432554453, "step_time": 0.3913601398468018} +{"epoch": 0, "iter": 19806, "iter_tflops": 15.858747031675533, "iter_time": 0.5848871612548827, "loss": 0.006063147448003292, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 19.530783238136518, "step_time": 0.4749209194183349} +{"epoch": 0, "iter": 19807, "iter_tflops": 20.439836193767, "iter_time": 0.453799015045166, "loss": 0.006458418909460306, "lr": 3e-05, "seqlen": 
3744.0, "step_tflops": 22.630382622737113, "step_time": 0.409872766494751} +{"epoch": 0, "iter": 19808, "iter_tflops": 20.035925987679363, "iter_time": 0.46294728469848634, "loss": 0.004082659725099802, "lr": 3e-05, "seqlen": 3744.0, "step_tflops": 22.251305084916055, "step_time": 0.4168554382324219} +{"epoch": 0, "iter": 19809, "iter_tflops": 23.72794290898337, "iter_time": 0.7840168914794922, "loss": 0.010172393172979355, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 25.62703265042257, "step_time": 0.725917366027832} +{"epoch": 0, "iter": 19810, "iter_tflops": 19.02789202601449, "iter_time": 0.9776757202148437, "loss": 0.025464337319135666, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 23.89182856292034, "step_time": 0.7786389389038086} +{"epoch": 0, "iter": 19811, "iter_tflops": 52.23672127679839, "iter_time": 0.35613085174560544, "loss": 0.005900036543607712, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 57.53593473773668, "step_time": 0.32333024787902837} +{"epoch": 0, "iter": 19812, "iter_tflops": 49.052063338077204, "iter_time": 0.37925230407714844, "loss": 0.0013043198268860579, "lr": 3e-05, "seqlen": 7408.0, "step_tflops": 54.07625566931083, "step_time": 0.34401620101928715} +{"epoch": 0, "iter": 19813, "iter_tflops": 23.737436178872006, "iter_time": 0.8691373977661133, "loss": 0.25952237844467163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.9336292973384, "step_time": 0.8274404525756835} +{"epoch": 0, "iter": 19814, "iter_tflops": 13.541772763426259, "iter_time": 1.5235149688720704, "loss": 0.2726407051086426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.550369513302202, "step_time": 1.3267268981933593} +{"epoch": 0, "iter": 19815, "iter_tflops": 37.38886884986221, "iter_time": 0.5517977447509765, "loss": 0.359406054019928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.96616683394691, "step_time": 0.5036129837036133} +{"epoch": 0, "iter": 19816, "iter_tflops": 39.1739425903286, "iter_time": 0.5266534881591797, "loss": 0.30698004364967346, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87565623679777, "step_time": 0.48118432044982906} +{"epoch": 0, "iter": 19817, "iter_tflops": 19.375765163009913, "iter_time": 1.0647885818481446, "loss": 0.4593210518360138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.836782176251493, "step_time": 0.9901285781860352} +{"epoch": 0, "iter": 19818, "iter_tflops": 21.583851122961633, "iter_time": 0.9558578491210937, "loss": 0.5220450162887573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.089669031193754, "step_time": 0.8222943668365478} +{"epoch": 0, "iter": 19819, "iter_tflops": 43.6297190812978, "iter_time": 0.47286789703369136, "loss": 0.5755556225776672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.61441399640047, "step_time": 0.43329512596130376} +{"epoch": 0, "iter": 19820, "iter_tflops": 43.788501811972395, "iter_time": 0.47115321731567383, "loss": 0.5941238403320312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.0851358091449, "step_time": 0.43816574287414556} +{"epoch": 0, "iter": 19821, "iter_tflops": 32.02883487892722, "iter_time": 0.644141242980957, "loss": 0.3648446798324585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.10897525656489, "step_time": 0.6048582038879394} +{"epoch": 0, "iter": 19822, "iter_tflops": 18.907665405490285, "iter_time": 1.0911497039794922, "loss": 0.3700288236141205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.93631149143912, "step_time": 0.9404996604919433} +{"epoch": 0, "iter": 19823, "iter_tflops": 49.514337776705354, "iter_time": 0.4166690788269043, "loss": 0.412400484085083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.843023387533954, "step_time": 0.3831711559295654} +{"epoch": 0, "iter": 19824, "iter_tflops": 43.61131718167396, "iter_time": 0.4730674247741699, "loss": 0.40690797567367554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.356565930021134, "step_time": 0.4356543407440186} +{"epoch": 0, "iter": 19825, "iter_tflops": 43.36709064723366, "iter_time": 0.47573155593872063, "loss": 
0.46551746129989624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.34267434735931, "step_time": 0.4357821731567383} +{"epoch": 0, "iter": 19826, "iter_tflops": 10.671364614046203, "iter_time": 1.933313522338867, "loss": 0.6070326566696167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.906525209956193, "step_time": 1.7327552032470703} +{"epoch": 0, "iter": 19827, "iter_tflops": 13.36750664006555, "iter_time": 1.5433763427734375, "loss": 0.5480367541313171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.666635725505806, "step_time": 1.4066684341430664} +{"epoch": 0, "iter": 19828, "iter_tflops": 34.32196698414945, "iter_time": 0.601104637145996, "loss": 0.5103235840797424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.73117071384623, "step_time": 0.5467917671203614} +{"epoch": 0, "iter": 19829, "iter_tflops": 13.398119477236852, "iter_time": 1.0608974914550782, "loss": 0.2646366357803345, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 14.331118523793966, "step_time": 0.9918298645019531} +{"epoch": 0, "iter": 19830, "iter_tflops": 14.950060392642197, "iter_time": 0.9507674865722656, "loss": 0.20566929876804352, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 17.483295816296174, "step_time": 0.8130063972473145} +{"epoch": 0, "iter": 19831, "iter_tflops": 20.920329070856187, "iter_time": 0.6794363174438477, "loss": 0.14904920756816864, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 22.409120769955774, "step_time": 0.6342966995239258} +{"epoch": 0, "iter": 19832, "iter_tflops": 22.85871596354412, "iter_time": 0.6218210754394531, "loss": 0.2642747759819031, "lr": 3e-05, "seqlen": 5696.0, "step_tflops": 24.532326376305946, "step_time": 0.5794000587463379} +{"epoch": 0, "iter": 19833, "iter_tflops": 27.867919179466508, "iter_time": 0.7403169708251953, "loss": 0.17588406801223755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.10516644490472, "step_time": 0.6853007621765136} +{"epoch": 0, "iter": 19834, "iter_tflops": 7.620818703182755, "iter_time": 
2.7072017211914057, "loss": 0.1419166624546051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.727864940296437, "step_time": 2.3638190612792966} +{"epoch": 0, "iter": 19835, "iter_tflops": 10.797076584215285, "iter_time": 1.9108036651611326, "loss": 0.1369170844554901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.225422995449303, "step_time": 1.5599571762084963} +{"epoch": 0, "iter": 19836, "iter_tflops": 43.4641789839479, "iter_time": 0.474668888092041, "loss": 0.18656060099601746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.58741479767445, "step_time": 0.43354096031188966} +{"epoch": 0, "iter": 19837, "iter_tflops": 13.700352640723953, "iter_time": 1.1149762115478516, "loss": 0.1776045560836792, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 14.696428954735742, "step_time": 1.03940673828125} +{"epoch": 0, "iter": 19838, "iter_tflops": 16.097295587095303, "iter_time": 0.9489524002075195, "loss": 0.31437328457832336, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 19.212261030637276, "step_time": 0.7950947189331053} +{"epoch": 0, "iter": 19839, "iter_tflops": 27.572483804640367, "iter_time": 0.5540149154663087, "loss": 0.15465833246707916, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 29.39755397884847, "step_time": 0.5196203498840333} +{"epoch": 0, "iter": 19840, "iter_tflops": 26.167916686580025, "iter_time": 0.5837517547607423, "loss": 0.24075385928153992, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 27.81344468029386, "step_time": 0.5492152252197265} +{"epoch": 0, "iter": 19841, "iter_tflops": 39.15543623027553, "iter_time": 0.5269024047851563, "loss": 0.2427065372467041, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.589293610001064, "step_time": 0.48441971588134763} +{"epoch": 0, "iter": 19842, "iter_tflops": 45.73557088408639, "iter_time": 0.45109513473510743, "loss": 0.2224036604166031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.622785529622654, "step_time": 0.415758472442627} +{"epoch": 0, "iter": 19843, "iter_tflops": 43.54625788193504, 
"iter_time": 0.4737742004394531, "loss": 0.24753724038600922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.82080157930586, "step_time": 0.44063947677612303} +{"epoch": 0, "iter": 19844, "iter_tflops": 45.18614967980491, "iter_time": 0.45658002853393553, "loss": 0.27028411626815796, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.99571275639947, "step_time": 0.42107956695556636} +{"epoch": 0, "iter": 19845, "iter_tflops": 33.413087429437944, "iter_time": 0.617455467224121, "loss": 0.344260573387146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.02019429558715, "step_time": 0.5727646369934082} +{"epoch": 0, "iter": 19846, "iter_tflops": 40.74386194387329, "iter_time": 0.5063607749938965, "loss": 0.5669980049133301, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.125235695219146, "step_time": 0.4675576953887939} +{"epoch": 0, "iter": 19847, "iter_tflops": 46.257920302684134, "iter_time": 0.44600131988525393, "loss": 0.5484632849693298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.02497195809034, "step_time": 0.41241589355468744} +{"epoch": 0, "iter": 19848, "iter_tflops": 44.52647585098448, "iter_time": 0.4633444061279297, "loss": 0.5272654891014099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.60642229421299, "step_time": 0.42445200729370114} +{"epoch": 0, "iter": 19849, "iter_tflops": 2.5350040038541035, "iter_time": 0.866380828857422, "loss": 0.28479811549186707, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 2.792658872288759, "step_time": 0.7864472427368163} +{"epoch": 0, "iter": 19850, "iter_tflops": 1.659686303181603, "iter_time": 1.3233096313476562, "loss": 0.01427625585347414, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 2.143852566754834, "step_time": 1.0244542484283448} +{"epoch": 0, "iter": 19851, "iter_tflops": 3.8223959119955406, "iter_time": 0.5745817337036132, "loss": 0.10512571781873703, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 4.165896447933329, "step_time": 0.5272043838500977} +{"epoch": 0, "iter": 19852, "iter_tflops": 
4.019637827805091, "iter_time": 0.5463872528076172, "loss": 0.3165927529335022, "lr": 3e-05, "seqlen": 896.0, "step_tflops": 4.40127775815872, "step_time": 0.499009376525879} +{"epoch": 0, "iter": 19853, "iter_tflops": 18.09082950430969, "iter_time": 1.140417221069336, "loss": 0.17199039459228516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.42503076762696, "step_time": 1.0620880737304688} +{"epoch": 0, "iter": 19854, "iter_tflops": 19.030078402429687, "iter_time": 1.0841307678222656, "loss": 0.2623036503791809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.504740064096634, "step_time": 0.9593742332458496} +{"epoch": 0, "iter": 19855, "iter_tflops": 45.83457159464703, "iter_time": 0.4501207885742188, "loss": 0.20589420199394226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.263211888782436, "step_time": 0.4104611053466797} +{"epoch": 0, "iter": 19856, "iter_tflops": 48.20035513166758, "iter_time": 0.42802783203125, "loss": 0.2927301824092865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.207733628791594, "step_time": 0.39517313003540044} +{"epoch": 0, "iter": 19857, "iter_tflops": 38.14802492077075, "iter_time": 0.4930668449401855, "loss": 0.06668045371770859, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 41.670897701078, "step_time": 0.4513827953338623} +{"epoch": 0, "iter": 19858, "iter_tflops": 35.00577450882016, "iter_time": 0.5373263854980469, "loss": 0.06569702178239822, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 39.56282285301642, "step_time": 0.475434383392334} +{"epoch": 0, "iter": 19859, "iter_tflops": 39.05688325848219, "iter_time": 0.4815931205749512, "loss": 0.04393019154667854, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 43.122965138821804, "step_time": 0.4361835098266601} +{"epoch": 0, "iter": 19860, "iter_tflops": 37.91555597345508, "iter_time": 0.49608995056152344, "loss": 0.05880551040172577, "lr": 3e-05, "seqlen": 7488.0, "step_tflops": 41.53717341022283, "step_time": 0.45283597183227536} +{"epoch": 0, "iter": 19861, 
"iter_tflops": 20.440842341276387, "iter_time": 1.0093074035644531, "loss": 0.4663911461830139, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.88346305405593, "step_time": 0.9427709617614747} +{"epoch": 0, "iter": 19862, "iter_tflops": 15.59177735013249, "iter_time": 1.3232034454345705, "loss": 0.4512861967086792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.509031326769517, "step_time": 1.1783115310668946} +{"epoch": 0, "iter": 19863, "iter_tflops": 47.039684700948065, "iter_time": 0.438589111328125, "loss": 0.5693396925926208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.497912884586725, "step_time": 0.4006199932098389} +{"epoch": 0, "iter": 19864, "iter_tflops": 44.22946724033252, "iter_time": 0.46645584487915037, "loss": 0.681571900844574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.76333301807842, "step_time": 0.43194417572021493} +{"epoch": 0, "iter": 19865, "iter_tflops": 34.33684259517757, "iter_time": 0.6008442230224609, "loss": 0.13737738132476807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.63493967974373, "step_time": 0.5631534729003906} +{"epoch": 0, "iter": 19866, "iter_tflops": 15.872413572422785, "iter_time": 1.2998082122802734, "loss": 0.0916135236620903, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.40403706011421, "step_time": 1.0632371730804444} +{"epoch": 0, "iter": 19867, "iter_tflops": 50.90159454592048, "iter_time": 0.405313304901123, "loss": 0.09349384903907776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.28707639758601, "step_time": 0.37316304016113283} +{"epoch": 0, "iter": 19868, "iter_tflops": 52.472521333302765, "iter_time": 0.3931790008544922, "loss": 0.07751719653606415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.835976254729815, "step_time": 0.36299356269836425} +{"epoch": 0, "iter": 19869, "iter_tflops": 36.77180262899962, "iter_time": 0.5610574417114258, "loss": 0.5337882041931152, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.84633667753651, "step_time": 0.5177663803100586} +{"epoch": 0, 
"iter": 19870, "iter_tflops": 39.72873344343201, "iter_time": 0.5192990493774414, "loss": 0.4171242117881775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59217992450783, "step_time": 0.47327510452270505} +{"epoch": 0, "iter": 19871, "iter_tflops": 40.96041614191314, "iter_time": 0.5036836891174317, "loss": 0.5524458885192871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.69598509455868, "step_time": 0.4615871753692627} +{"epoch": 0, "iter": 19872, "iter_tflops": 40.31366497081815, "iter_time": 0.5117642745971679, "loss": 0.42736878991127014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84798646930538, "step_time": 0.47051404571533195} +{"epoch": 0, "iter": 19873, "iter_tflops": 15.66264313209363, "iter_time": 1.3172165985107422, "loss": 0.3072398900985718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.75003379890076, "step_time": 1.23170458984375} +{"epoch": 0, "iter": 19874, "iter_tflops": 21.049178383432306, "iter_time": 0.9801377105712891, "loss": 0.20403917133808136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.49237383860548, "step_time": 0.8093045253753661} +{"epoch": 0, "iter": 19875, "iter_tflops": 40.04244677571781, "iter_time": 0.5152305908203125, "loss": 0.2938292324542999, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.9442793327361, "step_time": 0.4694830322265625} +{"epoch": 0, "iter": 19876, "iter_tflops": 42.646826382879134, "iter_time": 0.48376620864868164, "loss": 0.24830563366413116, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.1294614499547, "step_time": 0.4472433204650879} +{"epoch": 0, "iter": 19877, "iter_tflops": 25.335228347437457, "iter_time": 0.8143243560791016, "loss": 0.17577269673347473, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.73107444785773, "step_time": 0.7718018798828125} +{"epoch": 0, "iter": 19878, "iter_tflops": 16.891714029579227, "iter_time": 1.2213735961914063, "loss": 0.15336793661117554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.819317328888225, "step_time": 0.9455425758361817} 
+{"epoch": 0, "iter": 19879, "iter_tflops": 47.54662286541502, "iter_time": 0.4339129104614258, "loss": 0.17232640087604523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.42874003998151, "step_time": 0.4011588363647461} +{"epoch": 0, "iter": 19880, "iter_tflops": 46.86324712001122, "iter_time": 0.44024037551879885, "loss": 0.16974540054798126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.79066891290611, "step_time": 0.40619849967956545} +{"epoch": 0, "iter": 19881, "iter_tflops": 29.21488795112916, "iter_time": 0.7061842422485353, "loss": 0.6524332165718079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.094462262252307, "step_time": 0.6634973564147949} +{"epoch": 0, "iter": 19882, "iter_tflops": 15.723580423165307, "iter_time": 1.3121116790771483, "loss": 0.6011318564414978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.35325779213027, "step_time": 1.1241107025146486} +{"epoch": 0, "iter": 19883, "iter_tflops": 41.34926047672954, "iter_time": 0.4989470977783203, "loss": 0.5157232880592346, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.64587880001753, "step_time": 0.46210521697998047} +{"epoch": 0, "iter": 19884, "iter_tflops": 43.554718989397756, "iter_time": 0.4736821632385254, "loss": 0.42194023728370667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.89766786349166, "step_time": 0.43991725921630864} +{"epoch": 0, "iter": 19885, "iter_tflops": 47.58048126321904, "iter_time": 0.4187898635864258, "loss": 0.09081615507602692, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 52.39743120271843, "step_time": 0.3802900791168212} +{"epoch": 0, "iter": 19886, "iter_tflops": 47.491066771141895, "iter_time": 0.41957834625244145, "loss": 0.06914344429969788, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 51.83555143399567, "step_time": 0.3844122943878173} +{"epoch": 0, "iter": 19887, "iter_tflops": 47.44098090615943, "iter_time": 0.4200213165283203, "loss": 0.06271561980247498, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 51.537383334072985, "step_time": 
0.3866363010406494} +{"epoch": 0, "iter": 19888, "iter_tflops": 49.17274368928444, "iter_time": 0.40522903060913085, "loss": 0.06252151727676392, "lr": 3e-05, "seqlen": 7920.0, "step_tflops": 53.50025575549347, "step_time": 0.3724509906768799} +{"epoch": 0, "iter": 19889, "iter_tflops": 46.67888876410868, "iter_time": 0.4233299064636231, "loss": 0.03380940109491348, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 51.30257380677853, "step_time": 0.38517696380615235} +{"epoch": 0, "iter": 19890, "iter_tflops": 42.69318502780613, "iter_time": 0.46285067749023434, "loss": 0.05484248697757721, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 48.74187663914624, "step_time": 0.4054125728607178} +{"epoch": 0, "iter": 19891, "iter_tflops": 47.87128425968634, "iter_time": 0.4127854499816895, "loss": 0.056547485291957855, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 52.18317804712908, "step_time": 0.37867700576782226} +{"epoch": 0, "iter": 19892, "iter_tflops": 48.99818673817307, "iter_time": 0.4032918548583984, "loss": 0.0896596908569336, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 53.38671654119636, "step_time": 0.370140193939209} +{"epoch": 0, "iter": 19893, "iter_tflops": 31.60600250380583, "iter_time": 0.6527587127685547, "loss": 0.08913201838731766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.69302247813954, "step_time": 0.6123254013061524} +{"epoch": 0, "iter": 19894, "iter_tflops": 18.33798993220425, "iter_time": 1.125046615600586, "loss": 0.10731717199087143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.608622757629643, "step_time": 1.0010903568267824} +{"epoch": 0, "iter": 19895, "iter_tflops": 38.13929320389351, "iter_time": 0.5409406356811524, "loss": 0.08237864822149277, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.93291888954649, "step_time": 0.49200232315063475} +{"epoch": 0, "iter": 19896, "iter_tflops": 45.49483260699553, "iter_time": 0.4534821281433105, "loss": 0.11967448890209198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.927875183050475, 
"step_time": 0.41321793556213376} +{"epoch": 0, "iter": 19897, "iter_tflops": 23.22662808731624, "iter_time": 0.8882517700195313, "loss": 0.6523222923278809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.757564865056448, "step_time": 0.8333248291015624} +{"epoch": 0, "iter": 19898, "iter_tflops": 9.775693282144466, "iter_time": 2.1104481201171876, "loss": 0.540611743927002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.038720340253466, "step_time": 1.8689751052856447} +{"epoch": 0, "iter": 19899, "iter_tflops": 13.429448482938785, "iter_time": 1.5362576904296876, "loss": 0.4710644781589508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.06240525140558, "step_time": 1.1422118606567384} +{"epoch": 0, "iter": 19900, "iter_tflops": 35.21966217836294, "iter_time": 0.5857834014892579, "loss": 0.5296832323074341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.33002166017505, "step_time": 0.5382489395141602} +{"epoch": 0, "iter": 19901, "iter_tflops": 13.03964352476347, "iter_time": 1.140131378173828, "loss": 0.24625475704669952, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 13.884274027256128, "step_time": 1.0707730712890626} +{"epoch": 0, "iter": 19902, "iter_tflops": 24.849369525781384, "iter_time": 0.5982810440063476, "loss": 0.15921641886234283, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 26.838999708884685, "step_time": 0.55392924118042} +{"epoch": 0, "iter": 19903, "iter_tflops": 26.25892359884384, "iter_time": 0.566165885925293, "loss": 0.24806363880634308, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 28.05076158421559, "step_time": 0.5300001106262208} +{"epoch": 0, "iter": 19904, "iter_tflops": 25.684379274825275, "iter_time": 0.578830680847168, "loss": 0.1962924599647522, "lr": 3e-05, "seqlen": 5952.0, "step_tflops": 27.31980621135664, "step_time": 0.544180534362793} +{"epoch": 0, "iter": 19905, "iter_tflops": 30.332284230224094, "iter_time": 0.6801694641113281, "loss": 0.28292858600616455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
32.1802333920302, "step_time": 0.6411107482910156} +{"epoch": 0, "iter": 19906, "iter_tflops": 9.831604419762346, "iter_time": 2.0984462585449215, "loss": 0.2898460328578949, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.351376932190078, "step_time": 1.6703476562499997} +{"epoch": 0, "iter": 19907, "iter_tflops": 12.91959964052366, "iter_time": 1.596883346557617, "loss": 0.3142551779747009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.232795955668609, "step_time": 1.354386520385742} +{"epoch": 0, "iter": 19908, "iter_tflops": 18.89114882966417, "iter_time": 1.0921036987304689, "loss": 0.23947905004024506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.169025242010015, "step_time": 0.8904601421356201} +{"epoch": 0, "iter": 19909, "iter_tflops": 15.796891817341137, "iter_time": 0.9566468276977539, "loss": 0.2444547712802887, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 17.085924121313944, "step_time": 0.884473461151123} +{"epoch": 0, "iter": 19910, "iter_tflops": 26.85745609822089, "iter_time": 0.5626760177612304, "loss": 0.15390579402446747, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.772029129435904, "step_time": 0.5252339477539062} +{"epoch": 0, "iter": 19911, "iter_tflops": 26.266286610280602, "iter_time": 0.5753400421142577, "loss": 0.1497538536787033, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 27.82601740392422, "step_time": 0.5430905265808106} +{"epoch": 0, "iter": 19912, "iter_tflops": 26.733624124669355, "iter_time": 0.565282371520996, "loss": 0.23158302903175354, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 28.319397154532094, "step_time": 0.5336288185119629} +{"epoch": 0, "iter": 19913, "iter_tflops": 24.202399091732932, "iter_time": 0.8524400177001952, "loss": 0.45236727595329285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.468307169180836, "step_time": 0.8100692901611328} +{"epoch": 0, "iter": 19914, "iter_tflops": 15.368561946779698, "iter_time": 1.342421859741211, "loss": 0.5551345944404602, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 17.601960507447473, "step_time": 1.1720906600952148} +{"epoch": 0, "iter": 19915, "iter_tflops": 33.52637584360065, "iter_time": 0.6153690338134765, "loss": 0.5641929507255554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.443871854391716, "step_time": 0.566105972290039} +{"epoch": 0, "iter": 19916, "iter_tflops": 34.40490286594592, "iter_time": 0.5996556243896485, "loss": 0.49994179606437683, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.531539222084305, "step_time": 0.5497001705169678} +{"epoch": 0, "iter": 19917, "iter_tflops": 21.69943298922702, "iter_time": 0.9507664794921875, "loss": 0.22992388904094696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.265076982810463, "step_time": 0.886783805847168} +{"epoch": 0, "iter": 19918, "iter_tflops": 17.54309632064487, "iter_time": 1.1760234985351563, "loss": 0.234065443277359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.816615005084735, "step_time": 0.9042135963439941} +{"epoch": 0, "iter": 19919, "iter_tflops": 39.30357124752601, "iter_time": 0.5249165115356444, "loss": 0.1504439413547516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.03913760170969, "step_time": 0.47935657310485835} +{"epoch": 0, "iter": 19920, "iter_tflops": 39.04923027005905, "iter_time": 0.5283354721069335, "loss": 0.17428146302700043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.80683741732497, "step_time": 0.4819579010009766} +{"epoch": 0, "iter": 19921, "iter_tflops": 27.19937152782372, "iter_time": 0.7585136108398438, "loss": 0.6333109140396118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.089524147587465, "step_time": 0.7092276039123536} +{"epoch": 0, "iter": 19922, "iter_tflops": 15.981125793850678, "iter_time": 1.2909662170410157, "loss": 0.6374796032905579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.76492317459869, "step_time": 1.0994499320983886} +{"epoch": 0, "iter": 19923, "iter_tflops": 42.32070180260266, "iter_time": 0.4874941253662109, "loss": 0.5671225190162659, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 45.58196980120716, "step_time": 0.4526152248382568} +{"epoch": 0, "iter": 19924, "iter_tflops": 47.30375520465534, "iter_time": 0.43614071273803706, "loss": 0.5345660448074341, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.1307763904334, "step_time": 0.40349658203125} +{"epoch": 0, "iter": 19925, "iter_tflops": 21.41172783627669, "iter_time": 0.5689209899902344, "loss": 0.013347845524549484, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 22.98000500486204, "step_time": 0.5300948104858398} +{"epoch": 0, "iter": 19926, "iter_tflops": 9.662985949029252, "iter_time": 1.260643600463867, "loss": 0.008627044036984444, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 12.783441270550023, "step_time": 0.9529187908172607} +{"epoch": 0, "iter": 19927, "iter_tflops": 30.213569456978306, "iter_time": 0.4031824645996094, "loss": 0.002333605196326971, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 33.728660673069314, "step_time": 0.3611641006469727} +{"epoch": 0, "iter": 19928, "iter_tflops": 36.311736910121105, "iter_time": 0.3354722862243652, "loss": 0.0028534922748804092, "lr": 3e-05, "seqlen": 4896.0, "step_tflops": 39.880282465245934, "step_time": 0.3054537391662598} +{"epoch": 0, "iter": 19929, "iter_tflops": 29.156249106094748, "iter_time": 0.7076045150756837, "loss": 0.3640505075454712, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.954846232658515, "step_time": 0.6664899368286132} +{"epoch": 0, "iter": 19930, "iter_tflops": 20.8161325579871, "iter_time": 0.9911107864379883, "loss": 0.4719688594341278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.50619607558273, "step_time": 0.8088659496307373} +{"epoch": 0, "iter": 19931, "iter_tflops": 44.558167287127105, "iter_time": 0.4630148582458496, "loss": 0.3063502013683319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.084367403654824, "step_time": 0.4290603084564209} +{"epoch": 0, "iter": 19932, "iter_tflops": 46.5423721238015, "iter_time": 0.44327550506591795, "loss": 0.2634735703468323, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 50.3499828521559, "step_time": 0.409753734588623} +{"epoch": 0, "iter": 19933, "iter_tflops": 29.35905792139879, "iter_time": 0.7027164688110352, "loss": 0.10357246547937393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.15401105277599, "step_time": 0.6622291259765625} +{"epoch": 0, "iter": 19934, "iter_tflops": 13.57225318364389, "iter_time": 1.520093475341797, "loss": 0.23962658643722534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.644516946227009, "step_time": 1.4087930374145508} +{"epoch": 0, "iter": 19935, "iter_tflops": 26.145648206481837, "iter_time": 0.789083267211914, "loss": 0.20117755234241486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.714713280384224, "step_time": 0.6306365375518799} +{"epoch": 0, "iter": 19936, "iter_tflops": 51.44170840446641, "iter_time": 0.40105770492553705, "loss": 0.12697486579418182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.82160047602865, "step_time": 0.36958978843688967} +{"epoch": 0, "iter": 19937, "iter_tflops": 17.987051217164097, "iter_time": 0.7925034790039063, "loss": 0.27756720781326294, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 18.97073157527421, "step_time": 0.751410171508789} +{"epoch": 0, "iter": 19938, "iter_tflops": 8.309801191702121, "iter_time": 1.715420181274414, "loss": 0.30237433314323425, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 10.2675796802713, "step_time": 1.3883311462402346} +{"epoch": 0, "iter": 19939, "iter_tflops": 21.352447640031336, "iter_time": 0.6675956268310547, "loss": 0.19970478117465973, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 23.0620965017017, "step_time": 0.6181051521301268} +{"epoch": 0, "iter": 19940, "iter_tflops": 23.265512962975315, "iter_time": 0.6127008972167969, "loss": 0.32466962933540344, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 24.954576400267026, "step_time": 0.5712299194335938} +{"epoch": 0, "iter": 19941, "iter_tflops": 20.140085202766475, "iter_time": 1.024379653930664, "loss": 0.749039888381958, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.385043894588957, "step_time": 0.9647440338134765} +{"epoch": 0, "iter": 19942, "iter_tflops": 19.340585311766105, "iter_time": 1.0667253952026368, "loss": 0.5659453868865967, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.931229860421286, "step_time": 0.8996941566467286} +{"epoch": 0, "iter": 19943, "iter_tflops": 43.36308953762642, "iter_time": 0.4757754516601563, "loss": 0.5152651071548462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.712717426912484, "step_time": 0.4416590309143066} +{"epoch": 0, "iter": 19944, "iter_tflops": 41.549334920252626, "iter_time": 0.49654449462890626, "loss": 0.38668695092201233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55410525323606, "step_time": 0.463057071685791} +{"epoch": 0, "iter": 19945, "iter_tflops": 38.49038260300357, "iter_time": 0.5360064544677734, "loss": 0.07521370053291321, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6725843358819, "step_time": 0.49507593154907226} +{"epoch": 0, "iter": 19946, "iter_tflops": 10.64836804970768, "iter_time": 1.9374887695312502, "loss": 0.11379098147153854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.885883293769362, "step_time": 1.6010616455078126} +{"epoch": 0, "iter": 19947, "iter_tflops": 15.259976643231465, "iter_time": 1.3519741210937497, "loss": 0.09880072623491287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.080139348118905, "step_time": 1.0812863121032714} +{"epoch": 0, "iter": 19948, "iter_tflops": 27.90528359728847, "iter_time": 0.7393257064819335, "loss": 0.12281034886837006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.58004969965045, "step_time": 0.6532951564788819} +{"epoch": 0, "iter": 19949, "iter_tflops": 18.69400046138646, "iter_time": 0.8083901824951172, "loss": 0.2933558523654938, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 20.401813639537345, "step_time": 0.7407207374572755} +{"epoch": 0, "iter": 19950, "iter_tflops": 22.263472849538555, "iter_time": 0.6787820816040039, "loss": 
0.18602009117603302, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 24.126977937966647, "step_time": 0.6263547172546386} +{"epoch": 0, "iter": 19951, "iter_tflops": 21.81846523573589, "iter_time": 0.69262646484375, "loss": 0.18322211503982544, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 23.55544828919059, "step_time": 0.6415520629882813} +{"epoch": 0, "iter": 19952, "iter_tflops": 23.75395100360805, "iter_time": 0.6361908569335937, "loss": 0.32109007239341736, "lr": 3e-05, "seqlen": 6048.0, "step_tflops": 25.479379231284128, "step_time": 0.5931088943481445} +{"epoch": 0, "iter": 19953, "iter_tflops": 21.33498912189743, "iter_time": 0.967007453918457, "loss": 0.5457555055618286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.969913587665705, "step_time": 0.8981789779663086} +{"epoch": 0, "iter": 19954, "iter_tflops": 14.731528893923635, "iter_time": 1.4004719848632812, "loss": 0.5532647371292114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.81730066598826, "step_time": 1.1579247550964353} +{"epoch": 0, "iter": 19955, "iter_tflops": 38.65824931583054, "iter_time": 0.5336789398193359, "loss": 0.4998222291469574, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.38846999953147, "step_time": 0.48671474838256834} +{"epoch": 0, "iter": 19956, "iter_tflops": 39.82994214527233, "iter_time": 0.5179794998168946, "loss": 0.40360894799232483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.662151467161735, "step_time": 0.47251664924621584} +{"epoch": 0, "iter": 19957, "iter_tflops": 17.096513515857044, "iter_time": 1.2067427368164063, "loss": 0.0504596121609211, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.51754564638637, "step_time": 1.114137580871582} +{"epoch": 0, "iter": 19958, "iter_tflops": 21.91258608794338, "iter_time": 0.9415179672241211, "loss": 0.06503903120756149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.704086337931166, "step_time": 0.7446949615478515} +{"epoch": 0, "iter": 19959, "iter_tflops": 52.343956821123484, "iter_time": 
0.3941447067260742, "loss": 0.09042537212371826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.77547234749747, "step_time": 0.3633803939819336} +{"epoch": 0, "iter": 19960, "iter_tflops": 49.76526882275958, "iter_time": 0.41456811141967775, "loss": 0.10929078608751297, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.69818783543701, "step_time": 0.38420465087890626} +{"epoch": 0, "iter": 19961, "iter_tflops": 30.045608741550144, "iter_time": 0.6866591949462891, "loss": 0.4120648205280304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.07333128017858, "step_time": 0.6432476043701171} +{"epoch": 0, "iter": 19962, "iter_tflops": 21.180109376952487, "iter_time": 0.9740787048339845, "loss": 0.4393702447414398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.687566303051128, "step_time": 0.803154851913452} +{"epoch": 0, "iter": 19963, "iter_tflops": 47.80917353745325, "iter_time": 0.43153001785278317, "loss": 0.34804970026016235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.884998365000776, "step_time": 0.39763118743896486} +{"epoch": 0, "iter": 19964, "iter_tflops": 44.211602337940334, "iter_time": 0.4666443290710449, "loss": 0.28781330585479736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.16518192749283, "step_time": 0.42834040451049804} +{"epoch": 0, "iter": 19965, "iter_tflops": 30.51608211694245, "iter_time": 0.4484343528747559, "loss": 0.006726115010678768, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 33.48001429045212, "step_time": 0.4087351760864258} +{"epoch": 0, "iter": 19966, "iter_tflops": 8.093133917713992, "iter_time": 1.6908727416992189, "loss": 0.0019597799982875586, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 10.813490885674451, "step_time": 1.2654987812042238} +{"epoch": 0, "iter": 19967, "iter_tflops": 28.010324561196285, "iter_time": 0.4885505523681641, "loss": 0.008781642653048038, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 30.855271559526, "step_time": 0.4435047512054443} +{"epoch": 0, "iter": 19968, "iter_tflops": 
28.44419862553138, "iter_time": 0.4810984382629395, "loss": 0.01772451028227806, "lr": 3e-05, "seqlen": 5488.0, "step_tflops": 31.71163351204548, "step_time": 0.4315280551910401} +{"epoch": 0, "iter": 19969, "iter_tflops": 17.22898121099732, "iter_time": 1.1974645080566406, "loss": 0.43066713213920593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.576263189989593, "step_time": 1.1106159133911133} +{"epoch": 0, "iter": 19970, "iter_tflops": 29.144492907678085, "iter_time": 0.7078899459838867, "loss": 0.32567551732063293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.81743972670259, "step_time": 0.560361982345581} +{"epoch": 0, "iter": 19971, "iter_tflops": 44.595104600124834, "iter_time": 0.46263135147094725, "loss": 0.2859630286693573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.534391603870596, "step_time": 0.42508194351196293} +{"epoch": 0, "iter": 19972, "iter_tflops": 48.98124559590381, "iter_time": 0.4212039375305176, "loss": 0.45253676176071167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.751502704123396, "step_time": 0.39109963607788084} +{"epoch": 0, "iter": 19973, "iter_tflops": 28.966054467914475, "iter_time": 0.712250732421875, "loss": 0.42864152789115906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.79345479755759, "step_time": 0.6699830741882324} +{"epoch": 0, "iter": 19974, "iter_tflops": 8.458940779545486, "iter_time": 2.4389689025878907, "loss": 0.4332089126110077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.710555171540861, "step_time": 2.1246049423217777} +{"epoch": 0, "iter": 19975, "iter_tflops": 12.850623042154933, "iter_time": 1.6054547271728516, "loss": 0.46082285046577454, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.888752577294557, "step_time": 1.2215877647399902} +{"epoch": 0, "iter": 19976, "iter_tflops": 38.8672339219519, "iter_time": 0.5308094100952148, "loss": 0.3362984359264374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.67514178116513, "step_time": 0.4834452247619629} +{"epoch": 0, "iter": 19977, 
"iter_tflops": 12.252105452428319, "iter_time": 1.1634572296142578, "loss": 0.30342862010002136, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 13.103499959833577, "step_time": 1.0878620758056643} +{"epoch": 0, "iter": 19978, "iter_tflops": 10.678312057900547, "iter_time": 1.3349301452636717, "loss": 0.33734214305877686, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 13.267389816271098, "step_time": 1.0744238967895507} +{"epoch": 0, "iter": 19979, "iter_tflops": 24.801494459425378, "iter_time": 0.5747557144165039, "loss": 0.30313095450401306, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 26.533078674465408, "step_time": 0.5372463874816895} +{"epoch": 0, "iter": 19980, "iter_tflops": 24.499337277680716, "iter_time": 0.5818443374633788, "loss": 0.20503589510917664, "lr": 3e-05, "seqlen": 5712.0, "step_tflops": 26.05162353699228, "step_time": 0.5471751365661621} +{"epoch": 0, "iter": 19981, "iter_tflops": 29.51605599480229, "iter_time": 0.6989786682128906, "loss": 0.029343757778406143, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.491516394082623, "step_time": 0.6551317901611328} +{"epoch": 0, "iter": 19982, "iter_tflops": 14.182044008546047, "iter_time": 1.454733428955078, "loss": 0.021064678207039833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.22174343833619, "step_time": 1.1979677658081056} +{"epoch": 0, "iter": 19983, "iter_tflops": 48.07543903218861, "iter_time": 0.42913999176025386, "loss": 0.055120501667261124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.08690874266904, "step_time": 0.38862864685058596} +{"epoch": 0, "iter": 19984, "iter_tflops": 53.02033398833842, "iter_time": 0.3891166267395019, "loss": 0.020439179614186287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.09650752600428, "step_time": 0.35511762046813966} +{"epoch": 0, "iter": 19985, "iter_tflops": 41.45794479840751, "iter_time": 0.4976390800476075, "loss": 0.07278148829936981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.04105309465711, "step_time": 0.45805086898803715} 
+{"epoch": 0, "iter": 19986, "iter_tflops": 8.619254768911246, "iter_time": 2.393605255126953, "loss": 0.04752586781978607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.716294645936802, "step_time": 1.6224139251708984} +{"epoch": 0, "iter": 19987, "iter_tflops": 13.604236250230025, "iter_time": 1.5165197906494141, "loss": 0.04253557696938515, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.120036489270348, "step_time": 1.3644870185852052} +{"epoch": 0, "iter": 19988, "iter_tflops": 28.752516905058208, "iter_time": 0.7175404357910156, "loss": 0.050527576357126236, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.15881530452648, "step_time": 0.47802733612060544} +{"epoch": 0, "iter": 19989, "iter_tflops": 19.38561200751999, "iter_time": 0.796423843383789, "loss": 0.32257136702537537, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 21.129188884732265, "step_time": 0.7307030906677245} +{"epoch": 0, "iter": 19990, "iter_tflops": 27.406918353011633, "iter_time": 0.5633308868408203, "loss": 0.15213090181350708, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.29083046826584, "step_time": 0.5270988693237305} +{"epoch": 0, "iter": 19991, "iter_tflops": 27.330785009581177, "iter_time": 0.5649001159667968, "loss": 0.23308224976062775, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.215442696737142, "step_time": 0.5284589996337891} +{"epoch": 0, "iter": 19992, "iter_tflops": 27.315654200790547, "iter_time": 0.5652130279541016, "loss": 0.19843046367168427, "lr": 3e-05, "seqlen": 6176.0, "step_tflops": 29.149165401460515, "step_time": 0.5296605720520019} +{"epoch": 0, "iter": 19993, "iter_tflops": 28.870517727455134, "iter_time": 0.7146076736450195, "loss": 0.4151793420314789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.6068617639426, "step_time": 0.6740675888061524} +{"epoch": 0, "iter": 19994, "iter_tflops": 12.495886574107505, "iter_time": 1.651030792236328, "loss": 0.3781580328941345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.98148152429732, "step_time": 
1.3771063613891599} +{"epoch": 0, "iter": 19995, "iter_tflops": 46.552361478490866, "iter_time": 0.44318038558959966, "loss": 0.4436788856983185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.5567442933866, "step_time": 0.4080779685974121} +{"epoch": 0, "iter": 19996, "iter_tflops": 45.54687386451154, "iter_time": 0.4529639854431152, "loss": 0.3585663437843323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62441146840193, "step_time": 0.41574485015869145} +{"epoch": 0, "iter": 19997, "iter_tflops": 38.99664138871575, "iter_time": 0.5290479583740234, "loss": 0.16271190345287323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.980074755695235, "step_time": 0.4800153007507324} +{"epoch": 0, "iter": 19998, "iter_tflops": 47.70954960182014, "iter_time": 0.43243111038208004, "loss": 0.17960333824157715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.75046080912888, "step_time": 0.3911073608398438} +{"epoch": 0, "iter": 19999, "iter_tflops": 49.99237519703971, "iter_time": 0.41268480300903315, "loss": 0.12304756790399551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.306709964654225, "step_time": 0.37989952850341796} +{"epoch": 0, "iter": 20000, "iter_tflops": 44.64151850046442, "iter_time": 0.4621503524780273, "loss": 0.13113358616828918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.708936575762685, "step_time": 0.4235586929321289} +{"epoch": 0, "iter": 20001, "iter_tflops": 33.897804650036996, "iter_time": 0.6086262435913086, "loss": 0.14145709574222565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.33850310562882, "step_time": 0.5677474784851075} +{"epoch": 0, "iter": 20002, "iter_tflops": 37.518363197644454, "iter_time": 0.5498932189941407, "loss": 0.12426275759935379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.239070588141935, "step_time": 0.5002802734374999} +{"epoch": 0, "iter": 20003, "iter_tflops": 40.910135193959434, "iter_time": 0.5043027458190918, "loss": 0.1540793478488922, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
44.81190609918832, "step_time": 0.4603931255340576} +{"epoch": 0, "iter": 20004, "iter_tflops": 43.01394085371245, "iter_time": 0.4796373710632324, "loss": 0.09430848807096481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.977104948712764, "step_time": 0.4391733703613281} +{"epoch": 0, "iter": 20005, "iter_tflops": 15.479574638017569, "iter_time": 1.3327946014404297, "loss": 0.3552250266075134, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.442212956491467, "step_time": 1.2547637939453127} +{"epoch": 0, "iter": 20006, "iter_tflops": 20.10047916805785, "iter_time": 1.026398094177246, "loss": 0.2646760940551758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.743628365736644, "step_time": 0.8337941875457763} +{"epoch": 0, "iter": 20007, "iter_tflops": 44.07643432989908, "iter_time": 0.46807537460327153, "loss": 0.29794567823410034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.58018002492834, "step_time": 0.43360688209533693} +{"epoch": 0, "iter": 20008, "iter_tflops": 48.08522307889816, "iter_time": 0.42905267333984376, "loss": 0.3325253427028656, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.17434182318038, "step_time": 0.3954260425567627} +{"epoch": 0, "iter": 20009, "iter_tflops": 29.319751515826724, "iter_time": 0.7036585388183594, "loss": 0.04275898262858391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.08222518478926, "step_time": 0.663758575439453} +{"epoch": 0, "iter": 20010, "iter_tflops": 14.755546420612701, "iter_time": 1.3981924438476563, "loss": 0.03415288031101227, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.555859865734657, "step_time": 1.1118371047973632} +{"epoch": 0, "iter": 20011, "iter_tflops": 47.55348659431919, "iter_time": 0.4338502807617187, "loss": 0.0921938344836235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.74340404789437, "step_time": 0.3987192935943603} +{"epoch": 0, "iter": 20012, "iter_tflops": 55.456733352276416, "iter_time": 0.3720214347839356, "loss": 0.01983509585261345, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 60.6350396183588, "step_time": 0.34025035095214845} +{"epoch": 0, "iter": 20013, "iter_tflops": 21.47721312251128, "iter_time": 0.6960214538574219, "loss": 0.009040758945047855, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 22.765631424290852, "step_time": 0.656630199432373} +{"epoch": 0, "iter": 20014, "iter_tflops": 9.891906219001115, "iter_time": 1.5111951904296874, "loss": 0.007834364660084248, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 12.678340305086195, "step_time": 1.1790660877227785} +{"epoch": 0, "iter": 20015, "iter_tflops": 38.253220638108665, "iter_time": 0.39078019714355466, "loss": 0.009354307316243649, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 42.547285936927885, "step_time": 0.3513408851623535} +{"epoch": 0, "iter": 20016, "iter_tflops": 42.32783055580934, "iter_time": 0.353162467956543, "loss": 0.006851347628980875, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 46.60994705940429, "step_time": 0.3207169723510742} +{"epoch": 0, "iter": 20017, "iter_tflops": 36.234001644766295, "iter_time": 0.5693849029541015, "loss": 0.5730408430099487, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.09135184463701, "step_time": 0.5277661819458007} +{"epoch": 0, "iter": 20018, "iter_tflops": 37.79431206287301, "iter_time": 0.5458782653808594, "loss": 0.40836021304130554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62303019437173, "step_time": 0.4956653423309326} +{"epoch": 0, "iter": 20019, "iter_tflops": 37.111859030773296, "iter_time": 0.5559164657592773, "loss": 0.41029518842697144, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.35927254160376, "step_time": 0.5111859607696534} +{"epoch": 0, "iter": 20020, "iter_tflops": 41.62303163588203, "iter_time": 0.4956653251647949, "loss": 0.448238343000412, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.284939109969145, "step_time": 0.45558399581909176} +{"epoch": 0, "iter": 20021, "iter_tflops": 21.629287297149908, "iter_time": 0.9538498992919924, "loss": 0.4249483346939087, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 23.263475114315614, "step_time": 0.886844867706299} +{"epoch": 0, "iter": 20022, "iter_tflops": 31.129521214865978, "iter_time": 0.6627501068115234, "loss": 0.3286563456058502, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.888195346025554, "step_time": 0.5305232944488525} +{"epoch": 0, "iter": 20023, "iter_tflops": 49.78520415258608, "iter_time": 0.41440210723876947, "loss": 0.5045569539070129, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.10516319165627, "step_time": 0.38131468963623044} +{"epoch": 0, "iter": 20024, "iter_tflops": 36.836822026494744, "iter_time": 0.560067138671875, "loss": 0.40239647030830383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25540344042691, "step_time": 0.5125049495697022} +{"epoch": 0, "iter": 20025, "iter_tflops": 17.591724717436467, "iter_time": 1.1727726440429684, "loss": 0.5900506973266602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.85654399371787, "step_time": 1.0941078872680665} +{"epoch": 0, "iter": 20026, "iter_tflops": 22.953650721888536, "iter_time": 0.8988153457641601, "loss": 0.563696563243866, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.281260296317043, "step_time": 0.7294969635009765} +{"epoch": 0, "iter": 20027, "iter_tflops": 45.35471280958785, "iter_time": 0.4548831253051757, "loss": 0.720001757144928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.711271820562516, "step_time": 0.423538387298584} +{"epoch": 0, "iter": 20028, "iter_tflops": 48.053195044950705, "iter_time": 0.42933864212036127, "loss": 0.7102975845336914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.832068673031344, "step_time": 0.398037239074707} +{"epoch": 0, "iter": 20029, "iter_tflops": 35.91414283100411, "iter_time": 0.5744559631347657, "loss": 0.09329389035701752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.64885457158134, "step_time": 0.533808666229248} +{"epoch": 0, "iter": 20030, "iter_tflops": 33.91133196862773, "iter_time": 0.6083834609985352, "loss": 0.08000081777572632, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 39.32511731500476, "step_time": 0.5246289119720459} +{"epoch": 0, "iter": 20031, "iter_tflops": 41.69127713580706, "iter_time": 0.4948539581298828, "loss": 0.1065978854894638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.81883235408973, "step_time": 0.45027540969848623} +{"epoch": 0, "iter": 20032, "iter_tflops": 45.06069887444839, "iter_time": 0.4578511657714843, "loss": 0.07690789550542831, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.56282551243713, "step_time": 0.41626144790649416} +{"epoch": 0, "iter": 20033, "iter_tflops": 23.519401924892634, "iter_time": 0.8771946487426758, "loss": 0.36670106649398804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.056730274703177, "step_time": 0.8233753280639649} +{"epoch": 0, "iter": 20034, "iter_tflops": 19.1698347158264, "iter_time": 1.0762269897460937, "loss": 0.44372889399528503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.32050525690064, "step_time": 0.7838410892486574} +{"epoch": 0, "iter": 20035, "iter_tflops": 44.975215454632796, "iter_time": 0.4587213935852051, "loss": 0.3834728002548218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.689479134964145, "step_time": 0.4237279567718506} +{"epoch": 0, "iter": 20036, "iter_tflops": 47.27820971356157, "iter_time": 0.43637636947631836, "loss": 0.5314453840255737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.04715398682477, "step_time": 0.40415756607055664} +{"epoch": 0, "iter": 20037, "iter_tflops": 28.42416528819057, "iter_time": 0.7258293533325195, "loss": 0.13312917947769165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.282196979983834, "step_time": 0.68129447555542} +{"epoch": 0, "iter": 20038, "iter_tflops": 19.449986995466055, "iter_time": 1.060725311279297, "loss": 0.16781266033649445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.15538397192984, "step_time": 0.8540991744995116} +{"epoch": 0, "iter": 20039, "iter_tflops": 39.34828524897957, "iter_time": 0.5243200149536132, "loss": 
0.17992444336414337, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.11312080415147, "step_time": 0.47853398513793943} +{"epoch": 0, "iter": 20040, "iter_tflops": 40.855033101536854, "iter_time": 0.5049829101562501, "loss": 0.18120543658733368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.54315822564416, "step_time": 0.46317087364196774} +{"epoch": 0, "iter": 20041, "iter_tflops": 12.777874287203238, "iter_time": 1.6145951232910156, "loss": 0.4148012697696686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.373809128533198, "step_time": 1.5426490173339844} +{"epoch": 0, "iter": 20042, "iter_tflops": 22.79440808567986, "iter_time": 0.9050945053100585, "loss": 0.480579137802124, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.85344017629181, "step_time": 0.7150306301116943} +{"epoch": 0, "iter": 20043, "iter_tflops": 48.360949100568114, "iter_time": 0.4266064643859863, "loss": 0.4609110355377197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.40814372554318, "step_time": 0.3936619777679443} +{"epoch": 0, "iter": 20044, "iter_tflops": 49.86824212593832, "iter_time": 0.41371206665039056, "loss": 0.5953128337860107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.00861892345451, "step_time": 0.38199631690978997} +{"epoch": 0, "iter": 20045, "iter_tflops": 31.63062909034626, "iter_time": 0.6522504959106445, "loss": 0.3204803764820099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.70072094450294, "step_time": 0.6121855239868164} +{"epoch": 0, "iter": 20046, "iter_tflops": 7.918284818144407, "iter_time": 2.6055003051757812, "loss": 0.35572829842567444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.309451627850564, "step_time": 2.216144874572754} +{"epoch": 0, "iter": 20047, "iter_tflops": 13.546962245246743, "iter_time": 1.5229313507080078, "loss": 0.2679484188556671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.23894820810334, "step_time": 1.1311558799743653} +{"epoch": 0, "iter": 20048, "iter_tflops": 30.48612377164811, "iter_time": 0.6767371826171875, 
"loss": 0.42216524481773376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.58943502975111, "step_time": 0.5638538417816162} +{"epoch": 0, "iter": 20049, "iter_tflops": 11.906289583970013, "iter_time": 1.1220091247558595, "loss": 0.12982667982578278, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 12.733844096469435, "step_time": 1.0490913391113281} +{"epoch": 0, "iter": 20050, "iter_tflops": 8.81393487865339, "iter_time": 1.5156641998291016, "loss": 0.06700804084539413, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 11.656504879448194, "step_time": 1.1460524139404296} +{"epoch": 0, "iter": 20051, "iter_tflops": 20.66738338451422, "iter_time": 0.6463791427612304, "loss": 0.2773972451686859, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 22.324482152134284, "step_time": 0.5983997955322267} +{"epoch": 0, "iter": 20052, "iter_tflops": 20.784382277831696, "iter_time": 0.6427405624389649, "loss": 0.2021847665309906, "lr": 3e-05, "seqlen": 5360.0, "step_tflops": 22.343603279849308, "step_time": 0.5978876991271972} +{"epoch": 0, "iter": 20053, "iter_tflops": 32.599702503719946, "iter_time": 0.6328614044189452, "loss": 0.09401410818099976, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.24861964568665, "step_time": 0.5691552867889405} +{"epoch": 0, "iter": 20054, "iter_tflops": 38.93740371242319, "iter_time": 0.5298528289794922, "loss": 0.0544959157705307, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.86217214154157, "step_time": 0.4813356971740722} +{"epoch": 0, "iter": 20055, "iter_tflops": 39.29576142573971, "iter_time": 0.5250208358764649, "loss": 0.048982176929712296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.0935020583166, "step_time": 0.4787518424987792} +{"epoch": 0, "iter": 20056, "iter_tflops": 43.70970683914041, "iter_time": 0.4720025596618652, "loss": 0.04639405757188797, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.91166395291565, "step_time": 0.4306069087982178} +{"epoch": 0, "iter": 20057, "iter_tflops": 15.821863761184641, "iter_time": 
1.3039610137939452, "loss": 0.003108041360974312, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.610327067272507, "step_time": 1.2420642547607421} +{"epoch": 0, "iter": 20058, "iter_tflops": 21.03385430081567, "iter_time": 0.9808517837524413, "loss": 0.005299676675349474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.401634241035058, "step_time": 0.7814324417114257} +{"epoch": 0, "iter": 20059, "iter_tflops": 48.182038173072755, "iter_time": 0.4281905517578125, "loss": 0.013121782802045345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.40996651306877, "step_time": 0.38627797126770025} +{"epoch": 0, "iter": 20060, "iter_tflops": 46.6638961356036, "iter_time": 0.44212110900878904, "loss": 0.0018287372076883912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.26169933890734, "step_time": 0.4024660472869873} +{"epoch": 0, "iter": 20061, "iter_tflops": 20.16549161393785, "iter_time": 1.0230890426635741, "loss": 0.543484628200531, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.5665567296325, "step_time": 0.9566243591308593} +{"epoch": 0, "iter": 20062, "iter_tflops": 13.525465814973762, "iter_time": 1.5253517913818357, "loss": 0.4343211054801941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.551663438049452, "step_time": 1.2464664707183837} +{"epoch": 0, "iter": 20063, "iter_tflops": 47.480839994606306, "iter_time": 0.4345140800476075, "loss": 0.5981410145759583, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.72068906623662, "step_time": 0.3988944053649902} +{"epoch": 0, "iter": 20064, "iter_tflops": 48.87077875970704, "iter_time": 0.422156021118164, "loss": 0.4523232877254486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.81609376317444, "step_time": 0.3906213436126709} +{"epoch": 0, "iter": 20065, "iter_tflops": 24.615123516080025, "iter_time": 0.8381470642089844, "loss": 0.471731036901474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.777953905198956, "step_time": 0.8003386764526367} +{"epoch": 0, "iter": 20066, "iter_tflops": 18.038323428235106, 
"iter_time": 1.143736755371094, "loss": 0.5023031830787659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.153515905779987, "step_time": 0.8541652317047119} +{"epoch": 0, "iter": 20067, "iter_tflops": 35.541268437614924, "iter_time": 0.5804827575683594, "loss": 0.4790460467338562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.5580597947455, "step_time": 0.5350656547546386} +{"epoch": 0, "iter": 20068, "iter_tflops": 39.23496323848315, "iter_time": 0.5258344039916991, "loss": 0.6179579496383667, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.53027162645482, "step_time": 0.4850919761657715} +{"epoch": 0, "iter": 20069, "iter_tflops": 21.28837283346419, "iter_time": 0.9691249618530273, "loss": 0.4598444700241089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.924097761110815, "step_time": 0.8999740676879884} +{"epoch": 0, "iter": 20070, "iter_tflops": 24.33967092735262, "iter_time": 0.8476323928833007, "loss": 0.422096312046051, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.186591825026063, "step_time": 0.706868881225586} +{"epoch": 0, "iter": 20071, "iter_tflops": 44.055398148216156, "iter_time": 0.46829887771606443, "loss": 0.5416783690452576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.66465428508202, "step_time": 0.4328384170532226} +{"epoch": 0, "iter": 20072, "iter_tflops": 48.75664049032735, "iter_time": 0.4231442794799805, "loss": 0.574272871017456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.58053512549576, "step_time": 0.39237131118774415} +{"epoch": 0, "iter": 20073, "iter_tflops": 23.48018375813832, "iter_time": 0.878659797668457, "loss": 0.32598069310188293, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.757116983930967, "step_time": 0.8333399047851563} +{"epoch": 0, "iter": 20074, "iter_tflops": 13.712795340819719, "iter_time": 1.5045140686035157, "loss": 0.23237758874893188, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.62114350188097, "step_time": 1.1079391288757325} +{"epoch": 0, "iter": 20075, "iter_tflops": 
41.76615795482811, "iter_time": 0.4939667549133301, "loss": 0.3034912049770355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.75726702876685, "step_time": 0.45088124465942375} +{"epoch": 0, "iter": 20076, "iter_tflops": 38.71573537872513, "iter_time": 0.5328865203857422, "loss": 0.4867970645427704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.26527647691947, "step_time": 0.4881334095001221} +{"epoch": 0, "iter": 20077, "iter_tflops": 1.711653037899295, "iter_time": 1.122268783569336, "loss": 0.30791550874710083, "lr": 3e-05, "seqlen": 784.0, "step_tflops": 1.839228621262234, "step_time": 1.0444241409301758} +{"epoch": 0, "iter": 20078, "iter_tflops": 1.616590081432882, "iter_time": 1.1882633666992186, "loss": 0.005036181304603815, "lr": 3e-05, "seqlen": 784.0, "step_tflops": 2.0080474985939647, "step_time": 0.956618194580078} +{"epoch": 0, "iter": 20079, "iter_tflops": 3.9213093883654744, "iter_time": 0.4898707504272461, "loss": 0.18684302270412445, "lr": 3e-05, "seqlen": 784.0, "step_tflops": 4.230192713013657, "step_time": 0.4541010074615479} +{"epoch": 0, "iter": 20080, "iter_tflops": 4.389926433160756, "iter_time": 0.4375778961181641, "loss": 0.7378234267234802, "lr": 3e-05, "seqlen": 784.0, "step_tflops": 4.758480040696486, "step_time": 0.40368663024902346} +{"epoch": 0, "iter": 20081, "iter_tflops": 26.12334730919563, "iter_time": 0.7897568893432618, "loss": 0.5878769159317017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.509825854420615, "step_time": 0.7499536209106447} +{"epoch": 0, "iter": 20082, "iter_tflops": 19.171429444393645, "iter_time": 1.076137466430664, "loss": 0.7026767134666443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.232608987865635, "step_time": 0.9716702041625976} +{"epoch": 0, "iter": 20083, "iter_tflops": 33.21909561063661, "iter_time": 0.621061264038086, "loss": 0.5613491535186768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.23323527947688, "step_time": 0.5693969459533692} +{"epoch": 0, "iter": 20084, 
"iter_tflops": 35.69687125333185, "iter_time": 0.5779524307250977, "loss": 0.46159666776657104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.73163166969842, "step_time": 0.5326678123474122} +{"epoch": 0, "iter": 20085, "iter_tflops": 22.019717846753952, "iter_time": 0.9369372329711914, "loss": 0.4215599596500397, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.678544725700423, "step_time": 0.8712990493774415} +{"epoch": 0, "iter": 20086, "iter_tflops": 15.479664312504331, "iter_time": 1.3327868804931642, "loss": 0.4879266321659088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.783095900999783, "step_time": 1.0428647575378418} +{"epoch": 0, "iter": 20087, "iter_tflops": 43.8074786700408, "iter_time": 0.4709491195678711, "loss": 0.43985846638679504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.28212249199695, "step_time": 0.43634025764465334} +{"epoch": 0, "iter": 20088, "iter_tflops": 45.205266314467345, "iter_time": 0.456386947631836, "loss": 0.6494384407997131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.69753140130635, "step_time": 0.4236578922271728} +{"epoch": 0, "iter": 20089, "iter_tflops": 38.6626085781374, "iter_time": 0.533618766784668, "loss": 0.4508788585662842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.754612192349214, "step_time": 0.494103343963623} +{"epoch": 0, "iter": 20090, "iter_tflops": 32.91554814611615, "iter_time": 0.6267886962890624, "loss": 0.373815655708313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.86800715470855, "step_time": 0.5595934009552002} +{"epoch": 0, "iter": 20091, "iter_tflops": 47.11428092679983, "iter_time": 0.43789469146728516, "loss": 0.5542779564857483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.19617996980947, "step_time": 0.4029811115264893} +{"epoch": 0, "iter": 20092, "iter_tflops": 43.95020663205123, "iter_time": 0.4694197158813477, "loss": 0.5131940841674805, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.026488072521644, "step_time": 0.4387121887207031} +{"epoch": 0, "iter": 
20093, "iter_tflops": 29.850056297408283, "iter_time": 0.6911576080322265, "loss": 0.5087828040122986, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.54323154532777, "step_time": 0.6540577011108399} +{"epoch": 0, "iter": 20094, "iter_tflops": 11.577706498120206, "iter_time": 1.7819672241210935, "loss": 0.6258090138435364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.19578353756105, "step_time": 1.5634610443115233} +{"epoch": 0, "iter": 20095, "iter_tflops": 40.0374281874344, "iter_time": 0.5152951736450195, "loss": 0.5732523798942566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.44689710790062, "step_time": 0.4089665508270264} +{"epoch": 0, "iter": 20096, "iter_tflops": 46.59223176403191, "iter_time": 0.44280114364624024, "loss": 0.5575692057609558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.66244925081296, "step_time": 0.40722653198242187} +{"epoch": 0, "iter": 20097, "iter_tflops": 33.24986093805504, "iter_time": 0.6204866104125977, "loss": 0.05854388326406479, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.522195196454064, "step_time": 0.5807944412231446} +{"epoch": 0, "iter": 20098, "iter_tflops": 13.50201726755537, "iter_time": 1.5280008239746092, "loss": 0.09936407208442688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.865658756352058, "step_time": 1.3003616065979005} +{"epoch": 0, "iter": 20099, "iter_tflops": 41.245943157481406, "iter_time": 0.5001969146728514, "loss": 0.10494454205036163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.352974869852474, "step_time": 0.45490055656433104} +{"epoch": 0, "iter": 20100, "iter_tflops": 41.978308354379855, "iter_time": 0.4914703407287597, "loss": 0.1074485331773758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.92030133432031, "step_time": 0.44928044700622566} +{"epoch": 0, "iter": 20101, "iter_tflops": 26.389291367625624, "iter_time": 0.7817979354858399, "loss": 0.7195197939872742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.317226545735142, "step_time": 0.7285704154968262} 
+{"epoch": 0, "iter": 20102, "iter_tflops": 7.63213799699292, "iter_time": 2.7031866455078126, "loss": 0.546983540058136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.223666267776501, "step_time": 2.2367562866210937} +{"epoch": 0, "iter": 20103, "iter_tflops": 18.005749173735733, "iter_time": 1.1458058929443358, "loss": 0.7302420735359192, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.570809262017107, "step_time": 1.0029305725097657} +{"epoch": 0, "iter": 20104, "iter_tflops": 35.82424475936689, "iter_time": 0.5758975143432616, "loss": 0.49545297026634216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.93926898702165, "step_time": 0.5298274478912354} +{"epoch": 0, "iter": 20105, "iter_tflops": 18.283724935063187, "iter_time": 0.857850341796875, "loss": 0.12630516290664673, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 20.011253853554084, "step_time": 0.7837939491271972} +{"epoch": 0, "iter": 20106, "iter_tflops": 24.566254946838317, "iter_time": 0.6384652328491212, "loss": 0.2354983240365982, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 26.463826951875593, "step_time": 0.5926844863891602} +{"epoch": 0, "iter": 20107, "iter_tflops": 23.566495520608946, "iter_time": 0.6655507888793945, "loss": 0.2436925619840622, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 25.419259457288884, "step_time": 0.6170399932861328} +{"epoch": 0, "iter": 20108, "iter_tflops": 23.279910416061625, "iter_time": 0.6737439880371093, "loss": 0.18722695112228394, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 24.99014251367446, "step_time": 0.6276354637145997} +{"epoch": 0, "iter": 20109, "iter_tflops": 19.49626348035664, "iter_time": 1.058207565307617, "loss": 0.34701108932495117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.010710720031593, "step_time": 0.9819322052001954} +{"epoch": 0, "iter": 20110, "iter_tflops": 21.307933670069772, "iter_time": 0.9682352981567384, "loss": 0.5532360672950745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.43182078707389, "step_time": 
0.811231475830078} +{"epoch": 0, "iter": 20111, "iter_tflops": 47.12066896134568, "iter_time": 0.4378353271484375, "loss": 0.5050020813941956, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.08496885254327, "step_time": 0.40385839462280276} +{"epoch": 0, "iter": 20112, "iter_tflops": 51.838443709573596, "iter_time": 0.3979882888793945, "loss": 0.4943735897541046, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.911944734773854, "step_time": 0.3689925937652588} +{"epoch": 0, "iter": 20113, "iter_tflops": 33.55570153559942, "iter_time": 0.6148312377929687, "loss": 0.42049261927604675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.853364797432164, "step_time": 0.5754297714233398} +{"epoch": 0, "iter": 20114, "iter_tflops": 18.07109376977058, "iter_time": 1.1416626892089845, "loss": 0.34882840514183044, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.00836877263913, "step_time": 0.9820416679382324} +{"epoch": 0, "iter": 20115, "iter_tflops": 44.49954413372236, "iter_time": 0.46362482833862306, "loss": 0.30518218874931335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.99865861138694, "step_time": 0.4298264598846436} +{"epoch": 0, "iter": 20116, "iter_tflops": 43.189442407133406, "iter_time": 0.4776883506774902, "loss": 0.4421343207359314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.70503687867097, "step_time": 0.44173166084289545} +{"epoch": 0, "iter": 20117, "iter_tflops": 33.40479516423794, "iter_time": 0.6176087417602539, "loss": 0.3932371735572815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.545801068209514, "step_time": 0.5804087371826172} +{"epoch": 0, "iter": 20118, "iter_tflops": 12.908657667372863, "iter_time": 1.5982369384765625, "loss": 0.4358377158641815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.16446608979352, "step_time": 1.2019653511047363} +{"epoch": 0, "iter": 20119, "iter_tflops": 37.91763371844202, "iter_time": 0.5441028747558594, "loss": 0.5208198428153992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.60498918801628, 
"step_time": 0.4958802757263184} +{"epoch": 0, "iter": 20120, "iter_tflops": 42.39786310629966, "iter_time": 0.4866069183349609, "loss": 0.4407147765159607, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.30360050902026, "step_time": 0.4455613231658936} +{"epoch": 0, "iter": 20121, "iter_tflops": 21.288750592081033, "iter_time": 0.9691077651977539, "loss": 0.035209205001592636, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.151178568650014, "step_time": 0.8911465759277345} +{"epoch": 0, "iter": 20122, "iter_tflops": 27.304826988599373, "iter_time": 0.755584114074707, "loss": 0.03372571989893913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.59916602123891, "step_time": 0.6140358810424805} +{"epoch": 0, "iter": 20123, "iter_tflops": 53.866458236454996, "iter_time": 0.38300445556640633, "loss": 0.02766275778412819, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.90825690271092, "step_time": 0.35022413825988763} +{"epoch": 0, "iter": 20124, "iter_tflops": 54.84265737606518, "iter_time": 0.3761869773864746, "loss": 0.019333917647600174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.86004393171865, "step_time": 0.34465550231933595} +{"epoch": 0, "iter": 20125, "iter_tflops": 36.86078760012618, "iter_time": 0.5597030029296874, "loss": 0.6126658320426941, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.76298137457141, "step_time": 0.5188517761230469} +{"epoch": 0, "iter": 20126, "iter_tflops": 23.3927058415174, "iter_time": 0.8819455795288086, "loss": 0.4074377715587616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.403799220875324, "step_time": 0.7813683681488037} +{"epoch": 0, "iter": 20127, "iter_tflops": 47.843902161734455, "iter_time": 0.4312167816162109, "loss": 0.4669809639453888, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.59983201007224, "step_time": 0.39982869529724124} +{"epoch": 0, "iter": 20128, "iter_tflops": 52.872353003401024, "iter_time": 0.39020569992065435, "loss": 0.4196854829788208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
57.40360462892774, "step_time": 0.359404146194458} +{"epoch": 0, "iter": 20129, "iter_tflops": 32.97338189193742, "iter_time": 0.6256893386840819, "loss": 0.07887943089008331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.14813978678387, "step_time": 0.5869754028320312} +{"epoch": 0, "iter": 20130, "iter_tflops": 13.699827168186532, "iter_time": 1.505938232421875, "loss": 0.13192260265350342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.760243373076717, "step_time": 1.3090593223571778} +{"epoch": 0, "iter": 20131, "iter_tflops": 47.70409995168207, "iter_time": 0.43248051071166993, "loss": 0.12590639293193817, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.80847674906239, "step_time": 0.3982184925079346} +{"epoch": 0, "iter": 20132, "iter_tflops": 50.35384794990391, "iter_time": 0.40972228240966796, "loss": 0.09999322891235352, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.27170608011538, "step_time": 0.3801445541381836} +{"epoch": 0, "iter": 20133, "iter_tflops": 29.567516135017605, "iter_time": 0.6977621459960937, "loss": 0.06964900344610214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.44401152880511, "step_time": 0.6561215476989746} +{"epoch": 0, "iter": 20134, "iter_tflops": 13.279802085602118, "iter_time": 1.5535693511962891, "loss": 0.049356624484062195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.842215600787277, "step_time": 1.3900278816223146} +{"epoch": 0, "iter": 20135, "iter_tflops": 44.5143572898941, "iter_time": 0.4634705467224121, "loss": 0.05745900049805641, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.117429541980016, "step_time": 0.4287654953002929} +{"epoch": 0, "iter": 20136, "iter_tflops": 48.57111000883426, "iter_time": 0.4247605934143066, "loss": 0.05149046331644058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.70107002788155, "step_time": 0.39147390174865726} +{"epoch": 0, "iter": 20137, "iter_tflops": 29.947129237332085, "iter_time": 0.688917236328125, "loss": 0.05224179849028587, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 31.837783023213525, "step_time": 0.6480065994262695} +{"epoch": 0, "iter": 20138, "iter_tflops": 13.720659546286827, "iter_time": 1.5036517333984376, "loss": 0.03720565512776375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.900669130824443, "step_time": 1.0915536041259766} +{"epoch": 0, "iter": 20139, "iter_tflops": 43.01388543317249, "iter_time": 0.47963798904418947, "loss": 0.09157714247703552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.45628015788949, "step_time": 0.37885609245300295} +{"epoch": 0, "iter": 20140, "iter_tflops": 54.38575216655566, "iter_time": 0.379347396850586, "loss": 0.0517328679561615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 59.26049312392288, "step_time": 0.34814245414733885} +{"epoch": 0, "iter": 20141, "iter_tflops": 44.069213557397724, "iter_time": 0.46815206909179685, "loss": 0.48553332686424255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.82100080424571, "step_time": 0.43142329025268555} +{"epoch": 0, "iter": 20142, "iter_tflops": 44.62641586797132, "iter_time": 0.46230675506591795, "loss": 0.4169802665710449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.129896165889996, "step_time": 0.41992951583862304} +{"epoch": 0, "iter": 20143, "iter_tflops": 49.07050312626625, "iter_time": 0.4204377822875976, "loss": 0.4865434765815735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.2413481942584, "step_time": 0.38750133514404295} +{"epoch": 0, "iter": 20144, "iter_tflops": 51.886738598142806, "iter_time": 0.3976178512573242, "loss": 0.4739123284816742, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.06753677786263, "step_time": 0.3679686088562012} +{"epoch": 0, "iter": 20145, "iter_tflops": 23.061733974166266, "iter_time": 0.8946028747558593, "loss": 0.41213753819465637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.229295033954376, "step_time": 0.8514937591552734} +{"epoch": 0, "iter": 20146, "iter_tflops": 17.50691397427077, "iter_time": 1.1784540405273438, "loss": 0.5851432681083679, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 20.498457828395853, "step_time": 1.0064705200195314} +{"epoch": 0, "iter": 20147, "iter_tflops": 38.51929588183514, "iter_time": 0.5356041183471679, "loss": 0.5256666541099548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.149735905262915, "step_time": 0.489471477508545} +{"epoch": 0, "iter": 20148, "iter_tflops": 37.956867058436906, "iter_time": 0.5435404739379882, "loss": 0.46635106205940247, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.3915948073158, "step_time": 0.49843678665161134} +{"epoch": 0, "iter": 20149, "iter_tflops": 18.255586931970278, "iter_time": 1.1301249084472655, "loss": 0.006738248746842146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.555278806307612, "step_time": 1.055014030456543} +{"epoch": 0, "iter": 20150, "iter_tflops": 16.21900183372883, "iter_time": 1.2720322570800782, "loss": 0.015715960413217545, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.572228816123577, "step_time": 0.9563728294372558} +{"epoch": 0, "iter": 20151, "iter_tflops": 40.22732951566439, "iter_time": 0.5128626174926757, "loss": 0.28454476594924927, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.907806203868944, "step_time": 0.4698730201721191} +{"epoch": 0, "iter": 20152, "iter_tflops": 40.160455493852844, "iter_time": 0.5137166213989257, "loss": 0.3387354910373688, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.977459988720135, "step_time": 0.4691288108825684} +{"epoch": 0, "iter": 20153, "iter_tflops": 33.47974814556535, "iter_time": 0.6162260665893555, "loss": 0.1176857128739357, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.083385547624644, "step_time": 0.5563433113098144} +{"epoch": 0, "iter": 20154, "iter_tflops": 44.79442563041609, "iter_time": 0.46057278823852543, "loss": 0.08379567414522171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.727073836971705, "step_time": 0.4234010353088379} +{"epoch": 0, "iter": 20155, "iter_tflops": 45.78652960887749, "iter_time": 0.45059308242797846, "loss": 
0.08452460169792175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.78508247743917, "step_time": 0.4144031200408935} +{"epoch": 0, "iter": 20156, "iter_tflops": 53.465813644920466, "iter_time": 0.3858744888305664, "loss": 0.13568510115146637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.234563207757326, "step_time": 0.354275749206543} +{"epoch": 0, "iter": 20157, "iter_tflops": 25.549222077147732, "iter_time": 0.8075037841796875, "loss": 0.04194880649447441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.844092747743385, "step_time": 0.768552459716797} +{"epoch": 0, "iter": 20158, "iter_tflops": 13.915486599362797, "iter_time": 1.4825995025634766, "loss": 0.05621388554573059, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.387408475871407, "step_time": 1.258960105895996} +{"epoch": 0, "iter": 20159, "iter_tflops": 37.84372038033724, "iter_time": 0.5451655731201172, "loss": 0.07990828156471252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.67034623681966, "step_time": 0.4951025218963624} +{"epoch": 0, "iter": 20160, "iter_tflops": 42.276440672380076, "iter_time": 0.48800450515747074, "loss": 0.058576010167598724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.20695797096846, "step_time": 0.44649322128295904} +{"epoch": 0, "iter": 20161, "iter_tflops": 35.61110520561923, "iter_time": 0.5793443756103515, "loss": 0.018991658464074135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71143651864385, "step_time": 0.5195252380371095} +{"epoch": 0, "iter": 20162, "iter_tflops": 36.2132884197275, "iter_time": 0.569710578918457, "loss": 0.042560093104839325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.1990123606693, "step_time": 0.5132238903045654} +{"epoch": 0, "iter": 20163, "iter_tflops": 41.13646739519825, "iter_time": 0.5015280799865722, "loss": 0.012299217283725739, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.41314117987058, "step_time": 0.4542978744506835} +{"epoch": 0, "iter": 20164, "iter_tflops": 48.89402321755912, "iter_time": 
0.42195532608032227, "loss": 0.01893170364201069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.80230582795683, "step_time": 0.383461139678955} +{"epoch": 0, "iter": 20165, "iter_tflops": 29.59011320761996, "iter_time": 0.6972292861938476, "loss": 0.8079627156257629, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.916091671487678, "step_time": 0.6464166641235352} +{"epoch": 0, "iter": 20166, "iter_tflops": 8.472591254766119, "iter_time": 2.4350393981933594, "loss": 0.4146145284175873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.252520746183823, "step_time": 1.6838244094848631} +{"epoch": 0, "iter": 20167, "iter_tflops": 16.373954726404946, "iter_time": 1.2599945373535155, "loss": 0.5069432854652405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.241858335524835, "step_time": 1.0721985969543457} +{"epoch": 0, "iter": 20168, "iter_tflops": 35.854249474470336, "iter_time": 0.5754155731201172, "loss": 0.4364386200904846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.823930334798064, "step_time": 0.5314014663696289} +{"epoch": 0, "iter": 20169, "iter_tflops": 9.995418370077827, "iter_time": 1.6060680694580078, "loss": 0.272476464509964, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 10.427355539278025, "step_time": 1.5395391693115232} +{"epoch": 0, "iter": 20170, "iter_tflops": 15.092721546312506, "iter_time": 1.0636466217041016, "loss": 0.21271111071109772, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 18.599285688106047, "step_time": 0.863114990234375} +{"epoch": 0, "iter": 20171, "iter_tflops": 25.08171434167781, "iter_time": 0.6400408706665038, "loss": 0.1878715306520462, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 27.068852730681872, "step_time": 0.5930551414489746} +{"epoch": 0, "iter": 20172, "iter_tflops": 24.785773026528737, "iter_time": 0.6476829376220702, "loss": 0.13001304864883423, "lr": 3e-05, "seqlen": 6416.0, "step_tflops": 26.561735051577156, "step_time": 0.6043777732849122} +{"epoch": 0, "iter": 20173, "iter_tflops": 14.027855676659616, 
"iter_time": 1.4707232513427733, "loss": 0.6427863836288452, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.844451592647268, "step_time": 1.389818504333496} +{"epoch": 0, "iter": 20174, "iter_tflops": 17.885818979581806, "iter_time": 1.1534888916015626, "loss": 0.4845784604549408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.93260757807112, "step_time": 0.7955657157897948} +{"epoch": 0, "iter": 20175, "iter_tflops": 38.28393292512837, "iter_time": 0.5388969192504882, "loss": 0.6443135142326355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.77512075219779, "step_time": 0.49386077499389647} +{"epoch": 0, "iter": 20176, "iter_tflops": 36.85233272222463, "iter_time": 0.5598314132690428, "loss": 0.4714691936969757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.05831221490428, "step_time": 0.5150265293121339} +{"epoch": 0, "iter": 20177, "iter_tflops": 29.013969906270372, "iter_time": 0.711074478149414, "loss": 0.6358749866485596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.93344912459682, "step_time": 0.6460653038024902} +{"epoch": 0, "iter": 20178, "iter_tflops": 40.941047557270174, "iter_time": 0.5039219741821288, "loss": 0.652278482913971, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.055979164986425, "step_time": 0.46829270172119136} +{"epoch": 0, "iter": 20179, "iter_tflops": 44.158661620391996, "iter_time": 0.4672037773132325, "loss": 0.5538503527641296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.71789902870297, "step_time": 0.4323554458618164} +{"epoch": 0, "iter": 20180, "iter_tflops": 45.92089183737897, "iter_time": 0.4492746696472168, "loss": 0.6360711455345154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.18836817676956, "step_time": 0.41943033027648924} +{"epoch": 0, "iter": 20181, "iter_tflops": 33.73556208053411, "iter_time": 0.6115532760620117, "loss": 0.26595139503479004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.14209286098419, "step_time": 0.5708328399658203} +{"epoch": 0, "iter": 20182, "iter_tflops": 
16.146210164469807, "iter_time": 1.2777669372558593, "loss": 0.2625924348831177, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.309985098347795, "step_time": 1.068415817260742} +{"epoch": 0, "iter": 20183, "iter_tflops": 37.89305308583105, "iter_time": 0.544455825805664, "loss": 0.1997796893119812, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.449052130621745, "step_time": 0.49774584579467773} +{"epoch": 0, "iter": 20184, "iter_tflops": 41.58793261242631, "iter_time": 0.4960836524963379, "loss": 0.29615694284439087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.331903325826936, "step_time": 0.45511200714111333} +{"epoch": 0, "iter": 20185, "iter_tflops": 20.680470120578157, "iter_time": 0.9976124038696288, "loss": 0.24445638060569763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.310652510598487, "step_time": 0.9247194137573242} +{"epoch": 0, "iter": 20186, "iter_tflops": 15.346640681071163, "iter_time": 1.344339385986328, "loss": 0.08533240109682083, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.323952818364052, "step_time": 1.1259084606170655} +{"epoch": 0, "iter": 20187, "iter_tflops": 40.15101073883852, "iter_time": 0.5138374633789062, "loss": 0.14036543667316437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.18353127108475, "step_time": 0.46694080162048335} +{"epoch": 0, "iter": 20188, "iter_tflops": 38.033742875249956, "iter_time": 0.5424418411254883, "loss": 0.11552973836660385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62186852970643, "step_time": 0.49567917633056646} +{"epoch": 0, "iter": 20189, "iter_tflops": 14.064514932016289, "iter_time": 1.4668898010253906, "loss": 0.16264748573303223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.035827142269985, "step_time": 1.3721289367675782} +{"epoch": 0, "iter": 20190, "iter_tflops": 18.796099096905966, "iter_time": 1.0976263427734376, "loss": 0.14311841130256653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.44149644010214, "step_time": 0.8109229564666748} +{"epoch": 0, "iter": 
20191, "iter_tflops": 55.19049714110196, "iter_time": 0.373816047668457, "loss": 0.18898746371269226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.26391804212807, "step_time": 0.3423457050323486} +{"epoch": 0, "iter": 20192, "iter_tflops": 50.50123764306805, "iter_time": 0.40852649307250977, "loss": 0.2661847770214081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.044203174624414, "step_time": 0.37480955886840817} +{"epoch": 0, "iter": 20193, "iter_tflops": 34.2425148445899, "iter_time": 0.602499366760254, "loss": 0.052086878567934036, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.84127881574005, "step_time": 0.5599993858337402} +{"epoch": 0, "iter": 20194, "iter_tflops": 51.796801818727154, "iter_time": 0.39830825042724605, "loss": 0.07555044442415237, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.38153954732103, "step_time": 0.35954234886169434} +{"epoch": 0, "iter": 20195, "iter_tflops": 55.64142180431776, "iter_time": 0.37078659820556636, "loss": 0.06357011198997498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.71002070292489, "step_time": 0.3398301181793213} +{"epoch": 0, "iter": 20196, "iter_tflops": 52.85707463935999, "iter_time": 0.39031848907470706, "loss": 0.09111495316028595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.68871315119758, "step_time": 0.3576279029846191} +{"epoch": 0, "iter": 20197, "iter_tflops": 30.310915910421695, "iter_time": 0.6806489639282227, "loss": 0.12406288087368011, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.38493347965253, "step_time": 0.6370583877563477} +{"epoch": 0, "iter": 20198, "iter_tflops": 47.21966347070694, "iter_time": 0.43691741943359375, "loss": 0.13147786259651184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.53416390092816, "step_time": 0.4003381824493408} +{"epoch": 0, "iter": 20199, "iter_tflops": 44.83809227006218, "iter_time": 0.4601242485046387, "loss": 0.12456119805574417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.56308510289154, "step_time": 0.42483078384399414} 
+{"epoch": 0, "iter": 20200, "iter_tflops": 46.884705641647564, "iter_time": 0.4400388832092285, "loss": 0.14169856905937195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.396928721559284, "step_time": 0.40937203979492187} +{"epoch": 0, "iter": 20201, "iter_tflops": 37.50215740636758, "iter_time": 0.5501308441162109, "loss": 0.1452491581439972, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.33306306837303, "step_time": 0.5115181427001952} +{"epoch": 0, "iter": 20202, "iter_tflops": 17.17004555891591, "iter_time": 1.2015747680664062, "loss": 0.12896808981895447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.004115672319088, "step_time": 0.9822405204772949} +{"epoch": 0, "iter": 20203, "iter_tflops": 41.47995870227679, "iter_time": 0.49737497711181644, "loss": 0.10589649528265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59372460726998, "step_time": 0.4524985332489014} +{"epoch": 0, "iter": 20204, "iter_tflops": 42.14951351594467, "iter_time": 0.48947406005859373, "loss": 0.09790702164173126, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.94680626225347, "step_time": 0.4490212745666505} +{"epoch": 0, "iter": 20205, "iter_tflops": 20.525014923873556, "iter_time": 1.005168258666992, "loss": 0.4813401699066162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.919652575102003, "step_time": 0.9412144393920898} +{"epoch": 0, "iter": 20206, "iter_tflops": 25.178187030750646, "iter_time": 0.8194034576416016, "loss": 0.4704258441925049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.079336911952105, "step_time": 0.7347429027557373} +{"epoch": 0, "iter": 20207, "iter_tflops": 36.96335897499834, "iter_time": 0.5581498565673828, "loss": 0.4886520802974701, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.092514699169016, "step_time": 0.5145871658325194} +{"epoch": 0, "iter": 20208, "iter_tflops": 40.339125756424735, "iter_time": 0.5114412651062011, "loss": 0.617939829826355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87877802716251, "step_time": 
0.47018386650085453} +{"epoch": 0, "iter": 20209, "iter_tflops": 40.89650075216026, "iter_time": 0.504470874786377, "loss": 0.001322861760854721, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.39459476717315, "step_time": 0.4544834823608398} +{"epoch": 0, "iter": 20210, "iter_tflops": 40.91974824394134, "iter_time": 0.5041842727661133, "loss": 0.008819494396448135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.37325119419379, "step_time": 0.4546972713470459} +{"epoch": 0, "iter": 20211, "iter_tflops": 44.55054047988158, "iter_time": 0.46309412384033205, "loss": 0.0038097158540040255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.12438628433876, "step_time": 0.4199766159057617} +{"epoch": 0, "iter": 20212, "iter_tflops": 45.483234302989615, "iter_time": 0.4535977668762207, "loss": 0.0073141842149198055, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.28575894712105, "step_time": 0.410277063369751} +{"epoch": 0, "iter": 20213, "iter_tflops": 20.616467319477024, "iter_time": 1.0007094421386717, "loss": 0.11064472794532776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.97982809565635, "step_time": 0.9386376190185546} +{"epoch": 0, "iter": 20214, "iter_tflops": 24.011871900181504, "iter_time": 0.8592038803100586, "loss": 0.06321927160024643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.890824929999738, "step_time": 0.6902149257659912} +{"epoch": 0, "iter": 20215, "iter_tflops": 51.527920673945566, "iter_time": 0.40038668823242185, "loss": 0.11872230470180511, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.32446768623756, "step_time": 0.36629007530212404} +{"epoch": 0, "iter": 20216, "iter_tflops": 49.404931394555625, "iter_time": 0.41759178543090814, "loss": 0.043520037084817886, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.33046360808266, "step_time": 0.3868538188934326} +{"epoch": 0, "iter": 20217, "iter_tflops": 28.666426909274954, "iter_time": 0.7196953277587891, "loss": 0.5858365297317505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
30.48006186730569, "step_time": 0.6768717727661132} +{"epoch": 0, "iter": 20218, "iter_tflops": 20.300424969123398, "iter_time": 1.0162887496948243, "loss": 0.49300867319107056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.483161843058465, "step_time": 0.8426645889282227} +{"epoch": 0, "iter": 20219, "iter_tflops": 43.30022347323162, "iter_time": 0.4764662132263183, "loss": 0.6089493036270142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.500036215308256, "step_time": 0.44367908477783197} +{"epoch": 0, "iter": 20220, "iter_tflops": 45.7819998679713, "iter_time": 0.45063766479492184, "loss": 0.39053329825401306, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23177810831259, "step_time": 0.41906049919128413} +{"epoch": 0, "iter": 20221, "iter_tflops": 26.432972969585833, "iter_time": 0.7805059814453126, "loss": 0.508651077747345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.9350440136203, "step_time": 0.7385380706787109} +{"epoch": 0, "iter": 20222, "iter_tflops": 26.638762708405377, "iter_time": 0.774476417541504, "loss": 0.6807239651679993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.913544494842085, "step_time": 0.626826852798462} +{"epoch": 0, "iter": 20223, "iter_tflops": 38.62331237972481, "iter_time": 0.5341616821289061, "loss": 0.655251681804657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.95158553064415, "step_time": 0.49178340339660637} +{"epoch": 0, "iter": 20224, "iter_tflops": 40.656366158276604, "iter_time": 0.5074505043029784, "loss": 0.5680807828903198, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.10785560703106, "step_time": 0.46774193000793457} +{"epoch": 0, "iter": 20225, "iter_tflops": 19.07404156775242, "iter_time": 1.0816319885253907, "loss": 0.182769775390625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.374071992965295, "step_time": 1.0126151275634767} +{"epoch": 0, "iter": 20226, "iter_tflops": 17.46241476844069, "iter_time": 1.1814570770263673, "loss": 0.22277557849884033, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 27.778752188004063, "step_time": 0.7426933135986328} +{"epoch": 0, "iter": 20227, "iter_tflops": 49.28408296564284, "iter_time": 0.4186157531738281, "loss": 0.10588657855987549, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.365883201944, "step_time": 0.3865970592498779} +{"epoch": 0, "iter": 20228, "iter_tflops": 52.10789679976849, "iter_time": 0.3959302673339844, "loss": 0.18041785061359406, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.27637760333473, "step_time": 0.36660308265686037} +{"epoch": 0, "iter": 20229, "iter_tflops": 22.997566275361, "iter_time": 0.8970989913940429, "loss": 0.02566908672451973, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.158021429083732, "step_time": 0.8540059280395509} +{"epoch": 0, "iter": 20230, "iter_tflops": 16.616982031973667, "iter_time": 1.2415668182373047, "loss": 0.02298027090728283, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.548204916885748, "step_time": 0.8761217079162598} +{"epoch": 0, "iter": 20231, "iter_tflops": 56.09170323707704, "iter_time": 0.367810073852539, "loss": 0.02440284937620163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.463265686624034, "step_time": 0.33566543006896976} +{"epoch": 0, "iter": 20232, "iter_tflops": 56.359312226681105, "iter_time": 0.3660636138916015, "loss": 0.025490401312708855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 61.326547332864955, "step_time": 0.33641374588012696} +{"epoch": 0, "iter": 20233, "iter_tflops": 22.74336592123095, "iter_time": 0.9071257781982422, "loss": 0.06722116470336914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.895127827778865, "step_time": 0.8634016799926758} +{"epoch": 0, "iter": 20234, "iter_tflops": 20.150441856056446, "iter_time": 1.023853157043457, "loss": 0.060275912284851074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.835799754933603, "step_time": 0.9034539508819579} +{"epoch": 0, "iter": 20235, "iter_tflops": 53.9433579816792, "iter_time": 0.3824584579467773, "loss": 0.09653527289628983, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 58.68676140354577, "step_time": 0.3515459537506104} +{"epoch": 0, "iter": 20236, "iter_tflops": 54.1262669399188, "iter_time": 0.38116601562499997, "loss": 0.09940187633037567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.81820739229365, "step_time": 0.3507603244781494} +{"epoch": 0, "iter": 20237, "iter_tflops": 45.774882462647, "iter_time": 0.4507077331542969, "loss": 0.26597100496292114, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.06000857137732, "step_time": 0.41212724685668944} +{"epoch": 0, "iter": 20238, "iter_tflops": 37.34147205228157, "iter_time": 0.5524981307983398, "loss": 0.3186674118041992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.85907565239267, "step_time": 0.4928702602386475} +{"epoch": 0, "iter": 20239, "iter_tflops": 41.829167031257136, "iter_time": 0.49322267150878907, "loss": 0.3047080338001251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.86427096044906, "step_time": 0.4498293132781982} +{"epoch": 0, "iter": 20240, "iter_tflops": 40.1404716582164, "iter_time": 0.5139723739624023, "loss": 0.27097880840301514, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87279256872896, "step_time": 0.4702480125427246} +{"epoch": 0, "iter": 20241, "iter_tflops": 1.3380440039203843, "iter_time": 1.2006622467041015, "loss": 0.424345463514328, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.4265336567966063, "step_time": 1.1261836776733398} +{"epoch": 0, "iter": 20242, "iter_tflops": 1.5860986830472485, "iter_time": 1.0128871154785157, "loss": 0.29579463601112366, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.787436593679919, "step_time": 0.8987949142456055} +{"epoch": 0, "iter": 20243, "iter_tflops": 3.302757694748461, "iter_time": 0.48642348861694334, "loss": 0.40135353803634644, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.6066140621235427, "step_time": 0.44544242668151857} +{"epoch": 0, "iter": 20244, "iter_tflops": 3.0519576576831984, "iter_time": 0.5263962020874023, "loss": 0.5730659365653992, "lr": 
3e-05, "seqlen": 656.0, "step_tflops": 3.337248766770568, "step_time": 0.4813962135314942} +{"epoch": 0, "iter": 20245, "iter_tflops": 19.46663379053815, "iter_time": 1.0598182373046874, "loss": 0.5106080770492554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.592022993250893, "step_time": 1.001897361755371} +{"epoch": 0, "iter": 20246, "iter_tflops": 17.723633824377615, "iter_time": 1.164044219970703, "loss": 0.47052109241485596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.691909822134065, "step_time": 0.9091827735900879} +{"epoch": 0, "iter": 20247, "iter_tflops": 44.14891837189904, "iter_time": 0.467306884765625, "loss": 0.4917004406452179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.59683698964021, "step_time": 0.43345513725280765} +{"epoch": 0, "iter": 20248, "iter_tflops": 47.14150799295147, "iter_time": 0.43764178085327143, "loss": 0.418997585773468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.93195345651262, "step_time": 0.4050717105865479} +{"epoch": 0, "iter": 20249, "iter_tflops": 42.9467615533484, "iter_time": 0.4803876419067383, "loss": 0.08648895472288132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.7663223802567, "step_time": 0.44115278816223147} +{"epoch": 0, "iter": 20250, "iter_tflops": 42.74745428573639, "iter_time": 0.4826274185180664, "loss": 0.09895212948322296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.12757510913613, "step_time": 0.44726161003112797} +{"epoch": 0, "iter": 20251, "iter_tflops": 48.123051974946435, "iter_time": 0.4287154006958008, "loss": 0.1905028223991394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.247570584633934, "step_time": 0.3948718242645264} +{"epoch": 0, "iter": 20252, "iter_tflops": 50.294655421416785, "iter_time": 0.410204490661621, "loss": 0.19138626754283905, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.76993009074465, "step_time": 0.3766865043640137} +{"epoch": 0, "iter": 20253, "iter_tflops": 31.748784923687346, "iter_time": 0.6498230895996093, "loss": 0.42511409521102905, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.767200857596556, "step_time": 0.6109802703857422} +{"epoch": 0, "iter": 20254, "iter_tflops": 13.271635344197554, "iter_time": 1.554525344848633, "loss": 0.46294331550598145, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.398634099177954, "step_time": 1.2580982894897461} +{"epoch": 0, "iter": 20255, "iter_tflops": 13.28682390058719, "iter_time": 1.5527483215332032, "loss": 0.4806683659553528, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.106095834668945, "step_time": 1.3657462348937988} +{"epoch": 0, "iter": 20256, "iter_tflops": 13.069296431267793, "iter_time": 1.5785925140380859, "loss": 0.5583512187004089, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.35099789971386, "step_time": 1.3439578094482423} +{"epoch": 0, "iter": 20257, "iter_tflops": 13.560083503931484, "iter_time": 1.0301938400268553, "loss": 0.1911192685365677, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 14.603019512811514, "step_time": 0.9566182174682616} +{"epoch": 0, "iter": 20258, "iter_tflops": 9.891437919387073, "iter_time": 1.4122834930419923, "loss": 0.17093096673488617, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 11.657622555461618, "step_time": 1.198315902709961} +{"epoch": 0, "iter": 20259, "iter_tflops": 24.445593443729006, "iter_time": 0.5714532775878907, "loss": 0.11809979379177094, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 26.050350182730902, "step_time": 0.536250545501709} +{"epoch": 0, "iter": 20260, "iter_tflops": 25.00216024898368, "iter_time": 0.5587322998046875, "loss": 0.2547062933444977, "lr": 3e-05, "seqlen": 5600.0, "step_tflops": 26.65956454417316, "step_time": 0.5239963493347168} +{"epoch": 0, "iter": 20261, "iter_tflops": 21.762098695414565, "iter_time": 0.9480286712646484, "loss": 0.6824326515197754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.680831428486748, "step_time": 0.9096268615722657} +{"epoch": 0, "iter": 20262, "iter_tflops": 12.572263087404615, "iter_time": 1.6410007781982423, "loss": 
0.685653030872345, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.676174518589587, "step_time": 1.3160796012878417} +{"epoch": 0, "iter": 20263, "iter_tflops": 36.01540725103957, "iter_time": 0.5728407669067382, "loss": 0.4965320825576782, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.132633795269626, "step_time": 0.5272094287872314} +{"epoch": 0, "iter": 20264, "iter_tflops": 35.443430454319916, "iter_time": 0.5820851211547851, "loss": 0.5690293312072754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.370354405293455, "step_time": 0.537683162689209} +{"epoch": 0, "iter": 20265, "iter_tflops": 17.33818497399486, "iter_time": 1.1899223327636719, "loss": 0.3400948643684387, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.473200399232685, "step_time": 1.1168120880126953} +{"epoch": 0, "iter": 20266, "iter_tflops": 18.640425946799454, "iter_time": 1.1067930297851563, "loss": 0.3884257674217224, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.729639235046655, "step_time": 0.7440087242126465} +{"epoch": 0, "iter": 20267, "iter_tflops": 49.903974416442345, "iter_time": 0.41341584014892574, "loss": 0.34518933296203613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.16754025685737, "step_time": 0.3808755836486816} +{"epoch": 0, "iter": 20268, "iter_tflops": 45.35690524919172, "iter_time": 0.4548611373901367, "loss": 0.38508036732673645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04459429655987, "step_time": 0.42065988731384274} +{"epoch": 0, "iter": 20269, "iter_tflops": 22.593334015451525, "iter_time": 0.9131495819091797, "loss": 0.5561051964759827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.64326519373684, "step_time": 0.8725991668701172} +{"epoch": 0, "iter": 20270, "iter_tflops": 17.464870245124803, "iter_time": 1.181290969848633, "loss": 0.5635582804679871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.939820385924225, "step_time": 0.9852564697265626} +{"epoch": 0, "iter": 20271, "iter_tflops": 42.19486802049884, "iter_time": 
0.48894793319702157, "loss": 0.508335530757904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.227415212756576, "step_time": 0.45616344451904306} +{"epoch": 0, "iter": 20272, "iter_tflops": 45.47786949629144, "iter_time": 0.45365127563476565, "loss": 0.5385487079620361, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.00417678666557, "step_time": 0.42100683784484866} +{"epoch": 0, "iter": 20273, "iter_tflops": 28.897696632467067, "iter_time": 0.7139355697631836, "loss": 0.008668947033584118, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.668855965145568, "step_time": 0.6727050247192383} +{"epoch": 0, "iter": 20274, "iter_tflops": 13.630208441751208, "iter_time": 1.5136300811767578, "loss": 0.00233750999905169, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.156355850230792, "step_time": 1.2025335502624512} +{"epoch": 0, "iter": 20275, "iter_tflops": 58.36346823942081, "iter_time": 0.3534932746887207, "loss": 0.005545667838305235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 64.58692697496099, "step_time": 0.31943141555786136} +{"epoch": 0, "iter": 20276, "iter_tflops": 60.87482248509682, "iter_time": 0.3389101219177246, "loss": 0.0036776785273104906, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.92341961381601, "step_time": 0.30827912902832033} +{"epoch": 0, "iter": 20277, "iter_tflops": 44.96976752288213, "iter_time": 0.4587769660949707, "loss": 0.07279545813798904, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.0322454687516, "step_time": 0.42076583099365233} +{"epoch": 0, "iter": 20278, "iter_tflops": 36.27499074514286, "iter_time": 0.5687415237426757, "loss": 0.07281088829040527, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.92270756805663, "step_time": 0.5167759094238281} +{"epoch": 0, "iter": 20279, "iter_tflops": 44.85397315764339, "iter_time": 0.45996133804321293, "loss": 0.03209063783288002, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.25366163613804, "step_time": 0.41887430953979493} +{"epoch": 0, "iter": 20280, "iter_tflops": 
44.605592112142034, "iter_time": 0.46252257919311524, "loss": 0.05321136862039566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.790657677947266, "step_time": 0.42284926033020026} +{"epoch": 0, "iter": 20281, "iter_tflops": 17.023505718288064, "iter_time": 1.2119180297851562, "loss": 0.6841780543327332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.315017743844244, "step_time": 1.1264577407836915} +{"epoch": 0, "iter": 20282, "iter_tflops": 31.119822151465044, "iter_time": 0.6629566650390625, "loss": 0.42257365584373474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.99347887566588, "step_time": 0.5290908660888672} +{"epoch": 0, "iter": 20283, "iter_tflops": 44.25372853524067, "iter_time": 0.4662001190185547, "loss": 0.6346406936645508, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.47997318121159, "step_time": 0.43452201271057134} +{"epoch": 0, "iter": 20284, "iter_tflops": 46.69497645778995, "iter_time": 0.44182683181762694, "loss": 0.630879819393158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.14387264088052, "step_time": 0.4114379768371582} +{"epoch": 0, "iter": 20285, "iter_tflops": 26.771739000620084, "iter_time": 0.7706295623779297, "loss": 0.4866136312484741, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.33080376726657, "step_time": 0.7282212562561035} +{"epoch": 0, "iter": 20286, "iter_tflops": 13.118330157474933, "iter_time": 1.5726920471191406, "loss": 0.5398697853088379, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.268353246181643, "step_time": 1.1947342758178712} +{"epoch": 0, "iter": 20287, "iter_tflops": 43.073196875050854, "iter_time": 0.47897753143310545, "loss": 0.5162930488586426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.404121984551544, "step_time": 0.4445961399078369} +{"epoch": 0, "iter": 20288, "iter_tflops": 43.17123557880131, "iter_time": 0.47788980865478514, "loss": 0.6205093860626221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.53178572536591, "step_time": 0.4433763542175293} +{"epoch": 0, "iter": 20289, 
"iter_tflops": 28.49132343198811, "iter_time": 0.7241184692382814, "loss": 0.0706477239727974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.140994212032556, "step_time": 0.6844861640930175} +{"epoch": 0, "iter": 20290, "iter_tflops": 13.585928050080637, "iter_time": 1.5185634307861329, "loss": 0.043698716908693314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.458243305051464, "step_time": 1.3346337680816651} +{"epoch": 0, "iter": 20291, "iter_tflops": 39.3162000930244, "iter_time": 0.5247479019165039, "loss": 0.04197687283158302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.2444774143878, "step_time": 0.4770804214477539} +{"epoch": 0, "iter": 20292, "iter_tflops": 41.73095504863181, "iter_time": 0.49438344955444335, "loss": 0.03058679960668087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.967157510436095, "step_time": 0.4488224773406982} +{"epoch": 0, "iter": 20293, "iter_tflops": 24.361022944021673, "iter_time": 0.8468894577026368, "loss": 0.2902292013168335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.593424392256377, "step_time": 0.7757967987060548} +{"epoch": 0, "iter": 20294, "iter_tflops": 36.56967043223267, "iter_time": 0.5641585845947267, "loss": 0.277251273393631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7449438397806, "step_time": 0.48265576362609863} +{"epoch": 0, "iter": 20295, "iter_tflops": 40.36558638875868, "iter_time": 0.5111060028076172, "loss": 0.20117633044719696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.037432235541026, "step_time": 0.4684899291992187} +{"epoch": 0, "iter": 20296, "iter_tflops": 37.643603710305534, "iter_time": 0.5480637207031249, "loss": 0.19859561324119568, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.783883564706834, "step_time": 0.505863878250122} +{"epoch": 0, "iter": 20297, "iter_tflops": 34.173912392324475, "iter_time": 0.6037088546752929, "loss": 0.4806860387325287, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.65961823892416, "step_time": 0.547830659866333} +{"epoch": 0, 
"iter": 20298, "iter_tflops": 35.98944741342085, "iter_time": 0.5732539672851563, "loss": 0.41996902227401733, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.804914059739964, "step_time": 0.5183051891326904} +{"epoch": 0, "iter": 20299, "iter_tflops": 38.438076943305965, "iter_time": 0.53673583984375, "loss": 0.42374542355537415, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.79186814848363, "step_time": 0.4936628684997558} +{"epoch": 0, "iter": 20300, "iter_tflops": 34.67551176156132, "iter_time": 0.5949758911132812, "loss": 0.4404458999633789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.82134373064039, "step_time": 0.5454881153106689} +{"epoch": 0, "iter": 20301, "iter_tflops": 25.620304102379052, "iter_time": 0.805263412475586, "loss": 0.21083983778953552, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.589638782136213, "step_time": 0.7477841110229492} +{"epoch": 0, "iter": 20302, "iter_tflops": 8.21379925919279, "iter_time": 2.5117601318359375, "loss": 0.22470346093177795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.6527748277202, "step_time": 2.1373225708007815} +{"epoch": 0, "iter": 20303, "iter_tflops": 10.408994946189061, "iter_time": 1.982044723510742, "loss": 0.20455685257911682, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.10591518105334, "step_time": 1.7042159309387206} +{"epoch": 0, "iter": 20304, "iter_tflops": 39.56614939637211, "iter_time": 0.5214329376220702, "loss": 0.17579442262649536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.7993002146116, "step_time": 0.4710370578765869} +{"epoch": 0, "iter": 20305, "iter_tflops": 18.274607361436853, "iter_time": 0.7889595031738281, "loss": 0.25065669417381287, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 19.848920705264074, "step_time": 0.726383331298828} +{"epoch": 0, "iter": 20306, "iter_tflops": 22.4548303520961, "iter_time": 0.6420856857299804, "loss": 0.18123778700828552, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 24.126968378266696, "step_time": 0.5975854454040528} 
+{"epoch": 0, "iter": 20307, "iter_tflops": 22.612226817595246, "iter_time": 0.6376163330078126, "loss": 0.2732490301132202, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 24.33291366177626, "step_time": 0.5925276908874512} +{"epoch": 0, "iter": 20308, "iter_tflops": 22.68999468969905, "iter_time": 0.6354309616088868, "loss": 0.13086853921413422, "lr": 3e-05, "seqlen": 5776.0, "step_tflops": 24.218575195679744, "step_time": 0.5953250770568848} +{"epoch": 0, "iter": 20309, "iter_tflops": 23.563301041494263, "iter_time": 0.8755604095458983, "loss": 0.02111315354704857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.51024549648395, "step_time": 0.8087375526428223} +{"epoch": 0, "iter": 20310, "iter_tflops": 33.57540293813967, "iter_time": 0.6144704666137695, "loss": 0.03211395442485809, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.267999701054805, "step_time": 0.5535873584747314} +{"epoch": 0, "iter": 20311, "iter_tflops": 41.2298211451424, "iter_time": 0.5003925056457519, "loss": 0.02021779492497444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.59070443838189, "step_time": 0.4525285091400147} +{"epoch": 0, "iter": 20312, "iter_tflops": 43.86883780490615, "iter_time": 0.47029040527343746, "loss": 0.05098993331193924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.261253900975206, "step_time": 0.4274877223968505} +{"epoch": 0, "iter": 20313, "iter_tflops": 21.883926071935218, "iter_time": 0.9427510147094726, "loss": 0.47021955251693726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.66833195553268, "step_time": 0.8716750106811524} +{"epoch": 0, "iter": 20314, "iter_tflops": 15.49692328800105, "iter_time": 1.3313025512695311, "loss": 0.6927495002746582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.723222379330622, "step_time": 1.1640712432861327} +{"epoch": 0, "iter": 20315, "iter_tflops": 36.20215576222293, "iter_time": 0.5698857727050781, "loss": 0.6134948134422302, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.44148430601224, "step_time": 
0.5230810623168946} +{"epoch": 0, "iter": 20316, "iter_tflops": 37.53436679056393, "iter_time": 0.5496587600708007, "loss": 0.4975118041038513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.54738955303228, "step_time": 0.5088143463134766} +{"epoch": 0, "iter": 20317, "iter_tflops": 22.28419420852119, "iter_time": 0.9258173446655273, "loss": 0.06462346762418747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.939156154708105, "step_time": 0.8618137321472169} +{"epoch": 0, "iter": 20318, "iter_tflops": 13.67408361210222, "iter_time": 1.5087733917236328, "loss": 0.10902806371450424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.32880063051342, "step_time": 1.1905667304992675} +{"epoch": 0, "iter": 20319, "iter_tflops": 39.959871184879646, "iter_time": 0.516295295715332, "loss": 0.11851827800273895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.91752747269258, "step_time": 0.4697690124511719} +{"epoch": 0, "iter": 20320, "iter_tflops": 43.93899974774898, "iter_time": 0.4695394439697265, "loss": 0.06275855749845505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.01434814222735, "step_time": 0.4296860065460205} +{"epoch": 0, "iter": 20321, "iter_tflops": 37.09952732792075, "iter_time": 0.5561012496948242, "loss": 0.5703392028808594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.703918333729284, "step_time": 0.5068576774597168} +{"epoch": 0, "iter": 20322, "iter_tflops": 34.16322382761193, "iter_time": 0.603897735595703, "loss": 0.704901397228241, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.80301806269896, "step_time": 0.545752550125122} +{"epoch": 0, "iter": 20323, "iter_tflops": 40.63131053411885, "iter_time": 0.507763427734375, "loss": 0.5252178907394409, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.212823785397546, "step_time": 0.4666314373016358} +{"epoch": 0, "iter": 20324, "iter_tflops": 37.92785270193647, "iter_time": 0.5439562759399414, "loss": 0.5136339664459229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.32784517128203, 
"step_time": 0.4992056427001953} +{"epoch": 0, "iter": 20325, "iter_tflops": 22.74811877671965, "iter_time": 0.9069362487792969, "loss": 0.4134386479854584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.631764780610858, "step_time": 0.8375808105468749} +{"epoch": 0, "iter": 20326, "iter_tflops": 21.992718678516184, "iter_time": 0.9380874557495117, "loss": 0.624603271484375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.690607004937448, "step_time": 0.7729720611572266} +{"epoch": 0, "iter": 20327, "iter_tflops": 43.973783447786985, "iter_time": 0.4691680335998536, "loss": 0.47331252694129944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.13063253059557, "step_time": 0.4377427673339843} +{"epoch": 0, "iter": 20328, "iter_tflops": 45.36520032449306, "iter_time": 0.4547779655456543, "loss": 0.4631248712539673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.76544598812029, "step_time": 0.4230678730010986} +{"epoch": 0, "iter": 20329, "iter_tflops": 28.052468529540675, "iter_time": 0.735446632385254, "loss": 0.5046626925468445, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.708762324310513, "step_time": 0.6944447326660156} +{"epoch": 0, "iter": 20330, "iter_tflops": 10.363605413372202, "iter_time": 1.9907254943847656, "loss": 0.39090168476104736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.512959400927969, "step_time": 1.6487781066894531} +{"epoch": 0, "iter": 20331, "iter_tflops": 13.732830543654895, "iter_time": 1.5023190917968752, "loss": 0.552585244178772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.040975647197712, "step_time": 1.286149543762207} +{"epoch": 0, "iter": 20332, "iter_tflops": 33.851942303471404, "iter_time": 0.6094508056640625, "loss": 0.562583327293396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.74861802845675, "step_time": 0.5614114112854004} +{"epoch": 0, "iter": 20333, "iter_tflops": 10.942786997264747, "iter_time": 1.3735362243652343, "loss": 0.13392341136932373, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 
11.76272749771616, "step_time": 1.2777915954589845} +{"epoch": 0, "iter": 20334, "iter_tflops": 12.79437956623907, "iter_time": 1.1747591400146484, "loss": 0.17164036631584167, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 15.641846733607746, "step_time": 0.9609040794372559} +{"epoch": 0, "iter": 20335, "iter_tflops": 27.14539971561137, "iter_time": 0.5536965560913085, "loss": 0.20061138272285461, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.9433374980994, "step_time": 0.5193013534545898} +{"epoch": 0, "iter": 20336, "iter_tflops": 27.95764557931201, "iter_time": 0.5376101608276368, "loss": 0.21973063051700592, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.73013182212395, "step_time": 0.5055582809448242} +{"epoch": 0, "iter": 20337, "iter_tflops": 28.02153441043624, "iter_time": 0.7362585220336914, "loss": 0.17311789095401764, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.572370381516095, "step_time": 0.6976476097106934} +{"epoch": 0, "iter": 20338, "iter_tflops": 14.197016803050285, "iter_time": 1.4531992034912107, "loss": 0.21419796347618103, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.860339654159862, "step_time": 1.223646375656128} +{"epoch": 0, "iter": 20339, "iter_tflops": 39.56174028614883, "iter_time": 0.5214910507202148, "loss": 0.27438199520111084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.42036852491411, "step_time": 0.47514782142639167} +{"epoch": 0, "iter": 20340, "iter_tflops": 45.49314877570502, "iter_time": 0.4534989128112793, "loss": 0.3017093241214752, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.613910993272675, "step_time": 0.4158328399658203} +{"epoch": 0, "iter": 20341, "iter_tflops": 28.318484631946028, "iter_time": 0.7285380477905273, "loss": 0.019724616780877113, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.222129488926228, "step_time": 0.6607843170166015} +{"epoch": 0, "iter": 20342, "iter_tflops": 47.728924242529295, "iter_time": 0.43225557327270503, "loss": 0.030225997790694237, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 52.77377771323769, "step_time": 0.39093455886840817} +{"epoch": 0, "iter": 20343, "iter_tflops": 52.51630873733278, "iter_time": 0.3928511734008789, "loss": 0.013000261969864368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.17033861161081, "step_time": 0.36087058448791504} +{"epoch": 0, "iter": 20344, "iter_tflops": 57.54546210032474, "iter_time": 0.3585181655883789, "loss": 0.045209746807813644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.943463980373586, "step_time": 0.3277718162536621} +{"epoch": 0, "iter": 20345, "iter_tflops": 29.632977102017964, "iter_time": 0.6962207489013671, "loss": 0.5667065978050232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.430531354448796, "step_time": 0.6564029502868651} +{"epoch": 0, "iter": 20346, "iter_tflops": 17.558987421201657, "iter_time": 1.1749591827392578, "loss": 0.43368959426879883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.334135576401735, "step_time": 1.0146039123535155} +{"epoch": 0, "iter": 20347, "iter_tflops": 44.5783640094091, "iter_time": 0.46280508422851563, "loss": 0.41507428884506226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.10077953085425, "step_time": 0.428913911819458} +{"epoch": 0, "iter": 20348, "iter_tflops": 47.798851761163604, "iter_time": 0.4316232032775879, "loss": 0.4742763340473175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.66039377527631, "step_time": 0.3993599739074707} +{"epoch": 0, "iter": 20349, "iter_tflops": 30.286675440718298, "iter_time": 0.681193733215332, "loss": 0.659578263759613, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.14086457871303, "step_time": 0.6418960342407227} +{"epoch": 0, "iter": 20350, "iter_tflops": 10.255908932575737, "iter_time": 2.0116299438476566, "loss": 0.6317428946495056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.712100486606461, "step_time": 1.5045903091430666} +{"epoch": 0, "iter": 20351, "iter_tflops": 10.244324967778093, "iter_time": 2.013904632568359, "loss": 0.4318936765193939, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 12.37009837855777, "step_time": 1.667819679260254} +{"epoch": 0, "iter": 20352, "iter_tflops": 24.332933320279704, "iter_time": 0.8478670959472656, "loss": 0.6275588870048523, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.320581651631635, "step_time": 0.7036386165618896} +{"epoch": 0, "iter": 20353, "iter_tflops": 28.28131139955577, "iter_time": 0.5763269882202149, "loss": 0.17032462358474731, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 30.43041282746315, "step_time": 0.5356247749328613} +{"epoch": 0, "iter": 20354, "iter_tflops": 28.221981612003024, "iter_time": 0.57753857421875, "loss": 0.21553529798984528, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 30.136093240817996, "step_time": 0.5408558731079102} +{"epoch": 0, "iter": 20355, "iter_tflops": 29.427198123249124, "iter_time": 0.5538849792480469, "loss": 0.25150954723358154, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 31.36752494538203, "step_time": 0.5196228599548339} +{"epoch": 0, "iter": 20356, "iter_tflops": 27.861527360916146, "iter_time": 0.5850103912353516, "loss": 0.22415031492710114, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.53189344953943, "step_time": 0.5519213676452637} +{"epoch": 0, "iter": 20357, "iter_tflops": 27.56373330082956, "iter_time": 0.7484869079589844, "loss": 0.4397369623184204, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.058864286772103, "step_time": 0.7099759063720703} +{"epoch": 0, "iter": 20358, "iter_tflops": 18.031265209647206, "iter_time": 1.1441844635009764, "loss": 0.4616330564022064, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.68136232587498, "step_time": 0.951558910369873} +{"epoch": 0, "iter": 20359, "iter_tflops": 36.626985948396495, "iter_time": 0.563275764465332, "loss": 0.3759564161300659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.85124947666116, "step_time": 0.5177025508880615} +{"epoch": 0, "iter": 20360, "iter_tflops": 44.828680087055304, "iter_time": 0.46022085571289056, "loss": 
0.4886772930622101, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.10362031897876, "step_time": 0.42015422439575195} +{"epoch": 0, "iter": 20361, "iter_tflops": 13.599218230452445, "iter_time": 1.517079376220703, "loss": 0.16568002104759216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.450232390141904, "step_time": 1.427734375} +{"epoch": 0, "iter": 20362, "iter_tflops": 18.111875746010075, "iter_time": 1.139092041015625, "loss": 0.19159410893917084, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.559892361603605, "step_time": 0.914503189086914} +{"epoch": 0, "iter": 20363, "iter_tflops": 47.499434810299825, "iter_time": 0.4343439788818359, "loss": 0.2196352779865265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.419956320352014, "step_time": 0.40122736358642574} +{"epoch": 0, "iter": 20364, "iter_tflops": 49.90711184461475, "iter_time": 0.4133898506164551, "loss": 0.2002195417881012, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.23743969382988, "step_time": 0.3803847236633301} +{"epoch": 0, "iter": 20365, "iter_tflops": 41.44506996850046, "iter_time": 0.49779367065429686, "loss": 0.021634289994835854, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.7194536748092, "step_time": 0.4613449363708496} +{"epoch": 0, "iter": 20366, "iter_tflops": 10.181468025513666, "iter_time": 2.0263377990722655, "loss": 0.0026666924823075533, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.052031577454024, "step_time": 1.711835334777832} +{"epoch": 0, "iter": 20367, "iter_tflops": 14.039679936519425, "iter_time": 1.469484603881836, "loss": 0.003118733409792185, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.41708533607777, "step_time": 1.2566843070983886} +{"epoch": 0, "iter": 20368, "iter_tflops": 15.507790691024542, "iter_time": 1.3303696136474608, "loss": 0.0007063091034069657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.461961688234943, "step_time": 1.1814877319335937} +{"epoch": 0, "iter": 20369, "iter_tflops": 15.370232622039495, "iter_time": 
0.9885209808349611, "loss": 0.29217344522476196, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 18.690488482402994, "step_time": 0.8129160156250002} +{"epoch": 0, "iter": 20370, "iter_tflops": 11.83227761603291, "iter_time": 1.284097442626953, "loss": 0.24680761992931366, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 15.32500102888129, "step_time": 0.991438591003418} +{"epoch": 0, "iter": 20371, "iter_tflops": 26.0157187404533, "iter_time": 0.5840237426757813, "loss": 0.3496171832084656, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 27.94280513062132, "step_time": 0.5437463188171386} +{"epoch": 0, "iter": 20372, "iter_tflops": 27.766155182006344, "iter_time": 0.5472056655883789, "loss": 0.22671860456466675, "lr": 3e-05, "seqlen": 6080.0, "step_tflops": 29.521972344361078, "step_time": 0.5146606483459473} +{"epoch": 0, "iter": 20373, "iter_tflops": 29.29119773779499, "iter_time": 0.704344482421875, "loss": 0.5067605972290039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.131819511130352, "step_time": 0.6627011795043946} +{"epoch": 0, "iter": 20374, "iter_tflops": 10.440219569957366, "iter_time": 1.9761168212890623, "loss": 0.41075459122657776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.343050006518643, "step_time": 1.5462052154541015} +{"epoch": 0, "iter": 20375, "iter_tflops": 22.505253936551675, "iter_time": 0.9167234268188476, "loss": 0.433073490858078, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.798338197023874, "step_time": 0.742170030593872} +{"epoch": 0, "iter": 20376, "iter_tflops": 39.9013510058637, "iter_time": 0.5170525054931641, "loss": 0.3498927354812622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.39274995450621, "step_time": 0.4754502429962158} +{"epoch": 0, "iter": 20377, "iter_tflops": 12.094559386525082, "iter_time": 1.1685011749267575, "loss": 0.18049968779087067, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 13.006858660189783, "step_time": 1.0865426635742188} +{"epoch": 0, "iter": 20378, "iter_tflops": 9.95162214399911, 
"iter_time": 1.4201209259033203, "loss": 0.31241941452026367, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 13.730993208467952, "step_time": 1.0292414131164551} +{"epoch": 0, "iter": 20379, "iter_tflops": 21.124445174313028, "iter_time": 0.669011978149414, "loss": 0.31807392835617065, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 22.80055011147507, "step_time": 0.6198318367004395} +{"epoch": 0, "iter": 20380, "iter_tflops": 22.949965171150495, "iter_time": 0.6157964401245116, "loss": 0.24163655936717987, "lr": 3e-05, "seqlen": 5664.0, "step_tflops": 24.558069658757113, "step_time": 0.5754730339050292} +{"epoch": 0, "iter": 20381, "iter_tflops": 13.759286974319833, "iter_time": 1.2084561462402343, "loss": 0.0877373069524765, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 14.672934283373364, "step_time": 1.1332085723876952} +{"epoch": 0, "iter": 20382, "iter_tflops": 11.453627142138599, "iter_time": 1.451723083496094, "loss": 0.045569855719804764, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 14.927811853550086, "step_time": 1.1138601608276366} +{"epoch": 0, "iter": 20383, "iter_tflops": 40.746697938578, "iter_time": 0.4080697517395019, "loss": 0.07334180921316147, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 44.57015630566335, "step_time": 0.37306341934204096} +{"epoch": 0, "iter": 20384, "iter_tflops": 45.10181641585722, "iter_time": 0.36866574859619144, "loss": 0.07950584590435028, "lr": 3e-05, "seqlen": 6640.0, "step_tflops": 49.25228372525274, "step_time": 0.3375984554290772} +{"epoch": 0, "iter": 20385, "iter_tflops": 31.790347631522913, "iter_time": 0.6489735107421875, "loss": 0.5269103646278381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.83505250705679, "step_time": 0.6097550315856934} +{"epoch": 0, "iter": 20386, "iter_tflops": 16.674957954387835, "iter_time": 1.2372501068115231, "loss": 0.4742804765701294, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.009997741360458, "step_time": 1.0310392723083497} +{"epoch": 0, "iter": 20387, "iter_tflops": 
40.24125491301182, "iter_time": 0.5126851425170899, "loss": 0.46168026328086853, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59828270995384, "step_time": 0.47320885658264156} +{"epoch": 0, "iter": 20388, "iter_tflops": 43.815827196201965, "iter_time": 0.4708593864440918, "loss": 0.5075368881225586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.39673630944431, "step_time": 0.43528510856628416} +{"epoch": 0, "iter": 20389, "iter_tflops": 34.2426540339151, "iter_time": 0.6024969177246093, "loss": 0.037758998572826385, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.664155308946874, "step_time": 0.5627047271728516} +{"epoch": 0, "iter": 20390, "iter_tflops": 10.80678346334973, "iter_time": 1.9090873413085936, "loss": 0.09150445461273193, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.917815397334873, "step_time": 1.5971039123535156} +{"epoch": 0, "iter": 20391, "iter_tflops": 11.583773787458226, "iter_time": 1.7810338745117187, "loss": 0.06113467365503311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.11028786364453, "step_time": 1.4621312980651855} +{"epoch": 0, "iter": 20392, "iter_tflops": 20.775184261471907, "iter_time": 0.9930642852783204, "loss": 0.0543629452586174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.363893363849094, "step_time": 0.813404047012329} +{"epoch": 0, "iter": 20393, "iter_tflops": 19.46284694148306, "iter_time": 0.8374562606811523, "loss": 0.19152165949344635, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 21.34645843535938, "step_time": 0.7635591201782226} +{"epoch": 0, "iter": 20394, "iter_tflops": 25.277227468199637, "iter_time": 0.6448208389282227, "loss": 0.23459547758102417, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.229641746718595, "step_time": 0.5985860252380372} +{"epoch": 0, "iter": 20395, "iter_tflops": 25.274145873678517, "iter_time": 0.6448994598388671, "loss": 0.3177589774131775, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.218317529552507, "step_time": 0.5988350677490234} +{"epoch": 0, "iter": 20396, 
"iter_tflops": 27.704993936803536, "iter_time": 0.5883157043457031, "loss": 0.17688623070716858, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 29.402869483135127, "step_time": 0.554343276977539} +{"epoch": 0, "iter": 20397, "iter_tflops": 29.28207086334316, "iter_time": 0.7045640182495118, "loss": 0.35557132959365845, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.380205099284584, "step_time": 0.6574556617736818} +{"epoch": 0, "iter": 20398, "iter_tflops": 10.433947203241539, "iter_time": 1.9773047637939454, "loss": 0.286540150642395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.876668018150847, "step_time": 1.4867469253540038} +{"epoch": 0, "iter": 20399, "iter_tflops": 13.258434090595522, "iter_time": 1.556073165893555, "loss": 0.40274155139923096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.109351909289668, "step_time": 1.3654519157409666} +{"epoch": 0, "iter": 20400, "iter_tflops": 38.75853949024182, "iter_time": 0.5322980117797851, "loss": 0.31496545672416687, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.28832977071563, "step_time": 0.48786730575561527} +{"epoch": 0, "iter": 20401, "iter_tflops": 15.528716385102806, "iter_time": 1.0126811828613282, "loss": 0.1923983097076416, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 16.619151441443677, "step_time": 0.946235969543457} +{"epoch": 0, "iter": 20402, "iter_tflops": 20.282163795509952, "iter_time": 0.7753432540893554, "loss": 0.3499571681022644, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 21.871521361894445, "step_time": 0.7190006866455079} +{"epoch": 0, "iter": 20403, "iter_tflops": 24.389453851917278, "iter_time": 0.6447720794677734, "loss": 0.24738825857639313, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 26.283642218943395, "step_time": 0.5983051643371582} +{"epoch": 0, "iter": 20404, "iter_tflops": 24.115738585066595, "iter_time": 0.6520902862548829, "loss": 0.2747611701488495, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.91901122071347, "step_time": 0.6067221755981446} +{"epoch": 0, 
"iter": 20405, "iter_tflops": 29.945415375253344, "iter_time": 0.6889566650390625, "loss": 0.499234139919281, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.40133835030461, "step_time": 0.6367358436584473} +{"epoch": 0, "iter": 20406, "iter_tflops": 8.394946748990584, "iter_time": 2.4575609741210935, "loss": 0.4886886179447174, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.639386848924973, "step_time": 2.140291061401367} +{"epoch": 0, "iter": 20407, "iter_tflops": 11.433783364547459, "iter_time": 1.8043977966308593, "loss": 0.4018048346042633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.945154679673353, "step_time": 1.479445297241211} +{"epoch": 0, "iter": 20408, "iter_tflops": 29.2200193943153, "iter_time": 0.7060602264404297, "loss": 0.3940998613834381, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.220663233398554, "step_time": 0.6403062953948975} +{"epoch": 0, "iter": 20409, "iter_tflops": 14.955354933250565, "iter_time": 1.1063203582763672, "loss": 0.10566110908985138, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 16.270225343343725, "step_time": 1.0169136123657228} +{"epoch": 0, "iter": 20410, "iter_tflops": 18.59334842130604, "iter_time": 0.8898565902709961, "loss": 0.16228865087032318, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 21.53958577958793, "step_time": 0.7681398239135743} +{"epoch": 0, "iter": 20411, "iter_tflops": 29.73255886763583, "iter_time": 0.556474594116211, "loss": 0.21495366096496582, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 31.71633923018868, "step_time": 0.5216684532165528} +{"epoch": 0, "iter": 20412, "iter_tflops": 28.429124889789485, "iter_time": 0.5819881439208985, "loss": 0.12971599400043488, "lr": 3e-05, "seqlen": 6608.0, "step_tflops": 30.11896177762604, "step_time": 0.5493354568481444} +{"epoch": 0, "iter": 20413, "iter_tflops": 27.602277186538785, "iter_time": 0.7474417190551758, "loss": 0.48918184638023376, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.35459002599982, "step_time": 0.7028234252929687} 
+{"epoch": 0, "iter": 20414, "iter_tflops": 12.836783995558408, "iter_time": 1.607185531616211, "loss": 0.5952564477920532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.333850351058478, "step_time": 1.3454607315063478} +{"epoch": 0, "iter": 20415, "iter_tflops": 35.330952665463286, "iter_time": 0.5839382171630859, "loss": 0.4532760679721832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.45785689062163, "step_time": 0.5364597816467285} +{"epoch": 0, "iter": 20416, "iter_tflops": 36.76740437496097, "iter_time": 0.5611245574951171, "loss": 0.465029776096344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.10509124962112, "step_time": 0.5144257965087891} +{"epoch": 0, "iter": 20417, "iter_tflops": 17.805518368564474, "iter_time": 1.1586909790039064, "loss": 0.43797263503074646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.913524109058795, "step_time": 1.0908117065429688} +{"epoch": 0, "iter": 20418, "iter_tflops": 20.804177274898073, "iter_time": 0.9916803359985351, "loss": 0.28987187147140503, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.109375845186715, "step_time": 0.821648998260498} +{"epoch": 0, "iter": 20419, "iter_tflops": 46.39590534458636, "iter_time": 0.44467487716674803, "loss": 0.31904545426368713, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.2801277070345, "step_time": 0.4103230133056641} +{"epoch": 0, "iter": 20420, "iter_tflops": 46.05224208824709, "iter_time": 0.44799324798583984, "loss": 0.3600275218486786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.51879947510398, "step_time": 0.4166315364837646} +{"epoch": 0, "iter": 20421, "iter_tflops": 28.63503646450055, "iter_time": 0.720484275817871, "loss": 0.26699399948120117, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.39474623204365, "step_time": 0.6787716979980469} +{"epoch": 0, "iter": 20422, "iter_tflops": 24.343665141438258, "iter_time": 0.8474933166503906, "loss": 0.2857723832130432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.646588844551697, "step_time": 
0.6959010906219482} +{"epoch": 0, "iter": 20423, "iter_tflops": 38.08558718721664, "iter_time": 0.5417034378051758, "loss": 0.23390324413776398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.48976059828522, "step_time": 0.49725747299194334} +{"epoch": 0, "iter": 20424, "iter_tflops": 40.48903919139084, "iter_time": 0.5095476188659668, "loss": 0.28555747866630554, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.84277815383709, "step_time": 0.4705699405670165} +{"epoch": 0, "iter": 20425, "iter_tflops": 14.305001737582893, "iter_time": 1.0964486389160155, "loss": 0.26915600895881653, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 15.317303972755814, "step_time": 1.0239856643676757} +{"epoch": 0, "iter": 20426, "iter_tflops": 14.136726805415057, "iter_time": 1.1095000915527344, "loss": 0.24992264807224274, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 16.76966832318216, "step_time": 0.9353017234802246} +{"epoch": 0, "iter": 20427, "iter_tflops": 27.404130125975016, "iter_time": 0.5723480224609374, "loss": 0.2940730154514313, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.244574180163884, "step_time": 0.5363285369873046} +{"epoch": 0, "iter": 20428, "iter_tflops": 27.404495427678427, "iter_time": 0.5723403930664062, "loss": 0.16613970696926117, "lr": 3e-05, "seqlen": 6272.0, "step_tflops": 29.208473296756562, "step_time": 0.5369914245605469} +{"epoch": 0, "iter": 20429, "iter_tflops": 41.50364990535751, "iter_time": 0.497091064453125, "loss": 0.2774847745895386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.38131653620988, "step_time": 0.45461646080017093} +{"epoch": 0, "iter": 20430, "iter_tflops": 43.737683796147934, "iter_time": 0.47170064163208003, "loss": 0.3044600784778595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.54399065069228, "step_time": 0.43393693351745605} +{"epoch": 0, "iter": 20431, "iter_tflops": 46.50585085564189, "iter_time": 0.4436236114501953, "loss": 0.34574607014656067, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
50.21527749550877, "step_time": 0.41085292243957516} +{"epoch": 0, "iter": 20432, "iter_tflops": 45.7497579192552, "iter_time": 0.45095524978637697, "loss": 0.3215414583683014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.58085120506614, "step_time": 0.41611011123657227} +{"epoch": 0, "iter": 20433, "iter_tflops": 27.231049910785735, "iter_time": 0.757631217956543, "loss": 0.23367983102798462, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.786160636528514, "step_time": 0.7167018127441406} +{"epoch": 0, "iter": 20434, "iter_tflops": 15.184517780097597, "iter_time": 1.3586927032470704, "loss": 0.22594290971755981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.557080459512317, "step_time": 1.0549168395996094} +{"epoch": 0, "iter": 20435, "iter_tflops": 48.46297143669037, "iter_time": 0.42570838928222654, "loss": 0.31514179706573486, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.590250419708546, "step_time": 0.39229882621765133} +{"epoch": 0, "iter": 20436, "iter_tflops": 49.402778722430426, "iter_time": 0.41760998153686524, "loss": 0.2633073925971985, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.301920066547716, "step_time": 0.38706098175048825} +{"epoch": 0, "iter": 20437, "iter_tflops": 16.803891984020854, "iter_time": 1.227756851196289, "loss": 0.3412609398365021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.025151920560855, "step_time": 1.1445725173950194} +{"epoch": 0, "iter": 20438, "iter_tflops": 25.05743448316297, "iter_time": 0.8233521881103516, "loss": 0.17780594527721405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.49089533798674, "step_time": 0.6766312789916993} +{"epoch": 0, "iter": 20439, "iter_tflops": 40.07623594900686, "iter_time": 0.5147961883544923, "loss": 0.18934310972690582, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.91990701352601, "step_time": 0.46974356079101565} +{"epoch": 0, "iter": 20440, "iter_tflops": 40.90138285089317, "iter_time": 0.504410659790039, "loss": 0.1364193707704544, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 44.42983854865001, "step_time": 0.46435220527648935} +{"epoch": 0, "iter": 20441, "iter_tflops": 26.717504623236284, "iter_time": 0.7721938781738281, "loss": 0.2804505527019501, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.16556020360309, "step_time": 0.7073786125183106} +{"epoch": 0, "iter": 20442, "iter_tflops": 22.72199059779541, "iter_time": 0.9079791412353515, "loss": 0.31692272424697876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.577499608176662, "step_time": 0.7481132736206055} +{"epoch": 0, "iter": 20443, "iter_tflops": 39.871507578291244, "iter_time": 0.5174395141601562, "loss": 0.33902648091316223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.65223585506754, "step_time": 0.4726239814758302} +{"epoch": 0, "iter": 20444, "iter_tflops": 41.58573381409089, "iter_time": 0.4961098823547363, "loss": 0.3625243902206421, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.3789132715555, "step_time": 0.4546405372619629} +{"epoch": 0, "iter": 20445, "iter_tflops": 31.33990282096112, "iter_time": 0.6583011322021484, "loss": 0.5187550783157349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.56177110365547, "step_time": 0.6147200469970704} +{"epoch": 0, "iter": 20446, "iter_tflops": 9.849317808539404, "iter_time": 2.094672332763672, "loss": 0.7042032480239868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.19191015221437, "step_time": 1.5639201049804687} +{"epoch": 0, "iter": 20447, "iter_tflops": 13.2045818683037, "iter_time": 1.5624192962646488, "loss": 0.471035361289978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.493932499278852, "step_time": 1.3315595321655274} +{"epoch": 0, "iter": 20448, "iter_tflops": 41.02649096901753, "iter_time": 0.502872486114502, "loss": 0.4494624137878418, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.273084403230904, "step_time": 0.46599629974365236} +{"epoch": 0, "iter": 20449, "iter_tflops": 20.646662582775658, "iter_time": 0.8590947570800782, "loss": 0.16375814378261566, "lr": 3e-05, 
"seqlen": 7072.0, "step_tflops": 21.85897713127985, "step_time": 0.8114487457275391} +{"epoch": 0, "iter": 20450, "iter_tflops": 15.125982621566129, "iter_time": 1.1726470947265626, "loss": 0.08403157442808151, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 19.12272026235322, "step_time": 0.9275583877563478} +{"epoch": 0, "iter": 20451, "iter_tflops": 30.17809878730128, "iter_time": 0.5877586822509766, "loss": 0.14129376411437988, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 32.11995553120016, "step_time": 0.5522249107360839} +{"epoch": 0, "iter": 20452, "iter_tflops": 31.473709831225722, "iter_time": 0.5635636749267579, "loss": 0.1975940465927124, "lr": 3e-05, "seqlen": 7072.0, "step_tflops": 33.44320808805036, "step_time": 0.530374942779541} +{"epoch": 0, "iter": 20453, "iter_tflops": 33.339380943033575, "iter_time": 0.6188205337524414, "loss": 0.6232110857963562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.62149365899333, "step_time": 0.5791754188537598} +{"epoch": 0, "iter": 20454, "iter_tflops": 11.163568638474777, "iter_time": 1.8480733337402342, "loss": 0.5507749319076538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.464970376996604, "step_time": 1.6551257553100585} +{"epoch": 0, "iter": 20455, "iter_tflops": 42.39140641426321, "iter_time": 0.4866810340881348, "loss": 0.625564455986023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.79196458147909, "step_time": 0.45053960227966305} +{"epoch": 0, "iter": 20456, "iter_tflops": 44.74825394106295, "iter_time": 0.46104801177978516, "loss": 0.7040956020355225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.215387401816926, "step_time": 0.4278943843841553} +{"epoch": 0, "iter": 20457, "iter_tflops": 38.981869586767786, "iter_time": 0.5292484359741211, "loss": 0.23828618228435516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.08513847690613, "step_time": 0.4902227783203125} +{"epoch": 0, "iter": 20458, "iter_tflops": 22.232505428795882, "iter_time": 0.9279697952270508, "loss": 0.12176969647407532, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.91580526272742, "step_time": 0.8280323791503906} +{"epoch": 0, "iter": 20459, "iter_tflops": 40.789399749164374, "iter_time": 0.5057954673767089, "loss": 0.19426989555358887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.45029737561211, "step_time": 0.4641384811401367} +{"epoch": 0, "iter": 20460, "iter_tflops": 41.07012827072854, "iter_time": 0.5023381805419922, "loss": 0.16569484770298004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.59481190032756, "step_time": 0.46263438796997075} +{"epoch": 0, "iter": 20461, "iter_tflops": 21.097110578241264, "iter_time": 0.9779108581542969, "loss": 0.10002299398183823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.311301022031905, "step_time": 0.9246925354003906} +{"epoch": 0, "iter": 20462, "iter_tflops": 14.678440824193396, "iter_time": 1.4055371246337889, "loss": 0.061328206211328506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.556960246770643, "step_time": 1.1750948467254638} +{"epoch": 0, "iter": 20463, "iter_tflops": 48.3485269478069, "iter_time": 0.42671607208251955, "loss": 0.04060253128409386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.76321334715442, "step_time": 0.3910128326416016} +{"epoch": 0, "iter": 20464, "iter_tflops": 47.29290773604385, "iter_time": 0.4362407493591309, "loss": 0.03942742571234703, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.998860359506544, "step_time": 0.40454028511047363} +{"epoch": 0, "iter": 20465, "iter_tflops": 22.602963778131123, "iter_time": 0.9127605438232422, "loss": 0.5762288570404053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.632621008013327, "step_time": 0.8729921875} +{"epoch": 0, "iter": 20466, "iter_tflops": 12.621868864786725, "iter_time": 1.6345514068603515, "loss": 0.7173901200294495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.254867617903102, "step_time": 1.3524269123077393} +{"epoch": 0, "iter": 20467, "iter_tflops": 45.87056590688238, "iter_time": 0.4497675819396973, "loss": 
0.5168110728263855, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.5856522242629, "step_time": 0.4160698223114013} +{"epoch": 0, "iter": 20468, "iter_tflops": 47.17026732078305, "iter_time": 0.4373749542236328, "loss": 0.4601115882396698, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.992377389816895, "step_time": 0.40459171676635747} +{"epoch": 0, "iter": 20469, "iter_tflops": 30.46405911683223, "iter_time": 0.6772273330688477, "loss": 0.05059864744544029, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.465914438853694, "step_time": 0.6354693489074708} +{"epoch": 0, "iter": 20470, "iter_tflops": 34.56083632215598, "iter_time": 0.596950065612793, "loss": 0.05056018382310867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.27252592113923, "step_time": 0.5122870502471923} +{"epoch": 0, "iter": 20471, "iter_tflops": 43.437528240653165, "iter_time": 0.47496011734008786, "loss": 0.07484439760446548, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.813736263513825, "step_time": 0.4314888381958008} +{"epoch": 0, "iter": 20472, "iter_tflops": 45.163648552395365, "iter_time": 0.456807502746582, "loss": 0.04454972967505455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.58748714419843, "step_time": 0.4160544261932373} +{"epoch": 0, "iter": 20473, "iter_tflops": 14.930384172716346, "iter_time": 1.0889390106201173, "loss": 0.00698165874928236, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 15.765593470975341, "step_time": 1.031250602722168} +{"epoch": 0, "iter": 20474, "iter_tflops": 14.976612810630662, "iter_time": 1.0855777587890625, "loss": 0.024786729365587234, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 20.164985377644193, "step_time": 0.806262809753418} +{"epoch": 0, "iter": 20475, "iter_tflops": 38.41865167932932, "iter_time": 0.42318709945678706, "loss": 0.009705966338515282, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 42.38525484610268, "step_time": 0.3835833435058594} +{"epoch": 0, "iter": 20476, "iter_tflops": 38.495174990946005, "iter_time": 
0.42234585952758796, "loss": 0.0017599054845049977, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 42.41539534263885, "step_time": 0.38331076812744136} +{"epoch": 0, "iter": 20477, "iter_tflops": 34.44905001760245, "iter_time": 0.5988871536254883, "loss": 0.6088324785232544, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.17510266896418, "step_time": 0.5404332160949706} +{"epoch": 0, "iter": 20478, "iter_tflops": 35.41122689442181, "iter_time": 0.5826144790649415, "loss": 0.4905824363231659, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.94437399961166, "step_time": 0.5297579956054688} +{"epoch": 0, "iter": 20479, "iter_tflops": 37.64704004168991, "iter_time": 0.5480136947631836, "loss": 0.7562156915664673, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.90125525508365, "step_time": 0.504412233352661} +{"epoch": 0, "iter": 20480, "iter_tflops": 37.95746005270157, "iter_time": 0.543531982421875, "loss": 0.35850176215171814, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.17375714764943, "step_time": 0.5010738620758056} +{"epoch": 0, "iter": 20481, "iter_tflops": 17.206827898343263, "iter_time": 1.1990062103271484, "loss": 0.4164818227291107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.26076198269793, "step_time": 1.1298046340942383} +{"epoch": 0, "iter": 20482, "iter_tflops": 17.549910265523877, "iter_time": 1.17556689453125, "loss": 0.4333621561527252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.15504416502976, "step_time": 0.9752328262329102} +{"epoch": 0, "iter": 20483, "iter_tflops": 41.772348461900314, "iter_time": 0.4938935508728028, "loss": 0.3505702316761017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.26819767976763, "step_time": 0.45575248336791996} +{"epoch": 0, "iter": 20484, "iter_tflops": 42.57089249017638, "iter_time": 0.4846291046142578, "loss": 0.46437209844589233, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.21485033236565, "step_time": 0.446416971206665} +{"epoch": 0, "iter": 20485, "iter_tflops": 40.851736034376955, 
"iter_time": 0.5050236663818358, "loss": 0.003773256903514266, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.53571636337874, "step_time": 0.4530749740600586} +{"epoch": 0, "iter": 20486, "iter_tflops": 38.00644998735482, "iter_time": 0.5428313751220704, "loss": 0.00906999222934246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78315398558707, "step_time": 0.4937658252716064} +{"epoch": 0, "iter": 20487, "iter_tflops": 49.40800187186863, "iter_time": 0.41756583404541014, "loss": 0.007184841204434633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.68109458942798, "step_time": 0.3772984733581543} +{"epoch": 0, "iter": 20488, "iter_tflops": 49.39636243556568, "iter_time": 0.4176642265319824, "loss": 0.008488047868013382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.48565329872774, "step_time": 0.3786518516540528} +{"epoch": 0, "iter": 20489, "iter_tflops": 25.091427914290147, "iter_time": 0.8222367248535156, "loss": 0.04626622051000595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.61453354031738, "step_time": 0.7751814804077148} +{"epoch": 0, "iter": 20490, "iter_tflops": 17.607328723116154, "iter_time": 1.1717333068847655, "loss": 0.02108805626630783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.02149991964819, "step_time": 0.93686141204834} +{"epoch": 0, "iter": 20491, "iter_tflops": 41.82214208269246, "iter_time": 0.4933055191040039, "loss": 0.01951352134346962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.34172510438158, "step_time": 0.44519476699829097} +{"epoch": 0, "iter": 20492, "iter_tflops": 44.600096532127814, "iter_time": 0.4625795707702637, "loss": 0.03911764547228813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.15366971298705, "step_time": 0.41972641372680664} +{"epoch": 0, "iter": 20493, "iter_tflops": 14.584152121392286, "iter_time": 1.4146241302490232, "loss": 0.5995028614997864, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.517236337194314, "step_time": 1.3295597915649415} +{"epoch": 0, "iter": 20494, "iter_tflops": 
18.553277340588746, "iter_time": 1.1119918670654296, "loss": 0.7249640226364136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.338126311869487, "step_time": 0.8476861877441407} +{"epoch": 0, "iter": 20495, "iter_tflops": 45.69295481603017, "iter_time": 0.4515158538818359, "loss": 0.6040598750114441, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.37282618886293, "step_time": 0.4178633289337158} +{"epoch": 0, "iter": 20496, "iter_tflops": 46.389541991339954, "iter_time": 0.44473587417602534, "loss": 0.5395084023475647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.95747422926335, "step_time": 0.41297311019897465} +{"epoch": 0, "iter": 20497, "iter_tflops": 43.38266991254373, "iter_time": 0.4755607147216797, "loss": 0.625580906867981, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.290161911847854, "step_time": 0.4362660789489746} +{"epoch": 0, "iter": 20498, "iter_tflops": 44.43936569160186, "iter_time": 0.4642526550292969, "loss": 0.6399689316749573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.99153120860125, "step_time": 0.42989029502868653} +{"epoch": 0, "iter": 20499, "iter_tflops": 45.932535393436055, "iter_time": 0.4491607818603515, "loss": 0.6178107261657715, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.39670441499235, "step_time": 0.4176613349914551} +{"epoch": 0, "iter": 20500, "iter_tflops": 40.30005668072422, "iter_time": 0.5119370841979981, "loss": 0.5904913544654846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.90504785649655, "step_time": 0.4808546905517578} +{"epoch": 0, "iter": 20501, "iter_tflops": 26.312018549444918, "iter_time": 0.7840939102172853, "loss": 0.35366925597190857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.84130326472216, "step_time": 0.7410247039794922} +{"epoch": 0, "iter": 20502, "iter_tflops": 16.51148524734442, "iter_time": 1.249499557495117, "loss": 0.3947661519050598, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.186800623083133, "step_time": 1.0752753372192383} +{"epoch": 0, "iter": 20503, 
"iter_tflops": 45.6421474312257, "iter_time": 0.4520184669494629, "loss": 0.4122682511806488, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.43703635415091, "step_time": 0.41732059669494626} +{"epoch": 0, "iter": 20504, "iter_tflops": 47.811862037943676, "iter_time": 0.4315057525634767, "loss": 0.46051397919654846, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.806665337409314, "step_time": 0.3982324161529541} +{"epoch": 0, "iter": 20505, "iter_tflops": 30.70680484934189, "iter_time": 0.671873664855957, "loss": 0.42037129402160645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.645140418429705, "step_time": 0.6319805412292481} +{"epoch": 0, "iter": 20506, "iter_tflops": 14.836004662717716, "iter_time": 1.3906098022460938, "loss": 0.3265748620033264, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.441837716090852, "step_time": 1.118711368560791} +{"epoch": 0, "iter": 20507, "iter_tflops": 45.44018403407127, "iter_time": 0.4540275077819824, "loss": 0.443555623292923, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.25453565940188, "step_time": 0.4188668766021728} +{"epoch": 0, "iter": 20508, "iter_tflops": 49.0858672829727, "iter_time": 0.42030618286132815, "loss": 0.3972177803516388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.05346386587097, "step_time": 0.3888736381530762} +{"epoch": 0, "iter": 20509, "iter_tflops": 45.551443901283136, "iter_time": 0.4529185409545899, "loss": 0.06335581094026566, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26381893231119, "step_time": 0.410456148147583} +{"epoch": 0, "iter": 20510, "iter_tflops": 38.76380879379074, "iter_time": 0.5322256546020508, "loss": 0.06903056055307388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.280888709075235, "step_time": 0.47667906379699715} +{"epoch": 0, "iter": 20511, "iter_tflops": 40.773409121409635, "iter_time": 0.5059938316345215, "loss": 0.08746571093797684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.70118658794307, "step_time": 0.4615334644317627} +{"epoch": 0, 
"iter": 20512, "iter_tflops": 45.155414196110854, "iter_time": 0.4568908042907715, "loss": 0.04800331965088844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.66749688120742, "step_time": 0.41538420104980467} +{"epoch": 0, "iter": 20513, "iter_tflops": 17.55030960240378, "iter_time": 1.1755401458740233, "loss": 0.8008438348770142, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.833845444072114, "step_time": 1.0954265060424806} +{"epoch": 0, "iter": 20514, "iter_tflops": 15.38483269544791, "iter_time": 1.3410021362304687, "loss": 0.6128626465797424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.02741983601113, "step_time": 1.0301423587799072} +{"epoch": 0, "iter": 20515, "iter_tflops": 43.28222068492633, "iter_time": 0.47666439437866204, "loss": 0.5048533082008362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.60017900486824, "step_time": 0.44272562789916997} +{"epoch": 0, "iter": 20516, "iter_tflops": 44.15713545045874, "iter_time": 0.4672199249267578, "loss": 0.6544219255447388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.52388509217202, "step_time": 0.43412051582336425} +{"epoch": 0, "iter": 20517, "iter_tflops": 32.18269099323006, "iter_time": 0.6410617904663086, "loss": 0.38012149930000305, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.39006511214672, "step_time": 0.5999143486022949} +{"epoch": 0, "iter": 20518, "iter_tflops": 10.020517393436016, "iter_time": 2.058885055541992, "loss": 0.3545706868171692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.385813025585662, "step_time": 1.66570361328125} +{"epoch": 0, "iter": 20519, "iter_tflops": 11.378399921419573, "iter_time": 1.8131805572509765, "loss": 0.3558809161186218, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.756698308576379, "step_time": 1.617275333404541} +{"epoch": 0, "iter": 20520, "iter_tflops": 27.08104776355414, "iter_time": 0.7618277435302735, "loss": 0.41157448291778564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.15395393682423, "step_time": 0.6222815399169922} 
+{"epoch": 0, "iter": 20521, "iter_tflops": 15.307586379441082, "iter_time": 0.9765485382080078, "loss": 0.10727515816688538, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 16.913685695932394, "step_time": 0.8838168907165527} +{"epoch": 0, "iter": 20522, "iter_tflops": 22.521310995476558, "iter_time": 0.6637535934448243, "loss": 0.22715847194194794, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 24.25810577123085, "step_time": 0.6162311782836913} +{"epoch": 0, "iter": 20523, "iter_tflops": 24.04004524535005, "iter_time": 0.6218208389282227, "loss": 0.14212745428085327, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 25.7959379654483, "step_time": 0.5794943809509279} +{"epoch": 0, "iter": 20524, "iter_tflops": 27.43812052296758, "iter_time": 0.544811408996582, "loss": 0.19649408757686615, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 29.156193732712044, "step_time": 0.5127075653076172} +{"epoch": 0, "iter": 20525, "iter_tflops": 24.5609518692412, "iter_time": 0.8399956817626953, "loss": 0.5975404381752014, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.89347387891137, "step_time": 0.7967680816650391} +{"epoch": 0, "iter": 20526, "iter_tflops": 14.131065257806865, "iter_time": 1.459981475830078, "loss": 0.6136643886566162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.569209791312392, "step_time": 1.2451464958190916} +{"epoch": 0, "iter": 20527, "iter_tflops": 42.324984521908554, "iter_time": 0.48744479751586905, "loss": 0.46122679114341736, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.77162344687646, "step_time": 0.45073982429504394} +{"epoch": 0, "iter": 20528, "iter_tflops": 42.70079212287354, "iter_time": 0.48315481948852534, "loss": 0.5131476521492004, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.896647503704884, "step_time": 0.4495119934082031} +{"epoch": 0, "iter": 20529, "iter_tflops": 27.76254862407782, "iter_time": 0.7431267852783204, "loss": 0.02658437192440033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.29574696392531, "step_time": 
0.704235107421875} +{"epoch": 0, "iter": 20530, "iter_tflops": 17.40333202096224, "iter_time": 1.185468017578125, "loss": 0.052580296993255615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.187070731126354, "step_time": 0.9737586555480957} +{"epoch": 0, "iter": 20531, "iter_tflops": 39.658784470067864, "iter_time": 0.520214973449707, "loss": 0.027934158220887184, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.15573880718273, "step_time": 0.4672347030639648} +{"epoch": 0, "iter": 20532, "iter_tflops": 45.12231816725494, "iter_time": 0.45722592163085934, "loss": 0.024185793474316597, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.93219367010305, "step_time": 0.4131821975708008} +{"epoch": 0, "iter": 20533, "iter_tflops": 19.55718767251458, "iter_time": 1.0549110565185547, "loss": 0.027979321777820587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.096315125826365, "step_time": 0.9779477310180664} +{"epoch": 0, "iter": 20534, "iter_tflops": 25.234976792244233, "iter_time": 0.8175594406127931, "loss": 0.05230925977230072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.84844520764867, "step_time": 0.7151544342041015} +{"epoch": 0, "iter": 20535, "iter_tflops": 50.92133564552839, "iter_time": 0.4051561737060546, "loss": 0.0256898645311594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.57975225251338, "step_time": 0.37119801139831543} +{"epoch": 0, "iter": 20536, "iter_tflops": 51.99381730119058, "iter_time": 0.3967989768981934, "loss": 0.021679982542991638, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.464156584660124, "step_time": 0.3653838958740234} +{"epoch": 0, "iter": 20537, "iter_tflops": 26.325173354689696, "iter_time": 0.7837020950317383, "loss": 0.10108808428049088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.67806077679544, "step_time": 0.7453951950073242} +{"epoch": 0, "iter": 20538, "iter_tflops": 12.5863229055884, "iter_time": 1.6391676635742187, "loss": 0.06302209198474884, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
16.667551186035904, "step_time": 1.237799919128418} +{"epoch": 0, "iter": 20539, "iter_tflops": 38.05655520402275, "iter_time": 0.5421166839599609, "loss": 0.10416411608457565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80405831612376, "step_time": 0.49351891517639157} +{"epoch": 0, "iter": 20540, "iter_tflops": 40.40098505927467, "iter_time": 0.5106581802368164, "loss": 0.07785126566886902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.48922280254411, "step_time": 0.4637323875427246} +{"epoch": 0, "iter": 20541, "iter_tflops": 22.906075072945683, "iter_time": 0.9006821746826172, "loss": 0.07411100715398788, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.60780395765085, "step_time": 0.838396369934082} +{"epoch": 0, "iter": 20542, "iter_tflops": 17.99769363880493, "iter_time": 1.1463187408447264, "loss": 0.05910729989409447, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.881074077664163, "step_time": 0.863909782409668} +{"epoch": 0, "iter": 20543, "iter_tflops": 41.513712189406185, "iter_time": 0.4969705772399902, "loss": 0.03975721448659897, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.94800621375645, "step_time": 0.44900954818725586} +{"epoch": 0, "iter": 20544, "iter_tflops": 41.518850183907745, "iter_time": 0.4969090766906738, "loss": 0.07103097438812256, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.83432998610823, "step_time": 0.45012316131591795} +{"epoch": 0, "iter": 20545, "iter_tflops": 33.36613636536386, "iter_time": 0.6183243179321288, "loss": 0.4394267201423645, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.87042996942825, "step_time": 0.5595566291809082} +{"epoch": 0, "iter": 20546, "iter_tflops": 38.21006804697329, "iter_time": 0.5399386749267578, "loss": 0.4614899456501007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.713907983895794, "step_time": 0.4945854873657226} +{"epoch": 0, "iter": 20547, "iter_tflops": 38.875600857116375, "iter_time": 0.5306951675415039, "loss": 0.3526306748390198, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 42.30815936022044, "step_time": 0.4876386451721192} +{"epoch": 0, "iter": 20548, "iter_tflops": 34.23554251746108, "iter_time": 0.6026220703124999, "loss": 0.4817034602165222, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.32245148531269, "step_time": 0.5527796993255616} +{"epoch": 0, "iter": 20549, "iter_tflops": 19.317022516880296, "iter_time": 1.068026580810547, "loss": 0.4895820617675781, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.69740251025162, "step_time": 0.9967962646484374} +{"epoch": 0, "iter": 20550, "iter_tflops": 19.725155775539704, "iter_time": 1.045928039550781, "loss": 0.5592700242996216, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.70431945491676, "step_time": 0.8703516483306885} +{"epoch": 0, "iter": 20551, "iter_tflops": 45.27403197107701, "iter_time": 0.45569375228881837, "loss": 0.4298018217086792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.95599370269146, "step_time": 0.42142119789123533} +{"epoch": 0, "iter": 20552, "iter_tflops": 43.85915586089323, "iter_time": 0.47039422225952154, "loss": 0.7287433743476868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.31646337753714, "step_time": 0.4360235748291016} +{"epoch": 0, "iter": 20553, "iter_tflops": 28.231334658161256, "iter_time": 0.7307870407104493, "loss": 0.07180151343345642, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.798751533808385, "step_time": 0.6923475799560547} +{"epoch": 0, "iter": 20554, "iter_tflops": 13.018614074431559, "iter_time": 1.584738082885742, "loss": 0.03523489087820053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.77869425635228, "step_time": 1.3075285682678224} +{"epoch": 0, "iter": 20555, "iter_tflops": 38.2584178514846, "iter_time": 0.539256317138672, "loss": 0.0525088757276535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.018777143330105, "step_time": 0.4909969997406006} +{"epoch": 0, "iter": 20556, "iter_tflops": 47.23736687155393, "iter_time": 0.4367536735534667, "loss": 0.07286150008440018, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 51.87643429609939, "step_time": 0.3976968307495118} +{"epoch": 0, "iter": 20557, "iter_tflops": 18.120931170366116, "iter_time": 1.1385228118896484, "loss": 0.08119814842939377, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.16345166350185, "step_time": 1.0765854644775392} +{"epoch": 0, "iter": 20558, "iter_tflops": 13.92014213649801, "iter_time": 1.4821036529541014, "loss": 0.10234160721302032, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.38011261547264, "step_time": 1.0645497226715088} +{"epoch": 0, "iter": 20559, "iter_tflops": 39.14230588645897, "iter_time": 0.5270791549682616, "loss": 0.04971207305788994, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.87208003956264, "step_time": 0.48122445869445796} +{"epoch": 0, "iter": 20560, "iter_tflops": 39.04900697172455, "iter_time": 0.528338493347168, "loss": 0.09806273132562637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.672558509502736, "step_time": 0.4834744911193848} +{"epoch": 0, "iter": 20561, "iter_tflops": 23.435515387126753, "iter_time": 0.8803345336914064, "loss": 0.10747047513723373, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.04135227888395, "step_time": 0.8238809661865235} +{"epoch": 0, "iter": 20562, "iter_tflops": 13.203974763575541, "iter_time": 1.5624911346435546, "loss": 0.13904018700122833, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.751805058057746, "step_time": 1.3097605915069581} +{"epoch": 0, "iter": 20563, "iter_tflops": 49.73761752568375, "iter_time": 0.4147985877990723, "loss": 0.10973326861858368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.3070251566466, "step_time": 0.37989732360839845} +{"epoch": 0, "iter": 20564, "iter_tflops": 44.3740587239399, "iter_time": 0.46493591308593746, "loss": 0.0671067014336586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.73090044220897, "step_time": 0.4322376766204834} +{"epoch": 0, "iter": 20565, "iter_tflops": 33.05943764430604, "iter_time": 0.6240606307983398, "loss": 0.666318416595459, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 35.351803582807406, "step_time": 0.5835938034057617} +{"epoch": 0, "iter": 20566, "iter_tflops": 13.520874703264944, "iter_time": 1.5258697357177735, "loss": 0.4464903473854065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.38700467080912, "step_time": 1.2589911289215086} +{"epoch": 0, "iter": 20567, "iter_tflops": 37.95879209299622, "iter_time": 0.5435129089355468, "loss": 0.49726396799087524, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.09654604198206, "step_time": 0.5020152664184571} +{"epoch": 0, "iter": 20568, "iter_tflops": 33.107199639936205, "iter_time": 0.6231603317260742, "loss": 0.6756832599639893, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.74144202706366, "step_time": 0.5772317047119141} +{"epoch": 0, "iter": 20569, "iter_tflops": 16.78533601259906, "iter_time": 1.2291141204833984, "loss": 0.5678779482841492, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.717261590227835, "step_time": 1.1644628829956054} +{"epoch": 0, "iter": 20570, "iter_tflops": 16.56913765837607, "iter_time": 1.2451519165039062, "loss": 0.8330885767936707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.97152965103736, "step_time": 0.8606498546600343} +{"epoch": 0, "iter": 20571, "iter_tflops": 44.594225041454735, "iter_time": 0.4626404762268067, "loss": 0.6939573287963867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.243939380704965, "step_time": 0.42764114570617673} +{"epoch": 0, "iter": 20572, "iter_tflops": 43.28021972131855, "iter_time": 0.47668643188476567, "loss": 0.49506935477256775, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.532808636064985, "step_time": 0.44336660766601566} +{"epoch": 0, "iter": 20573, "iter_tflops": 33.17829551947273, "iter_time": 0.6218249969482422, "loss": 0.10346407443284988, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.47182393084349, "step_time": 0.5816191902160645} +{"epoch": 0, "iter": 20574, "iter_tflops": 8.249756535298738, "iter_time": 2.5008124084472656, "loss": 0.11747504770755768, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.29563583960788, "step_time": 2.0038678359985354} +{"epoch": 0, "iter": 20575, "iter_tflops": 12.15155894076761, "iter_time": 1.6978145446777342, "loss": 0.11267123371362686, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.155605707637166, "step_time": 1.3612846565246581} +{"epoch": 0, "iter": 20576, "iter_tflops": 35.24090591756259, "iter_time": 0.5854302825927734, "loss": 0.10984209179878235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.68436705731611, "step_time": 0.4722763519287109} +{"epoch": 0, "iter": 20577, "iter_tflops": 14.807638477633844, "iter_time": 1.0122785873413087, "loss": 0.18656352162361145, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 15.981688667495563, "step_time": 0.937914363861084} +{"epoch": 0, "iter": 20578, "iter_tflops": 9.809792345970688, "iter_time": 1.5280094451904296, "loss": 0.3217677175998688, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 11.49015749109665, "step_time": 1.3045474243164064} +{"epoch": 0, "iter": 20579, "iter_tflops": 25.845029147164023, "iter_time": 0.5799744033813476, "loss": 0.19991044700145721, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 27.577753982265133, "step_time": 0.5435343055725098} +{"epoch": 0, "iter": 20580, "iter_tflops": 27.471329859825595, "iter_time": 0.5456399612426757, "loss": 0.3289972245693207, "lr": 3e-05, "seqlen": 6000.0, "step_tflops": 29.228402056551662, "step_time": 0.5128386878967285} +{"epoch": 0, "iter": 20581, "iter_tflops": 31.65950496573839, "iter_time": 0.6516555938720703, "loss": 0.3803248107433319, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.212939384859936, "step_time": 0.6030201988220215} +{"epoch": 0, "iter": 20582, "iter_tflops": 12.039409051442737, "iter_time": 1.7136300811767577, "loss": 0.5904673337936401, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.44857249440878, "step_time": 1.4278983974456787} +{"epoch": 0, "iter": 20583, "iter_tflops": 37.177264205684395, "iter_time": 0.5549384536743164, "loss": 
0.6381596326828003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.12159823140599, "step_time": 0.5142141494750976} +{"epoch": 0, "iter": 20584, "iter_tflops": 36.59231454338841, "iter_time": 0.5638094711303712, "loss": 0.4194941222667694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.735209443626246, "step_time": 0.5192144145965576} +{"epoch": 0, "iter": 20585, "iter_tflops": 21.06544438816188, "iter_time": 0.9793808822631835, "loss": 0.44549477100372314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.553591733632857, "step_time": 0.9147586669921874} +{"epoch": 0, "iter": 20586, "iter_tflops": 10.863400563880226, "iter_time": 1.8991376953125, "loss": 0.3773755133152008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.028888600571172, "step_time": 1.5834883651733398} +{"epoch": 0, "iter": 20587, "iter_tflops": 10.096057937882286, "iter_time": 2.0434801025390623, "loss": 0.2950577139854431, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.957027507267245, "step_time": 1.592270565032959} +{"epoch": 0, "iter": 20588, "iter_tflops": 37.87661967759805, "iter_time": 0.5446920471191407, "loss": 0.3557506799697876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.36796060195487, "step_time": 0.49872155189514156} +{"epoch": 0, "iter": 20589, "iter_tflops": 13.4709284416522, "iter_time": 1.1704154510498046, "loss": 0.18117664754390717, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 14.52094664436955, "step_time": 1.0857820205688478} +{"epoch": 0, "iter": 20590, "iter_tflops": 9.818504074707414, "iter_time": 1.605802947998047, "loss": 0.15963269770145416, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 11.66077509709071, "step_time": 1.3521041831970213} +{"epoch": 0, "iter": 20591, "iter_tflops": 24.457958723066167, "iter_time": 0.6446401748657227, "loss": 0.13666774332523346, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 26.411022658356366, "step_time": 0.5969697952270508} +{"epoch": 0, "iter": 20592, "iter_tflops": 24.818692748374687, "iter_time": 0.6352704772949217, 
"loss": 0.10374359786510468, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 26.632148280854665, "step_time": 0.5920131797790527} +{"epoch": 0, "iter": 20593, "iter_tflops": 14.583348461743302, "iter_time": 1.4147020874023437, "loss": 0.32358065247535706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.717727302586008, "step_time": 1.3126002960205077} +{"epoch": 0, "iter": 20594, "iter_tflops": 25.43514269732414, "iter_time": 0.8111255264282228, "loss": 0.3491264283657074, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.33935789913753, "step_time": 0.5525294132232667} +{"epoch": 0, "iter": 20595, "iter_tflops": 50.44528387521203, "iter_time": 0.40897962951660155, "loss": 0.22870956361293793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.56167639955543, "step_time": 0.37812425994873045} +{"epoch": 0, "iter": 20596, "iter_tflops": 46.547788317756996, "iter_time": 0.44322392654418946, "loss": 0.33594274520874023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.9849210879163, "step_time": 0.4127463455200195} +{"epoch": 0, "iter": 20597, "iter_tflops": 40.88240379933072, "iter_time": 0.5046448249816895, "loss": 0.14134663343429565, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.52535871177478, "step_time": 0.4633560314178467} +{"epoch": 0, "iter": 20598, "iter_tflops": 47.227086766333, "iter_time": 0.4368487434387207, "loss": 0.08474945276975632, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.18836427586666, "step_time": 0.40304264068603524} +{"epoch": 0, "iter": 20599, "iter_tflops": 52.41218068154447, "iter_time": 0.3936316566467285, "loss": 0.12527061998844147, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.69601827797514, "step_time": 0.36388963699340815} +{"epoch": 0, "iter": 20600, "iter_tflops": 49.03573705856365, "iter_time": 0.42073587036132815, "loss": 0.09468438476324081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.25694997614278, "step_time": 0.38738781547546386} +{"epoch": 0, "iter": 20601, "iter_tflops": 20.34442159288267, "iter_time": 
1.014090934753418, "loss": 0.2619316279888153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.224705307760424, "step_time": 0.9720320358276366} +{"epoch": 0, "iter": 20602, "iter_tflops": 14.863666232224537, "iter_time": 1.3880218505859374, "loss": 0.3332327902317047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.802472870095556, "step_time": 0.9917615871429443} +{"epoch": 0, "iter": 20603, "iter_tflops": 37.971849970701264, "iter_time": 0.5433260040283203, "loss": 0.44741126894950867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.53674014449246, "step_time": 0.49669505691528315} +{"epoch": 0, "iter": 20604, "iter_tflops": 38.119851599578006, "iter_time": 0.5412165222167968, "loss": 0.3990587890148163, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.71238558495288, "step_time": 0.49460353851318367} +{"epoch": 0, "iter": 20605, "iter_tflops": 20.062597481319827, "iter_time": 1.0283361129760742, "loss": 0.2343701273202896, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.380445230511285, "step_time": 0.9649515380859375} +{"epoch": 0, "iter": 20606, "iter_tflops": 15.741609658848084, "iter_time": 1.31060888671875, "loss": 0.21154259145259857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.997543149902544, "step_time": 1.0316814098358154} +{"epoch": 0, "iter": 20607, "iter_tflops": 48.09339745590712, "iter_time": 0.42897974777221676, "loss": 0.2575604319572449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.35965721775397, "step_time": 0.3940265197753906} +{"epoch": 0, "iter": 20608, "iter_tflops": 40.982758411406536, "iter_time": 0.5034090995788574, "loss": 0.19601841270923615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.95272732691635, "step_time": 0.4589508743286133} +{"epoch": 0, "iter": 20609, "iter_tflops": 21.720743983727843, "iter_time": 0.9498336486816406, "loss": 0.36643579602241516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.904802305985182, "step_time": 0.9007322235107422} +{"epoch": 0, "iter": 20610, "iter_tflops": 
13.300851101035303, "iter_time": 1.5511107788085936, "loss": 0.5471289157867432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.902832786107137, "step_time": 1.2205701713562014} +{"epoch": 0, "iter": 20611, "iter_tflops": 49.126809730261684, "iter_time": 0.41995589828491214, "loss": 0.38999953866004944, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.33106575010779, "step_time": 0.3868494510650635} +{"epoch": 0, "iter": 20612, "iter_tflops": 50.36974069667779, "iter_time": 0.4095930061340332, "loss": 0.4469745457172394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.67669614244517, "step_time": 0.37732882499694825} +{"epoch": 0, "iter": 20613, "iter_tflops": 37.736050873271715, "iter_time": 0.5467210540771484, "loss": 0.046685896813869476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.754442362540395, "step_time": 0.5062293167114258} +{"epoch": 0, "iter": 20614, "iter_tflops": 39.25815797423294, "iter_time": 0.5255237274169922, "loss": 0.019601060077548027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.59288845784751, "step_time": 0.473267412185669} +{"epoch": 0, "iter": 20615, "iter_tflops": 42.398526533234474, "iter_time": 0.4865993041992187, "loss": 0.02048993483185768, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.61571980506955, "step_time": 0.44257803153991704} +{"epoch": 0, "iter": 20616, "iter_tflops": 48.069360310825466, "iter_time": 0.4291942596435547, "loss": 0.017160015180706978, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.79755979003943, "step_time": 0.390758466720581} +{"epoch": 0, "iter": 20617, "iter_tflops": 35.29118515725236, "iter_time": 0.5845962219238281, "loss": 0.38257405161857605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.92179579033802, "step_time": 0.5300653038024903} +{"epoch": 0, "iter": 20618, "iter_tflops": 36.83837717564879, "iter_time": 0.5600434951782226, "loss": 0.38695016503334045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.157735922121674, "step_time": 0.5137514114379882} +{"epoch": 0, "iter": 
20619, "iter_tflops": 37.68027278361444, "iter_time": 0.5475303649902343, "loss": 0.35768866539001465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.01022018616365, "step_time": 0.5030720005035401} +{"epoch": 0, "iter": 20620, "iter_tflops": 42.781933002624385, "iter_time": 0.48223846054077146, "loss": 0.26808080077171326, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.47949170132746, "step_time": 0.4438751964569092} +{"epoch": 0, "iter": 20621, "iter_tflops": 22.022152713230156, "iter_time": 0.9368336410522461, "loss": 0.10351460427045822, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.119541805793034, "step_time": 0.8923660202026367} +{"epoch": 0, "iter": 20622, "iter_tflops": 19.437251060634406, "iter_time": 1.0614203338623047, "loss": 0.12474533915519714, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.78562707357554, "step_time": 0.9470048046112062} +{"epoch": 0, "iter": 20623, "iter_tflops": 47.80773917433004, "iter_time": 0.4315429649353028, "loss": 0.12599661946296692, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.99010541700149, "step_time": 0.3968273067474365} +{"epoch": 0, "iter": 20624, "iter_tflops": 48.21283148007588, "iter_time": 0.4279170684814453, "loss": 0.15136389434337616, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.32095963936713, "step_time": 0.39431794929504393} +{"epoch": 0, "iter": 20625, "iter_tflops": 33.366559597162855, "iter_time": 0.6183164749145508, "loss": 0.08483785390853882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.542127735939644, "step_time": 0.5804687232971191} +{"epoch": 0, "iter": 20626, "iter_tflops": 9.12992075221701, "iter_time": 2.259723175048828, "loss": 0.08630941808223724, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.681874110478429, "step_time": 1.9314114074707032} +{"epoch": 0, "iter": 20627, "iter_tflops": 14.810802004427995, "iter_time": 1.392976119995117, "loss": 0.08453042060136795, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.598539340361288, "step_time": 1.2429463272094727} 
+{"epoch": 0, "iter": 20628, "iter_tflops": 47.65050435313258, "iter_time": 0.4329669494628907, "loss": 0.06893334537744522, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.31933959356303, "step_time": 0.3798111991882324} +{"epoch": 0, "iter": 20629, "iter_tflops": 16.972056877025324, "iter_time": 0.8807772216796875, "loss": 0.17381593585014343, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 17.828423466732477, "step_time": 0.8384701614379884} +{"epoch": 0, "iter": 20630, "iter_tflops": 11.372332957955635, "iter_time": 1.3144709320068357, "loss": 0.2392774224281311, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 14.482408738689804, "step_time": 1.0321902503967286} +{"epoch": 0, "iter": 20631, "iter_tflops": 27.345475757914635, "iter_time": 0.5466571960449219, "loss": 0.24707670509815216, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 29.099719109675746, "step_time": 0.5137025909423828} +{"epoch": 0, "iter": 20632, "iter_tflops": 26.490257080254064, "iter_time": 0.5643056259155274, "loss": 0.24839960038661957, "lr": 3e-05, "seqlen": 5984.0, "step_tflops": 28.125046000197017, "step_time": 0.5315049476623535} +{"epoch": 0, "iter": 20633, "iter_tflops": 43.95653923104003, "iter_time": 0.46935208892822267, "loss": 0.03182158246636391, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.237107872056036, "step_time": 0.4277017097473145} +{"epoch": 0, "iter": 20634, "iter_tflops": 47.42622407831202, "iter_time": 0.43501446533203125, "loss": 0.02364707924425602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.13620833114991, "step_time": 0.3957152652740478} +{"epoch": 0, "iter": 20635, "iter_tflops": 57.42816206434982, "iter_time": 0.35925045776367187, "loss": 0.045580312609672546, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.63016524074856, "step_time": 0.329411449432373} +{"epoch": 0, "iter": 20636, "iter_tflops": 61.02094939192819, "iter_time": 0.3380985336303711, "loss": 0.07508943974971771, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 66.6133238379181, "step_time": 
0.30971421813964845} +{"epoch": 0, "iter": 20637, "iter_tflops": 25.88716779433189, "iter_time": 0.7969621734619141, "loss": 0.5330994725227356, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.270013334209562, "step_time": 0.756548713684082} +{"epoch": 0, "iter": 20638, "iter_tflops": 12.194368012427692, "iter_time": 1.6918542633056641, "loss": 0.42850756645202637, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.752832984608343, "step_time": 1.3096751251220704} +{"epoch": 0, "iter": 20639, "iter_tflops": 15.02304306547913, "iter_time": 1.373296569824219, "loss": 0.3354429006576538, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.543812499138767, "step_time": 1.1759754905700683} +{"epoch": 0, "iter": 20640, "iter_tflops": 22.883191728861494, "iter_time": 0.9015828628540039, "loss": 0.34927958250045776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.215024649413106, "step_time": 0.7312094802856446} +{"epoch": 0, "iter": 20641, "iter_tflops": 12.47074230085146, "iter_time": 1.3070018310546876, "loss": 0.22934284806251526, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 13.108923394035207, "step_time": 1.2433731231689453} +{"epoch": 0, "iter": 20642, "iter_tflops": 11.554200638479601, "iter_time": 1.4106802825927733, "loss": 0.2938190996646881, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 13.572135166455904, "step_time": 1.200937274932861} +{"epoch": 0, "iter": 20643, "iter_tflops": 25.09574833790199, "iter_time": 0.6494838409423829, "loss": 0.20806778967380524, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 27.024183823334432, "step_time": 0.6031369209289551} +{"epoch": 0, "iter": 20644, "iter_tflops": 24.547566243544768, "iter_time": 0.6639877395629883, "loss": 0.237986221909523, "lr": 3e-05, "seqlen": 6512.0, "step_tflops": 26.240795734006223, "step_time": 0.6211428642272949} +{"epoch": 0, "iter": 20645, "iter_tflops": 19.13183774177641, "iter_time": 1.0783644409179687, "loss": 0.11702809482812881, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.660847071926145, 
"step_time": 0.9985599060058594} +{"epoch": 0, "iter": 20646, "iter_tflops": 23.181988423656044, "iter_time": 0.8899622039794922, "loss": 0.0794188380241394, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.29567696712582, "step_time": 0.7291252841949463} +{"epoch": 0, "iter": 20647, "iter_tflops": 40.012341670868466, "iter_time": 0.5156182479858398, "loss": 0.10008647292852402, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.8460588662373, "step_time": 0.47053473091125486} +{"epoch": 0, "iter": 20648, "iter_tflops": 40.37295144954644, "iter_time": 0.5110127639770509, "loss": 0.09170298278331757, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.309944085886926, "step_time": 0.4656086559295654} +{"epoch": 0, "iter": 20649, "iter_tflops": 26.00025633903756, "iter_time": 0.7934957733154296, "loss": 0.5933738350868225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.601175523539183, "step_time": 0.7213372573852539} +{"epoch": 0, "iter": 20650, "iter_tflops": 35.69993730515741, "iter_time": 0.5779027938842773, "loss": 0.5143027305603027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.75792120216799, "step_time": 0.5323065032958985} +{"epoch": 0, "iter": 20651, "iter_tflops": 38.89151825420461, "iter_time": 0.5304779663085937, "loss": 0.4838724732398987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.37841607688522, "step_time": 0.48683021736145016} +{"epoch": 0, "iter": 20652, "iter_tflops": 36.246726053946034, "iter_time": 0.5691850204467773, "loss": 0.5194961428642273, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.29387283340442, "step_time": 0.5250460700988768} +{"epoch": 0, "iter": 20653, "iter_tflops": 19.940094949850067, "iter_time": 1.0346537246704102, "loss": 0.5859399437904358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.175428952379534, "step_time": 0.9742940063476563} +{"epoch": 0, "iter": 20654, "iter_tflops": 19.091968711409926, "iter_time": 1.0806163482666016, "loss": 0.5125126242637634, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
23.04144134368604, "step_time": 0.8953907527923582} +{"epoch": 0, "iter": 20655, "iter_tflops": 47.835022030663715, "iter_time": 0.43129683303833005, "loss": 0.7119815349578857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.511608177117374, "step_time": 0.4005134811401367} +{"epoch": 0, "iter": 20656, "iter_tflops": 45.44761935317321, "iter_time": 0.4539532279968262, "loss": 0.6179741024971008, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.07722203546479, "step_time": 0.4203802223205566} +{"epoch": 0, "iter": 20657, "iter_tflops": 25.912444845968327, "iter_time": 0.7961847534179688, "loss": 0.040429696440696716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.321494779992133, "step_time": 0.755123161315918} +{"epoch": 0, "iter": 20658, "iter_tflops": 17.709491869191453, "iter_time": 1.1649737701416016, "loss": 0.049242667853832245, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.778655952737136, "step_time": 1.043098861694336} +{"epoch": 0, "iter": 20659, "iter_tflops": 44.85879810826413, "iter_time": 0.459911865234375, "loss": 0.03744367137551308, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.76880799087009, "step_time": 0.4230387077331543} +{"epoch": 0, "iter": 20660, "iter_tflops": 53.34969811419382, "iter_time": 0.3867143440246582, "loss": 0.06713149696588516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.08104296953686, "step_time": 0.355212173461914} +{"epoch": 0, "iter": 20661, "iter_tflops": 28.853138085038218, "iter_time": 0.7150381164550782, "loss": 0.013927260413765907, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.502947259614228, "step_time": 0.6763639373779297} +{"epoch": 0, "iter": 20662, "iter_tflops": 14.236338134682677, "iter_time": 1.4491854095458985, "loss": 0.017534911632537842, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.656488963794725, "step_time": 1.2386219902038573} +{"epoch": 0, "iter": 20663, "iter_tflops": 42.72289867591425, "iter_time": 0.48290481567382815, "loss": 0.029816124588251114, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 47.27742446660171, "step_time": 0.43638361740112297} +{"epoch": 0, "iter": 20664, "iter_tflops": 48.22199476839172, "iter_time": 0.42783575439453125, "loss": 0.015606128610670567, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.01779652521864, "step_time": 0.3891352500915527} +{"epoch": 0, "iter": 20665, "iter_tflops": 18.29923670920044, "iter_time": 1.1274291839599608, "loss": 0.05845494195818901, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.55692699703612, "step_time": 1.0549251174926757} +{"epoch": 0, "iter": 20666, "iter_tflops": 31.65378115250853, "iter_time": 0.6517734298706054, "loss": 0.11926239728927612, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.905507256738844, "step_time": 0.5910555419921875} +{"epoch": 0, "iter": 20667, "iter_tflops": 51.92060427969435, "iter_time": 0.39735850143432616, "loss": 0.15153607726097107, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.74128241236924, "step_time": 0.3635993518829346} +{"epoch": 0, "iter": 20668, "iter_tflops": 50.21060364108373, "iter_time": 0.41089116668701164, "loss": 0.06843986362218857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.32732429790277, "step_time": 0.37975537681579585} +{"epoch": 0, "iter": 20669, "iter_tflops": 28.27146694167553, "iter_time": 0.7297496643066407, "loss": 0.16628123819828033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.056231069788044, "step_time": 0.6864165191650391} +{"epoch": 0, "iter": 20670, "iter_tflops": 9.738389375585461, "iter_time": 2.1185324096679685, "loss": 0.13203613460063934, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.091158813836854, "step_time": 1.8601386795043942} +{"epoch": 0, "iter": 20671, "iter_tflops": 12.291532852478342, "iter_time": 1.6784801177978514, "loss": 0.17123088240623474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.512628643490315, "step_time": 1.4215959091186523} +{"epoch": 0, "iter": 20672, "iter_tflops": 45.597822922133886, "iter_time": 0.45245786285400386, "loss": 0.16405898332595825, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.553696125487, "step_time": 0.4163381366729736} +{"epoch": 0, "iter": 20673, "iter_tflops": 22.284385709941116, "iter_time": 0.6616471633911133, "loss": 0.2995202839374542, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.677132847663156, "step_time": 0.6227274513244629} +{"epoch": 0, "iter": 20674, "iter_tflops": 12.99545743232713, "iter_time": 1.1345811157226562, "loss": 0.2217877358198166, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 15.31742687382968, "step_time": 0.9625899124145508} +{"epoch": 0, "iter": 20675, "iter_tflops": 22.940269577839583, "iter_time": 0.6427300491333008, "loss": 0.06389791518449783, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.710956141008413, "step_time": 0.5966746292114258} +{"epoch": 0, "iter": 20676, "iter_tflops": 22.356829346301666, "iter_time": 0.6595032043457031, "loss": 0.1770007610321045, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.06949367997764, "step_time": 0.6125762672424315} +{"epoch": 0, "iter": 20677, "iter_tflops": 21.368038981056863, "iter_time": 0.9655117874145508, "loss": 0.022927874699234962, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.74499558135918, "step_time": 0.9070607833862305} +{"epoch": 0, "iter": 20678, "iter_tflops": 16.617528751748, "iter_time": 1.2415259704589843, "loss": 0.019919874146580696, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.67586179659204, "step_time": 0.9518003807067872} +{"epoch": 0, "iter": 20679, "iter_tflops": 55.313437548894406, "iter_time": 0.37298519897460936, "loss": 0.031726714223623276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.878859227781575, "step_time": 0.3388876495361328} +{"epoch": 0, "iter": 20680, "iter_tflops": 51.40527801350478, "iter_time": 0.4013419303894043, "loss": 0.012973872944712639, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.47165367012269, "step_time": 0.37192137145996096} +{"epoch": 0, "iter": 20681, "iter_tflops": 3.0612083988059324, "iter_time": 0.4478428230285645, "loss": 
1.2010236978530884, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.3523886382599914, "step_time": 0.4089442958831787} +{"epoch": 0, "iter": 20682, "iter_tflops": 3.0475073843053004, "iter_time": 0.4498562393188476, "loss": 1.2969847917556763, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.3456434877951, "step_time": 0.4097687683105469} +{"epoch": 0, "iter": 20683, "iter_tflops": 3.2556490213697353, "iter_time": 0.4210958251953125, "loss": 1.079056739807129, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.531885567815049, "step_time": 0.38816099357604983} +{"epoch": 0, "iter": 20684, "iter_tflops": 3.4262604777839094, "iter_time": 0.4001272583007812, "loss": 1.2697687149047852, "lr": 3e-05, "seqlen": 560.0, "step_tflops": 3.7103844296724136, "step_time": 0.36948737716674807} +{"epoch": 0, "iter": 20685, "iter_tflops": 35.76183214150438, "iter_time": 0.576902587890625, "loss": 0.07603935152292252, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.47672045391208, "step_time": 0.53619677734375} +{"epoch": 0, "iter": 20686, "iter_tflops": 17.061366731381195, "iter_time": 1.2092286529541016, "loss": 0.11030448228120804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.881999173380695, "step_time": 0.9428340320587159} +{"epoch": 0, "iter": 20687, "iter_tflops": 40.0651840153267, "iter_time": 0.5149381942749023, "loss": 0.08634786307811737, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.247039609659225, "step_time": 0.46627059555053707} +{"epoch": 0, "iter": 20688, "iter_tflops": 42.64568370714514, "iter_time": 0.48377917098999024, "loss": 0.1079910472035408, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.85410579602123, "step_time": 0.4403262672424316} +{"epoch": 0, "iter": 20689, "iter_tflops": 21.04638008727054, "iter_time": 0.9802680282592774, "loss": 0.12202232331037521, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.65979407311886, "step_time": 0.9104713592529297} +{"epoch": 0, "iter": 20690, "iter_tflops": 16.68149564126868, "iter_time": 1.2367652130126954, 
"loss": 0.08498632907867432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.432565024889527, "step_time": 0.9626049652099609} +{"epoch": 0, "iter": 20691, "iter_tflops": 36.76322308464407, "iter_time": 0.5611883773803712, "loss": 0.13192306458950043, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.08181811754679, "step_time": 0.5147244930267334} +{"epoch": 0, "iter": 20692, "iter_tflops": 39.0373324780886, "iter_time": 0.5284964981079101, "loss": 0.12363141775131226, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.81051727713978, "step_time": 0.48191647338867183} +{"epoch": 0, "iter": 20693, "iter_tflops": 19.220473907005573, "iter_time": 1.0733915100097655, "loss": 0.1817425936460495, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.325335013394245, "step_time": 1.0150432205200195} +{"epoch": 0, "iter": 20694, "iter_tflops": 13.50359027390673, "iter_time": 1.5278228302001953, "loss": 0.19126124680042267, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.046802158485143, "step_time": 1.2102618026733398} +{"epoch": 0, "iter": 20695, "iter_tflops": 39.88356465050405, "iter_time": 0.5172830886840821, "loss": 0.30803871154785156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77758594739061, "step_time": 0.47127069854736325} +{"epoch": 0, "iter": 20696, "iter_tflops": 39.64084175204577, "iter_time": 0.520450439453125, "loss": 0.21492555737495422, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.05548241993842, "step_time": 0.4791745986938477} +{"epoch": 0, "iter": 20697, "iter_tflops": 28.119863289763508, "iter_time": 0.7336839904785155, "loss": 0.4073159098625183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.84358808457554, "step_time": 0.6688940811157227} +{"epoch": 0, "iter": 20698, "iter_tflops": 34.273734138448376, "iter_time": 0.6019505615234375, "loss": 0.4567088484764099, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.35989126207063, "step_time": 0.5378298225402832} +{"epoch": 0, "iter": 20699, "iter_tflops": 40.46931304920467, "iter_time": 
0.5097959899902345, "loss": 0.5151668787002563, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55145446524454, "step_time": 0.463084623336792} +{"epoch": 0, "iter": 20700, "iter_tflops": 39.82134413891597, "iter_time": 0.5180913391113281, "loss": 0.329261839389801, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.354571341185235, "step_time": 0.4758689308166504} +{"epoch": 0, "iter": 20701, "iter_tflops": 20.875697139522575, "iter_time": 0.9882828521728515, "loss": 0.36128848791122437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.650975291427365, "step_time": 0.9108258361816406} +{"epoch": 0, "iter": 20702, "iter_tflops": 18.942809009181214, "iter_time": 1.0891253509521484, "loss": 0.4822843372821808, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.85694876310683, "step_time": 0.9026180057525635} +{"epoch": 0, "iter": 20703, "iter_tflops": 39.73923142607967, "iter_time": 0.5191618652343749, "loss": 0.4854583442211151, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.57172707061095, "step_time": 0.4734972629547119} +{"epoch": 0, "iter": 20704, "iter_tflops": 43.097361874452396, "iter_time": 0.47870896530151363, "loss": 0.41606998443603516, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81193531059503, "step_time": 0.44072293472290036} +{"epoch": 0, "iter": 20705, "iter_tflops": 14.851640317728219, "iter_time": 1.3891457824707032, "loss": 0.36815351247787476, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.650798760061807, "step_time": 1.3182134552001954} +{"epoch": 0, "iter": 20706, "iter_tflops": 20.22184975276796, "iter_time": 1.0202377014160158, "loss": 0.2675897181034088, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.85166368677426, "step_time": 0.830169511795044} +{"epoch": 0, "iter": 20707, "iter_tflops": 49.65337247594315, "iter_time": 0.4155023612976075, "loss": 0.26985564827919006, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.64872026492373, "step_time": 0.38455891227722167} +{"epoch": 0, "iter": 20708, "iter_tflops": 48.44725603000536, 
"iter_time": 0.42584648132324215, "loss": 0.3640983998775482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.275967691531655, "step_time": 0.39465732383728025} +{"epoch": 0, "iter": 20709, "iter_tflops": 31.272314850358853, "iter_time": 0.6597238998413085, "loss": 0.2624630033969879, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.260838291113345, "step_time": 0.620281826019287} +{"epoch": 0, "iter": 20710, "iter_tflops": 15.378651585926466, "iter_time": 1.3415411224365232, "loss": 0.37019407749176025, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.19091611136313, "step_time": 1.0218007640838622} +{"epoch": 0, "iter": 20711, "iter_tflops": 39.41964004694271, "iter_time": 0.5233709259033202, "loss": 0.2641698122024536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.15567398935653, "step_time": 0.47806213188171387} +{"epoch": 0, "iter": 20712, "iter_tflops": 43.263433048600106, "iter_time": 0.47687139129638667, "loss": 0.3271697461605072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.26767738418187, "step_time": 0.43647360420227044} +{"epoch": 0, "iter": 20713, "iter_tflops": 21.842879723583554, "iter_time": 0.9445225982666017, "loss": 0.4418434798717499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.327475368413126, "step_time": 0.8844117584228515} +{"epoch": 0, "iter": 20714, "iter_tflops": 10.934755658952286, "iter_time": 1.8867448120117187, "loss": 0.6606463193893433, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.244119490040415, "step_time": 1.6849797592163085} +{"epoch": 0, "iter": 20715, "iter_tflops": 10.845221717552565, "iter_time": 1.902321044921875, "loss": 0.6285130977630615, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.117256460574737, "step_time": 1.5728207778930665} +{"epoch": 0, "iter": 20716, "iter_tflops": 23.9987615853599, "iter_time": 0.8596732559204101, "loss": 0.5044713616371155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.093648957707128, "step_time": 0.7614734191894531} +{"epoch": 0, "iter": 20717, "iter_tflops": 
19.536537031288198, "iter_time": 0.7338226547241211, "loss": 0.15807746350765228, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 20.76507784658456, "step_time": 0.6904069213867189} +{"epoch": 0, "iter": 20718, "iter_tflops": 6.797070244068824, "iter_time": 2.1091960144042967, "loss": 0.14668139815330505, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 7.661505449364985, "step_time": 1.8712188568115236} +{"epoch": 0, "iter": 20719, "iter_tflops": 9.112525502814789, "iter_time": 1.5732579803466795, "loss": 0.20308437943458557, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 11.350998786073395, "step_time": 1.2630037002563475} +{"epoch": 0, "iter": 20720, "iter_tflops": 17.10003836269538, "iter_time": 0.8383813629150391, "loss": 0.3403065502643585, "lr": 3e-05, "seqlen": 5744.0, "step_tflops": 19.282155428152656, "step_time": 0.743503677368164} +{"epoch": 0, "iter": 20721, "iter_tflops": 14.11027064291982, "iter_time": 1.1813056640625, "loss": 0.21207495033740997, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 15.143782494212289, "step_time": 1.1006855545043945} +{"epoch": 0, "iter": 20722, "iter_tflops": 12.787756792563128, "iter_time": 1.3034766693115234, "loss": 0.13037943840026855, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 15.465797905383205, "step_time": 1.077768035888672} +{"epoch": 0, "iter": 20723, "iter_tflops": 25.35744548016836, "iter_time": 0.657343132019043, "loss": 0.3271155059337616, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 27.261137218527562, "step_time": 0.6114397392272949} +{"epoch": 0, "iter": 20724, "iter_tflops": 24.31201906554378, "iter_time": 0.685609146118164, "loss": 0.2451706826686859, "lr": 3e-05, "seqlen": 6656.0, "step_tflops": 26.19164840697254, "step_time": 0.6364067802429199} +{"epoch": 0, "iter": 20725, "iter_tflops": 16.327446961261224, "iter_time": 1.2635835571289062, "loss": 0.034905385226011276, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.277059837282106, "step_time": 1.1941322021484375} +{"epoch": 0, "iter": 20726, 
"iter_tflops": 17.693035348816863, "iter_time": 1.166057327270508, "loss": 0.024341639131307602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.685407148732825, "step_time": 0.8710466060638429} +{"epoch": 0, "iter": 20727, "iter_tflops": 40.30176543582868, "iter_time": 0.5119153785705566, "loss": 0.011300456710159779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32142255815065, "step_time": 0.4654880714416505} +{"epoch": 0, "iter": 20728, "iter_tflops": 44.223566719022, "iter_time": 0.466518081665039, "loss": 0.038862284272909164, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04272751158311, "step_time": 0.42067589950561524} +{"epoch": 0, "iter": 20729, "iter_tflops": 34.195482133461375, "iter_time": 0.6033280487060547, "loss": 0.21979668736457825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.80134618800402, "step_time": 0.5457766876220703} +{"epoch": 0, "iter": 20730, "iter_tflops": 9.511451067791167, "iter_time": 2.1690794982910155, "loss": 0.21141715347766876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.678937163910772, "step_time": 1.9319425888061525} +{"epoch": 0, "iter": 20731, "iter_tflops": 14.753586938438243, "iter_time": 1.398378143310547, "loss": 0.22767004370689392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.282072796015918, "step_time": 1.1937858238220216} +{"epoch": 0, "iter": 20732, "iter_tflops": 45.1953598280751, "iter_time": 0.45648698425292966, "loss": 0.12060472369194031, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.968381959207065, "step_time": 0.42131458473205563} +{"epoch": 0, "iter": 20733, "iter_tflops": 14.93109663327814, "iter_time": 0.9219604263305665, "loss": 0.2950535714626312, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 15.709020713496813, "step_time": 0.8763041610717774} +{"epoch": 0, "iter": 20734, "iter_tflops": 8.994416329345256, "iter_time": 1.5304917755126952, "loss": 0.2954837381839752, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 11.496453632683506, "step_time": 1.197402317047119} +{"epoch": 0, 
"iter": 20735, "iter_tflops": 22.630768135570815, "iter_time": 0.6082816162109375, "loss": 0.26013660430908203, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 24.10546726460911, "step_time": 0.5710687980651855} +{"epoch": 0, "iter": 20736, "iter_tflops": 23.944737480669687, "iter_time": 0.5749021148681641, "loss": 0.25330790877342224, "lr": 3e-05, "seqlen": 5520.0, "step_tflops": 25.400872711990303, "step_time": 0.5419451675415039} +{"epoch": 0, "iter": 20737, "iter_tflops": 36.3379593727237, "iter_time": 0.5677559738159179, "loss": 0.5525012612342834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.21931498481715, "step_time": 0.5260442085266114} +{"epoch": 0, "iter": 20738, "iter_tflops": 42.64076229264024, "iter_time": 0.48383500671386726, "loss": 0.5329166054725647, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.871086462417374, "step_time": 0.4497624778747559} +{"epoch": 0, "iter": 20739, "iter_tflops": 41.341007825170266, "iter_time": 0.4990466995239258, "loss": 0.4327791929244995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.18220821304763, "step_time": 0.4669547843933105} +{"epoch": 0, "iter": 20740, "iter_tflops": 42.95421991488568, "iter_time": 0.4803042297363281, "loss": 0.5130906105041504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.35365856084511, "step_time": 0.44508015441894533} +{"epoch": 0, "iter": 20741, "iter_tflops": 39.91312922107081, "iter_time": 0.5168999252319336, "loss": 0.562706708908081, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.38977952845756, "step_time": 0.47548279190063475} +{"epoch": 0, "iter": 20742, "iter_tflops": 44.806520486597364, "iter_time": 0.46044846343994134, "loss": 0.6465874910354614, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.679400348394545, "step_time": 0.4238156871795654} +{"epoch": 0, "iter": 20743, "iter_tflops": 48.464362871565704, "iter_time": 0.42569616699218754, "loss": 0.5208871960639954, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.12105050388274, "step_time": 0.3958303470611572} 
+{"epoch": 0, "iter": 20744, "iter_tflops": 47.8885282919872, "iter_time": 0.43081494140625, "loss": 0.6241615414619446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.79080774409269, "step_time": 0.39835434913635254} +{"epoch": 0, "iter": 20745, "iter_tflops": 17.06321142111044, "iter_time": 0.747191017150879, "loss": 0.03162824735045433, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 18.088294955407925, "step_time": 0.704846881866455} +{"epoch": 0, "iter": 20746, "iter_tflops": 12.543914835806737, "iter_time": 1.0163875045776367, "loss": 0.10827841609716415, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 16.69867949871222, "step_time": 0.7635021858215332} +{"epoch": 0, "iter": 20747, "iter_tflops": 32.57892157180958, "iter_time": 0.39134132385253906, "loss": 0.09154924750328064, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 35.55011619627642, "step_time": 0.35863394165039064} +{"epoch": 0, "iter": 20748, "iter_tflops": 30.98633736815975, "iter_time": 0.411454833984375, "loss": 0.047483377158641815, "lr": 3e-05, "seqlen": 5120.0, "step_tflops": 33.831774342095294, "step_time": 0.376849235534668} +{"epoch": 0, "iter": 20749, "iter_tflops": 30.74313679922148, "iter_time": 0.6710796508789063, "loss": 0.04347590357065201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.866592776209444, "step_time": 0.6277223091125488} +{"epoch": 0, "iter": 20750, "iter_tflops": 22.089469539053855, "iter_time": 0.933978675842285, "loss": 0.049733053892850876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.854484108440687, "step_time": 0.7682550678253174} +{"epoch": 0, "iter": 20751, "iter_tflops": 49.292584706912315, "iter_time": 0.41854355239868163, "loss": 0.03362199291586876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.050300108454444, "step_time": 0.38170173835754395} +{"epoch": 0, "iter": 20752, "iter_tflops": 51.70914092372339, "iter_time": 0.39898348999023436, "loss": 0.0199583787471056, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.03577567724415, "step_time": 
0.368177173614502} +{"epoch": 0, "iter": 20753, "iter_tflops": 31.19815263967546, "iter_time": 0.661292152404785, "loss": 0.05260138213634491, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.4118480747348, "step_time": 0.6174783706665039} +{"epoch": 0, "iter": 20754, "iter_tflops": 20.02524001208935, "iter_time": 1.0302544937133788, "loss": 0.06788600981235504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.131765239863505, "step_time": 0.854935115814209} +{"epoch": 0, "iter": 20755, "iter_tflops": 52.50038481963566, "iter_time": 0.39297032928466796, "loss": 0.053582631051540375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.42016473198299, "step_time": 0.35930049324035646} +{"epoch": 0, "iter": 20756, "iter_tflops": 55.58945110964639, "iter_time": 0.37113324737548836, "loss": 0.07307332009077072, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.371865936010416, "step_time": 0.34173357391357423} +{"epoch": 0, "iter": 20757, "iter_tflops": 20.931383293305668, "iter_time": 0.9856536102294922, "loss": 0.4658263325691223, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.833112881312704, "step_time": 0.9449451217651367} +{"epoch": 0, "iter": 20758, "iter_tflops": 15.76718643490478, "iter_time": 1.3084828796386718, "loss": 0.5318185091018677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.707162299003482, "step_time": 0.9963264503479005} +{"epoch": 0, "iter": 20759, "iter_tflops": 40.78576539298309, "iter_time": 0.5058405380249024, "loss": 0.43868979811668396, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.32019274221574, "step_time": 0.46550098800659184} +{"epoch": 0, "iter": 20760, "iter_tflops": 38.076305966865924, "iter_time": 0.5418354797363281, "loss": 0.4789135754108429, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.678170759055945, "step_time": 0.4950095729827881} +{"epoch": 0, "iter": 20761, "iter_tflops": 16.872764091004825, "iter_time": 1.2227453308105467, "loss": 0.1737980842590332, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
17.836185598323173, "step_time": 1.156698745727539} +{"epoch": 0, "iter": 20762, "iter_tflops": 15.937190412527267, "iter_time": 1.2945251312255859, "loss": 0.1511104553937912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.704372683376267, "step_time": 1.0470312271118165} +{"epoch": 0, "iter": 20763, "iter_tflops": 38.6485372295861, "iter_time": 0.5338130493164063, "loss": 0.1901349127292633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.33682571131119, "step_time": 0.48730846405029293} +{"epoch": 0, "iter": 20764, "iter_tflops": 39.54612962354682, "iter_time": 0.5216969070434571, "loss": 0.1851002424955368, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.501843796219404, "step_time": 0.47425790977478033} +{"epoch": 0, "iter": 20765, "iter_tflops": 35.022604100836276, "iter_time": 0.5890793685913086, "loss": 0.42420125007629395, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.540146698315986, "step_time": 0.5353143482208251} +{"epoch": 0, "iter": 20766, "iter_tflops": 34.79873777253374, "iter_time": 0.5928690185546874, "loss": 0.3463386297225952, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.03238671420893, "step_time": 0.5424611835479737} +{"epoch": 0, "iter": 20767, "iter_tflops": 40.452684278806544, "iter_time": 0.5100055503845214, "loss": 0.3796004056930542, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.74916085995506, "step_time": 0.4715768966674805} +{"epoch": 0, "iter": 20768, "iter_tflops": 43.56051394075489, "iter_time": 0.47361914825439455, "loss": 0.4482095241546631, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.145589897181324, "step_time": 0.437603889465332} +{"epoch": 0, "iter": 20769, "iter_tflops": 18.90774155513394, "iter_time": 1.0911453094482422, "loss": 0.074663907289505, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.188973595190422, "step_time": 1.0218990783691406} +{"epoch": 0, "iter": 20770, "iter_tflops": 15.543805238913729, "iter_time": 1.3272871856689452, "loss": 0.12179303914308548, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 20.379130597321375, "step_time": 1.0123637714385987} +{"epoch": 0, "iter": 20771, "iter_tflops": 42.41770442081358, "iter_time": 0.4863793029785156, "loss": 0.05896330997347832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.49305752833676, "step_time": 0.4437456817626953} +{"epoch": 0, "iter": 20772, "iter_tflops": 42.35624668772083, "iter_time": 0.4870850257873536, "loss": 0.04989135265350342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.75159718368923, "step_time": 0.4412917366027832} +{"epoch": 0, "iter": 20773, "iter_tflops": 16.755602945066297, "iter_time": 1.0930266571044922, "loss": 0.03151765093207359, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 17.998000920269213, "step_time": 1.0175752716064455} +{"epoch": 0, "iter": 20774, "iter_tflops": 19.430270963803512, "iter_time": 0.94256640625, "loss": 0.06774012744426727, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 22.06319225780205, "step_time": 0.8300848064422607} +{"epoch": 0, "iter": 20775, "iter_tflops": 37.98345459633514, "iter_time": 0.48216574478149404, "loss": 0.03515157476067543, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 41.665013317073715, "step_time": 0.4395611381530762} +{"epoch": 0, "iter": 20776, "iter_tflops": 39.18225079885096, "iter_time": 0.4674136962890625, "loss": 0.021143713966012, "lr": 3e-05, "seqlen": 7296.0, "step_tflops": 43.245578832096406, "step_time": 0.423495792388916} +{"epoch": 0, "iter": 20777, "iter_tflops": 22.099404287340086, "iter_time": 0.9335588073730469, "loss": 0.4085693955421448, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.498271688501788, "step_time": 0.8779834442138673} +{"epoch": 0, "iter": 20778, "iter_tflops": 8.96990521490033, "iter_time": 2.3000347290039067, "loss": 0.6888934969902039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.457800759911636, "step_time": 1.6560783004760742} +{"epoch": 0, "iter": 20779, "iter_tflops": 13.475512621033403, "iter_time": 1.5310062103271485, "loss": 0.643373429775238, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 15.7065523755315, "step_time": 1.3135341873168946} +{"epoch": 0, "iter": 20780, "iter_tflops": 44.86134586577388, "iter_time": 0.4598857460021973, "loss": 0.7004356980323792, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.7474899591579, "step_time": 0.4232237091064453} +{"epoch": 0, "iter": 20781, "iter_tflops": 22.297201547430692, "iter_time": 0.6960958786010742, "loss": 0.21440911293029785, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 23.78191129225561, "step_time": 0.6526384658813477} +{"epoch": 0, "iter": 20782, "iter_tflops": 18.35790640535068, "iter_time": 0.8454662399291992, "loss": 0.28658825159072876, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 21.929705532897408, "step_time": 0.7077609901428223} +{"epoch": 0, "iter": 20783, "iter_tflops": 23.400508405452598, "iter_time": 0.663275764465332, "loss": 0.1337296962738037, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 25.142942410194685, "step_time": 0.6173100128173828} +{"epoch": 0, "iter": 20784, "iter_tflops": 24.944000033060554, "iter_time": 0.6222334060668946, "loss": 0.15837951004505157, "lr": 3e-05, "seqlen": 6208.0, "step_tflops": 26.776010137567287, "step_time": 0.5796603012084961} +{"epoch": 0, "iter": 20785, "iter_tflops": 26.38462090658283, "iter_time": 0.7819363250732421, "loss": 0.4420519471168518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.52210838500563, "step_time": 0.6988353691101075} +{"epoch": 0, "iter": 20786, "iter_tflops": 40.68737050656213, "iter_time": 0.5070638198852538, "loss": 0.3239181935787201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.01338572027308, "step_time": 0.4583324089050293} +{"epoch": 0, "iter": 20787, "iter_tflops": 42.277592398659735, "iter_time": 0.4879912109375, "loss": 0.5112053751945496, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.48663792250183, "step_time": 0.4535638256072998} +{"epoch": 0, "iter": 20788, "iter_tflops": 50.13896082999503, "iter_time": 0.4114782829284668, "loss": 0.4169935882091522, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 54.20688526685679, "step_time": 0.3805991325378418} +{"epoch": 0, "iter": 20789, "iter_tflops": 30.63692466290507, "iter_time": 0.6734061508178711, "loss": 0.03332773223519325, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.512339007829446, "step_time": 0.6345619583129882} +{"epoch": 0, "iter": 20790, "iter_tflops": 12.089259928037697, "iter_time": 1.7065638122558595, "loss": 0.045991115272045135, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.138047002407987, "step_time": 1.459260498046875} +{"epoch": 0, "iter": 20791, "iter_tflops": 10.368585131002435, "iter_time": 1.9897694091796876, "loss": 0.03495677560567856, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.689282702639064, "step_time": 1.6258675918579102} +{"epoch": 0, "iter": 20792, "iter_tflops": 37.568557392892835, "iter_time": 0.5491585235595703, "loss": 0.018098967149853706, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.23954265884638, "step_time": 0.4884307975769043} +{"epoch": 0, "iter": 20793, "iter_tflops": 20.17348003618471, "iter_time": 0.7632920761108398, "loss": 0.19843630492687225, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 21.35826397748097, "step_time": 0.720950798034668} +{"epoch": 0, "iter": 20794, "iter_tflops": 6.735412351775817, "iter_time": 2.286164031982422, "loss": 0.20515215396881104, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 8.509505667978031, "step_time": 1.8095360717773437} +{"epoch": 0, "iter": 20795, "iter_tflops": 10.08272783770254, "iter_time": 1.5271916198730469, "loss": 0.3111635744571686, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 11.709940363329643, "step_time": 1.314973175048828} +{"epoch": 0, "iter": 20796, "iter_tflops": 15.933562404962169, "iter_time": 0.966403938293457, "loss": 0.1805247962474823, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 19.084486476541457, "step_time": 0.8068468322753907} +{"epoch": 0, "iter": 20797, "iter_tflops": 21.18585460566809, "iter_time": 0.7712831649780274, "loss": 0.18307340145111084, 
"lr": 3e-05, "seqlen": 6528.0, "step_tflops": 23.16693606826814, "step_time": 0.7053281860351563} +{"epoch": 0, "iter": 20798, "iter_tflops": 24.81614638425536, "iter_time": 0.6584540863037109, "loss": 0.1459764689207077, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 26.795045091471604, "step_time": 0.6098251724243164} +{"epoch": 0, "iter": 20799, "iter_tflops": 25.94869456745575, "iter_time": 0.6297154159545899, "loss": 0.34274736046791077, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 27.599188065524164, "step_time": 0.5920570182800292} +{"epoch": 0, "iter": 20800, "iter_tflops": 23.885884885360362, "iter_time": 0.6840982894897462, "loss": 0.4799574613571167, "lr": 3e-05, "seqlen": 6528.0, "step_tflops": 25.62422446362244, "step_time": 0.6376892700195311} +{"epoch": 0, "iter": 20801, "iter_tflops": 17.492781569041192, "iter_time": 1.1794061126708983, "loss": 0.1802648901939392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.622069284945795, "step_time": 1.1078840484619141} +{"epoch": 0, "iter": 20802, "iter_tflops": 15.683680403687209, "iter_time": 1.3154497528076172, "loss": 0.29527056217193604, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.59651434133409, "step_time": 1.0016788845062257} +{"epoch": 0, "iter": 20803, "iter_tflops": 39.92055284951944, "iter_time": 0.5168038024902344, "loss": 0.2298360913991928, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.66121421357947, "step_time": 0.4725267925262451} +{"epoch": 0, "iter": 20804, "iter_tflops": 42.56227903224528, "iter_time": 0.484727180480957, "loss": 0.24219770729541779, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.439032679879325, "step_time": 0.44426191329956055} +{"epoch": 0, "iter": 20805, "iter_tflops": 16.32928239866557, "iter_time": 1.2634415283203126, "loss": 0.3055088222026825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.476392750486077, "step_time": 1.18051212310791} +{"epoch": 0, "iter": 20806, "iter_tflops": 17.472234272484876, "iter_time": 1.1807930908203124, "loss": 
0.2972913980484009, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.05316434430676, "step_time": 0.979952142715454} +{"epoch": 0, "iter": 20807, "iter_tflops": 40.40003561331433, "iter_time": 0.510670181274414, "loss": 0.21334750950336456, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.14226073012034, "step_time": 0.46737736511230477} +{"epoch": 0, "iter": 20808, "iter_tflops": 35.17678789499345, "iter_time": 0.5864973678588867, "loss": 0.2802640199661255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.58615068445368, "step_time": 0.5346761245727539} +{"epoch": 0, "iter": 20809, "iter_tflops": 35.213998479631016, "iter_time": 0.5858776168823242, "loss": 0.19494792819023132, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.79065271081632, "step_time": 0.531857343673706} +{"epoch": 0, "iter": 20810, "iter_tflops": 35.85907817504016, "iter_time": 0.5753380889892579, "loss": 0.11621464788913727, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.05414040042518, "step_time": 0.515080171585083} +{"epoch": 0, "iter": 20811, "iter_tflops": 40.14570504483571, "iter_time": 0.5139053726196289, "loss": 0.12469536811113358, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.90272444069992, "step_time": 0.4699274082183838} +{"epoch": 0, "iter": 20812, "iter_tflops": 45.22571594636487, "iter_time": 0.4561805839538574, "loss": 0.15686552226543427, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.50068045367522, "step_time": 0.41678403854370116} +{"epoch": 0, "iter": 20813, "iter_tflops": 28.631983629412687, "iter_time": 0.7205610961914064, "loss": 0.12042027711868286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.060103649450184, "step_time": 0.664231315612793} +{"epoch": 0, "iter": 20814, "iter_tflops": 9.935108840414868, "iter_time": 2.0765845489501955, "loss": 0.11166192591190338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.222935873494668, "step_time": 1.8382973709106447} +{"epoch": 0, "iter": 20815, "iter_tflops": 15.878411764883806, "iter_time": 1.2993171997070314, 
"loss": 0.058707937598228455, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.345651621082244, "step_time": 1.0664460372924804} +{"epoch": 0, "iter": 20816, "iter_tflops": 39.997018275414035, "iter_time": 0.515815788269043, "loss": 0.09398862719535828, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.661424290236745, "step_time": 0.4725245189666748} +{"epoch": 0, "iter": 20817, "iter_tflops": 12.942543694014882, "iter_time": 1.1865794219970702, "loss": 0.27492019534111023, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 13.768982283485869, "step_time": 1.1153588333129882} +{"epoch": 0, "iter": 20818, "iter_tflops": 14.221347945289132, "iter_time": 1.0798804779052733, "loss": 0.3061400055885315, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 17.3549967518113, "step_time": 0.8848953552246095} +{"epoch": 0, "iter": 20819, "iter_tflops": 22.94022299335633, "iter_time": 0.6694510345458985, "loss": 0.1875656396150589, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 24.791126609448813, "step_time": 0.6194698715209961} +{"epoch": 0, "iter": 20820, "iter_tflops": 23.82594145627313, "iter_time": 0.6445644989013672, "loss": 0.1978398561477661, "lr": 3e-05, "seqlen": 6144.0, "step_tflops": 25.628855907811374, "step_time": 0.5992212867736816} +{"epoch": 0, "iter": 20821, "iter_tflops": 26.749767783912347, "iter_time": 0.7712625274658202, "loss": 0.4770563542842865, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.997388072659728, "step_time": 0.7114810981750489} +{"epoch": 0, "iter": 20822, "iter_tflops": 42.51501036205767, "iter_time": 0.48526610565185546, "loss": 0.4073355495929718, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.324954184719466, "step_time": 0.44535593986511235} +{"epoch": 0, "iter": 20823, "iter_tflops": 46.68544969954558, "iter_time": 0.4419169921875, "loss": 0.4380302131175995, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.26811770584672, "step_time": 0.4104210472106934} +{"epoch": 0, "iter": 20824, "iter_tflops": 41.99306929165088, "iter_time": 
0.4912975845336914, "loss": 0.4499913454055786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.17794995106663, "step_time": 0.45666289710998537} +{"epoch": 0, "iter": 20825, "iter_tflops": 33.55119180340864, "iter_time": 0.6149138793945312, "loss": 0.07343417406082153, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.932838192133055, "step_time": 0.5741570816040039} +{"epoch": 0, "iter": 20826, "iter_tflops": 15.674240148918772, "iter_time": 1.3162420196533207, "loss": 0.08428400754928589, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.845404104626382, "step_time": 1.1561012229919434} +{"epoch": 0, "iter": 20827, "iter_tflops": 40.80137171113273, "iter_time": 0.5056470565795899, "loss": 0.05045892298221588, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.83550849712511, "step_time": 0.460150764465332} +{"epoch": 0, "iter": 20828, "iter_tflops": 39.58391561230193, "iter_time": 0.5211989059448241, "loss": 0.052244801074266434, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.77655762650692, "step_time": 0.4712817687988281} +{"epoch": 0, "iter": 20829, "iter_tflops": 18.315602034646098, "iter_time": 1.0788927154541015, "loss": 0.4065890312194824, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 19.302629917535977, "step_time": 1.0237242126464845} +{"epoch": 0, "iter": 20830, "iter_tflops": 14.469752097465596, "iter_time": 1.365646728515625, "loss": 0.47660645842552185, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 18.6076506691874, "step_time": 1.0619594039916993} +{"epoch": 0, "iter": 20831, "iter_tflops": 44.58417750324063, "iter_time": 0.44321933746337894, "loss": 0.4979594051837921, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 48.1871077801022, "step_time": 0.4100800094604492} +{"epoch": 0, "iter": 20832, "iter_tflops": 42.84008308394075, "iter_time": 0.4612635688781739, "loss": 0.33464315533638, "lr": 3e-05, "seqlen": 7856.0, "step_tflops": 45.919890173584555, "step_time": 0.4303270225524902} +{"epoch": 0, "iter": 20833, "iter_tflops": 28.86178046927777, 
"iter_time": 0.7148240051269531, "loss": 0.3544427156448364, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.620154549348907, "step_time": 0.6737749633789062} +{"epoch": 0, "iter": 20834, "iter_tflops": 15.127596352471661, "iter_time": 1.363805130004883, "loss": 0.30723804235458374, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.142840003088235, "step_time": 1.1371479606628416} +{"epoch": 0, "iter": 20835, "iter_tflops": 45.88893333185915, "iter_time": 0.4495875587463379, "loss": 0.4022880792617798, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.67787562940603, "step_time": 0.4152974185943603} +{"epoch": 0, "iter": 20836, "iter_tflops": 44.212152062265424, "iter_time": 0.4666385269165039, "loss": 0.4362068474292755, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.63316247691641, "step_time": 0.4331245803833008} +{"epoch": 0, "iter": 20837, "iter_tflops": 33.794148011376656, "iter_time": 0.6104930801391601, "loss": 0.4499962031841278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.01514127480745, "step_time": 0.5728449974060059} +{"epoch": 0, "iter": 20838, "iter_tflops": 18.10719635090691, "iter_time": 1.1393864135742189, "loss": 0.46010664105415344, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.061081621310606, "step_time": 0.935180507659912} +{"epoch": 0, "iter": 20839, "iter_tflops": 38.72333739725415, "iter_time": 0.5327819061279296, "loss": 0.4491785764694214, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.42600817984107, "step_time": 0.48628410720825194} +{"epoch": 0, "iter": 20840, "iter_tflops": 39.1205884965455, "iter_time": 0.5273717575073241, "loss": 0.4560546576976776, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.95600217336983, "step_time": 0.48028430175781256} +{"epoch": 0, "iter": 20841, "iter_tflops": 18.612771908356663, "iter_time": 1.1084374542236328, "loss": 0.17071837186813354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.948622244210288, "step_time": 1.0342114486694336} +{"epoch": 0, "iter": 20842, "iter_tflops": 
18.236102454456898, "iter_time": 1.1313323974609375, "loss": 0.2398567944765091, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.37036805186356, "step_time": 0.6793165454864503} +{"epoch": 0, "iter": 20843, "iter_tflops": 40.383148004551074, "iter_time": 0.5108837356567383, "loss": 0.2304001897573471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.242549479157915, "step_time": 0.4663179168701172} +{"epoch": 0, "iter": 20844, "iter_tflops": 41.51000528617616, "iter_time": 0.4970149574279785, "loss": 0.20857886970043182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.23489039971798, "step_time": 0.4560880622863769} +{"epoch": 0, "iter": 20845, "iter_tflops": 19.84557328164077, "iter_time": 1.0395816345214843, "loss": 0.2388758659362793, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.95819790599274, "step_time": 0.9843925323486329} +{"epoch": 0, "iter": 20846, "iter_tflops": 15.00367806323113, "iter_time": 1.3750690612792968, "loss": 0.17594920098781586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.94749593338915, "step_time": 1.14952490234375} +{"epoch": 0, "iter": 20847, "iter_tflops": 47.9652553368786, "iter_time": 0.4301257934570312, "loss": 0.22551897168159485, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.06933220800848, "step_time": 0.39622350883483887} +{"epoch": 0, "iter": 20848, "iter_tflops": 51.68154231915387, "iter_time": 0.3991965522766113, "loss": 0.23951014876365662, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.02892440674898, "step_time": 0.3682221946716308} +{"epoch": 0, "iter": 20849, "iter_tflops": 46.986857667256025, "iter_time": 0.43908221435546874, "loss": 0.04427759349346161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.639639055011884, "step_time": 0.3919307556152344} +{"epoch": 0, "iter": 20850, "iter_tflops": 34.35402661985572, "iter_time": 0.6005436782836915, "loss": 0.035620201379060745, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.82264411295619, "step_time": 0.4817800006866455} +{"epoch": 0, "iter": 20851, 
"iter_tflops": 53.310504119051366, "iter_time": 0.3869986572265625, "loss": 0.05170251429080963, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 58.21393286394457, "step_time": 0.3544013004302978} +{"epoch": 0, "iter": 20852, "iter_tflops": 55.602295198817856, "iter_time": 0.3710475158691406, "loss": 0.02701437473297119, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 60.75867160334698, "step_time": 0.3395580081939697} +{"epoch": 0, "iter": 20853, "iter_tflops": 33.38629543319683, "iter_time": 0.6179509658813477, "loss": 0.5099697113037109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.60937176766677, "step_time": 0.5793725776672363} +{"epoch": 0, "iter": 20854, "iter_tflops": 18.68197249825171, "iter_time": 1.1043316497802735, "loss": 0.49093782901763916, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.279903517051935, "step_time": 0.9695106697082521} +{"epoch": 0, "iter": 20855, "iter_tflops": 45.52295569605503, "iter_time": 0.453201976776123, "loss": 0.5221667289733887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.33538474685663, "step_time": 0.41818045234680173} +{"epoch": 0, "iter": 20856, "iter_tflops": 46.97155942681933, "iter_time": 0.4392252197265625, "loss": 0.6060953140258789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.90612097920546, "step_time": 0.4052772655487061} +{"epoch": 0, "iter": 20857, "iter_tflops": 34.21860192922868, "iter_time": 0.6029204101562501, "loss": 0.03784853592514992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.575471925564564, "step_time": 0.5640690994262696} +{"epoch": 0, "iter": 20858, "iter_tflops": 20.081845565408543, "iter_time": 1.027350471496582, "loss": 0.0017090851906687021, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.558429408813055, "step_time": 0.9145624961853027} +{"epoch": 0, "iter": 20859, "iter_tflops": 42.11168566769587, "iter_time": 0.4899137420654297, "loss": 0.001403573900461197, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.64851874758847, "step_time": 0.4422668514251709} +{"epoch": 0, 
"iter": 20860, "iter_tflops": 39.234105375195114, "iter_time": 0.5258459014892578, "loss": 0.011852771043777466, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.405157206177286, "step_time": 0.4753143367767334} +{"epoch": 0, "iter": 20861, "iter_tflops": 19.327963604618326, "iter_time": 1.0674219970703125, "loss": 0.502144992351532, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.681994867247287, "step_time": 0.9975388565063477} +{"epoch": 0, "iter": 20862, "iter_tflops": 13.401470501871058, "iter_time": 1.5394649047851563, "loss": 0.32368502020835876, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.426720704014226, "step_time": 1.2559471778869629} +{"epoch": 0, "iter": 20863, "iter_tflops": 37.55571590648158, "iter_time": 0.5493462982177735, "loss": 0.3510579764842987, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.01598020639444, "step_time": 0.5030013523101807} +{"epoch": 0, "iter": 20864, "iter_tflops": 39.02161873650443, "iter_time": 0.5287093200683594, "loss": 0.38902875781059265, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.71617054819734, "step_time": 0.48298087692260744} +{"epoch": 0, "iter": 20865, "iter_tflops": 35.12726323810397, "iter_time": 0.5873242492675781, "loss": 0.29457759857177734, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.921957132493134, "step_time": 0.5300631065368652} +{"epoch": 0, "iter": 20866, "iter_tflops": 40.257092379292, "iter_time": 0.5124834480285645, "loss": 0.2942831516265869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.1349139450729, "step_time": 0.45709832382202154} +{"epoch": 0, "iter": 20867, "iter_tflops": 41.27155240559685, "iter_time": 0.49988653945922856, "loss": 0.22279609739780426, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.19516607837866, "step_time": 0.456488941192627} +{"epoch": 0, "iter": 20868, "iter_tflops": 38.03096833823022, "iter_time": 0.542481414794922, "loss": 0.42670804262161255, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.82022986030591, "step_time": 0.49332807540893553} 
+{"epoch": 0, "iter": 20869, "iter_tflops": 27.41602230691352, "iter_time": 0.7525195770263672, "loss": 0.030154002830386162, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.482688321930617, "step_time": 0.6997697525024414} +{"epoch": 0, "iter": 20870, "iter_tflops": 51.0255416638082, "iter_time": 0.40432875061035156, "loss": 0.024496085941791534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.399393951944305, "step_time": 0.3658034610748291} +{"epoch": 0, "iter": 20871, "iter_tflops": 50.55511433986439, "iter_time": 0.4080911254882813, "loss": 0.016825703904032707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.186528521400696, "step_time": 0.3738429298400879} +{"epoch": 0, "iter": 20872, "iter_tflops": 51.792531496741994, "iter_time": 0.3983410911560059, "loss": 0.01866183429956436, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.43115300285763, "step_time": 0.36559758949279786} +{"epoch": 0, "iter": 20873, "iter_tflops": 32.33844889315233, "iter_time": 0.63797412109375, "loss": 0.16808371245861053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.39082372065021, "step_time": 0.5999011154174805} +{"epoch": 0, "iter": 20874, "iter_tflops": 9.70623453495829, "iter_time": 2.1255506896972656, "loss": 0.14463017880916595, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.962307373691365, "step_time": 1.7246750869750977} +{"epoch": 0, "iter": 20875, "iter_tflops": 16.24670159078916, "iter_time": 1.269863510131836, "loss": 0.11876489967107773, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.46476495797845, "step_time": 1.117322292327881} +{"epoch": 0, "iter": 20876, "iter_tflops": 40.14094357242778, "iter_time": 0.5139663314819336, "loss": 0.19070348143577576, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.96951143014554, "step_time": 0.46921361732482914} +{"epoch": 0, "iter": 20877, "iter_tflops": 12.400233009488703, "iter_time": 1.2285801391601563, "loss": 0.1857389509677887, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 13.083946240015532, "step_time": 
1.1643795928955079} +{"epoch": 0, "iter": 20878, "iter_tflops": 9.974418668272518, "iter_time": 1.5273752288818359, "loss": 0.40699341893196106, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 11.918797787269096, "step_time": 1.27820609664917} +{"epoch": 0, "iter": 20879, "iter_tflops": 23.179177590388825, "iter_time": 0.6572571411132813, "loss": 0.3679223358631134, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 24.965564438978745, "step_time": 0.6102277412414551} +{"epoch": 0, "iter": 20880, "iter_tflops": 23.422568033412116, "iter_time": 0.6504273986816407, "loss": 0.275592565536499, "lr": 3e-05, "seqlen": 6096.0, "step_tflops": 25.196016160208394, "step_time": 0.6046463813781738} +{"epoch": 0, "iter": 20881, "iter_tflops": 24.273319641269964, "iter_time": 0.8499494018554689, "loss": 0.1674577295780182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.424744560433705, "step_time": 0.7807490234375001} +{"epoch": 0, "iter": 20882, "iter_tflops": 23.480985434625165, "iter_time": 0.8786297988891602, "loss": 0.17216578125953674, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.807969041342744, "step_time": 0.7161592502593994} +{"epoch": 0, "iter": 20883, "iter_tflops": 52.60832873578818, "iter_time": 0.3921640167236328, "loss": 0.1461857557296753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.1813185059907, "step_time": 0.3608012905120849} +{"epoch": 0, "iter": 20884, "iter_tflops": 45.811209119066916, "iter_time": 0.4503503379821777, "loss": 0.16384154558181763, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.23380878530758, "step_time": 0.4190432147979737} +{"epoch": 0, "iter": 20885, "iter_tflops": 26.469625396725913, "iter_time": 0.7794252166748047, "loss": 0.28695109486579895, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.927622836882307, "step_time": 0.7387343215942384} +{"epoch": 0, "iter": 20886, "iter_tflops": 27.879662648264222, "iter_time": 0.7400051345825195, "loss": 0.4378121495246887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.240028053973713, 
"step_time": 0.6604057292938232} +{"epoch": 0, "iter": 20887, "iter_tflops": 49.74740212587381, "iter_time": 0.4147170028686523, "loss": 0.3561139702796936, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.8046708410943, "step_time": 0.3834442844390869} +{"epoch": 0, "iter": 20888, "iter_tflops": 48.63521548356539, "iter_time": 0.42420072174072265, "loss": 0.30102086067199707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.395008179537456, "step_time": 0.39376066970825196} +{"epoch": 0, "iter": 20889, "iter_tflops": 43.10118288050705, "iter_time": 0.4786665267944336, "loss": 0.5155142545700073, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.88670644532006, "step_time": 0.4400201053619384} +{"epoch": 0, "iter": 20890, "iter_tflops": 33.35956036063264, "iter_time": 0.6184462051391602, "loss": 0.6224269866943359, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.373042522085036, "step_time": 0.5520314140319824} +{"epoch": 0, "iter": 20891, "iter_tflops": 35.43248561231467, "iter_time": 0.5822649230957031, "loss": 0.5902912020683289, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.55271587301906, "step_time": 0.5351398220062256} +{"epoch": 0, "iter": 20892, "iter_tflops": 37.09383164766708, "iter_time": 0.5561866378784179, "loss": 0.4097668528556824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.276561285804036, "step_time": 0.5122357234954834} +{"epoch": 0, "iter": 20893, "iter_tflops": 18.819328006534455, "iter_time": 1.0962715301513672, "loss": 0.39644938707351685, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.150645166655433, "step_time": 1.0238428268432618} +{"epoch": 0, "iter": 20894, "iter_tflops": 15.821384619205046, "iter_time": 1.304000503540039, "loss": 0.5381075143814087, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.883674102879297, "step_time": 0.9015638580322265} +{"epoch": 0, "iter": 20895, "iter_tflops": 44.04460062901243, "iter_time": 0.4684136810302734, "loss": 0.5314317345619202, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
47.20946813007306, "step_time": 0.4370117759704589} +{"epoch": 0, "iter": 20896, "iter_tflops": 48.41025138200285, "iter_time": 0.4261719970703125, "loss": 0.47551676630973816, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.03023675137409, "step_time": 0.39652123069763184} +{"epoch": 0, "iter": 20897, "iter_tflops": 43.0333610069671, "iter_time": 0.4746000938415528, "loss": 0.11049232631921768, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 46.793481007994615, "step_time": 0.43646330070495604} +{"epoch": 0, "iter": 20898, "iter_tflops": 38.09718333442908, "iter_time": 0.5360931015014647, "loss": 0.15076908469200134, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 41.88437534617612, "step_time": 0.4876194763183594} +{"epoch": 0, "iter": 20899, "iter_tflops": 37.99057670843715, "iter_time": 0.5375974502563476, "loss": 0.17194584012031555, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 41.4900234084074, "step_time": 0.4922541732788086} +{"epoch": 0, "iter": 20900, "iter_tflops": 40.406471651555655, "iter_time": 0.505454605102539, "loss": 0.1472138911485672, "lr": 3e-05, "seqlen": 8112.0, "step_tflops": 44.33551064760765, "step_time": 0.4606609210968018} +{"epoch": 0, "iter": 20901, "iter_tflops": 21.397335299970624, "iter_time": 0.9641898498535157, "loss": 0.0762641578912735, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.636703885521218, "step_time": 0.9114000701904296} +{"epoch": 0, "iter": 20902, "iter_tflops": 21.013680140882208, "iter_time": 0.9817934494018554, "loss": 0.07929763942956924, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.6092584363234, "step_time": 0.8056107349395751} +{"epoch": 0, "iter": 20903, "iter_tflops": 40.589016058431966, "iter_time": 0.5082925262451171, "loss": 0.04267767071723938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.88241641880697, "step_time": 0.45966984748840334} +{"epoch": 0, "iter": 20904, "iter_tflops": 40.58000594965813, "iter_time": 0.5084053840637206, "loss": 0.04661542549729347, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 44.57788671034371, "step_time": 0.46281003952026367} +{"epoch": 0, "iter": 20905, "iter_tflops": 23.477594586551053, "iter_time": 0.8787566986083983, "loss": 0.5276118516921997, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.082532490724674, "step_time": 0.8225283279418946} +{"epoch": 0, "iter": 20906, "iter_tflops": 25.782706779562446, "iter_time": 0.8001911392211914, "loss": 0.6643310785293579, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.2954963963348, "step_time": 0.6592352218627929} +{"epoch": 0, "iter": 20907, "iter_tflops": 39.20422881756061, "iter_time": 0.5262466354370118, "loss": 0.3958624601364136, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.47606038278314, "step_time": 0.485711088180542} +{"epoch": 0, "iter": 20908, "iter_tflops": 35.26566014539235, "iter_time": 0.5850193481445313, "loss": 0.45630189776420593, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.26285469539738, "step_time": 0.5391937866210937} +{"epoch": 0, "iter": 20909, "iter_tflops": 20.77125606724044, "iter_time": 0.9932520904541016, "loss": 0.10001637041568756, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.70323225110823, "step_time": 0.9087293510437012} +{"epoch": 0, "iter": 20910, "iter_tflops": 26.457893218017823, "iter_time": 0.7797708358764649, "loss": 0.07196276634931564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.162738656673895, "step_time": 0.62211669921875} +{"epoch": 0, "iter": 20911, "iter_tflops": 53.21994781428268, "iter_time": 0.3876571540832519, "loss": 0.1099594309926033, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.93357750371089, "step_time": 0.3561163387298583} +{"epoch": 0, "iter": 20912, "iter_tflops": 51.25612769642011, "iter_time": 0.4025097961425781, "loss": 0.07127135992050171, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.48921015065654, "step_time": 0.3718036975860595} +{"epoch": 0, "iter": 20913, "iter_tflops": 27.528249448002374, "iter_time": 0.7494517059326172, "loss": 0.0362398661673069, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 29.508990414452374, "step_time": 0.6991460304260253} +{"epoch": 0, "iter": 20914, "iter_tflops": 16.193180592864593, "iter_time": 1.2740606079101564, "loss": 0.059404294937849045, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.933721663966246, "step_time": 1.0896480827331543} +{"epoch": 0, "iter": 20915, "iter_tflops": 42.2539240648039, "iter_time": 0.4882645568847656, "loss": 0.05440438166260719, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.800459497842205, "step_time": 0.4408310031890869} +{"epoch": 0, "iter": 20916, "iter_tflops": 36.8151308791171, "iter_time": 0.5603971252441405, "loss": 0.059526022523641586, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.494267755231604, "step_time": 0.5094818267822265} +{"epoch": 0, "iter": 20917, "iter_tflops": 21.932493464063295, "iter_time": 0.9406633834838867, "loss": 0.40137428045272827, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.69417480618308, "step_time": 0.8707242889404297} +{"epoch": 0, "iter": 20918, "iter_tflops": 15.94975443043664, "iter_time": 1.2935054016113283, "loss": 0.38692986965179443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.760280011733368, "step_time": 0.9937772274017332} +{"epoch": 0, "iter": 20919, "iter_tflops": 42.39171277107473, "iter_time": 0.48667751693725586, "loss": 0.4130097031593323, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.02783696153211, "step_time": 0.44823078536987304} +{"epoch": 0, "iter": 20920, "iter_tflops": 46.58359307852866, "iter_time": 0.44288325881958013, "loss": 0.44074490666389465, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.255939951348275, "step_time": 0.4105204982757568} +{"epoch": 0, "iter": 20921, "iter_tflops": 43.13256168904849, "iter_time": 0.47831829833984374, "loss": 0.04089762642979622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.83727375504138, "step_time": 0.4404845085144043} +{"epoch": 0, "iter": 20922, "iter_tflops": 19.6750799465061, "iter_time": 1.048590072631836, "loss": 0.025293545797467232, "lr": 
3e-05, "seqlen": 8192.0, "step_tflops": 29.84518252219121, "step_time": 0.6912704753875732} +{"epoch": 0, "iter": 20923, "iter_tflops": 49.10836257641159, "iter_time": 0.4201136512756347, "loss": 0.027608854696154594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.97908239800348, "step_time": 0.38220533943176277} +{"epoch": 0, "iter": 20924, "iter_tflops": 56.787379706893425, "iter_time": 0.36330419921874996, "loss": 0.055490974336862564, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 62.27783733835158, "step_time": 0.3312750473022461} +{"epoch": 0, "iter": 20925, "iter_tflops": 35.807747891192356, "iter_time": 0.5761628341674805, "loss": 0.6118612885475159, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.489421948521745, "step_time": 0.536019832611084} +{"epoch": 0, "iter": 20926, "iter_tflops": 18.124095480828938, "iter_time": 1.1383240356445312, "loss": 0.508279025554657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.980358893987525, "step_time": 0.9386149520874024} +{"epoch": 0, "iter": 20927, "iter_tflops": 46.38132872344609, "iter_time": 0.44481462860107424, "loss": 0.443779319524765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.18074797311497, "step_time": 0.4111356315612793} +{"epoch": 0, "iter": 20928, "iter_tflops": 48.79756191373845, "iter_time": 0.4227894325256348, "loss": 0.48130154609680176, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.743559358484916, "step_time": 0.39115853691101077} +{"epoch": 0, "iter": 20929, "iter_tflops": 43.59910883365419, "iter_time": 0.47319989013671876, "loss": 0.016456885263323784, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.932181229868405, "step_time": 0.43042258834838865} +{"epoch": 0, "iter": 20930, "iter_tflops": 12.724682105187133, "iter_time": 1.6213445129394533, "loss": 0.003983090166002512, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.9197310246211, "step_time": 1.2959448547363281} +{"epoch": 0, "iter": 20931, "iter_tflops": 23.076332844872518, "iter_time": 0.8940369186401368, "loss": 
0.004799178335815668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.74539042186765, "step_time": 0.7177183265686033} +{"epoch": 0, "iter": 20932, "iter_tflops": 43.58346050841854, "iter_time": 0.4733697891235352, "loss": 0.02435668744146824, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.391579948720654, "step_time": 0.42633643150329587} +{"epoch": 0, "iter": 20933, "iter_tflops": 16.56757316674151, "iter_time": 0.9294214248657227, "loss": 0.12153251469135284, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 17.797488030689607, "step_time": 0.8651927413940429} +{"epoch": 0, "iter": 20934, "iter_tflops": 12.420404616392624, "iter_time": 1.2397548980712891, "loss": 0.24188901484012604, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 14.68122529353498, "step_time": 1.0488400764465333} +{"epoch": 0, "iter": 20935, "iter_tflops": 24.554289353999167, "iter_time": 0.6271106948852538, "loss": 0.2232029139995575, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.434819513967277, "step_time": 0.5824990577697753} +{"epoch": 0, "iter": 20936, "iter_tflops": 24.73857819849141, "iter_time": 0.6224390640258789, "loss": 0.21642720699310303, "lr": 3e-05, "seqlen": 6160.0, "step_tflops": 26.539558899020413, "step_time": 0.5802002029418946} +{"epoch": 0, "iter": 20937, "iter_tflops": 14.650763938658356, "iter_time": 1.408192337036133, "loss": 0.2975163459777832, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.763133187030789, "step_time": 1.3088193359375002} +{"epoch": 0, "iter": 20938, "iter_tflops": 15.936680028499495, "iter_time": 1.294566589355469, "loss": 0.3677535057067871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.41886920723288, "step_time": 1.0624250717163086} +{"epoch": 0, "iter": 20939, "iter_tflops": 34.72299766492036, "iter_time": 0.594162223815918, "loss": 0.39507049322128296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.98161099924194, "step_time": 0.543186372756958} +{"epoch": 0, "iter": 20940, "iter_tflops": 40.26917183045445, "iter_time": 
0.5123297195434571, "loss": 0.3743888735771179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.056162552707924, "step_time": 0.46829075241088863} +{"epoch": 0, "iter": 20941, "iter_tflops": 22.371527222243532, "iter_time": 0.9222031784057618, "loss": 0.6307398080825806, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.95119211135832, "step_time": 0.8613806533813476} +{"epoch": 0, "iter": 20942, "iter_tflops": 22.681864250261597, "iter_time": 0.9095854415893554, "loss": 0.4906098544597626, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.409046681081577, "step_time": 0.7527110939025878} +{"epoch": 0, "iter": 20943, "iter_tflops": 44.08851218801935, "iter_time": 0.4679471473693848, "loss": 0.5607302188873291, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.4999619124408, "step_time": 0.43433915901184084} +{"epoch": 0, "iter": 20944, "iter_tflops": 43.87209181217635, "iter_time": 0.47025552368164064, "loss": 0.5865294933319092, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.90241456964362, "step_time": 0.43987273788452147} +{"epoch": 0, "iter": 20945, "iter_tflops": 27.752816928413658, "iter_time": 0.7433873672485352, "loss": 0.13665397465229034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.446697750582853, "step_time": 0.7006250305175781} +{"epoch": 0, "iter": 20946, "iter_tflops": 14.88761790071094, "iter_time": 1.385788757324219, "loss": 0.06468936800956726, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.283060944072634, "step_time": 1.1284266662597657} +{"epoch": 0, "iter": 20947, "iter_tflops": 42.91018299193315, "iter_time": 0.4807971458435059, "loss": 0.11708182841539383, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.658962147331536, "step_time": 0.364127628326416} +{"epoch": 0, "iter": 20948, "iter_tflops": 53.372936370839604, "iter_time": 0.386545970916748, "loss": 0.07526490837335587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.90767486574333, "step_time": 0.35627563285827635} +{"epoch": 0, "iter": 20949, "iter_tflops": 38.725342627733156, 
"iter_time": 0.5327543182373047, "loss": 0.44944170117378235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.68282412371179, "step_time": 0.49495431137084966} +{"epoch": 0, "iter": 20950, "iter_tflops": 9.38426303606763, "iter_time": 2.198477752685547, "loss": 0.5443102121353149, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.956736173035981, "step_time": 1.882959777832031} +{"epoch": 0, "iter": 20951, "iter_tflops": 10.817997129532888, "iter_time": 1.9071084289550781, "loss": 0.532454252243042, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.141128340270207, "step_time": 1.5699636268615724} +{"epoch": 0, "iter": 20952, "iter_tflops": 31.50048860822788, "iter_time": 0.6549451904296875, "loss": 0.49632686376571655, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.968238186455025, "step_time": 0.44881192588806146} +{"epoch": 0, "iter": 20953, "iter_tflops": 23.563584354479676, "iter_time": 0.7492537307739258, "loss": 0.3196910619735718, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 25.014448141531933, "step_time": 0.7057962417602539} +{"epoch": 0, "iter": 20954, "iter_tflops": 8.932118494382731, "iter_time": 1.9765863494873048, "loss": 0.27900704741477966, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 10.442149514534396, "step_time": 1.6907537536621096} +{"epoch": 0, "iter": 20955, "iter_tflops": 19.735079742102158, "iter_time": 0.8946051254272461, "loss": 0.15240278840065002, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 24.358541645174284, "step_time": 0.7248013343811036} +{"epoch": 0, "iter": 20956, "iter_tflops": 32.853051293097735, "iter_time": 0.5373961563110351, "loss": 0.2940235137939453, "lr": 3e-05, "seqlen": 7040.0, "step_tflops": 34.90123660103908, "step_time": 0.5058589668273925} +{"epoch": 0, "iter": 20957, "iter_tflops": 19.917223502118002, "iter_time": 0.7177493743896484, "loss": 0.26886677742004395, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 21.193955682156936, "step_time": 0.6745118713378907} +{"epoch": 0, "iter": 20958, "iter_tflops": 
10.323520832957088, "iter_time": 1.3847576751708988, "loss": 0.41145655512809753, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 11.761077640952152, "step_time": 1.215498710632324} +{"epoch": 0, "iter": 20959, "iter_tflops": 21.764822270265363, "iter_time": 0.6568201904296874, "loss": 0.2503200173377991, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 23.464413369792716, "step_time": 0.6092449226379395} +{"epoch": 0, "iter": 20960, "iter_tflops": 23.161236497167035, "iter_time": 0.6172198410034179, "loss": 0.13576796650886536, "lr": 3e-05, "seqlen": 5728.0, "step_tflops": 24.805533090143104, "step_time": 0.5763058853149414} +{"epoch": 0, "iter": 20961, "iter_tflops": 15.764027112078427, "iter_time": 1.3087451171875, "loss": 0.4372248947620392, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.52218680062275, "step_time": 1.2486902465820313} +{"epoch": 0, "iter": 20962, "iter_tflops": 17.025972872804743, "iter_time": 1.211742416381836, "loss": 0.47351598739624023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.663766209192026, "step_time": 0.9103117866516113} +{"epoch": 0, "iter": 20963, "iter_tflops": 47.46779424665809, "iter_time": 0.4346334991455078, "loss": 0.5314639806747437, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.48958028032805, "step_time": 0.40068482589721677} +{"epoch": 0, "iter": 20964, "iter_tflops": 47.45687392155091, "iter_time": 0.43473351287841794, "loss": 0.5096375942230225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.40598331694468, "step_time": 0.40133642387390134} +{"epoch": 0, "iter": 20965, "iter_tflops": 29.044390462777113, "iter_time": 0.7103297119140625, "loss": 0.07434213161468506, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.6677140924438, "step_time": 0.6727300720214844} +{"epoch": 0, "iter": 20966, "iter_tflops": 20.278567891901446, "iter_time": 1.0173841476440428, "loss": 0.11249491572380066, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.969768863395696, "step_time": 0.8981846370697022} +{"epoch": 0, "iter": 20967, 
"iter_tflops": 38.4410046456241, "iter_time": 0.5366949615478516, "loss": 0.0783797949552536, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.28355263451723, "step_time": 0.4879224243164062} +{"epoch": 0, "iter": 20968, "iter_tflops": 39.903904060376426, "iter_time": 0.5170194244384766, "loss": 0.06991168856620789, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.80845113462508, "step_time": 0.47093866539001467} +{"epoch": 0, "iter": 20969, "iter_tflops": 33.68498177027679, "iter_time": 0.6124715652465821, "loss": 0.08339593559503555, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.70051181196298, "step_time": 0.5621472969055176} +{"epoch": 0, "iter": 20970, "iter_tflops": 11.852687561308947, "iter_time": 1.740625778198242, "loss": 0.09786327928304672, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.109299416506047, "step_time": 1.5737754440307616} +{"epoch": 0, "iter": 20971, "iter_tflops": 18.067831804354363, "iter_time": 1.1418688049316408, "loss": 0.15054422616958618, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.233656592948666, "step_time": 0.9716222648620606} +{"epoch": 0, "iter": 20972, "iter_tflops": 37.547772377399944, "iter_time": 0.549462516784668, "loss": 0.08791166543960571, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.107318884607956, "step_time": 0.5018837051391601} +{"epoch": 0, "iter": 20973, "iter_tflops": 17.50123726529659, "iter_time": 0.9102445373535157, "loss": 0.16691704094409943, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 18.926675000221792, "step_time": 0.8416906623840333} +{"epoch": 0, "iter": 20974, "iter_tflops": 13.041653497899992, "iter_time": 1.221501983642578, "loss": 0.19607189297676086, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 15.422626933513927, "step_time": 1.0329242668151855} +{"epoch": 0, "iter": 20975, "iter_tflops": 24.51321916284404, "iter_time": 0.6498699951171876, "loss": 0.12971378862857819, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 26.355686903304754, "step_time": 0.604439022064209} +{"epoch": 0, 
"iter": 20976, "iter_tflops": 24.905969989603417, "iter_time": 0.6396219711303711, "loss": 0.18790410459041595, "lr": 3e-05, "seqlen": 6368.0, "step_tflops": 26.781869522325977, "step_time": 0.5948205223083496} +{"epoch": 0, "iter": 20977, "iter_tflops": 23.90579851030525, "iter_time": 0.8630162887573243, "loss": 0.5235177278518677, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.626041753190666, "step_time": 0.8050831146240234} +{"epoch": 0, "iter": 20978, "iter_tflops": 8.957748094660973, "iter_time": 2.30315625, "loss": 0.47473832964897156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.78267991396868, "step_time": 2.1089408721923824} +{"epoch": 0, "iter": 20979, "iter_tflops": 11.02540224521422, "iter_time": 1.871232727050781, "loss": 0.5425751805305481, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.52352800159636, "step_time": 1.5255703620910646} +{"epoch": 0, "iter": 20980, "iter_tflops": 44.062961177604706, "iter_time": 0.4682184982299805, "loss": 0.4796413481235504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.5364951901409, "step_time": 0.4340053558349609} +{"epoch": 0, "iter": 20981, "iter_tflops": 20.667168022932245, "iter_time": 0.7608995513916016, "loss": 0.23035314679145813, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 21.798904653213683, "step_time": 0.7213958282470703} +{"epoch": 0, "iter": 20982, "iter_tflops": 9.85748298591656, "iter_time": 1.595299621582031, "loss": 0.20731639862060547, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 12.48355568907823, "step_time": 1.2597083129882813} +{"epoch": 0, "iter": 20983, "iter_tflops": 22.61573795119935, "iter_time": 0.6953405151367188, "loss": 0.15538989007472992, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 24.58038267562268, "step_time": 0.6397637939453126} +{"epoch": 0, "iter": 20984, "iter_tflops": 23.685778417997255, "iter_time": 0.6639274673461915, "loss": 0.2360057681798935, "lr": 3e-05, "seqlen": 6288.0, "step_tflops": 25.472437679381287, "step_time": 0.6173590087890625} +{"epoch": 0, 
"iter": 20985, "iter_tflops": 1.2866258654010119, "iter_time": 1.248644973754883, "loss": 0.2074863612651825, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.373739770693104, "step_time": 1.1694637908935548} +{"epoch": 0, "iter": 20986, "iter_tflops": 1.3394770969432135, "iter_time": 1.199377670288086, "loss": 0.21501535177230835, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 1.6056606650069813, "step_time": 1.000546974182129} +{"epoch": 0, "iter": 20987, "iter_tflops": 3.6162357931692286, "iter_time": 0.4442572364807128, "loss": 0.2711179554462433, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 3.9219781966540466, "step_time": 0.4096246433258056} +{"epoch": 0, "iter": 20988, "iter_tflops": 3.7272949336382246, "iter_time": 0.43102006912231444, "loss": 0.22350655496120453, "lr": 3e-05, "seqlen": 656.0, "step_tflops": 4.044721423176309, "step_time": 0.3971939601898193} +{"epoch": 0, "iter": 20989, "iter_tflops": 27.60979367854096, "iter_time": 0.7472382354736328, "loss": 0.6722401976585388, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.206014856495692, "step_time": 0.7063987884521485} +{"epoch": 0, "iter": 20990, "iter_tflops": 16.65021972235538, "iter_time": 1.2390883636474608, "loss": 0.46233999729156494, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.20333990215339, "step_time": 0.9730114974975586} +{"epoch": 0, "iter": 20991, "iter_tflops": 33.794240501665, "iter_time": 0.6104914093017578, "loss": 0.4663352966308594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.67796446559501, "step_time": 0.5624928703308105} +{"epoch": 0, "iter": 20992, "iter_tflops": 34.26129176927732, "iter_time": 0.6021691665649413, "loss": 0.4193454384803772, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.1452144930802, "step_time": 0.5554172668457031} +{"epoch": 0, "iter": 20993, "iter_tflops": 35.545033868875066, "iter_time": 0.5804212646484375, "loss": 0.15099577605724335, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.28473521075037, "step_time": 0.5251681957244874} +{"epoch": 
0, "iter": 20994, "iter_tflops": 45.35954718466123, "iter_time": 0.45483464431762693, "loss": 0.10427581518888474, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.349038349055014, "step_time": 0.40976142120361325} +{"epoch": 0, "iter": 20995, "iter_tflops": 48.243124523836194, "iter_time": 0.42764836883544927, "loss": 0.16705463826656342, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 52.51659303509351, "step_time": 0.39284904670715326} +{"epoch": 0, "iter": 20996, "iter_tflops": 46.57049348137626, "iter_time": 0.44300783538818356, "loss": 0.10286752134561539, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.763248439706615, "step_time": 0.4064179134368897} +{"epoch": 0, "iter": 20997, "iter_tflops": 38.81783445285152, "iter_time": 0.5314849166870118, "loss": 0.11349222809076309, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.83277584419003, "step_time": 0.49318012237548825} +{"epoch": 0, "iter": 20998, "iter_tflops": 29.32042356853325, "iter_time": 0.7036424102783204, "loss": 0.10076470673084259, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.85141792487771, "step_time": 0.5754610195159913} +{"epoch": 0, "iter": 20999, "iter_tflops": 51.68827655989244, "iter_time": 0.3991445426940918, "loss": 0.1425371915102005, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.47733527084606, "step_time": 0.3652986354827881} +{"epoch": 0, "iter": 21000, "iter_tflops": 4.402773674128128, "iter_time": 4.685930969238282, "loss": 0.13150376081466675, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 4.431366888569848, "step_time": 4.655695190429688} +{"epoch": 0, "iter": 21001, "iter_tflops": 32.49035098082957, "iter_time": 0.6349914016723632, "loss": 0.6165488958358765, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.97324094354331, "step_time": 0.5899108276367186} +{"epoch": 0, "iter": 21002, "iter_tflops": 17.69235861992833, "iter_time": 1.1661019287109375, "loss": 0.6556260585784912, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.506223486520888, "step_time": 0.841871597290039} 
+{"epoch": 0, "iter": 21003, "iter_tflops": 18.590541842493117, "iter_time": 1.1097628936767578, "loss": 0.4500350058078766, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.633076022260823, "step_time": 0.9536828460693358} +{"epoch": 0, "iter": 21004, "iter_tflops": 24.143861834801495, "iter_time": 0.8545067749023437, "loss": 0.5084056854248047, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.117549922335897, "step_time": 0.7608022689819336} +{"epoch": 0, "iter": 21005, "iter_tflops": 7.353133445344059, "iter_time": 2.0496266174316404, "loss": 0.21770727634429932, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 8.1113179522591, "step_time": 1.8580430603027345} +{"epoch": 0, "iter": 21006, "iter_tflops": 16.91744163442348, "iter_time": 0.8908662643432617, "loss": 0.35402223467826843, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 17.899688365259696, "step_time": 0.8419799118041991} +{"epoch": 0, "iter": 21007, "iter_tflops": 15.86808056226994, "iter_time": 0.9497795257568358, "loss": 0.1599501371383667, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 18.521738902364326, "step_time": 0.8137021102905274} +{"epoch": 0, "iter": 21008, "iter_tflops": 16.28208587023265, "iter_time": 0.9256294403076172, "loss": 0.1662527620792389, "lr": 3e-05, "seqlen": 6032.0, "step_tflops": 18.29244048735059, "step_time": 0.823901985168457} +{"epoch": 0, "iter": 21009, "iter_tflops": 9.4827945791279, "iter_time": 2.1756343383789063, "loss": 0.11398515105247498, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.771528717490193, "step_time": 2.1113475799560546} +{"epoch": 0, "iter": 21010, "iter_tflops": 25.88268574881012, "iter_time": 0.7971001815795898, "loss": 0.09379222989082336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.196172242946638, "step_time": 0.6832353897094727} +{"epoch": 0, "iter": 21011, "iter_tflops": 24.687819105450252, "iter_time": 0.8356790618896486, "loss": 0.1347343474626541, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.883106570347827, "step_time": 
0.7399137344360351} +{"epoch": 0, "iter": 21012, "iter_tflops": 26.530301546781757, "iter_time": 0.7776426315307619, "loss": 0.07379647344350815, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.99949244105519, "step_time": 0.6877147521972655} +{"epoch": 0, "iter": 21013, "iter_tflops": 8.044208089314013, "iter_time": 2.564714050292969, "loss": 0.4606561064720154, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.349795386319567, "step_time": 2.4708501892089845} +{"epoch": 0, "iter": 21014, "iter_tflops": 36.45847034819606, "iter_time": 0.5658792953491211, "loss": 0.5365384817123413, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.7671472159605, "step_time": 0.4939550552368164} +{"epoch": 0, "iter": 21015, "iter_tflops": 25.03938089913284, "iter_time": 0.8239458312988283, "loss": 0.45584943890571594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.116141360479528, "step_time": 0.7608417892456054} +{"epoch": 0, "iter": 21016, "iter_tflops": 27.36476863425411, "iter_time": 0.7539290313720702, "loss": 0.5591074228286743, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.33340770610901, "step_time": 0.6380735893249512} +{"epoch": 0, "iter": 21017, "iter_tflops": 8.455864741676095, "iter_time": 2.439856140136719, "loss": 0.3006646931171417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.923309619978927, "step_time": 2.3120450134277344} +{"epoch": 0, "iter": 21018, "iter_tflops": 32.809701353528176, "iter_time": 0.6288107681274414, "loss": 0.160167396068573, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.78706274372118, "step_time": 0.5459829902648926} +{"epoch": 0, "iter": 21019, "iter_tflops": 32.18261860409889, "iter_time": 0.641063232421875, "loss": 0.32877442240715027, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.0860374051728, "step_time": 0.5880143508911133} +{"epoch": 0, "iter": 21020, "iter_tflops": 35.879926970078785, "iter_time": 0.5750037765502929, "loss": 0.2059934139251709, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.40525717463289, "step_time": 
0.5371945152282714} +{"epoch": 0, "iter": 21021, "iter_tflops": 10.9891586549399, "iter_time": 1.877404281616211, "loss": 0.3282265067100525, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.259926719174134, "step_time": 1.8322582397460938} +{"epoch": 0, "iter": 21022, "iter_tflops": 21.80799481401473, "iter_time": 0.9460334930419922, "loss": 0.3200327157974243, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.496593039123837, "step_time": 0.8091706008911131} +{"epoch": 0, "iter": 21023, "iter_tflops": 39.72806280264382, "iter_time": 0.5193078155517579, "loss": 0.38415175676345825, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.488886943452854, "step_time": 0.45354140090942385} +{"epoch": 0, "iter": 21024, "iter_tflops": 37.03920604877194, "iter_time": 0.5570069046020507, "loss": 0.27401503920555115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.450200761678204, "step_time": 0.5229654884338379} +{"epoch": 0, "iter": 21025, "iter_tflops": 9.184605457740176, "iter_time": 2.2462688903808594, "loss": 0.14220203459262848, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.032899853254431, "step_time": 2.056344009399414} +{"epoch": 0, "iter": 21026, "iter_tflops": 23.72109246871349, "iter_time": 0.869736228942871, "loss": 0.18112727999687195, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.12948111417707, "step_time": 0.7604676780700683} +{"epoch": 0, "iter": 21027, "iter_tflops": 30.560358172616308, "iter_time": 0.6750933151245118, "loss": 0.1887471079826355, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.1099974243972, "step_time": 0.5713402099609375} +{"epoch": 0, "iter": 21028, "iter_tflops": 32.860501885808645, "iter_time": 0.6278386611938477, "loss": 0.15363360941410065, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.84251727647871, "step_time": 0.592124080657959} +{"epoch": 0, "iter": 21029, "iter_tflops": 20.7026031939256, "iter_time": 0.9965458602905275, "loss": 0.1546643078327179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.767704938635745, 
"step_time": 0.9477845077514648} +{"epoch": 0, "iter": 21030, "iter_tflops": 22.26863132787579, "iter_time": 0.9264643707275391, "loss": 0.1117144450545311, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.74872710942209, "step_time": 0.7712925338745117} +{"epoch": 0, "iter": 21031, "iter_tflops": 26.564347491547828, "iter_time": 0.7766459732055664, "loss": 0.08822352439165115, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.535518194412692, "step_time": 0.6985180816650389} +{"epoch": 0, "iter": 21032, "iter_tflops": 38.054136664953795, "iter_time": 0.5421511383056641, "loss": 0.1301182508468628, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.023402240651805, "step_time": 0.4795318927764892} +{"epoch": 0, "iter": 21033, "iter_tflops": 9.868663276041596, "iter_time": 2.0905661621093747, "loss": 0.5345291495323181, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.205017443787845, "step_time": 2.0216617584228516} +{"epoch": 0, "iter": 21034, "iter_tflops": 25.813247991969302, "iter_time": 0.799244384765625, "loss": 0.5661217570304871, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.21354501339569, "step_time": 0.7062167053222657} +{"epoch": 0, "iter": 21035, "iter_tflops": 25.684358354014524, "iter_time": 0.8032551651000976, "loss": 0.3879903256893158, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.285688897296673, "step_time": 0.7044769744873046} +{"epoch": 0, "iter": 21036, "iter_tflops": 30.734185564731906, "iter_time": 0.6712751007080078, "loss": 0.49767595529556274, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.975071278233365, "step_time": 0.5734830474853515} +{"epoch": 0, "iter": 21037, "iter_tflops": 8.293550005483713, "iter_time": 2.4876070556640624, "loss": 0.12113610655069351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.441718835638602, "step_time": 2.4439446411132812} +{"epoch": 0, "iter": 21038, "iter_tflops": 19.393448188700116, "iter_time": 1.0638177032470701, "loss": 0.11984867602586746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
22.139854168504826, "step_time": 0.9318531799316405} +{"epoch": 0, "iter": 21039, "iter_tflops": 23.73816799730218, "iter_time": 0.8691106033325197, "loss": 0.11818841099739075, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.799250167061686, "step_time": 0.7421456832885743} +{"epoch": 0, "iter": 21040, "iter_tflops": 38.08652537472315, "iter_time": 0.5416900939941407, "loss": 0.102143794298172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.62931933251312, "step_time": 0.49559045982360844} +{"epoch": 0, "iter": 21041, "iter_tflops": 7.807762638130662, "iter_time": 2.642382263183594, "loss": 0.42448070645332336, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.332156367900783, "step_time": 2.4760809326171875} +{"epoch": 0, "iter": 21042, "iter_tflops": 21.998963917426913, "iter_time": 0.937821144104004, "loss": 0.545726478099823, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.280457440997274, "step_time": 0.8160886154174806} +{"epoch": 0, "iter": 21043, "iter_tflops": 29.25475946476404, "iter_time": 0.7052217788696289, "loss": 0.417966365814209, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.00316898025152, "step_time": 0.5894064483642578} +{"epoch": 0, "iter": 21044, "iter_tflops": 40.75446416704991, "iter_time": 0.50622904586792, "loss": 0.5512380003929138, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.80083881882639, "step_time": 0.47102051162719727} +{"epoch": 0, "iter": 21045, "iter_tflops": 11.696456328603157, "iter_time": 1.7638755645751953, "loss": 0.6041256189346313, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.017409438766318, "step_time": 1.5848847351074218} +{"epoch": 0, "iter": 21046, "iter_tflops": 19.75152699459393, "iter_time": 1.0445315704345703, "loss": 0.6770817041397095, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.944130456620417, "step_time": 0.8991882934570313} +{"epoch": 0, "iter": 21047, "iter_tflops": 20.73281562678003, "iter_time": 0.9950936660766602, "loss": 0.49666717648506165, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 24.381413730308658, "step_time": 0.8461811828613282} +{"epoch": 0, "iter": 21048, "iter_tflops": 22.823421470359865, "iter_time": 0.9039439392089845, "loss": 0.4815070331096649, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.366503285955396, "step_time": 0.7824736289978027} +{"epoch": 0, "iter": 21049, "iter_tflops": 8.365826625353774, "iter_time": 2.4661153564453127, "loss": 0.50271075963974, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 8.992222691143537, "step_time": 2.2943263549804684} +{"epoch": 0, "iter": 21050, "iter_tflops": 23.630580421533416, "iter_time": 0.8730675735473633, "loss": 0.41015157103538513, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.061320105123276, "step_time": 0.7352146453857422} +{"epoch": 0, "iter": 21051, "iter_tflops": 23.65367999757245, "iter_time": 0.872214958190918, "loss": 0.3465832769870758, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.117459095594104, "step_time": 0.7608048171997072} +{"epoch": 0, "iter": 21052, "iter_tflops": 29.38973597579805, "iter_time": 0.7019829483032227, "loss": 0.3681216239929199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.02030182285898, "step_time": 0.5891180953979491} +{"epoch": 0, "iter": 21053, "iter_tflops": 28.900756048804606, "iter_time": 0.713859992980957, "loss": 0.4247487783432007, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.938619054334175, "step_time": 0.6668395080566406} +{"epoch": 0, "iter": 21054, "iter_tflops": 11.915307197243646, "iter_time": 1.7314781036376952, "loss": 0.7021241188049316, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.893240797917159, "step_time": 1.6001480026245118} +{"epoch": 0, "iter": 21055, "iter_tflops": 12.719841795158045, "iter_time": 1.6219614868164063, "loss": 0.5369047522544861, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.386732140358749, "step_time": 1.3408365936279296} +{"epoch": 0, "iter": 21056, "iter_tflops": 25.821043954411266, "iter_time": 0.799003074645996, "loss": 0.49674832820892334, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 30.757220435893018, "step_time": 0.6707723655700684} +{"epoch": 0, "iter": 21057, "iter_tflops": 25.56382878642999, "iter_time": 0.7099641952514648, "loss": 0.2370193600654602, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 27.85352330520949, "step_time": 0.6516016998291015} +{"epoch": 0, "iter": 21058, "iter_tflops": 27.947137192485133, "iter_time": 0.6494190444946288, "loss": 0.10916408151388168, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 30.10764992747022, "step_time": 0.6028169975280762} +{"epoch": 0, "iter": 21059, "iter_tflops": 27.514897692733427, "iter_time": 0.6596209564208986, "loss": 0.19198352098464966, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 29.60108513616965, "step_time": 0.6131330337524414} +{"epoch": 0, "iter": 21060, "iter_tflops": 29.96465435455284, "iter_time": 0.6056937255859376, "loss": 0.19757741689682007, "lr": 3e-05, "seqlen": 7232.0, "step_tflops": 32.11402114190425, "step_time": 0.5651551094055175} +{"epoch": 0, "iter": 21061, "iter_tflops": 22.094740433954257, "iter_time": 0.9337558670043945, "loss": 0.5354815125465393, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.52476773057561, "step_time": 0.8769945678710938} +{"epoch": 0, "iter": 21062, "iter_tflops": 8.51600537958753, "iter_time": 2.422625701904297, "loss": 0.4642099142074585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.42760323435397, "step_time": 1.9785077209472657} +{"epoch": 0, "iter": 21063, "iter_tflops": 13.108374995927361, "iter_time": 1.5738864288330079, "loss": 0.5849160552024841, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.613960456097082, "step_time": 1.2417926216125486} +{"epoch": 0, "iter": 21064, "iter_tflops": 36.73523669507628, "iter_time": 0.5616159133911133, "loss": 0.4806285798549652, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.90099892665523, "step_time": 0.5170570678710937} +{"epoch": 0, "iter": 21065, "iter_tflops": 20.276533582483918, "iter_time": 0.7150884857177735, "loss": 0.145009845495224, "lr": 3e-05, 
"seqlen": 5808.0, "step_tflops": 22.095498200880435, "step_time": 0.6562203559875489} +{"epoch": 0, "iter": 21066, "iter_tflops": 22.140209075016646, "iter_time": 0.6548951568603516, "loss": 0.12002269178628922, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 23.870761770648794, "step_time": 0.6074173851013183} +{"epoch": 0, "iter": 21067, "iter_tflops": 21.37037204628939, "iter_time": 0.67848681640625, "loss": 0.23191764950752258, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 22.895126988074363, "step_time": 0.6333013877868652} +{"epoch": 0, "iter": 21068, "iter_tflops": 21.21684623010593, "iter_time": 0.683396369934082, "loss": 0.20419526100158691, "lr": 3e-05, "seqlen": 5808.0, "step_tflops": 22.748034699441398, "step_time": 0.6373964118957519} +{"epoch": 0, "iter": 21069, "iter_tflops": 20.533396015451896, "iter_time": 1.0047579803466797, "loss": 0.18460048735141754, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.154039553778578, "step_time": 0.9312565078735352} +{"epoch": 0, "iter": 21070, "iter_tflops": 26.40739300881661, "iter_time": 0.7812620315551757, "loss": 0.17317408323287964, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.64203195782129, "step_time": 0.696008071899414} +{"epoch": 0, "iter": 21071, "iter_tflops": 51.0662472888529, "iter_time": 0.40400645446777345, "loss": 0.18890339136123657, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.673119566965866, "step_time": 0.37057548904418947} +{"epoch": 0, "iter": 21072, "iter_tflops": 51.80936140914564, "iter_time": 0.39821169281005864, "loss": 0.25842511653900146, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.18903368006787, "step_time": 0.3671729545593262} +{"epoch": 0, "iter": 21073, "iter_tflops": 33.10883078313689, "iter_time": 0.6231296310424804, "loss": 0.18016371130943298, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.08652474193563, "step_time": 0.5880061836242676} +{"epoch": 0, "iter": 21074, "iter_tflops": 14.1075186541766, "iter_time": 1.4624183044433596, "loss": 0.19323749840259552, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.249831305591346, "step_time": 1.2696189346313478} +{"epoch": 0, "iter": 21075, "iter_tflops": 47.963757996770646, "iter_time": 0.43013922119140624, "loss": 0.19620612263679504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.16439490632579, "step_time": 0.38089770126342776} +{"epoch": 0, "iter": 21076, "iter_tflops": 49.36590826980766, "iter_time": 0.4179218864440918, "loss": 0.21980690956115723, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.42561873542295, "step_time": 0.3861648025512695} +{"epoch": 0, "iter": 21077, "iter_tflops": 26.349479083747084, "iter_time": 0.7829791793823242, "loss": 0.12567873299121857, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.825980690042545, "step_time": 0.7414327545166015} +{"epoch": 0, "iter": 21078, "iter_tflops": 14.82842278073594, "iter_time": 1.391320831298828, "loss": 0.14606249332427979, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.304958949896957, "step_time": 1.0686939849853516} +{"epoch": 0, "iter": 21079, "iter_tflops": 46.298945095953826, "iter_time": 0.44560612487792967, "loss": 0.1416274756193161, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.21006920207212, "step_time": 0.4108955402374268} +{"epoch": 0, "iter": 21080, "iter_tflops": 49.57940264948945, "iter_time": 0.41612226867675783, "loss": 0.12768103182315826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.64571229988279, "step_time": 0.38458047485351565} +{"epoch": 0, "iter": 21081, "iter_tflops": 42.248405906751636, "iter_time": 0.4794916305541992, "loss": 0.07638391107320786, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 46.14872863594354, "step_time": 0.43896674156188964} +{"epoch": 0, "iter": 21082, "iter_tflops": 10.523026607853893, "iter_time": 1.9250884552001954, "loss": 0.06906628608703613, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 11.380299769758928, "step_time": 1.7800723571777346} +{"epoch": 0, "iter": 21083, "iter_tflops": 13.070014360903343, "iter_time": 1.5499414520263675, "loss": 
0.08521917462348938, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 18.414010475727412, "step_time": 1.1001273765563966} +{"epoch": 0, "iter": 21084, "iter_tflops": 21.637605030630272, "iter_time": 0.9362291717529296, "loss": 0.12683218717575073, "lr": 3e-05, "seqlen": 8048.0, "step_tflops": 26.863554801853592, "step_time": 0.7540981521606446} +{"epoch": 0, "iter": 21085, "iter_tflops": 12.453546006135927, "iter_time": 1.2528829498291014, "loss": 0.1530185043811798, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 12.941978188714444, "step_time": 1.2055989608764648} +{"epoch": 0, "iter": 21086, "iter_tflops": 13.467403683546895, "iter_time": 1.15856298828125, "loss": 0.22154167294502258, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 15.759767131910912, "step_time": 0.9900422592163086} +{"epoch": 0, "iter": 21087, "iter_tflops": 27.68033621899071, "iter_time": 0.5636794052124025, "loss": 0.13669584691524506, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.54522357645193, "step_time": 0.52810009765625} +{"epoch": 0, "iter": 21088, "iter_tflops": 28.9657646332269, "iter_time": 0.5386647186279296, "loss": 0.2745157480239868, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 30.71973533297117, "step_time": 0.5079091758728027} +{"epoch": 0, "iter": 21089, "iter_tflops": 26.882150196905616, "iter_time": 0.7674644088745117, "loss": 0.04927800968289375, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.31981564129549, "step_time": 0.7285038070678711} +{"epoch": 0, "iter": 21090, "iter_tflops": 12.23358338396851, "iter_time": 1.686430938720703, "loss": 0.06358268111944199, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.512439524119426, "step_time": 1.2494273471832273} +{"epoch": 0, "iter": 21091, "iter_tflops": 41.97115700823829, "iter_time": 0.4915540809631348, "loss": 0.06839035451412201, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.952825973721765, "step_time": 0.44896245384216305} +{"epoch": 0, "iter": 21092, "iter_tflops": 39.182976363337886, "iter_time": 
0.5265320663452149, "loss": 0.05033925920724869, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.85540296588651, "step_time": 0.48141172599792476} +{"epoch": 0, "iter": 21093, "iter_tflops": 25.68433591040442, "iter_time": 0.8032558670043946, "loss": 0.2687048017978668, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.649623252909215, "step_time": 0.7461618309020996} +{"epoch": 0, "iter": 21094, "iter_tflops": 10.49046757033791, "iter_time": 1.9666514739990235, "loss": 0.15392471849918365, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 11.813262435208651, "step_time": 1.746434875488281} +{"epoch": 0, "iter": 21095, "iter_tflops": 24.535668522768738, "iter_time": 0.8408612747192382, "loss": 0.2814255654811859, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.421210161859722, "step_time": 0.6781812229156494} +{"epoch": 0, "iter": 21096, "iter_tflops": 38.127832081142486, "iter_time": 0.5411032409667969, "loss": 0.18605273962020874, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.74811965977225, "step_time": 0.49418018531799324} +{"epoch": 0, "iter": 21097, "iter_tflops": 17.041135520299736, "iter_time": 0.8652240676879883, "loss": 0.3570629060268402, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 18.69341278061411, "step_time": 0.7887484626770019} +{"epoch": 0, "iter": 21098, "iter_tflops": 22.30973090151273, "iter_time": 0.6608954925537108, "loss": 0.13757051527500153, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.093509741383105, "step_time": 0.6119656600952148} +{"epoch": 0, "iter": 21099, "iter_tflops": 23.156299238617663, "iter_time": 0.6367338943481445, "loss": 0.22005435824394226, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 24.92018415861477, "step_time": 0.5916649932861328} +{"epoch": 0, "iter": 21100, "iter_tflops": 22.21006278628277, "iter_time": 0.6638612747192384, "loss": 0.18385322391986847, "lr": 3e-05, "seqlen": 5904.0, "step_tflops": 23.9374030738405, "step_time": 0.6159565658569336} +{"epoch": 0, "iter": 21101, "iter_tflops": 31.501328570500075, 
"iter_time": 0.6549277267456055, "loss": 0.37574419379234314, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.88720698089453, "step_time": 0.5913655834197998} +{"epoch": 0, "iter": 21102, "iter_tflops": 36.18590273754102, "iter_time": 0.5701417388916017, "loss": 0.28801414370536804, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.628138976881, "step_time": 0.5206172695159912} +{"epoch": 0, "iter": 21103, "iter_tflops": 34.69458418481167, "iter_time": 0.5946488189697265, "loss": 0.28663480281829834, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.574939399710615, "step_time": 0.5490652503967285} +{"epoch": 0, "iter": 21104, "iter_tflops": 41.80312918193766, "iter_time": 0.49352988433837897, "loss": 0.278586208820343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.779223824385255, "step_time": 0.45066499137878413} +{"epoch": 0, "iter": 21105, "iter_tflops": 18.960388635439738, "iter_time": 1.088115539550781, "loss": 0.5977446436882019, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.277599711019132, "step_time": 1.0174327239990235} +{"epoch": 0, "iter": 21106, "iter_tflops": 23.47133429510042, "iter_time": 0.878991081237793, "loss": 0.4739087224006653, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.88903127496945, "step_time": 0.6469652004241943} +{"epoch": 0, "iter": 21107, "iter_tflops": 38.78203293355948, "iter_time": 0.5319755554199219, "loss": 0.6646826267242432, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.111731082069035, "step_time": 0.4899132137298584} +{"epoch": 0, "iter": 21108, "iter_tflops": 36.10120143677549, "iter_time": 0.5714794158935546, "loss": 0.5756214261054993, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.247306931340674, "step_time": 0.525669023513794} +{"epoch": 0, "iter": 21109, "iter_tflops": 18.347068528030345, "iter_time": 1.1244899139404296, "loss": 0.3000868856906891, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.43157840376936, "step_time": 1.0617301940917967} +{"epoch": 0, "iter": 21110, "iter_tflops": 
20.688753592970745, "iter_time": 0.9972129745483398, "loss": 0.2863515615463257, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.869398455955835, "step_time": 0.7402776756286621} +{"epoch": 0, "iter": 21111, "iter_tflops": 49.13310841649987, "iter_time": 0.4199020614624024, "loss": 0.2226896435022354, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.21449450926822, "step_time": 0.38769688034057614} +{"epoch": 0, "iter": 21112, "iter_tflops": 43.42276718567684, "iter_time": 0.4751215744018555, "loss": 0.31671005487442017, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.76574471114133, "step_time": 0.4411582374572754} +{"epoch": 0, "iter": 21113, "iter_tflops": 29.834912490936222, "iter_time": 0.6915084304809571, "loss": 0.027193685993552208, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.634390614659804, "step_time": 0.6521729393005371} +{"epoch": 0, "iter": 21114, "iter_tflops": 16.886164906413892, "iter_time": 1.2217749633789063, "loss": 0.024191897362470627, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.56266118800929, "step_time": 1.003327989578247} +{"epoch": 0, "iter": 21115, "iter_tflops": 45.761757027885956, "iter_time": 0.4508370056152343, "loss": 0.045819301158189774, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.55862177659677, "step_time": 0.40806281471252437} +{"epoch": 0, "iter": 21116, "iter_tflops": 44.45939124183133, "iter_time": 0.46404354476928716, "loss": 0.0356123112142086, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.04624950542363, "step_time": 0.4206456909179688} +{"epoch": 0, "iter": 21117, "iter_tflops": 23.302541876011464, "iter_time": 0.8853580703735352, "loss": 0.014223619364202023, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.217956772212393, "step_time": 0.8181112251281738} +{"epoch": 0, "iter": 21118, "iter_tflops": 19.99604619553286, "iter_time": 1.0317586441040039, "loss": 0.03710734099149704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.30325203830349, "step_time": 0.7556276988983155} +{"epoch": 0, "iter": 
21119, "iter_tflops": 53.10287974238122, "iter_time": 0.3885117645263672, "loss": 0.03879006952047348, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.983451200348526, "step_time": 0.3558100299835205} +{"epoch": 0, "iter": 21120, "iter_tflops": 52.351027979335726, "iter_time": 0.3940914688110351, "loss": 0.022701948881149292, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.40296215148697, "step_time": 0.3594081687927246} +{"epoch": 0, "iter": 21121, "iter_tflops": 41.9345364400762, "iter_time": 0.49198334503173824, "loss": 0.35474297404289246, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.72068145147841, "step_time": 0.45124203872680657} +{"epoch": 0, "iter": 21122, "iter_tflops": 41.62145372238144, "iter_time": 0.4956841163635254, "loss": 0.29433128237724304, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.05342389406014, "step_time": 0.429336597442627} +{"epoch": 0, "iter": 21123, "iter_tflops": 46.023538987506086, "iter_time": 0.44827264404296874, "loss": 0.279239684343338, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.959960957244036, "step_time": 0.41295255470275877} +{"epoch": 0, "iter": 21124, "iter_tflops": 38.9214453799522, "iter_time": 0.5300700759887695, "loss": 0.2863699197769165, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.685277037676, "step_time": 0.48333043479919435} +{"epoch": 0, "iter": 21125, "iter_tflops": 20.500210259330068, "iter_time": 1.0063844833374025, "loss": 0.4892166256904602, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.638759318112324, "step_time": 0.9113173217773437} +{"epoch": 0, "iter": 21126, "iter_tflops": 37.62468604134679, "iter_time": 0.5483392868041992, "loss": 0.4627601206302643, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.13621082691878, "step_time": 0.5015312080383301} +{"epoch": 0, "iter": 21127, "iter_tflops": 37.23233191757708, "iter_time": 0.5541176834106445, "loss": 0.47665631771087646, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.74187379709862, "step_time": 0.5063854846954345} +{"epoch": 
0, "iter": 21128, "iter_tflops": 42.403341986383644, "iter_time": 0.48654404449462885, "loss": 0.575892984867096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.411490394661264, "step_time": 0.4445255546569824} +{"epoch": 0, "iter": 21129, "iter_tflops": 28.891016410091556, "iter_time": 0.7141006469726563, "loss": 0.5910105109214783, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.162468389629495, "step_time": 0.6620494003295899} +{"epoch": 0, "iter": 21130, "iter_tflops": 9.350031585490825, "iter_time": 2.206526611328125, "loss": 0.5311543941497803, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.249094919345941, "step_time": 1.684295341491699} +{"epoch": 0, "iter": 21131, "iter_tflops": 16.41777183476251, "iter_time": 1.2566317596435548, "loss": 0.4696517586708069, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.91451974328177, "step_time": 1.0907542877197265} +{"epoch": 0, "iter": 21132, "iter_tflops": 44.714259389817066, "iter_time": 0.4613985290527344, "loss": 0.7136642336845398, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.77799037118581, "step_time": 0.41446216201782227} +{"epoch": 0, "iter": 21133, "iter_tflops": 14.230658473292047, "iter_time": 1.0877974853515624, "loss": 0.13067321479320526, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 14.79889320812493, "step_time": 1.0460292053222655} +{"epoch": 0, "iter": 21134, "iter_tflops": 19.96304537413459, "iter_time": 0.7754365234375, "loss": 0.12882661819458008, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 24.114594557039997, "step_time": 0.6419379959106446} +{"epoch": 0, "iter": 21135, "iter_tflops": 28.010687236743138, "iter_time": 0.5526488647460938, "loss": 0.22070540487766266, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 29.828870124942895, "step_time": 0.518962818145752} +{"epoch": 0, "iter": 21136, "iter_tflops": 27.000571404129953, "iter_time": 0.5733239593505859, "loss": 0.21285606920719147, "lr": 3e-05, "seqlen": 6192.0, "step_tflops": 28.5710197631408, "step_time": 0.5418103599548341} 
+{"epoch": 0, "iter": 21137, "iter_tflops": 34.587108373055166, "iter_time": 0.5964966278076173, "loss": 0.4095729887485504, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.248155225133885, "step_time": 0.5538822898864746} +{"epoch": 0, "iter": 21138, "iter_tflops": 44.577243713905496, "iter_time": 0.46281671524047846, "loss": 0.49271848797798157, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.377384654097526, "step_time": 0.42646153068542475} +{"epoch": 0, "iter": 21139, "iter_tflops": 51.00116519812292, "iter_time": 0.4045220031738281, "loss": 0.4463547468185425, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.08800178531683, "step_time": 0.3745115604400635} +{"epoch": 0, "iter": 21140, "iter_tflops": 51.53689847421989, "iter_time": 0.40031694030761716, "loss": 0.4594605267047882, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.87861976092165, "step_time": 0.36921265411376947} +{"epoch": 0, "iter": 21141, "iter_tflops": 29.980570189017243, "iter_time": 0.6881488037109375, "loss": 0.12312043458223343, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.959646607582087, "step_time": 0.6455357208251953} +{"epoch": 0, "iter": 21142, "iter_tflops": 11.395196598679467, "iter_time": 1.8105079040527343, "loss": 0.13090266287326813, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.719667152409068, "step_time": 1.5037604980468748} +{"epoch": 0, "iter": 21143, "iter_tflops": 11.878543783731098, "iter_time": 1.736836929321289, "loss": 0.14818856120109558, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.423401630205227, "step_time": 1.5369497299194337} +{"epoch": 0, "iter": 21144, "iter_tflops": 22.010501865273202, "iter_time": 0.9373295364379883, "loss": 0.13546694815158844, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 29.18106691392352, "step_time": 0.7070027141571045} +{"epoch": 0, "iter": 21145, "iter_tflops": 21.968703420843646, "iter_time": 0.7400654220581054, "loss": 0.19400344789028168, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 23.2349647847639, "step_time": 
0.6997332649230957} +{"epoch": 0, "iter": 21146, "iter_tflops": 10.613287951697586, "iter_time": 1.5318794555664064, "loss": 0.28772038221359253, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 14.186674313339552, "step_time": 1.146024600982666} +{"epoch": 0, "iter": 21147, "iter_tflops": 24.95215612372638, "iter_time": 0.6515780715942383, "loss": 0.17096689343452454, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 26.99942820851952, "step_time": 0.6021711883544921} +{"epoch": 0, "iter": 21148, "iter_tflops": 22.694256269339434, "iter_time": 0.7164049606323244, "loss": 0.17903175950050354, "lr": 3e-05, "seqlen": 6496.0, "step_tflops": 24.531554274105716, "step_time": 0.6627495994567871} +{"epoch": 0, "iter": 21149, "iter_tflops": 22.494190563705068, "iter_time": 0.917174301147461, "loss": 0.2317904680967331, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.130657850417542, "step_time": 0.854974349975586} +{"epoch": 0, "iter": 21150, "iter_tflops": 9.353506058936953, "iter_time": 2.2057069702148437, "loss": 0.18326281011104584, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.736477032692553, "step_time": 1.921588752746582} +{"epoch": 0, "iter": 21151, "iter_tflops": 11.856189946700502, "iter_time": 1.7401115875244142, "loss": 0.19581729173660278, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.486172474438618, "step_time": 1.4241921768188477} +{"epoch": 0, "iter": 21152, "iter_tflops": 38.02208526865632, "iter_time": 0.542608154296875, "loss": 0.14548040926456451, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.6139216831572, "step_time": 0.4957738342285156} +{"epoch": 0, "iter": 21153, "iter_tflops": 18.813855657544902, "iter_time": 0.7815290451049806, "loss": 0.24780836701393127, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 20.448163061924824, "step_time": 0.7190657958984376} +{"epoch": 0, "iter": 21154, "iter_tflops": 23.013187213855975, "iter_time": 0.6389195251464843, "loss": 0.16660206019878387, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 24.799702429648075, 
"step_time": 0.5928931884765626} +{"epoch": 0, "iter": 21155, "iter_tflops": 22.313204331462153, "iter_time": 0.6589629364013672, "loss": 0.2831243574619293, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 23.938466729511795, "step_time": 0.6142237434387207} +{"epoch": 0, "iter": 21156, "iter_tflops": 21.672547565761413, "iter_time": 0.6784423751831055, "loss": 0.267570823431015, "lr": 3e-05, "seqlen": 5888.0, "step_tflops": 23.346643384270987, "step_time": 0.6297939453125001} +{"epoch": 0, "iter": 21157, "iter_tflops": 28.4926776436231, "iter_time": 0.7240840530395507, "loss": 0.4156486392021179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.51103340069776, "step_time": 0.6547260208129883} +{"epoch": 0, "iter": 21158, "iter_tflops": 37.93547146130162, "iter_time": 0.5438470306396485, "loss": 0.38741233944892883, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.505735989473585, "step_time": 0.4853719863891602} +{"epoch": 0, "iter": 21159, "iter_tflops": 42.30060669356705, "iter_time": 0.48772571182250973, "loss": 0.46759647130966187, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.22767630645507, "step_time": 0.44629311180114745} +{"epoch": 0, "iter": 21160, "iter_tflops": 41.60768777185498, "iter_time": 0.4958481140136719, "loss": 0.44826486706733704, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.290024863863145, "step_time": 0.45553283691406254} +{"epoch": 0, "iter": 21161, "iter_tflops": 17.66044756211303, "iter_time": 1.1682089843749999, "loss": 0.5257115960121155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.811441734144488, "step_time": 1.0967311172485352} +{"epoch": 0, "iter": 21162, "iter_tflops": 21.312281150132634, "iter_time": 0.9680377883911133, "loss": 0.4685666859149933, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.43333044393708, "step_time": 0.7804954261779786} +{"epoch": 0, "iter": 21163, "iter_tflops": 36.58734824429589, "iter_time": 0.563886001586914, "loss": 0.6063382029533386, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 
40.02376076800728, "step_time": 0.5154711380004883} +{"epoch": 0, "iter": 21164, "iter_tflops": 38.948247148156696, "iter_time": 0.5297053146362305, "loss": 0.4880858361721039, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.54819645469857, "step_time": 0.48488761520385737} +{"epoch": 0, "iter": 21165, "iter_tflops": 24.86105232756489, "iter_time": 0.8298560028076172, "loss": 0.6113359928131104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.421363446080605, "step_time": 0.7523730010986327} +{"epoch": 0, "iter": 21166, "iter_tflops": 38.85461992629026, "iter_time": 0.5309817352294922, "loss": 0.4599962830543518, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.57068372922959, "step_time": 0.48463148117065424} +{"epoch": 0, "iter": 21167, "iter_tflops": 38.621131814136994, "iter_time": 0.5341918411254882, "loss": 0.45937392115592957, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.22056142403463, "step_time": 0.4886503829956055} +{"epoch": 0, "iter": 21168, "iter_tflops": 40.19096138646036, "iter_time": 0.5133266983032226, "loss": 0.29859229922294617, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.8001041772545, "step_time": 0.4710284118652345} +{"epoch": 0, "iter": 21169, "iter_tflops": 36.769441630211276, "iter_time": 0.5610934677124022, "loss": 0.013212506659328938, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.99598926998403, "step_time": 0.5032466316223144} +{"epoch": 0, "iter": 21170, "iter_tflops": 42.11879084704283, "iter_time": 0.4898310966491699, "loss": 0.014768753200769424, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.52131802766182, "step_time": 0.4341439666748047} +{"epoch": 0, "iter": 21171, "iter_tflops": 42.27528933930337, "iter_time": 0.48801779556274416, "loss": 0.013784684240818024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.81067218723667, "step_time": 0.44073482704162603} +{"epoch": 0, "iter": 21172, "iter_tflops": 47.716645270635006, "iter_time": 0.43236680603027344, "loss": 0.001948362565599382, "lr": 3e-05, "seqlen": 
8192.0, "step_tflops": 52.74568276855235, "step_time": 0.3911427898406982} +{"epoch": 0, "iter": 21173, "iter_tflops": 16.190096020324727, "iter_time": 1.2743033447265626, "loss": 0.34805047512054443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.12829292178029, "step_time": 1.2045037765502928} +{"epoch": 0, "iter": 21174, "iter_tflops": 18.01376296337999, "iter_time": 1.1452961578369143, "loss": 0.3982715308666229, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.477971839542217, "step_time": 1.0074773845672607} +{"epoch": 0, "iter": 21175, "iter_tflops": 38.81645035581351, "iter_time": 0.5315038681030273, "loss": 0.32836854457855225, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.60501940173861, "step_time": 0.4842409133911133} +{"epoch": 0, "iter": 21176, "iter_tflops": 39.435369522168145, "iter_time": 0.5231621704101561, "loss": 0.4427034258842468, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.99016043437219, "step_time": 0.47990268707275396} +{"epoch": 0, "iter": 21177, "iter_tflops": 23.07120624734328, "iter_time": 0.894235580444336, "loss": 0.36665117740631104, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.6646708113797, "step_time": 0.8364633636474608} +{"epoch": 0, "iter": 21178, "iter_tflops": 7.925010171967187, "iter_time": 2.603289215087891, "loss": 0.39353081583976746, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 9.34610715180594, "step_time": 2.2074531326293942} +{"epoch": 0, "iter": 21179, "iter_tflops": 13.738832562447131, "iter_time": 1.5016627807617187, "loss": 0.3388218581676483, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.370945972989503, "step_time": 1.1876781806945798} +{"epoch": 0, "iter": 21180, "iter_tflops": 38.9624410646484, "iter_time": 0.5295123443603516, "loss": 0.37479689717292786, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.66728670337795, "step_time": 0.48353422737121593} +{"epoch": 0, "iter": 21181, "iter_tflops": 13.482221459107103, "iter_time": 1.1572896575927736, "loss": 0.1749335527420044, "lr": 3e-05, 
"seqlen": 6240.0, "step_tflops": 14.357535181008585, "step_time": 1.0867349624633789} +{"epoch": 0, "iter": 21182, "iter_tflops": 24.964527587316955, "iter_time": 0.6250002288818359, "loss": 0.24883277714252472, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 26.844401469724282, "step_time": 0.5812323837280273} +{"epoch": 0, "iter": 21183, "iter_tflops": 27.96238689057259, "iter_time": 0.5579936904907228, "loss": 0.2915690541267395, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.958846714200273, "step_time": 0.5208089485168458} +{"epoch": 0, "iter": 21184, "iter_tflops": 28.039042369056652, "iter_time": 0.5564682006835937, "loss": 0.23737862706184387, "lr": 3e-05, "seqlen": 6240.0, "step_tflops": 29.896436149576857, "step_time": 0.5218961677551269} +{"epoch": 0, "iter": 21185, "iter_tflops": 23.928299100146074, "iter_time": 0.8622047653198244, "loss": 0.0795377790927887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.190018975164865, "step_time": 0.8190185775756836} +{"epoch": 0, "iter": 21186, "iter_tflops": 17.333470560735606, "iter_time": 1.1902459716796876, "loss": 0.09522747248411179, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.08679181741771, "step_time": 1.0809094429016113} +{"epoch": 0, "iter": 21187, "iter_tflops": 47.22383685445944, "iter_time": 0.43687880706787113, "loss": 0.13074082136154175, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.06755908274611, "step_time": 0.4039960765838623} +{"epoch": 0, "iter": 21188, "iter_tflops": 51.50620600145027, "iter_time": 0.40055548858642576, "loss": 0.20910397171974182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.873187550646996, "step_time": 0.369248550415039} +{"epoch": 0, "iter": 21189, "iter_tflops": 23.22865656815069, "iter_time": 0.888174201965332, "loss": 0.20285427570343018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.444513082182407, "step_time": 0.8439969100952148} +{"epoch": 0, "iter": 21190, "iter_tflops": 23.354902086446273, "iter_time": 0.8833731536865234, "loss": 
0.12921790778636932, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.287862706990175, "step_time": 0.7293266983032227} +{"epoch": 0, "iter": 21191, "iter_tflops": 47.77661391356031, "iter_time": 0.431824104309082, "loss": 0.12753108143806458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.818335033688975, "step_time": 0.3981427326202393} +{"epoch": 0, "iter": 21192, "iter_tflops": 45.92234778764956, "iter_time": 0.4492604255676269, "loss": 0.14550434052944183, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.62636036767517, "step_time": 0.4157285232543945} +{"epoch": 0, "iter": 21193, "iter_tflops": 32.189016149529145, "iter_time": 0.6409358215332032, "loss": 0.0657602995634079, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.29230897195143, "step_time": 0.6016245079040526} +{"epoch": 0, "iter": 21194, "iter_tflops": 12.128852164990555, "iter_time": 1.7009930725097657, "loss": 0.10082030296325684, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.852333210803058, "step_time": 1.3014546966552734} +{"epoch": 0, "iter": 21195, "iter_tflops": 10.697666518383434, "iter_time": 1.9285601654052733, "loss": 0.06572935730218887, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 12.330331783061089, "step_time": 1.6731985702514647} +{"epoch": 0, "iter": 21196, "iter_tflops": 15.465176762268118, "iter_time": 1.334035415649414, "loss": 0.04205823689699173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.155790385525446, "step_time": 0.9751984272003174} +{"epoch": 0, "iter": 21197, "iter_tflops": 14.215755365993704, "iter_time": 1.0285726318359374, "loss": 0.20223119854927063, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 15.184689217617905, "step_time": 0.9629394912719726} +{"epoch": 0, "iter": 21198, "iter_tflops": 11.510351754315252, "iter_time": 1.2703292846679688, "loss": 0.3168385326862335, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 14.682389750311962, "step_time": 0.9958826293945313} +{"epoch": 0, "iter": 21199, "iter_tflops": 26.910495382039773, "iter_time": 
0.5433544311523437, "loss": 0.23683682084083557, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 28.67496030669947, "step_time": 0.5099200401306152} +{"epoch": 0, "iter": 21200, "iter_tflops": 26.753147464066465, "iter_time": 0.5465501556396485, "loss": 0.17514479160308838, "lr": 3e-05, "seqlen": 5856.0, "step_tflops": 28.465167797340904, "step_time": 0.5136782264709472} +{"epoch": 0, "iter": 21201, "iter_tflops": 22.154457734831894, "iter_time": 0.9312389297485352, "loss": 0.5630068182945251, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 23.146861681698137, "step_time": 0.8913127746582031} +{"epoch": 0, "iter": 21202, "iter_tflops": 14.764343772339904, "iter_time": 1.3973593292236328, "loss": 0.6354003548622131, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.152612422117187, "step_time": 1.0771947479248047} +{"epoch": 0, "iter": 21203, "iter_tflops": 34.615537882263176, "iter_time": 0.5960067291259765, "loss": 0.6973516345024109, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.62779311742677, "step_time": 0.5482940082550048} +{"epoch": 0, "iter": 21204, "iter_tflops": 37.79004763812222, "iter_time": 0.5459398651123046, "loss": 0.5473513603210449, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.13076842237772, "step_time": 0.5015975704193116} +{"epoch": 0, "iter": 21205, "iter_tflops": 29.208898864955174, "iter_time": 0.7063290405273438, "loss": 0.3035237193107605, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 31.670255910534475, "step_time": 0.6514343795776367} +{"epoch": 0, "iter": 21206, "iter_tflops": 8.73103993059694, "iter_time": 2.36295947265625, "loss": 0.20154476165771484, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.172631534428941, "step_time": 2.028097984313965} +{"epoch": 0, "iter": 21207, "iter_tflops": 11.377370268298726, "iter_time": 1.8133446502685548, "loss": 0.1944689303636551, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 14.77910927032996, "step_time": 1.3959632568359377} +{"epoch": 0, "iter": 21208, "iter_tflops": 24.024541268457906, 
"iter_time": 0.8587507781982421, "loss": 0.27450987696647644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 33.00840109491249, "step_time": 0.6250255336761474} +{"epoch": 0, "iter": 21209, "iter_tflops": 14.732986653578875, "iter_time": 1.0368276062011719, "loss": 0.3902265131473541, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 15.503520320211464, "step_time": 0.9852966918945312} +{"epoch": 0, "iter": 21210, "iter_tflops": 6.561778608228957, "iter_time": 2.3279613952636717, "loss": 0.2227555364370346, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 8.236297352565732, "step_time": 1.854664375305176} +{"epoch": 0, "iter": 21211, "iter_tflops": 9.7754533910502, "iter_time": 1.5626454010009767, "loss": 0.15906065702438354, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 12.014623829678909, "step_time": 1.2714145278930666} +{"epoch": 0, "iter": 21212, "iter_tflops": 23.090089502421254, "iter_time": 0.6615637969970702, "loss": 0.21445201337337494, "lr": 3e-05, "seqlen": 6112.0, "step_tflops": 28.130423148054867, "step_time": 0.5430265731811523} +{"epoch": 0, "iter": 21213, "iter_tflops": 15.103572345228699, "iter_time": 0.9330078582763671, "loss": 0.26137876510620117, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 15.816864918229994, "step_time": 0.8909320373535156} +{"epoch": 0, "iter": 21214, "iter_tflops": 8.349127797159417, "iter_time": 1.6878112335205078, "loss": 0.17552264034748077, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 10.69258074155544, "step_time": 1.3178999557495117} +{"epoch": 0, "iter": 21215, "iter_tflops": 25.719048969812572, "iter_time": 0.5479110717773438, "loss": 0.29163795709609985, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 27.428659839857087, "step_time": 0.513760124206543} +{"epoch": 0, "iter": 21216, "iter_tflops": 25.690612356178562, "iter_time": 0.5485175476074219, "loss": 0.2801935374736786, "lr": 3e-05, "seqlen": 5648.0, "step_tflops": 27.246960008333932, "step_time": 0.5171861991882324} +{"epoch": 0, "iter": 21217, "iter_tflops": 
23.85825696017267, "iter_time": 0.8647359924316405, "loss": 0.24949175119400024, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.064433907901414, "step_time": 0.8231222610473632} +{"epoch": 0, "iter": 21218, "iter_tflops": 11.01973966415433, "iter_time": 1.8721942749023435, "loss": 0.21020780503749847, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.0126050210717, "step_time": 1.2884283027648926} +{"epoch": 0, "iter": 21219, "iter_tflops": 36.18140194760651, "iter_time": 0.5702126617431641, "loss": 0.20501980185508728, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.67301929521159, "step_time": 0.5200283184051514} +{"epoch": 0, "iter": 21220, "iter_tflops": 42.713243618855095, "iter_time": 0.48301397323608397, "loss": 0.26347848773002625, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.44468768852344, "step_time": 0.444207820892334} +{"epoch": 0, "iter": 21221, "iter_tflops": 24.712194207161858, "iter_time": 0.8348547821044923, "loss": 0.5053523182868958, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.409089586163677, "step_time": 0.781211841583252} +{"epoch": 0, "iter": 21222, "iter_tflops": 14.893176580254751, "iter_time": 1.3852715301513674, "loss": 0.5978649854660034, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.660172284179765, "step_time": 1.1682271938323976} +{"epoch": 0, "iter": 21223, "iter_tflops": 36.54356054451947, "iter_time": 0.5645616683959961, "loss": 0.6629267334938049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.71343341081887, "step_time": 0.5194991149902344} +{"epoch": 0, "iter": 21224, "iter_tflops": 36.07928652489969, "iter_time": 0.5718265380859375, "loss": 0.4728766977787018, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.31493384244438, "step_time": 0.5247648029327393} +{"epoch": 0, "iter": 21225, "iter_tflops": 21.29979128580732, "iter_time": 0.9686054306030274, "loss": 0.3866834044456482, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.951123867187103, "step_time": 0.8989143028259279} +{"epoch": 0, "iter": 21226, 
"iter_tflops": 16.165463806518286, "iter_time": 1.2762450714111326, "loss": 0.3606882095336914, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.86313162312058, "step_time": 0.9888780784606934} +{"epoch": 0, "iter": 21227, "iter_tflops": 40.0515717936295, "iter_time": 0.5151132049560546, "loss": 0.334534227848053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.87801229002431, "step_time": 0.4701920719146728} +{"epoch": 0, "iter": 21228, "iter_tflops": 39.87638880562029, "iter_time": 0.5173761749267578, "loss": 0.2830646336078644, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.33118793611295, "step_time": 0.47612573051452634} +{"epoch": 0, "iter": 21229, "iter_tflops": 33.241982929714176, "iter_time": 0.620633659362793, "loss": 0.18586720526218414, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 37.06068924563002, "step_time": 0.5566840209960937} +{"epoch": 0, "iter": 21230, "iter_tflops": 37.577537351938155, "iter_time": 0.5490272903442383, "loss": 0.1565323770046234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.15800033674972, "step_time": 0.5012656917572021} +{"epoch": 0, "iter": 21231, "iter_tflops": 38.16434579567336, "iter_time": 0.5405855407714844, "loss": 0.14853240549564362, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.78841088090748, "step_time": 0.49370371055603024} +{"epoch": 0, "iter": 21232, "iter_tflops": 42.398635887663275, "iter_time": 0.4865980491638184, "loss": 0.19919835031032562, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.398342704559575, "step_time": 0.444651517868042} +{"epoch": 0, "iter": 21233, "iter_tflops": 34.912023166403564, "iter_time": 0.5909452285766601, "loss": 0.44808879494667053, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 38.486642292226335, "step_time": 0.5360585460662841} +{"epoch": 0, "iter": 21234, "iter_tflops": 36.748960870790484, "iter_time": 0.5614061737060547, "loss": 0.37431949377059937, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.56182022038897, "step_time": 0.5086333255767822} +{"epoch": 0, 
"iter": 21235, "iter_tflops": 33.945644544978194, "iter_time": 0.6077685012817382, "loss": 0.42954540252685547, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 36.946022965833066, "step_time": 0.5584117546081543} +{"epoch": 0, "iter": 21236, "iter_tflops": 38.377975249469465, "iter_time": 0.5375763931274413, "loss": 0.35319584608078003, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.80432489850218, "step_time": 0.49351576805114744} +{"epoch": 0, "iter": 21237, "iter_tflops": 19.752074938609244, "iter_time": 1.0445025939941406, "loss": 0.3801083266735077, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 21.240170202023997, "step_time": 0.9713243026733398} +{"epoch": 0, "iter": 21238, "iter_tflops": 37.28886026973536, "iter_time": 0.5532776641845704, "loss": 0.35834428668022156, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.78831368093054, "step_time": 0.5058089351654053} +{"epoch": 0, "iter": 21239, "iter_tflops": 39.054216803630204, "iter_time": 0.5282680130004883, "loss": 0.39393988251686096, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.59959934878417, "step_time": 0.4843025245666504} +{"epoch": 0, "iter": 21240, "iter_tflops": 36.60470621400162, "iter_time": 0.5636186065673828, "loss": 0.32863929867744446, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.76040598170196, "step_time": 0.518885383605957} +{"epoch": 0, "iter": 21241, "iter_tflops": 32.01296771778261, "iter_time": 0.6444605102539063, "loss": 0.5208144783973694, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.86119953946978, "step_time": 0.5918067588806153} +{"epoch": 0, "iter": 21242, "iter_tflops": 41.575877894470416, "iter_time": 0.4962274894714356, "loss": 0.3946935832500458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.876505925821306, "step_time": 0.45973038864135746} +{"epoch": 0, "iter": 21243, "iter_tflops": 49.4811307363448, "iter_time": 0.41694870758056646, "loss": 0.5218682289123535, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.48292553840245, "step_time": 0.3857510280609131} 
+{"epoch": 0, "iter": 21244, "iter_tflops": 43.615400109532025, "iter_time": 0.47302313995361334, "loss": 0.44641393423080444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.11892462038442, "step_time": 0.43785153579711916} +{"epoch": 0, "iter": 21245, "iter_tflops": 30.332673122984573, "iter_time": 0.680160743713379, "loss": 0.4696100056171417, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 32.36427609090034, "step_time": 0.6374650077819823} +{"epoch": 0, "iter": 21246, "iter_tflops": 12.307079367400066, "iter_time": 1.6763598327636717, "loss": 0.40134599804878235, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.691615826619092, "step_time": 1.3147845153808595} +{"epoch": 0, "iter": 21247, "iter_tflops": 36.79793434944642, "iter_time": 0.5606590118408202, "loss": 0.4673477113246918, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.86266802651115, "step_time": 0.5175542564392089} +{"epoch": 0, "iter": 21248, "iter_tflops": 49.142313213225385, "iter_time": 0.4198234100341797, "loss": 0.5467711687088013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 53.27327298758714, "step_time": 0.3872691192626953} +{"epoch": 0, "iter": 21249, "iter_tflops": 24.95765305433886, "iter_time": 0.8266439743041991, "loss": 0.11453516036272049, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.39557034685185, "step_time": 0.7816119613647462} +{"epoch": 0, "iter": 21250, "iter_tflops": 17.917822876398738, "iter_time": 1.1514285888671876, "loss": 0.2642673850059509, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.556730355196557, "step_time": 1.0036174602508543} +{"epoch": 0, "iter": 21251, "iter_tflops": 44.15600089638599, "iter_time": 0.46723192977905276, "loss": 0.1531001627445221, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.530033394288616, "step_time": 0.43406435966491697} +{"epoch": 0, "iter": 21252, "iter_tflops": 51.91736060181814, "iter_time": 0.3973833274841308, "loss": 0.15134090185165405, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.49656257575059, "step_time": 
0.3651743144989014} +{"epoch": 0, "iter": 21253, "iter_tflops": 33.399730198899846, "iter_time": 0.6177024002075195, "loss": 0.0012750333407893777, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.82346596813588, "step_time": 0.5759100341796876} +{"epoch": 0, "iter": 21254, "iter_tflops": 31.74936531191593, "iter_time": 0.6498112106323242, "loss": 0.01786210760474205, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.51756783515119, "step_time": 0.4341782302856445} +{"epoch": 0, "iter": 21255, "iter_tflops": 51.44661305120927, "iter_time": 0.4010194702148438, "loss": 0.12299139052629471, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.914079753623504, "step_time": 0.3689785041809083} +{"epoch": 0, "iter": 21256, "iter_tflops": 50.792613421060814, "iter_time": 0.40618294906616215, "loss": 0.09088385105133057, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.37520607634722, "step_time": 0.3725691509246826} +{"epoch": 0, "iter": 21257, "iter_tflops": 28.50519135154752, "iter_time": 0.7237661819458008, "loss": 0.3607946038246155, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 30.15232137519695, "step_time": 0.6842290267944336} +{"epoch": 0, "iter": 21258, "iter_tflops": 13.283545729521377, "iter_time": 1.55313151550293, "loss": 0.48794087767601013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.791953874605882, "step_time": 1.306430709838867} +{"epoch": 0, "iter": 21259, "iter_tflops": 46.808318117900825, "iter_time": 0.44075699234008786, "loss": 0.583681046962738, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.673277465324404, "step_time": 0.4071395130157471} +{"epoch": 0, "iter": 21260, "iter_tflops": 46.7711972465557, "iter_time": 0.4411068077087403, "loss": 0.5998260378837585, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.69550614060565, "step_time": 0.4069609928131104} +{"epoch": 0, "iter": 21261, "iter_tflops": 21.29200784978289, "iter_time": 0.9689595108032227, "loss": 0.154940664768219, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 22.164265374696924, 
"step_time": 0.930826858520508} +{"epoch": 0, "iter": 21262, "iter_tflops": 16.428874617542846, "iter_time": 1.2557825164794922, "loss": 0.15534254908561707, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.853926433097268, "step_time": 1.039144250869751} +{"epoch": 0, "iter": 21263, "iter_tflops": 39.76185120082192, "iter_time": 0.5188665237426757, "loss": 0.1093997061252594, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.47736720521396, "step_time": 0.4745249042510986} +{"epoch": 0, "iter": 21264, "iter_tflops": 39.155605184905625, "iter_time": 0.526900131225586, "loss": 0.1342812329530716, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.70753395516152, "step_time": 0.4830785484313965} +{"epoch": 0, "iter": 21265, "iter_tflops": 25.44553072904446, "iter_time": 0.8107943878173829, "loss": 0.016333360224962234, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 27.308885574900945, "step_time": 0.7554718208312988} +{"epoch": 0, "iter": 21266, "iter_tflops": 8.914379601725976, "iter_time": 2.314361114501953, "loss": 0.000551756820641458, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 10.9028555389108, "step_time": 1.8922651443481444} +{"epoch": 0, "iter": 21267, "iter_tflops": 10.550140566761474, "iter_time": 1.95552783203125, "loss": 0.0048194765113294125, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 13.493762905627461, "step_time": 1.5289355278015135} +{"epoch": 0, "iter": 21268, "iter_tflops": 51.84050082809757, "iter_time": 0.39797249603271484, "loss": 0.00604578573256731, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.5280898307463, "step_time": 0.35862643051147464} +{"epoch": 0, "iter": 21269, "iter_tflops": 18.108660109688536, "iter_time": 0.8706653442382812, "loss": 0.2189195603132248, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 18.992943058548487, "step_time": 0.8301284713745117} +{"epoch": 0, "iter": 21270, "iter_tflops": 8.162988788751496, "iter_time": 1.9314718170166014, "loss": 0.1527594029903412, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 
9.613094664126121, "step_time": 1.6401152114868165} +{"epoch": 0, "iter": 21271, "iter_tflops": 9.868499467317953, "iter_time": 1.5976676940917969, "loss": 0.18023428320884705, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 10.864463865472393, "step_time": 1.4512067031860352} +{"epoch": 0, "iter": 21272, "iter_tflops": 16.87687204197009, "iter_time": 0.9342123794555665, "loss": 0.25136861205101013, "lr": 3e-05, "seqlen": 6304.0, "step_tflops": 19.133682896106926, "step_time": 0.8240223731994628} +{"epoch": 0, "iter": 21273, "iter_tflops": 12.472929952578866, "iter_time": 1.2050347747802734, "loss": 0.19490860402584076, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 13.392397422685567, "step_time": 1.1223019943237305} +{"epoch": 0, "iter": 21274, "iter_tflops": 12.203654425219769, "iter_time": 1.231624053955078, "loss": 0.3302270770072937, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 17.088589817691254, "step_time": 0.8795526428222656} +{"epoch": 0, "iter": 21275, "iter_tflops": 28.162402553853976, "iter_time": 0.5337014236450195, "loss": 0.15188950300216675, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 29.939379803423915, "step_time": 0.5020249061584472} +{"epoch": 0, "iter": 21276, "iter_tflops": 26.83682332928267, "iter_time": 0.5600630950927734, "loss": 0.2535487115383148, "lr": 3e-05, "seqlen": 6016.0, "step_tflops": 28.25893385303901, "step_time": 0.5318783226013184} +{"epoch": 0, "iter": 21277, "iter_tflops": 44.92758404259542, "iter_time": 0.4592077217102051, "loss": 0.07620212435722351, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.102475014507746, "step_time": 0.41177793121337897} +{"epoch": 0, "iter": 21278, "iter_tflops": 38.67364737326161, "iter_time": 0.5334664535522461, "loss": 0.09453830868005753, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90176584326853, "step_time": 0.43069588661193847} +{"epoch": 0, "iter": 21279, "iter_tflops": 54.90401013531888, "iter_time": 0.3757666053771973, "loss": 0.06088875234127045, "lr": 3e-05, "seqlen": 8192.0, 
"step_tflops": 60.03860636750753, "step_time": 0.3436304531097412} +{"epoch": 0, "iter": 21280, "iter_tflops": 51.46131350760977, "iter_time": 0.40090491485595703, "loss": 0.03242838755249977, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 56.17285918221219, "step_time": 0.36727867889404303} +{"epoch": 0, "iter": 21281, "iter_tflops": 19.95107086741321, "iter_time": 1.0340845184326173, "loss": 0.36423251032829285, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.89778839691186, "step_time": 0.9872381286621094} +{"epoch": 0, "iter": 21282, "iter_tflops": 14.846657894777389, "iter_time": 1.3896119689941406, "loss": 0.47379767894744873, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.80041294561967, "step_time": 1.0973744869232178} +{"epoch": 0, "iter": 21283, "iter_tflops": 38.84788319635008, "iter_time": 0.5310738143920898, "loss": 0.40860626101493835, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.62209868152627, "step_time": 0.4840468711853028} +{"epoch": 0, "iter": 21284, "iter_tflops": 38.49429967698708, "iter_time": 0.5359519119262696, "loss": 0.4484151601791382, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.793470152136884, "step_time": 0.4936439456939698} +{"epoch": 0, "iter": 21285, "iter_tflops": 36.16730936414587, "iter_time": 0.5704348449707031, "loss": 0.06935805082321167, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.25830901847011, "step_time": 0.5124679603576661} +{"epoch": 0, "iter": 21286, "iter_tflops": 37.36465177761979, "iter_time": 0.5521553802490236, "loss": 0.06692627817392349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.055799157078695, "step_time": 0.4905647716522217} +{"epoch": 0, "iter": 21287, "iter_tflops": 43.61193402088031, "iter_time": 0.47306073379516606, "loss": 0.07831955701112747, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 47.90039401008123, "step_time": 0.4307082214355469} +{"epoch": 0, "iter": 21288, "iter_tflops": 42.217673016326394, "iter_time": 0.4886838150024414, "loss": 0.07744353264570236, "lr": 3e-05, 
"seqlen": 8192.0, "step_tflops": 46.36832413386518, "step_time": 0.44493938255310067} +{"epoch": 0, "iter": 21289, "iter_tflops": 32.252813255452715, "iter_time": 0.6396680297851562, "loss": 0.2525757849216461, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 35.83817558239021, "step_time": 0.5756736545562744} +{"epoch": 0, "iter": 21290, "iter_tflops": 36.50569681656329, "iter_time": 0.5651472320556641, "loss": 0.22066864371299744, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.44613096883126, "step_time": 0.5100881843566895} +{"epoch": 0, "iter": 21291, "iter_tflops": 36.80292192283596, "iter_time": 0.5605830307006836, "loss": 0.32667508721351624, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.52272701651555, "step_time": 0.5091240158081055} +{"epoch": 0, "iter": 21292, "iter_tflops": 40.78850455773244, "iter_time": 0.505806568145752, "loss": 0.26383140683174133, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 44.55815976143827, "step_time": 0.46301493644714353} +{"epoch": 0, "iter": 21293, "iter_tflops": 24.59531810081984, "iter_time": 0.8388219833374023, "loss": 0.4932354986667633, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.616413501617696, "step_time": 0.7751267280578613} +{"epoch": 0, "iter": 21294, "iter_tflops": 44.17701348161337, "iter_time": 0.46700969314575197, "loss": 0.6069253087043762, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.166954833173115, "step_time": 0.4283246383666992} +{"epoch": 0, "iter": 21295, "iter_tflops": 46.453456073230605, "iter_time": 0.4441239738464355, "loss": 0.5573021173477173, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.277465982052505, "step_time": 0.41034473609924316} +{"epoch": 0, "iter": 21296, "iter_tflops": 47.225525053829315, "iter_time": 0.4368631896972657, "loss": 0.600184977054596, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 51.00328193765253, "step_time": 0.40450521469116213} +{"epoch": 0, "iter": 21297, "iter_tflops": 32.39980353933178, "iter_time": 0.6367660064697266, "loss": 0.28721168637275696, 
"lr": 3e-05, "seqlen": 8192.0, "step_tflops": 34.70308588721597, "step_time": 0.5945031394958497} +{"epoch": 0, "iter": 21298, "iter_tflops": 12.921274969455668, "iter_time": 1.596676300048828, "loss": 0.22211839258670807, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.053691839265646, "step_time": 1.2097728576660156} +{"epoch": 0, "iter": 21299, "iter_tflops": 15.693729312575806, "iter_time": 1.3146074523925781, "loss": 0.3036862015724182, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 17.3182166506367, "step_time": 1.1912943420410156} +{"epoch": 0, "iter": 21300, "iter_tflops": 21.257256529997672, "iter_time": 0.9705435638427735, "loss": 0.18975980579853058, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.201130535803117, "step_time": 0.7874123401641846} +{"epoch": 0, "iter": 21301, "iter_tflops": 17.068907015869428, "iter_time": 0.9308998413085938, "loss": 0.18767336010932922, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 18.328354362246724, "step_time": 0.8669323234558106} +{"epoch": 0, "iter": 21302, "iter_tflops": 13.684727311160769, "iter_time": 1.1611077423095701, "loss": 0.1235390156507492, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 16.54735115768779, "step_time": 0.9602408676147461} +{"epoch": 0, "iter": 21303, "iter_tflops": 26.89230035846227, "iter_time": 0.5908547286987305, "loss": 0.17080266773700714, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 28.501577601830263, "step_time": 0.5574934501647949} +{"epoch": 0, "iter": 21304, "iter_tflops": 28.25129758283274, "iter_time": 0.5624323196411133, "loss": 0.26875993609428406, "lr": 3e-05, "seqlen": 6352.0, "step_tflops": 29.849701796966233, "step_time": 0.5323149604797364} +{"epoch": 0, "iter": 21305, "iter_tflops": 41.497232152970746, "iter_time": 0.49716794204711917, "loss": 0.31604260206222534, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 45.03006214810675, "step_time": 0.45816267013549805} +{"epoch": 0, "iter": 21306, "iter_tflops": 23.218357621022818, "iter_time": 0.8885681686401368, "loss": 
0.24442757666110992, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 28.616882827322385, "step_time": 0.720941328048706} +{"epoch": 0, "iter": 21307, "iter_tflops": 50.55875340651574, "iter_time": 0.40806175231933595, "loss": 0.2629142999649048, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 54.84301302226789, "step_time": 0.3761845378875733} +{"epoch": 0, "iter": 21308, "iter_tflops": 51.38176220858681, "iter_time": 0.40152561187744135, "loss": 0.2984964847564697, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 55.467877754240206, "step_time": 0.37194668960571287} +{"epoch": 0, "iter": 21309, "iter_tflops": 23.457379315821843, "iter_time": 0.8795139999389648, "loss": 0.09382915496826172, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.551045592652706, "step_time": 0.840334617614746} +{"epoch": 0, "iter": 21310, "iter_tflops": 13.363354683210297, "iter_time": 1.5438558654785157, "loss": 0.06276480853557587, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 15.888103881100546, "step_time": 1.298524585723877} +{"epoch": 0, "iter": 21311, "iter_tflops": 40.038985504836596, "iter_time": 0.5152751312255859, "loss": 0.09351732581853867, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.779141808643686, "step_time": 0.4712539501190185} +{"epoch": 0, "iter": 21312, "iter_tflops": 45.50619407142762, "iter_time": 0.45336890792846674, "loss": 0.07936061173677444, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 49.81422174488924, "step_time": 0.41416071128845217} +{"epoch": 0, "iter": 21313, "iter_tflops": 36.269969122193466, "iter_time": 0.5688202667236327, "loss": 0.11161588877439499, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.335528776836014, "step_time": 0.5114868736267091} +{"epoch": 0, "iter": 21314, "iter_tflops": 38.8139944394694, "iter_time": 0.5315374984741211, "loss": 0.10627822577953339, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 43.30232007047833, "step_time": 0.47644314384460446} +{"epoch": 0, "iter": 21315, "iter_tflops": 42.49302188239571, "iter_time": 
0.48551721191406244, "loss": 0.06835546344518661, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.41895357574279, "step_time": 0.4444540843963623} +{"epoch": 0, "iter": 21316, "iter_tflops": 42.65631921310769, "iter_time": 0.4836585502624512, "loss": 0.07774016261100769, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.95381863834964, "step_time": 0.43939117431640623} +{"epoch": 0, "iter": 21317, "iter_tflops": 18.001809185906605, "iter_time": 1.1460566711425781, "loss": 0.1475788950920105, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.0857259009587, "step_time": 1.0809698104858398} +{"epoch": 0, "iter": 21318, "iter_tflops": 15.595331542491447, "iter_time": 1.3229018859863282, "loss": 0.12073136121034622, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 19.9999586087396, "step_time": 1.0315568103790282} +{"epoch": 0, "iter": 21319, "iter_tflops": 42.46817603843942, "iter_time": 0.48580126190185546, "loss": 0.10977544635534286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 46.52835259734569, "step_time": 0.4434090690612793} +{"epoch": 0, "iter": 21320, "iter_tflops": 38.630965338337475, "iter_time": 0.5340558624267578, "loss": 0.10476807504892349, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.19323246398172, "step_time": 0.4889668865203858} +{"epoch": 0, "iter": 21321, "iter_tflops": 36.55496053149734, "iter_time": 0.5643856048583984, "loss": 0.15346495807170868, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 40.422410758307436, "step_time": 0.510387508392334} +{"epoch": 0, "iter": 21322, "iter_tflops": 43.23269404305776, "iter_time": 0.4772104530334472, "loss": 0.255087286233902, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 48.902051314547904, "step_time": 0.42188605499267573} +{"epoch": 0, "iter": 21323, "iter_tflops": 46.150250724796656, "iter_time": 0.44704185104370114, "loss": 0.1463211476802826, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 50.24411117342919, "step_time": 0.41061714553833006} +{"epoch": 0, "iter": 21324, "iter_tflops": 53.273611456716104, 
"iter_time": 0.38726665878295896, "loss": 0.22021308541297913, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 57.779112030037, "step_time": 0.35706837272644043} +{"epoch": 0, "iter": 21325, "iter_tflops": 23.30160475698469, "iter_time": 0.8853936767578124, "loss": 0.9082140326499939, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 24.700022075825288, "step_time": 0.8352661972045898} +{"epoch": 0, "iter": 21326, "iter_tflops": 12.306441204542374, "iter_time": 1.676446762084961, "loss": 0.5477659106254578, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 16.184598747620157, "step_time": 1.2747361755371094} +{"epoch": 0, "iter": 21327, "iter_tflops": 36.612932333707235, "iter_time": 0.563491973876953, "loss": 0.5700744390487671, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 39.8149273807129, "step_time": 0.5181748371124267} +{"epoch": 0, "iter": 21328, "iter_tflops": 37.85117556782804, "iter_time": 0.5450581970214844, "loss": 0.6004959344863892, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.02795344082415, "step_time": 0.5028545608520507} +{"epoch": 0, "iter": 21329, "iter_tflops": 17.23218777507413, "iter_time": 1.1972416839599611, "loss": 0.5203961133956909, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 18.454700035339602, "step_time": 1.1179316635131835} +{"epoch": 0, "iter": 21330, "iter_tflops": 21.59219803854693, "iter_time": 0.9554883422851563, "loss": 0.611329197883606, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 26.71025294220675, "step_time": 0.7724035243988037} +{"epoch": 0, "iter": 21331, "iter_tflops": 39.69680479822817, "iter_time": 0.5197167282104492, "loss": 0.5272359251976013, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.7223128035428, "step_time": 0.4829114379882812} +{"epoch": 0, "iter": 21332, "iter_tflops": 38.699904468961215, "iter_time": 0.533104507446289, "loss": 0.36434686183929443, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.34772158618893, "step_time": 0.49896566772460943} +{"epoch": 0, "iter": 21333, "iter_tflops": 
23.73311120329226, "iter_time": 0.869295783996582, "loss": 0.14332468807697296, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 25.025101031403945, "step_time": 0.8244159927368163} +{"epoch": 0, "iter": 21334, "iter_tflops": 17.15812932037374, "iter_time": 1.2024092559814452, "loss": 0.13716812431812286, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 20.952908334366434, "step_time": 0.9846410427093506} +{"epoch": 0, "iter": 21335, "iter_tflops": 37.78781915399653, "iter_time": 0.5459720611572265, "loss": 0.15856274962425232, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 41.12679291894146, "step_time": 0.5016460571289063} +{"epoch": 0, "iter": 21336, "iter_tflops": 38.433841361684465, "iter_time": 0.5367949905395508, "loss": 0.11322983354330063, "lr": 3e-05, "seqlen": 8192.0, "step_tflops": 42.03080657217073, "step_time": 0.49085647392272946} +{"epoch": 0, "iter": 21337, "iter_tflops": 16.55366447094701, "iter_time": 1.2463157958984374, "loss": 0.3410671651363373, "lr": 2.9999925978027876e-05, "seqlen": 8192.0, "step_tflops": 17.62693543628929, "step_time": 1.1704299697875977} +{"epoch": 0, "iter": 21338, "iter_tflops": 15.584248816157006, "iter_time": 1.3238426666259766, "loss": 0.287996768951416, "lr": 2.9999703912842056e-05, "seqlen": 8192.0, "step_tflops": 18.526998355736975, "step_time": 1.113569133758545} +{"epoch": 0, "iter": 21339, "iter_tflops": 34.947945976975255, "iter_time": 0.5903377990722656, "loss": 0.33441945910453796, "lr": 2.999933380663425e-05, "seqlen": 8192.0, "step_tflops": 38.026698337432514, "step_time": 0.5425423297882079} +{"epoch": 0, "iter": 21340, "iter_tflops": 42.48135077303463, "iter_time": 0.4856506004333496, "loss": 0.40025514364242554, "lr": 2.9998815663057245e-05, "seqlen": 8192.0, "step_tflops": 46.42901596786871, "step_time": 0.444357759475708} +{"epoch": 0, "iter": 21341, "iter_tflops": 20.42186999370959, "iter_time": 1.0102450714111326, "loss": 0.2659853398799896, "lr": 2.999814948722491e-05, "seqlen": 8192.0, "step_tflops": 
21.681712177469244, "step_time": 0.9515435562133789} +{"epoch": 0, "iter": 21342, "iter_tflops": 11.00492389581916, "iter_time": 1.8747147827148436, "loss": 0.27307140827178955, "lr": 2.9997335285712135e-05, "seqlen": 8192.0, "step_tflops": 14.890139921942227, "step_time": 1.385554039001465} +{"epoch": 0, "iter": 21343, "iter_tflops": 11.536563771967378, "iter_time": 1.7883222351074217, "loss": 0.30743032693862915, "lr": 2.9996373066554763e-05, "seqlen": 8192.0, "step_tflops": 13.402299358432781, "step_time": 1.5393696975708009} +{"epoch": 0, "iter": 21344, "iter_tflops": 20.740148919655013, "iter_time": 0.9947418212890625, "loss": 0.2304479330778122, "lr": 2.99952628392495e-05, "seqlen": 8192.0, "step_tflops": 25.825134225149668, "step_time": 0.7988765258789063} +{"epoch": 0, "iter": 21345, "iter_tflops": 11.367882014207886, "iter_time": 1.3149855957031251, "loss": 0.3339361548423767, "lr": 2.999400461475385e-05, "seqlen": 5984.0, "step_tflops": 12.18963034624449, "step_time": 1.2263375244140624} +{"epoch": 0, "iter": 21346, "iter_tflops": 11.264475471161187, "iter_time": 1.3270570068359373, "loss": 0.19265666604042053, "lr": 2.9992598405485974e-05, "seqlen": 5984.0, "step_tflops": 16.57821039713798, "step_time": 0.9017017364501954} +{"epoch": 0, "iter": 21347, "iter_tflops": 26.9325352501916, "iter_time": 0.5550387649536133, "loss": 0.1781153827905655, "lr": 2.9991044225324593e-05, "seqlen": 5984.0, "step_tflops": 28.678957202941174, "step_time": 0.5212393531799316} +{"epoch": 0, "iter": 21348, "iter_tflops": 26.54474214489925, "iter_time": 0.5631473464965819, "loss": 0.290341317653656, "lr": 2.9989342089608838e-05, "seqlen": 5984.0, "step_tflops": 28.100652448242755, "step_time": 0.531966335296631} +{"epoch": 0, "iter": 21349, "iter_tflops": 41.697647676919985, "iter_time": 0.4947783546447754, "loss": 0.17324429750442505, "lr": 2.99874920151381e-05, "seqlen": 8192.0, "step_tflops": 45.50641423669431, "step_time": 0.45336671447753907} +{"epoch": 0, "iter": 21350, 
"iter_tflops": 45.42659009288748, "iter_time": 0.4541633758544922, "loss": 0.12490363419055939, "lr": 2.9985494020171873e-05, "seqlen": 8192.0, "step_tflops": 49.424170484914654, "step_time": 0.41742923164367673} +{"epoch": 0, "iter": 21351, "iter_tflops": 48.051106897516334, "iter_time": 0.4293572998046875, "loss": 0.15523868799209595, "lr": 2.9983348124429553e-05, "seqlen": 8192.0, "step_tflops": 52.170231209016904, "step_time": 0.39545719909667965} +{"epoch": 0, "iter": 21352, "iter_tflops": 51.37347031833213, "iter_time": 0.40159041976928705, "loss": 0.1874576061964035, "lr": 2.9981054349090264e-05, "seqlen": 8192.0, "step_tflops": 56.23709704714587, "step_time": 0.36685914802551267} +{"epoch": 0, "iter": 21353, "iter_tflops": 27.154505366095165, "iter_time": 0.7597668685913086, "loss": 0.11753040552139282, "lr": 2.9978612716792647e-05, "seqlen": 8192.0, "step_tflops": 28.766708562765704, "step_time": 0.7171864471435547} +{"epoch": 0, "iter": 21354, "iter_tflops": 17.29788175407522, "iter_time": 1.1926947937011718, "loss": 0.13239583373069763, "lr": 2.997602325163463e-05, "seqlen": 8192.0, "step_tflops": 19.38037495321396, "step_time": 1.064535312652588} +{"epoch": 0, "iter": 21355, "iter_tflops": 41.55801201152158, "iter_time": 0.4964408187866211, "loss": 0.07561666518449783, "lr": 2.997328597917318e-05, "seqlen": 8192.0, "step_tflops": 45.79714411545289, "step_time": 0.4504886474609376} +{"epoch": 0, "iter": 21356, "iter_tflops": 47.97672840412961, "iter_time": 0.43002293395996094, "loss": 0.08892170339822769, "lr": 2.9970400926424075e-05, "seqlen": 8192.0, "step_tflops": 52.605834903612646, "step_time": 0.39218260765075685} +{"epoch": 0, "iter": 21357, "iter_tflops": 17.245307517646772, "iter_time": 1.1963308563232422, "loss": 0.07735712826251984, "lr": 2.9967368121861623e-05, "seqlen": 8192.0, "step_tflops": 18.52397064973464, "step_time": 1.1137511444091797} +{"epoch": 0, "iter": 21358, "iter_tflops": 22.970943636193642, "iter_time": 0.8981387023925782, 
"loss": 0.10171698778867722, "lr": 2.9964187595418376e-05, "seqlen": 8192.0, "step_tflops": 27.82410198265982, "step_time": 0.741482816696167} +{"epoch": 0, "iter": 21359, "iter_tflops": 51.75025568487589, "iter_time": 0.39866650390624997, "loss": 0.14615556597709656, "lr": 2.9960859378484856e-05, "seqlen": 8192.0, "step_tflops": 56.043013069899835, "step_time": 0.3681296272277832} +{"epoch": 0, "iter": 21360, "iter_tflops": 45.76793187676572, "iter_time": 0.450776180267334, "loss": 0.13880769908428192, "lr": 2.9957383503909207e-05, "seqlen": 8192.0, "step_tflops": 49.35049712299919, "step_time": 0.41805239486694334} +{"epoch": 0, "iter": 21361, "iter_tflops": 25.751343303774743, "iter_time": 0.6792068176269531, "loss": 0.12114012241363525, "lr": 2.995376000599692e-05, "seqlen": 6976.0, "step_tflops": 27.41671945511559, "step_time": 0.6379496994018554} +{"epoch": 0, "iter": 21362, "iter_tflops": 7.983133543890813, "iter_time": 2.190930145263672, "loss": 0.07458584010601044, "lr": 2.9949988920510447e-05, "seqlen": 6976.0, "step_tflops": 10.052246670795906, "step_time": 1.7399580917358397} +{"epoch": 0, "iter": 21363, "iter_tflops": 8.782704878703953, "iter_time": 1.9914693908691408, "loss": 0.08266936987638474, "lr": 2.9946070284668884e-05, "seqlen": 6976.0, "step_tflops": 10.475584421544575, "step_time": 1.669643165588379} +{"epoch": 0, "iter": 21364, "iter_tflops": 24.19284696215919, "iter_time": 0.7229611282348632, "loss": 0.13456745445728302, "lr": 2.9942004137147587e-05, "seqlen": 6976.0, "step_tflops": 30.814283674942455, "step_time": 0.5676097526550292} +{"epoch": 0, "iter": 21365, "iter_tflops": 17.617348781552327, "iter_time": 0.8833287353515625, "loss": 0.1411970555782318, "lr": 2.9937790518077784e-05, "seqlen": 6224.0, "step_tflops": 18.837835128221304, "step_time": 0.8260986633300781} +{"epoch": 0, "iter": 21366, "iter_tflops": 5.942236431069289, "iter_time": 2.618864227294922, "loss": 0.18479157984256744, "lr": 2.9933429469046202e-05, "seqlen": 6224.0, 
"step_tflops": 6.800161420134956, "step_time": 2.2884619140624998} +{"epoch": 0, "iter": 21367, "iter_tflops": 7.84611721559799, "iter_time": 1.9833899993896487, "loss": 0.22924306988716125, "lr": 2.9928921033094626e-05, "seqlen": 6224.0, "step_tflops": 9.91281742297503, "step_time": 1.5698776397705079} +{"epoch": 0, "iter": 21368, "iter_tflops": 27.885966754786626, "iter_time": 0.5580552597045898, "loss": 0.2549281418323517, "lr": 2.9924265254719506e-05, "seqlen": 6224.0, "step_tflops": 29.660658250191165, "step_time": 0.5246650390625} +{"epoch": 0, "iter": 21369, "iter_tflops": 20.368242217214142, "iter_time": 0.7359228744506836, "loss": 0.18099108338356018, "lr": 2.9919462179871485e-05, "seqlen": 6000.0, "step_tflops": 21.613204908411948, "step_time": 0.693532283782959} +{"epoch": 0, "iter": 21370, "iter_tflops": 6.263659564913693, "iter_time": 2.3930827026367187, "loss": 0.16910220682621002, "lr": 2.9914511855954986e-05, "seqlen": 6000.0, "step_tflops": 8.582153820055298, "step_time": 1.7465843276977542} +{"epoch": 0, "iter": 21371, "iter_tflops": 8.717441032511024, "iter_time": 1.7194788360595705, "loss": 0.20985166728496552, "lr": 2.99094143318277e-05, "seqlen": 6000.0, "step_tflops": 10.09091008563646, "step_time": 1.4854413757324219} +{"epoch": 0, "iter": 21372, "iter_tflops": 20.440648851893492, "iter_time": 0.7333160247802735, "loss": 0.12395769357681274, "lr": 2.9904169657800128e-05, "seqlen": 6000.0, "step_tflops": 24.578238629681966, "step_time": 0.6098669471740723} +{"epoch": 0, "iter": 21373, "iter_tflops": 15.755934305719421, "iter_time": 1.060527084350586, "loss": 0.29120180010795593, "lr": 2.989877788563509e-05, "seqlen": 6672.0, "step_tflops": 16.800376088256748, "step_time": 0.9945964889526367} +{"epoch": 0, "iter": 21374, "iter_tflops": 10.049077415627902, "iter_time": 1.6627989196777342, "loss": 0.1896597445011139, "lr": 2.9893239068547206e-05, "seqlen": 6672.0, "step_tflops": 14.162009549358581, "step_time": 1.179888702392578} +{"epoch": 0, 
"iter": 21375, "iter_tflops": 25.234181091191395, "iter_time": 0.6621809921264648, "loss": 0.2002875804901123, "lr": 2.988755326120236e-05, "seqlen": 6672.0, "step_tflops": 27.073070323284615, "step_time": 0.6172035484313964} +{"epoch": 0, "iter": 21376, "iter_tflops": 25.631167547263363, "iter_time": 0.6519248504638672, "loss": 0.18798628449440002, "lr": 2.988172051971717e-05, "seqlen": 6672.0, "step_tflops": 27.538236075098027, "step_time": 0.6067779731750489} +{"epoch": 0, "iter": 21377, "iter_tflops": 22.66735021729073, "iter_time": 0.9101678543090821, "loss": 0.4636458456516266, "lr": 2.9875740901658446e-05, "seqlen": 8192.0, "step_tflops": 24.709874764989284, "step_time": 0.8349331474304199} +{"epoch": 0, "iter": 21378, "iter_tflops": 23.735708082893918, "iter_time": 0.8692006759643555, "loss": 0.376271516084671, "lr": 2.98696144660426e-05, "seqlen": 8192.0, "step_tflops": 29.56616014094135, "step_time": 0.6977941474914551} +{"epoch": 0, "iter": 21379, "iter_tflops": 47.606584117602615, "iter_time": 0.4333663902282715, "loss": 0.4487915635108948, "lr": 2.986334127333508e-05, "seqlen": 8192.0, "step_tflops": 51.78306306067493, "step_time": 0.3984139270782471} +{"epoch": 0, "iter": 21380, "iter_tflops": 48.10506370031904, "iter_time": 0.4288757133483887, "loss": 0.37797486782073975, "lr": 2.9856921385449766e-05, "seqlen": 8192.0, "step_tflops": 51.92793968592681, "step_time": 0.3973023700714111} +{"epoch": 0, "iter": 21381, "iter_tflops": 25.64782459421334, "iter_time": 0.8043993530273439, "loss": 0.23039880394935608, "lr": 2.9850354865748363e-05, "seqlen": 8192.0, "step_tflops": 27.002286407863625, "step_time": 0.7640498733520509} +{"epoch": 0, "iter": 21382, "iter_tflops": 11.05177388691993, "iter_time": 1.8667676086425782, "loss": 0.17926518619060516, "lr": 2.984364177903976e-05, "seqlen": 8192.0, "step_tflops": 14.482414079332312, "step_time": 1.4245617752075193} +{"epoch": 0, "iter": 21383, "iter_tflops": 38.97093946693385, "iter_time": 0.529396873474121, 
"loss": 0.23161378502845764, "lr": 2.9836782191579425e-05, "seqlen": 8192.0, "step_tflops": 42.72225205732691, "step_time": 0.48291212463378913} +{"epoch": 0, "iter": 21384, "iter_tflops": 38.954702237467636, "iter_time": 0.5296175384521484, "loss": 0.23807087540626526, "lr": 2.982977617106871e-05, "seqlen": 8192.0, "step_tflops": 42.599016182125055, "step_time": 0.48430915451049805} +{"epoch": 0, "iter": 21385, "iter_tflops": 27.458849085015434, "iter_time": 0.7513458938598633, "loss": 0.02878795936703682, "lr": 2.982262378665421e-05, "seqlen": 8192.0, "step_tflops": 30.45251131635002, "step_time": 0.6774841423034668} +{"epoch": 0, "iter": 21386, "iter_tflops": 43.0216665743112, "iter_time": 0.4795512390136719, "loss": 0.01648879423737526, "lr": 2.981532510892707e-05, "seqlen": 8192.0, "step_tflops": 47.618983645940624, "step_time": 0.4332535457611084} +{"epoch": 0, "iter": 21387, "iter_tflops": 39.99704844668205, "iter_time": 0.5158153991699219, "loss": 0.017629489302635193, "lr": 2.9807880209922288e-05, "seqlen": 8192.0, "step_tflops": 44.046579269768735, "step_time": 0.46839263916015633} +{"epoch": 0, "iter": 21388, "iter_tflops": 44.61519497043346, "iter_time": 0.4624230270385742, "loss": 0.02905796468257904, "lr": 2.980028916311802e-05, "seqlen": 8192.0, "step_tflops": 48.89282270009713, "step_time": 0.4219656867980957} +{"epoch": 0, "iter": 21389, "iter_tflops": 20.389891479298186, "iter_time": 1.011829490661621, "loss": 0.03633818402886391, "lr": 2.9792552043434835e-05, "seqlen": 8192.0, "step_tflops": 22.00565035613571, "step_time": 0.9375361862182618} +{"epoch": 0, "iter": 21390, "iter_tflops": 19.4402262899454, "iter_time": 1.0612578887939454, "loss": 0.05277356877923012, "lr": 2.9784668927234972e-05, "seqlen": 8192.0, "step_tflops": 21.95691054948981, "step_time": 0.9396173229217529} +{"epoch": 0, "iter": 21391, "iter_tflops": 41.34627700477189, "iter_time": 0.49898310089111325, "loss": 0.0725516527891159, "lr": 2.977663989232161e-05, "seqlen": 8192.0, 
"step_tflops": 45.49729926216755, "step_time": 0.4534575424194336} +{"epoch": 0, "iter": 21392, "iter_tflops": 42.87749809895431, "iter_time": 0.4811636505126953, "loss": 0.04921721667051315, "lr": 2.9768465017938083e-05, "seqlen": 8192.0, "step_tflops": 47.03320060052334, "step_time": 0.4386495761871338} +{"epoch": 0, "iter": 21393, "iter_tflops": 25.614062907114153, "iter_time": 0.8054596252441406, "loss": 0.39320963621139526, "lr": 2.9760144384767095e-05, "seqlen": 8192.0, "step_tflops": 27.531328472780586, "step_time": 0.7493678894042968} +{"epoch": 0, "iter": 21394, "iter_tflops": 10.983550095548823, "iter_time": 1.8783629455566406, "loss": 0.4256851077079773, "lr": 2.9751678074929946e-05, "seqlen": 8192.0, "step_tflops": 12.910124294770204, "step_time": 1.5980553741455077} +{"epoch": 0, "iter": 21395, "iter_tflops": 9.767177899404416, "iter_time": 2.1122880859375, "loss": 0.4984194040298462, "lr": 2.9743066171985686e-05, "seqlen": 8192.0, "step_tflops": 11.907654550157261, "step_time": 1.7325908660888671} +{"epoch": 0, "iter": 21396, "iter_tflops": 41.87818732047807, "iter_time": 0.49264533233642577, "loss": 0.5620660185813904, "lr": 2.9734308760930333e-05, "seqlen": 8192.0, "step_tflops": 45.273829966389094, "step_time": 0.4556957855224609} +{"epoch": 0, "iter": 21397, "iter_tflops": 15.70667104089884, "iter_time": 1.0090288848876952, "loss": 0.11382865160703659, "lr": 2.9725405928195985e-05, "seqlen": 6336.0, "step_tflops": 16.41817886243664, "step_time": 0.9653010177612305} +{"epoch": 0, "iter": 21398, "iter_tflops": 11.459209796429183, "iter_time": 1.383034698486328, "loss": 0.3242764472961426, "lr": 2.971635776165001e-05, "seqlen": 6336.0, "step_tflops": 14.202351482475677, "step_time": 1.1159056854248046} +{"epoch": 0, "iter": 21399, "iter_tflops": 28.554133881073586, "iter_time": 0.5550329360961914, "loss": 0.17570215463638306, "lr": 2.9707164350594168e-05, "seqlen": 6336.0, "step_tflops": 30.62246857394346, "step_time": 0.5175443229675293} +{"epoch": 
0, "iter": 21400, "iter_tflops": 27.802496336721397, "iter_time": 0.5700381927490235, "loss": 0.34923478960990906, "lr": 2.9697825785763704e-05, "seqlen": 6336.0, "step_tflops": 29.51061738148186, "step_time": 0.5370434837341308} +{"epoch": 0, "iter": 21401, "iter_tflops": 27.00419175092119, "iter_time": 0.7639959640502931, "loss": 0.31397345662117004, "lr": 2.9688342159326487e-05, "seqlen": 8192.0, "step_tflops": 28.554874896323405, "step_time": 0.72250687789917} +{"epoch": 0, "iter": 21402, "iter_tflops": 12.267439569332872, "iter_time": 1.681776657104492, "loss": 0.2588478922843933, "lr": 2.967871356488208e-05, "seqlen": 8192.0, "step_tflops": 14.165796138923918, "step_time": 1.4564019775390624} +{"epoch": 0, "iter": 21403, "iter_tflops": 45.543704570637004, "iter_time": 0.4529955062866211, "loss": 0.3814273774623871, "lr": 2.966894009746083e-05, "seqlen": 8192.0, "step_tflops": 49.815877689662386, "step_time": 0.4141469440460205} +{"epoch": 0, "iter": 21404, "iter_tflops": 45.70317953710188, "iter_time": 0.4514148406982422, "loss": 0.31491267681121826, "lr": 2.9659021853522903e-05, "seqlen": 8192.0, "step_tflops": 49.3997077662063, "step_time": 0.41763594245910646} +{"epoch": 0, "iter": 21405, "iter_tflops": 19.54484748139679, "iter_time": 0.5095722236633301, "loss": 0.023087985813617706, "lr": 2.964895893095737e-05, "seqlen": 4016.0, "step_tflops": 21.25645635626878, "step_time": 0.46854053306579585} +{"epoch": 0, "iter": 21406, "iter_tflops": 6.880214407162616, "iter_time": 1.4475582885742186, "loss": 0.025883276015520096, "lr": 2.9638751429081213e-05, "seqlen": 4016.0, "step_tflops": 7.494853567482858, "step_time": 1.328846694946289} +{"epoch": 0, "iter": 21407, "iter_tflops": 5.626773520237088, "iter_time": 1.7700217285156252, "loss": 0.026468070223927498, "lr": 2.9628399448638352e-05, "seqlen": 4016.0, "step_tflops": 7.195463781252742, "step_time": 1.3841375198364256} +{"epoch": 0, "iter": 21408, "iter_tflops": 9.267765389258352, "iter_time": 
1.0746399993896483, "loss": 0.021290428936481476, "lr": 2.961790309179866e-05, "seqlen": 4016.0, "step_tflops": 11.283402204751406, "step_time": 0.8826691818237304} +{"epoch": 0, "iter": 21409, "iter_tflops": 14.250831191585164, "iter_time": 1.1207327423095703, "loss": 0.12539026141166687, "lr": 2.960726246215693e-05, "seqlen": 6384.0, "step_tflops": 15.295280350450069, "step_time": 1.0442027053833007} +{"epoch": 0, "iter": 21410, "iter_tflops": 14.819364487957262, "iter_time": 1.0777367095947268, "loss": 0.1347537487745285, "lr": 2.95964776647319e-05, "seqlen": 6384.0, "step_tflops": 17.47220882596484, "step_time": 0.914101547241211} +{"epoch": 0, "iter": 21411, "iter_tflops": 28.753808640845325, "iter_time": 0.5554524383544922, "loss": 0.29296964406967163, "lr": 2.958554880596515e-05, "seqlen": 6384.0, "step_tflops": 30.74946108070259, "step_time": 0.5194033508300782} +{"epoch": 0, "iter": 21412, "iter_tflops": 28.255237676941004, "iter_time": 0.5652535400390626, "loss": 0.3785271644592285, "lr": 2.9574475993720112e-05, "seqlen": 6384.0, "step_tflops": 29.855256512402544, "step_time": 0.5349601707458496} +{"epoch": 0, "iter": 21413, "iter_tflops": 20.19662537900318, "iter_time": 1.0215119171142577, "loss": 0.11995238065719604, "lr": 2.9563259337280967e-05, "seqlen": 8192.0, "step_tflops": 21.08636543734545, "step_time": 0.9784091796874999} +{"epoch": 0, "iter": 21414, "iter_tflops": 18.8311519208848, "iter_time": 1.095583190917969, "loss": 0.10541916638612747, "lr": 2.9551898947351588e-05, "seqlen": 8192.0, "step_tflops": 23.070614223741906, "step_time": 0.8942585277557373} +{"epoch": 0, "iter": 21415, "iter_tflops": 51.414419386053275, "iter_time": 0.40127057266235355, "loss": 0.13398848474025726, "lr": 2.954039493605444e-05, "seqlen": 8192.0, "step_tflops": 55.874971808435085, "step_time": 0.369236759185791} +{"epoch": 0, "iter": 21416, "iter_tflops": 48.341325947488606, "iter_time": 0.42677963638305666, "loss": 0.150039404630661, "lr": 2.9528747416929467e-05, 
"seqlen": 8192.0, "step_tflops": 52.55839328860921, "step_time": 0.3925366096496582} +{"epoch": 0, "iter": 21417, "iter_tflops": 3.0116878102078033, "iter_time": 0.559523323059082, "loss": 0.8078209161758423, "lr": 2.9516956504932984e-05, "seqlen": 688.0, "step_tflops": 3.2708407537393276, "step_time": 0.5151915664672853} +{"epoch": 0, "iter": 21418, "iter_tflops": 3.083032131548096, "iter_time": 0.5465754165649415, "loss": 1.0973371267318726, "lr": 2.9505022316436527e-05, "seqlen": 688.0, "step_tflops": 3.834242790973256, "step_time": 0.4394895324707031} +{"epoch": 0, "iter": 21419, "iter_tflops": 3.7220089640477663, "iter_time": 0.45274194335937507, "loss": 0.1494736671447754, "lr": 2.949294496922572e-05, "seqlen": 688.0, "step_tflops": 4.010538963089366, "step_time": 0.42017035293579097} +{"epoch": 0, "iter": 21420, "iter_tflops": 3.4989751112089187, "iter_time": 0.4816009025573731, "loss": 0.1532454490661621, "lr": 2.948072458249911e-05, "seqlen": 688.0, "step_tflops": 3.763295328773753, "step_time": 0.4477750015258789} +{"epoch": 0, "iter": 21421, "iter_tflops": 18.300137013975096, "iter_time": 1.1273737182617187, "loss": 0.20605425536632538, "lr": 2.9468361276866973e-05, "seqlen": 8192.0, "step_tflops": 19.00511201581596, "step_time": 1.0855549545288086} +{"epoch": 0, "iter": 21422, "iter_tflops": 12.4511040297742, "iter_time": 1.656969009399414, "loss": 0.2994935214519501, "lr": 2.945585517435015e-05, "seqlen": 8192.0, "step_tflops": 15.740083540576952, "step_time": 1.3107359600067139} +{"epoch": 0, "iter": 21423, "iter_tflops": 37.591108097224975, "iter_time": 0.5488290863037109, "loss": 0.28500595688819885, "lr": 2.9443206398378815e-05, "seqlen": 8192.0, "step_tflops": 40.93148645423411, "step_time": 0.5040396842956543} +{"epoch": 0, "iter": 21424, "iter_tflops": 38.72559662362396, "iter_time": 0.5327508239746093, "loss": 0.2814742922782898, "lr": 2.9430415073791287e-05, "seqlen": 8192.0, "step_tflops": 42.037999827335554, "step_time": 0.490772481918335} 
+{"epoch": 0, "iter": 21425, "iter_tflops": 16.83342496073134, "iter_time": 1.2256028442382814, "loss": 0.3260444104671478, "lr": 2.9417481326832778e-05, "seqlen": 8192.0, "step_tflops": 17.961040634845645, "step_time": 1.1486580276489258} +{"epoch": 0, "iter": 21426, "iter_tflops": 32.28243371439685, "iter_time": 0.6390811080932617, "loss": 0.4590960741043091, "lr": 2.9404405285154146e-05, "seqlen": 8192.0, "step_tflops": 49.71537848476668, "step_time": 0.4149841384887695} +{"epoch": 0, "iter": 21427, "iter_tflops": 48.42167779274634, "iter_time": 0.4260714302062988, "loss": 0.47551649808883667, "lr": 2.9391187077810644e-05, "seqlen": 8192.0, "step_tflops": 52.33977537018157, "step_time": 0.3941761951446533} +{"epoch": 0, "iter": 21428, "iter_tflops": 48.01473157085981, "iter_time": 0.4296825752258301, "loss": 0.48667097091674805, "lr": 2.937782683526064e-05, "seqlen": 8192.0, "step_tflops": 51.93997609579059, "step_time": 0.3972103004455566} +{"epoch": 0, "iter": 21429, "iter_tflops": 28.074161858036625, "iter_time": 0.7348783416748047, "loss": 0.5113911032676697, "lr": 2.9364324689364332e-05, "seqlen": 8192.0, "step_tflops": 29.67397206966965, "step_time": 0.6952589111328125} +{"epoch": 0, "iter": 21430, "iter_tflops": 13.125292036683755, "iter_time": 1.5718578643798828, "loss": 0.44167360663414, "lr": 2.935068077338246e-05, "seqlen": 8192.0, "step_tflops": 19.294312156773366, "step_time": 1.0692837009429932} +{"epoch": 0, "iter": 21431, "iter_tflops": 44.41170003170709, "iter_time": 0.4645418548583985, "loss": 0.5379915237426758, "lr": 2.933689522197495e-05, "seqlen": 8192.0, "step_tflops": 48.00158595746299, "step_time": 0.42980024719238286} +{"epoch": 0, "iter": 21432, "iter_tflops": 46.63992695142417, "iter_time": 0.4423483238220215, "loss": 0.5490190982818604, "lr": 2.9322968171199646e-05, "seqlen": 8192.0, "step_tflops": 50.181269216672796, "step_time": 0.4111313610076905} +{"epoch": 0, "iter": 21433, "iter_tflops": 26.104676976587733, "iter_time": 
0.7903217315673828, "loss": 0.038266826421022415, "lr": 2.9308899758510903e-05, "seqlen": 8192.0, "step_tflops": 27.45573356360677, "step_time": 0.75143115234375} +{"epoch": 0, "iter": 21434, "iter_tflops": 24.16559129299779, "iter_time": 0.853738410949707, "loss": 0.009732455015182495, "lr": 2.9294690122758285e-05, "seqlen": 8192.0, "step_tflops": 27.384331714404123, "step_time": 0.753390432357788} +{"epoch": 0, "iter": 21435, "iter_tflops": 39.21662384118842, "iter_time": 0.526080307006836, "loss": 0.03279335796833038, "lr": 2.928033940418515e-05, "seqlen": 8192.0, "step_tflops": 43.25744647476632, "step_time": 0.47693738746643066} +{"epoch": 0, "iter": 21436, "iter_tflops": 44.87013593395815, "iter_time": 0.45979565429687497, "loss": 0.043389804661273956, "lr": 2.9265847744427305e-05, "seqlen": 8192.0, "step_tflops": 49.49353122021832, "step_time": 0.4168442420959472} +{"epoch": 0, "iter": 21437, "iter_tflops": 26.30940252753136, "iter_time": 0.7841718750000001, "loss": 0.21876011788845062, "lr": 2.9251215286511574e-05, "seqlen": 8192.0, "step_tflops": 28.899827747380158, "step_time": 0.7138829231262207} +{"epoch": 0, "iter": 21438, "iter_tflops": 37.50512788812371, "iter_time": 0.550087272644043, "loss": 0.31744828820228577, "lr": 2.923644217485442e-05, "seqlen": 8192.0, "step_tflops": 40.92709920578156, "step_time": 0.5040937156677247} +{"epoch": 0, "iter": 21439, "iter_tflops": 39.908713121681124, "iter_time": 0.5169571228027344, "loss": 0.2651219069957733, "lr": 2.922152855526048e-05, "seqlen": 8192.0, "step_tflops": 43.74150934560344, "step_time": 0.47165938758850096} +{"epoch": 0, "iter": 21440, "iter_tflops": 41.780608338961684, "iter_time": 0.49379590988159183, "loss": 0.3588528037071228, "lr": 2.9206474574921165e-05, "seqlen": 8192.0, "step_tflops": 45.63058863019782, "step_time": 0.4521329689025878} +{"epoch": 0, "iter": 21441, "iter_tflops": 36.80256830598282, "iter_time": 0.5605884170532226, "loss": 0.06443572044372559, "lr": 2.919128038241318e-05, 
"seqlen": 8192.0, "step_tflops": 41.04734532384492, "step_time": 0.5026169986724853} +{"epoch": 0, "iter": 21442, "iter_tflops": 37.514288839099606, "iter_time": 0.5499529418945313, "loss": 0.09562020003795624, "lr": 2.9175946127697073e-05, "seqlen": 8192.0, "step_tflops": 41.70572848640232, "step_time": 0.49468248748779303} +{"epoch": 0, "iter": 21443, "iter_tflops": 43.84302889951992, "iter_time": 0.47056724929809574, "loss": 0.06399510055780411, "lr": 2.916047196211575e-05, "seqlen": 8192.0, "step_tflops": 48.319913458138096, "step_time": 0.42696875953674324} +{"epoch": 0, "iter": 21444, "iter_tflops": 40.01595880944467, "iter_time": 0.5155716400146485, "loss": 0.03660966828465462, "lr": 2.914485803839297e-05, "seqlen": 8192.0, "step_tflops": 44.06347274763822, "step_time": 0.46821306228637694} +{"epoch": 0, "iter": 21445, "iter_tflops": 19.732442674244616, "iter_time": 1.045541793823242, "loss": 0.008694257587194443, "lr": 2.912910451063186e-05, "seqlen": 8192.0, "step_tflops": 21.266654502195596, "step_time": 0.9701146697998045} +{"epoch": 0, "iter": 21446, "iter_tflops": 33.9324713497513, "iter_time": 0.6080044479370118, "loss": 0.00765551021322608, "lr": 2.9113211534313385e-05, "seqlen": 8192.0, "step_tflops": 50.94598377297201, "step_time": 0.40496015548706055} +{"epoch": 0, "iter": 21447, "iter_tflops": 56.97718937171018, "iter_time": 0.36209391403198243, "loss": 0.010721773840487003, "lr": 2.9097179266294794e-05, "seqlen": 8192.0, "step_tflops": 62.606586243376874, "step_time": 0.32953551292419436} +{"epoch": 0, "iter": 21448, "iter_tflops": 59.336190714406854, "iter_time": 0.34769831466674805, "loss": 0.01514088548719883, "lr": 2.9081007864808114e-05, "seqlen": 8192.0, "step_tflops": 65.30163836969115, "step_time": 0.3159353122711182} +{"epoch": 0, "iter": 21449, "iter_tflops": 21.237169621172846, "iter_time": 0.9714615402221679, "loss": 0.505514919757843, "lr": 2.9064697489458532e-05, "seqlen": 8192.0, "step_tflops": 22.15476067185281, "step_time": 
0.9312261962890626} +{"epoch": 0, "iter": 21450, "iter_tflops": 16.163631387121942, "iter_time": 1.2763897552490233, "loss": 0.49001652002334595, "lr": 2.9048248301222885e-05, "seqlen": 8192.0, "step_tflops": 20.799155965029744, "step_time": 0.9919197463989258} +{"epoch": 0, "iter": 21451, "iter_tflops": 38.72166227956593, "iter_time": 0.5328049545288086, "loss": 0.49787667393684387, "lr": 2.9031660462448015e-05, "seqlen": 8192.0, "step_tflops": 42.34178130269882, "step_time": 0.4872514305114746} +{"epoch": 0, "iter": 21452, "iter_tflops": 38.177966461263594, "iter_time": 0.5403926773071289, "loss": 0.5488418340682983, "lr": 2.9014934136849183e-05, "seqlen": 8192.0, "step_tflops": 42.03717217383183, "step_time": 0.49078214454650876} +{"epoch": 0, "iter": 21453, "iter_tflops": 17.451535165223078, "iter_time": 1.182193618774414, "loss": 0.2610592544078827, "lr": 2.899806948950848e-05, "seqlen": 8192.0, "step_tflops": 18.91380905664531, "step_time": 1.0907952728271484} +{"epoch": 0, "iter": 21454, "iter_tflops": 19.55200050497884, "iter_time": 1.0551909255981444, "loss": 0.2774764597415924, "lr": 2.8981066686873162e-05, "seqlen": 8192.0, "step_tflops": 25.042255931682437, "step_time": 0.8238512363433836} +{"epoch": 0, "iter": 21455, "iter_tflops": 48.05177802222874, "iter_time": 0.429351303100586, "loss": 0.3302859961986542, "lr": 2.8963925896754036e-05, "seqlen": 8192.0, "step_tflops": 52.01828305652719, "step_time": 0.3966123504638672} +{"epoch": 0, "iter": 21456, "iter_tflops": 46.9878537368129, "iter_time": 0.43907290649414066, "loss": 0.25729498267173767, "lr": 2.894664728832377e-05, "seqlen": 8192.0, "step_tflops": 50.72983432131134, "step_time": 0.40668560791015623} +{"epoch": 0, "iter": 21457, "iter_tflops": 31.1023433580432, "iter_time": 0.6633292312622071, "loss": 0.4596398174762726, "lr": 2.892923103211526e-05, "seqlen": 8192.0, "step_tflops": 33.14637848662487, "step_time": 0.6224237594604493} +{"epoch": 0, "iter": 21458, "iter_tflops": 12.840664044850623, 
"iter_time": 1.6066998901367187, "loss": 0.6078147888183594, "lr": 2.8911677300019924e-05, "seqlen": 8192.0, "step_tflops": 17.47236370458289, "step_time": 1.1807843437194823} +{"epoch": 0, "iter": 21459, "iter_tflops": 37.14863238771153, "iter_time": 0.5553661651611328, "loss": 0.6135169267654419, "lr": 2.8893986265286e-05, "seqlen": 8192.0, "step_tflops": 40.62838459260731, "step_time": 0.5077999954223633} +{"epoch": 0, "iter": 21460, "iter_tflops": 39.94932891299063, "iter_time": 0.5164315414428711, "loss": 0.5633045434951782, "lr": 2.887615810251687e-05, "seqlen": 8192.0, "step_tflops": 43.61526750523972, "step_time": 0.47302457809448245} +{"epoch": 0, "iter": 21461, "iter_tflops": 16.590611532225438, "iter_time": 1.243540267944336, "loss": 0.4428193271160126, "lr": 2.8858192987669303e-05, "seqlen": 8192.0, "step_tflops": 17.56286494387364, "step_time": 1.1746997756958006} +{"epoch": 0, "iter": 21462, "iter_tflops": 15.458822061618427, "iter_time": 1.334583801269531, "loss": 0.3781217932701111, "lr": 2.8840091098051725e-05, "seqlen": 8192.0, "step_tflops": 19.53542979583965, "step_time": 1.05608597946167} +{"epoch": 0, "iter": 21463, "iter_tflops": 43.823118566060685, "iter_time": 0.4707810440063477, "loss": 0.3906424045562744, "lr": 2.8821852612322477e-05, "seqlen": 8192.0, "step_tflops": 47.25753604241205, "step_time": 0.43656727027893066} +{"epoch": 0, "iter": 21464, "iter_tflops": 49.07297958875474, "iter_time": 0.4204165649414062, "loss": 0.5190625190734863, "lr": 2.8803477710488058e-05, "seqlen": 8192.0, "step_tflops": 52.71864598625404, "step_time": 0.39134338760375975} +{"epoch": 0, "iter": 21465, "iter_tflops": 26.819352079319966, "iter_time": 0.7692614440917968, "loss": 0.023623595014214516, "lr": 2.878496657390132e-05, "seqlen": 8192.0, "step_tflops": 28.229998396200035, "step_time": 0.7308216323852539} +{"epoch": 0, "iter": 21466, "iter_tflops": 15.354476633411464, "iter_time": 1.3436533203124998, "loss": 0.019736209884285927, "lr": 
2.8766319385259717e-05, "seqlen": 8192.0, "step_tflops": 19.49308636380215, "step_time": 1.0583800392150877} +{"epoch": 0, "iter": 21467, "iter_tflops": 42.9293502636225, "iter_time": 0.4805824775695801, "loss": 0.03597452491521835, "lr": 2.874753632860347e-05, "seqlen": 8192.0, "step_tflops": 47.33967473961515, "step_time": 0.43580978584289554} +{"epoch": 0, "iter": 21468, "iter_tflops": 47.40150045450087, "iter_time": 0.4352413597106934, "loss": 0.03500764071941376, "lr": 2.8728617589313762e-05, "seqlen": 8192.0, "step_tflops": 52.20708451927689, "step_time": 0.39517804336547857} +{"epoch": 0, "iter": 21469, "iter_tflops": 26.183130031990125, "iter_time": 0.5241988830566406, "loss": 0.03959708660840988, "lr": 2.870956335411092e-05, "seqlen": 5504.0, "step_tflops": 29.25271429406044, "step_time": 0.4691929569244385} +{"epoch": 0, "iter": 21470, "iter_tflops": 27.88788581913103, "iter_time": 0.4921551818847656, "loss": 0.021472716704010963, "lr": 2.8690373811052545e-05, "seqlen": 5504.0, "step_tflops": 30.847323588152747, "step_time": 0.4449386825561524} +{"epoch": 0, "iter": 21471, "iter_tflops": 29.90563086248419, "iter_time": 0.45894927215576176, "loss": 0.05063531547784805, "lr": 2.8671049149531677e-05, "seqlen": 5504.0, "step_tflops": 33.10044899647361, "step_time": 0.4146520042419434} +{"epoch": 0, "iter": 21472, "iter_tflops": 30.872873286247742, "iter_time": 0.44457046127319333, "loss": 0.017629524692893028, "lr": 2.8651589560274936e-05, "seqlen": 5504.0, "step_tflops": 34.057705405818886, "step_time": 0.40299742317199705} +{"epoch": 0, "iter": 21473, "iter_tflops": 21.921539329020213, "iter_time": 0.941133430480957, "loss": 0.5030578970909119, "lr": 2.8631995235340606e-05, "seqlen": 8192.0, "step_tflops": 23.677130975757123, "step_time": 0.87135107421875} +{"epoch": 0, "iter": 21474, "iter_tflops": 36.87345331058562, "iter_time": 0.5595107498168945, "loss": 0.6403180360794067, "lr": 2.8612266368116755e-05, "seqlen": 8192.0, "step_tflops": 
40.16782419318995, "step_time": 0.5136223812103271} +{"epoch": 0, "iter": 21475, "iter_tflops": 36.13153689823689, "iter_time": 0.5709996109008788, "loss": 0.5516971349716187, "lr": 2.859240315331935e-05, "seqlen": 8192.0, "step_tflops": 39.33519091349311, "step_time": 0.524494556427002} +{"epoch": 0, "iter": 21476, "iter_tflops": 35.33260670414371, "iter_time": 0.5839108810424806, "loss": 0.6475228071212769, "lr": 2.8572405786990293e-05, "seqlen": 8192.0, "step_tflops": 38.33040238691118, "step_time": 0.5382435932159424} +{"epoch": 0, "iter": 21477, "iter_tflops": 27.67375592671616, "iter_time": 0.7455111465454103, "loss": 0.499584823846817, "lr": 2.8552274466495525e-05, "seqlen": 8192.0, "step_tflops": 29.917467800494236, "step_time": 0.6896002578735351} +{"epoch": 0, "iter": 21478, "iter_tflops": 12.223784288117121, "iter_time": 1.6877828521728517, "loss": 0.5558403134346008, "lr": 2.853200939052304e-05, "seqlen": 8192.0, "step_tflops": 13.671459903387863, "step_time": 1.5090629425048827} +{"epoch": 0, "iter": 21479, "iter_tflops": 36.108392750607926, "iter_time": 0.5713656005859374, "loss": 0.4645920693874359, "lr": 2.851161075908097e-05, "seqlen": 8192.0, "step_tflops": 39.82232331723247, "step_time": 0.5180785999298096} +{"epoch": 0, "iter": 21480, "iter_tflops": 40.81064515446467, "iter_time": 0.5055321578979491, "loss": 0.5123162865638733, "lr": 2.8491078773495568e-05, "seqlen": 8192.0, "step_tflops": 44.46770474868587, "step_time": 0.4639567890167236} +{"epoch": 0, "iter": 21481, "iter_tflops": 13.008076907863144, "iter_time": 1.1177915191650392, "loss": 0.2060609608888626, "lr": 2.8470413636409233e-05, "seqlen": 5824.0, "step_tflops": 13.787500652693677, "step_time": 1.0546014404296875} +{"epoch": 0, "iter": 21482, "iter_tflops": 15.636629854004338, "iter_time": 0.9298882293701172, "loss": 0.18384785950183868, "lr": 2.844961555177854e-05, "seqlen": 5824.0, "step_tflops": 18.95932237008243, "step_time": 0.7669218215942383} +{"epoch": 0, "iter": 21483, 
"iter_tflops": 24.56595227350963, "iter_time": 0.5918890457153321, "loss": 0.19136275351047516, "lr": 2.8428684724872176e-05, "seqlen": 5824.0, "step_tflops": 26.123069974791935, "step_time": 0.5566083183288575} +{"epoch": 0, "iter": 21484, "iter_tflops": 26.19951317889459, "iter_time": 0.5549842834472657, "loss": 0.1949853152036667, "lr": 2.8407621362268957e-05, "seqlen": 5824.0, "step_tflops": 27.855852483021774, "step_time": 0.5219843139648437} +{"epoch": 0, "iter": 21485, "iter_tflops": 44.42060334951768, "iter_time": 0.46444874572753914, "loss": 0.2275014966726303, "lr": 2.8386425671855768e-05, "seqlen": 8192.0, "step_tflops": 48.567443300899754, "step_time": 0.42479266166687013} +{"epoch": 0, "iter": 21486, "iter_tflops": 43.49209738450427, "iter_time": 0.47436418914794914, "loss": 0.23443350195884705, "lr": 2.8365097862825516e-05, "seqlen": 8192.0, "step_tflops": 47.81136856703766, "step_time": 0.43151020622253417} +{"epoch": 0, "iter": 21487, "iter_tflops": 47.59678441972668, "iter_time": 0.43345561599731447, "loss": 0.27071720361709595, "lr": 2.8343638145675072e-05, "seqlen": 8192.0, "step_tflops": 51.77849509114427, "step_time": 0.3984490756988525} +{"epoch": 0, "iter": 21488, "iter_tflops": 49.158143721843985, "iter_time": 0.41968821334838863, "loss": 0.25861772894859314, "lr": 2.832204673220317e-05, "seqlen": 8192.0, "step_tflops": 53.3196116062154, "step_time": 0.38693255424499506} +{"epoch": 0, "iter": 21489, "iter_tflops": 51.28834662032466, "iter_time": 0.40225694274902346, "loss": 0.0025037697050720453, "lr": 2.830032383550835e-05, "seqlen": 8192.0, "step_tflops": 56.61779268768416, "step_time": 0.36439240264892575} +{"epoch": 0, "iter": 21490, "iter_tflops": 49.190081870696645, "iter_time": 0.4194157180786133, "loss": 0.004354032687842846, "lr": 2.827846966998683e-05, "seqlen": 8192.0, "step_tflops": 54.03214014470454, "step_time": 0.3818300266265869} +{"epoch": 0, "iter": 21491, "iter_tflops": 56.67792487929889, "iter_time": 0.364005802154541, 
"loss": 0.011852127499878407, "lr": 2.8256484451330406e-05, "seqlen": 8192.0, "step_tflops": 61.985806543154204, "step_time": 0.33283576774597173} +{"epoch": 0, "iter": 21492, "iter_tflops": 58.285748156254954, "iter_time": 0.3539646339416504, "loss": 0.0045968093909323215, "lr": 2.8234368396524303e-05, "seqlen": 8192.0, "step_tflops": 63.539382687599755, "step_time": 0.3246977329254151} +{"epoch": 0, "iter": 21493, "iter_tflops": 23.13009136706655, "iter_time": 0.8919590148925782, "loss": 0.049262888729572296, "lr": 2.8212121723845054e-05, "seqlen": 8192.0, "step_tflops": 24.31781592714475, "step_time": 0.8483941802978515} +{"epoch": 0, "iter": 21494, "iter_tflops": 26.283652556273413, "iter_time": 0.7849401245117188, "loss": 0.01500933337956667, "lr": 2.818974465285834e-05, "seqlen": 8192.0, "step_tflops": 33.678700303900925, "step_time": 0.6125857982635499} +{"epoch": 0, "iter": 21495, "iter_tflops": 59.70361079366447, "iter_time": 0.3455585556030274, "loss": 0.0302264541387558, "lr": 2.8167237404416827e-05, "seqlen": 8192.0, "step_tflops": 65.4582082807846, "step_time": 0.3151796245574951} +{"epoch": 0, "iter": 21496, "iter_tflops": 56.269526212207225, "iter_time": 0.36664772033691406, "loss": 0.022942936047911644, "lr": 2.8144600200657953e-05, "seqlen": 8192.0, "step_tflops": 61.22091782557431, "step_time": 0.33699418830871586} +{"epoch": 0, "iter": 21497, "iter_tflops": 26.056123953902674, "iter_time": 0.7917944183349609, "loss": 0.40468651056289673, "lr": 2.8121833265001792e-05, "seqlen": 8192.0, "step_tflops": 27.417279839454437, "step_time": 0.7524850616455078} +{"epoch": 0, "iter": 21498, "iter_tflops": 13.96629636992323, "iter_time": 1.4772057647705077, "loss": 0.44898203015327454, "lr": 2.8098936822148805e-05, "seqlen": 8192.0, "step_tflops": 16.614359523100724, "step_time": 1.241762794494629} +{"epoch": 0, "iter": 21499, "iter_tflops": 43.69798424756835, "iter_time": 0.47212918090820316, "loss": 0.5539473295211792, "lr": 2.8075911098077637e-05, 
"seqlen": 8192.0, "step_tflops": 47.138911812687994, "step_time": 0.4376658840179443} +{"epoch": 0, "iter": 21500, "iter_tflops": 45.52138396079639, "iter_time": 0.45321762466430665, "loss": 0.41480752825737, "lr": 2.8052756320042885e-05, "seqlen": 8192.0, "step_tflops": 49.10021043691912, "step_time": 0.42018340301513674} +{"epoch": 0, "iter": 21501, "iter_tflops": 22.075546909674966, "iter_time": 0.9345677185058594, "loss": 0.08822637796401978, "lr": 2.8029472716572872e-05, "seqlen": 8192.0, "step_tflops": 23.17582418440452, "step_time": 0.8901989135742188} +{"epoch": 0, "iter": 21502, "iter_tflops": 16.31446752643528, "iter_time": 1.2645888366699218, "loss": 0.09302526712417603, "lr": 2.8006060517467352e-05, "seqlen": 8192.0, "step_tflops": 23.156201026422686, "step_time": 0.8909532909393311} +{"epoch": 0, "iter": 21503, "iter_tflops": 41.74316141282361, "iter_time": 0.494238883972168, "loss": 0.08861105889081955, "lr": 2.7982519953795284e-05, "seqlen": 8192.0, "step_tflops": 45.6348029325103, "step_time": 0.45209121513366696} +{"epoch": 0, "iter": 21504, "iter_tflops": 37.09303076928578, "iter_time": 0.5561986465454102, "loss": 0.09076575934886932, "lr": 2.7958851257892527e-05, "seqlen": 8192.0, "step_tflops": 40.15415377655954, "step_time": 0.5137972431182861} +{"epoch": 0, "iter": 21505, "iter_tflops": 3.8333632492832392, "iter_time": 0.6242807235717773, "loss": 0.005670356564223766, "lr": 2.7935054663359562e-05, "seqlen": 976.0, "step_tflops": 4.219258976264569, "step_time": 0.5671836681365967} +{"epoch": 0, "iter": 21506, "iter_tflops": 4.271454178978789, "iter_time": 0.5602529449462891, "loss": 0.02149065025150776, "lr": 2.7911130405059155e-05, "seqlen": 976.0, "step_tflops": 4.709625595994942, "step_time": 0.5081284561157227} +{"epoch": 0, "iter": 21507, "iter_tflops": 4.459597664992313, "iter_time": 0.5366167449951171, "loss": 0.015715453773736954, "lr": 2.788707871911409e-05, "seqlen": 976.0, "step_tflops": 4.838700161058452, "step_time": 
0.49457389450073247} +{"epoch": 0, "iter": 21508, "iter_tflops": 4.561804418943012, "iter_time": 0.5245939025878907, "loss": 0.0049687097780406475, "lr": 2.7862899842904784e-05, "seqlen": 976.0, "step_tflops": 4.949936534544865, "step_time": 0.48345969009399414} +{"epoch": 0, "iter": 21509, "iter_tflops": 18.476529089796934, "iter_time": 1.1166108856201171, "loss": 0.6376233696937561, "lr": 2.7838594015066997e-05, "seqlen": 8192.0, "step_tflops": 20.012948658199033, "step_time": 1.0308872451782227} +{"epoch": 0, "iter": 21510, "iter_tflops": 21.33249733806406, "iter_time": 0.9671204071044922, "loss": 0.6132104992866516, "lr": 2.7814161475489428e-05, "seqlen": 8192.0, "step_tflops": 28.687442400718332, "step_time": 0.7191681022644043} +{"epoch": 0, "iter": 21511, "iter_tflops": 34.998401331662734, "iter_time": 0.5894867401123047, "loss": 0.3886224627494812, "lr": 2.7789602465311384e-05, "seqlen": 8192.0, "step_tflops": 37.7858984855136, "step_time": 0.545999813079834} +{"epoch": 0, "iter": 21512, "iter_tflops": 33.59384537935693, "iter_time": 0.6141331329345703, "loss": 0.5072025656700134, "lr": 2.776491722692038e-05, "seqlen": 8192.0, "step_tflops": 36.12936118275451, "step_time": 0.5710339965820312} +{"epoch": 0, "iter": 21513, "iter_tflops": 22.409358107283392, "iter_time": 0.9206463394165039, "loss": 0.19562548398971558, "lr": 2.7740106003949747e-05, "seqlen": 8192.0, "step_tflops": 23.820628863124007, "step_time": 0.8661019668579102} +{"epoch": 0, "iter": 21514, "iter_tflops": 9.824320677416317, "iter_time": 2.1000020446777343, "loss": 0.15762211382389069, "lr": 2.7715169041276248e-05, "seqlen": 8192.0, "step_tflops": 11.094460366884565, "step_time": 1.8595851287841794} +{"epoch": 0, "iter": 21515, "iter_tflops": 15.368606492473255, "iter_time": 1.34241796875, "loss": 0.24749913811683655, "lr": 2.7690106585017634e-05, "seqlen": 8192.0, "step_tflops": 18.121106093118634, "step_time": 1.1385118217468262} +{"epoch": 0, "iter": 21516, "iter_tflops": 
37.11368402464395, "iter_time": 0.5558891296386719, "loss": 0.19949349761009216, "lr": 2.7664918882530227e-05, "seqlen": 8192.0, "step_tflops": 40.73582031401898, "step_time": 0.5064607353210449} +{"epoch": 0, "iter": 21517, "iter_tflops": 14.784434499186139, "iter_time": 1.0055783157348634, "loss": 0.24139423668384552, "lr": 2.7639606182406484e-05, "seqlen": 5952.0, "step_tflops": 15.62502077751192, "step_time": 0.9514807662963868} +{"epoch": 0, "iter": 21518, "iter_tflops": 11.712214818732528, "iter_time": 1.2693505859375, "loss": 0.2681756615638733, "lr": 2.7614168734472538e-05, "seqlen": 5952.0, "step_tflops": 14.885846583963776, "step_time": 0.9987276611328124} +{"epoch": 0, "iter": 21519, "iter_tflops": 25.656224839242814, "iter_time": 0.5794658737182617, "loss": 0.27597683668136597, "lr": 2.7588606789785723e-05, "seqlen": 5952.0, "step_tflops": 27.29318814388272, "step_time": 0.5447112541198731} +{"epoch": 0, "iter": 21520, "iter_tflops": 27.068727580115105, "iter_time": 0.5492281341552734, "loss": 0.2506045997142792, "lr": 2.7562920600632128e-05, "seqlen": 5952.0, "step_tflops": 28.81114594462776, "step_time": 0.516012336730957} +{"epoch": 0, "iter": 21521, "iter_tflops": 35.05128501256633, "iter_time": 0.5885973510742188, "loss": 0.47425344586372375, "lr": 2.7537110420524057e-05, "seqlen": 8192.0, "step_tflops": 37.51663325680859, "step_time": 0.5499185752868653} +{"epoch": 0, "iter": 21522, "iter_tflops": 12.296725383032548, "iter_time": 1.6777713470458984, "loss": 0.3740549087524414, "lr": 2.751117650419757e-05, "seqlen": 8192.0, "step_tflops": 14.774514250846956, "step_time": 1.3963974151611327} +{"epoch": 0, "iter": 21523, "iter_tflops": 38.75463621795744, "iter_time": 0.5323516235351562, "loss": 0.42540842294692993, "lr": 2.7485119107609953e-05, "seqlen": 8192.0, "step_tflops": 42.6255592162973, "step_time": 0.4840075740814209} +{"epoch": 0, "iter": 21524, "iter_tflops": 38.02199491923782, "iter_time": 0.5426094436645508, "loss": 0.4384467601776123, 
"lr": 2.745893848793719e-05, "seqlen": 8192.0, "step_tflops": 41.23162537401756, "step_time": 0.5003706092834472} +{"epoch": 0, "iter": 21525, "iter_tflops": 13.178071780767493, "iter_time": 1.1343542022705078, "loss": 0.30455899238586426, "lr": 2.7432634903571426e-05, "seqlen": 5984.0, "step_tflops": 13.870819622943609, "step_time": 1.0777013549804686} +{"epoch": 0, "iter": 21526, "iter_tflops": 11.992936605561821, "iter_time": 1.2464504394531248, "loss": 0.29565396904945374, "lr": 2.7406208614118427e-05, "seqlen": 5984.0, "step_tflops": 15.60544966112064, "step_time": 0.9579090270996093} +{"epoch": 0, "iter": 21527, "iter_tflops": 23.23955842802725, "iter_time": 0.6432394638061524, "loss": 0.18702882528305054, "lr": 2.7379659880394996e-05, "seqlen": 5984.0, "step_tflops": 25.072377656179135, "step_time": 0.5962179298400879} +{"epoch": 0, "iter": 21528, "iter_tflops": 23.261883143540192, "iter_time": 0.6426221389770508, "loss": 0.18439453840255737, "lr": 2.7352988964426412e-05, "seqlen": 5984.0, "step_tflops": 24.98887566304352, "step_time": 0.5982102317810059} +{"epoch": 0, "iter": 21529, "iter_tflops": 17.23655587420941, "iter_time": 1.196938278198242, "loss": 0.6413223743438721, "lr": 2.7326196129443855e-05, "seqlen": 8192.0, "step_tflops": 18.7515454975431, "step_time": 1.1002342987060547} +{"epoch": 0, "iter": 21530, "iter_tflops": 23.828022952878317, "iter_time": 0.8658332061767577, "loss": 0.5125089287757874, "lr": 2.7299281639881783e-05, "seqlen": 8192.0, "step_tflops": 29.02873352651672, "step_time": 0.7107128353118897} +{"epoch": 0, "iter": 21531, "iter_tflops": 38.42972631727098, "iter_time": 0.5368524703979491, "loss": 0.5365314483642578, "lr": 2.727224576137535e-05, "seqlen": 8192.0, "step_tflops": 41.89338658182982, "step_time": 0.4924665966033936} +{"epoch": 0, "iter": 21532, "iter_tflops": 39.16869736470676, "iter_time": 0.5267240142822265, "loss": 0.5118288397789001, "lr": 2.724508876075776e-05, "seqlen": 8192.0, "step_tflops": 42.231963023175936, 
"step_time": 0.48851845932006843} +{"epoch": 0, "iter": 21533, "iter_tflops": 19.339398400365127, "iter_time": 1.0667908630371095, "loss": 0.1294703632593155, "lr": 2.721781090605764e-05, "seqlen": 8192.0, "step_tflops": 20.65565440572885, "step_time": 0.9988109359741211} +{"epoch": 0, "iter": 21534, "iter_tflops": 22.415171479647935, "iter_time": 0.920407569885254, "loss": 0.1314774751663208, "lr": 2.719041246649641e-05, "seqlen": 8192.0, "step_tflops": 27.937450330533466, "step_time": 0.738474458694458} +{"epoch": 0, "iter": 21535, "iter_tflops": 50.359762768439154, "iter_time": 0.40967416000366214, "loss": 0.1809350699186325, "lr": 2.7162893712485604e-05, "seqlen": 8192.0, "step_tflops": 54.61590440284483, "step_time": 0.3777488212585449} +{"epoch": 0, "iter": 21536, "iter_tflops": 52.88637002573318, "iter_time": 0.3901022796630859, "loss": 0.12976239621639252, "lr": 2.7135254915624213e-05, "seqlen": 8192.0, "step_tflops": 57.56720766951587, "step_time": 0.3583827381134033} +{"epoch": 0, "iter": 21537, "iter_tflops": 30.23888816614081, "iter_time": 0.6822702407836915, "loss": 0.5248264074325562, "lr": 2.7107496348696004e-05, "seqlen": 8192.0, "step_tflops": 32.20808422712534, "step_time": 0.6405563697814941} +{"epoch": 0, "iter": 21538, "iter_tflops": 35.508871070910644, "iter_time": 0.5810123748779297, "loss": 0.5071062445640564, "lr": 2.707961828566683e-05, "seqlen": 8192.0, "step_tflops": 44.4988590935248, "step_time": 0.46363196563720704} +{"epoch": 0, "iter": 21539, "iter_tflops": 45.4437219222463, "iter_time": 0.4539921607971191, "loss": 0.6252972483634949, "lr": 2.7051621001681917e-05, "seqlen": 8192.0, "step_tflops": 48.89927617471359, "step_time": 0.4219099979400635} +{"epoch": 0, "iter": 21540, "iter_tflops": 50.2172285527467, "iter_time": 0.4108369598388672, "loss": 0.6945075988769531, "lr": 2.702350477306315e-05, "seqlen": 8192.0, "step_tflops": 53.932879824019516, "step_time": 0.3825327625274658} +{"epoch": 0, "iter": 21541, "iter_tflops": 
32.15359231827262, "iter_time": 0.641641944885254, "loss": 0.04810159653425217, "lr": 2.699526987730636e-05, "seqlen": 8192.0, "step_tflops": 34.245205605084315, "step_time": 0.6024520263671874} +{"epoch": 0, "iter": 21542, "iter_tflops": 12.14296791010095, "iter_time": 1.6990157318115233, "loss": 0.011815977282822132, "lr": 2.6966916593078566e-05, "seqlen": 8192.0, "step_tflops": 16.417991277267593, "step_time": 1.2566149635314943} +{"epoch": 0, "iter": 21543, "iter_tflops": 38.874711133375804, "iter_time": 0.5307073135375976, "loss": 0.03448181971907616, "lr": 2.6938445200215237e-05, "seqlen": 8192.0, "step_tflops": 44.174933080583834, "step_time": 0.4670316867828369} +{"epoch": 0, "iter": 21544, "iter_tflops": 42.680757906773344, "iter_time": 0.4833816108703613, "loss": 0.03958357870578766, "lr": 2.6909855979717533e-05, "seqlen": 8192.0, "step_tflops": 46.945085906555, "step_time": 0.4394729099273682} +{"epoch": 0, "iter": 21545, "iter_tflops": 18.784378386662155, "iter_time": 1.0983112182617187, "loss": 0.17932847142219543, "lr": 2.6881149213749506e-05, "seqlen": 8192.0, "step_tflops": 20.290871852261912, "step_time": 1.0167672271728516} +{"epoch": 0, "iter": 21546, "iter_tflops": 24.082049000652688, "iter_time": 0.8567000885009765, "loss": 0.19851212203502655, "lr": 2.6852325185635357e-05, "seqlen": 8192.0, "step_tflops": 33.3538541473151, "step_time": 0.6185520095825195} +{"epoch": 0, "iter": 21547, "iter_tflops": 48.33778090305291, "iter_time": 0.4268109359741211, "loss": 0.13074277341365814, "lr": 2.6823384179856602e-05, "seqlen": 8192.0, "step_tflops": 51.99872980868005, "step_time": 0.39676148986816406} +{"epoch": 0, "iter": 21548, "iter_tflops": 48.5003072802293, "iter_time": 0.42538067626953124, "loss": 0.13325680792331696, "lr": 2.6794326482049282e-05, "seqlen": 8192.0, "step_tflops": 52.51699182207222, "step_time": 0.3928460636138916} +{"epoch": 0, "iter": 21549, "iter_tflops": 33.301938761816125, "iter_time": 0.6195162887573242, "loss": 
0.01782715693116188, "lr": 2.676515237900115e-05, "seqlen": 8192.0, "step_tflops": 35.76683325037294, "step_time": 0.576821922302246} +{"epoch": 0, "iter": 21550, "iter_tflops": 13.442772264722144, "iter_time": 1.5347350311279295, "loss": 0.04944229498505592, "lr": 2.673586215864882e-05, "seqlen": 8192.0, "step_tflops": 16.792301298178113, "step_time": 1.228604295730591} +{"epoch": 0, "iter": 21551, "iter_tflops": 51.92433540917221, "iter_time": 0.397329948425293, "loss": 0.027778906747698784, "lr": 2.6706456110074946e-05, "seqlen": 8192.0, "step_tflops": 56.80660959813915, "step_time": 0.3631812152862549} +{"epoch": 0, "iter": 21552, "iter_tflops": 56.0856060147618, "iter_time": 0.3678500595092773, "loss": 0.0348995141685009, "lr": 2.6676934523505354e-05, "seqlen": 8192.0, "step_tflops": 61.45770120655752, "step_time": 0.33569582176208496} +{"epoch": 0, "iter": 21553, "iter_tflops": 43.97906460069996, "iter_time": 0.4691116943359375, "loss": 0.012667161412537098, "lr": 2.664729769030618e-05, "seqlen": 8192.0, "step_tflops": 48.27938915126691, "step_time": 0.42732714462280275} +{"epoch": 0, "iter": 21554, "iter_tflops": 39.79634486373975, "iter_time": 0.5184167938232421, "loss": 0.018947219476103783, "lr": 2.6617545902981005e-05, "seqlen": 8192.0, "step_tflops": 44.679159281109825, "step_time": 0.46176100540161136} +{"epoch": 0, "iter": 21555, "iter_tflops": 53.793799708583535, "iter_time": 0.3835217742919922, "loss": 0.019279997795820236, "lr": 2.6587679455167966e-05, "seqlen": 8192.0, "step_tflops": 58.94410746069475, "step_time": 0.35001112747192387} +{"epoch": 0, "iter": 21556, "iter_tflops": 53.33942272894935, "iter_time": 0.3867888412475586, "loss": 0.02543475292623043, "lr": 2.655769864163684e-05, "seqlen": 8192.0, "step_tflops": 58.383816164849804, "step_time": 0.3533700752258301} +{"epoch": 0, "iter": 21557, "iter_tflops": 22.01360006173863, "iter_time": 0.9371976165771484, "loss": 0.4657524824142456, "lr": 2.652760375828615e-05, "seqlen": 8192.0, 
"step_tflops": 22.96797959382149, "step_time": 0.8982546081542969} +{"epoch": 0, "iter": 21558, "iter_tflops": 12.58786251655165, "iter_time": 1.6389671783447266, "loss": 0.5351840853691101, "lr": 2.649739510214025e-05, "seqlen": 8192.0, "step_tflops": 16.809982531817116, "step_time": 1.2273120136260987} +{"epoch": 0, "iter": 21559, "iter_tflops": 40.93475860830193, "iter_time": 0.5039993934631348, "loss": 0.5363261699676514, "lr": 2.6467072971346375e-05, "seqlen": 8192.0, "step_tflops": 44.66142261119304, "step_time": 0.46194438743591315} +{"epoch": 0, "iter": 21560, "iter_tflops": 43.67170518337936, "iter_time": 0.47241328048706055, "loss": 0.4601445198059082, "lr": 2.643663766517172e-05, "seqlen": 8192.0, "step_tflops": 47.39527716097034, "step_time": 0.43529850959777827} +{"epoch": 0, "iter": 21561, "iter_tflops": 21.174487809970717, "iter_time": 0.9743373107910156, "loss": 0.06824963539838791, "lr": 2.6406089484000465e-05, "seqlen": 8192.0, "step_tflops": 22.466398699923168, "step_time": 0.9183088836669921} +{"epoch": 0, "iter": 21562, "iter_tflops": 14.13281706437513, "iter_time": 1.459800506591797, "loss": 0.05823024362325668, "lr": 2.637542872933083e-05, "seqlen": 8192.0, "step_tflops": 19.50140553639346, "step_time": 1.0579285411834716} +{"epoch": 0, "iter": 21563, "iter_tflops": 51.61966106449502, "iter_time": 0.399675106048584, "loss": 0.048668619245290756, "lr": 2.634465570377208e-05, "seqlen": 8192.0, "step_tflops": 56.35535519330691, "step_time": 0.36608931732177735} +{"epoch": 0, "iter": 21564, "iter_tflops": 51.18359302788248, "iter_time": 0.40308021163940433, "loss": 0.034730568528175354, "lr": 2.6313770711041557e-05, "seqlen": 8192.0, "step_tflops": 55.36680894313126, "step_time": 0.37262565612792964} +{"epoch": 0, "iter": 21565, "iter_tflops": 29.53224109522102, "iter_time": 0.4440923805236816, "loss": 0.0026615241076797247, "lr": 2.6282774055961672e-05, "seqlen": 5264.0, "step_tflops": 32.40005584083288, "step_time": 0.4047845878601074} 
+{"epoch": 0, "iter": 21566, "iter_tflops": 25.43338426358126, "iter_time": 0.5156625289916993, "loss": 0.004437687341123819, "lr": 2.6251666044456895e-05, "seqlen": 5264.0, "step_tflops": 28.75030259701397, "step_time": 0.45617061614990234} +{"epoch": 0, "iter": 21567, "iter_tflops": 27.685931763936924, "iter_time": 0.4737078514099122, "loss": 0.0008860334637574852, "lr": 2.6220446983550738e-05, "seqlen": 5264.0, "step_tflops": 30.778320446082823, "step_time": 0.4261130256652832} +{"epoch": 0, "iter": 21568, "iter_tflops": 28.26281401475929, "iter_time": 0.46403883361816406, "loss": 0.003658997593447566, "lr": 2.6189117181362733e-05, "seqlen": 5264.0, "step_tflops": 31.35458097319101, "step_time": 0.4182815666198731} +{"epoch": 0, "iter": 21569, "iter_tflops": 17.03259457127983, "iter_time": 1.2112713317871093, "loss": 0.04548273980617523, "lr": 2.6157676947105377e-05, "seqlen": 8192.0, "step_tflops": 18.172342270883398, "step_time": 1.1353018341064454} +{"epoch": 0, "iter": 21570, "iter_tflops": 14.150296257380436, "iter_time": 1.4579972839355468, "loss": 0.07233128696680069, "lr": 2.612612659108109e-05, "seqlen": 8192.0, "step_tflops": 18.287019995769754, "step_time": 1.1281823673248292} +{"epoch": 0, "iter": 21571, "iter_tflops": 52.59845048420453, "iter_time": 0.3922376670837402, "loss": 0.08357895910739899, "lr": 2.6094466424679148e-05, "seqlen": 8192.0, "step_tflops": 57.15448036771034, "step_time": 0.36097071266174313} +{"epoch": 0, "iter": 21572, "iter_tflops": 49.01595484571535, "iter_time": 0.4209056739807129, "loss": 0.09629317373037338, "lr": 2.606269676037261e-05, "seqlen": 8192.0, "step_tflops": 53.35201850294094, "step_time": 0.38669752502441407} +{"epoch": 0, "iter": 21573, "iter_tflops": 26.779559871889067, "iter_time": 0.7704045028686524, "loss": 0.4288974106311798, "lr": 2.603081791171524e-05, "seqlen": 8192.0, "step_tflops": 28.254236095443513, "step_time": 0.7301947021484375} +{"epoch": 0, "iter": 21574, "iter_tflops": 9.035273242443091, 
"iter_time": 2.28339453125, "loss": 0.4187479317188263, "lr": 2.5998830193338402e-05, "seqlen": 8192.0, "step_tflops": 10.895447408896532, "step_time": 1.8935517501831054} +{"epoch": 0, "iter": 21575, "iter_tflops": 8.973913662190167, "iter_time": 2.299007354736328, "loss": 0.5815092921257019, "lr": 2.5966733920947963e-05, "seqlen": 8192.0, "step_tflops": 11.396490228437527, "step_time": 1.8103023910522458} +{"epoch": 0, "iter": 21576, "iter_tflops": 34.11198737871996, "iter_time": 0.6048047943115235, "loss": 0.43310049176216125, "lr": 2.5934529411321174e-05, "seqlen": 8192.0, "step_tflops": 37.02169746749684, "step_time": 0.5572703285217285} +{"epoch": 0, "iter": 21577, "iter_tflops": 11.241514963475904, "iter_time": 1.2753043975830078, "loss": 0.15571282804012299, "lr": 2.5902216982303544e-05, "seqlen": 5744.0, "step_tflops": 12.029679804035188, "step_time": 1.191748550415039} +{"epoch": 0, "iter": 21578, "iter_tflops": 10.89386729898833, "iter_time": 1.3160022125244142, "loss": 0.25144243240356445, "lr": 2.5869796952805702e-05, "seqlen": 5744.0, "step_tflops": 13.464019721082696, "step_time": 1.0647899932861329} +{"epoch": 0, "iter": 21579, "iter_tflops": 21.874737928877366, "iter_time": 0.6553840103149415, "loss": 0.2024608999490738, "lr": 2.583726964280025e-05, "seqlen": 5744.0, "step_tflops": 23.599404174685642, "step_time": 0.6074879417419433} +{"epoch": 0, "iter": 21580, "iter_tflops": 22.99847590821202, "iter_time": 0.6233610229492187, "loss": 0.21680957078933716, "lr": 2.5804635373318604e-05, "seqlen": 5744.0, "step_tflops": 24.65578061962506, "step_time": 0.5814601325988769} +{"epoch": 0, "iter": 21581, "iter_tflops": 1.328750533883016, "iter_time": 1.0022124099731446, "loss": 0.7232904434204102, "lr": 2.5771894466447834e-05, "seqlen": 544.0, "step_tflops": 1.4059599549741346, "step_time": 0.9471751098632812} +{"epoch": 0, "iter": 21582, "iter_tflops": 0.7400748378809697, "iter_time": 1.7993994750976563, "loss": 0.6463519930839539, "lr": 
2.5739047245327468e-05, "seqlen": 544.0, "step_tflops": 0.8637093164774793, "step_time": 1.541826919555664} +{"epoch": 0, "iter": 21583, "iter_tflops": 1.1604129297581796, "iter_time": 1.147600341796875, "loss": 0.5645241141319275, "lr": 2.570609403414632e-05, "seqlen": 544.0, "step_tflops": 1.2732217348807848, "step_time": 1.0459217262268066} +{"epoch": 0, "iter": 21584, "iter_tflops": 2.275738931973219, "iter_time": 0.5851682968139649, "loss": 0.31959134340286255, "lr": 2.5673035158139285e-05, "seqlen": 544.0, "step_tflops": 2.8052624127523353, "step_time": 0.47471148109436034} +{"epoch": 0, "iter": 21585, "iter_tflops": 20.115643839081255, "iter_time": 0.7756567764282227, "loss": 0.18436285853385925, "lr": 2.5639870943584108e-05, "seqlen": 6240.0, "step_tflops": 21.266450735950226, "step_time": 0.7336830978393556} +{"epoch": 0, "iter": 21586, "iter_tflops": 9.390791318848114, "iter_time": 1.6615037994384765, "loss": 0.21800820529460907, "lr": 2.5606601717798212e-05, "seqlen": 6240.0, "step_tflops": 11.4485148499095, "step_time": 1.3628698272705078} +{"epoch": 0, "iter": 21587, "iter_tflops": 28.441468397992153, "iter_time": 0.5485945816040039, "loss": 0.10588368773460388, "lr": 2.557322780913542e-05, "seqlen": 6240.0, "step_tflops": 30.33063514267177, "step_time": 0.514424949645996} +{"epoch": 0, "iter": 21588, "iter_tflops": 26.86242827885434, "iter_time": 0.5808423309326172, "loss": 0.25894036889076233, "lr": 2.553974954698274e-05, "seqlen": 6240.0, "step_tflops": 28.463613725740434, "step_time": 0.5481677627563477} +{"epoch": 0, "iter": 21589, "iter_tflops": 23.206924167008022, "iter_time": 0.8890059432983399, "loss": 0.029043855145573616, "lr": 2.5506167261757097e-05, "seqlen": 8192.0, "step_tflops": 24.438765493273454, "step_time": 0.8441954040527345} +{"epoch": 0, "iter": 21590, "iter_tflops": 14.71557912435925, "iter_time": 1.4019899139404297, "loss": 0.01497188862413168, "lr": 2.5472481284902092e-05, "seqlen": 8192.0, "step_tflops": 16.83163937411877, 
"step_time": 1.225732862472534} +{"epoch": 0, "iter": 21591, "iter_tflops": 43.406197937976586, "iter_time": 0.4753029403686524, "loss": 0.00972055271267891, "lr": 2.5438691948884715e-05, "seqlen": 8192.0, "step_tflops": 47.873237416886134, "step_time": 0.4309525451660156} +{"epoch": 0, "iter": 21592, "iter_tflops": 44.30372501704435, "iter_time": 0.46567401504516603, "loss": 0.036441072821617126, "lr": 2.5404799587192074e-05, "seqlen": 8192.0, "step_tflops": 48.78424120027584, "step_time": 0.42290487670898436} +{"epoch": 0, "iter": 21593, "iter_tflops": 17.11315448935044, "iter_time": 1.2055692901611326, "loss": 0.12742272019386292, "lr": 2.5370804534328097e-05, "seqlen": 8192.0, "step_tflops": 18.15122650360659, "step_time": 1.13662255859375} +{"epoch": 0, "iter": 21594, "iter_tflops": 17.025016923671803, "iter_time": 1.2118104553222657, "loss": 0.17337442934513092, "lr": 2.5336707125810227e-05, "seqlen": 8192.0, "step_tflops": 22.226554800873423, "step_time": 0.9282182369232177} +{"epoch": 0, "iter": 21595, "iter_tflops": 50.86641332945353, "iter_time": 0.40559363555908207, "loss": 0.13966655731201172, "lr": 2.5302507698166127e-05, "seqlen": 8192.0, "step_tflops": 55.24109873800332, "step_time": 0.3734736270904541} +{"epoch": 0, "iter": 21596, "iter_tflops": 49.15327480265152, "iter_time": 0.4197297859191894, "loss": 0.13516448438167572, "lr": 2.5268206588930332e-05, "seqlen": 8192.0, "step_tflops": 53.717530893472144, "step_time": 0.38406630325317387} +{"epoch": 0, "iter": 21597, "iter_tflops": 37.507797614108604, "iter_time": 0.5500481185913085, "loss": 0.08833131194114685, "lr": 2.523380413664095e-05, "seqlen": 8192.0, "step_tflops": 40.22861856591451, "step_time": 0.5128461837768554} +{"epoch": 0, "iter": 21598, "iter_tflops": 13.685543159031324, "iter_time": 1.507510025024414, "loss": 0.1726037859916687, "lr": 2.519930068083629e-05, "seqlen": 8192.0, "step_tflops": 16.758611789897564, "step_time": 1.23107413482666} +{"epoch": 0, "iter": 21599, 
"iter_tflops": 11.73962389529674, "iter_time": 1.7573896484374998, "loss": 0.12658217549324036, "lr": 2.5164696562051537e-05, "seqlen": 8192.0, "step_tflops": 13.529499371239634, "step_time": 1.5248970375061035} +{"epoch": 0, "iter": 21600, "iter_tflops": 28.111572948573308, "iter_time": 0.7339003601074219, "loss": 0.1393408626317978, "lr": 2.5129992121815365e-05, "seqlen": 8192.0, "step_tflops": 31.601098719531432, "step_time": 0.6528600063323975} +{"epoch": 0, "iter": 21601, "iter_tflops": 13.747542295803257, "iter_time": 1.0903370971679687, "loss": 0.25326454639434814, "lr": 2.50951877026466e-05, "seqlen": 6000.0, "step_tflops": 14.733025385096099, "step_time": 1.0174051132202149} +{"epoch": 0, "iter": 21602, "iter_tflops": 12.225895088041906, "iter_time": 1.2260415496826171, "loss": 0.2665554881095886, "lr": 2.5060283648050798e-05, "seqlen": 6000.0, "step_tflops": 14.416760408883887, "step_time": 1.0397242469787598} +{"epoch": 0, "iter": 21603, "iter_tflops": 27.057246263827388, "iter_time": 0.5539904251098633, "loss": 0.17864978313446045, "lr": 2.5025280302516897e-05, "seqlen": 6000.0, "step_tflops": 28.758708547520804, "step_time": 0.5212144813537598} +{"epoch": 0, "iter": 21604, "iter_tflops": 27.89475029729045, "iter_time": 0.5373575744628907, "loss": 0.1114901676774025, "lr": 2.4990178011513776e-05, "seqlen": 6000.0, "step_tflops": 29.64667958599416, "step_time": 0.5056031761169434} +{"epoch": 0, "iter": 21605, "iter_tflops": 40.988112478537296, "iter_time": 0.5033433418273925, "loss": 0.24104292690753937, "lr": 2.495497712148688e-05, "seqlen": 8192.0, "step_tflops": 44.678146853962346, "step_time": 0.4617714691162109} +{"epoch": 0, "iter": 21606, "iter_tflops": 33.99813499693137, "iter_time": 0.6068301544189452, "loss": 0.24104373157024384, "lr": 2.491967797985478e-05, "seqlen": 8192.0, "step_tflops": 37.638524649340276, "step_time": 0.5481376781463624} +{"epoch": 0, "iter": 21607, "iter_tflops": 36.885236678245526, "iter_time": 0.5593320083618165, 
"loss": 0.2103811502456665, "lr": 2.4884280935005755e-05, "seqlen": 8192.0, "step_tflops": 39.99324885191109, "step_time": 0.5158644046783447} +{"epoch": 0, "iter": 21608, "iter_tflops": 40.78505674591775, "iter_time": 0.5058493270874023, "loss": 0.2844407558441162, "lr": 2.4848786336294347e-05, "seqlen": 8192.0, "step_tflops": 44.513874582490885, "step_time": 0.4634755725860596} +{"epoch": 0, "iter": 21609, "iter_tflops": 26.869963591563195, "iter_time": 0.7678124847412109, "loss": 0.5502508878707886, "lr": 2.4813194534037917e-05, "seqlen": 8192.0, "step_tflops": 29.143514647015188, "step_time": 0.7079137077331542} +{"epoch": 0, "iter": 21610, "iter_tflops": 9.45394917564861, "iter_time": 2.1822725219726564, "loss": 0.5614202618598938, "lr": 2.4777505879513183e-05, "seqlen": 8192.0, "step_tflops": 10.34223866603474, "step_time": 1.9948382720947264} +{"epoch": 0, "iter": 21611, "iter_tflops": 15.067992715017365, "iter_time": 1.3691998596191406, "loss": 0.542905330657959, "lr": 2.4741720724952754e-05, "seqlen": 8192.0, "step_tflops": 20.07888674632148, "step_time": 1.0275018615722655} +{"epoch": 0, "iter": 21612, "iter_tflops": 44.63179344353189, "iter_time": 0.4622510528564453, "loss": 0.41343846917152405, "lr": 2.4705839423541668e-05, "seqlen": 8192.0, "step_tflops": 48.03017461734421, "step_time": 0.42954442024230954} +{"epoch": 0, "iter": 21613, "iter_tflops": 23.03531300640974, "iter_time": 0.7325210647583008, "loss": 0.3207707107067108, "lr": 2.466986232941387e-05, "seqlen": 6736.0, "step_tflops": 24.42921087061987, "step_time": 0.6907243995666504} +{"epoch": 0, "iter": 21614, "iter_tflops": 24.621153449443398, "iter_time": 0.6853396224975585, "loss": 0.1385663002729416, "lr": 2.4633789797648766e-05, "seqlen": 6736.0, "step_tflops": 29.072094465493677, "step_time": 0.580414047241211} +{"epoch": 0, "iter": 21615, "iter_tflops": 26.615698040205903, "iter_time": 0.6339811935424805, "loss": 0.17980638146400452, "lr": 2.4597622184267677e-05, "seqlen": 6736.0, 
"step_tflops": 28.471359349386663, "step_time": 0.5926605682373047} +{"epoch": 0, "iter": 21616, "iter_tflops": 29.332077447816438, "iter_time": 0.5752695846557617, "loss": 0.19637112319469452, "lr": 2.4561359846230346e-05, "seqlen": 6736.0, "step_tflops": 31.03869854711387, "step_time": 0.5436391601562499} +{"epoch": 0, "iter": 21617, "iter_tflops": 22.710445372659233, "iter_time": 0.9084407272338866, "loss": 0.6650761961936951, "lr": 2.4525003141431413e-05, "seqlen": 8192.0, "step_tflops": 23.886917423078245, "step_time": 0.8636984481811523} +{"epoch": 0, "iter": 21618, "iter_tflops": 14.058781538895987, "iter_time": 1.467488021850586, "loss": 0.5069460272789001, "lr": 2.448855242869687e-05, "seqlen": 8192.0, "step_tflops": 16.5779656359589, "step_time": 1.2444888572692874} +{"epoch": 0, "iter": 21619, "iter_tflops": 32.72670080736167, "iter_time": 0.6304055404663086, "loss": 0.4511454403400421, "lr": 2.445200806778054e-05, "seqlen": 8192.0, "step_tflops": 35.52237671426256, "step_time": 0.580791473388672} +{"epoch": 0, "iter": 21620, "iter_tflops": 33.65032621369467, "iter_time": 0.6131023330688478, "loss": 0.6281953454017639, "lr": 2.4415370419360508e-05, "seqlen": 8192.0, "step_tflops": 36.50511307954965, "step_time": 0.5651562690734864} +{"epoch": 0, "iter": 21621, "iter_tflops": 26.815420799814575, "iter_time": 0.7693742218017579, "loss": 0.4218178689479828, "lr": 2.4378639845035583e-05, "seqlen": 8192.0, "step_tflops": 28.47818426930031, "step_time": 0.7244525604248048} +{"epoch": 0, "iter": 21622, "iter_tflops": 9.793112107943761, "iter_time": 2.106694305419922, "loss": 0.30128803849220276, "lr": 2.4341816707321688e-05, "seqlen": 8192.0, "step_tflops": 11.126512259742778, "step_time": 1.8542282638549805} +{"epoch": 0, "iter": 21623, "iter_tflops": 10.74539706983222, "iter_time": 1.919993591308594, "loss": 0.4291263818740845, "lr": 2.4304901369648344e-05, "seqlen": 8192.0, "step_tflops": 15.221025055005567, "step_time": 1.3554339103698732} +{"epoch": 0, 
"iter": 21624, "iter_tflops": 36.79467380131574, "iter_time": 0.5607086944580078, "loss": 0.5257352590560913, "lr": 2.4267894196355016e-05, "seqlen": 8192.0, "step_tflops": 39.783839141332635, "step_time": 0.5185797538757324} +{"epoch": 0, "iter": 21625, "iter_tflops": 16.990473067615753, "iter_time": 0.8461881484985352, "loss": 0.19928549230098724, "lr": 2.4230795552687573e-05, "seqlen": 5760.0, "step_tflops": 18.334400707085297, "step_time": 0.7841618156433104} +{"epoch": 0, "iter": 21626, "iter_tflops": 19.457770174419217, "iter_time": 0.7388892364501954, "loss": 0.10429128259420395, "lr": 2.419360580479465e-05, "seqlen": 5760.0, "step_tflops": 21.007689743841883, "step_time": 0.6843749656677247} +{"epoch": 0, "iter": 21627, "iter_tflops": 22.41737176996562, "iter_time": 0.6413390960693359, "loss": 0.209535151720047, "lr": 2.4156325319724037e-05, "seqlen": 5760.0, "step_tflops": 24.134811158321654, "step_time": 0.5957012405395508} +{"epoch": 0, "iter": 21628, "iter_tflops": 21.879161144330535, "iter_time": 0.6571155471801757, "loss": 0.23598314821720123, "lr": 2.4118954465419083e-05, "seqlen": 5760.0, "step_tflops": 23.54641799638526, "step_time": 0.6105870094299317} +{"epoch": 0, "iter": 21629, "iter_tflops": 15.707370097313152, "iter_time": 1.3134658050537111, "loss": 0.3260825574398041, "lr": 2.408149361071502e-05, "seqlen": 8192.0, "step_tflops": 16.8562818853819, "step_time": 1.2239409408569335} +{"epoch": 0, "iter": 21630, "iter_tflops": 32.879378785478025, "iter_time": 0.6274782028198242, "loss": 0.31295374035835266, "lr": 2.4043943125335373e-05, "seqlen": 8192.0, "step_tflops": 36.959253475660326, "step_time": 0.558211856842041} +{"epoch": 0, "iter": 21631, "iter_tflops": 45.156513595735596, "iter_time": 0.45687968063354495, "loss": 0.2916046679019928, "lr": 2.4006303379888262e-05, "seqlen": 8192.0, "step_tflops": 48.84868343270599, "step_time": 0.42234697151184086} +{"epoch": 0, "iter": 21632, "iter_tflops": 45.846140321215515, "iter_time": 
0.45000720596313476, "loss": 0.3193589448928833, "lr": 2.3968574745862785e-05, "seqlen": 8192.0, "step_tflops": 49.36947954060225, "step_time": 0.4178916549682617} +{"epoch": 0, "iter": 21633, "iter_tflops": 26.092135810875494, "iter_time": 0.7907015991210937, "loss": 0.35592153668403625, "lr": 2.3930757595625326e-05, "seqlen": 8192.0, "step_tflops": 27.489276696103158, "step_time": 0.7505142364501953} +{"epoch": 0, "iter": 21634, "iter_tflops": 17.734131549954476, "iter_time": 1.163355163574219, "loss": 0.49162667989730835, "lr": 2.389285230241589e-05, "seqlen": 8192.0, "step_tflops": 20.540579451531304, "step_time": 1.0044065971374512} +{"epoch": 0, "iter": 21635, "iter_tflops": 36.624210473075166, "iter_time": 0.5633184509277344, "loss": 0.3862381875514984, "lr": 2.385485924034442e-05, "seqlen": 8192.0, "step_tflops": 39.92810142512249, "step_time": 0.5167060985565185} +{"epoch": 0, "iter": 21636, "iter_tflops": 38.08252726848338, "iter_time": 0.5417469635009765, "loss": 0.3461343050003052, "lr": 2.3816778784387097e-05, "seqlen": 8192.0, "step_tflops": 40.84710887922414, "step_time": 0.5050808753967285} +{"epoch": 0, "iter": 21637, "iter_tflops": 24.997180250232375, "iter_time": 0.8253368301391603, "loss": 0.5716069936752319, "lr": 2.3778611310382653e-05, "seqlen": 8192.0, "step_tflops": 26.766097098886632, "step_time": 0.7707919998168946} +{"epoch": 0, "iter": 21638, "iter_tflops": 38.03946279053777, "iter_time": 0.5423602752685547, "loss": 0.5197840929031372, "lr": 2.374035719502864e-05, "seqlen": 8192.0, "step_tflops": 40.961449503087486, "step_time": 0.5036709823608398} +{"epoch": 0, "iter": 21639, "iter_tflops": 46.87801572291718, "iter_time": 0.44010168075561523, "loss": 0.6364180445671082, "lr": 2.3702016815877742e-05, "seqlen": 8192.0, "step_tflops": 50.770793767574794, "step_time": 0.4063575134277344} +{"epoch": 0, "iter": 21640, "iter_tflops": 50.50943147689308, "iter_time": 0.40846022033691404, "loss": 0.5529360175132751, "lr": 2.3663590551334014e-05, 
"seqlen": 8192.0, "step_tflops": 54.46423634096684, "step_time": 0.37880074882507325} +{"epoch": 0, "iter": 21641, "iter_tflops": 41.29187784373726, "iter_time": 0.4996404762268066, "loss": 0.24019847810268402, "lr": 2.362507878064918e-05, "seqlen": 8192.0, "step_tflops": 44.64231148684752, "step_time": 0.46214214324951175} +{"epoch": 0, "iter": 21642, "iter_tflops": 16.702366426843625, "iter_time": 1.2352197875976563, "loss": 0.24760839343070984, "lr": 2.3586481883918867e-05, "seqlen": 8192.0, "step_tflops": 18.775704568819506, "step_time": 1.0988186054229738} +{"epoch": 0, "iter": 21643, "iter_tflops": 35.726847924352256, "iter_time": 0.5774674987792968, "loss": 0.20619691908359528, "lr": 2.3547800242078857e-05, "seqlen": 8192.0, "step_tflops": 38.93232126604676, "step_time": 0.5299219989776611} +{"epoch": 0, "iter": 21644, "iter_tflops": 37.874212814248345, "iter_time": 0.5447266616821289, "loss": 0.2913074493408203, "lr": 2.350903423690135e-05, "seqlen": 8192.0, "step_tflops": 41.26449577562685, "step_time": 0.4999720249176025} +{"epoch": 0, "iter": 21645, "iter_tflops": 16.303797753813353, "iter_time": 1.2654164276123046, "loss": 0.4472545087337494, "lr": 2.3470184250991157e-05, "seqlen": 8192.0, "step_tflops": 17.162633795865798, "step_time": 1.2020936737060548} +{"epoch": 0, "iter": 21646, "iter_tflops": 16.106679047601695, "iter_time": 1.2809029998779298, "loss": 0.4001307487487793, "lr": 2.3431250667781958e-05, "seqlen": 8192.0, "step_tflops": 20.556897917182734, "step_time": 1.0036092796325684} +{"epoch": 0, "iter": 21647, "iter_tflops": 35.04841294522184, "iter_time": 0.5886455841064453, "loss": 0.6325585246086121, "lr": 2.3392233871532504e-05, "seqlen": 8192.0, "step_tflops": 38.13433320315108, "step_time": 0.5410109939575196} +{"epoch": 0, "iter": 21648, "iter_tflops": 37.6627481354391, "iter_time": 0.5477851333618163, "loss": 0.4734672009944916, "lr": 2.335313424732282e-05, "seqlen": 8192.0, "step_tflops": 40.917665038243626, "step_time": 
0.5042099418640138} +{"epoch": 0, "iter": 21649, "iter_tflops": 20.114408414308063, "iter_time": 0.8042136306762695, "loss": 0.021990034729242325, "lr": 2.3313952181050412e-05, "seqlen": 6464.0, "step_tflops": 22.042577429602705, "step_time": 0.7338652420043945} +{"epoch": 0, "iter": 21650, "iter_tflops": 39.6034472677544, "iter_time": 0.4084563980102539, "loss": 0.04781259596347809, "lr": 2.3274688059426456e-05, "seqlen": 6464.0, "step_tflops": 43.75594058817544, "step_time": 0.3696933765411377} +{"epoch": 0, "iter": 21651, "iter_tflops": 42.158922819508696, "iter_time": 0.38369769287109373, "loss": 0.021541327238082886, "lr": 2.3235342269971978e-05, "seqlen": 6464.0, "step_tflops": 47.41478280099215, "step_time": 0.34116535949707033} +{"epoch": 0, "iter": 21652, "iter_tflops": 44.26345863741723, "iter_time": 0.36545452880859375, "loss": 0.04126061499118805, "lr": 2.3195915201014038e-05, "seqlen": 6464.0, "step_tflops": 48.278216751067006, "step_time": 0.33506377220153805} +{"epoch": 0, "iter": 21653, "iter_tflops": 28.787564473856804, "iter_time": 0.716666862487793, "loss": 0.09921062737703323, "lr": 2.3156407241681888e-05, "seqlen": 8192.0, "step_tflops": 30.41671056034895, "step_time": 0.6782815475463867} +{"epoch": 0, "iter": 21654, "iter_tflops": 11.844450288577633, "iter_time": 1.7418363037109372, "loss": 0.05761662870645523, "lr": 2.311681878190314e-05, "seqlen": 8192.0, "step_tflops": 14.821641791499983, "step_time": 1.3919573688507079} +{"epoch": 0, "iter": 21655, "iter_tflops": 40.902137306385754, "iter_time": 0.5044013557434082, "loss": 0.07152508944272995, "lr": 2.30771502123999e-05, "seqlen": 8192.0, "step_tflops": 45.02648059918762, "step_time": 0.4581991138458252} +{"epoch": 0, "iter": 21656, "iter_tflops": 44.967449710028454, "iter_time": 0.45880061340332023, "loss": 0.06297837197780609, "lr": 2.303740192468495e-05, "seqlen": 8192.0, "step_tflops": 49.02908261723977, "step_time": 0.42079297447204594} +{"epoch": 0, "iter": 21657, "iter_tflops": 
15.931282891906742, "iter_time": 1.295005157470703, "loss": 0.12668229639530182, "lr": 2.299757431105783e-05, "seqlen": 8192.0, "step_tflops": 17.23189458279122, "step_time": 1.1972620544433592} +{"epoch": 0, "iter": 21658, "iter_tflops": 22.11932902304056, "iter_time": 0.932717872619629, "loss": 0.12429339438676834, "lr": 2.295766776460101e-05, "seqlen": 8192.0, "step_tflops": 26.874745308955386, "step_time": 0.7676758708953857} +{"epoch": 0, "iter": 21659, "iter_tflops": 36.66659527985961, "iter_time": 0.5626672821044922, "loss": 0.1593669354915619, "lr": 2.2917682679175998e-05, "seqlen": 8192.0, "step_tflops": 39.962511461010116, "step_time": 0.5162611846923828} +{"epoch": 0, "iter": 21660, "iter_tflops": 35.76535448312309, "iter_time": 0.5768457717895508, "loss": 0.0929994285106659, "lr": 2.2877619449419437e-05, "seqlen": 8192.0, "step_tflops": 38.8451788910389, "step_time": 0.5311107864379883} +{"epoch": 0, "iter": 21661, "iter_tflops": 15.373470513654901, "iter_time": 1.3419932403564454, "loss": 0.3334251046180725, "lr": 2.2837478470739234e-05, "seqlen": 8192.0, "step_tflops": 16.47740790243253, "step_time": 1.252083679199219} +{"epoch": 0, "iter": 21662, "iter_tflops": 17.056302873007215, "iter_time": 1.209587661743164, "loss": 0.30026310682296753, "lr": 2.279726013931064e-05, "seqlen": 8192.0, "step_tflops": 21.639273714491775, "step_time": 0.9534097023010254} +{"epoch": 0, "iter": 21663, "iter_tflops": 41.46552506939713, "iter_time": 0.4975481071472168, "loss": 0.6082141995429993, "lr": 2.2756964852072357e-05, "seqlen": 8192.0, "step_tflops": 44.3899291589894, "step_time": 0.46476968765258786} +{"epoch": 0, "iter": 21664, "iter_tflops": 45.217272943795706, "iter_time": 0.4562657623291016, "loss": 0.6029061079025269, "lr": 2.27165930067226e-05, "seqlen": 8192.0, "step_tflops": 48.51366266690318, "step_time": 0.42526357269287113} +{"epoch": 0, "iter": 21665, "iter_tflops": 40.685774892808645, "iter_time": 0.5070837059020996, "loss": 0.17442025244235992, 
"lr": 2.2676145001715175e-05, "seqlen": 8192.0, "step_tflops": 44.17023914129201, "step_time": 0.46708131790161134} +{"epoch": 0, "iter": 21666, "iter_tflops": 44.371148463887565, "iter_time": 0.46496640777587894, "loss": 0.1855034977197647, "lr": 2.263562123625557e-05, "seqlen": 8192.0, "step_tflops": 48.61842314286671, "step_time": 0.4243472366333008} +{"epoch": 0, "iter": 21667, "iter_tflops": 49.50107598226632, "iter_time": 0.41678070831298825, "loss": 0.21196101605892181, "lr": 2.2595022110296988e-05, "seqlen": 8192.0, "step_tflops": 53.91129109739066, "step_time": 0.3826859474182129} +{"epoch": 0, "iter": 21668, "iter_tflops": 46.708582255530196, "iter_time": 0.44169813156127924, "loss": 0.21456481516361237, "lr": 2.2554348024536413e-05, "seqlen": 8192.0, "step_tflops": 50.46344875861781, "step_time": 0.40883241271972653} +{"epoch": 0, "iter": 21669, "iter_tflops": 29.586270663977775, "iter_time": 0.6973198394775392, "loss": 0.5612286329269409, "lr": 2.251359938041065e-05, "seqlen": 8192.0, "step_tflops": 31.379541997570858, "step_time": 0.657469554901123} +{"epoch": 0, "iter": 21670, "iter_tflops": 16.492284543335288, "iter_time": 1.2509542541503906, "loss": 0.53874671459198, "lr": 2.247277658009236e-05, "seqlen": 8192.0, "step_tflops": 21.011704175197256, "step_time": 0.981885778427124} +{"epoch": 0, "iter": 21671, "iter_tflops": 35.24180471920278, "iter_time": 0.5854153518676757, "loss": 0.4385850429534912, "lr": 2.2431880026486114e-05, "seqlen": 8192.0, "step_tflops": 38.227612999990136, "step_time": 0.5396908645629883} +{"epoch": 0, "iter": 21672, "iter_tflops": 32.784955757111334, "iter_time": 0.629285385131836, "loss": 0.4266519844532013, "lr": 2.2390910123224375e-05, "seqlen": 8192.0, "step_tflops": 35.73386818969283, "step_time": 0.5773540496826173} +{"epoch": 0, "iter": 21673, "iter_tflops": 30.46657633092561, "iter_time": 0.6771713790893554, "loss": 0.5389395356178284, "lr": 2.234986727466355e-05, "seqlen": 8192.0, "step_tflops": 
33.623140441852705, "step_time": 0.6135980529785157} +{"epoch": 0, "iter": 21674, "iter_tflops": 37.340079347452956, "iter_time": 0.5525187377929688, "loss": 0.5284919738769531, "lr": 2.2308751885879984e-05, "seqlen": 8192.0, "step_tflops": 40.6625450873321, "step_time": 0.5073733940124512} +{"epoch": 0, "iter": 21675, "iter_tflops": 38.870900089287076, "iter_time": 0.5307593460083008, "loss": 0.5886666774749756, "lr": 2.226756436266597e-05, "seqlen": 8192.0, "step_tflops": 42.29215136399523, "step_time": 0.4878232212066651} +{"epoch": 0, "iter": 21676, "iter_tflops": 35.99860881744288, "iter_time": 0.5731080780029297, "loss": 0.45492613315582275, "lr": 2.222630511152573e-05, "seqlen": 8192.0, "step_tflops": 38.92883866802653, "step_time": 0.5299694061279298} +{"epoch": 0, "iter": 21677, "iter_tflops": 28.896069902005173, "iter_time": 0.7139757614135742, "loss": 0.08797647804021835, "lr": 2.2184974539671417e-05, "seqlen": 8192.0, "step_tflops": 31.272817008772677, "step_time": 0.659713306427002} +{"epoch": 0, "iter": 21678, "iter_tflops": 10.353127949187057, "iter_time": 1.9927401275634764, "loss": 0.13239337503910065, "lr": 2.2143573055019088e-05, "seqlen": 8192.0, "step_tflops": 11.574416675228603, "step_time": 1.7824737167358398} +{"epoch": 0, "iter": 21679, "iter_tflops": 15.84180162034074, "iter_time": 1.3023199005126953, "loss": 0.10034988820552826, "lr": 2.2102101066184684e-05, "seqlen": 8192.0, "step_tflops": 20.972304920501507, "step_time": 0.9837303810119628} +{"epoch": 0, "iter": 21680, "iter_tflops": 42.155087753263345, "iter_time": 0.4894093360900879, "loss": 0.12254369258880615, "lr": 2.2060558982479992e-05, "seqlen": 8192.0, "step_tflops": 46.0658103484798, "step_time": 0.44786129570007327} +{"epoch": 0, "iter": 21681, "iter_tflops": 15.847356989560755, "iter_time": 0.9716609191894531, "loss": 0.18980319797992706, "lr": 2.20189472139086e-05, "seqlen": 6160.0, "step_tflops": 16.789090225380967, "step_time": 0.9171585388183593} +{"epoch": 0, "iter": 
21682, "iter_tflops": 8.4137573521865, "iter_time": 1.8301285400390623, "loss": 0.16484110057353973, "lr": 2.1977266171161876e-05, "seqlen": 6160.0, "step_tflops": 9.044814007376331, "step_time": 1.7024404754638671} +{"epoch": 0, "iter": 21683, "iter_tflops": 10.295031345557, "iter_time": 1.495697967529297, "loss": 0.18821606040000916, "lr": 2.193551626561488e-05, "seqlen": 6160.0, "step_tflops": 12.614744485611588, "step_time": 1.2206555175781246} +{"epoch": 0, "iter": 21684, "iter_tflops": 21.409799861057856, "iter_time": 0.7192153854370118, "loss": 0.2041081041097641, "lr": 2.1893697909322322e-05, "seqlen": 6160.0, "step_tflops": 25.7875881520842, "step_time": 0.5971189460754395} +{"epoch": 0, "iter": 21685, "iter_tflops": 16.350865474798052, "iter_time": 0.874300796508789, "loss": 0.32883402705192566, "lr": 2.1851811515014494e-05, "seqlen": 5728.0, "step_tflops": 17.183866563146804, "step_time": 0.8319183959960937} +{"epoch": 0, "iter": 21686, "iter_tflops": 6.805667658021285, "iter_time": 2.1005396423339846, "loss": 0.2929348051548004, "lr": 2.1809857496093203e-05, "seqlen": 5728.0, "step_tflops": 9.124437913343028, "step_time": 1.5667348327636719} +{"epoch": 0, "iter": 21687, "iter_tflops": 7.981324034624763, "iter_time": 1.7911282196044924, "loss": 0.3323727250099182, "lr": 2.1767836266627676e-05, "seqlen": 5728.0, "step_tflops": 10.281016743635277, "step_time": 1.390482582092285} +{"epoch": 0, "iter": 21688, "iter_tflops": 13.440560938497583, "iter_time": 1.0636144409179689, "loss": 0.18302568793296814, "lr": 2.1725748241350487e-05, "seqlen": 5728.0, "step_tflops": 16.1978727591931, "step_time": 0.8825587730407715} +{"epoch": 0, "iter": 21689, "iter_tflops": 19.11147330613075, "iter_time": 0.7672225570678711, "loss": 0.2862628102302551, "lr": 2.1683593835653447e-05, "seqlen": 5872.0, "step_tflops": 20.73335413386146, "step_time": 0.7072060470581055} +{"epoch": 0, "iter": 21690, "iter_tflops": 21.97142867723986, "iter_time": 0.6673554840087891, "loss": 
0.1805228590965271, "lr": 2.1641373465583527e-05, "seqlen": 5872.0, "step_tflops": 23.71325851055676, "step_time": 0.6183356628417969} +{"epoch": 0, "iter": 21691, "iter_tflops": 22.842773623478635, "iter_time": 0.6418989944458007, "loss": 0.20603156089782715, "lr": 2.1599087547838727e-05, "seqlen": 5872.0, "step_tflops": 24.6288430944324, "step_time": 0.5953488502502441} +{"epoch": 0, "iter": 21692, "iter_tflops": 21.764865338786766, "iter_time": 0.6736891403198242, "loss": 0.1929546594619751, "lr": 2.1556736499763993e-05, "seqlen": 5872.0, "step_tflops": 23.48385564295658, "step_time": 0.6243758964538575} +{"epoch": 0, "iter": 21693, "iter_tflops": 18.608935743658318, "iter_time": 1.1086659545898436, "loss": 0.15783150494098663, "lr": 2.151432073934707e-05, "seqlen": 8192.0, "step_tflops": 19.638613693145043, "step_time": 1.0505371627807616} +{"epoch": 0, "iter": 21694, "iter_tflops": 8.899544677310375, "iter_time": 2.318218994140625, "loss": 0.10387519747018814, "lr": 2.1471840685214387e-05, "seqlen": 8192.0, "step_tflops": 10.722397234754506, "step_time": 1.9241120300292966} +{"epoch": 0, "iter": 21695, "iter_tflops": 13.717953773155896, "iter_time": 1.5039483184814453, "loss": 0.22724370658397675, "lr": 2.1429296756626926e-05, "seqlen": 8192.0, "step_tflops": 19.853155134447302, "step_time": 1.039184621810913} +{"epoch": 0, "iter": 21696, "iter_tflops": 40.29155950320526, "iter_time": 0.5120450477600097, "loss": 0.18327610194683075, "lr": 2.138668937347609e-05, "seqlen": 8192.0, "step_tflops": 44.25835802120129, "step_time": 0.4661513538360596} +{"epoch": 0, "iter": 21697, "iter_tflops": 15.423453686584782, "iter_time": 0.9718611450195311, "loss": 0.21607473492622375, "lr": 2.1344018956279547e-05, "seqlen": 6000.0, "step_tflops": 16.60155274113223, "step_time": 0.9028947830200196} +{"epoch": 0, "iter": 21698, "iter_tflops": 23.12267926110726, "iter_time": 0.6482577209472656, "loss": 0.16505461931228638, "lr": 2.1301285926177098e-05, "seqlen": 6000.0, 
"step_tflops": 24.937485253868836, "step_time": 0.6010812721252441} +{"epoch": 0, "iter": 21699, "iter_tflops": 22.16246777990785, "iter_time": 0.6763441467285156, "loss": 0.2277252972126007, "lr": 2.1258490704926483e-05, "seqlen": 6000.0, "step_tflops": 23.78772196444296, "step_time": 0.6301341247558594} +{"epoch": 0, "iter": 21700, "iter_tflops": 22.93532575512157, "iter_time": 0.6535531921386718, "loss": 0.2588537931442261, "lr": 2.1215633714899264e-05, "seqlen": 6000.0, "step_tflops": 24.619650372218565, "step_time": 0.6088411140441895} +{"epoch": 0, "iter": 21701, "iter_tflops": 20.07452265238779, "iter_time": 1.0277252349853516, "loss": 0.13420936465263367, "lr": 2.1172715379076635e-05, "seqlen": 8192.0, "step_tflops": 21.56048355829027, "step_time": 0.9568938217163085} +{"epoch": 0, "iter": 21702, "iter_tflops": 19.330332693758436, "iter_time": 1.0672911758422852, "loss": 0.1318250298500061, "lr": 2.1129736121045237e-05, "seqlen": 8192.0, "step_tflops": 25.46116483554815, "step_time": 0.8102965297698975} +{"epoch": 0, "iter": 21703, "iter_tflops": 43.32878584398958, "iter_time": 0.47615212631225584, "loss": 0.1467692255973816, "lr": 2.1086696364993e-05, "seqlen": 8192.0, "step_tflops": 47.702927492812435, "step_time": 0.43249114036560055} +{"epoch": 0, "iter": 21704, "iter_tflops": 50.57825543878571, "iter_time": 0.4079044113159179, "loss": 0.06947503983974457, "lr": 2.1043596535704943e-05, "seqlen": 8192.0, "step_tflops": 54.461670124722964, "step_time": 0.37881859779357907} +{"epoch": 0, "iter": 21705, "iter_tflops": 46.344699850442495, "iter_time": 0.4451661911010742, "loss": 0.2560524344444275, "lr": 2.100043705855897e-05, "seqlen": 8192.0, "step_tflops": 50.66162017166622, "step_time": 0.4072331962585449} +{"epoch": 0, "iter": 21706, "iter_tflops": 36.70037956825183, "iter_time": 0.5621493225097656, "loss": 0.2778719663619995, "lr": 2.0957218359521707e-05, "seqlen": 8192.0, "step_tflops": 40.48146794376949, "step_time": 0.5096429195404053} +{"epoch": 0, 
"iter": 21707, "iter_tflops": 41.53526733313438, "iter_time": 0.4967126693725586, "loss": 0.25440260767936707, "lr": 2.0913940865144266e-05, "seqlen": 8192.0, "step_tflops": 45.304539440482486, "step_time": 0.45538689422607426} +{"epoch": 0, "iter": 21708, "iter_tflops": 37.222195269303505, "iter_time": 0.5542685852050782, "loss": 0.2498961240053177, "lr": 2.087060500255804e-05, "seqlen": 8192.0, "step_tflops": 40.63975924281745, "step_time": 0.5076578674316405} +{"epoch": 0, "iter": 21709, "iter_tflops": 15.02359609850769, "iter_time": 1.3732460174560548, "loss": 0.6127929091453552, "lr": 2.0827211199470495e-05, "seqlen": 8192.0, "step_tflops": 15.965783275519781, "step_time": 1.292206787109375} +{"epoch": 0, "iter": 21710, "iter_tflops": 23.14174212297428, "iter_time": 0.8915099563598633, "loss": 0.6795806884765625, "lr": 2.078375988416095e-05, "seqlen": 8192.0, "step_tflops": 28.493609620743104, "step_time": 0.7240603694915771} +{"epoch": 0, "iter": 21711, "iter_tflops": 42.63776467672747, "iter_time": 0.4838690223693848, "loss": 0.48233845829963684, "lr": 2.074025148547635e-05, "seqlen": 8192.0, "step_tflops": 45.84075056911021, "step_time": 0.45006011581420896} +{"epoch": 0, "iter": 21712, "iter_tflops": 41.928786005128934, "iter_time": 0.4920508193969726, "loss": 0.49547868967056274, "lr": 2.069668643282702e-05, "seqlen": 8192.0, "step_tflops": 44.88688966021781, "step_time": 0.45962403869628904} +{"epoch": 0, "iter": 21713, "iter_tflops": 46.394076945424324, "iter_time": 0.44469240188598635, "loss": 0.16146066784858704, "lr": 2.065306515618244e-05, "seqlen": 8192.0, "step_tflops": 50.85553255223604, "step_time": 0.4056804141998291} +{"epoch": 0, "iter": 21714, "iter_tflops": 46.71671612785023, "iter_time": 0.4416212272644043, "loss": 0.1604093760251999, "lr": 2.0609388086067004e-05, "seqlen": 8192.0, "step_tflops": 50.61651473180249, "step_time": 0.4075960903167724} +{"epoch": 0, "iter": 21715, "iter_tflops": 46.77243659970184, "iter_time": 
0.44109511947631835, "loss": 0.2504666745662689, "lr": 2.0565655653555764e-05, "seqlen": 8192.0, "step_tflops": 50.621444748905354, "step_time": 0.40755639457702636} +{"epoch": 0, "iter": 21716, "iter_tflops": 44.579120948052505, "iter_time": 0.46279722595214845, "loss": 0.13714087009429932, "lr": 2.052186829027017e-05, "seqlen": 8192.0, "step_tflops": 48.12371440564068, "step_time": 0.42870949935913083} +{"epoch": 0, "iter": 21717, "iter_tflops": 28.62001899797981, "iter_time": 0.7208623275756836, "loss": 0.08312304317951202, "lr": 2.047802642837382e-05, "seqlen": 8192.0, "step_tflops": 30.24939129957407, "step_time": 0.6820333442687988} +{"epoch": 0, "iter": 21718, "iter_tflops": 14.825925441537441, "iter_time": 1.391555191040039, "loss": 0.027802472934126854, "lr": 2.0434130500568188e-05, "seqlen": 8192.0, "step_tflops": 17.024714553947682, "step_time": 1.2118319778442384} +{"epoch": 0, "iter": 21719, "iter_tflops": 29.549200280032128, "iter_time": 0.6981946487426758, "loss": 0.05388854444026947, "lr": 2.039018094008836e-05, "seqlen": 8192.0, "step_tflops": 34.0591252128122, "step_time": 0.6057434940338134} +{"epoch": 0, "iter": 21720, "iter_tflops": 44.161118928771536, "iter_time": 0.4671777801513672, "loss": 0.03733319044113159, "lr": 2.034617818069876e-05, "seqlen": 8192.0, "step_tflops": 48.61600654802632, "step_time": 0.424368330001831} +{"epoch": 0, "iter": 21721, "iter_tflops": 18.077169988850944, "iter_time": 1.1412789459228514, "loss": 0.08914579451084137, "lr": 2.030212265668886e-05, "seqlen": 8192.0, "step_tflops": 19.63891819791077, "step_time": 1.0505208740234373} +{"epoch": 0, "iter": 21722, "iter_tflops": 16.97257278058344, "iter_time": 1.2155548706054689, "loss": 0.04975834861397743, "lr": 2.0258014802868886e-05, "seqlen": 8192.0, "step_tflops": 22.506385279909995, "step_time": 0.9166773452758787} +{"epoch": 0, "iter": 21723, "iter_tflops": 39.783854651884496, "iter_time": 0.5185795516967773, "loss": 0.06524856388568878, "lr": 
2.0213855054565554e-05, "seqlen": 8192.0, "step_tflops": 43.83653602624298, "step_time": 0.47063694763183594} +{"epoch": 0, "iter": 21724, "iter_tflops": 42.544783778103515, "iter_time": 0.4849265098571777, "loss": 0.08587204664945602, "lr": 2.0169643847617754e-05, "seqlen": 8192.0, "step_tflops": 46.705228261630225, "step_time": 0.441729850769043} +{"epoch": 0, "iter": 21725, "iter_tflops": 18.84184903347945, "iter_time": 0.9939016494750977, "loss": 0.01012761052697897, "lr": 2.0125381618372253e-05, "seqlen": 7456.0, "step_tflops": 19.990437905182716, "step_time": 0.9367951278686524} +{"epoch": 0, "iter": 21726, "iter_tflops": 12.682487610238054, "iter_time": 1.4765987091064454, "loss": 0.013456694781780243, "lr": 2.0081068803679374e-05, "seqlen": 7456.0, "step_tflops": 15.498722232557935, "step_time": 1.2082895965576173} +{"epoch": 0, "iter": 21727, "iter_tflops": 41.951331888835405, "iter_time": 0.44639690780639646, "loss": 0.0020259995944797993, "lr": 2.003670584088871e-05, "seqlen": 7456.0, "step_tflops": 46.30520730612607, "step_time": 0.40442416572570805} +{"epoch": 0, "iter": 21728, "iter_tflops": 45.96599202032234, "iter_time": 0.4074086952209473, "loss": 0.0102147925645113, "lr": 1.9992293167844802e-05, "seqlen": 7456.0, "step_tflops": 50.698940223890716, "step_time": 0.3693754692077637} +{"epoch": 0, "iter": 21729, "iter_tflops": 21.316004485128612, "iter_time": 0.9678686981201172, "loss": 0.39164766669273376, "lr": 1.9947831222882792e-05, "seqlen": 8192.0, "step_tflops": 23.205771384292007, "step_time": 0.8890501060485841} +{"epoch": 0, "iter": 21730, "iter_tflops": 14.6483232205612, "iter_time": 1.4084269714355468, "loss": 0.2977648377418518, "lr": 1.9903320444824137e-05, "seqlen": 8192.0, "step_tflops": 17.55059242956134, "step_time": 1.1755212020874022} +{"epoch": 0, "iter": 21731, "iter_tflops": 37.65108824420438, "iter_time": 0.5479547729492187, "loss": 0.3344390094280243, "lr": 1.985876127297224e-05, "seqlen": 8192.0, "step_tflops": 
41.30744030567386, "step_time": 0.49945223808288575} +{"epoch": 0, "iter": 21732, "iter_tflops": 39.27640446898579, "iter_time": 0.5252795867919922, "loss": 0.29791495203971863, "lr": 1.981415414710814e-05, "seqlen": 8192.0, "step_tflops": 43.05426684299882, "step_time": 0.4791881275177001} +{"epoch": 0, "iter": 21733, "iter_tflops": 22.068495190138677, "iter_time": 0.9348663482666016, "loss": 0.5394077301025391, "lr": 1.976949950748616e-05, "seqlen": 8192.0, "step_tflops": 23.692932317668248, "step_time": 0.8707699508666992} +{"epoch": 0, "iter": 21734, "iter_tflops": 15.113108600223399, "iter_time": 1.3651125030517577, "loss": 0.2912416458129883, "lr": 1.972479779482957e-05, "seqlen": 8192.0, "step_tflops": 18.84303383459165, "step_time": 1.0948923454284667} +{"epoch": 0, "iter": 21735, "iter_tflops": 37.92665102753868, "iter_time": 0.5439735107421875, "loss": 0.6533217430114746, "lr": 1.9680049450326224e-05, "seqlen": 8192.0, "step_tflops": 41.45935857821075, "step_time": 0.4976221103668213} +{"epoch": 0, "iter": 21736, "iter_tflops": 37.192238377056704, "iter_time": 0.5547150268554688, "loss": 0.5616589784622192, "lr": 1.963525491562421e-05, "seqlen": 8192.0, "step_tflops": 40.26671122987334, "step_time": 0.5123610267639159} +{"epoch": 0, "iter": 21737, "iter_tflops": 13.293845854976468, "iter_time": 0.9315811004638673, "loss": 0.01793939247727394, "lr": 1.9590414632827513e-05, "seqlen": 4976.0, "step_tflops": 14.247440217095829, "step_time": 0.8692295150756836} +{"epoch": 0, "iter": 21738, "iter_tflops": 12.317822987599529, "iter_time": 1.0053964538574218, "loss": 0.021947816014289856, "lr": 1.9545529044491612e-05, "seqlen": 4976.0, "step_tflops": 15.120202541753791, "step_time": 0.81905619430542} +{"epoch": 0, "iter": 21739, "iter_tflops": 32.67008060550768, "iter_time": 0.37907147216796877, "loss": 0.02159368433058262, "lr": 1.9500598593619143e-05, "seqlen": 4976.0, "step_tflops": 35.80444548604169, "step_time": 0.345887092590332} +{"epoch": 0, "iter": 
21740, "iter_tflops": 32.970683534038045, "iter_time": 0.37561537170410153, "loss": 0.02124813385307789, "lr": 1.9455623723655524e-05, "seqlen": 4976.0, "step_tflops": 36.22113551105417, "step_time": 0.3419079875946045} +{"epoch": 0, "iter": 21741, "iter_tflops": 19.76691482633043, "iter_time": 0.614210807800293, "loss": 0.016643160954117775, "lr": 1.941060487848456e-05, "seqlen": 4880.0, "step_tflops": 21.102796384240676, "step_time": 0.5753290939331055} +{"epoch": 0, "iter": 21742, "iter_tflops": 7.657568394892165, "iter_time": 1.5854971313476562, "loss": 0.047553859651088715, "lr": 1.9365542502424075e-05, "seqlen": 4880.0, "step_tflops": 9.185674343300295, "step_time": 1.3217377700805664} +{"epoch": 0, "iter": 21743, "iter_tflops": 31.997132246242636, "iter_time": 0.37944190216064455, "loss": 0.02101743035018444, "lr": 1.932043704022154e-05, "seqlen": 4880.0, "step_tflops": 35.111865656315224, "step_time": 0.3457820453643799} +{"epoch": 0, "iter": 21744, "iter_tflops": 33.76337628265594, "iter_time": 0.35959237670898436, "loss": 0.03756215423345566, "lr": 1.927528893704964e-05, "seqlen": 4880.0, "step_tflops": 36.80720922468793, "step_time": 0.32985529136657715} +{"epoch": 0, "iter": 21745, "iter_tflops": 38.193182727152355, "iter_time": 0.5401773834228516, "loss": 0.13972258567810059, "lr": 1.9230098638501942e-05, "seqlen": 8192.0, "step_tflops": 41.19982588856396, "step_time": 0.5007568130493164} +{"epoch": 0, "iter": 21746, "iter_tflops": 42.61485828549141, "iter_time": 0.48412911224365235, "loss": 0.2281830608844757, "lr": 1.918486659058844e-05, "seqlen": 8192.0, "step_tflops": 45.877156397225974, "step_time": 0.44970297050476077} +{"epoch": 0, "iter": 21747, "iter_tflops": 46.97216320176264, "iter_time": 0.43921957397460937, "loss": 0.34309858083724976, "lr": 1.913959323973119e-05, "seqlen": 8192.0, "step_tflops": 50.8172220539363, "step_time": 0.4059862518310547} +{"epoch": 0, "iter": 21748, "iter_tflops": 48.974175092315974, "iter_time": 
0.42126474761962895, "loss": 0.3189783990383148, "lr": 1.909427903275988e-05, "seqlen": 8192.0, "step_tflops": 53.16593616657876, "step_time": 0.38805097770690916} +{"epoch": 0, "iter": 21749, "iter_tflops": 39.85173737279686, "iter_time": 0.5176962127685547, "loss": 0.39816492795944214, "lr": 1.904892441690745e-05, "seqlen": 8192.0, "step_tflops": 42.89495988278237, "step_time": 0.48096777725219725} +{"epoch": 0, "iter": 21750, "iter_tflops": 43.05654707013882, "iter_time": 0.47916275024414057, "loss": 0.35393139719963074, "lr": 1.9003529839805637e-05, "seqlen": 8192.0, "step_tflops": 46.426223676060935, "step_time": 0.44438448524475094} +{"epoch": 0, "iter": 21751, "iter_tflops": 47.251103209838135, "iter_time": 0.4366267051696777, "loss": 0.6996651887893677, "lr": 1.8958095749480597e-05, "seqlen": 8192.0, "step_tflops": 51.305662953074574, "step_time": 0.40212117576599127} +{"epoch": 0, "iter": 21752, "iter_tflops": 46.67666200741462, "iter_time": 0.4420001907348633, "loss": 0.4781203866004944, "lr": 1.8912622594348456e-05, "seqlen": 8192.0, "step_tflops": 50.8224443657811, "step_time": 0.40594453430175786} +{"epoch": 0, "iter": 21753, "iter_tflops": 24.976984362790795, "iter_time": 0.8260041809082032, "loss": 0.543768048286438, "lr": 1.88671108232109e-05, "seqlen": 8192.0, "step_tflops": 26.37013366534343, "step_time": 0.7823659057617188} +{"epoch": 0, "iter": 21754, "iter_tflops": 15.175133199499928, "iter_time": 1.359532943725586, "loss": 0.58791583776474, "lr": 1.8821560885250735e-05, "seqlen": 8192.0, "step_tflops": 16.6823583925804, "step_time": 1.2367012519836424} +{"epoch": 0, "iter": 21755, "iter_tflops": 39.44234896095485, "iter_time": 0.5230695953369141, "loss": 0.43006008863449097, "lr": 1.8775973230027457e-05, "seqlen": 8192.0, "step_tflops": 43.993767252066604, "step_time": 0.4689549179077149} +{"epoch": 0, "iter": 21756, "iter_tflops": 46.01211074478058, "iter_time": 0.44838398361206055, "loss": 0.48116785287857056, "lr": 1.8730348307472828e-05, 
"seqlen": 8192.0, "step_tflops": 49.75804983004953, "step_time": 0.41462825775146483} +{"epoch": 0, "iter": 21757, "iter_tflops": 25.97703363735505, "iter_time": 0.7097830581665039, "loss": 0.1374729424715042, "lr": 1.8684686567886398e-05, "seqlen": 7344.0, "step_tflops": 27.570781209988887, "step_time": 0.6687535705566406} +{"epoch": 0, "iter": 21758, "iter_tflops": 15.16938142703673, "iter_time": 1.2154785919189453, "loss": 0.13471266627311707, "lr": 1.863898846193111e-05, "seqlen": 7344.0, "step_tflops": 19.36886316408402, "step_time": 0.9519432411193849} +{"epoch": 0, "iter": 21759, "iter_tflops": 41.95210037179156, "iter_time": 0.439502628326416, "loss": 0.09340820461511612, "lr": 1.859325444062883e-05, "seqlen": 7344.0, "step_tflops": 45.77425587029125, "step_time": 0.4028041095733642} +{"epoch": 0, "iter": 21760, "iter_tflops": 41.00334534400208, "iter_time": 0.4496720504760742, "loss": 0.12935267388820648, "lr": 1.8547484955355872e-05, "seqlen": 7344.0, "step_tflops": 44.29709252117192, "step_time": 0.41623631095886227} +{"epoch": 0, "iter": 21761, "iter_tflops": 38.232055043808394, "iter_time": 0.418819896697998, "loss": 0.13263896107673645, "lr": 1.8501680457838582e-05, "seqlen": 6400.0, "step_tflops": 41.94583349244238, "step_time": 0.3817386379241943} +{"epoch": 0, "iter": 21762, "iter_tflops": 28.37950144003909, "iter_time": 0.5642222213745116, "loss": 0.1119852215051651, "lr": 1.845584140014886e-05, "seqlen": 6400.0, "step_tflops": 30.971150090417094, "step_time": 0.5170084190368652} +{"epoch": 0, "iter": 21763, "iter_tflops": 31.541397556559932, "iter_time": 0.5076612510681152, "loss": 0.09550575911998749, "lr": 1.8409968234699698e-05, "seqlen": 6400.0, "step_tflops": 34.63774208332321, "step_time": 0.46228028678894045} +{"epoch": 0, "iter": 21764, "iter_tflops": 31.91360833659496, "iter_time": 0.5017403602600098, "loss": 0.12856614589691162, "lr": 1.836406141424072e-05, "seqlen": 6400.0, "step_tflops": 35.379799370583925, "step_time": 
0.4525844020843506} +{"epoch": 0, "iter": 21765, "iter_tflops": 16.07170030454201, "iter_time": 1.2836907806396485, "loss": 0.13341645896434784, "lr": 1.8318121391853712e-05, "seqlen": 8192.0, "step_tflops": 17.1738077214408, "step_time": 1.2013115463256834} +{"epoch": 0, "iter": 21766, "iter_tflops": 19.303305276904123, "iter_time": 1.0687855377197266, "loss": 0.1338927149772644, "lr": 1.8272148620948143e-05, "seqlen": 8192.0, "step_tflops": 23.299888139196906, "step_time": 0.8854589080810547} +{"epoch": 0, "iter": 21767, "iter_tflops": 47.32049448662574, "iter_time": 0.43598643112182617, "loss": 0.08885331451892853, "lr": 1.8226143555256703e-05, "seqlen": 8192.0, "step_tflops": 51.23847439065491, "step_time": 0.402648473739624} +{"epoch": 0, "iter": 21768, "iter_tflops": 49.1080905718625, "iter_time": 0.42011597824096675, "loss": 0.09384152293205261, "lr": 1.8180106648830824e-05, "seqlen": 8192.0, "step_tflops": 53.3833685240813, "step_time": 0.3864704322814942} +{"epoch": 0, "iter": 21769, "iter_tflops": 30.02025080760278, "iter_time": 0.6872392120361327, "loss": 0.5180010795593262, "lr": 1.8134038356036174e-05, "seqlen": 8192.0, "step_tflops": 31.929353938896078, "step_time": 0.6461481666564941} +{"epoch": 0, "iter": 21770, "iter_tflops": 10.610630300693877, "iter_time": 1.944379638671875, "loss": 0.4427263140678406, "lr": 1.808793913154822e-05, "seqlen": 8192.0, "step_tflops": 12.671164493784428, "step_time": 1.6281923828124998} +{"epoch": 0, "iter": 21771, "iter_tflops": 17.62332014817211, "iter_time": 1.1706700744628908, "loss": 0.4252997934818268, "lr": 1.8041809430347688e-05, "seqlen": 8192.0, "step_tflops": 19.61453533375004, "step_time": 1.0518267784118653} +{"epoch": 0, "iter": 21772, "iter_tflops": 37.61381668815975, "iter_time": 0.5484977416992187, "loss": 0.5440186858177185, "lr": 1.7995649707716104e-05, "seqlen": 8192.0, "step_tflops": 40.84322333537282, "step_time": 0.5051289253234863} +{"epoch": 0, "iter": 21773, "iter_tflops": 14.47360311541016, 
"iter_time": 1.0949923553466796, "loss": 0.12212225794792175, "lr": 1.79494604192313e-05, "seqlen": 6336.0, "step_tflops": 15.344983013009328, "step_time": 1.032812141418457} +{"epoch": 0, "iter": 21774, "iter_tflops": 13.13191469213631, "iter_time": 1.2068677825927734, "loss": 0.2770174443721771, "lr": 1.790324202076291e-05, "seqlen": 6336.0, "step_tflops": 15.650669388961932, "step_time": 1.012639419555664} +{"epoch": 0, "iter": 21775, "iter_tflops": 27.12967811924573, "iter_time": 0.5841751861572265, "loss": 0.30001387000083923, "lr": 1.7856994968467852e-05, "seqlen": 6336.0, "step_tflops": 28.846065908243833, "step_time": 0.549415813446045} +{"epoch": 0, "iter": 21776, "iter_tflops": 27.42422932511042, "iter_time": 0.5779008255004883, "loss": 0.24852147698402405, "lr": 1.781071971878587e-05, "seqlen": 6336.0, "step_tflops": 29.248296119592876, "step_time": 0.5418601036071777} +{"epoch": 0, "iter": 21777, "iter_tflops": 29.395423992004517, "iter_time": 0.7018471145629883, "loss": 0.20131142437458038, "lr": 1.7764416728435e-05, "seqlen": 8192.0, "step_tflops": 31.338203069531232, "step_time": 0.6583368377685548} +{"epoch": 0, "iter": 21778, "iter_tflops": 24.586235069015085, "iter_time": 0.8391318740844725, "loss": 0.2790903151035309, "lr": 1.771808645440706e-05, "seqlen": 8192.0, "step_tflops": 27.959142257562668, "step_time": 0.737901517868042} +{"epoch": 0, "iter": 21779, "iter_tflops": 49.255243737430234, "iter_time": 0.41886085510253906, "loss": 0.251077800989151, "lr": 1.767172935396315e-05, "seqlen": 8192.0, "step_tflops": 54.07032828334164, "step_time": 0.38156035232543944} +{"epoch": 0, "iter": 21780, "iter_tflops": 48.27663488828705, "iter_time": 0.42735152435302726, "loss": 0.29652684926986694, "lr": 1.7625345884629144e-05, "seqlen": 8192.0, "step_tflops": 52.402853733054414, "step_time": 0.393701717376709} +{"epoch": 0, "iter": 21781, "iter_tflops": 20.135425789834198, "iter_time": 1.02461669921875, "loss": 0.39518454670906067, "lr": 
1.757893650419114e-05, "seqlen": 8192.0, "step_tflops": 21.010179849184627, "step_time": 0.981957015991211} +{"epoch": 0, "iter": 21782, "iter_tflops": 16.674961039128483, "iter_time": 1.2372498779296874, "loss": 0.48754507303237915, "lr": 1.7532501670691006e-05, "seqlen": 8192.0, "step_tflops": 18.94751430373045, "step_time": 1.0888548851013184} +{"epoch": 0, "iter": 21783, "iter_tflops": 35.837506016068325, "iter_time": 0.5756844100952149, "loss": 0.36895087361335754, "lr": 1.74860418424218e-05, "seqlen": 8192.0, "step_tflops": 38.83594824950551, "step_time": 0.5312370223999023} +{"epoch": 0, "iter": 21784, "iter_tflops": 38.40213340532358, "iter_time": 0.5372382125854491, "loss": 0.5216084718704224, "lr": 1.7439557477923257e-05, "seqlen": 8192.0, "step_tflops": 41.87344663118657, "step_time": 0.4927011070251464} +{"epoch": 0, "iter": 21785, "iter_tflops": 13.850836710899891, "iter_time": 1.3759012756347655, "loss": 0.06787727028131485, "lr": 1.7393049035977296e-05, "seqlen": 7584.0, "step_tflops": 14.739545785378143, "step_time": 1.2929424133300782} +{"epoch": 0, "iter": 21786, "iter_tflops": 18.990002221560314, "iter_time": 1.003548271179199, "loss": 0.06594133377075195, "lr": 1.7346516975603466e-05, "seqlen": 7584.0, "step_tflops": 22.8679443874812, "step_time": 0.8333667240142822} +{"epoch": 0, "iter": 21787, "iter_tflops": 46.07544390024969, "iter_time": 0.41361259460449223, "loss": 0.0731659010052681, "lr": 1.729996175605441e-05, "seqlen": 7584.0, "step_tflops": 50.65239281561444, "step_time": 0.3762385711669922} +{"epoch": 0, "iter": 21788, "iter_tflops": 47.82875732013056, "iter_time": 0.398450325012207, "loss": 0.08445586264133453, "lr": 1.7253383836811357e-05, "seqlen": 7584.0, "step_tflops": 51.99708716882715, "step_time": 0.36650868225097655} +{"epoch": 0, "iter": 21789, "iter_tflops": 27.25598381712062, "iter_time": 0.756938133239746, "loss": 0.07803028076887131, "lr": 1.720678367757956e-05, "seqlen": 8192.0, "step_tflops": 28.91427514141523, 
"step_time": 0.713526222229004} +{"epoch": 0, "iter": 21790, "iter_tflops": 15.440213472186583, "iter_time": 1.3361922454833983, "loss": 0.06555734574794769, "lr": 1.7160161738283782e-05, "seqlen": 8192.0, "step_tflops": 20.09514110881867, "step_time": 1.0266707458496094} +{"epoch": 0, "iter": 21791, "iter_tflops": 40.29592083194802, "iter_time": 0.5119896278381348, "loss": 0.05848507583141327, "lr": 1.711351847906374e-05, "seqlen": 8192.0, "step_tflops": 44.2854682826092, "step_time": 0.4658659896850586} +{"epoch": 0, "iter": 21792, "iter_tflops": 41.91121027561646, "iter_time": 0.4922571640014648, "loss": 0.05482717603445053, "lr": 1.706685436026957e-05, "seqlen": 8192.0, "step_tflops": 46.21452255727624, "step_time": 0.4464201374053955} +{"epoch": 0, "iter": 21793, "iter_tflops": 23.964689681565233, "iter_time": 0.8608955001831056, "loss": 0.4531137943267822, "lr": 1.702016984245729e-05, "seqlen": 8192.0, "step_tflops": 25.578581788155535, "step_time": 0.8065769119262696} +{"epoch": 0, "iter": 21794, "iter_tflops": 10.72461540911311, "iter_time": 1.9237140655517577, "loss": 0.5925272703170776, "lr": 1.697346538638424e-05, "seqlen": 8192.0, "step_tflops": 14.65999131490722, "step_time": 1.4073059844970706} +{"epoch": 0, "iter": 21795, "iter_tflops": 15.756132872490358, "iter_time": 1.3094008331298828, "loss": 0.48084133863449097, "lr": 1.6926741453004544e-05, "seqlen": 8192.0, "step_tflops": 18.319506887607382, "step_time": 1.1261817054748535} +{"epoch": 0, "iter": 21796, "iter_tflops": 35.422368334198126, "iter_time": 0.5824312286376954, "loss": 0.4338996112346649, "lr": 1.6879998503464565e-05, "seqlen": 8192.0, "step_tflops": 43.57785362531955, "step_time": 0.47343069458007814} +{"epoch": 0, "iter": 21797, "iter_tflops": 20.04715517292063, "iter_time": 0.7029302444458007, "loss": 0.23663148283958435, "lr": 1.683323699909834e-05, "seqlen": 5648.0, "step_tflops": 21.31391290893437, "step_time": 0.6611527290344239} +{"epoch": 0, "iter": 21798, "iter_tflops": 
21.358609584635854, "iter_time": 0.6597691497802733, "loss": 0.2682866156101227, "lr": 1.6786457401423047e-05, "seqlen": 5648.0, "step_tflops": 25.760459662532014, "step_time": 0.547030288696289} +{"epoch": 0, "iter": 21799, "iter_tflops": 24.744398920020902, "iter_time": 0.5694925842285157, "loss": 0.23544159531593323, "lr": 1.673966017213442e-05, "seqlen": 5648.0, "step_tflops": 26.414486486919323, "step_time": 0.5334857330322265} +{"epoch": 0, "iter": 21800, "iter_tflops": 25.599859362847827, "iter_time": 0.5504620742797852, "loss": 0.16276851296424866, "lr": 1.6692845773102225e-05, "seqlen": 5648.0, "step_tflops": 27.110903832327335, "step_time": 0.5197817001342773} +{"epoch": 0, "iter": 21801, "iter_tflops": 38.470833813525616, "iter_time": 0.5362788238525391, "loss": 0.19378794729709625, "lr": 1.6646014666365676e-05, "seqlen": 8192.0, "step_tflops": 41.311367504862076, "step_time": 0.4994047584533691} +{"epoch": 0, "iter": 21802, "iter_tflops": 11.619169389692088, "iter_time": 1.7756082916259766, "loss": 0.2171093374490738, "lr": 1.65991673141289e-05, "seqlen": 8192.0, "step_tflops": 14.637343231286104, "step_time": 1.40948348236084} +{"epoch": 0, "iter": 21803, "iter_tflops": 14.6188823084559, "iter_time": 1.4112633972167972, "loss": 0.1866026222705841, "lr": 1.6552304178756337e-05, "seqlen": 8192.0, "step_tflops": 17.1392233662154, "step_time": 1.203735610961914} +{"epoch": 0, "iter": 21804, "iter_tflops": 20.198948177310598, "iter_time": 1.0213944473266603, "loss": 0.15432752668857574, "lr": 1.650542572276822e-05, "seqlen": 8192.0, "step_tflops": 22.695902084189473, "step_time": 0.9090228462219239} +{"epoch": 0, "iter": 21805, "iter_tflops": 30.082877710733975, "iter_time": 0.5923567123413086, "loss": 0.21661463379859924, "lr": 1.6458532408835996e-05, "seqlen": 7104.0, "step_tflops": 32.327729684069425, "step_time": 0.5512231979370117} +{"epoch": 0, "iter": 21806, "iter_tflops": 31.629968374444434, "iter_time": 0.5633832550048828, "loss": 
0.17425104975700378, "lr": 1.641162469977772e-05, "seqlen": 7104.0, "step_tflops": 33.75591028576316, "step_time": 0.5279014663696289} +{"epoch": 0, "iter": 21807, "iter_tflops": 30.01985376337101, "iter_time": 0.593600311279297, "loss": 0.22143058478832245, "lr": 1.6364703058553552e-05, "seqlen": 7104.0, "step_tflops": 31.950772103903446, "step_time": 0.5577265701293945} +{"epoch": 0, "iter": 21808, "iter_tflops": 32.47018247325045, "iter_time": 0.5488048782348633, "loss": 0.21593402326107025, "lr": 1.631776794826115e-05, "seqlen": 7104.0, "step_tflops": 34.61036047221018, "step_time": 0.5148687934875489} +{"epoch": 0, "iter": 21809, "iter_tflops": 25.672349067423053, "iter_time": 0.8036309204101564, "loss": 0.6547315120697021, "lr": 1.6270819832131102e-05, "seqlen": 8192.0, "step_tflops": 27.005715463822515, "step_time": 0.7639528579711914} +{"epoch": 0, "iter": 21810, "iter_tflops": 13.174668165523304, "iter_time": 1.5659668426513673, "loss": 0.5295345187187195, "lr": 1.6223859173522367e-05, "seqlen": 8192.0, "step_tflops": 19.243475683275303, "step_time": 1.0721084823608398} +{"epoch": 0, "iter": 21811, "iter_tflops": 37.642603377298244, "iter_time": 0.5480782852172852, "loss": 0.6300495266914368, "lr": 1.6176886435917676e-05, "seqlen": 8192.0, "step_tflops": 40.96632927915222, "step_time": 0.5036109867095947} +{"epoch": 0, "iter": 21812, "iter_tflops": 39.50579757475804, "iter_time": 0.5222295150756836, "loss": 0.5847985744476318, "lr": 1.612990208291899e-05, "seqlen": 8192.0, "step_tflops": 42.65894779418869, "step_time": 0.4836287479400635} +{"epoch": 0, "iter": 21813, "iter_tflops": 35.94378532344867, "iter_time": 0.558982650756836, "loss": 0.046101164072752, "lr": 1.6082906578242913e-05, "seqlen": 7984.0, "step_tflops": 40.02092134102912, "step_time": 0.5020362281799317} +{"epoch": 0, "iter": 21814, "iter_tflops": 51.15663047316718, "iter_time": 0.3927536315917969, "loss": 0.021611183881759644, "lr": 1.603590038571609e-05, "seqlen": 7984.0, "step_tflops": 
56.0928284095437, "step_time": 0.35819110870361326} +{"epoch": 0, "iter": 21815, "iter_tflops": 50.34031921904125, "iter_time": 0.3991224670410156, "loss": 0.008918283507227898, "lr": 1.598888396927067e-05, "seqlen": 7984.0, "step_tflops": 55.13058740564509, "step_time": 0.3644429225921631} +{"epoch": 0, "iter": 21816, "iter_tflops": 52.94750600500936, "iter_time": 0.37946928787231443, "loss": 0.03853641077876091, "lr": 1.5941857792939702e-05, "seqlen": 7984.0, "step_tflops": 57.72434646109788, "step_time": 0.3480672130584717} +{"epoch": 0, "iter": 21817, "iter_tflops": 29.110362227181003, "iter_time": 0.7087199172973633, "loss": 0.3068815767765045, "lr": 1.5894822320852563e-05, "seqlen": 8192.0, "step_tflops": 30.752581812182143, "step_time": 0.6708735427856445} +{"epoch": 0, "iter": 21818, "iter_tflops": 12.7534130547877, "iter_time": 1.617691940307617, "loss": 0.6054625511169434, "lr": 1.5847778017230372e-05, "seqlen": 8192.0, "step_tflops": 17.188585140359336, "step_time": 1.200278751373291} +{"epoch": 0, "iter": 21819, "iter_tflops": 37.31952776622592, "iter_time": 0.5528230056762695, "loss": 0.5468512773513794, "lr": 1.5800725346381406e-05, "seqlen": 8192.0, "step_tflops": 40.868775479460275, "step_time": 0.5048131065368653} +{"epoch": 0, "iter": 21820, "iter_tflops": 39.10173105903203, "iter_time": 0.527626091003418, "loss": 0.48279112577438354, "lr": 1.5753664772696546e-05, "seqlen": 8192.0, "step_tflops": 42.647216648581434, "step_time": 0.48376178169250483} +{"epoch": 0, "iter": 21821, "iter_tflops": 20.880840288996076, "iter_time": 0.9880394287109374, "loss": 0.31735968589782715, "lr": 1.570659676064464e-05, "seqlen": 8192.0, "step_tflops": 22.57075823007125, "step_time": 0.9140629348754883} +{"epoch": 0, "iter": 21822, "iter_tflops": 30.158461465386758, "iter_time": 0.6840897216796875, "loss": 0.49113860726356506, "lr": 1.5659521774767974e-05, "seqlen": 8192.0, "step_tflops": 40.18717249105738, "step_time": 0.5133750953674316} +{"epoch": 0, "iter": 
21823, "iter_tflops": 47.630046038089446, "iter_time": 0.4331529197692871, "loss": 0.43948009610176086, "lr": 1.561244027967765e-05, "seqlen": 8192.0, "step_tflops": 51.53739007505093, "step_time": 0.4003131217956543} +{"epoch": 0, "iter": 21824, "iter_tflops": 48.267541717297014, "iter_time": 0.4274320335388183, "loss": 0.4759674668312073, "lr": 1.556535274004902e-05, "seqlen": 8192.0, "step_tflops": 52.29527291116314, "step_time": 0.3945116329193115} +{"epoch": 0, "iter": 21825, "iter_tflops": 23.614072712244525, "iter_time": 0.8736779022216797, "loss": 0.19195644557476044, "lr": 1.5518259620617088e-05, "seqlen": 8192.0, "step_tflops": 24.71385217004267, "step_time": 0.8347987747192382} +{"epoch": 0, "iter": 21826, "iter_tflops": 17.32031445133755, "iter_time": 1.1911500549316405, "loss": 0.12455404549837112, "lr": 1.5471161386171925e-05, "seqlen": 8192.0, "step_tflops": 22.226365491127748, "step_time": 0.9282261428833007} +{"epoch": 0, "iter": 21827, "iter_tflops": 40.92471207543749, "iter_time": 0.504123119354248, "loss": 0.12190495431423187, "lr": 1.5424058501554102e-05, "seqlen": 8192.0, "step_tflops": 44.34405357850366, "step_time": 0.46525050926208494} +{"epoch": 0, "iter": 21828, "iter_tflops": 51.16340706198994, "iter_time": 0.4032392425537109, "loss": 0.09668378531932831, "lr": 1.5376951431650065e-05, "seqlen": 8192.0, "step_tflops": 55.27863223589703, "step_time": 0.37322004318237306} +{"epoch": 0, "iter": 21829, "iter_tflops": 48.25049215757107, "iter_time": 0.4275830688476563, "loss": 0.003233375959098339, "lr": 1.5329840641387577e-05, "seqlen": 8192.0, "step_tflops": 53.07893179366608, "step_time": 0.38868705177307133} +{"epoch": 0, "iter": 21830, "iter_tflops": 55.72959830976103, "iter_time": 0.37019993209838864, "loss": 0.004584117326885462, "lr": 1.5282726595731126e-05, "seqlen": 8192.0, "step_tflops": 61.40692712174515, "step_time": 0.3359733905792237} +{"epoch": 0, "iter": 21831, "iter_tflops": 56.47834409869295, "iter_time": 0.3652921104431152, 
"loss": 0.010866167023777962, "lr": 1.5235609759677311e-05, "seqlen": 8192.0, "step_tflops": 62.01495398180455, "step_time": 0.3326793327331543} +{"epoch": 0, "iter": 21832, "iter_tflops": 59.82537807904873, "iter_time": 0.34485521316528317, "loss": 0.006992808543145657, "lr": 1.518849059825029e-05, "seqlen": 8192.0, "step_tflops": 65.51506879139697, "step_time": 0.31490608024597166} +{"epoch": 0, "iter": 21833, "iter_tflops": 42.75549185799318, "iter_time": 0.4825366897583008, "loss": 0.27924439311027527, "lr": 1.5141369576497164e-05, "seqlen": 8192.0, "step_tflops": 46.57152972455117, "step_time": 0.4429979782104493} +{"epoch": 0, "iter": 21834, "iter_tflops": 42.348349886835535, "iter_time": 0.4871758537292481, "loss": 0.3046702742576599, "lr": 1.5094247159483387e-05, "seqlen": 8192.0, "step_tflops": 49.57336866954734, "step_time": 0.41617291831970216} +{"epoch": 0, "iter": 21835, "iter_tflops": 51.52462720859878, "iter_time": 0.40041228103637694, "loss": 0.2668763995170593, "lr": 1.5047123812288194e-05, "seqlen": 8192.0, "step_tflops": 55.83765429045782, "step_time": 0.369483528137207} +{"epoch": 0, "iter": 21836, "iter_tflops": 45.24546034816306, "iter_time": 0.4559815139770508, "loss": 0.3231869637966156, "lr": 1.5e-05, "seqlen": 8192.0, "step_tflops": 48.726369009988254, "step_time": 0.42340715980529786} +{"epoch": 0, "iter": 21837, "iter_tflops": 47.11739673471723, "iter_time": 0.4378657341003418, "loss": 0.05852283537387848, "lr": 1.4952876187711806e-05, "seqlen": 8192.0, "step_tflops": 51.808198825623535, "step_time": 0.39822062873840325} +{"epoch": 0, "iter": 21838, "iter_tflops": 44.38348965687545, "iter_time": 0.46483712005615235, "loss": 0.04983987286686897, "lr": 1.4905752840516615e-05, "seqlen": 8192.0, "step_tflops": 48.52020186541774, "step_time": 0.4252062587738037} +{"epoch": 0, "iter": 21839, "iter_tflops": 50.20624222400741, "iter_time": 0.41092686080932617, "loss": 0.05865569785237312, "lr": 1.485863042350284e-05, "seqlen": 8192.0, 
"step_tflops": 54.80938219053567, "step_time": 0.37641536331176756} +{"epoch": 0, "iter": 21840, "iter_tflops": 50.927615211442024, "iter_time": 0.405106216430664, "loss": 0.057134293019771576, "lr": 1.481150940174971e-05, "seqlen": 8192.0, "step_tflops": 55.35694142190526, "step_time": 0.37269207763671874} +{"epoch": 0, "iter": 21841, "iter_tflops": 32.09152764747868, "iter_time": 0.6428828735351564, "loss": 0.06375628709793091, "lr": 1.4764390240322693e-05, "seqlen": 8192.0, "step_tflops": 34.13250917284611, "step_time": 0.604441162109375} +{"epoch": 0, "iter": 21842, "iter_tflops": 14.183146825031345, "iter_time": 1.4546203155517579, "loss": 0.10989454388618469, "lr": 1.471727340426888e-05, "seqlen": 8192.0, "step_tflops": 16.52141992514535, "step_time": 1.248748207092285} +{"epoch": 0, "iter": 21843, "iter_tflops": 41.742078569180265, "iter_time": 0.49425170516967776, "loss": 0.09726645052433014, "lr": 1.4670159358612425e-05, "seqlen": 8192.0, "step_tflops": 46.27537421406806, "step_time": 0.44583309936523435} +{"epoch": 0, "iter": 21844, "iter_tflops": 38.8463825561706, "iter_time": 0.5310943298339844, "loss": 0.0971376970410347, "lr": 1.462304856834994e-05, "seqlen": 8192.0, "step_tflops": 42.75102171750689, "step_time": 0.4825871448516845} +{"epoch": 0, "iter": 21845, "iter_tflops": 19.933226640523795, "iter_time": 1.0350102310180664, "loss": 0.2242901772260666, "lr": 1.4575941498445902e-05, "seqlen": 8192.0, "step_tflops": 21.47795588022428, "step_time": 0.9605706253051757} +{"epoch": 0, "iter": 21846, "iter_tflops": 16.68685481884752, "iter_time": 1.2363680114746092, "loss": 0.24652132391929626, "lr": 1.4528838613828076e-05, "seqlen": 8192.0, "step_tflops": 22.128050097564394, "step_time": 0.9323502712249755} +{"epoch": 0, "iter": 21847, "iter_tflops": 47.14985131601296, "iter_time": 0.43756433868408207, "loss": 0.21633125841617584, "lr": 1.4481740379382916e-05, "seqlen": 8192.0, "step_tflops": 51.23366710316955, "step_time": 0.4026862545013428} +{"epoch": 
0, "iter": 21848, "iter_tflops": 40.601047751276056, "iter_time": 0.5081418991088867, "loss": 0.1765347719192505, "lr": 1.443464725995098e-05, "seqlen": 8192.0, "step_tflops": 44.18314342332794, "step_time": 0.4669449005126952} +{"epoch": 0, "iter": 21849, "iter_tflops": 26.196564056867924, "iter_time": 0.7875495986938477, "loss": 0.452408105134964, "lr": 1.438755972032235e-05, "seqlen": 8192.0, "step_tflops": 27.80157976662186, "step_time": 0.7420834960937499} +{"epoch": 0, "iter": 21850, "iter_tflops": 10.841037322160476, "iter_time": 1.9030552978515625, "loss": 0.4574274718761444, "lr": 1.434047822523203e-05, "seqlen": 8192.0, "step_tflops": 13.168206804688364, "step_time": 1.5667352294921875} +{"epoch": 0, "iter": 21851, "iter_tflops": 16.897059224188784, "iter_time": 1.2209872283935546, "loss": 0.4980770945549011, "lr": 1.4293403239355362e-05, "seqlen": 8192.0, "step_tflops": 18.404853160463794, "step_time": 1.1209594192504884} +{"epoch": 0, "iter": 21852, "iter_tflops": 26.296239693678412, "iter_time": 0.7845643997192382, "loss": 0.4304705858230591, "lr": 1.4246335227303458e-05, "seqlen": 8192.0, "step_tflops": 32.028071247967546, "step_time": 0.6441566009521484} +{"epoch": 0, "iter": 21853, "iter_tflops": 12.693786123691952, "iter_time": 1.2195001831054688, "loss": 0.16973267495632172, "lr": 1.4199274653618593e-05, "seqlen": 6192.0, "step_tflops": 13.392753236572497, "step_time": 1.1558545303344727} +{"epoch": 0, "iter": 21854, "iter_tflops": 13.408544945109062, "iter_time": 1.1544932403564452, "loss": 0.19008532166481018, "lr": 1.4152221982769634e-05, "seqlen": 6192.0, "step_tflops": 15.601328013466244, "step_time": 0.9922280006408691} +{"epoch": 0, "iter": 21855, "iter_tflops": 23.38388673557462, "iter_time": 0.6619974975585938, "loss": 0.3167838752269745, "lr": 1.4105177679147441e-05, "seqlen": 6192.0, "step_tflops": 25.110120920134815, "step_time": 0.6164874534606933} +{"epoch": 0, "iter": 21856, "iter_tflops": 24.029710715646104, "iter_time": 
0.6442056121826172, "loss": 0.24034833908081055, "lr": 1.40581422070603e-05, "seqlen": 6192.0, "step_tflops": 25.853921578813914, "step_time": 0.59875150680542} +{"epoch": 0, "iter": 21857, "iter_tflops": 18.591610875019576, "iter_time": 1.1096990814208985, "loss": 0.2955024540424347, "lr": 1.4011116030729333e-05, "seqlen": 8192.0, "step_tflops": 19.695779597709887, "step_time": 1.047488037109375} +{"epoch": 0, "iter": 21858, "iter_tflops": 15.418311466928818, "iter_time": 1.3380903320312498, "loss": 0.36617130041122437, "lr": 1.3964099614283912e-05, "seqlen": 8192.0, "step_tflops": 18.86835265828687, "step_time": 1.0934231452941894} +{"epoch": 0, "iter": 21859, "iter_tflops": 40.009872989595536, "iter_time": 0.5156500625610352, "loss": 0.2810908257961273, "lr": 1.391709342175709e-05, "seqlen": 8192.0, "step_tflops": 43.494828583981125, "step_time": 0.4743344020843506} +{"epoch": 0, "iter": 21860, "iter_tflops": 41.18471054591723, "iter_time": 0.5009405975341797, "loss": 0.24033379554748535, "lr": 1.3870097917081011e-05, "seqlen": 8192.0, "step_tflops": 44.877086087406944, "step_time": 0.4597244453430176} +{"epoch": 0, "iter": 21861, "iter_tflops": 17.80644344290714, "iter_time": 1.1586307830810547, "loss": 0.46805477142333984, "lr": 1.3823113564082329e-05, "seqlen": 8192.0, "step_tflops": 19.105867684889333, "step_time": 1.0798302307128906} +{"epoch": 0, "iter": 21862, "iter_tflops": 22.58238492585989, "iter_time": 0.9135923233032226, "loss": 0.419353723526001, "lr": 1.3776140826477637e-05, "seqlen": 8192.0, "step_tflops": 27.282519772616123, "step_time": 0.7562019081115722} +{"epoch": 0, "iter": 21863, "iter_tflops": 43.859472061908484, "iter_time": 0.4703908309936523, "loss": 0.6756177544593811, "lr": 1.3729180167868899e-05, "seqlen": 8192.0, "step_tflops": 47.08472322222171, "step_time": 0.43816958236694337} +{"epoch": 0, "iter": 21864, "iter_tflops": 44.41557673837495, "iter_time": 0.4645013084411621, "loss": 0.512120246887207, "lr": 1.3682232051738853e-05, 
"seqlen": 8192.0, "step_tflops": 47.81717822141633, "step_time": 0.43145777893066406} +{"epoch": 0, "iter": 21865, "iter_tflops": 37.85436639977072, "iter_time": 0.5450122528076172, "loss": 0.5749598741531372, "lr": 1.363529694144645e-05, "seqlen": 8192.0, "step_tflops": 40.72671302273781, "step_time": 0.506573989868164} +{"epoch": 0, "iter": 21866, "iter_tflops": 39.45450992074743, "iter_time": 0.5229083709716796, "loss": 0.5418790578842163, "lr": 1.3588375300222285e-05, "seqlen": 8192.0, "step_tflops": 43.45559135039888, "step_time": 0.4747626914978027} +{"epoch": 0, "iter": 21867, "iter_tflops": 45.71052887108221, "iter_time": 0.4513422622680664, "loss": 0.5257335901260376, "lr": 1.354146759116401e-05, "seqlen": 8192.0, "step_tflops": 49.35113230703503, "step_time": 0.4180470142364502} +{"epoch": 0, "iter": 21868, "iter_tflops": 41.268057718867624, "iter_time": 0.4999288711547852, "loss": 0.4272313117980957, "lr": 1.3494574277231775e-05, "seqlen": 8192.0, "step_tflops": 44.00694889096417, "step_time": 0.4688144493103028} +{"epoch": 0, "iter": 21869, "iter_tflops": 27.680114819075005, "iter_time": 0.7453398818969728, "loss": 0.12958043813705444, "lr": 1.3447695821243663e-05, "seqlen": 8192.0, "step_tflops": 29.249113095535865, "step_time": 0.7053579177856446} +{"epoch": 0, "iter": 21870, "iter_tflops": 17.633815252709798, "iter_time": 1.1699733276367188, "loss": 0.08867350220680237, "lr": 1.3400832685871102e-05, "seqlen": 8192.0, "step_tflops": 21.29083059951973, "step_time": 0.9690130882263183} +{"epoch": 0, "iter": 21871, "iter_tflops": 37.45301855569878, "iter_time": 0.5508526229858399, "loss": 0.08622948080301285, "lr": 1.3353985333634321e-05, "seqlen": 8192.0, "step_tflops": 40.726573020221885, "step_time": 0.5065757312774658} +{"epoch": 0, "iter": 21872, "iter_tflops": 43.91635669915215, "iter_time": 0.46978153610229495, "loss": 0.08218736946582794, "lr": 1.3307154226897774e-05, "seqlen": 8192.0, "step_tflops": 48.26273606457967, "step_time": 
0.4274745941162109} +{"epoch": 0, "iter": 21873, "iter_tflops": 14.537643920608154, "iter_time": 1.4191497344970703, "loss": 0.07466623932123184, "lr": 1.326033982786558e-05, "seqlen": 8192.0, "step_tflops": 15.454196578335317, "step_time": 1.3349832458496094} +{"epoch": 0, "iter": 21874, "iter_tflops": 22.542191216833896, "iter_time": 0.9152212982177734, "loss": 0.07058732211589813, "lr": 1.3213542598576953e-05, "seqlen": 8192.0, "step_tflops": 29.748172772216297, "step_time": 0.693524730682373} +{"epoch": 0, "iter": 21875, "iter_tflops": 50.13375998963559, "iter_time": 0.41152096939086913, "loss": 0.07013912498950958, "lr": 1.3166763000901658e-05, "seqlen": 8192.0, "step_tflops": 54.470327342200974, "step_time": 0.3787583904266357} +{"epoch": 0, "iter": 21876, "iter_tflops": 52.508038167367594, "iter_time": 0.39291305160522466, "loss": 0.08601628988981247, "lr": 1.3120001496535434e-05, "seqlen": 8192.0, "step_tflops": 56.920817697058155, "step_time": 0.3624525146484375} +{"epoch": 0, "iter": 21877, "iter_tflops": 37.28609823027447, "iter_time": 0.5533186492919921, "loss": 0.230346217751503, "lr": 1.3073258546995455e-05, "seqlen": 8192.0, "step_tflops": 40.01147315761449, "step_time": 0.5156294403076173} +{"epoch": 0, "iter": 21878, "iter_tflops": 11.493029319639142, "iter_time": 1.7950962219238282, "loss": 0.23691105842590332, "lr": 1.3026534613615764e-05, "seqlen": 8192.0, "step_tflops": 13.948727859688134, "step_time": 1.4790663146972658} +{"epoch": 0, "iter": 21879, "iter_tflops": 13.804501345745525, "iter_time": 1.4945192871093749, "loss": 0.22059905529022217, "lr": 1.2979830157542712e-05, "seqlen": 8192.0, "step_tflops": 16.135187709505725, "step_time": 1.278639820098877} +{"epoch": 0, "iter": 21880, "iter_tflops": 15.69452228986216, "iter_time": 1.314541030883789, "loss": 0.27041590213775635, "lr": 1.293314563973043e-05, "seqlen": 8192.0, "step_tflops": 18.487254700732315, "step_time": 1.1159630699157714} +{"epoch": 0, "iter": 21881, "iter_tflops": 
12.606148312823056, "iter_time": 1.205268814086914, "loss": 0.2126065045595169, "lr": 1.2886481520936261e-05, "seqlen": 6080.0, "step_tflops": 13.430281717661043, "step_time": 1.1313089141845702} +{"epoch": 0, "iter": 21882, "iter_tflops": 9.853356036280905, "iter_time": 1.541992126464844, "loss": 0.1549719125032425, "lr": 1.2839838261716217e-05, "seqlen": 6080.0, "step_tflops": 12.458912170777928, "step_time": 1.2195123634338378} +{"epoch": 0, "iter": 21883, "iter_tflops": 22.40608011368374, "iter_time": 0.6781104660034181, "loss": 0.21264740824699402, "lr": 1.2793216322420439e-05, "seqlen": 6080.0, "step_tflops": 24.080295134647443, "step_time": 0.6309639205932618} +{"epoch": 0, "iter": 21884, "iter_tflops": 24.438486226866562, "iter_time": 0.6217159805297853, "loss": 0.16125348210334778, "lr": 1.2746616163188644e-05, "seqlen": 6080.0, "step_tflops": 26.21205311047126, "step_time": 0.5796492691040039} +{"epoch": 0, "iter": 21885, "iter_tflops": 20.11820828378576, "iter_time": 1.0254935836791992, "loss": 0.24451151490211487, "lr": 1.270003824394559e-05, "seqlen": 8192.0, "step_tflops": 21.516693782912448, "step_time": 0.9588412475585938} +{"epoch": 0, "iter": 21886, "iter_tflops": 16.58531907196697, "iter_time": 1.2439370880126952, "loss": 0.2533838450908661, "lr": 1.2653483024396535e-05, "seqlen": 8192.0, "step_tflops": 20.363314188535476, "step_time": 1.0131500854492188} +{"epoch": 0, "iter": 21887, "iter_tflops": 36.302441310849275, "iter_time": 0.5683114624023438, "loss": 0.21780526638031006, "lr": 1.2606950964022701e-05, "seqlen": 8192.0, "step_tflops": 39.656996911556696, "step_time": 0.5202384223937988} +{"epoch": 0, "iter": 21888, "iter_tflops": 38.957345485874974, "iter_time": 0.5295816040039063, "loss": 0.3149261474609375, "lr": 1.2560442522076747e-05, "seqlen": 8192.0, "step_tflops": 42.541637009668875, "step_time": 0.4849623794555665} +{"epoch": 0, "iter": 21889, "iter_tflops": 18.762262410682375, "iter_time": 1.0996058502197266, "loss": 
0.0485813282430172, "lr": 1.2513958157578204e-05, "seqlen": 8192.0, "step_tflops": 20.065912420827743, "step_time": 1.028166229248047} +{"epoch": 0, "iter": 21890, "iter_tflops": 22.533792693753128, "iter_time": 0.9155624084472656, "loss": 0.040592461824417114, "lr": 1.2467498329308992e-05, "seqlen": 8192.0, "step_tflops": 27.370594016163484, "step_time": 0.7537685699462892} +{"epoch": 0, "iter": 21891, "iter_tflops": 52.20885096146679, "iter_time": 0.39516467285156254, "loss": 0.07071438431739807, "lr": 1.2421063495808856e-05, "seqlen": 8192.0, "step_tflops": 56.95454813691881, "step_time": 0.36223785781860357} +{"epoch": 0, "iter": 21892, "iter_tflops": 49.121926938684005, "iter_time": 0.4199976425170898, "loss": 0.04026293754577637, "lr": 1.2374654115370859e-05, "seqlen": 8192.0, "step_tflops": 53.71645475758913, "step_time": 0.3840739974975586} +{"epoch": 0, "iter": 21893, "iter_tflops": 31.061842584920473, "iter_time": 0.6641941299438476, "loss": 0.5335447192192078, "lr": 1.2328270646036847e-05, "seqlen": 8192.0, "step_tflops": 33.25157015836619, "step_time": 0.6204547157287598} +{"epoch": 0, "iter": 21894, "iter_tflops": 15.584228698053838, "iter_time": 1.3238443756103517, "loss": 0.5184913873672485, "lr": 1.2281913545592938e-05, "seqlen": 8192.0, "step_tflops": 18.425495915957274, "step_time": 1.1197035675048828} +{"epoch": 0, "iter": 21895, "iter_tflops": 40.87076383105824, "iter_time": 0.5047885475158691, "loss": 0.4073674976825714, "lr": 1.2235583271564999e-05, "seqlen": 8192.0, "step_tflops": 43.879892149450555, "step_time": 0.47017192840576166} +{"epoch": 0, "iter": 21896, "iter_tflops": 46.17395682562525, "iter_time": 0.4468123359680176, "loss": 0.686802864074707, "lr": 1.2189280281214128e-05, "seqlen": 8192.0, "step_tflops": 49.62694074120846, "step_time": 0.41572366142272954} +{"epoch": 0, "iter": 21897, "iter_tflops": 30.632651133945735, "iter_time": 0.6005620498657226, "loss": 0.09244082868099213, "lr": 1.2143005031532152e-05, "seqlen": 7328.0, 
"step_tflops": 32.955742630005865, "step_time": 0.5582276802062989} +{"epoch": 0, "iter": 21898, "iter_tflops": 41.98306684147768, "iter_time": 0.43819589996337893, "loss": 0.06777200847864151, "lr": 1.2096757979237096e-05, "seqlen": 7328.0, "step_tflops": 45.62600504410787, "step_time": 0.40320882225036625} +{"epoch": 0, "iter": 21899, "iter_tflops": 40.440244664316594, "iter_time": 0.45491336441040037, "loss": 0.11758101731538773, "lr": 1.2050539580768702e-05, "seqlen": 7328.0, "step_tflops": 43.87808385651374, "step_time": 0.4192709922790528} +{"epoch": 0, "iter": 21900, "iter_tflops": 44.92466863852099, "iter_time": 0.4095034713745117, "loss": 0.08551960438489914, "lr": 1.2004350292283897e-05, "seqlen": 7328.0, "step_tflops": 48.846528463617126, "step_time": 0.37662467193603516} +{"epoch": 0, "iter": 21901, "iter_tflops": 28.56187885914956, "iter_time": 0.7223297042846679, "loss": 0.0369328111410141, "lr": 1.1958190569652318e-05, "seqlen": 8192.0, "step_tflops": 30.357363032426, "step_time": 0.6796075630187989} +{"epoch": 0, "iter": 21902, "iter_tflops": 15.810811821648954, "iter_time": 1.3048724975585937, "loss": 0.03627753630280495, "lr": 1.1912060868451784e-05, "seqlen": 8192.0, "step_tflops": 18.987382570139054, "step_time": 1.0865685901641848} +{"epoch": 0, "iter": 21903, "iter_tflops": 42.32862374250209, "iter_time": 0.48740288925170905, "loss": 0.08327318727970123, "lr": 1.1865961643963828e-05, "seqlen": 8192.0, "step_tflops": 46.64017250231182, "step_time": 0.4423459949493408} +{"epoch": 0, "iter": 21904, "iter_tflops": 49.03005892158124, "iter_time": 0.420784595489502, "loss": 0.059537049382925034, "lr": 1.1819893351169185e-05, "seqlen": 8192.0, "step_tflops": 53.960486241885626, "step_time": 0.3823370571136474} +{"epoch": 0, "iter": 21905, "iter_tflops": 25.695451515961636, "iter_time": 0.8029083862304687, "loss": 0.6235747337341309, "lr": 1.1773856444743304e-05, "seqlen": 8192.0, "step_tflops": 27.493754394196245, "step_time": 0.7503920059204101} 
+{"epoch": 0, "iter": 21906, "iter_tflops": 7.661984246574647, "iter_time": 2.69265673828125, "loss": 0.5465964078903198, "lr": 1.1727851379051866e-05, "seqlen": 8192.0, "step_tflops": 9.233109812640244, "step_time": 2.2344685516357425} +{"epoch": 0, "iter": 21907, "iter_tflops": 13.306016506333634, "iter_time": 1.5505086364746095, "loss": 0.44327473640441895, "lr": 1.1681878608146297e-05, "seqlen": 8192.0, "step_tflops": 15.554519624403035, "step_time": 1.3263729133605955} +{"epoch": 0, "iter": 21908, "iter_tflops": 43.57914898706294, "iter_time": 0.47341662216186525, "loss": 0.38522741198539734, "lr": 1.1635938585759284e-05, "seqlen": 8192.0, "step_tflops": 46.97337508203033, "step_time": 0.43920824241638184} +{"epoch": 0, "iter": 21909, "iter_tflops": 15.907346146147734, "iter_time": 0.9397294158935547, "loss": 0.25937575101852417, "lr": 1.1590031765300306e-05, "seqlen": 5984.0, "step_tflops": 16.668575918245484, "step_time": 0.8968133316040039} +{"epoch": 0, "iter": 21910, "iter_tflops": 10.017397688104037, "iter_time": 1.492263916015625, "loss": 0.2826977074146271, "lr": 1.1544158599851146e-05, "seqlen": 5984.0, "step_tflops": 11.736727390893682, "step_time": 1.2736600761413572} +{"epoch": 0, "iter": 21911, "iter_tflops": 21.611736456938644, "iter_time": 0.6916890335083008, "loss": 0.2021419107913971, "lr": 1.1498319542161424e-05, "seqlen": 5984.0, "step_tflops": 23.26231370327571, "step_time": 0.6426102447509766} +{"epoch": 0, "iter": 21912, "iter_tflops": 24.10771211303526, "iter_time": 0.6200754776000977, "loss": 0.2623547911643982, "lr": 1.1452515044644134e-05, "seqlen": 5984.0, "step_tflops": 25.82122325855828, "step_time": 0.5789269142150878} +{"epoch": 0, "iter": 21913, "iter_tflops": 20.528541018367196, "iter_time": 1.0049956054687499, "loss": 0.024787044152617455, "lr": 1.1406745559371177e-05, "seqlen": 8192.0, "step_tflops": 22.141023489249235, "step_time": 0.9318039665222168} +{"epoch": 0, "iter": 21914, "iter_tflops": 22.279037204645793, 
"iter_time": 0.9260316467285157, "loss": 0.008483690209686756, "lr": 1.1361011538068891e-05, "seqlen": 8192.0, "step_tflops": 28.43493637310127, "step_time": 0.7255544109344483} +{"epoch": 0, "iter": 21915, "iter_tflops": 48.46687538691527, "iter_time": 0.42567409896850583, "loss": 0.019445709884166718, "lr": 1.1315313432113608e-05, "seqlen": 8192.0, "step_tflops": 53.612300952885285, "step_time": 0.384820146560669} +{"epoch": 0, "iter": 21916, "iter_tflops": 46.08001791701344, "iter_time": 0.4477232093811035, "loss": 0.005303398240357637, "lr": 1.1269651692527181e-05, "seqlen": 8192.0, "step_tflops": 50.98739529063526, "step_time": 0.40463125038146974} +{"epoch": 0, "iter": 21917, "iter_tflops": 34.59257175486281, "iter_time": 0.5964024200439454, "loss": 0.04862479865550995, "lr": 1.1224026769972545e-05, "seqlen": 8192.0, "step_tflops": 38.28696078796618, "step_time": 0.5388543014526367} +{"epoch": 0, "iter": 21918, "iter_tflops": 35.66420947422942, "iter_time": 0.5784817276000978, "loss": 0.06770934164524078, "lr": 1.117843911474927e-05, "seqlen": 8192.0, "step_tflops": 40.52982150691279, "step_time": 0.5090348968505859} +{"epoch": 0, "iter": 21919, "iter_tflops": 41.05211814011534, "iter_time": 0.5025585632324219, "loss": 0.07493375241756439, "lr": 1.1132889176789103e-05, "seqlen": 8192.0, "step_tflops": 45.195688979759396, "step_time": 0.45648365974426264} +{"epoch": 0, "iter": 21920, "iter_tflops": 43.788939665954295, "iter_time": 0.4711485061645508, "loss": 0.18810562789440155, "lr": 1.108737740565155e-05, "seqlen": 8192.0, "step_tflops": 48.07557706683841, "step_time": 0.4291387596130371} +{"epoch": 0, "iter": 21921, "iter_tflops": 3.3437674842071696, "iter_time": 0.809938377380371, "loss": 0.20673055946826935, "lr": 1.104190425051941e-05, "seqlen": 1104.0, "step_tflops": 3.5855888583994715, "step_time": 0.7553140411376953} +{"epoch": 0, "iter": 21922, "iter_tflops": 4.5182903790013835, "iter_time": 0.5993960952758789, "loss": 0.05798257887363434, "lr": 
1.099647016019437e-05, "seqlen": 1104.0, "step_tflops": 4.95041752383701, "step_time": 0.5470741806030274} +{"epoch": 0, "iter": 21923, "iter_tflops": 4.791322158211209, "iter_time": 0.5652397232055664, "loss": 0.2503613829612732, "lr": 1.0951075583092559e-05, "seqlen": 1104.0, "step_tflops": 5.214243979290714, "step_time": 0.5193937263488769} +{"epoch": 0, "iter": 21924, "iter_tflops": 5.276756138600079, "iter_time": 0.5132406234741211, "loss": 0.09049076586961746, "lr": 1.0905720967240124e-05, "seqlen": 1104.0, "step_tflops": 5.770391710477307, "step_time": 0.46933479499816894} +{"epoch": 0, "iter": 21925, "iter_tflops": 31.166364178911493, "iter_time": 0.6619666442871093, "loss": 0.310051828622818, "lr": 1.0860406760268818e-05, "seqlen": 8192.0, "step_tflops": 34.22745136394459, "step_time": 0.6027645263671876} +{"epoch": 0, "iter": 21926, "iter_tflops": 46.696561739339934, "iter_time": 0.4418118324279785, "loss": 0.34441667795181274, "lr": 1.0815133409411564e-05, "seqlen": 8192.0, "step_tflops": 50.79598023116545, "step_time": 0.40615602684020996} +{"epoch": 0, "iter": 21927, "iter_tflops": 51.34322656897816, "iter_time": 0.401826976776123, "loss": 0.4323149025440216, "lr": 1.076990136149806e-05, "seqlen": 8192.0, "step_tflops": 55.93970894288622, "step_time": 0.36880945396423337} +{"epoch": 0, "iter": 21928, "iter_tflops": 46.98799580263342, "iter_time": 0.4390715789794922, "loss": 0.40269431471824646, "lr": 1.0724711062950358e-05, "seqlen": 8192.0, "step_tflops": 50.89023733005726, "step_time": 0.4054037590026856} +{"epoch": 0, "iter": 21929, "iter_tflops": 33.52049774127492, "iter_time": 0.6154769439697265, "loss": 0.312374085187912, "lr": 1.0679562959778467e-05, "seqlen": 8192.0, "step_tflops": 35.75333020362317, "step_time": 0.5770397720336914} +{"epoch": 0, "iter": 21930, "iter_tflops": 15.700583843749035, "iter_time": 1.3140335235595704, "loss": 0.38420945405960083, "lr": 1.0634457497575926e-05, "seqlen": 8192.0, "step_tflops": 20.096772010255037, 
"step_time": 1.0265874290466308} +{"epoch": 0, "iter": 21931, "iter_tflops": 43.19333564283142, "iter_time": 0.4776452941894532, "loss": 0.36492210626602173, "lr": 1.0589395121515443e-05, "seqlen": 8192.0, "step_tflops": 46.45630730926089, "step_time": 0.444096715927124} +{"epoch": 0, "iter": 21932, "iter_tflops": 47.02431852017303, "iter_time": 0.4387324295043945, "loss": 0.44268712401390076, "lr": 1.0544376276344478e-05, "seqlen": 8192.0, "step_tflops": 50.860587204749045, "step_time": 0.4056400966644287} +{"epoch": 0, "iter": 21933, "iter_tflops": 30.184246582536165, "iter_time": 0.6835053329467774, "loss": 0.32317233085632324, "lr": 1.0499401406380859e-05, "seqlen": 8192.0, "step_tflops": 32.00409704812072, "step_time": 0.6446391372680663} +{"epoch": 0, "iter": 21934, "iter_tflops": 16.839125907371656, "iter_time": 1.2251879119873048, "loss": 0.30560293793678284, "lr": 1.0454470955508394e-05, "seqlen": 8192.0, "step_tflops": 19.7678818028228, "step_time": 1.0436673851013185} +{"epoch": 0, "iter": 21935, "iter_tflops": 47.77865505645626, "iter_time": 0.4318056564331055, "loss": 0.24804727733135223, "lr": 1.0409585367172491e-05, "seqlen": 8192.0, "step_tflops": 51.85634685682797, "step_time": 0.3978508853912353} +{"epoch": 0, "iter": 21936, "iter_tflops": 47.05981334241958, "iter_time": 0.4384015159606934, "loss": 0.23662728071212769, "lr": 1.036474508437579e-05, "seqlen": 8192.0, "step_tflops": 50.97220718418159, "step_time": 0.4047518177032471} +{"epoch": 0, "iter": 21937, "iter_tflops": 28.32355814433508, "iter_time": 0.7284075469970703, "loss": 0.17116418480873108, "lr": 1.0319950549673779e-05, "seqlen": 8192.0, "step_tflops": 29.957443910806923, "step_time": 0.6886800346374511} +{"epoch": 0, "iter": 21938, "iter_tflops": 12.48742632244311, "iter_time": 1.6521493682861328, "loss": 0.099245585501194, "lr": 1.0275202205170431e-05, "seqlen": 8192.0, "step_tflops": 15.587432913533101, "step_time": 1.323572240829468} +{"epoch": 0, "iter": 21939, "iter_tflops": 
38.608242658233905, "iter_time": 0.5343701782226562, "loss": 0.16516225039958954, "lr": 1.023050049251384e-05, "seqlen": 8192.0, "step_tflops": 42.56472250803411, "step_time": 0.4846993541717529} +{"epoch": 0, "iter": 21940, "iter_tflops": 43.0901857098988, "iter_time": 0.47878868865966795, "loss": 0.1365039199590683, "lr": 1.018584585289186e-05, "seqlen": 8192.0, "step_tflops": 47.106332168186555, "step_time": 0.4379685821533203} +{"epoch": 0, "iter": 21941, "iter_tflops": 21.033060502906242, "iter_time": 0.980888801574707, "loss": 0.3461417555809021, "lr": 1.0141238727027761e-05, "seqlen": 8192.0, "step_tflops": 22.74509133282914, "step_time": 0.9070569648742676} +{"epoch": 0, "iter": 21942, "iter_tflops": 28.00746173349423, "iter_time": 0.736628463745117, "loss": 0.28170058131217957, "lr": 1.0096679555175865e-05, "seqlen": 8192.0, "step_tflops": 32.40574618383279, "step_time": 0.6366492347717285} +{"epoch": 0, "iter": 21943, "iter_tflops": 38.99438416572511, "iter_time": 0.5290785827636719, "loss": 0.4397730231285095, "lr": 1.0052168777117207e-05, "seqlen": 8192.0, "step_tflops": 42.846008358475665, "step_time": 0.4815172824859619} +{"epoch": 0, "iter": 21944, "iter_tflops": 38.24641224462837, "iter_time": 0.5394255905151366, "loss": 0.28586333990097046, "lr": 1.0007706832155202e-05, "seqlen": 8192.0, "step_tflops": 41.936064530319825, "step_time": 0.4919654178619384} +{"epoch": 0, "iter": 21945, "iter_tflops": 33.41930168981747, "iter_time": 0.6173406524658203, "loss": 0.5254848599433899, "lr": 9.963294159111292e-06, "seqlen": 8192.0, "step_tflops": 36.769564747654876, "step_time": 0.5610915889739989} +{"epoch": 0, "iter": 21946, "iter_tflops": 35.31421095570325, "iter_time": 0.5842150497436523, "loss": 0.4843408465385437, "lr": 9.91893119632063e-06, "seqlen": 8192.0, "step_tflops": 38.584744796283836, "step_time": 0.5346956062316894} +{"epoch": 0, "iter": 21947, "iter_tflops": 34.88576393664596, "iter_time": 0.5913900451660156, "loss": 0.4774199426174164, 
"lr": 9.874618381627751e-06, "seqlen": 8192.0, "step_tflops": 37.9416822238419, "step_time": 0.5437580070495605} +{"epoch": 0, "iter": 21948, "iter_tflops": 38.36111320086894, "iter_time": 0.5378126907348633, "loss": 0.5135754346847534, "lr": 9.830356152382247e-06, "seqlen": 8192.0, "step_tflops": 41.830779984321815, "step_time": 0.49320365333557126} +{"epoch": 0, "iter": 21949, "iter_tflops": 17.714563699009357, "iter_time": 1.1646402282714845, "loss": 0.32526013255119324, "lr": 9.786144945434447e-06, "seqlen": 8192.0, "step_tflops": 18.908511811529642, "step_time": 1.091100860595703} +{"epoch": 0, "iter": 21950, "iter_tflops": 17.25785534513976, "iter_time": 1.1954610290527343, "loss": 0.3519619405269623, "lr": 9.741985197131118e-06, "seqlen": 8192.0, "step_tflops": 21.551755811111306, "step_time": 0.9572813320159911} +{"epoch": 0, "iter": 21951, "iter_tflops": 48.02326790689001, "iter_time": 0.4296061973571777, "loss": 0.3124783933162689, "lr": 9.697877343311145e-06, "seqlen": 8192.0, "step_tflops": 52.32875518385239, "step_time": 0.3942592067718506} +{"epoch": 0, "iter": 21952, "iter_tflops": 45.80878862644683, "iter_time": 0.45037413406372073, "loss": 0.28607261180877686, "lr": 9.65382181930124e-06, "seqlen": 8192.0, "step_tflops": 49.34847144322309, "step_time": 0.4180695552825928} +{"epoch": 0, "iter": 21953, "iter_tflops": 16.712828543163553, "iter_time": 1.234446548461914, "loss": 0.06739167124032974, "lr": 9.609819059911643e-06, "seqlen": 8192.0, "step_tflops": 17.335534940339677, "step_time": 1.190104232788086} +{"epoch": 0, "iter": 21954, "iter_tflops": 15.455507363223163, "iter_time": 1.3348700256347654, "loss": 0.07935107499361038, "lr": 9.565869499431816e-06, "seqlen": 8192.0, "step_tflops": 19.95521938564131, "step_time": 1.0338695411682128} +{"epoch": 0, "iter": 21955, "iter_tflops": 49.413063147478866, "iter_time": 0.4175230636596679, "loss": 0.08936802297830582, "lr": 9.521973571626184e-06, "seqlen": 8192.0, "step_tflops": 53.67851826131897, 
"step_time": 0.3843454360961913} +{"epoch": 0, "iter": 21956, "iter_tflops": 46.344716927246495, "iter_time": 0.44516602706909175, "loss": 0.050260186195373535, "lr": 9.478131709729831e-06, "seqlen": 8192.0, "step_tflops": 50.27576986074834, "step_time": 0.4103585796356201} +{"epoch": 0, "iter": 21957, "iter_tflops": 20.271390389900603, "iter_time": 0.8202444229125977, "loss": 0.025229403749108315, "lr": 9.434344346444237e-06, "seqlen": 6640.0, "step_tflops": 21.26642250412857, "step_time": 0.7818661041259766} +{"epoch": 0, "iter": 21958, "iter_tflops": 14.403553008692125, "iter_time": 1.154402313232422, "loss": 0.041457757353782654, "lr": 9.390611913932996e-06, "seqlen": 6640.0, "step_tflops": 17.478133782172055, "step_time": 0.9513312530517577} +{"epoch": 0, "iter": 21959, "iter_tflops": 43.89094673907857, "iter_time": 0.37883655166625985, "loss": 0.020948780700564384, "lr": 9.346934843817563e-06, "seqlen": 6640.0, "step_tflops": 48.25213087294212, "step_time": 0.3445960750579834} +{"epoch": 0, "iter": 21960, "iter_tflops": 44.62563699379884, "iter_time": 0.37259960937500003, "loss": 0.02182229980826378, "lr": 9.303313567172985e-06, "seqlen": 6640.0, "step_tflops": 48.9918386055529, "step_time": 0.339393159866333} +{"epoch": 0, "iter": 21961, "iter_tflops": 34.79849147819764, "iter_time": 0.5928732147216796, "loss": 0.060073450207710266, "lr": 9.259748514523654e-06, "seqlen": 8192.0, "step_tflops": 37.160210853703894, "step_time": 0.5551931228637695} +{"epoch": 0, "iter": 21962, "iter_tflops": 27.291393745817423, "iter_time": 0.7559560241699218, "loss": 0.06926684826612473, "lr": 9.216240115839053e-06, "seqlen": 8192.0, "step_tflops": 34.86010062964322, "step_time": 0.5918254146575928} +{"epoch": 0, "iter": 21963, "iter_tflops": 44.54463910349811, "iter_time": 0.46315547561645504, "loss": 0.08876194059848785, "lr": 9.17278880052951e-06, "seqlen": 8192.0, "step_tflops": 49.146274462093224, "step_time": 0.419789571762085} +{"epoch": 0, "iter": 21964, "iter_tflops": 
38.51180831156816, "iter_time": 0.535708251953125, "loss": 0.05302940309047699, "lr": 9.129394997441965e-06, "seqlen": 8192.0, "step_tflops": 42.23613445471076, "step_time": 0.48847021102905275} +{"epoch": 0, "iter": 21965, "iter_tflops": 13.49403814608256, "iter_time": 0.9448230514526368, "loss": 0.01825009658932686, "lr": 9.086059134855735e-06, "seqlen": 5120.0, "step_tflops": 14.472823908087095, "step_time": 0.8809254074096678} +{"epoch": 0, "iter": 21966, "iter_tflops": 9.514957192835139, "iter_time": 1.339940689086914, "loss": 0.030504778027534485, "lr": 9.042781640478292e-06, "seqlen": 5120.0, "step_tflops": 11.268932414628386, "step_time": 1.1313829765319825} +{"epoch": 0, "iter": 21967, "iter_tflops": 32.61541067327755, "iter_time": 0.39090350341796876, "loss": 0.025717325508594513, "lr": 8.999562941441031e-06, "seqlen": 5120.0, "step_tflops": 35.73100152641025, "step_time": 0.35681838607788086} +{"epoch": 0, "iter": 21968, "iter_tflops": 34.492245015680666, "iter_time": 0.3696331825256347, "loss": 0.02525504119694233, "lr": 8.956403464295062e-06, "seqlen": 5120.0, "step_tflops": 37.68645438835256, "step_time": 0.3383040008544922} +{"epoch": 0, "iter": 21969, "iter_tflops": 29.63813596360734, "iter_time": 0.6960995635986327, "loss": 0.1874627023935318, "lr": 8.913303635007002e-06, "seqlen": 8192.0, "step_tflops": 31.51609044491122, "step_time": 0.6546209640502929} +{"epoch": 0, "iter": 21970, "iter_tflops": 16.70674766921861, "iter_time": 1.2348958587646486, "loss": 0.1651538610458374, "lr": 8.870263878954767e-06, "seqlen": 8192.0, "step_tflops": 21.864578779293936, "step_time": 0.94358522605896} +{"epoch": 0, "iter": 21971, "iter_tflops": 37.95515744194213, "iter_time": 0.5435649566650391, "loss": 0.09413430094718933, "lr": 8.827284620923369e-06, "seqlen": 8192.0, "step_tflops": 41.731376871542544, "step_time": 0.4943784523010254} +{"epoch": 0, "iter": 21972, "iter_tflops": 42.8144218053593, "iter_time": 0.48187252426147464, "loss": 0.1108681783080101, 
"lr": 8.784366285100738e-06, "seqlen": 8192.0, "step_tflops": 46.63379484994616, "step_time": 0.44240649032592777} +{"epoch": 0, "iter": 21973, "iter_tflops": 22.722698375196334, "iter_time": 0.9079508590698242, "loss": 0.11857723444700241, "lr": 8.741509295073525e-06, "seqlen": 8192.0, "step_tflops": 24.380928356857527, "step_time": 0.846198028564453} +{"epoch": 0, "iter": 21974, "iter_tflops": 20.026542710443675, "iter_time": 1.0301874771118165, "loss": 0.1123066172003746, "lr": 8.698714073822906e-06, "seqlen": 8192.0, "step_tflops": 22.84833042264048, "step_time": 0.9029584712982177} +{"epoch": 0, "iter": 21975, "iter_tflops": 48.945853678524266, "iter_time": 0.42150850296020503, "loss": 0.11708036065101624, "lr": 8.655981043720454e-06, "seqlen": 8192.0, "step_tflops": 53.22794599144199, "step_time": 0.38759890365600586} +{"epoch": 0, "iter": 21976, "iter_tflops": 51.24592858619711, "iter_time": 0.4025899047851562, "loss": 0.15960924327373505, "lr": 8.61331062652391e-06, "seqlen": 8192.0, "step_tflops": 55.85432531868071, "step_time": 0.36937324714660646} +{"epoch": 0, "iter": 21977, "iter_tflops": 33.274513523063824, "iter_time": 0.6200269012451172, "loss": 0.06654013693332672, "lr": 8.570703243373076e-06, "seqlen": 8192.0, "step_tflops": 35.5206801317382, "step_time": 0.5808192138671875} +{"epoch": 0, "iter": 21978, "iter_tflops": 10.515802054138252, "iter_time": 1.9619134521484376, "loss": 0.0442240945994854, "lr": 8.528159314785614e-06, "seqlen": 8192.0, "step_tflops": 11.779682406693208, "step_time": 1.7514133911132812} +{"epoch": 0, "iter": 21979, "iter_tflops": 21.927221809461592, "iter_time": 0.940889533996582, "loss": 0.06439007818698883, "lr": 8.48567926065293e-06, "seqlen": 8192.0, "step_tflops": 26.98516454954008, "step_time": 0.7645346565246582} +{"epoch": 0, "iter": 21980, "iter_tflops": 38.86745905676506, "iter_time": 0.5308063354492187, "loss": 0.02588862180709839, "lr": 8.44326350023601e-06, "seqlen": 8192.0, "step_tflops": 42.74195385282798, 
"step_time": 0.48268952751159666} +{"epoch": 0, "iter": 21981, "iter_tflops": 17.323283836927036, "iter_time": 0.9195950241088867, "loss": 0.3004394471645355, "lr": 8.400912452161272e-06, "seqlen": 6368.0, "step_tflops": 18.38265537956032, "step_time": 0.8665998077392578} +{"epoch": 0, "iter": 21982, "iter_tflops": 17.585227412016696, "iter_time": 0.9058970489501954, "loss": 0.16204774379730225, "lr": 8.35862653441648e-06, "seqlen": 6368.0, "step_tflops": 21.62458755027547, "step_time": 0.7366802062988281} +{"epoch": 0, "iter": 21983, "iter_tflops": 28.99864183485171, "iter_time": 0.5493500595092773, "loss": 0.20043782889842987, "lr": 8.316406164346556e-06, "seqlen": 6368.0, "step_tflops": 30.88908244266595, "step_time": 0.5157293243408203} +{"epoch": 0, "iter": 21984, "iter_tflops": 29.57029424012905, "iter_time": 0.5387300338745118, "loss": 0.2550521194934845, "lr": 8.274251758649519e-06, "seqlen": 6368.0, "step_tflops": 31.437587699794523, "step_time": 0.506731170654297} +{"epoch": 0, "iter": 21985, "iter_tflops": 33.72779931243225, "iter_time": 0.6116940307617187, "loss": 0.6209157109260559, "lr": 8.232163733372323e-06, "seqlen": 8192.0, "step_tflops": 36.02142982206872, "step_time": 0.5727449913024902} +{"epoch": 0, "iter": 21986, "iter_tflops": 22.942811412075642, "iter_time": 0.8992399902343751, "loss": 0.5584537386894226, "lr": 8.190142503906798e-06, "seqlen": 8192.0, "step_tflops": 28.300778666467128, "step_time": 0.7289938468933106} +{"epoch": 0, "iter": 21987, "iter_tflops": 42.595796302498535, "iter_time": 0.48434576416015623, "loss": 0.5330014824867249, "lr": 8.148188484985505e-06, "seqlen": 8192.0, "step_tflops": 45.78241144940075, "step_time": 0.45063361358642573} +{"epoch": 0, "iter": 21988, "iter_tflops": 42.557995712857675, "iter_time": 0.4847759666442872, "loss": 0.512713611125946, "lr": 8.106302090677683e-06, "seqlen": 8192.0, "step_tflops": 45.791223082095705, "step_time": 0.4505468978881836} +{"epoch": 0, "iter": 21989, "iter_tflops": 
38.97312432649156, "iter_time": 0.5293671951293946, "loss": 0.2542341947555542, "lr": 8.064483734385127e-06, "seqlen": 8192.0, "step_tflops": 41.94397501876581, "step_time": 0.49187263488769534} +{"epoch": 0, "iter": 21990, "iter_tflops": 33.71540035170424, "iter_time": 0.6119189834594727, "loss": 0.2794913351535797, "lr": 8.022733828838124e-06, "seqlen": 8192.0, "step_tflops": 42.472135438857954, "step_time": 0.485755973815918} +{"epoch": 0, "iter": 21991, "iter_tflops": 47.0168276210851, "iter_time": 0.43880233001708985, "loss": 0.2774064242839813, "lr": 7.981052786091401e-06, "seqlen": 8192.0, "step_tflops": 50.44270649310437, "step_time": 0.4090005264282226} +{"epoch": 0, "iter": 21992, "iter_tflops": 48.85699213393654, "iter_time": 0.42227514648437503, "loss": 0.33020028471946716, "lr": 7.939441017520012e-06, "seqlen": 8192.0, "step_tflops": 53.113485122430504, "step_time": 0.3884341888427735} +{"epoch": 0, "iter": 21993, "iter_tflops": 44.22385239601116, "iter_time": 0.46651506805419923, "loss": 0.03758388012647629, "lr": 7.897898933815319e-06, "seqlen": 8192.0, "step_tflops": 48.255077297462485, "step_time": 0.42754244041442874} +{"epoch": 0, "iter": 21994, "iter_tflops": 50.43835170166607, "iter_time": 0.40903583908081054, "loss": 0.042192310094833374, "lr": 7.856426944980911e-06, "seqlen": 8192.0, "step_tflops": 55.20857715284742, "step_time": 0.3736936283111572} +{"epoch": 0, "iter": 21995, "iter_tflops": 50.36615786816208, "iter_time": 0.409622142791748, "loss": 0.07471662759780884, "lr": 7.815025460328585e-06, "seqlen": 8192.0, "step_tflops": 54.888079463645745, "step_time": 0.3758756675720215} +{"epoch": 0, "iter": 21996, "iter_tflops": 52.99566146142748, "iter_time": 0.3892977828979492, "loss": 0.05183584988117218, "lr": 7.773694888474268e-06, "seqlen": 8192.0, "step_tflops": 57.3052764013448, "step_time": 0.36002083587646483} +{"epoch": 0, "iter": 21997, "iter_tflops": 37.14877987447767, "iter_time": 0.5553639602661133, "loss": 0.3446683883666992, 
"lr": 7.73243563733403e-06, "seqlen": 8192.0, "step_tflops": 39.9883902734806, "step_time": 0.5159270820617675} +{"epoch": 0, "iter": 21998, "iter_tflops": 12.855853598227338, "iter_time": 1.6048015289306639, "loss": 0.3403876721858978, "lr": 7.691248114120012e-06, "seqlen": 8192.0, "step_tflops": 15.703798584966336, "step_time": 1.3137645263671875} +{"epoch": 0, "iter": 21999, "iter_tflops": 21.27523872805295, "iter_time": 0.9697232437133789, "loss": 0.43757364153862, "lr": 7.650132725336451e-06, "seqlen": 8192.0, "step_tflops": 25.55202410270184, "step_time": 0.8074152336120606} +{"epoch": 0, "iter": 22000, "iter_tflops": 46.982194663914164, "iter_time": 0.4391257934570313, "loss": 0.4131377339363098, "lr": 7.609089876775629e-06, "seqlen": 8192.0, "step_tflops": 50.91792703067513, "step_time": 0.40518329620361326} +{"epoch": 0, "iter": 22001, "iter_tflops": 16.76100429176661, "iter_time": 0.8772490234374999, "loss": 0.23427261412143707, "lr": 7.568119973513886e-06, "seqlen": 5888.0, "step_tflops": 17.56592730328178, "step_time": 0.8370508651733398} +{"epoch": 0, "iter": 22002, "iter_tflops": 13.520278191292629, "iter_time": 1.0875201263427734, "loss": 0.2394682914018631, "lr": 7.527223419907638e-06, "seqlen": 5888.0, "step_tflops": 15.104885343638983, "step_time": 0.9734317283630372} +{"epoch": 0, "iter": 22003, "iter_tflops": 25.688861768107074, "iter_time": 0.5723715896606446, "loss": 0.2223762720823288, "lr": 7.486400619589349e-06, "seqlen": 5888.0, "step_tflops": 27.399849824751502, "step_time": 0.536629753112793} +{"epoch": 0, "iter": 22004, "iter_tflops": 27.135135296414937, "iter_time": 0.5418647994995117, "loss": 0.16471326351165771, "lr": 7.4456519754635884e-06, "seqlen": 5888.0, "step_tflops": 28.860629310214136, "step_time": 0.50946826171875} +{"epoch": 0, "iter": 22005, "iter_tflops": 24.57929415392523, "iter_time": 0.8393688354492187, "loss": 0.5442801117897034, "lr": 7.404977889703009e-06, "seqlen": 8192.0, "step_tflops": 25.842034282546106, 
"step_time": 0.7983540802001953} +{"epoch": 0, "iter": 22006, "iter_tflops": 13.431770755547483, "iter_time": 1.5359920806884766, "loss": 0.4609856605529785, "lr": 7.3643787637444305e-06, "seqlen": 8192.0, "step_tflops": 16.523361529032957, "step_time": 1.2486014709472655} +{"epoch": 0, "iter": 22007, "iter_tflops": 42.03914529734708, "iter_time": 0.49075910949707036, "loss": 0.42670804262161255, "lr": 7.323854998284823e-06, "seqlen": 8192.0, "step_tflops": 45.49807222800757, "step_time": 0.4534498386383056} +{"epoch": 0, "iter": 22008, "iter_tflops": 41.548480114136005, "iter_time": 0.4965547103881836, "loss": 0.4624568819999695, "lr": 7.283406993277402e-06, "seqlen": 8192.0, "step_tflops": 44.64958638790606, "step_time": 0.4620668449401855} +{"epoch": 0, "iter": 22009, "iter_tflops": 38.32976467729941, "iter_time": 0.5382525482177735, "loss": 0.3128885328769684, "lr": 7.243035147927644e-06, "seqlen": 8192.0, "step_tflops": 41.47430426622381, "step_time": 0.49744278717041024} +{"epoch": 0, "iter": 22010, "iter_tflops": 27.25622145268945, "iter_time": 0.7569315338134766, "loss": 0.24973846971988678, "lr": 7.202739860689357e-06, "seqlen": 8192.0, "step_tflops": 34.34320896942187, "step_time": 0.6007328414916991} +{"epoch": 0, "iter": 22011, "iter_tflops": 47.64451665926662, "iter_time": 0.43302136230468746, "loss": 0.21247947216033936, "lr": 7.1625215292607685e-06, "seqlen": 8192.0, "step_tflops": 52.20110041939735, "step_time": 0.39522334480285637} +{"epoch": 0, "iter": 22012, "iter_tflops": 43.799292588370015, "iter_time": 0.47103713989257817, "loss": 0.1912379264831543, "lr": 7.122380550580563e-06, "seqlen": 8192.0, "step_tflops": 47.12125912711771, "step_time": 0.43782984352111814} +{"epoch": 0, "iter": 22013, "iter_tflops": 21.663628952891685, "iter_time": 0.7713202209472656, "loss": 0.22248956561088562, "lr": 7.082317320824005e-06, "seqlen": 6672.0, "step_tflops": 23.232314924386532, "step_time": 0.7192393493652344} +{"epoch": 0, "iter": 22014, "iter_tflops": 
12.011575356860074, "iter_time": 1.3911243591308593, "loss": 0.19477222859859467, "lr": 7.042332235398986e-06, "seqlen": 6672.0, "step_tflops": 14.71726927621802, "step_time": 1.1353733329772948} +{"epoch": 0, "iter": 22015, "iter_tflops": 24.274673608477958, "iter_time": 0.6883550872802734, "loss": 0.2635524570941925, "lr": 7.002425688942171e-06, "seqlen": 6672.0, "step_tflops": 26.119294032712983, "step_time": 0.6397414512634279} +{"epoch": 0, "iter": 22016, "iter_tflops": 25.33389243973509, "iter_time": 0.6595747222900391, "loss": 0.23239171504974365, "lr": 6.962598075315047e-06, "seqlen": 6672.0, "step_tflops": 27.277223208673202, "step_time": 0.6125841674804687} +{"epoch": 0, "iter": 22017, "iter_tflops": 18.323791619986398, "iter_time": 1.1259183654785156, "loss": 0.22692309319972992, "lr": 6.922849787600097e-06, "seqlen": 8192.0, "step_tflops": 19.626869728407833, "step_time": 1.0511657638549803} +{"epoch": 0, "iter": 22018, "iter_tflops": 31.154907297890976, "iter_time": 0.662210075378418, "loss": 0.25633931159973145, "lr": 6.883181218096863e-06, "seqlen": 8192.0, "step_tflops": 35.201161004688736, "step_time": 0.5860912799835205} +{"epoch": 0, "iter": 22019, "iter_tflops": 48.96520606200749, "iter_time": 0.42134191131591797, "loss": 0.3132563531398773, "lr": 6.843592758318111e-06, "seqlen": 8192.0, "step_tflops": 53.12370807342896, "step_time": 0.3883594398498535} +{"epoch": 0, "iter": 22020, "iter_tflops": 48.344701243821426, "iter_time": 0.42674983978271486, "loss": 0.23137448728084564, "lr": 6.804084798985965e-06, "seqlen": 8192.0, "step_tflops": 52.290663424608816, "step_time": 0.39454640960693355} +{"epoch": 0, "iter": 22021, "iter_tflops": 28.012273219424635, "iter_time": 0.736501937866211, "loss": 0.16589997708797455, "lr": 6.764657730028022e-06, "seqlen": 8192.0, "step_tflops": 29.533738225004125, "step_time": 0.6985601806640624} +{"epoch": 0, "iter": 22022, "iter_tflops": 9.85953434037849, "iter_time": 2.092501815795899, "loss": 
0.20853176712989807, "lr": 6.7253119405735475e-06, "seqlen": 8192.0, "step_tflops": 11.613121331072806, "step_time": 1.776533020019531} +{"epoch": 0, "iter": 22023, "iter_tflops": 13.229413752784962, "iter_time": 1.5594866027832033, "loss": 0.24930007755756378, "lr": 6.686047818949586e-06, "seqlen": 8192.0, "step_tflops": 16.195367195758617, "step_time": 1.2738885917663576} +{"epoch": 0, "iter": 22024, "iter_tflops": 34.304791073838636, "iter_time": 0.6014056015014649, "loss": 0.18041940033435822, "lr": 6.646865752677186e-06, "seqlen": 8192.0, "step_tflops": 39.333862220476846, "step_time": 0.5245122737884522} +{"epoch": 0, "iter": 22025, "iter_tflops": 14.980354835094769, "iter_time": 1.098996078491211, "loss": 0.11852942407131195, "lr": 6.6077661284674966e-06, "seqlen": 6576.0, "step_tflops": 15.599686960607784, "step_time": 1.0553642044067382} +{"epoch": 0, "iter": 22026, "iter_tflops": 10.176370625757173, "iter_time": 1.6178018493652344, "loss": 0.325537770986557, "lr": 6.568749332218045e-06, "seqlen": 6576.0, "step_tflops": 12.840119450384433, "step_time": 1.2821805343627928} +{"epoch": 0, "iter": 22027, "iter_tflops": 24.799725975567128, "iter_time": 0.6638521423339845, "loss": 0.16355052590370178, "lr": 6.529815749008846e-06, "seqlen": 6576.0, "step_tflops": 26.710109321915652, "step_time": 0.6163715400695802} +{"epoch": 0, "iter": 22028, "iter_tflops": 23.894898303917945, "iter_time": 0.688990219116211, "loss": 0.2916499972343445, "lr": 6.490965763098655e-06, "seqlen": 6576.0, "step_tflops": 25.77899592226546, "step_time": 0.6386343078613281} +{"epoch": 0, "iter": 22029, "iter_tflops": 7.992977320119418, "iter_time": 1.049985580444336, "loss": 0.03966858983039856, "lr": 6.452199757921144e-06, "seqlen": 3392.0, "step_tflops": 8.617993844004502, "step_time": 0.9738358001708984} +{"epoch": 0, "iter": 22030, "iter_tflops": 9.552862120791865, "iter_time": 0.878533660888672, "loss": 0.03719577193260193, "lr": 6.41351811608114e-06, "seqlen": 3392.0, "step_tflops": 
11.896063631099556, "step_time": 0.7054863853454589} +{"epoch": 0, "iter": 22031, "iter_tflops": 17.688578363806297, "iter_time": 0.47445932388305667, "loss": 0.045877229422330856, "lr": 6.374921219350826e-06, "seqlen": 3392.0, "step_tflops": 19.48383231569697, "step_time": 0.43074230957031245} +{"epoch": 0, "iter": 22032, "iter_tflops": 16.580652467788045, "iter_time": 0.5061628875732421, "loss": 0.0613621324300766, "lr": 6.336409448665989e-06, "seqlen": 3392.0, "step_tflops": 18.198165008368758, "step_time": 0.4611734714508057} +{"epoch": 0, "iter": 22033, "iter_tflops": 24.942336808003375, "iter_time": 0.8271515884399414, "loss": 0.09783057123422623, "lr": 6.297983184122264e-06, "seqlen": 8192.0, "step_tflops": 26.926553284166836, "step_time": 0.7661988258361816} +{"epoch": 0, "iter": 22034, "iter_tflops": 14.85072192548782, "iter_time": 1.389231689453125, "loss": 0.07550162076950073, "lr": 6.259642804971361e-06, "seqlen": 8192.0, "step_tflops": 19.093679496502777, "step_time": 1.0805195255279543} +{"epoch": 0, "iter": 22035, "iter_tflops": 38.757527352395414, "iter_time": 0.5323119125366211, "loss": 0.04284808784723282, "lr": 6.221388689617353e-06, "seqlen": 8192.0, "step_tflops": 42.65711641700927, "step_time": 0.4836495113372803} +{"epoch": 0, "iter": 22036, "iter_tflops": 43.80216201622147, "iter_time": 0.47100628280639645, "loss": 0.09973066300153732, "lr": 6.1832212156129045e-06, "seqlen": 8192.0, "step_tflops": 48.17954265512026, "step_time": 0.42821273040771485} +{"epoch": 0, "iter": 22037, "iter_tflops": 31.790244856005934, "iter_time": 0.6489756088256836, "loss": 0.1861777901649475, "lr": 6.145140759655586e-06, "seqlen": 8192.0, "step_tflops": 35.3244777607967, "step_time": 0.5840452518463135} +{"epoch": 0, "iter": 22038, "iter_tflops": 35.33397833957708, "iter_time": 0.5838882141113281, "loss": 0.20673413574695587, "lr": 6.1071476975841114e-06, "seqlen": 8192.0, "step_tflops": 38.69907690496228, "step_time": 0.5331159076690674} +{"epoch": 0, "iter": 
22039, "iter_tflops": 35.22244172153487, "iter_time": 0.5857371749877929, "loss": 0.23366297781467438, "lr": 6.0692424043746794e-06, "seqlen": 8192.0, "step_tflops": 38.46671713697298, "step_time": 0.5363362159729004} +{"epoch": 0, "iter": 22040, "iter_tflops": 40.12612766404596, "iter_time": 0.5141561050415039, "loss": 0.2839832007884979, "lr": 6.031425254137223e-06, "seqlen": 8192.0, "step_tflops": 43.866548118679525, "step_time": 0.47031495285034175} +{"epoch": 0, "iter": 22041, "iter_tflops": 15.078451963889336, "iter_time": 1.3682501068115234, "loss": 0.3746652603149414, "lr": 5.993696620111741e-06, "seqlen": 8192.0, "step_tflops": 16.024617871212225, "step_time": 1.2874624328613282} +{"epoch": 0, "iter": 22042, "iter_tflops": 21.153428024061174, "iter_time": 0.9753073348999023, "loss": 0.6184440851211548, "lr": 5.956056874664633e-06, "seqlen": 8192.0, "step_tflops": 26.080882201985325, "step_time": 0.7910427780151368} +{"epoch": 0, "iter": 22043, "iter_tflops": 35.87103665963458, "iter_time": 0.5751462860107421, "loss": 0.5842316746711731, "lr": 5.91850638928498e-06, "seqlen": 8192.0, "step_tflops": 39.12219770200301, "step_time": 0.5273500652313232} +{"epoch": 0, "iter": 22044, "iter_tflops": 36.33185564101648, "iter_time": 0.5678513565063477, "loss": 0.474128782749176, "lr": 5.881045534580923e-06, "seqlen": 8192.0, "step_tflops": 39.54862238340259, "step_time": 0.5216640243530273} +{"epoch": 0, "iter": 22045, "iter_tflops": 21.87203231216776, "iter_time": 0.943263671875, "loss": 0.39139240980148315, "lr": 5.8436746802759634e-06, "seqlen": 8192.0, "step_tflops": 23.232453115546413, "step_time": 0.8880290603637694} +{"epoch": 0, "iter": 22046, "iter_tflops": 9.22211976973171, "iter_time": 2.237131378173828, "loss": 0.3776143789291382, "lr": 5.806394195205357e-06, "seqlen": 8192.0, "step_tflops": 11.386171364722605, "step_time": 1.811943000793457} +{"epoch": 0, "iter": 22047, "iter_tflops": 11.225825244688686, "iter_time": 1.8378242187499998, "loss": 
0.48390594124794006, "lr": 5.7692044473124276e-06, "seqlen": 8192.0, "step_tflops": 13.993467263888027, "step_time": 1.4743374977111816} +{"epoch": 0, "iter": 22048, "iter_tflops": 42.680522468612644, "iter_time": 0.48338427734375006, "loss": 0.36372262239456177, "lr": 5.732105803644987e-06, "seqlen": 8192.0, "step_tflops": 52.13761663568976, "step_time": 0.39570457649230956} +{"epoch": 0, "iter": 22049, "iter_tflops": 24.175001150034728, "iter_time": 0.6996867523193359, "loss": 0.19677132368087769, "lr": 5.695098630351665e-06, "seqlen": 6752.0, "step_tflops": 25.627069078220675, "step_time": 0.6600414581298829} +{"epoch": 0, "iter": 22050, "iter_tflops": 10.662808767032253, "iter_time": 1.5863482513427734, "loss": 0.18215227127075195, "lr": 5.658183292678315e-06, "seqlen": 6752.0, "step_tflops": 13.440317515126619, "step_time": 1.2585214614868163} +{"epoch": 0, "iter": 22051, "iter_tflops": 24.42470432951305, "iter_time": 0.6925335845947265, "loss": 0.22782129049301147, "lr": 5.621360154964428e-06, "seqlen": 6752.0, "step_tflops": 26.20326390160077, "step_time": 0.6455275230407715} +{"epoch": 0, "iter": 22052, "iter_tflops": 26.107813978472215, "iter_time": 0.6478875656127929, "loss": 0.1727885901927948, "lr": 5.584629580639495e-06, "seqlen": 6752.0, "step_tflops": 27.978357940450532, "step_time": 0.6045718650817872} +{"epoch": 0, "iter": 22053, "iter_tflops": 16.26114443043925, "iter_time": 1.2687356414794921, "loss": 0.08435360342264175, "lr": 5.547991932219466e-06, "seqlen": 8192.0, "step_tflops": 17.52830122070969, "step_time": 1.177016143798828} +{"epoch": 0, "iter": 22054, "iter_tflops": 17.745238843077242, "iter_time": 1.1626269836425782, "loss": 0.07677267491817474, "lr": 5.511447571303134e-06, "seqlen": 8192.0, "step_tflops": 23.633998670260016, "step_time": 0.8729412994384766} +{"epoch": 0, "iter": 22055, "iter_tflops": 39.184236257132675, "iter_time": 0.5265151367187499, "loss": 0.12674997746944427, "lr": 5.474996858568593e-06, "seqlen": 8192.0, 
"step_tflops": 42.79495182867865, "step_time": 0.48209175682067873} +{"epoch": 0, "iter": 22056, "iter_tflops": 39.44547538171463, "iter_time": 0.5230281372070311, "loss": 0.06918919831514359, "lr": 5.438640153769654e-06, "seqlen": 8192.0, "step_tflops": 43.47569135122973, "step_time": 0.47454319572448733} +{"epoch": 0, "iter": 22057, "iter_tflops": 21.2630884935785, "iter_time": 0.9702773666381835, "loss": 0.2656157612800598, "lr": 5.402377815732326e-06, "seqlen": 8192.0, "step_tflops": 22.620541409637408, "step_time": 0.91205126953125} +{"epoch": 0, "iter": 22058, "iter_tflops": 12.627992480255072, "iter_time": 1.633758773803711, "loss": 0.2709871828556061, "lr": 5.366210202351234e-06, "seqlen": 8192.0, "step_tflops": 14.567546224904001, "step_time": 1.4162366943359375} +{"epoch": 0, "iter": 22059, "iter_tflops": 10.345134873175958, "iter_time": 1.9942798004150388, "loss": 0.2209307998418808, "lr": 5.330137670586132e-06, "seqlen": 8192.0, "step_tflops": 12.95092304184385, "step_time": 1.5930210876464845} +{"epoch": 0, "iter": 22060, "iter_tflops": 22.942383767410558, "iter_time": 0.8992567520141601, "loss": 0.1498262882232666, "lr": 5.29416057645834e-06, "seqlen": 8192.0, "step_tflops": 36.1341506859341, "step_time": 0.5709583072662353} +{"epoch": 0, "iter": 22061, "iter_tflops": 15.68280528231751, "iter_time": 0.9401634674072266, "loss": 0.2454102337360382, "lr": 5.258279275047247e-06, "seqlen": 5904.0, "step_tflops": 16.962089860890234, "step_time": 0.8692561302185058} +{"epoch": 0, "iter": 22062, "iter_tflops": 24.103492918518537, "iter_time": 0.6117121963500977, "loss": 0.13543546199798584, "lr": 5.222494120486821e-06, "seqlen": 5904.0, "step_tflops": 25.668219353162623, "step_time": 0.5744224166870118} +{"epoch": 0, "iter": 22063, "iter_tflops": 26.585302116265016, "iter_time": 0.5546072235107422, "loss": 0.24706000089645386, "lr": 5.186805465962085e-06, "seqlen": 5904.0, "step_tflops": 28.383060617953273, "step_time": 0.5194788818359375} +{"epoch": 0, 
"iter": 22064, "iter_tflops": 27.505106591923703, "iter_time": 0.5360604782104492, "loss": 0.2197035700082779, "lr": 5.1512136637056556e-06, "seqlen": 5904.0, "step_tflops": 29.255196194329983, "step_time": 0.5039925384521484} +{"epoch": 0, "iter": 22065, "iter_tflops": 27.86568584275914, "iter_time": 0.7403763046264648, "loss": 0.0023332065902650356, "lr": 5.115719064994246e-06, "seqlen": 8192.0, "step_tflops": 29.50210974685845, "step_time": 0.6993090896606444} +{"epoch": 0, "iter": 22066, "iter_tflops": 12.07161609071635, "iter_time": 1.709058120727539, "loss": 0.002771724248304963, "lr": 5.080322020145225e-06, "seqlen": 8192.0, "step_tflops": 15.969680084965498, "step_time": 1.291891471862793} +{"epoch": 0, "iter": 22067, "iter_tflops": 55.482519478352856, "iter_time": 0.37184853363037107, "loss": 0.008892664685845375, "lr": 5.045022878513122e-06, "seqlen": 8192.0, "step_tflops": 61.280715094949635, "step_time": 0.33666535186767577} +{"epoch": 0, "iter": 22068, "iter_tflops": 60.09530655333963, "iter_time": 0.34330623626708984, "loss": 0.008418370969593525, "lr": 5.009821988486227e-06, "seqlen": 8192.0, "step_tflops": 65.97935749445395, "step_time": 0.31269012451171874} +{"epoch": 0, "iter": 22069, "iter_tflops": 30.410371169654983, "iter_time": 0.6784229431152344, "loss": 0.5430469512939453, "lr": 4.9747196974831075e-06, "seqlen": 8192.0, "step_tflops": 32.19178588787301, "step_time": 0.6408806762695313} +{"epoch": 0, "iter": 22070, "iter_tflops": 12.53648900290653, "iter_time": 1.6456835327148438, "loss": 0.5406408309936523, "lr": 4.9397163519492e-06, "seqlen": 8192.0, "step_tflops": 15.96082014194667, "step_time": 1.2926086082458497} +{"epoch": 0, "iter": 22071, "iter_tflops": 37.680202427816965, "iter_time": 0.5475313873291017, "loss": 0.42922064661979675, "lr": 4.904812297353403e-06, "seqlen": 8192.0, "step_tflops": 41.07593274232897, "step_time": 0.5022671947479248} +{"epoch": 0, "iter": 22072, "iter_tflops": 37.38240131478791, "iter_time": 
0.5518932113647461, "loss": 0.47978100180625916, "lr": 4.870007878184633e-06, "seqlen": 8192.0, "step_tflops": 40.72789165820101, "step_time": 0.5065593299865723} +{"epoch": 0, "iter": 22073, "iter_tflops": 17.982146562210147, "iter_time": 1.1473098297119142, "loss": 0.24114291369915009, "lr": 4.835303437948468e-06, "seqlen": 8192.0, "step_tflops": 19.313451301277016, "step_time": 1.0682240676879884} +{"epoch": 0, "iter": 22074, "iter_tflops": 21.668391891583397, "iter_time": 0.9521285018920899, "loss": 0.2741016745567322, "lr": 4.800699319163711e-06, "seqlen": 8192.0, "step_tflops": 29.507779928032093, "step_time": 0.6991747112274169} +{"epoch": 0, "iter": 22075, "iter_tflops": 46.262418081795644, "iter_time": 0.4459579582214356, "loss": 0.23028287291526794, "lr": 4.766195863359055e-06, "seqlen": 8192.0, "step_tflops": 50.110436412227614, "step_time": 0.4117125091552734} +{"epoch": 0, "iter": 22076, "iter_tflops": 48.87735429540751, "iter_time": 0.42209922790527343, "loss": 0.20771320164203644, "lr": 4.731793411069669e-06, "seqlen": 8192.0, "step_tflops": 53.125331482155055, "step_time": 0.38834757232666023} +{"epoch": 0, "iter": 22077, "iter_tflops": 34.089258412299564, "iter_time": 0.46371004104614255, "loss": 0.037305451929569244, "lr": 4.697492301833878e-06, "seqlen": 6320.0, "step_tflops": 37.328745657884575, "step_time": 0.42346805763244627} +{"epoch": 0, "iter": 22078, "iter_tflops": 36.79157697226867, "iter_time": 0.42965082550048833, "loss": 0.039407409727573395, "lr": 4.663292874189776e-06, "seqlen": 6320.0, "step_tflops": 40.3672335520718, "step_time": 0.39159313201904294} +{"epoch": 0, "iter": 22079, "iter_tflops": 42.06631941203983, "iter_time": 0.37577643203735356, "loss": 0.007331247441470623, "lr": 4.629195465671905e-06, "seqlen": 6320.0, "step_tflops": 46.183383869255934, "step_time": 0.34227746200561526} +{"epoch": 0, "iter": 22080, "iter_tflops": 43.246339625726826, "iter_time": 0.36552299118042, "loss": 0.010270035825669765, "lr": 
4.595200412807928e-06, "seqlen": 6320.0, "step_tflops": 47.045205896555665, "step_time": 0.3360072746276856} +{"epoch": 0, "iter": 22081, "iter_tflops": 35.195489168307496, "iter_time": 0.5861857299804687, "loss": 0.0028564324602484703, "lr": 4.561308051115286e-06, "seqlen": 8192.0, "step_tflops": 37.980090254466354, "step_time": 0.543208122253418} +{"epoch": 0, "iter": 22082, "iter_tflops": 40.58158018418672, "iter_time": 0.5083856620788575, "loss": 0.0012893072562292218, "lr": 4.5275187150979124e-06, "seqlen": 8192.0, "step_tflops": 44.885281261787256, "step_time": 0.45964050865173334} +{"epoch": 0, "iter": 22083, "iter_tflops": 45.79958276998854, "iter_time": 0.4504646606445313, "loss": 0.01093425415456295, "lr": 4.493832738242905e-06, "seqlen": 8192.0, "step_tflops": 50.4845445626276, "step_time": 0.40866157531738284} +{"epoch": 0, "iter": 22084, "iter_tflops": 51.722311945804385, "iter_time": 0.39888188934326174, "loss": 0.009638169780373573, "lr": 4.460250453017264e-06, "seqlen": 8192.0, "step_tflops": 57.251850429353794, "step_time": 0.36035679817199706} +{"epoch": 0, "iter": 22085, "iter_tflops": 33.25608660815147, "iter_time": 0.6203704528808593, "loss": 0.43013322353363037, "lr": 4.426772190864578e-06, "seqlen": 8192.0, "step_tflops": 36.331093184814456, "step_time": 0.5678632736206055} +{"epoch": 0, "iter": 22086, "iter_tflops": 31.919941182007456, "iter_time": 0.6463387069702148, "loss": 0.6804568767547607, "lr": 4.393398282201788e-06, "seqlen": 8192.0, "step_tflops": 34.97946203957446, "step_time": 0.5898059120178223} +{"epoch": 0, "iter": 22087, "iter_tflops": 37.595042358000576, "iter_time": 0.5487716522216797, "loss": 0.5527966618537903, "lr": 4.360129056415895e-06, "seqlen": 8192.0, "step_tflops": 40.78104046803957, "step_time": 0.5058991451263428} +{"epoch": 0, "iter": 22088, "iter_tflops": 32.30975861691373, "iter_time": 0.6385406265258788, "loss": 0.42594441771507263, "lr": 4.32696484186072e-06, "seqlen": 8192.0, "step_tflops": 35.1021004695253, 
"step_time": 0.5877452697753905} +{"epoch": 0, "iter": 22089, "iter_tflops": 19.14349611454147, "iter_time": 1.077707717895508, "loss": 0.45789700746536255, "lr": 4.293905965853682e-06, "seqlen": 8192.0, "step_tflops": 20.199991852960782, "step_time": 1.0213416748046875} +{"epoch": 0, "iter": 22090, "iter_tflops": 9.037501869595658, "iter_time": 2.2828314514160155, "loss": 0.38152262568473816, "lr": 4.260952754672532e-06, "seqlen": 8192.0, "step_tflops": 11.31585227096809, "step_time": 1.8232027969360352} +{"epoch": 0, "iter": 22091, "iter_tflops": 21.370516275459902, "iter_time": 0.9653998641967773, "loss": 0.5871544480323792, "lr": 4.22810553355217e-06, "seqlen": 8192.0, "step_tflops": 25.617626869183933, "step_time": 0.8053475685119629} +{"epoch": 0, "iter": 22092, "iter_tflops": 46.67142722775885, "iter_time": 0.4420497665405273, "loss": 0.4599904716014862, "lr": 4.195364626681396e-06, "seqlen": 8192.0, "step_tflops": 50.25183516331567, "step_time": 0.41055403137207036} +{"epoch": 0, "iter": 22093, "iter_tflops": 18.237835152447936, "iter_time": 0.8084512481689452, "loss": 0.23877961933612823, "lr": 4.162730357199753e-06, "seqlen": 5904.0, "step_tflops": 19.198114054582234, "step_time": 0.7680129699707031} +{"epoch": 0, "iter": 22094, "iter_tflops": 13.131098228130034, "iter_time": 1.1228611907958983, "loss": 0.23066091537475586, "lr": 4.1302030471943e-06, "seqlen": 5904.0, "step_tflops": 16.780510991998156, "step_time": 0.8786621932983398} +{"epoch": 0, "iter": 22095, "iter_tflops": 22.566557000301234, "iter_time": 0.6533739547729492, "loss": 0.1558394730091095, "lr": 4.097783017696459e-06, "seqlen": 5904.0, "step_tflops": 24.328349718695886, "step_time": 0.6060583953857421} +{"epoch": 0, "iter": 22096, "iter_tflops": 22.78223006578894, "iter_time": 0.6471886444091797, "loss": 0.21236805617809296, "lr": 4.06547058867883e-06, "seqlen": 5904.0, "step_tflops": 24.474271146544147, "step_time": 0.6024449310302734} +{"epoch": 0, "iter": 22097, "iter_tflops": 
19.762985610230604, "iter_time": 1.0439259490966797, "loss": 0.6187970638275146, "lr": 4.033266079052039e-06, "seqlen": 8192.0, "step_tflops": 21.18611386388148, "step_time": 0.9738026351928711} +{"epoch": 0, "iter": 22098, "iter_tflops": 16.954983677406897, "iter_time": 1.216815887451172, "loss": 0.6716530323028564, "lr": 4.001169806661603e-06, "seqlen": 8192.0, "step_tflops": 20.4759990882854, "step_time": 1.0075744495391847} +{"epoch": 0, "iter": 22099, "iter_tflops": 35.19820487924699, "iter_time": 0.5861405029296874, "loss": 0.4185832738876343, "lr": 3.969182088284761e-06, "seqlen": 8192.0, "step_tflops": 38.273729069619726, "step_time": 0.5390405902862548} +{"epoch": 0, "iter": 22100, "iter_tflops": 35.28249486671255, "iter_time": 0.5847402114868164, "loss": 0.5731154680252075, "lr": 3.937303239627393e-06, "seqlen": 8192.0, "step_tflops": 38.58480315498432, "step_time": 0.5346947975158691} +{"epoch": 0, "iter": 22101, "iter_tflops": 31.24091978085175, "iter_time": 0.6603868789672851, "loss": 0.35562652349472046, "lr": 3.905533575320853e-06, "seqlen": 8192.0, "step_tflops": 34.68761099759782, "step_time": 0.5947683601379394} +{"epoch": 0, "iter": 22102, "iter_tflops": 37.20525783129321, "iter_time": 0.5545209121704102, "loss": 0.3491996228694916, "lr": 3.873873408918913e-06, "seqlen": 8192.0, "step_tflops": 40.56468105572171, "step_time": 0.508597454071045} +{"epoch": 0, "iter": 22103, "iter_tflops": 39.30307026115555, "iter_time": 0.5249232025146484, "loss": 0.3153945803642273, "lr": 3.842323052894623e-06, "seqlen": 8192.0, "step_tflops": 42.55370555461721, "step_time": 0.4848248405456542} +{"epoch": 0, "iter": 22104, "iter_tflops": 36.619099639308075, "iter_time": 0.563397071838379, "loss": 0.5286025404930115, "lr": 3.8108828186372686e-06, "seqlen": 8192.0, "step_tflops": 40.21147419515908, "step_time": 0.5130648384094239} +{"epoch": 0, "iter": 22105, "iter_tflops": 34.22232444030667, "iter_time": 0.6028548278808594, "loss": 0.07619928568601608, "lr": 
3.779553016449266e-06, "seqlen": 8192.0, "step_tflops": 37.91383099319499, "step_time": 0.5441574478149414} +{"epoch": 0, "iter": 22106, "iter_tflops": 42.073863467676425, "iter_time": 0.4903541488647461, "loss": 0.07003418356180191, "lr": 3.7483339555431063e-06, "seqlen": 8192.0, "step_tflops": 46.58693584493728, "step_time": 0.44285148048400874} +{"epoch": 0, "iter": 22107, "iter_tflops": 45.72763702371128, "iter_time": 0.45117340087890634, "loss": 0.10713567584753036, "lr": 3.717225944038331e-06, "seqlen": 8192.0, "step_tflops": 50.32295173318564, "step_time": 0.4099738349914551} +{"epoch": 0, "iter": 22108, "iter_tflops": 45.08896274895801, "iter_time": 0.4575641632080078, "loss": 0.08886221051216125, "lr": 3.686229288958442e-06, "seqlen": 8192.0, "step_tflops": 49.30485916704476, "step_time": 0.4184393558502197} +{"epoch": 0, "iter": 22109, "iter_tflops": 28.327383834284426, "iter_time": 0.7283091735839844, "loss": 0.007354807108640671, "lr": 3.655344296227923e-06, "seqlen": 8192.0, "step_tflops": 30.686701333321857, "step_time": 0.6723138236999512} +{"epoch": 0, "iter": 22110, "iter_tflops": 13.018298949990394, "iter_time": 1.5847764434814453, "loss": 0.002901853062212467, "lr": 3.624571270669172e-06, "seqlen": 8192.0, "step_tflops": 16.97100739160478, "step_time": 1.2156669921874999} +{"epoch": 0, "iter": 22111, "iter_tflops": 23.781048219767573, "iter_time": 0.867543487548828, "loss": 0.003379247384145856, "lr": 3.5939105159995363e-06, "seqlen": 8192.0, "step_tflops": 26.706175218015083, "step_time": 0.7725214614868163} +{"epoch": 0, "iter": 22112, "iter_tflops": 55.000329007581385, "iter_time": 0.37510854721069337, "loss": 0.002694162307307124, "lr": 3.56336233482828e-06, "seqlen": 8192.0, "step_tflops": 60.69361897884469, "step_time": 0.3399219532012939} +{"epoch": 0, "iter": 22113, "iter_tflops": 21.252922765258646, "iter_time": 0.6688036041259766, "loss": 0.2107432335615158, "lr": 3.532927028653627e-06, "seqlen": 5696.0, "step_tflops": 
22.546786865477294, "step_time": 0.630423812866211} +{"epoch": 0, "iter": 22114, "iter_tflops": 21.578641971644515, "iter_time": 0.6587083358764649, "loss": 0.1571912318468094, "lr": 3.502604897859754e-06, "seqlen": 5696.0, "step_tflops": 25.45046871911708, "step_time": 0.5584978218078613} +{"epoch": 0, "iter": 22115, "iter_tflops": 25.60587388244546, "iter_time": 0.5551082305908204, "loss": 0.06360944360494614, "lr": 3.4723962417138512e-06, "seqlen": 5696.0, "step_tflops": 27.33440595601054, "step_time": 0.5200051307678223} +{"epoch": 0, "iter": 22116, "iter_tflops": 24.323690132509423, "iter_time": 0.5843698577880859, "loss": 0.20661543309688568, "lr": 3.442301358363163e-06, "seqlen": 5696.0, "step_tflops": 25.97467160354703, "step_time": 0.5472266044616699} +{"epoch": 0, "iter": 22117, "iter_tflops": 41.02140565298677, "iter_time": 0.5029348258972168, "loss": 0.031191792339086533, "lr": 3.412320544832033e-06, "seqlen": 8192.0, "step_tflops": 44.91488959411307, "step_time": 0.45933750915527344} +{"epoch": 0, "iter": 22118, "iter_tflops": 48.14394466323592, "iter_time": 0.428529354095459, "loss": 0.0811299979686737, "lr": 3.3824540970189943e-06, "seqlen": 8192.0, "step_tflops": 52.78624631528814, "step_time": 0.39084221649169926} +{"epoch": 0, "iter": 22119, "iter_tflops": 53.89288661146866, "iter_time": 0.382816635131836, "loss": 0.02859392948448658, "lr": 3.35270230969382e-06, "seqlen": 8192.0, "step_tflops": 58.729137145893105, "step_time": 0.35129229736328127} +{"epoch": 0, "iter": 22120, "iter_tflops": 52.50304680001294, "iter_time": 0.39295040512084956, "loss": 0.03063216805458069, "lr": 3.32306547649465e-06, "seqlen": 8192.0, "step_tflops": 57.31047171558213, "step_time": 0.3599881992340088} +{"epoch": 0, "iter": 22121, "iter_tflops": 36.939855820914445, "iter_time": 0.5585049819946288, "loss": 0.6524646282196045, "lr": 3.293543889925053e-06, "seqlen": 8192.0, "step_tflops": 39.790460612394654, "step_time": 0.5184934577941895} +{"epoch": 0, "iter": 22122, 
"iter_tflops": 44.052207315109236, "iter_time": 0.4683327980041504, "loss": 0.5440371036529541, "lr": 3.2641378413511783e-06, "seqlen": 8192.0, "step_tflops": 47.70607283374394, "step_time": 0.43246262550354003} +{"epoch": 0, "iter": 22123, "iter_tflops": 43.623948280288, "iter_time": 0.4729304504394531, "loss": 0.6734974980354309, "lr": 3.2348476209988465e-06, "seqlen": 8192.0, "step_tflops": 47.102889838081175, "step_time": 0.43800058937072756} +{"epoch": 0, "iter": 22124, "iter_tflops": 40.826161115286475, "iter_time": 0.5053400306701661, "loss": 0.4109265208244324, "lr": 3.205673517950716e-06, "seqlen": 8192.0, "step_tflops": 43.79400948268634, "step_time": 0.47109396362304684} +{"epoch": 0, "iter": 22125, "iter_tflops": 40.12567693730127, "iter_time": 0.5141618804931641, "loss": 0.34556418657302856, "lr": 3.176615820143401e-06, "seqlen": 8192.0, "step_tflops": 43.649724936699435, "step_time": 0.47265116882324215} +{"epoch": 0, "iter": 22126, "iter_tflops": 42.64166666908345, "iter_time": 0.4838247451782226, "loss": 0.1592630296945572, "lr": 3.1476748143646437e-06, "seqlen": 8192.0, "step_tflops": 46.06619958233058, "step_time": 0.4478575115203858} +{"epoch": 0, "iter": 22127, "iter_tflops": 46.21954967711105, "iter_time": 0.44637158203125, "loss": 0.30319109559059143, "lr": 3.118850786250495e-06, "seqlen": 8192.0, "step_tflops": 50.11899971111624, "step_time": 0.41164216423034666} +{"epoch": 0, "iter": 22128, "iter_tflops": 46.87730709837918, "iter_time": 0.44010833358764645, "loss": 0.19513927400112152, "lr": 3.0901440202824693e-06, "seqlen": 8192.0, "step_tflops": 50.80023905635932, "step_time": 0.406121976852417} +{"epoch": 0, "iter": 22129, "iter_tflops": 29.58578867633843, "iter_time": 0.6973311996459962, "loss": 0.15407726168632507, "lr": 3.0615547997847626e-06, "seqlen": 8192.0, "step_tflops": 31.355084545016368, "step_time": 0.6579823913574219} +{"epoch": 0, "iter": 22130, "iter_tflops": 12.029390242066132, "iter_time": 1.7150572967529296, "loss": 
0.11419124901294708, "lr": 3.0330834069214337e-06, "seqlen": 8192.0, "step_tflops": 15.97004224567451, "step_time": 1.291862174987793} +{"epoch": 0, "iter": 22131, "iter_tflops": 48.09376611023889, "iter_time": 0.42897645950317387, "loss": 0.13258536159992218, "lr": 3.0047301226936412e-06, "seqlen": 8192.0, "step_tflops": 52.485103661004395, "step_time": 0.39308474349975586} +{"epoch": 0, "iter": 22132, "iter_tflops": 52.570155797145176, "iter_time": 0.3924487800598145, "loss": 0.09160968661308289, "lr": 2.976495226936849e-06, "seqlen": 8192.0, "step_tflops": 57.00649265240121, "step_time": 0.3619077854156494} +{"epoch": 0, "iter": 22133, "iter_tflops": 36.66740270930706, "iter_time": 0.5626548919677734, "loss": 0.035041242837905884, "lr": 2.9483789983180857e-06, "seqlen": 8192.0, "step_tflops": 39.49589239528064, "step_time": 0.5223604850769042} +{"epoch": 0, "iter": 22134, "iter_tflops": 36.34978599931689, "iter_time": 0.5675712509155273, "loss": 0.041749849915504456, "lr": 2.920381714333172e-06, "seqlen": 8192.0, "step_tflops": 40.64184723070953, "step_time": 0.5076317863464355} +{"epoch": 0, "iter": 22135, "iter_tflops": 45.60984399947932, "iter_time": 0.4523386116027832, "loss": 0.05313148722052574, "lr": 2.8925036513039955e-06, "seqlen": 8192.0, "step_tflops": 50.19923790206538, "step_time": 0.4109841976165772} +{"epoch": 0, "iter": 22136, "iter_tflops": 46.473137946823066, "iter_time": 0.44393588256835936, "loss": 0.06765882670879364, "lr": 2.86474508437579e-06, "seqlen": 8192.0, "step_tflops": 50.91492701221558, "step_time": 0.4052071704864502} +{"epoch": 0, "iter": 22137, "iter_tflops": 15.261270018259486, "iter_time": 1.3518595428466798, "loss": 0.5428698658943176, "lr": 2.837106287514397e-06, "seqlen": 8192.0, "step_tflops": 16.138296996942394, "step_time": 1.2783934707641602} +{"epoch": 0, "iter": 22138, "iter_tflops": 22.686049196824204, "iter_time": 0.9094176483154297, "loss": 0.5222179293632507, "lr": 2.809587533503591e-06, "seqlen": 8192.0, 
"step_tflops": 27.93953121867442, "step_time": 0.7384194583892822} +{"epoch": 0, "iter": 22139, "iter_tflops": 44.39184093277537, "iter_time": 0.46474967193603517, "loss": 0.5778778791427612, "lr": 2.7821890939423576e-06, "seqlen": 8192.0, "step_tflops": 47.87906783972108, "step_time": 0.4309000663757324} +{"epoch": 0, "iter": 22140, "iter_tflops": 47.59819944961642, "iter_time": 0.4334427299499512, "loss": 0.700298011302948, "lr": 2.754911239242241e-06, "seqlen": 8192.0, "step_tflops": 51.32226530817787, "step_time": 0.40199109268188477} +{"epoch": 0, "iter": 22141, "iter_tflops": 30.926105168108748, "iter_time": 0.6671093368530273, "loss": 0.06431994587182999, "lr": 2.7277542386246455e-06, "seqlen": 8192.0, "step_tflops": 33.03480315932957, "step_time": 0.6245260009765625} +{"epoch": 0, "iter": 22142, "iter_tflops": 16.257735986047326, "iter_time": 1.2690016326904296, "loss": 0.03823331370949745, "lr": 2.7007183601182144e-06, "seqlen": 8192.0, "step_tflops": 18.253582494523982, "step_time": 1.1302490081787109} +{"epoch": 0, "iter": 22143, "iter_tflops": 48.32236287780123, "iter_time": 0.4269471168518067, "loss": 0.08034154027700424, "lr": 2.673803870556148e-06, "seqlen": 8192.0, "step_tflops": 53.51917346825896, "step_time": 0.38548976325988765} +{"epoch": 0, "iter": 22144, "iter_tflops": 49.94319227961227, "iter_time": 0.4130912055969238, "loss": 0.09493133425712585, "lr": 2.6470110355735884e-06, "seqlen": 8192.0, "step_tflops": 54.39270497763049, "step_time": 0.3792989063262939} +{"epoch": 0, "iter": 22145, "iter_tflops": 26.331702358894777, "iter_time": 0.7835077743530274, "loss": 0.1764046549797058, "lr": 2.6203401196050065e-06, "seqlen": 8192.0, "step_tflops": 27.862426508652796, "step_time": 0.7404629135131836} +{"epoch": 0, "iter": 22146, "iter_tflops": 27.27061945582187, "iter_time": 0.7565318984985352, "loss": 0.18768960237503052, "lr": 2.5937913858815708e-06, "seqlen": 8192.0, "step_tflops": 31.031280745702354, "step_time": 0.6648482761383058} 
+{"epoch": 0, "iter": 22147, "iter_tflops": 40.46405762524165, "iter_time": 0.5098622016906739, "loss": 0.2025747001171112, "lr": 2.5673650964285718e-06, "seqlen": 8192.0, "step_tflops": 44.09296121946466, "step_time": 0.4678999309539794} +{"epoch": 0, "iter": 22148, "iter_tflops": 40.88821730548482, "iter_time": 0.5045730743408203, "loss": 0.15699425339698792, "lr": 2.541061512062808e-06, "seqlen": 8192.0, "step_tflops": 44.56834728224967, "step_time": 0.4629090995788574} +{"epoch": 0, "iter": 22149, "iter_tflops": 16.87628660754519, "iter_time": 1.2224901123046874, "loss": 0.17299732565879822, "lr": 2.5148808923900515e-06, "seqlen": 8192.0, "step_tflops": 18.011222348270707, "step_time": 1.1454577102661132} +{"epoch": 0, "iter": 22150, "iter_tflops": 36.81540353987644, "iter_time": 0.5603929748535156, "loss": 0.14274102449417114, "lr": 2.4888234958024333e-06, "seqlen": 8192.0, "step_tflops": 40.839943593766435, "step_time": 0.505169490814209} +{"epoch": 0, "iter": 22151, "iter_tflops": 39.39979979784303, "iter_time": 0.5236344757080078, "loss": 0.12711843848228455, "lr": 2.4628895794759493e-06, "seqlen": 8192.0, "step_tflops": 43.22922476059049, "step_time": 0.4772487506866454} +{"epoch": 0, "iter": 22152, "iter_tflops": 41.2558594557232, "iter_time": 0.500076686859131, "loss": 0.1318923532962799, "lr": 2.437079399367875e-06, "seqlen": 8192.0, "step_tflops": 45.13487439457215, "step_time": 0.45709872436523435} +{"epoch": 0, "iter": 22153, "iter_tflops": 22.42309616312339, "iter_time": 0.9200822830200195, "loss": 0.6321308016777039, "lr": 2.4113932102142765e-06, "seqlen": 8192.0, "step_tflops": 24.149235918331843, "step_time": 0.8543166160583495} +{"epoch": 0, "iter": 22154, "iter_tflops": 24.748708395600843, "iter_time": 0.8336230392456055, "loss": 0.5817630290985107, "lr": 2.3858312655274643e-06, "seqlen": 8192.0, "step_tflops": 28.101262084522613, "step_time": 0.7341696414947511} +{"epoch": 0, "iter": 22155, "iter_tflops": 34.58507663708868, "iter_time": 
0.5965316696166992, "loss": 0.5180642008781433, "lr": 2.360393817593519e-06, "seqlen": 8192.0, "step_tflops": 37.82470240985369, "step_time": 0.5454396781921387} +{"epoch": 0, "iter": 22156, "iter_tflops": 34.19646029439194, "iter_time": 0.6033107910156249, "loss": 0.474892258644104, "lr": 2.335081117469777e-06, "seqlen": 8192.0, "step_tflops": 37.19506250163169, "step_time": 0.5546729087829589} +{"epoch": 0, "iter": 22157, "iter_tflops": 21.311910282261625, "iter_time": 0.9680546340942382, "loss": 0.07564475387334824, "lr": 2.3098934149823686e-06, "seqlen": 8192.0, "step_tflops": 23.235767826980947, "step_time": 0.8879023780822755} +{"epoch": 0, "iter": 22158, "iter_tflops": 18.251788658482997, "iter_time": 1.130360092163086, "loss": 0.09236831218004227, "lr": 2.284830958723755e-06, "seqlen": 8192.0, "step_tflops": 20.66653220117491, "step_time": 0.998285213470459} +{"epoch": 0, "iter": 22159, "iter_tflops": 48.41750024053519, "iter_time": 0.4261081924438476, "loss": 0.11437670886516571, "lr": 2.2598939960502528e-06, "seqlen": 8192.0, "step_tflops": 53.06028658666295, "step_time": 0.38882363510131834} +{"epoch": 0, "iter": 22160, "iter_tflops": 52.435385998341566, "iter_time": 0.3934574546813965, "loss": 0.1107955127954483, "lr": 2.235082773079624e-06, "seqlen": 8192.0, "step_tflops": 57.267878885535374, "step_time": 0.36025593948364265} +{"epoch": 0, "iter": 22161, "iter_tflops": 23.548021286036594, "iter_time": 0.8761285400390626, "loss": 0.014839625917375088, "lr": 2.2103975346886172e-06, "seqlen": 8192.0, "step_tflops": 24.743632837324544, "step_time": 0.8337940368652346} +{"epoch": 0, "iter": 22162, "iter_tflops": 15.04390268942823, "iter_time": 1.3713923797607421, "loss": 0.014265380799770355, "lr": 2.1858385245105744e-06, "seqlen": 8192.0, "step_tflops": 19.532037310600884, "step_time": 1.0562694091796876} +{"epoch": 0, "iter": 22163, "iter_tflops": 53.42436902759338, "iter_time": 0.38617383575439457, "loss": 0.014679289422929287, "lr": 
2.1614059849330054e-06, "seqlen": 8192.0, "step_tflops": 58.73032815337215, "step_time": 0.3512851734161377} +{"epoch": 0, "iter": 22164, "iter_tflops": 51.015964410333076, "iter_time": 0.404404655456543, "loss": 0.009726988151669502, "lr": 2.1371001570952187e-06, "seqlen": 8192.0, "step_tflops": 55.218997629839016, "step_time": 0.3736231079101563} +{"epoch": 0, "iter": 22165, "iter_tflops": 46.64625538776138, "iter_time": 0.4422883110046387, "loss": 0.25927260518074036, "lr": 2.1129212808859184e-06, "seqlen": 8192.0, "step_tflops": 50.935545519636776, "step_time": 0.40504314422607424} +{"epoch": 0, "iter": 22166, "iter_tflops": 46.12722830990066, "iter_time": 0.44726497268676757, "loss": 0.2063177525997162, "lr": 2.088869594940847e-06, "seqlen": 8192.0, "step_tflops": 52.2767653063335, "step_time": 0.39465130233764645} +{"epoch": 0, "iter": 22167, "iter_tflops": 51.721387470683965, "iter_time": 0.39888901901245116, "loss": 0.20069481432437897, "lr": 2.0649453366404438e-06, "seqlen": 8192.0, "step_tflops": 55.99131524125428, "step_time": 0.36846952819824214} +{"epoch": 0, "iter": 22168, "iter_tflops": 44.36062825927062, "iter_time": 0.465076675415039, "loss": 0.23858694732189178, "lr": 2.041148742107471e-06, "seqlen": 8192.0, "step_tflops": 48.09045248706873, "step_time": 0.42900601768493657} +{"epoch": 0, "iter": 22169, "iter_tflops": 22.641075396021584, "iter_time": 0.9112240982055665, "loss": 0.5337960720062256, "lr": 2.017480046204718e-06, "seqlen": 8192.0, "step_tflops": 23.67807262764403, "step_time": 0.8713164215087891} +{"epoch": 0, "iter": 22170, "iter_tflops": 14.207739643662613, "iter_time": 1.452102447509766, "loss": 0.4664947986602783, "lr": 1.9939394825326494e-06, "seqlen": 8192.0, "step_tflops": 20.842874966451237, "step_time": 0.9898391437530517} +{"epoch": 0, "iter": 22171, "iter_tflops": 39.641861033959074, "iter_time": 0.5204370574951173, "loss": 0.49933865666389465, "lr": 1.9705272834271337e-06, "seqlen": 8192.0, "step_tflops": 
43.34172529478961, "step_time": 0.47600997352600094} +{"epoch": 0, "iter": 22172, "iter_tflops": 38.069665434603294, "iter_time": 0.5419299926757813, "loss": 0.5234244465827942, "lr": 1.9472436799571145e-06, "seqlen": 8192.0, "step_tflops": 41.5715906054256, "step_time": 0.4962786655426026} +{"epoch": 0, "iter": 22173, "iter_tflops": 21.402641153822092, "iter_time": 0.9639508209228514, "loss": 0.05823400244116783, "lr": 1.9240889019223683e-06, "seqlen": 8192.0, "step_tflops": 22.997314758095207, "step_time": 0.8971088027954102} +{"epoch": 0, "iter": 22174, "iter_tflops": 15.291767781948275, "iter_time": 1.34916340637207, "loss": 0.07729322463274002, "lr": 1.901063177851201e-06, "seqlen": 8192.0, "step_tflops": 18.291696637510714, "step_time": 1.1278939247131348} +{"epoch": 0, "iter": 22175, "iter_tflops": 50.139257854252385, "iter_time": 0.41147584533691406, "loss": 0.07707890123128891, "lr": 1.8781667349982096e-06, "seqlen": 8192.0, "step_tflops": 54.82912650463821, "step_time": 0.37627981376647945} +{"epoch": 0, "iter": 22176, "iter_tflops": 47.97478520455735, "iter_time": 0.43004035186767575, "loss": 0.13815023005008698, "lr": 1.8553997993420495e-06, "seqlen": 8192.0, "step_tflops": 52.28294442431823, "step_time": 0.39460466003417977} +{"epoch": 0, "iter": 22177, "iter_tflops": 41.00494785829457, "iter_time": 0.5031366844177246, "loss": 0.09164939820766449, "lr": 1.8327625955831763e-06, "seqlen": 8192.0, "step_tflops": 44.545748771664705, "step_time": 0.46314393806457527} +{"epoch": 0, "iter": 22178, "iter_tflops": 8.636306025902273, "iter_time": 2.38887939453125, "loss": 0.15353988111019135, "lr": 1.8102553471416584e-06, "seqlen": 8192.0, "step_tflops": 10.21785836327176, "step_time": 2.0191211090087893} +{"epoch": 0, "iter": 22179, "iter_tflops": 10.916034064559723, "iter_time": 1.889980682373047, "loss": 0.06557980924844742, "lr": 1.787878276154946e-06, "seqlen": 8192.0, "step_tflops": 13.13690981699413, "step_time": 1.5704677734375} +{"epoch": 0, "iter": 
22180, "iter_tflops": 26.991280806361864, "iter_time": 0.7643614120483398, "loss": 0.08738928288221359, "lr": 1.7656316034757024e-06, "seqlen": 8192.0, "step_tflops": 34.026240563364915, "step_time": 0.6063289146423341} +{"epoch": 0, "iter": 22181, "iter_tflops": 12.126577765997729, "iter_time": 1.1519750061035157, "loss": 0.3020990788936615, "lr": 1.7435155486695982e-06, "seqlen": 5600.0, "step_tflops": 12.883581845625258, "step_time": 1.084288101196289} +{"epoch": 0, "iter": 22182, "iter_tflops": 9.288137836648296, "iter_time": 1.5040167083740232, "loss": 0.21574614942073822, "lr": 1.721530330013173e-06, "seqlen": 5600.0, "step_tflops": 12.623815364050333, "step_time": 1.1066000328063965} +{"epoch": 0, "iter": 22183, "iter_tflops": 24.780004545628227, "iter_time": 0.5637414016723633, "loss": 0.11721059679985046, "lr": 1.699676164491652e-06, "seqlen": 5600.0, "step_tflops": 26.451024845706915, "step_time": 0.5281275329589843} +{"epoch": 0, "iter": 22184, "iter_tflops": 26.29955609517467, "iter_time": 0.5311692123413086, "loss": 0.2900674045085907, "lr": 1.677953267796833e-06, "seqlen": 5600.0, "step_tflops": 27.923861029858998, "step_time": 0.5002715950012208} +{"epoch": 0, "iter": 22185, "iter_tflops": 43.195524537952544, "iter_time": 0.4776210899353028, "loss": 0.09664493799209595, "lr": 1.6563618543249331e-06, "seqlen": 8192.0, "step_tflops": 47.56396807562585, "step_time": 0.433754674911499} +{"epoch": 0, "iter": 22186, "iter_tflops": 37.528018087798436, "iter_time": 0.5497517471313477, "loss": 0.06362275779247284, "lr": 1.6349021371744833e-06, "seqlen": 8192.0, "step_tflops": 41.91104366070236, "step_time": 0.4922591209411621} +{"epoch": 0, "iter": 22187, "iter_tflops": 40.246015655936226, "iter_time": 0.5126244964599609, "loss": 0.09278875589370728, "lr": 1.6135743281442333e-06, "seqlen": 8192.0, "step_tflops": 44.29159970594979, "step_time": 0.46580149841308593} +{"epoch": 0, "iter": 22188, "iter_tflops": 41.02499436981069, "iter_time": 0.5028908309936524, 
"loss": 0.06355469673871994, "lr": 1.5923786377310433e-06, "seqlen": 8192.0, "step_tflops": 45.01035672324733, "step_time": 0.4583632526397705} +{"epoch": 0, "iter": 22189, "iter_tflops": 24.488773987427972, "iter_time": 0.8424714736938478, "loss": 0.5547279715538025, "lr": 1.5713152751278265e-06, "seqlen": 8192.0, "step_tflops": 26.14513921579942, "step_time": 0.7890986289978027} +{"epoch": 0, "iter": 22190, "iter_tflops": 17.510340489709808, "iter_time": 1.1782234344482423, "loss": 0.37515950202941895, "lr": 1.5503844482214618e-06, "seqlen": 8192.0, "step_tflops": 19.74160945920064, "step_time": 1.045056308746338} +{"epoch": 0, "iter": 22191, "iter_tflops": 32.83421203482708, "iter_time": 0.6283413619995117, "loss": 0.6497554779052734, "lr": 1.529586363590767e-06, "seqlen": 8192.0, "step_tflops": 35.701267850861676, "step_time": 0.5778812561035157} +{"epoch": 0, "iter": 22192, "iter_tflops": 35.85650771428701, "iter_time": 0.5753793334960938, "loss": 0.48521944880485535, "lr": 1.508921226504434e-06, "seqlen": 8192.0, "step_tflops": 38.8472701465538, "step_time": 0.5310821952819824} +{"epoch": 0, "iter": 22193, "iter_tflops": 14.61921155802292, "iter_time": 1.4112316131591796, "loss": 0.5551809668540955, "lr": 1.4883892409190292e-06, "seqlen": 8192.0, "step_tflops": 15.44145390299899, "step_time": 1.3360849075317385} +{"epoch": 0, "iter": 22194, "iter_tflops": 21.196852431451376, "iter_time": 0.9733092956542968, "loss": 0.3886428773403168, "lr": 1.467990609476959e-06, "seqlen": 8192.0, "step_tflops": 23.725733946148363, "step_time": 0.8695660820007324} +{"epoch": 0, "iter": 22195, "iter_tflops": 46.674174978145125, "iter_time": 0.44202374267578126, "loss": 0.4059547781944275, "lr": 1.4477255335044775e-06, "seqlen": 8192.0, "step_tflops": 50.41219056980185, "step_time": 0.40924810600280764} +{"epoch": 0, "iter": 22196, "iter_tflops": 46.621043570671965, "iter_time": 0.44252749252319334, "loss": 0.3550593852996826, "lr": 1.4275942130097097e-06, "seqlen": 8192.0, 
"step_tflops": 50.112298769515895, "step_time": 0.411697208404541} +{"epoch": 0, "iter": 22197, "iter_tflops": 27.00509301307088, "iter_time": 0.7639704666137695, "loss": 0.12007951736450195, "lr": 1.4075968466806533e-06, "seqlen": 8192.0, "step_tflops": 28.62443634987614, "step_time": 0.7207510833740235} +{"epoch": 0, "iter": 22198, "iter_tflops": 18.836646281802203, "iter_time": 1.0952636260986328, "loss": 0.15629354119300842, "lr": 1.3877336318832468e-06, "seqlen": 8192.0, "step_tflops": 21.091565015529618, "step_time": 0.9781679782867433} +{"epoch": 0, "iter": 22199, "iter_tflops": 38.44790273754182, "iter_time": 0.5365986709594727, "loss": 0.14636264741420746, "lr": 1.368004764659398e-06, "seqlen": 8192.0, "step_tflops": 42.196011685106356, "step_time": 0.4889346809387207} +{"epoch": 0, "iter": 22200, "iter_tflops": 37.6460589174082, "iter_time": 0.5480279769897461, "loss": 0.18272773921489716, "lr": 1.3484104397250652e-06, "seqlen": 8192.0, "step_tflops": 40.93942129372109, "step_time": 0.5039419918060302} +{"epoch": 0, "iter": 22201, "iter_tflops": 20.312865335465805, "iter_time": 1.0156663360595704, "loss": 0.03202279657125473, "lr": 1.3289508504683206e-06, "seqlen": 8192.0, "step_tflops": 21.92568198014, "step_time": 0.9409556121826171} +{"epoch": 0, "iter": 22202, "iter_tflops": 25.718622243349188, "iter_time": 0.8021850204467773, "loss": 0.07057084888219833, "lr": 1.3096261889474587e-06, "seqlen": 8192.0, "step_tflops": 32.57501708535992, "step_time": 0.6333409881591796} +{"epoch": 0, "iter": 22203, "iter_tflops": 46.517142415642176, "iter_time": 0.44351592636108395, "loss": 0.044127147644758224, "lr": 1.2904366458890837e-06, "seqlen": 8192.0, "step_tflops": 50.683464335776385, "step_time": 0.4070576820373535} +{"epoch": 0, "iter": 22204, "iter_tflops": 48.91908620387782, "iter_time": 0.421739143371582, "loss": 0.056104425340890884, "lr": 1.271382410686237e-06, "seqlen": 8192.0, "step_tflops": 53.37575262930539, "step_time": 0.38652557563781736} 
+{"epoch": 0, "iter": 22205, "iter_tflops": 18.635052262819933, "iter_time": 0.9143083801269531, "loss": 0.036990076303482056, "lr": 1.2524636713965316e-06, "seqlen": 6800.0, "step_tflops": 19.588845284433432, "step_time": 0.869790138244629} +{"epoch": 0, "iter": 22206, "iter_tflops": 16.58987987764815, "iter_time": 1.0270227737426758, "loss": 0.029233671724796295, "lr": 1.233680614740283e-06, "seqlen": 6800.0, "step_tflops": 20.67717847560676, "step_time": 0.8240091590881348} +{"epoch": 0, "iter": 22207, "iter_tflops": 42.57276987458222, "iter_time": 0.4002131996154785, "loss": 0.019857777282595634, "lr": 1.2150334260986818e-06, "seqlen": 6800.0, "step_tflops": 46.4293839635397, "step_time": 0.3669698581695557} +{"epoch": 0, "iter": 22208, "iter_tflops": 41.22626753727675, "iter_time": 0.41328467178344724, "loss": 0.016861535608768463, "lr": 1.1965222895119443e-06, "seqlen": 6800.0, "step_tflops": 44.85112685854096, "step_time": 0.3798830852508545} +{"epoch": 0, "iter": 22209, "iter_tflops": 17.562294971502077, "iter_time": 1.1747378997802735, "loss": 0.245351642370224, "lr": 1.1781473876775246e-06, "seqlen": 8192.0, "step_tflops": 18.195349122903053, "step_time": 1.1338663177490234} +{"epoch": 0, "iter": 22210, "iter_tflops": 16.23618536351992, "iter_time": 1.2706860046386719, "loss": 0.20305076241493225, "lr": 1.1599089019482768e-06, "seqlen": 8192.0, "step_tflops": 20.799052699984497, "step_time": 0.9919246711730957} +{"epoch": 0, "iter": 22211, "iter_tflops": 36.81146238077877, "iter_time": 0.5604529724121095, "loss": 0.27094539999961853, "lr": 1.1418070123306989e-06, "seqlen": 8192.0, "step_tflops": 40.32113209013478, "step_time": 0.5116695003509522} +{"epoch": 0, "iter": 22212, "iter_tflops": 39.139197313193826, "iter_time": 0.5271210174560547, "loss": 0.17799024283885956, "lr": 1.123841897483131e-06, "seqlen": 8192.0, "step_tflops": 42.89512964952937, "step_time": 0.4809658737182617} +{"epoch": 0, "iter": 22213, "iter_tflops": 17.596721749773604, 
"iter_time": 1.1724396057128905, "loss": 0.07602840662002563, "lr": 1.1060137347140008e-06, "seqlen": 8192.0, "step_tflops": 18.794443277432034, "step_time": 1.0977230453491211} +{"epoch": 0, "iter": 22214, "iter_tflops": 15.669828826562894, "iter_time": 1.316612564086914, "loss": 0.08991792798042297, "lr": 1.088322699980081e-06, "seqlen": 8192.0, "step_tflops": 19.4001182495633, "step_time": 1.063451946258545} +{"epoch": 0, "iter": 22215, "iter_tflops": 38.342167894379585, "iter_time": 0.5380784301757813, "loss": 0.060325492173433304, "lr": 1.0707689678847398e-06, "seqlen": 8192.0, "step_tflops": 42.16154759100802, "step_time": 0.4893343505859375} +{"epoch": 0, "iter": 22216, "iter_tflops": 40.61705443768933, "iter_time": 0.5079416465759277, "loss": 0.11895328015089035, "lr": 1.0533527116762298e-06, "seqlen": 8192.0, "step_tflops": 44.70510254474848, "step_time": 0.4614930362701416} +{"epoch": 0, "iter": 22217, "iter_tflops": 17.024766966788615, "iter_time": 1.2118282470703126, "loss": 0.10029352456331253, "lr": 1.0360741032459636e-06, "seqlen": 8192.0, "step_tflops": 18.502959157474468, "step_time": 1.1150158920288087} +{"epoch": 0, "iter": 22218, "iter_tflops": 17.406833515117615, "iter_time": 1.1852295532226562, "loss": 0.14992094039916992, "lr": 1.0189333131268357e-06, "seqlen": 8192.0, "step_tflops": 21.66326663380059, "step_time": 0.9523537635803222} +{"epoch": 0, "iter": 22219, "iter_tflops": 44.572280405687714, "iter_time": 0.46286825180053714, "loss": 0.1573081612586975, "lr": 1.0019305104915205e-06, "seqlen": 8192.0, "step_tflops": 48.17277442426606, "step_time": 0.42827289390563966} +{"epoch": 0, "iter": 22220, "iter_tflops": 48.59616084035938, "iter_time": 0.42454163360595704, "loss": 0.10728538781404495, "lr": 9.850658631508198e-07, "seqlen": 8192.0, "step_tflops": 52.81507716657774, "step_time": 0.39062886238098143} +{"epoch": 0, "iter": 22221, "iter_tflops": 37.98511372315294, "iter_time": 0.5431362838745117, "loss": 0.030797231942415237, "lr": 
9.683395375519911e-07, "seqlen": 8192.0, "step_tflops": 40.99222106925626, "step_time": 0.5032928924560547} +{"epoch": 0, "iter": 22222, "iter_tflops": 37.12781074859809, "iter_time": 0.5556776199340819, "loss": 0.07502178102731705, "lr": 9.517516987771157e-07, "seqlen": 8192.0, "step_tflops": 41.25060055266223, "step_time": 0.5001404399871827} +{"epoch": 0, "iter": 22223, "iter_tflops": 41.68838935277494, "iter_time": 0.4948882369995118, "loss": 0.0786832645535469, "lr": 9.353025105414669e-07, "seqlen": 8192.0, "step_tflops": 46.049239092033226, "step_time": 0.44802246284484865} +{"epoch": 0, "iter": 22224, "iter_tflops": 46.48844006692216, "iter_time": 0.44378975677490234, "loss": 0.04636915400624275, "lr": 9.189921351918889e-07, "seqlen": 8192.0, "step_tflops": 51.00441805618302, "step_time": 0.4044962043762207} +{"epoch": 0, "iter": 22225, "iter_tflops": 30.006286953118778, "iter_time": 0.6875590286254882, "loss": 0.24269333481788635, "lr": 9.028207337052052e-07, "seqlen": 8192.0, "step_tflops": 32.53974650283347, "step_time": 0.6340274810791016} +{"epoch": 0, "iter": 22226, "iter_tflops": 8.163408222658612, "iter_time": 2.527264709472656, "loss": 0.26947957277297974, "lr": 8.867884656866181e-07, "seqlen": 8192.0, "step_tflops": 9.967712032010976, "step_time": 2.0697922897338867} +{"epoch": 0, "iter": 22227, "iter_tflops": 12.789916237404732, "iter_time": 1.613074951171875, "loss": 0.2806338369846344, "lr": 8.708954893681421e-07, "seqlen": 8192.0, "step_tflops": 15.814940592220823, "step_time": 1.304531837463379} +{"epoch": 0, "iter": 22228, "iter_tflops": 36.974826769450594, "iter_time": 0.5579767456054687, "loss": 0.31323322653770447, "lr": 8.551419616070321e-07, "seqlen": 8192.0, "step_tflops": 44.803678493563986, "step_time": 0.46047767066955564} +{"epoch": 0, "iter": 22229, "iter_tflops": 23.824888765620305, "iter_time": 0.6137252960205078, "loss": 0.2228921800851822, "lr": 8.395280378842551e-07, "seqlen": 5856.0, "step_tflops": 25.552872748295027, 
"step_time": 0.5722228202819825} +{"epoch": 0, "iter": 22230, "iter_tflops": 22.046916647191782, "iter_time": 0.6632191314697267, "loss": 0.14896318316459656, "lr": 8.240538723029295e-07, "seqlen": 5856.0, "step_tflops": 23.859651599272446, "step_time": 0.6128311157226562} +{"epoch": 0, "iter": 22231, "iter_tflops": 20.81659663065761, "iter_time": 0.7024172668457032, "loss": 0.11792291700839996, "lr": 8.087196175868206e-07, "seqlen": 5856.0, "step_tflops": 22.354682732525585, "step_time": 0.654088321685791} +{"epoch": 0, "iter": 22232, "iter_tflops": 21.397167053288445, "iter_time": 0.6833585433959962, "loss": 0.10483614355325699, "lr": 7.935254250788365e-07, "seqlen": 5856.0, "step_tflops": 22.910693388747468, "step_time": 0.6382145080566406} +{"epoch": 0, "iter": 22233, "iter_tflops": 17.552921116709957, "iter_time": 1.1753652496337892, "loss": 0.5062882304191589, "lr": 7.784714447395197e-07, "seqlen": 8192.0, "step_tflops": 18.76061541060117, "step_time": 1.0997023849487304} +{"epoch": 0, "iter": 22234, "iter_tflops": 15.82010340873968, "iter_time": 1.3041061096191406, "loss": 0.4899674952030182, "lr": 7.635578251455827e-07, "seqlen": 8192.0, "step_tflops": 21.503381728694897, "step_time": 0.9594348354339599} +{"epoch": 0, "iter": 22235, "iter_tflops": 35.95947908466098, "iter_time": 0.5737317123413085, "loss": 0.414406955242157, "lr": 7.487847134884229e-07, "seqlen": 8192.0, "step_tflops": 39.001339713479936, "step_time": 0.5289842262268066} +{"epoch": 0, "iter": 22236, "iter_tflops": 36.903442058835694, "iter_time": 0.5590560760498047, "loss": 0.3770672082901001, "lr": 7.341522555726971e-07, "seqlen": 8192.0, "step_tflops": 40.1321704343325, "step_time": 0.5140786876678467} +{"epoch": 0, "iter": 22237, "iter_tflops": 28.58225475419024, "iter_time": 0.5731286010742187, "loss": 0.006581464316695929, "lr": 7.196605958148505e-07, "seqlen": 6544.0, "step_tflops": 32.051060044993875, "step_time": 0.5111003398895264} +{"epoch": 0, "iter": 22238, "iter_tflops": 
31.999712550720552, "iter_time": 0.5119204635620117, "loss": 0.004413592163473368, "lr": 7.053098772417188e-07, "seqlen": 6544.0, "step_tflops": 35.543420662761044, "step_time": 0.4608815746307372} +{"epoch": 0, "iter": 22239, "iter_tflops": 36.88290860524468, "iter_time": 0.4441435966491699, "loss": 0.0047382027842104435, "lr": 6.911002414890983e-07, "seqlen": 6544.0, "step_tflops": 40.93224362085542, "step_time": 0.40020546722412104} +{"epoch": 0, "iter": 22240, "iter_tflops": 36.68951355046548, "iter_time": 0.446484733581543, "loss": 0.004114917479455471, "lr": 6.770318288003557e-07, "seqlen": 6544.0, "step_tflops": 40.5199151034368, "step_time": 0.4042779369354248} +{"epoch": 0, "iter": 22241, "iter_tflops": 22.838115291776507, "iter_time": 0.903362350463867, "loss": 0.506338894367218, "lr": 6.631047780250482e-07, "seqlen": 8192.0, "step_tflops": 24.316204114403753, "step_time": 0.8484504165649415} +{"epoch": 0, "iter": 22242, "iter_tflops": 15.204641329905193, "iter_time": 1.3568944549560547, "loss": 0.428520530462265, "lr": 6.493192266175418e-07, "seqlen": 8192.0, "step_tflops": 18.92750323610319, "step_time": 1.0900060749053955} +{"epoch": 0, "iter": 22243, "iter_tflops": 40.64196237099163, "iter_time": 0.5076303482055664, "loss": 0.49466753005981445, "lr": 6.356753106356666e-07, "seqlen": 8192.0, "step_tflops": 44.70480378014458, "step_time": 0.4614961204528809} +{"epoch": 0, "iter": 22244, "iter_tflops": 39.515249529879355, "iter_time": 0.5221045989990234, "loss": 0.46346527338027954, "lr": 6.221731647393608e-07, "seqlen": 8192.0, "step_tflops": 42.87942375088028, "step_time": 0.48114204216003414} +{"epoch": 0, "iter": 22245, "iter_tflops": 17.927047249168496, "iter_time": 1.1508361206054687, "loss": 0.3685618042945862, "lr": 6.088129221893584e-07, "seqlen": 8192.0, "step_tflops": 19.068575943604973, "step_time": 1.0819420166015625} +{"epoch": 0, "iter": 22246, "iter_tflops": 19.25953189436034, "iter_time": 1.0712146911621094, "loss": 0.4481590986251831, 
"lr": 5.95594714845854e-07, "seqlen": 8192.0, "step_tflops": 23.283677879211456, "step_time": 0.886075370788574} +{"epoch": 0, "iter": 22247, "iter_tflops": 42.664888991993784, "iter_time": 0.48356140136718756, "loss": 0.6575913429260254, "lr": 5.825186731672217e-07, "seqlen": 8192.0, "step_tflops": 46.51164735028548, "step_time": 0.4435683250427246} +{"epoch": 0, "iter": 22248, "iter_tflops": 37.795392846517856, "iter_time": 0.5458626556396484, "loss": 0.528810441493988, "lr": 5.69584926208711e-07, "seqlen": 8192.0, "step_tflops": 40.88139589954401, "step_time": 0.5046572666168213} +{"epoch": 0, "iter": 22249, "iter_tflops": 13.323310309865658, "iter_time": 1.1250548858642577, "loss": 0.018290584906935692, "lr": 5.567936016211856e-07, "seqlen": 6000.0, "step_tflops": 14.221164108276135, "step_time": 1.0540244979858397} +{"epoch": 0, "iter": 22250, "iter_tflops": 16.497864562101512, "iter_time": 0.9085694274902343, "loss": 0.016946328803896904, "lr": 5.441448256498544e-07, "seqlen": 6000.0, "step_tflops": 20.07684921667332, "step_time": 0.7466039714813233} +{"epoch": 0, "iter": 22251, "iter_tflops": 39.356215456148476, "iter_time": 0.3808662796020507, "loss": 0.021931370720267296, "lr": 5.316387231330288e-07, "seqlen": 6000.0, "step_tflops": 43.06325224587709, "step_time": 0.3480799655914307} +{"epoch": 0, "iter": 22252, "iter_tflops": 34.40181476457922, "iter_time": 0.43571699523925783, "loss": 0.030456120148301125, "lr": 5.192754175008918e-07, "seqlen": 6000.0, "step_tflops": 37.33828065187913, "step_time": 0.4014500694274903} +{"epoch": 0, "iter": 22253, "iter_tflops": 31.466607883125857, "iter_time": 0.6556503829956055, "loss": 0.47720590233802795, "lr": 5.070550307742783e-07, "seqlen": 8192.0, "step_tflops": 33.40548719836511, "step_time": 0.6175959472656251} +{"epoch": 0, "iter": 22254, "iter_tflops": 27.963163843020904, "iter_time": 0.7377953948974609, "loss": 0.5740593671798706, "lr": 4.949776835634751e-07, "seqlen": 8192.0, "step_tflops": 
35.316524366816324, "step_time": 0.5841767807006836} +{"epoch": 0, "iter": 22255, "iter_tflops": 42.49130687853577, "iter_time": 0.485536808013916, "loss": 0.558056116104126, "lr": 4.830434950670182e-07, "seqlen": 8192.0, "step_tflops": 46.29749073143137, "step_time": 0.44562012290954595} +{"epoch": 0, "iter": 22256, "iter_tflops": 43.208379501175834, "iter_time": 0.47747899246215825, "loss": 0.517464280128479, "lr": 4.7125258307053385e-07, "seqlen": 8192.0, "step_tflops": 46.976716083015866, "step_time": 0.43917700576782226} +{"epoch": 0, "iter": 22257, "iter_tflops": 22.39361162792256, "iter_time": 0.9212937088012695, "loss": 0.37863099575042725, "lr": 4.5960506394555956e-07, "seqlen": 8192.0, "step_tflops": 23.935796847583813, "step_time": 0.861934684753418} +{"epoch": 0, "iter": 22258, "iter_tflops": 10.861717311224611, "iter_time": 1.8994320068359376, "loss": 0.3724632263183594, "lr": 4.4810105264841304e-07, "seqlen": 8192.0, "step_tflops": 12.775677658603625, "step_time": 1.6148727340698243} +{"epoch": 0, "iter": 22259, "iter_tflops": 11.623691357530063, "iter_time": 1.7749175262451171, "loss": 0.38008952140808105, "lr": 4.367406627190351e-07, "seqlen": 8192.0, "step_tflops": 13.997299303818066, "step_time": 1.473933868408203} +{"epoch": 0, "iter": 22260, "iter_tflops": 24.572741437884414, "iter_time": 0.8395926666259765, "loss": 0.23950400948524475, "lr": 4.2552400627989043e-07, "seqlen": 8192.0, "step_tflops": 27.60377073209043, "step_time": 0.7474012775421142} +{"epoch": 0, "iter": 22261, "iter_tflops": 14.804697506279965, "iter_time": 1.0622060241699218, "loss": 0.21729588508605957, "lr": 4.1445119403485165e-07, "seqlen": 6288.0, "step_tflops": 15.87088364112766, "step_time": 0.9908483505249024} +{"epoch": 0, "iter": 22262, "iter_tflops": 18.140515889516532, "iter_time": 0.8668793640136718, "loss": 0.26967859268188477, "lr": 4.0352233526810054e-07, "seqlen": 6288.0, "step_tflops": 20.827678304588222, "step_time": 0.7550356140136718} +{"epoch": 0, "iter": 
22263, "iter_tflops": 29.388578975263155, "iter_time": 0.5350935440063477, "loss": 0.16225963830947876, "lr": 3.92737537843067e-07, "seqlen": 6288.0, "step_tflops": 31.258999326215783, "step_time": 0.503075569152832} +{"epoch": 0, "iter": 22264, "iter_tflops": 28.69561230590349, "iter_time": 0.5480154495239258, "loss": 0.20797181129455566, "lr": 3.8209690820134145e-07, "seqlen": 6288.0, "step_tflops": 30.552425453470068, "step_time": 0.5147099990844727} +{"epoch": 0, "iter": 22265, "iter_tflops": 27.04314970923765, "iter_time": 0.7628953628540039, "loss": 0.39193689823150635, "lr": 3.716005513616494e-07, "seqlen": 8192.0, "step_tflops": 28.625127508108125, "step_time": 0.7207336807250976} +{"epoch": 0, "iter": 22266, "iter_tflops": 11.734777346150953, "iter_time": 1.758115463256836, "loss": 0.4710119962692261, "lr": 3.612485709187885e-07, "seqlen": 8192.0, "step_tflops": 16.48715142222507, "step_time": 1.2513437271118166} +{"epoch": 0, "iter": 22267, "iter_tflops": 41.72117984031161, "iter_time": 0.494499282836914, "loss": 0.4621184468269348, "lr": 3.5104106904263134e-07, "seqlen": 8192.0, "step_tflops": 45.18846611805415, "step_time": 0.45655662345886233} +{"epoch": 0, "iter": 22268, "iter_tflops": 41.454525238814725, "iter_time": 0.4976801300048828, "loss": 0.4727238714694977, "lr": 3.409781464770978e-07, "seqlen": 8192.0, "step_tflops": 44.478703239002655, "step_time": 0.4638420639038086} +{"epoch": 0, "iter": 22269, "iter_tflops": 25.054138069881095, "iter_time": 0.8234605178833008, "loss": 0.5759679675102234, "lr": 3.310599025391725e-07, "seqlen": 8192.0, "step_tflops": 26.29879018039012, "step_time": 0.7844883117675782} +{"epoch": 0, "iter": 22270, "iter_tflops": 15.544948077755748, "iter_time": 1.3271896057128907, "loss": 0.46665042638778687, "lr": 3.21286435117919e-07, "seqlen": 8192.0, "step_tflops": 17.388099747652618, "step_time": 1.1865065078735353} +{"epoch": 0, "iter": 22271, "iter_tflops": 36.246544345622205, "iter_time": 0.569187873840332, "loss": 
0.5584512948989868, "lr": 3.1165784067351213e-07, "seqlen": 8192.0, "step_tflops": 39.43320728321256, "step_time": 0.5231908569335937} +{"epoch": 0, "iter": 22272, "iter_tflops": 36.37682435832163, "iter_time": 0.5671493835449218, "loss": 0.4408750832080841, "lr": 3.021742142362971e-07, "seqlen": 8192.0, "step_tflops": 39.75900763343727, "step_time": 0.5189036331176758} +{"epoch": 0, "iter": 22273, "iter_tflops": 33.65799763819853, "iter_time": 0.6129625930786132, "loss": 0.0507841631770134, "lr": 2.928356494058337e-07, "seqlen": 8192.0, "step_tflops": 37.39503998094619, "step_time": 0.5517066841125488} +{"epoch": 0, "iter": 22274, "iter_tflops": 33.94520436464514, "iter_time": 0.6077763824462891, "loss": 0.03781573474407196, "lr": 2.8364223834999034e-07, "seqlen": 8192.0, "step_tflops": 37.6843244606301, "step_time": 0.5474714965820312} +{"epoch": 0, "iter": 22275, "iter_tflops": 40.20732213198286, "iter_time": 0.5131178207397461, "loss": 0.030312255024909973, "lr": 2.74594071804018e-07, "seqlen": 8192.0, "step_tflops": 44.29197966633587, "step_time": 0.4657975025177002} +{"epoch": 0, "iter": 22276, "iter_tflops": 38.59991656628892, "iter_time": 0.5344854431152344, "loss": 0.0673123300075531, "lr": 2.6569123906967083e-07, "seqlen": 8192.0, "step_tflops": 42.441743490227786, "step_time": 0.48610381698608396} +{"epoch": 0, "iter": 22277, "iter_tflops": 15.974667557739277, "iter_time": 1.2914881286621094, "loss": 0.4863278865814209, "lr": 2.569338280143124e-07, "seqlen": 8192.0, "step_tflops": 16.921854546457116, "step_time": 1.2191981353759767} +{"epoch": 0, "iter": 22278, "iter_tflops": 18.62363753642189, "iter_time": 1.1077907562255858, "loss": 0.5645962357521057, "lr": 2.483219250700558e-07, "seqlen": 8192.0, "step_tflops": 21.93028842250253, "step_time": 0.9407579650878906} +{"epoch": 0, "iter": 22279, "iter_tflops": 41.632238516336194, "iter_time": 0.49555570983886715, "loss": 0.3287180960178375, "lr": 2.398556152329046e-07, "seqlen": 8192.0, "step_tflops": 
44.63232751540514, "step_time": 0.4622455215454102} +{"epoch": 0, "iter": 22280, "iter_tflops": 41.051849223942725, "iter_time": 0.5025618553161622, "loss": 0.4777749180793762, "lr": 2.3153498206192004e-07, "seqlen": 8192.0, "step_tflops": 44.3413288415054, "step_time": 0.46527909851074223} +{"epoch": 0, "iter": 22281, "iter_tflops": 26.12493738210762, "iter_time": 0.6003727188110352, "loss": 0.011683205142617226, "lr": 2.23360107678392e-07, "seqlen": 6272.0, "step_tflops": 28.068242781188236, "step_time": 0.5588059005737305} +{"epoch": 0, "iter": 22282, "iter_tflops": 12.48540845328106, "iter_time": 1.256242416381836, "loss": 0.0045702108182013035, "lr": 2.1533107276502929e-07, "seqlen": 6272.0, "step_tflops": 15.701480078671986, "step_time": 0.9989312858581543} +{"epoch": 0, "iter": 22283, "iter_tflops": 29.910256165976456, "iter_time": 0.5243920211791993, "loss": 0.01407061330974102, "lr": 2.074479565651688e-07, "seqlen": 6272.0, "step_tflops": 33.35738236668111, "step_time": 0.470201753616333} +{"epoch": 0, "iter": 22284, "iter_tflops": 32.51386036474729, "iter_time": 0.4824004135131836, "loss": 0.005377633962780237, "lr": 1.9971083688197943e-07, "seqlen": 6272.0, "step_tflops": 35.93264753773083, "step_time": 0.4365027561187744} +{"epoch": 0, "iter": 22285, "iter_tflops": 16.681582082179702, "iter_time": 1.2367588043212892, "loss": 0.4503002464771271, "lr": 1.9211979007771274e-07, "seqlen": 8192.0, "step_tflops": 17.47421259713141, "step_time": 1.1806594085693358} +{"epoch": 0, "iter": 22286, "iter_tflops": 9.74582150461592, "iter_time": 2.1169168243408203, "loss": 0.5328940749168396, "lr": 1.8467489107293513e-07, "seqlen": 8192.0, "step_tflops": 12.863525436155408, "step_time": 1.6038444213867187} +{"epoch": 0, "iter": 22287, "iter_tflops": 11.991030315827487, "iter_time": 1.720543853759766, "loss": 0.47377675771713257, "lr": 1.7737621334579346e-07, "seqlen": 8192.0, "step_tflops": 15.389234401730052, "step_time": 1.3406185760498046} +{"epoch": 0, "iter": 
22288, "iter_tflops": 36.87404913846325, "iter_time": 0.559501708984375, "loss": 0.36294031143188477, "lr": 1.7022382893129074e-07, "seqlen": 8192.0, "step_tflops": 40.18022550053441, "step_time": 0.5134638557434081} +{"epoch": 0, "iter": 22289, "iter_tflops": 20.483183596743995, "iter_time": 0.737778205871582, "loss": 0.1424904316663742, "lr": 1.632178084205732e-07, "seqlen": 6048.0, "step_tflops": 22.337483239953624, "step_time": 0.6765330848693848} +{"epoch": 0, "iter": 22290, "iter_tflops": 24.920144550622116, "iter_time": 0.6064188919067383, "loss": 0.1930394172668457, "lr": 1.56358220960236e-07, "seqlen": 6048.0, "step_tflops": 26.605581737423087, "step_time": 0.568002857208252} +{"epoch": 0, "iter": 22291, "iter_tflops": 27.034209608719426, "iter_time": 0.5589971618652343, "loss": 0.3738650977611542, "lr": 1.4964513425163694e-07, "seqlen": 6048.0, "step_tflops": 28.888960798488768, "step_time": 0.5231079978942871} +{"epoch": 0, "iter": 22292, "iter_tflops": 25.397363096017003, "iter_time": 0.5950242309570313, "loss": 0.19547127187252045, "lr": 1.430786145502322e-07, "seqlen": 6048.0, "step_tflops": 27.020659242769256, "step_time": 0.5592774887084961} +{"epoch": 0, "iter": 22293, "iter_tflops": 23.65100065908933, "iter_time": 0.6285952529907227, "loss": 0.01522032916545868, "lr": 1.3665872666491996e-07, "seqlen": 5952.0, "step_tflops": 25.44439948557939, "step_time": 0.5842899436950684} +{"epoch": 0, "iter": 22294, "iter_tflops": 6.574794233476468, "iter_time": 2.261197265625, "loss": 0.0023350790143013, "lr": 1.3038553395740284e-07, "seqlen": 5952.0, "step_tflops": 7.80880042157538, "step_time": 1.9038656311035154} +{"epoch": 0, "iter": 22295, "iter_tflops": 9.307995118574413, "iter_time": 1.5972190093994143, "loss": 0.006989387329667807, "lr": 1.2425909834155813e-07, "seqlen": 5952.0, "step_tflops": 11.11745963635995, "step_time": 1.337257541656494} +{"epoch": 0, "iter": 22296, "iter_tflops": 35.461645110907, "iter_time": 0.4192390594482422, "loss": 
0.014442894607782364, "lr": 1.1827948028283353e-07, "seqlen": 5952.0, "step_tflops": 38.884023746307044, "step_time": 0.3823397197723389} +{"epoch": 0, "iter": 22297, "iter_tflops": 16.403791518214504, "iter_time": 0.8938636779785156, "loss": 0.2882445454597473, "lr": 1.1244673879764411e-07, "seqlen": 5872.0, "step_tflops": 17.24502686246956, "step_time": 0.8502598190307618} +{"epoch": 0, "iter": 22298, "iter_tflops": 7.96400647644555, "iter_time": 1.8411277618408204, "loss": 0.2903327941894531, "lr": 1.0676093145279619e-07, "seqlen": 5872.0, "step_tflops": 10.368873554298977, "step_time": 1.4141124725341796} +{"epoch": 0, "iter": 22299, "iter_tflops": 21.977527347111227, "iter_time": 0.667170295715332, "loss": 0.2575576901435852, "lr": 1.0122211436490946e-07, "seqlen": 5872.0, "step_tflops": 23.605596312939323, "step_time": 0.6211558151245118} +{"epoch": 0, "iter": 22300, "iter_tflops": 21.794049029736403, "iter_time": 0.6727870254516601, "loss": 0.25173521041870117, "lr": 9.583034219987408e-08, "seqlen": 5872.0, "step_tflops": 23.491913799469362, "step_time": 0.6241617240905761} +{"epoch": 0, "iter": 22301, "iter_tflops": 15.32841137618811, "iter_time": 1.3459381408691407, "loss": 0.7495439052581787, "lr": 9.058566817230606e-08, "seqlen": 8192.0, "step_tflops": 16.370268219407656, "step_time": 1.2602782821655274} +{"epoch": 0, "iter": 22302, "iter_tflops": 17.515641499291316, "iter_time": 1.1778668518066406, "loss": 0.5031072497367859, "lr": 8.548814404501615e-08, "seqlen": 8192.0, "step_tflops": 24.684968598154345, "step_time": 0.8357755622863771} +{"epoch": 0, "iter": 22303, "iter_tflops": 42.44074333050599, "iter_time": 0.4861152725219726, "loss": 0.6424927115440369, "lr": 8.053782012851507e-08, "seqlen": 8192.0, "step_tflops": 45.742407185899836, "step_time": 0.4510277175903321} +{"epoch": 0, "iter": 22304, "iter_tflops": 43.85327585077511, "iter_time": 0.47045729446411133, "loss": 0.558439314365387, "lr": 7.573474528049739e-08, "seqlen": 8192.0, 
"step_tflops": 47.32405814165935, "step_time": 0.4359535999298096} +{"epoch": 0, "iter": 22305, "iter_tflops": 28.189364795171517, "iter_time": 0.7318750762939452, "loss": 0.5733075737953186, "lr": 7.107896690537518e-08, "seqlen": 8192.0, "step_tflops": 29.984288263048025, "step_time": 0.6880634727478028} +{"epoch": 0, "iter": 22306, "iter_tflops": 12.847371975144508, "iter_time": 1.6058609924316407, "loss": 0.6014383435249329, "lr": 6.657053095380006e-08, "seqlen": 8192.0, "step_tflops": 16.3071544422113, "step_time": 1.2651559524536133} +{"epoch": 0, "iter": 22307, "iter_tflops": 43.72421577442034, "iter_time": 0.4718459358215332, "loss": 0.6711877584457397, "lr": 6.22094819222152e-08, "seqlen": 8192.0, "step_tflops": 47.60063091711018, "step_time": 0.43342058944702144} +{"epoch": 0, "iter": 22308, "iter_tflops": 42.008416842544115, "iter_time": 0.49111809158325204, "loss": 0.4569561183452606, "lr": 5.799586285241243e-08, "seqlen": 8192.0, "step_tflops": 45.03285062851521, "step_time": 0.4581343002319336} +{"epoch": 0, "iter": 22309, "iter_tflops": 27.90505149889365, "iter_time": 0.7393318557739257, "loss": 0.12730728089809418, "lr": 5.3929715331114125e-08, "seqlen": 8192.0, "step_tflops": 29.616690283549868, "step_time": 0.6966036148071288} +{"epoch": 0, "iter": 22310, "iter_tflops": 18.2973358339235, "iter_time": 1.1275463104248047, "loss": 0.14368225634098053, "lr": 5.001107948955197e-08, "seqlen": 8192.0, "step_tflops": 22.11547940216383, "step_time": 0.9328802299499513} +{"epoch": 0, "iter": 22311, "iter_tflops": 50.49090823129946, "iter_time": 0.40861006927490234, "loss": 0.21067717671394348, "lr": 4.623999400308054e-08, "seqlen": 8192.0, "step_tflops": 54.80442049662517, "step_time": 0.37644944190979013} +{"epoch": 0, "iter": 22312, "iter_tflops": 49.30867268617648, "iter_time": 0.41840699386596686, "loss": 0.2100938856601715, "lr": 4.261649609079099e-08, "seqlen": 8192.0, "step_tflops": 53.46566247861588, "step_time": 0.3858755798339844} +{"epoch": 0, 
"iter": 22313, "iter_tflops": 24.336158969019493, "iter_time": 0.8477547149658202, "loss": 0.5421793460845947, "lr": 3.9140621515144634e-08, "seqlen": 8192.0, "step_tflops": 25.672863823718234, "step_time": 0.8036148071289064} +{"epoch": 0, "iter": 22314, "iter_tflops": 12.721268701609599, "iter_time": 1.6217795562744142, "loss": 0.4467580020427704, "lr": 3.5812404581621605e-08, "seqlen": 8192.0, "step_tflops": 14.62122499751846, "step_time": 1.4110372772216797} +{"epoch": 0, "iter": 22315, "iter_tflops": 31.85403416280849, "iter_time": 0.6476760025024415, "loss": 0.6356995105743408, "lr": 3.263187813837776e-08, "seqlen": 8192.0, "step_tflops": 34.6493121799087, "step_time": 0.5954257736206056} +{"epoch": 0, "iter": 22316, "iter_tflops": 34.95379188011341, "iter_time": 0.5902390670776366, "loss": 0.42365336418151855, "lr": 2.9599073575926615e-08, "seqlen": 8192.0, "step_tflops": 37.96800837619459, "step_time": 0.5433809776306153} +{"epoch": 0, "iter": 22317, "iter_tflops": 22.517605782208438, "iter_time": 0.9162205657958984, "loss": 0.3977116346359253, "lr": 2.671402082682295e-08, "seqlen": 8192.0, "step_tflops": 23.873751573280565, "step_time": 0.8641747589111327} +{"epoch": 0, "iter": 22318, "iter_tflops": 23.878815346840504, "iter_time": 0.8639915008544922, "loss": 0.340617299079895, "lr": 2.397674836537467e-08, "seqlen": 8192.0, "step_tflops": 29.392810933951356, "step_time": 0.7019095096588135} +{"epoch": 0, "iter": 22319, "iter_tflops": 43.43788025691349, "iter_time": 0.4749562683105469, "loss": 0.5599191188812256, "lr": 2.1387283207354723e-08, "seqlen": 8192.0, "step_tflops": 46.675790066836335, "step_time": 0.4420084476470947} +{"epoch": 0, "iter": 22320, "iter_tflops": 42.668289321828375, "iter_time": 0.48352286529541016, "loss": 0.48126697540283203, "lr": 1.894565090973799e-08, "seqlen": 8192.0, "step_tflops": 45.78063534632064, "step_time": 0.45065109634399414} +{"epoch": 0, "iter": 22321, "iter_tflops": 38.20492607168306, "iter_time": 0.540011344909668, 
"loss": 0.4531111419200897, "lr": 1.6651875570451447e-08, "seqlen": 8192.0, "step_tflops": 41.46982755294391, "step_time": 0.49749648666381835} +{"epoch": 0, "iter": 22322, "iter_tflops": 35.547158466066534, "iter_time": 0.5803865737915039, "loss": 0.40231525897979736, "lr": 1.4505979828129401e-08, "seqlen": 8192.0, "step_tflops": 38.6468555973251, "step_time": 0.5338362770080567} +{"epoch": 0, "iter": 22323, "iter_tflops": 38.95637905919001, "iter_time": 0.5295947418212891, "loss": 0.5021253824234009, "lr": 1.2507984861898635e-08, "seqlen": 8192.0, "step_tflops": 42.10308690174544, "step_time": 0.4900137977600098} +{"epoch": 0, "iter": 22324, "iter_tflops": 35.45551631676978, "iter_time": 0.581886703491211, "loss": 0.524980902671814, "lr": 1.0657910391161929e-08, "seqlen": 8192.0, "step_tflops": 38.69854967501649, "step_time": 0.5331231708526611} +{"epoch": 0, "iter": 22325, "iter_tflops": 33.87875116321824, "iter_time": 0.608968536376953, "loss": 0.017217472195625305, "lr": 8.955774675406536e-09, "seqlen": 8192.0, "step_tflops": 37.64313157165364, "step_time": 0.5480705947875977} +{"epoch": 0, "iter": 22326, "iter_tflops": 42.03348962524035, "iter_time": 0.4908251419067383, "loss": 0.03901355713605881, "lr": 7.401594514026e-09, "seqlen": 8192.0, "step_tflops": 47.02791069770091, "step_time": 0.438698917388916} +{"epoch": 0, "iter": 22327, "iter_tflops": 47.869407749825285, "iter_time": 0.43098702239990233, "loss": 0.024179600179195404, "lr": 5.995385246151952e-09, "seqlen": 8192.0, "step_tflops": 52.76085793489019, "step_time": 0.39103028869628914} +{"epoch": 0, "iter": 22328, "iter_tflops": 44.066265231908226, "iter_time": 0.4681833915710449, "loss": 0.01609279029071331, "lr": 4.737160750500902e-09, "seqlen": 8192.0, "step_tflops": 48.612570978497956, "step_time": 0.42439832115173337} +{"epoch": 0, "iter": 22329, "iter_tflops": 46.309562207771556, "iter_time": 0.445503963470459, "loss": 0.012725258246064186, "lr": 3.6269334452393442e-09, "seqlen": 8192.0, 
"step_tflops": 51.41180946775228, "step_time": 0.4012909431457519} +{"epoch": 0, "iter": 22330, "iter_tflops": 42.293277652979626, "iter_time": 0.487810230255127, "loss": 0.024859093129634857, "lr": 2.664714287865522e-09, "seqlen": 8192.0, "step_tflops": 46.75542125736434, "step_time": 0.4412556438446045} +{"epoch": 0, "iter": 22331, "iter_tflops": 46.47072207091074, "iter_time": 0.4439589614868164, "loss": 0.011620902456343174, "lr": 1.8505127750911844e-09, "seqlen": 8192.0, "step_tflops": 51.378682392857144, "step_time": 0.40154968070983893} +{"epoch": 0, "iter": 22332, "iter_tflops": 46.091552983879566, "iter_time": 0.4476111602783203, "loss": 0.017819594591856003, "lr": 1.184336942758324e-09, "seqlen": 8192.0, "step_tflops": 50.80912758644728, "step_time": 0.40605093002319337} diff --git a/projects/arctic_embed/examples/finetune_models/wandb/latest-run b/projects/arctic_embed/examples/finetune_models/wandb/latest-run new file mode 120000 index 00000000..d0be3038 --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/wandb/latest-run @@ -0,0 +1 @@ +run-20251029_160201-xaj5tmfp \ No newline at end of file diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/config.yaml b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/config.yaml new file mode 100644 index 00000000..262a39ea --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/config.yaml @@ -0,0 +1,234 @@ +wandb_version: 1 + +type: + desc: null + value: biencoder +code: + desc: null + value: train.py +skip_validation: + desc: null + value: false +model: + desc: null + value: + type: biencoder + name_or_path: /scratch/local + dtype: torch.float32 + save_name: null + attn_implementation: sdpa + disable_activation_checkpoint: true + peft_config: null + pooling: first_token + kwargs: + trust_remote_code: true +tokenizer: + desc: null + value: + type: fake + 
name_or_path: /scratch/local +data: + desc: null + value: + type: contrastive_pretokenized + sources: [] + eval_sources: [] + train_eval_split: + - 1.0 + - 0.0 + max_length: 8192 + num_proc: 16 + dl_num_workers: 2 + seed: 42 + use_data_cache: null + cache_processed_data: null + cache_dir: /tmp + cache_fs_type: shared + filesystem: s3 + root_directory: s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/synthetic_experiment_1/unified_h2h/batched_512/data/ + split_factor: 4 + pad_value: 0 + left_pad: false + max_seq_length_query: 1024 + max_seq_length_doc: 1024 + eval_root_directories: null + eval_split_factor: 1 + eval_max_seq_length_query: null + eval_max_seq_length_doc: null + preserve_relations_on_split: true +logger: + desc: null + value: + output_dir: /dev/null + level: INFO + print_output_ranks: + - 0 + file_output_ranks: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 +wandb: + desc: null + value: + enable: true + entity: null + project: arctic-embed + name: arctic2m-synthetic-experiment-1-unified-h2h-20251029T011519Z +scheduler: + desc: null + value: + type: wds + learning_rate: null +optimizer: + desc: null + value: + type: fusedadam + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + learning_rate: 3.0e-05 +deepspeed: + desc: null + value: + gradient_clipping: 10.0 + zero_optimization: + stage: 1.0 + communication_data_type: fp32 + train_micro_batch_size_per_gpu: 1 + train_batch_size: 8.0 + gradient_accumulation_steps: 1 + sequence_parallel_size: 1 + steps_per_print: 10 + prescale_gradients: false + wall_clock_breakdown: false +epochs: + desc: null + value: 1 +loss_log_interval: + desc: null + value: 0 +train_log_iter_interval: + desc: null + value: 1 +train_log_metrics_path: + desc: null + value: train-log-metrics.jsonl +gradient_accumulation_steps: + desc: null + value: 1 +micro_batch_size: + desc: null + value: 1 +sequence_parallel_size: + desc: null + value: 1 +activation_checkpoint_cpu_offload: + desc: null + 
value: false +tiled_mlp_compute: + desc: null + value: false +seed: + desc: null + value: 42 +checkpoint: + desc: null + value: + - type: biencoder_s3 + output_dir: /scratch/checkpoints-temp + enabled: true + auto_resume: false + save_every_n_steps: 3000 + save_every_n_epochs: 0 + save_end_of_training: true +train_iters: + desc: null + value: 0 +eval_frequency: + desc: null + value: 0 +exit_iteration: + desc: null + value: 0 +min_iterations: + desc: null + value: 0 +overfit_first_batch: + desc: null + value: false +mem_profiler: + desc: null + value: null +mem_profiler_dir: + desc: null + value: /dev/null/mem-prof +mem_profiler_max_entries: + desc: null + value: 100000 +kill_switch_path: + desc: null + value: /tmp/at_kill_switch +use_in_batch_negatives: + desc: null + value: false +loss_temperature: + desc: null + value: 0.02 +mrl_dim: + desc: null + value: null +eval_interval: + desc: null + value: null +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.6 + framework: xgboost + huggingface_version: 4.53.0 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1761700565.0 + t: + 1: + - 1 + - 5 + - 6 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + 2: + - 1 + - 5 + - 6 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + 3: + - 2 + - 13 + - 16 + - 23 + - 24 + - 61 + 4: 3.10.12 + 5: 0.16.6 + 6: 4.53.0 + 8: + - 5 + 13: linux-x86_64 diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/requirements.txt b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/requirements.txt new file mode 100644 index 00000000..a2f41cd1 --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/requirements.txt @@ -0,0 +1,400 @@ +Babel==2.14.0 +Cython==3.0.10 +DataProperty==1.0.1 +GitPython==3.1.43 +Jinja2==3.1.2 +Markdown==3.6 +MarkupSafe==2.1.3 +PyJWT==2.8.0 +PyYAML==6.0.1 +Pygments==2.17.2 +Send2Trash==1.8.2 
+Werkzeug==3.0.2 +absl-py==2.1.0 +accelerate==0.33.0 +aioboto3==12.3.0 +aiobotocore==2.11.2 +aiohttp==3.8.6 +aioitertools==0.11.0 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.3.0 +apex==0.1 +appdirs==1.4.4 +arctic-embed==0.1.0 +arctic-embed==0.1.0 +arctic_training==0.0.6.dev0 +argcomplete==3.6.2 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +arrow==1.3.0 +asciitree==0.3.3 +asn1crypto==1.5.1 +asttokens==2.4.1 +astunparse==1.6.3 +async-lru==2.0.4 +async-timeout==4.0.3 +attrs==23.1.0 +audioread==3.0.1 +awscli==1.32.34 +beautifulsoup4==4.12.3 +best-download==0.1.2 +bleach==6.1.0 +blis==0.7.11 +boto3==1.34.34 +botocore==1.34.34 +cachetools==5.3.3 +catalogue==2.0.10 +certifi==2023.7.22 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==2.2.1 +cmake==3.29.3 +colorama==0.4.4 +comm==0.2.2 +confection==0.1.5 +contourpy==1.2.1 +cramjam==2.8.3 +cryptography==42.0.5 +cuda-python==12.5.0 +cudf==24.4.0 +cugraph-dgl==24.4.0 +cugraph-equivariant==24.4.0 +cugraph-pyg==24.4.0 +cugraph-service-client==24.4.0 +cugraph-service-server==24.4.0 +cugraph==24.4.0 +cuml==24.4.0 +cupy-cuda12x==13.0.0 +cycler==0.12.1 +cymem==2.0.8 +dask-cuda==24.4.0 +dask-cudf==24.4.0 +dask-expr==0.4.0 +dask==2024.1.1 +datasets==2.21.0 +debugpy==1.8.1 +decorator==5.1.1 +deepspeed==0.15.1 +deepspeed==0.17.1 +defusedxml==0.7.1 +devtools==0.12.2 +dill==0.3.7 +distributed==2024.1.1 +distro==1.9.0 +dm-tree==0.1.8 +dnspython==2.6.1 +docker-pycreds==0.4.0 +docutils==0.16 +einops==0.8.0 +entrypoints==0.4 +evaluate==0.4.3 +exceptiongroup==1.2.0 +execnet==2.1.1 +executing==2.0.1 +expecttest==0.1.3 +fastcore==1.5.29 +fasteners==0.19 +fastjsonschema==2.19.1 +fastparquet==2023.10.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.6.3 +fonttools==4.51.0 +fqdn==1.5.1 +frozenlist==1.4.0 +fsspec==2024.2.0 +gast==0.5.4 +ghapi==1.0.5 +gitdb==4.0.11 +google-auth-oauthlib==0.4.6 +google-auth==2.29.0 +grpcio==1.62.1 +h11==0.14.0 +hf-xet==1.1.5 +hf_transfer==0.1.8 
+hjson==3.1.0 +httpcore==1.0.5 +httpx==0.27.0 +huggingface-hub==0.26.2 +huggingface-hub==0.33.1 +hypothesis==5.35.1 +idna==3.4 +igraph==0.11.5 +importlib_metadata==7.1.0 +iniconfig==2.0.0 +intel-openmp==2021.4.0 +ipykernel==6.29.4 +ipython-genutils==0.2.0 +ipython==8.23.0 +ipywidgets==8.1.2 +isoduration==20.11.0 +jedi==0.19.1 +jiter==0.10.0 +jmespath==1.0.1 +joblib==1.4.2 +json5==0.9.24 +jsonlines==4.0.0 +jsonpointer==2.4 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-events==0.10.0 +jupyter-lsp==2.2.4 +jupyter-tensorboard==0.2.0 +jupyter_client==8.6.1 +jupyter_core==5.7.2 +jupyter_server==2.13.0 +jupyter_server_terminals==0.5.3 +jupyterlab==4.1.5 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.25.4 +jupyterlab_widgets==3.0.10 +jupytext==1.16.2 +kiwisolver==1.4.5 +kvikio==24.4.0 +langcodes==3.4.0 +language_data==1.2.0 +lazy_loader==0.4 +librosa==0.10.1 +liger-kernel==0.2.1 +lightning-thunder==0.2.0.dev0 +lightning-utilities==0.11.2 +lintrunner==0.12.5 +llvmlite==0.42.0 +lm_eval==0.4.5 +locket==1.0.0 +loguru==0.7.3 +looseversion==1.3.0 +lxml==5.3.0 +marisa-trie==1.1.1 +markdown-it-py==3.0.0 +matplotlib-inline==0.1.6 +matplotlib==3.8.4 +mbstrdecoder==1.1.3 +mdit-py-plugins==0.4.1 +mdurl==0.1.2 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +more-itertools==10.5.0 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.4 +multiprocess==0.70.15 +mup==1.0.0 +murmurhash==1.0.10 +nbclient==0.10.0 +nbconvert==7.16.3 +nbformat==5.10.4 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +nltk==3.9.1 +notebook==6.4.10 +notebook_shim==0.2.4 +numba==0.59.1 +numcodecs==0.11.0 +numexpr==2.10.1 +numpy==1.26.2 +nvfuser==0.2.3a0+f73ff1b +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==9.1.0.70 +nvidia-cudnn-frontend==1.3.0 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 
+nvidia-cusparse-cu12==12.1.0.106 +nvidia-dali-cuda120==1.38.0 +nvidia-modelopt==0.11.2 +nvidia-nccl-cu12==2.20.5 +nvidia-nvimgcodec-cu12==0.2.0.7 +nvidia-nvjitlink-cu12==12.9.86 +nvidia-nvtx-cu12==12.1.105 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +nx-cugraph==24.4.0 +oauthlib==3.2.2 +onnx==1.16.0 +openai==1.92.2 +opencv==4.7.0 +opt-einsum==3.3.0 +optree==0.11.0 +overrides==7.7.0 +packaging==23.2 +pandas==2.1.3 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.2 +pathvalidate==3.2.1 +peft==0.12.0 +pexpect==4.9.0 +pillow==10.3.0 +pip==24.0 +platformdirs==3.11.0 +pluggy==1.5.0 +ply==3.11 +polygraphy==0.49.11 +pooch==1.8.1 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.10.0 +prometheus_client==0.20.0 +prompt-toolkit==3.0.43 +protobuf==3.20.3 +pssh==2.3.4 +psutil==5.9.6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +py-cpuinfo==9.0.0 +py==1.11.0 +pyOpenSSL==24.1.0 +pyarrow==18.0.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pybind11==2.12.0 +pybind11_global==2.12.0 +pycocotools==2.0+nv0.8.0 +pycparser==2.22 +pydantic==2.11.7 +pydantic==2.9.1 +pydantic_core==2.23.3 +pydantic_core==2.33.2 +pylibcugraph==24.4.0 +pylibcugraphops==24.4.0 +pylibraft==24.4.0 +pylibwholegraph==24.4.0 +pynvjitlink==0.2.3 +pynvml==11.4.1 +pyparsing==3.1.2 +pytablewriter==1.2.0 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==14.0 +pytest-shard==0.1.2 +pytest-xdist==3.6.1 +pytest==8.1.1 +python-dateutil==2.8.2 +python-hostlist==1.23.0 +python-json-logger==2.0.7 +pytorch-quantization==2.1.2 +pytorch-triton==3.0.0+989adb9a2 +pytz==2023.3.post1 +pyzmq==25.1.2 +raft-dask==24.4.0 +rapids-dask-dependency==24.4.0a0 +referencing==0.34.0 +regex==2023.12.25 +remote-pdb==2.1.0 +requests-oauthlib==2.0.0 +requests==2.32.3 +retry==0.9.2 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rich==13.7.1 +rmm==24.4.0 +rouge-score==0.1.2 +rpds-py==0.18.0 +rsa==4.7.2 +s3fs==2024.2.0 +s3transfer==0.10.1 +sacrebleu==2.4.3 +safetensors==0.4.2 +safetensors==0.5.3 +scikit-learn==1.5.2 +scipy==1.14.1 +seaborn==0.13.2 
+sentencepiece==0.2.0 +sentry-sdk==1.44.1 +setproctitle==1.3.3 +setuptools==69.2.0 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sniffio==1.3.1 +snowflake-connector-python==3.7.1 +snowflake-snowpark-python==1.14.0 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.4 +sphinx_glpi_theme==0.6 +sqlitedict==2.1.0 +srsly==2.4.8 +stack-data==0.6.3 +supervisor==4.2.1 +sympy==1.12 +tabledata==1.3.3 +tabulate==0.9.0 +tbb==2021.12.0 +tblib==3.0.0 +tcolorpy==0.1.6 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==10.1.0 +terminado==0.18.1 +texttable==1.7.0 +thinc==8.2.3 +threadpoolctl==3.5.0 +thriftpy2==0.5.0 +tiktoken==0.6.0 +tinycss2==1.2.1 +tokenizers==0.19.1 +tokenizers==0.21.2 +tomli==2.0.1 +tomlkit==0.12.4 +toolz==0.12.1 +torch-tensorrt==2.4.0a0 +torch==2.4.0 +tornado==6.4 +tqdm-multiprocess==0.0.11 +tqdm==4.66.6 +traitlets==5.14.2 +transformers==4.44.2 +transformers==4.53.0 +treelite==4.1.2 +triton==3.0.0 +typepy==1.3.2 +typer==0.9.4 +types-dataclasses==0.6.6 +types-python-dateutil==2.9.0.20240316 +typing-inspection==0.4.1 +typing_extensions==4.14.0 +typing_extensions==4.8.0 +tzdata==2023.3 +ucx-py==0.37.0 +uri-template==1.3.0 +urllib3==1.26.18 +wandb==0.16.6 +wasabi==1.1.3 +wcwidth==0.2.13 +weasel==0.3.4 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.7.0 +wheel==0.43.0 +widgetsnbextension==4.0.10 +word2number==1.1 +wrapt==1.16.0 +xdoctest==1.0.2 +xformers==0.0.27.post2 +xgboost==2.0.3 +xmltodict==0.14.2 +xxhash==3.4.1 +yarl==1.9.2 +yq==3.4.3 +zarr==2.18.2 +zict==3.0.0 +zipp==3.19.0 +zstandard==0.23.0 +zstd==1.5.0.2 \ No newline at end of file diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/wandb-metadata.json b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/wandb-metadata.json new file mode 100644 index 00000000..c3f3305b --- /dev/null +++ 
b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/wandb-metadata.json @@ -0,0 +1,1031 @@ +{ + "os": "Linux-6.1.141-155.222.amzn2023.x86_64-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2025-10-29T01:16:05.633686", + "startedAt": "2025-10-29T01:16:05.056256", + "docker": null, + "cuda": null, + "args": [ + "--local_rank=0" + ], + "state": "running", + "program": "/scratch/ArcticTraining/projects/arctic_embed/examples/finetune_models/finetune_arctic2_h2h.py", + "codePathLocal": "finetune_arctic2_h2h.py", + "host": "gpu-notebook-qzeng-worker-0", + "username": "yak", + "executable": "/usr/bin/python", + "cpu_count": 96, + "cpu_count_logical": 192, + "cpu_freq": { + "current": 3031.646588541666, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 3101.663, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3127.042, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3100.704, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3061.349, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3070.518, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3111.076, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.031, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3058.986, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3031.413, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3020.5, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3081.239, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.022, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3125.208, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3068.074, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3059.958, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3085.781, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3007.174, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2978.488, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3079.89, + "min": 0.0, + "max": 0.0 + }, + { + 
"current": 3074.605, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3058.774, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3071.684, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3103.003, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3100.253, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3116.711, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3029.594, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3026.99, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3121.57, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3056.345, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3019.204, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3084.641, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3209.012, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3097.09, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3058.787, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3214.63, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3080.27, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3040.76, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3055.342, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3092.546, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3074.325, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3085.873, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3041.479, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3041.48, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2945.461, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3081.481, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3048.297, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2996.298, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3046.374, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3089.511, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2990.39, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3071.825, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2992.229, + "min": 0.0, + "max": 0.0 + }, + { + 
"current": 3052.623, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3087.397, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3045.694, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3043.525, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2992.37, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3174.467, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3143.467, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2976.757, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3193.953, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3042.962, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3111.321, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3138.897, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3018.467, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2970.476, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3086.902, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2983.557, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3192.706, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3080.851, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2973.487, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3073.031, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3034.547, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2991.089, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3073.232, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3005.231, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3061.202, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3013.164, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2978.705, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3061.509, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3054.185, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3047.763, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2986.629, + "min": 0.0, + "max": 0.0 + }, + { + 
"current": 2977.872, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3054.96, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3042.664, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2978.316, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3077.22, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3121.9, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3061.215, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3129.016, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3179.042, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3109.767, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3091.82, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3080.632, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3061.896, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3108.509, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3122.565, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3106.552, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3090.263, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3138.334, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3141.257, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3048.275, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3060.252, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3099.865, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3057.142, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3199.955, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3041.767, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3078.509, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3108.687, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3110.108, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3059.727, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3063.919, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3094.523, + "min": 0.0, + "max": 0.0 + }, + { + 
"current": 3094.781, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3050.449, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3042.719, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3140.615, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3055.191, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3045.255, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3199.996, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3071.148, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3107.759, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.009, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3093.023, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3093.468, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3021.958, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3144.719, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3053.366, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3092.866, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3057.542, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3084.892, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3085.843, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3094.234, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3053.56, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3086.337, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3178.86, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3193.438, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3173.774, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3060.619, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3028.682, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3119.112, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2941.194, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2978.407, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2977.26, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2996.202, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3170.316, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3017.919, + "min": 0.0, + "max": 0.0 + }, + 
{ + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3176.473, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3151.81, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3021.875, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3198.195, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2968.386, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3058.971, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2960.029, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2940.845, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3068.308, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3096.916, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3136.109, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3012.399, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3038.644, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3039.707, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2989.194, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2997.91, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3110.545, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3110.183, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2961.886, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3054.227, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3050.422, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3028.68, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2993.888, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2991.755, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3138.877, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3020.508, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3033.442, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2944.742, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.012, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3021.342, + "min": 0.0, + "max": 0.0 + }, + { + 
"current": 2974.411, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2984.775, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 511.9257621765137, + "used": 167.7535629272461 + } + }, + "gpu": "NVIDIA H200", + "gpu_count": 8, + "gpu_devices": [ + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + } + ], + "memory": { + "total": 1999.9604759216309 + } +} diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/wandb-summary.json b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/wandb-summary.json new file mode 100644 index 00000000..b444c43c --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/examples_query": 2858240, "train/examples_doc": 40071792, "train/batch_size_query": 128, "train/batch_size_doc": 3960, "train/loss_no_truncate": 0.17381365597248077, "_timestamp": 1761718598.1299222, "_runtime": 18033.066536188126, "_step": 22332, "train/gradient_norm": 2.634305715560913, "epoch": 0, "lr": 1.184336942758324e-09, "loss": 0.17381365597248077, "iter_time": 0.6387118072509766, "iter_tflops": 24.813201487393947, "seqlen": 6336.0, "step_time": 0.5951682205200195, "step_tflops": 26.628580322801223, "_wandb": {"runtime": 18037}} \ No newline at end of file diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/run-efu52znd.wandb 
b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/run-efu52znd.wandb new file mode 100644 index 00000000..0e8608a5 Binary files /dev/null and b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_011605-efu52znd/run-efu52znd.wandb differ diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/config.yaml b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/config.yaml new file mode 100644 index 00000000..e1f4c35f --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/config.yaml @@ -0,0 +1,234 @@ +wandb_version: 1 + +type: + desc: null + value: biencoder +code: + desc: null + value: train.py +skip_validation: + desc: null + value: false +model: + desc: null + value: + type: biencoder + name_or_path: /scratch/local + dtype: torch.float32 + save_name: null + attn_implementation: sdpa + disable_activation_checkpoint: true + peft_config: null + pooling: first_token + kwargs: + trust_remote_code: true +tokenizer: + desc: null + value: + type: fake + name_or_path: /scratch/local +data: + desc: null + value: + type: contrastive_pretokenized + sources: [] + eval_sources: [] + train_eval_split: + - 1.0 + - 0.0 + max_length: 8192 + num_proc: 16 + dl_num_workers: 2 + seed: 42 + use_data_cache: null + cache_processed_data: null + cache_dir: /tmp + cache_fs_type: shared + filesystem: s3 + root_directory: s3://ml-dev-sfc-or-dev-misc1-k8s/cortexsearch/biencoder/fine_tune_data_arctic_training_format/synthetic_experiment_1/unified_synthetic/batched_512/data/ + split_factor: 4 + pad_value: 0 + left_pad: false + max_seq_length_query: 1024 + max_seq_length_doc: 1024 + eval_root_directories: null + eval_split_factor: 1 + eval_max_seq_length_query: null + eval_max_seq_length_doc: null + preserve_relations_on_split: true +logger: + desc: null + value: + output_dir: /dev/null + level: INFO + 
print_output_ranks: + - 0 + file_output_ranks: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 +wandb: + desc: null + value: + enable: true + entity: null + project: arctic-embed + name: arctic2m-synthetic-experiment-1-unified-synthetic-20251029T160040Z +scheduler: + desc: null + value: + type: wds + learning_rate: null +optimizer: + desc: null + value: + type: fusedadam + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + learning_rate: 3.0e-05 +deepspeed: + desc: null + value: + gradient_clipping: 10.0 + zero_optimization: + stage: 1.0 + communication_data_type: fp32 + train_micro_batch_size_per_gpu: 1 + train_batch_size: 8.0 + gradient_accumulation_steps: 1 + sequence_parallel_size: 1 + steps_per_print: 10 + prescale_gradients: false + wall_clock_breakdown: false +epochs: + desc: null + value: 1 +loss_log_interval: + desc: null + value: 0 +train_log_iter_interval: + desc: null + value: 1 +train_log_metrics_path: + desc: null + value: train-log-metrics.jsonl +gradient_accumulation_steps: + desc: null + value: 1 +micro_batch_size: + desc: null + value: 1 +sequence_parallel_size: + desc: null + value: 1 +activation_checkpoint_cpu_offload: + desc: null + value: false +tiled_mlp_compute: + desc: null + value: false +seed: + desc: null + value: 42 +checkpoint: + desc: null + value: + - type: biencoder_s3 + output_dir: /scratch/checkpoints-temp-synthetic + enabled: true + auto_resume: false + save_every_n_steps: 3000 + save_every_n_epochs: 0 + save_end_of_training: true +train_iters: + desc: null + value: 0 +eval_frequency: + desc: null + value: 0 +exit_iteration: + desc: null + value: 0 +min_iterations: + desc: null + value: 0 +overfit_first_batch: + desc: null + value: false +mem_profiler: + desc: null + value: null +mem_profiler_dir: + desc: null + value: /dev/null/mem-prof +mem_profiler_max_entries: + desc: null + value: 100000 +kill_switch_path: + desc: null + value: /tmp/at_kill_switch +use_in_batch_negatives: + desc: null + value: false +loss_temperature: + desc: 
null + value: 0.02 +mrl_dim: + desc: null + value: null +eval_interval: + desc: null + value: null +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.6 + framework: xgboost + huggingface_version: 4.53.0 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1761753721.0 + t: + 1: + - 1 + - 5 + - 6 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + 2: + - 1 + - 5 + - 6 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + 3: + - 2 + - 13 + - 16 + - 23 + - 24 + - 61 + 4: 3.10.12 + 5: 0.16.6 + 6: 4.53.0 + 8: + - 5 + 13: linux-x86_64 diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/requirements.txt b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/requirements.txt new file mode 100644 index 00000000..a2f41cd1 --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/requirements.txt @@ -0,0 +1,400 @@ +Babel==2.14.0 +Cython==3.0.10 +DataProperty==1.0.1 +GitPython==3.1.43 +Jinja2==3.1.2 +Markdown==3.6 +MarkupSafe==2.1.3 +PyJWT==2.8.0 +PyYAML==6.0.1 +Pygments==2.17.2 +Send2Trash==1.8.2 +Werkzeug==3.0.2 +absl-py==2.1.0 +accelerate==0.33.0 +aioboto3==12.3.0 +aiobotocore==2.11.2 +aiohttp==3.8.6 +aioitertools==0.11.0 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.3.0 +apex==0.1 +appdirs==1.4.4 +arctic-embed==0.1.0 +arctic-embed==0.1.0 +arctic_training==0.0.6.dev0 +argcomplete==3.6.2 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +arrow==1.3.0 +asciitree==0.3.3 +asn1crypto==1.5.1 +asttokens==2.4.1 +astunparse==1.6.3 +async-lru==2.0.4 +async-timeout==4.0.3 +attrs==23.1.0 +audioread==3.0.1 +awscli==1.32.34 +beautifulsoup4==4.12.3 +best-download==0.1.2 +bleach==6.1.0 +blis==0.7.11 +boto3==1.34.34 +botocore==1.34.34 +cachetools==5.3.3 +catalogue==2.0.10 +certifi==2023.7.22 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==2.2.1 +cmake==3.29.3 
+colorama==0.4.4 +comm==0.2.2 +confection==0.1.5 +contourpy==1.2.1 +cramjam==2.8.3 +cryptography==42.0.5 +cuda-python==12.5.0 +cudf==24.4.0 +cugraph-dgl==24.4.0 +cugraph-equivariant==24.4.0 +cugraph-pyg==24.4.0 +cugraph-service-client==24.4.0 +cugraph-service-server==24.4.0 +cugraph==24.4.0 +cuml==24.4.0 +cupy-cuda12x==13.0.0 +cycler==0.12.1 +cymem==2.0.8 +dask-cuda==24.4.0 +dask-cudf==24.4.0 +dask-expr==0.4.0 +dask==2024.1.1 +datasets==2.21.0 +debugpy==1.8.1 +decorator==5.1.1 +deepspeed==0.15.1 +deepspeed==0.17.1 +defusedxml==0.7.1 +devtools==0.12.2 +dill==0.3.7 +distributed==2024.1.1 +distro==1.9.0 +dm-tree==0.1.8 +dnspython==2.6.1 +docker-pycreds==0.4.0 +docutils==0.16 +einops==0.8.0 +entrypoints==0.4 +evaluate==0.4.3 +exceptiongroup==1.2.0 +execnet==2.1.1 +executing==2.0.1 +expecttest==0.1.3 +fastcore==1.5.29 +fasteners==0.19 +fastjsonschema==2.19.1 +fastparquet==2023.10.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.6.3 +fonttools==4.51.0 +fqdn==1.5.1 +frozenlist==1.4.0 +fsspec==2024.2.0 +gast==0.5.4 +ghapi==1.0.5 +gitdb==4.0.11 +google-auth-oauthlib==0.4.6 +google-auth==2.29.0 +grpcio==1.62.1 +h11==0.14.0 +hf-xet==1.1.5 +hf_transfer==0.1.8 +hjson==3.1.0 +httpcore==1.0.5 +httpx==0.27.0 +huggingface-hub==0.26.2 +huggingface-hub==0.33.1 +hypothesis==5.35.1 +idna==3.4 +igraph==0.11.5 +importlib_metadata==7.1.0 +iniconfig==2.0.0 +intel-openmp==2021.4.0 +ipykernel==6.29.4 +ipython-genutils==0.2.0 +ipython==8.23.0 +ipywidgets==8.1.2 +isoduration==20.11.0 +jedi==0.19.1 +jiter==0.10.0 +jmespath==1.0.1 +joblib==1.4.2 +json5==0.9.24 +jsonlines==4.0.0 +jsonpointer==2.4 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-events==0.10.0 +jupyter-lsp==2.2.4 +jupyter-tensorboard==0.2.0 +jupyter_client==8.6.1 +jupyter_core==5.7.2 +jupyter_server==2.13.0 +jupyter_server_terminals==0.5.3 +jupyterlab==4.1.5 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.25.4 +jupyterlab_widgets==3.0.10 +jupytext==1.16.2 +kiwisolver==1.4.5 +kvikio==24.4.0 +langcodes==3.4.0 
+language_data==1.2.0 +lazy_loader==0.4 +librosa==0.10.1 +liger-kernel==0.2.1 +lightning-thunder==0.2.0.dev0 +lightning-utilities==0.11.2 +lintrunner==0.12.5 +llvmlite==0.42.0 +lm_eval==0.4.5 +locket==1.0.0 +loguru==0.7.3 +looseversion==1.3.0 +lxml==5.3.0 +marisa-trie==1.1.1 +markdown-it-py==3.0.0 +matplotlib-inline==0.1.6 +matplotlib==3.8.4 +mbstrdecoder==1.1.3 +mdit-py-plugins==0.4.1 +mdurl==0.1.2 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +more-itertools==10.5.0 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.4 +multiprocess==0.70.15 +mup==1.0.0 +murmurhash==1.0.10 +nbclient==0.10.0 +nbconvert==7.16.3 +nbformat==5.10.4 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +nltk==3.9.1 +notebook==6.4.10 +notebook_shim==0.2.4 +numba==0.59.1 +numcodecs==0.11.0 +numexpr==2.10.1 +numpy==1.26.2 +nvfuser==0.2.3a0+f73ff1b +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==9.1.0.70 +nvidia-cudnn-frontend==1.3.0 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-dali-cuda120==1.38.0 +nvidia-modelopt==0.11.2 +nvidia-nccl-cu12==2.20.5 +nvidia-nvimgcodec-cu12==0.2.0.7 +nvidia-nvjitlink-cu12==12.9.86 +nvidia-nvtx-cu12==12.1.105 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +nx-cugraph==24.4.0 +oauthlib==3.2.2 +onnx==1.16.0 +openai==1.92.2 +opencv==4.7.0 +opt-einsum==3.3.0 +optree==0.11.0 +overrides==7.7.0 +packaging==23.2 +pandas==2.1.3 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.2 +pathvalidate==3.2.1 +peft==0.12.0 +pexpect==4.9.0 +pillow==10.3.0 +pip==24.0 +platformdirs==3.11.0 +pluggy==1.5.0 +ply==3.11 +polygraphy==0.49.11 +pooch==1.8.1 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.10.0 +prometheus_client==0.20.0 +prompt-toolkit==3.0.43 +protobuf==3.20.3 +pssh==2.3.4 +psutil==5.9.6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +py-cpuinfo==9.0.0 
+py==1.11.0 +pyOpenSSL==24.1.0 +pyarrow==18.0.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pybind11==2.12.0 +pybind11_global==2.12.0 +pycocotools==2.0+nv0.8.0 +pycparser==2.22 +pydantic==2.11.7 +pydantic==2.9.1 +pydantic_core==2.23.3 +pydantic_core==2.33.2 +pylibcugraph==24.4.0 +pylibcugraphops==24.4.0 +pylibraft==24.4.0 +pylibwholegraph==24.4.0 +pynvjitlink==0.2.3 +pynvml==11.4.1 +pyparsing==3.1.2 +pytablewriter==1.2.0 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==14.0 +pytest-shard==0.1.2 +pytest-xdist==3.6.1 +pytest==8.1.1 +python-dateutil==2.8.2 +python-hostlist==1.23.0 +python-json-logger==2.0.7 +pytorch-quantization==2.1.2 +pytorch-triton==3.0.0+989adb9a2 +pytz==2023.3.post1 +pyzmq==25.1.2 +raft-dask==24.4.0 +rapids-dask-dependency==24.4.0a0 +referencing==0.34.0 +regex==2023.12.25 +remote-pdb==2.1.0 +requests-oauthlib==2.0.0 +requests==2.32.3 +retry==0.9.2 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rich==13.7.1 +rmm==24.4.0 +rouge-score==0.1.2 +rpds-py==0.18.0 +rsa==4.7.2 +s3fs==2024.2.0 +s3transfer==0.10.1 +sacrebleu==2.4.3 +safetensors==0.4.2 +safetensors==0.5.3 +scikit-learn==1.5.2 +scipy==1.14.1 +seaborn==0.13.2 +sentencepiece==0.2.0 +sentry-sdk==1.44.1 +setproctitle==1.3.3 +setuptools==69.2.0 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sniffio==1.3.1 +snowflake-connector-python==3.7.1 +snowflake-snowpark-python==1.14.0 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.4 +sphinx_glpi_theme==0.6 +sqlitedict==2.1.0 +srsly==2.4.8 +stack-data==0.6.3 +supervisor==4.2.1 +sympy==1.12 +tabledata==1.3.3 +tabulate==0.9.0 +tbb==2021.12.0 +tblib==3.0.0 +tcolorpy==0.1.6 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==10.1.0 +terminado==0.18.1 +texttable==1.7.0 +thinc==8.2.3 +threadpoolctl==3.5.0 +thriftpy2==0.5.0 +tiktoken==0.6.0 +tinycss2==1.2.1 +tokenizers==0.19.1 +tokenizers==0.21.2 +tomli==2.0.1 +tomlkit==0.12.4 +toolz==0.12.1 
+torch-tensorrt==2.4.0a0 +torch==2.4.0 +tornado==6.4 +tqdm-multiprocess==0.0.11 +tqdm==4.66.6 +traitlets==5.14.2 +transformers==4.44.2 +transformers==4.53.0 +treelite==4.1.2 +triton==3.0.0 +typepy==1.3.2 +typer==0.9.4 +types-dataclasses==0.6.6 +types-python-dateutil==2.9.0.20240316 +typing-inspection==0.4.1 +typing_extensions==4.14.0 +typing_extensions==4.8.0 +tzdata==2023.3 +ucx-py==0.37.0 +uri-template==1.3.0 +urllib3==1.26.18 +wandb==0.16.6 +wasabi==1.1.3 +wcwidth==0.2.13 +weasel==0.3.4 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.7.0 +wheel==0.43.0 +widgetsnbextension==4.0.10 +word2number==1.1 +wrapt==1.16.0 +xdoctest==1.0.2 +xformers==0.0.27.post2 +xgboost==2.0.3 +xmltodict==0.14.2 +xxhash==3.4.1 +yarl==1.9.2 +yq==3.4.3 +zarr==2.18.2 +zict==3.0.0 +zipp==3.19.0 +zstandard==0.23.0 +zstd==1.5.0.2 \ No newline at end of file diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/wandb-metadata.json b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/wandb-metadata.json new file mode 100644 index 00000000..aaa33f44 --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/wandb-metadata.json @@ -0,0 +1,1031 @@ +{ + "os": "Linux-6.1.141-155.222.amzn2023.x86_64-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2025-10-29T16:02:01.974392", + "startedAt": "2025-10-29T16:02:01.384750", + "docker": null, + "cuda": null, + "args": [ + "--local_rank=0" + ], + "state": "running", + "program": "/scratch/ArcticTraining/projects/arctic_embed/examples/finetune_models/finetune_arctic2_synthetic.py", + "codePathLocal": "finetune_arctic2_synthetic.py", + "host": "gpu-notebook-qzeng-worker-0", + "username": "yak", + "executable": "/usr/bin/python", + "cpu_count": 96, + "cpu_count_logical": 192, + "cpu_freq": { + "current": 3033.9742864583336, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 
3140.931, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3024.909, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3063.705, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3038.674, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3017.496, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3016.934, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3059.367, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3009.572, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3044.337, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2966.977, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2976.188, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3036.521, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3215.739, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3036.013, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3126.776, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3016.384, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2942.948, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2978.299, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3047.358, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2983.783, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3073.901, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3005.631, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3163.888, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2963.366, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3199.971, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3001.285, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3163.399, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3009.776, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3178.633, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3090.377, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3025.77, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3116.164, + "min": 0.0, + "max": 0.0 + }, + { + 
"current": 3005.463, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3018.63, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3024.534, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3026.49, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3028.849, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3007.676, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3010.724, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3057.101, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.003, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2959.941, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3048.744, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2983.023, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3074.733, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2998.847, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3092.275, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3089.622, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3084.057, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3083.165, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3030.263, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3140.847, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2990.94, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.012, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3081.312, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3141.847, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3036.716, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3119.285, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3080.108, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3074.141, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3017.709, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3066.666, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3113.337, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3064.318, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3029.908, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2982.716, + "min": 0.0, + "max": 0.0 + }, + 
{ + "current": 3150.229, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3077.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3024.739, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3036.233, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3083.704, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2947.26, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3044.845, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3038.445, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3065.711, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3211.243, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3050.779, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3137.534, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3082.095, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3061.672, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3038.739, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3040.756, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3019.545, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3087.892, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3110.419, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2985.584, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3041.113, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3033.19, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3045.134, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3029.416, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3199.996, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3081.714, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3029.691, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3175.921, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3013.698, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2998.678, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3080.393, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3072.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2981.487, + "min": 0.0, + "max": 0.0 + }, + { 
+ "current": 2989.607, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3060.869, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3008.978, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3065.186, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2981.93, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3060.367, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.003, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3000.129, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3057.244, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3057.919, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3044.878, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3037.764, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2974.971, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3004.576, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2947.122, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2992.005, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3050.306, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3155.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3057.087, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3199.996, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3195.524, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3043.499, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3058.125, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3043.406, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3130.577, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3155.634, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3095.665, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3191.744, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3108.106, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3047.262, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3063.274, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2952.19, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2963.844, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3040.423, + "min": 0.0, + "max": 0.0 + }, 
+ { + "current": 3066.288, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2979.606, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3188.987, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3018.685, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3007.001, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3032.688, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2988.534, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3033.4, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3000.591, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3016.46, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3052.198, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3033.914, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3078.175, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3032.443, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3006.107, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3200.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3026.528, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3053.401, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3029.796, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3011.911, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3018.075, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3120.147, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3068.254, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3015.344, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3075.474, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3077.002, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3057.957, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3067.29, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3035.467, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3022.531, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3059.564, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3090.794, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3019.711, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3092.362, + "min": 0.0, + "max": 0.0 + }, 
+ { + "current": 3082.184, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3029.923, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3038.739, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3199.868, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3103.793, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3128.552, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3013.108, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3103.693, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3086.849, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3089.366, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3020.71, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3032.174, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3029.396, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3048.907, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3001.73, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3025.518, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.0, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2999.358, + "min": 0.0, + "max": 0.0 + }, + { + "current": 3043.106, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 511.9257621765137, + "used": 168.04550170898438 + } + }, + "gpu": "NVIDIA H200", + "gpu_count": 8, + "gpu_devices": [ + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + }, + { + "name": "NVIDIA H200", + "memory_total": 150754820096 + } + ], + "memory": { + "total": 
1999.9604759216309 + } +} diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/wandb-summary.json b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/wandb-summary.json new file mode 100644 index 00000000..451c31b7 --- /dev/null +++ b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/examples_query": 2858240, "train/examples_doc": 40070336, "train/batch_size_query": 128, "train/batch_size_doc": 1400, "train/loss_no_truncate": 0.017819594591856003, "_timestamp": 1761771486.9645138, "_runtime": 17765.57301568985, "_step": 22332, "train/gradient_norm": 1.0675112009048462, "epoch": 0, "lr": 1.184336942758324e-09, "loss": 0.017819594591856003, "iter_time": 0.4476111602783203, "iter_tflops": 46.091552983879566, "seqlen": 8192.0, "step_time": 0.40605093002319337, "step_tflops": 50.80912758644728, "_wandb": {"runtime": 17769}} \ No newline at end of file diff --git a/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/run-xaj5tmfp.wandb b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/run-xaj5tmfp.wandb new file mode 100644 index 00000000..3028870f Binary files /dev/null and b/projects/arctic_embed/examples/finetune_models/wandb/run-20251029_160201-xaj5tmfp/run-xaj5tmfp.wandb differ diff --git a/projects/arctic_embed/notebooks/.DS_Store b/projects/arctic_embed/notebooks/.DS_Store new file mode 100644 index 00000000..9b40b14b Binary files /dev/null and b/projects/arctic_embed/notebooks/.DS_Store differ diff --git a/projects/arctic_embed/notebooks/batch_data/documents.parquet b/projects/arctic_embed/notebooks/batch_data/documents.parquet new file mode 100644 index 00000000..13df905a Binary files /dev/null and b/projects/arctic_embed/notebooks/batch_data/documents.parquet differ diff --git a/projects/arctic_embed/notebooks/batch_data/queries.parquet 
b/projects/arctic_embed/notebooks/batch_data/queries.parquet new file mode 100644 index 00000000..97c66939 Binary files /dev/null and b/projects/arctic_embed/notebooks/batch_data/queries.parquet differ diff --git a/projects/arctic_embed/notebooks/batch_data/relations.parquet b/projects/arctic_embed/notebooks/batch_data/relations.parquet new file mode 100644 index 00000000..9a60686e Binary files /dev/null and b/projects/arctic_embed/notebooks/batch_data/relations.parquet differ diff --git a/projects/arctic_embed/notebooks/check_split_preserve_relations.ipynb b/projects/arctic_embed/notebooks/check_split_preserve_relations.ipynb new file mode 100644 index 00000000..2b8987c2 --- /dev/null +++ b/projects/arctic_embed/notebooks/check_split_preserve_relations.ipynb @@ -0,0 +1,1058 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inspect `split_batch_preserve_relations`\n", + "\n", + "Use this notebook to load a pre-tokenized batch directory and verify that `split_batch_preserve_relations` retains every labeled query–document relation when splitting large batches into smaller pieces." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to use\n", + "1. Update `PROJECT_ROOT` below if your checkout lives somewhere else.\n", + "2. Point `BATCH_DIR` at one of the numbered batch folders containing `queries.parquet`, `documents.parquet`, and `relations.parquet`.\n", + "3. Adjust `SPLIT_FACTOR`, then run the validation cells. The plain `split_batch` cell is the baseline to compare against `split_batch_preserve_relations` (note that the baseline cell currently hard-codes a split factor of 32)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Project root: /Users/qzeng/codes/ArcticTraining\n", + "Using src path: /Users/qzeng/codes/ArcticTraining/projects/arctic_embed/src\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "import sys\n", + "\n", + "PROJECT_ROOT = Path('/Users/qzeng/codes/ArcticTraining').resolve()\n", + "ARCTIC_EMBED_ROOT = PROJECT_ROOT / 'projects' / 'arctic_embed'\n", + "SRC_PATH = ARCTIC_EMBED_ROOT / 'src'\n", + "\n", + "if not SRC_PATH.exists():\n", + " raise FileNotFoundError(f'Expected Arctic Embed sources at {SRC_PATH}')\n", + "\n", + "if str(SRC_PATH) not in sys.path:\n", + " sys.path.insert(0, str(SRC_PATH))\n", + "\n", + "print(f'Project root: {PROJECT_ROOT}')\n", + "print(f'Using src path: {SRC_PATH}')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "from typing import Dict, List, Set, Tuple\n", + "\n", + "import pandas as pd\n", + "import torch\n", + "\n", + "from arctic_embed.core.pretokenized_batch_loader import (\n", + " ContrastiveLearningBatch,\n", + " read_batch,\n", + " split_batch,\n", + " split_batch_preserve_relations,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def summarize_batch(batch: ContrastiveLearningBatch) -> Dict[str, int]:\n", + " relations = batch.relevance_labels.coalesce()\n", + " values = relations.values()\n", + " positives = int((values > 0).sum().item())\n", + " negatives = int((values < 0).sum().item())\n", + " return {\n", + " 'queries': batch.query_tokens.size(0),\n", + " 'documents': batch.document_tokens.size(0),\n", + " 'relations': relations._nnz(),\n", + " 'positives': positives,\n", + " 'negatives': negatives,\n", + " }\n", + "\n", + "def sparse_relations_to_dict(rel: torch.Tensor) -> 
Dict[int, Set[int]]:\n", + " rel = rel.coalesce()\n", + " indices = rel.indices()\n", + " mapping: Dict[int, Set[int]] = defaultdict(set)\n", + " for i in range(rel._nnz()):\n", + " mapping[int(indices[0, i])].add(int(indices[1, i]))\n", + " return mapping\n", + "\n", + "def build_token_lookup(tensor: torch.Tensor) -> Dict[Tuple[int, ...], List[int]]:\n", + " lookup: Dict[Tuple[int, ...], List[int]] = defaultdict(list)\n", + " for idx, row in enumerate(tensor.cpu().tolist()):\n", + " key = tuple(int(token) for token in row)\n", + " lookup[key].append(idx)\n", + " return lookup\n", + "\n", + "def map_rows_to_original(tensor: torch.Tensor, lookup: Dict[Tuple[int, ...], List[int]]) -> List[int]:\n", + " original_indices: List[int] = []\n", + " for row in tensor.cpu().tolist():\n", + " key = tuple(int(token) for token in row)\n", + " candidates = lookup.get(key)\n", + " if not candidates:\n", + " raise KeyError(\n", + " 'Could not map a row back to the original batch. '\n", + " 'Confirm that `BATCH_DIR` points to the same data used to construct the splits.'\n", + " )\n", + " original_indices.append(candidates[0])\n", + " return original_indices\n", + "\n", + "def analyze_splits(original: ContrastiveLearningBatch, splits: List[ContrastiveLearningBatch]):\n", + " query_lookup = build_token_lookup(original.query_tokens)\n", + " doc_lookup = build_token_lookup(original.document_tokens)\n", + " original_rel_map = sparse_relations_to_dict(original.relevance_labels)\n", + "\n", + " summary_records = []\n", + " per_query_records = []\n", + "\n", + " for split_idx, split in enumerate(splits):\n", + " local_to_original_query = map_rows_to_original(split.query_tokens, query_lookup)\n", + " local_to_original_doc = map_rows_to_original(split.document_tokens, doc_lookup)\n", + " split_rel_map = sparse_relations_to_dict(split.relevance_labels)\n", + "\n", + " missing_links = 0\n", + " extra_links = 0\n", + "\n", + " for local_q_idx, original_q_idx in 
enumerate(local_to_original_query):\n", + " expected_docs = original_rel_map.get(original_q_idx, set())\n", + " present_docs = {local_to_original_doc[d] for d in split_rel_map.get(local_q_idx, set())}\n", + " missing = sorted(expected_docs - present_docs)\n", + " extra = sorted(present_docs - expected_docs)\n", + " missing_links += len(missing)\n", + " extra_links += len(extra)\n", + "\n", + " per_query_records.append({\n", + " 'split': split_idx,\n", + " 'split_query_idx': local_q_idx,\n", + " 'original_query_idx': original_q_idx,\n", + " 'expected_docs': len(expected_docs),\n", + " 'present_docs': len(present_docs),\n", + " 'missing_doc_indices': missing,\n", + " 'extra_doc_indices': extra,\n", + " })\n", + "\n", + " rel = split.relevance_labels.coalesce()\n", + " summary_records.append({\n", + " 'split': split_idx,\n", + " 'num_queries': split.query_tokens.size(0),\n", + " 'num_documents': split.document_tokens.size(0),\n", + " 'nnz_relations': rel._nnz(),\n", + " 'missing_links': missing_links,\n", + " 'extra_links': extra_links,\n", + " })\n", + "\n", + " import pandas as pd\n", + " return pd.DataFrame(summary_records), pd.DataFrame(per_query_records)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# --- Configure the batch you want to inspect ---\n", + "BATCH_DIR = Path(ARCTIC_EMBED_ROOT / 'notebooks' / 'batch_data') # e.g. 
PROJECT_ROOT / 'data' / 'batch_000001'\n", + "SPLIT_FACTOR = 32\n", + "\n", + "if not BATCH_DIR.exists():\n", + " raise FileNotFoundError(f'Batch directory not found: {BATCH_DIR}')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original batch summary: {'queries': 512, 'documents': 5565, 'relations': 5632, 'positives': 512, 'negatives': 5120}\n" + ] + } + ], + "source": [ + "batch = read_batch(str(BATCH_DIR))\n", + "print('Original batch summary:', summarize_batch(batch))" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated 32 split batches using `split_batch`\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitnum_queriesnum_documentsnnz_relationsmissing_linksextra_links
001617317330
111617317060
221617316790
3316173164120
4416173161150
5516173159170
6616173158180
7716173156200
8816173153230
9916173152240
101016173156200
111116173153230
121216173152240
131316173151250
141416173148280
151516173155210
161616173153230
171716173151250
181816173150260
191916173149270
202016173147290
212116173144320
222216173142340
232316173142340
242416173139370
252516173141350
262616173144320
272716173143330
282816173141350
292916173148280
303016173147290
313116173146300
\n", + "
" + ], + "text/plain": [ + " split num_queries num_documents nnz_relations missing_links \\\n", + "0 0 16 173 173 3 \n", + "1 1 16 173 170 6 \n", + "2 2 16 173 167 9 \n", + "3 3 16 173 164 12 \n", + "4 4 16 173 161 15 \n", + "5 5 16 173 159 17 \n", + "6 6 16 173 158 18 \n", + "7 7 16 173 156 20 \n", + "8 8 16 173 153 23 \n", + "9 9 16 173 152 24 \n", + "10 10 16 173 156 20 \n", + "11 11 16 173 153 23 \n", + "12 12 16 173 152 24 \n", + "13 13 16 173 151 25 \n", + "14 14 16 173 148 28 \n", + "15 15 16 173 155 21 \n", + "16 16 16 173 153 23 \n", + "17 17 16 173 151 25 \n", + "18 18 16 173 150 26 \n", + "19 19 16 173 149 27 \n", + "20 20 16 173 147 29 \n", + "21 21 16 173 144 32 \n", + "22 22 16 173 142 34 \n", + "23 23 16 173 142 34 \n", + "24 24 16 173 139 37 \n", + "25 25 16 173 141 35 \n", + "26 26 16 173 144 32 \n", + "27 27 16 173 143 33 \n", + "28 28 16 173 141 35 \n", + "29 29 16 173 148 28 \n", + "30 30 16 173 147 29 \n", + "31 31 16 173 146 30 \n", + "\n", + " extra_links \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "5 0 \n", + "6 0 \n", + "7 0 \n", + "8 0 \n", + "9 0 \n", + "10 0 \n", + "11 0 \n", + "12 0 \n", + "13 0 \n", + "14 0 \n", + "15 0 \n", + "16 0 \n", + "17 0 \n", + "18 0 \n", + "19 0 \n", + "20 0 \n", + "21 0 \n", + "22 0 \n", + "23 0 \n", + "24 0 \n", + "25 0 \n", + "26 0 \n", + "27 0 \n", + "28 0 \n", + "29 0 \n", + "30 0 \n", + "31 0 " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "split_batches = split_batch(batch, 32)\n", + "\n", + "print(f'Generated {len(split_batches)} split batches using `split_batch`')\n", + "summary_df, per_query_df = analyze_splits(batch, split_batches)\n", + "summary_df" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "8fda51a9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated 32 split batches using `split_batch_preserve_relations`\n" + ] + }, + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitnum_queriesnum_documentsnnz_relationsmissing_linksextra_links
001617617600
111617617600
221617617600
331617617600
441617617600
551617617600
661617617600
771617517600
881617617600
991617617600
10101617617600
11111617617600
12121617617600
13131617617600
14141617617600
15151617617600
16161617617600
17171617617600
18181617617600
19191617617600
20201617617600
21211617617600
22221617617600
23231617617600
24241617617600
25251617517600
26261617617600
27271617617600
28281617617600
29291617617600
30301617617600
31311617617600
\n", + "
" + ], + "text/plain": [ + " split num_queries num_documents nnz_relations missing_links \\\n", + "0 0 16 176 176 0 \n", + "1 1 16 176 176 0 \n", + "2 2 16 176 176 0 \n", + "3 3 16 176 176 0 \n", + "4 4 16 176 176 0 \n", + "5 5 16 176 176 0 \n", + "6 6 16 176 176 0 \n", + "7 7 16 175 176 0 \n", + "8 8 16 176 176 0 \n", + "9 9 16 176 176 0 \n", + "10 10 16 176 176 0 \n", + "11 11 16 176 176 0 \n", + "12 12 16 176 176 0 \n", + "13 13 16 176 176 0 \n", + "14 14 16 176 176 0 \n", + "15 15 16 176 176 0 \n", + "16 16 16 176 176 0 \n", + "17 17 16 176 176 0 \n", + "18 18 16 176 176 0 \n", + "19 19 16 176 176 0 \n", + "20 20 16 176 176 0 \n", + "21 21 16 176 176 0 \n", + "22 22 16 176 176 0 \n", + "23 23 16 176 176 0 \n", + "24 24 16 176 176 0 \n", + "25 25 16 175 176 0 \n", + "26 26 16 176 176 0 \n", + "27 27 16 176 176 0 \n", + "28 28 16 176 176 0 \n", + "29 29 16 176 176 0 \n", + "30 30 16 176 176 0 \n", + "31 31 16 176 176 0 \n", + "\n", + " extra_links \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "5 0 \n", + "6 0 \n", + "7 0 \n", + "8 0 \n", + "9 0 \n", + "10 0 \n", + "11 0 \n", + "12 0 \n", + "13 0 \n", + "14 0 \n", + "15 0 \n", + "16 0 \n", + "17 0 \n", + "18 0 \n", + "19 0 \n", + "20 0 \n", + "21 0 \n", + "22 0 \n", + "23 0 \n", + "24 0 \n", + "25 0 \n", + "26 0 \n", + "27 0 \n", + "28 0 \n", + "29 0 \n", + "30 0 \n", + "31 0 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preserve_split_batches = split_batch_preserve_relations(batch, SPLIT_FACTOR)\n", + "\n", + "print(f'Generated {len(split_batches)} split batches using `split_batch_preserve_relations`')\n", + "preserve_summary_df, per_query_df = analyze_splits(batch, preserve_split_batches)\n", + "preserve_summary_df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/projects/arctic_embed/src/arctic_embed/.DS_Store b/projects/arctic_embed/src/arctic_embed/.DS_Store new file mode 100644 index 00000000..44eb9600 Binary files /dev/null and b/projects/arctic_embed/src/arctic_embed/.DS_Store differ diff --git a/projects/arctic_embed/src/arctic_embed/__init__.py b/projects/arctic_embed/src/arctic_embed/__init__.py index 3e86bce1..c2ffba06 100644 --- a/projects/arctic_embed/src/arctic_embed/__init__.py +++ b/projects/arctic_embed/src/arctic_embed/__init__.py @@ -12,3 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Import checkpoint engines to register them +from . import checkpointing +from . 
import biencoder_s3_checkpoint diff --git a/projects/arctic_embed/src/arctic_embed/biencoder_model_factory.py b/projects/arctic_embed/src/arctic_embed/biencoder_model_factory.py index e5bff672..10aa11ad 100644 --- a/projects/arctic_embed/src/arctic_embed/biencoder_model_factory.py +++ b/projects/arctic_embed/src/arctic_embed/biencoder_model_factory.py @@ -49,17 +49,21 @@ class BiencoderModelFactory(ModelFactory): def create_config(self): arctic_training_model_config = self.config assert isinstance(arctic_training_model_config, BiencoderModelConfig) - return AutoConfig.from_pretrained(self.config.name_or_path, **arctic_training_model_config.kwargs) + return AutoConfig.from_pretrained( + self.config.name_or_path, **arctic_training_model_config.kwargs + ) def create_model(self, model_config: AutoConfig) -> Biencoder: arctic_training_model_config = self.config assert isinstance(arctic_training_model_config, BiencoderModelConfig) - trust_remote_code = arctic_training_model_config.kwargs.get("trust_remote_code", None) + trust_remote_code = arctic_training_model_config.kwargs.get( + "trust_remote_code", None + ) encoder = AutoModel.from_pretrained( self.config.name_or_path, config=model_config, attn_implementation=self.config.attn_implementation, - torch_dtype=self.config.dtype, + torch_dtype=self.config.dtype.value, trust_remote_code=trust_remote_code, ) return Biencoder(encoder, pooling=arctic_training_model_config.pooling) @@ -71,7 +75,34 @@ def post_create_model_callback(self, model: Biencoder): model.encoder = get_peft_model(model.encoder, peft_config) if not self.config.disable_activation_checkpoint: - model.encoder.gradient_checkpointing_enable() - model.encoder = HFModelFactory.make_model_gradient_checkpointing_compatible(model.encoder) + import torch + + def enable_gc_every_n_layers(model, n=2, use_reentrant=False): + try: + model.gradient_checkpointing_enable( + gradient_checkpointing_kwargs={"use_reentrant": use_reentrant} + ) + except TypeError: + 
model.gradient_checkpointing_enable() + + if hasattr(model, "layers"): + for i, layer in enumerate(model.layers): + if not hasattr(layer, "_gradient_checkpointing_func"): + layer._gradient_checkpointing_func = ( + lambda f, *args, **kwargs: torch.utils.checkpoint.checkpoint( + f, *args, use_reentrant=use_reentrant, **kwargs + ) + ) + layer.gradient_checkpointing = (i % n == 0) + else: + raise ValueError(f"Model {model} has no `layers` attribute") + + if hasattr(model, "config") and hasattr(model.config, "use_cache"): + model.config.use_cache = False + + model.encoder = HFModelFactory.make_model_gradient_checkpointing_compatible( + model.encoder + ) + enable_gc_every_n_layers(model.encoder, n=1, use_reentrant=False) return model diff --git a/projects/arctic_embed/src/arctic_embed/biencoder_s3_checkpoint.py b/projects/arctic_embed/src/arctic_embed/biencoder_s3_checkpoint.py new file mode 100644 index 00000000..cb62a39c --- /dev/null +++ b/projects/arctic_embed/src/arctic_embed/biencoder_s3_checkpoint.py @@ -0,0 +1,615 @@ +# Copyright 2025 Snowflake Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import concurrent.futures +import json +import shutil +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +from typing import TYPE_CHECKING +from typing import Optional + +if TYPE_CHECKING: + from arctic_training.trainer.trainer import Trainer + +import boto3 +import deepspeed +import torch + +from arctic_training.checkpoint.hf_engine import HFCheckpointEngine +from arctic_training.config.checkpoint import CheckpointConfig +from arctic_training.logging import logger + + +class BiencoderS3CheckpointConfig(CheckpointConfig): + type: str = "biencoder_s3" + s3_path: str # S3 path like s3://bucket/path/to/checkpoints + local_cache_dir: Optional[str] = None # Local cache directory + async_upload: bool = True # Whether to upload to S3 asynchronously + max_local_checkpoints: int = 3 # Maximum number of checkpoints to keep locally + + +class BiencoderS3CheckpointEngine(HFCheckpointEngine): + name = "biencoder_s3" + config: BiencoderS3CheckpointConfig + + def __init__(self, trainer: "Trainer", config: BiencoderS3CheckpointConfig) -> None: + super().__init__(trainer, config) + self.s3_client = boto3.client("s3") + + # Initialize thread pool for async uploads if enabled + if getattr(config, "async_upload", True): + self.upload_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="s3-upload") + self.pending_uploads: list[tuple[int, concurrent.futures.Future]] = [] + + # Parse S3 path + if not config.s3_path.startswith("s3://"): + raise ValueError(f"S3 path must start with s3://, got: {config.s3_path}") + + path_parts = config.s3_path[5:].split("/", 1) + self.s3_bucket = path_parts[0] + self.s3_prefix = path_parts[1] if len(path_parts) > 1 else "" + + # Setup local cache directory + if config.local_cache_dir: + self.local_cache_dir = Path(config.local_cache_dir) + else: + self.local_cache_dir = Path("/tmp") / "arctic_embed_checkpoints_cache" + self.local_cache_dir.mkdir(parents=True, exist_ok=True) + + logger.info( + f"S3 checkpoint engine 
initialized: bucket={self.s3_bucket}, prefix={self.s3_prefix}, " + f"local_cache={self.local_cache_dir}, max_local_checkpoints={config.max_local_checkpoints}" + ) + + def _get_zero_stage(self) -> int: + return self.trainer.config.deepspeed.get("zero_optimization", {}).get("stage", 0) + + def _export_encoder_weights(self, encoder, export_dir: Path) -> None: + zero_stage = self._get_zero_stage() + state_dict = None + + if zero_stage >= 3: + params = list(encoder.parameters()) + with deepspeed.zero.GatheredParameters(params, modifier_rank=0, enabled=True): + if self.global_rank == 0: + state_dict = {k: v.detach().cpu() for k, v in encoder.state_dict().items()} + elif self.global_rank != 0: + return + + if self.global_rank != 0: + return + + encoder.save_pretrained(export_dir, safe_serialization=True, state_dict=state_dict) + + config_path = export_dir / "config.json" + if not config_path.exists() and hasattr(encoder, "config"): + encoder.config.to_json_file(config_path) + + self._export_tokenizer_assets(export_dir) + + def _export_tokenizer_assets(self, export_dir: Path) -> None: + try: + from transformers import AutoTokenizer + except ImportError: + logger.warning("transformers not available, skipping tokenizer export") + return + + tokenizer_source = None + + tokenizer_cfg = getattr(self.trainer.config, "tokenizer", None) + if tokenizer_cfg is not None: + tokenizer_source = getattr(tokenizer_cfg, "name_or_path", None) + + if tokenizer_source is None: + tokenizer_source = getattr(self.trainer.config.model, "tokenizer_name", None) + + if tokenizer_source is None: + tokenizer_source = self.trainer.config.model.name_or_path + + try: + tokenizer = AutoTokenizer.from_pretrained(tokenizer_source, trust_remote_code=True) + except Exception as err: + logger.warning( + "Failed to export tokenizer from %s: %s", tokenizer_source, err + ) + return + + try: + tokenizer.save_pretrained(export_dir) + except Exception as err: + logger.warning("Failed to save tokenizer to %s: %s", 
export_dir, err) + + @property + def biencoder_config_file(self) -> Path: + return self.checkpoint_dir / "biencoder_config.json" + + @property + def s3_checkpoint_prefix(self) -> str: + """S3 prefix for current checkpoint""" + return f"{self.s3_prefix}/global_step_{self.trainer.global_step}" + + def _upload_to_s3(self, local_path: Path, s3_key: str) -> None: + """Upload a file to S3""" + try: + self.s3_client.upload_file(str(local_path), self.s3_bucket, s3_key) + logger.info(f"Uploaded {local_path} to s3://{self.s3_bucket}/{s3_key}") + except Exception as e: + logger.error(f"Failed to upload {local_path} to S3: {e}") + raise + + def _download_from_s3(self, s3_key: str, local_path: Path) -> None: + """Download a file from S3""" + try: + local_path.parent.mkdir(parents=True, exist_ok=True) + self.s3_client.download_file(self.s3_bucket, s3_key, str(local_path)) + logger.info(f"Downloaded s3://{self.s3_bucket}/{s3_key} to {local_path}") + except Exception as e: + logger.error(f"Failed to download from S3: {e}") + raise + + def _list_s3_checkpoints(self) -> list[int]: + """List all available checkpoints in S3 and return sorted global steps""" + try: + paginator = self.s3_client.get_paginator("list_objects_v2") + pages = paginator.paginate(Bucket=self.s3_bucket, Prefix=self.s3_prefix + "/") + + global_steps = set() + for page in pages: + if "Contents" not in page: + continue + for obj in page["Contents"]: + key = obj["Key"] + # Extract global_step from path like prefix/global_step_1000/... 
def save(self, model) -> None:
    """Save a DeepSpeed checkpoint locally, then upload it to S3.

    Every rank writes its part of the DeepSpeed checkpoint (including
    optimizer and scheduler state) under the fixed tag ``"checkpoint"``.
    Rank 0 additionally writes ``biencoder_config.json`` and
    ``trainer_config.json``. The Hugging Face encoder export is entered on
    every rank: ``_export_encoder_weights`` gathers ZeRO-3 partitioned
    parameters, which requires all ranks to participate, and restricts the
    actual file writing to rank 0 on its own.

    Args:
        model: DeepSpeed engine wrapping the biencoder; the wrapped module is
            taken from ``model.module`` when that attribute exists.
    """
    # Clean up old local checkpoints before saving the new one.
    self._cleanup_local_cache()
    self._cleanup_deepspeed_subdirs()

    model.save_checkpoint(
        self.checkpoint_dir,
        tag="checkpoint",  # fixed tag; load() uses the same name
        client_state={
            "train_batch_idx": self.trainer.train_batch_idx,
            "epoch_idx": self.trainer.epoch_idx,
        },
    )

    biencoder_model = model.module if hasattr(model, "module") else model
    is_rank_zero = self.global_rank == 0

    if is_rank_zero:
        # Biencoder configuration (only when the model exposes a pooling mode).
        pooling_attr = getattr(biencoder_model, "pooling", None)
        if pooling_attr is not None:
            self.biencoder_config_file.write_text(json.dumps({"pooling": pooling_attr}, indent=2))

        # Full trainer config for reproducibility.
        trainer_config_path = self.checkpoint_dir / "trainer_config.json"
        trainer_config_path.write_text(self.trainer.config.model_dump_json(indent=2))

    encoder = getattr(biencoder_model, "encoder", None)
    if encoder is not None:
        hf_export_dir = self.checkpoint_dir / "hf_encoder"
        if is_rank_zero:
            hf_export_dir.mkdir(parents=True, exist_ok=True)
        # Must run on all ranks: calling it from rank 0 alone would leave the
        # ZeRO-3 parameter gather waiting for the other ranks forever.
        self._export_encoder_weights(encoder, hf_export_dir)

    if is_rank_zero:
        logger.info("DeepSpeed checkpoint saved with optimizer, scheduler, and HF weights")

    # Synchronize all ranks before uploading.
    if self.trainer.world_size > 1:
        torch.distributed.barrier()

    if hasattr(self, "upload_executor") and getattr(self.config, "async_upload", True):
        logger.info(f"Starting async upload for checkpoint at global_step={self.trainer.global_step}")
        future = self.upload_executor.submit(self._do_s3_uploads, self.checkpoint_dir, self.trainer.global_step)
        self.pending_uploads.append((self.trainer.global_step, future))
        self._cleanup_completed_uploads()
    else:
        # Synchronous upload.
        self._do_s3_uploads(self.checkpoint_dir, self.trainer.global_step)

    # In async mode this only orders the submission of the upload task; the
    # transfer itself may still be running in the executor.
    if self.trainer.world_size > 1:
        torch.distributed.barrier()
def wait_for_all_uploads(self) -> None:
    """Block until every pending asynchronous upload has finished.

    Intended to be called at the end of training. Failures raised by an
    upload task are logged, not propagated, and the pending list is cleared
    afterwards. Does nothing when the instance has no ``pending_uploads``
    attribute.
    """
    if not hasattr(self, "pending_uploads"):
        return

    logger.info("Waiting for all async uploads to complete...")
    failed_steps = []
    for step, future in self.pending_uploads:
        try:
            future.result()  # re-raises any exception from the upload task
        except Exception as e:
            failed_steps.append(step)
            logger.error(f"Upload failed for global_step={step}: {e}")
        else:
            logger.info(f"Upload completed for global_step={step}")

    self.pending_uploads = []
    # Only claim overall success when no upload raised.
    if failed_steps:
        logger.error(f"Async uploads finished with failures for global_step(s): {failed_steps}")
    else:
        logger.info("All async uploads completed")
global_steps = [] + if self.global_rank == 0: + global_steps = self._list_s3_checkpoints() + if not global_steps: + logger.info("No checkpoints found in S3, starting from scratch") + + # Broadcast whether checkpoints exist to all ranks + has_checkpoints = len(global_steps) > 0 if self.global_rank == 0 else False + if self.trainer.world_size > 1: + has_checkpoints_list = [has_checkpoints] + torch.distributed.broadcast_object_list(has_checkpoints_list, src=0) + has_checkpoints = has_checkpoints_list[0] + + # If no checkpoints, all ranks return + if not has_checkpoints: + # Ensure all ranks are synchronized before returning + if self.trainer.world_size > 1: + torch.distributed.barrier() + return + + # Now proceed with normal checkpoint loading logic + local_checkpoint_dir = None + local_rank = self.trainer.config.local_rank + + # Get node information for multi-node setup + import socket + + hostname = socket.gethostname() + all_hostnames = [None] * self.trainer.world_size + torch.distributed.all_gather_object(all_hostnames, hostname) + node_ranks = [i for i, h in enumerate(all_hostnames) if h == hostname] + + # Get the latest checkpoint step - broadcast from global rank 0 to all ranks + latest_step = 0 + if self.global_rank == 0: + latest_step = global_steps[-1] + logger.info(f"Found {len(global_steps)} checkpoints, loading latest: global_step_{latest_step}") + + # Broadcast latest_step to ALL ranks (not just local_rank 0) + latest_step_list = [latest_step] + torch.distributed.broadcast_object_list(latest_step_list, src=0) + latest_step = latest_step_list[0] + + # Only local_rank 0 on each node downloads checkpoint + # This is necessary when nodes don't share storage + if local_rank == 0: + # Check if checkpoint already exists in local cache + local_checkpoint_dir = self.local_cache_dir / f"global_step_{latest_step}" + + if not local_checkpoint_dir.exists(): + # Download checkpoint files + checkpoint_s3_prefix = f"{self.s3_prefix}/global_step_{latest_step}" + 
local_checkpoint_dir.mkdir(parents=True, exist_ok=True) + + # Use the node_ranks we already computed + logger.info(f"Node {hostname} contains ranks: {node_ranks}") + + # List and download files for this checkpoint + paginator = self.s3_client.get_paginator("list_objects_v2") + pages = paginator.paginate(Bucket=self.s3_bucket, Prefix=checkpoint_s3_prefix) + + for page in pages: + if "Contents" not in page: + continue + for obj in page["Contents"]: + s3_key = obj["Key"] + # Preserve the full directory structure including subdirectories + if s3_key.startswith(checkpoint_s3_prefix + "/"): + relative_path = s3_key[len(checkpoint_s3_prefix) + 1 :] # Remove prefix + local_path = local_checkpoint_dir / relative_path + filename = local_path.name + + # Determine if this node needs this file + should_download = False + + # Check if it's a rank-specific optimizer state file + if "pp_rank_" in filename: + # Extract rank number from filename + for rank in node_ranks: + if ( + f"pp_rank_{rank}_" in filename + or f"pp_rank_{rank:02d}_" in filename + or f"pp_rank_{rank:03d}_" in filename + ): + should_download = True + break + else: + # Non-rank-specific files (model states, config, etc.) 
are needed by all nodes + should_download = True + + if should_download: + logger.info(f"Downloading {filename} for node {hostname}") + self._download_from_s3(s3_key, local_path) + + logger.info(f"Downloaded checkpoint to {local_checkpoint_dir}") + else: + logger.info(f"Using cached checkpoint from {local_checkpoint_dir}") + + # Broadcast checkpoint directory path within each node + if self.trainer.world_size > 1: + # Instead of creating a new group, use all_gather to share checkpoint paths + # Each rank will put its checkpoint dir (or None) in the list + checkpoint_paths = [None] * self.trainer.world_size + my_checkpoint_str = str(local_checkpoint_dir) if local_checkpoint_dir else None + torch.distributed.all_gather_object(checkpoint_paths, my_checkpoint_str) + + # Find the checkpoint path from the same node's local_rank 0 + # node_ranks contains the global ranks on this node + local_rank_0_global_rank = node_ranks[0] # First rank in node_ranks is local_rank 0 + checkpoint_dir_str = checkpoint_paths[local_rank_0_global_rank] + + if checkpoint_dir_str: + local_checkpoint_dir = Path(checkpoint_dir_str) + logger.info(f"Rank {self.global_rank} received checkpoint path: {local_checkpoint_dir}") + + # Synchronize all ranks to ensure local_rank 0 has finished downloading + torch.distributed.barrier() + else: + # This should not happen if checkpoint exists + logger.error( + f"Rank {self.global_rank}: checkpoint directory not received from node's local_rank 0 (global rank" + f" {local_rank_0_global_rank})" + ) + raise RuntimeError( + f"Rank {self.global_rank}: Failed to receive checkpoint directory from local_rank 0" + ) + elif local_checkpoint_dir is None and local_rank == 0: + # Single GPU case where local_rank 0 didn't find/download checkpoint + logger.error("Checkpoint directory is None for single GPU training") + # In single GPU case, no need for barrier + return + + # All ranks load from the same local checkpoint using DeepSpeed + # This includes model weights, 
optimizer state, and scheduler state + + # At this point, local_checkpoint_dir should not be None + if local_checkpoint_dir is None: + logger.error(f"Rank {self.global_rank}: local_checkpoint_dir is None, cannot load checkpoint") + raise RuntimeError("Checkpoint directory is None") + + # Debug: Log the directory structure + logger.info(f"Loading checkpoint from directory: {local_checkpoint_dir}") + if local_checkpoint_dir.exists(): + logger.info(f"Directory contents: {list(local_checkpoint_dir.iterdir())}") + # Check for the DeepSpeed checkpoint subdirectory + checkpoint_subdir = local_checkpoint_dir / "checkpoint" + if checkpoint_subdir.exists(): + logger.info(f"DeepSpeed checkpoint subdirectory found: {checkpoint_subdir}") + logger.info(f"Checkpoint subdir contents: {list(checkpoint_subdir.iterdir())}") + else: + logger.warning(f"DeepSpeed checkpoint subdirectory not found at {checkpoint_subdir}") + + # Synchronize before calling DeepSpeed load_checkpoint + # This ensures all ranks have the checkpoint files visible before loading + if self.trainer.world_size > 1: + logger.info(f"Rank {self.global_rank} synchronizing before DeepSpeed load_checkpoint...") + torch.distributed.barrier() + + # DeepSpeed expects: load_checkpoint(checkpoint_dir, tag) + # where the actual files are in checkpoint_dir/tag/ + logger.info(f"Rank {self.global_rank} calling DeepSpeed load_checkpoint...") + _, client_states = model.load_checkpoint( + local_checkpoint_dir, + tag="checkpoint", # Use the same fixed tag name as in save + ) + logger.info(f"Rank {self.global_rank} completed DeepSpeed load_checkpoint") + + # Restore training state from client_states + self.trainer.train_batch_idx = client_states.get("train_batch_idx", 0) + self.trainer.epoch_idx = client_states.get("epoch_idx", 0) + self.trainer.global_step = model.global_steps # DeepSpeed tracks this + + logger.info( + f"Loaded DeepSpeed checkpoint from {local_checkpoint_dir}: " + f"global_step={self.trainer.global_step}, " + 
f"epoch_idx={self.trainer.epoch_idx}, " + f"train_batch_idx={self.trainer.train_batch_idx}" + ) + + # Debug: Check if metrics will be logged + if hasattr(self.trainer, "config") and hasattr(self.trainer.config, "train_log_iter_interval"): + log_interval = self.trainer.config.train_log_iter_interval + next_log_batch = ( + ((self.trainer.train_batch_idx // log_interval) + 1) * log_interval if log_interval > 0 else 0 + ) + logger.info( + f"Metrics logging: interval={log_interval}, current_batch={self.trainer.train_batch_idx}, " + f"next_log_batch={next_log_batch}" + ) + + # Load biencoder config + # local_checkpoint_dir is guaranteed to be not None here due to the check above + biencoder_config_path = local_checkpoint_dir / "biencoder_config.json" + if biencoder_config_path.exists(): + biencoder_config = json.loads(biencoder_config_path.read_text()) + # Access the underlying Biencoder model from DeepSpeedEngine + biencoder_model = model.module if hasattr(model, "module") else model + biencoder_model.pooling = biencoder_config.get("pooling", biencoder_model.pooling) + + # Recreate dataloader to skip batches if needed + # Only if we're in the same epoch (for your case with 1 epoch) + if self.trainer.epoch_idx == 0 and self.trainer.train_batch_idx > 0: + # Call the trainer's method to recreate dataloader + if hasattr(self.trainer, "_recreate_dataloader_for_resume"): + self.trainer._recreate_dataloader_for_resume(self.trainer.train_batch_idx) + + logger.info(f"Rank {self.global_rank}: Successfully resumed training with scheduler state intact") + + # IMPORTANT: Synchronize all ranks after checkpoint loading + # This ensures all nodes are ready before starting training + if self.trainer.world_size > 1: + logger.info(f"Rank {self.global_rank} waiting for all ranks to complete checkpoint loading...") + torch.distributed.barrier() + logger.info(f"Rank {self.global_rank} synchronized, ready to start training") diff --git 
a/projects/arctic_embed/src/arctic_embed/contrastive_dataloader.py b/projects/arctic_embed/src/arctic_embed/contrastive_dataloader.py index ebb58784..d4c59142 100644 --- a/projects/arctic_embed/src/arctic_embed/contrastive_dataloader.py +++ b/projects/arctic_embed/src/arctic_embed/contrastive_dataloader.py @@ -39,6 +39,8 @@ class ContrastivePretokenizedDataConfig(DataConfig): filesystem: FilesystemOption root_directory: str split_factor: int = 1 + pad_value: int = 0 + left_pad: bool = False sources: List[DataSourceConfig] = [] max_seq_length_query: Optional[int] = None max_seq_length_doc: Optional[int] = None @@ -46,13 +48,16 @@ class ContrastivePretokenizedDataConfig(DataConfig): eval_split_factor: int = 1 eval_max_seq_length_query: Optional[int] = None eval_max_seq_length_doc: Optional[int] = None + preserve_relations_on_split: bool = False class ContrastivePretokenizedDataFactory(DataFactory): name: str = "contrastive_pretokenized" config: ContrastivePretokenizedDataConfig - def __call__(self) -> Tuple[DataLoader, Optional[Dict[str, DataLoader]]]: + def __call__(self, **kwargs) -> Tuple[DataLoader, Optional[Dict[str, DataLoader]]]: + # Support optional start_batch_idx for resuming + start_batch_idx = kwargs.get('start_batch_idx', 0) fs = self.get_filesystem(self.config.filesystem) # Create the train loader. 
def last_token_pool(out: Tensor, attention_mask: Tensor) -> Tensor:
    """Select the last non-padding token representation for each sequence.

    The position is located from the right edge of the mask, so both left
    and right padding are handled.

    Args:
        out: Hidden states of shape (B, S, H).
        attention_mask: Mask of shape (B, S); non-zero / True marks a real
            token, zero / False marks padding.

    Returns:
        Tensor of shape (B, H). A row whose mask contains no real token
        yields the representation at position 0.

    Raises:
        ValueError: If ``out`` is not 3-dimensional or ``attention_mask``
            does not have shape (B, S).
    """
    if out.ndim != 3:
        raise ValueError(f"out must have shape (B, S, H), got {tuple(out.shape)}")
    if attention_mask.shape != out.shape[:2]:
        raise ValueError(
            f"attention_mask must have shape {tuple(out.shape[:2])}, got {tuple(attention_mask.shape)}"
        )

    batch_size, seq_len = attention_mask.shape
    mask = attention_mask.to(device=out.device, dtype=torch.bool)

    # argmax returns the first maximum, so on the reversed mask it gives the
    # distance of the last real token from the right edge.
    offset_from_right = torch.flip(mask.to(dtype=torch.int64), dims=[1]).argmax(dim=1)
    col = seq_len - 1 - offset_from_right

    # All-padding rows would otherwise resolve to seq_len - 1; use index 0.
    col = torch.where(mask.any(dim=1), col, torch.zeros_like(col))

    row = torch.arange(batch_size, device=out.device)
    return out[row, col, ...]
batch_paths = sorted(filesystem.ls(root_directory)) assert len(batch_paths) > 0, f"No batches subdirectories in {root_directory=}" self.batch_paths = batch_paths + logger.info( + "[dataset.init]" + f" shard_id={self.shard_id} world_size={self.world_size} split_factor={self.split_factor} root={self.root_directory}" + ) def __len__(self) -> int: return len(self.split_factor * self.batch_paths) @@ -140,11 +148,51 @@ def _read_batches_for_path( max_seq_len_query=self.max_seq_len_query, max_seq_len_doc=self.max_seq_len_doc, ) - split_batches = split_batch(un_split_batch, self.split_factor) - split_sharded_batches = [shard_batch(b, self.shard_id, self.world_size) for b in split_batches] + if self.preserve_relations_on_split: + split_batches = split_batch_preserve_relations(un_split_batch, self.split_factor) + else: + split_batches = split_batch(un_split_batch, self.split_factor) + + # Skip split batches that would result in zero-length shards for the current + # world size. This happens when the (already split) batch has fewer samples + # than the number of ranks, leading to empty tensors downstream. 
+ filtered_splits: List[ContrastiveLearningBatch] = [] + for split_idx, split_batch_item in enumerate(split_batches): + num_queries = split_batch_item.query_tokens.size(0) + num_documents = split_batch_item.document_tokens.size(0) + if num_queries < self.world_size or num_documents < self.world_size: + logger.warning( + "[dataset.batch] Skipping split %d from %s because it has " + "%d queries and %d documents, which is fewer than world_size=%d", + split_idx, + batch_directory, + num_queries, + num_documents, + self.world_size, + ) + continue + filtered_splits.append(split_batch_item) + + if not filtered_splits: + logger.warning( + "[dataset.batch] Skipping entire batch directory %s because" + " all %d splits were smaller than world_size=%d", + batch_directory, + len(split_batches), + self.world_size, + ) + return [] + + split_sharded_batches = [shard_batch(b, self.shard_id, self.world_size) for b in filtered_splits] # Move only the first split batch to the device, to avoid hogging device memory. - if self.device is not None: + if self.device is not None and len(split_sharded_batches) > 0: split_sharded_batches[0] = split_sharded_batches[0].to_device(self.device, non_blocking=True) + b0 = split_sharded_batches[0] + logger.info( + f"[dataset.batch] dir={batch_directory} " + f"q_tokens={b0.query_tokens.size(0)} d_tokens={b0.document_tokens.size(0)} " + f"shard_id={self.shard_id}/{self.world_size} split_factor={self.split_factor}" + ) return split_sharded_batches def __iter__(self) -> Iterator[ContrastiveLearningBatch]: @@ -166,8 +214,32 @@ def __iter__(self) -> Iterator[ContrastiveLearningBatch]: tokenization_metadata = "" logger.info(f"Iterating dataset: {self.root_directory} | {tokenization_metadata}") - # Initialize iteration over batch paths. 
- path_iter = iter(self.batch_paths) + # Track the current step index + step_idx = 0 + + # Calculate which batches to skip based on start_batch_idx (which is actually start_step_idx) + batches_to_skip = 0 + remaining_steps_to_skip = 0 + + if self.start_batch_idx > 0: + # start_batch_idx is actually the step index we want to start from + batches_to_skip = self.start_batch_idx // self.split_factor + remaining_steps_to_skip = self.start_batch_idx % self.split_factor + + logger.info(f"Will skip first {self.start_batch_idx} steps to resume from step {self.start_batch_idx}") + logger.info( + f"Skipping {batches_to_skip} complete batch files (each with {self.split_factor} steps), " + f"and {remaining_steps_to_skip} additional steps in the next batch" + ) + + # Skip the appropriate number of batch paths + batch_paths_to_use = self.batch_paths[batches_to_skip:] + # Update step counter to reflect skipped steps + step_idx = batches_to_skip * self.split_factor + else: + batch_paths_to_use = self.batch_paths + + path_iter = iter(batch_paths_to_use) first_path = next(path_iter, None) # Edge case: no batches. @@ -195,13 +267,27 @@ def __iter__(self) -> Iterator[ContrastiveLearningBatch]: # Yield the split sharded batches from the last future, pushing # each next sharded split batch to device non-blocking as we go. 
def split_batch_preserve_relations(batch: ContrastiveLearningBatch, total_splits: int) -> List[ContrastiveLearningBatch]:
    """Split a batch into smaller ones while keeping each query's labeled docs.

    Queries are cut into ``min(total_splits, num_queries)`` contiguous chunks
    whose sizes differ by at most one. Each chunk receives the union of all
    documents that have a stored relevance entry with one of its queries, so
    positives and hard negatives stay with their query. Documents that have
    no stored relation to any query of a chunk are not part of that split.

    Falls back to ``split_batch`` when the batch has no stored relations or
    when some query has no labeled document. Returns ``[batch]`` unchanged
    when ``total_splits == 1`` or when the batch has no queries or documents.

    Args:
        batch: Batch to split; ``relevance_labels`` is a sparse
            (queries x documents) tensor.
        total_splits: Requested number of splits, must be positive.

    Returns:
        The list of split batches, in query order.
    """
    assert total_splits > 0
    if total_splits == 1:
        return [batch]

    num_queries = batch.query_tokens.size(0)
    num_documents = batch.document_tokens.size(0)
    if num_queries == 0 or num_documents == 0:
        return [batch]

    relations = batch.relevance_labels.coalesce()
    if relations._nnz() == 0:
        return split_batch(batch, total_splits)

    # Query -> labeled documents (positives and negatives alike). Convert the
    # index tensor once instead of extracting one scalar per stored entry.
    query_rows, doc_cols = relations.indices().tolist()
    query_to_docs: List[set] = [set() for _ in range(num_queries)]
    for q_idx, d_idx in zip(query_rows, doc_cols):
        query_to_docs[q_idx].add(d_idx)

    if not all(query_to_docs):
        # Some query has no labeled document; relation-preserving split is
        # not applicable.
        return split_batch(batch, total_splits)

    num_splits = min(total_splits, num_queries)
    base, remainder = divmod(num_queries, num_splits)

    split_batches: List[ContrastiveLearningBatch] = []
    q_start = 0
    for split_idx in range(num_splits):
        # The first `remainder` chunks take one extra query.
        q_end = q_start + base + (1 if split_idx < remainder else 0)

        # Every query has at least one document, so this union is non-empty.
        doc_indices = sorted(set().union(*query_to_docs[q_start:q_end]))

        q_idx_tensor = torch.arange(q_start, q_end, dtype=torch.long)
        d_idx_tensor = torch.tensor(doc_indices, dtype=torch.long)

        split_batches.append(
            ContrastiveLearningBatch(
                query_tokens=batch.query_tokens[q_idx_tensor],
                query_attention_mask=batch.query_attention_mask[q_idx_tensor],
                document_tokens=batch.document_tokens[d_idx_tensor],
                document_attention_mask=batch.document_attention_mask[d_idx_tensor],
                relevance_labels=_slice_relations_by_indices(relations, q_idx_tensor, d_idx_tensor),
            )
        )
        q_start = q_end

    return split_batches
doc_indices.to(device) + + num_queries = relations.size(0) + num_docs = relations.size(1) + + q_map = torch.full((num_queries,), -1, dtype=torch.long, device=device) + d_map = torch.full((num_docs,), -1, dtype=torch.long, device=device) + q_map[query_indices] = torch.arange(query_indices.size(0), device=device) + d_map[doc_indices] = torch.arange(doc_indices.size(0), device=device) + + indices = relations.indices() + values = relations.values() + + mapped_q = q_map[indices[0]] + mapped_d = d_map[indices[1]] + mask = (mapped_q >= 0) & (mapped_d >= 0) + + if mask.sum() == 0: + empty_indices = torch.zeros((2, 0), dtype=torch.long, device=device) + empty_values = torch.empty((0,), dtype=values.dtype, device=device) + return torch.sparse_coo_tensor(empty_indices, empty_values, size=(query_indices.size(0), doc_indices.size(0))).coalesce() + + new_indices = torch.stack([mapped_q[mask], mapped_d[mask]], dim=0) + new_values = values[mask] + return torch.sparse_coo_tensor(new_indices, new_values, size=(query_indices.size(0), doc_indices.size(0))).coalesce() + + def collate_tokens( tokens: pa.LargeListArray, max_seq_len: Optional[int] = None, diff --git a/projects/arctic_embed/src/arctic_embed/data_processing/utils.py b/projects/arctic_embed/src/arctic_embed/data_processing/utils.py index f9898252..d62a46e3 100644 --- a/projects/arctic_embed/src/arctic_embed/data_processing/utils.py +++ b/projects/arctic_embed/src/arctic_embed/data_processing/utils.py @@ -59,9 +59,17 @@ def first_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tens def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor: """Pools the hidden states by selecting the last non-padding token representation for each sequence.""" - batch_size = last_hidden_states.shape[0] + batch_size, seq_len, _ = last_hidden_states.shape + mask = attention_mask.to(device=last_hidden_states.device, dtype=torch.bool) + mask_int = mask.to(dtype=torch.int64) + + offset_from_right = 
torch.flip(mask_int, dims=[1]).argmax(dim=1) + col = seq_len - 1 - offset_from_right + + has_token = mask.any(dim=1) + col = torch.where(has_token, col, torch.zeros_like(col)) + row = torch.arange(batch_size, device=last_hidden_states.device) - col = attention_mask.sum(dim=1) - 1 # position of the last non-padding token return last_hidden_states[row, col] diff --git a/projects/arctic_embed/src/arctic_embed/trainer.py b/projects/arctic_embed/src/arctic_embed/trainer.py index 6542df12..2bb62cb8 100644 --- a/projects/arctic_embed/src/arctic_embed/trainer.py +++ b/projects/arctic_embed/src/arctic_embed/trainer.py @@ -40,7 +40,9 @@ from .biencoder_model_factory import BiencoderModelConfig from .biencoder_model_factory import BiencoderModelFactory -from .checkpointing import BiencoderCheckpointEngine + +# from .checkpointing import BiencoderCheckpointEngine +from .biencoder_s3_checkpoint import BiencoderS3CheckpointEngine from .contrastive_dataloader import ContrastivePretokenizedDataConfig from .contrastive_dataloader import ContrastivePretokenizedDataFactory from .core.biencoder_model import Biencoder @@ -143,7 +145,7 @@ class BiencoderTrainer(Trainer): config: BiencoderTrainerConfig data_factory: ContrastivePretokenizedDataFactory model_factory: BiencoderModelFactory - checkpoint_engine: BiencoderCheckpointEngine + checkpoint_engine: BiencoderS3CheckpointEngine optimizer_factory: FusedAdamOptimizerFactory scheduler_factory: Union[WSDSchedulerFactory, HFSchedulerFactory] tokenizer_factory: FakeTokenizerFactory @@ -159,7 +161,29 @@ class BiencoderTrainer(Trainer): def is_wandb_logger(self) -> bool: return self.global_rank == 0 and self.config.wandb.enable + def _recreate_dataloader_for_resume(self, start_batch_idx: int) -> None: + """Recreate dataloader to skip batches when resuming.""" + if start_batch_idx > 0: + logger.info(f"Recreating dataloader to skip {start_batch_idx} batches for resume") + # Create new data factory with start_batch_idx + data_factory = 
self.config.data.factory(self) + self.train_dataloader, _ = data_factory(start_batch_idx=start_batch_idx) # type: ignore[call-arg] + def pre_train_callback(self) -> None: + # Synchronize all ranks before starting training + # This is especially important when resuming from checkpoint + if self.world_size > 1: + logger.info(f"Rank {self.global_rank} synchronizing before training starts...") + torch.distributed.barrier() + logger.info(f"Rank {self.global_rank} synchronized, starting training") + + # Log training configuration for debugging + if self.global_rank == 0: + logger.info( + f"Starting training from batch_idx={self.train_batch_idx}, " + f"log_interval={self.config.train_log_iter_interval}" + ) + # Turn on weights and biases on the master worker. if self.is_wandb_logger: import wandb @@ -208,6 +232,8 @@ def forward_and_gather(self, batch: ContrastiveLearningBatch) -> Tuple[Tensor, T @torch.no_grad() def eval(self, batch: ContrastiveLearningBatch) -> Dict[str, float]: query_embeddings, document_embeddings, relations = self.forward_and_gather(batch) + if self.config.use_in_batch_negatives: + relations[relations == 0] = -1 q_emb = F.normalize(query_embeddings, dim=1) d_emb = F.normalize(document_embeddings, dim=1) scores = torch.matmul(q_emb, d_emb.transpose(0, 1)) diff --git a/projects/arctic_embed/tests/test_biencoder_model.py b/projects/arctic_embed/tests/test_biencoder_model.py deleted file mode 100644 index 6deec034..00000000 --- a/projects/arctic_embed/tests/test_biencoder_model.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2025 Snowflake Inc. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from arctic_embed.core.biencoder_model import average_pool -from arctic_embed.core.biencoder_model import first_token_pool -from arctic_embed.core.biencoder_model import last_token_pool -from torch.testing import assert_close - -attention_left_pad = torch.tensor( - [ - [False, False, True, True], - [True, True, True, True], - ] -) -values_left_pad = torch.tensor( - [ - [ - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [1.0, 1.0, 1.0], - [2.0, 2.0, 2.0], - ], - [ - [11.0, 11.0, 11.0], - [12.0, 12.0, 12.0], - [13.0, 13.0, 13.0], - [14.0, 14.0, 14.0], - ], - ] -) -attention_right_pad = attention_left_pad.flip(dims=[1]) -values_right_pad = values_left_pad.flip(dims=[1]) - - -def test_pooling() -> None: - want_first_or_last = torch.tensor( - [ - [1.0, 1.0, 1.0], - [11.0, 11.0, 11.0], - ] - ) - want_average = torch.tensor( - [ - [1.5, 1.5, 1.5], - [12.5, 12.5, 12.5], - ] - ) - first_of_left_pad = first_token_pool(values_left_pad, attention_left_pad) - assert_close(first_of_left_pad, want_first_or_last) - last_of_right_pad = last_token_pool(values_right_pad, attention_right_pad) - assert_close(last_of_right_pad, want_first_or_last) - assert_close( - average_pool(values_left_pad, attention_left_pad), - average_pool(values_right_pad, attention_right_pad), - ) - assert_close(average_pool(values_left_pad, attention_left_pad), want_average)